From ad1faa20e18d3ec4fa7d11b8a78bde1c7c5a7784 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 29 Jan 2026 20:06:23 -0500 Subject: [PATCH 01/13] skopeo: Fix layer reference ordering in config splitstream Add layer references in the original manifest diff_id order rather than the order layers finish downloading. This ensures reproducible config splitstream digests regardless of download timing. Fixes: 54c3c6de ("splitstream: Rework file format") Assisted-by: OpenCode (Claude claude-opus-4-5-20250514) --- crates/composefs-oci/src/skopeo.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/crates/composefs-oci/src/skopeo.rs b/crates/composefs-oci/src/skopeo.rs index 01dae7d6..f22d0038 100644 --- a/crates/composefs-oci/src/skopeo.rs +++ b/crates/composefs-oci/src/skopeo.rs @@ -247,9 +247,18 @@ impl ImageOp { let mut splitstream = self.repo.create_stream(OCI_CONFIG_CONTENT_TYPE); - // Collect the results. + // Collect the results and build a map of diff_id -> verity + let mut layer_verities = std::collections::HashMap::new(); for (diff_id, future) in entries { - splitstream.add_named_stream_ref(diff_id, &future.await??); + layer_verities.insert(diff_id.clone(), future.await??); + } + + // Add layer references in the original diff_id order (not download order) + for diff_id in config.rootfs().diff_ids() { + let verity = layer_verities + .get(diff_id) + .ok_or_else(|| anyhow::anyhow!("Missing verity for layer {diff_id}"))?; + splitstream.add_named_stream_ref(diff_id, verity); } // NB: We trust that skopeo has verified that raw_config has the correct digest From e242ef6263219f0f38cf7cd37289c719f9440cc2 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 29 Jan 2026 11:37:14 -0500 Subject: [PATCH 02/13] Add tar-header crate for zerocopy-based tar parsing This new internal crate provides safe, zero-copy parsing of tar archive headers using the zerocopy crate. It supports: - POSIX.1-1988, UStar (POSIX.1-2001), and GNU tar header formats - Base-256 encoding for large values (GNU extension) - EntryType enum for all standard tar entry types - Checksum verification The goal is to share code between composefs-oci and cstorage (which both do tar header parsing), and eventually enable upstream contribution to the tar-rs crate (ref: alexcrichton/tar-rs#392). 
Assisted-by: OpenCode (Opus 4.5) Signed-off-by: Colin Walters --- crates/tar-header/Cargo.toml | 23 + crates/tar-header/src/lib.rs | 2581 ++++++++++++++++++++++++ crates/tar-header/src/stream/entry.rs | 137 ++ crates/tar-header/src/stream/error.rs | 93 + crates/tar-header/src/stream/limits.rs | 141 ++ crates/tar-header/src/stream/mod.rs | 76 + crates/tar-header/src/stream/parser.rs | 548 +++++ crates/tar-header/src/stream/tests.rs | 553 +++++ 8 files changed, 4152 insertions(+) create mode 100644 crates/tar-header/Cargo.toml create mode 100644 crates/tar-header/src/lib.rs create mode 100644 crates/tar-header/src/stream/entry.rs create mode 100644 crates/tar-header/src/stream/error.rs create mode 100644 crates/tar-header/src/stream/limits.rs create mode 100644 crates/tar-header/src/stream/mod.rs create mode 100644 crates/tar-header/src/stream/parser.rs create mode 100644 crates/tar-header/src/stream/tests.rs diff --git a/crates/tar-header/Cargo.toml b/crates/tar-header/Cargo.toml new file mode 100644 index 00000000..8fc6c129 --- /dev/null +++ b/crates/tar-header/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "tar-header" +description = "Zerocopy-based raw tar header structs for safe parsing" +keywords = ["tar", "zerocopy", "header"] + +edition.workspace = true +license.workspace = true +readme.workspace = true +repository.workspace = true +rust-version.workspace = true +version.workspace = true + +[dependencies] +thiserror = { version = "2.0", default-features = false } +zerocopy = { version = "0.8.0", default-features = false, features = ["derive", "std"] } + +[dev-dependencies] +proptest = "1" +tar = "0.4" +tempfile = "3" + +[lints] +workspace = true diff --git a/crates/tar-header/src/lib.rs b/crates/tar-header/src/lib.rs new file mode 100644 index 00000000..64e7859b --- /dev/null +++ b/crates/tar-header/src/lib.rs @@ -0,0 +1,2581 @@ +//! Zerocopy-based raw tar header structs for safe parsing. +//! +//! This crate provides zero-copy parsing of tar archive headers, supporting +//! POSIX.1-1988, UStar (POSIX.1-2001), and GNU tar formats. All header structs +//! use the [`zerocopy`] crate for safe, efficient memory-mapped access without +//! allocations. +//! +//! # Header Formats +//! +//! Tar archives have evolved through several formats: +//! +//! - **Old (POSIX.1-1988)**: The original Unix tar format with basic fields +//! - **UStar (POSIX.1-2001)**: Adds `magic`/`version`, user/group names, and path prefix +//! - **GNU tar**: Extends UStar with sparse file support and long name/link extensions +//! +//! # Header Field Layout +//! +//! All tar headers are 512 bytes. The common fields (offsets 0-156) are shared: +//! +//! | Offset | Size | Field | Description | +//! |--------|------|-----------|------------------------------------------| +//! | 0 | 100 | name | File path (null-terminated if < 100) | +//! | 100 | 8 | mode | File mode in octal ASCII | +//! | 108 | 8 | uid | Owner user ID in octal ASCII | +//! | 116 | 8 | gid | Owner group ID in octal ASCII | +//! | 124 | 12 | size | File size in octal ASCII | +//! | 136 | 12 | mtime | Modification time (Unix epoch, octal) | +//! | 148 | 8 | checksum | Header checksum in octal ASCII | +//! | 156 | 1 | typeflag | Entry type (see [`EntryType`]) | +//! | 157 | 100 | linkname | Link target for hard/symbolic links | +//! +//! **UStar extension** (offsets 257-500): +//! +//! | Offset | Size | Field | +//! |--------|------|-----------| +//! | 257 | 6 | magic | "ustar\0" | +//! | 263 | 2 | version | "00" | +//! 
| 265 | 32 | uname | Owner user name |
+//! | 297 | 32 | gname | Owner group name |
+//! | 329 | 8 | devmajor | Device major number |
+//! | 337 | 8 | devminor | Device minor number |
+//! | 345 | 155 | prefix | Path prefix for long names |
+//!
+//! **GNU extension** (offsets 257-500, replaces prefix):
+//!
+//! | Offset | Size | Field |
+//! |--------|------|-------------|
+//! | 345 | 12 | atime | Access time |
+//! | 357 | 12 | ctime | Change time |
+//! | 369 | 12 | offset | Multivolume offset |
+//! | 381 | 4 | longnames | (deprecated) |
+//! | 386 | 96 | sparse | 4 × 24-byte sparse descriptors |
+//! | 482 | 1 | isextended | More sparse headers follow |
+//! | 483 | 12 | realsize | Real size of sparse file |
+//!
+//! # Example
+//!
+//! ```
+//! use tar_header::{Header, EntryType};
+//!
+//! // Parse a header from raw bytes
+//! let data = [0u8; 512]; // Would normally come from a tar file
+//! let header = Header::from_bytes(&data).unwrap();
+//!
+//! // Access header fields
+//! let entry_type = header.entry_type();
+//! let path = header.path_bytes();
+//! ```
+//!
+//! # Streaming Parser
+//!
+//! For parsing complete tar archives with automatic handling of GNU and PAX
+//! extensions, see the [`stream`] module:
+//!
+//! ```no_run
+//! use std::fs::File;
+//! use std::io::BufReader;
+//! use tar_header::stream::{TarStreamParser, Limits};
+//!
+//! let file = File::open("archive.tar").unwrap();
+//! let mut parser = TarStreamParser::new(BufReader::new(file), Limits::default());
+//!
+//! while let Some(entry) = parser.next_entry().unwrap() {
+//!     println!("{} ({} bytes)", entry.path_lossy(), entry.size);
+//!     let size = entry.size;
+//!     drop(entry);
+//!     parser.skip_content(size).unwrap();
+//! }
+//! ```
+
+pub mod stream;
+
+use std::fmt;
+
+use thiserror::Error;
+use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout};
+
+/// Size of a tar header block in bytes.
+pub const HEADER_SIZE: usize = 512;
+
+/// Magic string for UStar format headers ("ustar\0").
+pub const USTAR_MAGIC: &[u8; 6] = b"ustar\0";
+
+/// Version field for UStar format headers ("00").
+pub const USTAR_VERSION: &[u8; 2] = b"00";
+
+/// Magic string for GNU tar format headers ("ustar ").
+pub const GNU_MAGIC: &[u8; 6] = b"ustar ";
+
+/// Version field for GNU tar format headers (" \0").
+pub const GNU_VERSION: &[u8; 2] = b" \0";
+
+/// Errors that can occur when parsing tar headers.
+#[derive(Debug, Error)]
+pub enum HeaderError {
+    /// The provided data is too short to contain a header.
+    #[error("insufficient data: expected {HEADER_SIZE} bytes, got {0}")]
+    InsufficientData(usize),
+
+    /// An octal field contains invalid characters.
+    #[error("invalid octal field: {0:?}")]
+    InvalidOctal(Vec<u8>),
+
+    /// The header checksum does not match the computed value.
+    #[error("checksum mismatch: expected {expected}, computed {computed}")]
+    ChecksumMismatch {
+        /// The checksum value stored in the header.
+        expected: u64,
+        /// The checksum computed from the header bytes.
+        computed: u64,
+    },
+}
+
+/// Result type for header parsing operations.
+pub type Result<T> = std::result::Result<T, HeaderError>;
+
+// ============================================================================
+// Raw Header Structs
+// ============================================================================
+
+/// Raw 512-byte tar header block.
+///
+/// This is the most basic representation of a tar header, treating the
+/// entire block as an opaque byte array. Use [`Header`] for a higher-level
+/// interface with accessor methods.
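+///
+/// A minimal sketch of borrowing a `RawHeader` straight from a 512-byte
+/// buffer via zerocopy (the same mechanism [`Header::from_bytes`] uses
+/// internally):
+///
+/// ```
+/// use tar_header::RawHeader;
+/// use zerocopy::FromBytes;
+///
+/// let buf = [0u8; 512];
+/// let raw = RawHeader::ref_from_bytes(&buf).unwrap();
+/// assert_eq!(raw.bytes.len(), 512);
+/// ```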
+#[derive(Clone, Copy, FromBytes, IntoBytes, Immutable, KnownLayout)] +#[repr(C)] +pub struct RawHeader { + /// The raw header bytes. + pub bytes: [u8; 512], +} + +impl Default for RawHeader { + fn default() -> Self { + Self { bytes: [0u8; 512] } + } +} + +impl fmt::Debug for RawHeader { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("RawHeader") + .field("name", &truncate_null(&self.bytes[0..100])) + .finish_non_exhaustive() + } +} + +/// Old-style (POSIX.1-1988) tar header with named fields. +/// +/// This represents the original Unix tar format. Fields after `linkname` +/// are undefined in this format and may contain garbage. See module-level +/// documentation for the field layout table. +#[derive(Clone, Copy, FromBytes, IntoBytes, Immutable, KnownLayout)] +#[repr(C)] +pub struct OldHeader { + /// File path name (null-terminated if shorter than 100 bytes). + pub name: [u8; 100], + /// File mode in octal ASCII. + pub mode: [u8; 8], + /// Owner user ID in octal ASCII. + pub uid: [u8; 8], + /// Owner group ID in octal ASCII. + pub gid: [u8; 8], + /// File size in octal ASCII. + pub size: [u8; 12], + /// Modification time as Unix timestamp in octal ASCII. + pub mtime: [u8; 12], + /// Header checksum in octal ASCII. + pub checksum: [u8; 8], + /// Entry type flag. + pub typeflag: u8, + /// Link target name for hard/symbolic links. + pub linkname: [u8; 100], + /// Padding to fill the 512-byte block. + pub pad: [u8; 255], +} + +impl Default for OldHeader { + fn default() -> Self { + Self { + name: [0u8; 100], + mode: [0u8; 8], + uid: [0u8; 8], + gid: [0u8; 8], + size: [0u8; 12], + mtime: [0u8; 12], + checksum: [0u8; 8], + typeflag: 0, + linkname: [0u8; 100], + pad: [0u8; 255], + } + } +} + +impl fmt::Debug for OldHeader { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("OldHeader") + .field("name", &String::from_utf8_lossy(truncate_null(&self.name))) + .field("mode", &String::from_utf8_lossy(truncate_null(&self.mode))) + .field("typeflag", &self.typeflag) + .finish_non_exhaustive() + } +} + +/// UStar (POSIX.1-2001) tar header format. +/// +/// This format adds a magic number, version, user/group names, device +/// numbers for special files, and a path prefix for long filenames. +/// See module-level documentation for the field layout table. +#[derive(Clone, Copy, FromBytes, IntoBytes, Immutable, KnownLayout)] +#[repr(C)] +pub struct UstarHeader { + /// File path name (null-terminated if shorter than 100 bytes). + pub name: [u8; 100], + /// File mode in octal ASCII. + pub mode: [u8; 8], + /// Owner user ID in octal ASCII. + pub uid: [u8; 8], + /// Owner group ID in octal ASCII. + pub gid: [u8; 8], + /// File size in octal ASCII. + pub size: [u8; 12], + /// Modification time as Unix timestamp in octal ASCII. + pub mtime: [u8; 12], + /// Header checksum in octal ASCII. + pub checksum: [u8; 8], + /// Entry type flag. + pub typeflag: u8, + /// Link target name for hard/symbolic links. + pub linkname: [u8; 100], + /// Magic string identifying the format ("ustar\0" for UStar). + pub magic: [u8; 6], + /// Format version ("00" for UStar). + pub version: [u8; 2], + /// Owner user name (null-terminated). + pub uname: [u8; 32], + /// Owner group name (null-terminated). + pub gname: [u8; 32], + /// Device major number in octal ASCII (for special files). + pub devmajor: [u8; 8], + /// Device minor number in octal ASCII (for special files). + pub devminor: [u8; 8], + /// Path prefix for names longer than 100 bytes. 
+ pub prefix: [u8; 155], + /// Padding to fill the 512-byte block. + pub pad: [u8; 12], +} + +impl Default for UstarHeader { + fn default() -> Self { + let mut header = Self { + name: [0u8; 100], + mode: [0u8; 8], + uid: [0u8; 8], + gid: [0u8; 8], + size: [0u8; 12], + mtime: [0u8; 12], + checksum: [0u8; 8], + typeflag: 0, + linkname: [0u8; 100], + magic: [0u8; 6], + version: [0u8; 2], + uname: [0u8; 32], + gname: [0u8; 32], + devmajor: [0u8; 8], + devminor: [0u8; 8], + prefix: [0u8; 155], + pad: [0u8; 12], + }; + header.magic.copy_from_slice(USTAR_MAGIC); + header.version.copy_from_slice(USTAR_VERSION); + header + } +} + +impl fmt::Debug for UstarHeader { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("UstarHeader") + .field("name", &String::from_utf8_lossy(truncate_null(&self.name))) + .field("mode", &String::from_utf8_lossy(truncate_null(&self.mode))) + .field("typeflag", &self.typeflag) + .field("magic", &self.magic) + .field( + "uname", + &String::from_utf8_lossy(truncate_null(&self.uname)), + ) + .finish_non_exhaustive() + } +} + +/// GNU tar sparse file chunk descriptor. +/// +/// Each descriptor specifies a region of data in a sparse file. +/// Both offset and numbytes are 12-byte octal ASCII fields. +#[derive(Clone, Copy, Default, FromBytes, IntoBytes, Immutable, KnownLayout)] +#[repr(C)] +pub struct GnuSparseHeader { + /// Byte offset of this chunk within the file. + pub offset: [u8; 12], + /// Number of bytes in this chunk. + pub numbytes: [u8; 12], +} + +impl fmt::Debug for GnuSparseHeader { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("GnuSparseHeader") + .field("offset", &parse_octal(&self.offset).ok()) + .field("numbytes", &parse_octal(&self.numbytes).ok()) + .finish() + } +} + +/// GNU tar header format with sparse file support. +/// +/// This format extends UStar with support for sparse files, access/creation +/// times, and long name handling. The prefix field is replaced with +/// additional metadata. See module-level documentation for the field layout table. +#[derive(Clone, Copy, FromBytes, IntoBytes, Immutable, KnownLayout)] +#[repr(C)] +pub struct GnuHeader { + /// File path name (null-terminated if shorter than 100 bytes). + pub name: [u8; 100], + /// File mode in octal ASCII. + pub mode: [u8; 8], + /// Owner user ID in octal ASCII. + pub uid: [u8; 8], + /// Owner group ID in octal ASCII. + pub gid: [u8; 8], + /// File size in octal ASCII (for sparse files, this is the size on disk). + pub size: [u8; 12], + /// Modification time as Unix timestamp in octal ASCII. + pub mtime: [u8; 12], + /// Header checksum in octal ASCII. + pub checksum: [u8; 8], + /// Entry type flag. + pub typeflag: u8, + /// Link target name for hard/symbolic links. + pub linkname: [u8; 100], + /// Magic string identifying the format ("ustar " for GNU). + pub magic: [u8; 6], + /// Format version (" \0" for GNU). + pub version: [u8; 2], + /// Owner user name (null-terminated). + pub uname: [u8; 32], + /// Owner group name (null-terminated). + pub gname: [u8; 32], + /// Device major number in octal ASCII (for special files). + pub devmajor: [u8; 8], + /// Device minor number in octal ASCII (for special files). + pub devminor: [u8; 8], + /// Access time in octal ASCII. + pub atime: [u8; 12], + /// Creation time in octal ASCII. + pub ctime: [u8; 12], + /// Offset for multivolume archives. + pub offset: [u8; 12], + /// Long names support (deprecated). + pub longnames: [u8; 4], + /// Unused padding byte. 
+ pub unused: u8, + /// Sparse file chunk descriptors (4 entries). + pub sparse: [GnuSparseHeader; 4], + /// Flag indicating more sparse headers follow. + pub isextended: u8, + /// Real size of sparse file (uncompressed). + pub realsize: [u8; 12], + /// Padding to fill the 512-byte block. + pub pad: [u8; 17], +} + +impl Default for GnuHeader { + fn default() -> Self { + let mut header = Self { + name: [0u8; 100], + mode: [0u8; 8], + uid: [0u8; 8], + gid: [0u8; 8], + size: [0u8; 12], + mtime: [0u8; 12], + checksum: [0u8; 8], + typeflag: 0, + linkname: [0u8; 100], + magic: [0u8; 6], + version: [0u8; 2], + uname: [0u8; 32], + gname: [0u8; 32], + devmajor: [0u8; 8], + devminor: [0u8; 8], + atime: [0u8; 12], + ctime: [0u8; 12], + offset: [0u8; 12], + longnames: [0u8; 4], + unused: 0, + sparse: [GnuSparseHeader::default(); 4], + isextended: 0, + realsize: [0u8; 12], + pad: [0u8; 17], + }; + header.magic.copy_from_slice(GNU_MAGIC); + header.version.copy_from_slice(GNU_VERSION); + header + } +} + +impl fmt::Debug for GnuHeader { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("GnuHeader") + .field("name", &String::from_utf8_lossy(truncate_null(&self.name))) + .field("mode", &String::from_utf8_lossy(truncate_null(&self.mode))) + .field("typeflag", &self.typeflag) + .field("magic", &self.magic) + .field("isextended", &self.isextended) + .finish_non_exhaustive() + } +} + +/// Extended sparse header block for GNU tar. +/// +/// When a file has more than 4 sparse regions, additional sparse headers +/// are stored in separate 512-byte blocks following the main header. +/// Each block contains 21 sparse descriptors plus an `isextended` flag. +#[derive(Clone, Copy, Default, FromBytes, IntoBytes, Immutable, KnownLayout)] +#[repr(C)] +pub struct GnuExtSparseHeader { + /// Sparse chunk descriptors (21 entries). + pub sparse: [GnuSparseHeader; 21], + /// Flag indicating more sparse headers follow. + pub isextended: u8, + /// Padding to fill the 512-byte block. + pub pad: [u8; 7], +} + +impl fmt::Debug for GnuExtSparseHeader { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("GnuExtSparseHeader") + .field("isextended", &self.isextended) + .finish_non_exhaustive() + } +} + +// ============================================================================ +// Entry Type +// ============================================================================ + +/// Tar entry type indicating the kind of file system object. +/// +/// The type is stored as a single ASCII byte in the header. Some types +/// are extensions defined by POSIX or GNU tar. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum EntryType { + /// Regular file (type '0' or '\0' for old tar compatibility). + Regular, + /// Hard link to another file in the archive (type '1'). + Link, + /// Symbolic link (type '2'). + Symlink, + /// Character device (type '3'). + Char, + /// Block device (type '4'). + Block, + /// Directory (type '5'). + Directory, + /// FIFO/named pipe (type '6'). + Fifo, + /// Contiguous file (type '7', rarely used). + Continuous, + /// GNU tar long name extension (type 'L'). + GnuLongName, + /// GNU tar long link extension (type 'K'). + GnuLongLink, + /// GNU tar sparse file (type 'S'). + GnuSparse, + /// PAX extended header for next entry (type 'x'). + XHeader, + /// PAX global extended header (type 'g'). + XGlobalHeader, + /// Unknown or unsupported entry type. + Other(u8), +} + +impl EntryType { + /// Parse an entry type from a raw byte value. 
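+    ///
+    /// Unrecognized bytes are preserved as [`EntryType::Other`]. A small
+    /// sketch of the mapping:
+    ///
+    /// ```
+    /// use tar_header::EntryType;
+    ///
+    /// assert_eq!(EntryType::from_byte(b'5'), EntryType::Directory);
+    /// // Old (pre-POSIX) archives use NUL for regular files.
+    /// assert_eq!(EntryType::from_byte(0), EntryType::Regular);
+    /// assert_eq!(EntryType::from_byte(b'Z'), EntryType::Other(b'Z'));
+    /// ```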
+ #[must_use] + pub fn from_byte(byte: u8) -> Self { + match byte { + b'0' | b'\0' => EntryType::Regular, + b'1' => EntryType::Link, + b'2' => EntryType::Symlink, + b'3' => EntryType::Char, + b'4' => EntryType::Block, + b'5' => EntryType::Directory, + b'6' => EntryType::Fifo, + b'7' => EntryType::Continuous, + b'L' => EntryType::GnuLongName, + b'K' => EntryType::GnuLongLink, + b'S' => EntryType::GnuSparse, + b'x' => EntryType::XHeader, + b'g' => EntryType::XGlobalHeader, + other => EntryType::Other(other), + } + } + + /// Convert an entry type to its raw byte representation. + /// + /// Note that `Regular` is encoded as '0', not '\0'. + #[must_use] + pub fn to_byte(self) -> u8 { + match self { + EntryType::Regular => b'0', + EntryType::Link => b'1', + EntryType::Symlink => b'2', + EntryType::Char => b'3', + EntryType::Block => b'4', + EntryType::Directory => b'5', + EntryType::Fifo => b'6', + EntryType::Continuous => b'7', + EntryType::GnuLongName => b'L', + EntryType::GnuLongLink => b'K', + EntryType::GnuSparse => b'S', + EntryType::XHeader => b'x', + EntryType::XGlobalHeader => b'g', + EntryType::Other(b) => b, + } + } + + /// Returns true if this is a regular file entry. + #[must_use] + pub fn is_file(self) -> bool { + matches!(self, EntryType::Regular | EntryType::Continuous) + } + + /// Returns true if this is a directory entry. + #[must_use] + pub fn is_dir(self) -> bool { + self == EntryType::Directory + } + + /// Returns true if this is a symbolic link entry. + #[must_use] + pub fn is_symlink(self) -> bool { + self == EntryType::Symlink + } + + /// Returns true if this is a hard link entry. + #[must_use] + pub fn is_hard_link(self) -> bool { + self == EntryType::Link + } +} + +impl From for EntryType { + fn from(byte: u8) -> Self { + Self::from_byte(byte) + } +} + +impl From for u8 { + fn from(entry_type: EntryType) -> Self { + entry_type.to_byte() + } +} + +// ============================================================================ +// Header Wrapper +// ============================================================================ + +/// High-level tar header wrapper with accessor methods. +/// +/// This struct wraps a [`RawHeader`] and provides convenient methods for +/// accessing header fields, detecting the format, and verifying checksums. +/// +/// # Format Detection +/// +/// The format is detected by examining the magic field: +/// - UStar: magic = "ustar\0", version = "00" +/// - GNU: magic = "ustar ", version = " \0" +/// - Old: anything else +/// +/// # Example +/// +/// ``` +/// use tar_header::Header; +/// +/// let mut header = Header::new_ustar(); +/// assert!(header.is_ustar()); +/// assert!(!header.is_gnu()); +/// ``` +#[derive(Clone, Copy, FromBytes, Immutable, KnownLayout)] +#[repr(transparent)] +pub struct Header { + raw: RawHeader, +} + +impl Header { + /// Create a new header with UStar format magic and version. + #[must_use] + pub fn new_ustar() -> Self { + let mut header = Self { + raw: RawHeader::default(), + }; + // Set magic and version for UStar format + header.raw.bytes[257..263].copy_from_slice(USTAR_MAGIC); + header.raw.bytes[263..265].copy_from_slice(USTAR_VERSION); + header + } + + /// Create a new header with GNU tar format magic and version. 
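+    ///
+    /// A quick sketch of what this sets up:
+    ///
+    /// ```
+    /// use tar_header::Header;
+    ///
+    /// let header = Header::new_gnu();
+    /// assert!(header.is_gnu());
+    /// assert!(!header.is_ustar());
+    /// ```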
+ #[must_use] + pub fn new_gnu() -> Self { + let mut header = Self { + raw: RawHeader::default(), + }; + // Set magic and version for GNU format + header.raw.bytes[257..263].copy_from_slice(GNU_MAGIC); + header.raw.bytes[263..265].copy_from_slice(GNU_VERSION); + header + } + + /// Get a reference to the underlying bytes. + #[must_use] + pub fn as_bytes(&self) -> &[u8; 512] { + &self.raw.bytes + } + + /// Get a mutable reference to the underlying bytes. + pub fn as_mut_bytes(&mut self) -> &mut [u8; 512] { + &mut self.raw.bytes + } + + /// Parse a header from a byte slice. + /// + /// Returns a reference to the header if the slice is at least 512 bytes. + /// + /// # Errors + /// + /// Returns [`HeaderError::InsufficientData`] if the slice is too short. + pub fn from_bytes(bytes: &[u8]) -> Result<&Header> { + if bytes.len() < HEADER_SIZE { + return Err(HeaderError::InsufficientData(bytes.len())); + } + // SAFETY: Header is repr(transparent) over RawHeader, and we verify + // the slice is properly sized. zerocopy handles alignment. + let raw = RawHeader::ref_from_bytes(&bytes[..HEADER_SIZE]) + .map_err(|_| HeaderError::InsufficientData(bytes.len()))?; + // SAFETY: Header is #[repr(transparent)] over RawHeader + Ok(zerocopy::transmute_ref!(raw)) + } + + /// Parse from exactly 512 bytes without size checking. + /// + /// This is useful when you've already validated the buffer size. + #[must_use] + pub fn from_bytes_exact(bytes: &[u8; 512]) -> &Header { + // SAFETY: Header is repr(transparent) over RawHeader which is + // repr(C) with a [u8; 512] field. Both have the same layout. + let raw = RawHeader::ref_from_bytes(bytes).expect("size is correct"); + zerocopy::transmute_ref!(raw) + } + + /// View this header as an old-style header. + #[must_use] + pub fn as_old(&self) -> &OldHeader { + OldHeader::ref_from_bytes(&self.raw.bytes).expect("size is correct") + } + + /// View this header as a UStar header. + #[must_use] + pub fn as_ustar(&self) -> &UstarHeader { + UstarHeader::ref_from_bytes(&self.raw.bytes).expect("size is correct") + } + + /// View this header as a GNU header. + #[must_use] + pub fn as_gnu(&self) -> &GnuHeader { + GnuHeader::ref_from_bytes(&self.raw.bytes).expect("size is correct") + } + + /// Check if this header uses UStar format. + #[must_use] + pub fn is_ustar(&self) -> bool { + self.raw.bytes[257..263] == *USTAR_MAGIC && self.raw.bytes[263..265] == *USTAR_VERSION + } + + /// Check if this header uses GNU tar format. + #[must_use] + pub fn is_gnu(&self) -> bool { + self.raw.bytes[257..263] == *GNU_MAGIC && self.raw.bytes[263..265] == *GNU_VERSION + } + + /// Get the entry type. + #[must_use] + pub fn entry_type(&self) -> EntryType { + EntryType::from_byte(self.raw.bytes[156]) + } + + /// Get the entry size (file content length) in bytes. + /// + /// # Errors + /// + /// Returns [`HeaderError::InvalidOctal`] if the size field is not valid. + pub fn entry_size(&self) -> Result { + parse_numeric(&self.raw.bytes[124..136]) + } + + /// Get the file mode (permissions). + /// + /// # Errors + /// + /// Returns [`HeaderError::InvalidOctal`] if the mode field is not valid. + pub fn mode(&self) -> Result { + parse_numeric(&self.raw.bytes[100..108]).map(|v| v as u32) + } + + /// Get the owner user ID. + /// + /// # Errors + /// + /// Returns [`HeaderError::InvalidOctal`] if the uid field is not valid. + pub fn uid(&self) -> Result { + parse_numeric(&self.raw.bytes[108..116]) + } + + /// Get the owner group ID. 
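+    ///
+    /// Accepts both octal ASCII and GNU base-256 encodings. A hand-rolled
+    /// sketch:
+    ///
+    /// ```
+    /// use tar_header::Header;
+    ///
+    /// let mut header = Header::new_ustar();
+    /// // gid lives at bytes 116..124; 0o1750 == 1000.
+    /// header.as_mut_bytes()[116..124].copy_from_slice(b"0001750\0");
+    /// assert_eq!(header.gid().unwrap(), 1000);
+    /// ```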
+ /// + /// # Errors + /// + /// Returns [`HeaderError::InvalidOctal`] if the gid field is not valid. + pub fn gid(&self) -> Result { + parse_numeric(&self.raw.bytes[116..124]) + } + + /// Get the modification time as a Unix timestamp. + /// + /// # Errors + /// + /// Returns [`HeaderError::InvalidOctal`] if the mtime field is not valid. + pub fn mtime(&self) -> Result { + parse_numeric(&self.raw.bytes[136..148]) + } + + /// Get the raw path bytes from the header. + /// + /// This returns only the name field (bytes 0..100). For UStar format, + /// the prefix field (bytes 345..500) may also contain path components + /// that should be prepended. + #[must_use] + pub fn path_bytes(&self) -> &[u8] { + truncate_null(&self.raw.bytes[0..100]) + } + + /// Get the raw link name bytes. + #[must_use] + pub fn link_name_bytes(&self) -> &[u8] { + truncate_null(&self.raw.bytes[157..257]) + } + + /// Get the device major number (for character/block devices). + /// + /// Returns `None` for old-style headers without device fields. + /// + /// # Errors + /// + /// Returns [`HeaderError::InvalidOctal`] if the field is not valid octal. + pub fn device_major(&self) -> Result> { + if !self.is_ustar() && !self.is_gnu() { + return Ok(None); + } + parse_octal(&self.raw.bytes[329..337]).map(|v| Some(v as u32)) + } + + /// Get the device minor number (for character/block devices). + /// + /// Returns `None` for old-style headers without device fields. + /// + /// # Errors + /// + /// Returns [`HeaderError::InvalidOctal`] if the field is not valid octal. + pub fn device_minor(&self) -> Result> { + if !self.is_ustar() && !self.is_gnu() { + return Ok(None); + } + parse_octal(&self.raw.bytes[337..345]).map(|v| Some(v as u32)) + } + + /// Get the owner user name. + /// + /// Returns `None` for old-style headers without user/group name fields. + #[must_use] + pub fn username(&self) -> Option<&[u8]> { + if !self.is_ustar() && !self.is_gnu() { + return None; + } + Some(truncate_null(&self.raw.bytes[265..297])) + } + + /// Get the owner group name. + /// + /// Returns `None` for old-style headers without user/group name fields. + #[must_use] + pub fn groupname(&self) -> Option<&[u8]> { + if !self.is_ustar() && !self.is_gnu() { + return None; + } + Some(truncate_null(&self.raw.bytes[297..329])) + } + + /// Get the UStar prefix field for long paths. + /// + /// Returns `None` for old-style or GNU headers. + #[must_use] + pub fn prefix(&self) -> Option<&[u8]> { + if !self.is_ustar() { + return None; + } + Some(truncate_null(&self.raw.bytes[345..500])) + } + + /// Verify the header checksum. + /// + /// The checksum is computed as the unsigned sum of all header bytes, + /// treating the checksum field (bytes 148..156) as spaces. + /// + /// # Errors + /// + /// Returns [`HeaderError::ChecksumMismatch`] if the checksum is invalid, + /// or [`HeaderError::InvalidOctal`] if the stored checksum cannot be parsed. + pub fn verify_checksum(&self) -> Result<()> { + let expected = parse_octal(&self.raw.bytes[148..156])?; + let computed = self.compute_checksum(); + if expected == computed { + Ok(()) + } else { + Err(HeaderError::ChecksumMismatch { expected, computed }) + } + } + + /// Compute the header checksum. + /// + /// This computes the unsigned sum of all header bytes, treating the + /// checksum field (bytes 148..156) as spaces (0x20). 
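+    ///
+    /// A minimal sketch on an all-zero block, where only the eight
+    /// checksum bytes (counted as ASCII spaces) contribute to the sum:
+    ///
+    /// ```
+    /// use tar_header::Header;
+    ///
+    /// let zeros = [0u8; 512];
+    /// let header = Header::from_bytes(&zeros).unwrap();
+    /// assert_eq!(header.compute_checksum(), 8 * 0x20);
+    /// ```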
+ #[must_use] + pub fn compute_checksum(&self) -> u64 { + let mut sum: u64 = 0; + for (i, &byte) in self.raw.bytes.iter().enumerate() { + if (148..156).contains(&i) { + // Treat checksum field as spaces + sum += u64::from(b' '); + } else { + sum += u64::from(byte); + } + } + sum + } + + /// Check if this header represents an empty block (all zeros). + /// + /// Two consecutive empty blocks mark the end of a tar archive. + #[must_use] + pub fn is_empty(&self) -> bool { + self.raw.bytes.iter().all(|&b| b == 0) + } +} + +impl Default for Header { + fn default() -> Self { + Self::new_ustar() + } +} + +impl fmt::Debug for Header { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Header") + .field("path", &String::from_utf8_lossy(self.path_bytes())) + .field("entry_type", &self.entry_type()) + .field("size", &self.entry_size().ok()) + .field("mode", &self.mode().ok().map(|m| format!("{m:04o}"))) + .field("is_ustar", &self.is_ustar()) + .field("is_gnu", &self.is_gnu()) + .finish() + } +} + +// ============================================================================ +// Helper Functions +// ============================================================================ + +/// Parse an octal ASCII field into a u64. +/// +/// Octal fields in tar headers are ASCII strings with optional leading +/// spaces and trailing spaces or null bytes. For example: +/// - `"0000644\0"` -> 420 (file mode 0644) +/// - `" 123 "` -> 83 +/// +/// # Errors +/// +/// Returns [`HeaderError::InvalidOctal`] if the field contains invalid +/// characters (anything other than spaces, digits 0-7, or null bytes). +pub fn parse_octal(bytes: &[u8]) -> Result { + // Skip leading spaces + let start = bytes.iter().position(|&b| b != b' ').unwrap_or(bytes.len()); + // Find end (first space or null after digits) + let end = bytes[start..] + .iter() + .position(|&b| b == b' ' || b == b'\0') + .map_or(bytes.len(), |i| start + i); + + let trimmed = &bytes[start..end]; + + if trimmed.is_empty() { + return Ok(0); + } + + // Parse the octal string + let mut value: u64 = 0; + for &byte in trimmed { + if !byte.is_ascii_digit() || byte > b'7' { + return Err(HeaderError::InvalidOctal(bytes.to_vec())); + } + value = value + .checked_mul(8) + .and_then(|v| v.checked_add(u64::from(byte - b'0'))) + .ok_or_else(|| HeaderError::InvalidOctal(bytes.to_vec()))?; + } + + Ok(value) +} + +/// Parse a numeric field that may be octal ASCII or GNU base-256 encoded. +/// +/// GNU tar uses base-256 encoding for values that don't fit in octal. +/// When the high bit of the first byte is set (0x80), the value is stored +/// as big-endian binary in the remaining bytes. Otherwise, it's parsed as +/// octal ASCII. +/// +/// # Errors +/// +/// Returns [`HeaderError::InvalidOctal`] if octal parsing fails. +pub fn parse_numeric(bytes: &[u8]) -> Result { + if bytes.is_empty() { + return Ok(0); + } + + // Check for GNU base-256 encoding (high bit set) + if bytes[0] & 0x80 != 0 { + // Base-256: interpret remaining bytes as big-endian, masking off the + // high bit of the first byte + let mut value: u64 = 0; + for (i, &byte) in bytes.iter().enumerate() { + let b = if i == 0 { byte & 0x7f } else { byte }; + value = value + .checked_shl(8) + .and_then(|v| v.checked_add(u64::from(b))) + .ok_or_else(|| HeaderError::InvalidOctal(bytes.to_vec()))?; + } + Ok(value) + } else { + // Standard octal ASCII + parse_octal(bytes) + } +} + +/// Truncate a byte slice at the first null byte. 
+/// +/// This is used to extract null-terminated strings from fixed-size fields. +/// If no null byte is found, returns the entire slice. +/// +/// # Example +/// +/// ``` +/// use tar_header::truncate_null; +/// +/// assert_eq!(truncate_null(b"hello\0world"), b"hello"); +/// assert_eq!(truncate_null(b"no null here"), b"no null here"); +/// assert_eq!(truncate_null(b"\0empty"), b""); +/// ``` +#[must_use] +pub fn truncate_null(bytes: &[u8]) -> &[u8] { + match bytes.iter().position(|&b| b == 0) { + Some(pos) => &bytes[..pos], + None => bytes, + } +} + +// ============================================================================ +// PAX Extended Headers +// ============================================================================ + +/// PAX extended header key for the file path. +pub const PAX_PATH: &str = "path"; +/// PAX extended header key for the link target path. +pub const PAX_LINKPATH: &str = "linkpath"; +/// PAX extended header key for file size. +pub const PAX_SIZE: &str = "size"; +/// PAX extended header key for owner user ID. +pub const PAX_UID: &str = "uid"; +/// PAX extended header key for owner group ID. +pub const PAX_GID: &str = "gid"; +/// PAX extended header key for owner user name. +pub const PAX_UNAME: &str = "uname"; +/// PAX extended header key for owner group name. +pub const PAX_GNAME: &str = "gname"; +/// PAX extended header key for modification time. +pub const PAX_MTIME: &str = "mtime"; +/// PAX extended header key for access time. +pub const PAX_ATIME: &str = "atime"; +/// PAX extended header key for change time. +pub const PAX_CTIME: &str = "ctime"; +/// PAX extended header prefix for SCHILY extended attributes. +pub const PAX_SCHILY_XATTR: &str = "SCHILY.xattr."; + +/// PAX extended header prefix for GNU sparse file extensions. +pub const PAX_GNU_SPARSE: &str = "GNU.sparse."; +/// PAX key for GNU sparse file number of blocks. +pub const PAX_GNU_SPARSE_NUMBLOCKS: &str = "GNU.sparse.numblocks"; +/// PAX key for GNU sparse file offset. +pub const PAX_GNU_SPARSE_OFFSET: &str = "GNU.sparse.offset"; +/// PAX key for GNU sparse file numbytes. +pub const PAX_GNU_SPARSE_NUMBYTES: &str = "GNU.sparse.numbytes"; +/// PAX key for GNU sparse file map. +pub const PAX_GNU_SPARSE_MAP: &str = "GNU.sparse.map"; +/// PAX key for GNU sparse file name. +pub const PAX_GNU_SPARSE_NAME: &str = "GNU.sparse.name"; +/// PAX key for GNU sparse file format major version. +pub const PAX_GNU_SPARSE_MAJOR: &str = "GNU.sparse.major"; +/// PAX key for GNU sparse file format minor version. +pub const PAX_GNU_SPARSE_MINOR: &str = "GNU.sparse.minor"; +/// PAX key for GNU sparse file size. +pub const PAX_GNU_SPARSE_SIZE: &str = "GNU.sparse.size"; +/// PAX key for GNU sparse file real size. +pub const PAX_GNU_SPARSE_REALSIZE: &str = "GNU.sparse.realsize"; + +/// Error parsing a PAX extension record. +#[derive(Debug, Error)] +pub enum PaxError { + /// The record format is malformed. + #[error("malformed PAX extension record")] + Malformed, + /// The key is not valid UTF-8. + #[error("PAX key is not valid UTF-8: {0}")] + InvalidKey(#[from] std::str::Utf8Error), +} + +/// A single PAX extended header key/value pair. +#[derive(Debug, Clone)] +pub struct PaxExtension<'a> { + key: &'a [u8], + value: &'a [u8], +} + +impl<'a> PaxExtension<'a> { + /// Returns the key as a string. + /// + /// # Errors + /// + /// Returns an error if the key is not valid UTF-8. + pub fn key(&self) -> std::result::Result<&'a str, std::str::Utf8Error> { + std::str::from_utf8(self.key) + } + + /// Returns the raw key bytes. 
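+    ///
+    /// A short sketch:
+    ///
+    /// ```
+    /// use tar_header::PaxExtensions;
+    ///
+    /// let mut iter = PaxExtensions::new(b"20 path=foo/bar.txt\n");
+    /// let ext = iter.next().unwrap().unwrap();
+    /// assert_eq!(ext.key_bytes(), b"path");
+    /// ```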
#[must_use]
+    pub fn key_bytes(&self) -> &'a [u8] {
+        self.key
+    }
+
+    /// Returns the value as a string.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the value is not valid UTF-8.
+    pub fn value(&self) -> std::result::Result<&'a str, std::str::Utf8Error> {
+        std::str::from_utf8(self.value)
+    }
+
+    /// Returns the raw value bytes.
+    #[must_use]
+    pub fn value_bytes(&self) -> &'a [u8] {
+        self.value
+    }
+}
+
+/// Iterator over PAX extended header records.
+///
+/// PAX extended headers consist of records in the format:
+/// `<length> <key>=<value>\n`
+///
+/// where `<length>` is the total record length including the length field itself.
+///
+/// # Example
+///
+/// ```
+/// use tar_header::PaxExtensions;
+///
+/// let data = b"20 path=foo/bar.txt\n";
+/// let mut iter = PaxExtensions::new(data);
+/// let ext = iter.next().unwrap().unwrap();
+/// assert_eq!(ext.key().unwrap(), "path");
+/// assert_eq!(ext.value().unwrap(), "foo/bar.txt");
+/// ```
+#[derive(Debug)]
+pub struct PaxExtensions<'a> {
+    data: &'a [u8],
+}
+
+impl<'a> PaxExtensions<'a> {
+    /// Create a new iterator over PAX extension records.
+    #[must_use]
+    pub fn new(data: &'a [u8]) -> Self {
+        Self { data }
+    }
+
+    /// Look up a specific key and return its value as a string.
+    ///
+    /// Returns `None` if the key is not found or if parsing fails.
+    #[must_use]
+    pub fn get(&self, key: &str) -> Option<&'a str> {
+        for ext in PaxExtensions::new(self.data).flatten() {
+            if ext.key().ok() == Some(key) {
+                return ext.value().ok();
+            }
+        }
+        None
+    }
+
+    /// Look up a specific key and parse its value as u64.
+    ///
+    /// Returns `None` if the key is not found, parsing fails, or the value
+    /// is not a valid integer.
+    #[must_use]
+    pub fn get_u64(&self, key: &str) -> Option<u64> {
+        self.get(key).and_then(|v| v.parse().ok())
+    }
+}
+
+impl<'a> Iterator for PaxExtensions<'a> {
+    type Item = std::result::Result<PaxExtension<'a>, PaxError>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.data.is_empty() {
+            return None;
+        }
+
+        // Find the space separating length from key=value
+        let space_pos = self.data.iter().position(|&b| b == b' ')?;
+
+        // Parse the length
+        let len_str = std::str::from_utf8(&self.data[..space_pos]).ok()?;
+        let len: usize = len_str.parse().ok()?;
+
+        // Validate we have enough data
+        if len > self.data.len() || len < space_pos + 2 {
+            return Some(Err(PaxError::Malformed));
+        }
+
+        // The record should end with newline
+        if self.data.get(len.saturating_sub(1)) != Some(&b'\n') {
+            return Some(Err(PaxError::Malformed));
+        }
+
+        // Extract key=value (excluding length prefix and trailing newline)
+        let kv = &self.data[space_pos + 1..len - 1];
+
+        // Find the equals sign
+        let eq_pos = match kv.iter().position(|&b| b == b'=') {
+            Some(pos) => pos,
+            None => return Some(Err(PaxError::Malformed)),
+        };
+
+        let key = &kv[..eq_pos];
+        let value = &kv[eq_pos + 1..];
+
+        // Advance past this record
+        self.data = &self.data[len..];
+
+        Some(Ok(PaxExtension { key, value }))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_header_size() {
+        assert_eq!(size_of::<RawHeader>(), HEADER_SIZE);
+        assert_eq!(size_of::<OldHeader>(), HEADER_SIZE);
+        assert_eq!(size_of::<UstarHeader>(), HEADER_SIZE);
+        assert_eq!(size_of::<GnuHeader>(), HEADER_SIZE);
+        assert_eq!(size_of::<GnuExtSparseHeader>(), HEADER_SIZE);
+        assert_eq!(size_of::<Header>
(), HEADER_SIZE); + } + + #[test] + fn test_sparse_header_size() { + // Each sparse header is 24 bytes (12 + 12) + assert_eq!(size_of::(), 24); + // Extended sparse: 21 * 24 + 1 + 7 = 512 + assert_eq!(21 * 24 + 1 + 7, HEADER_SIZE); + } + + #[test] + fn test_new_ustar() { + let header = Header::new_ustar(); + assert!(header.is_ustar()); + assert!(!header.is_gnu()); + } + + #[test] + fn test_new_gnu() { + let header = Header::new_gnu(); + assert!(header.is_gnu()); + assert!(!header.is_ustar()); + } + + #[test] + fn test_from_bytes_insufficient() { + let short = [0u8; 100]; + let result = Header::from_bytes(&short); + assert!(matches!(result, Err(HeaderError::InsufficientData(100)))); + } + + #[test] + fn test_from_bytes_success() { + let mut data = [0u8; 512]; + // Set up a valid UStar header + data[257..263].copy_from_slice(USTAR_MAGIC); + data[263..265].copy_from_slice(USTAR_VERSION); + + let header = Header::from_bytes(&data).unwrap(); + assert!(header.is_ustar()); + } + + #[test] + fn test_parse_octal() { + assert_eq!(parse_octal(b"0000644\0").unwrap(), 0o644); + assert_eq!(parse_octal(b"0000755\0").unwrap(), 0o755); + assert_eq!(parse_octal(b" 123 ").unwrap(), 0o123); + assert_eq!(parse_octal(b"0").unwrap(), 0); + assert_eq!(parse_octal(b"").unwrap(), 0); + assert_eq!(parse_octal(b" \0\0\0").unwrap(), 0); + assert_eq!(parse_octal(b"77777777777").unwrap(), 0o77777777777); + } + + #[test] + fn test_parse_octal_invalid() { + assert!(parse_octal(b"abc").is_err()); + assert!(parse_octal(b"128").is_err()); // 8 and 9 are not octal + } + + #[test] + fn test_truncate_null() { + assert_eq!(truncate_null(b"hello\0world"), b"hello"); + assert_eq!(truncate_null(b"no null"), b"no null"); + assert_eq!(truncate_null(b"\0start"), b""); + assert_eq!(truncate_null(b""), b""); + } + + #[test] + fn test_entry_type_roundtrip() { + let types = [ + EntryType::Regular, + EntryType::Link, + EntryType::Symlink, + EntryType::Char, + EntryType::Block, + EntryType::Directory, + EntryType::Fifo, + EntryType::Continuous, + EntryType::GnuLongName, + EntryType::GnuLongLink, + EntryType::GnuSparse, + EntryType::XHeader, + EntryType::XGlobalHeader, + ]; + + for t in types { + let byte = t.to_byte(); + let parsed = EntryType::from_byte(byte); + assert_eq!(parsed, t); + } + } + + #[test] + fn test_entry_type_old_regular() { + // Old tar uses '\0' for regular files + assert_eq!(EntryType::from_byte(b'\0'), EntryType::Regular); + assert_eq!(EntryType::from_byte(b'0'), EntryType::Regular); + } + + #[test] + fn test_entry_type_predicates() { + assert!(EntryType::Regular.is_file()); + assert!(EntryType::Continuous.is_file()); + assert!(!EntryType::Directory.is_file()); + + assert!(EntryType::Directory.is_dir()); + assert!(!EntryType::Regular.is_dir()); + + assert!(EntryType::Symlink.is_symlink()); + assert!(EntryType::Link.is_hard_link()); + } + + #[test] + fn test_checksum_empty_header() { + let header = Header::new_ustar(); + // Computed checksum should be consistent + let checksum = header.compute_checksum(); + // For an empty header with only magic/version set, checksum includes: + // - 148 spaces (0x20) for checksum field = 148 * 32 = 4736 + // - "ustar\0" = 117+115+116+97+114+0 = 559 + // - "00" = 48+48 = 96 + // - Rest are zeros + assert!(checksum > 0); + } + + #[test] + fn test_is_empty() { + let mut header = Header::new_ustar(); + assert!(!header.is_empty()); + + // Create truly empty header + header.as_mut_bytes().fill(0); + assert!(header.is_empty()); + } + + #[test] + fn test_as_format_views() { + let header = 
Header::new_ustar(); + + // All views should work without panicking + let _old = header.as_old(); + let _ustar = header.as_ustar(); + let _gnu = header.as_gnu(); + } + + #[test] + fn test_ustar_default_magic() { + let ustar = UstarHeader::default(); + assert_eq!(&ustar.magic, USTAR_MAGIC); + assert_eq!(&ustar.version, USTAR_VERSION); + } + + #[test] + fn test_gnu_default_magic() { + let gnu = GnuHeader::default(); + assert_eq!(&gnu.magic, GNU_MAGIC); + assert_eq!(&gnu.version, GNU_VERSION); + } + + #[test] + fn test_path_bytes() { + let mut header = Header::new_ustar(); + header.as_mut_bytes()[0..5].copy_from_slice(b"hello"); + assert_eq!(header.path_bytes(), b"hello"); + } + + #[test] + fn test_link_name_bytes() { + let mut header = Header::new_ustar(); + header.as_mut_bytes()[157..163].copy_from_slice(b"target"); + assert_eq!(header.link_name_bytes(), b"target"); + } + + #[test] + fn test_username_groupname() { + let header = Header::new_ustar(); + assert!(header.username().is_some()); + assert!(header.groupname().is_some()); + + // Old-style header should return None + let mut old_header = Header::new_ustar(); + old_header.as_mut_bytes()[257..265].fill(0); + assert!(old_header.username().is_none()); + assert!(old_header.groupname().is_none()); + } + + #[test] + fn test_prefix() { + let header = Header::new_ustar(); + assert!(header.prefix().is_some()); + + let gnu_header = Header::new_gnu(); + // GNU format doesn't use prefix the same way + assert!(gnu_header.prefix().is_none()); + } + + #[test] + fn test_device_numbers() { + let header = Header::new_ustar(); + assert!(header.device_major().unwrap().is_some()); + assert!(header.device_minor().unwrap().is_some()); + + // Old-style header should return None + let mut old_header = Header::new_ustar(); + old_header.as_mut_bytes()[257..265].fill(0); + assert!(old_header.device_major().unwrap().is_none()); + assert!(old_header.device_minor().unwrap().is_none()); + } + + #[test] + fn test_debug_impls() { + // Just ensure Debug impls don't panic + let header = Header::new_ustar(); + let _ = format!("{header:?}"); + let _ = format!("{:?}", header.as_old()); + let _ = format!("{:?}", header.as_ustar()); + let _ = format!("{:?}", header.as_gnu()); + let _ = format!("{:?}", GnuExtSparseHeader::default()); + let _ = format!("{:?}", GnuSparseHeader::default()); + let _ = format!("{:?}", RawHeader::default()); + } + + #[test] + fn test_parse_numeric_octal() { + // parse_numeric should handle octal just like parse_octal + assert_eq!(parse_numeric(b"0000644\0").unwrap(), 0o644); + assert_eq!(parse_numeric(b"0000755\0").unwrap(), 0o755); + assert_eq!(parse_numeric(b" 123 ").unwrap(), 0o123); + assert_eq!(parse_numeric(b"").unwrap(), 0); + } + + #[test] + fn test_parse_numeric_base256() { + // Base-256 encoding: high bit set, remaining bytes are big-endian value + // 0x80 0x00 0x00 0x01 = 1 (with marker bit in first byte) + assert_eq!(parse_numeric(&[0x80, 0x00, 0x00, 0x01]).unwrap(), 1); + + // 0x80 0x00 0x01 0x00 = 256 + assert_eq!(parse_numeric(&[0x80, 0x00, 0x01, 0x00]).unwrap(), 256); + + // 0x80 0xFF = 255 (first byte 0x80 & 0x7f = 0, second byte 0xFF = 255) + assert_eq!(parse_numeric(&[0x80, 0xFF]).unwrap(), 255); + + // Larger value: 0x80 0x00 0x00 0x00 0x00 0x00 0x01 0x00 0x00 0x00 0x00 0x00 + // = 2^40 = 1099511627776 + let bytes = [ + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + ]; + assert_eq!(parse_numeric(&bytes).unwrap(), 1099511627776); + } + + #[test] + fn test_parse_numeric_base256_in_header() { + // Test 
that base-256 encoded size field works in Header + let mut header = Header::new_ustar(); + + // Set size field (bytes 124..136) to base-256 encoded value + // 12-byte field: first byte has 0x80 marker, remaining 11 bytes are the value + // We want to encode a large value that wouldn't fit in octal + let size_field = &mut header.as_mut_bytes()[124..136]; + size_field.fill(0); + size_field[0] = 0x80; // base-256 marker (first byte & 0x7f = 0) + // Put value in last 4 bytes for simplicity: 0x12345678 + size_field[8] = 0x12; + size_field[9] = 0x34; + size_field[10] = 0x56; + size_field[11] = 0x78; + + assert_eq!(header.entry_size().unwrap(), 0x12345678); + } + + #[test] + fn test_parse_numeric_base256_uid_gid() { + let mut header = Header::new_ustar(); + + // Set uid field (bytes 108..116) to base-256 encoded value + let uid_field = &mut header.as_mut_bytes()[108..116]; + uid_field.fill(0); + uid_field[0] = 0x80; // base-256 marker + uid_field[7] = 0x42; // value = 66 + assert_eq!(header.uid().unwrap(), 66); + + // Set gid field (bytes 116..124) to base-256 encoded value + let gid_field = &mut header.as_mut_bytes()[116..124]; + gid_field.fill(0); + gid_field[0] = 0x80; // base-256 marker + gid_field[6] = 0x01; + gid_field[7] = 0x00; // value = 256 + assert_eq!(header.gid().unwrap(), 256); + } + + #[test] + fn test_parse_octal_edge_cases() { + // All spaces should return 0 + assert_eq!(parse_octal(b" ").unwrap(), 0); + + // All nulls should return 0 + assert_eq!(parse_octal(b"\0\0\0\0\0\0").unwrap(), 0); + + // Mixed spaces and nulls + assert_eq!(parse_octal(b" \0\0\0").unwrap(), 0); + + // Value at very end with trailing null + assert_eq!(parse_octal(b" 7\0").unwrap(), 7); + + // Value with no trailing delimiter (field fills entire space) + assert_eq!(parse_octal(b"0000755").unwrap(), 0o755); + + // Single digit + assert_eq!(parse_octal(b"7").unwrap(), 7); + + // Leading zeros + assert_eq!(parse_octal(b"00000001").unwrap(), 1); + + // Max value that fits in 11 octal digits (typical for 12-byte fields) + assert_eq!(parse_octal(b"77777777777\0").unwrap(), 0o77777777777); + } + + #[test] + fn test_from_bytes_exact() { + let mut data = [0u8; 512]; + // Set up a valid UStar header + data[257..263].copy_from_slice(USTAR_MAGIC); + data[263..265].copy_from_slice(USTAR_VERSION); + data[0..4].copy_from_slice(b"test"); + + let header = Header::from_bytes_exact(&data); + assert!(header.is_ustar()); + assert_eq!(header.path_bytes(), b"test"); + } + + #[test] + fn test_from_bytes_exact_gnu() { + let mut data = [0u8; 512]; + data[257..263].copy_from_slice(GNU_MAGIC); + data[263..265].copy_from_slice(GNU_VERSION); + + let header = Header::from_bytes_exact(&data); + assert!(header.is_gnu()); + assert!(!header.is_ustar()); + } + + // ========================================================================= + // PAX Extension Tests + // ========================================================================= + + #[test] + fn test_pax_simple() { + let data = b"20 path=foo/bar.txt\n"; + let mut iter = PaxExtensions::new(data); + let ext = iter.next().unwrap().unwrap(); + assert_eq!(ext.key().unwrap(), "path"); + assert_eq!(ext.value().unwrap(), "foo/bar.txt"); + assert!(iter.next().is_none()); + } + + #[test] + fn test_pax_multiple() { + let data = b"20 path=foo/bar.txt\n12 uid=1000\n12 gid=1000\n"; + let exts: Vec<_> = PaxExtensions::new(data).collect(); + assert_eq!(exts.len(), 3); + assert_eq!(exts[0].as_ref().unwrap().key().unwrap(), "path"); + assert_eq!(exts[0].as_ref().unwrap().value().unwrap(), 
"foo/bar.txt"); + assert_eq!(exts[1].as_ref().unwrap().key().unwrap(), "uid"); + assert_eq!(exts[1].as_ref().unwrap().value().unwrap(), "1000"); + assert_eq!(exts[2].as_ref().unwrap().key().unwrap(), "gid"); + assert_eq!(exts[2].as_ref().unwrap().value().unwrap(), "1000"); + } + + #[test] + fn test_pax_get() { + let data = b"20 path=foo/bar.txt\n12 uid=1000\n"; + let pax = PaxExtensions::new(data); + assert_eq!(pax.get("path"), Some("foo/bar.txt")); + assert_eq!(pax.get("uid"), Some("1000")); + assert_eq!(pax.get("missing"), None); + } + + #[test] + fn test_pax_get_u64() { + let data = b"12 uid=1000\n16 size=1234567\n"; + let pax = PaxExtensions::new(data); + assert_eq!(pax.get_u64("uid"), Some(1000)); + assert_eq!(pax.get_u64("size"), Some(1234567)); + assert_eq!(pax.get_u64("missing"), None); + } + + #[test] + fn test_pax_empty() { + let data = b""; + let mut iter = PaxExtensions::new(data); + assert!(iter.next().is_none()); + } + + #[test] + fn test_pax_binary_value() { + // PAX values can contain binary data (e.g., xattrs) + // Format: " =\n" where len includes everything + // 24 = 2 (digits) + 1 (space) + 16 (key) + 1 (=) + 3 (value) + 1 (newline) + let data = b"24 SCHILY.xattr.foo=\x00\x01\x02\n"; + let mut iter = PaxExtensions::new(data); + let ext = iter.next().unwrap().unwrap(); + assert_eq!(ext.key().unwrap(), "SCHILY.xattr.foo"); + assert_eq!(ext.value_bytes(), b"\x00\x01\x02"); + } + + #[test] + fn test_pax_long_path() { + // Test a path that's exactly at various boundary lengths + let long_path = "a".repeat(200); + // PAX format: "length path=value\n" where length includes ALL bytes including itself + // For 200-char path: 5 (path=) + 1 (\n) + 200 (value) + 1 (space) + 3 (length digits) = 210 + let record = format!("210 path={}\n", long_path); + let data = record.as_bytes(); + let pax = PaxExtensions::new(data); + assert_eq!(pax.get("path"), Some(long_path.as_str())); + } + + #[test] + fn test_pax_unicode_path() { + // PAX supports UTF-8 paths + let data = "35 path=日本語/ファイル.txt\n".as_bytes(); + let pax = PaxExtensions::new(data); + assert_eq!(pax.get("path"), Some("日本語/ファイル.txt")); + } + + #[test] + fn test_pax_mtime_fractional() { + // PAX mtime can have fractional seconds + let data = b"22 mtime=1234567890.5\n"; + let pax = PaxExtensions::new(data); + assert_eq!(pax.get("mtime"), Some("1234567890.5")); + // get_u64 won't parse fractional + assert_eq!(pax.get_u64("mtime"), None); + } + + #[test] + fn test_pax_schily_xattr() { + let data = b"30 SCHILY.xattr.user.test=val\n"; + let mut iter = PaxExtensions::new(data); + let ext = iter.next().unwrap().unwrap(); + let key = ext.key().unwrap(); + assert!(key.starts_with(PAX_SCHILY_XATTR)); + assert_eq!(&key[PAX_SCHILY_XATTR.len()..], "user.test"); + } + + #[test] + fn test_pax_malformed_no_equals() { + let data = b"15 pathfoobar\n"; + let mut iter = PaxExtensions::new(data); + let result = iter.next().unwrap(); + assert!(result.is_err()); + } + + #[test] + fn test_pax_malformed_wrong_length() { + // Length says 100 but record is shorter + let data = b"100 path=foo\n"; + let mut iter = PaxExtensions::new(data); + let result = iter.next().unwrap(); + assert!(result.is_err()); + } + + // ========================================================================= + // Edge Case Tests + // ========================================================================= + + #[test] + fn test_path_exactly_100_bytes() { + // Path that fills entire name field (no null terminator needed) + let mut header = Header::new_ustar(); + let path = 
"a".repeat(100); + header.as_mut_bytes()[0..100].copy_from_slice(path.as_bytes()); + + assert_eq!(header.path_bytes().len(), 100); + assert_eq!(header.path_bytes(), path.as_bytes()); + } + + #[test] + fn test_link_name_exactly_100_bytes() { + let mut header = Header::new_ustar(); + let target = "t".repeat(100); + header.as_mut_bytes()[157..257].copy_from_slice(target.as_bytes()); + + assert_eq!(header.link_name_bytes().len(), 100); + assert_eq!(header.link_name_bytes(), target.as_bytes()); + } + + #[test] + fn test_prefix_exactly_155_bytes() { + let mut header = Header::new_ustar(); + let prefix = "p".repeat(155); + header.as_mut_bytes()[345..500].copy_from_slice(prefix.as_bytes()); + + assert_eq!(header.prefix().unwrap().len(), 155); + assert_eq!(header.prefix().unwrap(), prefix.as_bytes()); + } + + #[test] + fn test_sparse_header_parsing() { + let header = Header::new_gnu(); + let gnu = header.as_gnu(); + + // Default sparse headers should have zero offset and numbytes + for sparse in &gnu.sparse { + assert_eq!(parse_octal(&sparse.offset).unwrap(), 0); + assert_eq!(parse_octal(&sparse.numbytes).unwrap(), 0); + } + } + + #[test] + fn test_gnu_atime_ctime() { + let mut header = Header::new_gnu(); + let gnu = header.as_gnu(); + + // Default should be zeros + assert_eq!(parse_octal(&gnu.atime).unwrap(), 0); + assert_eq!(parse_octal(&gnu.ctime).unwrap(), 0); + + // Set some values (valid octal: 12345670123) + header.as_mut_bytes()[345..356].copy_from_slice(b"12345670123"); + let gnu = header.as_gnu(); + assert_eq!(parse_octal(&gnu.atime).unwrap(), 0o12345670123); + } + + #[test] + fn test_ext_sparse_header() { + let ext = GnuExtSparseHeader::default(); + assert_eq!(ext.isextended, 0); + assert_eq!(ext.sparse.len(), 21); + + // Verify size is exactly 512 bytes + assert_eq!(size_of::(), HEADER_SIZE); + } + + #[test] + fn test_max_octal_values() { + // 12-byte field max (11 octal digits + null) + assert_eq!(parse_octal(b"77777777777\0").unwrap(), 0o77777777777); + + // 8-byte field max (7 octal digits + null) + assert_eq!(parse_octal(b"7777777\0").unwrap(), 0o7777777); + } + + #[test] + fn test_base256_max_values() { + // Large UID that needs base-256 + let mut bytes = [0u8; 8]; + bytes[0] = 0x80; // marker + bytes[4] = 0xFF; + bytes[5] = 0xFF; + bytes[6] = 0xFF; + bytes[7] = 0xFF; + assert_eq!(parse_numeric(&bytes).unwrap(), 0xFFFFFFFF); + } + + #[test] + fn test_entry_type_gnu_extensions() { + // GNU long name/link types + assert!(matches!(EntryType::from_byte(b'L'), EntryType::GnuLongName)); + assert!(matches!(EntryType::from_byte(b'K'), EntryType::GnuLongLink)); + assert!(matches!(EntryType::from_byte(b'S'), EntryType::GnuSparse)); + } + + #[test] + fn test_entry_type_pax() { + assert!(matches!(EntryType::from_byte(b'x'), EntryType::XHeader)); + assert!(matches!( + EntryType::from_byte(b'g'), + EntryType::XGlobalHeader + )); + } + + /// Cross-checking tests against the `tar` crate using proptest. + mod proptest_tests { + use super::*; + use proptest::prelude::*; + use std::io::Cursor; + + /// Strategy for generating valid file paths (ASCII, no null bytes, reasonable length). + fn path_strategy() -> impl Strategy { + proptest::string::string_regex( + "[a-zA-Z0-9_][a-zA-Z0-9_.+-]*(/[a-zA-Z0-9_][a-zA-Z0-9_.+-]*)*", + ) + .expect("valid regex") + .prop_filter("reasonable length", |s| !s.is_empty() && s.len() < 100) + } + + /// Strategy for generating valid link targets. + /// Avoids consecutive slashes and `.`/`..` segments which the tar crate normalizes. 
+ fn link_target_strategy() -> impl Strategy { + proptest::string::string_regex( + "[a-zA-Z0-9_][a-zA-Z0-9_+-]*(/[a-zA-Z0-9_][a-zA-Z0-9_+-]*)*", + ) + .expect("valid regex") + .prop_filter("reasonable length", |s| !s.is_empty() && s.len() < 100) + } + + /// Strategy for generating valid user/group names. + fn name_strategy() -> impl Strategy { + proptest::string::string_regex("[a-zA-Z_][a-zA-Z0-9_]{0,30}").expect("valid regex") + } + + /// Strategy for file mode (valid Unix permissions). + fn mode_strategy() -> impl Strategy { + // Standard Unix permission modes + prop_oneof![ + Just(0o644), // regular file + Just(0o755), // executable + Just(0o600), // private + Just(0o777), // all permissions + Just(0o400), // read-only + (0u32..0o7777), // any valid mode + ] + } + + /// Strategy for uid/gid values that fit in octal. + fn id_strategy() -> impl Strategy { + prop_oneof![ + Just(0u64), + Just(1000u64), + Just(65534u64), // nobody + (0u64..0o7777777), // fits in 7 octal digits + ] + } + + /// Strategy for mtime values. + fn mtime_strategy() -> impl Strategy { + prop_oneof![ + Just(0u64), + Just(1234567890u64), + (0u64..0o77777777777u64), // fits in 11 octal digits + ] + } + + /// Strategy for file size values. + fn size_strategy() -> impl Strategy { + prop_oneof![ + Just(0u64), + Just(1u64), + Just(512u64), + Just(4096u64), + (0u64..1024 * 1024), // up to 1 MB + ] + } + + /// Test parameters for a regular file entry. + #[derive(Debug, Clone)] + struct FileParams { + path: String, + mode: u32, + uid: u64, + gid: u64, + mtime: u64, + size: u64, + username: String, + groupname: String, + } + + fn file_params_strategy() -> impl Strategy { + ( + path_strategy(), + mode_strategy(), + id_strategy(), + id_strategy(), + mtime_strategy(), + size_strategy(), + name_strategy(), + name_strategy(), + ) + .prop_map( + |(path, mode, uid, gid, mtime, size, username, groupname)| FileParams { + path, + mode, + uid, + gid, + mtime, + size, + username, + groupname, + }, + ) + } + + /// Test parameters for a symlink entry. + #[derive(Debug, Clone)] + struct SymlinkParams { + path: String, + target: String, + uid: u64, + gid: u64, + mtime: u64, + } + + fn symlink_params_strategy() -> impl Strategy { + ( + path_strategy(), + link_target_strategy(), + id_strategy(), + id_strategy(), + mtime_strategy(), + ) + .prop_map(|(path, target, uid, gid, mtime)| SymlinkParams { + path, + target, + uid, + gid, + mtime, + }) + } + + /// Test parameters for a directory entry. + #[derive(Debug, Clone)] + struct DirParams { + path: String, + mode: u32, + uid: u64, + gid: u64, + mtime: u64, + } + + fn dir_params_strategy() -> impl Strategy { + ( + path_strategy(), + mode_strategy(), + id_strategy(), + id_strategy(), + mtime_strategy(), + ) + .prop_map(|(path, mode, uid, gid, mtime)| DirParams { + path, + mode, + uid, + gid, + mtime, + }) + } + + /// Create a tar archive with a single file entry and return the header bytes. 
+ fn create_file_tar(params: &FileParams) -> Vec { + let mut builder = tar::Builder::new(Vec::new()); + + let mut header = tar::Header::new_ustar(); + header.set_path(¶ms.path).unwrap(); + header.set_mode(params.mode); + header.set_uid(params.uid); + header.set_gid(params.gid); + header.set_mtime(params.mtime); + header.set_size(params.size); + header.set_entry_type(tar::EntryType::Regular); + header.set_username(¶ms.username).unwrap(); + header.set_groupname(¶ms.groupname).unwrap(); + header.set_cksum(); + + // Create dummy content of the right size + let content = vec![0u8; params.size as usize]; + builder + .append_data(&mut header, ¶ms.path, content.as_slice()) + .unwrap(); + + builder.into_inner().unwrap() + } + + /// Create a tar archive with a symlink entry and return the header bytes. + fn create_symlink_tar(params: &SymlinkParams) -> Vec { + let mut builder = tar::Builder::new(Vec::new()); + + let mut header = tar::Header::new_ustar(); + header.set_path(¶ms.path).unwrap(); + header.set_mode(0o777); + header.set_uid(params.uid); + header.set_gid(params.gid); + header.set_mtime(params.mtime); + header.set_size(0); + header.set_entry_type(tar::EntryType::Symlink); + header.set_link_name(¶ms.target).unwrap(); + header.set_cksum(); + + builder + .append_data(&mut header, ¶ms.path, std::io::empty()) + .unwrap(); + + builder.into_inner().unwrap() + } + + /// Create a tar archive with a directory entry and return the header bytes. + fn create_dir_tar(params: &DirParams) -> Vec { + let mut builder = tar::Builder::new(Vec::new()); + + let mut header = tar::Header::new_ustar(); + // Ensure directory path ends with / + let path = if params.path.ends_with('/') { + params.path.clone() + } else { + format!("{}/", params.path) + }; + header.set_path(&path).unwrap(); + header.set_mode(params.mode); + header.set_uid(params.uid); + header.set_gid(params.gid); + header.set_mtime(params.mtime); + header.set_size(0); + header.set_entry_type(tar::EntryType::Directory); + header.set_cksum(); + + builder + .append_data(&mut header, &path, std::io::empty()) + .unwrap(); + + builder.into_inner().unwrap() + } + + /// Extract the first 512-byte header from a tar archive. + fn extract_header_bytes(tar_data: &[u8]) -> [u8; 512] { + let mut header = [0u8; 512]; + header.copy_from_slice(&tar_data[..512]); + header + } + + /// Compare our Header parsing against tar crate's parsing. 
+ fn compare_headers( + our_header: &Header, + tar_header: &tar::Header, + ) -> std::result::Result<(), TestCaseError> { + // Entry type + let our_type = our_header.entry_type(); + let tar_type = tar_header.entry_type(); + match (our_type, tar_type) { + (EntryType::Regular, tar::EntryType::Regular) => {} + (EntryType::Directory, tar::EntryType::Directory) => {} + (EntryType::Symlink, tar::EntryType::Symlink) => {} + (EntryType::Link, tar::EntryType::Link) => {} + (EntryType::Char, tar::EntryType::Char) => {} + (EntryType::Block, tar::EntryType::Block) => {} + (EntryType::Fifo, tar::EntryType::Fifo) => {} + (EntryType::Continuous, tar::EntryType::Continuous) => {} + (EntryType::GnuLongName, tar::EntryType::GNULongName) => {} + (EntryType::GnuLongLink, tar::EntryType::GNULongLink) => {} + (EntryType::GnuSparse, tar::EntryType::GNUSparse) => {} + (EntryType::XHeader, tar::EntryType::XHeader) => {} + (EntryType::XGlobalHeader, tar::EntryType::XGlobalHeader) => {} + _ => { + return Err(TestCaseError::fail(format!( + "entry type mismatch: ours={our_type:?}, tar={tar_type:?}" + ))); + } + } + + // Size + let our_size = our_header + .entry_size() + .map_err(|e| TestCaseError::fail(format!("our entry_size failed: {e}")))?; + let tar_size = tar_header + .size() + .map_err(|e| TestCaseError::fail(format!("tar size failed: {e}")))?; + prop_assert_eq!(our_size, tar_size, "size mismatch"); + + // Mode + let our_mode = our_header + .mode() + .map_err(|e| TestCaseError::fail(format!("our mode failed: {e}")))?; + let tar_mode = tar_header + .mode() + .map_err(|e| TestCaseError::fail(format!("tar mode failed: {e}")))?; + prop_assert_eq!(our_mode, tar_mode, "mode mismatch"); + + // UID + let our_uid = our_header + .uid() + .map_err(|e| TestCaseError::fail(format!("our uid failed: {e}")))?; + let tar_uid = tar_header + .uid() + .map_err(|e| TestCaseError::fail(format!("tar uid failed: {e}")))?; + prop_assert_eq!(our_uid, tar_uid, "uid mismatch"); + + // GID + let our_gid = our_header + .gid() + .map_err(|e| TestCaseError::fail(format!("our gid failed: {e}")))?; + let tar_gid = tar_header + .gid() + .map_err(|e| TestCaseError::fail(format!("tar gid failed: {e}")))?; + prop_assert_eq!(our_gid, tar_gid, "gid mismatch"); + + // Mtime + let our_mtime = our_header + .mtime() + .map_err(|e| TestCaseError::fail(format!("our mtime failed: {e}")))?; + let tar_mtime = tar_header + .mtime() + .map_err(|e| TestCaseError::fail(format!("tar mtime failed: {e}")))?; + prop_assert_eq!(our_mtime, tar_mtime, "mtime mismatch"); + + // Path bytes + let our_path = our_header.path_bytes(); + let tar_path = tar_header.path_bytes(); + prop_assert_eq!(our_path, tar_path.as_ref(), "path mismatch"); + + // Link name (for symlinks) + let our_link = our_header.link_name_bytes(); + if let Some(tar_link) = tar_header.link_name_bytes() { + prop_assert_eq!(our_link, tar_link.as_ref(), "link_name mismatch"); + } else { + prop_assert!(our_link.is_empty(), "expected empty link name"); + } + + // Username + if let Some(our_username) = our_header.username() { + if let Some(tar_username) = tar_header.username_bytes() { + prop_assert_eq!(our_username, tar_username, "username mismatch"); + } + } + + // Groupname + if let Some(our_groupname) = our_header.groupname() { + if let Some(tar_groupname) = tar_header.groupname_bytes() { + prop_assert_eq!(our_groupname, tar_groupname, "groupname mismatch"); + } + } + + // Checksum verification + our_header + .verify_checksum() + .map_err(|e| TestCaseError::fail(format!("checksum verification failed: {e}")))?; + 
+ Ok(()) + } + + proptest! { + #![proptest_config(ProptestConfig::with_cases(256))] + + #[test] + fn test_file_header_crosscheck(params in file_params_strategy()) { + let tar_data = create_file_tar(¶ms); + let header_bytes = extract_header_bytes(&tar_data); + + // Parse with our crate + let our_header = Header::from_bytes_exact(&header_bytes); + + // Parse with tar crate + let tar_header = tar::Header::from_byte_slice(&header_bytes); + + compare_headers(our_header, tar_header)?; + + // Additional file-specific checks + prop_assert!(our_header.entry_type().is_file()); + prop_assert_eq!(our_header.entry_size().unwrap(), params.size); + } + + #[test] + fn test_symlink_header_crosscheck(params in symlink_params_strategy()) { + let tar_data = create_symlink_tar(¶ms); + let header_bytes = extract_header_bytes(&tar_data); + + let our_header = Header::from_bytes_exact(&header_bytes); + let tar_header = tar::Header::from_byte_slice(&header_bytes); + + compare_headers(our_header, tar_header)?; + + // Additional symlink-specific checks + prop_assert!(our_header.entry_type().is_symlink()); + prop_assert_eq!( + our_header.link_name_bytes(), + params.target.as_bytes() + ); + } + + #[test] + fn test_dir_header_crosscheck(params in dir_params_strategy()) { + let tar_data = create_dir_tar(¶ms); + let header_bytes = extract_header_bytes(&tar_data); + + let our_header = Header::from_bytes_exact(&header_bytes); + let tar_header = tar::Header::from_byte_slice(&header_bytes); + + compare_headers(our_header, tar_header)?; + + // Additional directory-specific checks + prop_assert!(our_header.entry_type().is_dir()); + } + } + + /// Test reading entries from real tar archives created by the tar crate. + mod archive_tests { + use super::*; + + proptest! { + #![proptest_config(ProptestConfig::with_cases(64))] + + #[test] + fn test_multi_entry_archive( + files in prop::collection::vec(file_params_strategy(), 1..8), + dirs in prop::collection::vec(dir_params_strategy(), 0..4), + ) { + // Build an archive with multiple entries + let mut builder = tar::Builder::new(Vec::new()); + + // Add directories first + for params in &dirs { + let mut header = tar::Header::new_ustar(); + let path = if params.path.ends_with('/') { + params.path.clone() + } else { + format!("{}/", params.path) + }; + header.set_path(&path).unwrap(); + header.set_mode(params.mode); + header.set_uid(params.uid); + header.set_gid(params.gid); + header.set_mtime(params.mtime); + header.set_size(0); + header.set_entry_type(tar::EntryType::Directory); + header.set_cksum(); + builder.append_data(&mut header, &path, std::io::empty()).unwrap(); + } + + // Add files + for params in &files { + let mut header = tar::Header::new_ustar(); + header.set_path(¶ms.path).unwrap(); + header.set_mode(params.mode); + header.set_uid(params.uid); + header.set_gid(params.gid); + header.set_mtime(params.mtime); + header.set_size(params.size); + header.set_entry_type(tar::EntryType::Regular); + header.set_username(¶ms.username).unwrap(); + header.set_groupname(¶ms.groupname).unwrap(); + header.set_cksum(); + + let content = vec![0u8; params.size as usize]; + builder.append_data(&mut header, ¶ms.path, content.as_slice()).unwrap(); + } + + let tar_data = builder.into_inner().unwrap(); + + // Now iterate through the archive and verify each header + let mut archive = tar::Archive::new(Cursor::new(&tar_data)); + let entries = archive.entries().unwrap(); + + for entry_result in entries { + let entry = entry_result.unwrap(); + let tar_header = entry.header(); + + // Get the raw header 
bytes from the archive + let our_header = Header::from_bytes_exact(tar_header.as_bytes()); + + compare_headers(our_header, tar_header)?; + } + } + } + } + + /// Test GNU format headers. + mod gnu_tests { + use super::*; + + fn create_gnu_file_tar(params: &FileParams) -> Vec { + let mut builder = tar::Builder::new(Vec::new()); + + let mut header = tar::Header::new_gnu(); + header.set_path(¶ms.path).unwrap(); + header.set_mode(params.mode); + header.set_uid(params.uid); + header.set_gid(params.gid); + header.set_mtime(params.mtime); + header.set_size(params.size); + header.set_entry_type(tar::EntryType::Regular); + header.set_username(¶ms.username).unwrap(); + header.set_groupname(¶ms.groupname).unwrap(); + header.set_cksum(); + + let content = vec![0u8; params.size as usize]; + builder + .append_data(&mut header, ¶ms.path, content.as_slice()) + .unwrap(); + + builder.into_inner().unwrap() + } + + fn create_gnu_symlink_tar(params: &SymlinkParams) -> Vec { + let mut builder = tar::Builder::new(Vec::new()); + + let mut header = tar::Header::new_gnu(); + header.set_path(¶ms.path).unwrap(); + header.set_mode(0o777); + header.set_uid(params.uid); + header.set_gid(params.gid); + header.set_mtime(params.mtime); + header.set_size(0); + header.set_entry_type(tar::EntryType::Symlink); + header.set_link_name(¶ms.target).unwrap(); + header.set_cksum(); + + builder + .append_data(&mut header, ¶ms.path, std::io::empty()) + .unwrap(); + + builder.into_inner().unwrap() + } + + fn create_gnu_dir_tar(params: &DirParams) -> Vec { + let mut builder = tar::Builder::new(Vec::new()); + + let mut header = tar::Header::new_gnu(); + let path = if params.path.ends_with('/') { + params.path.clone() + } else { + format!("{}/", params.path) + }; + header.set_path(&path).unwrap(); + header.set_mode(params.mode); + header.set_uid(params.uid); + header.set_gid(params.gid); + header.set_mtime(params.mtime); + header.set_size(0); + header.set_entry_type(tar::EntryType::Directory); + header.set_cksum(); + + builder + .append_data(&mut header, &path, std::io::empty()) + .unwrap(); + + builder.into_inner().unwrap() + } + + proptest! 
{ + #![proptest_config(ProptestConfig::with_cases(128))] + + #[test] + fn test_gnu_file_header_crosscheck(params in file_params_strategy()) { + let tar_data = create_gnu_file_tar(¶ms); + let header_bytes = extract_header_bytes(&tar_data); + + let our_header = Header::from_bytes_exact(&header_bytes); + let tar_header = tar::Header::from_byte_slice(&header_bytes); + + // Verify it's detected as GNU format + prop_assert!(our_header.is_gnu()); + prop_assert!(!our_header.is_ustar()); + + compare_headers(our_header, tar_header)?; + } + + #[test] + fn test_gnu_symlink_header_crosscheck(params in symlink_params_strategy()) { + let tar_data = create_gnu_symlink_tar(¶ms); + let header_bytes = extract_header_bytes(&tar_data); + + let our_header = Header::from_bytes_exact(&header_bytes); + let tar_header = tar::Header::from_byte_slice(&header_bytes); + + prop_assert!(our_header.is_gnu()); + prop_assert!(our_header.entry_type().is_symlink()); + + compare_headers(our_header, tar_header)?; + } + + #[test] + fn test_gnu_dir_header_crosscheck(params in dir_params_strategy()) { + let tar_data = create_gnu_dir_tar(¶ms); + let header_bytes = extract_header_bytes(&tar_data); + + let our_header = Header::from_bytes_exact(&header_bytes); + let tar_header = tar::Header::from_byte_slice(&header_bytes); + + prop_assert!(our_header.is_gnu()); + prop_assert!(our_header.entry_type().is_dir()); + + compare_headers(our_header, tar_header)?; + } + } + } + + /// Test format detection (UStar vs GNU vs Old). + mod format_detection_tests { + use super::*; + + fn create_gnu_file_tar_for_detection(params: &FileParams) -> Vec { + let mut builder = tar::Builder::new(Vec::new()); + + let mut header = tar::Header::new_gnu(); + header.set_path(¶ms.path).unwrap(); + header.set_mode(params.mode); + header.set_uid(params.uid); + header.set_gid(params.gid); + header.set_mtime(params.mtime); + header.set_size(params.size); + header.set_entry_type(tar::EntryType::Regular); + header.set_username(¶ms.username).unwrap(); + header.set_groupname(¶ms.groupname).unwrap(); + header.set_cksum(); + + let content = vec![0u8; params.size as usize]; + builder + .append_data(&mut header, ¶ms.path, content.as_slice()) + .unwrap(); + + builder.into_inner().unwrap() + } + + proptest! 
{ + #![proptest_config(ProptestConfig::with_cases(128))] + + #[test] + fn test_ustar_format_detected(params in file_params_strategy()) { + let tar_data = create_file_tar(¶ms); + let header_bytes = extract_header_bytes(&tar_data); + + let our_header = Header::from_bytes_exact(&header_bytes); + + // UStar headers should be detected correctly + prop_assert!(our_header.is_ustar(), "should be UStar"); + prop_assert!(!our_header.is_gnu(), "should not be GNU"); + + // Check magic bytes directly + prop_assert_eq!(&header_bytes[257..263], USTAR_MAGIC); + prop_assert_eq!(&header_bytes[263..265], USTAR_VERSION); + } + + #[test] + fn test_gnu_format_detected(params in file_params_strategy()) { + let tar_data = create_gnu_file_tar_for_detection(¶ms); + let header_bytes = extract_header_bytes(&tar_data); + + let our_header = Header::from_bytes_exact(&header_bytes); + + // GNU headers should be detected correctly + prop_assert!(our_header.is_gnu(), "should be GNU"); + prop_assert!(!our_header.is_ustar(), "should not be UStar"); + + // Check magic bytes directly + prop_assert_eq!(&header_bytes[257..263], GNU_MAGIC); + prop_assert_eq!(&header_bytes[263..265], GNU_VERSION); + } + } + + #[test] + fn test_old_format_detection() { + // Create a header with no magic (old format) + let mut header_bytes = [0u8; 512]; + + // Set a simple file name + header_bytes[0..4].copy_from_slice(b"test"); + + // Set mode (octal) + header_bytes[100..107].copy_from_slice(b"0000644"); + + // Set size = 0 + header_bytes[124..135].copy_from_slice(b"00000000000"); + + // Set typeflag = regular file + header_bytes[156] = b'0'; + + // Compute and set checksum + let mut checksum: u64 = 0; + for (i, &byte) in header_bytes.iter().enumerate() { + if (148..156).contains(&i) { + checksum += u64::from(b' '); + } else { + checksum += u64::from(byte); + } + } + let checksum_str = format!("{checksum:06o}\0 "); + header_bytes[148..156].copy_from_slice(checksum_str.as_bytes()); + + let our_header = Header::from_bytes_exact(&header_bytes); + + // Old format: neither UStar nor GNU + assert!(!our_header.is_ustar()); + assert!(!our_header.is_gnu()); + + // But we can still parse basic fields + assert_eq!(our_header.path_bytes(), b"test"); + assert_eq!(our_header.entry_type(), EntryType::Regular); + } + } + + /// Test checksum computation matches tar crate. + mod checksum_tests { + use super::*; + + proptest! { + #![proptest_config(ProptestConfig::with_cases(256))] + + #[test] + fn test_checksum_always_valid(params in file_params_strategy()) { + let tar_data = create_file_tar(¶ms); + let header_bytes = extract_header_bytes(&tar_data); + + let our_header = Header::from_bytes_exact(&header_bytes); + + // Checksum should always verify for valid tar headers + our_header.verify_checksum().map_err(|e| { + TestCaseError::fail(format!("checksum failed: {e}")) + })?; + } + + #[test] + fn test_checksum_recompute(params in file_params_strategy()) { + let tar_data = create_file_tar(¶ms); + let header_bytes = extract_header_bytes(&tar_data); + + let our_header = Header::from_bytes_exact(&header_bytes); + + // Our computed checksum should match + let computed = our_header.compute_checksum(); + let stored = parse_octal(&header_bytes[148..156]).unwrap(); + + prop_assert_eq!(computed, stored); + } + } + } + + /// Test entry type mapping is complete. 
+ mod entry_type_tests { + use super::*; + + #[test] + fn test_all_entry_types_map_correctly() { + // Test all known entry type bytes + let mappings: &[(u8, EntryType, tar::EntryType)] = &[ + (b'0', EntryType::Regular, tar::EntryType::Regular), + (b'\0', EntryType::Regular, tar::EntryType::Regular), + (b'1', EntryType::Link, tar::EntryType::Link), + (b'2', EntryType::Symlink, tar::EntryType::Symlink), + (b'3', EntryType::Char, tar::EntryType::Char), + (b'4', EntryType::Block, tar::EntryType::Block), + (b'5', EntryType::Directory, tar::EntryType::Directory), + (b'6', EntryType::Fifo, tar::EntryType::Fifo), + (b'7', EntryType::Continuous, tar::EntryType::Continuous), + (b'L', EntryType::GnuLongName, tar::EntryType::GNULongName), + (b'K', EntryType::GnuLongLink, tar::EntryType::GNULongLink), + (b'S', EntryType::GnuSparse, tar::EntryType::GNUSparse), + (b'x', EntryType::XHeader, tar::EntryType::XHeader), + ( + b'g', + EntryType::XGlobalHeader, + tar::EntryType::XGlobalHeader, + ), + ]; + + for &(byte, expected_ours, expected_tar) in mappings { + let ours = EntryType::from_byte(byte); + let tar_type = tar::EntryType::new(byte); + + assert_eq!(ours, expected_ours, "our mapping for byte {byte}"); + assert_eq!(tar_type, expected_tar, "tar mapping for byte {byte}"); + } + } + + proptest! { + #[test] + fn test_entry_type_roundtrip(byte: u8) { + let our_type = EntryType::from_byte(byte); + let tar_type = tar::EntryType::new(byte); + + // Both should handle unknown types gracefully + let our_byte = our_type.to_byte(); + let tar_byte = tar_type.as_byte(); + + // For regular files, '\0' maps to '0' + if byte == b'\0' { + prop_assert_eq!(our_byte, b'0'); + } else { + prop_assert_eq!(our_byte, tar_byte); + } + } + } + } + } +} diff --git a/crates/tar-header/src/stream/entry.rs b/crates/tar-header/src/stream/entry.rs new file mode 100644 index 00000000..376a01ad --- /dev/null +++ b/crates/tar-header/src/stream/entry.rs @@ -0,0 +1,137 @@ +//! Parsed tar entry with resolved metadata. + +use std::borrow::Cow; + +use crate::EntryType; + +/// A fully-resolved tar entry with all extensions applied. +/// +/// This represents the "logical" entry after accumulating GNU long name/link +/// and PAX extensions. The path and link_target use `Cow` to avoid allocations +/// when the header's inline fields suffice. +/// +/// # Lifetime +/// +/// The entry borrows from the parser's internal buffers. It is valid until +/// the next call to [`TarStreamParser::next_entry`]. +/// +/// [`TarStreamParser::next_entry`]: super::TarStreamParser::next_entry +#[derive(Debug)] +pub struct ParsedEntry<'a> { + /// The raw 512-byte header bytes for this entry. + /// + /// Useful for accessing format-specific fields not exposed here, + /// or for writing the header to a split stream. + pub header_bytes: &'a [u8; 512], + + /// The entry type (Regular, Directory, Symlink, etc.). + pub entry_type: EntryType, + + /// The resolved file path. + /// + /// Priority: PAX `path` > GNU long name > header `name` (+ UStar `prefix`). + /// Borrowed when using header fields, owned when using extensions. + pub path: Cow<'a, [u8]>, + + /// The resolved link target (for symlinks and hardlinks). + /// + /// Priority: PAX `linkpath` > GNU long link > header `linkname`. + /// `None` for non-link entry types. + pub link_target: Option>, + + /// File mode/permissions from header. + pub mode: u32, + + /// Owner UID (PAX `uid` overrides header). + pub uid: u64, + + /// Owner GID (PAX `gid` overrides header). 
+ pub gid: u64, + + /// Modification time as Unix timestamp (PAX `mtime` overrides header). + /// + /// Note: PAX mtime can have sub-second precision, but this field + /// only stores the integer seconds. + pub mtime: u64, + + /// Content size in bytes. + /// + /// For regular files, this is the actual file size. PAX `size` overrides + /// header size when present. + pub size: u64, + + /// User name (from header or PAX `uname`). + pub uname: Option>, + + /// Group name (from header or PAX `gname`). + pub gname: Option>, + + /// Device major number (for block/char devices). + pub dev_major: Option, + + /// Device minor number (for block/char devices). + pub dev_minor: Option, + + /// Extended attributes from PAX `SCHILY.xattr.*` entries. + /// + /// Each tuple is (attribute_name, attribute_value) where the name + /// has the `SCHILY.xattr.` prefix stripped. + #[allow(clippy::type_complexity)] + pub xattrs: Vec<(Cow<'a, [u8]>, Cow<'a, [u8]>)>, + + /// Raw PAX extensions data, if present. + /// + /// Allows callers to access any PAX keys not explicitly parsed above + /// (e.g., `GNU.sparse.*`, `LIBARCHIVE.*`, `SCHILY.acl.*`). + pub pax_data: Option>, +} + +impl<'a> ParsedEntry<'a> { + /// Get the path as a lossy UTF-8 string. + /// + /// Invalid UTF-8 sequences are replaced with the Unicode replacement character. + #[must_use] + pub fn path_lossy(&self) -> Cow<'_, str> { + String::from_utf8_lossy(&self.path) + } + + /// Get the link target as a lossy UTF-8 string, if present. + #[must_use] + pub fn link_target_lossy(&self) -> Option> { + self.link_target + .as_ref() + .map(|t| String::from_utf8_lossy(t)) + } + + /// Check if this is a regular file entry. + #[must_use] + pub fn is_file(&self) -> bool { + self.entry_type.is_file() + } + + /// Check if this is a directory entry. + #[must_use] + pub fn is_dir(&self) -> bool { + self.entry_type.is_dir() + } + + /// Check if this is a symbolic link entry. + #[must_use] + pub fn is_symlink(&self) -> bool { + self.entry_type.is_symlink() + } + + /// Check if this is a hard link entry. + #[must_use] + pub fn is_hard_link(&self) -> bool { + self.entry_type.is_hard_link() + } + + /// Get the padded size (rounded up to 512-byte boundary). + /// + /// This is the number of bytes that follow the header in the tar stream. + #[must_use] + pub fn padded_size(&self) -> u64 { + self.size.next_multiple_of(512) + } +} diff --git a/crates/tar-header/src/stream/error.rs b/crates/tar-header/src/stream/error.rs new file mode 100644 index 00000000..5b6e446d --- /dev/null +++ b/crates/tar-header/src/stream/error.rs @@ -0,0 +1,93 @@ +//! Error types for tar stream parsing. + +use std::str::Utf8Error; + +use thiserror::Error; + +use crate::{HeaderError, PaxError}; + +/// Errors that can occur during tar stream parsing. +#[derive(Debug, Error)] +pub enum StreamError { + /// I/O error from the underlying reader. + #[error("I/O error: {0}")] + Io(#[from] std::io::Error), + + /// Header parsing error (checksum, invalid octal, etc.). + #[error("header error: {0}")] + Header(#[from] HeaderError), + + /// PAX extension parsing error. + #[error("PAX error: {0}")] + Pax(#[from] PaxError), + + /// Invalid UTF-8 in PAX key. + #[error("invalid UTF-8 in PAX key: {0}")] + InvalidUtf8(#[from] Utf8Error), + + /// Path exceeds configured maximum length. + #[error("path exceeds limit: {len} bytes > {limit} bytes")] + PathTooLong { + /// Actual path length. + len: usize, + /// Configured limit. + limit: usize, + }, + + /// PAX extended header exceeds configured maximum size. 
+ #[error("PAX header exceeds limit: {size} bytes > {limit} bytes")] + PaxTooLarge { + /// Actual PAX header size. + size: u64, + /// Configured limit. + limit: u64, + }, + + /// GNU long name/link exceeds configured maximum size. + #[error("GNU long name/link exceeds limit: {size} bytes > {limit} bytes")] + GnuLongTooLarge { + /// Actual GNU long name/link size. + size: u64, + /// Configured limit. + limit: u64, + }, + + /// Duplicate GNU long name entry without an intervening actual entry. + #[error("duplicate GNU long name entry")] + DuplicateGnuLongName, + + /// Duplicate GNU long link entry without an intervening actual entry. + #[error("duplicate GNU long link entry")] + DuplicateGnuLongLink, + + /// Duplicate PAX extended header without an intervening actual entry. + #[error("duplicate PAX extended header")] + DuplicatePaxHeader, + + /// Metadata entries (GNU long name, PAX, etc.) found but no actual entry followed. + #[error("metadata entries without a following actual entry")] + OrphanedMetadata, + + /// Too many consecutive metadata entries (possible infinite loop or malicious archive). + #[error("too many pending metadata entries: {count} > {limit}")] + TooManyPendingEntries { + /// Number of pending metadata entries. + count: usize, + /// Configured limit. + limit: usize, + }, + + /// Entry size in header is invalid (e.g., overflow when computing padded size). + #[error("invalid entry size: {0}")] + InvalidSize(u64), + + /// Unexpected EOF while reading entry content or padding. + #[error("unexpected EOF at position {pos}")] + UnexpectedEof { + /// Position in the stream where EOF occurred. + pos: u64, + }, +} + +/// Result type for stream parsing operations. +pub type Result = std::result::Result; diff --git a/crates/tar-header/src/stream/limits.rs b/crates/tar-header/src/stream/limits.rs new file mode 100644 index 00000000..a75d42ef --- /dev/null +++ b/crates/tar-header/src/stream/limits.rs @@ -0,0 +1,141 @@ +//! Configurable security limits for tar stream parsing. + +/// Configurable security limits for tar stream parsing. +/// +/// These limits protect against malicious or malformed archives that could +/// exhaust memory or create excessively long paths. +/// +/// # Example +/// +/// ``` +/// use tar_header::stream::Limits; +/// +/// // Use defaults +/// let limits = Limits::default(); +/// +/// // Customize limits +/// let strict_limits = Limits { +/// max_path_len: 1024, +/// max_pax_size: 64 * 1024, +/// ..Default::default() +/// }; +/// ``` +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Limits { + /// Maximum path length in bytes. + /// + /// Applies to both file paths and link targets. Paths exceeding this + /// limit will cause a [`StreamError::PathTooLong`] error. + /// + /// Default: 4096 bytes (Linux PATH_MAX). + /// + /// [`StreamError::PathTooLong`]: super::StreamError::PathTooLong + pub max_path_len: usize, + + /// Maximum size of PAX extended header data in bytes. + /// + /// This limits the total size of a single PAX 'x' entry's content. + /// PAX headers larger than this will cause a [`StreamError::PaxTooLarge`] error. + /// + /// Default: 1 MiB (1,048,576 bytes). + /// + /// [`StreamError::PaxTooLarge`]: super::StreamError::PaxTooLarge + pub max_pax_size: u64, + + /// Maximum size of GNU long name/link data in bytes. + /// + /// GNU 'L' (long name) and 'K' (long link) entries should only contain + /// a single path. Values exceeding this limit will cause a + /// [`StreamError::GnuLongTooLarge`] error. + /// + /// Default: 4096 bytes. 
+ /// + /// [`StreamError::GnuLongTooLarge`]: super::StreamError::GnuLongTooLarge + pub max_gnu_long_size: u64, + + /// Maximum number of consecutive metadata entries before an actual entry. + /// + /// Prevents infinite loops from malformed archives that contain only + /// metadata entries (GNU long name, PAX headers) without actual file entries. + /// Exceeding this limit will cause a [`StreamError::TooManyPendingEntries`] error. + /// + /// Default: 16 entries. + /// + /// [`StreamError::TooManyPendingEntries`]: super::StreamError::TooManyPendingEntries + pub max_pending_entries: usize, +} + +impl Default for Limits { + fn default() -> Self { + Self { + max_path_len: 4096, + max_pax_size: 1024 * 1024, // 1 MiB + max_gnu_long_size: 4096, + max_pending_entries: 16, + } + } +} + +impl Limits { + /// Create a new `Limits` with default values. + #[must_use] + pub fn new() -> Self { + Self::default() + } + + /// Create permissive limits suitable for trusted archives. + /// + /// This sets very high limits that effectively disable most checks. + /// Only use this for archives from trusted sources. + #[must_use] + pub fn permissive() -> Self { + Self { + max_path_len: usize::MAX, + max_pax_size: u64::MAX, + max_gnu_long_size: u64::MAX, + max_pending_entries: usize::MAX, + } + } + + /// Create strict limits suitable for untrusted archives. + /// + /// This sets conservative limits to minimize resource consumption + /// from potentially malicious archives. + #[must_use] + pub fn strict() -> Self { + Self { + max_path_len: 1024, + max_pax_size: 64 * 1024, // 64 KiB + max_gnu_long_size: 1024, + max_pending_entries: 8, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_default_limits() { + let limits = Limits::default(); + assert_eq!(limits.max_path_len, 4096); + assert_eq!(limits.max_pax_size, 1024 * 1024); + assert_eq!(limits.max_gnu_long_size, 4096); + assert_eq!(limits.max_pending_entries, 16); + } + + #[test] + fn test_permissive_limits() { + let limits = Limits::permissive(); + assert_eq!(limits.max_path_len, usize::MAX); + assert_eq!(limits.max_pax_size, u64::MAX); + } + + #[test] + fn test_strict_limits() { + let limits = Limits::strict(); + assert!(limits.max_path_len < Limits::default().max_path_len); + assert!(limits.max_pax_size < Limits::default().max_pax_size); + } +} diff --git a/crates/tar-header/src/stream/mod.rs b/crates/tar-header/src/stream/mod.rs new file mode 100644 index 00000000..b7681463 --- /dev/null +++ b/crates/tar-header/src/stream/mod.rs @@ -0,0 +1,76 @@ +//! Streaming tar parser with transparent GNU and PAX extension handling. +//! +//! This module provides a high-level streaming parser for tar archives that +//! automatically handles GNU long name/link extensions and PAX extended headers, +//! yielding only "actual" entries with fully resolved metadata. +//! +//! # Overview +//! +//! Tar archives can use several mechanisms for extended metadata: +//! +//! - **GNU long name (type 'L')**: Stores paths longer than 100 bytes +//! - **GNU long link (type 'K')**: Stores link targets longer than 100 bytes +//! - **PAX extended headers (type 'x')**: Key-value pairs for path, size, uid, gid, xattrs, etc. +//! - **PAX global headers (type 'g')**: Global defaults for all subsequent entries +//! +//! The [`TarStreamParser`] handles all of these transparently, accumulating +//! metadata entries and applying them to the next actual entry. +//! +//! # Security +//! +//! The parser applies configurable [`Limits`] to prevent resource exhaustion +//! 
from malicious or malformed archives: +//! +//! - Maximum path length +//! - Maximum PAX extension size +//! - Maximum GNU long name/link size +//! - Maximum consecutive metadata entries +//! +//! # Example +//! +//! ```no_run +//! use std::fs::File; +//! use std::io::{BufReader, Read}; +//! use tar_header::stream::{TarStreamParser, Limits}; +//! +//! let file = File::open("archive.tar").unwrap(); +//! let reader = BufReader::new(file); +//! let mut parser = TarStreamParser::new(reader, Limits::default()); +//! +//! while let Some(entry) = parser.next_entry().unwrap() { +//! println!("{} ({} bytes)", entry.path_lossy(), entry.size); +//! +//! // Save size before dropping entry borrow +//! let size = entry.size; +//! drop(entry); +//! +//! // Must skip or read content before next entry +//! if size > 0 { +//! parser.skip_content(size).unwrap(); +//! } +//! } +//! ``` +//! +//! # Comparison with tar-rs +//! +//! This parser is designed to be a potential replacement for the parsing layer +//! of the [`tar`](https://crates.io/crates/tar) crate, with: +//! +//! - Explicit security limits +//! - zerocopy-based header parsing +//! - Cleaner separation of parsing from I/O +//! +//! The goal is to eventually upstream this as a shared core for tar-rs. + +mod entry; +mod error; +mod limits; +mod parser; + +pub use entry::ParsedEntry; +pub use error::{Result, StreamError}; +pub use limits::Limits; +pub use parser::TarStreamParser; + +#[cfg(test)] +mod tests; diff --git a/crates/tar-header/src/stream/parser.rs b/crates/tar-header/src/stream/parser.rs new file mode 100644 index 00000000..6bb4a5ee --- /dev/null +++ b/crates/tar-header/src/stream/parser.rs @@ -0,0 +1,548 @@ +//! Streaming tar parser with GNU and PAX extension support. + +use std::borrow::Cow; +use std::io::Read; + +use crate::{EntryType, Header, PaxExtensions, HEADER_SIZE, PAX_SCHILY_XATTR}; + +use super::entry::ParsedEntry; +use super::error::{Result, StreamError}; +use super::limits::Limits; + +/// Internal state for accumulating metadata entries. +/// +/// As we read GNU long name ('L'), GNU long link ('K'), and PAX ('x') entries, +/// we store their contents here until an actual entry arrives. +#[derive(Debug, Default)] +struct PendingMetadata { + /// Content of the most recent GNU long name entry. + gnu_long_name: Option>, + + /// Content of the most recent GNU long link entry. + gnu_long_link: Option>, + + /// Content of the most recent PAX extended header. + pax_extensions: Option>, + + /// Number of metadata entries accumulated so far. + count: usize, +} + +impl PendingMetadata { + fn is_empty(&self) -> bool { + self.gnu_long_name.is_none() + && self.gnu_long_link.is_none() + && self.pax_extensions.is_none() + } +} + +/// Streaming tar parser that handles GNU and PAX extensions transparently. +/// +/// This parser reads a tar stream and yields [`ParsedEntry`] values for each +/// actual entry (file, directory, symlink, etc.), automatically handling: +/// +/// - GNU long name extensions (type 'L') +/// - GNU long link extensions (type 'K') +/// - PAX extended headers (type 'x') +/// - PAX global headers (type 'g') - skipped +/// +/// The parser applies configurable security [`Limits`] to prevent resource +/// exhaustion from malicious or malformed archives. 
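+///
+/// Unlike the lower-level [`Header`] type, which parses a single 512-byte
+/// header block, this parser drives the whole archive, folding GNU long
+/// name/link data and PAX overrides into each returned [`ParsedEntry`].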
+///
+/// # Example
+///
+/// ```no_run
+/// use std::fs::File;
+/// use std::io::{BufReader, Read};
+/// use tar_header::stream::{TarStreamParser, Limits};
+///
+/// let file = File::open("archive.tar").unwrap();
+/// let reader = BufReader::new(file);
+/// let mut parser = TarStreamParser::new(reader, Limits::default());
+///
+/// while let Some(entry) = parser.next_entry().unwrap() {
+///     println!("Entry: {:?}", entry.path_lossy());
+///     let size = entry.size;
+///     let is_file = entry.is_file();
+///     drop(entry); // Release borrow before calling skip_content
+///     if is_file && size > 0 {
+///         parser.skip_content(size).unwrap();
+///     }
+/// }
+/// ```
+///
+/// # Content Reading
+///
+/// After `next_entry()` returns an entry, the content bytes (if any) have
+/// NOT been read. The caller must either:
+///
+/// 1. Call [`skip_content`] to skip past the content and padding
+/// 2. Read exactly `entry.size` bytes from [`reader`], then call
+///    [`skip_padding`] to advance past the padding
+///
+/// [`skip_content`]: TarStreamParser::skip_content
+/// [`reader`]: TarStreamParser::reader
+/// [`skip_padding`]: TarStreamParser::skip_padding
+#[derive(Debug)]
+pub struct TarStreamParser<R> {
+    reader: R,
+    limits: Limits,
+    pending: PendingMetadata,
+    /// Buffer for the current header (reused across entries)
+    header_buf: [u8; HEADER_SIZE],
+    /// Current position in the stream (for error messages)
+    pos: u64,
+    /// Whether we've seen EOF or end-of-archive marker
+    done: bool,
+}
+
+impl<R: Read> TarStreamParser<R> {
+    /// Create a new tar stream parser with the given reader and limits.
+    pub fn new(reader: R, limits: Limits) -> Self {
+        Self {
+            reader,
+            limits,
+            pending: PendingMetadata::default(),
+            header_buf: [0u8; HEADER_SIZE],
+            pos: 0,
+            done: false,
+        }
+    }
+
+    /// Create a new tar stream parser with default limits.
+    pub fn with_defaults(reader: R) -> Self {
+        Self::new(reader, Limits::default())
+    }
+
+    /// Get the current position in the stream.
+    #[must_use]
+    pub fn position(&self) -> u64 {
+        self.pos
+    }
+
+    /// Get the next actual entry, handling all metadata entries transparently.
+    ///
+    /// Returns `Ok(None)` at end of archive (zero block or EOF).
+    /// Returns `Err(OrphanedMetadata)` if metadata entries exist but archive ends.
+    ///
+    /// After this returns `Some(entry)`, the caller must read or skip the
+    /// entry's content before calling `next_entry` again.
+ pub fn next_entry(&mut self) -> Result>> { + if self.done { + return Ok(None); + } + + loop { + // Check pending entry limit + if self.pending.count > self.limits.max_pending_entries { + return Err(StreamError::TooManyPendingEntries { + count: self.pending.count, + limit: self.limits.max_pending_entries, + }); + } + + // Read the next header + let got_header = read_exact_or_eof(&mut self.reader, &mut self.header_buf)?; + if !got_header { + // EOF reached + self.done = true; + if !self.pending.is_empty() { + return Err(StreamError::OrphanedMetadata); + } + return Ok(None); + } + self.pos += HEADER_SIZE as u64; + + // Check for zero block (end of archive marker) + if self.header_buf.iter().all(|&b| b == 0) { + self.done = true; + if !self.pending.is_empty() { + return Err(StreamError::OrphanedMetadata); + } + return Ok(None); + } + + // Parse and verify header + let header = Header::from_bytes_exact(&self.header_buf); + header.verify_checksum()?; + + let entry_type = header.entry_type(); + let size = header.entry_size()?; + let padded_size = size + .checked_next_multiple_of(512) + .ok_or(StreamError::InvalidSize(size))?; + + match entry_type { + EntryType::GnuLongName => { + self.handle_gnu_long_name(size, padded_size)?; + continue; + } + EntryType::GnuLongLink => { + self.handle_gnu_long_link(size, padded_size)?; + continue; + } + EntryType::XHeader => { + self.handle_pax_header(size, padded_size)?; + continue; + } + EntryType::XGlobalHeader => { + // Global PAX headers affect all subsequent entries. + // For simplicity, we skip them. A more complete impl + // would merge them into parser state. + self.skip_bytes(padded_size)?; + continue; + } + _ => { + // This is an actual entry - resolve metadata and return + // We need to reset pending BEFORE creating the entry to avoid borrow issues + // But we need the pending data to create the entry... + // Solution: take ownership of pending data + let gnu_long_name = self.pending.gnu_long_name.take(); + let gnu_long_link = self.pending.gnu_long_link.take(); + let pax_extensions = self.pending.pax_extensions.take(); + self.pending.count = 0; + + let entry = self.resolve_entry_with_pending( + gnu_long_name, + gnu_long_link, + pax_extensions, + )?; + return Ok(Some(entry)); + } + } + } + } + + /// Skip the content and padding of the current entry. + /// + /// Call this after `next_entry()` returns to advance past the entry's data. + /// This is equivalent to calling `skip_bytes(entry.padded_size())`. + pub fn skip_content(&mut self, size: u64) -> Result<()> { + let padded = size + .checked_next_multiple_of(512) + .ok_or(StreamError::InvalidSize(size))?; + self.skip_bytes(padded) + } + + /// Skip the padding after reading content. + /// + /// After reading exactly `content_size` bytes from the reader, call this + /// to advance past the padding bytes to the next header. + pub fn skip_padding(&mut self, content_size: u64) -> Result<()> { + let padded = content_size + .checked_next_multiple_of(512) + .ok_or(StreamError::InvalidSize(content_size))?; + let padding = padded - content_size; + if padding > 0 { + self.skip_bytes(padding)?; + } + Ok(()) + } + + /// Get a mutable reference to the underlying reader. + /// + /// Use this to read entry content after `next_entry()` returns. + /// Read exactly `entry.size` bytes, then call `skip_padding(entry.size)`. + pub fn reader(&mut self) -> &mut R { + &mut self.reader + } + + /// Consume the parser and return the underlying reader. 
+ pub fn into_inner(self) -> R { + self.reader + } + + /// Get the current limits. + #[must_use] + pub fn limits(&self) -> &Limits { + &self.limits + } + + // ========================================================================= + // Internal helpers + // ========================================================================= + + /// Read exactly `len` bytes into a new Vec. + fn read_vec(&mut self, len: usize) -> Result> { + let mut buf = vec![0u8; len]; + self.reader.read_exact(&mut buf)?; + Ok(buf) + } + + /// Skip `len` bytes (read and discard). + fn skip_bytes(&mut self, len: u64) -> Result<()> { + let mut remaining = len; + let mut buf = [0u8; 8192]; + while remaining > 0 { + let to_read = std::cmp::min(remaining, buf.len() as u64) as usize; + self.reader.read_exact(&mut buf[..to_read])?; + remaining -= to_read as u64; + } + self.pos += len; + Ok(()) + } + + fn handle_gnu_long_name(&mut self, size: u64, padded_size: u64) -> Result<()> { + // Check for duplicate + if self.pending.gnu_long_name.is_some() { + return Err(StreamError::DuplicateGnuLongName); + } + + // Check size limit + if size > self.limits.max_gnu_long_size { + return Err(StreamError::GnuLongTooLarge { + size, + limit: self.limits.max_gnu_long_size, + }); + } + + // Read content + let mut data = self.read_vec(size as usize)?; + self.skip_bytes(padded_size - size)?; + + // Strip trailing null + data.pop_if(|&mut x| x == 0); + + // Check path length limit + if data.len() > self.limits.max_path_len { + return Err(StreamError::PathTooLong { + len: data.len(), + limit: self.limits.max_path_len, + }); + } + + self.pending.gnu_long_name = Some(data); + self.pending.count += 1; + Ok(()) + } + + fn handle_gnu_long_link(&mut self, size: u64, padded_size: u64) -> Result<()> { + // Check for duplicate + if self.pending.gnu_long_link.is_some() { + return Err(StreamError::DuplicateGnuLongLink); + } + + // Check size limit + if size > self.limits.max_gnu_long_size { + return Err(StreamError::GnuLongTooLarge { + size, + limit: self.limits.max_gnu_long_size, + }); + } + + // Read content + let mut data = self.read_vec(size as usize)?; + self.skip_bytes(padded_size - size)?; + + // Strip trailing null + data.pop_if(|&mut x| x == 0); + + // Check path length limit + if data.len() > self.limits.max_path_len { + return Err(StreamError::PathTooLong { + len: data.len(), + limit: self.limits.max_path_len, + }); + } + + self.pending.gnu_long_link = Some(data); + self.pending.count += 1; + Ok(()) + } + + fn handle_pax_header(&mut self, size: u64, padded_size: u64) -> Result<()> { + // Check for duplicate + if self.pending.pax_extensions.is_some() { + return Err(StreamError::DuplicatePaxHeader); + } + + // Check size limit + if size > self.limits.max_pax_size { + return Err(StreamError::PaxTooLarge { + size, + limit: self.limits.max_pax_size, + }); + } + + // Read content + let data = self.read_vec(size as usize)?; + self.skip_bytes(padded_size - size)?; + + self.pending.pax_extensions = Some(data); + self.pending.count += 1; + Ok(()) + } + + fn resolve_entry_with_pending( + &self, + gnu_long_name: Option>, + gnu_long_link: Option>, + pax_extensions: Option>, + ) -> Result> { + let header = Header::from_bytes_exact(&self.header_buf); + + // Start with header values + let mut path: Cow<'_, [u8]> = Cow::Borrowed(header.path_bytes()); + let mut link_target: Option> = None; + let mut uid = header.uid()?; + let mut gid = header.gid()?; + let mut mtime = header.mtime()?; + let mut entry_size = header.entry_size()?; + let mut xattrs = Vec::new(); + 
let mut uname: Option> = header.username().map(Cow::Borrowed); + let mut gname: Option> = header.groupname().map(Cow::Borrowed); + + // Handle UStar prefix for path + if let Some(prefix) = header.prefix() { + if !prefix.is_empty() { + let mut full_path = prefix.to_vec(); + full_path.push(b'/'); + full_path.extend_from_slice(header.path_bytes()); + path = Cow::Owned(full_path); + } + } + + // Apply GNU long name (overrides header + prefix) + if let Some(long_name) = gnu_long_name { + path = Cow::Owned(long_name); + } + + // Apply GNU long link + if let Some(long_link) = gnu_long_link { + link_target = Some(Cow::Owned(long_link)); + } else { + let header_link = header.link_name_bytes(); + if !header_link.is_empty() { + link_target = Some(Cow::Borrowed(header_link)); + } + } + + // Apply PAX extensions (highest priority) + let pax_data: Option> = + pax_extensions.as_ref().map(|v| Cow::Owned(v.clone())); + + if let Some(ref pax) = pax_extensions { + let extensions = PaxExtensions::new(pax); + + for ext in extensions { + let ext = ext?; + let key = ext.key().map_err(StreamError::from)?; + let value = ext.value_bytes(); + + match key { + "path" => { + // Check length limit + if value.len() > self.limits.max_path_len { + return Err(StreamError::PathTooLong { + len: value.len(), + limit: self.limits.max_path_len, + }); + } + path = Cow::Owned(value.to_vec()); + } + "linkpath" => { + if value.len() > self.limits.max_path_len { + return Err(StreamError::PathTooLong { + len: value.len(), + limit: self.limits.max_path_len, + }); + } + link_target = Some(Cow::Owned(value.to_vec())); + } + "size" => { + if let Ok(v) = ext.value() { + if let Ok(s) = v.parse::() { + entry_size = s; + } + } + } + "uid" => { + if let Ok(v) = ext.value() { + if let Ok(u) = v.parse::() { + uid = u; + } + } + } + "gid" => { + if let Ok(v) = ext.value() { + if let Ok(g) = v.parse::() { + gid = g; + } + } + } + "mtime" => { + // PAX mtime can be a decimal; truncate to integer + if let Ok(v) = ext.value() { + if let Some(s) = v.split('.').next() { + if let Ok(m) = s.parse::() { + mtime = m; + } + } + } + } + "uname" => { + uname = Some(Cow::Owned(value.to_vec())); + } + "gname" => { + gname = Some(Cow::Owned(value.to_vec())); + } + _ if key.starts_with(PAX_SCHILY_XATTR) => { + let attr_name = &key[PAX_SCHILY_XATTR.len()..]; + xattrs.push(( + Cow::Owned(attr_name.as_bytes().to_vec()), + Cow::Owned(value.to_vec()), + )); + } + _ => { + // Ignore unknown keys + } + } + } + } + + // Validate final path length + if path.len() > self.limits.max_path_len { + return Err(StreamError::PathTooLong { + len: path.len(), + limit: self.limits.max_path_len, + }); + } + + Ok(ParsedEntry { + header_bytes: &self.header_buf, + entry_type: header.entry_type(), + path, + link_target, + mode: header.mode()?, + uid, + gid, + mtime, + size: entry_size, + uname, + gname, + dev_major: header.device_major()?, + dev_minor: header.device_minor()?, + xattrs, + pax_data, + }) + } +} + +/// Read exactly `buf.len()` bytes, returning false if EOF before any bytes. 
+fn read_exact_or_eof<R: Read>(reader: &mut R, buf: &mut [u8]) -> Result<bool> {
+    let mut total = 0;
+    while total < buf.len() {
+        match reader.read(&mut buf[total..]) {
+            Ok(0) => {
+                if total == 0 {
+                    return Ok(false); // Clean EOF
+                }
+                return Err(StreamError::UnexpectedEof { pos: 0 });
+            }
+            Ok(n) => total += n,
+            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
+            Err(e) => return Err(e.into()),
+        }
+    }
+    Ok(true)
+}
diff --git a/crates/tar-header/src/stream/tests.rs b/crates/tar-header/src/stream/tests.rs
new file mode 100644
index 00000000..7fc25704
--- /dev/null
+++ b/crates/tar-header/src/stream/tests.rs
@@ -0,0 +1,553 @@
+//! Tests for the streaming tar parser.
+
+use std::io::Cursor;
+
+use crate::EntryType;
+
+use super::*;
+
+/// Helper to create a tar archive using the tar crate.
+fn create_tar_with<F>(f: F) -> Vec<u8>
+where
+    F: FnOnce(&mut tar::Builder<&mut Vec<u8>>),
+{
+    let mut data = Vec::new();
+    {
+        let mut builder = tar::Builder::new(&mut data);
+        f(&mut builder);
+        builder.finish().unwrap();
+    }
+    data
+}
+
+/// Helper to append a file to a tar builder.
+fn append_file(builder: &mut tar::Builder<&mut Vec<u8>>, path: &str, content: &[u8]) {
+    let mut header = tar::Header::new_gnu();
+    header.set_mode(0o644);
+    header.set_uid(1000);
+    header.set_gid(1000);
+    header.set_mtime(1234567890);
+    header.set_size(content.len() as u64);
+    header.set_entry_type(tar::EntryType::Regular);
+    builder.append_data(&mut header, path, content).unwrap();
+}
+
+// =============================================================================
+// Basic parsing tests
+// =============================================================================
+
+#[test]
+fn test_empty_tar() {
+    let data = create_tar_with(|_| {});
+    let mut parser = TarStreamParser::new(Cursor::new(data), Limits::default());
+
+    assert!(parser.next_entry().unwrap().is_none());
+}
+
+#[test]
+fn test_single_file() {
+    let data = create_tar_with(|b| {
+        append_file(b, "hello.txt", b"Hello, World!");
+    });
+
+    let mut parser = TarStreamParser::new(Cursor::new(data), Limits::default());
+
+    let entry = parser.next_entry().unwrap().expect("should have entry");
+    assert_eq!(entry.path.as_ref(), b"hello.txt");
+    assert_eq!(entry.entry_type, EntryType::Regular);
+    assert_eq!(entry.size, 13);
+    assert_eq!(entry.mode, 0o644);
+    assert_eq!(entry.uid, 1000);
+    assert_eq!(entry.gid, 1000);
+    assert_eq!(entry.mtime, 1234567890);
+
+    let size = entry.size;
+    drop(entry);
+    parser.skip_content(size).unwrap();
+
+    assert!(parser.next_entry().unwrap().is_none());
+}
+
+#[test]
+fn test_multiple_files() {
+    let data = create_tar_with(|b| {
+        append_file(b, "file1.txt", b"Content 1");
+        append_file(b, "file2.txt", b"Content 2");
+        append_file(b, "file3.txt", b"Content 3");
+    });
+
+    let mut parser = TarStreamParser::new(Cursor::new(data), Limits::default());
+
+    for i in 1..=3 {
+        let entry = parser.next_entry().unwrap().expect("should have entry");
+        assert_eq!(entry.path.as_ref(), format!("file{}.txt", i).as_bytes());
+        let size = entry.size;
+        drop(entry);
+        parser.skip_content(size).unwrap();
+    }
+
+    assert!(parser.next_entry().unwrap().is_none());
+}
+
+#[test]
+fn test_directory() {
+    let data = create_tar_with(|b| {
+        let mut header = tar::Header::new_gnu();
+        header.set_mode(0o755);
+        header.set_entry_type(tar::EntryType::Directory);
+        header.set_size(0);
+        b.append_data(&mut header, "mydir/", std::io::empty())
+            .unwrap();
+    });
+
+    let mut parser = TarStreamParser::new(Cursor::new(data), Limits::default());
+
+    let entry =
parser.next_entry().unwrap().expect("should have entry"); + assert_eq!(entry.path.as_ref(), b"mydir/"); + assert_eq!(entry.entry_type, EntryType::Directory); + assert!(entry.is_dir()); + + assert!(parser.next_entry().unwrap().is_none()); +} + +#[test] +fn test_symlink() { + let data = create_tar_with(|b| { + let mut header = tar::Header::new_gnu(); + header.set_mode(0o777); + header.set_entry_type(tar::EntryType::Symlink); + header.set_size(0); + b.append_link(&mut header, "link", "target").unwrap(); + }); + + let mut parser = TarStreamParser::new(Cursor::new(data), Limits::default()); + + let entry = parser.next_entry().unwrap().expect("should have entry"); + assert_eq!(entry.path.as_ref(), b"link"); + assert_eq!(entry.entry_type, EntryType::Symlink); + assert!(entry.is_symlink()); + assert_eq!(entry.link_target.as_ref().unwrap().as_ref(), b"target"); + + assert!(parser.next_entry().unwrap().is_none()); +} + +#[test] +fn test_hardlink() { + let data = create_tar_with(|b| { + // First create a regular file + append_file(b, "original.txt", b"content"); + + // Then create a hard link to it + let mut header = tar::Header::new_gnu(); + header.set_mode(0o644); + header.set_entry_type(tar::EntryType::Link); + header.set_size(0); + b.append_link(&mut header, "hardlink.txt", "original.txt") + .unwrap(); + }); + + let mut parser = TarStreamParser::new(Cursor::new(data), Limits::default()); + + // Skip original file + let entry1 = parser.next_entry().unwrap().expect("should have entry"); + assert_eq!(entry1.path.as_ref(), b"original.txt"); + let size = entry1.size; + drop(entry1); + parser.skip_content(size).unwrap(); + + // Check hardlink + let entry2 = parser.next_entry().unwrap().expect("should have entry"); + assert_eq!(entry2.path.as_ref(), b"hardlink.txt"); + assert_eq!(entry2.entry_type, EntryType::Link); + assert!(entry2.is_hard_link()); + assert_eq!( + entry2.link_target.as_ref().unwrap().as_ref(), + b"original.txt" + ); + + assert!(parser.next_entry().unwrap().is_none()); +} + +// ============================================================================= +// GNU long name/link tests +// ============================================================================= + +#[test] +fn test_gnu_long_name() { + // Create a path that exceeds 100 bytes + let long_path = format!("very/long/path/{}", "x".repeat(120)); + + let data = create_tar_with(|b| { + append_file(b, &long_path, b"content"); + }); + + let mut parser = TarStreamParser::new(Cursor::new(data), Limits::default()); + + let entry = parser.next_entry().unwrap().expect("should have entry"); + assert_eq!(entry.path.as_ref(), long_path.as_bytes()); + assert_eq!(entry.entry_type, EntryType::Regular); + + let size = entry.size; + drop(entry); + parser.skip_content(size).unwrap(); + assert!(parser.next_entry().unwrap().is_none()); +} + +#[test] +fn test_gnu_long_link() { + let long_target = "t".repeat(120); + + let data = create_tar_with(|b| { + let mut header = tar::Header::new_gnu(); + header.set_mode(0o777); + header.set_entry_type(tar::EntryType::Symlink); + header.set_size(0); + b.append_link(&mut header, "link", &long_target).unwrap(); + }); + + let mut parser = TarStreamParser::new(Cursor::new(data), Limits::default()); + + let entry = parser.next_entry().unwrap().expect("should have entry"); + assert_eq!(entry.path.as_ref(), b"link"); + assert!(entry.is_symlink()); + assert_eq!( + entry.link_target.as_ref().unwrap().as_ref(), + long_target.as_bytes() + ); + + assert!(parser.next_entry().unwrap().is_none()); +} + +#[test] +fn 
test_gnu_long_name_and_link() { + let long_path = "p".repeat(120); + let long_target = "t".repeat(120); + + let data = create_tar_with(|b| { + let mut header = tar::Header::new_gnu(); + header.set_mode(0o777); + header.set_entry_type(tar::EntryType::Symlink); + header.set_size(0); + b.append_link(&mut header, &long_path, &long_target) + .unwrap(); + }); + + let mut parser = TarStreamParser::new(Cursor::new(data), Limits::default()); + + let entry = parser.next_entry().unwrap().expect("should have entry"); + assert_eq!(entry.path.as_ref(), long_path.as_bytes()); + assert_eq!( + entry.link_target.as_ref().unwrap().as_ref(), + long_target.as_bytes() + ); + + assert!(parser.next_entry().unwrap().is_none()); +} + +// ============================================================================= +// PAX extension tests +// ============================================================================= + +#[test] +fn test_pax_long_path() { + // Use tar crate's PAX builder for paths > 100 bytes + let long_path = format!("pax/path/{}", "y".repeat(200)); + + let data = create_tar_with(|b| { + let mut header = tar::Header::new_ustar(); + header.set_mode(0o644); + header.set_size(7); + header.set_entry_type(tar::EntryType::Regular); + // This will create a PAX header for the long path + b.append_data(&mut header, &long_path, b"content".as_slice()) + .unwrap(); + }); + + let mut parser = TarStreamParser::new(Cursor::new(data), Limits::default()); + + let entry = parser.next_entry().unwrap().expect("should have entry"); + assert_eq!(entry.path.as_ref(), long_path.as_bytes()); + + let size = entry.size; + drop(entry); + parser.skip_content(size).unwrap(); + assert!(parser.next_entry().unwrap().is_none()); +} + +// ============================================================================= +// Security limit tests +// ============================================================================= + +#[test] +fn test_path_too_long() { + let long_path = "x".repeat(200); + + let data = create_tar_with(|b| { + append_file(b, &long_path, b"content"); + }); + + let limits = Limits { + max_path_len: 100, + ..Default::default() + }; + let mut parser = TarStreamParser::new(Cursor::new(data), limits); + + let err = parser.next_entry().unwrap_err(); + assert!(matches!( + err, + StreamError::PathTooLong { + len: 200, + limit: 100 + } + )); +} + +#[test] +fn test_gnu_long_too_large() { + let long_path = "x".repeat(200); + + let data = create_tar_with(|b| { + append_file(b, &long_path, b"content"); + }); + + let limits = Limits { + max_gnu_long_size: 100, + ..Default::default() + }; + let mut parser = TarStreamParser::new(Cursor::new(data), limits); + + let err = parser.next_entry().unwrap_err(); + assert!(matches!(err, StreamError::GnuLongTooLarge { .. 
})); +} + +// ============================================================================= +// Cross-checking with tar crate +// ============================================================================= + +#[test] +fn test_crosscheck_simple() { + let data = create_tar_with(|b| { + append_file(b, "file1.txt", b"Hello"); + append_file(b, "file2.txt", b"World"); + }); + + // Parse with tar crate + let mut tar_archive = tar::Archive::new(Cursor::new(data.clone())); + let tar_entries: Vec<_> = tar_archive.entries().unwrap().collect(); + + // Parse with our crate + let mut parser = TarStreamParser::new(Cursor::new(data), Limits::default()); + let mut our_entries = Vec::new(); + while let Some(entry) = parser.next_entry().unwrap() { + let info = ( + entry.path.to_vec(), + entry.size, + entry.mode, + entry.uid, + entry.gid, + entry.mtime, + ); + let size = entry.size; + drop(entry); + our_entries.push(info); + parser.skip_content(size).unwrap(); + } + + assert_eq!(tar_entries.len(), our_entries.len()); + + for (tar_entry, our_entry) in tar_entries.into_iter().zip(our_entries.into_iter()) { + let tar_entry = tar_entry.unwrap(); + let tar_header = tar_entry.header(); + + assert_eq!( + tar_header.path_bytes().as_ref(), + our_entry.0.as_slice(), + "path mismatch" + ); + assert_eq!(tar_header.size().unwrap(), our_entry.1, "size mismatch"); + assert_eq!(tar_header.mode().unwrap(), our_entry.2, "mode mismatch"); + assert_eq!(tar_header.uid().unwrap(), our_entry.3, "uid mismatch"); + assert_eq!(tar_header.gid().unwrap(), our_entry.4, "gid mismatch"); + assert_eq!(tar_header.mtime().unwrap(), our_entry.5, "mtime mismatch"); + } +} + +#[test] +fn test_crosscheck_gnu_long_names() { + let paths = vec![ + "short.txt".to_string(), + format!("medium/{}", "m".repeat(80)), + format!("long/{}", "l".repeat(150)), + ]; + + let data = create_tar_with(|b| { + for path in &paths { + append_file(b, path, b"content"); + } + }); + + // Parse with tar crate + let mut tar_archive = tar::Archive::new(Cursor::new(data.clone())); + let tar_paths: Vec<_> = tar_archive + .entries() + .unwrap() + .map(|e| e.unwrap().path().unwrap().to_path_buf()) + .collect(); + + // Parse with our crate + let mut parser = TarStreamParser::new(Cursor::new(data), Limits::default()); + let mut our_paths = Vec::new(); + while let Some(entry) = parser.next_entry().unwrap() { + let path = String::from_utf8_lossy(&entry.path).to_string(); + let size = entry.size; + drop(entry); + our_paths.push(path); + parser.skip_content(size).unwrap(); + } + + assert_eq!(tar_paths.len(), our_paths.len()); + for (tar_path, our_path) in tar_paths.into_iter().zip(our_paths.into_iter()) { + assert_eq!(tar_path.to_string_lossy(), our_path); + } +} + +// ============================================================================= +// Edge cases +// ============================================================================= + +#[test] +fn test_empty_file() { + let data = create_tar_with(|b| { + append_file(b, "empty.txt", b""); + }); + + let mut parser = TarStreamParser::new(Cursor::new(data), Limits::default()); + + let entry = parser.next_entry().unwrap().expect("should have entry"); + assert_eq!(entry.path.as_ref(), b"empty.txt"); + assert_eq!(entry.size, 0); + + // No content to skip for empty file + assert!(parser.next_entry().unwrap().is_none()); +} + +#[test] +fn test_read_content() { + let data = create_tar_with(|b| { + append_file(b, "file.txt", b"Hello, World!"); + }); + + let mut parser = TarStreamParser::new(Cursor::new(data), Limits::default()); + + let 
entry = parser.next_entry().unwrap().expect("should have entry"); + assert_eq!(entry.size, 13); + let size = entry.size; + drop(entry); + + // Read the actual content + let mut content = vec![0u8; size as usize]; + std::io::Read::read_exact(parser.reader(), &mut content).unwrap(); + assert_eq!(content, b"Hello, World!"); + + // Skip padding + parser.skip_padding(size).unwrap(); + + assert!(parser.next_entry().unwrap().is_none()); +} + +#[test] +fn test_padded_size() { + let data = create_tar_with(|b| { + append_file(b, "file.txt", b"x"); // 1 byte, padded to 512 + }); + + let mut parser = TarStreamParser::new(Cursor::new(data), Limits::default()); + + let entry = parser.next_entry().unwrap().expect("should have entry"); + assert_eq!(entry.size, 1); + assert_eq!(entry.padded_size(), 512); + + let size = entry.size; + drop(entry); + parser.skip_content(size).unwrap(); + assert!(parser.next_entry().unwrap().is_none()); +} + +// ============================================================================= +// Proptest cross-checking +// ============================================================================= + +mod proptest_tests { + use super::*; + use proptest::prelude::*; + + /// Strategy for generating valid file paths. + fn path_strategy() -> impl Strategy { + proptest::string::string_regex("[a-zA-Z0-9_][a-zA-Z0-9_.+-]{0,50}") + .expect("valid regex") + .prop_filter("non-empty", |s| !s.is_empty()) + } + + /// Strategy for file content. + fn content_strategy() -> impl Strategy> { + prop::collection::vec(any::(), 0..1024) + } + + proptest! { + #![proptest_config(ProptestConfig::with_cases(64))] + + #[test] + fn test_roundtrip_single_file(path in path_strategy(), content in content_strategy()) { + let data = create_tar_with(|b| { + append_file(b, &path, &content); + }); + + let mut parser = TarStreamParser::new(Cursor::new(data), Limits::default()); + + let entry = parser.next_entry().unwrap().expect("should have entry"); + prop_assert_eq!(entry.path.as_ref(), path.as_bytes()); + prop_assert_eq!(entry.size, content.len() as u64); + let size = entry.size; + drop(entry); + + // Read content and verify + let mut read_content = vec![0u8; size as usize]; + if size > 0 { + std::io::Read::read_exact(parser.reader(), &mut read_content).unwrap(); + parser.skip_padding(size).unwrap(); + } + prop_assert_eq!(read_content, content); + + prop_assert!(parser.next_entry().unwrap().is_none()); + } + + #[test] + fn test_roundtrip_multiple_files( + paths in prop::collection::vec(path_strategy(), 1..8) + ) { + let data = create_tar_with(|b| { + for (i, path) in paths.iter().enumerate() { + let content = format!("content{}", i); + append_file(b, path, content.as_bytes()); + } + }); + + // Parse with tar crate + let mut tar_archive = tar::Archive::new(Cursor::new(data.clone())); + let tar_count = tar_archive.entries().unwrap().count(); + + // Parse with our crate + let mut parser = TarStreamParser::new(Cursor::new(data), Limits::default()); + let mut our_count = 0; + while let Some(entry) = parser.next_entry().unwrap() { + our_count += 1; + let size = entry.size; + drop(entry); + parser.skip_content(size).unwrap(); + } + + prop_assert_eq!(tar_count, our_count); + prop_assert_eq!(our_count, paths.len()); + } + } +} From a85a19b4232b61ea45816d1498889a4bfcb9e803 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 29 Jan 2026 11:46:40 -0500 Subject: [PATCH 03/13] oci: Use tar-header crate for header parsing Migrate composefs-oci to use the shared tar-header crate instead of the tar crate for header parsing. 
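
The header-parsing call pattern after this change looks roughly like the
following sketch (based on the hunks below; exact signatures live in the
tar-header crate):

    let header = Header::from_bytes_exact(&buf);
    let size = header.entry_size()?;
    match header.entry_type() {
        EntryType::GnuLongName => { /* accumulate the GNU long name */ }
        _ => { /* build the TarItem for this entry */ }
    }
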
The tar crate is still used for PaxExtensions (PAX header parsing) and Builder (test utilities). This eliminates duplicate tar header parsing logic and uses zerocopy for safe, zero-copy access to header fields. Assisted-by: OpenCode (Opus 4.5) Signed-off-by: Colin Walters --- crates/composefs-oci/Cargo.toml | 4 +- crates/composefs-oci/src/image.rs | 14 +- crates/composefs-oci/src/tar.rs | 393 ++++++++++++++++++++++++++++-- 3 files changed, 391 insertions(+), 20 deletions(-) diff --git a/crates/composefs-oci/Cargo.toml b/crates/composefs-oci/Cargo.toml index 4caf0455..89929f3a 100644 --- a/crates/composefs-oci/Cargo.toml +++ b/crates/composefs-oci/Cargo.toml @@ -22,13 +22,15 @@ oci-spec = { version = "0.8.0", default-features = false } rustix = { version = "1.0.0", features = ["fs"] } sha2 = { version = "0.10.1", default-features = false } tar = { version = "0.4.38", default-features = false } +tar-header = { path = "../tar-header" } tokio = { version = "1.24.2", features = ["rt-multi-thread"] } tokio-util = { version = "0.7", default-features = false, features = ["io"] } [dev-dependencies] -similar-asserts = "1.7.0" composefs = { workspace = true, features = ["test"] } once_cell = "1.21.3" +proptest = "1" +similar-asserts = "1.7.0" tempfile = "3.8.0" [lints] diff --git a/crates/composefs-oci/src/image.rs b/crates/composefs-oci/src/image.rs index cc1ce72e..c98ec7de 100644 --- a/crates/composefs-oci/src/image.rs +++ b/crates/composefs-oci/src/image.rs @@ -53,8 +53,18 @@ pub fn process_entry( content, })), TarItem::Hardlink(target) => { - let (dir, filename) = filesystem.root.split(&target)?; - Inode::Leaf(dir.ref_leaf(filename)?) + let (dir, filename) = filesystem.root.split(&target).with_context(|| { + format!( + "Looking up hardlink target directory for {:?} -> {:?}", + entry.path, target + ) + })?; + Inode::Leaf(dir.ref_leaf(filename).with_context(|| { + format!( + "Looking up hardlink target {:?} for {:?}", + target, entry.path + ) + })?) } }; diff --git a/crates/composefs-oci/src/tar.rs b/crates/composefs-oci/src/tar.rs index f6fd645a..c3d15a89 100644 --- a/crates/composefs-oci/src/tar.rs +++ b/crates/composefs-oci/src/tar.rs @@ -24,7 +24,8 @@ use std::{ use anyhow::{bail, ensure, Result}; use bytes::Bytes; use rustix::fs::makedev; -use tar::{EntryType, Header, PaxExtensions}; +use tar::PaxExtensions; +use tar_header::{EntryType, Header}; use tokio::{ io::{AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncReadExt}, sync::mpsc, @@ -260,15 +261,26 @@ impl fmt::Display for TarEntry { } } -fn path_from_tar(pax: Option>, gnu: Vec, short: &[u8]) -> PathBuf { +/// Build a file path from tar metadata (PAX > GNU > header name + UStar prefix). +fn path_from_tar(pax: Option>, gnu: Vec, header: &Header) -> PathBuf { // Prepend leading / let mut path = vec![b'/']; + if let Some(name) = pax { + // PAX extended header has highest priority path.extend(name); } else if !gnu.is_empty() { + // GNU long name has second priority path.extend(gnu); } else { - path.extend(short); + // Standard header - check for UStar prefix field + if let Some(prefix) = header.prefix() { + if !prefix.is_empty() { + path.extend(prefix); + path.push(b'/'); + } + } + path.extend(header.path_bytes()); } // Drop trailing '/' characters in case of directories. @@ -277,6 +289,25 @@ fn path_from_tar(pax: Option>, gnu: Vec, short: &[u8]) -> PathBuf PathBuf::from(OsString::from_vec(path)) } +/// Build a link target path from tar metadata (PAX > GNU > header link_name). 
+/// Link targets don't use the UStar prefix field - they use the linkname field directly. +fn link_target_from_tar(pax: Option>, gnu: Vec, short: &[u8]) -> PathBuf { + // Prepend leading / + let mut path = vec![b'/']; + if let Some(name) = pax { + path.extend(name); + } else if !gnu.is_empty() { + path.extend(gnu); + } else { + path.extend(short); + } + + // Drop trailing '/' characters. + path.pop_if(|x| x == &b'/'); + + PathBuf::from(OsString::from_vec(path)) +} + fn symlink_target_from_tar(pax: Option>, gnu: Vec, short: &[u8]) -> Box { if let Some(name) = pax { OsStr::from_bytes(name.as_ref()).into() @@ -315,7 +346,7 @@ pub fn get_entry( return Ok(None); } - let header = tar::Header::from_byte_slice(&buf); + let header = Header::from_bytes_exact(&buf); let size = header.entry_size()?; let stored_size = size.next_multiple_of(512); @@ -332,19 +363,22 @@ pub fn get_entry( _ => bail!("Unsupported external-chunked entry {header:?} {id:?}"), }, SplitStreamData::Inline(content) => match header.entry_type() { - EntryType::GNULongLink => { + EntryType::GnuLongLink => { gnu_longlink.extend(content); gnu_longlink.pop_if(|x| *x == b'\0'); continue; } - EntryType::GNULongName => { + EntryType::GnuLongName => { gnu_longname.extend(content); gnu_longname.pop_if(|x| *x == b'\0'); continue; } EntryType::XGlobalHeader => { - todo!(); + // Global PAX headers affect all subsequent entries. + // For simplicity, we skip them (matching tar-rs and TarStreamParser behavior). + // A more complete implementation would merge them into parser state. + continue; } EntryType::XHeader => { for item in PaxExtensions::new(&content) { @@ -372,16 +406,12 @@ pub fn get_entry( TarItem::Leaf(LeafContent::Regular(RegularFile::Inline(content))) } EntryType::Link => TarItem::Hardlink({ - let Some(link_name) = header.link_name_bytes() else { - bail!("link without a name?") - }; - OsString::from(path_from_tar(pax_longlink, gnu_longlink, &link_name)) + let link_name = header.link_name_bytes(); + link_target_from_tar(pax_longlink, gnu_longlink, link_name).into_os_string() }), EntryType::Symlink => TarItem::Leaf(LeafContent::Symlink({ - let Some(link_name) = header.link_name_bytes() else { - bail!("symlink without a name?") - }; - symlink_target_from_tar(pax_longlink, gnu_longlink, &link_name) + let link_name = header.link_name_bytes(); + symlink_target_from_tar(pax_longlink, gnu_longlink, link_name) })), EntryType::Block => TarItem::Leaf(LeafContent::BlockDevice( match (header.device_major()?, header.device_minor()?) { @@ -403,7 +433,7 @@ pub fn get_entry( }; return Ok(Some(TarEntry { - path: path_from_tar(pax_longname, gnu_longname, &header.path_bytes()), + path: path_from_tar(pax_longname, gnu_longname, header), stat: Stat { st_uid: header.uid()? as u32, st_gid: header.gid()? 
as u32, @@ -784,7 +814,7 @@ mod tests { let mut builder = Builder::new(&mut tar_data); let mut header = tar::Header::new_gnu(); header.set_mode(0o777); - header.set_entry_type(EntryType::Symlink); + header.set_entry_type(tar::EntryType::Symlink); header.set_size(0); header.set_uid(0); header.set_gid(0); @@ -1050,4 +1080,333 @@ mod tests { } } } + + // ========================================================================== + // Long path format tests using proptest + // ========================================================================== + // + // Tar archives use different mechanisms for paths > 100 characters: + // - GNU LongName: type 'L' entry before actual entry (used by tar crate with new_gnu()) + // - UStar prefix: 155-byte prefix field + 100-byte name field (max ~255 bytes) + // - PAX extended: type 'x' entry with key=value pairs (unlimited length) + + /// Table-driven test for specific path length edge cases and format triggers. + #[test] + fn test_longpath_formats() { + // (description, path generator, use_gnu_header) + // The tar crate auto-selects format based on path length and header type + let cases: &[(&str, fn() -> String, bool)] = &[ + // Basic name field (≤100 chars) + ("short path", || "short.txt".to_string(), false), + ("exactly 100 chars", || "x".repeat(100), false), + // UStar prefix (101-255 chars with /) + ( + "ustar prefix", + || format!("{}/{}", "dir".repeat(40), "file.txt"), + false, + ), + ( + "max ustar (~254 chars)", + || format!("{}/{}", "p".repeat(154), "n".repeat(99)), + false, + ), + // GNU LongName (>100 chars with gnu header) + ( + "gnu longname", + || format!("{}/{}", "a".repeat(80), "b".repeat(50)), + true, + ), + // PAX (>255 chars, any header) + ( + "pax extended", + || format!("{}/{}", "sub/".repeat(60), "file.txt"), + false, + ), + ]; + + for (desc, make_path, use_gnu) in cases { + let path = make_path(); + let content = b"test content"; + + let mut tar_data = Vec::new(); + { + let mut builder = Builder::new(&mut tar_data); + let mut header = if *use_gnu { + tar::Header::new_gnu() + } else { + tar::Header::new_ustar() + }; + header.set_mode(0o644); + header.set_uid(1000); + header.set_gid(1000); + header.set_mtime(1234567890); + header.set_size(content.len() as u64); + header.set_entry_type(tar::EntryType::Regular); + builder + .append_data(&mut header, &path, &content[..]) + .unwrap(); + builder.finish().unwrap(); + } + + let entries = read_all_via_splitstream(tar_data).unwrap(); + assert_eq!(entries.len(), 1, "{desc}: expected 1 entry"); + assert_eq!( + entries[0].path, + PathBuf::from(format!("/{}", path)), + "{desc}: path mismatch (len={})", + path.len() + ); + } + } + + /// Table-driven test for hardlinks with long targets. 
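+    ///
+    /// The target file is written before the link so the hardlink can resolve,
+    /// covering short, GNU longlink, and PAX linkpath encodings of the target.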
+ #[test] + fn test_longpath_hardlinks() { + let cases: &[(&str, fn() -> String, bool)] = &[ + ("short target", || "target.txt".to_string(), true), + ( + "gnu longlink", + || format!("{}/{}", "c".repeat(80), "d".repeat(50)), + true, + ), + ( + "pax linkpath", + || format!("{}/{}", "sub/".repeat(60), "target.txt"), + false, + ), + ]; + + for (desc, make_target, use_gnu) in cases { + let target_path = make_target(); + let link_name = "hardlink"; + let content = b"target content"; + + let mut tar_data = Vec::new(); + { + let mut builder = Builder::new(&mut tar_data); + + // Create target file + let mut header = if *use_gnu { + tar::Header::new_gnu() + } else { + tar::Header::new_ustar() + }; + header.set_mode(0o644); + header.set_uid(1000); + header.set_gid(1000); + header.set_mtime(1234567890); + header.set_size(content.len() as u64); + header.set_entry_type(tar::EntryType::Regular); + builder + .append_data(&mut header, &target_path, &content[..]) + .unwrap(); + + // Create hardlink + let mut link_header = if *use_gnu { + tar::Header::new_gnu() + } else { + tar::Header::new_ustar() + }; + link_header.set_mode(0o644); + link_header.set_uid(1000); + link_header.set_gid(1000); + link_header.set_mtime(1234567890); + link_header.set_size(0); + link_header.set_entry_type(tar::EntryType::Link); + builder + .append_link(&mut link_header, link_name, &target_path) + .unwrap(); + + builder.finish().unwrap(); + } + + let entries = read_all_via_splitstream(tar_data).unwrap(); + assert_eq!(entries.len(), 2, "{desc}: expected 2 entries"); + assert_eq!( + entries[0].path, + PathBuf::from(format!("/{}", target_path)), + "{desc}" + ); + assert_eq!( + entries[1].path, + PathBuf::from(format!("/{}", link_name)), + "{desc}" + ); + + match &entries[1].item { + TarItem::Hardlink(target) => { + assert_eq!( + target.to_str().unwrap(), + format!("/{}", target_path), + "{desc}: hardlink target mismatch" + ); + } + _ => panic!("{desc}: expected hardlink entry"), + } + } + } + + /// Verify UStar prefix field is actually used for paths > 100 chars. + #[test] + fn test_ustar_prefix_field_used() { + // Path must be > 100 chars to trigger prefix usage, but filename must be <= 100 chars + let dir_path = + "usr/lib/python3.12/site-packages/some-very-long-package-name-here/__pycache__/subdir"; + let filename = "module_name_with_extra_stuff.cpython-312.opt-2.pyc"; + let full_path = format!("{dir_path}/{filename}"); + + // Verify our test setup: full path > 100 chars, filename <= 100 chars + assert!( + full_path.len() > 100, + "full path must exceed 100 chars to use prefix" + ); + assert!(filename.len() <= 100, "filename must fit in name field"); + + let mut tar_data = Vec::new(); + { + let mut builder = Builder::new(&mut tar_data); + let mut header = tar::Header::new_ustar(); + header.set_mode(0o644); + header.set_size(4); + header.set_entry_type(tar::EntryType::Regular); + header.set_path(&full_path).unwrap(); + header.set_cksum(); + builder.append(&header, b"test".as_slice()).unwrap(); + builder.finish().unwrap(); + } + + // Verify prefix field (bytes 345-500) is populated + let prefix_field = &tar_data[345..500]; + let prefix_str = std::str::from_utf8(prefix_field) + .unwrap() + .trim_end_matches('\0'); + assert_eq!( + prefix_str, dir_path, + "UStar prefix field should contain directory" + ); + + let entries = read_all_via_splitstream(tar_data).unwrap(); + assert_eq!(entries[0].path, PathBuf::from(format!("/{full_path}"))); + } + + /// Property-based tests for tar path handling. 
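+    ///
+    /// Generates paths and hardlink targets in the short (<= 100 chars), medium
+    /// (101-255 chars), and long (> 255 chars) ranges and asserts that each one
+    /// round-trips unchanged through the splitstream import.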
+ mod proptest_tests { + use super::*; + use proptest::prelude::*; + + /// Strategy for generating valid path components. + fn path_component() -> impl Strategy { + proptest::string::string_regex("[a-zA-Z0-9_][a-zA-Z0-9_.-]{0,30}") + .expect("valid regex") + .prop_filter("non-empty", |s| !s.is_empty()) + } + + /// Strategy for generating paths with a target total length. + fn path_with_length(min_len: usize, max_len: usize) -> impl Strategy { + prop::collection::vec(path_component(), 1..20) + .prop_map(|components| components.join("/")) + .prop_filter("length in range", move |p| { + p.len() >= min_len && p.len() <= max_len + }) + } + + /// Create a tar archive with a single file and verify round-trip. + fn roundtrip_path(path: &str) { + let content = b"proptest content"; + + let mut tar_data = Vec::new(); + { + let mut builder = Builder::new(&mut tar_data); + let mut header = tar::Header::new_ustar(); + header.set_mode(0o644); + header.set_uid(1000); + header.set_gid(1000); + header.set_mtime(1234567890); + header.set_size(content.len() as u64); + header.set_entry_type(tar::EntryType::Regular); + builder + .append_data(&mut header, path, &content[..]) + .unwrap(); + builder.finish().unwrap(); + } + + let entries = read_all_via_splitstream(tar_data).unwrap(); + assert_eq!(entries.len(), 1, "expected 1 entry for path: {path}"); + assert_eq!( + entries[0].path, + PathBuf::from(format!("/{path}")), + "path mismatch" + ); + } + + /// Create a tar archive with a hardlink and verify round-trip. + fn roundtrip_hardlink(target_path: &str) { + let link_name = "link"; + let content = b"target content"; + + let mut tar_data = Vec::new(); + { + let mut builder = Builder::new(&mut tar_data); + + let mut header = tar::Header::new_ustar(); + header.set_mode(0o644); + header.set_uid(1000); + header.set_gid(1000); + header.set_mtime(1234567890); + header.set_size(content.len() as u64); + header.set_entry_type(tar::EntryType::Regular); + builder + .append_data(&mut header, target_path, &content[..]) + .unwrap(); + + let mut link_header = tar::Header::new_ustar(); + link_header.set_mode(0o644); + link_header.set_uid(1000); + link_header.set_gid(1000); + link_header.set_mtime(1234567890); + link_header.set_size(0); + link_header.set_entry_type(tar::EntryType::Link); + builder + .append_link(&mut link_header, link_name, target_path) + .unwrap(); + + builder.finish().unwrap(); + } + + let entries = read_all_via_splitstream(tar_data).unwrap(); + assert_eq!(entries.len(), 2); + assert_eq!(entries[0].path, PathBuf::from(format!("/{target_path}"))); + + match &entries[1].item { + TarItem::Hardlink(target) => { + assert_eq!(target.to_str().unwrap(), format!("/{target_path}")); + } + _ => panic!("expected hardlink"), + } + } + + proptest! { + #![proptest_config(ProptestConfig::with_cases(64))] + + #[test] + fn test_short_paths(path in path_with_length(1, 100)) { + roundtrip_path(&path); + } + + #[test] + fn test_medium_paths(path in path_with_length(101, 255)) { + roundtrip_path(&path); + } + + #[test] + fn test_long_paths(path in path_with_length(256, 500)) { + roundtrip_path(&path); + } + + #[test] + fn test_hardlink_targets(target in path_with_length(1, 400)) { + roundtrip_hardlink(&target); + } + } + } } From f992ce39dad538eaa17ec20396dd652a97add0d4 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Tue, 27 Jan 2026 11:56:01 -0500 Subject: [PATCH 04/13] Justfile: Add containers-storage build targets Add test-all and build-cstorage targets for building and testing with the containers-storage feature enabled. 
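
For example:

    just test-all         # cargo test --workspace --all-features
    just build-cstorage   # cargo build --workspace --features containers-storage
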
Assisted-by: OpenCode (Opus 4.5) Signed-off-by: Colin Walters --- Justfile | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Justfile b/Justfile index 151576c8..a32ca799 100644 --- a/Justfile +++ b/Justfile @@ -31,6 +31,14 @@ fmt: # Run all checks (clippy + fmt + test) check: clippy fmt-check test +# Run all tests with all features enabled +test-all: + cargo test --workspace --all-features + +# Build with containers-storage feature +build-cstorage: + cargo build --workspace --features containers-storage + # Clean build artifacts clean: cargo clean From 51d827c2c749840740d8cd2c4e997876185c3dfc Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 29 Jan 2026 10:24:40 -0500 Subject: [PATCH 05/13] Add splitfdstream crate for binary streaming format Add a new crate implementing the splitfdstream binary format, which enables streaming tar archives with external file references. The format uses an 8-byte LE signed prefix followed by optional data: - Negative prefix: inline data of size |prefix| - Non-negative prefix: external file descriptor reference by index Key types: - SplitfdstreamWriter: Build splitfdstreams with inline/external chunks - SplitfdstreamReader: Parse splitfdstreams back into chunks - SplitfdstreamTarReader: Read adapter that reconstructs byte streams This enables zero-copy tar reconstruction from containers-storage's tar-split metadata by streaming headers inline while referencing file content via fd. Assisted-by: OpenCode (Opus 4.5) Signed-off-by: Colin Walters --- crates/splitfdstream/Cargo.toml | 21 + crates/splitfdstream/src/lib.rs | 1346 +++++++++++++++++++++++++++++++ 2 files changed, 1367 insertions(+) create mode 100644 crates/splitfdstream/Cargo.toml create mode 100644 crates/splitfdstream/src/lib.rs diff --git a/crates/splitfdstream/Cargo.toml b/crates/splitfdstream/Cargo.toml new file mode 100644 index 00000000..d577dc17 --- /dev/null +++ b/crates/splitfdstream/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "splitfdstream" +description = "Binary format for serializing data with external file descriptor references" +keywords = ["splitfdstream", "fd", "serialization"] + +edition.workspace = true +license.workspace = true +readme.workspace = true +repository.workspace = true +rust-version.workspace = true +version.workspace = true + +[dependencies] +rustix = { version = "1.0.0", default-features = false, features = ["fs", "std"] } + +[dev-dependencies] +proptest = "1" +tempfile = { version = "3.8.0", default-features = false } + +[lints] +workspace = true diff --git a/crates/splitfdstream/src/lib.rs b/crates/splitfdstream/src/lib.rs new file mode 100644 index 00000000..9180b49d --- /dev/null +++ b/crates/splitfdstream/src/lib.rs @@ -0,0 +1,1346 @@ +//! Split file descriptor stream format for serializing binary data with external chunks. +//! +//! This module implements a binary format for representing serialized binary files +//! (tar archives, zip files, filesystem images, etc.) where data chunks can be stored +//! externally as file descriptors rather than inline in the stream. +//! +//! # Format Overview +//! +//! A splitfdstream is a sequential stream of chunks. Each chunk begins with a signed +//! 64-bit little-endian prefix that determines the chunk type: +//! +//! | Prefix Value | Meaning | +//! |--------------|---------| +//! | `< 0` | **Inline**: The next `abs(prefix)` bytes are literal data | +//! | `>= 0` | **External**: Content comes from `fd[prefix + 1]` | +//! +//! # Use Cases +//! +//! 
The splitfdstream format is designed for scenarios where: +//! +//! - Large binary files need to be transferred with some data stored externally +//! - File descriptors can be passed alongside the stream (e.g., via Unix sockets) +//! - Deduplication is desired by referencing the same external fd multiple times +//! - Zero-copy operations are possible by referencing files directly +//! +//! # Example +//! +//! ``` +//! use splitfdstream::{SplitfdstreamWriter, SplitfdstreamReader, Chunk}; +//! +//! // Write a stream with mixed inline and external chunks +//! let mut buffer = Vec::new(); +//! let mut writer = SplitfdstreamWriter::new(&mut buffer); +//! writer.write_inline(b"inline data").unwrap(); +//! writer.write_external(0).unwrap(); // Reference fd[1] +//! writer.write_inline(b"more inline").unwrap(); +//! writer.finish().unwrap(); +//! +//! // Read the stream back +//! let mut reader = SplitfdstreamReader::new(buffer.as_slice()); +//! while let Some(chunk) = reader.next_chunk().unwrap() { +//! match chunk { +//! Chunk::Inline(data) => println!("Inline: {} bytes", data.len()), +//! Chunk::External(fd_index) => println!("External: fd[{}]", fd_index + 1), +//! } +//! } +//! ``` +//! +//! # Wire Format Details +//! +//! The stream consists of a sequence of chunks with no framing header or footer. +//! Each chunk is: +//! +//! 1. An 8-byte signed little-endian integer (the prefix) +//! 2. For inline chunks only: `abs(prefix)` bytes of literal data +//! +//! External chunks have no additional data after the prefix; the content is +//! retrieved from the file descriptor array passed alongside the stream. + +use std::io::{self, Read, Write}; +use std::os::fd::AsFd; + +/// Maximum size for an inline chunk (256 MB). +/// +/// This limit prevents denial-of-service attacks where a malicious stream +/// could specify an extremely large inline chunk size, causing unbounded +/// memory allocation. +pub const MAX_INLINE_CHUNK_SIZE: usize = 256 * 1024 * 1024; + +/// A chunk read from a splitfdstream. +/// +/// Chunks are either inline data embedded in the stream, or references to +/// external file descriptors that should be read separately. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Chunk<'a> { + /// Inline data embedded directly in the stream. + Inline(&'a [u8]), + + /// Reference to an external file descriptor. + /// + /// The value is the fd index (0-based), meaning the actual fd is at + /// position `fd_index + 1` in the fd array (fd\[0\] is typically the + /// stream itself). + External(u32), +} + +/// Writer for building a splitfdstream. +/// +/// The writer encodes inline data and external fd references into the +/// splitfdstream binary format. +/// +/// # Example +/// +/// ``` +/// use splitfdstream::SplitfdstreamWriter; +/// +/// let mut buffer = Vec::new(); +/// let mut writer = SplitfdstreamWriter::new(&mut buffer); +/// +/// // Write some inline data +/// writer.write_inline(b"Hello, world!").unwrap(); +/// +/// // Reference external fd at index 0 (fd[1]) +/// writer.write_external(0).unwrap(); +/// +/// // Finish and get the underlying writer back +/// let buffer = writer.finish().unwrap(); +/// ``` +#[derive(Debug)] +pub struct SplitfdstreamWriter { + writer: W, +} + +impl SplitfdstreamWriter { + /// Create a new splitfdstream writer wrapping the given writer. + pub fn new(writer: W) -> Self { + Self { writer } + } + + /// Write inline data to the stream. + /// + /// The data is prefixed with a negative i64 indicating the length, + /// followed by the literal bytes. 
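+    ///
+    /// For example, `write_inline(b"hi")` writes the prefix `-2` as 8 little-endian
+    /// bytes followed by the 2 data bytes, for 10 bytes in total.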
+ /// + /// # Errors + /// + /// Returns an error if writing to the underlying writer fails. + pub fn write_inline(&mut self, data: &[u8]) -> io::Result<()> { + if data.is_empty() { + return Ok(()); + } + + // Prefix is negative length + let len = data.len() as i64; + let prefix = -len; + self.writer.write_all(&prefix.to_le_bytes())?; + self.writer.write_all(data)?; + Ok(()) + } + + /// Write an external fd reference to the stream. + /// + /// The fd_index is the 0-based index into the fd array. The actual + /// file descriptor is at position `fd_index + 1` (since fd\[0\] is + /// typically the stream itself). + /// + /// # Errors + /// + /// Returns an error if writing to the underlying writer fails. + pub fn write_external(&mut self, fd_index: u32) -> io::Result<()> { + // Prefix is fd_index (non-negative), actual fd is at fd_index + 1 + let prefix = fd_index as i64; + self.writer.write_all(&prefix.to_le_bytes())?; + Ok(()) + } + + /// Finish writing and return the underlying writer. + /// + /// This consumes the writer and returns the underlying `Write` impl. + pub fn finish(self) -> io::Result { + Ok(self.writer) + } +} + +/// Reader for parsing a splitfdstream. +/// +/// The reader parses the binary format and yields chunks that are either +/// inline data or references to external file descriptors. +/// +/// # Example +/// +/// ``` +/// use splitfdstream::{SplitfdstreamReader, Chunk}; +/// +/// let data = vec![ +/// // Inline chunk: prefix = -5, then 5 bytes +/// 0xfb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // -5 as i64 LE +/// b'h', b'e', b'l', b'l', b'o', +/// ]; +/// +/// let mut reader = SplitfdstreamReader::new(data.as_slice()); +/// let chunk = reader.next_chunk().unwrap().unwrap(); +/// assert_eq!(chunk, Chunk::Inline(b"hello")); +/// ``` +#[derive(Debug)] +pub struct SplitfdstreamReader { + reader: R, + /// Buffer for reading inline data + buffer: Vec, +} + +impl SplitfdstreamReader { + /// Create a new splitfdstream reader wrapping the given reader. + pub fn new(reader: R) -> Self { + Self { + reader, + buffer: Vec::new(), + } + } + + /// Consume this reader, returning the underlying reader. + pub fn into_inner(self) -> R { + self.reader + } + + /// Read the next chunk from the stream. + /// + /// Returns `Ok(None)` when the stream is exhausted. + /// + /// # Errors + /// + /// Returns an error if: + /// - Reading from the underlying reader fails + /// - The stream contains invalid data (e.g., inline size exceeds maximum) + pub fn next_chunk(&mut self) -> io::Result>> { + // Read the 8-byte prefix + let mut prefix_bytes = [0u8; 8]; + match self.reader.read_exact(&mut prefix_bytes) { + Ok(()) => {} + Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => return Ok(None), + Err(e) => return Err(e), + } + + let prefix = i64::from_le_bytes(prefix_bytes); + + if prefix < 0 { + // Inline chunk: read abs(prefix) bytes + let len = (-prefix) as usize; + if len > MAX_INLINE_CHUNK_SIZE { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!( + "inline chunk size {} exceeds maximum allowed size {}", + len, MAX_INLINE_CHUNK_SIZE + ), + )); + } + self.buffer.clear(); + self.buffer.resize(len, 0); + self.reader.read_exact(&mut self.buffer)?; + Ok(Some(Chunk::Inline(&self.buffer))) + } else { + // External chunk: prefix is the fd index + Ok(Some(Chunk::External(prefix as u32))) + } + } +} + +/// A helper that reads a file from offset 0 using positional reads. 
+/// +/// This allows reading the same file multiple times without seeking, +/// since each read specifies its position explicitly. +#[derive(Debug)] +struct ReadAtReader<'a, F> { + file: &'a F, + offset: u64, +} + +impl<'a, F: AsFd> ReadAtReader<'a, F> { + fn new(file: &'a F) -> Self { + Self { file, offset: 0 } + } +} + +impl Read for ReadAtReader<'_, F> { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let n = rustix::io::pread(self.file, buf, self.offset)?; + self.offset += n as u64; + Ok(n) + } +} + +/// A `Read` adapter that reconstructs a byte stream from a splitfdstream. +/// +/// This struct implements `Read` by combining inline chunks and external file +/// descriptor content into a contiguous byte stream. It can be used with +/// `tar::Archive` to parse tar entries from a splitfdstream. +/// +/// External files are read using positional read (pread/read_at), so the +/// same file can be referenced multiple times in the splitfdstream without +/// needing to reopen or seek it. +/// +/// # Example +/// +/// ```no_run +/// use std::fs::File; +/// use splitfdstream::SplitfdstreamTarReader; +/// +/// let stream_data: &[u8] = &[/* splitfdstream bytes */]; +/// let files: Vec = vec![/* external files */]; +/// +/// let mut reader = SplitfdstreamTarReader::new(stream_data, &files); +/// // Use with tar::Archive or any Read consumer +/// ``` +#[derive(Debug)] +pub struct SplitfdstreamTarReader<'files, R: Read> { + reader: SplitfdstreamReader, + files: &'files [std::fs::File], + /// Buffer for inline data (partially consumed) + inline_buffer: Vec, + /// Position within inline_buffer + inline_pos: usize, + /// Current external file being read (if any) + current_external: Option>, +} + +impl<'files, R: Read> SplitfdstreamTarReader<'files, R> { + /// Create a new tar reader from a splitfdstream and files. + /// + /// The `files` slice provides the external files referenced by the + /// splitfdstream. Each external chunk at index N reads from `files[N]`. + pub fn new(splitfdstream: R, files: &'files [std::fs::File]) -> Self { + Self { + reader: SplitfdstreamReader::new(splitfdstream), + files, + inline_buffer: Vec::new(), + inline_pos: 0, + current_external: None, + } + } +} + +impl Read for SplitfdstreamTarReader<'_, R> { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + // First, drain any buffered inline data + if self.inline_pos < self.inline_buffer.len() { + let remaining = &self.inline_buffer[self.inline_pos..]; + let n = buf.len().min(remaining.len()); + buf[..n].copy_from_slice(&remaining[..n]); + self.inline_pos += n; + return Ok(n); + } + + // Next, drain current external file if any + if let Some(ref mut ext) = self.current_external { + let n = ext.read(buf)?; + if n > 0 { + return Ok(n); + } + // External exhausted, move to next chunk + self.current_external = None; + } + + // Get next chunk from splitfdstream + match self.reader.next_chunk()? 
{ + None => Ok(0), // EOF + Some(Chunk::Inline(data)) => { + let n = buf.len().min(data.len()); + buf[..n].copy_from_slice(&data[..n]); + if n < data.len() { + // Buffer remaining data for next read + self.inline_buffer.clear(); + self.inline_buffer.extend_from_slice(&data[n..]); + self.inline_pos = 0; + } + Ok(n) + } + Some(Chunk::External(idx)) => { + let idx = idx as usize; + if idx >= self.files.len() { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!( + "external chunk references fd index {} but only {} files provided", + idx, + self.files.len() + ), + )); + } + self.current_external = Some(ReadAtReader::new(&self.files[idx])); + // Recurse to read from the new external + self.read(buf) + } + } + } +} + +/// Reconstruct a stream from splitfdstream + file descriptors. +/// +/// This function reads a splitfdstream and writes the reconstructed data to `output`. +/// Inline chunks are written directly, while external chunks are read from the +/// corresponding file descriptors in `files`. +/// +/// # Arguments +/// +/// * `splitfdstream` - A reader providing the splitfdstream data +/// * `files` - Array of files for external chunks +/// * `output` - Writer to receive the reconstructed stream +/// +/// # Returns +/// +/// The total number of bytes written to `output`. +/// +/// # Errors +/// +/// Returns an error if: +/// * Reading from the splitfdstream fails +/// * An external chunk references a file index outside the bounds of `files` +/// * Reading from an external file fails +/// * Writing to the output fails +pub fn reconstruct( + splitfdstream: R, + files: &[std::fs::File], + output: &mut W, +) -> io::Result +where + R: Read, + W: Write, +{ + let mut reader = SplitfdstreamReader::new(splitfdstream); + let mut bytes_written = 0u64; + + while let Some(chunk) = reader.next_chunk()? { + match chunk { + Chunk::Inline(data) => { + output.write_all(data)?; + bytes_written += data.len() as u64; + } + Chunk::External(idx) => { + let file = files.get(idx as usize).ok_or_else(|| { + io::Error::new( + io::ErrorKind::InvalidData, + format!( + "external chunk references fd index {} but only {} files provided", + idx, + files.len() + ), + ) + })?; + let mut ext_reader = ReadAtReader::new(file); + let copied = io::copy(&mut ext_reader, output)?; + bytes_written += copied; + } + } + } + + Ok(bytes_written) +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Helper to write and read back chunks, verifying round-trip. 
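+    ///
+    /// Returns one `(is_inline, data, fd_index)` tuple per chunk read back: inline
+    /// chunks carry their bytes and an index of 0, external chunks carry an empty
+    /// data vector and their fd index.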
+ fn roundtrip_chunks( + inline_chunks: &[&[u8]], + external_indices: &[u32], + interleave: bool, + ) -> Vec<(bool, Vec, u32)> { + let mut buffer = Vec::new(); + { + let mut writer = SplitfdstreamWriter::new(&mut buffer); + + if interleave { + let max_len = inline_chunks.len().max(external_indices.len()); + for i in 0..max_len { + if i < inline_chunks.len() { + writer.write_inline(inline_chunks[i]).unwrap(); + } + if i < external_indices.len() { + writer.write_external(external_indices[i]).unwrap(); + } + } + } else { + for chunk in inline_chunks { + writer.write_inline(chunk).unwrap(); + } + for &idx in external_indices { + writer.write_external(idx).unwrap(); + } + } + + writer.finish().unwrap(); + } + + // Read back + let mut reader = SplitfdstreamReader::new(buffer.as_slice()); + let mut results = Vec::new(); + + while let Some(chunk) = reader.next_chunk().unwrap() { + match chunk { + Chunk::Inline(data) => { + results.push((true, data.to_vec(), 0)); + } + Chunk::External(idx) => { + results.push((false, Vec::new(), idx)); + } + } + } + + results + } + + #[test] + fn test_empty_stream() { + let buffer: Vec = Vec::new(); + let mut reader = SplitfdstreamReader::new(buffer.as_slice()); + assert!(reader.next_chunk().unwrap().is_none()); + } + + #[test] + fn test_only_inline_chunks() { + let chunks: &[&[u8]] = &[b"hello", b"world", b"test"]; + let results = roundtrip_chunks(chunks, &[], false); + + assert_eq!(results.len(), 3); + assert!(results[0].0); // is_inline + assert_eq!(results[0].1, b"hello"); + assert!(results[1].0); + assert_eq!(results[1].1, b"world"); + assert!(results[2].0); + assert_eq!(results[2].1, b"test"); + } + + #[test] + fn test_only_external_chunks() { + let results = roundtrip_chunks(&[], &[0, 5, 42, 100], false); + + assert_eq!(results.len(), 4); + assert!(!results[0].0); // is_external + assert_eq!(results[0].2, 0); + assert!(!results[1].0); + assert_eq!(results[1].2, 5); + assert!(!results[2].0); + assert_eq!(results[2].2, 42); + assert!(!results[3].0); + assert_eq!(results[3].2, 100); + } + + #[test] + fn test_mixed_inline_external() { + let inline: &[&[u8]] = &[b"header", b"middle", b"footer"]; + let external: &[u32] = &[0, 1, 2]; + let results = roundtrip_chunks(inline, external, true); + + // Interleaved: inline0, ext0, inline1, ext1, inline2, ext2 + assert_eq!(results.len(), 6); + + assert!(results[0].0); + assert_eq!(results[0].1, b"header"); + + assert!(!results[1].0); + assert_eq!(results[1].2, 0); + + assert!(results[2].0); + assert_eq!(results[2].1, b"middle"); + + assert!(!results[3].0); + assert_eq!(results[3].2, 1); + + assert!(results[4].0); + assert_eq!(results[4].1, b"footer"); + + assert!(!results[5].0); + assert_eq!(results[5].2, 2); + } + + #[test] + fn test_large_inline_chunk() { + // Test with a large chunk to verify i64 handles sizes correctly + let large_data: Vec = (0..100_000).map(|i| (i % 256) as u8).collect(); + + let mut buffer = Vec::new(); + { + let mut writer = SplitfdstreamWriter::new(&mut buffer); + writer.write_inline(&large_data).unwrap(); + writer.finish().unwrap(); + } + + let mut reader = SplitfdstreamReader::new(buffer.as_slice()); + let chunk = reader.next_chunk().unwrap().unwrap(); + + match chunk { + Chunk::Inline(data) => { + assert_eq!(data.len(), 100_000); + assert_eq!(data, large_data.as_slice()); + } + Chunk::External(_) => panic!("Expected inline chunk"), + } + + assert!(reader.next_chunk().unwrap().is_none()); + } + + #[test] + fn test_empty_inline_chunk_is_skipped() { + // Empty inline writes should be no-ops + let 
mut buffer = Vec::new(); + { + let mut writer = SplitfdstreamWriter::new(&mut buffer); + writer.write_inline(b"").unwrap(); + writer.write_inline(b"actual").unwrap(); + writer.write_inline(b"").unwrap(); + writer.finish().unwrap(); + } + + let mut reader = SplitfdstreamReader::new(buffer.as_slice()); + let chunk = reader.next_chunk().unwrap().unwrap(); + assert_eq!(chunk, Chunk::Inline(b"actual")); + assert!(reader.next_chunk().unwrap().is_none()); + } + + #[test] + fn test_boundary_sizes() { + // Test various boundary sizes + let sizes = [ + 1, 7, 8, 9, 255, 256, 257, 1023, 1024, 1025, 4095, 4096, 4097, + ]; + + for &size in &sizes { + let data: Vec = (0..size).map(|i| (i % 256) as u8).collect(); + + let mut buffer = Vec::new(); + { + let mut writer = SplitfdstreamWriter::new(&mut buffer); + writer.write_inline(&data).unwrap(); + writer.finish().unwrap(); + } + + // Verify buffer structure: 8-byte prefix + data + assert_eq!(buffer.len(), 8 + size); + + // Verify prefix is correct negative value + let prefix = i64::from_le_bytes(buffer[..8].try_into().unwrap()); + assert_eq!(prefix, -(size as i64)); + + // Read back and verify + let mut reader = SplitfdstreamReader::new(buffer.as_slice()); + let chunk = reader.next_chunk().unwrap().unwrap(); + match chunk { + Chunk::Inline(read_data) => { + assert_eq!(read_data.len(), size); + assert_eq!(read_data, data.as_slice()); + } + Chunk::External(_) => panic!("Expected inline"), + } + } + } + + #[test] + fn test_external_fd_index_zero() { + // fd_index 0 means fd[1], test this boundary + let mut buffer = Vec::new(); + { + let mut writer = SplitfdstreamWriter::new(&mut buffer); + writer.write_external(0).unwrap(); + writer.finish().unwrap(); + } + + // Should be exactly 8 bytes (the prefix) + assert_eq!(buffer.len(), 8); + + // Prefix should be 0 + let prefix = i64::from_le_bytes(buffer[..8].try_into().unwrap()); + assert_eq!(prefix, 0); + + let mut reader = SplitfdstreamReader::new(buffer.as_slice()); + let chunk = reader.next_chunk().unwrap().unwrap(); + assert_eq!(chunk, Chunk::External(0)); + } + + #[test] + fn test_large_fd_index() { + // Test with maximum u32 fd index + let mut buffer = Vec::new(); + { + let mut writer = SplitfdstreamWriter::new(&mut buffer); + writer.write_external(u32::MAX).unwrap(); + writer.finish().unwrap(); + } + + let mut reader = SplitfdstreamReader::new(buffer.as_slice()); + let chunk = reader.next_chunk().unwrap().unwrap(); + assert_eq!(chunk, Chunk::External(u32::MAX)); + } + + #[test] + fn test_single_byte_inline() { + let results = roundtrip_chunks(&[b"x"], &[], false); + assert_eq!(results.len(), 1); + assert!(results[0].0); + assert_eq!(results[0].1, b"x"); + } + + #[test] + fn test_writer_finish_returns_writer() { + let mut buffer = Vec::new(); + let writer = SplitfdstreamWriter::new(&mut buffer); + let returned = writer.finish().unwrap(); + + // Verify we got the writer back (can write to it) + returned.len(); // Just verify it's accessible + } + + #[test] + fn test_chunk_equality() { + assert_eq!(Chunk::Inline(b"test"), Chunk::Inline(b"test")); + assert_ne!(Chunk::Inline(b"test"), Chunk::Inline(b"other")); + assert_eq!(Chunk::External(5), Chunk::External(5)); + assert_ne!(Chunk::External(5), Chunk::External(6)); + assert_ne!(Chunk::Inline(b"test"), Chunk::External(0)); + } + + #[test] + fn test_many_small_chunks() { + // Stress test with many small chunks + let chunks: Vec> = (0..1000).map(|i| vec![i as u8; (i % 10) + 1]).collect(); + let chunk_refs: Vec<&[u8]> = chunks.iter().map(|c| c.as_slice()).collect(); 
+ + let mut buffer = Vec::new(); + { + let mut writer = SplitfdstreamWriter::new(&mut buffer); + for chunk in &chunk_refs { + writer.write_inline(chunk).unwrap(); + } + writer.finish().unwrap(); + } + + let mut reader = SplitfdstreamReader::new(buffer.as_slice()); + let mut count = 0; + while let Some(chunk) = reader.next_chunk().unwrap() { + match chunk { + Chunk::Inline(data) => { + assert_eq!(data, chunk_refs[count]); + count += 1; + } + Chunk::External(_) => panic!("Unexpected external"), + } + } + assert_eq!(count, 1000); + } + + #[test] + fn test_alternating_inline_external() { + let mut buffer = Vec::new(); + { + let mut writer = SplitfdstreamWriter::new(&mut buffer); + for i in 0..50 { + writer.write_inline(&[i as u8]).unwrap(); + writer.write_external(i as u32).unwrap(); + } + writer.finish().unwrap(); + } + + let mut reader = SplitfdstreamReader::new(buffer.as_slice()); + let mut inline_count = 0; + let mut external_count = 0; + + while let Some(chunk) = reader.next_chunk().unwrap() { + match chunk { + Chunk::Inline(data) => { + assert_eq!(data.len(), 1); + assert_eq!(data[0], inline_count as u8); + inline_count += 1; + } + Chunk::External(idx) => { + assert_eq!(idx, external_count as u32); + external_count += 1; + } + } + } + + assert_eq!(inline_count, 50); + assert_eq!(external_count, 50); + } + + #[test] + fn test_truncated_prefix_returns_none() { + // Partial prefix (less than 8 bytes) at end of stream + let buffer = vec![0x01, 0x02, 0x03]; // Only 3 bytes + + let mut reader = SplitfdstreamReader::new(buffer.as_slice()); + // Should return None (EOF) since we can't read a complete prefix + assert!(reader.next_chunk().unwrap().is_none()); + } + + #[test] + fn test_truncated_data_is_error() { + // Valid prefix saying 100 bytes, but only 10 bytes of data + let mut buffer = Vec::new(); + let prefix: i64 = -100; // Inline, 100 bytes + buffer.extend_from_slice(&prefix.to_le_bytes()); + buffer.extend_from_slice(&[0u8; 10]); // Only 10 bytes + + let mut reader = SplitfdstreamReader::new(buffer.as_slice()); + let result = reader.next_chunk(); + assert!(result.is_err()); + } + + #[test] + fn test_inline_chunk_size_limit() { + // Attempt to read a chunk that exceeds MAX_INLINE_CHUNK_SIZE + let mut buffer = Vec::new(); + // Request 512 MB (exceeds 256 MB limit) + let prefix: i64 = -(512 * 1024 * 1024); + buffer.extend_from_slice(&prefix.to_le_bytes()); + + let mut reader = SplitfdstreamReader::new(buffer.as_slice()); + let result = reader.next_chunk(); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert_eq!(err.kind(), io::ErrorKind::InvalidData); + assert!(err.to_string().contains("exceeds maximum")); + } + + mod reconstruct_tests { + use super::*; + use std::io::Cursor; + use tempfile::NamedTempFile; + + #[test] + fn test_reconstruct_inline_only() { + // Create a splitfdstream with only inline data + let mut stream_buf = Vec::new(); + { + let mut writer = SplitfdstreamWriter::new(&mut stream_buf); + writer.write_inline(b"Hello, ").unwrap(); + writer.write_inline(b"world!").unwrap(); + writer.finish().unwrap(); + } + + let mut output = Vec::new(); + let files: &[std::fs::File] = &[]; + let bytes = reconstruct(stream_buf.as_slice(), files, &mut output).unwrap(); + + assert_eq!(output, b"Hello, world!"); + assert_eq!(bytes, 13); + } + + #[test] + fn test_reconstruct_empty_stream() { + let stream_buf: Vec = Vec::new(); + let mut output = Vec::new(); + let files: &[std::fs::File] = &[]; + let bytes = reconstruct(stream_buf.as_slice(), files, &mut output).unwrap(); + + 
assert!(output.is_empty()); + assert_eq!(bytes, 0); + } + + #[test] + fn test_reconstruct_with_external_fds() { + // Create temp files with known content + let mut file0 = NamedTempFile::new().unwrap(); + let mut file1 = NamedTempFile::new().unwrap(); + + use std::io::Write; + file0.write_all(b"EXTERNAL0").unwrap(); + file1.write_all(b"EXTERNAL1").unwrap(); + + // Create splitfdstream that references these files + let mut stream_buf = Vec::new(); + { + let mut writer = SplitfdstreamWriter::new(&mut stream_buf); + writer.write_inline(b"[start]").unwrap(); + writer.write_external(0).unwrap(); // Reference first fd + writer.write_inline(b"[mid]").unwrap(); + writer.write_external(1).unwrap(); // Reference second fd + writer.write_inline(b"[end]").unwrap(); + writer.finish().unwrap(); + } + + // Open files for reading + let f0 = std::fs::File::open(file0.path()).unwrap(); + let f1 = std::fs::File::open(file1.path()).unwrap(); + let files = [f0, f1]; + + let mut output = Vec::new(); + let bytes = reconstruct(stream_buf.as_slice(), &files, &mut output).unwrap(); + + assert_eq!(output, b"[start]EXTERNAL0[mid]EXTERNAL1[end]"); + assert_eq!(bytes, output.len() as u64); + } + + #[test] + fn test_reconstruct_external_fd_out_of_bounds() { + // Create splitfdstream referencing fd index 5, but only provide 1 file + let mut stream_buf = Vec::new(); + { + let mut writer = SplitfdstreamWriter::new(&mut stream_buf); + writer.write_external(5).unwrap(); // Out of bounds + writer.finish().unwrap(); + } + + let file = NamedTempFile::new().unwrap(); + let f = std::fs::File::open(file.path()).unwrap(); + let files = [f]; + + let mut output = Vec::new(); + let result = reconstruct(stream_buf.as_slice(), &files, &mut output); + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert_eq!(err.kind(), std::io::ErrorKind::InvalidData); + assert!(err.to_string().contains("fd index 5")); + } + + #[test] + fn test_reconstruct_large_external_file() { + // Create a larger external file to test efficient copying + let mut file = NamedTempFile::new().unwrap(); + let large_data: Vec = (0..100_000).map(|i| (i % 256) as u8).collect(); + + use std::io::Write; + file.write_all(&large_data).unwrap(); + + let mut stream_buf = Vec::new(); + { + let mut writer = SplitfdstreamWriter::new(&mut stream_buf); + writer.write_inline(b"header").unwrap(); + writer.write_external(0).unwrap(); + writer.write_inline(b"footer").unwrap(); + writer.finish().unwrap(); + } + + let f = std::fs::File::open(file.path()).unwrap(); + let files = [f]; + + let mut output = Vec::new(); + let bytes = reconstruct(stream_buf.as_slice(), &files, &mut output).unwrap(); + + // Verify header + large data + footer + assert_eq!(&output[..6], b"header"); + assert_eq!(&output[6..100_006], large_data.as_slice()); + assert_eq!(&output[100_006..], b"footer"); + assert_eq!(bytes, 6 + 100_000 + 6); + } + + #[test] + fn test_reconstruct_same_fd_multiple_times() { + // Test that the same fd can be referenced multiple times + let mut file = NamedTempFile::new().unwrap(); + + use std::io::Write; + file.write_all(b"REPEATED").unwrap(); + + let mut stream_buf = Vec::new(); + { + let mut writer = SplitfdstreamWriter::new(&mut stream_buf); + writer.write_external(0).unwrap(); + writer.write_inline(b"-").unwrap(); + writer.write_external(0).unwrap(); + writer.write_inline(b"-").unwrap(); + writer.write_external(0).unwrap(); + writer.finish().unwrap(); + } + + let f = std::fs::File::open(file.path()).unwrap(); + let files = [f]; + + let mut output = Vec::new(); + let 
bytes = reconstruct(stream_buf.as_slice(), &files, &mut output).unwrap(); + + // Each reference uses pread from offset 0, so each reads from start + assert_eq!(output, b"REPEATED-REPEATED-REPEATED"); + assert_eq!(bytes, 26); + } + + #[test] + fn test_into_inner() { + let data = vec![1, 2, 3, 4]; + let cursor = Cursor::new(data.clone()); + let reader = SplitfdstreamReader::new(cursor); + let inner = reader.into_inner(); + assert_eq!(inner.into_inner(), data); + } + } + + mod proptest_tests { + use super::*; + use proptest::prelude::*; + use std::io::Write; + use tempfile::NamedTempFile; + + /// Represents a chunk in the stream for testing purposes. + #[derive(Debug, Clone)] + enum TestChunk { + Inline(Vec), + External { fd_index: usize, content: Vec }, + } + + /// Strategy for generating inline chunk data. + /// Bounded to reasonable sizes to keep tests fast. + fn inline_data_strategy() -> impl Strategy> { + prop::collection::vec(any::(), 0..4096) + } + + /// Strategy for generating external chunk content. + fn external_content_strategy() -> impl Strategy> { + prop::collection::vec(any::(), 0..8192) + } + + /// Strategy for generating a single test chunk. + /// The fd_index is relative and will be resolved during test execution. + fn chunk_strategy() -> impl Strategy { + prop_oneof![ + inline_data_strategy().prop_map(TestChunk::Inline), + (0..16usize, external_content_strategy()).prop_map(|(idx, content)| { + TestChunk::External { + fd_index: idx, + content, + } + }) + ] + } + + /// Strategy for generating a sequence of chunks. + fn chunks_strategy() -> impl Strategy> { + prop::collection::vec(chunk_strategy(), 0..64) + } + + /// Execute a roundtrip test: write chunks, read them back, verify reconstruction. + fn roundtrip_test(chunks: Vec) -> Result<(), TestCaseError> { + // Collect unique external contents and assign fd indices + let mut external_contents: Vec> = Vec::new(); + + // Normalize fd_indices to actual file indices + let normalized_chunks: Vec = chunks + .into_iter() + .filter_map(|chunk| match chunk { + TestChunk::Inline(data) => { + // Skip empty inline chunks (writer skips them) + if data.is_empty() { + None + } else { + Some(TestChunk::Inline(data)) + } + } + TestChunk::External { fd_index, content } => { + // Map fd_index to actual position in external_contents + let actual_index = fd_index % 8.max(1); // Limit to 8 files max + + // Ensure we have enough files + while external_contents.len() <= actual_index { + external_contents.push(Vec::new()); + } + + // Store content (may overwrite previous) + external_contents[actual_index] = content.clone(); + + Some(TestChunk::External { + fd_index: actual_index, + content, + }) + } + }) + .collect(); + + // Create temp files for external data + let mut temp_files: Vec = Vec::new(); + for content in &external_contents { + let mut f = NamedTempFile::new().map_err(|e| TestCaseError::fail(e.to_string()))?; + f.write_all(content) + .map_err(|e| TestCaseError::fail(e.to_string()))?; + temp_files.push(f); + } + + // Write the splitfdstream + let mut stream_buf = Vec::new(); + { + let mut writer = SplitfdstreamWriter::new(&mut stream_buf); + for chunk in &normalized_chunks { + match chunk { + TestChunk::Inline(data) => { + writer + .write_inline(data) + .map_err(|e| TestCaseError::fail(e.to_string()))?; + } + TestChunk::External { fd_index, .. 
} => { + writer + .write_external(*fd_index as u32) + .map_err(|e| TestCaseError::fail(e.to_string()))?; + } + } + } + writer + .finish() + .map_err(|e| TestCaseError::fail(e.to_string()))?; + } + + // Read back and verify chunk sequence + let mut reader = SplitfdstreamReader::new(stream_buf.as_slice()); + let mut read_chunks = Vec::new(); + while let Some(chunk) = reader + .next_chunk() + .map_err(|e| TestCaseError::fail(e.to_string()))? + { + match chunk { + Chunk::Inline(data) => read_chunks.push(TestChunk::Inline(data.to_vec())), + Chunk::External(idx) => read_chunks.push(TestChunk::External { + fd_index: idx as usize, + content: external_contents + .get(idx as usize) + .cloned() + .unwrap_or_default(), + }), + } + } + + // Verify we got the same number of chunks + prop_assert_eq!( + normalized_chunks.len(), + read_chunks.len(), + "Chunk count mismatch" + ); + + // Verify each chunk matches + for (i, (expected, actual)) in + normalized_chunks.iter().zip(read_chunks.iter()).enumerate() + { + match (expected, actual) { + (TestChunk::Inline(expected_data), TestChunk::Inline(actual_data)) => { + prop_assert_eq!( + expected_data, + actual_data, + "Inline chunk {} data mismatch", + i + ); + } + ( + TestChunk::External { fd_index: ei, .. }, + TestChunk::External { fd_index: ai, .. }, + ) => { + prop_assert_eq!(ei, ai, "External chunk {} fd_index mismatch", i); + } + _ => { + return Err(TestCaseError::fail(format!( + "Chunk {} type mismatch: expected {:?}, got {:?}", + i, expected, actual + ))); + } + } + } + + // Verify reconstruction produces correct output + let files: Vec = temp_files + .iter() + .map(|f| std::fs::File::open(f.path())) + .collect::, _>>() + .map_err(|e| TestCaseError::fail(e.to_string()))?; + + let mut output = Vec::new(); + reconstruct(stream_buf.as_slice(), &files, &mut output) + .map_err(|e| TestCaseError::fail(e.to_string()))?; + + // Build expected output + let mut expected_output = Vec::new(); + for chunk in &normalized_chunks { + match chunk { + TestChunk::Inline(data) => expected_output.extend_from_slice(data), + TestChunk::External { fd_index, .. } => { + expected_output.extend_from_slice(&external_contents[*fd_index]); + } + } + } + + prop_assert_eq!(output, expected_output, "Reconstructed output mismatch"); + + Ok(()) + } + + proptest! 
{ + #![proptest_config(ProptestConfig::with_cases(256))] + + #[test] + fn test_arbitrary_chunk_sequences(chunks in chunks_strategy()) { + roundtrip_test(chunks)?; + } + + #[test] + fn test_inline_only_sequences( + chunks in prop::collection::vec(inline_data_strategy(), 0..32) + ) { + let test_chunks: Vec = chunks.into_iter() + .map(TestChunk::Inline) + .collect(); + roundtrip_test(test_chunks)?; + } + + #[test] + fn test_external_only_sequences( + chunks in prop::collection::vec( + (0..8usize, external_content_strategy()), + 0..32 + ) + ) { + let test_chunks: Vec = chunks.into_iter() + .map(|(idx, content)| TestChunk::External { fd_index: idx, content }) + .collect(); + roundtrip_test(test_chunks)?; + } + + #[test] + fn test_alternating_pattern( + inline_data in prop::collection::vec(inline_data_strategy(), 1..16), + external_data in prop::collection::vec(external_content_strategy(), 1..16) + ) { + let mut test_chunks = Vec::new(); + let max_len = inline_data.len().max(external_data.len()); + for i in 0..max_len { + if i < inline_data.len() { + test_chunks.push(TestChunk::Inline(inline_data[i].clone())); + } + if i < external_data.len() { + test_chunks.push(TestChunk::External { + fd_index: i % 8, + content: external_data[i].clone(), + }); + } + } + roundtrip_test(test_chunks)?; + } + + #[test] + fn test_same_fd_multiple_references( + content in external_content_strategy(), + ref_count in 1..10usize + ) { + let mut test_chunks = Vec::new(); + for _ in 0..ref_count { + test_chunks.push(TestChunk::External { + fd_index: 0, + content: content.clone(), + }); + } + roundtrip_test(test_chunks)?; + } + + #[test] + fn test_varying_chunk_sizes( + small in prop::collection::vec(any::(), 0..16), + medium in prop::collection::vec(any::(), 256..1024), + large in prop::collection::vec(any::(), 4096..8192) + ) { + let test_chunks = vec![ + TestChunk::Inline(small), + TestChunk::Inline(medium), + TestChunk::Inline(large), + ]; + roundtrip_test(test_chunks)?; + } + } + + /// Test SplitfdstreamTarReader with property-based approach + mod tar_reader_tests { + use super::*; + + proptest! 
{ + #![proptest_config(ProptestConfig::with_cases(128))] + + #[test] + fn test_tar_reader_matches_reconstruct(chunks in chunks_strategy()) { + tar_reader_test(chunks)?; + } + } + + fn tar_reader_test(chunks: Vec) -> Result<(), TestCaseError> { + // Collect unique external contents and assign fd indices + let mut external_contents: Vec> = Vec::new(); + + // Normalize fd_indices to actual file indices + let normalized_chunks: Vec = chunks + .into_iter() + .filter_map(|chunk| match chunk { + TestChunk::Inline(data) => { + if data.is_empty() { + None + } else { + Some(TestChunk::Inline(data)) + } + } + TestChunk::External { fd_index, content } => { + let actual_index = fd_index % 8.max(1); + while external_contents.len() <= actual_index { + external_contents.push(Vec::new()); + } + external_contents[actual_index] = content.clone(); + Some(TestChunk::External { + fd_index: actual_index, + content, + }) + } + }) + .collect(); + + // Create temp files for external data + let mut temp_files: Vec = Vec::new(); + for content in &external_contents { + let mut f = + NamedTempFile::new().map_err(|e| TestCaseError::fail(e.to_string()))?; + f.write_all(content) + .map_err(|e| TestCaseError::fail(e.to_string()))?; + temp_files.push(f); + } + + // Write the splitfdstream + let mut stream_buf = Vec::new(); + { + let mut writer = SplitfdstreamWriter::new(&mut stream_buf); + for chunk in &normalized_chunks { + match chunk { + TestChunk::Inline(data) => { + writer + .write_inline(data) + .map_err(|e| TestCaseError::fail(e.to_string()))?; + } + TestChunk::External { fd_index, .. } => { + writer + .write_external(*fd_index as u32) + .map_err(|e| TestCaseError::fail(e.to_string()))?; + } + } + } + writer + .finish() + .map_err(|e| TestCaseError::fail(e.to_string()))?; + } + + // Open files for reading + let files: Vec = temp_files + .iter() + .map(|f| std::fs::File::open(f.path())) + .collect::, _>>() + .map_err(|e| TestCaseError::fail(e.to_string()))?; + + // Read via SplitfdstreamTarReader + let mut tar_reader = SplitfdstreamTarReader::new(stream_buf.as_slice(), &files); + let mut tar_output = Vec::new(); + std::io::copy(&mut tar_reader, &mut tar_output) + .map_err(|e| TestCaseError::fail(e.to_string()))?; + + // Read via reconstruct + let mut reconstruct_output = Vec::new(); + reconstruct(stream_buf.as_slice(), &files, &mut reconstruct_output) + .map_err(|e| TestCaseError::fail(e.to_string()))?; + + prop_assert_eq!( + tar_output, + reconstruct_output, + "TarReader and reconstruct outputs differ" + ); + + Ok(()) + } + } + } +} From baccd08a35f8bdb27d9f408d248287a10744bd4e Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 29 Jan 2026 10:24:51 -0500 Subject: [PATCH 06/13] Add cstorage crate for containers-storage access Add a new crate providing read-only access to containers-storage (the storage backend used by podman, buildah, and other container tools). Key components: - Storage: Main entry point, discovers and opens storage locations - Layer: Overlay layer with content access via diff_dir - Image: OCI image with manifest/config parsing - TarSplitFdStream: Zero-copy tar-split streaming with fd passing - LockFile: Wire-compatible locking with Go containers/storage The crate uses cap-std for capability-based file operations and provides a safe, read-only interface to access container images and layers stored by podman/buildah. This is adapted from cgwalters/cstor-rs with modifications to fit the composefs-rs codebase style and conventions. 
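A rough usage sketch (the image name below is only an example; it assumes
a storage root that podman or buildah has already populated):

    let storage = cstorage::Storage::discover()?;
    let image = storage.find_image_by_name("docker.io/library/alpine:latest")?;
    for layer in storage.get_image_layers(&image)? {
        println!("layer {}", layer.id());
    }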
Assisted-by: OpenCode (Opus 4.5) Signed-off-by: Colin Walters --- crates/cstorage/Cargo.toml | 35 ++ crates/cstorage/src/config.rs | 119 ++++++ crates/cstorage/src/error.rs | 73 ++++ crates/cstorage/src/image.rs | 239 +++++++++++ crates/cstorage/src/layer.rs | 290 +++++++++++++ crates/cstorage/src/lib.rs | 67 +++ crates/cstorage/src/lockfile.rs | 279 ++++++++++++ crates/cstorage/src/storage.rs | 664 ++++++++++++++++++++++++++++ crates/cstorage/src/tar_split.rs | 712 +++++++++++++++++++++++++++++++ 9 files changed, 2478 insertions(+) create mode 100644 crates/cstorage/Cargo.toml create mode 100644 crates/cstorage/src/config.rs create mode 100644 crates/cstorage/src/error.rs create mode 100644 crates/cstorage/src/image.rs create mode 100644 crates/cstorage/src/layer.rs create mode 100644 crates/cstorage/src/lib.rs create mode 100644 crates/cstorage/src/lockfile.rs create mode 100644 crates/cstorage/src/storage.rs create mode 100644 crates/cstorage/src/tar_split.rs diff --git a/crates/cstorage/Cargo.toml b/crates/cstorage/Cargo.toml new file mode 100644 index 00000000..fbfdf53d --- /dev/null +++ b/crates/cstorage/Cargo.toml @@ -0,0 +1,35 @@ +[package] +name = "cstorage" +description = "Read-only access to containers-storage (overlay driver)" +keywords = ["containers", "storage", "overlay", "podman", "buildah"] + +edition.workspace = true +license.workspace = true +readme.workspace = true +repository.workspace = true +rust-version.workspace = true +version.workspace = true + +[dependencies] +anyhow = { version = "1.0", default-features = false, features = ["std"] } +base64 = { version = "0.22", default-features = false, features = ["std"] } +cap-std = { version = "4.0", default-features = false } +cap-std-ext = { version = "4.0", default-features = false } +crc = { version = "3.0", default-features = false } +flate2 = { version = "1.0", default-features = false, features = ["rust_backend"] } +oci-spec = { version = "0.8", default-features = false, features = ["image"] } +rusqlite = { version = "0.34", default-features = false } +rustix = { version = "1.0", default-features = false, features = ["fs", "std"] } +serde = { version = "1.0", default-features = false, features = ["derive"] } +serde_json = { version = "1.0", default-features = false, features = ["std"] } +sha2 = { version = "0.10", default-features = false, features = ["std"] } +tar = { version = "0.4", default-features = false } +thiserror = { version = "2.0", default-features = false } +toml = { version = "0.8", default-features = false, features = ["parse"] } +zstd = { version = "0.13", default-features = false } + +[dev-dependencies] +tempfile = { version = "3.8", default-features = false } + +[lints] +workspace = true diff --git a/crates/cstorage/src/config.rs b/crates/cstorage/src/config.rs new file mode 100644 index 00000000..8d8d14a2 --- /dev/null +++ b/crates/cstorage/src/config.rs @@ -0,0 +1,119 @@ +//! Configuration parsing for container storage. +//! +//! This module provides structures for parsing storage.conf files used by +//! containers-storage. Configuration files define storage locations, drivers, +//! and additional read-only image stores. +//! +//! # Overview +//! +//! Container storage configuration is typically found in: +//! - System-wide: `/etc/containers/storage.conf` +//! - User-specific: `~/.config/containers/storage.conf` +//! +//! The configuration uses TOML format and specifies the storage driver +//! (overlay, btrfs, etc.), root paths, and additional layer/image stores. +//! +//! 
# Configuration Structure +//! +//! A typical storage.conf file looks like: +//! ```toml +//! [storage] +//! driver = "overlay" +//! root = "/var/lib/containers/storage" +//! run_root = "/run/containers/storage" +//! +//! # Additional read-only image stores +//! image_stores = [ +//! "/usr/share/containers/storage" +//! ] +//! +//! # Additional layer stores configuration +//! [[storage.layer_stores]] +//! path = "/mnt/layers" +//! with_reference = true +//! ``` + +use serde::Deserialize; +use std::path::PathBuf; + +/// Storage configuration, typically parsed from storage.conf files. +/// +/// Configuration files are searched in: +/// - `/etc/containers/storage.conf` +/// - `$HOME/.config/containers/storage.conf` +#[derive(Debug, Clone, Deserialize)] +pub struct StorageConfig { + /// Storage driver name (should be "overlay" for this library). + #[serde(default)] + pub driver: String, + + /// Primary storage root path. + #[serde(default)] + pub root: PathBuf, + + /// Runtime root for transient data. + #[serde(default)] + pub run_root: PathBuf, + + /// Additional read-only image stores. + #[serde(default)] + pub image_stores: Vec, + + /// Additional layer stores configuration. + #[serde(default)] + pub layer_stores: Vec, +} + +/// Configuration for an additional layer store. +#[derive(Debug, Clone, Deserialize)] +pub struct AdditionalLayerStore { + /// Path to the additional layer store. + pub path: PathBuf, + + /// Whether to use base64-encoded references in paths. + #[serde(default)] + pub with_reference: bool, +} + +impl StorageConfig { + /// Parse storage configuration from TOML content. + /// + /// # Errors + /// + /// Returns an error if the TOML content is invalid. + pub fn from_toml(content: &str) -> Result { + toml::from_str(content) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_basic_config() { + let config_str = r#" +driver = "overlay" +root = "/var/lib/containers/storage" +"#; + let config = StorageConfig::from_toml(config_str).unwrap(); + assert_eq!(config.driver, "overlay"); + assert_eq!(config.root, PathBuf::from("/var/lib/containers/storage")); + } + + #[test] + fn test_parse_with_layer_stores() { + let config_str = r#" +driver = "overlay" +root = "/var/lib/containers/storage" + +[[layer_stores]] +path = "/mnt/layers" +with_reference = true +"#; + let config = StorageConfig::from_toml(config_str).unwrap(); + assert_eq!(config.layer_stores.len(), 1); + assert_eq!(config.layer_stores[0].path, PathBuf::from("/mnt/layers")); + assert!(config.layer_stores[0].with_reference); + } +} diff --git a/crates/cstorage/src/error.rs b/crates/cstorage/src/error.rs new file mode 100644 index 00000000..9676b3f3 --- /dev/null +++ b/crates/cstorage/src/error.rs @@ -0,0 +1,73 @@ +//! Error types for the cstorage library. +//! +//! This module defines the error types used throughout the library. All operations +//! that can fail return a [`Result`] which is an alias for `Result`. +//! +//! # Error Categories +//! +//! Errors are organized into several categories: +//! +//! - **Storage errors**: [`RootNotFound`], [`InvalidStorage`] +//! - **Entity errors**: [`LayerNotFound`], [`ImageNotFound`] +//! - **Link resolution**: [`LinkReadError`] +//! - **Tar-split processing**: [`TarSplitError`] +//! - **System errors**: [`Io`], [`Database`], [`JsonParse`] +//! +//! [`RootNotFound`]: StorageError::RootNotFound +//! [`InvalidStorage`]: StorageError::InvalidStorage +//! [`LayerNotFound`]: StorageError::LayerNotFound +//! [`ImageNotFound`]: StorageError::ImageNotFound +//! 
[`LinkReadError`]: StorageError::LinkReadError +//! [`TarSplitError`]: StorageError::TarSplitError +//! [`Io`]: StorageError::Io +//! [`Database`]: StorageError::Database +//! [`JsonParse`]: StorageError::JsonParse + +use std::path::PathBuf; + +/// Result type alias for operations that may return a StorageError. +pub type Result = std::result::Result; + +/// Error types for storage operations. +#[derive(Debug, thiserror::Error)] +pub enum StorageError { + /// Storage root directory was not found at the specified path. + #[error("storage root not found at {0}")] + RootNotFound(PathBuf), + + /// Storage validation failed with the provided reason. + #[error("invalid storage: {0}")] + InvalidStorage(String), + + /// The requested layer was not found. + #[error("layer not found: {0}")] + LayerNotFound(String), + + /// The requested image was not found. + #[error("image not found: {0}")] + ImageNotFound(String), + + /// Failed to read a link file. + #[error("failed to read link file: {0}")] + LinkReadError(String), + + /// Error related to tar-split processing. + #[error("tar-split error: {0}")] + TarSplitError(String), + + /// I/O error occurred during file operations. + #[error("I/O error: {0}")] + Io(#[from] std::io::Error), + + /// Database error occurred during SQLite operations. + #[error("database error: {0}")] + Database(#[from] rusqlite::Error), + + /// JSON parsing error occurred. + #[error("JSON parse error: {0}")] + JsonParse(#[from] serde_json::Error), + + /// Lock file operation failed. + #[error("lock error: {0}")] + Lock(#[from] crate::lockfile::LockError), +} diff --git a/crates/cstorage/src/image.rs b/crates/cstorage/src/image.rs new file mode 100644 index 00000000..c086d0db --- /dev/null +++ b/crates/cstorage/src/image.rs @@ -0,0 +1,239 @@ +//! Image reading and manifest parsing. +//! +//! This module provides access to OCI image manifests and metadata stored in +//! the `overlay-images/` directory. All operations use fd-relative access via +//! cap-std Dir handles. +//! +//! # Overview +//! +//! The [`Image`] struct represents a container image stored in the overlay driver. +//! It provides access to: +//! - OCI image manifests ([`oci_spec::image::ImageManifest`]) +//! - OCI image configurations ([`oci_spec::image::ImageConfiguration`]) +//! - Layer information (diff_ids that map to storage layer IDs) +//! - Additional metadata stored in base64-encoded files +//! +//! # Image Directory Structure +//! +//! Each image is stored in `overlay-images//`: +//! ```text +//! overlay-images// +//! +-- manifest # OCI image manifest (JSON) +//! +-- = # Additional metadata files +//! ``` + +use base64::{engine::general_purpose::STANDARD, Engine}; +use cap_std::fs::Dir; +use oci_spec::image::{ImageConfiguration, ImageManifest}; +use std::io::Read; + +use crate::error::{Result, StorageError}; +use crate::storage::Storage; + +/// Filename for OCI image manifest in the image directory. +const MANIFEST_FILENAME: &str = "manifest"; + +/// Represents an OCI image with its metadata and manifest. +#[derive(Debug)] +pub struct Image { + /// Image ID (typically a 64-character hex digest). + id: String, + + /// Directory handle for overlay-images/\/. + image_dir: Dir, +} + +impl Image { + /// Open an image by ID using fd-relative operations. + /// + /// # Errors + /// + /// Returns an error if the image directory doesn't exist or cannot be opened. 
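+    ///
+    /// # Example
+    ///
+    /// A minimal sketch; it assumes a discoverable storage root and that the
+    /// (hypothetical) ID names an image present under `overlay-images/`:
+    ///
+    /// ```no_run
+    /// use cstorage::{Image, Storage};
+    ///
+    /// let storage = Storage::discover()?;
+    /// // "abc123..." stands in for a real 64-character image ID
+    /// let image = Image::open(&storage, "abc123...")?;
+    /// println!("opened image {}", image.id());
+    /// # Ok::<(), cstorage::StorageError>(())
+    /// ```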
+ pub fn open(storage: &Storage, id: &str) -> Result { + // Open overlay-images directory from storage root + let images_dir = storage.root_dir().open_dir("overlay-images")?; + + // Open specific image directory + let image_dir = images_dir + .open_dir(id) + .map_err(|_| StorageError::ImageNotFound(id.to_string()))?; + + Ok(Self { + id: id.to_string(), + image_dir, + }) + } + + /// Get the image ID. + pub fn id(&self) -> &str { + &self.id + } + + /// Read and parse the image manifest. + /// + /// The manifest is stored as a JSON file named "manifest" in the image directory. + /// + /// # Errors + /// + /// Returns an error if the manifest file cannot be read or parsed. + pub fn manifest(&self) -> Result { + let file = self.image_dir.open(MANIFEST_FILENAME)?; + serde_json::from_reader(file) + .map_err(|e| StorageError::InvalidStorage(format!("Invalid manifest JSON: {}", e))) + } + + /// Read and parse the image configuration. + /// + /// The image config is stored with a base64-encoded key based on the image digest. + /// + /// # Errors + /// + /// Returns an error if the config file cannot be read or parsed. + pub fn config(&self) -> Result { + // The config is stored with key: sha256: + // Base64 encode: "sha256:" + let key = format!("sha256:{}", self.id); + let encoded_key = STANDARD.encode(key.as_bytes()); + + let config_data = self.read_metadata(&encoded_key)?; + serde_json::from_slice(&config_data) + .map_err(|e| StorageError::InvalidStorage(format!("Invalid config JSON: {}", e))) + } + + /// Get the OCI diff_ids for this image in order (base to top). + /// + /// This returns the diff_ids from the image config, which are the uncompressed + /// tar digests. Note that these are **not** the same as the storage layer IDs! + /// To get the actual storage layer IDs, use [`storage_layer_ids()`](Self::storage_layer_ids). + /// + /// # Errors + /// + /// Returns an error if the config cannot be read or parsed. + pub fn layers(&self) -> Result> { + let config = self.config()?; + + // Extract diff_ids from config - these are NOT the storage layer IDs + let diff_ids: Vec = config + .rootfs() + .diff_ids() + .iter() + .map(|digest| { + // Remove the "sha256:" prefix if present + let diff_id = digest.to_string(); + diff_id + .strip_prefix("sha256:") + .unwrap_or(&diff_id) + .to_string() + }) + .collect(); + + Ok(diff_ids) + } + + /// Get the storage layer IDs for this image in order (base to top). + /// + /// Unlike [`layers()`](Self::layers) which returns OCI diff_ids, this method + /// returns the actual storage layer directory names by resolving diff_ids + /// through the `layers.json` mapping file. + /// + /// # Errors + /// + /// Returns an error if the config cannot be read, parsed, or if any layer + /// cannot be resolved. + pub fn storage_layer_ids(&self, storage: &Storage) -> Result> { + let diff_ids = self.layers()?; + diff_ids + .iter() + .map(|diff_id| storage.resolve_diff_id(diff_id)) + .collect() + } + + /// Read additional metadata files. + /// + /// Metadata files are stored with base64-encoded keys as filenames, + /// prefixed with '='. + /// + /// # Errors + /// + /// Returns an error if the metadata file doesn't exist or cannot be read. + pub fn read_metadata(&self, key: &str) -> Result> { + let filename = format!("={}", key); + let mut file = self.image_dir.open(&filename)?; + let mut data = Vec::new(); + file.read_to_end(&mut data)?; + Ok(data) + } + + /// Get a reference to the image directory handle. 
+ pub fn image_dir(&self) -> &Dir { + &self.image_dir + } + + /// Get the repository names/tags for this image. + /// + /// Reads from the `overlay-images/images.json` index file to find the + /// names associated with this image. + /// + /// # Errors + /// + /// Returns an error if the images.json file cannot be read or parsed. + pub fn names(&self, storage: &Storage) -> Result> { + let images_dir = storage.root_dir().open_dir("overlay-images")?; + let mut file = images_dir.open("images.json")?; + let mut contents = String::new(); + file.read_to_string(&mut contents)?; + + let entries: Vec = serde_json::from_str(&contents) + .map_err(|e| StorageError::InvalidStorage(format!("Invalid images.json: {}", e)))?; + + for entry in entries { + if entry.id == self.id { + return Ok(entry.names.unwrap_or_default()); + } + } + + // Image not found in images.json - return empty names + Ok(Vec::new()) + } +} + +/// Entry in images.json for image name lookups. +#[derive(Debug, serde::Deserialize)] +struct ImageJsonEntry { + id: String, + names: Option>, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_manifest_parsing() { + let manifest_json = r#"{ + "schemaVersion": 2, + "mediaType": "application/vnd.oci.image.manifest.v1+json", + "config": { + "mediaType": "application/vnd.oci.image.config.v1+json", + "digest": "sha256:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", + "size": 1234 + }, + "layers": [ + { + "mediaType": "application/vnd.oci.image.layer.v1.tar+gzip", + "digest": "sha256:1111111111111111111111111111111111111111111111111111111111111111", + "size": 5678 + }, + { + "mediaType": "application/vnd.oci.image.layer.v1.tar+gzip", + "digest": "sha256:2222222222222222222222222222222222222222222222222222222222222222", + "size": 9012 + } + ] + }"#; + + let manifest: ImageManifest = serde_json::from_str(manifest_json).unwrap(); + assert_eq!(manifest.schema_version(), 2); + assert_eq!(manifest.layers().len(), 2); + } +} diff --git a/crates/cstorage/src/layer.rs b/crates/cstorage/src/layer.rs new file mode 100644 index 00000000..eeb44b3b --- /dev/null +++ b/crates/cstorage/src/layer.rs @@ -0,0 +1,290 @@ +//! Layer reading and metadata handling. +//! +//! This module provides access to individual overlay layers and their metadata. +//! Layers are the fundamental storage units in the overlay driver, representing +//! filesystem changes that are stacked to form complete container images. +//! +//! # Overview +//! +//! The [`Layer`] struct represents a single layer in the overlay filesystem. +//! Each layer contains: +//! - A `diff/` directory with the actual file contents +//! - A `link` file containing a short 26-character identifier +//! - A `lower` file listing parent layers (if not a base layer) +//! - Metadata for whiteouts and opaque directories +//! +//! # Layer Structure +//! +//! Each layer is stored in `overlay//`: +//! ```text +//! overlay// +//! +-- diff/ # Layer file contents +//! | +-- etc/ +//! | | +-- hosts +//! | +-- usr/ +//! | +-- bin/ +//! +-- link # Short link ID (26 chars) +//! +-- lower # Parent references: "l/:l/:..." +//! ``` +//! +//! # Whiteouts and Opaque Directories +//! +//! The overlay driver uses special markers to indicate file deletions: +//! - `.wh.` - Whiteout file (marks `` as deleted) +//! - `.wh..wh..opq` - Opaque directory marker (hides lower layer contents) + +use crate::error::{Result, StorageError}; +use crate::storage::Storage; +use cap_std::fs::Dir; + +/// Represents an overlay layer with its metadata and content. 
+#[derive(Debug)] +pub struct Layer { + /// Layer ID (typically a 64-character hex digest). + id: String, + + /// Directory handle for the layer directory (overlay/\/). + layer_dir: Dir, + + /// Directory handle for the diff/ subdirectory containing layer content. + diff_dir: Dir, + + /// Short link identifier from the link file (26 characters). + link_id: String, + + /// Parent layer link IDs from the lower file. + parent_links: Vec, +} + +impl Layer { + /// Open a layer by ID using fd-relative operations. + /// + /// # Errors + /// + /// Returns an error if the layer directory doesn't exist or cannot be opened. + pub fn open(storage: &Storage, id: &str) -> Result { + // Open overlay directory from storage root + let overlay_dir = storage.root_dir().open_dir("overlay")?; + + // Open layer directory relative to overlay + let layer_dir = overlay_dir + .open_dir(id) + .map_err(|_| StorageError::LayerNotFound(id.to_string()))?; + + // Open diff directory for content access + let diff_dir = layer_dir.open_dir("diff")?; + + // Read metadata files using fd-relative operations + let link_id = Self::read_link(&layer_dir)?; + let parent_links = Self::read_lower(&layer_dir)?; + + Ok(Self { + id: id.to_string(), + layer_dir, + diff_dir, + link_id, + parent_links, + }) + } + + /// Get the layer ID. + pub fn id(&self) -> &str { + &self.id + } + + /// Read the link file (26-char identifier) via Dir handle. + fn read_link(layer_dir: &Dir) -> Result { + let content = layer_dir.read_to_string("link")?; + Ok(content.trim().to_string()) + } + + /// Read the lower file (colon-separated parent links) via Dir handle. + fn read_lower(layer_dir: &Dir) -> Result> { + match layer_dir.read_to_string("lower") { + Ok(content) => { + // Format is "l/:l/:..." + let links: Vec = content + .trim() + .split(':') + .filter_map(|s| s.strip_prefix("l/")) + .map(|s| s.to_string()) + .collect(); + Ok(links) + } + Err(_) => Ok(Vec::new()), // Base layer has no lower file + } + } + + /// Get the short link ID for this layer. + pub fn link_id(&self) -> &str { + &self.link_id + } + + /// Get the parent link IDs for this layer. + pub fn parent_links(&self) -> &[String] { + &self.parent_links + } + + /// Get parent layer IDs (resolved from link IDs). + /// + /// This resolves the short link IDs from the `lower` file to full layer IDs + /// by reading the symlinks in the `overlay/l/` directory. + /// + /// # Errors + /// + /// Returns an error if any link cannot be resolved. + pub fn parents(&self, storage: &Storage) -> Result> { + self.parent_links + .iter() + .map(|link_id| storage.resolve_link(link_id)) + .collect() + } + + /// Get a reference to the layer directory handle. + pub fn layer_dir(&self) -> &Dir { + &self.layer_dir + } + + /// Get a reference to the diff directory handle. + pub fn diff_dir(&self) -> &Dir { + &self.diff_dir + } + + /// Get the complete chain of layers from this layer to the base. + /// + /// Returns layers in order: [self, parent, grandparent, ..., base] + /// + /// # Errors + /// + /// Returns an error if the layer chain exceeds the maximum depth of 500 layers. 
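+    ///
+    /// # Example
+    ///
+    /// A sketch that walks the chain starting from a known top layer (the
+    /// layer ID is hypothetical):
+    ///
+    /// ```no_run
+    /// use cstorage::{Layer, Storage};
+    ///
+    /// let storage = Storage::discover()?;
+    /// let top = Layer::open(&storage, "deadbeef...")?;
+    /// for layer in top.layer_chain(&storage)? {
+    ///     println!("{}", layer.id());
+    /// }
+    /// # Ok::<(), cstorage::StorageError>(())
+    /// ```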
+ pub fn layer_chain(self, storage: &Storage) -> Result> { + let mut chain = vec![self]; + let mut current_idx = 0; + + // Maximum depth to prevent infinite loops + const MAX_DEPTH: usize = 500; + + while current_idx < chain.len() && chain.len() < MAX_DEPTH { + let parent_ids = chain[current_idx].parents(storage)?; + + // Add all parents to the chain + for parent_id in parent_ids { + chain.push(Layer::open(storage, &parent_id)?); + } + + current_idx += 1; + } + + if chain.len() >= MAX_DEPTH { + return Err(StorageError::InvalidStorage( + "Layer chain exceeds maximum depth of 500".to_string(), + )); + } + + Ok(chain) + } + + /// Open a file in the layer's diff directory using fd-relative operations. + /// + /// # Errors + /// + /// Returns an error if the file doesn't exist or cannot be opened. + pub fn open_file(&self, path: impl AsRef) -> Result { + self.diff_dir.open(path).map_err(StorageError::Io) + } + + /// Open a file and return a standard library File. + /// + /// # Errors + /// + /// Returns an error if the file doesn't exist or cannot be opened. + pub fn open_file_std(&self, path: impl AsRef) -> Result { + let file = self.diff_dir.open(path).map_err(StorageError::Io)?; + Ok(file.into_std()) + } + + /// Get metadata for a file in the layer's diff directory. + /// + /// # Errors + /// + /// Returns an error if the file doesn't exist. + pub fn metadata(&self, path: impl AsRef) -> Result { + self.diff_dir.metadata(path).map_err(StorageError::Io) + } + + /// Read directory entries using Dir handle. + /// + /// # Errors + /// + /// Returns an error if the directory doesn't exist. + pub fn read_dir(&self, path: impl AsRef) -> Result { + self.diff_dir.read_dir(path).map_err(StorageError::Io) + } + + /// Check if a whiteout file exists for the given filename. + /// + /// Whiteout format: `.wh.` + /// + /// # Arguments + /// + /// * `parent_path` - The directory path containing the file (empty string or "." for root) + /// * `filename` - The name of the file to check for whiteout + /// + /// # Errors + /// + /// Returns an error if the directory cannot be accessed. + pub fn has_whiteout(&self, parent_path: &str, filename: &str) -> Result { + let whiteout_name = format!(".wh.{}", filename); + + // Handle root directory case + if parent_path.is_empty() || parent_path == "." { + Ok(self.diff_dir.try_exists(&whiteout_name)?) + } else { + match self.diff_dir.open_dir(parent_path) { + Ok(parent_dir) => Ok(parent_dir.try_exists(&whiteout_name)?), + Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(false), + Err(e) => Err(StorageError::Io(e)), + } + } + } + + /// Check if a directory is marked as opaque (hides lower layers). + /// + /// Opaque marker: `.wh..wh..opq` + /// + /// # Errors + /// + /// Returns an error if the directory cannot be accessed. + pub fn is_opaque_dir(&self, path: &str) -> Result { + const OPAQUE_MARKER: &str = ".wh..wh..opq"; + + if path.is_empty() || path == "." { + Ok(self.diff_dir.try_exists(OPAQUE_MARKER)?) 
+ } else { + match self.diff_dir.open_dir(path) { + Ok(dir) => Ok(dir.try_exists(OPAQUE_MARKER)?), + Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(false), + Err(e) => Err(StorageError::Io(e)), + } + } + } +} + +#[cfg(test)] +mod tests { + #[test] + fn test_parse_lower_format() { + // Test that we correctly parse the lower file format + let content = "l/ABCDEFGHIJKLMNOPQRSTUVWXY:l/BCDEFGHIJKLMNOPQRSTUVWXYZ"; + let links: Vec = content + .trim() + .split(':') + .filter_map(|s| s.strip_prefix("l/")) + .map(|s| s.to_string()) + .collect(); + + assert_eq!(links.len(), 2); + assert_eq!(links[0], "ABCDEFGHIJKLMNOPQRSTUVWXY"); + assert_eq!(links[1], "BCDEFGHIJKLMNOPQRSTUVWXYZ"); + } +} diff --git a/crates/cstorage/src/lib.rs b/crates/cstorage/src/lib.rs new file mode 100644 index 00000000..a2cc4046 --- /dev/null +++ b/crates/cstorage/src/lib.rs @@ -0,0 +1,67 @@ +//! Read-only access to containers-storage overlay driver. +//! +//! This library provides efficient, capability-based access to container image +//! storage using the overlay driver. All file operations are performed using +//! file descriptor-relative operations via cap-std, providing security against +//! path traversal attacks and TOCTOU race conditions. +//! +//! # Overview +//! +//! The library is designed to access containers-storage (overlay driver) without +//! requiring tar serialization. Instead, it provides direct file descriptor access +//! to layer content, enabling zero-copy operations. +//! +//! # Key Features +//! +//! - **Capability-based security**: All file access via `cap_std::fs::Dir` handles +//! - **Zero-copy access**: File descriptors instead of data copies +//! - **Safe by design**: No path traversal vulnerabilities +//! - **Tar-split integration**: Bit-for-bit identical TAR reconstruction +//! - **OCI compatibility**: Uses oci-spec for standard image formats +//! +//! # Example +//! +//! ```no_run +//! use cstorage::Storage; +//! +//! // Discover storage from default locations +//! let storage = Storage::discover()?; +//! +//! // Or open storage at a specific path +//! let storage = Storage::open("/var/lib/containers/storage")?; +//! +//! // List images +//! for image in storage.list_images()? { +//! println!("Image: {}", image.id()); +//! } +//! # Ok::<(), cstorage::StorageError>(()) +//! ``` +//! +//! # Architecture +//! +//! The library uses cap-std for all file operations: +//! - `Storage` holds a `Dir` handle to the storage root +//! - All file access is relative to `Dir` handles +//! - No absolute paths are constructed during operations +//! - SQLite database accessed via fd-relative path + +// Core storage access +pub mod config; +pub mod error; +pub mod image; +pub mod layer; +pub mod lockfile; +pub mod storage; +pub mod tar_split; + +// Re-export commonly used types +pub use config::{AdditionalLayerStore, StorageConfig}; +pub use error::{Result, StorageError}; +pub use image::Image; +pub use layer::Layer; +pub use lockfile::LastWrite; +pub use storage::{ImageRLockGuard, LayerMetadata, LayerRLockGuard, Storage}; +pub use tar_split::{TarHeader, TarSplitFdStream, TarSplitItem}; + +// Re-export OCI spec types for convenience +pub use oci_spec::image::{Descriptor, ImageConfiguration, ImageManifest}; diff --git a/crates/cstorage/src/lockfile.rs b/crates/cstorage/src/lockfile.rs new file mode 100644 index 00000000..d671935a --- /dev/null +++ b/crates/cstorage/src/lockfile.rs @@ -0,0 +1,279 @@ +//! Lock file implementation compatible with containers/storage. +//! +//! 
This module provides file-based locking that is wire-compatible with +//! the Go implementation in containers/storage. It uses POSIX fcntl locks +//! for cross-process synchronization and in-process RwLock for thread safety. +//! +//! # LastWrite Token +//! +//! The lock file stores a 64-byte "last write" token that allows callers to +//! detect if any writer has modified shared state since they last checked. +//! The format is: +//! - bytes 0-7: Unix timestamp (nanoseconds, little-endian) +//! - bytes 8-15: Counter (little-endian) +//! - bytes 16-19: Process ID (little-endian) +//! - bytes 20-63: Random bytes + +use std::fs::{File, OpenOptions}; +use std::io::{Read, Seek, SeekFrom}; +use std::os::fd::{AsFd, OwnedFd}; +use std::path::{Path, PathBuf}; +use std::sync::{RwLock, RwLockReadGuard}; + +use rustix::fs::{fcntl_lock, FlockOperation}; + +/// Size of the LastWrite token in bytes. +const LAST_WRITE_SIZE: usize = 64; + +/// Error types for lock file operations. +#[derive(Debug, thiserror::Error)] +pub enum LockError { + /// I/O error during lock file operations. + #[error("I/O error: {0}")] + Io(#[from] std::io::Error), + + /// Lock file operation failed. + #[error("lock operation failed: {0}")] + LockFailed(#[from] rustix::io::Errno), + + /// Would block on non-blocking lock attempt. + #[error("lock would block")] + WouldBlock, + + /// Invalid LastWrite data in lock file. + #[error("invalid last write data: {0}")] + InvalidData(String), +} + +/// Result type for lock file operations. +pub type Result = std::result::Result; + +/// A 64-byte token representing the last write to the lock file. +/// +/// This token can be used to detect if any writer has modified shared state +/// since the token was obtained. The format is compatible with the Go +/// implementation in containers/storage. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct LastWrite { + /// Unix timestamp in nanoseconds. + timestamp_nanos: u64, + /// Monotonic counter. + counter: u64, + /// Process ID of the writer. + pid: u32, + /// Random bytes for uniqueness. + random: [u8; 44], +} + +impl LastWrite { + /// Deserialize a LastWrite token from a 64-byte array. + fn from_bytes(buf: &[u8; LAST_WRITE_SIZE]) -> Self { + let timestamp_nanos = u64::from_le_bytes(buf[0..8].try_into().unwrap()); + let counter = u64::from_le_bytes(buf[8..16].try_into().unwrap()); + let pid = u32::from_le_bytes(buf[16..20].try_into().unwrap()); + let mut random = [0u8; 44]; + random.copy_from_slice(&buf[20..64]); + + Self { + timestamp_nanos, + counter, + pid, + random, + } + } + + /// Check if this token represents an empty/uninitialized state. + pub fn is_empty(&self) -> bool { + self.timestamp_nanos == 0 && self.counter == 0 && self.pid == 0 + } +} + +impl Default for LastWrite { + fn default() -> Self { + Self { + timestamp_nanos: 0, + counter: 0, + pid: 0, + random: [0u8; 44], + } + } +} + +/// A file-based lock compatible with containers/storage (read-only). +/// +/// This provides cross-process read locking (via fcntl) and in-process +/// thread synchronization (via RwLock). The lock file also stores a +/// LastWrite token that can be used to detect modifications. +#[derive(Debug)] +pub struct LockFile { + /// Path to the lock file. + path: PathBuf, + /// File descriptor for the lock file. + fd: OwnedFd, + /// In-process synchronization lock. + in_process_lock: RwLock<()>, +} + +/// RAII guard for a shared (read) lock. +/// +/// The lock is released when this guard is dropped. 
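+///
+/// # Example
+///
+/// A sketch of acquiring and releasing the lock (the lock path is
+/// hypothetical and the file must already exist):
+///
+/// ```no_run
+/// use cstorage::lockfile::LockFile;
+///
+/// let lock = LockFile::open("/var/lib/containers/storage/overlay-layers/layers.lock")?;
+/// {
+///     let _guard = lock.rlock();
+///     // read shared state while the shared lock is held
+/// } // guard dropped here; the fcntl lock is released
+/// # Ok::<(), cstorage::lockfile::LockError>(())
+/// ```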
+#[derive(Debug)] +pub struct RLockGuard<'a> { + lockfile: &'a LockFile, + /// Hold the in-process read lock guard. + _guard: RwLockReadGuard<'a, ()>, +} + +impl Drop for RLockGuard<'_> { + fn drop(&mut self) { + // Release the fcntl lock + let _ = fcntl_lock(self.lockfile.fd.as_fd(), FlockOperation::Unlock); + } +} + +impl LockFile { + /// Open a lock file at the specified path in read-only mode. + /// + /// # Errors + /// + /// Returns an error if the file cannot be opened. + pub fn open>(path: P) -> Result { + let path = path.as_ref().to_path_buf(); + + let file = OpenOptions::new().read(true).open(&path)?; + + let fd: OwnedFd = file.into(); + + Ok(Self { + path, + fd, + in_process_lock: RwLock::new(()), + }) + } + + /// Get the path to the lock file. + pub fn path(&self) -> &Path { + &self.path + } + + /// Acquire a shared (read) lock, blocking until available. + /// + /// Returns a guard that releases the lock when dropped. + pub fn rlock(&self) -> RLockGuard<'_> { + // First acquire the in-process lock + let guard = self + .in_process_lock + .read() + .expect("in-process lock poisoned"); + + // Then acquire the fcntl lock (blocking) + fcntl_lock(self.fd.as_fd(), FlockOperation::LockShared) + .expect("fcntl_lock failed unexpectedly"); + + RLockGuard { + lockfile: self, + _guard: guard, + } + } + + /// Try to acquire a shared (read) lock without blocking. + /// + /// Returns `Err(LockError::WouldBlock)` if the lock is not available. + pub fn try_rlock(&self) -> Result> { + // Try to acquire the in-process lock + let guard = self + .in_process_lock + .try_read() + .map_err(|_| LockError::WouldBlock)?; + + // Try to acquire the fcntl lock (non-blocking) + match fcntl_lock(self.fd.as_fd(), FlockOperation::NonBlockingLockShared) { + Ok(()) => Ok(RLockGuard { + lockfile: self, + _guard: guard, + }), + Err(rustix::io::Errno::AGAIN) => Err(LockError::WouldBlock), + Err(e) => Err(LockError::LockFailed(e)), + } + } + + /// Read the current LastWrite token from the lock file. + /// + /// This reads the token directly from the file, not from cache. + pub fn get_last_write(&self) -> Result { + let mut file = self.as_file(); + file.seek(SeekFrom::Start(0))?; + + let mut buf = [0u8; LAST_WRITE_SIZE]; + match file.read_exact(&mut buf) { + Ok(()) => Ok(LastWrite::from_bytes(&buf)), + Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => { + // File is empty or too small - return empty token + Ok(LastWrite::default()) + } + Err(e) => Err(e.into()), + } + } + + /// Check if the lock file has been modified since the given token. + /// + /// This reads the current token from the file and compares it to + /// the provided token. Returns `true` if they differ. + pub fn modified_since(&self, prev: &LastWrite) -> Result { + let current = self.get_last_write()?; + Ok(current != *prev) + } + + /// Helper to get a File reference for I/O operations. + /// + /// This borrows the fd without taking ownership. 
+ fn as_file(&self) -> File { + use std::os::fd::BorrowedFd; + let borrowed: BorrowedFd<'_> = self.fd.as_fd(); + + // Use dup to create a new fd that File can own + let duped = rustix::io::fcntl_dupfd_cloexec(borrowed, 0).expect("fcntl_dupfd failed"); + File::from(duped) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_lastwrite_default_is_empty() { + let token = LastWrite::default(); + assert!(token.is_empty()); + } + + #[test] + fn test_basic_read_lock() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("test.lock"); + + // Create the file first + std::fs::write(&path, &[0u8; 64]).unwrap(); + + let lockfile = LockFile::open(&path).unwrap(); + + // Acquire and release shared lock + { + let _guard = lockfile.rlock(); + } + } + + #[test] + fn test_try_rlock_succeeds_when_available() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("test.lock"); + + // Create the file first + std::fs::write(&path, &[0u8; 64]).unwrap(); + + let lockfile = LockFile::open(&path).unwrap(); + + let guard = lockfile.try_rlock(); + assert!(guard.is_ok()); + } +} diff --git a/crates/cstorage/src/storage.rs b/crates/cstorage/src/storage.rs new file mode 100644 index 00000000..b339b4e6 --- /dev/null +++ b/crates/cstorage/src/storage.rs @@ -0,0 +1,664 @@ +//! Storage access for container overlay filesystem. +//! +//! This module provides the main [`Storage`] struct for accessing containers-storage +//! overlay driver data. All file access uses cap-std for fd-relative operations, +//! providing security against path traversal attacks and TOCTOU race conditions. +//! +//! # Overview +//! +//! The `Storage` struct is the primary entry point for interacting with container +//! storage. It holds a capability-based directory handle to the storage root and +//! a SQLite database connection for metadata queries. +//! +//! # Storage Structure +//! +//! Container storage on disk follows this layout: +//! ```text +//! /var/lib/containers/storage/ +//! +-- db.sql # SQLite metadata database +//! +-- overlay/ # Layer data +//! | +-- / # Individual layer directories +//! | | +-- diff/ # Layer file contents +//! | | +-- link # Short link ID (26 chars) +//! | | +-- lower # Parent layer references +//! | +-- l/ # Short link directory (symlinks) +//! +-- overlay-layers/ # Tar-split metadata +//! | +-- .tar-split.gz +//! +-- overlay-images/ # Image metadata +//! +-- / +//! +-- manifest # OCI image manifest +//! +-- = # Base64-encoded metadata files +//! ``` +//! +//! # Security Model +//! +//! All file operations are performed via [`cap_std::fs::Dir`] handles, which provide: +//! - Protection against path traversal attacks +//! - Prevention of TOCTOU race conditions +//! - Guarantee that all access stays within the storage directory tree + +use crate::error::{Result, StorageError}; +use crate::lockfile::{LastWrite, LockFile, RLockGuard}; +use cap_std::ambient_authority; +use cap_std::fs::Dir; +use rusqlite::Connection; +use rustix::path::DecInt; +use std::env; +use std::io::Read; +use std::path::{Path, PathBuf}; + +/// Main storage handle providing read-only access to container storage. +/// +/// The Storage struct holds: +/// - A `Dir` handle to the storage root for fd-relative file operations +/// - A SQLite database connection for metadata access +/// - Optional lock files for coordinating reads with other processes +#[derive(Debug)] +pub struct Storage { + /// Directory handle for the storage root, used for all fd-relative operations. 
+ root_dir: Dir, + + /// SQLite database connection for metadata queries. + db: Connection, + + /// Lock file for layer operations (overlay-layers/layers.lock). + layers_lock: Option, + + /// Lock file for image operations (overlay-images/images.lock). + images_lock: Option, +} + +impl Storage { + /// Open storage at the given root path. + /// + /// This validates that the path points to a valid container storage directory + /// by checking for required subdirectories and the database file. + /// + /// # Errors + /// + /// Returns an error if: + /// - The path does not exist or is not a directory + /// - Required subdirectories are missing + /// - The database file is missing or invalid + pub fn open>(root: P) -> Result { + let root_path = root.as_ref(); + + // Open the directory handle for fd-relative operations + let root_dir = Dir::open_ambient_dir(root_path, ambient_authority()).map_err(|e| { + if e.kind() == std::io::ErrorKind::NotFound { + StorageError::RootNotFound(root_path.to_path_buf()) + } else { + StorageError::Io(e) + } + })?; + + // Validate storage structure + Self::validate_storage(&root_dir)?; + + // Open database via fd-relative path + let db = Self::open_database(&root_dir)?; + + // Try to open lock files (they may not exist for read-only storage) + let layers_lock_path = root_path.join("overlay-layers/layers.lock"); + let images_lock_path = root_path.join("overlay-images/images.lock"); + + let layers_lock = LockFile::open(&layers_lock_path).ok(); + let images_lock = LockFile::open(&images_lock_path).ok(); + + Ok(Self { + root_dir, + db, + layers_lock, + images_lock, + }) + } + + /// Discover storage root from default locations. + /// + /// Searches for container storage in the following order: + /// 1. `$CONTAINERS_STORAGE_ROOT` environment variable + /// 2. Rootless storage: `$XDG_DATA_HOME/containers/storage` or `~/.local/share/containers/storage` + /// 3. Root storage: `/var/lib/containers/storage` + /// + /// # Errors + /// + /// Returns an error if no valid storage location is found. + pub fn discover() -> Result { + let search_paths = Self::default_search_paths(); + + for path in search_paths { + if path.exists() { + match Self::open(&path) { + Ok(storage) => return Ok(storage), + Err(_) => continue, + } + } + } + + Err(StorageError::InvalidStorage( + "No valid storage location found. Searched default locations.".to_string(), + )) + } + + /// Get the default search paths for storage discovery. + fn default_search_paths() -> Vec { + let mut paths = Vec::new(); + + // 1. Check CONTAINERS_STORAGE_ROOT environment variable + if let Ok(root) = env::var("CONTAINERS_STORAGE_ROOT") { + paths.push(PathBuf::from(root)); + } + + // 2. Check rootless locations + if let Ok(home) = env::var("HOME") { + let home_path = PathBuf::from(home); + + // Try XDG_DATA_HOME first + if let Ok(xdg_data) = env::var("XDG_DATA_HOME") { + paths.push(PathBuf::from(xdg_data).join("containers/storage")); + } + + // Fallback to ~/.local/share/containers/storage + paths.push(home_path.join(".local/share/containers/storage")); + } + + // 3. Check root location + paths.push(PathBuf::from("/var/lib/containers/storage")); + + paths + } + + /// Validate that the directory structure is a valid overlay storage. 
+ fn validate_storage(root_dir: &Dir) -> Result<()> { + // Check for required subdirectories + let required_dirs = ["overlay", "overlay-layers", "overlay-images"]; + + for dir_name in &required_dirs { + match root_dir.try_exists(dir_name) { + Ok(exists) if !exists => { + return Err(StorageError::InvalidStorage(format!( + "Missing required directory: {}", + dir_name + ))); + } + Err(e) => return Err(StorageError::Io(e)), + _ => {} + } + } + + // Check for database file + match root_dir.try_exists("db.sql") { + Ok(exists) if !exists => { + return Err(StorageError::InvalidStorage( + "Missing database file: db.sql".to_string(), + )); + } + Err(e) => return Err(StorageError::Io(e)), + _ => {} + } + + Ok(()) + } + + /// Open the SQLite database using fd-relative access. + fn open_database(root_dir: &Dir) -> Result { + let db_file = root_dir.open("db.sql")?; + + // Get the fd number and use /proc/self/fd path for SQLite. + // SQLite's Connection::open opens its own fd to this path, so db_file + // can be dropped normally after the connection is established. + let fd_name = DecInt::from_fd(&db_file); + let db_path = format!("/proc/self/fd/{}", fd_name.as_ref().to_string_lossy()); + let conn = Connection::open(&db_path)?; + + // db_file is dropped here, closing our fd. SQLite has its own fd. + Ok(conn) + } + + /// Create storage from an existing root directory handle. + /// + /// # Errors + /// + /// Returns an error if the directory is not a valid container storage. + pub fn from_root_dir(root_dir: Dir) -> Result { + Self::validate_storage(&root_dir)?; + let db = Self::open_database(&root_dir)?; + Ok(Self { + root_dir, + db, + layers_lock: None, + images_lock: None, + }) + } + + /// Get a reference to the root directory handle. + pub fn root_dir(&self) -> &Dir { + &self.root_dir + } + + /// Get a reference to the database connection. + pub fn database(&self) -> &Connection { + &self.db + } + + // ========== Locking Methods ========== + + /// Acquire a shared (read) lock on the layers store. + /// + /// This lock allows concurrent readers but blocks writers. Use this when + /// reading layer data to ensure consistency. + /// + /// # Errors + /// + /// Returns an error if the lock file is not available. + pub fn rlock_layers(&self) -> Result> { + let lock = self + .layers_lock + .as_ref() + .ok_or_else(|| StorageError::InvalidStorage("No layers lock file".to_string()))?; + let guard = lock.rlock(); + Ok(LayerRLockGuard { + storage: self, + _lock: guard, + }) + } + + /// Acquire a shared (read) lock on the images store. + /// + /// # Errors + /// + /// Returns an error if the lock file is not available. + pub fn rlock_images(&self) -> Result> { + let lock = self + .images_lock + .as_ref() + .ok_or_else(|| StorageError::InvalidStorage("No images lock file".to_string()))?; + let guard = lock.rlock(); + Ok(ImageRLockGuard { + storage: self, + _lock: guard, + }) + } + + // ========== Change Detection Methods ========== + + /// Get the current "last write" token for the layers store. + /// + /// # Errors + /// + /// Returns an error if the lock file is not available. + pub fn get_layers_last_write(&self) -> Result { + let lock = self + .layers_lock + .as_ref() + .ok_or_else(|| StorageError::InvalidStorage("No layers lock file".to_string()))?; + Ok(lock.get_last_write()?) + } + + /// Get the current "last write" token for the images store. + /// + /// # Errors + /// + /// Returns an error if the lock file is not available. 
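+    ///
+    /// # Example
+    ///
+    /// A sketch of the change-detection pattern (assumes the images lock
+    /// file exists in this storage):
+    ///
+    /// ```no_run
+    /// let storage = cstorage::Storage::discover()?;
+    /// let token = storage.get_images_last_write()?;
+    /// // ... some time later ...
+    /// if storage.images_modified_since(&token)? {
+    ///     // another process wrote to the images store; re-read metadata
+    /// }
+    /// # Ok::<(), cstorage::StorageError>(())
+    /// ```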
+ pub fn get_images_last_write(&self) -> Result { + let lock = self + .images_lock + .as_ref() + .ok_or_else(|| StorageError::InvalidStorage("No images lock file".to_string()))?; + Ok(lock.get_last_write()?) + } + + /// Check if the layers store was modified since the given token. + /// + /// # Errors + /// + /// Returns an error if the lock file is not available. + pub fn layers_modified_since(&self, token: &LastWrite) -> Result { + let lock = self + .layers_lock + .as_ref() + .ok_or_else(|| StorageError::InvalidStorage("No layers lock file".to_string()))?; + Ok(lock.modified_since(token)?) + } + + /// Check if the images store was modified since the given token. + /// + /// # Errors + /// + /// Returns an error if the lock file is not available. + pub fn images_modified_since(&self, token: &LastWrite) -> Result { + let lock = self + .images_lock + .as_ref() + .ok_or_else(|| StorageError::InvalidStorage("No images lock file".to_string()))?; + Ok(lock.modified_since(token)?) + } + + /// Resolve a link ID to a layer ID using fd-relative symlink reading. + /// + /// # Errors + /// + /// Returns an error if the link doesn't exist or has an invalid format. + pub fn resolve_link(&self, link_id: &str) -> Result { + // Open overlay directory from storage root + let overlay_dir = self.root_dir.open_dir("overlay")?; + + // Open link directory + let link_dir = overlay_dir.open_dir("l")?; + + // Read symlink target using fd-relative operation + let target = link_dir.read_link(link_id).map_err(|e| { + StorageError::LinkReadError(format!("Failed to read link {}: {}", link_id, e)) + })?; + + // Extract layer ID from symlink target + Self::extract_layer_id_from_link(&target) + } + + /// Extract layer ID from symlink target path. + /// + /// Target format: ..//diff + fn extract_layer_id_from_link(target: &Path) -> Result { + // Convert to string for processing + let target_str = target.to_str().ok_or_else(|| { + StorageError::LinkReadError("Invalid UTF-8 in link target".to_string()) + })?; + + // Split by '/' and find the layer ID component + let components: Vec<&str> = target_str.split('/').collect(); + + // Expected format: ..//diff + // So we need the second-to-last component + if components.len() >= 2 { + let layer_id = components[components.len() - 2]; + if !layer_id.is_empty() && layer_id != ".." { + return Ok(layer_id.to_string()); + } + } + + Err(StorageError::LinkReadError(format!( + "Invalid link target format: {}", + target_str + ))) + } + + /// List all images in storage. + /// + /// # Errors + /// + /// Returns an error if the images directory cannot be read. + pub fn list_images(&self) -> Result> { + use crate::image::Image; + + let images_dir = self.root_dir.open_dir("overlay-images")?; + let mut images = Vec::new(); + + for entry in images_dir.entries()? { + let entry = entry?; + if entry.file_type()?.is_dir() { + let id = entry + .file_name() + .to_str() + .ok_or_else(|| { + StorageError::InvalidStorage( + "Invalid UTF-8 in image directory name".to_string(), + ) + })? + .to_string(); + images.push(Image::open(self, &id)?); + } + } + Ok(images) + } + + /// Get an image by ID. + /// + /// # Errors + /// + /// Returns [`StorageError::ImageNotFound`] if the image doesn't exist. + pub fn get_image(&self, id: &str) -> Result { + crate::image::Image::open(self, id) + } + + /// Get layers for an image (in order from base to top). + /// + /// # Errors + /// + /// Returns an error if any layer cannot be opened. 
+ pub fn get_image_layers( + &self, + image: &crate::image::Image, + ) -> Result> { + use crate::layer::Layer; + // image.layers() returns diff_ids, which need to be mapped to storage layer IDs + let diff_ids = image.layers()?; + let mut layers = Vec::new(); + for diff_id in diff_ids { + let layer_id = self.resolve_diff_id(&diff_id)?; + layers.push(Layer::open(self, &layer_id)?); + } + Ok(layers) + } + + /// Find an image by name. + /// + /// # Errors + /// + /// Returns [`StorageError::ImageNotFound`] if no image with the given name is found. + pub fn find_image_by_name(&self, name: &str) -> Result { + // Read images.json from overlay-images/ + let images_dir = self.root_dir.open_dir("overlay-images")?; + let mut file = images_dir.open("images.json")?; + let mut contents = String::new(); + file.read_to_string(&mut contents)?; + + // Parse the JSON array + let entries: Vec = serde_json::from_str(&contents) + .map_err(|e| StorageError::InvalidStorage(format!("Invalid images.json: {}", e)))?; + + // Search for matching name + for entry in &entries { + if let Some(names) = &entry.names { + for image_name in names { + if image_name == name { + return self.get_image(&entry.id); + } + } + } + } + + // Try partial matching (e.g., "alpine:latest" matches "docker.io/library/alpine:latest") + for entry in &entries { + if let Some(names) = &entry.names { + for image_name in names { + // Check if name is a suffix (after removing registry/namespace prefix) + if let Some(prefix) = image_name.strip_suffix(name) { + // Verify it's a proper boundary (preceded by '/') + if prefix.is_empty() || prefix.ends_with('/') { + return self.get_image(&entry.id); + } + } + } + } + } + + Err(StorageError::ImageNotFound(name.to_string())) + } + + /// Resolve a diff-digest to a storage layer ID. + /// + /// # Errors + /// + /// Returns [`StorageError::LayerNotFound`] if no layer with the given diff-digest exists. + pub fn resolve_diff_id(&self, diff_digest: &str) -> Result { + // Normalize the diff_digest to include sha256: prefix for comparison + let normalized = if diff_digest.starts_with("sha256:") { + diff_digest.to_string() + } else { + format!("sha256:{}", diff_digest) + }; + + // Read layers.json from overlay-layers/ + let layers_dir = self.root_dir.open_dir("overlay-layers")?; + let mut file = layers_dir.open("layers.json")?; + let mut contents = String::new(); + file.read_to_string(&mut contents)?; + + // Parse the JSON array + let entries: Vec = serde_json::from_str(&contents) + .map_err(|e| StorageError::InvalidStorage(format!("Invalid layers.json: {}", e)))?; + + // Search for matching diff-digest + for entry in entries { + if entry.diff_digest.as_ref() == Some(&normalized) { + return Ok(entry.id); + } + } + + Err(StorageError::LayerNotFound(diff_digest.to_string())) + } + + /// Get layer metadata including size information. + /// + /// # Errors + /// + /// Returns an error if the layer is not found. 
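+    ///
+    /// # Example
+    ///
+    /// A sketch that reports a layer's uncompressed size (the layer ID is
+    /// hypothetical):
+    ///
+    /// ```no_run
+    /// let storage = cstorage::Storage::discover()?;
+    /// let meta = storage.get_layer_metadata("deadbeef...")?;
+    /// println!("diff size: {:?} bytes", meta.diff_size);
+    /// # Ok::<(), cstorage::StorageError>(())
+    /// ```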
+
+    /// Get layer metadata including size information.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the layer is not found.
+    pub fn get_layer_metadata(&self, layer_id: &str) -> Result<LayerMetadata> {
+        // Read layers.json from overlay-layers/
+        let layers_dir = self.root_dir.open_dir("overlay-layers")?;
+        let mut file = layers_dir.open("layers.json")?;
+        let mut contents = String::new();
+        file.read_to_string(&mut contents)?;
+
+        // Parse the JSON array
+        let entries: Vec<LayerEntry> = serde_json::from_str(&contents)
+            .map_err(|e| StorageError::InvalidStorage(format!("Invalid layers.json: {}", e)))?;
+
+        // Search for matching layer ID
+        for entry in entries {
+            if entry.id == layer_id {
+                return Ok(LayerMetadata {
+                    id: entry.id,
+                    parent: entry.parent,
+                    diff_size: entry.diff_size,
+                    compressed_size: entry.compressed_size,
+                });
+            }
+        }
+
+        Err(StorageError::LayerNotFound(layer_id.to_string()))
+    }
+
+    /// Calculate the total uncompressed size of an image.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if any layer metadata cannot be read.
+    pub fn calculate_image_size(&self, image: &crate::image::Image) -> Result<u64> {
+        let layers = self.get_image_layers(image)?;
+        let mut total_size: u64 = 0;
+
+        for layer in &layers {
+            let metadata = self.get_layer_metadata(layer.id())?;
+            if let Some(size) = metadata.diff_size {
+                total_size = total_size.saturating_add(size);
+            }
+        }
+
+        Ok(total_size)
+    }
+}
+
+/// Entry in images.json for image name lookups.
+#[derive(Debug, serde::Deserialize)]
+struct ImageEntry {
+    id: String,
+    names: Option<Vec<String>>,
+}
+
+/// Entry in layers.json for layer ID lookups.
+#[derive(Debug, serde::Deserialize)]
+#[serde(rename_all = "kebab-case")]
+struct LayerEntry {
+    id: String,
+    parent: Option<String>,
+    diff_digest: Option<String>,
+    diff_size: Option<u64>,
+    compressed_size: Option<u64>,
+}
+
+/// Metadata about a layer from layers.json.
+#[derive(Debug, Clone)]
+pub struct LayerMetadata {
+    /// Layer storage ID.
+    pub id: String,
+    /// Parent layer ID (if not base layer).
+    pub parent: Option<String>,
+    /// Uncompressed diff size in bytes.
+    pub diff_size: Option<u64>,
+    /// Compressed size in bytes.
+    pub compressed_size: Option<u64>,
+}
+
+// ========== Lock Guard Types ==========
+
+/// RAII guard for a shared (read) lock on the layers store.
+#[derive(Debug)]
+pub struct LayerRLockGuard<'a> {
+    /// Reference to the storage that owns the lock.
+    storage: &'a Storage,
+    /// The underlying read lock guard from the lockfile module.
+    _lock: RLockGuard<'a>,
+}
+
+impl<'a> LayerRLockGuard<'a> {
+    /// Get a reference to the storage.
+    pub fn storage(&self) -> &Storage {
+        self.storage
+    }
+}
+
+/// RAII guard for a shared (read) lock on the images store.
+#[derive(Debug)]
+pub struct ImageRLockGuard<'a> {
+    /// Reference to the storage that owns the lock.
+    storage: &'a Storage,
+    /// The underlying read lock guard from the lockfile module.
+    _lock: RLockGuard<'a>,
+}
+
+impl<'a> ImageRLockGuard<'a> {
+    /// Get a reference to the storage.
+ pub fn storage(&self) -> &Storage { + self.storage + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_default_search_paths() { + let paths = Storage::default_search_paths(); + assert!(!paths.is_empty(), "Should have at least one search path"); + } + + #[test] + fn test_storage_validation() { + // Create a mock storage directory structure for testing + let dir = tempfile::tempdir().unwrap(); + let storage_path = dir.path(); + + // Create required directories and files + std::fs::create_dir_all(storage_path.join("overlay")).unwrap(); + std::fs::create_dir_all(storage_path.join("overlay-layers")).unwrap(); + std::fs::create_dir_all(storage_path.join("overlay-images")).unwrap(); + + // Create an empty db.sql file + std::fs::write(storage_path.join("db.sql"), "").unwrap(); + + let storage = Storage::open(storage_path).unwrap(); + assert!(storage.root_dir().try_exists("overlay").unwrap()); + } +} diff --git a/crates/cstorage/src/tar_split.rs b/crates/cstorage/src/tar_split.rs new file mode 100644 index 00000000..bef8fc1f --- /dev/null +++ b/crates/cstorage/src/tar_split.rs @@ -0,0 +1,712 @@ +//! Tar-split integration for reading container layers without full tar serialization. +//! +//! This module provides the `TarSplitFdStream` which reads tar-split metadata files +//! and returns file descriptors for the actual file content, enabling zero-copy +//! access to layer data. +//! +//! # Overview +//! +//! The tar-split format stores tar header metadata separately from file content, +//! allowing reconstruction of tar archives without duplicating the actual file data. +//! This implementation uses that metadata to provide file descriptors directly to +//! the files in the overlay diff directory. +//! +//! # Architecture +//! +//! The tar-split format is NDJSON (newline-delimited JSON), gzip-compressed: +//! - Type 1 (FileType): File/directory references with name, optional size, optional CRC64 +//! - Type 2 (SegmentType): Raw TAR header bytes and padding (base64-encoded) +//! - CRC64-ISO algorithm for checksums + +use std::io::{BufRead, BufReader, Read}; +use std::os::fd::OwnedFd; + +use base64::prelude::*; +use cap_std::fs::{Dir, File}; +use crc::{Crc, CRC_64_GO_ISO}; +use flate2::read::GzDecoder; +use serde::Deserialize; + +use crate::error::{Result, StorageError}; +use crate::layer::Layer; +use crate::storage::Storage; + +/// CRC64-ISO implementation for verifying file checksums. +const CRC64_ISO: Crc = Crc::::new(&CRC_64_GO_ISO); + +/// Item returned from tar-split stream iteration. +#[derive(Debug)] +pub enum TarSplitItem { + /// Raw segment bytes (TAR header + padding) to write directly. + Segment(Vec), + + /// File content to write. + FileContent { + /// File descriptor for reading the content. + /// + /// The caller takes ownership of this file descriptor and is responsible + /// for reading the content and closing it when done. + fd: OwnedFd, + /// Expected file size in bytes. + /// + /// Used for tar padding calculation: TAR files are padded to 512-byte + /// boundaries, so the consumer needs to know the size to write the + /// correct amount of padding after the file content. + size: u64, + /// File path from the tar-split entry. + /// + /// This is the path as recorded in the original tar archive + /// (e.g., "./etc/hosts"). + name: String, + }, +} + +/// Raw tar-split entry from NDJSON format before validation. +#[derive(Debug, Deserialize)] +struct TarSplitEntryRaw { + /// Entry type discriminant: 1 for File, 2 for Segment. 
+ #[serde(rename = "type")] + type_id: u8, + /// File name from TAR header (type 1 only). + #[serde(default)] + name: Option, + /// File size in bytes (type 1 only). + #[serde(default)] + size: Option, + /// CRC64-ISO checksum, base64-encoded (type 1 only). + #[serde(default)] + crc64: Option, + /// Base64-encoded TAR header bytes or padding (type 2 only). + #[serde(default)] + payload: Option, +} + +/// Tar-split entry from NDJSON format. +#[derive(Debug)] +enum TarSplitEntry { + /// File type entry: references a file/directory with metadata. + File { + /// File name from TAR header. + name: Option, + /// File size in bytes. + size: Option, + /// CRC64-ISO checksum (base64-encoded). + crc64: Option, + }, + /// Segment type entry: raw TAR header bytes and padding. + Segment { + /// Base64-encoded TAR header bytes (512 bytes) or padding. + payload: Option, + }, +} + +impl TarSplitEntry { + /// Parse a tar-split entry from raw format with validation. + fn from_raw(raw: TarSplitEntryRaw) -> Result { + match raw.type_id { + 1 => Ok(TarSplitEntry::File { + name: raw.name, + size: raw.size, + crc64: raw.crc64, + }), + 2 => Ok(TarSplitEntry::Segment { + payload: raw.payload, + }), + _ => Err(StorageError::TarSplitError(format!( + "Invalid tar-split entry type: {}", + raw.type_id + ))), + } + } +} + +/// Tar header information extracted from tar-split metadata. +#[derive(Debug, Clone)] +pub struct TarHeader { + /// File path in the tar archive (e.g., "./etc/hosts") + pub name: String, + + /// File mode (permissions and type information) + pub mode: u32, + + /// User ID of the file owner + pub uid: u32, + + /// Group ID of the file owner + pub gid: u32, + + /// File size in bytes + pub size: u64, + + /// Modification time (Unix timestamp) + pub mtime: i64, + + /// Tar entry type flag + pub typeflag: u8, + + /// Link target for symbolic links and hard links + pub linkname: String, + + /// User name of the file owner + pub uname: String, + + /// Group name of the file owner + pub gname: String, + + /// Major device number (for device files) + pub devmajor: u32, + + /// Minor device number (for device files) + pub devminor: u32, +} + +impl TarHeader { + /// Parse a TarHeader from a 512-byte TAR header block. + /// + /// # Errors + /// + /// Returns an error if the header is too short or has an invalid checksum. + pub fn from_bytes(header: &[u8]) -> Result { + if header.len() < 512 { + return Err(StorageError::TarSplitError(format!( + "TAR header too short: {} bytes", + header.len() + ))); + } + + // Verify checksum first + let stored_checksum = { + let checksum_bytes = &header[148..156]; + let null_pos = checksum_bytes + .iter() + .position(|&b| b == 0 || b == b' ') + .unwrap_or(checksum_bytes.len()); + let s = std::str::from_utf8(&checksum_bytes[..null_pos]) + .map_err(|_| StorageError::TarSplitError("Invalid checksum field".to_string()))? + .trim(); + if s.is_empty() { + return Err(StorageError::TarSplitError( + "Empty checksum field".to_string(), + )); + } + u32::from_str_radix(s, 8).map_err(|e| { + StorageError::TarSplitError(format!("Invalid checksum '{}': {}", s, e)) + })? 
+ }; + + let computed_checksum: u32 = header[..148] + .iter() + .chain(std::iter::repeat_n(&b' ', 8)) // checksum field treated as spaces + .chain(header[156..512].iter()) + .map(|&b| b as u32) + .sum(); + + if stored_checksum != computed_checksum { + return Err(StorageError::TarSplitError(format!( + "Checksum mismatch: stored {} != computed {}", + stored_checksum, computed_checksum + ))); + } + + // Extract null-terminated string from byte range + let extract_string = |start: usize, end: usize| -> String { + let bytes = &header[start..end]; + let null_pos = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len()); + String::from_utf8_lossy(&bytes[..null_pos]).to_string() + }; + + // Parse octal field from byte range + let parse_octal = |start: usize, end: usize| -> Result { + let s = extract_string(start, end); + let trimmed = s.trim(); + if trimmed.is_empty() { + return Ok(0); + } + u64::from_str_radix(trimmed, 8).map_err(|e| { + StorageError::TarSplitError(format!("Failed to parse octal '{}': {}", trimmed, e)) + }) + }; + + let name = extract_string(0, 100); + let mode = parse_octal(100, 108)? as u32; + let uid = parse_octal(108, 116)? as u32; + let gid = parse_octal(116, 124)? as u32; + let size = parse_octal(124, 136)?; + let mtime = parse_octal(136, 148)? as i64; + let typeflag = header[156]; + let linkname = extract_string(157, 257); + let uname = extract_string(265, 297); + let gname = extract_string(297, 329); + let devmajor = parse_octal(329, 337)? as u32; + let devminor = parse_octal(337, 345)? as u32; + + Ok(TarHeader { + name, + mode, + uid, + gid, + size, + mtime, + typeflag, + linkname, + uname, + gname, + devmajor, + devminor, + }) + } + + /// Check if this header represents a regular file. + pub fn is_regular_file(&self) -> bool { + self.typeflag == b'0' || self.typeflag == b'\0' + } + + /// Check if this header represents a directory. + pub fn is_directory(&self) -> bool { + self.typeflag == b'5' + } + + /// Check if this header represents a symbolic link. + pub fn is_symlink(&self) -> bool { + self.typeflag == b'2' + } + + /// Check if this header represents a hard link. + pub fn is_hardlink(&self) -> bool { + self.typeflag == b'1' + } + + /// Normalize the path by stripping leading "./" + pub fn normalized_name(&self) -> &str { + self.name.strip_prefix("./").unwrap_or(&self.name) + } +} + +/// Stream that reads tar-split metadata and provides file descriptors for file content. +#[derive(Debug)] +pub struct TarSplitFdStream { + /// The current layer for file lookups. + layer: Layer, + + /// Storage root directory for accessing parent layers on-demand. + storage_root: Dir, + + /// Gzip decompressor reading from the tar-split file. + reader: BufReader>, + + /// Entry counter for debugging and error messages. + entry_count: usize, +} + +impl TarSplitFdStream { + /// Create a new tar-split stream for a layer. + /// + /// # Errors + /// + /// Returns an error if the tar-split file doesn't exist or cannot be opened. 
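For reference, the checksum rule implemented by `from_bytes` above can be stated on its own: sum all 512 header bytes, with the eight-byte checksum field (offsets 148..156) treated as ASCII spaces. A minimal standalone sketch, not part of the patch:

```rust
/// Compute the POSIX tar header checksum: the sum of all 512 bytes,
/// with the checksum field itself (offsets 148..156) counted as spaces.
fn tar_header_checksum(header: &[u8; 512]) -> u32 {
    header
        .iter()
        .enumerate()
        .map(|(i, &b)| {
            if (148..156).contains(&i) {
                u32::from(b' ')
            } else {
                u32::from(b)
            }
        })
        .sum()
}
```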
+ pub fn new(storage: &Storage, layer: &Layer) -> Result { + // Open overlay-layers directory via Dir handle + let layers_dir = storage.root_dir().open_dir("overlay-layers").map_err(|e| { + StorageError::TarSplitError(format!("Failed to open overlay-layers directory: {}", e)) + })?; + + // Open tar-split file relative to layers directory + let filename = format!("{}.tar-split.gz", layer.id()); + let file = layers_dir.open(&filename).map_err(|e| { + StorageError::TarSplitError(format!( + "Failed to open tar-split file {}: {}", + filename, e + )) + })?; + + // Wrap in gzip decompressor + let gz_decoder = GzDecoder::new(file); + let reader = BufReader::new(gz_decoder); + + // Open the layer for on-demand file lookups + let layer = Layer::open(storage, layer.id())?; + + // Clone storage root dir for on-demand parent layer access + let storage_root = storage.root_dir().try_clone()?; + + Ok(Self { + layer, + storage_root, + reader, + entry_count: 0, + }) + } + + /// Open a file in the layer chain, trying current layer first then parents. + fn open_file_in_chain(&self, path: &str) -> Result { + // Normalize path (remove leading ./) + let normalized_path = path.strip_prefix("./").unwrap_or(path); + + // Try to open in current layer first + match self.layer.diff_dir().open(normalized_path) { + Ok(file) => return Ok(file), + Err(e) if e.kind() == std::io::ErrorKind::NotFound => { + // Continue to search parent layers + } + Err(e) => return Err(StorageError::Io(e)), + } + + // Search parent layers on-demand + self.search_parent_layers(&self.layer, normalized_path, 0) + } + + /// Recursively search parent layers for a file. + fn search_parent_layers( + &self, + current_layer: &Layer, + path: &str, + depth: usize, + ) -> Result { + const MAX_DEPTH: usize = 500; + + if depth >= MAX_DEPTH { + return Err(StorageError::TarSplitError(format!( + "Layer chain exceeds maximum depth of {} while searching for file: {}", + MAX_DEPTH, path + ))); + } + + // Get parent link IDs + let parent_links = current_layer.parent_links(); + + // Try each parent + for link_id in parent_links { + // Resolve link ID to layer ID by reading the symlink directly + let parent_id = self.resolve_link_direct(link_id)?; + + // Try to open file directly in parent's diff directory + match self.open_file_in_layer(&parent_id, path) { + Ok(file) => return Ok(file), + Err(StorageError::Io(e)) if e.kind() == std::io::ErrorKind::NotFound => { + // File not in this parent, recursively search its parents + match self.search_by_layer_id(&parent_id, path, depth + 1) { + Ok(file) => return Ok(file), + Err(_) => continue, // Try next parent at this level + } + } + Err(_) => continue, // Try next parent + } + } + + Err(StorageError::TarSplitError(format!( + "File not found in layer chain: {}", + path + ))) + } + + /// Search for a file starting from a layer ID. 
+ fn search_by_layer_id( + &self, + layer_id: &str, + path: &str, + depth: usize, + ) -> Result { + const MAX_DEPTH: usize = 500; + + if depth >= MAX_DEPTH { + return Err(StorageError::TarSplitError(format!( + "Layer chain exceeds maximum depth of {} while searching for file: {}", + MAX_DEPTH, path + ))); + } + + // Try to open file in this layer + match self.open_file_in_layer(layer_id, path) { + Ok(file) => return Ok(file), + Err(StorageError::Io(e)) if e.kind() == std::io::ErrorKind::NotFound => { + // File not found, check parents + } + Err(e) => return Err(e), + } + + // Read parent links for this layer + let parent_links = self.read_layer_parent_links(layer_id)?; + + // Try each parent + for link_id in parent_links { + let parent_id = self.resolve_link_direct(&link_id)?; + match self.search_by_layer_id(&parent_id, path, depth + 1) { + Ok(file) => return Ok(file), + Err(_) => continue, + } + } + + Err(StorageError::TarSplitError(format!( + "File not found in layer chain: {}", + path + ))) + } + + /// Resolve a link ID to layer ID by directly reading the symlink. + fn resolve_link_direct(&self, link_id: &str) -> Result { + let overlay_dir = self.storage_root.open_dir("overlay")?; + let link_dir = overlay_dir.open_dir("l")?; + let target = link_dir.read_link(link_id).map_err(|e| { + StorageError::LinkReadError(format!("Failed to read link {}: {}", link_id, e)) + })?; + + // Extract layer ID from symlink target (format: ..//diff) + let target_str = target.to_str().ok_or_else(|| { + StorageError::LinkReadError("Invalid UTF-8 in link target".to_string()) + })?; + let components: Vec<&str> = target_str.split('/').collect(); + if components.len() >= 2 { + let layer_id = components[components.len() - 2]; + if !layer_id.is_empty() && layer_id != ".." { + return Ok(layer_id.to_string()); + } + } + Err(StorageError::LinkReadError(format!( + "Invalid link target format: {}", + target_str + ))) + } + + /// Open a file in a specific layer's diff directory. + fn open_file_in_layer(&self, layer_id: &str, path: &str) -> Result { + let overlay_dir = self.storage_root.open_dir("overlay")?; + let layer_dir = overlay_dir.open_dir(layer_id)?; + let diff_dir = layer_dir.open_dir("diff")?; + diff_dir.open(path).map_err(StorageError::Io) + } + + /// Read parent link IDs from a layer's lower file. + fn read_layer_parent_links(&self, layer_id: &str) -> Result> { + let overlay_dir = self.storage_root.open_dir("overlay")?; + let layer_dir = overlay_dir.open_dir(layer_id)?; + + match layer_dir.read_to_string("lower") { + Ok(content) => Ok(content + .trim() + .split(':') + .filter_map(|s| s.strip_prefix("l/")) + .map(|s| s.to_string()) + .collect()), + Err(_) => Ok(Vec::new()), // Base layer has no lower file + } + } + + /// Verify CRC64-ISO checksum of a file. 
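The `lower` file read above is overlayfs metadata: a colon-separated list of short link names such as `l/ABC:l/DEF`. The same split-and-strip rule as a standalone sketch (hypothetical helper, illustrative IDs):

```rust
/// Parse an overlay "lower" file into link IDs, e.g. "l/ABC:l/DEF" -> ["ABC", "DEF"].
fn parse_lower(content: &str) -> Vec<&str> {
    content
        .trim()
        .split(':')
        .filter_map(|component| component.strip_prefix("l/"))
        .collect()
}

fn main() {
    assert_eq!(parse_lower("l/ABC:l/DEF\n"), vec!["ABC", "DEF"]);
    // A base layer has no "lower" file at all, so nothing parses here.
    assert!(parse_lower("").is_empty());
}
```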
+ fn verify_crc64( + &self, + file: &mut cap_std::fs::File, + expected_b64: &str, + size: u64, + ) -> Result<()> { + // Decode base64 checksum + let expected_bytes = BASE64_STANDARD.decode(expected_b64).map_err(|e| { + StorageError::TarSplitError(format!("Failed to decode base64 CRC64: {}", e)) + })?; + + if expected_bytes.len() != 8 { + return Err(StorageError::TarSplitError(format!( + "Invalid CRC64 length: {} bytes", + expected_bytes.len() + ))); + } + + // Convert to u64 (big-endian) + let expected = u64::from_be_bytes(expected_bytes.try_into().unwrap()); + + // Compute CRC64 of file content + let mut digest = CRC64_ISO.digest(); + let mut buffer = vec![0u8; 8192]; + let mut bytes_read = 0u64; + + loop { + let n = file.read(&mut buffer).map_err(|e| { + StorageError::TarSplitError(format!( + "Failed to read file for CRC64 verification: {}", + e + )) + })?; + if n == 0 { + break; + } + digest.update(&buffer[..n]); + bytes_read += n as u64; + } + + // Verify size matches + if bytes_read != size { + return Err(StorageError::TarSplitError(format!( + "File size mismatch: expected {}, got {}", + size, bytes_read + ))); + } + + let computed = digest.finalize(); + if computed != expected { + return Err(StorageError::TarSplitError(format!( + "CRC64 mismatch: expected {:016x}, got {:016x}", + expected, computed + ))); + } + + Ok(()) + } + + /// Read the next item from the tar-split stream. + /// + /// Returns: + /// - `Ok(Some(item))` - Next item was read successfully + /// - `Ok(None)` - End of stream reached + /// - `Err(...)` - Error occurred during reading + #[allow(clippy::should_implement_trait)] + pub fn next(&mut self) -> Result> { + loop { + // Read next line from NDJSON stream + let mut line = String::new(); + match self.reader.read_line(&mut line) { + Ok(0) => { + return Ok(None); + } + Ok(_) => { + // Parse NDJSON entry + let raw: TarSplitEntryRaw = serde_json::from_str(&line).map_err(|e| { + StorageError::TarSplitError(format!( + "Failed to parse tar-split entry: {}", + e + )) + })?; + let entry = TarSplitEntry::from_raw(raw)?; + + match entry { + TarSplitEntry::Segment { payload } => { + if let Some(payload_b64) = payload { + let payload_bytes = + BASE64_STANDARD.decode(&payload_b64).map_err(|e| { + StorageError::TarSplitError(format!( + "Failed to decode base64 payload: {}", + e + )) + })?; + + return Ok(Some(TarSplitItem::Segment(payload_bytes))); + } + // Empty segment, continue + } + + TarSplitEntry::File { name, size, crc64 } => { + self.entry_count += 1; + + // Check if this file has content to write + let file_size = size.unwrap_or(0); + if file_size > 0 { + // Regular file with content - open it + let path = name.as_ref().ok_or_else(|| { + StorageError::TarSplitError( + "FileType entry missing name".to_string(), + ) + })?; + + let mut file = self.open_file_in_chain(path)?; + + // Verify CRC64 if provided + if let Some(ref crc64_b64) = crc64 { + self.verify_crc64(&mut file, crc64_b64, file_size)?; + + // Reopen file since we consumed it for CRC check + file = self.open_file_in_chain(path)?; + } + + // Convert to OwnedFd and return + let std_file = file.into_std(); + let owned_fd: OwnedFd = std_file.into(); + return Ok(Some(TarSplitItem::FileContent { + fd: owned_fd, + size: file_size, + name: path.clone(), + })); + } + // Empty file or directory - header already in preceding Segment + } + } + } + Err(e) => { + return Err(StorageError::TarSplitError(format!( + "Failed to read tar-split line: {}", + e + ))); + } + } + } + } + + /// Get the number of entries processed so far. 
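Putting `next()` together with the `TarSplitItem` contract described earlier: segments are written verbatim, while file content is followed by zero padding up to the next 512-byte boundary (per the `size` field's documentation). A hedged consumer sketch, assuming `StorageError` implements `std::error::Error`; the function name is made up:

```rust
use std::fs::File;
use std::io::{self, Write};

use cstorage::{TarSplitFdStream, TarSplitItem};

/// Reassemble a plain tar stream from tar-split items (simplified sketch).
fn reconstruct_tar<W: Write>(mut stream: TarSplitFdStream, out: &mut W) -> io::Result<()> {
    while let Some(item) = stream.next().map_err(io::Error::other)? {
        match item {
            // Tar headers and padding recorded in the metadata: copy verbatim.
            TarSplitItem::Segment(bytes) => out.write_all(&bytes)?,
            // File bodies come from the overlay diff directories as fds.
            TarSplitItem::FileContent { fd, size, .. } => {
                let mut file = File::from(fd);
                io::copy(&mut file, out)?;
                // Pad the entry out to the next 512-byte boundary.
                let padding = (512 - (size % 512)) % 512;
                out.write_all(&vec![0u8; padding as usize])?;
            }
        }
    }
    Ok(())
}
```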
+ pub fn entry_count(&self) -> usize { + self.entry_count + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_tar_header_type_checks() { + let mut header = TarHeader { + name: "test.txt".to_string(), + mode: 0o644, + uid: 1000, + gid: 1000, + size: 100, + mtime: 0, + typeflag: b'0', + linkname: String::new(), + uname: "user".to_string(), + gname: "group".to_string(), + devmajor: 0, + devminor: 0, + }; + + assert!(header.is_regular_file()); + assert!(!header.is_directory()); + assert!(!header.is_symlink()); + + header.typeflag = b'5'; + assert!(!header.is_regular_file()); + assert!(header.is_directory()); + + header.typeflag = b'2'; + assert!(header.is_symlink()); + } + + #[test] + fn test_tar_split_entry_deserialization() { + // Test type 2 (Segment) with integer discriminant + let json_segment = r#"{"type":2,"payload":"dXN0YXIAMDA="}"#; + let raw: TarSplitEntryRaw = serde_json::from_str(json_segment).unwrap(); + let entry = TarSplitEntry::from_raw(raw).unwrap(); + match entry { + TarSplitEntry::Segment { payload } => { + assert_eq!(payload, Some("dXN0YXIAMDA=".to_string())); + } + _ => panic!("Expected Segment variant"), + } + + // Test type 1 (File) with integer discriminant + let json_file = r#"{"type":1,"name":"./etc/hosts","size":123,"crc64":"AAAAAAAAAA=="}"#; + let raw: TarSplitEntryRaw = serde_json::from_str(json_file).unwrap(); + let entry = TarSplitEntry::from_raw(raw).unwrap(); + match entry { + TarSplitEntry::File { name, size, crc64 } => { + assert_eq!(name, Some("./etc/hosts".to_string())); + assert_eq!(size, Some(123)); + assert_eq!(crc64, Some("AAAAAAAAAA==".to_string())); + } + _ => panic!("Expected File variant"), + } + + // Test invalid type + let json_invalid = r#"{"type":99}"#; + let raw: TarSplitEntryRaw = serde_json::from_str(json_invalid).unwrap(); + let result = TarSplitEntry::from_raw(raw); + assert!(result.is_err()); + } +} From 36f0aa5be57b29e72e4e40ff9a81f5052bcf5e5d Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 29 Jan 2026 10:24:58 -0500 Subject: [PATCH 07/13] repository: Add ensure_object_from_file for reflink support Add a new method to Repository that stores objects using FICLONE reflinks when available, falling back to regular copy otherwise. This enables zero-copy import from containers-storage by reflinking files directly into the objects/ directory, avoiding data duplication on filesystems that support reflinks (btrfs, XFS with reflinks). The method only falls back to copy on EOPNOTSUPP (reflink not supported) or EXDEV (cross-device); other errors are propagated to the caller. Includes test coverage for the new method. Assisted-by: OpenCode (Opus 4.5) Signed-off-by: Colin Walters --- crates/composefs/src/repository.rs | 88 ++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/crates/composefs/src/repository.rs b/crates/composefs/src/repository.rs index a9ae0475..a5fb36bd 100644 --- a/crates/composefs/src/repository.rs +++ b/crates/composefs/src/repository.rs @@ -298,6 +298,63 @@ impl Repository { tokio::task::spawn_blocking(move || self_.finalize_object_tmpfile(tmpfile_fd.into(), size)) } + /// Ensure an object exists by reflinking from a source file. + /// + /// This method attempts to use FICLONE (reflink) to copy the source file + /// to the objects directory without duplicating data on disk. If reflinks + /// are not supported, it falls back to a regular copy. 
+ /// + /// This is particularly useful for importing from containers-storage where + /// we already have the file on disk and want to avoid copying data. + /// + /// # Arguments + /// * `src` - An open file descriptor to read from + /// * `size` - The size of the source file in bytes + /// + /// # Returns + /// The fs-verity ObjectID of the stored object. + pub fn ensure_object_from_file(&self, src: &std::fs::File, size: u64) -> Result { + use rustix::fs::{fstat, ioctl_ficlone}; + + // Create tmpfile in objects directory + let objects_dir = self.objects_dir()?; + let tmpfile_fd = openat( + objects_dir, + ".", + OFlags::RDWR | OFlags::TMPFILE | OFlags::CLOEXEC, + Mode::from_raw_mode(0o644), + )?; + + // Try reflink first + let mut tmpfile = File::from(tmpfile_fd); + match ioctl_ficlone(&tmpfile, src) { + Ok(()) => { + // Reflink succeeded - verify size matches + let stat = fstat(&tmpfile)?; + anyhow::ensure!( + stat.st_size as u64 == size, + "Reflink size mismatch: expected {}, got {}", + size, + stat.st_size + ); + } + Err(Errno::OPNOTSUPP | Errno::XDEV) => { + // Reflink not supported or cross-device, fall back to copy + use std::io::{Seek, SeekFrom}; + let mut src_clone = src.try_clone()?; + src_clone.seek(SeekFrom::Start(0))?; + std::io::copy(&mut src_clone, &mut tmpfile)?; + } + Err(e) => { + // Other errors (EACCES, ENOSPC, etc.) should be propagated + return Err(e).context("Reflinking source file to objects directory")?; + } + } + + // Finalize the tmpfile (enable verity, link into objects/) + self.finalize_object_tmpfile(tmpfile, size) + } + /// Finalize a tmpfile as an object. /// /// This method should be called from a blocking context (e.g., `spawn_blocking`) @@ -2060,4 +2117,35 @@ mod tests { assert_eq!(result.streams_pruned, 0); Ok(()) } + + #[test] + fn test_ensure_object_from_file() -> Result<()> { + use std::io::{Seek, SeekFrom, Write}; + + let tmp = tempdir(); + let repo = create_test_repo(&tmp.path().join("repo"))?; + + // Create test data and write to a temp file + let test_data = generate_test_data(64 * 1024, 0xBE); + let mut temp_file = crate::test::tempfile(); + temp_file.write_all(&test_data)?; + temp_file.seek(SeekFrom::Start(0))?; + + // Store via ensure_object_from_file + let object_id = repo.ensure_object_from_file(&temp_file, test_data.len() as u64)?; + + // Verify the object exists in the repository + assert!(test_object_exists(&tmp, &object_id)?); + + // Read back the object and verify contents match + let stored_data = repo.read_object(&object_id)?; + assert_eq!(stored_data, test_data); + + // Verify idempotency: calling again with same file returns same ID + temp_file.seek(SeekFrom::Start(0))?; + let object_id_2 = repo.ensure_object_from_file(&temp_file, test_data.len() as u64)?; + assert_eq!(object_id, object_id_2); + + Ok(()) + } } From e962b9f5bea3bb5152354c9505b9f95a19406542 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 29 Jan 2026 10:25:05 -0500 Subject: [PATCH 08/13] oci: Add containers-storage integration for zero-copy import Add optional containers-storage feature that enables importing container images directly from podman/buildah's local storage into composefs repositories. 
Key functionality: - import_from_containers_storage(): Main async entry point - Uses cstorage crate for Storage/Layer/Image access - Leverages tar-split metadata for zero-copy streaming - Uses ensure_object_from_file() for reflink-based storage - Shows progress via indicatif progress bar The integration uses spawn_blocking to avoid blocking the async runtime during synchronous file I/O operations. Usage: cfsctl oci pull containers-storage:alpine:latest Assisted-by: OpenCode (Opus 4.5) Signed-off-by: Colin Walters --- crates/composefs-oci/Cargo.toml | 6 + crates/composefs-oci/src/cstor.rs | 255 ++++++++++++++++++++++++++++++ crates/composefs-oci/src/lib.rs | 10 +- 3 files changed, 268 insertions(+), 3 deletions(-) create mode 100644 crates/composefs-oci/src/cstor.rs diff --git a/crates/composefs-oci/Cargo.toml b/crates/composefs-oci/Cargo.toml index 89929f3a..4ebb516e 100644 --- a/crates/composefs-oci/Cargo.toml +++ b/crates/composefs-oci/Cargo.toml @@ -10,12 +10,18 @@ repository.workspace = true rust-version.workspace = true version.workspace = true +[features] +default = ["containers-storage"] +containers-storage = ["dep:cstorage"] + [dependencies] anyhow = { version = "1.0.87", default-features = false } async-compression = { version = "0.4.0", default-features = false, features = ["tokio", "zstd", "gzip"] } +base64 = { version = "0.22", default-features = false, features = ["std"] } bytes = { version = "1", default-features = false } composefs = { workspace = true } containers-image-proxy = { version = "0.9.2", default-features = false } +cstorage = { path = "../cstorage", optional = true } hex = { version = "0.4.0", default-features = false } indicatif = { version = "0.17.0", default-features = false, features = ["tokio"] } oci-spec = { version = "0.8.0", default-features = false } diff --git a/crates/composefs-oci/src/cstor.rs b/crates/composefs-oci/src/cstor.rs new file mode 100644 index 00000000..0d8ebc25 --- /dev/null +++ b/crates/composefs-oci/src/cstor.rs @@ -0,0 +1,255 @@ +//! containers-storage integration for zero-copy layer import. +//! +//! This module provides functionality to import container images directly from +//! containers-storage (as used by podman/buildah) into composefs repositories. +//! It uses the cstorage crate to access the storage and leverages reflinks when +//! available to avoid copying file data, enabling efficient zero-copy extraction. +//! +//! This module requires the `containers-storage` feature to be enabled. +//! +//! The main entry point is [`import_from_containers_storage`], which takes an +//! image ID and imports all layers into the repository. +//! +//! # Overview +//! +//! When importing from containers-storage, we: +//! 1. Open the storage and locate the image +//! 2. For each layer, iterate through the tar-split metadata +//! 3. For large files (> INLINE_CONTENT_MAX), reflink directly to objects/ +//! 4. For small files, embed inline in the splitstream +//! 5. Handle overlay whiteouts properly +//! +//! # Example +//! +//! ```ignore +//! use composefs_oci::cstor::import_from_containers_storage; +//! +//! let repo = Arc::new(Repository::open_user()?); +//! let result = import_from_containers_storage(&repo, "sha256:abc123...", None).await?; +//! println!("Imported config: {}", result.0); +//! 
``` + +use std::io::{Read, Seek, SeekFrom}; +use std::sync::Arc; + +use anyhow::{Context, Result}; +use base64::Engine; +use indicatif::{ProgressBar, ProgressStyle}; +use sha2::Digest; +use tokio::task::spawn_blocking; + +use composefs::{fsverity::FsVerityHashValue, repository::Repository, INLINE_CONTENT_MAX}; + +use cstorage::{Image, Layer, Storage, TarSplitFdStream, TarSplitItem}; + +use crate::skopeo::{OCI_CONFIG_CONTENT_TYPE, TAR_LAYER_CONTENT_TYPE}; +use crate::{config_identifier, layer_identifier, ContentAndVerity}; + +/// Import a container image from containers-storage into the composefs repository. +/// +/// This function reads an image from the local containers-storage (podman/buildah) +/// and imports all layers using reflinks when possible, avoiding data duplication. +/// +/// # Arguments +/// * `repo` - The composefs repository to import into +/// * `image_id` - The image ID (sha256 digest or name) to import +/// * `reference` - Optional reference name to assign to the imported config +/// +/// # Returns +/// A tuple of (config_digest, config_verity_id) for the imported image. +pub async fn import_from_containers_storage( + repo: &Arc>, + image_id: &str, + reference: Option<&str>, +) -> Result> { + let repo = Arc::clone(repo); + let image_id = image_id.to_owned(); + let reference = reference.map(|s| s.to_owned()); + + spawn_blocking(move || { + import_from_containers_storage_blocking(&repo, &image_id, reference.as_deref()) + }) + .await + .context("spawn_blocking failed")? +} + +/// Synchronous implementation of containers-storage import. +/// +/// All file I/O operations in this function are blocking, so it must be called +/// from a blocking context (e.g., via `spawn_blocking`). +fn import_from_containers_storage_blocking( + repo: &Arc>, + image_id: &str, + reference: Option<&str>, +) -> Result> { + // Open containers-storage + let storage = Storage::discover().context("Failed to discover containers-storage")?; + + // Open the image - first try by ID, then fall back to name lookup + let image = Image::open(&storage, image_id) + .or_else(|_| storage.find_image_by_name(image_id)) + .with_context(|| format!("Failed to open image {}", image_id))?; + + // Get the storage layer IDs + let storage_layer_ids = image + .storage_layer_ids(&storage) + .context("Failed to get storage layer IDs from image")?; + + // Get the config to access diff_ids + let config = image.config().context("Failed to read image config")?; + let diff_ids: Vec = config + .rootfs() + .diff_ids() + .iter() + .map(|s| s.to_string()) + .collect(); + + // Ensure layer count matches + anyhow::ensure!( + storage_layer_ids.len() == diff_ids.len(), + "Layer count mismatch: {} layers in storage, {} diff_ids in config", + storage_layer_ids.len(), + diff_ids.len() + ); + + // Import each layer with progress bar + let progress = ProgressBar::new(storage_layer_ids.len() as u64); + progress.set_style( + ProgressStyle::default_bar() + .template("{spinner:.green} [{bar:40.cyan/blue}] {pos}/{len} {msg}") + .expect("valid template") + .progress_chars("=>-"), + ); + + let mut layer_refs = Vec::with_capacity(storage_layer_ids.len()); + for (storage_layer_id, diff_id) in storage_layer_ids.iter().zip(diff_ids.iter()) { + let content_id = layer_identifier(diff_id); + let short_id = &diff_id[..std::cmp::min(19, diff_id.len())]; + + let layer_verity = if let Some(existing) = repo.has_stream(&content_id)? 
{ + progress.set_message(format!("Already have {short_id}...")); + existing + } else { + progress.set_message(format!("Importing {short_id}...")); + let layer = Layer::open(&storage, storage_layer_id) + .with_context(|| format!("Failed to open layer {}", storage_layer_id))?; + import_layer_with_writer(repo, &storage, &layer, diff_id)? + }; + + layer_refs.push((diff_id.clone(), layer_verity)); + progress.inc(1); + } + progress.finish_with_message("Layers imported"); + + // Create the config splitstream with layer references + // Read the raw config JSON bytes from metadata + let config_key = format!("sha256:{}", image.id()); + let encoded_key = base64::engine::general_purpose::STANDARD.encode(config_key.as_bytes()); + let config_json = image + .read_metadata(&encoded_key) + .context("Failed to read config bytes")?; + let config_digest = format!("sha256:{}", hex::encode(sha2::Sha256::digest(&config_json))); + let content_id = config_identifier(&config_digest); + + let config_verity = if let Some(existing) = repo.has_stream(&content_id)? { + progress.println(format!("Already have config {}", config_digest)); + existing + } else { + progress.println(format!("Creating config splitstream {}", config_digest)); + let mut writer = repo.create_stream(OCI_CONFIG_CONTENT_TYPE); + + // Add layer references + for (diff_id, verity) in &layer_refs { + writer.add_named_stream_ref(diff_id, verity); + } + + // Write config inline + writer.write_inline(&config_json); + + repo.write_stream(writer, &content_id, reference)? + }; + + Ok((config_digest, config_verity)) +} + +/// Import a single layer from containers-storage using the writer pattern. +/// +/// This function reads tar-split metadata and: +/// - For large files: reflinks the file content to the objects directory +/// - For small files: embeds content inline in the splitstream +/// - Writes tar headers and padding as inline data +fn import_layer_with_writer( + repo: &Arc>, + storage: &Storage, + layer: &Layer, + diff_id: &str, +) -> Result { + let mut stream = TarSplitFdStream::new(storage, layer) + .with_context(|| format!("Failed to create tar-split stream for layer {}", layer.id()))?; + + let mut writer = repo.create_stream(TAR_LAYER_CONTENT_TYPE); + let content_id = layer_identifier(diff_id); + + while let Some(item) = stream.next()? { + match item { + TarSplitItem::Segment(bytes) => { + // Write raw segment bytes (tar headers, padding) as inline data + writer.write_inline(&bytes); + } + TarSplitItem::FileContent { fd, size, name } => { + // Convert fd to File for operations + let file = std::fs::File::from(fd); + + if size as usize > INLINE_CONTENT_MAX { + // Large file: use reflink to store as external object + let object_id = repo + .ensure_object_from_file(&file, size) + .with_context(|| format!("Failed to store object for {}", name))?; + + writer.add_external_size(size); + writer.write_reference(object_id)?; + } else { + // Small file: read and embed inline + let mut content = vec![0u8; size as usize]; + let mut file = file; + file.seek(SeekFrom::Start(0))?; + file.read_exact(&mut content)?; + writer.write_inline(&content); + } + } + } + } + + // Write the stream with the content identifier + repo.write_stream(writer, &content_id, None) +} + +/// Check if an image reference uses the containers-storage transport. +/// +/// Returns the image ID portion if the reference starts with "containers-storage:", +/// otherwise returns None. 
+pub fn parse_containers_storage_ref(imgref: &str) -> Option<&str> { + imgref.strip_prefix("containers-storage:") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_containers_storage_ref() { + assert_eq!( + parse_containers_storage_ref("containers-storage:sha256:abc123"), + Some("sha256:abc123") + ); + assert_eq!( + parse_containers_storage_ref("containers-storage:quay.io/fedora:latest"), + Some("quay.io/fedora:latest") + ); + assert_eq!( + parse_containers_storage_ref("docker://quay.io/fedora:latest"), + None + ); + assert_eq!(parse_containers_storage_ref("sha256:abc123"), None); + } +} diff --git a/crates/composefs-oci/src/lib.rs b/crates/composefs-oci/src/lib.rs index d0992211..d7a5f4c7 100644 --- a/crates/composefs-oci/src/lib.rs +++ b/crates/composefs-oci/src/lib.rs @@ -9,7 +9,10 @@ //! - Converting OCI image layers from tar format to composefs split streams //! - Creating mountable filesystems from OCI image configurations //! - Sealing containers with fs-verity hashes for integrity verification +//! - Importing from containers-storage with zero-copy reflinks (optional feature) +#[cfg(feature = "containers-storage")] +pub mod cstor; pub mod image; pub mod skopeo; pub mod tar; @@ -26,13 +29,14 @@ use composefs::{fsverity::FsVerityHashValue, repository::Repository}; use crate::skopeo::{OCI_CONFIG_CONTENT_TYPE, TAR_LAYER_CONTENT_TYPE}; use crate::tar::get_entry; -type ContentAndVerity = (String, ObjectID); +/// A tuple of (content digest, fs-verity ObjectID). +pub type ContentAndVerity = (String, ObjectID); -fn layer_identifier(diff_id: &str) -> String { +pub(crate) fn layer_identifier(diff_id: &str) -> String { format!("oci-layer-{diff_id}") } -fn config_identifier(config: &str) -> String { +pub(crate) fn config_identifier(config: &str) -> String { format!("oci-config-{config}") } From 46bd4d917f0dbe02a9e872702361f1d41dc9de57 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 29 Jan 2026 10:51:01 -0500 Subject: [PATCH 09/13] cfsctl: Add containers-storage transport support Update cfsctl to detect and handle containers-storage: image references when the containers-storage feature is enabled. When an image reference starts with 'containers-storage:', cfsctl routes to the native cstor import function instead of using skopeo, enabling zero-copy layer import via reflinks. Also improve image name matching in cstorage to support short names like 'busybox' that resolve to 'docker.io/library/busybox:latest'. The containers-storage feature is now enabled by default in cfsctl. 
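The short-name rule described above can be stated independently of the storage code: normalize a bare name to `name:latest`, then require a suffix match at a `/` boundary. A standalone sketch (hypothetical helper, not the crate's function):

```rust
/// Does a stored image name (e.g. "docker.io/library/busybox:latest")
/// match a user-supplied short name (e.g. "busybox")?
fn matches_short_name(stored: &str, requested: &str) -> bool {
    let with_tag = if requested.contains(':') {
        requested.to_string()
    } else {
        format!("{requested}:latest")
    };
    match stored.strip_suffix(&with_tag) {
        // Require an empty prefix or a '/' boundary so "box" never matches "busybox".
        Some(prefix) => prefix.is_empty() || prefix.ends_with('/'),
        None => false,
    }
}

fn main() {
    assert!(matches_short_name("docker.io/library/busybox:latest", "busybox"));
    assert!(!matches_short_name("docker.io/library/busybox:latest", "box"));
}
```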
Assisted-by: OpenCode (Opus 4.5) Signed-off-by: Colin Walters --- crates/cfsctl/Cargo.toml | 3 ++- crates/cfsctl/src/main.rs | 21 +++++++++++++++++++-- crates/cstorage/src/storage.rs | 21 +++++++++++++++++++++ 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/crates/cfsctl/Cargo.toml b/crates/cfsctl/Cargo.toml index 4901f490..fded0843 100644 --- a/crates/cfsctl/Cargo.toml +++ b/crates/cfsctl/Cargo.toml @@ -11,9 +11,10 @@ rust-version.workspace = true version.workspace = true [features] -default = ['pre-6.15', 'oci'] +default = ['pre-6.15', 'oci', 'containers-storage'] http = ['composefs-http'] oci = ['composefs-oci'] +containers-storage = ['composefs-oci/containers-storage'] rhel9 = ['composefs/rhel9'] 'pre-6.15' = ['composefs/pre-6.15'] diff --git a/crates/cfsctl/src/main.rs b/crates/cfsctl/src/main.rs index 6c81c0ea..d4833bc5 100644 --- a/crates/cfsctl/src/main.rs +++ b/crates/cfsctl/src/main.rs @@ -298,8 +298,25 @@ where println!("{}", image_id.to_id()); } OciCommand::Pull { ref image, name } => { - let (digest, verity) = - composefs_oci::pull(&Arc::new(repo), image, name.as_deref(), None).await?; + let repo = Arc::new(repo); + let (digest, verity) = { + #[cfg(feature = "containers-storage")] + if let Some(image_id) = + composefs_oci::cstor::parse_containers_storage_ref(image) + { + composefs_oci::cstor::import_from_containers_storage( + &repo, + image_id, + name.as_deref(), + ) + .await? + } else { + composefs_oci::pull(&repo, image, name.as_deref(), None).await? + } + + #[cfg(not(feature = "containers-storage"))] + composefs_oci::pull(&repo, image, name.as_deref(), None).await? + }; println!("config {digest}"); println!("verity {}", verity.to_hex()); diff --git a/crates/cstorage/src/storage.rs b/crates/cstorage/src/storage.rs index b339b4e6..6e805a3e 100644 --- a/crates/cstorage/src/storage.rs +++ b/crates/cstorage/src/storage.rs @@ -482,6 +482,27 @@ impl Storage { } } + // Try matching short name without tag (e.g., "busybox" matches "docker.io/library/busybox:latest") + // This handles the common case of just specifying the image name + let name_with_tag = if name.contains(':') { + name.to_string() + } else { + format!("{}:latest", name) + }; + + for entry in &entries { + if let Some(names) = &entry.names { + for image_name in names { + // Check if image_name ends with /name:tag pattern + if let Some(prefix) = image_name.strip_suffix(&name_with_tag) { + if prefix.is_empty() || prefix.ends_with('/') { + return self.get_image(&entry.id); + } + } + } + } + } + Err(StorageError::ImageNotFound(name.to_string())) } From 17e29981fff3bd44c523c3fc998894b2064be11b Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 29 Jan 2026 11:09:25 -0500 Subject: [PATCH 10/13] cstor: Add import statistics and route pull through library Add ImportStats struct to track containers-storage import operations: - Count of layers (total and already present) - Count of objects (reflinked, copied, already present) - Bytes (reflinked, copied, inlined) The repository now provides ensure_object_from_file_with_stats() and ObjectStoreMethod enum to report how each object was stored. The pull() function in composefs-oci now automatically routes containers-storage: references to the native cstor import path, returning a PullResult struct that includes optional ImportStats. This keeps the routing logic in the library rather than CLI. cfsctl prints statistics after a containers-storage pull showing whether reflinks were used for zero-copy import. 
Example output: Import statistics: layers: 1 (0 already present) objects: 16 total (16 reflinked, 0 copied, 0 already present) reflinked: 4.22 MiB (zero-copy) inlined: 228.90 KiB Assisted-by: OpenCode (Opus 4.5) Signed-off-by: Colin Walters --- crates/cfsctl/Cargo.toml | 1 + crates/cfsctl/src/main.rs | 49 +++++++----- crates/composefs-oci/src/cstor.rs | 106 +++++++++++++++++++++++--- crates/composefs-oci/src/lib.rs | 39 +++++++++- crates/composefs/src/repository.rs | 74 +++++++++++++++++-- crates/cstorage/Cargo.toml | 2 +- crates/cstorage/src/tar_split.rs | 115 +++++++++++------------------ 7 files changed, 276 insertions(+), 110 deletions(-) diff --git a/crates/cfsctl/Cargo.toml b/crates/cfsctl/Cargo.toml index fded0843..49cae8ca 100644 --- a/crates/cfsctl/Cargo.toml +++ b/crates/cfsctl/Cargo.toml @@ -27,6 +27,7 @@ composefs-oci = { workspace = true, optional = true } composefs-http = { workspace = true, optional = true } env_logger = { version = "0.11.0", default-features = false } hex = { version = "0.4.0", default-features = false } +indicatif = { version = "0.17.0", default-features = false } rustix = { version = "1.0.0", default-features = false, features = ["fs", "process"] } tokio = { version = "1.24.2", default-features = false } diff --git a/crates/cfsctl/src/main.rs b/crates/cfsctl/src/main.rs index d4833bc5..e0c79bd2 100644 --- a/crates/cfsctl/src/main.rs +++ b/crates/cfsctl/src/main.rs @@ -299,27 +299,38 @@ where } OciCommand::Pull { ref image, name } => { let repo = Arc::new(repo); - let (digest, verity) = { - #[cfg(feature = "containers-storage")] - if let Some(image_id) = - composefs_oci::cstor::parse_containers_storage_ref(image) - { - composefs_oci::cstor::import_from_containers_storage( - &repo, - image_id, - name.as_deref(), - ) - .await? - } else { - composefs_oci::pull(&repo, image, name.as_deref(), None).await? - } + let result = composefs_oci::pull(&repo, image, name.as_deref(), None).await?; - #[cfg(not(feature = "containers-storage"))] - composefs_oci::pull(&repo, image, name.as_deref(), None).await? - }; + println!("config {}", result.config_digest); + println!("verity {}", result.config_verity.to_hex()); - println!("config {digest}"); - println!("verity {}", verity.to_hex()); + // Print import statistics if available (containers-storage imports) + #[cfg(feature = "containers-storage")] + if let Some(stats) = result.stats { + println!(); + println!("Import statistics:"); + println!( + " layers: {} ({} already present)", + stats.layers, stats.layers_already_present + ); + println!( + " objects: {} total ({} reflinked, {} copied, {} already present)", + stats.total_objects(), + stats.objects_reflinked, + stats.objects_copied, + stats.objects_already_present + ); + if stats.used_reflinks() { + println!( + " reflinked: {} (zero-copy)", + indicatif::HumanBytes(stats.bytes_reflinked) + ); + } + if stats.bytes_copied > 0 { + println!(" copied: {}", indicatif::HumanBytes(stats.bytes_copied)); + } + println!(" inlined: {}", indicatif::HumanBytes(stats.bytes_inlined)); + } } OciCommand::Seal { ref config_name, diff --git a/crates/composefs-oci/src/cstor.rs b/crates/composefs-oci/src/cstor.rs index 0d8ebc25..7ee0ff6b 100644 --- a/crates/composefs-oci/src/cstor.rs +++ b/crates/composefs-oci/src/cstor.rs @@ -25,8 +25,9 @@ //! use composefs_oci::cstor::import_from_containers_storage; //! //! let repo = Arc::new(Repository::open_user()?); -//! let result = import_from_containers_storage(&repo, "sha256:abc123...", None).await?; +//! 
let (result, stats) = import_from_containers_storage(&repo, "sha256:abc123...", None).await?; //! println!("Imported config: {}", result.0); +//! println!("Stats: {:?}", stats); //! ``` use std::io::{Read, Seek, SeekFrom}; @@ -38,13 +39,67 @@ use indicatif::{ProgressBar, ProgressStyle}; use sha2::Digest; use tokio::task::spawn_blocking; -use composefs::{fsverity::FsVerityHashValue, repository::Repository, INLINE_CONTENT_MAX}; +use composefs::{ + fsverity::FsVerityHashValue, + repository::{ObjectStoreMethod, Repository}, + INLINE_CONTENT_MAX, +}; use cstorage::{Image, Layer, Storage, TarSplitFdStream, TarSplitItem}; use crate::skopeo::{OCI_CONFIG_CONTENT_TYPE, TAR_LAYER_CONTENT_TYPE}; use crate::{config_identifier, layer_identifier, ContentAndVerity}; +/// Statistics from a containers-storage import operation. +#[derive(Debug, Clone, Default)] +pub struct ImportStats { + /// Number of layers in the image. + pub layers: u64, + /// Number of layers that were already present (skipped). + pub layers_already_present: u64, + /// Number of objects stored via reflink (zero-copy). + pub objects_reflinked: u64, + /// Number of objects stored via regular copy (reflink not supported). + pub objects_copied: u64, + /// Number of objects that were already present (deduplicated). + pub objects_already_present: u64, + /// Total bytes stored via reflink. + pub bytes_reflinked: u64, + /// Total bytes stored via regular copy. + pub bytes_copied: u64, + /// Total bytes inlined in splitstreams (small files + headers). + pub bytes_inlined: u64, +} + +impl ImportStats { + /// Merge stats from another ImportStats into this one. + pub fn merge(&mut self, other: &ImportStats) { + self.layers += other.layers; + self.layers_already_present += other.layers_already_present; + self.objects_reflinked += other.objects_reflinked; + self.objects_copied += other.objects_copied; + self.objects_already_present += other.objects_already_present; + self.bytes_reflinked += other.bytes_reflinked; + self.bytes_copied += other.bytes_copied; + self.bytes_inlined += other.bytes_inlined; + } + + /// Returns true if any objects were stored via reflink. + pub fn used_reflinks(&self) -> bool { + self.objects_reflinked > 0 + } + + /// Total number of objects processed. + pub fn total_objects(&self) -> u64 { + self.objects_reflinked + self.objects_copied + self.objects_already_present + } + + /// Total bytes processed (external objects only, not inline). + pub fn total_external_bytes(&self) -> u64 { + self.bytes_reflinked + self.bytes_copied + } +} + /// Import a container image from containers-storage into the composefs repository. /// /// This function reads an image from the local containers-storage (podman/buildah) @@ -56,12 +111,12 @@ use crate::{config_identifier, layer_identifier, ContentAndVerity}; /// * `reference` - Optional reference name to assign to the imported config /// /// # Returns -/// A tuple of (config_digest, config_verity_id) for the imported image. +/// A tuple of ((config_digest, config_verity_id), import_stats). 
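As a usage sketch for the statistics type above: per-layer stats roll up into an image-level total via `merge`, and the helper methods drive reporting. The aggregation and printing below are illustrative, not part of the patch:

```rust
use composefs_oci::cstor::ImportStats;

/// Roll per-layer statistics up into an image-level total.
fn summarize(per_layer: &[ImportStats]) -> ImportStats {
    let mut total = ImportStats::default();
    for layer_stats in per_layer {
        total.merge(layer_stats);
    }
    total
}

/// Print a one-line summary, similar in spirit to the cfsctl output.
fn report(stats: &ImportStats) {
    println!(
        "{} objects ({} reflinked, {} copied), {} external bytes",
        stats.total_objects(),
        stats.objects_reflinked,
        stats.objects_copied,
        stats.total_external_bytes()
    );
}
```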
pub async fn import_from_containers_storage( repo: &Arc>, image_id: &str, reference: Option<&str>, -) -> Result> { +) -> Result<(ContentAndVerity, ImportStats)> { let repo = Arc::clone(repo); let image_id = image_id.to_owned(); let reference = reference.map(|s| s.to_owned()); @@ -81,7 +136,9 @@ fn import_from_containers_storage_blocking( repo: &Arc>, image_id: &str, reference: Option<&str>, -) -> Result> { +) -> Result<(ContentAndVerity, ImportStats)> { + let mut stats = ImportStats::default(); + // Open containers-storage let storage = Storage::discover().context("Failed to discover containers-storage")?; @@ -112,6 +169,8 @@ fn import_from_containers_storage_blocking( diff_ids.len() ); + stats.layers = storage_layer_ids.len() as u64; + // Import each layer with progress bar let progress = ProgressBar::new(storage_layer_ids.len() as u64); progress.set_style( @@ -128,12 +187,15 @@ fn import_from_containers_storage_blocking( let layer_verity = if let Some(existing) = repo.has_stream(&content_id)? { progress.set_message(format!("Already have {short_id}...")); + stats.layers_already_present += 1; existing } else { progress.set_message(format!("Importing {short_id}...")); let layer = Layer::open(&storage, storage_layer_id) .with_context(|| format!("Failed to open layer {}", storage_layer_id))?; - import_layer_with_writer(repo, &storage, &layer, diff_id)? + let (verity, layer_stats) = import_layer_with_writer(repo, &storage, &layer, diff_id)?; + stats.merge(&layer_stats); + verity }; layer_refs.push((diff_id.clone(), layer_verity)); @@ -165,11 +227,12 @@ fn import_from_containers_storage_blocking( // Write config inline writer.write_inline(&config_json); + stats.bytes_inlined += config_json.len() as u64; repo.write_stream(writer, &content_id, reference)? }; - Ok((config_digest, config_verity)) + Ok(((config_digest, config_verity), stats)) } /// Import a single layer from containers-storage using the writer pattern. @@ -178,12 +241,16 @@ fn import_from_containers_storage_blocking( /// - For large files: reflinks the file content to the objects directory /// - For small files: embeds content inline in the splitstream /// - Writes tar headers and padding as inline data +/// +/// Returns the layer's verity ID and import statistics for this layer. 
fn import_layer_with_writer( repo: &Arc>, storage: &Storage, layer: &Layer, diff_id: &str, -) -> Result { +) -> Result<(ObjectID, ImportStats)> { + let mut stats = ImportStats::default(); + let mut stream = TarSplitFdStream::new(storage, layer) .with_context(|| format!("Failed to create tar-split stream for layer {}", layer.id()))?; @@ -194,6 +261,7 @@ fn import_layer_with_writer( match item { TarSplitItem::Segment(bytes) => { // Write raw segment bytes (tar headers, padding) as inline data + stats.bytes_inlined += bytes.len() as u64; writer.write_inline(&bytes); } TarSplitItem::FileContent { fd, size, name } => { @@ -202,10 +270,24 @@ fn import_layer_with_writer( if size as usize > INLINE_CONTENT_MAX { // Large file: use reflink to store as external object - let object_id = repo - .ensure_object_from_file(&file, size) + let (object_id, method) = repo + .ensure_object_from_file_with_stats(&file, size) .with_context(|| format!("Failed to store object for {}", name))?; + match method { + ObjectStoreMethod::Reflinked => { + stats.objects_reflinked += 1; + stats.bytes_reflinked += size; + } + ObjectStoreMethod::Copied => { + stats.objects_copied += 1; + stats.bytes_copied += size; + } + ObjectStoreMethod::AlreadyPresent => { + stats.objects_already_present += 1; + } + } + writer.add_external_size(size); writer.write_reference(object_id)?; } else { @@ -214,6 +296,7 @@ fn import_layer_with_writer( let mut file = file; file.seek(SeekFrom::Start(0))?; file.read_exact(&mut content)?; + stats.bytes_inlined += size; writer.write_inline(&content); } } @@ -221,7 +304,8 @@ fn import_layer_with_writer( } // Write the stream with the content identifier - repo.write_stream(writer, &content_id, None) + let verity = repo.write_stream(writer, &content_id, None)?; + Ok((verity, stats)) } /// Check if an image reference uses the containers-storage transport. diff --git a/crates/composefs-oci/src/lib.rs b/crates/composefs-oci/src/lib.rs index d7a5f4c7..36ce6fc5 100644 --- a/crates/composefs-oci/src/lib.rs +++ b/crates/composefs-oci/src/lib.rs @@ -81,15 +81,50 @@ pub fn ls_layer( Ok(()) } +/// Result of a pull operation. +#[derive(Debug)] +pub struct PullResult { + /// The config digest (sha256:...). + pub config_digest: String, + /// The fs-verity hash of the config splitstream. + pub config_verity: ObjectID, + /// Import statistics (only present for containers-storage imports). + #[cfg(feature = "containers-storage")] + pub stats: Option, +} + /// Pull the target image, and add the provided tag. If this is a mountable /// image (i.e. not an artifact), it is *not* unpacked by default. +/// +/// When the `containers-storage` feature is enabled and the image reference +/// starts with `containers-storage:`, this uses the native cstor import path +/// which supports zero-copy reflinks. Otherwise, it uses skopeo. 
pub async fn pull( repo: &Arc>, imgref: &str, reference: Option<&str>, img_proxy_config: Option, -) -> Result<(String, ObjectID)> { - skopeo::pull(repo, imgref, reference, img_proxy_config).await +) -> Result> { + #[cfg(feature = "containers-storage")] + if let Some(image_id) = cstor::parse_containers_storage_ref(imgref) { + let ((config_digest, config_verity), stats) = + cstor::import_from_containers_storage(repo, image_id, reference).await?; + return Ok(PullResult { + config_digest, + config_verity, + stats: Some(stats), + }); + } + + let (config_digest, config_verity) = + skopeo::pull(repo, imgref, reference, img_proxy_config).await?; + + Ok(PullResult { + config_digest, + config_verity, + #[cfg(feature = "containers-storage")] + stats: None, + }) } fn hash(bytes: &[u8]) -> String { diff --git a/crates/composefs/src/repository.rs b/crates/composefs/src/repository.rs index a5fb36bd..460922db 100644 --- a/crates/composefs/src/repository.rs +++ b/crates/composefs/src/repository.rs @@ -115,6 +115,21 @@ use crate::{ util::{proc_self_fd, replace_symlinkat, ErrnoFilter}, }; +/// How an object was stored in the repository. +/// +/// Returned by [`Repository::ensure_object_from_file_with_stats`] to indicate +/// whether the operation used zero-copy reflinks, a regular copy, or found +/// an existing object. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ObjectStoreMethod { + /// Object was stored via reflink (zero-copy, FICLONE ioctl). + Reflinked, + /// Object was stored via regular file copy (reflink not supported). + Copied, + /// Object already existed in the repository (deduplicated). + AlreadyPresent, +} + /// Call openat() on the named subdirectory of "dirfd", possibly creating it first. /// /// We assume that the directory will probably exist (ie: we try the open first), and on ENOENT, we @@ -314,6 +329,28 @@ impl Repository { /// # Returns /// The fs-verity ObjectID of the stored object. pub fn ensure_object_from_file(&self, src: &std::fs::File, size: u64) -> Result { + let (object_id, _) = self.ensure_object_from_file_with_stats(src, size)?; + Ok(object_id) + } + + /// Ensure an object exists by reflinking from a source file, with statistics. + /// + /// Like [`ensure_object_from_file`], but also returns how the object was stored: + /// - `ObjectStoreMethod::Reflinked` - Zero-copy via FICLONE + /// - `ObjectStoreMethod::Copied` - Regular file copy (reflink not supported) + /// - `ObjectStoreMethod::AlreadyPresent` - Object already existed (deduplicated) + /// + /// # Arguments + /// * `src` - An open file descriptor to read from + /// * `size` - The size of the source file in bytes + /// + /// # Returns + /// A tuple of (ObjectID, ObjectStoreMethod). 
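A hedged usage sketch for the method documented above, written generically over the repository's hash type; the reporting is illustrative and the error type is assumed to be the crate's anyhow-based `Result`:

```rust
use std::fs::File;

use anyhow::Result;
use composefs::fsverity::FsVerityHashValue;
use composefs::repository::{ObjectStoreMethod, Repository};

/// Store `src` in the repository and report how it was stored (sketch).
fn store_with_report<ObjectID: FsVerityHashValue>(
    repo: &Repository<ObjectID>,
    src: &File,
    size: u64,
) -> Result<ObjectID> {
    let (object_id, method) = repo.ensure_object_from_file_with_stats(src, size)?;
    match method {
        ObjectStoreMethod::Reflinked => println!("stored via FICLONE (zero-copy)"),
        ObjectStoreMethod::Copied => println!("stored via regular copy"),
        ObjectStoreMethod::AlreadyPresent => println!("already present (deduplicated)"),
    }
    Ok(object_id)
}
```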
+ pub fn ensure_object_from_file_with_stats( + &self, + src: &std::fs::File, + size: u64, + ) -> Result<(ObjectID, ObjectStoreMethod)> { use rustix::fs::{fstat, ioctl_ficlone}; // Create tmpfile in objects directory @@ -327,7 +364,7 @@ impl Repository { // Try reflink first let mut tmpfile = File::from(tmpfile_fd); - match ioctl_ficlone(&tmpfile, src) { + let used_reflink = match ioctl_ficlone(&tmpfile, src) { Ok(()) => { // Reflink succeeded - verify size matches let stat = fstat(&tmpfile)?; @@ -337,6 +374,7 @@ impl Repository { size, stat.st_size ); + true } Err(Errno::OPNOTSUPP | Errno::XDEV) => { // Reflink not supported or cross-device, fall back to copy @@ -344,15 +382,26 @@ impl Repository { let mut src_clone = src.try_clone()?; src_clone.seek(SeekFrom::Start(0))?; std::io::copy(&mut src_clone, &mut tmpfile)?; + false } Err(e) => { // Other errors (EACCES, ENOSPC, etc.) should be propagated return Err(e).context("Reflinking source file to objects directory")?; } - } + }; // Finalize the tmpfile (enable verity, link into objects/) - self.finalize_object_tmpfile(tmpfile, size) + let (object_id, was_new) = self.finalize_object_tmpfile_with_stats(tmpfile, size)?; + + let method = if !was_new { + ObjectStoreMethod::AlreadyPresent + } else if used_reflink { + ObjectStoreMethod::Reflinked + } else { + ObjectStoreMethod::Copied + }; + + Ok((object_id, method)) } /// Finalize a tmpfile as an object. @@ -370,6 +419,19 @@ impl Repository { /// By letting the kernel compute the digest during verity enable, we avoid /// reading the file an extra time in userspace. pub fn finalize_object_tmpfile(&self, file: File, size: u64) -> Result { + let (id, _was_new) = self.finalize_object_tmpfile_with_stats(file, size)?; + Ok(id) + } + + /// Finalize a tmpfile as an object, returning whether it was newly created. + /// + /// Like [`finalize_object_tmpfile`], but also returns a boolean indicating + /// whether the object was newly stored (`true`) or already existed (`false`). 
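+    ///
+    /// The boolean is also `false` when another task races us and links the
+    /// same object into `objects/` first (the `EEXIST` case below).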
+ pub fn finalize_object_tmpfile_with_stats( + &self, + file: File, + size: u64, + ) -> Result<(ObjectID, bool)> { // Re-open as read-only via /proc/self/fd (required for verity enable) let fd_path = proc_self_fd(&file); let ro_fd = open(&*fd_path, OFlags::RDONLY | OFlags::CLOEXEC, Mode::empty())?; @@ -407,7 +469,7 @@ impl Repository { match statat(objects_dir, &path, AtFlags::empty()) { Ok(stat) if stat.st_size as u64 == size => { // Object already exists with correct size, skip storage - return Ok(id); + return Ok((id, false)); } _ => {} } @@ -424,8 +486,8 @@ impl Repository { &path, AtFlags::SYMLINK_FOLLOW, ) { - Ok(()) => Ok(id), - Err(Errno::EXIST) => Ok(id), // Race: another task created it + Ok(()) => Ok((id, true)), + Err(Errno::EXIST) => Ok((id, false)), // Race: another task created it Err(e) => Err(e).context("Linking tmpfile into objects directory")?, } } diff --git a/crates/cstorage/Cargo.toml b/crates/cstorage/Cargo.toml index fbfdf53d..484f39eb 100644 --- a/crates/cstorage/Cargo.toml +++ b/crates/cstorage/Cargo.toml @@ -23,7 +23,7 @@ rustix = { version = "1.0", default-features = false, features = ["fs", "std"] } serde = { version = "1.0", default-features = false, features = ["derive"] } serde_json = { version = "1.0", default-features = false, features = ["std"] } sha2 = { version = "0.10", default-features = false, features = ["std"] } -tar = { version = "0.4", default-features = false } +tar-header = { path = "../tar-header" } thiserror = { version = "2.0", default-features = false } toml = { version = "0.8", default-features = false, features = ["parse"] } zstd = { version = "0.13", default-features = false } diff --git a/crates/cstorage/src/tar_split.rs b/crates/cstorage/src/tar_split.rs index bef8fc1f..7a708c07 100644 --- a/crates/cstorage/src/tar_split.rs +++ b/crates/cstorage/src/tar_split.rs @@ -166,79 +166,52 @@ impl TarHeader { /// # Errors /// /// Returns an error if the header is too short or has an invalid checksum. - pub fn from_bytes(header: &[u8]) -> Result { - if header.len() < 512 { - return Err(StorageError::TarSplitError(format!( - "TAR header too short: {} bytes", - header.len() - ))); - } - - // Verify checksum first - let stored_checksum = { - let checksum_bytes = &header[148..156]; - let null_pos = checksum_bytes - .iter() - .position(|&b| b == 0 || b == b' ') - .unwrap_or(checksum_bytes.len()); - let s = std::str::from_utf8(&checksum_bytes[..null_pos]) - .map_err(|_| StorageError::TarSplitError("Invalid checksum field".to_string()))? - .trim(); - if s.is_empty() { - return Err(StorageError::TarSplitError( - "Empty checksum field".to_string(), - )); - } - u32::from_str_radix(s, 8).map_err(|e| { - StorageError::TarSplitError(format!("Invalid checksum '{}': {}", s, e)) - })? 
- }; - - let computed_checksum: u32 = header[..148] - .iter() - .chain(std::iter::repeat_n(&b' ', 8)) // checksum field treated as spaces - .chain(header[156..512].iter()) - .map(|&b| b as u32) - .sum(); - - if stored_checksum != computed_checksum { - return Err(StorageError::TarSplitError(format!( - "Checksum mismatch: stored {} != computed {}", - stored_checksum, computed_checksum - ))); - } - - // Extract null-terminated string from byte range - let extract_string = |start: usize, end: usize| -> String { - let bytes = &header[start..end]; - let null_pos = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len()); - String::from_utf8_lossy(&bytes[..null_pos]).to_string() - }; + pub fn from_bytes(header_bytes: &[u8]) -> Result { + let header = tar_header::Header::from_bytes(header_bytes).map_err(|e| { + StorageError::TarSplitError(format!("Failed to parse TAR header: {}", e)) + })?; - // Parse octal field from byte range - let parse_octal = |start: usize, end: usize| -> Result { - let s = extract_string(start, end); - let trimmed = s.trim(); - if trimmed.is_empty() { - return Ok(0); - } - u64::from_str_radix(trimmed, 8).map_err(|e| { - StorageError::TarSplitError(format!("Failed to parse octal '{}': {}", trimmed, e)) - }) - }; + header.verify_checksum().map_err(|e| { + StorageError::TarSplitError(format!("TAR header checksum error: {}", e)) + })?; - let name = extract_string(0, 100); - let mode = parse_octal(100, 108)? as u32; - let uid = parse_octal(108, 116)? as u32; - let gid = parse_octal(116, 124)? as u32; - let size = parse_octal(124, 136)?; - let mtime = parse_octal(136, 148)? as i64; - let typeflag = header[156]; - let linkname = extract_string(157, 257); - let uname = extract_string(265, 297); - let gname = extract_string(297, 329); - let devmajor = parse_octal(329, 337)? as u32; - let devminor = parse_octal(337, 345)? as u32; + let name = String::from_utf8_lossy(header.path_bytes()).to_string(); + let mode = header + .mode() + .map_err(|e| StorageError::TarSplitError(format!("Invalid mode: {}", e)))?; + let uid = header + .uid() + .map_err(|e| StorageError::TarSplitError(format!("Invalid uid: {}", e)))? + as u32; + let gid = header + .gid() + .map_err(|e| StorageError::TarSplitError(format!("Invalid gid: {}", e)))? + as u32; + let size = header + .entry_size() + .map_err(|e| StorageError::TarSplitError(format!("Invalid size: {}", e)))?; + let mtime = header + .mtime() + .map_err(|e| StorageError::TarSplitError(format!("Invalid mtime: {}", e)))? + as i64; + let typeflag = header.entry_type().to_byte(); + let linkname = String::from_utf8_lossy(header.link_name_bytes()).to_string(); + let uname = header + .username() + .map(|b| String::from_utf8_lossy(b).to_string()) + .unwrap_or_default(); + let gname = header + .groupname() + .map(|b| String::from_utf8_lossy(b).to_string()) + .unwrap_or_default(); + let devmajor = header + .device_major() + .map_err(|e| StorageError::TarSplitError(format!("Invalid devmajor: {}", e)))? + .unwrap_or(0); + let devminor = header + .device_minor() + .map_err(|e| StorageError::TarSplitError(format!("Invalid devminor: {}", e)))? + .unwrap_or(0); Ok(TarHeader { name, From 8609c2ac0704bacd226e095391c0573cabbcb454 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 29 Jan 2026 12:24:39 -0500 Subject: [PATCH 11/13] cstorage: Add userns helper for rootless containers-storage access When running as an unprivileged user, files in containers-storage may have restrictive permissions (e.g., /etc/shadow with mode 0600 owned by remapped UIDs). 
This commit adds a user namespace helper that enables reading these files by spawning a helper process via `podman unshare`. The helper runs as UID 0 inside the user namespace and can read any file. It communicates with the parent process via Unix socket using JSON-RPC 2.0 with SCM_RIGHTS file descriptor passing for zero-copy streaming. Key components: - userns.rs: can_bypass_file_permissions() to detect if helper is needed - userns_helper.rs: Helper process with JSON-RPC protocol, StorageProxy client, and ProxiedLayerStream for streaming layer content The cstor.rs import code now automatically uses the proxy when running as an unprivileged user, falling back to direct access when running as root or with CAP_DAC_OVERRIDE. Rather than manually setting up user namespaces (parsing /etc/subuid, calling newuidmap/newgidmap, etc.), we delegate all that complexity to `podman unshare` which already handles all the edge cases. Ported from cgwalters/cstor-rs. Assisted-by: OpenCode (Opus 4.5) Signed-off-by: Colin Walters --- Cargo.toml | 3 + crates/cfsctl/Cargo.toml | 3 +- crates/cfsctl/src/main.rs | 16 +- crates/composefs-oci/Cargo.toml | 2 +- crates/composefs-oci/src/cstor.rs | 352 +++++++-- crates/cstorage/Cargo.toml | 9 +- crates/cstorage/src/lib.rs | 11 + crates/cstorage/src/userns.rs | 67 ++ crates/cstorage/src/userns_helper.rs | 1086 ++++++++++++++++++++++++++ 9 files changed, 1484 insertions(+), 65 deletions(-) create mode 100644 crates/cstorage/src/userns.rs create mode 100644 crates/cstorage/src/userns_helper.rs diff --git a/Cargo.toml b/Cargo.toml index 8da538f8..08ddf08a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,9 @@ composefs-oci = { version = "0.3.0", path = "crates/composefs-oci", default-feat composefs-boot = { version = "0.3.0", path = "crates/composefs-boot", default-features = false } composefs-http = { version = "0.3.0", path = "crates/composefs-http", default-features = false } +# JSON-RPC with FD passing for userns helper +jsonrpc-fdpass = { git = "https://github.com/cgwalters/jsonrpc-fdpass", rev = "b30fa1d" } + [profile.dev.package.sha2] # this is *really* slow otherwise opt-level = 3 diff --git a/crates/cfsctl/Cargo.toml b/crates/cfsctl/Cargo.toml index 49cae8ca..082a4b8b 100644 --- a/crates/cfsctl/Cargo.toml +++ b/crates/cfsctl/Cargo.toml @@ -14,7 +14,7 @@ version.workspace = true default = ['pre-6.15', 'oci', 'containers-storage'] http = ['composefs-http'] oci = ['composefs-oci'] -containers-storage = ['composefs-oci/containers-storage'] +containers-storage = ['composefs-oci/containers-storage', 'cstorage'] rhel9 = ['composefs/rhel9'] 'pre-6.15' = ['composefs/pre-6.15'] @@ -25,6 +25,7 @@ composefs = { workspace = true } composefs-boot = { workspace = true } composefs-oci = { workspace = true, optional = true } composefs-http = { workspace = true, optional = true } +cstorage = { path = "../cstorage", features = ["userns-helper"], optional = true } env_logger = { version = "0.11.0", default-features = false } hex = { version = "0.4.0", default-features = false } indicatif = { version = "0.17.0", default-features = false } diff --git a/crates/cfsctl/src/main.rs b/crates/cfsctl/src/main.rs index e0c79bd2..7e7b718a 100644 --- a/crates/cfsctl/src/main.rs +++ b/crates/cfsctl/src/main.rs @@ -211,8 +211,20 @@ where Ok(repo) } -#[tokio::main] -async fn main() -> Result<()> { +fn main() -> Result<()> { + // If we were spawned as a userns helper process, handle that and exit. + // This MUST be called before the tokio runtime is created. 
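+    // (If this process is the helper, it serves JSON-RPC requests over its
+    // stdin socket using its own internal current-thread runtime, then exits
+    // without returning.)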
+ #[cfg(feature = "containers-storage")] + cstorage::init_if_helper(); + + // Now we can create the tokio runtime for the main application + tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build()? + .block_on(async_main()) +} + +async fn async_main() -> Result<()> { env_logger::init(); let args = App::parse(); diff --git a/crates/composefs-oci/Cargo.toml b/crates/composefs-oci/Cargo.toml index 4ebb516e..719e2662 100644 --- a/crates/composefs-oci/Cargo.toml +++ b/crates/composefs-oci/Cargo.toml @@ -12,7 +12,7 @@ version.workspace = true [features] default = ["containers-storage"] -containers-storage = ["dep:cstorage"] +containers-storage = ["dep:cstorage", "cstorage?/userns-helper"] [dependencies] anyhow = { version = "1.0.87", default-features = false } diff --git a/crates/composefs-oci/src/cstor.rs b/crates/composefs-oci/src/cstor.rs index 7ee0ff6b..68f6ad6e 100644 --- a/crates/composefs-oci/src/cstor.rs +++ b/crates/composefs-oci/src/cstor.rs @@ -19,6 +19,14 @@ //! 4. For small files, embed inline in the splitstream //! 5. Handle overlay whiteouts properly //! +//! # Rootless Support +//! +//! When running as an unprivileged user, files in containers-storage may have +//! restrictive permissions (e.g., `/etc/shadow` with mode 0600 owned by remapped +//! UIDs). In this case, we spawn a helper process via `podman unshare` that can +//! read all files, and it streams the content back to us via a Unix socket with +//! file descriptor passing. +//! //! # Example //! //! ```ignore @@ -30,14 +38,14 @@ //! println!("Stats: {:?}", stats); //! ``` -use std::io::{Read, Seek, SeekFrom}; +use std::os::unix::fs::FileExt; +use std::os::unix::io::OwnedFd; use std::sync::Arc; use anyhow::{Context, Result}; use base64::Engine; use indicatif::{ProgressBar, ProgressStyle}; use sha2::Digest; -use tokio::task::spawn_blocking; use composefs::{ fsverity::FsVerityHashValue, @@ -45,11 +53,17 @@ use composefs::{ INLINE_CONTENT_MAX, }; -use cstorage::{Image, Layer, Storage, TarSplitFdStream, TarSplitItem}; +use cstorage::{ + can_bypass_file_permissions, Image, Layer, ProxiedTarSplitItem, Storage, StorageProxy, + TarSplitFdStream, TarSplitItem, +}; use crate::skopeo::{OCI_CONFIG_CONTENT_TYPE, TAR_LAYER_CONTENT_TYPE}; use crate::{config_identifier, layer_identifier, ContentAndVerity}; +/// Zero padding buffer for tar block alignment (512 bytes max needed). +const ZERO_PADDING: [u8; 512] = [0u8; 512]; + /// Statistics from a containers-storage import operation. #[derive(Debug, Clone, Default)] pub struct ImportStats { @@ -105,6 +119,9 @@ impl ImportStats { /// This function reads an image from the local containers-storage (podman/buildah) /// and imports all layers using reflinks when possible, avoiding data duplication. /// +/// For rootless access, this function will automatically spawn a userns helper +/// process via `podman unshare` to read files with restrictive permissions. +/// /// # Arguments /// * `repo` - The composefs repository to import into /// * `image_id` - The image ID (sha256 digest or name) to import @@ -117,22 +134,29 @@ pub async fn import_from_containers_storage( image_id: &str, reference: Option<&str>, ) -> Result<(ContentAndVerity, ImportStats)> { - let repo = Arc::clone(repo); - let image_id = image_id.to_owned(); - let reference = reference.map(|s| s.to_owned()); - - spawn_blocking(move || { - import_from_containers_storage_blocking(&repo, &image_id, reference.as_deref()) - }) - .await - .context("spawn_blocking failed")? 
+ // Check if we can access files directly or need a proxy + if can_bypass_file_permissions() { + // Direct access - use blocking implementation + let repo = Arc::clone(repo); + let image_id = image_id.to_owned(); + let reference = reference.map(|s| s.to_owned()); + + tokio::task::spawn_blocking(move || { + import_from_containers_storage_direct(&repo, &image_id, reference.as_deref()) + }) + .await + .context("spawn_blocking failed")? + } else { + // Need proxy for rootless access + import_from_containers_storage_proxied(repo, image_id, reference).await + } } -/// Synchronous implementation of containers-storage import. +/// Direct (privileged) implementation of containers-storage import. /// /// All file I/O operations in this function are blocking, so it must be called /// from a blocking context (e.g., via `spawn_blocking`). -fn import_from_containers_storage_blocking( +fn import_from_containers_storage_direct( repo: &Arc>, image_id: &str, reference: Option<&str>, @@ -183,7 +207,7 @@ fn import_from_containers_storage_blocking( let mut layer_refs = Vec::with_capacity(storage_layer_ids.len()); for (storage_layer_id, diff_id) in storage_layer_ids.iter().zip(diff_ids.iter()) { let content_id = layer_identifier(diff_id); - let short_id = &diff_id[..std::cmp::min(19, diff_id.len())]; + let short_id = diff_id.get(..19).unwrap_or(diff_id); let layer_verity = if let Some(existing) = repo.has_stream(&content_id)? { progress.set_message(format!("Already have {short_id}...")); @@ -193,7 +217,7 @@ fn import_from_containers_storage_blocking( progress.set_message(format!("Importing {short_id}...")); let layer = Layer::open(&storage, storage_layer_id) .with_context(|| format!("Failed to open layer {}", storage_layer_id))?; - let (verity, layer_stats) = import_layer_with_writer(repo, &storage, &layer, diff_id)?; + let (verity, layer_stats) = import_layer_direct(repo, &storage, &layer, diff_id)?; stats.merge(&layer_stats); verity }; @@ -235,15 +259,121 @@ fn import_from_containers_storage_blocking( Ok(((config_digest, config_verity), stats)) } -/// Import a single layer from containers-storage using the writer pattern. -/// -/// This function reads tar-split metadata and: -/// - For large files: reflinks the file content to the objects directory -/// - For small files: embeds content inline in the splitstream -/// - Writes tar headers and padding as inline data +/// Proxied (rootless) implementation of containers-storage import. /// -/// Returns the layer's verity ID and import statistics for this layer. -fn import_layer_with_writer( +/// This spawns a helper process via `podman unshare` that can read all files +/// in containers-storage, and communicates with it via Unix socket + fd passing. +async fn import_from_containers_storage_proxied( + repo: &Arc>, + image_id: &str, + reference: Option<&str>, +) -> Result<(ContentAndVerity, ImportStats)> { + let mut stats = ImportStats::default(); + + // Spawn the proxy helper + let mut proxy = StorageProxy::spawn() + .await + .context("Failed to spawn userns helper")? 
+ .context("Expected proxy but got None")?; + + // Discover storage path for the proxy + let storage_path = discover_storage_path()?; + + // Get image info via the proxy + let image_info = proxy + .get_image(&storage_path, image_id) + .await + .context("Failed to get image info via proxy")?; + + // Ensure layer count matches + anyhow::ensure!( + image_info.storage_layer_ids.len() == image_info.layer_diff_ids.len(), + "Layer count mismatch: {} layers in storage, {} diff_ids in config", + image_info.storage_layer_ids.len(), + image_info.layer_diff_ids.len() + ); + + stats.layers = image_info.storage_layer_ids.len() as u64; + + // Import each layer with progress bar + let progress = ProgressBar::new(image_info.storage_layer_ids.len() as u64); + progress.set_style( + ProgressStyle::default_bar() + .template("{spinner:.green} [{bar:40.cyan/blue}] {pos}/{len} {msg}") + .expect("valid template") + .progress_chars("=>-"), + ); + + let mut layer_refs = Vec::with_capacity(image_info.storage_layer_ids.len()); + + for (storage_layer_id, diff_id) in image_info + .storage_layer_ids + .iter() + .zip(image_info.layer_diff_ids.iter()) + { + let content_id = layer_identifier(diff_id); + let short_id = diff_id.get(..19).unwrap_or(diff_id); + + let layer_verity = if let Some(existing) = repo.has_stream(&content_id)? { + progress.set_message(format!("Already have {short_id}...")); + stats.layers_already_present += 1; + existing + } else { + progress.set_message(format!("Importing {short_id}...")); + let (verity, layer_stats) = + import_layer_proxied(repo, &mut proxy, &storage_path, storage_layer_id, diff_id) + .await?; + stats.merge(&layer_stats); + verity + }; + + layer_refs.push((diff_id.clone(), layer_verity)); + progress.inc(1); + } + progress.finish_with_message("Layers imported"); + + // For the config, we need to read it from storage. + // The config is stored as metadata in containers-storage. + // Note: We can read the metadata directly (it doesn't have restrictive permissions). + let direct_storage = Storage::discover().context("Failed to discover containers-storage")?; + let image = Image::open(&direct_storage, &image_info.id) + .with_context(|| format!("Failed to open image {}", image_info.id))?; + + let config_key = format!("sha256:{}", image.id()); + let encoded_key = base64::engine::general_purpose::STANDARD.encode(config_key.as_bytes()); + let config_json = image + .read_metadata(&encoded_key) + .context("Failed to read config bytes")?; + let config_digest = format!("sha256:{}", hex::encode(sha2::Sha256::digest(&config_json))); + let content_id = config_identifier(&config_digest); + + let config_verity = if let Some(existing) = repo.has_stream(&content_id)? { + progress.println(format!("Already have config {}", config_digest)); + existing + } else { + progress.println(format!("Creating config splitstream {}", config_digest)); + let mut writer = repo.create_stream(OCI_CONFIG_CONTENT_TYPE); + + // Add layer references + for (diff_id, verity) in &layer_refs { + writer.add_named_stream_ref(diff_id, verity); + } + + // Write config inline + writer.write_inline(&config_json); + stats.bytes_inlined += config_json.len() as u64; + + repo.write_stream(writer, &content_id, reference)? + }; + + // Shutdown the proxy + proxy.shutdown().await.context("Failed to shutdown proxy")?; + + Ok(((config_digest, config_verity), stats)) +} + +/// Import a single layer directly (privileged mode). 
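+///
+/// Reads the layer's tar-split items and, for each one:
+/// - writes tar header segments (and trailing block padding) inline into the splitstream
+/// - stores large file content as external objects, reflinking when possible
+/// - embeds small file content inline
+///
+/// Returns the layer's splitstream verity ID together with per-layer import statistics.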
+fn import_layer_direct( repo: &Arc>, storage: &Storage, layer: &Layer, @@ -257,48 +387,30 @@ fn import_layer_with_writer( let mut writer = repo.create_stream(TAR_LAYER_CONTENT_TYPE); let content_id = layer_identifier(diff_id); + // Track padding from previous file - tar-split bundles padding with the NEXT + // file's header in Segment entries, but we need to write padding immediately + // after file content (like tar.rs does) for consistent splitstream output. + let mut prev_file_padding: usize = 0; + while let Some(item) = stream.next()? { match item { TarSplitItem::Segment(bytes) => { - // Write raw segment bytes (tar headers, padding) as inline data - stats.bytes_inlined += bytes.len() as u64; - writer.write_inline(&bytes); + // Skip the leading padding bytes (we already wrote them after prev file) + let header_bytes = &bytes[prev_file_padding..]; + stats.bytes_inlined += header_bytes.len() as u64; + writer.write_inline(header_bytes); + prev_file_padding = 0; } TarSplitItem::FileContent { fd, size, name } => { - // Convert fd to File for operations - let file = std::fs::File::from(fd); - - if size as usize > INLINE_CONTENT_MAX { - // Large file: use reflink to store as external object - let (object_id, method) = repo - .ensure_object_from_file_with_stats(&file, size) - .with_context(|| format!("Failed to store object for {}", name))?; - - match method { - ObjectStoreMethod::Reflinked => { - stats.objects_reflinked += 1; - stats.bytes_reflinked += size; - } - ObjectStoreMethod::Copied => { - stats.objects_copied += 1; - stats.bytes_copied += size; - } - ObjectStoreMethod::AlreadyPresent => { - stats.objects_already_present += 1; - } - } - - writer.add_external_size(size); - writer.write_reference(object_id)?; - } else { - // Small file: read and embed inline - let mut content = vec![0u8; size as usize]; - let mut file = file; - file.seek(SeekFrom::Start(0))?; - file.read_exact(&mut content)?; - stats.bytes_inlined += size; - writer.write_inline(&content); + process_file_content(repo, &mut writer, &mut stats, fd, size, &name)?; + + // Write padding inline immediately after file content + let padding_size = (size as usize).next_multiple_of(512) - size as usize; + if padding_size > 0 { + stats.bytes_inlined += padding_size as u64; + writer.write_inline(&ZERO_PADDING[..padding_size]); } + prev_file_padding = padding_size; } } } @@ -308,6 +420,126 @@ fn import_layer_with_writer( Ok((verity, stats)) } +/// Import a single layer via the proxy (rootless mode). +async fn import_layer_proxied( + repo: &Arc>, + proxy: &mut StorageProxy, + storage_path: &str, + layer_id: &str, + diff_id: &str, +) -> Result<(ObjectID, ImportStats)> { + let mut stats = ImportStats::default(); + + let mut writer = repo.create_stream(TAR_LAYER_CONTENT_TYPE); + let content_id = layer_identifier(diff_id); + + // Track padding from previous file - tar-split bundles padding with the NEXT + // file's header in Segment entries, but we need to write padding immediately + // after file content (like tar.rs does) for consistent splitstream output. + let mut prev_file_padding: usize = 0; + + // Stream the layer via the proxy + let mut stream = proxy + .stream_layer(storage_path, layer_id) + .await + .with_context(|| format!("Failed to start streaming layer {}", layer_id))?; + + while let Some(item) = stream + .next() + .await + .with_context(|| format!("Failed to receive stream item for layer {}", layer_id))? 
+ { + match item { + ProxiedTarSplitItem::Segment(bytes) => { + // Skip the leading padding bytes (we already wrote them after prev file) + let header_bytes = &bytes[prev_file_padding..]; + stats.bytes_inlined += header_bytes.len() as u64; + writer.write_inline(header_bytes); + prev_file_padding = 0; + } + ProxiedTarSplitItem::FileContent { fd, size, name } => { + process_file_content(repo, &mut writer, &mut stats, fd, size, &name)?; + + // Write padding inline immediately after file content + let padding_size = (size as usize).next_multiple_of(512) - size as usize; + if padding_size > 0 { + stats.bytes_inlined += padding_size as u64; + writer.write_inline(&ZERO_PADDING[..padding_size]); + } + prev_file_padding = padding_size; + } + } + } + + // Write the stream with the content identifier + let verity = repo.write_stream(writer, &content_id, None)?; + Ok((verity, stats)) +} + +/// Process file content (shared between direct and proxied modes). +fn process_file_content( + repo: &Arc>, + writer: &mut composefs::splitstream::SplitStreamWriter, + stats: &mut ImportStats, + fd: OwnedFd, + size: u64, + name: &str, +) -> Result<()> { + // Convert fd to File for operations + let file = std::fs::File::from(fd); + + if size as usize > INLINE_CONTENT_MAX { + // Large file: use reflink to store as external object + let (object_id, method) = repo + .ensure_object_from_file_with_stats(&file, size) + .with_context(|| format!("Failed to store object for {}", name))?; + + match method { + ObjectStoreMethod::Reflinked => { + stats.objects_reflinked += 1; + stats.bytes_reflinked += size; + } + ObjectStoreMethod::Copied => { + stats.objects_copied += 1; + stats.bytes_copied += size; + } + ObjectStoreMethod::AlreadyPresent => { + stats.objects_already_present += 1; + } + } + + writer.add_external_size(size); + writer.write_reference(object_id)?; + } else { + // Small file: read and embed inline + let mut content = vec![0u8; size as usize]; + file.read_exact_at(&mut content, 0)?; + stats.bytes_inlined += size; + writer.write_inline(&content); + } + + Ok(()) +} + +/// Discover the storage path by trying standard locations. +fn discover_storage_path() -> Result { + // Try user storage first (rootless podman) + if let Ok(home) = std::env::var("HOME") { + let user_path = format!("{}/.local/share/containers/storage", home); + if std::path::Path::new(&user_path).exists() { + return Ok(user_path); + } + } + + // Fall back to system storage + let system_path = "/var/lib/containers/storage"; + if std::path::Path::new(system_path).exists() { + return Ok(system_path.to_string()); + } + + anyhow::bail!("Could not find containers-storage at standard locations") +} + /// Check if an image reference uses the containers-storage transport. 
/// /// Returns the image ID portion if the reference starts with "containers-storage:", diff --git a/crates/cstorage/Cargo.toml b/crates/cstorage/Cargo.toml index 484f39eb..0710f980 100644 --- a/crates/cstorage/Cargo.toml +++ b/crates/cstorage/Cargo.toml @@ -17,17 +17,24 @@ cap-std = { version = "4.0", default-features = false } cap-std-ext = { version = "4.0", default-features = false } crc = { version = "3.0", default-features = false } flate2 = { version = "1.0", default-features = false, features = ["rust_backend"] } +jsonrpc-fdpass = { workspace = true, optional = true } oci-spec = { version = "0.8", default-features = false, features = ["image"] } rusqlite = { version = "0.34", default-features = false } -rustix = { version = "1.0", default-features = false, features = ["fs", "std"] } +rustix = { version = "1.0", default-features = false, features = ["fs", "std", "process", "thread"] } serde = { version = "1.0", default-features = false, features = ["derive"] } serde_json = { version = "1.0", default-features = false, features = ["std"] } sha2 = { version = "0.10", default-features = false, features = ["std"] } tar-header = { path = "../tar-header" } thiserror = { version = "2.0", default-features = false } +tokio = { version = "1.40", default-features = false, features = ["rt", "net", "sync"], optional = true } toml = { version = "0.8", default-features = false, features = ["parse"] } +tracing = { version = "0.1", default-features = false, optional = true } zstd = { version = "0.13", default-features = false } +[features] +default = [] +userns-helper = ["dep:jsonrpc-fdpass", "dep:tokio", "dep:tracing"] + [dev-dependencies] tempfile = { version = "3.8", default-features = false } diff --git a/crates/cstorage/src/lib.rs b/crates/cstorage/src/lib.rs index a2cc4046..6c0e8c40 100644 --- a/crates/cstorage/src/lib.rs +++ b/crates/cstorage/src/lib.rs @@ -54,6 +54,11 @@ pub mod lockfile; pub mod storage; pub mod tar_split; +// User namespace support for rootless access +pub mod userns; +#[cfg(feature = "userns-helper")] +pub mod userns_helper; + // Re-export commonly used types pub use config::{AdditionalLayerStore, StorageConfig}; pub use error::{Result, StorageError}; @@ -62,6 +67,12 @@ pub use layer::Layer; pub use lockfile::LastWrite; pub use storage::{ImageRLockGuard, LayerMetadata, LayerRLockGuard, Storage}; pub use tar_split::{TarHeader, TarSplitFdStream, TarSplitItem}; +pub use userns::can_bypass_file_permissions; +#[cfg(feature = "userns-helper")] +pub use userns_helper::{ + init_if_helper, GetImageResult, HelperError, ImageInfo, ProxiedLayerStream, + ProxiedTarSplitItem, StorageProxy, +}; // Re-export OCI spec types for convenience pub use oci_spec::image::{Descriptor, ImageConfiguration, ImageManifest}; diff --git a/crates/cstorage/src/userns.rs b/crates/cstorage/src/userns.rs new file mode 100644 index 00000000..720df82b --- /dev/null +++ b/crates/cstorage/src/userns.rs @@ -0,0 +1,67 @@ +//! User namespace utilities for rootless containers-storage access. +//! +//! This module provides utilities for determining when user namespace entry is +//! needed to access overlay storage files that are owned by remapped UIDs/GIDs. +//! +//! # Background +//! +//! When podman runs rootless, it uses user namespaces to remap UIDs. Files in +//! the overlay storage are owned by these remapped UIDs (e.g., UID 100000+N on +//! the host corresponds to UID N inside the container). These files also retain +//! their original permission bits from the container image. +//! +//! 
Files with restrictive permissions (e.g., `/etc/shadow` with mode 0600) are +//! only readable by their owner - a remapped UID we cannot access as an +//! unprivileged user. +//! +//! # Solution +//! +//! Rather than manually setting up user namespaces (parsing `/etc/subuid`, +//! calling `newuidmap`/`newgidmap`, etc.), we delegate to `podman unshare` +//! which handles all the edge cases. See [`crate::userns_helper`] for the +//! helper process that runs inside the user namespace. + +use rustix::process::getuid; +use rustix::thread::{capabilities, CapabilitySet}; + +/// Check if the current process can read arbitrary files regardless of permissions. +/// +/// This returns `true` if: +/// - The process is running as real root (UID 0), or +/// - The process has `CAP_DAC_OVERRIDE` in its effective capability set +/// +/// When this returns `true`, there's no need to spawn a userns helper for +/// file access - the process can already read any file in the storage. +pub fn can_bypass_file_permissions() -> bool { + // Real root can read anything + if getuid().is_root() { + return true; + } + + // Check for CAP_DAC_OVERRIDE capability + if let Ok(caps) = capabilities(None) { + if caps.effective.contains(CapabilitySet::DAC_OVERRIDE) { + return true; + } + } + + false +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_can_bypass_file_permissions() { + // This function should not panic and should return a consistent result + let result1 = can_bypass_file_permissions(); + let result2 = can_bypass_file_permissions(); + assert_eq!(result1, result2); + + // If we're root, it should return true + if getuid().is_root() { + assert!(result1, "root should be able to bypass permissions"); + } + } +} diff --git a/crates/cstorage/src/userns_helper.rs b/crates/cstorage/src/userns_helper.rs new file mode 100644 index 00000000..86df7656 --- /dev/null +++ b/crates/cstorage/src/userns_helper.rs @@ -0,0 +1,1086 @@ +//! User namespace helper process for privileged storage access. +//! +//! This module provides a mechanism for unprivileged processes to access +//! containers-storage content that has restrictive permissions. It works by +//! spawning a helper process inside a user namespace (via `podman unshare`) +//! that can read any file, and communicating with it via JSON-RPC over a +//! Unix socket with fd-passing. +//! +//! # Why This Is Needed +//! +//! Container images contain files with various permission bits (e.g., `/etc/shadow` +//! with mode 0600). When stored in rootless containers-storage, these files are +//! owned by remapped UIDs that the unprivileged user cannot access. Even though +//! we have tar-split metadata telling us the file structure, we still need to +//! read the actual file content. +//! +//! # Architecture +//! +//! The helper uses stdin (fd 0) for IPC, avoiding the need for unsafe code: +//! +//! ```text +//! ┌─────────────────────────────────────┐ +//! │ Parent Process │ +//! │ (unprivileged, library user) │ +//! │ │ +//! │ StorageProxy::spawn() │ +//! │ │ │ +//! │ ├─► Create socketpair │ +//! │ ├─► Spawn: podman unshare │ +//! │ │ /proc/self/exe │ +//! │ │ (child's stdin=socket) │ +//! │ │ │ +//! │ proxy.stream_layer() ───────────► │ +//! │ │ │ +//! │ ◄─── receives OwnedFd via SCM_RIGHTS│ +//! └─────────────────────────────────────┘ +//! ``` +//! +//! # Usage +//! +//! Library users must call [`init_if_helper`] early in their `main()` function: +//! +//! ```no_run +//! // This must be called before any other cstorage operations. +//! 
// If this process was spawned as a userns helper, it will +//! // serve requests and exit, never returning. +//! cstorage::userns_helper::init_if_helper(); +//! +//! // Normal application code continues here... +//! ``` + +use std::os::fd::AsFd; +use std::os::unix::io::OwnedFd; +use std::os::unix::net::UnixStream as StdUnixStream; +use std::path::Path; +use std::process::{Child, Command, Stdio}; + +use base64::prelude::*; +use jsonrpc_fdpass::transport::UnixSocketTransport; +use jsonrpc_fdpass::{JsonRpcMessage, JsonRpcRequest, JsonRpcResponse, MessageWithFds}; +use rustix::io::dup; +use rustix::process::{set_parent_process_death_signal, Signal}; +use serde::{Deserialize, Serialize}; +use tokio::net::UnixStream as TokioUnixStream; + +use crate::layer::Layer; +use crate::storage::Storage; +use crate::tar_split::{TarSplitFdStream, TarSplitItem}; +use crate::userns::can_bypass_file_permissions; + +/// Environment variable that indicates this process is a userns helper. +const HELPER_ENV: &str = "__CSTORAGE_USERNS_HELPER"; + +/// JSON-RPC 2.0 error codes. +/// +/// These codes follow the JSON-RPC 2.0 specification: +/// - Standard errors: -32700 to -32600 +/// - Server errors: -32099 to -32000 (implementation-defined) +mod error_codes { + /// Invalid params - the params passed to a method are invalid. + pub const INVALID_PARAMS: i32 = -32602; + + /// Method not found - the requested method does not exist. + pub const METHOD_NOT_FOUND: i32 = -32601; + + /// Resource not found - the requested resource (image, layer, etc.) was not found. + pub const RESOURCE_NOT_FOUND: i32 = -32000; + + /// Internal error - a server-side error occurred (I/O, storage access, etc.). + pub const INTERNAL_ERROR: i32 = -32003; +} + +/// JSON-RPC method names. +mod methods { + /// Open a file and return its fd. + pub const OPEN_FILE: &str = "userns.openFile"; + /// Shutdown the helper process. + pub const SHUTDOWN: &str = "userns.shutdown"; + /// List images in storage. + pub const LIST_IMAGES: &str = "userns.listImages"; + /// Get image metadata. + pub const GET_IMAGE: &str = "userns.getImage"; + /// Stream layer as tar-split entries with fds. + pub const STREAM_LAYER: &str = "userns.streamLayer"; +} + +/// Parameters for the open_file method. +#[derive(Debug, Serialize, Deserialize)] +pub struct OpenFileParams { + /// Path to open. + pub path: String, +} + +/// Result for the open_file method. +#[derive(Debug, Serialize, Deserialize)] +pub struct OpenFileResult { + /// True if successful (fd is passed out-of-band). + pub success: bool, +} + +/// Parameters for list_images method. +#[derive(Debug, Serialize, Deserialize)] +pub struct ListImagesParams { + /// Storage root path. + pub storage_path: String, +} + +/// Image info returned by list_images. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ImageInfo { + /// Image ID. + pub id: String, + /// Image names/tags. + pub names: Vec, +} + +/// Result for list_images method. +#[derive(Debug, Serialize, Deserialize)] +pub struct ListImagesResult { + /// List of images. + pub images: Vec, +} + +/// Parameters for get_image method. +#[derive(Debug, Serialize, Deserialize)] +pub struct GetImageParams { + /// Storage root path. + pub storage_path: String, + /// Image ID or name. + pub image_ref: String, +} + +/// Result for get_image method. +#[derive(Debug, Serialize, Deserialize)] +pub struct GetImageResult { + /// Image ID. + pub id: String, + /// Image names. + pub names: Vec, + /// Layer diff IDs (sha256:...). 
+ pub layer_diff_ids: Vec, + /// Storage layer IDs (internal IDs used by containers-storage). + pub storage_layer_ids: Vec, +} + +/// Parameters for stream_layer method. +#[derive(Debug, Serialize, Deserialize)] +pub struct StreamLayerParams { + /// Storage root path. + pub storage_path: String, + /// Layer ID (storage layer ID, not diff ID). + pub layer_id: String, +} + +/// Streaming notification for a segment. +#[derive(Debug, Serialize, Deserialize)] +pub struct StreamSegmentNotification { + /// Base64-encoded segment data. + pub data: String, +} + +/// Streaming notification for a file (fd is passed out-of-band). +#[derive(Debug, Serialize, Deserialize)] +pub struct StreamFileNotification { + /// File path in the tar. + pub name: String, + /// File size. + pub size: u64, +} + +/// Result for stream_layer method (sent after all notifications). +#[derive(Debug, Serialize, Deserialize)] +pub struct StreamLayerResult { + /// Number of items streamed. + pub items_sent: usize, +} + +/// Error type for userns helper operations. +#[derive(Debug, thiserror::Error)] +pub enum HelperError { + /// Failed to create socket. + #[error("failed to create socket: {0}")] + Socket(#[source] std::io::Error), + + /// Failed to spawn helper process. + #[error("failed to spawn helper process: {0}")] + Spawn(#[source] std::io::Error), + + /// IPC error. + #[error("IPC error: {0}")] + Ipc(String), + + /// Helper returned an error. + #[error("helper error: {0}")] + HelperError(String), + + /// I/O error. + #[error("I/O error: {0}")] + Io(#[from] std::io::Error), + + /// JSON-RPC error from the helper. + #[error("RPC error: code={code}, message={message}")] + RpcError { + /// JSON-RPC error code. + code: i32, + /// Error message. + message: String, + }, +} + +/// Check if this process was spawned as a userns helper and run the helper loop if so. +/// +/// This function **must** be called early in `main()`, before any other cstorage +/// operations. If this process was spawned as a helper, this function will: +/// +/// 1. Read from stdin (which is a Unix socket from the parent) +/// 2. Serve JSON-RPC requests for file operations +/// 3. Exit when the parent closes the connection +/// +/// If this is not a helper process, this function returns immediately. +pub fn init_if_helper() { + // Check if we're a helper via environment variable + if std::env::var(HELPER_ENV).is_err() { + return; // Not a helper, continue normal execution + } + + // Ensure we exit if parent dies (avoids orphan helper processes) + if let Err(e) = set_parent_process_death_signal(Some(Signal::TERM)) { + eprintln!("cstorage helper: failed to set parent death signal: {}", e); + // Continue anyway - this is a nice-to-have, not critical + } + + // We're a helper - stdin is our IPC socket. + // Use dup() to get a new owned fd from stdin (fd 0). + // This is safe because: + // 1. We were spawned with stdin set to a socket + // 2. dup() gives us a new fd that we own + // 3. 
We use std::io::stdin().as_fd() which is the safe way to get the fd + let stdin_fd = match dup(std::io::stdin().as_fd()) { + Ok(fd) => fd, + Err(e) => { + eprintln!("cstorage helper: failed to dup stdin: {}", e); + std::process::exit(1); + } + }; + let std_socket = StdUnixStream::from(stdin_fd); + + // Run the helper loop (never returns on success) + if let Err(e) = run_helper_loop_blocking(std_socket) { + eprintln!("cstorage helper: error in helper loop: {}", e); + std::process::exit(1); + } + std::process::exit(0); +} + +/// Run the helper loop synchronously by creating a tokio runtime. +fn run_helper_loop_blocking(std_socket: StdUnixStream) -> std::result::Result<(), HelperError> { + // Set non-blocking for tokio + std_socket.set_nonblocking(true)?; + + // Create a tokio runtime for the helper + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .map_err(|e| HelperError::Ipc(format!("failed to create tokio runtime: {}", e)))?; + + rt.block_on(run_helper_loop_async(std_socket)) +} + +/// Run the helper loop, serving requests from the parent. +async fn run_helper_loop_async(std_socket: StdUnixStream) -> std::result::Result<(), HelperError> { + // Convert std socket to tokio socket + let tokio_socket = TokioUnixStream::from_std(std_socket) + .map_err(|e| HelperError::Ipc(format!("failed to convert socket: {}", e)))?; + + let transport = UnixSocketTransport::new(tokio_socket); + let (mut sender, mut receiver) = transport.split(); + + tracing::debug!("userns helper: starting request loop"); + + loop { + let msg_with_fds = match receiver.receive().await { + Ok(m) => m, + Err(jsonrpc_fdpass::Error::ConnectionClosed) => { + tracing::debug!("userns helper: connection closed"); + return Ok(()); + } + Err(e) => { + return Err(HelperError::Ipc(format!( + "failed to receive message: {}", + e + ))); + } + }; + + match msg_with_fds.message { + JsonRpcMessage::Request(request) => { + let id = request.id.clone(); + + // Handle stream_layer specially since it needs to send multiple messages + if request.method == methods::STREAM_LAYER { + if let Err((code, msg)) = handle_stream_layer(&request, &mut sender).await { + let error = jsonrpc_fdpass::JsonRpcError::owned(code, msg, None::<()>); + let response = JsonRpcResponse::error(error, id); + let message = + MessageWithFds::new(JsonRpcMessage::Response(response), vec![]); + sender.send(message).await.map_err(|e| { + HelperError::Ipc(format!("failed to send error response: {}", e)) + })?; + } + // Success response is sent by handle_stream_layer + continue; + } + + let (result, fds) = handle_request(&request); + + match result { + Ok(response_value) => { + let response = JsonRpcResponse::success(response_value, id); + let message = MessageWithFds::new(JsonRpcMessage::Response(response), fds); + sender.send(message).await.map_err(|e| { + HelperError::Ipc(format!("failed to send response: {}", e)) + })?; + } + Err((code, message_str)) => { + let error = + jsonrpc_fdpass::JsonRpcError::owned(code, message_str, None::<()>); + let response = JsonRpcResponse::error(error, id); + let message = + MessageWithFds::new(JsonRpcMessage::Response(response), vec![]); + sender.send(message).await.map_err(|e| { + HelperError::Ipc(format!("failed to send error response: {}", e)) + })?; + } + } + + // Check for shutdown request (handle after sending response) + if request.method == methods::SHUTDOWN { + tracing::debug!("userns helper: received shutdown request"); + return Ok(()); + } + } + JsonRpcMessage::Notification(notif) => { + if 
notif.method == methods::SHUTDOWN { + tracing::debug!("userns helper: received shutdown notification"); + return Ok(()); + } + // Ignore other notifications + } + JsonRpcMessage::Response(_) => { + // Unexpected response - ignore + } + } + } +} + +/// Handle stream_layer request - sends multiple notifications with fds. +async fn handle_stream_layer( + request: &JsonRpcRequest, + sender: &mut jsonrpc_fdpass::transport::Sender, +) -> std::result::Result<(), (i32, String)> { + let params: StreamLayerParams = request + .params + .as_ref() + .and_then(|p| serde_json::from_value(p.clone()).ok()) + .ok_or(( + error_codes::INVALID_PARAMS, + "invalid params for streamLayer".to_string(), + ))?; + + let storage = Storage::open(¶ms.storage_path).map_err(|e| { + ( + error_codes::INTERNAL_ERROR, + format!("failed to open storage: {}", e), + ) + })?; + + let layer = Layer::open(&storage, ¶ms.layer_id).map_err(|e| { + ( + error_codes::RESOURCE_NOT_FOUND, + format!("layer not found: {}", e), + ) + })?; + + let mut stream = TarSplitFdStream::new(&storage, &layer).map_err(|e| { + ( + error_codes::INTERNAL_ERROR, + format!("failed to create tar-split stream: {}", e), + ) + })?; + + let mut items_sent = 0usize; + + // Stream all items as notifications + while let Some(item) = stream + .next() + .map_err(|e| (error_codes::INTERNAL_ERROR, format!("stream error: {}", e)))? + { + match item { + TarSplitItem::Segment(bytes) => { + // Send segment as base64-encoded notification + let params = StreamSegmentNotification { + data: BASE64_STANDARD.encode(&bytes), + }; + let notif = jsonrpc_fdpass::JsonRpcNotification::new( + "stream.segment".to_string(), + Some(serde_json::to_value(¶ms).unwrap()), + ); + let message = MessageWithFds::new(JsonRpcMessage::Notification(notif), vec![]); + sender.send(message).await.map_err(|e| { + ( + error_codes::INTERNAL_ERROR, + format!("failed to send segment: {}", e), + ) + })?; + items_sent += 1; + } + TarSplitItem::FileContent { fd, size, name } => { + // Send file notification with fd + let params = StreamFileNotification { name, size }; + let notif = jsonrpc_fdpass::JsonRpcNotification::new( + "stream.file".to_string(), + Some(serde_json::to_value(¶ms).unwrap()), + ); + let message = MessageWithFds::new(JsonRpcMessage::Notification(notif), vec![fd]); + sender.send(message).await.map_err(|e| { + ( + error_codes::INTERNAL_ERROR, + format!("failed to send file: {}", e), + ) + })?; + items_sent += 1; + } + } + } + + // Send success response + let result = StreamLayerResult { items_sent }; + let response = + JsonRpcResponse::success(serde_json::to_value(result).unwrap(), request.id.clone()); + let message = MessageWithFds::new(JsonRpcMessage::Response(response), vec![]); + sender.send(message).await.map_err(|e| { + ( + error_codes::INTERNAL_ERROR, + format!("failed to send response: {}", e), + ) + })?; + + Ok(()) +} + +/// Handle a JSON-RPC request. 
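+///
+/// Returns the JSON result (or an error code/message pair) together with any
+/// file descriptors that should be passed back alongside the response via
+/// SCM_RIGHTS.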
+fn handle_request( + request: &JsonRpcRequest, +) -> ( + std::result::Result, + Vec, +) { + match request.method.as_str() { + methods::OPEN_FILE => { + let params: OpenFileParams = match request + .params + .as_ref() + .and_then(|p| serde_json::from_value(p.clone()).ok()) + { + Some(p) => p, + None => { + return ( + Err(( + error_codes::INVALID_PARAMS, + "invalid params: missing 'path' field".to_string(), + )), + vec![], + ); + } + }; + + match std::fs::File::open(¶ms.path) { + Ok(file) => { + let fd: OwnedFd = file.into(); + let result = OpenFileResult { success: true }; + (Ok(serde_json::to_value(result).unwrap()), vec![fd]) + } + Err(e) => ( + Err(( + error_codes::INTERNAL_ERROR, + format!("failed to open file: {}", e), + )), + vec![], + ), + } + } + methods::LIST_IMAGES => handle_list_images(request), + methods::GET_IMAGE => handle_get_image(request), + methods::SHUTDOWN => { + // Just return success - the loop will exit after sending the response + (Ok(serde_json::json!({"success": true})), vec![]) + } + _ => ( + Err(( + error_codes::METHOD_NOT_FOUND, + format!("method not found: {}", request.method), + )), + vec![], + ), + } +} + +/// Handle list_images request. +fn handle_list_images( + request: &JsonRpcRequest, +) -> ( + std::result::Result, + Vec, +) { + let params: ListImagesParams = match request + .params + .as_ref() + .and_then(|p| serde_json::from_value(p.clone()).ok()) + { + Some(p) => p, + None => { + return ( + Err(( + error_codes::INVALID_PARAMS, + "invalid params for listImages".to_string(), + )), + vec![], + ); + } + }; + + let storage = match Storage::open(¶ms.storage_path) { + Ok(s) => s, + Err(e) => { + return ( + Err(( + error_codes::INTERNAL_ERROR, + format!("failed to open storage: {}", e), + )), + vec![], + ); + } + }; + + let images = match storage.list_images() { + Ok(imgs) => imgs, + Err(e) => { + return ( + Err(( + error_codes::INTERNAL_ERROR, + format!("failed to list images: {}", e), + )), + vec![], + ); + } + }; + + let image_infos: Vec = images + .iter() + .map(|img| ImageInfo { + id: img.id().to_string(), + names: img.names(&storage).unwrap_or_default(), + }) + .collect(); + + let result = ListImagesResult { + images: image_infos, + }; + (Ok(serde_json::to_value(result).unwrap()), vec![]) +} + +/// Handle get_image request. 
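+///
+/// Resolves the image by ID first, falling back to a name lookup, and returns
+/// its diff IDs plus the corresponding storage layer IDs.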
+fn handle_get_image( + request: &JsonRpcRequest, +) -> ( + std::result::Result, + Vec, +) { + let params: GetImageParams = match request + .params + .as_ref() + .and_then(|p| serde_json::from_value(p.clone()).ok()) + { + Some(p) => p, + None => { + return ( + Err(( + error_codes::INVALID_PARAMS, + "invalid params for getImage".to_string(), + )), + vec![], + ); + } + }; + + let storage = match Storage::open(¶ms.storage_path) { + Ok(s) => s, + Err(e) => { + return ( + Err(( + error_codes::INTERNAL_ERROR, + format!("failed to open storage: {}", e), + )), + vec![], + ); + } + }; + + // Try by ID first, then by name + let image = match crate::image::Image::open(&storage, ¶ms.image_ref) { + Ok(img) => img, + Err(_) => match storage.find_image_by_name(¶ms.image_ref) { + Ok(img) => img, + Err(e) => { + return ( + Err(( + error_codes::RESOURCE_NOT_FOUND, + format!("image not found: {}", e), + )), + vec![], + ); + } + }, + }; + + let config = match image.config() { + Ok(cfg) => cfg, + Err(e) => { + return ( + Err(( + error_codes::INTERNAL_ERROR, + format!("failed to read config: {}", e), + )), + vec![], + ); + } + }; + + let diff_ids: Vec = config + .rootfs() + .diff_ids() + .iter() + .map(|s| s.to_string()) + .collect(); + + let storage_layer_ids = match image.storage_layer_ids(&storage) { + Ok(ids) => ids, + Err(e) => { + return ( + Err(( + error_codes::INTERNAL_ERROR, + format!("failed to get storage layer IDs: {}", e), + )), + vec![], + ); + } + }; + + let result = GetImageResult { + id: image.id().to_string(), + names: image.names(&storage).unwrap_or_default(), + layer_diff_ids: diff_ids, + storage_layer_ids, + }; + (Ok(serde_json::to_value(result).unwrap()), vec![]) +} + +/// Proxy for accessing files via the userns helper process. +/// +/// This spawns a helper process (via `podman unshare`) that runs inside a +/// user namespace and can read files with restrictive permissions. File +/// descriptors are passed back via SCM_RIGHTS. +pub struct StorageProxy { + child: Child, + sender: jsonrpc_fdpass::transport::Sender, + receiver: jsonrpc_fdpass::transport::Receiver, + next_id: u64, +} + +impl std::fmt::Debug for StorageProxy { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("StorageProxy") + .field("child_pid", &self.child.id()) + .finish_non_exhaustive() + } +} + +impl StorageProxy { + /// Spawn a userns helper process. + /// + /// If the current process can already bypass file permissions (running as + /// root or has CAP_DAC_OVERRIDE), this returns `Ok(None)` since no helper + /// is needed. + pub async fn spawn() -> std::result::Result, HelperError> { + // Check if we even need a helper + if can_bypass_file_permissions() { + return Ok(None); + } + + Self::spawn_helper().await.map(Some) + } + + /// Spawn the helper unconditionally. + async fn spawn_helper() -> std::result::Result { + let exe = std::fs::read_link("/proc/self/exe").map_err(HelperError::Io)?; + Self::spawn_helper_with_binary(exe).await + } + + /// Spawn the helper with a specific binary path. + /// + /// This is used when the default /proc/self/exe is not suitable, + /// such as when running from a test harness. + async fn spawn_helper_with_binary( + exe: std::path::PathBuf, + ) -> std::result::Result { + // Create a socket pair - one end for us, one for the child's stdin + let (parent_sock, child_sock) = StdUnixStream::pair().map_err(HelperError::Socket)?; + + // Spawn via podman unshare, with child_sock as the child's stdin. 
+ // We use `env` to set the HELPER_ENV because podman unshare doesn't + // propagate the parent's environment to the inner command. + let child = Command::new("podman") + .arg("unshare") + .arg("env") + .arg(format!("{}=1", HELPER_ENV)) + .arg(&exe) + .stdin(Stdio::from(OwnedFd::from(child_sock))) + .stdout(Stdio::inherit()) + .stderr(Stdio::inherit()) + .spawn() + .map_err(HelperError::Spawn)?; + + // Convert our socket to async + parent_sock.set_nonblocking(true)?; + let tokio_socket = TokioUnixStream::from_std(parent_sock) + .map_err(|e| HelperError::Ipc(format!("failed to convert socket: {}", e)))?; + + let transport = UnixSocketTransport::new(tokio_socket); + let (sender, receiver) = transport.split(); + + Ok(Self { + child, + sender, + receiver, + next_id: 1, + }) + } + + /// Open a file via the helper, returning its fd. + /// + /// # Arguments + /// + /// * `path` - The path to open (should be absolute) + /// + /// # Returns + /// + /// The opened file descriptor, which can be used for reading. + pub async fn open_file( + &mut self, + path: impl AsRef, + ) -> std::result::Result { + let params = OpenFileParams { + path: path.as_ref().to_string_lossy().to_string(), + }; + + let id = self.next_id; + self.next_id += 1; + + let request = JsonRpcRequest::new( + methods::OPEN_FILE.to_string(), + Some(serde_json::to_value(¶ms).unwrap()), + serde_json::Value::Number(id.into()), + ); + + let message = MessageWithFds::new(JsonRpcMessage::Request(request), vec![]); + self.sender + .send(message) + .await + .map_err(|e| HelperError::Ipc(format!("failed to send request: {}", e)))?; + + // Receive response + let response = self + .receiver + .receive() + .await + .map_err(|e| HelperError::Ipc(format!("failed to receive response: {}", e)))?; + + match response.message { + JsonRpcMessage::Response(resp) => { + if let Some(error) = resp.error { + return Err(HelperError::RpcError { + code: error.code(), + message: error.message().to_string(), + }); + } + + // The fd should be in the response + if response.file_descriptors.is_empty() { + return Err(HelperError::Ipc( + "response missing file descriptor".to_string(), + )); + } + + Ok(response.file_descriptors.into_iter().next().unwrap()) + } + other => Err(HelperError::Ipc(format!( + "unexpected message type: {:?}", + other + ))), + } + } + + /// Shutdown the helper process gracefully. + pub async fn shutdown(mut self) -> std::result::Result<(), HelperError> { + let id = self.next_id; + + let request = JsonRpcRequest::new( + methods::SHUTDOWN.to_string(), + None, + serde_json::Value::Number(id.into()), + ); + + let message = MessageWithFds::new(JsonRpcMessage::Request(request), vec![]); + // Ignore send errors - the child may have already exited + let _ = self.sender.send(message).await; + + // Wait for the child to exit + let _ = self.child.wait(); + + Ok(()) + } + + /// List images in storage via the helper. + pub async fn list_images( + &mut self, + storage_path: &str, + ) -> std::result::Result, HelperError> { + let params = ListImagesParams { + storage_path: storage_path.to_string(), + }; + let result: ListImagesResult = self.call(methods::LIST_IMAGES, ¶ms).await?; + Ok(result.images) + } + + /// Get image information via the helper. 
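+    ///
+    /// Illustrative sketch (the storage path and image ID are placeholders):
+    ///
+    /// ```ignore
+    /// let info = proxy.get_image("/var/lib/containers/storage", "IMAGE_ID").await?;
+    /// assert_eq!(info.layer_diff_ids.len(), info.storage_layer_ids.len());
+    /// ```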
+    pub async fn get_image(
+        &mut self,
+        storage_path: &str,
+        image_ref: &str,
+    ) -> std::result::Result<GetImageResult, HelperError> {
+        let params = GetImageParams {
+            storage_path: storage_path.to_string(),
+            image_ref: image_ref.to_string(),
+        };
+        self.call(methods::GET_IMAGE, &params).await
+    }
+
+    /// Start streaming a layer's tar-split content.
+    ///
+    /// Returns a stream that yields `ProxiedTarSplitItem`s. The helper sends
+    /// notifications with file descriptors for each file in the layer.
+    pub async fn stream_layer(
+        &mut self,
+        storage_path: &str,
+        layer_id: &str,
+    ) -> std::result::Result<ProxiedLayerStream<'_>, HelperError> {
+        let params = StreamLayerParams {
+            storage_path: storage_path.to_string(),
+            layer_id: layer_id.to_string(),
+        };
+
+        let id = self.next_id;
+        self.next_id += 1;
+
+        let request = JsonRpcRequest::new(
+            methods::STREAM_LAYER.to_string(),
+            Some(serde_json::to_value(&params).unwrap()),
+            serde_json::Value::Number(id.into()),
+        );
+
+        let message = MessageWithFds::new(JsonRpcMessage::Request(request), vec![]);
+        self.sender
+            .send(message)
+            .await
+            .map_err(|e| HelperError::Ipc(format!("failed to send stream_layer request: {}", e)))?;
+
+        Ok(ProxiedLayerStream {
+            receiver: &mut self.receiver,
+            request_id: id,
+            finished: false,
+        })
+    }
+
+    /// Make an RPC call and parse the response.
+    async fn call<P: Serialize, R: for<'de> Deserialize<'de>>(
+        &mut self,
+        method: &str,
+        params: &P,
+    ) -> std::result::Result<R, HelperError> {
+        let id = self.next_id;
+        self.next_id += 1;
+
+        let request = JsonRpcRequest::new(
+            method.to_string(),
+            Some(serde_json::to_value(params).unwrap()),
+            serde_json::Value::Number(id.into()),
+        );
+
+        let message = MessageWithFds::new(JsonRpcMessage::Request(request), vec![]);
+        self.sender
+            .send(message)
+            .await
+            .map_err(|e| HelperError::Ipc(format!("failed to send request: {}", e)))?;
+
+        // Receive response
+        let response = self
+            .receiver
+            .receive()
+            .await
+            .map_err(|e| HelperError::Ipc(format!("failed to receive response: {}", e)))?;
+
+        match response.message {
+            JsonRpcMessage::Response(resp) => {
+                if let Some(error) = resp.error {
+                    return Err(HelperError::RpcError {
+                        code: error.code(),
+                        message: error.message().to_string(),
+                    });
+                }
+
+                let result = resp
+                    .result
+                    .ok_or_else(|| HelperError::Ipc("response missing result".to_string()))?;
+
+                serde_json::from_value(result)
+                    .map_err(|e| HelperError::Ipc(format!("failed to parse result: {}", e)))
+            }
+            other => Err(HelperError::Ipc(format!(
+                "unexpected message type: {:?}",
+                other
+            ))),
+        }
+    }
+}
+
+/// Item received from a proxied layer stream.
+#[derive(Debug)]
+pub enum ProxiedTarSplitItem {
+    /// Raw segment bytes (tar header/padding).
+    Segment(Vec<u8>),
+    /// File content with metadata and fd.
+    FileContent {
+        /// File descriptor for the content.
+        fd: OwnedFd,
+        /// File size.
+        size: u64,
+        /// File name/path.
+        name: String,
+    },
+}
+
+/// Stream of tar-split items received via the helper proxy.
+pub struct ProxiedLayerStream<'a> {
+    receiver: &'a mut jsonrpc_fdpass::transport::Receiver,
+    request_id: u64,
+    finished: bool,
+}
+
+impl std::fmt::Debug for ProxiedLayerStream<'_> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("ProxiedLayerStream")
+            .field("request_id", &self.request_id)
+            .field("finished", &self.finished)
+            .finish_non_exhaustive()
+    }
+}
+
+impl<'a> ProxiedLayerStream<'a> {
+    /// Get the next item from the stream.
+    ///
+    /// Returns `None` when the stream is complete.
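+    ///
+    /// # Example
+    ///
+    /// A sketch of draining a stream obtained from [`StorageProxy::stream_layer`]:
+    ///
+    /// ```ignore
+    /// while let Some(item) = stream.next().await? {
+    ///     match item {
+    ///         ProxiedTarSplitItem::Segment(bytes) => {
+    ///             // Raw tar header/padding bytes, passed through verbatim.
+    ///         }
+    ///         ProxiedTarSplitItem::FileContent { fd, size, name } => {
+    ///             // `fd` is an OwnedFd for the file's content; read up to
+    ///             // `size` bytes from it.
+    ///         }
+    ///     }
+    /// }
+    /// ```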
+    pub async fn next(&mut self) -> std::result::Result<Option<ProxiedTarSplitItem>, HelperError> {
+        if self.finished {
+            return Ok(None);
+        }
+
+        let msg_with_fds = match self.receiver.receive().await {
+            Ok(m) => m,
+            Err(jsonrpc_fdpass::Error::ConnectionClosed) => {
+                self.finished = true;
+                return Ok(None);
+            }
+            Err(e) => {
+                return Err(HelperError::Ipc(format!("failed to receive: {}", e)));
+            }
+        };
+
+        let mut fds = msg_with_fds.file_descriptors;
+
+        match msg_with_fds.message {
+            JsonRpcMessage::Notification(notif) => {
+                let params = notif.params.unwrap_or(serde_json::Value::Null);
+
+                match notif.method.as_str() {
+                    "stream.segment" => {
+                        let seg: StreamSegmentNotification = serde_json::from_value(params)
+                            .map_err(|e| {
+                                HelperError::Ipc(format!("invalid segment params: {}", e))
+                            })?;
+
+                        let bytes = BASE64_STANDARD.decode(&seg.data).map_err(|e| {
+                            HelperError::Ipc(format!("failed to decode segment: {}", e))
+                        })?;
+
+                        Ok(Some(ProxiedTarSplitItem::Segment(bytes)))
+                    }
+                    "stream.file" => {
+                        let file: StreamFileNotification = serde_json::from_value(params)
+                            .map_err(|e| HelperError::Ipc(format!("invalid file params: {}", e)))?;
+
+                        if fds.is_empty() {
+                            return Err(HelperError::Ipc(
+                                "file notification missing fd".to_string(),
+                            ));
+                        }
+
+                        let fd = fds.remove(0);
+                        Ok(Some(ProxiedTarSplitItem::FileContent {
+                            fd,
+                            size: file.size,
+                            name: file.name,
+                        }))
+                    }
+                    other => Err(HelperError::Ipc(format!(
+                        "unknown notification method: {}",
+                        other
+                    ))),
+                }
+            }
+            JsonRpcMessage::Response(resp) => {
+                // Final response - stream is complete
+                self.finished = true;
+
+                if let Some(error) = resp.error {
+                    return Err(HelperError::RpcError {
+                        code: error.code(),
+                        message: error.message().to_string(),
+                    });
+                }
+
+                Ok(None)
+            }
+            JsonRpcMessage::Request(_) => Err(HelperError::Ipc(
+                "unexpected request from helper".to_string(),
+            )),
+        }
+    }
+}
+
+impl Drop for StorageProxy {
+    fn drop(&mut self) {
+        // Try to kill the child if it's still running
+        let _ = self.child.kill();
+    }
+}

From 8728f3ec0f0ecd88b7d00380fbc4e5f5f707fef1 Mon Sep 17 00:00:00 2001
From: Colin Walters
Date: Thu, 29 Jan 2026 16:18:04 -0500
Subject: [PATCH 12/13] cstorage: Strip sha256: prefix from image IDs

Image IDs from `podman build --iidfile` include the `sha256:` prefix, but
containers-storage directories use just the hex digest. Strip the prefix
when opening images to support both formats.

Fixes CI failure where `cfsctl oci pull containers-storage:sha256:...`
failed with "image not found".

See https://github.com/containers/skopeo/issues/2750

Assisted-by: OpenCode (Opus 4.5)
---
 crates/cstorage/src/image.rs | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/crates/cstorage/src/image.rs b/crates/cstorage/src/image.rs
index c086d0db..0bcc1c13 100644
--- a/crates/cstorage/src/image.rs
+++ b/crates/cstorage/src/image.rs
@@ -46,10 +46,19 @@ pub struct Image {
 impl Image {
     /// Open an image by ID using fd-relative operations.
     ///
+    /// The ID can be provided with or without a `sha256:` prefix - the prefix
+    /// will be stripped if present, since containers-storage directories use
+    /// just the hex digest.
+    ///
     /// # Errors
     ///
     /// Returns an error if the image directory doesn't exist or cannot be opened.
     pub fn open(storage: &Storage, id: &str) -> Result<Self> {
+        // Strip the sha256: prefix if present - containers-storage directories
+        // use just the hex digest, but image IDs from podman (e.g. via --iidfile)
+        // include the prefix.
See https://github.com/containers/skopeo/issues/2750 + let id = id.strip_prefix("sha256:").unwrap_or(id); + // Open overlay-images directory from storage root let images_dir = storage.root_dir().open_dir("overlay-images")?; From 6c16d202852b21aa7d7bbb957afbc5a5ab227267 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 29 Jan 2026 16:48:09 -0500 Subject: [PATCH 13/13] integration-tests: Add crate with cstor/skopeo equivalence test Add a libtest-mimic based integration test suite that: - Runs as a userns helper (calls init_if_helper at startup) - Tests cfsctl CLI functionality - Compares containers-storage vs skopeo import paths The equivalence test builds a minimal podman image, imports via both paths, and verifies both produce identical splitstream digests. Assisted-by: OpenCode (Claude claude-opus-4-5-20250514) --- Justfile | 4 + crates/composefs/src/repository.rs | 2 - crates/integration-tests/Cargo.toml | 35 +++++ crates/integration-tests/src/cleanup.rs | 54 ++++++++ crates/integration-tests/src/lib.rs | 164 ++++++++++++++++++++++ crates/integration-tests/src/main.rs | 175 ++++++++++++++++++++++++ 6 files changed, 432 insertions(+), 2 deletions(-) create mode 100644 crates/integration-tests/Cargo.toml create mode 100644 crates/integration-tests/src/cleanup.rs create mode 100644 crates/integration-tests/src/lib.rs create mode 100644 crates/integration-tests/src/main.rs diff --git a/Justfile b/Justfile index a32ca799..f33aa96b 100644 --- a/Justfile +++ b/Justfile @@ -39,6 +39,10 @@ test-all: build-cstorage: cargo build --workspace --features containers-storage +# Run integration tests (requires podman and skopeo) +integration-test: build-release + cargo run --release -p integration-tests --bin integration-tests + # Clean build artifacts clean: cargo clean diff --git a/crates/composefs/src/repository.rs b/crates/composefs/src/repository.rs index 460922db..b0b6fdd2 100644 --- a/crates/composefs/src/repository.rs +++ b/crates/composefs/src/repository.rs @@ -1406,8 +1406,6 @@ impl Repository { #[cfg(test)] mod tests { - use std::vec; - use super::*; use crate::fsverity::Sha512HashValue; use crate::test::tempdir; diff --git a/crates/integration-tests/Cargo.toml b/crates/integration-tests/Cargo.toml new file mode 100644 index 00000000..5678d4b9 --- /dev/null +++ b/crates/integration-tests/Cargo.toml @@ -0,0 +1,35 @@ +[package] +name = "integration-tests" +version = "0.0.0" +edition.workspace = true +license.workspace = true +publish = false +description = "Integration tests for composefs-rs (not published)" + +# The main integration test binary +[[bin]] +name = "integration-tests" +path = "src/main.rs" + +[[bin]] +name = "test-cleanup" +path = "src/cleanup.rs" + +[dependencies] +anyhow = "1" +composefs = { workspace = true } +composefs-oci = { path = "../composefs-oci", features = ["containers-storage"] } +cstorage = { path = "../cstorage", features = ["userns-helper"] } +hex = "0.4" +libtest-mimic = "0.8" +rustix = { version = "1", features = ["fs"] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" +tempfile = "3" +tokio = { version = "1", features = ["rt-multi-thread", "macros"] } +xshell = "0.2" + +# This crate doesn't follow the same linting rules +[lints.rust] +missing_docs = "allow" +missing_debug_implementations = "allow" diff --git a/crates/integration-tests/src/cleanup.rs b/crates/integration-tests/src/cleanup.rs new file mode 100644 index 00000000..6a2ef8d5 --- /dev/null +++ b/crates/integration-tests/src/cleanup.rs @@ -0,0 +1,54 @@ +//! 
Cleanup utility for integration test resources
+//!
+//! This binary cleans up any leftover resources from integration tests.
+
+use std::process::Command;
+
+use integration_tests::INTEGRATION_TEST_LABEL;
+
+fn main() {
+    println!("Cleaning up integration test resources...");
+
+    // Clean up podman containers with our label
+    let output = Command::new("podman")
+        .args([
+            "ps",
+            "-a",
+            "--filter",
+            &format!("label={}", INTEGRATION_TEST_LABEL),
+            "-q",
+        ])
+        .output();
+
+    if let Ok(output) = output {
+        let container_ids = String::from_utf8_lossy(&output.stdout);
+        for id in container_ids.lines() {
+            if !id.is_empty() {
+                println!("Removing container: {}", id);
+                let _ = Command::new("podman").args(["rm", "-f", id]).output();
+            }
+        }
+    }
+
+    // Clean up podman images with our label
+    let output = Command::new("podman")
+        .args([
+            "images",
+            "--filter",
+            &format!("label={}", INTEGRATION_TEST_LABEL),
+            "-q",
+        ])
+        .output();
+
+    if let Ok(output) = output {
+        let image_ids = String::from_utf8_lossy(&output.stdout);
+        for id in image_ids.lines() {
+            if !id.is_empty() {
+                println!("Removing image: {}", id);
+                let _ = Command::new("podman").args(["rmi", "-f", id]).output();
+            }
+        }
+    }
+
+    println!("Cleanup complete.");
+}
diff --git a/crates/integration-tests/src/lib.rs b/crates/integration-tests/src/lib.rs
new file mode 100644
index 00000000..837cfe9f
--- /dev/null
+++ b/crates/integration-tests/src/lib.rs
@@ -0,0 +1,164 @@
+//! Integration test utilities for composefs-rs
+//!
+//! This library provides utilities for running integration tests.
+//! The main test runner is in main.rs.
+
+use std::process::{Command, ExitStatus, Stdio};
+use std::sync::Arc;
+
+use anyhow::{Context, Result};
+use composefs::fsverity::Sha256HashValue;
+use composefs::repository::Repository;
+use tempfile::TempDir;
+
+/// Test label for cleanup
+pub const INTEGRATION_TEST_LABEL: &str = "composefs-rs.integration-test=1";
+
+/// Get the path to the cfsctl binary
+pub fn get_cfsctl_path() -> Result<String> {
+    // Check environment first
+    if let Ok(path) = std::env::var("CFSCTL_PATH") {
+        return Ok(path);
+    }
+    // Look in common locations
+    for path in [
+        "./target/release/cfsctl",
+        "./target/debug/cfsctl",
+        "/usr/bin/cfsctl",
+    ] {
+        if std::path::Path::new(path).exists() {
+            return Ok(path.to_string());
+        }
+    }
+    anyhow::bail!("cfsctl not found; set CFSCTL_PATH or build with `cargo build --release`")
+}
+
+/// Get the primary test image
+pub fn get_primary_image() -> String {
+    std::env::var("COMPOSEFS_RS_PRIMARY_IMAGE")
+        .unwrap_or_else(|_| "quay.io/centos-bootc/centos-bootc:stream10".to_string())
+}
+
+/// Get all test images
+pub fn get_all_images() -> Vec<String> {
+    std::env::var("COMPOSEFS_RS_ALL_IMAGES")
+        .unwrap_or_else(|_| get_primary_image())
+        .split_whitespace()
+        .map(String::from)
+        .collect()
+}
+
+/// Captured command output
+#[derive(Debug)]
+pub struct CapturedOutput {
+    /// Exit status
+    pub status: ExitStatus,
+    /// Captured stdout
+    pub stdout: String,
+    /// Captured stderr
+    pub stderr: String,
+}
+
+impl CapturedOutput {
+    /// Assert the command succeeded
+    pub fn assert_success(&self) -> Result<()> {
+        if !self.status.success() {
+            anyhow::bail!(
+                "Command failed with status {}\nstdout: {}\nstderr: {}",
+                self.status,
+                self.stdout,
+                self.stderr
+            );
+        }
+        Ok(())
+    }
+}
+
+/// Run a command and capture output
+pub fn run_command(cmd: &str, args: &[&str]) -> Result<CapturedOutput> {
+    let output = Command::new(cmd)
+        .args(args)
+        .stdout(Stdio::piped())
+        .stderr(Stdio::piped())
+        .output()
+        .with_context(|| format!("Failed to execute: {} {:?}", cmd, args))?;
+
+    Ok(CapturedOutput {
+        status: output.status,
+        stdout: String::from_utf8_lossy(&output.stdout).to_string(),
+        stderr: String::from_utf8_lossy(&output.stderr).to_string(),
+    })
+}
+
+/// Run cfsctl with arguments
+pub fn run_cfsctl(args: &[&str]) -> Result<CapturedOutput> {
+    let cfsctl = get_cfsctl_path()?;
+    run_command(&cfsctl, args)
+}
+
+/// Create a test repository in a temporary directory.
+///
+/// The caller must keep the `TempDir` alive for as long as the repository is in use.
+pub fn create_test_repository(tempdir: &TempDir) -> Result<Arc<Repository<Sha256HashValue>>> {
+    let fd = rustix::fs::open(
+        tempdir.path(),
+        rustix::fs::OFlags::CLOEXEC | rustix::fs::OFlags::PATH,
+        0.into(),
+    )?;
+
+    let mut repo = Repository::open_path(&fd, ".")?;
+    repo.set_insecure(true);
+    Ok(Arc::new(repo))
+}
+
+/// Build a minimal test image using podman and return its ID
+pub fn build_test_image() -> Result<String> {
+    let temp_dir = TempDir::new()?;
+    let containerfile = temp_dir.path().join("Containerfile");
+
+    // Create a simple Containerfile with various file sizes to test
+    // both inline and external storage paths
+    std::fs::write(
+        &containerfile,
+        r#"FROM busybox:latest
+# Small file (should be inlined)
+RUN echo "small content" > /small.txt
+# Larger file (should be external)
+RUN dd if=/dev/zero of=/large.bin bs=1024 count=100 2>/dev/null
+# Directory with files
+RUN mkdir -p /testdir && echo "file1" > /testdir/a.txt && echo "file2" > /testdir/b.txt
+# Symlink
+RUN ln -s /small.txt /link.txt
+"#,
+    )?;
+
+    let iid_file = temp_dir.path().join("image.iid");
+
+    let output = Command::new("podman")
+        .args([
+            "build",
+            "--pull=newer",
+            &format!("--iidfile={}", iid_file.display()),
+            "-f",
+            &containerfile.to_string_lossy(),
+            &temp_dir.path().to_string_lossy(),
+        ])
+        .output()?;
+
+    if !output.status.success() {
+        anyhow::bail!(
+            "podman build failed: {}",
+            String::from_utf8_lossy(&output.stderr)
+        );
+    }
+
+    let image_id = std::fs::read_to_string(&iid_file)?.trim().to_string();
+    Ok(image_id)
+}
+
+/// Remove a test image
+pub fn cleanup_test_image(image_id: &str) {
+    let _ = Command::new("podman")
+        .args(["rmi", "-f", image_id])
+        .output();
+}
diff --git a/crates/integration-tests/src/main.rs b/crates/integration-tests/src/main.rs
new file mode 100644
index 00000000..1d5b4b3f
--- /dev/null
+++ b/crates/integration-tests/src/main.rs
@@ -0,0 +1,175 @@
+//! Integration test runner for composefs-rs
+//!
+//! This binary runs integration tests using libtest-mimic for nextest compatibility.
+//!
+//! IMPORTANT: This binary may be re-executed via `podman unshare` to act as a
+//! userns helper for rootless containers-storage access. The init_if_helper()
+//! call at the start of main() handles this.
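+//!
+//! A rough sketch of that flow (the behavior of `cstorage::init_if_helper` in
+//! helper mode is assumed here, not spelled out):
+//!
+//! ```ignore
+//! fn main() {
+//!     // When re-executed under `podman unshare` with the helper environment
+//!     // variable set, this call presumably services the RPC requests and
+//!     // exits instead of falling through to the test runner.
+//!     cstorage::init_if_helper();
+//!
+//!     // Normal path: register and run the libtest-mimic trials below.
+//! }
+//! ```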
+ +use anyhow::Result; +use integration_tests::{ + build_test_image, cleanup_test_image, create_test_repository, get_all_images, run_cfsctl, +}; +use libtest_mimic::{Arguments, Failed, Trial}; +use tempfile::TempDir; +use xshell::{cmd, Shell}; + +// ============================================================================ +// Test implementations +// ============================================================================ + +fn test_cfsctl_version() -> Result<()> { + let output = run_cfsctl(&["--version"])?; + output.assert_success()?; + assert!( + output.stdout.contains("cfsctl") || output.stderr.contains("cfsctl"), + "Version output should mention cfsctl" + ); + Ok(()) +} + +fn test_cfsctl_help() -> Result<()> { + let output = run_cfsctl(&["--help"])?; + output.assert_success()?; + assert!( + output.stdout.contains("Usage") || output.stdout.contains("USAGE"), + "Help should show usage" + ); + Ok(()) +} + +/// Test that containers-storage import produces identical results to skopeo/tar import. +/// +/// This is a critical correctness test: both import paths should produce the +/// exact same splitstream digests because they represent the same content. +fn test_cstor_vs_skopeo_equivalence() -> Result<()> { + let sh = Shell::new()?; + let rt = tokio::runtime::Runtime::new()?; + rt.block_on(async { + println!("Building test image..."); + let test_image = build_test_image()?; + println!("Built test image: {}", test_image); + + // Create two separate repositories for comparison + let cstor_repo_dir = TempDir::new()?; + let skopeo_repo_dir = TempDir::new()?; + + let cstor_repo = create_test_repository(&cstor_repo_dir)?; + let skopeo_repo = create_test_repository(&skopeo_repo_dir)?; + + // Import via containers-storage (reflink path) + let cstor_image_ref = format!("containers-storage:{}", test_image); + println!("Importing via containers-storage: {}", cstor_image_ref); + let cstor_result = composefs_oci::pull(&cstor_repo, &cstor_image_ref, None, None).await?; + + // Import via skopeo (tar streaming path) - copy to OCI directory first + let oci_dir = TempDir::new()?; + let oci_path = oci_dir.path().join("image"); + + // Use skopeo to copy from containers-storage to oci directory + // Strip sha256: prefix for skopeo compatibility + let image_id_for_skopeo = test_image.strip_prefix("sha256:").unwrap_or(&test_image); + let cstor_ref = format!("containers-storage:{}", image_id_for_skopeo); + let oci_ref = format!("oci:{}:test", oci_path.display()); + println!("Copying to OCI dir via skopeo..."); + cmd!(sh, "skopeo copy {cstor_ref} {oci_ref}").run()?; + + // Import from the OCI directory via skopeo/tar path + let skopeo_image_ref = format!("oci:{}:test", oci_path.display()); + println!("Importing via skopeo/OCI: {}", skopeo_image_ref); + let skopeo_result = + composefs_oci::pull(&skopeo_repo, &skopeo_image_ref, None, None).await?; + + // Get layer maps from both configs + let (_cstor_config, cstor_layers) = composefs_oci::open_config( + &cstor_repo, + &cstor_result.config_digest, + Some(&cstor_result.config_verity), + )?; + let (_skopeo_config, skopeo_layers) = composefs_oci::open_config( + &skopeo_repo, + &skopeo_result.config_digest, + Some(&skopeo_result.config_verity), + )?; + + // Compare results + assert_eq!( + cstor_result.config_digest, skopeo_result.config_digest, + "config digests must match" + ); + assert_eq!(cstor_layers, skopeo_layers, "layer verity IDs must match"); + assert_eq!( + cstor_result.config_verity, skopeo_result.config_verity, + "config verity IDs must match" + ); + + 
println!("SUCCESS: Both import paths produced identical digests"); + println!(" Config digest: {}", cstor_result.config_digest); + println!(" Layers: {}", cstor_layers.len()); + + // Cleanup + cleanup_test_image(&test_image); + + Ok(()) + }) +} + +// Parameterized test - runs for each image +fn test_image_pull(image: &str) -> Result<()> { + println!("Would test pulling image: {}", image); + // TODO: implement actual image pull test + Ok(()) +} + +/// All simple integration tests +fn get_simple_tests() -> Vec<(&'static str, fn() -> Result<()>)> { + vec![ + ("test_cfsctl_version", test_cfsctl_version), + ("test_cfsctl_help", test_cfsctl_help), + ( + "test_cstor_vs_skopeo_equivalence", + test_cstor_vs_skopeo_equivalence, + ), + ] +} + +/// All parameterized tests (run for each image) +fn get_parameterized_tests() -> Vec<(&'static str, fn(&str) -> Result<()>)> { + vec![("test_image_pull", test_image_pull)] +} + +// ============================================================================ +// Test harness main +// ============================================================================ + +fn main() { + // CRITICAL: Handle userns helper re-execution. + // When running rootless, this binary may be re-executed via `podman unshare` + // to act as a helper process for containers-storage access. + cstorage::init_if_helper(); + + let args = Arguments::from_args(); + + let mut trials = Vec::new(); + + // Register simple tests + for (name, test_fn) in get_simple_tests() { + trials.push(Trial::test(name, move || { + test_fn().map_err(|e| Failed::from(format!("{:?}", e))) + })); + } + + // Register parameterized tests + let images = get_all_images(); + for (name, test_fn) in get_parameterized_tests() { + for image in &images { + let test_name = format!("{}::{}", name, image.rsplit('/').next().unwrap_or(image)); + let image = image.clone(); + trials.push(Trial::test(test_name, move || { + test_fn(&image).map_err(|e| Failed::from(format!("{:?}", e))) + })); + } + } + + libtest_mimic::run(&args, trials).exit(); +}
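
The `test_image_pull` stub above is still a TODO. Under the assumption that
`composefs_oci::pull` accepts a `docker://` reference in the same way it accepts
the `containers-storage:` and `oci:` references used by the equivalence test, a
filled-in version could look roughly like the following sketch (the final
println is an illustrative placeholder rather than a real assertion):

    use anyhow::Result;
    use integration_tests::create_test_repository;
    use tempfile::TempDir;

    fn test_image_pull(image: &str) -> Result<()> {
        let rt = tokio::runtime::Runtime::new()?;
        rt.block_on(async {
            // One fresh repository per test; the TempDir must outlive the repository.
            let repo_dir = TempDir::new()?;
            let repo = create_test_repository(&repo_dir)?;

            // Pull over the network via the skopeo/tar streaming path
            // (docker:// transport assumed).
            let result =
                composefs_oci::pull(&repo, &format!("docker://{}", image), None, None).await?;

            // Placeholder check: a successful pull yields a config digest.
            println!("pulled {}: config digest {}", image, result.config_digest);
            Ok(())
        })
    }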