diff --git a/crates/cfsctl/Cargo.toml b/crates/cfsctl/Cargo.toml index 850a9a14..2b9309ab 100644 --- a/crates/cfsctl/Cargo.toml +++ b/crates/cfsctl/Cargo.toml @@ -21,6 +21,7 @@ rhel9 = ['composefs/rhel9'] anyhow = { version = "1.0.87", default-features = false } fn-error-context = "0.2" clap = { version = "4.5.0", default-features = false, features = ["std", "help", "usage", "derive", "wrap_help"] } +comfy-table = { version = "7.1", default-features = false } composefs = { workspace = true } composefs-boot = { workspace = true } composefs-oci = { workspace = true, optional = true } @@ -28,6 +29,7 @@ composefs-http = { workspace = true, optional = true } env_logger = { version = "0.11.0", default-features = false } hex = { version = "0.4.0", default-features = false } rustix = { version = "1.0.0", default-features = false, features = ["fs", "process"] } +serde_json = { version = "1.0", default-features = false, features = ["std"] } tokio = { version = "1.24.2", default-features = false } [lints] diff --git a/crates/cfsctl/src/main.rs b/crates/cfsctl/src/main.rs index 512817a2..ef2bb37c 100644 --- a/crates/cfsctl/src/main.rs +++ b/crates/cfsctl/src/main.rs @@ -12,6 +12,7 @@ use std::{ use anyhow::Result; use clap::{Parser, Subcommand, ValueEnum}; +use comfy_table::{presets::UTF8_FULL, Table}; use rustix::fs::CWD; @@ -103,12 +104,25 @@ enum OciCommand { }, /// List all tagged OCI images in the repository #[clap(name = "images")] - ListImages, + ListImages { + /// Output as JSON array + #[clap(long)] + json: bool, + }, /// Show information about an OCI image + /// + /// By default, outputs JSON with manifest, config, and referrers. + /// Use --manifest or --config to output just that raw JSON. #[clap(name = "inspect")] Inspect { /// Image reference (tag name or manifest digest) image: String, + /// Output only the raw manifest JSON (as originally stored) + #[clap(long, conflicts_with = "config")] + manifest: bool, + /// Output only the raw config JSON (as originally stored) + #[clap(long, conflicts_with = "manifest")] + config: bool, }, /// Tag an image with a new name Tag { @@ -122,6 +136,21 @@ enum OciCommand { /// Tag name to remove name: String, }, + /// Inspect a stored layer + /// + /// By default, outputs the raw tar stream to stdout. + /// Use --dumpfile for composefs dumpfile format, or --json for metadata. + #[clap(name = "layer")] + LayerInspect { + /// Layer diff_id (sha256:...) + layer: String, + /// Output as composefs dumpfile format (one entry per line) + #[clap(long, conflicts_with = "json")] + dumpfile: bool, + /// Output layer metadata as JSON + #[clap(long, conflicts_with = "dumpfile")] + json: bool, + }, /// Compute the composefs image object id of the rootfs of a stored OCI image ComputeId { #[clap(flatten)] @@ -390,16 +419,18 @@ where println!("verity {}", result.manifest_verity.to_hex()); println!("tagged {tag_name}"); } - OciCommand::ListImages => { + OciCommand::ListImages { json } => { let images = composefs_oci::oci_image::list_images(&repo)?; - if images.is_empty() { + if json { + println!("{}", serde_json::to_string_pretty(&images)?); + } else if images.is_empty() { println!("No images found"); } else { - println!( - "{:<30} {:<12} {:<10} {:<8} {:<6}", - "NAME", "DIGEST", "ARCH", "SEALED", "LAYERS" - ); + let mut table = Table::new(); + table.load_preset(UTF8_FULL); + table.set_header(["NAME", "DIGEST", "ARCH", "SEALED", "LAYERS", "REFS"]); + for img in images { let digest_short = img .manifest_digest @@ -410,68 +441,49 @@ where } else { digest_short }; - println!( - "{:<30} {:<12} {:<10} {:<8} {:<6}", - img.name, + let arch = if img.architecture.is_empty() { + "artifact" + } else { + &img.architecture + }; + let sealed = if img.sealed { "yes" } else { "no" }; + table.add_row([ + img.name.as_str(), digest_display, - if img.architecture.is_empty() { - "artifact" - } else { - &img.architecture - }, - if img.sealed { "yes" } else { "no" }, - img.layer_count - ); + arch, + sealed, + &img.layer_count.to_string(), + &img.referrer_count.to_string(), + ]); } + println!("{table}"); } } - OciCommand::Inspect { ref image } => { + OciCommand::Inspect { + ref image, + manifest, + config, + } => { let img = if image.starts_with("sha256:") { composefs_oci::oci_image::OciImage::open(&repo, image, None)? } else { composefs_oci::oci_image::OciImage::open_ref(&repo, image)? }; - println!("Manifest: {}", img.manifest_digest()); - println!("Config: {}", img.config_digest()); - println!( - "Type: {}", - if img.is_container_image() { - "container" - } else { - "artifact" - } - ); - - if img.is_container_image() { - println!("Architecture: {}", img.architecture()); - println!("OS: {}", img.os()); - } - - if let Some(created) = img.created() { - println!("Created: {created}"); - } - - println!( - "Sealed: {}", - if img.is_sealed() { "yes" } else { "no" } - ); - if let Some(seal) = img.seal_digest() { - println!("Seal digest: {seal}"); - } - - println!("Layers: {}", img.layer_descriptors().len()); - for (i, layer) in img.layer_descriptors().iter().enumerate() { - println!(" [{i}] {} ({} bytes)", layer.digest(), layer.size()); - } - - if let Some(labels) = img.labels() { - if !labels.is_empty() { - println!("Labels:"); - for (k, v) in labels { - println!(" {k}: {v}"); - } - } + if manifest { + // Output raw manifest JSON exactly as stored + let manifest_json = img.read_manifest_json(&repo)?; + std::io::Write::write_all(&mut std::io::stdout(), &manifest_json)?; + println!(); + } else if config { + // Output raw config JSON exactly as stored + let config_json = img.read_config_json(&repo)?; + std::io::Write::write_all(&mut std::io::stdout(), &config_json)?; + println!(); + } else { + // Default: output combined JSON with manifest, config, and referrers + let output = img.inspect_json(&repo)?; + println!("{}", serde_json::to_string_pretty(&output)?); } } OciCommand::Tag { @@ -485,6 +497,21 @@ where composefs_oci::oci_image::untag_image(&repo, name)?; println!("Removed tag {name}"); } + OciCommand::LayerInspect { + ref layer, + dumpfile, + json, + } => { + if json { + let info = composefs_oci::layer_info(&repo, layer)?; + println!("{}", serde_json::to_string_pretty(&info)?); + } else if dumpfile { + composefs_oci::layer_dumpfile(&repo, layer, &mut std::io::stdout())?; + } else { + // Default: output raw tar + composefs_oci::layer_tar(&repo, layer, &mut std::io::stdout())?; + } + } OciCommand::Seal { config_opts: OCIConfigOptions { diff --git a/crates/composefs-oci/Cargo.toml b/crates/composefs-oci/Cargo.toml index 6c38d3a9..67cbef2f 100644 --- a/crates/composefs-oci/Cargo.toml +++ b/crates/composefs-oci/Cargo.toml @@ -21,10 +21,17 @@ hex = { version = "0.4.0", default-features = false } indicatif = { version = "0.17.0", default-features = false, features = ["tokio"] } oci-spec = { version = "0.8.0", default-features = false } rustix = { version = "1.0.0", features = ["fs"] } +serde = { version = "1.0", default-features = false, features = ["derive"] } +serde_json = { version = "1.0", default-features = false, features = ["std"] } sha2 = { version = "0.10.1", default-features = false } tar = { version = "0.4.38", default-features = false } tokio = { version = "1.24.2", features = ["rt-multi-thread"] } tokio-util = { version = "0.7", default-features = false, features = ["io"] } +tracing = "0.1" +cap-std-ext = "4.0" +flate2 = { version = "1.0", default-features = false, features = ["rust_backend"] } +ocidir = "0.6" +zstd = { version = "0.13.0", default-features = false } [dev-dependencies] similar-asserts = "1.7.0" diff --git a/crates/composefs-oci/src/image.rs b/crates/composefs-oci/src/image.rs index 9fd6f1d0..de4247ea 100644 --- a/crates/composefs-oci/src/image.rs +++ b/crates/composefs-oci/src/image.rs @@ -144,7 +144,7 @@ mod test { fsverity::Sha256HashValue, tree::{LeafContent, RegularFile, Stat}, }; - use std::{cell::RefCell, collections::BTreeMap, io::BufRead, io::Read, path::PathBuf}; + use std::{cell::RefCell, collections::BTreeMap, io::BufRead, path::PathBuf}; use super::*; diff --git a/crates/composefs-oci/src/layer.rs b/crates/composefs-oci/src/layer.rs new file mode 100644 index 00000000..cd966b0d --- /dev/null +++ b/crates/composefs-oci/src/layer.rs @@ -0,0 +1,94 @@ +//! Shared layer import logic for OCI container images. +//! +//! This module provides common functionality for importing OCI image layers +//! into a composefs repository, shared between the skopeo proxy path and +//! direct OCI layout import. + +use std::sync::Arc; + +use anyhow::{bail, Result}; +use async_compression::tokio::bufread::{GzipDecoder, ZstdDecoder}; +use oci_spec::image::MediaType; +use tokio::io::{AsyncBufRead, AsyncRead, AsyncWriteExt}; + +use composefs::fsverity::FsVerityHashValue; +use composefs::repository::Repository; + +use crate::skopeo::TAR_LAYER_CONTENT_TYPE; +use crate::tar::split_async; + +/// Check if a media type represents a tar-based layer. +pub fn is_tar_media_type(media_type: &MediaType) -> bool { + matches!( + media_type, + MediaType::ImageLayer + | MediaType::ImageLayerGzip + | MediaType::ImageLayerZstd + | MediaType::ImageLayerNonDistributable + | MediaType::ImageLayerNonDistributableGzip + | MediaType::ImageLayerNonDistributableZstd + ) +} + +/// Wrap an async reader with the appropriate decompressor for the media type. +/// +/// Returns a boxed reader that decompresses the stream if needed. +pub fn decompress_async<'a, R>( + reader: R, + media_type: &MediaType, +) -> Result> +where + R: AsyncRead + Unpin + Send + 'a, +{ + let buf = tokio::io::BufReader::new(reader); + let reader: Box = match media_type { + MediaType::ImageLayer | MediaType::ImageLayerNonDistributable => Box::new(buf), + MediaType::ImageLayerGzip | MediaType::ImageLayerNonDistributableGzip => { + Box::new(tokio::io::BufReader::new(GzipDecoder::new(buf))) + } + MediaType::ImageLayerZstd | MediaType::ImageLayerNonDistributableZstd => { + Box::new(tokio::io::BufReader::new(ZstdDecoder::new(buf))) + } + _ => bail!("Unsupported layer media type for decompression: {media_type}"), + }; + Ok(reader) +} + +/// Import a tar layer from an async reader into the repository. +/// +/// The reader should already be decompressed (use `decompress_async` first). +/// Returns the fs-verity object ID of the imported splitstream. +pub async fn import_tar_async( + repo: Arc>, + reader: R, +) -> Result +where + ObjectID: FsVerityHashValue, + R: AsyncBufRead + Unpin + Send, +{ + split_async(reader, repo, TAR_LAYER_CONTENT_TYPE).await +} + +/// Store raw bytes from an async reader as a repository object. +/// +/// Streams the raw bytes into a repository object without creating a splitstream. +/// Use this for non-tar blobs (OCI artifacts) where the caller will create +/// the splitstream wrapper. +/// +/// Returns (object_id, size) of the stored object. +pub async fn store_blob_async( + repo: &Repository, + mut reader: R, +) -> Result<(ObjectID, u64)> +where + ObjectID: FsVerityHashValue, + R: AsyncRead + Unpin, +{ + let tmpfile = repo.create_object_tmpfile()?; + let mut writer = tokio::fs::File::from(std::fs::File::from(tmpfile)); + let size = tokio::io::copy(&mut reader, &mut writer).await?; + writer.flush().await?; + let tmpfile = writer.into_std().await; + let object_id = repo.finalize_object_tmpfile(tmpfile, size)?; + Ok((object_id, size)) +} diff --git a/crates/composefs-oci/src/lib.rs b/crates/composefs-oci/src/lib.rs index bbb41c84..70dfdbd4 100644 --- a/crates/composefs-oci/src/lib.rs +++ b/crates/composefs-oci/src/lib.rs @@ -13,7 +13,9 @@ #![forbid(unsafe_code)] pub mod image; +pub mod layer; pub mod oci_image; +pub mod oci_layout; pub mod skopeo; pub mod tar; @@ -31,9 +33,9 @@ use crate::tar::get_entry; // Re-export key types for convenience pub use oci_image::{ - add_referrer, list_images, list_referrers, list_refs, remove_referrer, - remove_referrers_for_subject, resolve_ref, tag_image, untag_image, ImageInfo, OciImage, - OCI_REF_PREFIX, + add_referrer, layer_dumpfile, layer_info, layer_tar, list_images, list_referrers, list_refs, + remove_referrer, remove_referrers_for_subject, resolve_ref, tag_image, untag_image, ImageInfo, + LayerInfo, OciImage, SplitstreamInfo, OCI_REF_PREFIX, }; pub use skopeo::{pull_image, PullResult}; diff --git a/crates/composefs-oci/src/oci_image.rs b/crates/composefs-oci/src/oci_image.rs index 256168ec..f5eb3a3d 100644 --- a/crates/composefs-oci/src/oci_image.rs +++ b/crates/composefs-oci/src/oci_image.rs @@ -46,10 +46,12 @@ use containers_image_proxy::oci_spec::image::{ }; use rustix::fs::{openat, readlinkat, unlinkat, AtFlags, Dir, Mode, OFlags}; use rustix::io::Errno; +use serde::Serialize; use sha2::{Digest, Sha256}; use composefs::{fsverity::FsVerityHashValue, repository::Repository}; +use crate::layer::is_tar_media_type; use crate::skopeo::{OCI_BLOB_CONTENT_TYPE, OCI_CONFIG_CONTENT_TYPE, OCI_MANIFEST_CONTENT_TYPE}; /// Data and named refs from a splitstream with external object storage. @@ -99,6 +101,8 @@ pub struct OciImage { manifest: ImageManifest, /// The config digest (sha256 content hash) config_digest: String, + /// The fs-verity ID of the config splitstream + config_verity: ObjectID, /// The parsed OCI config (may be empty for artifacts) config: Option, /// Map from layer diff_id to its fs-verity object ID @@ -136,13 +140,14 @@ impl OciImage { let config_key = format!("config:{config_digest}"); let config_verity = named_refs .get(config_key.as_str()) - .context("Manifest missing config reference")?; + .context("Manifest missing config reference")? + .clone(); let config_id = crate::config_identifier(&config_digest); let (config_data, config_named_refs) = read_external_splitstream( repo, &config_id, - Some(config_verity), + Some(&config_verity), Some(OCI_CONFIG_CONTENT_TYPE), )?; @@ -181,6 +186,7 @@ impl OciImage { manifest_digest: manifest_digest.to_string(), manifest, config_digest, + config_verity, config, layer_refs, manifest_verity, @@ -334,6 +340,60 @@ impl OciImage { .and_then(|c| c.config().as_ref()) .and_then(|cfg| cfg.labels().as_ref()) } + + /// Reads the raw manifest JSON bytes from the repository. + /// + /// This retrieves the original manifest JSON as stored, which may differ + /// slightly from re-serializing the parsed manifest (e.g., whitespace). + pub fn read_manifest_json(&self, repo: &Repository) -> Result> { + let manifest_id = manifest_identifier(&self.manifest_digest); + let (data, _) = read_external_splitstream( + repo, + &manifest_id, + Some(&self.manifest_verity), + Some(OCI_MANIFEST_CONTENT_TYPE), + )?; + Ok(data) + } + + /// Reads the raw config JSON bytes from the repository. + /// + /// This retrieves the original config JSON as stored, which may differ + /// slightly from re-serializing the parsed config (e.g., whitespace). + pub fn read_config_json(&self, repo: &Repository) -> Result> { + let config_id = crate::config_identifier(&self.config_digest); + let (data, _) = read_external_splitstream( + repo, + &config_id, + Some(&self.config_verity), + Some(OCI_CONFIG_CONTENT_TYPE), + )?; + Ok(data) + } + + /// Returns the full inspect output as a JSON value. + /// + /// This includes the manifest, config, and referrers in a single JSON object. + /// The manifest and config are included as their original JSON structure. + pub fn inspect_json(&self, repo: &Repository) -> Result { + let manifest_json = self.read_manifest_json(repo)?; + let config_json = self.read_config_json(repo)?; + let referrers = list_referrers(repo, &self.manifest_digest)?; + + let manifest_value: serde_json::Value = serde_json::from_slice(&manifest_json)?; + let config_value: serde_json::Value = serde_json::from_slice(&config_json)?; + + let referrers_value: Vec = referrers + .iter() + .map(|(digest, _verity)| serde_json::json!({ "digest": digest })) + .collect(); + + Ok(serde_json::json!({ + "manifest": manifest_value, + "config": config_value, + "referrers": referrers_value, + })) + } } // ============================================================================= @@ -425,7 +485,8 @@ pub fn list_refs( /// FIXME change this to just have a struct of manifest+config JSON /// plus a few helper methods. We shouldn't be re-parsing created timestamp here /// callers should directly access that etc -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] pub struct ImageInfo { /// The tag/name of the image pub name: String, @@ -443,6 +504,8 @@ pub struct ImageInfo { pub sealed: bool, /// Number of layers/blobs pub layer_count: usize, + /// Number of OCI referrers (signatures, attestations, etc.) + pub referrer_count: usize, } /// Lists all tagged images with their metadata. @@ -454,6 +517,7 @@ pub fn list_images( for (name, digest) in list_refs(repo)? { match OciImage::open(repo, &digest, None) { Ok(img) => { + let referrer_count = list_referrers(repo, &digest).map(|r| r.len()).unwrap_or(0); images.push(ImageInfo { name, manifest_digest: digest, @@ -463,6 +527,7 @@ pub fn list_images( created: img.created().map(String::from), sealed: img.is_sealed(), layer_count: img.layer_descriptors().len(), + referrer_count, }); } Err(e) => { @@ -543,19 +608,6 @@ pub fn manifest_identifier(digest: &str) -> String { format!("oci-manifest-{digest}") } -/// Returns true if this is a tar-based layer media type. -pub(crate) fn is_tar_media_type(media_type: &MediaType) -> bool { - matches!( - media_type, - MediaType::ImageLayer - | MediaType::ImageLayerGzip - | MediaType::ImageLayerZstd - | MediaType::ImageLayerNonDistributable - | MediaType::ImageLayerNonDistributableGzip - | MediaType::ImageLayerNonDistributableZstd - ) -} - /// Returns the reference path for an OCI name. fn oci_ref_path(name: &str) -> String { format!("{OCI_REF_PREFIX}{}", encode_tag(name)) @@ -857,6 +909,142 @@ pub fn cleanup_dangling_referrers( Ok(removed) } +// ============================================================================= +// Layer Inspection +// ============================================================================= + +/// Metadata about a layer stored in the repository. +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct LayerInfo { + /// The layer diff_id (sha256 hash of uncompressed content) + pub diff_id: String, + /// The fs-verity hash of the layer splitstream + pub verity: String, + /// Size of the uncompressed tar layer in bytes + pub size: u64, + /// Number of files/entries in the layer + pub entry_count: usize, + /// Splitstream metadata + pub splitstream: SplitstreamInfo, +} + +/// Metadata about the splitstream representation of a layer. +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct SplitstreamInfo { + /// Number of external object references (large files stored separately) + pub external_objects: usize, + /// Total size of external objects in bytes + pub external_size: u64, + /// Size of inline data in bytes (small files + tar headers) + pub inline_size: u64, +} + +/// Opens a layer by its diff_id and returns metadata about it. +/// +/// The diff_id should be in the `sha256:...` format used by OCI. +pub fn layer_info( + repo: &Repository, + diff_id: &str, +) -> Result { + let content_id = crate::layer_identifier(diff_id); + let verity = repo + .has_stream(&content_id)? + .with_context(|| format!("Layer {diff_id} not found"))?; + + let mut stream = repo.open_stream( + &content_id, + Some(&verity), + Some(crate::skopeo::TAR_LAYER_CONTENT_TYPE), + )?; + + // Get the total size from the splitstream header (this is the merged/tar size) + let size = stream.total_size; + + // Count external object references (this doesn't consume the stream) + let mut external_objects = 0usize; + stream.get_object_refs(|_| external_objects += 1)?; + + // Iterate entries and gather sizes + let mut entry_count = 0usize; + let mut external_size = 0u64; + + while let Some(entry) = crate::tar::get_entry(&mut stream)? { + entry_count += 1; + if let crate::tar::TarItem::Leaf(composefs::tree::LeafContent::Regular( + composefs::tree::RegularFile::External(_, file_size), + )) = entry.item + { + external_size += file_size; + } + } + + // inline_size includes tar headers, small files, and other metadata + let inline_size = size.saturating_sub(external_size); + + Ok(LayerInfo { + diff_id: diff_id.to_string(), + verity: verity.to_hex(), + size, + entry_count, + splitstream: SplitstreamInfo { + external_objects, + external_size, + inline_size, + }, + }) +} + +/// Writes the layer contents in composefs dumpfile format. +/// +/// Each entry is written on its own line in the composefs dumpfile format, +/// which includes path, size, mode, ownership, timestamps, and content references. +pub fn layer_dumpfile( + repo: &Repository, + diff_id: &str, + output: &mut impl std::io::Write, +) -> Result<()> { + let content_id = crate::layer_identifier(diff_id); + let verity = repo + .has_stream(&content_id)? + .with_context(|| format!("Layer {diff_id} not found"))?; + + let mut stream = repo.open_stream( + &content_id, + Some(&verity), + Some(crate::skopeo::TAR_LAYER_CONTENT_TYPE), + )?; + + while let Some(entry) = crate::tar::get_entry(&mut stream)? { + writeln!(output, "{entry}")?; + } + + Ok(()) +} + +/// Reconstitutes and writes the original tar layer. +/// +/// This merges the splitstream back into the original tar format by +/// combining inline data with external object references. +pub fn layer_tar( + repo: &Repository, + diff_id: &str, + output: &mut impl std::io::Write, +) -> Result<()> { + let content_id = crate::layer_identifier(diff_id); + let verity = repo + .has_stream(&content_id)? + .with_context(|| format!("Layer {diff_id} not found"))?; + + repo.merge_splitstream( + &content_id, + Some(&verity), + Some(crate::skopeo::TAR_LAYER_CONTENT_TYPE), + output, + ) +} + #[cfg(test)] mod test { use super::*; diff --git a/crates/composefs-oci/src/oci_layout.rs b/crates/composefs-oci/src/oci_layout.rs new file mode 100644 index 00000000..2f31beca --- /dev/null +++ b/crates/composefs-oci/src/oci_layout.rs @@ -0,0 +1,473 @@ +//! Direct OCI layout directory import without the skopeo proxy. +//! +//! This module provides a fast path for importing images from local OCI layout +//! directories (the `oci:` transport). Instead of going through the +//! containers-image-proxy (which spawns skopeo as a subprocess), we read the +//! OCI layout directly using the `ocidir` crate. +//! +//! This is significantly faster for local imports since: +//! - No subprocess overhead from skopeo +//! - No IPC/pipe overhead for blob streaming +//! - Direct file I/O instead of proxy protocol parsing +//! +//! The import produces identical results to the proxy path: the same +//! splitstream format with the same content identifiers. + +use std::cmp::Reverse; +use std::collections::HashMap; +use std::fs::File; +use std::io::Read; +use std::path::Path; +use std::sync::Arc; +use std::thread::available_parallelism; + +use anyhow::{bail, Context, Result}; +use cap_std_ext::cap_std; +use fn_error_context::context; +use oci_spec::image::{Arch, Descriptor, ImageConfiguration, ImageManifest, MediaType, Os}; +use ocidir::OciDir; +use tokio::sync::Semaphore; +use tokio::task::JoinSet; +use tracing::debug; + +use composefs::fsverity::FsVerityHashValue; +use composefs::repository::Repository; + +use crate::layer::{decompress_async, import_tar_async, is_tar_media_type, store_blob_async}; +use crate::skopeo::OCI_BLOB_CONTENT_TYPE; +use crate::oci_image::manifest_identifier; +use crate::skopeo::{OCI_CONFIG_CONTENT_TYPE, OCI_MANIFEST_CONTENT_TYPE}; +use crate::{config_identifier, layer_identifier, oci_image::tag_image, PullResult}; + +/// Parse an OCI layout reference like "/path/to/dir:tag" or "/path/to/dir". +/// +/// Returns (path, optional_tag). +fn parse_oci_layout_ref(imgref: &str) -> (&str, Option<&str>) { + // The format is: path[:tag] + // We need to be careful: paths can contain colons (on Windows, or weird Unix paths). + // The convention is that if the last colon is after the last slash, it's a tag separator. + if let Some(last_slash) = imgref.rfind('/') { + if let Some(colon_pos) = imgref[last_slash..].rfind(':') { + let absolute_colon = last_slash + colon_pos; + let (path, tag_with_colon) = imgref.split_at(absolute_colon); + return (path, Some(&tag_with_colon[1..])); + } + } else if let Some(colon_pos) = imgref.rfind(':') { + // No slash at all, but there's a colon + let (path, tag_with_colon) = imgref.split_at(colon_pos); + return (path, Some(&tag_with_colon[1..])); + } + (imgref, None) +} + +/// Read a blob from an OCI layout as bytes. +fn read_blob_bytes(ocidir: &OciDir, desc: &Descriptor) -> Result> { + let mut file = ocidir.read_blob(desc)?; + let mut bytes = Vec::with_capacity(desc.size() as usize); + file.read_to_end(&mut bytes)?; + Ok(bytes) +} + +/// Import an image from a local OCI layout directory. +/// +/// This is the fast path for `oci:` transport references. It reads the OCI +/// layout directly without going through skopeo. +#[context("Importing OCI layout from {}", layout_path.display())] +pub async fn import_oci_layout( + repo: &Arc>, + layout_path: &Path, + layout_tag: Option<&str>, + reference: Option<&str>, +) -> Result> { + // Open the OCI layout directory + let dir = cap_std::fs::Dir::open_ambient_dir(layout_path, cap_std::ambient_authority()) + .with_context(|| format!("Opening OCI layout directory {}", layout_path.display()))?; + let ocidir = OciDir::open(dir).context("Opening OCI directory")?; + + // Resolve the manifest descriptor from the index + let manifest_descriptor = resolve_manifest_descriptor(&ocidir, layout_tag) + .context("Resolving manifest from index")?; + + // Reject nested indices - they're allowed by spec but extremely rare in practice + if *manifest_descriptor.media_type() == MediaType::ImageIndex { + bail!( + "Nested image index not supported; the selected manifest points to another index \ + rather than an image manifest" + ); + } + + let manifest_digest = manifest_descriptor.digest().to_string(); + + let raw_manifest = + read_blob_bytes(&ocidir, &manifest_descriptor).context("Reading manifest blob")?; + let manifest = ImageManifest::from_reader(&raw_manifest[..]).context("Parsing manifest")?; + + // Import config and layers + let config_descriptor = manifest.config(); + let layers = manifest.layers(); + let (config_digest, config_verity, layer_verities) = + import_config_and_layers(repo, &ocidir, layers, config_descriptor) + .await + .with_context(|| format!("Failed to import config {}", config_descriptor.digest()))?; + + // Store the manifest + let manifest_content_id = manifest_identifier(&manifest_digest); + let manifest_verity = if let Some(verity) = repo.has_stream(&manifest_content_id)? { + debug!("Already have manifest {manifest_digest}"); + verity + } else { + debug!("Storing manifest {manifest_digest}"); + + let mut splitstream = repo.create_stream(OCI_MANIFEST_CONTENT_TYPE); + + let config_key = format!("config:{}", config_descriptor.digest()); + splitstream.add_named_stream_ref(&config_key, &config_verity); + + for (diff_id, verity) in &layer_verities { + splitstream.add_named_stream_ref(diff_id, verity); + } + + splitstream.write_external(&raw_manifest)?; + repo.write_stream(splitstream, &manifest_content_id, None)? + }; + + // Tag if requested + if let Some(name) = reference { + tag_image(repo, &manifest_digest, name)?; + } + + Ok(PullResult { + manifest_digest, + manifest_verity, + config_digest, + config_verity, + }) +} + +/// Resolve the manifest descriptor from an OCI layout's index. +/// +/// If `tag` is provided, looks for a manifest with that annotation. +/// Otherwise, selects the native platform or the only manifest available. +fn resolve_manifest_descriptor(ocidir: &OciDir, tag: Option<&str>) -> Result { + let index = ocidir.read_index().context("Reading index.json")?; + let manifests = index.manifests(); + + if manifests.is_empty() { + bail!("OCI layout index contains no manifests"); + } + + // If a tag is specified, look for it in annotations + if let Some(tag) = tag { + for desc in manifests { + if let Some(annotations) = desc.annotations() { + if let Some(ref_tag) = annotations.get("org.opencontainers.image.ref.name") { + if ref_tag == tag { + return Ok(desc.clone()); + } + } + } + } + bail!("Tag '{tag}' not found in OCI layout index"); + } + + // No tag specified - try to find the native platform manifest + let native_arch = Arch::default(); + let native_os = Os::default(); + + for desc in manifests { + if let Some(platform) = desc.platform() { + if *platform.architecture() == native_arch && *platform.os() == native_os { + return Ok(desc.clone()); + } + } + } + + let oci_arch = native_arch.to_string(); + let oci_os = native_os.to_string(); + + // Fall back to the first manifest if there's only one + if manifests.len() == 1 { + return Ok(manifests[0].clone()); + } + + bail!( + "Could not find manifest for native platform ({oci_os}/{oci_arch}) in OCI layout. \ + Available manifests: {}", + manifests + .iter() + .filter_map(|d| d.platform().as_ref().map(|p| format!( + "{}/{}", + p.os(), + p.architecture() + ))) + .collect::>() + .join(", ") + ); +} + +/// Import config and all layers from an OCI layout. +async fn import_config_and_layers( + repo: &Arc>, + ocidir: &OciDir, + manifest_layers: &[Descriptor], + config_descriptor: &Descriptor, +) -> Result<(String, ObjectID, HashMap)> { + let config_digest: &str = config_descriptor.digest().as_ref(); + let content_id = config_identifier(config_digest); + + if let Some(config_id) = repo.has_stream(&content_id)? { + // Already have this config - read layer refs from it + debug!("Already have container config {config_digest}"); + + let stream = + repo.open_stream(&content_id, Some(&config_id), Some(OCI_CONFIG_CONTENT_TYPE))?; + let layer_refs: HashMap = stream + .into_named_refs() + .into_iter() + .map(|(k, v)| (k.to_string(), v)) + .collect(); + + return Ok((config_digest.to_string(), config_id, layer_refs)); + } + + // Fetch config + debug!("Reading config {config_digest}"); + let raw_config = read_blob_bytes(ocidir, config_descriptor).context("Reading config blob")?; + + // Parse config to get diff_ids (if this is a container image) + let is_image_config = *config_descriptor.media_type() == MediaType::ImageConfig; + let diff_ids: Vec = if is_image_config { + let config = ImageConfiguration::from_reader(&raw_config[..])?; + config.rootfs().diff_ids().to_vec() + } else { + // Artifact - use manifest layer digests + manifest_layers + .iter() + .map(|d| d.digest().to_string()) + .collect() + }; + + // Sort layers by size for parallel fetching (largest first) + let mut layers: Vec<_> = manifest_layers.iter().zip(&diff_ids).collect(); + layers.sort_by_key(|(desc, _)| Reverse(desc.size())); + + let threads = available_parallelism()?; + let sem = Arc::new(Semaphore::new(threads.into())); + let mut layer_tasks = JoinSet::new(); + + for (idx, (descriptor, diff_id)) in layers.iter().enumerate() { + let diff_id = diff_id.to_string(); + let repo = Arc::clone(repo); + let permit = Arc::clone(&sem).acquire_owned().await?; + + // Open a file handle to the layer blob - we'll stream through it + let layer_file = ocidir + .read_blob(descriptor) + .with_context(|| format!("Opening layer blob {}", descriptor.digest()))?; + + let media_type = descriptor.media_type().clone(); + + layer_tasks.spawn(async move { + let _permit = permit; + let verity = import_layer_from_file(&repo, &diff_id, layer_file, &media_type).await?; + anyhow::Ok((idx, diff_id, verity)) + }); + } + + // Collect results and sort by index + let mut results: Vec<_> = layer_tasks + .join_all() + .await + .into_iter() + .collect::>()?; + results.sort_by_key(|(idx, _, _)| *idx); + + // Build config splitstream with layer refs + let mut splitstream = repo.create_stream(OCI_CONFIG_CONTENT_TYPE); + let mut layer_refs = HashMap::new(); + for (_, diff_id, verity) in results { + splitstream.add_named_stream_ref(&diff_id, &verity); + layer_refs.insert(diff_id, verity); + } + + splitstream.write_external(&raw_config)?; + let config_id = repo.write_stream(splitstream, &content_id, None)?; + + Ok((config_digest.to_string(), config_id, layer_refs)) +} + +/// Import a single layer by streaming from a file handle. +/// +/// This avoids buffering entire layers in memory by streaming through +/// the file handle directly. +async fn import_layer_from_file( + repo: &Arc>, + diff_id: &str, + layer_file: File, + media_type: &MediaType, +) -> Result { + let content_id = layer_identifier(diff_id); + + if let Some(layer_id) = repo.has_stream(&content_id)? { + debug!("Already have layer {diff_id}"); + return Ok(layer_id); + } + + debug!("Importing layer {diff_id}"); + + // Convert std::fs::File to tokio::fs::File for async I/O + let async_file = tokio::fs::File::from_std(layer_file); + + let object_id = if is_tar_media_type(media_type) { + let reader = decompress_async(async_file, media_type)?; + import_tar_async(repo.clone(), reader).await? + } else { + // Non-tar blob: store as object and create splitstream wrapper + let (object_id, size) = store_blob_async(repo, async_file).await?; + let mut stream = repo.create_stream(OCI_BLOB_CONTENT_TYPE); + stream.add_external_size(size); + stream.write_reference(object_id)?; + stream.done()? + }; + + // Register the stream with its content identifier + repo.register_stream(&object_id, &content_id, None).await?; + + Ok(object_id) +} + +/// Check if an image reference is an OCI layout path. +/// +/// Returns the path portion if this is an `oci:` reference. +pub fn parse_oci_transport(imgref: &str) -> Option<&str> { + imgref.strip_prefix("oci:") +} + +/// Pull from an OCI layout if the reference uses the `oci:` transport. +/// +/// Returns `None` if this is not an OCI transport reference. +pub async fn try_pull_oci_layout( + repo: &Arc>, + imgref: &str, + reference: Option<&str>, +) -> Result>> { + let Some(oci_path) = parse_oci_transport(imgref) else { + return Ok(None); + }; + + let (path_str, layout_tag) = parse_oci_layout_ref(oci_path); + let layout_path = Path::new(path_str); + + let result = import_oci_layout(repo, layout_path, layout_tag, reference).await?; + Ok(Some(result)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_oci_layout_ref() { + let cases = [ + ("/path/to/oci", ("/path/to/oci", None)), + ("/path/to/oci:latest", ("/path/to/oci", Some("latest"))), + ("/path/to/oci:v1.0.0", ("/path/to/oci", Some("v1.0.0"))), + ("./local/oci:mytag", ("./local/oci", Some("mytag"))), + ("ocidir:latest", ("ocidir", Some("latest"))), + ("ocidir", ("ocidir", None)), + ]; + for (input, expected) in cases { + assert_eq!(parse_oci_layout_ref(input), expected, "input: {input}"); + } + } + + #[test] + fn test_parse_oci_transport() { + let cases = [ + ("oci:/path/to/dir", Some("/path/to/dir")), + ("oci:/path/to/dir:tag", Some("/path/to/dir:tag")), + ("docker://image", None), + ("containers-storage:image", None), + ]; + for (input, expected) in cases { + assert_eq!(parse_oci_transport(input), expected, "input: {input}"); + } + } + + #[tokio::test] + async fn test_nested_index_rejected() { + use composefs::fsverity::Sha256HashValue; + use oci_spec::image::{DescriptorBuilder, ImageIndexBuilder, OciLayoutBuilder}; + use sha2::Digest; + + // Create a temporary OCI layout with a nested index + let tempdir = tempfile::tempdir().unwrap(); + let layout_path = tempdir.path(); + + // Create oci-layout file + let oci_layout = OciLayoutBuilder::default() + .image_layout_version("1.0.0".to_string()) + .build() + .unwrap(); + let oci_layout_path = layout_path.join("oci-layout"); + std::fs::write(&oci_layout_path, oci_layout.to_string().unwrap()).unwrap(); + + // Create blobs directory + let blobs_dir = layout_path.join("blobs/sha256"); + std::fs::create_dir_all(&blobs_dir).unwrap(); + + // Create a nested index (the thing we want to reject) + let nested_index = ImageIndexBuilder::default() + .schema_version(2u32) + .media_type(MediaType::ImageIndex) + .manifests(vec![]) + .build() + .unwrap(); + let nested_index_json = nested_index.to_string().unwrap(); + let nested_index_digest = format!( + "sha256:{}", + hex::encode(sha2::Sha256::digest(nested_index_json.as_bytes())) + ); + let nested_blob_path = blobs_dir.join(&nested_index_digest[7..]); + std::fs::write(&nested_blob_path, &nested_index_json).unwrap(); + + // Create the top-level index that points to the nested index + let nested_desc = DescriptorBuilder::default() + .media_type(MediaType::ImageIndex) + .digest( + nested_index_digest + .parse::() + .unwrap(), + ) + .size(nested_index_json.len() as u64) + .build() + .unwrap(); + + let top_index = ImageIndexBuilder::default() + .schema_version(2u32) + .media_type(MediaType::ImageIndex) + .manifests(vec![nested_desc]) + .build() + .unwrap(); + let index_path = layout_path.join("index.json"); + std::fs::write(&index_path, top_index.to_string().unwrap()).unwrap(); + + // Try to import - should fail with nested index error + let repo_dir = tempfile::tempdir().unwrap(); + let repo = std::sync::Arc::new( + composefs::repository::Repository::::open_path( + rustix::fs::CWD, + repo_dir.path(), + ) + .unwrap(), + ); + + let result = import_oci_layout(&repo, layout_path, None, None).await; + let err = result.expect_err("should reject nested index"); + let err_msg = format!("{err:#}"); + assert!( + err_msg.contains("Nested image index not supported"), + "unexpected error: {err_msg}" + ); + } +} diff --git a/crates/composefs-oci/src/skopeo.rs b/crates/composefs-oci/src/skopeo.rs index d616a864..8e4b4e5a 100644 --- a/crates/composefs-oci/src/skopeo.rs +++ b/crates/composefs-oci/src/skopeo.rs @@ -13,7 +13,6 @@ use std::{cmp::Reverse, process::Command, thread::available_parallelism}; use std::{iter::zip, sync::Arc}; use anyhow::{Context, Result}; -use async_compression::tokio::bufread::{GzipDecoder, ZstdDecoder}; use containers_image_proxy::{ ConvertedLayerInfo, ImageProxy, ImageProxyConfig, OpenedImage, Transport, }; @@ -22,7 +21,7 @@ use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; use oci_spec::image::{Descriptor, ImageConfiguration, MediaType}; use rustix::process::geteuid; use tokio::{ - io::{AsyncReadExt, AsyncWriteExt, BufReader}, + io::AsyncReadExt, sync::Semaphore, task::JoinSet, }; @@ -30,9 +29,10 @@ use tokio::{ use composefs::{fsverity::FsVerityHashValue, repository::Repository}; use crate::{ - config_identifier, layer_identifier, - oci_image::{is_tar_media_type, manifest_identifier, tag_image}, - tar::split_async, + config_identifier, + layer::{decompress_async, import_tar_async, is_tar_media_type, store_blob_async}, + layer_identifier, + oci_image::{manifest_identifier, tag_image}, ContentAndVerity, }; @@ -192,40 +192,16 @@ impl ImageOp { let media_type = descriptor.media_type(); let object_id = if is_tar_media_type(media_type) { // Tar layers: decompress and split into a splitstream - let reader: Box = match media_type { - MediaType::ImageLayer | MediaType::ImageLayerNonDistributable => { - Box::new(BufReader::new(progress)) - } - MediaType::ImageLayerGzip | MediaType::ImageLayerNonDistributableGzip => { - Box::new(BufReader::new(GzipDecoder::new(BufReader::new(progress)))) - } - MediaType::ImageLayerZstd | MediaType::ImageLayerNonDistributableZstd => { - Box::new(BufReader::new(ZstdDecoder::new(BufReader::new(progress)))) - } - _ => unreachable!("is_tar_media_type returned true"), - }; - split_async(reader, self.repo.clone(), TAR_LAYER_CONTENT_TYPE).await? + let reader = decompress_async(progress, media_type)?; + import_tar_async(self.repo.clone(), reader).await? } else { - // Non-tar layers (OCI artifacts like SBOMs, disk images, - // etc.): stream the raw bytes into a repository object and - // create a splitstream with a single external reference. - // This avoids buffering arbitrarily large blobs in memory - // and lets callers get an fd to the object directly via - // open_object(). - let tmpfile = self.repo.create_object_tmpfile()?; - let mut writer = tokio::fs::File::from(std::fs::File::from(tmpfile)); - let mut reader = progress; - let size = tokio::io::copy(&mut reader, &mut writer).await?; - writer.flush().await?; - let tmpfile = writer.into_std().await; + // Non-tar layers (OCI artifacts): stream raw bytes to object store + let (object_id, size) = store_blob_async(&self.repo, progress).await?; driver.await?; - let object_id = self.repo.finalize_object_tmpfile(tmpfile, size)?; - + // Create splitstream with external reference and register it let mut stream = self.repo.create_stream(OCI_BLOB_CONTENT_TYPE); stream.add_external_size(size); stream.write_reference(object_id)?; - // write_stream handles both object storage and stream - // registration, so we return directly. return self.repo.write_stream(stream, &content_id, None); }; @@ -420,12 +396,21 @@ impl ImageOp { /// /// Note: For backward compatibility, use `.into_config()` on the result to get /// the (config_digest, config_verity) tuple that was previously returned. +/// +/// For `oci:` transport (local OCI layout directories), this uses a fast path +/// that reads the layout directly without going through the skopeo proxy. pub async fn pull_image( repo: &Arc>, imgref: &str, reference: Option<&str>, img_proxy_config: Option, ) -> Result> { + // Fast path: local OCI layout directories + if let Some(result) = crate::oci_layout::try_pull_oci_layout(repo, imgref, reference).await? { + return Ok(result); + } + + // Standard path: use skopeo proxy for other transports let op = Arc::new(ImageOp::new(repo, imgref, img_proxy_config).await?); let result = op .pull() diff --git a/crates/integration-tests/Cargo.toml b/crates/integration-tests/Cargo.toml index 21602666..01b24fc9 100644 --- a/crates/integration-tests/Cargo.toml +++ b/crates/integration-tests/Cargo.toml @@ -10,12 +10,17 @@ path = "src/main.rs" [dependencies] anyhow = "1" -xshell = "0.2" +cap-std-ext = "4.0" +composefs = { path = "../composefs" } libtest-mimic = "0.8" linkme = "0.3" +ocidir = "0.6" paste = "1" rustix = { version = "1.0.0", default-features = false, features = ["process"] } +serde_json = "1.0" +tar = "0.4" tempfile = "3" +xshell = "0.2" [lints] workspace = true diff --git a/crates/integration-tests/src/tests/cli.rs b/crates/integration-tests/src/tests/cli.rs index a133218d..eb954f26 100644 --- a/crates/integration-tests/src/tests/cli.rs +++ b/crates/integration-tests/src/tests/cli.rs @@ -160,3 +160,297 @@ fn test_gc_dry_run() -> Result<()> { Ok(()) } integration_test!(test_gc_dry_run); + +fn test_oci_images_empty_repo() -> Result<()> { + let sh = Shell::new()?; + let cfsctl = cfsctl()?; + let repo_dir = tempfile::tempdir()?; + let repo = repo_dir.path(); + + let output = cmd!(sh, "{cfsctl} --insecure --repo {repo} oci images").read()?; + assert!( + output.contains("No images found"), + "expected 'No images found', got: {output}" + ); + Ok(()) +} +integration_test!(test_oci_images_empty_repo); + +fn test_oci_images_json_empty_repo() -> Result<()> { + let sh = Shell::new()?; + let cfsctl = cfsctl()?; + let repo_dir = tempfile::tempdir()?; + let repo = repo_dir.path(); + + let output = cmd!(sh, "{cfsctl} --insecure --repo {repo} oci images --json").read()?; + // Empty JSON array + let parsed: serde_json::Value = serde_json::from_str(&output)?; + assert!( + parsed.as_array().map(|a| a.is_empty()).unwrap_or(false), + "expected empty JSON array, got: {output}" + ); + Ok(()) +} +integration_test!(test_oci_images_json_empty_repo); + +/// Creates a minimal OCI image layout directory for testing using the ocidir crate. +/// +/// Returns the path to the OCI layout directory. +fn create_oci_layout(parent: &std::path::Path) -> Result { + use cap_std_ext::cap_std; + use ocidir::oci_spec::image::{ + ImageConfigurationBuilder, Platform, PlatformBuilder, RootFsBuilder, + }; + + let oci_dir = parent.join("oci-image"); + std::fs::create_dir_all(&oci_dir)?; + + let dir = cap_std::fs::Dir::open_ambient_dir(&oci_dir, cap_std::ambient_authority())?; + let ocidir = ocidir::OciDir::ensure(dir)?; + + // Create a new empty manifest + let mut manifest = ocidir.new_empty_manifest()?.build()?; + + // Create config with architecture and OS + let rootfs = RootFsBuilder::default() + .typ("layers") + .diff_ids(Vec::::new()) + .build()?; + let mut config = ImageConfigurationBuilder::default() + .architecture("amd64") + .os("linux") + .rootfs(rootfs) + .build()?; + + // Create a simple layer with one file + let mut layer_builder = ocidir.create_layer(None)?; + { + let data = b"hello from test layer\n"; + let mut header = tar::Header::new_gnu(); + header.set_size(data.len() as u64); + header.set_mode(0o644); + header.set_uid(0); + header.set_gid(0); + header.set_mtime(1234567890); + header.set_cksum(); + layer_builder.append_data(&mut header, "hello.txt", &data[..])?; + } + let layer = layer_builder.into_inner()?.complete()?; + + // Push the layer to manifest and config + ocidir.push_layer(&mut manifest, &mut config, layer, "test layer", None); + + // Create platform for the manifest + let platform: Platform = PlatformBuilder::default() + .architecture("amd64") + .os("linux") + .build()?; + + // Insert manifest and config into the OCI directory + ocidir.insert_manifest_and_config(manifest, config, None, platform)?; + + Ok(oci_dir) +} + +fn test_oci_pull_and_inspect() -> Result<()> { + let sh = Shell::new()?; + let cfsctl = cfsctl()?; + let repo_dir = tempfile::tempdir()?; + let repo = repo_dir.path(); + let fixture_dir = tempfile::tempdir()?; + let oci_layout = create_oci_layout(fixture_dir.path())?; + + // Pull from OCI layout + let pull_output = cmd!( + sh, + "{cfsctl} --insecure --repo {repo} oci pull oci:{oci_layout} test-image" + ) + .read()?; + assert!( + pull_output.contains("manifest sha256:"), + "expected manifest digest in output, got: {pull_output}" + ); + assert!( + pull_output.contains("tagged") && pull_output.contains("test-image"), + "expected tagged confirmation, got: {pull_output}" + ); + + // List images + let list_output = cmd!(sh, "{cfsctl} --insecure --repo {repo} oci images").read()?; + assert!( + list_output.contains("test-image"), + "expected test-image in list, got: {list_output}" + ); + + // List images as JSON + let json_output = cmd!(sh, "{cfsctl} --insecure --repo {repo} oci images --json").read()?; + let images: serde_json::Value = serde_json::from_str(&json_output)?; + let arr = images.as_array().expect("expected array"); + assert_eq!(arr.len(), 1, "expected 1 image"); + assert_eq!(arr[0]["name"], "test-image"); + assert_eq!(arr[0]["architecture"], "amd64"); + + // Inspect the image + let inspect_output = cmd!( + sh, + "{cfsctl} --insecure --repo {repo} oci inspect test-image" + ) + .read()?; + let inspect: serde_json::Value = serde_json::from_str(&inspect_output)?; + assert!( + inspect.get("manifest").is_some(), + "expected manifest in inspect output" + ); + assert!( + inspect.get("config").is_some(), + "expected config in inspect output" + ); + assert!( + inspect.get("referrers").is_some(), + "expected referrers in inspect output" + ); + + // Inspect --manifest + let manifest_output = cmd!( + sh, + "{cfsctl} --insecure --repo {repo} oci inspect test-image --manifest" + ) + .read()?; + let manifest: serde_json::Value = serde_json::from_str(&manifest_output)?; + assert_eq!(manifest["schemaVersion"], 2); + assert!(manifest.get("config").is_some()); + assert!(manifest.get("layers").is_some()); + + // Inspect --config + let config_output = cmd!( + sh, + "{cfsctl} --insecure --repo {repo} oci inspect test-image --config" + ) + .read()?; + let config: serde_json::Value = serde_json::from_str(&config_output)?; + assert_eq!(config["architecture"], "amd64"); + assert_eq!(config["os"], "linux"); + + Ok(()) +} +integration_test!(test_oci_pull_and_inspect); + +fn test_oci_layer_inspect() -> Result<()> { + use composefs::dumpfile_parse::{Entry, Item}; + use std::io::Read; + use std::path::Path; + + let sh = Shell::new()?; + let cfsctl = cfsctl()?; + let repo_dir = tempfile::tempdir()?; + let repo = repo_dir.path(); + let fixture_dir = tempfile::tempdir()?; + let oci_layout = create_oci_layout(fixture_dir.path())?; + + // Pull from OCI layout + cmd!( + sh, + "{cfsctl} --insecure --repo {repo} oci pull oci:{oci_layout} test-image" + ) + .read()?; + + // Get the layer diff_id from the config + let config_output = cmd!( + sh, + "{cfsctl} --insecure --repo {repo} oci inspect test-image --config" + ) + .read()?; + let config: serde_json::Value = serde_json::from_str(&config_output)?; + let diff_ids = config["rootfs"]["diff_ids"] + .as_array() + .expect("expected diff_ids array"); + assert_eq!(diff_ids.len(), 1, "expected 1 layer"); + let layer_id = diff_ids[0].as_str().expect("expected string"); + + // Test --json output + let json_output = cmd!( + sh, + "{cfsctl} --insecure --repo {repo} oci layer {layer_id} --json" + ) + .read()?; + let info: serde_json::Value = serde_json::from_str(&json_output)?; + assert_eq!(info["diffId"], layer_id); + assert!(info["verity"].as_str().is_some(), "expected verity hash"); + assert!(info["size"].as_u64().unwrap() > 0, "expected non-zero size"); + assert_eq!( + info["entryCount"].as_u64().unwrap(), + 1, + "expected exactly 1 entry (hello.txt)" + ); + // Check splitstream metadata + let splitstream = info + .get("splitstream") + .expect("expected splitstream metadata"); + assert!( + splitstream["externalObjects"].as_u64().is_some(), + "expected externalObjects" + ); + assert!( + splitstream["externalSize"].as_u64().is_some(), + "expected externalSize" + ); + assert!( + splitstream["inlineSize"].as_u64().is_some(), + "expected inlineSize" + ); + + // Test --dumpfile output - parse each line with the dumpfile parser + let dumpfile_output = cmd!( + sh, + "{cfsctl} --insecure --repo {repo} oci layer {layer_id} --dumpfile" + ) + .read()?; + + let mut found_hello_txt = false; + for line in dumpfile_output.lines() { + if line.trim().is_empty() { + continue; + } + let entry = Entry::parse(line) + .unwrap_or_else(|e| panic!("failed to parse dumpfile line '{line}': {e}")); + + if entry.path.as_ref() == Path::new("/hello.txt") { + found_hello_txt = true; + // Verify it's a regular file with inline content + match &entry.item { + Item::RegularInline { content, .. } => { + assert_eq!( + content.as_ref(), + b"hello from test layer\n", + "hello.txt content mismatch" + ); + } + other => panic!("expected RegularInline for hello.txt, got {:?}", other), + } + assert_eq!(entry.uid, 0, "expected uid 0"); + assert_eq!(entry.gid, 0, "expected gid 0"); + // Mode 0o644 + regular file bit (0o100000) = 0o100644 = 33188 + assert_eq!(entry.mode, 0o100644, "expected mode 0o100644"); + } + } + assert!(found_hello_txt, "expected to find /hello.txt in dumpfile"); + + // Test raw tar output - parse as actual tar and verify contents + let tar_output = cmd!(sh, "{cfsctl} --insecure --repo {repo} oci layer {layer_id}").output()?; + let mut archive = tar::Archive::new(tar_output.stdout.as_slice()); + let mut found_in_tar = false; + for entry in archive.entries()? { + let mut entry = entry?; + let path = entry.path()?; + if path.as_ref() == Path::new("hello.txt") { + found_in_tar = true; + let mut content = String::new(); + entry.read_to_string(&mut content)?; + assert_eq!(content, "hello from test layer\n", "tar content mismatch"); + } + } + assert!(found_in_tar, "expected to find hello.txt in tar output"); + + Ok(()) +} +integration_test!(test_oci_layer_inspect);