diff --git a/Cargo.toml b/Cargo.toml index eea0734..1bec1f4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "libcramjam" -version = "0.7.0" +version = "0.8.0" edition = "2021" license = "MIT" description = "Compression library combining a plethora of algorithms in a similar as possible API" @@ -33,16 +33,16 @@ isal-shared = ["dep:isal-rs", "isal-rs/shared"] use-system-isal = ["dep:isal-rs", "isal-rs/use-system-isal"] gzip = ["gzip-static"] -gzip-static = ["dep:libdeflater", "dep:libdeflate-sys", "dep:flate2"] -gzip-shared = ["dep:libdeflater", "dep:libdeflate-sys", "dep:flate2", "libdeflate-sys/dynamic"] +gzip-static = ["dep:libdeflate-sys", "dep:flate2", "deflate-static"] +gzip-shared = ["dep:libdeflate-sys", "dep:flate2", "libdeflate-sys/dynamic", "deflate-shared"] zlib = ["zlib-static"] -zlib-static = ["dep:libdeflater", "dep:libdeflate-sys", "dep:flate2"] -zlib-shared = ["dep:libdeflater", "dep:libdeflate-sys", "dep:flate2", "libdeflate-sys/dynamic"] +zlib-static = ["dep:libdeflate-sys", "dep:flate2", "deflate-static"] +zlib-shared = ["dep:libdeflate-sys", "dep:flate2", "libdeflate-sys/dynamic", "deflate-shared"] deflate = ["deflate-static"] -deflate-static = ["dep:libdeflater", "dep:libdeflate-sys", "dep:flate2"] -deflate-shared = ["dep:libdeflater", "dep:libdeflate-sys", "dep:flate2", "libdeflate-sys/dynamic"] +deflate-static = ["dep:libdeflate-sys", "dep:flate2"] +deflate-shared = ["dep:libdeflate-sys", "dep:flate2", "libdeflate-sys/dynamic"] xz = ["xz-static"] xz-static = ["dep:xz2", "xz2/static"] # builds from vendored source of xz @@ -62,7 +62,6 @@ brotli = { version = "^7", default-features = false, features = ["std", bzip2 = { version = ">=0.4,<0.6", optional = true } lz4 = { version = "^1", optional = true } flate2 = { version = "^1", optional = true } -libdeflater = { version = "^1", optional = true } libdeflate-sys = { version = "<1.20.0", optional = true } # TODO: requires gcc>=4.9 not available on Python's CI wheel builds blosc2-rs = { version = "0.4.0+2.15.2", optional = true, default-features = false } zstd = { version = "^0.13", optional = true } diff --git a/src/capi.rs b/src/capi.rs index 8a34560..60f27cf 100644 --- a/src/capi.rs +++ b/src/capi.rs @@ -253,23 +253,34 @@ pub extern "C" fn compress( #[cfg(feature = "bzip2")] Codec::Bzip2 => bzip2::compress(&mut decompressed, &mut compressed, level.map(|v| v as _)), #[cfg(feature = "brotli")] - Codec::Brotli => brotli::compress(&mut decompressed, &mut compressed, level.map(|v| v as _)), + Codec::Brotli => { + brotli::compress(&mut decompressed, &mut compressed, level.map(|v| v as _)) + } #[cfg(feature = "gzip")] Codec::Gzip => gzip::compress(&mut decompressed, &mut compressed, level.map(|v| v as _)), #[cfg(feature = "zstd")] - Codec::Zstd => zstd::compress(&mut decompressed, &mut compressed, level.map(|v: i32| v as i32)), + Codec::Zstd => zstd::compress( + &mut decompressed, + &mut compressed, + level.map(|v: i32| v as i32), + ), #[cfg(feature = "lz4")] Codec::Lz4 => lz4::compress(&mut decompressed, &mut compressed, level.map(|v| v as _)), // TODO: Support passing acceleration #[cfg(feature = "lz4")] - Codec::Lz4Block => lz4::block::compress_vec(decompressed.get_ref(), level.map(|v| v as _), None, Some(true)) - .map(|v| { - let len = v.len(); - *compressed.get_mut() = v; - compressed.set_position(len as _); - decompressed.set_position(input_len as _); - len - }), // TODO + Codec::Lz4Block => lz4::block::compress_vec( + decompressed.get_ref(), + level.map(|v| v as _), + None, + Some(true), + ) + .map(|v| { + let len = v.len(); + *compressed.get_mut() = v; + compressed.set_position(len as _); + decompressed.set_position(input_len as _); + len + }), // TODO }; match ret { Ok(n) => { @@ -302,7 +313,8 @@ pub extern "C" fn decompress_into( error: &mut *mut c_char, ) { let mut compressed = Cursor::new(unsafe { std::slice::from_raw_parts(input, input_len) }); - let mut decompressed = Cursor::new(unsafe { std::slice::from_raw_parts_mut(output, output_len) }); + let mut decompressed = + Cursor::new(unsafe { std::slice::from_raw_parts_mut(output, output_len) }); let ret: Result = match codec { #[cfg(feature = "snappy")] @@ -320,7 +332,9 @@ pub extern "C" fn decompress_into( #[cfg(feature = "lz4")] Codec::Lz4 => lz4::decompress(&mut compressed, &mut decompressed), #[cfg(feature = "lz4")] - Codec::Lz4Block => lz4::block::decompress_into(&compressed.get_ref(), decompressed.get_mut(), None), + Codec::Lz4Block => { + lz4::block::decompress_into(&compressed.get_ref(), decompressed.get_mut(), None) + } }; match ret { Ok(n) => { @@ -364,16 +378,28 @@ pub extern "C" fn compress_into( #[cfg(feature = "bzip2")] Codec::Bzip2 => bzip2::compress(&mut decompressed, &mut compressed, level.map(|v| v as _)), #[cfg(feature = "brotli")] - Codec::Brotli => brotli::compress(&mut decompressed, &mut compressed, level.map(|v| v as _)), + Codec::Brotli => { + brotli::compress(&mut decompressed, &mut compressed, level.map(|v| v as _)) + } #[cfg(feature = "gzip")] Codec::Gzip => gzip::compress(&mut decompressed, &mut compressed, level.map(|v| v as _)), #[cfg(feature = "zstd")] - Codec::Zstd => zstd::compress(&mut decompressed, &mut compressed, level.map(|v: i32| v as i32)), + Codec::Zstd => zstd::compress( + &mut decompressed, + &mut compressed, + level.map(|v: i32| v as i32), + ), #[cfg(feature = "lz4")] Codec::Lz4 => lz4::compress(&mut decompressed, &mut compressed, level.map(|v| v as _)), // TODO: Support passing acceleration #[cfg(feature = "lz4")] - Codec::Lz4Block => lz4::block::compress_into(decompressed, compressed, level.map(|v| v as _), None, Some(true)), + Codec::Lz4Block => lz4::block::compress_into( + decompressed, + compressed, + level.map(|v| v as _), + None, + Some(true), + ), }; match ret { Ok(n) => { @@ -391,7 +417,11 @@ pub extern "C" fn compress_into( /* ---------- Streaming Compressor --------------- */ #[no_mangle] #[allow(unused_variables)] -pub extern "C" fn compressor_init(codec: StreamingCodec, level: i32, error: &mut *mut c_char) -> *mut c_void { +pub extern "C" fn compressor_init( + codec: StreamingCodec, + level: i32, + error: &mut *mut c_char, +) -> *mut c_void { match codec { #[cfg(feature = "bzip2")] StreamingCodec::StreamingBzip2 => { @@ -399,7 +429,10 @@ pub extern "C" fn compressor_init(codec: StreamingCodec, level: i32, error: &mut error_to_ptr("Bzip2 requires compression level >= 0", error); return std::ptr::null_mut(); } - let compressor = bzip2::bzip2::write::BzEncoder::new(vec![], bzip2::bzip2::Compression::new(level as _)); + let compressor = bzip2::bzip2::write::BzEncoder::new( + vec![], + bzip2::bzip2::Compression::new(level as _), + ); Box::into_raw(Box::new(compressor)) as _ } #[cfg(feature = "brotli")] @@ -417,7 +450,10 @@ pub extern "C" fn compressor_init(codec: StreamingCodec, level: i32, error: &mut error_to_ptr("Gzip requires compression level >= 1", error); return std::ptr::null_mut(); } - let compressor = gzip::flate2::write::GzEncoder::new(vec![], gzip::flate2::Compression::new(level as _)); + let compressor = gzip::flate2::write::GzEncoder::new( + vec![], + gzip::flate2::Compression::new(level as _), + ); Box::into_raw(Box::new(compressor)) as _ } #[cfg(feature = "zstd")] @@ -478,7 +514,10 @@ pub extern "C" fn free_compressor(codec: StreamingCodec, compressor_ptr: &mut *m } #[no_mangle] -pub extern "C" fn compressor_inner(codec: StreamingCodec, compressor_ptr: &mut *mut c_void) -> Buffer { +pub extern "C" fn compressor_inner( + codec: StreamingCodec, + compressor_ptr: &mut *mut c_void, +) -> Buffer { match codec { #[cfg(feature = "bzip2")] StreamingCodec::StreamingBzip2 => { @@ -510,7 +549,8 @@ pub extern "C" fn compressor_inner(codec: StreamingCodec, compressor_ptr: &mut * } #[cfg(feature = "snappy")] StreamingCodec::StreamingSnappy => { - let compressor = unsafe { Box::from_raw(*compressor_ptr as *mut SnappyFrameCompressor) }; + let compressor = + unsafe { Box::from_raw(*compressor_ptr as *mut SnappyFrameCompressor) }; let buffer = Buffer::from(compressor.get_ref()); *compressor_ptr = Box::into_raw(compressor) as _; buffer @@ -577,7 +617,8 @@ pub extern "C" fn compressor_finish( } #[cfg(feature = "snappy")] StreamingCodec::StreamingSnappy => { - let compressor = unsafe { Box::from_raw(*compressor_ptr as *mut SnappyFrameCompressor) }; + let compressor = + unsafe { Box::from_raw(*compressor_ptr as *mut SnappyFrameCompressor) }; match compressor.into_inner() { Ok(buf) => Buffer::from(buf), Err(err) => { @@ -604,7 +645,11 @@ pub extern "C" fn compressor_finish( } #[no_mangle] -pub extern "C" fn compressor_flush(codec: StreamingCodec, compressor_ptr: &mut *mut c_void, error: &mut *mut c_char) { +pub extern "C" fn compressor_flush( + codec: StreamingCodec, + compressor_ptr: &mut *mut c_void, + error: &mut *mut c_char, +) { match codec { #[cfg(feature = "bzip2")] StreamingCodec::StreamingBzip2 => { @@ -640,7 +685,8 @@ pub extern "C" fn compressor_flush(codec: StreamingCodec, compressor_ptr: &mut * } #[cfg(feature = "snappy")] StreamingCodec::StreamingSnappy => { - let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut SnappyFrameCompressor) }; + let mut compressor = + unsafe { Box::from_raw(*compressor_ptr as *mut SnappyFrameCompressor) }; if let Err(err) = compressor.flush() { error_to_ptr(err, error); } @@ -727,7 +773,8 @@ pub extern "C" fn compressor_compress( } #[cfg(feature = "snappy")] StreamingCodec::StreamingSnappy => { - let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut SnappyFrameCompressor) }; + let mut compressor = + unsafe { Box::from_raw(*compressor_ptr as *mut SnappyFrameCompressor) }; match std::io::copy(&mut decompressed, &mut compressor) { Ok(n) => { *nbytes_written = n as _; @@ -778,7 +825,10 @@ pub extern "C" fn free_decompressor(codec: StreamingCodec, decompressor_ptr: &mu #[no_mangle] #[allow(unused_variables)] -pub extern "C" fn decompressor_inner(codec: StreamingCodec, decompressor_ptr: &mut *mut c_void) -> Buffer { +pub extern "C" fn decompressor_inner( + codec: StreamingCodec, + decompressor_ptr: &mut *mut c_void, +) -> Buffer { let decompressor = unsafe { Box::from_raw(*decompressor_ptr as *mut Decompressor) }; let buf = Buffer::from(decompressor.get_ref()); *decompressor_ptr = Box::into_raw(decompressor) as _; @@ -877,15 +927,14 @@ pub extern "C" fn lz4_block_max_compressed_len(input_len: usize, error: &mut *mu #[cfg(feature = "deflate")] #[no_mangle] -pub extern "C" fn deflate_max_compressed_len(input_len: usize, level: i32) -> usize { - deflate::compress_bound(input_len, Some(level)) +pub extern "C" fn deflate_max_compressed_len(input_len: usize) -> usize { + deflate::compress_bound(input_len) } #[cfg(feature = "gzip")] #[no_mangle] -pub extern "C" fn gzip_max_compressed_len(input_len: usize, level: i32) -> usize { - let level = if level < 0 { 0 } else { level }; - gzip::compress_bound(input_len, Some(level)).unwrap() +pub extern "C" fn gzip_max_compressed_len(input_len: usize) -> usize { + gzip::compress_bound(input_len) } #[cfg(feature = "zstd")] @@ -908,7 +957,11 @@ pub extern "C" fn brotli_max_compressed_len(input_len: usize) -> usize { #[cfg(feature = "snappy")] #[no_mangle] -pub extern "C" fn snappy_raw_decompressed_len(input: *const u8, input_len: usize, error: &mut *mut c_char) -> isize { +pub extern "C" fn snappy_raw_decompressed_len( + input: *const u8, + input_len: usize, + error: &mut *mut c_char, +) -> isize { let input = unsafe { slice::from_raw_parts(input, input_len) }; match snap::raw::decompress_len(input) { Ok(n) => n as _, @@ -1037,7 +1090,8 @@ mod tests { assert!(buffer.owned); // retrieve compressed data and compare to actual rust impl - let compressed = unsafe { Vec::from_raw_parts(buffer.data as *mut u8, buffer.len, buffer.len) }; + let compressed = + unsafe { Vec::from_raw_parts(buffer.data as *mut u8, buffer.len, buffer.len) }; assert_eq!(&compressed, expected); // And decompress @@ -1060,7 +1114,8 @@ mod tests { assert_eq!(nbytes_written, buffer.len); assert_eq!(nbytes_written, DATA.len()); assert!(buffer.owned); - let decompressed = unsafe { Vec::from_raw_parts(buffer.data as *mut u8, buffer.len, buffer.len) }; + let decompressed = + unsafe { Vec::from_raw_parts(buffer.data as *mut u8, buffer.len, buffer.len) }; assert_eq!(DATA.as_slice(), &decompressed); } } diff --git a/src/deflate.rs b/src/deflate.rs index c946b99..e687868 100644 --- a/src/deflate.rs +++ b/src/deflate.rs @@ -2,16 +2,17 @@ pub use flate2; use flate2::read::{DeflateDecoder, DeflateEncoder}; use flate2::Compression; -use libdeflater; use std::io::prelude::*; use std::io::Error; -const DEFAULT_COMPRESSION_LEVEL: u32 = 6; +pub const DEFAULT_COMPRESSION_LEVEL: u32 = 6; +pub const MIN_BLOCK_LENGTH: usize = 5_000; -pub fn compress_bound(input_len: usize, level: Option) -> usize { - let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL as _); - let mut c = libdeflater::Compressor::new(libdeflater::CompressionLvl::new(level).unwrap()); - c.deflate_compress_bound(input_len) +/// Compression upper bound +// xref: https://github.com/ebiggers/libdeflate/blob/6bb493615b0ef35c98fc4aa4ec04f448788db6a5/lib/deflate_compress.c#L4081 +pub fn compress_bound(input_len: usize) -> usize { + let max_blocks = std::cmp::max((input_len + MIN_BLOCK_LENGTH - 1) / MIN_BLOCK_LENGTH, 1); + (5 * max_blocks) + input_len } /// Decompress gzip data @@ -24,7 +25,11 @@ pub fn decompress(input: R, output: &mut W) -> Resul /// Compress gzip data #[inline(always)] -pub fn compress(input: R, output: &mut W, level: Option) -> Result { +pub fn compress( + input: R, + output: &mut W, + level: Option, +) -> Result { let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL); let mut encoder = DeflateEncoder::new(input, Compression::new(level)); diff --git a/src/gzip.rs b/src/gzip.rs index 588c254..79005f8 100644 --- a/src/gzip.rs +++ b/src/gzip.rs @@ -5,15 +5,15 @@ use flate2::Compression; use std::io::prelude::*; use std::io::{Cursor, Error}; -const DEFAULT_COMPRESSION_LEVEL: u32 = 6; +pub const DEFAULT_COMPRESSION_LEVEL: u32 = 6; +pub const GZIP_FOOTER_SIZE: usize = 8; +pub const GZIP_MIN_HEADER_SIZE: usize = 10; +pub const GZIP_MIN_OVERHEAD: usize = GZIP_MIN_HEADER_SIZE + GZIP_FOOTER_SIZE; -pub fn compress_bound(input_len: usize, level: Option) -> Result { - let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL as _); - let mut c = libdeflater::Compressor::new( - libdeflater::CompressionLvl::new(level) - .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, format!("{:?}", e)))?, - ); - Ok(c.gzip_compress_bound(input_len)) +/// Compression upper bound +// xref: https://github.com/ebiggers/libdeflate/blob/6bb493615b0ef35c98fc4aa4ec04f448788db6a5/lib/gzip_compress.c#L85 +pub fn compress_bound(input_len: usize) -> usize { + GZIP_MIN_OVERHEAD + crate::deflate::compress_bound(input_len) } /// Decompress gzip data @@ -28,7 +28,11 @@ pub fn decompress(input: R, output: &mut W) -> Resul /// Compress gzip data #[inline(always)] -pub fn compress(input: R, output: &mut W, level: Option) -> Result { +pub fn compress( + input: R, + output: &mut W, + level: Option, +) -> Result { let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL); let mut encoder = GzEncoder::new(input, Compression::new(level)); let n_bytes = std::io::copy(&mut encoder, output)?; diff --git a/src/zlib.rs b/src/zlib.rs index daf151f..a103493 100644 --- a/src/zlib.rs +++ b/src/zlib.rs @@ -7,6 +7,16 @@ use std::io::{Cursor, Error}; const DEFAULT_COMPRESSION_LEVEL: u32 = 6; +pub const ZLIB_MIN_HEADER_SIZE: usize = 2; +pub const ZLIB_FOOTER_SIZE: usize = 4; +pub const ZLIB_MIN_OVERHEAD: usize = ZLIB_MIN_HEADER_SIZE + ZLIB_FOOTER_SIZE; + +/// Compression upper bound +// xref: https://github.com/ebiggers/libdeflate/blob/6bb493615b0ef35c98fc4aa4ec04f448788db6a5/lib/zlib_compress.c#L77 +pub fn compress_bound(len: usize) -> usize { + ZLIB_MIN_OVERHEAD + crate::deflate::compress_bound(len) +} + /// Decompress zlib data #[inline(always)] pub fn decompress(input: R, output: &mut W) -> Result {