Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ jobs:
rust: [stable]
task:
- name: Test (default-tls)
run: cargo test --features build-binary,default-tls
run: cargo test --features build-binary,lzma,default-tls

- name: Test (rustls-tls)
run: cargo test --features build-binary,rustls-tls
run: cargo test --features build-binary,lzma,rustls-tls

include:
- os: ubuntu-latest
Expand Down
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

### Added

- Support `.tar.xz` and `.tar.lzma` archives

### Fixed

- Infer archive type from bytes instead of filename extension

## [v0.7.0](https://github.com/epwalsh/rust-cached-path/releases/tag/v0.7.0) - 2025-05-14

### Added
Expand Down
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,15 @@ indicatif = "0.16"
env_logger = { version = "0.10", optional = true }
structopt = { version = "0.3", optional = true }
color-eyre = { version = "0.6", optional = true }
infer = "0.19.0"
lzma-rs = { version = "0.3", optional = true }

[features]
default = ["default-tls"]
build-binary = ["env_logger", "structopt", "color-eyre"]
rustls-tls = ["reqwest/rustls-tls"]
default-tls = ["reqwest/default-tls"]
lzma = ["lzma-rs"]

[dev-dependencies]
httpmock = "0.7"
Expand Down
199 changes: 193 additions & 6 deletions src/archives.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,88 @@
use crate::error::Error;
use flate2::read::GzDecoder;
use std::fs::{self, File};
use std::io::Read;
use std::path::Path;
use tempfile::tempdir_in;

/// Supported archive types.
///
/// The `TarXz` and `TarLzma` variants only exist when the crate is built with
/// the `lzma` feature.
pub(crate) enum ArchiveFormat {
/// A gzip-compressed tar archive.
TarGz,
/// An xz-compressed tar archive.
#[cfg(feature = "lzma")]
TarXz,
/// An lzma-compressed tar archive.
#[cfg(feature = "lzma")]
TarLzma,
/// A zip archive.
Zip,
}

// see https://github.com/bojand/infer/issues/91
/// Returns `true` when `buf` begins with the five-byte prefix this crate
/// treats as an LZMA stream header.
///
/// The prefix must be `0x5D 0x00 0x00`, followed by a fourth byte that is one
/// of `0x00`, `0x01`, `0x08`, `0x10`, `0x20`, `0x40` or `0x80`, and a fifth
/// byte in `0x00..=0x02`. Buffers shorter than five bytes never match.
fn is_lzma(buf: &[u8]) -> bool {
    // A slice pattern performs the length check and the per-byte comparisons
    // in one place, so no indexing (and no lint suppression) is needed.
    matches!(
        buf,
        [
            0x5D,
            0x00,
            0x00,
            0x00 | 0x01 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
            0x00..=0x02,
            ..
        ]
    )
}

/// Builds an `infer::Infer` instance that additionally recognises
/// `application/x-lzma` (extension `lzma`) using the `is_lzma` matcher.
fn infer() -> infer::Infer {
    let mut detector = infer::Infer::new();
    detector.add("application/x-lzma", "lzma", is_lzma);
    detector
}

impl ArchiveFormat {
/// Reads exactly 262 bytes from `read` and reports whether
/// `infer::archive::is_tar` recognises them as a tar header.
///
/// Any read failure (including a stream shorter than 262 bytes) yields `false`.
fn is_tar<R: Read>(read: &mut R) -> bool {
    let mut header = [0u8; 262];
    match read.read_exact(&mut header) {
        Ok(()) => infer::archive::is_tar(&header),
        Err(_) => false,
    }
}

/// Parse archive type from resource extension.
pub(crate) fn parse_from_extension(resource: &str) -> Result<Self, Error> {
if resource.ends_with(".tar.gz") {
Ok(Self::TarGz)
} else if resource.ends_with(".zip") {
Ok(Self::Zip)
pub(crate) fn parse_from_extension(resource: &Path) -> Result<Self, Error> {
if let Some(file_type) = infer().get_from_path(resource)? {
let archive_type = match file_type.mime_type() {
"application/gzip" if Self::is_tar(&mut GzDecoder::new(File::open(resource)?)) => {
Self::TarGz
}
#[cfg(feature = "lzma")]
"application/x-xz"
if Self::is_tar(&mut lzma::LzmaDecoder::new(
lzma::Codec::Xz,
File::open(resource)?,
)?) =>
{
Self::TarXz
}
#[cfg(feature = "lzma")]
"application/x-lzma"
if Self::is_tar(&mut lzma::LzmaDecoder::new(
lzma::Codec::Lzma,
File::open(resource)?,
)?) =>
{
Self::TarLzma
}
"application/zip" => Self::Zip,
tpe => {
return Err(Error::ExtractionError(format!(
"unsupported file format: {tpe}"
)))
}
};
Ok(archive_type)
} else {
Err(Error::ExtractionError("unsupported archive format".into()))
Err(Error::ExtractionError(
"cannot determine archive file type".into(),
))
}
}
}
Expand All @@ -39,6 +103,18 @@ pub(crate) fn extract_archive<P: AsRef<Path>>(
let mut archive = tar::Archive::new(tar);
archive.unpack(&temp_target)?;
}
#[cfg(feature = "lzma")]
ArchiveFormat::TarXz => {
let xz_decoder = lzma::LzmaDecoder::new(lzma::Codec::Xz, File::open(path)?)?;
let mut archive = tar::Archive::new(xz_decoder);
archive.unpack(&temp_target)?;
}
#[cfg(feature = "lzma")]
ArchiveFormat::TarLzma => {
let lzma_decoder = lzma::LzmaDecoder::new(lzma::Codec::Lzma, File::open(path)?)?;
let mut archive = tar::Archive::new(lzma_decoder);
archive.unpack(&temp_target)?;
}
ArchiveFormat::Zip => {
let file = File::open(path)?;
let mut archive =
Expand All @@ -54,3 +130,114 @@ pub(crate) fn extract_archive<P: AsRef<Path>>(

Ok(())
}

#[cfg(feature = "lzma")]
mod lzma {
use std::io::Read;
use std::thread::JoinHandle;

/// Selects which `lzma_rs` decompression routine a decoder runs.
#[derive(Clone, Copy)]
pub(super) enum Codec {
/// Decode with `lzma_rs::lzma_decompress`.
Lzma,
/// Decode with `lzma_rs::xz_decompress`.
Xz,
}

impl std::fmt::Display for Codec {
    /// Writes the lowercase codec name (`lzma` or `xz`), which is used as the
    /// prefix of decoder error messages.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            Codec::Lzma => "lzma",
            Codec::Xz => "xz",
        };
        f.write_str(name)
    }
}

/// A `Read` adapter over a decompression routine that runs on a background
/// thread and streams its output through a pipe.
pub(super) struct LzmaDecoder {
// Codec the worker was started with; used to label error messages.
codec: Codec,
// Handle of the worker thread; becomes `None` once the thread has been joined.
decoder_handle: Option<JoinHandle<Result<(), lzma_rs::error::Error>>>,
// Read end of the pipe the worker writes decompressed bytes into.
pipe_reader: std::io::PipeReader,
}

impl LzmaDecoder {
    /// Starts decompressing `reader` with the given `codec` on a new thread.
    ///
    /// The worker wraps `reader` in a `BufReader` and writes the decompressed
    /// bytes into the write end of a freshly created pipe; the returned decoder
    /// owns the read end and the worker's join handle.
    ///
    /// # Errors
    ///
    /// Returns the I/O error from `std::io::pipe` if the pipe cannot be created.
    pub(super) fn new<R: Read + Send + 'static>(
        codec: Codec,
        reader: R,
    ) -> std::io::Result<Self> {
        let (pipe_reader, mut sink) = std::io::pipe()?;
        let worker = move || {
            let mut source = std::io::BufReader::new(reader);
            match codec {
                Codec::Lzma => lzma_rs::lzma_decompress(&mut source, &mut sink),
                Codec::Xz => lzma_rs::xz_decompress(&mut source, &mut sink),
            }
        };
        Ok(Self {
            codec,
            decoder_handle: Some(std::thread::spawn(worker)),
            pipe_reader,
        })
    }
}

impl Read for LzmaDecoder {
    /// Reads decompressed bytes produced by the worker thread.
    ///
    /// Returns `Ok(0)` only for an empty `buf` or once the stream has ended
    /// and the worker completed successfully.
    ///
    /// # Errors
    ///
    /// Propagates I/O errors from the pipe. When the pipe reaches end-of-file,
    /// the worker is joined and a panic or decompression failure is reported
    /// as an `std::io::Error` whose message starts with the codec name.
    fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
        // A zero-length read says nothing about the stream state. Answer it
        // directly instead of mistaking it for end-of-file.
        if buf.is_empty() {
            return Ok(0);
        }

        let size = self.pipe_reader.read(buf)?;
        if size == 0 {
            // End-of-file means the write end has been dropped, i.e. the worker
            // closure has returned (or unwound). Joining therefore completes
            // promptly and replaces the former unbounded retry recursion.
            if let Some(handle) = self.decoder_handle.take() {
                let codec = self.codec;
                handle
                    .join()
                    .map_err(|_| {
                        std::io::Error::other(format!("{codec} decompression thread panicked"))
                    })?
                    .map_err(|e| {
                        std::io::Error::other(format!("{codec} decompression error: {e}"))
                    })?;
            }
        }
        Ok(size)
    }
}

#[cfg(test)]
mod test {

    use super::*;

    /// 1 KiB of a repeated byte that is neither a valid xz nor lzma stream.
    const GARBAGE: &[u8] = &[0x42u8; 1024];

    /// Decodes `input` with `codec` and discards the output, panicking (via
    /// `unwrap`) on any construction or read error.
    fn drain<R: Read + Send + 'static>(codec: Codec, input: R) {
        let mut decoder = LzmaDecoder::new(codec, input).unwrap();
        std::io::copy(&mut decoder, &mut Vec::new()).unwrap();
    }

    #[test]
    #[should_panic(expected = "xz decompression error")]
    fn test_xz_decoder_empty() {
        drain(Codec::Xz, std::io::empty());
    }

    #[test]
    #[should_panic(expected = "xz decompression error")]
    fn test_xz_decoder_bad() {
        drain(Codec::Xz, GARBAGE);
    }

    #[test]
    #[should_panic(expected = "lzma decompression error")]
    fn test_lzma_decoder_empty() {
        drain(Codec::Lzma, std::io::empty());
    }

    #[test]
    #[should_panic(expected = "lzma decompression error")]
    fn test_lzma_decoder_bad() {
        drain(Codec::Lzma, GARBAGE);
    }
}
}
2 changes: 1 addition & 1 deletion src/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ impl Cache {

if !dirpath.is_dir() {
info!("Extracting {} to {:?}", resource, dirpath);
let format = ArchiveFormat::parse_from_extension(resource)?;
let format = ArchiveFormat::parse_from_extension(&cached_path)?;
extract_archive(&cached_path, &dirpath, &format)?;
}

Expand Down
59 changes: 21 additions & 38 deletions src/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,24 +193,17 @@ fn test_cached_path_remote_file_in_subdir() {
assert!(Meta::meta_path(&path).is_file());
}

#[test]
fn test_extract_tar_gz() {
fn assert_extract_archive(filename: &str) {
let cache_dir = tempdir().unwrap();
let cache = Cache::builder()
.dir(cache_dir.path().to_owned())
.progress_bar(None)
.build()
.unwrap();

let resource: PathBuf = [
".",
"test_fixtures",
"utf-8_sample",
"archives",
"utf-8.tar.gz",
]
.iter()
.collect();
let resource: PathBuf = [".", "test_fixtures", "utf-8_sample", "archives", filename]
.iter()
.collect();

let path = cache
.cached_path_with_options(resource.to_str().unwrap(), &Options::default().extract())
Expand All @@ -226,35 +219,25 @@ fn test_extract_tar_gz() {
}

#[test]
fn test_extract_zip() {
let cache_dir = tempdir().unwrap();
let cache = Cache::builder()
.dir(cache_dir.path().to_owned())
.progress_bar(None)
.build()
.unwrap();
fn test_extract_tar_gz() {
assert_extract_archive("utf-8.tar.gz");
}

let resource: PathBuf = [
".",
"test_fixtures",
"utf-8_sample",
"archives",
"utf-8.zip",
]
.iter()
.collect();
// Runs the shared `assert_extract_archive` helper against the `.tar.xz`
// fixture; compiled only when the `lzma` feature is enabled.
#[cfg(feature = "lzma")]
#[test]
fn test_extract_tar_xz() {
assert_extract_archive("utf-8.tar.xz");
}

let path = cache
.cached_path_with_options(resource.to_str().unwrap(), &Options::default().extract())
.unwrap();
assert!(path.is_dir());
assert!(path.to_str().unwrap().ends_with("-extracted"));
assert!(path
.to_str()
.unwrap()
.starts_with(cache_dir.path().to_str().unwrap()));
let sample_file_path = path.join("dummy.txt");
assert!(sample_file_path.is_file());
// Runs the shared `assert_extract_archive` helper against the `.tar.lzma`
// fixture; compiled only when the `lzma` feature is enabled.
#[cfg(feature = "lzma")]
#[test]
fn test_extract_tar_lzma() {
assert_extract_archive("utf-8.tar.lzma");
}

// Runs the shared `assert_extract_archive` helper against the `.zip` fixture.
#[test]
fn test_extract_zip() {
assert_extract_archive("utf-8.zip");
}

#[test]
Expand Down
Binary file added test_fixtures/utf-8_sample/archives/utf-8.tar.xz
Binary file not shown.
Loading