From a0c509bf160ecdc3f48b50b4e733b1ab50841b35 Mon Sep 17 00:00:00 2001 From: Philippe Laflamme Date: Tue, 20 May 2025 17:40:59 -0400 Subject: [PATCH 1/4] feat: use infer to determine archive type --- Cargo.toml | 1 + src/archives.rs | 26 ++++++++++++++++++++------ src/cache.rs | 2 +- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 7fd7dbd..1cc45e0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,6 +41,7 @@ indicatif = "0.16" env_logger = { version = "0.10", optional = true } structopt = { version = "0.3", optional = true } color-eyre = { version = "0.6", optional = true } +infer = "0.19.0" [features] default = ["default-tls"] diff --git a/src/archives.rs b/src/archives.rs index 3095a2a..0ba5989 100644 --- a/src/archives.rs +++ b/src/archives.rs @@ -1,6 +1,7 @@ use crate::error::Error; use flate2::read::GzDecoder; use std::fs::{self, File}; +use std::io::Read; use std::path::Path; use tempfile::tempdir_in; @@ -11,14 +12,27 @@ pub(crate) enum ArchiveFormat { } impl ArchiveFormat { + fn is_tar(read: &mut R) -> bool { + let mut buf = [0; 262]; + read.read_exact(&mut buf) + .is_ok_and(|_| infer::archive::is_tar(&buf)) + } + /// Parse archive type from resource extension. - pub(crate) fn parse_from_extension(resource: &str) -> Result { - if resource.ends_with(".tar.gz") { - Ok(Self::TarGz) - } else if resource.ends_with(".zip") { - Ok(Self::Zip) + pub(crate) fn parse_from_extension(resource: &Path) -> Result { + if let Some(file_type) = infer::get_from_path(resource)? { + let archive_type = match file_type.mime_type() { + "application/gzip" if Self::is_tar(&mut GzDecoder::new(File::open(resource)?)) => { + Self::TarGz + } + "application/zip" => Self::Zip, + _ => return Err(Error::ExtractionError("unsupported archive format".into())), + }; + Ok(archive_type) } else { - Err(Error::ExtractionError("unsupported archive format".into())) + Err(Error::ExtractionError( + "cannot determine archive file type".into(), + )) } } } diff --git a/src/cache.rs b/src/cache.rs index b667e00..ca260c9 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -330,7 +330,7 @@ impl Cache { if !dirpath.is_dir() { info!("Extracting {} to {:?}", resource, dirpath); - let format = ArchiveFormat::parse_from_extension(resource)?; + let format = ArchiveFormat::parse_from_extension(&cached_path)?; extract_archive(&cached_path, &dirpath, &format)?; } From e68eb3daacd2db26377bb1df87ee89cf17736929 Mon Sep 17 00:00:00 2001 From: Philippe Laflamme Date: Tue, 20 May 2025 21:29:39 -0400 Subject: [PATCH 2/4] feat: add optional `tar.xz` support --- Cargo.toml | 2 + src/archives.rs | 76 +++++++++++++++++- src/test.rs | 58 +++++-------- .../utf-8_sample/archives/utf-8.tar.xz | Bin 0 -> 5580 bytes 4 files changed, 97 insertions(+), 39 deletions(-) create mode 100644 test_fixtures/utf-8_sample/archives/utf-8.tar.xz diff --git a/Cargo.toml b/Cargo.toml index 1cc45e0..bd7c368 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,12 +42,14 @@ env_logger = { version = "0.10", optional = true } structopt = { version = "0.3", optional = true } color-eyre = { version = "0.6", optional = true } infer = "0.19.0" +lzma-rs = { version = "0.3", optional = true } [features] default = ["default-tls"] build-binary = ["env_logger", "structopt", "color-eyre"] rustls-tls = ["reqwest/rustls-tls"] default-tls = ["reqwest/default-tls"] +lzma = ["lzma-rs"] [dev-dependencies] httpmock = "0.7" diff --git a/src/archives.rs b/src/archives.rs index 0ba5989..a86be75 100644 --- a/src/archives.rs +++ b/src/archives.rs @@ -8,6 +8,8 @@ use tempfile::tempdir_in; /// Supported archive types. pub(crate) enum ArchiveFormat { TarGz, + #[cfg(feature = "lzma")] + TarXz, Zip, } @@ -25,8 +27,18 @@ impl ArchiveFormat { "application/gzip" if Self::is_tar(&mut GzDecoder::new(File::open(resource)?)) => { Self::TarGz } + #[cfg(feature = "lzma")] + "application/x-xz" + if Self::is_tar(&mut xz::XzDecoder::new(File::open(resource)?)?) => + { + Self::TarXz + } "application/zip" => Self::Zip, - _ => return Err(Error::ExtractionError("unsupported archive format".into())), + tpe => { + return Err(Error::ExtractionError(format!( + "unsupported file format: {tpe}" + ))) + } }; Ok(archive_type) } else { @@ -53,6 +65,12 @@ pub(crate) fn extract_archive>( let mut archive = tar::Archive::new(tar); archive.unpack(&temp_target)?; } + #[cfg(feature = "lzma")] + ArchiveFormat::TarXz => { + let xz_decoder = xz::XzDecoder::new(File::open(path)?)?; + let mut archive = tar::Archive::new(xz_decoder); + archive.unpack(&temp_target)?; + } ArchiveFormat::Zip => { let file = File::open(path)?; let mut archive = @@ -68,3 +86,59 @@ pub(crate) fn extract_archive>( Ok(()) } + +#[cfg(feature = "lzma")] +mod xz { + use std::io::Read; + use std::thread::JoinHandle; + pub(super) struct XzDecoder { + decoder_handle: Option>>, + pipe_reader: std::io::PipeReader, + } + + impl XzDecoder { + pub(super) fn new(reader: R) -> std::io::Result { + let (pipe_reader, mut pipe_writer) = std::io::pipe()?; + let decoder_handle = std::thread::spawn(move || { + lzma_rs::xz_decompress(&mut std::io::BufReader::new(reader), &mut pipe_writer) + }); + Ok(Self { + decoder_handle: Some(decoder_handle), + pipe_reader, + }) + } + } + + impl Read for XzDecoder { + fn read(&mut self, buf: &mut [u8]) -> Result { + let size = self.pipe_reader.read(buf)?; + if let Some(handle) = self.decoder_handle.take_if(|h| h.is_finished()) { + handle + .join() + .map_err(|_| std::io::Error::other("xz decompression thread panicked"))? + .map_err(|e| std::io::Error::other(format!("xz decompression error: {e}")))?; + } + Ok(size) + } + } + + #[cfg(test)] + mod test { + + use super::XzDecoder; + #[test] + #[should_panic(expected = "xz decompression error")] + fn test_xz_decoder_empty() { + let mut decoder = XzDecoder::new(std::io::empty()).unwrap(); + std::io::copy(&mut decoder, &mut Vec::new()).unwrap(); + } + + #[test] + #[should_panic(expected = "xz decompression error")] + fn test_xz_decoder_bad() { + let bad: &[u8] = &[0x42u8; 1024]; + let mut decoder = XzDecoder::new(bad).unwrap(); + std::io::copy(&mut decoder, &mut Vec::new()).unwrap(); + } + } +} diff --git a/src/test.rs b/src/test.rs index 24e40b2..6ffd31f 100644 --- a/src/test.rs +++ b/src/test.rs @@ -193,8 +193,7 @@ fn test_cached_path_remote_file_in_subdir() { assert!(Meta::meta_path(&path).is_file()); } -#[test] -fn test_extract_tar_gz() { +fn assert_extract_archive(filename: &str) { let cache_dir = tempdir().unwrap(); let cache = Cache::builder() .dir(cache_dir.path().to_owned()) @@ -202,15 +201,9 @@ fn test_extract_tar_gz() { .build() .unwrap(); - let resource: PathBuf = [ - ".", - "test_fixtures", - "utf-8_sample", - "archives", - "utf-8.tar.gz", - ] - .iter() - .collect(); + let resource: PathBuf = [".", "test_fixtures", "utf-8_sample", "archives", filename] + .iter() + .collect(); let path = cache .cached_path_with_options(resource.to_str().unwrap(), &Options::default().extract()) @@ -226,35 +219,24 @@ fn test_extract_tar_gz() { } #[test] -fn test_extract_zip() { - let cache_dir = tempdir().unwrap(); - let cache = Cache::builder() - .dir(cache_dir.path().to_owned()) - .progress_bar(None) - .build() - .unwrap(); +fn test_extract_tar_gz() { + assert_extract_archive("utf-8.tar.gz"); +} - let resource: PathBuf = [ - ".", - "test_fixtures", - "utf-8_sample", - "archives", - "utf-8.zip", - ] - .iter() - .collect(); +#[test] +fn test_extract_tar_xz() { + assert_extract_archive("utf-8.tar.xz"); +} - let path = cache - .cached_path_with_options(resource.to_str().unwrap(), &Options::default().extract()) - .unwrap(); - assert!(path.is_dir()); - assert!(path.to_str().unwrap().ends_with("-extracted")); - assert!(path - .to_str() - .unwrap() - .starts_with(cache_dir.path().to_str().unwrap())); - let sample_file_path = path.join("dummy.txt"); - assert!(sample_file_path.is_file()); +#[test] +#[should_panic(expected = "cannot determine archive file type")] +fn test_extract_tar_lzma() { + assert_extract_archive("utf-8.tar.lzma"); +} + +#[test] +fn test_extract_zip() { + assert_extract_archive("utf-8.zip"); } #[test] diff --git a/test_fixtures/utf-8_sample/archives/utf-8.tar.xz b/test_fixtures/utf-8_sample/archives/utf-8.tar.xz new file mode 100644 index 0000000000000000000000000000000000000000..9db8100c53aad34685eecd3b241cc7240773ce5b GIT binary patch literal 5580 zcmV;-6*KDnH+ooF000E$*0e?hz>F(^hyfu1761SM00003>CRr@L;n?oT>vs2N$~EL z`VE<01S0D?Ao?ch<&XFLI(NC);vvY7 zAa@CFrdvMu2j?b^dFv^o29m3)HBf?}H)G+~(`bHoJUXnz%?8AkaRQ^HvKe&UZW=_V z(}%)y=+!3{pkXS%C&fa|HPcZia0SlT((2#I5-_V;LWOm)^(h?du`sGOt!nx|h9z44 zHnZk-U~YfN4rRRbF*a0ncFH1y36DN4ZjBiCNotQ5vcQ-VZ_D$(Y93Yzfvc0f(@`v< zMwdIEQ2vYW@~*dQkh73h?6Ep9uaud?YNrnYas;u2V4yY~>}4qC=92QXPe?Q3h_GAs zuIIg4s?@$hJG6rni2~*h>i2(Ok_&npB05V2Z&kbX$a3Iq?Rz8{dF(ZTQaZb`NcWR5|d1@nW) z_oT=4_cISio5xQygd z79V$xK~3;qD78{SPP%|>FKDJq;$Yu($Ps?7ARzZ{ff{pR#^+%#W9JB1JKPINxlDK& zG@;CFc<&&j6zxlAr50jnqDKN{c5ddvFW0asCte+dEX_xbT=-iPpqf89(_h`Xc)Fkr zRwfGCa@bJfiUWDS>C?O`NM+lj^e|*Vg{=bP&kJ20z%zj=J|$;wl9lW(&S<6`AcR&; zUTbp&wLUS53-z@EWh9FBIhXiusq6v(l1~fYk;4NoD#oGD#uU&#Ns#Fb9^?`Cjx6PKfKQvHPRATCaoi$o`y|~a87rI3(j(lTjOEHQwdG1Uk@k^G zEpV&axZFyt@>3%%)U2y99{oRZ&!xUd^>r1*x}+%ScS4FF7W)2;hQP3}_llqtOVrKh z^T-izPmbFqg*Mmp+LZQjk`(qoOCjq<5j9-T$c z_2=UXQRE_m56v-338p-o)k2@|7v?yuQ^MpEXm`IeE1>fhTXt#e#MbI6y$u4D>;xd>;Tedzk4(ea0%4`=xMOVT=A?zqbz|C1Ty| zFiQBq$3-#(dP&*ftVxA3TOIn4o%coGKWZhdjyo{Y1m&&d@Wy$O>%)^)gS#zF51Dj3 z7D82B(DP{i`>Cf#F(9cFMNY$wxD72yl@8pTiYjD|Y(f;U6{%w1B~bsR)u@^nh}5M^ zPP(ALsY`v~p_2aaL5@ZyQYUho@BB~)h}q|q7)P??5&;5}nGO;>*yc`7I{Hn(@EO{=# zSY?m@wg4C|hRWL-Uk@*o2ZqgEDK$X@sFRGul37A-R}(u1!+kbBgL7 zIL#N!W!tkKnJx-Z{h>`w&&`qXB78Z*46S=w**}%ClHp9xe zB-ZQ|EWBUQDd7NkPDhiEwr?ugfdW?Lu+M2zeaVK+P#|Cg0>4MB16Op zw}xAT*HgLoKQou})=I|1?`+JoDvu73w7>H$pK~&4o;hR2X4gquzf?btkcJ0Mi3TuWfAJV0Gp< zBxSd1S3pSg$IX5UE;G{~oE3Nx=UYt3F;7^MN8v>^B}P!Wo}oAguhkrX&uV*g5dDh3 z4Cx}RY|wBGPYpv<3bdy&DWeqrE*$mK`Zm;5mLSaqfo?Ih11myRkTe}J90r?ZRpYYy zv^ntn`t=d9&0(|Unx)tRy!bL;knB7?rSfi1VQ;krHn(x=Y;o+?KDF!v70Ec{b#l0N z(N>DFym2$>Zx=%>@lA9px+MXX5c^@rEMgpRMia7h~gPJQ(!Mm{PLfmOAzp_c^P&y^B0=jfk54%(bq`{ z?CDTg+5m0|oNs1+ryWo@=VZhJ>C6In=r~v?YuOgevxVqc9^rDNB~OMC_`>6Rn4@g{ zo)z2kCJgTi&BR#VxiwjNd}9Ku=V~JXo7V2WT$Fvm&MhHX2%b1Q04dYAWWGS;k7ojZ zlPvFPR>41~@Y5uC@&MJXe2H7VB*YP@72e2ar$BVM(!U&6l@F&UO_T0i>vNXq^Y2{_ z&6SYEs#HuNZF~yiC?^gmw{oP9iYXg$PB8dVH$Rww2}yriOo5FfN6fV{)|GaWcfXEH@n zbG^lX&i*B&<^21os@G_Kj0Qp-P!ue8G|R0>lZZ-dA_Wvwj0|28R_I&}!2QPA+6* z3O~*~O&#g6PBt|{W~vO#?7j5_>}dZdAAjS#)>W8LjpwPo9>lh?`VBX?z$NYS&<}sn z_cEL)XvYXyTY*JyC@+00O4kIOo8b9Llbo-BC7kO0Pv$SGGYH-tXfn6p!1)ynvJZ0L zAM$45s)MF{Lt$x3&f0HH6xD3>e^Ku}H9aNbSXX1GY%6W7s5Au7AipTLTW8R_$X^!) z6SLeOKmmMshz%QSt{2i_n-12vnyVF~L{U`U?gJ-dHwjLsh}B(@Fr$b2IXt${VY;di zbVQQHF@rE~3>9~#CURW~J08}+JUcy*O)Q1$!wP0U)I$dj&nuC}HJ6e(-LnC+k0`wV zQ|8ule%=tc9RK4{@H6;aXMhj2CllF$Aul0#n*>JS5c&x2Ksk{PGm4#-Dlj+P?X?kO zL&Q1$I)OW7YN#^hcBqg5gZCR4O($WY_6QSoP_UL=U5+sF)3rrhDCc|x98}a7<^@NO z*g8u}2oegn0>u_COgCg1X=B~xm)3b&zo*uanSuVBi2VL1sW!7x)D_LXL|4D^O}1ue z$q#keCrT-WT_h@lve$V9J2E!DS6N%U$|DuKH(^kKZabB zu6t^cp_H*16uJk(xNUkclPSq}4$88f`E6}JL4 zx>VvxxCI7bwv=a!O#Jsp=+v>q+^h)mk1ymZc3F5&cXm%G9WF>*^LTpZmAgpzz7!;T zJ&Cpo$8DUMh*ySF@o959hP zvfsPcO>@5&>9})i6@z0&1}idELE=~E8i4Y~R0T>1#J`C)BhIUpwJjM;)?410dFeyG+|~n;egkaALC&Ru{m^70 z=(paz0_jJCj0vb>zzXhVAnCX^!KukcLAC+d`U-%k{HSM4-`nzWx>F*4{43fDcK-Tr zBQri87{76l`(K|fhgu{8P_Su*p(Nb*$LC2GwAZf%il4NUgH2pWRi5sxM~y%9zKno^UVZB%I-8&EyNGd zssV{V!qG2*(V2*vK^}OCm)%a)V_`Tp)vnbEE|45 z?i-loi)ZU0aRWaW3e+Po1<6uu%!tM%XN_wt0y~|qu0G?WpS(tQ_BFW9&oqO5j`$wdc zbq26Y)5>)X#){);EHi6TX3I+D{PQ%2t2gFuPT&3Ly+)+y;Y?qP@>dJEP-aj;dBB}MB zaJU!Ef+~2nkAVF45fWRZ_as#OIp1AAm7+^t)5;d0mk2>nygT7tVQ18{ z$sH)Fovb~Uxv?kGQcOaR6yvb1uUHN@MMg0W(D09zpo5UpAq-D<&Y|6a3+l; z_hovZDC(}=9hT&vNLeMGHlOTkvkwbC7fIw~Q=gVJtw;zUdy!ixHd%Z;eT(ufzm1irwty^P0vc7KNYJO$cTrnadB(L&xwU;!`UuQf4!bi z{S}jif2LNw6ZHPu7DL4s2Us2LizBo)`;*<{PXi4<&*^U_2)kL%9Sl{2n>7EF{9E!M z7asL&KS&)14lLW#uR9&($d`7;(GVaMW$hlSL$=i69@e@tR0Qe?a7>N%Ii`VTRWUdv zF!%cU#2%~YUO1RZ8PdAW-RtjNEYWgS40HQCF1pSj%k+Y?dZVbToyO2!&$*2NE3MAo zKySU5)fM0b5!f*`;E79rX7_OM>fgIX?<5UbaP16N-ZugN03V5ON|@l+KxAIA)!n=0 zzg9PZXf=*KZyP(7g@X%=#J%>-_qRR(;4Dhy3Tpptx?;%B1!;_G`jsPGQ|L8Tf|Fbq zeF6~_SujK#LQY|lP=E)-jJ@@Y3orq55F16K=QeU;dP*~)DA2^S{+@hb*@uZt@Qxj< z^ic9_H1aohH4vR5Y@#)5cq=06`w{@X3HhmuS6OT>Ah$Xz7xv!aNVIRsT|$49@VZ?b6tQfeEl9 z2Z(gSL#ExCeRcht;cpJ}O^->#N6GTscPj z&KY}ANwCgtbOLpAYS3R-*<3KN<}FYihx)elPVBxwdJ@H_IAnbWbSgDkU-u5m)s5uD zw2z7?UbdrU%y08dW0<%yDKnFL)$I?kJh*$5dn_hw?W>Ap@6f74Ck-5ez1$J^6HLC( zrW$JWrv#`Mc2>!Ib>yGUYpPBYoK7R|cm}G3t&CHRdU;uTH;erkG$&BD&xhsvU~5g_(xYBuww- zhxNwyKNB*N8m1XC*0yNgLpyY)|Ib}q&Pt7Zcj!qD?6SR=e^3m`$;!4`3{9EI!(=#I z+PTRWVR$<*GvVHl~Ao;IpLxVIqYxfEx!aX9JN8HPe55tkd zu0}pGMDCi#O#0R12Q@fGm;`<4o@;%y5IMVZXCV%uX6=W5`DaeE2gDI^1Se-m_Hem-2Q5@#t|MobT{c=~T8UhcYuX8(xW2(&fzP#Nd*MPXGYTsWs#yTd>9e a0jMj0hyefzly94{#Ao{g000001X)_ey1@bf literal 0 HcmV?d00001 From 43da610271d57c6f859c1b99c011b88afee5c410 Mon Sep 17 00:00:00 2001 From: Philippe Laflamme Date: Tue, 20 May 2025 22:07:37 -0400 Subject: [PATCH 3/4] feat: also support `tar.lzma` --- .github/workflows/rust.yml | 4 +- src/archives.rs | 131 ++++++++++++++++++++++++++++++++----- src/test.rs | 3 +- 3 files changed, 119 insertions(+), 19 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 6ee2555..2572029 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -25,10 +25,10 @@ jobs: rust: [stable] task: - name: Test (default-tls) - run: cargo test --features build-binary,default-tls + run: cargo test --features build-binary,lzma,default-tls - name: Test (rustls-tls) - run: cargo test --features build-binary,rustls-tls + run: cargo test --features build-binary,lzma,rustls-tls include: - os: ubuntu-latest diff --git a/src/archives.rs b/src/archives.rs index a86be75..1816ef1 100644 --- a/src/archives.rs +++ b/src/archives.rs @@ -10,9 +10,35 @@ pub(crate) enum ArchiveFormat { TarGz, #[cfg(feature = "lzma")] TarXz, + #[cfg(feature = "lzma")] + TarLzma, Zip, } +// see https://github.com/bojand/infer/issues/91 +#[allow(clippy::nonminimal_bool)] +fn is_lzma(buf: &[u8]) -> bool { + buf.len() > 4 + && buf[0] == 0x5D + && buf[1] == 0x00 + && buf[2] == 0x00 + && (buf[3] == 0x80 + || buf[3] == 0x01 + || buf[3] == 0x10 + || buf[3] == 0x08 + || buf[3] == 0x20 + || buf[3] == 0x40 + || buf[3] == 0x80 + || buf[3] == 0x00) + && (buf[4] == 0x00 || buf[4] == 0x01 || buf[4] == 0x02) +} + +fn infer() -> infer::Infer { + let mut infer = infer::Infer::new(); + infer.add("application/x-lzma", "lzma", is_lzma); + infer +} + impl ArchiveFormat { fn is_tar(read: &mut R) -> bool { let mut buf = [0; 262]; @@ -22,17 +48,29 @@ impl ArchiveFormat { /// Parse archive type from resource extension. pub(crate) fn parse_from_extension(resource: &Path) -> Result { - if let Some(file_type) = infer::get_from_path(resource)? { + if let Some(file_type) = infer().get_from_path(resource)? { let archive_type = match file_type.mime_type() { "application/gzip" if Self::is_tar(&mut GzDecoder::new(File::open(resource)?)) => { Self::TarGz } #[cfg(feature = "lzma")] "application/x-xz" - if Self::is_tar(&mut xz::XzDecoder::new(File::open(resource)?)?) => + if Self::is_tar(&mut lzma::LzmaDecoder::new( + lzma::Codec::Xz, + File::open(resource)?, + )?) => { Self::TarXz } + #[cfg(feature = "lzma")] + "application/x-lzma" + if Self::is_tar(&mut lzma::LzmaDecoder::new( + lzma::Codec::Lzma, + File::open(resource)?, + )?) => + { + Self::TarLzma + } "application/zip" => Self::Zip, tpe => { return Err(Error::ExtractionError(format!( @@ -67,10 +105,16 @@ pub(crate) fn extract_archive>( } #[cfg(feature = "lzma")] ArchiveFormat::TarXz => { - let xz_decoder = xz::XzDecoder::new(File::open(path)?)?; + let xz_decoder = lzma::LzmaDecoder::new(lzma::Codec::Xz, File::open(path)?)?; let mut archive = tar::Archive::new(xz_decoder); archive.unpack(&temp_target)?; } + #[cfg(feature = "lzma")] + ArchiveFormat::TarLzma => { + let lzma_decoder = lzma::LzmaDecoder::new(lzma::Codec::Lzma, File::open(path)?)?; + let mut archive = tar::Archive::new(lzma_decoder); + archive.unpack(&temp_target)?; + } ArchiveFormat::Zip => { let file = File::open(path)?; let mut archive = @@ -88,48 +132,88 @@ pub(crate) fn extract_archive>( } #[cfg(feature = "lzma")] -mod xz { +mod lzma { use std::io::Read; use std::thread::JoinHandle; - pub(super) struct XzDecoder { + + #[derive(Clone, Copy)] + pub(super) enum Codec { + Lzma, + Xz, + } + + impl std::fmt::Display for Codec { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Codec::Lzma => write!(f, "lzma"), + Codec::Xz => write!(f, "xz"), + } + } + } + + pub(super) struct LzmaDecoder { + codec: Codec, decoder_handle: Option>>, pipe_reader: std::io::PipeReader, } - impl XzDecoder { - pub(super) fn new(reader: R) -> std::io::Result { + impl LzmaDecoder { + pub(super) fn new( + codec: Codec, + reader: R, + ) -> std::io::Result { let (pipe_reader, mut pipe_writer) = std::io::pipe()?; let decoder_handle = std::thread::spawn(move || { - lzma_rs::xz_decompress(&mut std::io::BufReader::new(reader), &mut pipe_writer) + let mut reader = std::io::BufReader::new(reader); + match codec { + Codec::Lzma => lzma_rs::lzma_decompress(&mut reader, &mut pipe_writer), + Codec::Xz => lzma_rs::xz_decompress(&mut reader, &mut pipe_writer), + } }); Ok(Self { + codec, decoder_handle: Some(decoder_handle), pipe_reader, }) } } - impl Read for XzDecoder { + impl Read for LzmaDecoder { fn read(&mut self, buf: &mut [u8]) -> Result { - let size = self.pipe_reader.read(buf)?; + let size = self.pipe_reader.read(buf); if let Some(handle) = self.decoder_handle.take_if(|h| h.is_finished()) { handle .join() - .map_err(|_| std::io::Error::other("xz decompression thread panicked"))? - .map_err(|e| std::io::Error::other(format!("xz decompression error: {e}")))?; + .map_err(|_| { + std::io::Error::other(format!( + "{} decompression thread panicked", + self.codec + )) + })? + .map_err(|e| { + std::io::Error::other(format!("{} decompression error: {e}", self.codec)) + })?; + } + // handle 0-byte read edge case + match size { + Ok(0) if self.decoder_handle.is_some() => { + // we read nothing, but the thread is still running, most likely a race condition, retry + self.read(buf) + } + other => other, } - Ok(size) } } #[cfg(test)] mod test { - use super::XzDecoder; + use super::*; + #[test] #[should_panic(expected = "xz decompression error")] fn test_xz_decoder_empty() { - let mut decoder = XzDecoder::new(std::io::empty()).unwrap(); + let mut decoder = LzmaDecoder::new(Codec::Xz, std::io::empty()).unwrap(); std::io::copy(&mut decoder, &mut Vec::new()).unwrap(); } @@ -137,7 +221,22 @@ mod xz { #[should_panic(expected = "xz decompression error")] fn test_xz_decoder_bad() { let bad: &[u8] = &[0x42u8; 1024]; - let mut decoder = XzDecoder::new(bad).unwrap(); + let mut decoder = LzmaDecoder::new(Codec::Xz, bad).unwrap(); + std::io::copy(&mut decoder, &mut Vec::new()).unwrap(); + } + + #[test] + #[should_panic(expected = "lzma decompression error")] + fn test_lzma_decoder_empty() { + let mut decoder = LzmaDecoder::new(Codec::Lzma, std::io::empty()).unwrap(); + std::io::copy(&mut decoder, &mut Vec::new()).unwrap(); + } + + #[test] + #[should_panic(expected = "lzma decompression error")] + fn test_lzma_decoder_bad() { + let bad: &[u8] = &[0x42u8; 1024]; + let mut decoder = LzmaDecoder::new(Codec::Lzma, bad).unwrap(); std::io::copy(&mut decoder, &mut Vec::new()).unwrap(); } } diff --git a/src/test.rs b/src/test.rs index 6ffd31f..3d9bb53 100644 --- a/src/test.rs +++ b/src/test.rs @@ -223,13 +223,14 @@ fn test_extract_tar_gz() { assert_extract_archive("utf-8.tar.gz"); } +#[cfg(feature = "lzma")] #[test] fn test_extract_tar_xz() { assert_extract_archive("utf-8.tar.xz"); } +#[cfg(feature = "lzma")] #[test] -#[should_panic(expected = "cannot determine archive file type")] fn test_extract_tar_lzma() { assert_extract_archive("utf-8.tar.lzma"); } From 1db8562ee9ee999ca1eec561144398af2114d83b Mon Sep 17 00:00:00 2001 From: Philippe Laflamme Date: Tue, 20 May 2025 23:05:50 -0400 Subject: [PATCH 4/4] docs: add changelog entry --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0cbbc6a..0f19a70 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Added + +- Support `.tar.xz` and `.tar.lzma` archives + +### Fixed + +- Infer archive type from bytes instead of filename extension + ## [v0.7.0](https://github.com/epwalsh/rust-cached-path/releases/tag/v0.7.0) - 2025-05-14 ### Added