From c90a4bdcf0be900b70cdc6072662233d5105a0c1 Mon Sep 17 00:00:00 2001 From: Magnus Larsen Date: Fri, 11 Jul 2025 19:19:46 -0700 Subject: [PATCH 01/10] Add explicit lifetimes to Source 'new's These were inferred, but the nightly compiler now has a warning: ``` warning: lifetime flowing from input to output with different syntax can be confusing ``` --- src/handles.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/handles.rs b/src/handles.rs index 94cecbd..20b6d9b 100644 --- a/src/handles.rs +++ b/src/handles.rs @@ -387,7 +387,7 @@ pub struct ByteSource<'a> { impl<'a> ByteSource<'a> { #[inline(always)] - pub fn new(src: &[u8]) -> ByteSource { + pub fn new(src: &'a [u8]) -> ByteSource<'a> { ByteSource { slice: src, pos: 0 } } #[inline(always)] @@ -1164,7 +1164,7 @@ pub struct Utf16Source<'a> { impl<'a> Utf16Source<'a> { #[inline(always)] - pub fn new(src: &[u16]) -> Utf16Source { + pub fn new(src: &'a [u16]) -> Utf16Source<'a> { Utf16Source { slice: src, pos: 0 } } #[inline(always)] @@ -1466,7 +1466,7 @@ pub struct Utf8Source<'a> { impl<'a> Utf8Source<'a> { #[inline(always)] - pub fn new(src: &str) -> Utf8Source { + pub fn new(src: &'a str) -> Utf8Source<'a> { Utf8Source { slice: src.as_bytes(), pos: 0, From dcbf55f361e6ab0cbcaeb3030f8cfa40feb5e4d0 Mon Sep 17 00:00:00 2001 From: Magnus Larsen Date: Fri, 27 Jun 2025 18:59:38 -0700 Subject: [PATCH 02/10] Switch to Rust 1.17's ptr::{read,write}_unaligned --- src/simd_funcs.rs | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/simd_funcs.rs b/src/simd_funcs.rs index d082418..11aabc4 100644 --- a/src/simd_funcs.rs +++ b/src/simd_funcs.rs @@ -20,16 +20,10 @@ use core::simd::u16x8; use core::simd::u8x16; use core::simd::ToBytes; -// TODO: Migrate unaligned access to stdlib code if/when the RFC -// https://github.com/rust-lang/rfcs/pull/1725 is implemented. 
- /// Safety invariant: ptr must be valid for an unaligned read of 16 bytes #[inline(always)] pub unsafe fn load16_unaligned(ptr: *const u8) -> u8x16 { - let mut simd = ::core::mem::MaybeUninit::<u8x16>::uninit(); - ::core::ptr::copy_nonoverlapping(ptr, simd.as_mut_ptr() as *mut u8, 16); - // Safety: copied 16 bytes of initialized memory into this, it is now initialized - simd.assume_init() + ::core::ptr::read_unaligned(ptr as *const u8x16) } /// Safety invariant: ptr must be valid for an aligned-for-u8x16 read of 16 bytes @@ -42,7 +36,7 @@ pub unsafe fn load16_aligned(ptr: *const u8) -> u8x16 { /// Safety invariant: ptr must be valid for an unaligned store of 16 bytes #[inline(always)] pub unsafe fn store16_unaligned(ptr: *mut u8, s: u8x16) { - ::core::ptr::copy_nonoverlapping(&s as *const u8x16 as *const u8, ptr, 16); + ::core::ptr::write_unaligned(ptr as *mut u8x16, s); } /// Safety invariant: ptr must be valid for an aligned-for-u8x16 store of 16 bytes @@ -55,10 +49,7 @@ pub unsafe fn store16_aligned(ptr: *mut u8, s: u8x16) { /// Safety invariant: ptr must be valid for an unaligned read of 16 bytes #[inline(always)] pub unsafe fn load8_unaligned(ptr: *const u16) -> u16x8 { - let mut simd = ::core::mem::MaybeUninit::<u16x8>::uninit(); - ::core::ptr::copy_nonoverlapping(ptr as *const u8, simd.as_mut_ptr() as *mut u8, 16); - // Safety: copied 16 bytes of initialized memory into this, it is now initialized - simd.assume_init() + ::core::ptr::read_unaligned(ptr as *const u16x8) } /// Safety invariant: ptr must be valid for an aligned-for-u16x8 read of 16 bytes @@ -71,7 +62,7 @@ pub unsafe fn load8_aligned(ptr: *const u16) -> u16x8 { /// Safety invariant: ptr must be valid for an unaligned store of 16 bytes #[inline(always)] pub unsafe fn store8_unaligned(ptr: *mut u16, s: u16x8) { - ::core::ptr::copy_nonoverlapping(&s as *const u16x8 as *const u8, ptr as *mut u8, 16); + ::core::ptr::write_unaligned(ptr as *mut u16x8, s); } /// Safety invariant: ptr must be valid for an 
aligned-for-u16x8 store of 16 bytes From 02d595ec2e234bba5223bd71e575145986a67557 Mon Sep 17 00:00:00 2001 From: Magnus Larsen Date: Fri, 27 Jun 2025 19:11:14 -0700 Subject: [PATCH 03/10] Allow (silence) the clippy::len_zero warning This warning advises rewriting many places where we check if there is at least 1 byte in a read buffer as an is_empty() check, which seems like bad advice here. --- src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 78e9849..4ab4cc0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -42,7 +42,8 @@ clippy::doc_markdown, clippy::inline_always, clippy::new_ret_no_self, - clippy::redundant_static_lifetimes + clippy::redundant_static_lifetimes, + clippy::len_zero, )] //! encoding_rs is a Gecko-oriented Free Software / Open Source implementation From 0c55f0486b25ff92d5b094daa4673ab7c0c24596 Mon Sep 17 00:00:00 2001 From: Magnus Larsen Date: Fri, 27 Jun 2025 19:15:41 -0700 Subject: [PATCH 04/10] Allow (silence) clippy manual_range_contains lint --- src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib.rs b/src/lib.rs index 4ab4cc0..956569e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -44,6 +44,7 @@ clippy::new_ret_no_self, clippy::redundant_static_lifetimes, clippy::len_zero, + clippy::manual_range_contains )] //! 
encoding_rs is a Gecko-oriented Free Software / Open Source implementation From afbf5bf364646748cbf2bb9016874c77a0bc824e Mon Sep 17 00:00:00 2001 From: Magnus Larsen Date: Fri, 27 Jun 2025 19:17:47 -0700 Subject: [PATCH 05/10] Fix minor stylistic clippy lints --- src/euc_kr.rs | 2 +- src/lib.rs | 8 +++----- src/mem.rs | 2 +- src/utf_16.rs | 2 +- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/euc_kr.rs b/src/euc_kr.rs index ab92b0f..26e6116 100644 --- a/src/euc_kr.rs +++ b/src/euc_kr.rs @@ -255,7 +255,7 @@ fn ksx1001_encode_hangul(bmp: u16, _: u16) -> (u8, u8) { } else { 0x41 }; - (lead as u8, (cp949_trail + offset) as u8) + (lead, (cp949_trail + offset)) } } } diff --git a/src/lib.rs b/src/lib.rs index 956569e..11d3028 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2748,7 +2748,7 @@ impl Encoding { pub fn for_label(label: &[u8]) -> Option<&'static Encoding> { let mut trimmed = [0u8; LONGEST_LABEL_LENGTH]; let mut trimmed_pos = 0usize; - let mut iter = label.into_iter(); + let mut iter = label.iter(); // before loop { match iter.next() { @@ -3454,7 +3454,7 @@ impl Encoding { impl PartialEq for Encoding { #[inline] fn eq(&self, other: &Encoding) -> bool { - (self as *const Encoding) == (other as *const Encoding) + ::core::ptr::eq(self, other) } } @@ -4354,9 +4354,7 @@ impl Decoder { /// Available via the C wrapper. 
pub fn latin1_byte_compatible_up_to(&self, bytes: &[u8]) -> Option<usize> { match self.life_cycle { - DecoderLifeCycle::Converting => { - return self.variant.latin1_byte_compatible_up_to(bytes); - } + DecoderLifeCycle::Converting => self.variant.latin1_byte_compatible_up_to(bytes), DecoderLifeCycle::Finished => panic!("Must not use a decoder that has finished."), _ => None, } diff --git a/src/mem.rs b/src/mem.rs index 92941bb..f9d1bcc 100644 --- a/src/mem.rs +++ b/src/mem.rs @@ -2079,7 +2079,7 @@ pub fn utf8_latin1_up_to(buffer: &[u8]) -> usize { /// Returns the index of first byte that starts a non-Latin1 byte /// sequence, or the length of the string if there are none. pub fn str_latin1_up_to(buffer: &str) -> usize { - is_str_latin1_impl(buffer).unwrap_or_else(|| buffer.len()) + is_str_latin1_impl(buffer).unwrap_or(buffer.len()) } /// Replaces unpaired surrogates in the input with the REPLACEMENT CHARACTER. diff --git a/src/utf_16.rs b/src/utf_16.rs index f24806d..de402cf 100644 --- a/src/utf_16.rs +++ b/src/utf_16.rs @@ -145,7 +145,7 @@ impl Utf16Decoder { // The previous high surrogate was in // error and this one becomes the new // pending one. - self.lead_surrogate = code_unit as u16; + self.lead_surrogate = code_unit; return ( DecoderResult::Malformed(2, 2), unread_handle.consumed(), From db4dff8ca737faccba3de3fa20d1f30084259491 Mon Sep 17 00:00:00 2001 From: Magnus Larsen Date: Fri, 11 Jul 2025 19:20:52 -0700 Subject: [PATCH 06/10] Fix clippy::needless_borrow lint --- src/mem.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mem.rs b/src/mem.rs index f9d1bcc..32c016a 100644 --- a/src/mem.rs +++ b/src/mem.rs @@ -566,7 +566,7 @@ cfg_if! { } } } - let mut iter = (&buffer[offset..]).iter(); + let mut iter = buffer[offset..].iter(); loop { if let Some(&u) = iter.next() { if u > 0xFF { @@ -623,7 +623,7 @@ cfg_if! 
{ } } } - let mut iter = (&buffer[offset..]).iter(); + let mut iter = buffer[offset..].iter(); loop { if let Some(&u) = iter.next() { if u > 0xFF { From e3b52e9c182f1aabe7e61ac7e0f20941bb92c57d Mon Sep 17 00:00:00 2001 From: Magnus Larsen Date: Fri, 27 Jun 2025 19:35:29 -0700 Subject: [PATCH 07/10] Fix minor stylistic clippy lints in simd_funcs --- src/mem.rs | 9 ++------- src/simd_funcs.rs | 4 ++-- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/mem.rs b/src/mem.rs index 32c016a..b065b77 100644 --- a/src/mem.rs +++ b/src/mem.rs @@ -229,7 +229,7 @@ macro_rules! by_unit_check_simd { let mut simd_accu = $splat; while offset <= len_minus_stride { // Safety: the above check lets us perform one $simd_ty read. - simd_accu = simd_accu | unsafe { *(src.add(offset) as *const $simd_ty) }; + simd_accu |= unsafe { *(src.add(offset) as *const $simd_ty) }; offset += SIMD_STRIDE_SIZE / unit_size; } if !$func(simd_accu) { @@ -412,12 +412,7 @@ cfg_if! { } } } - for i in offset..len { - if bytes[i] > 0xC3 { - return Some(i); - } - } - None + bytes[offset..len].iter().position(|&byte| byte > 0xC3).map(|pos| offset + pos) } } else { #[inline(always)] diff --git a/src/simd_funcs.rs b/src/simd_funcs.rs index 11aabc4..e88291f 100644 --- a/src/simd_funcs.rs +++ b/src/simd_funcs.rs @@ -289,7 +289,7 @@ pub fn is_u16x8_bidi(s: u16x8) -> bool { // Quick refutation failed. Let's do the full check. 
any_mask16x8( - (in_range16x8!(s, 0x0590, 0x0900) + in_range16x8!(s, 0x0590, 0x0900) | in_range16x8!(s, 0xFB1D, 0xFE00) | in_range16x8!(s, 0xFE70, 0xFEFF) | in_range16x8!(s, 0xD802, 0xD804) @@ -297,7 +297,7 @@ pub fn is_u16x8_bidi(s: u16x8) -> bool { | s.simd_eq(u16x8::splat(0x200F)) | s.simd_eq(u16x8::splat(0x202B)) | s.simd_eq(u16x8::splat(0x202E)) - | s.simd_eq(u16x8::splat(0x2067))), + | s.simd_eq(u16x8::splat(0x2067)), ) } From f8ac96466ae7a1255f759fa70f012fc2f6638b6f Mon Sep 17 00:00:00 2001 From: Magnus Larsen Date: Fri, 27 Jun 2025 19:38:22 -0700 Subject: [PATCH 08/10] Allow simd_funcs' unused imports Alternative solutions include switching to a glob import, or removing the unused imports entirely. --- src/simd_funcs.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/simd_funcs.rs b/src/simd_funcs.rs index e88291f..107b3aa 100644 --- a/src/simd_funcs.rs +++ b/src/simd_funcs.rs @@ -7,18 +7,14 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use any_all_workaround::all_mask16x8; -use any_all_workaround::all_mask8x16; -use any_all_workaround::any_mask16x8; -use any_all_workaround::any_mask8x16; +#[allow(unused)] +use any_all_workaround::{all_mask16x8, all_mask8x16, any_mask16x8, any_mask8x16}; use core::simd::cmp::SimdPartialEq; use core::simd::cmp::SimdPartialOrd; -use core::simd::mask16x8; -use core::simd::mask8x16; use core::simd::simd_swizzle; -use core::simd::u16x8; -use core::simd::u8x16; use core::simd::ToBytes; +#[allow(unused)] +use core::simd::{mask16x8, mask8x16, u16x8, u8x16}; /// Safety invariant: ptr must be valid for an unaligned read of 16 bytes #[inline(always)] @@ -74,10 +70,12 @@ pub unsafe fn store8_aligned(ptr: *mut u16, s: u16x8) { cfg_if! 
{ if #[cfg(all(target_feature = "sse2", target_arch = "x86_64"))] { + #[allow(unused)] use core::arch::x86_64::__m128i; use core::arch::x86_64::_mm_movemask_epi8; use core::arch::x86_64::_mm_packus_epi16; } else if #[cfg(all(target_feature = "sse2", target_arch = "x86"))] { + #[allow(unused)] use core::arch::x86::__m128i; use core::arch::x86::_mm_movemask_epi8; use core::arch::x86::_mm_packus_epi16; From 4919453f1dd8fc3c8f4f0b484b70e4f7098c7e3a Mon Sep 17 00:00:00 2001 From: Magnus Larsen Date: Fri, 11 Jul 2025 14:04:11 -0700 Subject: [PATCH 09/10] Fix copypaste typo in convert_latin1_to_utf8 doc --- src/mem.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mem.rs b/src/mem.rs index b065b77..cb27b3c 100644 --- a/src/mem.rs +++ b/src/mem.rs @@ -1780,7 +1780,7 @@ pub fn convert_latin1_to_utf16(src: &[u8], dst: &mut [u16]) { /// # Safety /// /// If you want to convert into a `&mut str`, use -/// `convert_utf16_to_str_partial()` instead of using this function +/// `convert_latin1_to_str_partial()` instead of using this function /// together with the `unsafe` method `as_bytes_mut()` on `&mut str`. pub fn convert_latin1_to_utf8_partial(src: &[u8], dst: &mut [u8]) -> (usize, usize) { let src_len = src.len(); From 5283f8f88a4a2a0a47fcbe9149a2962a919f6790 Mon Sep 17 00:00:00 2001 From: Magnus Larsen Date: Sat, 2 Aug 2025 23:03:33 -0700 Subject: [PATCH 10/10] Remove mention of unsafe from safe code sample in docs When this doc comment was first written in commit 2d67eca87631293efe6fc3906c0ef341ffa565e6, there was some unsafe-- but this is no longer the case. Closes issue #72 --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 11d3028..22ad3d1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -107,7 +107,7 @@ //! } //! ``` //! -//! Decode using the streaming API with minimal `unsafe`: +//! Decode using the streaming API: //! //! ``` //! use encoding_rs::*;