Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"git.ignoreLimitWarning": true,
"files.eol": "\n",
"editor.formatOnSave": true,
"files.exclude": {
"tmp/**": true
},
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter"
},
"[markdown]": {
"editor.formatOnSave": true,
"editor.formatOnPaste": true
},
"markdownlint.config": {
"MD013": false,
"MD024": false
},
"cSpell.diagnosticLevel": "Hint",
}
8 changes: 4 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
[package]
name = "encoding_rs"
description = "A Gecko-oriented implementation of the Encoding Standard"
version = "0.8.35"
edition = '2018'
version = "0.8.36"
edition = '2024'
authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
license = "(Apache-2.0 OR MIT) AND BSD-3-Clause"
include = ["src/**/*.rs", "/data", "Cargo.toml", "COPYRIGHT", "LICENSE*", "README.md"]
Expand All @@ -12,7 +12,7 @@ homepage = "https://docs.rs/encoding_rs/"
repository = "https://github.com/hsivonen/encoding_rs"
keywords = ["encoding", "web", "unicode", "charset"]
categories = ["text-processing", "encoding", "web-programming", "internationalization"]
rust-version = "1.40"
rust-version = "1.86"

[features]
default = ["alloc"]
Expand All @@ -39,7 +39,7 @@ any_all_workaround = { version = "0.1.0" , optional = true }

[dev-dependencies]
serde_derive = "1.0"
bincode = "1.0"
bincode = "2.0"
serde_json = "1.0"

[profile.release]
Expand Down
202 changes: 112 additions & 90 deletions src/ascii.rs

Large diffs are not rendered by default.

64 changes: 32 additions & 32 deletions src/big5.rs
Original file line number Diff line number Diff line change
Expand Up @@ -279,82 +279,82 @@ mod tests {
#[test]
fn test_big5_decode() {
// Empty
decode_big5(b"", &"");
decode_big5(b"", "");

// ASCII
decode_big5(&[0x61u8, 0x62u8], &"\u{0061}\u{0062}");
decode_big5(&[0x61u8, 0x62u8], "\u{0061}\u{0062}");

// Edge cases
decode_big5(&[0x87u8, 0x40u8], &"\u{43F0}");
decode_big5(&[0xFEu8, 0xFEu8], &"\u{79D4}");
decode_big5(&[0xFEu8, 0xFDu8], &"\u{2910D}");
decode_big5(&[0x88u8, 0x62u8], &"\u{00CA}\u{0304}");
decode_big5(&[0x88u8, 0x64u8], &"\u{00CA}\u{030C}");
decode_big5(&[0x88u8, 0x66u8], &"\u{00CA}");
decode_big5(&[0x88u8, 0xA3u8], &"\u{00EA}\u{0304}");
decode_big5(&[0x88u8, 0xA5u8], &"\u{00EA}\u{030C}");
decode_big5(&[0x88u8, 0xA7u8], &"\u{00EA}");
decode_big5(&[0x99u8, 0xD4u8], &"\u{8991}");
decode_big5(&[0x99u8, 0xD5u8], &"\u{27967}");
decode_big5(&[0x99u8, 0xD6u8], &"\u{8A29}");
decode_big5(&[0x87u8, 0x40u8], "\u{43F0}");
decode_big5(&[0xFEu8, 0xFEu8], "\u{79D4}");
decode_big5(&[0xFEu8, 0xFDu8], "\u{2910D}");
decode_big5(&[0x88u8, 0x62u8], "\u{00CA}\u{0304}");
decode_big5(&[0x88u8, 0x64u8], "\u{00CA}\u{030C}");
decode_big5(&[0x88u8, 0x66u8], "\u{00CA}");
decode_big5(&[0x88u8, 0xA3u8], "\u{00EA}\u{0304}");
decode_big5(&[0x88u8, 0xA5u8], "\u{00EA}\u{030C}");
decode_big5(&[0x88u8, 0xA7u8], "\u{00EA}");
decode_big5(&[0x99u8, 0xD4u8], "\u{8991}");
decode_big5(&[0x99u8, 0xD5u8], "\u{27967}");
decode_big5(&[0x99u8, 0xD6u8], "\u{8A29}");

// Edge cases surrounded with ASCII
decode_big5(
&[0x61u8, 0x87u8, 0x40u8, 0x62u8],
&"\u{0061}\u{43F0}\u{0062}",
"\u{0061}\u{43F0}\u{0062}",
);
decode_big5(
&[0x61u8, 0xFEu8, 0xFEu8, 0x62u8],
&"\u{0061}\u{79D4}\u{0062}",
"\u{0061}\u{79D4}\u{0062}",
);
decode_big5(
&[0x61u8, 0xFEu8, 0xFDu8, 0x62u8],
&"\u{0061}\u{2910D}\u{0062}",
"\u{0061}\u{2910D}\u{0062}",
);
decode_big5(
&[0x61u8, 0x88u8, 0x62u8, 0x62u8],
&"\u{0061}\u{00CA}\u{0304}\u{0062}",
"\u{0061}\u{00CA}\u{0304}\u{0062}",
);
decode_big5(
&[0x61u8, 0x88u8, 0x64u8, 0x62u8],
&"\u{0061}\u{00CA}\u{030C}\u{0062}",
"\u{0061}\u{00CA}\u{030C}\u{0062}",
);
decode_big5(
&[0x61u8, 0x88u8, 0x66u8, 0x62u8],
&"\u{0061}\u{00CA}\u{0062}",
"\u{0061}\u{00CA}\u{0062}",
);
decode_big5(
&[0x61u8, 0x88u8, 0xA3u8, 0x62u8],
&"\u{0061}\u{00EA}\u{0304}\u{0062}",
"\u{0061}\u{00EA}\u{0304}\u{0062}",
);
decode_big5(
&[0x61u8, 0x88u8, 0xA5u8, 0x62u8],
&"\u{0061}\u{00EA}\u{030C}\u{0062}",
"\u{0061}\u{00EA}\u{030C}\u{0062}",
);
decode_big5(
&[0x61u8, 0x88u8, 0xA7u8, 0x62u8],
&"\u{0061}\u{00EA}\u{0062}",
"\u{0061}\u{00EA}\u{0062}",
);
decode_big5(
&[0x61u8, 0x99u8, 0xD4u8, 0x62u8],
&"\u{0061}\u{8991}\u{0062}",
"\u{0061}\u{8991}\u{0062}",
);
decode_big5(
&[0x61u8, 0x99u8, 0xD5u8, 0x62u8],
&"\u{0061}\u{27967}\u{0062}",
"\u{0061}\u{27967}\u{0062}",
);
decode_big5(
&[0x61u8, 0x99u8, 0xD6u8, 0x62u8],
&"\u{0061}\u{8A29}\u{0062}",
"\u{0061}\u{8A29}\u{0062}",
);

// Bad sequences
decode_big5(&[0x80u8, 0x61u8], &"\u{FFFD}\u{0061}");
decode_big5(&[0xFFu8, 0x61u8], &"\u{FFFD}\u{0061}");
decode_big5(&[0xFEu8, 0x39u8], &"\u{FFFD}\u{0039}");
decode_big5(&[0x87u8, 0x66u8], &"\u{FFFD}\u{0066}");
decode_big5(&[0x81u8, 0x40u8], &"\u{FFFD}\u{0040}");
decode_big5(&[0x61u8, 0x81u8], &"\u{0061}\u{FFFD}");
decode_big5(&[0x80u8, 0x61u8], "\u{FFFD}\u{0061}");
decode_big5(&[0xFFu8, 0x61u8], "\u{FFFD}\u{0061}");
decode_big5(&[0xFEu8, 0x39u8], "\u{FFFD}\u{0039}");
decode_big5(&[0x87u8, 0x66u8], "\u{FFFD}\u{0066}");
decode_big5(&[0x81u8, 0x40u8], "\u{FFFD}\u{0040}");
decode_big5(&[0x61u8, 0x81u8], "\u{0061}\u{FFFD}");
}

#[test]
Expand Down
7 changes: 2 additions & 5 deletions src/euc_jp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,7 @@ enum EucJpPending {

impl EucJpPending {
fn is_none(&self) -> bool {
match *self {
EucJpPending::None => true,
_ => false,
}
matches!(*self, EucJpPending::None)
}

fn count(&self) -> usize {
Expand Down Expand Up @@ -362,7 +359,7 @@ mod tests {
#[test]
fn test_euc_jp_decode() {
// Empty
decode_euc_jp(b"", &"");
decode_euc_jp(b"", "");

// ASCII
decode_euc_jp(b"\x61\x62", "\u{0061}\u{0062}");
Expand Down
4 changes: 2 additions & 2 deletions src/euc_kr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ fn ksx1001_encode_hangul(bmp: u16, _: u16) -> (u8, u8) {
} else {
0x41
};
(lead as u8, (cp949_trail + offset) as u8)
(lead, cp949_trail + offset)
}
}
}
Expand Down Expand Up @@ -378,7 +378,7 @@ mod tests {
#[test]
fn test_euc_kr_decode() {
// Empty
decode_euc_kr(b"", &"");
decode_euc_kr(b"", "");

// ASCII
decode_euc_kr(b"\x61\x62", "\u{0061}\u{0062}");
Expand Down
9 changes: 3 additions & 6 deletions src/gb18030.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,7 @@ enum Gb18030Pending {

impl Gb18030Pending {
fn is_none(&self) -> bool {
match *self {
Gb18030Pending::None => true,
_ => false,
}
matches!(*self, Gb18030Pending::None)
}

fn count(&self) -> usize {
Expand Down Expand Up @@ -270,7 +267,7 @@ impl Gb18030Decoder {
} else {
handle.write_bmp_excl_ascii(gb18030_range_decode(pointer as u16))
}
} else if pointer >= 189_000 && pointer <= 1_237_575 {
} else if (189_000..=1_237_575).contains(&pointer) {
// Astral
handle.write_astral((pointer - (189_000usize - 0x1_0000usize)) as u32)
} else {
Expand Down Expand Up @@ -602,7 +599,7 @@ mod tests {
#[test]
fn test_gb18030_decode() {
// Empty
decode_gb18030(b"", &"");
decode_gb18030(b"", "");

// ASCII
decode_gb18030(b"\x61\x62", "\u{0061}\u{0062}");
Expand Down
19 changes: 11 additions & 8 deletions src/handles.rs
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ pub struct ByteSource<'a> {

impl<'a> ByteSource<'a> {
#[inline(always)]
pub fn new(src: &[u8]) -> ByteSource {
pub fn new(src: &'a [u8]) -> ByteSource<'a> {
ByteSource { slice: src, pos: 0 }
}
#[inline(always)]
Expand Down Expand Up @@ -594,7 +594,7 @@ pub struct Utf16Destination<'a> {

impl<'a> Utf16Destination<'a> {
#[inline(always)]
pub fn new(dst: &mut [u16]) -> Utf16Destination {
pub fn new(dst: &'a mut [u16]) -> Utf16Destination<'a> {
Utf16Destination { slice: dst, pos: 0 }
}
#[inline(always)]
Expand Down Expand Up @@ -939,7 +939,7 @@ pub struct Utf8Destination<'a> {

impl<'a> Utf8Destination<'a> {
#[inline(always)]
pub fn new(dst: &mut [u8]) -> Utf8Destination {
pub fn new(dst: &mut [u8]) -> Utf8Destination<'_> {
Utf8Destination { slice: dst, pos: 0 }
}
#[inline(always)]
Expand Down Expand Up @@ -1116,7 +1116,7 @@ impl<'a> Utf8Destination<'a> {
// Validate first, then memcpy to let memcpy do its thing even for
// non-ASCII. (And potentially do something better than SSE2 for ASCII.)
let valid_len = utf8_valid_up_to(&src_remaining[..min_len]);
(&mut dst_remaining[..valid_len]).copy_from_slice(&src_remaining[..valid_len]);
dst_remaining[..valid_len].copy_from_slice(&src_remaining[..valid_len]);
source.pos += valid_len;
self.pos += valid_len;
}
Expand Down Expand Up @@ -1164,7 +1164,7 @@ pub struct Utf16Source<'a> {

impl<'a> Utf16Source<'a> {
#[inline(always)]
pub fn new(src: &[u16]) -> Utf16Source {
pub fn new(src: &[u16]) -> Utf16Source<'_> {
Utf16Source { slice: src, pos: 0 }
}
#[inline(always)]
Expand Down Expand Up @@ -1272,6 +1272,7 @@ impl<'a> Utf16Source<'a> {
Some((non_ascii, consumed)) => {
self.pos += consumed;
dest.advance(consumed);
#[allow(clippy::len_zero)]
if dest.remaining().len() >= 1 {
self.pos += 1; // commit to reading `non_ascii`
let unit = non_ascii;
Expand Down Expand Up @@ -1466,7 +1467,7 @@ pub struct Utf8Source<'a> {

impl<'a> Utf8Source<'a> {
#[inline(always)]
pub fn new(src: &str) -> Utf8Source {
pub fn new(src: &str) -> Utf8Source<'_> {
Utf8Source {
slice: src.as_bytes(),
pos: 0,
Expand Down Expand Up @@ -1614,6 +1615,7 @@ impl<'a> Utf8Source<'a> {
Some((non_ascii, consumed)) => {
self.pos += consumed;
dest.advance(consumed);
#[allow(clippy::len_zero)]
if dest.remaining().len() >= 1 {
if non_ascii < 0xE0 {
let point = ((u16::from(non_ascii) & 0x1F) << 6)
Expand Down Expand Up @@ -1922,18 +1924,19 @@ pub struct ByteDestination<'a> {

impl<'a> ByteDestination<'a> {
#[inline(always)]
pub fn new(dst: &mut [u8]) -> ByteDestination {
pub fn new(dst: &mut [u8]) -> ByteDestination<'_> {
ByteDestination {
start: dst.as_ptr(),
slice: dst,
}
}
#[inline(always)]
pub fn remaining(&mut self) -> &mut [u8] {
&mut self.slice
self.slice
}
#[inline(always)]
pub fn check_space_one<'b>(&'b mut self) -> Space<ByteOneHandle<'b, 'a>> {
#[allow(clippy::len_zero)]
if self.slice.len() >= 1 {
Space::Available(ByteOneHandle::new(self))
} else {
Expand Down
13 changes: 6 additions & 7 deletions src/iso_2022_jp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ impl Iso2022JpDecoder {
continue;
}
self.output_flag = false;
if b >= 0x21u8 && b <= 0x5Fu8 {
if (0x21u8..=0x5Fu8).contains(&b) {
destination_handle.write_upper_bmp(u16::from(b) - 0x21u16 + 0xFF61u16);
continue;
}
Expand All @@ -206,7 +206,7 @@ impl Iso2022JpDecoder {
continue;
}
self.output_flag = false;
if b >= 0x21u8 && b <= 0x7Eu8 {
if (0x21u8..=0x7Eu8).contains(&b) {
self.lead = b;
self.decoder_state = Iso2022JpDecoderState::TrailByte;
continue;
Expand Down Expand Up @@ -376,6 +376,7 @@ fn is_kanji_mapped(bmp: u16) -> bool {
#[allow(clippy::redundant_pattern_matching, clippy::if_same_then_else)]
#[inline(always)]
fn is_kanji_mapped(bmp: u16) -> bool {
#[allow(clippy::match_like_matches_macro)]
if 0x4EDD == bmp {
true
} else if let Some(_) = jis0208_level1_kanji_shift_jis_encode(bmp) {
Expand Down Expand Up @@ -407,6 +408,7 @@ fn is_mapped_for_two_byte_encode(bmp: u16) -> bool {
true
} else {
let bmp_minus_space = bmp.wrapping_sub(0x3000);
#[allow(clippy::match_like_matches_macro)]
if bmp_minus_space < 3 {
// fast-track common punctuation
true
Expand Down Expand Up @@ -477,10 +479,7 @@ impl Iso2022JpEncoder {
}

pub fn has_pending_state(&self) -> bool {
match self.state {
Iso2022JpEncoderState::Ascii => false,
_ => true,
}
!matches!(self.state, Iso2022JpEncoderState::Ascii)
}

pub fn max_buffer_length_from_utf16_without_replacement(
Expand Down Expand Up @@ -764,7 +763,7 @@ mod tests {
#[test]
fn test_iso_2022_jp_decode() {
// Empty
decode_iso_2022_jp(b"", &"");
decode_iso_2022_jp(b"", "");

// ASCII
decode_iso_2022_jp(b"\x61\x62", "\u{0061}\u{0062}");
Expand Down
Loading