Skip to content

GBK and UTF-8 decoding does not work correctly #128

@whereisyou

Description

@whereisyou

I have a GBK string, but neither `GBK.decode(rst_raw, DecoderTrap::Strict).is_err()` nor `UTF_8.decode(rst_raw, DecoderTrap::Strict).is_err()` gives the right result for judging its encoding. I don't know why, so I wrote my own code to judge whether a byte string is UTF-8:
/// Returns `true` when `data` consists entirely of well-formed UTF-8.
///
/// An empty slice is considered valid. A `true` result only means the bytes
/// are well-formed UTF-8; some byte strings are also valid in other
/// encodings, so this alone cannot prove which encoding produced them.
///
/// # Examples
///
/// ```
/// assert!(is_utf8("你好".as_bytes()));
/// assert!(!is_utf8(&[0xC4, 0xE3, 0xBA, 0xC3])); // "你好" encoded as GBK
/// ```
fn is_utf8(data: &[u8]) -> bool {
    // Delegate to the standard-library validator instead of scanning by hand:
    // it never indexes past the end on a truncated sequence, accepts two-byte
    // sequences, and rejects overlong forms, surrogates and invalid lead bytes.
    std::str::from_utf8(data).is_ok()
}

/// Counts the consecutive `1` bits at the most-significant end of `data`.
///
/// The byte is always treated as a full 8-bit value, so any byte below
/// `0x80` yields `0` and `0xFF` yields `8`.
///
/// # Examples
///
/// ```
/// assert_eq!(preNUm(0b1110_0100), 3);
/// assert_eq!(preNUm(0b0000_0001), 0);
/// ```
// The non-snake-case name is kept so existing callers keep compiling.
#[allow(non_snake_case)]
fn preNUm(data: u8) -> i32 {
    // `u8::leading_ones` works on the fixed 8-bit width; formatting with
    // `{:b}` drops leading zeros and would miscount bytes below 0x80.
    // The result is at most 8, so the cast to `i32` is lossless.
    data.leading_ones() as i32
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions