Add optional character-set detection for WHOIS responses (`chardet` feature).

With the feature enabled (it is listed in `default`), the sync and async lookup
paths read the raw response bytes, keep them as-is when they are valid UTF-8,
and otherwise hand them to `WhoIs::decode_non_utf8`, which guesses the encoding
with chardetng and decodes with encoding_rs. Without the feature the response
is still read with `read_to_string`. The patch also makes `get_server_by_tld`
public and collapses a few struct literals / field alignments.

Review changes relative to the submitted patch (src/who_is.rs, decode_non_utf8):
  * the detector is now fed the response before guessing, and no TLD hint is
    passed (the response bytes were previously passed in the TLD position);
  * the UTF-16 output buffer is sized with `max_utf16_buffer_length` instead of
    being an empty `Vec`, and only the written prefix is converted.

NOTE(review): this patch was recovered from a copy with collapsed whitespace and
missing angle-bracketed generics. Blank lines, indentation, the alignment
spaces on removed struct fields and the generic arguments on context lines are
best-effort reconstructions; hunk headings after `@@` are kept as recovered, and
the `index` hashes for src/who_is.rs predate the decode_non_utf8 change.
Confirm against the tree before applying.

diff --git a/Cargo.toml b/Cargo.toml
index db273d1..a18bf48 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,11 +12,17 @@ description = "This is a WHOIS client library for Rust, inspired by https://gith
 license = "MIT"
 include = ["src/**/*", "Cargo.toml", "README.md", "LICENSE"]
 
+[features]
+default = ["chardet"]
+chardet = ["dep:chardetng", "dep:encoding_rs"]
+
 [dependencies]
 serde_json = "1"
 once_cell = "1"
 regex = "1"
 trust-dns-client = "0.23"
+chardetng = { version = "0.1.17", optional = true }
+encoding_rs = { version = "0.8.35", optional = true }
 
 tokio = { version = "1", features = ["fs", "net", "time", "io-util"], optional = true }
 
diff --git a/src/who_is.rs b/src/who_is.rs
index 3769e33..65181eb 100644
--- a/src/who_is.rs
+++ b/src/who_is.rs
@@ -34,15 +34,13 @@ static RE_SERVER: Lazy = Lazy::new(|| {
 #[derive(Debug, Clone)]
 pub struct WhoIs {
     map: HashMap<String, WhoIsServerValue>,
-    ip:  WhoIsServerValue,
+    ip: WhoIsServerValue,
 }
 
 impl WhoIs {
     /// Create a `WhoIs` instance which doesn't have a WHOIS server list. You should provide the host that is used for query ip. You may want to use the host `"whois.arin.net"`.
     pub fn from_host<T: AsRef<str>>(host: T) -> Result<WhoIs, WhoIsError> {
-        Ok(Self {
-            map: HashMap::new(), ip: WhoIsServerValue::from_string(host)?
-        })
+        Ok(Self { map: HashMap::new(), ip: WhoIsServerValue::from_string(host)? })
     }
 
     /// Read the list of WHOIS servers (JSON data) from a file to create a `WhoIs` instance.
@@ -114,10 +112,7 @@ impl WhoIs {
             }
         }
 
-        Ok(WhoIs {
-            map: new_map,
-            ip,
-        })
+        Ok(WhoIs { map: new_map, ip })
     }
 }
 
@@ -175,7 +170,7 @@ impl WhoIs {
         false
     }
 
-    fn get_server_by_tld(&self, mut tld: &str) -> Option<&WhoIsServerValue> {
+    pub fn get_server_by_tld(&self, mut tld: &str) -> Option<&WhoIsServerValue> {
         let mut server;
 
         loop {
@@ -202,6 +197,65 @@ impl WhoIs {
         server
     }
 
+    #[cfg(not(feature = "chardet"))]
+    fn decode_string_sync(
+        mut client: TcpStream,
+        query_result: &mut String,
+    ) -> Result<(), std::io::Error> {
+        client.read_to_string(query_result)?;
+
+        Ok(())
+    }
+
+    #[cfg(feature = "chardet")]
+    fn decode_string_sync(
+        mut client: TcpStream,
+        query_result: &mut String,
+    ) -> Result<(), WhoIsError> {
+        let mut bytes = Vec::new();
+
+        client.read_to_end(&mut bytes)?;
+
+        let utf8_res = std::str::from_utf8(&bytes);
+
+        if let Ok(utf8_data) = utf8_res {
+            *query_result = utf8_data.to_string();
+        } else {
+            *query_result = Self::decode_non_utf8(&bytes)?;
+        }
+
+        Ok(())
+    }
+
+    /// Decode a response that is not valid UTF-8 by detecting its character encoding.
+    ///
+    /// Malformed sequences are replaced rather than rejected.
+    #[cfg(feature = "chardet")]
+    fn decode_non_utf8(bytes: &[u8]) -> Result<String, WhoIsError> {
+        // The detector only inspects what it is fed; `true` marks the final chunk.
+        let mut detector = chardetng::EncodingDetector::new();
+        detector.feed(bytes, true);
+
+        // No TLD hint is known here, and UTF-8 was already ruled out by the caller.
+        let encoding = detector.guess(None, false);
+
+        let mut decoder = encoding.new_decoder();
+
+        // `decode_to_utf16` writes into a fixed-size slice, so size it up front.
+        let capacity =
+            decoder.max_utf16_buffer_length(bytes.len()).ok_or(WhoIsError::OutputFull)?;
+        let mut utf16_units = vec![0u16; capacity];
+
+        let (result, _, written, _) = decoder.decode_to_utf16(bytes, &mut utf16_units, true);
+
+        if !matches!(result, encoding_rs::CoderResult::InputEmpty) {
+            return Err(WhoIsError::OutputFull);
+        }
+
+        // Only the first `written` code units hold decoded output.
+        String::from_utf16(&utf16_units[..written]).map_err(|e| e.into())
+    }
+
     fn lookup_once(
         server: &WhoIsServerValue,
         text: &str,
@@ -245,7 +299,7 @@ impl WhoIs {
 
         let mut query_result = String::new();
 
-        client.read_to_string(&mut query_result)?;
+        Self::decode_string_sync(client, &mut query_result)?;
 
         Ok((addr, query_result))
     }
@@ -319,6 +373,36 @@ impl WhoIs {
 
 #[cfg(feature = "tokio")]
 impl WhoIs {
+    #[cfg(not(feature = "chardet"))]
+    async fn decode_string_async(
+        mut client: tokio::net::TcpStream,
+        query_result: &mut String,
+    ) -> Result<(), tokio::io::Error> {
+        client.read_to_string(query_result).await?;
+
+        Ok(())
+    }
+
+    #[cfg(feature = "chardet")]
+    async fn decode_string_async(
+        mut client: tokio::net::TcpStream,
+        query_result: &mut String,
+    ) -> Result<(), WhoIsError> {
+        let mut bytes = Vec::new();
+
+        client.read_to_end(&mut bytes).await?;
+
+        let utf8_res = std::str::from_utf8(&bytes);
+
+        if let Ok(utf8_data) = utf8_res {
+            *query_result = utf8_data.to_string();
+        } else {
+            *query_result = Self::decode_non_utf8(&bytes)?;
+        }
+
+        Ok(())
+    }
+
     async fn lookup_inner_once_async<'a>(
         server: &WhoIsServerValue,
         text: &str,
@@ -366,7 +450,8 @@ impl WhoIs {
 
             let mut query_result = String::new();
 
-            tokio::time::timeout(timeout, client.read_to_string(&mut query_result)).await??;
+            tokio::time::timeout(timeout, Self::decode_string_async(client, &mut query_result))
+                .await??;
 
             Ok((addr, query_result))
         } else {
@@ -384,7 +469,7 @@ impl WhoIs {
 
             let mut query_result = String::new();
 
-            client.read_to_string(&mut query_result).await?;
+            Self::decode_string_async(client, &mut query_result).await?;
 
             Ok((addr, query_result))
         }
diff --git a/src/who_is_error.rs b/src/who_is_error.rs
index 38bfdaf..2522a41 100644
--- a/src/who_is_error.rs
+++ b/src/who_is_error.rs
@@ -14,6 +14,10 @@ pub enum WhoIsError {
     SerdeJsonError(serde_json::Error),
     IOError(io::Error),
     HostError(HostError),
+    #[cfg(feature = "chardet")]
+    Utf16Error(std::string::FromUtf16Error),
+    #[cfg(feature = "chardet")]
+    OutputFull,
     #[cfg(feature = "tokio")]
     Elapsed(tokio::time::error::Elapsed),
     /// This kind of errors is recommended to be panic!
@@ -49,6 +53,14 @@ impl From for WhoIsError {
     }
 }
 
+#[cfg(feature = "chardet")]
+impl From<std::string::FromUtf16Error> for WhoIsError {
+    #[inline]
+    fn from(error: std::string::FromUtf16Error) -> Self {
+        WhoIsError::Utf16Error(error)
+    }
+}
+
 impl Display for WhoIsError {
     #[inline]
     fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), fmt::Error> {
@@ -58,6 +70,12 @@ impl Display for WhoIsError {
             WhoIsError::HostError(error) => Display::fmt(error, f),
             #[cfg(feature = "tokio")]
             WhoIsError::Elapsed(error) => Display::fmt(error, f),
+            #[cfg(feature = "chardet")]
+            WhoIsError::Utf16Error(error) => Display::fmt(error, f),
+            #[cfg(feature = "chardet")]
+            WhoIsError::OutputFull => f.write_str(
+                "The output buffer was filled before the string could be decoded to utf16",
+            ),
             WhoIsError::MapError(text) => f.write_str(text),
         }
     }
diff --git a/src/who_is_lookup_options.rs b/src/who_is_lookup_options.rs
index 8e571ea..a0b4eff 100644
--- a/src/who_is_lookup_options.rs
+++ b/src/who_is_lookup_options.rs
@@ -11,11 +11,11 @@ const DEFAULT_TIMEOUT: u64 = 60000;
 #[derive(Debug, Clone)]
 pub struct WhoIsLookupOptions {
     /// The target that you want to lookup.
-    pub target:  Target,
+    pub target: Target,
     /// The WHOIS server that you want to use. If it is **None**, an appropriate WHOIS server will be chosen from the list of WHOIS servers that the `WhoIs` instance have. The default value is **None**.
-    pub server:  Option<WhoIsServerValue>,
+    pub server: Option<WhoIsServerValue>,
     /// Number of times to follow redirects. The default value is 2.
-    pub follow:  u16,
+    pub follow: u16,
     /// Socket timeout in milliseconds. The default value is 60000.
     pub timeout: Option<Duration>,
 }
diff --git a/src/who_is_server_value.rs b/src/who_is_server_value.rs
index 62cb26e..d3ab85c 100644
--- a/src/who_is_server_value.rs
+++ b/src/who_is_server_value.rs
@@ -8,8 +8,8 @@ const DEFAULT_PUNYCODE: bool = true;
 /// The model of a WHOIS server.
 #[derive(Debug, Clone)]
 pub struct WhoIsServerValue {
-    pub host:     WhoIsHost,
-    pub query:    Option<String>,
+    pub host: WhoIsHost,
+    pub query: Option<String>,
     pub punycode: bool,
 }
 
@@ -56,11 +56,7 @@ impl WhoIsServerValue {
                     None => DEFAULT_PUNYCODE,
                 };
 
-                Ok(WhoIsServerValue {
-                    host,
-                    query,
-                    punycode,
-                })
+                Ok(WhoIsServerValue { host, query, punycode })
             },
             _ => Err(WhoIsError::MapError(
                 "The server value is an object, but it has not a host string.",
@@ -82,10 +78,6 @@ impl WhoIsServerValue {
             },
         };
 
-        Ok(WhoIsServerValue {
-            host,
-            query: None,
-            punycode: DEFAULT_PUNYCODE,
-        })
+        Ok(WhoIsServerValue { host, query: None, punycode: DEFAULT_PUNYCODE })
     }
 }