diff --git a/Cargo.lock b/Cargo.lock index 2bbde22..1653aaa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -22,6 +22,7 @@ dependencies = [ name = "auris" version = "0.1.2" dependencies = [ + "bumpalo 3.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "criterion 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "nom 5.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] diff --git a/Cargo.toml b/Cargo.toml index ca2d046..d4cef45 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,10 +6,15 @@ edition = "2018" license = "MIT" description = "A simple (incomplete) URI parser" + # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] nom = "5.1.1" +[dependencies.bumpalo] +version = "3.2.1" +features = ["collections"] + [dev-dependencies] criterion = "0.3" @@ -17,5 +22,5 @@ criterion = "0.3" name = "uri_parse" harness = false -[profile.release] -debug = true +# [profile.release] +# debug = true \ No newline at end of file diff --git a/README.md b/README.md index 1aba0a8..be14414 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,7 @@ In the case of duplicated query string tags the last one wins: - [x] Ports - [x] Split up into multiple files - [x] Domains with . 
+- [x] Rendering of URIs and Authority with fmt::Display - [ ] Parsing IPv4, IPv6 - [ ] Parsing fragments - [ ] Percent encoding and decoding diff --git a/benches/uri_parse.rs b/benches/uri_parse.rs index 61d8f67..6baf0f4 100644 --- a/benches/uri_parse.rs +++ b/benches/uri_parse.rs @@ -1,5 +1,4 @@ use auris::parsers::uri; -use auris::URI; use criterion::{criterion_group, criterion_main, Criterion, Throughput}; fn criterion_benchmark(c: &mut Criterion) { @@ -15,5 +14,20 @@ fn criterion_benchmark(c: &mut Criterion) { }); } -criterion_group!(benches, criterion_benchmark); +fn bench_f(c: &mut Criterion) { + let mut group = c.benchmark_group("My own parser ffuu"); + + let string = "foo://user:pass@hotdog.com"; + group.bench_function("parsers::uri", |b| { + b.iter(|| auris::parsers::f(string)); + }); +} + +criterion_group! { + name = benches; + config = Criterion::default(); + targets = + criterion_benchmark, + bench_f +} criterion_main!(benches); diff --git a/src/lib.rs b/src/lib.rs index c85c3ad..23423c1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,12 +3,39 @@ //! - Uses only safe features in rust. //! - `rfc2396` & `rfc3986` compliant (incomplete) //! +//! +//! ## Parses structure: +//! +//! ```notrust +//! foo://example.com:8042/over/there?name=ferret#nose +//! \_/ \______________/\_________/ \_________/ \__/ +//! | | | | | +//! scheme authority path query fragment +//! ``` +//! //! # Usage //! //! ``` -//! use auris::parsers; +//! use auris::URI; +//! +//! "postgres://user:password@host".parse::<URI<String>>(); //! -//! parsers::uri("https://crates.io/crates/auris"); +//! "https://crates.io/crates/auris".parse::<URI<String>>(); +//! ``` +//! +//! ## Query strings +//! +//! We also parse query strings into HashMaps: +//! +//! ``` +//! # use auris::URI; +//! "postgres://user:password@example.com/db?replication=true".parse::<URI<String>>(); +//! ``` +//! +//! In the case of duplicated query string tags the last one wins: +//! ``` +//! # use auris::URI; +//! "scheme://host/path?a=1&a=2".parse::<URI<String>>(); //! 
``` extern crate nom; use std::str; @@ -21,18 +48,47 @@ use std::str::FromStr; pub mod parsers; -pub enum AurisParseErrorKind { +#[derive(Debug)] +pub enum ParseError { Failed, } -pub struct ParseError { - kind: AurisParseErrorKind, +#[derive(Debug)] +pub struct AurisErr { + kind: ParseError, } -impl fmt::Display for ParseError { +impl fmt::Display for AurisErr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match &self.kind { - AurisParseErrorKind::Failed => write!(f, "Parsing failed"), + ParseError::Failed => write!(f, "Parsing failed"), + } + } +} + +/// Make impossible authentication states unrepresentable +#[derive(Debug, PartialEq, Eq)] +pub enum UserInfo<T> { + User(T), + UserAndPassword(T, T), +} + +impl UserInfo<&str> { + fn to_owned(&self) -> UserInfo<String> { + match self { + UserInfo::User(d) => UserInfo::User((*d).to_string()), + UserInfo::UserAndPassword(u, p) => { + UserInfo::UserAndPassword((*u).to_string(), (*p).to_string()) + } + } + } +} + +impl fmt::Display for UserInfo<String> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + UserInfo::User(user) => write!(f, "{}", user), + UserInfo::UserAndPassword(user, password) => write!(f, "{}:{}", user, password), } } } @@ -45,8 +101,7 @@ where { //TODO(bradford): IPV6, IPV4, DNS enum pub host: T, - pub username: Option<T>, - pub password: Option<T>, + pub userinfo: Option<UserInfo<T>>, pub port: Option<u16>, } @@ -54,13 +109,44 @@ impl Authority<&str> { fn to_owned(&self) -> Authority<String> { Authority { host: self.host.to_string(), - username: self.username.map(|u| u.to_string()), - password: self.password.map(|p| p.to_string()), + userinfo: self.userinfo.as_ref().map(|u| u.to_owned()), port: self.port, } } } +/// Converts the Authority struct back to a string +/// +/// # Examples +/// ``` +/// use auris::{Authority, UserInfo}; +/// +/// assert_eq!("a:b@bob.com:443", +/// format!("{}", Authority { +/// host: "bob.com".to_string(), +/// userinfo: Some(UserInfo::UserAndPassword("a".to_string(), "b".to_string())), +/// 
port: Some(443), +/// })); +/// ``` +impl fmt::Display for Authority<String> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut formatted = String::new(); + // using a match as this feels cleaner than a map + let userinfo_string = match self.userinfo.as_ref() { + Some(userinfo) => format!("{}@", userinfo), + None => String::new(), + }; + formatted.push_str(&userinfo_string); + formatted.push_str(&self.host); + let port_string = match self.port.as_ref() { + Some(port) => format!(":{}", port), + None => String::new(), + }; + formatted.push_str(&port_string); + write!(f, "{}", formatted) + } +} + /// URI is the whole URI object /// /// # Examples @@ -102,17 +188,37 @@ impl URI<&str> { } impl FromStr for URI<String> { - type Err = ParseError; + type Err = AurisErr; fn from_str(s: &str) -> Result<Self, Self::Err> { match parsers::uri(s) { Ok((_, obj)) => Ok(obj.to_owned()), - Err(_) => Err(ParseError { - kind: AurisParseErrorKind::Failed, + Err(_) => Err(AurisErr { + kind: ParseError::Failed, }), } } } +/// Converts the URI struct back to a string +/// +/// # Examples +/// ``` +/// use auris::URI; +/// +/// let parsed = "http://bob.com".parse::<URI<String>>().unwrap(); +/// +/// assert_eq!("http://bob.com", +/// format!("{}", parsed)); +/// ``` +impl fmt::Display for URI<String> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut formatted = String::new(); + formatted.push_str(&self.scheme); + formatted.push_str("://"); + formatted.push_str(&format!("{}", self.authority)); + write!(f, "{}", formatted) + } +} // The host name of an URL. 
pub enum Host { diff --git a/src/parsers.rs b/src/parsers.rs index 619967c..f5ff188 100644 --- a/src/parsers.rs +++ b/src/parsers.rs @@ -8,19 +8,57 @@ use nom::{ IResult, }; -use crate::{Authority, URI}; +use crate::{Authority, UserInfo, URI, ParseError}; use std::collections::HashMap; use std::str; -/// Parses structure: -/// -/// ```notrust -/// foo://example.com:8042/over/there?name=ferret#nose -/// \_/ \______________/\_________/ \_________/ \__/ -/// | | | | | -/// scheme authority path query fragment -/// ``` -/// +use bumpalo::Bump; +use bumpalo::collections::String as BString; + +#[inline] +pub fn is_alphabetic(chr:u8) -> bool { + (chr >= 0x41 && chr <= 0x5A) || (chr >= 0x61 && chr <= 0x7A) +} + +#[inline] +pub fn is_digit(chr: u8) -> bool { + chr >= 0x30 && chr <= 0x39 +} + +#[inline] +pub fn is_alphanumeric(chr: u8) -> bool { + is_alphabetic(chr) || is_digit(chr) +} + +pub fn span<'i>(input: &'i str, i_pos: usize, rest_pos: usize) -> &'i str { + &input[..i_pos - rest_pos] +} + +fn take<'a, F: Fn(char) -> bool>(f: F) -> impl Fn(&'a str) -> Result<(&'a str, &'a str), (ParseError, &str)> { + move |i: &str| { + let mut iter = i.chars(); + loop { + let rest = iter.as_str(); + match iter.next() { + Some(c) if f(c) => {} + _ => { + let rest_len = rest.len(); + let i_len = i.len(); + return if rest_len != i_len { + Ok((span(i, i_len, rest_len), rest)) + } else { + Err((ParseError::Failed, i)) + }; + } + } + } + } +} + +pub fn f(i: &str) -> Result<(&str, &str), (ParseError, &str)> { + let (i, scheme) = take(|f| is_alphanumeric(f as u8) && f != ':')(i)?; + Ok((i, scheme)) +} /// Parse out the scheme /// @@ -35,9 +73,15 @@ use std::str; /// // Guidelines for URL schemes // https://tools.ietf.org/html/rfc2718 -pub fn scheme(input: &str) -> IResult<&str, &str> { +pub fn scheme<'a>(input: &'a str) -> IResult<&'a str, &'a str>{ // postgres:// // bob:// + // let bump = Bump::new(); + // let bstr = BString::from_str_in(input, &bump); + + // let r = take(":")(&bstr)?; + // 
+ // Ok(r) let (remaining, scheme_chunk) = take_till(|c| c == ':')(input)?; // :// is the hier part let (remaining_post_scheme, _) = tag("://")(remaining)?; @@ -60,34 +104,26 @@ fn host_port_combinator<'a>(input: &'a str) -> IResult<&'a str, (&'a str, Option Ok((i, (host, port))) } -/// Parse the user credentials from the authority section. We can -/// always expect this function to return a tuple of options. Instead of using -/// `Option<(Option<&str>, Option<&str>)>`, `(Option<&str>, Option<&str>)` is used -fn authority_credentials<'a>( - input: &'a str, -) -> IResult<&'a str, (Option<&'a str>, Option<&'a str>)> { - let user_pw_combinator = |i: &'a str| -> IResult<&str, (Option<&str>, Option<&str>)> { +/// Parse the user credentials from the authority section. +fn authority_credentials<'a>(input: &'a str) -> IResult<&'a str, Option<UserInfo<&'a str>>> { + let user_pw_combinator = |i: &'a str| -> IResult<&str, UserInfo<&str>> { // user:pw@ let (remain_chunk_1, user) = cut(alpha1)(i)?; let (remain_chunk_2, _) = tag(":")(remain_chunk_1)?; let (remain_chunk_3, password) = cut(alpha1)(remain_chunk_2)?; let (remain_chunk_4, _) = tag("@")(remain_chunk_3)?; - Ok((remain_chunk_4, (Some(user), Some(password)))) + Ok((remain_chunk_4, UserInfo::UserAndPassword(user, password))) }; // Parse user string without a password - let user_combinator = |i: &'a str| -> IResult<&str, (Option<&str>, Option<&str>)> { + let user_combinator = |i: &'a str| -> IResult<&str, UserInfo<&str>> { let (remain_chunk_1, user) = cut(alpha1)(i)?; let (remain_chunk_2, _) = tag("@")(remain_chunk_1)?; - Ok((remain_chunk_2, (Some(user), None))) + Ok((remain_chunk_2, UserInfo::User(user))) }; // The whole statement may fail if there is no match // we flatten this out so that you will just get (None, None) - let (remain, alt_opt) = opt(alt((user_pw_combinator, user_combinator)))(input)?; - match alt_opt { - Some(options) => Ok((remain, options)), - None => Ok((remain, (None, None))), - } + opt(alt((user_pw_combinator, 
user_combinator)))(input) } /// Parse the whole path chunk @@ -138,13 +174,12 @@ pub fn query<'a>(input: &'a str) -> IResult<&'a str, HashMap<&'a str, &'a str>> // postgres://user:pw@host:5432/db pub fn authority(input: &str) -> IResult<&str, Authority<&str>> { match all_consuming(tuple((authority_credentials, host_port_combinator)))(input) { - Ok((remaining_input, ((username, password), (host, port)))) => Ok(( + Ok((remaining_input, (userinfo, (host, port)))) => Ok(( remaining_input, Authority { host, - password, + userinfo, port, - username, }, )), Err(e) => Err(e), @@ -161,7 +196,7 @@ pub fn authority(input: &str) -> IResult<&str, Authority<&str>> { /// ``` pub fn uri(input: &str) -> IResult<&str, URI<&str>> { let (i, scheme) = scheme(input)?; - let (i, (username, password)) = authority_credentials(i)?; + let (i, userinfo) = authority_credentials(i)?; let (i, (host, port)) = host_port_combinator(i)?; let (i, path) = path(i)?; let (i, query) = opt(query)(i)?; @@ -172,8 +207,7 @@ pub fn uri(input: &str) -> IResult<&str, URI<&str>> { scheme, authority: Authority { host, - username, - password, + userinfo, port, }, path: Some(path), @@ -194,8 +228,7 @@ mod test { "", Authority { host: "bob".as_ref(), - password: Some("bob".as_ref()), - username: Some("bob".as_ref()), + userinfo: Some(UserInfo::UserAndPassword("bob".as_ref(), "bob".as_ref())), port: None } )) @@ -206,9 +239,8 @@ mod test { "", Authority { host: "b".as_ref(), - password: None, + userinfo: None, port: None, - username: None } )) ) @@ -218,7 +250,13 @@ mod test { fn test_user_info() { assert_eq!( authority_credentials("bob:password@host"), - Ok(("host", (Some("bob"), Some("password")))) + Ok(( + "host", + Some(UserInfo::UserAndPassword( + "bob".as_ref(), + "password".as_ref() + )) + )) ) } @@ -226,7 +264,7 @@ mod test { fn test_bad_user_info() { assert_eq!( authority_credentials("iamnotahost.com"), - Ok(("iamnotahost.com", (None, None))) + Ok(("iamnotahost.com", None)) ) } @@ -257,8 +295,7 @@ mod test { 
scheme: "a".as_ref(), authority: Authority { host: "d.e".as_ref(), - username: Some("b".as_ref()), - password: Some("c".as_ref()), + userinfo: Some(UserInfo::UserAndPassword("b".as_ref(), "c".as_ref())), port: None }, path: Some(vec!("f".as_ref(), "g".as_ref(), "h".as_ref())),