diff --git a/crates/core/src/query/mod.rs b/crates/core/src/query/mod.rs index fd9cad5ce..b2601a73e 100644 --- a/crates/core/src/query/mod.rs +++ b/crates/core/src/query/mod.rs @@ -1244,6 +1244,48 @@ mod tests { .unwrap(), ) .expect("failed to insert webpage"); + index + .insert( + &Webpage::test_parse( + &format!( + r#" + + + Test page with trailing slash + + + This is a test page with a trailing slash {} + + + "#, + rand_words(1000) + ), + "https://www.example.com/trailing-slash/", + ) + .unwrap(), + ) + .expect("failed to insert webpage"); + index + .insert( + &Webpage::test_parse( + &format!( + r#" + + + Test page with UPPERCASE link + + + This is a test page with an UPPERCASE link {} + + + "#, + rand_words(1000) + ), + "https://www.example.com/UPPERCASE", + ) + .unwrap(), + ) + .expect("failed to insert webpage"); index.commit().expect("failed to commit index"); let searcher = LocalSearcher::builder(Arc::new(RwLock::new(index))).build(); @@ -1260,6 +1302,34 @@ mod tests { }; let result = searcher.search_sync(&query).expect("Search failed"); assert_eq!(result.webpages.len(), 0); + + let query = SearchQuery { + query: "test exacturl:https://www.example.com/trailing-slash/".to_string(), + ..Default::default() + }; + let result = searcher.search_sync(&query).expect("Search failed"); + assert_eq!(result.webpages.len(), 1); + + let query = SearchQuery { + query: "test exacturl:https://www.example.com/trailing-slash".to_string(), + ..Default::default() + }; + let result = searcher.search_sync(&query).expect("Search failed"); + assert_eq!(result.webpages.len(), 0); + + let query = SearchQuery { + query: "test exacturl:https://www.example.com/UPPERCASE".to_string(), + ..Default::default() + }; + let result = searcher.search_sync(&query).expect("Search failed"); + assert_eq!(result.webpages.len(), 1); + + let query = SearchQuery { + query: "test exacturl:https://www.example.com/uppercase".to_string(), + ..Default::default() + }; + let result = searcher.search_sync(&query).expect("Search failed"); + assert_eq!(result.webpages.len(), 0); } #[test] diff --git a/crates/core/src/schema/text_field.rs b/crates/core/src/schema/text_field.rs index d430b00c3..4e80fe2d8 100644 --- a/crates/core/src/schema/text_field.rs +++ b/crates/core/src/schema/text_field.rs @@ -584,7 +584,7 @@ impl TextField for UrlNoTokenizer { } fn query_tokenizer(&self, _: Option<&whatlang::Lang>) -> FieldTokenizer { - FieldTokenizer::Words(WordTokenizer::default()) + FieldTokenizer::Identity(Identity {}) } fn is_searchable(&self) -> bool {