diff --git a/NEWS.md b/NEWS.md index 886a06dd4..f70be9ebf 100644 --- a/NEWS.md +++ b/NEWS.md @@ -67,6 +67,7 @@ * Fix soft-deleted items being indexed into elasticsearch ([MSEARCH-1119](https://folio-org.atlassian.net/browse/MSEARCH-1119)) * Add error handling on upload range processing ([MSEARCH-1151](https://folio-org.atlassian.net/browse/MSEARCH-1151)) * Ignore shadow locations and location units while indexing domain events ([MSEARCH-1154](https://folio-org.atlassian.net/browse/MSEARCH-1154)) + * Change instance index folio_word_delimiter_graph to catenate_all, honor '*' in IsbnSearchTermProcessor ([MSEARCH-1011](https://folio-org.atlassian.net/browse/MSEARCH-1011)) ### Tech Dept * Migrate to Opensearch 3.0.0 ([MSEARCH-1033](https://folio-org.atlassian.net/browse/MSEARCH-1033)) diff --git a/src/main/java/org/folio/search/cql/searchterm/IsbnSearchTermProcessor.java b/src/main/java/org/folio/search/cql/searchterm/IsbnSearchTermProcessor.java index c7ab61b1f..0800b21b5 100644 --- a/src/main/java/org/folio/search/cql/searchterm/IsbnSearchTermProcessor.java +++ b/src/main/java/org/folio/search/cql/searchterm/IsbnSearchTermProcessor.java @@ -1,5 +1,7 @@ package org.folio.search.cql.searchterm; +import static org.folio.search.utils.SearchUtils.ASTERISKS_SIGN; + import lombok.RequiredArgsConstructor; import org.folio.search.service.setter.instance.IsbnProcessor; import org.springframework.stereotype.Component; @@ -12,6 +14,10 @@ public class IsbnSearchTermProcessor implements SearchTermProcessor { @Override public String getSearchTerm(String inputTerm) { - return String.join(" ", isbnProcessor.normalizeIsbn(inputTerm)); + var hasWildcard = inputTerm.endsWith(ASTERISKS_SIGN); + var termToNormalize = hasWildcard ? inputTerm.substring(0, inputTerm.length() - 1) : inputTerm; + var normalized = String.join(" ", isbnProcessor.normalizeIsbn(termToNormalize)); + + return hasWildcard ? 
normalized + ASTERISKS_SIGN : normalized; } } diff --git a/src/main/resources/elasticsearch/index/instance.json b/src/main/resources/elasticsearch/index/instance.json index fb97c70d7..fce46cf83 100644 --- a/src/main/resources/elasticsearch/index/instance.json +++ b/src/main/resources/elasticsearch/index/instance.json @@ -10,7 +10,7 @@ "filter": { "folio_word_delimiter_graph": { "type": "word_delimiter_graph", - "catenate_words": true + "catenate_all": true } }, "normalizer": { diff --git a/src/test/java/org/folio/search/cql/searchterm/IsbnSearchTermProcessorTest.java b/src/test/java/org/folio/search/cql/searchterm/IsbnSearchTermProcessorTest.java index b2ad47a33..f5b2cc04b 100644 --- a/src/test/java/org/folio/search/cql/searchterm/IsbnSearchTermProcessorTest.java +++ b/src/test/java/org/folio/search/cql/searchterm/IsbnSearchTermProcessorTest.java @@ -36,4 +36,20 @@ void getSearchTerm_positive_multipleValues() { var actual = isbnSearchTermProcessor.getSearchTerm(searchTerm); assertThat(actual).isEqualTo("1861972717 9781861972712 (paper)"); } + + @Test + void getSearchTerm_withTrailingWildcard() { + var searchTerm = "9781609383657*"; + when(isbnProcessor.normalizeIsbn("9781609383657")).thenReturn(List.of("9781609383657")); + var actual = isbnSearchTermProcessor.getSearchTerm(searchTerm); + assertThat(actual).isEqualTo("9781609383657*"); + } + + @Test + void getSearchTerm_withTrailingWildcard_isbn10() { + var searchTerm = "047144250X*"; + when(isbnProcessor.normalizeIsbn("047144250X")).thenReturn(List.of("047144250x", "9780471442509")); + var actual = isbnSearchTermProcessor.getSearchTerm(searchTerm); + assertThat(actual).isEqualTo("047144250x 9780471442509*"); + } } diff --git a/src/test/java/org/folio/search/service/setter/instance/IsbnProcessorTest.java b/src/test/java/org/folio/search/service/setter/instance/IsbnProcessorTest.java index cf599964f..7da96d47d 100644 --- a/src/test/java/org/folio/search/service/setter/instance/IsbnProcessorTest.java +++ 
b/src/test/java/org/folio/search/service/setter/instance/IsbnProcessorTest.java @@ -60,6 +60,54 @@ void getFieldValue_negative_failedToLoadReferenceData() { assertThat(actual).isEmpty(); } + @MethodSource("normalizeIsbnDataProvider") + @DisplayName("normalizeIsbn_parameterized") + @ParameterizedTest(name = "[{index}] input=''{0}'', expected={1}") + void normalizeIsbn_parameterized(String input, List expected) { + var actual = isbnProcessor.normalizeIsbn(input); + assertThat(actual).containsExactlyElementsOf(expected); + } + + @SuppressWarnings("checkstyle:MethodLength") + private static Stream normalizeIsbnDataProvider() { + return Stream.of( + // Empty/whitespace cases + arguments("", emptyList()), + arguments(" ", emptyList()), + + // Valid ISBN-10 (with valid checksum - converts to ISBN-13) + arguments(" 1-86197-271-7 ", List.of("1861972717", "9781861972712")), // Covers trimming + formatting + arguments("1 86197 271-7 (paper)", List.of("1861972717", "9781861972712", "(paper)")), + + // ISBN-10 ending in 'X' (checksum is valid; expected output is normalized only, no conversion) + arguments("047144250X", List.of("047144250x")), + arguments("047144250X (paper)", List.of("047144250x (paper)")), + + // Invalid ISBN-10 format (non-standard spacing/hyphens) + arguments("1-86-197 271-7", List.of("1861972717")), // Invalid spacing + arguments("1 86197 2717 (paper)", List.of("1861972717 (paper)")), + + // Unformatted 13-digit ISBN-13 input (returned unchanged) + arguments("9781609383657", List.of("9781609383657")), + arguments("9790471442509", List.of("9790471442509")), // 979 prefix + + // ISBN-13 with formatting variations + arguments("978 0 471 44250 9", List.of("9780471442509")), // Multiple spaces + arguments("978 0 471 44250 9 (alk. paper)", List.of("9780471442509", "(alk. paper)")), + + // Invalid ISBN-13 (wrong prefix or malformed) + arguments("89780471442509 (alk. paper)", List.of("89780471442509 (alk. paper)")), + arguments("978-0 4712 442509 (alk. paper)", List.of("97804712442509 (alk. 
paper)")), + + // Valid ISBN-10 with qualifier treated as extra text + arguments("1861972717 extra text", List.of("1861972717", "9781861972712", "extra text")), + + // Non-ISBN strings (normalized with char removal) + arguments("ISBN 047144250X", List.of("isbn 047144250x")), + arguments("1 2 3 4 5", List.of("12345")) + ); + } + @SuppressWarnings("checkstyle:MethodLength") private static Stream isbnDataProvider() { return Stream.of( diff --git a/src/test/resources/test-resources/instance-search-test-queries.csv b/src/test/resources/test-resources/instance-search-test-queries.csv index c25b950b6..e7d6c30fa 100644 --- a/src/test/resources/test-resources/instance-search-test-queries.csv +++ b/src/test/resources/test-resources/instance-search-test-queries.csv @@ -393,4 +393,11 @@ Case,Query,Value 392,"issn = ""{value}""",0*-*x 393,"issn = ""{value}""",*X 394,"issn = ""{value}""",*x -395,"issn = ""{value}""",0040-781* \ No newline at end of file +395,"issn = ""{value}""",0040-781* +396,"isbn = ""{value}""",0471442* +397,"isbn = ""{value}""",047144250X* +398,"isbn == ""{value}""",047144250X +399,"isbn == ""{value}""",047144250 +400,"isbn = ""{value}""",9781609383* +401,"isbn = ""{value}""",9781609383657* +402,"isbn == ""{value}""",9781609383657 \ No newline at end of file