diff --git a/core/workflow-operator/src/main/scala/edu/uci/ics/amber/operator/keywordSearch/CaseSensitiveAnalyzer.scala b/core/workflow-operator/src/main/scala/edu/uci/ics/amber/operator/keywordSearch/CaseSensitiveAnalyzer.scala
new file mode 100644
index 00000000000..ccba2be5654
--- /dev/null
+++ b/core/workflow-operator/src/main/scala/edu/uci/ics/amber/operator/keywordSearch/CaseSensitiveAnalyzer.scala
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.amber.operator.keywordSearch
+
+import org.apache.lucene.analysis.{Analyzer, TokenStream}
+import org.apache.lucene.analysis.core.WhitespaceTokenizer
+import org.apache.lucene.analysis.CharArraySet
+import org.apache.lucene.analysis.StopFilter
+import org.apache.lucene.analysis.Analyzer.TokenStreamComponents
+
+class CaseSensitiveAnalyzer extends Analyzer {
+  override protected def createComponents(fieldName: String): TokenStreamComponents = {
+    val tokenizer = new WhitespaceTokenizer()
+    val stream: TokenStream = new StopFilter(tokenizer, CharArraySet.EMPTY_SET)
+    new TokenStreamComponents(tokenizer, stream)
+  }
+}
diff --git a/core/workflow-operator/src/main/scala/edu/uci/ics/amber/operator/keywordSearch/KeywordSearchOpDesc.scala b/core/workflow-operator/src/main/scala/edu/uci/ics/amber/operator/keywordSearch/KeywordSearchOpDesc.scala
index 6e655dd36ea..df416686775 100644
--- a/core/workflow-operator/src/main/scala/edu/uci/ics/amber/operator/keywordSearch/KeywordSearchOpDesc.scala
+++ b/core/workflow-operator/src/main/scala/edu/uci/ics/amber/operator/keywordSearch/KeywordSearchOpDesc.scala
@@ -43,6 +43,11 @@ class KeywordSearchOpDesc extends FilterOpDesc {
   @JsonPropertyDescription("keywords")
   var keyword: String = _
 
+  @JsonProperty(required = true, defaultValue = "false")
+  @JsonSchemaTitle("Case Sensitive")
+  @JsonPropertyDescription("Whether the keyword is case sensitive or not")
+  var isCaseSensitive: Boolean = false
+
   override def getPhysicalOp(
       workflowId: WorkflowIdentity,
       executionId: ExecutionIdentity
diff --git a/core/workflow-operator/src/main/scala/edu/uci/ics/amber/operator/keywordSearch/KeywordSearchOpExec.scala b/core/workflow-operator/src/main/scala/edu/uci/ics/amber/operator/keywordSearch/KeywordSearchOpExec.scala
index c2f2e6a3f6e..f28acde3f35 100644
--- a/core/workflow-operator/src/main/scala/edu/uci/ics/amber/operator/keywordSearch/KeywordSearchOpExec.scala
+++ b/core/workflow-operator/src/main/scala/edu/uci/ics/amber/operator/keywordSearch/KeywordSearchOpExec.scala
@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer
 import org.apache.lucene.index.memory.MemoryIndex
 import org.apache.lucene.queryparser.classic.QueryParser
 import org.apache.lucene.search.Query
+import org.apache.lucene.analysis.Analyzer
 
 class KeywordSearchOpExec(descString: String) extends FilterOpExec {
   private val desc: KeywordSearchOpDesc =
@@ -33,7 +34,11 @@ class KeywordSearchOpExec(descString: String) extends FilterOpExec {
   // We chose StandardAnalyzer because it provides more comprehensive tokenization, retaining numeric tokens and handling a broader range of characters.
   // This ensures that search functionality can include standalone numbers (e.g., "3") and complex queries while offering robust performance for most use cases.
-  @transient private lazy val analyzer = new StandardAnalyzer()
+
+  @transient private lazy val analyzer: Analyzer = {
+    if (desc.isCaseSensitive) new CaseSensitiveAnalyzer() else new StandardAnalyzer()
+  }
+
   @transient lazy val query: Query = new QueryParser(desc.attribute, analyzer).parse(desc.keyword)
 
   @transient private lazy val memoryIndex: MemoryIndex = new MemoryIndex()