apache · SarahAsad23 · Jun 28, 2025 · Jun 28, 2025 · Jun 28, 2025 · Oct 11, 2025
diff --git a/...rator/src/main/scala/edu/uci/ics/amber/operator/keywordSearch/CaseSensitiveAnalyzer.scala b/...rator/src/main/scala/edu/uci/ics/amber/operator/keywordSearch/CaseSensitiveAnalyzer.scala
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.amber.operator.keywordSearch
+
+import org.apache.lucene.analysis.{Analyzer, TokenStream}
+import org.apache.lucene.analysis.core.WhitespaceTokenizer
+import org.apache.lucene.analysis.CharArraySet
+import org.apache.lucene.analysis.StopFilter
+import org.apache.lucene.analysis.Analyzer.TokenStreamComponents
+
+/**
+  * Analyzer used for case-sensitive keyword matching: tokens keep their original casing.
+  *
+  * Input is split by a [[WhitespaceTokenizer]] and then passed through a [[StopFilter]]
+  * configured with an empty stop-word set; no lower-casing filter is part of the chain.
+  */
+class CaseSensitiveAnalyzer extends Analyzer {
+
+  /** Builds the analysis chain; `fieldName` is ignored, so every field is analyzed alike. */
+  override protected def createComponents(fieldName: String): TokenStreamComponents = {
+    val source = new WhitespaceTokenizer()
+    // The stop-word set is empty, so this filter has nothing to remove.
+    val sink: TokenStream = new StopFilter(source, CharArraySet.EMPTY_SET)
+    new TokenStreamComponents(source, sink)
+  }
+}
diff --git a/...perator/src/main/scala/edu/uci/ics/amber/operator/keywordSearch/KeywordSearchOpDesc.scala b/...perator/src/main/scala/edu/uci/ics/amber/operator/keywordSearch/KeywordSearchOpDesc.scala
@@ -43,6 +43,11 @@ class KeywordSearchOpDesc extends FilterOpDesc {
   @JsonPropertyDescription("keywords")
   var keyword: String = _
 
+  @JsonProperty(required = true, defaultValue = "false")
+  @JsonSchemaTitle("Case Sensitive")
+  @JsonPropertyDescription("Whether the keyword is case sensitive or not")
+  var isCaseSensitive: Boolean = false // off unless set, matching the declared defaultValue
+
   override def getPhysicalOp(
       workflowId: WorkflowIdentity,
       executionId: ExecutionIdentity

diff --git a/...perator/src/main/scala/edu/uci/ics/amber/operator/keywordSearch/KeywordSearchOpExec.scala b/...perator/src/main/scala/edu/uci/ics/amber/operator/keywordSearch/KeywordSearchOpExec.scala
@@ -26,14 +26,19 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer
 import org.apache.lucene.index.memory.MemoryIndex
 import org.apache.lucene.queryparser.classic.QueryParser
 import org.apache.lucene.search.Query
+import org.apache.lucene.analysis.Analyzer
 
 class KeywordSearchOpExec(descString: String) extends FilterOpExec {
   // Operator configuration, rebuilt from the serialized descriptor string given to the constructor.
   private val desc: KeywordSearchOpDesc =
     objectMapper.readValue(descString, classOf[KeywordSearchOpDesc])
 
   // We chose StandardAnalyzer because it provides more comprehensive tokenization, retaining numeric tokens and handling a broader range of characters.
   // This ensures that search functionality can include standalone numbers (e.g., "3") and complex queries while offering robust performance for most use cases.
-  @transient private lazy val analyzer = new StandardAnalyzer()
+
+  // Case-sensitive searches use CaseSensitiveAnalyzer; all other searches use StandardAnalyzer.
+  @transient private lazy val analyzer: Analyzer =
+    if (desc.isCaseSensitive) new CaseSensitiveAnalyzer()
+    else new StandardAnalyzer()
+
   // Lucene query built on first access by parsing desc.keyword with the analyzer selected above;
   // desc.attribute is handed to QueryParser as its field argument.
   @transient lazy val query: Query = new QueryParser(desc.attribute, analyzer).parse(desc.keyword)
   // Created lazily on first access.
   @transient private lazy val memoryIndex: MemoryIndex = new MemoryIndex()