This is an automated email from the ASF dual-hosted git repository.

yunyd pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/texera.git


The following commit(s) were added to refs/heads/main by this push:
     new d217b8c20e feat(op): add Substring Search operator with case sensitivity support (#3780)
d217b8c20e is described below

commit d217b8c20e9f99edb93119215fa3b4a47db573ef
Author: yunyad <[email protected]>
AuthorDate: Wed Oct 15 15:28:17 2025 -0700

    feat(op): add Substring Search operator with case sensitivity support (#3780)
    
    This PR introduces a new `Substring Search` operator that performs
    substring-based matching on a string column, with optional case
    sensitivity.
    
    ### Motivation
    
    Unlike the existing `KeywordSearch` operator which relies on Lucene
    token-based parsing, this operator performs a raw substring match,
    making it more suitable for exact or partial field matches where
    tokenization is not desired.
    
    ### Changes
    
    - Added `SubstringSearchOpDesc` extending `FilterOpDesc`, with three
    parameters:
      - `attribute`: target column to search
      - `substring`: query substring
      - `isCaseSensitive`: whether matching is case-sensitive
    - Implemented `SubstringSearchOpExec` with string matching logic
    - Registered the operator in the `SEARCH_GROUP` group with proper
    metadata
    - Included an operator icon for UI integration
    
    ### Demo
    ![Screen Recording 2025-09-28 at 6 31 41 PM](https://github.com/user-attachments/assets/f28cc018-de68-4b35-aa99-e5a7c4adea3c)
    
    Fix #3107
---
 .../org/apache/amber/operator/LogicalOp.scala      |   2 +
 .../substringSearch/SubstringSearchOpDesc.scala    |  77 +++++++++++++++++++++
 .../substringSearch/SubstringSearchOpExec.scala    |  40 +++++++++++
 .../src/assets/operator_images/SubstringSearch.png | Bin 0 -> 507231 bytes
 4 files changed, 119 insertions(+)

diff --git a/common/workflow-operator/src/main/scala/org/apache/amber/operator/LogicalOp.scala b/common/workflow-operator/src/main/scala/org/apache/amber/operator/LogicalOp.scala
index e00dfd5c72..209ac0e481 100644
--- a/common/workflow-operator/src/main/scala/org/apache/amber/operator/LogicalOp.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/amber/operator/LogicalOp.scala
@@ -79,6 +79,7 @@ import org.apache.amber.operator.source.sql.asterixdb.AsterixDBSourceOpDesc
 import org.apache.amber.operator.source.sql.mysql.MySQLSourceOpDesc
 import org.apache.amber.operator.source.sql.postgresql.PostgreSQLSourceOpDesc
 import org.apache.amber.operator.split.SplitOpDesc
+import org.apache.amber.operator.substringSearch.SubstringSearchOpDesc
 import org.apache.amber.operator.symmetricDifference.SymmetricDifferenceOpDesc
 import org.apache.amber.operator.typecasting.TypeCastingOpDesc
 import org.apache.amber.operator.udf.java.JavaUDFOpDesc
@@ -171,6 +172,7 @@ trait StateTransferFunc
     new Type(value = classOf[ProjectionOpDesc], name = "Projection"),
     new Type(value = classOf[UnionOpDesc], name = "Union"),
     new Type(value = classOf[KeywordSearchOpDesc], name = "KeywordSearch"),
+    new Type(value = classOf[SubstringSearchOpDesc], name = "SubstringSearch"),
     new Type(value = classOf[AggregateOpDesc], name = "Aggregate"),
     new Type(value = classOf[LineChartOpDesc], name = "LineChart"),
     new Type(value = classOf[WaterfallChartOpDesc], name = "WaterfallChart"),
diff --git a/common/workflow-operator/src/main/scala/org/apache/amber/operator/substringSearch/SubstringSearchOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/amber/operator/substringSearch/SubstringSearchOpDesc.scala
new file mode 100644
index 0000000000..a5b290d237
--- /dev/null
+++ b/common/workflow-operator/src/main/scala/org/apache/amber/operator/substringSearch/SubstringSearchOpDesc.scala
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.amber.operator.substringSearch
+
+import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
+import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
+import org.apache.amber.core.executor.OpExecWithClassName
+import org.apache.amber.core.virtualidentity.{ExecutionIdentity, 
WorkflowIdentity}
+import org.apache.amber.core.workflow.{InputPort, OutputPort, PhysicalOp}
+import org.apache.amber.operator.filter.FilterOpDesc
+import org.apache.amber.operator.metadata.annotations.AutofillAttributeName
+import org.apache.amber.operator.metadata.{OperatorGroupConstants, 
OperatorInfo}
+import org.apache.amber.util.JSONUtils.objectMapper
+
+/** Logical descriptor of the Substring Search filter: target column, query text, case flag. */
+class SubstringSearchOpDesc extends FilterOpDesc {
+  // Name of the column whose value is searched.
+  @JsonProperty(required = true)
+  @JsonSchemaTitle("attribute")
+  @JsonPropertyDescription("column to search substring on")
+  @AutofillAttributeName
+  var attribute: String = _
+  // Text to look for inside the column value.
+  @JsonProperty(required = true)
+  @JsonSchemaTitle("Substring")
+  @JsonPropertyDescription("substring")
+  var substring: String = _
+  // Case-sensitivity switch; off by default.
+  @JsonProperty(required = true, defaultValue = "false")
+  @JsonSchemaTitle("Case Sensitive")
+  @JsonPropertyDescription("Whether the substring match is case sensitive.")
+  var isCaseSensitive: Boolean = false
+
+  /** Builds the physical operator; this descriptor is handed to the executor as a JSON string. */
+  override def getPhysicalOp(
+      workflowId: WorkflowIdentity,
+      executionId: ExecutionIdentity
+  ): PhysicalOp = {
+    val executorClass = "org.apache.amber.operator.substringSearch.SubstringSearchOpExec"
+    val executorInit = OpExecWithClassName(executorClass, objectMapper.writeValueAsString(this))
+    PhysicalOp
+      .oneToOnePhysicalOp(workflowId, executionId, operatorIdentifier, executorInit)
+      .withInputPorts(operatorInfo.inputPorts)
+      .withOutputPorts(operatorInfo.outputPorts)
+  }
+
+  /** Operator metadata: display name, description, search group, one input and one output port. */
+  override def operatorInfo: OperatorInfo = {
+    val singleInput = List(InputPort())
+    val singleOutput = List(OutputPort())
+    OperatorInfo(
+      userFriendlyName = "Substring Search",
+      operatorDescription = "Search for Substring(s) in a string column",
+      operatorGroupName = OperatorGroupConstants.SEARCH_GROUP,
+      inputPorts = singleInput,
+      outputPorts = singleOutput,
+      supportReconfiguration = true
+    )
+  }
+}
diff --git a/common/workflow-operator/src/main/scala/org/apache/amber/operator/substringSearch/SubstringSearchOpExec.scala b/common/workflow-operator/src/main/scala/org/apache/amber/operator/substringSearch/SubstringSearchOpExec.scala
new file mode 100644
index 0000000000..704854b339
--- /dev/null
+++ b/common/workflow-operator/src/main/scala/org/apache/amber/operator/substringSearch/SubstringSearchOpExec.scala
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.amber.operator.substringSearch
+
+import org.apache.amber.core.tuple.Tuple
+import org.apache.amber.operator.filter.FilterOpExec
+import org.apache.amber.util.JSONUtils.objectMapper
+
+/** Keeps tuples whose `attribute` value contains `substring`; null values never match. */
+class SubstringSearchOpExec(descString: String) extends FilterOpExec {
+  private val desc: SubstringSearchOpDesc =
+    objectMapper.readValue(descString, classOf[SubstringSearchOpDesc])
+
+  this.setFilterFunc(findSubstring)
+
+  // Option guards a null field value; Locale.ROOT keeps case folding independent of the JVM locale.
+  private def findSubstring(tuple: Tuple): Boolean =
+    Option[Any](tuple.getField(desc.attribute)).map(_.toString).exists { content =>
+      def normalize(text: String): String =
+        if (desc.isCaseSensitive) text else text.toLowerCase(java.util.Locale.ROOT)
+      normalize(content).contains(normalize(desc.substring))
+    }
+}
diff --git a/frontend/src/assets/operator_images/SubstringSearch.png b/frontend/src/assets/operator_images/SubstringSearch.png
new file mode 100644
index 0000000000..ea571c3956
Binary files /dev/null and b/frontend/src/assets/operator_images/SubstringSearch.png differ

Reply via email to