This is an automated email from the ASF dual-hosted git repository.

rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 4dcfa8669 [VL] Enable full functionality of split function (#4752)
4dcfa8669 is described below

commit 4dcfa866950e8936959c9f4b829fe61f13aa78cb
Author: Rui Mo <[email protected]>
AuthorDate: Thu Aug 15 08:32:31 2024 +0800

    [VL] Enable full functionality of split function (#4752)
---
 .../backendsapi/velox/VeloxSparkPlanExecApi.scala  | 11 -----
 .../gluten/expression/ExpressionTransformer.scala  | 30 -------------
 .../execution/VeloxStringFunctionsSuite.scala      | 51 ++++++++++++++++++----
 .../gluten/backendsapi/SparkPlanExecApi.scala      | 10 -----
 .../gluten/expression/ExpressionConverter.scala    |  8 ----
 5 files changed, 42 insertions(+), 68 deletions(-)

diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
index 89d29781c..fd0fc62dc 100644
--- 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
@@ -661,17 +661,6 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
    * * Expressions.
    */
 
-  /** Generate StringSplit transformer. */
-  override def genStringSplitTransformer(
-      substraitExprName: String,
-      srcExpr: ExpressionTransformer,
-      regexExpr: ExpressionTransformer,
-      limitExpr: ExpressionTransformer,
-      original: StringSplit): ExpressionTransformer = {
-    // In velox, split function just support tow args, not support limit arg 
for now
-    VeloxStringSplitTransformer(substraitExprName, srcExpr, regexExpr, 
limitExpr, original)
-  }
-
   /**
    * Generate Alias transformer.
    *
diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionTransformer.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionTransformer.scala
index 51b19ab14..71e58f124 100644
--- 
a/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionTransformer.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionTransformer.scala
@@ -107,33 +107,3 @@ case class VeloxHashExpressionTransformer(
     ExpressionBuilder.makeScalarFunction(functionId, nodes, typeNode)
   }
 }
-
-case class VeloxStringSplitTransformer(
-    substraitExprName: String,
-    srcExpr: ExpressionTransformer,
-    regexExpr: ExpressionTransformer,
-    limitExpr: ExpressionTransformer,
-    original: StringSplit)
-  extends ExpressionTransformer {
-  // TODO: split function support limit arg
-  override def children: Seq[ExpressionTransformer] = srcExpr :: regexExpr :: 
Nil
-
-  override def doTransform(args: java.lang.Object): ExpressionNode = {
-    if (
-      !regexExpr.isInstanceOf[LiteralTransformer] ||
-      !limitExpr.isInstanceOf[LiteralTransformer]
-    ) {
-      throw new GlutenNotSupportException(
-        "Gluten only supports literal input as limit/regex for split 
function.")
-    }
-
-    val limit = 
limitExpr.doTransform(args).asInstanceOf[IntLiteralNode].getValue
-    val regex = 
regexExpr.doTransform(args).asInstanceOf[StringLiteralNode].getValue
-    if (limit > 0 || regex.length > 1) {
-      throw new GlutenNotSupportException(
-        s"$original supported single-length regex and negative limit, but 
given $limit and $regex")
-    }
-
-    super.doTransform(args)
-  }
-}
diff --git 
a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxStringFunctionsSuite.scala
 
b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxStringFunctionsSuite.scala
index 2aedde12a..9357bc754 100644
--- 
a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxStringFunctionsSuite.scala
+++ 
b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxStringFunctionsSuite.scala
@@ -519,23 +519,56 @@ class VeloxStringFunctionsSuite extends 
VeloxWholeStageTransformerSuite {
         s"from $LINEITEM_TABLE limit 5") { _ => }
   }
 
-  ignore("split") {
+  testWithSpecifiedSparkVersion("split", Some("3.4")) {
     runQueryAndCompare(
-      s"select l_orderkey, l_comment, split(l_comment, ' ', 3) " +
-        s"from $LINEITEM_TABLE limit 5") { _ => }
+      s"select l_orderkey, l_comment, split(l_comment, '') " +
+        s"from $LINEITEM_TABLE limit 5") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
+    runQueryAndCompare(
+      s"select l_orderkey, l_comment, split(l_comment, '', 1) " +
+        s"from $LINEITEM_TABLE limit 5") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
 
-    // todo incorrect results
     runQueryAndCompare(
-      s"select l_orderkey, l_comment, split(l_comment, '[a]', 3) " +
-        s"from $LINEITEM_TABLE limit 5") { _ => }
+      s"select l_orderkey, l_comment, split(l_comment, ',') " +
+        s"from $LINEITEM_TABLE limit 5") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
+    runQueryAndCompare(
+      s"select l_orderkey, l_comment, split(l_comment, ',', 10) " +
+        s"from $LINEITEM_TABLE limit 
5")(checkGlutenOperatorMatch[ProjectExecTransformer])
 
     runQueryAndCompare(
       s"select l_orderkey, split(l_comment, ' ') " +
-        s"from $LINEITEM_TABLE limit 5") { _ => }
+        s"from $LINEITEM_TABLE limit 
5")(checkGlutenOperatorMatch[ProjectExecTransformer])
+    runQueryAndCompare(
+      s"select l_orderkey, l_comment, split(l_comment, ' ', 3) " +
+        s"from $LINEITEM_TABLE limit 
5")(checkGlutenOperatorMatch[ProjectExecTransformer])
 
     runQueryAndCompare(
-      s"select l_orderkey, split(l_comment, 'h') " +
-        s"from $LINEITEM_TABLE limit 5") { _ => }
+      s"select l_orderkey, l_comment, split(l_comment, '[a-z]+') " +
+        s"from $LINEITEM_TABLE limit 
5")(checkGlutenOperatorMatch[ProjectExecTransformer])
+    runQueryAndCompare(
+      s"select l_orderkey, l_comment, split(l_comment, '[a-z]+', 3) " +
+        s"from $LINEITEM_TABLE limit 
5")(checkGlutenOperatorMatch[ProjectExecTransformer])
+
+    runQueryAndCompare(
+      s"select l_orderkey, split(l_comment, '[1-9]+', -2) " +
+        s"from $LINEITEM_TABLE limit 
5")(checkGlutenOperatorMatch[ProjectExecTransformer])
+    runQueryAndCompare(
+      s"select l_orderkey, split(l_comment, '[1-9]+', 0) " +
+        s"from $LINEITEM_TABLE limit 
5")(checkGlutenOperatorMatch[ProjectExecTransformer])
+
+    runQueryAndCompare(
+      s"select l_orderkey, l_comment, split(l_comment, 'h') " +
+        s"from $LINEITEM_TABLE limit 5") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
+    runQueryAndCompare(
+      s"select l_orderkey, l_comment, split(l_comment, '[a]', 3) " +
+        s"from $LINEITEM_TABLE limit 
5")(checkGlutenOperatorMatch[ProjectExecTransformer])
   }
 
   test("substr") {
diff --git 
a/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
 
b/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
index 69392a353..3b9e87a20 100644
--- 
a/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
+++ 
b/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
@@ -164,16 +164,6 @@ trait SparkPlanExecApi {
       original: Expression): ExpressionTransformer =
     AliasTransformer(substraitExprName, child, original)
 
-  /** Generate SplitTransformer. */
-  def genStringSplitTransformer(
-      substraitExprName: String,
-      srcExpr: ExpressionTransformer,
-      regexExpr: ExpressionTransformer,
-      limitExpr: ExpressionTransformer,
-      original: StringSplit): ExpressionTransformer = {
-    GenericExpressionTransformer(substraitExprName, Seq(srcExpr, regexExpr, 
limitExpr), original)
-  }
-
   /** Generate an expression transformer to transform GetMapValue to 
Substrait. */
   def genGetMapValueTransformer(
       substraitExprName: String,
diff --git 
a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
 
b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
index 3ca66b518..8bca5dbf8 100644
--- 
a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
+++ 
b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
@@ -377,14 +377,6 @@ object ExpressionConverter extends SQLConfHelper with 
Logging {
           replaceWithExpressionTransformerInternal(t.replaceExpr, 
attributeSeq, expressionsMap),
           t
         )
-      case s: StringSplit =>
-        
BackendsApiManager.getSparkPlanExecApiInstance.genStringSplitTransformer(
-          substraitExprName,
-          replaceWithExpressionTransformerInternal(s.str, attributeSeq, 
expressionsMap),
-          replaceWithExpressionTransformerInternal(s.regex, attributeSeq, 
expressionsMap),
-          replaceWithExpressionTransformerInternal(s.limit, attributeSeq, 
expressionsMap),
-          s
-        )
       case r: RegExpReplace =>
         
BackendsApiManager.getSparkPlanExecApiInstance.genRegexpReplaceTransformer(
           substraitExprName,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to