This is an automated email from the ASF dual-hosted git repository.
rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 4dcfa8669 [VL] Enable full functionality of split function (#4752)
4dcfa8669 is described below
commit 4dcfa866950e8936959c9f4b829fe61f13aa78cb
Author: Rui Mo <[email protected]>
AuthorDate: Thu Aug 15 08:32:31 2024 +0800
[VL] Enable full functionality of split function (#4752)
---
.../backendsapi/velox/VeloxSparkPlanExecApi.scala | 11 -----
.../gluten/expression/ExpressionTransformer.scala | 30 -------------
.../execution/VeloxStringFunctionsSuite.scala | 51 ++++++++++++++++++----
.../gluten/backendsapi/SparkPlanExecApi.scala | 10 -----
.../gluten/expression/ExpressionConverter.scala | 8 ----
5 files changed, 42 insertions(+), 68 deletions(-)
diff --git
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
index 89d29781c..fd0fc62dc 100644
---
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
+++
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
@@ -661,17 +661,6 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
* * Expressions.
*/
- /** Generate StringSplit transformer. */
- override def genStringSplitTransformer(
- substraitExprName: String,
- srcExpr: ExpressionTransformer,
- regexExpr: ExpressionTransformer,
- limitExpr: ExpressionTransformer,
- original: StringSplit): ExpressionTransformer = {
- // In velox, split function just support tow args, not support limit arg
for now
- VeloxStringSplitTransformer(substraitExprName, srcExpr, regexExpr,
limitExpr, original)
- }
-
/**
* Generate Alias transformer.
*
diff --git
a/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionTransformer.scala
b/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionTransformer.scala
index 51b19ab14..71e58f124 100644
---
a/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionTransformer.scala
+++
b/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionTransformer.scala
@@ -107,33 +107,3 @@ case class VeloxHashExpressionTransformer(
ExpressionBuilder.makeScalarFunction(functionId, nodes, typeNode)
}
}
-
-case class VeloxStringSplitTransformer(
- substraitExprName: String,
- srcExpr: ExpressionTransformer,
- regexExpr: ExpressionTransformer,
- limitExpr: ExpressionTransformer,
- original: StringSplit)
- extends ExpressionTransformer {
- // TODO: split function support limit arg
- override def children: Seq[ExpressionTransformer] = srcExpr :: regexExpr ::
Nil
-
- override def doTransform(args: java.lang.Object): ExpressionNode = {
- if (
- !regexExpr.isInstanceOf[LiteralTransformer] ||
- !limitExpr.isInstanceOf[LiteralTransformer]
- ) {
- throw new GlutenNotSupportException(
- "Gluten only supports literal input as limit/regex for split
function.")
- }
-
- val limit =
limitExpr.doTransform(args).asInstanceOf[IntLiteralNode].getValue
- val regex =
regexExpr.doTransform(args).asInstanceOf[StringLiteralNode].getValue
- if (limit > 0 || regex.length > 1) {
- throw new GlutenNotSupportException(
- s"$original supported single-length regex and negative limit, but
given $limit and $regex")
- }
-
- super.doTransform(args)
- }
-}
diff --git
a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxStringFunctionsSuite.scala
b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxStringFunctionsSuite.scala
index 2aedde12a..9357bc754 100644
---
a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxStringFunctionsSuite.scala
+++
b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxStringFunctionsSuite.scala
@@ -519,23 +519,56 @@ class VeloxStringFunctionsSuite extends
VeloxWholeStageTransformerSuite {
s"from $LINEITEM_TABLE limit 5") { _ => }
}
- ignore("split") {
+ testWithSpecifiedSparkVersion("split", Some("3.4")) {
runQueryAndCompare(
- s"select l_orderkey, l_comment, split(l_comment, ' ', 3) " +
- s"from $LINEITEM_TABLE limit 5") { _ => }
+ s"select l_orderkey, l_comment, split(l_comment, '') " +
+ s"from $LINEITEM_TABLE limit 5") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ runQueryAndCompare(
+ s"select l_orderkey, l_comment, split(l_comment, '', 1) " +
+ s"from $LINEITEM_TABLE limit 5") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
- // todo incorrect results
runQueryAndCompare(
- s"select l_orderkey, l_comment, split(l_comment, '[a]', 3) " +
- s"from $LINEITEM_TABLE limit 5") { _ => }
+ s"select l_orderkey, l_comment, split(l_comment, ',') " +
+ s"from $LINEITEM_TABLE limit 5") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ runQueryAndCompare(
+ s"select l_orderkey, l_comment, split(l_comment, ',', 10) " +
+ s"from $LINEITEM_TABLE limit
5")(checkGlutenOperatorMatch[ProjectExecTransformer])
runQueryAndCompare(
s"select l_orderkey, split(l_comment, ' ') " +
- s"from $LINEITEM_TABLE limit 5") { _ => }
+ s"from $LINEITEM_TABLE limit
5")(checkGlutenOperatorMatch[ProjectExecTransformer])
+ runQueryAndCompare(
+ s"select l_orderkey, l_comment, split(l_comment, ' ', 3) " +
+ s"from $LINEITEM_TABLE limit
5")(checkGlutenOperatorMatch[ProjectExecTransformer])
runQueryAndCompare(
- s"select l_orderkey, split(l_comment, 'h') " +
- s"from $LINEITEM_TABLE limit 5") { _ => }
+ s"select l_orderkey, l_comment, split(l_comment, '[a-z]+') " +
+ s"from $LINEITEM_TABLE limit
5")(checkGlutenOperatorMatch[ProjectExecTransformer])
+ runQueryAndCompare(
+ s"select l_orderkey, l_comment, split(l_comment, '[a-z]+', 3) " +
+ s"from $LINEITEM_TABLE limit
5")(checkGlutenOperatorMatch[ProjectExecTransformer])
+
+ runQueryAndCompare(
+ s"select l_orderkey, split(l_comment, '[1-9]+', -2) " +
+ s"from $LINEITEM_TABLE limit
5")(checkGlutenOperatorMatch[ProjectExecTransformer])
+ runQueryAndCompare(
+ s"select l_orderkey, split(l_comment, '[1-9]+', 0) " +
+ s"from $LINEITEM_TABLE limit
5")(checkGlutenOperatorMatch[ProjectExecTransformer])
+
+ runQueryAndCompare(
+ s"select l_orderkey, l_comment, split(l_comment, 'h') " +
+ s"from $LINEITEM_TABLE limit 5") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ runQueryAndCompare(
+ s"select l_orderkey, l_comment, split(l_comment, '[a]', 3) " +
+ s"from $LINEITEM_TABLE limit
5")(checkGlutenOperatorMatch[ProjectExecTransformer])
}
test("substr") {
diff --git
a/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
b/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
index 69392a353..3b9e87a20 100644
---
a/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
+++
b/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
@@ -164,16 +164,6 @@ trait SparkPlanExecApi {
original: Expression): ExpressionTransformer =
AliasTransformer(substraitExprName, child, original)
- /** Generate SplitTransformer. */
- def genStringSplitTransformer(
- substraitExprName: String,
- srcExpr: ExpressionTransformer,
- regexExpr: ExpressionTransformer,
- limitExpr: ExpressionTransformer,
- original: StringSplit): ExpressionTransformer = {
- GenericExpressionTransformer(substraitExprName, Seq(srcExpr, regexExpr,
limitExpr), original)
- }
-
/** Generate an expression transformer to transform GetMapValue to
Substrait. */
def genGetMapValueTransformer(
substraitExprName: String,
diff --git
a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
index 3ca66b518..8bca5dbf8 100644
---
a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
+++
b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
@@ -377,14 +377,6 @@ object ExpressionConverter extends SQLConfHelper with
Logging {
replaceWithExpressionTransformerInternal(t.replaceExpr,
attributeSeq, expressionsMap),
t
)
- case s: StringSplit =>
-
BackendsApiManager.getSparkPlanExecApiInstance.genStringSplitTransformer(
- substraitExprName,
- replaceWithExpressionTransformerInternal(s.str, attributeSeq,
expressionsMap),
- replaceWithExpressionTransformerInternal(s.regex, attributeSeq,
expressionsMap),
- replaceWithExpressionTransformerInternal(s.limit, attributeSeq,
expressionsMap),
- s
- )
case r: RegExpReplace =>
BackendsApiManager.getSparkPlanExecApiInstance.genRegexpReplaceTransformer(
substraitExprName,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]