This is an automated email from the ASF dual-hosted git repository.
philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new d659c80413 [GLUTEN-9287][VL] Enable array_compact function for Spark 3.4+ (#9349)
d659c80413 is described below
commit d659c804134f48c50d90a5932132fd489daae803
Author: dcoliversun <[email protected]>
AuthorDate: Fri Apr 18 01:42:03 2025 +0800
[GLUTEN-9287][VL] Enable array_compact function for Spark 3.4+ (#9349)
---
.../backendsapi/velox/VeloxSparkPlanExecApi.scala | 3 +++
.../execution/ScalarFunctionsValidateSuite.scala | 21 +++++++++++++++++++++
.../apache/gluten/expression/ExpressionNames.scala | 1 +
.../gluten/sql/shims/spark34/Spark34Shims.scala | 1 +
.../gluten/sql/shims/spark35/Spark35Shims.scala | 1 +
5 files changed, 27 insertions(+)
diff --git
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
index 9577b2b3f0..77cb2d68be 100644
---
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
+++
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
@@ -181,6 +181,9 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
function: ExpressionTransformer,
expr: ArrayFilter): ExpressionTransformer = {
expr.function match {
+ // Transformer for array_compact.
+ case LambdaFunction(_: IsNotNull, _, _) =>
+ GenericExpressionTransformer(ExpressionNames.ARRAY_COMPACT, Seq(argument), expr)
case LambdaFunction(_, arguments, _) if arguments.size == 2 =>
throw new GlutenNotSupportException(
"filter on array with lambda using index argument is not supported yet")
diff --git
a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
index 45b14bfb2d..58c18400b6 100644
---
a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
+++
b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
@@ -196,6 +196,27 @@ abstract class ScalarFunctionsValidateSuite extends
FunctionsValidateSuite {
}
}
+ testWithMinSparkVersion("Test array_compact function", "3.4") {
+ withTempPath {
+ path =>
+ Seq[Array[String]](
+ Array("a", "b"),
+ Array(),
+ Array("a", "b", null.asInstanceOf[String]),
+ Array(null.asInstanceOf[String])
+ )
+ .toDF("arr")
+ .write
+ .parquet(path.getCanonicalPath)
+
+ spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("tbl")
+
+ runQueryAndCompare("select arr, array_compact(arr) from tbl") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ }
+ }
+
test("Test round function") {
runQueryAndCompare(
"SELECT round(cast(l_orderkey as int), 2)" +
diff --git
a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
index cb8ce2826b..c822161ea5 100644
---
a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
+++
b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
@@ -286,6 +286,7 @@ object ExpressionNames {
final val ARRAY_PREPEND = "array_prepend"
final val ARRAY_SIZE = "array_size"
final val GET = "get"
+ final val ARRAY_COMPACT = "array_compact"
// Map functions
final val CREATE_MAP = "map"
diff --git
a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
index de0f72b82f..8ff5e17873 100644
---
a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
+++
b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
@@ -105,6 +105,7 @@ class Spark34Shims extends SparkShims {
override def runtimeReplaceableExpressionMappings: Seq[Sig] = {
Seq(
+ Sig[ArrayCompact](ExpressionNames.ARRAY_COMPACT),
Sig[ArraySize](ExpressionNames.ARRAY_SIZE),
Sig[EqualNull](ExpressionNames.EQUAL_NULL),
Sig[ILike](ExpressionNames.ILIKE),
diff --git
a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
index 5284a4188b..43a3239a2a 100644
---
a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
+++
b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
@@ -109,6 +109,7 @@ class Spark35Shims extends SparkShims {
override def runtimeReplaceableExpressionMappings: Seq[Sig] = {
Seq(
+ Sig[ArrayCompact](ExpressionNames.ARRAY_COMPACT),
Sig[ArrayPrepend](ExpressionNames.ARRAY_PREPEND),
Sig[ArraySize](ExpressionNames.ARRAY_SIZE),
Sig[EqualNull](ExpressionNames.EQUAL_NULL),
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]