This is an automated email from the ASF dual-hosted git repository.

philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 9b0da7709 [VL] Support collect_list in window operator (#5408)
9b0da7709 is described below

commit 9b0da7709feffc0502f41f60bc4fac89d0e587cc
Author: Joey <[email protected]>
AuthorDate: Mon Apr 15 22:38:44 2024 +0800

    [VL] Support collect_list in window operator (#5408)
---
 .../execution/VeloxWindowExpressionSuite.scala     | 41 ++++++++++++++++++++++
 .../substrait/SubstraitToVeloxPlanValidator.cc     |  2 +-
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxWindowExpressionSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxWindowExpressionSuite.scala
index 5bb7b0dba..3dfbd6bd2 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxWindowExpressionSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxWindowExpressionSuite.scala
@@ -17,6 +17,8 @@
 package org.apache.gluten.execution
 
 import org.apache.spark.SparkConf
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.types._
 
 class VeloxWindowExpressionSuite extends WholeStageTransformerSuite {
 
@@ -69,4 +71,43 @@ class VeloxWindowExpressionSuite extends WholeStageTransformerSuite {
       checkGlutenOperatorMatch[WindowExecTransformer]
     }
   }
+
+  test("collect_list") {
+    withTable("t") {
+      val data = Seq(
+        Row(0, 1),
+        Row(0, 2),
+        Row(1, 1),
+        Row(1, 2),
+        Row(1, 2),
+        Row(2, 2),
+        Row(2, 3),
+        Row(3, null),
+        Row(3, null),
+        Row(4, 1),
+        Row(4, null)
+      )
+      val schema = new StructType()
+        .add("c1", IntegerType)
+        .add("c2", IntegerType, nullable = true)
+      spark
+        .createDataFrame(spark.sparkContext.parallelize(data), schema)
+        .write
+        .format("parquet")
+        .saveAsTable("t")
+
+      runQueryAndCompare("""
+                           |SELECT
+                           | c1,
+                           | collect_list(c2) OVER (
+                           |   PARTITION BY c1
+                           | )
+                           |FROM
+                           | t
+                           |ORDER BY 1, 2;
+                           |""".stripMargin) {
+        checkGlutenOperatorMatch[WindowExecTransformer]
+      }
+    }
+  }
 }
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
index 2739ede64..ba1d1afb0 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
@@ -624,7 +624,7 @@ bool SubstraitToVeloxPlanValidator::validate(const ::substrait::WindowRel& windo
   }
 
   // Validate supported aggregate functions.
-  static const std::unordered_set<std::string> unsupportedFuncs = {"collect_list", "collect_set"};
+  static const std::unordered_set<std::string> unsupportedFuncs = {"collect_set"};
   for (const auto& funcSpec : funcSpecs) {
     auto funcName = SubstraitParser::getNameBeforeDelimiter(funcSpec);
     if (unsupportedFuncs.find(funcName) != unsupportedFuncs.end()) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to