This is an automated email from the ASF dual-hosted git repository.

zhli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new efd6f31fb [VL] Support DecimalType for approx_count_distinct (#5868)
efd6f31fb is described below

commit efd6f31fb44ea21846c0292d5ea31a3e05aa3af3
Author: Joey <[email protected]>
AuthorDate: Mon May 27 20:44:36 2024 +0800

    [VL] Support DecimalType for approx_count_distinct (#5868)
    
    [VL] Support DecimalType for approx_count_distinct.
---
 .../org/apache/gluten/extension/HLLRewriteRule.scala |  1 +
 .../execution/VeloxAggregateFunctionsSuite.scala     | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/backends-velox/src/main/scala/org/apache/gluten/extension/HLLRewriteRule.scala b/backends-velox/src/main/scala/org/apache/gluten/extension/HLLRewriteRule.scala
index cb1e626a1..03819fc10 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/extension/HLLRewriteRule.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/extension/HLLRewriteRule.scala
@@ -73,6 +73,7 @@ case class HLLRewriteRule(spark: SparkSession) extends Rule[LogicalPlan] {
       case LongType => true
       case ShortType => true
       case StringType => true
+      case _: DecimalType => true
       case _ => false
     }
   }
diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala
index faa361edf..ffed63731 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala
@@ -571,6 +571,26 @@ abstract class VeloxAggregateFunctionsSuite extends VeloxWholeStageTransformerSu
     }
   }
 
+  test("approx_count_distinct decimal") {
+    // The data type of l_discount is decimal.
+    runQueryAndCompare("""
+                         |select approx_count_distinct(l_discount) from lineitem;
+                         |""".stripMargin) {
+      checkGlutenOperatorMatch[HashAggregateExecTransformer]
+    }
+    runQueryAndCompare(
+      "select approx_count_distinct(l_discount), count(distinct l_orderkey) from lineitem") {
+      df =>
+        {
+          assert(
+            getExecutedPlan(df).count(
+              plan => {
+                plan.isInstanceOf[HashAggregateExecTransformer]
+              }) == 0)
+        }
+    }
+  }
+
   test("max_by") {
     runQueryAndCompare(s"""
                           |select max_by(l_linenumber, l_comment) from lineitem;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to