This is an automated email from the ASF dual-hosted git repository.

hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new a1db28ddf [VL] Add a bad test case when bloom_filter_agg is fallen back while might_contain is not
a1db28ddf is described below

commit a1db28ddfa8f54ab7bff4f60d4dbfb734cc94d26
Author: Hongze Zhang <[email protected]>
AuthorDate: Wed Apr 17 15:46:06 2024 +0800

    [VL] Add a bad test case when bloom_filter_agg is fallen back while might_contain is not
---
 .../extension/columnar/TransformHintRule.scala     |  2 +-
 .../gluten/utils/velox/VeloxTestSettings.scala     |  1 +
 .../sql/GlutenBloomFilterAggregateQuerySuite.scala | 33 ++++++++++++++++++++++
 3 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala
index d19a82050..1559ba8b3 100644
--- a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala
+++ b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala
@@ -324,7 +324,7 @@ case class FallbackBloomFilterAggIfNeeded() extends Rule[SparkPlan] {
         case a: org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec =>
           tagNotTransformableRecursive(a.executedPlan)
         case _ =>
-          p.children.map(tagNotTransformableRecursive)
+          p.children.foreach(tagNotTransformableRecursive)
       }
     }
 
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 79f182e24..518908c9c 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -46,6 +46,7 @@ import org.apache.spark.sql.sources.{GlutenBucketedReadWithoutHiveSupportSuite,
 class VeloxTestSettings extends BackendTestSettings {
   enableSuite[GlutenStringFunctionsSuite]
   enableSuite[GlutenBloomFilterAggregateQuerySuite]
+    .excludeGlutenTest("Test bloom_filter_agg fallback with might_contain offloaded")
   enableSuite[GlutenDataSourceV2DataFrameSessionCatalogSuite]
   enableSuite[GlutenDataSourceV2DataFrameSuite]
   enableSuite[GlutenDataSourceV2FunctionSuite]
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenBloomFilterAggregateQuerySuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenBloomFilterAggregateQuerySuite.scala
index c12cf8217..7a4a4b427 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenBloomFilterAggregateQuerySuite.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenBloomFilterAggregateQuerySuite.scala
@@ -113,4 +113,37 @@ class GlutenBloomFilterAggregateQuerySuite
       }
     }
   }
+
+  testGluten("Test bloom_filter_agg fallback with might_contain offloaded") {
+    val table = "bloom_filter_test"
+    val numEstimatedItems = 5000000L
+    val numBits = GlutenConfig.getConf.veloxBloomFilterMaxNumBits
+    val sqlString = s"""
+                       |SELECT col positive_membership_test
+                       |FROM $table
+                       |WHERE might_contain(
+                       |            (SELECT bloom_filter_agg(col,
+                       |              cast($numEstimatedItems as long),
+                       |              cast($numBits as long))
+                       |             FROM $table), col)
+                      """.stripMargin
+
+    withTempView(table) {
+      (Seq(Long.MinValue, 0, Long.MaxValue) ++ (1L to 200000L))
+        .toDF("col")
+        .createOrReplaceTempView(table)
+      withSQLConf(
+        GlutenConfig.COLUMNAR_HASHAGG_ENABLED.key -> "false"
+      ) {
+        val df = spark.sql(sqlString)
+        df.collect
+        assert(
+          collectWithSubqueries(df.queryExecution.executedPlan) {
+            case h if h.isInstanceOf[HashAggregateExecBaseTransformer] => h
+          }.isEmpty,
+          df.queryExecution.executedPlan
+        )
+      }
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to