This is an automated email from the ASF dual-hosted git repository.
hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new a1db28ddf [VL] Add a bad test case for when bloom_filter_agg falls
back while might_contain does not
a1db28ddf is described below
commit a1db28ddfa8f54ab7bff4f60d4dbfb734cc94d26
Author: Hongze Zhang <[email protected]>
AuthorDate: Wed Apr 17 15:46:06 2024 +0800
[VL] Add a bad test case for when bloom_filter_agg falls back while
might_contain does not
---
.../extension/columnar/TransformHintRule.scala | 2 +-
.../gluten/utils/velox/VeloxTestSettings.scala | 1 +
.../sql/GlutenBloomFilterAggregateQuerySuite.scala | 33 ++++++++++++++++++++++
3 files changed, 35 insertions(+), 1 deletion(-)
diff --git
a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala
b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala
index d19a82050..1559ba8b3 100644
---
a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala
+++
b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala
@@ -324,7 +324,7 @@ case class FallbackBloomFilterAggIfNeeded() extends
Rule[SparkPlan] {
case a: org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec
=>
tagNotTransformableRecursive(a.executedPlan)
case _ =>
- p.children.map(tagNotTransformableRecursive)
+ p.children.foreach(tagNotTransformableRecursive)
}
}
diff --git
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 79f182e24..518908c9c 100644
---
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -46,6 +46,7 @@ import
org.apache.spark.sql.sources.{GlutenBucketedReadWithoutHiveSupportSuite,
class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenStringFunctionsSuite]
enableSuite[GlutenBloomFilterAggregateQuerySuite]
+ .excludeGlutenTest("Test bloom_filter_agg fallback with might_contain
offloaded")
enableSuite[GlutenDataSourceV2DataFrameSessionCatalogSuite]
enableSuite[GlutenDataSourceV2DataFrameSuite]
enableSuite[GlutenDataSourceV2FunctionSuite]
diff --git
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenBloomFilterAggregateQuerySuite.scala
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenBloomFilterAggregateQuerySuite.scala
index c12cf8217..7a4a4b427 100644
---
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenBloomFilterAggregateQuerySuite.scala
+++
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenBloomFilterAggregateQuerySuite.scala
@@ -113,4 +113,37 @@ class GlutenBloomFilterAggregateQuerySuite
}
}
}
+
+ testGluten("Test bloom_filter_agg fallback with might_contain offloaded") {
+ val table = "bloom_filter_test"
+ val numEstimatedItems = 5000000L
+ val numBits = GlutenConfig.getConf.veloxBloomFilterMaxNumBits
+ val sqlString = s"""
+ |SELECT col positive_membership_test
+ |FROM $table
+ |WHERE might_contain(
+ | (SELECT bloom_filter_agg(col,
+ | cast($numEstimatedItems as long),
+ | cast($numBits as long))
+ | FROM $table), col)
+ """.stripMargin
+
+ withTempView(table) {
+ (Seq(Long.MinValue, 0, Long.MaxValue) ++ (1L to 200000L))
+ .toDF("col")
+ .createOrReplaceTempView(table)
+ withSQLConf(
+ GlutenConfig.COLUMNAR_HASHAGG_ENABLED.key -> "false"
+ ) {
+ val df = spark.sql(sqlString)
+ df.collect
+ assert(
+ collectWithSubqueries(df.queryExecution.executedPlan) {
+ case h if h.isInstanceOf[HashAggregateExecBaseTransformer] => h
+ }.isEmpty,
+ df.queryExecution.executedPlan
+ )
+ }
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]