This is an automated email from the ASF dual-hosted git repository.

baibaichen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 2731470f62 [GLUTEN-12143][VL] Route bitmap_construct_agg to native Velox execution (#12142)
2731470f62 is described below

commit 2731470f62a96240f6addacb374130a7d9f9acdd
Author: Minni Mittal <[email protected]>
AuthorDate: Thu Jun 4 19:22:33 2026 +0530

    [GLUTEN-12143][VL] Route bitmap_construct_agg to native Velox execution (#12142)
    
    * [CORE] Route bitmap_construct_agg to native Velox execution
    
    Register bitmap_construct_agg as a supported aggregate expression in the
    Velox backend, allowing it to be executed natively instead of falling back
    to vanilla Spark.
    
    Changes:
    - Add BITMAP_CONSTRUCT_AGG constant to ExpressionNames
    - Register Sig[BitmapConstructAgg] in Spark 3.5, 4.0, and 4.1 shims
    - Add bitmap_construct_agg to C++ plan validator allowed list
    
    Co-authored-by: Copilot <[email protected]>
    
    * Add plan-shape assertion test for bitmap_construct_agg
    
    Adds a test verifying that bitmap_construct_agg routes to native Velox
    execution (HashAggregateExecBaseTransformer) rather than falling back
    to vanilla Spark. Test added for Spark 3.5, 4.0, and 4.1.
    
    Co-authored-by: Copilot <[email protected]>
    
    * Exclude INVALID_BITMAP_POSITION error tests for native execution
    
    bitmap_construct_agg offloaded to Velox throws GlutenException instead of
    SparkArrayIndexOutOfBoundsException. Exclude these error-path tests following
    the established pattern for native execution error type mismatches.
    
    Co-authored-by: Copilot <[email protected]>
    
    * Move BitmapConstructAgg registration to Velox-only extraExpressionMappings
    
    Move Sig[BitmapConstructAgg] from SparkXShims.aggregateExpressionMappings
    (all-backend) to VeloxSparkPlanExecApi.extraExpressionMappings (Velox-only).
    This prevents ClickHouse backend from attempting to push down
    bitmap_construct_agg, which it does not support.
    
    Follows the same pattern as BloomFilterAgg registration.
    
    Co-authored-by: Copilot <[email protected]>
    
    * Use Class.forName for BitmapConstructAgg registration in extraExpressionMappings
    
    BitmapConstructAgg only exists in Spark 3.5+. Use runtime class loading
    with scala.util.Try to gracefully handle Spark 3.3/3.4 where the class
    is absent. This keeps the registration Velox-only (CH unaffected) and
    avoids compilation failures on older Spark versions.
    
    Co-authored-by: Copilot <[email protected]>
    
    * Move BitmapConstructAgg to shims and add CH blacklist entry
    
    Move Sig[BitmapConstructAgg] back to shims/aggregateExpressionMappings
    (spark35/40/41) following the established pattern for version-specific
    expressions. Add BITMAP_CONSTRUCT_AGG -> DefaultValidator() to
    CH_AGGREGATE_FUNC_BLACKLIST so ClickHouse backend falls back to vanilla
    Spark instead of attempting unsupported native push-down.
    
    Remove the Class.forName reflection workaround from VeloxSparkPlanExecApi.
    
    Co-authored-by: Copilot <[email protected]>
    
    * Exclude bitmap_construct_agg plan assertion test from CH backend
    
    The plan-shape assertion test verifies native Velox execution, which is
    not applicable to the ClickHouse backend. Exclude it using .excludeCH()
    following the same pattern as GlutenBloomFilterAggregateQuerySuite.
    
    Co-authored-by: Copilot <[email protected]>
    
    ---------
    
    Co-authored-by: Minni Mittal <[email protected]>
    Co-authored-by: Copilot <[email protected]>
---
 .../org/apache/gluten/utils/CHExpressionUtil.scala  |  1 +
 .../substrait/SubstraitToVeloxPlanValidator.cc      |  3 ++-
 .../utils/clickhouse/ClickHouseTestSettings.scala   |  2 ++
 .../gluten/utils/velox/VeloxTestSettings.scala      |  4 ++++
 .../sql/GlutenBitmapExpressionsQuerySuite.scala     | 21 ++++++++++++++++++++-
 .../utils/clickhouse/ClickHouseTestSettings.scala   |  2 ++
 .../gluten/utils/velox/VeloxTestSettings.scala      |  4 ++++
 .../sql/GlutenBitmapExpressionsQuerySuite.scala     | 21 ++++++++++++++++++++-
 .../utils/clickhouse/ClickHouseTestSettings.scala   |  2 ++
 .../gluten/utils/velox/VeloxTestSettings.scala      |  4 ++++
 .../sql/GlutenBitmapExpressionsQuerySuite.scala     | 21 ++++++++++++++++++++-
 .../apache/gluten/expression/ExpressionNames.scala  |  1 +
 .../gluten/sql/shims/spark35/Spark35Shims.scala     |  3 ++-
 .../gluten/sql/shims/spark40/Spark40Shims.scala     |  3 ++-
 .../gluten/sql/shims/spark41/Spark41Shims.scala     |  3 ++-
 15 files changed, 88 insertions(+), 7 deletions(-)

diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
index ddef6957a9..3bbb4a467f 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
@@ -200,6 +200,7 @@ object CHExpressionUtil {
     REGR_SLOPE -> DefaultValidator(),
     REGR_INTERCEPT -> DefaultValidator(),
     REGR_SXY -> DefaultValidator(),
+    BITMAP_CONSTRUCT_AGG -> DefaultValidator(),
     TO_UTC_TIMESTAMP -> UtcTimestampValidator(),
     FROM_UTC_TIMESTAMP -> UtcTimestampValidator(),
     STACK -> DefaultValidator(),
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
index 3ea800b60c..1b1b2cbe85 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
@@ -1323,7 +1323,8 @@ bool SubstraitToVeloxPlanValidator::validate(const ::substrait::AggregateRel& ag
       "regr_slope",
       "regr_intercept",
       "regr_sxy",
-      "regr_replacement"};
+      "regr_replacement",
+      "bitmap_construct_agg"};
 
   auto udafFuncs = UdfLoader::getInstance()->getRegisteredUdafNames();
 
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 59f08ebf2f..0b7e2d4037 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -90,6 +90,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
     // Exception.
     .exclude("column pruning - non-readable file")
   enableSuite[GlutenBitmapExpressionsQuerySuite]
+    // bitmap_construct_agg is not supported natively in CH backend.
+    .excludeCH("bitmap_construct_agg routes to native")
   enableSuite[GlutenBitwiseExpressionsSuite]
   enableSuite[GlutenBloomFilterAggregateQuerySuite]
     .excludeCH("Test bloom_filter_agg and might_contain")
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 904bb5ef94..0680080345 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -85,6 +85,10 @@ class VeloxTestSettings extends BackendTestSettings {
       "INCONSISTENT_BEHAVIOR_CROSS_VERSION: compatibility with Spark 2.4/3.2 in reading/writing dates")
     // Doesn't support unhex with failOnError=true.
     .exclude("CONVERSION_INVALID_INPUT: to_binary conversion function hex")
+    // bitmap_construct_agg offloaded to Velox throws GlutenException instead of
+    // SparkArrayIndexOutOfBoundsException.
+    .exclude("INVALID_BITMAP_POSITION: position out of bounds")
+    .exclude("INVALID_BITMAP_POSITION: negative position")
   enableSuite[GlutenQueryParsingErrorsSuite]
   enableSuite[GlutenArithmeticExpressionSuite]
     .exclude("SPARK-45786: Decimal multiply, divide, remainder, quot")
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenBitmapExpressionsQuerySuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenBitmapExpressionsQuerySuite.scala
index e07821857a..97072bfeff 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenBitmapExpressionsQuerySuite.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenBitmapExpressionsQuerySuite.scala
@@ -16,6 +16,25 @@
  */
 package org.apache.spark.sql
 
+import org.apache.gluten.execution.HashAggregateExecBaseTransformer
+
+import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
+
 class GlutenBitmapExpressionsQuerySuite
   extends BitmapExpressionsQuerySuite
-  with GlutenSQLTestsTrait {}
+  with GlutenSQLTestsTrait
+  with AdaptiveSparkPlanHelper {
+
+  test("bitmap_construct_agg routes to native") {
+    val df = spark.sql(
+      "SELECT bitmap_construct_agg(bitmap_bit_position(col)) " +
+        "FROM values (1L), (2L), (3L) AS t(col)")
+    df.collect()
+    assert(
+      collectWithSubqueries(df.queryExecution.executedPlan) {
+        case h: HashAggregateExecBaseTransformer => h
+      }.nonEmpty,
+      "Expected native HashAggregateExecBaseTransformer in plan"
+    )
+  }
+}
diff --git a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 59a79c77d2..cfcfe8198f 100644
--- a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -89,6 +89,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
     // Exception.
     .exclude("column pruning - non-readable file")
   enableSuite[GlutenBitmapExpressionsQuerySuite]
+    // bitmap_construct_agg is not supported natively in CH backend.
+    .excludeCH("bitmap_construct_agg routes to native")
   enableSuite[GlutenBitwiseExpressionsSuite]
   enableSuite[GlutenBloomFilterAggregateQuerySuite]
     .excludeCH("Test bloom_filter_agg and might_contain")
diff --git a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index f564324b14..3692322e6e 100644
--- a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -282,6 +282,10 @@ class VeloxTestSettings extends BackendTestSettings {
       "INCONSISTENT_BEHAVIOR_CROSS_VERSION: compatibility with Spark 2.4/3.2 in reading/writing dates")
     // Doesn't support unhex with failOnError=true.
     .exclude("CONVERSION_INVALID_INPUT: to_binary conversion function hex")
+    // bitmap_construct_agg offloaded to Velox throws GlutenException instead of
+    // SparkArrayIndexOutOfBoundsException.
+    .exclude("INVALID_BITMAP_POSITION: position out of bounds")
+    .exclude("INVALID_BITMAP_POSITION: negative position")
   enableSuite[GlutenQueryParsingErrorsSuite]
   enableSuite[GlutenQueryContextSuite]
   enableSuite[GlutenQueryExecutionAnsiErrorsSuite]
diff --git a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenBitmapExpressionsQuerySuite.scala b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenBitmapExpressionsQuerySuite.scala
index e07821857a..97072bfeff 100644
--- a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenBitmapExpressionsQuerySuite.scala
+++ b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenBitmapExpressionsQuerySuite.scala
@@ -16,6 +16,25 @@
  */
 package org.apache.spark.sql
 
+import org.apache.gluten.execution.HashAggregateExecBaseTransformer
+
+import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
+
 class GlutenBitmapExpressionsQuerySuite
   extends BitmapExpressionsQuerySuite
-  with GlutenSQLTestsTrait {}
+  with GlutenSQLTestsTrait
+  with AdaptiveSparkPlanHelper {
+
+  test("bitmap_construct_agg routes to native") {
+    val df = spark.sql(
+      "SELECT bitmap_construct_agg(bitmap_bit_position(col)) " +
+        "FROM values (1L), (2L), (3L) AS t(col)")
+    df.collect()
+    assert(
+      collectWithSubqueries(df.queryExecution.executedPlan) {
+        case h: HashAggregateExecBaseTransformer => h
+      }.nonEmpty,
+      "Expected native HashAggregateExecBaseTransformer in plan"
+    )
+  }
+}
diff --git a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 59a79c77d2..cfcfe8198f 100644
--- a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -89,6 +89,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
     // Exception.
     .exclude("column pruning - non-readable file")
   enableSuite[GlutenBitmapExpressionsQuerySuite]
+    // bitmap_construct_agg is not supported natively in CH backend.
+    .excludeCH("bitmap_construct_agg routes to native")
   enableSuite[GlutenBitwiseExpressionsSuite]
   enableSuite[GlutenBloomFilterAggregateQuerySuite]
     .excludeCH("Test bloom_filter_agg and might_contain")
diff --git a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index a3712e62ae..1f5eaf3ac5 100644
--- a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -293,6 +293,10 @@ class VeloxTestSettings extends BackendTestSettings {
       "INCONSISTENT_BEHAVIOR_CROSS_VERSION: compatibility with Spark 2.4/3.2 in reading/writing dates")
     // Doesn't support unhex with failOnError=true.
     .exclude("CONVERSION_INVALID_INPUT: to_binary conversion function hex")
+    // bitmap_construct_agg offloaded to Velox throws GlutenException instead of
+    // SparkArrayIndexOutOfBoundsException.
+    .exclude("INVALID_BITMAP_POSITION: position out of bounds")
+    .exclude("INVALID_BITMAP_POSITION: negative position")
   enableSuite[GlutenQueryParsingErrorsSuite]
   enableSuite[GlutenQueryContextSuite]
   enableSuite[GlutenQueryExecutionAnsiErrorsSuite]
diff --git a/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/GlutenBitmapExpressionsQuerySuite.scala b/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/GlutenBitmapExpressionsQuerySuite.scala
index e07821857a..97072bfeff 100644
--- a/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/GlutenBitmapExpressionsQuerySuite.scala
+++ b/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/GlutenBitmapExpressionsQuerySuite.scala
@@ -16,6 +16,25 @@
  */
 package org.apache.spark.sql
 
+import org.apache.gluten.execution.HashAggregateExecBaseTransformer
+
+import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
+
 class GlutenBitmapExpressionsQuerySuite
   extends BitmapExpressionsQuerySuite
-  with GlutenSQLTestsTrait {}
+  with GlutenSQLTestsTrait
+  with AdaptiveSparkPlanHelper {
+
+  test("bitmap_construct_agg routes to native") {
+    val df = spark.sql(
+      "SELECT bitmap_construct_agg(bitmap_bit_position(col)) " +
+        "FROM values (1L), (2L), (3L) AS t(col)")
+    df.collect()
+    assert(
+      collectWithSubqueries(df.queryExecution.executedPlan) {
+        case h: HashAggregateExecBaseTransformer => h
+      }.nonEmpty,
+      "Expected native HashAggregateExecBaseTransformer in plan"
+    )
+  }
+}
diff --git a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
index d4afb7ff73..edd285b0f8 100644
--- a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
@@ -32,6 +32,7 @@ object ExpressionNames {
   final val COLLECT_LIST = "collect_list"
   final val COLLECT_SET = "collect_set"
   final val BLOOM_FILTER_AGG = "bloom_filter_agg"
+  final val BITMAP_CONSTRUCT_AGG = "bitmap_construct_agg"
   final val VAR_SAMP = "var_samp"
   final val VAR_POP = "var_pop"
   final val BIT_AND_AGG = "bit_and"
diff --git a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
index 8be204816a..28c1bb177a 100644
--- a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
+++ b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
@@ -80,7 +80,8 @@ class Spark35Shims extends SparkShims {
       Sig[RegrSlope](ExpressionNames.REGR_SLOPE),
       Sig[RegrIntercept](ExpressionNames.REGR_INTERCEPT),
       Sig[RegrSXY](ExpressionNames.REGR_SXY),
-      Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT)
+      Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT),
+      Sig[BitmapConstructAgg](ExpressionNames.BITMAP_CONSTRUCT_AGG)
     )
   }
 
diff --git a/shims/spark40/src/main/scala/org/apache/gluten/sql/shims/spark40/Spark40Shims.scala b/shims/spark40/src/main/scala/org/apache/gluten/sql/shims/spark40/Spark40Shims.scala
index fb38af3060..6363be3303 100644
--- a/shims/spark40/src/main/scala/org/apache/gluten/sql/shims/spark40/Spark40Shims.scala
+++ b/shims/spark40/src/main/scala/org/apache/gluten/sql/shims/spark40/Spark40Shims.scala
@@ -85,7 +85,8 @@ class Spark40Shims extends SparkShims {
       Sig[RegrSlope](ExpressionNames.REGR_SLOPE),
       Sig[RegrIntercept](ExpressionNames.REGR_INTERCEPT),
       Sig[RegrSXY](ExpressionNames.REGR_SXY),
-      Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT)
+      Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT),
+      Sig[BitmapConstructAgg](ExpressionNames.BITMAP_CONSTRUCT_AGG)
     )
   }
 
diff --git a/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala b/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala
index bcd433e7da..226c295389 100644
--- a/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala
+++ b/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala
@@ -84,7 +84,8 @@ class Spark41Shims extends SparkShims {
       Sig[RegrSlope](ExpressionNames.REGR_SLOPE),
       Sig[RegrIntercept](ExpressionNames.REGR_INTERCEPT),
       Sig[RegrSXY](ExpressionNames.REGR_SXY),
-      Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT)
+      Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT),
+      Sig[BitmapConstructAgg](ExpressionNames.BITMAP_CONSTRUCT_AGG)
     )
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to