This is an automated email from the ASF dual-hosted git repository.

yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new c9f6d45ecc [GLUTEN-11088][VL] Fix GlutenDatasetSuite in Spark-4.0 (#11197)
c9f6d45ecc is described below

commit c9f6d45ecc582ec29e76d671cc0e9e09dd84b5a9
Author: Zhen Li <[email protected]>
AuthorDate: Fri Nov 28 17:55:19 2025 +0800

    [GLUTEN-11088][VL] Fix GlutenDatasetSuite in Spark-4.0 (#11197)
    
    * [VL] Fix GlutenDatasetSuite in Spark-4.0
---
 .../apache/gluten/backendsapi/velox/VeloxBackend.scala   |  3 +++
 .../gluten/backendsapi/velox/VeloxValidatorApi.scala     | 16 +++++++++-------
 .../apache/gluten/backendsapi/BackendSettingsApi.scala   |  3 +++
 .../extension/columnar/CollectLimitTransformerRule.scala |  5 ++++-
 .../apache/gluten/utils/velox/VeloxTestSettings.scala    |  2 --
 5 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
index ee300372f6..aa1d8a6559 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
@@ -583,4 +583,7 @@ object VeloxBackendSettings extends BackendSettingsApi {
   override def supportOverwriteByExpression(): Boolean = enableEnhancedFeatures()
 
   override def supportOverwritePartitionsDynamic(): Boolean = enableEnhancedFeatures()
+
+  /** Velox does not support columnar shuffle with empty schema. */
+  override def supportEmptySchemaColumnarShuffle(): Boolean = false
 }
diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala
index 53892704f7..8b2193b580 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala
@@ -88,13 +88,15 @@ class VeloxValidatorApi extends ValidatorApi {
       outputAttributes: Seq[Attribute],
       outputPartitioning: Partitioning,
       child: SparkPlan): Option[String] = {
-    if (outputAttributes.isEmpty) {
-      // See: https://github.com/apache/incubator-gluten/issues/7600.
-      return Some("Shuffle with empty output schema is not supported")
-    }
-    if (child.output.isEmpty) {
-      // See: https://github.com/apache/incubator-gluten/issues/7600.
-      return Some("Shuffle with empty input schema is not supported")
+    if (!BackendsApiManager.getSettings.supportEmptySchemaColumnarShuffle()) {
+      if (outputAttributes.isEmpty) {
+        // See: https://github.com/apache/incubator-gluten/issues/7600.
+        return Some("Shuffle with empty output schema is not supported")
+      }
+      if (child.output.isEmpty) {
+        // See: https://github.com/apache/incubator-gluten/issues/7600.
+        return Some("Shuffle with empty input schema is not supported")
+      }
     }
     doSchemaValidate(child.schema)
   }
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala
index a1647b2f30..7bb28eca50 100644
--- a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala
@@ -162,4 +162,7 @@ trait BackendSettingsApi {
   def supportOverwriteByExpression(): Boolean = false
 
   def supportOverwritePartitionsDynamic(): Boolean = false
+
+  /** Whether the backend supports columnar shuffle with empty schema. */
+  def supportEmptySchemaColumnarShuffle(): Boolean = true
 }
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/CollectLimitTransformerRule.scala b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/CollectLimitTransformerRule.scala
index bb4761a07e..66d6a38ef8 100644
--- a/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/CollectLimitTransformerRule.scala
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/CollectLimitTransformerRule.scala
@@ -30,7 +30,10 @@ case class CollectLimitTransformerRule() extends Rule[SparkPlan] {
     }
 
     val transformed = plan.transformUp {
-      case exec: CollectLimitExec if exec.child.supportsColumnar =>
+      case exec: CollectLimitExec
+          if exec.child.supportsColumnar &&
+            (exec.child.output.nonEmpty ||
+              BackendsApiManager.getSettings.supportEmptySchemaColumnarShuffle()) =>
         val offset = SparkShimLoader.getSparkShims.getCollectLimitOffset(exec)
         BackendsApiManager.getSparkPlanExecApiInstance
           .genColumnarCollectLimitExec(exec.limit, exec.child, offset)
diff --git a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 1383beae79..74ace889e9 100644
--- a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -839,8 +839,6 @@ class VeloxTestSettings extends BackendTestSettings {
     // Rewrite the following two tests in GlutenDatasetSuite.
     .exclude("dropDuplicates: columns with same column name")
     .exclude("groupBy.as")
-    // TODO: fix in Spark-4.0
-    .exclude("SPARK-23627: provide isEmpty in DataSet")
   enableSuite[GlutenDateFunctionsSuite]
     // The below two are replaced by two modified versions.
     .exclude("unix_timestamp")


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to