This is an automated email from the ASF dual-hosted git repository.
yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new c9f6d45ecc [GLUTEN-11088][VL] Fix GlutenDatasetSuite in Spark-4.0 (#11197)
c9f6d45ecc is described below
commit c9f6d45ecc582ec29e76d671cc0e9e09dd84b5a9
Author: Zhen Li <[email protected]>
AuthorDate: Fri Nov 28 17:55:19 2025 +0800
[GLUTEN-11088][VL] Fix GlutenDatasetSuite in Spark-4.0 (#11197)
* [VL] Fix GlutenDatasetSuite in Spark-4.0
---
.../apache/gluten/backendsapi/velox/VeloxBackend.scala | 3 +++
.../gluten/backendsapi/velox/VeloxValidatorApi.scala | 16 +++++++++-------
.../apache/gluten/backendsapi/BackendSettingsApi.scala | 3 +++
.../extension/columnar/CollectLimitTransformerRule.scala | 5 ++++-
.../apache/gluten/utils/velox/VeloxTestSettings.scala | 2 --
5 files changed, 19 insertions(+), 10 deletions(-)
diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
index ee300372f6..aa1d8a6559 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
@@ -583,4 +583,7 @@ object VeloxBackendSettings extends BackendSettingsApi {
override def supportOverwriteByExpression(): Boolean =
enableEnhancedFeatures()
override def supportOverwritePartitionsDynamic(): Boolean =
enableEnhancedFeatures()
+
+ /** Velox does not support columnar shuffle with empty schema. */
+ override def supportEmptySchemaColumnarShuffle(): Boolean = false
}
diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala
index 53892704f7..8b2193b580 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala
@@ -88,13 +88,15 @@ class VeloxValidatorApi extends ValidatorApi {
outputAttributes: Seq[Attribute],
outputPartitioning: Partitioning,
child: SparkPlan): Option[String] = {
- if (outputAttributes.isEmpty) {
- // See: https://github.com/apache/incubator-gluten/issues/7600.
- return Some("Shuffle with empty output schema is not supported")
- }
- if (child.output.isEmpty) {
- // See: https://github.com/apache/incubator-gluten/issues/7600.
- return Some("Shuffle with empty input schema is not supported")
+ if (!BackendsApiManager.getSettings.supportEmptySchemaColumnarShuffle()) {
+ if (outputAttributes.isEmpty) {
+ // See: https://github.com/apache/incubator-gluten/issues/7600.
+ return Some("Shuffle with empty output schema is not supported")
+ }
+ if (child.output.isEmpty) {
+ // See: https://github.com/apache/incubator-gluten/issues/7600.
+ return Some("Shuffle with empty input schema is not supported")
+ }
}
doSchemaValidate(child.schema)
}
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala
index a1647b2f30..7bb28eca50 100644
--- a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala
@@ -162,4 +162,7 @@ trait BackendSettingsApi {
def supportOverwriteByExpression(): Boolean = false
def supportOverwritePartitionsDynamic(): Boolean = false
+
+ /** Whether the backend supports columnar shuffle with empty schema. */
+ def supportEmptySchemaColumnarShuffle(): Boolean = true
}
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/CollectLimitTransformerRule.scala b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/CollectLimitTransformerRule.scala
index bb4761a07e..66d6a38ef8 100644
--- a/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/CollectLimitTransformerRule.scala
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/CollectLimitTransformerRule.scala
@@ -30,7 +30,10 @@ case class CollectLimitTransformerRule() extends Rule[SparkPlan] {
}
val transformed = plan.transformUp {
- case exec: CollectLimitExec if exec.child.supportsColumnar =>
+ case exec: CollectLimitExec
+ if exec.child.supportsColumnar &&
+ (exec.child.output.nonEmpty ||
+ BackendsApiManager.getSettings.supportEmptySchemaColumnarShuffle()) =>
val offset = SparkShimLoader.getSparkShims.getCollectLimitOffset(exec)
BackendsApiManager.getSparkPlanExecApiInstance
.genColumnarCollectLimitExec(exec.limit, exec.child, offset)
diff --git a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 1383beae79..74ace889e9 100644
--- a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -839,8 +839,6 @@ class VeloxTestSettings extends BackendTestSettings {
// Rewrite the following two tests in GlutenDatasetSuite.
.exclude("dropDuplicates: columns with same column name")
.exclude("groupBy.as")
- // TODO: fix in Spark-4.0
- .exclude("SPARK-23627: provide isEmpty in DataSet")
enableSuite[GlutenDateFunctionsSuite]
// The below two are replaced by two modified versions.
.exclude("unix_timestamp")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]