This is an automated email from the ASF dual-hosted git repository.
zhli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 6c989af27 [VL] Spark 3.5: fix and enable all ut for
GlutenFileMetadataStructSuite (#5377)
6c989af27 is described below
commit 6c989af27be18b45f87f7a371d6235d72eba3430
Author: 高阳阳 <[email protected]>
AuthorDate: Fri Apr 12 16:49:34 2024 +0800
[VL] Spark 3.5: fix and enable all ut for GlutenFileMetadataStructSuite
(#5377)
[VL] Spark 3.5: fix and enable all ut for GlutenFileMetadataStructSuite
---
.../test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala | 4 ----
.../scala/org/apache/spark/sql/execution/FileSourceScanExecShim.scala | 3 ++-
2 files changed, 2 insertions(+), 5 deletions(-)
diff --git
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index b33c0780d..5396a7c5e 100644
---
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -737,10 +737,6 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenFileFormatWriterSuite]
enableSuite[GlutenFileIndexSuite]
enableSuite[GlutenFileMetadataStructSuite]
- // Disable as failed in Spark3.5.
- .exclude("SPARK-41896: Filter on row_index and a stored column at the same
time")
- .exclude("SPARK-43450: Filter on full _metadata column struct")
- .exclude("SPARK-43450: Filter on aliased _metadata.row_index")
enableSuite[GlutenParquetV1AggregatePushDownSuite]
enableSuite[GlutenParquetV2AggregatePushDownSuite]
enableSuite[GlutenOrcV1AggregatePushDownSuite]
diff --git
a/shims/spark35/src/main/scala/org/apache/spark/sql/execution/FileSourceScanExecShim.scala
b/shims/spark35/src/main/scala/org/apache/spark/sql/execution/FileSourceScanExecShim.scala
index 4d25192b4..77786ba0d 100644
---
a/shims/spark35/src/main/scala/org/apache/spark/sql/execution/FileSourceScanExecShim.scala
+++
b/shims/spark35/src/main/scala/org/apache/spark/sql/execution/FileSourceScanExecShim.scala
@@ -21,7 +21,7 @@ import org.apache.gluten.metrics.GlutenTimeMetric
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.{And, Attribute,
AttributeReference, BoundReference, DynamicPruningExpression, Expression,
FileSourceConstantMetadataAttribute, FileSourceGeneratedMetadataAttribute,
FileSourceMetadataAttribute, PlanExpression, Predicate}
import org.apache.spark.sql.execution.datasources.{FileFormat,
HadoopFsRelation, PartitionDirectory}
-import org.apache.spark.sql.execution.datasources.parquet.ParquetUtils
+import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat,
ParquetUtils}
import org.apache.spark.sql.execution.metric.SQLMetric
import org.apache.spark.sql.types.StructType
import org.apache.spark.util.collection.BitSet
@@ -66,6 +66,7 @@ abstract class FileSourceScanExecShim(
// TODO, fallback if user define same name column due to we can't right now
// detect which column is metadata column which is user defined column.
val metadataColumnsNames = metadataColumns.map(_.name)
+
metadataColumnsNames.contains(ParquetFileFormat.ROW_INDEX_TEMPORARY_COLUMN_NAME)
||
output
.filterNot(metadataColumns.toSet)
.exists(v => metadataColumnsNames.contains(v.name)) ||
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]