This is an automated email from the ASF dual-hosted git repository.
chengchengjin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 3353c89cee [VL] Enable CSV test in Spark41 (#11449)
3353c89cee is described below
commit 3353c89ceea67c5836e14428d9256f0485f18b81
Author: Jin Chengcheng <[email protected]>
AuthorDate: Thu Jan 29 16:25:53 2026 +0800
[VL] Enable CSV test in Spark41 (#11449)
---
.../gluten/utils/velox/VeloxTestSettings.scala | 68 +++-------------------
.../execution/datasources/csv/GlutenCSVSuite.scala | 10 +++-
2 files changed, 17 insertions(+), 61 deletions(-)
diff --git a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 83ce6d20b1..584701e0ff 100644
--- a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -28,6 +28,7 @@ import org.apache.spark.sql.execution._
import
org.apache.spark.sql.execution.adaptive.velox.VeloxAdaptiveQueryExecSuite
import org.apache.spark.sql.execution.datasources._
import
org.apache.spark.sql.execution.datasources.binaryfile.GlutenBinaryFileFormatSuite
+import org.apache.spark.sql.execution.datasources.csv.{GlutenCSVLegacyTimeParserSuite, GlutenCSVv1Suite, GlutenCSVv2Suite}
import
org.apache.spark.sql.execution.datasources.json.{GlutenJsonLegacyTimeParserSuite,
GlutenJsonV1Suite, GlutenJsonV2Suite}
import org.apache.spark.sql.execution.datasources.orc._
import org.apache.spark.sql.execution.datasources.parquet._
@@ -247,61 +248,12 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenBinaryFileFormatSuite]
// Exception.
.exclude("column pruning - non-readable file")
- // TODO: fix in Spark-4.0
- // enableSuite[GlutenCSVv1Suite]
- // // file cars.csv include null string, Arrow not support to read
- // .exclude("DDL test with schema")
- // .exclude("save csv")
- // .exclude("save csv with compression codec option")
- // .exclude("save csv with empty fields with user defined empty values")
- // .exclude("save csv with quote")
- // .exclude("SPARK-13543 Write the output as uncompressed via option()")
- // .exclude("DDL test with tab separated file")
- // .exclude("DDL test parsing decimal type")
- // .exclude("test with tab delimiter and double quote")
- // // Arrow not support corrupt record
- // .exclude("SPARK-27873: disabling enforceSchema should not fail
columnNameOfCorruptRecord")
- // // varchar
- // .exclude("SPARK-48241: CSV parsing failure with char/varchar type
columns")
- // // Flaky and already excluded in other cases
- // .exclude("Gluten - test for FAILFAST parsing mode")
-
- // enableSuite[GlutenCSVv2Suite]
- // .exclude("Gluten - test for FAILFAST parsing mode")
- // // Rule
org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown in batch
- // // Early Filter and Projection Push-Down generated an invalid plan
- // .exclude("SPARK-26208: write and read empty data to csv file with
headers")
- // // file cars.csv include null string, Arrow not support to read
- // .exclude("old csv data source name works")
- // .exclude("DDL test with schema")
- // .exclude("save csv")
- // .exclude("save csv with compression codec option")
- // .exclude("save csv with empty fields with user defined empty values")
- // .exclude("save csv with quote")
- // .exclude("SPARK-13543 Write the output as uncompressed via option()")
- // .exclude("DDL test with tab separated file")
- // .exclude("DDL test parsing decimal type")
- // .exclude("test with tab delimiter and double quote")
- // // Arrow not support corrupt record
- // .exclude("SPARK-27873: disabling enforceSchema should not fail
columnNameOfCorruptRecord")
- // // varchar
- // .exclude("SPARK-48241: CSV parsing failure with char/varchar type
columns")
-
- // enableSuite[GlutenCSVLegacyTimeParserSuite]
- // // file cars.csv include null string, Arrow not support to read
- // .exclude("DDL test with schema")
- // .exclude("save csv")
- // .exclude("save csv with compression codec option")
- // .exclude("save csv with empty fields with user defined empty values")
- // .exclude("save csv with quote")
- // .exclude("SPARK-13543 Write the output as uncompressed via option()")
- // // Arrow not support corrupt record
- // .exclude("SPARK-27873: disabling enforceSchema should not fail
columnNameOfCorruptRecord")
- // .exclude("DDL test with tab separated file")
- // .exclude("DDL test parsing decimal type")
- // .exclude("test with tab delimiter and double quote")
- // // varchar
- // .exclude("SPARK-48241: CSV parsing failure with char/varchar type
columns")
+ enableSuite[GlutenCSVv1Suite]
+ enableSuite[GlutenCSVv2Suite]
+ // https://github.com/apache/incubator-gluten/issues/11505
+ enableSuite[GlutenCSVLegacyTimeParserSuite]
+ .exclude("Write timestamps correctly in ISO8601 format by default")
+ .exclude("csv with variant")
enableSuite[GlutenJsonV1Suite]
// FIXME: Array direct selection fails
.exclude("Complex field and type inferring")
@@ -574,10 +526,8 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenPathFilterStrategySuite]
enableSuite[GlutenPathFilterSuite]
enableSuite[GlutenPruneFileSourcePartitionsSuite]
- // TODO: fix in Spark-4.0
- // enableSuite[GlutenCSVReadSchemaSuite]
- // enableSuite[GlutenHeaderCSVReadSchemaSuite]
- // .exclude("change column type from int to long")
+ enableSuite[GlutenCSVReadSchemaSuite]
+ enableSuite[GlutenHeaderCSVReadSchemaSuite]
enableSuite[GlutenJsonReadSchemaSuite]
enableSuite[GlutenOrcReadSchemaSuite]
enableSuite[GlutenVectorizedOrcReadSchemaSuite]
diff --git a/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/execution/datasources/csv/GlutenCSVSuite.scala b/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/execution/datasources/csv/GlutenCSVSuite.scala
index 63f0327e07..b137a1a77f 100644
--- a/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/execution/datasources/csv/GlutenCSVSuite.scala
+++ b/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/execution/datasources/csv/GlutenCSVSuite.scala
@@ -36,7 +36,12 @@ class GlutenCSVSuite extends CSVSuite with GlutenSQLTestsBaseTrait {
/** Returns full path to the given file in the resource folder */
override protected def testFile(fileName: String): String = {
- getWorkspaceFilePath("sql", "core", "src", "test", "resources").toString + "/" + fileName
+ "file://" + getWorkspaceFilePath(
+ "sql",
+ "core",
+ "src",
+ "test",
+ "resources").toString + "/" + fileName
}
}
@@ -75,7 +80,8 @@ class GlutenCSVv2Suite extends GlutenCSVSuite {
assert(exception.getCause.isInstanceOf[GlutenException])
assert(
exception.getMessage.contains(
- "[MALFORMED_RECORD_IN_PARSING] Malformed records are detected in record parsing: " +
+ "[MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION] " +
+ "Malformed records are detected in record parsing: " +
"[2015,Chevy,Volt,null,null]"))
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]