(incubator-gluten) branch main updated: [VL] Enable CSV test in Spark41 (#11449)

chengchengjin Thu, 29 Jan 2026 00:27:06 -0800

This is an automated email from the ASF dual-hosted git repository.

chengchengjin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git



The following commit(s) were added to refs/heads/main by this push:
     new 3353c89cee [VL] Enable CSV test in Spark41 (#11449)
3353c89cee is described below

commit 3353c89ceea67c5836e14428d9256f0485f18b81
Author: Jin Chengcheng <[email protected]>
AuthorDate: Thu Jan 29 16:25:53 2026 +0800

    [VL] Enable CSV test in Spark41 (#11449)
---
 .../gluten/utils/velox/VeloxTestSettings.scala     | 68 +++-------------------
 .../execution/datasources/csv/GlutenCSVSuite.scala | 10 +++-
 2 files changed, 17 insertions(+), 61 deletions(-)

diff --git a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 83ce6d20b1..584701e0ff 100644
--- a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -28,6 +28,7 @@ import org.apache.spark.sql.execution._
 import org.apache.spark.sql.execution.adaptive.velox.VeloxAdaptiveQueryExecSuite
 import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.execution.datasources.binaryfile.GlutenBinaryFileFormatSuite
+import org.apache.spark.sql.execution.datasources.csv.{GlutenCSVLegacyTimeParserSuite, GlutenCSVv1Suite, GlutenCSVv2Suite}
 import org.apache.spark.sql.execution.datasources.json.{GlutenJsonLegacyTimeParserSuite, GlutenJsonV1Suite, GlutenJsonV2Suite}
 import org.apache.spark.sql.execution.datasources.orc._
 import org.apache.spark.sql.execution.datasources.parquet._
@@ -247,61 +248,12 @@ class VeloxTestSettings extends BackendTestSettings {
   enableSuite[GlutenBinaryFileFormatSuite]
     // Exception.
     .exclude("column pruning - non-readable file")
-  // TODO: fix in Spark-4.0
-  // enableSuite[GlutenCSVv1Suite]
-  //   // file cars.csv include null string, Arrow not support to read
-  //   .exclude("DDL test with schema")
-  //   .exclude("save csv")
-  //   .exclude("save csv with compression codec option")
-  //   .exclude("save csv with empty fields with user defined empty values")
-  //   .exclude("save csv with quote")
-  //   .exclude("SPARK-13543 Write the output as uncompressed via option()")
-  //   .exclude("DDL test with tab separated file")
-  //   .exclude("DDL test parsing decimal type")
-  //   .exclude("test with tab delimiter and double quote")
-  //   // Arrow not support corrupt record
-  //   .exclude("SPARK-27873: disabling enforceSchema should not fail columnNameOfCorruptRecord")
-  //   // varchar
-  //   .exclude("SPARK-48241: CSV parsing failure with char/varchar type columns")
-  //   // Flaky and already excluded in other cases
-  //   .exclude("Gluten - test for FAILFAST parsing mode")
-
-  // enableSuite[GlutenCSVv2Suite]
-  //   .exclude("Gluten - test for FAILFAST parsing mode")
-  //   // Rule org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown in batch
-  //   // Early Filter and Projection Push-Down generated an invalid plan
-  //   .exclude("SPARK-26208: write and read empty data to csv file with headers")
-  //   // file cars.csv include null string, Arrow not support to read
-  //   .exclude("old csv data source name works")
-  //   .exclude("DDL test with schema")
-  //   .exclude("save csv")
-  //   .exclude("save csv with compression codec option")
-  //   .exclude("save csv with empty fields with user defined empty values")
-  //   .exclude("save csv with quote")
-  //   .exclude("SPARK-13543 Write the output as uncompressed via option()")
-  //   .exclude("DDL test with tab separated file")
-  //   .exclude("DDL test parsing decimal type")
-  //   .exclude("test with tab delimiter and double quote")
-  //   // Arrow not support corrupt record
-  //   .exclude("SPARK-27873: disabling enforceSchema should not fail columnNameOfCorruptRecord")
-  //   // varchar
-  //   .exclude("SPARK-48241: CSV parsing failure with char/varchar type columns")
-
-  // enableSuite[GlutenCSVLegacyTimeParserSuite]
-  //   // file cars.csv include null string, Arrow not support to read
-  //   .exclude("DDL test with schema")
-  //   .exclude("save csv")
-  //   .exclude("save csv with compression codec option")
-  //   .exclude("save csv with empty fields with user defined empty values")
-  //   .exclude("save csv with quote")
-  //   .exclude("SPARK-13543 Write the output as uncompressed via option()")
-  //   // Arrow not support corrupt record
-  //   .exclude("SPARK-27873: disabling enforceSchema should not fail columnNameOfCorruptRecord")
-  //   .exclude("DDL test with tab separated file")
-  //   .exclude("DDL test parsing decimal type")
-  //   .exclude("test with tab delimiter and double quote")
-  //   // varchar
-  //   .exclude("SPARK-48241: CSV parsing failure with char/varchar type columns")
+  enableSuite[GlutenCSVv1Suite]
+  enableSuite[GlutenCSVv2Suite]
+  // https://github.com/apache/incubator-gluten/issues/11505
+  enableSuite[GlutenCSVLegacyTimeParserSuite]
+    .exclude("Write timestamps correctly in ISO8601 format by default")
+    .exclude("csv with variant")
   enableSuite[GlutenJsonV1Suite]
     // FIXME: Array direct selection fails
     .exclude("Complex field and type inferring")
@@ -574,10 +526,8 @@ class VeloxTestSettings extends BackendTestSettings {
   enableSuite[GlutenPathFilterStrategySuite]
   enableSuite[GlutenPathFilterSuite]
   enableSuite[GlutenPruneFileSourcePartitionsSuite]
-  // TODO: fix in Spark-4.0
-  // enableSuite[GlutenCSVReadSchemaSuite]
-  // enableSuite[GlutenHeaderCSVReadSchemaSuite]
-  //   .exclude("change column type from int to long")
+  enableSuite[GlutenCSVReadSchemaSuite]
+  enableSuite[GlutenHeaderCSVReadSchemaSuite]
   enableSuite[GlutenJsonReadSchemaSuite]
   enableSuite[GlutenOrcReadSchemaSuite]
   enableSuite[GlutenVectorizedOrcReadSchemaSuite]
diff --git a/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/execution/datasources/csv/GlutenCSVSuite.scala b/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/execution/datasources/csv/GlutenCSVSuite.scala
index 63f0327e07..b137a1a77f 100644
--- a/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/execution/datasources/csv/GlutenCSVSuite.scala
+++ b/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/execution/datasources/csv/GlutenCSVSuite.scala
@@ -36,7 +36,12 @@ class GlutenCSVSuite extends CSVSuite with GlutenSQLTestsBaseTrait {
 
   /** Returns full path to the given file in the resource folder */
   override protected def testFile(fileName: String): String = {
-    getWorkspaceFilePath("sql", "core", "src", "test", "resources").toString + "/" + fileName
+    "file://" + getWorkspaceFilePath(
+      "sql",
+      "core",
+      "src",
+      "test",
+      "resources").toString + "/" + fileName
   }
 }
 
@@ -75,7 +80,8 @@ class GlutenCSVv2Suite extends GlutenCSVSuite {
         assert(exception.getCause.isInstanceOf[GlutenException])
         assert(
           exception.getMessage.contains(
-            "[MALFORMED_RECORD_IN_PARSING] Malformed records are detected in record parsing: " +
+            "[MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION] " +
+              "Malformed records are detected in record parsing: " +
               "[2015,Chevy,Volt,null,null]"))
     }
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(incubator-gluten) branch main updated: [VL] Enable CSV test in Spark41 (#11449)

Reply via email to