dtenedor commented on code in PR #44939:
URL: https://github.com/apache/spark/pull/44939#discussion_r1476612504


##########
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala:
##########
@@ -125,7 +125,11 @@ class CSVFileFormat extends TextBasedFileFormat with DataSourceRegister {
         actualRequiredSchema,
         parsedOptions,
         actualFilters)
-      val schema = if (isColumnPruningEnabled) actualRequiredSchema else actualDataSchema
+      // Use column pruning when specified by Catalyst, except when one or more columns have
+      // existence default value(s), since in that case we instruct the CSV parser to disable column
+      // pruning and instead read each entire row in order to correctly assign the default value(s).
+      val useColumnPruningForCheckingHeader = isColumnPruningEnabled
+      val schema = if (useColumnPruningForCheckingHeader) actualRequiredSchema else actualDataSchema

Review Comment:
   Sounds good, done.



##########
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala:
##########
@@ -3248,6 +3249,44 @@ abstract class CSVSuite
       }
     }
   }
+
+  test("SPARK-46890: CSV fails on a column with default and without enforcing schema") {
+    withTable("CarsTable") {
+      spark.sql(
+        s"""
+           |CREATE TABLE CarsTable(
+           |  year INT,
+           |  make STRING,
+           |  model STRING,
+           |  comment STRING DEFAULT '',
+           |  blank STRING DEFAULT '')
+           |USING csv
+           |OPTIONS (
+           |  header "true",
+           |  inferSchema "false",
+           |  enforceSchema "false",
+           |  path "${testFile(carsFile)}"
+           |)
+       """.stripMargin)
+      val expected = Seq(
+        Row("No comment"),
+        Row("Go get one now they are going fast"))
+      checkAnswer(
+        sql("SELECT comment FROM CarsTable WHERE year < 2014"),
+        expected)
+      checkAnswer(
+        spark.read.format("csv")
+          .options(
+      Map(

Review Comment:
   Done.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to