This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 6d29c72f6c2b [SPARK-46875][SQL] When the `mode` is null, a
`NullPointerException` should `not` be thrown
6d29c72f6c2b is described below
commit 6d29c72f6c2bdf534c2b079f9a3563bf8cfc8dab
Author: panbingkun <[email protected]>
AuthorDate: Sat Jan 27 10:53:39 2024 +0300
[SPARK-46875][SQL] When the `mode` is null, a `NullPointerException` should
`not` be thrown
### What changes were proposed in this pull request?
This PR aims to handle a null `mode` option gracefully instead of failing.
### Why are the changes needed?
In the original logic, if the mode is null, Spark will throw a
`NullPointerException`, which is obviously unfriendly to the user.
```
val cars = spark.read
.format("csv")
.options(Map("header" -> "true", "mode" -> null))
.load(testFile(carsFile))
cars.show(false)
```
Before:
```
Cannot invoke "String.toUpperCase(java.util.Locale)" because "mode" is null
java.lang.NullPointerException: Cannot invoke
"String.toUpperCase(java.util.Locale)" because "mode" is null
at
org.apache.spark.sql.catalyst.util.ParseMode$.fromString(ParseMode.scala:50)
at
org.apache.spark.sql.catalyst.csv.CSVOptions.$anonfun$parseMode$1(CSVOptions.scala:105)
at scala.Option.map(Option.scala:242)
at
org.apache.spark.sql.catalyst.csv.CSVOptions.<init>(CSVOptions.scala:105)
at
org.apache.spark.sql.catalyst.csv.CSVOptions.<init>(CSVOptions.scala:49)
at
org.apache.spark.sql.execution.datasources.csv.CSVFileFormat.inferSchema(CSVFileFormat.scala:60)
```
After:
Spark falls back to `PermissiveMode` and then displays the data
normally, as shown below:
```
18:54:06.727 WARN org.apache.spark.sql.catalyst.util.ParseMode: mode is
null and not a valid parse mode. Using PERMISSIVE.
+----+-----+-----+----------------------------------+-----+
|year|make |model|comment |blank|
+----+-----+-----+----------------------------------+-----+
|2012|Tesla|S |No comment |NULL |
|1997|Ford |E350 |Go get one now they are going fast|NULL |
|2015|Chevy|Volt |NULL |NULL |
+----+-----+-----+----------------------------------+-----+
```
### Does this PR introduce _any_ user-facing change?
Yes. When `mode` is null, Spark falls back to `PermissiveMode` instead of
throwing a `NullPointerException`.
### How was this patch tested?
- Add new UT.
- Pass GA.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #44900 from panbingkun/SPARK-46875.
Authored-by: panbingkun <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
.../apache/spark/sql/catalyst/util/ParseMode.scala | 19 ++++++++++++-------
.../sql/execution/datasources/csv/CSVSuite.scala | 10 ++++++++++
2 files changed, 22 insertions(+), 7 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ParseMode.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ParseMode.scala
index 2beb875d1751..b35da8e2c80f 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ParseMode.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ParseMode.scala
@@ -47,12 +47,17 @@ object ParseMode extends Logging {
/**
* Returns the parse mode from the given string.
*/
- def fromString(mode: String): ParseMode = mode.toUpperCase(Locale.ROOT)
match {
- case PermissiveMode.name => PermissiveMode
- case DropMalformedMode.name => DropMalformedMode
- case FailFastMode.name => FailFastMode
- case _ =>
- logWarning(s"$mode is not a valid parse mode. Using
${PermissiveMode.name}.")
- PermissiveMode
+ def fromString(mode: String): ParseMode = Option(mode).map {
+ v => v.toUpperCase(Locale.ROOT) match {
+ case PermissiveMode.name => PermissiveMode
+ case DropMalformedMode.name => DropMalformedMode
+ case FailFastMode.name => FailFastMode
+ case _ =>
+ logWarning(s"$v is not a valid parse mode. Using
${PermissiveMode.name}.")
+ PermissiveMode
+ }
+ }.getOrElse {
+ logWarning(s"mode is null and not a valid parse mode. Using
${PermissiveMode.name}.")
+ PermissiveMode
}
}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
index 8e6282bd5a42..2ec9e1086b92 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
@@ -348,6 +348,16 @@ abstract class CSVSuite
}
}
+ test("when mode is null, will fall back to PermissiveMode mode") {
+ val cars = spark.read
+ .format("csv")
+ .options(Map("header" -> "true", "mode" -> null))
+ .load(testFile(carsFile))
+ assert(cars.collect().length == 3)
+ assert(cars.select("make").collect() sameElements
+ Array(Row("Tesla"), Row("Ford"), Row("Chevy")))
+ }
+
test("test for blank column names on read and select columns") {
val cars = spark.read
.format("csv")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]