Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/20140#discussion_r184870456
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala
---
@@ -140,14 +141,23 @@ private[csv] object CSVInferSchema {
private def tryParseDouble(field: String, options: CSVOptions): DataType
= {
if ((allCatch opt field.toDouble).isDefined || isInfOrNan(field,
options)) {
DoubleType
+ } else {
+ tryParseDate(field, options)
+ }
+ }
+
+ private def tryParseDate(field: String, options: CSVOptions): DataType =
{
+ // This case infers a custom `dateFormat` is set.
+ if ((allCatch opt options.dateFormatter.parse(field)).isDefined) {
+ DateType
} else {
tryParseTimestamp(field, options)
}
}
private def tryParseTimestamp(field: String, options: CSVOptions):
DataType = {
- // This case infers a custom `dataFormat` is set.
- if ((allCatch opt options.timestampFormat.parse(field)).isDefined) {
+ // This case infers a custom `timestampFormat` is set.
+ if ((allCatch opt options.timestampFormatter.parse(field)).isDefined) {
--- End diff --
Should we replace it with `timestampFormat` everywhere and document the change in the
migration guide? (e.g., that the date format is now inferred correctly, and also the
things you mentioned in
https://github.com/apache/spark/pull/20140#discussion_r166261313)
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]