Github user MaxGekk commented on a diff in the pull request:
https://github.com/apache/spark/pull/23150#discussion_r238075711
--- Diff:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala
---
@@ -86,62 +85,74 @@ class UnivocityParserSuite extends SparkFunSuite with
SQLHelper {
// null.
Seq(true, false).foreach { b =>
val options = new CSVOptions(Map("nullValue" -> "null"), false,
"GMT")
- val converter =
- parser.makeConverter("_1", StringType, nullable = b, options =
options)
+ val parser = new UnivocityParser(StructType(Seq.empty), options)
+ val converter = parser.makeConverter("_1", StringType, nullable = b)
assert(converter.apply("") == UTF8String.fromString(""))
}
}
test("Throws exception for empty string with non null type") {
- val options = new CSVOptions(Map.empty[String, String], false, "GMT")
+ val options = new CSVOptions(Map.empty[String, String], false, "GMT")
+ val parser = new UnivocityParser(StructType(Seq.empty), options)
val exception = intercept[RuntimeException]{
- parser.makeConverter("_1", IntegerType, nullable = false, options =
options).apply("")
+ parser.makeConverter("_1", IntegerType, nullable = false).apply("")
}
assert(exception.getMessage.contains("null value found but field _1 is
not nullable."))
}
test("Types are cast correctly") {
val options = new CSVOptions(Map.empty[String, String], false, "GMT")
- assert(parser.makeConverter("_1", ByteType, options =
options).apply("10") == 10)
- assert(parser.makeConverter("_1", ShortType, options =
options).apply("10") == 10)
- assert(parser.makeConverter("_1", IntegerType, options =
options).apply("10") == 10)
- assert(parser.makeConverter("_1", LongType, options =
options).apply("10") == 10)
- assert(parser.makeConverter("_1", FloatType, options =
options).apply("1.00") == 1.0)
- assert(parser.makeConverter("_1", DoubleType, options =
options).apply("1.00") == 1.0)
- assert(parser.makeConverter("_1", BooleanType, options =
options).apply("true") == true)
-
- val timestampsOptions =
+ var parser = new UnivocityParser(StructType(Seq.empty), options)
+ assert(parser.makeConverter("_1", ByteType).apply("10") == 10)
+ assert(parser.makeConverter("_1", ShortType).apply("10") == 10)
+ assert(parser.makeConverter("_1", IntegerType).apply("10") == 10)
+ assert(parser.makeConverter("_1", LongType).apply("10") == 10)
+ assert(parser.makeConverter("_1", FloatType).apply("1.00") == 1.0)
+ assert(parser.makeConverter("_1", DoubleType).apply("1.00") == 1.0)
+ assert(parser.makeConverter("_1", BooleanType).apply("true") == true)
+
+ var timestampsOptions =
new CSVOptions(Map("timestampFormat" -> "dd/MM/yyyy hh:mm"), false,
"GMT")
+ parser = new UnivocityParser(StructType(Seq.empty), timestampsOptions)
val customTimestamp = "31/01/2015 00:00"
- val expectedTime =
timestampsOptions.timestampFormat.parse(customTimestamp).getTime
- val castedTimestamp =
- parser.makeConverter("_1", TimestampType, nullable = true, options =
timestampsOptions)
+ var format = FastDateFormat.getInstance(
+ timestampsOptions.timestampFormat, timestampsOptions.timeZone,
timestampsOptions.locale)
+ val expectedTime = format.parse(customTimestamp).getTime
+ val castedTimestamp = parser.makeConverter("_1", TimestampType,
nullable = true)
.apply(customTimestamp)
assert(castedTimestamp == expectedTime * 1000L)
val customDate = "31/01/2015"
val dateOptions = new CSVOptions(Map("dateFormat" -> "dd/MM/yyyy"),
false, "GMT")
- val expectedDate = dateOptions.dateFormat.parse(customDate).getTime
- val castedDate =
- parser.makeConverter("_1", DateType, nullable = true, options =
dateOptions)
- .apply(customTimestamp)
- assert(castedDate == DateTimeUtils.millisToDays(expectedDate))
+ parser = new UnivocityParser(StructType(Seq.empty), dateOptions)
+ format = FastDateFormat.getInstance(
+ dateOptions.dateFormat, dateOptions.timeZone, dateOptions.locale)
+ val expectedDate = format.parse(customDate).getTime
+ val castedDate = parser.makeConverter("_1", DateType, nullable = true)
+ .apply(customDate)
+ assert(castedDate == DateTimeUtils.millisToDays(expectedDate,
TimeZone.getTimeZone("GMT")))
val timestamp = "2015-01-01 00:00:00"
- assert(parser.makeConverter("_1", TimestampType, options =
options).apply(timestamp) ==
- DateTimeUtils.stringToTime(timestamp).getTime * 1000L)
- assert(parser.makeConverter("_1", DateType, options =
options).apply("2015-01-01") ==
-
DateTimeUtils.millisToDays(DateTimeUtils.stringToTime("2015-01-01").getTime))
+ timestampsOptions = new CSVOptions(Map(
+ "timestampFormat" -> "yyyy-MM-dd HH:mm:ss",
+ "dateFormat" -> "yyyy-MM-dd"), false, "UTC")
+ parser = new UnivocityParser(StructType(Seq.empty), timestampsOptions)
+ val expected = 1420070400 * DateTimeUtils.MICROS_PER_SECOND
--- End diff --
I hard-coded the expected value as the number of seconds since the epoch in
UTC, because `DateTimeUtils.stringToTime(timestamp).getTime` depends on the
JVM's current default time zone.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]