sadikovi commented on a change in pull request #34596:
URL: https://github.com/apache/spark/pull/34596#discussion_r751930741
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
##########
@@ -1012,6 +1012,162 @@ abstract class CSVSuite
}
}
// SPARK-37326: verify that the `timestampNTZFormat` CSV option is honored both
// when writing TIMESTAMP_NTZ values and when inferring their type back on read.
test("SPARK-37326: Use different pattern to write and infer TIMESTAMP_NTZ values") {
  withTempDir { dir =>
    val path = s"${dir.getCanonicalPath}/csv"

    // Write a single TIMESTAMP_NTZ value using an explicit custom pattern.
    val exp = spark.sql("select timestamp_ntz'2020-12-12 12:12:12' as col0")
    exp.write.format("csv").option("timestampNTZFormat", "yyyy-MM-dd HH:mm:ss").save(path)

    // Force schema inference to prefer TIMESTAMP_NTZ, read the data back with
    // the same pattern, and check the value round-trips unchanged.
    withSQLConf(SQLConf.TIMESTAMP_TYPE.key -> SQLConf.TimestampTypes.TIMESTAMP_NTZ.toString) {
      val res = spark.read
        .format("csv")
        .option("inferSchema", "true")
        .option("timestampNTZFormat", "yyyy-MM-dd HH:mm:ss")
        .load(path)

      checkAnswer(res, exp)
    }
  }
}
+
// SPARK-37326: verify that the `timestampFormat` CSV option is honored both
// when writing TIMESTAMP_LTZ values and when inferring their type back on read.
test("SPARK-37326: Use different pattern to write and infer TIMESTAMP_LTZ values") {
  withTempDir { dir =>
    val path = s"${dir.getCanonicalPath}/csv"

    // Write a single TIMESTAMP_LTZ value using an explicit custom pattern.
    val exp = spark.sql("select timestamp_ltz'2020-12-12 12:12:12' as col0")
    exp.write.format("csv").option("timestampFormat", "yyyy-MM-dd HH:mm:ss").save(path)

    // Force schema inference to prefer TIMESTAMP_LTZ, read the data back with
    // the same pattern, and check the value round-trips unchanged.
    withSQLConf(SQLConf.TIMESTAMP_TYPE.key -> SQLConf.TimestampTypes.TIMESTAMP_LTZ.toString) {
      val res = spark.read
        .format("csv")
        .option("inferSchema", "true")
        .option("timestampFormat", "yyyy-MM-dd HH:mm:ss")
        .load(path)

      checkAnswer(res, exp)
    }
  }
}
+
// SPARK-37326: with an explicit user-provided schema (no inference), both
// TIMESTAMP_NTZ and TIMESTAMP_LTZ columns must round-trip through CSV intact.
test("SPARK-37326: Roundtrip in reading and writing TIMESTAMP_NTZ values with custom schema") {
  withTempDir { dir =>
    val path = s"${dir.getCanonicalPath}/csv"

    // One column of each timestamp flavor, written with a header so the
    // custom-schema read below can match columns by name.
    val exp = spark.sql("""
      select
        timestamp_ntz'2020-12-12 12:12:12' as col1,
        timestamp_ltz'2020-12-12 12:12:12' as col2
      """)

    exp.write.format("csv").option("header", "true").save(path)

    // Read back with the timestamp types pinned explicitly in the schema.
    val res = spark.read
      .format("csv")
      .schema("col1 TIMESTAMP_NTZ, col2 TIMESTAMP_LTZ")
      .option("header", "true")
      .load(path)

    checkAnswer(res, exp)
  }
}
+
// SPARK-37326: a column holding only wall-clock (zone-less) timestamp text is
// inferred as TIMESTAMP_NTZ or TIMESTAMP_LTZ depending on the value of
// spark.sql.timestampType; the same file must parse correctly under both.
test("SPARK-37326: Timestamp type inference for a column with TIMESTAMP_NTZ values") {
  withTempDir { dir =>
    val path = s"${dir.getCanonicalPath}/csv"

    val exp = spark.sql("""
      select timestamp_ntz'2020-12-12 12:12:12' as col0 union all
      select timestamp_ntz'2020-12-12 12:12:12' as col0
      """)

    exp.write.format("csv").option("header", "true").save(path)

    // Exercise both possible settings of the session timestamp type.
    val timestampTypes = Seq(
      SQLConf.TimestampTypes.TIMESTAMP_NTZ.toString,
      SQLConf.TimestampTypes.TIMESTAMP_LTZ.toString)

    for (timestampType <- timestampTypes) {
      withSQLConf(SQLConf.TIMESTAMP_TYPE.key -> timestampType) {
        val res = spark.read
          .format("csv")
          .option("inferSchema", "true")
          .option("header", "true")
          .load(path)

        if (timestampType == SQLConf.TimestampTypes.TIMESTAMP_NTZ.toString) {
          // NTZ preference: values come back exactly as written.
          checkAnswer(res, exp)
        } else {
          // LTZ preference: the same text is interpreted as TIMESTAMP_LTZ.
          checkAnswer(
            res,
            spark.sql("""
              select timestamp_ltz'2020-12-12 12:12:12' as col0 union all
              select timestamp_ltz'2020-12-12 12:12:12' as col0
              """)
          )
        }
      }
    }
  }
}
+
+ test("SPARK-37326: Timestamp type inference for a mix of TIMESTAMP_NTZ and
TIMESTAMP_LTZ") {
+ withTempDir { dir =>
+ val path = s"${dir.getCanonicalPath}/csv"
+
+ Seq(
+ "col0",
+ "2020-12-12T12:12:12.000",
+ "2020-12-12T17:12:12.000Z",
+ "2020-12-12T17:12:12.000+05:00",
+ "2020-12-12T12:12:12.000"
+ ).toDF("data")
+ .coalesce(1)
+ .write.text(path)
+
+ val res = spark.read
+ .format("csv")
+ .option("inferSchema", "true")
Review comment:
I added a test for this. Now, if a value cannot be parsed as
TIMESTAMP_NTZ, it is marked as malformed.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]