Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/19769#discussion_r151578253
--- Diff:
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala
---
@@ -87,4 +96,113 @@ class ParquetInteroperabilitySuite extends
ParquetCompatibilityTest with SharedS
Row(Seq(2, 3))))
}
}
+
+ val ImpalaFile = "test-data/impala_timestamp.parq"
+ test("parquet timestamp conversion") {
+ // Make a table with one parquet file written by impala, and one
parquet file written by spark.
+ // We should only adjust the timestamps in the impala file, and only
if the conf is set
+
+ // here's the timestamps in the impala file, as they were saved by
impala
+ val impalaFileData =
+ Seq(
+ "2001-01-01 01:01:01",
+ "2002-02-02 02:02:02",
+ "2003-03-03 03:03:03"
+ ).map { s => java.sql.Timestamp.valueOf(s) }
+ val impalaFile =
Thread.currentThread().getContextClassLoader.getResource(ImpalaFile)
+ .toURI.getPath
+ withTempPath { tableDir =>
+ val ts = Seq(
+ "2004-04-04 04:04:04",
+ "2005-05-05 05:05:05",
+ "2006-06-06 06:06:06"
+ ).map { s => java.sql.Timestamp.valueOf(s) }
+ val s = spark
+ import s.implicits._
+ // match the column names of the file from impala
+ val df =
spark.createDataset(ts).toDF().repartition(1).withColumnRenamed("value", "ts")
+ val schema = df.schema
--- End diff --
It seems `schema` is not used.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]