sarutak commented on a change in pull request #35440:
URL: https://github.com/apache/spark/pull/35440#discussion_r802193246
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala
##########
@@ -470,7 +470,98 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared
}
}
- def getStatAttrNames(tableName: String): Set[String] = {
+ private def checkDescTimestampColStatsByZone(
+ tableName: String,
+ timestampColumn: String,
+ expectedMinTimestamp: String,
+ expectedMaxTimestamp: String): Unit = {
+
+ def extractColumnStatsFromDesc(statsName: String, rows: Array[Row]): String = {
+ rows.collect {
+ case r: Row if r.getString(0) == statsName =>
+ r.getString(1)
+ }.head
+ }
+
+ val descTsCol = sql(s"DESC FORMATTED $tableName $timestampColumn").collect()
+ assert(extractColumnStatsFromDesc("min", descTsCol) == expectedMinTimestamp)
+ assert(extractColumnStatsFromDesc("max", descTsCol) == expectedMaxTimestamp)
+ }
+
+ test("describe column stats (min, max) for timestamp column: desc results should be consistent " +
+ "with the written value if writing and desc happen in the same time zone") {
+
+ val original = TimeZone.getDefault
+ try {
+ Seq("UTC", "PST", "Asia/Hong_Kong").foreach { timeZoneId =>
+ val table = "insert_desc_same_time_zone"
+ val tsCol = "timestamp_typed_col"
+ withTable(table) {
+
+ TimeZone.setDefault(DateTimeUtils.getTimeZone(timeZoneId))
+
+ val minTimestamp = "make_timestamp(2022, 1, 1, 0, 0, 1.123456)"
+ val maxTimestamp = "make_timestamp(2022, 1, 3, 0, 0, 2.987654)"
+ sql(s"CREATE TABLE $table ($tsCol Timestamp) USING parquet")
+ sql(s"INSERT INTO $table VALUES $minTimestamp, $maxTimestamp")
+ sql(s"ANALYZE TABLE $table COMPUTE STATISTICS FOR ALL COLUMNS")
+
+ checkDescTimestampColStatsByZone(
+ tableName = table,
+ timestampColumn = tsCol,
+ expectedMinTimestamp = "2022-01-01 00:00:01.123456",
+ expectedMaxTimestamp = "2022-01-03 00:00:02.987654")
+ }
+ }
+ } finally {
+ TimeZone.setDefault(original)
+ }
+ }
+
+ test("describe column stats (min, max) for timestamp column: desc should show different " +
+ "results if writing in UTC and desc in other time zones") {
+
+ val table = "insert_desc_diff_time_zones"
+ val tsCol = "timestamp_typed_col"
+
+ val original = TimeZone.getDefault
Review comment:
Can we use `withDefaultTimeZone` here?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]