Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/19932#discussion_r156397168
--- Diff:
sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala ---
@@ -213,6 +213,27 @@ class StatisticsSuite extends
StatisticsCollectionTestBase with TestHiveSingleto
}
}
+ test("SPARK-22745 - read Hive's statistics for partition") {
+ val tableName = "hive_stats_part_table"
+ withTable(tableName) {
+ sql(s"CREATE TABLE $tableName (key STRING, value STRING) PARTITIONED
BY (ds STRING)")
+ sql(s"INSERT INTO TABLE $tableName PARTITION (ds='2017-01-01')
SELECT * FROM src")
+ var partition = spark.sessionState.catalog
+ .getPartition(TableIdentifier(tableName), Map("ds" ->
"2017-01-01"))
+
+ assert(partition.stats.get.sizeInBytes == 5812)
+ assert(partition.stats.get.rowCount.isEmpty)
+
+ hiveClient
+ .runSqlHive(s"ANALYZE TABLE $tableName PARTITION (ds='2017-01-01')
COMPUTE STATISTICS")
+ partition = spark.sessionState.catalog
+ .getPartition(TableIdentifier(tableName), Map("ds" ->
"2017-01-01"))
+
+ assert(partition.stats.get.sizeInBytes == 5812)
--- End diff --
I expected `totalSize` to be picked up here, which would change
`sizeInBytes`. Did I miss something?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]