Github user gatorsmile commented on a diff in the pull request:
https://github.com/apache/spark/pull/19500#discussion_r144745990
--- Diff:
sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala ---
@@ -937,26 +937,22 @@ class StatisticsSuite extends
StatisticsCollectionTestBase with TestHiveSingleto
}
test("test statistics of LogicalRelation converted from Hive serde
tables") {
- val parquetTable = "parquetTable"
- val orcTable = "orcTable"
- withTable(parquetTable, orcTable) {
- sql(s"CREATE TABLE $parquetTable (key STRING, value STRING) STORED
AS PARQUET")
- sql(s"CREATE TABLE $orcTable (key STRING, value STRING) STORED AS
ORC")
- sql(s"INSERT INTO TABLE $parquetTable SELECT * FROM src")
- sql(s"INSERT INTO TABLE $orcTable SELECT * FROM src")
-
- // the default value for `spark.sql.hive.convertMetastoreParquet` is
true, here we just set it
- // for robustness
- withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> "true") {
- checkTableStats(parquetTable, hasSizeInBytes = false,
expectedRowCounts = None)
- sql(s"ANALYZE TABLE $parquetTable COMPUTE STATISTICS")
- checkTableStats(parquetTable, hasSizeInBytes = true,
expectedRowCounts = Some(500))
- }
- withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") {
- // We still can get tableSize from Hive before Analyze
- checkTableStats(orcTable, hasSizeInBytes = true, expectedRowCounts
= None)
- sql(s"ANALYZE TABLE $orcTable COMPUTE STATISTICS")
- checkTableStats(orcTable, hasSizeInBytes = true, expectedRowCounts
= Some(500))
+ Seq("orc", "parquet").foreach { format =>
+ Seq("true", "false").foreach { isConverted =>
--- End diff --
We prefer using `Seq(true, false)`
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]