Github user gatorsmile commented on a diff in the pull request:
https://github.com/apache/spark/pull/19500#discussion_r144745990
--- Diff:
sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala ---
@@ -937,26 +937,22 @@ class StatisticsSuite extends
StatisticsCollectionTestBase with TestHiveSingleto
}
test("test statistics of LogicalRelation converted from Hive serde
tables") {
- val parquetTable = "parquetTable"
- val orcTable = "orcTable"
- withTable(parquetTable, orcTable) {
- sql(s"CREATE TABLE $parquetTable (key STRING, value STRING) STORED
AS PARQUET")
- sql(s"CREATE TABLE $orcTable (key STRING, value STRING) STORED AS
ORC")
- sql(s"INSERT INTO TABLE $parquetTable SELECT * FROM src")
- sql(s"INSERT INTO TABLE $orcTable SELECT * FROM src")
-
- // the default value for `spark.sql.hive.convertMetastoreParquet` is
true, here we just set it
- // for robustness
- withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> "true") {
- checkTableStats(parquetTable, hasSizeInBytes = false,
expectedRowCounts = None)
- sql(s"ANALYZE TABLE $parquetTable COMPUTE STATISTICS")
- checkTableStats(parquetTable, hasSizeInBytes = true,
expectedRowCounts = Some(500))
- }
- withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") {
- // We still can get tableSize from Hive before Analyze
- checkTableStats(orcTable, hasSizeInBytes = true, expectedRowCounts
= None)
- sql(s"ANALYZE TABLE $orcTable COMPUTE STATISTICS")
- checkTableStats(orcTable, hasSizeInBytes = true, expectedRowCounts
= Some(500))
+ Seq("orc", "parquet").foreach { format =>
+ Seq("true", "false").foreach { isConverted =>
--- End diff --
We prefer using `Seq(true, false)`
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]