Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/19743#discussion_r153515401
--- Diff:
sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala ---
@@ -41,7 +41,35 @@ import org.apache.spark.sql.types._
class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleton {
- test("Hive serde tables should fallback to HDFS for size estimation") {
+
+ test("size estimation for relations based on row size * number of rows") {
+ val dsTbl = "rel_est_ds_table"
+ val hiveTbl = "rel_est_hive_table"
+ withTable(dsTbl, hiveTbl) {
+ spark.range(1000L).write.format("parquet").saveAsTable(dsTbl)
+ sql(s"CREATE TABLE $hiveTbl STORED AS parquet AS SELECT * FROM $dsTbl")
--- End diff --
nit: `spark.range(1000L).write.format("hive").saveAsTable(hiveTbl)`
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org