dongjoon-hyun commented on a change in pull request #22502: [SPARK-25474][SQL] 
When the "fallBackToHdfsForStats= true", Size in bytes is coming as default 
size in bytes ( 8.0 EB)
URL: https://github.com/apache/spark/pull/22502#discussion_r308020855
 
 

 ##########
 File path: 
sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
 ##########
 @@ -1484,4 +1484,44 @@ class StatisticsSuite extends 
StatisticsCollectionTestBase with TestHiveSingleto
       }
     }
   }
+
+  test("SPARK-25474: test sizeInBytes for CatalogFileIndex dataSourceTable") {
+    withSQLConf("spark.sql.statistics.fallBackToHdfs" -> "true") {
+      withTable("t1", "t2") {
+        sql("CREATE TABLE t1 (id INT, name STRING) USING PARQUET PARTITIONED 
BY (name)")
+        sql("INSERT INTO t1 VALUES (1, 'a')")
+        checkKeywordsNotExist(sql("EXPLAIN COST SELECT * FROM t1"), 
"sizeInBytes=8.0 EiB")
+        sql("CREATE TABLE t2 (id INT, name STRING) USING PARQUET PARTITIONED 
BY (name)")
+        sql("INSERT INTO t2 VALUES (1, 'a')")
+        checkKeywordsExist(sql("EXPLAIN SELECT * FROM t1, t2 WHERE 
t1.id=t2.id"),
+          "BroadcastHashJoin")
+      }
+    }
+  }
+
+  test("SPARK-25474: should not fall back to hdfs when table statistics 
exists" +
+    " for CatalogFileIndex dataSourceTable") {
+
+    var sizeInBytesDisabledFallBack, sizeInBytesEnabledFallBack = 0L
+    Seq(true, false).foreach { fallBackToHdfs =>
+      withSQLConf("spark.sql.statistics.fallBackToHdfs" -> 
fallBackToHdfs.toString) {
+        withTable("t1") {
+          sql("CREATE TABLE t1 (id INT, name STRING) USING PARQUET PARTITIONED 
BY (name)")
+          sql("INSERT INTO t1 VALUES (1, 'a')")
+          // Analyze command updates the statistics of table `t1`
+          sql("analyze table t1 compute statistics")
+          val catalogTable = getCatalogTable("t1")
+          assert(catalogTable.stats.isDefined)
+
+          if (!fallBackToHdfs) {
+            sizeInBytesDisabledFallBack = 
catalogTable.stats.get.sizeInBytes.toLong
+          } else {
+            sizeInBytesEnabledFallBack = 
catalogTable.stats.get.sizeInBytes.toLong
+          }
+          checkKeywordsNotExist(sql("EXPLAIN COST SELECT * FROM t1"), 
"sizeInBytes=8.0 EiB")
+        }
+      }
+    }
+    assert(sizeInBytesEnabledFallBack === sizeInBytesDisabledFallBack)
 
 Review comment:
   ~Ur, if the fallback logic returns the same value as `ANALYZE TABLE t1 
COMPUTE STATISTICS`, this assertion doesn't prove anything.~ Never mind.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to