dongjoon-hyun commented on a change in pull request #22502: [SPARK-25474][SQL]
When the "fallBackToHdfsForStats= true", Size in bytes is coming as default
size in bytes ( 8.0 EB)
URL: https://github.com/apache/spark/pull/22502#discussion_r307981710
##########
File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
##########
@@ -1453,4 +1453,21 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto
}
}
}
+
+ test("SPARK-25474: test sizeInBytes for CatalogFileIndex dataSourceTable") {
+ val table1 = "table1"
+ val table2 = "table2"
+ withSQLConf(
+ "spark.sql.statistics.fallBackToHdfs" -> "true") {
+ withTable(table1, table2) {
+ sql(s"create table $table1 (id int, name string) using parquet partitioned by (name)")
+ sql(s"insert into $table1 values (1, 'a')")
+ checkKeywordsNotExist(sql(s"explain cost select * from $table1"), "sizeInBytes=8.0 EiB")
+ sql(s"create table $table2 (id int, name string) using parquet partitioned by (name)")
+ sql(s"insert into $table2 values (1, 'a')")
+ checkKeywordsExist(sql(s"explain select * from $table1 join $table2 on $table1.id=" +
+ s"$table2.id"), "BroadcastHashJoin")
+ }
+ }
+ }
Review comment:
(please check the line length).
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]