[GitHub] [spark] karuppayya commented on a change in pull request #28662: [SPARK-31850][SQL]Prevent DetermineTableStats from computing stats multiple times for same table

GitBox Wed, 03 Jun 2020 23:30:26 -0700


karuppayya commented on a change in pull request #28662:
URL: https://github.com/apache/spark/pull/28662#discussion_r435019904




##########
File path: sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
##########
@@ -112,18 +112,20 @@ class ResolveHiveSerdeTable(session: SparkSession) extends Rule[LogicalPlan] {
 }
 
 class DetermineTableStats(session: SparkSession) extends Rule[LogicalPlan] {
-  private def hiveTableWithStats(relation: HiveTableRelation): HiveTableRelation = {
-    val table = relation.tableMeta
+
+  private[hive] def hiveTableWithStats(relation: HiveTableRelation): HiveTableRelation = {
     val partitionCols = relation.partitionCols
     val conf = session.sessionState.conf
     // For partitioned tables, the partition directory may be outside of the table directory.
     // Which is expensive to get table size. Please see how we implemented it in the AnalyzeTable.
     val sizeInBytes = if (conf.fallBackToHdfsForStatsEnabled && partitionCols.isEmpty) {
       try {
+        val table = relation.tableMeta
         val hadoopConf = session.sessionState.newHadoopConf()
         val tablePath = new Path(table.location)
         val fs: FileSystem = tablePath.getFileSystem(hadoopConf)
-        fs.getContentSummary(tablePath).getLength
+        val size = fs.getContentSummary(tablePath).getLength
+        size

Review comment:
       Removed




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] [spark] karuppayya commented on a change in pull request #28662: [SPARK-31850][SQL]Prevent DetermineTableStats from computing stats multiple times for same table

Reply via email to