dongjoon-hyun commented on a change in pull request #24047: [SPARK-25196][SQL] 
Extends Analyze commands for cached tables 
URL: https://github.com/apache/spark/pull/24047#discussion_r264524078
 
 

 ##########
 File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala
 ##########
 @@ -89,6 +89,37 @@ case class AnalyzeColumnCommand(
     columnsToAnalyze
   }
 
+  private def analyzeColumnInCatalog(sparkSession: SparkSession): Unit = {
+    val sessionState = sparkSession.sessionState
+    val db = 
tableIdent.database.getOrElse(sessionState.catalog.getCurrentDatabase)
+    val tableIdentWithDB = TableIdentifier(tableIdent.table, Some(db))
+    val tableMeta = sessionState.catalog.getTableMetadata(tableIdentWithDB)
+    if (tableMeta.tableType == CatalogTableType.VIEW) {
+      throw new AnalysisException("ANALYZE TABLE is not supported on views.")
+    }
+    val sizeInBytes = CommandUtils.calculateTotalSize(sparkSession, tableMeta)
+    val relation = sparkSession.table(tableIdent).logicalPlan
+    val columnsToAnalyze = getColumnsToAnalyze(tableIdent, relation, 
columnNames, allColumns)
+
+    // Compute stats for the computed list of columns.
+    val (rowCount, newColStats) =
+      CommandUtils.computeColumnStats(sparkSession, relation, columnsToAnalyze)
+
+    val newColCatalogStats = newColStats.map {
+      case (attr, columnStat) =>
+        attr.name -> columnStat.toCatalogColumnStat(attr.name, attr.dataType)
+    }
+
+    // We also update table-level stats in order to keep them consistent with 
column-level stats.
+    val statistics = CatalogStatistics(
+      sizeInBytes = sizeInBytes,
+      rowCount = Some(rowCount),
+      // Newly computed column stats should override the existing ones.
+      colStats = tableMeta.stats.map(_.colStats).getOrElse(Map.empty) ++ 
newColCatalogStats)
+
+    sessionState.catalog.alterTableStats(tableIdentWithDB, Some(statistics))
 
 Review comment:
   Ditto: pass `tableIdent` instead of `tableIdentWithDB` to `alterTableStats` here as well.
   ```scala
   -    sessionState.catalog.alterTableStats(tableIdentWithDB, Some(statistics))
   +    sessionState.catalog.alterTableStats(tableIdent, Some(statistics))
   ```

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org

Reply via email to