Github user dilipbiswal commented on a diff in the pull request:
https://github.com/apache/spark/pull/22566#discussion_r220985607
--- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala ---
@@ -50,7 +52,26 @@ case class AnalyzeColumnCommand(
val sizeInBytes = CommandUtils.calculateTotalSize(sparkSession,
tableMeta)
// Compute stats for each column
- val (rowCount, newColStats) = computeColumnStats(sparkSession, tableIdentWithDB, columnNames)
+ val conf = sparkSession.sessionState.conf
+ val relation = sparkSession.table(tableIdent).logicalPlan
+ val attributesToAnalyze = if (allColumns) {
+ relation.output
+ } else {
+ columnNames.get.map { col =>
+ val exprOption = relation.output.find(attr => conf.resolver(attr.name, col))
+ exprOption.getOrElse(throw new AnalysisException(s"Column $col does not exist."))
+ }
+ }
+ // Make sure the column types are supported for stats gathering.
+ attributesToAnalyze.foreach { attr =>
+ if (!supportsType(attr.dataType)) {
+ throw new AnalysisException(
+ s"Column ${attr.name} in table $tableIdent is of type ${attr.dataType}, " +
+ "and Spark does not support statistics collection on this column type.")
+ }
+ }
--- End diff --
@gatorsmile OK
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]