Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/16422#discussion_r126354680 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala --- @@ -626,6 +624,120 @@ case class DescribeTableCommand( } } +/** + * A command to list the info for a column, including name, data type, column stats and comment. + * This function creates a [[DescribeColumnCommand]] logical plan. + * + * The syntax of using this command in SQL is: + * {{{ + * DESCRIBE [EXTENDED|FORMATTED] table_name column_name; + * }}} + */ +case class DescribeColumnCommand( + table: TableIdentifier, + colNameParts: Seq[String], + isExtended: Boolean) + extends RunnableCommand { + + override val output: Seq[Attribute] = { + // The displayed names are based on Hive. + // (Link for the corresponding Hive Jira: https://issues.apache.org/jira/browse/HIVE-7050) + if (isExtended) { + Seq( + AttributeReference("col_name", StringType, nullable = false, + new MetadataBuilder().putString("comment", "name of the column").build())(), + AttributeReference("data_type", StringType, nullable = false, + new MetadataBuilder().putString("comment", "data type of the column").build())(), + AttributeReference("min", StringType, nullable = true, + new MetadataBuilder().putString("comment", "min value of the column").build())(), + AttributeReference("max", StringType, nullable = true, + new MetadataBuilder().putString("comment", "max value of the column").build())(), + AttributeReference("num_nulls", StringType, nullable = true, + new MetadataBuilder().putString("comment", "number of nulls of the column").build())(), + AttributeReference("distinct_count", StringType, nullable = true, + new MetadataBuilder().putString("comment", "distinct count of the column").build())(), + AttributeReference("avg_col_len", StringType, nullable = true, + new MetadataBuilder().putString("comment", + "average length of the values of the column").build())(), + 
AttributeReference("max_col_len", StringType, nullable = true, + new MetadataBuilder().putString("comment", + "maximum length of the values of the column").build())(), + AttributeReference("comment", StringType, nullable = true, + new MetadataBuilder().putString("comment", "comment of the column").build())()) + } else { + Seq( + AttributeReference("col_name", StringType, nullable = false, + new MetadataBuilder().putString("comment", "name of the column").build())(), + AttributeReference("data_type", StringType, nullable = false, + new MetadataBuilder().putString("comment", "data type of the column").build())(), + AttributeReference("comment", StringType, nullable = true, + new MetadataBuilder().putString("comment", "comment of the column").build())()) + } + } + + override def run(sparkSession: SparkSession): Seq[Row] = { + val catalog = sparkSession.sessionState.catalog + val resolver = sparkSession.sessionState.conf.resolver + val relation = sparkSession.table(table).queryExecution.analyzed + val field = { + relation.resolve(colNameParts, resolver).getOrElse { + throw new AnalysisException(s"Column ${UnresolvedAttribute(colNameParts).name} does not " + + s"exist") + } + } + if (!field.isInstanceOf[Attribute]) { + // If the field is not an attribute after `resolve`, then it's a nested field. + throw new AnalysisException(s"DESC TABLE COLUMN command is not supported for nested column:" + + s" ${UnresolvedAttribute(colNameParts).name}") + } + + val catalogTable = catalog.getTempViewOrPermanentTableMetadata(table) + val colStats = catalogTable.stats.map(_.colStats).getOrElse(Map.empty) + val cs = colStats.get(field.name) + + val comment = if (field.metadata.contains("comment")) { + Option(field.metadata.getString("comment")) + } else { + None + } + + val fieldValues = if (isExtended) { + // Show column stats when extended or formatted is specified. 
+ Seq( + field.name, + field.dataType.catalogString, + cs.flatMap(_.min.map(_.toString)).getOrElse("NULL"), + cs.flatMap(_.max.map(_.toString)).getOrElse("NULL"), + cs.map(_.nullCount.toString).getOrElse("NULL"), + cs.map(_.distinctCount.toString).getOrElse("NULL"), + cs.map(_.avgLen.toString).getOrElse("NULL"), + cs.map(_.maxLen.toString).getOrElse("NULL"), + comment.getOrElse("NULL")) + } else { + Seq( + field.name, + field.dataType.catalogString, + comment.getOrElse("NULL")) + } + + Seq(Row(formatColumnInfo(fieldValues))) --- End diff -- 1. Why do we need a row containing the column info names, which duplicates the schema? 2. We can NOT let the output mismatch the schema, or `sql("desc column...").select("max")` will pass analysis but fail at runtime.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org