spark git commit: [SPARK-16663][SQL] desc table should be consistent between data source and hive serde tables
Repository: spark Updated Branches: refs/heads/branch-2.0 a7f5e7066 -> 3500dbc9b [SPARK-16663][SQL] desc table should be consistent between data source and hive serde tables Currently there are 2 inconsistencies: 1. For a data source table, we only print partition names; for a hive table, we also print the partition schema. After this PR, we will always print the schema. 2. If a column doesn't have a comment, a data source table will print an empty string, while a hive table will print null. After this PR, we will always print null. New test in `HiveDDLSuite`. Author: Wenchen Fan Closes #14302 from cloud-fan/minor3. (cherry picked from commit a2abb583caaec9a2cecd5d65b05d172fc096c125) Signed-off-by: Wenchen Fan Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3500dbc9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3500dbc9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3500dbc9 Branch: refs/heads/branch-2.0 Commit: 3500dbc9bcce243b6656f308ee4941de0350d198 Parents: a7f5e70 Author: Wenchen Fan Authored: Tue Jul 26 18:46:12 2016 +0800 Committer: Wenchen Fan Committed: Sun Sep 4 00:15:57 2016 +0800 -- .../spark/sql/execution/command/tables.scala| 11 +++ .../apache/spark/sql/sources/DDLTestSuite.scala | 30 ++-- .../sql/hive/MetastoreDataSourcesSuite.scala| 2 +- .../spark/sql/hive/execution/HiveDDLSuite.scala | 30 +++- .../sql/hive/execution/HiveQuerySuite.scala | 4 +-- 5 files changed, 47 insertions(+), 30 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3500dbc9/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index a5ccbcf..7e6a352 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ 
-436,11 +436,12 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF private def describePartitionInfo(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = { if (DDLUtils.isDatasourceTable(table)) { - val partCols = DDLUtils.getPartitionColumnsFromTableProperties(table) - if (partCols.nonEmpty) { + val userSpecifiedSchema = DDLUtils.getSchemaFromTableProperties(table) + val partColNames = DDLUtils.getPartitionColumnsFromTableProperties(table) + for (schema <- userSpecifiedSchema if partColNames.nonEmpty) { append(buffer, "# Partition Information", "", "") -append(buffer, s"# ${output.head.name}", "", "") -partCols.foreach(col => append(buffer, col, "", "")) +append(buffer, s"# ${output.head.name}", output(1).name, output(2).name) +describeSchema(StructType(partColNames.map(schema(_))), buffer) } } else { if (table.partitionColumns.nonEmpty) { @@ -527,7 +528,7 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF private def describeSchema(schema: StructType, buffer: ArrayBuffer[Row]): Unit = { schema.foreach { column => val comment = -if (column.metadata.contains("comment")) column.metadata.getString("comment") else "" +if (column.metadata.contains("comment")) column.metadata.getString("comment") else null append(buffer, column.name, column.dataType.simpleString, comment) } } http://git-wip-us.apache.org/repos/asf/spark/blob/3500dbc9/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala index 5a7a907..c2aedff 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala @@ -98,21 +98,21 @@ class DDLTestSuite extends DataSourceTest with SharedSQLContext { "describe ddlPeople", Seq( Row("intType", "int", "test comment test1"), 
-Row("stringType", "string", ""), -Row("dateType", "date", ""), -Row("timestampType", "timestamp", ""), -Row("doubleType", "double", ""), -Row("bigintType", "bigint", ""), -Row("tinyintType", "tinyint", ""), -Row("decimalType", "decimal(10,0)", ""), -Row("fixedDecimalType", "decimal(5,1)", ""), -Row("binaryType", "binary", ""), -Row("booleanType", "boolean", ""), -Row("smallIntType", "
spark git commit: [SPARK-16663][SQL] desc table should be consistent between data source and hive serde tables
Repository: spark Updated Branches: refs/heads/master 4c9695598 -> a2abb583c [SPARK-16663][SQL] desc table should be consistent between data source and hive serde tables ## What changes were proposed in this pull request? Currently there are 2 inconsistencies: 1. For a data source table, we only print partition names; for a hive table, we also print the partition schema. After this PR, we will always print the schema. 2. If a column doesn't have a comment, a data source table will print an empty string, while a hive table will print null. After this PR, we will always print null. ## How was this patch tested? New test in `HiveDDLSuite`. Author: Wenchen Fan Closes #14302 from cloud-fan/minor3. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a2abb583 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a2abb583 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a2abb583 Branch: refs/heads/master Commit: a2abb583caaec9a2cecd5d65b05d172fc096c125 Parents: 4c96955 Author: Wenchen Fan Authored: Tue Jul 26 18:46:12 2016 +0800 Committer: Cheng Lian Committed: Tue Jul 26 18:46:12 2016 +0800 -- .../spark/sql/execution/command/tables.scala| 12 .../apache/spark/sql/sources/DDLTestSuite.scala | 30 ++-- .../sql/hive/MetastoreDataSourcesSuite.scala| 2 +- .../spark/sql/hive/execution/HiveDDLSuite.scala | 30 +++- .../sql/hive/execution/HiveQuerySuite.scala | 4 +-- 5 files changed, 47 insertions(+), 31 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a2abb583/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index c6daa95..8263380 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -439,11 +439,12 @@ case class 
DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF private def describePartitionInfo(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = { if (DDLUtils.isDatasourceTable(table)) { - val partCols = DDLUtils.getPartitionColumnsFromTableProperties(table) - if (partCols.nonEmpty) { + val userSpecifiedSchema = DDLUtils.getSchemaFromTableProperties(table) + val partColNames = DDLUtils.getPartitionColumnsFromTableProperties(table) + for (schema <- userSpecifiedSchema if partColNames.nonEmpty) { append(buffer, "# Partition Information", "", "") -append(buffer, s"# ${output.head.name}", "", "") -partCols.foreach(col => append(buffer, col, "", "")) +append(buffer, s"# ${output.head.name}", output(1).name, output(2).name) +describeSchema(StructType(partColNames.map(schema(_))), buffer) } } else { if (table.partitionColumns.nonEmpty) { @@ -525,8 +526,7 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF private def describeSchema(schema: StructType, buffer: ArrayBuffer[Row]): Unit = { schema.foreach { column => - val comment = column.getComment().getOrElse("") - append(buffer, column.name, column.dataType.simpleString, comment) + append(buffer, column.name, column.dataType.simpleString, column.getComment().orNull) } } http://git-wip-us.apache.org/repos/asf/spark/blob/a2abb583/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala index d0ad319..e535d4d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala @@ -97,21 +97,21 @@ class DDLTestSuite extends DataSourceTest with SharedSQLContext { "describe ddlPeople", Seq( Row("intType", "int", "test comment test1"), -Row("stringType", "string", ""), -Row("dateType", "date", ""), 
-Row("timestampType", "timestamp", ""), -Row("doubleType", "double", ""), -Row("bigintType", "bigint", ""), -Row("tinyintType", "tinyint", ""), -Row("decimalType", "decimal(10,0)", ""), -Row("fixedDecimalType", "decimal(5,1)", ""), -Row("binaryType", "binary", ""), -Row("booleanType", "boolean", ""), -Row("smallIntType", "smallint", ""), -Row("floatType", "float", ""), -Row("mapType", "ma