Repository: spark Updated Branches: refs/heads/master 571a6f057 -> 2f77616e1
[SPARK-24849][SPARK-24911][SQL] Converting a value of StructType to a DDL string ## What changes were proposed in this pull request? In the PR, I propose to extend the `StructType`/`StructField` classes by new method `toDDL` which converts a value of the `StructType`/`StructField` type to a string formatted in DDL style. The resulted string can be used in a table creation. The `toDDL` method of `StructField` is reused in `SHOW CREATE TABLE`. In this way the PR fixes the bug of unquoted names of nested fields. ## How was this patch tested? I add a test for checking the new method and 2 round trip tests: `fromDDL` -> `toDDL` and `toDDL` -> `fromDDL` Author: Maxim Gekk <maxim.g...@databricks.com> Closes #21803 from MaxGekk/to-ddl. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2f77616e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2f77616e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2f77616e Branch: refs/heads/master Commit: 2f77616e1dc593fb9c376a8bd72416198cf3d6f5 Parents: 571a6f0 Author: Maxim Gekk <maxim.g...@databricks.com> Authored: Wed Jul 25 11:09:12 2018 -0700 Committer: Xiao Li <gatorsm...@gmail.com> Committed: Wed Jul 25 11:09:12 2018 -0700 ---------------------------------------------------------------------- .../spark/sql/catalyst/util/package.scala | 12 +++++++ .../apache/spark/sql/types/StructField.scala | 13 ++++++++ .../org/apache/spark/sql/types/StructType.scala | 10 +++++- .../spark/sql/types/StructTypeSuite.scala | 33 ++++++++++++++++++++ .../spark/sql/execution/command/tables.scala | 23 +++----------- .../spark/sql/hive/ShowCreateTableSuite.scala | 15 +++++++++ 6 files changed, 86 insertions(+), 20 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/2f77616e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala index 4005087..0978e92 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala @@ -155,6 +155,18 @@ package object util { def toPrettySQL(e: Expression): String = usePrettyExpression(e).sql + + def escapeSingleQuotedString(str: String): String = { + val builder = StringBuilder.newBuilder + + str.foreach { + case '\'' => builder ++= s"\\\'" + case ch => builder += ch + } + + builder.toString() + } + /* FIX ME implicit class debugLogging(a: Any) { def debugLogging() { http://git-wip-us.apache.org/repos/asf/spark/blob/2f77616e/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala index 2c18fdc..902cae9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala @@ -21,6 +21,7 @@ import org.json4s.JsonAST.JValue import org.json4s.JsonDSL._ import org.apache.spark.annotation.InterfaceStability +import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier} /** * A field inside a StructType. @@ -74,4 +75,16 @@ case class StructField( def getComment(): Option[String] = { if (metadata.contains("comment")) Option(metadata.getString("comment")) else None } + + /** + * Returns a string containing a schema in DDL format. For example, the following value: + * `StructField("eventId", IntegerType)` will be converted to `eventId` INT. + */ + def toDDL: String = { + val comment = getComment() + .map(escapeSingleQuotedString) + .map(" COMMENT '" + _ + "'") + + s"${quoteIdentifier(name)} ${dataType.sql}${comment.getOrElse("")}" + } } http://git-wip-us.apache.org/repos/asf/spark/blob/2f77616e/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala index b13e95f..c5ca169 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala @@ -27,7 +27,7 @@ import org.apache.spark.SparkException import org.apache.spark.annotation.InterfaceStability import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, InterpretedOrdering} import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, LegacyTypeStringParser} -import org.apache.spark.sql.catalyst.util.quoteIdentifier +import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier} import org.apache.spark.util.Utils /** @@ -360,6 +360,14 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru s"STRUCT<${fieldTypes.mkString(", ")}>" } + /** + * Returns a string containing a schema in DDL format. For example, the following value: + * `StructType(Seq(StructField("eventId", IntegerType), StructField("s", StringType)))` + * will be converted to `eventId` INT, `s` STRING. + * The returned DDL schema can be used in a table creation. + */ + def toDDL: String = fields.map(_.toDDL).mkString(",") + private[sql] override def simpleString(maxNumberFields: Int): String = { val builder = new StringBuilder val fieldTypes = fields.take(maxNumberFields).map { http://git-wip-us.apache.org/repos/asf/spark/blob/2f77616e/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala index c6ca8bb..53a78c9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.types import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.types.StructType.fromDDL class StructTypeSuite extends SparkFunSuite { @@ -37,4 +38,36 @@ class StructTypeSuite extends SparkFunSuite { val e = intercept[IllegalArgumentException](s.fieldIndex("c")).getMessage assert(e.contains("Available fields: a, b")) } + + test("SPARK-24849: toDDL - simple struct") { + val struct = StructType(Seq(StructField("a", IntegerType))) + + assert(struct.toDDL == "`a` INT") + } + + test("SPARK-24849: round trip toDDL - fromDDL") { + val struct = new StructType().add("a", IntegerType).add("b", StringType) + + assert(fromDDL(struct.toDDL) === struct) + } + + test("SPARK-24849: round trip fromDDL - toDDL") { + val struct = "`a` MAP<INT, STRING>,`b` INT" + + assert(fromDDL(struct).toDDL === struct) + } + + test("SPARK-24849: toDDL must take into account case of fields.") { + val struct = new StructType() + .add("metaData", new StructType().add("eventId", StringType)) + + assert(struct.toDDL == "`metaData` STRUCT<`eventId`: STRING>") + } + + test("SPARK-24849: toDDL should output field's comment") { + val struct = StructType(Seq( + StructField("b", BooleanType).withComment("Field's comment"))) + + assert(struct.toDDL == """`b` BOOLEAN COMMENT 'Field\'s comment'""") + } } http://git-wip-us.apache.org/repos/asf/spark/blob/2f77616e/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index ec3961f..56f48b7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.catalog.CatalogTableType._ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical.Histogram -import org.apache.spark.sql.catalyst.util.quoteIdentifier +import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier} import org.apache.spark.sql.execution.datasources.{DataSource, PartitioningUtils} import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat import org.apache.spark.sql.execution.datasources.json.JsonFileFormat @@ -982,7 +982,7 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman private def showHiveTableHeader(metadata: CatalogTable, builder: StringBuilder): Unit = { val columns = metadata.schema.filterNot { column => metadata.partitionColumnNames.contains(column.name) - }.map(columnToDDLFragment) + }.map(_.toDDL) if (columns.nonEmpty) { builder ++= columns.mkString("(", ", ", ")\n") @@ -994,14 +994,10 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman .foreach(builder.append) } - private def columnToDDLFragment(column: StructField): String = { - val comment = column.getComment().map(escapeSingleQuotedString).map(" COMMENT '" + _ + "'") - s"${quoteIdentifier(column.name)} ${column.dataType.catalogString}${comment.getOrElse("")}" - } private def showHiveTableNonDataColumns(metadata: CatalogTable, builder: StringBuilder): Unit = { if (metadata.partitionColumnNames.nonEmpty) { - val partCols = metadata.partitionSchema.map(columnToDDLFragment) + val partCols = metadata.partitionSchema.map(_.toDDL) builder ++= partCols.mkString("PARTITIONED BY (", ", ", ")\n") } @@ -1072,7 +1068,7 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman private def showDataSourceTableDataColumns( metadata: CatalogTable, builder: StringBuilder): Unit = { - val columns = metadata.schema.fields.map(f => s"${quoteIdentifier(f.name)} ${f.dataType.sql}") + val columns = metadata.schema.fields.map(_.toDDL) builder ++= columns.mkString("(", ", ", ")\n") } @@ -1117,15 +1113,4 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman } } } - - private def escapeSingleQuotedString(str: String): String = { - val builder = StringBuilder.newBuilder - - str.foreach { - case '\'' => builder ++= s"\\\'" - case ch => builder += ch - } - - builder.toString() - } } http://git-wip-us.apache.org/repos/asf/spark/blob/2f77616e/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala index 473bbce..34ca790 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala @@ -288,6 +288,21 @@ class ShowCreateTableSuite extends QueryTest with SQLTestUtils with TestHiveSing } } + test("SPARK-24911: keep quotes for nested fields") { + withTable("t1") { + val createTable = "CREATE TABLE `t1`(`a` STRUCT<`b`: STRING>)" + sql(createTable) + val shownDDL = sql(s"SHOW CREATE TABLE t1") + .head() + .getString(0) + .split("\n") + .head + assert(shownDDL == createTable) + + checkCreateTable("t1") + } + } + private def createRawHiveTable(ddl: String): Unit = { hiveContext.sharedState.externalCatalog.unwrapped.asInstanceOf[HiveExternalCatalog] .client.runSqlHive(ddl) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org