Repository: spark
Updated Branches:
  refs/heads/master 571a6f057 -> 2f77616e1


[SPARK-24849][SPARK-24911][SQL] Converting a value of StructType to a DDL string

## What changes were proposed in this pull request?

In this PR, I propose to extend the `StructType`/`StructField` classes with a 
new method `toDDL` which converts a value of the `StructType`/`StructField` 
type to a string formatted in DDL style. The resulting string can be used in a 
table creation statement.

The `toDDL` method of `StructField` is reused in `SHOW CREATE TABLE`. In this 
way, the PR also fixes the bug of unquoted names of nested fields.
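
For illustration, here is a minimal sketch of the intended usage (the field 
names and comment below are hypothetical, not taken from the patch):

```scala
import org.apache.spark.sql.types._

// A schema with a commented field and a nested struct. Note the backtick-quoted
// names, including the nested `eventId`, and the escaped single quote in the comment.
val schema = StructType(Seq(
  StructField("eventId", IntegerType).withComment("event's id"),
  StructField("metaData", new StructType().add("eventId", StringType))))

schema.toDDL
// `eventId` INT COMMENT 'event\'s id',`metaData` STRUCT<`eventId`: STRING>

// Given an active SparkSession `spark`, the string can be embedded into DDL:
// spark.sql(s"CREATE TABLE t (${schema.toDDL})")
```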

## How was this patch tested?

I added a test checking the new method and two round-trip tests: `fromDDL` -> 
`toDDL` and `toDDL` -> `fromDDL` (see the sketch below).
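
As a sketch, the round trips assert that converting back and forth preserves 
the schema and the DDL string (mirroring the tests added to `StructTypeSuite` 
in the diff below):

```scala
import org.apache.spark.sql.types._
import org.apache.spark.sql.types.StructType.fromDDL

// toDDL -> fromDDL: rendering a schema and parsing it back yields an equal schema.
val struct = new StructType().add("a", IntegerType).add("b", StringType)
assert(fromDDL(struct.toDDL) == struct)

// fromDDL -> toDDL: parsing a DDL string and rendering it back yields the same string.
val ddl = "`a` MAP<INT, STRING>,`b` INT"
assert(fromDDL(ddl).toDDL == ddl)
```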

Author: Maxim Gekk <maxim.g...@databricks.com>

Closes #21803 from MaxGekk/to-ddl.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2f77616e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2f77616e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2f77616e

Branch: refs/heads/master
Commit: 2f77616e1dc593fb9c376a8bd72416198cf3d6f5
Parents: 571a6f0
Author: Maxim Gekk <maxim.g...@databricks.com>
Authored: Wed Jul 25 11:09:12 2018 -0700
Committer: Xiao Li <gatorsm...@gmail.com>
Committed: Wed Jul 25 11:09:12 2018 -0700

----------------------------------------------------------------------
 .../spark/sql/catalyst/util/package.scala       | 12 +++++++
 .../apache/spark/sql/types/StructField.scala    | 13 ++++++++
 .../org/apache/spark/sql/types/StructType.scala | 10 +++++-
 .../spark/sql/types/StructTypeSuite.scala       | 33 ++++++++++++++++++++
 .../spark/sql/execution/command/tables.scala    | 23 +++-----------
 .../spark/sql/hive/ShowCreateTableSuite.scala   | 15 +++++++++
 6 files changed, 86 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/2f77616e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala
index 4005087..0978e92 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala
@@ -155,6 +155,18 @@ package object util {
 
   def toPrettySQL(e: Expression): String = usePrettyExpression(e).sql
 
+
+  def escapeSingleQuotedString(str: String): String = {
+    val builder = StringBuilder.newBuilder
+
+    str.foreach {
+      case '\'' => builder ++= s"\\\'"
+      case ch => builder += ch
+    }
+
+    builder.toString()
+  }
+
   /* FIX ME
   implicit class debugLogging(a: Any) {
     def debugLogging() {

http://git-wip-us.apache.org/repos/asf/spark/blob/2f77616e/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala
index 2c18fdc..902cae9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala
@@ -21,6 +21,7 @@ import org.json4s.JsonAST.JValue
 import org.json4s.JsonDSL._
 
 import org.apache.spark.annotation.InterfaceStability
+import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier}
 
 /**
  * A field inside a StructType.
@@ -74,4 +75,16 @@ case class StructField(
   def getComment(): Option[String] = {
     if (metadata.contains("comment")) Option(metadata.getString("comment")) else None
   }
+
+  /**
+   * Returns a string containing a schema in DDL format. For example, the following value:
+   * `StructField("eventId", IntegerType)` will be converted to `eventId` INT.
+   */
+  def toDDL: String = {
+    val comment = getComment()
+      .map(escapeSingleQuotedString)
+      .map(" COMMENT '" + _ + "'")
+
+    s"${quoteIdentifier(name)} ${dataType.sql}${comment.getOrElse("")}"
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/2f77616e/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
index b13e95f..c5ca169 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
@@ -27,7 +27,7 @@ import org.apache.spark.SparkException
 import org.apache.spark.annotation.InterfaceStability
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, InterpretedOrdering}
 import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, LegacyTypeStringParser}
-import org.apache.spark.sql.catalyst.util.quoteIdentifier
+import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier}
 import org.apache.spark.util.Utils
 
 /**
@@ -360,6 +360,14 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru
     s"STRUCT<${fieldTypes.mkString(", ")}>"
   }
 
+  /**
+   * Returns a string containing a schema in DDL format. For example, the following value:
+   * `StructType(Seq(StructField("eventId", IntegerType), StructField("s", StringType)))`
+   * will be converted to `eventId` INT, `s` STRING.
+   * The returned DDL schema can be used in a table creation.
+   */
+  def toDDL: String = fields.map(_.toDDL).mkString(",")
+
   private[sql] override def simpleString(maxNumberFields: Int): String = {
     val builder = new StringBuilder
     val fieldTypes = fields.take(maxNumberFields).map {

http://git-wip-us.apache.org/repos/asf/spark/blob/2f77616e/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala
index c6ca8bb..53a78c9 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.types
 
 import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.types.StructType.fromDDL
 
 class StructTypeSuite extends SparkFunSuite {
 
@@ -37,4 +38,36 @@ class StructTypeSuite extends SparkFunSuite {
     val e = intercept[IllegalArgumentException](s.fieldIndex("c")).getMessage
     assert(e.contains("Available fields: a, b"))
   }
+
+  test("SPARK-24849: toDDL - simple struct") {
+    val struct = StructType(Seq(StructField("a", IntegerType)))
+
+    assert(struct.toDDL == "`a` INT")
+  }
+
+  test("SPARK-24849: round trip toDDL - fromDDL") {
+    val struct = new StructType().add("a", IntegerType).add("b", StringType)
+
+    assert(fromDDL(struct.toDDL) === struct)
+  }
+
+  test("SPARK-24849: round trip fromDDL - toDDL") {
+    val struct = "`a` MAP<INT, STRING>,`b` INT"
+
+    assert(fromDDL(struct).toDDL === struct)
+  }
+
+  test("SPARK-24849: toDDL must take into account case of fields.") {
+    val struct = new StructType()
+      .add("metaData", new StructType().add("eventId", StringType))
+
+    assert(struct.toDDL == "`metaData` STRUCT<`eventId`: STRING>")
+  }
+
+  test("SPARK-24849: toDDL should output field's comment") {
+    val struct = StructType(Seq(
+      StructField("b", BooleanType).withComment("Field's comment")))
+
+    assert(struct.toDDL == """`b` BOOLEAN COMMENT 'Field\'s comment'""")
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/2f77616e/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index ec3961f..56f48b7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.catalog.CatalogTableType._
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
 import org.apache.spark.sql.catalyst.plans.logical.Histogram
-import org.apache.spark.sql.catalyst.util.quoteIdentifier
+import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier}
 import org.apache.spark.sql.execution.datasources.{DataSource, PartitioningUtils}
 import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
 import org.apache.spark.sql.execution.datasources.json.JsonFileFormat
@@ -982,7 +982,7 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman
   private def showHiveTableHeader(metadata: CatalogTable, builder: StringBuilder): Unit = {
     val columns = metadata.schema.filterNot { column =>
       metadata.partitionColumnNames.contains(column.name)
-    }.map(columnToDDLFragment)
+    }.map(_.toDDL)
 
     if (columns.nonEmpty) {
       builder ++= columns.mkString("(", ", ", ")\n")
@@ -994,14 +994,10 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman
       .foreach(builder.append)
   }
 
-  private def columnToDDLFragment(column: StructField): String = {
-    val comment = column.getComment().map(escapeSingleQuotedString).map(" COMMENT '" + _ + "'")
-    s"${quoteIdentifier(column.name)} ${column.dataType.catalogString}${comment.getOrElse("")}"
-  }
 
   private def showHiveTableNonDataColumns(metadata: CatalogTable, builder: StringBuilder): Unit = {
     if (metadata.partitionColumnNames.nonEmpty) {
-      val partCols = metadata.partitionSchema.map(columnToDDLFragment)
+      val partCols = metadata.partitionSchema.map(_.toDDL)
       builder ++= partCols.mkString("PARTITIONED BY (", ", ", ")\n")
     }
 
@@ -1072,7 +1068,7 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman
 
   private def showDataSourceTableDataColumns(
       metadata: CatalogTable, builder: StringBuilder): Unit = {
-    val columns = metadata.schema.fields.map(f => s"${quoteIdentifier(f.name)} ${f.dataType.sql}")
+    val columns = metadata.schema.fields.map(_.toDDL)
     builder ++= columns.mkString("(", ", ", ")\n")
   }
 
@@ -1117,15 +1113,4 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman
       }
     }
   }
-
-  private def escapeSingleQuotedString(str: String): String = {
-    val builder = StringBuilder.newBuilder
-
-    str.foreach {
-      case '\'' => builder ++= s"\\\'"
-      case ch => builder += ch
-    }
-
-    builder.toString()
-  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/2f77616e/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala
index 473bbce..34ca790 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala
@@ -288,6 +288,21 @@ class ShowCreateTableSuite extends QueryTest with SQLTestUtils with TestHiveSing
     }
   }
 
+  test("SPARK-24911: keep quotes for nested fields") {
+    withTable("t1") {
+      val createTable = "CREATE TABLE `t1`(`a` STRUCT<`b`: STRING>)"
+      sql(createTable)
+      val shownDDL = sql(s"SHOW CREATE TABLE t1")
+        .head()
+        .getString(0)
+        .split("\n")
+        .head
+      assert(shownDDL == createTable)
+
+      checkCreateTable("t1")
+    }
+  }
+
   private def createRawHiveTable(ddl: String): Unit = {
     hiveContext.sharedState.externalCatalog.unwrapped.asInstanceOf[HiveExternalCatalog]
       .client.runSqlHive(ddl)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
