[spark] branch master updated: [SPARK-27166][SQL] Improve `printSchema` to print up to the given level

dongjoon Thu, 14 Mar 2019 20:29:14 -0700

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 74d2f04  [SPARK-27166][SQL] Improve `printSchema` to print up to the given level
74d2f04 is described below

commit 74d2f04183a876406ca24c59b53f02eafd993ce5
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Thu Mar 14 20:27:55 2019 -0700

    [SPARK-27166][SQL] Improve `printSchema` to print up to the given level
    
    ## What changes were proposed in this pull request?
    
    This PR aims to improve `printSchema` to be able to print up to the given level of the schema.
    
    ```scala
    scala> val df = Seq((1,(2,(3,4)))).toDF
    df: org.apache.spark.sql.DataFrame = [_1: int, _2: struct<_1: int, _2: struct<_1: int, _2: int>>]
    
    scala> df.printSchema
    root
     |-- _1: integer (nullable = false)
     |-- _2: struct (nullable = true)
     |    |-- _1: integer (nullable = false)
     |    |-- _2: struct (nullable = true)
     |    |    |-- _1: integer (nullable = false)
     |    |    |-- _2: integer (nullable = false)
    
    scala> df.printSchema(1)
    root
     |-- _1: integer (nullable = false)
     |-- _2: struct (nullable = true)
    
    scala> df.printSchema(2)
    root
     |-- _1: integer (nullable = false)
     |-- _2: struct (nullable = true)
     |    |-- _1: integer (nullable = false)
     |    |-- _2: struct (nullable = true)
    
    scala> df.printSchema(3)
    root
     |-- _1: integer (nullable = false)
     |-- _2: struct (nullable = true)
     |    |-- _1: integer (nullable = false)
     |    |-- _2: struct (nullable = true)
     |    |    |-- _1: integer (nullable = false)
     |    |    |-- _2: integer (nullable = false)
    ```
    
    ## How was this patch tested?
    
    Pass the Jenkins with the newly added test case.
    
    Closes #24098 from dongjoon-hyun/SPARK-27166.
    
    Authored-by: Dongjoon Hyun <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../main/scala/org/apache/spark/sql/types/StructType.scala   | 10 ++++++++--
 .../scala/org/apache/spark/sql/types/StructTypeSuite.scala   | 12 ++++++++++++
 sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala   | 10 +++++++++-
 3 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
index d563276..73a86ac 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
@@ -310,13 +310,19 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru
   protected[sql] def toAttributes: Seq[AttributeReference] =
     map(f => AttributeReference(f.name, f.dataType, f.nullable, f.metadata)())
 
-  def treeString: String = {
+  def treeString: String = treeString(Int.MaxValue)
+
+  def treeString(level: Int): String = {
     val builder = new StringBuilder
     builder.append("root\n")
     val prefix = " |"
     fields.foreach(field => field.buildFormattedString(prefix, builder))
 
-    builder.toString()
+    if (level <= 0 || level == Int.MaxValue) {
+      builder.toString()
+    } else {
+      builder.toString().split("\n").filter(_.lastIndexOf("|--") < level * 5 + 1).mkString("\n")
+    }
   }
 
   // scalastyle:off println
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala
index b4ce26b..c493088 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala
@@ -70,4 +70,16 @@ class StructTypeSuite extends SparkFunSuite {
 
     assert(struct.toDDL == """`b` BOOLEAN COMMENT 'Field\'s comment'""")
   }
+
+
+  test("Print up to the given level") {
+    val schema = StructType.fromDDL(
+      "c1 INT, c2 STRUCT<c3: INT, c4: STRUCT<c5: INT, c6: INT>>")
+
+    assert(5 == schema.treeString(2).split("\n").length)
+    assert(3 == schema.treeString(1).split("\n").length)
+    assert(7 == schema.treeString.split("\n").length)
+    assert(7 == schema.treeString(0).split("\n").length)
+    assert(7 == schema.treeString(-1).split("\n").length)
+  }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 24f5c81..c2c2ebc 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -478,8 +478,16 @@ class Dataset[T] private[sql](
    * @group basic
    * @since 1.6.0
    */
+  def printSchema(): Unit = printSchema(Int.MaxValue)
+
   // scalastyle:off println
-  def printSchema(): Unit = println(schema.treeString)
+  /**
+   * Prints the schema up to the given level to the console in a nice tree format.
+   *
+   * @group basic
+   * @since 3.0.0
+   */
+  def printSchema(level: Int): Unit = println(schema.treeString(level))
   // scalastyle:on println
 
   /**


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[spark] branch master updated: [SPARK-27166][SQL] Improve `printSchema` to print up to the given level

Reply via email to