This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 74d2f04 [SPARK-27166][SQL] Improve `printSchema` to print up to the
given level
74d2f04 is described below
commit 74d2f04183a876406ca24c59b53f02eafd993ce5
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Thu Mar 14 20:27:55 2019 -0700
[SPARK-27166][SQL] Improve `printSchema` to print up to the given level
## What changes were proposed in this pull request?
This PR improves `printSchema` so that it can print the schema only up to a
given nesting level. A non-positive level prints the full schema, the same as
calling `printSchema` with no argument.
```scala
scala> val df = Seq((1,(2,(3,4)))).toDF
df: org.apache.spark.sql.DataFrame = [_1: int, _2: struct<_1: int, _2: struct<_1: int, _2: int>>]
scala> df.printSchema
root
|-- _1: integer (nullable = false)
|-- _2: struct (nullable = true)
| |-- _1: integer (nullable = false)
| |-- _2: struct (nullable = true)
| | |-- _1: integer (nullable = false)
| | |-- _2: integer (nullable = false)
scala> df.printSchema(1)
root
|-- _1: integer (nullable = false)
|-- _2: struct (nullable = true)
scala> df.printSchema(2)
root
|-- _1: integer (nullable = false)
|-- _2: struct (nullable = true)
| |-- _1: integer (nullable = false)
| |-- _2: struct (nullable = true)
scala> df.printSchema(3)
root
|-- _1: integer (nullable = false)
|-- _2: struct (nullable = true)
| |-- _1: integer (nullable = false)
| |-- _2: struct (nullable = true)
| | |-- _1: integer (nullable = false)
| | |-- _2: integer (nullable = false)
```
## How was this patch tested?
Passes Jenkins with the newly added test case.
Closes #24098 from dongjoon-hyun/SPARK-27166.
Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../main/scala/org/apache/spark/sql/types/StructType.scala | 10 ++++++++--
.../scala/org/apache/spark/sql/types/StructTypeSuite.scala | 12 ++++++++++++
sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 10 +++++++++-
3 files changed, 29 insertions(+), 3 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
index d563276..73a86ac 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
@@ -310,13 +310,19 @@ case class StructType(fields: Array[StructField]) extends
DataType with Seq[Stru
protected[sql] def toAttributes: Seq[AttributeReference] =
map(f => AttributeReference(f.name, f.dataType, f.nullable, f.metadata)())
- def treeString: String = {
+ def treeString: String = treeString(Int.MaxValue)
+
+ def treeString(level: Int): String = {
val builder = new StringBuilder
builder.append("root\n")
val prefix = " |"
fields.foreach(field => field.buildFormattedString(prefix, builder))
- builder.toString()
+ if (level <= 0 || level == Int.MaxValue) {
+ builder.toString()
+ } else {
+ builder.toString().split("\n").filter(_.lastIndexOf("|--") < level * 5 + 1).mkString("\n")
+ }
}
// scalastyle:off println
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala
index b4ce26b..c493088 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala
@@ -70,4 +70,16 @@ class StructTypeSuite extends SparkFunSuite {
assert(struct.toDDL == """`b` BOOLEAN COMMENT 'Field\'s comment'""")
}
+
+
+ test("Print up to the given level") {
+ val schema = StructType.fromDDL(
+ "c1 INT, c2 STRUCT<c3: INT, c4: STRUCT<c5: INT, c6: INT>>")
+
+ assert(5 == schema.treeString(2).split("\n").length)
+ assert(3 == schema.treeString(1).split("\n").length)
+ assert(7 == schema.treeString.split("\n").length)
+ assert(7 == schema.treeString(0).split("\n").length)
+ assert(7 == schema.treeString(-1).split("\n").length)
+ }
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 24f5c81..c2c2ebc 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -478,8 +478,16 @@ class Dataset[T] private[sql](
* @group basic
* @since 1.6.0
*/
+ def printSchema(): Unit = printSchema(Int.MaxValue)
+
// scalastyle:off println
- def printSchema(): Unit = println(schema.treeString)
+ /**
+ * Prints the schema up to the given level to the console in a nice tree format.
+ *
+ * @group basic
+ * @since 3.0.0
+ */
+ def printSchema(level: Int): Unit = println(schema.treeString(level))
// scalastyle:on println
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]