Repository: spark
Updated Branches:
  refs/heads/master 71cc17bfa -> 19834fa91
[SPARK-7993] [SQL] Improved DataFrame.show() output

Closes #6633

Author: akhilthatipamula <[email protected]>
Author: zsxwing <[email protected]>

Closes #6784 from zsxwing/pr6633 and squashes the following commits:

5da1c51 [zsxwing] Address comments and add unit tests
17eab7b [akhilthatipamula] refactored code
19874b3 [akhilthatipamula] Update DataFrame.scala
0a76a5e [akhilthatipamula] Optimised showString()
e3dd03f [akhilthatipamula] Modified showString() method
a21012b [akhilthatipamula] improved the show()
4bb742f [akhilthatipamula] Modified dataframe.show() method

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/19834fa9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/19834fa9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/19834fa9

Branch: refs/heads/master
Commit: 19834fa9184f0365a160bcb54bcd33eaa87c70dc
Parents: 71cc17b
Author: akhilthatipamula <[email protected]>
Authored: Fri Jun 12 10:40:28 2015 -0700
Committer: Reynold Xin <[email protected]>
Committed: Fri Jun 12 10:40:28 2015 -0700

----------------------------------------------------------------------
 .../scala/org/apache/spark/sql/DataFrame.scala | 30 +++++++++---
 .../org/apache/spark/sql/DataFrameSuite.scala | 51 ++++++++++++++++++++
 2 files changed, 75 insertions(+), 6 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/19834fa9/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index 59f64dd..f041fd3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -169,23 +169,34 @@ class DataFrame private[sql](
 
   /**
    * Internal API for Python
- * @param numRows Number of rows to show + * @param _numRows Number of rows to show */ - private[sql] def showString(numRows: Int): String = { + private[sql] def showString(_numRows: Int): String = { + val numRows = _numRows.max(0) val sb = new StringBuilder - val data = take(numRows) + val takeResult = take(numRows + 1) + val hasMoreData = takeResult.length > numRows + val data = takeResult.take(numRows) val numCols = schema.fieldNames.length + // For array values, replace Seq and Array with square brackets // For cells that are beyond 20 characters, replace it with the first 17 and "..." val rows: Seq[Seq[String]] = schema.fieldNames.toSeq +: data.map { row => row.toSeq.map { cell => - val str = if (cell == null) "null" else cell.toString + val str = cell match { + case null => "null" + case array: Array[_] => array.mkString("[", ", ", "]") + case seq: Seq[_] => seq.mkString("[", ", ", "]") + case _ => cell.toString + } if (str.length > 20) str.substring(0, 17) + "..." else str }: Seq[String] } + // Initialise the width of each column to a minimum value of '3' + val colWidths = Array.fill(numCols)(3) + // Compute the width of each column - val colWidths = Array.fill(numCols)(0) for (row <- rows) { for ((cell, i) <- row.zipWithIndex) { colWidths(i) = math.max(colWidths(i), cell.length) @@ -197,7 +208,7 @@ class DataFrame private[sql]( // column names rows.head.zipWithIndex.map { case (cell, i) => - StringUtils.leftPad(cell.toString, colWidths(i)) + StringUtils.leftPad(cell, colWidths(i)) }.addString(sb, "|", "|", "|\n") sb.append(sep) @@ -210,6 +221,13 @@ class DataFrame private[sql]( } sb.append(sep) + + // For Data that has more than "numRows" records + if (hasMoreData) { + val rowsString = if (numRows == 1) "row" else "rows" + sb.append(s"only showing top $numRows ${rowsString}\n") + } + sb.toString() } http://git-wip-us.apache.org/repos/asf/spark/blob/19834fa9/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala 
---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index bb8621a..84835c0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -469,12 +469,63 @@ class DataFrameSuite extends QueryTest { testData.select($"*").show(1000) } + test("showString(negative)") { + val expectedAnswer = """+---+-----+ + ||key|value| + |+---+-----+ + |+---+-----+ + |only showing top 0 rows + |""".stripMargin + assert(testData.select($"*").showString(-1) === expectedAnswer) + } + + test("showString(0)") { + val expectedAnswer = """+---+-----+ + ||key|value| + |+---+-----+ + |+---+-----+ + |only showing top 0 rows + |""".stripMargin + assert(testData.select($"*").showString(0) === expectedAnswer) + } + + test("showString: array") { + val df = Seq( + (Array(1, 2, 3), Array(1, 2, 3)), + (Array(2, 3, 4), Array(2, 3, 4)) + ).toDF() + val expectedAnswer = """+---------+---------+ + || _1| _2| + |+---------+---------+ + ||[1, 2, 3]|[1, 2, 3]| + ||[2, 3, 4]|[2, 3, 4]| + |+---------+---------+ + |""".stripMargin + assert(df.showString(10) === expectedAnswer) + } + + test("showString: minimum column width") { + val df = Seq( + (1, 1), + (2, 2) + ).toDF() + val expectedAnswer = """+---+---+ + || _1| _2| + |+---+---+ + || 1| 1| + || 2| 2| + |+---+---+ + |""".stripMargin + assert(df.showString(10) === expectedAnswer) + } + test("SPARK-7319 showString") { val expectedAnswer = """+---+-----+ ||key|value| |+---+-----+ || 1| 1| |+---+-----+ + |only showing top 1 row |""".stripMargin assert(testData.select($"*").showString(1) === expectedAnswer) } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
