Repository: spark
Updated Branches:
  refs/heads/branch-1.3 b5c5e93d7 -> 7c779d8d5

[SPARK-6007][SQL] Add numRows param in DataFrame.show()

It is useful to let the user decide the number of rows to show in DataFrame.show

Author: Jacky Li <[email protected]>

Closes #4767 from jackylk/show and squashes the following commits:

a0e0f4b [Jacky Li] fix testcase
7cdbe91 [Jacky Li] modify according to comment
bb54537 [Jacky Li] for Java compatibility
d7acc18 [Jacky Li] modify according to comments
981be52 [Jacky Li] add numRows param in DataFrame.show()

(cherry picked from commit 2358657547016d647cdd2e2d363426fcd8d3e9ff)
Signed-off-by: Reynold Xin <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7c779d8d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7c779d8d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7c779d8d

Branch: refs/heads/branch-1.3
Commit: 7c779d8d52a445362fc57e740ce51bdc2e93ad7f
Parents: b5c5e93
Author: Jacky Li <[email protected]>
Authored: Thu Feb 26 10:40:58 2015 -0800
Committer: Reynold Xin <[email protected]>
Committed: Thu Feb 26 10:43:20 2015 -0800

----------------------------------------------------------------------
 python/pyspark/sql/dataframe.py                         |  6 +++---
 .../main/scala/org/apache/spark/sql/DataFrame.scala     | 13 ++++++++++---
 .../test/org/apache/spark/sql/JavaDataFrameSuite.java   |  9 +++++++++
 .../scala/org/apache/spark/sql/DataFrameSuite.scala     |  5 +++++
 4 files changed, 27 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/7c779d8d/python/pyspark/sql/dataframe.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 6d42410..aec9901 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -272,9 +272,9 @@ class DataFrame(object):
         """
         return self._jdf.isLocal()
 
-    def show(self):
+    def show(self, n=20):
         """
-        Print the first 20 rows.
+        Print the first n rows.
 
         >>> df
         DataFrame[age: int, name: string]
@@ -283,7 +283,7 @@
         2   Alice
         5   Bob
         """
-        print self._jdf.showString().encode('utf8', 'ignore')
+        print self._jdf.showString(n).encode('utf8', 'ignore')
 
     def __repr__(self):
         return "DataFrame[%s]" % (", ".join("%s: %s" % c for c in self.dtypes))


http://git-wip-us.apache.org/repos/asf/spark/blob/7c779d8d/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index f045da3..060ab5e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -159,9 +159,10 @@ class DataFrame protected[sql](
 
   /**
    * Internal API for Python
+   * @param numRows Number of rows to show
    */
-  private[sql] def showString(): String = {
-    val data = take(20)
+  private[sql] def showString(numRows: Int): String = {
+    val data = take(numRows)
     val numCols = schema.fieldNames.length
 
     // For cells that are beyond 20 characters, replace it with the first 17 and "..."
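The hunk above shows only the head of showString; its trailing comment describes the cell-formatting rule applied further down in the method: any cell wider than 20 characters is rendered as its first 17 characters followed by "...". The snippet below is a minimal standalone sketch of that rule only, not the actual private implementation (TruncateCellSketch and truncateCell are illustrative names, not part of the commit):

    object TruncateCellSketch {
      // Cells longer than maxWidth characters keep only their first maxWidth - 3
      // characters and get a "..." suffix, so the rendered cell never exceeds maxWidth.
      def truncateCell(cell: String, maxWidth: Int = 20): String =
        if (cell.length > maxWidth) cell.take(maxWidth - 3) + "..." else cell

      def main(args: Array[String]): Unit = {
        println(truncateCell("Alice"))                                  // Alice
        println(truncateCell("a value that is well over twenty chars")) // a value that is w...
      }
    }

The second hunk of the same file follows.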
@@ -293,9 +294,15 @@ class DataFrame protected[sql](
    *   1983  03    0.410516  0.442194
    *   1984  04    0.450090  0.483521
    * }}}
+   * @param numRows Number of rows to show
    * @group basic
    */
-  def show(): Unit = println(showString())
+  def show(numRows: Int): Unit = println(showString(numRows))
+
+  /**
+   * Displays the top 20 rows of [[DataFrame]] in a tabular form.
+   */
+  def show(): Unit = show(20)
 
   /**
    * Cartesian join with another [[DataFrame]].


http://git-wip-us.apache.org/repos/asf/spark/blob/7c779d8d/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java
----------------------------------------------------------------------
diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java
index c1c51f8..2d586f7 100644
--- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java
+++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java
@@ -20,6 +20,7 @@ package test.org.apache.spark.sql;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
+import org.junit.Ignore;
 import org.junit.Test;
 
 import org.apache.spark.sql.*;
@@ -81,4 +82,12 @@ public class JavaDataFrameSuite {
     df.groupBy().agg(countDistinct(col("key"), col("value")));
     df.select(coalesce(col("key")));
   }
+
+  @Ignore
+  public void testShow() {
+    // This test case is intentionally ignored; it exists to make sure the calls compile correctly
+    DataFrame df = context.table("testData");
+    df.show();
+    df.show(1000);
+  }
 }


http://git-wip-us.apache.org/repos/asf/spark/blob/7c779d8d/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index c392a55..ff441ef 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -441,4 +441,9 @@ class DataFrameSuite extends QueryTest {
     checkAnswer(df.select(df("key")), testData.select('key).collect().toSeq)
   }
+
+  ignore("show") {
+    // This test case is intentionally ignored; it exists to make sure the calls compile correctly
+    testData.select($"*").show()
+    testData.select($"*").show(1000)
+  }
 }
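For reference, here is a hedged sketch of how the new overloads might be exercised from a standalone Spark 1.3 application. The SparkContext/SQLContext setup, the local master, and the sample data are illustrative assumptions, not part of this commit:

    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.sql.SQLContext

    object ShowNumRowsExample {
      def main(args: Array[String]): Unit = {
        val sc = new SparkContext(new SparkConf().setAppName("show-numRows").setMaster("local[*]"))
        val sqlContext = new SQLContext(sc)
        import sqlContext.implicits._

        // Hypothetical sample data; any DataFrame behaves the same way.
        val df = sc.parallelize(Seq((2, "Alice"), (5, "Bob"))).toDF("age", "name")

        df.show()     // unchanged default: prints up to the first 20 rows
        df.show(1)    // new overload: prints only the first row
        df.show(1000) // asking for more rows than exist simply prints them all

        sc.stop()
      }
    }

Providing a separate zero-argument show() overload rather than a Scala default parameter is presumably what the "for Java compatibility" commit refers to: it lets Java callers such as the @Ignore'd JavaDataFrameSuite test above compile df.show() directly.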
