Repository: zeppelin Updated Branches: refs/heads/master 14d52cff1 -> 6f867ceb0
[ZEPPELIN-1255] Add cast to string in z.show() for Pandas DataFrame ### What is this PR for? Casting data types in Pandas DataFrame to string in z.show() ### What type of PR is it? Bug Fix ### What is the Jira issue? [ZEPPELIN-1255](https://issues.apache.org/jira/browse/ZEPPELIN-1255) ### How should this be tested? ``` %python import pandas as pd df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', header=None) df.columns=[1, 2, 3, 'PetalWidth', 'Name'] z.show(df) %python.sql SELECT * FROM df LIMIT 10 ``` ### Questions: * Do the license files need an update? No * Are there breaking changes for older versions? No * Does this need documentation? No Author: paulbustios <[email protected]> Closes #1249 from bustios/ZEPPELIN-1255 and squashes the following commits: 82c1412 [paulbustios] Add test case for z.show() Pandas DataFrame 4a8c0a9 [paulbustios] [ZEPPELIN-1255] Add cast to string in z.show() for Pandas DataFrame Project: http://git-wip-us.apache.org/repos/asf/zeppelin/repo Commit: http://git-wip-us.apache.org/repos/asf/zeppelin/commit/6f867ceb Tree: http://git-wip-us.apache.org/repos/asf/zeppelin/tree/6f867ceb Diff: http://git-wip-us.apache.org/repos/asf/zeppelin/diff/6f867ceb Branch: refs/heads/master Commit: 6f867ceb0c2b93ecca3be8a16c155547182eb0dc Parents: 14d52cf Author: paulbustios <[email protected]> Authored: Mon Aug 1 01:13:27 2016 -0300 Committer: Alexander Bezzubov <[email protected]> Committed: Mon Aug 1 21:50:25 2016 +0900 ---------------------------------------------------------------------- python/src/main/resources/bootstrap.py | 8 ++++---- .../python/PythonInterpreterPandasSqlTest.java | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/zeppelin/blob/6f867ceb/python/src/main/resources/bootstrap.py ---------------------------------------------------------------------- diff --git 
a/python/src/main/resources/bootstrap.py b/python/src/main/resources/bootstrap.py index ce28baf..638ef9b 100644 --- a/python/src/main/resources/bootstrap.py +++ b/python/src/main/resources/bootstrap.py @@ -140,19 +140,19 @@ class PyZeppelinContext(object): """ limit = len(df) > self.max_result header_buf = io.StringIO("") - header_buf.write(df.columns[0]) + header_buf.write(str(df.columns[0])) for col in df.columns[1:]: header_buf.write("\t") - header_buf.write(col) + header_buf.write(str(col)) header_buf.write("\n") body_buf = io.StringIO("") rows = df.head(self.max_result).values if limit else df.values for row in rows: - body_buf.write(row[0]) + body_buf.write(str(row[0])) for cell in row[1:]: body_buf.write("\t") - body_buf.write(cell) + body_buf.write(str(cell)) body_buf.write("\n") body_buf.seek(0); header_buf.seek(0) #TODO(bzz): fix it, so it shows red notice, as in Spark http://git-wip-us.apache.org/repos/asf/zeppelin/blob/6f867ceb/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterPandasSqlTest.java ---------------------------------------------------------------------- diff --git a/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterPandasSqlTest.java b/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterPandasSqlTest.java index 5f26adb..f953856 100644 --- a/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterPandasSqlTest.java +++ b/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterPandasSqlTest.java @@ -153,4 +153,24 @@ public class PythonInterpreterPandasSqlTest { assertTrue(ret.message().length() > 0); } + @Test + public void showDataFrame() { + InterpreterResult ret; + ret = python.interpret("import pandas as pd", context); + ret = python.interpret("import numpy as np", context); + + // given a Pandas DataFrame with non-text data + ret = python.interpret("d1 = {1 : [np.nan, 1, 2, 3], 'two' : [3., 4., 5., 6.7]}", context); + ret = python.interpret("df1 = pd.DataFrame(d1)", context); + 
assertEquals(ret.message(), InterpreterResult.Code.SUCCESS, ret.code()); + + // when + ret = python.interpret("z.show(df1)", context); + + // then + assertEquals(ret.message(), InterpreterResult.Code.SUCCESS, ret.code()); + assertEquals(ret.message(), Type.TABLE, ret.type()); + assertTrue(ret.message().indexOf("nan") > 0); + assertTrue(ret.message().indexOf("6.7") > 0); + } }
