Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/21370#discussion_r192610559
--- Diff: python/pyspark/sql/dataframe.py ---
@@ -351,8 +354,70 @@ def show(self, n=20, truncate=True, vertical=False):
else:
print(self._jdf.showString(n, int(truncate), vertical))
+ @property
+ def _eager_eval(self):
+ """Returns true if the eager evaluation enabled.
+ """
+ return self.sql_ctx.getConf(
+ "spark.sql.repl.eagerEval.enabled", "false").lower() == "true"
+
+ @property
+ def _max_num_rows(self):
+ """Returns the max row number for eager evaluation.
+ """
+ return int(self.sql_ctx.getConf(
+ "spark.sql.repl.eagerEval.maxNumRows", "20"))
+
+ @property
+ def _truncate(self):
+ """Returns the truncate length for eager evaluation.
+ """
+ return int(self.sql_ctx.getConf(
+ "spark.sql.repl.eagerEval.truncate", "20"))
+
def __repr__(self):
- return "DataFrame[%s]" % (", ".join("%s: %s" % c for c in
self.dtypes))
+ if not self._support_repr_html and self._eager_eval:
+ vertical = False
+ return self._jdf.showString(
+ self._max_num_rows, self._truncate, vertical)
+ else:
+ return "DataFrame[%s]" % (", ".join("%s: %s" % c for c in
self.dtypes))
+
+ def _repr_html_(self):
+ """Returns a dataframe with html code when you enabled eager
evaluation
+ by 'spark.sql.repl.eagerEval.enabled', this only called by REPL
you are
+ using support eager evaluation with HTML.
+ """
+ import cgi
+ if not self._support_repr_html:
+ self._support_repr_html = True
+ if self._eager_eval:
+ max_num_rows = max(self._max_num_rows, 0)
+ with SCCallSiteSync(self._sc) as css:
+ vertical = False
+ sock_info = self._jdf.getRowsToPython(
+ max_num_rows, self._truncate, vertical)
+ rows = list(_load_from_socket(sock_info,
BatchedSerializer(PickleSerializer())))
+ head = rows[0]
+ row_data = rows[1:]
+ has_more_data = len(row_data) > max_num_rows
+ row_data = row_data[0:max_num_rows]
+
+ html = "<table border='1'>\n<tr><th>"
+ # generate table head
+ html += "</th><th>".join(map(lambda x: cgi.escape(x), head)) +
"</th></tr>\n"
+ # generate table rows
+ for row in row_data:
+ data = "<tr><td>" + "</td><td>".join(map(lambda x:
cgi.escape(x), row)) + \
+ "</td></tr>\n"
--- End diff --
ditto:
```
"<tr><td>%s</td></tr>\n" % "</td><td>".join(map(lambda x: cgi.escape(x),
row))
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org