This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.2 by this push:
new 58375a86e6f [SPARK-40270][PS] Make 'compute.max_rows' as None working
in DataFrame.style
58375a86e6f is described below
commit 58375a86e6ff49c5bcee49939fbd98eb848ae59f
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Tue Aug 30 16:25:26 2022 +0900
[SPARK-40270][PS] Make 'compute.max_rows' as None working in DataFrame.style
This PR makes the `compute.max_rows` option work when set to `None` in
`DataFrame.style`, as expected, instead of throwing an exception, by collecting
all of the data into a pandas DataFrame.
To make the configuration work as expected.
Yes, this is a user-facing change: the following no longer raises an exception.
```python
import pyspark.pandas as ps
ps.set_option("compute.max_rows", None)
ps.get_option("compute.max_rows")
ps.range(1).style
```
**Before:**
```
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/.../spark/python/pyspark/pandas/frame.py", line 3656, in style
pdf = self.head(max_results + 1)._to_internal_pandas()
TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'
```
**After:**
```
<pandas.io.formats.style.Styler object at 0x7fdf78250430>
```
Manually tested, and a unit test was added.
Closes #37718 from HyukjinKwon/SPARK-40270.
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
(cherry picked from commit 0f0e8cc26b6c80cc179368e3009d4d6c88103a64)
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/pandas/frame.py | 16 +++++++++-------
python/pyspark/pandas/tests/test_dataframe.py | 23 +++++++++++++++++++++++
2 files changed, 32 insertions(+), 7 deletions(-)
diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index efc677b33ce..fd112357bdd 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -3503,19 +3503,21 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
Property returning a Styler object containing methods for
building a styled HTML representation for the DataFrame.
- .. note:: currently it collects top 1000 rows and return its
- pandas `pandas.io.formats.style.Styler` instance.
-
Examples
--------
>>> ps.range(1001).style # doctest: +SKIP
<pandas.io.formats.style.Styler object at ...>
"""
max_results = get_option("compute.max_rows")
- pdf = self.head(max_results + 1)._to_internal_pandas()
- if len(pdf) > max_results:
- warnings.warn("'style' property will only use top %s rows." %
max_results, UserWarning)
- return pdf.head(max_results).style
+ if max_results is not None:
+ pdf = self.head(max_results + 1)._to_internal_pandas()
+ if len(pdf) > max_results:
+ warnings.warn(
+ "'style' property will only use top %s rows." %
max_results, UserWarning
+ )
+ return pdf.head(max_results).style
+ else:
+ return self._to_internal_pandas().style
def set_index(
self,
diff --git a/python/pyspark/pandas/tests/test_dataframe.py
b/python/pyspark/pandas/tests/test_dataframe.py
index 27c670026d0..b4187d59ae7 100644
--- a/python/pyspark/pandas/tests/test_dataframe.py
+++ b/python/pyspark/pandas/tests/test_dataframe.py
@@ -5774,6 +5774,29 @@ class DataFrameTest(PandasOnSparkTestCase, SQLTestUtils):
for value_psdf, value_pdf in zip(psdf, pdf):
self.assert_eq(value_psdf, value_pdf)
+ def test_style(self):
+ # Currently, the `style` function returns a pandas object `Styler` as
it is,
+ # processing only the number of rows declared in `compute.max_rows`.
+ # So it's a bit vague to test, but we are doing minimal tests instead
of not testing at all.
+ pdf = pd.DataFrame(np.random.randn(10, 4), columns=["A", "B", "C",
"D"])
+ psdf = ps.from_pandas(pdf)
+
+ def style_negative(v, props=""):
+ return props if v < 0 else None
+
+ def check_style():
+ # If the value is negative, the text color will be displayed as
red.
+ pdf_style = pdf.style.applymap(style_negative, props="color:red;")
+ psdf_style = psdf.style.applymap(style_negative,
props="color:red;")
+
+ # Test whether the same shape as pandas table is created including
the color.
+ self.assert_eq(pdf_style.to_latex(), psdf_style.to_latex())
+
+ check_style()
+
+ with ps.option_context("compute.max_rows", None):
+ check_style()
+
if __name__ == "__main__":
from pyspark.pandas.tests.test_dataframe import * # noqa: F401
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]