This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 2ef15cabd4b4 [SPARK-44708][PYTHON] Migrate test_reset_index assert_eq
to use assertDataFrameEqual
2ef15cabd4b4 is described below
commit 2ef15cabd4b4d73f8a081fa20dc92bcac3e4607f
Author: Gurpreet Singh <[email protected]>
AuthorDate: Mon Mar 25 08:42:48 2024 +0900
[SPARK-44708][PYTHON] Migrate test_reset_index assert_eq to use
assertDataFrameEqual
### What changes were proposed in this pull request?
This PR updates
[python/pyspark/pandas/tests/indexes/test_reset_index.py](https://github.com/apache/spark/blob/42e5daddf3ba16ff7d08e82e51cd8924cc56e180/python/pyspark/pandas/tests/indexes/test_reset_index.py#L46)
to use the new PySpark test util function, assertDataFrameEqual, introduced in
[SPARK-44042](https://issues.apache.org/jira/browse/SPARK-44042).
### Why are the changes needed?
To use the new `assertDataFrameEqual` util function consistently across all tests in PySpark.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Existing Tests
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #45629 from gdhuper/gdhuper/SPARK-44708.
Authored-by: Gurpreet Singh <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
.../pandas/tests/indexes/test_reset_index.py | 41 +++++++++++-----------
1 file changed, 21 insertions(+), 20 deletions(-)
diff --git a/python/pyspark/pandas/tests/indexes/test_reset_index.py
b/python/pyspark/pandas/tests/indexes/test_reset_index.py
index cab2888cae4b..06136341a843 100644
--- a/python/pyspark/pandas/tests/indexes/test_reset_index.py
+++ b/python/pyspark/pandas/tests/indexes/test_reset_index.py
@@ -22,6 +22,7 @@ import pandas as pd
from pyspark import pandas as ps
from pyspark.testing.pandasutils import PandasOnSparkTestCase
from pyspark.testing.sqlutils import SQLTestUtils
+from pyspark.pandas.testing import assert_frame_equal, assert_index_equal,
assert_series_equal
class FrameResetIndexMixin:
@@ -42,9 +43,9 @@ class FrameResetIndexMixin:
pdf = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]},
index=np.random.rand(3))
psdf = ps.from_pandas(pdf)
- self.assert_eq(psdf.reset_index(), pdf.reset_index())
- self.assert_eq(psdf.reset_index().index, pdf.reset_index().index)
- self.assert_eq(psdf.reset_index(drop=True), pdf.reset_index(drop=True))
+ assert_frame_equal(psdf.reset_index(), pdf.reset_index())
+ assert_index_equal(psdf.reset_index().index, pdf.reset_index().index)
+ assert_frame_equal(psdf.reset_index(drop=True),
pdf.reset_index(drop=True))
pdf.index.name = "a"
psdf.index.name = "a"
@@ -52,33 +53,33 @@ class FrameResetIndexMixin:
with self.assertRaisesRegex(ValueError, "cannot insert a, already
exists"):
psdf.reset_index()
- self.assert_eq(psdf.reset_index(drop=True), pdf.reset_index(drop=True))
+ assert_frame_equal(psdf.reset_index(drop=True),
pdf.reset_index(drop=True))
# inplace
pser = pdf.a
psser = psdf.a
pdf.reset_index(drop=True, inplace=True)
psdf.reset_index(drop=True, inplace=True)
- self.assert_eq(psdf, pdf)
- self.assert_eq(psser, pser)
+ assert_frame_equal(psdf, pdf)
+ assert_series_equal(psser, pser)
pdf.columns = ["index", "b"]
psdf.columns = ["index", "b"]
- self.assert_eq(psdf.reset_index(), pdf.reset_index())
+ assert_frame_equal(psdf.reset_index(), pdf.reset_index())
def test_reset_index_with_default_index_types(self):
pdf = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]},
index=np.random.rand(3))
psdf = ps.from_pandas(pdf)
with ps.option_context("compute.default_index_type", "sequence"):
- self.assert_eq(psdf.reset_index(), pdf.reset_index())
+ assert_frame_equal(psdf.reset_index(), pdf.reset_index())
with ps.option_context("compute.default_index_type",
"distributed-sequence"):
- self.assert_eq(psdf.reset_index(), pdf.reset_index())
+ assert_frame_equal(psdf.reset_index(), pdf.reset_index())
with ps.option_context("compute.default_index_type", "distributed"):
# the index is different.
- self.assert_eq(
+ assert_frame_equal(
psdf.reset_index()._to_pandas().reset_index(drop=True),
pdf.reset_index()
)
@@ -95,18 +96,18 @@ class FrameResetIndexMixin:
)
psdf = ps.from_pandas(pdf)
- self.assert_eq(psdf, pdf)
- self.assert_eq(psdf.reset_index(), pdf.reset_index())
- self.assert_eq(psdf.reset_index(level="class"),
pdf.reset_index(level="class"))
- self.assert_eq(
+ assert_frame_equal(psdf, pdf)
+ assert_frame_equal(psdf.reset_index(), pdf.reset_index())
+ assert_frame_equal(psdf.reset_index(level="class"),
pdf.reset_index(level="class"))
+ assert_frame_equal(
psdf.reset_index(level="class", col_level=1),
pdf.reset_index(level="class", col_level=1),
)
- self.assert_eq(
+ assert_frame_equal(
psdf.reset_index(level="class", col_level=1, col_fill="species"),
pdf.reset_index(level="class", col_level=1, col_fill="species"),
)
- self.assert_eq(
+ assert_frame_equal(
psdf.reset_index(level="class", col_level=1, col_fill="genus"),
pdf.reset_index(level="class", col_level=1, col_fill="genus"),
)
@@ -117,19 +118,19 @@ class FrameResetIndexMixin:
pdf.index.names = [("x", "class"), ("y", "name")]
psdf.index.names = [("x", "class"), ("y", "name")]
- self.assert_eq(psdf.reset_index(), pdf.reset_index())
+ assert_frame_equal(psdf.reset_index(), pdf.reset_index())
with self.assertRaisesRegex(ValueError, "Item must have length equal
to number of levels."):
psdf.reset_index(col_level=1)
def test_index_to_frame_reset_index(self):
def check(psdf, pdf):
- self.assert_eq(psdf.reset_index(), pdf.reset_index())
- self.assert_eq(psdf.reset_index(drop=True),
pdf.reset_index(drop=True))
+ assert_frame_equal(psdf.reset_index(), pdf.reset_index())
+ assert_frame_equal(psdf.reset_index(drop=True),
pdf.reset_index(drop=True))
pdf.reset_index(drop=True, inplace=True)
psdf.reset_index(drop=True, inplace=True)
- self.assert_eq(psdf, pdf)
+ assert_frame_equal(psdf, pdf)
pdf, psdf = self.df_pair
check(psdf.index.to_frame(), pdf.index.to_frame())
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]