This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 2ef15cabd4b4 [SPARK-44708][PYTHON] Migrate test_reset_index assert_eq to use assertDataFrameEqual 2ef15cabd4b4 is described below commit 2ef15cabd4b4d73f8a081fa20dc92bcac3e4607f Author: Gurpreet Singh <gdhu...@gmail.com> AuthorDate: Mon Mar 25 08:42:48 2024 +0900 [SPARK-44708][PYTHON] Migrate test_reset_index assert_eq to use assertDataFrameEqual ### What changes were proposed in this pull request? This PR updates [python/pyspark/pandas/tests/indexes/test_reset_index.py](https://github.com/apache/spark/blob/42e5daddf3ba16ff7d08e82e51cd8924cc56e180/python/pyspark/pandas/tests/indexes/test_reset_index.py#L46) to use the new PySpark test util function, assertDataFrameEqual, introduced in [SPARK-44042](https://issues.apache.org/jira/browse/SPARK-44042). ### Why are the changes needed? Use the new `assertDataFrameEqual` util function across all tests in PySpark. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests. ### Was this patch authored or co-authored using generative AI tooling? No Closes #45629 from gdhuper/gdhuper/SPARK-44708. 
Authored-by: Gurpreet Singh <gdhu...@gmail.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- .../pandas/tests/indexes/test_reset_index.py | 41 +++++++++++----------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/python/pyspark/pandas/tests/indexes/test_reset_index.py b/python/pyspark/pandas/tests/indexes/test_reset_index.py index cab2888cae4b..06136341a843 100644 --- a/python/pyspark/pandas/tests/indexes/test_reset_index.py +++ b/python/pyspark/pandas/tests/indexes/test_reset_index.py @@ -22,6 +22,7 @@ import pandas as pd from pyspark import pandas as ps from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.testing.sqlutils import SQLTestUtils +from pyspark.pandas.testing import assert_frame_equal, assert_index_equal, assert_series_equal class FrameResetIndexMixin: @@ -42,9 +43,9 @@ class FrameResetIndexMixin: pdf = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=np.random.rand(3)) psdf = ps.from_pandas(pdf) - self.assert_eq(psdf.reset_index(), pdf.reset_index()) - self.assert_eq(psdf.reset_index().index, pdf.reset_index().index) - self.assert_eq(psdf.reset_index(drop=True), pdf.reset_index(drop=True)) + assert_frame_equal(psdf.reset_index(), pdf.reset_index()) + assert_index_equal(psdf.reset_index().index, pdf.reset_index().index) + assert_frame_equal(psdf.reset_index(drop=True), pdf.reset_index(drop=True)) pdf.index.name = "a" psdf.index.name = "a" @@ -52,33 +53,33 @@ class FrameResetIndexMixin: with self.assertRaisesRegex(ValueError, "cannot insert a, already exists"): psdf.reset_index() - self.assert_eq(psdf.reset_index(drop=True), pdf.reset_index(drop=True)) + assert_frame_equal(psdf.reset_index(drop=True), pdf.reset_index(drop=True)) # inplace pser = pdf.a psser = psdf.a pdf.reset_index(drop=True, inplace=True) psdf.reset_index(drop=True, inplace=True) - self.assert_eq(psdf, pdf) - self.assert_eq(psser, pser) + assert_frame_equal(psdf, pdf) + assert_series_equal(psser, pser) pdf.columns = ["index", "b"] 
psdf.columns = ["index", "b"] - self.assert_eq(psdf.reset_index(), pdf.reset_index()) + assert_frame_equal(psdf.reset_index(), pdf.reset_index()) def test_reset_index_with_default_index_types(self): pdf = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=np.random.rand(3)) psdf = ps.from_pandas(pdf) with ps.option_context("compute.default_index_type", "sequence"): - self.assert_eq(psdf.reset_index(), pdf.reset_index()) + assert_frame_equal(psdf.reset_index(), pdf.reset_index()) with ps.option_context("compute.default_index_type", "distributed-sequence"): - self.assert_eq(psdf.reset_index(), pdf.reset_index()) + assert_frame_equal(psdf.reset_index(), pdf.reset_index()) with ps.option_context("compute.default_index_type", "distributed"): # the index is different. - self.assert_eq( + assert_frame_equal( psdf.reset_index()._to_pandas().reset_index(drop=True), pdf.reset_index() ) @@ -95,18 +96,18 @@ class FrameResetIndexMixin: ) psdf = ps.from_pandas(pdf) - self.assert_eq(psdf, pdf) - self.assert_eq(psdf.reset_index(), pdf.reset_index()) - self.assert_eq(psdf.reset_index(level="class"), pdf.reset_index(level="class")) - self.assert_eq( + assert_frame_equal(psdf, pdf) + assert_frame_equal(psdf.reset_index(), pdf.reset_index()) + assert_frame_equal(psdf.reset_index(level="class"), pdf.reset_index(level="class")) + assert_frame_equal( psdf.reset_index(level="class", col_level=1), pdf.reset_index(level="class", col_level=1), ) - self.assert_eq( + assert_frame_equal( psdf.reset_index(level="class", col_level=1, col_fill="species"), pdf.reset_index(level="class", col_level=1, col_fill="species"), ) - self.assert_eq( + assert_frame_equal( psdf.reset_index(level="class", col_level=1, col_fill="genus"), pdf.reset_index(level="class", col_level=1, col_fill="genus"), ) @@ -117,19 +118,19 @@ class FrameResetIndexMixin: pdf.index.names = [("x", "class"), ("y", "name")] psdf.index.names = [("x", "class"), ("y", "name")] - self.assert_eq(psdf.reset_index(), pdf.reset_index()) + 
assert_frame_equal(psdf.reset_index(), pdf.reset_index()) with self.assertRaisesRegex(ValueError, "Item must have length equal to number of levels."): psdf.reset_index(col_level=1) def test_index_to_frame_reset_index(self): def check(psdf, pdf): - self.assert_eq(psdf.reset_index(), pdf.reset_index()) - self.assert_eq(psdf.reset_index(drop=True), pdf.reset_index(drop=True)) + assert_frame_equal(psdf.reset_index(), pdf.reset_index()) + assert_frame_equal(psdf.reset_index(drop=True), pdf.reset_index(drop=True)) pdf.reset_index(drop=True, inplace=True) psdf.reset_index(drop=True, inplace=True) - self.assert_eq(psdf, pdf) + assert_frame_equal(psdf, pdf) pdf, psdf = self.df_pair check(psdf.index.to_frame(), pdf.index.to_frame()) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org