This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new ef7e1e685f61 [SPARK-46471][PS][TESTS][FOLLOWUPS] Move `OpsOnDiffFramesEnabledTests` to `pyspark.pandas.tests.diff_frames_ops.*` ef7e1e685f61 is described below commit ef7e1e685f615f7763998b076dafb206cf46985d Author: Ruifeng Zheng <ruife...@apache.org> AuthorDate: Sun Dec 24 14:31:12 2023 -0800 [SPARK-46471][PS][TESTS][FOLLOWUPS] Move `OpsOnDiffFramesEnabledTests` to `pyspark.pandas.tests.diff_frames_ops.*` ### What changes were proposed in this pull request? Move `OpsOnDiffFramesEnabledTests` to `pyspark.pandas.tests.diff_frames_ops.*` ### Why are the changes needed? test code cleanup ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? ci ### Was this patch authored or co-authored using generative AI tooling? no Closes #44471 from zhengruifeng/ps_test_diff_ops_3. Authored-by: Ruifeng Zheng <ruife...@apache.org> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- dev/sparktestsupport/modules.py | 4 +- .../test_parity_basic.py} | 11 +-- .../tests/diff_frames_ops/test_arithmetic.py | 45 +++++++++++ .../test_basic.py} | 91 ++-------------------- 4 files changed, 59 insertions(+), 92 deletions(-) diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 939e88bf95b2..e4e3803a8f87 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -862,7 +862,6 @@ pyspark_pandas_slow = Module( "pyspark.pandas.tests.groupby.test_stat_prod", "pyspark.pandas.tests.groupby.test_value_counts", "pyspark.pandas.tests.test_indexing", - "pyspark.pandas.tests.test_ops_on_diff_frames", "pyspark.pandas.tests.diff_frames_ops.test_align", "pyspark.pandas.tests.diff_frames_ops.test_arithmetic", "pyspark.pandas.tests.diff_frames_ops.test_arithmetic_ext", @@ -872,6 +871,7 @@ pyspark_pandas_slow = Module( "pyspark.pandas.tests.diff_frames_ops.test_arithmetic_chain_ext_float", "pyspark.pandas.tests.diff_frames_ops.test_assign_frame", 
"pyspark.pandas.tests.diff_frames_ops.test_assign_series", + "pyspark.pandas.tests.diff_frames_ops.test_basic", "pyspark.pandas.tests.diff_frames_ops.test_bitwise", "pyspark.pandas.tests.diff_frames_ops.test_combine_first", "pyspark.pandas.tests.diff_frames_ops.test_compare_series", @@ -1235,7 +1235,6 @@ pyspark_pandas_connect_part3 = Module( "pyspark.pandas.tests.connect.indexes.test_parity_datetime_map", "pyspark.pandas.tests.connect.indexes.test_parity_datetime_property", "pyspark.pandas.tests.connect.indexes.test_parity_datetime_round", - "pyspark.pandas.tests.connect.test_parity_ops_on_diff_frames", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_arithmetic", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_arithmetic_ext", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_arithmetic_ext_float", @@ -1244,6 +1243,7 @@ pyspark_pandas_connect_part3 = Module( "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_arithmetic_chain_ext_float", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_assign_frame", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_assign_series", + "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_basic", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_bitwise", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_combine_first", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_compare_series", diff --git a/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic.py similarity index 79% rename from python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py rename to python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic.py index 777c3620a4a8..339427f4e00c 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +++ b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic.py @@ -16,20 +16,21 @@ # import unittest 
-from pyspark.pandas.tests.test_ops_on_diff_frames import OpsOnDiffFramesEnabledTestsMixin - from pyspark.testing.connectutils import ReusedConnectTestCase from pyspark.testing.pandasutils import PandasOnSparkTestUtils +from pyspark.pandas.tests.diff_frames_ops.test_basic import BasicMixin -class OpsOnDiffFramesEnabledParityTests( - OpsOnDiffFramesEnabledTestsMixin, PandasOnSparkTestUtils, ReusedConnectTestCase +class BasicParityTests( + BasicMixin, + PandasOnSparkTestUtils, + ReusedConnectTestCase, ): pass if __name__ == "__main__": - from pyspark.pandas.tests.connect.test_parity_ops_on_diff_frames import * # noqa: F401 + from pyspark.pandas.tests.connect.diff_frames_ops.test_parity_basic import * # noqa: F401 try: import xmlrunner # type: ignore[import] diff --git a/python/pyspark/pandas/tests/diff_frames_ops/test_arithmetic.py b/python/pyspark/pandas/tests/diff_frames_ops/test_arithmetic.py index 8af0e80c6e60..8e14fa768779 100644 --- a/python/pyspark/pandas/tests/diff_frames_ops/test_arithmetic.py +++ b/python/pyspark/pandas/tests/diff_frames_ops/test_arithmetic.py @@ -114,6 +114,29 @@ class ArithmeticMixin(ArithmeticTestingFuncMixin): index=list(range(9)), ) + @property + def pdf5(self): + return pd.DataFrame( + { + "a": [1, 2, 3, 4, 5, 6, 7, 8, 9], + "b": [4, 5, 6, 3, 2, 1, 0, 0, 0], + "c": [4, 5, 6, 3, 2, 1, 0, 0, 0], + }, + index=[0, 1, 3, 5, 6, 8, 9, 10, 11], + ).set_index(["a", "b"]) + + @property + def pdf6(self): + return pd.DataFrame( + { + "a": [9, 8, 7, 6, 5, 4, 3, 2, 1], + "b": [0, 0, 0, 4, 5, 6, 1, 2, 3], + "c": [9, 8, 7, 6, 5, 4, 3, 2, 1], + "e": [4, 5, 6, 3, 2, 1, 0, 0, 0], + }, + index=list(range(9)), + ).set_index(["a", "b"]) + @property def pser1(self): midx = pd.MultiIndex( @@ -130,10 +153,32 @@ class ArithmeticMixin(ArithmeticTestingFuncMixin): ) return pd.Series([-45, 200, -1.2, 30, -250, 1.5, 320, 1, -0.3], index=midx) + @property + def psdf5(self): + return ps.from_pandas(self.pdf5) + + @property + def psdf6(self): + return 
ps.from_pandas(self.pdf6) + def test_arithmetic(self): self._test_arithmetic_frame(self.pdf1, self.pdf2, check_extension=False) self._test_arithmetic_series(self.pser1, self.pser2, check_extension=False) + def test_multi_index_arithmetic(self): + psdf5 = self.psdf5 + psdf6 = self.psdf6 + pdf5 = self.pdf5 + pdf6 = self.pdf6 + + # Series + self.assert_eq((psdf5.c - psdf6.e).sort_index(), (pdf5.c - pdf6.e).sort_index()) + + self.assert_eq((psdf5["c"] / psdf6["e"]).sort_index(), (pdf5["c"] / pdf6["e"]).sort_index()) + + # DataFrame + self.assert_eq((psdf5 + psdf6).sort_index(), (pdf5 + pdf6).sort_index(), almost=True) + class ArithmeticTests( ArithmeticMixin, diff --git a/python/pyspark/pandas/tests/test_ops_on_diff_frames.py b/python/pyspark/pandas/tests/diff_frames_ops/test_basic.py similarity index 74% rename from python/pyspark/pandas/tests/test_ops_on_diff_frames.py rename to python/pyspark/pandas/tests/diff_frames_ops/test_basic.py index 75410a65227d..1075188779a1 100644 --- a/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +++ b/python/pyspark/pandas/tests/diff_frames_ops/test_basic.py @@ -26,7 +26,7 @@ from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.testing.sqlutils import SQLTestUtils -class OpsOnDiffFramesEnabledTestsMixin: +class BasicMixin: @classmethod def setUpClass(cls): super().setUpClass() @@ -65,53 +65,6 @@ class OpsOnDiffFramesEnabledTestsMixin: index=list(range(9)), ) - @property - def pdf5(self): - return pd.DataFrame( - { - "a": [1, 2, 3, 4, 5, 6, 7, 8, 9], - "b": [4, 5, 6, 3, 2, 1, 0, 0, 0], - "c": [4, 5, 6, 3, 2, 1, 0, 0, 0], - }, - index=[0, 1, 3, 5, 6, 8, 9, 10, 11], - ).set_index(["a", "b"]) - - @property - def pdf6(self): - return pd.DataFrame( - { - "a": [9, 8, 7, 6, 5, 4, 3, 2, 1], - "b": [0, 0, 0, 4, 5, 6, 1, 2, 3], - "c": [9, 8, 7, 6, 5, 4, 3, 2, 1], - "e": [4, 5, 6, 3, 2, 1, 0, 0, 0], - }, - index=list(range(9)), - ).set_index(["a", "b"]) - - @property - def pser1(self): - midx = pd.MultiIndex( - 
[["lama", "cow", "falcon", "koala"], ["speed", "weight", "length", "power"]], - [[0, 3, 1, 1, 1, 2, 2, 2], [0, 2, 0, 3, 2, 0, 1, 3]], - ) - return pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1], index=midx) - - @property - def pser2(self): - midx = pd.MultiIndex( - [["lama", "cow", "falcon"], ["speed", "weight", "length"]], - [[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]], - ) - return pd.Series([-45, 200, -1.2, 30, -250, 1.5, 320, 1, -0.3], index=midx) - - @property - def pser3(self): - midx = pd.MultiIndex( - [["koalas", "cow", "falcon"], ["speed", "weight", "length"]], - [[0, 0, 0, 1, 1, 1, 2, 2, 2], [1, 1, 2, 0, 0, 2, 2, 2, 1]], - ) - return pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3], index=midx) - @property def psdf1(self): return ps.from_pandas(self.pdf1) @@ -128,26 +81,6 @@ class OpsOnDiffFramesEnabledTestsMixin: def psdf4(self): return ps.from_pandas(self.pdf4) - @property - def psdf5(self): - return ps.from_pandas(self.pdf5) - - @property - def psdf6(self): - return ps.from_pandas(self.pdf6) - - @property - def psser1(self): - return ps.from_pandas(self.pser1) - - @property - def psser2(self): - return ps.from_pandas(self.pser2) - - @property - def psser3(self): - return ps.from_pandas(self.pser3) - def test_ranges(self): self.assert_eq( (ps.range(10) + ps.range(10)).sort_index(), @@ -286,29 +219,17 @@ class OpsOnDiffFramesEnabledTestsMixin: self.assert_eq((psdf1 + psdf4).sort_index(), (pdf1 + pdf4).sort_index(), almost=True) - def test_multi_index_arithmetic(self): - psdf5 = self.psdf5 - psdf6 = self.psdf6 - pdf5 = self.pdf5 - pdf6 = self.pdf6 - - # Series - self.assert_eq((psdf5.c - psdf6.e).sort_index(), (pdf5.c - pdf6.e).sort_index()) - - self.assert_eq((psdf5["c"] / psdf6["e"]).sort_index(), (pdf5["c"] / pdf6["e"]).sort_index()) - - # DataFrame - self.assert_eq((psdf5 + psdf6).sort_index(), (pdf5 + pdf6).sort_index(), almost=True) - -class OpsOnDiffFramesEnabledTests( - OpsOnDiffFramesEnabledTestsMixin, PandasOnSparkTestCase, 
SQLTestUtils +class BasicTests( + BasicMixin, + PandasOnSparkTestCase, + SQLTestUtils, ): pass if __name__ == "__main__": - from pyspark.pandas.tests.test_ops_on_diff_frames import * # noqa: F401 + from pyspark.pandas.tests.diff_frames_ops.test_basic import * # noqa: F401 try: import xmlrunner --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org