This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new ef7e1e685f61 [SPARK-46471][PS][TESTS][FOLLOWUPS] Move
`OpsOnDiffFramesEnabledTests` to `pyspark.pandas.tests.diff_frames_ops.*`
ef7e1e685f61 is described below
commit ef7e1e685f615f7763998b076dafb206cf46985d
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Sun Dec 24 14:31:12 2023 -0800
[SPARK-46471][PS][TESTS][FOLLOWUPS] Move `OpsOnDiffFramesEnabledTests` to
`pyspark.pandas.tests.diff_frames_ops.*`
### What changes were proposed in this pull request?
Move `OpsOnDiffFramesEnabledTests` to
`pyspark.pandas.tests.diff_frames_ops.*`
### Why are the changes needed?
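Test code cleanup: the remaining tests in `test_ops_on_diff_frames` are moved into the `diff_frames_ops` test package (`test_basic`, with `test_multi_index_arithmetic` moved to `test_arithmetic`).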
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
CI (existing test suites).
### Was this patch authored or co-authored using generative AI tooling?
no
Closes #44471 from zhengruifeng/ps_test_diff_ops_3.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
dev/sparktestsupport/modules.py | 4 +-
.../test_parity_basic.py} | 11 +--
.../tests/diff_frames_ops/test_arithmetic.py | 45 +++++++++++
.../test_basic.py} | 91 ++--------------------
4 files changed, 59 insertions(+), 92 deletions(-)
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 939e88bf95b2..e4e3803a8f87 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -862,7 +862,6 @@ pyspark_pandas_slow = Module(
"pyspark.pandas.tests.groupby.test_stat_prod",
"pyspark.pandas.tests.groupby.test_value_counts",
"pyspark.pandas.tests.test_indexing",
- "pyspark.pandas.tests.test_ops_on_diff_frames",
"pyspark.pandas.tests.diff_frames_ops.test_align",
"pyspark.pandas.tests.diff_frames_ops.test_arithmetic",
"pyspark.pandas.tests.diff_frames_ops.test_arithmetic_ext",
@@ -872,6 +871,7 @@ pyspark_pandas_slow = Module(
"pyspark.pandas.tests.diff_frames_ops.test_arithmetic_chain_ext_float",
"pyspark.pandas.tests.diff_frames_ops.test_assign_frame",
"pyspark.pandas.tests.diff_frames_ops.test_assign_series",
+ "pyspark.pandas.tests.diff_frames_ops.test_basic",
"pyspark.pandas.tests.diff_frames_ops.test_bitwise",
"pyspark.pandas.tests.diff_frames_ops.test_combine_first",
"pyspark.pandas.tests.diff_frames_ops.test_compare_series",
@@ -1235,7 +1235,6 @@ pyspark_pandas_connect_part3 = Module(
"pyspark.pandas.tests.connect.indexes.test_parity_datetime_map",
"pyspark.pandas.tests.connect.indexes.test_parity_datetime_property",
"pyspark.pandas.tests.connect.indexes.test_parity_datetime_round",
- "pyspark.pandas.tests.connect.test_parity_ops_on_diff_frames",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_arithmetic",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_arithmetic_ext",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_arithmetic_ext_float",
@@ -1244,6 +1243,7 @@ pyspark_pandas_connect_part3 = Module(
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_arithmetic_chain_ext_float",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_assign_frame",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_assign_series",
+ "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_basic",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_bitwise",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_combine_first",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_compare_series",
diff --git
a/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py
b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic.py
similarity index 79%
rename from
python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py
rename to
python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic.py
index 777c3620a4a8..339427f4e00c 100644
--- a/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py
+++ b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic.py
@@ -16,20 +16,21 @@
#
import unittest
-from pyspark.pandas.tests.test_ops_on_diff_frames import
OpsOnDiffFramesEnabledTestsMixin
-
from pyspark.testing.connectutils import ReusedConnectTestCase
from pyspark.testing.pandasutils import PandasOnSparkTestUtils
+from pyspark.pandas.tests.diff_frames_ops.test_basic import BasicMixin
-class OpsOnDiffFramesEnabledParityTests(
- OpsOnDiffFramesEnabledTestsMixin, PandasOnSparkTestUtils,
ReusedConnectTestCase
+class BasicParityTests(
+ BasicMixin,
+ PandasOnSparkTestUtils,
+ ReusedConnectTestCase,
):
pass
if __name__ == "__main__":
- from pyspark.pandas.tests.connect.test_parity_ops_on_diff_frames import *
# noqa: F401
+ from pyspark.pandas.tests.connect.diff_frames_ops.test_parity_basic import
* # noqa: F401
try:
import xmlrunner # type: ignore[import]
diff --git a/python/pyspark/pandas/tests/diff_frames_ops/test_arithmetic.py
b/python/pyspark/pandas/tests/diff_frames_ops/test_arithmetic.py
index 8af0e80c6e60..8e14fa768779 100644
--- a/python/pyspark/pandas/tests/diff_frames_ops/test_arithmetic.py
+++ b/python/pyspark/pandas/tests/diff_frames_ops/test_arithmetic.py
@@ -114,6 +114,29 @@ class ArithmeticMixin(ArithmeticTestingFuncMixin):
index=list(range(9)),
)
+ @property
+ def pdf5(self):
+ return pd.DataFrame(
+ {
+ "a": [1, 2, 3, 4, 5, 6, 7, 8, 9],
+ "b": [4, 5, 6, 3, 2, 1, 0, 0, 0],
+ "c": [4, 5, 6, 3, 2, 1, 0, 0, 0],
+ },
+ index=[0, 1, 3, 5, 6, 8, 9, 10, 11],
+ ).set_index(["a", "b"])
+
+ @property
+ def pdf6(self):
+ return pd.DataFrame(
+ {
+ "a": [9, 8, 7, 6, 5, 4, 3, 2, 1],
+ "b": [0, 0, 0, 4, 5, 6, 1, 2, 3],
+ "c": [9, 8, 7, 6, 5, 4, 3, 2, 1],
+ "e": [4, 5, 6, 3, 2, 1, 0, 0, 0],
+ },
+ index=list(range(9)),
+ ).set_index(["a", "b"])
+
@property
def pser1(self):
midx = pd.MultiIndex(
@@ -130,10 +153,32 @@ class ArithmeticMixin(ArithmeticTestingFuncMixin):
)
return pd.Series([-45, 200, -1.2, 30, -250, 1.5, 320, 1, -0.3],
index=midx)
+ @property
+ def psdf5(self):
+ return ps.from_pandas(self.pdf5)
+
+ @property
+ def psdf6(self):
+ return ps.from_pandas(self.pdf6)
+
def test_arithmetic(self):
self._test_arithmetic_frame(self.pdf1, self.pdf2,
check_extension=False)
self._test_arithmetic_series(self.pser1, self.pser2,
check_extension=False)
+ def test_multi_index_arithmetic(self):
+ psdf5 = self.psdf5
+ psdf6 = self.psdf6
+ pdf5 = self.pdf5
+ pdf6 = self.pdf6
+
+ # Series
+ self.assert_eq((psdf5.c - psdf6.e).sort_index(), (pdf5.c -
pdf6.e).sort_index())
+
+ self.assert_eq((psdf5["c"] / psdf6["e"]).sort_index(), (pdf5["c"] /
pdf6["e"]).sort_index())
+
+ # DataFrame
+ self.assert_eq((psdf5 + psdf6).sort_index(), (pdf5 +
pdf6).sort_index(), almost=True)
+
class ArithmeticTests(
ArithmeticMixin,
diff --git a/python/pyspark/pandas/tests/test_ops_on_diff_frames.py
b/python/pyspark/pandas/tests/diff_frames_ops/test_basic.py
similarity index 74%
rename from python/pyspark/pandas/tests/test_ops_on_diff_frames.py
rename to python/pyspark/pandas/tests/diff_frames_ops/test_basic.py
index 75410a65227d..1075188779a1 100644
--- a/python/pyspark/pandas/tests/test_ops_on_diff_frames.py
+++ b/python/pyspark/pandas/tests/diff_frames_ops/test_basic.py
@@ -26,7 +26,7 @@ from pyspark.testing.pandasutils import PandasOnSparkTestCase
from pyspark.testing.sqlutils import SQLTestUtils
-class OpsOnDiffFramesEnabledTestsMixin:
+class BasicMixin:
@classmethod
def setUpClass(cls):
super().setUpClass()
@@ -65,53 +65,6 @@ class OpsOnDiffFramesEnabledTestsMixin:
index=list(range(9)),
)
- @property
- def pdf5(self):
- return pd.DataFrame(
- {
- "a": [1, 2, 3, 4, 5, 6, 7, 8, 9],
- "b": [4, 5, 6, 3, 2, 1, 0, 0, 0],
- "c": [4, 5, 6, 3, 2, 1, 0, 0, 0],
- },
- index=[0, 1, 3, 5, 6, 8, 9, 10, 11],
- ).set_index(["a", "b"])
-
- @property
- def pdf6(self):
- return pd.DataFrame(
- {
- "a": [9, 8, 7, 6, 5, 4, 3, 2, 1],
- "b": [0, 0, 0, 4, 5, 6, 1, 2, 3],
- "c": [9, 8, 7, 6, 5, 4, 3, 2, 1],
- "e": [4, 5, 6, 3, 2, 1, 0, 0, 0],
- },
- index=list(range(9)),
- ).set_index(["a", "b"])
-
- @property
- def pser1(self):
- midx = pd.MultiIndex(
- [["lama", "cow", "falcon", "koala"], ["speed", "weight", "length",
"power"]],
- [[0, 3, 1, 1, 1, 2, 2, 2], [0, 2, 0, 3, 2, 0, 1, 3]],
- )
- return pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1], index=midx)
-
- @property
- def pser2(self):
- midx = pd.MultiIndex(
- [["lama", "cow", "falcon"], ["speed", "weight", "length"]],
- [[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]],
- )
- return pd.Series([-45, 200, -1.2, 30, -250, 1.5, 320, 1, -0.3],
index=midx)
-
- @property
- def pser3(self):
- midx = pd.MultiIndex(
- [["koalas", "cow", "falcon"], ["speed", "weight", "length"]],
- [[0, 0, 0, 1, 1, 1, 2, 2, 2], [1, 1, 2, 0, 0, 2, 2, 2, 1]],
- )
- return pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3], index=midx)
-
@property
def psdf1(self):
return ps.from_pandas(self.pdf1)
@@ -128,26 +81,6 @@ class OpsOnDiffFramesEnabledTestsMixin:
def psdf4(self):
return ps.from_pandas(self.pdf4)
- @property
- def psdf5(self):
- return ps.from_pandas(self.pdf5)
-
- @property
- def psdf6(self):
- return ps.from_pandas(self.pdf6)
-
- @property
- def psser1(self):
- return ps.from_pandas(self.pser1)
-
- @property
- def psser2(self):
- return ps.from_pandas(self.pser2)
-
- @property
- def psser3(self):
- return ps.from_pandas(self.pser3)
-
def test_ranges(self):
self.assert_eq(
(ps.range(10) + ps.range(10)).sort_index(),
@@ -286,29 +219,17 @@ class OpsOnDiffFramesEnabledTestsMixin:
self.assert_eq((psdf1 + psdf4).sort_index(), (pdf1 +
pdf4).sort_index(), almost=True)
- def test_multi_index_arithmetic(self):
- psdf5 = self.psdf5
- psdf6 = self.psdf6
- pdf5 = self.pdf5
- pdf6 = self.pdf6
-
- # Series
- self.assert_eq((psdf5.c - psdf6.e).sort_index(), (pdf5.c -
pdf6.e).sort_index())
-
- self.assert_eq((psdf5["c"] / psdf6["e"]).sort_index(), (pdf5["c"] /
pdf6["e"]).sort_index())
-
- # DataFrame
- self.assert_eq((psdf5 + psdf6).sort_index(), (pdf5 +
pdf6).sort_index(), almost=True)
-
-class OpsOnDiffFramesEnabledTests(
- OpsOnDiffFramesEnabledTestsMixin, PandasOnSparkTestCase, SQLTestUtils
+class BasicTests(
+ BasicMixin,
+ PandasOnSparkTestCase,
+ SQLTestUtils,
):
pass
if __name__ == "__main__":
- from pyspark.pandas.tests.test_ops_on_diff_frames import * # noqa: F401
+ from pyspark.pandas.tests.diff_frames_ops.test_basic import * # noqa: F401
try:
import xmlrunner
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]