This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 091d8da54930 [SPARK-46463][PS][TESTS] Reorganize `OpsOnDiffFramesGroupByExpandingTests`
091d8da54930 is described below
commit 091d8da549306e0474e413e2984a744058be707a
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Thu Dec 21 08:44:41 2023 +0900
[SPARK-46463][PS][TESTS] Reorganize `OpsOnDiffFramesGroupByExpandingTests`
### What changes were proposed in this pull request?
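Break `OpsOnDiffFramesGroupByExpandingTests` into three smaller test classes, each in its own module: `GroupByExpandingTests`, `GroupByExpandingAdvTests`, and `GroupByExpandingCountTests`.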
### Why are the changes needed?
To improve test parallelism.
### Does this PR introduce _any_ user-facing change?
No, this is a test-only change.
### How was this patch tested?
CI.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #44421 from zhengruifeng/ps_test_diff_group_exp.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
dev/sparktestsupport/modules.py | 8 +++--
.../test_parity_groupby_expanding.py} | 13 +++----
.../test_parity_groupby_expanding_adv.py} | 13 +++----
.../test_parity_groupby_expanding_count.py} | 13 ++++---
.../test_groupby_expanding.py} | 42 ++++++++++------------
.../test_groupby_expanding_adv.py} | 42 +++++++++++++++-------
.../test_groupby_expanding_count.py} | 39 +++++++++++++-------
7 files changed, 98 insertions(+), 72 deletions(-)
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 80930c22af8b..0388f1812b0d 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -742,7 +742,9 @@ pyspark_pandas = Module(
"pyspark.pandas.tests.test_internal",
"pyspark.pandas.tests.test_namespace",
"pyspark.pandas.tests.test_numpy_compat",
- "pyspark.pandas.tests.test_ops_on_diff_frames_groupby_expanding",
+ "pyspark.pandas.tests.diff_frames_ops.test_groupby_expanding",
+ "pyspark.pandas.tests.diff_frames_ops.test_groupby_expanding_adv",
+ "pyspark.pandas.tests.diff_frames_ops.test_groupby_expanding_count",
"pyspark.pandas.tests.test_ops_on_diff_frames_groupby_rolling",
"pyspark.pandas.tests.test_repr",
"pyspark.pandas.tests.resample.test_on",
@@ -1128,7 +1130,6 @@ pyspark_pandas_connect_part1 = Module(
"pyspark.pandas.tests.connect.reshape.test_parity_get_dummies_object",
"pyspark.pandas.tests.connect.reshape.test_parity_get_dummies_prefix",
"pyspark.pandas.tests.connect.reshape.test_parity_merge_asof",
-
"pyspark.pandas.tests.connect.test_parity_ops_on_diff_frames_groupby_expanding",
],
excluded_python_implementations=[
"PyPy" # Skip these tests under PyPy since they require numpy,
pandas, and pyarrow and
@@ -1229,6 +1230,9 @@ pyspark_pandas_connect_part3 = Module(
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_shift",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_split_apply_combine",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_transform",
+
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_expanding",
+
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_expanding_adv",
+
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_expanding_count",
],
excluded_python_implementations=[
"PyPy" # Skip these tests under PyPy since they require numpy,
pandas, and pyarrow and
diff --git
a/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py
b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_groupby_expanding.py
similarity index 74%
copy from
python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py
copy to
python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_groupby_expanding.py
index c373268cdb23..7cd3d17de440 100644
---
a/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py
+++
b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_groupby_expanding.py
@@ -16,24 +16,21 @@
#
import unittest
-from pyspark.pandas.tests.test_ops_on_diff_frames_groupby_expanding import (
- OpsOnDiffFramesGroupByExpandingTestsMixin,
-)
+from pyspark.pandas.tests.diff_frames_ops.test_groupby_expanding import
GroupByExpandingMixin
from pyspark.testing.connectutils import ReusedConnectTestCase
-from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
-class OpsOnDiffFramesGroupByExpandingParityTests(
- OpsOnDiffFramesGroupByExpandingTestsMixin,
+class GroupByExpandingParityTests(
+ GroupByExpandingMixin,
PandasOnSparkTestUtils,
- TestUtils,
ReusedConnectTestCase,
):
pass
if __name__ == "__main__":
- from
pyspark.pandas.tests.connect.test_parity_ops_on_diff_frames_groupby_expanding
import *
+ from
pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_expanding
import * # noqa
try:
import xmlrunner # type: ignore[import]
diff --git
a/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py
b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_groupby_expanding_adv.py
similarity index 74%
copy from
python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py
copy to
python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_groupby_expanding_adv.py
index c373268cdb23..1563d891b6ea 100644
---
a/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py
+++
b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_groupby_expanding_adv.py
@@ -16,24 +16,21 @@
#
import unittest
-from pyspark.pandas.tests.test_ops_on_diff_frames_groupby_expanding import (
- OpsOnDiffFramesGroupByExpandingTestsMixin,
-)
+from pyspark.pandas.tests.diff_frames_ops.test_groupby_expanding_adv import
GroupByExpandingAdvMixin
from pyspark.testing.connectutils import ReusedConnectTestCase
-from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
-class OpsOnDiffFramesGroupByExpandingParityTests(
- OpsOnDiffFramesGroupByExpandingTestsMixin,
+class GroupByExpandingAdvParityTests(
+ GroupByExpandingAdvMixin,
PandasOnSparkTestUtils,
- TestUtils,
ReusedConnectTestCase,
):
pass
if __name__ == "__main__":
- from
pyspark.pandas.tests.connect.test_parity_ops_on_diff_frames_groupby_expanding
import *
+ from
pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_expanding_adv
import * # noqa
try:
import xmlrunner # type: ignore[import]
diff --git
a/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py
b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_groupby_expanding_count.py
similarity index 74%
copy from
python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py
copy to
python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_groupby_expanding_count.py
index c373268cdb23..e95893518202 100644
---
a/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py
+++
b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_groupby_expanding_count.py
@@ -16,24 +16,23 @@
#
import unittest
-from pyspark.pandas.tests.test_ops_on_diff_frames_groupby_expanding import (
- OpsOnDiffFramesGroupByExpandingTestsMixin,
+from pyspark.pandas.tests.diff_frames_ops.test_groupby_expanding_count import (
+ GroupByExpandingCountMixin,
)
from pyspark.testing.connectutils import ReusedConnectTestCase
-from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
-class OpsOnDiffFramesGroupByExpandingParityTests(
- OpsOnDiffFramesGroupByExpandingTestsMixin,
+class GroupByExpandingCountParityTests(
+ GroupByExpandingCountMixin,
PandasOnSparkTestUtils,
- TestUtils,
ReusedConnectTestCase,
):
pass
if __name__ == "__main__":
- from
pyspark.pandas.tests.connect.test_parity_ops_on_diff_frames_groupby_expanding
import *
+ from
pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_expanding_count
import * # noqa
try:
import xmlrunner # type: ignore[import]
diff --git
a/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py
b/python/pyspark/pandas/tests/diff_frames_ops/test_groupby_expanding.py
similarity index 82%
rename from
python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py
rename to python/pyspark/pandas/tests/diff_frames_ops/test_groupby_expanding.py
index c52341785110..ba098153f67a 100644
--- a/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py
+++ b/python/pyspark/pandas/tests/diff_frames_ops/test_groupby_expanding.py
@@ -19,20 +19,11 @@ import pandas as pd
from pyspark import pandas as ps
from pyspark.pandas.config import set_option, reset_option
-from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.testing.sqlutils import SQLTestUtils
-class OpsOnDiffFramesGroupByExpandingTestsMixin:
- @classmethod
- def setUpClass(cls):
- super().setUpClass()
- set_option("compute.ops_on_diff_frames", True)
-
- @classmethod
- def tearDownClass(cls):
- reset_option("compute.ops_on_diff_frames")
- super().tearDownClass()
-
+class GroupByExpandingTestingFuncMixin:
def _test_groupby_expanding_func(self, f):
pser = pd.Series([1, 2, 3])
pkey = pd.Series([1, 2, 3], name="a")
@@ -63,8 +54,17 @@ class OpsOnDiffFramesGroupByExpandingTestsMixin:
getattr(pdf.groupby(pkey)[["b"]].expanding(2), f)().sort_index(),
)
- def test_groupby_expanding_count(self):
- self._test_groupby_expanding_func("count")
+
+class GroupByExpandingMixin(GroupByExpandingTestingFuncMixin):
+ @classmethod
+ def setUpClass(cls):
+ super().setUpClass()
+ set_option("compute.ops_on_diff_frames", True)
+
+ @classmethod
+ def tearDownClass(cls):
+ reset_option("compute.ops_on_diff_frames")
+ super().tearDownClass()
def test_groupby_expanding_min(self):
self._test_groupby_expanding_func("min")
@@ -78,22 +78,18 @@ class OpsOnDiffFramesGroupByExpandingTestsMixin:
def test_groupby_expanding_sum(self):
self._test_groupby_expanding_func("sum")
- def test_groupby_expanding_std(self):
- self._test_groupby_expanding_func("std")
-
- def test_groupby_expanding_var(self):
- self._test_groupby_expanding_func("var")
-
-class OpsOnDiffFramesGroupByExpandingTests(
- OpsOnDiffFramesGroupByExpandingTestsMixin, PandasOnSparkTestCase, TestUtils
+class GroupByExpandingTests(
+ GroupByExpandingMixin,
+ PandasOnSparkTestCase,
+ SQLTestUtils,
):
pass
if __name__ == "__main__":
import unittest
- from pyspark.pandas.tests.test_ops_on_diff_frames_groupby_expanding import
* # noqa: F401
+ from pyspark.pandas.tests.diff_frames_ops.test_groupby_expanding import *
# noqa
try:
import xmlrunner
diff --git
a/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py
b/python/pyspark/pandas/tests/diff_frames_ops/test_groupby_expanding_adv.py
similarity index 50%
copy from
python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py
copy to
python/pyspark/pandas/tests/diff_frames_ops/test_groupby_expanding_adv.py
index c373268cdb23..93ed85d173b9 100644
---
a/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py
+++ b/python/pyspark/pandas/tests/diff_frames_ops/test_groupby_expanding_adv.py
@@ -14,29 +14,47 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-import unittest
-from pyspark.pandas.tests.test_ops_on_diff_frames_groupby_expanding import (
- OpsOnDiffFramesGroupByExpandingTestsMixin,
+from pyspark.pandas.config import set_option, reset_option
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.testing.sqlutils import SQLTestUtils
+from pyspark.pandas.tests.diff_frames_ops.test_groupby_expanding import (
+ GroupByExpandingTestingFuncMixin,
)
-from pyspark.testing.connectutils import ReusedConnectTestCase
-from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils
-class OpsOnDiffFramesGroupByExpandingParityTests(
- OpsOnDiffFramesGroupByExpandingTestsMixin,
- PandasOnSparkTestUtils,
- TestUtils,
- ReusedConnectTestCase,
+class GroupByExpandingAdvMixin(GroupByExpandingTestingFuncMixin):
+ @classmethod
+ def setUpClass(cls):
+ super().setUpClass()
+ set_option("compute.ops_on_diff_frames", True)
+
+ @classmethod
+ def tearDownClass(cls):
+ reset_option("compute.ops_on_diff_frames")
+ super().tearDownClass()
+
+ def test_groupby_expanding_std(self):
+ self._test_groupby_expanding_func("std")
+
+ def test_groupby_expanding_var(self):
+ self._test_groupby_expanding_func("var")
+
+
+class GroupByExpandingAdvTests(
+ GroupByExpandingAdvMixin,
+ PandasOnSparkTestCase,
+ SQLTestUtils,
):
pass
if __name__ == "__main__":
- from
pyspark.pandas.tests.connect.test_parity_ops_on_diff_frames_groupby_expanding
import *
+ import unittest
+ from pyspark.pandas.tests.diff_frames_ops.test_groupby_expanding_adv
import * # noqa
try:
- import xmlrunner # type: ignore[import]
+ import xmlrunner
testRunner = xmlrunner.XMLTestRunner(output="target/test-reports",
verbosity=2)
except ImportError:
diff --git
a/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py
b/python/pyspark/pandas/tests/diff_frames_ops/test_groupby_expanding_count.py
similarity index 52%
rename from
python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py
rename to
python/pyspark/pandas/tests/diff_frames_ops/test_groupby_expanding_count.py
index c373268cdb23..9b968ddff7f4 100644
---
a/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py
+++
b/python/pyspark/pandas/tests/diff_frames_ops/test_groupby_expanding_count.py
@@ -14,29 +14,44 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-import unittest
-from pyspark.pandas.tests.test_ops_on_diff_frames_groupby_expanding import (
- OpsOnDiffFramesGroupByExpandingTestsMixin,
+from pyspark.pandas.config import set_option, reset_option
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.testing.sqlutils import SQLTestUtils
+from pyspark.pandas.tests.diff_frames_ops.test_groupby_expanding import (
+ GroupByExpandingTestingFuncMixin,
)
-from pyspark.testing.connectutils import ReusedConnectTestCase
-from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils
-class OpsOnDiffFramesGroupByExpandingParityTests(
- OpsOnDiffFramesGroupByExpandingTestsMixin,
- PandasOnSparkTestUtils,
- TestUtils,
- ReusedConnectTestCase,
+class GroupByExpandingCountMixin(GroupByExpandingTestingFuncMixin):
+ @classmethod
+ def setUpClass(cls):
+ super().setUpClass()
+ set_option("compute.ops_on_diff_frames", True)
+
+ @classmethod
+ def tearDownClass(cls):
+ reset_option("compute.ops_on_diff_frames")
+ super().tearDownClass()
+
+ def test_groupby_expanding_count(self):
+ self._test_groupby_expanding_func("count")
+
+
+class GroupByExpandingCountTests(
+ GroupByExpandingCountMixin,
+ PandasOnSparkTestCase,
+ SQLTestUtils,
):
pass
if __name__ == "__main__":
- from
pyspark.pandas.tests.connect.test_parity_ops_on_diff_frames_groupby_expanding
import *
+ import unittest
+ from pyspark.pandas.tests.diff_frames_ops.test_groupby_expanding_count
import * # noqa
try:
- import xmlrunner # type: ignore[import]
+ import xmlrunner
testRunner = xmlrunner.XMLTestRunner(output="target/test-reports",
verbosity=2)
except ImportError:
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]