This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new d85ad1c14182 [SPARK-46500][PS][TESTS] Reorganize
`FrameParityPivotTests`
d85ad1c14182 is described below
commit d85ad1c14182d847e8a5d5d49cf21cd9079b284f
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Mon Dec 25 17:43:57 2023 +0800
[SPARK-46500][PS][TESTS] Reorganize `FrameParityPivotTests`
### What changes were proposed in this pull request?
Reorganize `FrameParityPivotTests`: break `test_pivot_table` into multiple
tests
### Why are the changes needed?
this test is slow
```
Starting test(python3.9):
pyspark.pandas.tests.connect.computation.test_parity_pivot (temp output:
/__w/spark/spark/python/target/5f37e442-9037-47cc-8c6b-e9a273299d0d/python3.9__pyspark.pandas.tests.connect.computation.test_parity_pivot__ozvdx_ay.log)
Finished test(python3.9):
pyspark.pandas.tests.connect.computation.test_parity_pivot (524s)
```
### Does this PR introduce _any_ user-facing change?
no, test only
### How was this patch tested?
ci
### Was this patch authored or co-authored using generative AI tooling?
no
Closes #44478 from zhengruifeng/ps_test_pivot_multi.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
---
dev/sparktestsupport/modules.py | 8 ++
.../pyspark/pandas/tests/computation/test_pivot.py | 149 +--------------------
.../pandas/tests/computation/test_pivot_table.py | 93 +++++++++++++
.../tests/computation/test_pivot_table_adv.py | 93 +++++++++++++
.../computation/test_pivot_table_multi_idx.py | 91 +++++++++++++
.../computation/test_pivot_table_multi_idx_adv.py | 93 +++++++++++++
.../tests/connect/computation/test_parity_pivot.py | 6 +-
..._parity_pivot.py => test_parity_pivot_table.py} | 10 +-
...ity_pivot.py => test_parity_pivot_table_adv.py} | 10 +-
...vot.py => test_parity_pivot_table_multi_idx.py} | 10 +-
...py => test_parity_pivot_table_multi_idx_adv.py} | 12 +-
11 files changed, 418 insertions(+), 157 deletions(-)
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index e4e3803a8f87..6f41b6f0eddf 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -824,6 +824,10 @@ pyspark_pandas_slow = Module(
"pyspark.pandas.tests.computation.test_melt",
"pyspark.pandas.tests.computation.test_missing_data",
"pyspark.pandas.tests.computation.test_pivot",
+ "pyspark.pandas.tests.computation.test_pivot_table",
+ "pyspark.pandas.tests.computation.test_pivot_table_adv",
+ "pyspark.pandas.tests.computation.test_pivot_table_multi_idx",
+ "pyspark.pandas.tests.computation.test_pivot_table_multi_idx_adv",
"pyspark.pandas.tests.computation.test_stats",
"pyspark.pandas.tests.frame.test_attrs",
"pyspark.pandas.tests.frame.test_axis",
@@ -1162,6 +1166,10 @@ pyspark_pandas_connect_part2 = Module(
python_test_goals=[
# pandas-on-Spark unittests
"pyspark.pandas.tests.connect.computation.test_parity_pivot",
+ "pyspark.pandas.tests.connect.computation.test_parity_pivot_table",
+ "pyspark.pandas.tests.connect.computation.test_parity_pivot_table_adv",
+ "pyspark.pandas.tests.connect.computation.test_parity_pivot_table_multi_idx",
+ "pyspark.pandas.tests.connect.computation.test_parity_pivot_table_multi_idx_adv",
"pyspark.pandas.tests.connect.computation.test_parity_stats",
"pyspark.pandas.tests.connect.indexes.test_parity_base_slow",
"pyspark.pandas.tests.connect.frame.test_parity_interpolate",
diff --git a/python/pyspark/pandas/tests/computation/test_pivot.py
b/python/pyspark/pandas/tests/computation/test_pivot.py
index 8a373108ddc8..2670fa384dc7 100644
--- a/python/pyspark/pandas/tests/computation/test_pivot.py
+++ b/python/pyspark/pandas/tests/computation/test_pivot.py
@@ -61,149 +61,6 @@ class FramePivotMixin:
# columns="a", values="b", fill_value=999).dtypes,
pdf.pivot_table(index=['e', 'c'],
# columns="a", values="b", fill_value=999).dtypes)
- def test_pivot_table(self):
- pdf = pd.DataFrame(
- {
- "a": [4, 2, 3, 4, 8, 6],
- "b": [1, 2, 2, 4, 2, 4],
- "e": [10, 20, 20, 40, 20, 40],
- "c": [1, 2, 9, 4, 7, 4],
- "d": [-1, -2, -3, -4, -5, -6],
- },
- index=np.random.rand(6),
- )
- psdf = ps.from_pandas(pdf)
-
- # Checking if both DataFrames have the same results
- self.assert_eq(
- psdf.pivot_table(columns="a", values="b").sort_index(),
- pdf.pivot_table(columns="a", values="b").sort_index(),
- almost=True,
- )
-
- self.assert_eq(
- psdf.pivot_table(index=["c"], columns="a",
values="b").sort_index(),
- pdf.pivot_table(index=["c"], columns="a", values="b").sort_index(),
- almost=True,
- )
-
- self.assert_eq(
- psdf.pivot_table(index=["c"], columns="a", values="b",
aggfunc="sum").sort_index(),
- pdf.pivot_table(index=["c"], columns="a", values="b",
aggfunc="sum").sort_index(),
- almost=True,
- )
-
- self.assert_eq(
- psdf.pivot_table(index=["c"], columns="a", values=["b"],
aggfunc="sum").sort_index(),
- pdf.pivot_table(index=["c"], columns="a", values=["b"],
aggfunc="sum").sort_index(),
- almost=True,
- )
-
- self.assert_eq(
- psdf.pivot_table(
- index=["c"], columns="a", values=["b", "e"], aggfunc="sum"
- ).sort_index(),
- pdf.pivot_table(
- index=["c"], columns="a", values=["b", "e"], aggfunc="sum"
- ).sort_index(),
- almost=True,
- )
-
- self.assert_eq(
- psdf.pivot_table(
- index=["c"], columns="a", values=["b", "e", "d"], aggfunc="sum"
- ).sort_index(),
- pdf.pivot_table(
- index=["c"], columns="a", values=["b", "e", "d"], aggfunc="sum"
- ).sort_index(),
- almost=True,
- )
-
- self.assert_eq(
- psdf.pivot_table(
- index=["c"], columns="a", values=["b", "e"], aggfunc={"b":
"mean", "e": "sum"}
- ).sort_index(),
- pdf.pivot_table(
- index=["c"], columns="a", values=["b", "e"], aggfunc={"b":
"mean", "e": "sum"}
- ).sort_index(),
- almost=True,
- )
-
- self.assert_eq(
- psdf.pivot_table(index=["e", "c"], columns="a",
values="b").sort_index(),
- pdf.pivot_table(index=["e", "c"], columns="a",
values="b").sort_index(),
- almost=True,
- )
-
- self.assert_eq(
- psdf.pivot_table(
- index=["e", "c"], columns="a", values="b", fill_value=999
- ).sort_index(),
- pdf.pivot_table(index=["e", "c"], columns="a", values="b",
fill_value=999).sort_index(),
- almost=True,
- )
-
- # multi-index columns
- columns = pd.MultiIndex.from_tuples(
- [("x", "a"), ("x", "b"), ("y", "e"), ("z", "c"), ("w", "d")]
- )
- pdf.columns = columns
- psdf.columns = columns
-
- self.assert_eq(
- psdf.pivot_table(columns=("x", "a"), values=("x",
"b")).sort_index(),
- pdf.pivot_table(columns=[("x", "a")], values=[("x",
"b")]).sort_index(),
- almost=True,
- )
-
- self.assert_eq(
- psdf.pivot_table(
- index=[("z", "c")], columns=("x", "a"), values=[("x", "b")]
- ).sort_index(),
- pdf.pivot_table(
- index=[("z", "c")], columns=[("x", "a")], values=[("x", "b")]
- ).sort_index(),
- almost=True,
- )
-
- self.assert_eq(
- psdf.pivot_table(
- index=[("z", "c")], columns=("x", "a"), values=[("x", "b"),
("y", "e")]
- ).sort_index(),
- pdf.pivot_table(
- index=[("z", "c")], columns=[("x", "a")], values=[("x", "b"),
("y", "e")]
- ).sort_index(),
- almost=True,
- )
-
- self.assert_eq(
- psdf.pivot_table(
- index=[("z", "c")], columns=("x", "a"), values=[("x", "b"),
("y", "e"), ("w", "d")]
- ).sort_index(),
- pdf.pivot_table(
- index=[("z", "c")],
- columns=[("x", "a")],
- values=[("x", "b"), ("y", "e"), ("w", "d")],
- ).sort_index(),
- almost=True,
- )
-
- self.assert_eq(
- psdf.pivot_table(
- index=[("z", "c")],
- columns=("x", "a"),
- values=[("x", "b"), ("y", "e")],
- aggfunc={("x", "b"): "mean", ("y", "e"): "sum"},
- ).sort_index(),
- pdf.pivot_table(
- index=[("z", "c")],
- columns=[("x", "a")],
- values=[("x", "b"), ("y", "e")],
- aggfunc={("x", "b"): "mean", ("y", "e"): "sum"},
- ).sort_index(),
- almost=True,
- )
-
def test_pivot_table_and_index(self):
# https://github.com/databricks/koalas/issues/805
pdf = pd.DataFrame(
@@ -332,7 +189,11 @@ class FramePivotMixin:
psdf.pivot_table(index=["C"], columns="A", values="B",
aggfunc={"B": "mean"})
-class FramePivotTests(FramePivotMixin, ComparisonTestBase, SQLTestUtils):
+class FramePivotTests(
+ FramePivotMixin,
+ ComparisonTestBase,
+ SQLTestUtils,
+):
pass
diff --git a/python/pyspark/pandas/tests/computation/test_pivot_table.py
b/python/pyspark/pandas/tests/computation/test_pivot_table.py
new file mode 100644
index 000000000000..5bca2cee55f5
--- /dev/null
+++ b/python/pyspark/pandas/tests/computation/test_pivot_table.py
@@ -0,0 +1,93 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+import numpy as np
+import pandas as pd
+
+from pyspark import pandas as ps
+from pyspark.testing.pandasutils import ComparisonTestBase
+from pyspark.testing.sqlutils import SQLTestUtils
+
+
+class PivotTableMixin:
+ def test_pivot_table(self):
+ pdf = pd.DataFrame(
+ {
+ "a": [4, 2, 3, 4, 8, 6],
+ "b": [1, 2, 2, 4, 2, 4],
+ "e": [10, 20, 20, 40, 20, 40],
+ "c": [1, 2, 9, 4, 7, 4],
+ "d": [-1, -2, -3, -4, -5, -6],
+ },
+ index=np.random.rand(6),
+ )
+ psdf = ps.from_pandas(pdf)
+
+ self.assert_eq(
+ psdf.pivot_table(columns="a", values="b").sort_index(),
+ pdf.pivot_table(columns="a", values="b").sort_index(),
+ almost=True,
+ )
+
+ self.assert_eq(
+ psdf.pivot_table(index=["c"], columns="a",
values="b").sort_index(),
+ pdf.pivot_table(index=["c"], columns="a", values="b").sort_index(),
+ almost=True,
+ )
+
+ self.assert_eq(
+ psdf.pivot_table(index=["c"], columns="a", values="b",
aggfunc="sum").sort_index(),
+ pdf.pivot_table(index=["c"], columns="a", values="b",
aggfunc="sum").sort_index(),
+ almost=True,
+ )
+
+ self.assert_eq(
+ psdf.pivot_table(index=["c"], columns="a", values=["b"],
aggfunc="sum").sort_index(),
+ pdf.pivot_table(index=["c"], columns="a", values=["b"],
aggfunc="sum").sort_index(),
+ almost=True,
+ )
+
+ self.assert_eq(
+ psdf.pivot_table(
+ index=["c"], columns="a", values=["b", "e"], aggfunc="sum"
+ ).sort_index(),
+ pdf.pivot_table(
+ index=["c"], columns="a", values=["b", "e"], aggfunc="sum"
+ ).sort_index(),
+ almost=True,
+ )
+
+
+class PivotTableTests(
+ PivotTableMixin,
+ ComparisonTestBase,
+ SQLTestUtils,
+):
+ pass
+
+
+if __name__ == "__main__":
+ from pyspark.pandas.tests.computation.test_pivot_table import * # noqa:
F401
+
+ try:
+ import xmlrunner
+
+ testRunner = xmlrunner.XMLTestRunner(output="target/test-reports",
verbosity=2)
+ except ImportError:
+ testRunner = None
+ unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/computation/test_pivot_table_adv.py
b/python/pyspark/pandas/tests/computation/test_pivot_table_adv.py
new file mode 100644
index 000000000000..d4aa75607529
--- /dev/null
+++ b/python/pyspark/pandas/tests/computation/test_pivot_table_adv.py
@@ -0,0 +1,93 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+import numpy as np
+import pandas as pd
+
+from pyspark import pandas as ps
+from pyspark.testing.pandasutils import ComparisonTestBase
+from pyspark.testing.sqlutils import SQLTestUtils
+
+
+class PivotTableAdvMixin:
+ def test_pivot_table(self):
+ pdf = pd.DataFrame(
+ {
+ "a": [4, 2, 3, 4, 8, 6],
+ "b": [1, 2, 2, 4, 2, 4],
+ "e": [10, 20, 20, 40, 20, 40],
+ "c": [1, 2, 9, 4, 7, 4],
+ "d": [-1, -2, -3, -4, -5, -6],
+ },
+ index=np.random.rand(6),
+ )
+ psdf = ps.from_pandas(pdf)
+
+ self.assert_eq(
+ psdf.pivot_table(
+ index=["c"], columns="a", values=["b", "e", "d"], aggfunc="sum"
+ ).sort_index(),
+ pdf.pivot_table(
+ index=["c"], columns="a", values=["b", "e", "d"], aggfunc="sum"
+ ).sort_index(),
+ almost=True,
+ )
+
+ self.assert_eq(
+ psdf.pivot_table(
+ index=["c"], columns="a", values=["b", "e"], aggfunc={"b":
"mean", "e": "sum"}
+ ).sort_index(),
+ pdf.pivot_table(
+ index=["c"], columns="a", values=["b", "e"], aggfunc={"b":
"mean", "e": "sum"}
+ ).sort_index(),
+ almost=True,
+ )
+
+ self.assert_eq(
+ psdf.pivot_table(index=["e", "c"], columns="a",
values="b").sort_index(),
+ pdf.pivot_table(index=["e", "c"], columns="a",
values="b").sort_index(),
+ almost=True,
+ )
+
+ self.assert_eq(
+ psdf.pivot_table(
+ index=["e", "c"], columns="a", values="b", fill_value=999
+ ).sort_index(),
+ pdf.pivot_table(index=["e", "c"], columns="a", values="b",
fill_value=999).sort_index(),
+ almost=True,
+ )
+
+
+class PivotTableAdvTests(
+ PivotTableAdvMixin,
+ ComparisonTestBase,
+ SQLTestUtils,
+):
+ pass
+
+
+if __name__ == "__main__":
+ from pyspark.pandas.tests.computation.test_pivot_table_adv import * #
noqa: F401
+
+ try:
+ import xmlrunner
+
+ testRunner = xmlrunner.XMLTestRunner(output="target/test-reports",
verbosity=2)
+ except ImportError:
+ testRunner = None
+ unittest.main(testRunner=testRunner, verbosity=2)
diff --git
a/python/pyspark/pandas/tests/computation/test_pivot_table_multi_idx.py
b/python/pyspark/pandas/tests/computation/test_pivot_table_multi_idx.py
new file mode 100644
index 000000000000..fbf7cd480114
--- /dev/null
+++ b/python/pyspark/pandas/tests/computation/test_pivot_table_multi_idx.py
@@ -0,0 +1,91 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+import numpy as np
+import pandas as pd
+
+from pyspark import pandas as ps
+from pyspark.testing.pandasutils import ComparisonTestBase
+from pyspark.testing.sqlutils import SQLTestUtils
+
+
+class PivotTableMultiIdxMixin:
+ def test_pivot_table(self):
+ pdf = pd.DataFrame(
+ {
+ "a": [4, 2, 3, 4, 8, 6],
+ "b": [1, 2, 2, 4, 2, 4],
+ "e": [10, 20, 20, 40, 20, 40],
+ "c": [1, 2, 9, 4, 7, 4],
+ "d": [-1, -2, -3, -4, -5, -6],
+ },
+ index=np.random.rand(6),
+ )
+ psdf = ps.from_pandas(pdf)
+
+ columns = pd.MultiIndex.from_tuples(
+ [("x", "a"), ("x", "b"), ("y", "e"), ("z", "c"), ("w", "d")]
+ )
+ pdf.columns = columns
+ psdf.columns = columns
+
+ self.assert_eq(
+ psdf.pivot_table(columns=("x", "a"), values=("x",
"b")).sort_index(),
+ pdf.pivot_table(columns=[("x", "a")], values=[("x",
"b")]).sort_index(),
+ almost=True,
+ )
+
+ self.assert_eq(
+ psdf.pivot_table(
+ index=[("z", "c")], columns=("x", "a"), values=[("x", "b")]
+ ).sort_index(),
+ pdf.pivot_table(
+ index=[("z", "c")], columns=[("x", "a")], values=[("x", "b")]
+ ).sort_index(),
+ almost=True,
+ )
+
+ self.assert_eq(
+ psdf.pivot_table(
+ index=[("z", "c")], columns=("x", "a"), values=[("x", "b"),
("y", "e")]
+ ).sort_index(),
+ pdf.pivot_table(
+ index=[("z", "c")], columns=[("x", "a")], values=[("x", "b"),
("y", "e")]
+ ).sort_index(),
+ almost=True,
+ )
+
+
+class PivotTableMultiIdxTests(
+ PivotTableMultiIdxMixin,
+ ComparisonTestBase,
+ SQLTestUtils,
+):
+ pass
+
+
+if __name__ == "__main__":
+ from pyspark.pandas.tests.computation.test_pivot_table_multi_idx import *
# noqa: F401
+
+ try:
+ import xmlrunner
+
+ testRunner = xmlrunner.XMLTestRunner(output="target/test-reports",
verbosity=2)
+ except ImportError:
+ testRunner = None
+ unittest.main(testRunner=testRunner, verbosity=2)
diff --git
a/python/pyspark/pandas/tests/computation/test_pivot_table_multi_idx_adv.py
b/python/pyspark/pandas/tests/computation/test_pivot_table_multi_idx_adv.py
new file mode 100644
index 000000000000..16a8ca2c0869
--- /dev/null
+++ b/python/pyspark/pandas/tests/computation/test_pivot_table_multi_idx_adv.py
@@ -0,0 +1,93 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+import numpy as np
+import pandas as pd
+
+from pyspark import pandas as ps
+from pyspark.testing.pandasutils import ComparisonTestBase
+from pyspark.testing.sqlutils import SQLTestUtils
+
+
+class PivotTableMultiIdxAdvMixin:
+ def test_pivot_table(self):
+ pdf = pd.DataFrame(
+ {
+ "a": [4, 2, 3, 4, 8, 6],
+ "b": [1, 2, 2, 4, 2, 4],
+ "e": [10, 20, 20, 40, 20, 40],
+ "c": [1, 2, 9, 4, 7, 4],
+ "d": [-1, -2, -3, -4, -5, -6],
+ },
+ index=np.random.rand(6),
+ )
+ psdf = ps.from_pandas(pdf)
+
+ columns = pd.MultiIndex.from_tuples(
+ [("x", "a"), ("x", "b"), ("y", "e"), ("z", "c"), ("w", "d")]
+ )
+ pdf.columns = columns
+ psdf.columns = columns
+
+ self.assert_eq(
+ psdf.pivot_table(
+ index=[("z", "c")], columns=("x", "a"), values=[("x", "b"),
("y", "e"), ("w", "d")]
+ ).sort_index(),
+ pdf.pivot_table(
+ index=[("z", "c")],
+ columns=[("x", "a")],
+ values=[("x", "b"), ("y", "e"), ("w", "d")],
+ ).sort_index(),
+ almost=True,
+ )
+
+ self.assert_eq(
+ psdf.pivot_table(
+ index=[("z", "c")],
+ columns=("x", "a"),
+ values=[("x", "b"), ("y", "e")],
+ aggfunc={("x", "b"): "mean", ("y", "e"): "sum"},
+ ).sort_index(),
+ pdf.pivot_table(
+ index=[("z", "c")],
+ columns=[("x", "a")],
+ values=[("x", "b"), ("y", "e")],
+ aggfunc={("x", "b"): "mean", ("y", "e"): "sum"},
+ ).sort_index(),
+ almost=True,
+ )
+
+
+class PivotTableMultiIdxAdvTests(
+ PivotTableMultiIdxAdvMixin,
+ ComparisonTestBase,
+ SQLTestUtils,
+):
+ pass
+
+
+if __name__ == "__main__":
+ from pyspark.pandas.tests.computation.test_pivot_table_multi_idx_adv
import * # noqa: F401
+
+ try:
+ import xmlrunner
+
+ testRunner = xmlrunner.XMLTestRunner(output="target/test-reports",
verbosity=2)
+ except ImportError:
+ testRunner = None
+ unittest.main(testRunner=testRunner, verbosity=2)
diff --git
a/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py
b/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py
index c8ec48eb06aa..fabdba7c7018 100644
--- a/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py
+++ b/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py
@@ -21,7 +21,11 @@ from pyspark.testing.connectutils import
ReusedConnectTestCase
from pyspark.testing.pandasutils import PandasOnSparkTestUtils
-class FrameParityPivotTests(FramePivotMixin, PandasOnSparkTestUtils,
ReusedConnectTestCase):
+class FrameParityPivotTests(
+ FramePivotMixin,
+ PandasOnSparkTestUtils,
+ ReusedConnectTestCase,
+):
pass
diff --git
a/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py
b/python/pyspark/pandas/tests/connect/computation/test_parity_pivot_table.py
similarity index 85%
copy from python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py
copy to
python/pyspark/pandas/tests/connect/computation/test_parity_pivot_table.py
index c8ec48eb06aa..3afdd875f177 100644
--- a/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py
+++ b/python/pyspark/pandas/tests/connect/computation/test_parity_pivot_table.py
@@ -16,17 +16,21 @@
#
import unittest
-from pyspark.pandas.tests.computation.test_pivot import FramePivotMixin
+from pyspark.pandas.tests.computation.test_pivot_table import PivotTableMixin
from pyspark.testing.connectutils import ReusedConnectTestCase
from pyspark.testing.pandasutils import PandasOnSparkTestUtils
-class FrameParityPivotTests(FramePivotMixin, PandasOnSparkTestUtils,
ReusedConnectTestCase):
+class PivotTableParityTests(
+ PivotTableMixin,
+ PandasOnSparkTestUtils,
+ ReusedConnectTestCase,
+):
pass
if __name__ == "__main__":
- from pyspark.pandas.tests.connect.computation.test_parity_pivot import *
# noqa: F401
+ from pyspark.pandas.tests.connect.computation.test_parity_pivot_table
import * # noqa
try:
import xmlrunner # type: ignore[import]
diff --git
a/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py
b/python/pyspark/pandas/tests/connect/computation/test_parity_pivot_table_adv.py
similarity index 84%
copy from python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py
copy to
python/pyspark/pandas/tests/connect/computation/test_parity_pivot_table_adv.py
index c8ec48eb06aa..2d6b7e697574 100644
--- a/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py
+++
b/python/pyspark/pandas/tests/connect/computation/test_parity_pivot_table_adv.py
@@ -16,17 +16,21 @@
#
import unittest
-from pyspark.pandas.tests.computation.test_pivot import FramePivotMixin
+from pyspark.pandas.tests.computation.test_pivot_table_adv import
PivotTableAdvMixin
from pyspark.testing.connectutils import ReusedConnectTestCase
from pyspark.testing.pandasutils import PandasOnSparkTestUtils
-class FrameParityPivotTests(FramePivotMixin, PandasOnSparkTestUtils,
ReusedConnectTestCase):
+class PivotTableAdvParityTests(
+ PivotTableAdvMixin,
+ PandasOnSparkTestUtils,
+ ReusedConnectTestCase,
+):
pass
if __name__ == "__main__":
- from pyspark.pandas.tests.connect.computation.test_parity_pivot import *
# noqa: F401
+ from pyspark.pandas.tests.connect.computation.test_parity_pivot_table_adv
import * # noqa
try:
import xmlrunner # type: ignore[import]
diff --git
a/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py
b/python/pyspark/pandas/tests/connect/computation/test_parity_pivot_table_multi_idx.py
similarity index 83%
copy from python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py
copy to
python/pyspark/pandas/tests/connect/computation/test_parity_pivot_table_multi_idx.py
index c8ec48eb06aa..eb0f676f8bce 100644
--- a/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py
+++
b/python/pyspark/pandas/tests/connect/computation/test_parity_pivot_table_multi_idx.py
@@ -16,17 +16,21 @@
#
import unittest
-from pyspark.pandas.tests.computation.test_pivot import FramePivotMixin
+from pyspark.pandas.tests.computation.test_pivot_table_multi_idx import
PivotTableMultiIdxMixin
from pyspark.testing.connectutils import ReusedConnectTestCase
from pyspark.testing.pandasutils import PandasOnSparkTestUtils
-class FrameParityPivotTests(FramePivotMixin, PandasOnSparkTestUtils,
ReusedConnectTestCase):
+class PivotTableMultiIdxParityTests(
+ PivotTableMultiIdxMixin,
+ PandasOnSparkTestUtils,
+ ReusedConnectTestCase,
+):
pass
if __name__ == "__main__":
- from pyspark.pandas.tests.connect.computation.test_parity_pivot import *
# noqa: F401
+ from
pyspark.pandas.tests.connect.computation.test_parity_pivot_table_multi_idx
import * # noqa
try:
import xmlrunner # type: ignore[import]
diff --git
a/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py
b/python/pyspark/pandas/tests/connect/computation/test_parity_pivot_table_multi_idx_adv.py
similarity index 81%
copy from python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py
copy to
python/pyspark/pandas/tests/connect/computation/test_parity_pivot_table_multi_idx_adv.py
index c8ec48eb06aa..361b5e8d9c27 100644
--- a/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py
+++
b/python/pyspark/pandas/tests/connect/computation/test_parity_pivot_table_multi_idx_adv.py
@@ -16,17 +16,23 @@
#
import unittest
-from pyspark.pandas.tests.computation.test_pivot import FramePivotMixin
+from pyspark.pandas.tests.computation.test_pivot_table_multi_idx_adv import (
+ PivotTableMultiIdxAdvMixin,
+)
from pyspark.testing.connectutils import ReusedConnectTestCase
from pyspark.testing.pandasutils import PandasOnSparkTestUtils
-class FrameParityPivotTests(FramePivotMixin, PandasOnSparkTestUtils,
ReusedConnectTestCase):
+class PivotTableMultiIdxAdvParityTests(
+ PivotTableMultiIdxAdvMixin,
+ PandasOnSparkTestUtils,
+ ReusedConnectTestCase,
+):
pass
if __name__ == "__main__":
- from pyspark.pandas.tests.connect.computation.test_parity_pivot import *
# noqa: F401
+ from
pyspark.pandas.tests.connect.computation.test_parity_pivot_table_multi_idx_adv
import * # noqa
try:
import xmlrunner # type: ignore[import]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]