(spark) branch master updated: [SPARK-46471][PS][TESTS][FOLLOWUPS] Reorganize `OpsOnDiffFramesEnabledTests`: Factor out `test_assignment_*`

dongjoon Sat, 23 Dec 2023 15:13:42 -0800

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 98fef6ea5855 [SPARK-46471][PS][TESTS][FOLLOWUPS] Reorganize `OpsOnDiffFramesEnabledTests`: Factor out `test_assignment_*`
98fef6ea5855 is described below

commit 98fef6ea5855580b46d41e269e0ddcd9a2c8bbe8
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Sat Dec 23 15:13:26 2023 -0800

    [SPARK-46471][PS][TESTS][FOLLOWUPS] Reorganize `OpsOnDiffFramesEnabledTests`: Factor out `test_assignment_*`
    
    ### What changes were proposed in this pull request?
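    Factor the `test_assignment_*` and `test_multi_index_assignment_*` tests out of `OpsOnDiffFramesEnabledTests` into the new `diff_frames_ops/test_assign_frame.py` and `diff_frames_ops/test_assign_series.py` modules, and add the matching Spark Connect parity tests.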
    
    ### Why are the changes needed?
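    To improve test parallelism: the extracted tests are registered as separate test modules in `dev/sparktestsupport/modules.py`.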
    
    ### Does this PR introduce _any_ user-facing change?
    no, test-only
    
    ### How was this patch tested?
    ci
    
    ### Was this patch authored or co-authored using generative AI tooling?
    no
    
    Closes #44461 from zhengruifeng/ps_test_diff_ops_1.
    
    Authored-by: Ruifeng Zheng <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 dev/sparktestsupport/modules.py                    |   4 +
 .../diff_frames_ops/test_parity_assign_frame.py    |  41 ++++
 .../diff_frames_ops/test_parity_assign_series.py   |  41 ++++
 .../tests/diff_frames_ops/test_assign_frame.py     | 243 +++++++++++++++++++++
 .../tests/diff_frames_ops/test_assign_series.py    | 241 ++++++++++++++++++++
 .../pandas/tests/test_ops_on_diff_frames.py        | 174 ---------------
 6 files changed, 570 insertions(+), 174 deletions(-)

diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 47db204e2fa1..33e7dd3af97a 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -870,6 +870,8 @@ pyspark_pandas_slow = Module(
         "pyspark.pandas.tests.diff_frames_ops.test_arithmetic_chain",
         "pyspark.pandas.tests.diff_frames_ops.test_arithmetic_chain_ext",
         "pyspark.pandas.tests.diff_frames_ops.test_arithmetic_chain_ext_float",
+        "pyspark.pandas.tests.diff_frames_ops.test_assign_frame",
+        "pyspark.pandas.tests.diff_frames_ops.test_assign_series",
         "pyspark.pandas.tests.diff_frames_ops.test_basic_slow",
         "pyspark.pandas.tests.diff_frames_ops.test_cov",
         "pyspark.pandas.tests.diff_frames_ops.test_corrwith",
@@ -1235,6 +1237,8 @@ pyspark_pandas_connect_part3 = Module(
         
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_arithmetic_chain",
         
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_arithmetic_chain_ext",
         
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_arithmetic_chain_ext_float",
+        
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_assign_frame",
+        
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_assign_series",
         "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby",
         
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_aggregate",
         
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_apply",
diff --git 
a/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_assign_frame.py
 
b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_assign_frame.py
new file mode 100644
index 000000000000..82ce5a2e15bb
--- /dev/null
+++ 
b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_assign_frame.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.pandas.tests.diff_frames_ops.test_assign_frame import 
AssignFrameMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
+
+
+class AssignFrameParityTests(
+    AssignFrameMixin,
+    PandasOnSparkTestUtils,
+    ReusedConnectTestCase,
+):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.connect.diff_frames_ops.test_parity_assign_frame 
import *  # noqa
+
+    try:
+        import xmlrunner  # type: ignore[import]
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", 
verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git 
a/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_assign_series.py
 
b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_assign_series.py
new file mode 100644
index 000000000000..24a1e9b966cf
--- /dev/null
+++ 
b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_assign_series.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.pandas.tests.diff_frames_ops.test_assign_series import 
AssignSeriesMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
+
+
+class AssignSeriesParityTests(
+    AssignSeriesMixin,
+    PandasOnSparkTestUtils,
+    ReusedConnectTestCase,
+):
+    pass
+
+
+if __name__ == "__main__":
+    from 
pyspark.pandas.tests.connect.diff_frames_ops.test_parity_assign_series import * 
 # noqa
+
+    try:
+        import xmlrunner  # type: ignore[import]
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", 
verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/diff_frames_ops/test_assign_frame.py 
b/python/pyspark/pandas/tests/diff_frames_ops/test_assign_frame.py
new file mode 100644
index 000000000000..e6f2e78d7499
--- /dev/null
+++ b/python/pyspark/pandas/tests/diff_frames_ops/test_assign_frame.py
@@ -0,0 +1,243 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import pandas as pd
+
+from pyspark import pandas as ps
+from pyspark.pandas.config import set_option, reset_option
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.testing.sqlutils import SQLTestUtils
+
+
+class AssignFrameMixin:
+    @property
+    def pdf1(self):
+        return pd.DataFrame(
+            {"a": [1, 2, 3, 4, 5, 6, 7, 8, 9], "b": [4, 5, 6, 3, 2, 1, 0, 0, 
0]},
+            index=[0, 1, 3, 5, 6, 8, 9, 10, 11],
+        )
+
+    @property
+    def pdf2(self):
+        return pd.DataFrame(
+            {"a": [9, 8, 7, 6, 5, 4, 3, 2, 1], "b": [0, 0, 0, 4, 5, 6, 1, 2, 
3]},
+            index=list(range(9)),
+        )
+
+    @property
+    def pdf3(self):
+        return pd.DataFrame(
+            {"b": [1, 1, 1, 1, 1, 1, 1, 1, 1], "c": [1, 1, 1, 1, 1, 1, 1, 1, 
1]},
+            index=list(range(9)),
+        )
+
+    @property
+    def pdf4(self):
+        return pd.DataFrame(
+            {"e": [2, 2, 2, 2, 2, 2, 2, 2, 2], "f": [2, 2, 2, 2, 2, 2, 2, 2, 
2]},
+            index=list(range(9)),
+        )
+
+    @property
+    def pdf5(self):
+        return pd.DataFrame(
+            {
+                "a": [1, 2, 3, 4, 5, 6, 7, 8, 9],
+                "b": [4, 5, 6, 3, 2, 1, 0, 0, 0],
+                "c": [4, 5, 6, 3, 2, 1, 0, 0, 0],
+            },
+            index=[0, 1, 3, 5, 6, 8, 9, 10, 11],
+        ).set_index(["a", "b"])
+
+    @property
+    def pdf6(self):
+        return pd.DataFrame(
+            {
+                "a": [9, 8, 7, 6, 5, 4, 3, 2, 1],
+                "b": [0, 0, 0, 4, 5, 6, 1, 2, 3],
+                "c": [9, 8, 7, 6, 5, 4, 3, 2, 1],
+                "e": [4, 5, 6, 3, 2, 1, 0, 0, 0],
+            },
+            index=list(range(9)),
+        ).set_index(["a", "b"])
+
+    @property
+    def pser1(self):
+        midx = pd.MultiIndex(
+            [["lama", "cow", "falcon", "koala"], ["speed", "weight", "length", 
"power"]],
+            [[0, 3, 1, 1, 1, 2, 2, 2], [0, 2, 0, 3, 2, 0, 1, 3]],
+        )
+        return pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1], index=midx)
+
+    @property
+    def pser2(self):
+        midx = pd.MultiIndex(
+            [["lama", "cow", "falcon"], ["speed", "weight", "length"]],
+            [[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]],
+        )
+        return pd.Series([-45, 200, -1.2, 30, -250, 1.5, 320, 1, -0.3], 
index=midx)
+
+    @property
+    def pser3(self):
+        midx = pd.MultiIndex(
+            [["koalas", "cow", "falcon"], ["speed", "weight", "length"]],
+            [[0, 0, 0, 1, 1, 1, 2, 2, 2], [1, 1, 2, 0, 0, 2, 2, 2, 1]],
+        )
+        return pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3], index=midx)
+
+    @property
+    def psdf1(self):
+        return ps.from_pandas(self.pdf1)
+
+    @property
+    def psdf2(self):
+        return ps.from_pandas(self.pdf2)
+
+    @property
+    def psdf3(self):
+        return ps.from_pandas(self.pdf3)
+
+    @property
+    def psdf4(self):
+        return ps.from_pandas(self.pdf4)
+
+    @property
+    def psdf5(self):
+        return ps.from_pandas(self.pdf5)
+
+    @property
+    def psdf6(self):
+        return ps.from_pandas(self.pdf6)
+
+    @classmethod
+    def setUpClass(cls):
+        super().setUpClass()
+        set_option("compute.ops_on_diff_frames", True)
+
+    @classmethod
+    def tearDownClass(cls):
+        reset_option("compute.ops_on_diff_frames")
+        super().tearDownClass()
+
+    def test_assignment_frame(self):
+        psdf = ps.from_pandas(self.pdf1)
+        pdf = self.pdf1
+        psser = psdf.a
+        pser = pdf.a
+        psdf[["a", "b"]] = self.psdf1
+        pdf[["a", "b"]] = self.pdf1
+
+        self.assert_eq(psdf.sort_index(), pdf.sort_index())
+        self.assert_eq(psser, pser)
+
+        # 'c' does not exist in `psdf`.
+        psdf = ps.from_pandas(self.pdf1)
+        pdf = self.pdf1
+        psser = psdf.a
+        pser = pdf.a
+        psdf[["b", "c"]] = self.psdf1
+        pdf[["b", "c"]] = self.pdf1
+
+        self.assert_eq(psdf.sort_index(), pdf.sort_index())
+        self.assert_eq(psser, pser)
+
+        # 'c' and 'd' do not exist in `psdf`.
+        psdf = ps.from_pandas(self.pdf1)
+        pdf = self.pdf1
+        psdf[["c", "d"]] = self.psdf1
+        pdf[["c", "d"]] = self.pdf1
+
+        self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+        # Multi-index columns
+        columns = pd.MultiIndex.from_tuples([("x", "a"), ("x", "b")])
+        psdf = ps.from_pandas(self.pdf1)
+        pdf = self.pdf1
+        psdf.columns = columns
+        pdf.columns = columns
+        psdf[[("y", "c"), ("z", "d")]] = self.psdf1
+        pdf[[("y", "c"), ("z", "d")]] = self.pdf1
+
+        self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+        psdf = ps.from_pandas(self.pdf1)
+        pdf = self.pdf1
+        psdf1 = ps.from_pandas(self.pdf1)
+        pdf1 = self.pdf1
+        psdf1.columns = columns
+        pdf1.columns = columns
+        psdf[["c", "d"]] = psdf1
+        pdf[["c", "d"]] = pdf1
+
+        self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+    def test_assignment_frame_chain(self):
+        psdf = ps.from_pandas(self.pdf1)
+        pdf = self.pdf1
+        psdf[["a", "b"]] = self.psdf1
+        pdf[["a", "b"]] = self.pdf1
+
+        psdf[["e", "f"]] = self.psdf3
+        pdf[["e", "f"]] = self.pdf3
+
+        psdf[["b", "c"]] = self.psdf2
+        pdf[["b", "c"]] = self.pdf2
+
+        self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+    def test_multi_index_assignment_frame(self):
+        psdf = ps.from_pandas(self.pdf5)
+        pdf = self.pdf5
+        psdf[["c"]] = self.psdf5
+        pdf[["c"]] = self.pdf5
+
+        self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+        psdf = ps.from_pandas(self.pdf5)
+        pdf = self.pdf5
+        psdf[["x"]] = self.psdf5
+        pdf[["x"]] = self.pdf5
+
+        self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+        psdf = ps.from_pandas(self.pdf6)
+        pdf = self.pdf6
+        psdf[["x", "y"]] = self.psdf6
+        pdf[["x", "y"]] = self.pdf6
+
+        self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+
+class AssignFrameTests(
+    AssignFrameMixin,
+    PandasOnSparkTestCase,
+    SQLTestUtils,
+):
+    pass
+
+
+if __name__ == "__main__":
+    import unittest
+    from pyspark.pandas.tests.diff_frames_ops.test_assign_frame import *  # 
noqa: F401
+
+    try:
+        import xmlrunner
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", 
verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/diff_frames_ops/test_assign_series.py 
b/python/pyspark/pandas/tests/diff_frames_ops/test_assign_series.py
new file mode 100644
index 000000000000..338214c99e12
--- /dev/null
+++ b/python/pyspark/pandas/tests/diff_frames_ops/test_assign_series.py
@@ -0,0 +1,241 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import pandas as pd
+
+from pyspark import pandas as ps
+from pyspark.pandas.config import set_option, reset_option
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.testing.sqlutils import SQLTestUtils
+
+
+class AssignSeriesMixin:
+    @property
+    def pdf1(self):
+        return pd.DataFrame(
+            {"a": [1, 2, 3, 4, 5, 6, 7, 8, 9], "b": [4, 5, 6, 3, 2, 1, 0, 0, 
0]},
+            index=[0, 1, 3, 5, 6, 8, 9, 10, 11],
+        )
+
+    @property
+    def pdf2(self):
+        return pd.DataFrame(
+            {"a": [9, 8, 7, 6, 5, 4, 3, 2, 1], "b": [0, 0, 0, 4, 5, 6, 1, 2, 
3]},
+            index=list(range(9)),
+        )
+
+    @property
+    def pdf3(self):
+        return pd.DataFrame(
+            {"b": [1, 1, 1, 1, 1, 1, 1, 1, 1], "c": [1, 1, 1, 1, 1, 1, 1, 1, 
1]},
+            index=list(range(9)),
+        )
+
+    @property
+    def pdf4(self):
+        return pd.DataFrame(
+            {"e": [2, 2, 2, 2, 2, 2, 2, 2, 2], "f": [2, 2, 2, 2, 2, 2, 2, 2, 
2]},
+            index=list(range(9)),
+        )
+
+    @property
+    def pdf5(self):
+        return pd.DataFrame(
+            {
+                "a": [1, 2, 3, 4, 5, 6, 7, 8, 9],
+                "b": [4, 5, 6, 3, 2, 1, 0, 0, 0],
+                "c": [4, 5, 6, 3, 2, 1, 0, 0, 0],
+            },
+            index=[0, 1, 3, 5, 6, 8, 9, 10, 11],
+        ).set_index(["a", "b"])
+
+    @property
+    def pdf6(self):
+        return pd.DataFrame(
+            {
+                "a": [9, 8, 7, 6, 5, 4, 3, 2, 1],
+                "b": [0, 0, 0, 4, 5, 6, 1, 2, 3],
+                "c": [9, 8, 7, 6, 5, 4, 3, 2, 1],
+                "e": [4, 5, 6, 3, 2, 1, 0, 0, 0],
+            },
+            index=list(range(9)),
+        ).set_index(["a", "b"])
+
+    @property
+    def pser1(self):
+        midx = pd.MultiIndex(
+            [["lama", "cow", "falcon", "koala"], ["speed", "weight", "length", 
"power"]],
+            [[0, 3, 1, 1, 1, 2, 2, 2], [0, 2, 0, 3, 2, 0, 1, 3]],
+        )
+        return pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1], index=midx)
+
+    @property
+    def pser2(self):
+        midx = pd.MultiIndex(
+            [["lama", "cow", "falcon"], ["speed", "weight", "length"]],
+            [[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]],
+        )
+        return pd.Series([-45, 200, -1.2, 30, -250, 1.5, 320, 1, -0.3], 
index=midx)
+
+    @property
+    def pser3(self):
+        midx = pd.MultiIndex(
+            [["koalas", "cow", "falcon"], ["speed", "weight", "length"]],
+            [[0, 0, 0, 1, 1, 1, 2, 2, 2], [1, 1, 2, 0, 0, 2, 2, 2, 1]],
+        )
+        return pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3], index=midx)
+
+    @property
+    def psdf1(self):
+        return ps.from_pandas(self.pdf1)
+
+    @property
+    def psdf2(self):
+        return ps.from_pandas(self.pdf2)
+
+    @property
+    def psdf3(self):
+        return ps.from_pandas(self.pdf3)
+
+    @property
+    def psdf4(self):
+        return ps.from_pandas(self.pdf4)
+
+    @property
+    def psdf5(self):
+        return ps.from_pandas(self.pdf5)
+
+    @property
+    def psdf6(self):
+        return ps.from_pandas(self.pdf6)
+
+    @classmethod
+    def setUpClass(cls):
+        super().setUpClass()
+        set_option("compute.ops_on_diff_frames", True)
+
+    @classmethod
+    def tearDownClass(cls):
+        reset_option("compute.ops_on_diff_frames")
+        super().tearDownClass()
+
+    def test_assignment_series(self):
+        psdf = ps.from_pandas(self.pdf1)
+        pdf = self.pdf1
+        psser = psdf.a
+        pser = pdf.a
+        psdf["a"] = self.psdf2.a
+        pdf["a"] = self.pdf2.a
+
+        self.assert_eq(psdf.sort_index(), pdf.sort_index())
+        self.assert_eq(psser, pser)
+
+        psdf = ps.from_pandas(self.pdf1)
+        pdf = self.pdf1
+        psser = psdf.a
+        pser = pdf.a
+        psdf["a"] = self.psdf2.b
+        pdf["a"] = self.pdf2.b
+
+        self.assert_eq(psdf.sort_index(), pdf.sort_index())
+        self.assert_eq(psser, pser)
+
+        psdf = ps.from_pandas(self.pdf1)
+        pdf = self.pdf1
+        psdf["c"] = self.psdf2.a
+        pdf["c"] = self.pdf2.a
+
+        self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+        # Multi-index columns
+        psdf = ps.from_pandas(self.pdf1)
+        pdf = self.pdf1
+        columns = pd.MultiIndex.from_tuples([("x", "a"), ("x", "b")])
+        psdf.columns = columns
+        pdf.columns = columns
+        psdf[("y", "c")] = self.psdf2.a
+        pdf[("y", "c")] = self.pdf2.a
+
+        self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+        pdf = pd.DataFrame({"a": [1, 2, 3], "Koalas": [0, 1, 
2]}).set_index("Koalas", drop=False)
+        psdf = ps.from_pandas(pdf)
+
+        psdf.index.name = None
+        psdf["NEW"] = ps.Series([100, 200, 300])
+
+        pdf.index.name = None
+        pdf["NEW"] = pd.Series([100, 200, 300])
+
+        self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+    def test_assignment_series_chain(self):
+        psdf = ps.from_pandas(self.pdf1)
+        pdf = self.pdf1
+        psdf["a"] = self.psdf1.a
+        pdf["a"] = self.pdf1.a
+
+        psdf["a"] = self.psdf2.b
+        pdf["a"] = self.pdf2.b
+
+        psdf["d"] = self.psdf3.c
+        pdf["d"] = self.pdf3.c
+
+        self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+    def test_multi_index_assignment_series(self):
+        psdf = ps.from_pandas(self.pdf5)
+        pdf = self.pdf5
+        psdf["x"] = self.psdf6.e
+        pdf["x"] = self.pdf6.e
+
+        self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+        psdf = ps.from_pandas(self.pdf5)
+        pdf = self.pdf5
+        psdf["e"] = self.psdf6.e
+        pdf["e"] = self.pdf6.e
+
+        self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+        psdf = ps.from_pandas(self.pdf5)
+        pdf = self.pdf5
+        psdf["c"] = self.psdf6.e
+        pdf["c"] = self.pdf6.e
+
+        self.assert_eq(psdf.sort_index(), pdf.sort_index())
+
+
+class AssignSeriesTests(
+    AssignSeriesMixin,
+    PandasOnSparkTestCase,
+    SQLTestUtils,
+):
+    pass
+
+
+if __name__ == "__main__":
+    import unittest
+    from pyspark.pandas.tests.diff_frames_ops.test_assign_series import *  # 
noqa: F401
+
+    try:
+        import xmlrunner
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", 
verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/test_ops_on_diff_frames.py 
b/python/pyspark/pandas/tests/test_ops_on_diff_frames.py
index 016908f0a9d4..505e96e68752 100644
--- a/python/pyspark/pandas/tests/test_ops_on_diff_frames.py
+++ b/python/pyspark/pandas/tests/test_ops_on_diff_frames.py
@@ -559,136 +559,6 @@ class OpsOnDiffFramesEnabledTestsMixin:
 
         self.assert_eq((psdf1 + psdf4).sort_index(), (pdf1 + 
pdf4).sort_index(), almost=True)
 
-    def test_assignment_series(self):
-        psdf = ps.from_pandas(self.pdf1)
-        pdf = self.pdf1
-        psser = psdf.a
-        pser = pdf.a
-        psdf["a"] = self.psdf2.a
-        pdf["a"] = self.pdf2.a
-
-        self.assert_eq(psdf.sort_index(), pdf.sort_index())
-        self.assert_eq(psser, pser)
-
-        psdf = ps.from_pandas(self.pdf1)
-        pdf = self.pdf1
-        psser = psdf.a
-        pser = pdf.a
-        psdf["a"] = self.psdf2.b
-        pdf["a"] = self.pdf2.b
-
-        self.assert_eq(psdf.sort_index(), pdf.sort_index())
-        self.assert_eq(psser, pser)
-
-        psdf = ps.from_pandas(self.pdf1)
-        pdf = self.pdf1
-        psdf["c"] = self.psdf2.a
-        pdf["c"] = self.pdf2.a
-
-        self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
-        # Multi-index columns
-        psdf = ps.from_pandas(self.pdf1)
-        pdf = self.pdf1
-        columns = pd.MultiIndex.from_tuples([("x", "a"), ("x", "b")])
-        psdf.columns = columns
-        pdf.columns = columns
-        psdf[("y", "c")] = self.psdf2.a
-        pdf[("y", "c")] = self.pdf2.a
-
-        self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
-        pdf = pd.DataFrame({"a": [1, 2, 3], "Koalas": [0, 1, 
2]}).set_index("Koalas", drop=False)
-        psdf = ps.from_pandas(pdf)
-
-        psdf.index.name = None
-        psdf["NEW"] = ps.Series([100, 200, 300])
-
-        pdf.index.name = None
-        pdf["NEW"] = pd.Series([100, 200, 300])
-
-        self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
-    def test_assignment_frame(self):
-        psdf = ps.from_pandas(self.pdf1)
-        pdf = self.pdf1
-        psser = psdf.a
-        pser = pdf.a
-        psdf[["a", "b"]] = self.psdf1
-        pdf[["a", "b"]] = self.pdf1
-
-        self.assert_eq(psdf.sort_index(), pdf.sort_index())
-        self.assert_eq(psser, pser)
-
-        # 'c' does not exist in `psdf`.
-        psdf = ps.from_pandas(self.pdf1)
-        pdf = self.pdf1
-        psser = psdf.a
-        pser = pdf.a
-        psdf[["b", "c"]] = self.psdf1
-        pdf[["b", "c"]] = self.pdf1
-
-        self.assert_eq(psdf.sort_index(), pdf.sort_index())
-        self.assert_eq(psser, pser)
-
-        # 'c' and 'd' do not exist in `psdf`.
-        psdf = ps.from_pandas(self.pdf1)
-        pdf = self.pdf1
-        psdf[["c", "d"]] = self.psdf1
-        pdf[["c", "d"]] = self.pdf1
-
-        self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
-        # Multi-index columns
-        columns = pd.MultiIndex.from_tuples([("x", "a"), ("x", "b")])
-        psdf = ps.from_pandas(self.pdf1)
-        pdf = self.pdf1
-        psdf.columns = columns
-        pdf.columns = columns
-        psdf[[("y", "c"), ("z", "d")]] = self.psdf1
-        pdf[[("y", "c"), ("z", "d")]] = self.pdf1
-
-        self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
-        psdf = ps.from_pandas(self.pdf1)
-        pdf = self.pdf1
-        psdf1 = ps.from_pandas(self.pdf1)
-        pdf1 = self.pdf1
-        psdf1.columns = columns
-        pdf1.columns = columns
-        psdf[["c", "d"]] = psdf1
-        pdf[["c", "d"]] = pdf1
-
-        self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
-    def test_assignment_series_chain(self):
-        psdf = ps.from_pandas(self.pdf1)
-        pdf = self.pdf1
-        psdf["a"] = self.psdf1.a
-        pdf["a"] = self.pdf1.a
-
-        psdf["a"] = self.psdf2.b
-        pdf["a"] = self.pdf2.b
-
-        psdf["d"] = self.psdf3.c
-        pdf["d"] = self.pdf3.c
-
-        self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
-    def test_assignment_frame_chain(self):
-        psdf = ps.from_pandas(self.pdf1)
-        pdf = self.pdf1
-        psdf[["a", "b"]] = self.psdf1
-        pdf[["a", "b"]] = self.pdf1
-
-        psdf[["e", "f"]] = self.psdf3
-        pdf[["e", "f"]] = self.pdf3
-
-        psdf[["b", "c"]] = self.psdf2
-        pdf[["b", "c"]] = self.pdf2
-
-        self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
     def test_multi_index_arithmetic(self):
         psdf5 = self.psdf5
         psdf6 = self.psdf6
@@ -703,50 +573,6 @@ class OpsOnDiffFramesEnabledTestsMixin:
         # DataFrame
         self.assert_eq((psdf5 + psdf6).sort_index(), (pdf5 + 
pdf6).sort_index(), almost=True)
 
-    def test_multi_index_assignment_series(self):
-        psdf = ps.from_pandas(self.pdf5)
-        pdf = self.pdf5
-        psdf["x"] = self.psdf6.e
-        pdf["x"] = self.pdf6.e
-
-        self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
-        psdf = ps.from_pandas(self.pdf5)
-        pdf = self.pdf5
-        psdf["e"] = self.psdf6.e
-        pdf["e"] = self.pdf6.e
-
-        self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
-        psdf = ps.from_pandas(self.pdf5)
-        pdf = self.pdf5
-        psdf["c"] = self.psdf6.e
-        pdf["c"] = self.pdf6.e
-
-        self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
-    def test_multi_index_assignment_frame(self):
-        psdf = ps.from_pandas(self.pdf5)
-        pdf = self.pdf5
-        psdf[["c"]] = self.psdf5
-        pdf[["c"]] = self.pdf5
-
-        self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
-        psdf = ps.from_pandas(self.pdf5)
-        pdf = self.pdf5
-        psdf[["x"]] = self.psdf5
-        pdf[["x"]] = self.pdf5
-
-        self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
-        psdf = ps.from_pandas(self.pdf6)
-        pdf = self.pdf6
-        psdf[["x", "y"]] = self.psdf6
-        pdf[["x", "y"]] = self.pdf6
-
-        self.assert_eq(psdf.sort_index(), pdf.sort_index())
-
 
 class OpsOnDiffFramesEnabledTests(
     OpsOnDiffFramesEnabledTestsMixin, PandasOnSparkTestCase, SQLTestUtils


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(spark) branch master updated: [SPARK-46471][PS][TESTS][FOLLOWUPS] Reorganize `OpsOnDiffFramesEnabledTests`: Factor out `test_assignment_*`

Reply via email to