This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 4f56958c1231 [SPARK-46471][PS][TESTS][FOLLOWUPS] Reorganize
`OpsOnDiffFramesEnabledTests`: Factor out more tests
4f56958c1231 is described below
commit 4f56958c1231794da71160a385427ffd730bb396
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Sun Dec 24 17:43:59 2023 +0800
[SPARK-46471][PS][TESTS][FOLLOWUPS] Reorganize
`OpsOnDiffFramesEnabledTests`: Factor out more tests
### What changes were proposed in this pull request?
factor out following tests:
- test_bitwise
- test_bitwise_extension_dtype
- test_combine_first
- test_compare
- test_concat_column_axis -> test_concat_column_axis_inner &
test_concat_column_axis_outer
### Why are the changes needed?
for testing parallelism
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
ci
### Was this patch authored or co-authored using generative AI tooling?
no
Closes #44469 from zhengruifeng/ps_test_diff_ops_2.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
---
dev/sparktestsupport/modules.py | 10 +
.../connect/diff_frames_ops/test_parity_bitwise.py | 41 +++
.../diff_frames_ops/test_parity_combine_first.py | 41 +++
.../diff_frames_ops/test_parity_compare_series.py | 41 +++
.../diff_frames_ops/test_parity_concat_inner.py | 41 +++
.../diff_frames_ops/test_parity_concat_outer.py | 41 +++
.../pandas/tests/diff_frames_ops/test_bitwise.py | 110 +++++++++
.../tests/diff_frames_ops/test_combine_first.py | 110 +++++++++
.../tests/diff_frames_ops/test_compare_series.py | 155 ++++++++++++
.../tests/diff_frames_ops/test_concat_inner.py | 123 +++++++++
.../tests/diff_frames_ops/test_concat_outer.py | 81 ++++++
.../pandas/tests/test_ops_on_diff_frames.py | 275 +--------------------
12 files changed, 795 insertions(+), 274 deletions(-)
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 33e7dd3af97a..939e88bf95b2 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -872,6 +872,11 @@ pyspark_pandas_slow = Module(
"pyspark.pandas.tests.diff_frames_ops.test_arithmetic_chain_ext_float",
"pyspark.pandas.tests.diff_frames_ops.test_assign_frame",
"pyspark.pandas.tests.diff_frames_ops.test_assign_series",
+ "pyspark.pandas.tests.diff_frames_ops.test_bitwise",
+ "pyspark.pandas.tests.diff_frames_ops.test_combine_first",
+ "pyspark.pandas.tests.diff_frames_ops.test_compare_series",
+ "pyspark.pandas.tests.diff_frames_ops.test_concat_inner",
+ "pyspark.pandas.tests.diff_frames_ops.test_concat_outer",
"pyspark.pandas.tests.diff_frames_ops.test_basic_slow",
"pyspark.pandas.tests.diff_frames_ops.test_cov",
"pyspark.pandas.tests.diff_frames_ops.test_corrwith",
@@ -1239,6 +1244,11 @@ pyspark_pandas_connect_part3 = Module(
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_arithmetic_chain_ext_float",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_assign_frame",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_assign_series",
+ "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_bitwise",
+
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_combine_first",
+
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_compare_series",
+
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_concat_inner",
+
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_concat_outer",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_aggregate",
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_apply",
diff --git
a/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_bitwise.py
b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_bitwise.py
new file mode 100644
index 000000000000..75335adc4162
--- /dev/null
+++ b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_bitwise.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.pandas.tests.diff_frames_ops.test_bitwise import BitwiseMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
+
+
+class BitwiseParityTests(
+ BitwiseMixin,
+ PandasOnSparkTestUtils,
+ ReusedConnectTestCase,
+):
+ pass
+
+
+if __name__ == "__main__":
+ from pyspark.pandas.tests.connect.diff_frames_ops.test_parity_bitwise import * # noqa
+
+ try:
+ import xmlrunner # type: ignore[import]
+
+ testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+ except ImportError:
+ testRunner = None
+ unittest.main(testRunner=testRunner, verbosity=2)
diff --git
a/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_combine_first.py
b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_combine_first.py
new file mode 100644
index 000000000000..3ee500c316e9
--- /dev/null
+++
b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_combine_first.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.pandas.tests.diff_frames_ops.test_combine_first import CombineFirstMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
+
+
+class CombineFirstParityTests(
+ CombineFirstMixin,
+ PandasOnSparkTestUtils,
+ ReusedConnectTestCase,
+):
+ pass
+
+
+if __name__ == "__main__":
+ from pyspark.pandas.tests.connect.diff_frames_ops.test_parity_combine_first import * # noqa
+
+ try:
+ import xmlrunner # type: ignore[import]
+
+ testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+ except ImportError:
+ testRunner = None
+ unittest.main(testRunner=testRunner, verbosity=2)
diff --git
a/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_compare_series.py
b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_compare_series.py
new file mode 100644
index 000000000000..af866a5948ad
--- /dev/null
+++
b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_compare_series.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.pandas.tests.diff_frames_ops.test_compare_series import CompareSeriesMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
+
+
+class CompareSeriesParityTests(
+ CompareSeriesMixin,
+ PandasOnSparkTestUtils,
+ ReusedConnectTestCase,
+):
+ pass
+
+
+if __name__ == "__main__":
+ from pyspark.pandas.tests.connect.diff_frames_ops.test_parity_compare_series import * # noqa
+
+ try:
+ import xmlrunner # type: ignore[import]
+
+ testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+ except ImportError:
+ testRunner = None
+ unittest.main(testRunner=testRunner, verbosity=2)
diff --git
a/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_concat_inner.py
b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_concat_inner.py
new file mode 100644
index 000000000000..fd13d617792e
--- /dev/null
+++
b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_concat_inner.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.pandas.tests.diff_frames_ops.test_concat_inner import ConcatInnerMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
+
+
+class ConcatInnerParityTests(
+ ConcatInnerMixin,
+ PandasOnSparkTestUtils,
+ ReusedConnectTestCase,
+):
+ pass
+
+
+if __name__ == "__main__":
+ from pyspark.pandas.tests.connect.diff_frames_ops.test_parity_concat_inner import * # noqa
+
+ try:
+ import xmlrunner # type: ignore[import]
+
+ testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+ except ImportError:
+ testRunner = None
+ unittest.main(testRunner=testRunner, verbosity=2)
diff --git
a/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_concat_outer.py
b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_concat_outer.py
new file mode 100644
index 000000000000..f7fa2c550c30
--- /dev/null
+++
b/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_concat_outer.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+from pyspark.pandas.tests.diff_frames_ops.test_concat_outer import ConcatOuterMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
+
+
+class ConcatOuterParityTests(
+ ConcatOuterMixin,
+ PandasOnSparkTestUtils,
+ ReusedConnectTestCase,
+):
+ pass
+
+
+if __name__ == "__main__":
+ from pyspark.pandas.tests.connect.diff_frames_ops.test_parity_concat_outer import * # noqa
+
+ try:
+ import xmlrunner # type: ignore[import]
+
+ testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+ except ImportError:
+ testRunner = None
+ unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/diff_frames_ops/test_bitwise.py
b/python/pyspark/pandas/tests/diff_frames_ops/test_bitwise.py
new file mode 100644
index 000000000000..04e9734ff823
--- /dev/null
+++ b/python/pyspark/pandas/tests/diff_frames_ops/test_bitwise.py
@@ -0,0 +1,110 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+import numpy as np
+import pandas as pd
+
+from pyspark import pandas as ps
+from pyspark.pandas.config import set_option, reset_option
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.testing.sqlutils import SQLTestUtils
+from pyspark.pandas.typedef.typehints import extension_object_dtypes_available
+
+
+class BitwiseMixin:
+ @classmethod
+ def setUpClass(cls):
+ super().setUpClass()
+ set_option("compute.ops_on_diff_frames", True)
+
+ @classmethod
+ def tearDownClass(cls):
+ reset_option("compute.ops_on_diff_frames")
+ super().tearDownClass()
+
+ def test_bitwise(self):
+ pser1 = pd.Series([True, False, True, False, np.nan, np.nan, True,
False, np.nan])
+ pser2 = pd.Series([True, False, False, True, True, False, np.nan,
np.nan, np.nan])
+ psser1 = ps.from_pandas(pser1)
+ psser2 = ps.from_pandas(pser2)
+
+ self.assert_eq(pser1 | pser2, (psser1 | psser2).sort_index())
+ self.assert_eq(pser1 & pser2, (psser1 & psser2).sort_index())
+
+ pser1 = pd.Series([True, False, np.nan], index=list("ABC"))
+ pser2 = pd.Series([False, True, np.nan], index=list("DEF"))
+ psser1 = ps.from_pandas(pser1)
+ psser2 = ps.from_pandas(pser2)
+
+ self.assert_eq(pser1 | pser2, (psser1 | psser2).sort_index())
+ self.assert_eq(pser1 & pser2, (psser1 & psser2).sort_index())
+
+ @unittest.skipIf(
+ not extension_object_dtypes_available, "pandas extension object dtypes
are not available"
+ )
+ def test_bitwise_extension_dtype(self):
+ pser1 = pd.Series(
+ [True, False, True, False, np.nan, np.nan, True, False, np.nan],
dtype="boolean"
+ )
+ pser2 = pd.Series(
+ [True, False, False, True, True, False, np.nan, np.nan, np.nan],
dtype="boolean"
+ )
+ psser1 = ps.from_pandas(pser1)
+ psser2 = ps.from_pandas(pser2)
+
+ self.assert_eq((psser1 | psser2).sort_index(), pser1 | pser2)
+ self.assert_eq((psser1 & psser2).sort_index(), pser1 & pser2)
+
+ pser1 = pd.Series([True, False, np.nan], index=list("ABC"),
dtype="boolean")
+ pser2 = pd.Series([False, True, np.nan], index=list("DEF"),
dtype="boolean")
+ psser1 = ps.from_pandas(pser1)
+ psser2 = ps.from_pandas(pser2)
+
+ # a pandas bug?
+ # assert_eq((psser1 | psser2).sort_index(), pser1 | pser2)
+ # assert_eq((psser1 & psser2).sort_index(), pser1 & pser2)
+ self.assert_eq(
+ (psser1 | psser2).sort_index(),
+ pd.Series([True, None, None, None, True, None],
index=list("ABCDEF"), dtype="boolean"),
+ )
+ self.assert_eq(
+ (psser1 & psser2).sort_index(),
+ pd.Series(
+ [None, False, None, False, None, None], index=list("ABCDEF"),
dtype="boolean"
+ ),
+ )
+
+
+class BitwiseTests(
+ BitwiseMixin,
+ PandasOnSparkTestCase,
+ SQLTestUtils,
+):
+ pass
+
+
+if __name__ == "__main__":
+ import unittest
+ from pyspark.pandas.tests.diff_frames_ops.test_bitwise import * # noqa:
F401
+
+ try:
+ import xmlrunner
+
+ testRunner = xmlrunner.XMLTestRunner(output="target/test-reports",
verbosity=2)
+ except ImportError:
+ testRunner = None
+ unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/diff_frames_ops/test_combine_first.py
b/python/pyspark/pandas/tests/diff_frames_ops/test_combine_first.py
new file mode 100644
index 000000000000..3fae57ac47c0
--- /dev/null
+++ b/python/pyspark/pandas/tests/diff_frames_ops/test_combine_first.py
@@ -0,0 +1,110 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import pandas as pd
+
+from pyspark import pandas as ps
+from pyspark.pandas.config import set_option, reset_option
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.testing.sqlutils import SQLTestUtils
+
+
+class CombineFirstMixin:
+ @classmethod
+ def setUpClass(cls):
+ super().setUpClass()
+ set_option("compute.ops_on_diff_frames", True)
+
+ @classmethod
+ def tearDownClass(cls):
+ reset_option("compute.ops_on_diff_frames")
+ super().tearDownClass()
+
+ def test_combine_first(self):
+ pser1 = pd.Series({"falcon": 330.0, "eagle": 160.0})
+ pser2 = pd.Series({"falcon": 345.0, "eagle": 200.0, "duck": 30.0})
+ psser1 = ps.from_pandas(pser1)
+ psser2 = ps.from_pandas(pser2)
+
+ self.assert_eq(
+ psser1.combine_first(psser2).sort_index(),
pser1.combine_first(pser2).sort_index()
+ )
+ with self.assertRaisesRegex(
+ TypeError, "`combine_first` only allows `Series` for parameter
`other`"
+ ):
+ psser1.combine_first(50)
+
+ psser1.name = ("X", "A")
+ psser2.name = ("Y", "B")
+ pser1.name = ("X", "A")
+ pser2.name = ("Y", "B")
+ self.assert_eq(
+ psser1.combine_first(psser2).sort_index(),
pser1.combine_first(pser2).sort_index()
+ )
+
+ # MultiIndex
+ midx1 = pd.MultiIndex(
+ [["lama", "cow", "falcon", "koala"], ["speed", "weight", "length",
"power"]],
+ [[0, 3, 1, 1, 1, 2, 2, 2], [0, 2, 0, 3, 2, 0, 1, 3]],
+ )
+ midx2 = pd.MultiIndex(
+ [["lama", "cow", "falcon"], ["speed", "weight", "length"]],
+ [[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]],
+ )
+ pser1 = pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1], index=midx1)
+ pser2 = pd.Series([-45, 200, -1.2, 30, -250, 1.5, 320, 1, -0.3],
index=midx2)
+ psser1 = ps.from_pandas(pser1)
+ psser2 = ps.from_pandas(pser2)
+
+ self.assert_eq(
+ psser1.combine_first(psser2).sort_index(),
pser1.combine_first(pser2).sort_index()
+ )
+
+ # DataFrame
+ pdf1 = pd.DataFrame({"A": [None, 0], "B": [4, None]})
+ psdf1 = ps.from_pandas(pdf1)
+ pdf2 = pd.DataFrame({"C": [3, 3], "B": [1, 1]})
+ psdf2 = ps.from_pandas(pdf2)
+
+ self.assert_eq(pdf1.combine_first(pdf2),
psdf1.combine_first(psdf2).sort_index())
+
+ pdf1.columns = pd.MultiIndex.from_tuples([("A", "willow"), ("B",
"pine")])
+ psdf1 = ps.from_pandas(pdf1)
+ pdf2.columns = pd.MultiIndex.from_tuples([("C", "oak"), ("B", "pine")])
+ psdf2 = ps.from_pandas(pdf2)
+
+ self.assert_eq(pdf1.combine_first(pdf2),
psdf1.combine_first(psdf2).sort_index())
+
+
+class CombineFirstTests(
+ CombineFirstMixin,
+ PandasOnSparkTestCase,
+ SQLTestUtils,
+):
+ pass
+
+
+if __name__ == "__main__":
+ import unittest
+ from pyspark.pandas.tests.diff_frames_ops.test_combine_first import * #
noqa: F401
+
+ try:
+ import xmlrunner
+
+ testRunner = xmlrunner.XMLTestRunner(output="target/test-reports",
verbosity=2)
+ except ImportError:
+ testRunner = None
+ unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/diff_frames_ops/test_compare_series.py
b/python/pyspark/pandas/tests/diff_frames_ops/test_compare_series.py
new file mode 100644
index 000000000000..c548f8a2d32c
--- /dev/null
+++ b/python/pyspark/pandas/tests/diff_frames_ops/test_compare_series.py
@@ -0,0 +1,155 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import numpy as np
+import pandas as pd
+
+from pyspark import pandas as ps
+from pyspark.pandas.config import set_option, reset_option
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.testing.sqlutils import SQLTestUtils
+
+
+class CompareSeriesMixin:
+ @classmethod
+ def setUpClass(cls):
+ super().setUpClass()
+ set_option("compute.ops_on_diff_frames", True)
+
+ @classmethod
+ def tearDownClass(cls):
+ reset_option("compute.ops_on_diff_frames")
+ super().tearDownClass()
+
+ def test_compare(self):
+ pser1 = pd.Series(["b", "c", np.nan, "g", np.nan])
+ pser2 = pd.Series(["a", "c", np.nan, np.nan, "h"])
+ psser1 = ps.from_pandas(pser1)
+ psser2 = ps.from_pandas(pser2)
+ self.assert_eq(
+ pser1.compare(pser2).sort_index(),
+ psser1.compare(psser2).sort_index(),
+ )
+
+ # `keep_shape=True`
+ self.assert_eq(
+ pser1.compare(pser2, keep_shape=True).sort_index(),
+ psser1.compare(psser2, keep_shape=True).sort_index(),
+ )
+ # `keep_equal=True`
+ self.assert_eq(
+ pser1.compare(pser2, keep_equal=True).sort_index(),
+ psser1.compare(psser2, keep_equal=True).sort_index(),
+ )
+ # `keep_shape=True` and `keep_equal=True`
+ self.assert_eq(
+ pser1.compare(pser2, keep_shape=True,
keep_equal=True).sort_index(),
+ psser1.compare(psser2, keep_shape=True,
keep_equal=True).sort_index(),
+ )
+
+ # MultiIndex
+ pser1.index = pd.MultiIndex.from_tuples(
+ [("a", "x"), ("b", "y"), ("c", "z"), ("x", "k"), ("q", "l")]
+ )
+ pser2.index = pd.MultiIndex.from_tuples(
+ [("a", "x"), ("b", "y"), ("c", "z"), ("x", "k"), ("q", "l")]
+ )
+ psser1 = ps.from_pandas(pser1)
+ psser2 = ps.from_pandas(pser2)
+ self.assert_eq(
+ pser1.compare(pser2).sort_index(),
+ psser1.compare(psser2).sort_index(),
+ )
+
+ # `keep_shape=True` with MultiIndex
+ self.assert_eq(
+ pser1.compare(pser2, keep_shape=True).sort_index(),
+ psser1.compare(psser2, keep_shape=True).sort_index(),
+ )
+ # `keep_equal=True` with MultiIndex
+ self.assert_eq(
+ pser1.compare(pser2, keep_equal=True).sort_index(),
+ psser1.compare(psser2, keep_equal=True).sort_index(),
+ )
+ # `keep_shape=True` and `keep_equal=True` with MultiIndex
+ self.assert_eq(
+ pser1.compare(pser2, keep_shape=True,
keep_equal=True).sort_index(),
+ psser1.compare(psser2, keep_shape=True,
keep_equal=True).sort_index(),
+ )
+
+ # Different Index
+ with self.assertRaisesRegex(
+ ValueError, "Can only compare identically-labeled Series objects"
+ ):
+ psser1 = ps.Series(
+ [1, 2, 3, 4, 5],
+ index=pd.Index([1, 2, 3, 4, 5]),
+ )
+ psser2 = ps.Series(
+ [2, 2, 3, 4, 1],
+ index=pd.Index([5, 4, 3, 2, 1]),
+ )
+ psser1.compare(psser2)
+ # Different MultiIndex
+ with self.assertRaisesRegex(
+ ValueError, "Can only compare identically-labeled Series objects"
+ ):
+ psser1 = ps.Series(
+ [1, 2, 3, 4, 5],
+ index=pd.MultiIndex.from_tuples(
+ [("a", "x"), ("b", "y"), ("c", "z"), ("x", "k"), ("q",
"l")]
+ ),
+ )
+ psser2 = ps.Series(
+ [2, 2, 3, 4, 1],
+ index=pd.MultiIndex.from_tuples(
+ [("a", "x"), ("b", "y"), ("c", "a"), ("x", "k"), ("q",
"l")]
+ ),
+ )
+ psser1.compare(psser2)
+ # SPARK-37495: Skip identical index checking of Series.compare when
config
+ # 'compute.eager_check' is disabled
+ psser1 = ps.Series([1, 2, 3, 4, 5], index=pd.Index([1, 2, 3, 4, 5]))
+ psser2 = ps.Series([1, 2, 3, 4, 5, 6], index=pd.Index([1, 2, 4, 3, 6,
7]))
+ expected = ps.DataFrame(
+ {"self": [3, 4, 5, np.nan, np.nan], "other": [4, 3, np.nan, 5.0,
6.0]},
+ index=[3, 4, 5, 6, 7],
+ )
+
+ with ps.option_context("compute.eager_check", False):
+ self.assert_eq(expected, psser1.compare(psser2))
+
+
+class CompareSeriesTests(
+ CompareSeriesMixin,
+ PandasOnSparkTestCase,
+ SQLTestUtils,
+):
+ pass
+
+
+if __name__ == "__main__":
+ import unittest
+ from pyspark.pandas.tests.diff_frames_ops.test_compare_series import * #
noqa: F401
+
+ try:
+ import xmlrunner
+
+ testRunner = xmlrunner.XMLTestRunner(output="target/test-reports",
verbosity=2)
+ except ImportError:
+ testRunner = None
+ unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/diff_frames_ops/test_concat_inner.py
b/python/pyspark/pandas/tests/diff_frames_ops/test_concat_inner.py
new file mode 100644
index 000000000000..57e0d3948944
--- /dev/null
+++ b/python/pyspark/pandas/tests/diff_frames_ops/test_concat_inner.py
@@ -0,0 +1,123 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import pandas as pd
+
+from pyspark import pandas as ps
+from pyspark.pandas.config import set_option, reset_option
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.testing.sqlutils import SQLTestUtils
+
+
+class ConcatTestingFuncMixin:
+ def _test_frames(self):
+ pdf1 = pd.DataFrame({"A": [0, 2, 4], "B": [1, 3, 5]}, index=[1, 2, 3])
+ pdf1.columns.names = ["AB"]
+ pdf2 = pd.DataFrame({"C": [1, 2, 3], "D": [4, 5, 6]}, index=[1, 3, 5])
+ pdf2.columns.names = ["CD"]
+ psdf1 = ps.from_pandas(pdf1)
+ psdf2 = ps.from_pandas(pdf2)
+
+ psdf3 = psdf1.copy()
+ psdf4 = psdf2.copy()
+ pdf3 = pdf1.copy()
+ pdf4 = pdf2.copy()
+
+ columns = pd.MultiIndex.from_tuples([("X", "A"), ("X", "B")],
names=["X", "AB"])
+ pdf3.columns = columns
+ psdf3.columns = columns
+
+ columns = pd.MultiIndex.from_tuples([("X", "C"), ("X", "D")],
names=["Y", "CD"])
+ pdf4.columns = columns
+ psdf4.columns = columns
+
+ pdf5 = pd.DataFrame({"A": [0, 2, 4], "B": [1, 3, 5]}, index=[1, 2, 3])
+ pdf6 = pd.DataFrame({"C": [1, 2, 3]}, index=[1, 3, 5])
+ psdf5 = ps.from_pandas(pdf5)
+ psdf6 = ps.from_pandas(pdf6)
+
+ objs = [
+ ([psdf1.A, psdf2.C], [pdf1.A, pdf2.C]),
+ # TODO: ([psdf1, psdf2.C], [pdf1, pdf2.C]),
+ ([psdf1.A, psdf2], [pdf1.A, pdf2]),
+ ([psdf1.A, psdf2.C], [pdf1.A, pdf2.C]),
+ ([psdf3[("X", "A")], psdf4[("X", "C")]], [pdf3[("X", "A")],
pdf4[("X", "C")]]),
+ ([psdf3, psdf4[("X", "C")]], [pdf3, pdf4[("X", "C")]]),
+ ([psdf3[("X", "A")], psdf4], [pdf3[("X", "A")], pdf4]),
+ ([psdf3, psdf4], [pdf3, pdf4]),
+ ([psdf5, psdf6], [pdf5, pdf6]),
+ ([psdf6, psdf5], [pdf6, pdf5]),
+ ]
+
+ return objs
+
+
+class ConcatInnerMixin(ConcatTestingFuncMixin):
+ @classmethod
+ def setUpClass(cls):
+ super().setUpClass()
+ set_option("compute.ops_on_diff_frames", True)
+
+ @classmethod
+ def tearDownClass(cls):
+ reset_option("compute.ops_on_diff_frames")
+ super().tearDownClass()
+
+ def test_concat_column_axis_inner(self):
+ join = "inner"
+
+ objs = self._test_frames()
+ for i, (psdfs, pdfs) in enumerate(objs):
+ for ignore_index in [True, False]:
+ with self.subTest(ignore_index=ignore_index, join=join,
pdfs=pdfs, pair=i):
+ actual = ps.concat(psdfs, axis=1,
ignore_index=ignore_index, join=join)
+ expected = pd.concat(pdfs, axis=1,
ignore_index=ignore_index, join=join)
+ self.assert_eq(
+
repr(actual.sort_values(list(actual.columns)).reset_index(drop=True)),
+
repr(expected.sort_values(list(expected.columns)).reset_index(drop=True)),
+ )
+ actual = ps.concat(
+ psdfs, axis=1, ignore_index=ignore_index, join=join,
sort=True
+ )
+ expected = pd.concat(
+ pdfs, axis=1, ignore_index=ignore_index, join=join,
sort=True
+ )
+ self.assert_eq(
+ repr(actual.reset_index(drop=True)),
+ repr(expected.reset_index(drop=True)),
+ )
+
+
+class ConcatInnerTests(
+ ConcatInnerMixin,
+ PandasOnSparkTestCase,
+ SQLTestUtils,
+):
+ pass
+
+
+if __name__ == "__main__":
+ import unittest
+ from pyspark.pandas.tests.diff_frames_ops.test_concat_inner import * #
noqa: F401
+
+ try:
+ import xmlrunner
+
+ testRunner = xmlrunner.XMLTestRunner(output="target/test-reports",
verbosity=2)
+ except ImportError:
+ testRunner = None
+ unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/diff_frames_ops/test_concat_outer.py
b/python/pyspark/pandas/tests/diff_frames_ops/test_concat_outer.py
new file mode 100644
index 000000000000..bc6942b73226
--- /dev/null
+++ b/python/pyspark/pandas/tests/diff_frames_ops/test_concat_outer.py
@@ -0,0 +1,81 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import pandas as pd
+
+from pyspark import pandas as ps
+from pyspark.pandas.config import set_option, reset_option
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.testing.sqlutils import SQLTestUtils
+from pyspark.pandas.tests.diff_frames_ops.test_concat_inner import
ConcatTestingFuncMixin
+
+
class ConcatOuterMixin(ConcatTestingFuncMixin):
    """Tests for ps.concat along the column axis with join="outer",
    checked against pandas on frames with differing anchors."""

    @classmethod
    def setUpClass(cls):
        super().setUpClass()
        # Combining differently-anchored frames requires this option.
        set_option("compute.ops_on_diff_frames", True)

    @classmethod
    def tearDownClass(cls):
        reset_option("compute.ops_on_diff_frames")
        super().tearDownClass()

    def test_concat_column_axis_outer(self):
        join = "outer"

        for pair, (psdfs, pdfs) in enumerate(self._test_frames()):
            for ignore_index in (True, False):
                with self.subTest(ignore_index=ignore_index, join=join, pdfs=pdfs, pair=pair):
                    # Normalize row order (sort on all columns, drop the
                    # index) before comparing the two results.
                    got = ps.concat(psdfs, axis=1, ignore_index=ignore_index, join=join)
                    want = pd.concat(pdfs, axis=1, ignore_index=ignore_index, join=join)
                    self.assert_eq(
                        repr(got.sort_values(list(got.columns)).reset_index(drop=True)),
                        repr(want.sort_values(list(want.columns)).reset_index(drop=True)),
                    )
                    # With sort=True, compare without the extra sort_values step.
                    got = ps.concat(psdfs, axis=1, ignore_index=ignore_index, join=join, sort=True)
                    want = pd.concat(pdfs, axis=1, ignore_index=ignore_index, join=join, sort=True)
                    self.assert_eq(
                        repr(got.reset_index(drop=True)),
                        repr(want.reset_index(drop=True)),
                    )
+
+
class ConcatOuterTests(
    ConcatOuterMixin,
    PandasOnSparkTestCase,
    SQLTestUtils,
):
    # Concrete test class: binds the shared mixin's test cases to the
    # Spark-backed harness; all test logic lives in ConcatOuterMixin.
    pass
+
+
+if __name__ == "__main__":
+ import unittest
+ from pyspark.pandas.tests.diff_frames_ops.test_concat_outer import * #
noqa: F401
+
+ try:
+ import xmlrunner
+
+ testRunner = xmlrunner.XMLTestRunner(output="target/test-reports",
verbosity=2)
+ except ImportError:
+ testRunner = None
+ unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/test_ops_on_diff_frames.py
b/python/pyspark/pandas/tests/test_ops_on_diff_frames.py
index 505e96e68752..75410a65227d 100644
--- a/python/pyspark/pandas/tests/test_ops_on_diff_frames.py
+++ b/python/pyspark/pandas/tests/test_ops_on_diff_frames.py
@@ -15,21 +15,15 @@
# limitations under the License.
#
-from itertools import product
+
import unittest
import pandas as pd
-import numpy as np
from pyspark import pandas as ps
from pyspark.pandas.config import set_option, reset_option
from pyspark.testing.pandasutils import PandasOnSparkTestCase
from pyspark.testing.sqlutils import SQLTestUtils
-from pyspark.pandas.typedef.typehints import (
- extension_dtypes_available,
- extension_float_dtypes_available,
- extension_object_dtypes_available,
-)
class OpsOnDiffFramesEnabledTestsMixin:
@@ -232,175 +226,6 @@ class OpsOnDiffFramesEnabledTestsMixin:
pser.name = psser.name = "B"
self.assert_eq(pser.loc[pdf2.A > -3].sort_index(), psser.loc[psdf2.A >
-3].sort_index())
- def test_bitwise(self):
- pser1 = pd.Series([True, False, True, False, np.nan, np.nan, True,
False, np.nan])
- pser2 = pd.Series([True, False, False, True, True, False, np.nan,
np.nan, np.nan])
- psser1 = ps.from_pandas(pser1)
- psser2 = ps.from_pandas(pser2)
-
- self.assert_eq(pser1 | pser2, (psser1 | psser2).sort_index())
- self.assert_eq(pser1 & pser2, (psser1 & psser2).sort_index())
-
- pser1 = pd.Series([True, False, np.nan], index=list("ABC"))
- pser2 = pd.Series([False, True, np.nan], index=list("DEF"))
- psser1 = ps.from_pandas(pser1)
- psser2 = ps.from_pandas(pser2)
-
- self.assert_eq(pser1 | pser2, (psser1 | psser2).sort_index())
- self.assert_eq(pser1 & pser2, (psser1 & psser2).sort_index())
-
- @unittest.skipIf(
- not extension_object_dtypes_available, "pandas extension object dtypes
are not available"
- )
- def test_bitwise_extension_dtype(self):
- pser1 = pd.Series(
- [True, False, True, False, np.nan, np.nan, True, False, np.nan],
dtype="boolean"
- )
- pser2 = pd.Series(
- [True, False, False, True, True, False, np.nan, np.nan, np.nan],
dtype="boolean"
- )
- psser1 = ps.from_pandas(pser1)
- psser2 = ps.from_pandas(pser2)
-
- self.assert_eq((psser1 | psser2).sort_index(), pser1 | pser2)
- self.assert_eq((psser1 & psser2).sort_index(), pser1 & pser2)
-
- pser1 = pd.Series([True, False, np.nan], index=list("ABC"),
dtype="boolean")
- pser2 = pd.Series([False, True, np.nan], index=list("DEF"),
dtype="boolean")
- psser1 = ps.from_pandas(pser1)
- psser2 = ps.from_pandas(pser2)
-
- # a pandas bug?
- # assert_eq((psser1 | psser2).sort_index(), pser1 | pser2)
- # assert_eq((psser1 & psser2).sort_index(), pser1 & pser2)
- self.assert_eq(
- (psser1 | psser2).sort_index(),
- pd.Series([True, None, None, None, True, None],
index=list("ABCDEF"), dtype="boolean"),
- )
- self.assert_eq(
- (psser1 & psser2).sort_index(),
- pd.Series(
- [None, False, None, False, None, None], index=list("ABCDEF"),
dtype="boolean"
- ),
- )
-
- def test_concat_column_axis(self):
- pdf1 = pd.DataFrame({"A": [0, 2, 4], "B": [1, 3, 5]}, index=[1, 2, 3])
- pdf1.columns.names = ["AB"]
- pdf2 = pd.DataFrame({"C": [1, 2, 3], "D": [4, 5, 6]}, index=[1, 3, 5])
- pdf2.columns.names = ["CD"]
- psdf1 = ps.from_pandas(pdf1)
- psdf2 = ps.from_pandas(pdf2)
-
- psdf3 = psdf1.copy()
- psdf4 = psdf2.copy()
- pdf3 = pdf1.copy()
- pdf4 = pdf2.copy()
-
- columns = pd.MultiIndex.from_tuples([("X", "A"), ("X", "B")],
names=["X", "AB"])
- pdf3.columns = columns
- psdf3.columns = columns
-
- columns = pd.MultiIndex.from_tuples([("X", "C"), ("X", "D")],
names=["Y", "CD"])
- pdf4.columns = columns
- psdf4.columns = columns
-
- pdf5 = pd.DataFrame({"A": [0, 2, 4], "B": [1, 3, 5]}, index=[1, 2, 3])
- pdf6 = pd.DataFrame({"C": [1, 2, 3]}, index=[1, 3, 5])
- psdf5 = ps.from_pandas(pdf5)
- psdf6 = ps.from_pandas(pdf6)
-
- ignore_indexes = [True, False]
- joins = ["inner", "outer"]
-
- objs = [
- ([psdf1.A, psdf2.C], [pdf1.A, pdf2.C]),
- # TODO: ([psdf1, psdf2.C], [pdf1, pdf2.C]),
- ([psdf1.A, psdf2], [pdf1.A, pdf2]),
- ([psdf1.A, psdf2.C], [pdf1.A, pdf2.C]),
- ([psdf3[("X", "A")], psdf4[("X", "C")]], [pdf3[("X", "A")],
pdf4[("X", "C")]]),
- ([psdf3, psdf4[("X", "C")]], [pdf3, pdf4[("X", "C")]]),
- ([psdf3[("X", "A")], psdf4], [pdf3[("X", "A")], pdf4]),
- ([psdf3, psdf4], [pdf3, pdf4]),
- ([psdf5, psdf6], [pdf5, pdf6]),
- ([psdf6, psdf5], [pdf6, pdf5]),
- ]
-
- for ignore_index, join in product(ignore_indexes, joins):
- for i, (psdfs, pdfs) in enumerate(objs):
- with self.subTest(ignore_index=ignore_index, join=join,
pdfs=pdfs, pair=i):
- actual = ps.concat(psdfs, axis=1,
ignore_index=ignore_index, join=join)
- expected = pd.concat(pdfs, axis=1,
ignore_index=ignore_index, join=join)
- self.assert_eq(
-
repr(actual.sort_values(list(actual.columns)).reset_index(drop=True)),
-
repr(expected.sort_values(list(expected.columns)).reset_index(drop=True)),
- )
- actual = ps.concat(
- psdfs, axis=1, ignore_index=ignore_index, join=join,
sort=True
- )
- expected = pd.concat(
- pdfs, axis=1, ignore_index=ignore_index, join=join,
sort=True
- )
- self.assert_eq(
- repr(actual.reset_index(drop=True)),
- repr(expected.reset_index(drop=True)),
- )
-
- def test_combine_first(self):
- pser1 = pd.Series({"falcon": 330.0, "eagle": 160.0})
- pser2 = pd.Series({"falcon": 345.0, "eagle": 200.0, "duck": 30.0})
- psser1 = ps.from_pandas(pser1)
- psser2 = ps.from_pandas(pser2)
-
- self.assert_eq(
- psser1.combine_first(psser2).sort_index(),
pser1.combine_first(pser2).sort_index()
- )
- with self.assertRaisesRegex(
- TypeError, "`combine_first` only allows `Series` for parameter
`other`"
- ):
- psser1.combine_first(50)
-
- psser1.name = ("X", "A")
- psser2.name = ("Y", "B")
- pser1.name = ("X", "A")
- pser2.name = ("Y", "B")
- self.assert_eq(
- psser1.combine_first(psser2).sort_index(),
pser1.combine_first(pser2).sort_index()
- )
-
- # MultiIndex
- midx1 = pd.MultiIndex(
- [["lama", "cow", "falcon", "koala"], ["speed", "weight", "length",
"power"]],
- [[0, 3, 1, 1, 1, 2, 2, 2], [0, 2, 0, 3, 2, 0, 1, 3]],
- )
- midx2 = pd.MultiIndex(
- [["lama", "cow", "falcon"], ["speed", "weight", "length"]],
- [[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]],
- )
- pser1 = pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1], index=midx1)
- pser2 = pd.Series([-45, 200, -1.2, 30, -250, 1.5, 320, 1, -0.3],
index=midx2)
- psser1 = ps.from_pandas(pser1)
- psser2 = ps.from_pandas(pser2)
-
- self.assert_eq(
- psser1.combine_first(psser2).sort_index(),
pser1.combine_first(pser2).sort_index()
- )
-
- # DataFrame
- pdf1 = pd.DataFrame({"A": [None, 0], "B": [4, None]})
- psdf1 = ps.from_pandas(pdf1)
- pdf2 = pd.DataFrame({"C": [3, 3], "B": [1, 1]})
- psdf2 = ps.from_pandas(pdf2)
-
- self.assert_eq(pdf1.combine_first(pdf2),
psdf1.combine_first(psdf2).sort_index())
-
- pdf1.columns = pd.MultiIndex.from_tuples([("A", "willow"), ("B",
"pine")])
- psdf1 = ps.from_pandas(pdf1)
- pdf2.columns = pd.MultiIndex.from_tuples([("C", "oak"), ("B", "pine")])
- psdf2 = ps.from_pandas(pdf2)
-
- self.assert_eq(pdf1.combine_first(pdf2),
psdf1.combine_first(psdf2).sort_index())
-
def test_insert(self):
#
# Basic DataFrame
@@ -443,104 +268,6 @@ class OpsOnDiffFramesEnabledTestsMixin:
pdf.insert(0, ("b", "c", ""), pser)
self.assert_eq(psdf.sort_index(), pdf.sort_index())
- def test_compare(self):
- pser1 = pd.Series(["b", "c", np.nan, "g", np.nan])
- pser2 = pd.Series(["a", "c", np.nan, np.nan, "h"])
- psser1 = ps.from_pandas(pser1)
- psser2 = ps.from_pandas(pser2)
- self.assert_eq(
- pser1.compare(pser2).sort_index(),
- psser1.compare(psser2).sort_index(),
- )
-
- # `keep_shape=True`
- self.assert_eq(
- pser1.compare(pser2, keep_shape=True).sort_index(),
- psser1.compare(psser2, keep_shape=True).sort_index(),
- )
- # `keep_equal=True`
- self.assert_eq(
- pser1.compare(pser2, keep_equal=True).sort_index(),
- psser1.compare(psser2, keep_equal=True).sort_index(),
- )
- # `keep_shape=True` and `keep_equal=True`
- self.assert_eq(
- pser1.compare(pser2, keep_shape=True,
keep_equal=True).sort_index(),
- psser1.compare(psser2, keep_shape=True,
keep_equal=True).sort_index(),
- )
-
- # MultiIndex
- pser1.index = pd.MultiIndex.from_tuples(
- [("a", "x"), ("b", "y"), ("c", "z"), ("x", "k"), ("q", "l")]
- )
- pser2.index = pd.MultiIndex.from_tuples(
- [("a", "x"), ("b", "y"), ("c", "z"), ("x", "k"), ("q", "l")]
- )
- psser1 = ps.from_pandas(pser1)
- psser2 = ps.from_pandas(pser2)
- self.assert_eq(
- pser1.compare(pser2).sort_index(),
- psser1.compare(psser2).sort_index(),
- )
-
- # `keep_shape=True` with MultiIndex
- self.assert_eq(
- pser1.compare(pser2, keep_shape=True).sort_index(),
- psser1.compare(psser2, keep_shape=True).sort_index(),
- )
- # `keep_equal=True` with MultiIndex
- self.assert_eq(
- pser1.compare(pser2, keep_equal=True).sort_index(),
- psser1.compare(psser2, keep_equal=True).sort_index(),
- )
- # `keep_shape=True` and `keep_equal=True` with MultiIndex
- self.assert_eq(
- pser1.compare(pser2, keep_shape=True,
keep_equal=True).sort_index(),
- psser1.compare(psser2, keep_shape=True,
keep_equal=True).sort_index(),
- )
-
- # Different Index
- with self.assertRaisesRegex(
- ValueError, "Can only compare identically-labeled Series objects"
- ):
- psser1 = ps.Series(
- [1, 2, 3, 4, 5],
- index=pd.Index([1, 2, 3, 4, 5]),
- )
- psser2 = ps.Series(
- [2, 2, 3, 4, 1],
- index=pd.Index([5, 4, 3, 2, 1]),
- )
- psser1.compare(psser2)
- # Different MultiIndex
- with self.assertRaisesRegex(
- ValueError, "Can only compare identically-labeled Series objects"
- ):
- psser1 = ps.Series(
- [1, 2, 3, 4, 5],
- index=pd.MultiIndex.from_tuples(
- [("a", "x"), ("b", "y"), ("c", "z"), ("x", "k"), ("q",
"l")]
- ),
- )
- psser2 = ps.Series(
- [2, 2, 3, 4, 1],
- index=pd.MultiIndex.from_tuples(
- [("a", "x"), ("b", "y"), ("c", "a"), ("x", "k"), ("q",
"l")]
- ),
- )
- psser1.compare(psser2)
- # SPARK-37495: Skip identical index checking of Series.compare when
config
- # 'compute.eager_check' is disabled
- psser1 = ps.Series([1, 2, 3, 4, 5], index=pd.Index([1, 2, 3, 4, 5]))
- psser2 = ps.Series([1, 2, 3, 4, 5, 6], index=pd.Index([1, 2, 4, 3, 6,
7]))
- expected = ps.DataFrame(
- {"self": [3, 4, 5, np.nan, np.nan], "other": [4, 3, np.nan, 5.0,
6.0]},
- index=[3, 4, 5, 6, 7],
- )
-
- with ps.option_context("compute.eager_check", False):
- self.assert_eq(expected, psser1.compare(psser2))
-
def test_different_columns(self):
psdf1 = self.psdf1
psdf4 = self.psdf4
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]