(spark) branch master updated: [SPARK-46504][PS][TESTS][FOLLOWUP] Break the remaining part of `IndexesTests` into small test files

ruifengz Wed, 03 Jan 2024 22:34:07 -0800

This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 59d147a4f48f [SPARK-46504][PS][TESTS][FOLLOWUP] Break the remaining part of `IndexesTests` into small test files
59d147a4f48f is described below

commit 59d147a4f48ff6112c682e9797dbd982022bfc10
Author: Ruifeng Zheng <ruife...@apache.org>
AuthorDate: Thu Jan 4 14:33:42 2024 +0800

    [SPARK-46504][PS][TESTS][FOLLOWUP] Break the remaining part of `IndexesTests` into small test files
    
    ### What changes were proposed in this pull request?
    Break the remaining part of `IndexesTests` into small test files
    
    ### Why are the changes needed?
    To improve testing parallelism.
    
    ### Does this PR introduce _any_ user-facing change?
    no
    
    ### How was this patch tested?
    ci
    
    ### Was this patch authored or co-authored using generative AI tooling?
    no
    
    Closes #44588 from zhengruifeng/ps_test_idx_base_lastlast.
    
    Authored-by: Ruifeng Zheng <ruife...@apache.org>
    Signed-off-by: Ruifeng Zheng <ruife...@apache.org>
---
 dev/sparktestsupport/modules.py                    |   8 +-
 .../{test_parity_base.py => test_parity_basic.py}  |  17 +-
 ...{test_parity_base.py => test_parity_getattr.py} |  17 +-
 .../{test_parity_base.py => test_parity_name.py}   |  17 +-
 .../tests/indexes/{test_base.py => test_basic.py}  | 155 +----------------
 .../pyspark/pandas/tests/indexes/test_getattr.py   |  79 +++++++++
 python/pyspark/pandas/tests/indexes/test_name.py   | 183 +++++++++++++++++++++
 7 files changed, 296 insertions(+), 180 deletions(-)

diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index a97e6afdc356..699a9d07452d 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -795,7 +795,9 @@ pyspark_pandas_slow = Module(
         "pyspark.pandas.generic",
         "pyspark.pandas.series",
         # unittests
-        "pyspark.pandas.tests.indexes.test_base",
+        "pyspark.pandas.tests.indexes.test_basic",
+        "pyspark.pandas.tests.indexes.test_getattr",
+        "pyspark.pandas.tests.indexes.test_name",
         "pyspark.pandas.tests.indexes.test_conversion",
         "pyspark.pandas.tests.indexes.test_drop",
         "pyspark.pandas.tests.indexes.test_level",
@@ -1095,7 +1097,9 @@ pyspark_pandas_connect_part0 = Module(
         "pyspark.pandas.tests.connect.test_parity_sql",
         "pyspark.pandas.tests.connect.test_parity_typedef",
         "pyspark.pandas.tests.connect.test_parity_utils",
-        "pyspark.pandas.tests.connect.indexes.test_parity_base",
+        "pyspark.pandas.tests.connect.indexes.test_parity_basic",
+        "pyspark.pandas.tests.connect.indexes.test_parity_getattr",
+        "pyspark.pandas.tests.connect.indexes.test_parity_name",
         "pyspark.pandas.tests.connect.indexes.test_parity_conversion",
         "pyspark.pandas.tests.connect.indexes.test_parity_drop",
         "pyspark.pandas.tests.connect.indexes.test_parity_level",
diff --git a/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py 
b/python/pyspark/pandas/tests/connect/indexes/test_parity_basic.py
similarity index 72%
copy from python/pyspark/pandas/tests/connect/indexes/test_parity_base.py
copy to python/pyspark/pandas/tests/connect/indexes/test_parity_basic.py
index 83ce92eb34b2..94651552ea8d 100644
--- a/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py
+++ b/python/pyspark/pandas/tests/connect/indexes/test_parity_basic.py
@@ -16,22 +16,21 @@
 #
 import unittest
 
-from pyspark import pandas as ps
-from pyspark.pandas.tests.indexes.test_base import IndexesTestsMixin
+from pyspark.pandas.tests.indexes.test_basic import IndexBasicMixin
 from pyspark.testing.connectutils import ReusedConnectTestCase
-from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
 
 
-class IndexesParityTests(
-    IndexesTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase
+class IndexBasicParityTests(
+    IndexBasicMixin,
+    PandasOnSparkTestUtils,
+    ReusedConnectTestCase,
 ):
-    @property
-    def psdf(self):
-        return ps.from_pandas(self.pdf)
+    pass
 
 
 if __name__ == "__main__":
-    from pyspark.pandas.tests.connect.indexes.test_parity_base import *  # 
noqa: F401
+    from pyspark.pandas.tests.connect.indexes.test_parity_basic import *  # 
noqa: F401
 
     try:
         import xmlrunner  # type: ignore[import]
diff --git a/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py 
b/python/pyspark/pandas/tests/connect/indexes/test_parity_getattr.py
similarity index 72%
copy from python/pyspark/pandas/tests/connect/indexes/test_parity_base.py
copy to python/pyspark/pandas/tests/connect/indexes/test_parity_getattr.py
index 83ce92eb34b2..47d893bda3be 100644
--- a/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py
+++ b/python/pyspark/pandas/tests/connect/indexes/test_parity_getattr.py
@@ -16,22 +16,21 @@
 #
 import unittest
 
-from pyspark import pandas as ps
-from pyspark.pandas.tests.indexes.test_base import IndexesTestsMixin
+from pyspark.pandas.tests.indexes.test_getattr import IndexGetattrMixin
 from pyspark.testing.connectutils import ReusedConnectTestCase
-from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
 
 
-class IndexesParityTests(
-    IndexesTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase
+class IndexGetattrParityTests(
+    IndexGetattrMixin,
+    PandasOnSparkTestUtils,
+    ReusedConnectTestCase,
 ):
-    @property
-    def psdf(self):
-        return ps.from_pandas(self.pdf)
+    pass
 
 
 if __name__ == "__main__":
-    from pyspark.pandas.tests.connect.indexes.test_parity_base import *  # 
noqa: F401
+    from pyspark.pandas.tests.connect.indexes.test_parity_getattr import *  # 
noqa: F401
 
     try:
         import xmlrunner  # type: ignore[import]
diff --git a/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py 
b/python/pyspark/pandas/tests/connect/indexes/test_parity_name.py
similarity index 73%
rename from python/pyspark/pandas/tests/connect/indexes/test_parity_base.py
rename to python/pyspark/pandas/tests/connect/indexes/test_parity_name.py
index 83ce92eb34b2..c4cbc8cbfbb1 100644
--- a/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py
+++ b/python/pyspark/pandas/tests/connect/indexes/test_parity_name.py
@@ -16,22 +16,21 @@
 #
 import unittest
 
-from pyspark import pandas as ps
-from pyspark.pandas.tests.indexes.test_base import IndexesTestsMixin
+from pyspark.pandas.tests.indexes.test_name import IndexNameMixin
 from pyspark.testing.connectutils import ReusedConnectTestCase
-from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils
+from pyspark.testing.pandasutils import PandasOnSparkTestUtils
 
 
-class IndexesParityTests(
-    IndexesTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase
+class IndexNameParityTests(
+    IndexNameMixin,
+    PandasOnSparkTestUtils,
+    ReusedConnectTestCase,
 ):
-    @property
-    def psdf(self):
-        return ps.from_pandas(self.pdf)
+    pass
 
 
 if __name__ == "__main__":
-    from pyspark.pandas.tests.connect.indexes.test_parity_base import *  # 
noqa: F401
+    from pyspark.pandas.tests.connect.indexes.test_parity_name import *  # 
noqa: F401
 
     try:
         import xmlrunner  # type: ignore[import]
diff --git a/python/pyspark/pandas/tests/indexes/test_base.py 
b/python/pyspark/pandas/tests/indexes/test_basic.py
similarity index 56%
rename from python/pyspark/pandas/tests/indexes/test_base.py
rename to python/pyspark/pandas/tests/indexes/test_basic.py
index 6671e3d93f75..a8f2c5b97c42 100644
--- a/python/pyspark/pandas/tests/indexes/test_base.py
+++ b/python/pyspark/pandas/tests/indexes/test_basic.py
@@ -22,12 +22,11 @@ import numpy as np
 import pandas as pd
 
 import pyspark.pandas as ps
-from pyspark.loose_version import LooseVersion
 from pyspark.pandas.exceptions import PandasNotImplementedError
 from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils, 
SPARK_CONF_ARROW_ENABLED
 
 
-class IndexesTestsMixin:
+class IndexBasicMixin:
     @property
     def pdf(self):
         return pd.DataFrame(
@@ -66,109 +65,6 @@ class IndexesTestsMixin:
         with self.assertRaisesRegex(TypeError, "Index.name must be a hashable 
type"):
             ps.Index([1.0, 2.0, 3.0], name=[(1, 2, 3)])
 
-    def test_index_getattr(self):
-        psidx = self.psdf.index
-        item = "databricks"
-
-        expected_error_message = "'.*Index' object has no attribute 
'{}'".format(item)
-        with self.assertRaisesRegex(AttributeError, expected_error_message):
-            psidx.__getattr__(item)
-        with self.assertRaisesRegex(AttributeError, expected_error_message):
-            ps.from_pandas(pd.date_range("2011-01-01", freq="D", 
periods=10)).__getattr__(item)
-
-    def test_multi_index_getattr(self):
-        arrays = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]]
-        idx = pd.MultiIndex.from_arrays(arrays, names=("number", "color"))
-        pdf = pd.DataFrame(np.random.randn(4, 5), idx)
-        psdf = ps.from_pandas(pdf)
-        psidx = psdf.index
-        item = "databricks"
-
-        expected_error_message = "'MultiIndex' object has no attribute 
'{}'".format(item)
-        with self.assertRaisesRegex(AttributeError, expected_error_message):
-            psidx.__getattr__(item)
-
-    def test_index_names(self):
-        psdf = self.psdf
-        self.assertIsNone(psdf.index.name)
-
-        idx = pd.Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], name="x")
-        pdf = pd.DataFrame(np.random.randn(10, 5), index=idx, 
columns=list("abcde"))
-        psdf = ps.from_pandas(pdf)
-
-        pser = pdf.a
-        psser = psdf.a
-
-        self.assertEqual(psdf.index.name, pdf.index.name)
-        self.assertEqual(psdf.index.names, pdf.index.names)
-
-        pidx = pdf.index
-        psidx = psdf.index
-        pidx.name = "renamed"
-        psidx.name = "renamed"
-        self.assertEqual(psidx.name, pidx.name)
-        self.assertEqual(psidx.names, pidx.names)
-        self.assert_eq(psidx, pidx)
-        self.assertEqual(psdf.index.name, pdf.index.name)
-        self.assertEqual(psdf.index.names, pdf.index.names)
-        self.assertEqual(psser.index.names, pser.index.names)
-
-        pidx.name = None
-        psidx.name = None
-        self.assertEqual(psidx.name, pidx.name)
-        self.assertEqual(psidx.names, pidx.names)
-        self.assert_eq(psidx, pidx)
-        self.assertEqual(psdf.index.name, pdf.index.name)
-        self.assertEqual(psdf.index.names, pdf.index.names)
-        self.assertEqual(psser.index.names, pser.index.names)
-
-        with self.assertRaisesRegex(ValueError, "Names must be a list-like"):
-            psidx.names = "hi"
-
-        expected_error_message = "Length of new names must be {}, got 
{}".format(
-            psdf._internal.index_level, len(["0", "1"])
-        )
-        with self.assertRaisesRegex(ValueError, expected_error_message):
-            psidx.names = ["0", "1"]
-
-        expected_error_message = "Index.name must be a hashable type"
-        with self.assertRaisesRegex(TypeError, expected_error_message):
-            ps.Index([1, 2, 3], name=["0", "1"])
-        with self.assertRaisesRegex(TypeError, expected_error_message):
-            psidx.name = ["renamed"]
-        with self.assertRaisesRegex(TypeError, expected_error_message):
-            psidx.name = ["0", "1"]
-        # Specifying `names` when creating Index is no longer supported from 
pandas 2.0.0.
-        if LooseVersion(pd.__version__) >= LooseVersion("2.0.0"):
-            pass
-        else:
-            with self.assertRaisesRegex(TypeError, expected_error_message):
-                ps.Index([(1, 2), (3, 4)], names=["a", ["b"]])
-
-    def test_multi_index_names(self):
-        arrays = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]]
-        idx = pd.MultiIndex.from_arrays(arrays, names=("number", "color"))
-        pdf = pd.DataFrame(np.random.randn(4, 5), idx)
-        psdf = ps.from_pandas(pdf)
-
-        self.assertEqual(psdf.index.names, pdf.index.names)
-
-        pidx = pdf.index
-        psidx = psdf.index
-        pidx.names = ["renamed_number", "renamed_color"]
-        psidx.names = ["renamed_number", "renamed_color"]
-        self.assertEqual(psidx.names, pidx.names)
-
-        pidx.names = ["renamed_number", None]
-        psidx.names = ["renamed_number", None]
-        self.assertEqual(psidx.names, pidx.names)
-        self.assert_eq(psidx, pidx)
-
-        with self.assertRaises(PandasNotImplementedError):
-            psidx.name
-        with self.assertRaises(PandasNotImplementedError):
-            psidx.name = "renamed"
-
     def test_multi_index_copy(self):
         arrays = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]]
         idx = pd.MultiIndex.from_arrays(arrays, names=("number", "color"))
@@ -177,49 +73,6 @@ class IndexesTestsMixin:
 
         self.assert_eq(psdf.index.copy(), pdf.index.copy())
 
-    def test_multiindex_set_names(self):
-        pidx = pd.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2), ("c", 
"z", 3)])
-        psidx = ps.from_pandas(pidx)
-
-        pidx = pidx.set_names(["set", "new", "names"])
-        psidx = psidx.set_names(["set", "new", "names"])
-        self.assert_eq(pidx, psidx)
-
-        pidx.set_names(["set", "new", "names"], inplace=True)
-        psidx.set_names(["set", "new", "names"], inplace=True)
-        self.assert_eq(pidx, psidx)
-
-        pidx = pidx.set_names("first", level=0)
-        psidx = psidx.set_names("first", level=0)
-        self.assert_eq(pidx, psidx)
-
-        pidx = pidx.set_names("second", level=1)
-        psidx = psidx.set_names("second", level=1)
-        self.assert_eq(pidx, psidx)
-
-        pidx = pidx.set_names("third", level=2)
-        psidx = psidx.set_names("third", level=2)
-        self.assert_eq(pidx, psidx)
-
-        pidx.set_names("first", level=0, inplace=True)
-        psidx.set_names("first", level=0, inplace=True)
-        self.assert_eq(pidx, psidx)
-
-        pidx.set_names("second", level=1, inplace=True)
-        psidx.set_names("second", level=1, inplace=True)
-        self.assert_eq(pidx, psidx)
-
-        pidx.set_names("third", level=2, inplace=True)
-        psidx.set_names("third", level=2, inplace=True)
-        self.assert_eq(pidx, psidx)
-
-    def test_multiindex_tuple_column_name(self):
-        column_labels = pd.MultiIndex.from_tuples([("a", "x"), ("a", "y"), 
("b", "z")])
-        pdf = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], 
columns=column_labels)
-        pdf.set_index(("a", "x"), append=True, inplace=True)
-        psdf = ps.from_pandas(pdf)
-        self.assert_eq(pdf, psdf)
-
     def test_holds_integer(self):
         pidx = pd.Index([1, 2, 3, 4])
         psidx = ps.from_pandas(pidx)
@@ -347,8 +200,8 @@ class IndexesTestsMixin:
         self.assertRaises(PandasNotImplementedError, lambda: 
psmidx.factorize())
 
 
-class IndexesTests(
-    IndexesTestsMixin,
+class IndexBasicTests(
+    IndexBasicMixin,
     PandasOnSparkTestCase,
     TestUtils,
 ):
@@ -356,7 +209,7 @@ class IndexesTests(
 
 
 if __name__ == "__main__":
-    from pyspark.pandas.tests.indexes.test_base import *  # noqa: F401
+    from pyspark.pandas.tests.indexes.test_basic import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/pandas/tests/indexes/test_getattr.py 
b/python/pyspark/pandas/tests/indexes/test_getattr.py
new file mode 100644
index 000000000000..8632aa5dfeba
--- /dev/null
+++ b/python/pyspark/pandas/tests/indexes/test_getattr.py
@@ -0,0 +1,79 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+import numpy as np
+import pandas as pd
+
+from pyspark import pandas as ps
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.testing.sqlutils import SQLTestUtils
+
+
+class IndexGetattrMixin:
+    @property
+    def pdf(self):
+        return pd.DataFrame(
+            {"a": [1, 2, 3, 4, 5, 6, 7, 8, 9], "b": [4, 5, 6, 3, 2, 1, 0, 0, 
0]},
+            index=[0, 1, 3, 5, 6, 8, 9, 9, 9],
+        )
+
+    @property
+    def psdf(self):
+        return ps.from_pandas(self.pdf)
+
+    def test_index_getattr(self):
+        psidx = self.psdf.index
+        item = "databricks"
+
+        expected_error_message = "'.*Index' object has no attribute 
'{}'".format(item)
+        with self.assertRaisesRegex(AttributeError, expected_error_message):
+            psidx.__getattr__(item)
+        with self.assertRaisesRegex(AttributeError, expected_error_message):
+            ps.from_pandas(pd.date_range("2011-01-01", freq="D", 
periods=10)).__getattr__(item)
+
+    def test_multi_index_getattr(self):
+        arrays = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]]
+        idx = pd.MultiIndex.from_arrays(arrays, names=("number", "color"))
+        pdf = pd.DataFrame(np.random.randn(4, 5), idx)
+        psdf = ps.from_pandas(pdf)
+        psidx = psdf.index
+        item = "databricks"
+
+        expected_error_message = "'MultiIndex' object has no attribute 
'{}'".format(item)
+        with self.assertRaisesRegex(AttributeError, expected_error_message):
+            psidx.__getattr__(item)
+
+
+class IndexGetattrTests(
+    IndexGetattrMixin,
+    PandasOnSparkTestCase,
+    SQLTestUtils,
+):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.indexes.test_getattr import *  # noqa: F401
+
+    try:
+        import xmlrunner
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", 
verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/indexes/test_name.py 
b/python/pyspark/pandas/tests/indexes/test_name.py
new file mode 100644
index 000000000000..cacf3efcb38b
--- /dev/null
+++ b/python/pyspark/pandas/tests/indexes/test_name.py
@@ -0,0 +1,183 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import unittest
+
+import numpy as np
+import pandas as pd
+
+from pyspark import pandas as ps
+from pyspark.loose_version import LooseVersion
+from pyspark.pandas.exceptions import PandasNotImplementedError
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.testing.sqlutils import SQLTestUtils
+
+
+class IndexNameMixin:
+    @property
+    def pdf(self):
+        return pd.DataFrame(
+            {"a": [1, 2, 3, 4, 5, 6, 7, 8, 9], "b": [4, 5, 6, 3, 2, 1, 0, 0, 
0]},
+            index=[0, 1, 3, 5, 6, 8, 9, 9, 9],
+        )
+
+    @property
+    def psdf(self):
+        return ps.from_pandas(self.pdf)
+
+    def test_index_names(self):
+        psdf = self.psdf
+        self.assertIsNone(psdf.index.name)
+
+        idx = pd.Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], name="x")
+        pdf = pd.DataFrame(np.random.randn(10, 5), index=idx, 
columns=list("abcde"))
+        psdf = ps.from_pandas(pdf)
+
+        pser = pdf.a
+        psser = psdf.a
+
+        self.assertEqual(psdf.index.name, pdf.index.name)
+        self.assertEqual(psdf.index.names, pdf.index.names)
+
+        pidx = pdf.index
+        psidx = psdf.index
+        pidx.name = "renamed"
+        psidx.name = "renamed"
+        self.assertEqual(psidx.name, pidx.name)
+        self.assertEqual(psidx.names, pidx.names)
+        self.assert_eq(psidx, pidx)
+        self.assertEqual(psdf.index.name, pdf.index.name)
+        self.assertEqual(psdf.index.names, pdf.index.names)
+        self.assertEqual(psser.index.names, pser.index.names)
+
+        pidx.name = None
+        psidx.name = None
+        self.assertEqual(psidx.name, pidx.name)
+        self.assertEqual(psidx.names, pidx.names)
+        self.assert_eq(psidx, pidx)
+        self.assertEqual(psdf.index.name, pdf.index.name)
+        self.assertEqual(psdf.index.names, pdf.index.names)
+        self.assertEqual(psser.index.names, pser.index.names)
+
+        with self.assertRaisesRegex(ValueError, "Names must be a list-like"):
+            psidx.names = "hi"
+
+        expected_error_message = "Length of new names must be {}, got 
{}".format(
+            psdf._internal.index_level, len(["0", "1"])
+        )
+        with self.assertRaisesRegex(ValueError, expected_error_message):
+            psidx.names = ["0", "1"]
+
+        expected_error_message = "Index.name must be a hashable type"
+        with self.assertRaisesRegex(TypeError, expected_error_message):
+            ps.Index([1, 2, 3], name=["0", "1"])
+        with self.assertRaisesRegex(TypeError, expected_error_message):
+            psidx.name = ["renamed"]
+        with self.assertRaisesRegex(TypeError, expected_error_message):
+            psidx.name = ["0", "1"]
+        # Specifying `names` when creating Index is no longer supported from 
pandas 2.0.0.
+        if LooseVersion(pd.__version__) >= LooseVersion("2.0.0"):
+            pass
+        else:
+            with self.assertRaisesRegex(TypeError, expected_error_message):
+                ps.Index([(1, 2), (3, 4)], names=["a", ["b"]])
+
+    def test_multi_index_names(self):
+        arrays = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]]
+        idx = pd.MultiIndex.from_arrays(arrays, names=("number", "color"))
+        pdf = pd.DataFrame(np.random.randn(4, 5), idx)
+        psdf = ps.from_pandas(pdf)
+
+        self.assertEqual(psdf.index.names, pdf.index.names)
+
+        pidx = pdf.index
+        psidx = psdf.index
+        pidx.names = ["renamed_number", "renamed_color"]
+        psidx.names = ["renamed_number", "renamed_color"]
+        self.assertEqual(psidx.names, pidx.names)
+
+        pidx.names = ["renamed_number", None]
+        psidx.names = ["renamed_number", None]
+        self.assertEqual(psidx.names, pidx.names)
+        self.assert_eq(psidx, pidx)
+
+        with self.assertRaises(PandasNotImplementedError):
+            psidx.name
+        with self.assertRaises(PandasNotImplementedError):
+            psidx.name = "renamed"
+
+    def test_multiindex_set_names(self):
+        pidx = pd.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2), ("c", 
"z", 3)])
+        psidx = ps.from_pandas(pidx)
+
+        pidx = pidx.set_names(["set", "new", "names"])
+        psidx = psidx.set_names(["set", "new", "names"])
+        self.assert_eq(pidx, psidx)
+
+        pidx.set_names(["set", "new", "names"], inplace=True)
+        psidx.set_names(["set", "new", "names"], inplace=True)
+        self.assert_eq(pidx, psidx)
+
+        pidx = pidx.set_names("first", level=0)
+        psidx = psidx.set_names("first", level=0)
+        self.assert_eq(pidx, psidx)
+
+        pidx = pidx.set_names("second", level=1)
+        psidx = psidx.set_names("second", level=1)
+        self.assert_eq(pidx, psidx)
+
+        pidx = pidx.set_names("third", level=2)
+        psidx = psidx.set_names("third", level=2)
+        self.assert_eq(pidx, psidx)
+
+        pidx.set_names("first", level=0, inplace=True)
+        psidx.set_names("first", level=0, inplace=True)
+        self.assert_eq(pidx, psidx)
+
+        pidx.set_names("second", level=1, inplace=True)
+        psidx.set_names("second", level=1, inplace=True)
+        self.assert_eq(pidx, psidx)
+
+        pidx.set_names("third", level=2, inplace=True)
+        psidx.set_names("third", level=2, inplace=True)
+        self.assert_eq(pidx, psidx)
+
+    def test_multiindex_tuple_column_name(self):
+        column_labels = pd.MultiIndex.from_tuples([("a", "x"), ("a", "y"), 
("b", "z")])
+        pdf = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], 
columns=column_labels)
+        pdf.set_index(("a", "x"), append=True, inplace=True)
+        psdf = ps.from_pandas(pdf)
+        self.assert_eq(pdf, psdf)
+
+
+class IndexNameTests(
+    IndexNameMixin,
+    PandasOnSparkTestCase,
+    SQLTestUtils,
+):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.indexes.test_name import *  # noqa: F401
+
+    try:
+        import xmlrunner
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", 
verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

(spark) branch master updated: [SPARK-46504][PS][TESTS][FOLLOWUP] Break the remaining part of `IndexesTests` into small test files

Reply via email to