itholic commented on a change in pull request #34179:
URL: https://github.com/apache/spark/pull/34179#discussion_r721920147



##########
File path: python/pyspark/pandas/tests/test_dataframe.py
##########
@@ -6000,6 +6000,14 @@ def test_combine_first(self):
             expected_pdf = pd.DataFrame({"A": [None, 0], "B": [4.0, 1.0], "C": 
[3, 3]})
             self.assert_eq(expected_pdf, psdf1.combine_first(psdf2))
 
+    def test_multi_index_dtypes(self):
+        arrays = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]]
+        idx = pd.MultiIndex.from_arrays(arrays, names=("number", "color"))
+        pdf = pd.DataFrame(np.random.randn(4, 5), idx)
+        psdf = ps.from_pandas(pdf)
+
+        self.assert_eq(psdf.index.dtypes, pdf.index.dtypes)

Review comment:
       nit: Can we just compare against the MultiIndex itself, instead of going through a DataFrame?
   
   For example,
   
   ```python
           arrays = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]]
           pmidx = pd.MultiIndex.from_arrays(arrays, names=("number", "color"))
           psmidx = ps.from_pandas(pmidx)
   
           self.assert_eq(psmidx.dtypes, pmidx.dtypes)
   ```

##########
File path: python/pyspark/pandas/indexes/multi.py
##########
@@ -375,6 +375,15 @@ def name(self) -> Name:
     def name(self, name: Name) -> None:
         raise PandasNotImplementedError(class_name="pd.MultiIndex", 
property_name="name")
 
+    @property
+    def dtypes(self) -> Series:
+        return pd.Series(
+            [field.dtype for field in self._internal.index_fields],
+            index=pd.Index(
+                [name if len(name) > 1 else name[0] for name in 
self._internal.index_names]
+            ),
+        )

Review comment:
       Can we create a pandas-on-Spark Series rather than a pandas Series?
   
   For example,
   
   ```python
   return ps.Series(
       [field.dtype.name for field in self._internal.index_fields],
       index=[name if len(name) > 1 else name[0] for name in 
self._internal.index_names])
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to