ueshin commented on a change in pull request #33471:
URL: https://github.com/apache/spark/pull/33471#discussion_r675006227
##########
File path: python/pyspark/pandas/tests/test_categorical.py
##########
@@ -560,6 +560,38 @@ def test_unstack(self):
self.assert_eq(psdf.a.unstack().sort_index(),
pdf.a.unstack().sort_index())
self.assert_eq(psdf.b.unstack().sort_index(),
pdf.b.unstack().sort_index())
+ def test_rename_categories(self):
+ pdf, psdf = self.df_pair
+
+ pser = pdf.b
+ psser = psdf.b
+
+ self.assert_eq(
+ pser.cat.rename_categories([0, 1, 3, 2]),
psser.cat.rename_categories([0, 1, 3, 2])
+ )
+ self.assert_eq(
+ pser.cat.rename_categories({"a": "A", "c": "C"}),
+ psser.cat.rename_categories({"a": "A", "c": "C"}),
+ )
+ self.assert_eq(
+ pser.cat.rename_categories(lambda x: x.upper()),
+ psser.cat.rename_categories(lambda x: x.upper()),
+ )
+ pser.cat.rename_categories({"a": "A", "c": "C"}, inplace=True)
+ psser.cat.rename_categories({"a": "A", "c": "C"}, inplace=True)
+ self.assert_eq(pser, psser)
Review comment:
Could you check `pdf` as well after `inplace=True`?
##########
File path: python/pyspark/pandas/indexes/category.py
##########
@@ -364,6 +364,76 @@ def __getattr__(self, item: str) -> Any:
return partial(property_or_func, self)
raise AttributeError("'CategoricalIndex' object has no attribute
'{}'".format(item))
+ def rename_categories(
+ self, new_categories: Union[list, dict, Callable], inplace: bool =
False
+ ) -> Optional["CategoricalIndex"]:
+ """
+ Rename categories.
+
+ Parameters
+ ----------
+ new_categories : list-like, dict-like or callable
+
+ New categories which will replace old categories.
+
+ * list-like: all items must be unique and the number of items in
+ the new categories must match the existing number of categories.
+
+ * dict-like: specifies a mapping from
+ old categories to new. Categories not contained in the mapping
+ are passed through and extra categories in the mapping are
+ ignored.
+
+ * callable : a callable that is called on all items in the old
+ categories and whose return values comprise the new categories.
+
+ inplace : bool, default False
+ Whether or not to rename the categories inplace or return a copy of
+ this categorical with renamed categories.
+
+ Returns
+ -------
+ cat : CategoricalIndex or None
+ Categorical with removed categories or None if ``inplace=True``.
+
+ Raises
+ ------
+ ValueError
+ If new categories are list-like and do not have the same number of
+ items than the current categories or do not validate as categories
+
+ See Also
+ --------
+ reorder_categories : Reorder categories.
+ add_categories : Add new categories.
+ remove_categories : Remove the specified categories.
+ remove_unused_categories : Remove categories which are not used.
+ set_categories : Set the categories to the specified ones.
+
+ Examples
+ --------
+ >>> psser = ps.CategoricalIndex(["a", "a", "b"])
Review comment:
Could you name this variable `idx` instead of `psser`, since it is a `CategoricalIndex` rather than a Series?
##########
File path: python/pyspark/pandas/tests/test_categorical.py
##########
@@ -560,6 +560,38 @@ def test_unstack(self):
self.assert_eq(psdf.a.unstack().sort_index(),
pdf.a.unstack().sort_index())
self.assert_eq(psdf.b.unstack().sort_index(),
pdf.b.unstack().sort_index())
+ def test_rename_categories(self):
+ pdf, psdf = self.df_pair
+
+ pser = pdf.b
+ psser = psdf.b
+
+ self.assert_eq(
+ pser.cat.rename_categories([0, 1, 3, 2]),
psser.cat.rename_categories([0, 1, 3, 2])
+ )
+ self.assert_eq(
+ pser.cat.rename_categories({"a": "A", "c": "C"}),
+ psser.cat.rename_categories({"a": "A", "c": "C"}),
+ )
+ self.assert_eq(
+ pser.cat.rename_categories(lambda x: x.upper()),
+ psser.cat.rename_categories(lambda x: x.upper()),
+ )
+ pser.cat.rename_categories({"a": "A", "c": "C"}, inplace=True)
+ psser.cat.rename_categories({"a": "A", "c": "C"}, inplace=True)
+ self.assert_eq(pser, psser)
+ pser.cat.rename_categories(lambda x: x.upper(), inplace=True)
+ psser.cat.rename_categories(lambda x: x.upper(), inplace=True)
+ self.assert_eq(pser, psser)
+ pser.cat.rename_categories(lambda x: x.upper(), inplace=True)
+ psser.cat.rename_categories(lambda x: x.upper(), inplace=True)
+ self.assert_eq(pser, psser)
+ self.assertRaisesRegex(
+ ValueError,
+ "new categories need to have the same number of items as the old
categories",
+ lambda: psser.cat.rename_categories([0, 1, 2]),
+ )
+
Review comment:
Could you add more negative cases? e.g., calling with `None`, `1`, etc.
##########
File path: python/pyspark/pandas/tests/indexes/test_category.py
##########
@@ -257,6 +257,19 @@ def test_insert(self):
self.assert_eq(psidx.insert(1, "w"), pidx.insert(1, "w"))
+ def test_rename_categories(self):
+ pidx = pd.CategoricalIndex(["a", "b", "c", "d"])
+ psidx = ps.from_pandas(pidx)
+ self.assert_eq(pidx.rename_categories([0, 1, 3, 2]),
psidx.rename_categories([0, 1, 3, 2]))
+ self.assert_eq(
+ pidx.rename_categories({"a": "A", "c": "C"}),
+ psidx.rename_categories({"a": "A", "c": "C"}),
+ )
+ self.assert_eq(
+ pidx.rename_categories(lambda x: x.upper()),
+ psidx.rename_categories(lambda x: x.upper()),
+ )
+
Review comment:
Could you add more negative cases?
##########
File path: python/pyspark/pandas/categorical.py
##########
@@ -442,8 +442,108 @@ def remove_categories(
def remove_unused_categories(self) -> "ps.Series":
raise NotImplementedError()
- def rename_categories(self, new_categories: pd.Index, inplace: bool =
False) -> "ps.Series":
- raise NotImplementedError()
+ def rename_categories(
+ self, new_categories: Union[list, dict, Callable], inplace: bool =
False
+ ) -> Optional["ps.Series"]:
+ """
+ Rename categories.
+
+ Parameters
+ ----------
+ new_categories : list-like, dict-like or callable
+
+ New categories which will replace old categories.
+
+ * list-like: all items must be unique and the number of items in
+ the new categories must match the existing number of categories.
+
+ * dict-like: specifies a mapping from
+ old categories to new. Categories not contained in the mapping
+ are passed through and extra categories in the mapping are
+ ignored.
+
+ * callable : a callable that is called on all items in the old
+ categories and whose return values comprise the new categories.
+
+ inplace : bool, default False
+ Whether or not to rename the categories inplace or return a copy of
+ this categorical with renamed categories.
+
+ Returns
+ -------
+ cat : Categorical or None
Review comment:
Should this be `Series or None`, to match the `Optional["ps.Series"]` return annotation?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]