itholic commented on a change in pull request #35747:
URL: https://github.com/apache/spark/pull/35747#discussion_r826429339



##########
File path: python/pyspark/pandas/series.py
##########
@@ -4307,17 +4307,20 @@ def keys(self) -> "ps.Index":
         """
         return self.index
 
-    # TODO: 'regex', 'method' parameter
+    # TODO: introduce 'method', 'limit', 'in_place'; fully support 'regex'
     def replace(
         self,
         to_replace: Optional[Union[Any, List, Tuple, Dict]] = None,
         value: Optional[Union[List, Tuple]] = None,
-        regex: bool = False,
+        regex: Union[str, bool] = False,
     ) -> "Series":
         """
         Replace values given in to_replace with value.
         Values of the Series are replaced with other values dynamically.
 
+        .. note:: For partial pattern matching, the replacement is against the 
whole string,
+            which is different from pandas'.

Review comment:
       Can we a bit more elaborate why our behavior is different from pandas'?

##########
File path: python/pyspark/pandas/tests/test_series.py
##########
@@ -1735,15 +1735,32 @@ def test_replace(self):
         self.assert_eq(psser.replace([10, 15], [45, 50]), pser.replace([10, 
15], [45, 50]))
         self.assert_eq(psser.replace((10, 15), (45, 50)), pser.replace((10, 
15), (45, 50)))
 
+        pser = pd.Series(["bat", "foo", "bait", "abc", "bar", "zoo"])
+        psser = ps.from_pandas(pser)
+        self.assert_eq(
+            psser.replace(to_replace=r"^ba.$", value="new", regex=True),
+            pser.replace(to_replace=r"^ba.$", value="new", regex=True),
+        )
+        self.assert_eq(
+            psser.replace(regex=r"^.oo$", value="new"), 
pser.replace(regex=r"^.oo$", value="new")
+        )
+
         msg = "'to_replace' should be one of str, list, tuple, dict, int, 
float"
         with self.assertRaisesRegex(TypeError, msg):
             psser.replace(ps.range(5))
         msg = "Replacement lists must match in length. Expecting 3 got 2"
         with self.assertRaisesRegex(ValueError, msg):
-            psser.replace([10, 20, 30], [1, 2])
-        msg = "replace currently not support for regex"
+            psser.replace(["bat", "foo", "bait"], ["a", "b"])
+        msg = "'to_replace' must be 'None' if 'regex' is not a bool"
+        with self.assertRaisesRegex(ValueError, msg):
+            psser.replace(to_replace="foo", regex=r"^.oo$")
+        msg = "If 'regex' is True then 'to_replace' must be a string"
+        with self.assertRaisesRegex(AssertionError, msg):
+            psser.replace(["bat", "foo", "bait"], regex=True)
+        unsupported_regex = [r"^.oo$", r"^ba.$"]
+        msg = "'regex' of %s type is not supported" % 
type(unsupported_regex).__name__
         with self.assertRaisesRegex(NotImplementedError, msg):
-            psser.replace(r"^1.$", regex=True)
+            psser.replace(regex=unsupported_regex, value="new")

Review comment:
       Can we add test case for chained operation?
   
   e.g.
   `(pser + "A").replace(to_replace=r"^ba.$", value="new", regex=True)`
   or
   `pser.replace(to_replace=r"^ba.$", value="new", 
regex=True).replace(to_replace="new", value="New", regex=True)`




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to