This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new acb50d9  [SPARK-38612][PYTHON] Fix Inline type hint for duplicated.keep
acb50d9 is described below

commit acb50d95a4952dea1cbbc27d4ddcc0b3432a13cf
Author: Yikun Jiang <yikunk...@gmail.com>
AuthorDate: Mon Mar 21 21:02:39 2022 +0900

    [SPARK-38612][PYTHON] Fix Inline type hint for duplicated.keep

    ### What changes were proposed in this pull request?
    Fix Inline type hint for `duplicated.keep`

    ### Why are the changes needed?
    `keep` can be "first", "last" and False in pandas

    ### Does this PR introduce _any_ user-facing change?
    No

    ### How was this patch tested?
    CI passed

    Closes #35920 from Yikun/SPARK-38612.

    Authored-by: Yikun Jiang <yikunk...@gmail.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/pandas/frame.py  | 6 +++---
 python/pyspark/pandas/series.py | 4 +++-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index 41a0dde..b355708 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -4307,7 +4307,7 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
     def _mark_duplicates(
         self,
         subset: Optional[Union[Name, List[Name]]] = None,
-        keep: str = "first",
+        keep: Union[bool, str] = "first",
     ) -> Tuple[SparkDataFrame, str]:
         if subset is None:
             subset_list = self._internal.column_labels
@@ -4350,7 +4350,7 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
     def duplicated(
         self,
         subset: Optional[Union[Name, List[Name]]] = None,
-        keep: str = "first",
+        keep: Union[bool, str] = "first",
     ) -> "Series":
         """
         Return boolean Series denoting duplicate rows, optionally only considering certain columns.
@@ -9037,7 +9037,7 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
     def drop_duplicates(
         self,
         subset: Optional[Union[Name, List[Name]]] = None,
-        keep: str = "first",
+        keep: Union[bool, str] = "first",
         inplace: bool = False,
     ) -> Optional["DataFrame"]:
         """
diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py
index 038f78f..cae0838 100644
--- a/python/pyspark/pandas/series.py
+++ b/python/pyspark/pandas/series.py
@@ -1647,7 +1647,9 @@ class Series(Frame, IndexOpsMixin, Generic[T]):

     tolist = to_list

-    def drop_duplicates(self, keep: str = "first", inplace: bool = False) -> Optional["Series"]:
+    def drop_duplicates(
+        self, keep: Union[bool, str] = "first", inplace: bool = False
+    ) -> Optional["Series"]:
         """
         Return Series with duplicate values removed.


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org