zhengruifeng commented on code in PR #37756: URL: https://github.com/apache/spark/pull/37756#discussion_r961280658
########## python/pyspark/pandas/groupby.py: ########## @@ -827,6 +827,76 @@ def mad(self) -> FrameLike: return self._prepare_return(DataFrame(internal)) + def sem(self, ddof: int = 1) -> FrameLike: + """ + Compute standard error of the mean of groups, excluding missing values. + + .. versionadded:: 3.4.0 + + Parameters + ---------- + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. + + Examples + -------- + >>> df = ps.DataFrame({"A": [1, 2, 1, 1], "B": [True, False, False, True], + ... "C": [3, None, 3, 4], "D": ["a", "b", "b", "a"]}) + + >>> df.groupby("A").sem() + B C + A + 1 0.333333 0.333333 + 2 NaN NaN + + >>> df.groupby("D").sem(ddof=1) + A B C + D + a 0.0 0.0 0.5 + b 0.5 0.0 NaN + + >>> df.B.groupby(df.A).sem() + A + 1 0.333333 + 2 NaN + Name: B, dtype: float64 + + See Also + -------- + pyspark.pandas.Series.sem + pyspark.pandas.DataFrame.sem + """ + if ddof not in [0, 1]: + raise TypeError("ddof must be 0 or 1") + + # Raise the TypeError when all aggregation columns are of unaccepted data types + all_unaccepted = True + for _agg_col in self._agg_columns: + if isinstance(_agg_col.spark.data_type, (NumericType, BooleanType)): + all_unaccepted = False + break + if all_unaccepted: Review Comment: nice, this is more concise. I just copied it from other places -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org