svetakvsundhar commented on a change in pull request #15809:
URL: https://github.com/apache/beam/pull/15809#discussion_r741296085
##########
File path: sdks/python/apache_beam/dataframe/frames.py
##########
@@ -1430,6 +1430,72 @@ def corr(self, other, method, min_periods):
[self._expr, other._expr],
requires_partition_by=partitionings.Singleton(reason=reason)))
+ @frame_base.with_docs_from(pd.Series)
+ @frame_base.args_to_kwargs(pd.Series)
+ @frame_base.populate_defaults(pd.Series)
+ def skew(self, axis, skipna, level, numeric_only, **kwargs):
+ if level is not None:
+ raise NotImplementedError("per-level aggregation")
+ if skipna is None or skipna:
+ self = self.dropna() # pylint: disable=self-cls-assignment
+ # See the online, numerically stable formulae at
+ # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Higher-order_statistics
+ def compute_moments(x):
+ n = len(x)
+ if n == 0:
+ m, s, third_moment = 0, 0, 0
+ elif n < 3:
+ m = x.std(ddof=0)**2 * n
+ s = x.sum()
+ third_moment = (((x - x.mean())**3).sum())
+ else:
+ m = x.std(ddof=0)**2 * n
+ s = x.sum()
+ third_moment = (((x - x.mean())**3).sum())
+ return pd.DataFrame(
+ dict(m=[m], s=[s], n=[n], third_moment=[third_moment]))
+
+ def combine_moments(data):
+ m = s = n = third_moment = 0.0
+ for datum in data.itertuples():
+ if datum.n == 0:
+ continue
+ elif n == 0:
+ m, s, n, third_moment = datum.m, datum.s, datum.n, datum.third_moment
+ else:
+ mean_b = s / n
+ mean_a = datum.s / datum.n
+ delta = mean_b - mean_a
+ n_a = datum.n
+ n_b = n
Review comment:
Rewritten; however, I think `third_moment` can stay as is.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]