robertwb commented on a change in pull request #13139:
URL: https://github.com/apache/beam/pull/13139#discussion_r513682982
##########
File path: sdks/python/apache_beam/dataframe/frames.py
##########
@@ -619,21 +619,75 @@ def assign(self, **kwargs):
"instances are supported.")
return frame_base._elementwise_method('assign')(self, **kwargs)
-
apply = frame_base.not_implemented_method('apply')
- explode = frame_base.not_implemented_method('explode')
isin = frame_base.not_implemented_method('isin')
append = frame_base.not_implemented_method('append')
combine = frame_base.not_implemented_method('combine')
combine_first = frame_base.not_implemented_method('combine_first')
count = frame_base.not_implemented_method('count')
- drop = frame_base.not_implemented_method('drop')
eval = frame_base.not_implemented_method('eval')
reindex = frame_base.not_implemented_method('reindex')
melt = frame_base.not_implemented_method('melt')
pivot = frame_base.not_implemented_method('pivot')
pivot_table = frame_base.not_implemented_method('pivot_table')
+ @frame_base.args_to_kwargs(pd.DataFrame)
+ @frame_base.populate_defaults(pd.DataFrame)
+ def explode(self, column, ignore_index):
+ # ignoring the index will not preserve it
+ preserves = (partitionings.Nothing() if ignore_index
+ else partitionings.Singleton())
+ return frame_base.DeferredFrame.wrap(
+ expressions.ComputedExpression(
+ 'explode',
+ lambda df: df.explode(column, ignore_index),
+ [self._expr],
+ preserves_partition_by=preserves,
+ requires_partition_by=partitionings.Nothing()))
+
+
+ @frame_base.args_to_kwargs(pd.DataFrame)
+ @frame_base.populate_defaults(pd.DataFrame)
+ @frame_base.maybe_inplace
+ def drop(self, **kwargs):
+ labels = kwargs.get('labels', None)
Review comment:
One danger of using `kwargs.get` here, rather than declaring `labels` as a
named parameter, is that you end up hard-coding the defaults yourself (rather
than letting `populate_defaults` supply them).
##########
File path: sdks/python/apache_beam/dataframe/frames.py
##########
@@ -1513,6 +1584,8 @@ def repeat(self, repeats):
'repeat',
lambda series: series.str.repeat(repeats),
[self._expr],
+ # Output will also be a str Series
Review comment:
Is there a drawback to letting this be automatically inferred? (Or was it
not being inferred?)
The same question applies to the cases below.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org