robertwb commented on a change in pull request #13139:
URL: https://github.com/apache/beam/pull/13139#discussion_r513682982



##########
File path: sdks/python/apache_beam/dataframe/frames.py
##########
@@ -619,21 +619,75 @@ def assign(self, **kwargs):
                                             "instances are supported.")
     return frame_base._elementwise_method('assign')(self, **kwargs)
 
-
   apply = frame_base.not_implemented_method('apply')
-  explode = frame_base.not_implemented_method('explode')
   isin = frame_base.not_implemented_method('isin')
   append = frame_base.not_implemented_method('append')
   combine = frame_base.not_implemented_method('combine')
   combine_first = frame_base.not_implemented_method('combine_first')
   count = frame_base.not_implemented_method('count')
-  drop = frame_base.not_implemented_method('drop')
   eval = frame_base.not_implemented_method('eval')
   reindex = frame_base.not_implemented_method('reindex')
   melt = frame_base.not_implemented_method('melt')
   pivot = frame_base.not_implemented_method('pivot')
   pivot_table = frame_base.not_implemented_method('pivot_table')
 
+  @frame_base.args_to_kwargs(pd.DataFrame)
+  @frame_base.populate_defaults(pd.DataFrame)
+  def explode(self, column, ignore_index):
+    # ignoring the index will not preserve it
+    preserves = (partitionings.Nothing() if ignore_index
+                 else partitionings.Singleton())
+    return frame_base.DeferredFrame.wrap(
+        expressions.ComputedExpression(
+            'explode',
+            lambda df: df.explode(column, ignore_index),
+            [self._expr],
+            preserves_partition_by=preserves,
+            requires_partition_by=partitionings.Nothing()))
+
+
+  @frame_base.args_to_kwargs(pd.DataFrame)
+  @frame_base.populate_defaults(pd.DataFrame)
+  @frame_base.maybe_inplace
+  def drop(self, **kwargs):
+    labels = kwargs.get('labels', None)

Review comment:
       One danger of using kwargs.get here, rather than declaring it as a 
parameter, is that you end up hard-coding all of the defaults yourself 
(rather than letting populate_defaults supply them). 

##########
File path: sdks/python/apache_beam/dataframe/frames.py
##########
@@ -1513,6 +1584,8 @@ def repeat(self, repeats):
               'repeat',
               lambda series: series.str.repeat(repeats),
               [self._expr],
+              # Output will also be a str Series

Review comment:
       Is there a drawback to this being automatically inferred? (Or was it 
not?)
   
   Same below.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Reply via email to