This is an automated email from the ASF dual-hosted git repository.

bhulette pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new d74a9de  [BEAM-9547] Add get implementation (#15030)
d74a9de is described below

commit d74a9de2265243e7d704369c9cd61a0308c4f1e1
Author: Brian Hulette <[email protected]>
AuthorDate: Mon Jun 21 10:37:53 2021 -0700

    [BEAM-9547] Add get implementation (#15030)
---
 sdks/python/apache_beam/dataframe/frames.py      | 16 ++++++++++++++++
 sdks/python/apache_beam/dataframe/frames_test.py |  7 +++++++
 2 files changed, 23 insertions(+)

diff --git a/sdks/python/apache_beam/dataframe/frames.py b/sdks/python/apache_beam/dataframe/frames.py
index 57cb997..f798b52 100644
--- a/sdks/python/apache_beam/dataframe/frames.py
+++ b/sdks/python/apache_beam/dataframe/frames.py
@@ -1091,6 +1091,13 @@ class DeferredSeries(DeferredDataFrameOrSeries):
       frame_base.wont_implement_method(
           pd.Series, 'array', reason="non-deferred-result"))
 
+  # We can't reliably predict the output type, it depends on whether `key` is:
+  # - not in the index (default_value)
+  # - in the index once (constant)
+  # - in the index multiple times (Series)
+  get = frame_base.wont_implement_method(
+      pd.Series, 'get', reason="non-deferred-columns")
+
   ravel = frame_base.wont_implement_method(
       pd.Series, 'ravel', reason="non-deferred-result")
 
@@ -1993,6 +2000,15 @@ class DeferredDataFrame(DeferredDataFrameOrSeries):
         )
     )
 
+  # If column name exists this is a simple project, otherwise it is a constant
+  # (default_value)
+  @frame_base.with_docs_from(pd.DataFrame)
+  def get(self, key, default_value=None):
+    if key in self.columns:
+      return self[key]
+    else:
+      return default_value
+
   @frame_base.with_docs_from(pd.DataFrame)
   @frame_base.args_to_kwargs(pd.DataFrame)
   @frame_base.populate_defaults(pd.DataFrame)
diff --git a/sdks/python/apache_beam/dataframe/frames_test.py b/sdks/python/apache_beam/dataframe/frames_test.py
index a0b1f83..6f2bfc2 100644
--- a/sdks/python/apache_beam/dataframe/frames_test.py
+++ b/sdks/python/apache_beam/dataframe/frames_test.py
@@ -232,6 +232,8 @@ class DeferredFrameTest(_AbstractFrameTest):
     })
     self._run_test(lambda df: df['Animal'], df)
     self._run_test(lambda df: df.Speed, df)
+    self._run_test(lambda df: df.get('Animal'), df)
+    self._run_test(lambda df: df.get('FOO', df.Animal), df)
 
   def test_set_column(self):
     def new_column(df):
@@ -1874,6 +1876,11 @@ class ConstructionTimeTest(unittest.TestCase):
     self._run_test(
         lambda df: df.groupby('int_col')[['flt_col', 'str_col']].ndim)
 
+  def test_get_column_default_None(self):
+    # .get just returns default_value=None at construction time if the column
+    # doesn't exist
+    self._run_test(lambda df: df.get('FOO'))
+
 
 class DocstringTest(unittest.TestCase):
   @parameterized.expand([

Reply via email to