This is an automated email from the ASF dual-hosted git repository.
bhulette pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new d74a9de [BEAM-9547] Add get implementation (#15030)
d74a9de is described below
commit d74a9de2265243e7d704369c9cd61a0308c4f1e1
Author: Brian Hulette <[email protected]>
AuthorDate: Mon Jun 21 10:37:53 2021 -0700
[BEAM-9547] Add get implementation (#15030)
---
sdks/python/apache_beam/dataframe/frames.py | 16 ++++++++++++++++
sdks/python/apache_beam/dataframe/frames_test.py | 7 +++++++
2 files changed, 23 insertions(+)
diff --git a/sdks/python/apache_beam/dataframe/frames.py b/sdks/python/apache_beam/dataframe/frames.py
index 57cb997..f798b52 100644
--- a/sdks/python/apache_beam/dataframe/frames.py
+++ b/sdks/python/apache_beam/dataframe/frames.py
@@ -1091,6 +1091,13 @@ class DeferredSeries(DeferredDataFrameOrSeries):
frame_base.wont_implement_method(
pd.Series, 'array', reason="non-deferred-result"))
+ # We can't reliably predict the output type; it depends on whether `key` is:
+ # - not in the index (default_value)
+ # - in the index once (constant)
+ # - in the index multiple times (Series)
+ get = frame_base.wont_implement_method(
+ pd.Series, 'get', reason="non-deferred-columns")
+
ravel = frame_base.wont_implement_method(
pd.Series, 'ravel', reason="non-deferred-result")
@@ -1993,6 +2000,15 @@ class DeferredDataFrame(DeferredDataFrameOrSeries):
)
)
+ # If the column name exists this is a simple projection; otherwise the result
+ # is a constant (default_value)
+ @frame_base.with_docs_from(pd.DataFrame)
+ def get(self, key, default_value=None):
+ if key in self.columns:
+ return self[key]
+ else:
+ return default_value
+
@frame_base.with_docs_from(pd.DataFrame)
@frame_base.args_to_kwargs(pd.DataFrame)
@frame_base.populate_defaults(pd.DataFrame)
diff --git a/sdks/python/apache_beam/dataframe/frames_test.py b/sdks/python/apache_beam/dataframe/frames_test.py
index a0b1f83..6f2bfc2 100644
--- a/sdks/python/apache_beam/dataframe/frames_test.py
+++ b/sdks/python/apache_beam/dataframe/frames_test.py
@@ -232,6 +232,8 @@ class DeferredFrameTest(_AbstractFrameTest):
})
self._run_test(lambda df: df['Animal'], df)
self._run_test(lambda df: df.Speed, df)
+ self._run_test(lambda df: df.get('Animal'), df)
+ self._run_test(lambda df: df.get('FOO', df.Animal), df)
def test_set_column(self):
def new_column(df):
@@ -1874,6 +1876,11 @@ class ConstructionTimeTest(unittest.TestCase):
self._run_test(
lambda df: df.groupby('int_col')[['flt_col', 'str_col']].ndim)
+ def test_get_column_default_None(self):
+ # .get just returns default_value=None at construction time if the column
+ # doesn't exist
+ self._run_test(lambda df: df.get('FOO'))
+
class DocstringTest(unittest.TestCase):
@parameterized.expand([