This is an automated email from the ASF dual-hosted git repository.
tvalentyn pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 0b131c9ae7c Change handling of copy=None defaults for Pandas 2 (#28523)
0b131c9ae7c is described below
commit 0b131c9ae7cafd7a43f875b0df1fb714683bdcda
Author: caneff <[email protected]>
AuthorDate: Wed Sep 20 15:16:51 2023 -0400
Change handling of copy=None defaults for Pandas 2 (#28523)
---
sdks/python/apache_beam/dataframe/frame_base.py | 8 ++++++++
sdks/python/apache_beam/dataframe/frame_base_test.py | 15 +++++++++++++++
2 files changed, 23 insertions(+)
diff --git a/sdks/python/apache_beam/dataframe/frame_base.py b/sdks/python/apache_beam/dataframe/frame_base.py
index 48a4c29d058..4e89e473b73 100644
--- a/sdks/python/apache_beam/dataframe/frame_base.py
+++ b/sdks/python/apache_beam/dataframe/frame_base.py
@@ -674,11 +674,19 @@ def populate_defaults(base_type, removed_method=False, removed_args=None):
if removed_args:
defaults_to_populate -= set(removed_args)
+ # In pandas 2, many methods default to copy=None, which means the
+ # effective value of copy follows the copy_on_write setting. Since
+ # copy_on_write will always be true for Beam, just fill in copy=True
+ # here. In pandas 1, the default was already True.
+ if 'copy' in arg_to_default and arg_to_default['copy'] is None:
+ arg_to_default['copy'] = True
+
@functools.wraps(func)
def wrapper(**kwargs):
for name in defaults_to_populate:
if name not in kwargs:
kwargs[name] = arg_to_default[name]
+
return func(**kwargs)
return wrapper
diff --git a/sdks/python/apache_beam/dataframe/frame_base_test.py b/sdks/python/apache_beam/dataframe/frame_base_test.py
index b3077320720..0a73905339f 100644
--- a/sdks/python/apache_beam/dataframe/frame_base_test.py
+++ b/sdks/python/apache_beam/dataframe/frame_base_test.py
@@ -174,6 +174,21 @@ class FrameBaseTest(unittest.TestCase):
'a': 2, 'b': 4, 'c': 6, 'kw_only': 8
})
+ def test_populate_defaults_overwrites_copy(self):
+ class Base(object):
+ def func(self, a=1, b=2, c=3, *, copy=None):
+ pass
+
+ class Proxy(object):
+ @frame_base.args_to_kwargs(Base)
+ @frame_base.populate_defaults(Base)
+ def func(self, a, copy, **kwargs):
+ return dict(kwargs, a=a, copy=copy)
+
+ proxy = Proxy()
+ self.assertEqual(proxy.func(), {'a': 1, 'copy': True})
+ self.assertEqual(proxy.func(copy=False), {'a': 1, 'copy': False})
+
if __name__ == '__main__':
unittest.main()