tvalentyn commented on code in PR #35656:
URL: https://github.com/apache/beam/pull/35656#discussion_r2277164905
##########
sdks/python/apache_beam/internal/code_object_pickler.py:
##########
@@ -406,11 +411,11 @@ def _get_code_object_from_lambda_with_hash_pattern(
raise AttributeError(f'Could not find code object with path: {path}')
-def _get_code_from_stable_reference(path: str):
- """Returns the code object from a stable reference.
+def get_code_from_stable_reference(path: str):
Review Comment:
Could you please add a typehint for the returned result?
##########
sdks/python/apache_beam/internal/code_object_pickler.py:
##########
@@ -17,10 +17,14 @@
"""Customizations to how Python code objects are pickled.
-This module provides functions for pickling code objects, especially lambdas,
-in a consistent way. It addresses issues with non-deterministic pickling by
-creating a unique identifier that is invariant to small changes in the source
-code.
+This module provides helper functions to improve pickling code objects,
+especially lambdas, in a consistent way by using code object identifiers. These
+helper functions will be used to patch pickler implementations used by Beam
+(e.g. Cloudpickle).
+
+A code object identifier is a unique identifier for a code object that provides
+a unique reference to the code object in the context where the code is defined
+and is invariant to small changes in the currounding code.
Review Comment:
```suggestion
and is invariant to small changes in the surrounding code.
```
##########
sdks/python/apache_beam/internal/code_object_pickler_test.py:
##########
@@ -189,23 +207,404 @@ def get_lambda_from_dictionary():
]
-class CodeObjectPicklerTest(unittest.TestCase):
+class CodePathGenerationTest(unittest.TestCase):
Review Comment:
```suggestion
class CodeObjectIdentifierGenerationTest(unittest.TestCase):
```
##########
sdks/python/apache_beam/internal/code_object_pickler.py:
##########
@@ -406,11 +411,11 @@ def _get_code_object_from_lambda_with_hash_pattern(
raise AttributeError(f'Could not find code object with path: {path}')
-def _get_code_from_stable_reference(path: str):
- """Returns the code object from a stable reference.
+def get_code_from_stable_reference(path: str):
Review Comment:
WDYT about naming this method:
get_code(code_object_identifer: str):
##########
sdks/python/apache_beam/internal/code_object_pickler.py:
##########
@@ -406,11 +411,11 @@ def _get_code_object_from_lambda_with_hash_pattern(
raise AttributeError(f'Could not find code object with path: {path}')
-def _get_code_from_stable_reference(path: str):
- """Returns the code object from a stable reference.
+def get_code_from_stable_reference(path: str):
+ """Returns the code object from a stable reference code object identifier.
Review Comment:
```suggestion
"""Returns the code object corresponding to the code object identifier.
```
##########
sdks/python/apache_beam/internal/code_object_pickler_test.py:
##########
@@ -189,23 +207,404 @@ def get_lambda_from_dictionary():
]
-class CodeObjectPicklerTest(unittest.TestCase):
+class CodePathGenerationTest(unittest.TestCase):
@parameterized.expand(test_cases)
- def test_get_code_path(self, callable, expected):
+ def test_get_code_path(self, callable, expected_path):
actual = code_object_pickler.get_code_path(callable)
self.assertEqual(actual, expected)
@parameterized.expand(test_cases)
- def test_get_code_from_stable_reference(self, callable, path):
- actual = code_object_pickler._get_code_from_stable_reference(path)
+ def test_get_code_from_stable_reference(self, expected_callable, path):
+ actual = code_object_pickler.get_code_from_stable_reference(path)
self.assertEqual(actual, callable.__code__)
@parameterized.expand(test_cases)
- def test_roundtrip(self, callable, _):
+ def test_roundtrip(self, callable, unused_path):
path = code_object_pickler.get_code_path(callable)
- actual = code_object_pickler._get_code_from_stable_reference(path)
+ actual = code_object_pickler.get_code_from_stable_reference(path)
self.assertEqual(actual, callable.__code__)
+class GetCodeFromStableReferenceTest(unittest.TestCase):
+ def empty_path_raises_exception(self):
+ with self.assertRaisesRegex(ValueError, "Path must not be empty"):
+ code_object_pickler.get_code_from_stable_reference("")
+
+ def invalid_default_index_raises_exception(self):
+ with self.assertRaisesRegex(ValueError, "out of bounds"):
+ code_object_pickler.get_code_from_stable_reference(
+ "apache_beam.internal.test_cases.module_with_default_argument."
+ "function_with_lambda_default_argument.__defaults__[1]")
+
+ def invalid_single_name_path_raises_exception(self):
+ with self.assertRaisesRegex(AttributeError,
+ "Could not find code object with path"):
+ code_object_pickler.get_code_from_stable_reference(
+ "apache_beam.internal.test_cases.module_3."
+ "my_function.__code__.co_consts[something]")
+
+ def invalid_lambda_with_args_path_raises_exception(self):
+ with self.assertRaisesRegex(AttributeError,
+ "Could not find code object with path"):
+ code_object_pickler.get_code_from_stable_reference(
+ "apache_beam.internal.test_cases.module_3."
+ "my_function.__code__.co_consts[<lambda>, ('x',)]")
+
+ def invalid_lambda_with_hash_path_raises_exception(self):
+ with self.assertRaisesRegex(AttributeError,
+ "Could not find code object with path"):
+ code_object_pickler.get_code_from_stable_reference(
+ "apache_beam.internal.test_cases.module_3."
+ "my_function.__code__.co_consts[<lambda>, ('',), 1234567890]")
+
+ def test_adding_local_variable_in_class_preserves_object(self):
Review Comment:
Should this and below tests be part of class CodePathStabilityTest instead?
##########
sdks/python/apache_beam/internal/code_object_pickler.py:
##########
@@ -60,15 +65,13 @@ def get_normalized_path(path):
def get_code_path(callable: types.FunctionType):
Review Comment:
(totally optional) wdyt about naming this method:
get_code_object_identifier
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]