tvalentyn commented on code in PR #35656:
URL: https://github.com/apache/beam/pull/35656#discussion_r2258305563


##########
sdks/python/apache_beam/internal/code_object_pickler.py:
##########
@@ -15,7 +15,445 @@
 # limitations under the License.
 #
 
+"""Customizations to how Python code objects are pickled.
+
+This module provides functions for pickling code objects, especially lambdas,
+in a consistent way. It addresses issues with non-deterministic pickling by
+creating a unique identifier that is invariant to small changes in the source
+code.
+
+The code object identifiers consist of a sequence of the following parts
+separated by periods:
+- Module names - The name of the module the code object is in
+- Class names - The name of a class containing the code object. There can be
+  multiple of these in the same identifier in the case of nested
+  classes.
+- Function names - The name of the function containing the code object.
+  There can be multiple of these in the case of nested functions.
+- __code__ - Attribute indicating that we are entering the code object of a
+  function/method.
+- co_consts[<name>] - The name of the local variable containing the
+  code object. In the case of lambdas, the name is created by using the
+  signature of the lambda and hashing the bytecode, as shown below.
+
+Examples:
+- __main__.top_level_function.__code__
+- __main__.ClassWithNestedFunction.process.__code__.co_consts[nested_function]
+- __main__.ClassWithNestedLambda.process.__code__.co_consts[
+    get_lambda_from_dictionary].co_consts[<lambda>, ('x',)]
+- __main__.ClassWithNestedLambda.process.__code__.co_consts[
+    <lambda>, ('x',), 1234567890]
+"""
+
+import collections
+import hashlib
+import inspect
+import re
+import sys
+import types
+
 
 def get_normalized_path(path):
   """Returns a normalized path. This function is intended to be overridden."""
   return path
+
+
+def get_code_path(callable: types.FunctionType):
+  """Returns the stable reference to the code object.
+
+  Will be implemented using cloudpickle in a future version.
+
+  Args:
+    callable: The callable object to search for.
+
+  Returns:
+    The stable reference to the code object.
+      Examples:
+      - __main__.top_level_function.__code__
+      - __main__.ClassWithNestedFunction.process.__code__.co_consts[
+        nested_function]
+      - __main__.ClassWithNestedLambda.process.__code__.co_consts[
+        get_lambda_from_dictionary].co_consts[<lambda>, ('x',)]
+      - __main__.ClassWithNestedLambda.process.__code__.co_consts[
+        <lambda>, ('x',), 1234567890]
+  """
+  code_path = _extend_path(
+      callable.__module__,
+      _search(
+          callable,
+          sys.modules[callable.__module__],
+          callable.__qualname__.split('.'),
+      ),
+  )
+  return code_path
+
+
+def _extend_path(prefix: str, suffix: str):
+  """Extends the path to the code object.
+
+  Args:
+    prefix: The prefix of the path.
+    suffix: The rest of the path.
+
+  Returns:
+    The extended path.
+  """
+  if suffix is None:
+    return None
+  if not suffix:
+    return prefix
+  return prefix + '.' + suffix
+
+
+def _search(
+    callable: types.FunctionType, node: object, qual_name_parts: list[str]):

Review Comment:
   Similarly, `object` can be `Any` if you mean that this can be literally any 
object. Is that the case?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@beam.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to