This is an automated email from the ASF dual-hosted git repository.
jrmccluskey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new fad7f96dc85 Add support for CPython 3.11 opcodes (#25795)
fad7f96dc85 is described below
commit fad7f96dc85047eb1a01536f817565d6f959ee35
Author: Jack McCluskey <[email protected]>
AuthorDate: Fri Mar 17 16:14:44 2023 -0400
Add support for CPython 3.11 opcodes (#25795)
* Add support for CPython 3.11 opcodes
* Remove unused utility function
* Uncomment test
* Drop camel case form
* Add additional documentation comments
---
sdks/python/apache_beam/typehints/opcodes.py | 36 +++++-
.../apache_beam/typehints/trivial_inference.py | 129 +++++++++++++++++++--
2 files changed, 151 insertions(+), 14 deletions(-)
diff --git a/sdks/python/apache_beam/typehints/opcodes.py b/sdks/python/apache_beam/typehints/opcodes.py
index 1ac3538c691..5a35b56b932 100644
--- a/sdks/python/apache_beam/typehints/opcodes.py
+++ b/sdks/python/apache_beam/typehints/opcodes.py
@@ -178,7 +178,9 @@ binary_lshift = inplace_lshift = binary_rshift = inplace_rshift = pop_top
binary_and = inplace_and = symmetric_binary_op
binary_xor = inplace_xor = symmetric_binary_op
-binary_or = inpalce_or = symmetric_binary_op
+binary_or = inplace_or = symmetric_binary_op
+
+binary_op = symmetric_binary_op
def store_subscr(unused_state, unused_args):
@@ -403,6 +405,8 @@ import_from = push_value(Any)
def load_global(state, arg):
+ if (sys.version_info.major, sys.version_info.minor) >= (3, 11):
+ arg = arg >> 1
state.stack.append(state.get_global(arg))
@@ -428,10 +432,18 @@ def gen_start(state, arg):
def load_closure(state, arg):
+ # The arg is no longer offset by len(co_varnames) as of 3.11
+ # See https://docs.python.org/3/library/dis.html#opcode-LOAD_CLOSURE
+ if (sys.version_info.major, sys.version_info.minor) >= (3, 11):
+ arg -= len(state.co.co_varnames)
state.stack.append(state.get_closure(arg))
def load_deref(state, arg):
+ # The arg is no longer offset by len(co_varnames) as of 3.11
+ # See https://docs.python.org/3/library/dis.html#opcode-LOAD_DEREF
+ if (sys.version_info.major, sys.version_info.minor) >= (3, 11):
+ arg -= len(state.co.co_varnames)
state.stack.append(state.closure_type(arg))
@@ -440,9 +452,19 @@ def make_function(state, arg):
"""
# TODO(luke-zhu): Handle default argument types
globals = state.f.__globals__ # Inherits globals from the current frame
- func_name = state.stack[-1].value
- func_code = state.stack[-2].value
- pop_count = 2
+ tos = state.stack[-1].value
+ # In Python 3.11 lambdas no longer have fully qualified names on the stack,
+ # so we check for this case (AKA the code is top of stack.)
+ if isinstance(tos, types.CodeType):
+ func_name = None
+ func_code = tos
+ pop_count = 1
+ is_lambda = True
+ else:
+ func_name = tos
+ func_code = state.stack[-2].value
+ pop_count = 2
+ is_lambda = False
closure = None
# arg contains flags, with corresponding stack values if positive.
# https://docs.python.org/3.6/library/dis.html#opcode-MAKE_FUNCTION
@@ -450,8 +472,12 @@ def make_function(state, arg):
if arg & 0x08:
# Convert types in Tuple constraint to a tuple of CPython cells.
# https://stackoverflow.com/a/44670295
+ if is_lambda:
+ closureTuplePos = -2
+ else:
+ closureTuplePos = -3
closure = tuple((lambda _: lambda: _)(t).__closure__[0]
- for t in state.stack[-3].tuple_types)
+ for t in state.stack[closureTuplePos].tuple_types)
func = types.FunctionType(func_code, globals, name=func_name,
closure=closure)
diff --git a/sdks/python/apache_beam/typehints/trivial_inference.py b/sdks/python/apache_beam/typehints/trivial_inference.py
index f69a87192e3..f4b350e8f05 100644
--- a/sdks/python/apache_beam/typehints/trivial_inference.py
+++ b/sdks/python/apache_beam/typehints/trivial_inference.py
@@ -126,11 +126,12 @@ class Const(object):
class FrameState(object):
"""Stores the state of the frame at a particular point of execution.
"""
- def __init__(self, f, local_vars=None, stack=()):
+ def __init__(self, f, local_vars=None, stack=(), kw_names=None):
self.f = f
self.co = f.__code__
self.vars = list(local_vars)
self.stack = list(stack)
+ self.kw_names = kw_names
def __eq__(self, other):
return isinstance(other, FrameState) and self.__dict__ == other.__dict__
@@ -139,7 +140,7 @@ class FrameState(object):
return hash(tuple(sorted(self.__dict__.items())))
def copy(self):
- return FrameState(self.f, self.vars, self.stack)
+ return FrameState(self.f, self.vars, self.stack, self.kw_names)
def const_type(self, i):
return Const(self.co.co_consts[i])
@@ -352,7 +353,10 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
if debug:
print()
print(f, id(f), input_types)
- dis.dis(f)
+ if (sys.version_info.major, sys.version_info.minor) >= (3, 11):
+ dis.dis(f, show_caches=True)
+ else:
+ dis.dis(f)
from . import opcodes
simple_ops = dict((k.upper(), v) for k, v in opcodes.__dict__.items())
@@ -374,7 +378,12 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
# In Python 3, use dis library functions to disassemble bytecode and handle
# EXTENDED_ARGs.
ofs_table = {} # offset -> instruction
- for instruction in dis.get_instructions(f):
+ if (sys.version_info.major, sys.version_info.minor) >= (3, 11):
+ dis_ints = dis.get_instructions(f, show_caches=True)
+ else:
+ dis_ints = dis.get_instructions(f)
+
+ for instruction in dis_ints:
ofs_table[instruction.offset] = instruction
# Python 3.6+: 1 byte opcode + 1 byte arg (2 bytes, arg may be ignored).
@@ -384,7 +393,7 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
# Python 3.10: bpo-27129 changes jump offsets to use instruction offsets,
# not byte offsets. The offsets were halved (16 bits for instructions vs 8
# bits for bytes), so we have to double the value of arg.
- if (sys.version_info.major, sys.version_info.minor) == (3, 10):
+ if (sys.version_info.major, sys.version_info.minor) >= (3, 10):
jump_multiplier = 2
else:
jump_multiplier = 1
@@ -400,6 +409,7 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
print(dis.opname[op].ljust(20), end=' ')
pc += inst_size
+ arg = None
if op >= dis.HAVE_ARGUMENT:
arg = instruction.arg
pc += opt_arg_size
@@ -408,9 +418,14 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
if op in dis.hasconst:
print('(' + repr(co.co_consts[arg]) + ')', end=' ')
elif op in dis.hasname:
- print('(' + co.co_names[arg] + ')', end=' ')
+ if (sys.version_info.major, sys.version_info.minor) >= (3, 11):
+ # Pre-emptively bit-shift so the print doesn't go out of index
+ print_arg = arg >> 1
+ else:
+ print_arg = arg
+ print('(' + co.co_names[print_arg] + ')', end=' ')
elif op in dis.hasjrel:
- print('(to ' + repr(pc + arg) + ')', end=' ')
+ print('(to ' + repr(pc + (arg * jump_multiplier)) + ')', end=' ')
elif op in dis.haslocal:
print('(' + co.co_varnames[arg] + ')', end=' ')
elif op in dis.hascompare:
@@ -418,7 +433,12 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
elif op in dis.hasfree:
if free is None:
free = co.co_cellvars + co.co_freevars
- print('(' + free[arg] + ')', end=' ')
+ # From 3.11 on the arg is no longer offset by len(co_varnames)
+ # so we adjust it back
+ print_arg = arg
+ if (sys.version_info.major, sys.version_info.minor) >= (3, 11):
+ print_arg = arg - len(co.co_varnames)
+ print('(' + free[print_arg] + ')', end=' ')
# Actually emulate the op.
if state is None and states[start] is None:
@@ -498,6 +518,40 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
else:
return_type = typehints.Any
state.stack[-pop_count:] = [return_type]
+ elif opname == 'CALL':
+ pop_count = 1 + arg
+ # Keyword Args case
+ if state.kw_names is not None:
+ if isinstance(state.stack[-pop_count], Const):
+ from apache_beam.pvalue import Row
+ if state.stack[-pop_count].value == Row:
+ fields = state.kw_names
+ return_type = row_type.RowTypeConstraint.from_fields(
+ list(
+ zip(fields,
+ Const.unwrap_all(state.stack[-pop_count + 1:]))))
+ else:
+ return_type = Any
+ state.kw_names = None
+ else:
+ # Handle lambdas always having an arg of 0 for CALL
+ # See https://github.com/python/cpython/issues/102403 for context.
+ if pop_count == 1:
+ while pop_count <= len(state.stack):
+ if isinstance(state.stack[-pop_count], Const):
+ break
+ pop_count += 1
+ if depth <= 0 or pop_count > len(state.stack):
+ return_type = Any
+ elif isinstance(state.stack[-pop_count], Const):
+ return_type = infer_return_type(
+ state.stack[-pop_count].value,
+ state.stack[1 - pop_count:],
+ debug=debug,
+ depth=depth - 1)
+ else:
+ return_type = Any
+ state.stack[-pop_count:] = [return_type]
elif opname in simple_ops:
if debug:
print("Executing simple op " + opname)
@@ -511,6 +565,10 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
jmp = pc + arg * jump_multiplier
jmp_state = state
state = None
+ elif opname in ('JUMP_BACKWARD', 'JUMP_BACKWARD_NO_INTERRUPT'):
+ jmp = pc - (arg * jump_multiplier)
+ jmp_state = state
+ state = None
elif opname == 'JUMP_ABSOLUTE':
jmp = arg * jump_multiplier
jmp_state = state
@@ -519,8 +577,30 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
state.stack.pop()
jmp = arg * jump_multiplier
jmp_state = state.copy()
+ elif opname in ('POP_JUMP_FORWARD_IF_TRUE', 'POP_JUMP_FORWARD_IF_FALSE'):
+ state.stack.pop()
+ jmp = pc + arg * jump_multiplier
+ jmp_state = state.copy()
+ elif opname in ('POP_JUMP_BACKWARD_IF_TRUE', 'POP_JUMP_BACKWARD_IF_FALSE'):
+ state.stack.pop()
+ jmp = pc - (arg * jump_multiplier)
+ jmp_state = state.copy()
+ elif opname in ('POP_JUMP_FORWARD_IF_NONE', 'POP_JUMP_FORWARD_IF_NOT_NONE'):
+ state.stack.pop()
+ jmp = pc + arg * jump_multiplier
+ jmp_state = state.copy()
+ elif opname in ('POP_JUMP_BACKWARD_IF_NONE',
+ 'POP_JUMP_BACKWARD_IF_NOT_NONE'):
+ state.stack.pop()
+ jmp = pc - (arg * jump_multiplier)
+ jmp_state = state.copy()
elif opname in ('JUMP_IF_TRUE_OR_POP', 'JUMP_IF_FALSE_OR_POP'):
- jmp = arg * jump_multiplier
+ # The arg was changed to be a relative delta instead of an absolute
+ # in 3.11
+ if (sys.version_info.major, sys.version_info.minor) >= (3, 11):
+ jmp = pc + arg * jump_multiplier
+ else:
+ jmp = arg * jump_multiplier
jmp_state = state.copy()
state.stack.pop()
elif opname == 'FOR_ITER':
@@ -528,6 +608,37 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
jmp_state = state.copy()
jmp_state.stack.pop()
state.stack.append(element_type(state.stack[-1]))
+ elif opname == 'COPY_FREE_VARS':
+ # Helps with calling closures, but since we aren't executing
+ # them we can treat this as a no-op
+ pass
+ elif opname == 'KW_NAMES':
+ tup = co.co_consts[arg]
+ state.kw_names = tup
+ elif opname == 'RESUME':
+ # RESUME is a no-op
+ pass
+ elif opname == 'PUSH_NULL':
+ # We're treating this as a no-op to avoid having to check
+ # for extra None values on the stack when we extract return
+ # values
+ pass
+ elif opname == 'PRECALL':
+ # PRECALL is a no-op.
+ pass
+ elif opname == 'MAKE_CELL':
+ # TODO: see if we need to implement cells like this
+ pass
+ elif opname == 'RETURN_GENERATOR':
+ # TODO: see what this behavior is supposed to be beyond
+ # putting something on the stack to be popped off
+ state.stack.append(None)
+ pass
+ elif opname == 'CACHE':
+ # No-op introduced in 3.11. Without handling this some
+ # instructions have functionally > 2 byte size.
+ pass
+
else:
raise TypeInferenceError('unable to handle %s' % opname)