https://github.com/python/cpython/commit/74bb3ca1f8ff7b667a003c9e795458c1a9e14780
commit: 74bb3ca1f8ff7b667a003c9e795458c1a9e14780
branch: main
author: Kumar Aditya <[email protected]>
committer: kumaraditya303 <[email protected]>
date: 2026-01-06T23:23:28+05:30
summary:
gh-142863: optimize `list` and `set` calls with generator expressions (#142864)
files:
A Misc/NEWS.d/next/Core_and_Builtins/2025-12-17-10-12-09.gh-issue-142863.ZW2ZF8.rst
M Include/internal/pycore_magic_number.h
M Include/internal/pycore_opcode_utils.h
M Lib/opcode.py
M Lib/test/test_builtin.py
M Lib/test/test_dis.py
M Python/codegen.c
M Python/pylifecycle.c
diff --git a/Include/internal/pycore_magic_number.h b/Include/internal/pycore_magic_number.h
index 09954856204a10..0724da99b45ba6 100644
--- a/Include/internal/pycore_magic_number.h
+++ b/Include/internal/pycore_magic_number.h
@@ -288,6 +288,7 @@ Known values:
Python 3.15a1 3655 (Fix miscompilation of some module-level annotations)
Python 3.15a2 3656 (Add TRACE_RECORD instruction, for platforms with
switch based interpreter)
Python 3.15a4 3657 (Add BINARY_OP_SUBSCR_USTR_INT)
+ Python 3.15a4 3658 (Optimize bytecode for list/set called on genexp)
Python 3.16 will start with 3700
@@ -301,7 +302,7 @@ PC/launcher.c must also be updated.
*/
-#define PYC_MAGIC_NUMBER 3657
+#define PYC_MAGIC_NUMBER 3658
/* This is equivalent to converting PYC_MAGIC_NUMBER to 2 bytes
(little-endian) and then appending b'\r\n'. */
#define PYC_MAGIC_NUMBER_TOKEN \
diff --git a/Include/internal/pycore_opcode_utils.h b/Include/internal/pycore_opcode_utils.h
index 79a1a242556a52..e4d859fcc47d02 100644
--- a/Include/internal/pycore_opcode_utils.h
+++ b/Include/internal/pycore_opcode_utils.h
@@ -73,7 +73,9 @@ extern "C" {
#define CONSTANT_BUILTIN_TUPLE 2
#define CONSTANT_BUILTIN_ALL 3
#define CONSTANT_BUILTIN_ANY 4
-#define NUM_COMMON_CONSTANTS 5
+#define CONSTANT_BUILTIN_LIST 5
+#define CONSTANT_BUILTIN_SET 6
+#define NUM_COMMON_CONSTANTS 7
/* Values used in the oparg for RESUME */
#define RESUME_AT_FUNC_START 0
diff --git a/Lib/opcode.py b/Lib/opcode.py
index 0e9520b6832499..d8374c45481a94 100644
--- a/Lib/opcode.py
+++ b/Lib/opcode.py
@@ -40,7 +40,8 @@
_intrinsic_2_descs = _opcode.get_intrinsic2_descs()
_special_method_names = _opcode.get_special_method_names()
_common_constants = [builtins.AssertionError, builtins.NotImplementedError,
- builtins.tuple, builtins.all, builtins.any]
+ builtins.tuple, builtins.all, builtins.any, builtins.list,
+ builtins.set]
_nb_ops = _opcode.get_nb_ops()
hascompare = [opmap["COMPARE_OP"]]
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
index ce60a5d095dd52..7b69374b1868d1 100644
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -246,7 +246,7 @@ def test_any(self):
S = [10, 20, 30]
self.assertEqual(any(x > 42 for x in S), False)
- def test_all_any_tuple_optimization(self):
+ def test_all_any_tuple_list_set_optimization(self):
def f_all():
return all(x-2 for x in [1,2,3])
@@ -256,7 +256,13 @@ def f_any():
def f_tuple():
return tuple(2*x for x in [1,2,3])
- funcs = [f_all, f_any, f_tuple]
+ def f_list():
+ return list(2*x for x in [1,2,3])
+
+ def f_set():
+ return set(2*x for x in [1,2,3])
+
+ funcs = [f_all, f_any, f_tuple, f_list, f_set]
for f in funcs:
# check that generator code object is not duplicated
@@ -266,33 +272,35 @@ def f_tuple():
# check that overriding the builtins works
- global all, any, tuple
- saved = all, any, tuple
+ global all, any, tuple, list, set
+ saved = all, any, tuple, list, set
try:
all = lambda x : "all"
any = lambda x : "any"
tuple = lambda x : "tuple"
+ list = lambda x : "list"
+ set = lambda x : "set"
overridden_outputs = [f() for f in funcs]
finally:
- all, any, tuple = saved
-
- self.assertEqual(overridden_outputs, ['all', 'any', 'tuple'])
+ all, any, tuple, list, set = saved
+ self.assertEqual(overridden_outputs, ['all', 'any', 'tuple', 'list', 'set'])
# Now repeat, overriding the builtins module as well
- saved = all, any, tuple
+ saved = all, any, tuple, list, set
try:
builtins.all = all = lambda x : "all"
builtins.any = any = lambda x : "any"
builtins.tuple = tuple = lambda x : "tuple"
+ builtins.list = list = lambda x : "list"
+ builtins.set = set = lambda x : "set"
overridden_outputs = [f() for f in funcs]
finally:
- all, any, tuple = saved
- builtins.all, builtins.any, builtins.tuple = saved
-
- self.assertEqual(overridden_outputs, ['all', 'any', 'tuple'])
+ all, any, tuple, list, set = saved
+ builtins.all, builtins.any, builtins.tuple, builtins.list, builtins.set = saved
+ self.assertEqual(overridden_outputs, ['all', 'any', 'tuple', 'list', 'set'])
def test_ascii(self):
self.assertEqual(ascii(''), '\'\'')
diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py
index 3e7477487200d0..902bcdd196ec93 100644
--- a/Lib/test/test_dis.py
+++ b/Lib/test/test_dis.py
@@ -827,7 +827,14 @@ def foo(x):
%4d RESUME 0
-%4d LOAD_GLOBAL 1 (list + NULL)
+%4d LOAD_GLOBAL 0 (list)
+ COPY 1
+ LOAD_COMMON_CONSTANT 5 (list)
+ IS_OP 0 (is)
+ POP_JUMP_IF_FALSE 22 (to L3)
+ NOT_TAKEN
+ POP_TOP
+ BUILD_LIST 0
LOAD_FAST_BORROW 0 (x)
BUILD_TUPLE 1
LOAD_CONST %d (<code object <genexpr> at 0x..., file "%s", line %d>)
@@ -835,6 +842,21 @@ def foo(x):
SET_FUNCTION_ATTRIBUTE 8 (closure)
LOAD_DEREF 1 (y)
CALL 0
+ PUSH_NULL
+ L1: FOR_ITER 3 (to L2)
+ LIST_APPEND 3
+ JUMP_BACKWARD 5 (to L1)
+ L2: END_FOR
+ POP_ITER
+ RETURN_VALUE
+ L3: PUSH_NULL
+ LOAD_FAST_BORROW 0 (x)
+ BUILD_TUPLE 1
+ LOAD_CONST 1 (<code object <genexpr> at 0x..., file "%s", line %d>)
+ MAKE_FUNCTION
+ SET_FUNCTION_ATTRIBUTE 8 (closure)
+ LOAD_DEREF 1 (y)
+ CALL 0
CALL 1
RETURN_VALUE
""" % (dis_nested_0,
@@ -845,6 +867,8 @@ def foo(x):
1 if __debug__ else 0,
__file__,
_h.__code__.co_firstlineno + 3,
+ __file__,
+ _h.__code__.co_firstlineno + 3,
)
dis_nested_2 = """%s
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-17-10-12-09.gh-issue-142863.ZW2ZF8.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-17-10-12-09.gh-issue-142863.ZW2ZF8.rst
new file mode 100644
index 00000000000000..90c8f32b54156f
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-17-10-12-09.gh-issue-142863.ZW2ZF8.rst
@@ -0,0 +1 @@
+Generate optimized bytecode when calling :class:`list` or :class:`set` with a generator expression.
diff --git a/Python/codegen.c b/Python/codegen.c
index c4109fcaa48dbe..acd8d84bc9ec6d 100644
--- a/Python/codegen.c
+++ b/Python/codegen.c
@@ -3892,6 +3892,12 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
else if (_PyUnicode_EqualToASCIIString(func->v.Name.id, "tuple")) {
const_oparg = CONSTANT_BUILTIN_TUPLE;
}
+ else if (_PyUnicode_EqualToASCIIString(func->v.Name.id, "list")) {
+ const_oparg = CONSTANT_BUILTIN_LIST;
+ }
+ else if (_PyUnicode_EqualToASCIIString(func->v.Name.id, "set")) {
+ const_oparg = CONSTANT_BUILTIN_SET;
+ }
if (const_oparg != -1) {
ADDOP_I(c, loc, COPY, 1); // the function
ADDOP_I(c, loc, LOAD_COMMON_CONSTANT, const_oparg);
@@ -3899,8 +3905,10 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
ADDOP_JUMP(c, loc, POP_JUMP_IF_FALSE, skip_optimization);
ADDOP(c, loc, POP_TOP);
- if (const_oparg == CONSTANT_BUILTIN_TUPLE) {
+ if (const_oparg == CONSTANT_BUILTIN_TUPLE || const_oparg == CONSTANT_BUILTIN_LIST) {
ADDOP_I(c, loc, BUILD_LIST, 0);
+ } else if (const_oparg == CONSTANT_BUILTIN_SET) {
+ ADDOP_I(c, loc, BUILD_SET, 0);
}
expr_ty generator_exp = asdl_seq_GET(args, 0);
VISIT(c, expr, generator_exp);
@@ -3911,9 +3919,12 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
ADDOP(c, loc, PUSH_NULL); // Push NULL index for loop
USE_LABEL(c, loop);
ADDOP_JUMP(c, loc, FOR_ITER, cleanup);
- if (const_oparg == CONSTANT_BUILTIN_TUPLE) {
+ if (const_oparg == CONSTANT_BUILTIN_TUPLE || const_oparg == CONSTANT_BUILTIN_LIST) {
ADDOP_I(c, loc, LIST_APPEND, 3);
ADDOP_JUMP(c, loc, JUMP, loop);
+ } else if (const_oparg == CONSTANT_BUILTIN_SET) {
+ ADDOP_I(c, loc, SET_ADD, 3);
+ ADDOP_JUMP(c, loc, JUMP, loop);
}
else {
ADDOP(c, loc, TO_BOOL);
@@ -3921,7 +3932,9 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
}
ADDOP(c, NO_LOCATION, POP_ITER);
- if (const_oparg != CONSTANT_BUILTIN_TUPLE) {
+ if (const_oparg != CONSTANT_BUILTIN_TUPLE &&
+ const_oparg != CONSTANT_BUILTIN_LIST &&
+ const_oparg != CONSTANT_BUILTIN_SET) {
ADDOP_LOAD_CONST(c, loc, initial_res == Py_True ? Py_False : Py_True);
}
ADDOP_JUMP(c, loc, JUMP, end);
@@ -3931,6 +3944,10 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
ADDOP(c, NO_LOCATION, POP_ITER);
if (const_oparg == CONSTANT_BUILTIN_TUPLE) {
ADDOP_I(c, loc, CALL_INTRINSIC_1, INTRINSIC_LIST_TO_TUPLE);
+ } else if (const_oparg == CONSTANT_BUILTIN_LIST) {
+ // result is already a list
+ } else if (const_oparg == CONSTANT_BUILTIN_SET) {
+ // result is already a set
}
else {
ADDOP_LOAD_CONST(c, loc, initial_res);
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index bb663db195c089..16fb43ea191439 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -832,6 +832,8 @@ pycore_init_builtins(PyThreadState *tstate)
interp->common_consts[CONSTANT_BUILTIN_TUPLE] = (PyObject*)&PyTuple_Type;
interp->common_consts[CONSTANT_BUILTIN_ALL] = all;
interp->common_consts[CONSTANT_BUILTIN_ANY] = any;
+ interp->common_consts[CONSTANT_BUILTIN_LIST] = (PyObject*)&PyList_Type;
+ interp->common_consts[CONSTANT_BUILTIN_SET] = (PyObject*)&PySet_Type;
for (int i=0; i < NUM_COMMON_CONSTANTS; i++) {
assert(interp->common_consts[i] != NULL);
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]