https://github.com/python/cpython/commit/3618240624d98de2a68a79dc174d49efb96cc2e6
commit: 3618240624d98de2a68a79dc174d49efb96cc2e6
branch: main
author: Yan Yanchii <yyanc...@gmail.com>
committer: iritkatriel <1055913+iritkatr...@users.noreply.github.com>
date: 2025-03-12T21:45:54Z
summary:

gh-126835: Avoid creating unnecessary tuple when looking for constant sequence 
during constant folding (#131054)

files:
M Include/internal/pycore_compile.h
M Python/codegen.c
M Python/flowgraph.c

diff --git a/Include/internal/pycore_compile.h 
b/Include/internal/pycore_compile.h
index 9f0ca33892a43b..b61ac8fccdca14 100644
--- a/Include/internal/pycore_compile.h
+++ b/Include/internal/pycore_compile.h
@@ -14,6 +14,15 @@ extern "C" {
 #include "pycore_symtable.h"  // _Py_SourceLocation
 #include "pycore_instruction_sequence.h"
 
+/* A soft limit for stack use, to avoid excessive
+ * memory use for large constants, etc.
+ *
+ * The value 30 is plucked out of thin air.
+ * Code that could use more stack than this is
+ * rare, so the exact value is unimportant.
+ */
+#define _PY_STACK_USE_GUIDELINE 30
+
 struct _arena;   // Type defined in pycore_pyarena.h
 struct _mod;     // Type defined in pycore_ast.h
 
diff --git a/Python/codegen.c b/Python/codegen.c
index 908711250d14bc..6ef3c63b33530e 100644
--- a/Python/codegen.c
+++ b/Python/codegen.c
@@ -37,15 +37,6 @@
 #define COMP_SETCOMP  2
 #define COMP_DICTCOMP 3
 
-/* A soft limit for stack use, to avoid excessive
- * memory use for large constants, etc.
- *
- * The value 30 is plucked out of thin air.
- * Code that could use more stack than this is
- * rare, so the exact value is unimportant.
- */
-#define STACK_USE_GUIDELINE 30
-
 #undef SUCCESS
 #undef ERROR
 #define SUCCESS 0
@@ -3209,7 +3200,7 @@ starunpack_helper_impl(compiler *c, location loc,
                        int build, int add, int extend, int tuple)
 {
     Py_ssize_t n = asdl_seq_LEN(elts);
-    int big = n + pushed + (injected_arg ? 1 : 0) > STACK_USE_GUIDELINE;
+    int big = n + pushed + (injected_arg ? 1 : 0) > _PY_STACK_USE_GUIDELINE;
     int seen_star = 0;
     for (Py_ssize_t i = 0; i < n; i++) {
         expr_ty elt = asdl_seq_GET(elts, i);
@@ -3364,7 +3355,7 @@ static int
 codegen_subdict(compiler *c, expr_ty e, Py_ssize_t begin, Py_ssize_t end)
 {
     Py_ssize_t i, n = end - begin;
-    int big = n*2 > STACK_USE_GUIDELINE;
+    int big = n*2 > _PY_STACK_USE_GUIDELINE;
     location loc = LOC(e);
     if (big) {
         ADDOP_I(c, loc, BUILD_MAP, 0);
@@ -3411,7 +3402,7 @@ codegen_dict(compiler *c, expr_ty e)
             ADDOP_I(c, loc, DICT_UPDATE, 1);
         }
         else {
-            if (elements*2 > STACK_USE_GUIDELINE) {
+            if (elements*2 > _PY_STACK_USE_GUIDELINE) {
                 RETURN_IF_ERROR(codegen_subdict(c, e, i - elements, i + 1));
                 if (have_dict) {
                     ADDOP_I(c, loc, DICT_UPDATE, 1);
@@ -3752,7 +3743,7 @@ maybe_optimize_method_call(compiler *c, expr_ty e)
     /* Check that there aren't too many arguments */
     argsl = asdl_seq_LEN(args);
     kwdsl = asdl_seq_LEN(kwds);
-    if (argsl + kwdsl + (kwdsl != 0) >= STACK_USE_GUIDELINE) {
+    if (argsl + kwdsl + (kwdsl != 0) >= _PY_STACK_USE_GUIDELINE) {
         return 0;
     }
     /* Check that there are no *varargs types of arguments. */
@@ -3849,7 +3840,7 @@ codegen_joined_str(compiler *c, expr_ty e)
 {
     location loc = LOC(e);
     Py_ssize_t value_count = asdl_seq_LEN(e->v.JoinedStr.values);
-    if (value_count > STACK_USE_GUIDELINE) {
+    if (value_count > _PY_STACK_USE_GUIDELINE) {
         _Py_DECLARE_STR(empty, "");
         ADDOP_LOAD_CONST_NEW(c, loc, Py_NewRef(&_Py_STR(empty)));
         ADDOP_NAME(c, loc, LOAD_METHOD, &_Py_ID(join), names);
@@ -3928,7 +3919,7 @@ codegen_subkwargs(compiler *c, location loc,
     Py_ssize_t i, n = end - begin;
     keyword_ty kw;
     assert(n > 0);
-    int big = n*2 > STACK_USE_GUIDELINE;
+    int big = n*2 > _PY_STACK_USE_GUIDELINE;
     if (big) {
         ADDOP_I(c, NO_LOCATION, BUILD_MAP, 0);
     }
@@ -3981,7 +3972,7 @@ codegen_call_helper_impl(compiler *c, location loc,
     nelts = asdl_seq_LEN(args);
     nkwelts = asdl_seq_LEN(keywords);
 
-    if (nelts + nkwelts*2 > STACK_USE_GUIDELINE) {
+    if (nelts + nkwelts*2 > _PY_STACK_USE_GUIDELINE) {
          goto ex_call;
     }
     for (i = 0; i < nelts; i++) {
diff --git a/Python/flowgraph.c b/Python/flowgraph.c
index 286f8999902e32..fdafafd76617a8 100644
--- a/Python/flowgraph.c
+++ b/Python/flowgraph.c
@@ -1344,64 +1344,45 @@ add_const(PyObject *newconst, PyObject *consts, 
PyObject *const_cache)
 }
 
 /*
-   Walk basic block backwards starting from "start" trying to collect "size" 
number of
-   subsequent constants from instructions loading constants into new tuple 
ignoring NOP's in between.
+  Traverse the instructions of the basic block backwards from index "start", 
skipping over NOPs.
+  Try to collect "size" number of consecutive instructions that load constants 
into the array "instrs".
+  Caller must make sure that length of "instrs" is sufficient to fit in at 
least "size" instructions.
 
-   Returns ERROR on error and sets "seq" to NULL.
-   Returns SUCCESS on success and sets "seq" to NULL if failed to collect 
requested number of constants.
-   Returns SUCCESS on success and sets "seq" to resulting tuple if succeeded 
to collect requested number of constants.
+  Return boolean indicating whether "size" such instructions were found.
 */
-static int
-get_constant_sequence(basicblock *bb, int start, int size,
-                      PyObject *consts, PyObject **seq)
+static bool
+get_const_loading_instrs(basicblock *bb, int start, cfg_instr **instrs, int 
size)
 {
     assert(start < bb->b_iused);
-    *seq = NULL;
-    PyObject *res = PyTuple_New((Py_ssize_t)size);
-    if (res == NULL) {
-        return ERROR;
-    }
+    assert(size >= 0);
+    assert(size <= _PY_STACK_USE_GUIDELINE);
+
     for (; start >= 0 && size > 0; start--) {
         cfg_instr *instr = &bb->b_instr[start];
         if (instr->i_opcode == NOP) {
             continue;
         }
         if (!loads_const(instr->i_opcode)) {
-            break;
-        }
-        PyObject *constant = get_const_value(instr->i_opcode, instr->i_oparg, 
consts);
-        if (constant == NULL) {
-            Py_DECREF(res);
-            return ERROR;
+            return false;
         }
-        PyTuple_SET_ITEM(res, --size, constant);
+        instrs[--size] = instr;
     }
-    if (size > 0) {
-        Py_DECREF(res);
-    }
-    else {
-        *seq = res;
-    }
-    return SUCCESS;
+
+    return size == 0;
 }
 
 /*
-  Walk basic block backwards starting from "start" and change "count" number of
-  non-NOP instructions to NOP's and set their location to NO_LOCATION.
+  Change every instruction in "instrs" NOP and set its location to NO_LOCATION.
+  Caller must make sure "instrs" has at least "size" elements.
 */
 static void
-nop_out(basicblock *bb, int start, int count)
+nop_out(cfg_instr **instrs, int size)
 {
-    assert(start < bb->b_iused);
-    for (; count > 0; start--) {
-        assert(start >= 0);
-        cfg_instr *instr = &bb->b_instr[start];
-        if (instr->i_opcode == NOP) {
-            continue;
-        }
+    for (int i = 0; i < size; i++) {
+        cfg_instr *instr = instrs[i];
+        assert(instr->i_opcode != NOP);
         INSTR_SET_OP0(instr, NOP);
         INSTR_SET_LOC(instr, NO_LOCATION);
-        count--;
     }
 }
 
@@ -1437,19 +1418,39 @@ fold_tuple_of_constants(basicblock *bb, int i, PyObject 
*consts, PyObject *const
     /* Pre-conditions */
     assert(PyDict_CheckExact(const_cache));
     assert(PyList_CheckExact(consts));
+
     cfg_instr *instr = &bb->b_instr[i];
     assert(instr->i_opcode == BUILD_TUPLE);
+
     int seq_size = instr->i_oparg;
-    PyObject *newconst;
-    RETURN_IF_ERROR(get_constant_sequence(bb, i-1, seq_size, consts, 
&newconst));
-    if (newconst == NULL) {
+    if (seq_size > _PY_STACK_USE_GUIDELINE) {
+        return SUCCESS;
+    }
+
+    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
+    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {
         /* not a const sequence */
         return SUCCESS;
     }
-    assert(PyTuple_Size(newconst) == seq_size);
-    RETURN_IF_ERROR(instr_make_load_const(instr, newconst, consts, 
const_cache));
-    nop_out(bb, i-1, seq_size);
-    return SUCCESS;
+
+    PyObject *const_tuple = PyTuple_New((Py_ssize_t)seq_size);
+    if (const_tuple == NULL) {
+        return ERROR;
+    }
+
+    for (int i = 0; i < seq_size; i++) {
+        cfg_instr *inst = const_instrs[i];
+        assert(loads_const(inst->i_opcode));
+        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, 
consts);
+        if (element == NULL) {
+            Py_DECREF(const_tuple);
+            return ERROR;
+        }
+        PyTuple_SET_ITEM(const_tuple, i, element);
+    }
+
+    nop_out(const_instrs, seq_size);
+    return instr_make_load_const(instr, const_tuple, consts, const_cache);
 }
 
 #define MIN_CONST_SEQUENCE_SIZE 3
@@ -1470,34 +1471,56 @@ optimize_lists_and_sets(basicblock *bb, int i, int 
nextop,
 {
     assert(PyDict_CheckExact(const_cache));
     assert(PyList_CheckExact(consts));
+
     cfg_instr *instr = &bb->b_instr[i];
     assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);
+
     bool contains_or_iter = nextop == GET_ITER || nextop == CONTAINS_OP;
     int seq_size = instr->i_oparg;
-    if (seq_size < MIN_CONST_SEQUENCE_SIZE && !contains_or_iter) {
+    if (seq_size > _PY_STACK_USE_GUIDELINE ||
+        (seq_size < MIN_CONST_SEQUENCE_SIZE && !contains_or_iter))
+    {
         return SUCCESS;
     }
-    PyObject *newconst;
-    RETURN_IF_ERROR(get_constant_sequence(bb, i-1, seq_size, consts, 
&newconst));
-    if (newconst == NULL) {    /* not a const sequence */
+
+    cfg_instr *const_instrs[_PY_STACK_USE_GUIDELINE];
+    if (!get_const_loading_instrs(bb, i-1, const_instrs, seq_size)) {  /* not 
a const sequence */
         if (contains_or_iter && instr->i_opcode == BUILD_LIST) {
             /* iterate over a tuple instead of list */
             INSTR_SET_OP1(instr, BUILD_TUPLE, instr->i_oparg);
         }
         return SUCCESS;
     }
-    assert(PyTuple_Size(newconst) == seq_size);
+
+    PyObject *const_result = PyTuple_New((Py_ssize_t)seq_size);
+    if (const_result == NULL) {
+        return ERROR;
+    }
+
+    for (int i = 0; i < seq_size; i++) {
+        cfg_instr *inst = const_instrs[i];
+        assert(loads_const(inst->i_opcode));
+        PyObject *element = get_const_value(inst->i_opcode, inst->i_oparg, 
consts);
+        if (element == NULL) {
+            Py_DECREF(const_result);
+            return ERROR;
+        }
+        PyTuple_SET_ITEM(const_result, i, element);
+    }
+
     if (instr->i_opcode == BUILD_SET) {
-        PyObject *frozenset = PyFrozenSet_New(newconst);
+        PyObject *frozenset = PyFrozenSet_New(const_result);
         if (frozenset == NULL) {
-            Py_DECREF(newconst);
+            Py_DECREF(const_result);
             return ERROR;
         }
-        Py_SETREF(newconst, frozenset);
+        Py_SETREF(const_result, frozenset);
     }
-    int index = add_const(newconst, consts, const_cache);
+
+    int index = add_const(const_result, consts, const_cache);
     RETURN_IF_ERROR(index);
-    nop_out(bb, i-1, seq_size);
+    nop_out(const_instrs, seq_size);
+
     if (contains_or_iter) {
         INSTR_SET_OP1(instr, LOAD_CONST, index);
     }
@@ -1696,19 +1719,34 @@ fold_const_binop(basicblock *bb, int i, PyObject 
*consts, PyObject *const_cache)
     #define BINOP_OPERAND_COUNT 2
     assert(PyDict_CheckExact(const_cache));
     assert(PyList_CheckExact(consts));
+
     cfg_instr *binop = &bb->b_instr[i];
     assert(binop->i_opcode == BINARY_OP);
-    PyObject *pair;
-    RETURN_IF_ERROR(get_constant_sequence(bb, i-1, BINOP_OPERAND_COUNT, 
consts, &pair));
-    if (pair == NULL) {
+
+    cfg_instr *operands_instrs[BINOP_OPERAND_COUNT];
+    if (!get_const_loading_instrs(bb, i-1, operands_instrs, 
BINOP_OPERAND_COUNT)) {
         /* not a const sequence */
         return SUCCESS;
     }
-    assert(PyTuple_Size(pair) == BINOP_OPERAND_COUNT);
-    PyObject *left = PyTuple_GET_ITEM(pair, 0);
-    PyObject *right = PyTuple_GET_ITEM(pair, 1);
-    PyObject *newconst = eval_const_binop(left, binop->i_oparg, right);
-    Py_DECREF(pair);
+
+    cfg_instr *lhs_instr = operands_instrs[0];
+    assert(loads_const(lhs_instr->i_opcode));
+    PyObject *lhs = get_const_value(lhs_instr->i_opcode, lhs_instr->i_oparg, 
consts);
+    if (lhs == NULL) {
+        return ERROR;
+    }
+
+    cfg_instr *rhs_instr = operands_instrs[1];
+    assert(loads_const(rhs_instr->i_opcode));
+    PyObject *rhs = get_const_value(rhs_instr->i_opcode, rhs_instr->i_oparg, 
consts);
+    if (rhs == NULL) {
+        Py_DECREF(lhs);
+        return ERROR;
+    }
+
+    PyObject *newconst = eval_const_binop(lhs, binop->i_oparg, rhs);
+    Py_DECREF(lhs);
+    Py_DECREF(rhs);
     if (newconst == NULL) {
         if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
             return ERROR;
@@ -1716,9 +1754,9 @@ fold_const_binop(basicblock *bb, int i, PyObject *consts, 
PyObject *const_cache)
         PyErr_Clear();
         return SUCCESS;
     }
-    RETURN_IF_ERROR(instr_make_load_const(binop, newconst, consts, 
const_cache));
-    nop_out(bb, i-1, BINOP_OPERAND_COUNT);
-    return SUCCESS;
+
+    nop_out(operands_instrs, BINOP_OPERAND_COUNT);
+    return instr_make_load_const(binop, newconst, consts, const_cache);
 }
 
 static PyObject *
@@ -1765,17 +1803,26 @@ fold_const_unaryop(basicblock *bb, int i, PyObject 
*consts, PyObject *const_cach
     #define UNARYOP_OPERAND_COUNT 1
     assert(PyDict_CheckExact(const_cache));
     assert(PyList_CheckExact(consts));
-    cfg_instr *instr = &bb->b_instr[i];
-    PyObject *seq;
-    RETURN_IF_ERROR(get_constant_sequence(bb, i-1, UNARYOP_OPERAND_COUNT, 
consts, &seq));
-    if (seq == NULL) {
+    cfg_instr *unaryop = &bb->b_instr[i];
+
+    cfg_instr *operand_instr;
+    if (!get_const_loading_instrs(bb, i-1, &operand_instr, 
UNARYOP_OPERAND_COUNT)) {
         /* not a const */
         return SUCCESS;
     }
-    assert(PyTuple_Size(seq) == UNARYOP_OPERAND_COUNT);
-    PyObject *operand = PyTuple_GET_ITEM(seq, 0);
-    PyObject *newconst = eval_const_unaryop(operand, instr->i_opcode, 
instr->i_oparg);
-    Py_DECREF(seq);
+
+    assert(loads_const(operand_instr->i_opcode));
+    PyObject *operand = get_const_value(
+        operand_instr->i_opcode,
+        operand_instr->i_oparg,
+        consts
+    );
+    if (operand == NULL) {
+        return ERROR;
+    }
+
+    PyObject *newconst = eval_const_unaryop(operand, unaryop->i_opcode, 
unaryop->i_oparg);
+    Py_DECREF(operand);
     if (newconst == NULL) {
         if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
             return ERROR;
@@ -1783,12 +1830,12 @@ fold_const_unaryop(basicblock *bb, int i, PyObject 
*consts, PyObject *const_cach
         PyErr_Clear();
         return SUCCESS;
     }
-    if (instr->i_opcode == UNARY_NOT) {
+
+    if (unaryop->i_opcode == UNARY_NOT) {
         assert(PyBool_Check(newconst));
     }
-    RETURN_IF_ERROR(instr_make_load_const(instr, newconst, consts, 
const_cache));
-    nop_out(bb, i-1, UNARYOP_OPERAND_COUNT);
-    return SUCCESS;
+    nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
+    return instr_make_load_const(unaryop, newconst, consts, const_cache);
 }
 
 #define VISITED (-1)

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to