https://github.com/python/cpython/commit/cbf3d38cbeb6e640d5959549169ec45cdedc1a71
commit: cbf3d38cbeb6e640d5959549169ec45cdedc1a71
branch: main
author: Mark Shannon <[email protected]>
committer: markshannon <[email protected]>
date: 2024-03-05T11:23:46Z
summary:

GH-115685: Optimize `TO_BOOL` and variants based on truthiness of input. (GH-116311)

files:
M Include/internal/pycore_optimizer.h
M Python/optimizer_analysis.c
M Python/optimizer_bytecodes.c
M Python/optimizer_cases.c.h
M Python/optimizer_symbols.c

diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h
index d32e6c0174f680..7c977728a95024 100644
--- a/Include/internal/pycore_optimizer.h
+++ b/Include/internal/pycore_optimizer.h
@@ -96,6 +96,7 @@ extern bool _Py_uop_sym_set_non_null(_Py_UopsSymbol *sym);
 extern bool _Py_uop_sym_set_type(_Py_UopsSymbol *sym, PyTypeObject *typ);
 extern bool _Py_uop_sym_set_const(_Py_UopsSymbol *sym, PyObject *const_val);
 extern bool _Py_uop_sym_is_bottom(_Py_UopsSymbol *sym);
+extern int _Py_uop_sym_truthiness(_Py_UopsSymbol *sym);
 
 
 extern int _Py_uop_abstractcontext_init(_Py_UOpsContext *ctx);
diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c
index a326e2249bb4de..1e1d5529ee17d7 100644
--- a/Python/optimizer_analysis.c
+++ b/Python/optimizer_analysis.c
@@ -298,9 +298,31 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
 #define sym_set_type _Py_uop_sym_set_type
 #define sym_set_const _Py_uop_sym_set_const
 #define sym_is_bottom _Py_uop_sym_is_bottom
+#define sym_truthiness _Py_uop_sym_truthiness
 #define frame_new _Py_uop_frame_new
 #define frame_pop _Py_uop_frame_pop
 
+static int
+optimize_to_bool(
+    _PyUOpInstruction *this_instr,
+    _Py_UOpsContext *ctx,
+    _Py_UopsSymbol *value,
+    _Py_UopsSymbol **result_ptr)
+{
+    if (sym_matches_type(value, &PyBool_Type)) {
+        REPLACE_OP(this_instr, _NOP, 0, 0);
+        *result_ptr = value;
+        return 1;
+    }
+    int truthiness = sym_truthiness(value);
+    if (truthiness >= 0) {
+        PyObject *load = truthiness ? Py_True : Py_False;
+        REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)load);
+        *result_ptr = sym_new_const(ctx, load);
+        return 1;
+    }
+    return 0;
+}
 
 /* 1 for success, 0 for not ready, cannot error at the moment. */
 static int
diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c
index 786d884fc5a1a8..2cf54270e4ad35 100644
--- a/Python/optimizer_bytecodes.c
+++ b/Python/optimizer_bytecodes.c
@@ -29,6 +29,14 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;
 #define frame_new _Py_uop_frame_new
 #define frame_pop _Py_uop_frame_pop
 
+extern int
+optimize_to_bool(
+    _PyUOpInstruction *this_instr,
+    _Py_UOpsContext *ctx,
+    _Py_UopsSymbol *value,
+    _Py_UopsSymbol **result_ptr);
+
+
 static int
 dummy_func(void) {
 
@@ -271,63 +279,72 @@ dummy_func(void) {
     }
 
     op(_TO_BOOL, (value -- res)) {
-        (void)value;
-        res = sym_new_type(ctx, &PyBool_Type);
-        OUT_OF_SPACE_IF_NULL(res);
+        if (optimize_to_bool(this_instr, ctx, value, &res)) {
+            OUT_OF_SPACE_IF_NULL(res);
+        }
+        else {
+            res = sym_new_type(ctx, &PyBool_Type);
+            OUT_OF_SPACE_IF_NULL(res);
+        }
     }
 
-    op(_TO_BOOL_BOOL, (value -- value)) {
-        if (sym_matches_type(value, &PyBool_Type)) {
-            REPLACE_OP(this_instr, _NOP, 0, 0);
+    op(_TO_BOOL_BOOL, (value -- res)) {
+        if (optimize_to_bool(this_instr, ctx, value, &res)) {
+            OUT_OF_SPACE_IF_NULL(res);
         }
         else {
             if(!sym_set_type(value, &PyBool_Type)) {
                 goto hit_bottom;
             }
+            res = value;
         }
     }
 
     op(_TO_BOOL_INT, (value -- res)) {
-        if (sym_is_const(value) && sym_matches_type(value, &PyLong_Type)) {
-            PyObject *load = _PyLong_IsZero((PyLongObject *)sym_get_const(value))
-                             ? Py_False : Py_True;
-            REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)load);
-            OUT_OF_SPACE_IF_NULL(res = sym_new_const(ctx, load));
+        if (optimize_to_bool(this_instr, ctx, value, &res)) {
+            OUT_OF_SPACE_IF_NULL(res);
         }
         else {
+            if(!sym_set_type(value, &PyLong_Type)) {
+                goto hit_bottom;
+            }
             OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyBool_Type));
         }
-        if(!sym_set_type(value, &PyLong_Type)) {
-            goto hit_bottom;
-        }
     }
 
     op(_TO_BOOL_LIST, (value -- res)) {
-        if(!sym_set_type(value, &PyList_Type)) {
-            goto hit_bottom;
+        if (optimize_to_bool(this_instr, ctx, value, &res)) {
+            OUT_OF_SPACE_IF_NULL(res);
+        }
+        else {
+            if(!sym_set_type(value, &PyList_Type)) {
+                goto hit_bottom;
+            }
+            OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyBool_Type));
         }
-        OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyBool_Type));
     }
 
     op(_TO_BOOL_NONE, (value -- res)) {
-        if (sym_get_const(value) == Py_None) {
-            REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)Py_False);
+        if (optimize_to_bool(this_instr, ctx, value, &res)) {
+            OUT_OF_SPACE_IF_NULL(res);
+        }
+        else {
+            if (!sym_set_const(value, Py_None)) {
+                goto hit_bottom;
+            }
+            OUT_OF_SPACE_IF_NULL(res = sym_new_const(ctx, Py_False));
         }
-        sym_set_const(value, Py_None);
-        OUT_OF_SPACE_IF_NULL(res = sym_new_const(ctx, Py_False));
     }
 
     op(_TO_BOOL_STR, (value -- res)) {
-        if (sym_is_const(value) && sym_matches_type(value, &PyUnicode_Type)) {
-            PyObject *load = sym_get_const(value) == &_Py_STR(empty) ? Py_False : Py_True;
-            REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)load);
-            OUT_OF_SPACE_IF_NULL(res = sym_new_const(ctx, load));
+        if (optimize_to_bool(this_instr, ctx, value, &res)) {
+            OUT_OF_SPACE_IF_NULL(res);
         }
         else {
             OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyBool_Type));
-        }
-        if(!sym_set_type(value, &PyUnicode_Type)) {
-            goto hit_bottom;
+            if(!sym_set_type(value, &PyUnicode_Type)) {
+                goto hit_bottom;
+            }
         }
     }
 
diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
index 6d3488f2118589..f2c186a0ae1380 100644
--- a/Python/optimizer_cases.c.h
+++ b/Python/optimizer_cases.c.h
@@ -107,24 +107,31 @@
             _Py_UopsSymbol *value;
             _Py_UopsSymbol *res;
             value = stack_pointer[-1];
-            (void)value;
-            res = sym_new_type(ctx, &PyBool_Type);
-            OUT_OF_SPACE_IF_NULL(res);
+            if (optimize_to_bool(this_instr, ctx, value, &res)) {
+                OUT_OF_SPACE_IF_NULL(res);
+            }
+            else {
+                res = sym_new_type(ctx, &PyBool_Type);
+                OUT_OF_SPACE_IF_NULL(res);
+            }
             stack_pointer[-1] = res;
             break;
         }
 
         case _TO_BOOL_BOOL: {
             _Py_UopsSymbol *value;
+            _Py_UopsSymbol *res;
             value = stack_pointer[-1];
-            if (sym_matches_type(value, &PyBool_Type)) {
-                REPLACE_OP(this_instr, _NOP, 0, 0);
+            if (optimize_to_bool(this_instr, ctx, value, &res)) {
+                OUT_OF_SPACE_IF_NULL(res);
             }
             else {
                 if(!sym_set_type(value, &PyBool_Type)) {
                     goto hit_bottom;
                 }
+                res = value;
             }
+            stack_pointer[-1] = res;
             break;
         }
 
@@ -132,18 +139,15 @@
             _Py_UopsSymbol *value;
             _Py_UopsSymbol *res;
             value = stack_pointer[-1];
-            if (sym_is_const(value) && sym_matches_type(value, &PyLong_Type)) {
-                PyObject *load = _PyLong_IsZero((PyLongObject *)sym_get_const(value))
-                ? Py_False : Py_True;
-                REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)load);
-                OUT_OF_SPACE_IF_NULL(res = sym_new_const(ctx, load));
+            if (optimize_to_bool(this_instr, ctx, value, &res)) {
+                OUT_OF_SPACE_IF_NULL(res);
             }
             else {
+                if(!sym_set_type(value, &PyLong_Type)) {
+                    goto hit_bottom;
+                }
                 OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyBool_Type));
             }
-            if(!sym_set_type(value, &PyLong_Type)) {
-                goto hit_bottom;
-            }
             stack_pointer[-1] = res;
             break;
         }
@@ -152,10 +156,15 @@
             _Py_UopsSymbol *value;
             _Py_UopsSymbol *res;
             value = stack_pointer[-1];
-            if(!sym_set_type(value, &PyList_Type)) {
-                goto hit_bottom;
+            if (optimize_to_bool(this_instr, ctx, value, &res)) {
+                OUT_OF_SPACE_IF_NULL(res);
+            }
+            else {
+                if(!sym_set_type(value, &PyList_Type)) {
+                    goto hit_bottom;
+                }
+                OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyBool_Type));
             }
-            OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyBool_Type));
             stack_pointer[-1] = res;
             break;
         }
@@ -164,11 +173,15 @@
             _Py_UopsSymbol *value;
             _Py_UopsSymbol *res;
             value = stack_pointer[-1];
-            if (sym_get_const(value) == Py_None) {
-                REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)Py_False);
+            if (optimize_to_bool(this_instr, ctx, value, &res)) {
+                OUT_OF_SPACE_IF_NULL(res);
+            }
+            else {
+                if (!sym_set_const(value, Py_None)) {
+                    goto hit_bottom;
+                }
+                OUT_OF_SPACE_IF_NULL(res = sym_new_const(ctx, Py_False));
             }
-            sym_set_const(value, Py_None);
-            OUT_OF_SPACE_IF_NULL(res = sym_new_const(ctx, Py_False));
             stack_pointer[-1] = res;
             break;
         }
@@ -177,16 +190,14 @@
             _Py_UopsSymbol *value;
             _Py_UopsSymbol *res;
             value = stack_pointer[-1];
-            if (sym_is_const(value) && sym_matches_type(value, &PyUnicode_Type)) {
-                PyObject *load = sym_get_const(value) == &_Py_STR(empty) ? Py_False : Py_True;
-                REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)load);
-                OUT_OF_SPACE_IF_NULL(res = sym_new_const(ctx, load));
+            if (optimize_to_bool(this_instr, ctx, value, &res)) {
+                OUT_OF_SPACE_IF_NULL(res);
             }
             else {
                 OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyBool_Type));
-            }
-            if(!sym_set_type(value, &PyUnicode_Type)) {
-                goto hit_bottom;
+                if(!sym_set_type(value, &PyUnicode_Type)) {
+                    goto hit_bottom;
+                }
             }
             stack_pointer[-1] = res;
             break;
diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c
index 5c3ec2b5ed1a4c..29fe31a0e9b94c 100644
--- a/Python/optimizer_symbols.c
+++ b/Python/optimizer_symbols.c
@@ -4,6 +4,7 @@
 #include "cpython/optimizer.h"
 #include "pycore_code.h"
 #include "pycore_frame.h"
+#include "pycore_long.h"
 #include "pycore_optimizer.h"
 
 #include <stdbool.h>
@@ -240,6 +241,40 @@ _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ)
     return sym->typ == typ;
 }
 
+int
+_Py_uop_sym_truthiness(_Py_UopsSymbol *sym)
+{
+    /* There are some non-constant values for
+     * which `bool(val)` always evaluates to
+     * True or False, such as tuples with known
+     * length, but unknown contents, or bound-methods.
+     * This function will need updating
+     * should we support those values.
+     */
+    if (_Py_uop_sym_is_bottom(sym)) {
+        return -1;
+    }
+    if (!_Py_uop_sym_is_const(sym)) {
+        return -1;
+    }
+    PyObject *value = _Py_uop_sym_get_const(sym);
+    if (value == Py_None) {
+        return 0;
+    }
+    /* Only handle a few known safe types */
+    PyTypeObject *tp = Py_TYPE(value);
+    if (tp == &PyLong_Type) {
+        return !_PyLong_IsZero((PyLongObject *)value);
+    }
+    if (tp == &PyUnicode_Type) {
+        return value != &_Py_STR(empty);
+    }
+    if (tp == &PyBool_Type) {
+        return value == Py_True;
+    }
+    return -1;
+}
+
 // 0 on success, -1 on error.
 _Py_UOpsAbstractFrame *
 _Py_uop_frame_new(
@@ -413,6 +448,7 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored))
         goto fail;
     }
     _Py_uop_sym_set_const(sym, val_42);
+    TEST_PREDICATE(_Py_uop_sym_truthiness(sym) == 1, "bool(42) is not True");
     TEST_PREDICATE(!_Py_uop_sym_is_null(sym), "42 is NULL");
     TEST_PREDICATE(_Py_uop_sym_is_not_null(sym), "42 isn't not NULL");
     TEST_PREDICATE(_Py_uop_sym_matches_type(sym, &PyLong_Type), "42 isn't an int");
@@ -436,6 +472,14 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored))
     _Py_uop_sym_set_const(sym, val_43);  // Should make it bottom
     TEST_PREDICATE(_Py_uop_sym_is_bottom(sym), "(42 and 43) isn't bottom");
 
+
+    sym = _Py_uop_sym_new_const(ctx, Py_None);
+    TEST_PREDICATE(_Py_uop_sym_truthiness(sym) == 0, "bool(None) is not False");
+    sym = _Py_uop_sym_new_const(ctx, Py_False);
+    TEST_PREDICATE(_Py_uop_sym_truthiness(sym) == 0, "bool(False) is not False");
+    sym = _Py_uop_sym_new_const(ctx, PyLong_FromLong(0));
+    TEST_PREDICATE(_Py_uop_sym_truthiness(sym) == 0, "bool(0) is not False");
+
     _Py_uop_abstractcontext_fini(ctx);
     Py_DECREF(val_42);
     Py_DECREF(val_43);

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to