Raymond Hettinger <[email protected]> added the comment:
> OTOH on my Mac I still find that 3.10 with PGO is still
> more than twice as slow than 2.7.
> Thinking about it that's a bit odd, since (presumably)
> the majority of the work in sum() involves a long int result
> (even though the values returned by range() all fit in 30 bits,
> the sum quickly exceeds that).
The actual accumulation of a long int result is still as fast as it ever was.
The main difference from Py2.7 isn't the addition; it is that detecting and
extracting a small int addend has become expensive.
-- Python 2 fastpath --------------------------------------
/* Fast path for an item that is exactly an int: add its value to the C long
 * accumulator i_result and `continue` the enclosing loop (not shown in this
 * excerpt). Falls through when the item is not an exact int or the sum
 * overflowed. */
if (PyInt_CheckExact(item)) {                   // Very cheap
    long b = PyInt_AS_LONG(item);               // Very cheap
    long x = i_result + b;                      // Very cheap
    /* The sum is accepted when it has the same sign as at least one operand.
     * NOTE(review): this detects overflow only after the signed addition has
     * already been performed. */
    if ((x^i_result) >= 0 || (x^b) >= 0) {      // Semi cheap
        i_result = x;                           // Zero cost
        Py_DECREF(item);                        // Most expensive step, but still cheap
        continue;
    }
}
-- Python 3 fastpath --------------------------------------
/* Fast path for an item that is exactly an int, or a bool: extract its value
 * as a C long and add it to the accumulator i_result, then `continue` the
 * enclosing loop (not shown in this excerpt). Falls through when the
 * extraction reported overflow or the addition would not fit in a long. */
if (PyLong_CheckExact(item) || PyBool_Check(item)) {        // Cheap
    long b = PyLong_AsLongAndOverflow(item, &overflow);     // Super Expensive
    /* The range test runs before the addition, so i_result + b is only
     * evaluated when the result is representable. */
    if (overflow == 0 &&                                    // Branch predictable test
        (i_result >= 0 ? (b <= LONG_MAX - i_result)         // Slower but better test
                       : (b >= LONG_MIN - i_result)))
    {
        i_result += b;                                      // Very cheap
        Py_DECREF(item);
        continue;
    }
}
-- Supporting function ------------------------------------
/* Convert the integer object vv to a C long.
 *
 * *overflow is reset to 0 before anything else. When the value's magnitude
 * does not fit in a long, *overflow is set to +1 or -1 (the sign of the
 * value) and -1 is returned.
 *
 * If vv is NULL, PyErr_BadInternalCall() is invoked and -1 is returned.
 * If vv is not a PyLong, it is first converted with _PyNumber_Index(); a
 * NULL result from that conversion also returns -1.
 */
long
PyLong_AsLongAndOverflow(PyObject *vv, int *overflow)   // OMG, this does a lot of work
{
    /* This version by Tim Peters */
    PyLongObject *v;
    unsigned long x, prev;
    long res;
    Py_ssize_t i;
    int sign;
    int do_decref = 0; /* if PyNumber_Index was called */

    *overflow = 0;
    if (vv == NULL) {
        PyErr_BadInternalCall();
        return -1;
    }

    if (PyLong_Check(vv)) {
        v = (PyLongObject *)vv;
    }
    else {
        v = (PyLongObject *)_PyNumber_Index(vv);
        if (v == NULL)
            return -1;
        /* v is a temporary produced by the conversion; release it at exit. */
        do_decref = 1;
    }

    res = -1;
    i = Py_SIZE(v);

    /* Sizes -1, 0 and 1 are answered directly from the first digit (or as
     * zero) without entering the digit loop. */
    switch (i) {
    case -1:
        res = -(sdigit)v->ob_digit[0];
        break;
    case 0:
        res = 0;
        break;
    case 1:
        res = v->ob_digit[0];
        break;
    default:
        sign = 1;
        x = 0;
        /* A negative size marks a negative value; iterate over |size| digits. */
        if (i < 0) {
            sign = -1;
            i = -(i);
        }
        /* Fold digits in from the highest index down. If shifting x back
         * does not reproduce the previous accumulator, bits were lost off
         * the top of the unsigned long. */
        while (--i >= 0) {
            prev = x;
            x = (x << PyLong_SHIFT) | v->ob_digit[i];
            if ((x >> PyLong_SHIFT) != prev) {
                *overflow = sign;
                goto exit;
            }
        }
        /* Haven't lost any bits, but casting to long requires extra
         * care (see comment above).
         */
        if (x <= (unsigned long)LONG_MAX) {
            res = (long)x * sign;
        }
        else if (sign < 0 && x == PY_ABS_LONG_MIN) {
            res = LONG_MIN;
        }
        else {
            *overflow = sign;
            /* res is already set to -1 */
        }
    }
exit:
    if (do_decref) {
        Py_DECREF(v);
    }
    return res;
}
----------
_______________________________________
Python tracker <[email protected]>
<https://bugs.python.org/issue24076>
_______________________________________
_______________________________________________
Python-bugs-list mailing list
Unsubscribe:
https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com