[issue14339] Optimizing bin, oct and hex

Serhiy Storchaka Sun, 15 Apr 2012 13:32:17 -0700

Serhiy Storchaka <storch...@gmail.com> added the comment:

> (1) The patch appears to assume that a Unicode string created with 
> PyUnicode_New(size, 127) will have 'kind' PyUnicode_1BYTE_KIND.  While this 
> might be true in the current implementation, I don't know whether this is 
> guaranteed in general.  Martin, any comments on this?


The same assumption is used above in long_to_decimal_string(). I added
the same assert and changed maxchar to 'f'.

> (2) The patch doesn't compile with '--with-pydebug':  there's a reference to 
> the (now) undefined variable 'buffer' in one of the asserts.

Fixed.

> (3) The overflow check looks as though it needs to be reworked.

I was left an old constraint (it is enough), but it really can be
weakened (in fact, it can be so weakened in the current code).

Thank you for the comments and the found error.

----------
Added file: http://bugs.python.org/file25227/long_to_binary_base_2.patch

_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue14339>
_______________________________________

diff -r 13307eb5bf47 Objects/longobject.c
--- a/Objects/longobject.c      Sat Apr 14 14:20:29 2012 -0500
+++ b/Objects/longobject.c      Sun Apr 15 23:29:34 2012 +0300
@@ -1672,11 +1672,10 @@
 {
     register PyLongObject *a = (PyLongObject *)aa;
     PyObject *v;
-    Py_ssize_t i, sz;
+    Py_ssize_t sz;
     Py_ssize_t size_a;
-    char *p;
-    char sign = '\0';
-    char *buffer;
+    Py_UCS1 *p;
+    int negative;
     int bits;
 
     assert(base == 2 || base == 8 || base == 10 || base == 16);
@@ -1688,6 +1687,7 @@
         return NULL;
     }
     size_a = ABS(Py_SIZE(a));
+    negative = Py_SIZE(a) < 0;
 
     /* Compute a rough upper bound for the length of the string */
     switch (base) {
@@ -1706,31 +1706,37 @@
     }
     /* compute length of output string: allow 2 characters for prefix and
        1 for possible '-' sign. */
-    if (size_a > (PY_SSIZE_T_MAX - 3) / PyLong_SHIFT / sizeof(Py_UCS4)) {
+    if (size_a > (PY_SSIZE_T_MAX - 3) / PyLong_SHIFT) {
         PyErr_SetString(PyExc_OverflowError,
                         "int is too large to format");
         return NULL;
     }
     /* now size_a * PyLong_SHIFT + 3 <= PY_SSIZE_T_MAX, so the RHS below
        is safe from overflow */
-    sz = 3 + (size_a * PyLong_SHIFT + (bits - 1)) / bits;
-    assert(sz >= 0);
-    buffer = PyMem_Malloc(sz);
-    if (buffer == NULL) {
+    if (size_a == 0) {
+        sz = 3;
+    }
+    else {
+        sz = 2 + negative + ((size_a - 1) * PyLong_SHIFT +
+                             bits_in_digit(a->ob_digit[size_a - 1]) +
+                             (bits - 1)) / bits;
+    }
+    v = PyUnicode_New(sz, 'f');
+    if (v == NULL) {
         PyErr_NoMemory();
         return NULL;
     }
-    p = &buffer[sz];
-    if (Py_SIZE(a) < 0)
-        sign = '-';
-
-    if (Py_SIZE(a) == 0) {
+    assert(PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND);
+    p = PyUnicode_1BYTE_DATA(v) + sz;
+
+    if (size_a == 0) {
         *--p = '0';
     }
     else {
         /* JRH: special case for power-of-2 bases */
         twodigits accum = 0;
         int accumbits = 0;              /* # of bits in accum */
+        Py_ssize_t i;
         for (i = 0; i < size_a; ++i) {
             accum |= (twodigits)a->ob_digit[i] << accumbits;
             accumbits += PyLong_SHIFT;
@@ -1739,7 +1745,7 @@
                 char cdigit;
                 cdigit = (char)(accum & (base - 1));
                 cdigit += (cdigit < 10) ? '0' : 'a'-10;
-                assert(p > buffer);
+                assert(p > PyUnicode_1BYTE_DATA(v));
                 *--p = cdigit;
                 accumbits -= bits;
                 accum >>= bits;
@@ -1747,6 +1753,7 @@
         }
     }
 
+    assert(p == PyUnicode_1BYTE_DATA(v) + 2 + negative);
     if (base == 16)
         *--p = 'x';
     else if (base == 8)
@@ -1754,10 +1761,8 @@
     else /* (base == 2) */
         *--p = 'b';
     *--p = '0';
-    if (sign)
-        *--p = sign;
-    v = PyUnicode_DecodeASCII(p, &buffer[sz] - p, NULL);
-    PyMem_Free(buffer);
+    if (negative)
+        *--p = '-';
     return v;
 }

_______________________________________________
Python-bugs-list mailing list
Unsubscribe: 
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

[issue14339] Optimizing bin, oct and hex

Reply via email to