[issue10156] Initialization of globals in unicodeobject.c

Serhiy Storchaka Mon, 07 Jan 2013 03:28:51 -0800

Serhiy Storchaka added the comment:

Here are patches for all four Python versions. They fix possible use of the
following uninitialized global variables: free_list, numfree, interned,
unicode_empty, static_strings, unicode_latin1, bloom_linebreak,
unicode_default_encoding.


----------
Added file: http://bugs.python.org/file28607/unicode_globals-2.7.patch
Added file: http://bugs.python.org/file28608/unicode_globals-3.2.patch
Added file: http://bugs.python.org/file28609/unicode_globals-3.3.patch
Added file: http://bugs.python.org/file28610/unicode_globals-3.4.patch

_______________________________________
Python tracker <[email protected]>
<http://bugs.python.org/issue10156>
_______________________________________

diff -r 0f24c65fb7e5 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c   Sat Jan 05 07:37:47 2013 +0200
+++ b/Objects/unicodeobject.c   Mon Jan 07 13:26:16 2013 +0200
@@ -93,15 +93,26 @@
 #endif
 
 /* Free list for Unicode objects */
-static PyUnicodeObject *free_list;
-static int numfree;
+static PyUnicodeObject *free_list = NULL;
+static int numfree = 0;
 
 /* The empty Unicode object is shared to improve performance. */
-static PyUnicodeObject *unicode_empty;
+static PyUnicodeObject *unicode_empty = NULL;
+
+#define _Py_RETURN_UNICODE_EMPTY()  do {                \
+            if (unicode_empty != NULL)                  \
+                Py_INCREF(unicode_empty);               \
+            else {                                      \
+                unicode_empty = _PyUnicode_New(0);      \
+                if (unicode_empty != NULL)              \
+                    Py_INCREF(unicode_empty);           \
+            }                                           \
+            return (PyObject *)unicode_empty;           \
+        } while (0)
 
 /* Single character Unicode strings in the Latin-1 range are being
    shared as well. */
-static PyUnicodeObject *unicode_latin1[256];
+static PyUnicodeObject *unicode_latin1[256] = {NULL};
 
 /* Default encoding to use and assume when NULL is passed as encoding
    parameter; it is initialized by _PyUnicode_Init().
@@ -110,7 +121,7 @@
    PyUnicode_GetDefaultEncoding() APIs to access this global.
 
 */
-static char unicode_default_encoding[100];
+static char unicode_default_encoding[100 + 1] = "ascii";
 
 /* Fast detection of the most frequent whitespace characters */
 const unsigned char _Py_ascii_whitespace[] = {
@@ -204,7 +215,7 @@
 
 #define BLOOM_MASK unsigned long
 
-static BLOOM_MASK bloom_linebreak;
+static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0;
 
 #define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
 #define BLOOM(mask, ch)     ((mask &  (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
@@ -448,10 +459,8 @@
     if (u != NULL) {
 
         /* Optimization for empty strings */
-        if (size == 0 && unicode_empty != NULL) {
-            Py_INCREF(unicode_empty);
-            return (PyObject *)unicode_empty;
-        }
+        if (size == 0)
+            _Py_RETURN_UNICODE_EMPTY();
 
         /* Single character Unicode objects in the Latin-1 range are
            shared when using this constructor */
@@ -497,10 +506,8 @@
     if (u != NULL) {
 
         /* Optimization for empty strings */
-        if (size == 0 && unicode_empty != NULL) {
-            Py_INCREF(unicode_empty);
-            return (PyObject *)unicode_empty;
-        }
+        if (size == 0)
+            _Py_RETURN_UNICODE_EMPTY();
 
         /* Single characters are shared when using this constructor.
            Restrict to ASCII, since the input must be UTF-8. */
@@ -1162,13 +1169,10 @@
     }
 
     /* Convert to Unicode */
-    if (len == 0) {
-        Py_INCREF(unicode_empty);
-        v = (PyObject *)unicode_empty;
-    }
-    else
-        v = PyUnicode_Decode(s, len, encoding, errors);
-
+    if (len == 0)
+        _Py_RETURN_UNICODE_EMPTY();
+
+    v = PyUnicode_Decode(s, len, encoding, errors);
     return v;
 
   onError:
@@ -1381,7 +1385,7 @@
     Py_DECREF(v);
     strncpy(unicode_default_encoding,
             encoding,
-            sizeof(unicode_default_encoding));
+            sizeof(unicode_default_encoding) - 1);
     return 0;
 
   onError:
@@ -8838,8 +8842,6 @@
 
 void _PyUnicode_Init(void)
 {
-    int i;
-
     /* XXX - move this array to unicodectype.c ? */
     Py_UNICODE linebreak[] = {
         0x000A, /* LINE FEED */
@@ -8853,15 +8855,10 @@
     };
 
     /* Init the implementation */
-    free_list = NULL;
-    numfree = 0;
     unicode_empty = _PyUnicode_New(0);
     if (!unicode_empty)
         return;
 
-    strcpy(unicode_default_encoding, "ascii");
-    for (i = 0; i < 256; i++)
-        unicode_latin1[i] = NULL;
     if (PyType_Ready(&PyUnicode_Type) < 0)
         Py_FatalError("Can't initialize 'unicode'");
 
@@ -8906,15 +8903,11 @@
 {
     int i;
 
-    Py_XDECREF(unicode_empty);
-    unicode_empty = NULL;
-
-    for (i = 0; i < 256; i++) {
-        if (unicode_latin1[i]) {
-            Py_DECREF(unicode_latin1[i]);
-            unicode_latin1[i] = NULL;
-        }
-    }
+    Py_CLEAR(unicode_empty);
+
+    for (i = 0; i < 256; i++)
+        Py_CLEAR(unicode_latin1[i]);
+
     (void)PyUnicode_ClearFreeList();
 }

diff -r d2867c430333 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c   Sat Jan 05 07:37:47 2013 +0200
+++ b/Objects/unicodeobject.c   Mon Jan 07 12:57:32 2013 +0200
@@ -98,18 +98,29 @@
    Another way to look at this is that to say that the actual reference
    count of a string is:  s->ob_refcnt + (s->state ? 2 : 0)
 */
-static PyObject *interned;
+static PyObject *interned = NULL;
 
 /* Free list for Unicode objects */
-static PyUnicodeObject *free_list;
-static int numfree;
+static PyUnicodeObject *free_list = NULL;
+static int numfree = 0;
 
 /* The empty Unicode object is shared to improve performance. */
-static PyUnicodeObject *unicode_empty;
+static PyUnicodeObject *unicode_empty = NULL;
+
+#define _Py_RETURN_UNICODE_EMPTY()  do {                \
+            if (unicode_empty != NULL)                  \
+                Py_INCREF(unicode_empty);               \
+            else {                                      \
+                unicode_empty = _PyUnicode_New(0);      \
+                if (unicode_empty != NULL)              \
+                    Py_INCREF(unicode_empty);           \
+            }                                           \
+            return (PyObject *)unicode_empty;           \
+        } while (0)
 
 /* Single character Unicode strings in the Latin-1 range are being
    shared as well. */
-static PyUnicodeObject *unicode_latin1[256];
+static PyUnicodeObject *unicode_latin1[256] = {NULL};
 
 /* Fast detection of the most frequent whitespace characters */
 const unsigned char _Py_ascii_whitespace[] = {
@@ -214,7 +225,7 @@
 
 #define BLOOM_MASK unsigned long
 
-static BLOOM_MASK bloom_linebreak;
+static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0;
 
 #define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
 #define BLOOM(mask, ch)     ((mask &  (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
@@ -479,10 +490,8 @@
     if (u != NULL) {
 
         /* Optimization for empty strings */
-        if (size == 0 && unicode_empty != NULL) {
-            Py_INCREF(unicode_empty);
-            return (PyObject *)unicode_empty;
-        }
+        if (size == 0)
+            _Py_RETURN_UNICODE_EMPTY();
 
         /* Single character Unicode objects in the Latin-1 range are
            shared when using this constructor */
@@ -528,10 +537,8 @@
     if (u != NULL) {
 
         /* Optimization for empty strings */
-        if (size == 0 && unicode_empty != NULL) {
-            Py_INCREF(unicode_empty);
-            return (PyObject *)unicode_empty;
-        }
+        if (size == 0)
+            _Py_RETURN_UNICODE_EMPTY();
 
         /* Single characters are shared when using this constructor.
            Restrict to ASCII, since the input must be UTF-8. */
@@ -1393,15 +1400,11 @@
 
     /* Decoding bytes objects is the most common case and should be fast */
     if (PyBytes_Check(obj)) {
-        if (PyBytes_GET_SIZE(obj) == 0) {
-            Py_INCREF(unicode_empty);
-            v = (PyObject *) unicode_empty;
-        }
-        else {
-            v = PyUnicode_Decode(
-                    PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj),
-                    encoding, errors);
-        }
+        if (PyBytes_GET_SIZE(obj) == 0)
+            _Py_RETURN_UNICODE_EMPTY();
+        v = PyUnicode_Decode(
+                PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj),
+                encoding, errors);
         return v;
     }
 
@@ -1421,12 +1424,11 @@
     }
 
     if (buffer.len == 0) {
-        Py_INCREF(unicode_empty);
-        v = (PyObject *) unicode_empty;
-    }
-    else
-        v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors);
-
+        PyBuffer_Release(&buffer);
+        _Py_RETURN_UNICODE_EMPTY();
+    }
+
+    v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors);
     PyBuffer_Release(&buffer);
     return v;
 }
@@ -8315,10 +8317,8 @@
     Py_ssize_t nchars;
     size_t nbytes;
 
-    if (len < 1) {
-        Py_INCREF(unicode_empty);
-        return (PyObject *)unicode_empty;
-    }
+    if (len < 1)
+        _Py_RETURN_UNICODE_EMPTY();
 
     if (len == 1 && PyUnicode_CheckExact(str)) {
         /* no repeat, return original string */
@@ -10048,8 +10048,6 @@
 
 void _PyUnicode_Init(void)
 {
-    int i;
-
     /* XXX - move this array to unicodectype.c ? */
     Py_UNICODE linebreak[] = {
         0x000A, /* LINE FEED */
@@ -10063,14 +10061,10 @@
     };
 
     /* Init the implementation */
-    free_list = NULL;
-    numfree = 0;
     unicode_empty = _PyUnicode_New(0);
     if (!unicode_empty)
         return;
 
-    for (i = 0; i < 256; i++)
-        unicode_latin1[i] = NULL;
     if (PyType_Ready(&PyUnicode_Type) < 0)
         Py_FatalError("Can't initialize 'unicode'");
 
@@ -10115,15 +10109,11 @@
 {
     int i;
 
-    Py_XDECREF(unicode_empty);
-    unicode_empty = NULL;
-
-    for (i = 0; i < 256; i++) {
-        if (unicode_latin1[i]) {
-            Py_DECREF(unicode_latin1[i]);
-            unicode_latin1[i] = NULL;
-        }
-    }
+    Py_CLEAR(unicode_empty);
+
+    for (i = 0; i < 256; i++)
+        Py_CLEAR(unicode_latin1[i]);
+
     (void)PyUnicode_ClearFreeList();
 }
 
@@ -10242,8 +10232,7 @@
             "mortal/immortal\n", mortal_size, immortal_size);
     Py_DECREF(keys);
     PyDict_Clear(interned);
-    Py_DECREF(interned);
-    interned = NULL;
+    Py_CLEAR(interned);
 }

diff -r 13c83199c211 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c   Sun Jan 06 16:41:56 2013 +0100
+++ b/Objects/unicodeobject.c   Mon Jan 07 12:57:36 2013 +0200
@@ -179,17 +179,32 @@
    Another way to look at this is that to say that the actual reference
    count of a string is:  s->ob_refcnt + (s->state ? 2 : 0)
 */
-static PyObject *interned;
+static PyObject *interned = NULL;
 
 /* The empty Unicode object is shared to improve performance. */
-static PyObject *unicode_empty;
+static PyObject *unicode_empty = NULL;
+
+#define _Py_INCREF_UNICODE_EMPTY() do {                 \
+            if (unicode_empty != NULL)                  \
+                Py_INCREF(unicode_empty);               \
+            else {                                      \
+                unicode_empty = PyUnicode_New(0, 0);    \
+                if (unicode_empty != NULL)              \
+                    Py_INCREF(unicode_empty);           \
+            }                                           \
+        } while (0)
+
+#define _Py_RETURN_UNICODE_EMPTY()  do {                \
+            _Py_INCREF_UNICODE_EMPTY();                 \
+            return unicode_empty;                       \
+        } while (0)
 
 /* List of static strings. */
-static _Py_Identifier *static_strings;
+static _Py_Identifier *static_strings = NULL;
 
 /* Single character Unicode strings in the Latin-1 range are being
    shared as well. */
-static PyObject *unicode_latin1[256];
+static PyObject *unicode_latin1[256] = {NULL};
 
 /* Fast detection of the most frequent whitespace characters */
 const unsigned char _Py_ascii_whitespace[] = {
@@ -416,9 +431,8 @@
 
     len = _PyUnicode_WSTR_LENGTH(unicode);
     if (len == 0) {
-        Py_INCREF(unicode_empty);
         Py_DECREF(unicode);
-        return unicode_empty;
+        _Py_RETURN_UNICODE_EMPTY();
     }
 
     if (len == 1) {
@@ -450,8 +464,8 @@
     length = PyUnicode_GET_LENGTH(unicode);
     if (length == 0) {
         if (unicode != unicode_empty) {
-            Py_INCREF(unicode_empty);
             Py_DECREF(unicode);
+            _Py_RETURN_UNICODE_EMPTY();
         }
         return unicode_empty;
     }
@@ -528,7 +542,7 @@
 
 #define BLOOM_MASK unsigned long
 
-static BLOOM_MASK bloom_linebreak;
+static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0;
 
 #define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
 #define BLOOM(mask, ch)     ((mask &  (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
@@ -1582,9 +1596,9 @@
         return 0;
 
     if (length == 0) {
+        _Py_INCREF_UNICODE_EMPTY();
         Py_DECREF(*p_unicode);
         *p_unicode = unicode_empty;
-        Py_INCREF(*p_unicode);
         return 0;
     }
 
@@ -1731,10 +1745,8 @@
        some optimizations which share commonly used objects. */
 
     /* Optimization for empty strings */
-    if (size == 0 && unicode_empty != NULL) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
 
     /* Single character Unicode objects in the Latin-1 range are
        shared when using this constructor */
@@ -1890,10 +1902,8 @@
     PyObject *res;
     unsigned char max_char;
 
-    if (size == 0) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
     assert(size > 0);
     if (size == 1)
         return get_latin1_char(u[0]);
@@ -1913,10 +1923,8 @@
     PyObject *res;
     Py_UCS2 max_char;
 
-    if (size == 0) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
     assert(size > 0);
     if (size == 1) {
         Py_UCS4 ch = u[0];
@@ -1951,10 +1959,8 @@
     PyObject *res;
     Py_UCS4 max_char;
 
-    if (size == 0) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
     assert(size > 0);
     if (size == 1) {
         Py_UCS4 ch = u[0];
@@ -2246,10 +2252,8 @@
 PyUnicode_FromWideChar(register const wchar_t *w, Py_ssize_t size)
 {
     if (w == NULL) {
-        if (size == 0) {
-            Py_INCREF(unicode_empty);
-            return unicode_empty;
-        }
+        if (size == 0)
+            _Py_RETURN_UNICODE_EMPTY();
         PyErr_BadInternalCall();
         return NULL;
     }
@@ -3004,15 +3008,11 @@
 
     /* Decoding bytes objects is the most common case and should be fast */
     if (PyBytes_Check(obj)) {
-        if (PyBytes_GET_SIZE(obj) == 0) {
-            Py_INCREF(unicode_empty);
-            v = unicode_empty;
-        }
-        else {
-            v = PyUnicode_Decode(
-                    PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj),
-                    encoding, errors);
-        }
+        if (PyBytes_GET_SIZE(obj) == 0)
+            _Py_RETURN_UNICODE_EMPTY();
+        v = PyUnicode_Decode(
+                PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj),
+                encoding, errors);
         return v;
     }
 
@@ -3032,12 +3032,11 @@
     }
 
     if (buffer.len == 0) {
-        Py_INCREF(unicode_empty);
-        v = unicode_empty;
-    }
-    else
-        v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors);
-
+        PyBuffer_Release(&buffer);
+        _Py_RETURN_UNICODE_EMPTY();
+    }
+
+    v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors);
     PyBuffer_Release(&buffer);
     return v;
 }
@@ -4717,8 +4716,7 @@
     if (size == 0) {
         if (consumed)
             *consumed = 0;
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
+        _Py_RETURN_UNICODE_EMPTY();
     }
 
     /* ASCII is equivalent to the first 128 ordinals in Unicode. */
@@ -5229,8 +5227,7 @@
     if (q == e) {
         if (consumed)
             *consumed = size;
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
+        _Py_RETURN_UNICODE_EMPTY();
     }
 
 #ifdef BYTEORDER_IS_LITTLE_ENDIAN
@@ -6551,10 +6548,8 @@
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
 
-    if (size == 0) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
 
     /* ASCII is equivalent to the first 128 ordinals in Unicode. */
     if (size == 1 && (unsigned char)s[0] < 128)
@@ -6933,8 +6928,7 @@
         if (chunk_size == 0 && done) {
             if (v != NULL)
                 break;
-            Py_INCREF(unicode_empty);
-            return unicode_empty;
+            _Py_RETURN_UNICODE_EMPTY();
         }
 
 
@@ -9493,9 +9487,7 @@
     /* If empty sequence, return u"". */
     if (seqlen == 0) {
         Py_DECREF(fseq);
-        Py_INCREF(unicode_empty);
-        res = unicode_empty;
-        return res;
+        _Py_RETURN_UNICODE_EMPTY();
     }
 
     /* If singleton sequence with an exact Unicode, return that. */
@@ -10195,7 +10187,7 @@
         }
         new_size = slen + n * (len2 - len1);
         if (new_size == 0) {
-            Py_INCREF(unicode_empty);
+            _Py_INCREF_UNICODE_EMPTY();
             u = unicode_empty;
             goto done;
         }
@@ -11662,10 +11654,8 @@
         PyErr_SetString(PyExc_IndexError, "string index out of range");
         return NULL;
     }
-    if (start >= length || end < start) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (start >= length || end < start)
+        _Py_RETURN_UNICODE_EMPTY();
 
     length = end - start;
     if (PyUnicode_IS_ASCII(self)) {
@@ -11792,10 +11782,8 @@
     PyObject *u;
     Py_ssize_t nchars, n;
 
-    if (len < 1) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (len < 1)
+        _Py_RETURN_UNICODE_EMPTY();
 
     /* no repeat, return original string */
     if (len == 1)
@@ -12914,8 +12902,7 @@
 {
     if (writer->pos == 0) {
         Py_XDECREF(writer->buffer);
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
+        _Py_RETURN_UNICODE_EMPTY();
     }
     if (writer->readonly) {
         assert(PyUnicode_GET_LENGTH(writer->buffer) == writer->pos);
@@ -13133,8 +13120,7 @@
         }
 
         if (slicelength <= 0) {
-            Py_INCREF(unicode_empty);
-            return unicode_empty;
+            _Py_RETURN_UNICODE_EMPTY();
         } else if (start == 0 && step == 1 &&
                    slicelength == PyUnicode_GET_LENGTH(self)) {
             return unicode_result_unchanged(self);
@@ -13964,10 +13950,8 @@
     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:str",
                                      kwlist, &x, &encoding, &errors))
         return NULL;
-    if (x == NULL) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (x == NULL)
+        _Py_RETURN_UNICODE_EMPTY();
     if (encoding == NULL && errors == NULL)
         return PyObject_Str(x);
     else
@@ -14151,9 +14135,11 @@
     };
 
     /* Init the implementation */
-    unicode_empty = PyUnicode_New(0, 0);
-    if (!unicode_empty)
-        Py_FatalError("Can't create empty string");
+    if (unicode_empty == NULL) {
+        unicode_empty = PyUnicode_New(0, 0);
+        if (!unicode_empty)
+            Py_FatalError("Can't create empty string");
+    }
     assert(_PyUnicode_CheckConsistency(unicode_empty, 1));
 
     for (i = 0; i < 256; i++)
@@ -14197,15 +14183,10 @@
 {
     int i;
 
-    Py_XDECREF(unicode_empty);
-    unicode_empty = NULL;
-
-    for (i = 0; i < 256; i++) {
-        if (unicode_latin1[i]) {
-            Py_DECREF(unicode_latin1[i]);
-            unicode_latin1[i] = NULL;
-        }
-    }
+    Py_CLEAR(unicode_empty);
+
+    for (i = 0; i < 256; i++)
+        Py_CLEAR(unicode_latin1[i]);
     _PyUnicode_ClearStaticStrings();
     (void)PyUnicode_ClearFreeList();
 }
@@ -14334,8 +14315,7 @@
             "mortal/immortal\n", mortal_size, immortal_size);
     Py_DECREF(keys);
     PyDict_Clear(interned);
-    Py_DECREF(interned);
-    interned = NULL;
+    Py_CLEAR(interned);
 }

diff -r b6284d2aaada Objects/unicodeobject.c
--- a/Objects/unicodeobject.c   Sun Jan 06 16:14:22 2013 -0500
+++ b/Objects/unicodeobject.c   Mon Jan 07 13:06:23 2013 +0200
@@ -171,17 +171,32 @@
    Another way to look at this is that to say that the actual reference
    count of a string is:  s->ob_refcnt + (s->state ? 2 : 0)
 */
-static PyObject *interned;
+static PyObject *interned = NULL;
 
 /* The empty Unicode object is shared to improve performance. */
-static PyObject *unicode_empty;
+static PyObject *unicode_empty = NULL;
+
+#define _Py_INCREF_UNICODE_EMPTY() do {                 \
+            if (unicode_empty != NULL)                  \
+                Py_INCREF(unicode_empty);               \
+            else {                                      \
+                unicode_empty = PyUnicode_New(0, 0);    \
+                if (unicode_empty != NULL)              \
+                    Py_INCREF(unicode_empty);           \
+            }                                           \
+        } while (0)
+
+#define _Py_RETURN_UNICODE_EMPTY()  do {                \
+            _Py_INCREF_UNICODE_EMPTY();                 \
+            return unicode_empty;                       \
+        } while (0)
 
 /* List of static strings. */
-static _Py_Identifier *static_strings;
+static _Py_Identifier *static_strings = NULL;
 
 /* Single character Unicode strings in the Latin-1 range are being
    shared as well. */
-static PyObject *unicode_latin1[256];
+static PyObject *unicode_latin1[256] = {NULL};
 
 /* Fast detection of the most frequent whitespace characters */
 const unsigned char _Py_ascii_whitespace[] = {
@@ -406,9 +421,8 @@
 
     len = _PyUnicode_WSTR_LENGTH(unicode);
     if (len == 0) {
-        Py_INCREF(unicode_empty);
         Py_DECREF(unicode);
-        return unicode_empty;
+        _Py_RETURN_UNICODE_EMPTY();
     }
 
     if (len == 1) {
@@ -442,8 +456,8 @@
     length = PyUnicode_GET_LENGTH(unicode);
     if (length == 0) {
         if (unicode != unicode_empty) {
-            Py_INCREF(unicode_empty);
             Py_DECREF(unicode);
+            _Py_RETURN_UNICODE_EMPTY();
         }
         return unicode_empty;
     }
@@ -520,7 +534,7 @@
 
 #define BLOOM_MASK unsigned long
 
-static BLOOM_MASK bloom_linebreak;
+static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0;
 
 #define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
 #define BLOOM(mask, ch)     ((mask &  (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
@@ -1602,9 +1616,9 @@
         return 0;
 
     if (length == 0) {
+        _Py_INCREF_UNICODE_EMPTY();
         Py_DECREF(*p_unicode);
         *p_unicode = unicode_empty;
-        Py_INCREF(*p_unicode);
         return 0;
     }
 
@@ -1727,10 +1741,8 @@
        some optimizations which share commonly used objects. */
 
     /* Optimization for empty strings */
-    if (size == 0 && unicode_empty != NULL) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
 
     /* Single character Unicode objects in the Latin-1 range are
        shared when using this constructor */
@@ -1886,10 +1898,8 @@
     PyObject *res;
     unsigned char max_char;
 
-    if (size == 0) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
     assert(size > 0);
     if (size == 1)
         return get_latin1_char(u[0]);
@@ -1909,10 +1919,8 @@
     PyObject *res;
     Py_UCS2 max_char;
 
-    if (size == 0) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
     assert(size > 0);
     if (size == 1) {
         Py_UCS4 ch = u[0];
@@ -1947,10 +1955,8 @@
     PyObject *res;
     Py_UCS4 max_char;
 
-    if (size == 0) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
     assert(size > 0);
     if (size == 1) {
         Py_UCS4 ch = u[0];
@@ -2242,10 +2248,8 @@
 PyUnicode_FromWideChar(register const wchar_t *w, Py_ssize_t size)
 {
     if (w == NULL) {
-        if (size == 0) {
-            Py_INCREF(unicode_empty);
-            return unicode_empty;
-        }
+        if (size == 0)
+            _Py_RETURN_UNICODE_EMPTY();
         PyErr_BadInternalCall();
         return NULL;
     }
@@ -2822,15 +2826,11 @@
 
     /* Decoding bytes objects is the most common case and should be fast */
     if (PyBytes_Check(obj)) {
-        if (PyBytes_GET_SIZE(obj) == 0) {
-            Py_INCREF(unicode_empty);
-            v = unicode_empty;
-        }
-        else {
-            v = PyUnicode_Decode(
-                    PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj),
-                    encoding, errors);
-        }
+        if (PyBytes_GET_SIZE(obj) == 0)
+            _Py_RETURN_UNICODE_EMPTY();
+        v = PyUnicode_Decode(
+                PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj),
+                encoding, errors);
         return v;
     }
 
@@ -2850,12 +2850,11 @@
     }
 
     if (buffer.len == 0) {
-        Py_INCREF(unicode_empty);
-        v = unicode_empty;
-    }
-    else
-        v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors);
-
+        PyBuffer_Release(&buffer);
+        _Py_RETURN_UNICODE_EMPTY();
+    }
+
+    v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors);
     PyBuffer_Release(&buffer);
     return v;
 }
@@ -4198,8 +4197,7 @@
     if (size == 0) {
         if (consumed)
             *consumed = 0;
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
+        _Py_RETURN_UNICODE_EMPTY();
     }
 
     /* Start off assuming it's all ASCII. Widen later as necessary. */
@@ -4606,8 +4604,7 @@
     if (size == 0) {
         if (consumed)
             *consumed = 0;
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
+        _Py_RETURN_UNICODE_EMPTY();
     }
 
     /* ASCII is equivalent to the first 128 ordinals in Unicode. */
@@ -4865,8 +4862,7 @@
     if (q == e) {
         if (consumed)
             *consumed = size;
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
+        _Py_RETURN_UNICODE_EMPTY();
     }
 
 #ifdef WORDS_BIGENDIAN
@@ -5105,8 +5101,7 @@
     if (q == e) {
         if (consumed)
             *consumed = size;
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
+        _Py_RETURN_UNICODE_EMPTY();
     }
 
 #if PY_LITTLE_ENDIAN
@@ -5380,10 +5375,8 @@
     Py_ssize_t len;
 
     len = length_of_escaped_ascii_string(s, size);
-    if (len == 0) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (len == 0)
+        _Py_RETURN_UNICODE_EMPTY();
 
     /* After length_of_escaped_ascii_string() there are two alternatives,
        either the string is pure ASCII with named escapes like \n, etc.
@@ -5774,10 +5767,8 @@
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
 
-    if (size == 0) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
 
     /* Escaped strings will always be longer than the resulting
        Unicode string, so we start with size here and then reduce the
@@ -5981,10 +5972,8 @@
                      1))
         return NULL;
 
-    if (size == 0) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
 
     /* XXX overflow detection missing */
     _PyUnicodeWriter_Init(&writer, 0);
@@ -6432,10 +6421,8 @@
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
 
-    if (size == 0) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
 
     /* ASCII is equivalent to the first 128 ordinals in Unicode. */
     if (size == 1 && (unsigned char)s[0] < 128)
@@ -6813,8 +6800,7 @@
         if (chunk_size == 0 && done) {
             if (v != NULL)
                 break;
-            Py_INCREF(unicode_empty);
-            return unicode_empty;
+            _Py_RETURN_UNICODE_EMPTY();
         }
 
 
@@ -7291,10 +7277,8 @@
     if (mapping == NULL)
         return PyUnicode_DecodeLatin1(s, size, errors);
 
-    if (size == 0) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
     _PyUnicodeWriter_Init(&writer, 0);
     if (_PyUnicodeWriter_Prepare(&writer, size, 127) == -1)
         goto onError;
@@ -9333,9 +9317,7 @@
     /* If empty sequence, return u"". */
     if (seqlen == 0) {
         Py_DECREF(fseq);
-        Py_INCREF(unicode_empty);
-        res = unicode_empty;
-        return res;
+        _Py_RETURN_UNICODE_EMPTY();
     }
 
     /* If singleton sequence with an exact Unicode, return that. */
@@ -10035,7 +10017,7 @@
         }
         new_size = slen + n * (len2 - len1);
         if (new_size == 0) {
-            Py_INCREF(unicode_empty);
+            _Py_INCREF_UNICODE_EMPTY();
             u = unicode_empty;
             goto done;
         }
@@ -11538,10 +11520,8 @@
         PyErr_SetString(PyExc_IndexError, "string index out of range");
         return NULL;
     }
-    if (start >= length || end < start) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (start >= length || end < start)
+        _Py_RETURN_UNICODE_EMPTY();
 
     length = end - start;
     if (PyUnicode_IS_ASCII(self)) {
@@ -11668,10 +11648,8 @@
     PyObject *u;
     Py_ssize_t nchars, n;
 
-    if (len < 1) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (len < 1)
+        _Py_RETURN_UNICODE_EMPTY();
 
     /* no repeat, return original string */
     if (len == 1)
@@ -12811,8 +12789,7 @@
 {
     if (writer->pos == 0) {
         Py_XDECREF(writer->buffer);
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
+        _Py_RETURN_UNICODE_EMPTY();
     }
     if (writer->readonly) {
         assert(PyUnicode_GET_LENGTH(writer->buffer) == writer->pos);
@@ -13030,8 +13007,7 @@
         }
 
         if (slicelength <= 0) {
-            Py_INCREF(unicode_empty);
-            return unicode_empty;
+            _Py_RETURN_UNICODE_EMPTY();
         } else if (start == 0 && step == 1 &&
                    slicelength == PyUnicode_GET_LENGTH(self)) {
             return unicode_result_unchanged(self);
@@ -14035,10 +14011,8 @@
     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:str",
                                      kwlist, &x, &encoding, &errors))
         return NULL;
-    if (x == NULL) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
+    if (x == NULL)
+        _Py_RETURN_UNICODE_EMPTY();
     if (encoding == NULL && errors == NULL)
         return PyObject_Str(x);
     else
@@ -14222,9 +14196,11 @@
     };
 
     /* Init the implementation */
-    unicode_empty = PyUnicode_New(0, 0);
-    if (!unicode_empty)
-        Py_FatalError("Can't create empty string");
+    if (unicode_empty == NULL) {
+        unicode_empty = PyUnicode_New(0, 0);
+        if (!unicode_empty)
+            Py_FatalError("Can't create empty string");
+    }
     assert(_PyUnicode_CheckConsistency(unicode_empty, 1));
 
     for (i = 0; i < 256; i++)
@@ -14268,15 +14244,10 @@
 {
     int i;
 
-    Py_XDECREF(unicode_empty);
-    unicode_empty = NULL;
-
-    for (i = 0; i < 256; i++) {
-        if (unicode_latin1[i]) {
-            Py_DECREF(unicode_latin1[i]);
-            unicode_latin1[i] = NULL;
-        }
-    }
+    Py_CLEAR(unicode_empty);
+
+    for (i = 0; i < 256; i++)
+        Py_CLEAR(unicode_latin1[i]);
     _PyUnicode_ClearStaticStrings();
     (void)PyUnicode_ClearFreeList();
 }
@@ -14405,8 +14376,7 @@
             "mortal/immortal\n", mortal_size, immortal_size);
     Py_DECREF(keys);
     PyDict_Clear(interned);
-    Py_DECREF(interned);
-    interned = NULL;
+    Py_CLEAR(interned);
 }

_______________________________________________
Python-bugs-list mailing list
Unsubscribe: 
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

[issue10156] Initialization of globals in unicodeobject.c

Reply via email to