Author: guido.van.rossum
Date: Sat Oct 27 18:56:32 2007
New Revision: 58692

Modified:
   python/branches/py3k-pep3137/Include/stringobject.h
   python/branches/py3k-pep3137/Lib/test/test_sys.py
   python/branches/py3k-pep3137/Objects/stringobject.c
   python/branches/py3k-pep3137/Python/import.c
   python/branches/py3k-pep3137/Python/marshal.c
   python/branches/py3k-pep3137/Python/sysmodule.c
Log:
Kill PyString interning.

There's one mystery: if I remove ob_sstate from the PyStringObject struct,
some (unicode) string literals are mutilated, e.g. ('\\1', '\1') prints
('\\1', '\t').  This must be an out-of-bounds write or something that I
can't track down.  (It doesn't help that it doesn't occur in debug mode.
And no, make clean + recompilation doesn't help either.)

So, in the meantime, I am just keeping the field, renamed to 'ob_placeholder'.


Modified: python/branches/py3k-pep3137/Include/stringobject.h
==============================================================================
--- python/branches/py3k-pep3137/Include/stringobject.h (original)
+++ python/branches/py3k-pep3137/Include/stringobject.h Sat Oct 27 18:56:32 2007
@@ -25,26 +25,18 @@
 */
 
 /* Caching the hash (ob_shash) saves recalculation of a string's hash value.
-   Interning strings (ob_sstate) tries to ensure that only one string
-   object with a given value exists, so equality tests can be one pointer
-   comparison.  This is generally restricted to strings that "look like"
-   Python identifiers, although the sys.intern() function can be used to force
-   interning of any string.
-   Together, these sped the interpreter by up to 20%. */
+   This significantly speeds up dict lookups. */
 
 typedef struct {
     PyObject_VAR_HEAD
     long ob_shash;
-    int ob_sstate;
+    int ob_placeholder;  /* XXX If I remove this things break?!?! */
     char ob_sval[1];
 
     /* Invariants:
      *     ob_sval contains space for 'ob_size+1' elements.
      *     ob_sval[ob_size] == 0.
      *     ob_shash is the hash of the string or -1 if not computed yet.
-     *     ob_sstate != 0 iff the string object is in stringobject.c's
-     *       'interned' dictionary; in this case the two references
-     *       from 'interned' to this object are *not counted* in ob_refcnt.
      */
 } PyStringObject;
 
@@ -74,14 +66,6 @@
                                                   const char *, Py_ssize_t,
                                                   const char *);
 
-PyAPI_FUNC(void) PyString_InternInPlace(PyObject **);
-PyAPI_FUNC(void) PyString_InternImmortal(PyObject **);
-PyAPI_FUNC(PyObject *) PyString_InternFromString(const char *);
-PyAPI_FUNC(void) _Py_ReleaseInternedStrings(void);
-
-/* Use only if you know it's a string */
-#define PyString_CHECK_INTERNED(op) (((PyStringObject *)(op))->ob_sstate)
-
 /* Macro, trading safety for speed */
 #define PyString_AS_STRING(op) (assert(PyString_Check(op)), \
                                 (((PyStringObject *)(op))->ob_sval))

Modified: python/branches/py3k-pep3137/Lib/test/test_sys.py
==============================================================================
--- python/branches/py3k-pep3137/Lib/test/test_sys.py   (original)
+++ python/branches/py3k-pep3137/Lib/test/test_sys.py   Sat Oct 27 18:56:32 2007
@@ -300,7 +300,7 @@
 
     def test_intern(self):
         self.assertRaises(TypeError, sys.intern)
-        s = str8(b"never interned before")
+        s = "never interned before"
         self.assert_(sys.intern(s) is s)
         s2 = s.swapcase().swapcase()
         self.assert_(sys.intern(s2) is s)
@@ -310,28 +310,11 @@
         # We don't want them in the interned dict and if they aren't
         # actually interned, we don't want to create the appearance
         # that they are by allowing intern() to succeeed.
-        class S(str8):
+        class S(str):
             def __hash__(self):
                 return 123
 
-        self.assertRaises(TypeError, sys.intern, S(b"abc"))
-
-        s = "never interned as unicode before"
-        self.assert_(sys.intern(s) is s)
-        s2 = s.swapcase().swapcase()
-        self.assert_(sys.intern(s2) is s)
-
-        class U(str):
-            def __hash__(self):
-                return 123
-
-        self.assertRaises(TypeError, sys.intern, U("abc"))
-
-        # It's still safe to pass these strings to routines that
-        # call intern internally, e.g. PyObject_SetAttr().
-        s = U("abc")
-        setattr(s, s, s)
-        self.assertEqual(getattr(s, s), s)
+        self.assertRaises(TypeError, sys.intern, S("abc"))
 
 
 def test_main():

Modified: python/branches/py3k-pep3137/Objects/stringobject.c
==============================================================================
--- python/branches/py3k-pep3137/Objects/stringobject.c (original)
+++ python/branches/py3k-pep3137/Objects/stringobject.c Sat Oct 27 18:56:32 2007
@@ -13,16 +13,6 @@
 static PyStringObject *characters[UCHAR_MAX + 1];
 static PyStringObject *nullstring;
 
-/* This dictionary holds all interned strings.  Note that references to
-   strings in this dictionary are *not* counted in the string's ob_refcnt.
-   When the interned string reaches a refcnt of 0 the string deallocation
-   function will delete the reference from this dictionary.
-
-   Another way to look at this is that to say that the actual reference
-   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
-*/
-static PyObject *interned;
-
 /*
    For both PyString_FromString() and PyString_FromStringAndSize(), the
    parameter `size' denotes number of characters to allocate, not counting any
@@ -77,21 +67,14 @@
                return PyErr_NoMemory();
        PyObject_INIT_VAR(op, &PyString_Type, size);
        op->ob_shash = -1;
-       op->ob_sstate = SSTATE_NOT_INTERNED;
        if (str != NULL)
                Py_MEMCPY(op->ob_sval, str, size);
        op->ob_sval[size] = '\0';
        /* share short strings */
        if (size == 0) {
-               PyObject *t = (PyObject *)op;
-               PyString_InternInPlace(&t);
-               op = (PyStringObject *)t;
                nullstring = op;
                Py_INCREF(op);
        } else if (size == 1 && str != NULL) {
-               PyObject *t = (PyObject *)op;
-               PyString_InternInPlace(&t);
-               op = (PyStringObject *)t;
                characters[*str & UCHAR_MAX] = op;
                Py_INCREF(op);
        }
@@ -132,19 +115,12 @@
                return PyErr_NoMemory();
        PyObject_INIT_VAR(op, &PyString_Type, size);
        op->ob_shash = -1;
-       op->ob_sstate = SSTATE_NOT_INTERNED;
        Py_MEMCPY(op->ob_sval, str, size+1);
        /* share short strings */
        if (size == 0) {
-               PyObject *t = (PyObject *)op;
-               PyString_InternInPlace(&t);
-               op = (PyStringObject *)t;
                nullstring = op;
                Py_INCREF(op);
        } else if (size == 1) {
-               PyObject *t = (PyObject *)op;
-               PyString_InternInPlace(&t);
-               op = (PyStringObject *)t;
                characters[*str & UCHAR_MAX] = op;
                Py_INCREF(op);
        }
@@ -354,24 +330,6 @@
 static void
 string_dealloc(PyObject *op)
 {
-       switch (PyString_CHECK_INTERNED(op)) {
-               case SSTATE_NOT_INTERNED:
-                       break;
-
-               case SSTATE_INTERNED_MORTAL:
-                       /* revive dead object temporarily for DelItem */
-                       Py_Refcnt(op) = 3;
-                       if (PyDict_DelItem(interned, op) != 0)
-                               Py_FatalError(
-                                       "deletion of interned string failed");
-                       break;
-
-               case SSTATE_INTERNED_IMMORTAL:
-                       Py_FatalError("Immortal interned string died.");
-
-               default:
-                       Py_FatalError("Inconsistent interned string state.");
-       }
        Py_Type(op)->tp_free(op);
 }
 
@@ -760,7 +718,6 @@
                return PyErr_NoMemory();
        PyObject_INIT_VAR(op, &PyString_Type, size);
        op->ob_shash = -1;
-       op->ob_sstate = SSTATE_NOT_INTERNED;
        Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
        Py_MEMCPY(op->ob_sval + Py_Size(a), b->ob_sval, Py_Size(b));
        op->ob_sval[size] = '\0';
@@ -803,7 +760,6 @@
                return PyErr_NoMemory();
        PyObject_INIT_VAR(op, &PyString_Type, size);
        op->ob_shash = -1;
-       op->ob_sstate = SSTATE_NOT_INTERNED;
        op->ob_sval[size] = '\0';
        if (Py_Size(a) == 1 && n > 0) {
                memset(op->ob_sval, a->ob_sval[0] , n);
@@ -3053,10 +3009,10 @@
        n = PyString_GET_SIZE(tmp);
        pnew = type->tp_alloc(type, n);
        if (pnew != NULL) {
-               Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
+               Py_MEMCPY(PyString_AS_STRING(pnew),
+                         PyString_AS_STRING(tmp), n+1);
                ((PyStringObject *)pnew)->ob_shash =
                        ((PyStringObject *)tmp)->ob_shash;
-               ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
        }
        Py_DECREF(tmp);
        return pnew;
@@ -3157,8 +3113,7 @@
        register PyObject *v;
        register PyStringObject *sv;
        v = *pv;
-       if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0 ||
-           PyString_CHECK_INTERNED(v)) {
+       if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0) {
                *pv = 0;
                Py_DECREF(v);
                PyErr_BadInternalCall();
@@ -3326,65 +3281,6 @@
 }
 
 void
-PyString_InternInPlace(PyObject **p)
-{
-       register PyStringObject *s = (PyStringObject *)(*p);
-       PyObject *t;
-       if (s == NULL || !PyString_Check(s))
-               Py_FatalError("PyString_InternInPlace: strings only please!");
-       /* If it's a string subclass, we don't really know what putting
-          it in the interned dict might do. */
-       if (!PyString_CheckExact(s))
-               return;
-       if (PyString_CHECK_INTERNED(s))
-               return;
-       if (interned == NULL) {
-               interned = PyDict_New();
-               if (interned == NULL) {
-                       PyErr_Clear(); /* Don't leave an exception */
-                       return;
-               }
-       }
-       t = PyDict_GetItem(interned, (PyObject *)s);
-       if (t) {
-               Py_INCREF(t);
-               Py_DECREF(*p);
-               *p = t;
-               return;
-       }
-
-       if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
-               PyErr_Clear();
-               return;
-       }
-       /* The two references in interned are not counted by refcnt.
-          The string deallocator will take care of this */
-       Py_Refcnt(s) -= 2;
-       PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
-}
-
-void
-PyString_InternImmortal(PyObject **p)
-{
-       PyString_InternInPlace(p);
-       if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
-               PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
-               Py_INCREF(*p);
-       }
-}
-
-
-PyObject *
-PyString_InternFromString(const char *cp)
-{
-       PyObject *s = PyString_FromString(cp);
-       if (s == NULL)
-               return NULL;
-       PyString_InternInPlace(&s);
-       return s;
-}
-
-void
 PyString_Fini(void)
 {
        int i;
@@ -3396,58 +3292,6 @@
        nullstring = NULL;
 }
 
-void _Py_ReleaseInternedStrings(void)
-{
-       PyObject *keys;
-       PyStringObject *s;
-       Py_ssize_t i, n;
-       Py_ssize_t immortal_size = 0, mortal_size = 0;
-
-       if (interned == NULL || !PyDict_Check(interned))
-               return;
-       keys = PyDict_Keys(interned);
-       if (keys == NULL || !PyList_Check(keys)) {
-               PyErr_Clear();
-               return;
-       }
-
-       /* Since _Py_ReleaseInternedStrings() is intended to help a leak
-          detector, interned strings are not forcibly deallocated; rather, we
-          give them their stolen references back, and then clear and DECREF
-          the interned dict. */
-
-       n = PyList_GET_SIZE(keys);
-       fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
-               n);
-       for (i = 0; i < n; i++) {
-               s = (PyStringObject *) PyList_GET_ITEM(keys, i);
-               switch (s->ob_sstate) {
-               case SSTATE_NOT_INTERNED:
-                       /* XXX Shouldn't happen */
-                       break;
-               case SSTATE_INTERNED_IMMORTAL:
-                       Py_Refcnt(s) += 1;
-                       immortal_size += Py_Size(s);
-                       break;
-               case SSTATE_INTERNED_MORTAL:
-                       Py_Refcnt(s) += 2;
-                       mortal_size += Py_Size(s);
-                       break;
-               default:
-                       Py_FatalError("Inconsistent interned string state.");
-               }
-               s->ob_sstate = SSTATE_NOT_INTERNED;
-       }
-       fprintf(stderr, "total size of all interned strings: "
-                       "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
-                       "mortal/immortal\n", mortal_size, immortal_size);
-       Py_DECREF(keys);
-       PyDict_Clear(interned);
-       Py_DECREF(interned);
-       interned = NULL;
-}
-
-
 /*********************** Str Iterator ****************************/
 
 typedef struct {

Modified: python/branches/py3k-pep3137/Python/import.c
==============================================================================
--- python/branches/py3k-pep3137/Python/import.c        (original)
+++ python/branches/py3k-pep3137/Python/import.c        Sat Oct 27 18:56:32 2007
@@ -76,9 +76,10 @@
                      3060 (PEP 3115 metaclass syntax)
                      3070 (PEP 3109 raise changes)
                      3080 (PEP 3137 make __file__ and __name__ unicode)
+                     3090 (kill str8 interning)
 .
 */
-#define MAGIC (3080 | ((long)'\r'<<16) | ((long)'\n'<<24))
+#define MAGIC (3090 | ((long)'\r'<<16) | ((long)'\n'<<24))
 
 /* Magic word as global; note that _PyImport_Init() can change the
    value of this global to accommodate for alterations of how the

Modified: python/branches/py3k-pep3137/Python/marshal.c
==============================================================================
--- python/branches/py3k-pep3137/Python/marshal.c       (original)
+++ python/branches/py3k-pep3137/Python/marshal.c       Sat Oct 27 18:56:32 2007
@@ -36,8 +36,6 @@
 #define TYPE_BINARY_COMPLEX    'y'
 #define TYPE_LONG              'l'
 #define TYPE_STRING            's'
-#define TYPE_INTERNED          't'
-#define TYPE_STRINGREF         'R'
 #define TYPE_TUPLE             '('
 #define TYPE_LIST              '['
 #define TYPE_DICT              '{'
@@ -231,31 +229,7 @@
        }
 #endif
        else if (PyString_Check(v)) {
-               if (p->strings && PyString_CHECK_INTERNED(v)) {
-                       PyObject *o = PyDict_GetItem(p->strings, v);
-                       if (o) {
-                               long w = PyInt_AsLong(o);
-                               w_byte(TYPE_STRINGREF, p);
-                               w_long(w, p);
-                               goto exit;
-                       }
-                       else {
-                               int ok;
-                               o = PyInt_FromSsize_t(PyDict_Size(p->strings));
-                               ok = o &&
-                                    PyDict_SetItem(p->strings, v, o) >= 0;
-                               Py_XDECREF(o);
-                               if (!ok) {
-                                       p->depth--;
-                                       p->error = 1;
-                                       return;
-                               }
-                               w_byte(TYPE_INTERNED, p);
-                       }
-               }
-               else {
-                       w_byte(TYPE_STRING, p);
-               }
+               w_byte(TYPE_STRING, p);
                n = PyString_GET_SIZE(v);
                if (n > INT_MAX) {
                        /* huge strings are not supported */
@@ -389,7 +363,6 @@
                w_byte(TYPE_UNKNOWN, p);
                p->error = 1;
        }
-   exit:
        p->depth--;
 }
 
@@ -703,7 +676,6 @@
                }
 #endif
 
-       case TYPE_INTERNED:
        case TYPE_STRING:
                n = r_long(p);
                if (n < 0 || n > INT_MAX) {
@@ -723,25 +695,6 @@
                        retval = NULL;
                        break;
                }
-               if (type == TYPE_INTERNED) {
-                       PyString_InternInPlace(&v);
-                       if (PyList_Append(p->strings, v) < 0) {
-                               retval = NULL;
-                               break;
-                       }
-               }
-               retval = v;
-               break;
-
-       case TYPE_STRINGREF:
-               n = r_long(p);
-               if (n < 0 || n >= PyList_GET_SIZE(p->strings)) {
-                       PyErr_SetString(PyExc_ValueError, "bad marshal data");
-                       retval = NULL;
-                       break;
-               }
-               v = PyList_GET_ITEM(p->strings, n);
-               Py_INCREF(v);
                retval = v;
                break;
 

Modified: python/branches/py3k-pep3137/Python/sysmodule.c
==============================================================================
--- python/branches/py3k-pep3137/Python/sysmodule.c     (original)
+++ python/branches/py3k-pep3137/Python/sysmodule.c     Sat Oct 27 18:56:32 2007
@@ -225,14 +225,9 @@
 sys_intern(PyObject *self, PyObject *args)
 {
        PyObject *s;
-       if (!PyArg_ParseTuple(args, "S:intern", &s))
+       if (!PyArg_ParseTuple(args, "U:intern", &s))
                return NULL;
-       if (PyString_CheckExact(s)) {
-               Py_INCREF(s);
-               PyString_InternInPlace(&s);
-               return s;
-       }
-       else if (PyUnicode_CheckExact(s)) {
+       if (PyUnicode_CheckExact(s)) {
                Py_INCREF(s);
                PyUnicode_InternInPlace(&s);
                return s;
_______________________________________________
Python-3000-checkins mailing list
Python-3000-checkins@python.org
http://mail.python.org/mailman/listinfo/python-3000-checkins

Reply via email to