Author: christian.heimes
Date: Mon Dec 10 16:50:56 2007
New Revision: 59449

Added:
   python/branches/py3k/Objects/stringlib/eq.h   (contents, props changed)
Modified:
   python/branches/py3k/Include/stringobject.h
   python/branches/py3k/Lib/test/test_set.py
   python/branches/py3k/Misc/NEWS
   python/branches/py3k/Objects/dictobject.c
   python/branches/py3k/Objects/setobject.c
   python/branches/py3k/Objects/stringobject.c
Log:
Fixed issue #1564: The set implementation should special-case PyUnicode instead 
of PyString
I moved the unicode_eq to stringlib/eq.h to keep the function static and 
possible inline for setobject.c and dictobject.h. I also removed the unused 
_PyString_Eq method. If it's required in the future it can be added to eq.h

Modified: python/branches/py3k/Include/stringobject.h
==============================================================================
--- python/branches/py3k/Include/stringobject.h (original)
+++ python/branches/py3k/Include/stringobject.h Mon Dec 10 16:50:56 2007
@@ -58,7 +58,6 @@
 PyAPI_FUNC(void) PyString_Concat(PyObject **, PyObject *);
 PyAPI_FUNC(void) PyString_ConcatAndDel(PyObject **, PyObject *);
 PyAPI_FUNC(int) _PyString_Resize(PyObject **, Py_ssize_t);
-PyAPI_FUNC(int) _PyString_Eq(PyObject *, PyObject*);
 PyAPI_FUNC(PyObject *) PyString_Format(PyObject *, PyObject *);
 PyAPI_FUNC(PyObject *) _PyString_FormatLong(PyObject*, int, int,
                                                  int, char**, int*);

Modified: python/branches/py3k/Lib/test/test_set.py
==============================================================================
--- python/branches/py3k/Lib/test/test_set.py   (original)
+++ python/branches/py3k/Lib/test/test_set.py   Mon Dec 10 16:50:56 2007
@@ -7,6 +7,7 @@
 import os
 from random import randrange, shuffle
 import sys
+import warnings
 
 class PassThru(Exception):
     pass
@@ -817,6 +818,44 @@
         self.length = 3
         self.repr   = None
 
+#------------------------------------------------------------------------------
+
+class TestBasicOpsString(TestBasicOps):
+    def setUp(self):
+        self.case   = "string set"
+        self.values = ["a", "b", "c"]
+        self.set    = set(self.values)
+        self.dup    = set(self.values)
+        self.length = 3
+        self.repr   = "{'a', 'c', 'b'}"
+
+#------------------------------------------------------------------------------
+
+class TestBasicOpsBytes(TestBasicOps):
+    def setUp(self):
+        self.case   = "string set"
+        self.values = [b"a", b"b", b"c"]
+        self.set    = set(self.values)
+        self.dup    = set(self.values)
+        self.length = 3
+        self.repr   = "{b'a', b'c', b'b'}"
+
+#------------------------------------------------------------------------------
+
+class TestBasicOpsMixedStringBytes(TestBasicOps):
+    def setUp(self):
+        self.warning_filters = warnings.filters[:]
+        warnings.simplefilter('ignore', BytesWarning)
+        self.case   = "string and bytes set"
+        self.values = ["a", "b", b"a", b"b"]
+        self.set    = set(self.values)
+        self.dup    = set(self.values)
+        self.length = 4
+        self.repr   = "{'a', b'a', 'b', b'b'}"
+
+    def tearDown(self):
+        warnings.filters = self.warning_filters
+
 #==============================================================================
 
 def baditer():
@@ -1581,6 +1620,9 @@
         TestBasicOpsSingleton,
         TestBasicOpsTuple,
         TestBasicOpsTriple,
+        TestBasicOpsString,
+        TestBasicOpsBytes,
+        TestBasicOpsMixedStringBytes,
         TestBinaryOps,
         TestUpdateOps,
         TestMutate,

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS      (original)
+++ python/branches/py3k/Misc/NEWS      Mon Dec 10 16:50:56 2007
@@ -14,6 +14,9 @@
 
 - Issue #1573: Improper use of the keyword-only syntax makes the parser crash
 
+- Issue #1564: The set implementation should special-case PyUnicode instead
+  of PyString
+
 
 Extension Modules
 -----------------

Modified: python/branches/py3k/Objects/dictobject.c
==============================================================================
--- python/branches/py3k/Objects/dictobject.c   (original)
+++ python/branches/py3k/Objects/dictobject.c   Mon Dec 10 16:50:56 2007
@@ -8,6 +8,7 @@
 */
 
 #include "Python.h"
+#include "stringlib/eq.h"
 
 
 /* Set a key error with the specified argument, wrapping it in a
@@ -327,25 +328,6 @@
        return 0;
 }
 
-/* Return 1 if two unicode objects are equal, 0 if not. */
-static int
-unicode_eq(PyObject *aa, PyObject *bb)
-{
-       PyUnicodeObject *a = (PyUnicodeObject *)aa;
-       PyUnicodeObject *b = (PyUnicodeObject *)bb;
-
-       if (a->length != b->length)
-               return 0;
-       if (a->length == 0)
-               return 1;
-       if (a->str[0] != b->str[0])
-               return 0;
-       if (a->length == 1)
-               return 1;
-       return memcmp(a->str, b->str, a->length * sizeof(Py_UNICODE)) == 0;
-}
-
-
 /*
  * Hacked up version of lookdict which can assume keys are always
  * unicodes; this assumption allows testing for errors during

Modified: python/branches/py3k/Objects/setobject.c
==============================================================================
--- python/branches/py3k/Objects/setobject.c    (original)
+++ python/branches/py3k/Objects/setobject.c    Mon Dec 10 16:50:56 2007
@@ -9,6 +9,7 @@
 
 #include "Python.h"
 #include "structmember.h"
+#include "stringlib/eq.h"
 
 /* Set a key error with the specified argument, wrapping it in a
  * tuple automatically so that tuple keys are not unpacked as the
@@ -55,6 +56,7 @@
 static PySetObject *free_sets[MAXFREESETS];
 static int num_free_sets = 0;
 
+
 /*
 The basic lookup function used by all operations.
 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
@@ -144,12 +146,12 @@
 }
 
 /*
- * Hacked up version of set_lookkey which can assume keys are always strings;
- * This means we can always use _PyString_Eq directly and not have to check to
+ * Hacked up version of set_lookkey which can assume keys are always unicode;
+ * This means we can always use unicode_eq directly and not have to check to
  * see if the comparison altered the table.
  */
 static setentry *
-set_lookkey_string(PySetObject *so, PyObject *key, register long hash)
+set_lookkey_unicode(PySetObject *so, PyObject *key, register long hash)
 {
        register Py_ssize_t i;
        register size_t perturb;
@@ -158,11 +160,11 @@
        setentry *table = so->table;
        register setentry *entry;
 
-       /* Make sure this function doesn't have to handle non-string keys,
+       /* Make sure this function doesn't have to handle non-unicode keys,
           including subclasses of str; e.g., one reason to subclass
           strings is to override __eq__, and for speed we don't cater to
           that here. */
-       if (!PyString_CheckExact(key)) {
+       if (!PyUnicode_CheckExact(key)) {
                so->lookup = set_lookkey;
                return set_lookkey(so, key, hash);
        }
@@ -173,7 +175,7 @@
        if (entry->key == dummy)
                freeslot = entry;
        else {
-               if (entry->hash == hash && _PyString_Eq(entry->key, key))
+               if (entry->hash == hash && unicode_eq(entry->key, key))
                        return entry;
                freeslot = NULL;
        }
@@ -188,7 +190,7 @@
                if (entry->key == key
                    || (entry->hash == hash
                        && entry->key != dummy
-                       && _PyString_Eq(entry->key, key)))
+                       && unicode_eq(entry->key, key)))
                        return entry;
                if (entry->key == dummy && freeslot == NULL)
                        freeslot = entry;
@@ -375,8 +377,8 @@
        register long hash;
        register Py_ssize_t n_used;
 
-       if (!PyString_CheckExact(key) ||
-           (hash = ((PyStringObject *) key)->ob_shash) == -1) {
+       if (!PyUnicode_CheckExact(key) ||
+           (hash = ((PyUnicodeObject *) key)->hash) == -1) {
                hash = PyObject_Hash(key);
                if (hash == -1)
                        return -1;
@@ -422,8 +424,9 @@
        PyObject *old_key;
 
        assert (PyAnySet_Check(so));
-       if (!PyString_CheckExact(key) ||
-           (hash = ((PyStringObject *) key)->ob_shash) == -1) {
+
+       if (!PyUnicode_CheckExact(key) ||
+           (hash = ((PyUnicodeObject *) key)->hash) == -1) {
                hash = PyObject_Hash(key);
                if (hash == -1)
                        return -1;
@@ -668,8 +671,8 @@
        long hash;
        setentry *entry;
 
-       if (!PyString_CheckExact(key) ||
-           (hash = ((PyStringObject *) key)->ob_shash) == -1) {
+       if (!PyUnicode_CheckExact(key) ||
+           (hash = ((PyUnicodeObject *) key)->hash) == -1) {
                hash = PyObject_Hash(key);
                if (hash == -1)
                        return -1;
@@ -989,7 +992,7 @@
                INIT_NONZERO_SET_SLOTS(so);
        }
 
-       so->lookup = set_lookkey_string;
+       so->lookup = set_lookkey_unicode;
        so->weakreflist = NULL;
 
        if (iterable != NULL) {
@@ -1352,7 +1355,7 @@
        while ((key = PyIter_Next(it)) != NULL) {
                int rv;
                setentry entry;
-               long hash = PyObject_Hash(key);
+               long hash = PyObject_Hash(key);;
 
                if (hash == -1) {
                        Py_DECREF(key);

Added: python/branches/py3k/Objects/stringlib/eq.h
==============================================================================
--- (empty file)
+++ python/branches/py3k/Objects/stringlib/eq.h Mon Dec 10 16:50:56 2007
@@ -0,0 +1,21 @@
+/* Fast unicode equal function optimized for dictobject.c and setobject.c */
+
+/* Return 1 if two unicode objects are equal, 0 if not.
+ * unicode_eq() is called when the hash of two unicode objects is equal.
+ */
+Py_LOCAL_INLINE(int)
+unicode_eq(PyObject *aa, PyObject *bb)
+{
+       register PyUnicodeObject *a = (PyUnicodeObject *)aa;
+       register PyUnicodeObject *b = (PyUnicodeObject *)bb;
+
+       if (a->length != b->length)
+               return 0;
+       if (a->length == 0)
+               return 1;
+       if (a->str[0] != b->str[0])
+               return 0;
+       if (a->length == 1)
+               return 1;
+       return memcmp(a->str, b->str, a->length * sizeof(Py_UNICODE)) == 0;
+}

Modified: python/branches/py3k/Objects/stringobject.c
==============================================================================
--- python/branches/py3k/Objects/stringobject.c (original)
+++ python/branches/py3k/Objects/stringobject.c Mon Dec 10 16:50:56 2007
@@ -877,16 +877,6 @@
        return result;
 }
 
-int
-_PyString_Eq(PyObject *o1, PyObject *o2)
-{
-       PyStringObject *a = (PyStringObject*) o1;
-       PyStringObject *b = (PyStringObject*) o2;
-       return Py_Size(a) == Py_Size(b)
-               && *a->ob_sval == *b->ob_sval
-               && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
-}
-
 static long
 string_hash(PyStringObject *a)
 {
_______________________________________________
Python-3000-checkins mailing list
[email protected]
http://mail.python.org/mailman/listinfo/python-3000-checkins

Reply via email to