Author: guido.van.rossum
Date: Fri Nov  2 18:01:32 2007
New Revision: 58779

Modified:
   python/branches/py3k-pep3137/Objects/bytesobject.c
   python/branches/py3k-pep3137/Objects/stringobject.c
Log:
Quickly fixed bytes_join() and string_join() to be more like each other,
and to accept mixed bytes/buffer items.
The code should be unified into a single function though.


Modified: python/branches/py3k-pep3137/Objects/bytesobject.c
==============================================================================
--- python/branches/py3k-pep3137/Objects/bytesobject.c  (original)
+++ python/branches/py3k-pep3137/Objects/bytesobject.c  Fri Nov  2 18:01:32 2007
@@ -2800,9 +2800,10 @@
     items = PySequence_Fast_ITEMS(seq);
 
     /* Compute the total size, and check that they are all bytes */
+    /* XXX Shouldn't we use _getbuffer() on these items instead? */
     for (i = 0; i < n; i++) {
         PyObject *obj = items[i];
-        if (!PyBytes_Check(obj)) {
+        if (!PyBytes_Check(obj) && !PyString_Check(obj)) {
             PyErr_Format(PyExc_TypeError,
                          "can only join an iterable of bytes "
                          "(item %ld has type '%.100s')",
@@ -2812,7 +2813,7 @@
         }
         if (i > 0)
             totalsize += mysize;
-        totalsize += PyBytes_GET_SIZE(obj);
+        totalsize += Py_Size(obj);
         if (totalsize < 0) {
             PyErr_NoMemory();
             goto error;
@@ -2826,12 +2827,17 @@
     dest = PyBytes_AS_STRING(result);
     for (i = 0; i < n; i++) {
         PyObject *obj = items[i];
-        Py_ssize_t size = PyBytes_GET_SIZE(obj);
-        if (i > 0) {
+        Py_ssize_t size = Py_Size(obj);
+        char *buf;
+        if (PyBytes_Check(obj))
+           buf = PyBytes_AS_STRING(obj);
+        else
+           buf = PyString_AS_STRING(obj);
+        if (i) {
             memcpy(dest, self->ob_bytes, mysize);
             dest += mysize;
         }
-        memcpy(dest, PyBytes_AS_STRING(obj), size);
+        memcpy(dest, buf, size);
         dest += size;
     }
 

Modified: python/branches/py3k-pep3137/Objects/stringobject.c
==============================================================================
--- python/branches/py3k-pep3137/Objects/stringobject.c (original)
+++ python/branches/py3k-pep3137/Objects/stringobject.c Fri Nov  2 18:01:32 2007
@@ -1,5 +1,8 @@
 /* String object implementation */
 
+/* XXX This is now called 'bytes' as far as the user is concerned.
+   Many docstrings and error messages need to be cleaned up. */
+
 #define PY_SSIZE_T_CLEAN
 
 #include "Python.h"
@@ -1410,7 +1413,7 @@
 sequence.  The separator between elements is S.");
 
 static PyObject *
-string_join(PyStringObject *self, PyObject *orig)
+string_join(PyObject *self, PyObject *orig)
 {
        char *sep = PyString_AS_STRING(self);
        const Py_ssize_t seplen = PyString_GET_SIZE(self);
@@ -1433,7 +1436,7 @@
        }
        if (seqlen == 1) {
                item = PySequence_Fast_GET_ITEM(seq, 0);
-               if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
+               if (PyString_CheckExact(item)) {
                        Py_INCREF(item);
                        Py_DECREF(seq);
                        return item;
@@ -1443,32 +1446,21 @@
        /* There are at least two things to join, or else we have a subclass
         * of the builtin types in the sequence.
         * Do a pre-pass to figure out the total amount of space we'll
-        * need (sz), see whether any argument is absurd, and defer to
-        * the Unicode join if appropriate.
+        * need (sz), and see whether all argument are bytes.
         */
+       /* XXX Shouldn't we use _getbuffer() on these items instead? */
        for (i = 0; i < seqlen; i++) {
                const size_t old_sz = sz;
                item = PySequence_Fast_GET_ITEM(seq, i);
-               if (!PyString_Check(item)){
-                       if (PyUnicode_Check(item)) {
-                               /* Defer to Unicode join.
-                                * CAUTION:  There's no gurantee that the
-                                * original sequence can be iterated over
-                                * again, so we must pass seq here.
-                                */
-                               PyObject *result;
-                               result = PyUnicode_Join((PyObject *)self, seq);
-                               Py_DECREF(seq);
-                               return result;
-                       }
+               if (!PyString_Check(item) && !PyBytes_Check(item)) {
                        PyErr_Format(PyExc_TypeError,
-                                    "sequence item %zd: expected string,"
+                                    "sequence item %zd: expected bytes,"
                                     " %.80s found",
                                     i, Py_Type(item)->tp_name);
                        Py_DECREF(seq);
                        return NULL;
                }
-               sz += PyString_GET_SIZE(item);
+               sz += Py_Size(item);
                if (i != 0)
                        sz += seplen;
                if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
@@ -1487,17 +1479,24 @@
        }
 
        /* Catenate everything. */
+       /* I'm not worried about a PyBytes item growing because there's
+          nowhere in this function where we release the GIL. */
        p = PyString_AS_STRING(res);
        for (i = 0; i < seqlen; ++i) {
                size_t n;
-               item = PySequence_Fast_GET_ITEM(seq, i);
-               n = PyString_GET_SIZE(item);
-               Py_MEMCPY(p, PyString_AS_STRING(item), n);
-               p += n;
-               if (i < seqlen - 1) {
+                char *q;
+               if (i) {
                        Py_MEMCPY(p, sep, seplen);
                        p += seplen;
                }
+               item = PySequence_Fast_GET_ITEM(seq, i);
+               n = Py_Size(item);
+                if (PyString_Check(item))
+                       q = PyString_AS_STRING(item);
+               else
+                       q = PyBytes_AS_STRING(item);
+               Py_MEMCPY(p, q, n);
+               p += n;
        }
 
        Py_DECREF(seq);
@@ -1509,7 +1508,7 @@
 {
        assert(sep != NULL && PyString_Check(sep));
        assert(x != NULL);
-       return string_join((PyStringObject *)sep, x);
+       return string_join(sep, x);
 }
 
 Py_LOCAL_INLINE(void)
_______________________________________________
Python-3000-checkins mailing list
[email protected]
http://mail.python.org/mailman/listinfo/python-3000-checkins

Reply via email to