[issue1261] PEP 3137: make bytesobject.c methods use PEP 3118 buffer API

Gregory P. Smith Thu, 11 Oct 2007 01:45:51 -0700

New submission from Gregory P. Smith:

This makes all existing bytesobject.c methods use the buffer API rather
than explicitly requiring bytes objects as input.  It also tightens the
input checking of append() and remove(), which was not strict enough,
and improves a few unit tests in that area.


NOTE: this patch likely depends on http://bugs.python.org/issue1260
(removing the buffer API from the unicode type) in order for all unit
tests to pass.  I only tested it with that patch applied, since that's
where we're headed.

----------
files: bytes-methods-use-buffer-api-01.diff.txt
keywords: patch, py3k
messages: 56341
nosy: gregory.p.smith
severity: normal
status: open
title: PEP 3137: make bytesobject.c methods use PEP 3118 buffer API
versions: Python 3.0

__________________________________
Tracker <[EMAIL PROTECTED]>
<http://bugs.python.org/issue1261>
__________________________________

Index: Objects/bytesobject.c
===================================================================
--- Objects/bytesobject.c       (revision 58412)
+++ Objects/bytesobject.c       (working copy)
@@ -37,15 +37,20 @@
 static int
 _getbytevalue(PyObject* arg, int *value)
 {
-    PyObject *intarg = PyNumber_Int(arg);
-    if (! intarg)
+    long face_value;
+
+    if (PyInt_Check(arg)) {
+        face_value = PyInt_AsLong(arg);
+        if (face_value < 0 || face_value >= 256) {
+            PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
+            return 0;
+        }
+    } else {
+        PyErr_Format(PyExc_TypeError, "an integer is required");
         return 0;
-    *value = PyInt_AsLong(intarg);
-    Py_DECREF(intarg);
-    if (*value < 0 || *value >= 256) {
-        PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
-        return 0;
     }
+
+    *value = face_value;
     return 1;
 }
 
@@ -80,9 +85,7 @@
 {
     PyBufferProcs *buffer = Py_Type(obj)->tp_as_buffer;
 
-    if (buffer == NULL ||
-        PyUnicode_Check(obj) ||
-        buffer->bf_getbuffer == NULL)
+    if (buffer == NULL || buffer->bf_getbuffer == NULL)
     {
         PyErr_Format(PyExc_TypeError,
                      "Type %.100s doesn't support the buffer API",
@@ -1088,6 +1102,23 @@
     return res;
 }
 
+/* TODO(gps):
+ * These methods need implementing (porting over from stringobject.c):
+ *
+ * .capitalize(), .center(), 
+ * .expandtabs(), .isalnum(), .isalpha(), .isdigit(),
+ * .islower(), .isspace(), .istitle(), .isupper(), 
+ * .rjust(), 
+ * .splitlines(), .swapcase(), .title(),
+ * .upper(), .zfill()
+ *
+ * XXX(gps) the code is -shared- for so many of these, thats gross.  I wish
+ * we had templates or generics or OO inheritance here.  A .h file with the
+ * methods as big CPP macros as templates would work but is ugly (especially
+ * when debugging).  Or can we do an (evil?) common substructure hack to
+ * allow us to write generic methods that work on both buffer (PyBytes_*)
+ * and bytes (PyString_*) objects?
+ */
 
 PyDoc_STRVAR(find__doc__,
 "B.find(sub [,start [,end]]) -> int\n\
@@ -1118,27 +1149,25 @@
 bytes_count(PyBytesObject *self, PyObject *args)
 {
     PyObject *sub_obj;
-    const char *str = PyBytes_AS_STRING(self), *sub;
-    Py_ssize_t sub_len;
+    const char *str = PyBytes_AS_STRING(self);
     Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
+    Py_buffer vsub;
+    PyObject *count_obj;
 
     if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
         _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
         return NULL;
 
-    if (PyBytes_Check(sub_obj)) {
-        sub = PyBytes_AS_STRING(sub_obj);
-        sub_len = PyBytes_GET_SIZE(sub_obj);
-    }
-    /* XXX --> use the modern buffer interface */
-    else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
+    if (_getbuffer(sub_obj, &vsub) < 0)
         return NULL;
 
     _adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
 
-    return PyInt_FromSsize_t(
-        stringlib_count(str + start, end - start, sub, sub_len)
+    count_obj = PyInt_FromSsize_t(
+        stringlib_count(str + start, end - start, vsub.buf, vsub.len)
         );
+    PyObject_ReleaseBuffer(sub_obj, &vsub);
+    return count_obj;
 }
 
 
@@ -1210,36 +1239,39 @@
                  Py_ssize_t end, int direction)
 {
     Py_ssize_t len = PyBytes_GET_SIZE(self);
-    Py_ssize_t slen;
-    const char* sub;
     const char* str;
+    Py_buffer vsubstr;
+    int rv;
 
-    if (PyBytes_Check(substr)) {
-        sub = PyBytes_AS_STRING(substr);
-        slen = PyBytes_GET_SIZE(substr);
-    }
-    /* XXX --> Use the modern buffer interface */
-    else if (PyObject_AsCharBuffer(substr, &sub, &slen))
-        return -1;
     str = PyBytes_AS_STRING(self);
 
+    if (_getbuffer(substr, &vsubstr) < 0)
+        return -1;
+
     _adjust_indices(&start, &end, len);
 
     if (direction < 0) {
         /* startswith */
-        if (start+slen > len)
-            return 0;
+        if (start+vsubstr.len > len) {
+            rv = 0;
+            goto done;
+        }
     } else {
         /* endswith */
-        if (end-start < slen || start > len)
-            return 0;
+        if (end-start < vsubstr.len || start > len) {
+            rv = 0;
+            goto done;
+        }
 
-        if (end-slen > start)
-            start = end - slen;
+        if (end-vsubstr.len > start)
+            start = end - vsubstr.len;
     }
-    if (end-start >= slen)
-        return ! memcmp(str+start, sub, slen);
-    return 0;
+    if (end-start >= vsubstr.len)
+        rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
+
+done:
+    PyObject_ReleaseBuffer(substr, &vsubstr);
+    return rv;
 }
 
 
@@ -1340,53 +1372,47 @@
     register const char *table;
     register Py_ssize_t i, c, changed = 0;
     PyObject *input_obj = (PyObject*)self;
-    const char *table1, *output_start, *del_table=NULL;
-    Py_ssize_t inlen, tablen, dellen = 0;
+    const char *output_start;
+    Py_ssize_t inlen;
     PyObject *result;
     int trans_table[256];
     PyObject *tableobj, *delobj = NULL;
+    Py_buffer vtable, vdel;
 
     if (!PyArg_UnpackTuple(args, "translate", 1, 2,
                            &tableobj, &delobj))
           return NULL;
 
-    if (PyBytes_Check(tableobj)) {
-        table1 = PyBytes_AS_STRING(tableobj);
-        tablen = PyBytes_GET_SIZE(tableobj);
-    }
-    /* XXX -> Use the modern buffer interface */
-    else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
+    if (_getbuffer(tableobj, &vtable) < 0)
         return NULL;
 
-    if (tablen != 256) {
+    if (vtable.len != 256) {
         PyErr_SetString(PyExc_ValueError,
                         "translation table must be 256 characters long");
-        return NULL;
+        result = NULL;
+        goto done;
     }
 
     if (delobj != NULL) {
-        if (PyBytes_Check(delobj)) {
-            del_table = PyBytes_AS_STRING(delobj);
-            dellen = PyBytes_GET_SIZE(delobj);
+        if (_getbuffer(delobj, &vdel) < 0) {
+            result = NULL;
+            goto done;
         }
-        /* XXX -> use the modern buffer interface */
-        else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
-            return NULL;
     }
     else {
-        del_table = NULL;
-        dellen = 0;
+        vdel.buf = NULL;
+        vdel.len = 0;
     }
 
-    table = table1;
+    table = (const char *)vtable.buf;
     inlen = PyBytes_GET_SIZE(input_obj);
     result = PyBytes_FromStringAndSize((char *)NULL, inlen);
     if (result == NULL)
-        return NULL;
+        goto done;
     output_start = output = PyBytes_AsString(result);
     input = PyBytes_AS_STRING(input_obj);
 
-    if (dellen == 0) {
+    if (vdel.len == 0) {
         /* If no deletions are required, use faster code */
         for (i = inlen; --i >= 0; ) {
             c = Py_CHARMASK(*input++);
@@ -1394,17 +1420,18 @@
                 changed = 1;
         }
         if (changed || !PyBytes_CheckExact(input_obj))
-            return result;
+            goto done;
         Py_DECREF(result);
         Py_INCREF(input_obj);
-        return input_obj;
+        result = input_obj;
+        goto done;
     }
 
     for (i = 0; i < 256; i++)
         trans_table[i] = Py_CHARMASK(table[i]);
 
-    for (i = 0; i < dellen; i++)
-        trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
+    for (i = 0; i < vdel.len; i++)
+        trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
 
     for (i = inlen; --i >= 0; ) {
         c = Py_CHARMASK(*input++);
@@ -1416,11 +1443,17 @@
     if (!changed && PyBytes_CheckExact(input_obj)) {
         Py_DECREF(result);
         Py_INCREF(input_obj);
-        return input_obj;
+        result = input_obj;
+        goto done;
     }
     /* Fix the size of the resulting string */
     if (inlen > 0)
         PyBytes_Resize(result, output - output_start);
+
+done:
+    PyObject_ReleaseBuffer(tableobj, &vtable);
+    if (delobj != NULL)
+        PyObject_ReleaseBuffer(delobj, &vdel);
     return result;
 }
 
@@ -2264,6 +2297,8 @@
 {
     PyObject *bytesep, *result;
 
+    /* XXX(gps) could this use _getbuffer instead of creating an entire new
+     * copy in the bytesep object? */
     bytesep = PyBytes_FromObject(sep_obj);
     if (! bytesep)
         return NULL;
@@ -2291,6 +2326,8 @@
 {
     PyObject *bytesep, *result;
 
+    /* XXX(gps) could this use _getbuffer instead of creating an entire new
+     * copy in the bytesep object? */
     bytesep = PyBytes_FromObject(sep_obj);
     if (! bytesep)
         return NULL;
@@ -2459,6 +2496,9 @@
 static PyObject *
 bytes_extend(PyBytesObject *self, PyObject *arg)
 {
+    /* XXX(gps): the docstring above says any iterable int will do but the
+     * bytes_setslice code really wants something supporting PEP 3118.
+     * Is a list or tuple of 0 <= ints <= 255 also supposed to work? */
     if (bytes_setslice(self, Py_Size(self), Py_Size(self), arg) == -1)
         return NULL;
     Py_RETURN_NONE;
@@ -2866,26 +2906,34 @@
 static PyObject *
 bytes_fromhex(PyObject *cls, PyObject *args)
 {
-    PyObject *newbytes;
-    char *hex, *buf;
-    Py_ssize_t len, byteslen, i, j;
+    PyObject *newbytes, *hexobj;
+    char *buf;
+    unsigned char *hex;
+    Py_ssize_t byteslen, i, j;
     int top, bot;
+    Py_buffer vhex;
 
-    if (!PyArg_ParseTuple(args, "s#:fromhex", &hex, &len))
+    if (!PyArg_ParseTuple(args, "O:fromhex", &hexobj))
         return NULL;
 
-    byteslen = len / 2; /* max length if there are no spaces */
+    if (_getbuffer(hexobj, &vhex) < 0)
+        return NULL;
 
+    byteslen = vhex.len / 2; /* max length if there are no spaces */
+    hex = vhex.buf;
+
     newbytes = PyBytes_FromStringAndSize(NULL, byteslen);
-    if (!newbytes)
+    if (!newbytes) {
+        PyObject_ReleaseBuffer(hexobj, &vhex);
         return NULL;
+    }
     buf = PyBytes_AS_STRING(newbytes);
 
-    for (i = j = 0; i < len; i += 2) {
+    for (i = j = 0; i < vhex.len; i += 2) {
         /* skip over spaces in the input */
-        while (Py_CHARMASK(hex[i]) == ' ')
+        while (Py_CHARMASK( hex[i] ) == ' ')
             i++;
-        if (i >= len)
+        if (i >= vhex.len)
             break;
         top = hex_digit_to_int(Py_CHARMASK(hex[i]));
         bot = hex_digit_to_int(Py_CHARMASK(hex[i+1]));
@@ -2900,10 +2948,12 @@
     }
     if (PyBytes_Resize(newbytes, j) < 0)
         goto error;
+    PyObject_ReleaseBuffer(hexobj, &vhex);
     return newbytes;
 
   error:
     Py_DECREF(newbytes);
+    PyObject_ReleaseBuffer(hexobj, &vhex);
     return NULL;
 }
 
Index: Lib/test/test_bytes.py
===================================================================
--- Lib/test/test_bytes.py      (revision 58412)
+++ Lib/test/test_bytes.py      (working copy)
@@ -454,17 +454,18 @@
     def test_fromhex(self):
         self.assertRaises(TypeError, bytes.fromhex)
         self.assertRaises(TypeError, bytes.fromhex, 1)
-        self.assertEquals(bytes.fromhex(''), bytes())
+        self.assertEquals(bytes.fromhex(b''), bytes())
         b = bytes([0x1a, 0x2b, 0x30])
-        self.assertEquals(bytes.fromhex('1a2B30'), b)
-        self.assertEquals(bytes.fromhex('  1A 2B  30   '), b)
+        self.assertEquals(bytes.fromhex(b'1a2B30'), b)
+        self.assertEquals(bytes.fromhex(b'  1A 2B  30   '), b)
         self.assertEquals(bytes.fromhex(memoryview(b'')), bytes())
         self.assertEquals(bytes.fromhex(memoryview(b'0000')), bytes([0, 0]))
-        self.assertRaises(ValueError, bytes.fromhex, 'a')
-        self.assertRaises(ValueError, bytes.fromhex, 'rt')
-        self.assertRaises(ValueError, bytes.fromhex, '1a b cd')
-        self.assertRaises(ValueError, bytes.fromhex, '\x00')
-        self.assertRaises(ValueError, bytes.fromhex, '12   \x00   34')
+        self.assertRaises(TypeError, bytes.fromhex, '1B')
+        self.assertRaises(ValueError, bytes.fromhex, b'a')
+        self.assertRaises(ValueError, bytes.fromhex, b'rt')
+        self.assertRaises(ValueError, bytes.fromhex, b'1a b cd')
+        self.assertRaises(ValueError, bytes.fromhex, b'\x00')
+        self.assertRaises(ValueError, bytes.fromhex, b'12   \x00   34')
 
     def test_join(self):
         self.assertEqual(b"".join([]), bytes())
@@ -504,11 +505,12 @@
         self.assertEqual(b, b'heo')
         self.assertRaises(ValueError, lambda: b.remove(ord('l')))
         self.assertRaises(ValueError, lambda: b.remove(400))
-        self.assertRaises(ValueError, lambda: b.remove('e'))
+        self.assertRaises(TypeError, lambda: b.remove('e'))
         # remove first and last
         b.remove(ord('o'))
         b.remove(ord('h'))
         self.assertEqual(b, b'e')
+        self.assertRaises(TypeError, lambda: b.remove(b'e'))
 
     def test_pop(self):
         b = b'world'
@@ -542,6 +544,7 @@
         b = bytes()
         b.append(ord('A'))
         self.assertEqual(len(b), 1)
+        self.assertRaises(TypeError, lambda: b.append(b'o'))
 
     def test_insert(self):
         b = b'msssspp'
@@ -550,6 +553,7 @@
         b.insert(-2, ord('i'))
         b.insert(1000, ord('i'))
         self.assertEqual(b, b'mississippi')
+        self.assertRaises(TypeError, lambda: b.insert(0, b'1'))
 
     def test_startswith(self):
         b = b'hello'

_______________________________________________
Python-bugs-list mailing list 
Unsubscribe: 
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

[issue1261] PEP 3137: make bytesobject.c methods use PEP 3118 buffer API

Reply via email to