Serhiy Storchaka added the comment:

Here are patches for 3.2 and 2.7.

Note that due to unicode-str autoconversions, 2.7 does not always raise 
TypeError (sometimes it does not raise an exception at all, sometimes it 
raises UnicodeEncodeError). The 2.7 tests are not as strong as the 3.x tests.

----------
Added file: http://bugs.python.org/file28944/textio_type_check-3.2_2.patch
Added file: http://bugs.python.org/file28945/textio_type_check-2.7_2.patch

_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue17106>
_______________________________________
diff -r 6c9f4c22fd81 Lib/test/test_io.py
--- a/Lib/test/test_io.py       Sat Feb 02 15:08:52 2013 -0800
+++ b/Lib/test/test_io.py       Sun Feb 03 15:02:34 2013 +0200
@@ -2481,6 +2481,30 @@
         txt.write('5')
         self.assertEqual(b''.join(raw._write_stack), b'123\n45')
 
+    def test_read_nonbytes(self):
+        # Issue #17106
+        # Crash when underlying read() returns non-bytes
+        t = self.TextIOWrapper(self.StringIO('a'))
+        self.assertRaises(TypeError, t.read, 1)
+        t = self.TextIOWrapper(self.StringIO('a'))
+        self.assertRaises(TypeError, t.readline)
+        t = self.TextIOWrapper(self.StringIO('a'))
+        self.assertRaises(TypeError, t.read)
+
+    def test_illegal_decoder(self):
+        # Issue #17106
+        # Crash when decoder returns non-string
+        t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n',
+                               encoding='quopri_codec')
+        self.assertRaises(TypeError, t.read, 1)
+        t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n',
+                               encoding='quopri_codec')
+        self.assertRaises(TypeError, t.readline)
+        t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n',
+                               encoding='quopri_codec')
+        self.assertRaises(TypeError, t.read)
+
+
 class CTextIOWrapperTest(TextIOWrapperTest):
 
     def test_initialization(self):
diff -r 6c9f4c22fd81 Modules/_io/textio.c
--- a/Modules/_io/textio.c      Sat Feb 02 15:08:52 2013 -0800
+++ b/Modules/_io/textio.c      Sun Feb 03 15:02:34 2013 +0200
@@ -236,6 +236,21 @@
     Py_TYPE(self)->tp_free((PyObject *)self);
 }
 
+static int
+check_decoded(PyObject *decoded)
+{
+    if (decoded == NULL)
+        return -1;
+    if (!PyUnicode_Check(decoded)) {
+        PyErr_Format(PyExc_TypeError,
+                     "decoder should return a string result, not '%.200s'",
+                     Py_TYPE(decoded)->tp_name);
+        Py_DECREF(decoded);
+        return -1;
+    }
+    return 0;
+}
+
 #define SEEN_CR   1
 #define SEEN_LF   2
 #define SEEN_CRLF 4
@@ -265,15 +280,9 @@
         Py_INCREF(output);
     }
 
-    if (output == NULL)
+    if (check_decoded(output) < 0)
         return NULL;
 
-    if (!PyUnicode_Check(output)) {
-        PyErr_SetString(PyExc_TypeError,
-                        "decoder should return a string result");
-        goto error;
-    }
-
     output_len = PyUnicode_GET_SIZE(output);
     if (self->pendingcr && (final || output_len > 0)) {
         Py_UNICODE *out;
@@ -1454,7 +1463,13 @@
     Py_DECREF(chunk_size);
     if (input_chunk == NULL)
         goto fail;
-    assert(PyBytes_Check(input_chunk));
+    if (!PyBytes_Check(input_chunk)) {
+        PyErr_Format(PyExc_TypeError,
+                     "underlying %s() should have returned a bytes object, "
+                     "not '%.200s'", (self->has_read1 ? "read1": "read"),
+                     Py_TYPE(input_chunk)->tp_name);
+        goto fail;
+    }
 
     eof = (PyBytes_Size(input_chunk) == 0);
 
@@ -1467,8 +1482,7 @@
             _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
     }
 
-    /* TODO sanity check: isinstance(decoded_chars, unicode) */
-    if (decoded_chars == NULL)
+    if (check_decoded(decoded_chars) < 0)
         goto fail;
     textiowrapper_set_decoded_chars(self, decoded_chars);
     if (PyUnicode_GET_SIZE(decoded_chars) > 0)
@@ -1481,7 +1495,14 @@
         PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
         if (next_input == NULL)
             goto fail;
-        assert (PyBytes_Check(next_input));
+        if (!PyBytes_Check(next_input)) {
+            PyErr_Format(PyExc_TypeError,
+                         "decoder getstate() should have returned a bytes "
+                         "object, not '%.200s'",
+                         Py_TYPE(next_input)->tp_name);
+            Py_DECREF(next_input);
+            goto fail;
+        }
         Py_DECREF(dec_buffer);
         Py_CLEAR(self->snapshot);
         self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
@@ -1525,7 +1546,7 @@
         decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
                                              bytes, Py_True, NULL);
         Py_DECREF(bytes);
-        if (decoded == NULL)
+        if (check_decoded(decoded) < 0)
             goto fail;
 
         result = textiowrapper_get_decoded_chars(self, -1);
@@ -2123,7 +2144,14 @@
         if (input_chunk == NULL)
             goto fail;
 
-        assert (PyBytes_Check(input_chunk));
+        if (!PyBytes_Check(input_chunk)) {
+            PyErr_Format(PyExc_TypeError,
+                         "underlying read() should have returned a bytes "
+                         "object, not '%.200s'",
+                         Py_TYPE(input_chunk)->tp_name);
+            Py_DECREF(input_chunk);
+            goto fail;
+        }
 
         self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
         if (self->snapshot == NULL) {
@@ -2134,7 +2162,7 @@
         decoded = PyObject_CallMethod(self->decoder, "decode",
                                       "Oi", input_chunk, (int)cookie.need_eof);
 
-        if (decoded == NULL)
+        if (check_decoded(decoded) < 0)
             goto fail;
 
         textiowrapper_set_decoded_chars(self, decoded);
@@ -2257,9 +2285,8 @@
 
         PyObject *decoded = PyObject_CallMethod(
             self->decoder, "decode", "y#", input, 1);
-        if (decoded == NULL)
+        if (check_decoded(decoded) < 0)
             goto fail;
-        assert (PyUnicode_Check(decoded));
         chars_decoded += PyUnicode_GET_SIZE(decoded);
         Py_DECREF(decoded);
 
@@ -2291,9 +2318,8 @@
         /* We didn't get enough decoded data; signal EOF to get more. */
         PyObject *decoded = PyObject_CallMethod(
             self->decoder, "decode", "yi", "", /* final = */ 1);
-        if (decoded == NULL)
+        if (check_decoded(decoded) < 0)
             goto fail;
-        assert (PyUnicode_Check(decoded));
         chars_decoded += PyUnicode_GET_SIZE(decoded);
         Py_DECREF(decoded);
         cookie.need_eof = 1;
diff -r c37ac05119ff Lib/test/test_io.py
--- a/Lib/test/test_io.py       Sat Feb 02 15:06:45 2013 -0800
+++ b/Lib/test/test_io.py       Sun Feb 03 15:49:38 2013 +0200
@@ -36,6 +36,7 @@
 from collections import deque
 from UserList import UserList
 from test import test_support as support
+import contextlib
 
 import codecs
 import io  # C implementation of io
@@ -2419,6 +2420,39 @@
         with self.assertRaises((AttributeError, TypeError)):
             txt.buffer = buf
 
+    def test_read_nonbytes(self):
+        # Issue #17106
+        # Crash when underlying read() returns non-bytes
+        class NonbytesStream(self.StringIO):
+            read1 = self.StringIO.read
+        class NonbytesStream(self.StringIO):
+            read1 = self.StringIO.read
+        t = self.TextIOWrapper(NonbytesStream('a'))
+        with self.maybeRaises(TypeError):
+            t.read(1)
+        t = self.TextIOWrapper(NonbytesStream('a'))
+        with self.maybeRaises(TypeError):
+            t.readline()
+        t = self.TextIOWrapper(NonbytesStream('a'))
+        self.assertEqual(t.read(), u'a')
+
+    def test_illegal_decoder(self):
+        # Issue #17106
+        # Crash when decoder returns non-string
+        t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n',
+                               encoding='quopri_codec')
+        with self.maybeRaises(TypeError):
+            t.read(1)
+        t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n',
+                               encoding='quopri_codec')
+        with self.maybeRaises(TypeError):
+            t.readline()
+        t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n',
+                               encoding='quopri_codec')
+        with self.maybeRaises(TypeError):
+            t.read()
+
+
 class CTextIOWrapperTest(TextIOWrapperTest):
 
     def test_initialization(self):
@@ -2460,9 +2494,13 @@
             t2.buddy = t1
         support.gc_collect()
 
+    maybeRaises = unittest.TestCase.assertRaises
+
 
 class PyTextIOWrapperTest(TextIOWrapperTest):
-    pass
+    @contextlib.contextmanager
+    def maybeRaises(self, *args, **kwds):
+        yield
 
 
 class IncrementalNewlineDecoderTest(unittest.TestCase):
diff -r c37ac05119ff Modules/_io/textio.c
--- a/Modules/_io/textio.c      Sat Feb 02 15:06:45 2013 -0800
+++ b/Modules/_io/textio.c      Sun Feb 03 15:49:38 2013 +0200
@@ -236,6 +236,21 @@
     Py_TYPE(self)->tp_free((PyObject *)self);
 }
 
+static int
+check_decoded(PyObject *decoded)
+{
+    if (decoded == NULL)
+        return -1;
+    if (!PyUnicode_Check(decoded)) {
+        PyErr_Format(PyExc_TypeError,
+                     "decoder should return a string result, not '%.200s'",
+                     Py_TYPE(decoded)->tp_name);
+        Py_DECREF(decoded);
+        return -1;
+    }
+    return 0;
+}
+
 #define SEEN_CR   1
 #define SEEN_LF   2
 #define SEEN_CRLF 4
@@ -265,15 +280,9 @@
         Py_INCREF(output);
     }
 
-    if (output == NULL)
+    if (check_decoded(output) < 0)
         return NULL;
 
-    if (!PyUnicode_Check(output)) {
-        PyErr_SetString(PyExc_TypeError,
-                        "decoder should return a string result");
-        goto error;
-    }
-
     output_len = PyUnicode_GET_SIZE(output);
     if (self->pendingcr && (final || output_len > 0)) {
         Py_UNICODE *out;
@@ -1417,7 +1426,12 @@
     Py_DECREF(chunk_size);
     if (input_chunk == NULL)
         goto fail;
-    assert(PyBytes_Check(input_chunk));
+    if (!PyBytes_Check(input_chunk)) {
+        PyErr_Format(PyExc_TypeError,
+                     "underlying read1() should have returned a bytes object, "
+                     "not '%.200s'", Py_TYPE(input_chunk)->tp_name);
+        goto fail;
+    }
 
     eof = (PyBytes_Size(input_chunk) == 0);
 
@@ -1430,8 +1444,7 @@
             _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
     }
 
-    /* TODO sanity check: isinstance(decoded_chars, unicode) */
-    if (decoded_chars == NULL)
+    if (check_decoded(decoded_chars) < 0)
         goto fail;
     textiowrapper_set_decoded_chars(self, decoded_chars);
     if (PyUnicode_GET_SIZE(decoded_chars) > 0)
@@ -1444,7 +1457,14 @@
         PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
         if (next_input == NULL)
             goto fail;
-        assert (PyBytes_Check(next_input));
+        if (!PyBytes_Check(next_input)) {
+            PyErr_Format(PyExc_TypeError,
+                         "decoder getstate() should have returned a bytes "
+                         "object, not '%.200s'",
+                         Py_TYPE(next_input)->tp_name);
+            Py_DECREF(next_input);
+            goto fail;
+        }
         Py_DECREF(dec_buffer);
         Py_CLEAR(self->snapshot);
         self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
@@ -1490,7 +1510,7 @@
         decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
                                              bytes, Py_True, NULL);
         Py_DECREF(bytes);
-        if (decoded == NULL)
+        if (check_decoded(decoded) < 0)
             goto fail;
 
         result = textiowrapper_get_decoded_chars(self, -1);
@@ -2110,7 +2130,14 @@
         if (input_chunk == NULL)
             goto fail;
 
-        assert (PyBytes_Check(input_chunk));
+        if (!PyBytes_Check(input_chunk)) {
+            PyErr_Format(PyExc_TypeError,
+                         "underlying read() should have returned a bytes "
+                         "object, not '%.200s'",
+                         Py_TYPE(input_chunk)->tp_name);
+            Py_DECREF(input_chunk);
+            goto fail;
+        }
 
         self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
         if (self->snapshot == NULL) {
@@ -2121,7 +2148,7 @@
         decoded = PyObject_CallMethod(self->decoder, "decode",
                                       "Oi", input_chunk, (int)cookie.need_eof);
 
-        if (decoded == NULL)
+        if (check_decoded(decoded) < 0)
             goto fail;
 
         textiowrapper_set_decoded_chars(self, decoded);
@@ -2245,9 +2272,8 @@
 
         PyObject *decoded = PyObject_CallMethod(
             self->decoder, "decode", "s#", input, 1);
-        if (decoded == NULL)
+        if (check_decoded(decoded) < 0)
             goto fail;
-        assert (PyUnicode_Check(decoded));
         chars_decoded += PyUnicode_GET_SIZE(decoded);
         Py_DECREF(decoded);
 
@@ -2279,9 +2305,8 @@
         /* We didn't get enough decoded data; signal EOF to get more. */
         PyObject *decoded = PyObject_CallMethod(
             self->decoder, "decode", "si", "", /* final = */ 1);
-        if (decoded == NULL)
+        if (check_decoded(decoded) < 0)
             goto fail;
-        assert (PyUnicode_Check(decoded));
         chars_decoded += PyUnicode_GET_SIZE(decoded);
         Py_DECREF(decoded);
         cookie.need_eof = 1;
@@ -2440,7 +2465,7 @@
     Py_DECREF(res);
     if (r < 0)
         return NULL;
-    
+
     if (r > 0) {
         Py_RETURN_NONE; /* stream already closed */
     }
_______________________________________________
Python-bugs-list mailing list
Unsubscribe: 
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

Reply via email to