Eryk Sun <eryk...@gmail.com> added the comment:

> stdout.write("small text")
> stdout.write("very large text")  # Calls writeflush, but can not allocate 
> buffer.

Without the optimization, in most cases this will likely fail in 
_io_TextIOWrapper_write_impl() at the line `b = (*self->encodefunc)((PyObject 
*) self, text)`. In some cases, the encodefunc call could succeed, but the size 
of its result combined with the existing pending_bytes_count leads to a memory 
error in _textiowrapper_writeflush().

> * If input text is large (>1M?)

I'd change write() to optimize ASCII writes only so long as the new total size 
of pending writes would not exceed the text wrapper's chunk size. Then 
rearrange the logic to pre-flush the text wrapper if the pending bytes plus the 
new write would exceed the chunk size. Thus the total size of a list of pending 
writes (aggregating small writes as a chunk), or that of a single ASCII str 
object, would be limited to the chunk size, in which case 
PyBytes_FromStringAndSize() in _textiowrapper_writeflush() shouldn't fail in any 
normal circumstances. For example:

    if (self->encodefunc != NULL) {

[NEW CONDITION]

        if (PyUnicode_IS_ASCII(text) &&
              (PyUnicode_GET_LENGTH(text) +
                (self->pending_bytes ? self->pending_bytes_count : 0)) <=
                  self->chunk_size &&
              is_asciicompat_encoding(self->encodefunc)) {
            b = text;
            Py_INCREF(b);
        }
        else {
            b = (*self->encodefunc)((PyObject *) self, text);
        }
        self->encoding_start_of_stream = 0;
    }
    else {
        b = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_encode, text);
    }

    Py_DECREF(text);
    if (b == NULL)
        return NULL;
    if (b != text && !PyBytes_Check(b)) {
        PyErr_Format(PyExc_TypeError,
                     "encoder should return a bytes object, not '%.200s'",
                     Py_TYPE(b)->tp_name);
        Py_DECREF(b);
        return NULL;
    }

    Py_ssize_t bytes_len;
    if (b == text) {
        bytes_len = PyUnicode_GET_LENGTH(b);
    }
    else {
        bytes_len = PyBytes_GET_SIZE(b);
    }

    if (self->pending_bytes == NULL) {
        self->pending_bytes_count = 0;
        self->pending_bytes = b;
    }

[NEW PRE-FLUSH]

    else if ((self->pending_bytes_count + bytes_len) > self->chunk_size) {
        if (_textiowrapper_writeflush(self) < 0) {
            Py_DECREF(b);
            return NULL;
        }
        self->pending_bytes = b;
    }
    else if (!PyList_CheckExact(self->pending_bytes)) {
        PyObject *list = PyList_New(2);
        if (list == NULL) {
            Py_DECREF(b);
            return NULL;
        }
        PyList_SET_ITEM(list, 0, self->pending_bytes);
        PyList_SET_ITEM(list, 1, b);
        self->pending_bytes = list;
    }
    else {
        if (PyList_Append(self->pending_bytes, b) < 0) {
            Py_DECREF(b);
            return NULL;
        }
        Py_DECREF(b);
    }

    self->pending_bytes_count += bytes_len;
    if (self->pending_bytes_count > self->chunk_size || needflush ||
        text_needflush) {
        if (_textiowrapper_writeflush(self) < 0)
            return NULL;
    }

----------

_______________________________________
Python tracker <rep...@bugs.python.org>
<https://bugs.python.org/issue43260>
_______________________________________
_______________________________________________
Python-bugs-list mailing list
Unsubscribe: 
https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

Reply via email to