Eryk Sun <[email protected]> added the comment:
> stdout.write("small text")
> stdout.write("very large text") # Calls writeflush, but can not allocate
> buffer.
Without the optimization, in most cases this will likely fail in
_io_TextIOWrapper_write_impl() at the line
`b = (*self->encodefunc)((PyObject *) self, text)`. In some cases the encode
call may succeed, but the size of its result combined with the existing
pending_bytes_count then leads to a memory error in
_textiowrapper_writeflush().
> * If input text is large (>1M?)
I'd change write() to optimize ASCII writes only when the new total size of
pending writes would not exceed the text wrapper's chunk size. Then I'd
rearrange the logic to pre-flush the text wrapper if the pending bytes plus the
new write would exceed the chunk size. Thus the total size of a list of pending
writes (aggregating small writes as a chunk), or that of a single ASCII str()
object, would be limited to the chunk size, in which case
PyBytes_FromStringAndSize() in _textiowrapper_writeflush() shouldn't fail under
any normal circumstances. For example:
if (self->encodefunc != NULL) {
[NEW CONDITION]
if (PyUnicode_IS_ASCII(text) &&
(PyUnicode_GET_LENGTH(text) +
(self->pending_bytes ? self->pending_bytes_count : 0)) <=
self->chunk_size &&
is_asciicompat_encoding(self->encodefunc)) {
b = text;
Py_INCREF(b);
}
else {
b = (*self->encodefunc)((PyObject *) self, text);
}
self->encoding_start_of_stream = 0;
}
else {
b = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_encode, text);
}
Py_DECREF(text);
if (b == NULL)
return NULL;
if (b != text && !PyBytes_Check(b)) {
PyErr_Format(PyExc_TypeError,
"encoder should return a bytes object, not '%.200s'",
Py_TYPE(b)->tp_name);
Py_DECREF(b);
return NULL;
}
Py_ssize_t bytes_len;
if (b == text) {
bytes_len = PyUnicode_GET_LENGTH(b);
}
else {
bytes_len = PyBytes_GET_SIZE(b);
}
if (self->pending_bytes == NULL) {
self->pending_bytes_count = 0;
self->pending_bytes = b;
}
[NEW PRE-FLUSH]
else if ((self->pending_bytes_count + bytes_len) > self->chunk_size) {
if (_textiowrapper_writeflush(self) < 0) {
Py_DECREF(b);
return NULL;
}
self->pending_bytes = b;
}
else if (!PyList_CheckExact(self->pending_bytes)) {
PyObject *list = PyList_New(2);
if (list == NULL) {
Py_DECREF(b);
return NULL;
}
PyList_SET_ITEM(list, 0, self->pending_bytes);
PyList_SET_ITEM(list, 1, b);
self->pending_bytes = list;
}
else {
if (PyList_Append(self->pending_bytes, b) < 0) {
Py_DECREF(b);
return NULL;
}
Py_DECREF(b);
}
self->pending_bytes_count += bytes_len;
if (self->pending_bytes_count > self->chunk_size || needflush ||
text_needflush) {
if (_textiowrapper_writeflush(self) < 0)
return NULL;
}
----------
_______________________________________
Python tracker <[email protected]>
<https://bugs.python.org/issue43260>
_______________________________________
_______________________________________________
Python-bugs-list mailing list
Unsubscribe:
https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com