Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Optimized write buffering in C implementation of ``TextIOWrapper``. Writing
ASCII string to ``TextIOWrapper`` with ascii, latin1, or utf-8 encoding is
about 20% faster. Patch by Inada Naoki.
116 changes: 95 additions & 21 deletions 116 Modules/_io/textio.c
Original file line number Diff line number Diff line change
Expand Up @@ -674,8 +674,8 @@ typedef struct
*/
PyObject *decoded_chars; /* buffer for text returned from decoder */
Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
PyObject *pending_bytes; /* list of bytes objects waiting to be
written, or NULL */
PyObject *pending_bytes; // data waiting to be written.
// ascii unicode, bytes, or list of them.
Py_ssize_t pending_bytes_count;

/* snapshot is either NULL, or a tuple (dec_flags, next_input) where
Expand Down Expand Up @@ -777,6 +777,15 @@ latin1_encode(textio *self, PyObject *text)
return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
}

// Return true when encoding can be skipped when text is ascii.
static inline int
is_asciicompat_encoding(encodefunc_t f)
{
return f == (encodefunc_t) ascii_encode
|| f == (encodefunc_t) latin1_encode
|| f == (encodefunc_t) utf8_encode;
}

/* Map normalized encoding names onto the specialized encoding funcs */

typedef struct {
Expand Down Expand Up @@ -1489,21 +1498,62 @@ _io_TextIOWrapper_detach_impl(textio *self)
static int
_textiowrapper_writeflush(textio *self)
{
PyObject *pending, *b, *ret;

if (self->pending_bytes == NULL)
return 0;

pending = self->pending_bytes;
Py_INCREF(pending);
self->pending_bytes_count = 0;
Py_CLEAR(self->pending_bytes);
PyObject *pending = self->pending_bytes;
PyObject *b;

b = _PyBytes_Join(_PyIO_empty_bytes, pending);
if (PyBytes_Check(pending)) {
b = pending;
Py_INCREF(b);
}
else if (PyUnicode_Check(pending)) {
assert(PyUnicode_IS_ASCII(pending));
assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
b = PyBytes_FromStringAndSize(
PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
if (b == NULL) {
return -1;
}
}
else {
assert(PyList_Check(pending));
b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
if (b == NULL) {
return -1;
}

char *buf = PyBytes_AsString(b);
Py_ssize_t pos = 0;

for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
PyObject *obj = PyList_GET_ITEM(pending, i);
char *src;
Py_ssize_t len;
if (PyUnicode_Check(obj)) {
assert(PyUnicode_IS_ASCII(obj));
src = PyUnicode_DATA(obj);
len = PyUnicode_GET_LENGTH(obj);
}
else {
assert(PyBytes_Check(obj));
if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
Py_DECREF(b);
return -1;
}
}
memcpy(buf + pos, src, len);
pos += len;
}
assert(pos == self->pending_bytes_count);
}

self->pending_bytes_count = 0;
self->pending_bytes = NULL;
Py_DECREF(pending);
if (b == NULL)
return -1;
ret = NULL;

PyObject *ret;
do {
ret = PyObject_CallMethodObjArgs(self->buffer,
_PyIO_str_write, b, NULL);
Expand Down Expand Up @@ -1566,37 +1616,61 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)

/* XXX What if we were just reading? */
if (self->encodefunc != NULL) {
b = (*self->encodefunc)((PyObject *) self, text);
if (PyUnicode_IS_ASCII(text) && is_asciicompat_encoding(self->encodefunc)) {
b = text;
Py_INCREF(b);
}
else {
b = (*self->encodefunc)((PyObject *) self, text);
}
self->encoding_start_of_stream = 0;
}
else
b = PyObject_CallMethodObjArgs(self->encoder,
_PyIO_str_encode, text, NULL);

Py_DECREF(text);
if (b == NULL)
return NULL;
if (!PyBytes_Check(b)) {
if (b != text && !PyBytes_Check(b)) {
PyErr_Format(PyExc_TypeError,
"encoder should return a bytes object, not '%.200s'",
Py_TYPE(b)->tp_name);
Py_DECREF(b);
return NULL;
}

Py_ssize_t bytes_len;
if (b == text) {
bytes_len = PyUnicode_GET_LENGTH(b);
}
else {
bytes_len = PyBytes_GET_SIZE(b);
}

if (self->pending_bytes == NULL) {
self->pending_bytes = PyList_New(0);
if (self->pending_bytes == NULL) {
self->pending_bytes_count = 0;
self->pending_bytes = b;
}
else if (!PyList_CheckExact(self->pending_bytes)) {
PyObject *list = PyList_New(2);
if (list == NULL) {
Py_DECREF(b);
return NULL;
}
self->pending_bytes_count = 0;
PyList_SET_ITEM(list, 0, self->pending_bytes);
PyList_SET_ITEM(list, 1, b);
self->pending_bytes = list;
}
if (PyList_Append(self->pending_bytes, b) < 0) {
else {
if (PyList_Append(self->pending_bytes, b) < 0) {
Py_DECREF(b);
return NULL;
}
Py_DECREF(b);
return NULL;
}
self->pending_bytes_count += PyBytes_GET_SIZE(b);
Py_DECREF(b);

self->pending_bytes_count += bytes_len;
if (self->pending_bytes_count > self->chunk_size || needflush ||
text_needflush) {
if (_textiowrapper_writeflush(self) < 0)
Expand Down
Morty Proxy This is a proxified and sanitized view of the page, visit original site.