Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 8e5e00b

Browse files
committed
use tmp buffer
1 parent d026be3 commit 8e5e00b
Copy full SHA for 8e5e00b

File tree

1 file changed

+97
-23
lines changed
Filter options

1 file changed

+97
-23
lines changed

‎Modules/_json.c

Copy file name to clipboard | Expand all lines: Modules/_json.c
+97 -23 | Lines changed: 97 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -48,11 +48,10 @@ typedef struct _PyEncoderObject {
4848
PyObject *indent;
4949
PyObject *key_separator;
5050
PyObject *item_separator;
51+
int (*fast_encode)(PyUnicodeWriter *, PyObject *);
5152
bool sort_keys;
5253
bool skipkeys;
5354
bool allow_nan;
54-
bool fast_encode;
55-
bool ensure_ascii; /* used only when fast_encode == true */
5655
} PyEncoderObject;
5756

5857
#define PyEncoderObject_CAST(op) ((PyEncoderObject *)(op))
@@ -304,18 +303,20 @@ escape_unicode(PyObject *pystr)
304303
return rval;
305304
}
306305

307-
// Take a PyUnicode pystr and write an escaped string to writer.
306+
#define ESCAPE_BUF_SIZE 200
307+
308+
// Take a PyUnicode pystr and write an escaped string to writer. (ensure_ascii)
308309
static int
309-
write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr, bool ascii_only)
310+
write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr)
310311
{
311312
Py_ssize_t i;
312313
Py_ssize_t input_chars;
313-
Py_ssize_t chars;
314-
Py_ssize_t copy_len = 0;
314+
Py_ssize_t buf_len;
315315
const void *input;
316+
Py_UCS4 c = 0;
316317
int kind;
317318
int ret;
318-
unsigned char buf[12];
319+
char buf[ESCAPE_BUF_SIZE]; // avoid overhead of PyUnicodeWriter APIs
319320

320321
input_chars = PyUnicode_GET_LENGTH(pystr);
321322
input = PyUnicode_DATA(pystr);
@@ -324,27 +325,102 @@ write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr, bool ascii_only)
324325
ret = PyUnicodeWriter_WriteChar(writer, '"');
325326
if (ret) return ret;
326327

328+
// Fast path for strings that need no escaping at all, e.g. "id", "name"
327329
for (i = 0; i < input_chars; i++) {
330+
c = PyUnicode_READ(kind, input, i);
331+
if (!S_CHAR(c)) {
332+
break;
333+
}
334+
}
335+
if (i > 0) {
336+
ret = PyUnicodeWriter_WriteSubstring(writer, pystr, 0, i);
337+
if (ret) return ret;
338+
}
339+
if (i == input_chars) {
340+
return PyUnicodeWriter_WriteChar(writer, '"');
341+
}
342+
343+
buf_len = ascii_escape_unichar(c, (unsigned char*)buf, 0);
344+
345+
for (i++ ; i < input_chars; i++) {
328346
Py_UCS4 c = PyUnicode_READ(kind, input, i);
329-
if (c <= 0x1f || c == '\\' || c == '"' || (ascii_only && c >= 0x7f)) {
330-
ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i);
331-
if (ret) return ret;
332-
copy_len = 0;
347+
if (S_CHAR(c)) {
348+
buf[buf_len++] = c;
349+
}
350+
else {
351+
buf_len = ascii_escape_unichar(c, (unsigned char*)buf, buf_len);
352+
}
333353

334-
chars = ascii_escape_unichar(c, buf, 0);
335-
ret = PyUnicodeWriter_WriteUTF8(writer, (const char*)buf, chars);
354+
if (buf_len + 12 > ESCAPE_BUF_SIZE) {
355+
ret = PyUnicodeWriter_WriteUTF8(writer, buf, buf_len);
336356
if (ret) return ret;
357+
buf_len = 0;
358+
}
359+
}
360+
361+
assert(buf_len < ESCAPE_BUF_SIZE);
362+
buf[buf_len++] = '"';
363+
return PyUnicodeWriter_WriteUTF8(writer, buf, buf_len);
364+
}
365+
366+
static int
367+
write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr)
368+
{
369+
Py_ssize_t i;
370+
Py_ssize_t input_size;
371+
Py_ssize_t buf_len;
372+
const unsigned char *input;
373+
int ret;
374+
unsigned char c = 0;
375+
char buf[ESCAPE_BUF_SIZE];
376+
377+
// We don't need to escape non-ASCII chars.
378+
// So we just copy UTF-8 from pystr to buf.
379+
input = (const unsigned char*) PyUnicode_AsUTF8AndSize(pystr, &input_size);
380+
381+
ret = PyUnicodeWriter_WriteChar(writer, '"');
382+
if (ret) return ret;
383+
384+
// Fast path for strings that need no escaping at all, e.g. "id", "name"
385+
for (i = 0; i < input_size; i++) {
386+
c = input[i];
387+
if (c <= 0x1f || c == '\\' || c == '"') {
388+
break;
389+
}
390+
}
391+
if (i > 0) {
392+
ret = PyUnicodeWriter_WriteUTF8(writer, (const char *)input, i);
393+
if (ret) return ret;
394+
}
395+
if (i == input_size) {
396+
return PyUnicodeWriter_WriteChar(writer, '"');
397+
}
398+
399+
buf_len = ascii_escape_unichar(c, (unsigned char *)buf, 0);
400+
401+
for (i++; i < input_size; i++) {
402+
c = input[i];
403+
if (c <= 0x1f || c == '\\' || c == '"') {
404+
buf_len = ascii_escape_unichar(c, (unsigned char *)buf, buf_len);
337405
}
338406
else {
339-
copy_len++;
407+
buf[buf_len++] = c;
408+
}
409+
410+
if (buf_len + 6 > ESCAPE_BUF_SIZE) {
411+
ret = PyUnicodeWriter_WriteUTF8(writer, buf, buf_len);
412+
if (ret) return ret;
413+
buf_len = 0;
340414
}
341415
}
342416

343-
ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i);
344-
if (ret) return ret;
345-
return PyUnicodeWriter_WriteChar(writer, '"');
417+
assert(buf_len < ESCAPE_BUF_SIZE);
418+
buf[buf_len++] = '"';
419+
return PyUnicodeWriter_WriteUTF8(writer, buf, buf_len);
346420
}
347421

422+
#undef ESCAPE_BUF_SIZE
423+
348424
static void
349425
raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
350426
{
@@ -1293,17 +1369,15 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
12931369
s->sort_keys = sort_keys;
12941370
s->skipkeys = skipkeys;
12951371
s->allow_nan = allow_nan;
1296-
s->fast_encode = false;
1297-
s->ensure_ascii = false;
1372+
s->fast_encode = NULL;
12981373

12991374
if (PyCFunction_Check(s->encoder)) {
13001375
PyCFunction f = PyCFunction_GetFunction(s->encoder);
13011376
if (f == py_encode_basestring_ascii){
1302-
s->fast_encode = true;
1303-
s->ensure_ascii = true;
1377+
s->fast_encode = write_escaped_ascii;
13041378
}
13051379
else if (f == py_encode_basestring) {
1306-
s->fast_encode = true;
1380+
s->fast_encode = write_escaped_unicode;
13071381
}
13081382
}
13091383

@@ -1497,7 +1571,7 @@ static int
14971571
encoder_write_string(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj)
14981572
{
14991573
if (s->fast_encode) {
1500-
return write_escaped_unicode(writer, obj, s->ensure_ascii);
1574+
return s->fast_encode(writer, obj);
15011575
}
15021576

15031577
/* Return the JSON representation of a string */

0 commit comments

Comments
0 (0)
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.