Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit b66863d

Browse files
committed
use UCS4 instead of UTF8
1 parent 8e5e00b commit b66863d
Copy full SHA for b66863d

File tree

1 file changed

+41
-28
lines changed
Filter options

1 file changed

+41
-28
lines changed

‎Modules/_json.c

Copy file name to clipboard
Expand all lines: Modules/_json.c
+41 -28 — Lines changed: 41 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -303,12 +303,11 @@ escape_unicode(PyObject *pystr)
303303
return rval;
304304
}
305305

306-
#define ESCAPE_BUF_SIZE 200
307-
308306
// Take a PyUnicode pystr and write an escaped string to writer. (ensure_ascii)
309307
static int
310308
write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr)
311309
{
310+
#define ESCAPE_BUF_SIZE 200
312311
Py_ssize_t i;
313312
Py_ssize_t input_chars;
314313
Py_ssize_t buf_len;
@@ -367,60 +366,74 @@ static int
367366
write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr)
368367
{
369368
Py_ssize_t i;
370-
Py_ssize_t input_size;
371-
Py_ssize_t buf_len;
372-
const unsigned char *input;
369+
Py_ssize_t input_chars;
370+
Py_ssize_t chars = 0;
371+
const void *input;
372+
int kind;
373373
int ret;
374-
unsigned char c = 0;
375-
char buf[ESCAPE_BUF_SIZE];
374+
Py_UCS4 output[ESCAPE_BUF_SIZE];
376375

377-
// We don't need to escape non-ASCII chars.
378-
// So we just copy UTF-8 from pystr to buf.
379-
input = (const unsigned char*) PyUnicode_AsUTF8AndSize(pystr, &input_size);
376+
input_chars = PyUnicode_GET_LENGTH(pystr);
377+
input = PyUnicode_DATA(pystr);
378+
kind = PyUnicode_KIND(pystr);
380379

381380
ret = PyUnicodeWriter_WriteChar(writer, '"');
382381
if (ret) return ret;
383382

384383
// Fast path for string doesn't need escape at all: e.g. "id", "name"
385-
for (i = 0; i < input_size; i++) {
386-
c = input[i];
384+
for (i = 0; i < input_chars; i++) {
385+
Py_UCS4 c = PyUnicode_READ(kind, input, i);
387386
if (c <= 0x1f || c == '\\' || c == '"') {
388387
break;
389388
}
390389
}
391390
if (i > 0) {
392-
ret = PyUnicodeWriter_WriteUTF8(writer, (const char *)input, i);
391+
ret = PyUnicodeWriter_WriteSubstring(writer, pystr, 0, i);
393392
if (ret) return ret;
394393
}
395-
if (i == input_size) {
394+
if (i == input_chars) {
396395
return PyUnicodeWriter_WriteChar(writer, '"');
397396
}
398397

399-
buf_len = ascii_escape_unichar(c, (unsigned char *)buf, 0);
398+
for (; i < input_chars; i++) {
399+
Py_UCS4 c = PyUnicode_READ(kind, input, i);
400400

401-
for (i++; i < input_size; i++) {
402-
c = input[i];
403-
if (c <= 0x1f || c == '\\' || c == '"') {
404-
buf_len = ascii_escape_unichar(c, (unsigned char *)buf, buf_len);
405-
}
406-
else {
407-
buf[buf_len++] = c;
401+
// Same to ENCODE_OUTPUT in escape_unicode
402+
switch (c) {
403+
case '\\': output[chars++] = '\\'; output[chars++] = c; break;
404+
case '"': output[chars++] = '\\'; output[chars++] = c; break;
405+
case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break;
406+
case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break;
407+
case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break;
408+
case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break;
409+
case '\t': output[chars++] = '\\'; output[chars++] = 't'; break;
410+
default:
411+
if (c <= 0x1f) {
412+
output[chars++] = '\\';
413+
output[chars++] = 'u';
414+
output[chars++] = '0';
415+
output[chars++] = '0';
416+
output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
417+
output[chars++] = Py_hexdigits[(c ) & 0xf];
418+
} else {
419+
output[chars++] = c;
420+
}
408421
}
409422

410-
if (buf_len + 6 > ESCAPE_BUF_SIZE) {
411-
ret = PyUnicodeWriter_WriteUTF8(writer, buf, buf_len);
423+
if (chars + 6 > ESCAPE_BUF_SIZE) {
424+
ret = PyUnicodeWriter_WriteUCS4(writer, output, chars);
412425
if (ret) return ret;
413-
buf_len = 0;
426+
chars = 0;
414427
}
415428
}
416429

417430
assert(buf_len < ESCAPE_BUF_SIZE);
418-
buf[buf_len++] = '"';
419-
return PyUnicodeWriter_WriteUTF8(writer, buf, buf_len);
431+
output[chars++] = '"';
432+
return PyUnicodeWriter_WriteUCS4(writer, output, chars);
420433
}
421-
422434
#undef ESCAPE_BUF_SIZE
423435

436+
424437
static void
425438
raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
426439
{

0 commit comments

Comments
0 (0)
Morty Proxy: this is a proxified and sanitized view of the page; visit the original site.