@@ -303,12 +303,11 @@ escape_unicode(PyObject *pystr)
303
303
return rval ;
304
304
}
305
305
306
- #define ESCAPE_BUF_SIZE 200
307
-
308
306
// Take a PyUnicode pystr and write an escaped string to writer. (ensure_ascii)
309
307
static int
310
308
write_escaped_ascii (PyUnicodeWriter * writer , PyObject * pystr )
311
309
{
310
+ #define ESCAPE_BUF_SIZE 200
312
311
Py_ssize_t i ;
313
312
Py_ssize_t input_chars ;
314
313
Py_ssize_t buf_len ;
@@ -367,60 +366,74 @@ static int
367
366
// Take a PyUnicode pystr and write it to writer as a double-quoted JSON
// string, escaping only '"', '\\' and control characters (<= 0x1f).
// All other characters, including non-ASCII ones, are copied unchanged.
//
// Returns the first non-zero status reported by a PyUnicodeWriter_* call,
// otherwise the status of the final write (0 on success, per the
// PyUnicodeWriter API).
static int
write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr)
{
    Py_ssize_t i;
    Py_ssize_t chars = 0;               // code points pending in output[]
    Py_UCS4 output[ESCAPE_BUF_SIZE];    // staging buffer, flushed when nearly full
    int ret;

    const Py_ssize_t input_chars = PyUnicode_GET_LENGTH(pystr);
    const void *input = PyUnicode_DATA(pystr);
    const int kind = PyUnicode_KIND(pystr);

    ret = PyUnicodeWriter_WriteChar(writer, '"');
    if (ret) {
        return ret;
    }

    // Fast path for the leading run that needs no escaping at all
    // (e.g. "id", "name"): find the first character that must be escaped
    // and copy everything before it straight from pystr.
    for (i = 0; i < input_chars; i++) {
        Py_UCS4 c = PyUnicode_READ(kind, input, i);
        if (c <= 0x1f || c == '\\' || c == '"') {
            break;
        }
    }
    if (i > 0) {
        ret = PyUnicodeWriter_WriteSubstring(writer, pystr, 0, i);
        if (ret) {
            return ret;
        }
    }
    if (i == input_chars) {
        // Nothing needed escaping: just close the quote.
        return PyUnicodeWriter_WriteChar(writer, '"');
    }

    // Slow path: escape the remainder through the staging buffer.
    for (; i < input_chars; i++) {
        Py_UCS4 c = PyUnicode_READ(kind, input, i);

        // Same as ENCODE_OUTPUT in escape_unicode
        switch (c) {
        case '\\':
        case '"':
            output[chars++] = '\\';
            output[chars++] = c;
            break;
        case '\b':
            output[chars++] = '\\';
            output[chars++] = 'b';
            break;
        case '\f':
            output[chars++] = '\\';
            output[chars++] = 'f';
            break;
        case '\n':
            output[chars++] = '\\';
            output[chars++] = 'n';
            break;
        case '\r':
            output[chars++] = '\\';
            output[chars++] = 'r';
            break;
        case '\t':
            output[chars++] = '\\';
            output[chars++] = 't';
            break;
        default:
            if (c <= 0x1f) {
                // Remaining control characters become \u00XX.
                output[chars++] = '\\';
                output[chars++] = 'u';
                output[chars++] = '0';
                output[chars++] = '0';
                output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
                output[chars++] = Py_hexdigits[c & 0xf];
            }
            else {
                output[chars++] = c;
            }
        }

        // One input character expands to at most 6 output characters, so
        // flush as soon as fewer than 6 free slots remain.
        if (chars + 6 > ESCAPE_BUF_SIZE) {
            ret = PyUnicodeWriter_WriteUCS4(writer, output, chars);
            if (ret) {
                return ret;
            }
            chars = 0;
        }
    }

    // The flush check above guarantees room for the closing quote.
    // (The counter is `chars`; the former `buf_len` no longer exists.)
    assert(chars < ESCAPE_BUF_SIZE);
    output[chars++] = '"';
    return PyUnicodeWriter_WriteUCS4(writer, output, chars);
}
421
-
422
434
#undef ESCAPE_BUF_SIZE
423
435
436
+
424
437
static void
425
438
raise_errmsg (const char * msg , PyObject * s , Py_ssize_t end )
426
439
{
0 commit comments