Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 14e739b

Browse files
committed
gh-119182: Add PyUnicodeWriter C API
1 parent e94dbe4 commit 14e739b
Copy full SHA for 14e739b

File tree

Expand file treeCollapse file tree

3 files changed

+359
-19
lines changed
Filter options
Expand file treeCollapse file tree

3 files changed

+359
-19
lines changed

‎Include/cpython/unicodeobject.h

Copy file name to clipboardExpand all lines: Include/cpython/unicodeobject.h
+39 -2 — Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,44 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
444444
Py_ssize_t size);
445445

446446

447-
/* --- _PyUnicodeWriter API ----------------------------------------------- */
447+
/* --- Public PyUnicodeWriter API ----------------------------------------- */
448+
449+
typedef struct PyUnicodeWriter PyUnicodeWriter;
450+
451+
PyAPI_FUNC(PyUnicodeWriter*) PyUnicodeWriter_Create(void);
452+
PyAPI_FUNC(void) PyUnicodeWriter_Discard(PyUnicodeWriter *writer);
453+
PyAPI_FUNC(PyObject*) PyUnicodeWriter_Finish(PyUnicodeWriter *writer);
454+
455+
PyAPI_FUNC(void) PyUnicodeWriter_SetOverallocate(
456+
PyUnicodeWriter *writer,
457+
int overallocate);
458+
459+
PyAPI_FUNC(int) PyUnicodeWriter_WriteChar(
460+
PyUnicodeWriter *writer,
461+
Py_UCS4 ch);
462+
PyAPI_FUNC(int) PyUnicodeWriter_WriteUTF8(
463+
PyUnicodeWriter *writer,
464+
const char *str,
465+
Py_ssize_t size);
466+
467+
PyAPI_FUNC(int) PyUnicodeWriter_WriteStr(
468+
PyUnicodeWriter *writer,
469+
PyObject *str);
470+
PyAPI_FUNC(int) PyUnicodeWriter_WriteRepr(
471+
PyUnicodeWriter *writer,
472+
PyObject *obj);
473+
PyAPI_FUNC(int) PyUnicodeWriter_WriteSubstring(
474+
PyUnicodeWriter *writer,
475+
PyObject *str,
476+
Py_ssize_t start,
477+
Py_ssize_t end);
478+
PyAPI_FUNC(int) PyUnicodeWriter_Format(
479+
PyUnicodeWriter *writer,
480+
const char *format,
481+
...);
482+
483+
484+
/* --- Private _PyUnicodeWriter API --------------------------------------- */
448485

449486
typedef struct {
450487
PyObject *buffer;
@@ -466,7 +503,7 @@ typedef struct {
466503
/* If readonly is 1, buffer is a shared string (cannot be modified)
467504
and size is set to 0. */
468505
unsigned char readonly;
469-
} _PyUnicodeWriter ;
506+
} _PyUnicodeWriter;
470507

471508
// Initialize a Unicode writer.
472509
//

‎Modules/_testcapi/unicode.c

Copy file name to clipboardExpand all lines: Modules/_testcapi/unicode.c
+165 — Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,167 @@ unicode_copycharacters(PyObject *self, PyObject *args)
221221
}
222222

223223

224+
// Smoke test for the public PyUnicodeWriter API: builds the string
// "var=long value 'repr'" piece by piece using SetOverallocate(),
// WriteUTF8(), WriteChar(), WriteSubstring(), WriteStr() and WriteRepr(),
// then checks the string returned by Finish().
//
// Returns None on success. On failure returns NULL; the writer is discarded
// if one of the write steps failed.
static PyObject *
test_unicodewriter(PyObject *self, PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create();
    if (writer == NULL) {
        return NULL;
    }

    // PyUnicodeWriter_SetOverallocate()
    PyUnicodeWriter_SetOverallocate(writer, 1);

    // PyUnicodeWriter_WriteUTF8() followed by PyUnicodeWriter_WriteChar():
    // "var" then "="
    if (PyUnicodeWriter_WriteUTF8(writer, "var", -1) < 0
        || PyUnicodeWriter_WriteChar(writer, '=') < 0)
    {
        goto error;
    }

    // PyUnicodeWriter_WriteSubstring(): only "long" out of "[long]"
    PyObject *bracketed = PyUnicode_FromString("[long]");
    if (bracketed == NULL) {
        goto error;
    }
    int status = PyUnicodeWriter_WriteSubstring(writer, bracketed, 1, 5);
    // The temporary is released before the status is inspected, so that
    // the error path has nothing left to clean up but the writer.
    Py_DECREF(bracketed);
    if (status < 0) {
        goto error;
    }

    // PyUnicodeWriter_WriteStr(): the whole string, spaces included
    PyObject *value = PyUnicode_FromString(" value ");
    if (value == NULL) {
        goto error;
    }
    status = PyUnicodeWriter_WriteStr(writer, value);
    Py_DECREF(value);
    if (status < 0) {
        goto error;
    }

    // PyUnicodeWriter_WriteRepr(): writes the quoted form, 'repr'
    PyObject *to_repr = PyUnicode_FromString("repr");
    if (to_repr == NULL) {
        goto error;
    }
    status = PyUnicodeWriter_WriteRepr(writer, to_repr);
    Py_DECREF(to_repr);
    if (status < 0) {
        goto error;
    }

    // No "goto error" past this point: after Finish() the writer is not
    // discarded, even on failure.
    PyObject *built = PyUnicodeWriter_Finish(writer);
    if (built == NULL) {
        return NULL;
    }
    assert(PyUnicode_EqualToUTF8(built, "var=long value 'repr'"));
    Py_DECREF(built);

    Py_RETURN_NONE;

error:
    PyUnicodeWriter_Discard(writer);
    return NULL;
}
291+
292+
293+
// Test PyUnicodeWriter_WriteUTF8() with an ASCII chunk, a chunk holding a
// 2-byte UTF-8 sequence and a chunk holding a 3-byte UTF-8 sequence, each
// followed by a separator written with PyUnicodeWriter_WriteChar().
//
// Returns None on success, NULL on failure.
static PyObject *
test_unicodewriter_utf8(PyObject *self, PyObject *Py_UNUSED(args))
{
    // UTF-8 chunks in write order, each with the character written right
    // after it. The entry with utf8 == NULL terminates the table.
    static const struct {
        const char *utf8;
        int separator;
    } chunks[] = {
        {"ascii", '-'},
        {"latin1=\xC3\xA9", '-'},
        {"euro=\xE2\x82\xAC", '.'},
        {NULL, 0},
    };

    PyUnicodeWriter *writer = PyUnicodeWriter_Create();
    if (writer == NULL) {
        return NULL;
    }

    for (int i = 0; chunks[i].utf8 != NULL; i++) {
        if (PyUnicodeWriter_WriteUTF8(writer, chunks[i].utf8, -1) < 0) {
            goto error;
        }
        if (PyUnicodeWriter_WriteChar(writer, chunks[i].separator) < 0) {
            goto error;
        }
    }

    // After Finish() the writer is not discarded, even on failure.
    PyObject *built = PyUnicodeWriter_Finish(writer);
    if (built == NULL) {
        return NULL;
    }
    assert(PyUnicode_EqualToUTF8(built,
                                 "ascii-latin1=\xC3\xA9-euro=\xE2\x82\xAC."));
    Py_DECREF(built);

    Py_RETURN_NONE;

error:
    PyUnicodeWriter_Discard(writer);
    return NULL;
}
333+
334+
335+
// Check that PyUnicodeWriter_WriteUTF8() rejects invalid UTF-8 (a lone 0xFF
// byte) by returning a negative value with UnicodeDecodeError set.
//
// Returns None when the call fails as expected (the expected exception is
// cleared). Returns NULL with an exception set otherwise: AssertionError if
// the invalid input was accepted, or the unexpected exception raised by
// WriteUTF8() if it is not a UnicodeDecodeError.
static PyObject *
test_unicodewriter_invalid_utf8(PyObject *self, PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create();
    if (writer == NULL) {
        return NULL;
    }

    // The call under test must not live inside assert(): when NDEBUG is
    // defined the whole assert() expression is compiled out, the write
    // would never happen and this function would test nothing.
    int ret = PyUnicodeWriter_WriteUTF8(writer, "invalid=\xFF", -1);
    PyUnicodeWriter_Discard(writer);

    if (ret >= 0) {
        PyErr_SetString(PyExc_AssertionError,
                        "PyUnicodeWriter_WriteUTF8() accepted invalid UTF-8");
        return NULL;
    }
    if (!PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
        // Wrong exception type: leave it set so the caller sees it.
        return NULL;
    }
    PyErr_Clear();

    Py_RETURN_NONE;
}
350+
351+
352+
// Test PyUnicodeWriter_Format(): format "%s %i" with ("Hello", 123), append
// a '.' with PyUnicodeWriter_WriteChar() and expect "Hello 123.".
//
// Returns None on success, NULL on failure.
static PyObject *
test_unicodewriter_format(PyObject *self, PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create();
    if (writer == NULL) {
        return NULL;
    }

    // PyUnicodeWriter_Format() first, then PyUnicodeWriter_WriteChar().
    // The second call only runs if the first one succeeded.
    if (PyUnicodeWriter_Format(writer, "%s %i", "Hello", 123) < 0
        || PyUnicodeWriter_WriteChar(writer, '.') < 0)
    {
        PyUnicodeWriter_Discard(writer);
        return NULL;
    }

    // After Finish() the writer is not discarded, even on failure.
    PyObject *formatted = PyUnicodeWriter_Finish(writer);
    if (formatted == NULL) {
        return NULL;
    }
    assert(PyUnicode_EqualToUTF8(formatted, "Hello 123."));
    Py_DECREF(formatted);

    Py_RETURN_NONE;
}
383+
384+
224385
static PyMethodDef TestMethods[] = {
225386
{"unicode_new", unicode_new, METH_VARARGS},
226387
{"unicode_fill", unicode_fill, METH_VARARGS},
@@ -229,6 +390,10 @@ static PyMethodDef TestMethods[] = {
229390
{"unicode_asucs4copy", unicode_asucs4copy, METH_VARARGS},
230391
{"unicode_asutf8", unicode_asutf8, METH_VARARGS},
231392
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
393+
{"test_unicodewriter", test_unicodewriter, METH_NOARGS},
394+
{"test_unicodewriter_utf8", test_unicodewriter_utf8, METH_NOARGS},
395+
{"test_unicodewriter_invalid_utf8", test_unicodewriter_invalid_utf8, METH_NOARGS},
396+
{"test_unicodewriter_format", test_unicodewriter_format, METH_NOARGS},
232397
{NULL},
233398
};
234399

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.