Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit fc4a44b

Browse files
authored
bpo-33954: Fix _PyUnicode_InsertThousandsGrouping() (GH-10623) (GH-10718) (GH-10720)
Fix str.format(), float.__format__() and complex.__format__() methods for non-ASCII decimal point when using the "n" formatter. Rewrite _PyUnicode_InsertThousandsGrouping(): it now requires a _PyUnicodeWriter object for the buffer and a Python str object for digits. (cherry picked from commit 59423e3) (cherry picked from commit 6f5fa1b)
1 parent 6c12091 commit fc4a44b
Copy full SHA for fc4a44b

File tree

Expand file treeCollapse file tree

5 files changed

+215
-247
lines changed
Filter options
Expand file treeCollapse file tree

5 files changed

+215
-247
lines changed

‎Include/unicodeobject.h

Copy file name to clipboardExpand all lines: Include/unicodeobject.h
+3-3Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2143,10 +2143,10 @@ PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
21432143
see Objects/stringlib/localeutil.h */
21442144
#ifndef Py_LIMITED_API
21452145
PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
2146-
PyObject *unicode,
2147-
Py_ssize_t index,
2146+
_PyUnicodeWriter *writer,
21482147
Py_ssize_t n_buffer,
2149-
void *digits,
2148+
PyObject *digits,
2149+
Py_ssize_t d_pos,
21502150
Py_ssize_t n_digits,
21512151
Py_ssize_t min_width,
21522152
const char *grouping,
+3Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix :meth:`str.format`, :meth:`float.__format__` and
2+
:meth:`complex.__format__` methods for non-ASCII decimal point when using
3+
the "n" formatter.

‎Objects/stringlib/localeutil.h

Copy file name to clipboard
+35-133Lines changed: 35 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,24 @@
1-
/* stringlib: locale related helpers implementation */
2-
3-
#include <locale.h>
4-
5-
#if !STRINGLIB_IS_UNICODE
6-
# error "localeutil.h is specific to Unicode"
7-
#endif
1+
/* _PyUnicode_InsertThousandsGrouping() helper functions */
82

93
typedef struct {
104
const char *grouping;
115
char previous;
126
Py_ssize_t i; /* Where we're currently pointing in grouping. */
13-
} STRINGLIB(GroupGenerator);
7+
} GroupGenerator;
8+
149

1510
static void
16-
STRINGLIB(GroupGenerator_init)(STRINGLIB(GroupGenerator) *self, const char *grouping)
11+
GroupGenerator_init(GroupGenerator *self, const char *grouping)
1712
{
1813
self->grouping = grouping;
1914
self->i = 0;
2015
self->previous = 0;
2116
}
2217

18+
2319
/* Returns the next grouping, or 0 to signify end. */
2420
static Py_ssize_t
25-
STRINGLIB(GroupGenerator_next)(STRINGLIB(GroupGenerator) *self)
21+
GroupGenerator_next(GroupGenerator *self)
2622
{
2723
/* Note that we don't really do much error checking here. If a
2824
grouping string contains just CHAR_MAX, for example, then just
@@ -43,138 +39,44 @@ STRINGLIB(GroupGenerator_next)(STRINGLIB(GroupGenerator) *self)
4339
}
4440
}
4541

42+
4643
/* Fill in some digits, leading zeros, and thousands separator. All
4744
are optional, depending on when we're called. */
4845
static void
49-
STRINGLIB(fill)(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end,
50-
Py_ssize_t n_chars, Py_ssize_t n_zeros, STRINGLIB_CHAR* thousands_sep,
51-
Py_ssize_t thousands_sep_len)
46+
InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos,
47+
PyObject *digits, Py_ssize_t *digits_pos,
48+
Py_ssize_t n_chars, Py_ssize_t n_zeros,
49+
PyObject *thousands_sep, Py_ssize_t thousands_sep_len,
50+
Py_UCS4 *maxchar)
5251
{
53-
Py_ssize_t i;
52+
if (!writer) {
53+
/* if maxchar > 127, maxchar is already set */
54+
if (*maxchar == 127 && thousands_sep) {
55+
Py_UCS4 maxchar2 = PyUnicode_MAX_CHAR_VALUE(thousands_sep);
56+
*maxchar = Py_MAX(*maxchar, maxchar2);
57+
}
58+
return;
59+
}
5460

5561
if (thousands_sep) {
56-
*buffer_end -= thousands_sep_len;
62+
*buffer_pos -= thousands_sep_len;
5763

5864
/* Copy the thousands_sep chars into the buffer. */
59-
memcpy(*buffer_end, thousands_sep,
60-
thousands_sep_len * STRINGLIB_SIZEOF_CHAR);
61-
}
62-
63-
*buffer_end -= n_chars;
64-
*digits_end -= n_chars;
65-
memcpy(*buffer_end, *digits_end, n_chars * sizeof(STRINGLIB_CHAR));
66-
67-
*buffer_end -= n_zeros;
68-
for (i = 0; i < n_zeros; i++)
69-
(*buffer_end)[i] = '0';
70-
}
71-
72-
/**
73-
* InsertThousandsGrouping:
74-
* @buffer: A pointer to the start of a string.
75-
* @n_buffer: Number of characters in @buffer.
76-
* @digits: A pointer to the digits we're reading from. If count
77-
* is non-NULL, this is unused.
78-
* @n_digits: The number of digits in the string, in which we want
79-
* to put the grouping chars.
80-
* @min_width: The minimum width of the digits in the output string.
81-
* Output will be zero-padded on the left to fill.
82-
* @grouping: see definition in localeconv().
83-
* @thousands_sep: see definition in localeconv().
84-
*
85-
* There are 2 modes: counting and filling. If @buffer is NULL,
86-
* we are in counting mode, else filling mode.
87-
* If counting, the required buffer size is returned.
88-
* If filling, we know the buffer will be large enough, so we don't
89-
* need to pass in the buffer size.
90-
* Inserts thousand grouping characters (as defined by grouping and
91-
* thousands_sep) into the string between buffer and buffer+n_digits.
92-
*
93-
* Return value: 0 on error, else 1. Note that no error can occur if
94-
* count is non-NULL.
95-
*
96-
* This name won't be used, the includer of this file should define
97-
* it to be the actual function name, based on unicode or string.
98-
*
99-
* As closely as possible, this code mimics the logic in decimal.py's
100-
_insert_thousands_sep().
101-
**/
102-
static Py_ssize_t
103-
STRINGLIB(InsertThousandsGrouping)(
104-
STRINGLIB_CHAR *buffer,
105-
Py_ssize_t n_buffer,
106-
STRINGLIB_CHAR *digits,
107-
Py_ssize_t n_digits,
108-
Py_ssize_t min_width,
109-
const char *grouping,
110-
STRINGLIB_CHAR *thousands_sep,
111-
Py_ssize_t thousands_sep_len)
112-
{
113-
Py_ssize_t count = 0;
114-
Py_ssize_t n_zeros;
115-
int loop_broken = 0;
116-
int use_separator = 0; /* First time through, don't append the
117-
separator. They only go between
118-
groups. */
119-
STRINGLIB_CHAR *buffer_end = NULL;
120-
STRINGLIB_CHAR *digits_end = NULL;
121-
Py_ssize_t l;
122-
Py_ssize_t n_chars;
123-
Py_ssize_t remaining = n_digits; /* Number of chars remaining to
124-
be looked at */
125-
/* A generator that returns all of the grouping widths, until it
126-
returns 0. */
127-
STRINGLIB(GroupGenerator) groupgen;
128-
STRINGLIB(GroupGenerator_init)(&groupgen, grouping);
129-
130-
if (buffer) {
131-
buffer_end = buffer + n_buffer;
132-
digits_end = digits + n_digits;
133-
}
134-
135-
while ((l = STRINGLIB(GroupGenerator_next)(&groupgen)) > 0) {
136-
l = Py_MIN(l, Py_MAX(Py_MAX(remaining, min_width), 1));
137-
n_zeros = Py_MAX(0, l - remaining);
138-
n_chars = Py_MAX(0, Py_MIN(remaining, l));
139-
140-
/* Use n_zero zero's and n_chars chars */
141-
142-
/* Count only, don't do anything. */
143-
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
144-
145-
if (buffer) {
146-
/* Copy into the output buffer. */
147-
STRINGLIB(fill)(&digits_end, &buffer_end, n_chars, n_zeros,
148-
use_separator ? thousands_sep : NULL, thousands_sep_len);
149-
}
150-
151-
/* Use a separator next time. */
152-
use_separator = 1;
153-
154-
remaining -= n_chars;
155-
min_width -= l;
156-
157-
if (remaining <= 0 && min_width <= 0) {
158-
loop_broken = 1;
159-
break;
160-
}
161-
min_width -= thousands_sep_len;
65+
_PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
66+
thousands_sep, 0,
67+
thousands_sep_len);
16268
}
163-
if (!loop_broken) {
164-
/* We left the loop without using a break statement. */
16569

166-
l = Py_MAX(Py_MAX(remaining, min_width), 1);
167-
n_zeros = Py_MAX(0, l - remaining);
168-
n_chars = Py_MAX(0, Py_MIN(remaining, l));
169-
170-
/* Use n_zero zero's and n_chars chars */
171-
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
172-
if (buffer) {
173-
/* Copy into the output buffer. */
174-
STRINGLIB(fill)(&digits_end, &buffer_end, n_chars, n_zeros,
175-
use_separator ? thousands_sep : NULL, thousands_sep_len);
176-
}
70+
*buffer_pos -= n_chars;
71+
*digits_pos -= n_chars;
72+
_PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
73+
digits, *digits_pos,
74+
n_chars);
75+
76+
if (n_zeros) {
77+
*buffer_pos -= n_zeros;
78+
enum PyUnicode_Kind kind = PyUnicode_KIND(writer->buffer);
79+
void *data = PyUnicode_DATA(writer->buffer);
80+
FILL(kind, data, '0', *buffer_pos, n_zeros);
17781
}
178-
return count;
17982
}
180-

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.