Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 36f341c

Browse filesBrowse files
authored
gh-127787: allow retrieving the clipped slice length in _PyUnicodeError_GetParams (GH-128980)
1 parent bf150f6 commit 36f341c
Copy full SHA for 36f341c

File tree

Expand file treeCollapse file tree

2 files changed

+110
-15
lines changed
Filter options
Expand file treeCollapse file tree

2 files changed

+110
-15
lines changed

‎Include/internal/pycore_pyerrors.h

Copy file name to clipboardExpand all lines: Include/internal/pycore_pyerrors.h
+1-1Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,9 +196,9 @@ extern int _PyUnicodeError_GetParams(
196196
Py_ssize_t *objlen,
197197
Py_ssize_t *start,
198198
Py_ssize_t *end,
199+
Py_ssize_t *slen,
199200
int as_bytes);
200201

201-
202202
#ifdef __cplusplus
203203
}
204204
#endif

‎Objects/exceptions.c

Copy file name to clipboardExpand all lines: Objects/exceptions.c
+109-14Lines changed: 109 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2954,8 +2954,10 @@ unicode_error_set_end_impl(PyObject *self, Py_ssize_t end)
29542954
* The 'start' can be negative or not, but when adjusting the value,
29552955
* we clip it in [0, max(0, objlen - 1)] and do not interpret it as
29562956
* a relative offset.
2957+
*
2958+
* This function always succeeds.
29572959
*/
2958-
static inline Py_ssize_t
2960+
static Py_ssize_t
29592961
unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen)
29602962
{
29612963
assert(objlen >= 0);
@@ -2969,14 +2971,34 @@ unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen)
29692971
}
29702972

29712973

2974+
/* Assert some properties of the adjusted 'start' value. */
2975+
#ifndef NDEBUG
2976+
static void
2977+
assert_adjusted_unicode_error_start(Py_ssize_t start, Py_ssize_t objlen)
2978+
{
2979+
assert(objlen >= 0);
2980+
/* in the future, `min_start` may be something else */
2981+
Py_ssize_t min_start = 0;
2982+
assert(start >= min_start);
2983+
/* in the future, `max_start` may be something else */
2984+
Py_ssize_t max_start = Py_MAX(min_start, objlen - 1);
2985+
assert(start <= max_start);
2986+
}
2987+
#else
2988+
#define assert_adjusted_unicode_error_start(...)
2989+
#endif
2990+
2991+
29722992
/*
29732993
* Adjust the (exclusive) 'end' value of a UnicodeError object.
29742994
*
29752995
* The 'end' can be negative or not, but when adjusting the value,
29762996
* we clip it in [min(1, objlen), max(min(1, objlen), objlen)] and
29772997
* do not interpret it as a relative offset.
2998+
*
2999+
* This function always succeeds.
29783000
*/
2979-
static inline Py_ssize_t
3001+
static Py_ssize_t
29803002
unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen)
29813003
{
29823004
assert(objlen >= 0);
@@ -2990,6 +3012,59 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen)
29903012
}
29913013

29923014

3015+
/* Assert some properties of the adjusted 'end' value. */
3016+
#ifndef NDEBUG
3017+
static void
3018+
assert_adjusted_unicode_error_end(Py_ssize_t end, Py_ssize_t objlen)
3019+
{
3020+
assert(objlen >= 0);
3021+
/* in the future, `min_end` may be something else */
3022+
Py_ssize_t min_end = Py_MIN(1, objlen);
3023+
assert(end >= min_end);
3024+
/* in the future, `max_end` may be something else */
3025+
Py_ssize_t max_end = Py_MAX(min_end, objlen);
3026+
assert(end <= max_end);
3027+
}
3028+
#else
3029+
#define assert_adjusted_unicode_error_end(...)
3030+
#endif
3031+
3032+
3033+
/*
3034+
* Adjust the length of the range described by a UnicodeError object.
3035+
*
3036+
* The 'start' and 'end' arguments must have been obtained by
3037+
* unicode_error_adjust_start() and unicode_error_adjust_end().
3038+
*
3039+
* The result is clipped in [0, objlen]. By construction, it
3040+
* will never exceed 'objlen' as 'start' is non-negative and 'end'
3041+
* is at most 'objlen'.
3042+
*/
3043+
static Py_ssize_t
3044+
unicode_error_adjust_len(Py_ssize_t start, Py_ssize_t end, Py_ssize_t objlen)
3045+
{
3046+
assert_adjusted_unicode_error_start(start, objlen);
3047+
assert_adjusted_unicode_error_end(end, objlen);
3048+
Py_ssize_t ranlen = end - start;
3049+
assert(ranlen <= objlen);
3050+
return ranlen < 0 ? 0 : ranlen;
3051+
}
3052+
3053+
3054+
/* Assert some properties of the adjusted range 'len' value. */
3055+
#ifndef NDEBUG
3056+
static void
3057+
assert_adjusted_unicode_error_len(Py_ssize_t ranlen, Py_ssize_t objlen)
3058+
{
3059+
assert(objlen >= 0);
3060+
assert(ranlen >= 0);
3061+
assert(ranlen <= objlen);
3062+
}
3063+
#else
3064+
#define assert_adjusted_unicode_error_len(...)
3065+
#endif
3066+
3067+
29933068
/*
29943069
* Get various common parameters of a UnicodeError object.
29953070
*
@@ -3004,22 +3079,24 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen)
30043079
* objlen The 'object' length.
30053080
* start The clipped 'start' attribute.
30063081
* end The clipped 'end' attribute.
3082+
* slen The length of the slice described by the clipped 'start'
3083+
* and 'end' values. It always lies in [0, objlen].
30073084
*
30083085
* An output parameter can be NULL to indicate that
30093086
* the corresponding value does not need to be stored.
30103087
*
30113088
* Input parameter:
30123089
*
3013-
* as_bytes If 1, the error's 'object' attribute must be a bytes object,
3014-
* i.e. the call is for a `UnicodeDecodeError`. Otherwise, the
3015-
* 'object' attribute must be a string.
3090+
* as_bytes If true, the error's 'object' attribute must be a `bytes`,
3091+
* i.e. 'self' is a `UnicodeDecodeError` instance. Otherwise,
3092+
* the 'object' attribute must be a string.
30163093
*
30173094
* A TypeError is raised if the 'object' type is incompatible.
30183095
*/
30193096
int
30203097
_PyUnicodeError_GetParams(PyObject *self,
30213098
PyObject **obj, Py_ssize_t *objlen,
3022-
Py_ssize_t *start, Py_ssize_t *end,
3099+
Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t *slen,
30233100
int as_bytes)
30243101
{
30253102
assert(self != NULL);
@@ -3034,16 +3111,30 @@ _PyUnicodeError_GetParams(PyObject *self,
30343111
if (objlen != NULL) {
30353112
*objlen = n;
30363113
}
3114+
3115+
Py_ssize_t start_value = -1;
3116+
if (start != NULL || slen != NULL) {
3117+
start_value = unicode_error_adjust_start(exc->start, n);
3118+
}
30373119
if (start != NULL) {
3038-
*start = unicode_error_adjust_start(exc->start, n);
3039-
assert(*start >= 0);
3040-
assert(*start <= n);
3120+
assert_adjusted_unicode_error_start(start_value, n);
3121+
*start = start_value;
3122+
}
3123+
3124+
Py_ssize_t end_value = -1;
3125+
if (end != NULL || slen != NULL) {
3126+
end_value = unicode_error_adjust_end(exc->end, n);
30413127
}
30423128
if (end != NULL) {
3043-
*end = unicode_error_adjust_end(exc->end, n);
3044-
assert(*end >= 0);
3045-
assert(*end <= n);
3129+
assert_adjusted_unicode_error_end(end_value, n);
3130+
*end = end_value;
3131+
}
3132+
3133+
if (slen != NULL) {
3134+
*slen = unicode_error_adjust_len(start_value, end_value, n);
3135+
assert_adjusted_unicode_error_len(*slen, n);
30463136
}
3137+
30473138
if (obj != NULL) {
30483139
*obj = r;
30493140
}
@@ -3111,7 +3202,9 @@ static inline int
31113202
unicode_error_get_start_impl(PyObject *self, Py_ssize_t *start, int as_bytes)
31123203
{
31133204
assert(self != NULL);
3114-
return _PyUnicodeError_GetParams(self, NULL, NULL, start, NULL, as_bytes);
3205+
return _PyUnicodeError_GetParams(self, NULL, NULL,
3206+
start, NULL, NULL,
3207+
as_bytes);
31153208
}
31163209

31173210

@@ -3177,7 +3270,9 @@ static inline int
31773270
unicode_error_get_end_impl(PyObject *self, Py_ssize_t *end, int as_bytes)
31783271
{
31793272
assert(self != NULL);
3180-
return _PyUnicodeError_GetParams(self, NULL, NULL, NULL, end, as_bytes);
3273+
return _PyUnicodeError_GetParams(self, NULL, NULL,
3274+
NULL, end, NULL,
3275+
as_bytes);
31813276
}
31823277

31833278

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.