Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Unify PyUnicode_Count and unicode_count #97982

Copy link
Copy link
Closed
@sobolevn

Description

@sobolevn
Issue body actions

Feature or enhancement

Right now PyUnicode_Count from

/*
 * Count matches of `substr` inside the slice [start, end) of `str`.
 *
 * Both arguments are first validated with ensure_unicode(); if either check
 * fails the function returns -1.  `start` and `end` are normalized against
 * the length of `str` by ADJUST_INDICES before the search.
 *
 * Returns:
 *   - the value produced by the kind-specific *_count helper on success;
 *   - 0 when the kind of `substr` is greater than the kind of `str`, or when
 *     `substr` is longer than the normalized slice;
 *   - -1 when validation fails or unicode_askind() returns NULL.
 *
 * When the two kinds differ, the data of `substr` is converted to the kind
 * of `str` through unicode_askind(); that temporary buffer is released with
 * PyMem_Free() before returning.
 */
Py_ssize_t
PyUnicode_Count(PyObject *str,
                PyObject *substr,
                Py_ssize_t start,
                Py_ssize_t end)
{
    if (ensure_unicode(str) < 0 || ensure_unicode(substr) < 0) {
        return -1;
    }

    const int str_kind = PyUnicode_KIND(str);
    const int sub_kind = PyUnicode_KIND(substr);
    if (sub_kind > str_kind) {
        return 0;
    }

    const Py_ssize_t str_len = PyUnicode_GET_LENGTH(str);
    const Py_ssize_t sub_len = PyUnicode_GET_LENGTH(substr);
    ADJUST_INDICES(start, end, str_len);
    if (end - start < sub_len) {
        return 0;
    }

    const void *haystack = PyUnicode_DATA(str);
    const void *needle = PyUnicode_DATA(substr);
    /* True when `needle` is replaced by a buffer from unicode_askind(). */
    const int converted = (sub_kind != str_kind);
    /* Stays -1 only if the conversion below fails. */
    Py_ssize_t count = -1;

    if (converted) {
        needle = unicode_askind(sub_kind, needle, sub_len, str_kind);
        if (needle == NULL) {
            goto done;
        }
    }

    /* Dispatch on the kind of `str`; `needle` now has the same kind. */
    switch (str_kind) {
    case PyUnicode_1BYTE_KIND:
        if (PyUnicode_IS_ASCII(str) && PyUnicode_IS_ASCII(substr)) {
            count = asciilib_count(
                ((const Py_UCS1*)haystack) + start, end - start,
                needle, sub_len, PY_SSIZE_T_MAX
                );
        }
        else {
            count = ucs1lib_count(
                ((const Py_UCS1*)haystack) + start, end - start,
                needle, sub_len, PY_SSIZE_T_MAX
                );
        }
        break;
    case PyUnicode_2BYTE_KIND:
        count = ucs2lib_count(
            ((const Py_UCS2*)haystack) + start, end - start,
            needle, sub_len, PY_SSIZE_T_MAX
            );
        break;
    case PyUnicode_4BYTE_KIND:
        count = ucs4lib_count(
            ((const Py_UCS4*)haystack) + start, end - start,
            needle, sub_len, PY_SSIZE_T_MAX
            );
        break;
    default:
        Py_UNREACHABLE();
    }

done:
    /* `needle` differs from the data of `substr` exactly when the kinds
       differ (this also holds when the conversion returned NULL). */
    assert((sub_kind != str_kind) == (needle != PyUnicode_DATA(substr)));
    if (converted) {
        PyMem_Free((void *)needle);
    }
    return count;
}
and unicode_count from

cpython/Objects/unicodeobject.c

Lines 10854 to 10916 in cbdeda8

/*
 * Implementation of the "count" operation for `self`.
 *
 * `args` is unpacked by parse_args_finds_unicode() into a substring and an
 * optional start/end pair (defaulting to 0 and PY_SSIZE_T_MAX); the indices
 * are then normalized against the length of `self` by ADJUST_INDICES.
 *
 * Returns a new integer object holding the value produced by the
 * kind-specific *_count helper, an integer object for 0 when the kind of the
 * substring is greater than the kind of `self` or the substring is longer
 * than the normalized slice, or NULL when argument parsing fails or
 * unicode_askind() returns NULL.
 *
 * When the two kinds differ, the substring data is converted to the kind of
 * `self` through unicode_askind(); that temporary buffer is released with
 * PyMem_Free() before returning.
 */
static PyObject *
unicode_count(PyObject *self, PyObject *args)
{
    PyObject *substring = NULL;  /* initialize to fix a compiler warning */
    Py_ssize_t start = 0;
    Py_ssize_t end = PY_SSIZE_T_MAX;

    if (!parse_args_finds_unicode("count", args, &substring, &start, &end)) {
        return NULL;
    }

    const int self_kind = PyUnicode_KIND(self);
    const int sub_kind = PyUnicode_KIND(substring);
    if (sub_kind > self_kind) {
        return PyLong_FromLong(0);
    }

    const Py_ssize_t self_len = PyUnicode_GET_LENGTH(self);
    const Py_ssize_t sub_len = PyUnicode_GET_LENGTH(substring);
    ADJUST_INDICES(start, end, self_len);
    if (end - start < sub_len) {
        return PyLong_FromLong(0);
    }

    const void *haystack = PyUnicode_DATA(self);
    const void *needle = PyUnicode_DATA(substring);
    /* True when `needle` is replaced by a buffer from unicode_askind(). */
    const int converted = (sub_kind != self_kind);

    if (converted) {
        needle = unicode_askind(sub_kind, needle, sub_len, self_kind);
        if (needle == NULL) {
            return NULL;
        }
    }

    /* Dispatch on the kind of `self`; `needle` now has the same kind. */
    Py_ssize_t hits;
    switch (self_kind) {
    case PyUnicode_1BYTE_KIND:
        hits = ucs1lib_count(
            ((const Py_UCS1*)haystack) + start, end - start,
            needle, sub_len, PY_SSIZE_T_MAX
            );
        break;
    case PyUnicode_2BYTE_KIND:
        hits = ucs2lib_count(
            ((const Py_UCS2*)haystack) + start, end - start,
            needle, sub_len, PY_SSIZE_T_MAX
            );
        break;
    case PyUnicode_4BYTE_KIND:
        hits = ucs4lib_count(
            ((const Py_UCS4*)haystack) + start, end - start,
            needle, sub_len, PY_SSIZE_T_MAX
            );
        break;
    default:
        Py_UNREACHABLE();
    }

    /* Build the return object first, then drop the temporary buffer. */
    PyObject *result = PyLong_FromSsize_t(hits);

    /* `needle` is the substring's own data exactly when the kinds match. */
    assert((sub_kind == self_kind) == (needle == PyUnicode_DATA(substring)));
    if (converted) {
        PyMem_Free((void *)needle);
    }
    return result;
}
share a lot of code.

They can be unified, because they do the same thing.

Pitch

Citing @encukou:

Apparently unicode_count missed an optimization in 2011, otherwise they're equivalent (except arg parsing & converting the return value). Merging them could add the optimization to unicode_count.
If you want to work on that, note that there's also anylib_count that duplicates the main switch.

Previous discussion

Link: #96929

PR in the works.

Metadata

Metadata

Assignees

Labels

interpreter-core (Objects, Python, Grammar, and Parser dirs), type-feature (A feature request or enhancement)

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions

    Morty Proxy This is a proxified and sanitized view of the page, visit original site.