Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit e63d7da

Browse files
authored
gh-94808: Cover PyUnicode_Count in CAPI (#96929)
1 parent e39ae6b commit e63d7da
Copy full SHA for e63d7da

File tree

2 files changed

+59
-0
lines changed
Filter options

2 files changed

+59
-0
lines changed

‎Lib/test/test_unicode.py

Copy file name to clipboard · Expand all lines: Lib/test/test_unicode.py
+38 · Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2945,6 +2945,44 @@ def test_asutf8andsize(self):
29452945
self.assertEqual(unicode_asutf8andsize(nonbmp), (b'\xf4\x8f\xbf\xbf', 4))
29462946
self.assertRaises(UnicodeEncodeError, unicode_asutf8andsize, 'a\ud800b\udfffc')
29472947

2948+
# Test PyUnicode_Count()
@support.cpython_only
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
def test_count(self):
    # Drives PyUnicode_Count() through the _testcapi.unicode_count() wrapper,
    # which takes (str, substr, start, end) and returns the match count.
    from _testcapi import unicode_count

    st = 'abcabd'
    size = len(st)

    # Each row: (string, substring, start, end, expected count).
    slicing_cases = (
        (st, 'a', 0, size, 2),
        (st, 'ab', 0, size, 2),
        (st, 'abc', 0, size, 1),
        (st, 'а', 0, size, 0),  # cyrillic "a"
        # start < end
        (st, 'a', 3, size, 1),
        (st, 'a', 4, size, 0),
        (st, 'a', 0, sys.maxsize, 2),
        # start >= end
        (st, 'abc', 0, 0, 0),
        (st, 'a', 3, 2, 0),
        (st, 'a', sys.maxsize, 5, 0),
        # negative
        (st, 'ab', -size, -1, 2),
        (st, 'a', -size, -3, 1),
    )
    for string, sub, start, end, expected in slicing_cases:
        self.assertEqual(unicode_count(string, sub, start, end), expected)

    # wrong args: too few arguments, or a non-str where a str is required
    bad_calls = (
        ('a', 'a'),
        ('a', 'a', 1),
        (1, 'a', 0, 1),
        ('a', 1, 0, 1),
    )
    for bad_args in bad_calls:
        self.assertRaises(TypeError, unicode_count, *bad_args)

    # empty string, as the haystack and/or as the needle
    empty_cases = (
        ('abc', '', 0, 3, 4),
        ('abc', '', 1, 3, 3),
        ('', '', 0, 1, 1),
        ('', 'a', 0, 1, 0),
    )
    for string, sub, start, end, expected in empty_cases:
        self.assertEqual(unicode_count(string, sub, start, end), expected)

    # different unicode kinds: every character occurs once in its own
    # string and never in the ASCII-only `st`
    samples = ("\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1")
    for uni in samples:
        uni_size = len(uni)
        for ch in uni:
            self.assertEqual(unicode_count(uni, ch, 0, uni_size), 1)
            self.assertEqual(unicode_count(st, ch, 0, size), 0)
2985+
29482986
# Test PyUnicode_FindChar()
29492987
@support.cpython_only
29502988
@unittest.skipIf(_testcapi is None, 'need _testcapi module')

‎Modules/_testcapi/unicode.c

Copy file name to clipboard · Expand all lines: Modules/_testcapi/unicode.c
+21 · Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -223,6 +223,26 @@ unicode_asutf8andsize(PyObject *self, PyObject *args)
223223
return Py_BuildValue("(Nn)", result, utf8_len);
224224
}
225225

226+
/*
 * Test wrapper exposing PyUnicode_Count() to Python code.
 *
 * args must hold four items: two str objects (the string to search and the
 * substring to look for) followed by the start and end indices.
 *
 * Returns the count from PyUnicode_Count() as a Python int.  Returns NULL
 * when argument parsing fails or when PyUnicode_Count() reports -1.
 */
static PyObject *
unicode_count(PyObject *self, PyObject *args)
{
    PyObject *haystack;
    PyObject *needle;
    Py_ssize_t start;
    Py_ssize_t end;

    if (!PyArg_ParseTuple(args, "UUnn:unicode_count",
                          &haystack, &needle, &start, &end)) {
        return NULL;
    }

    Py_ssize_t count = PyUnicode_Count(haystack, needle, start, end);
    if (count == -1) {
        /* -1 is the error return of PyUnicode_Count(); pass it on as NULL. */
        return NULL;
    }
    return PyLong_FromSsize_t(count);
}
245+
226246
static PyObject *
227247
unicode_findchar(PyObject *self, PyObject *args)
228248
{
@@ -696,6 +716,7 @@ static PyMethodDef TestMethods[] = {
696716
{"unicode_asucs4", unicode_asucs4, METH_VARARGS},
697717
{"unicode_asutf8", unicode_asutf8, METH_VARARGS},
698718
{"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS},
719+
{"unicode_count", unicode_count, METH_VARARGS},
699720
{"unicode_findchar", unicode_findchar, METH_VARARGS},
700721
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
701722
{NULL},

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.