Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 9b55505

Browse files
authored
Merge pull request #4678 from dalinaum/test_unicodedata
Update test_unicodedata from CPython 3.11.2
2 parents 87728c4 + b687960 commit 9b55505
Copy full SHA for 9b55505

File tree

1 file changed

+128
-33
lines changed
Filter options

1 file changed

+128
-33
lines changed

‎Lib/test/test_unicodedata.py

Copy file name to clipboard | Expand all lines: Lib/test/test_unicodedata.py
+128 -33 | Lines changed: 128 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -1,32 +1,32 @@
1-
""" Test script for the unicodedata module.
1+
""" Tests for the unicodedata module.
22
33
Written by Marc-Andre Lemburg (mal@lemburg.com).
44
55
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
66
77
"""
88

9+
import hashlib
10+
from http.client import HTTPException
911
import sys
12+
import unicodedata
1013
import unittest
11-
import hashlib
12-
from test.support import script_helper
13-
14-
encoding = 'utf-8'
15-
errors = 'surrogatepass'
14+
from test.support import (open_urlresource, requires_resource, script_helper,
15+
cpython_only, check_disallow_instantiation,
16+
ResourceDenied)
1617

1718

18-
### Run tests
19-
2019
class UnicodeMethodsTest(unittest.TestCase):
2120

2221
# update this, if the database changes
23-
expectedchecksum = '9129d6f2bdf008a81c2476e5b5127014a62130c1'
22+
expectedchecksum = '4739770dd4d0e5f1b1677accfc3552ed3c8ef326'
2423

2524
# TODO: RUSTPYTHON
2625
@unittest.expectedFailure
26+
@requires_resource('cpu')
2727
def test_method_checksum(self):
2828
h = hashlib.sha1()
29-
for i in range(0x10000):
29+
for i in range(sys.maxunicode + 1):
3030
char = chr(i)
3131
data = [
3232
# Predicates (single char)
@@ -63,33 +63,26 @@ def test_method_checksum(self):
6363
(char + 'ABC').title(),
6464

6565
]
66-
h.update(''.join(data).encode(encoding, errors))
66+
h.update(''.join(data).encode('utf-8', 'surrogatepass'))
6767
result = h.hexdigest()
6868
self.assertEqual(result, self.expectedchecksum)
6969

7070
class UnicodeDatabaseTest(unittest.TestCase):
71-
72-
def setUp(self):
73-
# In case unicodedata is not available, this will raise an ImportError,
74-
# but the other test cases will still be run
75-
import unicodedata
76-
self.db = unicodedata
77-
78-
def tearDown(self):
79-
del self.db
71+
db = unicodedata
8072

8173
class UnicodeFunctionsTest(UnicodeDatabaseTest):
8274

8375
# Update this if the database changes. Make sure to do a full rebuild
8476
# (e.g. 'make distclean && make') to get the correct checksum.
85-
expectedchecksum = 'c44a49ca7c5cb6441640fe174ede604b45028652'
77+
expectedchecksum = '98d602e1f69d5c5bb8a5910c40bbbad4e18e8370'
8678
# TODO: RUSTPYTHON
8779
@unittest.expectedFailure
80+
@requires_resource('cpu')
8881
def test_function_checksum(self):
8982
data = []
9083
h = hashlib.sha1()
9184

92-
for i in range(0x10000):
85+
for i in range(sys.maxunicode + 1):
9386
char = chr(i)
9487
data = [
9588
# Properties
@@ -106,6 +99,15 @@ def test_function_checksum(self):
10699
result = h.hexdigest()
107100
self.assertEqual(result, self.expectedchecksum)
108101

102+
# TODO: RUSTPYTHON
@unittest.expectedFailure
@requires_resource('cpu')
def test_name_inverse_lookup(self):
    """Check that ``lookup()`` inverts ``name()`` for every named code point.

    Walks the whole code point range (0 .. sys.maxunicode); code points
    for which ``name()`` yields the ``None`` default (or any falsy value)
    are skipped.
    """
    for code_point in range(sys.maxunicode + 1):
        expected = chr(code_point)
        # Passing None as the default avoids an exception for unnamed
        # code points.
        char_name = self.db.name(expected, None)
        if not char_name:
            continue
        self.assertEqual(self.db.lookup(char_name), expected)
110+
109111
# TODO: RUSTPYTHON
110112
@unittest.expectedFailure
111113
def test_digit(self):
@@ -201,15 +203,8 @@ def test_combining(self):
201203
self.assertRaises(TypeError, self.db.combining)
202204
self.assertRaises(TypeError, self.db.combining, 'xx')
203205

204-
def test_normalize(self):
205-
self.assertRaises(TypeError, self.db.normalize)
206-
self.assertRaises(ValueError, self.db.normalize, 'unknown', 'xx')
207-
self.assertEqual(self.db.normalize('NFKC', ''), '')
208-
# The rest can be found in test_normalization.py
209-
# which requires an external file.
210-
211206
def test_pr29(self):
212-
# http://www.unicode.org/review/pr-29.html
207+
# https://www.unicode.org/review/pr-29.html
213208
# See issues #1054943 and #10254.
214209
composed = ("\u0b47\u0300\u0b3e", "\u1100\u0300\u1161",
215210
'Li\u030dt-s\u1e73\u0301',
@@ -240,9 +235,6 @@ def test_issue29456(self):
240235
self.assertEqual(self.db.normalize('NFC', u11a7_str_a), u11a7_str_b)
241236
self.assertEqual(self.db.normalize('NFC', u11c3_str_a), u11c3_str_b)
242237

243-
# For tests of unicodedata.is_normalized / self.db.is_normalized ,
244-
# see test_normalization.py .
245-
246238
def test_east_asian_width(self):
247239
eaw = self.db.east_asian_width
248240
self.assertRaises(TypeError, eaw, b'a')
@@ -265,6 +257,11 @@ def test_east_asian_width_9_0_changes(self):
265257

266258
class UnicodeMiscTest(UnicodeDatabaseTest):
267259

260+
@cpython_only
def test_disallow_instantiation(self):
    """Ensure that the ``unicodedata.UCD`` type disallows instantiation.

    See bpo-43916.
    """
    ucd_type = unicodedata.UCD
    check_disallow_instantiation(self, ucd_type)
264+
268265
# TODO: RUSTPYTHON
269266
@unittest.expectedFailure
270267
def test_failed_import_during_compiling(self):
@@ -363,5 +360,103 @@ def test_linebreak_7643(self):
363360
self.assertEqual(len(lines), 1,
364361
r"\u%.4x should not be a linebreak" % i)
365362

363+
class NormalizationTest(unittest.TestCase):
    """Tests for ``unicodedata.normalize()`` and ``unicodedata.is_normalized()``.

    ``test_normalization`` is data driven: it reads ``NormalizationTest.txt``
    for the Unicode version reported by ``unicodedata.unidata_version`` and
    checks every record in it.  The remaining tests need no external data.
    """

    @staticmethod
    def check_version(testfile):
        """Return True if the first line of *testfile* mentions our Unicode version.

        *testfile* is an open text file; exactly one line is consumed.
        """
        hdr = testfile.readline()
        return unicodedata.unidata_version in hdr

    @staticmethod
    def unistr(data):
        """Convert space-separated hexadecimal code points into a string.

        For example ``"0041 030A"`` becomes ``"A\u030a"``.  Raises
        ``ValueError`` if a field is not valid hexadecimal.
        """
        return "".join(chr(int(field, 16)) for field in data.split(" "))

    @requires_resource('network')
    def test_normalization(self):
        """Run the normalization checks against ``NormalizationTest.txt``.

        The test is skipped (not failed) when the data file cannot be
        obtained.
        """
        TESTDATAFILE = "NormalizationTest.txt"
        TESTDATAURL = f"http://www.pythontest.net/unicode/{unicodedata.unidata_version}/{TESTDATAFILE}"

        # Hit the exception early
        try:
            testdata = open_urlresource(TESTDATAURL, encoding="utf-8",
                                        check=self.check_version)
        except PermissionError:
            self.skipTest(f"Permission error when downloading {TESTDATAURL} "
                          f"into the test data directory")
        except (OSError, HTTPException) as exc:
            self.skipTest(f"Failed to download {TESTDATAURL}: {exc}")

        with testdata:
            self.run_normalization_tests(testdata)

    def run_normalization_tests(self, testdata):
        """Check every record of *testdata*, then all remaining code points.

        *testdata* is an iterable of text lines.  ``#`` starts a comment,
        lines starting with ``@Part`` switch the current part, and every
        other non-empty line holds ``;``-terminated fields of
        space-separated hexadecimal code points (c1..c5).

        Any single character that does not appear as c1 in ``@Part1`` must
        be left unchanged by all four normalization forms.
        """
        part = None
        # Only membership is ever tested, so a set is the natural container.
        part1_chars = set()

        def NFC(text):
            return unicodedata.normalize("NFC", text)

        def NFKC(text):
            return unicodedata.normalize("NFKC", text)

        def NFD(text):
            return unicodedata.normalize("NFD", text)

        def NFKD(text):
            return unicodedata.normalize("NFKD", text)

        for line in testdata:
            # Drop trailing comments and surrounding whitespace.
            if '#' in line:
                line = line.split('#')[0]
            line = line.strip()
            if not line:
                continue
            if line.startswith("@Part"):
                part = line.split()[0]
                continue
            # Each field is terminated by ';', so the last split item is
            # the (empty) remainder and is dropped.
            c1, c2, c3, c4, c5 = [self.unistr(x) for x in line.split(';')[:-1]]

            # Perform tests
            self.assertTrue(c2 == NFC(c1) == NFC(c2) == NFC(c3), line)
            self.assertTrue(c4 == NFC(c4) == NFC(c5), line)
            self.assertTrue(c3 == NFD(c1) == NFD(c2) == NFD(c3), line)
            self.assertTrue(c5 == NFD(c4) == NFD(c5), line)
            self.assertTrue(
                c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5),
                line)
            self.assertTrue(
                c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5),
                line)

            # Pass the offending data line so a failure is traceable.
            self.assertTrue(unicodedata.is_normalized("NFC", c2), line)
            self.assertTrue(unicodedata.is_normalized("NFC", c4), line)

            self.assertTrue(unicodedata.is_normalized("NFD", c3), line)
            self.assertTrue(unicodedata.is_normalized("NFD", c5), line)

            self.assertTrue(unicodedata.is_normalized("NFKC", c4), line)
            self.assertTrue(unicodedata.is_normalized("NFKD", c5), line)

            # Record part 1 data
            if part == "@Part1":
                part1_chars.add(c1)

        # Perform tests for all other data
        for c in range(sys.maxunicode + 1):
            X = chr(c)
            if X in part1_chars:
                continue
            self.assertTrue(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c)

    def test_edge_cases(self):
        """Check argument validation and the empty-string case of normalize()."""
        self.assertRaises(TypeError, unicodedata.normalize)
        self.assertRaises(ValueError, unicodedata.normalize, 'unknown', 'xx')
        self.assertEqual(unicodedata.normalize('NFKC', ''), '')

    def test_bug_834676(self):
        """Regression check for bug 834676: the call must simply not raise."""
        # Check for bug 834676
        unicodedata.normalize('NFC', '\ud55c\uae00')
459+
460+
366461
if __name__ == "__main__":
367462
unittest.main()

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.