Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit ee19ea7

Browse files
authored
[mypyc] Add primitives and specialization for ord() (#18240)
This makes a microbenchmark, adapted from an internal production codebase that heavily uses `ord()`, over 10x faster. Work on mypyc/mypyc#644 and mypyc/mypyc#880.
1 parent cc45bec commit ee19ea7
Copy full SHA for ee19ea7

File tree

Expand file treeCollapse file tree

10 files changed

+147
-10
lines changed
Filter options
Expand file treeCollapse file tree

10 files changed

+147
-10
lines changed

‎mypyc/doc/str_operations.rst

Copy file name to clipboardExpand all lines: mypyc/doc/str_operations.rst
+6Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,9 @@ Methods
3333
* ``s.split(sep: str)``
3434
* ``s.split(sep: str, maxsplit: int)``
3535
* ``s1.startswith(s2: str)``
36+
37+
Functions
38+
---------
39+
40+
* ``len(s: str)``
41+
* ``ord(s: str)``

‎mypyc/irbuild/specialize.py

Copy file name to clipboardExpand all lines: mypyc/irbuild/specialize.py
+11Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from mypy.nodes import (
2020
ARG_NAMED,
2121
ARG_POS,
22+
BytesExpr,
2223
CallExpr,
2324
DictExpr,
2425
Expression,
@@ -877,3 +878,13 @@ def translate_float(builder: IRBuilder, expr: CallExpr, callee: RefExpr) -> Valu
877878
# No-op float conversion.
878879
return builder.accept(arg)
879880
return None
881+
882+
883+
@specialize_function("builtins.ord")
def translate_ord(builder: IRBuilder, expr: CallExpr, callee: RefExpr) -> Value | None:
    """Constant-fold ``ord()`` applied to a one-character literal.

    The call is handled only when it has exactly one positional argument
    and that argument is a ``str`` or ``bytes`` literal whose ``value`` has
    length 1; the result is then an ``Integer`` holding ``ord(value)``.

    Returns ``None`` for every other call shape, meaning no specialization
    is produced by this function.
    """
    is_single_positional = len(expr.args) == 1 and expr.arg_kinds[0] == ARG_POS
    if is_single_positional:
        literal = expr.args[0]
        if isinstance(literal, (StrExpr, BytesExpr)) and len(literal.value) == 1:
            return Integer(ord(literal.value))
    return None

‎mypyc/lib-rt/CPy.h

Copy file name to clipboardExpand all lines: mypyc/lib-rt/CPy.h
+2Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -730,6 +730,7 @@ bool CPyStr_IsTrue(PyObject *obj);
730730
Py_ssize_t CPyStr_Size_size_t(PyObject *str);
731731
PyObject *CPy_Decode(PyObject *obj, PyObject *encoding, PyObject *errors);
732732
PyObject *CPy_Encode(PyObject *obj, PyObject *encoding, PyObject *errors);
733+
CPyTagged CPyStr_Ord(PyObject *obj);
733734

734735

735736
// Bytes operations
@@ -740,6 +741,7 @@ PyObject *CPyBytes_GetSlice(PyObject *obj, CPyTagged start, CPyTagged end);
740741
CPyTagged CPyBytes_GetItem(PyObject *o, CPyTagged index);
741742
PyObject *CPyBytes_Concat(PyObject *a, PyObject *b);
742743
PyObject *CPyBytes_Join(PyObject *sep, PyObject *iter);
744+
CPyTagged CPyBytes_Ord(PyObject *obj);
743745

744746

745747
int CPyBytes_Compare(PyObject *left, PyObject *right);

‎mypyc/lib-rt/bytes_ops.c

Copy file name to clipboardExpand all lines: mypyc/lib-rt/bytes_ops.c
+17Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,3 +141,20 @@ PyObject *CPyBytes_Build(Py_ssize_t len, ...) {
141141

142142
return (PyObject *)ret;
143143
}
144+
145+
146+
// Return ord() of a one-byte bytes or bytearray object.
//
// If obj is neither bytes nor bytearray, or its length is not exactly 1,
// a TypeError is set and CPY_INT_TAG is returned.
CPyTagged CPyBytes_Ord(PyObject *obj) {
    const char *data = NULL;
    Py_ssize_t size = 0;

    if (PyBytes_Check(obj)) {
        size = PyBytes_GET_SIZE(obj);
        data = PyBytes_AS_STRING(obj);
    } else if (PyByteArray_Check(obj)) {
        size = PyByteArray_GET_SIZE(obj);
        data = PyByteArray_AS_STRING(obj);
    }

    if (data == NULL || size != 1) {
        PyErr_SetString(PyExc_TypeError, "ord() expects a character");
        return CPY_INT_TAG;
    }

    // Go through unsigned char so bytes >= 0x80 stay in the range 0..255,
    // then shift left by one bit to form the CPyTagged result (presumably
    // the short tagged-int encoding -- confirm against the CPyTagged docs).
    return (unsigned char)data[0] << 1;
}

‎mypyc/lib-rt/str_ops.c

Copy file name to clipboardExpand all lines: mypyc/lib-rt/str_ops.c
+12Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,3 +243,15 @@ PyObject *CPy_Encode(PyObject *obj, PyObject *encoding, PyObject *errors) {
243243
return NULL;
244244
}
245245
}
246+
247+
248+
// Return ord() of a one-character str object.
//
// For any other length, a TypeError reporting that length is set and
// CPY_INT_TAG is returned. obj is not type-checked here; the caller is
// assumed to pass a str object.
CPyTagged CPyStr_Ord(PyObject *obj) {
    Py_ssize_t length = PyUnicode_GET_LENGTH(obj);
    if (length != 1) {
        PyErr_Format(
            PyExc_TypeError, "ord() expected a character, but a string of length %zd found", length);
        return CPY_INT_TAG;
    }

    const void *data = PyUnicode_DATA(obj);
    int kind = PyUnicode_KIND(obj);
    Py_UCS4 code_point = PyUnicode_READ(kind, data, 0);
    // Shift left by one bit to form the CPyTagged result (presumably the
    // short tagged-int encoding -- confirm against the CPyTagged docs).
    return code_point << 1;
}

‎mypyc/primitives/bytes_ops.py

Copy file name to clipboardExpand all lines: mypyc/primitives/bytes_ops.py
+8Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,3 +99,11 @@
9999
error_kind=ERR_MAGIC,
100100
var_arg_type=bytes_rprimitive,
101101
)
102+
103+
# ord(x) for a single bytes_rprimitive argument: dispatches to the C helper
# CPyBytes_Ord and produces an int.
function_op(
    name="builtins.ord",
    c_function_name="CPyBytes_Ord",
    arg_types=[bytes_rprimitive],
    return_type=int_rprimitive,
    error_kind=ERR_MAGIC,
)

‎mypyc/primitives/str_ops.py

Copy file name to clipboardExpand all lines: mypyc/primitives/str_ops.py
+8Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,3 +251,11 @@
251251
c_function_name="CPy_Encode",
252252
error_kind=ERR_MAGIC,
253253
)
254+
255+
# ord(x) for a single str_rprimitive argument: dispatches to the C helper
# CPyStr_Ord and produces an int.
function_op(
    name="builtins.ord",
    c_function_name="CPyStr_Ord",
    arg_types=[str_rprimitive],
    return_type=int_rprimitive,
    error_kind=ERR_MAGIC,
)

‎mypyc/test-data/irbuild-str.test

Copy file name to clipboardExpand all lines: mypyc/test-data/irbuild-str.test
+43Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,3 +383,46 @@ L0:
383383
r37 = 'latin2'
384384
r38 = CPy_Encode(s, r37, 0)
385385
return 1
386+
387+
[case testOrd]
388+
def str_ord(x: str) -> int:
389+
return ord(x)
390+
def str_ord_literal() -> int:
391+
return ord("a")
392+
def bytes_ord(x: bytes) -> int:
393+
return ord(x)
394+
def bytes_ord_literal() -> int:
395+
return ord(b"a")
396+
def any_ord(x) -> int:
397+
return ord(x)
398+
[out]
399+
def str_ord(x):
400+
x :: str
401+
r0 :: int
402+
L0:
403+
r0 = CPyStr_Ord(x)
404+
return r0
405+
def str_ord_literal():
406+
L0:
407+
return 194
408+
def bytes_ord(x):
409+
x :: bytes
410+
r0 :: int
411+
L0:
412+
r0 = CPyBytes_Ord(x)
413+
return r0
414+
def bytes_ord_literal():
415+
L0:
416+
return 194
417+
def any_ord(x):
418+
x, r0 :: object
419+
r1 :: str
420+
r2, r3 :: object
421+
r4 :: int
422+
L0:
423+
r0 = builtins :: module
424+
r1 = 'ord'
425+
r2 = CPyObject_GetAttr(r0, r1)
426+
r3 = PyObject_CallFunctionObjArgs(r2, x, 0)
427+
r4 = unbox(int, r3)
428+
return r4

‎mypyc/test-data/run-bytes.test

Copy file name to clipboardExpand all lines: mypyc/test-data/run-bytes.test
+23Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,29 @@ def test_len() -> None:
111111
assert len(b) == 3
112112
assert len(bytes()) == 0
113113

114+
def test_ord() -> None:
# Checks ord() on bytes values. Each value appears twice: once as a bare
# literal and once with `+ bytes()` appended, which makes the argument a
# non-literal expression (presumably so that both compile-time handling of
# literals and the runtime helper are exercised -- confirm).
115+
assert ord(b'a') == ord('a')
116+
assert ord(b'a' + bytes()) == ord('a')
117+
assert ord(b'\x00') == 0
118+
assert ord(b'\x00' + bytes()) == 0
119+
# 0xfe is above 0x7f; the expected result is the unsigned value 254.
assert ord(b'\xfe') == 254
120+
assert ord(b'\xfe' + bytes()) == 254
121+
122+
# Lengths other than 1 (two bytes, then zero bytes) must raise TypeError.
with assertRaises(TypeError):
123+
ord(b'aa')
124+
with assertRaises(TypeError):
125+
ord(b'')
126+
127+
def test_ord_bytesarray() -> None:
# Checks ord() on bytearray values of length 1, including the zero byte and
# a byte above 0x7f.
# NOTE(review): the name reads "bytesarray"; "bytearray" was presumably
# intended -- confirm before renaming.
128+
assert ord(bytearray(b'a')) == ord('a')
129+
assert ord(bytearray(b'\x00')) == 0
130+
assert ord(bytearray(b'\xfe')) == 254
131+
132+
# Lengths other than 1 (two bytes, then zero bytes) must raise TypeError.
with assertRaises(TypeError):
133+
ord(bytearray(b'aa'))
134+
with assertRaises(TypeError):
135+
ord(bytearray(b''))
136+
114137
[case testBytesSlicing]
115138
def test_bytes_slicing() -> None:
116139
b = b'abcdefg'

‎mypyc/test-data/run-strings.test

Copy file name to clipboardExpand all lines: mypyc/test-data/run-strings.test
+17-10Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -565,25 +565,32 @@ def test_chr() -> None:
565565
assert try_invalid(1114112)
566566

567567
[case testOrd]
568+
from testutil import assertRaises
569+
568570
def test_ord() -> None:
571+
assert ord(' ') == 32
572+
assert ord(' ' + str()) == 32
573+
assert ord('\x00') == 0
574+
assert ord('\x00' + str()) == 0
569575
assert ord('\ue000') == 57344
570-
s = "a\xac\u1234\u20ac\U00008000"
571-
# ^^^^ two-digit hex escape
572-
# ^^^^^^ four-digit Unicode escape
573-
# ^^^^^^^^^^ eight-digit Unicode escape
576+
assert ord('\ue000' + str()) == 57344
577+
s = "a\xac\u1234\u20ac\U00010000"
578+
# ^^^^ two-digit hex escape
579+
# ^^^^^^ four-digit Unicode escape
580+
# ^^^^^^^^^^ eight-digit Unicode escape
574581
l1 = [ord(c) for c in s]
575-
assert l1 == [97, 172, 4660, 8364, 32768]
582+
assert l1 == [97, 172, 4660, 8364, 65536]
576583
u = 'abcdé'
577584
assert ord(u[-1]) == 233
578585
assert ord(b'a') == 97
579586
assert ord(b'a' + bytes()) == 97
580-
u2 = '\U0010ffff'
587+
u2 = '\U0010ffff' + str()
581588
assert ord(u2) == 1114111
582-
try:
589+
assert ord('\U0010ffff') == 1114111
590+
with assertRaises(TypeError, "ord() expected a character, but a string of length 2 found"):
583591
ord('aa')
584-
assert False
585-
except TypeError:
586-
pass
592+
with assertRaises(TypeError):
593+
ord('')
587594

588595
[case testDecode]
589596
def test_decode() -> None:

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.