Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 0f6a627

Browse files
authored
Merge pull request #23295 from anntzer/re.sub-str.translate
Replace re.sub by the faster str.translate.
2 parents 7864e76 + ef16bfb commit 0f6a627
Copy full SHA for 0f6a627

File tree

1 file changed

+17
-29
lines changed
Filter options

1 file changed

+17
-29
lines changed

‎lib/matplotlib/backends/backend_pdf.py

Copy file name to clipboard | Expand all lines: lib/matplotlib/backends/backend_pdf.py
+17 -29 | Lines changed: 17 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,6 @@
1313
import logging
1414
import math
1515
import os
16-
import re
1716
import string
1817
import struct
1918
import sys
@@ -119,25 +118,6 @@ def _fill(strings, linelen=75):
119118
result.append(b' '.join(strings[lasti:]))
120119
return b'\n'.join(result)
121120

122-
# PDF strings are supposed to be able to include any eight-bit data,
123-
# except that unbalanced parens and backslashes must be escaped by a
124-
# backslash. However, sf bug #2708559 shows that the carriage return
125-
# character may get read as a newline; these characters correspond to
126-
# \gamma and \Omega in TeX's math font encoding. Escaping them fixes
127-
# the bug.
128-
_string_escape_regex = re.compile(br'([\\()\r\n])')
129-
130-
131-
def _string_escape(match):
132-
m = match.group(0)
133-
if m in br'\()':
134-
return b'\\' + m
135-
elif m == b'\n':
136-
return br'\n'
137-
elif m == b'\r':
138-
return br'\r'
139-
assert False
140-
141121

142122
def _create_pdf_info_dict(backend, metadata):
143123
"""
@@ -267,6 +247,15 @@ def _get_link_annotation(gc, x, y, width, height):
267247
return link_annotation
268248

269249

250+
# PDF strings are supposed to be able to include any eight-bit data, except
251+
# that unbalanced parens and backslashes must be escaped by a backslash.
252+
# However, sf bug #2708559 shows that the carriage return character may get
253+
# read as a newline; these characters correspond to \gamma and \Omega in TeX's
254+
# math font encoding. Escaping them fixes the bug.
255+
_str_escapes = str.maketrans({
256+
'\\': '\\\\', '(': '\\(', ')': '\\)', '\n': '\\n', '\r': '\\r'})
257+
258+
270259
def pdfRepr(obj):
271260
"""Map Python objects to PDF syntax."""
272261

@@ -292,22 +281,21 @@ def pdfRepr(obj):
292281
elif isinstance(obj, (int, np.integer)):
293282
return b"%d" % obj
294283

295-
# Unicode strings are encoded in UTF-16BE with byte-order mark.
284+
# Non-ASCII Unicode strings are encoded in UTF-16BE with byte-order mark.
296285
elif isinstance(obj, str):
297-
try:
298-
# But maybe it's really ASCII?
299-
s = obj.encode('ASCII')
300-
return pdfRepr(s)
301-
except UnicodeEncodeError:
302-
s = codecs.BOM_UTF16_BE + obj.encode('UTF-16BE')
303-
return pdfRepr(s)
286+
return pdfRepr(obj.encode('ascii') if obj.isascii()
287+
else codecs.BOM_UTF16_BE + obj.encode('UTF-16BE'))
304288

305289
# Strings are written in parentheses, with backslashes and parens
306290
# escaped. Actually balanced parens are allowed, but it is
307291
# simpler to escape them all. TODO: cut long strings into lines;
308292
# I believe there is some maximum line length in PDF.
293+
# Despite the extra decode/encode, translate is faster than regex.
309294
elif isinstance(obj, bytes):
310-
return b'(' + _string_escape_regex.sub(_string_escape, obj) + b')'
295+
return (
296+
b'(' +
297+
obj.decode('latin-1').translate(_str_escapes).encode('latin-1')
298+
+ b')')
311299

312300
# Dictionaries. The keys must be PDF names, so if we find strings
313301
# there, we make Name objects from them. The values may be

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.