Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit bd127f3

Browse files
authored
Merge pull request #20633 from sauerburger/pdf-non-bmp-chars
Emit non BMP chars as XObjects in PDF
2 parents 1a8caa5 + a4067a0 commit bd127f3
Copy full SHA for bd127f3

File tree

Expand file treeCollapse file tree

3 files changed

+67
-22
lines changed
Filter options
Expand file treeCollapse file tree

3 files changed

+67
-22
lines changed

‎lib/matplotlib/backends/backend_pdf.py

Copy file name to clipboardExpand all lines: lib/matplotlib/backends/backend_pdf.py
+59 -22 | Lines changed: 59 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -321,6 +321,21 @@ def pdfRepr(obj):
321321
.format(type(obj)))
322322

323323

324+
def _font_supports_char(fonttype, char):
325+
"""
326+
Returns True if the font is able to provide *char* in a PDF.
327+
328+
For a Type 3 font, this method returns True only for single-byte
329+
chars. For Type 42 fonts this method return True if the char is from
330+
the Basic Multilingual Plane.
331+
"""
332+
if fonttype == 3:
333+
return ord(char) <= 255
334+
if fonttype == 42:
335+
return ord(char) <= 65535
336+
raise NotImplementedError()
337+
338+
324339
class Reference:
325340
"""
326341
PDF reference object.
@@ -1268,13 +1283,48 @@ def embedTTFType42(font, characters, descriptor):
12681283

12691284
unicode_bfrange = []
12701285
for start, end in unicode_groups:
1286+
# Ensure the CID map contains only chars from BMP
1287+
if start > 65535:
1288+
continue
1289+
end = min(65535, end)
1290+
12711291
unicode_bfrange.append(
12721292
b"<%04x> <%04x> [%s]" %
12731293
(start, end,
12741294
b" ".join(b"<%04x>" % x for x in range(start, end+1))))
12751295
unicode_cmap = (self._identityToUnicodeCMap %
12761296
(len(unicode_groups), b"\n".join(unicode_bfrange)))
12771297

1298+
# Add XObjects for unsupported chars
1299+
glyph_ids = []
1300+
for ccode in characters:
1301+
if not _font_supports_char(fonttype, chr(ccode)):
1302+
gind = font.get_char_index(ccode)
1303+
glyph_ids.append(gind)
1304+
1305+
bbox = [cvt(x, nearest=False) for x in font.bbox]
1306+
rawcharprocs = _get_pdf_charprocs(filename, glyph_ids)
1307+
for charname in sorted(rawcharprocs):
1308+
stream = rawcharprocs[charname]
1309+
charprocDict = {'Length': len(stream)}
1310+
charprocDict['Type'] = Name('XObject')
1311+
charprocDict['Subtype'] = Name('Form')
1312+
charprocDict['BBox'] = bbox
1313+
# Each glyph includes bounding box information,
1314+
# but xpdf and ghostscript can't handle it in a
1315+
# Form XObject (they segfault!!!), so we remove it
1316+
# from the stream here. It's not needed anyway,
1317+
# since the Form XObject includes it in its BBox
1318+
# value.
1319+
stream = stream[stream.find(b"d1") + 2:]
1320+
charprocObject = self.reserveObject('charProc')
1321+
self.beginStream(charprocObject.id, None, charprocDict)
1322+
self.currentstream.write(stream)
1323+
self.endStream()
1324+
1325+
name = self._get_xobject_symbol_name(filename, charname)
1326+
self.multi_byte_charprocs[name] = charprocObject
1327+
12781328
# CIDToGIDMap stream
12791329
cid_to_gid_map = "".join(cid_to_gid_map).encode("utf-16be")
12801330
self.beginStream(cidToGidMapObject.id,
@@ -2106,16 +2156,17 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
21062156
self.check_gc(gc, gc._rgb)
21072157
prev_font = None, None
21082158
oldx, oldy = 0, 0
2109-
type3_multibytes = []
2159+
unsupported_chars = []
21102160

21112161
self.file.output(Op.begin_text)
21122162
for font, fontsize, num, ox, oy in glyphs:
2113-
self.file._character_tracker.track(font, chr(num))
2163+
char = chr(num)
2164+
self.file._character_tracker.track(font, char)
21142165
fontname = font.fname
2115-
if fonttype == 3 and num > 255:
2116-
# For Type3 fonts, multibyte characters must be emitted
2117-
# separately (below).
2118-
type3_multibytes.append((font, fontsize, ox, oy, num))
2166+
if not _font_supports_char(fonttype, char):
2167+
# Unsupported chars (i.e. multibyte in Type 3 or beyond BMP in
2168+
# Type 42) must be emitted separately (below).
2169+
unsupported_chars.append((font, fontsize, ox, oy, num))
21192170
else:
21202171
self._setup_textpos(ox, oy, 0, oldx, oldy)
21212172
oldx, oldy = ox, oy
@@ -2127,7 +2178,7 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
21272178
Op.show)
21282179
self.file.output(Op.end_text)
21292180

2130-
for font, fontsize, ox, oy, num in type3_multibytes:
2181+
for font, fontsize, ox, oy, num in unsupported_chars:
21312182
self._draw_xobject_glyph(
21322183
font, fontsize, font.get_char_index(num), ox, oy)
21332184

@@ -2236,20 +2287,6 @@ def encode_string(self, s, fonttype):
22362287
return s.encode('cp1252', 'replace')
22372288
return s.encode('utf-16be', 'replace')
22382289

2239-
@staticmethod
2240-
def _font_supports_char(fonttype, char):
2241-
"""
2242-
Returns True if the font is able to provided the char in a PDF
2243-
2244-
For a Type 3 font, this method returns True only for single-byte
2245-
chars. For Type 42 fonts this method always returns True.
2246-
"""
2247-
if fonttype == 3:
2248-
return ord(char) <= 255
2249-
if fonttype == 42:
2250-
return True
2251-
raise NotImplementedError()
2252-
22532290
def draw_text(self, gc, x, y, s, prop, angle, ismath=False, mtext=None):
22542291
# docstring inherited
22552292

@@ -2313,7 +2350,7 @@ def draw_text(self, gc, x, y, s, prop, angle, ismath=False, mtext=None):
23132350
prev_was_multibyte = True
23142351
for item in _text_helpers.layout(
23152352
s, font, kern_mode=KERNING_UNFITTED):
2316-
if self._font_supports_char(fonttype, item.char):
2353+
if _font_supports_char(fonttype, item.char):
23172354
if prev_was_multibyte:
23182355
singlebyte_chunks.append((item.x, []))
23192356
if item.prev_kern:
Binary file not shown.

‎lib/matplotlib/tests/test_text.py

Copy file name to clipboardExpand all lines: lib/matplotlib/tests/test_text.py
+8 | Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -748,3 +748,11 @@ def test_pdf_font42_kerning():
748748
plt.rcParams['pdf.fonttype'] = 42
749749
plt.figure()
750750
plt.figtext(0.1, 0.5, "ATAVATAVATAVATAVATA", size=30)
751+
752+
753+
@image_comparison(['text_pdf_chars_beyond_bmp.pdf'], style='mpl20')
def test_pdf_chars_beyond_bmp():
    """
    Image-compare a PDF figure whose text contains a non-BMP character.

    The figure text mixes plain text, a mathtext expression and U+10308,
    a code point above U+FFFF, with ``pdf.fonttype`` set to 42.
    """
    plt.rcParams['pdf.fonttype'] = 42
    plt.rcParams['mathtext.fontset'] = 'stixsans'
    plt.figure()
    # "\U00010308" lies outside the Basic Multilingual Plane.
    plt.figtext(0.1, 0.5, "Mass $m$ \U00010308", size=30)

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.