Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 336c1bb

Browse files
committed
Merge pull request #4605 from jkseppan/png-in-pdf
ENH: Use png predictors when compressing images in pdf files
2 parents bc185a2 + 18dcc54 commit 336c1bb
Copy full SHA for 336c1bb

File tree

Expand file tree / Collapse file tree

2 files changed

+141
-74
lines changed
Filter options
Expand file tree / Collapse file tree

2 files changed

+141
-74
lines changed

‎lib/matplotlib/backends/backend_pdf.py

Copy file name to clipboard | Expand all lines: lib/matplotlib/backends/backend_pdf.py
+96 -61 | Lines changed: 96 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
import codecs
1313
import os
1414
import re
15+
import struct
1516
import sys
1617
import time
1718
import warnings
@@ -43,6 +44,7 @@
4344
from matplotlib.transforms import Affine2D, BboxBase
4445
from matplotlib.path import Path
4546
from matplotlib import _path
47+
from matplotlib import _png
4648
from matplotlib import ttconv
4749

4850
# Overview
@@ -87,7 +89,6 @@
8789

8890
# TODOs:
8991
#
90-
# * image compression could be improved (PDF supports png-like compression)
9192
# * encoding of fonts, including mathtext fonts and unicode support
9293
# * TTF support has lots of small TODOs, e.g., how do you know if a font
9394
# is serif/sans-serif, or symbolic/non-symbolic?
@@ -334,11 +335,12 @@ class Stream(object):
334335
"""
335336
__slots__ = ('id', 'len', 'pdfFile', 'file', 'compressobj', 'extra', 'pos')
336337

337-
def __init__(self, id, len, file, extra=None):
338+
def __init__(self, id, len, file, extra=None, png=None):
338339
"""id: object id of stream; len: an unused Reference object for the
339340
length of the stream, or None (to use a memory buffer); file:
340341
a PdfFile; extra: a dictionary of extra key-value pairs to
341-
include in the stream header """
342+
include in the stream header; png: if the data is already
343+
png compressed, the decode parameters"""
342344
self.id = id # object id
343345
self.len = len # id of length object
344346
self.pdfFile = file
@@ -347,10 +349,13 @@ def __init__(self, id, len, file, extra=None):
347349
if extra is None:
348350
self.extra = dict()
349351
else:
350-
self.extra = extra
352+
self.extra = extra.copy()
353+
if png is not None:
354+
self.extra.update({'Filter': Name('FlateDecode'),
355+
'DecodeParms': png})
351356

352357
self.pdfFile.recordXref(self.id)
353-
if rcParams['pdf.compression']:
358+
if rcParams['pdf.compression'] and not png:
354359
self.compressobj = zlib.compressobj(rcParams['pdf.compression'])
355360
if self.len is None:
356361
self.file = BytesIO()
@@ -583,9 +588,9 @@ def output(self, *data):
583588
self.write(fill([pdfRepr(x) for x in data]))
584589
self.write(b'\n')
585590

586-
def beginStream(self, id, len, extra=None):
591+
def beginStream(self, id, len, extra=None, png=None):
587592
assert self.currentstream is None
588-
self.currentstream = Stream(id, len, self, extra)
593+
self.currentstream = Stream(id, len, self, extra, png)
589594

590595
def endStream(self):
591596
if self.currentstream is not None:
@@ -1247,73 +1252,103 @@ def imageObject(self, image):
12471252
self.images[image] = (name, ob)
12481253
return name
12491254

1250-
def _rgb(self, im):
1251-
h, w, s = im.as_rgba_str()
1255+
def _unpack(self, im):
1256+
"""
1257+
Unpack the image object im into height, width, data, alpha,
1258+
where data and alpha are HxWx3 (RGB) or HxWx1 (grayscale or alpha)
1259+
arrays, except alpha is None if the image is fully opaque.
1260+
"""
12521261

1262+
h, w, s = im.as_rgba_str()
12531263
rgba = np.fromstring(s, np.uint8)
12541264
rgba.shape = (h, w, 4)
12551265
rgba = rgba[::-1]
1256-
rgb = rgba[:, :, :3].tostring()
1257-
a = rgba[:, :, 3]
1258-
if np.all(a == 255):
1266+
rgb = rgba[:, :, :3]
1267+
alpha = rgba[:, :, 3][..., None]
1268+
if np.all(alpha == 255):
12591269
alpha = None
12601270
else:
1261-
alpha = a.tostring()
1262-
return h, w, rgb, alpha
1263-
1264-
def _gray(self, im, rc=0.3, gc=0.59, bc=0.11):
1265-
rgbat = im.as_rgba_str()
1266-
rgba = np.fromstring(rgbat[2], np.uint8)
1267-
rgba.shape = (rgbat[0], rgbat[1], 4)
1268-
rgba = rgba[::-1]
1269-
rgba_f = rgba.astype(np.float32)
1270-
r = rgba_f[:, :, 0]
1271-
g = rgba_f[:, :, 1]
1272-
b = rgba_f[:, :, 2]
1273-
a = rgba[:, :, 3]
1274-
if np.all(a == 255):
1275-
alpha = None
1271+
alpha = np.array(alpha, order='C')
1272+
if im.is_grayscale:
1273+
r, g, b = rgb.astype(np.float32).transpose(2, 0, 1)
1274+
gray = (0.3 * r + 0.59 * g + 0.11 * b).astype(np.uint8)[..., None]
1275+
return h, w, gray, alpha
12761276
else:
1277-
alpha = a.tostring()
1278-
gray = (r*rc + g*gc + b*bc).astype(np.uint8).tostring()
1279-
return rgbat[0], rgbat[1], gray, alpha
1277+
rgb = np.array(rgb, order='C')
1278+
return h, w, rgb, alpha
12801279

1281-
def writeImages(self):
1282-
for img, pair in six.iteritems(self.images):
1283-
if img.is_grayscale:
1284-
height, width, data, adata = self._gray(img)
1280+
def _writePng(self, data):
1281+
"""
1282+
Write the image *data* into the pdf file using png
1283+
predictors with Flate compression.
1284+
"""
1285+
1286+
buffer = BytesIO()
1287+
_png.write_png(data, buffer)
1288+
buffer.seek(8)
1289+
written = 0
1290+
header = bytearray(8)
1291+
while True:
1292+
n = buffer.readinto(header)
1293+
assert n == 8
1294+
length, type = struct.unpack(b'!L4s', bytes(header))
1295+
if type == b'IDAT':
1296+
data = bytearray(length)
1297+
n = buffer.readinto(data)
1298+
assert n == length
1299+
self.currentstream.write(bytes(data))
1300+
written += n
1301+
elif type == b'IEND':
1302+
break
12851303
else:
1286-
height, width, data, adata = self._rgb(img)
1304+
buffer.seek(length, 1)
1305+
buffer.seek(4, 1) # skip CRC
1306+
1307+
def _writeImg(self, data, height, width, grayscale, id, smask=None):
1308+
"""
1309+
Write the image *data* of size *height* x *width*, as grayscale
1310+
if *grayscale* is true and RGB otherwise, as pdf object *id*
1311+
and with the soft mask (alpha channel) *smask*, which should be
1312+
either None or a *height* x *width* x 1 array.
1313+
"""
12871314

1288-
colorspace = 'DeviceGray' if img.is_grayscale else 'DeviceRGB'
1289-
obj = {'Type': Name('XObject'),
1290-
'Subtype': Name('Image'),
1291-
'Width': width,
1292-
'Height': height,
1293-
'ColorSpace': Name(colorspace),
1294-
'BitsPerComponent': 8}
1315+
obj = {'Type': Name('XObject'),
1316+
'Subtype': Name('Image'),
1317+
'Width': width,
1318+
'Height': height,
1319+
'ColorSpace': Name('DeviceGray' if grayscale
1320+
else 'DeviceRGB'),
1321+
'BitsPerComponent': 8}
1322+
if smask:
1323+
obj['SMask'] = smask
1324+
if rcParams['pdf.compression']:
1325+
png = {'Predictor': 10,
1326+
'Colors': 1 if grayscale else 3,
1327+
'Columns': width}
1328+
else:
1329+
png = None
1330+
self.beginStream(
1331+
id,
1332+
self.reserveObject('length of image stream'),
1333+
obj,
1334+
png=png
1335+
)
1336+
if png:
1337+
self._writePng(data)
1338+
else:
1339+
self.currentstream.write(data.tostring())
1340+
self.endStream()
12951341

1342+
def writeImages(self):
1343+
for img, pair in six.iteritems(self.images):
1344+
height, width, data, adata = self._unpack(img)
12961345
if adata is not None:
12971346
smaskObject = self.reserveObject("smask")
1298-
self.beginStream(
1299-
smaskObject.id,
1300-
self.reserveObject('length of smask stream'),
1301-
{'Type': Name('XObject'), 'Subtype': Name('Image'),
1302-
'Width': width, 'Height': height,
1303-
'ColorSpace': Name('DeviceGray'), 'BitsPerComponent': 8})
1304-
# TODO: predictors (i.e., output png)
1305-
self.currentstream.write(adata)
1306-
self.endStream()
1307-
obj['SMask'] = smaskObject
1308-
1309-
self.beginStream(
1310-
pair[1].id,
1311-
self.reserveObject('length of image stream'),
1312-
obj
1313-
)
1314-
# TODO: predictors (i.e., output png)
1315-
self.currentstream.write(data)
1316-
self.endStream()
1347+
self._writeImg(adata, height, width, True, smaskObject.id)
1348+
else:
1349+
smaskObject = None
1350+
self._writeImg(data, height, width, img.is_grayscale,
1351+
pair[1].id, smaskObject)
13171352

13181353
def markerObject(self, path, trans, fill, stroke, lw, joinstyle,
13191354
capstyle):

‎src/_png.cpp

Copy file name to clipboard | Expand all lines: src/_png.cpp
+45 -13 | Lines changed: 45 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -71,11 +71,15 @@ static PyObject *Py_write_png(PyObject *self, PyObject *args, PyObject *kwds)
7171
double dpi = 0;
7272
const char *names[] = { "buffer", "file", "dpi", NULL };
7373

74+
// We don't need strict contiguity, just for each row to be
75+
// contiguous, and libpng has special handling for getting RGB out
76+
// of RGBA, ARGB or BGR. But the simplest thing to do is to
77+
// enforce contiguity using array_view::converter_contiguous.
7478
if (!PyArg_ParseTupleAndKeywords(args,
7579
kwds,
7680
"O&O|d:write_png",
7781
(char **)names,
78-
&buffer.converter,
82+
&buffer.converter_contiguous,
7983
&buffer,
8084
&filein,
8185
&dpi)) {
@@ -84,6 +88,7 @@ static PyObject *Py_write_png(PyObject *self, PyObject *args, PyObject *kwds)
8488

8589
png_uint_32 width = (png_uint_32)buffer.dim(1);
8690
png_uint_32 height = (png_uint_32)buffer.dim(0);
91+
int channels = buffer.dim(2);
8792
std::vector<png_bytep> row_pointers(height);
8893
for (png_uint_32 row = 0; row < (png_uint_32)height; ++row) {
8994
row_pointers[row] = (png_bytep)buffer[row].data();
@@ -98,9 +103,22 @@ static PyObject *Py_write_png(PyObject *self, PyObject *args, PyObject *kwds)
98103
png_structp png_ptr = NULL;
99104
png_infop info_ptr = NULL;
100105
struct png_color_8_struct sig_bit;
101-
102-
if (buffer.dim(2) != 4) {
103-
PyErr_SetString(PyExc_ValueError, "Buffer must be RGBA NxMx4 array");
106+
int png_color_type;
107+
108+
switch (channels) {
109+
case 1:
110+
png_color_type = PNG_COLOR_TYPE_GRAY;
111+
break;
112+
case 3:
113+
png_color_type = PNG_COLOR_TYPE_RGB;
114+
break;
115+
case 4:
116+
png_color_type = PNG_COLOR_TYPE_RGB_ALPHA;
117+
break;
118+
default:
119+
PyErr_SetString(PyExc_ValueError,
120+
"Buffer must be an NxMxD array with D in 1, 3, 4 "
121+
"(grayscale, RGB, RGBA)");
104122
goto exit;
105123
}
106124

@@ -141,7 +159,7 @@ static PyObject *Py_write_png(PyObject *self, PyObject *args, PyObject *kwds)
141159
}
142160

143161
if (setjmp(png_jmpbuf(png_ptr))) {
144-
PyErr_SetString(PyExc_RuntimeError, "Error setting jumps");
162+
PyErr_SetString(PyExc_RuntimeError, "libpng signaled error");
145163
goto exit;
146164
}
147165

@@ -155,7 +173,7 @@ static PyObject *Py_write_png(PyObject *self, PyObject *args, PyObject *kwds)
155173
width,
156174
height,
157175
8,
158-
PNG_COLOR_TYPE_RGB_ALPHA,
176+
png_color_type,
159177
PNG_INTERLACE_NONE,
160178
PNG_COMPRESSION_TYPE_BASE,
161179
PNG_FILTER_TYPE_BASE);
@@ -166,13 +184,27 @@ static PyObject *Py_write_png(PyObject *self, PyObject *args, PyObject *kwds)
166184
png_set_pHYs(png_ptr, info_ptr, dots_per_meter, dots_per_meter, PNG_RESOLUTION_METER);
167185
}
168186

169-
// this a a color image!
170-
sig_bit.gray = 0;
171-
sig_bit.red = 8;
172-
sig_bit.green = 8;
173-
sig_bit.blue = 8;
174-
/* if the image has an alpha channel then */
175-
sig_bit.alpha = 8;
187+
sig_bit.alpha = 0;
188+
switch (png_color_type) {
189+
case PNG_COLOR_TYPE_GRAY:
190+
sig_bit.gray = 8;
191+
sig_bit.red = 0;
192+
sig_bit.green = 0;
193+
sig_bit.blue = 0;
194+
break;
195+
case PNG_COLOR_TYPE_RGB_ALPHA:
196+
sig_bit.alpha = 8;
197+
// fall through
198+
case PNG_COLOR_TYPE_RGB:
199+
sig_bit.gray = 0;
200+
sig_bit.red = 8;
201+
sig_bit.green = 8;
202+
sig_bit.blue = 8;
203+
break;
204+
default:
205+
PyErr_SetString(PyExc_RuntimeError, "internal error, bad png_color_type");
206+
goto exit;
207+
}
176208
png_set_sBIT(png_ptr, info_ptr, &sig_bit);
177209

178210
png_write_info(png_ptr, info_ptr);

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.