diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml new file mode 100644 index 0000000..ed3ed7e --- /dev/null +++ b/.github/workflows/main.yaml @@ -0,0 +1,29 @@ +name: Build + +on: [push, pull_request] + +jobs: + testing: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.8, 3.9, '3.10', 3.11] + fail-fast: false + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + + - name: Install dependencies + run: | + chmod +x ./ci/before-script.sh + ./ci/before-script.sh + python -m pip install --upgrade pip + pip install pynose + + - name: Run tests + run: nosetests -w tests diff --git a/.gitignore b/.gitignore index c10666e..3faf7f2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,11 @@ .idea *.pyc + +# Build +.cache +.eggs +pdfkit.egg-info + +# Tests +.tox +.python-version diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 15dc4fe..0000000 --- a/.travis.yml +++ /dev/null @@ -1,9 +0,0 @@ -language: python -python: - - 2.7 - - 3.2 - - 3.3 -before_script: "./travis/before-script.sh" -script: python setup.py test -notifications: - email: false diff --git a/AUTHORS.rst b/AUTHORS.rst index b7b6746..07d9fd0 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -11,3 +11,10 @@ Contributors * `Tomscytale `_ * `Matheus Marchini `_ * `Rory McCann `_ +* `Matheus Marchini `_ +* `signalkraft `_ +* `Pietro Delsante `_ +* `Hung Le `_ +* `Zachary Kazanski `_ +* `Fasih Ahmad Fakhri `_ +* `Alan Hamlett `_ diff --git a/HISTORY.rst b/HISTORY.rst index 62240fb..7470f24 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,5 +1,23 @@ Changelog --------- +* `2.0.0` + * Drop support for Python <= 3.7 +* `1.0.0` + * By default PDFKit now passes "quiet" option to wkhtmltopdf. Now if you need to get output you should pass "verbose=True" to API calls + * Fix different issues with searching for wkhtmltopdf binary + * Update error handling for wkhtmltopdf + * Fix different issues with options handling + * Better handling of unicode input + * Switch from Travis to GitHub Actions + * Update README +* `0.6.1` + * Fix regression on python 3+ when trying to decode pdf output +* `0.6.0` + * Support repeatable options + * Support multiple values for some options + * Fix some corner cases when specific argument order is required + * Some Python 3+ compatibility fixes + * Update README * `0.5.0` * Allow passing multiple css files * Fix problems with external file encodings diff --git a/README.rst b/README.rst index 96c0cdf..2822f0c 100644 --- a/README.rst +++ b/README.rst @@ -2,16 +2,21 @@ Python-PDFKit: HTML to PDF wrapper ================================== -.. image:: https://travis-ci.org/JazzCore/python-pdfkit.png?branch=master - :target: https://travis-ci.org/JazzCore/python-pdfkit +.. image:: https://github.com/JazzCore/python-pdfkit/actions/workflows/main.yaml/badge.svg?branch=master + :target: https://github.com/JazzCore/python-pdfkit/actions/workflows/main.yaml .. image:: https://badge.fury.io/py/pdfkit.svg :target: http://badge.fury.io/py/pdfkit -Python 2 and 3 wrapper for wkhtmltopdf utility to convert HTML to PDF using Webkit. +Python 3 wrapper for wkhtmltopdf utility to convert HTML to PDF using Webkit. This is adapted version of `ruby PDFKit `_ library, so big thanks to them! +Deprecation Warning +------------------- + +This library has been deprecated to match the `wkhtmltopdf project status `_. + Installation ------------ @@ -29,7 +34,13 @@ Installation $ sudo apt-get install wkhtmltopdf -**Warning!** Version in debian/ubuntu repos have reduced functionality (because it compiled without the wkhtmltopdf QT patches), such as adding outlines, headers, footers, TOC etc. To use this options you should install static binary from `wkhtmltopdf `_ site or you can use `this script `_. +* macOS: + +.. code-block:: bash + + $ brew install homebrew/cask/wkhtmltopdf + +**Warning!** Version in debian/ubuntu repos have reduced functionality (because it compiled without the wkhtmltopdf QT patches), such as adding outlines, headers, footers, TOC etc. To use this options you should install static binary from `wkhtmltopdf `_ site or you can use `this script `_ (written for CI servers with Ubuntu 18.04 Bionic, but it could work on other Ubuntu/Debian versions). * Windows and other options: check wkhtmltopdf `homepage `_ for binary installers @@ -64,10 +75,10 @@ If you wish to further process generated PDF, you can read it to a variable: .. code-block:: python - # Use False instead of output path to save pdf to a variable - pdf = pdfkit.from_url('http://google.com', False) + # Without output_path, PDF is returned for assigning to a variable + pdf = pdfkit.from_url('http://google.com') -You can specify all wkhtmltopdf `options `_. You can drop '--' in option name. If option without value, use *None, False* or *''* for dict value: +You can specify all wkhtmltopdf `options `_. You can drop '--' in option name. If option without value, use *None, False* or *''* for dict value:. For repeatable options (incl. allow, cookie, custom-header, post, postfile, run-script, replace) you may use a list or a tuple. With option that need multiple values (e.g. --custom-header Authorization secret) we may use a 2-tuple (see example below). .. code-block:: python @@ -78,22 +89,26 @@ You can specify all wkhtmltopdf `options 0: + self.wkhtmltopdf = lines[0].strip() + with open(self.wkhtmltopdf) as f: pass - except IOError: + except (IOError, FileNotFoundError) as e: raise IOError('No wkhtmltopdf executable found: "%s"\n' 'If this file exists please check that this process can ' - 'read it. Otherwise please install wkhtmltopdf - ' + 'read it or you can pass path to it manually in method call, ' + 'check README. Otherwise please install wkhtmltopdf - ' 'https://github.com/JazzCore/python-pdfkit/wiki/Installing-wkhtmltopdf' % self.wkhtmltopdf) + + + self.environ = environ + + if not self.environ: + self.environ = os.environ + + for key in self.environ.keys(): + if not isinstance(self.environ[key], str): + self.environ[key] = str(self.environ[key]) diff --git a/pdfkit/pdfkit.py b/pdfkit/pdfkit.py index 3591d5b..88b3c92 100644 --- a/pdfkit/pdfkit.py +++ b/pdfkit/pdfkit.py @@ -2,9 +2,9 @@ import re import subprocess import sys +from collections import OrderedDict from .source import Source from .configuration import Configuration -from itertools import chain import io import codecs @@ -32,65 +32,149 @@ def __str__(self): return self.msg def __init__(self, url_or_file, type_, options=None, toc=None, cover=None, - css=None, configuration=None): + css=None, configuration=None, cover_first=False, verbose=False): self.source = Source(url_or_file, type_) self.configuration = (Configuration() if configuration is None else configuration) - self.wkhtmltopdf = self.configuration.wkhtmltopdf.decode('utf-8') + try: + self.wkhtmltopdf = self.configuration.wkhtmltopdf.decode('utf-8') + except AttributeError: + self.wkhtmltopdf = self.configuration.wkhtmltopdf - self.options = dict() + self.options = OrderedDict() if self.source.isString(): self.options.update(self._find_options_in_meta(url_or_file)) - if options is not None: self.options.update(options) - self.options = self._normalize_options(self.options) - toc = {} if toc is None else toc - self.toc = self._normalize_options(toc) + self.environ = self.configuration.environ + + if options is not None: + self.options.update(options) + + self.toc = {} if toc is None else toc self.cover = cover + self.cover_first = cover_first + self.verbose = verbose self.css = css self.stylesheets = [] - def command(self, path=None): + def _genargs(self, opts): + """ + Generator of args parts based on options specification. + + Note: Empty parts will be filtered out at _command generator + """ + for optkey, optval in self._normalize_options(opts): + yield optkey + + if isinstance(optval, (list, tuple)): + assert len(optval) == 2 and optval[0] and optval[1], 'Option value can only be either a string or a (tuple, list) of 2 items' + yield optval[0] + yield optval[1] + else: + yield optval + + def _command(self, path=None): + """ + Generator of all command parts + """ if self.css: self._prepend_css(self.css) - args = [self.wkhtmltopdf] + yield self.wkhtmltopdf + + if not self.verbose: + self.options.update({'--quiet': ''}) - args += list(chain.from_iterable(list(self.options.items()))) - args = [_f for _f in args if _f] + for argpart in self._genargs(self.options): + if argpart: + yield argpart + + if self.cover and self.cover_first: + yield 'cover' + yield self.cover if self.toc: - args.append('toc') - args += list(chain.from_iterable(list(self.toc.items()))) - if self.cover: - args.append('cover') - args.append(self.cover) + yield 'toc' + for argpart in self._genargs(self.toc): + if argpart: + yield argpart + + if self.cover and not self.cover_first: + yield 'cover' + yield self.cover # If the source is a string then we will pipe it into wkhtmltopdf # If the source is file-like then we will read from it and pipe it in if self.source.isString() or self.source.isFileObj(): - args.append('-') + yield '-' else: if isinstance(self.source.source, str): - args.append(self.source.to_s()) + yield self.source.to_s() else: - args += self.source.source + for s in self.source.source: + yield s # If output_path evaluates to False append '-' to end of args # and wkhtmltopdf will pass generated PDF to stdout if path: - args.append(path) + yield path else: - args.append('-') + yield '-' + + def command(self, path=None): + return list(self._command(path)) - return args + @staticmethod + def handle_error(exit_code, stderr): + if exit_code == 0: + return + + stderr_lines = stderr.splitlines() + + # Sometimes wkhtmltopdf will exit with non-zero + # even if it finishes generation. + # If will display 'Done' in the second last line + if len(stderr_lines) > 1 and stderr.splitlines()[-2].strip() == 'Done': + return + + if 'cannot connect to X server' in stderr: + raise IOError('%s\n' + 'You will need to run wkhtmltopdf within a "virtual" X server.\n' + 'Go to the link below for more information\n' + 'https://github.com/JazzCore/python-pdfkit/wiki/Using-wkhtmltopdf-without-X-server' % stderr) + + if 'Error' in stderr: + raise IOError('wkhtmltopdf reported an error:\n' + stderr) + + error_msg = stderr or 'Unknown Error' + raise IOError("wkhtmltopdf exited with non-zero code {0}. error:\n{1}".format(exit_code, error_msg)) def to_pdf(self, path=None): args = self.command(path) - result = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + if sys.platform == 'win32': + # hide cmd window + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + startupinfo.wShowWindow = subprocess.SW_HIDE + + result = subprocess.Popen( + args, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=self.environ, + startupinfo=startupinfo + ) + else: + result = subprocess.Popen( + args, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=self.environ + ) # If the source is a string then we will pipe it into wkhtmltopdf. # If we want to add custom CSS to file then we read input file to @@ -102,63 +186,59 @@ def to_pdf(self, path=None): input = self.source.source.read().encode('utf-8') else: input = None - stdout, stderr = result.communicate(input=input) + stdout, stderr = result.communicate(input=input) + stderr = stderr or stdout or b"" + stderr = stderr.decode('utf-8', errors='replace') exit_code = result.returncode - - if 'cannot connect to X server' in stderr.decode('utf-8'): - raise IOError('%s\n' - 'You will need to run whktmltopdf within a "virutal" X server.\n' - 'Go to the link above for more information\n' - 'https://github.com/JazzCore/python-pdfkit/wiki/Using-wkhtmltopdf-without-X-server' % stderr.decode('utf-8')) - - if 'Error' in stderr.decode('utf-8'): - raise IOError('wkhtmltopdf reported an error:\n' + stderr.decode('utf-8')) - - if exit_code != 0: - raise IOError("wkhtmltopdf exited with non-zero code {0}. error:\n{1}".format(exit_code, stderr.decode("utf-8"))) + self.handle_error(exit_code, stderr) # Since wkhtmltopdf sends its output to stderr we will capture it # and properly send to stdout if '--quiet' not in args: - sys.stdout.write(stderr.decode('utf-8')) + sys.stdout.write(stderr) if not path: return stdout - else: - try: - with codecs.open(path, encoding='utf-8') as f: - # read 4 bytes to get PDF signature '%PDF' - text = f.read(4) - if text == '': - raise IOError('Command failed: %s\n' - 'Check whhtmltopdf output without \'quiet\' ' - 'option' % ' '.join(args)) - return True - except IOError: - raise IOError('Command failed: %s\n' - 'Check whhtmltopdf output without \'quiet\' option' % - ' '.join(args)) + + try: + with codecs.open(path, encoding='utf-8') as f: + # read 4 bytes to get PDF signature '%PDF' + text = f.read(4) + if text == '': + raise IOError('Command failed: %s\n' + 'Check whhtmltopdf output without \'quiet\' ' + 'option' % ' '.join(args)) + return True + except (IOError, OSError) as e: + raise IOError('Command failed: %s\n' + 'Check whhtmltopdf output without \'quiet\' option\n' + '%s ' % (' '.join(args), e)) def _normalize_options(self, options): - """Updates a dict of config options to make then usable on command line + """ Generator of 2-tuples (option-key, option-value). + When options spec is a list, generate a 2-tuples per list item. :param options: dict {option name: value} returns: - dict: {option name: value} - option names lower cased and prepended with - '--' if necessary. Non-empty values cast to str + iterator (option-key, option-value) + - option names lower cased and prepended with + '--' if necessary. Non-empty values cast to str """ - normalized_options = {} for key, value in list(options.items()): - if not '--' in key: + if '--' not in key: normalized_key = '--%s' % self._normalize_arg(key) else: normalized_key = self._normalize_arg(key) - normalized_options[normalized_key] = str(value) if value else value - return normalized_options + if isinstance(value, (list, tuple)): + for optval in value: + yield (normalized_key, optval) + else: + normalized_value = '' if isinstance(value, bool) else value + yield (normalized_key, str(normalized_value) if value else value) def _normalize_arg(self, arg): return arg.lower() diff --git a/pdfkit/source.py b/pdfkit/source.py index 44ca1bf..3005bbd 100644 --- a/pdfkit/source.py +++ b/pdfkit/source.py @@ -2,13 +2,17 @@ import os import io +# Python 2.x and 3.x support for checking string types +basestring = str.__mro__[-2] +unicode = type(u'') + class Source(object): def __init__(self, url_or_file, type_): self.source = url_or_file self.type = type_ - if self.type is 'file': + if self.type == 'file': self.checkFiles() def isUrl(self): @@ -38,4 +42,12 @@ def isFileObj(self): return hasattr(self.source, 'read') def to_s(self): - return self.source + # String should be in unicode(python2)/str(python3) type since we will + # later encode it to utf-8 bytes array to pipe into subprocess + # With some charachters on python 2 it sets to str type (bytes) which is wrong + # and cant later encode properly, this is a workaround for this. + # See issue #42 + if isinstance(self.source, unicode): + return self.source + else: + return unicode(self.source, 'utf-8') diff --git a/setup.py b/setup.py index 251c3ab..4006a34 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,3 @@ -import codecs from distutils.core import setup from setuptools.command.test import test as TestCommand import re @@ -20,7 +19,7 @@ def run_tests(self): def long_description(): """Pre-process the README so that PyPi can render it properly.""" - with codecs.open('README.rst', encoding='utf8') as f: + with open('README.rst') as f: rst = f.read() code_block = '(:\n\n)?\.\. code-block::.*' rst = re.sub(code_block, '::', rst) @@ -40,9 +39,10 @@ def long_description(): author_email='stgolovanov@gmail.com', classifiers=[ 'Programming Language :: Python', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.2', - 'Programming Language :: Python :: 3.3', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', 'Topic :: Text Processing', 'Topic :: Text Processing :: General', 'Topic :: Text Processing :: Markup', diff --git a/tests/fixtures/issue_42_bad_char_page.html b/tests/fixtures/issue_42_bad_char_page.html new file mode 100644 index 0000000..a6044d5 --- /dev/null +++ b/tests/fixtures/issue_42_bad_char_page.html @@ -0,0 +1,6 @@ + + + + This is a bad character -->   <-- + + diff --git a/tests/pdfkit-tests.py b/tests/pdfkit-tests.py index 9b72ade..969afbd 100644 --- a/tests/pdfkit-tests.py +++ b/tests/pdfkit-tests.py @@ -6,6 +6,10 @@ import unittest +if sys.version_info[0] == 2 and sys.version_info[1] == 7: + unittest.TestCase.assertRegex = unittest.TestCase.assertRegexpMatches + + #Prepend ../ to PYTHONPATH so that we can import PDFKIT form there. TESTS_ROOT = os.path.abspath(os.path.dirname(__file__)) sys.path.insert(0, os.path.realpath(os.path.join(TESTS_ROOT, '..'))) @@ -42,11 +46,82 @@ def test_file_source_with_path(self): def test_options_parsing(self): r = pdfkit.PDFKit('html', 'string', options={'page-size': 'Letter'}) - self.assertTrue(r.options['--page-size']) + test_command = r.command('test') + idx = test_command.index('--page-size') # Raise exception in case of not found + self.assertTrue(test_command[idx+1] == 'Letter') def test_options_parsing_with_dashes(self): r = pdfkit.PDFKit('html', 'string', options={'--page-size': 'Letter'}) - self.assertTrue(r.options['--page-size']) + + test_command = r.command('test') + idx = test_command.index('--page-size') # Raise exception in case of not found + self.assertTrue(test_command[idx+1] == 'Letter') + + def test_options_parsing_with_tuple(self): + options = { + '--custom-header': [ + ('Accept-Encoding','gzip') + ] + } + r = pdfkit.PDFKit('html', 'string', options=options) + command = r.command() + idx1 = command.index('--custom-header') # Raise exception in case of not found + self.assertTrue(command[idx1 + 1] == 'Accept-Encoding') + self.assertTrue(command[idx1 + 2] == 'gzip') + + def test_options_parsing_with_tuple_no_dashes(self): + options = { + 'custom-header': [ + ('Accept-Encoding','gzip') + ] + } + r = pdfkit.PDFKit('html', 'string', options=options) + command = r.command() + idx1 = command.index('--custom-header') # Raise exception in case of not found + self.assertTrue(command[idx1 + 1] == 'Accept-Encoding') + self.assertTrue(command[idx1 + 2] == 'gzip') + + def test_repeatable_options(self): + roptions = { + '--page-size': 'Letter', + 'cookies': [ + ('test_cookie1','cookie_value1'), + ('test_cookie2','cookie_value2'), + ] + } + + r = pdfkit.PDFKit('html', 'string', options=roptions) + + test_command = r.command('test') + + idx1 = test_command.index('--page-size') # Raise exception in case of not found + self.assertTrue(test_command[idx1 + 1] == 'Letter') + + self.assertTrue(test_command.count('--cookies') == 2) + + idx2 = test_command.index('--cookies') + self.assertTrue(test_command[idx2 + 1] == 'test_cookie1') + self.assertTrue(test_command[idx2 + 2] == 'cookie_value1') + + idx3 = test_command.index('--cookies', idx2 + 2) + self.assertTrue(test_command[idx3 + 1] == 'test_cookie2') + self.assertTrue(test_command[idx3 + 2] == 'cookie_value2') + + def test_empty_cookie_value(self): + roptions = { + '--page-size': 'Letter', + 'cookies': [ + ('test_cookie1','""'), + ('test_cookie2','cookie_value2'), + ] + } + + r = pdfkit.PDFKit('html', 'string', options=roptions) + + test_command = r.command('test') + + idx1 = test_command.index('--page-size') # Raise exception in case of not found + self.assertTrue(test_command[idx1 + 1] == 'Letter') def test_custom_configuration(self): conf = pdfkit.configuration() @@ -142,8 +217,9 @@ def test_skip_nonpdfkit_tags(self): def test_toc_handling_without_options(self): r = pdfkit.PDFKit('hmtl', 'string', toc={'xsl-style-sheet': 'test.xsl'}) - self.assertEqual(r.command()[1], 'toc') - self.assertEqual(r.command()[2], '--xsl-style-sheet') + self.assertEqual(r.command()[1], '--quiet') + self.assertEqual(r.command()[2], 'toc') + self.assertEqual(r.command()[3], '--xsl-style-sheet') def test_toc_with_options(self): options = { @@ -156,14 +232,20 @@ def test_toc_with_options(self): } r = pdfkit.PDFKit('html', 'string', options=options, toc={'xsl-style-sheet': 'test.xsl'}) - self.assertEqual(r.command()[1 + len(options) * 2], 'toc') - self.assertEqual(r.command()[1 + len(options) * 2 + 1], '--xsl-style-sheet') + command = r.command() + + self.assertEqual(command[1 + len(options) * 2], '--quiet') + self.assertEqual(command[2 + len(options) * 2], 'toc') + self.assertEqual(command[2 + len(options) * 2 + 1], '--xsl-style-sheet') def test_cover_without_options(self): r = pdfkit.PDFKit('html', 'string', cover='test.html') - self.assertEqual(r.command()[1], 'cover') - self.assertEqual(r.command()[2], 'test.html') + command = r.command() + + self.assertEqual(command[1], '--quiet') + self.assertEqual(command[2], 'cover') + self.assertEqual(command[3], 'test.html') def test_cover_with_options(self): options = { @@ -176,8 +258,11 @@ def test_cover_with_options(self): } r = pdfkit.PDFKit('html', 'string', options=options, cover='test.html') - self.assertEqual(r.command()[1 + len(options) * 2], 'cover') - self.assertEqual(r.command()[1 + len(options) * 2 + 1], 'test.html') + command = r.command() + + self.assertEqual(command[1 + len(options) * 2], '--quiet') + self.assertEqual(command[2 + len(options) * 2], 'cover') + self.assertEqual(command[2 + len(options) * 2 + 1], 'test.html') def test_cover_and_toc(self): options = { @@ -192,6 +277,19 @@ def test_cover_and_toc(self): command = r.command() self.assertEqual(command[-7:], ['toc', '--xsl-style-sheet', 'test.xsl', 'cover', 'test.html', '-', '-']) + def test_cover_and_toc_cover_first(self): + options = { + 'page-size': 'Letter', + 'margin-top': '0.75in', + 'margin-right': '0.75in', + 'margin-bottom': '0.75in', + 'margin-left': '0.75in', + 'encoding': "UTF-8" + } + r = pdfkit.PDFKit('html', 'string', options=options, toc={'xsl-style-sheet': 'test.xsl'}, cover='test.html', cover_first=True) + command = r.command() + self.assertEqual(command[-7:], ['cover', 'test.html', 'toc', '--xsl-style-sheet', 'test.xsl', '-', '-']) + def test_outline_options(self): options = { 'outline': None, @@ -211,10 +309,25 @@ def test_filter_empty_and_none_values_in_opts(self): 'quiet': False } - r = pdfkit.PDFKit('html', 'string', options=options) + r = pdfkit.PDFKit('html', 'string', options=options, verbose=True) cmd = r.command() self.assertEqual(len(cmd), 6) + def test_verbose_option(self): + options = { + 'outline': '', + 'footer-line': None, + 'quiet': True + } + + r = pdfkit.PDFKit('html', 'string') + cmd = r.command() + self.assertTrue('--quiet' in cmd) + + r = pdfkit.PDFKit('html', 'string', options=options) + cmd = r.command() + self.assertTrue('--quiet' in cmd) + class TestPDFKitGeneration(unittest.TestCase): """Test to_pdf() method""" @@ -231,6 +344,11 @@ def test_pdf_generation(self): pdf = r.to_pdf('out.pdf') self.assertTrue(pdf) + def test_pdf_generation_into_variable(self): + r = pdfkit.PDFKit('html', 'string', options={'page-size': 'Letter'}) + pdf = r.to_pdf() + self.assertTrue(pdf[0:4].decode('ascii').startswith('%PDF')) + def test_raise_error_with_invalid_url(self): r = pdfkit.PDFKit('wrongurl', 'url') with self.assertRaises(IOError): @@ -328,7 +446,51 @@ def test_raise_error_if_bad_wkhtmltopdf_option(self): r.to_pdf() raised_exception = cm.exception - self.assertRegexpMatches(str(raised_exception), '^wkhtmltopdf exited with non-zero code 1. error:\nUnknown long argument --bad-option\r?\n') + self.assertRegex(str(raised_exception), '^wkhtmltopdf exited with non-zero code 1. error:\nUnknown long argument --bad-option\r?\n') + + def test_issue_42_encode_file_with_unicode_char(self): + with open('fixtures/issue_42_bad_char_page.html', 'r') as f: + data = f.read() + r = pdfkit.PDFKit(data, 'string') + output = r.to_pdf() + self.assertEqual(output[:4].decode('utf-8'), '%PDF') + + def test_issue_140_empty_cookie_value(self): + roptions_bad = { + '--page-size': 'Letter', + 'cookie': [ + ('test_cookie1',''), + ('test_cookie2','cookie_value2'), + ] + } + + roptions_good = { + '--page-size': 'Letter', + 'cookie': [ + ('test_cookie1','""'), + ('test_cookie2','cookie_value2'), + ] + } + + r1 = pdfkit.PDFKit('html', 'string', options=roptions_bad) + + self.assertRaises(AssertionError, r1.to_pdf) + + r2 = pdfkit.PDFKit('html', 'string', options=roptions_good) + output2 = r2.to_pdf() + + self.assertEqual(output2[:4].decode('utf-8'), '%PDF') + + def test_issue_169_quiet_boolean_True(self): + options = { + 'outline': '', + 'footer-line': None, + 'quiet': True + } + + r = pdfkit.PDFKit('html', 'string', options=options) + output = r.to_pdf() + self.assertEqual(output[:4].decode('utf-8'), '%PDF') if __name__ == "__main__": unittest.main() diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..2ccafd3 --- /dev/null +++ b/tox.ini @@ -0,0 +1,6 @@ +[tox] +envlist = py38,py39,py310,py311 + +[testenv] +deps = pytest +commands = python setup.py test diff --git a/travis/before-script.sh b/travis/before-script.sh deleted file mode 100755 index bd171e4..0000000 --- a/travis/before-script.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh - -sudo apt-get install -y openssl build-essential xorg libssl-dev -wget http://wkhtmltopdf.googlecode.com/files/wkhtmltopdf-0.10.0_rc2-static-amd64.tar.bz2 -tar xvjf wkhtmltopdf-0.10.0_rc2-static-amd64.tar.bz2 -sudo chown root:root wkhtmltopdf-amd64 -sudo mv wkhtmltopdf-amd64 /usr/bin/wkhtmltopdf