diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml new file mode 100644 index 00000000..b696b926 --- /dev/null +++ b/.github/workflows/docs.yaml @@ -0,0 +1,33 @@ +name: docs + +on: ["push", "pull_request"] + +jobs: + docs: + # We want to run on external PRs, but not on our own internal PRs as they'll be run + # by the push to the branch. + if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository + + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + cache: "pip" + cache-dependency-path: | + requirements.txt + docs/requirements.txt + + - name: Build + run: | + pip install -r requirements.txt + make cython + + - name: Sphinx Documentation Generator + run: | + pip install -r docs/requirements.txt + make docs diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml new file mode 100644 index 00000000..198cf7b5 --- /dev/null +++ b/.github/workflows/lint.yaml @@ -0,0 +1,22 @@ +name: lint + +on: ["push", "pull_request"] + +jobs: + lint: + # We want to run on external PRs, but not on our own internal PRs as they'll be run + # by the push to the branch. + if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository + + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: ruff check + run: | + pipx run ruff check --diff msgpack/ test/ setup.py + + - name: ruff format + run: | + pipx run ruff format --diff msgpack/ test/ setup.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..6b1664ae --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,61 @@ +name: Run tests +on: + push: + branches: [main] + pull_request: + create: + +jobs: + test: + strategy: + matrix: + os: ["ubuntu-latest", "windows-latest", "windows-11-arm", "macos-latest"] + py: ["3.14", "3.14t", "3.13", "3.12", "3.11", "3.10"] + exclude: + - os: windows-11-arm + py: "3.10" + runs-on: ${{ matrix.os }} + name: Run test with Python ${{ matrix.py }} on ${{ matrix.os }} + + steps: + - name: Checkout + uses: actions/checkout@v5 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.py }} + allow-prereleases: true + cache: "pip" + + - name: Prepare + shell: bash + run: | + python -m pip install -r requirements.txt pytest + + - name: Build + shell: bash + run: | + make cython + pip install . 
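+      # The two "Test" steps below run the same suite twice: once against the
+      # C extension built and installed above, and once forcing the pure-Python
+      # fallback via the MSGPACK_PUREPYTHON environment variable.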
+ + - name: Test (C extension) + shell: bash + run: | + pytest -v test + + - name: Test (pure Python fallback) + shell: bash + run: | + MSGPACK_PUREPYTHON=1 pytest -v test + + - name: build packages + shell: bash + run: | + python -m build -nv + + - name: upload packages + uses: actions/upload-artifact@v4 + with: + name: dist-${{ matrix.os }}-${{ matrix.py }} + path: dist diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml new file mode 100644 index 00000000..531abbc3 --- /dev/null +++ b/.github/workflows/wheel.yml @@ -0,0 +1,88 @@ +name: Build sdist and Wheels +on: + push: + branches: [main] + release: + types: + - published + workflow_dispatch: + +jobs: + build_wheels: + strategy: + matrix: + # macos-13 is for intel + os: ["ubuntu-24.04", "ubuntu-24.04-arm", "windows-latest", "windows-11-arm", "macos-13", "macos-latest"] + runs-on: ${{ matrix.os }} + name: Build wheels on ${{ matrix.os }} + + steps: + - uses: actions/checkout@v6 + - uses: actions/setup-python@v6 + with: + python-version: "3.x" + cache: "pip" + - name: Cythonize + shell: bash + run: | + pip install -r requirements.txt + make cython + + - name: Build + uses: pypa/cibuildwheel@v3.3.0 + env: + CIBW_TEST_REQUIRES: "pytest" + CIBW_TEST_COMMAND: "pytest {package}/test" + CIBW_SKIP: "pp* cp38-* cp39-* cp310-win_arm64" + + - name: Build sdist + if: runner.os == 'Linux' && runner.arch == 'X64' + run: | + pip install build + python -m build -s -o wheelhouse + + - name: Upload Wheels to artifact + uses: actions/upload-artifact@v4 + with: + name: wheels-${{ matrix.os }} + path: wheelhouse + + # combine all wheels into one artifact + combine_wheels: + needs: [build_wheels] + runs-on: ubuntu-latest + steps: + - uses: actions/download-artifact@v4 + with: + # unpacks all CIBW artifacts into dist/ + pattern: wheels-* + path: dist + merge-multiple: true + + - name: Upload Wheels to artifact + uses: actions/upload-artifact@v4 + with: + name: wheels-all + path: dist + + # https://github.com/pypa/cibuildwheel/blob/main/examples/github-deploy.yml + upload_pypi: + needs: [build_wheels] + runs-on: ubuntu-latest + environment: pypi + permissions: + id-token: write + if: github.event_name == 'release' && github.event.action == 'published' + # or, alternatively, upload to PyPI on every tag starting with 'v' (remove on: release above to use this) + # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') + steps: + - uses: actions/download-artifact@v4 + with: + # unpacks all CIBW artifacts into dist/ + pattern: wheels-* + path: dist + merge-multiple: true + + - uses: pypa/gh-action-pypi-publish@release/v1 + #with: + # To test: repository-url: https://test.pypi.org/legacy/ diff --git a/.gitignore b/.gitignore index 1c29d6e7..341be631 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,17 @@ MANIFEST build/* dist/* +.tox +.python-version *.pyc *.pyo *.so +*~ msgpack/__version__.py -msgpack/_msgpack.c +msgpack/*.c +msgpack/*.cpp +*.egg-info +/venv +/tags +/docs/_build +.cache diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..88d87182 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,24 @@ +# Read the Docs configuration file for Sphinx projects. +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details. + +version: 2 + +build: + os: ubuntu-22.04 + tools: + python: "3.11" + apt_packages: + - build-essential + jobs: + pre_install: + - pip install -r requirements.txt + - make cython + +python: + install: + - method: pip + path: . 
+ - requirements: docs/requirements.txt + +sphinx: + configuration: docs/conf.py diff --git a/ChangeLog.rst b/ChangeLog.rst index 89a97d78..beeab15c 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,19 +1,561 @@ +1.1.2 +===== + +Release Date: 2025-10-08 + +This release does not change source code. It updates only building wheels: + +* Update Cython to v3.1.4 +* Update cibuildwheel to v3.2.0 +* Drop Python 3.8 +* Add Python 3.14 +* Add windows-arm + +1.1.1 +===== + +Release Date: 2025-06-13 + +* No change from 1.1.1rc1. + +1.1.1rc1 +======== + +Release Date: 2025-06-06 + +* Update Cython to 3.1.1 and cibuildwheel to 2.23.3. + +1.1.0 +===== + +Release Date: 2024-09-10 + +* use ``PyLong_*`` instead of ``PyInt_*`` for compatibility with + future Cython. (#620) + +1.1.0rc2 +======== + +Release Date: 2024-08-19 + +* Update Cython to 3.0.11 for better Python 3.13 support. +* Update cibuildwheel to 2.20.0 to build Python 3.13 wheels. + +1.1.0rc1 +======== + +Release Date: 2024-05-07 + +* Update Cython to 3.0.10 to reduce C warnings and future support for Python 3.13. +* Stop using C++ mode in Cython to reduce compile error on some compilers. +* ``Packer()`` has ``buf_size`` option to specify initial size of + internal buffer to reduce reallocation. +* The default internal buffer size of ``Packer()`` is reduced from + 1MiB to 256KiB to optimize for common use cases. Use ``buf_size`` + if you are packing large data. +* ``Timestamp.to_datetime()`` and ``Timestamp.from_datetime()`` become + more accurate by avoiding floating point calculations. (#591) +* The Cython code for ``Unpacker`` has been slightly rewritten for maintainability. +* The fallback implementation of ``Packer()`` and ``Unpacker()`` now uses keyword-only + arguments to improve compatibility with the Cython implementation. + +1.0.8 +===== + +Release Date: 2024-03-01 + +* Update Cython to 3.0.8. This fixes memory leak when iterating + ``Unpacker`` object on Python 3.12. +* Do not include C/Cython files in binary wheels. + + +1.0.7 +===== + +Release Date: 2023-09-28 + +* Fix build error of extension module on Windows. (#567) +* ``setup.py`` doesn't skip build error of extension module. (#568) + + +1.0.6 +===== + +Release Date: 2023-09-21 + +.. note:: + v1.0.6 Wheels for Windows don't contain extension module. + Please upgrade to v1.0.7 or newer. + +* Add Python 3.12 wheels (#517) +* Remove Python 2.7, 3.6, and 3.7 support + + +1.0.5 +===== + +Release Date: 2023-03-08 + +* Use ``__BYTE_ORDER__`` instead of ``__BYTE_ORDER`` for portability. (#513, #514) +* Add Python 3.11 wheels (#517) +* fallback: Fix packing multidimensional memoryview (#527) + +1.0.4 +===== + +Release Date: 2022-06-03 + +* Support Python 3.11 (beta). +* Don't define `__*_ENDIAN__` macro on Unix. by @methane in https://github.com/msgpack/msgpack-python/pull/495 +* Use PyFloat_Pack8() on Python 3.11a7 by @vstinner in https://github.com/msgpack/msgpack-python/pull/499 +* Fix Unpacker max_buffer_length handling by @methane in https://github.com/msgpack/msgpack-python/pull/506 + +1.0.3 +===== + +Release Date: 2021-11-24 JST + +* Fix Docstring (#459) +* Fix error formatting (#463) +* Improve error message about strict_map_key (#485) + +1.0.2 +===== + +* Fix year 2038 problem regression in 1.0.1. (#451) + +1.0.1 +===== + +* Add Python 3.9 and linux/arm64 wheels. 
(#439) +* Fixed Unpacker.tell() after read_bytes() (#426) +* Fixed unpacking datetime before epoch on Windows (#433) +* Fixed fallback Packer didn't check DateTime.tzinfo (#434) + +1.0.0 +===== + +Release Date: 2020-02-17 + +* Remove Python 2 support from the ``msgpack/_cmsgpack``. + ``msgpack/fallback`` still supports Python 2. +* Remove ``encoding`` option from the Packer and Unpacker. +* Unpacker: The default value of ``max_buffer_size`` is changed to 100MiB. +* Unpacker: ``strict_map_key`` is True by default now. +* Unpacker: String map keys are interned. +* Drop old buffer protocol support. +* Support Timestamp type. +* Support serializing and decerializing ``datetime`` object + with tzinfo. +* Unpacker: ``Fix Unpacker.read_bytes()`` in fallback implementation. (#352) + + +0.6.2 +===== + +Release Date: 2019-09-20 + +* Support Python 3.8. +* Update Cython to 0.29.13 for support Python 3.8. +* Some small optimizations. + + +0.6.1 +====== + +Release Date: 2019-01-25 + +This release is for mitigating pain caused by v0.6.0 reduced max input limits +for security reason. + +* ``unpackb(data)`` configures ``max_*_len`` options from ``len(data)``, + instead of static default sizes. + +* ``Unpacker(max_buffer_len=N)`` configures ``max_*_len`` options from ``N``, + instead of static default sizes. + +* ``max_bin_len``, ``max_str_len``, and ``max_ext_len`` are deprecated. + Since this is minor release, it's document only deprecation. + + +0.6.0 +====== + +Release Date: 2018-11-30 + +This release contains some backward incompatible changes for security reason (DoS). + +Important changes +----------------- + +* unpacker: Default value of input limits are smaller than before to avoid DoS attack. + If you need to handle large data, you need to specify limits manually. (#319) + +* Unpacker doesn't wrap underlying ``ValueError`` (including ``UnicodeError``) into + ``UnpackValueError``. If you want to catch all exception during unpack, you need + to use ``try ... except Exception`` with minimum try code block. (#323, #233) + +* ``PackValueError`` and ``PackOverflowError`` are also removed. You need to catch + normal ``ValueError`` and ``OverflowError``. (#323, #233) + +* Unpacker has ``strict_map_key`` option now. When it is true, only bytes and str + (unicode in Python 2) are allowed for map keys. It is recommended to avoid + hashdos. Default value of this option is False for backward compatibility reason. + But it will be changed True in 1.0. (#296, #334) + +Other changes +------------- + +* Extension modules are merged. There is ``msgpack._cmsgpack`` instead of + ``msgpack._packer`` and ``msgpack._unpacker``. (#314, #328) + +* Add ``Unpacker.getbuffer()`` method. (#320) + +* unpacker: ``msgpack.StackError`` is raised when input data contains too + nested data. (#331) + +* unpacker: ``msgpack.FormatError`` is raised when input data is not valid + msgpack format. (#331) + + +0.5.6 +====== + +* Fix fallback.Unpacker.feed() dropped unused data from buffer (#287) +* Resurrect fallback.unpack() and _unpacker.unpack(). + They were removed at 0.5.5 but it breaks backward compatibility. (#288, #290) + +0.5.5 +====== + +* Fix memory leak in pure Python Unpacker.feed() (#283) +* Fix unpack() didn't support `raw` option (#285) + +0.5.4 +====== + +* Undeprecate ``unicode_errors`` option. (#278) + +0.5.3 +====== + +* Fixed regression when passing ``unicode_errors`` to Packer but not ``encoding``. (#277) + +0.5.2 +====== + +* Add ``raw`` option to Unpacker. It is preferred way than ``encoding`` option. 
+ +* Packer.pack() reset buffer on exception (#274) + + +0.5.1 +====== + +* Remove FutureWarning about use_bin_type option (#271) + +0.5.0 +====== + +There are some deprecations. Please read changes carefully. + +Changes +------- + +* Drop Python 2.6 and ~3.4 support. Python 2.7 and 3.5+ are supported. + +* Deprecate useless custom exceptions. Use ValueError instead of PackValueError, + Exception instead of PackException and UnpackException, etc... + See msgpack/exceptions.py + +* Add *strict_types* option to packer. It can be used to serialize subclass of + builtin types. For example, when packing object which type is subclass of dict, + ``default()`` is called. ``default()`` is called for tuple too. + +* Pure Python implementation supports packing memoryview object. + +* Support packing bytearray. + +* Add ``Unpacker.tell()``. And ``write_bytes`` option is deprecated. + + +Bugs fixed +---------- + +* Fixed zero length raw can't be decoded when encoding is specified. (#236) + + +0.4.8 +===== +:release date: 2016-07-29 + +Bugs fixed +---------- + +* Calling ext_hook with wrong length. (Only on Windows, maybe. #203) + + +0.4.7 +===== +:release date: 2016-01-25 + +Bugs fixed +---------- + +* Memory leak when unpack is failed + +Changes +------- + +* Reduce compiler warnings while building extension module +* unpack() now accepts ext_hook argument like Unpacker and unpackb() +* Update Cython version to 0.23.4 +* default function is called when integer overflow + + +0.4.6 +===== +:release date: 2015-03-13 + +Bugs fixed +---------- + +* fallback.Unpacker: Fix Data corruption when OutOfData. + This bug only affects "Streaming unpacking." + + +0.4.5 +===== +:release date: 2015-01-25 + +Incompatible Changes +-------------------- + +Changes +------- + +Bugs fixed +---------- + +* Fix test failure on pytest 2.3. (by @ktdreyer) +* Fix typos in ChangeLog. (Thanks to @dmick) +* Improve README.rst (by @msabramo) + + +0.4.4 +===== +:release date: 2015-01-09 + +Incompatible Changes +-------------------- + +Changes +------- + +Bugs fixed +---------- + +* Fix compile error. + +0.4.3 +===== +:release date: 2015-01-07 + +Incompatible Changes +-------------------- + +Changes +------- + +Bugs fixed +---------- + +* Unpacker may unpack wrong uint32 value on 32bit or LLP64 environment. (#101) +* Build failed on Windows Python 2.7. + +0.4.2 +===== +:release date: 2014-03-26 + +Incompatible Changes +-------------------- + +Changes +------- + +Bugs fixed +---------- + +* Unpacker doesn't increment refcount of ExtType hook. +* Packer raises no exception for inputs doesn't fit to msgpack format. + +0.4.1 +===== +:release date: 2014-02-17 + +Incompatible Changes +-------------------- + +Changes +------- + +* fallback.Unpacker.feed() supports bytearray. + +Bugs fixed +---------- + +* Unpacker doesn't increment refcount of hooks. Hooks may be GCed while unpacking. +* Unpacker may read unfilled internal buffer. + +0.4.0 +===== +:release date: 2013-10-21 + +Incompatible Changes +-------------------- + +* Raises TypeError instead of ValueError when packer receives unsupported type. + +Changes +------- + +* Support New msgpack spec. + + +0.3.0 +===== + +Incompatible Changes +-------------------- + +* Default value of ``use_list`` is ``True`` for now. (It was ``False`` for 0.2.x) + You should pass it explicitly for compatibility to 0.2.x. +* `Unpacker.unpack()` and some unpack methods now raise `OutOfData` instead of + `StopIteration`. `StopIteration` is used for iterator protocol only. 
+ +Changes +------- +* Pure Python fallback module is added. (thanks to bwesterb) +* Add ``.skip()`` method to ``Unpacker`` (thanks to jnothman) +* Add capturing feature. You can pass the writable object to + ``Unpacker.unpack()`` as a second parameter. +* Add ``Packer.pack_array_header`` and ``Packer.pack_map_header``. + These methods only pack header of each type. +* Add ``autoreset`` option to ``Packer`` (default: True). + Packer doesn't return packed bytes and clear internal buffer. +* Add ``Packer.pack_map_pairs``. It packs sequence of pair to map type. + + + +0.2.4 +===== +:release date: 2012-12-22 + +Bugs fixed +---------- + +* Fix SEGV when object_hook or object_pairs_hook raise Exception. (#39) + +0.2.3 +===== +:release date: 2012-12-11 + +Changes +------- +* Warn when use_list is not specified. It's default value will be changed in 0.3. + +Bugs fixed +---------- +* Can't pack subclass of dict. + +0.2.2 +===== +:release date: 2012-09-21 + +Changes +------- +* Add ``use_single_float`` option to ``Packer``. When it is true, packs float + object in single precision format. + +Bugs fixed +---------- +* ``unpack()`` didn't restores gc state when it called with gc disabled. + ``unpack()`` doesn't control gc now instead of restoring gc state collectly. + User can control gc state when gc cause performance issue. + +* ``Unpacker``'s ``read_size`` option didn't used. + +0.2.1 +===== +:release date: 2012-08-20 + +Changes +------- +* Add ``max_buffer_size`` parameter to Unpacker. It limits internal buffer size + and allows unpack data from untrusted source safely. + +* Unpacker's buffer reallocation algorithm is less greedy now. It cause performance + decrease in rare case but memory efficient and don't allocate than ``max_buffer_size``. + +Bugs fixed +---------- +* Fix msgpack didn't work on SPARC Solaris. It was because choosing wrong byteorder + on compilation time. Use ``sys.byteorder`` to get correct byte order. + Very thanks to Chris Casey for giving test environment to me. + + +0.2.0 +===== +:release date: 2012-06-27 + +Changes +------- +* Drop supporting Python 2.5 and unify tests for Py2 and Py3. +* Use new version of msgpack-c. It packs correctly on big endian platforms. +* Remove deprecated packs and unpacks API. + +Bugs fixed +---------- +* #8 Packing subclass of dict raises TypeError. (Thanks to Steeve Morin.) + + +0.1.13 +====== +:release date: 2012-04-21 + +New +--- +* Don't accept subtype of list and tuple as msgpack list. (Steeve Morin) + It allows customize how it serialized with ``default`` argument. + +Bugs fixed +---------- +* Fix wrong error message. (David Wolever) +* Fix memory leak while unpacking when ``object_hook`` or ``list_hook`` is used. + (Steeve Morin) + +Other changes +------------- +* setup.py works on Python 2.5 (Steffen Siering) +* Optimization for serializing dict. + + 0.1.12 -======= +====== :release date: 2011-12-27 Bugs fixed -------------- +---------- * Re-enable packs/unpacks removed at 0.1.11. It will be removed when 0.2 is released. 0.1.11 -======= +====== :release date: 2011-12-26 Bugs fixed -------------- +---------- * Include test code for Python3 to sdist. (Johan Bergström) * Fix compilation error on MSVC. (davidgaleano) @@ -31,7 +573,7 @@ New feature 0.1.9 -====== +===== :release date: 2011-01-29 New feature @@ -45,16 +587,16 @@ Bugs fixed * Add MemoryError check. 0.1.8 -====== +===== :release date: 2011-01-10 New feature ------------- +----------- * Support ``loads`` and ``dumps`` aliases for API compatibility with simplejson and pickle. 
* Add *object_hook* and *list_hook* option to unpacker. It allows you to - hook unpacing mapping type and array type. + hook unpacking mapping type and array type. * Add *default* option to packer. It allows you to pack unsupported types. @@ -66,13 +608,13 @@ Bugs fixed 0.1.7 -====== +===== :release date: 2010-11-02 New feature ------------- +----------- * Add *object_hook* and *list_hook* option to unpacker. It allows you to - hook unpacing mapping type and array type. + hook unpacking mapping type and array type. * Add *default* option to packer. It allows you to pack unsupported types. diff --git a/DEVELOP.md b/DEVELOP.md new file mode 100644 index 00000000..27adf8c0 --- /dev/null +++ b/DEVELOP.md @@ -0,0 +1,17 @@ +# Developer's note + +### Build + +``` +$ make cython +``` + + +### Test + +MessagePack uses `pytest` for testing. +Run test with following command: + +``` +$ make test +``` diff --git a/MANIFEST.in b/MANIFEST.in index bf312d57..6317706e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,5 @@ include setup.py include COPYING +include README.md recursive-include msgpack *.h *.c *.pyx recursive-include test *.py -recursive-include test3 *.py diff --git a/Makefile b/Makefile index 245c09c2..51f3e0ef 100644 --- a/Makefile +++ b/Makefile @@ -1,12 +1,59 @@ -.PHONY: test all python3 +PYTHON_SOURCES = msgpack test setup.py -all: +.PHONY: all +all: cython python setup.py build_ext -i -f - python setup.py build sdist -python3: - python3 setup.py build_ext -i -f - python3 setup.py build sdist +.PHONY: format +format: + ruff format $(PYTHON_SOURCES) -test: - nosetests test +.PHONY: lint +lint: + ruff check $(PYTHON_SOURCES) + +.PHONY: doc +doc: + cd docs && sphinx-build -n -v -W --keep-going -b html -d doctrees . html + +.PHONY: pyupgrade +pyupgrade: + @find $(PYTHON_SOURCES) -name '*.py' -type f -exec pyupgrade --py37-plus '{}' \; + +.PHONY: cython +cython: + cython msgpack/_cmsgpack.pyx + +.PHONY: test +test: cython + pip install -e . + pytest -v test + MSGPACK_PUREPYTHON=1 pytest -v test + +.PHONY: serve-doc +serve-doc: all + cd docs && make serve + +.PHONY: clean +clean: + rm -rf build + rm -f msgpack/_cmsgpack.cpp + rm -f msgpack/_cmsgpack.*.so + rm -f msgpack/_cmsgpack.*.pyd + rm -rf msgpack/__pycache__ + rm -rf test/__pycache__ + +.PHONY: update-docker +update-docker: + docker pull quay.io/pypa/manylinux2014_i686 + docker pull quay.io/pypa/manylinux2014_x86_64 + docker pull quay.io/pypa/manylinux2014_aarch64 + +.PHONY: linux-wheel +linux-wheel: + docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2014_i686 bash docker/buildwheel.sh + docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2014_x86_64 bash docker/buildwheel.sh + +.PHONY: linux-arm64-wheel +linux-arm64-wheel: + docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2014_aarch64 bash docker/buildwheel.sh diff --git a/README.md b/README.md new file mode 100644 index 00000000..1f063242 --- /dev/null +++ b/README.md @@ -0,0 +1,242 @@ +# MessagePack for Python + +[![Build Status](https://github.com/msgpack/msgpack-python/actions/workflows/wheel.yml/badge.svg)](https://github.com/msgpack/msgpack-python/actions/workflows/wheel.yml) +[![Documentation Status](https://readthedocs.org/projects/msgpack-python/badge/?version=latest)](https://msgpack-python.readthedocs.io/en/latest/?badge=latest) + +## What is this? + +[MessagePack](https://msgpack.org/) is an efficient binary serialization format. +It lets you exchange data among multiple languages like JSON. 
+But it's faster and smaller. +This package provides CPython bindings for reading and writing MessagePack data. + +## Install + +``` +$ pip install msgpack +``` + +### Pure Python implementation + +The extension module in msgpack (`msgpack._cmsgpack`) does not support PyPy. + +But msgpack provides a pure Python implementation (`msgpack.fallback`) for PyPy. + + +### Windows + +If you can't use a binary distribution, you need to install Visual Studio +or the Windows SDK on Windows. +Without the extension, the pure Python implementation on CPython runs slowly. + + +## How to use + +### One-shot pack & unpack + +Use `packb` for packing and `unpackb` for unpacking. +msgpack provides `dumps` and `loads` as aliases for compatibility with +`json` and `pickle`. + +`pack` and `dump` pack to a file-like object. +`unpack` and `load` unpack from a file-like object. + +```pycon +>>> import msgpack +>>> msgpack.packb([1, 2, 3]) +'\x93\x01\x02\x03' +>>> msgpack.unpackb(_) +[1, 2, 3] +``` + +Read the docstring for options. + + +### Streaming unpacking + +`Unpacker` is a "streaming unpacker". It unpacks multiple objects from one +stream (or from bytes provided through its `feed` method). + +```py +import msgpack +from io import BytesIO + +buf = BytesIO() +for i in range(100): + buf.write(msgpack.packb(i)) + +buf.seek(0) + +unpacker = msgpack.Unpacker(buf) +for unpacked in unpacker: + print(unpacked) +``` + + +### Packing/unpacking of custom data types + +It is also possible to pack/unpack custom data types. Here is an example for +`datetime.datetime`. + +```py +import datetime +import msgpack + +useful_dict = { + "id": 1, + "created": datetime.datetime.now(), +} + +def decode_datetime(obj): + if '__datetime__' in obj: + obj = datetime.datetime.strptime(obj["as_str"], "%Y%m%dT%H:%M:%S.%f") + return obj + +def encode_datetime(obj): + if isinstance(obj, datetime.datetime): + return {'__datetime__': True, 'as_str': obj.strftime("%Y%m%dT%H:%M:%S.%f")} + return obj + + +packed_dict = msgpack.packb(useful_dict, default=encode_datetime) +this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime) +``` + +`Unpacker`'s `object_hook` callback receives a dict; the +`object_pairs_hook` callback may instead be used to receive a list of +key-value pairs. + +NOTE: msgpack can encode datetime with tzinfo into standard ext type for now. +See `datetime` option in `Packer` docstring. + + +### Extended types + +It is also possible to pack/unpack custom data types using the **ext** type. + +```pycon +>>> import msgpack +>>> import array +>>> def default(obj): +... if isinstance(obj, array.array) and obj.typecode == 'd': +... return msgpack.ExtType(42, obj.tostring()) +... raise TypeError("Unknown type: %r" % (obj,)) +... +>>> def ext_hook(code, data): +... if code == 42: +... a = array.array('d') +... a.fromstring(data) +... return a +... return ExtType(code, data) +... +>>> data = array.array('d', [1.2, 3.4]) +>>> packed = msgpack.packb(data, default=default) +>>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook) +>>> data == unpacked +True +``` + + +### Advanced unpacking control + +As an alternative to iteration, `Unpacker` objects provide `unpack`, +`skip`, `read_array_header`, and `read_map_header` methods. The former two +read an entire message from the stream, respectively deserializing and returning +the result, or ignoring it. 
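+For example, a minimal sketch of the first pair (an illustrative snippet written for this section, not one of the library's own examples):
+
+```py
+import msgpack
+from io import BytesIO
+
+buf = BytesIO()
+for obj in (1, "two", [3, 4]):
+    buf.write(msgpack.packb(obj))
+buf.seek(0)
+
+unpacker = msgpack.Unpacker(buf)
+print(unpacker.unpack())  # 1 -- deserialize and return the next message
+unpacker.skip()           # silently discard "two"
+print(unpacker.unpack())  # [3, 4]
+```
+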
The latter two methods return the number of elements +in the upcoming container, so that each element in an array, or key-value pair +in a map, can be unpacked or skipped individually. + + +## Notes + +### String and binary types in the old MessagePack spec + +Early versions of msgpack didn't distinguish string and binary types. +The type for representing both string and binary types was named **raw**. + +You can pack into and unpack from this old spec using `use_bin_type=False` +and `raw=True` options. + +```pycon +>>> import msgpack +>>> msgpack.unpackb(msgpack.packb([b'spam', 'eggs'], use_bin_type=False), raw=True) +[b'spam', b'eggs'] +>>> msgpack.unpackb(msgpack.packb([b'spam', 'eggs'], use_bin_type=True), raw=False) +[b'spam', 'eggs'] +``` + +### ext type + +To use the **ext** type, pass a `msgpack.ExtType` object to the packer. + +```pycon +>>> import msgpack +>>> packed = msgpack.packb(msgpack.ExtType(42, b'xyzzy')) +>>> msgpack.unpackb(packed) +ExtType(code=42, data='xyzzy') +``` + +You can use it with `default` and `ext_hook`. See below. + + +### Security + +When unpacking data received from an unreliable source, msgpack provides +two security options. + +`max_buffer_size` (default: `100*1024*1024`) limits the internal buffer size. +It is also used to limit preallocated list sizes. + +`strict_map_key` (default: `True`) limits the type of map keys to bytes and str. +While the MessagePack spec doesn't limit map key types, +there is a risk of a hash DoS. +If you need to support other types for map keys, use `strict_map_key=False`. + + +### Performance tips + +CPython's GC starts when the number of allocated objects grows. +This means unpacking may trigger unnecessary GC. +You can use `gc.disable()` when unpacking a large message. + +A list is the default sequence type in Python. +However, a tuple is lighter than a list. +You can use `use_list=False` while unpacking when performance is important. + + +## Major breaking changes in the history + +### msgpack 0.5 + +The package name on PyPI was changed from `msgpack-python` to `msgpack` in 0.5. + +When upgrading from msgpack-0.4 or earlier, do `pip uninstall msgpack-python` before +`pip install -U msgpack`. + + +### msgpack 1.0 + +* Python 2 support + + * The extension module no longer supports Python 2. + The pure Python implementation (`msgpack.fallback`) is used for Python 2. + + * msgpack 1.0.6 drops official support of Python 2.7, as pip and + GitHub Action "setup-python" no longer supports Python 2.7. + +* Packer + + * Packer uses `use_bin_type=True` by default. + Bytes are encoded in the bin type in MessagePack. + * The `encoding` option is removed. UTF-8 is always used. + +* Unpacker + + * Unpacker uses `raw=False` by default. It assumes str values are valid UTF-8 strings + and decodes them to Python str (Unicode) objects. + * `encoding` option is removed. You can use `raw=True` to support old format (e.g. unpack into bytes, not str). + * The default value of `max_buffer_size` is changed from 0 to 100 MiB to avoid DoS attacks. + You need to pass `max_buffer_size=0` if you have large but safe data. + * The default value of `strict_map_key` is changed to True to avoid hash DoS. + You need to pass `strict_map_key=False` if you have data that contain map keys + whose type is neither bytes nor str. 
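+
+As a quick illustration of these defaults (a hedged sketch written for this section, not an official example):
+
+```py
+import msgpack
+
+packed = msgpack.packb({1: "one"})
+
+# strict_map_key=True (the default since 1.0) rejects map keys that are
+# neither str nor bytes.
+try:
+    msgpack.unpackb(packed)
+except ValueError as exc:
+    print("rejected:", exc)
+
+# Opt out explicitly when integer (or other) map keys are expected.
+print(msgpack.unpackb(packed, strict_map_key=False))  # {1: 'one'}
+```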
diff --git a/README.rst b/README.rst deleted file mode 100644 index 994492e6..00000000 --- a/README.rst +++ /dev/null @@ -1,43 +0,0 @@ -=========================== -MessagePack Python Binding -=========================== - -:author: INADA Naoki -:version: 0.1.0 -:date: 2009-07-12 - -HOW TO USE ------------ -You can read document in docstring after `import msgpack` - - -INSTALL ---------- -Cython_ is required to build msgpack. - -.. _Cython: http://www.cython.org/ - -posix -'''''' -You can install msgpack in common way. - - $ python setup.py install - -Windows -'''''''' -MessagePack requires gcc currently. So you need to prepare -MinGW GCC. - - $ python setup.py build -c mingw32 - $ python setup.py install - -TEST ----- -MessagePack uses `nosetest` for testing. -Run test with following command: - - $ nosetests test - - -.. - vim: filetype=rst diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 00000000..75f0c541 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,5 @@ +## Security contact information + +To report a security vulnerability, please use the +[Tidelift security contact](https://tidelift.com/security). +Tidelift will coordinate the fix and disclosure. \ No newline at end of file diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py new file mode 100644 index 00000000..2e778dd2 --- /dev/null +++ b/benchmark/benchmark.py @@ -0,0 +1,38 @@ +from msgpack import fallback + +try: + from msgpack import _cmsgpack + + has_ext = True +except ImportError: + has_ext = False +import timeit + + +def profile(name, func): + times = timeit.repeat(func, number=1000, repeat=4) + times = ", ".join(["%8f" % t for t in times]) + print("%-30s %40s" % (name, times)) + + +def simple(name, data): + if has_ext: + packer = _cmsgpack.Packer() + profile("packing %s (ext)" % name, lambda: packer.pack(data)) + packer = fallback.Packer() + profile("packing %s (fallback)" % name, lambda: packer.pack(data)) + + data = packer.pack(data) + if has_ext: + profile("unpacking %s (ext)" % name, lambda: _cmsgpack.unpackb(data)) + profile("unpacking %s (fallback)" % name, lambda: fallback.unpackb(data)) + + +def main(): + simple("integers", [7] * 10000) + simple("bytes", [b"x" * n for n in range(100)] * 10) + simple("lists", [[]] * 10000) + simple("dicts", [{}] * 10000) + + +main() diff --git a/docker/buildwheel.sh b/docker/buildwheel.sh new file mode 100644 index 00000000..ff34139d --- /dev/null +++ b/docker/buildwheel.sh @@ -0,0 +1,22 @@ +#!/bin/bash +DOCKER_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +source "$DOCKER_DIR/shared.env" + +set -e -x + +ARCH=`uname -p` +echo "arch=$ARCH" + +ls /opt/python + +for V in "${PYTHON_VERSIONS[@]}"; do + PYBIN=/opt/python/$V/bin + rm -rf build/ # Avoid lib build by narrow Python is used by wide python + $PYBIN/python -m build -w +done + +cd dist +for whl in *.whl; do + auditwheel repair "$whl" + rm "$whl" +done diff --git a/docker/runtests.sh b/docker/runtests.sh new file mode 100755 index 00000000..fa7e979b --- /dev/null +++ b/docker/runtests.sh @@ -0,0 +1,17 @@ +#!/bin/bash +DOCKER_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +source "$DOCKER_DIR/shared.env" + +set -e -x + +for V in "${PYTHON_VERSIONS[@]}"; do + PYBIN=/opt/python/$V/bin + $PYBIN/python setup.py install + rm -rf build/ # Avoid lib build by narrow Python is used by wide python + $PYBIN/pip install pytest + pushd test # prevent importing msgpack package in current directory. 
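+    # Sanity checks before running the tests: show the interpreter's word size
+    # and make sure the compiled extension can actually be imported.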
+ $PYBIN/python -c 'import sys; print(hex(sys.maxsize))' + $PYBIN/python -c 'from msgpack import _cmsgpack' # Ensure extension is available + $PYBIN/pytest -v . + popd +done diff --git a/docker/shared.env b/docker/shared.env new file mode 100644 index 00000000..80274ac6 --- /dev/null +++ b/docker/shared.env @@ -0,0 +1,7 @@ +PYTHON_VERSIONS=( + cp310-cp310 + cp39-cp39 + cp38-cp38 + cp37-cp37m + cp36-cp36m +) diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..831a6a7f --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,159 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = _build + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -E -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . + +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext + +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + -rm -rf $(BUILDDIR)/* + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." 
+ +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/msgpack.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/msgpack.qhc" + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/msgpack" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/msgpack" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +serve: html + python3 -m http.server -d _build/html + +zip: html + cd _build/html && zip -r ../../../msgpack-doc.zip . diff --git a/docs/_static/README.txt b/docs/_static/README.txt new file mode 100644 index 00000000..1c70594f --- /dev/null +++ b/docs/_static/README.txt @@ -0,0 +1 @@ +Sphinx will copy the contents of docs/_static/ directory to the build location. diff --git a/docs/advanced.rst b/docs/advanced.rst new file mode 100644 index 00000000..38370088 --- /dev/null +++ b/docs/advanced.rst @@ -0,0 +1,32 @@ +Advanced usage +=============== + +Packer +------ + +autoreset +~~~~~~~~~ + +When you used ``autoreset=False`` option of :class:`~msgpack.Packer`, +``pack()`` method doesn't return packed ``bytes``. 
+ +You can use :meth:`~msgpack.Packer.bytes` or :meth:`~msgpack.Packer.getbuffer` to +get packed data. + +``bytes()`` returns ``bytes`` object. ``getbuffer()`` returns some bytes-like +object. It's concrete type is implement detail and it will be changed in future +versions. + +You can reduce temporary bytes object by using ``Unpacker.getbuffer()``. + +.. code-block:: python + + packer = Packer(use_bin_type=True, autoreset=False) + + packer.pack([1, 2]) + packer.pack([3, 4]) + + with open('data.bin', 'wb') as f: + f.write(packer.getbuffer()) + + packer.reset() # reset internal buffer diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 00000000..f5dfbbd2 --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,43 @@ +API reference +============= + +.. module:: msgpack + +.. autofunction:: pack + +``dump()`` is an alias for :func:`pack` + +.. autofunction:: packb + +``dumps()`` is an alias for :func:`packb` + +.. autofunction:: unpack + +``load()`` is an alias for :func:`unpack` + +.. autofunction:: unpackb + +``loads()`` is an alias for :func:`unpackb` + +.. autoclass:: Packer + :members: + +.. autoclass:: Unpacker + :members: + +.. autoclass:: ExtType + +.. autoclass:: Timestamp + :members: + :special-members: __init__ + +exceptions +---------- + +These exceptions are accessible via `msgpack` package. +(For example, `msgpack.OutOfData` is shortcut for `msgpack.exceptions.OutOfData`) + +.. automodule:: msgpack.exceptions + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 00000000..28116cd6 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,283 @@ +# msgpack documentation build configuration file, created by +# sphinx-quickstart on Sun Feb 24 14:20:50 2013. +# +# This file is execfile()d with the current directory set to its containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# import os +# import sys +# sys.path.insert(0, os.path.abspath('..')) + +# -- General configuration ----------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ["sphinx.ext.autodoc", "sphinx.ext.viewcode"] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# The suffix of source filenames. +source_suffix = ".rst" + +# The encoding of source files. +# source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = "index" + +# General information about the project. +project = "msgpack" +copyright = "Inada Naoki" + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +# The full version, including alpha/beta/rc tags. +version = release = "1.0" + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. 
+# language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +# today = '' +# Else, today_fmt is used as the format for a strftime call. +# today_fmt = '%B %d, %Y' +today_fmt = "%Y-%m-%d" + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ["_build"] + +# The reST default role (used for this markup: `text`) to use for all documents. +# default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +# add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +# add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +# show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = "sphinx" + +# A list of ignored prefixes for module index sorting. +# modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = "sphinx_rtd_theme" + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +# html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +# html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +# html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +# html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +# html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ["_static"] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +# html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +# html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +# html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +# html_additional_pages = {} + +# If false, no module index is generated. +# html_domain_indices = True + +# If false, no index is generated. +# html_use_index = True + +# If true, the index is split into individual pages for each letter. +# html_split_index = False + +# If true, links to the reST sources are added to the pages. +# html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +# html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 
+# html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +# html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +# html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = "msgpackdoc" + + +# -- Options for LaTeX output -------------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + #'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ("index", "msgpack.tex", "msgpack Documentation", "Author", "manual"), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +# latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +# latex_use_parts = False + +# If true, show page references after internal links. +# latex_show_pagerefs = False + +# If true, show URL addresses after external links. +# latex_show_urls = False + +# Documents to append as an appendix to all manuals. +# latex_appendices = [] + +# If false, no module index is generated. +# latex_domain_indices = True + + +# -- Options for manual page output -------------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [("index", "msgpack", "msgpack Documentation", ["Author"], 1)] + +# If true, show URL addresses after external links. +# man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------------ + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ( + "index", + "msgpack", + "msgpack Documentation", + "Author", + "msgpack", + "One line description of project.", + "Miscellaneous", + ), +] + +# Documents to append as an appendix to all manuals. +# texinfo_appendices = [] + +# If false, no module index is generated. +# texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +# texinfo_show_urls = 'footnote' + + +# -- Options for Epub output --------------------------------------------------- + +# Bibliographic Dublin Core info. +epub_title = "msgpack" +epub_author = "Author" +epub_publisher = "Author" +epub_copyright = "2013, Author" + +# The language of the text. It defaults to the language option +# or en if the language is not set. +# epub_language = '' + +# The scheme of the identifier. Typical schemes are ISBN or URL. +# epub_scheme = '' + +# The unique identifier of the text. This can be a ISBN number +# or the project homepage. +# epub_identifier = '' + +# A unique identification for the text. +# epub_uid = '' + +# A tuple containing the cover image and cover page html template filenames. +# epub_cover = () + +# HTML files that should be inserted before the pages created by sphinx. +# The format is a list of tuples containing the path and title. 
+# epub_pre_files = [] + +# HTML files shat should be inserted after the pages created by sphinx. +# The format is a list of tuples containing the path and title. +# epub_post_files = [] + +# A list of files that should not be packed into the epub file. +# epub_exclude_files = [] + +# The depth of the table of contents in toc.ncx. +# epub_tocdepth = 3 + +# Allow duplicate toc entries. +# epub_tocdup = True diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 00000000..e9c2ce83 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,11 @@ +msgpack document +================ + +`MessagePack `_ is a efficient format for inter +language data exchange. + +.. toctree:: + :maxdepth: 1 + + api + advanced diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..26002de4 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,2 @@ +sphinx~=7.3.7 +sphinx-rtd-theme~=2.0.0 diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 6b9735e1..f3266b70 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -1,6 +1,51 @@ -# coding: utf-8 -from msgpack.__version__ import * -from msgpack._msgpack import * +# ruff: noqa: F401 +import os + +from .exceptions import * # noqa: F403 +from .ext import ExtType, Timestamp + +version = (1, 1, 2) +__version__ = "1.1.2" + + +if os.environ.get("MSGPACK_PUREPYTHON"): + from .fallback import Packer, Unpacker, unpackb +else: + try: + from ._cmsgpack import Packer, Unpacker, unpackb + except ImportError: + from .fallback import Packer, Unpacker, unpackb + + +def pack(o, stream, **kwargs): + """ + Pack object `o` and write it to `stream` + + See :class:`Packer` for options. + """ + packer = Packer(**kwargs) + stream.write(packer.pack(o)) + + +def packb(o, **kwargs): + """ + Pack object `o` and return packed bytes + + See :class:`Packer` for options. + """ + return Packer(**kwargs).pack(o) + + +def unpack(stream, **kwargs): + """ + Unpack an object from `stream`. + + Raises `ExtraData` when `stream` contains extra bytes. + See :class:`Unpacker` for options. + """ + data = stream.read() + return unpackb(data, **kwargs) + # alias for compatibility to simplejson/marshal/pickle. load = unpack @@ -8,13 +53,3 @@ dump = pack dumps = packb - -def packs(*args, **kw): - from warnings import warn - warn("msgpack.packs() is deprecated. Use packb() instead.", DeprecationWarning) - return packb(*args, **kw) - -def unpacks(*args, **kw): - from warnings import warn - warn("msgpack.unpacks() is deprecated. 
Use unpackb() instead.", DeprecationWarning) - return unpackb(*args, **kw) diff --git a/msgpack/_cmsgpack.pyx b/msgpack/_cmsgpack.pyx new file mode 100644 index 00000000..9680b31e --- /dev/null +++ b/msgpack/_cmsgpack.pyx @@ -0,0 +1,12 @@ +#cython: embedsignature=True, c_string_encoding=ascii, language_level=3 +#cython: freethreading_compatible = True +import cython +from cpython.datetime cimport import_datetime, datetime_new +import_datetime() + +import datetime +cdef object utc = datetime.timezone.utc +cdef object epoch = datetime_new(1970, 1, 1, 0, 0, 0, 0, tz=utc) + +include "_packer.pyx" +include "_unpacker.pyx" diff --git a/msgpack/_msgpack.pyx b/msgpack/_msgpack.pyx deleted file mode 100644 index 7a817466..00000000 --- a/msgpack/_msgpack.pyx +++ /dev/null @@ -1,427 +0,0 @@ -# coding: utf-8 -#cython: embedsignature=True - -from cpython cimport * -cdef extern from "Python.h": - ctypedef char* const_char_ptr "const char*" - ctypedef char* const_void_ptr "const void*" - ctypedef struct PyObject - cdef int PyObject_AsReadBuffer(object o, const_void_ptr* buff, Py_ssize_t* buf_len) except -1 - -from libc.stdlib cimport * -from libc.string cimport * -import gc -_gc_disable = gc.disable -_gc_enable = gc.enable - -cdef extern from "pack.h": - struct msgpack_packer: - char* buf - size_t length - size_t buf_size - - int msgpack_pack_int(msgpack_packer* pk, int d) - int msgpack_pack_nil(msgpack_packer* pk) - int msgpack_pack_true(msgpack_packer* pk) - int msgpack_pack_false(msgpack_packer* pk) - int msgpack_pack_long(msgpack_packer* pk, long d) - int msgpack_pack_long_long(msgpack_packer* pk, long long d) - int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) - int msgpack_pack_double(msgpack_packer* pk, double d) - int msgpack_pack_array(msgpack_packer* pk, size_t l) - int msgpack_pack_map(msgpack_packer* pk, size_t l) - int msgpack_pack_raw(msgpack_packer* pk, size_t l) - int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) - -cdef int DEFAULT_RECURSE_LIMIT=511 - -cdef class Packer(object): - """MessagePack Packer - - usage: - - packer = Packer() - astream.write(packer.pack(a)) - astream.write(packer.pack(b)) - """ - cdef msgpack_packer pk - cdef object _default - cdef object _bencoding - cdef object _berrors - cdef char *encoding - cdef char *unicode_errors - - def __cinit__(self): - cdef int buf_size = 1024*1024 - self.pk.buf = malloc(buf_size); - if self.pk.buf == NULL: - raise MemoryError("Unable to allocate internal buffer.") - self.pk.buf_size = buf_size - self.pk.length = 0 - - def __init__(self, default=None, encoding='utf-8', unicode_errors='strict'): - if default is not None: - if not PyCallable_Check(default): - raise TypeError("default must be a callable.") - self._default = default - if encoding is None: - self.encoding = NULL - self.unicode_errors = NULL - else: - if isinstance(encoding, unicode): - self._bencoding = encoding.encode('ascii') - else: - self._bencoding = encoding - self.encoding = PyBytes_AsString(self._bencoding) - if isinstance(unicode_errors, unicode): - self._berrors = unicode_errors.encode('ascii') - else: - self._berrors = unicode_errors - self.unicode_errors = PyBytes_AsString(self._berrors) - - def __dealloc__(self): - free(self.pk.buf); - - cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: - cdef long long llval - cdef unsigned long long ullval - cdef long longval - cdef double fval - cdef char* rawval - cdef int ret - cdef dict d - - if nest_limit < 0: - raise ValueError("Too deep.") - 
- if o is None: - ret = msgpack_pack_nil(&self.pk) - elif isinstance(o, bool): - if o: - ret = msgpack_pack_true(&self.pk) - else: - ret = msgpack_pack_false(&self.pk) - elif PyLong_Check(o): - if o > 0: - ullval = o - ret = msgpack_pack_unsigned_long_long(&self.pk, ullval) - else: - llval = o - ret = msgpack_pack_long_long(&self.pk, llval) - elif PyInt_Check(o): - longval = o - ret = msgpack_pack_long(&self.pk, longval) - elif PyFloat_Check(o): - fval = o - ret = msgpack_pack_double(&self.pk, fval) - elif PyBytes_Check(o): - rawval = o - ret = msgpack_pack_raw(&self.pk, len(o)) - if ret == 0: - ret = msgpack_pack_raw_body(&self.pk, rawval, len(o)) - elif PyUnicode_Check(o): - if not self.encoding: - raise TypeError("Can't encode utf-8 no encoding is specified") - o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors) - rawval = o - ret = msgpack_pack_raw(&self.pk, len(o)) - if ret == 0: - ret = msgpack_pack_raw_body(&self.pk, rawval, len(o)) - elif PyDict_Check(o): - d = o - ret = msgpack_pack_map(&self.pk, len(d)) - if ret == 0: - for k,v in d.items(): - ret = self._pack(k, nest_limit-1) - if ret != 0: break - ret = self._pack(v, nest_limit-1) - if ret != 0: break - elif PySequence_Check(o): - ret = msgpack_pack_array(&self.pk, len(o)) - if ret == 0: - for v in o: - ret = self._pack(v, nest_limit-1) - if ret != 0: break - elif self._default: - o = self._default(o) - ret = self._pack(o, nest_limit-1) - else: - raise TypeError("can't serialize %r" % (o,)) - return ret - - def pack(self, object obj): - cdef int ret - ret = self._pack(obj, DEFAULT_RECURSE_LIMIT) - if ret: - raise TypeError - buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) - self.pk.length = 0 - return buf - - -def pack(object o, object stream, default=None, encoding='utf-8', unicode_errors='strict'): - """ - pack an object `o` and write it to stream).""" - packer = Packer(default=default, encoding=encoding, unicode_errors=unicode_errors) - stream.write(packer.pack(o)) - -def packb(object o, default=None, encoding='utf-8', unicode_errors='strict'): - """ - pack o and return packed bytes.""" - packer = Packer(default=default, encoding=encoding, unicode_errors=unicode_errors) - return packer.pack(o) - - -cdef extern from "unpack.h": - ctypedef struct msgpack_user: - int use_list - PyObject* object_hook - PyObject* list_hook - char *encoding - char *unicode_errors - - ctypedef struct template_context: - msgpack_user user - PyObject* obj - size_t count - unsigned int ct - PyObject* key - - int template_execute(template_context* ctx, const_char_ptr data, - size_t len, size_t* off) except -1 - void template_init(template_context* ctx) - object template_data(template_context* ctx) - - -def unpackb(object packed, object object_hook=None, object list_hook=None, bint use_list=0, encoding=None, unicode_errors="strict"): - """ - Unpack packed_bytes to object. 
Returns an unpacked object.""" - cdef template_context ctx - cdef size_t off = 0 - cdef int ret - - cdef char* buf - cdef Py_ssize_t buf_len - PyObject_AsReadBuffer(packed, &buf, &buf_len) - - if encoding is None: - enc = NULL - err = NULL - else: - if isinstance(encoding, unicode): - bencoding = encoding.encode('ascii') - else: - bencoding = encoding - if isinstance(unicode_errors, unicode): - berrors = unicode_errors.encode('ascii') - else: - berrors = unicode_errors - enc = PyBytes_AsString(bencoding) - err = PyBytes_AsString(berrors) - - template_init(&ctx) - ctx.user.use_list = use_list - ctx.user.object_hook = ctx.user.list_hook = NULL - ctx.user.encoding = enc - ctx.user.unicode_errors = err - if object_hook is not None: - if not PyCallable_Check(object_hook): - raise TypeError("object_hook must be a callable.") - ctx.user.object_hook = object_hook - if list_hook is not None: - if not PyCallable_Check(list_hook): - raise TypeError("list_hook must be a callable.") - ctx.user.list_hook = list_hook - _gc_disable() - try: - ret = template_execute(&ctx, buf, buf_len, &off) - finally: - _gc_enable() - if ret == 1: - return template_data(&ctx) - else: - return None - - -def unpack(object stream, object object_hook=None, object list_hook=None, bint use_list=0, encoding=None, unicode_errors="strict"): - """ - unpack an object from stream. - """ - return unpackb(stream.read(), use_list=use_list, - object_hook=object_hook, list_hook=list_hook, encoding=encoding, unicode_errors=unicode_errors) - -cdef class Unpacker(object): - """ - Streaming unpacker. - read_size is used like file_like.read(read_size) - - `file_like` is a file-like object having `.read(n)` method. - When `Unpacker` initialized with `file_like`, unpacker reads serialized data - from it and `.feed()` method is not usable. - - `read_size` is used as `file_like.read(read_size)`. (default: 1M) - - If `use_list` is true, msgpack list is deserialized to Python list. - Otherwise, it is deserialized to Python tuple. (default: False) - - `object_hook` is same to simplejson. If it is not None, it should be callable - and Unpacker calls it when deserializing key-value. - - `encoding` is encoding used for decoding msgpack bytes. If it is None (default), - msgpack bytes is deserialized to Python bytes. - - `unicode_errors` is used for decoding bytes. 
- - example:: - - unpacker = Unpacker() - while 1: - buf = astream.read() - unpacker.feed(buf) - for o in unpacker: - do_something(o) - """ - cdef template_context ctx - cdef char* buf - cdef size_t buf_size, buf_head, buf_tail - cdef object file_like - cdef object file_like_read - cdef Py_ssize_t read_size - cdef bint use_list - cdef object object_hook - cdef object _bencoding - cdef object _berrors - cdef char *encoding - cdef char *unicode_errors - - def __cinit__(self): - self.buf = NULL - - def __dealloc__(self): - free(self.buf) - self.buf = NULL - - def __init__(self, file_like=None, Py_ssize_t read_size=1024*1024, bint use_list=0, - object object_hook=None, object list_hook=None, - encoding=None, unicode_errors='strict'): - self.use_list = use_list - self.file_like = file_like - if file_like: - self.file_like_read = file_like.read - if not PyCallable_Check(self.file_like_read): - raise ValueError("`file_like.read` must be a callable.") - self.read_size = read_size - self.buf = malloc(read_size) - if self.buf == NULL: - raise MemoryError("Unable to allocate internal buffer.") - self.buf_size = read_size - self.buf_head = 0 - self.buf_tail = 0 - template_init(&self.ctx) - self.ctx.user.use_list = use_list - self.ctx.user.object_hook = self.ctx.user.list_hook = NULL - if object_hook is not None: - if not PyCallable_Check(object_hook): - raise TypeError("object_hook must be a callable.") - self.ctx.user.object_hook = object_hook - if list_hook is not None: - if not PyCallable_Check(list_hook): - raise TypeError("list_hook must be a callable.") - self.ctx.user.list_hook = list_hook - if encoding is None: - self.ctx.user.encoding = NULL - self.ctx.user.unicode_errors = NULL - else: - if isinstance(encoding, unicode): - self._bencoding = encoding.encode('ascii') - else: - self._bencoding = encoding - self.ctx.user.encoding = PyBytes_AsString(self._bencoding) - if isinstance(unicode_errors, unicode): - self._berrors = unicode_errors.encode('ascii') - else: - self._berrors = unicode_errors - self.ctx.user.unicode_errors = PyBytes_AsString(self._berrors) - - def feed(self, object next_bytes): - cdef char* buf - cdef Py_ssize_t buf_len - if self.file_like is not None: - raise AssertionError( - "unpacker.feed() is not be able to use with`file_like`.") - PyObject_AsReadBuffer(next_bytes, &buf, &buf_len) - self.append_buffer(buf, buf_len) - - cdef append_buffer(self, void* _buf, Py_ssize_t _buf_len): - cdef: - char* buf = self.buf - size_t head = self.buf_head - size_t tail = self.buf_tail - size_t buf_size = self.buf_size - size_t new_size - - if tail + _buf_len > buf_size: - if ((tail - head) + _buf_len)*2 < buf_size: - # move to front. - memmove(buf, buf + head, tail - head) - tail -= head - head = 0 - else: - # expand buffer. 
- new_size = tail + _buf_len - if new_size < buf_size*2: - new_size = buf_size*2 - buf = realloc(buf, new_size) - if buf == NULL: - # self.buf still holds old buffer and will be freed during - # obj destruction - raise MemoryError("Unable to enlarge internal buffer.") - buf_size = new_size - - memcpy(buf + tail, (_buf), _buf_len) - self.buf = buf - self.buf_head = head - self.buf_size = buf_size - self.buf_tail = tail + _buf_len - - # prepare self.buf from file_like - cdef fill_buffer(self): - if self.file_like is not None: - next_bytes = self.file_like_read(self.read_size) - if next_bytes: - self.append_buffer(PyBytes_AsString(next_bytes), - PyBytes_Size(next_bytes)) - else: - self.file_like = None - - cpdef unpack(self): - """unpack one object""" - cdef int ret - while 1: - _gc_disable() - ret = template_execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) - _gc_enable() - if ret == 1: - o = template_data(&self.ctx) - template_init(&self.ctx) - return o - elif ret == 0: - if self.file_like is not None: - self.fill_buffer() - continue - raise StopIteration("No more unpack data.") - else: - raise ValueError("Unpack failed: error = %d" % (ret,)) - - def __iter__(self): - return self - - def __next__(self): - return self.unpack() - - # for debug. - #def _buf(self): - # return PyString_FromStringAndSize(self.buf, self.buf_tail) - - #def _off(self): - # return self.buf_head diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx new file mode 100644 index 00000000..94d1462c --- /dev/null +++ b/msgpack/_packer.pyx @@ -0,0 +1,364 @@ +from cpython cimport * +from cpython.bytearray cimport PyByteArray_Check, PyByteArray_CheckExact +from cpython.datetime cimport ( + PyDateTime_CheckExact, PyDelta_CheckExact, + datetime_tzinfo, timedelta_days, timedelta_seconds, timedelta_microseconds, +) + +cdef ExtType +cdef Timestamp + +from .ext import ExtType, Timestamp + + +cdef extern from "Python.h": + + int PyMemoryView_Check(object obj) + +cdef extern from "pack.h": + struct msgpack_packer: + char* buf + size_t length + size_t buf_size + bint use_bin_type + + int msgpack_pack_nil(msgpack_packer* pk) except -1 + int msgpack_pack_true(msgpack_packer* pk) except -1 + int msgpack_pack_false(msgpack_packer* pk) except -1 + int msgpack_pack_long_long(msgpack_packer* pk, long long d) except -1 + int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) except -1 + int msgpack_pack_float(msgpack_packer* pk, float d) except -1 + int msgpack_pack_double(msgpack_packer* pk, double d) except -1 + int msgpack_pack_array(msgpack_packer* pk, size_t l) except -1 + int msgpack_pack_map(msgpack_packer* pk, size_t l) except -1 + int msgpack_pack_raw(msgpack_packer* pk, size_t l) except -1 + int msgpack_pack_bin(msgpack_packer* pk, size_t l) except -1 + int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) except -1 + int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) except -1 + int msgpack_pack_timestamp(msgpack_packer* x, long long seconds, unsigned long nanoseconds) except -1 + + +cdef int DEFAULT_RECURSE_LIMIT=511 +cdef long long ITEM_LIMIT = (2**32)-1 + + +cdef inline int PyBytesLike_Check(object o): + return PyBytes_Check(o) or PyByteArray_Check(o) + + +cdef inline int PyBytesLike_CheckExact(object o): + return PyBytes_CheckExact(o) or PyByteArray_CheckExact(o) + + +cdef class Packer: + """ + MessagePack Packer + + Usage:: + + packer = Packer() + astream.write(packer.pack(a)) + astream.write(packer.pack(b)) + + Packer's constructor has some keyword arguments: + + 
:param default: + When specified, it should be callable. + Convert user type to builtin type that Packer supports. + See also simplejson's document. + + :param bool use_single_float: + Use single precision float type for float. (default: False) + + :param bool autoreset: + Reset buffer after each pack and return its content as `bytes`. (default: True). + If set this to false, use `bytes()` to get content and `.reset()` to clear buffer. + + :param bool use_bin_type: + Use bin type introduced in msgpack spec 2.0 for bytes. + It also enables str8 type for unicode. (default: True) + + :param bool strict_types: + If set to true, types will be checked to be exact. Derived classes + from serializeable types will not be serialized and will be + treated as unsupported type and forwarded to default. + Additionally tuples will not be serialized as lists. + This is useful when trying to implement accurate serialization + for python types. + + :param bool datetime: + If set to true, datetime with tzinfo is packed into Timestamp type. + Note that the tzinfo is stripped in the timestamp. + You can get UTC datetime with `timestamp=3` option of the Unpacker. + + :param str unicode_errors: + The error handler for encoding unicode. (default: 'strict') + DO NOT USE THIS!! This option is kept for very specific usage. + + :param int buf_size: + The size of the internal buffer. (default: 256*1024) + Useful if serialisation size can be correctly estimated, + avoid unnecessary reallocations. + """ + cdef msgpack_packer pk + cdef object _default + cdef object _berrors + cdef const char *unicode_errors + cdef size_t exports # number of exported buffers + cdef bint strict_types + cdef bint use_float + cdef bint autoreset + cdef bint datetime + + def __cinit__(self, buf_size=256*1024, **_kwargs): + self.pk.buf = PyMem_Malloc(buf_size) + if self.pk.buf == NULL: + raise MemoryError("Unable to allocate internal buffer.") + self.pk.buf_size = buf_size + self.pk.length = 0 + self.exports = 0 + + def __dealloc__(self): + PyMem_Free(self.pk.buf) + self.pk.buf = NULL + assert self.exports == 0 + + cdef _check_exports(self): + if self.exports > 0: + raise BufferError("Existing exports of data: Packer cannot be changed") + + @cython.critical_section + def __init__(self, *, default=None, + bint use_single_float=False, bint autoreset=True, bint use_bin_type=True, + bint strict_types=False, bint datetime=False, unicode_errors=None, + buf_size=256*1024): + self.use_float = use_single_float + self.strict_types = strict_types + self.autoreset = autoreset + self.datetime = datetime + self.pk.use_bin_type = use_bin_type + if default is not None: + if not PyCallable_Check(default): + raise TypeError("default must be a callable.") + self._default = default + + self._berrors = unicode_errors + if unicode_errors is None: + self.unicode_errors = NULL + else: + self.unicode_errors = self._berrors + + # returns -2 when default should(o) be called + cdef int _pack_inner(self, object o, bint will_default, int nest_limit) except -1: + cdef long long llval + cdef unsigned long long ullval + cdef unsigned long ulval + cdef const char* rawval + cdef Py_ssize_t L + cdef Py_buffer view + cdef bint strict = self.strict_types + + if o is None: + msgpack_pack_nil(&self.pk) + elif o is True: + msgpack_pack_true(&self.pk) + elif o is False: + msgpack_pack_false(&self.pk) + elif PyLong_CheckExact(o) if strict else PyLong_Check(o): + try: + if o > 0: + ullval = o + msgpack_pack_unsigned_long_long(&self.pk, ullval) + else: + llval = o + 
msgpack_pack_long_long(&self.pk, llval) + except OverflowError as oe: + if will_default: + return -2 + else: + raise OverflowError("Integer value out of range") + elif PyFloat_CheckExact(o) if strict else PyFloat_Check(o): + if self.use_float: + msgpack_pack_float(&self.pk, o) + else: + msgpack_pack_double(&self.pk, o) + elif PyBytesLike_CheckExact(o) if strict else PyBytesLike_Check(o): + L = Py_SIZE(o) + if L > ITEM_LIMIT: + PyErr_Format(ValueError, b"%.200s object is too large", Py_TYPE(o).tp_name) + rawval = o + msgpack_pack_bin(&self.pk, L) + msgpack_pack_raw_body(&self.pk, rawval, L) + elif PyUnicode_CheckExact(o) if strict else PyUnicode_Check(o): + if self.unicode_errors == NULL: + rawval = PyUnicode_AsUTF8AndSize(o, &L) + if L >ITEM_LIMIT: + raise ValueError("unicode string is too large") + else: + o = PyUnicode_AsEncodedString(o, NULL, self.unicode_errors) + L = Py_SIZE(o) + if L > ITEM_LIMIT: + raise ValueError("unicode string is too large") + rawval = o + msgpack_pack_raw(&self.pk, L) + msgpack_pack_raw_body(&self.pk, rawval, L) + elif PyDict_CheckExact(o) if strict else PyDict_Check(o): + L = len(o) + if L > ITEM_LIMIT: + raise ValueError("dict is too large") + msgpack_pack_map(&self.pk, L) + for k, v in o.items(): + self._pack(k, nest_limit) + self._pack(v, nest_limit) + elif type(o) is ExtType if strict else isinstance(o, ExtType): + # This should be before Tuple because ExtType is namedtuple. + rawval = o.data + L = len(o.data) + if L > ITEM_LIMIT: + raise ValueError("EXT data is too large") + msgpack_pack_ext(&self.pk, o.code, L) + msgpack_pack_raw_body(&self.pk, rawval, L) + elif type(o) is Timestamp: + llval = o.seconds + ulval = o.nanoseconds + msgpack_pack_timestamp(&self.pk, llval, ulval) + elif PyList_CheckExact(o) if strict else (PyTuple_Check(o) or PyList_Check(o)): + L = Py_SIZE(o) + if L > ITEM_LIMIT: + raise ValueError("list is too large") + msgpack_pack_array(&self.pk, L) + for v in o: + self._pack(v, nest_limit) + elif PyMemoryView_Check(o): + PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) + L = view.len + if L > ITEM_LIMIT: + PyBuffer_Release(&view); + raise ValueError("memoryview is too large") + try: + msgpack_pack_bin(&self.pk, L) + msgpack_pack_raw_body(&self.pk, view.buf, L) + finally: + PyBuffer_Release(&view); + elif self.datetime and PyDateTime_CheckExact(o) and datetime_tzinfo(o) is not None: + delta = o - epoch + if not PyDelta_CheckExact(delta): + raise ValueError("failed to calculate delta") + llval = timedelta_days(delta) * (24*60*60) + timedelta_seconds(delta) + ulval = timedelta_microseconds(delta) * 1000 + msgpack_pack_timestamp(&self.pk, llval, ulval) + elif will_default: + return -2 + elif self.datetime and PyDateTime_CheckExact(o): + # this should be later than will_default + PyErr_Format(ValueError, b"can not serialize '%.200s' object where tzinfo=None", Py_TYPE(o).tp_name) + else: + PyErr_Format(TypeError, b"can not serialize '%.200s' object", Py_TYPE(o).tp_name) + + cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: + cdef int ret + if nest_limit < 0: + raise ValueError("recursion limit exceeded.") + nest_limit -= 1 + if self._default is not None: + ret = self._pack_inner(o, 1, nest_limit) + if ret == -2: + o = self._default(o) + else: + return ret + return self._pack_inner(o, 0, nest_limit) + + @cython.critical_section + def pack(self, object obj): + cdef int ret + self._check_exports() + try: + ret = self._pack(obj, DEFAULT_RECURSE_LIMIT) + except: + self.pk.length = 0 + raise + if ret: # should not happen. 
+ raise RuntimeError("internal error") + if self.autoreset: + buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) + self.pk.length = 0 + return buf + + @cython.critical_section + def pack_ext_type(self, typecode, data): + self._check_exports() + if len(data) > ITEM_LIMIT: + raise ValueError("ext data too large") + msgpack_pack_ext(&self.pk, typecode, len(data)) + msgpack_pack_raw_body(&self.pk, data, len(data)) + + @cython.critical_section + def pack_array_header(self, long long size): + self._check_exports() + if size > ITEM_LIMIT: + raise ValueError("array too large") + msgpack_pack_array(&self.pk, size) + if self.autoreset: + buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) + self.pk.length = 0 + return buf + + @cython.critical_section + def pack_map_header(self, long long size): + self._check_exports() + if size > ITEM_LIMIT: + raise ValueError("map too learge") + msgpack_pack_map(&self.pk, size) + if self.autoreset: + buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) + self.pk.length = 0 + return buf + + @cython.critical_section + def pack_map_pairs(self, object pairs): + """ + Pack *pairs* as msgpack map type. + + *pairs* should be a sequence of pairs. + (`len(pairs)` and `for k, v in pairs:` should be supported.) + """ + self._check_exports() + size = len(pairs) + if size > ITEM_LIMIT: + raise ValueError("map too large") + msgpack_pack_map(&self.pk, size) + for k, v in pairs: + self._pack(k) + self._pack(v) + if self.autoreset: + buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) + self.pk.length = 0 + return buf + + @cython.critical_section + def reset(self): + """Reset internal buffer. + + This method is useful only when autoreset=False. + """ + self._check_exports() + self.pk.length = 0 + + @cython.critical_section + def bytes(self): + """Return internal buffer contents as bytes object""" + return PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) + + def getbuffer(self): + """Return memoryview of internal buffer. + + Note: Packer now supports buffer protocol. You can use memoryview(packer). 
+ """ + return memoryview(self) + + def __getbuffer__(self, Py_buffer *buffer, int flags): + PyBuffer_FillInfo(buffer, self, self.pk.buf, self.pk.length, 1, flags) + self.exports += 1 + + def __releasebuffer__(self, Py_buffer *buffer): + self.exports -= 1 diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx new file mode 100644 index 00000000..f0cf96d7 --- /dev/null +++ b/msgpack/_unpacker.pyx @@ -0,0 +1,554 @@ +from cpython cimport * +cdef extern from "Python.h": + ctypedef struct PyObject + object PyMemoryView_GetContiguous(object obj, int buffertype, char order) + +from libc.stdlib cimport * +from libc.string cimport * +from libc.limits cimport * +from libc.stdint cimport uint64_t + +from .exceptions import ( + BufferFull, + OutOfData, + ExtraData, + FormatError, + StackError, +) +from .ext import ExtType, Timestamp + +cdef object giga = 1_000_000_000 + + +cdef extern from "unpack.h": + ctypedef struct msgpack_user: + bint use_list + bint raw + bint has_pairs_hook # call object_hook with k-v pairs + bint strict_map_key + int timestamp + PyObject* object_hook + PyObject* list_hook + PyObject* ext_hook + PyObject* timestamp_t + PyObject *giga; + PyObject *utc; + const char *unicode_errors + Py_ssize_t max_str_len + Py_ssize_t max_bin_len + Py_ssize_t max_array_len + Py_ssize_t max_map_len + Py_ssize_t max_ext_len + + ctypedef struct unpack_context: + msgpack_user user + PyObject* obj + Py_ssize_t count + + ctypedef int (*execute_fn)(unpack_context* ctx, const char* data, + Py_ssize_t len, Py_ssize_t* off) except? -1 + execute_fn unpack_construct + execute_fn unpack_skip + execute_fn read_array_header + execute_fn read_map_header + void unpack_init(unpack_context* ctx) + object unpack_data(unpack_context* ctx) + void unpack_clear(unpack_context* ctx) + +cdef inline init_ctx(unpack_context *ctx, + object object_hook, object object_pairs_hook, + object list_hook, object ext_hook, + bint use_list, bint raw, int timestamp, + bint strict_map_key, + const char* unicode_errors, + Py_ssize_t max_str_len, Py_ssize_t max_bin_len, + Py_ssize_t max_array_len, Py_ssize_t max_map_len, + Py_ssize_t max_ext_len): + unpack_init(ctx) + ctx.user.use_list = use_list + ctx.user.raw = raw + ctx.user.strict_map_key = strict_map_key + ctx.user.object_hook = ctx.user.list_hook = NULL + ctx.user.max_str_len = max_str_len + ctx.user.max_bin_len = max_bin_len + ctx.user.max_array_len = max_array_len + ctx.user.max_map_len = max_map_len + ctx.user.max_ext_len = max_ext_len + + if object_hook is not None and object_pairs_hook is not None: + raise TypeError("object_pairs_hook and object_hook are mutually exclusive.") + + if object_hook is not None: + if not PyCallable_Check(object_hook): + raise TypeError("object_hook must be a callable.") + ctx.user.object_hook = object_hook + + if object_pairs_hook is None: + ctx.user.has_pairs_hook = False + else: + if not PyCallable_Check(object_pairs_hook): + raise TypeError("object_pairs_hook must be a callable.") + ctx.user.object_hook = object_pairs_hook + ctx.user.has_pairs_hook = True + + if list_hook is not None: + if not PyCallable_Check(list_hook): + raise TypeError("list_hook must be a callable.") + ctx.user.list_hook = list_hook + + if ext_hook is not None: + if not PyCallable_Check(ext_hook): + raise TypeError("ext_hook must be a callable.") + ctx.user.ext_hook = ext_hook + + if timestamp < 0 or 3 < timestamp: + raise ValueError("timestamp must be 0..3") + + # Add Timestamp type to the user object so it may be used in unpack.h + ctx.user.timestamp = timestamp + 
ctx.user.timestamp_t = Timestamp + ctx.user.giga = giga + ctx.user.utc = utc + ctx.user.unicode_errors = unicode_errors + +def default_read_extended_type(typecode, data): + raise NotImplementedError("Cannot decode extended type with typecode=%d" % typecode) + +cdef inline int get_data_from_buffer(object obj, + Py_buffer *view, + char **buf, + Py_ssize_t *buffer_len) except 0: + cdef object contiguous + cdef Py_buffer tmp + if PyObject_GetBuffer(obj, view, PyBUF_FULL_RO) == -1: + raise + if view.itemsize != 1: + PyBuffer_Release(view) + raise BufferError("cannot unpack from multi-byte object") + if PyBuffer_IsContiguous(view, b'A') == 0: + PyBuffer_Release(view) + # create a contiguous copy and get buffer + contiguous = PyMemoryView_GetContiguous(obj, PyBUF_READ, b'C') + PyObject_GetBuffer(contiguous, view, PyBUF_SIMPLE) + # view must hold the only reference to contiguous, + # so memory is freed when view is released + Py_DECREF(contiguous) + buffer_len[0] = view.len + buf[0] = view.buf + return 1 + + +def unpackb(object packed, *, object object_hook=None, object list_hook=None, + bint use_list=True, bint raw=False, int timestamp=0, bint strict_map_key=True, + unicode_errors=None, + object_pairs_hook=None, ext_hook=ExtType, + Py_ssize_t max_str_len=-1, + Py_ssize_t max_bin_len=-1, + Py_ssize_t max_array_len=-1, + Py_ssize_t max_map_len=-1, + Py_ssize_t max_ext_len=-1): + """ + Unpack packed_bytes to object. Returns an unpacked object. + + Raises ``ExtraData`` when *packed* contains extra bytes. + Raises ``ValueError`` when *packed* is incomplete. + Raises ``FormatError`` when *packed* is not valid msgpack. + Raises ``StackError`` when *packed* contains too nested. + Other exceptions can be raised during unpacking. + + See :class:`Unpacker` for options. + + *max_xxx_len* options are configured automatically from ``len(packed)``. + """ + cdef unpack_context ctx + cdef Py_ssize_t off = 0 + cdef int ret + + cdef Py_buffer view + cdef char* buf = NULL + cdef Py_ssize_t buf_len + cdef const char* cerr = NULL + + if unicode_errors is not None: + cerr = unicode_errors + + get_data_from_buffer(packed, &view, &buf, &buf_len) + + if max_str_len == -1: + max_str_len = buf_len + if max_bin_len == -1: + max_bin_len = buf_len + if max_array_len == -1: + max_array_len = buf_len + if max_map_len == -1: + max_map_len = buf_len//2 + if max_ext_len == -1: + max_ext_len = buf_len + + try: + init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook, + use_list, raw, timestamp, strict_map_key, cerr, + max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) + ret = unpack_construct(&ctx, buf, buf_len, &off) + finally: + PyBuffer_Release(&view); + + if ret == 1: + obj = unpack_data(&ctx) + if off < buf_len: + raise ExtraData(obj, PyBytes_FromStringAndSize(buf+off, buf_len-off)) + return obj + unpack_clear(&ctx) + if ret == 0: + raise ValueError("Unpack failed: incomplete input") + elif ret == -2: + raise FormatError + elif ret == -3: + raise StackError + raise ValueError("Unpack failed: error = %d" % (ret,)) + + +cdef class Unpacker: + """Streaming unpacker. + + Arguments: + + :param file_like: + File-like object having `.read(n)` method. + If specified, unpacker reads serialized data from it and `.feed()` is not usable. + + :param int read_size: + Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`) + + :param bool use_list: + If true, unpack msgpack array to Python list. + Otherwise, unpack to Python tuple. 
(default: True) + + :param bool raw: + If true, unpack msgpack raw to Python bytes. + Otherwise, unpack to Python str by decoding with UTF-8 encoding (default). + + :param int timestamp: + Control how timestamp type is unpacked: + + 0 - Timestamp + 1 - float (Seconds from the EPOCH) + 2 - int (Nanoseconds from the EPOCH) + 3 - datetime.datetime (UTC). + + :param bool strict_map_key: + If true (default), only str or bytes are accepted for map (dict) keys. + + :param object_hook: + When specified, it should be callable. + Unpacker calls it with a dict argument after unpacking msgpack map. + (See also simplejson) + + :param object_pairs_hook: + When specified, it should be callable. + Unpacker calls it with a list of key-value pairs after unpacking msgpack map. + (See also simplejson) + + :param str unicode_errors: + The error handler for decoding unicode. (default: 'strict') + This option should be used only when you have msgpack data which + contains invalid UTF-8 string. + + :param int max_buffer_size: + Limits size of data waiting unpacked. 0 means 2**32-1. + The default value is 100*1024*1024 (100MiB). + Raises `BufferFull` exception when it is insufficient. + You should set this parameter when unpacking data from untrusted source. + + :param int max_str_len: + Deprecated, use *max_buffer_size* instead. + Limits max length of str. (default: max_buffer_size) + + :param int max_bin_len: + Deprecated, use *max_buffer_size* instead. + Limits max length of bin. (default: max_buffer_size) + + :param int max_array_len: + Limits max length of array. + (default: max_buffer_size) + + :param int max_map_len: + Limits max length of map. + (default: max_buffer_size//2) + + :param int max_ext_len: + Deprecated, use *max_buffer_size* instead. + Limits max size of ext type. (default: max_buffer_size) + + Example of streaming deserialize from file-like object:: + + unpacker = Unpacker(file_like) + for o in unpacker: + process(o) + + Example of streaming deserialize from socket:: + + unpacker = Unpacker() + while True: + buf = sock.recv(1024**2) + if not buf: + break + unpacker.feed(buf) + for o in unpacker: + process(o) + + Raises ``ExtraData`` when *packed* contains extra bytes. + Raises ``OutOfData`` when *packed* is incomplete. + Raises ``FormatError`` when *packed* is not valid msgpack. + Raises ``StackError`` when *packed* contains too nested. + Other exceptions can be raised during unpacking. + """ + cdef unpack_context ctx + cdef char* buf + cdef Py_ssize_t buf_size, buf_head, buf_tail + cdef object file_like + cdef object file_like_read + cdef Py_ssize_t read_size + # To maintain refcnt. 
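The four `timestamp` modes documented above map onto `Timestamp`, float, int and `datetime` results respectively; roughly (a sketch using the `Timestamp` class from `msgpack.ext`)::

    import msgpack
    from msgpack.ext import Timestamp

    packed = msgpack.packb(Timestamp(1700000000, 500_000_000))

    msgpack.unpackb(packed, timestamp=0)  # Timestamp(seconds=1700000000, nanoseconds=500000000)
    msgpack.unpackb(packed, timestamp=1)  # 1700000000.5 (float seconds since the epoch)
    msgpack.unpackb(packed, timestamp=2)  # 1700000000500000000 (int nanoseconds)
    msgpack.unpackb(packed, timestamp=3)  # datetime.datetime(2023, 11, 14, 22, 13, 20, 500000,
                                          #                   tzinfo=datetime.timezone.utc)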
+ cdef object object_hook, object_pairs_hook, list_hook, ext_hook + cdef object unicode_errors + cdef Py_ssize_t max_buffer_size + cdef uint64_t stream_offset + + def __cinit__(self): + self.buf = NULL + + def __dealloc__(self): + PyMem_Free(self.buf) + self.buf = NULL + + @cython.critical_section + def __init__(self, file_like=None, *, Py_ssize_t read_size=0, + bint use_list=True, bint raw=False, int timestamp=0, bint strict_map_key=True, + object object_hook=None, object object_pairs_hook=None, object list_hook=None, + unicode_errors=None, Py_ssize_t max_buffer_size=100*1024*1024, + object ext_hook=ExtType, + Py_ssize_t max_str_len=-1, + Py_ssize_t max_bin_len=-1, + Py_ssize_t max_array_len=-1, + Py_ssize_t max_map_len=-1, + Py_ssize_t max_ext_len=-1): + cdef const char *cerr=NULL + + self.object_hook = object_hook + self.object_pairs_hook = object_pairs_hook + self.list_hook = list_hook + self.ext_hook = ext_hook + + self.file_like = file_like + if file_like: + self.file_like_read = file_like.read + if not PyCallable_Check(self.file_like_read): + raise TypeError("`file_like.read` must be a callable.") + + if not max_buffer_size: + max_buffer_size = INT_MAX + if max_str_len == -1: + max_str_len = max_buffer_size + if max_bin_len == -1: + max_bin_len = max_buffer_size + if max_array_len == -1: + max_array_len = max_buffer_size + if max_map_len == -1: + max_map_len = max_buffer_size//2 + if max_ext_len == -1: + max_ext_len = max_buffer_size + + if read_size > max_buffer_size: + raise ValueError("read_size should be less than or equal to max_buffer_size") + if not read_size: + read_size = min(max_buffer_size, 1024**2) + + self.max_buffer_size = max_buffer_size + self.read_size = read_size + self.buf = PyMem_Malloc(read_size) + if self.buf == NULL: + raise MemoryError("Unable to allocate internal buffer.") + self.buf_size = read_size + self.buf_head = 0 + self.buf_tail = 0 + self.stream_offset = 0 + + if unicode_errors is not None: + self.unicode_errors = unicode_errors + cerr = unicode_errors + + init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, + ext_hook, use_list, raw, timestamp, strict_map_key, cerr, + max_str_len, max_bin_len, max_array_len, + max_map_len, max_ext_len) + + @cython.critical_section + def feed(self, object next_bytes): + """Append `next_bytes` to internal buffer.""" + cdef Py_buffer pybuff + cdef char* buf + cdef Py_ssize_t buf_len + + if self.file_like is not None: + raise AssertionError( + "unpacker.feed() cannot be used together with `file_like`.") + + get_data_from_buffer(next_bytes, &pybuff, &buf, &buf_len) + try: + self.append_buffer(buf, buf_len) + finally: + PyBuffer_Release(&pybuff) + + cdef append_buffer(self, void* _buf, Py_ssize_t _buf_len): + cdef: + char* buf = self.buf + char* new_buf + Py_ssize_t head = self.buf_head + Py_ssize_t tail = self.buf_tail + Py_ssize_t buf_size = self.buf_size + Py_ssize_t new_size + + if tail + _buf_len > buf_size: + if ((tail - head) + _buf_len) <= buf_size: + # move to front. + memmove(buf, buf + head, tail - head) + tail -= head + head = 0 + else: + # expand buffer. 
+ new_size = (tail-head) + _buf_len + if new_size > self.max_buffer_size: + raise BufferFull + new_size = min(new_size*2, self.max_buffer_size) + new_buf = PyMem_Malloc(new_size) + if new_buf == NULL: + # self.buf still holds old buffer and will be freed during + # obj destruction + raise MemoryError("Unable to enlarge internal buffer.") + memcpy(new_buf, buf + head, tail - head) + PyMem_Free(buf) + + buf = new_buf + buf_size = new_size + tail -= head + head = 0 + + memcpy(buf + tail, (_buf), _buf_len) + self.buf = buf + self.buf_head = head + self.buf_size = buf_size + self.buf_tail = tail + _buf_len + + cdef int read_from_file(self) except -1: + cdef Py_ssize_t remains = self.max_buffer_size - (self.buf_tail - self.buf_head) + if remains <= 0: + raise BufferFull + + next_bytes = self.file_like_read(min(self.read_size, remains)) + if next_bytes: + self.append_buffer(PyBytes_AsString(next_bytes), PyBytes_Size(next_bytes)) + else: + self.file_like = None + return 0 + + cdef object _unpack(self, execute_fn execute, bint iter=0): + cdef int ret + cdef object obj + cdef Py_ssize_t prev_head + + while 1: + prev_head = self.buf_head + if prev_head < self.buf_tail: + ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) + self.stream_offset += self.buf_head - prev_head + else: + ret = 0 + + if ret == 1: + obj = unpack_data(&self.ctx) + unpack_init(&self.ctx) + return obj + elif ret == 0: + if self.file_like is not None: + self.read_from_file() + continue + if iter: + raise StopIteration("No more data to unpack.") + else: + raise OutOfData("No more data to unpack.") + elif ret == -2: + raise FormatError + elif ret == -3: + raise StackError + else: + raise ValueError("Unpack failed: error = %d" % (ret,)) + + @cython.critical_section + def read_bytes(self, Py_ssize_t nbytes): + """Read a specified number of raw bytes from the stream""" + cdef Py_ssize_t nread + nread = min(self.buf_tail - self.buf_head, nbytes) + ret = PyBytes_FromStringAndSize(self.buf + self.buf_head, nread) + self.buf_head += nread + if nread < nbytes and self.file_like is not None: + ret += self.file_like.read(nbytes - nread) + nread = len(ret) + self.stream_offset += nread + return ret + + @cython.critical_section + def unpack(self): + """Unpack one object + + Raises `OutOfData` when there are no more bytes to unpack. + """ + return self._unpack(unpack_construct) + + @cython.critical_section + def skip(self): + """Read and ignore one object, returning None + + Raises `OutOfData` when there are no more bytes to unpack. + """ + return self._unpack(unpack_skip) + + @cython.critical_section + def read_array_header(self): + """assuming the next object is an array, return its size n, such that + the next n unpack() calls will iterate over its contents. + + Raises `OutOfData` when there are no more bytes to unpack. + """ + return self._unpack(read_array_header) + + @cython.critical_section + def read_map_header(self): + """assuming the next object is a map, return its size n, such that the + next n * 2 unpack() calls will iterate over its key-value pairs. + + Raises `OutOfData` when there are no more bytes to unpack. + """ + return self._unpack(read_map_header) + + @cython.critical_section + def tell(self): + """Returns the current position of the Unpacker in bytes, i.e., the + number of bytes that were read from the input, also the starting + position of the next object. 
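`read_array_header()` and `read_map_header()` above let a caller walk a container incrementally instead of materialising it at once, and `tell()` reports how many bytes of input have been consumed so far. A small sketch, assuming the installed `msgpack` package::

    import msgpack

    unpacker = msgpack.Unpacker()
    unpacker.feed(msgpack.packb([1, "two", 3.0]))

    n = unpacker.read_array_header()               # 3
    items = [unpacker.unpack() for _ in range(n)]  # [1, "two", 3.0]
    offset = unpacker.tell()                       # bytes consumed from the stream so far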
+ """ + return self.stream_offset + + def __iter__(self): + return self + + @cython.critical_section + def __next__(self): + return self._unpack(unpack_construct, 1) + + # for debug. + #def _buf(self): + # return PyString_FromStringAndSize(self.buf, self.buf_tail) + + #def _off(self): + # return self.buf_head diff --git a/msgpack/exceptions.py b/msgpack/exceptions.py new file mode 100644 index 00000000..d6d2615c --- /dev/null +++ b/msgpack/exceptions.py @@ -0,0 +1,48 @@ +class UnpackException(Exception): + """Base class for some exceptions raised while unpacking. + + NOTE: unpack may raise exception other than subclass of + UnpackException. If you want to catch all error, catch + Exception instead. + """ + + +class BufferFull(UnpackException): + pass + + +class OutOfData(UnpackException): + pass + + +class FormatError(ValueError, UnpackException): + """Invalid msgpack format""" + + +class StackError(ValueError, UnpackException): + """Too nested""" + + +# Deprecated. Use ValueError instead +UnpackValueError = ValueError + + +class ExtraData(UnpackValueError): + """ExtraData is raised when there is trailing data. + + This exception is raised while only one-shot (not streaming) + unpack. + """ + + def __init__(self, unpacked, extra): + self.unpacked = unpacked + self.extra = extra + + def __str__(self): + return "unpack(b) received extra data." + + +# Deprecated. Use Exception instead to catch all exception during packing. +PackException = Exception +PackValueError = ValueError +PackOverflowError = OverflowError diff --git a/msgpack/ext.py b/msgpack/ext.py new file mode 100644 index 00000000..9694819a --- /dev/null +++ b/msgpack/ext.py @@ -0,0 +1,170 @@ +import datetime +import struct +from collections import namedtuple + + +class ExtType(namedtuple("ExtType", "code data")): + """ExtType represents ext type in msgpack.""" + + def __new__(cls, code, data): + if not isinstance(code, int): + raise TypeError("code must be int") + if not isinstance(data, bytes): + raise TypeError("data must be bytes") + if not 0 <= code <= 127: + raise ValueError("code must be 0~127") + return super().__new__(cls, code, data) + + +class Timestamp: + """Timestamp represents the Timestamp extension type in msgpack. + + When built with Cython, msgpack uses C methods to pack and unpack `Timestamp`. + When using pure-Python msgpack, :func:`to_bytes` and :func:`from_bytes` are used to pack and + unpack `Timestamp`. + + This class is immutable: Do not override seconds and nanoseconds. + """ + + __slots__ = ["seconds", "nanoseconds"] + + def __init__(self, seconds, nanoseconds=0): + """Initialize a Timestamp object. + + :param int seconds: + Number of seconds since the UNIX epoch (00:00:00 UTC Jan 1 1970, minus leap seconds). + May be negative. + + :param int nanoseconds: + Number of nanoseconds to add to `seconds` to get fractional time. + Maximum is 999_999_999. Default is 0. + + Note: Negative times (before the UNIX epoch) are represented as neg. seconds + pos. ns. 
+ """ + if not isinstance(seconds, int): + raise TypeError("seconds must be an integer") + if not isinstance(nanoseconds, int): + raise TypeError("nanoseconds must be an integer") + if not (0 <= nanoseconds < 10**9): + raise ValueError("nanoseconds must be a non-negative integer less than 999999999.") + self.seconds = seconds + self.nanoseconds = nanoseconds + + def __repr__(self): + """String representation of Timestamp.""" + return f"Timestamp(seconds={self.seconds}, nanoseconds={self.nanoseconds})" + + def __eq__(self, other): + """Check for equality with another Timestamp object""" + if type(other) is self.__class__: + return self.seconds == other.seconds and self.nanoseconds == other.nanoseconds + return False + + def __ne__(self, other): + """not-equals method (see :func:`__eq__()`)""" + return not self.__eq__(other) + + def __hash__(self): + return hash((self.seconds, self.nanoseconds)) + + @staticmethod + def from_bytes(b): + """Unpack bytes into a `Timestamp` object. + + Used for pure-Python msgpack unpacking. + + :param b: Payload from msgpack ext message with code -1 + :type b: bytes + + :returns: Timestamp object unpacked from msgpack ext payload + :rtype: Timestamp + """ + if len(b) == 4: + seconds = struct.unpack("!L", b)[0] + nanoseconds = 0 + elif len(b) == 8: + data64 = struct.unpack("!Q", b)[0] + seconds = data64 & 0x00000003FFFFFFFF + nanoseconds = data64 >> 34 + elif len(b) == 12: + nanoseconds, seconds = struct.unpack("!Iq", b) + else: + raise ValueError( + "Timestamp type can only be created from 32, 64, or 96-bit byte objects" + ) + return Timestamp(seconds, nanoseconds) + + def to_bytes(self): + """Pack this Timestamp object into bytes. + + Used for pure-Python msgpack packing. + + :returns data: Payload for EXT message with code -1 (timestamp type) + :rtype: bytes + """ + if (self.seconds >> 34) == 0: # seconds is non-negative and fits in 34 bits + data64 = self.nanoseconds << 34 | self.seconds + if data64 & 0xFFFFFFFF00000000 == 0: + # nanoseconds is zero and seconds < 2**32, so timestamp 32 + data = struct.pack("!L", data64) + else: + # timestamp 64 + data = struct.pack("!Q", data64) + else: + # timestamp 96 + data = struct.pack("!Iq", self.nanoseconds, self.seconds) + return data + + @staticmethod + def from_unix(unix_sec): + """Create a Timestamp from posix timestamp in seconds. + + :param unix_float: Posix timestamp in seconds. + :type unix_float: int or float + """ + seconds = int(unix_sec // 1) + nanoseconds = int((unix_sec % 1) * 10**9) + return Timestamp(seconds, nanoseconds) + + def to_unix(self): + """Get the timestamp as a floating-point value. + + :returns: posix timestamp + :rtype: float + """ + return self.seconds + self.nanoseconds / 1e9 + + @staticmethod + def from_unix_nano(unix_ns): + """Create a Timestamp from posix timestamp in nanoseconds. + + :param int unix_ns: Posix timestamp in nanoseconds. + :rtype: Timestamp + """ + return Timestamp(*divmod(unix_ns, 10**9)) + + def to_unix_nano(self): + """Get the timestamp as a unixtime in nanoseconds. + + :returns: posix timestamp in nanoseconds + :rtype: int + """ + return self.seconds * 10**9 + self.nanoseconds + + def to_datetime(self): + """Get the timestamp as a UTC datetime. + + :rtype: `datetime.datetime` + """ + utc = datetime.timezone.utc + return datetime.datetime.fromtimestamp(0, utc) + datetime.timedelta( + seconds=self.seconds, microseconds=self.nanoseconds // 1000 + ) + + @staticmethod + def from_datetime(dt): + """Create a Timestamp from datetime with tzinfo. 
+ + :rtype: Timestamp + """ + return Timestamp(seconds=int(dt.timestamp()), nanoseconds=dt.microsecond * 1000) diff --git a/msgpack/fallback.py b/msgpack/fallback.py new file mode 100644 index 00000000..b02e47cf --- /dev/null +++ b/msgpack/fallback.py @@ -0,0 +1,929 @@ +"""Fallback pure Python implementation of msgpack""" + +import struct +import sys +from datetime import datetime as _DateTime + +if hasattr(sys, "pypy_version_info"): + from __pypy__ import newlist_hint + from __pypy__.builders import BytesBuilder + + _USING_STRINGBUILDER = True + + class BytesIO: + def __init__(self, s=b""): + if s: + self.builder = BytesBuilder(len(s)) + self.builder.append(s) + else: + self.builder = BytesBuilder() + + def write(self, s): + if isinstance(s, memoryview): + s = s.tobytes() + elif isinstance(s, bytearray): + s = bytes(s) + self.builder.append(s) + + def getvalue(self): + return self.builder.build() + +else: + from io import BytesIO + + _USING_STRINGBUILDER = False + + def newlist_hint(size): + return [] + + +from .exceptions import BufferFull, ExtraData, FormatError, OutOfData, StackError +from .ext import ExtType, Timestamp + +EX_SKIP = 0 +EX_CONSTRUCT = 1 +EX_READ_ARRAY_HEADER = 2 +EX_READ_MAP_HEADER = 3 + +TYPE_IMMEDIATE = 0 +TYPE_ARRAY = 1 +TYPE_MAP = 2 +TYPE_RAW = 3 +TYPE_BIN = 4 +TYPE_EXT = 5 + +DEFAULT_RECURSE_LIMIT = 511 + + +def _check_type_strict(obj, t, type=type, tuple=tuple): + if type(t) is tuple: + return type(obj) in t + else: + return type(obj) is t + + +def _get_data_from_buffer(obj): + view = memoryview(obj) + if view.itemsize != 1: + raise ValueError("cannot unpack from multi-byte object") + return view + + +def unpackb(packed, **kwargs): + """ + Unpack an object from `packed`. + + Raises ``ExtraData`` when *packed* contains extra bytes. + Raises ``ValueError`` when *packed* is incomplete. + Raises ``FormatError`` when *packed* is not valid msgpack. + Raises ``StackError`` when *packed* contains too nested. + Other exceptions can be raised during unpacking. + + See :class:`Unpacker` for options. + """ + unpacker = Unpacker(None, max_buffer_size=len(packed), **kwargs) + unpacker.feed(packed) + try: + ret = unpacker._unpack() + except OutOfData: + raise ValueError("Unpack failed: incomplete input") + except RecursionError: + raise StackError + if unpacker._got_extradata(): + raise ExtraData(ret, unpacker._get_extradata()) + return ret + + +_NO_FORMAT_USED = "" +_MSGPACK_HEADERS = { + 0xC4: (1, _NO_FORMAT_USED, TYPE_BIN), + 0xC5: (2, ">H", TYPE_BIN), + 0xC6: (4, ">I", TYPE_BIN), + 0xC7: (2, "Bb", TYPE_EXT), + 0xC8: (3, ">Hb", TYPE_EXT), + 0xC9: (5, ">Ib", TYPE_EXT), + 0xCA: (4, ">f"), + 0xCB: (8, ">d"), + 0xCC: (1, _NO_FORMAT_USED), + 0xCD: (2, ">H"), + 0xCE: (4, ">I"), + 0xCF: (8, ">Q"), + 0xD0: (1, "b"), + 0xD1: (2, ">h"), + 0xD2: (4, ">i"), + 0xD3: (8, ">q"), + 0xD4: (1, "b1s", TYPE_EXT), + 0xD5: (2, "b2s", TYPE_EXT), + 0xD6: (4, "b4s", TYPE_EXT), + 0xD7: (8, "b8s", TYPE_EXT), + 0xD8: (16, "b16s", TYPE_EXT), + 0xD9: (1, _NO_FORMAT_USED, TYPE_RAW), + 0xDA: (2, ">H", TYPE_RAW), + 0xDB: (4, ">I", TYPE_RAW), + 0xDC: (2, ">H", TYPE_ARRAY), + 0xDD: (4, ">I", TYPE_ARRAY), + 0xDE: (2, ">H", TYPE_MAP), + 0xDF: (4, ">I", TYPE_MAP), +} + + +class Unpacker: + """Streaming unpacker. + + Arguments: + + :param file_like: + File-like object having `.read(n)` method. + If specified, unpacker reads serialized data from it and `.feed()` is not usable. + + :param int read_size: + Used as `file_like.read(read_size)`. 
(default: `min(16*1024, max_buffer_size)`) + + :param bool use_list: + If true, unpack msgpack array to Python list. + Otherwise, unpack to Python tuple. (default: True) + + :param bool raw: + If true, unpack msgpack raw to Python bytes. + Otherwise, unpack to Python str by decoding with UTF-8 encoding (default). + + :param int timestamp: + Control how timestamp type is unpacked: + + 0 - Timestamp + 1 - float (Seconds from the EPOCH) + 2 - int (Nanoseconds from the EPOCH) + 3 - datetime.datetime (UTC). + + :param bool strict_map_key: + If true (default), only str or bytes are accepted for map (dict) keys. + + :param object_hook: + When specified, it should be callable. + Unpacker calls it with a dict argument after unpacking msgpack map. + (See also simplejson) + + :param object_pairs_hook: + When specified, it should be callable. + Unpacker calls it with a list of key-value pairs after unpacking msgpack map. + (See also simplejson) + + :param str unicode_errors: + The error handler for decoding unicode. (default: 'strict') + This option should be used only when you have msgpack data which + contains invalid UTF-8 string. + + :param int max_buffer_size: + Limits size of data waiting unpacked. 0 means 2**32-1. + The default value is 100*1024*1024 (100MiB). + Raises `BufferFull` exception when it is insufficient. + You should set this parameter when unpacking data from untrusted source. + + :param int max_str_len: + Deprecated, use *max_buffer_size* instead. + Limits max length of str. (default: max_buffer_size) + + :param int max_bin_len: + Deprecated, use *max_buffer_size* instead. + Limits max length of bin. (default: max_buffer_size) + + :param int max_array_len: + Limits max length of array. + (default: max_buffer_size) + + :param int max_map_len: + Limits max length of map. + (default: max_buffer_size//2) + + :param int max_ext_len: + Deprecated, use *max_buffer_size* instead. + Limits max size of ext type. (default: max_buffer_size) + + Example of streaming deserialize from file-like object:: + + unpacker = Unpacker(file_like) + for o in unpacker: + process(o) + + Example of streaming deserialize from socket:: + + unpacker = Unpacker() + while True: + buf = sock.recv(1024**2) + if not buf: + break + unpacker.feed(buf) + for o in unpacker: + process(o) + + Raises ``ExtraData`` when *packed* contains extra bytes. + Raises ``OutOfData`` when *packed* is incomplete. + Raises ``FormatError`` when *packed* is not valid msgpack. + Raises ``StackError`` when *packed* contains too nested. + Other exceptions can be raised during unpacking. + """ + + def __init__( + self, + file_like=None, + *, + read_size=0, + use_list=True, + raw=False, + timestamp=0, + strict_map_key=True, + object_hook=None, + object_pairs_hook=None, + list_hook=None, + unicode_errors=None, + max_buffer_size=100 * 1024 * 1024, + ext_hook=ExtType, + max_str_len=-1, + max_bin_len=-1, + max_array_len=-1, + max_map_len=-1, + max_ext_len=-1, + ): + if unicode_errors is None: + unicode_errors = "strict" + + if file_like is None: + self._feeding = True + else: + if not callable(file_like.read): + raise TypeError("`file_like.read` must be callable") + self.file_like = file_like + self._feeding = False + + #: array of bytes fed. + self._buffer = bytearray() + #: Which position we currently reads + self._buff_i = 0 + + # When Unpacker is used as an iterable, between the calls to next(), + # the buffer is not "consumed" completely, for efficiency sake. + # Instead, it is done sloppily. 
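The pure-Python `Packer`/`Unpacker` in this module are meant to be drop-in mirrors of the Cython implementation above, so the two can be exercised against each other; a sketch, using the module path introduced by this patch::

    import msgpack
    from msgpack import fallback

    obj = {"compact": True, "schema": 0}
    data = fallback.Packer().pack(obj)

    assert fallback.unpackb(data) == obj   # pure-Python round trip
    assert msgpack.packb(obj) == data      # same bytes as the top-level implementation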
To make sure we raise BufferFull at + # the correct moments, we have to keep track of how sloppy we were. + # Furthermore, when the buffer is incomplete (that is: in the case + # we raise an OutOfData) we need to rollback the buffer to the correct + # state, which _buf_checkpoint records. + self._buf_checkpoint = 0 + + if not max_buffer_size: + max_buffer_size = 2**31 - 1 + if max_str_len == -1: + max_str_len = max_buffer_size + if max_bin_len == -1: + max_bin_len = max_buffer_size + if max_array_len == -1: + max_array_len = max_buffer_size + if max_map_len == -1: + max_map_len = max_buffer_size // 2 + if max_ext_len == -1: + max_ext_len = max_buffer_size + + self._max_buffer_size = max_buffer_size + if read_size > self._max_buffer_size: + raise ValueError("read_size must be smaller than max_buffer_size") + self._read_size = read_size or min(self._max_buffer_size, 16 * 1024) + self._raw = bool(raw) + self._strict_map_key = bool(strict_map_key) + self._unicode_errors = unicode_errors + self._use_list = use_list + if not (0 <= timestamp <= 3): + raise ValueError("timestamp must be 0..3") + self._timestamp = timestamp + self._list_hook = list_hook + self._object_hook = object_hook + self._object_pairs_hook = object_pairs_hook + self._ext_hook = ext_hook + self._max_str_len = max_str_len + self._max_bin_len = max_bin_len + self._max_array_len = max_array_len + self._max_map_len = max_map_len + self._max_ext_len = max_ext_len + self._stream_offset = 0 + + if list_hook is not None and not callable(list_hook): + raise TypeError("`list_hook` is not callable") + if object_hook is not None and not callable(object_hook): + raise TypeError("`object_hook` is not callable") + if object_pairs_hook is not None and not callable(object_pairs_hook): + raise TypeError("`object_pairs_hook` is not callable") + if object_hook is not None and object_pairs_hook is not None: + raise TypeError("object_pairs_hook and object_hook are mutually exclusive") + if not callable(ext_hook): + raise TypeError("`ext_hook` is not callable") + + def feed(self, next_bytes): + assert self._feeding + view = _get_data_from_buffer(next_bytes) + if len(self._buffer) - self._buff_i + len(view) > self._max_buffer_size: + raise BufferFull + + # Strip buffer before checkpoint before reading file. + if self._buf_checkpoint > 0: + del self._buffer[: self._buf_checkpoint] + self._buff_i -= self._buf_checkpoint + self._buf_checkpoint = 0 + + # Use extend here: INPLACE_ADD += doesn't reliably typecast memoryview in jython + self._buffer.extend(view) + view.release() + + def _consume(self): + """Gets rid of the used parts of the buffer.""" + self._stream_offset += self._buff_i - self._buf_checkpoint + self._buf_checkpoint = self._buff_i + + def _got_extradata(self): + return self._buff_i < len(self._buffer) + + def _get_extradata(self): + return self._buffer[self._buff_i :] + + def read_bytes(self, n): + ret = self._read(n, raise_outofdata=False) + self._consume() + return ret + + def _read(self, n, raise_outofdata=True): + # (int) -> bytearray + self._reserve(n, raise_outofdata=raise_outofdata) + i = self._buff_i + ret = self._buffer[i : i + n] + self._buff_i = i + len(ret) + return ret + + def _reserve(self, n, raise_outofdata=True): + remain_bytes = len(self._buffer) - self._buff_i - n + + # Fast path: buffer has n bytes already + if remain_bytes >= 0: + return + + if self._feeding: + self._buff_i = self._buf_checkpoint + raise OutOfData + + # Strip buffer before checkpoint before reading file. 
+ if self._buf_checkpoint > 0: + del self._buffer[: self._buf_checkpoint] + self._buff_i -= self._buf_checkpoint + self._buf_checkpoint = 0 + + # Read from file + remain_bytes = -remain_bytes + if remain_bytes + len(self._buffer) > self._max_buffer_size: + raise BufferFull + while remain_bytes > 0: + to_read_bytes = max(self._read_size, remain_bytes) + read_data = self.file_like.read(to_read_bytes) + if not read_data: + break + assert isinstance(read_data, bytes) + self._buffer += read_data + remain_bytes -= len(read_data) + + if len(self._buffer) < n + self._buff_i and raise_outofdata: + self._buff_i = 0 # rollback + raise OutOfData + + def _read_header(self): + typ = TYPE_IMMEDIATE + n = 0 + obj = None + self._reserve(1) + b = self._buffer[self._buff_i] + self._buff_i += 1 + if b & 0b10000000 == 0: + obj = b + elif b & 0b11100000 == 0b11100000: + obj = -1 - (b ^ 0xFF) + elif b & 0b11100000 == 0b10100000: + n = b & 0b00011111 + typ = TYPE_RAW + if n > self._max_str_len: + raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})") + obj = self._read(n) + elif b & 0b11110000 == 0b10010000: + n = b & 0b00001111 + typ = TYPE_ARRAY + if n > self._max_array_len: + raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})") + elif b & 0b11110000 == 0b10000000: + n = b & 0b00001111 + typ = TYPE_MAP + if n > self._max_map_len: + raise ValueError(f"{n} exceeds max_map_len({self._max_map_len})") + elif b == 0xC0: + obj = None + elif b == 0xC2: + obj = False + elif b == 0xC3: + obj = True + elif 0xC4 <= b <= 0xC6: + size, fmt, typ = _MSGPACK_HEADERS[b] + self._reserve(size) + if len(fmt) > 0: + n = struct.unpack_from(fmt, self._buffer, self._buff_i)[0] + else: + n = self._buffer[self._buff_i] + self._buff_i += size + if n > self._max_bin_len: + raise ValueError(f"{n} exceeds max_bin_len({self._max_bin_len})") + obj = self._read(n) + elif 0xC7 <= b <= 0xC9: + size, fmt, typ = _MSGPACK_HEADERS[b] + self._reserve(size) + L, n = struct.unpack_from(fmt, self._buffer, self._buff_i) + self._buff_i += size + if L > self._max_ext_len: + raise ValueError(f"{L} exceeds max_ext_len({self._max_ext_len})") + obj = self._read(L) + elif 0xCA <= b <= 0xD3: + size, fmt = _MSGPACK_HEADERS[b] + self._reserve(size) + if len(fmt) > 0: + obj = struct.unpack_from(fmt, self._buffer, self._buff_i)[0] + else: + obj = self._buffer[self._buff_i] + self._buff_i += size + elif 0xD4 <= b <= 0xD8: + size, fmt, typ = _MSGPACK_HEADERS[b] + if self._max_ext_len < size: + raise ValueError(f"{size} exceeds max_ext_len({self._max_ext_len})") + self._reserve(size + 1) + n, obj = struct.unpack_from(fmt, self._buffer, self._buff_i) + self._buff_i += size + 1 + elif 0xD9 <= b <= 0xDB: + size, fmt, typ = _MSGPACK_HEADERS[b] + self._reserve(size) + if len(fmt) > 0: + (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) + else: + n = self._buffer[self._buff_i] + self._buff_i += size + if n > self._max_str_len: + raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})") + obj = self._read(n) + elif 0xDC <= b <= 0xDD: + size, fmt, typ = _MSGPACK_HEADERS[b] + self._reserve(size) + (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) + self._buff_i += size + if n > self._max_array_len: + raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})") + elif 0xDE <= b <= 0xDF: + size, fmt, typ = _MSGPACK_HEADERS[b] + self._reserve(size) + (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) + self._buff_i += size + if n > self._max_map_len: + raise ValueError(f"{n} exceeds 
max_map_len({self._max_map_len})") + else: + raise FormatError("Unknown header: 0x%x" % b) + return typ, n, obj + + def _unpack(self, execute=EX_CONSTRUCT): + typ, n, obj = self._read_header() + + if execute == EX_READ_ARRAY_HEADER: + if typ != TYPE_ARRAY: + raise ValueError("Expected array") + return n + if execute == EX_READ_MAP_HEADER: + if typ != TYPE_MAP: + raise ValueError("Expected map") + return n + # TODO should we eliminate the recursion? + if typ == TYPE_ARRAY: + if execute == EX_SKIP: + for i in range(n): + # TODO check whether we need to call `list_hook` + self._unpack(EX_SKIP) + return + ret = newlist_hint(n) + for i in range(n): + ret.append(self._unpack(EX_CONSTRUCT)) + if self._list_hook is not None: + ret = self._list_hook(ret) + # TODO is the interaction between `list_hook` and `use_list` ok? + return ret if self._use_list else tuple(ret) + if typ == TYPE_MAP: + if execute == EX_SKIP: + for i in range(n): + # TODO check whether we need to call hooks + self._unpack(EX_SKIP) + self._unpack(EX_SKIP) + return + if self._object_pairs_hook is not None: + ret = self._object_pairs_hook( + (self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT)) for _ in range(n) + ) + else: + ret = {} + for _ in range(n): + key = self._unpack(EX_CONSTRUCT) + if self._strict_map_key and type(key) not in (str, bytes): + raise ValueError("%s is not allowed for map key" % str(type(key))) + if isinstance(key, str): + key = sys.intern(key) + ret[key] = self._unpack(EX_CONSTRUCT) + if self._object_hook is not None: + ret = self._object_hook(ret) + return ret + if execute == EX_SKIP: + return + if typ == TYPE_RAW: + if self._raw: + obj = bytes(obj) + else: + obj = obj.decode("utf_8", self._unicode_errors) + return obj + if typ == TYPE_BIN: + return bytes(obj) + if typ == TYPE_EXT: + if n == -1: # timestamp + ts = Timestamp.from_bytes(bytes(obj)) + if self._timestamp == 1: + return ts.to_unix() + elif self._timestamp == 2: + return ts.to_unix_nano() + elif self._timestamp == 3: + return ts.to_datetime() + else: + return ts + else: + return self._ext_hook(n, bytes(obj)) + assert typ == TYPE_IMMEDIATE + return obj + + def __iter__(self): + return self + + def __next__(self): + try: + ret = self._unpack(EX_CONSTRUCT) + self._consume() + return ret + except OutOfData: + self._consume() + raise StopIteration + except RecursionError: + raise StackError + + next = __next__ + + def skip(self): + self._unpack(EX_SKIP) + self._consume() + + def unpack(self): + try: + ret = self._unpack(EX_CONSTRUCT) + except RecursionError: + raise StackError + self._consume() + return ret + + def read_array_header(self): + ret = self._unpack(EX_READ_ARRAY_HEADER) + self._consume() + return ret + + def read_map_header(self): + ret = self._unpack(EX_READ_MAP_HEADER) + self._consume() + return ret + + def tell(self): + return self._stream_offset + + +class Packer: + """ + MessagePack Packer + + Usage:: + + packer = Packer() + astream.write(packer.pack(a)) + astream.write(packer.pack(b)) + + Packer's constructor has some keyword arguments: + + :param default: + When specified, it should be callable. + Convert user type to builtin type that Packer supports. + See also simplejson's document. + + :param bool use_single_float: + Use single precision float type for float. (default: False) + + :param bool autoreset: + Reset buffer after each pack and return its content as `bytes`. (default: True). + If set this to false, use `bytes()` to get content and `.reset()` to clear buffer. 
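+
+        Example with `autoreset=False` (illustrative sketch; `a` and `b` are
+        arbitrary packable objects)::
+
+            packer = Packer(autoreset=False)
+            packer.pack(a)
+            packer.pack(b)
+            buf = packer.bytes()  # both packed messages, concatenated
+            packer.reset()  # clear the internal buffer for reuse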
+ + :param bool use_bin_type: + Use bin type introduced in msgpack spec 2.0 for bytes. + It also enables str8 type for unicode. (default: True) + + :param bool strict_types: + If set to true, types will be checked to be exact. Derived classes + from serializable types will not be serialized and will be + treated as unsupported type and forwarded to default. + Additionally tuples will not be serialized as lists. + This is useful when trying to implement accurate serialization + for python types. + + :param bool datetime: + If set to true, datetime with tzinfo is packed into Timestamp type. + Note that the tzinfo is stripped in the timestamp. + You can get UTC datetime with `timestamp=3` option of the Unpacker. + + :param str unicode_errors: + The error handler for encoding unicode. (default: 'strict') + DO NOT USE THIS!! This option is kept for very specific usage. + + :param int buf_size: + Internal buffer size. This option is used only for C implementation. + """ + + def __init__( + self, + *, + default=None, + use_single_float=False, + autoreset=True, + use_bin_type=True, + strict_types=False, + datetime=False, + unicode_errors=None, + buf_size=None, + ): + self._strict_types = strict_types + self._use_float = use_single_float + self._autoreset = autoreset + self._use_bin_type = use_bin_type + self._buffer = BytesIO() + self._datetime = bool(datetime) + self._unicode_errors = unicode_errors or "strict" + if default is not None and not callable(default): + raise TypeError("default must be callable") + self._default = default + + def _pack( + self, + obj, + nest_limit=DEFAULT_RECURSE_LIMIT, + check=isinstance, + check_type_strict=_check_type_strict, + ): + default_used = False + if self._strict_types: + check = check_type_strict + list_types = list + else: + list_types = (list, tuple) + while True: + if nest_limit < 0: + raise ValueError("recursion limit exceeded") + if obj is None: + return self._buffer.write(b"\xc0") + if check(obj, bool): + if obj: + return self._buffer.write(b"\xc3") + return self._buffer.write(b"\xc2") + if check(obj, int): + if 0 <= obj < 0x80: + return self._buffer.write(struct.pack("B", obj)) + if -0x20 <= obj < 0: + return self._buffer.write(struct.pack("b", obj)) + if 0x80 <= obj <= 0xFF: + return self._buffer.write(struct.pack("BB", 0xCC, obj)) + if -0x80 <= obj < 0: + return self._buffer.write(struct.pack(">Bb", 0xD0, obj)) + if 0xFF < obj <= 0xFFFF: + return self._buffer.write(struct.pack(">BH", 0xCD, obj)) + if -0x8000 <= obj < -0x80: + return self._buffer.write(struct.pack(">Bh", 0xD1, obj)) + if 0xFFFF < obj <= 0xFFFFFFFF: + return self._buffer.write(struct.pack(">BI", 0xCE, obj)) + if -0x80000000 <= obj < -0x8000: + return self._buffer.write(struct.pack(">Bi", 0xD2, obj)) + if 0xFFFFFFFF < obj <= 0xFFFFFFFFFFFFFFFF: + return self._buffer.write(struct.pack(">BQ", 0xCF, obj)) + if -0x8000000000000000 <= obj < -0x80000000: + return self._buffer.write(struct.pack(">Bq", 0xD3, obj)) + if not default_used and self._default is not None: + obj = self._default(obj) + default_used = True + continue + raise OverflowError("Integer value out of range") + if check(obj, (bytes, bytearray)): + n = len(obj) + if n >= 2**32: + raise ValueError("%s is too large" % type(obj).__name__) + self._pack_bin_header(n) + return self._buffer.write(obj) + if check(obj, str): + obj = obj.encode("utf-8", self._unicode_errors) + n = len(obj) + if n >= 2**32: + raise ValueError("String is too large") + self._pack_raw_header(n) + return self._buffer.write(obj) + if check(obj, memoryview): + 
n = obj.nbytes + if n >= 2**32: + raise ValueError("Memoryview is too large") + self._pack_bin_header(n) + return self._buffer.write(obj) + if check(obj, float): + if self._use_float: + return self._buffer.write(struct.pack(">Bf", 0xCA, obj)) + return self._buffer.write(struct.pack(">Bd", 0xCB, obj)) + if check(obj, (ExtType, Timestamp)): + if check(obj, Timestamp): + code = -1 + data = obj.to_bytes() + else: + code = obj.code + data = obj.data + assert isinstance(code, int) + assert isinstance(data, bytes) + L = len(data) + if L == 1: + self._buffer.write(b"\xd4") + elif L == 2: + self._buffer.write(b"\xd5") + elif L == 4: + self._buffer.write(b"\xd6") + elif L == 8: + self._buffer.write(b"\xd7") + elif L == 16: + self._buffer.write(b"\xd8") + elif L <= 0xFF: + self._buffer.write(struct.pack(">BB", 0xC7, L)) + elif L <= 0xFFFF: + self._buffer.write(struct.pack(">BH", 0xC8, L)) + else: + self._buffer.write(struct.pack(">BI", 0xC9, L)) + self._buffer.write(struct.pack("b", code)) + self._buffer.write(data) + return + if check(obj, list_types): + n = len(obj) + self._pack_array_header(n) + for i in range(n): + self._pack(obj[i], nest_limit - 1) + return + if check(obj, dict): + return self._pack_map_pairs(len(obj), obj.items(), nest_limit - 1) + + if self._datetime and check(obj, _DateTime) and obj.tzinfo is not None: + obj = Timestamp.from_datetime(obj) + default_used = 1 + continue + + if not default_used and self._default is not None: + obj = self._default(obj) + default_used = 1 + continue + + if self._datetime and check(obj, _DateTime): + raise ValueError(f"Cannot serialize {obj!r} where tzinfo=None") + + raise TypeError(f"Cannot serialize {obj!r}") + + def pack(self, obj): + try: + self._pack(obj) + except: + self._buffer = BytesIO() # force reset + raise + if self._autoreset: + ret = self._buffer.getvalue() + self._buffer = BytesIO() + return ret + + def pack_map_pairs(self, pairs): + self._pack_map_pairs(len(pairs), pairs) + if self._autoreset: + ret = self._buffer.getvalue() + self._buffer = BytesIO() + return ret + + def pack_array_header(self, n): + if n >= 2**32: + raise ValueError + self._pack_array_header(n) + if self._autoreset: + ret = self._buffer.getvalue() + self._buffer = BytesIO() + return ret + + def pack_map_header(self, n): + if n >= 2**32: + raise ValueError + self._pack_map_header(n) + if self._autoreset: + ret = self._buffer.getvalue() + self._buffer = BytesIO() + return ret + + def pack_ext_type(self, typecode, data): + if not isinstance(typecode, int): + raise TypeError("typecode must have int type.") + if not 0 <= typecode <= 127: + raise ValueError("typecode should be 0-127") + if not isinstance(data, bytes): + raise TypeError("data must have bytes type") + L = len(data) + if L > 0xFFFFFFFF: + raise ValueError("Too large data") + if L == 1: + self._buffer.write(b"\xd4") + elif L == 2: + self._buffer.write(b"\xd5") + elif L == 4: + self._buffer.write(b"\xd6") + elif L == 8: + self._buffer.write(b"\xd7") + elif L == 16: + self._buffer.write(b"\xd8") + elif L <= 0xFF: + self._buffer.write(b"\xc7" + struct.pack("B", L)) + elif L <= 0xFFFF: + self._buffer.write(b"\xc8" + struct.pack(">H", L)) + else: + self._buffer.write(b"\xc9" + struct.pack(">I", L)) + self._buffer.write(struct.pack("B", typecode)) + self._buffer.write(data) + + def _pack_array_header(self, n): + if n <= 0x0F: + return self._buffer.write(struct.pack("B", 0x90 + n)) + if n <= 0xFFFF: + return self._buffer.write(struct.pack(">BH", 0xDC, n)) + if n <= 0xFFFFFFFF: + return 
self._buffer.write(struct.pack(">BI", 0xDD, n)) + raise ValueError("Array is too large") + + def _pack_map_header(self, n): + if n <= 0x0F: + return self._buffer.write(struct.pack("B", 0x80 + n)) + if n <= 0xFFFF: + return self._buffer.write(struct.pack(">BH", 0xDE, n)) + if n <= 0xFFFFFFFF: + return self._buffer.write(struct.pack(">BI", 0xDF, n)) + raise ValueError("Dict is too large") + + def _pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT): + self._pack_map_header(n) + for k, v in pairs: + self._pack(k, nest_limit - 1) + self._pack(v, nest_limit - 1) + + def _pack_raw_header(self, n): + if n <= 0x1F: + self._buffer.write(struct.pack("B", 0xA0 + n)) + elif self._use_bin_type and n <= 0xFF: + self._buffer.write(struct.pack(">BB", 0xD9, n)) + elif n <= 0xFFFF: + self._buffer.write(struct.pack(">BH", 0xDA, n)) + elif n <= 0xFFFFFFFF: + self._buffer.write(struct.pack(">BI", 0xDB, n)) + else: + raise ValueError("Raw is too large") + + def _pack_bin_header(self, n): + if not self._use_bin_type: + return self._pack_raw_header(n) + elif n <= 0xFF: + return self._buffer.write(struct.pack(">BB", 0xC4, n)) + elif n <= 0xFFFF: + return self._buffer.write(struct.pack(">BH", 0xC5, n)) + elif n <= 0xFFFFFFFF: + return self._buffer.write(struct.pack(">BI", 0xC6, n)) + else: + raise ValueError("Bin is too large") + + def bytes(self): + """Return internal buffer contents as bytes object""" + return self._buffer.getvalue() + + def reset(self): + """Reset internal buffer. + + This method is useful only when autoreset=False. + """ + self._buffer = BytesIO() + + def getbuffer(self): + """Return view of internal buffer.""" + if _USING_STRINGBUILDER: + return memoryview(self.bytes()) + else: + return self._buffer.getbuffer() diff --git a/msgpack/pack.h b/msgpack/pack.h index d36b436c..edf3a3fe 100644 --- a/msgpack/pack.h +++ b/msgpack/pack.h @@ -19,56 +19,23 @@ #include #include #include "sysdep.h" -#include "pack_define.h" +#include +#include +#include #ifdef __cplusplus extern "C" { #endif -#ifdef _MSC_VER -#define inline __inline -#endif - typedef struct msgpack_packer { char *buf; size_t length; size_t buf_size; + bool use_bin_type; } msgpack_packer; typedef struct Packer Packer; -static inline int msgpack_pack_short(msgpack_packer* pk, short d); -static inline int msgpack_pack_int(msgpack_packer* pk, int d); -static inline int msgpack_pack_long(msgpack_packer* pk, long d); -static inline int msgpack_pack_long_long(msgpack_packer* pk, long long d); -static inline int msgpack_pack_unsigned_short(msgpack_packer* pk, unsigned short d); -static inline int msgpack_pack_unsigned_int(msgpack_packer* pk, unsigned int d); -static inline int msgpack_pack_unsigned_long(msgpack_packer* pk, unsigned long d); -static inline int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d); - -static inline int msgpack_pack_uint8(msgpack_packer* pk, uint8_t d); -static inline int msgpack_pack_uint16(msgpack_packer* pk, uint16_t d); -static inline int msgpack_pack_uint32(msgpack_packer* pk, uint32_t d); -static inline int msgpack_pack_uint64(msgpack_packer* pk, uint64_t d); -static inline int msgpack_pack_int8(msgpack_packer* pk, int8_t d); -static inline int msgpack_pack_int16(msgpack_packer* pk, int16_t d); -static inline int msgpack_pack_int32(msgpack_packer* pk, int32_t d); -static inline int msgpack_pack_int64(msgpack_packer* pk, int64_t d); - -static inline int msgpack_pack_float(msgpack_packer* pk, float d); -static inline int msgpack_pack_double(msgpack_packer* pk, double d); - -static 
inline int msgpack_pack_nil(msgpack_packer* pk); -static inline int msgpack_pack_true(msgpack_packer* pk); -static inline int msgpack_pack_false(msgpack_packer* pk); - -static inline int msgpack_pack_array(msgpack_packer* pk, unsigned int n); - -static inline int msgpack_pack_map(msgpack_packer* pk, unsigned int n); - -static inline int msgpack_pack_raw(msgpack_packer* pk, size_t l); -static inline int msgpack_pack_raw_body(msgpack_packer* pk, const void* b, size_t l); - static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_t l) { char* buf = pk->buf; @@ -77,8 +44,11 @@ static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_ if (len + l > bs) { bs = (len + l) * 2; - buf = realloc(buf, bs); - if (!buf) return -1; + buf = (char*)PyMem_Realloc(buf, bs); + if (!buf) { + PyErr_NoMemory(); + return -1; + } } memcpy(buf + len, data, l); len += l; @@ -89,14 +59,6 @@ static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_ return 0; } -#define msgpack_pack_inline_func(name) \ - static inline int msgpack_pack ## name - -#define msgpack_pack_inline_func_cint(name) \ - static inline int msgpack_pack ## name - -#define msgpack_pack_user msgpack_packer* - #define msgpack_pack_append_buffer(user, buf, len) \ return msgpack_pack_write(user, (const char*)buf, len) diff --git a/msgpack/pack_define.h b/msgpack/pack_define.h deleted file mode 100644 index f72391b7..00000000 --- a/msgpack/pack_define.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * MessagePack unpacking routine template - * - * Copyright (C) 2008-2009 FURUHASHI Sadayuki - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef MSGPACK_PACK_DEFINE_H__ -#define MSGPACK_PACK_DEFINE_H__ - -#include "sysdep.h" -#include - -#endif /* msgpack/pack_define.h */ - diff --git a/msgpack/pack_template.h b/msgpack/pack_template.h index 4b508958..b8959f02 100644 --- a/msgpack/pack_template.h +++ b/msgpack/pack_template.h @@ -1,7 +1,7 @@ /* * MessagePack packing routine template * - * Copyright (C) 2008-2009 FURUHASHI Sadayuki + * Copyright (C) 2008-2010 FURUHASHI Sadayuki * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,26 +16,18 @@ * limitations under the License. 
*/ -#ifdef __LITTLE_ENDIAN__ +#if defined(__LITTLE_ENDIAN__) #define TAKE8_8(d) ((uint8_t*)&d)[0] #define TAKE8_16(d) ((uint8_t*)&d)[0] #define TAKE8_32(d) ((uint8_t*)&d)[0] #define TAKE8_64(d) ((uint8_t*)&d)[0] -#elif __BIG_ENDIAN__ +#elif defined(__BIG_ENDIAN__) #define TAKE8_8(d) ((uint8_t*)&d)[0] #define TAKE8_16(d) ((uint8_t*)&d)[1] #define TAKE8_32(d) ((uint8_t*)&d)[3] #define TAKE8_64(d) ((uint8_t*)&d)[7] #endif -#ifndef msgpack_pack_inline_func -#error msgpack_pack_inline_func template is not defined -#endif - -#ifndef msgpack_pack_user -#error msgpack_pack_user type is not defined -#endif - #ifndef msgpack_pack_append_buffer #error msgpack_pack_append_buffer callback is not defined #endif @@ -45,529 +37,334 @@ * Integer */ -#define msgpack_pack_real_uint8(x, d) \ -do { \ - if(d < (1<<7)) { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ - } else { \ - /* unsigned 8 */ \ - unsigned char buf[2] = {0xcc, TAKE8_8(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } \ -} while(0) - #define msgpack_pack_real_uint16(x, d) \ do { \ - if(d < (1<<7)) { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_16(d), 1); \ - } else if(d < (1<<8)) { \ - /* unsigned 8 */ \ - unsigned char buf[2] = {0xcc, TAKE8_16(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } else { \ - /* unsigned 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xcd; *(uint16_t*)&buf[1] = _msgpack_be16(d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } \ + if(d < (1<<7)) { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_16(d), 1); \ + } else if(d < (1<<8)) { \ + /* unsigned 8 */ \ + unsigned char buf[2] = {0xcc, TAKE8_16(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } else { \ + /* unsigned 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } \ } while(0) #define msgpack_pack_real_uint32(x, d) \ do { \ - if(d < (1<<8)) { \ - if(d < (1<<7)) { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_32(d), 1); \ - } else { \ - /* unsigned 8 */ \ - unsigned char buf[2] = {0xcc, TAKE8_32(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } \ - } else { \ - if(d < (1<<16)) { \ - /* unsigned 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xcd; *(uint16_t*)&buf[1] = _msgpack_be16(d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } else { \ - /* unsigned 32 */ \ - unsigned char buf[5]; \ - buf[0] = 0xce; *(uint32_t*)&buf[1] = _msgpack_be32(d); \ - msgpack_pack_append_buffer(x, buf, 5); \ - } \ - } \ + if(d < (1<<8)) { \ + if(d < (1<<7)) { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_32(d), 1); \ + } else { \ + /* unsigned 8 */ \ + unsigned char buf[2] = {0xcc, TAKE8_32(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } \ + } else { \ + if(d < (1<<16)) { \ + /* unsigned 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } else { \ + /* unsigned 32 */ \ + unsigned char buf[5]; \ + buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \ + msgpack_pack_append_buffer(x, buf, 5); \ + } \ + } \ } while(0) #define msgpack_pack_real_uint64(x, d) \ do { \ - if(d < (1ULL<<8)) { \ - if(d < (1<<7)) { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_64(d), 1); \ - } else { \ - /* unsigned 8 */ \ - unsigned char buf[2] = {0xcc, TAKE8_64(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } \ - } else { \ - if(d < (1ULL<<16)) { \ - /* signed 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xcd; 
*(uint16_t*)&buf[1] = _msgpack_be16(d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } else if(d < (1ULL<<32)) { \ - /* signed 32 */ \ - unsigned char buf[5]; \ - buf[0] = 0xce; *(uint32_t*)&buf[1] = _msgpack_be32(d); \ - msgpack_pack_append_buffer(x, buf, 5); \ - } else { \ - /* signed 64 */ \ - unsigned char buf[9]; \ - buf[0] = 0xcf; *(uint64_t*)&buf[1] = _msgpack_be64(d); \ - msgpack_pack_append_buffer(x, buf, 9); \ - } \ - } \ -} while(0) - -#define msgpack_pack_real_int8(x, d) \ -do { \ - if(d < -(1<<5)) { \ - /* signed 8 */ \ - unsigned char buf[2] = {0xd0, TAKE8_8(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } else { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ - } \ + if(d < (1ULL<<8)) { \ + if(d < (1ULL<<7)) { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_64(d), 1); \ + } else { \ + /* unsigned 8 */ \ + unsigned char buf[2] = {0xcc, TAKE8_64(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } \ + } else { \ + if(d < (1ULL<<16)) { \ + /* unsigned 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } else if(d < (1ULL<<32)) { \ + /* unsigned 32 */ \ + unsigned char buf[5]; \ + buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \ + msgpack_pack_append_buffer(x, buf, 5); \ + } else { \ + /* unsigned 64 */ \ + unsigned char buf[9]; \ + buf[0] = 0xcf; _msgpack_store64(&buf[1], d); \ + msgpack_pack_append_buffer(x, buf, 9); \ + } \ + } \ } while(0) #define msgpack_pack_real_int16(x, d) \ do { \ - if(d < -(1<<5)) { \ - if(d < -(1<<7)) { \ - /* signed 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xd1; *(uint16_t*)&buf[1] = _msgpack_be16(d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } else { \ - /* signed 8 */ \ - unsigned char buf[2] = {0xd0, TAKE8_16(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } \ - } else if(d < (1<<7)) { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_16(d), 1); \ - } else { \ - if(d < (1<<8)) { \ - /* unsigned 8 */ \ - unsigned char buf[2] = {0xcc, TAKE8_16(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } else { \ - /* unsigned 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xcd; *(uint16_t*)&buf[1] = _msgpack_be16(d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } \ - } \ + if(d < -(1<<5)) { \ + if(d < -(1<<7)) { \ + /* signed 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xd1; _msgpack_store16(&buf[1], (int16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } else { \ + /* signed 8 */ \ + unsigned char buf[2] = {0xd0, TAKE8_16(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } \ + } else if(d < (1<<7)) { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_16(d), 1); \ + } else { \ + if(d < (1<<8)) { \ + /* unsigned 8 */ \ + unsigned char buf[2] = {0xcc, TAKE8_16(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } else { \ + /* unsigned 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } \ + } \ } while(0) #define msgpack_pack_real_int32(x, d) \ do { \ - if(d < -(1<<5)) { \ - if(d < -(1<<15)) { \ - /* signed 32 */ \ - unsigned char buf[5]; \ - buf[0] = 0xd2; *(uint32_t*)&buf[1] = _msgpack_be32(d); \ - msgpack_pack_append_buffer(x, buf, 5); \ - } else if(d < -(1<<7)) { \ - /* signed 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xd1; *(uint16_t*)&buf[1] = _msgpack_be16(d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } else { \ - /* signed 8 */ \ - unsigned char buf[2] = {0xd0, TAKE8_32(d)}; \ - 
msgpack_pack_append_buffer(x, buf, 2); \ - } \ - } else if(d < (1<<7)) { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_32(d), 1); \ - } else { \ - if(d < (1<<8)) { \ - /* unsigned 8 */ \ - unsigned char buf[2] = {0xcc, TAKE8_32(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } else if(d < (1<<16)) { \ - /* unsigned 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xcd; *(uint16_t*)&buf[1] = _msgpack_be16(d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } else { \ - /* unsigned 32 */ \ - unsigned char buf[5]; \ - buf[0] = 0xce; *(uint32_t*)&buf[1] = _msgpack_be32(d); \ - msgpack_pack_append_buffer(x, buf, 5); \ - } \ - } \ + if(d < -(1<<5)) { \ + if(d < -(1<<15)) { \ + /* signed 32 */ \ + unsigned char buf[5]; \ + buf[0] = 0xd2; _msgpack_store32(&buf[1], (int32_t)d); \ + msgpack_pack_append_buffer(x, buf, 5); \ + } else if(d < -(1<<7)) { \ + /* signed 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xd1; _msgpack_store16(&buf[1], (int16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } else { \ + /* signed 8 */ \ + unsigned char buf[2] = {0xd0, TAKE8_32(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } \ + } else if(d < (1<<7)) { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_32(d), 1); \ + } else { \ + if(d < (1<<8)) { \ + /* unsigned 8 */ \ + unsigned char buf[2] = {0xcc, TAKE8_32(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } else if(d < (1<<16)) { \ + /* unsigned 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } else { \ + /* unsigned 32 */ \ + unsigned char buf[5]; \ + buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \ + msgpack_pack_append_buffer(x, buf, 5); \ + } \ + } \ } while(0) #define msgpack_pack_real_int64(x, d) \ do { \ - if(d < -(1LL<<5)) { \ - if(d < -(1LL<<15)) { \ - if(d < -(1LL<<31)) { \ - /* signed 64 */ \ - unsigned char buf[9]; \ - buf[0] = 0xd3; *(uint64_t*)&buf[1] = _msgpack_be64(d); \ - msgpack_pack_append_buffer(x, buf, 9); \ - } else { \ - /* signed 32 */ \ - unsigned char buf[5]; \ - buf[0] = 0xd2; *(uint32_t*)&buf[1] = _msgpack_be32(d); \ - msgpack_pack_append_buffer(x, buf, 5); \ - } \ - } else { \ - if(d < -(1<<7)) { \ - /* signed 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xd1; *(uint16_t*)&buf[1] = _msgpack_be16(d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } else { \ - /* signed 8 */ \ - unsigned char buf[2] = {0xd0, TAKE8_64(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } \ - } \ - } else if(d < (1<<7)) { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_64(d), 1); \ - } else { \ - if(d < (1LL<<16)) { \ - if(d < (1<<8)) { \ - /* unsigned 8 */ \ - unsigned char buf[2] = {0xcc, TAKE8_64(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } else { \ - /* unsigned 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xcd; *(uint16_t*)&buf[1] = _msgpack_be16(d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } \ - } else { \ - if(d < (1LL<<32)) { \ - /* unsigned 32 */ \ - unsigned char buf[5]; \ - buf[0] = 0xce; *(uint32_t*)&buf[1] = _msgpack_be32(d); \ - msgpack_pack_append_buffer(x, buf, 5); \ - } else { \ - /* unsigned 64 */ \ - unsigned char buf[9]; \ - buf[0] = 0xcf; *(uint64_t*)&buf[1] = _msgpack_be64(d); \ - msgpack_pack_append_buffer(x, buf, 9); \ - } \ - } \ - } \ + if(d < -(1LL<<5)) { \ + if(d < -(1LL<<15)) { \ + if(d < -(1LL<<31)) { \ + /* signed 64 */ \ + unsigned char buf[9]; \ + buf[0] = 0xd3; _msgpack_store64(&buf[1], d); \ + msgpack_pack_append_buffer(x, buf, 9); \ + } else { \ + /* signed 32 */ \ + 
unsigned char buf[5]; \ + buf[0] = 0xd2; _msgpack_store32(&buf[1], (int32_t)d); \ + msgpack_pack_append_buffer(x, buf, 5); \ + } \ + } else { \ + if(d < -(1<<7)) { \ + /* signed 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xd1; _msgpack_store16(&buf[1], (int16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } else { \ + /* signed 8 */ \ + unsigned char buf[2] = {0xd0, TAKE8_64(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } \ + } \ + } else if(d < (1<<7)) { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_64(d), 1); \ + } else { \ + if(d < (1LL<<16)) { \ + if(d < (1<<8)) { \ + /* unsigned 8 */ \ + unsigned char buf[2] = {0xcc, TAKE8_64(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } else { \ + /* unsigned 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } \ + } else { \ + if(d < (1LL<<32)) { \ + /* unsigned 32 */ \ + unsigned char buf[5]; \ + buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \ + msgpack_pack_append_buffer(x, buf, 5); \ + } else { \ + /* unsigned 64 */ \ + unsigned char buf[9]; \ + buf[0] = 0xcf; _msgpack_store64(&buf[1], d); \ + msgpack_pack_append_buffer(x, buf, 9); \ + } \ + } \ + } \ } while(0) -#ifdef msgpack_pack_inline_func_fastint - -msgpack_pack_inline_func_fastint(_uint8)(msgpack_pack_user x, uint8_t d) -{ - unsigned char buf[2] = {0xcc, TAKE8_8(d)}; - msgpack_pack_append_buffer(x, buf, 2); -} - -msgpack_pack_inline_func_fastint(_uint16)(msgpack_pack_user x, uint16_t d) -{ - unsigned char buf[3]; - buf[0] = 0xcd; *(uint16_t*)&buf[1] = _msgpack_be16(d); - msgpack_pack_append_buffer(x, buf, 3); -} - -msgpack_pack_inline_func_fastint(_uint32)(msgpack_pack_user x, uint32_t d) -{ - unsigned char buf[5]; - buf[0] = 0xce; *(uint32_t*)&buf[1] = _msgpack_be32(d); - msgpack_pack_append_buffer(x, buf, 5); -} - -msgpack_pack_inline_func_fastint(_uint64)(msgpack_pack_user x, uint64_t d) -{ - unsigned char buf[9]; - buf[0] = 0xcf; *(uint64_t*)&buf[1] = _msgpack_be64(d); - msgpack_pack_append_buffer(x, buf, 9); -} - -msgpack_pack_inline_func_fastint(_int8)(msgpack_pack_user x, int8_t d) -{ - unsigned char buf[2] = {0xd0, TAKE8_8(d)}; - msgpack_pack_append_buffer(x, buf, 2); -} - -msgpack_pack_inline_func_fastint(_int16)(msgpack_pack_user x, int16_t d) -{ - unsigned char buf[3]; - buf[0] = 0xd1; *(uint16_t*)&buf[1] = _msgpack_be16(d); - msgpack_pack_append_buffer(x, buf, 3); -} - -msgpack_pack_inline_func_fastint(_int32)(msgpack_pack_user x, int32_t d) -{ - unsigned char buf[5]; - buf[0] = 0xd2; *(uint32_t*)&buf[1] = _msgpack_be32(d); - msgpack_pack_append_buffer(x, buf, 5); -} - -msgpack_pack_inline_func_fastint(_int64)(msgpack_pack_user x, int64_t d) +static inline int msgpack_pack_short(msgpack_packer* x, short d) { - unsigned char buf[9]; - buf[0] = 0xd3; *(uint64_t*)&buf[1] = _msgpack_be64(d); - msgpack_pack_append_buffer(x, buf, 9); -} - -#undef msgpack_pack_inline_func_fastint +#if defined(SIZEOF_SHORT) +#if SIZEOF_SHORT == 2 + msgpack_pack_real_int16(x, d); +#elif SIZEOF_SHORT == 4 + msgpack_pack_real_int32(x, d); +#else + msgpack_pack_real_int64(x, d); #endif - -msgpack_pack_inline_func(_uint8)(msgpack_pack_user x, uint8_t d) -{ - msgpack_pack_real_uint8(x, d); -} - -msgpack_pack_inline_func(_uint16)(msgpack_pack_user x, uint16_t d) -{ - msgpack_pack_real_uint16(x, d); -} - -msgpack_pack_inline_func(_uint32)(msgpack_pack_user x, uint32_t d) -{ - msgpack_pack_real_uint32(x, d); -} - -msgpack_pack_inline_func(_uint64)(msgpack_pack_user x, uint64_t d) -{ 
- msgpack_pack_real_uint64(x, d); -} - -msgpack_pack_inline_func(_int8)(msgpack_pack_user x, int8_t d) -{ - msgpack_pack_real_int8(x, d); -} - -msgpack_pack_inline_func(_int16)(msgpack_pack_user x, int16_t d) -{ - msgpack_pack_real_int16(x, d); -} - -msgpack_pack_inline_func(_int32)(msgpack_pack_user x, int32_t d) -{ - msgpack_pack_real_int32(x, d); -} - -msgpack_pack_inline_func(_int64)(msgpack_pack_user x, int64_t d) -{ - msgpack_pack_real_int64(x, d); -} - - -#ifdef msgpack_pack_inline_func_cint - -msgpack_pack_inline_func_cint(_short)(msgpack_pack_user x, short d) -{ -#if defined(SIZEOF_SHORT) || defined(SHRT_MAX) -#if SIZEOF_SHORT == 2 || SHRT_MAX == 0x7fff - msgpack_pack_real_int16(x, d); -#elif SIZEOF_SHORT == 4 || SHRT_MAX == 0x7fffffff - msgpack_pack_real_int32(x, d); +#elif defined(SHRT_MAX) +#if SHRT_MAX == 0x7fff + msgpack_pack_real_int16(x, d); +#elif SHRT_MAX == 0x7fffffff + msgpack_pack_real_int32(x, d); #else - msgpack_pack_real_int64(x, d); + msgpack_pack_real_int64(x, d); #endif + #else if(sizeof(short) == 2) { - msgpack_pack_real_int16(x, d); + msgpack_pack_real_int16(x, d); } else if(sizeof(short) == 4) { - msgpack_pack_real_int32(x, d); + msgpack_pack_real_int32(x, d); } else { - msgpack_pack_real_int64(x, d); + msgpack_pack_real_int64(x, d); } #endif } -msgpack_pack_inline_func_cint(_int)(msgpack_pack_user x, int d) +static inline int msgpack_pack_int(msgpack_packer* x, int d) { -#if defined(SIZEOF_INT) || defined(INT_MAX) -#if SIZEOF_INT == 2 || INT_MAX == 0x7fff - msgpack_pack_real_int16(x, d); -#elif SIZEOF_INT == 4 || INT_MAX == 0x7fffffff - msgpack_pack_real_int32(x, d); -#else - msgpack_pack_real_int64(x, d); -#endif +#if defined(SIZEOF_INT) +#if SIZEOF_INT == 2 + msgpack_pack_real_int16(x, d); +#elif SIZEOF_INT == 4 + msgpack_pack_real_int32(x, d); #else -if(sizeof(int) == 2) { - msgpack_pack_real_int16(x, d); -} else if(sizeof(int) == 4) { - msgpack_pack_real_int32(x, d); -} else { - msgpack_pack_real_int64(x, d); -} + msgpack_pack_real_int64(x, d); #endif -} -msgpack_pack_inline_func_cint(_long)(msgpack_pack_user x, long d) -{ -#if defined(SIZEOF_LONG) || defined(LONG_MAX) -#if SIZEOF_LONG == 2 || LONG_MAX == 0x7fffL - msgpack_pack_real_int16(x, d); -#elif SIZEOF_LONG == 4 || LONG_MAX == 0x7fffffffL - msgpack_pack_real_int32(x, d); -#else - msgpack_pack_real_int64(x, d); -#endif +#elif defined(INT_MAX) +#if INT_MAX == 0x7fff + msgpack_pack_real_int16(x, d); +#elif INT_MAX == 0x7fffffff + msgpack_pack_real_int32(x, d); #else -if(sizeof(long) == 2) { - msgpack_pack_real_int16(x, d); -} else if(sizeof(long) == 4) { - msgpack_pack_real_int32(x, d); -} else { - msgpack_pack_real_int64(x, d); -} + msgpack_pack_real_int64(x, d); #endif -} -msgpack_pack_inline_func_cint(_long_long)(msgpack_pack_user x, long long d) -{ -#if defined(SIZEOF_LONG_LONG) || defined(LLONG_MAX) -#if SIZEOF_LONG_LONG == 2 || LLONG_MAX == 0x7fffL - msgpack_pack_real_int16(x, d); -#elif SIZEOF_LONG_LONG == 4 || LLONG_MAX == 0x7fffffffL - msgpack_pack_real_int32(x, d); -#else - msgpack_pack_real_int64(x, d); -#endif #else -if(sizeof(long long) == 2) { - msgpack_pack_real_int16(x, d); -} else if(sizeof(long long) == 4) { - msgpack_pack_real_int32(x, d); +if(sizeof(int) == 2) { + msgpack_pack_real_int16(x, d); +} else if(sizeof(int) == 4) { + msgpack_pack_real_int32(x, d); } else { - msgpack_pack_real_int64(x, d); + msgpack_pack_real_int64(x, d); } #endif } -msgpack_pack_inline_func_cint(_unsigned_short)(msgpack_pack_user x, unsigned short d) +static inline int msgpack_pack_long(msgpack_packer* x, 
long d) { -#if defined(SIZEOF_SHORT) || defined(USHRT_MAX) -#if SIZEOF_SHORT == 2 || USHRT_MAX == 0xffffU - msgpack_pack_real_uint16(x, d); -#elif SIZEOF_SHORT == 4 || USHRT_MAX == 0xffffffffU - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif +#if defined(SIZEOF_LONG) +#if SIZEOF_LONG == 4 + msgpack_pack_real_int32(x, d); #else -if(sizeof(unsigned short) == 2) { - msgpack_pack_real_uint16(x, d); -} else if(sizeof(unsigned short) == 4) { - msgpack_pack_real_uint32(x, d); -} else { - msgpack_pack_real_uint64(x, d); -} + msgpack_pack_real_int64(x, d); #endif -} -msgpack_pack_inline_func_cint(_unsigned_int)(msgpack_pack_user x, unsigned int d) -{ -#if defined(SIZEOF_INT) || defined(UINT_MAX) -#if SIZEOF_INT == 2 || UINT_MAX == 0xffffU - msgpack_pack_real_uint16(x, d); -#elif SIZEOF_INT == 4 || UINT_MAX == 0xffffffffU - msgpack_pack_real_uint32(x, d); +#elif defined(LONG_MAX) +#if LONG_MAX == 0x7fffffffL + msgpack_pack_real_int32(x, d); #else - msgpack_pack_real_uint64(x, d); + msgpack_pack_real_int64(x, d); #endif + #else -if(sizeof(unsigned int) == 2) { - msgpack_pack_real_uint16(x, d); -} else if(sizeof(unsigned int) == 4) { - msgpack_pack_real_uint32(x, d); -} else { - msgpack_pack_real_uint64(x, d); -} + if (sizeof(long) == 4) { + msgpack_pack_real_int32(x, d); + } else { + msgpack_pack_real_int64(x, d); + } #endif } -msgpack_pack_inline_func_cint(_unsigned_long)(msgpack_pack_user x, unsigned long d) +static inline int msgpack_pack_long_long(msgpack_packer* x, long long d) { -#if defined(SIZEOF_LONG) || defined(ULONG_MAX) -#if SIZEOF_LONG == 2 || ULONG_MAX == 0xffffUL - msgpack_pack_real_uint16(x, d); -#elif SIZEOF_LONG == 4 || ULONG_MAX == 0xffffffffUL - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif -#else -if(sizeof(unsigned int) == 2) { - msgpack_pack_real_uint16(x, d); -} else if(sizeof(unsigned int) == 4) { - msgpack_pack_real_uint32(x, d); -} else { - msgpack_pack_real_uint64(x, d); -} -#endif + msgpack_pack_real_int64(x, d); } -msgpack_pack_inline_func_cint(_unsigned_long_long)(msgpack_pack_user x, unsigned long long d) +static inline int msgpack_pack_unsigned_long_long(msgpack_packer* x, unsigned long long d) { -#if defined(SIZEOF_LONG_LONG) || defined(ULLONG_MAX) -#if SIZEOF_LONG_LONG == 2 || ULLONG_MAX == 0xffffUL - msgpack_pack_real_uint16(x, d); -#elif SIZEOF_LONG_LONG == 4 || ULLONG_MAX == 0xffffffffUL - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif -#else -if(sizeof(unsigned long long) == 2) { - msgpack_pack_real_uint16(x, d); -} else if(sizeof(unsigned long long) == 4) { - msgpack_pack_real_uint32(x, d); -} else { - msgpack_pack_real_uint64(x, d); -} -#endif + msgpack_pack_real_uint64(x, d); } -#undef msgpack_pack_inline_func_cint -#endif - - /* * Float */ -msgpack_pack_inline_func(_float)(msgpack_pack_user x, float d) +static inline int msgpack_pack_float(msgpack_packer* x, float d) { - union { char buf[4]; uint32_t num; } f; - unsigned char buf[5]; - *((float*)&f.buf) = d; // FIXME - buf[0] = 0xca; *(uint32_t*)&buf[1] = _msgpack_be32(f.num); - msgpack_pack_append_buffer(x, buf, 5); + unsigned char buf[5]; + buf[0] = 0xca; + +#if PY_VERSION_HEX >= 0x030B00A7 + PyFloat_Pack4(d, (char *)&buf[1], 0); +#else + _PyFloat_Pack4(d, &buf[1], 0); +#endif + msgpack_pack_append_buffer(x, buf, 5); } -msgpack_pack_inline_func(_double)(msgpack_pack_user x, double d) +static inline int msgpack_pack_double(msgpack_packer* x, double d) { - union { char buf[8]; uint64_t num; } f; - unsigned 
char buf[9]; - *((double*)&f.buf) = d; // FIXME - buf[0] = 0xcb; *(uint64_t*)&buf[1] = _msgpack_be64(f.num); - msgpack_pack_append_buffer(x, buf, 9); + unsigned char buf[9]; + buf[0] = 0xcb; +#if PY_VERSION_HEX >= 0x030B00A7 + PyFloat_Pack8(d, (char *)&buf[1], 0); +#else + _PyFloat_Pack8(d, &buf[1], 0); +#endif + msgpack_pack_append_buffer(x, buf, 9); } @@ -575,10 +372,10 @@ msgpack_pack_inline_func(_double)(msgpack_pack_user x, double d) * Nil */ -msgpack_pack_inline_func(_nil)(msgpack_pack_user x) +static inline int msgpack_pack_nil(msgpack_packer* x) { - static const unsigned char d = 0xc0; - msgpack_pack_append_buffer(x, &d, 1); + static const unsigned char d = 0xc0; + msgpack_pack_append_buffer(x, &d, 1); } @@ -586,16 +383,16 @@ msgpack_pack_inline_func(_nil)(msgpack_pack_user x) * Boolean */ -msgpack_pack_inline_func(_true)(msgpack_pack_user x) +static inline int msgpack_pack_true(msgpack_packer* x) { - static const unsigned char d = 0xc3; - msgpack_pack_append_buffer(x, &d, 1); + static const unsigned char d = 0xc3; + msgpack_pack_append_buffer(x, &d, 1); } -msgpack_pack_inline_func(_false)(msgpack_pack_user x) +static inline int msgpack_pack_false(msgpack_packer* x) { - static const unsigned char d = 0xc2; - msgpack_pack_append_buffer(x, &d, 1); + static const unsigned char d = 0xc2; + msgpack_pack_append_buffer(x, &d, 1); } @@ -603,20 +400,20 @@ msgpack_pack_inline_func(_false)(msgpack_pack_user x) * Array */ -msgpack_pack_inline_func(_array)(msgpack_pack_user x, unsigned int n) +static inline int msgpack_pack_array(msgpack_packer* x, unsigned int n) { - if(n < 16) { - unsigned char d = 0x90 | n; - msgpack_pack_append_buffer(x, &d, 1); - } else if(n < 65536) { - unsigned char buf[3]; - buf[0] = 0xdc; *(uint16_t*)&buf[1] = _msgpack_be16(n); - msgpack_pack_append_buffer(x, buf, 3); - } else { - unsigned char buf[5]; - buf[0] = 0xdd; *(uint32_t*)&buf[1] = _msgpack_be32(n); - msgpack_pack_append_buffer(x, buf, 5); - } + if(n < 16) { + unsigned char d = 0x90 | n; + msgpack_pack_append_buffer(x, &d, 1); + } else if(n < 65536) { + unsigned char buf[3]; + buf[0] = 0xdc; _msgpack_store16(&buf[1], (uint16_t)n); + msgpack_pack_append_buffer(x, buf, 3); + } else { + unsigned char buf[5]; + buf[0] = 0xdd; _msgpack_store32(&buf[1], (uint32_t)n); + msgpack_pack_append_buffer(x, buf, 5); + } } @@ -624,20 +421,20 @@ msgpack_pack_inline_func(_array)(msgpack_pack_user x, unsigned int n) * Map */ -msgpack_pack_inline_func(_map)(msgpack_pack_user x, unsigned int n) +static inline int msgpack_pack_map(msgpack_packer* x, unsigned int n) { - if(n < 16) { - unsigned char d = 0x80 | n; - msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); - } else if(n < 65536) { - unsigned char buf[3]; - buf[0] = 0xde; *(uint16_t*)&buf[1] = _msgpack_be16(n); - msgpack_pack_append_buffer(x, buf, 3); - } else { - unsigned char buf[5]; - buf[0] = 0xdf; *(uint32_t*)&buf[1] = _msgpack_be32(n); - msgpack_pack_append_buffer(x, buf, 5); - } + if(n < 16) { + unsigned char d = 0x80 | n; + msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); + } else if(n < 65536) { + unsigned char buf[3]; + buf[0] = 0xde; _msgpack_store16(&buf[1], (uint16_t)n); + msgpack_pack_append_buffer(x, buf, 3); + } else { + unsigned char buf[5]; + buf[0] = 0xdf; _msgpack_store32(&buf[1], (uint32_t)n); + msgpack_pack_append_buffer(x, buf, 5); + } } @@ -645,29 +442,145 @@ msgpack_pack_inline_func(_map)(msgpack_pack_user x, unsigned int n) * Raw */ -msgpack_pack_inline_func(_raw)(msgpack_pack_user x, size_t l) +static inline int msgpack_pack_raw(msgpack_packer* x, size_t 
l) +{ + if (l < 32) { + unsigned char d = 0xa0 | (uint8_t)l; + msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); + } else if (x->use_bin_type && l < 256) { // str8 is new format introduced with bin. + unsigned char buf[2] = {0xd9, (uint8_t)l}; + msgpack_pack_append_buffer(x, buf, 2); + } else if (l < 65536) { + unsigned char buf[3]; + buf[0] = 0xda; _msgpack_store16(&buf[1], (uint16_t)l); + msgpack_pack_append_buffer(x, buf, 3); + } else { + unsigned char buf[5]; + buf[0] = 0xdb; _msgpack_store32(&buf[1], (uint32_t)l); + msgpack_pack_append_buffer(x, buf, 5); + } +} + +/* + * bin + */ +static inline int msgpack_pack_bin(msgpack_packer *x, size_t l) { - if(l < 32) { - unsigned char d = 0xa0 | l; - msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); - } else if(l < 65536) { - unsigned char buf[3]; - buf[0] = 0xda; *(uint16_t*)&buf[1] = _msgpack_be16(l); - msgpack_pack_append_buffer(x, buf, 3); - } else { - unsigned char buf[5]; - buf[0] = 0xdb; *(uint32_t*)&buf[1] = _msgpack_be32(l); - msgpack_pack_append_buffer(x, buf, 5); - } + if (!x->use_bin_type) { + return msgpack_pack_raw(x, l); + } + if (l < 256) { + unsigned char buf[2] = {0xc4, (unsigned char)l}; + msgpack_pack_append_buffer(x, buf, 2); + } else if (l < 65536) { + unsigned char buf[3] = {0xc5}; + _msgpack_store16(&buf[1], (uint16_t)l); + msgpack_pack_append_buffer(x, buf, 3); + } else { + unsigned char buf[5] = {0xc6}; + _msgpack_store32(&buf[1], (uint32_t)l); + msgpack_pack_append_buffer(x, buf, 5); + } } -msgpack_pack_inline_func(_raw_body)(msgpack_pack_user x, const void* b, size_t l) +static inline int msgpack_pack_raw_body(msgpack_packer* x, const void* b, size_t l) { - msgpack_pack_append_buffer(x, (const unsigned char*)b, l); + if (l > 0) msgpack_pack_append_buffer(x, (const unsigned char*)b, l); + return 0; } -#undef msgpack_pack_inline_func -#undef msgpack_pack_user +/* + * Ext + */ +static inline int msgpack_pack_ext(msgpack_packer* x, char typecode, size_t l) +{ + if (l == 1) { + unsigned char buf[2]; + buf[0] = 0xd4; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l == 2) { + unsigned char buf[2]; + buf[0] = 0xd5; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l == 4) { + unsigned char buf[2]; + buf[0] = 0xd6; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l == 8) { + unsigned char buf[2]; + buf[0] = 0xd7; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l == 16) { + unsigned char buf[2]; + buf[0] = 0xd8; + buf[1] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 2); + } + else if(l < 256) { + unsigned char buf[3]; + buf[0] = 0xc7; + buf[1] = l; + buf[2] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 3); + } else if(l < 65536) { + unsigned char buf[4]; + buf[0] = 0xc8; + _msgpack_store16(&buf[1], (uint16_t)l); + buf[3] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 4); + } else { + unsigned char buf[6]; + buf[0] = 0xc9; + _msgpack_store32(&buf[1], (uint32_t)l); + buf[5] = (unsigned char)typecode; + msgpack_pack_append_buffer(x, buf, 6); + } + +} + +/* + * Pack Timestamp extension type. Follows msgpack-c pack_template.h. 
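+ * Chooses the smallest spec encoding: timestamp32 (uint32 seconds) when
+ * nanoseconds are zero and seconds fit in 32 bits, timestamp64 (30-bit
+ * nanoseconds above 34-bit seconds) when seconds fit in 34 bits, and
+ * timestamp96 (uint32 nanoseconds followed by int64 seconds) otherwise.
+ * All three are emitted as ext type -1.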
+ */ +static inline int msgpack_pack_timestamp(msgpack_packer* x, int64_t seconds, uint32_t nanoseconds) +{ + if ((seconds >> 34) == 0) { + /* seconds is unsigned and fits in 34 bits */ + uint64_t data64 = ((uint64_t)nanoseconds << 34) | (uint64_t)seconds; + if ((data64 & 0xffffffff00000000L) == 0) { + /* no nanoseconds and seconds is 32bits or smaller. timestamp32. */ + unsigned char buf[4]; + uint32_t data32 = (uint32_t)data64; + msgpack_pack_ext(x, -1, 4); + _msgpack_store32(buf, data32); + msgpack_pack_raw_body(x, buf, 4); + } else { + /* timestamp64 */ + unsigned char buf[8]; + msgpack_pack_ext(x, -1, 8); + _msgpack_store64(buf, data64); + msgpack_pack_raw_body(x, buf, 8); + + } + } else { + /* seconds is signed or >34bits */ + unsigned char buf[12]; + _msgpack_store32(&buf[0], nanoseconds); + _msgpack_store64(&buf[4], seconds); + msgpack_pack_ext(x, -1, 12); + msgpack_pack_raw_body(x, buf, 12); + } + return 0; +} + + #undef msgpack_pack_append_buffer #undef TAKE8_8 @@ -675,12 +588,9 @@ msgpack_pack_inline_func(_raw_body)(msgpack_pack_user x, const void* b, size_t l #undef TAKE8_32 #undef TAKE8_64 -#undef msgpack_pack_real_uint8 #undef msgpack_pack_real_uint16 #undef msgpack_pack_real_uint32 #undef msgpack_pack_real_uint64 -#undef msgpack_pack_real_int8 #undef msgpack_pack_real_int16 #undef msgpack_pack_real_int32 #undef msgpack_pack_real_int64 - diff --git a/msgpack/sysdep.h b/msgpack/sysdep.h index 106158ea..70673004 100644 --- a/msgpack/sysdep.h +++ b/msgpack/sysdep.h @@ -1,7 +1,7 @@ /* * MessagePack system dependencies * - * Copyright (C) 2008-2009 FURUHASHI Sadayuki + * Copyright (C) 2008-2010 FURUHASHI Sadayuki * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,9 @@ #ifndef MSGPACK_SYSDEP_H__ #define MSGPACK_SYSDEP_H__ - -#ifdef _MSC_VER +#include +#include +#if defined(_MSC_VER) && _MSC_VER < 1600 typedef __int8 int8_t; typedef unsigned __int8 uint8_t; typedef __int16 int16_t; @@ -28,44 +29,86 @@ typedef __int32 int32_t; typedef unsigned __int32 uint32_t; typedef __int64 int64_t; typedef unsigned __int64 uint64_t; +#elif defined(_MSC_VER) // && _MSC_VER >= 1600 +#include #else -#include #include #include #endif - #ifdef _WIN32 +#define _msgpack_atomic_counter_header typedef long _msgpack_atomic_counter_t; #define _msgpack_sync_decr_and_fetch(ptr) InterlockedDecrement(ptr) #define _msgpack_sync_incr_and_fetch(ptr) InterlockedIncrement(ptr) +#elif defined(__GNUC__) && ((__GNUC__*10 + __GNUC_MINOR__) < 41) +#define _msgpack_atomic_counter_header "gcc_atomic.h" #else typedef unsigned int _msgpack_atomic_counter_t; #define _msgpack_sync_decr_and_fetch(ptr) __sync_sub_and_fetch(ptr, 1) #define _msgpack_sync_incr_and_fetch(ptr) __sync_add_and_fetch(ptr, 1) #endif - #ifdef _WIN32 -#include -#else -#include /* __BYTE_ORDER */ + +#ifdef __cplusplus +/* numeric_limits::min,max */ +#ifdef max +#undef max +#endif +#ifdef min +#undef min +#endif +#endif + +#else /* _WIN32 */ +#include /* ntohs, ntohl */ #endif #if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define __LITTLE_ENDIAN__ -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define __BIG_ENDIAN__ +#elif _WIN32 +#define __LITTLE_ENDIAN__ #endif #endif + #ifdef __LITTLE_ENDIAN__ -#define _msgpack_be16(x) ntohs(x) -#define _msgpack_be32(x) ntohl(x) +#ifdef _WIN32 +# if defined(ntohs) +# define _msgpack_be16(x) ntohs(x) +# elif defined(_byteswap_ushort) || (defined(_MSC_VER) && _MSC_VER >= 1400) +# define _msgpack_be16(x) ((uint16_t)_byteswap_ushort((unsigned short)x)) +# else +# define _msgpack_be16(x) ( \ + ((((uint16_t)x) << 8) ) | \ + ((((uint16_t)x) >> 8) ) ) +# endif +#else +# define _msgpack_be16(x) ntohs(x) +#endif -#if defined(_byteswap_uint64) +#ifdef _WIN32 +# if defined(ntohl) +# define _msgpack_be32(x) ntohl(x) +# elif defined(_byteswap_ulong) || defined(_MSC_VER) +# define _msgpack_be32(x) ((uint32_t)_byteswap_ulong((unsigned long)x)) +# else +# define _msgpack_be32(x) \ + ( ((((uint32_t)x) << 24) ) | \ + ((((uint32_t)x) << 8) & 0x00ff0000U ) | \ + ((((uint32_t)x) >> 8) & 0x0000ff00U ) | \ + ((((uint32_t)x) >> 24) ) ) +# endif +#else +# define _msgpack_be32(x) ntohl(x) +#endif + +#if defined(_byteswap_uint64) || defined(_MSC_VER) # define _msgpack_be64(x) (_byteswap_uint64(x)) #elif defined(bswap_64) # define _msgpack_be64(x) bswap_64(x) @@ -73,22 +116,79 @@ typedef unsigned int _msgpack_atomic_counter_t; # define _msgpack_be64(x) __DARWIN_OSSwapInt64(x) #else #define _msgpack_be64(x) \ - ( ((((uint64_t)x) << 56) & 0xff00000000000000ULL ) | \ - ((((uint64_t)x) << 40) & 0x00ff000000000000ULL ) | \ - ((((uint64_t)x) << 24) & 0x0000ff0000000000ULL ) | \ - ((((uint64_t)x) << 8) & 0x000000ff00000000ULL ) | \ - ((((uint64_t)x) >> 8) & 0x00000000ff000000ULL ) | \ - ((((uint64_t)x) >> 24) & 0x0000000000ff0000ULL ) | \ - ((((uint64_t)x) >> 40) & 0x000000000000ff00ULL ) | \ - ((((uint64_t)x) >> 56) & 0x00000000000000ffULL ) ) + ( ((((uint64_t)x) << 56) ) | \ + ((((uint64_t)x) << 40) & 0x00ff000000000000ULL ) | \ + ((((uint64_t)x) << 24) & 0x0000ff0000000000ULL ) | \ + ((((uint64_t)x) << 8) & 0x000000ff00000000ULL ) | \ + ((((uint64_t)x) >> 8) & 
0x00000000ff000000ULL ) | \ + ((((uint64_t)x) >> 24) & 0x0000000000ff0000ULL ) | \ + ((((uint64_t)x) >> 40) & 0x000000000000ff00ULL ) | \ + ((((uint64_t)x) >> 56) ) ) #endif +#define _msgpack_load16(cast, from) ((cast)( \ + (((uint16_t)((uint8_t*)(from))[0]) << 8) | \ + (((uint16_t)((uint8_t*)(from))[1]) ) )) + +#define _msgpack_load32(cast, from) ((cast)( \ + (((uint32_t)((uint8_t*)(from))[0]) << 24) | \ + (((uint32_t)((uint8_t*)(from))[1]) << 16) | \ + (((uint32_t)((uint8_t*)(from))[2]) << 8) | \ + (((uint32_t)((uint8_t*)(from))[3]) ) )) + +#define _msgpack_load64(cast, from) ((cast)( \ + (((uint64_t)((uint8_t*)(from))[0]) << 56) | \ + (((uint64_t)((uint8_t*)(from))[1]) << 48) | \ + (((uint64_t)((uint8_t*)(from))[2]) << 40) | \ + (((uint64_t)((uint8_t*)(from))[3]) << 32) | \ + (((uint64_t)((uint8_t*)(from))[4]) << 24) | \ + (((uint64_t)((uint8_t*)(from))[5]) << 16) | \ + (((uint64_t)((uint8_t*)(from))[6]) << 8) | \ + (((uint64_t)((uint8_t*)(from))[7]) ) )) + #else + #define _msgpack_be16(x) (x) #define _msgpack_be32(x) (x) #define _msgpack_be64(x) (x) + +#define _msgpack_load16(cast, from) ((cast)( \ + (((uint16_t)((uint8_t*)from)[0]) << 8) | \ + (((uint16_t)((uint8_t*)from)[1]) ) )) + +#define _msgpack_load32(cast, from) ((cast)( \ + (((uint32_t)((uint8_t*)from)[0]) << 24) | \ + (((uint32_t)((uint8_t*)from)[1]) << 16) | \ + (((uint32_t)((uint8_t*)from)[2]) << 8) | \ + (((uint32_t)((uint8_t*)from)[3]) ) )) + +#define _msgpack_load64(cast, from) ((cast)( \ + (((uint64_t)((uint8_t*)from)[0]) << 56) | \ + (((uint64_t)((uint8_t*)from)[1]) << 48) | \ + (((uint64_t)((uint8_t*)from)[2]) << 40) | \ + (((uint64_t)((uint8_t*)from)[3]) << 32) | \ + (((uint64_t)((uint8_t*)from)[4]) << 24) | \ + (((uint64_t)((uint8_t*)from)[5]) << 16) | \ + (((uint64_t)((uint8_t*)from)[6]) << 8) | \ + (((uint64_t)((uint8_t*)from)[7]) ) )) #endif -#endif /* msgpack/sysdep.h */ +#define _msgpack_store16(to, num) \ + do { uint16_t val = _msgpack_be16(num); memcpy(to, &val, 2); } while(0) +#define _msgpack_store32(to, num) \ + do { uint32_t val = _msgpack_be32(num); memcpy(to, &val, 4); } while(0) +#define _msgpack_store64(to, num) \ + do { uint64_t val = _msgpack_be64(num); memcpy(to, &val, 8); } while(0) +/* +#define _msgpack_load16(cast, from) \ + ({ cast val; memcpy(&val, (char*)from, 2); _msgpack_be16(val); }) +#define _msgpack_load32(cast, from) \ + ({ cast val; memcpy(&val, (char*)from, 4); _msgpack_be32(val); }) +#define _msgpack_load64(cast, from) \ + ({ cast val; memcpy(&val, (char*)from, 8); _msgpack_be64(val); }) +*/ + + +#endif /* msgpack/sysdep.h */ diff --git a/msgpack/unpack.h b/msgpack/unpack.h index 0586ca86..58a2f4f5 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -16,106 +16,104 @@ * limitations under the License. 
*/ -#define MSGPACK_MAX_STACK_SIZE (1024) +#define MSGPACK_EMBED_STACK_SIZE (1024) #include "unpack_define.h" typedef struct unpack_user { - int use_list; + bool use_list; + bool raw; + bool has_pairs_hook; + bool strict_map_key; + int timestamp; PyObject *object_hook; PyObject *list_hook; - const char *encoding; + PyObject *ext_hook; + PyObject *timestamp_t; + PyObject *giga; + PyObject *utc; const char *unicode_errors; + Py_ssize_t max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len; } unpack_user; +typedef PyObject* msgpack_unpack_object; +struct unpack_context; +typedef struct unpack_context unpack_context; +typedef int (*execute_fn)(unpack_context *ctx, const char* data, Py_ssize_t len, Py_ssize_t* off); -#define msgpack_unpack_struct(name) \ - struct template ## name - -#define msgpack_unpack_func(ret, name) \ - static inline ret template ## name - -#define msgpack_unpack_callback(name) \ - template_callback ## name - -#define msgpack_unpack_object PyObject* - -#define msgpack_unpack_user unpack_user - - -struct template_context; -typedef struct template_context template_context; - -static inline msgpack_unpack_object template_callback_root(unpack_user* u) +static inline msgpack_unpack_object unpack_callback_root(unpack_user* u) { return NULL; } -static inline int template_callback_uint16(unpack_user* u, uint16_t d, msgpack_unpack_object* o) +static inline int unpack_callback_uint16(unpack_user* u, uint16_t d, msgpack_unpack_object* o) { - PyObject *p = PyInt_FromLong((long)d); + PyObject *p = PyLong_FromLong((long)d); if (!p) return -1; *o = p; return 0; } -static inline int template_callback_uint8(unpack_user* u, uint8_t d, msgpack_unpack_object* o) +static inline int unpack_callback_uint8(unpack_user* u, uint8_t d, msgpack_unpack_object* o) { - return template_callback_uint16(u, d, o); + return unpack_callback_uint16(u, d, o); } -static inline int template_callback_uint32(unpack_user* u, uint32_t d, msgpack_unpack_object* o) +static inline int unpack_callback_uint32(unpack_user* u, uint32_t d, msgpack_unpack_object* o) { - PyObject *p; - if (d > LONG_MAX) { - p = PyLong_FromUnsignedLong((unsigned long)d); - } else { - p = PyInt_FromLong((long)d); - } + PyObject *p = PyLong_FromSize_t((size_t)d); if (!p) return -1; *o = p; return 0; } -static inline int template_callback_uint64(unpack_user* u, uint64_t d, msgpack_unpack_object* o) +static inline int unpack_callback_uint64(unpack_user* u, uint64_t d, msgpack_unpack_object* o) { - PyObject *p = PyLong_FromUnsignedLongLong(d); + PyObject *p; + if (d > LONG_MAX) { + p = PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG)d); + } else { + p = PyLong_FromLong((long)d); + } if (!p) return -1; *o = p; return 0; } -static inline int template_callback_int32(unpack_user* u, int32_t d, msgpack_unpack_object* o) +static inline int unpack_callback_int32(unpack_user* u, int32_t d, msgpack_unpack_object* o) { - PyObject *p = PyInt_FromLong(d); + PyObject *p = PyLong_FromLong(d); if (!p) return -1; *o = p; return 0; } -static inline int template_callback_int16(unpack_user* u, int16_t d, msgpack_unpack_object* o) +static inline int unpack_callback_int16(unpack_user* u, int16_t d, msgpack_unpack_object* o) { - return template_callback_int32(u, d, o); + return unpack_callback_int32(u, d, o); } -static inline int template_callback_int8(unpack_user* u, int8_t d, msgpack_unpack_object* o) +static inline int unpack_callback_int8(unpack_user* u, int8_t d, msgpack_unpack_object* o) { - return template_callback_int32(u, d, o); + return 
unpack_callback_int32(u, d, o); } -static inline int template_callback_int64(unpack_user* u, int64_t d, msgpack_unpack_object* o) +static inline int unpack_callback_int64(unpack_user* u, int64_t d, msgpack_unpack_object* o) { - PyObject *p = PyLong_FromLongLong(d); - if (!p) - return -1; + PyObject *p; + if (d > LONG_MAX || d < LONG_MIN) { + p = PyLong_FromLongLong((PY_LONG_LONG)d); + } else { + p = PyLong_FromLong((long)d); + } *o = p; return 0; } -static inline int template_callback_double(unpack_user* u, double d, msgpack_unpack_object* o) +static inline int unpack_callback_double(unpack_user* u, double d, msgpack_unpack_object* o) { PyObject *p = PyFloat_FromDouble(d); if (!p) @@ -124,22 +122,26 @@ static inline int template_callback_double(unpack_user* u, double d, msgpack_unp return 0; } -static inline int template_callback_float(unpack_user* u, float d, msgpack_unpack_object* o) +static inline int unpack_callback_float(unpack_user* u, float d, msgpack_unpack_object* o) { - return template_callback_double(u, d, o); + return unpack_callback_double(u, d, o); } -static inline int template_callback_nil(unpack_user* u, msgpack_unpack_object* o) +static inline int unpack_callback_nil(unpack_user* u, msgpack_unpack_object* o) { Py_INCREF(Py_None); *o = Py_None; return 0; } -static inline int template_callback_true(unpack_user* u, msgpack_unpack_object* o) +static inline int unpack_callback_true(unpack_user* u, msgpack_unpack_object* o) { Py_INCREF(Py_True); *o = Py_True; return 0; } -static inline int template_callback_false(unpack_user* u, msgpack_unpack_object* o) +static inline int unpack_callback_false(unpack_user* u, msgpack_unpack_object* o) { Py_INCREF(Py_False); *o = Py_False; return 0; } -static inline int template_callback_array(unpack_user* u, unsigned int n, msgpack_unpack_object* o) +static inline int unpack_callback_array(unpack_user* u, unsigned int n, msgpack_unpack_object* o) { + if (n > u->max_array_len) { + PyErr_Format(PyExc_ValueError, "%u exceeds max_array_len(%zd)", n, u->max_array_len); + return -1; + } PyObject *p = u->use_list ? 
PyList_New(n) : PyTuple_New(n); if (!p) @@ -148,7 +150,7 @@ static inline int template_callback_array(unpack_user* u, unsigned int n, msgpac return 0; } -static inline int template_callback_array_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object o) +static inline int unpack_callback_array_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object o) { if (u->use_list) PyList_SET_ITEM(*c, current, o); @@ -157,28 +159,56 @@ static inline int template_callback_array_item(unpack_user* u, unsigned int curr return 0; } -static inline int template_callback_array_end(unpack_user* u, msgpack_unpack_object* c) +static inline int unpack_callback_array_end(unpack_user* u, msgpack_unpack_object* c) { if (u->list_hook) { - PyObject *arglist = Py_BuildValue("(O)", *c); - *c = PyEval_CallObject(u->list_hook, arglist); - Py_DECREF(arglist); + PyObject *new_c = PyObject_CallFunctionObjArgs(u->list_hook, *c, NULL); + if (!new_c) + return -1; + Py_DECREF(*c); + *c = new_c; } return 0; } -static inline int template_callback_map(unpack_user* u, unsigned int n, msgpack_unpack_object* o) +static inline int unpack_callback_map(unpack_user* u, unsigned int n, msgpack_unpack_object* o) { - PyObject *p = PyDict_New(); + if (n > u->max_map_len) { + PyErr_Format(PyExc_ValueError, "%u exceeds max_map_len(%zd)", n, u->max_map_len); + return -1; + } + PyObject *p; + if (u->has_pairs_hook) { + p = PyList_New(n); // Or use tuple? + } + else { + p = PyDict_New(); + } if (!p) return -1; *o = p; return 0; } -static inline int template_callback_map_item(unpack_user* u, msgpack_unpack_object* c, msgpack_unpack_object k, msgpack_unpack_object v) +static inline int unpack_callback_map_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object k, msgpack_unpack_object v) { - if (PyDict_SetItem(*c, k, v) == 0) { + if (u->strict_map_key && !PyUnicode_CheckExact(k) && !PyBytes_CheckExact(k)) { + PyErr_Format(PyExc_ValueError, "%.100s is not allowed for map key when strict_map_key=True", Py_TYPE(k)->tp_name); + return -1; + } + if (PyUnicode_CheckExact(k)) { + PyUnicode_InternInPlace(&k); + } + if (u->has_pairs_hook) { + msgpack_unpack_object item = PyTuple_Pack(2, k, v); + if (!item) + return -1; + Py_DECREF(k); + Py_DECREF(v); + PyList_SET_ITEM(*c, current, item); + return 0; + } + else if (PyDict_SetItem(*c, k, v) == 0) { Py_DECREF(k); Py_DECREF(v); return 0; @@ -186,23 +216,171 @@ static inline int template_callback_map_item(unpack_user* u, msgpack_unpack_obje return -1; } -static inline int template_callback_map_end(unpack_user* u, msgpack_unpack_object* c) +static inline int unpack_callback_map_end(unpack_user* u, msgpack_unpack_object* c) { if (u->object_hook) { - PyObject *arglist = Py_BuildValue("(O)", *c); - *c = PyEval_CallObject(u->object_hook, arglist); - Py_DECREF(arglist); + PyObject *new_c = PyObject_CallFunctionObjArgs(u->object_hook, *c, NULL); + if (!new_c) + return -1; + + Py_DECREF(*c); + *c = new_c; } return 0; } -static inline int template_callback_raw(unpack_user* u, const char* b, const char* p, unsigned int l, msgpack_unpack_object* o) +static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* p, unsigned int l, msgpack_unpack_object* o) { + if (l > u->max_str_len) { + PyErr_Format(PyExc_ValueError, "%u exceeds max_str_len(%zd)", l, u->max_str_len); + return -1; + } + PyObject *py; - if(u->encoding) { - py = PyUnicode_Decode(p, l, u->encoding, u->unicode_errors); - } else { + + if 
(u->raw) { py = PyBytes_FromStringAndSize(p, l); + } else { + py = PyUnicode_DecodeUTF8(p, l, u->unicode_errors); + } + if (!py) + return -1; + *o = py; + return 0; +} + +static inline int unpack_callback_bin(unpack_user* u, const char* b, const char* p, unsigned int l, msgpack_unpack_object* o) +{ + if (l > u->max_bin_len) { + PyErr_Format(PyExc_ValueError, "%u exceeds max_bin_len(%zd)", l, u->max_bin_len); + return -1; + } + + PyObject *py = PyBytes_FromStringAndSize(p, l); + if (!py) + return -1; + *o = py; + return 0; +} + +typedef struct msgpack_timestamp { + int64_t tv_sec; + uint32_t tv_nsec; +} msgpack_timestamp; + +/* + * Unpack ext buffer to a timestamp. Pulled from msgpack-c timestamp.h. + */ +static int unpack_timestamp(const char* buf, unsigned int buflen, msgpack_timestamp* ts) { + switch (buflen) { + case 4: + ts->tv_nsec = 0; + { + uint32_t v = _msgpack_load32(uint32_t, buf); + ts->tv_sec = (int64_t)v; + } + return 0; + case 8: { + uint64_t value =_msgpack_load64(uint64_t, buf); + ts->tv_nsec = (uint32_t)(value >> 34); + ts->tv_sec = value & 0x00000003ffffffffLL; + return 0; + } + case 12: + ts->tv_nsec = _msgpack_load32(uint32_t, buf); + ts->tv_sec = _msgpack_load64(int64_t, buf + 4); + return 0; + default: + return -1; + } +} + +#include "datetime.h" + +static int unpack_callback_ext(unpack_user* u, const char* base, const char* pos, + unsigned int length, msgpack_unpack_object* o) +{ + int8_t typecode = (int8_t)*pos++; + if (!u->ext_hook) { + PyErr_SetString(PyExc_AssertionError, "u->ext_hook cannot be NULL"); + return -1; + } + if (length-1 > u->max_ext_len) { + PyErr_Format(PyExc_ValueError, "%u exceeds max_ext_len(%zd)", length, u->max_ext_len); + return -1; + } + + PyObject *py = NULL; + // length also includes the typecode, so the actual data is length-1 + if (typecode == -1) { + msgpack_timestamp ts; + if (unpack_timestamp(pos, length-1, &ts) < 0) { + return -1; + } + + if (u->timestamp == 2) { // int + PyObject *a = PyLong_FromLongLong(ts.tv_sec); + if (a == NULL) return -1; + + PyObject *c = PyNumber_Multiply(a, u->giga); + Py_DECREF(a); + if (c == NULL) { + return -1; + } + + PyObject *b = PyLong_FromUnsignedLong(ts.tv_nsec); + if (b == NULL) { + Py_DECREF(c); + return -1; + } + + py = PyNumber_Add(c, b); + Py_DECREF(c); + Py_DECREF(b); + } + else if (u->timestamp == 0) { // Timestamp + py = PyObject_CallFunction(u->timestamp_t, "(Lk)", ts.tv_sec, ts.tv_nsec); + } + else if (u->timestamp == 3) { // datetime + // Calculate datetime using epoch + delta + // due to limitations PyDateTime_FromTimestamp on Windows with negative timestamps + PyObject *epoch = PyDateTimeAPI->DateTime_FromDateAndTime(1970, 1, 1, 0, 0, 0, 0, u->utc, PyDateTimeAPI->DateTimeType); + if (epoch == NULL) { + return -1; + } + + PyObject* d = PyDelta_FromDSU(ts.tv_sec/(24*3600), ts.tv_sec%(24*3600), ts.tv_nsec / 1000); + if (d == NULL) { + Py_DECREF(epoch); + return -1; + } + + py = PyNumber_Add(epoch, d); + + Py_DECREF(epoch); + Py_DECREF(d); + } + else { // float + PyObject *a = PyFloat_FromDouble((double)ts.tv_nsec); + if (a == NULL) return -1; + + PyObject *b = PyNumber_TrueDivide(a, u->giga); + Py_DECREF(a); + if (b == NULL) return -1; + + PyObject *c = PyLong_FromLongLong(ts.tv_sec); + if (c == NULL) { + Py_DECREF(b); + return -1; + } + + a = PyNumber_Add(b, c); + Py_DECREF(b); + Py_DECREF(c); + py = a; + } + } else { + py = PyObject_CallFunction(u->ext_hook, "(iy#)", (int)typecode, pos, (Py_ssize_t)length-1); } if (!py) return -1; diff --git a/msgpack/unpack_container_header.h 
b/msgpack/unpack_container_header.h new file mode 100644 index 00000000..c14a3c2b --- /dev/null +++ b/msgpack/unpack_container_header.h @@ -0,0 +1,51 @@ +static inline int unpack_container_header(unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) +{ + assert(len >= *off); + uint32_t size; + const unsigned char *const p = (unsigned char*)data + *off; + +#define inc_offset(inc) \ + if (len - *off < inc) \ + return 0; \ + *off += inc; + + switch (*p) { + case var_offset: + inc_offset(3); + size = _msgpack_load16(uint16_t, p + 1); + break; + case var_offset + 1: + inc_offset(5); + size = _msgpack_load32(uint32_t, p + 1); + break; +#ifdef USE_CASE_RANGE + case fixed_offset + 0x0 ... fixed_offset + 0xf: +#else + case fixed_offset + 0x0: + case fixed_offset + 0x1: + case fixed_offset + 0x2: + case fixed_offset + 0x3: + case fixed_offset + 0x4: + case fixed_offset + 0x5: + case fixed_offset + 0x6: + case fixed_offset + 0x7: + case fixed_offset + 0x8: + case fixed_offset + 0x9: + case fixed_offset + 0xa: + case fixed_offset + 0xb: + case fixed_offset + 0xc: + case fixed_offset + 0xd: + case fixed_offset + 0xe: + case fixed_offset + 0xf: +#endif + ++*off; + size = ((unsigned int)*p) & 0x0f; + break; + default: + PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream"); + return -1; + } + unpack_callback_uint32(&ctx->user, size, &ctx->stack[0].obj); + return 1; +} + diff --git a/msgpack/unpack_define.h b/msgpack/unpack_define.h index 63d90a8e..0dd708d1 100644 --- a/msgpack/unpack_define.h +++ b/msgpack/unpack_define.h @@ -1,7 +1,7 @@ /* * MessagePack unpacking routine template * - * Copyright (C) 2008-2009 FURUHASHI Sadayuki + * Copyright (C) 2008-2010 FURUHASHI Sadayuki * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
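Note on the ext callback above: `unpack_callback_ext()` is what implements the `timestamp` option of `Unpacker`/`unpackb`. Ext typecode -1 is decoded from the 32/64/96-bit layouts by `unpack_timestamp()` and then converted according to `u->timestamp` (0 = `Timestamp` object, 1 = float seconds, 2 = integer nanoseconds, 3 = aware `datetime`). A rough Python-level sketch of the resulting behaviour, assuming the msgpack-python 1.x API (`datetime=` on the packer, `timestamp=` on the unpacker); the comments describe the expected result types, not exact values:

```python
from datetime import datetime, timezone

import msgpack

dt = datetime(2024, 1, 2, 3, 4, 5, tzinfo=timezone.utc)
packed = msgpack.packb(dt, datetime=True)  # stored as the timestamp ext type (-1)

msgpack.unpackb(packed, timestamp=0)  # msgpack.Timestamp(seconds=..., nanoseconds=0)
msgpack.unpackb(packed, timestamp=1)  # float: seconds since the Unix epoch
msgpack.unpackb(packed, timestamp=2)  # int: nanoseconds since the Unix epoch
msgpack.unpackb(packed, timestamp=3)  # timezone-aware datetime, equal to dt
```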
@@ -18,7 +18,8 @@ #ifndef MSGPACK_UNPACK_DEFINE_H__ #define MSGPACK_UNPACK_DEFINE_H__ -#include "sysdep.h" +#include "msgpack/sysdep.h" +#include #include #include #include @@ -28,59 +29,62 @@ extern "C" { #endif -#ifndef MSGPACK_MAX_STACK_SIZE -#define MSGPACK_MAX_STACK_SIZE 16 +#ifndef MSGPACK_EMBED_STACK_SIZE +#define MSGPACK_EMBED_STACK_SIZE 32 #endif +// CS is first byte & 0x1f typedef enum { - CS_HEADER = 0x00, // nil - - //CS_ = 0x01, - //CS_ = 0x02, // false - //CS_ = 0x03, // true - - //CS_ = 0x04, - //CS_ = 0x05, - //CS_ = 0x06, - //CS_ = 0x07, - - //CS_ = 0x08, - //CS_ = 0x09, - CS_FLOAT = 0x0a, - CS_DOUBLE = 0x0b, - CS_UINT_8 = 0x0c, - CS_UINT_16 = 0x0d, - CS_UINT_32 = 0x0e, - CS_UINT_64 = 0x0f, - CS_INT_8 = 0x10, - CS_INT_16 = 0x11, - CS_INT_32 = 0x12, - CS_INT_64 = 0x13, - - //CS_ = 0x14, - //CS_ = 0x15, - //CS_BIG_INT_16 = 0x16, - //CS_BIG_INT_32 = 0x17, - //CS_BIG_FLOAT_16 = 0x18, - //CS_BIG_FLOAT_32 = 0x19, - CS_RAW_16 = 0x1a, - CS_RAW_32 = 0x1b, - CS_ARRAY_16 = 0x1c, - CS_ARRAY_32 = 0x1d, - CS_MAP_16 = 0x1e, - CS_MAP_32 = 0x1f, - - //ACS_BIG_INT_VALUE, - //ACS_BIG_FLOAT_VALUE, - ACS_RAW_VALUE, + CS_HEADER = 0x00, // nil + + //CS_ = 0x01, + //CS_ = 0x02, // false + //CS_ = 0x03, // true + + CS_BIN_8 = 0x04, + CS_BIN_16 = 0x05, + CS_BIN_32 = 0x06, + + CS_EXT_8 = 0x07, + CS_EXT_16 = 0x08, + CS_EXT_32 = 0x09, + + CS_FLOAT = 0x0a, + CS_DOUBLE = 0x0b, + CS_UINT_8 = 0x0c, + CS_UINT_16 = 0x0d, + CS_UINT_32 = 0x0e, + CS_UINT_64 = 0x0f, + CS_INT_8 = 0x10, + CS_INT_16 = 0x11, + CS_INT_32 = 0x12, + CS_INT_64 = 0x13, + + //CS_FIXEXT1 = 0x14, + //CS_FIXEXT2 = 0x15, + //CS_FIXEXT4 = 0x16, + //CS_FIXEXT8 = 0x17, + //CS_FIXEXT16 = 0x18, + + CS_RAW_8 = 0x19, + CS_RAW_16 = 0x1a, + CS_RAW_32 = 0x1b, + CS_ARRAY_16 = 0x1c, + CS_ARRAY_32 = 0x1d, + CS_MAP_16 = 0x1e, + CS_MAP_32 = 0x1f, + + ACS_RAW_VALUE, + ACS_BIN_VALUE, + ACS_EXT_VALUE, } msgpack_unpack_state; typedef enum { - CT_ARRAY_ITEM, - CT_MAP_KEY, - CT_MAP_VALUE, + CT_ARRAY_ITEM, + CT_MAP_KEY, + CT_MAP_VALUE, } msgpack_container_type; @@ -89,4 +93,3 @@ typedef enum { #endif #endif /* msgpack/unpack_define.h */ - diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h index 42fe3a13..cce29e7a 100644 --- a/msgpack/unpack_template.h +++ b/msgpack/unpack_template.h @@ -1,7 +1,7 @@ /* * MessagePack unpacking routine template * - * Copyright (C) 2008-2009 FURUHASHI Sadayuki + * Copyright (C) 2008-2010 FURUHASHI Sadayuki * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,130 +16,124 @@ * limitations under the License. 
*/ -#ifndef msgpack_unpack_func -#error msgpack_unpack_func template is not defined -#endif - -#ifndef msgpack_unpack_callback -#error msgpack_unpack_callback template is not defined -#endif - -#ifndef msgpack_unpack_struct -#error msgpack_unpack_struct template is not defined -#endif - -#ifndef msgpack_unpack_struct_decl -#define msgpack_unpack_struct_decl(name) msgpack_unpack_struct(name) -#endif - -#ifndef msgpack_unpack_object -#error msgpack_unpack_object type is not defined -#endif - -#ifndef msgpack_unpack_user -#error msgpack_unpack_user type is not defined -#endif - #ifndef USE_CASE_RANGE #if !defined(_MSC_VER) #define USE_CASE_RANGE #endif #endif -msgpack_unpack_struct_decl(_stack) { - msgpack_unpack_object obj; - size_t count; - unsigned int ct; - - union { - size_t curr; - msgpack_unpack_object map_key; - }; +typedef struct unpack_stack { + PyObject* obj; + Py_ssize_t size; + Py_ssize_t count; + unsigned int ct; + PyObject* map_key; +} unpack_stack; + +struct unpack_context { + unpack_user user; + unsigned int cs; + unsigned int trail; + unsigned int top; + /* + unpack_stack* stack; + unsigned int stack_size; + unpack_stack embed_stack[MSGPACK_EMBED_STACK_SIZE]; + */ + unpack_stack stack[MSGPACK_EMBED_STACK_SIZE]; }; -msgpack_unpack_struct_decl(_context) { - msgpack_unpack_user user; - unsigned int cs; - unsigned int trail; - unsigned int top; - msgpack_unpack_struct(_stack) stack[MSGPACK_MAX_STACK_SIZE]; -}; +static inline void unpack_init(unpack_context* ctx) +{ + ctx->cs = CS_HEADER; + ctx->trail = 0; + ctx->top = 0; + /* + ctx->stack = ctx->embed_stack; + ctx->stack_size = MSGPACK_EMBED_STACK_SIZE; + */ + ctx->stack[0].obj = unpack_callback_root(&ctx->user); +} -msgpack_unpack_func(void, _init)(msgpack_unpack_struct(_context)* ctx) +/* +static inline void unpack_destroy(unpack_context* ctx) { - ctx->cs = CS_HEADER; - ctx->trail = 0; - ctx->top = 0; - ctx->stack[0].obj = msgpack_unpack_callback(_root)(&ctx->user); + if(ctx->stack_size != MSGPACK_EMBED_STACK_SIZE) { + free(ctx->stack); + } } +*/ -msgpack_unpack_func(msgpack_unpack_object, _data)(msgpack_unpack_struct(_context)* ctx) +static inline PyObject* unpack_data(unpack_context* ctx) { - return (ctx)->stack[0].obj; + return (ctx)->stack[0].obj; } +static inline void unpack_clear(unpack_context *ctx) +{ + Py_CLEAR(ctx->stack[0].obj); +} -msgpack_unpack_func(int, _execute)(msgpack_unpack_struct(_context)* ctx, const char* data, size_t len, size_t* off) +static inline int unpack_execute(bool construct, unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) { - const unsigned char* p = (unsigned char*)data + *off; - const unsigned char* const pe = (unsigned char*)data + len; - const void* n = NULL; + assert(len >= *off); - unsigned int trail = ctx->trail; - unsigned int cs = ctx->cs; - unsigned int top = ctx->top; + const unsigned char* p = (unsigned char*)data + *off; + const unsigned char* const pe = (unsigned char*)data + len; + const void* n = p; - msgpack_unpack_struct(_stack)* stack = ctx->stack; - msgpack_unpack_user* user = &ctx->user; + unsigned int trail = ctx->trail; + unsigned int cs = ctx->cs; + unsigned int top = ctx->top; + unpack_stack* stack = ctx->stack; + /* + unsigned int stack_size = ctx->stack_size; + */ + unpack_user* user = &ctx->user; - msgpack_unpack_object obj; - msgpack_unpack_struct(_stack)* c = NULL; + PyObject* obj = NULL; + unpack_stack* c = NULL; - int ret; + int ret; - assert(len >= *off); +#define construct_cb(name) \ + construct && unpack_callback ## name #define 
push_simple_value(func) \ - if(msgpack_unpack_callback(func)(user, &obj) < 0) { goto _failed; } \ - goto _push + if(construct_cb(func)(user, &obj) < 0) { goto _failed; } \ + goto _push #define push_fixed_value(func, arg) \ - if(msgpack_unpack_callback(func)(user, arg, &obj) < 0) { goto _failed; } \ - goto _push + if(construct_cb(func)(user, arg, &obj) < 0) { goto _failed; } \ + goto _push #define push_variable_value(func, base, pos, len) \ - if(msgpack_unpack_callback(func)(user, \ - (const char*)base, (const char*)pos, len, &obj) < 0) { goto _failed; } \ - goto _push + if(construct_cb(func)(user, \ + (const char*)base, (const char*)pos, len, &obj) < 0) { goto _failed; } \ + goto _push #define again_fixed_trail(_cs, trail_len) \ - trail = trail_len; \ - cs = _cs; \ - goto _fixed_trail_again + trail = trail_len; \ + cs = _cs; \ + goto _fixed_trail_again #define again_fixed_trail_if_zero(_cs, trail_len, ifzero) \ - trail = trail_len; \ - if(trail == 0) { goto ifzero; } \ - cs = _cs; \ - goto _fixed_trail_again + trail = trail_len; \ + if(trail == 0) { goto ifzero; } \ + cs = _cs; \ + goto _fixed_trail_again #define start_container(func, count_, ct_) \ - if(msgpack_unpack_callback(func)(user, count_, &stack[top].obj) < 0) { goto _failed; } \ - if((count_) == 0) { obj = stack[top].obj; goto _push; } \ - if(top >= MSGPACK_MAX_STACK_SIZE) { goto _failed; } \ - stack[top].ct = ct_; \ - stack[top].curr = 0; \ - stack[top].count = count_; \ - /*printf("container %d count %d stack %d\n",stack[top].obj,count_,top);*/ \ - /*printf("stack push %d\n", top);*/ \ - ++top; \ - goto _header_again - -#define NEXT_CS(p) \ - ((unsigned int)*p & 0x1f) - -#define PTR_CAST_8(ptr) (*(uint8_t*)ptr) -#define PTR_CAST_16(ptr) _msgpack_be16(*(uint16_t*)ptr) -#define PTR_CAST_32(ptr) _msgpack_be32(*(uint32_t*)ptr) -#define PTR_CAST_64(ptr) _msgpack_be64(*(uint64_t*)ptr) + if(top >= MSGPACK_EMBED_STACK_SIZE) { ret = -3; goto _end; } \ + if(construct_cb(func)(user, count_, &stack[top].obj) < 0) { goto _failed; } \ + if((count_) == 0) { obj = stack[top].obj; \ + if (construct_cb(func##_end)(user, &obj) < 0) { goto _failed; } \ + goto _push; } \ + stack[top].ct = ct_; \ + stack[top].size = count_; \ + stack[top].count = 0; \ + ++top; \ + goto _header_again + +#define NEXT_CS(p) ((unsigned int)*p & 0x1f) #ifdef USE_CASE_RANGE #define SWITCH_RANGE_BEGIN switch(*p) { @@ -153,224 +147,249 @@ msgpack_unpack_func(int, _execute)(msgpack_unpack_struct(_context)* ctx, const c #define SWITCH_RANGE_END } } #endif - if(p == pe) { goto _out; } - do { - switch(cs) { - case CS_HEADER: - SWITCH_RANGE_BEGIN - SWITCH_RANGE(0x00, 0x7f) // Positive Fixnum - push_fixed_value(_uint8, *(uint8_t*)p); - SWITCH_RANGE(0xe0, 0xff) // Negative Fixnum - push_fixed_value(_int8, *(int8_t*)p); - SWITCH_RANGE(0xc0, 0xdf) // Variable - switch(*p) { - case 0xc0: // nil - push_simple_value(_nil); - //case 0xc1: // string - // again_terminal_trail(NEXT_CS(p), p+1); - case 0xc2: // false - push_simple_value(_false); - case 0xc3: // true - push_simple_value(_true); - //case 0xc4: - //case 0xc5: - //case 0xc6: - //case 0xc7: - //case 0xc8: - //case 0xc9: - case 0xca: // float - case 0xcb: // double - case 0xcc: // unsigned int 8 - case 0xcd: // unsigned int 16 - case 0xce: // unsigned int 32 - case 0xcf: // unsigned int 64 - case 0xd0: // signed int 8 - case 0xd1: // signed int 16 - case 0xd2: // signed int 32 - case 0xd3: // signed int 64 - again_fixed_trail(NEXT_CS(p), 1 << (((unsigned int)*p) & 0x03)); - //case 0xd4: - //case 0xd5: - //case 0xd6: // big 
integer 16 - //case 0xd7: // big integer 32 - //case 0xd8: // big float 16 - //case 0xd9: // big float 32 - case 0xda: // raw 16 - case 0xdb: // raw 32 - case 0xdc: // array 16 - case 0xdd: // array 32 - case 0xde: // map 16 - case 0xdf: // map 32 - again_fixed_trail(NEXT_CS(p), 2 << (((unsigned int)*p) & 0x01)); - default: - goto _failed; - } - SWITCH_RANGE(0xa0, 0xbf) // FixRaw - again_fixed_trail_if_zero(ACS_RAW_VALUE, ((unsigned int)*p & 0x1f), _raw_zero); - SWITCH_RANGE(0x90, 0x9f) // FixArray - start_container(_array, ((unsigned int)*p) & 0x0f, CT_ARRAY_ITEM); - SWITCH_RANGE(0x80, 0x8f) // FixMap - start_container(_map, ((unsigned int)*p) & 0x0f, CT_MAP_KEY); - - SWITCH_RANGE_DEFAULT - goto _failed; - SWITCH_RANGE_END - // end CS_HEADER - - - _fixed_trail_again: - ++p; - - default: - if((size_t)(pe - p) < trail) { goto _out; } - n = p; p += trail - 1; - switch(cs) { - //case CS_ - //case CS_ - case CS_FLOAT: { - union { uint32_t num; char buf[4]; } f; - f.num = PTR_CAST_32(n); // FIXME - push_fixed_value(_float, *((float*)f.buf)); } - case CS_DOUBLE: { - union { uint64_t num; char buf[8]; } f; - f.num = PTR_CAST_64(n); // FIXME - push_fixed_value(_double, *((double*)f.buf)); } - case CS_UINT_8: - push_fixed_value(_uint8, (uint8_t)PTR_CAST_8(n)); - case CS_UINT_16: - push_fixed_value(_uint16, (uint16_t)PTR_CAST_16(n)); - case CS_UINT_32: - push_fixed_value(_uint32, (uint32_t)PTR_CAST_32(n)); - case CS_UINT_64: - push_fixed_value(_uint64, (uint64_t)PTR_CAST_64(n)); - - case CS_INT_8: - push_fixed_value(_int8, (int8_t)PTR_CAST_8(n)); - case CS_INT_16: - push_fixed_value(_int16, (int16_t)PTR_CAST_16(n)); - case CS_INT_32: - push_fixed_value(_int32, (int32_t)PTR_CAST_32(n)); - case CS_INT_64: - push_fixed_value(_int64, (int64_t)PTR_CAST_64(n)); - - //case CS_ - //case CS_ - //case CS_BIG_INT_16: - // again_fixed_trail_if_zero(ACS_BIG_INT_VALUE, (uint16_t)PTR_CAST_16(n), _big_int_zero); - //case CS_BIG_INT_32: - // again_fixed_trail_if_zero(ACS_BIG_INT_VALUE, (uint32_t)PTR_CAST_32(n), _big_int_zero); - //case ACS_BIG_INT_VALUE: - //_big_int_zero: - // // FIXME - // push_variable_value(_big_int, data, n, trail); - - //case CS_BIG_FLOAT_16: - // again_fixed_trail_if_zero(ACS_BIG_FLOAT_VALUE, (uint16_t)PTR_CAST_16(n), _big_float_zero); - //case CS_BIG_FLOAT_32: - // again_fixed_trail_if_zero(ACS_BIG_FLOAT_VALUE, (uint32_t)PTR_CAST_32(n), _big_float_zero); - //case ACS_BIG_FLOAT_VALUE: - //_big_float_zero: - // // FIXME - // push_variable_value(_big_float, data, n, trail); - - case CS_RAW_16: - again_fixed_trail_if_zero(ACS_RAW_VALUE, (uint16_t)PTR_CAST_16(n), _raw_zero); - case CS_RAW_32: - again_fixed_trail_if_zero(ACS_RAW_VALUE, (uint32_t)PTR_CAST_32(n), _raw_zero); - case ACS_RAW_VALUE: - _raw_zero: - push_variable_value(_raw, data, n, trail); - - case CS_ARRAY_16: - start_container(_array, (uint16_t)PTR_CAST_16(n), CT_ARRAY_ITEM); - case CS_ARRAY_32: - /* FIXME security guard */ - start_container(_array, (uint32_t)PTR_CAST_32(n), CT_ARRAY_ITEM); - - case CS_MAP_16: - start_container(_map, (uint16_t)PTR_CAST_16(n), CT_MAP_KEY); - case CS_MAP_32: - /* FIXME security guard */ - start_container(_map, (uint32_t)PTR_CAST_32(n), CT_MAP_KEY); - - default: - goto _failed; - } - } + if(p == pe) { goto _out; } + do { + switch(cs) { + case CS_HEADER: + SWITCH_RANGE_BEGIN + SWITCH_RANGE(0x00, 0x7f) // Positive Fixnum + push_fixed_value(_uint8, *(uint8_t*)p); + SWITCH_RANGE(0xe0, 0xff) // Negative Fixnum + push_fixed_value(_int8, *(int8_t*)p); + SWITCH_RANGE(0xc0, 0xdf) // Variable + switch(*p) { + 
case 0xc0: // nil + push_simple_value(_nil); + //case 0xc1: // never used + case 0xc2: // false + push_simple_value(_false); + case 0xc3: // true + push_simple_value(_true); + case 0xc4: // bin 8 + again_fixed_trail(NEXT_CS(p), 1); + case 0xc5: // bin 16 + again_fixed_trail(NEXT_CS(p), 2); + case 0xc6: // bin 32 + again_fixed_trail(NEXT_CS(p), 4); + case 0xc7: // ext 8 + again_fixed_trail(NEXT_CS(p), 1); + case 0xc8: // ext 16 + again_fixed_trail(NEXT_CS(p), 2); + case 0xc9: // ext 32 + again_fixed_trail(NEXT_CS(p), 4); + case 0xca: // float + case 0xcb: // double + case 0xcc: // unsigned int 8 + case 0xcd: // unsigned int 16 + case 0xce: // unsigned int 32 + case 0xcf: // unsigned int 64 + case 0xd0: // signed int 8 + case 0xd1: // signed int 16 + case 0xd2: // signed int 32 + case 0xd3: // signed int 64 + again_fixed_trail(NEXT_CS(p), 1 << (((unsigned int)*p) & 0x03)); + case 0xd4: // fixext 1 + case 0xd5: // fixext 2 + case 0xd6: // fixext 4 + case 0xd7: // fixext 8 + again_fixed_trail_if_zero(ACS_EXT_VALUE, + (1 << (((unsigned int)*p) & 0x03))+1, + _ext_zero); + case 0xd8: // fixext 16 + again_fixed_trail_if_zero(ACS_EXT_VALUE, 16+1, _ext_zero); + case 0xd9: // str 8 + again_fixed_trail(NEXT_CS(p), 1); + case 0xda: // raw 16 + case 0xdb: // raw 32 + case 0xdc: // array 16 + case 0xdd: // array 32 + case 0xde: // map 16 + case 0xdf: // map 32 + again_fixed_trail(NEXT_CS(p), 2 << (((unsigned int)*p) & 0x01)); + default: + ret = -2; + goto _end; + } + SWITCH_RANGE(0xa0, 0xbf) // FixRaw + again_fixed_trail_if_zero(ACS_RAW_VALUE, ((unsigned int)*p & 0x1f), _raw_zero); + SWITCH_RANGE(0x90, 0x9f) // FixArray + start_container(_array, ((unsigned int)*p) & 0x0f, CT_ARRAY_ITEM); + SWITCH_RANGE(0x80, 0x8f) // FixMap + start_container(_map, ((unsigned int)*p) & 0x0f, CT_MAP_KEY); + + SWITCH_RANGE_DEFAULT + ret = -2; + goto _end; + SWITCH_RANGE_END + // end CS_HEADER + + + _fixed_trail_again: + ++p; + + default: + if((size_t)(pe - p) < trail) { goto _out; } + n = p; p += trail - 1; + switch(cs) { + case CS_EXT_8: + again_fixed_trail_if_zero(ACS_EXT_VALUE, *(uint8_t*)n+1, _ext_zero); + case CS_EXT_16: + again_fixed_trail_if_zero(ACS_EXT_VALUE, + _msgpack_load16(uint16_t,n)+1, + _ext_zero); + case CS_EXT_32: + again_fixed_trail_if_zero(ACS_EXT_VALUE, + _msgpack_load32(uint32_t,n)+1, + _ext_zero); + case CS_FLOAT: { + double f; +#if PY_VERSION_HEX >= 0x030B00A7 + f = PyFloat_Unpack4((const char*)n, 0); +#else + f = _PyFloat_Unpack4((unsigned char*)n, 0); +#endif + push_fixed_value(_float, f); } + case CS_DOUBLE: { + double f; +#if PY_VERSION_HEX >= 0x030B00A7 + f = PyFloat_Unpack8((const char*)n, 0); +#else + f = _PyFloat_Unpack8((unsigned char*)n, 0); +#endif + push_fixed_value(_double, f); } + case CS_UINT_8: + push_fixed_value(_uint8, *(uint8_t*)n); + case CS_UINT_16: + push_fixed_value(_uint16, _msgpack_load16(uint16_t,n)); + case CS_UINT_32: + push_fixed_value(_uint32, _msgpack_load32(uint32_t,n)); + case CS_UINT_64: + push_fixed_value(_uint64, _msgpack_load64(uint64_t,n)); + + case CS_INT_8: + push_fixed_value(_int8, *(int8_t*)n); + case CS_INT_16: + push_fixed_value(_int16, _msgpack_load16(int16_t,n)); + case CS_INT_32: + push_fixed_value(_int32, _msgpack_load32(int32_t,n)); + case CS_INT_64: + push_fixed_value(_int64, _msgpack_load64(int64_t,n)); + + case CS_BIN_8: + again_fixed_trail_if_zero(ACS_BIN_VALUE, *(uint8_t*)n, _bin_zero); + case CS_BIN_16: + again_fixed_trail_if_zero(ACS_BIN_VALUE, _msgpack_load16(uint16_t,n), _bin_zero); + case CS_BIN_32: + again_fixed_trail_if_zero(ACS_BIN_VALUE, 
_msgpack_load32(uint32_t,n), _bin_zero); + case ACS_BIN_VALUE: + _bin_zero: + push_variable_value(_bin, data, n, trail); + + case CS_RAW_8: + again_fixed_trail_if_zero(ACS_RAW_VALUE, *(uint8_t*)n, _raw_zero); + case CS_RAW_16: + again_fixed_trail_if_zero(ACS_RAW_VALUE, _msgpack_load16(uint16_t,n), _raw_zero); + case CS_RAW_32: + again_fixed_trail_if_zero(ACS_RAW_VALUE, _msgpack_load32(uint32_t,n), _raw_zero); + case ACS_RAW_VALUE: + _raw_zero: + push_variable_value(_raw, data, n, trail); + + case ACS_EXT_VALUE: + _ext_zero: + push_variable_value(_ext, data, n, trail); + + case CS_ARRAY_16: + start_container(_array, _msgpack_load16(uint16_t,n), CT_ARRAY_ITEM); + case CS_ARRAY_32: + /* FIXME security guard */ + start_container(_array, _msgpack_load32(uint32_t,n), CT_ARRAY_ITEM); + + case CS_MAP_16: + start_container(_map, _msgpack_load16(uint16_t,n), CT_MAP_KEY); + case CS_MAP_32: + /* FIXME security guard */ + start_container(_map, _msgpack_load32(uint32_t,n), CT_MAP_KEY); + + default: + goto _failed; + } + } _push: - if(top == 0) { goto _finish; } - c = &stack[top-1]; - switch(c->ct) { - case CT_ARRAY_ITEM: - if(msgpack_unpack_callback(_array_item)(user, c->curr, &c->obj, obj) < 0) { goto _failed; } - if(++c->curr == c->count) { - msgpack_unpack_callback(_array_end)(user, &c->obj); - obj = c->obj; - --top; - /*printf("stack pop %d\n", top);*/ - goto _push; - } - goto _header_again; - case CT_MAP_KEY: - c->map_key = obj; - c->ct = CT_MAP_VALUE; - goto _header_again; - case CT_MAP_VALUE: - if(msgpack_unpack_callback(_map_item)(user, &c->obj, c->map_key, obj) < 0) { goto _failed; } - if(--c->count == 0) { - msgpack_unpack_callback(_map_end)(user, &c->obj); - obj = c->obj; - --top; - /*printf("stack pop %d\n", top);*/ - goto _push; - } - c->ct = CT_MAP_KEY; - goto _header_again; - - default: - goto _failed; - } + if(top == 0) { goto _finish; } + c = &stack[top-1]; + switch(c->ct) { + case CT_ARRAY_ITEM: + if(construct_cb(_array_item)(user, c->count, &c->obj, obj) < 0) { goto _failed; } + if(++c->count == c->size) { + obj = c->obj; + if (construct_cb(_array_end)(user, &obj) < 0) { goto _failed; } + --top; + /*printf("stack pop %d\n", top);*/ + goto _push; + } + goto _header_again; + case CT_MAP_KEY: + c->map_key = obj; + c->ct = CT_MAP_VALUE; + goto _header_again; + case CT_MAP_VALUE: + if(construct_cb(_map_item)(user, c->count, &c->obj, c->map_key, obj) < 0) { goto _failed; } + if(++c->count == c->size) { + obj = c->obj; + if (construct_cb(_map_end)(user, &obj) < 0) { goto _failed; } + --top; + /*printf("stack pop %d\n", top);*/ + goto _push; + } + c->ct = CT_MAP_KEY; + goto _header_again; + + default: + goto _failed; + } _header_again: - cs = CS_HEADER; - ++p; - } while(p != pe); - goto _out; + cs = CS_HEADER; + ++p; + } while(p != pe); + goto _out; _finish: - stack[0].obj = obj; - ++p; - ret = 1; - /*printf("-- finish --\n"); */ - goto _end; + if (!construct) + unpack_callback_nil(user, &obj); + stack[0].obj = obj; + ++p; + ret = 1; + /*printf("-- finish --\n"); */ + goto _end; _failed: - /*printf("** FAILED **\n"); */ - ret = -1; - goto _end; + /*printf("** FAILED **\n"); */ + ret = -1; + goto _end; _out: - ret = 0; - goto _end; + ret = 0; + goto _end; _end: - ctx->cs = cs; - ctx->trail = trail; - ctx->top = top; - *off = p - (const unsigned char*)data; + ctx->cs = cs; + ctx->trail = trail; + ctx->top = top; + *off = p - (const unsigned char*)data; - return ret; + return ret; +#undef construct_cb } - -#undef msgpack_unpack_func -#undef msgpack_unpack_callback -#undef msgpack_unpack_struct 
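The `construct` flag threaded through `unpack_execute()` above lets the same state machine either build Python objects or merely validate and skip over them; the thin `unpack_construct()` / `unpack_skip()` wrappers and the `read_array_header` / `read_map_header` instantiations of unpack_container_header.h are wired up just below. At the Python level this is what backs `Unpacker.skip()` and `Unpacker.read_array_header()` / `read_map_header()`. A minimal usage sketch, assuming the current msgpack-python `Unpacker` streaming API:

```python
from msgpack import Unpacker, packb

unpacker = Unpacker()
unpacker.feed(packb(["some", "large", "value"]) + packb([1, 2, 3]))

unpacker.skip()                    # walk the first object without constructing it
n = unpacker.read_array_header()   # read only the header of the second object
items = [unpacker.unpack() for _ in range(n)]
assert items == [1, 2, 3]
```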
-#undef msgpack_unpack_object -#undef msgpack_unpack_user - +#undef NEXT_CS +#undef SWITCH_RANGE_BEGIN +#undef SWITCH_RANGE +#undef SWITCH_RANGE_DEFAULT +#undef SWITCH_RANGE_END #undef push_simple_value #undef push_fixed_value #undef push_variable_value @@ -378,9 +397,27 @@ msgpack_unpack_func(int, _execute)(msgpack_unpack_struct(_context)* ctx, const c #undef again_fixed_trail_if_zero #undef start_container -#undef NEXT_CS -#undef PTR_CAST_8 -#undef PTR_CAST_16 -#undef PTR_CAST_32 -#undef PTR_CAST_64 +static int unpack_construct(unpack_context *ctx, const char *data, Py_ssize_t len, Py_ssize_t *off) { + return unpack_execute(1, ctx, data, len, off); +} +static int unpack_skip(unpack_context *ctx, const char *data, Py_ssize_t len, Py_ssize_t *off) { + return unpack_execute(0, ctx, data, len, off); +} +#define unpack_container_header read_array_header +#define fixed_offset 0x90 +#define var_offset 0xdc +#include "unpack_container_header.h" +#undef unpack_container_header +#undef fixed_offset +#undef var_offset + +#define unpack_container_header read_map_header +#define fixed_offset 0x80 +#define var_offset 0xde +#include "unpack_container_header.h" +#undef unpack_container_header +#undef fixed_offset +#undef var_offset + +/* vim: set ts=4 sw=4 sts=4 expandtab */ diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..c69d5a7c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,45 @@ +[build-system] +requires = ["setuptools >= 78.1.1"] +build-backend = "setuptools.build_meta" + +[project] +name = "msgpack" +dynamic = ["version"] +license = "Apache-2.0" +authors = [{name="Inada Naoki", email="songofacandy@gmail.com"}] +description = "MessagePack serializer" +readme = "README.md" +keywords = ["msgpack", "messagepack", "serializer", "serialization", "binary"] +requires-python = ">=3.10" +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Operating System :: OS Independent", + "Topic :: File Formats", + "Intended Audience :: Developers", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] + +[project.urls] +Homepage = "https://msgpack.org/" +Documentation = "https://msgpack-python.readthedocs.io/" +Repository = "https://github.com/msgpack/msgpack-python/" +Tracker = "https://github.com/msgpack/msgpack-python/issues" +Changelog = "https://github.com/msgpack/msgpack-python/blob/main/ChangeLog.rst" + +[tool.setuptools] +# Do not install C/C++/Cython source files +include-package-data = false + +[tool.setuptools.dynamic] +version = {attr = "msgpack.__version__"} + +[tool.ruff] +line-length = 100 +target-version = "py310" +lint.select = [ + "E", # pycodestyle + "F", # Pyflakes + "I", # isort + #"UP", pyupgrade +] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..9e4643b6 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +Cython==3.2.1 +setuptools==78.1.1 +build diff --git a/setup.py b/setup.py old mode 100755 new mode 100644 index f34e341d..4029e9ed --- a/setup.py +++ b/setup.py @@ -1,86 +1,32 @@ #!/usr/bin/env python -# coding: utf-8 -version = (0, 1, 12) - import os import sys -from glob import glob -from distutils.core import setup, Extension -from distutils.command.sdist import sdist - -try: - from Cython.Distutils import build_ext - import Cython.Compiler.Main as cython_compiler - have_cython = True -except ImportError: - from distutils.command.build_ext import build_ext - have_cython = False - -# make msgpack/__verison__.py -f = 
open('msgpack/__version__.py', 'w') -f.write("version = %r\n" % (version,)) -f.close() -del f - -version_str = '.'.join(str(x) for x in version[:3]) -if len(version) > 3 and version[3] != 'final': - version_str += version[3] - -# take care of extension modules. -if have_cython: - sources = ['msgpack/_msgpack.pyx'] - - class Sdist(sdist): - def __init__(self, *args, **kwargs): - for src in glob('msgpack/*.pyx'): - cython_compiler.compile(glob('msgpack/*.pyx'), - cython_compiler.default_options) - sdist.__init__(self, *args, **kwargs) -else: - sources = ['msgpack/_msgpack.c'] - for f in sources: - if not os.path.exists(f): - raise ImportError("Building msgpack from VCS needs Cython. Install Cython or use sdist package.") +from setuptools import Extension, setup - Sdist = sdist +PYPY = hasattr(sys, "pypy_version_info") libraries = [] -if sys.platform == 'win32': - libraries.append('ws2_32') - -msgpack_mod = Extension('msgpack._msgpack', - sources=sources, - libraries=libraries, - ) -del sources, libraries - - -desc = 'MessagePack (de)serializer.' -long_desc = """MessagePack (de)serializer for Python. - -What's MessagePack? (from http://msgpack.org/) - - MessagePack is a binary-based efficient data interchange format that is - focused on high performance. It is like JSON, but very fast and small. -""" - -setup(name='msgpack-python', - author='INADA Naoki', - author_email='songofacandy@gmail.com', - version=version_str, - cmdclass={'build_ext': build_ext, 'sdist': Sdist}, - ext_modules=[msgpack_mod], - packages=['msgpack'], - description=desc, - long_description=long_desc, - url='http://msgpack.org/', - download_url='http://pypi.python.org/pypi/msgpack/', - classifiers=[ - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 3', - 'Development Status :: 4 - Beta', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: Apache Software License', - ] - ) +macros = [] +ext_modules = [] + +if sys.platform == "win32": + libraries.append("ws2_32") + macros = [("__LITTLE_ENDIAN__", "1")] + +if not PYPY and not os.environ.get("MSGPACK_PUREPYTHON"): + ext_modules.append( + Extension( + "msgpack._cmsgpack", + sources=["msgpack/_cmsgpack.c"], + libraries=libraries, + include_dirs=["."], + define_macros=macros, + ) + ) +del libraries, macros + +setup( + ext_modules=ext_modules, + packages=["msgpack"], +) diff --git a/test/test_buffer.py b/test/test_buffer.py index ce7a72d2..ca097222 100644 --- a/test/test_buffer.py +++ b/test/test_buffer.py @@ -1,16 +1,49 @@ -#!/usr/bin/env python -# coding: utf-8 +from pytest import raises + +from msgpack import Packer, packb, unpackb -from nose import main -from nose.tools import * -from msgpack import packb, unpackb def test_unpack_buffer(): from array import array - buf = array('c') - buf.fromstring(packb(('foo', 'bar'))) - obj = unpackb(buf) - assert_equal(('foo', 'bar'), obj) -if __name__ == '__main__': - main() + buf = array("b") + buf.frombytes(packb((b"foo", b"bar"))) + obj = unpackb(buf, use_list=1) + assert [b"foo", b"bar"] == obj + + +def test_unpack_bytearray(): + buf = bytearray(packb((b"foo", b"bar"))) + obj = unpackb(buf, use_list=1) + assert [b"foo", b"bar"] == obj + expected_type = bytes + assert all(type(s) is expected_type for s in obj) + + +def test_unpack_memoryview(): + buf = bytearray(packb((b"foo", b"bar"))) + view = memoryview(buf) + obj = unpackb(view, use_list=1) + assert [b"foo", b"bar"] == obj + expected_type = bytes + assert all(type(s) is expected_type for s in obj) + + +def test_packer_getbuffer(): + packer 
= Packer(autoreset=False) + packer.pack_array_header(2) + packer.pack(42) + packer.pack("hello") + buffer = packer.getbuffer() + assert isinstance(buffer, memoryview) + assert bytes(buffer) == b"\x92*\xa5hello" + + if Packer.__module__ == "msgpack._cmsgpack": # only for Cython + # cython Packer supports buffer protocol directly + assert bytes(packer) == b"\x92*\xa5hello" + + with raises(BufferError): + packer.pack(42) + buffer.release() + packer.pack(42) + assert bytes(packer) == b"\x92*\xa5hello*" diff --git a/test/test_case.py b/test/test_case.py index 1cbc4945..c4c615e3 100644 --- a/test/test_case.py +++ b/test/test_case.py @@ -1,105 +1,136 @@ #!/usr/bin/env python -# coding: utf-8 +from msgpack import packb, unpackb -from nose import main -from nose.tools import * -from msgpack import packs, unpacks +def check(length, obj, use_bin_type=True): + v = packb(obj, use_bin_type=use_bin_type) + assert len(v) == length, f"{obj!r} length should be {length!r} but get {len(v)!r}" + assert unpackb(v, use_list=0, raw=not use_bin_type) == obj -def check(length, obj): - v = packs(obj) - assert_equal(len(v), length, "%r length should be %r but get %r" % (obj, length, len(v))) - assert_equal(unpacks(v), obj) def test_1(): - for o in [None, True, False, 0, 1, (1 << 6), (1 << 7) - 1, -1, - -((1<<5)-1), -(1<<5)]: + for o in [ + None, + True, + False, + 0, + 1, + (1 << 6), + (1 << 7) - 1, + -1, + -((1 << 5) - 1), + -(1 << 5), + ]: check(1, o) + def test_2(): - for o in [1 << 7, (1 << 8) - 1, - -((1<<5)+1), -(1<<7) - ]: + for o in [1 << 7, (1 << 8) - 1, -((1 << 5) + 1), -(1 << 7)]: check(2, o) + def test_3(): - for o in [1 << 8, (1 << 16) - 1, - -((1<<7)+1), -(1<<15)]: + for o in [1 << 8, (1 << 16) - 1, -((1 << 7) + 1), -(1 << 15)]: check(3, o) + def test_5(): - for o in [1 << 16, (1 << 32) - 1, - -((1<<15)+1), -(1<<31)]: + for o in [1 << 16, (1 << 32) - 1, -((1 << 15) + 1), -(1 << 31)]: check(5, o) + def test_9(): - for o in [1 << 32, (1 << 64) - 1, - -((1<<31)+1), -(1<<63), - 1.0, 0.1, -0.1, -1.0]: + for o in [ + 1 << 32, + (1 << 64) - 1, + -((1 << 31) + 1), + -(1 << 63), + 1.0, + 0.1, + -0.1, + -1.0, + ]: check(9, o) def check_raw(overhead, num): - check(num + overhead, " " * num) + check(num + overhead, b" " * num, use_bin_type=False) + def test_fixraw(): check_raw(1, 0) - check_raw(1, (1<<5) - 1) + check_raw(1, (1 << 5) - 1) + def test_raw16(): - check_raw(3, 1<<5) - check_raw(3, (1<<16) - 1) + check_raw(3, 1 << 5) + check_raw(3, (1 << 16) - 1) + def test_raw32(): - check_raw(5, 1<<16) + check_raw(5, 1 << 16) def check_array(overhead, num): check(num + overhead, (None,) * num) + def test_fixarray(): check_array(1, 0) check_array(1, (1 << 4) - 1) + def test_array16(): check_array(3, 1 << 4) - check_array(3, (1<<16)-1) + check_array(3, (1 << 16) - 1) + def test_array32(): - check_array(5, (1<<16)) + check_array(5, (1 << 16)) def match(obj, buf): - assert_equal(packs(obj), buf) - assert_equal(unpacks(buf), obj) + assert packb(obj) == buf + assert unpackb(buf, use_list=0, strict_map_key=False) == obj + def test_match(): cases = [ - (None, '\xc0'), - (False, '\xc2'), - (True, '\xc3'), - (0, '\x00'), - (127, '\x7f'), - (128, '\xcc\x80'), - (256, '\xcd\x01\x00'), - (-1, '\xff'), - (-33, '\xd0\xdf'), - (-129, '\xd1\xff\x7f'), - ({1:1}, '\x81\x01\x01'), - (1.0, "\xcb\x3f\xf0\x00\x00\x00\x00\x00\x00"), - ((), '\x90'), - (tuple(range(15)),"\x9f\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e"), - (tuple(range(16)),"\xdc\x00\x10\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"), - ({}, 
'\x80'), - (dict([(x,x) for x in range(15)]), '\x8f\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e'), - (dict([(x,x) for x in range(16)]), '\xde\x00\x10\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e\x0f\x0f'), - ] + (None, b"\xc0"), + (False, b"\xc2"), + (True, b"\xc3"), + (0, b"\x00"), + (127, b"\x7f"), + (128, b"\xcc\x80"), + (256, b"\xcd\x01\x00"), + (-1, b"\xff"), + (-33, b"\xd0\xdf"), + (-129, b"\xd1\xff\x7f"), + ({1: 1}, b"\x81\x01\x01"), + (1.0, b"\xcb\x3f\xf0\x00\x00\x00\x00\x00\x00"), + ((), b"\x90"), + ( + tuple(range(15)), + b"\x9f\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e", + ), + ( + tuple(range(16)), + b"\xdc\x00\x10\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + ), + ({}, b"\x80"), + ( + {x: x for x in range(15)}, + b"\x8f\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e", + ), + ( + {x: x for x in range(16)}, + b"\xde\x00\x10\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e\x0f\x0f", + ), + ] for v, p in cases: match(v, p) -def test_unicode(): - assert_equal('foobar', unpacks(packs(u'foobar'))) -if __name__ == '__main__': - main() +def test_unicode(): + assert unpackb(packb("foobar"), use_list=1) == "foobar" diff --git a/test/test_except.py b/test/test_except.py index 574728fb..b77ac800 100644 --- a/test/test_except.py +++ b/test/test_except.py @@ -1,14 +1,63 @@ #!/usr/bin/env python -# coding: utf-8 - -from nose.tools import * -from msgpack import packs, unpacks import datetime +from pytest import raises + +from msgpack import FormatError, OutOfData, StackError, Unpacker, packb, unpackb + + +class DummyException(Exception): + pass + + def test_raise_on_find_unsupported_value(): - assert_raises(TypeError, packs, datetime.datetime.now()) + with raises(TypeError): + packb(datetime.datetime.now()) + + +def test_raise_from_object_hook(): + def hook(obj): + raise DummyException + + raises(DummyException, unpackb, packb({}), object_hook=hook) + raises(DummyException, unpackb, packb({"fizz": "buzz"}), object_hook=hook) + raises(DummyException, unpackb, packb({"fizz": "buzz"}), object_pairs_hook=hook) + raises(DummyException, unpackb, packb({"fizz": {"buzz": "spam"}}), object_hook=hook) + raises( + DummyException, + unpackb, + packb({"fizz": {"buzz": "spam"}}), + object_pairs_hook=hook, + ) + + +def test_invalidvalue(): + incomplete = b"\xd9\x97#DL_" # raw8 - length=0x97 + with raises(ValueError): + unpackb(incomplete) + + with raises(OutOfData): + unpacker = Unpacker() + unpacker.feed(incomplete) + unpacker.unpack() + + with raises(FormatError): + unpackb(b"\xc1") # (undefined tag) + + with raises(FormatError): + unpackb(b"\x91\xc1") # fixarray(len=1) [ (undefined tag) ] + + with raises(StackError): + unpackb(b"\x91" * 3000) # nested fixarray(len=1) + + +def test_strict_map_key(): + valid = {"unicode": 1, b"bytes": 2} + packed = packb(valid, use_bin_type=True) + assert valid == unpackb(packed, raw=False, strict_map_key=True) -if __name__ == '__main__': - from nose import main - main() + invalid = {42: 1} + packed = packb(invalid, use_bin_type=True) + with raises(ValueError): + unpackb(packed, raw=False, strict_map_key=True) diff --git a/test/test_extension.py b/test/test_extension.py new file mode 100644 index 00000000..aaf0fd92 --- /dev/null +++ b/test/test_extension.py @@ -0,0 +1,78 @@ +import array + 
+import msgpack +from msgpack import ExtType + + +def test_pack_ext_type(): + def p(s): + packer = msgpack.Packer() + packer.pack_ext_type(0x42, s) + return packer.bytes() + + assert p(b"A") == b"\xd4\x42A" # fixext 1 + assert p(b"AB") == b"\xd5\x42AB" # fixext 2 + assert p(b"ABCD") == b"\xd6\x42ABCD" # fixext 4 + assert p(b"ABCDEFGH") == b"\xd7\x42ABCDEFGH" # fixext 8 + assert p(b"A" * 16) == b"\xd8\x42" + b"A" * 16 # fixext 16 + assert p(b"ABC") == b"\xc7\x03\x42ABC" # ext 8 + assert p(b"A" * 0x0123) == b"\xc8\x01\x23\x42" + b"A" * 0x0123 # ext 16 + assert p(b"A" * 0x00012345) == b"\xc9\x00\x01\x23\x45\x42" + b"A" * 0x00012345 # ext 32 + + +def test_unpack_ext_type(): + def check(b, expected): + assert msgpack.unpackb(b) == expected + + check(b"\xd4\x42A", ExtType(0x42, b"A")) # fixext 1 + check(b"\xd5\x42AB", ExtType(0x42, b"AB")) # fixext 2 + check(b"\xd6\x42ABCD", ExtType(0x42, b"ABCD")) # fixext 4 + check(b"\xd7\x42ABCDEFGH", ExtType(0x42, b"ABCDEFGH")) # fixext 8 + check(b"\xd8\x42" + b"A" * 16, ExtType(0x42, b"A" * 16)) # fixext 16 + check(b"\xc7\x03\x42ABC", ExtType(0x42, b"ABC")) # ext 8 + check(b"\xc8\x01\x23\x42" + b"A" * 0x0123, ExtType(0x42, b"A" * 0x0123)) # ext 16 + check( + b"\xc9\x00\x01\x23\x45\x42" + b"A" * 0x00012345, + ExtType(0x42, b"A" * 0x00012345), + ) # ext 32 + + +def test_extension_type(): + def default(obj): + print("default called", obj) + if isinstance(obj, array.array): + typecode = 123 # application specific typecode + try: + data = obj.tobytes() + except AttributeError: + data = obj.tostring() + return ExtType(typecode, data) + raise TypeError(f"Unknown type object {obj!r}") + + def ext_hook(code, data): + print("ext_hook called", code, data) + assert code == 123 + obj = array.array("d") + obj.frombytes(data) + return obj + + obj = [42, b"hello", array.array("d", [1.1, 2.2, 3.3])] + s = msgpack.packb(obj, default=default) + obj2 = msgpack.unpackb(s, ext_hook=ext_hook) + assert obj == obj2 + + +def test_overriding_hooks(): + def default(obj): + if isinstance(obj, int): + return {"__type__": "long", "__data__": str(obj)} + else: + return obj + + obj = {"testval": 1823746192837461928374619} + refobj = {"testval": default(obj["testval"])} + refout = msgpack.packb(refobj) + assert isinstance(refout, (str, bytes)) + testout = msgpack.packb(obj, default=default) + + assert refout == testout diff --git a/test/test_format.py b/test/test_format.py index 562ef54a..c06c87dc 100644 --- a/test/test_format.py +++ b/test/test_format.py @@ -1,75 +1,88 @@ #!/usr/bin/env python -# coding: utf-8 -from nose import main -from nose.tools import * -from msgpack import unpacks +from msgpack import unpackb + + +def check(src, should, use_list=0, raw=True): + assert unpackb(src, use_list=use_list, raw=raw, strict_map_key=False) == should -def check(src, should): - assert_equal(unpacks(src), should) def testSimpleValue(): - check("\x93\xc0\xc2\xc3", - (None, False, True,)) + check(b"\x93\xc0\xc2\xc3", (None, False, True)) + def testFixnum(): - check("\x92\x93\x00\x40\x7f\x93\xe0\xf0\xff", - ((0,64,127,), (-32,-16,-1,),) - ) + check(b"\x92\x93\x00\x40\x7f\x93\xe0\xf0\xff", ((0, 64, 127), (-32, -16, -1))) + def testFixArray(): - check("\x92\x90\x91\x91\xc0", - ((),((None,),),), - ) + check(b"\x92\x90\x91\x91\xc0", ((), ((None,),))) + def testFixRaw(): - check("\x94\xa0\xa1a\xa2bc\xa3def", - ("", "a", "bc", "def",), - ) + check(b"\x94\xa0\xa1a\xa2bc\xa3def", (b"", b"a", b"bc", b"def")) + def testFixMap(): - check( - "\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80", - {False: {None: None}, 
True:{None:{}}}, - ) + check(b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80", {False: {None: None}, True: {None: {}}}) + def testUnsignedInt(): check( - "\x99\xcc\x00\xcc\x80\xcc\xff\xcd\x00\x00\xcd\x80\x00" - "\xcd\xff\xff\xce\x00\x00\x00\x00\xce\x80\x00\x00\x00" - "\xce\xff\xff\xff\xff", - (0, 128, 255, 0, 32768, 65535, 0, 2147483648, 4294967295,), - ) + b"\x99\xcc\x00\xcc\x80\xcc\xff\xcd\x00\x00\xcd\x80\x00" + b"\xcd\xff\xff\xce\x00\x00\x00\x00\xce\x80\x00\x00\x00" + b"\xce\xff\xff\xff\xff", + (0, 128, 255, 0, 32768, 65535, 0, 2147483648, 4294967295), + ) + def testSignedInt(): - check("\x99\xd0\x00\xd0\x80\xd0\xff\xd1\x00\x00\xd1\x80\x00" - "\xd1\xff\xff\xd2\x00\x00\x00\x00\xd2\x80\x00\x00\x00" - "\xd2\xff\xff\xff\xff", - (0, -128, -1, 0, -32768, -1, 0, -2147483648, -1,)) + check( + b"\x99\xd0\x00\xd0\x80\xd0\xff\xd1\x00\x00\xd1\x80\x00" + b"\xd1\xff\xff\xd2\x00\x00\x00\x00\xd2\x80\x00\x00\x00" + b"\xd2\xff\xff\xff\xff", + (0, -128, -1, 0, -32768, -1, 0, -2147483648, -1), + ) + def testRaw(): - check("\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00" - "\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab", - ("", "a", "ab", "", "a", "ab")) + check( + b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00" + b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab", + (b"", b"a", b"ab", b"", b"a", b"ab"), + ) + check( + b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00" + b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab", + ("", "a", "ab", "", "a", "ab"), + raw=False, + ) + def testArray(): - check("\x96\xdc\x00\x00\xdc\x00\x01\xc0\xdc\x00\x02\xc2\xc3\xdd\x00" - "\x00\x00\x00\xdd\x00\x00\x00\x01\xc0\xdd\x00\x00\x00\x02" - "\xc2\xc3", - ((), (None,), (False,True), (), (None,), (False,True)) - ) + check( + b"\x96\xdc\x00\x00\xdc\x00\x01\xc0\xdc\x00\x02\xc2\xc3\xdd\x00" + b"\x00\x00\x00\xdd\x00\x00\x00\x01\xc0\xdd\x00\x00\x00\x02" + b"\xc2\xc3", + ((), (None,), (False, True), (), (None,), (False, True)), + ) + def testMap(): check( - "\x96" - "\xde\x00\x00" - "\xde\x00\x01\xc0\xc2" - "\xde\x00\x02\xc0\xc2\xc3\xc2" - "\xdf\x00\x00\x00\x00" - "\xdf\x00\x00\x00\x01\xc0\xc2" - "\xdf\x00\x00\x00\x02\xc0\xc2\xc3\xc2", - ({}, {None: False}, {True: False, None: False}, {}, - {None: False}, {True: False, None: False})) - -if __name__ == '__main__': - main() + b"\x96" + b"\xde\x00\x00" + b"\xde\x00\x01\xc0\xc2" + b"\xde\x00\x02\xc0\xc2\xc3\xc2" + b"\xdf\x00\x00\x00\x00" + b"\xdf\x00\x00\x00\x01\xc0\xc2" + b"\xdf\x00\x00\x00\x02\xc0\xc2\xc3\xc2", + ( + {}, + {None: False}, + {True: False, None: False}, + {}, + {None: False}, + {True: False, None: False}, + ), + ) diff --git a/test/test_limits.py b/test/test_limits.py new file mode 100644 index 00000000..9b92b4d9 --- /dev/null +++ b/test/test_limits.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python +import pytest + +from msgpack import ( + ExtType, + Packer, + PackOverflowError, + PackValueError, + Unpacker, + UnpackValueError, + packb, + unpackb, +) + + +def test_integer(): + x = -(2**63) + assert unpackb(packb(x)) == x + with pytest.raises(PackOverflowError): + packb(x - 1) + + x = 2**64 - 1 + assert unpackb(packb(x)) == x + with pytest.raises(PackOverflowError): + packb(x + 1) + + +def test_array_header(): + packer = Packer() + packer.pack_array_header(2**32 - 1) + with pytest.raises(PackValueError): + packer.pack_array_header(2**32) + + +def test_map_header(): + packer = Packer() + packer.pack_map_header(2**32 - 1) + with pytest.raises(PackValueError): + packer.pack_array_header(2**32) + + +def test_max_str_len(): + d = "x" * 3 + packed = 
packb(d) + + unpacker = Unpacker(max_str_len=3, raw=False) + unpacker.feed(packed) + assert unpacker.unpack() == d + + unpacker = Unpacker(max_str_len=2, raw=False) + with pytest.raises(UnpackValueError): + unpacker.feed(packed) + unpacker.unpack() + + +def test_max_bin_len(): + d = b"x" * 3 + packed = packb(d, use_bin_type=True) + + unpacker = Unpacker(max_bin_len=3) + unpacker.feed(packed) + assert unpacker.unpack() == d + + unpacker = Unpacker(max_bin_len=2) + with pytest.raises(UnpackValueError): + unpacker.feed(packed) + unpacker.unpack() + + +def test_max_array_len(): + d = [1, 2, 3] + packed = packb(d) + + unpacker = Unpacker(max_array_len=3) + unpacker.feed(packed) + assert unpacker.unpack() == d + + unpacker = Unpacker(max_array_len=2) + with pytest.raises(UnpackValueError): + unpacker.feed(packed) + unpacker.unpack() + + +def test_max_map_len(): + d = {1: 2, 3: 4, 5: 6} + packed = packb(d) + + unpacker = Unpacker(max_map_len=3, strict_map_key=False) + unpacker.feed(packed) + assert unpacker.unpack() == d + + unpacker = Unpacker(max_map_len=2, strict_map_key=False) + with pytest.raises(UnpackValueError): + unpacker.feed(packed) + unpacker.unpack() + + +def test_max_ext_len(): + d = ExtType(42, b"abc") + packed = packb(d) + + unpacker = Unpacker(max_ext_len=3) + unpacker.feed(packed) + assert unpacker.unpack() == d + + unpacker = Unpacker(max_ext_len=2) + with pytest.raises(UnpackValueError): + unpacker.feed(packed) + unpacker.unpack() + + +# PyPy fails following tests because of constant folding? +# https://bugs.pypy.org/issue1721 +# @pytest.mark.skipif(True, reason="Requires very large memory.") +# def test_binary(): +# x = b'x' * (2**32 - 1) +# assert unpackb(packb(x)) == x +# del x +# x = b'x' * (2**32) +# with pytest.raises(ValueError): +# packb(x) +# +# +# @pytest.mark.skipif(True, reason="Requires very large memory.") +# def test_string(): +# x = 'x' * (2**32 - 1) +# assert unpackb(packb(x)) == x +# x += 'y' +# with pytest.raises(ValueError): +# packb(x) +# +# +# @pytest.mark.skipif(True, reason="Requires very large memory.") +# def test_array(): +# x = [0] * (2**32 - 1) +# assert unpackb(packb(x)) == x +# x.append(0) +# with pytest.raises(ValueError): +# packb(x) + + +# auto max len + + +def test_auto_max_array_len(): + packed = b"\xde\x00\x06zz" + with pytest.raises(UnpackValueError): + unpackb(packed, raw=False) + + unpacker = Unpacker(max_buffer_size=5, raw=False) + unpacker.feed(packed) + with pytest.raises(UnpackValueError): + unpacker.unpack() + + +def test_auto_max_map_len(): + # len(packed) == 6 -> max_map_len == 3 + packed = b"\xde\x00\x04zzz" + with pytest.raises(UnpackValueError): + unpackb(packed, raw=False) + + unpacker = Unpacker(max_buffer_size=6, raw=False) + unpacker.feed(packed) + with pytest.raises(UnpackValueError): + unpacker.unpack() diff --git a/test/test_memoryview.py b/test/test_memoryview.py new file mode 100644 index 00000000..0a2a6f53 --- /dev/null +++ b/test/test_memoryview.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python + +from array import array + +from msgpack import packb, unpackb + + +def make_array(f, data): + a = array(f) + a.frombytes(data) + return a + + +def _runtest(format, nbytes, expected_header, expected_prefix, use_bin_type): + # create a new array + original_array = array(format) + original_array.fromlist([255] * (nbytes // original_array.itemsize)) + original_data = original_array.tobytes() + view = memoryview(original_array) + + # pack, unpack, and reconstruct array + packed = packb(view, use_bin_type=use_bin_type) + unpacked = 
unpackb(packed, raw=(not use_bin_type)) + reconstructed_array = make_array(format, unpacked) + + # check that we got the right amount of data + assert len(original_data) == nbytes + # check packed header + assert packed[:1] == expected_header + # check packed length prefix, if any + assert packed[1 : 1 + len(expected_prefix)] == expected_prefix + # check packed data + assert packed[1 + len(expected_prefix) :] == original_data + # check array unpacked correctly + assert original_array == reconstructed_array + + +def test_fixstr_from_byte(): + _runtest("B", 1, b"\xa1", b"", False) + _runtest("B", 31, b"\xbf", b"", False) + + +def test_fixstr_from_float(): + _runtest("f", 4, b"\xa4", b"", False) + _runtest("f", 28, b"\xbc", b"", False) + + +def test_str16_from_byte(): + _runtest("B", 2**8, b"\xda", b"\x01\x00", False) + _runtest("B", 2**16 - 1, b"\xda", b"\xff\xff", False) + + +def test_str16_from_float(): + _runtest("f", 2**8, b"\xda", b"\x01\x00", False) + _runtest("f", 2**16 - 4, b"\xda", b"\xff\xfc", False) + + +def test_str32_from_byte(): + _runtest("B", 2**16, b"\xdb", b"\x00\x01\x00\x00", False) + + +def test_str32_from_float(): + _runtest("f", 2**16, b"\xdb", b"\x00\x01\x00\x00", False) + + +def test_bin8_from_byte(): + _runtest("B", 1, b"\xc4", b"\x01", True) + _runtest("B", 2**8 - 1, b"\xc4", b"\xff", True) + + +def test_bin8_from_float(): + _runtest("f", 4, b"\xc4", b"\x04", True) + _runtest("f", 2**8 - 4, b"\xc4", b"\xfc", True) + + +def test_bin16_from_byte(): + _runtest("B", 2**8, b"\xc5", b"\x01\x00", True) + _runtest("B", 2**16 - 1, b"\xc5", b"\xff\xff", True) + + +def test_bin16_from_float(): + _runtest("f", 2**8, b"\xc5", b"\x01\x00", True) + _runtest("f", 2**16 - 4, b"\xc5", b"\xff\xfc", True) + + +def test_bin32_from_byte(): + _runtest("B", 2**16, b"\xc6", b"\x00\x01\x00\x00", True) + + +def test_bin32_from_float(): + _runtest("f", 2**16, b"\xc6", b"\x00\x01\x00\x00", True) + + +def test_multidim_memoryview(): + # See https://github.com/msgpack/msgpack-python/issues/526 + view = memoryview(b"\00" * 6) + data = view.cast(view.format, (3, 2)) + packed = packb(data) + assert packed == b"\xc4\x06\x00\x00\x00\x00\x00\x00" diff --git a/test/test_newspec.py b/test/test_newspec.py new file mode 100644 index 00000000..9e2f9be5 --- /dev/null +++ b/test/test_newspec.py @@ -0,0 +1,90 @@ +from msgpack import ExtType, packb, unpackb + + +def test_str8(): + header = b"\xd9" + data = b"x" * 32 + b = packb(data.decode(), use_bin_type=True) + assert len(b) == len(data) + 2 + assert b[0:2] == header + b"\x20" + assert b[2:] == data + assert unpackb(b, raw=True) == data + assert unpackb(b, raw=False) == data.decode() + + data = b"x" * 255 + b = packb(data.decode(), use_bin_type=True) + assert len(b) == len(data) + 2 + assert b[0:2] == header + b"\xff" + assert b[2:] == data + assert unpackb(b, raw=True) == data + assert unpackb(b, raw=False) == data.decode() + + +def test_bin8(): + header = b"\xc4" + data = b"" + b = packb(data, use_bin_type=True) + assert len(b) == len(data) + 2 + assert b[0:2] == header + b"\x00" + assert b[2:] == data + assert unpackb(b) == data + + data = b"x" * 255 + b = packb(data, use_bin_type=True) + assert len(b) == len(data) + 2 + assert b[0:2] == header + b"\xff" + assert b[2:] == data + assert unpackb(b) == data + + +def test_bin16(): + header = b"\xc5" + data = b"x" * 256 + b = packb(data, use_bin_type=True) + assert len(b) == len(data) + 3 + assert b[0:1] == header + assert b[1:3] == b"\x01\x00" + assert b[3:] == data + assert unpackb(b) == data + + data = b"x" 
* 65535 + b = packb(data, use_bin_type=True) + assert len(b) == len(data) + 3 + assert b[0:1] == header + assert b[1:3] == b"\xff\xff" + assert b[3:] == data + assert unpackb(b) == data + + +def test_bin32(): + header = b"\xc6" + data = b"x" * 65536 + b = packb(data, use_bin_type=True) + assert len(b) == len(data) + 5 + assert b[0:1] == header + assert b[1:5] == b"\x00\x01\x00\x00" + assert b[5:] == data + assert unpackb(b) == data + + +def test_ext(): + def check(ext, packed): + assert packb(ext) == packed + assert unpackb(packed) == ext + + check(ExtType(0x42, b"Z"), b"\xd4\x42Z") # fixext 1 + check(ExtType(0x42, b"ZZ"), b"\xd5\x42ZZ") # fixext 2 + check(ExtType(0x42, b"Z" * 4), b"\xd6\x42" + b"Z" * 4) # fixext 4 + check(ExtType(0x42, b"Z" * 8), b"\xd7\x42" + b"Z" * 8) # fixext 8 + check(ExtType(0x42, b"Z" * 16), b"\xd8\x42" + b"Z" * 16) # fixext 16 + # ext 8 + check(ExtType(0x42, b""), b"\xc7\x00\x42") + check(ExtType(0x42, b"Z" * 255), b"\xc7\xff\x42" + b"Z" * 255) + # ext 16 + check(ExtType(0x42, b"Z" * 256), b"\xc8\x01\x00\x42" + b"Z" * 256) + check(ExtType(0x42, b"Z" * 0xFFFF), b"\xc8\xff\xff\x42" + b"Z" * 0xFFFF) + # ext 32 + check(ExtType(0x42, b"Z" * 0x10000), b"\xc9\x00\x01\x00\x00\x42" + b"Z" * 0x10000) + # needs large memory + # check(ExtType(0x42, b'Z'*0xffffffff), + # b'\xc9\xff\xff\xff\xff\x42' + b'Z'*0xffffffff) diff --git a/test/test_obj.py b/test/test_obj.py index 0eebe7be..23be06d5 100644 --- a/test/test_obj.py +++ b/test/test_obj.py @@ -1,46 +1,82 @@ #!/usr/bin/env python -# coding: utf-8 -from nose import main -from nose.tools import * +from pytest import raises + +from msgpack import packb, unpackb -from msgpack import packs, unpacks def _decode_complex(obj): - if '__complex__' in obj: - return complex(obj['real'], obj['imag']) + if b"__complex__" in obj: + return complex(obj[b"real"], obj[b"imag"]) return obj + def _encode_complex(obj): if isinstance(obj, complex): - return {'__complex__': True, 'real': 1, 'imag': 2} + return {b"__complex__": True, b"real": 1, b"imag": 2} return obj + def test_encode_hook(): - packed = packs([3, 1+2j], default=_encode_complex) - unpacked = unpacks(packed) - eq_(unpacked[1], {'__complex__': True, 'real': 1, 'imag': 2}) + packed = packb([3, 1 + 2j], default=_encode_complex) + unpacked = unpackb(packed, use_list=1) + assert unpacked[1] == {b"__complex__": True, b"real": 1, b"imag": 2} + def test_decode_hook(): - packed = packs([3, {'__complex__': True, 'real': 1, 'imag': 2}]) - unpacked = unpacks(packed, object_hook=_decode_complex) - eq_(unpacked[1], 1+2j) + packed = packb([3, {b"__complex__": True, b"real": 1, b"imag": 2}]) + unpacked = unpackb(packed, object_hook=_decode_complex, use_list=1) + assert unpacked[1] == 1 + 2j + + +def test_decode_pairs_hook(): + packed = packb([3, {1: 2, 3: 4}]) + prod_sum = 1 * 2 + 3 * 4 + unpacked = unpackb( + packed, + object_pairs_hook=lambda lst: sum(k * v for k, v in lst), + use_list=1, + strict_map_key=False, + ) + assert unpacked[1] == prod_sum + + +def test_only_one_obj_hook(): + with raises(TypeError): + unpackb(b"", object_hook=lambda x: x, object_pairs_hook=lambda x: x) + -@raises(ValueError) def test_bad_hook(): - packed = packs([3, 1+2j], default=lambda o: o) - unpacked = unpacks(packed) + with raises(TypeError): + packed = packb([3, 1 + 2j], default=lambda o: o) + unpackb(packed, use_list=1) + def _arr_to_str(arr): - return ''.join(str(c) for c in arr) + return "".join(str(c) for c in arr) + def test_array_hook(): - packed = packs([1,2,3]) - unpacked = unpacks(packed, 
list_hook=_arr_to_str) - eq_(unpacked, '123') - -if __name__ == '__main__': - test_decode_hook() - test_encode_hook() - test_bad_hook() - test_array_hook() + packed = packb([1, 2, 3]) + unpacked = unpackb(packed, list_hook=_arr_to_str, use_list=1) + assert unpacked == "123" + + +class DecodeError(Exception): + pass + + +def bad_complex_decoder(o): + raise DecodeError("Ooops!") + + +def test_an_exception_in_objecthook1(): + with raises(DecodeError): + packed = packb({1: {"__complex__": True, "real": 1, "imag": 2}}) + unpackb(packed, object_hook=bad_complex_decoder, strict_map_key=False) + + +def test_an_exception_in_objecthook2(): + with raises(DecodeError): + packed = packb({1: [{"__complex__": True, "real": 1, "imag": 2}]}) + unpackb(packed, list_hook=bad_complex_decoder, use_list=1, strict_map_key=False) diff --git a/test/test_pack.py b/test/test_pack.py index 2e1ac224..374d1549 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -1,88 +1,181 @@ #!/usr/bin/env python -# coding: utf-8 -from nose import main -from nose.tools import * -from nose.plugins.skip import SkipTest +import struct +from collections import OrderedDict +from io import BytesIO -from msgpack import packs, unpacks, Packer, Unpacker +import pytest -from StringIO import StringIO +from msgpack import Packer, Unpacker, packb, unpackb + + +def check(data, use_list=False): + re = unpackb(packb(data), use_list=use_list, strict_map_key=False) + assert re == data -def check(data): - re = unpacks(packs(data)) - assert_equal(re, data) def testPack(): test_data = [ - 0, 1, 127, 128, 255, 256, 65535, 65536, - -1, -32, -33, -128, -129, -32768, -32769, - 1.0, - "", "a", "a"*31, "a"*32, - None, True, False, - (), ((),), ((), None,), + 0, + 1, + 127, + 128, + 255, + 256, + 65535, + 65536, + 4294967295, + 4294967296, + -1, + -32, + -33, + -128, + -129, + -32768, + -32769, + -4294967296, + -4294967297, + 1.0, + b"", + b"a", + b"a" * 31, + b"a" * 32, + None, + True, + False, + (), + ((),), + ((), None), {None: 0}, - (1<<23), - ] + (1 << 23), + ] for td in test_data: check(td) + def testPackUnicode(): - test_data = [ - u"", u"abcd", (u"defgh",), u"Русский текст", - ] + test_data = ["", "abcd", ["defgh"], "Русский текст"] for td in test_data: - re = unpacks(packs(td, encoding='utf-8'), encoding='utf-8') - assert_equal(re, td) - packer = Packer(encoding='utf-8') + re = unpackb(packb(td), use_list=1, raw=False) + assert re == td + packer = Packer() data = packer.pack(td) - re = Unpacker(StringIO(data), encoding='utf-8').unpack() - assert_equal(re, td) - -def testPackUTF32(): - try: - test_data = [ - u"", u"abcd", (u"defgh",), u"Русский текст", - ] - for td in test_data: - re = unpacks(packs(td, encoding='utf-32'), encoding='utf-32') - assert_equal(re, td) - except LookupError: - raise SkipTest + re = Unpacker(BytesIO(data), raw=False, use_list=1).unpack() + assert re == td + def testPackBytes(): - test_data = [ - "", "abcd", ("defgh",), - ] + test_data = [b"", b"abcd", (b"defgh",)] + for td in test_data: + check(td) + + +def testPackByteArrays(): + test_data = [bytearray(b""), bytearray(b"abcd"), (bytearray(b"defgh"),)] for td in test_data: check(td) + def testIgnoreUnicodeErrors(): - re = unpacks(packs('abc\xeddef'), - encoding='ascii', unicode_errors='ignore') - assert_equal(re, "abcdef") + re = unpackb(packb(b"abc\xeddef", use_bin_type=False), raw=False, unicode_errors="ignore") + assert re == "abcdef" + -@raises(UnicodeDecodeError) def testStrictUnicodeUnpack(): - unpacks(packs('abc\xeddef'), encoding='utf-8') + packed = 
packb(b"abc\xeddef", use_bin_type=False) + with pytest.raises(UnicodeDecodeError): + unpackb(packed, raw=False, use_list=1) -@raises(UnicodeEncodeError) -def testStrictUnicodePack(): - packs(u"abc\xeddef", encoding='ascii', unicode_errors='strict') def testIgnoreErrorsPack(): - re = unpacks( - packs(u"abcФФФdef", encoding='ascii', unicode_errors='ignore'), - encoding='utf-8') - assert_equal(re, u"abcdef") + re = unpackb( + packb("abc\udc80\udcffdef", use_bin_type=True, unicode_errors="ignore"), + raw=False, + use_list=1, + ) + assert re == "abcdef" -@raises(TypeError) -def testNoEncoding(): - packs(u"abc", encoding=None) def testDecodeBinary(): - re = unpacks(packs(u"abc"), encoding=None) - assert_equal(re, "abc") + re = unpackb(packb(b"abc"), use_list=1) + assert re == b"abc" + + +def testPackFloat(): + assert packb(1.0, use_single_float=True) == b"\xca" + struct.pack(">f", 1.0) + assert packb(1.0, use_single_float=False) == b"\xcb" + struct.pack(">d", 1.0) + + +def testArraySize(sizes=[0, 5, 50, 1000]): + bio = BytesIO() + packer = Packer() + for size in sizes: + bio.write(packer.pack_array_header(size)) + for i in range(size): + bio.write(packer.pack(i)) + + bio.seek(0) + unpacker = Unpacker(bio, use_list=1) + for size in sizes: + assert unpacker.unpack() == list(range(size)) + + +def test_manualreset(sizes=[0, 5, 50, 1000]): + packer = Packer(autoreset=False) + for size in sizes: + packer.pack_array_header(size) + for i in range(size): + packer.pack(i) + + bio = BytesIO(packer.bytes()) + unpacker = Unpacker(bio, use_list=1) + for size in sizes: + assert unpacker.unpack() == list(range(size)) + + packer.reset() + assert packer.bytes() == b"" + + +def testMapSize(sizes=[0, 5, 50, 1000]): + bio = BytesIO() + packer = Packer() + for size in sizes: + bio.write(packer.pack_map_header(size)) + for i in range(size): + bio.write(packer.pack(i)) # key + bio.write(packer.pack(i * 2)) # value + + bio.seek(0) + unpacker = Unpacker(bio, strict_map_key=False) + for size in sizes: + assert unpacker.unpack() == {i: i * 2 for i in range(size)} + + +def test_odict(): + seq = [(b"one", 1), (b"two", 2), (b"three", 3), (b"four", 4)] + od = OrderedDict(seq) + assert unpackb(packb(od), use_list=1) == dict(seq) + + def pair_hook(seq): + return list(seq) + + assert unpackb(packb(od), object_pairs_hook=pair_hook, use_list=1) == seq + + +def test_pairlist(): + pairlist = [(b"a", 1), (2, b"b"), (b"foo", b"bar")] + packer = Packer() + packed = packer.pack_map_pairs(pairlist) + unpacked = unpackb(packed, object_pairs_hook=list, strict_map_key=False) + assert pairlist == unpacked + + +def test_get_buffer(): + packer = Packer(autoreset=0, use_bin_type=True) + packer.pack([1, 2]) + strm = BytesIO() + strm.write(packer.getbuffer()) + written = strm.getvalue() -if __name__ == '__main__': - main() + expected = packb([1, 2], use_bin_type=True) + assert written == expected diff --git a/test/test_read_size.py b/test/test_read_size.py new file mode 100644 index 00000000..0f6c1b50 --- /dev/null +++ b/test/test_read_size.py @@ -0,0 +1,72 @@ +"""Test Unpacker's read_array_header and read_map_header methods""" + +from msgpack import OutOfData, Unpacker, packb + +UnexpectedTypeException = ValueError + + +def test_read_array_header(): + unpacker = Unpacker() + unpacker.feed(packb(["a", "b", "c"])) + assert unpacker.read_array_header() == 3 + assert unpacker.unpack() == "a" + assert unpacker.unpack() == "b" + assert unpacker.unpack() == "c" + try: + unpacker.unpack() + assert 0, "should raise exception" + except OutOfData: + assert 
1, "okay" + + +def test_read_map_header(): + unpacker = Unpacker() + unpacker.feed(packb({"a": "A"})) + assert unpacker.read_map_header() == 1 + assert unpacker.unpack() == "a" + assert unpacker.unpack() == "A" + try: + unpacker.unpack() + assert 0, "should raise exception" + except OutOfData: + assert 1, "okay" + + +def test_incorrect_type_array(): + unpacker = Unpacker() + unpacker.feed(packb(1)) + try: + unpacker.read_array_header() + assert 0, "should raise exception" + except UnexpectedTypeException: + assert 1, "okay" + + +def test_incorrect_type_map(): + unpacker = Unpacker() + unpacker.feed(packb(1)) + try: + unpacker.read_map_header() + assert 0, "should raise exception" + except UnexpectedTypeException: + assert 1, "okay" + + +def test_correct_type_nested_array(): + unpacker = Unpacker() + unpacker.feed(packb({"a": ["b", "c", "d"]})) + try: + unpacker.read_array_header() + assert 0, "should raise exception" + except UnexpectedTypeException: + assert 1, "okay" + + +def test_incorrect_type_nested_map(): + unpacker = Unpacker() + unpacker.feed(packb([{"a": "b"}])) + try: + unpacker.read_map_header() + assert 0, "should raise exception" + except UnexpectedTypeException: + assert 1, "okay" diff --git a/test/test_seq.py b/test/test_seq.py index 993a59ec..8dee4620 100644 --- a/test/test_seq.py +++ b/test/test_seq.py @@ -1,49 +1,41 @@ -#!/usr/bin/env python -# coding: utf-8 +# ruff: noqa: E501 +# ignore line length limit for long comments +import io -from nose import main -from nose.tools import * - -import StringIO import msgpack -binarydata = [chr(i) for i in xrange(256)] -binarydata = "".join(binarydata) +binarydata = bytes(bytearray(range(256))) + def gen_binary_data(idx): - data = binarydata[:idx % 300] - return data + return binarydata[: idx % 300] + def test_exceeding_unpacker_read_size(): - dumpf = StringIO.StringIO() + dumpf = io.BytesIO() packer = msgpack.Packer() NUMBER_OF_STRINGS = 6 read_size = 16 - # 5 ok for read_size=16, while 6 glibc detected *** python: double free or corruption (fasttop): - # 20 ok for read_size=256, while 25 segfaults / glibc detected *** python: double free or corruption (!prev) - # 40 ok for read_size=1024, while 50 introduces errors - # 7000 ok for read_size=1024*1024, while 8000 leads to glibc detected *** python: double free or corruption (!prev): + # 5 ok for read_size=16, while 6 glibc detected *** python: double free or corruption (fasttop): + # 20 ok for read_size=256, while 25 segfaults / glibc detected *** python: double free or corruption (!prev) + # 40 ok for read_size=1024, while 50 introduces errors + # 7000 ok for read_size=1024*1024, while 8000 leads to glibc detected *** python: double free or corruption (!prev): - for idx in xrange(NUMBER_OF_STRINGS): + for idx in range(NUMBER_OF_STRINGS): data = gen_binary_data(idx) dumpf.write(packer.pack(data)) - f = StringIO.StringIO(dumpf.getvalue()) + f = io.BytesIO(dumpf.getvalue()) dumpf.close() - unpacker = msgpack.Unpacker(f, read_size=read_size) + unpacker = msgpack.Unpacker(f, read_size=read_size, use_list=1) read_count = 0 for idx, o in enumerate(unpacker): - assert_equal(type(o), str) - assert_equal(o, gen_binary_data(idx)) + assert isinstance(o, bytes) + assert o == gen_binary_data(idx) read_count += 1 - assert_equal(read_count, NUMBER_OF_STRINGS) - - -if __name__ == '__main__': - # main() - test_exceeding_unpacker_read_size() + assert read_count == NUMBER_OF_STRINGS diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py index d61be230..0f895d7d 100644 --- 
a/test/test_sequnpack.py +++ b/test/test_sequnpack.py @@ -1,34 +1,148 @@ #!/usr/bin/env python -# coding: utf-8 +import io + +from pytest import raises + +from msgpack import BufferFull, Unpacker, pack, packb +from msgpack.exceptions import OutOfData + + +def test_partialdata(): + unpacker = Unpacker() + unpacker.feed(b"\xa5") + with raises(StopIteration): + next(iter(unpacker)) + unpacker.feed(b"h") + with raises(StopIteration): + next(iter(unpacker)) + unpacker.feed(b"a") + with raises(StopIteration): + next(iter(unpacker)) + unpacker.feed(b"l") + with raises(StopIteration): + next(iter(unpacker)) + unpacker.feed(b"l") + with raises(StopIteration): + next(iter(unpacker)) + unpacker.feed(b"o") + assert next(iter(unpacker)) == "hallo" -from msgpack import Unpacker def test_foobar(): - unpacker = Unpacker(read_size=3) - unpacker.feed('foobar') - assert unpacker.unpack() == ord('f') - assert unpacker.unpack() == ord('o') - assert unpacker.unpack() == ord('o') - assert unpacker.unpack() == ord('b') - assert unpacker.unpack() == ord('a') - assert unpacker.unpack() == ord('r') - try: - o = unpacker.unpack() - print "Oops!", o - assert 0 - except StopIteration: - assert 1 - else: - assert 0 - unpacker.feed('foo') - unpacker.feed('bar') + unpacker = Unpacker(read_size=3, use_list=1) + unpacker.feed(b"foobar") + assert unpacker.unpack() == ord(b"f") + assert unpacker.unpack() == ord(b"o") + assert unpacker.unpack() == ord(b"o") + assert unpacker.unpack() == ord(b"b") + assert unpacker.unpack() == ord(b"a") + assert unpacker.unpack() == ord(b"r") + with raises(OutOfData): + unpacker.unpack() + + unpacker.feed(b"foo") + unpacker.feed(b"bar") k = 0 - for o, e in zip(unpacker, 'foobarbaz'): + for o, e in zip(unpacker, "foobarbaz"): assert o == ord(e) k += 1 - assert k == len('foobar') + assert k == len(b"foobar") + + +def test_foobar_skip(): + unpacker = Unpacker(read_size=3, use_list=1) + unpacker.feed(b"foobar") + assert unpacker.unpack() == ord(b"f") + unpacker.skip() + assert unpacker.unpack() == ord(b"o") + unpacker.skip() + assert unpacker.unpack() == ord(b"a") + unpacker.skip() + with raises(OutOfData): + unpacker.unpack() + + +def test_maxbuffersize(): + with raises(ValueError): + Unpacker(read_size=5, max_buffer_size=3) + unpacker = Unpacker(read_size=3, max_buffer_size=3, use_list=1) + unpacker.feed(b"fo") + with raises(BufferFull): + unpacker.feed(b"ob") + unpacker.feed(b"o") + assert ord("f") == next(unpacker) + unpacker.feed(b"b") + assert ord("o") == next(unpacker) + assert ord("o") == next(unpacker) + assert ord("b") == next(unpacker) + + +def test_maxbuffersize_file(): + buff = io.BytesIO(packb(b"a" * 10) + packb([b"a" * 20] * 2)) + unpacker = Unpacker(buff, read_size=1, max_buffer_size=19, max_bin_len=20) + assert unpacker.unpack() == b"a" * 10 + # assert unpacker.unpack() == [b"a" * 20]*2 + with raises(BufferFull): + print(unpacker.unpack()) + + +def test_readbytes(): + unpacker = Unpacker(read_size=3) + unpacker.feed(b"foobar") + assert unpacker.unpack() == ord(b"f") + assert unpacker.read_bytes(3) == b"oob" + assert unpacker.unpack() == ord(b"a") + assert unpacker.unpack() == ord(b"r") + + # Test buffer refill + unpacker = Unpacker(io.BytesIO(b"foobar"), read_size=3) + assert unpacker.unpack() == ord(b"f") + assert unpacker.read_bytes(3) == b"oob" + assert unpacker.unpack() == ord(b"a") + assert unpacker.unpack() == ord(b"r") + + # Issue 352 + u = Unpacker() + u.feed(b"x") + assert bytes(u.read_bytes(1)) == b"x" + with raises(StopIteration): + next(u) + u.feed(b"\1") + assert 
next(u) == 1 + + +def test_issue124(): + unpacker = Unpacker() + unpacker.feed(b"\xa1?\xa1!") + assert tuple(unpacker) == ("?", "!") + assert tuple(unpacker) == () + unpacker.feed(b"\xa1?\xa1") + assert tuple(unpacker) == ("?",) + assert tuple(unpacker) == () + unpacker.feed(b"!") + assert tuple(unpacker) == ("!",) + assert tuple(unpacker) == () -if __name__ == '__main__': - test_foobar() +def test_unpack_tell(): + stream = io.BytesIO() + messages = [2**i - 1 for i in range(65)] + messages += [-(2**i) for i in range(1, 64)] + messages += [ + b"hello", + b"hello" * 1000, + list(range(20)), + {i: bytes(i) * i for i in range(10)}, + {i: bytes(i) * i for i in range(32)}, + ] + offsets = [] + for m in messages: + pack(m, stream) + offsets.append(stream.tell()) + stream.seek(0) + unpacker = Unpacker(stream, strict_map_key=False) + for m, o in zip(messages, offsets): + m2 = next(unpacker) + assert m == m2 + assert o == unpacker.tell() diff --git a/test/test_stricttype.py b/test/test_stricttype.py new file mode 100644 index 00000000..72776a2c --- /dev/null +++ b/test/test_stricttype.py @@ -0,0 +1,59 @@ +from collections import namedtuple + +from msgpack import ExtType, packb, unpackb + + +def test_namedtuple(): + T = namedtuple("T", "foo bar") + + def default(o): + if isinstance(o, T): + return dict(o._asdict()) + raise TypeError(f"Unsupported type {type(o)}") + + packed = packb(T(1, 42), strict_types=True, use_bin_type=True, default=default) + unpacked = unpackb(packed, raw=False) + assert unpacked == {"foo": 1, "bar": 42} + + +def test_tuple(): + t = ("one", 2, b"three", (4,)) + + def default(o): + if isinstance(o, tuple): + return {"__type__": "tuple", "value": list(o)} + raise TypeError(f"Unsupported type {type(o)}") + + def convert(o): + if o.get("__type__") == "tuple": + return tuple(o["value"]) + return o + + data = packb(t, strict_types=True, use_bin_type=True, default=default) + expected = unpackb(data, raw=False, object_hook=convert) + + assert expected == t + + +def test_tuple_ext(): + t = ("one", 2, b"three", (4,)) + + MSGPACK_EXT_TYPE_TUPLE = 0 + + def default(o): + if isinstance(o, tuple): + # Convert to list and pack + payload = packb(list(o), strict_types=True, use_bin_type=True, default=default) + return ExtType(MSGPACK_EXT_TYPE_TUPLE, payload) + raise TypeError(repr(o)) + + def convert(code, payload): + if code == MSGPACK_EXT_TYPE_TUPLE: + # Unpack and convert to tuple + return tuple(unpackb(payload, raw=False, ext_hook=convert)) + raise ValueError(f"Unknown Ext code {code}") + + data = packb(t, strict_types=True, use_bin_type=True, default=default) + expected = unpackb(data, raw=False, ext_hook=convert) + + assert expected == t diff --git a/test/test_subtype.py b/test/test_subtype.py new file mode 100644 index 00000000..a911578c --- /dev/null +++ b/test/test_subtype.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python + +from collections import namedtuple + +from msgpack import packb + + +class MyList(list): + pass + + +class MyDict(dict): + pass + + +class MyTuple(tuple): + pass + + +MyNamedTuple = namedtuple("MyNamedTuple", "x y") + + +def test_types(): + assert packb(MyDict()) == packb(dict()) + assert packb(MyList()) == packb(list()) + assert packb(MyNamedTuple(1, 2)) == packb((1, 2)) diff --git a/test/test_timestamp.py b/test/test_timestamp.py new file mode 100644 index 00000000..831141a1 --- /dev/null +++ b/test/test_timestamp.py @@ -0,0 +1,171 @@ +import datetime + +import pytest + +import msgpack +from msgpack.ext import Timestamp + + +def test_timestamp(): + # timestamp32 + ts = 
Timestamp(2**32 - 1) + assert ts.to_bytes() == b"\xff\xff\xff\xff" + packed = msgpack.packb(ts) + assert packed == b"\xd6\xff" + ts.to_bytes() + unpacked = msgpack.unpackb(packed) + assert ts == unpacked + assert ts.seconds == 2**32 - 1 and ts.nanoseconds == 0 + + # timestamp64 + ts = Timestamp(2**34 - 1, 999999999) + assert ts.to_bytes() == b"\xee\x6b\x27\xff\xff\xff\xff\xff" + packed = msgpack.packb(ts) + assert packed == b"\xd7\xff" + ts.to_bytes() + unpacked = msgpack.unpackb(packed) + assert ts == unpacked + assert ts.seconds == 2**34 - 1 and ts.nanoseconds == 999999999 + + # timestamp96 + ts = Timestamp(2**63 - 1, 999999999) + assert ts.to_bytes() == b"\x3b\x9a\xc9\xff\x7f\xff\xff\xff\xff\xff\xff\xff" + packed = msgpack.packb(ts) + assert packed == b"\xc7\x0c\xff" + ts.to_bytes() + unpacked = msgpack.unpackb(packed) + assert ts == unpacked + assert ts.seconds == 2**63 - 1 and ts.nanoseconds == 999999999 + + # negative fractional + ts = Timestamp.from_unix(-2.3) # s: -3, ns: 700000000 + assert ts.seconds == -3 and ts.nanoseconds == 700000000 + assert ts.to_bytes() == b"\x29\xb9\x27\x00\xff\xff\xff\xff\xff\xff\xff\xfd" + packed = msgpack.packb(ts) + assert packed == b"\xc7\x0c\xff" + ts.to_bytes() + unpacked = msgpack.unpackb(packed) + assert ts == unpacked + + +def test_unpack_timestamp(): + # timestamp 32 + assert msgpack.unpackb(b"\xd6\xff\x00\x00\x00\x00") == Timestamp(0) + + # timestamp 64 + assert msgpack.unpackb(b"\xd7\xff" + b"\x00" * 8) == Timestamp(0) + with pytest.raises(ValueError): + msgpack.unpackb(b"\xd7\xff" + b"\xff" * 8) + + # timestamp 96 + assert msgpack.unpackb(b"\xc7\x0c\xff" + b"\x00" * 12) == Timestamp(0) + with pytest.raises(ValueError): + msgpack.unpackb(b"\xc7\x0c\xff" + b"\xff" * 12) == Timestamp(0) + + # Undefined + with pytest.raises(ValueError): + msgpack.unpackb(b"\xd4\xff\x00") # fixext 1 + with pytest.raises(ValueError): + msgpack.unpackb(b"\xd5\xff\x00\x00") # fixext 2 + with pytest.raises(ValueError): + msgpack.unpackb(b"\xc7\x00\xff") # ext8 (len=0) + with pytest.raises(ValueError): + msgpack.unpackb(b"\xc7\x03\xff\0\0\0") # ext8 (len=3) + with pytest.raises(ValueError): + msgpack.unpackb(b"\xc7\x05\xff\0\0\0\0\0") # ext8 (len=5) + + +def test_timestamp_from(): + t = Timestamp(42, 14000) + assert Timestamp.from_unix(42.000014) == t + assert Timestamp.from_unix_nano(42000014000) == t + + +def test_timestamp_to(): + t = Timestamp(42, 14000) + assert t.to_unix() == 42.000014 + assert t.to_unix_nano() == 42000014000 + + +def test_timestamp_datetime(): + t = Timestamp(42, 14) + utc = datetime.timezone.utc + assert t.to_datetime() == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=utc) + + ts = datetime.datetime(2024, 4, 16, 8, 43, 9, 420317, tzinfo=utc) + ts2 = datetime.datetime(2024, 4, 16, 8, 43, 9, 420318, tzinfo=utc) + + assert ( + Timestamp.from_datetime(ts2).nanoseconds - Timestamp.from_datetime(ts).nanoseconds == 1000 + ) + + ts3 = datetime.datetime(2024, 4, 16, 8, 43, 9, 4256) + ts4 = datetime.datetime(2024, 4, 16, 8, 43, 9, 4257) + assert ( + Timestamp.from_datetime(ts4).nanoseconds - Timestamp.from_datetime(ts3).nanoseconds == 1000 + ) + + assert Timestamp.from_datetime(ts).to_datetime() == ts + + +def test_unpack_datetime(): + t = Timestamp(42, 14) + utc = datetime.timezone.utc + packed = msgpack.packb(t) + unpacked = msgpack.unpackb(packed, timestamp=3) + assert unpacked == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=utc) + + +def test_pack_unpack_before_epoch(): + utc = datetime.timezone.utc + t_in = datetime.datetime(1960, 1, 1, 
tzinfo=utc) + packed = msgpack.packb(t_in, datetime=True) + unpacked = msgpack.unpackb(packed, timestamp=3) + assert unpacked == t_in + + +def test_pack_datetime(): + t = Timestamp(42, 14000) + dt = t.to_datetime() + utc = datetime.timezone.utc + assert dt == datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=utc) + + packed = msgpack.packb(dt, datetime=True) + packed2 = msgpack.packb(t) + assert packed == packed2 + + unpacked = msgpack.unpackb(packed) + print(packed, unpacked) + assert unpacked == t + + unpacked = msgpack.unpackb(packed, timestamp=3) + assert unpacked == dt + + x = [] + packed = msgpack.packb(dt, datetime=False, default=x.append) + assert x + assert x[0] == dt + assert msgpack.unpackb(packed) is None + + +def test_issue451(): + # https://github.com/msgpack/msgpack-python/issues/451 + utc = datetime.timezone.utc + dt = datetime.datetime(2100, 1, 1, 1, 1, tzinfo=utc) + packed = msgpack.packb(dt, datetime=True) + assert packed == b"\xd6\xff\xf4\x86eL" + + unpacked = msgpack.unpackb(packed, timestamp=3) + assert dt == unpacked + + +def test_pack_datetime_without_tzinfo(): + dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14) + with pytest.raises(ValueError, match="where tzinfo=None"): + packed = msgpack.packb(dt, datetime=True) + + dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14) + packed = msgpack.packb(dt, datetime=True, default=lambda x: None) + assert packed == msgpack.packb(None) + + utc = datetime.timezone.utc + dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=utc) + packed = msgpack.packb(dt, datetime=True) + unpacked = msgpack.unpackb(packed, timestamp=3) + assert unpacked == dt diff --git a/test/test_unpack.py b/test/test_unpack.py new file mode 100644 index 00000000..b17c3c53 --- /dev/null +++ b/test/test_unpack.py @@ -0,0 +1,89 @@ +import sys +from io import BytesIO + +from pytest import mark, raises + +from msgpack import ExtType, OutOfData, Unpacker, packb + + +def test_unpack_array_header_from_file(): + f = BytesIO(packb([1, 2, 3, 4])) + unpacker = Unpacker(f) + assert unpacker.read_array_header() == 4 + assert unpacker.unpack() == 1 + assert unpacker.unpack() == 2 + assert unpacker.unpack() == 3 + assert unpacker.unpack() == 4 + with raises(OutOfData): + unpacker.unpack() + + +@mark.skipif( + "not hasattr(sys, 'getrefcount') == True", + reason="sys.getrefcount() is needed to pass this test", +) +def test_unpacker_hook_refcnt(): + result = [] + + def hook(x): + result.append(x) + return x + + basecnt = sys.getrefcount(hook) + + up = Unpacker(object_hook=hook, list_hook=hook) + + assert sys.getrefcount(hook) >= basecnt + 2 + + up.feed(packb([{}])) + up.feed(packb([{}])) + assert up.unpack() == [{}] + assert up.unpack() == [{}] + assert result == [{}, [{}], {}, [{}]] + + del up + + assert sys.getrefcount(hook) == basecnt + + +def test_unpacker_ext_hook(): + class MyUnpacker(Unpacker): + def __init__(self): + super().__init__(ext_hook=self._hook, raw=False) + + def _hook(self, code, data): + if code == 1: + return int(data) + else: + return ExtType(code, data) + + unpacker = MyUnpacker() + unpacker.feed(packb({"a": 1})) + assert unpacker.unpack() == {"a": 1} + unpacker.feed(packb({"a": ExtType(1, b"123")})) + assert unpacker.unpack() == {"a": 123} + unpacker.feed(packb({"a": ExtType(2, b"321")})) + assert unpacker.unpack() == {"a": ExtType(2, b"321")} + + +def test_unpacker_tell(): + objects = 1, 2, "abc", "def", "ghi" + packed = b"\x01\x02\xa3abc\xa3def\xa3ghi" + positions = 1, 2, 6, 10, 14 + unpacker = Unpacker(BytesIO(packed)) + for obj, unp, pos in 
zip(objects, unpacker, positions): + assert obj == unp + assert pos == unpacker.tell() + + +def test_unpacker_tell_read_bytes(): + objects = 1, "abc", "ghi" + packed = b"\x01\x02\xa3abc\xa3def\xa3ghi" + raw_data = b"\x02", b"\xa3def", b"" + lenghts = 1, 4, 999 + positions = 1, 6, 14 + unpacker = Unpacker(BytesIO(packed)) + for obj, unp, pos, n, raw in zip(objects, unpacker, positions, lenghts, raw_data): + assert obj == unp + assert pos == unpacker.tell() + assert unpacker.read_bytes(n) == raw diff --git a/test3/test_buffer.py b/test3/test_buffer.py deleted file mode 100644 index 01310a07..00000000 --- a/test3/test_buffer.py +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -from nose import main -from nose.tools import * -from msgpack import packb, unpackb - -def test_unpack_buffer(): - from array import array - buf = array('b') - buf.fromstring(packb(('foo', 'bar'))) - obj = unpackb(buf) - assert_equal((b'foo', b'bar'), obj) - -if __name__ == '__main__': - main() diff --git a/test3/test_case.py b/test3/test_case.py deleted file mode 100644 index 2f423161..00000000 --- a/test3/test_case.py +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -from nose import main -from nose.tools import * -from msgpack import packs, unpacks - - -def check(length, obj): - v = packs(obj) - assert_equal(len(v), length, "%r length should be %r but get %r" % (obj, length, len(v))) - assert_equal(unpacks(v), obj) - -def test_1(): - for o in [None, True, False, 0, 1, (1 << 6), (1 << 7) - 1, -1, - -((1<<5)-1), -(1<<5)]: - check(1, o) - -def test_2(): - for o in [1 << 7, (1 << 8) - 1, - -((1<<5)+1), -(1<<7) - ]: - check(2, o) - -def test_3(): - for o in [1 << 8, (1 << 16) - 1, - -((1<<7)+1), -(1<<15)]: - check(3, o) - -def test_5(): - for o in [1 << 16, (1 << 32) - 1, - -((1<<15)+1), -(1<<31)]: - check(5, o) - -def test_9(): - for o in [1 << 32, (1 << 64) - 1, - -((1<<31)+1), -(1<<63), - 1.0, 0.1, -0.1, -1.0]: - check(9, o) - - -def check_raw(overhead, num): - check(num + overhead, b" " * num) - -def test_fixraw(): - check_raw(1, 0) - check_raw(1, (1<<5) - 1) - -def test_raw16(): - check_raw(3, 1<<5) - check_raw(3, (1<<16) - 1) - -def test_raw32(): - check_raw(5, 1<<16) - - -def check_array(overhead, num): - check(num + overhead, (None,) * num) - -def test_fixarray(): - check_array(1, 0) - check_array(1, (1 << 4) - 1) - -def test_array16(): - check_array(3, 1 << 4) - check_array(3, (1<<16)-1) - -def test_array32(): - check_array(5, (1<<16)) - - -def match(obj, buf): - assert_equal(packs(obj), buf) - assert_equal(unpacks(buf), obj) - -def test_match(): - cases = [ - (None, b'\xc0'), - (False, b'\xc2'), - (True, b'\xc3'), - (0, b'\x00'), - (127, b'\x7f'), - (128, b'\xcc\x80'), - (256, b'\xcd\x01\x00'), - (-1, b'\xff'), - (-33, b'\xd0\xdf'), - (-129, b'\xd1\xff\x7f'), - ({1:1}, b'\x81\x01\x01'), - (1.0, b"\xcb\x3f\xf0\x00\x00\x00\x00\x00\x00"), - ((), b'\x90'), - (tuple(range(15)),b"\x9f\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e"), - (tuple(range(16)),b"\xdc\x00\x10\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"), - ({}, b'\x80'), - (dict([(x,x) for x in range(15)]), b'\x8f\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e'), - (dict([(x,x) for x in range(16)]), b'\xde\x00\x10\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e\x0f\x0f'), - ] - - for v, p in cases: - match(v, p) - -def test_unicode(): - 
assert_equal(b'foobar', unpacks(packs('foobar'))) - -if __name__ == '__main__': - main() diff --git a/test3/test_except.py b/test3/test_except.py deleted file mode 100644 index 574728fb..00000000 --- a/test3/test_except.py +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -from nose.tools import * -from msgpack import packs, unpacks - -import datetime - -def test_raise_on_find_unsupported_value(): - assert_raises(TypeError, packs, datetime.datetime.now()) - -if __name__ == '__main__': - from nose import main - main() diff --git a/test3/test_format.py b/test3/test_format.py deleted file mode 100644 index 022e6801..00000000 --- a/test3/test_format.py +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -from nose import main -from nose.tools import * -from msgpack import unpacks - -def check(src, should): - assert_equal(unpacks(src), should) - -def testSimpleValue(): - check(b"\x93\xc0\xc2\xc3", - (None, False, True,)) - -def testFixnum(): - check(b"\x92\x93\x00\x40\x7f\x93\xe0\xf0\xff", - ((0,64,127,), (-32,-16,-1,),) - ) - -def testFixArray(): - check(b"\x92\x90\x91\x91\xc0", - ((),((None,),),), - ) - -def testFixRaw(): - check(b"\x94\xa0\xa1a\xa2bc\xa3def", - (b"", b"a", b"bc", b"def",), - ) - -def testFixMap(): - check( - b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80", - {False: {None: None}, True:{None:{}}}, - ) - -def testUnsignedInt(): - check( - b"\x99\xcc\x00\xcc\x80\xcc\xff\xcd\x00\x00\xcd\x80\x00" - b"\xcd\xff\xff\xce\x00\x00\x00\x00\xce\x80\x00\x00\x00" - b"\xce\xff\xff\xff\xff", - (0, 128, 255, 0, 32768, 65535, 0, 2147483648, 4294967295,), - ) - -def testSignedInt(): - check(b"\x99\xd0\x00\xd0\x80\xd0\xff\xd1\x00\x00\xd1\x80\x00" - b"\xd1\xff\xff\xd2\x00\x00\x00\x00\xd2\x80\x00\x00\x00" - b"\xd2\xff\xff\xff\xff", - (0, -128, -1, 0, -32768, -1, 0, -2147483648, -1,)) - -def testRaw(): - check(b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00" - b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab", - (b"", b"a", b"ab", b"", b"a", b"ab")) - -def testArray(): - check(b"\x96\xdc\x00\x00\xdc\x00\x01\xc0\xdc\x00\x02\xc2\xc3\xdd\x00" - b"\x00\x00\x00\xdd\x00\x00\x00\x01\xc0\xdd\x00\x00\x00\x02" - b"\xc2\xc3", - ((), (None,), (False,True), (), (None,), (False,True)) - ) - -def testMap(): - check( - b"\x96" - b"\xde\x00\x00" - b"\xde\x00\x01\xc0\xc2" - b"\xde\x00\x02\xc0\xc2\xc3\xc2" - b"\xdf\x00\x00\x00\x00" - b"\xdf\x00\x00\x00\x01\xc0\xc2" - b"\xdf\x00\x00\x00\x02\xc0\xc2\xc3\xc2", - ({}, {None: False}, {True: False, None: False}, {}, - {None: False}, {True: False, None: False})) - -if __name__ == '__main__': - main() diff --git a/test3/test_obj.py b/test3/test_obj.py deleted file mode 100644 index b54021f2..00000000 --- a/test3/test_obj.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -from nose import main -from nose.tools import * - -from msgpack import packs, unpacks - -def _decode_complex(obj): - if b'__complex__' in obj: - return complex(obj[b'real'], obj[b'imag']) - return obj - -def _encode_complex(obj): - if isinstance(obj, complex): - return {b'__complex__': True, b'real': 1, b'imag': 2} - return obj - -def test_encode_hook(): - packed = packs([3, 1+2j], default=_encode_complex) - unpacked = unpacks(packed) - eq_(unpacked[1], {b'__complex__': True, b'real': 1, b'imag': 2}) - -def test_decode_hook(): - packed = packs([3, {b'__complex__': True, b'real': 1, b'imag': 2}]) - unpacked = unpacks(packed, object_hook=_decode_complex) - eq_(unpacked[1], 1+2j) - -@raises(ValueError) -def test_bad_hook(): - packed = 
packs([3, 1+2j], default=lambda o: o) - unpacked = unpacks(packed) - -def _arr_to_str(arr): - return ''.join(str(c) for c in arr) - -def test_array_hook(): - packed = packs([1,2,3]) - unpacked = unpacks(packed, list_hook=_arr_to_str) - eq_(unpacked, '123') - -if __name__ == '__main__': - #main() - test_decode_hook() diff --git a/test3/test_pack.py b/test3/test_pack.py deleted file mode 100644 index 5ff04e79..00000000 --- a/test3/test_pack.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -from nose import main -from nose.tools import * - -from msgpack import packs, unpacks, Unpacker, Packer - -from io import BytesIO - -def check(data): - re = unpacks(packs(data)) - assert_equal(re, data) - -def testPack(): - test_data = [ - 0, 1, 127, 128, 255, 256, 65535, 65536, - -1, -32, -33, -128, -129, -32768, -32769, - 1.0, - b"", b"a", b"a"*31, b"a"*32, - None, True, False, - (), ((),), ((), None,), - {None: 0}, - (1<<23), - ] - for td in test_data: - check(td) - -def testPackUnicode(): - test_data = [ - "", "abcd", ("defgh",), "Русский текст", - ] - for td in test_data: - re = unpacks(packs(td, encoding='utf-8'), encoding='utf-8') - assert_equal(re, td) - packer = Packer(encoding='utf-8') - data = packer.pack(td) - re = Unpacker(BytesIO(data), encoding='utf-8').unpack() - assert_equal(re, td) - -def testPackUTF32(): - test_data = [ - "", "abcd", ("defgh",), "Русский текст", - ] - for td in test_data: - re = unpacks(packs(td, encoding='utf-32'), encoding='utf-32') - assert_equal(re, td) - -def testPackBytes(): - test_data = [ - b"", b"abcd", (b"defgh",), - ] - for td in test_data: - check(td) - -def testIgnoreUnicodeErrors(): - re = unpacks(packs(b'abc\xeddef'), - encoding='utf-8', unicode_errors='ignore') - assert_equal(re, "abcdef") - -@raises(UnicodeDecodeError) -def testStrictUnicodeUnpack(): - unpacks(packs(b'abc\xeddef'), encoding='utf-8') - -@raises(UnicodeEncodeError) -def testStrictUnicodePack(): - packs("abc\xeddef", encoding='ascii', unicode_errors='strict') - -def testIgnoreErrorsPack(): - re = unpacks(packs("abcФФФdef", encoding='ascii', unicode_errors='ignore'), encoding='utf-8') - assert_equal(re, "abcdef") - -@raises(TypeError) -def testNoEncoding(): - packs("abc", encoding=None) - -def testDecodeBinary(): - re = unpacks(packs("abc"), encoding=None) - assert_equal(re, b"abc") - -if __name__ == '__main__': - main() diff --git a/test3/test_sequnpack.py b/test3/test_sequnpack.py deleted file mode 100644 index 5fd377c4..00000000 --- a/test3/test_sequnpack.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - - - -from msgpack import Unpacker - -def test_foobar(): - unpacker = Unpacker(read_size=3) - unpacker.feed(b'foobar') - assert unpacker.unpack() == ord(b'f') - assert unpacker.unpack() == ord(b'o') - assert unpacker.unpack() == ord(b'o') - assert unpacker.unpack() == ord(b'b') - assert unpacker.unpack() == ord(b'a') - assert unpacker.unpack() == ord(b'r') - try: - o = unpacker.unpack() - print(("Oops!", o)) - assert 0 - except StopIteration: - assert 1 - else: - assert 0 - unpacker.feed(b'foo') - unpacker.feed(b'bar') - - k = 0 - for o, e in zip(unpacker, b'foobarbaz'): - assert o == e - k += 1 - assert k == len(b'foobar') - -if __name__ == '__main__': - test_foobar() -
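The rewritten tests above use the current msgpack-python API (packb/unpackb with use_bin_type and raw) in place of the removed packs/unpacks and encoding= parameters. The following round-trip sketch is illustrative only and not part of the diff; it assumes the msgpack >= 1.0 defaults (use_bin_type=True when packing, raw=False when unpacking), which is what the new tests rely on.

    import msgpack

    # str values are packed as the msgpack str type, bytes as the bin type
    # (use_bin_type=True is the default in msgpack >= 1.0).
    packed_str = msgpack.packb("abc")
    packed_bin = msgpack.packb(b"abc")

    assert msgpack.unpackb(packed_str) == "abc"             # raw=False (default): str type comes back as str
    assert msgpack.unpackb(packed_str, raw=True) == b"abc"  # raw=True: str type decoded as bytes
    assert msgpack.unpackb(packed_bin) == b"abc"            # bin type is always returned as bytes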