From 1d53d63b97d6c598f1c817e2e741c77c1bfd3020 Mon Sep 17 00:00:00 2001 From: Phil Krylov Date: Tue, 25 Sep 2018 00:34:06 +0300 Subject: [PATCH 1/3] sort_keys support (#1) --- msgpack/_packer.pyx | 60 +++++++++++++++++++++++++++++++++------------ msgpack/fallback.py | 10 ++++++-- 2 files changed, 52 insertions(+), 18 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 225f24ae..bd05a1ac 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -1,6 +1,8 @@ # coding: utf-8 #cython: embedsignature=True, c_string_encoding=ascii +from operator import itemgetter + from cpython cimport * from cpython.version cimport PY_MAJOR_VERSION from cpython.exc cimport PyErr_WarnEx @@ -95,6 +97,9 @@ cdef class Packer(object): :param str encoding: (deprecated) Convert unicode to bytes with this encoding. (default: 'utf-8') + + :param bool sort_keys: + Iterate dictionaries in lexicographical order. (default: False) """ cdef msgpack_packer pk cdef object _default @@ -105,6 +110,7 @@ cdef class Packer(object): cdef bint strict_types cdef bool use_float cdef bint autoreset + cdef bool sort_keys def __cinit__(self): cdef int buf_size = 1024*1024 @@ -116,13 +122,14 @@ cdef class Packer(object): def __init__(self, default=None, encoding=None, unicode_errors=None, bint use_single_float=False, bint autoreset=True, bint use_bin_type=False, - bint strict_types=False): + bint strict_types=False, bint sort_keys=False): if encoding is not None: PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated.", 1) self.use_float = use_single_float self.strict_types = strict_types self.autoreset = autoreset self.pk.use_bin_type = use_bin_type + self.sort_keys = sort_keys if default is not None: if not PyCallable_Check(default): raise TypeError("default must be a callable.") @@ -228,22 +235,36 @@ cdef class Packer(object): raise PackValueError("dict is too large") ret = msgpack_pack_map(&self.pk, L) if ret == 0: - for k, v in d.iteritems(): - ret = self._pack(k, nest_limit-1) 
- if ret != 0: break - ret = self._pack(v, nest_limit-1) - if ret != 0: break + if self.sort_keys: + for k, v in sorted(d.iteritems(), key=itemgetter(1)): + ret = self._pack(k, nest_limit-1) + if ret != 0: break + ret = self._pack(v, nest_limit-1) + if ret != 0: break + else: + for k, v in d.iteritems(): + ret = self._pack(k, nest_limit-1) + if ret != 0: break + ret = self._pack(v, nest_limit-1) + if ret != 0: break elif not strict_types and PyDict_Check(o): L = len(o) if L > ITEM_LIMIT: raise PackValueError("dict is too large") ret = msgpack_pack_map(&self.pk, L) if ret == 0: - for k, v in o.items(): - ret = self._pack(k, nest_limit-1) - if ret != 0: break - ret = self._pack(v, nest_limit-1) - if ret != 0: break + if self.sort_keys: + for k, v in sorted(o.items()): + ret = self._pack(k, nest_limit-1) + if ret != 0: break + ret = self._pack(v, nest_limit-1) + if ret != 0: break + else: + for k, v in o.items(): + ret = self._pack(k, nest_limit-1) + if ret != 0: break + ret = self._pack(v, nest_limit-1) + if ret != 0: break elif type(o) is ExtType if strict_types else isinstance(o, ExtType): # This should be before Tuple because ExtType is namedtuple. 
longval = o.code @@ -334,11 +355,18 @@ cdef class Packer(object): """ cdef int ret = msgpack_pack_map(&self.pk, len(pairs)) if ret == 0: - for k, v in pairs: - ret = self._pack(k) - if ret != 0: break - ret = self._pack(v) - if ret != 0: break + if self.sort_keys: + for k, v in sorted(pairs, key=itemgetter(0)): + ret = self._pack(k) + if ret != 0: break + ret = self._pack(v) + if ret != 0: break + else: + for k, v in pairs: + ret = self._pack(k) + if ret != 0: break + ret = self._pack(v) + if ret != 0: break if ret == -1: raise MemoryError elif ret: # should not happen diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 20ad4c90..8f031c18 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -1,5 +1,6 @@ """Fallback pure Python implementation of msgpack""" +from operator import itemgetter import sys import struct import warnings @@ -720,10 +721,13 @@ class Packer(object): :param str unicode_errors: Error handler for encoding unicode. (default: 'strict') + + :param bool sort_keys: + Iterate dictionaries in lexicographical order. 
(default: False) """ def __init__(self, default=None, encoding=None, unicode_errors=None, use_single_float=False, autoreset=True, use_bin_type=False, - strict_types=False): + strict_types=False, sort_keys=False): if encoding is None: encoding = 'utf_8' else: @@ -740,6 +744,7 @@ def __init__(self, default=None, encoding=None, unicode_errors=None, self._use_bin_type = use_bin_type self._encoding = encoding self._unicode_errors = unicode_errors + self._sort_keys = sort_keys self._buffer = StringIO() if default is not None: if not callable(default): @@ -949,7 +954,8 @@ def _pack_map_header(self, n): def _pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT): self._pack_map_header(n) - for (k, v) in pairs: + for (k, v) in (sorted(pairs, key=itemgetter(0)) if self._sort_keys + else pairs): self._pack(k, nest_limit - 1) self._pack(v, nest_limit - 1) From f49840e79f6aaaa51468ecfad1728dc74bafe86e Mon Sep 17 00:00:00 2001 From: Phil Krylov Date: Tue, 25 Sep 2018 00:34:46 +0300 Subject: [PATCH 2/3] Updated README --- README.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.rst b/README.rst index 8925a65c..7098562e 100644 --- a/README.rst +++ b/README.rst @@ -1,3 +1,6 @@ +This is a fork of the original msgpack-python project, adding support for +the ``sort_keys`` serialization option. 
+ ====================== MessagePack for Python ====================== From d792681e135e4f5d1ef57b4183c59e2601502a4c Mon Sep 17 00:00:00 2001 From: Phil Krylov Date: Tue, 25 Sep 2018 01:02:29 +0300 Subject: [PATCH 3/3] Sort keys (#2) * sort_keys support --- msgpack/_packer.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index bd05a1ac..c0b793fc 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -236,7 +236,7 @@ cdef class Packer(object): ret = msgpack_pack_map(&self.pk, L) if ret == 0: if self.sort_keys: - for k, v in sorted(d.iteritems(), key=itemgetter(1)): + for k, v in sorted(d.iteritems(), key=itemgetter(0)): ret = self._pack(k, nest_limit-1) if ret != 0: break ret = self._pack(v, nest_limit-1)