diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 000000000..8857958fd --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,33 @@ +name: CI + +on: [push, pull_request] + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, windows-latest] + python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12', 'pypy3.9' ] + exclude: + - os: windows-latest + python-version: pypy3.9 + name: Python ${{ matrix.python-version }} (${{ matrix.os }}) + steps: + - uses: actions/checkout@v3 + - name: Setup python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install "pytest<8.0.0" pytest-benchmark numpy arrow ruamel.yaml cloudpickle lz4 + - name: Install cryptography (but not for pypy on windows) + if: ${{ !((matrix.os == 'windows-latest') && (matrix.python-version == 'pypy3.9')) }} + run: | + pip install cryptography + - name: Run tests + run: | + py.test tests/ --benchmark-disable --showlocals --verbose diff --git a/.gitignore b/.gitignore index 0d20b6487..d9ff6080a 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,14 @@ +.project +.pydevproject *.pyc +__pycache__ +.settings +*.egg-info +dist/ +build/ +.pypirc +.pypi* +.cache +.tox +.pytest_cache +example_* diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 000000000..77e91eab1 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,20 @@ +# .readthedocs.yml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/conf.py + +build: + os: "ubuntu-22.04" + tools: + python: "3.8" + +python: + install: + - method: pip + path: . 
diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 82388580e..000000000 --- a/.travis.yml +++ /dev/null @@ -1,8 +0,0 @@ -language: python -python: - - 2.5 - - 2.6 - - 2.7 - -install: pip install Twisted -script: trial construct diff --git a/CHANGELOG.rst b/CHANGELOG.rst deleted file mode 100644 index 650af9764..000000000 --- a/CHANGELOG.rst +++ /dev/null @@ -1,47 +0,0 @@ -========= -Changelog -========= - -2.06 -==== - -Bugfixes --------- - - * Fix regression with Containers not being printable (#10) - -2.05 -==== - -Bugfixes --------- - - * Add a license (#1) - * Fix text parsing of hex and binary (#2) - * Container fixups - * Proper dictionary behavior in corner cases - * Correct bool(), len(), and "in" operator - -Enhancements ------------- - - * Introduce strong unit tests - * Container improvements - * Fully implement dict interface - * Speedups - * Container creation is around 3.8x faster - * Container attribute setting is around 4.1x faster - * Container iteration is around 1.6x faster - -Removals --------- - - * Completely replace AttrDict with Container - -Other ------ - - * Too many whitespace cleanups to count - * Lots of docstring cleanups - * Lots of documentation - * Documentation cleanups (#3, #8) diff --git a/LICENSE b/LICENSE index 6529f04a8..f05364146 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,7 @@ -Copyright (C) 2009 Tomer Filiba, 2010-2011 Corbin Simpson +Copyright (C) 2006-2020 + Arkadiusz Bulski (arek.bulski@gmail.com) + Tomer Filiba (tomerfiliba@gmail.com) + Corbin Simpson (MostAwesomeDude@gmail.com) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/MANIFEST.in b/MANIFEST.in index 2faaa96b3..a5021c60e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,2 @@ include README.rst -include todo.txt -include docs/*.rst -include docs/conf.py -include docs/Makefile +include LICENSE diff --git a/Makefile 
b/Makefile new file mode 100644 index 000000000..798f75c6d --- /dev/null +++ b/Makefile @@ -0,0 +1,37 @@ +help: + cat Makefile + +test: + python3 -m pytest --benchmark-disable --showlocals + +verbose: + python3 -m pytest --benchmark-disable --showlocals --verbose + +xfails: + python3 -m pytest --benchmark-disable --verbose | egrep --color=always "xfail|XFAIL|xpass|XPASS" + +cover: + python3 -m pytest --benchmark-disable --cov construct --cov-report html --cov-report term --verbose + +bench: + python3 -m pytest --benchmark-enable --benchmark-columns=min,stddev --benchmark-sort=name --benchmark-compare + +benchsave: + python3 -m pytest --benchmark-enable --benchmark-columns=min,stddev --benchmark-sort=name --benchmark-compare --benchmark-autosave + +html: + cd docs; make html + +installdeps: + apt-get install python3 python3-pip python3-sphinx --upgrade + python3 -m pip install pytest pytest-benchmark pytest-cov twine wheel --upgrade + python3 -m pip install numpy arrow ruamel.yaml cloudpickle lz4 cryptography --upgrade + +version: + ./version-increment + +upload: + python3 ./setup.py sdist bdist_wheel + python3 -m twine check dist/* + python3 -m twine upload dist/* + diff --git a/README.rst b/README.rst index f32b8e40f..052bdab69 100644 --- a/README.rst +++ b/README.rst @@ -1,31 +1,44 @@ -========= -Construct -========= - -Construct is a powerful declarative parser for binary data. - -It is based on the concept of defining data structures in a declarative -manner, rather than procedural code: Simple constructs can be combined -hierarchically to form increasingly complex data structures. It's the first -library that makes parsing fun, instead of the usual headache it is today. 
- -Construct features bit and byte granularity, symmetrical operation (parsing -and building), component-oriented declarative design, easy debugging and -testing, an easy-to-extend subclass system, and lots of primitive -constructs to make your work easier: - - * Fields - * Structs - * Unions - * Repeaters - * Meta constructs - * Switches - * On-demand parsing - * Pointers - * And more! - -Requirements -============ - -Construct should run on any Python 2.5+ implementation. It has no external -dependencies. +Construct 2.10 +=================== + +Construct is a powerful **declarative** and **symmetrical** parser and builder for binary data. + +Instead of writing *imperative code* to parse a piece of data, you declaratively define a *data structure* that describes your data. As this data structure is not code, you can use it in one direction to *parse* data into Pythonic objects, and in the other direction, to *build* objects into binary data. + +The library provides both simple, atomic constructs (such as integers of various sizes), as well as composite ones which allow you to form hierarchical and sequential structures of increasing complexity.
Construct features **bit and byte granularity**, easy debugging and testing, an **easy-to-extend subclass system**, and lots of primitive constructs to make your work easier: + +* Fields: raw bytes or numerical types +* Structs and Sequences: combine simpler constructs into more complex ones +* Bitwise: splitting bytes into bit-grained fields +* Adapters: change how data is represented +* Arrays/Ranges: duplicate constructs +* Meta-constructs: use the context (history) to compute the size of data +* If/Switch: branch the computational path based on the context +* On-demand (lazy) parsing: read and parse only what you require +* Pointers: jump from here to there in the data stream +* Tunneling: prefix data with a byte count or compress it + + +Example +--------- + +A ``Struct`` is a collection of ordered, named fields:: + + >>> format = Struct( + ... "signature" / Const(b"BMP"), + ... "width" / Int8ub, + ... "height" / Int8ub, + ... "pixels" / Array(this.width * this.height, Byte), + ... 
) + >>> format.build(dict(width=3,height=2,pixels=[7,8,9,11,12,13])) + b'BMP\x03\x02\x07\x08\t\x0b\x0c\r' + >>> format.parse(b'BMP\x03\x02\x07\x08\t\x0b\x0c\r') + Container(signature=b'BMP')(width=3)(height=2)(pixels=[7, 8, 9, 11, 12, 13]) + +A ``Sequence`` is a collection of ordered fields, and differs from ``Array`` and ``GreedyRange`` in that those two are homogenous:: + + >>> format = Sequence(PascalString(Byte, "utf8"), GreedyRange(Byte)) + >>> format.build([u"lalaland", [255,1,2]]) + b'\nlalaland\xff\x01\x02' + >>> format.parse(b"\x004361789432197") + ['', [52, 51, 54, 49, 55, 56, 57, 52, 51, 50, 49, 57, 55]] diff --git a/construct/__init__.py b/construct/__init__.py index feac2960b..f5b5a8cab 100644 --- a/construct/__init__.py +++ b/construct/__init__.py @@ -1,110 +1,215 @@ -""" - #### #### - ## #### ## ## #### ###### ##### ## ## #### ###### ## ## - ## ## ## ### ## ## ## ## ## ## ## ## ## #### ## - ## ## ## ###### ### ## ##### ## ## ## ## ## - ## ## ## ## ### ## ## ## ## ## ## ## ## ## - #### #### ## ## #### ## ## ## ##### #### ## ###### - - Parsing made even more fun (and faster too) +r""" +Construct 2 -- Parsing Made Fun Homepage: - http://construct.wikispaces.com (including online tutorial) - -Typical usage: - >>> from construct import * + https://github.com/construct/construct + http://construct.readthedocs.org Hands-on example: >>> from construct import * - >>> s = Struct("foo", - ... UBInt8("a"), - ... UBInt16("b"), + >>> s = Struct( + ... "a" / Byte, + ... "b" / Short, ... 
) - >>> s.parse("\\x01\\x02\\x03") - Container(a = 1, b = 515) - >>> print s.parse("\\x01\\x02\\x03") + >>> print(s.parse(b"\x01\x02\x03")) Container: a = 1 b = 515 - >>> s.build(Container(a = 1, b = 0x0203)) - "\\x01\\x02\\x03" + >>> s.build(Container(a=1, b=0x0203)) + b"\x01\x02\x03" """ from construct.core import * -from construct.adapters import * -from construct.macros import * -from debug import Probe, Debugger - - -#=============================================================================== -# Metadata -#=============================================================================== -__author__ = "tomer filiba (tomerfiliba [at] gmail.com)" -__maintainer__ = "Corbin Simpson " -__version__ = "2.06" +from construct.expr import * +from construct.debug import * +from construct.version import * +from construct import lib -#=============================================================================== -# Shorthand expressions -#=============================================================================== -Bits = BitField -Byte = UBInt8 -Bytes = Field -Const = ConstAdapter -Tunnel = TunnelAdapter -Embed = Embedded #=============================================================================== -# Deprecated names -# Next scheduled name cleanout: 2.1 +# metadata #=============================================================================== -import functools, warnings - -def deprecated(f): - @functools.wraps(f) - def wrapper(*args, **kwargs): - warnings.warn( - "This name is deprecated, use %s instead" % f.__name__, - DeprecationWarning, stacklevel=2) - return f(*args, **kwargs) - return wrapper - -MetaBytes = deprecated(MetaField) -GreedyRepeater = deprecated(GreedyRange) -OptionalGreedyRepeater = deprecated(OptionalGreedyRange) -Repeater = deprecated(Range) -StrictRepeater = deprecated(Array) -MetaRepeater = deprecated(Array) -OneOfValidator = deprecated(OneOf) -NoneOfValidator = deprecated(NoneOf) +__author__ = "Arkadiusz Bulski , Tomer Filiba , Corbin
Simpson " +__version__ = version_string #=============================================================================== # exposed names #=============================================================================== __all__ = [ - 'AdaptationError', 'Adapter', 'Alias', 'Aligned', 'AlignedStruct', - 'Anchor', 'Array', 'ArrayError', 'BFloat32', 'BFloat64', 'Bit', 'BitField', - 'BitIntegerAdapter', 'BitIntegerError', 'BitStruct', 'Bits', 'Bitwise', - 'Buffered', 'Byte', 'Bytes', 'CString', 'CStringAdapter', 'Const', - 'ConstAdapter', 'ConstError', 'Construct', 'ConstructError', 'Container', - 'Debugger', 'Embed', 'Embedded', 'EmbeddedBitStruct', 'Enum', 'ExprAdapter', - 'Field', 'FieldError', 'Flag', 'FlagsAdapter', 'FlagsContainer', - 'FlagsEnum', 'FormatField', 'GreedyRange', 'GreedyRepeater', - 'HexDumpAdapter', 'If', 'IfThenElse', 'IndexingAdapter', 'LFloat32', - 'LFloat64', 'LazyBound', 'LengthValueAdapter', 'ListContainer', - 'MappingAdapter', 'MappingError', 'MetaArray', 'MetaBytes', 'MetaField', - 'MetaRepeater', 'NFloat32', 'NFloat64', 'Nibble', 'NoneOf', - 'NoneOfValidator', 'Octet', 'OnDemand', 'OnDemandPointer', 'OneOf', - 'OneOfValidator', 'OpenRange', 'Optional', 'OptionalGreedyRange', - 'OptionalGreedyRepeater', 'PaddedStringAdapter', 'Padding', - 'PaddingAdapter', 'PaddingError', 'PascalString', 'Pass', 'Peek', - 'Pointer', 'PrefixedArray', 'Probe', 'Range', 'RangeError', 'Reconfig', - 'Rename', 'RepeatUntil', 'Repeater', 'Restream', 'SBInt16', 'SBInt32', - 'SBInt64', 'SBInt8', 'SLInt16', 'SLInt32', 'SLInt64', 'SLInt8', 'SNInt16', - 'SNInt32', 'SNInt64', 'SNInt8', 'Select', 'SelectError', 'Sequence', - 'SizeofError', 'SlicingAdapter', 'StaticField', 'StrictRepeater', 'String', - 'StringAdapter', 'Struct', 'Subconstruct', 'Switch', 'SwitchError', - 'SymmetricMapping', 'Terminator', 'TerminatorError', 'Tunnel', - 'TunnelAdapter', 'UBInt16', 'UBInt32', 'UBInt64', 'UBInt8', 'ULInt16', - 'ULInt32', 'ULInt64', 'ULInt8', 'UNInt16', 'UNInt32', 'UNInt64', 
'UNInt8', - 'Union', 'ValidationError', 'Validator', 'Value', "Magic", + '__author__', + '__version__', + 'abs_', + 'AdaptationError', + 'Adapter', + 'Aligned', + 'AlignedStruct', + 'Array', + 'Bit', + 'BitsInteger', + 'BitsSwapped', + 'BitStruct', + 'BitwisableString', + 'Bitwise', + 'Byte', + 'Bytes', + 'BytesInteger', + 'ByteSwapped', + 'Bytewise', + 'CancelParsing', + 'Check', + 'CheckError', + 'Checksum', + 'ChecksumError', + 'CipherError', + 'Compiled', + 'Compressed', + 'CompressedLZ4', + 'Computed', + 'Const', + 'ConstError', + 'Construct', + 'ConstructError', + 'Container', + 'CString', + 'Debugger', + 'Default', + 'Double', + 'EncryptedSym', + 'EncryptedSymAead', + 'Enum', + 'EnumInteger', + 'EnumIntegerString', + 'Error', + 'ExplicitError', + 'ExprAdapter', + 'ExprSymmetricAdapter', + 'ExprValidator', + 'Filter', + 'FixedSized', + 'Flag', + 'FlagsEnum', + 'FocusedSeq', + 'FormatField', + 'FormatFieldError', + 'FuncPath', + 'globalPrintFalseFlags', + 'globalPrintFullStrings', + 'GreedyBytes', + 'GreedyRange', + 'GreedyString', + 'Half', + 'Hex', + 'HexDump', + 'If', + 'IfThenElse', + 'Index', + 'IndexFieldError', + 'Indexing', + 'Int', + 'IntegerError', + 'Lazy', + 'LazyArray', + 'LazyBound', + 'LazyContainer', + 'LazyListContainer', + 'LazyStruct', + 'len_', + 'lib', + 'list_', + 'ListContainer', + 'Long', + 'Mapping', + 'MappingError', + 'max_', + 'min_', + 'NamedTuple', + 'NamedTupleError', + 'Nibble', + 'NoneOf', + 'NullStripped', + 'NullTerminated', + 'Numpy', + 'obj_', + 'Octet', + 'OffsettedEnd', + 'OneOf', + 'Optional', + 'Padded', + 'PaddedString', + 'Padding', + 'PaddingError', + 'PascalString', + 'Pass', + 'Path', + 'Path2', + 'Peek', + 'Pickled', + 'Pointer', + 'possiblestringencodings', + 'Prefixed', + 'PrefixedArray', + 'Probe', + 'ProcessRotateLeft', + 'ProcessXor', + 'RangeError', + 'RawCopy', + 'Rebuffered', + 'RebufferedBytesIO', + 'Rebuild', + 'release_date', + 'Renamed', + 'RepeatError', + 'RepeatUntil', + 'RestreamData', + 
'Restreamed', + 'RestreamedBytesIO', + 'RotationError', + 'Seek', + 'Select', + 'SelectError', + 'Sequence', + 'setGlobalPrintFalseFlags', + 'setGlobalPrintFullStrings', + 'setGlobalPrintPrivateEntries', + 'Short', + 'Single', + 'SizeofError', + 'Slicing', + 'StopFieldError', + 'StopIf', + 'stream_iseof', + 'stream_read', + 'stream_read_entire', + 'stream_seek', + 'stream_size', + 'stream_tell', + 'stream_write', + 'StreamError', + 'StringEncoded', + 'StringError', + 'Struct', + 'Subconstruct', + 'sum_', + 'Switch', + 'SwitchError', + 'SymmetricAdapter', + 'Tell', + 'Terminated', + 'TerminatedError', + 'this', + 'Timestamp', + 'TimestampAdapter', + 'TimestampError', + 'Transformed', + 'Tunnel', + 'Union', + 'UnionError', + 'ValidationError', + 'Validator', + 'VarInt', + 'version', + 'version_string', + 'ZigZag', ] +__all__ += ["Int%s%s%s" % (n,us,bln) for n in (8,16,24,32,64) for us in "us" for bln in "bln"] +__all__ += ["Float%s%s" % (n,bln) for n in (16,32,64) for bln in "bln"] diff --git a/construct/adapters.py b/construct/adapters.py deleted file mode 100644 index ef857fa4c..000000000 --- a/construct/adapters.py +++ /dev/null @@ -1,468 +0,0 @@ -from core import Adapter, AdaptationError, Pass -from lib import int_to_bin, bin_to_int, swap_bytes, StringIO -from lib import FlagsContainer, HexString - - -#=============================================================================== -# exceptions -#=============================================================================== -class BitIntegerError(AdaptationError): - __slots__ = [] -class MappingError(AdaptationError): - __slots__ = [] -class ConstError(AdaptationError): - __slots__ = [] -class ValidationError(AdaptationError): - __slots__ = [] -class PaddingError(AdaptationError): - __slots__ = [] - -#=============================================================================== -# adapters -#=============================================================================== -class BitIntegerAdapter(Adapter): - """ 
- Adapter for bit-integers (converts bitstrings to integers, and vice versa). - See BitField. - - Parameters: - * subcon - the subcon to adapt - * width - the size of the subcon, in bits - * swapped - whether to swap byte order (little endian/big endian). - default is False (big endian) - * signed - whether the value is signed (two's complement). the default - is False (unsigned) - * bytesize - number of bits per byte, used for byte-swapping (if swapped). - default is 8. - """ - __slots__ = ["width", "swapped", "signed", "bytesize"] - def __init__(self, subcon, width, swapped = False, signed = False, - bytesize = 8): - Adapter.__init__(self, subcon) - self.width = width - self.swapped = swapped - self.signed = signed - self.bytesize = bytesize - def _encode(self, obj, context): - if obj < 0 and not self.signed: - raise BitIntegerError("object is negative, but field is not signed", - obj) - obj2 = int_to_bin(obj, width = self.width) - if self.swapped: - obj2 = swap_bytes(obj2, bytesize = self.bytesize) - return obj2 - def _decode(self, obj, context): - if self.swapped: - obj = swap_bytes(obj, bytesize = self.bytesize) - return bin_to_int(obj, signed = self.signed) - -class MappingAdapter(Adapter): - """ - Adapter that maps objects to other objects. - See SymmetricMapping and Enum. - - Parameters: - * subcon - the subcon to map - * decoding - the decoding (parsing) mapping (a dict) - * encoding - the encoding (building) mapping (a dict) - * decdefault - the default return value when the object is not found - in the decoding mapping. if no object is given, an exception is raised. - if `Pass` is used, the unmapped object will be passed as-is - * encdefault - the default return value when the object is not found - in the encoding mapping. if no object is given, an exception is raised. 
- if `Pass` is used, the unmapped object will be passed as-is - """ - __slots__ = ["encoding", "decoding", "encdefault", "decdefault"] - def __init__(self, subcon, decoding, encoding, - decdefault = NotImplemented, encdefault = NotImplemented): - Adapter.__init__(self, subcon) - self.decoding = decoding - self.encoding = encoding - self.decdefault = decdefault - self.encdefault = encdefault - def _encode(self, obj, context): - try: - return self.encoding[obj] - except (KeyError, TypeError): - if self.encdefault is NotImplemented: - raise MappingError("no encoding mapping for %r" % (obj,)) - if self.encdefault is Pass: - return obj - return self.encdefault - def _decode(self, obj, context): - try: - return self.decoding[obj] - except (KeyError, TypeError): - if self.decdefault is NotImplemented: - raise MappingError("no decoding mapping for %r" % (obj,)) - if self.decdefault is Pass: - return obj - return self.decdefault - -class FlagsAdapter(Adapter): - """ - Adapter for flag fields. Each flag is extracted from the number, resulting - in a FlagsContainer object. Not intended for direct usage. - See FlagsEnum. - - Parameters - * subcon - the subcon to extract - * flags - a dictionary mapping flag-names to their value - """ - __slots__ = ["flags"] - def __init__(self, subcon, flags): - Adapter.__init__(self, subcon) - self.flags = flags - def _encode(self, obj, context): - flags = 0 - for name, value in self.flags.iteritems(): - if getattr(obj, name, False): - flags |= value - return flags - def _decode(self, obj, context): - obj2 = FlagsContainer() - for name, value in self.flags.iteritems(): - setattr(obj2, name, bool(obj & value)) - return obj2 - -class StringAdapter(Adapter): - """ - Adapter for strings. Converts a sequence of characters into a python - string, and optionally handles character encoding. - See String. 
- - Parameters: - * subcon - the subcon to convert - * encoding - the character encoding name (e.g., "utf8"), or None to - return raw bytes (usually 8-bit ASCII). - """ - __slots__ = ["encoding"] - def __init__(self, subcon, encoding = None): - Adapter.__init__(self, subcon) - self.encoding = encoding - def _encode(self, obj, context): - if self.encoding: - obj = obj.encode(self.encoding) - return obj - def _decode(self, obj, context): - obj = "".join(obj) - if self.encoding: - obj = obj.decode(self.encoding) - return obj - -class PaddedStringAdapter(Adapter): - r""" - Adapter for padded strings. - See String. - - Parameters: - * subcon - the subcon to adapt - * padchar - the padding character. default is "\x00". - * paddir - the direction where padding is placed ("right", "left", or - "center"). the default is "right". - * trimdir - the direction where trimming will take place ("right" or - "left"). the default is "right". trimming is only meaningful for - building, when the given string is too long. 
- """ - __slots__ = ["padchar", "paddir", "trimdir"] - def __init__(self, subcon, padchar = "\x00", paddir = "right", - trimdir = "right"): - if paddir not in ("right", "left", "center"): - raise ValueError("paddir must be 'right', 'left' or 'center'", - paddir) - if trimdir not in ("right", "left"): - raise ValueError("trimdir must be 'right' or 'left'", trimdir) - Adapter.__init__(self, subcon) - self.padchar = padchar - self.paddir = paddir - self.trimdir = trimdir - def _decode(self, obj, context): - if self.paddir == "right": - obj = obj.rstrip(self.padchar) - elif self.paddir == "left": - obj = obj.lstrip(self.padchar) - else: - obj = obj.strip(self.padchar) - return obj - def _encode(self, obj, context): - size = self._sizeof(context) - if self.paddir == "right": - obj = obj.ljust(size, self.padchar) - elif self.paddir == "left": - obj = obj.rjust(size, self.padchar) - else: - obj = obj.center(size, self.padchar) - if len(obj) > size: - if self.trimdir == "right": - obj = obj[:size] - else: - obj = obj[-size:] - return obj - -class LengthValueAdapter(Adapter): - """ - Adapter for length-value pairs. It extracts only the value from the - pair, and calculates the length based on the value. - See PrefixedArray and PascalString. - - Parameters: - * subcon - the subcon returning a length-value pair - """ - __slots__ = [] - def _encode(self, obj, context): - return (len(obj), obj) - def _decode(self, obj, context): - return obj[1] - -class CStringAdapter(StringAdapter): - r""" - Adapter for C-style strings (strings terminated by a terminator char). - - Parameters: - * subcon - the subcon to convert - * terminators - a sequence of terminator chars. default is "\x00". - * encoding - the character encoding to use (e.g., "utf8"), or None to - return raw-bytes. the terminator characters are not affected by the - encoding. 
- """ - __slots__ = ["terminators"] - def __init__(self, subcon, terminators = "\x00", encoding = None): - StringAdapter.__init__(self, subcon, encoding = encoding) - self.terminators = terminators - def _encode(self, obj, context): - return StringAdapter._encode(self, obj, context) + self.terminators[0] - def _decode(self, obj, context): - return StringAdapter._decode(self, obj[:-1], context) - -class TunnelAdapter(Adapter): - """ - Adapter for tunneling (as in protocol tunneling). A tunnel is construct - nested upon another (layering). For parsing, the lower layer first parses - the data (note: it must return a string!), then the upper layer is called - to parse that data (bottom-up). For building it works in a top-down manner; - first the upper layer builds the data, then the lower layer takes it and - writes it to the stream. - - Parameters: - * subcon - the lower layer subcon - * inner_subcon - the upper layer (tunneled/nested) subcon - - Example: - # a pascal string containing compressed data (zlib encoding), so first - # the string is read, decompressed, and finally re-parsed as an array - # of UBInt16 - TunnelAdapter( - PascalString("data", encoding = "zlib"), - GreedyRange(UBInt16("elements")) - ) - """ - __slots__ = ["inner_subcon"] - def __init__(self, subcon, inner_subcon): - Adapter.__init__(self, subcon) - self.inner_subcon = inner_subcon - def _decode(self, obj, context): - return self.inner_subcon._parse(StringIO(obj), context) - def _encode(self, obj, context): - stream = StringIO() - self.inner_subcon._build(obj, stream, context) - return stream.getvalue() - -class ExprAdapter(Adapter): - """ - A generic adapter that accepts 'encoder' and 'decoder' as parameters. You - can use ExprAdapter instead of writing a full-blown class when only a - simple expression is needed. 
- - Parameters: - * subcon - the subcon to adapt - * encoder - a function that takes (obj, context) and returns an encoded - version of obj - * decoder - a function that takes (obj, context) and returns an decoded - version of obj - - Example: - ExprAdapter(UBInt8("foo"), - encoder = lambda obj, ctx: obj / 4, - decoder = lambda obj, ctx: obj * 4, - ) - """ - __slots__ = ["_encode", "_decode"] - def __init__(self, subcon, encoder, decoder): - Adapter.__init__(self, subcon) - self._encode = encoder - self._decode = decoder - -class HexDumpAdapter(Adapter): - """ - Adapter for hex-dumping strings. It returns a HexString, which is a string - """ - __slots__ = ["linesize"] - def __init__(self, subcon, linesize = 16): - Adapter.__init__(self, subcon) - self.linesize = linesize - def _encode(self, obj, context): - return obj - def _decode(self, obj, context): - return HexString(obj, linesize = self.linesize) - -class ConstAdapter(Adapter): - """ - Adapter for enforcing a constant value ("magic numbers"). When decoding, - the return value is checked; when building, the value is substituted in. 
- - Parameters: - * subcon - the subcon to validate - * value - the expected value - - Example: - Const(Field("signature", 2), "MZ") - """ - __slots__ = ["value"] - def __init__(self, subcon, value): - Adapter.__init__(self, subcon) - self.value = value - def _encode(self, obj, context): - if obj is None or obj == self.value: - return self.value - else: - raise ConstError("expected %r, found %r" % (self.value, obj)) - def _decode(self, obj, context): - if obj != self.value: - raise ConstError("expected %r, found %r" % (self.value, obj)) - return obj - -class SlicingAdapter(Adapter): - """ - Adapter for slicing a list (getting a slice from that list) - - Parameters: - * subcon - the subcon to slice - * start - start index - * stop - stop index (or None for up-to-end) - * step - step (or None for every element) - """ - __slots__ = ["start", "stop", "step"] - def __init__(self, subcon, start, stop = None): - Adapter.__init__(self, subcon) - self.start = start - self.stop = stop - def _encode(self, obj, context): - if self.start is None: - return obj - return [None] * self.start + obj - def _decode(self, obj, context): - return obj[self.start:self.stop] - -class IndexingAdapter(Adapter): - """ - Adapter for indexing a list (getting a single item from that list) - - Parameters: - * subcon - the subcon to index - * index - the index of the list to get - """ - __slots__ = ["index"] - def __init__(self, subcon, index): - Adapter.__init__(self, subcon) - if type(index) is not int: - raise TypeError("index must be an integer", type(index)) - self.index = index - def _encode(self, obj, context): - return [None] * self.index + [obj] - def _decode(self, obj, context): - return obj[self.index] - -class PaddingAdapter(Adapter): - r""" - Adapter for padding. - - Parameters: - * subcon - the subcon to pad - * pattern - the padding pattern (character). default is "\x00" - * strict - whether or not to verify, during parsing, that the given - padding matches the padding pattern. 
default is False (unstrict) - """ - __slots__ = ["pattern", "strict"] - def __init__(self, subcon, pattern = "\x00", strict = False): - Adapter.__init__(self, subcon) - self.pattern = pattern - self.strict = strict - def _encode(self, obj, context): - return self._sizeof(context) * self.pattern - def _decode(self, obj, context): - if self.strict: - expected = self._sizeof(context) * self.pattern - if obj != expected: - raise PaddingError("expected %r, found %r" % (expected, obj)) - return obj - - -#=============================================================================== -# validators -#=============================================================================== -class Validator(Adapter): - """ - Abstract class: validates a condition on the encoded/decoded object. - Override _validate(obj, context) in deriving classes. - - Parameters: - * subcon - the subcon to validate - """ - __slots__ = [] - def _decode(self, obj, context): - if not self._validate(obj, context): - raise ValidationError("invalid object", obj) - return obj - def _encode(self, obj, context): - return self._decode(obj, context) - def _validate(self, obj, context): - raise NotImplementedError() - -class OneOf(Validator): - """ - Validates that the object is one of the listed values. - - :param ``Construct`` subcon: object to validate - :param iterable valids: a set of valid values - - >>> OneOf(UBInt8("foo"), [4,5,6,7]).parse("\\x05") - 5 - >>> OneOf(UBInt8("foo"), [4,5,6,7]).parse("\\x08") - Traceback (most recent call last): - ... - construct.core.ValidationError: ('invalid object', 8) - >>> - >>> OneOf(UBInt8("foo"), [4,5,6,7]).build(5) - '\\x05' - >>> OneOf(UBInt8("foo"), [4,5,6,7]).build(9) - Traceback (most recent call last): - ... 
- construct.core.ValidationError: ('invalid object', 9) - """ - __slots__ = ["valids"] - def __init__(self, subcon, valids): - Validator.__init__(self, subcon) - self.valids = valids - def _validate(self, obj, context): - return obj in self.valids - -class NoneOf(Validator): - """ - Validates that the object is none of the listed values. - - :param ``Construct`` subcon: object to validate - :param iterable invalids: a set of invalid values - - >>> NoneOf(UBInt8("foo"), [4,5,6,7]).parse("\\x08") - 8 - >>> NoneOf(UBInt8("foo"), [4,5,6,7]).parse("\\x06") - Traceback (most recent call last): - ... - construct.core.ValidationError: ('invalid object', 6) - """ - __slots__ = ["invalids"] - def __init__(self, subcon, invalids): - Validator.__init__(self, subcon) - self.invalids = invalids - def _validate(self, obj, context): - return obj not in self.invalids diff --git a/construct/core.py b/construct/core.py index cd8dc5beb..3d0e75392 100644 --- a/construct/core.py +++ b/construct/core.py @@ -1,1321 +1,6447 @@ -from struct import Struct as Packer +# -*- coding: utf-8 -*- -from lib import StringIO -from lib import Container, ListContainer, LazyContainer +import struct, io, binascii, itertools, collections, pickle, sys, os, hashlib, importlib, importlib.machinery, importlib.util + +from construct.lib import * +from construct.expr import * +from construct.version import * #=============================================================================== # exceptions #=============================================================================== class ConstructError(Exception): - __slots__ = [] -class FieldError(ConstructError): - __slots__ = [] + """ + This is the root of all exceptions raised by parsing classes in this library. Note that the helper functions in lib module can raise standard ValueError (but parsing classes are not allowed to). 
+ """ + def __init__(self, message='', path=None): + self.path = path + if path is None: + super().__init__(message) + else: + message = "Error in path {}\n".format(path) + message + super().__init__(message) class SizeofError(ConstructError): - __slots__ = [] + """ + Parsing classes sizeof() methods are only allowed to either return an integer or raise SizeofError instead. Note that this exception can mean the parsing class cannot be measured apriori in principle, however it can also mean that it just cannot be measured in these particular circumstances (eg. there is a key missing in the context dictionary at this time). + """ + pass class AdaptationError(ConstructError): - __slots__ = [] -class ArrayError(ConstructError): - __slots__ = [] + """ + Currently not used. + """ + pass +class ValidationError(ConstructError): + """ + Validator ExprValidator derived parsing classes can raise this exception: OneOf NoneOf. It can mean that the parse or build value is or is not one of specified values. + """ + pass +class StreamError(ConstructError): + """ + Almost all parsing classes can raise this exception: it can mean a variety of things. Maybe requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, could not write all bytes, stream is not seekable, stream is not tellable, etc. Note that there are a few parsing classes that do not use the stream to compute output and therefore do not raise this exception. + """ + pass +class FormatFieldError(ConstructError): + """ + Only one parsing class can raise this exception: FormatField. It can either mean the format string is invalid or the value is not valid for provided format string. See standard struct module for what is acceptable. + """ + pass +class IntegerError(ConstructError): + """ + Only some numeric parsing classes can raise this exception: BytesInteger BitsInteger VarInt ZigZag. 
It can mean either the length parameter is invalid, the value is not an integer, the value is negative or too low or too high for given parameters, or the selected endianness cannot be applied. + """ + pass +class StringError(ConstructError): + """ + Almost all parsing classes can raise this exception: It can mean a unicode string was passed instead of bytes, or a bytes was passed instead of a unicode string. Also some classes can raise it explicitly: PascalString CString GreedyString. It can mean no encoding or invalid encoding was selected. Note that currently, if the data cannot be encoded decoded given selected encoding then UnicodeEncodeError UnicodeDecodeError are raised, which are not rooted at ConstructError. + """ + pass +class MappingError(ConstructError): + """ + Few parsing classes can raise this exception: Enum FlagsEnum Mapping. It can mean the build value is not recognized and therefore cannot be mapped onto bytes. + """ + pass class RangeError(ConstructError): - __slots__ = [] -class SwitchError(ConstructError): - __slots__ = [] + """ + Few parsing classes can raise this exception: Array PrefixedArray LazyArray. It can mean the count parameter is invalid, or the build object has too little or too many elements. + """ + pass +class RepeatError(ConstructError): + """ + Only one parsing class can raise this exception: RepeatUntil. It can mean none of the elements in build object passed the given predicate. + """ + pass +class ConstError(ConstructError): + """ + Only one parsing class can raise this exception: Const. It can mean the wrong data was parsed, or wrong object was built from. + """ + pass +class IndexFieldError(ConstructError): + """ + Only one parsing class can raise this exception: Index. It can mean the class was not nested in an array parsing class properly and therefore cannot access the _index context key. + """ + pass +class CheckError(ConstructError): + """ + Only one parsing class can raise this exception: Check. 
It can mean the condition lambda failed during a routine parsing building check. + """ + pass +class ExplicitError(ConstructError): + """ + Only one parsing class can raise this exception: Error. It can mean the parsing class was merely parsed or built with. + """ + pass +class NamedTupleError(ConstructError): + """ + Only one parsing class can raise this exception: NamedTuple. It can mean the subcon is not of a valid type. + """ + pass +class TimestampError(ConstructError): + """ + Only one parsing class can raise this exception: Timestamp. It can mean the subcon unit or epoch are invalid. + """ + pass +class UnionError(ConstructError): + """ + Only one parsing class can raise this exception: Union. It can mean none of given subcons was properly selected, or trying to build without providing a proper value. + """ + pass class SelectError(ConstructError): - __slots__ = [] -class TerminatorError(ConstructError): - __slots__ = [] + """ + Only one parsing class can raise this exception: Select. It can mean neither subcon succeded when parsing or building. + """ + pass +class SwitchError(ConstructError): + """ + Currently not used. + """ + pass +class StopFieldError(ConstructError): + """ + Only one parsing class can raise this exception: StopIf. It can mean the given condition was met during parsing or building. + """ + pass +class PaddingError(ConstructError): + """ + Multiple parsing classes can raise this exception: PaddedString Padding Padded Aligned FixedSized NullTerminated NullStripped. It can mean multiple issues: the encoded string or bytes takes more bytes than padding allows, length parameter was invalid, pattern terminator or pad is not a proper bytes value, modulus was less than 2. + """ + pass +class TerminatedError(ConstructError): + """ + Only one parsing class can raise this exception: Terminated. It can mean EOF was not found as expected during parsing. 
+ """ + pass +class RawCopyError(ConstructError): + """ + Only one parsing class can raise this exception: RawCopy. It can mean it cannot build as both data and value keys are missing from build dict object. + """ + pass +class RotationError(ConstructError): + """ + Only one parsing class can raise this exception: ProcessRotateLeft. It can mean the specified group is less than 1, data is not of valid length. + """ + pass +class ChecksumError(ConstructError): + """ + Only one parsing class can raise this exception: Checksum. It can mean expected and actual checksum do not match. + """ + pass +class CancelParsing(ConstructError): + """ + This exception can only be raise explicitly by the user, and it causes the parsing class to stop what it is doing (interrupts parsing or building). + """ + pass +class CipherError(ConstructError): + """ + Two parsing classes can raise this exception: EncryptedSym EncryptedSymAead. It can mean none or invalid cipher object was provided. + """ + pass + + + +#=============================================================================== +# used internally +#=============================================================================== +def singleton(arg): + x = arg() + return x + + +def stream_read(stream, length, path): + if length < 0: + raise StreamError("length must be non-negative, found %s" % length, path=path) + try: + data = stream.read(length) + except Exception: + raise StreamError("stream.read() failed, requested %s bytes" % (length,), path=path) + if len(data) != length: + raise StreamError("stream read less than specified amount, expected %d, found %d" % (length, len(data)), path=path) + return data + + +def stream_read_entire(stream, path): + try: + return stream.read() + except Exception: + raise StreamError("stream.read() failed when reading until EOF", path=path) + + +def stream_write(stream, data, length, path): + if not isinstance(data, bytes): + raise StringError("given non-bytes value, perhaps unicode? 
%r" % (data,), path=path) + if length < 0: + raise StreamError("length must be non-negative, found %s" % length, path=path) + if len(data) != length: + raise StreamError("bytes object of wrong length, expected %d, found %d" % (length, len(data)), path=path) + try: + written = stream.write(data) + except Exception: + raise StreamError("stream.write() failed, given %r" % (data,), path=path) + if written != length: + raise StreamError("stream written less than specified, expected %d, written %d" % (length, written), path=path) + + +def stream_seek(stream, offset, whence, path): + try: + return stream.seek(offset, whence) + except Exception: + raise StreamError("stream.seek() failed, offset %s, whence %s" % (offset, whence), path=path) + + +def stream_tell(stream, path): + try: + return stream.tell() + except Exception: + raise StreamError("stream.tell() failed", path=path) + + +def stream_size(stream): + try: + fallback = stream.tell() + end = stream.seek(0, 2) + stream.seek(fallback) + return end + except Exception: + raise StreamError("stream. seek() tell() failed", path="???") + + +def stream_iseof(stream): + try: + fallback = stream.tell() + data = stream.read(1) + stream.seek(fallback) + return not data + except Exception: + raise StreamError("stream. 
read() seek() tell() failed", path="???") + + +class BytesIOWithOffsets(io.BytesIO): + @staticmethod + def from_reading(stream, length: int, path: str): + offset = stream_tell(stream, path) + contents = stream_read(stream, length, path) + return BytesIOWithOffsets(contents, stream, offset) + + def __init__(self, contents: bytes, parent_stream, offset: int): + super().__init__(contents) + self.parent_stream = parent_stream + self.parent_stream_offset = offset + + def tell(self) -> int: + return super().tell() + self.parent_stream_offset + + def seek(self, offset: int, whence: int = io.SEEK_SET) -> int: + if whence != io.SEEK_SET: + super().seek(offset, whence) + else: + super().seek(offset - self.parent_stream_offset) + return self.tell() + + +class CodeGen: + def __init__(self): + self.blocks = [] + self.nextid = 0 + self.parsercache = {} + self.buildercache = {} + self.linkedinstances = {} + self.linkedparsers = {} + self.linkedbuilders = {} + self.userfunction = {} + + def allocateId(self): + self.nextid += 1 + return self.nextid + + def append(self, block): + block = [s for s in block.splitlines() if s.strip()] + firstline = block[0] + trim = len(firstline) - len(firstline.lstrip()) + block = "\n".join(s[trim:] for s in block) + if block not in self.blocks: + self.blocks.append(block) + + def toString(self): + return "\n".join(self.blocks + [""]) + + +class KsyGen: + def __init__(self): + self.instances = {} + self.enums = {} + self.types = {} + self.nextid = 0 + + def allocateId(self): + self.nextid += 1 + return self.nextid + + +def hyphenatedict(d): + return {k.replace("_","-").rstrip("-"):v for k,v in d.items()} + + +def hyphenatelist(l): + return [hyphenatedict(d) for d in l] + + +def extractfield(sc): + if isinstance(sc, Renamed): + return extractfield(sc.subcon) + return sc + + +def evaluate(param, context): + return param(context) if callable(param) else param + #=============================================================================== # abstract 
constructs #=============================================================================== class Construct(object): - """ + r""" The mother of all constructs. - This object is generally not directly instantiated, and it does not - directly implement parsing and building, so it is largely only of interest - to subclass implementors. + This object is generally not directly instantiated, and it does not directly implement parsing and building, so it is largely only of interest to subclass implementors. There are also other abstract classes sitting on top of this one. The external user API: - * parse() - * parse_stream() - * build() - * build_stream() - * sizeof() + * `parse` + * `parse_stream` + * `parse_file` + * `build` + * `build_stream` + * `build_file` + * `sizeof` + * `compile` + * `benchmark` + + Subclass authors should not override the external methods. Instead, another API is available: + + * `_parse` + * `_build` + * `_sizeof` + * `_actualsize` + * `_emitparse` + * `_emitbuild` + * `_emitseq` + * `_emitprimitivetype` + * `_emitfulltype` + * `__getstate__` + * `__setstate__` + + Attributes and Inheritance: + + All constructs have a name and flags. The name is used for naming struct members and context dictionaries. Note that the name can be a string, or None by default. A single underscore "_" is a reserved name, used as up-level in nested containers. The name should be descriptive, short, and valid as a Python identifier, although these rules are not enforced. The flags specify additional behavioral information about this construct. Flags are used by enclosing constructs to determine a proper course of action. Flags are often inherited from inner subconstructs but that depends on each class. 
+ """ + + def __init__(self): + self.name = None + self.docs = "" + self.flagbuildnone = False + self.parsed = None + + def __repr__(self): + return "<%s%s%s%s>" % (self.__class__.__name__, " "+self.name if self.name else "", " +nonbuild" if self.flagbuildnone else "", " +docs" if self.docs else "", ) + + def __getstate__(self): + attrs = {} + if hasattr(self, "__dict__"): + attrs.update(self.__dict__) + slots = [] + c = self.__class__ + while c is not None: + if hasattr(c, "__slots__"): + slots.extend(c.__slots__) + c = c.__base__ + for name in slots: + if hasattr(self, name): + attrs[name] = getattr(self, name) + return attrs + + def __setstate__(self, attrs): + for name, value in attrs.items(): + setattr(self, name, value) + + def __copy__(self): + self2 = object.__new__(self.__class__) + self2.__setstate__(self.__getstate__()) + return self2 + + def parse(self, data, **contextkw): + r""" + Parse an in-memory buffer (often bytes object). Strings, buffers, memoryviews, and other complete buffers can be parsed with this method. + + Whenever data cannot be read, ConstructError or its derivative is raised. This method is NOT ALLOWED to raise any other exceptions although (1) user-defined lambdas can raise arbitrary exceptions which are propagated (2) external libraries like numpy can raise arbitrary exceptions which are propagated (3) some list and dict lookups can raise IndexError and KeyError which are propagated. + + Context entries are passed only as keyword parameters \*\*contextkw. + + :param \*\*contextkw: context entries, usually empty + + :returns: some value, usually based on bytes read from the stream but sometimes it is computed from nothing or from the context dictionary, sometimes its non-deterministic + + :raises ConstructError: raised for any reason + """ + return self.parse_stream(io.BytesIO(data), **contextkw) + + def parse_stream(self, stream, **contextkw): + r""" + Parse a stream. 
Files, pipes, sockets, and other streaming sources of data are handled by this method. See parse(). + """ + context = Container(**contextkw) + context._parsing = True + context._building = False + context._sizing = False + context._params = context + try: + return self._parsereport(stream, context, "(parsing)") + except CancelParsing: + pass + + def parse_file(self, filename, **contextkw): + r""" + Parse a closed binary file. See parse(). + """ + with open(filename, 'rb') as f: + return self.parse_stream(f, **contextkw) + + def _parsereport(self, stream, context, path): + obj = self._parse(stream, context, path) + if self.parsed is not None: + self.parsed(obj, context) + return obj + + def _parse(self, stream, context, path): + """Override in your subclass.""" + raise NotImplementedError + + def build(self, obj, **contextkw): + r""" + Build an object in memory (a bytes object). + + Whenever data cannot be written, ConstructError or its derivative is raised. This method is NOT ALLOWED to raise any other exceptions although (1) user-defined lambdas can raise arbitrary exceptions which are propagated (2) external libraries like numpy can raise arbitrary exceptions which are propagated (3) some list and dict lookups can raise IndexError and KeyError which are propagated. + + Context entries are passed only as keyword parameters \*\*contextkw. + + :param \*\*contextkw: context entries, usually empty + + :returns: bytes + + :raises ConstructError: raised for any reason + """ + stream = io.BytesIO() + self.build_stream(obj, stream, **contextkw) + return stream.getvalue() + + def build_stream(self, obj, stream, **contextkw): + r""" + Build an object directly into a stream. See build(). 
+ """ + context = Container(**contextkw) + context._parsing = False + context._building = True + context._sizing = False + context._params = context + self._build(obj, stream, context, "(building)") + + def build_file(self, obj, filename, **contextkw): + r""" + Build an object into a closed binary file. See build(). + """ + # Open the file for reading as well as writing. This allows builders to + # read back the stream just written. For example. RawCopy does this. + # See issue #888. + with open(filename, 'w+b') as f: + self.build_stream(obj, f, **contextkw) + + def _build(self, obj, stream, context, path): + """Override in your subclass.""" + raise NotImplementedError + + def sizeof(self, **contextkw): + r""" + Calculate the size of this object, optionally using a context. + + Some constructs have fixed size (like FormatField), some have variable-size and can determine their size given a context entry (like Bytes(this.otherfield1)), and some cannot determine their size (like VarInt). + + Whenever size cannot be determined, SizeofError is raised. This method is NOT ALLOWED to raise any other exception, even if eg. context dictionary is missing a key, or subcon propagates ConstructError-derivative exception. + + Context entries are passed only as keyword parameters \*\*contextkw. 
+ + :param \*\*contextkw: context entries, usually empty + + :returns: integer if computable, SizeofError otherwise + + :raises SizeofError: size could not be determined in actual context, or is impossible to be determined + """ + context = Container(**contextkw) + context._parsing = False + context._building = False + context._sizing = True + context._params = context + return self._sizeof(context, "(sizeof)") + + def _sizeof(self, context, path): + """Override in your subclass.""" + raise SizeofError(path=path) + + def _actualsize(self, stream, context, path): + return self._sizeof(context, path) + + def compile(self, filename=None): + """ + Transforms a construct into another construct that does same thing (has same parsing and building semantics) but is much faster when parsing. Already compiled instances just compile into itself. + + Optionally, partial source code can be saved to a text file. This is meant only to inspect the generated code, not to import it from external scripts. + + :returns: Compiled instance + """ + + code = CodeGen() + code.append(""" + # generated by Construct, this source is for inspection only! do not import! 
+ + from construct import * + from construct.lib import * + from io import BytesIO + import struct + import collections + import itertools + + def restream(data, func): + return func(BytesIO(data)) + def reuse(obj, func): + return func(obj) + + linkedinstances = {} + linkedparsers = {} + linkedbuilders = {} + userfunction = {} + + len_ = len + sum_ = sum + min_ = min + max_ = max + abs_ = abs + """) + code.append(f""" + def parseall(io, this): + return {self._compileparse(code)} + def buildall(obj, io, this): + return {self._compilebuild(code)} + compiled = Compiled(parseall, buildall) + """) + source = code.toString() + + if filename: + with open(filename, "wt") as f: + f.write(source) + + modulename = hexlify(hashlib.sha1(source.encode()).digest()).decode() + module_spec = importlib.machinery.ModuleSpec(modulename, None) + module = importlib.util.module_from_spec(module_spec) + c = compile(source, '', 'exec') + exec(c, module.__dict__) + + module.linkedinstances = code.linkedinstances + module.linkedparsers = code.linkedparsers + module.linkedbuilders = code.linkedbuilders + module.userfunction = code.userfunction + compiled = module.compiled + compiled.source = source + compiled.module = module + compiled.modulename = modulename + compiled.defersubcon = self + return compiled + + def _compileinstance(self, code): + """Used internally.""" + if id(self) in code.linkedinstances: + return + code.append(f""" + # linkedinstances[{id(self)}] is {self} + """) + field = extractfield(self) + code.linkedinstances[id(self)] = field + code.linkedparsers[id(self)] = field._parse + code.linkedbuilders[id(self)] = field._build + + def _compileparse(self, code): + """Used internally.""" + try: + if id(self) in code.parsercache: + return code.parsercache[id(self)] + emitted = self._emitparse(code) + code.parsercache[id(self)] = emitted + return emitted + except NotImplementedError: + self._compileinstance(code) + return f"linkedparsers[{id(self)}](io, this, '(???)')" + + def 
_compilebuild(self, code): + """Used internally.""" + try: + if id(self) in code.buildercache: + return code.buildercache[id(self)] + emitted = self._emitbuild(code) + code.buildercache[id(self)] = emitted + return emitted + except NotImplementedError: + self._compileinstance(code) + return f"linkedbuilders[{id(self)}](obj, io, this, '(???)')" + + def _emitparse(self, code): + """Override in your subclass.""" + raise NotImplementedError + + def _emitbuild(self, code): + """Override in your subclass.""" + raise NotImplementedError + + def benchmark(self, sampledata, filename=None): + """ + Measures performance of your construct (its parsing and building runtime), both for the original instance and the compiled instance. Uses timeit module, over at min 1 loop, and at max over 100 millisecond time. + + Optionally, results are saved to a text file for later inspection. Otherwise you can print the resulting string to terminal. + + :param sampledata: bytes, a valid blob parsable by this construct + :param filename: optional, string, results are saved to that file + + :returns: string containing measurements + """ + from timeit import timeit + + sampleobj = self.parse(sampledata) + parsetime = timeit(lambda: self.parse(sampledata), number=1) + runs = int(0.1/parsetime) + if runs > 1: + parsetime = timeit(lambda: self.parse(sampledata), number=runs)/runs + parsetime = "{:.10f} sec/call".format(parsetime) + + self.build(sampleobj) + buildtime = timeit(lambda: self.build(sampleobj), number=1) + runs = int(0.1/buildtime) + if runs > 1: + buildtime = timeit(lambda: self.build(sampleobj), number=runs)/runs + buildtime = "{:.10f} sec/call".format(buildtime) + + compiled = self.compile() + compiled.parse(sampledata) + parsetime2 = timeit(lambda: compiled.parse(sampledata), number=1) + runs = int(0.1/parsetime2) + if runs > 1: + parsetime2 = timeit(lambda: compiled.parse(sampledata), number=runs)/runs + parsetime2 = "{:.10f} sec/call".format(parsetime2) + + 
compiled.build(sampleobj) + buildtime2 = timeit(lambda: compiled.build(sampleobj), number=1) + runs = int(0.1/buildtime2) + if runs > 1: + buildtime2 = timeit(lambda: compiled.build(sampleobj), number=runs)/runs + buildtime2 = "{:.10f} sec/call".format(buildtime2) + + lines = [ + "Compiled instance performance:", + "parsing: {}", + "parsing compiled: {}", + "building: {}", + "building compiled: {}", + "" + ] + results = "\n".join(lines).format(parsetime, parsetime2, buildtime, buildtime2) + + if filename: + with open(filename, "wt") as f: + f.write(results) + + return results + + def export_ksy(self, schemaname="unnamed_schema", filename=None): + from ruamel.yaml import YAML + yaml = YAML() + yaml.default_flow_style = False + output = io.StringIO() + gen = KsyGen() + main = dict(meta=dict(id=schemaname), seq=self._compileseq(gen), instances=gen.instances, enums=gen.enums, types=gen.types) + yaml.dump(main, output) + source = output.getvalue() + + if filename: + with open(filename, "wt") as f: + f.write(source) + return source + + def _compileseq(self, ksy, bitwise=False, recursion=0): + if recursion >= 3: + raise ConstructError("construct does not implement KSY export") + try: + return hyphenatelist(self._emitseq(ksy, bitwise)) + except NotImplementedError: + return [dict(id="x", **self._compilefulltype(ksy, bitwise, recursion+1))] + + def _compileprimitivetype(self, ksy, bitwise=False, recursion=0): + if recursion >= 3: + raise ConstructError("construct does not implement KSY export") + try: + return self._emitprimitivetype(ksy, bitwise) + except NotImplementedError: + name = "type_%s" % ksy.allocateId() + ksy.types[name] = dict(seq=self._compileseq(ksy, bitwise, recursion+1)) + return name + + def _compilefulltype(self, ksy, bitwise=False, recursion=0): + if recursion >= 3: + raise ConstructError("construct does not implement KSY export") + try: + return hyphenatedict(self._emitfulltype(ksy, bitwise)) + except NotImplementedError: + return 
dict(type=self._compileprimitivetype(ksy, bitwise, recursion+1)) + + def _emitseq(self, ksy, bitwise): + """Override in your subclass.""" + raise NotImplementedError + + def _emitprimitivetype(self, ksy, bitwise): + """Override in your subclass.""" + raise NotImplementedError + + def _emitfulltype(self, ksy, bitwise): + """Override in your subclass.""" + raise NotImplementedError + + def __rtruediv__(self, name): + """ + Used for renaming subcons, usually part of a Struct, like Struct("index" / Byte). + """ + return Renamed(self, newname=name) + + __rdiv__ = __rtruediv__ + + def __mul__(self, other): + """ + Used for adding docstrings and parsed hooks to subcons, like "field" / Byte * "docstring" * processfunc. + """ + if isinstance(other, str): + return Renamed(self, newdocs=other) + if callable(other): + return Renamed(self, newparsed=other) + raise ConstructError("operator * can only be used with string or lambda") + + def __rmul__(self, other): + """ + Used for adding docstrings and parsed hooks to subcons, like "field" / Byte * "docstring" * processfunc. + """ + if isinstance(other, str): + return Renamed(self, newdocs=other) + if callable(other): + return Renamed(self, newparsed=other) + raise ConstructError("operator * can only be used with string or lambda") + + def __add__(self, other): + """ + Used for making Struct like ("index"/Byte + "prefix"/Byte). + """ + lhs = self.subcons if isinstance(self, Struct) else [self] + rhs = other.subcons if isinstance(other, Struct) else [other] + return Struct(*(lhs + rhs)) + + def __rshift__(self, other): + """ + Used for making Sequences like (Byte >> Short). + """ + lhs = self.subcons if isinstance(self, Sequence) else [self] + rhs = other.subcons if isinstance(other, Sequence) else [other] + return Sequence(*(lhs + rhs)) + + def __getitem__(self, count): + """ + Used for making Arrays like Byte[5] and Byte[this.count]. 
+ """ + if isinstance(count, slice): + raise ConstructError("subcon[N] syntax can only be used for Arrays, use GreedyRange(subcon) instead?") + if isinstance(count, int) or callable(count): + return Array(count, self) + raise ConstructError("subcon[N] syntax expects integer or context lambda") + + +class Subconstruct(Construct): + r""" + Abstract subconstruct (wraps an inner construct, inheriting its name and flags). Parsing and building is by default deferred to subcon, same as sizeof. + + :param subcon: Construct instance + """ + def __init__(self, subcon): + if not isinstance(subcon, Construct): + raise TypeError("subcon should be a Construct field") + super().__init__() + self.subcon = subcon + self.flagbuildnone = subcon.flagbuildnone + + def __repr__(self): + return "<%s%s%s%s %s>" % (self.__class__.__name__, " "+self.name if self.name else "", " +nonbuild" if self.flagbuildnone else "", " +docs" if self.docs else "", repr(self.subcon), ) + + def _parse(self, stream, context, path): + return self.subcon._parsereport(stream, context, path) + + def _build(self, obj, stream, context, path): + return self.subcon._build(obj, stream, context, path) + + def _sizeof(self, context, path): + return self.subcon._sizeof(context, path) + + +class Adapter(Subconstruct): + r""" + Abstract adapter class. + + Needs to implement `_decode()` for parsing and `_encode()` for building. + + :param subcon: Construct instance + """ + def _parse(self, stream, context, path): + obj = self.subcon._parsereport(stream, context, path) + return self._decode(obj, context, path) + + def _build(self, obj, stream, context, path): + obj2 = self._encode(obj, context, path) + buildret = self.subcon._build(obj2, stream, context, path) + return obj + + def _decode(self, obj, context, path): + raise NotImplementedError + + def _encode(self, obj, context, path): + raise NotImplementedError + + +class SymmetricAdapter(Adapter): + r""" + Abstract adapter class. 
+ + Needs to implement `_decode()` only, for both parsing and building. + + :param subcon: Construct instance + """ + def _encode(self, obj, context, path): + return self._decode(obj, context, path) + + +class Validator(SymmetricAdapter): + r""" + Abstract class that validates a condition on the encoded/decoded object. + + Needs to implement `_validate()` that returns a bool (or a truthy value) + + :param subcon: Construct instance + """ + def _decode(self, obj, context, path): + if not self._validate(obj, context, path): + raise ValidationError("object failed validation: %s" % (obj,), path=path) + return obj + + def _validate(self, obj, context, path): + raise NotImplementedError + + +class Tunnel(Subconstruct): + r""" + Abstract class that allows other constructs to read part of the stream as if they were reading the entire stream. See Prefixed for example. + + Needs to implement `_decode()` for parsing and `_encode()` for building. + """ + def _parse(self, stream, context, path): + data = stream_read_entire(stream, path) # reads entire stream + data = self._decode(data, context, path) + return self.subcon.parse(data, **context) + + def _build(self, obj, stream, context, path): + stream2 = io.BytesIO() + buildret = self.subcon._build(obj, stream2, context, path) + data = stream2.getvalue() + data = self._encode(data, context, path) + stream_write(stream, data, len(data), path) + return obj + + def _sizeof(self, context, path): + raise SizeofError(path=path) + + def _decode(self, data, context, path): + raise NotImplementedError + + def _encode(self, data, context, path): + raise NotImplementedError + + +class Compiled(Construct): + """Used internally.""" + + def __init__(self, parsefunc, buildfunc): + super().__init__() + self.source = None + self.defersubcon = None + self.parsefunc = parsefunc + self.buildfunc = buildfunc + + def _parse(self, stream, context, path): + return self.parsefunc(stream, context) + + def _build(self, obj, stream, context, path): + 
class Bytes(Construct):
    r"""
    Fixed-length run of raw bytes.

    Parsing reads `length` bytes from the stream and returns them as a bytes object. Building writes the given data to the stream, passing the expected length along to the stream writer. A bytearray is converted to bytes first, and an integer is converted with integer2bytes for convenience (although BytesInteger should be used instead). Size is the specified length.

    :param length: integer or context lambda

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises StringError: building from non-bytes value, perhaps unicode

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        >>> d = Bytes(4)
        >>> d.parse(b'beef')
        b'beef'
        >>> d.build(b'beef')
        b'beef'
        >>> d.build(0)
        b'\x00\x00\x00\x00'
        >>> d.sizeof()
        4

        >>> d = Struct(
        ...     "length" / Int8ub,
        ...     "data" / Bytes(this.length),
        ... )
        >>> d.parse(b"\x04beef")
        Container(length=4, data=b'beef')
        >>> d.sizeof()
        construct.core.SizeofError: cannot calculate size, key not found in context
    """

    def __init__(self, length):
        super().__init__()
        self.length = length

    def _parse(self, stream, context, path):
        count = self.length
        if callable(count):
            count = count(context)
        return stream_read(stream, count, path)

    def _build(self, obj, stream, context, path):
        count = self.length
        if callable(count):
            count = count(context)
        payload = obj
        if isinstance(payload, int):
            # convenience: an integer is rendered into `count` bytes
            payload = integer2bytes(payload, count)
        if type(payload) is bytearray:
            payload = bytes(payload)
        stream_write(stream, payload, count, path)
        return payload

    def _sizeof(self, context, path):
        try:
            if callable(self.length):
                return self.length(context)
            return self.length
        except (KeyError, AttributeError):
            # the length lambda referred to a context entry that does not exist
            raise SizeofError("cannot calculate size, key not found in context", path=path)

    def _emitparse(self, code):
        return "io.read({})".format(self.length)

    def _emitbuild(self, code):
        return "(io.write(obj), obj)[1]"

    def _emitfulltype(self, ksy, bitwise):
        return {"size": self.length}
def Bitwise(subcon):
    r"""
    Converts the stream from bytes to bits, and passes the bitstream to underlying subcon. Bitstream is a stream that contains 8 times as many bytes, and each byte is either \x00 or \x01 (in documentation those bytes are called bits).

    Parsing building and size are deferred to subcon, although size gets divided by 8 (therefore the subcon's size must be a multiple of 8).

    When the subcon has a computable size the conversion is done with Transformed, otherwise (the subcon raises SizeofError) with Restreamed.

    Note that by default the bit ordering is from MSB to LSB for every byte (ie. bit-level big-endian). If you need it reversed, wrap this subcon with :class:`construct.core.BitsSwapped`.

    :param subcon: Construct instance, any field that works with bits (like BitsInteger) or is bit-byte agnostic (like Struct or Flag)

    See :class:`~construct.core.Transformed` and :class:`~construct.core.Restreamed` for raisable exceptions.

    Example::

        >>> d = Bitwise(Struct(
        ...     'a' / Nibble,
        ...     'b' / Bytewise(Float32b),
        ...     'c' / Padding(4),
        ... ))
        >>> d.parse(bytes(5))
        Container(a=0, b=0.0, c=None)
        >>> d.sizeof()
        5

    Obtaining other byte or bit orderings::

        >>> d = Bitwise(Bytes(16))
        >>> d.parse(b'\x01\x03')
        b'\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x01\x01'
        >>> d = BitsSwapped(Bitwise(Bytes(16)))
        >>> d.parse(b'\x01\x03')
        b'\x01\x00\x00\x00\x00\x00\x00\x00\x01\x01\x00\x00\x00\x00\x00\x00'
    """

    try:
        nbytes = subcon.sizeof() // 8
        wrapper = Transformed(subcon, bytes2bits, nbytes, bits2bytes, nbytes)
    except SizeofError:
        wrapper = Restreamed(subcon, bytes2bits, 1, bits2bytes, 8, lambda n: n//8)

    # the KSY emitters delegate to the subcon with the bitwise flag forced on

    def _emitseq(ksy, bitwise):
        return subcon._compileseq(ksy, bitwise=True)

    def _emitprimitivetype(ksy, bitwise):
        return subcon._compileprimitivetype(ksy, bitwise=True)

    def _emitfulltype(ksy, bitwise):
        return subcon._compilefulltype(ksy, bitwise=True)

    wrapper._emitseq = _emitseq
    wrapper._emitprimitivetype = _emitprimitivetype
    wrapper._emitfulltype = _emitfulltype
    return wrapper
class FormatField(Construct):
    r"""
    Field that packs and unpacks a single CPU-sized integer, float or boolean with the standard `struct` module. This is used to implement most Int* Float* fields. Widths that `struct` cannot pack (24-bit integers, for example) are left to :class:`~construct.core.BytesInteger`, and for booleans the Flag class is the recommended alternative.

    See the `struct` module documentation for instructions on crafting format strings.

    Parses into an integer or float or boolean. Builds from an integer or float or boolean into specified byte count and endianness. Size is determined by `struct` module according to specified format string.

    :param endianity: string, character like: < > =
    :param format: string, character like: B H L Q b h l q e f d ?

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises FormatFieldError: wrong format string, or struct.(un)pack complained about the value

    Example::

        >>> d = FormatField(">", "H") or Int16ub
        >>> d.parse(b"\x01\x00")
        256
        >>> d.build(256)
        b"\x01\x00"
        >>> d.sizeof()
        2
    """

    def __init__(self, endianity, format):
        # membership is checked against sequences of single characters, so only
        # an exact one-character match is accepted
        if endianity not in ("=", "<", ">"):
            raise FormatFieldError("endianity must be like: = < >", endianity)
        if format not in tuple("fdBHLQbhlqe?"):
            raise FormatFieldError("format must be like: B H L Q b h l q e f d ?", format)

        super().__init__()
        fmtstr = endianity + format
        self.fmtstr = fmtstr
        self.length = struct.calcsize(fmtstr)

    def _parse(self, stream, context, path):
        raw = stream_read(stream, self.length, path)
        try:
            fields = struct.unpack(self.fmtstr, raw)
        except Exception:
            raise FormatFieldError("struct %r error during parsing" % self.fmtstr, path=path)
        return fields[0]

    def _build(self, obj, stream, context, path):
        try:
            packed = struct.pack(self.fmtstr, obj)
        except Exception:
            raise FormatFieldError("struct %r error during building, given value %r" % (self.fmtstr, obj), path=path)
        stream_write(stream, packed, self.length, path)
        return obj

    def _sizeof(self, context, path):
        return self.length

    def _emitparse(self, code):
        fname = "formatfield_%s" % (code.allocateId(),)
        code.append(f"{fname} = struct.Struct({self.fmtstr!r})")
        return f"{fname}.unpack(io.read({self.length}))[0]"

    def _emitbuild(self, code):
        fname = "formatfield_%s" % (code.allocateId(),)
        code.append(f"{fname} = struct.Struct({self.fmtstr!r})")
        return f"(io.write({fname}.pack(obj)), obj)[1]"

    def _emitprimitivetype(self, ksy, bitwise):
        order, kind = self.fmtstr
        is_signed = kind.islower()
        little = (order == "<") or (order == "=" and sys.byteorder == "little")
        suffix = "le" if little else "be"
        if kind in "bhlqBHLQ":
            if not bitwise:
                return "%s%s%s" % ("s" if is_signed else "u", self.length, suffix)
            assert not is_signed
            assert not little
            return "b%s" % (8*self.length,)
        if kind in "fd":
            assert not bitwise
            return "f%s%s" % (self.length, suffix)
        # the remaining format characters ('e' and '?') fall through and yield None
+ + Example:: + + >>> d = BytesInteger(4) or Int32ub + >>> d.parse(b"abcd") + 1633837924 + >>> d.build(1) + b'\x00\x00\x00\x01' + >>> d.sizeof() + 4 + """ + + def __init__(self, length, signed=False, swapped=False): + super().__init__() + self.length = length + self.signed = signed + self.swapped = swapped + + def _parse(self, stream, context, path): + length = evaluate(self.length, context) + if length <= 0: + raise IntegerError(f"length {length} must be positive", path=path) + data = stream_read(stream, length, path) + if evaluate(self.swapped, context): + data = swapbytes(data) + try: + return bytes2integer(data, self.signed) + except ValueError as e: + raise IntegerError(str(e), path=path) + + def _build(self, obj, stream, context, path): + if not isinstance(obj, int): + raise IntegerError(f"value {obj} is not an integer", path=path) + length = evaluate(self.length, context) + if length <= 0: + raise IntegerError(f"length {length} must be positive", path=path) + try: + data = integer2bytes(obj, length, self.signed) + except ValueError as e: + raise IntegerError(str(e), path=path) + if evaluate(self.swapped, context): + data = swapbytes(data) + stream_write(stream, data, length, path) + return obj + + def _sizeof(self, context, path): + try: + return evaluate(self.length, context) + except (KeyError, AttributeError): + raise SizeofError("cannot calculate size, key not found in context", path=path) + + def _emitparse(self, code): + return f"bytes2integer(swapbytes(io.read({self.length})) if {self.swapped} else io.read({self.length}), {self.signed})" + + def _emitbuild(self, code): + return f"((io.write(swapbytes(integer2bytes(obj, {self.length}, {self.signed})) if ({self.swapped}) else integer2bytes(obj, {self.length}, {self.signed}))), obj)[1]" + + def _emitprimitivetype(self, ksy, bitwise): + if bitwise: + assert not self.signed + assert not self.swapped + return "b%s" % (8*self.length, ) + else: + assert not callable(self.swapped) + return "%s%s%s" % ("s" if 
class BitsInteger(Construct):
    r"""
    Field that packs arbitrarily large (or small) integers. Some fields (Bit Nibble Octet) use this class. Must be enclosed in :class:`~construct.core.Bitwise` context.

    Parses into an integer. Builds from an integer into specified bit count and endianness. Size (in bits) is specified in ctor.

    Analog to :class:`~construct.core.BytesInteger` which operates on bytes. In fact::

        BytesInteger(n) <--> Bitwise(BitsInteger(8*n))
        BitsInteger(8*n) <--> Bytewise(BytesInteger(n))

    Note that little-endianness is only defined for multiples of 8 bits.

    Byte ordering (i.e. `swapped` parameter) refers to bytes (chunks of 8 bits) so, for example::

        BytesInteger(n, swapped=True) <--> Bitwise(BitsInteger(8*n, swapped=True))

    Swapped argument was recently fixed. To obtain previous (faulty) behavior, you can use `ByteSwapped`, `BitsSwapped` and `Bitwise` in whatever particular order (see examples).

    :param length: integer or context lambda, number of bits in the field
    :param signed: bool, whether the value is signed (two's complement), default is False (unsigned)
    :param swapped: bool or context lambda, whether to swap byte order (little endian), default is False (big endian)

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises IntegerError: length is negative or zero
    :raises IntegerError: value is not an integer
    :raises IntegerError: number does not fit given width and signed parameters
    :raises IntegerError: little-endianness selected but length is not multiple of 8 bits

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Examples::

        >>> d = Bitwise(BitsInteger(8)) or Bitwise(Octet)
        >>> d.parse(b"\x10")
        16
        >>> d.build(255)
        b'\xff'
        >>> d.sizeof()
        1

    Obtaining other byte or bit orderings::

        >>> d = BitsInteger(2)
        >>> d.parse(b'\x01\x00') # Bit-Level Big-Endian
        2
        >>> d = ByteSwapped(BitsInteger(2))
        >>> d.parse(b'\x01\x00') # Bit-Level Little-Endian
        1
        >>> d = BitsInteger(16) # Byte-Level Big-Endian, Bit-Level Big-Endian
        >>> d.build(5 + 19*256)
        b'\x00\x00\x00\x01\x00\x00\x01\x01\x00\x00\x00\x00\x00\x01\x00\x01'
        >>> d = BitsInteger(16, swapped=True) # Byte-Level Little-Endian, Bit-Level Big-Endian
        >>> d.build(5 + 19*256)
        b'\x00\x00\x00\x00\x00\x01\x00\x01\x00\x00\x00\x01\x00\x00\x01\x01'
        >>> d = ByteSwapped(BitsInteger(16)) # Byte-Level Little-Endian, Bit-Level Little-Endian
        >>> d.build(5 + 19*256)
        b'\x01\x00\x01\x00\x00\x00\x00\x00\x01\x01\x00\x00\x01\x00\x00\x00'
        >>> d = ByteSwapped(BitsInteger(16, swapped=True)) # Byte-Level Big-Endian, Bit-Level Little-Endian
        >>> d.build(5 + 19*256)
        b'\x01\x01\x00\x00\x01\x00\x00\x00\x01\x00\x01\x00\x00\x00\x00\x00'
    """

    def __init__(self, length, signed=False, swapped=False):
        super().__init__()
        self.length = length
        self.signed = signed
        self.swapped = swapped

    def _parse(self, stream, context, path):
        nbits = evaluate(self.length, context)
        if nbits <= 0:
            raise IntegerError(f"length {nbits} must be positive", path=path)
        bits = stream_read(stream, nbits, path)
        try:
            # evaluating `swapped` stays inside the try so that a ValueError
            # raised there is reported as IntegerError as well
            if evaluate(self.swapped, context):
                bits = swapbytesinbits(bits)
            return bits2integer(bits, self.signed)
        except ValueError as e:
            raise IntegerError(str(e), path=path)

    def _build(self, obj, stream, context, path):
        if not isinstance(obj, int):
            raise IntegerError(f"value {obj} is not an integer", path=path)
        nbits = evaluate(self.length, context)
        if nbits <= 0:
            raise IntegerError(f"length {nbits} must be positive", path=path)
        try:
            bits = integer2bits(obj, nbits, self.signed)
            if evaluate(self.swapped, context):
                bits = swapbytesinbits(bits)
        except ValueError as e:
            raise IntegerError(str(e), path=path)
        stream_write(stream, bits, nbits, path)
        return obj

    def _sizeof(self, context, path):
        try:
            return evaluate(self.length, context)
        except (KeyError, AttributeError):
            raise SizeofError("cannot calculate size, key not found in context", path=path)

    def _emitparse(self, code):
        length, signed, swapped = self.length, self.signed, self.swapped
        return f"bits2integer(swapbytesinbits(io.read({length})) if {swapped} else io.read({length}), {signed})"

    def _emitbuild(self, code):
        length, signed, swapped = self.length, self.signed, self.swapped
        return f"((io.write(swapbytesinbits(integer2bits(obj, {length}, {signed})) if ({swapped}) else integer2bits(obj, {length}, {signed}))), obj)[1]"

    def _emitprimitivetype(self, ksy, bitwise):
        assert not self.signed
        assert not self.swapped
        return "b%s" % (self.length,)
BitStruct)""" + return BitsInteger(8) + +@singleton +def Int8ub(): + """Unsigned, big endian 8-bit integer""" + return FormatField(">", "B") +@singleton +def Int16ub(): + """Unsigned, big endian 16-bit integer""" + return FormatField(">", "H") +@singleton +def Int32ub(): + """Unsigned, big endian 32-bit integer""" + return FormatField(">", "L") +@singleton +def Int64ub(): + """Unsigned, big endian 64-bit integer""" + return FormatField(">", "Q") + +@singleton +def Int8sb(): + """Signed, big endian 8-bit integer""" + return FormatField(">", "b") +@singleton +def Int16sb(): + """Signed, big endian 16-bit integer""" + return FormatField(">", "h") +@singleton +def Int32sb(): + """Signed, big endian 32-bit integer""" + return FormatField(">", "l") +@singleton +def Int64sb(): + """Signed, big endian 64-bit integer""" + return FormatField(">", "q") + +@singleton +def Int8ul(): + """Unsigned, little endian 8-bit integer""" + return FormatField("<", "B") +@singleton +def Int16ul(): + """Unsigned, little endian 16-bit integer""" + return FormatField("<", "H") +@singleton +def Int32ul(): + """Unsigned, little endian 32-bit integer""" + return FormatField("<", "L") +@singleton +def Int64ul(): + """Unsigned, little endian 64-bit integer""" + return FormatField("<", "Q") + +@singleton +def Int8sl(): + """Signed, little endian 8-bit integer""" + return FormatField("<", "b") +@singleton +def Int16sl(): + """Signed, little endian 16-bit integer""" + return FormatField("<", "h") +@singleton +def Int32sl(): + """Signed, little endian 32-bit integer""" + return FormatField("<", "l") +@singleton +def Int64sl(): + """Signed, little endian 64-bit integer""" + return FormatField("<", "q") + +@singleton +def Int8un(): + """Unsigned, native endianity 8-bit integer""" + return FormatField("=", "B") +@singleton +def Int16un(): + """Unsigned, native endianity 16-bit integer""" + return FormatField("=", "H") +@singleton +def Int32un(): + """Unsigned, native endianity 32-bit integer""" + return 
FormatField("=", "L") +@singleton +def Int64un(): + """Unsigned, native endianity 64-bit integer""" + return FormatField("=", "Q") + +@singleton +def Int8sn(): + """Signed, native endianity 8-bit integer""" + return FormatField("=", "b") +@singleton +def Int16sn(): + """Signed, native endianity 16-bit integer""" + return FormatField("=", "h") +@singleton +def Int32sn(): + """Signed, native endianity 32-bit integer""" + return FormatField("=", "l") +@singleton +def Int64sn(): + """Signed, native endianity 64-bit integer""" + return FormatField("=", "q") + +Byte = Int8ub +Short = Int16ub +Int = Int32ub +Long = Int64ub + +@singleton +def Float16b(): + """Big endian, 16-bit IEEE 754 floating point number""" + return FormatField(">", "e") +@singleton +def Float16l(): + """Little endian, 16-bit IEEE 754 floating point number""" + return FormatField("<", "e") +@singleton +def Float16n(): + """Native endianity, 16-bit IEEE 754 floating point number""" + return FormatField("=", "e") + +@singleton +def Float32b(): + """Big endian, 32-bit IEEE floating point number""" + return FormatField(">", "f") +@singleton +def Float32l(): + """Little endian, 32-bit IEEE floating point number""" + return FormatField("<", "f") +@singleton +def Float32n(): + """Native endianity, 32-bit IEEE floating point number""" + return FormatField("=", "f") + +@singleton +def Float64b(): + """Big endian, 64-bit IEEE floating point number""" + return FormatField(">", "d") +@singleton +def Float64l(): + """Little endian, 64-bit IEEE floating point number""" + return FormatField("<", "d") +@singleton +def Float64n(): + """Native endianity, 64-bit IEEE floating point number""" + return FormatField("=", "d") + +Half = Float16b +Single = Float32b +Double = Float64b + +native = (sys.byteorder == "little") + +@singleton +def Int24ub(): + """A 3-byte big-endian unsigned integer, as used in ancient file formats.""" + return BytesInteger(3, signed=False, swapped=False) +@singleton +def Int24ul(): + """A 3-byte 
@singleton
class VarInt(Construct):
    r"""
    VarInt encoded unsigned integer. Each 7 bits of the number are encoded in one byte of the stream, least significant group first, where leftmost bit (MSB) is unset when byte is terminal. Scheme is the base-128 varint described in the Google Protocol Buffers documentation.

    Can only encode non-negative numbers.

    Parses into an integer. Builds from an integer. Size is undefined.

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises IntegerError: given a negative value, or not an integer

    Example::

        >>> VarInt.build(1)
        b'\x01'
        >>> VarInt.build(2**100)
        b'\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x04'
    """

    def _parse(self, stream, context, path):
        value = 0
        shift = 0
        while True:
            octet = byte2int(stream_read(stream, 1, path))
            # low 7 bits carry payload, placed above the groups read so far
            value |= (octet & 0b01111111) << shift
            shift += 7
            if not (octet & 0b10000000):
                # continuation bit cleared: this was the terminal byte
                return value

    def _build(self, obj, stream, context, path):
        if not isinstance(obj, int):
            raise IntegerError(f"value {obj} is not an integer", path=path)
        if obj < 0:
            raise IntegerError(f"VarInt cannot build from negative number {obj}", path=path)
        remaining = obj
        encoded = bytearray()
        while remaining >= 0b10000000:
            # more groups follow, so the continuation bit is set
            encoded.append(0b10000000 | (remaining & 0b01111111))
            remaining >>= 7
        encoded.append(remaining)
        stream_write(stream, bytes(encoded), len(encoded), path)
        return obj

    def _emitprimitivetype(self, ksy, bitwise):
        return "vlq_base128_le"
class StringEncoded(Adapter):
    """
    Used internally.

    Adapter that decodes the bytes parsed by the subcon into a unicode string, and encodes a unicode string into bytes before handing it to the subcon for building.

    :param subcon: Construct instance, must parse into and build from bytes
    :param encoding: string, codec name passed to bytes.decode() and str.encode(), must not be empty

    :raises StringError: encoding was not provided
    :raises StringError: building from a non-unicode value
    :raises StringError: the codec failed to decode or encode the value
    """

    def __init__(self, subcon, encoding):
        super().__init__(subcon)
        if not encoding:
            raise StringError("String* classes require explicit encoding")
        self.encoding = encoding

    def _decode(self, obj, context, path):
        try:
            return obj.decode(self.encoding)
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit are not converted into a StringError
            raise StringError(f"cannot use encoding {self.encoding!r} to decode {obj!r}", path=path)

    def _encode(self, obj, context, path):
        if not isinstance(obj, str):
            raise StringError("string encoding failed, expected unicode string", path=path)
        if obj == u"":
            # an empty string always builds into no bytes, bypassing the codec
            # (presumably because some codecs, eg. utf16, emit a BOM even for empty input)
            return b""
        try:
            return obj.encode(self.encoding)
        except Exception:
            raise StringError(f"cannot use encoding {self.encoding!r} to encode {obj!r}", path=path)

    def _emitparse(self, code):
        raise NotImplementedError
        # Not sure what the correct implementation would be.
        # return f"({self.subcon._compileparse(code)}).decode({repr(self.encoding)})"

    def _emitbuild(self, code):
        raise NotImplementedError
        # This is not a valid implementation. obj.encode() should be inserted into subcon
        # return f"({self.subcon._compilebuild(code)}).encode({repr(self.encoding)})"
def CString(encoding):
    r"""
    String ending in a terminating null byte (or null bytes in case of UTF16 UTF32).

    Implemented as GreedyBytes wrapped in NullTerminated (the terminator is one encoding unit of null bytes), wrapped in StringEncoded.

    .. warning:: PaddedString and CString only support encodings explicitly listed in :class:`~construct.core.possiblestringencodings` .

    :param encoding: string like: utf8 utf16 utf32 ascii

    :raises StringError: building a non-unicode string
    :raises StringError: selected encoding is not on supported list

    Example::

        >>> d = CString("utf8")
        >>> d.build(u"Афон")
        b'\xd0\x90\xd1\x84\xd0\xbe\xd0\xbd\x00'
        >>> d.parse(_)
        u'Афон'
    """
    # encodingunit() validates the encoding and yields the null terminator of matching width
    macro = StringEncoded(NullTerminated(GreedyBytes, term=encodingunit(encoding)), encoding)

    def _emitfulltype(ksy, bitwise):
        return dict(type="strz", encoding=encoding)

    macro._emitfulltype = _emitfulltype
    return macro
+ + :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes + + Example:: + + >>> Flag.parse(b"\x01") + True + >>> Flag.build(True) + b'\x01' + """ + + def _parse(self, stream, context, path): + return stream_read(stream, 1, path) != b"\x00" + + def _build(self, obj, stream, context, path): + stream_write(stream, b"\x01" if obj else b"\x00", 1, path) + return obj + + def _sizeof(self, context, path): + return 1 + + def _emitparse(self, code): + return f"(io.read(1) != b'\\x00')" + + def _emitbuild(self, code): + return f"((io.write(b'\\x01') if obj else io.write(b'\\x00')), obj)[1]" + + def _emitfulltype(self, ksy, bitwise): + return dict(type=("b1" if bitwise else "u1"), _construct_render="Flag") + + +class EnumInteger(int): + """Used internally.""" + pass + + +class EnumIntegerString(str): + """Used internally.""" + + def __repr__(self): + return "EnumIntegerString.new(%s, %s)" % (self.intvalue, str.__repr__(self), ) + + def __int__(self): + return self.intvalue + + @staticmethod + def new(intvalue, stringvalue): + ret = EnumIntegerString(stringvalue) + ret.intvalue = intvalue + return ret + + +class Enum(Adapter): + r""" + Translates unicode label names to subcon values, and vice versa. + + Parses integer subcon, then uses that value to lookup mapping dictionary. Returns an integer-convertible string (if mapping found) or an integer (otherwise). Building is a reversed process. Can build from an integer flag or string label. Size is same as subcon, unless it raises SizeofError. + + There is no default parameter, because if no mapping is found, it parses into an integer without error. + + This class supports enum module. See examples. + + This class supports exposing member labels as attributes, as integer-convertible strings. See examples. 
+ + :param subcon: Construct instance, subcon to map to/from + :param \*merge: optional, list of enum.IntEnum and enum.IntFlag instances, to merge labels and values from + :param \*\*mapping: dict, mapping string names to values + + :raises MappingError: building from string but no mapping found + + Example:: + + >>> d = Enum(Byte, one=1, two=2, four=4, eight=8) + >>> d.parse(b"\x01") + 'one' + >>> int(d.parse(b"\x01")) + 1 + >>> d.parse(b"\xff") + 255 + >>> int(d.parse(b"\xff")) + 255 + + >>> d.build(d.one or "one" or 1) + b'\x01' + >>> d.one + 'one' + + import enum + class E(enum.IntEnum or enum.IntFlag): + one = 1 + two = 2 + + Enum(Byte, E) <--> Enum(Byte, one=1, two=2) + FlagsEnum(Byte, E) <--> FlagsEnum(Byte, one=1, two=2) + """ + + def __init__(self, subcon, *merge, **mapping): + super().__init__(subcon) + for enum in merge: + for enumentry in enum: + mapping[enumentry.name] = enumentry.value + self.encmapping = {EnumIntegerString.new(v,k):v for k,v in mapping.items()} + self.decmapping = {v:EnumIntegerString.new(v,k) for k,v in mapping.items()} + self.ksymapping = {v:k for k,v in mapping.items()} + + def __getattr__(self, name): + if name in self.encmapping: + return self.decmapping[self.encmapping[name]] + raise AttributeError + + def _decode(self, obj, context, path): + try: + return self.decmapping[obj] + except KeyError: + return EnumInteger(obj) + + def _encode(self, obj, context, path): + try: + if isinstance(obj, int): + return obj + return self.encmapping[obj] + except KeyError: + raise MappingError("building failed, no mapping for %r" % (obj,), path=path) + + def _emitparse(self, code): + fname = f"factory_{code.allocateId()}" + code.append(f"{fname} = {repr(self.decmapping)}") + return f"reuse(({self.subcon._compileparse(code)}), lambda x: {fname}.get(x, EnumInteger(x)))" + + def _emitbuild(self, code): + fname = f"factory_{code.allocateId()}" + code.append(f"{fname} = {repr(self.encmapping)}") + return f"reuse({fname}.get(obj, obj), lambda obj: 
class BitwisableString(str):
    """
    Flag label string that can be chained with the ``|`` operator. Used internally.

    ``a | b`` returns a new BitwisableString made of both operands joined by a literal ``|`` character.
    """

    def __or__(self, other):
        joined = "{}|{}".format(self, other)
        return BitwisableString(joined)
+ + Example:: + + >>> d = FlagsEnum(Byte, one=1, two=2, four=4, eight=8) + >>> d.parse(b"\x03") + Container(one=True, two=True, four=False, eight=False) + >>> d.build(dict(one=True,two=True)) + b'\x03' + + >>> d.build(d.one|d.two or "one|two" or 1|2) + b'\x03' + + import enum + class E(enum.IntEnum or enum.IntFlag): + one = 1 + two = 2 + + Enum(Byte, E) <--> Enum(Byte, one=1, two=2) + FlagsEnum(Byte, E) <--> FlagsEnum(Byte, one=1, two=2) + """ + + def __init__(self, subcon, *merge, **flags): + super().__init__(subcon) + for enum in merge: + for enumentry in enum: + flags[enumentry.name] = enumentry.value + self.flags = flags + self.reverseflags = {v:k for k,v in flags.items()} + + def __getattr__(self, name): + if name in self.flags: + return BitwisableString(name) + raise AttributeError + + def _decode(self, obj, context, path): + obj2 = Container() + obj2._flagsenum = True + for name,value in self.flags.items(): + obj2[BitwisableString(name)] = (obj & value == value) + return obj2 + + def _encode(self, obj, context, path): + try: + if isinstance(obj, int): + return obj + if isinstance(obj, str): + flags = 0 + for name in obj.split("|"): + name = name.strip() + if name: + flags |= self.flags[name] # KeyError + return flags + if isinstance(obj, dict): + flags = 0 + for name,value in obj.items(): + if not name.startswith("_"): # assumes key is a string + if value: + flags |= self.flags[name] # KeyError + return flags + raise MappingError("building failed, unknown object: %r" % (obj,), path=path) + except KeyError: + raise MappingError("building failed, unknown label: %r" % (obj,), path=path) + + def _emitparse(self, code): + return f"reuse(({self.subcon._compileparse(code)}), lambda x: Container({', '.join(f'{k}=bool(x & {v} == {v})' for k,v in self.flags.items()) }))" + + def _emitseq(self, ksy, bitwise): + bitstotal = self.subcon.sizeof() * 8 + seq = [] + for i in range(bitstotal): + value = 1<>> x = object + >>> d = Mapping(Byte, {x:0}) + >>> d.parse(b"\x00") + x 
+ >>> d.build(x) + b'\x00' + """ + + def __init__(self, subcon, mapping): + super().__init__(subcon) + self.decmapping = {v:k for k,v in mapping.items()} + self.encmapping = mapping + + def _decode(self, obj, context, path): + try: + return self.decmapping[obj] # KeyError + except (KeyError, TypeError): + raise MappingError("parsing failed, no decoding mapping for %r" % (obj,), path=path) + + def _encode(self, obj, context, path): + try: + return self.encmapping[obj] # KeyError + except (KeyError, TypeError): + raise MappingError("building failed, no encoding mapping for %r" % (obj,), path=path) + + def _emitparse(self, code): + fname = f"factory_{code.allocateId()}" + code.append(f"{fname} = {repr(self.decmapping)}") + return f"{fname}[{self.subcon._compileparse(code)}]" + + def _emitbuild(self, code): + fname = f"factory_{code.allocateId()}" + code.append(f"{fname} = {repr(self.encmapping)}") + return f"reuse({fname}[obj], lambda obj: ({self.subcon._compilebuild(code)}))" + + +#=============================================================================== +# structures and sequences +#=============================================================================== +class Struct(Construct): + r""" + Sequence of usually named constructs, similar to structs in C. The members are parsed and build in the order they are defined. If a member is anonymous (its name is None) then it gets parsed and the value discarded, or it gets build from nothing (from None). + + Some fields do not need to be named, since they are built without value anyway. See: Const Padding Check Error Pass Terminated Seek Tell for examples of such fields. + + Operator + can also be used to make Structs (although not recommended). + + Parses into a Container (dict with attribute and key access) where keys match subcon names. Builds from a dict (not necessarily a Container) where each member gets a value from the dict matching the subcon name. 
If field has build-from-none flag, it gets build even when there is no matching entry in the dict. Size is the sum of all subcon sizes, unless any subcon raises SizeofError. + + This class does context nesting, meaning its members are given access to a new dictionary where the "_" entry points to the outer context. When parsing, each member gets parsed and subcon parse return value is inserted into context under matching key only if the member was named. When building, the matching entry gets inserted into context before subcon gets build, and if subcon build returns a new value (not None) that gets replaced in the context. + + This class exposes subcons as attributes. You can refer to subcons that were inlined (and therefore do not exist as variable in the namespace) by accessing the struct attributes, under same name. Also note that compiler does not support this feature. See examples. + + This class exposes subcons in the context. You can refer to subcons that were inlined (and therefore do not exist as variable in the namespace) within other inlined fields using the context. Note that you need to use a lambda (`this` expression is not supported). Also note that compiler does not support this feature. See examples. + + This class supports stopping. If :class:`~construct.core.StopIf` field is a member, and it evaluates its lambda as positive, this class ends parsing or building as successful without processing further fields. 
+ + :param \*subcons: Construct instances, list of members, some can be anonymous + :param \*\*subconskw: Construct instances, list of members (requires Python 3.6) + + :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes + :raises KeyError: building a subcon but found no corresponding key in dictionary + + Example:: + + >>> d = Struct("num"/Int8ub, "data"/Bytes(this.num)) + >>> d.parse(b"\x04DATA") + Container(num=4, data=b"DATA") + >>> d.build(dict(num=4, data=b"DATA")) + b"\x04DATA" + + >>> d = Struct(Const(b"MZ"), Padding(2), Pass, Terminated) + >>> d.build({}) + b'MZ\x00\x00' + >>> d.parse(_) + Container() + >>> d.sizeof() + 4 + + >>> d = Struct( + ... "animal" / Enum(Byte, giraffe=1), + ... ) + >>> d.animal.giraffe + 'giraffe' + >>> d = Struct( + ... "count" / Byte, + ... "data" / Bytes(lambda this: this.count - this._subcons.count.sizeof()), + ... ) + >>> d.build(dict(count=3, data=b"12")) + b'\x0312' + + Alternative syntax (not recommended): + >>> ("a"/Byte + "b"/Byte + "c"/Byte + "d"/Byte) + + Alternative syntax, but requires Python 3.6 or any PyPy: + >>> Struct(a=Byte, b=Byte, c=Byte, d=Byte) + """ + + def __init__(self, *subcons, **subconskw): + super().__init__() + self.subcons = list(subcons) + list(k/v for k,v in subconskw.items()) + self._subcons = Container((sc.name,sc) for sc in self.subcons if sc.name) + self.flagbuildnone = all(sc.flagbuildnone for sc in self.subcons) + + def __getattr__(self, name): + if name in self._subcons: + return self._subcons[name] + raise AttributeError + + def _parse(self, stream, context, path): + obj = Container() + obj._io = stream + context = Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = stream, _index = context.get("_index", None)) + context._root = 
context._.get("_root", context) + for sc in self.subcons: + try: + subobj = sc._parsereport(stream, context, path) + if sc.name: + obj[sc.name] = subobj + context[sc.name] = subobj + except StopFieldError: + break + return obj + + def _build(self, obj, stream, context, path): + if obj is None: + obj = Container() + context = Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = stream, _index = context.get("_index", None)) + context._root = context._.get("_root", context) + context.update(obj) + for sc in self.subcons: + try: + if sc.flagbuildnone: + subobj = obj.get(sc.name, None) + else: + subobj = obj[sc.name] # raises KeyError + + if sc.name: + context[sc.name] = subobj + + buildret = sc._build(subobj, stream, context, path) + if sc.name: + context[sc.name] = buildret + except StopFieldError: + break + return context + + def _sizeof(self, context, path): + context = Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = None, _index = context.get("_index", None)) + context._root = context._.get("_root", context) + try: + return sum(sc._sizeof(context, path) for sc in self.subcons) + except (KeyError, AttributeError): + raise SizeofError("cannot calculate size, key not found in context", path=path) + + def _emitparse(self, code): + fname = f"parse_struct_{code.allocateId()}" + block = f""" + def {fname}(io, this): + result = Container() + this = Container(_ = this, _params = this['_params'], _root = None, _parsing = True, _building = False, _sizing = False, _subcons = None, _io = io, _index = this.get('_index', None)) + this['_root'] = this['_'].get('_root', this) + try: + """ + for sc in self.subcons: + block += f""" + {f'result[{repr(sc.name)}] = this[{repr(sc.name)}] = ' if sc.name else 
''}{sc._compileparse(code)} + """ + block += f""" + pass + except StopFieldError: + pass + return result + """ + code.append(block) + return f"{fname}(io, this)" + + def _emitbuild(self, code): + fname = f"build_struct_{code.allocateId()}" + block = f""" + def {fname}(obj, io, this): + this = Container(_ = this, _params = this['_params'], _root = None, _parsing = False, _building = True, _sizing = False, _subcons = None, _io = io, _index = this.get('_index', None)) + this['_root'] = this['_'].get('_root', this) + this.update(obj) + try: + objdict = obj + """ + for sc in self.subcons: + block += f""" + {f'obj = objdict.get({repr(sc.name)}, None)' if sc.flagbuildnone else f'obj = objdict[{repr(sc.name)}]'} + {f'this[{repr(sc.name)}] = obj' if sc.name else ''} + {f'this[{repr(sc.name)}] = ' if sc.name else ''}{sc._compilebuild(code)} + """ + block += f""" + pass + except StopFieldError: + pass + return this + """ + code.append(block) + return f"{fname}(obj, io, this)" + + def _emitseq(self, ksy, bitwise): + return [sc._compilefulltype(ksy, bitwise) for sc in self.subcons] + + +class Sequence(Construct): + r""" + Sequence of usually un-named constructs. The members are parsed and build in the order they are defined. If a member is named, its parsed value gets inserted into the context. This allows using members that refer to previous members. + + Operator >> can also be used to make Sequences (although not recommended). + + Parses into a ListContainer (list with pretty-printing) where values are in same order as subcons. Builds from a list (not necessarily a ListContainer) where each subcon is given the element at respective position. Size is the sum of all subcon sizes, unless any subcon raises SizeofError. + + This class does context nesting, meaning its members are given access to a new dictionary where the "_" entry points to the outer context. 
When parsing, each member gets parsed and subcon parse return value is inserted into context under matching key only if the member was named. When building, the matching entry gets inserted into context before subcon gets build, and if subcon build returns a new value (not None) that gets replaced in the context. + + This class exposes subcons as attributes. You can refer to subcons that were inlined (and therefore do not exist as variable in the namespace) by accessing the struct attributes, under same name. Also note that compiler does not support this feature. See examples. + + This class exposes subcons in the context. You can refer to subcons that were inlined (and therefore do not exist as variable in the namespace) within other inlined fields using the context. Note that you need to use a lambda (`this` expression is not supported). Also note that compiler does not support this feature. See examples. + + This class supports stopping. If :class:`~construct.core.StopIf` field is a member, and it evaluates its lambda as positive, this class ends parsing or building as successful without processing further fields. + + :param \*subcons: Construct instances, list of members, some can be named + :param \*\*subconskw: Construct instances, list of members (requires Python 3.6) + + :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes + :raises KeyError: building a subcon but found no corresponding key in dictionary + + Example:: + + >>> d = Sequence(Byte, Float32b) + >>> d.build([0, 1.23]) + b'\x00?\x9dp\xa4' + >>> d.parse(_) + [0, 1.2300000190734863] # a ListContainer + + >>> d = Sequence( + ... "animal" / Enum(Byte, giraffe=1), + ... ) + >>> d.animal.giraffe + 'giraffe' + >>> d = Sequence( + ... "count" / Byte, + ... "data" / Bytes(lambda this: this.count - this._subcons.count.sizeof()), + ... 
) + >>> d.build([3, b"12"]) + b'\x0312' + + Alternative syntax (not recommended): + >>> (Byte >> "Byte >> "c"/Byte >> "d"/Byte) + + Alternative syntax, but requires Python 3.6 or any PyPy: + >>> Sequence(a=Byte, b=Byte, c=Byte, d=Byte) + """ + + def __init__(self, *subcons, **subconskw): + super().__init__() + self.subcons = list(subcons) + list(k/v for k,v in subconskw.items()) + self._subcons = Container((sc.name,sc) for sc in self.subcons if sc.name) + self.flagbuildnone = all(sc.flagbuildnone for sc in self.subcons) + + def __getattr__(self, name): + if name in self._subcons: + return self._subcons[name] + raise AttributeError + + def _parse(self, stream, context, path): + obj = ListContainer() + context = Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = stream, _index = context.get("_index", None)) + context._root = context._.get("_root", context) + for sc in self.subcons: + try: + subobj = sc._parsereport(stream, context, path) + obj.append(subobj) + if sc.name: + context[sc.name] = subobj + except StopFieldError: + break + return obj + + def _build(self, obj, stream, context, path): + if obj is None: + obj = ListContainer([None for sc in self.subcons]) + context = Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = stream, _index = context.get("_index", None)) + context._root = context._.get("_root", context) + objiter = iter(obj) + retlist = ListContainer() + for i,sc in enumerate(self.subcons): + try: + subobj = next(objiter) + if sc.name: + context[sc.name] = subobj + + buildret = sc._build(subobj, stream, context, path) + retlist.append(buildret) + + if sc.name: + context[sc.name] = buildret + except StopFieldError: + break + return retlist + + def _sizeof(self, context, path): + context = 
Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = None, _index = context.get("_index", None)) + context._root = context._.get("_root", context) + try: + return sum(sc._sizeof(context, path) for sc in self.subcons) + except (KeyError, AttributeError): + raise SizeofError("cannot calculate size, key not found in context", path=path) + + def _emitparse(self, code): + fname = f"parse_sequence_{code.allocateId()}" + block = f""" + def {fname}(io, this): + result = ListContainer() + this = Container(_ = this, _params = this['_params'], _root = None, _parsing = True, _building = False, _sizing = False, _subcons = None, _io = io, _index = this.get('_index', None)) + this['_root'] = this['_'].get('_root', this) + try: + """ + for sc in self.subcons: + block += f""" + result.append({sc._compileparse(code)}) + """ + if sc.name: + block += f""" + this[{repr(sc.name)}] = result[-1] + """ + block += f""" + pass + except StopFieldError: + pass + return result + """ + code.append(block) + return f"{fname}(io, this)" + + def _emitbuild(self, code): + fname = f"build_sequence_{code.allocateId()}" + block = f""" + def {fname}(obj, io, this): + this = Container(_ = this, _params = this['_params'], _root = None, _parsing = False, _building = True, _sizing = False, _subcons = None, _io = io, _index = this.get('_index', None)) + this['_root'] = this['_'].get('_root', this) + try: + objiter = iter(obj) + retlist = ListContainer() + """ + for sc in self.subcons: + block += f""" + {f'obj = next(objiter)'} + {f'this[{repr(sc.name)}] = obj' if sc.name else ''} + {f'x = '}{sc._compilebuild(code)} + {f'retlist.append(x)'} + {f'this[{repr(sc.name)}] = x' if sc.name else ''} + """ + block += f""" + pass + except StopFieldError: + pass + return retlist + """ + code.append(block) + return f"{fname}(obj, io, this)" + + def _emitseq(self, ksy, bitwise): + return 
[sc._compilefulltype(ksy, bitwise) for sc in self.subcons] + + +#=============================================================================== +# arrays ranges and repeaters +#=============================================================================== +class Array(Subconstruct): + r""" + Homogenous array of elements, similar to C# generic T[]. + + Parses into a ListContainer (a list). Parsing and building processes an exact amount of elements. If given list has more or less than count elements, raises RangeError. Size is defined as count multiplied by subcon size, but only if subcon is fixed size. + + Operator [] can be used to make Array instances (recommended syntax). + + :param count: integer or context lambda, strict amount of elements + :param subcon: Construct instance, subcon to process individual elements + :param discard: optional, bool, if set then parsing returns empty list + + :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes + :raises RangeError: specified count is not valid + :raises RangeError: given object has different length than specified count + + Can propagate any exception from the lambdas, possibly non-ConstructError. 
+ + Example:: + + >>> d = Array(5, Byte) or Byte[5] + >>> d.build(range(5)) + b'\x00\x01\x02\x03\x04' + >>> d.parse(_) + [0, 1, 2, 3, 4] + """ + + def __init__(self, count, subcon, discard=False): + super().__init__(subcon) + self.count = count + self.discard = discard + + def _parse(self, stream, context, path): + count = evaluate(self.count, context) + if not 0 <= count: + raise RangeError("invalid count %s" % (count,), path=path) + discard = self.discard + obj = ListContainer() + for i in range(count): + context._index = i + e = self.subcon._parsereport(stream, context, path) + if not discard: + obj.append(e) + return obj + + def _build(self, obj, stream, context, path): + count = evaluate(self.count, context) + if not 0 <= count: + raise RangeError("invalid count %s" % (count,), path=path) + if not len(obj) == count: + raise RangeError("expected %d elements, found %d" % (count, len(obj)), path=path) + discard = self.discard + retlist = ListContainer() + for i,e in enumerate(obj): + context._index = i + buildret = self.subcon._build(e, stream, context, path) + if not discard: + retlist.append(buildret) + return retlist + + def _sizeof(self, context, path): + try: + count = evaluate(self.count, context) + except (KeyError, AttributeError): + raise SizeofError("cannot calculate size, key not found in context", path=path) + return count * self.subcon._sizeof(context, path) + + def _emitparse(self, code): + return f"ListContainer(({self.subcon._compileparse(code)}) for i in range({self.count}))" + + def _emitbuild(self, code): + return f"ListContainer(reuse(obj[i], lambda obj: ({self.subcon._compilebuild(code)})) for i in range({self.count}))" + + def _emitfulltype(self, ksy, bitwise): + return dict(type=self.subcon._compileprimitivetype(ksy, bitwise), repeat="expr", repeat_expr=self.count) + + +class GreedyRange(Subconstruct): + r""" + Homogenous array of elements, similar to C# generic IEnumerable, but works with unknown count of elements by parsing until end of 
stream. + + Parses into a ListContainer (a list). Parsing stops when an exception occured when parsing the subcon, either due to EOF or subcon format not being able to parse the data. Either way, when GreedyRange encounters either failure it seeks the stream back to a position after last successful subcon parsing. Builds from enumerable, each element as-is. Size is undefined. + + This class supports stopping. If :class:`~construct.core.StopIf` field is a member, and it evaluates its lambda as positive, this class ends parsing or building as successful without processing further fields. + + :param subcon: Construct instance, subcon to process individual elements + :param discard: optional, bool, if set then parsing returns empty list + + :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes + :raises StreamError: stream is not seekable and tellable + + Can propagate any exception from the lambdas, possibly non-ConstructError. 
+ + Example:: + + >>> d = GreedyRange(Byte) + >>> d.build(range(8)) + b'\x00\x01\x02\x03\x04\x05\x06\x07' + >>> d.parse(_) + [0, 1, 2, 3, 4, 5, 6, 7] + """ + + def __init__(self, subcon, discard=False): + super().__init__(subcon) + self.discard = discard + + def _parse(self, stream, context, path): + discard = self.discard + obj = ListContainer() + try: + for i in itertools.count(): + context._index = i + fallback = stream_tell(stream, path) + e = self.subcon._parsereport(stream, context, path) + if not discard: + obj.append(e) + except StopFieldError: + pass + except ExplicitError: + raise + except Exception: + stream_seek(stream, fallback, 0, path) + return obj + + def _build(self, obj, stream, context, path): + discard = self.discard + try: + retlist = ListContainer() + for i,e in enumerate(obj): + context._index = i + buildret = self.subcon._build(e, stream, context, path) + if not discard: + retlist.append(buildret) + return retlist + except StopFieldError: + pass + + def _sizeof(self, context, path): + raise SizeofError(path=path) + + def _emitfulltype(self, ksy, bitwise): + return dict(type=self.subcon._compileprimitivetype(ksy, bitwise), repeat="eos") + + +class RepeatUntil(Subconstruct): + r""" + Homogenous array of elements, similar to C# generic IEnumerable, that repeats until the predicate indicates it to stop. Note that the last element (that predicate indicated as True) is included in the return list. + + Parse iterates indefinately until last element passed the predicate. Build iterates indefinately over given list, until an element passed the precicate (or raises RepeatError if no element passed it). Size is undefined. 
+ + :param predicate: lambda that takes (obj, list, context) and returns True to break or False to continue (or a truthy value) + :param subcon: Construct instance, subcon used to parse and build each element + :param discard: optional, bool, if set then parsing returns empty list + + :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes + :raises RepeatError: consumed all elements in the stream but neither passed the predicate + + Can propagate any exception from the lambda, possibly non-ConstructError. + + Example:: + + >>> d = RepeatUntil(lambda x,lst,ctx: x > 7, Byte) + >>> d.build(range(20)) + b'\x00\x01\x02\x03\x04\x05\x06\x07\x08' + >>> d.parse(b"\x01\xff\x02") + [1, 255] + + >>> d = RepeatUntil(lambda x,lst,ctx: lst[-2:] == [0,0], Byte) + >>> d.parse(b"\x01\x00\x00\xff") + [1, 0, 0] + """ + + def __init__(self, predicate, subcon, discard=False): + super().__init__(subcon) + self.predicate = predicate + self.discard = discard + + def _parse(self, stream, context, path): + predicate = self.predicate + discard = self.discard + if not callable(predicate): + predicate = lambda _1,_2,_3: predicate + obj = ListContainer() + for i in itertools.count(): + context._index = i + e = self.subcon._parsereport(stream, context, path) + if not discard: + obj.append(e) + if predicate(e, obj, context): + return obj + + def _build(self, obj, stream, context, path): + predicate = self.predicate + discard = self.discard + if not callable(predicate): + predicate = lambda _1,_2,_3: predicate + partiallist = ListContainer() + retlist = ListContainer() + for i,e in enumerate(obj): + context._index = i + buildret = self.subcon._build(e, stream, context, path) + if not discard: + retlist.append(buildret) + partiallist.append(buildret) + if predicate(e, partiallist, context): + break + else: + raise RepeatError("expected any item to match predicate, when building", 
path=path) + return retlist + + def _sizeof(self, context, path): + raise SizeofError("cannot calculate size, amount depends on actual data", path=path) + + def _emitparse(self, code): + fname = f"parse_repeatuntil_{code.allocateId()}" + block = f""" + def {fname}(io, this): + list_ = ListContainer() + while True: + obj_ = {self.subcon._compileparse(code)} + if not ({self.discard}): + list_.append(obj_) + if ({self.predicate}): + return list_ + """ + code.append(block) + return f"{fname}(io, this)" + + def _emitbuild(self, code): + fname = f"build_repeatuntil_{code.allocateId()}" + block = f""" + def {fname}(obj, io, this): + objiter = iter(obj) + list_ = ListContainer() + while True: + obj_ = reuse(next(objiter), lambda obj: {self.subcon._compilebuild(code)}) + list_.append(obj_) + if ({self.predicate}): + return list_ + """ + code.append(block) + return f"{fname}(obj, io, this)" + + def _emitfulltype(self, ksy, bitwise): + return dict(type=self.subcon._compileprimitivetype(ksy, bitwise), repeat="until", repeat_until=repr(self.predicate).replace("obj_","_")) + + +#=============================================================================== +# specials +#=============================================================================== +class Renamed(Subconstruct): + r""" + Special wrapper that allows a Struct (or other similar class) to see a field as having a name (or a different name) or having a parsed hook. Library classes do not have names (its None). Renamed does not change a field, only wraps it like a candy with a label. Used internally by / and * operators. + + Also this wrapper is responsible for building a path info (a chain of names) that gets attached to error message when parsing, building, or sizeof fails. Fields that are not named do not appear in the path string. + + Parsing building and size are deferred to subcon. 
class Const(Subconstruct):
    r"""
    Field that pins the stream to one fixed value, typically a file signature or magic number.

    The subcon decides how many bytes are consumed, so a variable-sized subcon is acceptable. Whatever the subcon parses into is compared against the expected value.

    Parsing runs the subcon, compares the result with the expected value and returns it. Building always writes the expected value through the subcon; the given object must be None or equal to the expected value. Size is whatever the subcon reports.

    :param value: expected value, usually a bytes literal
    :param subcon: optional, Construct instance used to parse/build the value; when omitted, value must be bytes and Bytes(len(value)) is used

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises ConstError: parsed data does not match specified value, or building from wrong value
    :raises StringError: no subcon was given and value is not bytes, perhaps unicode

    Example::

        >>> d = Const(b"IHDR")
        >>> d.build(None)
        b'IHDR'
        >>> d.parse(b"JPEG")
        construct.core.ConstError: parsing expected b'IHDR' but parsed b'JPEG'

        >>> d = Const(255, Int32ul)
        >>> d.build(None)
        b'\xff\x00\x00\x00'
    """

    def __init__(self, value, subcon=None):
        if subcon is None:
            # Without an explicit subcon only a bytes literal can be handled.
            if isinstance(value, bytes):
                subcon = Bytes(len(value))
            else:
                raise StringError(f"given non-bytes value {value!r}, perhaps unicode?")
        super().__init__(subcon)
        self.value = value
        self.flagbuildnone = True

    def _parse(self, stream, context, path):
        parsed = self.subcon._parsereport(stream, context, path)
        if parsed == self.value:
            return parsed
        raise ConstError(f"parsing expected {self.value!r} but parsed {parsed!r}", path=path)

    def _build(self, obj, stream, context, path):
        # The caller may pass nothing (None) or the constant itself; the constant is what gets written.
        if obj in (None, self.value):
            return self.subcon._build(self.value, stream, context, path)
        raise ConstError(f"building expected None or {self.value!r} but got {obj!r}", path=path)

    def _sizeof(self, context, path):
        return self.subcon._sizeof(context, path)

    def _emitparse(self, code):
        helper = """
            def parse_const(value, expected):
                if not value == expected: raise ConstError
                return value
        """
        code.append(helper)
        parsed_expr = self.subcon._compileparse(code)
        return f"parse_const({parsed_expr}, {self.value!r})"

    def _emitbuild(self, code):
        if not isinstance(self.value, bytes):
            return f"reuse({self.value!r}, lambda obj: {self.subcon._compilebuild(code)})"
        # Bytes constants are written directly, bypassing the subcon.
        return f"(io.write({self.value!r}), {self.value!r})[1]"

    def _emitfulltype(self, ksy, bitwise):
        return dict(contents=list(self.subcon.build(self.value)))
@singleton
class Index(Construct):
    r"""
    Exposes the position of the current element inside an enclosing :class:`~construct.core.Array` :class:`~construct.core.GreedyRange` :class:`~construct.core.RepeatUntil`.

    Either use this class or the `this._index` expression, whichever reads better in context. See the examples.

    Parsing and building both return the `_index` entry of the context. Size is 0 because the stream is untouched.

    Note: the lookup uses ``context.get("_index", None)``, so when the key is absent the result is None rather than an exception.

    Example::

        >>> d = Array(3, Index)
        >>> d.parse(b"")
        [0, 1, 2]
        >>> d = Array(3, Struct("i" / Index))
        >>> d.parse(b"")
        [Container(i=0), Container(i=1), Container(i=2)]

        >>> d = Array(3, Computed(this._index+1))
        >>> d.parse(b"")
        [1, 2, 3]
        >>> d = Array(3, Struct("i" / Computed(this._._index+1)))
        >>> d.parse(b"")
        [Container(i=1), Container(i=2), Container(i=3)]
    """

    def __init__(self):
        super().__init__()
        self.flagbuildnone = True

    def _parse(self, stream, context, path):
        position = context.get("_index", None)
        return position

    def _build(self, obj, stream, context, path):
        # The given object is ignored; the index always comes from the context.
        position = context.get("_index", None)
        return position

    def _sizeof(self, context, path):
        return 0
class Default(Subconstruct):
    r"""
    Field where building does not require a value, because the value gets taken from default. Comes handy when building a Struct from a dict with missing keys.

    Parsing defers to subcon. Building is deferred to subcon, but it builds from a default (if given object is None) or from given object. Building does not require a value, but can accept one. Size is the same as subcon, unless it raises SizeofError.

    Difference between Default and Rebuild, is that in first the build value is optional and in second the build value is ignored.

    :param subcon: Construct instance
    :param value: context lambda or constant value

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        >>> d = Struct(
        ...     "a" / Default(Byte, 0),
        ... )
        >>> d.build(dict(a=1))
        b'\x01'
        >>> d.build(dict())
        b'\x00'
    """

    def __init__(self, subcon, value):
        super().__init__(subcon)
        self.value = value
        self.flagbuildnone = True

    def _build(self, obj, stream, context, path):
        # The default is evaluated only when no object was supplied.
        obj = evaluate(self.value, context) if obj is None else obj
        return self.subcon._build(obj, stream, context, path)

    def _emitparse(self, code):
        return self.subcon._compileparse(code)

    def _emitbuild(self, code):
        # Expression objects and plain constants have a repr that is valid source text.
        # An arbitrary callable does not, so it is registered as a user function and
        # invoked with the context, the same way Rebuild._emitbuild does it.
        if isinstance(self.value, ExprMixin) or (not callable(self.value)):
            default = repr(self.value)
        else:
            aid = code.allocateId()
            code.userfunction[aid] = self.value
            default = f"userfunction[{aid}](this)"
        return f"reuse({default} if obj is None else obj, lambda obj: ({self.subcon._compilebuild(code)}))"

    def _emitseq(self, ksy, bitwise):
        return self.subcon._compileseq(ksy, bitwise)

    def _emitprimitivetype(self, ksy, bitwise):
        return self.subcon._compileprimitivetype(ksy, bitwise)

    def _emitfulltype(self, ksy, bitwise):
        return self.subcon._compilefulltype(ksy, bitwise)
@singleton
class Error(Construct):
    r"""
    Field that fails on purpose: reaching it always raises ExplicitError.

    Parsing and building both raise ExplicitError unconditionally. Asking for the size raises SizeofError.

    :raises ExplicitError: unconditionally, on parsing and building
    :raises SizeofError: unconditionally, on sizeof

    Example::

        >>> d = Struct("num"/Byte, Error)
        >>> d.parse(b"data...")
        construct.core.ExplicitError: Error field was activated during parsing
    """

    def __init__(self):
        super().__init__()
        self.flagbuildnone = True

    def _parse(self, stream, context, path):
        message = "Error field was activated during parsing"
        raise ExplicitError(message, path=path)

    def _build(self, obj, stream, context, path):
        message = "Error field was activated during building"
        raise ExplicitError(message, path=path)

    def _sizeof(self, context, path):
        message = "Error does not have size, because it interrupts parsing and building"
        raise SizeofError(message, path=path)

    def _emitparse(self, code):
        helper = """
            def parse_error():
                raise ExplicitError
        """
        code.append(helper)
        return "parse_error()"

    def _emitbuild(self, code):
        helper = """
            def build_error():
                raise ExplicitError
        """
        code.append(helper)
        return "build_error()"
You can refer to subcons that were inlined (and therefore do not exist as variable in the namespace) by accessing the struct attributes, under same name. Also note that compiler does not support this feature. See examples. + + This class exposes subcons in the context. You can refer to subcons that were inlined (and therefore do not exist as variable in the namespace) within other inlined fields using the context. Note that you need to use a lambda (`this` expression is not supported). Also note that compiler does not support this feature. See examples. + + This class is used internally to implement :class:`~construct.core.PrefixedArray`. + + :param parsebuildfrom: string name or context lambda, selects a subcon + :param \*subcons: Construct instances, list of members, some can be named + :param \*\*subconskw: Construct instances, list of members (requires Python 3.6) + + :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes + :raises UnboundLocalError: selector does not match any subcon + + Can propagate any exception from the lambda, possibly non-ConstructError. + + Example:: + + >>> d = FocusedSeq("num", Const(b"SIG"), "num"/Byte, Terminated) + >>> d.parse(b"SIG\xff") + 255 + >>> d.build(255) + b'SIG\xff' + + >>> d = FocusedSeq("animal", + ... "animal" / Enum(Byte, giraffe=1), + ... ) + >>> d.animal.giraffe + 'giraffe' + >>> d = FocusedSeq("count", + ... "count" / Byte, + ... "data" / Padding(lambda this: this.count - this._subcons.count.sizeof()), + ... 
) + >>> d.build(4) + b'\x04\x00\x00\x00' + + PrefixedArray <--> FocusedSeq("items", + "count" / Rebuild(lengthfield, len_(this.items)), + "items" / subcon[this.count], + ) + """ + + def __init__(self, parsebuildfrom, *subcons, **subconskw): + super().__init__() + self.parsebuildfrom = parsebuildfrom + self.subcons = list(subcons) + list(k/v for k,v in subconskw.items()) + self._subcons = Container((sc.name,sc) for sc in self.subcons if sc.name) + + def __getattr__(self, name): + if name in self._subcons: + return self._subcons[name] + raise AttributeError + + def _parse(self, stream, context, path): + context = Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = stream, _index = context.get("_index", None)) + context._root = context._.get("_root", context) + parsebuildfrom = evaluate(self.parsebuildfrom, context) + for i,sc in enumerate(self.subcons): + parseret = sc._parsereport(stream, context, path) + if sc.name: + context[sc.name] = parseret + if sc.name == parsebuildfrom: + finalret = parseret + return finalret + + def _build(self, obj, stream, context, path): + context = Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = stream, _index = context.get("_index", None)) + context._root = context._.get("_root", context) + parsebuildfrom = evaluate(self.parsebuildfrom, context) + context[parsebuildfrom] = obj + for i,sc in enumerate(self.subcons): + buildret = sc._build(obj if sc.name == parsebuildfrom else None, stream, context, path) + if sc.name: + context[sc.name] = buildret + if sc.name == parsebuildfrom: + finalret = buildret + return finalret + + def _sizeof(self, context, path): + context = Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = 
context._building, _sizing = context._sizing, _subcons = self._subcons, _io = None, _index = context.get("_index", None)) + context._root = context._.get("_root", context) + try: + return sum(sc._sizeof(context, path) for sc in self.subcons) + except (KeyError, AttributeError): + raise SizeofError("cannot calculate size, key not found in context", path=path) + + def _emitparse(self, code): + fname = f"parse_focusedseq_{code.allocateId()}" + block = f""" + def {fname}(io, this): + result = [] + this = Container(_ = this, _params = this['_params'], _root = None, _parsing = True, _building = False, _sizing = False, _subcons = None, _io = io, _index = this.get('_index', None)) + this['_root'] = this['_'].get('_root', this) + """ + for sc in self.subcons: + block += f""" + result.append({sc._compileparse(code)}) + """ + if sc.name: + block += f""" + this[{repr(sc.name)}] = result[-1] + """ + block += f""" + return this[{repr(self.parsebuildfrom)}] + """ + code.append(block) + return f"{fname}(io, this)" + + def _emitbuild(self, code): + fname = f"build_focusedseq_{code.allocateId()}" + block = f""" + def {fname}(obj, io, this): + this = Container(_ = this, _params = this['_params'], _root = None, _parsing = False, _building = True, _sizing = False, _subcons = None, _io = io, _index = this.get('_index', None)) + this['_root'] = this['_'].get('_root', this) + try: + this[{repr(self.parsebuildfrom)}] = obj + finalobj = obj + """ + for sc in self.subcons: + block += f""" + {f'obj = {"finalobj" if sc.name == self.parsebuildfrom else "None"}'} + {f'buildret = '}{sc._compilebuild(code)} + {f'this[{repr(sc.name)}] = buildret' if sc.name else ''} + {f'{"finalret = buildret" if sc.name == self.parsebuildfrom else ""}'} + """ + block += f""" + pass + except StopFieldError: + pass + return finalret + """ + code.append(block) + return f"{fname}(obj, io, this)" + + def _emitseq(self, ksy, bitwise): + return [sc._compilefulltype(ksy, bitwise) for sc in self.subcons] + + +@singleton 
class NamedTuple(Adapter):
    r"""
    Maps the result of a Struct, Sequence, Array or GreedyRange onto a namedtuple from the collections module. A tuple name and its fields are required, the fields being either a string of space-separated names or a list of names, exactly as for the standard namedtuple.

    Parses into a collections.namedtuple instance. Builds from such an instance: for a Struct subcon the named members are read from the object as attributes, for the other subcons the object is converted with list().

    :param tuplename: string
    :param tuplefields: string or list of strings
    :param subcon: Construct instance, either Struct Sequence Array GreedyRange

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises NamedTupleError: subcon is neither Struct Sequence Array GreedyRange

    Can propagate collections exceptions.

    Example::

        >>> d = NamedTuple("coord", "x y z", Byte[3])
        >>> d = NamedTuple("coord", "x y z", Byte >> Byte >> Byte)
        >>> d = NamedTuple("coord", "x y z", "x"/Byte + "y"/Byte + "z"/Byte)
        >>> d.parse(b"123")
        coord(x=49, y=50, z=51)
    """

    def __init__(self, tuplename, tuplefields, subcon):
        if not isinstance(subcon, (Struct,Sequence,Array,GreedyRange)):
            raise NamedTupleError("subcon is neither Struct Sequence Array GreedyRange")
        super().__init__(subcon)
        self.tuplename = tuplename
        self.tuplefields = tuplefields
        self.factory = collections.namedtuple(tuplename, tuplefields)

    def _decode(self, obj, context, path):
        if isinstance(self.subcon, Struct):
            # Drop the "_io" bookkeeping entry without mutating the parsed container,
            # and without failing when the entry is not there.
            fields = {key: value for key, value in obj.items() if key != "_io"}
            return self.factory(**fields)
        if isinstance(self.subcon, (Sequence,Array,GreedyRange)):
            return self.factory(*obj)
        raise NamedTupleError("subcon is neither Struct Sequence Array GreedyRange", path=path)

    def _encode(self, obj, context, path):
        if isinstance(self.subcon, Struct):
            # Only named members can be looked up on the tuple.
            return Container({sc.name:getattr(obj,sc.name) for sc in self.subcon.subcons if sc.name})
        if isinstance(self.subcon, (Sequence,Array,GreedyRange)):
            return list(obj)
        raise NamedTupleError("subcon is neither Struct Sequence Array GreedyRange", path=path)

    def _emitparse(self, code):
        fname = "factory_%s" % code.allocateId()
        # The namedtuple factory is emitted once at module level of the generated code.
        code.append("""
            %s = collections.namedtuple(%r, %r)
        """ % (fname, self.tuplename, self.tuplefields, ))
        if isinstance(self.subcon, Struct):
            return "%s(**(%s))" % (fname, self.subcon._compileparse(code), )
        if isinstance(self.subcon, (Sequence,Array,GreedyRange)):
            return "%s(*(%s))" % (fname, self.subcon._compileparse(code), )
        raise NamedTupleError("subcon is neither Struct Sequence Array GreedyRange")

    def _emitseq(self, ksy, bitwise):
        return self.subcon._compileseq(ksy, bitwise)

    def _emitprimitivetype(self, ksy, bitwise):
        return self.subcon._compileprimitivetype(ksy, bitwise)

    def _emitfulltype(self, ksy, bitwise):
        return self.subcon._compilefulltype(ksy, bitwise)
+ + :param subcon: Construct instance like Int* Float*, or Int32ub with msdos format + :param unit: integer or float, or msdos string + :param epoch: integer, or Arrow instance, or msdos string + + :raises ImportError: arrow could not be imported during ctor + :raises TimestampError: subcon is not a Construct instance + :raises TimestampError: unit or epoch is a wrong type + + Example:: + + >>> d = Timestamp(Int64ub, 1., 1970) + >>> d.parse(b'\x00\x00\x00\x00ZIz\x00') + + >>> d = Timestamp(Int32ub, "msdos", "msdos") + >>> d.parse(b'H9\x8c"') + + """ + import arrow + + if not isinstance(subcon, Construct): + raise TimestampError("subcon should be Int*, experimentally Float*, or Int32ub when using msdos format") + if not isinstance(unit, (int, float, str)): + raise TimestampError("unit must be one of: int float string") + if not isinstance(epoch, (int, arrow.Arrow, str)): + raise TimestampError("epoch must be one of: int Arrow string") + + if unit == "msdos" or epoch == "msdos": + st = BitStruct( + "year" / BitsInteger(7), + "month" / BitsInteger(4), + "day" / BitsInteger(5), + "hour" / BitsInteger(5), + "minute" / BitsInteger(6), + "second" / BitsInteger(5), + ) + class MsdosTimestampAdapter(TimestampAdapter): + def _decode(self, obj, context, path): + return arrow.Arrow(1980,1,1).shift(years=obj.year, months=obj.month-1, days=obj.day-1, hours=obj.hour, minutes=obj.minute, seconds=obj.second*2) + def _encode(self, obj, context, path): + t = obj.timetuple() + return Container(year=t.tm_year-1980, month=t.tm_mon, day=t.tm_mday, hour=t.tm_hour, minute=t.tm_min, second=t.tm_sec//2) + macro = MsdosTimestampAdapter(st) + + else: + if isinstance(epoch, int): + epoch = arrow.Arrow(epoch, 1, 1) + class EpochTimestampAdapter(TimestampAdapter): + def _decode(self, obj, context, path): + return epoch.shift(seconds=obj*unit) + def _encode(self, obj, context, path): + return int((obj-epoch).total_seconds()/unit) + macro = EpochTimestampAdapter(subcon) + + def _emitfulltype(ksy, 
class Hex(Adapter):
    r"""
    Adapter that changes only how integers, bytes and RawCopy dictionaries are printed: as hexadecimal/hexlified text.

    Parsing yields an int-alike, bytes-alike or dict-alike object whose only difference from the original is pretty-printing. Its `repr` remains as-is, while its `str` (what you see when you print it) is a hexlified representation. Any other type is passed through unchanged. Building and sizeof defer to subcon.

    To obtain a hexlified string (like before Hex HexDump changed semantics) use binascii.(un)hexlify on parsed results.

    Example::

        >>> d = Hex(Int32ub)
        >>> obj = d.parse(b"\x00\x00\x01\x02")
        >>> obj
        258
        >>> print(obj)
        0x00000102

        >>> d = Hex(GreedyBytes)
        >>> obj = d.parse(b"\x00\x00\x01\x02")
        >>> obj
        b'\x00\x00\x01\x02'
        >>> print(obj)
        unhexlify('00000102')

        >>> d = Hex(RawCopy(Int32ub))
        >>> obj = d.parse(b"\x00\x00\x01\x02")
        >>> obj
        {'data': b'\x00\x00\x01\x02',
         'length': 4,
         'offset1': 0,
         'offset2': 4,
         'value': 258}
        >>> print(obj)
        unhexlify('00000102')
    """
    def _decode(self, obj, context, path):
        if isinstance(obj, int):
            # Two hex digits per byte of the subcon's size, zero padded.
            digits = 2 * self.subcon._sizeof(context, path)
            return HexDisplayedInteger.new(obj, "0%sX" % (digits,))
        for kind, wrapper in ((bytes, HexDisplayedBytes), (dict, HexDisplayedDict)):
            if isinstance(obj, kind):
                return wrapper(obj)
        return obj

    def _encode(self, obj, context, path):
        return obj

    def _emitparse(self, code):
        return self.subcon._compileparse(code)

    def _emitseq(self, ksy, bitwise):
        return self.subcon._compileseq(ksy, bitwise)

    def _emitprimitivetype(self, ksy, bitwise):
        return self.subcon._compileprimitivetype(ksy, bitwise)

    def _emitfulltype(self, ksy, bitwise):
        return self.subcon._compilefulltype(ksy, bitwise)
Fields are usually named (so parsed values are inserted into dictionary under same name). + + Parses subcons in sequence, and reverts the stream back to original position after each subcon. Afterwards, advances the stream by selected subcon. Builds from first subcon that has a matching key in given dict. Size is undefined (because parsefrom is not used for building). + + This class does context nesting, meaning its members are given access to a new dictionary where the "_" entry points to the outer context. When parsing, each member gets parsed and subcon parse return value is inserted into context under matching key only if the member was named. When building, the matching entry gets inserted into context before subcon gets build, and if subcon build returns a new value (not None) that gets replaced in the context. + + This class exposes subcons as attributes. You can refer to subcons that were inlined (and therefore do not exist as variable in the namespace) by accessing the struct attributes, under same name. Also note that compiler does not support this feature. See examples. + + This class exposes subcons in the context. You can refer to subcons that were inlined (and therefore do not exist as variable in the namespace) within other inlined fields using the context. Note that you need to use a lambda (`this` expression is not supported). Also note that compiler does not support this feature. See examples. + + .. warning:: If you skip `parsefrom` parameter then stream will be left back at starting offset, not seeked to any common denominator. 
+ + :param parsefrom: how to leave stream after parsing, can be integer index or string name selecting a subcon, or None (leaves stream at initial offset, the default), or context lambda + :param \*subcons: Construct instances, list of members, some can be anonymous + :param \*\*subconskw: Construct instances, list of members (requires Python 3.6) + + :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes + :raises StreamError: stream is not seekable and tellable + :raises UnionError: selector does not match any subcon, or dict given to build does not contain any keys matching any subcon + :raises IndexError: selector does not match any subcon + :raises KeyError: selector does not match any subcon + + Can propagate any exception from the lambda, possibly non-ConstructError. + + Example:: + + >>> d = Union(0, + ... "raw" / Bytes(8), + ... "ints" / Int32ub[2], + ... "shorts" / Int16ub[4], + ... "chars" / Byte[8], + ... ) + >>> d.parse(b"12345678") + Container(raw=b'12345678', ints=[825373492, 892745528], shorts=[12594, 13108, 13622, 14136], chars=[49, 50, 51, 52, 53, 54, 55, 56]) + >>> d.build(dict(chars=range(8))) + b'\x00\x01\x02\x03\x04\x05\x06\x07' + + >>> d = Union(None, + ... "animal" / Enum(Byte, giraffe=1), + ... ) + >>> d.animal.giraffe + 'giraffe' + >>> d = Union(None, + ... "chars" / Byte[4], + ... "data" / Bytes(lambda this: this._subcons.chars.sizeof()), + ... 
) + >>> d.parse(b"\x01\x02\x03\x04") + Container(chars=[1, 2, 3, 4], data=b'\x01\x02\x03\x04') + + Alternative syntax, but requires Python 3.6 or any PyPy: + >>> Union(0, raw=Bytes(8), ints=Int32ub[2], shorts=Int16ub[4], chars=Byte[8]) + """ + + def __init__(self, parsefrom, *subcons, **subconskw): + if isinstance(parsefrom, Construct): + raise UnionError("parsefrom should be either: None int str context-function") + super().__init__() + self.parsefrom = parsefrom + self.subcons = list(subcons) + list(k/v for k,v in subconskw.items()) + self._subcons = Container((sc.name,sc) for sc in self.subcons if sc.name) + + def __getattr__(self, name): + if name in self._subcons: + return self._subcons[name] + raise AttributeError + + def _parse(self, stream, context, path): + obj = Container() + context = Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = stream, _index = context.get("_index", None)) + context._root = context._.get("_root", context) + fallback = stream_tell(stream, path) + forwards = {} + for i,sc in enumerate(self.subcons): + subobj = sc._parsereport(stream, context, path) + if sc.name: + obj[sc.name] = subobj + context[sc.name] = subobj + forwards[i] = stream_tell(stream, path) + if sc.name: + forwards[sc.name] = stream_tell(stream, path) + stream_seek(stream, fallback, 0, path) + parsefrom = evaluate(self.parsefrom, context) + if parsefrom is not None: + stream_seek(stream, forwards[parsefrom], 0, path) # raises KeyError + return obj + + def _build(self, obj, stream, context, path): + context = Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = stream, _index = context.get("_index", None)) + context._root = context._.get("_root", context) + context.update(obj) + for sc in self.subcons: + if 
sc.flagbuildnone: + subobj = obj.get(sc.name, None) + elif sc.name in obj: + subobj = obj[sc.name] + else: + continue + + if sc.name: + context[sc.name] = subobj + + buildret = sc._build(subobj, stream, context, path) + if sc.name: + context[sc.name] = buildret + return Container({sc.name:buildret}) + else: + raise UnionError("cannot build, none of subcons were found in the dictionary %r" % (obj, ), path=path) + + def _sizeof(self, context, path): + raise SizeofError("Union builds depending on actual object dict, size is unknown", path=path) + + def _emitparse(self, code): + if callable(self.parsefrom): + raise NotImplementedError("Union does not compile non-constant parsefrom") + fname = "parse_union_%s" % code.allocateId() + block = """ + def %s(io, this): + this = Container(_ = this, _params = this['_params'], _root = None, _parsing = True, _building = False, _sizing = False, _subcons = None, _io = io, _index = this.get('_index', None)) + this['_root'] = this['_'].get('_root', this) + fallback = io.tell() + """ % (fname, ) + if isinstance(self.parsefrom, type(None)): + index = -1 + skipfallback = False + skipforward = True + if isinstance(self.parsefrom, int): + index = self.parsefrom + self.subcons[index] # raises IndexError + skipfallback = True + skipforward = self.subcons[index].sizeof() == self.subcons[-1].sizeof() + if isinstance(self.parsefrom, str): + index = {sc.name:i for i,sc in enumerate(self.subcons) if sc.name}[self.parsefrom] # raises KeyError + skipfallback = True + skipforward = self.subcons[index].sizeof() == self.subcons[-1].sizeof() + + for i,sc in enumerate(self.subcons): + block += """ + %s%s + """ % ("this[%r] = " % sc.name if sc.name else "", sc._compileparse(code)) + if i == index and not skipforward: + block += """ + forward = io.tell() + """ + if i < len(self.subcons)-1: + block += """ + io.seek(fallback) + """ + if not skipfallback: + block += """ + io.seek(fallback) + """ + if not skipforward: + block += """ + io.seek(forward) + """ 
+ block += """ + del this['_'] + del this['_index'] + return this + """ + code.append(block) + return "%s(io, this)" % (fname,) + + def _emitbuild(self, code): + fname = f"build_union_{code.allocateId()}" + block = f""" + def {fname}(obj, io, this): + this = Container(_ = this, _params = this['_params'], _root = None, _parsing = False, _building = True, _sizing = False, _subcons = None, _io = io, _index = this.get('_index', None)) + this['_root'] = this['_'].get('_root', this) + this.update(obj) + objdict = obj + """ + for sc in self.subcons: + block += f""" + if {'True' if sc.flagbuildnone else f'{repr(sc.name)} in objdict'}: + {f'obj = objdict.get({repr(sc.name)}, None)' if sc.flagbuildnone else f'obj = objdict[{repr(sc.name)}]'} + {f'this[{repr(sc.name)}] = obj' if sc.name else ''} + {f'buildret = this[{repr(sc.name)}] = ' if sc.name else ''}{sc._compilebuild(code)} + {f'return Container({{ {repr(sc.name)}:buildret }})'} + """ + block += f""" + raise UnionError('cannot build, none of subcons were found in the dictionary') + """ + code.append(block) + return f"{fname}(obj, io, this)" + + +class Select(Construct): + r""" + Selects the first matching subconstruct. + + Parses and builds by literally trying each subcon in sequence until one of them parses or builds without exception. Stream gets reverted back to original position after each failed attempt, but not if parsing succeeds. Size is not defined. 
class Select(Construct):
    r"""
    Picks the first member that works.

    Parsing tries the subcons one after another and returns the value of the first one that parses without raising. The stream is rewound to its starting offset after every failed attempt, but is left wherever the successful subcon stopped. Building tries the subcons one after another as well, and writes the bytes produced by the first one that builds without raising. Size is not defined.

    :param \*subcons: Construct instances, list of members, some can be anonymous
    :param \*\*subconskw: Construct instances, list of members (requires Python 3.6)

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises StreamError: stream is not seekable and tellable
    :raises SelectError: no subcon succeeded when parsing or building

    Example::

        >>> d = Select(Int32ub, CString("utf8"))
        >>> d.build(1)
        b'\x00\x00\x00\x01'
        >>> d.build(u"Афон")
        b'\xd0\x90\xd1\x84\xd0\xbe\xd0\xbd\x00'

        Alternative syntax, but requires Python 3.6 or any PyPy:
        >>> Select(num=Int32ub, text=CString("utf8"))
    """

    def __init__(self, *subcons, **subconskw):
        super().__init__()
        named = [name / sc for name, sc in subconskw.items()]
        self.subcons = [*subcons, *named]
        # None is buildable as soon as one alternative can build from None.
        self.flagbuildnone = any(member.flagbuildnone for member in self.subcons)

    def _parse(self, stream, context, path):
        for candidate in self.subcons:
            fallback = stream_tell(stream, path)
            try:
                return candidate._parsereport(stream, context, path)
            except ExplicitError:
                # An explicit error is never treated as "this alternative did not match".
                raise
            except Exception:
                # Any other failure: rewind and try the next alternative.
                stream_seek(stream, fallback, 0, path)
        raise SelectError("no subconstruct matched", path=path)

    def _build(self, obj, stream, context, path):
        for candidate in self.subcons:
            try:
                # build() returns the bytes; they reach the stream only after a success.
                data = candidate.build(obj, **context)
            except ExplicitError:
                raise
            except Exception:
                continue
            stream_write(stream, data, len(data), path)
            return obj
        raise SelectError("no subconstruct matched: %s" % (obj,), path=path)
def Optional(subcon):
    r"""
    Makes an optional field.

    This is a macro for ``Select(subcon, Pass)``. Parsing tries the subcon; if that fails, None is returned and parsing is reported as successful. Building tries the subcon; if that fails, nothing is written and building is reported as successful. Size is undefined, because whether bytes are consumed or produced depends on actual data and actual context.

    :param subcon: Construct instance

    Example::

        Optional <--> Select(subcon, Pass)

        >>> d = Optional(Int64ul)
        >>> d.parse(b"12345678")
        4050765991979987505
        >>> d.parse(b"")
        None
        >>> d.build(1)
        b'\x01\x00\x00\x00\x00\x00\x00\x00'
        >>> d.build(None)
        b''
    """
    alternatives = (subcon, Pass)
    return Select(*alternatives)


def If(condfunc, subcon):
    r"""
    If-then conditional construct.

    This is a macro for ``IfThenElse(condfunc, subcon, Pass)``. Parsing evaluates the condition; when it holds the subcon is parsed, otherwise None is returned. Building evaluates the condition too; when it holds the subcon is built, otherwise nothing happens. Size is either that of the subcon or 0, depending on how condfunc evaluates.

    :param condfunc: bool or context lambda (or a truthy value)
    :param subcon: Construct instance, used if condition indicates True

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        If <--> IfThenElse(condfunc, subcon, Pass)

        >>> d = If(this.x > 0, Byte)
        >>> d.build(255, x=1)
        b'\xff'
        >>> d.build(255, x=0)
        b''
    """
    def _emitfulltype(ksy, bitwise):
        # KSY export: the subcon's primitive type guarded by the condition text.
        primitive = subcon._compileprimitivetype(ksy, bitwise)
        condition = repr(condfunc).replace("this.","")
        return {"type": primitive, "if_": condition}

    macro = IfThenElse(condfunc, subcon, Pass)
    macro._emitfulltype = _emitfulltype
    return macro
class IfThenElse(Construct):
    r"""
    If-then-else conditional construct, similar to ternary operator.

    Parsing and building evaluate the condition and defer to one of the two subcons depending on its value. Size is computed the same way.

    :param condfunc: bool or context lambda (or a truthy value)
    :param thensubcon: Construct instance, used if condition indicates True
    :param elsesubcon: Construct instance, used if condition indicates False

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        >>> d = IfThenElse(this.x > 0, VarInt, Byte)
        >>> d.build(255, dict(x=1))
        b'\xff\x01'
        >>> d.build(255, dict(x=0))
        b'\xff'
    """

    def __init__(self, condfunc, thensubcon, elsesubcon):
        super().__init__()
        self.condfunc = condfunc
        self.thensubcon = thensubcon
        self.elsesubcon = elsesubcon
        # Either branch may be taken, so None is buildable only if both accept it.
        self.flagbuildnone = thensubcon.flagbuildnone and elsesubcon.flagbuildnone

    def _parse(self, stream, context, path):
        condfunc = evaluate(self.condfunc, context)
        sc = self.thensubcon if condfunc else self.elsesubcon
        return sc._parsereport(stream, context, path)

    def _build(self, obj, stream, context, path):
        condfunc = evaluate(self.condfunc, context)
        sc = self.thensubcon if condfunc else self.elsesubcon
        return sc._build(obj, stream, context, path)

    def _sizeof(self, context, path):
        condfunc = evaluate(self.condfunc, context)
        sc = self.thensubcon if condfunc else self.elsesubcon
        return sc._sizeof(context, path)

    def _emitparse(self, code):
        return "((%s) if (%s) else (%s))" % (self.thensubcon._compileparse(code), self.condfunc, self.elsesubcon._compileparse(code), )

    def _emitbuild(self, code):
        return f"(({self.thensubcon._compilebuild(code)}) if ({repr(self.condfunc)}) else ({self.elsesubcon._compilebuild(code)}))"

    def _emitseq(self, ksy, bitwise):
        # The else-branch needs the negated condition. For callable conditions
        # (context expressions) `~` is kept, exactly as before. For a constant,
        # `~` would be integer inversion (~True == -2, ~False == -1, both
        # truthy, and deprecated on bool since Python 3.12), so constants are
        # negated with `not` instead.
        if callable(self.condfunc):
            negated = ~self.condfunc
        else:
            negated = not self.condfunc
        return [
            dict(id="thenvalue", type=self.thensubcon._compileprimitivetype(ksy, bitwise), if_=repr(self.condfunc).replace("this.","")),
            dict(id="elsesubcon", type=self.elsesubcon._compileprimitivetype(ksy, bitwise), if_=repr(negated).replace("this.","")),
        ]
class Switch(Construct):
    r"""
    A conditional branch.

    Parsing and building evaluate keyfunc and look the result up in `cases`, a dictionary mapping values to subcons. When no case matches, `default` is used (Pass unless given). Note that `default` is a Construct instance, not a dictionary key. Size is evaluated the same way, by evaluating keyfunc and asking the selected subcon.

    :param keyfunc: context lambda or constant, that matches some key in cases
    :param cases: dict mapping keys to Construct instances
    :param default: optional, Construct instance, used when keyfunc is not found in cases, Pass is default value for this parameter, Error is a possible value for this parameter

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        >>> d = Switch(this.n, { 1:Int8ub, 2:Int16ub, 4:Int32ub })
        >>> d.build(5, n=1)
        b'\x05'
        >>> d.build(5, n=4)
        b'\x00\x00\x00\x05'

        >>> d = Switch(this.n, {}, default=Byte)
        >>> d.parse(b"\x01", n=255)
        1
        >>> d.build(1, n=255)
        b"\x01"
    """

    def __init__(self, keyfunc, cases, default=None):
        super().__init__()
        self.keyfunc = keyfunc
        self.cases = cases
        self.default = Pass if default is None else default
        # Any branch (including the default) may be selected, so all must accept None.
        branches = [*cases.values(), self.default]
        self.flagbuildnone = all(branch.flagbuildnone for branch in branches)

    def _parse(self, stream, context, path):
        key = evaluate(self.keyfunc, context)
        selected = self.cases.get(key, self.default)
        return selected._parsereport(stream, context, path)

    def _build(self, obj, stream, context, path):
        key = evaluate(self.keyfunc, context)
        selected = self.cases.get(key, self.default)
        return selected._build(obj, stream, context, path)

    def _sizeof(self, context, path):
        # A missing context entry, raised anywhere in the lookup or in the
        # selected subcon, is reported as "size cannot be computed".
        try:
            key = evaluate(self.keyfunc, context)
            selected = self.cases.get(key, self.default)
            return selected._sizeof(context, path)
        except (KeyError, AttributeError):
            raise SizeofError("cannot calculate size, key not found in context", path=path)

    def _emitparse(self, code):
        table = f"switch_cases_{code.allocateId()}"
        code.append(f"{table} = {{}}")
        for key, branch in self.cases.items():
            code.append(f"{table}[{repr(key)}] = lambda io,this: {branch._compileparse(code)}")
        fallback = f"switch_defaultcase_{code.allocateId()}"
        code.append(f"{fallback} = lambda io,this: {self.default._compileparse(code)}")
        return f"{table}.get({repr(self.keyfunc)}, {fallback})(io, this)"

    def _emitbuild(self, code):
        table = f"switch_cases_{code.allocateId()}"
        code.append(f"{table} = {{}}")
        for key, branch in self.cases.items():
            code.append(f"{table}[{repr(key)}] = lambda obj,io,this: {branch._compilebuild(code)}")
        fallback = f"switch_defaultcase_{code.allocateId()}"
        code.append(f"{fallback} = lambda obj,io,this: {self.default._compilebuild(code)}")
        return f"{table}.get({repr(self.keyfunc)}, {fallback})(obj, io, this)"
class StopIf(Construct):
    r"""
    Checks for a condition, and stops certain classes (:class:`~construct.core.Struct` :class:`~construct.core.Sequence` :class:`~construct.core.GreedyRange`) from parsing or building further.

    Parsing and building evaluate the condition and raise StopFieldError when it holds. Size is undefined.

    :param condfunc: bool or context lambda (or truthy value)

    :raises StopFieldError: used internally

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        >>> Struct('x'/Byte, StopIf(this.x == 0), 'y'/Byte)
        >>> Sequence('x'/Byte, StopIf(this.x == 0), 'y'/Byte)
        >>> GreedyRange(FocusedSeq(0, 'x'/Byte, StopIf(this.x == 0)))
    """

    def __init__(self, condfunc):
        super().__init__()
        self.condfunc = condfunc
        self.flagbuildnone = True

    def _stop_if_requested(self, context, path):
        # Shared by parsing and building: both only signal the enclosing construct.
        if evaluate(self.condfunc, context):
            raise StopFieldError(path=path)

    def _parse(self, stream, context, path):
        self._stop_if_requested(context, path)

    def _build(self, obj, stream, context, path):
        self._stop_if_requested(context, path)

    def _sizeof(self, context, path):
        raise SizeofError("StopIf cannot determine size because it depends on actual context which then depends on actual data and outer constructs", path=path)

    def _emitparse(self, code):
        helper = f"""
            def parse_stopif(condition):
                if condition:
                    raise StopFieldError
        """
        code.append(helper)
        return f"parse_stopif({repr(self.condfunc)})"

    def _emitbuild(self, code):
        helper = f"""
            def build_stopif(condition):
                if condition:
                    raise StopFieldError
        """
        code.append(helper)
        return f"build_stopif({repr(self.condfunc)})"


#===============================================================================
# alignment and padding
#===============================================================================
def Padding(length, pattern=b"\x00"):
    r"""
    Appends null bytes.

    This is a macro for ``Padded(length, Pass, pattern=pattern)``. Parsing consumes the specified amount of bytes and discards it. Building writes the pattern byte repeated to the specified length. Size is same as specified.

    :param length: integer or context lambda, length of the padding
    :param pattern: b-character, padding pattern, default is \\x00

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises PaddingError: length was negative
    :raises PaddingError: pattern was not bytes (b-character)

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        >>> d = Padding(4) or Padded(4, Pass)
        >>> d.build(None)
        b'\x00\x00\x00\x00'
        >>> d.parse(b"****")
        None
        >>> d.sizeof()
        4
    """
    def _emitprimitivetype(ksy, bitwise):
        # The primitive form is only emitted in bitwise mode.
        if bitwise:
            return f"b{length}"
        raise NotImplementedError

    def _emitfulltype(ksy, bitwise):
        # The full form is only emitted in bytewise mode.
        if not bitwise:
            return {"size": length}
        raise NotImplementedError

    macro = Padded(length, Pass, pattern=pattern)
    macro._emitprimitivetype = _emitprimitivetype
    macro._emitfulltype = _emitfulltype
    return macro
class Padded(Subconstruct):
    r"""
    Appends additional null bytes to achieve a length.

    Parsing parses the subcon, measures with stream.tell() how many bytes it consumed, and then consumes the remaining bytes up to `length`. Building builds the subcon, measures with stream.tell() how many bytes it produced, and then writes the pattern byte for the remainder. Size is same as `length`, but a negative amount results in error. Note that subcon can actually be variable size, it is the eventual amount of bytes that is read or written during parsing or building that determines actual padding.

    :param length: integer or context lambda, length of the padding
    :param subcon: Construct instance
    :param pattern: optional, b-character, padding pattern, default is \\x00

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises PaddingError: length is negative
    :raises PaddingError: subcon read or written more than the length (would cause negative pad)
    :raises PaddingError: pattern is not bytes of length 1

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        >>> d = Padded(4, Byte)
        >>> d.build(255)
        b'\xff\x00\x00\x00'
        >>> d.parse(_)
        255
        >>> d.sizeof()
        4

        >>> d = Padded(4, VarInt)
        >>> d.build(1)
        b'\x01\x00\x00\x00'
        >>> d.build(70000)
        b'\xf0\xa2\x04\x00'
    """

    def __init__(self, length, subcon, pattern=b"\x00"):
        valid_pattern = isinstance(pattern, bytes) and len(pattern) == 1
        if not valid_pattern:
            raise PaddingError("pattern expected to be bytes of length 1")
        super().__init__(subcon)
        self.length = length
        self.pattern = pattern

    def _checked_length(self, context, path):
        # Evaluate the configured length and reject negative values.
        length = evaluate(self.length, context)
        if length < 0:
            raise PaddingError("length cannot be negative", path=path)
        return length

    def _parse(self, stream, context, path):
        length = self._checked_length(context, path)
        start = stream_tell(stream, path)
        obj = self.subcon._parsereport(stream, context, path)
        consumed = stream_tell(stream, path) - start
        pad = length - consumed
        if pad < 0:
            raise PaddingError("subcon parsed %d bytes but was allowed only %d" % (consumed, length), path=path)
        # The filler bytes are read and thrown away.
        stream_read(stream, pad, path)
        return obj

    def _build(self, obj, stream, context, path):
        length = self._checked_length(context, path)
        start = stream_tell(stream, path)
        buildret = self.subcon._build(obj, stream, context, path)
        produced = stream_tell(stream, path) - start
        pad = length - produced
        if pad < 0:
            raise PaddingError("subcon build %d bytes but was allowed only %d" % (produced, length), path=path)
        stream_write(stream, self.pattern * pad, pad, path)
        return buildret

    def _sizeof(self, context, path):
        try:
            return self._checked_length(context, path)
        except (KeyError, AttributeError):
            raise SizeofError("cannot calculate size, key not found in context", path=path)

    def _emitparse(self, code):
        parser = self.subcon._compileparse(code)
        size = self.subcon.sizeof()
        return f"({parser}, io.read(({self.length})-({size}) ))[0]"

    def _emitbuild(self, code):
        builder = self.subcon._compilebuild(code)
        size = self.subcon.sizeof()
        return f"({builder}, io.write({repr(self.pattern)}*(({self.length})-({size})) ))[0]"

    def _emitfulltype(self, ksy, bitwise):
        primitive = self.subcon._compileprimitivetype(ksy, bitwise)
        return dict(size=self.length, type=primitive)
class Aligned(Subconstruct):
    r"""
    Appends additional null bytes to achieve a length that is shortest multiple of a modulus.

    Note that subcon can actually be variable size, it is the eventual amount of bytes that is read or written during parsing or building that determines actual padding.

    Parsing parses the subcon, then consumes and discards as many bytes as are needed to reach the next multiple of the modulus. Building builds the subcon, then writes the pattern byte as many times as needed to reach the next multiple. Size is subcon size plus modulo remainder, unless SizeofError was raised.

    :param modulus: integer or context lambda, modulus to final length
    :param subcon: Construct instance
    :param pattern: optional, b-character, padding pattern, default is \\x00

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises PaddingError: modulus was less than 2
    :raises PaddingError: pattern was not bytes (b-character)

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        >>> d = Aligned(4, Int16ub)
        >>> d.parse(b'\x00\x01\x00\x00')
        1
        >>> d.sizeof()
        4
    """

    def __init__(self, modulus, subcon, pattern=b"\x00"):
        valid_pattern = isinstance(pattern, bytes) and len(pattern) == 1
        if not valid_pattern:
            raise PaddingError("pattern expected to be bytes character")
        super().__init__(subcon)
        self.modulus = modulus
        self.pattern = pattern

    def _checked_modulus(self, context, path):
        # Evaluate the configured modulus and reject values below 2.
        modulus = evaluate(self.modulus, context)
        if modulus < 2:
            raise PaddingError("expected modulo 2 or greater", path=path)
        return modulus

    def _parse(self, stream, context, path):
        modulus = self._checked_modulus(context, path)
        start = stream_tell(stream, path)
        obj = self.subcon._parsereport(stream, context, path)
        consumed = stream_tell(stream, path) - start
        # -n % m is the distance from n up to the next multiple of m (0 if already aligned).
        pad = -consumed % modulus
        stream_read(stream, pad, path)
        return obj

    def _build(self, obj, stream, context, path):
        modulus = self._checked_modulus(context, path)
        start = stream_tell(stream, path)
        buildret = self.subcon._build(obj, stream, context, path)
        produced = stream_tell(stream, path) - start
        pad = -produced % modulus
        stream_write(stream, self.pattern * pad, pad, path)
        return buildret

    def _sizeof(self, context, path):
        try:
            modulus = self._checked_modulus(context, path)
            subconlen = self.subcon._sizeof(context, path)
            return subconlen + (-subconlen % modulus)
        except (KeyError, AttributeError):
            raise SizeofError("cannot calculate size, key not found in context", path=path)

    def _emitparse(self, code):
        parser = self.subcon._compileparse(code)
        size = self.subcon.sizeof()
        return f"({parser}, io.read(-({size}) % ({self.modulus}) ))[0]"

    def _emitbuild(self, code):
        builder = self.subcon._compilebuild(code)
        size = self.subcon.sizeof()
        return f"({builder}, io.write({repr(self.pattern)}*(-({size}) % ({self.modulus}))) )[0]"
def AlignedStruct(modulus, *subcons, **subconskw):
    r"""
    Makes a structure where each field is aligned to the same modulus (it is a struct of aligned fields, NOT an aligned struct).

    Every member is wrapped in Aligned and re-named with its own name before being handed to Struct.

    See :class:`~construct.core.Aligned` and :class:`~construct.core.Struct` for semantics and raisable exceptions.

    :param modulus: integer or context lambda, passed to each member
    :param \*subcons: Construct instances, list of members, some can be anonymous
    :param \*\*subconskw: Construct instances, list of members (requires Python 3.6)

    Example::

        >>> d = AlignedStruct(4, "a"/Int8ub, "b"/Int16ub)
        >>> d.build(dict(a=0xFF,b=0xFFFF))
        b'\xff\x00\x00\x00\xff\xff\x00\x00'
    """
    members = list(subcons)
    members.extend([name / sc for name, sc in subconskw.items()])
    aligned_members = []
    for member in members:
        # Wrapping hides the name, so it is re-applied to the Aligned wrapper.
        aligned_members.append(member.name / Aligned(modulus, member))
    return Struct(*aligned_members)
def BitStruct(*subcons, **subconskw):
    r"""
    Makes a structure inside a Bitwise.

    The members are collected into a Struct, and that Struct is wrapped in Bitwise.

    See :class:`~construct.core.Bitwise` and :class:`~construct.core.Struct` for semantics and raisable exceptions.

    :param \*subcons: Construct instances, list of members, some can be anonymous
    :param \*\*subconskw: Construct instances, list of members (requires Python 3.6)

    Example::

        BitStruct <--> Bitwise(Struct(...))

        >>> d = BitStruct(
        ...     "a" / Flag,
        ...     "b" / Nibble,
        ...     "c" / BitsInteger(10),
        ...     "d" / Padding(1),
        ... )
        >>> d.parse(b"\xbe\xef")
        Container(a=True, b=7, c=887, d=None)
        >>> d.sizeof()
        2
    """
    inner = Struct(*subcons, **subconskw)
    return Bitwise(inner)


#===============================================================================
# stream manipulation
#===============================================================================
class Pointer(Subconstruct):
    r"""
    Jumps in the stream forth and back for one field.

    Parsing and building seek the stream to a new location, process the subcon there, and seek back to the original location. Size is defined as 0 but that does not mean no bytes are written into the stream.

    Offset can be positive, indicating a position from stream beginning forward, or negative, indicating a position from EOF backwards. Alternatively the offset can be interpreted as relative to the current stream position.

    When `stream` is given, the seek, the subcon processing and the seek back all happen on that stream; the stream passed in by the enclosing construct is left untouched.

    :param offset: integer or context lambda, positive or negative
    :param subcon: Construct instance
    :param stream: None to use original stream (default), or context lambda to provide a different stream
    :param relativeOffset: True to interpret the offset as relative to the current stream position

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises StreamError: stream is not seekable and tellable

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        >>> d = Pointer(8, Bytes(1))
        >>> d.parse(b"abcdefghijkl")
        b'i'
        >>> d.build(b"Z")
        b'\x00\x00\x00\x00\x00\x00\x00\x00Z'
    """

    def __init__(self, offset, subcon, stream=None, relativeOffset=False):
        super().__init__(subcon)
        self.offset = offset
        self.stream = stream
        self.relativeOffset = relativeOffset

    def _pointer_target(self, stream, context, path):
        """Pick the stream to operate on, seek it to the target, and return ``(stream, fallback)``.

        ``fallback`` is the position the returned stream had before the seek.
        """
        offset = evaluate(self.offset, context)
        stream = evaluate(self.stream, context) or stream
        fallback = stream_tell(stream, path)
        if self.relativeOffset:
            stream_seek(stream, offset, 1, path)
        else:
            # Negative absolute offsets count backwards from EOF.
            stream_seek(stream, offset, 2 if offset < 0 else 0, path)
        return stream, fallback

    def _pointer_seek(self, stream, context, path):
        # Kept with its original signature and return value for existing callers;
        # it cannot report which stream was actually used.
        return self._pointer_target(stream, context, path)[1]

    def _parse(self, stream, context, path):
        # Rebind `stream`: with a custom stream configured, the subcon must
        # parse from (and the fallback be restored on) that stream, not the
        # one passed in.
        stream, fallback = self._pointer_target(stream, context, path)
        obj = self.subcon._parsereport(stream, context, path)
        stream_seek(stream, fallback, 0, path)
        return obj

    def _build(self, obj, stream, context, path):
        stream, fallback = self._pointer_target(stream, context, path)
        buildret = self.subcon._build(obj, stream, context, path)
        stream_seek(stream, fallback, 0, path)
        return buildret

    def _sizeof(self, context, path):
        return 0

    def _emitparse(self, code):
        if self.stream is not None:
            # The generated helper only ever touches `io`, so a custom stream
            # cannot be honoured; refuse instead of emitting wrong code.
            raise NotImplementedError("Pointer does not compile a custom stream")
        if self.relativeOffset:
            func_name = "parse_relative_pointer"
            seek_args = "1"
        else:
            func_name = "parse_pointer"
            seek_args = "2 if offset < 0 else 0"

        code.append(f"""
            def {func_name}(io, offset, func):
                fallback = io.tell()
                io.seek(offset, {seek_args})
                obj = func()
                io.seek(fallback)
                return obj
        """)
        return f"{func_name}(io, {self.offset}, lambda: {self.subcon._compileparse(code)})"

    def _emitbuild(self, code):
        if self.stream is not None:
            # Same limitation as in _emitparse.
            raise NotImplementedError("Pointer does not compile a custom stream")
        if self.relativeOffset:
            func_name = "build_relative_pointer"
            seek_args = "1"
        else:
            func_name = "build_pointer"
            seek_args = "2 if offset < 0 else 0"

        code.append(f"""
            def {func_name}(obj, io, offset, func):
                fallback = io.tell()
                io.seek(offset, {seek_args})
                ret = func()
                io.seek(fallback)
                return ret
        """)
        return f"{func_name}(obj, io, {self.offset}, lambda: {self.subcon._compilebuild(code)})"

    def _emitprimitivetype(self, ksy, bitwise):
        offset = self.offset.__getfield__() if callable(self.offset) else self.offset
        name = "instance_%s" % ksy.allocateId()
        ksy.instances[name] = dict(pos=offset, **self.subcon._compilefulltype(ksy, bitwise))
        return name
class Peek(Subconstruct):
    r"""
    Peeks at the stream.

    Parsing sub-parses and then always puts the stream back at its original position; if sub-parsing fails with a ConstructError (other than an ExplicitError) the result is None. Building does nothing (its NOT deferred). Size is defined as 0 because there is no building.

    This class is used in :class:`~construct.core.Union` class to parse each member.

    :param subcon: Construct instance

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises StreamError: stream is not seekable and tellable

    Example::

        >>> d = Sequence(Peek(Int8ub), Peek(Int16ub))
        >>> d.parse(b"\x01\x02")
        [1, 258]
        >>> d.sizeof()
        0
    """

    def __init__(self, subcon):
        super().__init__(subcon)
        self.flagbuildnone = True

    def _parse(self, stream, context, path):
        fallback = stream_tell(stream, path)
        try:
            peeked = self.subcon._parsereport(stream, context, path)
        except ExplicitError:
            raise
        except ConstructError:
            # A failed peek is not an error, it just yields None.
            peeked = None
        finally:
            # Rewind on success and on failure alike.
            stream_seek(stream, fallback, 0, path)
        return peeked

    def _build(self, obj, stream, context, path):
        return obj

    def _sizeof(self, context, path):
        return 0

    def _emitparse(self, code):
        helper = """
            def parse_peek(io, func):
                fallback = io.tell()
                try:
                    return func()
                except ExplicitError:
                    raise
                except ConstructError:
                    pass
                finally:
                    io.seek(fallback)
        """
        code.append(helper)
        return f"parse_peek(io, lambda: {self.subcon._compileparse(code)})"

    def _emitbuild(self, code):
        return "obj"
class OffsettedEnd(Subconstruct):
    r"""
    Parses all bytes in the stream till `EOF plus a negative endoffset` is reached.

    This is useful when GreedyBytes (or any other greedy construct) is followed by a fixed-size footer.

    Parsing measures the stream length, reads everything from the current position up to EOF plus `endoffset`, and hands those bytes to the subcon through a separate substream. Building defers to subcon as-is. Size is undefined.

    :param endoffset: integer or context lambda, only negative offsets or zero are allowed
    :param subcon: Construct instance

    :raises StreamError: could not read enough bytes
    :raises StreamError: reads behind the stream (if endoffset is positive)

    Example::

        >>> d = Struct(
        ...     "header" / Bytes(2),
        ...     "data" / OffsettedEnd(-2, GreedyBytes),
        ...     "footer" / Bytes(2),
        ... )
        >>> d.parse(b"\x01\x02\x03\x04\x05\x06\x07")
        Container(header=b'\x01\x02', data=b'\x03\x04\x05', footer=b'\x06\x07')
    """

    def __init__(self, endoffset, subcon):
        super().__init__(subcon)
        self.endoffset = endoffset

    def _parse(self, stream, context, path):
        endoffset = evaluate(self.endoffset, context)
        # Find EOF by seeking there, then return to where parsing should start.
        here = stream_tell(stream, path)
        stream_seek(stream, 0, 2, path)
        eof = stream_tell(stream, path)
        stream_seek(stream, here, 0, path)
        available = eof + endoffset - here
        window = BytesIOWithOffsets.from_reading(stream, available, path)
        return self.subcon._parsereport(window, context, path)

    def _build(self, obj, stream, context, path):
        return self.subcon._build(obj, stream, context, path)

    def _sizeof(self, context, path):
        raise SizeofError(path=path)
+ + :param at: integer or context lambda, where to jump to + :param whence: optional, integer or context lambda, is the offset from beginning (0) or from current position (1) or from EOF (2), default is 0 + + :raises StreamError: stream is not seekable + + Can propagate any exception from the lambda, possibly non-ConstructError. + + Example:: + + >>> d = (Seek(5) >> Byte) + >>> d.parse(b"01234x") + [5, 120] + + >>> d = (Bytes(10) >> Seek(5) >> Byte) + >>> d.build([b"0123456789", None, 255]) + b'01234\xff6789' + """ + + def __init__(self, at, whence=0): + super().__init__() + self.at = at + self.whence = whence + self.flagbuildnone = True + + def _parse(self, stream, context, path): + at = evaluate(self.at, context) + whence = evaluate(self.whence, context) + return stream_seek(stream, at, whence, path) + + def _build(self, obj, stream, context, path): + at = evaluate(self.at, context) + whence = evaluate(self.whence, context) + return stream_seek(stream, at, whence, path) + + def _sizeof(self, context, path): + raise SizeofError("Seek only moves the stream, size is not meaningful", path=path) + + def _emitparse(self, code): + return f"io.seek({self.at}, {self.whence})" + + def _emitbuild(self, code): + return f"io.seek({self.at}, {self.whence})" + + +@singleton +class Tell(Construct): + r""" + Tells the stream. + + Parsing and building return current stream offset using using stream.tell(). Size is defined as 0 because parsing and building does not consume or add into the stream. + + Tell is useful for adjusting relative offsets to absolute positions, or to measure sizes of Constructs. To get an absolute pointer, use a Tell plus a relative offset. To get a size, place two Tells and measure their difference using a Compute field. However, its recommended to use :class:`~construct.core.RawCopy` instead of manually extracting two positions and computing difference. 
+ + :raises StreamError: stream is not tellable + + Example:: + + >>> d = Struct("num"/VarInt, "offset"/Tell) + >>> d.parse(b"X") + Container(num=88, offset=1) + >>> d.build(dict(num=88)) + b'X' + """ + + def __init__(self): + super().__init__() + self.flagbuildnone = True + + def _parse(self, stream, context, path): + return stream_tell(stream, path) + + def _build(self, obj, stream, context, path): + return stream_tell(stream, path) + + def _sizeof(self, context, path): + return 0 + + def _emitparse(self, code): + return "io.tell()" + + def _emitbuild(self, code): + return "io.tell()" + + +@singleton +class Pass(Construct): + r""" + No-op construct, useful as default cases for Switch and Enum. + + Parsing returns None. Building does nothing. Size is 0 by definition. + + Example:: + + >>> Pass.parse(b"") + None + >>> Pass.build(None) + b'' + >>> Pass.sizeof() + 0 + """ + + def __init__(self): + super().__init__() + self.flagbuildnone = True + + def _parse(self, stream, context, path): + return None + + def _build(self, obj, stream, context, path): + return obj + + def _sizeof(self, context, path): + return 0 + + def _emitparse(self, code): + return "None" + + def _emitbuild(self, code): + return "None" + + def _emitfulltype(self, ksy, bitwise): + return dict(size=0) + + +@singleton +class Terminated(Construct): + r""" + Asserts end of stream (EOF). You can use it to ensure no more unparsed data follows in the stream. + + Parsing checks if stream reached EOF, and raises TerminatedError if not. Building does nothing. Size is defined as 0 because parsing and building does not consume or add into the stream, as far as other constructs see it. 
+ + :raises TerminatedError: stream not at EOF when parsing + + Example:: + + >>> Terminated.parse(b"") + None + >>> Terminated.parse(b"remaining") + construct.core.TerminatedError: expected end of stream + """ + + def __init__(self): + super().__init__() + self.flagbuildnone = True + + def _parse(self, stream, context, path): + if stream.read(1): + raise TerminatedError("expected end of stream", path=path) + + def _build(self, obj, stream, context, path): + return obj + + def _sizeof(self, context, path): + raise SizeofError(path=path) + + +#=============================================================================== +# tunneling and byte/bit swapping +#=============================================================================== +class RawCopy(Subconstruct): + r""" + Used to obtain byte representation of a field (aside of object value). + + Returns a dict containing both parsed subcon value, the raw bytes that were consumed by subcon, starting and ending offset in the stream, and amount in bytes. Builds either from raw bytes representation or a value used by subcon. Size is same as subcon. + + Object is a dictionary with either "data" or "value" keys, or both. + + When building, if both the "value" and "data" keys are present, then the "data" key is used and the "value" key is ignored. This is undesirable in the case that you parse some data for the purpose of modifying it and writing it back; in this case, delete the "data" key when modifying the "value" key to correctly rebuild the former. 
+ + :param subcon: Construct instance + + :raises StreamError: stream is not seekable and tellable + :raises RawCopyError: building and neither data or value was given + :raises StringError: building from non-bytes value, perhaps unicode + + Example:: + + >>> d = RawCopy(Byte) + >>> d.parse(b"\xff") + Container(data=b'\xff', value=255, offset1=0, offset2=1, length=1) + >>> d.build(dict(data=b"\xff")) + '\xff' + >>> d.build(dict(value=255)) + '\xff' + """ + + def _parse(self, stream, context, path): + offset1 = stream_tell(stream, path) + obj = self.subcon._parsereport(stream, context, path) + offset2 = stream_tell(stream, path) + stream_seek(stream, offset1, 0, path) + data = stream_read(stream, offset2-offset1, path) + return Container(data=data, value=obj, offset1=offset1, offset2=offset2, length=(offset2-offset1)) + + def _build(self, obj, stream, context, path): + if obj is None and self.subcon.flagbuildnone: + obj = dict(value=None) + if 'data' in obj: + data = obj['data'] + offset1 = stream_tell(stream, path) + stream_write(stream, data, len(data), path) + offset2 = stream_tell(stream, path) + return Container(obj, data=data, offset1=offset1, offset2=offset2, length=(offset2-offset1)) + if 'value' in obj: + value = obj['value'] + offset1 = stream_tell(stream, path) + buildret = self.subcon._build(value, stream, context, path) + value = value if buildret is None else buildret + offset2 = stream_tell(stream, path) + stream_seek(stream, offset1, 0, path) + data = stream_read(stream, offset2-offset1, path) + return Container(obj, data=data, value=value, offset1=offset1, offset2=offset2, length=(offset2-offset1)) + raise RawCopyError('RawCopy cannot build, both data and value keys are missing', path=path) + + +def ByteSwapped(subcon): + r""" + Swaps the byte order within boundaries of given subcon. Requires a fixed sized subcon. 
+ + :param subcon: Construct instance, subcon on top of byte swapped bytes + + :raises SizeofError: ctor or compiler could not compute subcon size + + See :class:`~construct.core.Transformed` and :class:`~construct.core.Restreamed` for raisable exceptions. + + Example:: + + Int24ul <--> ByteSwapped(Int24ub) <--> BytesInteger(3, swapped=True) <--> ByteSwapped(BytesInteger(3)) + """ + + size = subcon.sizeof() + return Transformed(subcon, swapbytes, size, swapbytes, size) + + +def BitsSwapped(subcon): + r""" + Swaps the bit order within each byte within boundaries of given subcon. Does NOT require a fixed sized subcon. + + :param subcon: Construct instance, subcon on top of bit swapped bytes + + :raises SizeofError: compiler could not compute subcon size + + See :class:`~construct.core.Transformed` and :class:`~construct.core.Restreamed` for raisable exceptions. + + Example:: + + >>> d = Bitwise(Bytes(8)) + >>> d.parse(b"\x01") + '\x00\x00\x00\x00\x00\x00\x00\x01' + >>>> BitsSwapped(d).parse(b"\x01") + '\x01\x00\x00\x00\x00\x00\x00\x00' + """ + + try: + size = subcon.sizeof() + return Transformed(subcon, swapbitsinbytes, size, swapbitsinbytes, size) + except SizeofError: + return Restreamed(subcon, swapbitsinbytes, 1, swapbitsinbytes, 1, lambda n: n) + + +class Prefixed(Subconstruct): + r""" + Prefixes a field with byte count. + + Parses the length field. Then reads that amount of bytes, and parses subcon using only those bytes. Constructs that consume entire remaining stream are constrained to consuming only the specified amount of bytes (a substream). When building, data gets prefixed by its length. Optionally, length field can include its own size. Size is the sum of both fields sizes, unless either raises SizeofError. + + Analog to :class:`~construct.core.PrefixedArray` which prefixes with an element count, instead of byte count. Semantics is similar but implementation is different. 
+ + :class:`~construct.core.VarInt` is recommended for new protocols, as it is more compact and never overflows. + + :param lengthfield: Construct instance, field used for storing the length + :param subcon: Construct instance, subcon used for storing the value + :param includelength: optional, bool, whether length field should include its own size, default is False + + :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes + + Example:: + + >>> d = Prefixed(VarInt, GreedyRange(Int32ul)) + >>> d.parse(b"\x08abcdefgh") + [1684234849, 1751606885] + + >>> d = PrefixedArray(VarInt, Int32ul) + >>> d.parse(b"\x02abcdefgh") + [1684234849, 1751606885] + """ + + def __init__(self, lengthfield, subcon, includelength=False): + super().__init__(subcon) + self.lengthfield = lengthfield + self.includelength = includelength + + def _parse(self, stream, context, path): + length = self.lengthfield._parsereport(stream, context, path) + if self.includelength: + length -= self.lengthfield._sizeof(context, path) + substream = BytesIOWithOffsets.from_reading(stream, length, path) + return self.subcon._parsereport(substream, context, path) + + def _build(self, obj, stream, context, path): + stream2 = io.BytesIO() + buildret = self.subcon._build(obj, stream2, context, path) + data = stream2.getvalue() + length = len(data) + if self.includelength: + length += self.lengthfield._sizeof(context, path) + self.lengthfield._build(length, stream, context, path) + stream_write(stream, data, len(data), path) + return buildret + + def _sizeof(self, context, path): + return self.lengthfield._sizeof(context, path) + self.subcon._sizeof(context, path) + + def _actualsize(self, stream, context, path): + position1 = stream_tell(stream, path) + length = self.lengthfield._parse(stream, context, path) + if self.includelength: + length -= self.lengthfield._sizeof(context, path) + position2 = 
stream_tell(stream, path) + return (position2-position1) + length + + def _emitparse(self, code): + sub = self.lengthfield.sizeof() if self.includelength else 0 + return f"restream(io.read(({self.lengthfield._compileparse(code)})-({sub})), lambda io: ({self.subcon._compileparse(code)}))" + + def _emitseq(self, ksy, bitwise): + return [ + dict(id="lengthfield", type=self.lengthfield._compileprimitivetype(ksy, bitwise)), + dict(id="data", size="lengthfield", type=self.subcon._compileprimitivetype(ksy, bitwise)), + ] + + +def PrefixedArray(countfield, subcon): + r""" + Prefixes an array with item count (as opposed to prefixed by byte count, see :class:`~construct.core.Prefixed`). + + :class:`~construct.core.VarInt` is recommended for new protocols, as it is more compact and never overflows. + + :param countfield: Construct instance, field used for storing the element count + :param subcon: Construct instance, subcon used for storing each element + + :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes + :raises RangeError: consumed or produced too little elements + + Example:: + + >>> d = Prefixed(VarInt, GreedyRange(Int32ul)) + >>> d.parse(b"\x08abcdefgh") + [1684234849, 1751606885] + + >>> d = PrefixedArray(VarInt, Int32ul) + >>> d.parse(b"\x02abcdefgh") + [1684234849, 1751606885] + """ + macro = FocusedSeq("items", + "count" / Rebuild(countfield, len_(this.items)), + "items" / subcon[this.count], + ) + + def _emitparse(code): + return "ListContainer((%s) for i in range(%s))" % (subcon._compileparse(code), countfield._compileparse(code), ) + macro._emitparse = _emitparse + + def _emitbuild(code): + return f"(reuse(len(obj), lambda obj: {countfield._compilebuild(code)}), list({subcon._compilebuild(code)} for obj in obj), obj)[2]" + macro._emitbuild = _emitbuild + + def _actualsize(self, stream, context, path): + position1 = stream_tell(stream, path) + 
count = countfield._parse(stream, context, path) + position2 = stream_tell(stream, path) + return (position2-position1) + count * subcon._sizeof(context, path) + macro._actualsize = _actualsize + + def _emitseq(ksy, bitwise): + return [ + dict(id="countfield", type=countfield._compileprimitivetype(ksy, bitwise)), + dict(id="data", type=subcon._compileprimitivetype(ksy, bitwise), repeat="expr", repeat_expr="countfield"), + ] + macro._emitseq = _emitseq + + return macro + + +class FixedSized(Subconstruct): + r""" + Restricts parsing to specified amount of bytes. + + Parsing reads `length` bytes, then defers to subcon using new BytesIO with said bytes. Building builds the subcon using new BytesIO, then writes said data and additional null bytes accordingly. Size is same as `length`, although negative amount raises an error. + + :param length: integer or context lambda, total amount of bytes (both data and padding) + :param subcon: Construct instance + + :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes + :raises PaddingError: length is negative + :raises PaddingError: subcon written more bytes than entire length (negative padding) + + Can propagate any exception from the lambda, possibly non-ConstructError. 
+ + Example:: + + >>> d = FixedSized(10, Byte) + >>> d.parse(b'\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00') + 255 + >>> d.build(255) + b'\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00' + >>> d.sizeof() + 10 + """ + + def __init__(self, length, subcon): + super().__init__(subcon) + self.length = length + + def _parse(self, stream, context, path): + length = evaluate(self.length, context) + if length < 0: + raise PaddingError("length cannot be negative", path=path) + substream = BytesIOWithOffsets.from_reading(stream, length, path) + return self.subcon._parsereport(substream, context, path) + + def _build(self, obj, stream, context, path): + length = evaluate(self.length, context) + if length < 0: + raise PaddingError("length cannot be negative", path=path) + stream2 = io.BytesIO() + buildret = self.subcon._build(obj, stream2, context, path) + data = stream2.getvalue() + pad = length - len(data) + if pad < 0: + raise PaddingError("subcon build %d bytes but was allowed only %d" % (len(data), length), path=path) + stream_write(stream, data, len(data), path) + stream_write(stream, bytes(pad), pad, path) + return buildret + + def _sizeof(self, context, path): + length = evaluate(self.length, context) + if length < 0: + raise PaddingError("length cannot be negative", path=path) + return length + + def _emitparse(self, code): + return f"restream(io.read({self.length}), lambda io: ({self.subcon._compileparse(code)}))" + + def _emitfulltype(self, ksy, bitwise): + return dict(size=repr(self.length).replace("this.",""), **self.subcon._compilefulltype(ksy, bitwise)) + + +class NullTerminated(Subconstruct): + r""" + Restricts parsing to bytes preceding a null byte. + + Parsing reads one byte at a time and accumulates it with previous bytes. When term was found, (by default) consumes but discards the term. When EOF was found, (by default) raises same StreamError exception. Then subcon is parsed using new BytesIO made with said data. Building builds the subcon and then writes the term. 
Size is undefined. + + The term can be multiple bytes, to support string classes with UTF16/32 encodings for example. Be warned however: as reported in Issue 1046, the data read must be a multiple of the term length and the term must start at a unit boundary, otherwise strange things happen when parsing. + + :param subcon: Construct instance + :param term: optional, bytes, terminator byte-string, default is \x00 single null byte + :param include: optional, bool, if to include terminator in resulting data, default is False + :param consume: optional, bool, if to consume terminator or leave it in the stream, default is True + :param require: optional, bool, if EOF results in failure or not, default is True + + :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes + :raises StreamError: encountered EOF but require is not disabled + :raises PaddingError: terminator is less than 1 bytes in length + + Example:: + + >>> d = NullTerminated(Byte) + >>> d.parse(b'\xff\x00') + 255 + >>> d.build(255) + b'\xff\x00' + """ + + def __init__(self, subcon, term=b"\x00", include=False, consume=True, require=True): + super().__init__(subcon) + self.term = term + self.include = include + self.consume = consume + self.require = require + + def _parse(self, stream, context, path): + term = self.term + unit = len(term) + if unit < 1: + raise PaddingError("NullTerminated term must be at least 1 byte", path=path) + data = b'' + offset = stream_tell(stream, path) + while True: + try: + b = stream_read(stream, unit, path) + except StreamError: + if self.require: + raise + else: + break + if b == term: + if self.include: + data += b + if not self.consume: + stream_seek(stream, -unit, 1, path) + break + data += b + substream = BytesIOWithOffsets(data, stream, offset) + return self.subcon._parsereport(substream, context, path) + + def _build(self, obj, stream, context, path): + 
buildret = self.subcon._build(obj, stream, context, path) + stream_write(stream, self.term, len(self.term), path) + return buildret + + def _sizeof(self, context, path): + raise SizeofError(path=path) + + def _emitfulltype(self, ksy, bitwise): + if len(self.term) > 1: + raise NotImplementedError + return dict(terminator=byte2int(self.term), include=self.include, consume=self.consume, eos_error=self.require, **self.subcon._compilefulltype(ksy, bitwise)) + + +class NullStripped(Subconstruct): + r""" + Restricts parsing to bytes except padding left of EOF. + + Parsing reads entire stream, then strips the data from right to left of null bytes, then parses subcon using new BytesIO made of said data. Building defers to subcon as-is. Size is undefined, because it reads till EOF. + + The pad can be multiple bytes, to support string classes with UTF16/32 encodings. + + :param subcon: Construct instance + :param pad: optional, bytes, padding byte-string, default is \x00 single null byte + + :raises PaddingError: pad is less than 1 bytes in length + + Example:: + + >>> d = NullStripped(Byte) + >>> d.parse(b'\xff\x00\x00') + 255 + >>> d.build(255) + b'\xff' + """ + + def __init__(self, subcon, pad=b"\x00"): + super().__init__(subcon) + self.pad = pad + + def _parse(self, stream, context, path): + pad = self.pad + unit = len(pad) + if unit < 1: + raise PaddingError("NullStripped pad must be at least 1 byte", path=path) + offset = stream_tell(stream, path) + data = stream_read_entire(stream, path) + if unit == 1: + data = data.rstrip(pad) + else: + tailunit = len(data) % unit + end = len(data) + if tailunit and data[-tailunit:] == pad[:tailunit]: + end -= tailunit + while end-unit >= 0 and data[end-unit:end] == pad: + end -= unit + data = data[:end] + substream = BytesIOWithOffsets(data, stream, offset) + return self.subcon._parsereport(substream, context, path) + + def _build(self, obj, stream, context, path): + return self.subcon._build(obj, stream, context, path) + + def 
_sizeof(self, context, path): + raise SizeofError(path=path) + + def _emitfulltype(self, ksy, bitwise): + if len(self.pad) > 1: + raise NotImplementedError + return dict(pad_right=byte2int(self.pad), **self.subcon._compilefulltype(ksy, bitwise)) + + +class RestreamData(Subconstruct): + r""" + Parses a field on external data (but does not build). + + Parsing defers to subcon, but provides it a separate BytesIO stream based on data provided by datafunc (a bytes literal or another BytesIO stream or Construct instances that returns bytes or context lambda). Building does nothing. Size is 0 because as far as other fields see it, this field does not produce or consume any bytes from the stream. + + :param datafunc: bytes or BytesIO or Construct instance (that parses into bytes) or context lambda, provides data for subcon to parse from + :param subcon: Construct instance + + Can propagate any exception from the lambdas, possibly non-ConstructError. + + Example:: + + >>> d = RestreamData(b"\x01", Int8ub) + >>> d.parse(b"") + 1 + >>> d.build(0) + b'' + + >>> d = RestreamData(NullTerminated(GreedyBytes), Int16ub) + >>> d.parse(b"\x01\x02\x00") + 0x0102 + >>> d = RestreamData(FixedSized(2, GreedyBytes), Int16ub) + >>> d.parse(b"\x01\x02\x00") + 0x0102 + """ + + def __init__(self, datafunc, subcon): + super().__init__(subcon) + self.datafunc = datafunc + self.flagbuildnone = True + + def _parse(self, stream, context, path): + data = evaluate(self.datafunc, context) + if isinstance(data, bytes): + stream2 = io.BytesIO(data) + if isinstance(data, io.BytesIO): + stream2 = data + if isinstance(data, Construct): + stream2 = io.BytesIO(data._parsereport(stream, context, path)) + return self.subcon._parsereport(stream2, context, path) + + def _build(self, obj, stream, context, path): + return obj + + def _sizeof(self, context, path): + return 0 + + def _emitparse(self, code): + return "restream(%r, lambda io: %s)" % (self.datafunc, self.subcon._compileparse(code), ) + + +class 
Transformed(Subconstruct): + r""" + Transforms bytes between the underlying stream and the (fixed-sized) subcon. + + Parsing reads a specified amount (or till EOF), processes data using a bytes-to-bytes decoding function, then parses subcon using those data. Building does build subcon into separate bytes, then processes it using encoding bytes-to-bytes function, then writes those data into main stream. Size is reported as `decodeamount` or `encodeamount` if those are equal, otherwise its SizeofError. + + Used internally to implement :class:`~construct.core.Bitwise` :class:`~construct.core.Bytewise` :class:`~construct.core.ByteSwapped` :class:`~construct.core.BitsSwapped` . + + Possible use-cases include encryption, obfuscation, byte-level encoding. + + .. warning:: Remember that subcon must consume (or produce) an amount of bytes that is same as `decodeamount` (or `encodeamount`). + + .. warning:: Do NOT use seeking/telling classes inside Transformed context. + + :param subcon: Construct instance + :param decodefunc: bytes-to-bytes function, applied before parsing subcon + :param decodeamount: integer, amount of bytes to read + :param encodefunc: bytes-to-bytes function, applied after building subcon + :param encodeamount: integer, amount of bytes to write + + :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes + :raises StreamError: subcon build and encoder transformed more or less than `encodeamount` bytes, if amount is specified + :raises StringError: building from non-bytes value, perhaps unicode + + Can propagate any exception from the lambdas, possibly non-ConstructError. 
+ + Example:: + + >>> d = Transformed(Bytes(16), bytes2bits, 2, bits2bytes, 2) + >>> d.parse(b"\x00\x00") + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + + >>> d = Transformed(GreedyBytes, bytes2bits, None, bits2bytes, None) + >>> d.parse(b"\x00\x00") + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + """ + + def __init__(self, subcon, decodefunc, decodeamount, encodefunc, encodeamount): + super().__init__(subcon) + self.decodefunc = decodefunc + self.decodeamount = decodeamount + self.encodefunc = encodefunc + self.encodeamount = encodeamount + + def _parse(self, stream, context, path): + if isinstance(self.decodeamount, type(None)): + data = stream_read_entire(stream, path) + if isinstance(self.decodeamount, int): + data = stream_read(stream, self.decodeamount, path) + data = self.decodefunc(data) + return self.subcon._parsereport(io.BytesIO(data), context, path) + + def _build(self, obj, stream, context, path): + stream2 = io.BytesIO() + buildret = self.subcon._build(obj, stream2, context, path) + data = stream2.getvalue() + data = self.encodefunc(data) + if isinstance(self.encodeamount, int): + if len(data) != self.encodeamount: + raise StreamError("encoding transformation produced wrong amount of bytes, %s instead of expected %s" % (len(data), self.encodeamount, ), path=path) + stream_write(stream, data, len(data), path) + return buildret + + def _sizeof(self, context, path): + if self.decodeamount is None or self.encodeamount is None: + raise SizeofError(path=path) + if self.decodeamount == self.encodeamount: + return self.encodeamount + raise SizeofError(path=path) + + +class Restreamed(Subconstruct): + r""" + Transforms bytes between the underlying stream and the (variable-sized) subcon. + + Used internally to implement :class:`~construct.core.Bitwise` :class:`~construct.core.Bytewise` :class:`~construct.core.ByteSwapped` :class:`~construct.core.BitsSwapped` . + + .. 
warning:: Remember that subcon must consume or produce an amount of bytes that is a multiple of encoding or decoding units. For example, in a Bitwise context you should process a multiple of 8 bits or the stream will fail during parsing/building. + + .. warning:: Do NOT use seeking/telling classes inside Restreamed context. + + :param subcon: Construct instance + :param decoder: bytes-to-bytes function, used on data chunks when parsing + :param decoderunit: integer, decoder takes chunks of this size + :param encoder: bytes-to-bytes function, used on data chunks when building + :param encoderunit: integer, encoder takes chunks of this size + :param sizecomputer: function that computes amount of bytes outputed + + Can propagate any exception from the lambda, possibly non-ConstructError. + Can also raise arbitrary exceptions in RestreamedBytesIO implementation. + + Example:: + + Bitwise <--> Restreamed(subcon, bits2bytes, 8, bytes2bits, 1, lambda n: n//8) + Bytewise <--> Restreamed(subcon, bytes2bits, 1, bits2bytes, 8, lambda n: n*8) + """ + + def __init__(self, subcon, decoder, decoderunit, encoder, encoderunit, sizecomputer): + super().__init__(subcon) + self.decoder = decoder + self.decoderunit = decoderunit + self.encoder = encoder + self.encoderunit = encoderunit + self.sizecomputer = sizecomputer + + def _parse(self, stream, context, path): + stream2 = RestreamedBytesIO(stream, self.decoder, self.decoderunit, self.encoder, self.encoderunit) + obj = self.subcon._parsereport(stream2, context, path) + stream2.close() + return obj + + def _build(self, obj, stream, context, path): + stream2 = RestreamedBytesIO(stream, self.decoder, self.decoderunit, self.encoder, self.encoderunit) + buildret = self.subcon._build(obj, stream2, context, path) + stream2.close() + return obj + + def _sizeof(self, context, path): + if self.sizecomputer is None: + raise SizeofError("Restreamed cannot calculate size without a sizecomputer", path=path) + else: + return 
self.sizecomputer(self.subcon._sizeof(context, path)) + + +class ProcessXor(Subconstruct): + r""" + Transforms bytes between the underlying stream and the subcon. + + Used internally by KaitaiStruct compiler, when translating `process: xor` tags. + + Parsing reads till EOF, xors data with the pad, then feeds that data into subcon. Building first builds the subcon into separate BytesIO stream, xors data with the pad, then writes that data into the main stream. Size is the same as subcon, unless it raises SizeofError. + + :param padfunc: integer or bytes or context lambda, single or multiple bytes to xor data with + :param subcon: Construct instance - Subclass authors should not override the external methods. Instead, - another API is available: + :raises StringError: pad is not integer or bytes - * _parse() - * _build() - * _sizeof() + Can propagate any exception from the lambda, possibly non-ConstructError. - There is also a flag API: + Example:: - * _set_flag() - * _clear_flag() - * _inherit_flags() - * _is_flag() + >>> d = ProcessXor(0xf0 or b'\xf0', Int16ub) + >>> d.parse(b"\x00\xff") + 0xf00f + >>> d.sizeof() + 2 + """ - And stateful copying: + def __init__(self, padfunc, subcon): + super().__init__(subcon) + self.padfunc = padfunc + + def _parse(self, stream, context, path): + pad = evaluate(self.padfunc, context) + if not isinstance(pad, (int, bytes)): + raise StringError("ProcessXor needs integer or bytes pad", path=path) + if isinstance(pad, bytes) and len(pad) == 1: + pad = byte2int(pad) + offset = stream_tell(stream, path) + data = stream_read_entire(stream, path) + if isinstance(pad, int): + if not (pad == 0): + data = bytes((b ^ pad) for b in data) + if isinstance(pad, bytes): + if not (len(pad) <= 64 and pad == bytes(len(pad))): + data = bytes((b ^ p) for b,p in zip(data, itertools.cycle(pad))) + substream = BytesIOWithOffsets(data, stream, offset) + return self.subcon._parsereport(substream, context, path) + + def _build(self, obj, stream, context, 
path): + pad = evaluate(self.padfunc, context) + if not isinstance(pad, (int, bytes)): + raise StringError("ProcessXor needs integer or bytes pad", path=path) + if isinstance(pad, bytes) and len(pad) == 1: + pad = byte2int(pad) + stream2 = io.BytesIO() + buildret = self.subcon._build(obj, stream2, context, path) + data = stream2.getvalue() + if isinstance(pad, int): + if not (pad == 0): + data = bytes((b ^ pad) for b in data) + if isinstance(pad, bytes): + if not (len(pad) <= 64 and pad == bytes(len(pad))): + data = bytes((b ^ p) for b,p in zip(data, itertools.cycle(pad))) + stream_write(stream, data, len(data), path) + return buildret + + def _sizeof(self, context, path): + return self.subcon._sizeof(context, path) + + +class ProcessRotateLeft(Subconstruct): + r""" + Transforms bytes between the underlying stream and the subcon. + + Used internally by KaitaiStruct compiler, when translating `process: rol/ror` tags. + + Parsing reads till EOF, rotates (shifts) the data *left* by amount in bits, then feeds that data into subcon. Building first builds the subcon into separate BytesIO stream, rotates *right* by negating amount, then writes that data into the main stream. Size is the same as subcon, unless it raises SizeofError. + + :param amount: integer or context lambda, shift by this amount in bits, treated modulo (group x 8) + :param group: integer or context lambda, shifting is applied to chunks of this size in bytes + :param subcon: Construct instance + + :raises RotationError: group is less than 1 + :raises RotationError: data length is not a multiple of group size + + Can propagate any exception from the lambda, possibly non-ConstructError. 
+ + Example:: + + >>> d = ProcessRotateLeft(4, 1, Int16ub) + >>> d.parse(b'\x0f\xf0') + 0xf00f + >>> d = ProcessRotateLeft(4, 2, Int16ub) + >>> d.parse(b'\x0f\xf0') + 0xff00 + >>> d.sizeof() + 2 + """ - * __getstate__() - * __setstate__() + # formula taken from: http://stackoverflow.com/a/812039 + precomputed_single_rotations = {amount: [(i << amount) & 0xff | (i >> (8-amount)) for i in range(256)] for amount in range(1,8)} - Attributes and Inheritance - ========================== + def __init__(self, amount, group, subcon): + super().__init__(subcon) + self.amount = amount + self.group = group - All constructs have a name and flags. The name is used for naming struct - members and context dictionaries. Note that the name can either be a - string, or None if the name is not needed. A single underscore ("_") is a - reserved name, and so are names starting with a less-than character ("<"). - The name should be descriptive, short, and valid as a Python identifier, - although these rules are not enforced. + def _parse(self, stream, context, path): + amount = evaluate(self.amount, context) + group = evaluate(self.group, context) + if group < 1: + raise RotationError("group size must be at least 1 to be valid", path=path) - The flags specify additional behavioral information about this construct. - Flags are used by enclosing constructs to determine a proper course of - action. Flags are inherited by default, from inner subconstructs to outer - constructs. The enclosing construct may set new flags or clear existing - ones, as necessary. + amount = amount % (group * 8) + amount_bytes = amount // 8 + data = stream_read_entire(stream, path) - For example, if FLAG_COPY_CONTEXT is set, repeaters will pass a copy of - the context for each iteration, which is necessary for OnDemand parsing. 
- """ + if len(data) % group != 0: + raise RotationError("data length must be a multiple of group size", path=path) - FLAG_COPY_CONTEXT = 0x0001 - FLAG_DYNAMIC = 0x0002 - FLAG_EMBED = 0x0004 - FLAG_NESTING = 0x0008 + if amount == 0: + pass - __slots__ = ["name", "conflags"] - def __init__(self, name, flags = 0): - if name is not None: - if type(name) is not str: - raise TypeError("name must be a string or None", name) - if name == "_" or name.startswith("<"): - raise ValueError("reserved name", name) - self.name = name - self.conflags = flags + elif group == 1: + translate = ProcessRotateLeft.precomputed_single_rotations[amount] + data = bytes(translate[a] for a in data) - def __repr__(self): - return "%s(%r)" % (self.__class__.__name__, self.name) + elif amount % 8 == 0: + indices = [(i + amount_bytes) % group for i in range(group)] + data = bytes(data[i+k] for i in range(0,len(data),group) for k in indices) - def _set_flag(self, flag): - """ - Set the given flag or flags. + else: + amount1 = amount % 8 + amount2 = 8 - amount1 + indices_pairs = [ ((i+amount_bytes) % group, (i+1+amount_bytes) % group) for i in range(group)] + data = bytes((data[i+k1] << amount1) & 0xff | (data[i+k2] >> amount2) for i in range(0,len(data),group) for k1,k2 in indices_pairs) - :param int flag: flag to set; may be OR'd combination of flags - """ + return self.subcon._parsereport(io.BytesIO(data), context, path) - self.conflags |= flag + def _build(self, obj, stream, context, path): + amount = evaluate(self.amount, context) + group = evaluate(self.group, context) + if group < 1: + raise RotationError("group size must be at least 1 to be valid", path=path) - def _clear_flag(self, flag): - """ - Clear the given flag or flags. 
+ amount = -amount % (group * 8) + amount_bytes = amount // 8 + stream2 = io.BytesIO() + buildret = self.subcon._build(obj, stream2, context, path) + data = stream2.getvalue() - :param int flag: flag to clear; may be OR'd combination of flags - """ + if len(data) % group != 0: + raise RotationError("data length must be a multiple of group size", path=path) - self.conflags &= ~flag + if amount == 0: + pass - def _inherit_flags(self, *subcons): - """ - Pull flags from subconstructs. - """ + elif group == 1: + translate = ProcessRotateLeft.precomputed_single_rotations[amount] + data = bytes(translate[a] for a in data) - for sc in subcons: - self._set_flag(sc.conflags) + elif amount % 8 == 0: + indices = [(i + amount_bytes) % group for i in range(group)] + data = bytes(data[i+k] for i in range(0,len(data),group) for k in indices) - def _is_flag(self, flag): - """ - Check whether a given flag is set. + else: + amount1 = amount % 8 + amount2 = 8 - amount1 + indices_pairs = [ ((i+amount_bytes) % group, (i+1+amount_bytes) % group) for i in range(group)] + data = bytes((data[i+k1] << amount1) & 0xff | (data[i+k2] >> amount2) for i in range(0,len(data),group) for k1,k2 in indices_pairs) - :param int flag: flag to check - """ + stream_write(stream, data, len(data), path) + return buildret - return bool(self.conflags & flag) + def _sizeof(self, context, path): + return self.subcon._sizeof(context, path) - def __getstate__(self): - """ - Obtain a dictionary representing this construct's state. - """ - attrs = {} - if hasattr(self, "__dict__"): - attrs.update(self.__dict__) - slots = [] - c = self.__class__ - while c is not None: - if hasattr(c, "__slots__"): - slots.extend(c.__slots__) - c = c.__base__ - for name in slots: - if hasattr(self, name): - attrs[name] = getattr(self, name) - return attrs +class Checksum(Construct): + r""" + Field that is build or validated by a hash of a given byte range. Usually used with :class:`~construct.core.RawCopy` . 
- def __setstate__(self, attrs): - """ - Set this construct's state to a given state. - """ + Parsing compares parsed subcon `checksumfield` with a context entry provided by `bytesfunc` and transformed by `hashfunc`. Building fetches the context entry, transforms it, then writes it using subcon. Size is same as subcon. - for name, value in attrs.iteritems(): - setattr(self, name, value) + :param checksumfield: a subcon field that reads the checksum, usually Bytes(int) + :param hashfunc: function that takes bytes and returns whatever checksumfield takes when building, usually from hashlib module + :param bytesfunc: context lambda that returns bytes (or object) to be hashed, usually like this.rawcopy1.data - def __copy__(self): - """returns a copy of this construct""" - self2 = object.__new__(self.__class__) - self2.__setstate__(self.__getstate__()) - return self2 + :raises ChecksumError: parsed checksum does not match the checksum computed from actual data - def parse(self, data): - """ - Parse an in-memory buffer. + Can propagate any exception from the lambdas, possibly non-ConstructError. - Strings, buffers, memoryviews, and other complete buffers can be - parsed with this method. - """ + Example:: - return self.parse_stream(StringIO(data)) + import hashlib + d = Struct( + "fields" / RawCopy(Struct( + Padding(1000), + )), + "checksum" / Checksum(Bytes(64), + lambda data: hashlib.sha512(data).digest(), + this.fields.data), + ) + d.build(dict(fields=dict(value={}))) + + :: + + import hashlib + d = Struct( + "offset" / Tell, + "checksum" / Padding(64), + "fields" / RawCopy(Struct( + Padding(1000), + )), + "checksum" / Pointer(this.offset, Checksum(Bytes(64), + lambda data: hashlib.sha512(data).digest(), + this.fields.data)), + ) + d.build(dict(fields=dict(value={}))) + """ - def parse_stream(self, stream): - """ - Parse a stream.
+ def __init__(self, checksumfield, hashfunc, bytesfunc): + super().__init__() + self.checksumfield = checksumfield + self.hashfunc = hashfunc + self.bytesfunc = bytesfunc + self.flagbuildnone = True + + def _parse(self, stream, context, path): + hash1 = self.checksumfield._parsereport(stream, context, path) + hash2 = self.hashfunc(self.bytesfunc(context)) + if hash1 != hash2: + raise ChecksumError( + "wrong checksum, read %r, computed %r" % ( + hash1 if not isinstance(hash1, bytes) else binascii.hexlify(hash1), + hash2 if not isinstance(hash2, bytes) else binascii.hexlify(hash2), ), + path=path + ) + return hash1 + + def _build(self, obj, stream, context, path): + hash2 = self.hashfunc(self.bytesfunc(context)) + self.checksumfield._build(hash2, stream, context, path) + return hash2 + + def _sizeof(self, context, path): + return self.checksumfield._sizeof(context, path) + + +class Compressed(Tunnel): + r""" + Compresses and decompresses underlying stream when processing subcon. When parsing, entire stream is consumed. When building, it puts compressed bytes without marking the end. This construct should be used with :class:`~construct.core.Prefixed` . + + Parsing and building transforms all bytes using a specified codec. Since data is processed until EOF, it behaves similar to `GreedyBytes`. Size is undefined. 
+ + :param subcon: Construct instance, subcon used for storing the value + :param encoding: string, any of module names like zlib/gzip/bzip2/lzma, otherwise any of codecs module bytes<->bytes encodings, each codec usually requires some Python version + :param level: optional, integer between 0..9, although lzma discards it, some encoders allow different compression levels + + :raises ImportError: needed module could not be imported by ctor + :raises StreamError: stream failed when reading until EOF + + Example:: + + >>> d = Prefixed(VarInt, Compressed(GreedyBytes, "zlib")) + >>> d.build(bytes(100)) + b'\x0cx\x9cc`\xa0=\x00\x00\x00d\x00\x01' + >>> len(_) + 13 + """ + + def __init__(self, subcon, encoding, level=None): + super().__init__(subcon) + self.encoding = encoding + self.level = level + if self.encoding == "zlib": + import zlib + self.lib = zlib + elif self.encoding == "gzip": + import gzip + self.lib = gzip + elif self.encoding == "bzip2": + import bz2 + self.lib = bz2 + elif self.encoding == "lzma": + import lzma + self.lib = lzma + else: + import codecs + self.lib = codecs + + def _decode(self, data, context, path): + if self.encoding in ("zlib", "gzip", "bzip2", "lzma"): + return self.lib.decompress(data) + return self.lib.decode(data, self.encoding) + + def _encode(self, data, context, path): + if self.encoding in ("zlib", "gzip", "bzip2", "lzma"): + if self.level is None or self.encoding == "lzma": + return self.lib.compress(data) + else: + return self.lib.compress(data, self.level) + return self.lib.encode(data, self.encoding) - Files, pipes, sockets, and other streaming sources of data are handled - by this method. - """ - return self._parse(stream, Container()) +class CompressedLZ4(Tunnel): + r""" + Compresses and decompresses underlying stream before processing subcon. When parsing, entire stream is consumed. When building, it puts compressed bytes without marking the end. This construct should be used with :class:`~construct.core.Prefixed` . 
- def _parse(self, stream, context): - """ - Override me in your subclass. - """ + Parsing and building transforms all bytes using LZ4 library. Since data is processed until EOF, it behaves similar to `GreedyBytes`. Size is undefined. - raise NotImplementedError() + :param subcon: Construct instance, subcon used for storing the value - def build(self, obj): - """ - Build an object in memory. - """ + :raises ImportError: needed module could not be imported by ctor + :raises StreamError: stream failed when reading until EOF - stream = StringIO() - self.build_stream(obj, stream) - return stream.getvalue() + Can propagate lz4.frame exceptions. - def build_stream(self, obj, stream): - """ - Build an object directly into a stream. - """ + Example:: - self._build(obj, stream, Container()) + >>> d = Prefixed(VarInt, CompressedLZ4(GreedyBytes)) + >>> d.build(bytes(100)) + b'"\x04"M\x18h@d\x00\x00\x00\x00\x00\x00\x00#\x0b\x00\x00\x00\x1f\x00\x01\x00KP\x00\x00\x00\x00\x00\x00\x00\x00\x00' + >>> len(_) + 35 + """ - def _build(self, obj, stream, context): - """ - Override me in your subclass. - """ + def __init__(self, subcon): + super().__init__(subcon) + import lz4.frame + self.lib = lz4.frame + + def _decode(self, data, context, path): + return self.lib.decompress(data) + + def _encode(self, data, context, path): + return self.lib.compress(data) + + +class EncryptedSym(Tunnel): + r""" + Perform symmetrical encryption and decryption of the underlying stream before processing subcon. When parsing, entire stream is consumed. When building, it puts encrypted bytes without marking the end. + + Parsing and building transforms all bytes using the selected cipher. Since data is processed until EOF, it behaves similar to `GreedyBytes`. Size is undefined. + + The key for encryption and decryption should be passed via `contextkw` to `build` and `parse` methods. + + This construct is heavily based on the `cryptography` library, which supports the following algorithms and modes. 
For more details please see the documentation of that library. + + Algorithms: + - AES + - Camellia + - ChaCha20 + - TripleDES + - CAST5 + - SEED + - SM4 + - Blowfish (weak cipher) + - ARC4 (weak cipher) + - IDEA (weak cipher) + + Modes: + - CBC + - CTR + - OFB + - CFB + - CFB8 + - XTS + - ECB (insecure) + + .. note:: Keep in mind that some of the algorithms require padding of the data. This can be done e.g. with :class:`~construct.core.Aligned`. + .. note:: For GCM mode use :class:`~construct.core.EncryptedSymAead`. + + :param subcon: Construct instance, subcon used for storing the value + :param cipher: Cipher object or context lambda from cryptography.hazmat.primitives.ciphers + + :raises ImportError: needed module could not be imported + :raises StreamError: stream failed when reading until EOF + :raises CipherError: no cipher object is provided + :raises CipherError: an AEAD cipher is used + + Can propagate cryptography.exceptions exceptions. + + Example:: + + >>> from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes + >>> d = Struct( + ... "iv" / Default(Bytes(16), os.urandom(16)), + ... "enc_data" / EncryptedSym( + ... Aligned(16, + ... Struct( + ... "width" / Int16ul, + ... "height" / Int16ul, + ... ) + ... ), + ... lambda ctx: Cipher(algorithms.AES(ctx._.key), modes.CBC(ctx.iv)) + ... ) + ... 
) + >>> key128 = b"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + >>> d.build({"enc_data": {"width": 5, "height": 4}}, key=key128) + b"o\x11i\x98~H\xc9\x1c\x17\x83\xf6|U:\x1a\x86+\x00\x89\xf7\x8e\xc3L\x04\t\xca\x8a\xc8\xc2\xfb'\xc8" + >>> d.parse(b"o\x11i\x98~H\xc9\x1c\x17\x83\xf6|U:\x1a\x86+\x00\x89\xf7\x8e\xc3L\x04\t\xca\x8a\xc8\xc2\xfb'\xc8", key=key128) + Container: + iv = b'o\x11i\x98~H\xc9\x1c\x17\x83\xf6|U:\x1a\x86' (total 16) + enc_data = Container: + width = 5 + height = 4 + """ + + def __init__(self, subcon, cipher): + import cryptography + super().__init__(subcon) + self.cipher = cipher + + def _evaluate_cipher(self, context, path): + from cryptography.hazmat.primitives.ciphers import Cipher, modes + cipher = evaluate(self.cipher, context) + if not isinstance(cipher, Cipher): + raise CipherError(f"cipher {repr(cipher)} is not a cryptography.hazmat.primitives.ciphers.Cipher object", path=path) + if isinstance(cipher.mode, modes.GCM): + raise CipherError(f"AEAD cipher is not supported in this class, use EncryptedSymAead", path=path) + return cipher + + def _decode(self, data, context, path): + cipher = self._evaluate_cipher(context, path) + decryptor = cipher.decryptor() + return decryptor.update(data) + decryptor.finalize() + + def _encode(self, data, context, path): + cipher = self._evaluate_cipher(context, path) + encryptor = cipher.encryptor() + return encryptor.update(data) + encryptor.finalize() + + +class EncryptedSymAead(Tunnel): + r""" + Perform symmetrical AEAD encryption and decryption of the underlying stream before processing subcon. When parsing, entire stream is consumed. When building, it puts encrypted bytes and tag without marking the end. + + Parsing and building transforms all bytes using the selected cipher and also authenticates the `associated_data`. Since data is processed until EOF, it behaves similar to `GreedyBytes`. Size is undefined. 
+ + The key for encryption and decryption should be passed via `contextkw` to `build` and `parse` methods. + + This construct is heavily based on the `cryptography` library, which supports the following AEAD ciphers. For more details please see the documentation of that library. + + AEAD ciphers: + - AESGCM + - AESCCM + - ChaCha20Poly1305 + + :param subcon: Construct instance, subcon used for storing the value + :param cipher: Cipher object or context lambda from cryptography.hazmat.primitives.ciphers + + :raises ImportError: needed module could not be imported + :raises StreamError: stream failed when reading until EOF + :raises CipherError: unsupported cipher object is provided + + Can propagate cryptography.exceptions exceptions. + + Example:: + + >>> from cryptography.hazmat.primitives.ciphers import aead + >>> d = Struct( + ... "nonce" / Default(Bytes(16), os.urandom(16)), + ... "associated_data" / Bytes(21), + ... "enc_data" / EncryptedSymAead( + ... GreedyBytes, + ... lambda ctx: aead.AESGCM(ctx._.key), + ... this.nonce, + ... this.associated_data + ... ) + ... ) + >>> key128 = b"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + >>> d.build({"associated_data": b"This is authenticated", "enc_data": b"The secret message"}, key=key128) + b'\xe3\xb0"\xbaQ\x18\xd3|\x14\xb0q\x11\xb5XZ\xeeThis is authenticated\x88~\xe5Vh\x00\x01m\xacn\xad k\x02\x13\xf4\xb4[\xbe\x12$\xa0\x7f\xfb\xbf\x82Ar\xb0\x97C\x0b\xe3\x85' + >>> d.parse(b'\xe3\xb0"\xbaQ\x18\xd3|\x14\xb0q\x11\xb5XZ\xeeThis is authenticated\x88~\xe5Vh\x00\x01m\xacn\xad k\x02\x13\xf4\xb4[\xbe\x12$\xa0\x7f\xfb\xbf\x82Ar\xb0\x97C\x0b\xe3\x85', key=key128) + Container: + nonce = b'\xe3\xb0"\xbaQ\x18\xd3|\x14\xb0q\x11\xb5XZ\xee' (total 16) + associated_data = b'This is authenti'... (truncated, total 21) + enc_data = b'The secret messa'... 
(truncated, total 18) + """ + + def __init__(self, subcon, cipher, nonce, associated_data=b""): + super().__init__(subcon) + self.cipher = cipher + self.nonce = nonce + self.associated_data = associated_data + + def _evaluate_cipher(self, context, path): + from cryptography.hazmat.primitives.ciphers.aead import AESGCM, AESCCM, ChaCha20Poly1305 + cipher = evaluate(self.cipher, context) + if not isinstance(cipher, (AESGCM, AESCCM, ChaCha20Poly1305)): + raise CipherError(f"cipher object {repr(cipher)} is not supported", path=path) + return cipher + + def _decode(self, data, context, path): + cipher = self._evaluate_cipher(context, path) + nonce = evaluate(self.nonce, context) + associated_data = evaluate(self.associated_data, context) + return cipher.decrypt(nonce, data, associated_data) + + def _encode(self, data, context, path): + cipher = self._evaluate_cipher(context, path) + nonce = evaluate(self.nonce, context) + associated_data = evaluate(self.associated_data, context) + return cipher.encrypt(nonce, data, associated_data) + + +class Rebuffered(Subconstruct): + r""" + Caches bytes from underlying stream, so it becomes seekable and tellable, and also becomes blocking on reading. Useful for processing non-file streams like pipes, sockets, etc. + + .. warning:: Experimental implementation. May not be mature enough. + + :param subcon: Construct instance, subcon which will operate on the buffered stream + :param tailcutoff: optional, integer, amount of bytes kept in buffer, by default buffers everything + + Can also raise arbitrary exceptions in its implementation. + + Example:: + + Rebuffered(..., tailcutoff=1024).parse_stream(nonseekable_stream) + """ - raise NotImplementedError() + def __init__(self, subcon, tailcutoff=None): + super().__init__(subcon) + self.stream2 = RebufferedBytesIO(None, tailcutoff=tailcutoff) - def sizeof(self, context=None): - """ - Calculate the size of this object, optionally using a context. 
+ def _parse(self, stream, context, path): + self.stream2.substream = stream + return self.subcon._parsereport(self.stream2, context, path) - Some constructs have no fixed size and can only know their size for a - given hunk of data; these constructs will raise an error if they are - not passed a context. + def _build(self, obj, stream, context, path): + self.stream2.substream = stream + return self.subcon._build(obj, self.stream2, context, path) - :param ``Container`` context: contextual data - :returns: int of the length of this construct - :raises SizeofError: the size could not be determined - """ +#=============================================================================== +# lazy equivalents +#=============================================================================== +class Lazy(Subconstruct): + r""" + Lazyfies a field. - if context is None: - context = Container() - try: - return self._sizeof(context) - except Exception, e: - raise SizeofError(e) + This wrapper allows you to do lazy parsing of individual fields inside a normal Struct (without using LazyStruct which may not work in every scenario). It is also used by KaitaiStruct compiler to emit `instances` because those are not processed greedily, and they may refer to other not yet parsed fields. Those are 2 entirely different applications but semantics are the same. - def _sizeof(self, context): - """ - Override me in your subclass. - """ + Parsing saves the current stream offset and returns a lambda. If and when that lambda gets evaluated, it seeks the stream to then-current position, parses the subcon, and seeks the stream back to previous position. Building evaluates that lambda into an object (if needed), then defers to subcon. Size also defers to subcon. - raise SizeofError("Raw Constructs have no size!") + :param subcon: Construct instance -class Subconstruct(Construct): - """ - Abstract parent class of all subconstructs. 
+ :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes + :raises StreamError: stream is not seekable and tellable - Subconstructs wrap an inner Construct, inheriting its name and flags. + Example:: - :param ``Construct`` subcon: the construct to wrap + >>> d = Lazy(Byte) + >>> x = d.parse(b'\x00') + >>> x + <function construct.core.Lazy._parse.<locals>.execute> + >>> x() + 0 + >>> d.build(0) + b'\x00' + >>> d.build(x) + b'\x00' + >>> d.sizeof() + 1 """ - __slots__ = ["subcon"] def __init__(self, subcon): - Construct.__init__(self, subcon.name, subcon.conflags) - self.subcon = subcon - def _parse(self, stream, context): - return self.subcon._parse(stream, context) - def _build(self, obj, stream, context): - self.subcon._build(obj, stream, context) - def _sizeof(self, context): - return self.subcon._sizeof(context) + super().__init__(subcon) + + def _parse(self, stream, context, path): + offset = stream_tell(stream, path) + def execute(): + fallback = stream_tell(stream, path) + stream_seek(stream, offset, 0, path) + obj = self.subcon._parsereport(stream, context, path) + stream_seek(stream, fallback, 0, path) + return obj + len = self.subcon._actualsize(stream, context, path) + stream_seek(stream, len, 1, path) + return execute -class Adapter(Subconstruct): - """ - Abstract adapter parent class. + def _build(self, obj, stream, context, path): + if callable(obj): + obj = obj() + return self.subcon._build(obj, stream, context, path) - Adapters should implement ``_decode()`` and ``_encode()``.
- :param ``Construct`` subcon: the construct to wrap - """ +class LazyContainer(dict): + """Used internally.""" - __slots__ = [] - def _parse(self, stream, context): - return self._decode(self.subcon._parse(stream, context), context) - def _build(self, obj, stream, context): - self.subcon._build(self._encode(obj, context), stream, context) - def _decode(self, obj, context): - raise NotImplementedError() - def _encode(self, obj, context): - raise NotImplementedError() + def __init__(self, struct, stream, offsets, values, context, path): + self._struct = struct + self._stream = stream + self._offsets = offsets + self._values = values + self._context = context + self._path = path + def __getattr__(self, name): + if name in self._struct._subconsindexes: + return self[name] + raise AttributeError -#=============================================================================== -# Fields -#=============================================================================== -def _read_stream(stream, length): - if length < 0: - raise ValueError("length must be >= 0", length) - data = stream.read(length) - if len(data) != length: - raise FieldError("expected %d, found %d" % (length, len(data))) - return data + def __getitem__(self, index): + if isinstance(index, str): + index = self._struct._subconsindexes[index] # KeyError + if index in self._values: + return self._values[index] + stream_seek(self._stream, self._offsets[index], 0, self._path) # KeyError + parseret = self._struct.subcons[index]._parsereport(self._stream, self._context, self._path) + self._values[index] = parseret + return parseret -def _write_stream(stream, length, data): - if length < 0: - raise ValueError("length must be >= 0", length) - if len(data) != length: - raise FieldError("expected %d, found %d" % (length, len(data))) - stream.write(data) + def __len__(self): + return len(self._struct.subcons) -class StaticField(Construct): - """ - A fixed-size byte field. 
+ def keys(self): + return iter(self._struct._subcons) - :param str name: field name - :param int length: number of bytes in the field - """ + def values(self): + return (self[k] for k in self._struct._subcons) - __slots__ = ["length"] - def __init__(self, name, length): - Construct.__init__(self, name) - self.length = length - def _parse(self, stream, context): - return _read_stream(stream, self.length) - def _build(self, obj, stream, context): - _write_stream(stream, self.length, obj) - def _sizeof(self, context): - return self.length + def items(self): + return ((k, self[k]) for k in self._struct._subcons) -class FormatField(StaticField): - """ - A field that uses ``struct`` to pack and unpack data. + __iter__ = keys - See ``struct`` documentation for instructions on crafting format strings. + def __eq__(self, other): + return Container.__eq__(self, other) - :param str name: name of the field - :param str endianness: format endianness string; one of "<", ">", or "=" - :param str format: a single format character - """ + def __repr__(self): + return "<LazyContainer: %s items cached, %s subcons>" % (len(self._values), len(self._struct.subcons), ) - __slots__ = ["packer"] - def __init__(self, name, endianity, format): - if endianity not in (">", "<", "="): - raise ValueError("endianity must be be '=', '<', or '>'", - endianity) - if len(format) != 1: - raise ValueError("must specify one and only one format char") - self.packer = Packer(endianity + format) - StaticField.__init__(self, name, self.packer.size) - def __getstate__(self): - attrs = StaticField.__getstate__(self) - attrs["packer"] = attrs["packer"].format - return attrs - def __setstate__(self, attrs): - attrs["packer"] = Packer(attrs["packer"]) - return StaticField.__setstate__(attrs) - def _parse(self, stream, context): - try: - return self.packer.unpack(_read_stream(stream, self.length))[0] - except Exception, ex: - raise FieldError(ex) - def _build(self, obj, stream, context): - try: - _write_stream(stream, self.length, self.packer.pack(obj)) -
except Exception, ex: - raise FieldError(ex) - -class MetaField(Construct): - """ - A variable-length field. The length is obtained at runtime from a - function. - - :param str name: name of the field - :param callable lengthfunc: callable that takes a context and returns - length as an int - - >>> foo = Struct("foo", - ... Byte("length"), - ... MetaField("data", lambda ctx: ctx["length"]) - ... ) - >>> foo.parse("\\x03ABC") - Container(data = 'ABC', length = 3) - >>> foo.parse("\\x04ABCD") - Container(data = 'ABCD', length = 4) - """ - - __slots__ = ["lengthfunc"] - def __init__(self, name, lengthfunc): - Construct.__init__(self, name) - self.lengthfunc = lengthfunc - self._set_flag(self.FLAG_DYNAMIC) - def _parse(self, stream, context): - return _read_stream(stream, self.lengthfunc(context)) - def _build(self, obj, stream, context): - _write_stream(stream, self.lengthfunc(context), obj) - def _sizeof(self, context): - return self.lengthfunc(context) +class LazyStruct(Construct): + r""" + Equivalent to :class:`~construct.core.Struct`, but when this class is parsed, most fields are not parsed (they are skipped if their size can be measured by _actualsize or _sizeof method). See its docstring for details. -#=============================================================================== -# arrays and repeaters -#=============================================================================== -class MetaArray(Subconstruct): - """ - An array (repeater) of a meta-count. The array will iterate exactly - `countfunc()` times. Will raise ArrayError if less elements are found. - See also Array, Range and RepeatUntil. 
- - Parameters: - * countfunc - a function that takes the context as a parameter and returns - the number of elements of the array (count) - * subcon - the subcon to repeat `countfunc()` times - - Example: - MetaArray(lambda ctx: 5, UBInt8("foo")) - """ - __slots__ = ["countfunc"] - def __init__(self, countfunc, subcon): - Subconstruct.__init__(self, subcon) - self.countfunc = countfunc - self._clear_flag(self.FLAG_COPY_CONTEXT) - self._set_flag(self.FLAG_DYNAMIC) - def _parse(self, stream, context): - obj = ListContainer() - c = 0 - count = self.countfunc(context) - try: - if self.subcon.conflags & self.FLAG_COPY_CONTEXT: - while c < count: - obj.append(self.subcon._parse(stream, context.__copy__())) - c += 1 - else: - while c < count: - obj.append(self.subcon._parse(stream, context)) - c += 1 - except ConstructError, ex: - raise ArrayError("expected %d, found %d" % (count, c), ex) - return obj - def _build(self, obj, stream, context): - count = self.countfunc(context) - if len(obj) != count: - raise ArrayError("expected %d, found %d" % (count, len(obj))) - if self.subcon.conflags & self.FLAG_COPY_CONTEXT: - for subobj in obj: - self.subcon._build(subobj, stream, context.__copy__()) - else: - for subobj in obj: - self.subcon._build(subobj, stream, context) - def _sizeof(self, context): - return self.subcon._sizeof(context) * self.countfunc(context) - -class Range(Subconstruct): - """ - A range-array. The subcon will iterate between `mincount` to `maxcount` - times. If less than `mincount` elements are found, raises RangeError. - See also GreedyRange and OptionalGreedyRange. - - The general-case repeater. Repeats the given unit for at least mincount - times, and up to maxcount times. If an exception occurs (EOF, validation - error), the repeater exits. If less than mincount units have been - successfully parsed, a RangeError is raised. - - .. note:: - This object requires a seekable stream for parsing. 
- - :param int mincount: the minimal count - :param int maxcount: the maximal count - :param Construct subcon: the subcon to repeat - - >>> c = Range(3, 7, UBInt8("foo")) - >>> c.parse("\\x01\\x02") - Traceback (most recent call last): - ... - construct.core.RangeError: expected 3..7, found 2 - >>> c.parse("\\x01\\x02\\x03") - [1, 2, 3] - >>> c.parse("\\x01\\x02\\x03\\x04\\x05\\x06") - [1, 2, 3, 4, 5, 6] - >>> c.parse("\\x01\\x02\\x03\\x04\\x05\\x06\\x07") - [1, 2, 3, 4, 5, 6, 7] - >>> c.parse("\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\x09") - [1, 2, 3, 4, 5, 6, 7] - >>> c.build([1,2]) - Traceback (most recent call last): - ... - construct.core.RangeError: expected 3..7, found 2 - >>> c.build([1,2,3,4]) - '\\x01\\x02\\x03\\x04' - >>> c.build([1,2,3,4,5,6,7,8]) - Traceback (most recent call last): - ... - construct.core.RangeError: expected 3..7, found 8 - """ - - __slots__ = ["mincount", "maxcout"] - def __init__(self, mincount, maxcout, subcon): - Subconstruct.__init__(self, subcon) - self.mincount = mincount - self.maxcout = maxcout - self._clear_flag(self.FLAG_COPY_CONTEXT) - self._set_flag(self.FLAG_DYNAMIC) - def _parse(self, stream, context): - obj = ListContainer() - c = 0 - try: - if self.subcon.conflags & self.FLAG_COPY_CONTEXT: - while c < self.maxcout: - pos = stream.tell() - obj.append(self.subcon._parse(stream, context.__copy__())) - c += 1 - else: - while c < self.maxcout: - pos = stream.tell() - obj.append(self.subcon._parse(stream, context)) - c += 1 - except ConstructError, ex: - if c < self.mincount: - raise RangeError("expected %d to %d, found %d" % - (self.mincount, self.maxcout, c), ex) - stream.seek(pos) - return obj - def _build(self, obj, stream, context): - if len(obj) < self.mincount or len(obj) > self.maxcout: - raise RangeError("expected %d to %d, found %d" % - (self.mincount, self.maxcout, len(obj))) - cnt = 0 - try: - if self.subcon.conflags & self.FLAG_COPY_CONTEXT: - for subobj in obj: - self.subcon._build(subobj, stream, 
context.__copy__()) - cnt += 1 - else: - for subobj in obj: - self.subcon._build(subobj, stream, context) - cnt += 1 - except ConstructError, ex: - if cnt < self.mincount: - raise RangeError("expected %d to %d, found %d" % - (self.mincount, self.maxcout, len(obj)), ex) - def _sizeof(self, context): - raise SizeofError("can't calculate size") + Fields are parsed depending on some factors: -class RepeatUntil(Subconstruct): - """ - An array that repeat until the predicate indicates it to stop. Note that - the last element (which caused the repeat to exit) is included in the - return value. + * Some fields like Int* Float* Bytes(5) Array(5,Byte) Pointer are fixed-size and are therefore skipped. Stream is not read. + * Some fields like Bytes(this.field) are variable-size but their size is known during parsing when there is a corresponding context entry. Those fields are also skipped. Stream is not read. + * Some fields like Prefixed PrefixedArray PascalString are variable-size but their size can be computed by partially reading the stream. Only first few bytes are read (the lengthfield). + * Other fields like VarInt need to be parsed. Stream position that is left after the field was parsed is used. + * Some fields may not work properly, due to the fact that this class attempts to skip fields, and parses them only out of necessity. Miscellaneous fields often have size defined as 0, and fixed sized fields are skippable. - Parameters: - * predicate - a predicate function that takes (obj, context) and returns - True if the stop-condition is met, or False to continue. - * subcon - the subcon to repeat. 
+ Note there are restrictions: - Example: - # will read chars until \x00 (inclusive) - RepeatUntil(lambda obj, ctx: obj == "\x00", - Field("chars", 1) - ) - """ - __slots__ = ["predicate"] - def __init__(self, predicate, subcon): - Subconstruct.__init__(self, subcon) - self.predicate = predicate - self._clear_flag(self.FLAG_COPY_CONTEXT) - self._set_flag(self.FLAG_DYNAMIC) - def _parse(self, stream, context): - obj = [] - try: - if self.subcon.conflags & self.FLAG_COPY_CONTEXT: - while True: - subobj = self.subcon._parse(stream, context.__copy__()) - obj.append(subobj) - if self.predicate(subobj, context): - break - else: - while True: - subobj = self.subcon._parse(stream, context) - obj.append(subobj) - if self.predicate(subobj, context): - break - except ConstructError, ex: - raise ArrayError("missing terminator", ex) - return obj - def _build(self, obj, stream, context): - terminated = False - if self.subcon.conflags & self.FLAG_COPY_CONTEXT: - for subobj in obj: - self.subcon._build(subobj, stream, context.__copy__()) - if self.predicate(subobj, context): - terminated = True - break - else: - for subobj in obj: - self.subcon._build(subobj, stream, context.__copy__()) - if self.predicate(subobj, context): - terminated = True - break - if not terminated: - raise ArrayError("missing terminator") - def _sizeof(self, context): - raise SizeofError("can't calculate size") + * If a field like Bytes(this.field) references another field in the same struct, you need to access the referenced field first (to trigger its parsing) and then you can access the Bytes field. Otherwise it would fail due to missing context entry. + * If a field references another field within inner (nested) or outer (super) struct, things may break. Context is nested, but this class was not rigorously tested in that manner. + Building and sizeof are greedy, like in Struct. 
-#=============================================================================== -# structures and sequences -#=============================================================================== -class Struct(Construct): + :param \*subcons: Construct instances, list of members, some can be anonymous + :param \*\*subconskw: Construct instances, list of members (requires Python 3.6) """ - A sequence of named constructs, similar to structs in C. The elements are - parsed and built in the order they are defined. - See also Embedded. - - Parameters: - * name - the name of the structure - * subcons - a sequence of subconstructs that make up this structure. - * nested - a keyword-only argument that indicates whether this struct - creates a nested context. The default is True. This parameter is - considered "advanced usage", and may be removed in the future. - - Example: - Struct("foo", - UBInt8("first_element"), - UBInt16("second_element"), - Padding(2), - UBInt8("third_element"), - ) - """ - __slots__ = ["subcons", "nested"] - def __init__(self, name, *subcons, **kw): - self.nested = kw.pop("nested", True) - if kw: - raise TypeError("the only keyword argument accepted is 'nested'", kw) - Construct.__init__(self, name) - self.subcons = subcons - self._inherit_flags(*subcons) - self._clear_flag(self.FLAG_EMBED) - def _parse(self, stream, context): - if "" in context: - obj = context[""] - del context[""] - else: + + def __init__(self, *subcons, **subconskw): + super().__init__() + self.subcons = list(subcons) + list(k/v for k,v in subconskw.items()) + self._subcons = Container((sc.name,sc) for sc in self.subcons if sc.name) + self._subconsindexes = Container((sc.name,i) for i,sc in enumerate(self.subcons) if sc.name) + self.flagbuildnone = all(sc.flagbuildnone for sc in self.subcons) + + def __getattr__(self, name): + if name in self._subcons: + return self._subcons[name] + raise AttributeError + + def _parse(self, stream, context, path): + context = Container(_ = context, 
_params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = stream, _index = context.get("_index", None)) + context._root = context._.get("_root", context) + offset = stream_tell(stream, path) + offsets = {0: offset} + values = {} + for i,sc in enumerate(self.subcons): + try: + offset += sc._actualsize(stream, context, path) + stream_seek(stream, offset, 0, path) + except SizeofError: + parseret = sc._parsereport(stream, context, path) + values[i] = parseret + if sc.name: + context[sc.name] = parseret + offset = stream_tell(stream, path) + offsets[i+1] = offset + return LazyContainer(self, stream, offsets, values, context, path) + + def _build(self, obj, stream, context, path): + # exact copy from Struct class + if obj is None: obj = Container() - if self.nested: - context = Container(_ = context) - for sc in self.subcons: - if sc.conflags & self.FLAG_EMBED: - context[""] = obj - sc._parse(stream, context) - else: - subobj = sc._parse(stream, context) - if sc.name is not None: - obj[sc.name] = subobj - context[sc.name] = subobj - return obj - def _build(self, obj, stream, context): - if "" in context: - del context[""] - elif self.nested: - context = Container(_ = context) - for sc in self.subcons: - if sc.conflags & self.FLAG_EMBED: - context[""] = True - subobj = obj - elif sc.name is None: - subobj = None - else: - subobj = getattr(obj, sc.name) - context[sc.name] = subobj - sc._build(subobj, stream, context) - def _sizeof(self, context): - if self.nested: - context = Container(_ = context) - return sum(sc._sizeof(context) for sc in self.subcons) - -class Sequence(Struct): - """ - A sequence of unnamed constructs. The elements are parsed and built in the - order they are defined. - See also Embedded. - - Parameters: - * name - the name of the structure - * subcons - a sequence of subconstructs that make up this structure. 
- * nested - a keyword-only argument that indicates whether this struct - creates a nested context. The default is True. This parameter is - considered "advanced usage", and may be removed in the future. - - Example: - Sequence("foo", - UBInt8("first_element"), - UBInt16("second_element"), - Padding(2), - UBInt8("third_element"), - ) - """ - __slots__ = [] - def _parse(self, stream, context): - if "" in context: - obj = context[""] - del context[""] - else: - obj = ListContainer() - if self.nested: - context = Container(_ = context) + context = Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = stream, _index = context.get("_index", None)) + context._root = context._.get("_root", context) + context.update(obj) for sc in self.subcons: - if sc.conflags & self.FLAG_EMBED: - context[""] = obj - sc._parse(stream, context) - else: - subobj = sc._parse(stream, context) - if sc.name is not None: - obj.append(subobj) + try: + if sc.flagbuildnone: + subobj = obj.get(sc.name, None) + else: + subobj = obj[sc.name] # raises KeyError + + if sc.name: context[sc.name] = subobj - return obj - def _build(self, obj, stream, context): - if "" in context: - del context[""] - elif self.nested: - context = Container(_ = context) - objiter = iter(obj) - for sc in self.subcons: - if sc.conflags & self.FLAG_EMBED: - context[""] = True - subobj = objiter - elif sc.name is None: - subobj = None - else: - subobj = objiter.next() - context[sc.name] = subobj - sc._build(subobj, stream, context) -class Union(Construct): - """ - a set of overlapping fields (like unions in C). when parsing, - all fields read the same data; when building, only the first subcon - (called "master") is used. 
- - Parameters: - * name - the name of the union - * master - the master subcon, i.e., the subcon used for building and - calculating the total size - * subcons - additional subcons - - Example: - Union("what_are_four_bytes", - UBInt32("one_dword"), - Struct("two_words", UBInt16("first"), UBInt16("second")), - Struct("four_bytes", - UBInt8("a"), - UBInt8("b"), - UBInt8("c"), - UBInt8("d") - ), - ) - """ - __slots__ = ["parser", "builder"] - def __init__(self, name, master, *subcons, **kw): - Construct.__init__(self, name) - args = [Peek(sc) for sc in subcons] - args.append(MetaField(None, lambda ctx: master._sizeof(ctx))) - self.parser = Struct(name, Peek(master, perform_build = True), *args) - self.builder = Struct(name, master) - def _parse(self, stream, context): - return self.parser._parse(stream, context) - def _build(self, obj, stream, context): - return self.builder._build(obj, stream, context) - def _sizeof(self, context): - return self.builder._sizeof(context) + buildret = sc._build(subobj, stream, context, path) + if sc.name: + context[sc.name] = buildret + except StopFieldError: + break + return context + + def _sizeof(self, context, path): + # exact copy from Struct class + context = Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = None, _index = context.get("_index", None)) + context._root = context._.get("_root", context) + try: + return sum(sc._sizeof(context, path) for sc in self.subcons) + except (KeyError, AttributeError): + raise SizeofError("cannot calculate size, key not found in context", path=path) + + +class LazyListContainer(list): + """Used internally.""" + + def __init__(self, subcon, stream, count, offsets, values, context, path): + self._subcon = subcon + self._stream = stream + self._count = count + self._offsets = offsets + self._values = values + self._context = context + self._path = path + + def 
__getitem__(self, index): + if isinstance(index, slice): + return [self[i] for i in range(*index.indices(self._count))] + if index in self._values: + return self._values[index] + stream_seek(self._stream, self._offsets[index], 0, self._path) # KeyError + parseret = self._subcon._parsereport(self._stream, self._context, self._path) + self._values[index] = parseret + return parseret + + def __getslice__(self, start, stop): + if stop == sys.maxsize: + stop = self._count + return self.__getitem__(slice(start, stop)) + + def __len__(self): + return self._count + + def __iter__(self): + return (self[i] for i in range(self._count)) + + def __eq__(self, other): + return len(self) == len(other) and all(self[i] == other[i] for i in range(self._count)) -#=============================================================================== -# conditional -#=============================================================================== -class Switch(Construct): - """ - A conditional branch. Switch will choose the case to follow based on - the return value of keyfunc. If no case is matched, and no default value - is given, SwitchError will be raised. - See also Pass. - - Parameters: - * name - the name of the construct - * keyfunc - a function that takes the context and returns a key, which - will ne used to choose the relevant case. - * cases - a dictionary mapping keys to constructs. the keys can be any - values that may be returned by keyfunc. - * default - a default value to use when the key is not found in the cases. - if not supplied, an exception will be raised when the key is not found. - You can use the builtin construct Pass for 'do-nothing'. - * include_key - whether or not to include the key in the return value - of parsing. defualt is False. 
- - Example: - Struct("foo", - UBInt8("type"), - Switch("value", lambda ctx: ctx.type, { - 1 : UBInt8("spam"), - 2 : UBInt16("spam"), - 3 : UBInt32("spam"), - 4 : UBInt64("spam"), - } - ), - ) - """ + def __repr__(self): + return "<LazyListContainer: %s of %s items cached>" % (len(self._values), self._count, ) - class NoDefault(Construct): - def _parse(self, stream, context): - raise SwitchError("no default case defined") - def _build(self, obj, stream, context): - raise SwitchError("no default case defined") - def _sizeof(self, context): - raise SwitchError("no default case defined") - NoDefault = NoDefault("No default value specified") - __slots__ = ["subcons", "keyfunc", "cases", "default", "include_key"] +class LazyArray(Subconstruct): + r""" + Equivalent to :class:`~construct.core.Array`, but the subcon is not parsed when possible (it gets skipped if the size can be measured by _actualsize or _sizeof method). See its docstring for details. - def __init__(self, name, keyfunc, cases, default = NoDefault, - include_key = False): - Construct.__init__(self, name) - self._inherit_flags(*cases.values()) - self.keyfunc = keyfunc - self.cases = cases - self.default = default - self.include_key = include_key - self._inherit_flags(*cases.values()) - self._set_flag(self.FLAG_DYNAMIC) - def _parse(self, stream, context): - key = self.keyfunc(context) - obj = self.cases.get(key, self.default)._parse(stream, context) - if self.include_key: - return key, obj - else: - return obj - def _build(self, obj, stream, context): - if self.include_key: - key, obj = obj - else: - key = self.keyfunc(context) - case = self.cases.get(key, self.default) - case._build(obj, stream, context) - def _sizeof(self, context): - case = self.cases.get(self.keyfunc(context), self.default) - return case._sizeof(context) + Fields are parsed depending on some factors: -class Select(Construct): - """ - Selects the first matching subconstruct. It will literally try each of - the subconstructs, until one matches.
- - Notes: - * requires a seekable stream. - - Parameters: - * name - the name of the construct - * subcons - the subcons to try (order-sensitive) - * include_name - a keyword only argument, indicating whether to include - the name of the selected subcon in the return value of parsing. default - is false. - - Example: - Select("foo", - UBInt64("large"), - UBInt32("medium"), - UBInt16("small"), - UBInt8("tiny"), - ) - """ - __slots__ = ["subcons", "include_name"] - def __init__(self, name, *subcons, **kw): - include_name = kw.pop("include_name", False) - if kw: - raise TypeError("the only keyword argument accepted " - "is 'include_name'", kw) - Construct.__init__(self, name) - self.subcons = subcons - self.include_name = include_name - self._inherit_flags(*subcons) - self._set_flag(self.FLAG_DYNAMIC) - def _parse(self, stream, context): - for sc in self.subcons: - pos = stream.tell() - context2 = context.__copy__() - try: - obj = sc._parse(stream, context2) - except ConstructError: - stream.seek(pos) - else: - context.__update__(context2) - if self.include_name: - return sc.name, obj - else: - return obj - raise SelectError("no subconstruct matched") - def _build(self, obj, stream, context): - if self.include_name: - name, obj = obj - for sc in self.subcons: - if sc.name == name: - sc._build(obj, stream, context) - return - else: - for sc in self.subcons: - stream2 = StringIO() - context2 = context.__copy__() - try: - sc._build(obj, stream2, context2) - except Exception: - pass - else: - context.__update__(context2) - stream.write(stream2.getvalue()) - return - raise SelectError("no subconstruct matched", obj) - def _sizeof(self, context): - raise SizeofError("can't calculate size") + * Some fields like Int* Float* Bytes(5) Array(5,Byte) Pointer are fixed-size and are therefore skipped. Stream is not read. + * Some fields like Bytes(this.field) are variable-size but their size is known during parsing when there is a corresponding context entry. 
Those fields are also skipped. Stream is not read. + * Some fields like Prefixed PrefixedArray PascalString are variable-size but their size can be computed by partially reading the stream. Only first few bytes are read (the lengthfield). + * Other fields like VarInt need to be parsed. Stream position that is left after the field was parsed is used. + * Some fields may not work properly, due to the fact that this class attempts to skip fields, and parses them only out of necessity. Miscellaneous fields often have size defined as 0, and fixed sized fields are skippable. + Note there are restrictions: -#=============================================================================== -# stream manipulation -#=============================================================================== -class Pointer(Subconstruct): + * If a field references another field within inner (nested) or outer (super) struct, things may break. Context is nested, but this class was not rigorously tested in that manner. + + Building and sizeof are greedy, like in Array. + + :param count: integer or context lambda, strict amount of elements + :param subcon: Construct instance, subcon to process individual elements """ - Changes the stream position to a given offset, where the construction - should take place, and restores the stream position when finished. - See also Anchor, OnDemand and OnDemandPointer. - Notes: - * requires a seekable stream. 
+ def __init__(self, count, subcon): + super().__init__(subcon) + self.count = count + + def _parse(self, stream, context, path): + sc = self.subcon + count = self.count + if callable(count): + count = count(context) + if not 0 <= count: + raise RangeError("invalid count %s" % (count,), path=path) + offset = stream_tell(stream, path) + offsets = {0: offset} + values = {} + for i in range(count): + try: + offset += sc._actualsize(stream, context, path) + stream_seek(stream, offset, 0, path) + except SizeofError: + parseret = sc._parsereport(stream, context, path) + values[i] = parseret + offset = stream_tell(stream, path) + offsets[i+1] = offset + return LazyListContainer(sc, stream, count, offsets, values, context, path) + + def _build(self, obj, stream, context, path): + # exact copy from Array class + count = self.count + if callable(count): + count = count(context) + if not 0 <= count: + raise RangeError("invalid count %s" % (count,), path=path) + if not len(obj) == count: + raise RangeError("expected %d elements, found %d" % (count, len(obj)), path=path) + retlist = ListContainer() + for i,e in enumerate(obj): + context._index = i + buildret = self.subcon._build(e, stream, context, path) + retlist.append(buildret) + return retlist + + def _sizeof(self, context, path): + # exact copy from Array class + try: + count = self.count + if callable(count): + count = count(context) + except (KeyError, AttributeError): + raise SizeofError("cannot calculate size, key not found in context", path=path) + return count * self.subcon._sizeof(context, path) - Parameters: - * offsetfunc: a function that takes the context and returns an absolute - stream position, where the construction would take place - * subcon - the subcon to use at `offsetfunc()` - Example: - Struct("foo", - UBInt32("spam_pointer"), - Pointer(lambda ctx: ctx.spam_pointer, - Array(5, UBInt8("spam")) - ) - ) - """ - __slots__ = ["offsetfunc"] - def __init__(self, offsetfunc, subcon): - 
Subconstruct.__init__(self, subcon) - self.offsetfunc = offsetfunc - def _parse(self, stream, context): - newpos = self.offsetfunc(context) - origpos = stream.tell() - stream.seek(newpos) - obj = self.subcon._parse(stream, context) - stream.seek(origpos) - return obj - def _build(self, obj, stream, context): - newpos = self.offsetfunc(context) - origpos = stream.tell() - stream.seek(newpos) - self.subcon._build(obj, stream, context) - stream.seek(origpos) - def _sizeof(self, context): - return 0 +class LazyBound(Construct): + r""" + Field that binds to the subcon only at runtime (during parsing and building, not ctor). Useful for recursive data structures, like linked-lists and trees, where a construct needs to refer to itself (while it does not exist yet in the namespace). -class Peek(Subconstruct): - """ - Peeks at the stream: parses without changing the stream position. - See also Union. If the end of the stream is reached when peeking, - returns None. + Note that it is possible to obtain same effect without using this class, using a loop. However there are usecases where that is not possible (if remaining nodes cannot be sized-up, and there is data following the recursive structure). There is also a significant difference, namely that LazyBound actually does greedy parsing while the loop does lazy parsing. See examples. - Notes: - * requires a seekable stream. + To break recursion, use `If` field. See examples. - Parameters: - * subcon - the subcon to peek at - * perform_build - whether or not to perform building. by default this - parameter is set to False, meaning building is a no-op. 
+ :param subconfunc: parameter-less lambda returning Construct instance, can also return itself - Example: - Peek(UBInt8("foo")) - """ - __slots__ = ["perform_build"] - def __init__(self, subcon, perform_build = False): - Subconstruct.__init__(self, subcon) - self.perform_build = perform_build - def _parse(self, stream, context): - pos = stream.tell() - try: - return self.subcon._parse(stream, context) - except FieldError: - pass - finally: - stream.seek(pos) - def _build(self, obj, stream, context): - if self.perform_build: - self.subcon._build(obj, stream, context) - def _sizeof(self, context): - return 0 + Example:: -class OnDemand(Subconstruct): - """ - Allows for on-demand (lazy) parsing. When parsing, it will return a - LazyContainer that represents a pointer to the data, but does not actually - parses it from stream until it's "demanded". - By accessing the 'value' property of LazyContainers, you will demand the - data from the stream. The data will be parsed and cached for later use. - You can use the 'has_value' property to know whether the data has already - been demanded. - See also OnDemandPointer. - - Notes: - * requires a seekable stream. - - Parameters: - * subcon - - * advance_stream - whether or not to advance the stream position. by - default this is True, but if subcon is a pointer, this should be False. - * force_build - whether or not to force build. If set to False, and the - LazyContainer has not been demaned, building is a no-op. 
- - Example: - OnDemand(Array(10000, UBInt8("foo")) - """ - __slots__ = ["advance_stream", "force_build"] - def __init__(self, subcon, advance_stream = True, force_build = True): - Subconstruct.__init__(self, subcon) - self.advance_stream = advance_stream - self.force_build = force_build - def _parse(self, stream, context): - obj = LazyContainer(self.subcon, stream, stream.tell(), context) - if self.advance_stream: - stream.seek(self.subcon._sizeof(context), 1) - return obj - def _build(self, obj, stream, context): - if not isinstance(obj, LazyContainer): - self.subcon._build(obj, stream, context) - elif self.force_build or obj.has_value: - self.subcon._build(obj.value, stream, context) - elif self.advance_stream: - stream.seek(self.subcon._sizeof(context), 1) - -class Buffered(Subconstruct): - """ - Creates an in-memory buffered stream, which can undergo encoding and - decoding prior to being passed on to the subconstruct. - See also Bitwise. - - Note: - * Do not use pointers inside Buffered - - Parameters: - * subcon - the subcon which will operate on the buffer - * encoder - a function that takes a string and returns an encoded - string (used after building) - * decoder - a function that takes a string and returns a decoded - string (used before parsing) - * resizer - a function that takes the size of the subcon and "adjusts" - or "resizes" it according to the encoding/decoding process. 
- - Example: - Buffered(BitField("foo", 16), - encoder = decode_bin, - decoder = encode_bin, - resizer = lambda size: size / 8, - ) - """ - __slots__ = ["encoder", "decoder", "resizer"] - def __init__(self, subcon, decoder, encoder, resizer): - Subconstruct.__init__(self, subcon) - self.encoder = encoder - self.decoder = decoder - self.resizer = resizer - def _parse(self, stream, context): - data = _read_stream(stream, self._sizeof(context)) - stream2 = StringIO(self.decoder(data)) - return self.subcon._parse(stream2, context) - def _build(self, obj, stream, context): - size = self._sizeof(context) - stream2 = StringIO() - self.subcon._build(obj, stream2, context) - data = self.encoder(stream2.getvalue()) - assert len(data) == size - _write_stream(stream, self._sizeof(context), data) - def _sizeof(self, context): - return self.resizer(self.subcon._sizeof(context)) - -class Restream(Subconstruct): - """ - Wraps the stream with a read-wrapper (for parsing) or a - write-wrapper (for building). The stream wrapper can buffer the data - internally, reading it from- or writing it to the underlying stream - as needed. For example, BitStreamReader reads whole bytes from the - underlying stream, but returns them as individual bits. - See also Bitwise. - - When the parsing or building is done, the stream's close method - will be invoked. It can perform any finalization needed for the stream - wrapper, but it must not close the underlying stream. - - Note: - * Do not use pointers inside Restream - - Parameters: - * subcon - the subcon - * stream_reader - the read-wrapper - * stream_writer - the write wrapper - * resizer - a function that takes the size of the subcon and "adjusts" - or "resizes" it according to the encoding/decoding process. 
- - Example: - Restream(BitField("foo", 16), - stream_reader = BitStreamReader, - stream_writer = BitStreamWriter, - resizer = lambda size: size / 8, - ) + d = Struct( + "value" / Byte, + "next" / If(this.value > 0, LazyBound(lambda: d)), + ) + >>> print(d.parse(b"\x05\x09\x00")) + Container: + value = 5 + next = Container: + value = 9 + next = Container: + value = 0 + next = None + + :: + + d = Struct( + "value" / Byte, + "next" / GreedyBytes, + ) + data = b"\x05\x09\x00" + while data: + x = d.parse(data) + data = x.next + print(x) + # print outputs + Container: + value = 5 + next = \t\x00 (total 2) + # print outputs + Container: + value = 9 + next = \x00 (total 1) + # print outputs + Container: + value = 0 + next = (total 0) """ - __slots__ = ["stream_reader", "stream_writer", "resizer"] - def __init__(self, subcon, stream_reader, stream_writer, resizer): - Subconstruct.__init__(self, subcon) - self.stream_reader = stream_reader - self.stream_writer = stream_writer - self.resizer = resizer - def _parse(self, stream, context): - stream2 = self.stream_reader(stream) - obj = self.subcon._parse(stream2, context) - stream2.close() - return obj - def _build(self, obj, stream, context): - stream2 = self.stream_writer(stream) - self.subcon._build(obj, stream2, context) - stream2.close() - def _sizeof(self, context): - return self.resizer(self.subcon._sizeof(context)) + + def __init__(self, subconfunc): + super().__init__() + self.subconfunc = subconfunc + + def _parse(self, stream, context, path): + sc = self.subconfunc() + return sc._parsereport(stream, context, path) + + def _build(self, obj, stream, context, path): + sc = self.subconfunc() + return sc._build(obj, stream, context, path) #=============================================================================== -# miscellaneous +# adapters and validators #=============================================================================== -class Reconfig(Subconstruct): +class ExprAdapter(Adapter): + r""" + Generic 
adapter that takes `decoder` and `encoder` lambdas as parameters. You can use ExprAdapter instead of writing a full-blown class deriving from Adapter when only a simple lambda is needed. + + :param subcon: Construct instance, subcon to adapt + :param decoder: lambda that takes (obj, context) and returns a decoded version of obj + :param encoder: lambda that takes (obj, context) and returns an encoded version of obj + + Example:: + + >>> d = ExprAdapter(Byte, obj_+1, obj_-1) + >>> d.parse(b'\x04') + 5 + >>> d.build(5) + b'\x04' """ - Reconfigures a subconstruct. Reconfig can be used to change the name and - set and clear flags of the inner subcon. + def __init__(self, subcon, decoder, encoder): + super().__init__(subcon) + self._decode = lambda obj,ctx,path: decoder(obj,ctx) + self._encode = lambda obj,ctx,path: encoder(obj,ctx) - Parameters: - * name - the new name - * subcon - the subcon to reconfigure - * setflags - the flags to set (default is 0) - * clearflags - the flags to clear (default is 0) - Example: - Reconfig("foo", UBInt8("bar")) +class ExprSymmetricAdapter(ExprAdapter): """ - __slots__ = [] - def __init__(self, name, subcon, setflags = 0, clearflags = 0): - Construct.__init__(self, name, subcon.conflags) - self.subcon = subcon - self._set_flag(setflags) - self._clear_flag(clearflags) + Macro around :class:`~construct.core.ExprAdapter`. -class Anchor(Construct): + :param subcon: Construct instance, subcon to adapt + :param encoder: lambda that takes (obj, context) and returns both encoded version and decoded version of obj + + Example:: + + >>> d = ExprSymmetricAdapter(Byte, obj_ & 0b00001111) + >>> d.parse(b"\xff") + 15 + >>> d.build(255) + b'\x0f' """ - The **anchor**, or stream position at a point in a Construct. + def __init__(self, subcon, encoder): + super().__init__(subcon, encoder, encoder) - Anchors are useful for adjusting relative offsets to absolute positions, - or to measure sizes of Constructs.
- To get an absolute pointer, use an Anchor plus a relative offset. To get a - size, place two Anchors and measure their difference. +class ExprValidator(Validator): + r""" + Generic adapter that takes `validator` lambda as parameter. You can use ExprValidator instead of writing a full-blown class deriving from Validator when only a simple lambda is needed. - :param str name: the name of the anchor + :param subcon: Construct instance, subcon to adapt + :param validator: lambda that takes (obj, context) and returns a bool - .. note:: + Example:: - Anchor requires a seekable stream, or at least a tellable stream; it is - implemented using the ``tell()`` method of file-like objects. + >>> d = ExprValidator(Byte, obj_ & 0b11111110 == 0) + >>> d.build(1) + b'\x01' + >>> d.build(88) + ValidationError: object failed validation: 88 - .. seealso:: Pointer """ + def __init__(self, subcon, validator): + super().__init__(subcon) + self._validate = lambda obj,ctx,path: validator(obj,ctx) - __slots__ = [] - def _parse(self, stream, context): - return stream.tell() - def _build(self, obj, stream, context): - context[self.name] = stream.tell() - def _sizeof(self, context): - return 0 -class Value(Construct): - """ - A computed value. +def OneOf(subcon, valids): + r""" + Validates that the object is one of the listed values, both during parsing and building. - Parameters: - * name - the name of the value - * func - a function that takes the context and return the computed value + .. note:: For performance, `valids` should be a set or frozenset. 
- Example: - Struct("foo", - UBInt8("width"), - UBInt8("height"), - Value("total_pixels", lambda ctx: ctx.width * ctx.height), - ) - """ - __slots__ = ["func"] - def __init__(self, name, func): - Construct.__init__(self, name) - self.func = func - self._set_flag(self.FLAG_DYNAMIC) - def _parse(self, stream, context): - return self.func(context) - def _build(self, obj, stream, context): - context[self.name] = self.func(context) - def _sizeof(self, context): - return 0 + :param subcon: Construct instance, subcon to validate + :param valids: collection implementing __contains__, usually a list or set -#class Dynamic(Construct): -# """ -# Dynamically creates a construct and uses it for parsing and building. -# This allows you to create change the construction tree on the fly. -# Deprecated. -# -# Parameters: -# * name - the name of the construct -# * factoryfunc - a function that takes the context and returns a new -# construct object which will be used for parsing and building. -# -# Example: -# def factory(ctx): -# if ctx.bar == 8: -# return UBInt8("spam") -# if ctx.bar == 9: -# return String("spam", 9) -# -# Struct("foo", -# UBInt8("bar"), -# Dynamic("spam", factory), -# ) -# """ -# __slots__ = ["factoryfunc"] -# def __init__(self, name, factoryfunc): -# Construct.__init__(self, name, self.FLAG_COPY_CONTEXT) -# self.factoryfunc = factoryfunc -# self._set_flag(self.FLAG_DYNAMIC) -# def _parse(self, stream, context): -# return self.factoryfunc(context)._parse(stream, context) -# def _build(self, obj, stream, context): -# return self.factoryfunc(context)._build(obj, stream, context) -# def _sizeof(self, context): -# return self.factoryfunc(context)._sizeof(context) + :raises ValidationError: parsed or build value is not among valids -class LazyBound(Construct): + Example:: + + >>> d = OneOf(Byte, [1,2,3]) + >>> d.parse(b"\x01") + 1 + >>> d.parse(b"\xff") + construct.core.ValidationError: object failed validation: 255 """ - Lazily bound construct, useful for constructs 
that need to make cyclic - references (linked-lists, expression trees, etc.). + return ExprValidator(subcon, lambda obj,ctx: obj in valids) - Parameters: +def NoneOf(subcon, invalids): + r""" + Validates that the object is none of the listed values, both during parsing and building. - Example: - foo = Struct("foo", - UBInt8("bar"), - LazyBound("next", lambda: foo), - ) - """ - __slots__ = ["bindfunc", "bound"] - def __init__(self, name, bindfunc): - Construct.__init__(self, name) - self.bound = None - self.bindfunc = bindfunc - def _parse(self, stream, context): - if self.bound is None: - self.bound = self.bindfunc() - return self.bound._parse(stream, context) - def _build(self, obj, stream, context): - if self.bound is None: - self.bound = self.bindfunc() - self.bound._build(obj, stream, context) - def _sizeof(self, context): - if self.bound is None: - self.bound = self.bindfunc() - return self.bound._sizeof(context) + .. note:: For performance, `valids` should be a set or frozenset. + + :param subcon: Construct instance, subcon to validate + :param invalids: collection implementing __contains__, usually a list or set + + :raises ValidationError: parsed or build value is among invalids -class Pass(Construct): """ - A do-nothing construct, useful as the default case for Switch, or - to indicate Enums. - See also Switch and Enum. + return ExprValidator(subcon, lambda obj,ctx: obj not in invalids) + + +def Filter(predicate, subcon): + r""" + Filters a list leaving only the elements that passed through the predicate. + + :param subcon: Construct instance, usually Array GreedyRange Sequence + :param predicate: lambda that takes (obj, context) and returns a bool + + Can propagate any exception from the lambda, possibly non-ConstructError. - Notes: - * this construct is a singleton. do not try to instatiate it, as it - will not work... 
+ Example:: - Example: - Pass + >>> d = Filter(obj_ != 0, Byte[:]) + >>> d.parse(b"\x00\x02\x00") + [2] + >>> d.build([0,1,0,2,0]) + b'\x01\x02' """ - __slots__ = [] - def _parse(self, stream, context): - pass - def _build(self, obj, stream, context): - assert obj is None - def _sizeof(self, context): - return 0 -Pass = Pass(None) + return ExprSymmetricAdapter(subcon, lambda obj,ctx: [x for x in obj if predicate(x,ctx)]) + + +class Slicing(Adapter): + r""" + Adapter for slicing a list. Works with GreedyRange and Sequence. + + :param subcon: Construct instance, subcon to slice + :param count: integer, expected number of elements, needed during building + :param start: integer for start index (or None for entire list) + :param stop: integer for stop index (or None for up-to-end) + :param step: integer, step (or 1 for every element) + :param empty: object, value to fill the list with, during building -class Terminator(Construct): + Example:: + + d = Slicing(Array(4,Byte), 4, 1, 3, empty=0) + assert d.parse(b"\x01\x02\x03\x04") == [2,3] + assert d.build([2,3]) == b"\x00\x02\x03\x00" + assert d.sizeof() == 4 """ - Asserts the end of the stream has been reached at the point it's placed. - You can use this to ensure no more unparsed data follows. + def __init__(self, subcon, count, start, stop, step=1, empty=None): + super().__init__(subcon) + self.count = count + self.start = start + self.stop = stop + self.step = step + self.empty = empty + def _decode(self, obj, context, path): + return obj[self.start:self.stop:self.step] + def _encode(self, obj, context, path): + if self.start is None: + return obj + elif self.stop is None: + output = [self.empty] * self.count + output[self.start::self.step] = obj + else: + output = [self.empty] * self.count + output[self.start:self.stop:self.step] = obj + return output + - Notes: - * this construct is only meaningful for parsing. for building, it's - a no-op. - * this construct is a singleton. 
do not try to instatiate it, as it - will not work... +class Indexing(Adapter): + r""" + Adapter for indexing a list (getting a single item from that list). Works with Range and Sequence and their lazy equivalents. - Example: - Terminator + :param subcon: Construct instance, subcon to index + :param count: integer, expected number of elements, needed during building + :param index: integer, index of the list to get + :param empty: object, value to fill the list with, during building + + Example:: + + d = Indexing(Array(4,Byte), 4, 2, empty=0) + assert d.parse(b"\x01\x02\x03\x04") == 3 + assert d.build(3) == b"\x00\x00\x03\x00" + assert d.sizeof() == 4 """ - __slots__ = [] - def _parse(self, stream, context): - if stream.read(1): - raise TerminatorError("expected end of stream") - def _build(self, obj, stream, context): - assert obj is None - def _sizeof(self, context): - return 0 -Terminator = Terminator(None) + def __init__(self, subcon, count, index, empty=None): + super().__init__(subcon) + self.count = count + self.index = index + self.empty = empty + def _decode(self, obj, context, path): + return obj[self.index] + def _encode(self, obj, context, path): + output = [self.empty] * self.count + output[self.index] = obj + return output + + +#=============================================================================== +# end of file +#=============================================================================== diff --git a/construct/debug.py b/construct/debug.py index 39a116302..c486cfb4b 100644 --- a/construct/debug.py +++ b/construct/debug.py @@ -1,160 +1,160 @@ -""" -Debugging utilities for constructs -""" -import sys -import traceback -import pdb -import inspect -from core import Construct, Subconstruct -from lib import HexString, Container, ListContainer +from construct import * +from construct.lib import * +import sys, traceback, pdb, inspect class Probe(Construct): + r""" + Probe that dumps the context, and some stream content (peeks into it) to the 
screen to aid the debugging process. It can optionally limit itself to a single context entry, instead of printing entire context. + + :param into: optional, None by default, or context lambda + :param lookahead: optional, integer, number of bytes to dump from the stream + + Example:: + + >>> d = Struct( + ... "count" / Byte, + ... "items" / Byte[this.count], + ... Probe(lookahead=32), + ... ) + >>> d.parse(b"\x05abcde\x01\x02\x03") + + -------------------------------------------------- + Probe, path is (parsing), into is None + Stream peek: (hexlified) b'010203'... + Container: + count = 5 + items = ListContainer: + 97 + 98 + 99 + 100 + 101 + -------------------------------------------------- + + :: + + >>> d = Struct( + ... "count" / Byte, + ... "items" / Byte[this.count], + ... Probe(this.count), + ... ) + >>> d.parse(b"\x05abcde\x01\x02\x03") + + -------------------------------------------------- + Probe, path is (parsing), into is this.count + 5 + -------------------------------------------------- + """ - A probe: dumps the context, stack frames, and stream content to the screen - to aid the debugging process. - See also Debugger. - - Parameters: - * name - the display name - * show_stream - whether or not to show stream contents. default is True. - the stream must be seekable. - * show_context - whether or not to show the context. default is True. - * show_stack - whether or not to show the upper stack frames. default - is True. - * stream_lookahead - the number of bytes to dump when show_stack is set. - default is 100. 
- - Example: - Struct("foo", - UBInt8("a"), - Probe("between a and b"), - UBInt8("b"), - ) - """ - __slots__ = [ - "printname", "show_stream", "show_context", "show_stack", - "stream_lookahead" - ] - counter = 0 - - def __init__(self, name = None, show_stream = True, - show_context = True, show_stack = True, - stream_lookahead = 100): - Construct.__init__(self, None) - if name is None: - Probe.counter += 1 - name = "" % (Probe.counter,) - self.printname = name - self.show_stream = show_stream - self.show_context = show_context - self.show_stack = show_stack - self.stream_lookahead = stream_lookahead - def __repr__(self): - return "%s(%r)" % (self.__class__.__name__, self.printname) - def _parse(self, stream, context): - self.printout(stream, context) - def _build(self, obj, stream, context): - self.printout(stream, context) - def _sizeof(self, context): + + def __init__(self, into=None, lookahead=None): + super(Probe, self).__init__() + self.flagbuildnone = True + self.into = into + self.lookahead = lookahead + + def _parse(self, stream, context, path): + self.printout(stream, context, path) + + def _build(self, obj, stream, context, path): + self.printout(stream, context, path) + + def _sizeof(self, context, path): + self.printout(None, context, path) return 0 - - def printout(self, stream, context): - obj = Container() - if self.show_stream: - obj.stream_position = stream.tell() - follows = stream.read(self.stream_lookahead) - if not follows: - obj.following_stream_data = "EOF reached" + + def _emitparse(self, code): + return f"print({self.into})" if self.into else "print(this)" + + def _emitbuild(self, code): + return f"print({self.into})" if self.into else "print(this)" + + def printout(self, stream, context, path): + print("--------------------------------------------------") + print("Probe, path is %s, into is %r" % (path, self.into, )) + + if self.lookahead and stream is not None: + fallback = stream.tell() + datafollows = stream.read(self.lookahead) + 
stream.seek(fallback) + if datafollows: + print("Stream peek: (hexlified) %s..." % (hexlify(datafollows), )) + else: + print("Stream peek: EOF reached") + + if context is not None: + if self.into: + try: + subcontext = self.into(context) + print(subcontext) + except Exception: + print("Failed to compute %r on the context %r" % (self.into, context, )) else: - stream.seek(-len(follows), 1) - obj.following_stream_data = HexString(follows) - print - - if self.show_context: - obj.context = context - - if self.show_stack: - obj.stack = ListContainer() - frames = [s[0] for s in inspect.stack()][1:-1] - frames.reverse() - for f in frames: - a = Container() - a.__update__(f.f_locals) - obj.stack.append(a) - - print "=" * 80 - print "Probe", self.printname - print obj - print "=" * 80 + print(context) + print("--------------------------------------------------") + class Debugger(Subconstruct): - """ - A pdb-based debugger. When an exception occurs in the subcon, a debugger - will appear and allow you to debug the error (and even fix on-the-fly). + r""" + PDB-based debugger. When an exception occurs in the subcon, a debugger will appear and allow you to debug the error (and even fix it on-the-fly). 
+ + :param subcon: Construct instance, subcon to debug - Parameters: - * subcon - the subcon to debug + Example:: - Example: - Debugger( - Enum(UBInt8("foo"), - a = 1, - b = 2, - c = 3 - ) - ) + >>> Debugger(Byte[3]).build([]) + + -------------------------------------------------- + Debugging exception of + path is (building) + File "/media/arkadiusz/MAIN/GitHub/construct/construct/debug.py", line 192, in _build + return self.subcon._build(obj, stream, context, path) + File "/media/arkadiusz/MAIN/GitHub/construct/construct/core.py", line 2149, in _build + raise RangeError("expected %d elements, found %d" % (count, len(obj))) + construct.core.RangeError: expected 3 elements, found 0 + + > /media/arkadiusz/MAIN/GitHub/construct/construct/core.py(2149)_build() + -> raise RangeError("expected %d elements, found %d" % (count, len(obj))) + (Pdb) q + -------------------------------------------------- """ - __slots__ = ["retval"] - def _parse(self, stream, context): + + def _parse(self, stream, context, path): try: - return self.subcon._parse(stream, context) + return self.subcon._parse(stream, context, path) except Exception: self.retval = NotImplemented - self.handle_exc("(you can set the value of 'self.retval', " - "which will be returned)") + self.handle_exc(path, msg="(you can set self.retval, which will be returned from method)") if self.retval is NotImplemented: raise else: return self.retval - def _build(self, obj, stream, context): + + def _build(self, obj, stream, context, path): try: - self.subcon._build(obj, stream, context) + return self.subcon._build(obj, stream, context, path) except Exception: - self.handle_exc() - def handle_exc(self, msg = None): - print "=" * 80 - print "Debugging exception of %s:" % (self.subcon,) - print "".join(traceback.format_exception(*sys.exc_info())[1:]) - if msg: - print msg - pdb.post_mortem(sys.exc_info()[2]) - print "=" * 80 - - - - - - - - - - - - - - - - - - - - - - - - + self.handle_exc(path) + def _sizeof(self, context, 
path): + try: + return self.subcon._sizeof(context, path) + except Exception: + self.handle_exc(path) + def _emitparse(self, code): + return self.subcon._compileparse(code) + def _emitbuild(self, code): + return self.subcon._compilebuild(code) + def handle_exc(self, path, msg=None): + print("--------------------------------------------------") + print("Debugging exception of %r" % (self.subcon, )) + print("path is %s" % (path, )) + print("".join(traceback.format_exception(*sys.exc_info())[1:])) + if msg: + print(msg) + pdb.post_mortem(sys.exc_info()[2]) + print("--------------------------------------------------") diff --git a/construct/expr.py b/construct/expr.py new file mode 100644 index 000000000..c1dd17957 --- /dev/null +++ b/construct/expr.py @@ -0,0 +1,256 @@ +import operator +if not hasattr(operator, "div"): + operator.div = operator.truediv + + +opnames = { + operator.add : "+", + operator.sub : "-", + operator.mul : "*", + operator.div : "/", + operator.floordiv : "//", + operator.mod : "%", + operator.pow : "**", + operator.xor : "^", + operator.lshift : "<<", + operator.rshift : ">>", + operator.and_ : "&", + operator.or_ : "|", + operator.not_ : "not", + operator.neg : "-", + operator.pos : "+", + operator.contains : "in", + operator.gt : ">", + operator.ge : ">=", + operator.lt : "<", + operator.le : "<=", + operator.eq : "==", + operator.ne : "!=", +} + + +class ExprMixin(object): + + def __add__(self, other): + return BinExpr(operator.add, self, other) + def __sub__(self, other): + return BinExpr(operator.sub, self, other) + def __mul__(self, other): + return BinExpr(operator.mul, self, other) + def __floordiv__(self, other): + return BinExpr(operator.floordiv, self, other) + def __truediv__(self, other): + return BinExpr(operator.div, self, other) + __div__ = __floordiv__ + def __mod__(self, other): + return BinExpr(operator.mod, self, other) + def __pow__(self, other): + return BinExpr(operator.pow, self, other) + def __xor__(self, other): + 
return BinExpr(operator.xor, self, other) + def __rshift__(self, other): + return BinExpr(operator.rshift, self, other) + def __lshift__(self, other): + return BinExpr(operator.lshift, self, other) + def __and__(self, other): + return BinExpr(operator.and_, self, other) + def __or__(self, other): + return BinExpr(operator.or_, self, other) + + def __radd__(self, other): + return BinExpr(operator.add, other, self) + def __rsub__(self, other): + return BinExpr(operator.sub, other, self) + def __rmul__(self, other): + return BinExpr(operator.mul, other, self) + def __rfloordiv__(self, other): + return BinExpr(operator.floordiv, other, self) + def __rtruediv__(self, other): + return BinExpr(operator.div, other, self) + __rdiv__ = __rfloordiv__ + def __rmod__(self, other): + return BinExpr(operator.mod, other, self) + def __rpow__(self, other): + return BinExpr(operator.pow, other, self) + def __rxor__(self, other): + return BinExpr(operator.xor, other, self) + def __rrshift__(self, other): + return BinExpr(operator.rshift, other, self) + def __rlshift__(self, other): + return BinExpr(operator.lshift, other, self) + def __rand__(self, other): + return BinExpr(operator.and_, other, self) + def __ror__(self, other): + return BinExpr(operator.or_, other, self) + + def __neg__(self): + return UniExpr(operator.neg, self) + def __pos__(self): + return UniExpr(operator.pos, self) + def __invert__(self): + return UniExpr(operator.not_, self) + __inv__ = __invert__ + + def __contains__(self, other): + return BinExpr(operator.contains, self, other) + def __gt__(self, other): + return BinExpr(operator.gt, self, other) + def __ge__(self, other): + return BinExpr(operator.ge, self, other) + def __lt__(self, other): + return BinExpr(operator.lt, self, other) + def __le__(self, other): + return BinExpr(operator.le, self, other) + def __eq__(self, other): + return BinExpr(operator.eq, self, other) + def __ne__(self, other): + return BinExpr(operator.ne, self, other) + + def 
__getstate__(self): + attrs = {} + if hasattr(self, "__dict__"): + attrs.update(self.__dict__) + slots = [] + c = self.__class__ + while c is not None: + if hasattr(c, "__slots__"): + slots.extend(c.__slots__) + c = c.__base__ + for name in slots: + if hasattr(self, name): + attrs[name] = getattr(self, name) + return attrs + + def __setstate__(self, attrs): + for name, value in attrs.items(): + setattr(self, name, value) + + +class UniExpr(ExprMixin): + + def __init__(self, op, operand): + self.op = op + self.operand = operand + + def __repr__(self): + return "%s %r" % (opnames[self.op], self.operand) + + def __str__(self): + return "%s %s" % (opnames[self.op], self.operand) + + def __call__(self, obj, *args): + operand = self.operand(obj) if callable(self.operand) else self.operand + return self.op(operand) + + +class BinExpr(ExprMixin): + + def __init__(self, op, lhs, rhs): + self.op = op + self.lhs = lhs + self.rhs = rhs + + def __repr__(self): + return "(%r %s %r)" % (self.lhs, opnames[self.op], self.rhs) + + def __str__(self): + return "(%s %s %s)" % (self.lhs, opnames[self.op], self.rhs) + + def __call__(self, obj, *args): + lhs = self.lhs(obj) if callable(self.lhs) else self.lhs + rhs = self.rhs(obj) if callable(self.rhs) else self.rhs + return self.op(lhs, rhs) + + +class Path(ExprMixin): + + def __init__(self, name, field=None, parent=None): + self.__name = name + self.__field = field + self.__parent = parent + + def __repr__(self): + if self.__parent is None: + return self.__name + else: + return "%s[%r]" % (self.__parent, self.__field) + + def __str__(self): + if self.__parent is None: + return self.__name + else: + return "%s[%r]" % (self.__parent, self.__field) + + def __call__(self, obj, *args): + if self.__parent is None: + return obj + else: + return self.__parent(obj)[self.__field] + + def __getfield__(self): + return self.__field + + def __getattr__(self, name): + return Path(self.__name, name, self) + + def __getitem__(self, name): + return 
Path(self.__name, name, self) + + +class Path2(ExprMixin): + + def __init__(self, name, index=None, parent=None): + self.__name = name + self.__index = index + self.__parent = parent + + def __repr__(self): + if self.__parent is None: + return self.__name + else: + return "%r[%r]" % (self.__parent, self.__index) + + def __call__(self, *args): + if self.__parent is None: + return args[1] + else: + return self.__parent(*args)[self.__index] + + def __getitem__(self, index): + return Path2(self.__name, index, self) + + +class FuncPath(ExprMixin): + + def __init__(self, func, operand=None): + self.__func = func + self.__operand = operand + + def __repr__(self): + if self.__operand is None: + return "%s_" % (self.__func.__name__) + else: + return "%s_(%r)" % (self.__func.__name__, self.__operand) + + def __str__(self): + if self.__operand is None: + return "%s_" % (self.__func.__name__) + else: + return "%s_(%s)" % (self.__func.__name__, self.__operand) + + def __call__(self, operand, *args): + if self.__operand is None: + return FuncPath(self.__func, operand) if callable(operand) else operand + else: + return self.__func(self.__operand(operand) if callable(self.__operand) else self.__operand) + + +this = Path("this") +obj_ = Path("obj_") +list_ = Path2("list_") + +len_ = FuncPath(len) +sum_ = FuncPath(sum) +min_ = FuncPath(min) +max_ = FuncPath(max) +abs_ = FuncPath(abs) diff --git a/construct/formats/data/__init__.py b/construct/formats/data/__init__.py deleted file mode 100644 index 50ce2de30..000000000 --- a/construct/formats/data/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -""" -all sorts of raw data serialization (tcpdump capture files, etc.) 
-""" diff --git a/construct/formats/data/cap.py b/construct/formats/data/cap.py deleted file mode 100644 index 620a0b128..000000000 --- a/construct/formats/data/cap.py +++ /dev/null @@ -1,55 +0,0 @@ -""" -tcpdump capture file -""" -from construct import * -import time -from datetime import datetime - - -class MicrosecAdapter(Adapter): - def _decode(self, obj, context): - return datetime.fromtimestamp(obj[0] + (obj[1] / 1000000.0)) - def _encode(self, obj, context): - offset = time.mktime(*obj.timetuple()) - sec = int(offset) - usec = (offset - sec) * 1000000 - return (sec, usec) - -packet = Struct("packet", - MicrosecAdapter( - Sequence("time", - ULInt32("time"), - ULInt32("usec"), - ) - ), - ULInt32("length"), - Padding(4), - HexDumpAdapter(Field("data", lambda ctx: ctx.length)), -) - -cap_file = Struct("cap_file", - Padding(24), - Rename("packets", OptionalGreedyRange(packet)), -) - - -if __name__ == "__main__": - obj = cap_file.parse_stream(open("../../tests/cap2.cap", "rb")) - print len(obj.packets) - - - - - - - - - - - - - - - - - diff --git a/construct/formats/data/snoop.py b/construct/formats/data/snoop.py deleted file mode 100644 index 3963e44f1..000000000 --- a/construct/formats/data/snoop.py +++ /dev/null @@ -1,60 +0,0 @@ -""" -what : snoop v2 capture file. - how : http://tools.ietf.org/html/rfc1761 - who : jesse @ housejunkie . 
ca -""" - -import time -from construct import ( - Adapter, - Enum, - Field, - GreedyRange, - HexDumpAdapter, - Magic, - OptionalGreedyRange, - Padding, - Struct, - UBInt32, - ) - -class EpochTimeStampAdapter(Adapter): - """ Convert epoch timestamp <-> localtime """ - - def _decode(self, obj, context): - return time.ctime(obj) - def _encode(self, obj, context): - return int(time.mktime(time.strptime(obj))) - -packet_record = Struct("packet_record", - UBInt32("original_length"), - UBInt32("included_length"), - UBInt32("record_length"), - UBInt32("cumulative_drops"), - EpochTimeStampAdapter(UBInt32("timestamp_seconds")), - UBInt32("timestamp_microseconds"), - HexDumpAdapter(Field("data", lambda ctx: ctx.included_length)), - # 24 being the static length of the packet_record header - Padding(lambda ctx: ctx.record_length - ctx.included_length - 24), - ) - -datalink_type = Enum(UBInt32("datalink"), - IEEE802dot3 = 0, - IEEE802dot4 = 1, - IEEE802dot5 = 2, - IEEE802dot6 = 3, - ETHERNET = 4, - HDLC = 5, - CHARSYNC = 6, - IBMCHANNEL = 7, - FDDI = 8, - OTHER = 9, - UNASSIGNED = 10, - ) - -snoop_file = Struct("snoop", - Magic("snoop\x00\x00\x00"), - UBInt32("version"), # snoop v1 is deprecated - datalink_type, - OptionalGreedyRange(packet_record), - ) diff --git a/construct/formats/executable/__init__.py b/construct/formats/executable/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/construct/formats/executable/elf32.py b/construct/formats/executable/elf32.py deleted file mode 100644 index 8cd3f1599..000000000 --- a/construct/formats/executable/elf32.py +++ /dev/null @@ -1,156 +0,0 @@ -""" -Executable and Linkable Format (ELF), 32 bit, big or little endian -Used on *nix systems as a replacement of the older a.out format - -Big-endian support kindly submitted by Craig McQueen (mcqueen-c#edsrd1!yzk!co!jp) -""" -from construct import * - - -def elf32_body(ElfInt16, ElfInt32): - elf32_program_header = Struct("program_header", - Enum(ElfInt32("type"), - 
NULL = 0, - LOAD = 1, - DYNAMIC = 2, - INTERP = 3, - NOTE = 4, - SHLIB = 5, - PHDR = 6, - _default_ = Pass, - ), - ElfInt32("offset"), - ElfInt32("vaddr"), - ElfInt32("paddr"), - ElfInt32("file_size"), - ElfInt32("mem_size"), - ElfInt32("flags"), - ElfInt32("align"), - ) - - elf32_section_header = Struct("section_header", - ElfInt32("name_offset"), - Pointer(lambda ctx: ctx._.strtab_data_offset + ctx.name_offset, - CString("name") - ), - Enum(ElfInt32("type"), - NULL = 0, - PROGBITS = 1, - SYMTAB = 2, - STRTAB = 3, - RELA = 4, - HASH = 5, - DYNAMIC = 6, - NOTE = 7, - NOBITS = 8, - REL = 9, - SHLIB = 10, - DYNSYM = 11, - _default_ = Pass, - ), - ElfInt32("flags"), - ElfInt32("addr"), - ElfInt32("offset"), - ElfInt32("size"), - ElfInt32("link"), - ElfInt32("info"), - ElfInt32("align"), - ElfInt32("entry_size"), - OnDemandPointer(lambda ctx: ctx.offset, - HexDumpAdapter(Field("data", lambda ctx: ctx.size)) - ), - ) - - return Struct("body", - Enum(ElfInt16("type"), - NONE = 0, - RELOCATABLE = 1, - EXECUTABLE = 2, - SHARED = 3, - CORE = 4, - ), - Enum(ElfInt16("machine"), - NONE = 0, - M32 = 1, - SPARC = 2, - I386 = 3, - Motorolla68K = 4, - Motorolla88K = 5, - Intel860 = 7, - MIPS = 8, - _default_ = Pass - ), - ElfInt32("version"), - ElfInt32("entry"), - ElfInt32("ph_offset"), - ElfInt32("sh_offset"), - ElfInt32("flags"), - ElfInt16("header_size"), - ElfInt16("ph_entry_size"), - ElfInt16("ph_count"), - ElfInt16("sh_entry_size"), - ElfInt16("sh_count"), - ElfInt16("strtab_section_index"), - - # calculate the string table data offset (pointer arithmetics) - # ugh... 
anyway, we need it in order to read the section names, later on - Pointer(lambda ctx: - ctx.sh_offset + ctx.strtab_section_index * ctx.sh_entry_size + 16, - ElfInt32("strtab_data_offset"), - ), - - # program header table - Rename("program_table", - Pointer(lambda ctx: ctx.ph_offset, - Array(lambda ctx: ctx.ph_count, - elf32_program_header - ) - ) - ), - - # section table - Rename("sections", - Pointer(lambda ctx: ctx.sh_offset, - Array(lambda ctx: ctx.sh_count, - elf32_section_header - ) - ) - ), - ) - -elf32_body_little_endian = elf32_body(ULInt16, ULInt32) -elf32_body_big_endian = elf32_body(UBInt16, UBInt32) - -def Magic(name, value): - return Const(Bytes(name, len(value)), value) - -elf32_file = Struct("elf32_file", - Struct("identifier", - Magic("magic", "\x7fELF"), - Enum(Byte("file_class"), - NONE = 0, - CLASS32 = 1, - CLASS64 = 2, - ), - Enum(Byte("encoding"), - NONE = 0, - LSB = 1, - MSB = 2, - ), - Byte("version"), - Padding(9), - ), - Embedded(IfThenElse("body", lambda ctx: ctx.identifier.encoding == "LSB", - elf32_body_little_endian, - elf32_body_big_endian, - )), -) - - -if __name__ == "__main__": - obj = elf32_file.parse_stream(open("../../tests/_ctypes_test.so", "rb")) - #[s.data.value for s in obj.sections] - print obj - - - diff --git a/construct/formats/filesystem/__init__.py b/construct/formats/filesystem/__init__.py deleted file mode 100644 index 217ec839a..000000000 --- a/construct/formats/filesystem/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -""" -file systems on-disk formats (ext2, fat32, ntfs, ...) -and related disk formats (mbr, ...) 
-""" diff --git a/construct/formats/filesystem/ext2.py b/construct/formats/filesystem/ext2.py deleted file mode 100644 index 232c5beb4..000000000 --- a/construct/formats/filesystem/ext2.py +++ /dev/null @@ -1,157 +0,0 @@ -""" -Extension 2 (ext2) -Used in Linux systems -""" -from construct import * - - -Char = SLInt8 -UChar = ULInt8 -Short = SLInt16 -UShort = ULInt16 -Long = SLInt32 -ULong = ULInt32 - -def BlockPointer(name): - return Struct(name, - ULong("block_number"), - OnDemandPointer(lambda ctx: ctx["block_number"]), - ) - -superblock = Struct("superblock", - ULong('inodes_count'), - ULong('blocks_count'), - ULong('reserved_blocks_count'), - ULong('free_blocks_count'), - ULong('free_inodes_count'), - ULong('first_data_block'), - Enum(ULong('log_block_size'), - OneKB = 0, - TwoKB = 1, - FourKB = 2, - ), - Long('log_frag_size'), - ULong('blocks_per_group'), - ULong('frags_per_group'), - ULong('inodes_per_group'), - ULong('mtime'), - ULong('wtime'), - UShort('mnt_count'), - Short('max_mnt_count'), - Const(UShort('magic'), 0xEF53), - UShort('state'), - UShort('errors'), - Padding(2), - ULong('lastcheck'), - ULong('checkinterval'), - ULong('creator_os'), - ULong('rev_level'), - Padding(235 * 4), -) - -group_descriptor = Struct("group_descriptor", - ULong('block_bitmap'), - ULong('inode_bitmap'), - ULong('inode_table'), - UShort('free_blocks_count'), - UShort('free_inodes_count'), - UShort('used_dirs_count'), - Padding(14), -) - -inode = Struct("inode", - FlagsEnum(UShort('mode'), - IXOTH = 0x0001, - IWOTH = 0x0002, - IROTH = 0x0004, - IRWXO = 0x0007, - IXGRP = 0x0008, - IWGRP = 0x0010, - IRGRP = 0x0020, - IRWXG = 0x0038, - IXUSR = 0x0040, - IWUSR = 0x0080, - IRUSR = 0x0100, - IRWXU = 0x01C0, - ISVTX = 0x0200, - ISGID = 0x0400, - ISUID = 0x0800, - IFIFO = 0x1000, - IFCHR = 0x2000, - IFDIR = 0x4000, - IFBLK = 0x6000, - IFREG = 0x8000, - IFLNK = 0xC000, - IFSOCK = 0xA000, - IFMT = 0xF000, - ), - UShort('uid'), - ULong('size'), - ULong('atime'), - ULong('ctime'), - 
ULong('mtime'), - ULong('dtime'), - UShort('gid'), - UShort('links_count'), - ULong('blocks'), - FlagsEnum(ULong('flags'), - SecureDelete = 0x0001, - AllowUndelete = 0x0002, - Compressed = 0x0004, - Synchronous = 0x0008, - ), - Padding(4), - StrictRepeater(12, ULong('blocks')), - ULong("indirect1_block"), - ULong("indirect2_block"), - ULong("indirect3_block"), - ULong('version'), - ULong('file_acl'), - ULong('dir_acl'), - ULong('faddr'), - UChar('frag'), - Byte('fsize'), - Padding(10) , -) - -# special inodes -EXT2_BAD_INO = 1 -EXT2_ROOT_INO = 2 -EXT2_ACL_IDX_INO = 3 -EXT2_ACL_DATA_INO = 4 -EXT2_BOOT_LOADER_INO = 5 -EXT2_UNDEL_DIR_INO = 6 -EXT2_FIRST_INO = 11 - -directory_record = Struct("directory_entry", - ULong("inode"), - UShort("rec_length"), - UShort("name_length"), - Field("name", lambda ctx: ctx["name_length"]), - Padding(lambda ctx: ctx["rec_length"] - ctx["name_length"]) -) - - -print superblock.sizeof() - - - - - - - - - - - - - - - - - - - - - - diff --git a/construct/formats/filesystem/mbr.py b/construct/formats/filesystem/mbr.py deleted file mode 100644 index 007bb07ac..000000000 --- a/construct/formats/filesystem/mbr.py +++ /dev/null @@ -1,76 +0,0 @@ -""" -Master Boot Record -The first sector on disk, contains the partition table, bootloader, et al. 
- -http://www.win.tue.nl/~aeb/partitions/partition_types-1.html -""" -from construct import * - - -mbr = Struct("mbr", - HexDumpAdapter(Bytes("bootloader_code", 446)), - Array(4, - Struct("partitions", - Enum(Byte("state"), - INACTIVE = 0x00, - ACTIVE = 0x80, - ), - BitStruct("beginning", - Octet("head"), - Bits("sect", 6), - Bits("cyl", 10), - ), - Enum(UBInt8("type"), - Nothing = 0x00, - FAT12 = 0x01, - XENIX_ROOT = 0x02, - XENIX_USR = 0x03, - FAT16_old = 0x04, - Extended_DOS = 0x05, - FAT16 = 0x06, - FAT32 = 0x0b, - FAT32_LBA = 0x0c, - NTFS = 0x07, - LINUX_SWAP = 0x82, - LINUX_NATIVE = 0x83, - _default_ = Pass, - ), - BitStruct("ending", - Octet("head"), - Bits("sect", 6), - Bits("cyl", 10), - ), - UBInt32("sector_offset"), # offset from MBR in sectors - UBInt32("size"), # in sectors - ) - ), - Const(Bytes("signature", 2), "\x55\xAA"), -) - - - -if __name__ == "__main__": - cap1 = ( - "33C08ED0BC007CFB5007501FFCBE1B7CBF1B065057B9E501F3A4CBBDBE07B104386E00" - "7C09751383C510E2F4CD188BF583C610497419382C74F6A0B507B4078BF0AC3C0074FC" - "BB0700B40ECD10EBF2884E10E84600732AFE4610807E040B740B807E040C7405A0B607" - "75D2804602068346080683560A00E821007305A0B607EBBC813EFE7D55AA740B807E10" - "0074C8A0B707EBA98BFC1E578BF5CBBF05008A5600B408CD1372238AC1243F988ADE8A" - "FC43F7E38BD186D6B106D2EE42F7E239560A77237205394608731CB80102BB007C8B4E" - "028B5600CD1373514F744E32E48A5600CD13EBE48A560060BBAA55B441CD13723681FB" - "55AA7530F6C101742B61606A006A00FF760AFF76086A0068007C6A016A10B4428BF4CD" - "136161730E4F740B32E48A5600CD13EBD661F9C3496E76616C69642070617274697469" - "6F6E207461626C65004572726F72206C6F6164696E67206F7065726174696E67207379" - "7374656D004D697373696E67206F7065726174696E672073797374656D000000000000" - "0000000000000000000000000000000000000000000000000000000000000000000000" - "00000000000000000000000000000000002C4463B7BDB7BD00008001010007FEFFFF3F" - "000000371671020000C1FF0FFEFFFF761671028A8FDF06000000000000000000000000" - "000000000000000000000000000000000000000055AA" - 
).decode("hex") - - print mbr.parse(cap1) - - - - - diff --git a/construct/formats/graphics/__init__.py b/construct/formats/graphics/__init__.py deleted file mode 100644 index 4abda02dd..000000000 --- a/construct/formats/graphics/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -""" -graphic file formats, including imagery (bmp, jpg, gif, png, ...), -models (3ds, ...), etc. -""" diff --git a/construct/formats/graphics/bmp.py b/construct/formats/graphics/bmp.py deleted file mode 100644 index 687421de0..000000000 --- a/construct/formats/graphics/bmp.py +++ /dev/null @@ -1,113 +0,0 @@ -""" -Windows/OS2 Bitmap (BMP) -this could have been a perfect show-case file format, but they had to make -it ugly (all sorts of alignment or -""" -from construct import * - - -#=============================================================================== -# pixels: uncompressed -#=============================================================================== -def UncompressedRows(subcon, align_to_byte = False): - """argh! 
lines must be aligned to a 4-byte boundary, and bit-pixel - lines must be aligned to full bytes...""" - if align_to_byte: - line_pixels = Bitwise( - Aligned(Array(lambda ctx: ctx.width, subcon), modulus = 8) - ) - else: - line_pixels = Array(lambda ctx: ctx.width, subcon) - return Array(lambda ctx: ctx.height, - Aligned(line_pixels, modulus = 4) - ) - -uncompressed_pixels = Switch("uncompressed", lambda ctx: ctx.bpp, - { - 1 : UncompressedRows(Bit("index"), align_to_byte = True), - 4 : UncompressedRows(Nibble("index"), align_to_byte = True), - 8 : UncompressedRows(Byte("index")), - 24 : UncompressedRows( - Sequence("rgb", Byte("red"), Byte("green"), Byte("blue")) - ), - } -) - -#=============================================================================== -# pixels: Run Length Encoding (RLE) 8 bit -#=============================================================================== -class RunLengthAdapter(Adapter): - def _encode(self, obj): - return len(obj), obj[0] - def _decode(self, obj): - length, value = obj - return [value] * length - -rle8pixel = RunLengthAdapter( - Sequence("rle8pixel", - Byte("length"), - Byte("value") - ) -) - -#=============================================================================== -# file structure -#=============================================================================== -bitmap_file = Struct("bitmap_file", - # header - Const(String("signature", 2), "BM"), - ULInt32("file_size"), - Padding(4), - ULInt32("data_offset"), - ULInt32("header_size"), - Enum(Alias("version", "header_size"), - v2 = 12, - v3 = 40, - v4 = 108, - ), - ULInt32("width"), - ULInt32("height"), - Value("number_of_pixels", lambda ctx: ctx.width * ctx.height), - ULInt16("planes"), - ULInt16("bpp"), # bits per pixel - Enum(ULInt32("compression"), - Uncompressed = 0, - RLE8 = 1, - RLE4 = 2, - Bitfields = 3, - JPEG = 4, - PNG = 5, - ), - ULInt32("image_data_size"), # in bytes - ULInt32("horizontal_dpi"), - ULInt32("vertical_dpi"), - ULInt32("colors_used"), - 
ULInt32("important_colors"), - - # palette (24 bit has no palette) - OnDemand( - Array(lambda ctx: 2 ** ctx.bpp if ctx.bpp <= 8 else 0, - Struct("palette", - Byte("blue"), - Byte("green"), - Byte("red"), - Padding(1), - ) - ) - ), - - # pixels - OnDemandPointer(lambda ctx: ctx.data_offset, - Switch("pixels", lambda ctx: ctx.compression, - { - "Uncompressed" : uncompressed_pixels, - } - ), - ), -) - - -if __name__ == "__main__": - obj = bitmap_file.parse_stream(open("../../tests/bitmap8.bmp", "rb")) - print obj - print repr(obj.pixels.value) diff --git a/construct/formats/graphics/emf.py b/construct/formats/graphics/emf.py deleted file mode 100644 index 6f4d0105f..000000000 --- a/construct/formats/graphics/emf.py +++ /dev/null @@ -1,198 +0,0 @@ -""" -Enhanced Meta File -""" -from construct import * - - -record_type = Enum(ULInt32("record_type"), - ABORTPATH = 68, - ANGLEARC = 41, - ARC = 45, - ARCTO = 55, - BEGINPATH = 59, - BITBLT = 76, - CHORD = 46, - CLOSEFIGURE = 61, - CREATEBRUSHINDIRECT = 39, - CREATEDIBPATTERNBRUSHPT = 94, - CREATEMONOBRUSH = 93, - CREATEPALETTE = 49, - CREATEPEN = 38, - DELETEOBJECT = 40, - ELLIPSE = 42, - ENDPATH = 60, - EOF = 14, - EXCLUDECLIPRECT = 29, - EXTCREATEFONTINDIRECTW = 82, - EXTCREATEPEN = 95, - EXTFLOODFILL = 53, - EXTSELECTCLIPRGN = 75, - EXTTEXTOUTA = 83, - EXTTEXTOUTW = 84, - FILLPATH = 62, - FILLRGN = 71, - FLATTENPATH = 65, - FRAMERGN = 72, - GDICOMMENT = 70, - HEADER = 1, - INTERSECTCLIPRECT = 30, - INVERTRGN = 73, - LINETO = 54, - MASKBLT = 78, - MODIFYWORLDTRANSFORM = 36, - MOVETOEX = 27, - OFFSETCLIPRGN = 26, - PAINTRGN = 74, - PIE = 47, - PLGBLT = 79, - POLYBEZIER = 2, - POLYBEZIER16 = 85, - POLYBEZIERTO = 5, - POLYBEZIERTO16 = 88, - POLYDRAW = 56, - POLYDRAW16 = 92, - POLYGON = 3, - POLYGON16 = 86, - POLYLINE = 4, - POLYLINE16 = 87, - POLYLINETO = 6, - POLYLINETO16 = 89, - POLYPOLYGON = 8, - POLYPOLYGON16 = 91, - POLYPOLYLINE = 7, - POLYPOLYLINE16 = 90, - POLYTEXTOUTA = 96, - POLYTEXTOUTW = 97, - REALIZEPALETTE = 52, 
- RECTANGLE = 43, - RESIZEPALETTE = 51, - RESTOREDC = 34, - ROUNDRECT = 44, - SAVEDC = 33, - SCALEVIEWPORTEXTEX = 31, - SCALEWINDOWEXTEX = 32, - SELECTCLIPPATH = 67, - SELECTOBJECT = 37, - SELECTPALETTE = 48, - SETARCDIRECTION = 57, - SETBKCOLOR = 25, - SETBKMODE = 18, - SETBRUSHORGEX = 13, - SETCOLORADJUSTMENT = 23, - SETDIBITSTODEVICE = 80, - SETMAPMODE = 17, - SETMAPPERFLAGS = 16, - SETMETARGN = 28, - SETMITERLIMIT = 58, - SETPALETTEENTRIES = 50, - SETPIXELV = 15, - SETPOLYFILLMODE = 19, - SETROP2 = 20, - SETSTRETCHBLTMODE = 21, - SETTEXTALIGN = 22, - SETTEXTCOLOR = 24, - SETVIEWPORTEXTEX = 11, - SETVIEWPORTORGEX = 12, - SETWINDOWEXTEX = 9, - SETWINDOWORGEX = 10, - SETWORLDTRANSFORM = 35, - STRETCHBLT = 77, - STRETCHDIBITS = 81, - STROKEANDFILLPATH = 63, - STROKEPATH = 64, - WIDENPATH = 66, - _default_ = Pass, -) - -generic_record = Struct("records", - record_type, - ULInt32("record_size"), # Size of the record in bytes - Union("params", # Parameters - Field("raw", lambda ctx: ctx._.record_size - 8), - Array(lambda ctx: (ctx._.record_size - 8) // 4, ULInt32("params")) - ), -) - -header_record = Struct("header_record", - Const(record_type, "HEADER"), - ULInt32("record_size"), # Size of the record in bytes - SLInt32("bounds_left"), # Left inclusive bounds - SLInt32("bounds_right"), # Right inclusive bounds - SLInt32("bounds_top"), # Top inclusive bounds - SLInt32("bounds_bottom"), # Bottom inclusive bounds - SLInt32("frame_left"), # Left side of inclusive picture frame - SLInt32("frame_right"), # Right side of inclusive picture frame - SLInt32("frame_top"), # Top side of inclusive picture frame - SLInt32("frame_bottom"), # Bottom side of inclusive picture frame - Const(ULInt32("signature"), 0x464D4520), - ULInt32("version"), # Version of the metafile - ULInt32("size"), # Size of the metafile in bytes - ULInt32("num_of_records"), # Number of records in the metafile - ULInt16("num_of_handles"), # Number of handles in the handle table - Padding(2), - 
ULInt32("description_size"), # Size of description string in WORDs - ULInt32("description_offset"), # Offset of description string in metafile - ULInt32("num_of_palette_entries"), # Number of color palette entries - SLInt32("device_width_pixels"), # Width of reference device in pixels - SLInt32("device_height_pixels"), # Height of reference device in pixels - SLInt32("device_width_mm"), # Width of reference device in millimeters - SLInt32("device_height_mm"), # Height of reference device in millimeters - - # description string - Pointer(lambda ctx: ctx.description_offset, - StringAdapter( - Array(lambda ctx: ctx.description_size, - Field("description", 2) - ) - ) - ), - - # padding up to end of record - Padding(lambda ctx: ctx.record_size - 88), -) - -emf_file = Struct("emf_file", - header_record, - Array(lambda ctx: ctx.header_record.num_of_records - 1, - generic_record - ), -) - - -if __name__ == "__main__": - obj = emf_file.parse_stream(open("../../tests/emf1.emf", "rb")) - print obj - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/construct/formats/graphics/png.py b/construct/formats/graphics/png.py deleted file mode 100644 index 3db70a3a6..000000000 --- a/construct/formats/graphics/png.py +++ /dev/null @@ -1,354 +0,0 @@ -""" -Portable Network Graphics (PNG) file format -Official spec: http://www.w3.org/TR/PNG - -Original code contributed by Robin Munn (rmunn at pobox dot com) -(although the code has been extensively reorganized to meet Construct's -coding conventions) -""" -from construct import * - - -#=============================================================================== -# utils -#=============================================================================== -def Coord(name, field=UBInt8): - return Struct(name, - field("x"), - field("y"), - ) - -compression_method = Enum(UBInt8("compression_method"), - deflate = 0, - _default_ = Pass -) - - 
-#=============================================================================== -# 11.2.3: PLTE - Palette -#=============================================================================== -plte_info = Struct("plte_info", - Value("num_entries", lambda ctx: ctx._.length / 3), - Array(lambda ctx: ctx.num_entries, - Struct("palette_entries", - UBInt8("red"), - UBInt8("green"), - UBInt8("blue"), - ), - ), -) - -#=============================================================================== -# 11.2.4: IDAT - Image data -#=============================================================================== -idat_info = OnDemand( - Field("idat_info", lambda ctx: ctx.length), -) - -#=============================================================================== -# 11.3.2.1: tRNS - Transparency -#=============================================================================== -trns_info = Switch("trns_info", lambda ctx: ctx._.image_header.color_type, - { - "greyscale": Struct("data", - UBInt16("grey_sample") - ), - "truecolor": Struct("data", - UBInt16("red_sample"), - UBInt16("blue_sample"), - UBInt16("green_sample"), - ), - "indexed": Array(lambda ctx: ctx.length, - UBInt8("alpha"), - ), - } -) - -#=============================================================================== -# 11.3.3.1: cHRM - Primary chromacities and white point -#=============================================================================== -chrm_info = Struct("chrm_info", - Coord("white_point", UBInt32), - Coord("red", UBInt32), - Coord("green", UBInt32), - Coord("blue", UBInt32), -) - -#=============================================================================== -# 11.3.3.2: gAMA - Image gamma -#=============================================================================== -gama_info = Struct("gama_info", - UBInt32("gamma"), -) - -#=============================================================================== -# 11.3.3.3: iCCP - Embedded ICC profile 
-#=============================================================================== -iccp_info = Struct("iccp_info", - CString("name"), - compression_method, - Field("compressed_profile", - lambda ctx: ctx._.length - (len(ctx.name) + 2) - ), -) - -#=============================================================================== -# 11.3.3.4: sBIT - Significant bits -#=============================================================================== -sbit_info = Switch("sbit_info", lambda ctx: ctx._.image_header.color_type, - { - "greyscale": Struct("data", - UBInt8("significant_grey_bits"), - ), - "truecolor": Struct("data", - UBInt8("significant_red_bits"), - UBInt8("significant_green_bits"), - UBInt8("significant_blue_bits"), - ), - "indexed": Struct("data", - UBInt8("significant_red_bits"), - UBInt8("significant_green_bits"), - UBInt8("significant_blue_bits"), - ), - "greywithalpha": Struct("data", - UBInt8("significant_grey_bits"), - UBInt8("significant_alpha_bits"), - ), - "truewithalpha": Struct("data", - UBInt8("significant_red_bits"), - UBInt8("significant_green_bits"), - UBInt8("significant_blue_bits"), - UBInt8("significant_alpha_bits"), - ), - } -) - -#=============================================================================== -# 11.3.3.5: sRGB - Standard RPG color space -#=============================================================================== -srgb_info = Struct("srgb_info", - Enum(UBInt8("rendering_intent"), - perceptual = 0, - relative_colorimetric = 1, - saturation = 2, - absolute_colorimetric = 3, - _default_ = Pass, - ), -) - -#=============================================================================== -# 11.3.4.3: tEXt - Textual data -#=============================================================================== -text_info = Struct("text_info", - CString("keyword"), - Field("text", lambda ctx: ctx._.length - (len(ctx.keyword) + 1)), -) - -#=============================================================================== -# 11.3.4.4: zTXt 
- Compressed textual data -#=============================================================================== -ztxt_info = Struct("ztxt_info", - CString("keyword"), - compression_method, - OnDemand( - Field("compressed_text", - # As with iCCP, length is chunk length, minus length of - # keyword, minus two: one byte for the null terminator, - # and one byte for the compression method. - lambda ctx: ctx._.length - (len(ctx.keyword) + 2), - ), - ), -) - -#=============================================================================== -# 11.3.4.5: iTXt - International textual data -#=============================================================================== -itxt_info = Struct("itxt_info", - CString("keyword"), - UBInt8("compression_flag"), - compression_method, - CString("language_tag"), - CString("translated_keyword"), - OnDemand( - Field("text", - lambda ctx: ctx._.length - (len(ctx.keyword) + - len(ctx.language_tag) + len(ctx.translated_keyword) + 5), - ), - ), -) - -#=============================================================================== -# 11.3.5.1: bKGD - Background color -#=============================================================================== -bkgd_info = Switch("bkgd_info", lambda ctx: ctx._.image_header.color_type, - { - "greyscale": Struct("data", - UBInt16("background_greyscale_value"), - Alias("grey", "background_greyscale_value"), - ), - "greywithalpha": Struct("data", - UBInt16("background_greyscale_value"), - Alias("grey", "background_greyscale_value"), - ), - "truecolor": Struct("data", - UBInt16("background_red_value"), - UBInt16("background_green_value"), - UBInt16("background_blue_value"), - Alias("red", "background_red_value"), - Alias("green", "background_green_value"), - Alias("blue", "background_blue_value"), - ), - "truewithalpha": Struct("data", - UBInt16("background_red_value"), - UBInt16("background_green_value"), - UBInt16("background_blue_value"), - Alias("red", "background_red_value"), - Alias("green", 
"background_green_value"), - Alias("blue", "background_blue_value"), - ), - "indexed": Struct("data", - UBInt16("background_palette_index"), - Alias("index", "background_palette_index"), - ), - } -) - -#=============================================================================== -# 11.3.5.2: hIST - Image histogram -#=============================================================================== -hist_info = Array(lambda ctx: ctx._.length / 2, - UBInt16("frequency"), -) - -#=============================================================================== -# 11.3.5.3: pHYs - Physical pixel dimensions -#=============================================================================== -phys_info = Struct("phys_info", - UBInt32("pixels_per_unit_x"), - UBInt32("pixels_per_unit_y"), - Enum(UBInt8("unit"), - unknown = 0, - meter = 1, - _default_ = Pass - ), -) - -#=============================================================================== -# 11.3.5.4: sPLT - Suggested palette -#=============================================================================== -def splt_info_data_length(ctx): - if ctx.sample_depth == 8: - entry_size = 6 - else: - entry_size = 10 - return (ctx._.length - len(ctx.name) - 2) / entry_size - -splt_info = Struct("data", - CString("name"), - UBInt8("sample_depth"), - Array(lambda ctx: splt_info_data_length, - IfThenElse("table", lambda ctx: ctx.sample_depth == 8, - # Sample depth 8 - Struct("table", - UBInt8("red"), - UBInt8("green"), - UBInt8("blue"), - UBInt8("alpha"), - UBInt16("frequency"), - ), - # Sample depth 16 - Struct("table", - UBInt16("red"), - UBInt16("green"), - UBInt16("blue"), - UBInt16("alpha"), - UBInt16("frequency"), - ), - ), - ), -) - -#=============================================================================== -# 11.3.6.1: tIME - Image last-modification time -#=============================================================================== -time_info = Struct("data", - UBInt16("year"), - UBInt8("month"), - UBInt8("day"), - 
UBInt8("hour"), - UBInt8("minute"), - UBInt8("second"), -) - -#=============================================================================== -# chunks -#=============================================================================== -default_chunk_info = OnDemand( - HexDumpAdapter(Field(None, lambda ctx: ctx.length)) -) - -chunk = Struct("chunk", - UBInt32("length"), - String("type", 4), - Switch("data", lambda ctx: ctx.type, - { - "PLTE" : plte_info, - "IEND" : Pass, - "IDAT" : idat_info, - "tRNS" : trns_info, - "cHRM" : chrm_info, - "gAMA" : gama_info, - "iCCP" : iccp_info, - "sBIT" : sbit_info, - "sRGB" : srgb_info, - "tEXt" : text_info, - "zTXt" : ztxt_info, - "iTXt" : itxt_info, - "bKGD" : bkgd_info, - "hIST" : hist_info, - "pHYs" : phys_info, - "sPLT" : splt_info, - "tIME" : time_info, - }, - default = default_chunk_info, - ), - UBInt32("crc"), -) - -image_header_chunk = Struct("image_header", - UBInt32("length"), - Const(String("type", 4), "IHDR"), - UBInt32("width"), - UBInt32("height"), - UBInt8("bit_depth"), - Enum(UBInt8("color_type"), - greyscale = 0, - truecolor = 2, - indexed = 3, - greywithalpha = 4, - truewithalpha = 6, - _default_ = Pass, - ), - compression_method, - Enum(UBInt8("filter_method"), - # "adaptive filtering with five basic filter types" - adaptive5 = 0, - _default_ = Pass, - ), - Enum(UBInt8("interlace_method"), - none = 0, - adam7 = 1, - _default_ = Pass, - ), - UBInt32("crc"), -) - - -#=============================================================================== -# the complete PNG file -#=============================================================================== -png_file = Struct("png", - Magic("\x89PNG\r\n\x1a\n"), - image_header_chunk, - Rename("chunks", GreedyRange(chunk)), -) diff --git a/construct/lib/__init__.py b/construct/lib/__init__.py index aaa03198d..5894f5708 100644 --- a/construct/lib/__init__.py +++ b/construct/lib/__init__.py @@ -1,10 +1,53 @@ -from binary import int_to_bin, bin_to_int, swap_bytes, 
encode_bin, decode_bin -from bitstream import BitStreamReader, BitStreamWriter -from container import (Container, FlagsContainer, ListContainer, - LazyContainer) -from hex import HexString, hexdump +from construct.lib.containers import * +from construct.lib.binary import * +from construct.lib.bitstream import * +from construct.lib.hex import * +from construct.lib.py3compat import * -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO +__all__ = [ + 'bits2bytes', + 'bits2integer', + 'byte2int', + 'bytes2bits', + 'bytes2integer', + 'bytes2str', + 'Container', + 'globalPrintFalseFlags', + 'globalPrintFullStrings', + 'HexDisplayedBytes', + 'HexDisplayedDict', + 'HexDisplayedInteger', + 'hexdump', + 'HexDumpDisplayedBytes', + 'HexDumpDisplayedDict', + 'hexlify', + 'hexundump', + 'int2byte', + 'integer2bits', + 'integer2bytes', + 'ListContainer', + 'ONWINDOWS', + 'PY', + 'PYPY', + 'RebufferedBytesIO', + 'RestreamedBytesIO', + 'setGlobalPrintFalseFlags', + 'setGlobalPrintFullStrings', + 'setGlobalPrintPrivateEntries', + 'str2bytes', + 'swapbitsinbytes', + 'swapbytes', + 'swapbytesinbits', + 'unhexlify', + # deprecated: + 'PY2', + 'PY3', + 'bytestringtype', + 'bytes2integers', + 'integers2bytes', + 'integertypes', + 'reprstring', + 'stringtypes', + 'unicodestringtype', + 'trimstring', +] diff --git a/construct/lib/binary.py b/construct/lib/binary.py index 93bf002a6..9f971cdbf 100644 --- a/construct/lib/binary.py +++ b/construct/lib/binary.py @@ -1,59 +1,169 @@ -def int_to_bin(number, width = 32): +from construct import * +from construct.lib import * +import binascii + + +def integer2bits(number, width, signed=False): + r""" + Converts an integer into its binary representation in a bit-string. Width is the amount of bits to generate. Each bit is represented as either \\x00 or \\x01. The most significant bit is first, big-endian. This is reverse to `bits2integer`. 
+ + Examples: + + >>> integer2bits(19, 8) + b'\x00\x00\x00\x01\x00\x00\x01\x01' + """ + if not width >= 1: + raise ValueError(f"width {width} must be positive") + + if signed: + min = -(2 ** width // 2) + max = 2 ** width // 2 - 1 + else: + min = 0 + max = 2 ** width - 1 + if not min <= number <= max: + raise ValueError(f"number {number} is out of range (min={min}, max={max})") + if number < 0: number += 1 << width + bits = bytearray(width) i = width - 1 - bits = ["\x00"] * width while number and i >= 0: - bits[i] = "\x00\x01"[number & 1] + bits[i] = number & 1 number >>= 1 i -= 1 - return "".join(bits) + return bytes(bits) + + +def integer2bytes(number, width, signed=False): + r""" + Converts an integer into a byte-string. This is reverse to `bytes2integer`. + + Examples: + + >>> integer2bytes(19, 4) + '\x00\x00\x00\x13' + """ + # pypy does not check this in int.to_bytes, lazy fuckers + if not width >= 1: + raise ValueError(f"width {width} must be positive") + + try: + return int.to_bytes(number, width, 'big', signed=signed) + except OverflowError: + raise ValueError(f"number {number} does not fit width {width}, signed {signed}") + + +def bits2integer(data, signed=False): + r""" + Converts a bit-string into an integer. Set signed to interpret the number as a 2-s complement signed integer. This is reverse to `integer2bits`. 
+ + Examples: + + >>> bits2integer(b"\x01\x00\x00\x01\x01") + 19 + """ + if data == b"": + raise ValueError("bit-string cannot be empty") -_bit_values = {"\x00" : 0, "\x01" : 1, "0" : 0, "1" : 1} -def bin_to_int(bits, signed = False): number = 0 - bias = 0 - if signed and _bit_values[bits[0]] == 1: - bits = bits[1:] - bias = 1 << len(bits) - for b in bits: - number <<= 1 - number |= _bit_values[b] - return number - bias - -def swap_bytes(bits, bytesize = 8): - i = 0 - l = len(bits) - output = [""] * ((l // bytesize) + 1) - j = len(output) - 1 - while i < l: - output[j] = bits[i : i + bytesize] - i += bytesize - j -= 1 - return "".join(output) - -_char_to_bin = {} -_bin_to_char = {} -for i in range(256): - ch = chr(i) - bin = int_to_bin(i, 8) - _char_to_bin[ch] = bin - _bin_to_char[bin] = ch - _bin_to_char[bin] = ch - -def encode_bin(data): - return "".join(_char_to_bin[ch] for ch in data) - -def decode_bin(data): - if len(data) & 7: - raise ValueError("Data length must be a multiple of 8") - - i = 0 - j = 0 - l = len(data) // 8 - chars = [""] * l - while j < l: - chars[j] = _bin_to_char[data[i:i+8]] - i += 8 - j += 1 - return "".join(chars) + for b in data: + number = (number << 1) | b + + if signed and data[0]: + bias = 1 << len(data) + return number - bias + else: + return number + + +def bytes2integer(data, signed=False): + r""" + Converts a byte-string into an integer. This is reverse to `integer2bytes`. + + Examples: + + >>> bytes2integer(b'\x00\x00\x00\x13') + 19 + """ + if data == b"": + raise ValueError("byte-string cannot be empty") + + return int.from_bytes(data, 'big', signed=signed) + + +BYTES2BITS_CACHE = {i:integer2bits(i,8) for i in range(256)} +def bytes2bits(data): + r""" + Converts between bit-string and byte-string representations, both as bytes type. 
+ + Example: + + >>> bytes2bits(b'ab') + b"\x00\x01\x01\x00\x00\x00\x00\x01\x00\x01\x01\x00\x00\x00\x01\x00" + """ + return b"".join(BYTES2BITS_CACHE[b] for b in data) + + +BITS2BYTES_CACHE = {bytes2bits(int2byte(i)):i for i in range(256)} +def bits2bytes(data): + r""" + Converts between bit-string and byte-string representations, both as bytes type. Its length must be multiple of 8. + + Example: + + >>> bits2bytes(b"\x00\x01\x01\x00\x00\x00\x00\x01\x00\x01\x01\x00\x00\x00\x01\x00") + b'ab' + """ + if len(data) % 8 != 0: + raise ValueError(f"data length {len(data)} must be a multiple of 8") + return bytes(BITS2BYTES_CACHE[data[i:i+8]] for i in range(0,len(data),8)) + + +def swapbytes(data): + r""" + Performs an endianness swap on byte-string. + + Example: + + >>> swapbytes(b'abcd') + b'dcba' + """ + return data[::-1] + + +def swapbytesinbits(data): + r""" + Performs an byte-swap within a bit-string. Its length must be multiple of 8. + + Example: + + >>> swapbytesinbits(b'0000000011111111') + b'1111111100000000' + """ + if len(data) % 8 != 0: + raise ValueError(f"little-endianness is only defined if data length {len(data)} is multiple of 8") + return b"".join(data[i:i+8] for i in reversed(range(0,len(data),8))) + + +SWAPBITSINBYTES_CACHE = {i:byte2int(bits2bytes(swapbytes(bytes2bits(int2byte(i))))) for i in range(256)} +def swapbitsinbytes(data): + r""" + Performs a bit-reversal on each byte within a byte-string. 
+ + Example: + + >>> swapbitsinbytes(b"\xf0\x00") + b"\x0f\x00" + """ + return bytes(SWAPBITSINBYTES_CACHE[b] for b in data) + + +def hexlify(data): + """Returns binascii.hexlify(data).""" + return binascii.hexlify(data) + + +def unhexlify(data): + """Returns binascii.unhexlify(data).""" + return binascii.unhexlify(data) diff --git a/construct/lib/bitstream.py b/construct/lib/bitstream.py index ff3d93ca5..baa44d717 100644 --- a/construct/lib/bitstream.py +++ b/construct/lib/bitstream.py @@ -1,77 +1,147 @@ -from construct.lib.binary import encode_bin, decode_bin +from io import BlockingIOError +from time import sleep +from sys import maxsize -class BitStreamReader(object): - __slots__ = ["substream", "buffer", "total_size"] +class RestreamedBytesIO(object): - def __init__(self, substream): + def __init__(self, substream, decoder, decoderunit, encoder, encoderunit): self.substream = substream - self.total_size = 0 - self.buffer = "" + self.encoder = encoder + self.encoderunit = encoderunit + self.decoder = decoder + self.decoderunit = decoderunit + self.rbuffer = b"" + self.wbuffer = b"" + self.sincereadwritten = 0 + + def read(self, count=None): + if count is None: + while True: + data = self.substream.read(self.decoderunit) + if data is None or len(data) == 0: + break + self.rbuffer += self.decoder(data) + data, self.rbuffer = self.rbuffer, b'' + self.sincereadwritten += len(data) + return data + + else: + if count < 0: + raise ValueError("count cannot be negative") + while len(self.rbuffer) < count: + data = self.substream.read(self.decoderunit) + if data is None or len(data) == 0: + return b'' + self.rbuffer += self.decoder(data) + data, self.rbuffer = self.rbuffer[:count], self.rbuffer[count:] + self.sincereadwritten += count + return data + + def write(self, data): + self.wbuffer += data + datalen = len(data) + while len(self.wbuffer) >= self.encoderunit: + data, self.wbuffer = self.wbuffer[:self.encoderunit], self.wbuffer[self.encoderunit:] + 
self.substream.write(self.encoder(data)) + self.sincereadwritten += datalen + return datalen def close(self): - if self.total_size % 8 != 0: - raise ValueError("total size of read data must be a multiple of 8", - self.total_size) + if len(self.rbuffer): + raise ValueError("closing stream but %d unread bytes remain, %d is decoded unit" % (len(self.rbuffer), self.decoderunit)) + if len(self.wbuffer): + raise ValueError("closing stream but %d unwritten bytes remain, %d is encoded unit" % (len(self.wbuffer), self.encoderunit)) + + def seek(self, at, whence=0): + if whence == 0 and at == self.sincereadwritten: + pass + else: + raise IOError + + def seekable(self): + return False def tell(self): - return self.substream.tell() - - def seek(self, pos, whence = 0): - self.buffer = "" - self.total_size = 0 - self.substream.seek(pos, whence) - - def read(self, count): - if count < 0: - raise ValueError("count cannot be negative") - - l = len(self.buffer) - if count == 0: - data = "" - elif count <= l: - data = self.buffer[:count] - self.buffer = self.buffer[count:] - else: - data = self.buffer - count -= l - bytes = count // 8 - if count & 7: - bytes += 1 - buf = encode_bin(self.substream.read(bytes)) - data += buf[:count] - self.buffer = buf[count:] - self.total_size += len(data) - return data + """WARNING: tell is correct only on read-only and write-only instances.""" + return self.sincereadwritten -class BitStreamWriter(object): + def tellable(self): + return True - __slots__ = ["substream", "buffer", "pos"] - def __init__(self, substream): +class RebufferedBytesIO(object): + + def __init__(self, substream, tailcutoff=None): self.substream = substream - self.buffer = [] - self.pos = 0 + self.offset = 0 + self.rwbuffer = b"" + self.moved = 0 + self.tailcutoff = tailcutoff + + def read(self, count=None): + if count is None: + raise ValueError("count must be integer, reading until EOF not supported") + startsat = self.offset + endsat = startsat + count + if startsat < 
self.moved: + raise IOError("could not read because tail was cut off") + while self.moved + len(self.rwbuffer) < endsat: + try: + newdata = self.substream.read(128*1024) + except BlockingIOError: + newdata = None + if not newdata: + sleep(0) + continue + self.rwbuffer += newdata + data = self.rwbuffer[startsat-self.moved:endsat-self.moved] + self.offset += count + if self.tailcutoff is not None and self.moved < self.offset - self.tailcutoff: + removed = self.offset - self.tailcutoff - self.moved + self.moved += removed + self.rwbuffer = self.rwbuffer[removed:] + if len(data) < count: + raise IOError("could not read enough bytes, something went wrong") + return data - def close(self): - self.flush() + def write(self, data): + startsat = self.offset + endsat = startsat + len(data) + while self.moved + len(self.rwbuffer) < startsat: + newdata = self.substream.read(128*1024) + self.rwbuffer += newdata + if not newdata: + sleep(0) + self.rwbuffer = self.rwbuffer[:startsat-self.moved] + data + self.rwbuffer[endsat-self.moved:] + self.offset = endsat + if self.tailcutoff is not None and self.moved < self.offset - self.tailcutoff: + removed = self.offset - self.tailcutoff - self.moved + self.moved += removed + self.rwbuffer = self.rwbuffer[removed:] + return len(data) + + def seek(self, at, whence=0): + if whence == 0: + self.offset = at + return self.offset + elif whence == 1: + self.offset += at + return self.offset + else: + raise ValueError("this class seeks only with whence: 0 and 1 (excluded 2)") - def flush(self): - bytes = decode_bin("".join(self.buffer)) - self.substream.write(bytes) - self.buffer = [] - self.pos = 0 + def seekable(self): + return True def tell(self): - return self.substream.tell() + self.pos // 8 + return self.offset - def seek(self, pos, whence = 0): - self.flush() - self.substream.seek(pos, whence) + def tellable(self): + return True - def write(self, data): - if not data: - return - if type(data) is not str: - raise TypeError("data must be a 
string, not %r" % (type(data),)) - self.buffer.append(data) + def cachedfrom(self): + return self.moved + + def cachedto(self): + return self.moved + len(self.rwbuffer) diff --git a/construct/lib/container.py b/construct/lib/container.py deleted file mode 100644 index df0d321ee..000000000 --- a/construct/lib/container.py +++ /dev/null @@ -1,161 +0,0 @@ -""" -Various containers. -""" - -from UserDict import DictMixin -from pprint import pformat - -def recursion_lock(retval, lock_name = "__recursion_lock__"): - def decorator(func): - def wrapper(self, *args, **kw): - if getattr(self, lock_name, False): - return retval - setattr(self, lock_name, True) - try: - return func(self, *args, **kw) - finally: - setattr(self, lock_name, False) - wrapper.__name__ = func.__name__ - return wrapper - return decorator - -class Container(object, DictMixin): - """ - A generic container of attributes. - - Containers are the common way to express parsed data. - """ - - def __init__(self, **kw): - self.__dict__ = kw - - # The core dictionary interface. - - def __getitem__(self, name): - return self.__dict__[name] - - def __delitem__(self, name): - del self.__dict__[name] - - def __setitem__(self, name, value): - self.__dict__[name] = value - - def keys(self): - return self.__dict__.keys() - - # Extended dictionary interface. - - def update(self, other): - self.__dict__.update(other) - - __update__ = update - - def __contains__(self, value): - return value in self.__dict__ - - def iteritems(self): - return self.__dict__.iteritems() - - # Rich comparisons. - - def __eq__(self, other): - try: - return self.__dict__ == other.__dict__ - except AttributeError: - return False - - def __ne__(self, other): - return not self == other - - # Copy interface. - - def copy(self): - return self.__class__(**self.__dict__) - - __copy__ = copy - - # Iterator interface. 
- - def __iter__(self): - return iter(self.__dict__) - - def __repr__(self): - return "%s(%s)" % (self.__class__.__name__, repr(self.__dict__)) - - def __str__(self): - return "%s(%s)" % (self.__class__.__name__, str(self.__dict__)) - -class FlagsContainer(Container): - """ - A container providing pretty-printing for flags. - - Only set flags are displayed. - """ - - @recursion_lock("<...>") - def __str__(self): - d = dict((k, self[k]) for k in self - if self[k] and not k.startswith("_")) - return "%s(%s)" % (self.__class__.__name__, pformat(d)) - -class ListContainer(list): - """ - A container for lists. - """ - - __slots__ = ["__recursion_lock__"] - - @recursion_lock("[...]") - def __str__(self): - return pformat(self) - -class LazyContainer(object): - - __slots__ = ["subcon", "stream", "pos", "context", "_value"] - - def __init__(self, subcon, stream, pos, context): - self.subcon = subcon - self.stream = stream - self.pos = pos - self.context = context - self._value = NotImplemented - - def __eq__(self, other): - try: - return self._value == other._value - except AttributeError: - return False - - def __ne__(self, other): - return not (self == other) - - def __str__(self): - return self.__pretty_str__() - - def __pretty_str__(self, nesting = 1, indentation = " "): - if self._value is NotImplemented: - text = "" - elif hasattr(self._value, "__pretty_str__"): - text = self._value.__pretty_str__(nesting, indentation) - else: - text = repr(self._value) - return "%s: %s" % (self.__class__.__name__, text) - - def read(self): - self.stream.seek(self.pos) - return self.subcon._parse(self.stream, self.context) - - def dispose(self): - self.subcon = None - self.stream = None - self.context = None - self.pos = None - - def _get_value(self): - if self._value is NotImplemented: - self._value = self.read() - return self._value - - value = property(_get_value) - - has_value = property(lambda self: self._value is not NotImplemented) diff --git a/construct/lib/containers.py 
b/construct/lib/containers.py new file mode 100644 index 000000000..66b6ef98b --- /dev/null +++ b/construct/lib/containers.py @@ -0,0 +1,290 @@ +from construct.lib.py3compat import * +import re +import sys + + +globalPrintFullStrings = False +globalPrintFalseFlags = False +globalPrintPrivateEntries = False + + +def setGlobalPrintFullStrings(enabled=False): + r""" + When enabled, Container __str__ produces full content of bytes and unicode strings, otherwise and by default, it produces truncated output (16 bytes and 32 characters). + + :param enabled: bool + """ + global globalPrintFullStrings + globalPrintFullStrings = enabled + + +def setGlobalPrintFalseFlags(enabled=False): + r""" + When enabled, Container __str__ that was produced by FlagsEnum parsing prints all values, otherwise and by default, it prints only the values that are True. + + :param enabled: bool + """ + global globalPrintFalseFlags + globalPrintFalseFlags = enabled + + +def setGlobalPrintPrivateEntries(enabled=False): + r""" + When enabled, Container __str__ shows keys like _ _index _etc, otherwise and by default, it hides those keys. __repr__ never shows private entries. 
+ + :param enabled: bool + """ + global globalPrintPrivateEntries + globalPrintPrivateEntries = enabled + + +def recursion_lock(retval="", lock_name="__recursion_lock__"): + """Used internally.""" + def decorator(func): + def wrapper(self, *args, **kw): + if getattr(self, lock_name, False): + return retval + setattr(self, lock_name, True) + try: + return func(self, *args, **kw) + finally: + delattr(self, lock_name) + + wrapper.__name__ = func.__name__ + return wrapper + + return decorator + + +def value_to_string(value): + if value.__class__.__name__ == "EnumInteger": + return "(enum) (unknown) %s" % (value, ) + + if value.__class__.__name__ == "EnumIntegerString": + return "(enum) %s %s" % (value, value.intvalue, ) + + if value.__class__.__name__ in ["HexDisplayedBytes", "HexDumpDisplayedBytes"]: + return str(value) + + if isinstance(value, bytes): + printingcap = 16 + if len(value) <= printingcap or globalPrintFullStrings: + return "%s (total %d)" % (repr(value), len(value)) + return "%s... (truncated, total %d)" % (repr(value[:printingcap]), len(value)) + + if isinstance(value, str): + printingcap = 32 + if len(value) <= printingcap or globalPrintFullStrings: + return "%s (total %d)" % (repr(value), len(value)) + return "%s... (truncated, total %d)" % (repr(value[:printingcap]), len(value)) + + return str(value) + + +class Container(dict): + # NOTE: be careful when working with these objects. Any method can be shadowed, so instead of doing `self.items()` you should do `dict.items(self)`. Operation that use methods implicitly (such as `x in self` or `self[k]`) will work as usual. + r""" + Generic ordered dictionary that allows both key and attribute access, and preserves key order by insertion. Adding keys is preferred using \*\*entrieskw. Equality does NOT check item order. Also provides regex searching. 
+ + Example:: + + >>> Container() + >>> Container([("name", "anonymous"), ("age", 21)]) + >>> Container(name="anonymous", age=21) + >>> Container(dict2) + >>> Container(container2) + + :: + + >>> print(repr(obj)) + Container(text='utf8 decoded string...', value=123) + >>> print(obj) + Container + text = u'utf8 decoded string...' (total 22) + value = 123 + """ + __slots__ = ('__dict__', '__recursion_lock__') + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.__dict__ = self + + def copy(self, /): + return self.__class__(self) + + def __copy__(self, /): + return self.__class__.copy(self) + + # this is required because otherwise copy.deepcopy() will + # copy self and self.__dict__ separately for some reason + def __deepcopy__(self, _, /): + return self.__class__.copy(self) + + def __dir__(self, /): + """For auto completion of attributes based on container values.""" + return list(self.__class__.keys(self)) + list(self.__class__.__dict__) + dir(super(Container, self)) + + def __eq__(self, other, /): + if self is other: + return True + if not isinstance(other, dict): + return False + def isequal(v1, v2): + if v1.__class__.__name__ == "ndarray" or v2.__class__.__name__ == "ndarray": + import numpy + return numpy.array_equal(v1, v2) + return v1 == v2 + for k, v in self.__class__.items(self): + if isinstance(k, str) and k.startswith("_"): + continue + if k not in other or not isequal(v, other[k]): + return False + for k, v in other.__class__.items(other): + if isinstance(k, str) and k.startswith("_"): + continue + if k not in self or not isequal(v, self[k]): + return False + return True + + def __ne__(self, other, /): + return not self == other + + @recursion_lock() + def __repr__(self, /): + parts = [] + for k, v in self.__class__.items(self): + if isinstance(k, str) and k.startswith("_"): + continue + parts.append(f'{k}={v!r}') + return "Container(%s)" % ", ".join(parts) + + @recursion_lock() + def __str__(self, /): + indentation = "\n " 
+ text = ["Container: "] + isflags = getattr(self, "_flagsenum", False) + for k, v in self.__class__.items(self): + if isinstance(k, str) and k.startswith("_") and not globalPrintPrivateEntries: + continue + if isflags and not v and not globalPrintFalseFlags: + continue + text.extend([indentation, str(k), " = ", indentation.join(value_to_string(v).split("\n"))]) + return "".join(text) + + def _search(self, compiled_pattern, search_all, /): + items = [] + for key, value in self.__class__.items(self): + try: + if isinstance(value, (Container, ListContainer)): + ret = value.__class__._search(value, compiled_pattern, search_all) + if ret is not None: + if search_all: + items.extend(ret) + else: + return ret + elif compiled_pattern.match(key): + if search_all: + items.append(value) + else: + return value + except Exception: + pass + if search_all: + return items + else: + return None + + def search(self, pattern): + """ + Searches a container (non-recursively) using regex. + """ + compiled_pattern = re.compile(pattern) + return self.__class__._search(self, compiled_pattern, False) + + def search_all(self, pattern): + """ + Searches a container (recursively) using regex. + """ + compiled_pattern = re.compile(pattern) + return self.__class__._search(self, compiled_pattern, True) + + def __getstate__(self, /): + """ + Used by pickle to serialize an instance to a dict. + """ + return dict(self) + + def __setstate__(self, state, /): + """ + Used by pickle to de-serialize from a dict. + """ + self.__class__.clear(self) + self.__class__.update(self, state) + + +class ListContainer(list): + r""" + Generic container like list. Provides pretty-printing. Also provides regex searching. 
+ + Example:: + + >>> ListContainer() + >>> ListContainer([1, 2, 3]) + + :: + + >>> obj + ListContainer([1, 2, 3]) + >>> print(repr(obj)) + ListContainer([1, 2, 3]) + >>> print(obj) + ListContainer + 1 + 2 + 3 + """ + + @recursion_lock() + def __repr__(self, /): + return "ListContainer(%s)" % (list.__repr__(self),) + + @recursion_lock() + def __str__(self, /): + indentation = "\n " + text = ["ListContainer: "] + for k in self: + text.append(indentation) + lines = value_to_string(k).split("\n") + text.append(indentation.join(lines)) + return "".join(text) + + def _search(self, compiled_pattern, search_all, /): + items = [] + for item in self: + try: + ret = item.__class__._search(item, compiled_pattern, search_all) + except Exception: + continue + if ret is not None: + if search_all: + items.extend(ret) + else: + return ret + if search_all: + return items + else: + return None + + def search(self, pattern): + """ + Searches a container (non-recursively) using regex. + """ + compiled_pattern = re.compile(pattern) + return self._search(compiled_pattern, False) + + def search_all(self, pattern): + """ + Searches a container (recursively) using regex. 
+ """ + compiled_pattern = re.compile(pattern) + return self._search(compiled_pattern, True) diff --git a/construct/lib/hex.py b/construct/lib/hex.py index 049e2964f..364f6c851 100644 --- a/construct/lib/hex.py +++ b/construct/lib/hex.py @@ -1,35 +1,94 @@ -_printable = dict((chr(i), ".") for i in range(256)) -_printable.update((chr(i), chr(i)) for i in range(32, 128)) +from construct.lib.py3compat import * +import binascii -def hexdump(data, linesize): - prettylines = [] - if len(data) < 65536: - fmt = "%%04X %%-%ds %%s" - else: - fmt = "%%08X %%-%ds %%s" - fmt = fmt % (3 * linesize - 1,) - for i in xrange(0, len(data), linesize): - line = data[i : i + linesize] - hextext = " ".join(b.encode("hex") for b in line) - rawtext = "".join(_printable[b] for b in line) - prettylines.append(fmt % (i, hextext, rawtext)) - return prettylines - -class HexString(str): - """ - represents a string that will be hex-dumped (only via __pretty_str__). - this class derives of str, and behaves just like a normal string in all - other contexts. 
- """ - def __init__(self, data, linesize = 16): - self.linesize = linesize +class HexDisplayedInteger(int): + """Used internally.""" + def __str__(self): + return "0x" + format(self, self.fmtstr).upper() - def __new__(cls, data, *args, **kwargs): - return str.__new__(cls, data) + @staticmethod + def new(intvalue, fmtstr): + obj = HexDisplayedInteger(intvalue) + obj.fmtstr = fmtstr + return obj +class HexDisplayedBytes(bytes): + """Used internally.""" def __str__(self): - if not self: - return "''" - sep = "\n" - return sep + sep.join(hexdump(self, self.linesize)) + if not hasattr(self, "render"): + self.render = "unhexlify(%r)" % (binascii.hexlify(self).decode(), ) + return self.render + +class HexDisplayedDict(dict): + """Used internally.""" + def __str__(self): + if not hasattr(self, "render"): + self.render = "unhexlify(%r)" % (binascii.hexlify(self["data"]).decode(), ) + return self.render + +class HexDumpDisplayedBytes(bytes): + """Used internally.""" + def __str__(self): + if not hasattr(self, "render"): + self.render = hexdump(self, 16) + return self.render + +class HexDumpDisplayedDict(dict): + """Used internally.""" + def __str__(self): + if not hasattr(self, "render"): + self.render = hexdump(self["data"], 16) + return self.render + + +# Map an integer in the inclusive range 0-255 to its string byte representation +PRINTABLE = [bytes2str(int2byte(i)) if 32 <= i < 128 else '.' 
for i in range(256)] +HEXPRINT = [format(i, '02X') for i in range(256)] + + +def hexdump(data, linesize): + r""" + Turns bytes into a unicode string of the format: + + :: + + >>>print(hexdump(b'0' * 100, 16)) + hexundump(\"\"\" + 0000 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 0000000000000000 + 0010 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 0000000000000000 + 0020 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 0000000000000000 + 0030 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 0000000000000000 + 0040 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 0000000000000000 + 0050 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 0000000000000000 + 0060 30 30 30 30 0000 + \"\"\") + """ + if len(data) < 16**4: + fmt = "%%04X %%-%ds %%s" % (3*linesize-1,) + elif len(data) < 16**8: + fmt = "%%08X %%-%ds %%s" % (3*linesize-1,) + else: + raise ValueError("hexdump cannot process more than 16**8 or 4294967296 bytes") + prettylines = [] + prettylines.append('hexundump("""') + for i in range(0, len(data), linesize): + line = data[i:i+linesize] + hextext = " ".join(HEXPRINT[b] for b in line) + rawtext = "".join(PRINTABLE[b] for b in line) + prettylines.append(fmt % (i, str(hextext), str(rawtext))) + prettylines.append('""")') + prettylines.append("") + return "\n".join(prettylines) + + +def hexundump(data, linesize): + r""" + Reverse of `hexdump`. 
+ """ + raw = [] + for line in data.split("\n")[1:-2]: + line = line[line.find(" "):].lstrip() + bytes = [int2byte(int(s,16)) for s in line[:3*linesize].split()] + raw.extend(bytes) + return b"".join(raw) diff --git a/construct/lib/py3compat.py b/construct/lib/py3compat.py new file mode 100644 index 000000000..2dbd4ea4b --- /dev/null +++ b/construct/lib/py3compat.py @@ -0,0 +1,43 @@ +import sys +import platform + +PY = sys.version_info[:2] +PYPY = '__pypy__' in sys.builtin_module_names +ONWINDOWS = platform.system() == "Windows" + +INT2BYTE_CACHE = {i: bytes([i]) for i in range(256)} + +# these 2 function probably should be moved to construct.lib.binary +def int2byte(character: int) -> bytes: + """Converts integer in range 0..255 into 1-byte string.""" + return INT2BYTE_CACHE[character] + + +def byte2int(character: bytes) -> int: + """Converts 1-byte string into integer in range 0..255.""" + return character[0] + +# these 2 probably should be inlined where they are used +def str2bytes(string: str) -> bytes: + """Converts '...' string into b'...' string. On PY2 they are equivalent. On PY3 its utf8 encoded.""" + return string.encode("utf8") + + +def bytes2str(string: bytes) -> str: + """Converts b'...' string into '...' string. On PY2 they are equivalent. 
On PY3 its utf8 decoded.""" + return string.decode("utf8") + +# Deprecated, kept for backwards compatibility: +PY2 = False +PY3 = True +stringtypes = (bytes, str) +integertypes = (int,) +unicodestringtype = str +bytestringtype = bytes +reprstring = repr +integers2bytes = bytes +bytes2integers = list + +def trimstring(data: 'str | bytes') -> str: + """Trims b- u- prefix""" + return repr(data).lstrip('b') diff --git a/construct/macros.py b/construct/macros.py deleted file mode 100644 index cfdcf465c..000000000 --- a/construct/macros.py +++ /dev/null @@ -1,634 +0,0 @@ -from construct.lib import BitStreamReader, BitStreamWriter, encode_bin, decode_bin -from construct.core import (Struct, MetaField, StaticField, FormatField, - OnDemand, Pointer, Switch, Value, RepeatUntil, MetaArray, Sequence, Range, - Select, Pass, SizeofError, Buffered, Restream, Reconfig) -from construct.adapters import (BitIntegerAdapter, PaddingAdapter, - ConstAdapter, CStringAdapter, LengthValueAdapter, IndexingAdapter, - PaddedStringAdapter, FlagsAdapter, StringAdapter, MappingAdapter) - - -#=============================================================================== -# fields -#=============================================================================== -def Field(name, length): - """ - A field consisting of a specified number of bytes. - - :param str name: the name of the field - :param length: the length of the field. the length can be either an integer - (StaticField), or a function that takes the context as an argument and - returns the length (MetaField) - """ - if callable(length): - return MetaField(name, length) - else: - return StaticField(name, length) - -def BitField(name, length, swapped = False, signed = False, bytesize = 8): - """ - BitFields, as the name suggests, are fields that operate on raw, unaligned - bits, and therefore must be enclosed in a BitStruct. Using them is very - similar to all normal fields: they take a name and a length (in bits). 
- - :param str name: name of the field - :param int length: number of bits in the field, or a function that takes - the context as its argument and returns the length - :param bool swapped: whether the value is byte-swapped - :param bool signed: whether the value is signed - :param int bytesize: number of bits per byte, for byte-swapping - - >>> foo = BitStruct("foo", - ... BitField("a", 3), - ... Flag("b"), - ... Padding(3), - ... Nibble("c"), - ... BitField("d", 5), - ... ) - >>> foo.parse("\\xe1\\x1f") - Container(a = 7, b = False, c = 8, d = 31) - >>> foo = BitStruct("foo", - ... BitField("a", 3), - ... Flag("b"), - ... Padding(3), - ... Nibble("c"), - ... Struct("bar", - ... Nibble("d"), - ... Bit("e"), - ... ) - ... ) - >>> foo.parse("\\xe1\\x1f") - Container(a = 7, b = False, bar = Container(d = 15, e = 1), c = 8) - """ - - return BitIntegerAdapter(Field(name, length), - length, - swapped=swapped, - signed=signed, - bytesize=bytesize - ) - -def Padding(length, pattern = "\x00", strict = False): - r"""a padding field (value is discarded) - * length - the length of the field. the length can be either an integer, - or a function that takes the context as an argument and returns the - length - * pattern - the padding pattern (character) to use. default is "\x00" - * strict - whether or not to raise an exception is the actual padding - pattern mismatches the desired pattern. default is False. - """ - return PaddingAdapter(Field(None, length), - pattern = pattern, - strict = strict, - ) - -def Flag(name, truth = 1, falsehood = 0, default = False): - """ - A flag. - - Flags are usually used to signify a Boolean value, and this construct - maps values onto the ``bool`` type. - - .. note:: This construct works with both bit and byte contexts. - - .. warning:: Flags default to False, not True. This is different from the - C and Python way of thinking about truth, and may be subject to change - in the future. 
- - :param str name: field name - :param int truth: value of truth (default 1) - :param int falsehood: value of falsehood (default 0) - :param bool default: default value (default False) - """ - - return SymmetricMapping(Field(name, 1), - {True : chr(truth), False : chr(falsehood)}, - default = default, - ) - -#=============================================================================== -# field shortcuts -#=============================================================================== -def Bit(name): - """a 1-bit BitField; must be enclosed in a BitStruct""" - return BitField(name, 1) -def Nibble(name): - """a 4-bit BitField; must be enclosed in a BitStruct""" - return BitField(name, 4) -def Octet(name): - """an 8-bit BitField; must be enclosed in a BitStruct""" - return BitField(name, 8) - -def UBInt8(name): - """unsigned, big endian 8-bit integer""" - return FormatField(name, ">", "B") -def UBInt16(name): - """unsigned, big endian 16-bit integer""" - return FormatField(name, ">", "H") -def UBInt32(name): - """unsigned, big endian 32-bit integer""" - return FormatField(name, ">", "L") -def UBInt64(name): - """unsigned, big endian 64-bit integer""" - return FormatField(name, ">", "Q") - -def SBInt8(name): - """signed, big endian 8-bit integer""" - return FormatField(name, ">", "b") -def SBInt16(name): - """signed, big endian 16-bit integer""" - return FormatField(name, ">", "h") -def SBInt32(name): - """signed, big endian 32-bit integer""" - return FormatField(name, ">", "l") -def SBInt64(name): - """signed, big endian 64-bit integer""" - return FormatField(name, ">", "q") - -def ULInt8(name): - """unsigned, little endian 8-bit integer""" - return FormatField(name, "<", "B") -def ULInt16(name): - """unsigned, little endian 16-bit integer""" - return FormatField(name, "<", "H") -def ULInt32(name): - """unsigned, little endian 32-bit integer""" - return FormatField(name, "<", "L") -def ULInt64(name): - """unsigned, little endian 64-bit integer""" - return 
FormatField(name, "<", "Q") - -def SLInt8(name): - """signed, little endian 8-bit integer""" - return FormatField(name, "<", "b") -def SLInt16(name): - """signed, little endian 16-bit integer""" - return FormatField(name, "<", "h") -def SLInt32(name): - """signed, little endian 32-bit integer""" - return FormatField(name, "<", "l") -def SLInt64(name): - """signed, little endian 64-bit integer""" - return FormatField(name, "<", "q") - -def UNInt8(name): - """unsigned, native endianity 8-bit integer""" - return FormatField(name, "=", "B") -def UNInt16(name): - """unsigned, native endianity 16-bit integer""" - return FormatField(name, "=", "H") -def UNInt32(name): - """unsigned, native endianity 32-bit integer""" - return FormatField(name, "=", "L") -def UNInt64(name): - """unsigned, native endianity 64-bit integer""" - return FormatField(name, "=", "Q") - -def SNInt8(name): - """signed, native endianity 8-bit integer""" - return FormatField(name, "=", "b") -def SNInt16(name): - """signed, native endianity 16-bit integer""" - return FormatField(name, "=", "h") -def SNInt32(name): - """signed, native endianity 32-bit integer""" - return FormatField(name, "=", "l") -def SNInt64(name): - """signed, native endianity 64-bit integer""" - return FormatField(name, "=", "q") - -def BFloat32(name): - """big endian, 32-bit IEEE floating point number""" - return FormatField(name, ">", "f") -def LFloat32(name): - """little endian, 32-bit IEEE floating point number""" - return FormatField(name, "<", "f") -def NFloat32(name): - """native endianity, 32-bit IEEE floating point number""" - return FormatField(name, "=", "f") - -def BFloat64(name): - """big endian, 64-bit IEEE floating point number""" - return FormatField(name, ">", "d") -def LFloat64(name): - """little endian, 64-bit IEEE floating point number""" - return FormatField(name, "<", "d") -def NFloat64(name): - """native endianity, 64-bit IEEE floating point number""" - return FormatField(name, "=", "d") - - 
-#=============================================================================== -# arrays -#=============================================================================== -def Array(count, subcon): - """ - Repeats the given unit a fixed number of times. - - :param int count: number of times to repeat - :param ``Construct`` subcon: construct to repeat - - >>> c = Array(4, UBInt8("foo")) - >>> c.parse("\\x01\\x02\\x03\\x04") - [1, 2, 3, 4] - >>> c.parse("\\x01\\x02\\x03\\x04\\x05\\x06") - [1, 2, 3, 4] - >>> c.build([5,6,7,8]) - '\\x05\\x06\\x07\\x08' - >>> c.build([5,6,7,8,9]) - Traceback (most recent call last): - ... - construct.core.RangeError: expected 4..4, found 5 - """ - - if callable(count): - con = MetaArray(count, subcon) - else: - con = MetaArray(lambda ctx: count, subcon) - con._clear_flag(con.FLAG_DYNAMIC) - return con - -def PrefixedArray(subcon, length_field = UBInt8("length")): - """an array prefixed by a length field. - * subcon - the subcon to be repeated - * length_field - a construct returning an integer - """ - return LengthValueAdapter( - Sequence(subcon.name, - length_field, - Array(lambda ctx: ctx[length_field.name], subcon), - nested = False - ) - ) - -def OpenRange(mincount, subcon): - from sys import maxint - return Range(mincount, maxint, subcon) - -def GreedyRange(subcon): - """ - Repeats the given unit one or more times. - - :param ``Construct`` subcon: construct to repeat - - >>> from construct import GreedyRange, UBInt8 - >>> c = GreedyRange(UBInt8("foo")) - >>> c.parse("\\x01") - [1] - >>> c.parse("\\x01\\x02\\x03") - [1, 2, 3] - >>> c.parse("\\x01\\x02\\x03\\x04\\x05\\x06") - [1, 2, 3, 4, 5, 6] - >>> c.parse("") - Traceback (most recent call last): - ... - construct.core.RangeError: expected 1..2147483647, found 0 - >>> c.build([1,2]) - '\\x01\\x02' - >>> c.build([]) - Traceback (most recent call last): - ... 
- construct.core.RangeError: expected 1..2147483647, found 0 - """ - - return OpenRange(1, subcon) - -def OptionalGreedyRange(subcon): - """ - Repeats the given unit zero or more times. This repeater can't - fail, as it accepts lists of any length. - - :param ``Construct`` subcon: construct to repeat - - >>> from construct import OptionalGreedyRange, UBInt8 - >>> c = OptionalGreedyRange(UBInt8("foo")) - >>> c.parse("") - [] - >>> c.parse("\\x01\\x02") - [1, 2] - >>> c.build([]) - '' - >>> c.build([1,2]) - '\\x01\\x02' - """ - - return OpenRange(0, subcon) - - -#=============================================================================== -# subconstructs -#=============================================================================== -def Optional(subcon): - """an optional construct. if parsing fails, returns None. - * subcon - the subcon to optionally parse or build - """ - return Select(subcon.name, subcon, Pass) - -def Bitwise(subcon): - """converts the stream to bits, and passes the bitstream to subcon - * subcon - a bitwise construct (usually BitField) - """ - # subcons larger than MAX_BUFFER will be wrapped by Restream instead - # of Buffered. 
implementation details, don't stick your nose in :) - MAX_BUFFER = 1024 * 8 - def resizer(length): - if length & 7: - raise SizeofError("size must be a multiple of 8", length) - return length >> 3 - if not subcon._is_flag(subcon.FLAG_DYNAMIC) and subcon.sizeof() < MAX_BUFFER: - con = Buffered(subcon, - encoder = decode_bin, - decoder = encode_bin, - resizer = resizer - ) - else: - con = Restream(subcon, - stream_reader = BitStreamReader, - stream_writer = BitStreamWriter, - resizer = resizer) - return con - -def Aligned(subcon, modulus = 4, pattern = "\x00"): - r"""aligns subcon to modulus boundary using padding pattern - * subcon - the subcon to align - * modulus - the modulus boundary (default is 4) - * pattern - the padding pattern (default is \x00) - """ - if modulus < 2: - raise ValueError("modulus must be >= 2", modulus) - def padlength(ctx): - return (modulus - (subcon._sizeof(ctx) % modulus)) % modulus - return SeqOfOne(subcon.name, - subcon, - # ?????? - # ?????? - # ?????? - # ?????? - Padding(padlength, pattern = pattern), - nested = False, - ) - -def SeqOfOne(name, *args, **kw): - """a sequence of one element. only the first element is meaningful, the - rest are discarded - * name - the name of the sequence - * args - subconstructs - * kw - any keyword arguments to Sequence - """ - return IndexingAdapter(Sequence(name, *args, **kw), index = 0) - -def Embedded(subcon): - """embeds a struct into the enclosing struct. 
- * subcon - the struct to embed - """ - return Reconfig(subcon.name, subcon, subcon.FLAG_EMBED) - -def Rename(newname, subcon): - """renames an existing construct - * newname - the new name - * subcon - the subcon to rename - """ - return Reconfig(newname, subcon) - -def Alias(newname, oldname): - """creates an alias for an existing element in a struct - * newname - the new name - * oldname - the name of an existing element - """ - return Value(newname, lambda ctx: ctx[oldname]) - - -#=============================================================================== -# mapping -#=============================================================================== -def SymmetricMapping(subcon, mapping, default = NotImplemented): - """defines a symmetrical mapping: a->b, b->a. - * subcon - the subcon to map - * mapping - the encoding mapping (a dict); the decoding mapping is - achieved by reversing this mapping - * default - the default value to use when no mapping is found. if no - default value is given, and exception is raised. setting to Pass would - return the value "as is" (unmapped) - """ - reversed_mapping = dict((v, k) for k, v in mapping.iteritems()) - return MappingAdapter(subcon, - encoding = mapping, - decoding = reversed_mapping, - encdefault = default, - decdefault = default, - ) - -def Enum(subcon, **kw): - """a set of named values mapping. - * subcon - the subcon to map - * kw - keyword arguments which serve as the encoding mapping - * _default_ - an optional, keyword-only argument that specifies the - default value to use when the mapping is undefined. if not given, - and exception is raised when the mapping is undefined. use `Pass` to - pass the unmapped value as-is - """ - return SymmetricMapping(subcon, kw, kw.pop("_default_", NotImplemented)) - -def FlagsEnum(subcon, **kw): - """a set of flag values mapping. 
- * subcon - the subcon to map - * kw - keyword arguments which serve as the encoding mapping - """ - return FlagsAdapter(subcon, kw) - - -#=============================================================================== -# structs -#=============================================================================== -def AlignedStruct(name, *subcons, **kw): - """a struct of aligned fields - * name - the name of the struct - * subcons - the subcons that make up this structure - * kw - keyword arguments to pass to Aligned: 'modulus' and 'pattern' - """ - return Struct(name, *(Aligned(sc, **kw) for sc in subcons)) - -def BitStruct(name, *subcons): - """a struct of bitwise fields - * name - the name of the struct - * subcons - the subcons that make up this structure - """ - return Bitwise(Struct(name, *subcons)) - -def EmbeddedBitStruct(*subcons): - """an embedded BitStruct. no name is necessary. - * subcons - the subcons that make up this structure - """ - return Bitwise(Embedded(Struct(None, *subcons))) - -#=============================================================================== -# strings -#=============================================================================== -def String(name, length, encoding=None, padchar=None, paddir="right", - trimdir="right"): - """ - A configurable, fixed-length string field. - - The padding character must be specified for padding and trimming to work. - - :param str name: name - :param int length: length, in bytes - :param str encoding: encoding (e.g. 
"utf8") or None for no encoding - :param str padchar: optional character to pad out strings - :param str paddir: direction to pad out strings; one of "right", "left", - or "both" - :param str trim: direction to trim strings; one of "right", "left" - - >>> from construct import String - >>> String("foo", 5).parse("hello") - 'hello' - >>> - >>> String("foo", 12, encoding = "utf8").parse("hello joh\\xd4\\x83n") - u'hello joh\\u0503n' - >>> - >>> foo = String("foo", 10, padchar = "X", paddir = "right") - >>> foo.parse("helloXXXXX") - 'hello' - >>> foo.build("hello") - 'helloXXXXX' - """ - - con = StringAdapter(Field(name, length), encoding=encoding) - if padchar is not None: - con = PaddedStringAdapter(con, padchar=padchar, paddir=paddir, - trimdir=trimdir) - return con - -def PascalString(name, length_field=UBInt8("length"), encoding=None): - """ - A length-prefixed string. - - ``PascalString`` is named after the string types of Pascal, which are - length-prefixed. Lisp strings also follow this convention. - - The length field will appear in the same ``Container`` as the - ``PascalString``, with the given name. - - :param str name: name - :param ``Construct`` length_field: a field which will store the length of - the string - :param str encoding: encoding (e.g. "utf8") or None for no encoding - - >>> foo = PascalString("foo") - >>> foo.parse("\\x05hello") - 'hello' - >>> foo.build("hello world") - '\\x0bhello world' - >>> - >>> foo = PascalString("foo", length_field = UBInt16("length")) - >>> foo.parse("\\x00\\x05hello") - 'hello' - >>> foo.build("hello") - '\\x00\\x05hello' - """ - - return StringAdapter( - LengthValueAdapter( - Sequence(name, - length_field, - Field("data", lambda ctx: ctx[length_field.name]), - ) - ), - encoding=encoding, - ) - -def CString(name, terminators="\x00", encoding=None, - char_field=Field(None, 1)): - """ - A string ending in a terminator. - - ``CString`` is similar to the strings of C, C++, and other related - programming languages. 
- - By default, the terminator is the NULL byte (``0x00``). - - :param str name: name - :param iterable terminators: sequence of valid terminators, in order of - preference - :param str encoding: encoding (e.g. "utf8") or None for no encoding - :param ``Construct`` char_field: construct representing a single character - - >>> foo = CString("foo") - >>> foo.parse("hello\\x00") - 'hello' - >>> foo.build("hello") - 'hello\\x00' - >>> foo = CString("foo", terminators = "XYZ") - >>> foo.parse("helloX") - 'hello' - >>> foo.parse("helloY") - 'hello' - >>> foo.parse("helloZ") - 'hello' - >>> foo.build("hello") - 'helloX' - """ - - return Rename(name, - CStringAdapter( - RepeatUntil(lambda obj, ctx: obj in terminators, - char_field, - ), - terminators=terminators, - encoding=encoding, - ) - ) - - -#=============================================================================== -# conditional -#=============================================================================== -def IfThenElse(name, predicate, then_subcon, else_subcon): - """an if-then-else conditional construct: if the predicate indicates True, - `then_subcon` will be used; otherwise `else_subcon` - * name - the name of the construct - * predicate - a function taking the context as an argument and returning - True or False - * then_subcon - the subcon that will be used if the predicate returns True - * else_subcon - the subcon that will be used if the predicate returns False - """ - return Switch(name, lambda ctx: bool(predicate(ctx)), - { - True : then_subcon, - False : else_subcon, - } - ) - -def If(predicate, subcon, elsevalue = None): - """an if-then conditional construct: if the predicate indicates True, - subcon will be used; otherwise, `elsevalue` will be returned instead. 
- * predicate - a function taking the context as an argument and returning - True or False - * subcon - the subcon that will be used if the predicate returns True - * elsevalue - the value that will be used should the predicate return False. - by default this value is None. - """ - return IfThenElse(subcon.name, - predicate, - subcon, - Value("elsevalue", lambda ctx: elsevalue) - ) - - -#=============================================================================== -# misc -#=============================================================================== -def OnDemandPointer(offsetfunc, subcon, force_build = True): - """an on-demand pointer. - * offsetfunc - a function taking the context as an argument and returning - the absolute stream position - * subcon - the subcon that will be parsed from the `offsetfunc()` stream - position on demand - * force_build - see OnDemand. by default True. - """ - return OnDemand(Pointer(offsetfunc, subcon), - advance_stream = False, - force_build = force_build - ) - -def Magic(data): - return ConstAdapter(Field(None, len(data)), data) diff --git a/construct/protocols/__init__.py b/construct/protocols/__init__.py deleted file mode 100644 index 0ec215ec9..000000000 --- a/construct/protocols/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -""" -protocols - a collection of network protocols -unlike the formats package, protocols convey information between two sides -""" diff --git a/construct/protocols/application/__init__.py b/construct/protocols/application/__init__.py deleted file mode 100644 index 7ea61f703..000000000 --- a/construct/protocols/application/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -""" -application layer (various) protocols -""" - diff --git a/construct/protocols/application/dns.py b/construct/protocols/application/dns.py deleted file mode 100644 index e98ac098b..000000000 --- a/construct/protocols/application/dns.py +++ /dev/null @@ -1,147 +0,0 @@ -""" -Domain Name System (TCP/IP protocol stack) -""" -from construct 
import * -from construct.protocols.layer3.ipv4 import IpAddressAdapter - - -class DnsStringAdapter(Adapter): - def _encode(self, obj, context): - parts = obj.split(".") - parts.append("") - return parts - def _decode(self, obj, context): - return ".".join(obj[:-1]) - -dns_record_class = Enum(UBInt16("class"), - RESERVED = 0, - INTERNET = 1, - CHAOS = 3, - HESIOD = 4, - NONE = 254, - ANY = 255, -) - -dns_record_type = Enum(UBInt16("type"), - IPv4 = 1, - AUTHORITIVE_NAME_SERVER = 2, - CANONICAL_NAME = 5, - NULL = 10, - MAIL_EXCHANGE = 15, - TEXT = 16, - X25 = 19, - ISDN = 20, - IPv6 = 28, - UNSPECIFIED = 103, - ALL = 255, -) - -query_record = Struct("query_record", - DnsStringAdapter( - RepeatUntil(lambda obj, ctx: obj == "", - PascalString("name") - ) - ), - dns_record_type, - dns_record_class, -) - -rdata = Field("rdata", lambda ctx: ctx.rdata_length) - -resource_record = Struct("resource_record", - CString("name", terminators = "\xc0\x00"), - Padding(1), - dns_record_type, - dns_record_class, - UBInt32("ttl"), - UBInt16("rdata_length"), - IfThenElse("data", lambda ctx: ctx.type == "IPv4", - IpAddressAdapter(rdata), - rdata - ) -) - -dns = Struct("dns", - UBInt16("id"), - BitStruct("flags", - Enum(Bit("type"), - QUERY = 0, - RESPONSE = 1, - ), - Enum(Nibble("opcode"), - STANDARD_QUERY = 0, - INVERSE_QUERY = 1, - SERVER_STATUS_REQUEST = 2, - NOTIFY = 4, - UPDATE = 5, - ), - Flag("authoritive_answer"), - Flag("truncation"), - Flag("recurssion_desired"), - Flag("recursion_available"), - Padding(1), - Flag("authenticated_data"), - Flag("checking_disabled"), - Enum(Nibble("response_code"), - SUCCESS = 0, - FORMAT_ERROR = 1, - SERVER_FAILURE = 2, - NAME_DOES_NOT_EXIST = 3, - NOT_IMPLEMENTED = 4, - REFUSED = 5, - NAME_SHOULD_NOT_EXIST = 6, - RR_SHOULD_NOT_EXIST = 7, - RR_SHOULD_EXIST = 8, - NOT_AUTHORITIVE = 9, - NOT_ZONE = 10, - ), - ), - UBInt16("question_count"), - UBInt16("answer_count"), - UBInt16("authority_count"), - UBInt16("additional_count"), - Array(lambda ctx: 
ctx.question_count, - Rename("questions", query_record), - ), - Rename("answers", - Array(lambda ctx: ctx.answer_count, resource_record) - ), - Rename("authorities", - Array(lambda ctx: ctx.authority_count, resource_record) - ), - Array(lambda ctx: ctx.additional_count, - Rename("additionals", resource_record), - ), -) - - -if __name__ == "__main__": - cap1 = ( - "2624010000010000000000000377777706676f6f676c6503636f6d0000010001" - ).decode("hex") - - cap2 = ( - "2624818000010005000600060377777706676f6f676c6503636f6d0000010001c00c00" - "05000100089065000803777777016cc010c02c0001000100000004000440e9b768c02c" - "0001000100000004000440e9b793c02c0001000100000004000440e9b763c02c000100" - "0100000004000440e9b767c030000200010000a88600040163c030c030000200010000" - "a88600040164c030c030000200010000a88600040165c030c030000200010000a88600" - "040167c030c030000200010000a88600040161c030c030000200010000a88600040162" - "c030c0c00001000100011d0c0004d8ef3509c0d0000100010000ca7c000440e9b309c0" - "80000100010000c4c5000440e9a109c0900001000100004391000440e9b709c0a00001" - "00010000ca7c000442660b09c0b00001000100000266000440e9a709" - ).decode("hex") - - obj = dns.parse(cap1) - print obj - print repr(dns.build(obj)) - - print "-" * 80 - - obj = dns.parse(cap2) - print obj - print repr(dns.build(obj)) - - - - diff --git a/construct/protocols/application/http.py b/construct/protocols/application/http.py deleted file mode 100644 index 8442cc4d8..000000000 --- a/construct/protocols/application/http.py +++ /dev/null @@ -1,154 +0,0 @@ -""" -Hyper Text Transfer Protocol (TCP/IP protocol stack) - -Construct is not meant for text manipulation, and is probably not the right -tool for the job, but I wanted to demonstrate how this could be done using -the provided `text` module. 
-""" -from construct import * -from construct.text import * - - -class HttpParamDictAdapter(Adapter): - """turns the sequence of params into a dict""" - def _encode(self, obj, context): - return [Container(name = k, value = v) for k, v in obj.iteritems()] - def _decode(self, obj, context): - return dict((o.name, o.value) for o in obj) - - -lineterm = Literal("\r\n") -space = Whitespace() - -# http parameter: 'name: value\r\n' -http_param = Struct("params", - StringUpto("name", ":\r\n"), - Literal(":"), - space, - StringUpto("value", "\r"), - lineterm, -) - -http_params = HttpParamDictAdapter( - OptionalGreedyRange(http_param) -) - -# request: command and params -http_request = Struct("request", - StringUpto("command", " "), - space, - StringUpto("url", " "), - space, - Literal("HTTP/"), - StringUpto("version", "\r"), - lineterm, - http_params, - lineterm, -) - -# reply: header (answer and params) and data -http_reply = Struct("reply", - Literal("HTTP/"), - StringUpto("version", " "), - space, - DecNumber("code"), - space, - StringUpto("text", "\r"), - lineterm, - http_params, - lineterm, - HexDumpAdapter( - Field("data", lambda ctx: int(ctx["params"]["Content-length"])) - ), -) - -# session: request followed reply -http_session = Struct("session", - http_request, - http_reply, -) - - -if __name__ == "__main__": - cap1 = ( - "474554202f636e6e2f2e656c656d656e742f696d672f312e352f6365696c696e672f6e" - "61765f706970656c696e655f646b626c75652e67696620485454502f312e310d0a486f" - "73743a20692e636e6e2e6e65740d0a557365722d4167656e743a204d6f7a696c6c612f" - "352e30202857696e646f77733b20553b2057696e646f7773204e5420352e313b20656e" - "2d55533b2072763a312e382e3129204765636b6f2f3230303631303130204669726566" - "6f782f322e300d0a4163636570743a20696d6167652f706e672c2a2f2a3b713d302e35" - "0d0a4163636570742d4c616e67756167653a20656e2d75732c656e3b713d302e350d0a" - "4163636570742d456e636f64696e673a20677a69702c6465666c6174650d0a41636365" - 
"70742d436861727365743a2049534f2d383835392d312c7574662d383b713d302e372c" - "2a3b713d302e370d0a4b6565702d416c6976653a203330300d0a436f6e6e656374696f" - "6e3a206b6565702d616c6976650d0a526566657265723a20687474703a2f2f7777772e" - "636e6e2e636f6d2f0d0a0d0a485454502f312e3120323030204f4b0d0a446174653a20" - "53756e2c2031302044656320323030362031373a34383a303120474d540d0a53657276" - "65723a204170616368650d0a436f6e74656e742d747970653a20696d6167652f676966" - "0d0a457461673a202266313232383761352d63642d3562312d30220d0a4c6173742d6d" - "6f6469666965643a204d6f6e2c2032372046656220323030362032323a33393a303920" - "474d540d0a436f6e74656e742d6c656e6774683a20313435370d0a4163636570742d72" - "616e6765733a2062797465730d0a4b6565702d416c6976653a2074696d656f75743d35" - "2c206d61783d313032340d0a436f6e6e656374696f6e3a204b6565702d416c6976650d" - "0a0d0a47494638396148001600f7000037618d436a94ebf0f4cad5e1bccad93a638fd2" - "dce639628e52769c97adc44c7299426a93dce3eb6182a5dee5ec5d7fa338628d466d95" - "88a1bb3c65907b97b4d43f3ba7bacdd9e1eaa6b8cce6ebf1dc5a59cc1313718faed8e0" - "e99fb3c8ced9e350759b6989aa6787a85e80a391a8c0ffffffbbc9d8b1c2d3e0e7eed1" - "dae5c2cfdcd2dbe57c98b4e7ecf23b648f587ba098aec4859eb9e4e9ef3e67918aa3bc" - "aebfd17793b1cfd9e4abbdcfbfcddbb3c3d44b71995a7da13f6791a5b8cccbd6e17491" - "b051759cd535327390afc7d2dfb8c7d7b0c0d24e739a7693b19bb0c64f749ac3cfdd49" - "6f97afc0d14f749b3d66916e8cacb167758ba3bdd84b4c476e96c8d4e0d84340406892" - "597ca0d53331adbed0a3b7cb52779d6f8ead9eb2c87a96b3a6b9cc567a9f94aac294ab" - "c24b70985a7ca1b5c5d5b9c8d7aabccfd94849819bb7acbdd0c5d1dedb5253486f9744" - "6c95da4943ae3832b7464fc40e0e3d659096acc3546d93c63c42796b88dce4eb815b74" - "d02d1e9db2c7dc4a4a89a1bbc2393cd8413e9aafc5d01d1eb7c6d6da4142d43837c542" - "48d3dce6687897d3322a829cb8d93438b2c2d3cd2120c4d1dd95abc3d6dfe8ca0e0cd8" - "4c45e1e7eeb6c5d5cdd7e2d93c3c6c8bab5f5a73b14c56c6282b5b6386cd2826cf2829" - "d5dee73e638c9f788acf3626686683436790d02724d32f2f7f728cde6261dd6864df6d" - 
"6bc0353ecc3537dd545499617387637a864a5e8e697fd437388ca5be90a7c085687e8f" - "a6bfd31d1e48648ce26665476d96d93137cd100fcb4944587195c02e34cd1619d94342" - "7d7a95da4141da4343d63930d73c3399677bc3d0ddd22a2ad01f22d42f2d6d7d9dd124" - "1de14b516384a6c64c52a64b58ab49514969915b7ea2c3636a734a5daa5255d9454468" - "87a9bb3439be3b39dc353ecf26245e7396bc444c585d806081a46283a6dd615dd74a46" - "dd675dd74138c90909dbe2ea6d8cac834d6489a2bcb15a65c34851b8636d54789e5679" - "9ec26e78ae5762c20000d0dae4955c68dde4ecc0676fe0e6ed87a0bb4a7098446b948c" - "a4bd8f6980aa39317d98b5c50b0d21f90400000000002c00000000480016000008ff00" - "01081c48b0a0c18308132a5c583000c38710234a04e070a2c58b122b62dcc8d1a0c68e" - "20377ec4c802038290080f24b08070e4453627d0b8406950828f160f0eba9c38228311" - "09340df2f0704f8c4e83b4b2d98a82e79fb703b77c455a06204e33816226e1100140e5" - "191f024d267c43a18005270a17241830e8e8c051fcb88d2b044f8e3860b0be914aa5ea" - "53bf6d02cd40da5206800d01fe189d2b500744c217022204729c10028d220edc0a74b5" - "2a0dbb6a98a8c1d160281d2f0dd7e8595b24f086010c5c007c3921d0c11726002e0df8" - "9153c18f79057a5ce8d10000901a066c00b8b2a40365292704680610cd8a103b02ed15" - "db706a8ea45d539471ff222450460a3e0a00207104a08100272978e4d9c7020500062c" - "b0a5d84124170a2e9e9c018e00fa7c90c4112d3c01803a5a48e71141d058b78940ed94" - "f30b20b1109385206d6c204c792b78915e17678cd14208000c80c0000a3651830561c4" - "20401766bcb1441004a447003e044c13c28c00f8b186830d1164ca1d6968f28a1e7f54" - "10c53a1590f38c31c8e062496b068011847a2a0ce442154a54e20e0060e8e001191444" - "e0c6070ba8a0440e5c994001013b70501c00d01149d047740493cc14c3e8c24a16adf4" - "d2082a9d4893491b7d08a4c3058401a00803035de14018393803050a4c5ca0861bf920" - "20c01b176061c01000d4034415304c100e0010c88e5204a50f16248a368984b2073388" - "00008a3cf100d08d39a5084442065bb597c4401390108109631c820e0058acc0001a33" - "c0b0c02364ccf20e005e1c01c10a17b001c00c6b5132dd450f64d0040d0909000e470f" - "78e0402deb5ef4c1315a1470d0016a2cc09104438e70101520bd00c4044119844d0c08" - 
"71d0f0c40c7549f1c506895102c61c53d1051125941010003b").decode("hex") - x = http_session.parse(cap1) - print x - #print x.request.url - #print x.request.params["Referer"] - #print x.reply.params["Server"] - #print "-" * 80 - #print x - - - - - - - - - - - - - diff --git a/construct/protocols/application/telnet.py b/construct/protocols/application/telnet.py deleted file mode 100644 index d4a48b2ad..000000000 --- a/construct/protocols/application/telnet.py +++ /dev/null @@ -1,305 +0,0 @@ -""" -Telnet (TCP/IP protocol stack) - -http://support.microsoft.com/kb/231866 -""" -from construct import * -from construct.text import * - - -command_code = Enum(Byte("code"), - SE = 240, # suboption end - NOP = 241, # no-op - Data_Mark = 242, # - Break = 243, # - Suspend = 244, # - Abort_output = 245, # - Are_You_There = 246, # - Erase_Char = 247, # - Erase_Line = 248, # - Go_Ahead = 249, # other side can transmit now - SB = 250, # suboption begin - WILL = 251, # send says it will do option - WONT = 252, # send says it will NOT do option - DO = 253, # sender asks other side to do option - DONT = 254, # sender asks other side NOT to do option - IAC = 255, # interpretr as command (escape char) -) - -option_code = Enum(Byte("option"), - TRANSMIT_BINARY = 0, - ECHO = 1, - RECONNECTION = 2, - SUPPRESS_GO_AHEAD = 3, - APPROX_MESSAGE_SIZE_NEGOTIATION = 4, - STATUS = 5, - TIMING_MARK = 6, - RCTE = 7, - OUTPUT_LINE_WIDTH = 8, - OUTPUT_PAGE_SIZE = 9, - NAOCRD = 10, - NAOHTS = 11, - NAOHTD = 12, - NAOFFD = 13, - NAOVTS = 14, - NAOVTD = 15, - NAOLFD = 16, - EXTENDED_ASCII = 17, - LOGOUT = 18, - BM = 19, - DATA_ENTRY_TERMINAL = 20, - SUPDUP = 21, - SUPDUP_OUTPUT = 22, - SEND_LOCATION = 23, - TERMINAL_TYPE = 24, - END_OF_RECORD = 25, - TUID = 26, - OUTMRK = 27, - TTYLOC = 28, - TELNET_3270_REGIME = 29, - X3_PAD = 30, - NAWS = 31, - TERMINAL_SPEED = 32, - REMOTE_FLOW_CONTROL = 33, - LINEMODE = 34, - X_DISPLAY_LOCATION = 35, - ENVIRONMENT_OPTION = 36, - AUTHENTICATION = 37, - ENCRYPTION_OPTION = 
38, - NEW_ENVIRONMENT_OPTION = 39, - TN3270E = 40, - XAUTH = 41, - CHARSET = 42, - RSP = 43, - COM_PORT_CONTROL_OPTION = 44, - TELNET_SUPPRESS_LOCAL_ECHO = 45, - TELNET_START_TLS = 46, - _default_ = Pass, -) - -class LookaheadAdapter(Adapter): - def _encode(self, obj, context): - if obj == "\xff": - obj = "\xff\xff" - return obj - def _decode(self, obj, context): - first, second = obj - if first == "\xff": - if second == "\xff": - return "\xff" - else: - raise ValidationError("IAC") - else: - return second - -def TelnetData(name): - return StringAdapter( - GreedyRange( - LookaheadAdapter( - Sequence(name, - Char("data"), - Peek(Char("next")), - ) - ) - ) - ) - -telnet_suboption = Struct("suboption", - option_code, - TelnetData("parameters"), -) - -telnet_command = Struct("command", - Literal("\xff"), - command_code, - Switch("option", lambda ctx: ctx.code, - { - "WILL" : option_code, - "WONT" : option_code, - "DO" : option_code, - "DONT" : option_code, - "SB" : telnet_suboption, - }, - default = Pass, - ), -) - -telnet_unit = Select("telnet_unit", - HexDumpAdapter(TelnetData("data")), - telnet_command, -) - -telnet_session = Rename("telnet_session", GreedyRange(telnet_unit)) - - -if __name__ == "__main__": - # note: this capture contains both the client and server sides - # so you'll see echos and stuff all mingled. it's not Construct's - # fault, i was just too lazy to separate the two. 
- cap1 = ( - "fffd25fffb25fffa2501fff0fffa25000000fff0fffb26fffd18fffd20fffd23fffd27" - "fffd24fffe26fffb18fffb1ffffc20fffc23fffb27fffc24fffd1ffffa2701fff0fffa" - "1801fff0fffa1f009d0042fff0fffa2700fff0fffa1800414e5349fff0fffb03fffd01" - "fffd22fffb05fffd21fffd03fffb01fffc22fffe05fffc21fffe01fffb01fffd06fffd" - "00fffc010d0a7364662e6c6f6e65737461722e6f726720287474797239290d0a696620" - "6e65772c206c6f67696e20276e657727202e2e0d0a0d0a6c6f67696e3a20fffd01fffc" - "06fffb006e6e657765770d0a0d0a0d0a4c617374206c6f67696e3a2054687520446563" - "2032312032303a31333a353320323030362066726f6d2038372e36392e34312e323034" - "2e6361626c652e3031322e6e65742e696c206f6e2074747972760d0a0d0a596f752077" - "696c6c206e6f7720626520636f6e6e656374656420746f204e455755534552206d6b61" - "636374207365727665722e0d0a506c65617365206c6f67696e20617320276e65772720" - "7768656e2070726f6d707465642e0d0a0d0a5b52455455524e5d202d2054484953204d" - "41592054414b452041204d4f4d454e54202e2e201b5b481b5b4a547279696e67203139" - "322e39342e37332e32302e2e2e0d0a436f6e6e656374656420746f206f6c2e66726565" - "7368656c6c2e6f72672e0d0a4573636170652063686172616374657220697320276f66" - "66272e0d0a0d0a7364662e6c6f6e65737461722e6f726720287474797033290d0a6966" - "206e65772c206c6f67696e20276e657727202e2e0d0a0d0a6c6f67696e3a206e6e6577" - "65770d0a0d0a0d0a4c617374206c6f67696e3a20546875204465632032312032303a30" - "343a303120323030362066726f6d207364662e6c6f6e65737461722e6f7267206f6e20" - "74747970390d0a1b5b481b5b4a57656c636f6d6520746f207468652053444620507562" - "6c69632041636365737320554e49582053797374656d202d204573742e20313938370d" - "0a596f75206172652074686520333735746820677565737420746f6461792c206c6f67" - "67656420696e206f6e2032312d4465632d30362032303a31353a32332e0d0a0d0a4172" - "6520796f75207573696e672057696e646f777320324b206f722058503f2028592f4e29" - "200d0a202020202020202d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d" - "2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d0d0a2020202020207c20494d504f52" - 
"54414e54212020504c4541534520524541442054484953205645525920434152454655" - "4c4c59207c0d0a202020202020202d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d" - "2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d0d0a0d0a54686572652069" - "7320612062756720696e207468652057696e646f777328746d292032303030202f2058" - "502054454c4e455420636c69656e742077686963680d0a63617573657320697420746f" - "2073656e642061203c43523e3c4c463e2028646f75626c652072657475726e29206279" - "2064656661756c742e202049660d0a796f7520617265207573696e672057696e646f77" - "7328746d292054454c4e455420796f75204d55535420636f7272656374207468697320" - "5249474854204e4f570d0a696e206f7264657220746f20434f4e54494e55452e202050" - "6c656173652074616b652074686520666f6c6c6f77696e6720342073746570733a0d0a" - "0d0a2020312e202045534341504520746f207468652054454c4e45543e2070726f6d70" - "74206279207072657373696e67205b4354524c5d207769746820796f7572205d206b65" - "790d0a2020322e2020417420796f75722054454c4e45543e2070726f6d707420747970" - "653a202027756e7365742063726c66270d0a20202020202028446f206e6f7420747970" - "652027717569742720616674657220746869732073746570290d0a2020332e20205468" - "656e20707265737320796f7572205b454e5445525d206b657920545749434520746f20" - "72657475726e20746f205344460d0a2020342e20205479706520276d6b616363742720" - "746f2063726561746520796f7572206e657720534446206163636f756e740d0a0d0a41" - "6e20616c7465726e61746976652054454c4e455420636c69656e743a2020687474703a" - "2f2f736466312e6f72672f74656c6e65740d0a0d0a46455020436f6d6d616e643a206d" - "6d6b6b61616363636374740d0d0a1b5b481b5b4a0d0a504c4541534520524541442054" - "484953204341524546554c4c593a0d0a0d0a596f75206172652061626f757420746f20" - "637265617465206120554e4958207368656c6c206163636f756e742e20205468697320" - "6163636f756e74206d617920626520756e6c696b650d0a616e797468696e6720796f75" - "2776652075736564206265666f72652e20205765207572676520796f7520746f206361" - "726566756c6c79207265616420616c6c2074686520746578740d0a646973706c617965" - 
"64206f6e20796f7572207465726d696e616c2c2061732069742077696c6c2061696465" - "20796f7520696e20796f7572206c6561726e696e672e0d0a576520616c736f20656e63" - "6f757261676520796f7520746f2074727920616c6c2074686520636f6d6d616e647320" - "617661696c61626c65207769746820796f7572206e65770d0a6163636f756e742e2020" - "546865726520617265206d616e79207479706573206f662067616d65732c206170706c" - "69636174696f6e7320616e64207574696c69746965730d0a796f752077696c6c206265" - "2061626c6520746f20696e7374616e746c792072756e20696e206a7573742061206665" - "77206d6f6d656e74732e2020496620796f75206172650d0a6c6f6f6b696e6720666f72" - "206120706172746963756c617220636f6d6d616e64206f722076657273696f6e206f66" - "206120636f6d6d616e64207468617420776520646f206e6f740d0a686176652c207468" - "65726520617265207761797320746f2072657175657374207468617420697420626520" - "696e7374616c6c65642e2020576520616c736f206f666665720d0a4449414c55502061" - "636365737320696e207468652055534120616e642043616e6164612077686963682079" - "6f752077696c6c2062652061626c6520746f206c6561726e2061626f75740d0a73686f" - "72746c792e202042652070617469656e742c2072656164207768617420697320646973" - "706c61796564202d204578706c6f726520616e6420456e6a6f79210d0a0d0a5b524554" - "55524e5d0d0d0a0d0a46697273742c20796f75206e65656420746f2063686f6f736520" - "61204c4f47494e2e202041204c4f47494e20616c6c6f777320796f7520746f204c4f47" - "20494e0d0a746f207468652073797374656d2e2020596f7572204c4f47494e2063616e" - "206265203120746f2038206368617261637465727320696e206c656e67746820616e64" - "0d0a63616e20626520636f6d706f736564206f6620616c706861206e756d6572696320" - "636861726163746572732e0d0a0d0a5768617420776f756c6420796f75206c696b6520" - "746f2075736520666f7220796f7572206c6f67696e3f20737365626562756c756c6261" - "62610d0d0a0d0a436f6e67726174756c6174696f6e732c20796f75277665207069636b" - "6564206120434c45414e20757365722069642e20205768617420646f65732074686973" - "206d65616e3f0d0a576520706572666f726d206461696c7920617564697473206f6e20" - 
"6f7572206d61696c73657276657220776869636820616c6c6f777320757320746f2063" - "6865636b206f6e20617474656d7074730d0a6f6620656d61696c2064656c6976657279" - "20666f72206e6f6e2d6578697374656e74206c6f67696e732c206c696b652027736562" - "756c6261272e202049662027736562756c626127207761730d0a746172676574746564" - "20666f7220656d61696c2c20697420776f756c64206c696b656c792068617665206265" - "656e20554345206f72207370616d2e2020486f77657665722c2074686572650d0a6861" - "7665206265656e204e4f20617474656d70747320746f20656d61696c2027736562756c" - "6261407364662e6c6f6e65737461722e6f72672720696e207468652070617374203234" - "3020646179732c0d0a7768696368206d65616e732069742069732061205350414d2046" - "524545206c6f67696e2e2020506c656173652070726f7465637420697420616e642065" - "6e6a6f79210d0a0d0a636f6e74696e75653f20287965732f6e6f29207965730d796573" - "0d0a1b5b481b5b4a1b5b3f31681b3d1b5b36363b31481b5b4b0d0a0d0a2a2a6c696d69" - "746174696f6e7320616e6420706f6c6963792a2a0d0a20205f5f5f5f5f5f5f5f5f5f5f" - "5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f" - "5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f5f0d0a0d0a546865" - "20534446205075626c69632041636365737320554e49582053797374656d2c20612035" - "303128632937206e6f6e2d70726f66697420636f72706f726174696f6e2c0d0a726573" - "65727665732074686520726967687420746f2064656e792061636365737320746f2061" - "6e796f6e65207265676172646c6573732069662074686520757365720d0a686173206d" - "616465206120646f6e6174696f6e206f722070616964206d656d626572736869702064" - "7565732e2020496620612075736572277320616374697669746965730d0a6172652069" - "6e74657266657272696e67207769746820616e6f746865722075736572206f72207573" - "65727320286f6e20746869732073797374656d206f72206f6e0d0a616e6f7468657229" - "20746865207573657220696e207175657374696f6e2077696c6c206861766520746865" - "6972206163636f756e7420616363657373206c696d697465640d0a6f7220706f737369" - "626c792072656d6f7665642e20205370616d6d696e67206f6620616e7920736f727420" - 
"6973206e6f74207065726d697474656420616e6420776f756c640d0a726573756c7420" - "696e206163636f756e742072656d6f76616c2e2020496c6c6567616c20616374697669" - "746965732074686174206163746976656c7920696e766f6c7665200d0a534446202869" - "64206573742c207573696e672053444620746f2072756e20637261636b206f7220666f" - "72206775657373696e672070617373776f72647320616e642f6f720d0a74726164696e" - "6720636f70797269676874656420776f726b292077696c6c206d6f7374206c696b656c" - "7920726573756c7420696e206163636f756e742072656d6f76616c2e0d0a0d0a546865" - "20534446205075626c69632041636365737320554e49582053797374656d206d616b65" - "73206e6f2067756172616e7465657320696e207468652072656c696162696c6974790d" - "0a6f7220707265736572766174696f6e206f66206163636f756e742061636365737369" - "62696c6974792c20656d61696c2073656e74206f722072656365697665642c0d0a6669" - "6c65732075706c6f61646564206f722063726561746564206279206f6e6c696e652065" - "646974696e67206f7220636f6d70696c6174696f6e2e2020546861740d0a6265696e67" - "20736169642c2064617461206c6f73732073686f756c64206f6e6c79206f6363757220" - "647572696e67206120636174617374726f706869632068617264776172650d0a666169" - "6c75726520696e20776869636820637269746963616c2066696c657320776f756c6420" - "626520726573746f7265642066726f6d20746170652061726368697665732e200d0a0d" - "0a4d656d62657273206f662074686520534446205075626c6963204163636573732055" - "4e49582053797374656d2061726520657870656374656420746f20636f6e647563740d" - "0a7468656d73656c76657320696e20616e20617070726f70726961746520616e642072" - "6561736f6e61626c65206d616e6e6572207768656e207573696e67206f757220666163" - "696c69746965732e0d0a0d0a4c69666574696d652041525041206d656d626572736869" - "70206973206261736564206f6e20746865206c69666574696d65206f66205344462c20" - "6e6f74206f66207468650d0a7573657220616e64206973206e6f6e2d7472616e736665" - "7261626c652e20205344462068617320657869737465642073696e6365203139383720" - "616e6420776974680d0a796f757220737570706f7274206974206a757374206d696768" - 
"74206f7574206c69766520796f752e203b2d290d0a2020200d0a416e7920696c6c6567" - "616c206163746976697469657320776869636820696e636c756465732c206275742063" - "65727461696e6c792069736e2774206c696d6974656420746f0d0a7370616d6d696e67" - "2c20706f7274666c6f6f64696e672c20706f72747363616e6e696e672c206972632062" - "6f7473206f7220756e617474656e6465642070726f6365737365730d0a696e74656e64" - "6564206173206120626f742c20656e6372797074696f6e20637261636b696e672c2075" - "6e617574686f726973656420636f6e6e656374696f6e7320746f0d0a72656d6f746520" - "686f73747320616e6420616e7920736f7274206f66207363616d2063616e207265616c" - "6c79206e6f7420626520746f6c65726174656420686572652e0d0a5768793f20426563" - "6175736520746865726520617265206d616e792068657265206f6e2074686973207379" - "7374656d20746861742063616e207375666665722066726f6d0d0a7468697320736f72" - "74206f662061627573652e2020496620796f752077616e7420746f2075736520534446" - "2c20796f75207265616c6c79206861766520746f20636172650d0a61626f7574207468" - "69732073797374656d20616e64207468652070656f706c6520686572652e2020496620" - "796f7520646f6e27742077616e7420746f20636172652c207468656e0d0a796f752072" - "65616c6c792073686f756c646e2774207573652074686973207265736f757263652e0d" - "0a1b5b36363b31481b5b4b1b5b3f316c1b3e0d0a49206167726565207769746820796f" - "757220706f6c69637920616e642061636365707420697420287965732f6e6f293a2079" - "79657365730d0d0a0d0a4279206167726565696e6720616e6420616363657074696e67" - "206f757220706f6c69637920776520747275737420796f7520746f0d0a666f6c6c6f77" - "2069742e20205468616e6b20796f7520616e6420626520726573706f6e7369626c6521" - "0d0a0d0a5b52455455524e5d0d0d0a1b5b481b5b4a534556454e20564552592053494d" - "504c45205155455354494f4e533a0d0a0d0a506c656173652070726f76696465207468" - "6520666f6c6c6f77696e6720696e666f726d6174696f6e2e2020596f757220686f6e65" - "737479206973207265717565737465640d0a617320697420697320637269746963616c" - "20696e206d61696e7461696e696e672074686520696e74656772697479206f66206f75" - 
"722073797374656d2e20204e65770d0a6163636f756e7473207769746820626f677573" - "20696e666f726d6174696f6e206d617920626520707572676564202a776974686f7574" - "2a207761726e696e672e0d0a0d0a4354524c2d552077696c6c20636c65617220696e70" - "7574202e2e0d0a0d0a596f75722046756c6c204e616d653a202020202066666f6f6f6f" - "626261617272085e48085e4808080808080808085e485e485e485e485e485e485e485e" - "48035e43035e4315082008082008082008082008082008082008082008082008082008" - "0820080820080820080820080820080820080820080820080820080820080820080820" - "08082008082008082008082008082008082008082008082008082008" - ).decode("hex") - print telnet_session.parse(cap1) diff --git a/construct/protocols/ipstack.py b/construct/protocols/ipstack.py deleted file mode 100644 index 1af9bbc0f..000000000 --- a/construct/protocols/ipstack.py +++ /dev/null @@ -1,135 +0,0 @@ -""" -TCP/IP Protocol Stack -Note: before parsing the application layer over a TCP stream, you must -first combine all the TCP frames into a stream. See utils.tcpip for -some solutions -""" -from construct import Struct, Rename, HexDumpAdapter, Field, Switch, Pass -from construct.protocols.layer2.ethernet import ethernet_header -from construct.protocols.layer3.ipv4 import ipv4_header -from construct.protocols.layer3.ipv6 import ipv6_header -from construct.protocols.layer4.tcp import tcp_header -from construct.protocols.layer4.udp import udp_header - - -layer4_tcp = Struct("layer4_tcp", - Rename("header", tcp_header), - HexDumpAdapter( - Field("next", lambda ctx: - ctx["_"]["header"].payload_length - ctx["header"].header_length - ) - ), -) - -layer4_udp = Struct("layer4_udp", - Rename("header", udp_header), - HexDumpAdapter( - Field("next", lambda ctx: ctx["header"].payload_length) - ), -) - -layer3_payload = Switch("next", lambda ctx: ctx["header"].protocol, - { - "TCP" : layer4_tcp, - "UDP" : layer4_udp, - }, - default = Pass -) - -layer3_ipv4 = Struct("layer3_ipv4", - Rename("header", ipv4_header), - layer3_payload, -) - -layer3_ipv6 = 
Struct("layer3_ipv6", - Rename("header", ipv6_header), - layer3_payload, -) - -layer2_ethernet = Struct("layer2_ethernet", - Rename("header", ethernet_header), - Switch("next", lambda ctx: ctx["header"].type, - { - "IPv4" : layer3_ipv4, - "IPv6" : layer3_ipv6, - }, - default = Pass, - ) -) - -ip_stack = Rename("ip_stack", layer2_ethernet) - - -if __name__ == "__main__": - cap1 = ( - "0011508c283c001150886b570800450001e971474000800684e4c0a80202525eedda11" - "2a0050d98ec61d54fe977d501844705dcc0000474554202f20485454502f312e310d0a" - "486f73743a207777772e707974686f6e2e6f72670d0a557365722d4167656e743a204d" - "6f7a696c6c612f352e30202857696e646f77733b20553b2057696e646f7773204e5420" - "352e313b20656e2d55533b2072763a312e382e302e3129204765636b6f2f3230303630" - "3131312046697265666f782f312e352e302e310d0a4163636570743a20746578742f78" - "6d6c2c6170706c69636174696f6e2f786d6c2c6170706c69636174696f6e2f7868746d" - "6c2b786d6c2c746578742f68746d6c3b713d302e392c746578742f706c61696e3b713d" - "302e382c696d6167652f706e672c2a2f2a3b713d302e350d0a4163636570742d4c616e" - "67756167653a20656e2d75732c656e3b713d302e350d0a4163636570742d456e636f64" - "696e673a20677a69702c6465666c6174650d0a4163636570742d436861727365743a20" - "49534f2d383835392d312c7574662d383b713d302e372c2a3b713d302e370d0a4b6565" - "702d416c6976653a203330300d0a436f6e6e656374696f6e3a206b6565702d616c6976" - "650d0a507261676d613a206e6f2d63616368650d0a43616368652d436f6e74726f6c3a" - "206e6f2d63616368650d0a0d0a" - ).decode("hex") - - cap2 = ( - "0002e3426009001150f2c280080045900598fd22000036063291d149baeec0a8023c00" - "500cc33b8aa7dcc4e588065010ffffcecd0000485454502f312e3120323030204f4b0d" - "0a446174653a204672692c2031352044656320323030362032313a32363a323520474d" - "540d0a5033503a20706f6c6963797265663d22687474703a2f2f7033702e7961686f6f" - "2e636f6d2f7733632f7033702e786d6c222c2043503d2243414f2044535020434f5220" - "4355522041444d20444556205441492050534120505344204956416920495644692043" - 
"4f4e692054454c6f204f545069204f55522044454c692053414d69204f54526920554e" - "5269205055426920494e4420504859204f4e4c20554e49205055522046494e20434f4d" - "204e415620494e542044454d20434e542053544120504f4c204845412050524520474f" - "56220d0a43616368652d436f6e74726f6c3a20707269766174650d0a566172793a2055" - "7365722d4167656e740d0a5365742d436f6f6b69653a20443d5f796c683d58336f444d" - "54466b64476c6f5a7a567842463954417a49334d5459784e446b4563476c6b417a4578" - "4e6a59794d5463314e5463456447567a64414d7742485274634777446157356b5a5867" - "7462412d2d3b20706174683d2f3b20646f6d61696e3d2e7961686f6f2e636f6d0d0a43" - "6f6e6e656374696f6e3a20636c6f73650d0a5472616e736665722d456e636f64696e67" - "3a206368756e6b65640d0a436f6e74656e742d547970653a20746578742f68746d6c3b" - "20636861727365743d7574662d380d0a436f6e74656e742d456e636f64696e673a2067" - "7a69700d0a0d0a366263382020200d0a1f8b0800000000000003dcbd6977db38b200fa" - "f9fa9cf90f88326dd9b1169212b5d891739cd84ed2936d1277a7d3cbf1a1484a624c91" - "0c4979893bbfec7d7bbfec556121012eb29d65e6be7be7762c9240a1502854150a85c2" - "c37b87af9f9c7c7873449e9dbc7c41defcf2f8c5f327a4d1ee76dff79e74bb872787ec" - "43bfa3e9ddeed1ab06692cd234daed762f2e2e3a17bd4e18cfbb276fbb8b74e9f7bb49" - "1a7b76da7152a7b1bff110dfed3f5cb896030f4b37b508566dbb9f56def9a4f1240c52" - "3748db275791db20367b9a3452f732a5d0f688bdb0e2c44d27bf9c1cb7470830b1632f" - "4a490a3578c18fd6b9c5dec2f7732b2641783109dc0b7268a56e2bd527a931497b93b4" - "3f49cd493a98a4c3493a9aa4e349aa6bf01f7cd78d89d6b2ed49b3d9baf223f8b307b5" - "004a67eea627ded2dddadedb78d8656de428f856305f5973779223b0fff05ebbbde1db" - "67082a499289ae0f06863e1c8f4c0639eaccbdd9a3547abf798a1f0ec6c73fafd2e4f1" - "51ffd5f1c9e2f9e37ff74e74fbddd941b375eadb0942b3e3d5723a69f6060373a6cff4" - "9e6df586dac8b11c4d1f1afd81319b0df45e6fd4925a6cee6db4dbfb19e225bc1b12e5" - "6a098aed9309715c3b74dc5fde3e7f122ea3308061dac22f4018a4f8878367af5f4f2e" - "bcc001a2d187bfffbefeb2477f75026be9269165bb93d92ab0532f0cb68264fbda9b6d" - 
"dd0b92bfff867f3abe1bccd3c5f675eca6ab3820c1caf7f7be20e05363029f93c8f7d2" - "ad46a7b1bd475ff62614f2de2c8cb7f08537d93a35fed0fe9a4c1af44363fb91beabed" - "790f4f0d0e7a6f67c7dbbe3eedfd01e5bcbffe9a64bf289e00307bb1f7852371dadb13" - "3df0c3798efba9d93a1db44e87dbd7d8b4cf50e95c780e304be745389fbbf11ef4cddf" - "dcf4b162d629fa94d7defbe2fa892b3ece2c78d8fb221a84517003476a73dc3ad535d6" - "e22c7fbd0db8cf3a511ca6211d3e28933fed9d8ea54f381f66c0c7f2cb0e4c3898ad2b" - "3b0de3c9e918bf25abc88d6ddf02d65581418f94174addc9ebe94717e67ce557207b6d" - "45f892773ae393adc62af57c18ecd27b46e5aa2feea5b58c7c173e6d94be1d3bd5afa3" - "fcf571d409ded9b1eb06ef3d275d00c36f25f4916c6ed2a911cef88b0e4c0ecfa7a5b6" - "27936600b3d28d9bdbe411" - ).decode("hex") - - obj = ip_stack.parse(cap1) - print obj - print repr(ip_stack.build(obj)) - - print "-" * 80 - - obj = ip_stack.parse(cap2) - print obj - print repr(ip_stack.build(obj)) diff --git a/construct/protocols/layer2/__init__.py b/construct/protocols/layer2/__init__.py deleted file mode 100644 index bdcdb4a6e..000000000 --- a/construct/protocols/layer2/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -""" -layer 2 (data link) protocols -""" - diff --git a/construct/protocols/layer2/arp.py b/construct/protocols/layer2/arp.py deleted file mode 100644 index d61d52e43..000000000 --- a/construct/protocols/layer2/arp.py +++ /dev/null @@ -1,92 +0,0 @@ -""" -Ethernet (TCP/IP protocol stack) -""" -from construct import * -from ethernet import MacAddressAdapter -from construct.protocols.layer3.ipv4 import IpAddressAdapter - - - -def HwAddress(name): - return IfThenElse(name, lambda ctx: ctx.hardware_type == "ETHERNET", - MacAddressAdapter(Field("data", lambda ctx: ctx.hwaddr_length)), - Field("data", lambda ctx: ctx.hwaddr_length) - ) - -def ProtoAddress(name): - return IfThenElse(name, lambda ctx: ctx.protocol_type == "IP", - IpAddressAdapter(Field("data", lambda ctx: ctx.protoaddr_length)), - Field("data", lambda ctx: ctx.protoaddr_length) - ) - -arp_header = Struct("arp_header", 
- Enum(UBInt16("hardware_type"), - ETHERNET = 1, - EXPERIMENTAL_ETHERNET = 2, - ProNET_TOKEN_RING = 4, - CHAOS = 5, - IEEE802 = 6, - ARCNET = 7, - HYPERCHANNEL = 8, - ULTRALINK = 13, - FRAME_RELAY = 15, - FIBRE_CHANNEL = 18, - IEEE1394 = 24, - HIPARP = 28, - ISO7816_3 = 29, - ARPSEC = 30, - IPSEC_TUNNEL = 31, - INFINIBAND = 32, - ), - Enum(UBInt16("protocol_type"), - IP = 0x0800, - ), - UBInt8("hwaddr_length"), - UBInt8("protoaddr_length"), - Enum(UBInt16("opcode"), - REQUEST = 1, - REPLY = 2, - REQUEST_REVERSE = 3, - REPLY_REVERSE = 4, - DRARP_REQUEST = 5, - DRARP_REPLY = 6, - DRARP_ERROR = 7, - InARP_REQUEST = 8, - InARP_REPLY = 9, - ARP_NAK = 10 - - ), - HwAddress("source_hwaddr"), - ProtoAddress("source_protoaddr"), - HwAddress("dest_hwaddr"), - ProtoAddress("dest_protoaddr"), -) - -rarp_header = Rename("rarp_header", arp_header) - - -if __name__ == "__main__": - cap1 = "00010800060400010002e3426009c0a80204000000000000c0a80201".decode("hex") - obj = arp_header.parse(cap1) - print obj - print repr(arp_header.build(obj)) - - print "-" * 80 - - cap2 = "00010800060400020011508c283cc0a802010002e3426009c0a80204".decode("hex") - obj = arp_header.parse(cap2) - print obj - print repr(arp_header.build(obj)) - - - - - - - - - - - - - diff --git a/construct/protocols/layer2/ethernet.py b/construct/protocols/layer2/ethernet.py deleted file mode 100644 index 0f9993836..000000000 --- a/construct/protocols/layer2/ethernet.py +++ /dev/null @@ -1,36 +0,0 @@ -""" -Ethernet (TCP/IP protocol stack) -""" -from construct import * - - -class MacAddressAdapter(Adapter): - def _encode(self, obj, context): - return obj.replace("-", "").decode("hex") - def _decode(self, obj, context): - return "-".join(b.encode("hex") for b in obj) - -def MacAddress(name): - return MacAddressAdapter(Bytes(name, 6)) - -ethernet_header = Struct("ethernet_header", - MacAddress("destination"), - MacAddress("source"), - Enum(UBInt16("type"), - IPv4 = 0x0800, - ARP = 0x0806, - RARP = 0x8035, - X25 = 0x0805, - 
IPX = 0x8137, - IPv6 = 0x86DD, - _default_ = Pass, - ), -) - - -if __name__ == "__main__": - cap = "0011508c283c0002e34260090800".decode("hex") - obj = ethernet_header.parse(cap) - print obj - print repr(ethernet_header.build(obj)) - diff --git a/construct/protocols/layer2/mtp2.py b/construct/protocols/layer2/mtp2.py deleted file mode 100644 index 1cbe3a013..000000000 --- a/construct/protocols/layer2/mtp2.py +++ /dev/null @@ -1,21 +0,0 @@ -""" -Message Transport Part 2 (SS7 protocol stack) -(untested) -""" -from construct import * - - -mtp2_header = BitStruct("mtp2_header", - Octet("flag1"), - Bits("bsn", 7), - Bit("bib"), - Bits("fsn", 7), - Bit("sib"), - Octet("length"), - Octet("service_info"), - Octet("signalling_info"), - Bits("crc", 16), - Octet("flag2"), -) - - diff --git a/construct/protocols/layer3/__init__.py b/construct/protocols/layer3/__init__.py deleted file mode 100644 index 4477713fc..000000000 --- a/construct/protocols/layer3/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -""" -layer 3 (network) protocols -""" - diff --git a/construct/protocols/layer3/dhcpv4.py b/construct/protocols/layer3/dhcpv4.py deleted file mode 100644 index efb1aa78c..000000000 --- a/construct/protocols/layer3/dhcpv4.py +++ /dev/null @@ -1,210 +0,0 @@ -""" -Dynamic Host Configuration Protocol for IPv4 - -http://www.networksorcery.com/enp/protocol/dhcp.htm -http://www.networksorcery.com/enp/protocol/bootp/options.htm -""" -from construct import * -from ipv4 import IpAddress - - -dhcp_option = Struct("dhcp_option", - Enum(Byte("code"), - Pad = 0, - Subnet_Mask = 1, - Time_Offset = 2, - Router = 3, - Time_Server = 4, - Name_Server = 5, - Domain_Name_Server = 6, - Log_Server = 7, - Quote_Server = 8, - LPR_Server = 9, - Impress_Server = 10, - Resource_Location_Server = 11, - Host_Name = 12, - Boot_File_Size = 13, - Merit_Dump_File = 14, - Domain_Name = 15, - Swap_Server = 16, - Root_Path = 17, - Extensions_Path = 18, - IP_Forwarding_enabledisable = 19, - 
Nonlocal_Source_Routing_enabledisable = 20, - Policy_Filter = 21, - Maximum_Datagram_Reassembly_Size = 22, - Default_IP_TTL = 23, - Path_MTU_Aging_Timeout = 24, - Path_MTU_Plateau_Table = 25, - Interface_MTU = 26, - All_Subnets_are_Local = 27, - Broadcast_Address = 28, - Perform_Mask_Discovery = 29, - Mask_supplier = 30, - Perform_router_discovery = 31, - Router_solicitation_address = 32, - Static_routing_table = 33, - Trailer_encapsulation = 34, - ARP_cache_timeout = 35, - Ethernet_encapsulation = 36, - Default_TCP_TTL = 37, - TCP_keepalive_interval = 38, - TCP_keepalive_garbage = 39, - Network_Information_Service_domain = 40, - Network_Information_Servers = 41, - NTP_servers = 42, - Vendor_specific_information = 43, - NetBIOS_over_TCPIP_name_server = 44, - NetBIOS_over_TCPIP_Datagram_Distribution_Server = 45, - NetBIOS_over_TCPIP_Node_Type = 46, - NetBIOS_over_TCPIP_Scope = 47, - X_Window_System_Font_Server = 48, - X_Window_System_Display_Manager = 49, - Requested_IP_Address = 50, - IP_address_lease_time = 51, - Option_overload = 52, - DHCP_message_type = 53, - Server_identifier = 54, - Parameter_request_list = 55, - Message = 56, - Maximum_DHCP_message_size = 57, - Renew_time_value = 58, - Rebinding_time_value = 59, - Class_identifier = 60, - Client_identifier = 61, - NetWareIP_Domain_Name = 62, - NetWareIP_information = 63, - Network_Information_Service_Domain = 64, - Network_Information_Service_Servers = 65, - TFTP_server_name = 66, - Bootfile_name = 67, - Mobile_IP_Home_Agent = 68, - Simple_Mail_Transport_Protocol_Server = 69, - Post_Office_Protocol_Server = 70, - Network_News_Transport_Protocol_Server = 71, - Default_World_Wide_Web_Server = 72, - Default_Finger_Server = 73, - Default_Internet_Relay_Chat_Server = 74, - StreetTalk_Server = 75, - StreetTalk_Directory_Assistance_Server = 76, - User_Class_Information = 77, - SLP_Directory_Agent = 78, - SLP_Service_Scope = 79, - Rapid_Commit = 80, - Fully_Qualified_Domain_Name = 81, - Relay_Agent_Information = 82, 
- Internet_Storage_Name_Service = 83, - NDS_servers = 85, - NDS_tree_name = 86, - NDS_context = 87, - BCMCS_Controller_Domain_Name_list = 88, - BCMCS_Controller_IPv4_address_list = 89, - Authentication = 90, - Client_last_transaction_time = 91, - Associated_ip = 92, - Client_System_Architecture_Type = 93, - Client_Network_Interface_Identifier = 94, - Lightweight_Directory_Access_Protocol = 95, - Client_Machine_Identifier = 97, - Open_Group_User_Authentication = 98, - Autonomous_System_Number = 109, - NetInfo_Parent_Server_Address = 112, - NetInfo_Parent_Server_Tag = 113, - URL = 114, - Auto_Configure = 116, - Name_Service_Search = 117, - Subnet_Selection = 118, - DNS_domain_search_list = 119, - SIP_Servers_DHCP_Option = 120, - Classless_Static_Route_Option = 121, - CableLabs_Client_Configuration = 122, - GeoConf = 123, - ), - Switch("value", lambda ctx: ctx.code, - { - # codes without any value - "Pad" : Pass, - }, - # codes followed by length and value fields - default = Struct("value", - Byte("length"), - Field("data", lambda ctx: ctx.length), - ) - ) -) - -dhcp_header = Struct("dhcp_header", - Enum(Byte("opcode"), - BootRequest = 1, - BootReply = 2, - ), - Enum(Byte("hardware_type"), - Ethernet = 1, - Experimental_Ethernet = 2, - ProNET_Token_Ring = 4, - Chaos = 5, - IEEE_802 = 6, - ARCNET = 7, - Hyperchannel = 8, - Lanstar = 9, - ), - Byte("hardware_address_length"), - Byte("hop_count"), - UBInt32("transaction_id"), - UBInt16("elapsed_time"), - BitStruct("flags", - Flag("boardcast"), - Padding(15), - ), - IpAddress("client_addr"), - IpAddress("your_addr"), - IpAddress("server_addr"), - IpAddress("gateway_addr"), - IpAddress("client_addr"), - Bytes("client_hardware_addr", 16), - Bytes("server_host_name", 64), - Bytes("boot_filename", 128), - # BOOTP/DHCP options - # "The first four bytes contain the (decimal) values 99, 130, 83 and 99" - Const(Bytes("magic", 4), "\x63\x82\x53\x63"), - Rename("options", OptionalGreedyRange(dhcp_option)), -) - - -if __name__ == 
"__main__": - test = ( - "01" "01" "08" "ff" "11223344" "1234" "0000" - "11223344" "aabbccdd" "11223444" "aabbccdd" "11223344" - - "11223344556677889900aabbccddeeff" - - "41414141414141414141414141414141" "41414141414141414141414141414141" - "41414141414141414141414141414141" "41414141414141414141414141414141" - - "42424242424242424242424242424242" "42424242424242424242424242424242" - "42424242424242424242424242424242" "42424242424242424242424242424242" - "42424242424242424242424242424242" "42424242424242424242424242424242" - "42424242424242424242424242424242" "42424242424242424242424242424242" - - "63825363" - - "0104ffffff00" - "00" - "060811223344aabbccdd" - ).decode("hex") - - print dhcp_header.parse(test) - - - - - - - - - - - - - - diff --git a/construct/protocols/layer3/dhcpv6.py b/construct/protocols/layer3/dhcpv6.py deleted file mode 100644 index 540f3945a..000000000 --- a/construct/protocols/layer3/dhcpv6.py +++ /dev/null @@ -1,111 +0,0 @@ -""" -the Dynamic Host Configuration Protocol (DHCP) for IPv6 - -http://www.networksorcery.com/enp/rfc/rfc3315.txt -""" -from construct import * -from ipv6 import Ipv6Address - - -dhcp_option = Struct("dhcp_option", - Enum(UBInt16("code"), - OPTION_CLIENTID = 1, - OPTION_SERVERID = 2, - OPTION_IA_NA = 3, - OPTION_IA_TA = 4, - OPTION_IAADDR = 5, - OPTION_ORO = 6, - OPTION_PREFERENCE = 7, - OPTION_ELAPSED_TIME = 8, - OPTION_RELAY_MSG = 9, - OPTION_AUTH = 11, - OPTION_UNICAST = 12, - OPTION_STATUS_CODE = 13, - OPTION_RAPID_COMMIT = 14, - OPTION_USER_CLASS = 15, - OPTION_VENDOR_CLASS = 16, - OPTION_VENDOR_OPTS = 17, - OPTION_INTERFACE_ID = 18, - OPTION_RECONF_MSG = 19, - OPTION_RECONF_ACCEPT = 20, - SIP_SERVERS_DOMAIN_NAME_LIST = 21, - SIP_SERVERS_IPV6_ADDRESS_LIST = 22, - DNS_RECURSIVE_NAME_SERVER = 23, - DOMAIN_SEARCH_LIST = 24, - OPTION_IA_PD = 25, - OPTION_IAPREFIX = 26, - OPTION_NIS_SERVERS = 27, - OPTION_NISP_SERVERS = 28, - OPTION_NIS_DOMAIN_NAME = 29, - OPTION_NISP_DOMAIN_NAME = 30, - SNTP_SERVER_LIST = 31, - 
INFORMATION_REFRESH_TIME = 32, - BCMCS_CONTROLLER_DOMAIN_NAME_LIST = 33, - BCMCS_CONTROLLER_IPV6_ADDRESS_LIST = 34, - OPTION_GEOCONF_CIVIC = 36, - OPTION_REMOTE_ID = 37, - RELAY_AGENT_SUBSCRIBER_ID = 38, - OPTION_CLIENT_FQDN = 39, - ), - UBInt16("length"), - Field("data", lambda ctx: ctx.length), -) - -client_message = Struct("client_message", - Bitwise(BitField("transaction_id", 24)), -) - -relay_message = Struct("relay_message", - Byte("hop_count"), - Ipv6Address("linkaddr"), - Ipv6Address("peeraddr"), -) - -dhcp_message = Struct("dhcp_message", - Enum(Byte("msgtype"), - # these are client-server messages - SOLICIT = 1, - ADVERTISE = 2, - REQUEST = 3, - CONFIRM = 4, - RENEW = 5, - REBIND = 6, - REPLY = 7, - RELEASE_ = 8, - DECLINE_ = 9, - RECONFIGURE = 10, - INFORMATION_REQUEST = 11, - # these two are relay messages - RELAY_FORW = 12, - RELAY_REPL = 13, - ), - # relay messages have a different structure from client-server messages - Switch("params", lambda ctx: ctx.msgtype, - { - "RELAY_FORW" : relay_message, - "RELAY_REPL" : relay_message, - }, - default = client_message, - ), - Rename("options", GreedyRange(dhcp_option)), -) - - -if __name__ == "__main__": - test1 = "\x03\x11\x22\x33\x00\x17\x00\x03ABC\x00\x05\x00\x05HELLO" - test2 = "\x0c\x040123456789abcdef0123456789abcdef\x00\x09\x00\x0bhello world\x00\x01\x00\x00" - print dhcp_message.parse(test1) - print dhcp_message.parse(test2) - - - - - - - - - - - - - diff --git a/construct/protocols/layer3/icmpv4.py b/construct/protocols/layer3/icmpv4.py deleted file mode 100644 index bdd1f5f8e..000000000 --- a/construct/protocols/layer3/icmpv4.py +++ /dev/null @@ -1,96 +0,0 @@ -""" -Internet Control Message Protocol for IPv4 (TCP/IP protocol stack) -""" -from construct import * -from ipv4 import IpAddress - - -echo_payload = Struct("echo_payload", - UBInt16("identifier"), - UBInt16("sequence"), - Bytes("data", 32), # length is implementation dependent... - # is anyone using more than 32 bytes? 
-) - -dest_unreachable_payload = Struct("dest_unreachable_payload", - Padding(2), - UBInt16("next_hop_mtu"), - IpAddress("host"), - Bytes("echo", 8), -) - -dest_unreachable_code = Enum(Byte("code"), - Network_unreachable_error = 0, - Host_unreachable_error = 1, - Protocol_unreachable_error = 2, - Port_unreachable_error = 3, - The_datagram_is_too_big = 4, - Source_route_failed_error = 5, - Destination_network_unknown_error = 6, - Destination_host_unknown_error = 7, - Source_host_isolated_error = 8, - Desination_administratively_prohibited = 9, - Host_administratively_prohibited2 = 10, - Network_TOS_unreachable = 11, - Host_TOS_unreachable = 12, -) - -icmp_header = Struct("icmp_header", - Enum(Byte("type"), - Echo_reply = 0, - Destination_unreachable = 3, - Source_quench = 4, - Redirect = 5, - Alternate_host_address = 6, - Echo_request = 8, - Router_advertisement = 9, - Router_solicitation = 10, - Time_exceeded = 11, - Parameter_problem = 12, - Timestamp_request = 13, - Timestamp_reply = 14, - Information_request = 15, - Information_reply = 16, - Address_mask_request = 17, - Address_mask_reply = 18, - _default_ = Pass, - ), - Switch("code", lambda ctx: ctx.type, - { - "Destination_unreachable" : dest_unreachable_code, - }, - default = Byte("code"), - ), - UBInt16("crc"), - Switch("payload", lambda ctx: ctx.type, - { - "Echo_reply" : echo_payload, - "Echo_request" : echo_payload, - "Destination_unreachable" : dest_unreachable_payload, - }, - default = Pass - ) -) - - -if __name__ == "__main__": - cap1 = ("0800305c02001b006162636465666768696a6b6c6d6e6f70717273747576776162" - "63646566676869").decode("hex") - cap2 = ("0000385c02001b006162636465666768696a6b6c6d6e6f70717273747576776162" - "63646566676869").decode("hex") - cap3 = ("0301000000001122aabbccdd0102030405060708").decode("hex") - - print icmp_header.parse(cap1) - print icmp_header.parse(cap2) - print icmp_header.parse(cap3) - - - - - - - - - - - diff --git a/construct/protocols/layer3/igmpv2.py 
b/construct/protocols/layer3/igmpv2.py deleted file mode 100644 index c3b8ae6cd..000000000 --- a/construct/protocols/layer3/igmpv2.py +++ /dev/null @@ -1,33 +0,0 @@ -""" -What : Internet Group Management Protocol, Version 2 - How : http://www.ietf.org/rfc/rfc2236.txt - Who : jesse @ housejunkie . ca -""" - -from construct import ( - Byte, - Enum, - Struct, - UBInt16, - UBInt32, -) -from construct.protocols.layer3.ipv4 import IpAddress - -igmp_type = Enum(Byte("igmp_type"), - MEMBERSHIP_QUERY = 0x11, - MEMBERSHIP_REPORT_V1 = 0x12, - MEMBERSHIP_REPORT_V2 = 0x16, - LEAVE_GROUP = 0x17, -) - -igmpv2_header = Struct("igmpv2_header", - igmp_type, - Byte("max_resp_time"), - UBInt16("checksum"), - IpAddress("group_address"), -) - -if __name__ == '__main__': - - capture = "1600FA01EFFFFFFD".decode("hex") - print igmpv2_header.parse(capture) diff --git a/construct/protocols/layer3/ipv4.py b/construct/protocols/layer3/ipv4.py deleted file mode 100644 index f597c12c6..000000000 --- a/construct/protocols/layer3/ipv4.py +++ /dev/null @@ -1,72 +0,0 @@ -""" -Internet Protocol version 4 (TCP/IP protocol stack) -""" -from construct import * - - -class IpAddressAdapter(Adapter): - def _encode(self, obj, context): - return "".join(chr(int(b)) for b in obj.split(".")) - def _decode(self, obj, context): - return ".".join(str(ord(b)) for b in obj) - -def IpAddress(name): - return IpAddressAdapter(Bytes(name, 4)) - -def ProtocolEnum(code): - return Enum(code, - ICMP = 1, - TCP = 6, - UDP = 17, - ) - -ipv4_header = Struct("ip_header", - EmbeddedBitStruct( - Const(Nibble("version"), 4), - ExprAdapter(Nibble("header_length"), - decoder = lambda obj, ctx: obj * 4, - encoder = lambda obj, ctx: obj / 4 - ), - ), - BitStruct("tos", - Bits("precedence", 3), - Flag("minimize_delay"), - Flag("high_throuput"), - Flag("high_reliability"), - Flag("minimize_cost"), - Padding(1), - ), - UBInt16("total_length"), - Value("payload_length", lambda ctx: ctx.total_length - ctx.header_length), - 
UBInt16("identification"), - EmbeddedBitStruct( - Struct("flags", - Padding(1), - Flag("dont_fragment"), - Flag("more_fragments"), - ), - Bits("frame_offset", 13), - ), - UBInt8("ttl"), - ProtocolEnum(UBInt8("protocol")), - UBInt16("checksum"), - IpAddress("source"), - IpAddress("destination"), - Field("options", lambda ctx: ctx.header_length - 20), -) - - -if __name__ == "__main__": - cap = "4500003ca0e3000080116185c0a80205d474a126".decode("hex") - obj = ipv4_header.parse(cap) - print obj - print repr(ipv4_header.build(obj)) - - - - - - - - - diff --git a/construct/protocols/layer3/ipv6.py b/construct/protocols/layer3/ipv6.py deleted file mode 100644 index 538116b9d..000000000 --- a/construct/protocols/layer3/ipv6.py +++ /dev/null @@ -1,44 +0,0 @@ -""" -Internet Protocol version 6 (TCP/IP protocol stack) -""" -from construct import * -from ipv4 import ProtocolEnum - - -class Ipv6AddressAdapter(Adapter): - def _encode(self, obj, context): - return "".join(part.decode("hex") for part in obj.split(":")) - def _decode(self, obj, context): - return ":".join(b.encode("hex") for b in obj) - -def Ipv6Address(name): - return Ipv6AddressAdapter(Bytes(name, 16)) - - -ipv6_header = Struct("ip_header", - EmbeddedBitStruct( - OneOf(Bits("version", 4), [6]), - Bits("traffic_class", 8), - Bits("flow_label", 20), - ), - UBInt16("payload_length"), - ProtocolEnum(UBInt8("protocol")), - UBInt8("hoplimit"), - Alias("ttl", "hoplimit"), - Ipv6Address("source"), - Ipv6Address("destination"), -) - - -if __name__ == "__main__": - o = ipv6_header.parse("\x6f\xf0\x00\x00\x01\x02\x06\x80" - "0123456789ABCDEF" "FEDCBA9876543210" - ) - print o - print repr(ipv6_header.build(o)) - - - - - - diff --git a/construct/protocols/layer3/mtp3.py b/construct/protocols/layer3/mtp3.py deleted file mode 100644 index 7f712f279..000000000 --- a/construct/protocols/layer3/mtp3.py +++ /dev/null @@ -1,12 +0,0 @@ -""" -Message Transport Part 3 (SS7 protocol stack) -(untested) -""" -from construct import * - - 
-mtp3_header = BitStruct("mtp3_header", - Nibble("service_indicator"), - Nibble("subservice"), -) - diff --git a/construct/protocols/layer4/__init__.py b/construct/protocols/layer4/__init__.py deleted file mode 100644 index 38693c699..000000000 --- a/construct/protocols/layer4/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -""" -layer 4 (transporation) protocols -""" - diff --git a/construct/protocols/layer4/isup.py b/construct/protocols/layer4/isup.py deleted file mode 100644 index 8111b60b1..000000000 --- a/construct/protocols/layer4/isup.py +++ /dev/null @@ -1,15 +0,0 @@ -""" -ISDN User Part (SS7 protocol stack) -""" -from construct import * - - -isup_header = Struct("isup_header", - Bytes("routing_label", 5), - UBInt16("cic"), - UBInt8("message_type"), - # mandatory fixed parameters - # mandatory variable parameters - # optional parameters -) - diff --git a/construct/protocols/layer4/tcp.py b/construct/protocols/layer4/tcp.py deleted file mode 100644 index 0b6fce768..000000000 --- a/construct/protocols/layer4/tcp.py +++ /dev/null @@ -1,57 +0,0 @@ -""" -Transmission Control Protocol (TCP/IP protocol stack) -""" -from construct import * - - -tcp_header = Struct("tcp_header", - UBInt16("source"), - UBInt16("destination"), - UBInt32("seq"), - UBInt32("ack"), - EmbeddedBitStruct( - ExprAdapter(Nibble("header_length"), - encoder = lambda obj, ctx: obj / 4, - decoder = lambda obj, ctx: obj * 4, - ), - Padding(3), - Struct("flags", - Flag("ns"), - Flag("cwr"), - Flag("ece"), - Flag("urg"), - Flag("ack"), - Flag("psh"), - Flag("rst"), - Flag("syn"), - Flag("fin"), - ), - ), - UBInt16("window"), - UBInt16("checksum"), - UBInt16("urgent"), - Field("options", lambda ctx: ctx.header_length - 20), -) - -if __name__ == "__main__": - cap = "0db5005062303fb21836e9e650184470c9bc0000".decode("hex") - - obj = tcp_header.parse(cap) - print obj - print repr(tcp_header.build(obj)) - - - - - - - - - - - - - - - - diff --git a/construct/protocols/layer4/udp.py 
b/construct/protocols/layer4/udp.py deleted file mode 100644 index a7afed617..000000000 --- a/construct/protocols/layer4/udp.py +++ /dev/null @@ -1,24 +0,0 @@ -""" -User Datagram Protocol (TCP/IP protocol stack) -""" -from construct import * - - -udp_header = Struct("udp_header", - Value("header_length", lambda ctx: 8), - UBInt16("source"), - UBInt16("destination"), - ExprAdapter(UBInt16("payload_length"), - encoder = lambda obj, ctx: obj + 8, - decoder = lambda obj, ctx: obj - 8, - ), - UBInt16("checksum"), -) - -if __name__ == "__main__": - cap = "0bcc003500280689".decode("hex") - obj = udp_header.parse(cap) - print obj - print repr(udp_header.build(obj)) - - diff --git a/construct/tests/__init__.py b/construct/tests/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/construct/tests/csvtest.py b/construct/tests/csvtest.py deleted file mode 100644 index 04e0f51e8..000000000 --- a/construct/tests/csvtest.py +++ /dev/null @@ -1,95 +0,0 @@ -from construct import * -from construct.text import * - - -class LineSplitAdapter(Adapter): - def _decode(self, obj, context): - return obj.split('\t') - def _encode(self, obj, context): - return '\t'.join(obj)+'\n' - -sectionrow = Struct('sectionrow', - QuotedString('sectionname', start_quote='[', end_quote=']'), - Line('restofline'), - Literal('\n'), -) - -fieldsrow = Struct('fieldsrow', - Literal('FIELDS\t'), - LineSplitAdapter( - Line('items') - ), - Literal('\n'), -) - -data = Struct('data', - OptionalGreedyRange( - Struct('data', - Literal('DATA\t'), - LineSplitAdapter( - Line('items') - ), - Literal('\n'), - ) - ) -) - -section = Struct('section', - sectionrow, - fieldsrow, - data, - Literal('\n') -) - -sections = Struct('sections', - GreedyRange(section) -) - - -if __name__ == "__main__": - import psyco - psyco.full() - numdatarows = 2000 - - tsvstring = ( - '[ENGINEBAY]'+'\t'*80 + '\n' + - 'FIELDS'+('\tTIMESTAMP\tVOLTAGE\tCURRENT\tTEMPERATURE'*20) + '\n' + - 
('DATA'+('\t12:13:14.15\t1.2345\t2.3456\t345.67'*20) + - '\n')*numdatarows + '\n' + - '[CARGOBAY]'+'\t'*80 + '\n' + - 'FIELDS'+('\tTIMESTAMP\tVOLTAGE\tCURRENT\tTEMPERATURE'*20) + '\n' + - ('DATA'+('\t12:13:14.15\t1.2345\t2.3456\t345.67'*20) + - '\n')*numdatarows + '\n' + - '[FRONTWHEELWELL]'+'\t'*80 + '\n' + - 'FIELDS'+('\tTIMESTAMP\tVOLTAGE\tCURRENT\tTEMPERATURE'*20) + '\n' + - ('DATA'+('\t12:13:14.15\t1.2345\t2.3456\t345.67'*20) + - '\n')*numdatarows + '\n' + - '[REARWHEELWELL]'+'\t'*80 + '\n' + - 'FIELDS'+('\tTIMESTAMP\tVOLTAGE\tCURRENT\tTEMPERATURE'*20) + '\n' + - ('DATA'+('\t12:13:14.15\t1.2345\t2.3456\t345.67'*20) + '\n') * numdatarows + '\n' - ) - - #print len(tsvstring) - - import time - t = time.time() - x = sections.parse(tsvstring) - print time.time() - t - # 43.2030000687 / 3.10899996758 with psyco (x13) - - t = time.time() - s = sections.build(x) - print time.time() - t - # 39.625 / 2.65700006485 with psyco (x14) - - print s == tsvstring - # True - - - - - - - - - diff --git a/construct/tests/debug.py b/construct/tests/debug.py deleted file mode 100644 index 6cc16fb54..000000000 --- a/construct/tests/debug.py +++ /dev/null @@ -1,20 +0,0 @@ -from construct import * - - -foo = Struct("foo", - UBInt8("bar"), - Debugger( - Enum(UBInt8("spam"), - ABC = 1, - DEF = 2, - GHI = 3, - ) - ), - UBInt8("eggs"), -) - - -print foo.parse("\x01\x02\x03") - -print foo.parse("\x01\x04\x03") - diff --git a/construct/tests/formats/__init__.py b/construct/tests/formats/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/construct/tests/formats/data/__init__.py b/construct/tests/formats/data/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/construct/tests/formats/data/test_snoop.py b/construct/tests/formats/data/test_snoop.py deleted file mode 100644 index c74bc4125..000000000 --- a/construct/tests/formats/data/test_snoop.py +++ /dev/null @@ -1,525 +0,0 @@ -from unittest import TestCase - -from construct.formats.data.snoop 
import snoop_file - -data = """ -c25vb3AAAAAAAAACAAAABAAAAFYAAABWAAAAcAAAAAA2UPLfAA2DDAAGKSEiuwgAIJJtoQgARRAA -SMDnQAD/Bq7ogW8FKYFvA8gAFgWxhAqySjnH6a2AGCeY89oAAAEBCAoCtYleNrAdPwAAAAqID9h8 -+c0lgVR5zdEwPj1cAAAAAABWAAAAVgAAAHAAAAAANlDy3wANy/oABikhIrsIACCSbaEIAEUQAEjA -6EAA/wau54FvBSmBbwPIABYFsYQKsl45x+mtgBgnmIGDAAABAQgKArWJYDawHT8AAAAPjYflu+W4 -lqVFCkarI8Xi4AEAAAAAbgAAAG4AAACIAAAAADZQ8t8ADhjW////////ACCvOXniAGD//wBgAAQT -AAAB////////BFITAAABACCvOXniQAgAAgZOR0laTU8hISEhISEhISEhQTU1NjlCMjBBQkU1MTFD -RTlDQTQwMDAwNEM3NjI4MzIAEwAAAQAgrzl54kAAAAEAbgAAANgAAADYAAAA8AAAAAA2UPLfAA57 -Mf///////wBglwju8AgARQAAyjhkAACAEUjEgW+2HIFv//8AigCKALb5bBEapTCBb7YcAIoAoAAA -IEVJRUZGQ0VDRVBFTUVFRERDQUNBQ0FDQUNBQ0FDQUFBACBFRkZBRUpFRUVGRU5FSkVQRU1FUEVI -RkpDQUNBQ0FCTgD/U01CJQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABEAAAYAAAAAAAAAAADo -AwAAAAAAAAAABgBWAAMAAQABAAIAFwBcTUFJTFNMT1RcQlJPV1NFAAkEOAMAAAAAADIAAAAyAAAA -TAAAAAA2UPLfAA6zQgkAB////wgAB29T7gAkqqoDAAAAgPMAAYCbBgQAAQgAB29T7gAALssAAAAA -AAAAAFE+AAAAAAA8AAAAPAAAAFQAAAAANlDy3wAOwH7///////8AEFofFs4IBgABCAAGBAABABBa -HxbOgW+2xwAAAAAAAIFv7SwAAAAAAAAAAAAAAAAAAAAAAAAAAABHAAAARwAAAGAAAAAANlDy3wAO -wO0DAAAAAAEAEFofFs4AOfDwAywA/+8IAAAAAAAAAEpTUE5STVBUR1NCU1NESVJTUEhTRVJWRVIz -ICAgICAGABBaHxbO+HcAAAAAAABZAAAAWQAAAHQAAAAANlDy3wAO3PcJAAcAAOMIAIcqJkEAS6qq -AwgAB4CbAEMAAAAAAFH/PgICAiGjAC7L/QAMRERTIENhbGVuZGVyDk5VRCAyLjAgU2VydmVyEkRF -TlRBTCBESUFHTk9TVElDUwBJQwAAAEIAAABCAAAAXAAAAAA2UPLgAAAGeQgAIJJtoQAGKSEiuwgA -RQAANBVJAAA8Bl2sgW8DyIFvBSkFsQAWOcfprYQKsnKAED447PkAAAEBCAo2sB0/ArWJXgK1AAAA -pwAAAKcAAADAAAAAADZQ8uAAAHGjCQArAAAPCAArBgazYAQoCAUFBQFiDO4FHh4BAQZEU05SMTEG -RFNOUjExBf8HT1VURElBTBNESUFMIE9VVCBNT0RFTSBQT09M/wZURUxEMTAOVEVMRVBIT05FIERB -VEH/BlRFTEQxMg5URUxFUEhPTkUgREFUQf8GVEVMRDEzDlRFTEVQSE9ORSBEQVRB/wZURUxEMTQO -VEVMRVBIT05FIERBVEEBAQAAAAAAgAAAAIAAAACYAAAAADZQ8uAAAKFZqwAEAQEBqgAEABVEYAdw -AKsABAEBAQEAqgAEABVEoAAIAACABAEAAAZBUldFTiAAgAH/gwAEAAAAAAAAAAAAEAMAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAIQF9w4GPDT1nAADRVdBAAAAAAAAAAAAAAAACAAr5g5nNwEA 
-AABiAAAAYgAAAHwAAAAANlDy4AACmw0IACstuI6qAAQAAkSAQVQABgAAAAgAK+KGTAAAAAADAQMD -AQZaSU5HRVIAAAAAAAAAAAAAAAAAAAAAAAA1EAAAAAAAAAAAAACAB04AUAEAAFNZU1RFTSRaSU5H -RVIAAAAAAAAAAAAAYAAAAGAAAAB4AAAAADZQ8uAAAycCCQAHAACKAGBwzFuJAFKqqgMIAAeAmwBK -OdsAAA+t/9sC/QIheg+t2/0AFVJJQyBDYWxlbmRhciBTZXJ2ZXIgMw5OVUQgMi4wIFNlcnZlchBS -RVNFQVJDSCBJTUFHSU5HAAAANAAAADQAAABMAAAAADZQ8uAAA0RgAYDCAAAAAGA+yU87ACZCQgMA -AAAAAAABAOCjPsEAAAAAAAABAOCjPsEAgUQAABQAAgAPAAAAADIAAAAyAAAATAAAAAA2UPLgAANT -OQkAB////wAFAt6AcQAkqqoDAAAAgPMAAYCbBgQAAQAFAt6AcQAAXkUAAAAAAAAAABLKVVUAAAA5 -AAAAOQAAAFQAAAAANlDy4AAEOVEJAAf///8A4B5+dgEAK6qqAwgAB4CbACOC9QAAACv/DQEBAQAr -CA0ACoAAY4IPr4APr4IPsoEPtIIAAAAAAAA8AAAAPAAAAFQAAAAANlDy4AAE9Tb///////8AoMmP -AmkIBgABCAAGBAABAKDJjwJpgW+/cAAAAAAAAIFvC1EAAAAAAAAAAAAAAAAAAAAAAAAAAAA8AAAA -PAAAAFQAAAAANlDy4AAFYyv///////8AIK9PhUgIBgABCAAGBAABACCvT4VIgW/nQQAAAAAAAIFv -50lJSUlJSUlJSUlJSUlJSUlJSUkAAABcAAAAXAAAAHQAAAAANlDy4AAGZb3///////8AYJcI7vAI -AEUAAE46ZAAAgBFHQIFvthyBb///AIkAiQA6TdqlJAEQAAEAAAAAAAAgRUZGQUVKRUVDQUNBQ0FD -QUNBQ0FDQUNBQ0FDQUNBQkwAACAAAQAAAFwAAABcAAAAdAAAAAA2UPLgAAaWxv///////wAgSAT9 -jAgARQAATuxrAACAEV4LgW/tSYFv//8AiQCJADqQvt0EARAAAQAAAAAAACBFTkVGRUVFSkVERUpF -T0VGRlBFSEVKQ0FDQUNBQ0FCTwAAIAABAAAAVwAAAFcAAABwAAAAADZQ8uAABt9ICQAHAACIAOAe -fnYBAEmqqgMIAAeAmwBBBKkAAAAr/w0C/gIhTQA2ZP4AEFNVUkdFUlktUkVTRUFSQ0gTU0FNVVJB -SSBMYXNlcldyaXRlcgdTVVJHRVJZAAAAAFwAAABcAAAAdAAAAAA2UPLgAAcQ0f///////wAgSAT9 -jAgARQAATu1rAACAEV0LgW/tSYFv//8AiQCJADp6r90KARAAAQAAAAAAACBFTkVGRUVFSkVERUpF -T0VGRlBFSkVPRUdFRkVERkVCTAAAIAABAAAARwAAAEcAAABgAAAAADZQ8uAAB1aDCQAHAADlCACH -KiZBADmqqgMIAAeAmwAxAAAAAABR/z4CAgIhxAAfpP0AAT0MU3Rhck5pbmUgS2V5DUlCVCAzcmQg -Zmxvb3IAAAAAPwAAAD8AAABYAAAAADZQ8uAAB1hZCACHCjQSCAAgHih5CABFAAAxlbZAAP8R2haB -bwVYgW8DuPN8B9EAHQYWAAUAAkFjY2VzcyB2aW9sYXRpb24AAAAAAE0AAABNAAAAaAAAAAA2UPLg -AAgblQkABwAAxggAhxq+cQA/qqoDCAAHgJsANwAAAAAAUf89AgICIVYASlL9AApGaXJzdENsYXNz -CEZDU2VydmVyDkNPTU1VTklUWSBERU5UAABNAAAAPAAAADwAAABUAAAAADZQ8uAACZQC//////// 
-ABBLr7GHCAYAAQgABgQAAQAQS6+xh4FvqEQAAAAAAACBbwVrBWsFawVrBWsFawVrBWsFawVrAAAA -TAAAAEwAAABkAAAAADZQ8uAACgvwCQAHAAB8CACHEzUEAD6qqgMIAAeAmwA2AAAAAABR/+ACAgIh -zABMJv0ACDEyNTg0NzQxDFN0YXJOaW5lIEtleQtQbGV4dXMgTWFpbgAAAEwAAABMAAAAZAAAAAA2 -UPLgAAoopwkABwAAfAgAhxM1BAA+qqoDCAAHgJsANgAAAAAAUf/gAgICIc0ATCb9AAgxMjYzMjc2 -MwxTdGFyTmluZSBLZXkLUGxleHVzIE1haW4AAACAAAAAgAAAAJgAAAAANlDy4AAKqGyrAAQBAgGq -AAQA1ERgB3AAqwAEAQIBAgCqAAQA1ESgAAgAAIAEAQAABkRBTVJPRACAAf+DAAQAAAAAAAAAAAAQ -AwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAhAXqXCgzNPWcAANFV0EAAAAAAAAAAAAAAAAI -ACvkVjU3AQAAANgAAADYAAAA8AAAAAA2UPLgAAriIP///////wAgSAT9jAgARQAAyu5rAACAEVuP -gW/tSYFv//8AigCKALaE6hEa3RKBb+1JAIoAoAAAIEVCRkdFRkVPRUhFRkZDQ0FDQUNBQ0FDQUNB -Q0FDQUFBACBFTkVGRUVFSkVERUpFT0VGRlBFSkVPRUdFRkVERkVCTgD/U01CJQAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAABEAAAYAAAAAAAAAAADoAwAAAAAAAAAABgBWAAMAAQABAAIAFwBcTUFJ -TFNMT1RcQlJPV1NFAAkEAkMAAAAAAFkAAABZAAAAdAAAAAA2UPLgAAud6wkABwAA4wgAhyomQQBL -qqoDCAAHgJsAQwAAAAAAUf8+AgICIaMALsv9AAxERFMgQ2FsZW5kZXIOTlVEIDIuMCBTZXJ2ZXIS -REVOVEFMIERJQUdOT1NUSUNTAElDAAAAMgAAADIAAABMAAAAADZQ8uAADMf0CQAH////AAUCiMIE -ACSqqgMAAACA8wABgJsGBAABAAUCiMIEAAAlmwAAAAAAAAAAEQMAAAAAAD0AAAA9AAAAWAAAAAA2 -UPLgAA0QjAMAAAAAAQCgJMahRAAv8PADLAD/7woXagAAAGoAVVNFUjcgICAgICAgICAgIFVTRVIy -MCAgICAgICAgIAAAT0YAAACeAAAAngAAALgAAAAANlDy4AANzPwABikhIrsIACCSbaEIAEUQAJDA -6UAA/waunoFvBSmBbwPIABYFsYQKsnI5x+mtgBgnmOL0AAABAQgKArWJxDawHT8AAABQQzK7GGWL -FfSvzj/78Vi/M6AOI3qzsoRDURpkJAx0zwzUFsnRLoNozkAVPgcrcVR9nUXSU5PB5bWi7DWIVcEy -UG0NIWCcQK2k94bGyAj9fTKYnwLBk9WwRjE7AAAAXAAAAFwAAAB0AAAAADZQ8uAADgm5//////// -AGCXCO7wCABFAABOO2QAAIARRkCBb7YcgW///wCJAIkAOkrWpSgBEAABAAAAAAAAIEVGRkFFSkVF -Q0FDQUNBQ0FDQUNBQ0FDQUNBQ0FDQUJPAAAgAAEAAABaAAAAWgAAAHQAAAAANlDy4AAOYiL///// -//8AYJcFDTEATODgA///AEkAAAAAAAD///////+QAQAAAAIAYJcFDTGQAYMbAgAPAQAAEQIAMviV -zAAeACtAAgAy+JXMAsAIAAAAAAAAAADFBAAABdkAAAAAAFwAAABcAAAAdAAAAAA2UPLgAA5+3P// -/////wBglwju8AgARQAATjxkAACAEUVAgW+2HIFv//8AiQCJADr4vqUuARAAAQAAAAAAACBFRkZB 
-RUpFRUVGRU5FSkVQRU1FUEVIRkpDQUNBQ0FCTAAAIAABAAAAMgAAADIAAABMAAAAADZQ8uAADoQK -CQAH////AAUCKN11ACSqqgMAAACA8wABgJsGBAABAAUCKN11AAAj6QAAAAAAAAAAXJsAAAAAADwA -AAA8AAAAVAAAAAA2UPLgAA64jP///////wAgr88tuwgGAAEIAAYEAAEAIK/PLbuBbwxqAAAAAAAA -gW8BDQ0NDQ0NDQ0NDQ0NDQ0NDQ0NDQAAADIAAAAyAAAATAAAAAA2UPLhAABEvgkAB////wAFAuZc -IAAkqqoDAAAAgPMAAYCbBgQAAQAFAuZcIAAASaIAAAAAAAAAAEoZAAAAAABgAAAAYAAAAHgAAAAA -NlDy4QAAaWMJAAcAAIoAYHDMW4kAUqqqAwgAB4CbAEo52wAAD63/2wL9AiF6D63b/QAVUklDIENh -bGVuZGFyIFNlcnZlciAzDk5VRCAyLjAgU2VydmVyEFJFU0VBUkNIIElNQUdJTkcAAAA8AAAAPAAA -AFQAAAAANlDy4QAAgX0JAAcAACsIAIcavmkALqqqAwgAB4CbACYAAAAAAFH/TgICAiH1D6uN/gAB -PQNOUkwLUEFUSE9MT0dZL0UAAABCAAAAQgAAAFwAAAAANlDy4QAAycUIACCSbaEABikhIrsIAEUA -ADQVSwAAPAZdqoFvA8iBbwUpBbEAFjnH6a2ECrLOgBA+OOw1AAABAQgKNrAdQQK1icQCtQAAAFwA -AABcAAAAdAAAAAA2UPLhAALGPf///////wAgSAT9jAgARQAATvBrAACAEVoLgW/tSYFv//8AiQCJ -ADqQvt0EARAAAQAAAAAAACBFTkVGRUVFSkVERUpFT0VGRlBFSEVKQ0FDQUNBQ0FCTwAAIAABAAAA -XAAAAFwAAAB0AAAAADZQ8uEAA0At////////ACBIBP2MCABFAABO8WsAAIARWQuBb+1JgW///wCJ -AIkAOnqv3QoBEAABAAAAAAAAIEVORUZFRUVKRURFSkVPRUZGUEVKRU9FR0VGRURGRUJMAAAgAAEA -AAA8AAAAPAAAAFQAAAAANlDy4QAEcewJAIeQ//8IAIcKPEUIiRgAaE4AAAAcEFhZUF8wODAwODcw -QTNDNDUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFWAAABVgAAAXAAAAAANlDy4QAEgHP///////8I -AIcKPEUIAEUAAUgAoQAAQBFyroFvA3iBb///AEQAQwE0AAABAQYAAAAAAAAAAACBbwN4AAAAAAAA -AACBbwENCACHCjxFAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB4MGEzYzQ1LnBybQAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABVgAAAD4A -AAA+AAAAWAAAAAA2UPLhAASEMf///////wgAhwo8RQgARQAAMACiAABAEXPFgW8DeIFv//8H0QBF -ABwAAAABeDBhM2M0NS5wcm0Ab2N0ZXQAAAAAAAA8AAAAPAAAAFQAAAAANlDy4QAEhdv///////8A -IEgE/YwIBgABCAAGBAABACBIBP2MgW/tSQAAAAAAAIFvEOXdBAEQAAEAAAAAAAAgRU5FRkUAAAA8 
-AAAAPAAAAFQAAAAANlDy4QAEjXP///////8AwE95VMQIBgABCAAGBAABAMBPeVTEgW/tXQAAAAAA -AIFvA3gAAAAAAAAAAAAAAAAAAAAAAAAAAAA8AAAAPAAAAFQAAAAANlDy4QAEj4////////8AADIl -D/8IBgABCAAGBAABAAAyJQ//gW+oQwAAAAAAAIFvA3hFAADMOxwAAH4RAACBb5kJ//8AAABHAAAA -RwAAAGAAAAAANlDy4QAEoIQJAAcAAOUIAIcqJkEAOaqqAwgAB4CbADEAAAAAAFH/PgICAiHEAB+k -/QABPQxTdGFyTmluZSBLZXkNSUJUIDNyZCBmbG9vcgAAAAA8AAAAPAAAAFQAAAAANlDy4QAEsuz/ -//////8AQJU+BBkIBgABCAAGBAABAECVPgQZgW8QoQAAAAAAAIFvA3gAAAAAAAAAAAAAAAAAAAAA -AAAAAAA8AAAAPAAAAFQAAAAANlDy4QAExBf///////8IAGkJAggIBgABCAAGBAABCABpCQIIgW8N -CwAAAAAAAIFvA3iRm7wAi1oPAAAAAAAAVgQBiAYAAAA8AAAAPAAAAFQAAAAANlDy4QAEyJL///// -//8IAGkF+4UIBgABCAAGBAABCABpBfuFgW/nXQAAAAAAAIFvA3gAAAAAAAAAAAAAAAAAAAAAAAAA -AAA8AAAAPAAAAFQAAAAANlDy4QAE1FP///////8IAGkC8Y4IBgABCAAGBAABCABpAvGOgW/nfgAA -AAAAAIFvA3gAAAAAAAAAAAAAAAAAAAAAAAAAAAA8AAAAPAAAAFQAAAAANlDy4QAE2U3///////8I -AGkCsOIIBgABCAAGBAABCABpArDigW+vIAAAAAAAAIFvA3gAAAAAAAAAAAAAAAAAAAAAAAAAAABN -AAAATQAAAGgAAAAANlDy4QAFD5IJAAcAAMYIAIcavnEAP6qqAwgAB4CbADcAAAAAAFH/PQICAiFW -AEpS/QAKRmlyc3RDbGFzcwhGQ1NlcnZlcg5DT01NVU5JVFkgREVOVAAATQAAADwAAAA8AAAAVAAA -AAA2UPLhAAUYPP///////6oABAAmRQgGAAEIAAYEAAGqAAQAJkWBbwELAAAAAAAAgW8DeAAAAAAv -Ly8vLy8ALy8vLy8vLwAAAPMAAADzAAABDAAAAAA2UPLhAAVWl////////wDAtgBS8QgARQAA5cR3 -AAAeEfI6gW/id4Fv//8AigCKANEjLxEKbmOBb+J3AIoAyQAAIEZERkZGQ0VIQ05FREVGRU9GRUZQ -RkRFT0VCRkFDQUFBACBGREZGRkNFSEVGRkNGSkNORURFRkVPRkVGQ0VCRU1CTgD/U01CJQAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAABEAACEAAAAAAAAAAAD/////AAAAAAAAIQBWAAMAAQABAAIA -MgBcTUFJTFNMT1RcQlJPV1NFAAEAgKkDAFNVUkctQ0VOVF9TTkFQAAACAgMIAQABAFWqAAAAAAA8 -AAAAPAAAAFQAAAAANlDy4QAFsvz///////8IAGkC0WIIBgABCAAGBAABCABpAtFigW/wDwAAAAAA -AIFvA3gAAAAAAAAAAAAAAAAAAAAAAAAAAAA5AAAAOQAAAFQAAAAANlDy4QAGGmcJAAf///8IAIcV -hIsAK6qqAwgAB4CbACMAAAAAAFH/lwEBAQBRCJcACoAAY4IACoAAY4IPtYAPtoIAAAAAAAA8AAAA -PAAAAFQAAAAANlDy4QAGMM////////8IAFp1TAAIBgABCAAGBAABCABadUwAgW/ZPgAAAAAAAIFv -A3gAAAAAAAAAAAAAAAAAAAAAAAAAAAAyAAAAMgAAAEwAAAAANlDy4QAGNBcJAAf///8ABQJAlXQA 
-JKqqAwAAAIDzAAGAmwYEAAEABQJAlXQAAFirAAAAAAAAAABXPwAAAAAATAAAAEwAAABkAAAAADZQ -8uEABpaiCQAHAAB8CACHEzUEAD6qqgMIAAeAmwA2AAAAAABR/+ACAgIhzABMJv0ACDEyNTg0NzQx -DFN0YXJOaW5lIEtleQtQbGV4dXMgTWFpbgAAAEwAAABMAAAAZAAAAAA2UPLhAAaydQkABwAAfAgA -hxM1BAA+qqoDCAAHgJsANgAAAAAAUf/gAgICIc0ATCb9AAgxMjYzMjc2MwxTdGFyTmluZSBLZXkL -UGxleHVzIE1haW4AAAA5AAAAOQAAAFQAAAAANlDy4QAHC9EJAAf///8IAIcTNQQAK6qqAwgAB4Cb -ACMAAAAAAFH/4AEBAQBRCOAACoAAY4IPo4APo4IACoAAY4JydWwAAAA8AAAAPAAAAFQAAAAANlDy -4QAHHn3///////8AEFofFs4IBgABCAAGBAABABBaHxbOgW+2xwAAAAAAAIFvtjIAAAAAAAAAAAAA -AAAAAAAAAAAAAABZAAAAWQAAAHQAAAAANlDy4QAH7OEJAAcAAOMIAIcqJkEAS6qqAwgAB4CbAEMA -AAAAAFH/PgICAiGjAC7L/QAMRERTIENhbGVuZGVyDk5VRCAyLjAgU2VydmVyEkRFTlRBTCBESUFH -Tk9TVElDUwBJQwAAADQAAAA0AAAATAAAAAA2UPLhAAgqewGAwgcHBwgAhxM0dwAmQkIDAAAAAAAA -AggAhwM0dwAAAAAAAggAhwM0d4ABAAAUAAIADwAAAAAyAAAAMgAAAEwAAAAANlDy4QAITF0JAAf/ -//8AYLAEbnoAJKqqAwAAAIDzAAGAmwYEAAEAYLAEbnoAACelAAAAAAAAAABRPf//AAAASgAAAEoA -AABkAAAAADZQ8uEACYQKAQBeAAAKABB7kM0UCABFwAA8AAAAAAJYwi+Bb5QB4AAACgIF784AAAAA -AAAAAAAAAAAAAAABAAEADAEAAQAAAAAPAAQACAsDAQALAwAAAIAAAACAAAAAmAAAAAA2UPLhAAnt -fKsABAEBAaoABAAVRGAHcACrAAQBAQEBAKoABAAVRKAACAAAgAQBAAAGQVJXRU4gAIAB/4MABAAA -AAAAAAAAABADAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACEBSed+zw09ZwAA0VXQQAAAAAA -AAAAAAAAAAgAK+YOZzcBAAAAXAAAAFwAAAB0AAAAADZQ8uEACj0v////////AGCXCO7wCABFAABO -QGQAAIARQUCBb7YcgW///wCJAIkAOkrWpSgBEAABAAAAAAAAIEVGRkFFSkVFQ0FDQUNBQ0FDQUNB -Q0FDQUNBQ0FDQUJPAAAgAAEAAAA8AAAAPAAAAFQAAAAANlDy4QAKoG7///////8ABQIuXxYIBgAB -CAAGBAABAAUCLl8WgW8Mnf///////4FvC1EAAAAAAAAAAAAAAAAAAAAAAAAAAABcAAAAXAAAAHQA -AAAANlDy4QAKsnD///////8AYJcI7vAIAEUAAE5BZAAAgBFAQIFvthyBb///AIkAiQA6+L6lLgEQ -AAEAAAAAAAAgRUZGQUVKRUVFRkVORUpFUEVNRVBFSEZKQ0FDQUNBQkwAACAAAQAAAIAAAACAAAAA -mAAAAAA2UPLhAArab6sABAECAaoABAALRGAHcACrAAQBAgECAKoABAALRKAACAAAgAQBAAAGRFJP -R08gAIAB/4MABAAAAAAAAAAAAB8DAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACEBaM7qD40 -9ZwAA0VUQQAAAAAAAAAAAAAAAAgAKxah6SIAAAAAXAAAAFwAAAB0AAAAADZQ8uEACuFl//////// 
-ACBIBP2MCABFAABO+msAAIARUAuBb+1JgW///wCJAIkAOnqp3RABEAABAAAAAAAAIEVORUZFRUVK -RURFSkVPRUZGUEVKRU9FR0VGRURGRUJMAAAgAAEAAABXAAAAVwAAAHAAAAAANlDy4QALhL0JAAcA -AIgIAIcTNQQASaqqAwgAB4CbAEEAAAAAAFH/4AICAiFNADZk/gAQU1VSR0VSWS1SRVNFQVJDSBNT -QU1VUkFJIExhc2VyV3JpdGVyB1NVUkdFUlkAAAAAPAAAADwAAABUAAAAADZQ8uEADBmH//////// -AKDJ0cO3CAYAAQgABgQAAQCgydHDt4FvC4YAAAAAAACBb+0sAAAAAAAAAAAAAAAAAAAAAAAAAAAA -RwAAAEcAAABgAAAAADZQ8uEADBorAwAAAAABAKDJ0cO3ADnw8AMsAP/vCAAAAAAAAABKU1BOUk1Q -VEdTQlNTRElSUFJPUzA5ICAgICAgICAgBgCgydHDtxQAgDEAAAAAYAAAAGAAAAB4AAAAADZQ8uEA -DO78CQAHAACKAGBwzFuJAFKqqgMIAAeAmwBKOdsAAA+t/9sC/QIheg+t2/0AFVJJQyBDYWxlbmRh -ciBTZXJ2ZXIgMw5OVUQgMi4wIFNlcnZlchBSRVNFQVJDSCBJTUFHSU5HAAAAVwAAAFcAAABwAAAA -ADZQ8uEADVOdCQArAAAPCACHA1rCYAQoCAUFBQL+/+4FHgIgEAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAFWFNDVEIFWFNDVEIB4QpNT0RFTU1BSU5UAAEBAAAAAK4AAACuAAAAyAAAAAA2 -UPLhAA4biQAGKSEiuwgAIJJtoQgARRAAoMDqQAD/Bq6NgW8FKYFvA8gAFgWxhAqyzjnH6a2AGCeY -gGEAAAEBCAoCtYoqNrAdQQAAAGCZRKcA0y9yXylry6qzMgcdW5/op4Y89ggkcmuQkGqIyRWo8+2Q -1nauyEbFfbp3emxWeDWA9Jxj6toyeVsx8kwdTIxdRtGaDgLyNGTeiKMssgjHBnk82HTD4hNkNEt4 -q5QOYy87LopYKQQAAAAA2AAAANgAAADwAAAAADZQ8uEADoYM////////AGCXCO7wCABFAADKQmQA -AIARPsSBb7YcgW///wCKAIoAtvlmERqlNoFvthwAigCgAAAgRUlFRkZDRUNFUEVNRUVERENBQ0FD -QUNBQ0FDQUNBQUEAIEVGRkFFSkVFRUZFTkVKRVBFTUVQRUhGSkNBQ0FDQUJOAP9TTUIlAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAEQAABgAAAAAAAAAAAOgDAAAAAAAAAAAGAFYAAwABAAEAAgAX -AFxNQUlMU0xPVFxCUk9XU0UACQQ4AwAAAAAAgAAAAIAAAACYAAAAADZQ8uEADqN6qwAEAQEBqgAE -AAxEYAdwAKsABAEBAQEAqgAEAAxEoAAIAACABAEAAAZCQUxJTiAAgAH/gwAEAAAAAAAAAAAAHwMA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIQFQOJxNjT1nAADRVhBAAAAAAAAAAAAAAAACAAr -L6z8JAAAAACAAAAAgAAAAJgAAAAANlDy4QAOqTWrAAQBAQGqAAQAB0RgB3AAqwAEAQEBAQCqAAQA -B0SgAAgAAIAEAQAABkJJTEJPIACAAf+DAAQAAAAAAAAAAAAQAwAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAhAXMQic7NPWcAANFV0EAAAAAAAAAAAAAAAAAAPgDZX9NAQAAADoAAAA6AAAAVAAA -AAA2UPLiAAAH+v///////wCgyRaeFAAs4OAD//8AKAABAAAAAv///////wRTAAAAAgCgyRaeFART 
-AAIAAAAJAAEAAgB+fgAAAIcAAACHAAAAoAAAAAA2UPLiAAFXsqsAAAIAAAgAhwE7DWACdwAHAAAA -AQADAwAAAgAC4QADAAYAAAAAAAAEAAIKAAUAAoQABgAC/wAHAAYIAIcBOw1kAAEhZQAIAAAAAAAA -AABmAARCTDIwZwAEQkwyOWgAAgAAaQAFWFBDUjNqABFNU1JEUCBQcmludGVyIHNlcpABAQGRAQLu -BQAAAABCAAAAQgAAAFwAAAAANlDy4gABjN4IACCSbaEABikhIrsIAEUAADQVTQAAPAZdqIFvA8iB -bwUpBbEAFjnH6a2ECrM6gBA+OOthAAABAQgKNrAdQwK1iioCtQAAADwAAAA8AAAAVAAAAAA2UPLi -AAG16f///////wAQS6+xhwgGAAEIAAYEAAEAEEuvsYeBb6hEAAAAAAAAgW8FawVrBWsFawVrBWsF -awVrBWsFawAAAE0AAABNAAAAaAAAAAA2UPLiAAIEZQkABwAAxggAhxq+cQA/qqoDCAAHgJsANwAA -AAAAUf89AgICIVYASlL9AApGaXJzdENsYXNzCEZDU2VydmVyDkNPTU1VTklUWSBERU5UAABNAAAA -RwAAAEcAAABgAAAAADZQ8uIAAiYpCQAHAADlCACHKiZBADmqqgMIAAeAmwAxAAAAAABR/z4CAgIh -xAAfpP0AAT0MU3Rhck5pbmUgS2V5DUlCVCAzcmQgZmxvb3IAAAAAYgAAAGIAAAB8AAAAADZQ8uIA -AqI5CAArLbiOqgAEAAJEgEFUAAYAAAAIACvihkwAAAAAAwEDAwEGWklOR0VSAAAAAAAAAAAAAAAA -AAAAAAAANRAAAAD/U01CJQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABEAAAAAADQAAAA0AAAA -TAAAAAA2UPLiAAM1jgGAwgAAAABgPslPOwAmQkIDAAAAAAAAAQDgoz7BAAAAAAAAAQDgoz7BAIFE -AAAUAAIADwAAAAA8AAAAPAAAAFQAAAAANlDy4gADl2QJAAcAACsIAIcavmkALqqqAwgAB4CbACYA -AAAAAFH/TgICAiH1D6uN/gABPQNOUkwLUEFUSE9MT0dZL0UAAABMAAAATAAAAGQAAAAANlDy4gAD -pOEJAAcAAHwIAIcTNQQAPqqqAwgAB4CbADYAAAAAAFH/4AICAiHMAEwm/QAIMTI1ODQ3NDEMU3Rh -ck5pbmUgS2V5C1BsZXh1cyBNYWluAAAATAAAAEwAAABkAAAAADZQ8uIAA8KtCQAHAAB8CACHEzUE -AD6qqgMIAAeAmwA2AAAAAABR/+ACAgIhzQBMJv0ACDEyNjMyNzYzDFN0YXJOaW5lIEtleQtQbGV4 -dXMgTWFpbgAAAFkAAABZAAAAdAAAAAA2UPLiAARGfAkABwAA4wgAhyomQQBLqqoDCAAHgJsAQwAA -AAAAUf8+AgICIaMALsv9AAxERFMgQ2FsZW5kZXIOTlVEIDIuMCBTZXJ2ZXISREVOVEFMIERJQUdO -T1NUSUNTAElDAAAA8wAAAPMAAAEMAAAAADZQ8uIABGKl////////AMBPuvt0CABFAADlRzgAAIAR -QtOBb60egW///wCKAIoA0bhhEQKJQIFvrR4AigC7AAAgRU5GREZDRUVGQUREREFDQUNBQ0FDQUNB -Q0FDQUNBQUEAIEVORkRGQ0VFRkFDQUNBQ0FDQUNBQ0FDQUNBQ0FDQUJOAP9TTUIlAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAEQAAIQAAAAAAAAAAAOgDAAAAAAAAAAAhAFYAAwABAAEAAgAyAFxN -QUlMU0xPVFxCUk9XU0UAAQCA/AoATVNSRFAzMAAAAAAAAAAAAAQAAxADAA8BVaoAAAAAALQAAAC0 
-AAAAzAAAAAA2UPLiAARlSQMAAAAAAQDAT7r7dACm8PADLAD/7wgAAAAAAAAATVNSRFAgICAgICAg -ICAgHU1TUkRQMzAgICAgICAgIAD/U01CJQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABEAACEA -AAAAAAAAAADoAwAAAAAAAAAAIQBWAAMAAQABAAIAMgBcTUFJTFNMT1RcQlJPV1NFAAEAgPwKAE1T -UkRQMzAAAAAAAAAAAAAEAAMQAQAPAVWqAAAAADkAAAA5AAAAVAAAAAA2UPLiAAXKfwkAB////wgA -hxq+cQArqqoDCAAHgJsAIwAAAAAAUf89AQEBAFEIPQAKgABjgg+kgA+kggAKgABjgkNBQwAAAFwA -AABcAAAAdAAAAAA2UPLiAAZwp////////wBglwju8AgARQAATkRkAACAET1AgW+2HIFv//8AiQCJ -ADpK1qUoARAAAQAAAAAAACBFRkZBRUpFRUNBQ0FDQUNBQ0FDQUNBQ0FDQUNBQ0FCTwAAIAABAAAA -XAAAAFwAAAB0AAAAADZQ8uIABuXr////////AGCXCO7wCABFAABORWQAAIARPECBb7YcgW///wCJ -AIkAOvi+pS4BEAABAAAAAAAAIEVGRkFFSkVFRUZFTkVKRVBFTUVQRUhGSkNBQ0FDQUJMAAAgAAEA -AABcAAAAXAAAAHQAAAAANlDy4gAHEJr///////8AIEgE/YwIAEUAAE78awAAgBFOC4Fv7UmBb/// -AIkAiQA6eqndEAEQAAEAAAAAAAAgRU5FRkVFRUpFREVKRU9FRkZQRUpFT0VHRUZFREZFQkwAACAA -AQAAADIAAAAyAAAATAAAAAA2UPLiAAep+wkAB////wgAB/fXPwAkqqoDAAAAgPMAAYCbBgQAAQgA -B/fXPwAAVz8AAAAAAAAAAB6tAAAAAAA8AAAAPAAAAFQAAAAANlDy4gAIss2rAAADAACqAAQAEEVg -AyIADQIAAKoABAAQRQPaBQAAAAAAAAAAAKoABADKRA8AAAKqqgAAAAAAAAAAAAAAAABgAAAAYAAA -AHgAAAAANlDy4gAKOtEJAAcAAIoAYHDMW4kAUqqqAwgAB4CbAEo52wAAD63/2wL9AiF6D63b/QAV -UklDIENhbGVuZGFyIFNlcnZlciAzDk5VRCAyLjAgU2VydmVyEFJFU0VBUkNIIElNQUdJTkcAAAA5 -AAAAOQAAAFQAAAAANlDy4gAK1qgJAAf///8IAIcavmgAK6qqAwgAB4CbACMAAAAAAD///QEBAQA/ -CP0ACoAAY4IPpYAPpYIACoAAY4JydmUAAADYAAAA2AAAAPAAAAAANlDy4gAK5HT///////8AIEgE -/YwIAEUAAMr+awAAgBFLj4Fv7UmBb///AIoAigC2oN0RGt0cgW/tSQCKAKAAACBFQkZHRUZFT0VI -RUZGQ0NBQ0FDQUNBQ0FDQUNBQ0FBQQAgRURFUEVORkFGUEZDRUZGREVQRkZGQ0VERUZGRENBQk4A -/1NNQiUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAARAAAGAAAAAAAAAAAA6AMAAAAAAAAAAAYA -VgADAAEAAQACABcAXE1BSUxTTE9UXEJST1dTRQAJBANDAAAAAACAAAAAgAAAAJgAAAAANlDy4gAM -Xq6rAAQBAgGqAAQA1ERgB3AAqwAEAQIBAgCqAAQA1ESgAAgAAIAEAQAABkRBTVJPRACAAf+DAAQA -AAAAAAAAAAAQAwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAhAWJp2o0NPWcAANFV0EAAAAA -AAAAAAAAAAAIACvkVjU3AQAAAJYAAACWAAAAsAAAAAA2UPLiAA5pNgAGKSEiuwgAIJJtoQgARRAA 
-iMDrQAD/Bq6kgW8FKYFvA8gAFgWxhAqzOjnH6a2AGCeYko4AAAEBCAoCtYqQNrAdQwAAAEzLML1u -LhCjeZFbq89wJoV5Cr/5hW4uqla4fw9z9PN8kgssjEtaNSPYK7t0pjyxCn0TGLvwJ+yV3XR+ND1L -J+xTDnLRsYcRv3llMoF6SFjLWgjHAAAAXAAAAFwAAAB0AAAAADZQ8uIADooA////////AGCXCO7w -CABFAABORmQAAIARO0CBb7YcgW///wCJAIkAOvi4pTQBEAABAAAAAAAAIEVGRkFFSkVFRUZFTkVK -RVBFTUVQRUhGSkNBQ0FDQUJMAAAgAAEAAABHAAAARwAAAGAAAAAANlDy4gAOsGwJAAcAAOUIAIcq -JkEAOaqqAwgAB4CbADEAAAAAAFH/PgICAiHEAB+k/QABPQxTdGFyTmluZSBLZXkNSUJUIDNyZCBm -bG9vcgAAAABcAAAAXAAAAHQAAAAANlDy4gAOv2L///////8AEFofFs4IAEUAAE43vwAAgBFJOoFv -tseBb///AIkAiQA6Byi5CgEQAAEAAAAAAAAgRUtGREZBRU9GQ0VORkFGRUVIRkRFQ0ZERkRFRUVK -RkMAACAAAQAAAFkAAABZAAAAdAAAAAA2UPLjAACMLgkABwAA4wgAhyomQQBLqqoDCAAHgJsAQwAA -AAAAUf8+AgICIaMALsv9AAxERFMgQ2FsZW5kZXIOTlVEIDIuMCBTZXJ2ZXISREVOVEFMIERJQUdO -T1NUSUNTAElDAAAATAAAAEwAAABkAAAAADZQ8uMAAMEeCQAHAAB8CACHEzUEAD6qqgMIAAeAmwA2 -AAAAAABR/+ACAgIhzABMJv0ACDEyNTg0NzQxDFN0YXJOaW5lIEtleQtQbGV4dXMgTWFpbgAAAEwA -AABMAAAAZAAAAAA2UPLjAADc9AkABwAAfAgAhxM1BAA+qqoDCAAHgJsANgAAAAAAUf/gAgICIc0A -TCb9AAgxMjYzMjc2MwxTdGFyTmluZSBLZXkLUGxleHVzIE1haW4AAABCAAAAQgAAAFwAAAAANlDy -4wACUBkIACCSbaEABikhIrsIAEUAADQVTwAAPAZdpoFvA8iBbwUpBbEAFjnH6a2ECrOOgBA+OOql -AAABAQgKNrAdRQK1ipACtQAAAK4AAACuAAAAyAAAAAA2UPLjAALfzv///////wAgr0+FSACg//8A -oAAEEwAAAf///////wRSEwAAAQAgr0+FSARSAAIGQFdJTExJQU1TUkYtMQAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAoAAAAAAAHohQABBk5XSUxMSUFNU1JGLTEhISFBNTU2 -OUIyMEFCRTUxMUNFOUNBNDAwMDA0Qzc2MjgzMgAAAAAKAAAAAAABQBgAAQCuAAAAXAAAAFwAAAB0 -AAAAADZQ8uMAA0AT////////ACBIBP2MCABFAABOAWwAAIARSQuBb+1JgW///wCJAIkAOnqp3RAB -EAABAAAAAAAAIEVORUZFRUVKRURFSkVPRUZGUEVKRU9FR0VGRURGRUJMAAAgAAEAAABYAAAAWAAA -AHAAAAAANlDy4wADze7///////8AYJcFDTEASv//AEkAAAAAAAD///////+QARMAAAEAYJcFDTGQ -AYMbAgAPAQAAEQIAMviVzAAeACtAAgAy+JXMAcAIAAAAAAAAAADFBAAABdwAAAAAPAAAADwAAABU -AAAAADZQ8uMABHeR////////AKDJ0cO3CAYAAQgABgQAAQCgydHDt4FvC4YAAAAAAACBbxLLAAAA -AAAAAAAAAAAAAAAAAAAAAAAAgAAAAIAAAACYAAAAADZQ8uMABN/gqwAEAQIBqgAEAAtEYAdwAKsA 
-BAECAQIAqgAEAAtEoAAIAACABAEAAAZEUk9HTyAAgAH/gwAEAAAAAAAAAAAAHwMAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAIQFo1+cPzT1nAADRVRBAAAAAAAAAAAAAAAACAArFqHpIgAAAAA8 -AAAAPAAAAFQAAAAANlDy4wAGryEJAAcAACsIAIcavmkALqqqAwgAB4CbACYAAAAAAFH/TgICAiH1 -D6uN/gABPQNOUkwLUEFUSE9MT0dZL0UAAABgAAAAYAAAAHgAAAAANlDy4wAHdeMJAAcAAIoAYHDM -W4kAUqqqAwgAB4CbAEo52wAAD63/2wL9AiF6D63b/QAVUklDIENhbGVuZGFyIFNlcnZlciAzDk5V -RCAyLjAgU2VydmVyEFJFU0VBUkNIIElNQUdJTkcAAAA0AAAANAAAAEwAAAAANlDy4wAIKh0BgMIH -BwcIAIcTNHcAJkJCAwAAAAAAAAIIAIcDNHcAAAAAAAIIAIcDNHeAAQAAFAACAA8AAAAA+wAAAPsA -AAEUAAAAADZQ8uMACKhI////////ACCvOZcwCABFAADt5wAAACARBB6Bb6wDgW///wCKAIoA2Y+N -EQIATYFvrAMAigDDAAAgRkRFRUZBRkNFQkVDRUlGRkNBQ0FDQUNBQ0FDQUNBQUEAIEVFRUZGQUZF -Q0FFUEVHQ0FFREVCRkNFRUNBQ0FDQUJOAP9TTUIlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -EQAAKQAAAAAAAAAAAAAAAAAAAAAAAAApAFYAAwABAAEAAgA6AFxNQUlMU0xPVFxCUk9XU0UAAUOg -uw0AU0RQUkFCSFUAAAAAAAAAAAQAAyBBABUEVapTRFBSQUJIVQAAAAAAPAAAADwAAABUAAAAADZQ -8uMACLNU////////AAAyJQ//CAYAAQgABgQAAQAAMiUP/4FvqEMAAAAAAACBb6wDRQAAzDscAAB+ -EQAAgW+ZCf//AAAA+gAAAPoAAAEUAAAAADZQ8uMACNlP////////ACCv1f40CABFAADsy2wAACAR -vleBbw1fgW///wCKAIoA2DC2EQIE+YFvDV8AigDCAAAgRUdGQ0VGRUZFTkVCRU9DQUNBQ0FDQUNB -Q0FDQUNBQUEAIEVFRUZGQUZFQ0FFUEVHQ0FFREVCRkNFRUNBQ0FDQUJOAP9TTUIlAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAEQAAKAAAAAAAAAAAAAAAAAAAAAAAAAAoAFYAAwABAAEAAgA5AFxN -QUlMU0xPVFxCUk9XU0UAAfaguw0ARlJFRU1BTgAAAAAAAAAAAAQAAyBBABUEVapwZW50aXVtAGl1 -AAAAPAAAADwAAABUAAAAADZQ8uMACORY////////AAAyJQ//CAYAAQgABgQAAQAAMiUP/4FvqEMA -AAAAAACBbw1fRQAAzDscAAB+EQAAgW+ZCf//AAAA/AAAAPwAAAEUAAAAADZQ8uMACYjI//////// -AKDJH1qBCABFAADuJAMAACARxxWBb6wIgW///wCKAIoA2qgYEQIAXoFvrAgAigDEAAAgRU5FUEVQ -RUVGSkNBQ0FDQUNBQ0FDQUNBQ0FDQUNBQUEAIEVFRUZGQUZFQ0FFUEVHQ0FFREVCRkNFRUNBQ0FD -QUJOAP9TTUIlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEQAAKgAAAAAAAAAAAAAAAAAAAAAA -AAAqAFYAAwABAAEAAgA7AFxNQUlMU0xPVFxCUk9XU0UAAVGguw0ATU9PRFkAAAAAAAAAAAAAAAQA -AyJBABUEVapqIG0gbW9vZHkAAAAAPAAAADwAAABUAAAAADZQ8uMACZPW////////AAAyJQ//CAYA 
-AQgABgQAAQAAMiUP/4FvqEMAAAAAAACBb6wIRQAAzDscAAB+EQAAgW+ZCf//AAAATgAAAE4AAABo -AAAAADZQ8uMAChHTCQAHAACICACHGr5oAECqqgMIAAeAmwA4AAAAAAA///0CAgIhTgA2ZP4AD1NV -UkdFUlktUExBU1RJQwtMYXNlcldyaXRlcgdTVVJHRVJZAE4AAABOAAAATgAAAGgAAAAANlDy4wAK -N4kJAAcAAIgIAIcavmgAQKqqAwgAB4CbADgAAAAAAD///QICAiFPADZk/gAPU1VSR0VSWS1DQVJE -SUFDC0xhc2VyV3JpdGVyB1NVUkdFUlkATgAAAE4AAABOAAAAaAAAAAA2UPLjAApcJQkABwAAiAgA -hxq+aABAqqoDCAAHgJsAOAAAAAAAP//9AgICIVAANmT+AA9TVVJHRVJZLUFETUlOIDELTGFzZXJX -cml0ZXIHU1VSR0VSWQBOAAAAXAAAAFwAAAB0AAAAADZQ8uMACr2A////////AGCXCO7wCABFAABO -TGQAAIARNUCBb7YcgW///wCJAIkAOvi4pTQBEAABAAAAAAAAIEVGRkFFSkVFRUZFTkVKRVBFTUVQ -RUhGSkNBQ0FDQUJMAAAgAAEAAABcAAAAXAAAAHQAAAAANlDy4wAK4Qr///////8AIEgE/YwIAEUA -AE4CbAAAgBFIC4Fv7UmBb///AIkAiQA6eqPdFgEQAAEAAAAAAAAgRU5FRkVFRUpFREVKRU9FRkZQ -RUpFT0VHRUZFREZFQkwAACAAAQAAAFwAAABcAAAAdAAAAAA2UPLjAArukf///////wAQWh8WzggA -RQAATjm/AACAEUc6gW+2x4Fv//8AiQCJADoHKLkKARAAAQAAAAAAACBFS0ZERkFFT0ZDRU5GQUZF -RUhGREVDRkRGREVFRUpGQwAAIAABAAAAMwAAADMAAABMAAAAADZQ8uMAC22i////////CAARCFdl -ACXg4AP//wAiAAQAAAAC////////BFIAAAACCAARCFdlRZEAAwAEUgAAADAAAAAwAAAASAAAAAA2 -UPLjAAtuYP///////wgAEQhXZQAi//8AIgAEEwAAAf///////wRSEwAAAQgAEQhXZUWRAAMABAAA -ADwAAAA8AAAAVAAAAAA2UPLjAAtvA////////wgAEQhXZYE3//8AIgAEAAAAAP///////wRSAAAA -AAgAEQhXZUWRAAMABAMABFIAAgU1VEVLMAAAADgAAAA4AAAAUAAAAAA2UPLjAAtvrf///////wgA -EQhXZQAqqqoDAAAAgTf//wAiAAQAAAAA////////BFIAAAAACAARCFdlRZEAAwAEAAAARwAAAEcA -AABgAAAAADZQ8uMAC/rQCQAHAADlCACHKiZBADmqqgMIAAeAmwAxAAAAAABR/z4CAgIhxAAfpP0A -AT0MU3Rhck5pbmUgS2V5DUlCVCAzcmQgZmxvb3IAAAAAWQAAAFkAAAB0AAAAADZQ8uMADCElCQAH -AADjCACHKiZBAEuqqgMIAAeAmwBDAAAAAABR/z4CAgIhowAuy/0ADEREUyBDYWxlbmRlcg5OVUQg -Mi4wIFNlcnZlchJERU5UQUwgRElBR05PU1RJQ1MASUMAAAD4AAAA+AAAARAAAAAANlDy4wAMQhv/ -//////8AgK24bvEIAEUAAOqG/AAAgBGgvYFvD2uBb///AIoAigDWbykRAgd+gW8PawCKAMAAACBF -SEVCRkNFREVKRUJDQUNBQ0FDQUNBQ0FDQUNBQ0FBQQAgRUVFRkZBRkVDQUVQRUdDQUVERUJGQ0VF -Q0FDQUNBQk4A/1NNQiUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAARAAAmAAAAAAAAAAAAAAAA 
-AAAAAAAAACYAVgADAAEAAQACADcAXE1BSUxTTE9UXEJST1dTRQABr6C7DQBHQVJDSUEAAAAAAAAA -AAAABAADIkEAFQRVqlVTRVIxAAAAAEwAAABMAAAAZAAAAAA2UPLjAA0VkQkABwAAfAgAhxM1BAA+ -qqoDCAAHgJsANgAAAAAAUf/gAgICIc0ATCb9AAgxMjYzMjc2MwxTdGFyTmluZSBLZXkLUGxleHVz -IE1haW4AAABMAAAATAAAAGQAAAAANlDy4wANMZgJAAcAAHwIAIcTNQQAPqqqAwgAB4CbADYAAAAA -AFH/4AICAiHMAEwm/QAIMTI1ODQ3NDEMU3Rhck5pbmUgS2V5C1BsZXh1cyBNYWluAAAAgAAAAIAA -AACYAAAAADZQ8uMADWAQqwAEAQEBqgAEAAdEYAdwAKsABAEBAQEAqgAEAAdEoAAIAACABAEAAAZC -SUxCTyAAgAH/gwAEAAAAAAAAAAAAEAMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIQFC5VL -PDT1nAADRVdBAAAAAAAAAAAAAAAAAAD4A2V/TQEAAADYAAAA2AAAAPAAAAAANlDy4wAOlPP///// -//8AYJcI7vAIAEUAAMpOZAAAgBEyxIFvthyBb///AIoAigC2QGMRGqVAgW+2HACKAKAAACBFSUVG -RkNFQ0VQRU1FRUREQ0FDQUNBQ0FDQUNBQ0FBQQAgRkRGQUVJRkRGRUVCRUdFR0NBQ0FDQUNBQ0FD -QUNBQk4A/1NNQiUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAARAAAGAAAAAAAAAAAA6AMAAAAA -AAAAAAYAVgADAAEAAQACABcAXE1BSUxTTE9UXEJST1dTRQAJBDkDAAAAAACmAAAApgAAAMAAAAAA -NlDy4wAOt7kABikhIrsIACCSbaEIAEUQAJjA7EAA/wauk4FvBSmBbwPIABYFsYQKs445x+mtgBgn -mEFQAAABAQgKArWK9jawHUUAAABaVN6L2lMh6zY8OuUOfQ6CmZeR8mMPbU9T0GhpcPxZHN7bzege -o4KJEF/4clymm1N6SxJCQKb3LHmeqmUMZTZMjQmOprFqgn3fCus6qHpO53pafpuWWYmmhMvMcS5Y -/aO9DmMAAABCAAAAQgAAAFwAAAAANlDy4wAPIUQIACCSbaEABikhIrsIAEUAADQVUQAAPAZdpIFv -A8iBbwUpBbEAFjnH6a2ECrPygBA+OOnZAAABAQgKNrAdRwK1ivYCtQAAAIAAAACAAAAAmAAAAAA2 -UPLkAAD8MqsABAEBAaoABAAVRGAHcACrAAQBAQEBAKoABAAVRKAACAAAgAQBAAAGQVJXRU4gAIAB -/4MABAAAAAAAAAAAABADAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACEBW/yaz409ZwAA0VX -QQAAAAAAAAAAAAAAAAgAK+YOZzcBAAAAkgAAAJIAAACsAAAAADZQ8uQAAUBk////////AKDJ8l4d -CABFAACENGsAAEARMdWBbxFLgW///wXlAG8AcJZoNldUegAAAAAAAAACAAGGoAAAAAIAAAAFAAAA -AQAAACA2UPPOAAAAC2hhbnNlbi1sYWIzAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGGpAAAAAIAAAAC -AAAAEAAAAAxiaW9jaGVtaXN0cnkAAAAAAEsAAABLAAAAZAAAAAA2UPLkAAFxsAkAB////wgAhxq+ -aQA9qqoDCAAHgJsANQAAAAAAUf9OAQEBAFEITgAKgABjggAFgQAFggAGgQAGggAHgQAHgg+rgA+r -ggAKgABjggAAAABeAAAAXgAAAHgAAAAANlDy5AACM9f///////8AIK85l/wAUP//AFAABAAAAAD/ 
-//////8EVRMAAAEAIK85l/wEVRABJwBNAAQAIwAAACMAAwAGAEVQSURFTUlPTE9HWSAgAAEBAl9f -TVNCUk9XU0VfXwIBAF4AAAA8AAAAPAAAAFQAAAAANlDy5AACeZn///////8AEFoKQzkIBgABCAAG -BAABABBaCkM5gW+yEQAAAAAAAIFv9ikAAAAAAAAAAAAAAAAAAAAAAAAAAABiAAAAYgAAAHwAAAAA -NlDy5AACqXUIACstuI6qAAQAAkSAQVQABgAAAAgAK+KGTAAAAAADAQMDAQZaSU5HRVIAAAAAAAAA -AAAAAAAAAAAAAAA1EAAAAP9TTUIlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEQAAAAAAPAAA -ADwAAABUAAAAADZQ8uQAAwdI////////ABBaCkM5CAYAAQgABgQAAQAQWgpDOYFvshEAAAAAAACB -b/Y6AAAAAAAAAAAAAAAAAAAAAAAAAAAANAAAADQAAABMAAAAADZQ8uQAAybOAYDCAAAAAGA+yU87 -ACZCQgMAAAAAAAABAOCjPsEAAAAAAAABAOCjPsEAgUQAABQAAgAPAAAAADwAAAA8AAAAVAAAAAA2 -UPLkAAOMyv///////wAQWgpDOQgGAAEIAAYEAAEAEFoKQzmBb7IRAAAAAAAAgW/2OQAAAAAAAAAA -AAAAAAAAAAAAAAAAAGAAAABgAAAAeAAAAAA2UPLkAAS4RAkABwAAigBgcMxbiQBSqqoDCAAHgJsA -SjnbAAAPrf/bAv0CIXoPrdv9ABVSSUMgQ2FsZW5kYXIgU2VydmVyIDMOTlVEIDIuMCBTZXJ2ZXIQ -UkVTRUFSQ0ggSU1BR0lORwAAADIAAAAyAAAATAAAAAA2UPLkAAT6fwkAB////wAFAoYeAQAkqqoD -AAAAgPMAAYCbBgQAAQAFAoYeAQAAEdUAAAAAAAAAAEUDAAAAAACSAAAAkgAAAKwAAAAANlDy5AAF -v+D///////8AoMnyXh0IAEUAAIQ0bwAAQBEx0YFvEUuBb///BeYAbwBwGds2XNEBAAAAAAAAAAIA -AYagAAAAAgAAAAUAAAABAAAAIDZQ884AAAALaGFuc2VuLWxhYjMAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAYakAAAAAgAAAAIAAAAQAAAADGJpb2NoZW1pc3RyeQAAAAAAPAAAADwAAABUAAAAADZQ8uQA -BhyD////////AGAIKz/qCAYAAQgABgQAAQBgCCs/6oFv3SEAAAAAAACBbwENAQ0BDQENAQ0BDQEN -AQ0BDQENAAAAXAAAAFwAAAB0AAAAADZQ8uQABvEa////////AGCXCO7wCABFAABOUWQAAIARMECB -b7YcgW///wCJAIkAOvi4pTQBEAABAAAAAAAAIEVGRkFFSkVFRUZFTkVKRVBFTUVQRUhGSkNBQ0FD -QUJMAAAgAAEAAACAAAAAgAAAAJgAAAAANlDy5AAG/cirAAQBAQGqAAQADERgB3AAqwAEAQEBAQCq -AAQADESgAAgAAIAEAQAABkJBTElOIACAAf+DAAQAAAAAAAAAAAAfAwAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAhAWAWu83NPWcAANFWEEAAAAAAAAAAAAAAAAIACsvrPwkAAAAAFwAAABcAAAA -dAAAAAA2UPLkAAcQWP///////wAgSAT9jAgARQAATgRsAACAEUYLgW/tSYFv//8AiQCJADp6o90W -ARAAAQAAAAAAACBFTkVGRUVFSkVERUpFT0VGRlBFSkVPRUdFRkVERkVCTAAAIAABAAAAXAAAAFwA -AAB0AAAAADZQ8uQABx3O////////ABBaHxbOCABFAABOPr8AAIARQjqBb7bHgW///wCJAIkAOgco 
-uQoBEAABAAAAAAAAIEVLRkRGQUVPRkNFTkZBRkVFSEZERUNGREZERUVFSkZDAAAgAAEAAABHAAAA -RwAAAGAAAAAANlDy5AAH/vgDAAAAAAEAoMkWnhQAOfDwAywA/+8IAAAAAAAAAEpTUE5STVBUR1NC -U1NESVJGQU1QMzMgICAgICAgICAGAKDJFp4U+HcAAAAAAABZAAAAWQAAAHQAAAAANlDy5AAIcsgJ -AAcAAOMIAIcqJkEAS6qqAwgAB4CbAEMAAAAAAFH/PgICAiGjAC7L/QAMRERTIENhbGVuZGVyDk5V -RCAyLjAgU2VydmVyEkRFTlRBTCBESUFHTk9TVElDUwBJQwAAADwAAAA8AAAAVAAAAAA2UPLkAAir -A6sAAAMAAKoABAAMRGADKQALAgAAqgAEAAxEAtoFQAAPAAAWAAAAAAAAAA6qAAQAykTAqgAEAAtE -wAACAAAAAEcAAABHAAAAYAAAAAA2UPLkAAlKwgkABwAA5QgAhyomQQA5qqoDCAAHgJsAMQAAAAAA -Uf8+AgICIcQAH6T9AAE9DFN0YXJOaW5lIEtleQ1JQlQgM3JkIGZsb29yAAAAADwAAAA8AAAAVAAA -AAA2UPLkAAnL/AkABwAAKwgAhxq+aQAuqqoDCAAHgJsAJgAAAAAAUf9OAgICIfUPq43+AAE9A05S -TAtQQVRIT0xPR1kvRQAAAEwAAABMAAAAZAAAAAA2UPLkAAoyCgkABwAAfAgAhxM1BAA+qqoDCAAH -gJsANgAAAAAAUf/gAgICIc0ATCb9AAgxMjYzMjc2MwxTdGFyTmluZSBLZXkLUGxleHVzIE1haW4A -AABMAAAATAAAAGQAAAAANlDy5AAKTs0JAAcAAHwIAIcTNQQAPqqqAwgAB4CbADYAAAAAAFH/4AIC -AiHMAEwm/QAIMTI1ODQ3NDEMU3Rhck5pbmUgS2V5C1BsZXh1cyBNYWluAAAAbgAAAG4AAACIAAAA -ADZQ8uQAC4Dz////////AKDJJFTBAGD//wBgAAQTAAAB////////BFITAAABAKDJJFTBQAgAAgZA -Uk9PTS01MThGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEwAAAQCgySRU -weiFAAEAbgAAAFwAAABcAAAAdAAAAAA2UPLkAAwY7P///////wCgydHDtwgARQAATo9YAACAEZzi -gW8LhoFv//8AiQCJADp/zeumARAAAQAAAAAAACBFS0ZERkFFT0ZDRU5GQUZFRUhGREVDRkRGREVF -RUpGQwAAIAABAAAAcgAAAHIAAACMAAAAADZQ8uQADXsU////////CAAHpMrmAGTg4AP//wBgAAQA -AAAC////////BFIAAAACCAAHpMrmBFIAAgYYQVBQTEVfTFdhNGNhZTYAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAggAB6TK5kALAAExCwAAAABeAAAAXgAAAHgAAAAANlDy5AAO -Kf3///////8AIK85l/wAUP//AFAAFAAAAAD///////8EVRMAAAEAIK85l/wEVRABJwBNAAQAIwAA -ACMAAwAGAEVQSURFTUlPTE9HWSAgAAEBAl9fTVNCUk9XU0VfXwIBAF4AAABiAAAAYgAAAHwAAAAA -NlDy5AAOK7L///////8AYJcFDTEAU+DgA///AFABFAAAAAL///////8EVRMAAAEAIK85l/wEVRMA -AAFNAAQAIwAAACMAAwAGAEVQSURFTUlPTE9HWSAgAAEBAl9fTVNCUk9XU0VfXwIBAF8CAAAAXAAA -AFwAAAB0AAAAADZQ8uQADpT6////////AGCXCO7wCABFAABOUmQAAIARL0CBb7YcgW///wCJAIkA 
-OviypToBEAABAAAAAAAAIEVGRkFFSkVFRUZFTkVKRVBFTUVQRUhGSkNBQ0FDQUJMAAAgAAEAAACW -AAAAlgAAALAAAAAANlDy5AAO3wcABikhIrsIACCSbaEIAEUQAIjA7UAA/wauooFvBSmBbwPIABYF -sYQKs/I5x+mtgBgnmKJyAAABAQgKArWLWzawHUcAAABPOEzZZJBozNsJfDQLrXqLzXutMrAVQ2FL -4I8x/xOEGqTMdFGaKi4COI2HOeGDBLNW1T4Pljc++HiOJaPpczg0W9koIasEv+QrnPgLxKpmpERa -fgAAAPMAAADzAAABDAAAAAA2UPLkAA70Pv///////wAQSy6rkggARQAA5fxVAACAES1fgW8NdYFv -//8AigCKANHk7xEagJKBbw11AIoAuwAAIEZBRURFTkZEREJERUVPRkVDQUNBQ0FDQUNBQ0FDQUNB -ACBFTUVKRUNGQ0VCRkNGSkNBQ0FDQUNBQ0FDQUNBQ0FCTgD/U01CJQAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAABEAACEAAAAAAAAAAADoAwAAAAAAAAAAIQBWAAMAAQAAAAIAMgBcTUFJTFNMT1Rc -QlJPV1NFAAEAgPwKAFBDTVMxNE5UAAAAAAAAAAAEAAMQAQAPAVWqAAAAAAA5AAAAOQAAAFQAAAAA -NlDy5AAPFmUJAAf///8AYHDMW4kAK6qqAwgAB4CbACOS5QAAAC3/DwEBAQAtCA8ACoAAY4IPrIAP -roIPt4APuIIAAAAAAABuAAAAbgAAAIgAAAAANlDy5QAAL+H///////8AwE+Y+xcAYP//AGAABAAA -AAD///////8EUhMAAAEAwE+Y+xcEUgACBkBMVUFOTlNfUEMAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAATAAABAMBPmPsXQA4AAQBuAAAAQgAAAEIAAABcAAAAADZQ8uUAAKI7 -CAAgkm2hAAYpISK7CABFAAA0FVMAADwGXaKBbwPIgW8FKQWxABY5x+mthAq0RoAQPjjpHgAAAQEI -CjawHUkCtYtbArUAAACAAAAAgAAAAJgAAAAANlDy5QABskqrAAQBAgGqAAQAC0RgB3AAqwAEAQIB -AgCqAAQAC0SgAAgAAIAEAQAABkRST0dPIACAAf+DAAQAAAAAAAAAAAAfAwAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAhAUjCK9ANPWcAANFVEEAAAAAAAAAAAAAAAAIACsWoekiAAAAAIAAAACA -AAAAmAAAAAA2UPLlAAHkgasABAECAaoABADURGAHcACrAAQBAgECAKoABADURKAACAAAgAQBAAAG -REFNUk9EAIAB/4MABAAAAAAAAAAAABADAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACEBe6j -yzU09ZwAA0VXQQAAAAAAAAAAAAAAAAgAK+RWNTcBAAAAYAAAAGAAAAB4AAAAADZQ8uUAAfuoCQAH -AACKAGBwzFuJAFKqqgMIAAeAmwBKOdsAAA+t/9sC/QIheg+t2/0AFVJJQyBDYWxlbmRhciBTZXJ2 -ZXIgMw5OVUQgMi4wIFNlcnZlchBSRVNFQVJDSCBJTUFHSU5HAAAAXAAAAFwAAAB0AAAAADZQ8uUA -Az/a////////ACBIBP2MCABFAABOBWwAAIARRQuBb+1JgW///wCJAIkAOnqj3RYBEAABAAAAAAAA -IEVORUZFRUVKRURFSkVPRUZGUEVKRU9FR0VGRURGRUJMAAAgAAEAAAA8AAAAPAAAAFQAAAAANlDy -5QADXA////////8AEFofFs4IBgABCAAGBAABABBaHxbOgW+2xwAAAAAAAIFvAQsAAAAAAAAAAAAA 
-AAAAAAAAAAAAAABZAAAAWQAAAHQAAAAANlDy5QAEwBwJAAcAAOMIAIcqJkEAS6qqAwgAB4CbAEMA -AAAAAFH/PgICAiGjAC7L/QAMRERTIENhbGVuZGVyDk5VRCAyLjAgU2VydmVyEkRFTlRBTCBESUFH -Tk9TVElDUwBJQwAAADIAAAAyAAAATAAAAAA2UPLlAAZXCgkAB////wAFAuimGQAkqqoDAAAAgPMA -AYCbBgQAAQAFAuimGQAAW7cAAAAAAAAAABCoAAAAAABHAAAARwAAAGAAAAAANlDy5QAGk+wJAAcA -AOUIAIcqJkEAOaqqAwgAB4CbADEAAAAAAFH/PgICAiHEAB+k/QABPQxTdGFyTmluZSBLZXkNSUJU -IDNyZCBmbG9vcgAAAABMAAAATAAAAGQAAAAANlDy5QAHQP0JAAcAAHwIAIcTNQQAPqqqAwgAB4Cb -ADYAAAAAAFH/4AICAiHNAEwm/QAIMTI2MzI3NjMMU3Rhck5pbmUgS2V5C1BsZXh1cyBNYWluAAAA -TAAAAEwAAABkAAAAADZQ8uUAB113CQAHAAB8CACHEzUEAD6qqgMIAAeAmwA2AAAAAABR/+ACAgIh -zABMJv0ACDEyNTg0NzQxDFN0YXJOaW5lIEtleQtQbGV4dXMgTWFpbgAAADQAAAA0AAAATAAAAAA2 -UPLlAAgp1AGAwgcHBwgAhxM0dwAmQkIDAAAAAAAAAggAhwM0dwAAAAAAAggAhwM0d4ABAAAUAAIA -DwAAAABcAAAAXAAAAHQAAAAANlDy5QAISET///////8AoMnRw7cIAEUAAE6QWAAAgBGb4oFvC4aB -b///AIkAiQA6f83rpgEQAAEAAAAAAAAgRUtGREZBRU9GQ0VORkFGRUVIRkRFQ0ZERkRFRUVKRkMA -ACAAAQAAADwAAAA8AAAAVAAAAAA2UPLlAAmTsP///////wAQS6+xhwgGAAEIAAYEAAEAEEuvsYeB -b6hEAAAAAAAAgW8FVwVXBVcFVwVXBVcFVwVXBVcFVwAAADwAAAA8AAAAVAAAAAA2UPLlAAmUEv// -/////wAQS6+xhwgGAAEIAAYEAAEAEEuvsYeBb6hEAAAAAAAAgW8Mdgx2DHYMdgx2DHYMdgx2DHYM -dgAAADwAAAA8AAAAVAAAAAA2UPLlAAmU8f///////wAQS6+xhwgGAAEIAAYEAAEAEEuvsYeBb6hE -AAAAAAAAgW8NjA2MDYwNjA2MDYwNjA2MDYwNjAAAADkAAAA5AAAAVAAAAAA2UPLlAAmtTQkAB/// -/wgAhxM1BQArqqoDCAAHgJsAIwAAAAAAUf/xAQEBAFEI8QAKgABjggAKgABjgg+hgA+hgm5ppgAA -AFwAAABcAAAAdAAAAAA2UPLlAArIff///////wBglwju8AgARQAATlZkAACAEStAgW+2HIFv//8A -iQCJADr4sqU6ARAAAQAAAAAAACBFRkZBRUpFRUVGRU5FSkVQRU1FUEVIRkpDQUNBQ0FCTAAAIAAB -AAAAXAAAAFwAAAB0AAAAADZQ8uUACs9o////////AKDJnSgjCABFAABO4IAAAIARS9aBbwtqgW// -/wCJAIkAOqTKz9wBEAABAAAAAAAAIEVORkpFREVQRlBFTUVCRUNDQUNBQ0FDQUNBQ0FDQUNBAAAg -AAEAAAA8AAAAPAAAAFQAAAAANlDy5QAK1WH///////8AYAgYb4kIBgABCAAGBAABAGAIGG+JgW+g -BAAAAAAAAIFvC2oLagtqC2oLagtqC2oLagtqC2oAAABcAAAAXAAAAHQAAAAANlDy5QAK4Nr///// -//8AIEgE/YwIAEUAAE4GbAAAgBFEC4Fv7UmBb///AIkAiQA6d5/dGgEQAAEAAAAAAAAgRU5FRkVF 
-RUpFREVKRU9FRkZQRUpFT0VHRUZFREZFQk8AACAAAQAAAOEAAADhAAAA/AAAAAA2UPLlAArnMv// -/////wAgSAT9jAgARQAA0wdsAACAEUKGgW/tSYFv//8AigCKAL+G9REa3RiBb+1JAIoAqQAAIEVC -RkdFRkVPRUhFRkZDQ0FDQUNBQ0FDQUNBQ0FDQUFBACBFTkVGRUVFSkVERUpFT0VGRlBFSkVPRUdF -RkVERkVCTwD/U01CJQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABEAAA8AAAAAAAAAAADoAwAA -AAAAAAAADwBWAAMAAQABAAIAIABcTUFJTFNMT1RcQlJPV1NFAAgAAAAAAAAAAAAAAAAAAAAAAAAA -ADwAAAA8AAAAVAAAAAA2UPLlAArr9v///////wAQWgqTbwgGAAEIAAYEAAEAEFoKk2+BbxD2AAAA -AAAAgW/tSQAAAAAAAAAAAAAAAAAAAAAAAAAAADwAAAA8AAAAVAAAAAA2UPLlAArsQf///////wAQ -WhcHTggGAAEIAAYEAAEAEFoXB06BbwgIAAAAAAAAgW/tSe1J7UntSe1J7UntSe1J7UntSQAAADwA -AAA8AAAAVAAAAAA2UPLlAArtZP///////wBgCBhviQgGAAEIAAYEAAEAYAgYb4mBb6AEAAAAAAAA -gW/tSe1J7UntSe1J7UntSe1J7UntSQAAADwAAAA8AAAAVAAAAAA2UPLlAArtrv///////wCgJE0r -iwgGAAEIAAYEAAEAoCRNK4uBbwgHAAAAAAAAgW/tSUlJSUlJSUlJSUlJSUlJSUlJSQAAADwAAAA8 -AAAAVAAAAAA2UPLlAArzaP///////wCgyR+JlQgGAAEIAAYEAAEAoMkfiZWBb6AFAAAAAAAAgW/t -SQAAAAAAAAAAAAAAAAAAAAAAAAAAADwAAAA8AAAAVAAAAAA2UPLlAArztv///////wCgyQi4YQgG -AAEIAAYEAAEAoMkIuGGBbxLWAAAAAAAAgW/tSQAAAAAAAAAAAAAAAAAAAAAAAAAAADwAAAA8AAAA -VAAAAAA2UPLlAAsGVP///////wDAtgBS8QgGAAEIAAYEAAEAwLYAUvGBb+J3AAAAAAAAgW/tSQAA -AAAAAAAAAAAAAAAAAAAAAAAAAF4AAABeAAAAeAAAAAA2UPLlAAsb/f///////wAgrzmX/ABQ//8A -UAAUAAAAAP///////wRVEwAAAQAgrzmX/ARVEAEnAE0ABAAjAAAAIwADAAYARVBJREVNSU9MT0dZ -ICAAAQECX19NU0JST1dTRV9fAgEAXgAAAGIAAABiAAAAfAAAAAA2UPLlAAsdsf///////wBglwUN -MQBT4OAD//8AUAEUAAAAAv///////wRVEwAAAQAgrzmX/ARVEwAAAU0ABAAjAAAAIwADAAYARVBJ -REVNSU9MT0dZICAAAQECX19NU0JST1dTRV9fAgEAXwIAAADpAAAA6QAAAQQAAAAANlDy5QAMgMz/ -//////8AYAgYb4kIAEUAANtjoQAAgBEzjoFvoASBb///AIoAigDHJ/kRAmwWgW+gBACKALEAACBF -TkZKRURFUEZQRU1FQkVDQ0FDQUNBQ0FDQUNBQ0FDQQAgRU5FRkVFRUpFREVKRU9FRkZQRUpFT0VH -RUZFREZFQk8A/1NNQiUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAARAAAXAAAAAAAAAAAAAAAA -AAAAAAAAABcAVgADAAEAAQACACgAXE1BSUxTTE9UXEJST1dTRQAIAQIVBAFGwtJEAAAAAE1ZQ09f -TEFCAABBQgAAADwAAAA8AAAAVAAAAAA2UPLlAAzkhgkABwAAKwgAhxq+aQAuqqoDCAAHgJsAJgAA 
-AAAAUf9OAgICIfUPq43+AAE9A05STAtQQVRIT0xPR1kvRQAAAG4AAABuAAAAiAAAAAA2UPLlAA0z -sv///////wgACXqifABg//8AYAAAEwAAAf///////wRSEwAAAQgACXqifARSAAIDDDA4MDAwOTdB -QTI3QzgzQ0dOUEk3QUEyN0MAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABMAAAEIAAl6onxADAABAG4A -AABxAAAAcQAAAIwAAAAANlDy5QANOI////////8IAAl6onwAY+DgA///AGAAAAAAAAL///////8E -UgAAAAIIAAl6onwEUgACAwwwODAwMDk3QUEyN0M4MENHTlBJN0FBMjdDAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAACCAAJeqJ8QAwAAQAMAAAAADIAAAAyAAAATAAAAAA2UPLlAA1FqwkAB////wgA -B2w1RAAkqqoDAAAAgPMAAYCbBgQAAQgAB2w1RAAAD3EAAAAAAAAAAEQx//8AAABgAAAAYAAAAHgA -AAAANlDy5QAOgkIJAAcAAIoAYHDMW4kAUqqqAwgAB4CbAEo52wAAD63/2wL9AiF6D63b/QAVUklD -IENhbGVuZGFyIFNlcnZlciAzDk5VRCAyLjAgU2VydmVyEFJFU0VBUkNIIElNQUdJTkcAAAC2AAAA -tgAAANAAAAAANlDy5QAO300ABikhIrsIACCSbaEIAEUQAKjA7kAA/waugYFvBSmBbwPIABYFsYQK -tEY5x+mtgBgnmCK2AAABAQgKArWLvzawHUkAAABpKuJeUPvU+bQ22Qx3n71PPGQJjmC/ATHiQnum -7Ls+FWR/kb3/NuQ47nxY010DqjRQI9Z1GeAdlVwp73ub8F4C2ZO2bOuNZ5hzk0YSZnCvONFJ538w -siPvOVWCq6Bf3RNUy27yHziyui/6wWzlcYrTCAIAAAAAXAAAAFwAAAB0AAAAADZQ8uYAAHUc//// -////AGAIGG+JCABFAABOZKEAAIARMxuBb6AEgW///wCJAIkAOpIEbAwBEAABAAAAAAAAIEVCRkdF -RkVPRUhFRkZDQ0FDQUNBQ0FDQUNBQ0FDQUFBAAAgAAEAAABZAAAAWQAAAHQAAAAANlDy5gABEs0J -AAcAAOMIAIcqJkEAS6qqAwgAB4CbAEMAAAAAAFH/PgICAiGjAC7L/QAMRERTIENhbGVuZGVyDk5V -RCAyLjAgU2VydmVyEkRFTlRBTCBESUFHTk9TVElDUwBJQwAAAEIAAABCAAAAXAAAAAA2UPLmAAFl -aQgAIJJtoQAGKSEiuwgARQAANBVVAAA8Bl2ggW8DyIFvBSkFsQAWOcfprYQKtLqAED446EQAAAEB -CAo2sB1LArWLvwK1AAAAMgAAADIAAABMAAAAADZQ8uYAAm85CQAH////AAUCGCgUACSqqgMAAACA -8wABgJsGBAABAAUCGCgUAAA8vwAAAAAAAAAAYzIAAAAAADIAAAAyAAAATAAAAAA2UPLmAAKHrwkA -B////wAAlDFzEAAkqqoDAAAAgPMAAYCbBgQAAQAAlDFzEAAAIxMAAAAAAAAAADbxA+QAAACAAAAA -gAAAAJgAAAAANlDy5gACrm6rAAQBAQGqAAQAFURgB3AAqwAEAQEBAQCqAAQAFUSgAAgAAIAEAQAA -BkFSV0VOIACAAf+DAAQAAAAAAAAAAAAQAwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAhAW9 -R64/NPWcAANFV0EAAAAAAAAAAAAAAAAIACvmDmc3AQAAAGIAAABiAAAAfAAAAAA2UPLmAAKwhQgA -Ky24jqoABAACRIBBVAAGAAAACAAr4oZMAAAAAAMBAwMBBlpJTkdFUgAAAAAAAAAAAAAAAAAAAAAA 
-ADUQAAAAAAAAZHwAAGV8AAAIAKAPQAAAAAAAAAAAAAAAAAADMfxiAzEAAADkAAAA5AAAAPwAAAAA -NlDy5gACwPv///////8A4B5+dgEIAEUAANaMKQAAHhGXWoFv9yT/////AIoAigDCl1gRAhoTgW/3 -JACKAKwAACBFR0ZBRENDQUNBQ0FDQUNBQ0FDQUNBQ0FDQUNBQ0FDQQAgRURFUEZDRkFGRkZEQ0FF -REVJRkNFSkZERkVFSkNBQk8A/1NNQiUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAARAAASAAAA -AAAAAAAAAAAAAAAAAAAAABIAVgADAAEAAQACACMAXE1BSUxTTE9UXEJST1dTRQAIAQAAAAAAAAAA -AAAAAEZQMgAAAAA0AAAANAAAAEwAAAAANlDy5gADGiMBgMIAAAAAYD7JTzsAJkJCAwAAAAAAAAEA -4KM+wQAAAAAAAAEA4KM+wQCBRAAAFAACAA8AAAAAPAAAADwAAABUAAAAADZQ8uYAAz82qwAAAwAA -qgAEACZFYAMiAA0CAACqAAQAJkUD2gUAAAAAAAAAAACqAAQAykQPAAACqqoAAAAAAAAAAAAAAAAA -PAAAADwAAABUAAAAADZQ8uYAA8kNqwAAAwAAqgAEAMpEYAMpAAsCAACqAAQAykQB2gVAAA8ADxYA -AAAAAAAADqoABAALRMCqAAQADETAAAAAAAAAPAAAADwAAABUAAAAADZQ8uYAA9JDCQArAgAAqgAE -AMpEYAMpAAsCAQCqAAQAykQB2gVAAA8ADxYAAAAAAAAADqoABAALRMCqAAQADETAAAAAAAAATQAA -AE0AAABoAAAAADZQ8uYAA9bgCQAHAACICACHEzUFAD+qqgMIAAeAmwA3AAAAAABR//ECAgIhRwAK -JYEADlNVUkdFUlktVFJBVU1BC0xhc2VyV3JpdGVyB1NVUkdFUlkAAE0AAAA8AAAAPAAAAFQAAAAA -NlDy5gAD37yrAAAEAACqAAQAykRgAykACwIAAKoABADKRAHaBUAADwAPFgAAAAAAAAAOqgAEAAtE -wKoABAAMRMAAAAAAAABHAAAARwAAAGAAAAAANlDy5gAD4HEJAAcAAOUIAIcqJkEAOaqqAwgAB4Cb -ADEAAAAAAFH/PgICAiHEAB+k/QABPQxTdGFyTmluZSBLZXkNSUJUIDNyZCBmbG9vcgAAAABMAAAA -TAAAAGQAAAAANlDy5gAEXS4JAAcAAHwIAIcTNQQAPqqqAwgAB4CbADYAAAAAAFH/4AICAiHNAEwm -/QAIMTI2MzI3NjMMU3Rhck5pbmUgS2V5C1BsZXh1cyBNYWluAAAAXAAAAFwAAAB0AAAAADZQ8uYA -BHfG////////AKDJ0cO3CABFAABOkVgAAIARmuKBbwuGgW///wCJAIkAOn/N66YBEAABAAAAAAAA -IEVLRkRGQUVPRkNFTkZBRkVFSEZERUNGREZERUVFSkZDAAAgAAEAAABMAAAATAAAAGQAAAAANlDy -5gAEeiEJAAcAAHwIAIcTNQQAPqqqAwgAB4CbADYAAAAAAFH/4AICAiHMAEwm/QAIMTI1ODQ3NDEM -U3Rhck5pbmUgS2V5C1BsZXh1cyBNYWluAAAAMgAAADIAAABMAAAAADZQ8uYABYAVCQAH////AAUC -AZAoACSqqgMAAACA8wABgJsGBAABAAUCAZAoAAASaAAAAAAAAAAAMo5VVQAAAG4AAABuAAAAiAAA -AAA2UPLmAAX4uP///////wgAB6SJggBg//8AYAAEEwAAAf///////wRSEwAAAQgAB6SJggRSAAIG -GEFQUExFX0xXYTQ4OTgyAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABMAAAEIAAek 
-iYJACwABAG4AAAFWAAABVgAAAXAAAAAANlDy5gAGK63///////8AEFofFs4IAEUAAUhDvwAAgBG9 -r4Fvtsf/////AEQAQwE0fK8BARAA8A9RQQkAgAAAAAAAAAAAAAAAAAAAAAAAUkFTIDCpkPhGD74B -AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGOCU2M1AQE9EQFSQVMgMKmQ+EYPvgEBAAAADAtTUEhT -RVJWRVIzAP8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABVgAAADwAAAA8AAAAVAAAAAA2UPLmAAZC -SP///////wAQSzEZLggGAAEIAAYEAAEAEEsxGS6BbwwoAAAAAAAAgW8LUQtRC1ELUQtRC1ELUQtR -C1ELUQAAAFwAAABcAAAAdAAAAAA2UPLmAAb7/////////wBglwju8AgARQAATldkAACAESpAgW+2 -HIFv//8AiQCJADr4sqU6ARAAAQAAAAAAACBFRkZBRUpFRUVGRU5FSkVQRU1FUEVIRkpDQUNBQ0FC -TAAAIAABAAAAjgAAAI4AAACoAAAAADZQ8uYABvy3////////ACCvb/JCCABFAACAVZwAAEAROrmB -b+c5gW///wZqAG8AbAsdNlkAtQAAAAAAAAACAAGGoAAAAAIAAAAFAAAAAQAAABw2UPOIAAAABmJp -b2M1NwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAYakAAAAAgAAAAIAAAAQAAAADGJpb2NoZW1pc3Ry -eQAAAAAAgAAAAIAAAACYAAAAADZQ8uYABv22qwAEAQEBqgAEAAxEYAdwAKsABAEBAQEAqgAEAAxE -oAAIAACABAEAAAZCQUxJTiAAgAH/gwAEAAAAAAAAAAAAHwMAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAIQFgIcgOTT1nAADRVhBAAAAAAAAAAAAAAAACAArL6z8JAAAAACOAAAAjgAAAKgAAAAA -NlDy5gAHoLX///////8AIK9v8kIIAEUAAIBVoQAAQBE6tIFv5zmBb///BmsAbwBsp742WmQRAAAA -AAAAAAIAAYagAAAAAgAAAAUAAAABAAAAHDZQ84gAAAAGYmlvYzU3AAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAABhqQAAAACAAAAAgAAABAAAAAMYmlvY2hlbWlzdHJ5AAA= -""".decode("base64") diff --git a/construct/tests/lib/__init__.py b/construct/tests/lib/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/construct/tests/lib/test_container.py b/construct/tests/lib/test_container.py deleted file mode 100644 index c2c07b980..000000000 --- a/construct/tests/lib/test_container.py +++ /dev/null @@ -1,99 +0,0 @@ -import unittest - -from construct.lib.container import Container, ListContainer - -class TestContainer(unittest.TestCase): - - def test_getattr(self): - c = 
Container(a=1) - self.assertEqual(c["a"], c.a) - - def test_setattr(self): - c = Container() - c.a = 1 - self.assertEqual(c["a"], 1) - - def test_delattr(self): - c = Container(a=1) - del c.a - self.assertFalse("a" in c) - - def test_update(self): - c = Container(a=1) - d = Container() - d.update(c) - self.assertEqual(d.a, 1) - - def test_eq_eq(self): - c = Container(a=1) - d = Container(a=1) - self.assertEqual(c, d) - - def test_ne_wrong_type(self): - c = Container(a=1) - d = {"a": 1} - self.assertNotEqual(c, d) - - def test_ne_wrong_key(self): - c = Container(a=1) - d = Container(b=1) - self.assertNotEqual(c, d) - - def test_ne_wrong_value(self): - c = Container(a=1) - d = Container(a=2) - self.assertNotEqual(c, d) - - def test_copy(self): - c = Container(a=1) - d = c.copy() - self.assertEqual(c, d) - self.assertTrue(c is not d) - - def test_copy_module(self): - from copy import copy - - c = Container(a=1) - d = copy(c) - self.assertEqual(c, d) - self.assertTrue(c is not d) - - def test_bool_false(self): - c = Container() - self.assertFalse(c) - - def test_bool_true(self): - c = Container(a=1) - self.assertTrue(c) - - def test_in(self): - c = Container(a=1) - self.assertTrue("a" in c) - - def test_not_in(self): - c = Container() - self.assertTrue("a" not in c) - - def test_repr(self): - c = Container(a=1, b=2) - repr(c) - - def test_repr_recursive(self): - c = Container(a=1, b=2) - c.c = c - repr(c) - - def test_str(self): - c = Container(a=1, b=2) - str(c) - - def test_str_recursive(self): - c = Container(a=1, b=2) - c.c = c - str(c) - -class TestListContainer(unittest.TestCase): - - def test_str(self): - l = ListContainer(range(5)) - str(l) diff --git a/construct/tests/t1.py b/construct/tests/t1.py deleted file mode 100644 index ead434030..000000000 --- a/construct/tests/t1.py +++ /dev/null @@ -1,12 +0,0 @@ -from construct import * - - -s = Aligned( - Struct('test', - Byte('length'), - Array(lambda ctx: ctx.length, Byte('x')), - ) -) -print 
Debugger(s).parse("\x03aaab") - - diff --git a/construct/tests/test_adaptors.py b/construct/tests/test_adaptors.py deleted file mode 100644 index 6ffb70a8a..000000000 --- a/construct/tests/test_adaptors.py +++ /dev/null @@ -1,60 +0,0 @@ -import unittest - -from construct import Field, UBInt8 -from construct import OneOf, NoneOf, HexDumpAdapter -from construct import ValidationError - -class TestHexDumpAdapter(unittest.TestCase): - - def setUp(self): - self.hda = HexDumpAdapter(Field("hexdumpadapter", 6)) - - def test_trivial(self): - pass - - def test_parse(self): - self.assertEqual(self.hda.parse("abcdef"), "abcdef") - - def test_build(self): - self.assertEqual(self.hda.build("abcdef"), "abcdef") - - def test_str(self): - pretty = str(self.hda.parse("abcdef")).strip() - offset, digits, ascii = [i.strip() for i in pretty.split(" ") if i] - self.assertEqual(offset, "0000") - self.assertEqual(digits, "61 62 63 64 65 66") - self.assertEqual(ascii, "abcdef") - -class TestNoneOf(unittest.TestCase): - - def setUp(self): - self.n = NoneOf(UBInt8("foo"), [4, 5, 6, 7]) - - def test_trivial(self): - pass - - def test_parse(self): - self.assertEqual(self.n.parse("\x08"), 8) - - def test_parse_invalid(self): - self.assertRaises(ValidationError, self.n.parse, "\x06") - -class TestOneOf(unittest.TestCase): - - def setUp(self): - self.o = OneOf(UBInt8("foo"), [4, 5, 6, 7]) - - def test_trivial(self): - pass - - def test_parse(self): - self.assertEqual(self.o.parse("\x05"), 5) - - def test_parse_invalid(self): - self.assertRaises(ValidationError, self.o.parse, "\x08") - - def test_build(self): - self.assertEqual(self.o.build(5), "\x05") - - def test_build_invalid(self): - self.assertRaises(ValidationError, self.o.build, 9) diff --git a/construct/tests/test_ast.py b/construct/tests/test_ast.py deleted file mode 100644 index 4839b0255..000000000 --- a/construct/tests/test_ast.py +++ /dev/null @@ -1,137 +0,0 @@ -import unittest - -from construct import * -from construct.text import * 
- -class NodeAdapter(Adapter): - def __init__(self, factory, subcon): - Adapter.__init__(self, subcon) - self.factory = factory - def _decode(self, obj, context): - return self.factory(obj) - - -#=============================================================================== -# AST nodes -#=============================================================================== -class Node(Container): - def __init__(self, name, **kw): - Container.__init__(self) - self.name = name - for k, v in kw.iteritems(): - setattr(self, k, v) - - def accept(self, visitor): - return getattr(visitor, "visit_%s" % self.name)(self) - -def binop_node(obj): - lhs, rhs = obj - if rhs is None: - return lhs - else: - op, rhs = rhs - return Node("binop", lhs=lhs, op=op, rhs=rhs) - -def literal_node(value): - return Node("literal", value = value) - - -#=============================================================================== -# concrete grammar -#=============================================================================== -ws = Whitespace() -term = IndexingAdapter( - Sequence("term", - ws, - Select("term", - NodeAdapter(literal_node, DecNumber("number")), - IndexingAdapter( - Sequence("subexpr", - Literal("("), - LazyBound("expr", lambda: expr), - Literal(")") - ), - index = 0 - ), - ), - ws, - ), - index = 0 -) - -def OptSeq(name, *args): - return Optional(Sequence(name, *args)) - -expr1 = NodeAdapter(binop_node, - Sequence("expr1", - term, - OptSeq("rhs", - CharOf("op", "*/"), - LazyBound("rhs", lambda: expr1) - ), - ) -) - -expr2 = NodeAdapter(binop_node, - Sequence("expr2", - expr1, - OptSeq("rhs", - CharOf("op", "+-"), - LazyBound("rhs", lambda: expr2) - ), - ) -) - -expr = expr2 - - -#=============================================================================== -# evaluation visitor -#=============================================================================== -class EvalVisitor(object): - def visit_literal(self, obj): - return obj.value - def visit_binop(self, obj): - lhs = 
obj.lhs.accept(self) - op = obj.op - rhs = obj.rhs.accept(self) - if op == "+": - return lhs + rhs - elif op == "-": - return lhs - rhs - elif op == "*": - return lhs * rhs - elif op == "/": - return lhs / rhs - else: - raise ValueError("invalid op", op) - -ev = EvalVisitor() - -class TestSomethingSomething(unittest.TestCase): - - def test_that_one_thing(self): - node = expr.parse("2*3+4") - self.assertEqual(node.name, "binop") - self.assertEqual(node.op, "+") - self.assertEqual(node.rhs.name, "literal") - self.assertEqual(node.rhs.value, 4) - self.assertEqual(node.lhs.name, "binop") - self.assertEqual(node.lhs.op, "*") - self.assertEqual(node.lhs.rhs.name, "literal") - self.assertEqual(node.lhs.rhs.value, 3) - self.assertEqual(node.lhs.lhs.name, "literal") - self.assertEqual(node.lhs.lhs.value, 2) - - def test_that_other_thing(self): - node = expr.parse("2*(3+4)") - self.assertEqual(node.name, "binop") - self.assertEqual(node.op, "*") - self.assertEqual(node.rhs.name, "binop") - self.assertEqual(node.rhs.op, "+") - self.assertEqual(node.rhs.rhs.name, "literal") - self.assertEqual(node.rhs.rhs.value, 4) - self.assertEqual(node.rhs.lhs.name, "literal") - self.assertEqual(node.rhs.lhs.value, 3) - self.assertEqual(node.lhs.name, "literal") - self.assertEqual(node.lhs.value, 2) diff --git a/construct/tests/test_bit.py b/construct/tests/test_bit.py deleted file mode 100644 index 28e908c9a..000000000 --- a/construct/tests/test_bit.py +++ /dev/null @@ -1,31 +0,0 @@ -import unittest - -from construct import BitField, BitStruct, Struct, Container -from construct import Bit, Flag, Nibble, Padding - -class TestBitStruct(unittest.TestCase): - - def test_parse(self): - struct = BitStruct("foo", - BitField("a", 3), - Flag("b"), - Padding(3), - Nibble("c"), - BitField("d", 5), - ) - self.assertEqual(struct.parse("\xe1\x1f"), - Container(a=7, b=False, c=8, d=31)) - - def test_parse_nested(self): - struct = BitStruct("foo", - BitField("a", 3), - Flag("b"), - Padding(3), - 
Nibble("c"), - Struct("bar", - Nibble("d"), - Bit("e"), - ) - ) - self.assertEqual(struct.parse("\xe1\x1f"), - Container(a=7, b=False, bar=Container(d=15, e=1), c=8)) diff --git a/construct/tests/test_core.py b/construct/tests/test_core.py deleted file mode 100644 index e857d86fd..000000000 --- a/construct/tests/test_core.py +++ /dev/null @@ -1,99 +0,0 @@ -import unittest - -from construct import Struct, MetaField, StaticField, FormatField -from construct import Container, Byte -from construct import FieldError, SizeofError - -class TestStaticField(unittest.TestCase): - - def setUp(self): - self.sf = StaticField("staticfield", 2) - - def test_trivial(self): - pass - - def test_parse(self): - self.assertEqual(self.sf.parse("ab"), "ab") - - def test_build(self): - self.assertEqual(self.sf.build("ab"), "ab") - - def test_parse_too_short(self): - self.assertRaises(FieldError, self.sf.parse, "a") - - def test_build_too_short(self): - self.assertRaises(FieldError, self.sf.build, "a") - - def test_sizeof(self): - self.assertEqual(self.sf.sizeof(), 2) - -class TestFormatField(unittest.TestCase): - - def setUp(self): - self.ff = FormatField("formatfield", "<", "L") - - def test_trivial(self): - pass - - def test_parse(self): - self.assertEqual(self.ff.parse("\x12\x34\x56\x78"), 0x78563412) - - def test_build(self): - self.assertEqual(self.ff.build(0x78563412), "\x12\x34\x56\x78") - - def test_parse_too_short(self): - self.assertRaises(FieldError, self.ff.parse, "\x12\x34\x56") - - def test_build_too_long(self): - self.assertRaises(FieldError, self.ff.build, 9e9999) - - def test_sizeof(self): - self.assertEqual(self.ff.sizeof(), 4) - -class TestMetaField(unittest.TestCase): - - def setUp(self): - self.mf = MetaField("metafield", lambda context: 3) - - def test_trivial(self): - pass - - def test_parse(self): - self.assertEqual(self.mf.parse("abc"), "abc") - - def test_build(self): - self.assertEqual(self.mf.build("abc"), "abc") - - def test_parse_too_short(self): - 
self.assertRaises(FieldError, self.mf.parse, "ab") - - def test_build_too_short(self): - self.assertRaises(FieldError, self.mf.build, "ab") - - def test_sizeof(self): - self.assertEqual(self.mf.sizeof(), 3) - -class TestMetaFieldStruct(unittest.TestCase): - - def setUp(self): - self.mf = MetaField("data", lambda context: context["length"]) - self.s = Struct("foo", Byte("length"), self.mf) - - def test_trivial(self): - pass - - def test_parse(self): - c = self.s.parse("\x03ABC") - self.assertEqual(c.length, 3) - self.assertEqual(c.data, "ABC") - - c = self.s.parse("\x04ABCD") - self.assertEqual(c.length, 4) - self.assertEqual(c.data, "ABCD") - - def test_sizeof_default(self): - self.assertRaises(SizeofError, self.mf.sizeof) - - def test_sizeof(self): - context = Container(length=4) - self.assertEqual(self.mf.sizeof(context), 4) diff --git a/construct/tests/test_lib.py b/construct/tests/test_lib.py deleted file mode 100644 index 450c8bc6f..000000000 --- a/construct/tests/test_lib.py +++ /dev/null @@ -1,34 +0,0 @@ -import unittest - -from construct.lib.binary import (int_to_bin, bin_to_int, swap_bytes, - encode_bin, decode_bin) - -class TestBinary(unittest.TestCase): - pass - - def test_int_to_bin(self): - self.assertEqual(int_to_bin(19, 5), "\x01\x00\x00\x01\x01") - - def test_int_to_bin_signed(self): - self.assertEqual(int_to_bin(-13, 5), "\x01\x00\x00\x01\x01") - - def test_bin_to_int(self): - self.assertEqual(bin_to_int("\x01\x00\x00\x01\x01"), 19) - - def test_bin_to_int_signed(self): - self.assertEqual(bin_to_int("\x01\x00\x00\x01\x01", True), -13) - - def test_swap_bytes(self): - self.assertEqual(swap_bytes("aaaabbbbcccc", 4), "ccccbbbbaaaa") - - def test_encode_bin(self): - self.assertEqual(encode_bin("ab"), - "\x00\x01\x01\x00\x00\x00\x00\x01\x00\x01\x01\x00\x00\x00\x01\x00") - - def test_decode_bin(self): - self.assertEqual(decode_bin( - "\x00\x01\x01\x00\x00\x00\x00\x01\x00\x01\x01\x00\x00\x00\x01\x00"), - "ab") - - def test_decode_bin_length(self): - 
self.assertRaises(ValueError, decode_bin, "\x00") diff --git a/construct/tests/test_mapping.py b/construct/tests/test_mapping.py deleted file mode 100644 index 547859f88..000000000 --- a/construct/tests/test_mapping.py +++ /dev/null @@ -1,29 +0,0 @@ -import unittest - -from construct import Flag - -class TestFlag(unittest.TestCase): - - def test_parse(self): - flag = Flag("flag") - self.assertTrue(flag.parse("\x01")) - - def test_parse_flipped(self): - flag = Flag("flag", truth=0, falsehood=1) - self.assertFalse(flag.parse("\x01")) - - def test_parse_default(self): - flag = Flag("flag") - self.assertFalse(flag.parse("\x02")) - - def test_parse_default_true(self): - flag = Flag("flag", default=True) - self.assertTrue(flag.parse("\x02")) - - def test_build(self): - flag = Flag("flag") - self.assertEqual(flag.build(True), "\x01") - - def test_build_flipped(self): - flag = Flag("flag", truth=0, falsehood=1) - self.assertEqual(flag.build(True), "\x00") diff --git a/construct/tests/test_repeaters.py b/construct/tests/test_repeaters.py deleted file mode 100644 index 4b6eacbc6..000000000 --- a/construct/tests/test_repeaters.py +++ /dev/null @@ -1,97 +0,0 @@ -import unittest - -from construct import UBInt8 -from construct import Repeater -from construct import StrictRepeater, GreedyRepeater, OptionalGreedyRepeater -from construct import ArrayError, RangeError - -class TestRepeater(unittest.TestCase): - - def setUp(self): - self.c = Repeater(3, 7, UBInt8("foo")) - - def test_trivial(self): - pass - - def test_parse(self): - self.assertEqual(self.c.parse("\x01\x02\x03"), [1, 2, 3]) - self.assertEqual(self.c.parse("\x01\x02\x03\x04\x05\x06"), - [1, 2, 3, 4, 5, 6]) - self.assertEqual(self.c.parse("\x01\x02\x03\x04\x05\x06\x07"), - [1, 2, 3, 4, 5, 6, 7]) - self.assertEqual(self.c.parse("\x01\x02\x03\x04\x05\x06\x07\x08\x09"), - [1, 2, 3, 4, 5, 6, 7]) - - def test_build(self): - self.assertEqual(self.c.build([1, 2, 3, 4]), "\x01\x02\x03\x04") - - def test_build_undersized(self): 
- self.assertRaises(RangeError, self.c.build, [1, 2]) - - def test_build_oversized(self): - self.assertRaises(RangeError, self.c.build, [1, 2, 3, 4, 5, 6, 7, 8]) - -class TestStrictRepeater(unittest.TestCase): - - def setUp(self): - self.c = StrictRepeater(4, UBInt8("foo")) - - def test_trivial(self): - pass - - def test_parse(self): - self.assertEqual(self.c.parse("\x01\x02\x03\x04"), [1, 2, 3, 4]) - self.assertEqual(self.c.parse("\x01\x02\x03\x04\x05\x06"), - [1, 2, 3, 4]) - - def test_build(self): - self.assertEqual(self.c.build([5, 6, 7, 8]), "\x05\x06\x07\x08") - - def test_build_oversized(self): - self.assertRaises(ArrayError, self.c.build, [5, 6, 7, 8, 9]) - - def test_build_undersized(self): - self.assertRaises(ArrayError, self.c.build, [5, 6, 7]) - -class TestGreedyRepeater(unittest.TestCase): - - def setUp(self): - self.c = GreedyRepeater(UBInt8("foo")) - - def test_trivial(self): - pass - - def test_empty_parse(self): - self.assertRaises(RangeError, self.c.parse, "") - - def test_parse(self): - self.assertEqual(self.c.parse("\x01"), [1]) - self.assertEqual(self.c.parse("\x01\x02\x03"), [1, 2, 3]) - self.assertEqual(self.c.parse("\x01\x02\x03\x04\x05\x06"), - [1, 2, 3, 4, 5, 6]) - - def test_empty_build(self): - self.assertRaises(RangeError, self.c.build, []) - - def test_build(self): - self.assertEqual(self.c.build([1, 2]), "\x01\x02") - -class TestOptionalGreedyRepeater(unittest.TestCase): - - def setUp(self): - self.c = OptionalGreedyRepeater(UBInt8("foo")) - - def test_trivial(self): - pass - - def test_empty_parse(self): - self.assertEqual(self.c.parse(""), []) - - def test_parse(self): - self.assertEqual(self.c.parse("\x01\x02"), [1, 2]) - - def test_empty_build(self): - self.assertEqual(self.c.build([]), "") - - def test_build(self): - self.assertEqual(self.c.build([1, 2]), "\x01\x02") diff --git a/construct/tests/test_strings.py b/construct/tests/test_strings.py deleted file mode 100644 index b87f997e0..000000000 --- 
a/construct/tests/test_strings.py +++ /dev/null @@ -1,83 +0,0 @@ -import unittest - -from construct import String, PascalString, CString, UBInt16 - -class TestString(unittest.TestCase): - - def test_parse(self): - s = String("foo", 5) - self.assertEqual(s.parse("hello"), "hello") - - def test_parse_utf8(self): - s = String("foo", 12, encoding="utf8") - self.assertEqual(s.parse("hello joh\xd4\x83n"), u"hello joh\u0503n") - - def test_parse_padded(self): - s = String("foo", 10, padchar="X", paddir="right") - self.assertEqual(s.parse("helloXXXXX"), "hello") - - def test_parse_padded_left(self): - s = String("foo", 10, padchar="X", paddir="left") - self.assertEqual(s.parse("XXXXXhello"), "hello") - - def test_parse_padded_center(self): - s = String("foo", 10, padchar="X", paddir="center") - self.assertEqual(s.parse("XXhelloXXX"), "hello") - - def test_build(self): - s = String("foo", 5) - self.assertEqual(s.build("hello"), "hello") - - def test_build_utf8(self): - s = String("foo", 12, encoding="utf8") - self.assertEqual(s.build(u"hello joh\u0503n"), "hello joh\xd4\x83n") - - def test_build_padded(self): - s = String("foo", 10, padchar="X", paddir="right") - self.assertEqual(s.build("hello"), "helloXXXXX") - - def test_build_padded_left(self): - s = String("foo", 10, padchar="X", paddir="left") - self.assertEqual(s.build("hello"), "XXXXXhello") - - def test_build_padded_center(self): - s = String("foo", 10, padchar="X", paddir="center") - self.assertEqual(s.build("hello"), "XXhelloXXX") - -class TestPascalString(unittest.TestCase): - - def test_parse(self): - s = PascalString("foo") - self.assertEqual(s.parse("\x05hello"), "hello") - - def test_build(self): - s = PascalString("foo") - self.assertEqual(s.build("hello world"), "\x0bhello world") - - def test_parse_custom_length_field(self): - s = PascalString("foo", length_field=UBInt16("length")) - self.assertEqual(s.parse("\x00\x05hello"), "hello") - - def test_build_custom_length_field(self): - s = PascalString("foo", 
length_field=UBInt16("length")) - self.assertEqual(s.build("hello"), "\x00\x05hello") - -class TestCString(unittest.TestCase): - - def test_parse(self): - s = CString("foo") - self.assertEqual(s.parse("hello\x00"), "hello") - - def test_build(self): - s = CString("foo") - self.assertEqual(s.build("hello"), "hello\x00") - - def test_parse_terminator(self): - s = CString("foo", terminators="XYZ") - self.assertEqual(s.parse("helloX"), "hello") - self.assertEqual(s.parse("helloY"), "hello") - self.assertEqual(s.parse("helloZ"), "hello") - - def test_build_terminator(self): - s = CString("foo", terminators="XYZ") - self.assertEqual(s.build("hello"), "helloX") diff --git a/construct/tests/test_text.py b/construct/tests/test_text.py deleted file mode 100644 index 6cd08a274..000000000 --- a/construct/tests/test_text.py +++ /dev/null @@ -1,15 +0,0 @@ -import unittest - -from construct.text import Whitespace -from construct import RangeError - -class TestWhitespace(unittest.TestCase): - - def test_parse(self): - self.assertEqual(Whitespace().parse(" \t\t "), None) - - def test_parse_required(self): - self.assertRaises(RangeError, Whitespace(optional=False).parse, "X") - - def test_build(self): - self.assertEqual(Whitespace().build(None), " ") diff --git a/construct/tests/testall.py b/construct/tests/testall.py deleted file mode 100644 index e20e97a91..000000000 --- a/construct/tests/testall.py +++ /dev/null @@ -1,32 +0,0 @@ - -import os -from subprocess import call - - -basepath = os.path.abspath("..") - -def scan(path, failures): - if os.path.isdir(path): - for subpath in os.listdir(path): - scan(os.path.join(path, subpath), failures) - elif os.path.isfile(path) and path.endswith(".py"): - dirname, name = os.path.split(path) - os.chdir(dirname) - errorcode = call("python %s > %s 2> %s" % (name, os.devnull, os.devnull), shell=True) - if errorcode != 0: - failures.append((path, errorcode)) - -failures = [] -print "testing packages" - -scan(os.path.join(basepath, "formats"), 
failures) -scan(os.path.join(basepath, "protocols"), failures) - -if not failures: - print "success" -else: - print "%d errors:" % (len(failures),) - for fn, ec in failures: - print " %s" % (fn,) - - diff --git a/construct/tests/text.py b/construct/tests/text.py deleted file mode 100644 index b2cf9d788..000000000 --- a/construct/tests/text.py +++ /dev/null @@ -1,77 +0,0 @@ -from construct.text import * - - -ws = Whitespace(" \t\r\n") - -term = Select("term", - DecNumber("dec"), - Identifier("symbol"), - IndexingAdapter( - Sequence("expr", - Literal("("), - ws, - LazyBound("expr", lambda: expr), - ws, - Literal(")"), - ), - 0 - ), -) - -expr1 = Select("expr1", - Sequence("node", - term, - ws, - CharOf("binop", "*/"), - ws, - LazyBound("rhs", lambda: expr1), - ), - term, -) - -expr2 = Select("expr2", - Sequence("node", - expr1, - ws, - CharOf("binop", "+-"), - ws, - LazyBound("rhs", lambda: expr2), - ), - expr1, -) - -expr = expr2 - -def eval2(node): - if type(node) is int: - return node - lhs = eval2(node[0]) - op = node[1] - rhs = eval2(node[2]) - if op == "+": - return lhs + rhs - elif op == "-": - return lhs - rhs - elif op == "*": - return lhs * rhs - elif op == "/": - return lhs / rhs - assert False - -print expr.parse("(1 + 2)*3") -print eval2(expr.parse("(1 + 2)*3")) -print expr.build([[1, "+", 2], "*", 3]) - - - - - - - - - - - - - - diff --git a/construct/tests/unit.py b/construct/tests/unit.py deleted file mode 100644 index c046035c6..000000000 --- a/construct/tests/unit.py +++ /dev/null @@ -1,341 +0,0 @@ -import sys -from construct import * -from construct.text import * -from construct.lib import LazyContainer - - -# some tests require doing bad things... -import warnings -warnings.filterwarnings("ignore", category = DeprecationWarning) - - -# declarative to the bitter end! 
-tests = [ - # - # constructs - # - [MetaArray(lambda ctx: 3, UBInt8("metaarray")).parse, "\x01\x02\x03", [1,2,3], None], - [MetaArray(lambda ctx: 3, UBInt8("metaarray")).parse, "\x01\x02", None, ArrayError], - [MetaArray(lambda ctx: 3, UBInt8("metaarray")).build, [1,2,3], "\x01\x02\x03", None], - [MetaArray(lambda ctx: 3, UBInt8("metaarray")).build, [1,2], None, ArrayError], - - [Range(3, 5, UBInt8("range")).parse, "\x01\x02\x03", [1,2,3], None], - [Range(3, 5, UBInt8("range")).parse, "\x01\x02\x03\x04", [1,2,3,4], None], - [Range(3, 5, UBInt8("range")).parse, "\x01\x02\x03\x04\x05", [1,2,3,4,5], None], - [Range(3, 5, UBInt8("range")).parse, "\x01\x02", None, RangeError], - [Range(3, 5, UBInt8("range")).build, [1,2,3], "\x01\x02\x03", None], - [Range(3, 5, UBInt8("range")).build, [1,2,3,4], "\x01\x02\x03\x04", None], - [Range(3, 5, UBInt8("range")).build, [1,2,3,4,5], "\x01\x02\x03\x04\x05", None], - [Range(3, 5, UBInt8("range")).build, [1,2], None, RangeError], - [Range(3, 5, UBInt8("range")).build, [1,2,3,4,5,6], None, RangeError], - - [RepeatUntil(lambda obj, ctx: obj == 9, UBInt8("repeatuntil")).parse, "\x02\x03\x09", [2,3,9], None], - [RepeatUntil(lambda obj, ctx: obj == 9, UBInt8("repeatuntil")).parse, "\x02\x03\x08", None, ArrayError], - [RepeatUntil(lambda obj, ctx: obj == 9, UBInt8("repeatuntil")).build, [2,3,9], "\x02\x03\x09", None], - [RepeatUntil(lambda obj, ctx: obj == 9, UBInt8("repeatuntil")).build, [2,3,8], None, ArrayError], - - [Struct("struct", UBInt8("a"), UBInt16("b")).parse, "\x01\x00\x02", Container(a=1,b=2), None], - [Struct("struct", UBInt8("a"), UBInt16("b"), Struct("foo", UBInt8("c"), UBInt8("d"))).parse, "\x01\x00\x02\x03\x04", Container(a=1,b=2,foo=Container(c=3,d=4)), None], - [Struct("struct", UBInt8("a"), UBInt16("b"), Embedded(Struct("foo", UBInt8("c"), UBInt8("d")))).parse, "\x01\x00\x02\x03\x04", Container(a=1,b=2,c=3,d=4), None], - [Struct("struct", UBInt8("a"), UBInt16("b")).build, Container(a=1,b=2), "\x01\x00\x02", None], - 
[Struct("struct", UBInt8("a"), UBInt16("b"), Struct("foo", UBInt8("c"), UBInt8("d"))).build, Container(a=1,b=2,foo=Container(c=3,d=4)), "\x01\x00\x02\x03\x04", None], - [Struct("struct", UBInt8("a"), UBInt16("b"), Embedded(Struct("foo", UBInt8("c"), UBInt8("d")))).build, Container(a=1,b=2,c=3,d=4), "\x01\x00\x02\x03\x04", None], - - [Sequence("sequence", UBInt8("a"), UBInt16("b")).parse, "\x01\x00\x02", [1,2], None], - [Sequence("sequence", UBInt8("a"), UBInt16("b"), Sequence("foo", UBInt8("c"), UBInt8("d"))).parse, "\x01\x00\x02\x03\x04", [1,2,[3,4]], None], - [Sequence("sequence", UBInt8("a"), UBInt16("b"), Embedded(Sequence("foo", UBInt8("c"), UBInt8("d")))).parse, "\x01\x00\x02\x03\x04", [1,2,3,4], None], - [Sequence("sequence", UBInt8("a"), UBInt16("b")).build, [1,2], "\x01\x00\x02", None], - [Sequence("sequence", UBInt8("a"), UBInt16("b"), Sequence("foo", UBInt8("c"), UBInt8("d"))).build, [1,2,[3,4]], "\x01\x00\x02\x03\x04", None], - [Sequence("sequence", UBInt8("a"), UBInt16("b"), Embedded(Sequence("foo", UBInt8("c"), UBInt8("d")))).build, [1,2,3,4], "\x01\x00\x02\x03\x04", None], - - [Switch("switch", lambda ctx: 5, {1:UBInt8("x"), 5:UBInt16("y")}).parse, "\x00\x02", 2, None], - [Switch("switch", lambda ctx: 6, {1:UBInt8("x"), 5:UBInt16("y")}).parse, "\x00\x02", None, SwitchError], - [Switch("switch", lambda ctx: 6, {1:UBInt8("x"), 5:UBInt16("y")}, default = UBInt8("x")).parse, "\x00\x02", 0, None], - [Switch("switch", lambda ctx: 5, {1:UBInt8("x"), 5:UBInt16("y")}, include_key = True).parse, "\x00\x02", (5, 2), None], - [Switch("switch", lambda ctx: 5, {1:UBInt8("x"), 5:UBInt16("y")}).build, 2, "\x00\x02", None], - [Switch("switch", lambda ctx: 6, {1:UBInt8("x"), 5:UBInt16("y")}).build, 9, None, SwitchError], - [Switch("switch", lambda ctx: 6, {1:UBInt8("x"), 5:UBInt16("y")}, default = UBInt8("x")).build, 9, "\x09", None], - [Switch("switch", lambda ctx: 5, {1:UBInt8("x"), 5:UBInt16("y")}, include_key = True).build, ((5, 2),), "\x00\x02", None], - 
[Switch("switch", lambda ctx: 5, {1:UBInt8("x"), 5:UBInt16("y")}, include_key = True).build, ((89, 2),), None, SwitchError], - - [Select("select", UBInt32("a"), UBInt16("b"), UBInt8("c")).parse, "\x07", 7, None], - [Select("select", UBInt32("a"), UBInt16("b")).parse, "\x07", None, SelectError], - [Select("select", UBInt32("a"), UBInt16("b"), UBInt8("c"), include_name = True).parse, "\x07", ("c", 7), None], - [Select("select", UBInt32("a"), UBInt16("b"), UBInt8("c")).build, 7, "\x00\x00\x00\x07", None], - [Select("select", UBInt32("a"), UBInt16("b"), UBInt8("c"), include_name = True).build, (("c", 7),), "\x07", None], - [Select("select", UBInt32("a"), UBInt16("b"), UBInt8("c"), include_name = True).build, (("d", 7),), None, SelectError], - - [Peek(UBInt8("peek")).parse, "\x01", 1, None], - [Peek(UBInt8("peek")).parse, "", None, None], - [Peek(UBInt8("peek")).build, 1, "", None], - [Peek(UBInt8("peek"), perform_build = True).build, 1, "\x01", None], - [Struct("peek", Peek(UBInt8("a")), UBInt16("b")).parse, "\x01\x02", Container(a=1,b=0x102), None], - [Struct("peek", Peek(UBInt8("a")), UBInt16("b")).build, Container(a=1,b=0x102), "\x01\x02", None], - - [Value("value", lambda ctx: "moo").parse, "", "moo", None], - [Value("value", lambda ctx: "moo").build, None, "", None], - - [Anchor("anchor").parse, "", 0, None], - [Anchor("anchor").build, None, "", None], - - [LazyBound("lazybound", lambda: UBInt8("foo")).parse, "\x02", 2, None], - [LazyBound("lazybound", lambda: UBInt8("foo")).build, 2, "\x02", None], - - [Pass.parse, "", None, None], - [Pass.build, None, "", None], - - [Terminator.parse, "", None, None], - [Terminator.parse, "x", None, TerminatorError], - [Terminator.build, None, "", None], - - [Pointer(lambda ctx: 2, UBInt8("pointer")).parse, "\x00\x00\x07", 7, None], - [Pointer(lambda ctx: 2, UBInt8("pointer")).build, 7, "\x00\x00\x07", None], - - [OnDemand(UBInt8("ondemand")).parse("\x08").read, (), 8, None], - [Struct("ondemand", UBInt8("a"), 
OnDemand(UBInt8("b")), UBInt8("c")).parse, - "\x07\x08\x09", Container(a=7,b=LazyContainer(None, None, None, None),c=9), None], - [Struct("ondemand", UBInt8("a"), OnDemand(UBInt8("b"), advance_stream = False), UBInt8("c")).parse, - "\x07\x09", Container(a=7,b=LazyContainer(None, None, None, None),c=9), None], - - [OnDemand(UBInt8("ondemand")).build, 8, "\x08", None], - [Struct("ondemand", UBInt8("a"), OnDemand(UBInt8("b")), UBInt8("c")).build, - Container(a=7,b=8,c=9), "\x07\x08\x09", None], - [Struct("ondemand", UBInt8("a"), OnDemand(UBInt8("b"), force_build = False), UBInt8("c")).build, - Container(a=7,b=LazyContainer(None, None, None, None),c=9), "\x07\x00\x09", None], - [Struct("ondemand", UBInt8("a"), OnDemand(UBInt8("b"), force_build = False, advance_stream = False), UBInt8("c")).build, - Container(a=7,b=LazyContainer(None, None, None, None),c=9), "\x07\x09", None], - - [Struct("reconfig", Reconfig("foo", UBInt8("bar"))).parse, "\x01", Container(foo=1), None], - [Struct("reconfig", Reconfig("foo", UBInt8("bar"))).build, Container(foo=1), "\x01", None], - - [Buffered(UBInt8("buffered"), lambda x:x, lambda x:x, lambda x:x).parse, - "\x07", 7, None], - [Buffered(GreedyRange(UBInt8("buffered")), lambda x:x, lambda x:x, lambda x:x).parse, - "\x07", None, SizeofError], - [Buffered(UBInt8("buffered"), lambda x:x, lambda x:x, lambda x:x).build, - 7, "\x07", None], - [Buffered(GreedyRange(UBInt8("buffered")), lambda x:x, lambda x:x, lambda x:x).build, - [7], None, SizeofError], - - [Restream(UBInt8("restream"), lambda x:x, lambda x:x, lambda x:x).parse, - "\x07", 7, None], - [Restream(GreedyRepeater(UBInt8("restream")), lambda x:x, lambda x:x, lambda x:x).parse, - "\x07", [7], None], - [Restream(UBInt8("restream"), lambda x:x, lambda x:x, lambda x:x).parse, - "\x07", 7, None], - [Restream(GreedyRepeater(UBInt8("restream")), lambda x:x, lambda x:x, lambda x:x).parse, - "\x07", [7], None], - - # - # adapters - # - [BitIntegerAdapter(Field("bitintegeradapter", 8), 
8).parse, "\x01" * 8, 255, None], - [BitIntegerAdapter(Field("bitintegeradapter", 8), 8, signed = True).parse, "\x01" * 8, -1, None], - [BitIntegerAdapter(Field("bitintegeradapter", 8), 8, swapped = True, bytesize = 4).parse, - "\x01" * 4 + "\x00" * 4, 0x0f, None], - [BitIntegerAdapter(Field("bitintegeradapter", 8), 8).build, 255, "\x01" * 8, None], - [BitIntegerAdapter(Field("bitintegeradapter", 8), 8).build, -1, None, BitIntegerError], - [BitIntegerAdapter(Field("bitintegeradapter", 8), 8, signed = True).build, -1, "\x01" * 8, None], - [BitIntegerAdapter(Field("bitintegeradapter", 8), 8, swapped = True, bytesize = 4).build, - 0x0f, "\x01" * 4 + "\x00" * 4, None], - - [MappingAdapter(UBInt8("mappingadapter"), {2:"x",3:"y"}, {"x":2,"y":3}).parse, - "\x03", "y", None], - [MappingAdapter(UBInt8("mappingadapter"), {2:"x",3:"y"}, {"x":2,"y":3}).parse, - "\x04", None, MappingError], - [MappingAdapter(UBInt8("mappingadapter"), {2:"x",3:"y"}, {"x":2,"y":3}, decdefault="foo").parse, - "\x04", "foo", None], - [MappingAdapter(UBInt8("mappingadapter"), {2:"x",3:"y"}, {"x":2,"y":3}, decdefault=Pass).parse, - "\x04", 4, None], - [MappingAdapter(UBInt8("mappingadapter"), {2:"x",3:"y"}, {"x":2,"y":3}).build, - "y", "\x03", None], - [MappingAdapter(UBInt8("mappingadapter"), {2:"x",3:"y"}, {"x":2,"y":3}).build, - "z", None, MappingError], - [MappingAdapter(UBInt8("mappingadapter"), {2:"x",3:"y"}, {"x":2,"y":3}, encdefault=17).build, - "foo", "\x11", None], - [MappingAdapter(UBInt8("mappingadapter"), {2:"x",3:"y"}, {"x":2,"y":3}, encdefault=Pass).build, - 4, "\x04", None], - - [FlagsAdapter(UBInt8("flagsadapter"), {"a":1,"b":2,"c":4,"d":8,"e":16,"f":32,"g":64,"h":128}).parse, - "\x81", Container(a=True, b=False,c=False,d=False,e=False,f=False,g=False,h=True), None], - [FlagsAdapter(UBInt8("flagsadapter"), {"a":1,"b":2,"c":4,"d":8,"e":16,"f":32,"g":64,"h":128}).build, - Container(a=True, b=False,c=False,d=False,e=False,f=False,g=False,h=True), "\x81", None], - - 
[IndexingAdapter(Array(3, UBInt8("indexingadapter")), 2).parse, "\x11\x22\x33", 0x33, None], - [IndexingAdapter(Array(3, UBInt8("indexingadapter")), 2)._encode, (0x33, {}), [None, None, 0x33], None], - - [SlicingAdapter(Array(3, UBInt8("indexingadapter")), 1, 3).parse, "\x11\x22\x33", [0x22, 0x33], None], - [SlicingAdapter(Array(3, UBInt8("indexingadapter")), 1, 3)._encode, ([0x22, 0x33], {}), [None, 0x22, 0x33], None], - - [PaddingAdapter(Field("paddingadapter", 4)).parse, "abcd", "abcd", None], - [PaddingAdapter(Field("paddingadapter", 4), strict = True).parse, "abcd", None, PaddingError], - [PaddingAdapter(Field("paddingadapter", 4), strict = True).parse, "\x00\x00\x00\x00", "\x00\x00\x00\x00", None], - [PaddingAdapter(Field("paddingadapter", 4)).build, "abcd", "\x00\x00\x00\x00", None], - - [LengthValueAdapter(Sequence("lengthvalueadapter", UBInt8("length"), Field("value", lambda ctx: ctx.length))).parse, - "\x05abcde", "abcde", None], - [LengthValueAdapter(Sequence("lengthvalueadapter", UBInt8("length"), Field("value", lambda ctx: ctx.length))).build, - "abcde", "\x05abcde", None], - - [TunnelAdapter(PascalString("data", encoding = "zlib"), GreedyRange(UBInt16("elements"))).parse, - "\rx\x9cc`f\x18\x16\x10\x00u\xf8\x01-", [3] * 100, None], - [TunnelAdapter(PascalString("data", encoding = "zlib"), GreedyRange(UBInt16("elements"))).build, - [3] * 100, "\rx\x9cc`f\x18\x16\x10\x00u\xf8\x01-", None], - - [Const(Field("const", 2), "MZ").parse, "MZ", "MZ", None], - [Const(Field("const", 2), "MZ").parse, "MS", None, ConstError], - [Const(Field("const", 2), "MZ").build, "MZ", "MZ", None], - [Const(Field("const", 2), "MZ").build, "MS", None, ConstError], - [Const(Field("const", 2), "MZ").build, None, "MZ", None], - - [ExprAdapter(UBInt8("expradapter"), - encoder = lambda obj, ctx: obj / 7, - decoder = lambda obj, ctx: obj * 7).parse, - "\x06", 42, None], - [ExprAdapter(UBInt8("expradapter"), - encoder = lambda obj, ctx: obj / 7, - decoder = lambda obj, ctx: obj * 
7).build, - 42, "\x06", None], - # - # text - # - [QuotedString("foo", start_quote = "{", end_quote = "}", esc_char = "-").parse, - "{hello-} world}", "hello} world", None], - [QuotedString("foo", start_quote = "{", end_quote = "}", esc_char = None).parse, - "{hello-} world}", "hello-", None], - [QuotedString("foo", start_quote = "{", end_quote = "}", esc_char = None, allow_eof = True).parse, - "{hello world", "hello world", None], - [QuotedString("foo", start_quote = "{", end_quote = "}", esc_char = None, allow_eof = False).parse, - "{hello world", None, FieldError], - [QuotedString("foo", start_quote = "{", end_quote = "}", esc_char = "-").build, - "hello} world", "{hello-} world}", None], - [QuotedString("foo", start_quote = "{", end_quote = "}", esc_char = None).build, - "hello}", None, QuotedStringError], - - [Identifier("identifier").parse, "ab_c8 XXX", "ab_c8", None], - [Identifier("identifier").parse, "_c8 XXX", "_c8", None], - [Identifier("identifier").parse, "2c8 XXX", None, ValidationError], - [Identifier("identifier").build, "ab_c8", "ab_c8", None], - [Identifier("identifier").build, "_c8", "_c8", None], - [Identifier("identifier").build, "2c8", None, ValidationError], - - [TextualIntAdapter(Field("textintadapter", 3)).parse, "234", 234, None], - [TextualIntAdapter(Field("textintadapter", 3), radix = 16).parse, "234", 0x234, None], - [TextualIntAdapter(Field("textintadapter", 3)).build, 234, "234", None], - [TextualIntAdapter(Field("textintadapter", 3), radix = 16).build, 0x234, "234", None], - # [TextualIntAdapter(Field("textintadapter", 3)).build, 23, "023", None], - - [StringUpto("stringupto", "XY").parse, "helloX", "hello", None], - [StringUpto("stringupto", "XY").parse, "helloY", "hello", None], - [StringUpto("stringupto", "XY").build, "helloX", "hello", None], - - # - # macros - # - [Aligned(UBInt8("aligned")).parse, "\x01\x00\x00\x00", 1, None], - [Aligned(UBInt8("aligned")).build, 1, "\x01\x00\x00\x00", None], - [Struct("aligned", 
Aligned(UBInt8("a")), UBInt8("b")).parse, - "\x01\x00\x00\x00\x02", Container(a=1,b=2), None], - [Struct("aligned", Aligned(UBInt8("a")), UBInt8("b")).build, - Container(a=1,b=2), "\x01\x00\x00\x00\x02", None], - - [Bitwise(Field("bitwise", 8)).parse, "\xff", "\x01" * 8, None], - [Bitwise(Field("bitwise", lambda ctx: 8)).parse, "\xff", "\x01" * 8, None], - [Bitwise(Field("bitwise", 8)).build, "\x01" * 8, "\xff", None], - [Bitwise(Field("bitwise", lambda ctx: 8)).build, "\x01" * 8, "\xff", None], - - [Union("union", - UBInt32("a"), - Struct("b", UBInt16("a"), UBInt16("b")), - BitStruct("c", Padding(4), Octet("a"), Padding(4)), - Struct("d", UBInt8("a"), UBInt8("b"), UBInt8("c"), UBInt8("d")), - Embedded(Struct("q", UBInt8("e"))), - ).parse, - "\x11\x22\x33\x44", - Container(a=0x11223344, - b=Container(a=0x1122, b=0x3344), - c=Container(a=0x12), - d=Container(a=0x11, b=0x22, c=0x33, d=0x44), - e=0x11, - ), - None], - [Union("union", - UBInt32("a"), - Struct("b", UBInt16("a"), UBInt16("b")), - BitStruct("c", Padding(4), Octet("a"), Padding(4)), - Struct("d", UBInt8("a"), UBInt8("b"), UBInt8("c"), UBInt8("d")), - Embedded(Struct("q", UBInt8("e"))), - ).build, - Container(a=0x11223344, - b=Container(a=0x1122, b=0x3344), - c=Container(a=0x12), - d=Container(a=0x11, b=0x22, c=0x33, d=0x44), - e=0x11, - ), - "\x11\x22\x33\x44", - None], - - [Enum(UBInt8("enum"),q=3,r=4,t=5).parse, "\x04", "r", None], - [Enum(UBInt8("enum"),q=3,r=4,t=5).parse, "\x07", None, MappingError], - [Enum(UBInt8("enum"),q=3,r=4,t=5, _default_ = "spam").parse, "\x07", "spam", None], - [Enum(UBInt8("enum"),q=3,r=4,t=5, _default_ =Pass).parse, "\x07", 7, None], - [Enum(UBInt8("enum"),q=3,r=4,t=5).build, "r", "\x04", None], - [Enum(UBInt8("enum"),q=3,r=4,t=5).build, "spam", None, MappingError], - [Enum(UBInt8("enum"),q=3,r=4,t=5, _default_ = 9).build, "spam", "\x09", None], - [Enum(UBInt8("enum"),q=3,r=4,t=5, _default_ =Pass).build, 9, "\x09", None], - - [PrefixedArray(UBInt8("array"), 
UBInt8("count")).parse, "\x03\x01\x01\x01", [1,1,1], None], - [PrefixedArray(UBInt8("array"), UBInt8("count")).parse, "\x03\x01\x01", None, ArrayError], - [PrefixedArray(UBInt8("array"), UBInt8("count")).build, [1,1,1], "\x03\x01\x01\x01", None], - - [IfThenElse("ifthenelse", lambda ctx: True, UBInt8("then"), UBInt16("else")).parse, - "\x01", 1, None], - [IfThenElse("ifthenelse", lambda ctx: False, UBInt8("then"), UBInt16("else")).parse, - "\x00\x01", 1, None], - [IfThenElse("ifthenelse", lambda ctx: True, UBInt8("then"), UBInt16("else")).build, - 1, "\x01", None], - [IfThenElse("ifthenelse", lambda ctx: False, UBInt8("then"), UBInt16("else")).build, - 1, "\x00\x01", None], - - [Magic("MZ").parse, "MZ", "MZ", None], - [Magic("MZ").parse, "ELF", None, ConstError], - [Magic("MZ").build, None, "MZ", None], -] - - -def run_tests(tests): - errors = [] - for func, args, res, exctype in tests: - if type(args) is not tuple: - args = (args,) - try: - r = func(*args) - except: - t, ex, tb = sys.exc_info() - if exctype is None: - errors.append("[%s]: unexpected exception %r" % (func, ex)) - continue - if t is not exctype: - errors.append("[%s]: raised %r, expected %r" % (func, t, exctype)) - continue - else: - if exctype is not None: - errors.append("[%s]: expected exception %r" % (func, exctype)) - continue - if r != res: - errors.append("[%s]: returned %r, expected %r" % (func, r, res)) - continue - return errors - - -def run_all(): - errors = run_tests(tests) - if not errors: - print "success" - else: - print "errors:" - for e in errors: - print " ", e - -if __name__ == "__main__": - run_all() diff --git a/construct/text/__init__.py b/construct/text/__init__.py deleted file mode 100644 index 74b83d08e..000000000 --- a/construct/text/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from common import * -from ast import * - - diff --git a/construct/text/ast.py b/construct/text/ast.py deleted file mode 100644 index 1baa29e5a..000000000 --- a/construct/text/ast.py +++ /dev/null @@ 
-1,18 +0,0 @@ -from construct.core import Container -from construct.adapters import Adapter - -class AstNode(Container): - def __init__(self, nodetype, **kw): - Container.__init__(self) - self.nodetype = nodetype - for k, v in sorted(kw.iteritems()): - setattr(self, k, v) - - def accept(self, visitor): - return getattr(visitor, "visit_%s" % (self.nodetype,))(self) - -class AstTransformator(Adapter): - def _decode(self, obj, context): - return self.to_ast(obj, context) - def _encode(self, obj, context): - return self.to_cst(obj, context) diff --git a/construct/text/common.py b/construct/text/common.py deleted file mode 100644 index a1cfea6d2..000000000 --- a/construct/text/common.py +++ /dev/null @@ -1,331 +0,0 @@ -""" -common constructs for typical programming languages (numbers, strings, ...) -""" -from construct.core import (Construct, ConstructError, FieldError, - SizeofError) -from construct.adapters import (Adapter, StringAdapter, IndexingAdapter, - ConstAdapter, OneOf, NoneOf) -from construct.macros import (Field, OptionalGreedyRange, GreedyRange, - Sequence, Optional) - - -#=============================================================================== -# exceptions -#=============================================================================== -class QuotedStringError(ConstructError): - __slots__ = [] - - -#=============================================================================== -# constructs -#=============================================================================== -class QuotedString(Construct): - r""" - A quoted string (begins with an opening-quote, terminated by a - closing-quote, which may be escaped by an escape character) - - Parameters: - * name - the name of the field - * start_quote - the opening quote character. default is '"' - * end_quote - the closing quote character. default is '"' - * esc_char - the escape character, or None to disable escaping. 
defualt - is "\" (backslash) - * encoding - the character encoding (e.g., "utf8"), or None to return - raw bytes. defualt is None. - * allow_eof - whether to allow EOF before the closing quote is matched. - if False, an exception will be raised when EOF is reached by the closing - quote is missing. default is False. - - Example: - QuotedString("foo", start_quote = "{", end_quote = "}", esc_char = None) - """ - __slots__ = [ - "start_quote", "end_quote", "char", "esc_char", "encoding", - "allow_eof" - ] - def __init__(self, name, start_quote = '"', end_quote = None, - esc_char = '\\', encoding = None, allow_eof = False): - Construct.__init__(self, name) - if end_quote is None: - end_quote = start_quote - self.start_quote = Literal(start_quote) - self.char = Char("char") - self.end_quote = end_quote - self.esc_char = esc_char - self.encoding = encoding - self.allow_eof = allow_eof - - def _parse(self, stream, context): - self.start_quote._parse(stream, context) - text = [] - escaped = False - try: - while True: - ch = self.char._parse(stream, context) - if ch == self.esc_char: - if escaped: - text.append(ch) - escaped = False - else: - escaped = True - elif ch == self.end_quote and not escaped: - break - else: - text.append(ch) - escaped = False - except FieldError: - if not self.allow_eof: - raise - text = "".join(text) - if self.encoding is not None: - text = text.decode(self.encoding) - return text - - def _build(self, obj, stream, context): - self.start_quote._build(None, stream, context) - if self.encoding: - obj = obj.encode(self.encoding) - for ch in obj: - if ch == self.esc_char: - self.char._build(self.esc_char, stream, context) - elif ch == self.end_quote: - if self.esc_char is None: - raise QuotedStringError("found ending quote in data, " - "but no escape char defined", ch) - else: - self.char._build(self.esc_char, stream, context) - self.char._build(ch, stream, context) - self.char._build(self.end_quote, stream, context) - - def _sizeof(self, context): - 
raise SizeofError("can't calculate size") - - -#=============================================================================== -# macros -#=============================================================================== -class WhitespaceAdapter(Adapter): - """ - Adapter for whitespace sequences; do not use directly. - See Whitespace. - - Parameters: - * subcon - the subcon to adapt - * build_char - the character used for encoding (building) - """ - __slots__ = ["build_char"] - def __init__(self, subcon, build_char): - Adapter.__init__(self, subcon) - self.build_char = build_char - def _encode(self, obj, context): - return self.build_char - def _decode(self, obj, context): - return None - -def Whitespace(charset = " \t", optional = True): - """whitespace (space that is ignored between tokens). when building, the - first character of the charset is used. - * charset - the set of characters that are considered whitespace. default - is space and tab. - * optional - whether or not whitespace is optional. default is True. 
- """ - con = CharOf(None, charset) - if optional: - con = OptionalGreedyRange(con) - else: - con = GreedyRange(con) - return WhitespaceAdapter(con, build_char = charset[0]) - -def Literal(text): - """matches a literal string in the text - * text - the text (string) to match - """ - return ConstAdapter(Field(None, len(text)), text) - -def Char(name): - """a one-byte character""" - return Field(name, 1) - -def CharOf(name, charset): - """matches only characters of a given charset - * name - the name of the field - * charset - the set of valid characters - """ - return OneOf(Char(name), charset) - -def CharNoneOf(name, charset): - """matches only characters that do not belong to a given charset - * name - the name of the field - * charset - the set of invalid characters - """ - return NoneOf(Char(name), charset) - -def Alpha(name): - """a letter character (A-Z, a-z)""" - return CharOf(name, set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')) - -def Digit(name): - """a digit character (0-9)""" - return CharOf(name, set('0123456789')) - -def AlphaDigit(name): - """an alphanumeric character (A-Z, a-z, 0-9)""" - return CharOf(name, set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")) - -def BinDigit(name): - """a binary digit (0-1)""" - return CharOf(name, set('01')) - -def HexDigit(name): - """a hexadecimal digit (0-9, A-F, a-f)""" - return CharOf(name, set('0123456789abcdefABCDEF')) - -def Word(name): - """a sequence of letters""" - return StringAdapter(GreedyRange(Alpha(name))) - -class TextualIntAdapter(Adapter): - """ - Adapter for textual integers - - Parameters: - * subcon - the subcon to adapt - * radix - the base of the integer (decimal, hexadecimal, binary, ...) 
- * digits - the sequence of digits of that radix - """ - __slots__ = ["radix", "digits"] - def __init__(self, subcon, radix = 10, digits = "0123456789abcdef"): - Adapter.__init__(self, subcon) - if radix > len(digits): - raise ValueError("not enough digits for radix %d" % (radix,)) - self.radix = radix - self.digits = digits - def _encode(self, obj, context): - chars = [] - if obj < 0: - chars.append("-") - n = -obj - else: - n = obj - r = self.radix - digs = self.digits - while n > 0: - n, d = divmod(n, r) - chars.append(digs[d]) - # obj2 = "".join(reversed(chars)) - # filler = digs[0] * (self._sizeof(context) - len(obj2)) - # return filler + obj2 - return "".join(reversed(chars)) - def _decode(self, obj, context): - return int("".join(obj), self.radix) - -def DecNumber(name): - """decimal number""" - return TextualIntAdapter(GreedyRange(Digit(name))) - -def BinNumber(name): - """binary number""" - return TextualIntAdapter(GreedyRange(BinDigit(name)), 2) - -def HexNumber(name): - """hexadecimal number""" - return TextualIntAdapter(GreedyRange(HexDigit(name)), 16) - -class TextualFloatAdapter(Adapter): - def _decode(self, obj, context): - whole, frac, exp = obj - mantissa = "".join(whole) + "." 
+ "".join(frac) - if exp: - sign, value = exp - if not sign: - sign = "" - return float(mantissa + "e" + sign + "".join(value)) - else: - return float(mantissa) - def _encode(self, obj, context): - obj = str(obj) - exp = None - if "e" in obj: - obj, exp = obj.split("e") - sign = exp[0] - value = exp[1:] - exp = [sign, value] - whole, frac = obj.split(".") - return [whole, frac, exp] - -def FloatNumber(name): - return TextualFloatAdapter( - Sequence(name, - GreedyRange(Digit("whole")), - Literal("."), - GreedyRange(Digit("frac")), - Optional( - Sequence("exp", - Literal("e"), - Optional(CharOf("sign", "+-")), - GreedyRange(Digit("value")), - ) - ) - ) - ) - -def StringUpto(name, terminators, consume_terminator = False, allow_eof = True): - """a string that stretches up to a terminator, or EOF. this is a more - flexible version of CString. - * name - the name of the field - * terminator - the set of terminator characters - * consume_terminator - whether to consume the terminator character. the - default is False. - * allow_eof - whether to allow EOF to terminate the string. the default - is True. this option is applicable only if consume_terminator is set. - """ - con = StringAdapter(OptionalGreedyRange(CharNoneOf(name, terminators))) - if not consume_terminator: - return con - if allow_eof: - term = Optional(CharOf(None, terminators)) - else: - term = CharOf(None, terminators) - return IndexingAdapter(Sequence("foo", con, term), index = 0) - -def Line(name, consume_terminator = True, allow_eof = True): - r"""a textual line (up to "\n") - * name - the name of the field - * consume_terminator - whether to consume the newline character. the - default is True. - * allow_eof - whether to allow EOF to terminate the string. the default - is True. this option is applicable only if consume_terminator is set. 
- """ - return StringUpto(name, "\n", - consume_terminator = consume_terminator, - allow_eof = allow_eof - ) - -class IdentifierAdapter(Adapter): - """ - Adapter for programmatic identifiers - - Parameters: - * subcon - the subcon to adapt - """ - def _encode(self, obj, context): - return obj[0], obj[1:] - def _decode(self, obj, context): - return obj[0] + "".join(obj[1]) - -def Identifier(name, - headset = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"), - tailset = set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_") - ): - """a programmatic identifier (symbol). must start with a char of headset, - followed by a sequence of tailset characters - * name - the name of the field - * headset - charset for the first character. default is A-Z, a-z, and _ - * tailset - charset for the tail. default is A-Z, a-z, 0-9 and _ - """ - return IdentifierAdapter( - Sequence(name, - CharOf("head", headset), - OptionalGreedyRange(CharOf("tail", tailset)), - ) - ) diff --git a/construct/text/test.py b/construct/text/test.py deleted file mode 100644 index d33b5bd6c..000000000 --- a/construct/text/test.py +++ /dev/null @@ -1,306 +0,0 @@ -from construct import * -from construct.text import * - - - -#=============================================================================== -# AST transfomations -#=============================================================================== -class NumberTransformator(AstTransformator): - def to_ast(self, obj, context): - return AstNode("number", value = obj) - -class StringTransformator(AstTransformator): - def to_ast(self, obj, context): - return AstNode("string", value = obj) - -class SymbolTransformator(AstTransformator): - keywords = set([ - "if", "for", "while", "else", "def", "import", "in", "and", "or", - "not", "as", "from", "return", "const", "var", - ]) - def to_ast(self, obj, context): - if obj in self.keywords: - return AstNode("error", - message = "reserved word used as a symbol", - args = [obj] - ) - 
else: - return AstNode("symbol", name = obj) - -class CommentTransformator(AstTransformator): - def to_ast(self, obj, context): - return AstNode("comment", text = obj) - -class CallTransformator(AstTransformator): - def to_ast(self, obj, context): - symbol, args, lastarg = obj - args.append(lastarg) - return AstNode("call", name = symbol, args = args) - -class ExprTransformator(AstTransformator): - def to_ast(self, obj, context): - lhs, rhs = obj - if rhs is None: - return lhs - else: - op, rhs = rhs - return AstNode("expr", lhs = lhs, op = op, rhs = rhs) - -class VardefTransformator(AstTransformator): - def to_ast(self, obj, context): - args, lastarg = obj - vars = [] - for name, type, init in args: - args.append((name, type, init)) - name, type, init = lastarg - vars.append((name, type, init)) - return AstNode("vardef", vars = vars) - -class AsgnTransformator(AstTransformator): - def to_ast(self, obj, context): - name, expr = obj - return AstNode("asgnstmt", name = name, expr = expr) - -class IfTransformator(AstTransformator): - def to_ast(self, obj, context): - return AstNode("ifstmt", - cond = obj.cond, - thencode = obj.thencode, - elsecode = obj.elsecode - ) - -class WhileTransformator(AstTransformator): - def to_ast(self, obj, context): - return AstNode("whilestmt", cond = obj.cond, code = obj.code) - -class BlockTransformator(AstTransformator): - def to_ast(self, obj, context): - return AstNode("block", statements = obj) - -class RootTransformator(AstTransformator): - def to_ast(self, obj, context): - return AstNode("root", statements = obj) - - -#=============================================================================== -# macros -#=============================================================================== -def OptSeq(name, *subcons): - return Optional(Sequence(name, *subcons)) - -def SeqOfOne(name, *subcons): - return IndexingAdapter(Sequence(name, *subcons), index = 0) - -def OptSeqOfOne(name, *subcons): - return Optional(SeqOfOne(name, 
*subcons)) - -def Expr(name): - return LazyBound(name, lambda: expr2) - - -#=============================================================================== -# grammar -#=============================================================================== -ws = Whitespace(" \t\r\n") -rws = Whitespace(" \t\r\n", optional = False) - -number = NumberTransformator( - Select("num", - FloatNumber("flt"), - SeqOfOne("hex", - Literal("0x"), - HexNumber("value"), - ), - DecNumber("dec"), - ) -) - -symbol = SymbolTransformator(Identifier("symbol")) - -call = CallTransformator( - Sequence("call", - symbol, - ws, - Literal("("), - OptionalGreedyRange( - SeqOfOne("args", - Expr("expr"), - Literal(","), - ) - ), - Optional(Expr("expr")), - Literal(")"), - ) -) - -comment = CommentTransformator( - SeqOfOne("comment", - Literal("/*"), - StringUpto("text", "*/"), - Literal("*/"), - ) -) - -term = SeqOfOne("term", - ws, - Select("term", - number, - call, - symbol, - SeqOfOne("subexpr", - Literal("("), - Expr("subexpr"), - Literal(")"), - ) - ), - ws, -) - -expr1 = ExprTransformator( - Sequence("expr1", - term, - OptSeq("rhs", - CharOf("op", "*/"), - LazyBound("expr1", lambda: expr1), - ) - ) -) -expr2 = ExprTransformator( - Sequence("expr2", - expr1, - OptSeq("rhs", - CharOf("op", "+-"), - LazyBound("expr2", lambda: expr2), - ) - ) -) - -asgnstmt = AsgnTransformator( - Sequence("asgnstmt", - symbol, - ws, - Literal("="), - Expr("expr"), - Literal(";"), - ) -) - -vardef_elem = Sequence("vardef_elem", - Identifier("name"), - ws, - Literal("as"), - ws, - Identifier("type"), - OptSeqOfOne("init", - ws, - Literal("="), - Expr("expr"), - ) -) -vardef = VardefTransformator( - Sequence("vardef", - Literal("var"), - rws, - OptionalGreedyRange( - SeqOfOne("names", - ws, - vardef_elem, - ws, - Literal(","), - ) - ), - ws, - vardef_elem, - ws, - Literal(";"), - ) -) - -stmt = SeqOfOne("stmt", - ws, - Select("stmt", - comment, - LazyBound("if", lambda: ifstmt), - LazyBound("while", lambda: whilestmt), 
- asgnstmt, - vardef, - SeqOfOne("expr", - Expr("expr"), - Literal(";") - ), - ), - ws, -) - -def Block(name): - return BlockTransformator( - Select(name, - SeqOfOne("multi", - ws, - Literal("{"), - OptionalGreedyRange(stmt), - Literal("}"), - ws, - ), - Sequence("single", stmt), - ) - ) - -ifstmt = IfTransformator( - Struct("ifstmt", - Literal("if"), - ws, - Literal("("), - Expr("cond"), - Literal(")"), - Block("thencode"), - Optional( - SeqOfOne("elsecode", - Literal("else"), - Block("code"), - ) - ), - ) -) - -whilestmt = WhileTransformator( - Struct("whilestmt", - Literal("while"), - ws, - Literal("("), - Expr("cond"), - Literal(")"), - Block("code"), - ) -) - -root = RootTransformator( - OptionalGreedyRange(stmt) -) - -test = """var x as int, y as int;""" - -print vardef.parse(test) - - - - - - - - - - - - - - - - - - - - - - diff --git a/construct/version.py b/construct/version.py new file mode 100644 index 000000000..11272fbe0 --- /dev/null +++ b/construct/version.py @@ -0,0 +1,3 @@ +version = (2,10,70) +version_string = "2.10.70" +release_date = "2023.11.29" diff --git a/deprecated_gallery/__init__.py b/deprecated_gallery/__init__.py new file mode 100644 index 000000000..54d972e65 --- /dev/null +++ b/deprecated_gallery/__init__.py @@ -0,0 +1,12 @@ +from .emf import emf_file +from .png import png_file +from .bmp import bitmap_file +from .wmf import wmf_file +from .gif import gif_file +from .mbr import mbr_format +from .cap import cap_file +from .snoop import snoop_file +from .pe32 import pe32_file +from .elf32 import elf32_file + +from .ipstack import ip_stack, dns, tcp_header, udp_header, dhcp4_header, dhcp6_message, icmp_header, igmpv2_header, ipv4_header, ipv6_header, arp_header, ethernet_header, MacAddress, IpAddress, Ipv6Address diff --git a/deprecated_gallery/bmp.py b/deprecated_gallery/bmp.py new file mode 100644 index 000000000..fd82c4b07 --- /dev/null +++ b/deprecated_gallery/bmp.py @@ -0,0 +1,81 @@ +""" +Windows/OS2 Bitmap (BMP) this could have 
been a perfect show-case file format, but they had to make it ugly (all sorts of alignments) +""" + +from construct import * + +#=============================================================================== +# pixels: uncompressed +#=============================================================================== +def UncompressedRows(subcon, align_to_byte=False): + """argh! lines must be aligned to a 4-byte boundary, and bit-pixel lines must be aligned to full bytes...""" + if align_to_byte: + line_pixels = Bitwise(Aligned(8, Array(this.width, subcon))) + else: + line_pixels = Array(this.width, subcon) + return Array(this.height, Aligned(4, line_pixels)) + +uncompressed_pixels = Switch(this.bpp, + { + 1 : UncompressedRows(Bit, align_to_byte=True), # index + 4 : UncompressedRows(Nibble, align_to_byte=True), # index + 8 : UncompressedRows(Byte), # index + 24 : UncompressedRows(Byte[3]), # rgb + } +) + +#=============================================================================== +# pixels: Run Length Encoding (RLE) 8 bit +#=============================================================================== +class RunLengthAdapter(Adapter): + def _decode(self, obj, context, path): + length,value = obj + return [value] * length + def _encode(self, obj, context, path): + return len(obj), obj[0] + +# WARNING: not used anywhere +rle8pixel = "rle8pixel" / RunLengthAdapter(Byte >> Byte) + +#=============================================================================== +# file structure +#=============================================================================== +bitmap_file = Struct( + "signature" / Const(b"BM"), + "file_size" / Int32ul, + Padding(4), + "data_offset" / Int32ul, + "header_size" / Int32ul, + "version" / Enum(Computed(this.header_size), + v2 = 12, + v3 = 40, + v4 = 108, + ), + "width" / Int32ul, + "height" / Int32ul, + "number_of_pixels" / Computed(this.width * this.height), + "planes" / Int16ul, + "bpp" / Int16ul, # bits per pixel + "compression" / 
Enum(Int32ul, + Uncompressed = 0, + RLE8 = 1, + RLE4 = 2, + Bitfields = 3, + JPEG = 4, + PNG = 5, + ), + "image_data_size" / Int32ul, # in bytes + "horizontal_dpi" / Int32ul, + "vertical_dpi" / Int32ul, + "colors_used" / Int32ul, + "important_colors" / Int32ul, + + # palette (24 bit has no palette) + # NOTE: was called "rgb" inside of it + "palette" / Array(lambda ctx: 2**ctx.bpp if ctx.bpp <= 8 else 0, + Padded(4, Byte[3])), + + "pixels" / Pointer(this.data_offset, + Switch(this.compression, {"Uncompressed" : uncompressed_pixels}), + ), +) diff --git a/deprecated_gallery/cap.py b/deprecated_gallery/cap.py new file mode 100644 index 000000000..0dd1562e3 --- /dev/null +++ b/deprecated_gallery/cap.py @@ -0,0 +1,31 @@ +############################################################## +# WARNING: HEADER IS SKIPPED NOT PARSED, DATETIME CAN BE WRONG +# +# https://wiki.wireshark.org/Development/LibpcapFileFormat +############################################################## + +from construct import * +import time, datetime + + +# use core Timestamp +class MicrosecAdapter(Adapter): + def _decode(self, obj, context, path): + return datetime.datetime.fromtimestamp(obj[0] + obj[1] / 1000000.) + def _encode(self, obj, context, path): + epoch = datetime.datetime.utcfromtimestamp(0) + return [int((obj-epoch).total_seconds()), 0] + + # offset = time.mktime(*obj.timetuple()) + # sec = int(offset) + # usec = (offset - sec) * 1000000 + # return (sec, usec) + +packet = Struct( + "time" / MicrosecAdapter(Int32ul >> Int32ul), + "length" / Int32ul, + Padding(4), + "data" / Bytes(this.length), +) + +cap_file = Padded(24, GreedyRange(packet)) diff --git a/deprecated_gallery/elf32.py b/deprecated_gallery/elf32.py new file mode 100644 index 000000000..f28c77864 --- /dev/null +++ b/deprecated_gallery/elf32.py @@ -0,0 +1,124 @@ +""" +Executable and Linkable Format (ELF), 32 bit, big or little endian. +Used on Unix systems as a replacement of the older a.out format. 
+ +Big-endian support kindly submitted by Craig McQueen (mcqueen-c#edsrd1!yzk!co!jp). +""" + +from construct import * + + +def elf32_body(ElfInt16, ElfInt32): + + elf32_program_header = Struct( + "type" / Enum(ElfInt32, + NULL = 0, + LOAD = 1, + DYNAMIC = 2, + INTERP = 3, + NOTE = 4, + SHLIB = 5, + PHDR = 6, + ), + "offset" / ElfInt32, + "vaddr" / ElfInt32, + "paddr" / ElfInt32, + "file_size" / ElfInt32, + "mem_size" / ElfInt32, + "flags" / ElfInt32, + "align" / ElfInt32, + ) + + elf32_section_header = Struct( + "name_offset" / ElfInt32, + "name" / Pointer(this._.strtab_data_offset + this.name_offset, + CString("utf8")), + "type" / Enum(ElfInt32, + NULL = 0, + PROGBITS = 1, + SYMTAB = 2, + STRTAB = 3, + RELA = 4, + HASH = 5, + DYNAMIC = 6, + NOTE = 7, + NOBITS = 8, + REL = 9, + SHLIB = 10, + DYNSYM = 11, + ), + "flags" / ElfInt32, + "addr" / ElfInt32, + "offset" / ElfInt32, + "size" / ElfInt32, + "link" / ElfInt32, + "info" / ElfInt32, + "align" / ElfInt32, + "entry_size" / ElfInt32, + "data" / Pointer(this.offset, + Bytes(this.size)), + ) + + return Struct( + "type" / Enum(ElfInt16, + NONE = 0, + RELOCATABLE = 1, + EXECUTABLE = 2, + SHARED = 3, + CORE = 4, + ), + "machine" / Enum(ElfInt16, + NONE = 0, + M32 = 1, + SPARC = 2, + I386 = 3, + Motorolla68K = 4, + Motorolla88K = 5, + Intel860 = 7, + MIPS = 8, + ), + "version" / ElfInt32, + "entry" / ElfInt32, + "ph_offset" / ElfInt32, + "sh_offset" / ElfInt32, + "flags" / ElfInt32, + "header_size" / ElfInt16, + "ph_entry_size" / ElfInt16, + "ph_count" / ElfInt16, + "sh_entry_size" / ElfInt16, + "sh_count" / ElfInt16, + "strtab_section_index" / ElfInt16, + + # calculate the string table data offset (pointer arithmetics) + # ugh... 
anyway, we need it in order to read the section names, later on + "strtab_data_offset" / Pointer(this.sh_offset + this.strtab_section_index * this.sh_entry_size + 16, + ElfInt32), + + "program_table" / Pointer(this.ph_offset, + elf32_program_header[this.ph_count]), + + "sections" / Pointer(this.sh_offset, + elf32_section_header[this.sh_count]), + ) + +elf32_file = Struct( + "identifier" / Struct( + Const(b"\x7fELF"), + "file_class" / Enum(Byte, + NONE = 0, + CLASS32 = 1, + CLASS64 = 2, + ), + "encoding" / Enum(Byte, + NONE = 0, + LSB = 1, + MSB = 2, + ), + "version" / Byte, + Padding(9), + ), + "body" / IfThenElse(this.identifier.encoding == "LSB", + elf32_body(Int16ul, Int32ul), + elf32_body(Int16ub, Int32ub), + ), +) diff --git a/deprecated_gallery/emf.py b/deprecated_gallery/emf.py new file mode 100644 index 000000000..c229b5aaa --- /dev/null +++ b/deprecated_gallery/emf.py @@ -0,0 +1,148 @@ +""" +Enhanced Meta File +""" + +from construct import * + +record_type = Enum(Int32ul, + ABORTPATH = 68, + ANGLEARC = 41, + ARC = 45, + ARCTO = 55, + BEGINPATH = 59, + BITBLT = 76, + CHORD = 46, + CLOSEFIGURE = 61, + CREATEBRUSHINDIRECT = 39, + CREATEDIBPATTERNBRUSHPT = 94, + CREATEMONOBRUSH = 93, + CREATEPALETTE = 49, + CREATEPEN = 38, + DELETEOBJECT = 40, + ELLIPSE = 42, + ENDPATH = 60, + EOF = 14, + EXCLUDECLIPRECT = 29, + EXTCREATEFONTINDIRECTW = 82, + EXTCREATEPEN = 95, + EXTFLOODFILL = 53, + EXTSELECTCLIPRGN = 75, + EXTTEXTOUTA = 83, + EXTTEXTOUTW = 84, + FILLPATH = 62, + FILLRGN = 71, + FLATTENPATH = 65, + FRAMERGN = 72, + GDICOMMENT = 70, + HEADER = 1, + INTERSECTCLIPRECT = 30, + INVERTRGN = 73, + LINETO = 54, + MASKBLT = 78, + MODIFYWORLDTRANSFORM = 36, + MOVETOEX = 27, + OFFSETCLIPRGN = 26, + PAINTRGN = 74, + PIE = 47, + PLGBLT = 79, + POLYBEZIER = 2, + POLYBEZIER16 = 85, + POLYBEZIERTO = 5, + POLYBEZIERTO16 = 88, + POLYDRAW = 56, + POLYDRAW16 = 92, + POLYGON = 3, + POLYGON16 = 86, + POLYLINE = 4, + POLYLINE16 = 87, + POLYLINETO = 6, + POLYLINETO16 = 89, + 
POLYPOLYGON = 8, + POLYPOLYGON16 = 91, + POLYPOLYLINE = 7, + POLYPOLYLINE16 = 90, + POLYTEXTOUTA = 96, + POLYTEXTOUTW = 97, + REALIZEPALETTE = 52, + RECTANGLE = 43, + RESIZEPALETTE = 51, + RESTOREDC = 34, + ROUNDRECT = 44, + SAVEDC = 33, + SCALEVIEWPORTEXTEX = 31, + SCALEWINDOWEXTEX = 32, + SELECTCLIPPATH = 67, + SELECTOBJECT = 37, + SELECTPALETTE = 48, + SETARCDIRECTION = 57, + SETBKCOLOR = 25, + SETBKMODE = 18, + SETBRUSHORGEX = 13, + SETCOLORADJUSTMENT = 23, + SETDIBITSTODEVICE = 80, + SETMAPMODE = 17, + SETMAPPERFLAGS = 16, + SETMETARGN = 28, + SETMITERLIMIT = 58, + SETPALETTEENTRIES = 50, + SETPIXELV = 15, + SETPOLYFILLMODE = 19, + SETROP2 = 20, + SETSTRETCHBLTMODE = 21, + SETTEXTALIGN = 22, + SETTEXTCOLOR = 24, + SETVIEWPORTEXTEX = 11, + SETVIEWPORTORGEX = 12, + SETWINDOWEXTEX = 9, + SETWINDOWORGEX = 10, + SETWORLDTRANSFORM = 35, + STRETCHBLT = 77, + STRETCHDIBITS = 81, + STROKEANDFILLPATH = 63, + STROKEPATH = 64, + WIDENPATH = 66, +) + +generic_record = Struct( + "record_type" / record_type, + "record_size" / Int32ul, # Size of the record in bytes + "params" / RawCopy(Array((this.record_size - 8) // 4, Int32ul)), +) + +header_record = Struct( + Const("HEADER", record_type), + "record_size" / Int32ul, # Size of the record in bytes + "bounds_left" / Int32sl, # Left inclusive bounds + "bounds_right" / Int32sl, # Right inclusive bounds + "bounds_top" / Int32sl, # Top inclusive bounds + "bounds_bottom" / Int32sl, # Bottom inclusive bounds + "frame_left" / Int32sl, # Left side of inclusive picture frame + "frame_right" / Int32sl, # Right side of inclusive picture frame + "frame_top" / Int32sl, # Top side of inclusive picture frame + "frame_bottom" / Int32sl, # Bottom side of inclusive picture frame + "signature" / Const(0x464D4520, Int32ul), + "version" / Int32ul, # Version of the metafile + "size" / Int32ul, # Size of the metafile in bytes + "num_of_records" / Int32ul, # Number of records in the metafile + "num_of_handles" / Int16ul, # Number of handles in the 
handle table + Padding(2), + "description_size" / Int32ul, # Size of description string in WORDs + "description_offset" / Int32ul, # Offset of description string in metafile + "num_of_palette_entries" / Int32ul, # Number of color palette entries + "device_width_pixels" / Int32sl, # Width of reference device in pixels + "device_height_pixels" / Int32sl, # Height of reference device in pixels + "device_width_mm" / Int32sl, # Width of reference device in millimeters + "device_height_mm" / Int32sl, # Height of reference device in millimeters + + "description" / Pointer(this.description_offset, + PaddedString(this.description_size * 2, "utf8")), + + # padding up to end of record + Padding(this.record_size - 88), +) + +emf_file = Struct( + "header_record" / header_record, + "records" / Array(this.header_record.num_of_records - 1, + generic_record), +) diff --git a/deprecated_gallery/ext2.py b/deprecated_gallery/ext2.py new file mode 100644 index 000000000..cf62a8382 --- /dev/null +++ b/deprecated_gallery/ext2.py @@ -0,0 +1,132 @@ +""" +Extension 2 (ext2) used in Linux systems +""" + +from construct import * + +Char = SLInt8 +UChar = ULInt8 +Short = SLInt16 +UShort = ULInt16 +Long = SLInt32 +ULong = ULInt32 + +BlockPointer = Struct( + "block_number" / ULong, + # WARNING: unnamed field? 
+ OnDemandPointer(this.block_number), +) + +superblock = Struct( + "inodes_count" / ULong, + "blocks_count" / ULong, + "reserved_blocks_count" / ULong, + "free_blocks_count" / ULong, + "free_inodes_count" / ULong, + "first_data_block" / ULong, + "log_block_size" / Enum(ULong, + OneKB = 0, + TwoKB = 1, + FourKB = 2, + ), + "log_frag_size" / Long, + "blocks_per_group" / ULong, + "frags_per_group" / ULong, + "inodes_per_group" / ULong, + "mtime" / ULong, + "wtime" / ULong, + "mnt_count" / UShort, + "max_mnt_count" / Short, + "magic" / Const(UShort, 0xEF53), + "state" / UShort, + "errors" / UShort, + Padding(2), + "lastcheck" / ULong, + "checkinterval" / ULong, + "creator_os" / ULong, + "rev_level" / ULong, + Padding(235*4), +) + +group_descriptor = Struct( + "block_bitmap" / ULong, + "inode_bitmap" / ULong, + "inode_table" / ULong, + "free_blocks_count" / UShort, + "free_inodes_count" / UShort, + "used_dirs_count" / UShort, + Padding(14), +) + +inode = Struct( + "mode" / FlagsEnum(UShort, + IXOTH = 0x0001, + IWOTH = 0x0002, + IROTH = 0x0004, + IRWXO = 0x0007, + IXGRP = 0x0008, + IWGRP = 0x0010, + IRGRP = 0x0020, + IRWXG = 0x0038, + IXUSR = 0x0040, + IWUSR = 0x0080, + IRUSR = 0x0100, + IRWXU = 0x01C0, + ISVTX = 0x0200, + ISGID = 0x0400, + ISUID = 0x0800, + IFIFO = 0x1000, + IFCHR = 0x2000, + IFDIR = 0x4000, + IFBLK = 0x6000, + IFREG = 0x8000, + IFLNK = 0xC000, + IFSOCK = 0xA000, + IFMT = 0xF000, + ), + "uid" / UShort, + "size" / ULong, + "atime" / ULong, + "ctime" / ULong, + "mtime" / ULong, + "dtime" / ULong, + "gid" / UShort, + "links_count" / UShort, + "blocks" / ULong, + "flags" / FlagsEnum(ULong, + SecureDelete = 0x0001, + AllowUndelete = 0x0002, + Compressed = 0x0004, + Synchronous = 0x0008, + ), + Padding(4), + # WARNING: doubled name + "blocks" / ULong[12], + "indirect1_block" / ULong, + "indirect2_block" / ULong, + "indirect3_block" / ULong, + "version" / ULong, + "file_acl" / ULong, + "dir_acl" / ULong, + "faddr" / ULong, + "frag" / UChar, + "fsize" / UChar, 
+ Padding(10), +) + +# special inodes +EXT2_BAD_INO = 1 +EXT2_ROOT_INO = 2 +EXT2_ACL_IDX_INO = 3 +EXT2_ACL_DATA_INO = 4 +EXT2_BOOT_LOADER_INO = 5 +EXT2_UNDEL_DIR_INO = 6 +EXT2_FIRST_INO = 11 + +directory_record = Struct( + "inode" / ULong, + "rec_length" / UShort, + "name_length" / UShort, + "name" / Bytes(this.name_length), + Padding(this.rec_length - this.name_length), +) diff --git a/deprecated_gallery/fat16.py b/deprecated_gallery/fat16.py new file mode 100644 index 000000000..a056afd23 --- /dev/null +++ b/deprecated_gallery/fat16.py @@ -0,0 +1,222 @@ +""" +fat.py; ad-hoc fat16 reader + by Bram Westerbaan + +references: + http://en.wikipedia.org/wiki/File_Allocation_Table + http://www.ecma-international.org/publications/standards/Ecma-107.htm + +example: + with open("/dev/sdc1", "rb") as file: + fs = FatFs(file) + for rootdir in fs: + print rootdir +""" + +import numbers +from io import BytesIO, BufferedReader +from construct import * + +Fat16Header = Struct( + "jumpInstruction" / Bytes(3), + "creatingSystemId" / Bytes(8), + "sectorSize" / Int16ul, + "sectorsPerCluster" / Byte, + "reservedSectorCount" / Int16ul, + "fatCount" / Byte, + "rootdirEntryCount" / Int16ul, + "sectorCount_small" / Int16ul, + "mediaId" / Byte, + "sectorsPerFat" / Int16ul, + "sectorsPerTrack" / Int16ul, + "sideCount" / Int16ul, + "hiddenSectorCount" / Int32ul, + "sectorCount_large" / Int32ul, + "physicalDriveNumber" / Byte, + "currentHead" / Byte, + "extendedBootSignature" / Byte, + "volumeId" / Bytes(4), + "volumeLabel" / Bytes(11), + "fsType" / Const(b"FAT16 "), + "bootCode" / Bytes(448), + "bootSectorSignature" / Const(b"\x55\xaa"), +) + +BootSector = Struct( + Embedded(Fat16Header), + Padding(this.sectorSize - Fat16Header.sizeof()), +) + +FatEntry = Enum(Int16ul, + free_cluster = 0x0000, + bad_cluster = 0xfff7, + last_cluster = 0xffff, + default = Pass, +) + +DirEntry = Struct( + "name" / Bytes(8), + "extension" / Bytes(3), + "attributes" / BitStruct( + "unused" / Flag, + "device" / 
Flag, + "archive" / Flag, + "subDirectory" / Flag, + "volumeLabel" / Flag, + "system" /Flag, + "hidden" / Flag, + "readonly" / Flag, + ), + Padding(10), + "timeRecorded" / Int16ul, + "dateRecorded" / Int16ul, + "firstCluster" / Int16ul, + "fileSize" / Int32ul, +) + +# NOTE: non-integer division, really? +PreDataRegion = Struct( + "bootSector" / Embedded(BootSector), + # the remaining reserved sectors + Padding((this.reservedSectorCount - 1) * this.sectorSize), + # file allocation tables + Array(this.fatCount, Array(this.sectorsPerFat * this.sectorSize / FatEntry.sizeof(), FatEntry)), + # root directories + Array((this.rootdirEntryCount * DirEntry.sizeof()) / this.sectorSize, DirEntry), +) + + +class File(object): + def __init__(self, dirEntry, fs): + self.fs = fs + self.dirEntry = dirEntry + + @classmethod + def fromDirEntry(cls, dirEntry, fs): + if dirEntry.name[0] in "\x00\xe5\x2e": + return None + a = dirEntry.attributes + #Long file name directory entry + if a.volumeLabel and a.system and a.hidden and a.readonly: + return None + if a.subDirectory: + return Directory(dirEntry, fs) + return File(dirEntry, fs) + + @classmethod + def fromDirEntries(cls, dirEntries, fs): + return filter(None, [cls.fromDirEntry(de, fs) for de in dirEntries]) + + def toStream(self, stream): + self.fs.fileToStream(self.dirEntry.firstCluster, stream) + + @property + def name(self): + return "%s.%s" % (self.dirEntry.name.rstrip(), self.dirEntry.extension) + + def __str__(self): + return "&%s %s" % (self.dirEntry.firstCluster, self.name) + + +class Directory(File): + def __init__(self, dirEntry, fs, children=None): + super(Directory, self).__init__(dirEntry, fs) + self.children = children + if not self.children: + self.children = File.fromDirEntries(\ + self.fs.getDirEntries(\ + self.dirEntry.firstCluster), fs) + + @property + def name(self): + return self.dirEntry.name.rstrip() + + def __str__(self): + return "&%s %s/" % (self.dirEntry.firstCluster, self.name) + + def __getitem__(self, 
name): + for file in self.children: + if file.name == name: + return file + + def __iter__(self): + return iter(self.children) + + +class FatFs(Directory): + def __init__(self, stream): + self.stream = stream + self.pdr = PreDataRegion.parse_stream(stream) + super(FatFs, self).__init__(dirEntry = None, + fs = self, children = File.fromDirEntries( + self.pdr.rootdirs, self)) + + def fileToStream(self, clidx, stream): + for clidx in self.getLinkedClusters(clidx): + self.clusterToStream(clidx, stream) + + def clusterToStream(self, clidx, stream): + start, todo = self.getClusterSlice(clidx) + self.stream.seek(start) + while todo > 0: + read = self.stream.read(todo) + if not len(read): + print("failed to read %s bytes at %s" % (todo, self.stream.tell())) + raise EOFError() + todo -= len(read) + stream.write(read) + + def getClusterSlice(self, clidx): + startSector = self.pdr.reservedSectorCount \ + + self.pdr.fatCount * self.pdr.sectorsPerFat \ + + (self.pdr.rootdirEntryCount * 32) \ + / self.pdr.sectorSize \ + + (clidx-2) * self.pdr.sectorsPerCluster + start = startSector * self.pdr.sectorSize + length = self.pdr.sectorSize * self.pdr.sectorsPerCluster + return (start, length) + + def getLinkedClusters(self, clidx): + res = [] + while clidx != "last_cluster": + if not isinstance(clidx, numbers.Real): + print(clidx) + assert False + assert 2 <= clidx <= 0xffef + res.append(clidx) + clidx = self.getNextCluster(clidx) + assert clidx not in res + return res + + def getNextCluster(self, clidx): + ress = set([fat[clidx] for fat in self.pdr.fats]) + if len(ress)==1: + return ress.pop() + print("inconsistencie between FATs: %s points to" % (clidx,)) + for i,fat in enumerate(self.pdr.fats): + print("\t%s according to fat #%s" % (fat[clidx], i)) + res = ress.pop() + print ("assuming %s" % res) + return res + + def getDirEntries(self, clidx): + try: + for de in self._getDirEntries(clidx): + yield de + except IOError: + print("failed to read directory entries at %s" % clidx) + + 
def _getDirEntries(self, clidx): + de = DirEntry("dirEntry") + with BytesIO() as mem: + self.fileToStream(clidx, mem) + mem.seek(0) + with BufferedReader(mem) as reader: + while reader.peek(1): + yield de.parse_stream(reader) + def __str__(self): + return "/" + + @property + def name(self): + return "" diff --git a/deprecated_gallery/gif.py b/deprecated_gallery/gif.py new file mode 100644 index 000000000..fd38e6b50 --- /dev/null +++ b/deprecated_gallery/gif.py @@ -0,0 +1,127 @@ +""" +Contributed by Dany Zatuchna (danzat at gmail) + +Implementation of the following grammar for the GIF89a file format + + <GIF Data Stream> ::= Header <Logical Screen> <Data>* Trailer + <Logical Screen> ::= Logical Screen Descriptor [Global Color Table] + <Data> ::= <Graphic Block> | <Special-Purpose Block> + <Graphic Block> ::= [Graphic Control Extension] <Graphic-Rendering Block> + <Graphic-Rendering Block> ::= <Table-Based Image> | Plain Text Extension + <Table-Based Image> ::= Image Descriptor [Local Color Table] Image Data + <Special-Purpose Block> ::= Application Extension | Comment Extension +""" + +from construct import * + + +data_sub_block = PascalString(Int8ul, "utf8") + +gif_logical_screen = Struct( + "width" / Int16ul, + "height" / Int16ul, + "flags" / BitStruct( + "global_color_table" / Bit, + "color_resolution" / BitsInteger(3), + "sort_flag" / Bit, + "global_color_table_bpp" / BitsInteger(3), + ), + "bgcolor_index" / Int8ul, + "pixel_aspect_ratio" / Int8ul, + "palette" / If(this.flags.global_color_table, + Array(lambda this: 2**(this.flags.global_color_table_bpp + 1), + Struct( + "R" / Int8ul, + "G" / Int8ul, + "B" / Int8ul, + ))), +) + +application_extension = Struct( + "block_size" / Const(11, Int8ul), + "application_identifier" / PaddedString(8, "utf8"), + "application_auth_code" / PaddedString(3, "utf8"), + "data_sub_block" / data_sub_block, + "block_terminator" / Int8ul, +) + +comment_extension = Struct( + "data_sub_block" / data_sub_block, + "block_terminator" / Int8ul, +) + +graphic_control_extension = Struct( + "block_size" / Const(4, Int8ul), + "flags" / BitStruct( + "reserved" / BitsInteger(3), + "disposal_method" / BitsInteger(3), + "user_input_flag" / Bit, + "transparent_color_flag" / Bit, +
), + "delay" / Int16ul, + "transparent_color_index" / Int8ul, + "block_terminator" / Int8ul, +) + +plain_text_extension = Struct( + "block_size" / Const(12, Int8ul), + "text_left" / Int16ul, + "text_top" / Int16ul, + "text_width" / Int16ul, + "text_height" / Int16ul, + "cell_width" / Int8ul, + "cell_height" / Int8ul, + "foreground_index" / Int8ul, + "background_index" / Int8ul, + "data_sub_block" / data_sub_block, + "block_terminator" / Int8ul, +) + +extension = Struct( + "label" / Int8ul, + "ext" / Switch(this.label, { + 0xFF: application_extension, + 0xFE: comment_extension, + 0xF9: graphic_control_extension, + 0x01: plain_text_extension + }), +) + +image_descriptor = Struct( + "left" / Int16ul, + "top" / Int16ul, + "width" / Int16ul, + "height" / Int16ul, + "flags" / BitStruct( + "local_color_table" / Bit, + "interlace" / Bit, + "sort" / Bit, + "reserved" / BitsInteger(2), + "local_color_table_bpp" / BitsInteger(3), + ), + "palette" / If(this.flags.local_color_table, + Array(lambda this: 2**(this.flags.local_color_table_bpp + 1), + Struct( + "R" / Int8ul, + "G" / Int8ul, + "B" / Int8ul, + ))), + "lzw_minimum_code_size" / Int8ul, + "data_sub_block" / RepeatUntil(lambda obj,lst,ctx: obj.size == 0, data_sub_block), +) + +gif_data = Struct( + "introducer" / Int8ul, + "data" / Switch(this.introducer, { + 0x21: extension, + 0x2C: image_descriptor + }), +) + +gif_file = Struct( + "signature" / Const(b"GIF"), + "version" / Const(b"89a"), + "logical_screen" / gif_logical_screen, + "data" / GreedyRange(gif_data), + # Const(Int8ul("trailer"), 0x3B) +) diff --git a/deprecated_gallery/ipstack.py b/deprecated_gallery/ipstack.py new file mode 100644 index 000000000..1502aee98 --- /dev/null +++ b/deprecated_gallery/ipstack.py @@ -0,0 +1,761 @@ +""" +TCP/IP Protocol Stack + +WARNING: before parsing the application layer over a TCP stream, you must first combine all the TCP frames into a stream. See utils.tcpip for some solutions. 
+""" + +from construct import * +from construct.lib import * + + +#=============================================================================== +# layer 2, Ethernet +#=============================================================================== + +MacAddress = ExprAdapter(Byte[6], + decoder = lambda obj,ctx: "-".join("%02x" % b for b in obj), + encoder = lambda obj,ctx: [int(part, 16) for part in obj.split("-")], +) + +ethernet_header = Struct( + "destination" / MacAddress, + "source" / MacAddress, + "type" / Enum(Int16ub, + IPv4 = 0x0800, + ARP = 0x0806, + RARP = 0x8035, + X25 = 0x0805, + IPX = 0x8137, + IPv6 = 0x86DD, + default = Pass, + ), +) + +#=============================================================================== +# layer 2, ARP +#=============================================================================== + +# HwAddress = IfThenElse(this.hardware_type == "ETHERNET", +# MacAddressAdapter(Bytes(this.hwaddr_length)), +# Bytes(this.hwaddr_length) +# ) + +HwAddress = Bytes(this.hwaddr_length) + +# ProtoAddress = IfThenElse(this.protocol_type == "IP", +# IpAddressAdapter(Bytes(this.protoaddr_length)), +# Bytes(this.protoaddr_length) +# ) + +ProtoAddress = Bytes(this.protoaddr_length) + +arp_header = Struct( + "hardware_type" / Enum(Int16ub, + ETHERNET = 1, + EXPERIMENTAL_ETHERNET = 2, + ProNET_TOKEN_RING = 4, + CHAOS = 5, + IEEE802 = 6, + ARCNET = 7, + HYPERCHANNEL = 8, + ULTRALINK = 13, + FRAME_RELAY = 15, + FIBRE_CHANNEL = 18, + IEEE1394 = 24, + HIPARP = 28, + ISO7816_3 = 29, + ARPSEC = 30, + IPSEC_TUNNEL = 31, + INFINIBAND = 32, + ), + "protocol_type" / Enum(Int16ub, + IP = 0x0800, + ), + "hwaddr_length" / Int8ub, + "protoaddr_length" / Int8ub, + "opcode" / Enum(Int16ub, + REQUEST = 1, + REPLY = 2, + REQUEST_REVERSE = 3, + REPLY_REVERSE = 4, + DRARP_REQUEST = 5, + DRARP_REPLY = 6, + DRARP_ERROR = 7, + InARP_REQUEST = 8, + InARP_REPLY = 9, + ARP_NAK = 10 + ), + "source_hwaddr" / HwAddress, + "source_protoaddr" / ProtoAddress, + "dest_hwaddr" / 
HwAddress, + "dest_protoaddr" / ProtoAddress, +) + +#=============================================================================== +# layer 2, Message Transport Part 2 (SS7 protocol stack) +# (untested) +#=============================================================================== + +mtp2_header = BitStruct( + "flag1" / Octet, + "bsn" / BitsInteger(7), + "bib" / Bit, + "fsn" / BitsInteger(7), + "sib" / Bit, + "length" / Octet, + "service_info" / Octet, + "signalling_info" / Octet, + "crc" / BitsInteger(16), + "flag2" / Octet, +) + +#=============================================================================== +# layer 3, IP v4 +#=============================================================================== + +IpAddress = ExprAdapter(Byte[4], + decoder = lambda obj,ctx: "{0}.{1}.{2}.{3}".format(*obj), + encoder = lambda obj,ctx: [int(x) for x in obj.split(".")], +) + +ProtocolEnum = Enum(Int8ub, + ICMP = 1, + TCP = 6, + UDP = 17, +) + +ipv4_header = Struct( + "header" / BitStruct( + "version" / Const(4, Nibble), + "header_length" / ExprAdapter(Nibble, + decoder = lambda obj, ctx: obj * 4, + encoder = lambda obj, ctx: obj // 4, + ), + ), + "header_length" / Computed(this.header.header_length), + "tos" / BitStruct( + "precedence" / BitsInteger(3), + "minimize_delay" / Flag, + "high_throuput" / Flag, + "high_reliability" / Flag, + "minimize_cost" / Flag, + Padding(1), + ), + "total_length" / Int16ub, + "payload_length" / Computed(this.total_length - this.header_length), + "identification" / Int16ub, + "flags" / BitStruct( + Padding(1), + "dont_fragment" / Flag, + "more_fragments" / Flag, + "frame_offset" / BitsInteger(13), + ), + "ttl" / Int8ub, + "protocol" / ProtocolEnum, + "checksum" / Int16ub, + "source" / IpAddress, + "destination" / IpAddress, + "options" / Bytes(this.header_length - 20), +) + +#=============================================================================== +# layer 3, IP v6 
#===============================================================================
# NOTE: same members as the ProtocolEnum declared before ipv4_header; this
# rebinds the name to an equivalent Enum.
ProtocolEnum = Enum(Int8ub,
    ICMP = 1,
    TCP = 6,
    UDP = 17,
)

# 16 raw bytes <-> text.  Each byte is rendered as two lowercase hex digits and
# the bytes are joined with ":" (one group per byte, i.e. 16 groups).  The
# encoder is the exact inverse: split on ":" and parse every part as base 16.
Ipv6Address = ExprAdapter(Byte[16],
    decoder = lambda obj,ctx: ":".join("%02x" % b for b in obj),
    encoder = lambda obj,ctx: [int(part, 16) for part in obj.split(":")],
)

# Fixed part of an IPv6 header.
ipv6_header = Struct(
    # first 32 bits: 4-bit version (only the value 6 is accepted),
    # 8-bit traffic class, 20-bit flow label
    "header" / BitStruct(
        "version" / OneOf(BitsInteger(4), [6]),
        "traffic_class" / BitsInteger(8),
        "flow_label" / BitsInteger(20),
    ),
    "payload_length" / Int16ub,
    "protocol" / ProtocolEnum,
    "hoplimit" / Int8ub,
    # computed alias of hoplimit, under the field name ipv4_header uses
    "ttl" / Computed(this.hoplimit),
    "source" / Ipv6Address,
    "destination" / Ipv6Address,
)

#===============================================================================
# layer 3
# Message Transport Part 3 (SS7 protocol stack)
# (untested)
#===============================================================================

# One byte split into two 4-bit fields.
mtp3_header = BitStruct(
    "service_indicator" / Nibble,
    "subservice" / Nibble,
)

#===============================================================================
# layer 3
# Internet Control Message Protocol for IPv4
#===============================================================================

# Payload used by icmp_header for both Echo_request and Echo_reply.
echo_payload = Struct(
    "identifier" / Int16ub,
    "sequence" / Int16ub,
    "data" / Bytes(32),
    # length is implementation dependent, is anyone using more than 32 bytes?
)

# Payload used by icmp_header for Destination_unreachable.
dest_unreachable_payload = Struct(
    Padding(2),
    "next_hop_mtu" / Int16ub,
    "host" / IpAddress,
    "echo" / Bytes(8),
)

# Meaning of the ICMP "code" byte when the type is Destination_unreachable.
# The member names are runtime keys and are kept exactly as spelled.
dest_unreachable_code = Enum(Byte,
    Network_unreachable_error = 0,
    Host_unreachable_error = 1,
    Protocol_unreachable_error = 2,
    Port_unreachable_error = 3,
    The_datagram_is_too_big = 4,
    Source_route_failed_error = 5,
    Destination_network_unknown_error = 6,
    Destination_host_unknown_error = 7,
    Source_host_isolated_error = 8,
    Desination_administratively_prohibited = 9,
    Host_administratively_prohibited2 = 10,
    Network_TOS_unreachable = 11,
    Host_TOS_unreachable = 12,
)

# ICMP message: type, code, checksum and a type-dependent payload.
icmp_header = Struct(
    "type" / Enum(Byte,
        Echo_reply = 0,
        Destination_unreachable = 3,
        Source_quench = 4,
        Redirect = 5,
        Alternate_host_address = 6,
        Echo_request = 8,
        Router_advertisement = 9,
        Router_solicitation = 10,
        Time_exceeded = 11,
        Parameter_problem = 12,
        Timestamp_request = 13,
        Timestamp_reply = 14,
        Information_request = 15,
        Information_reply = 16,
        Address_mask_request = 17,
        Address_mask_reply = 18,
        default = Pass,
    ),
    # the code byte is decoded through dest_unreachable_code only for
    # Destination_unreachable; every other type keeps it as a plain Byte
    "code" / Switch(this.type,
        {
            "Destination_unreachable" : dest_unreachable_code,
        },
        default = Byte
    ),
    "crc" / Int16ub,
    # only these three message types have a payload layout defined here
    "payload" / Switch(this.type,
        {
            "Echo_reply" : echo_payload,
            "Echo_request" : echo_payload,
            "Destination_unreachable" : dest_unreachable_payload,
        },
        # default = Pass,
    ),
)

#===============================================================================
# layer 3
# Internet Group Management Protocol, Version 2
#
# http://www.ietf.org/rfc/rfc2236.txt
# jesse@housejunkie.ca
#===============================================================================

# Values of the first byte of an IGMPv2 message.
igmp_type = Enum(Byte,
    MEMBERSHIP_QUERY = 0x11,
    MEMBERSHIP_REPORT_V1 = 0x12,
    MEMBERSHIP_REPORT_V2 = 0x16,
    LEAVE_GROUP = 0x17,
)

# IGMPv2 message: type, max response time, checksum, group address.
igmpv2_header = Struct(
    "igmp_type" / igmp_type,
    "max_resp_time" / Byte,
    "checksum" / Int16ub,
    "group_address" / IpAddress,
)

#===============================================================================
# layer 4
# Dynamic Host Configuration Protocol for IPv4
#
# http://www.networksorcery.com/enp/protocol/dhcp.htm
# http://www.networksorcery.com/enp/protocol/bootp/options.htm
#===============================================================================

# One BOOTP/DHCP option: a one-byte option code, then (for every code except
# Pad) a value stored behind a one-byte length prefix.
dhcp4_option = Struct(
    "code" / Enum(Byte,
        Pad = 0,
        Subnet_Mask = 1,
        Time_Offset = 2,
        Router = 3,
        Time_Server = 4,
        Name_Server = 5,
        Domain_Name_Server = 6,
        Log_Server = 7,
        Quote_Server = 8,
        LPR_Server = 9,
        Impress_Server = 10,
        Resource_Location_Server = 11,
        Host_Name = 12,
        Boot_File_Size = 13,
        Merit_Dump_File = 14,
        Domain_Name = 15,
        Swap_Server = 16,
        Root_Path = 17,
        Extensions_Path = 18,
        IP_Forwarding_enabledisable = 19,
        Nonlocal_Source_Routing_enabledisable = 20,
        Policy_Filter = 21,
        Maximum_Datagram_Reassembly_Size = 22,
        Default_IP_TTL = 23,
        Path_MTU_Aging_Timeout = 24,
        Path_MTU_Plateau_Table = 25,
        Interface_MTU = 26,
        All_Subnets_are_Local = 27,
        Broadcast_Address = 28,
        Perform_Mask_Discovery = 29,
        Mask_supplier = 30,
        Perform_router_discovery = 31,
        Router_solicitation_address = 32,
        Static_routing_table = 33,
        Trailer_encapsulation = 34,
        ARP_cache_timeout = 35,
        Ethernet_encapsulation = 36,
        Default_TCP_TTL = 37,
        TCP_keepalive_interval = 38,
        TCP_keepalive_garbage = 39,
        Network_Information_Service_domain = 40,
        Network_Information_Servers = 41,
        NTP_servers = 42,
        Vendor_specific_information = 43,
        NetBIOS_over_TCPIP_name_server = 44,
        NetBIOS_over_TCPIP_Datagram_Distribution_Server = 45,
        NetBIOS_over_TCPIP_Node_Type = 46,
        NetBIOS_over_TCPIP_Scope = 47,
        X_Window_System_Font_Server = 48,
        X_Window_System_Display_Manager = 49,
        Requested_IP_Address = 50,
        IP_address_lease_time = 51,
        Option_overload = 52,
        DHCP_message_type = 53,
        Server_identifier = 54,
        Parameter_request_list = 55,
        Message = 56,
        Maximum_DHCP_message_size = 57,
        Renew_time_value = 58,
        Rebinding_time_value = 59,
        Class_identifier = 60,
        Client_identifier = 61,
        NetWareIP_Domain_Name = 62,
        NetWareIP_information = 63,
        # differs from code 40 only by the capital "D"
        Network_Information_Service_Domain = 64,
        Network_Information_Service_Servers = 65,
        TFTP_server_name = 66,
        Bootfile_name = 67,
        Mobile_IP_Home_Agent = 68,
        Simple_Mail_Transport_Protocol_Server = 69,
        Post_Office_Protocol_Server = 70,
        Network_News_Transport_Protocol_Server = 71,
        Default_World_Wide_Web_Server = 72,
        Default_Finger_Server = 73,
        Default_Internet_Relay_Chat_Server = 74,
        StreetTalk_Server = 75,
        StreetTalk_Directory_Assistance_Server = 76,
        User_Class_Information = 77,
        SLP_Directory_Agent = 78,
        SLP_Service_Scope = 79,
        Rapid_Commit = 80,
        Fully_Qualified_Domain_Name = 81,
        Relay_Agent_Information = 82,
        Internet_Storage_Name_Service = 83,
        NDS_servers = 85,
        NDS_tree_name = 86,
        NDS_context = 87,
        BCMCS_Controller_Domain_Name_list = 88,
        BCMCS_Controller_IPv4_address_list = 89,
        Authentication = 90,
        Client_last_transaction_time = 91,
        Associated_ip = 92,
        Client_System_Architecture_Type = 93,
        Client_Network_Interface_Identifier = 94,
        Lightweight_Directory_Access_Protocol = 95,
        Client_Machine_Identifier = 97,
        Open_Group_User_Authentication = 98,
        Autonomous_System_Number = 109,
        NetInfo_Parent_Server_Address = 112,
        NetInfo_Parent_Server_Tag = 113,
        URL = 114,
        Auto_Configure = 116,
        Name_Service_Search = 117,
        Subnet_Selection = 118,
        DNS_domain_search_list = 119,
        SIP_Servers_DHCP_Option = 120,
        Classless_Static_Route_Option = 121,
        CableLabs_Client_Configuration = 122,
        GeoConf = 123,
    ),
    # the value is only parsed when the code is not Pad
    "value" / If(this.code != "Pad", Prefixed(Byte, GreedyBytes)),
)

# BOOTP/DHCP message: fixed header fields, the 4-byte signature, then options.
dhcp4_header = Struct(
    "opcode" / Enum(Byte,
        BootRequest = 1,
        BootReply = 2,
    ),
    "hardware_type" / Enum(Byte,
        Ethernet = 1,
        Experimental_Ethernet = 2,
        ProNET_Token_Ring = 4,
        Chaos = 5,
        IEEE_802 = 6,
        ARCNET = 7,
        Hyperchannel = 8,
        Lanstar = 9,
    ),
    "hardware_address_length" / Byte,
    "hop_count" / Byte,
    "transaction_id" / Int32ub,
    "elapsed_time" / Int16ub,
    # 16 bits in total: the broadcast flag followed by 15 padding bits
    "flags" / BitStruct(
        "broadcast" / Flag,
        Padding(15),
    ),
    "client_addr" / IpAddress,
    "your_addr" / IpAddress,
    "server_addr" / IpAddress,
    "relay_addr" / IpAddress,
    "client_hardware_addr" / Bytes(16),
    "server_host_name" / Bytes(64),
    "boot_filename" / Bytes(128),
    # BOOTP/DHCP options
    # "The first four bytes contain the (decimal) values 99, 130, 83 and 99"
    "signature" / Const(b"\x63\x82\x53\x63"),
    "options" / GreedyRange(dhcp4_option),
)

#===============================================================================
# layer 4
# Dynamic Host Configuration Protocol for IPv6
#
# http://www.networksorcery.com/enp/rfc/rfc3315.txt
#===============================================================================

# One DHCPv6 option: a 16-bit option code, then data stored behind a 16-bit
# length prefix.
dhcp6_option = Struct(
    "code" / Enum(Int16ub,
        OPTION_CLIENTID = 1,
        OPTION_SERVERID = 2,
        OPTION_IA_NA = 3,
        OPTION_IA_TA = 4,
        OPTION_IAADDR = 5,
        OPTION_ORO = 6,
        OPTION_PREFERENCE = 7,
        OPTION_ELAPSED_TIME = 8,
        OPTION_RELAY_MSG = 9,
        OPTION_AUTH = 11,
        OPTION_UNICAST = 12,
        OPTION_STATUS_CODE = 13,
        OPTION_RAPID_COMMIT = 14,
        OPTION_USER_CLASS = 15,
        OPTION_VENDOR_CLASS = 16,
        OPTION_VENDOR_OPTS = 17,
        OPTION_INTERFACE_ID = 18,
        OPTION_RECONF_MSG = 19,
        OPTION_RECONF_ACCEPT = 20,
        SIP_SERVERS_DOMAIN_NAME_LIST = 21,
        SIP_SERVERS_IPV6_ADDRESS_LIST = 22,
        DNS_RECURSIVE_NAME_SERVER = 23,
        DOMAIN_SEARCH_LIST = 24,
        OPTION_IA_PD = 25,
        OPTION_IAPREFIX = 26,
        OPTION_NIS_SERVERS = 27,
        OPTION_NISP_SERVERS = 28,
        OPTION_NIS_DOMAIN_NAME = 29,
        OPTION_NISP_DOMAIN_NAME = 30,
        SNTP_SERVER_LIST = 31,
        INFORMATION_REFRESH_TIME = 32,
        BCMCS_CONTROLLER_DOMAIN_NAME_LIST = 33,
        BCMCS_CONTROLLER_IPV6_ADDRESS_LIST = 34,
        OPTION_GEOCONF_CIVIC = 36,
        OPTION_REMOTE_ID = 37,
        RELAY_AGENT_SUBSCRIBER_ID = 38,
        OPTION_CLIENT_FQDN = 39,
    ),
    "data" / Prefixed(Int16ub, GreedyBytes),
)

+client_message = BitStruct( + "transaction_id" / BitsInteger(24), +) + +relay_message = Struct( + "hop_count" / Byte, + "linkaddr" / Ipv6Address, + "peeraddr" / Ipv6Address, +) + +dhcp6_message = Struct( + "msgtype" / Enum(Byte, + # these are client-server messages + SOLICIT = 1, + ADVERTISE = 2, + REQUEST = 3, + CONFIRM = 4, + RENEW = 5, + REBIND = 6, + REPLY = 7, + RELEASE_ = 8, + DECLINE_ = 9, + RECONFIGURE = 10, + INFORMATION_REQUEST = 11, + # these two are relay messages + RELAY_FORW = 12, + RELAY_REPL = 13, + ), + # relay messages have a different structure from client-server messages + "params" / Switch(this.msgtype, + { + "RELAY_FORW" : relay_message, + "RELAY_REPL" : relay_message, + }, + default = client_message, + ), + "options" / GreedyRange(dhcp6_option), +) + +#=============================================================================== +# layer 4 +# ISDN User Part (SS7 protocol stack) +#=============================================================================== + +isup_header = Struct( + "routing_label" / Bytes(5), + "cic" / Int16ub, + "message_type" / Int8ub, + # mandatory fixed parameters + # mandatory variable parameters + # optional parameters +) + +#=============================================================================== +# layer 4 +# Transmission Control Protocol (TCP/IP protocol stack) +#=============================================================================== + +tcp_header = Struct( + "source" / Int16ub, + "destination" / Int16ub, + "seq" / Int32ub, + "ack" / Int32ub, + "header" / BitStruct( + "header_length" / ExprAdapter(Nibble, + encoder = lambda obj,ctx: obj // 4, + decoder = lambda obj,ctx: obj * 4, + ), + Padding(3), + # make into FlagsEnum? 
+ "flags" / Struct( + "ns" / Flag, + "cwr" / Flag, + "ece" / Flag, + "urg" / Flag, + "ack" / Flag, + "psh" / Flag, + "rst" / Flag, + "syn" / Flag, + "fin" / Flag, + ), + ), + "header_length" / Computed(this.header.header_length), + "window" / Int16ub, + "checksum" / Int16ub, + "urgent" / Int16ub, + "options" / Bytes(this.header_length - 20), +) + +#=============================================================================== +# layer 4 +# User Datagram Protocol (TCP/IP protocol stack) +#=============================================================================== + +udp_header = Struct( + "header_length" / Computed(8), + "source" / Int16ub, + "destination" / Int16ub, + "payload_length" / ExprAdapter(Int16ub, + encoder = lambda obj,ctx: obj + 8, + decoder = lambda obj,ctx: obj - 8, + ), + "checksum" / Int16ub, +) + +#=============================================================================== +# layer 4 +# Domain Name System (TCP/IP protocol stack) +#=============================================================================== + +class DnsStringAdapter(Adapter): + def _decode(self, obj, context, path): + return u".".join(obj[:-1]) + def _encode(self, obj, context, path): + return obj.split(u".") + [u""] + +class DnsNamesAdapter(Adapter): + def _decode(self, obj, context, path): + return [x.label if x.islabel else x.pointer & 0x3fff for x in obj] + def _encode(self, obj, context, path): + return [dict(ispointer=1,pointer=x|0xc000) if isinstance(x,int) else dict(islabel=1,label=x) for x in obj] + +dns_record_class = Enum(Int16ub, + RESERVED = 0, + INTERNET = 1, + CHAOS = 3, + HESIOD = 4, + NONE = 254, + ANY = 255, +) + +dns_record_type = Enum(Int16ub, + IPv4 = 1, + AUTHORITIVE_NAME_SERVER = 2, + CANONICAL_NAME = 5, + NULL = 10, + MAIL_EXCHANGE = 15, + TEXT = 16, + X25 = 19, + ISDN = 20, + IPv6 = 28, + UNSPECIFIED = 103, + ALL = 255, +) + +query_record = Struct( + "name" / DnsStringAdapter(RepeatUntil(len_(obj_)==0, PascalString(Byte, "ascii"))), + "type" / 
dns_record_type, + "class" / dns_record_class, +) + +labelpointer = Struct( + "firstbyte" / Peek(Byte), + "islabel" / Computed(this.firstbyte & 0b11000000 == 0), + "ispointer" / Computed(this.firstbyte & 0b11000000 == 0b11000000), + Check(this.islabel | this.ispointer), + "label" / If(this.islabel, PascalString(Byte, "ascii")), + "pointer" / If(this.ispointer, Int16ub), +) + +resource_record = Struct( + # based on http://www.zytrax.com/books/dns/ch15/#qname + "names" / DnsNamesAdapter(RepeatUntil(obj_.ispointer | len_(obj_.label)==0, labelpointer)), + "type" / dns_record_type, + "class" / dns_record_class, + "ttl" / Int32ub, + "rdata" / Prefixed(Int16ub, GreedyBytes), +) + +dns = Struct( + "id" / Int16ub, + "flags" / BitStruct( + "type" / Enum(Bit, + QUERY = 0, + RESPONSE = 1, + ), + "opcode" / Enum(Nibble, + STANDARD_QUERY = 0, + INVERSE_QUERY = 1, + SERVER_STATUS_REQUEST = 2, + NOTIFY = 4, + UPDATE = 5, + ), + "authoritive_answer" / Flag, + "truncation" / Flag, + "recursion_desired" / Flag, + "recursion_available" / Flag, + Padding(1), + "authenticated_data" / Flag, + "checking_disabled" / Flag, + "response_code" / Enum(Nibble, + SUCCESS = 0, + FORMAT_ERROR = 1, + SERVER_FAILURE = 2, + NAME_DOES_NOT_EXIST = 3, + NOT_IMPLEMENTED = 4, + REFUSED = 5, + NAME_SHOULD_NOT_EXIST = 6, + RR_SHOULD_NOT_EXIST = 7, + RR_SHOULD_EXIST = 8, + NOT_AUTHORITIVE = 9, + NOT_ZONE = 10, + ), + ), + "question_count" / Rebuild(Int16ub, len_(this.questions)), + "answer_count" / Rebuild(Int16ub, len_(this.answers)), + "authority_count" / Rebuild(Int16ub, len_(this.authorities)), + "additional_count" / Rebuild(Int16ub, len_(this.additionals)), + "questions" / query_record[this.question_count], + "answers" / resource_record[this.answer_count], + "authorities" / resource_record[this.authority_count], + "additionals" / resource_record[this.additional_count], +) + +#=============================================================================== +# entire IP stack 
+#=============================================================================== + +layer4_tcp = Struct( + "header" / tcp_header, + "next" / Bytes(this._.header.payload_length - this.header.header_length), +) + +layer4_udp = Struct( + "header" / udp_header, + "next" / Bytes(this.header.payload_length), +) + +layer3_payload = Switch(this.header.protocol, + { + "TCP" : layer4_tcp, + "UDP" : layer4_udp, + "ICMP" : icmp_header, + }, + # default = Pass, +) + +layer3_ipv4 = Struct( + "header" / ipv4_header, + "next" / layer3_payload, +) + +layer3_ipv6 = Struct( + "header" / ipv6_header, + "next" / layer3_payload, +) + +layer2_ethernet = Struct( + "header" / ethernet_header, + "next" / Switch(this.header.type, + { + "IPv4" : layer3_ipv4, + "IPv6" : layer3_ipv6, + }, + # default = Pass, + ), +) + +ip_stack = "ip_stack" / layer2_ethernet diff --git a/deprecated_gallery/mbr.py b/deprecated_gallery/mbr.py new file mode 100644 index 000000000..15b94a97a --- /dev/null +++ b/deprecated_gallery/mbr.py @@ -0,0 +1,46 @@ +""" +Master Boot Record +The first sector on disk, contains the partition table, bootloader, et al. 
+ +http://www.win.tue.nl/~aeb/partitions/partition_types-1.html +""" + +from construct import * + +mbr_format = Struct( + "bootloader_code" / Bytes(446), + "partitions" / Array(4, Struct( + "state" / Enum(Byte, + INACTIVE = 0x00, + ACTIVE = 0x80, + ), + "beginning" / BitStruct( + "head" / BitsInteger(8), + "sect" / BitsInteger(6), + "cyl" / BitsInteger(10), + ), + "type" / Enum(Byte, + Nothing = 0x00, + FAT12 = 0x01, + XENIX_ROOT = 0x02, + XENIX_USR = 0x03, + FAT16_old = 0x04, + Extended_DOS = 0x05, + FAT16 = 0x06, + NTFS = 0x07, + FAT32 = 0x0B, + FAT32_LBA = 0x0C, + ExtendedWithLBA = 0x0F, + LINUX_SWAP = 0x82, + LINUX_NATIVE = 0x83, + ), + "ending" / BitStruct( + "head" / BitsInteger(8), + "sect" / BitsInteger(6), + "cyl" / BitsInteger(10), + ), + "sector_offset" / Int32ub, # offset from MBR in sectors + "size" / Int32ub, # in sectors + )), + "signature" / Const(b"\x55\xAA"), +) diff --git a/construct/formats/executable/pe32.py b/deprecated_gallery/pe32.py similarity index 54% rename from construct/formats/executable/pe32.py rename to deprecated_gallery/pe32.py index 4fe6b60ba..f572a5e97 100644 --- a/construct/formats/executable/pe32.py +++ b/deprecated_gallery/pe32.py @@ -8,36 +8,50 @@ 2006 with updates relevant for .NET: http://download.microsoft.com/download/9/c/5/9c5b2167-8017-4bae-9fde-d599bac8184a/pecoff_v8.doc """ + from construct import * import time +# code uses linux epoch in microsoft format? class UTCTimeStampAdapter(Adapter): - def _decode(self, obj, context): + def _decode(self, obj, context, path): return time.ctime(obj) - def _encode(self, obj, context): + def _encode(self, obj, context, path): return int(time.mktime(time.strptime(obj))) -def UTCTimeStamp(name): - return UTCTimeStampAdapter(ULInt32(name)) +# use core Timestamp +UTCTimeStamp = UTCTimeStampAdapter(Int32ul) + class NamedSequence(Adapter): """ creates a mapping between the elements of a sequence and their respective names. 
this is useful for sequences of a variable length, where each - element in the sequence has a name (as is the case with the data + element in the sequence has a name (as is the case with the data directories of the PE header) """ __slots__ = ["mapping", "rev_mapping"] prefix = "unnamed_" + def __init__(self, subcon, mapping): - Adapter.__init__(self, subcon) + super(NamedSequence, self).__init__(subcon) self.mapping = mapping - self.rev_mapping = dict((v, k) for k, v in mapping.iteritems()) - def _encode(self, obj, context): - d = obj.__dict__ - obj2 = [None] * len(d) - for name, value in d.iteritems(): + self.rev_mapping = dict((v, k) for k, v in mapping.items()) + + def _decode(self, obj, context, path): + obj2 = Container() + for i, item in enumerate(obj): + if i in self.mapping: + name = self.mapping[i] + else: + name = "%s%d" % (self.prefix, i) + setattr(obj2, name, item) + return obj2 + + def _encode(self, obj, context, path): + obj2 = [None] * len(obj) + for name, value in obj.items(): if name in self.rev_mapping: index = self.rev_mapping[name] elif name.startswith("__"): @@ -49,66 +63,48 @@ def _encode(self, obj, context): raise ValueError("no mapping defined for %r" % (name,)) obj2[index] = value return obj2 - def _decode(self, obj, context): - obj2 = Container() - for i, item in enumerate(obj): - if i in self.mapping: - name = self.mapping[i] - else: - name = "%s%d" % (self.prefix, i) - setattr(obj2, name, item) - return obj2 -msdos_header = Struct("msdos_header", - Magic("MZ"), - ULInt16("partPag"), - ULInt16("page_count"), - ULInt16("relocation_count"), - ULInt16("header_size"), - ULInt16("minmem"), - ULInt16("maxmem"), - ULInt16("relocation_stackseg"), - ULInt16("exe_stackptr"), - ULInt16("checksum"), - ULInt16("exe_ip"), - ULInt16("relocation_codeseg"), - ULInt16("table_offset"), - ULInt16("overlay"), +msdos_header = Struct( + Const(b"MZ"), + "partPag" / Int16ul, + "page_count" / Int16ul, + "relocation_count" / Int16ul, + "header_size" / Int16ul, + 
"minmem" / Int16ul, + "maxmem" / Int16ul, + "relocation_stackseg" / Int16ul, + "exe_stackptr" / Int16ul, + "checksum" / Int16ul, + "exe_ip" / Int16ul, + "relocation_codeseg" / Int16ul, + "table_offset" / Int16ul, + "overlay" / Int16ul, Padding(8), - ULInt16("oem_id"), - ULInt16("oem_info"), + "oem_id" / Int16ul, + "oem_info" / Int16ul, Padding(20), - ULInt32("coff_header_pointer"), - Anchor("_assembly_start"), - OnDemand( - HexDumpAdapter( - Field("code", - lambda ctx: ctx.coff_header_pointer - ctx._assembly_start - ) - ) - ), + "coff_header_pointer" / Int32ul, + "_assembly_start" / Tell, + "code" / Bytes(this.coff_header_pointer - this._assembly_start), ) -symbol_table = Struct("symbol_table", - String("name", 8, padchar = "\x00"), - ULInt32("value"), - Enum(ExprAdapter(SLInt16("section_number"), - encoder = lambda obj, ctx: obj + 1, - decoder = lambda obj, ctx: obj - 1, - ), +symbol_table = "symbol_table" / Struct( + "name" / PaddedString(8, "utf8"), + "value" / Int32ul, + "section_number" / Enum( + ExprAdapter(Int16sl, obj_-1, obj_+1), UNDEFINED = -1, ABSOLUTE = -2, DEBUG = -3, - _default_ = Pass, ), - Enum(ULInt8("complex_type"), + "complex_type" / Enum(Int8ul, NULL = 0, POINTER = 1, FUNCTION = 2, ARRAY = 3, ), - Enum(ULInt8("base_type"), + "base_type" / Enum(Int8ul, NULL = 0, VOID = 1, CHAR = 2, @@ -126,8 +122,7 @@ def _decode(self, obj, context): UINT = 14, DWORD = 15, ), - Enum(ULInt8("storage_class"), - END_OF_FUNCTION = 255, + "storage_class" / Enum(Int8ul, NULL = 0, AUTOMATIC = 1, EXTERNAL = 2, @@ -153,16 +148,15 @@ def _decode(self, obj, context): FILE = 103, SECTION = 104, WEAK_EXTERNAL = 105, + END_OF_FUNCTION = 255, ), - ULInt8("number_of_aux_symbols"), - Array(lambda ctx: ctx.number_of_aux_symbols, - Bytes("aux_symbols", 18) - ) + "number_of_aux_symbols" / Int8ul, + "aux_symbols" / Array(this.number_of_aux_symbols, Bytes(18)) ) -coff_header = Struct("coff_header", - Magic("PE\x00\x00"), - Enum(ULInt16("machine_type"), +coff_header = Struct( + 
Const(b"PE\x00\x00"), + "machine_type" / Enum(Int16ul, UNKNOWN = 0x0, AM33 = 0x1d3, AMD64 = 0x8664, @@ -183,14 +177,13 @@ def _decode(self, obj, context): SH5= 0x1a8, THUMB = 0x1c2, WCEMIPSV2 = 0x169, - _default_ = Pass ), - ULInt16("number_of_sections"), - UTCTimeStamp("time_stamp"), - ULInt32("symbol_table_pointer"), - ULInt32("number_of_symbols"), - ULInt16("optional_header_size"), - FlagsEnum(ULInt16("characteristics"), + "number_of_sections" / Int16ul, + "time_stamp" / UTCTimeStamp, + "symbol_table_pointer" / Int32ul, + "number_of_symbols" / Int32ul, + "optional_header_size" / Int16ul, + "characteristics" / FlagsEnum(Int16ul, RELOCS_STRIPPED = 0x0001, EXECUTABLE_IMAGE = 0x0002, LINE_NUMS_STRIPPED = 0x0004, @@ -207,53 +200,44 @@ def _decode(self, obj, context): UNIPROCESSOR_ONLY = 0x4000, BIG_ENDIAN_MACHINE = 0x8000, ), - - # symbol table - Pointer(lambda ctx: ctx.symbol_table_pointer, - Array(lambda ctx: ctx.number_of_symbols, symbol_table) - ) + "symbol_table" / Pointer(this.symbol_table_pointer, + Array(this.number_of_symbols, symbol_table)) ) -def PEPlusField(name): - return IfThenElse(name, lambda ctx: ctx.pe_type == "PE32_plus", - ULInt64(None), - ULInt32(None), - ) +PEPlusField = IfThenElse(this.pe_type == "PE32_plus", Int64ul, Int32ul) -optional_header = Struct("optional_header", +optional_header = Struct( # standard fields - Enum(ULInt16("pe_type"), + "pe_type" / Enum(Int16ul, PE32 = 0x10b, PE32_plus = 0x20b, ), - ULInt8("major_linker_version"), - ULInt8("minor_linker_version"), - ULInt32("code_size"), - ULInt32("initialized_data_size"), - ULInt32("uninitialized_data_size"), - ULInt32("entry_point_pointer"), - ULInt32("base_of_code"), - + "major_linker_version" / Int8ul, + "minor_linker_version" / Int8ul, + "code_size" / Int32ul, + "initialized_data_size" / Int32ul, + "uninitialized_data_size" / Int32ul, + "entry_point_pointer" / Int32ul, + "base_of_code" / Int32ul, + # only in PE32 files - If(lambda ctx: ctx.pe_type == "PE32", - 
ULInt32("base_of_data") - ), - + "base_of_data" / If(this.pe_type == "PE32", Int32ul), + # WinNT-specific fields - PEPlusField("image_base"), - ULInt32("section_aligment"), - ULInt32("file_alignment"), - ULInt16("major_os_version"), - ULInt16("minor_os_version"), - ULInt16("major_image_version"), - ULInt16("minor_image_version"), - ULInt16("major_subsystem_version"), - ULInt16("minor_subsystem_version"), + "image_base" / PEPlusField, + "section_aligment" / Int32ul, + "file_alignment" / Int32ul, + "major_os_version" / Int16ul, + "minor_os_version" / Int16ul, + "major_image_version" / Int16ul, + "minor_image_version" / Int16ul, + "major_subsystem_version" / Int16ul, + "minor_subsystem_version" / Int16ul, Padding(4), - ULInt32("image_size"), - ULInt32("headers_size"), - ULInt32("checksum"), - Enum(ULInt16("subsystem"), + "image_size" / Int32ul, + "headers_size" / Int32ul, + "checksum" / Int32ul, + "subsystem" / Enum(Int16ul, UNKNOWN = 0, NATIVE = 1, WINDOWS_GUI = 2, @@ -265,25 +249,24 @@ def PEPlusField(name): EFI_RUNTIME_DRIVER = 12, EFI_ROM = 13, XBOX = 14, - _defualt_ = Pass ), - FlagsEnum(ULInt16("dll_characteristics"), + "dll_characteristics" / FlagsEnum(Int16ul, NO_BIND = 0x0800, WDM_DRIVER = 0x2000, TERMINAL_SERVER_AWARE = 0x8000, ), - PEPlusField("reserved_stack_size"), - PEPlusField("stack_commit_size"), - PEPlusField("reserved_heap_size"), - PEPlusField("heap_commit_size"), - ULInt32("loader_flags"), - ULInt32("number_of_data_directories"), - - NamedSequence( - Array(lambda ctx: ctx.number_of_data_directories, - Struct("data_directories", - ULInt32("address"), - ULInt32("size"), + "reserved_stack_size" / PEPlusField, + "stack_commit_size" / PEPlusField, + "reserved_heap_size" / PEPlusField, + "heap_commit_size" / PEPlusField, + "loader_flags" / Int32ul, + "number_of_data_directories" / Int32ul, + + "data_directories" / NamedSequence( + Array(this.number_of_data_directories, + Struct( + "address" / Int32ul, + "size" / Int32ul, ) ), mapping = { @@ -306,17 
+289,17 @@ def PEPlusField(name): ), ) -section = Struct("section", - String("name", 8, padchar = "\x00"), - ULInt32("virtual_size"), - ULInt32("virtual_address"), - ULInt32("raw_data_size"), - ULInt32("raw_data_pointer"), - ULInt32("relocations_pointer"), - ULInt32("line_numbers_pointer"), - ULInt16("number_of_relocations"), - ULInt16("number_of_line_numbers"), - FlagsEnum(ULInt32("characteristics"), +section = "section" / Struct( + "name" / PaddedString(8, "utf8"), + "virtual_size" / Int32ul, + "virtual_address" / Int32ul, + "raw_data_size" / Int32ul, + "raw_data_pointer" / Int32ul, + "relocations_pointer" / Int32ul, + "line_numbers_pointer" / Int32ul, + "number_of_relocations" / Int16ul, + "number_of_line_numbers" / Int16ul, + "characteristics" / FlagsEnum(Int32ul, TYPE_REG = 0x00000000, TYPE_DSECT = 0x00000001, TYPE_NOLOAD = 0x00000002, @@ -357,63 +340,40 @@ def PEPlusField(name): MEM_SHARED = 0x10000000, MEM_EXECUTE = 0x20000000, MEM_READ = 0x40000000, - MEM_WRITE = 0x80000000, - ), - - OnDemandPointer(lambda ctx: ctx.raw_data_pointer, - HexDumpAdapter(Field("raw_data", lambda ctx: ctx.raw_data_size)) + MEM_WRITE = 0x80000000, ), - - OnDemandPointer(lambda ctx: ctx.line_numbers_pointer, - Array(lambda ctx: ctx.number_of_line_numbers, - Struct("line_numbers", - ULInt32("type"), - ULInt16("line_number"), + + "raw_data" / Pointer(this.raw_data_pointer, + Bytes(this.raw_data_size)), + + "line_numbers" / Pointer(this.line_numbers_pointer, + Array(this.number_of_line_numbers, + Struct( + "type" / Int32ul, + "line_number" / Int16ul, ) ) ), - - OnDemandPointer(lambda ctx: ctx.relocations_pointer, - Array(lambda ctx: ctx.number_of_relocations, - Struct("relocations", - ULInt32("virtual_address"), - ULInt32("symbol_table_index"), - ULInt16("type"), + + "relocations" / Pointer(this.relocations_pointer, + Array(this.number_of_relocations, + Struct( + "virtual_address" / Int32ul, + "symbol_table_index" / Int32ul, + "type" / Int16ul, ) ) ), ) -pe32_file = 
Struct("pe32_file", - # headers - msdos_header, - coff_header, - Anchor("_start_of_optional_header"), - optional_header, - Anchor("_end_of_optional_header"), - Padding(lambda ctx: min(0, - ctx.coff_header.optional_header_size - - ctx._end_of_optional_header + - ctx._start_of_optional_header - ) - ), - - # sections - Array(lambda ctx: ctx.coff_header.number_of_sections, section) +pe32_file = "pe32_file" / Struct( + "msdos_header" / msdos_header, + "coff_header" / coff_header, + "_start_of_optional_header" / Tell, + "optional_header" / optional_header, + "_end_of_optional_header" / Tell, + # this code is just weird + Padding(lambda this: min(0, this.coff_header.optional_header_size - this._end_of_optional_header + this._start_of_optional_header)), + "sections" / Array(this.coff_header.number_of_sections, + section), ) - - -if __name__ == "__main__": - print pe32_file.parse_stream(open("../../tests/NOTEPAD.EXE", "rb")) - print pe32_file.parse_stream(open("../../tests/sqlite3.dll", "rb")) - - - - - - - - - - - diff --git a/deprecated_gallery/png.py b/deprecated_gallery/png.py new file mode 100644 index 000000000..2b913e311 --- /dev/null +++ b/deprecated_gallery/png.py @@ -0,0 +1,257 @@ +""" +Portable Network Graphics (PNG) file format +Official spec: http://www.w3.org/TR/PNG + +Original code contributed by Robin Munn (rmunn at pobox dot com) +(although the code has been extensively reorganized to meet Construct's +coding conventions) +""" + +from construct import * + +#=============================================================================== +# utils +#=============================================================================== +coord = Struct( + "x" / Int32ub, + "y" / Int32ub, +) + +compression_method = "compression_method" / Enum(Byte, + deflate = 0, +) + +#=============================================================================== +# 11.2.3: PLTE - Palette +#=============================================================================== +plte_info = 
"plte_info" / Struct( + "num_entries" / Computed(this._.length // 3), + "palette_entries" / Array(this.num_entries, Byte[3]), +) + +#=============================================================================== +# 11.2.4: IDAT - Image data +#=============================================================================== +idat_info = "idat_info" / Bytes(this.length) + +#=============================================================================== +# 11.3.2.1: tRNS - Transparency +#=============================================================================== +trns_info = "trns_info" / Switch(this._.image_header.color_type, + { + "greyscale": Int16ub, + "truecolor": Int16ub[3], + "indexed": Array(this.length, Byte), + } +) + +#=============================================================================== +# 11.3.3.1: cHRM - Primary chromacities and white point +#=============================================================================== +chrm_info = "chrm_info" / Struct( + "white_point" / coord, + "rgb" / coord[3], +) + +#=============================================================================== +# 11.3.3.2: gAMA - Image gamma +#=============================================================================== +gama_info = "gama_info" / Struct( + "gamma" / Int32ub, +) + +#=============================================================================== +# 11.3.3.3: iCCP - Embedded ICC profile +#=============================================================================== +iccp_info = "iccp_info" / Struct( + "name" / CString("utf8"), + compression_method, + "compressed_profile" / Bytes(this._.length - (len_(this.name) + 2)), +) + +#=============================================================================== +# 11.3.3.4: sBIT - Significant bits +#=============================================================================== +sbit_info = "sbit_info" / Switch(this._.image_header.color_type, + { + "greyscale": Byte, + "truecolor": Byte[3], + "indexed": 
Byte[3], + "greywithalpha": Byte[2], + "truewithalpha": Byte[4], + } +) + +#=============================================================================== +# 11.3.3.5: sRGB - Standard RPG color space +#=============================================================================== +srgb_info = "rendering_intent" / Enum(Byte, + perceptual = 0, + relative_colorimetric = 1, + saturation = 2, + absolute_colorimetric = 3, +) + +#=============================================================================== +# 11.3.4.3: tEXt - Textual data +#=============================================================================== +text_info = "text_info" / Struct( + "keyword" / CString("utf8"), + "text" / Bytes(this._.length - (len_(this.keyword) + 1)), +) + +#=============================================================================== +# 11.3.4.4: zTXt - Compressed textual data +#=============================================================================== +ztxt_info = "ztxt_info" / Struct( + "keyword" / CString("utf8"), + compression_method, + # As with iCCP, length is chunk length, minus length of + # keyword, minus two: one byte for the null terminator, + # and one byte for the compression method. 
+ "compressed_text" / Bytes(this._.length - (len_(this.keyword) + 2)), +) + +#=============================================================================== +# 11.3.4.5: iTXt - International textual data +#=============================================================================== +itxt_info = "itxt_info" / Struct( + "keyword" / CString("utf8"), + "compression_flag" / Byte, + compression_method, + "language_tag" / CString("utf8"), + "translated_keyword" / CString("utf8"), + "text" / Bytes(this._.length - (len_(this.keyword) + len_(this.language_tag) + len_(this.translated_keyword) + 5)), +) + +#=============================================================================== +# 11.3.5.1: bKGD - Background color +#=============================================================================== +bkgd_info = "bkgd_info" / Switch(this._.image_header.color_type, + { + "greyscale": Int16ub[1], + "greywithalpha": Int16ub[1], + "truecolor": Int16ub[3], + "truewithalpha": Int16ub[3], + "indexed": Int8ub, + } +) + +#=============================================================================== +# 11.3.5.2: hIST - Image histogram +#=============================================================================== +hist_info = "frequency" / Array(this._.length / 2, Int16ub) + +#=============================================================================== +# 11.3.5.3: pHYs - Physical pixel dimensions +#=============================================================================== +phys_info = "phys_info" / Struct( + "pixels_per_unit_x" / Int32ub, + "pixels_per_unit_y" / Int32ub, + "unit" / Enum(Byte, + unknown = 0, + meter = 1, + ), +) + +#=============================================================================== +# 11.3.5.4: sPLT - Suggested palette +#=============================================================================== +def splt_info_data_length(ctx): + entry_size = 6 if ctx.sample_depth == 8 else 10 + return (ctx._.length - len(ctx.name) - 2) // entry_size 
+ +splt_info = "data" / Struct( + "name" / CString("utf8"), + "sample_depth" / Byte, + "table" / Array(splt_info_data_length, + IfThenElse(this.sample_depth == 8, + # Sample depth 8 + Struct( + "rgb" / Byte[3], + "alpha" / Byte, + "frequency" / Int16ub, + ), + # Sample depth 16 + Struct( + "rgb" / Byte[3], + "alpha" / Int16ub, + "frequency" / Int16ub, + ), + ), + ), +) + +#=============================================================================== +# 11.3.6.1: tIME - Image last-modification time +#=============================================================================== +time_info = "time_info" / Struct( + "year" / Int16ub, + "month" / Byte, + "day" / Byte, + "hour" / Byte, + "minute" / Byte, + "second" / Byte, +) + +#=============================================================================== +# chunks +#=============================================================================== +default_chunk_info = Bytes(this.length) + +chunk = Struct( + "length" / Int32ub, + "type" / Bytes(4), + "data" / Switch(this.type, + { + b"PLTE" : plte_info, + b"IEND" : Pass, + b"IDAT" : idat_info, + b"tRNS" : trns_info, + b"cHRM" : chrm_info, + b"gAMA" : gama_info, + b"iCCP" : iccp_info, + b"sBIT" : sbit_info, + b"sRGB" : srgb_info, + b"tEXt" : text_info, + b"zTXt" : ztxt_info, + b"iTXt" : itxt_info, + b"bKGD" : bkgd_info, + b"hIST" : hist_info, + b"pHYs" : phys_info, + b"sPLT" : splt_info, + b"tIME" : time_info, + }, + default = default_chunk_info, + ), + "crc" / Int32ub, +) + +image_header_chunk = Struct( + "length" / Int32ub, + "signature" / Const(b"IHDR"), + "width" / Int32ub, + "height" / Int32ub, + "bit_depth" / Byte, + "color_type" / Enum(Byte, + greyscale = 0, + truecolor = 2, + indexed = 3, + greywithalpha = 4, + truewithalpha = 6, + ), + compression_method, + # "adaptive filtering with five basic filter types" + "filter_method" / Enum(Byte, adaptive5 = 0), + "interlace_method" / Enum(Byte, none = 0, adam7 = 1), + "crc" / Int32ub, +) + 
+#=============================================================================== +# the complete PNG file +#=============================================================================== +png_file = "png" / Struct( + "signature" / Const(b"\x89PNG\r\n\x1a\n"), + "image_header" / image_header_chunk, + "chunks" / GreedyRange(chunk), +) diff --git a/deprecated_gallery/snoop.py b/deprecated_gallery/snoop.py new file mode 100644 index 000000000..b6a25adb0 --- /dev/null +++ b/deprecated_gallery/snoop.py @@ -0,0 +1,50 @@ +""" +what : snoop v2 capture file. + how : http://tools.ietf.org/html/rfc1761 + who : jesse@housejunkie.ca +""" + +from construct import * +import time + + +# use core Timestamp +class EpochTimeStampAdapter(Adapter): + """ Convert epoch timestamp <-> localtime """ + def _decode(self, obj, context, path): + return time.ctime(obj) + def _encode(self, obj, context, path): + return int(time.mktime(time.strptime(obj))) + +packet_record = Struct( + "original_length" / Int32ub, + "included_length" / Int32ub, + "record_length" / Int32ub, + "cumulative_drops" / Int32ub, + "timestamp_seconds" / EpochTimeStampAdapter(Int32ub), + "timestamp_microseconds" / Int32ub, + "data" / Bytes(this.included_length), + # 24 being the static length of the packet_record header + Padding(this.record_length - this.included_length - 24), +) + +datalink_type = Enum(Int32ub, + IEEE802dot3 = 0, + IEEE802dot4 = 1, + IEEE802dot5 = 2, + IEEE802dot6 = 3, + ETHERNET = 4, + HDLC = 5, + CHARSYNC = 6, + IBMCHANNEL = 7, + FDDI = 8, + OTHER = 9, + UNASSIGNED = 10, +) + +snoop_file = Struct( + "signature" / Const(b"snoop\x00\x00\x00"), + "version" / Int32ub, # snoop v1 is deprecated + "datalink" / datalink_type, + "records" / GreedyRange(packet_record), +) diff --git a/construct/formats/graphics/wmf.py b/deprecated_gallery/wmf.py similarity index 77% rename from construct/formats/graphics/wmf.py rename to deprecated_gallery/wmf.py index 55e79ddbd..7971128fd 100644 --- 
a/construct/formats/graphics/wmf.py +++ b/deprecated_gallery/wmf.py @@ -1,12 +1,13 @@ """ Windows Meta File """ + from construct import * -wmf_record = Struct("records", - ULInt32("size"), # size in words, including the size, function and params - Enum(ULInt16("function"), +wmf_record = Struct( + "size" / Int32ul, # size in words, including the size, function and params + "function" / Enum(Int16ul, AbortDoc = 0x0052, Aldus_Header = 0x0001, AnimatePalette = 0x0436, @@ -91,39 +92,38 @@ StretchBlt = 0x0B23, StretchDIB = 0x0F43, TextOut = 0x0521, - _default_ = Pass, ), - Array(lambda ctx: ctx.size - 3, ULInt16("params")), + "params" / Array(this.size - 3, Int16ul), ) -wmf_placeable_header = Struct("placeable_header", - Const(ULInt32("key"), 0x9AC6CDD7), - ULInt16("handle"), - SLInt16("left"), - SLInt16("top"), - SLInt16("right"), - SLInt16("bottom"), - ULInt16("units_per_inch"), +wmf_placeable_header = Struct( + "key" / Const(0x9AC6CDD7, Int32ul), + "handle" / Int16ul, + "left" / Int16sl, + "top" / Int16sl, + "right" / Int16sl, + "bottom"/ Int16sl, + "units_per_inch"/ Int16ul, Padding(4), - ULInt16("checksum") + "checksum" / Int16ul, ) -wmf_file = Struct("wmf_file", +wmf_file = Struct( # --- optional placeable header --- - Optional(wmf_placeable_header), + "placeable_header" / Optional(wmf_placeable_header), # --- header --- - Enum(ULInt16("type"), + "type" / Enum(Int16ul, InMemory = 0, File = 1, ), - Const(ULInt16("header_size"), 9), - ULInt16("version"), - ULInt32("size"), # file size is in words - ULInt16("number_of_objects"), - ULInt32("size_of_largest_record"), - ULInt16("number_of_params"), + "header_size" / Const(9, Int16ul), + "version" / Int16ul, + "size" / Int32ul, # file size is in words + "number_of_objects" / Int16ul, + "size_of_largest_record" / Int32ul, + "number_of_params" / Int16ul, # --- records --- - GreedyRange(wmf_record) + "records" / GreedyRange(wmf_record) ) diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 
000000000..d54e8cf68 --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1 @@ +_build diff --git a/docs/_static/arkadiusz-120.png b/docs/_static/arkadiusz-120.png new file mode 100644 index 000000000..667451fcc Binary files /dev/null and b/docs/_static/arkadiusz-120.png differ diff --git a/docs/_static/construct-logo1.png b/docs/_static/construct-logo1.png new file mode 100644 index 000000000..bb7dfbff1 Binary files /dev/null and b/docs/_static/construct-logo1.png differ diff --git a/docs/_static/construct-logo2.png b/docs/_static/construct-logo2.png new file mode 100644 index 000000000..a53b856e8 Binary files /dev/null and b/docs/_static/construct-logo2.png differ diff --git a/docs/_static/construct-logo3.png b/docs/_static/construct-logo3.png new file mode 100644 index 000000000..10c88aabc Binary files /dev/null and b/docs/_static/construct-logo3.png differ diff --git a/docs/_static/corbin-100.png b/docs/_static/corbin-100.png new file mode 100644 index 000000000..ad8b05255 Binary files /dev/null and b/docs/_static/corbin-100.png differ diff --git a/docs/_static/logo-sketch-120.png b/docs/_static/logo-sketch-120.png new file mode 100644 index 000000000..aea2bc264 Binary files /dev/null and b/docs/_static/logo-sketch-120.png differ diff --git a/docs/_static/logo-sketch.png b/docs/_static/logo-sketch.png new file mode 100644 index 000000000..6c1435de0 Binary files /dev/null and b/docs/_static/logo-sketch.png differ diff --git a/docs/_static/tomer-57.png b/docs/_static/tomer-57.png new file mode 100644 index 000000000..c7ada5ba5 Binary files /dev/null and b/docs/_static/tomer-57.png differ diff --git a/docs/adapters-api.rst b/docs/adapters-api.rst deleted file mode 100644 index 92ab2dec1..000000000 --- a/docs/adapters-api.rst +++ /dev/null @@ -1,5 +0,0 @@ -================================== -``construct.adapters`` -- Adapters -================================== - -.. 
automodule:: construct.adapters diff --git a/docs/adapters.rst b/docs/adapters.rst index bd85e6d0f..6cf786aa8 100644 --- a/docs/adapters.rst +++ b/docs/adapters.rst @@ -1,164 +1,122 @@ +======================= +Adapters and Validators +======================= + Adapting -======== - -Adapting is the process of converting one representation of an object to -another. One representation is usually "lower" (closer to the byte level), and -the other "higher" (closer to the python object model). The process of -converting the lower representation to the higher one is called decoding, and -the process of converting the higher level representation to the lower one is -called encoding. Encoding and decoding are expected to be symmetrical, so that -they counter-act each other (``encode(decode(x)) == x`` and ``decode(encode(x)) -== x``). - -Custom adapter classes derive of the abstract Adapter class, and implement -their own versions of ``_encode`` and ``_decode``, as shown below: - ->>> class IpAddressAdapter(Adapter): -... def _encode(self, obj, context): -... return "".join(chr(int(b)) for b in obj.split(".")) -... def _decode(self, obj, context): -... return ".".join(str(ord(b)) for b in obj) -... - - -As you can see, the IpAddressAdapter encodes strings of the format -"XXX.XXX.XXX.XXX" to a binary string of 4 bytes, and decodes such binary -strings into the more readable "XXX.XXX.XXX.XXX" format. Also note that the -adapter does not perform any manipulation of the stream, it only converts the -object! - -This is called separation of concern, and is a key feature of -component-oriented programming. It allows us to keep each component very -simple and unaware of its consumers. Whenever we need a different -representation of the data, we don't need to write a new Construct -- we only -write the suitable adapter. +============== + +Adapting is the process of converting one representation of an object into another. 
One representation is usually "lower" (closer to the byte level), and the other is "higher" (closer to the python object model). The process of converting the lower representation to the higher one is called decoding, and the process of converting the higher level representation to the lower one is called encoding. Encoding and decoding are expected to be symmetrical, so that they counter-act each other ``encode(decode(x)) == x`` and ``decode(encode(x)) == x``. + +Custom adapter classes derive from the abstract ``Adapter`` class, and implement their own versions of ``_decode`` and ``_encode``, as shown below: + +:: + + class IpAddressAdapter(Adapter): + def _decode(self, obj, context, path): + return ".".join(map(str, obj)) + + def _encode(self, obj, context, path): + return list(map(int, obj.split("."))) + + IpAddress = IpAddressAdapter(Byte[4]) + +As you can see, the IpAddressAdapter encodes a string of the format ``"XXX.XXX.XXX.XXX"`` to a list of 4 integers like ``[XXX, XXX, XXX, XXX]``. This representation then gets handed over to ``Array(4, Byte)`` which turns it into bytes. + +Note that the adapter does not perform any manipulation of the stream, it only converts between objects! + +:: + + class Adapter(Subconstruct): + def _parse(self, stream, context, path): + return self._decode(self.subcon._parse(stream, context, path), context, path) + + def _build(self, obj, stream, context, path): + return self.subcon._build(self._encode(obj, context, path), stream, context, path) + +This is called separation of concern, and is a key feature of component-oriented programming. It allows us to keep each component very simple and unaware of its consumers. Whenever we need a different representation of the data, we don't need to write a new construct -- usually we only write the suitable adapter. 
So, let's see our adapter in action: ->>> IpAddressAdapter(Bytes("foo", 4)).parse("\x01\x02\x03\x04") +>>> IpAddress.parse(b"\x01\x02\x03\x04") '1.2.3.4' ->>> IpAddressAdapter(Bytes("foo", 4)).build("192.168.2.3") -'\xc0\xa8\x02\x03' - - -We can also use macro functions, to bind an adapter to a construct, instead of -having to do so manually every time: - ->>> def IpAddress(name): -... return IpAddressAdapter(Bytes(name, 4)) -... ->>> IpAddress("foo").build("10.0.0.1") -'\n\x00\x00\x01' - - -Having the representation separated from the actual parsing or building means -an adapter is loosely coupled with its underlying construct. As we'll see with -enums in a moment, we can use the same enum for ``UBInt8``, ``SLInt32``, or -``LFloat64``, etc., as long as the underlying construct returns an object we -can map. Moreover, we can stack several adapters on top of one another, to -created a nested adapter. - -Enums ------ - -Enums provide symmetrical name-to-value mapping. The name may be misleading, -as it's not an enumeration as you would expect in C. But since enums in C are -often just used as a collection of named values, we'll stick with the name. -Hint: enums are implemented by the ``MappingAdapter``, which provides mapping -of values to other values (not necessarily names to numbers). - ->>> c = Enum(Byte("protocol"), -... TCP = 6, -... UDP = 17, -... ) ->>> c - - ->>> # parsing ->>> c.parse("\x06") -'TCP' ->>> c.parse("\x11") -'UDP' ->>> c.parse("\x12") -Traceback (most recent call last): - . - . -construct.adapters.MappingAdapterError: undefined mapping for 18 - ->>> # building ->>> c.build("TCP") -'\x06' ->>> c.build("UDP") -'\x11' ->>> - - -We can also supply a default mapped value when no mapping exists for them. We -do this by supplying a keyword argument named ``_default_`` (a single uderscore -on each side). If we don't supply a default value, an exception is raised (as -we saw in the previous snippet). - ->>> c = Enum(Byte("protocol"), -... TCP = 6, -... 
UDP = 17, -... _default_ = "blah" -... ) ->>> c.parse("\x11") -'UDP' ->>> c.parse("\x12") # no mapping for 18, so default to "blah" -'blah' ->>> - - -We can also just "pass through" unmapped values. We do this by supplying -``_default_ = Pass``. If you are curious, ``Pass`` is a special construct that -"does nothing"; in this context, we use it to indicate the Enum to "pass -through" the unmapped value as-is. - ->>> c = Enum(Byte("protocol"), -... TCP = 6, -... UDP = 17, -... _default_ = Pass -... ) ->>> c.parse("\x11") -'UDP' ->>> c.parse("\x12") # no mapping, passing through -18 ->>> c.parse("\xff") # no mapping, passing through -255 - - -When we wish to use the same enum multiple times, we will use a simple macro -function. This keeps us conforming to the Don't Repeat Yourself principle: - ->>> def ProtocolEnum(subcon): -... return Enum(subcon, -... ICMP = 1, -... TCP = 6, -... UDP = 17, -... ) -... ->>> ProtocolEnum(UBInt8("foo")).parse("\x06") -'TCP' ->>> ProtocolEnum(UBInt16("foo")).parse("\x00\x06") -'TCP' ->>> - - -Validating -========== - -Validating means making sure the parsed/built object meets a given condition. -Validators simply raise an exception (``ValidatorError``) if the object is -invalid. The two most common cases already exist as builtins. - -Validators are usually used to make sure a "magic number" is found, the -correct version of the protocol, a file signature is matched, etc. You can -write custom validators by deriving from the Validator class and implementing -the ``_validate`` method; this allows you to write validators for more complex -things, such as making sure a CRC field (or even a cryptographic hash) is -correct, etc. - -.. autoclass:: construct.NoneOf - -.. autoclass:: construct.OneOf +>>> IpAddress.build("192.168.2.3") +b'\xc0\xa8\x02\x03' + +Having the representation separated from the actual parsing or building means an adapter is loosely coupled with its underlying construct. 
As with enums for example, we can use the same enum for ``Byte`` or ``Int32sl`` or ``VarInt``, as long as the underlying construct returns an object that we can map. Moreover, we can stack several adapters on top of one another, to create a nested adapter. + + +Using expressions instead of classes +------------------------------------ + +Adapters can be created declaratively using ``ExprAdapter``. Note that this construction is not recommended, unless it's much cleaner than ``Adapter``. You can use the ``obj_`` expression to generate lambdas that operate on the object passed around. + +For example, month in object model might be `1..12` but data format saves it as `0..11`. + +:: + + >>> d = ExprAdapter(Byte, obj_+1, obj_-1) + >>> d.parse(b'\x04') + 5 + >>> d.build(5) + b'\x04' + +Or another example, where some of the bits are unset in both parsed/built objects: + +:: + + >>> d = ExprSymmetricAdapter(Byte, obj_ & 0b00001111) + >>> d.parse(b"\xff") + 15 + >>> d.build(255) + b'\x0f' + + +Validating and filtering +============================== + +Validating means making sure the parsed/built object meets a given condition. Validators simply raise ``ValidationError`` if the lambda predicate indicates ``False`` when called with the actual object. You can write custom validators by deriving from the ``Validator`` class and implementing the ``_validate`` method.
+ +:: + + class VersionNumberValidator(Validator): + def _validate(self, obj, context, path): + return obj in [1,2,3] + + VersionNumber = VersionNumberValidator(Byte) + +:: + + >>> VersionNumber.build(3) + b'\x03' + >>> VersionNumber.build(88) + ValidationError: Error in path (building) + object failed validation: 88 + +For reference, this is how it works under the hood (in core library): + +:: + + class Validator(SymmetricAdapter): + def _decode(self, obj, context, path): + if not self._validate(obj, context, path): + raise ValidationError("object failed validation: %s" % (obj,)) + return obj + + + +Using expressions instead of classes +------------------------------------ + +Validators can also be created declaratively using ``ExprValidator``. Unfortunately ``obj_`` expression does not work with ``in`` (contains) operator, nor with ``and or not`` logical operators. But it still has the advantage that it can be declared inline. ``Adapter`` and ``Validator`` derived classes cannot be inlined inside a ``Struct``. + +For example, if 7 out of 8 bits are not allowed to be set (like a flag boolean): + +:: + + >>> d = ExprValidator(Byte, obj_ & 0b11111110 == 0) + >>> d.build(1) + b'\x01' + >>> d.build(88) + ValidationError: Error in path (building) + object failed validation: 88 + diff --git a/docs/advanced.rst b/docs/advanced.rst new file mode 100644 index 000000000..9576354b1 --- /dev/null +++ b/docs/advanced.rst @@ -0,0 +1,220 @@ +================== +The Basics, part 2 +================== + + +Integers and floats +=================== + +Basic computer science 101. All integers follow the ``Int{8,16,24,32,64}{u,s}{b,l,n}`` and floats follow the ``Float{16,32,64}{b,l}`` naming patterns. Endianness can be either big-endian, little-endian or native. Integers can be signed or unsigned (non-negative only). Floats do not have an unsigned type.
+ +>>> Int64sl.build(500) +b'\xf4\x01\x00\x00\x00\x00\x00\x00' +>>> Int64sl.build(-23) +b'\xe9\xff\xff\xff\xff\xff\xff\xff' + +Few fields have aliases, ``Byte`` among integers and ``Single`` among floats. + +:: + + Byte <--> Int8ub + Short <--> Int16ub + Int <--> Int32ub + Long <--> Int64ub + Half <--> Float16b + Single <--> Float32b + Double <--> Float64b + +Integers can also be variable-length encoded for compactness. Google invented a popular encoding: + +>>> VarInt.build(127) +b'\x7f' +>>> VarInt.build(1234567890) +b'\xd2\x85\xd8\xcc\x04' + +Signed integers can also be variable-length encoded using an encoding similar to ``VarInt``. Also from Google: + +>>> ZigZag.build(-3) +b'\x05' +>>> ZigZag.build(3) +b'\x06' + +Long integers (or those of particularly odd sizes) can be encoded using a ``BytesInteger``. Here is a 128-bit integer. + +>>> BytesInteger(16).build(255) +b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff' + +Some numerical classes, those being ``FormatField``, are implemented using builtin ``struct`` module: + +>>> d = FormatField("<", "l") +>>> d.build(1) +b'\x01\x00\x00\x00' + + +Bytes and bits +============== + +.. warning:: + + Unprefixed string literals like "data" are on Python 3 interpreted as unicode. This causes failures when using fields like ``Bytes``. + +``bytes`` objects can be moved around as-is. Bits are discussed in a later chapter. + +>>> Bytes(5).build(b"12345") +b'12345' +>>> Bytes(5).parse(b"12345") +b'12345' + +``Bytes`` can also be consumed until end of stream. Tunneling is discussed in a later chapter. + +>>> GreedyBytes.parse(b"39217839219...") +b'39217839219...' + +You can also build from ``bytearray`` objects: + +>>> GreedyBytes.build(bytearray(b'12345')) +b'12345' +>>> Bytes(5).build(bytearray(b'12345')) +b'12345' + + +Strings +======== + +.. note:: + + Encodings like UTF8, UTF16, UTF32 (including little-endian) work fine with all ``String*`` classes. 
However two of them, ``PaddedString`` and ``CString``, support only encodings listed explicitly in :class:`~construct.core.possiblestringencodings` . + +``PaddedString`` is a fixed-length construct that pads built string with null bytes, and strips those same null bytes when parsing. Strings can also be trimmed when building. If you supply a string that is too long, the construct will chop it off instead of raising a ``StringError``. + +To be honest, using this class is not recommended. It is provided only for ancient data formats. + +>>> PaddedString(10, "utf8").build("Афон") +b'\xd0\x90\xd1\x84\xd0\xbe\xd0\xbd\x00\x00' + +``PascalString`` is a variable length string that is prefixed by a length field. This scheme was invented in Pascal language that put ``Byte`` field instead of C convention of appending null byte at the end. Note that the length field does not need to be Byte, and can also be variable length itself, as shown below. VarInt is recommended when designing new protocols. + +>>> PascalString(VarInt, "utf8").build("Афон") +b'\x08\xd0\x90\xd1\x84\xd0\xbe\xd0\xbd' + +``CString`` is another string representation, that always ends with a null terminating byte at the end. This scheme was invented in C language and is known in the computer science community very well. One of the authors, Kernighan or Ritchie, admitted that it was one of the most regrettable design decisions in history. + +>>> CString("utf8").build("hello") +b'hello\x00' + +Last would be ``GreedyString`` which does the same thing as ``GreedyBytes``, plus encoding. It reads until the end of stream and then decodes data using specified encoding. ``Greedy*`` classes are usually used with tunneling constructs, which are discussed in a later chapter. + +>>> GreedyString("utf8").parse(b"329817392189") +'329817392189' + + +Mappings +========== + +Booleans are flags: + +>>> Flag.parse(b"\x01") +True +>>> Flag.build(True) +b'\x01' + +``Enum`` translates between string labels and integer values.
Parsing returns an ``EnumIntegerString`` (a sort of string and int at same time, if value has mapping) but returns an integer otherwise. This creates no problem since ``Enum`` can build from string and integer representations just the same. Note that resulting string has a special implementation, so it can be converted into a corresponding integer. + +>>> d = Enum(Byte, one=1, two=2, four=4, eight=8) +>>> d.parse(b"\x01") +EnumIntegerString.new(1, 'one') +>>> int(d.parse(b"\x01")) +1 +>>> str(d.parse(b"\x01")) +"one" +>>> d.parse(b"\xff") +255 + +Note that string values can also be obtained using attribute members. + +>>> d.one +EnumIntegerString.new(1, 'one') +>>> d.build(d.one) +b'\x01' +>>> d.build("one") +b'\x01' +>>> d.build(1) +b'\x01' + +``FlagsEnum`` decomposes an integer value into a set of string labels: + +>>> d = FlagsEnum(Byte, one=1, two=2, four=4, eight=8) +>>> d.parse(b"\x03") +Container(one=True, two=True, four=False, eight=False) +>>> d.build(dict(one=True, two=True)) +b'\x03' + +Note that string values can also be obtained using attribute members. + +>>> d.build(d.one | d.two) +b'\x03' +>>> d.build("one|two") +b'\x03' +>>> d.build(1 | 2) +b'\x03' + +Both ``Enum`` and ``FlagsEnum`` support merging labels from ``IntEnum`` and ``IntFlag`` (``enum`` module): + +:: + + import enum + class E(enum.IntEnum or enum.IntFlag): + one = 1 + two = 2 + + Enum(Byte, E) <--> Enum(Byte, one=1, two=2) + FlagsEnum(Byte, E) <--> FlagsEnum(Byte, one=1, two=2) + +.. warning:: + + Using the enum module is deprecated and should not be used. + +For completeness, there is also ``Mapping`` class, but using it is not recommended. Consider it a last resort. + +:: + + >>> x = 'any hashable object' + >>> d = Mapping(Byte, {x: 0}) + >>> d.build(x) + b'\x00' + >>> d.parse(b"\x00") + 'any hashable object' + + +Processing files +=========================== + +.. warning:: + + Opening a file without mode like ``open(filename)`` implies text mode, which cannot be parsed or built. 
+ +Constructs can parse both in-memory data (``bytes``) and binary files: + +>>> d = Struct(...) +>>> d.parse(bytes(1000)) + +>>> with open('/dev/zero', 'rb') as f: +... d.parse_stream(f) + +>>> d.parse_file('/dev/zero') + + +Documenting fields +======================== + +Top-most structures should have elaborate descriptions, documenting who made them and from what specifications. Individual fields can also have docstrings, but field names should be descriptive, not the docstrings. + +:: + + """ + Full docstring with author, email, links to RFC-alike pages. + """ * \ + Struct( + "title" / CString("utf8"), + Padding(2) * "reserved, see 8.1", + ) diff --git a/docs/api.rst b/docs/api.rst deleted file mode 100644 index 6bc92bb8c..000000000 --- a/docs/api.rst +++ /dev/null @@ -1,13 +0,0 @@ -=== -API -=== - -This documentation covers Construct's API, as automatically generated from its -code and docstrings. - -.. toctree:: - :maxdepth: 2 - - core - macros - adapters-api diff --git a/docs/api/abstract.rst b/docs/api/abstract.rst new file mode 100644 index 000000000..fe7a8920c --- /dev/null +++ b/docs/api/abstract.rst @@ -0,0 +1,11 @@ +=============================== +Core API: Abstract classes +=============================== + +.. autoclass:: construct.Construct +.. autoclass:: construct.Subconstruct +.. autoclass:: construct.Adapter +.. autoclass:: construct.SymmetricAdapter +.. autoclass:: construct.Validator +.. autoclass:: construct.Tunnel +.. autoclass:: construct.Compiled diff --git a/docs/api/adapters.rst b/docs/api/adapters.rst new file mode 100644 index 000000000..c5a0be9f1 --- /dev/null +++ b/docs/api/adapters.rst @@ -0,0 +1,12 @@ +================================= +Core API: Adapters and Validators +================================= + +.. autofunction:: construct.ExprAdapter +.. autofunction:: construct.ExprSymmetricAdapter +.. autofunction:: construct.ExprValidator +.. autofunction:: construct.OneOf +.. autofunction:: construct.NoneOf +..
autofunction:: construct.Filter +.. autofunction:: construct.Slicing +.. autofunction:: construct.Indexing diff --git a/docs/api/align.rst b/docs/api/align.rst new file mode 100644 index 000000000..3c0ecfd04 --- /dev/null +++ b/docs/api/align.rst @@ -0,0 +1,8 @@ +=============================== +Core API: Alignment and Padding +=============================== + +.. autofunction:: construct.Padding +.. autofunction:: construct.Padded +.. autofunction:: construct.Aligned +.. autofunction:: construct.AlignedStruct diff --git a/docs/api/bytes.rst b/docs/api/bytes.rst new file mode 100644 index 000000000..e692adf1b --- /dev/null +++ b/docs/api/bytes.rst @@ -0,0 +1,9 @@ +======================== +Core API: Bytes and bits +======================== + +.. autofunction:: construct.Bytes +.. autofunction:: construct.GreedyBytes +.. autofunction:: construct.setGlobalPrintFullStrings +.. autofunction:: construct.Bitwise +.. autofunction:: construct.Bytewise diff --git a/docs/api/conditional.rst b/docs/api/conditional.rst new file mode 100644 index 000000000..9a863b2d3 --- /dev/null +++ b/docs/api/conditional.rst @@ -0,0 +1,11 @@ +=============================== +Core API: Conditional +=============================== + +.. autofunction:: construct.Union +.. autofunction:: construct.Select +.. autofunction:: construct.Optional +.. autofunction:: construct.If +.. autofunction:: construct.IfThenElse +.. autofunction:: construct.Switch +.. autofunction:: construct.StopIf diff --git a/docs/core.rst b/docs/api/core.rst similarity index 73% rename from docs/core.rst rename to docs/api/core.rst index 53ac855b2..c9c32df40 100644 --- a/docs/core.rst +++ b/docs/api/core.rst @@ -1,5 +1,5 @@ ========================================== -``construct.core`` -- Core data structures +``construct.core`` -- entire module ========================================== .. 
automodule:: construct.core diff --git a/docs/api/debugging.rst b/docs/api/debugging.rst new file mode 100644 index 000000000..563aa2479 --- /dev/null +++ b/docs/api/debugging.rst @@ -0,0 +1,9 @@ +================================ +Core API: Debugging +================================ + +.. autofunction:: construct.Probe +.. autofunction:: construct.setGlobalPrintFullStrings +.. autofunction:: construct.setGlobalPrintFalseFlags +.. autofunction:: construct.setGlobalPrintPrivateEntries +.. autofunction:: construct.Debugger diff --git a/docs/api/exceptions.rst b/docs/api/exceptions.rst new file mode 100644 index 000000000..1598ef009 --- /dev/null +++ b/docs/api/exceptions.rst @@ -0,0 +1,31 @@ +=============================== +Core API: Exception types +=============================== + +.. autofunction:: construct.ConstructError +.. autofunction:: construct.SizeofError +.. autofunction:: construct.AdaptationError +.. autofunction:: construct.ValidationError +.. autofunction:: construct.StreamError +.. autofunction:: construct.FormatFieldError +.. autofunction:: construct.IntegerError +.. autofunction:: construct.StringError +.. autofunction:: construct.MappingError +.. autofunction:: construct.RangeError +.. autofunction:: construct.RepeatError +.. autofunction:: construct.ConstError +.. autofunction:: construct.IndexFieldError +.. autofunction:: construct.CheckError +.. autofunction:: construct.ExplicitError +.. autofunction:: construct.NamedTupleError +.. autofunction:: construct.TimestampError +.. autofunction:: construct.UnionError +.. autofunction:: construct.SelectError +.. autofunction:: construct.SwitchError +.. autofunction:: construct.StopFieldError +.. autofunction:: construct.PaddingError +.. autofunction:: construct.TerminatedError +.. autofunction:: construct.RawCopyError +.. autofunction:: construct.ChecksumError +.. autofunction:: construct.CancelParsing +.. 
autofunction:: construct.CipherError diff --git a/docs/api/lazy.rst b/docs/api/lazy.rst new file mode 100644 index 000000000..41e9906aa --- /dev/null +++ b/docs/api/lazy.rst @@ -0,0 +1,8 @@ +=============================== +Core API: Lazy equivalents +=============================== + +.. autofunction:: construct.Lazy +.. autofunction:: construct.LazyStruct +.. autofunction:: construct.LazyArray +.. autofunction:: construct.LazyBound diff --git a/docs/api/lib.rst b/docs/api/lib.rst new file mode 100644 index 000000000..5a73e66d9 --- /dev/null +++ b/docs/api/lib.rst @@ -0,0 +1,5 @@ +========================================== +``construct.lib`` -- entire module +========================================== + +.. automodule:: construct.lib diff --git a/docs/api/mappings.rst b/docs/api/mappings.rst new file mode 100644 index 000000000..8abcff614 --- /dev/null +++ b/docs/api/mappings.rst @@ -0,0 +1,9 @@ +=============================== +Core API: Mappings +=============================== + +.. autofunction:: construct.Flag +.. autofunction:: construct.Enum +.. autofunction:: construct.FlagsEnum +.. autofunction:: construct.setGlobalPrintFalseFlags +.. autofunction:: construct.Mapping diff --git a/docs/api/misc.rst b/docs/api/misc.rst new file mode 100644 index 000000000..05629f06d --- /dev/null +++ b/docs/api/misc.rst @@ -0,0 +1,18 @@ +=============================== +Core API: Miscellaneous +=============================== + +.. autofunction:: construct.Const +.. autofunction:: construct.Computed +.. autofunction:: construct.Index +.. autofunction:: construct.Rebuild +.. autofunction:: construct.Default +.. autofunction:: construct.Check +.. autofunction:: construct.Error +.. autofunction:: construct.FocusedSeq +.. autofunction:: construct.Pickled +.. autofunction:: construct.Numpy +.. autofunction:: construct.NamedTuple +.. autofunction:: construct.Timestamp +.. autofunction:: construct.Hex +.. 
autofunction:: construct.HexDump diff --git a/docs/api/numerics.rst b/docs/api/numerics.rst new file mode 100644 index 000000000..da70e3ac0 --- /dev/null +++ b/docs/api/numerics.rst @@ -0,0 +1,9 @@ +======================== +Core API: Integers and Floats +======================== + +.. autofunction:: construct.FormatField +.. autofunction:: construct.BytesInteger +.. autofunction:: construct.BitsInteger +.. autofunction:: construct.VarInt +.. autofunction:: construct.ZigZag diff --git a/docs/api/repeaters.rst b/docs/api/repeaters.rst new file mode 100644 index 000000000..6f339f5df --- /dev/null +++ b/docs/api/repeaters.rst @@ -0,0 +1,7 @@ +========================================== +Core API: Repeaters +========================================== + +.. autofunction:: construct.Array +.. autofunction:: construct.GreedyRange +.. autofunction:: construct.RepeatUntil diff --git a/docs/api/special.rst b/docs/api/special.rst new file mode 100644 index 000000000..5ce26a4fa --- /dev/null +++ b/docs/api/special.rst @@ -0,0 +1,5 @@ +=============================== +Core API: Special +=============================== + +.. autofunction:: construct.Renamed diff --git a/docs/api/streaming.rst b/docs/api/streaming.rst new file mode 100644 index 000000000..f22bfebdc --- /dev/null +++ b/docs/api/streaming.rst @@ -0,0 +1,11 @@ +=============================== +Core API: Streaming +=============================== + +.. autofunction:: construct.Pointer +.. autofunction:: construct.Peek +.. autofunction:: construct.OffsettedEnd +.. autofunction:: construct.Seek +.. autofunction:: construct.Tell +.. autofunction:: construct.Pass +.. autofunction:: construct.Terminated diff --git a/docs/api/strings.rst b/docs/api/strings.rst new file mode 100644 index 000000000..a307c0416 --- /dev/null +++ b/docs/api/strings.rst @@ -0,0 +1,10 @@ +=================== +Core API: Strings +=================== + +.. autodata:: construct.core.possiblestringencodings +.. autofunction:: construct.PaddedString +.. 
autofunction:: construct.PascalString +.. autofunction:: construct.CString +.. autofunction:: construct.GreedyString +.. autofunction:: construct.setGlobalPrintFullStrings diff --git a/docs/api/structs.rst b/docs/api/structs.rst new file mode 100644 index 000000000..23bb3c114 --- /dev/null +++ b/docs/api/structs.rst @@ -0,0 +1,8 @@ +=============================== +Core API: Structs and Sequences +=============================== + +.. autofunction:: construct.Struct +.. autofunction:: construct.Sequence +.. autofunction:: construct.AlignedStruct +.. autofunction:: construct.BitStruct diff --git a/docs/api/tunneling.rst b/docs/api/tunneling.rst new file mode 100644 index 000000000..8b1aab7cb --- /dev/null +++ b/docs/api/tunneling.rst @@ -0,0 +1,23 @@ +=============================== +Core API: Tunneling +=============================== + +.. autofunction:: construct.RawCopy +.. autofunction:: construct.ByteSwapped +.. autofunction:: construct.BitsSwapped +.. autofunction:: construct.Prefixed +.. autofunction:: construct.PrefixedArray +.. autofunction:: construct.FixedSized +.. autofunction:: construct.NullTerminated +.. autofunction:: construct.NullStripped +.. autofunction:: construct.RestreamData +.. autofunction:: construct.Transformed +.. autofunction:: construct.Restreamed +.. autofunction:: construct.ProcessXor +.. autofunction:: construct.ProcessRotateLeft +.. autofunction:: construct.Checksum +.. autofunction:: construct.Compressed +.. autofunction:: construct.CompressedLZ4 +.. autofunction:: construct.EncryptedSym +.. autofunction:: construct.EncryptedSymAead +.. 
autofunction:: construct.Rebuffered diff --git a/docs/basics.rst b/docs/basics.rst index f6449bea9..7a3e931d6 100644 --- a/docs/basics.rst +++ b/docs/basics.rst @@ -2,257 +2,333 @@ The Basics ========== + Fields ====== -Fields are the most fundamental unit of construction: they **parse** (read -data from the stream and return an object) and **build** (take an object and -write it down onto a stream). There are many kinds of fields, each working -with a different type of data (numeric, boolean, strings, etc.). +Fields are the most fundamental unit of construction: they **parse** (read data from the stream and return an object) and **build** (take an object and write it down onto a stream). There are many kinds of fields, each working with a different type of data (numeric, boolean, strings, etc.). Some examples of parsing: ->>> from construct import UBInt16, ULInt16 ->>> UBInt16("foo").parse("\x01\x02") +>>> from construct import Int16ub, Int16ul +>>> Int16ub.parse(b"\x01\x02") 258 ->>> ULInt16("foo").parse("\x01\x02") +>>> Int16ul.parse(b"\x01\x02") 513 Some examples of building: ->>> from construct import UBInt16, SBInt16 ->>> UBInt16("foo").build(31337) -'zi' ->>> SBInt16("foo").build(-31337) -'\x86\x97' +>>> from construct import Int16ub, Int16sb +>>> Int16ub.build(31337) +b'zi' +>>> Int16sb.build(-31337) +b'\x85\x97' + +Other fields like: + +>>> Flag.parse(b"\x01") +True + +>>> d = Enum(Byte, g=8, h=11) +>>> d.parse(b"\x08") +EnumIntegerString.new(8, 'g') +>>> str(_) +'g' +>>> d.build('g') +b'\x08' +>>> d.build(11) +b'\x0b' + +>>> Float32b.build(12.345) +b'AE\x85\x1f' +>>> Single.parse(_) +12.345000267028809 + + +Variable-length fields +====================== + +>>> VarInt.build(1234567890) +b'\xd2\x85\xd8\xcc\x04' +>>> VarInt.sizeof() +SizeofError: Error in path (sizeof) + +Fields are sometimes fixed size and some composites behave differently when they are composed of those. Keep that detail in mind.
Classes that cannot determine size always raise ``SizeofError`` in response. There are a few classes where the same instance may return an integer or raise ``SizeofError`` depending on circumstances. Array size depends on whether count of elements is constant (can be a context lambda) and subcon is fixed size (can be variable size). For example, many classes take context lambdas and ``SizeofError`` is raised if the key is missing from the context dictionary. + +>>> Int16ub[2].sizeof() +4 +>>> VarInt[2].sizeof() +SizeofError: Error in path (sizeof) + + Structs ======= -For those of you familiar with C, Structs are very intuitive, but here's a -short explanation for the larger audience. A Struct is a sequenced collection -of fields or other components, that are parsed/built in that order. Note that -if two or more fields of a Struct have the same name, the last field "wins;" -that is, the last field's value will be the value returned from a parse. - ->>> from construct import Struct, UBInt8, SLInt16, LFloat32 ->>> c = Struct("foo", -... UBInt8("a"), -... SLInt16("b"), -... LFloat32("c"), +For those of you familiar with C, Structs are very intuitive, but here's a short explanation for the larger audience. A ``Struct`` is a collection of ordered and usually named fields (field means an instance of ``Construct`` class), that are parsed/built in that same order. Names are used for two reasons: (1) when parsed, values are returned in a dictionary where keys are matching the names, and when built, each field gets built with a value taken from a dictionary from a matching key (2) fields' parsed and built values are inserted into the context dictionary under matching names. + +>>> d = Struct( +... "signature" / Const(b"BMP"), +... "width" / Int8ub, +... "height" / Int8ub, +... "pixels" / Array(this.width * this.height, Byte), +...
) +>>> d.build(dict(width=3, height=2, pixels=[7,8,9,11,12,13])) +b'BMP\x03\x02\x07\x08\t\x0b\x0c\r' +>>> d.parse(b'BMP\x03\x02\x07\x08\t\x0b\x0c\r') +Container(signature=b'BMP', width=3, height=2, pixels=ListContainer([7, 8, 9, 11, 12, 13])) + +Usually members are named but there are some classes that build from nothing and return nothing on parsing, so they have no need for a name (they can stay anonymous). Duplicated names within same struct can have unknown side effects. + +>>> d = Struct( +... Const(b"XYZ"), +... Padding(2), +... Pass, +... Terminated, ... ) ->>> c - ->>> c.parse("\x07\x00\x01\x00\x00\x00\x01") -Container(a = 7, b = 256, c = 2.350988701644575e-038) +>>> d.build(dict()) +b'XYZ\x00\x00' +>>> d.parse(_) +Container() + +There is another declaration syntax that uses keyword arguments. Truth be told, I am not keen on using this way of declaring Structs. You should use the ``/`` operator as shown in first example. + +>>> Struct(a=Byte, b=Byte, c=Byte, d=Byte) + +Operator ``+`` can also be used to make Structs. Structs are nested when added. Truth be told, I am not keen on using this way of declaring Structs either. + +>>> d = "a"/Byte + "inner"/Struct("b"/Byte) + "c"/Byte + Containers ---------- -What *is* that Container object, anyway? Well, a Container is a regular Python -dictionary. It provides pretty-printing and accessing items as attributes, in -addition to the normal facilities of dictionaries. Let's see more of those: - ->>> x = c.parse("\x07\x00\x01\x00\x00\x00\x01") ->>> x -Container(a = 7, b = 256, c = 2.350988701644575e-038) ->>> x.a -7 ->>> x.b -256 ->>> print x +What is that ``Container`` object, anyway? Well, a ``Container`` is a subclass of ``dict``. They provide pretty-printing and allow to access items as attributes as well as keys, and they also preserve insertion order. ``ListContainer``, similarly, is a subclass of ``list``. Both ``Container`` and ``ListContainer`` provide searching functionality. 
Let's see more of those: + +>>> d = Struct("float" / Single) +>>> x = d.parse(b"\x00\x00\x00\x01") +>>> x.float +1.401298464324817e-45 +>>> x["float"] +1.401298464324817e-45 +>>> x # REPL uses repr(x) +Container(float=1.401298464324817e-45) +>>> print(x) # print uses str(x) Container: - a = 7 - b = 256 - c = 2.350988701644575e-038 - -Building --------- - -And here is how we build Structs: - ->>> # Rebuild the parsed object. ->>> c.build(x) -'\x07\x00\x01\x00\x00\x00\x01' - ->>> # Mutate the parsed object and build... ->>> x.b = 5000 ->>> c.build(x) -'\x07\x88\x13\x00\x00\x00\x01' - ->>> # ...Or, we can create a new container. ->>> c.build(Container(a = 9, b = 1234, c = 56.78)) -'\t\xd2\x04\xb8\x1ecB' - -.. note:: - - Building is fully duck-typed and can be done with any object. - ->>> class Foo(object): pass -... ->>> f = Foo() ->>> f.a = 1 ->>> f.b = 2 ->>> f.c = 3 ->>> c.build(f) -'\x01\x02\x00\x00\x00@@' - -Nested ------- - -Structs can be nested. Structs can contain other Structs, as well as any -construct. Here's how it's done: - ->>> c = Struct("foo", -... UBInt8("a"), -... UBInt16("b"), -... Struct("bar", -... UBInt8("a"), -... UBInt16("b"), -... ) + float = 1.401298464324817e-45 + +As you can see, Containers provide human-readable representation of the data when printed, which is very important. By default, it truncates byte-strings and unicode-strings and hides ``EnumFlags`` unset flags (false values). If you would like a full print, you can use these functions: + +>>> setGlobalPrintFalseFlags(True) +>>> setGlobalPrintFullStrings(True) +>>> setGlobalPrintPrivateEntries(True) + +Thanks to blapid, containers can also be searched. Structs nested within Structs return containers within containers on parsing. One can search the entire tree of dicts for a particular name. Regular expressions are supported. 
+ +>>> x = Container(Container(a=1,d=Container(a=2))) +>>> x.search("a") +1 +>>> x.search_all("a") +[1, 2] + +Note that not all parameters can be accessed via attribute access (dot operator). If the name of an item matches a method name of the ``Container``, it can only be accessed via key access (square brackets operator). This includes the following names: ``clear``, ``copy``, ``fromkeys``, ``get``, ``items``, ``keys``, ``move_to_end``, ``pop``, ``popitem``, ``search``, ``search_all``, ``setdefault``, ``update``, ``values``. + +>>> x = Container(update=5) +>>> x["update"] +5 +>>> x.update # not usable via dot access + + + +Nesting and embedding +--------------------- + +Structs can be nested. Structs can contain other Structs, as well as any other constructs. Here's how it's done: + +>>> d = Struct( +... "inner" / Struct( +... "data" / Bytes(4), +... ), ... ) ->>> x = c.parse("ABBabb") ->>> x -Container(a = 65, b = 16962, bar = Container(a = 97, b = 25186)) ->>> print x +>>> d.parse(b"1234") +Container(inner=Container(data=b'1234')) +>>> print(_) Container: - a = 65 - b = 16962 - bar = Container: - a = 97 - b = 25186 ->>> x.a -65 ->>> x.bar -Container(a = 97, b = 25186) ->>> x.bar.b -25186 - -As you can see, Containers provide human-readable representations of the data, -which is very important for large data structures. - -Embedding ---------- - -A Struct can be embedded into an enclosing Struct. This means all the fields -of the embedded Struct will be merged into the fields of the enclosing Struct. -This is useful when you want to split a big Struct into multiple parts, and -then combine them all into one Struct. - ->>> foo = Struct("foo", -... UBInt8("a"), -... UBInt8("b"), -... ) ->>> bar = Struct("bar", -... foo, # This Struct is not embedded. -... UBInt8("c"), -... UBInt8("d"), -... ) ->>> bar2= Struct("bar", -... Embed(foo), # This Struct is embedded. -... UBInt8("c"), -... UBInt8("d"), -... 
) ->>> bar.parse("abcd") -Container(c = 99, d = 100, foo = Container(a = 97, b = 98)) ->>> bar2.parse("abcd") -Container(a = 97, b = 98, c = 99, d = 100) + inner = Container: + data = b'1234' (total 4) + +It used to be that Structs could have been embedded (flattened out). However, this created more problems than it solved so this feature was eventually removed. Since Construct 2.10 it is no longer possible to embed structs. You should, and always should have been, be nesting them just like in the example above. + + +Showing path information in exceptions +---------------------------------------- + +If your construct throws an exception, for any reason, there should be a "path information" attached to it. In the example below, the ``(parsing) -> a -> b -> c -> foo`` field throws an exception due to lack of bytes to consume. You can see that in the exception message. + +:: + + >>> x = Struct( + ... 'foo' / Bytes(1), + ... 'a' / Struct( + ... 'foo' / Bytes(1), + ... 'b' / Struct( + ... 'foo' / Bytes(1), + ... 'c' / Struct( + ... 'foo' / Bytes(1), + ... 'bar' / Bytes(1) + ... ) + ... ) + ... ) + ... ) + >>> x.parse(b'\xff' * 3) + construct.core.StreamError: Error in path (parsing) -> a -> b -> c -> foo + stream read less than specified amount, expected 1, found 0 + +Note that compiled parsing classes may not provide a path information. + + +Hidden context entries +---------------------- + +There are few additional, hidden entries in the context dictionary. They are mostly used internally so they are not very well documented. + +:: + + >>> d = Struct( + ... 'x' / Computed(1), + ... 'inner' / Struct( + ... 'inner2' / Struct( + ... 'x' / Computed(this._root.x), + ... 'z' / Computed(this._params.z), + ... 'zz' / Computed(this._root._.z), + ... ), + ... ), + ... Probe(), + ... 
) + >>> setGlobalPrintPrivateEntries(True) + >>> d.parse(b'', z=2) + -------------------------------------------------- + Probe, path is (parsing), into is None + Container: + _ = Container: + z = 2 + _parsing = True + _building = False + _sizing = False + _params = + _params = Container: + z = 2 + _parsing = True + _building = False + _sizing = False + _params = + _root = + _parsing = True + _building = False + _sizing = False + _subcons = Container: + x = > + inner = > + _io = <_io.BytesIO object at 0x7fd91e7313b8> + _index = None + x = 1 + inner = Container: + _io = <_io.BytesIO object at 0x7fd91e7313b8> + inner2 = Container: + _io = <_io.BytesIO object at 0x7fd91e7313b8> + x = 1 + z = 2 + zz = 2 + -------------------------------------------------- + Container(x=1, inner=Container(inner2=Container(x=1, z=2, zz=2))) + + +Explanation is as follows: + +* ``_`` means up-level in the context stack, every Struct does context nesting +* ``_params`` is the level on which externally provided values reside, those passed as parse() and build() keyword arguments +* ``_root`` is the outer-most Struct, this entry might not exist if you do not use Structs +* ``_parsing``, ``_building`` and ``_sizing`` are boolean values that are set by ``parse``, ``build`` and ``sizeof`` public API methods +* ``_subcons`` is a list of ``Construct`` instances, this ``Struct`` members +* ``_io`` is a memory-stream or file-stream or whatever was provided to ``parse_stream`` public API method +* ``_index`` is an indexing number used eg. in ``Array`` +* (parsed members are also added under matching names) Sequences ========= -Sequences are very similar to Structs, but operate with lists rather than -containers. Sequences are less commonly used than Structs, but are very handy -in certain situations. Since a list is returned, in place of an attribute -container, the names of the sub-constructs are not important; two constructs -with the same name will not override or replace each other. 
- -Parsing -------- +Sequences are very similar to Structs, but operate with lists rather than containers. Sequences are less commonly used than Structs, but are very handy in certain situations. Since a list is returned in place of an attribute container, the names of the sub-constructs are not important. Two constructs with the same name will not override or replace each other. Names are used for the purposes of context dictionary. ->>> c = Sequence("foo", -... UBInt8("a"), -... UBInt16("b"), +>>> d = Sequence( +... Int16ub, +... CString("utf8"), +... GreedyBytes, ... ) ->>> c - ->>> c.parse("abb") -[97, 25186] +Operator ``>>`` can also be used to make Sequences, or to merge them (not nest them, this syntax is not recommended). -Building --------- +>>> d = Int16ub >> Sequence(Byte, Byte) +>>> d.parse(bytes(4)) +ListContainer([0, 0, 0]) +# it is NOT nested like ListContainer(0, ListContainer(0, 0)) ->>> c.build([1,2]) -'\x01\x00\x02' +Repeaters +============== -Nested ------- +Repeaters, as their name suggests, repeat a given unit for a specified number of times. At this point, we'll only cover static repeaters where count is a constant integer. Meta-repeaters take values at parse/build time from the context and they will be covered in the meta-constructs tutorial. ``Array`` and ``GreedyRange`` differ from ``Sequence`` in that they are homogenous, they process elements of same kind. We have three kinds of repeaters. ->>> c = Sequence("foo", -... UBInt8("a"), -... UBInt16("b"), -... Sequence("bar", -... UBInt8("a"), -... UBInt16("b"), -... ) -... ) ->>> c.parse("ABBabb") -[65, 16962, [97, 25186]] +Arrays have a fixed constant count of elements. Operator ``[]`` is used instead of calling the ``Array`` class (and is recommended syntax). 
+>>> d = Array(10, Byte) +>>> d = Byte[10] # same thing +>>> d.parse(b"1234567890") +ListContainer([49, 50, 51, 52, 53, 54, 55, 56, 57, 48]) +>>> d.build([1,2,3,4,5,6,7,8,9,0]) +b'\x01\x02\x03\x04\x05\x06\x07\x08\t\x00' -Embedded --------- +``GreedyRange`` attempts to parse until EOF or subcon fails to parse correctly. Either way, when ``GreedyRange`` encounters either failure it seeks the stream back to a position after last successful subcon parsing. This means the stream must be seekable/tellable (doesn't work inside ``Bitwise``). -Like Structs, Sequences are compatible with the Embed wrapper. Embedding one -Sequence into another causes a merge of the parsed lists of the two Sequences. +>>> d = GreedyRange(Byte) +>>> d.parse(b"dsadhsaui") +ListContainer([100, 115, 97, 100, 104, 115, 97, 117, 105]) ->>> foo = Sequence("foo", -... UBInt8("a"), -... UBInt8("b"), -... ) ->>> bar = Sequence("bar", -... foo, # <-- unembedded -... UBInt8("c"), -... UBInt8("d"), -... ) ->>> bar2 = Sequence("bar", -... Embed(foo), # <-- embedded -... UBInt8("c"), -... UBInt8("d"), -... ) ->>> bar.parse("abcd") -[[97, 98], 99, 100] ->>> bar2.parse("abcd") -[97, 98, 99, 100] +``RepeatUntil`` is different than the others. Each element is tested by a lambda predicate. The predicate signals when a given element is the terminal element. The repeater inserts all previous items along with the terminal one, and returns them as a list. -.. _repeaters: +Note that all elements accumulated during parsing are provided as additional lambda parameter (second in order). 
-Repeaters -========= +>>> d = RepeatUntil(lambda obj,lst,ctx: obj > 10, Byte) +>>> d.parse(b"\x01\x05\x08\xff\x01\x02\x03") +ListContainer([1, 5, 8, 255]) +>>> d.build(range(20)) +b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b' + +>>> d = RepeatUntil(lambda x,lst,ctx: lst[-2:] == [0,0], Byte) +>>> d.parse(b"\x01\x00\x00\xff") +ListContainer([1, 0, 0]) -Repeaters, as their name suggests, repeat a given unit for a specified number -of times. At this point, we'll only cover static repeaters. Meta-repeaters -will be covered in the meta-constructs tutorial. -We have four kinds of static repeaters. In fact, for those of you who wish to -go under the hood, two of these repeaters are actually wrappers around Range. +Processing on-the-fly +========================== -.. autofunction:: construct.Range +Data can be parsed and processed before further items get parsed. Hooks can be attached by using ``*`` operator. -.. autofunction:: construct.Array +Repeater classes like ``GreedyRange`` support indexing feature, which inserts incremental numbers into the context under ``_index`` key, in case you want to enumerate the objects. If you dont want to process further data, just raise ``CancelParsing`` from within the hook, and the parse method will exit clean. -.. autofunction:: construct.GreedyRange +:: -.. autofunction:: construct.OptionalGreedyRange + def printobj(obj, ctx): + print(obj) + if ctx._index + 1 >= 3: + raise CancelParsing + st = Struct( + "first" / Byte * printobj, + "second" / Byte, + ) + d = GreedyRange(st * printobj) -Nesting -------- +If you want to process gigabyte-sized data, then ``GreedyRange`` has an option to discard each element after it was parsed (and processed by the hook). Otherwise you would end up consuming gigabytes of RAM, because ``GreedyRange`` normally accumulates all parsed objects and returns them in a list. -As with all constructs, Repeaters can be nested too. 
Here's an example: +:: ->>> c = Array(5, Array(2, UBInt8("foo"))) ->>> c.parse("aabbccddee") -[[97, 97], [98, 98], [99, 99], [100, 100], [101, 101]] + d = GreedyRange(Struct(...) * printobj, discard=True) diff --git a/docs/bitwise.rst b/docs/bitwise.rst index c79bacfeb..225014102 100644 --- a/docs/bitwise.rst +++ b/docs/bitwise.rst @@ -1,110 +1,140 @@ -======= +====================== +The Bit/Byte Duality +====================== + + History ======= -In Construct 1.XX, parsing and building where performed at the bit level: the -entire data was converted to a string of 1's and 0's, so you could really work -with bit fields. Every construct worked with bits, except some (which were -named ByteXXX) that worked on whole octets. This has made it very easy to work -with single bits, such as the flags of the TCP header, 7-bit ASCII characters, -or fields that were not aligned to the byte boundary (nibbles et al). +In Construct 1.XX, parsing and building were performed at the bit level: the entire data was converted to a string of 1's and 0's, so you could really work with bit fields. Every construct worked with bits, except some (which were named ``ByteXXX``) that worked on whole octets. This made it very easy to work with single bits, such as the flags of the TCP header, 7-bit ASCII characters, or fields that were not aligned to the byte boundary (nibbles et al). This approach was easy and flexible, but had two main drawbacks: * Most data is byte-aligned (with very few exceptions) -* The overhead was too big. +* The overhead was too big -Since constructs worked on bits, the data had to be first converted to a -bit-string, which meant you had to hold the entire data set in memory. Not -only that, but you actually held 8 times the size of the original data (it was -a bit-string). According to some tests I made, you were limited to files of -about 50MB (and that was slow due to page-thrashing). 
+Since constructs worked on bits, the data had to be first converted to a bit-string, which meant you had to hold the entire data set in memory. Not only that, but you actually held 8 times the size of the original data (it was a bit-string). According to some tests I made, you were limited to files of about 50MB (and that was slow due to page-thrashing). So as of Construct 2.XX, all constructs work with bytes: * Less memory consumption -* No unnecessary bytes-to-bits/bits-to-bytes coversions -* Can rely on python's built in struct module for numeric packing/unpacking - (faster, tested) -* Can directly parse-from/build-to file-like objects (without in-memory - buffering) +* No unnecessary bytes-to-bits and bits-to-bytes conversions +* Can rely on python's built-in ``struct`` module for numeric packing/unpacking (it is faster and more robust) +* Can directly parse from and build to file-like objects (without in-memory buffering) + +But how are we supposed to work with raw bits? The only difference is that we must explicitly declare that: certain fields like ``BitsInteger`` (``Bit``, ``Nibble`` and ``Octet`` are instances of ``BitsInteger``) handle parsing and building of bit strings. There are also a few fields like ``Struct`` and ``Flag`` that work with both byte-strings and bit-strings. -But how are we supposed to work with raw bits? The only difference is that we -must explicitly declare that: BitFields handle parsing/building bit fields, -and BitStructs handle to/from conversions. BitStruct ========= -A BitStruct is a sequence of constructs that are parsed/built in the specified -order, much like normal Structs. The difference is that BitStruct operate on -bits rather than bytes. When parsing a BitStruct, the data is first converted -to a bit stream (a stream of 1's and 0's), and only then is it fed to the -subconstructs. The subconstructs are expected to operate on bits instead of -bytes. For reference, see the code snippet in the BitField section.
-Note: BitStruct is actually just a wrapper for the Bitwise construct. +A ``BitStruct`` is a sequence of constructs that are parsed/built in the specified order, much like normal ``Struct``'s. The difference is that ``BitStruct`` operates on bits rather than bytes. When parsing a ``BitStruct``, the data is first converted to a bit stream (a stream of ``\x01`` and ``\x00``), and only then is it fed to the subconstructs. The subconstructs are expected to operate on bits instead of bytes. For reference look at the code below: + +>>> d = BitStruct( +... "a" / Flag, +... "b" / Nibble, +... "c" / BitsInteger(10), +... "d" / Padding(1), +... ) +>>> d.parse(b"\xbe\xef") +Container(a=True, b=7, c=887, d=None) +>>> d.sizeof() +2 + +``BitStruct`` is actually just a wrapper for the ``Bitwise`` around a ``Struct`` . + Important notes ---------------- +=============== -* Non-nestable - BitStructs are not nestable/stackable; writing something - like ``BitStruct("foo", BitStruct("bar", Octet("spam")))`` will not work. You - can use regular Structs inside BitStructs. -* Byte-Aligned - The total size of the elements of a BitStruct must be a - multiple of 8 (due to alignment issues) -* Pointers and OnDemand - Do not place Pointers or OnDemands inside - BitStructs, since it uses an internal stream, so external stream offsets - will turn out wrong. +* ``BitStruct``'s are non-nestable (because ``Bitwise`` are not nestable) so writing something like ``BitStruct(BitStruct(Octet))`` will not work. You can use regular ``Struct`` inside ``BitStruct`` . +* Byte aligned - The total size of the elements of a ``BitStruct`` must be a multiple of 8 (due to alignment issues). ``RestreamedBytesIO`` will raise an error if the amount of bits and bytes does not align properly. 
+* ``GreedyRange``, ``Pointer`` and any ``Lazy*`` - Do not place fields that do seeking/telling or lazy parsing inside ``Bitwise``, because ``RestreamedBytesIO`` offsets will turn out wrong, have unknown side-effects or raise unknown exceptions. +* Normal (byte-oriented) classes like ``Int*`` or ``Float*`` can be used by wrapping in ``Bytewise``. If you need to mix byte- and bit-oriented fields, you should use a ``BitStruct`` and ``Bytewise`` . +* Advanced classes like tunneling may not work in bitwise context. Only basic fields like integers were thoroughly tested. -BitField -======== +Fields that work with bits +============================= -.. autofunction:: construct.BitField +Those classes work exclusively in Bitwise context. -Convenience wrappers for BitField ---------------------------------- +:: -Bit - A single bit -Nibble - A sequence of 4 bits (half a byte) -Octet - An sequence of 8 bits (byte) + Bit <--> BitsInteger(1) + Nibble <--> BitsInteger(4) + Octet <--> BitsInteger(8) -The Bit/Byte Duality -==================== -Most simple fields (such as Flag, Padding, Terminator, etc.) are ignorant to -the granularity of the data they operate on. The actual granularity depends on -the enclosing layers. +Fields that work with bytes +============================= -Here's a snippet of code that operates on bytes: +Normal classes, that is those working with byte-streams, can be used on bit-streams by wrapping them with ``Bytewise``. It's a wrapper that does the opposite of ``Bitwise``: it transforms each 8 bits into 1 byte. The enclosing stream is a bit-stream but the subcon is provided a byte-stream. ->>> c1 = Struct("foo", -... Padding(2), -... Flag("myflag"), -... Padding(5), -... ) ->>> ->>> c1.parse("\x00\x00\x01\x00\x00\x00\x00\x00") -Container(myflag = True) +:: + + >>> d = Bitwise(Struct( + ... 'a' / Nibble, + ... 'b' / Bytewise(Float32b), + ... 'c' / Padding(4), + ...
)) + >>> d.parse(bytes(5)) + Container(a=0, b=0.0, c=None) + >>> d.sizeof() + 5 -And here's a snippet of code that operates on bits. The only difference is -BitStruct in place of a normal Struct: +Fields that do both +============================= ->>> c2 = BitStruct("foo", +Some simple fields (such as ``Flag``, ``Padding``, ``Pass`` or ``Terminated``) are ignorant to the granularity of the data they operate on. The actual granularity depends on the enclosing layers. Same applies to classes that are wrappers or adapters like ``Enum`` or ``EnumFlags``. Those classes do not care about granularity because they don't interact with the stream, it is their subcons that do. + +Here's a snippet of code that operates on bytes: + +>>> d = Struct( ... Padding(2), -... Flag("myflag"), +... "x" / Flag, ... Padding(5), ... ) ->>> ->>> c2.parse("\x20") -Container(myflag = True) ->>> +>>> d.build(dict(x=5)) +b'\x00\x00\x01\x00\x00\x00\x00\x00' +>>> d.sizeof() +8 +And here's a snippet of code that operates on bits. The only difference is ``BitStruct`` in place of a normal ``Struct``: -So unlike "classical Construct", there's no need for BytePadding and -BitPadding. If Padding is enclosed by a BitStruct, it operates on bits; -otherwise, it operates on bytes. +>>> d = Bitwise(Struct( +... Padding(2), +... "x" / Flag, +... Padding(5), +... )) +>>> d.build(dict(x=5)) +b' ' +>>> d.sizeof() +1 + +So unlike "classical Construct", there's no need for ``BytePadding`` and ``BitPadding``. If ``Padding`` is enclosed by a ``Bitwise``, it operates on bits, otherwise, it operates on bytes. + + +Fields that do not work and fail +======================================= + +Following classes may not work within ``Bitwise`` or ``Bytewise`` depending on some circumstances. Actually this section applies to ``ByteSwapped`` and ``BitsSwapped`` as well. Those 4 are macros and resolve to either ``Transformed`` or ``Restreamed`` depending if subcon is fixed-sized and therefore the data can be prefetched entirely.
If yes, then it turns into ``Transformed`` and should work just fine, if not, then it turns into ``Restreamed`` which uses ``RestreamedBytesIO`` which has several limitations in its implementation. Mileage may vary. + +Those do use stream seeking or telling (or both): + +* ``GreedyRange`` +* ``Union`` +* ``Select`` +* ``Padded`` (actually works) +* ``Aligned`` (actually works) +* ``Pointer`` +* ``Peek`` +* ``Seek`` +* ``Tell`` +* ``RawCopy`` +* ``Prefixed`` (actually works) +* ``PrefixedArray`` (actually works) +* ``NullTerminated`` (actually works unless ``consume=False``) +* ``LazyStruct`` +* ``LazyArray`` diff --git a/docs/compilation.rst b/docs/compilation.rst new file mode 100644 index 000000000..9cbf2a582 --- /dev/null +++ b/docs/compilation.rst @@ -0,0 +1,919 @@ +====================== +Compilation feature +====================== + +.. warning:: This feature is fully implemented but may not be fully mature. + + +Overall +========= + +Construct 2.9 adds an experimental feature: compiling user-made constructs into much faster (but less feature-rich) code. If you are familiar with Kaitai Struct, an alternative framework to Construct, Kaitai compiles yaml-based schemas into pure Python modules. Construct on the other hand, defines schemas in pure Python and compiles them into pure Python modules. Once you define a construct, you can use it to parse and build blobs without compilation. Compilation has only one purpose: performance. + +It should be made clear that currently the compiler supports only parsing and building. Sizeof is deferred to the original construct, from which the compiled instance was made. + + +Requirements +--------------- + +Compilation feature requires Construct 2.9 for compiled parsing and Construct 2.10 for compiled building, preferably the newest version to date. More importantly, you should have a test suite of your own.
Construct aims to be reliable, but the compiler makes a lot of undocumented assumptions, and generates code that "takes shortcuts" a lot. Since some checks are omitted by generated code, you should not use it to parse corrupted or untrusted data. + + +Restrictions +--------------- + +Compiled classes only parse and build faster, sizeof defers to core classes + +Sizeof is applied during compilation (not during parsing and building) + +Lambdas (unlike ``this`` expressions) are not supported + +Exceptions do not include ``path`` information + +``enum34`` is not supported, please use pure ``Enum`` constructions + +``_index`` context entry is not supported, neither is ``Index`` class + +``Struct``, ``Sequence``, ``FocusedSeq``, ``Union`` and ``LazyStruct`` do not support ``_subcons`` and ``_stream`` context entries + +Parsed hooks are not supported, and neither is the discard option (both are ignored) + +Debugger is not supported, ignored + + +Compiling schemas +=================== + +Every construct (even those that do not compile) has a ``compile`` method (no arguments are required) that also returns a construct (instance of ``Compiled`` class). It may be a good idea to compile something that is used for processing megabyte-sized data or millions of blobs. That compiled instance has ``parse`` and ``build`` methods just like the construct it was compiled from. Therefore, in your code, you can simply reassign the compiled instance over the original one. + +>>> d = Struct("num" / Byte) +>>> d.parse(b"\x01") +Container(num=1) +>>> d = d.compile(filename="copyforinspection.py") +>>> d.parse(b"\x01") +Container(num=1) + +Performance boost can be easily measured. This method also happens to be testing the correctness of the compiled parser, by making sure that both the original and the compiled instance parse into the same results.
+ +>>> print(d.benchmark(sampledata)) +Compiled instance performance: +parsing: 0.0001288388 sec/call +parsing compiled: 0.0000452531 sec/call +building: 0.0001240775 sec/call +building compiled: 0.0001062776 sec/call + + +Motivation +============ + +The code generated by the compiler and the core classes have essentially the same functionality, but there is a noticeable difference in performance. The first half of the performance boost is thanks to pre-processing, as shown in this chapter. Pre-processing means inserting constants instead of variable lookups, where constants are simply values that are known at compile time. The second half is thanks to pypy. This chapter explains the performance difference by comparing ``Struct``, ``FormatField``, ``BytesInteger`` and ``Bytes`` classes, including using the context. Example construct: + +:: + + Struct( + "num8" / Int8ub, + "num24" / Int24ub, + "data" / Bytes(this.num8), + ) + +Compiled parsing code: + +:: + + def read_bytes(io, count): + assert count >= 0 + data = io.read(count) + assert len(data) == count + return data + def parse_struct_1(io, this): + this = Container(_ = this) + try: + this['num8'] = unpack('>B', read_bytes(io, 1))[0] + this['num24'] = int.from_bytes(read_bytes(io, 3), byteorder='big', signed=False) + this['data'] = read_bytes(io, this.num8) + except StopIteration: + pass + del this['_'] + return this + def parseall(io, this): + return parse_struct_1(io, this) + compiledschema = Compiled(None, None, parseall) + +Non-compiled parsing code: + +:: + + def _read_stream(stream, length): + if length < 0: + raise StreamError("length must be non-negative, found %s" % length) + try: + data = stream.read(length) + except Exception: + raise StreamError("stream.read() failed, requested %s bytes" % (length,)) + if len(data) != length: + raise StreamError("could not read enough bytes, expected %d, found %d" % (length, len(data))) + return data + + class FormatField(Construct): + def _parse(self, stream, context, path): + data =
_read_stream(stream, self.length) + try: + return struct.unpack(self.fmtstr, data)[0] + except Exception: + raise FormatFieldError("struct %r error during parsing" % self.fmtstr) + + class BytesInteger(Construct): + def _parse(self, stream, context, path): + length = self.length(context) if callable(self.length) else self.length + data = _read_stream(stream, length) + if self.swapped: + data = data[::-1] + return bytes2integer(data, self.signed) + + class Bytes(Construct): + def _parse(self, stream, context, path): + length = self.length(context) if callable(self.length) else self.length + return _read_stream(stream, length) + + class Renamed(Subconstruct): + def _parse(self, stream, context, path): + path += " -> %s" % (self.name,) + return self.subcon._parse(stream, context, path) + + class Struct(Construct): + def _parse(self, stream, context, path): + obj = Container() + context = Container(_ = context) + context._subcons = Container({sc.name:sc for sc in self.subcons if sc.name}) + for sc in self.subcons: + try: + subobj = sc._parse(stream, context, path) + if sc.name: + obj[sc.name] = subobj + context[sc.name] = subobj + except StopIteration: + break + return obj + + +There are several "shortcuts" that the compiled code does: + +Function calls are relatively expensive, so an inlined expression is faster than a function returning the same exact expression. Therefore ``FormatField`` compiles into ``struct.unpack(..., read_bytes(io, ...))`` directly. + +Literals like ``1`` and ``'>B'`` are faster than object field lookup, dictionary lookup, or passing function arguments. Therefore each instance of ``FormatField`` compiles into a similar expression but with different format-strings and byte-counts inlined, usually literals. + +Passing parameters to functions is slower than just referring to variables in same scope. 
Therefore, for example, compiled ``Struct`` creates ``this`` variable that is accessible to all expressions generated by subcons, as it exists in the same scope, but core ``Struct`` would call ``subcon._parse`` and pass the entire context as parameter value, regardless of whether that subcon even uses a context (for example ``FormatField`` and ``VarInt`` have no need for a context). It's similar but not exactly the same with ``restream`` function. The lambda in the second parameter is rebinding ``io`` to a different object (a stream that gets created inside restream function). On the other hand, ``this`` is not rebound, it exists in the outer scope. + +If statement (or conditional ternary operator) with two possible expressions and a condition that could be evaluated at compile-time is slower than just one or the other expression. Therefore, for example, ``BytesInteger`` does a lookup to check if field is swapped, but compiled ``BytesInteger`` simply inlines ``'big'`` or ``'little'`` literal. Moreover, ``Struct`` checks if each subcon has a name and then inserts a value into the context dictionary, but compiled ``Struct`` simply has an assignment or not. This shortcut also applies to most constructs, those that accept context lambdas as parameters. Generated classes do not need to check if a parameter is a constant or a lambda, because what gets emitted is either something like ``1`` which is a literal, or something like ``this.field`` which is an object lookup. Both are valid expressions and evaluate without red tape or checks. + +Looping over an iterable is slower than a block of code that accesses each item once. The reason it's slower is that each iteration must fetch another item, and also check the termination condition. Loop unrolling technique requires the iterable (or list rather) to be known at compile-time, which is the case with ``Struct`` and ``Sequence`` instances. Therefore, compiled ``Struct`` emits one line per subcon, but core ``Struct`` loops over its subcons.
+ +Function calls that only defer to another function are only wasting CPU cycles. This relates specifically to the ``Renamed`` class, which in compiled code emits the same code as its subcon. The entire functionality of the ``Renamed`` class (maintaining path information) is not supported in compiled code, where it would serve as a mere subconstruct, just deferring to its subcon. + +Building two identical dictionaries is slower than building just one. ``Struct`` maintains two dictionaries (called ``obj`` and ``context``) which differ only by the ``_`` key, but compiled ``Struct`` maintains only one dictionary and removes the ``_`` key before returning it. + +``this`` expressions (not lambdas) are expensive to compute in regular code, but something like ``this.field`` in compiled code is merely one object field lookup. The same applies to ``len_``, ``obj_`` and ``list_`` expressions, since they share their implementation with the ``this`` expression. + +``Container`` is an implementation of a so-called ``AttrDict``. It captures access to its attributes (``field`` in ``this.field``) and treats it as dictionary key access (``this.field`` becomes ``this["field"]``). However, due to internal CPython drawbacks, capturing attribute access involves some red tape, unlike accessing keys, which is done directly. Therefore compiled ``Struct`` emits lines that assign to ``Container`` keys, not attributes. + + +Empirical evidence +--------------------- + +The "shortcuts" that are described above are not much, but amount to quite a large portion of the actual run-time. In fact, they amount to about a third (31%) of the entire run-time. Note that this benchmark includes only pure-python compile-time optimisations. + +Notice that results are in microseconds (10**-6). 
+ +:: + + -------------------------------- benchmark: 158 tests -------------------------------- + Name (time in us) Min StdDev + -------------------------------------------------------------------------------------- + test_class_array_parse 284.7820 (74.05) 31.0403 (118.46) + test_class_array_parse_compiled 73.6430 (19.15) 10.7624 (41.07) + test_class_greedyrange_parse 325.6610 (84.67) 31.8383 (121.50) + test_class_greedyrange_parse_compiled 300.9270 (78.24) 24.0149 (91.65) + test_class_repeatuntil_parse 10.2730 (2.67) 0.8322 (3.18) + test_class_repeatuntil_parse_compiled 7.3020 (1.90) 1.3155 (5.02) + test_class_string_parse 21.2270 (5.52) 1.3555 (5.17) + test_class_string_parse_compiled 18.9030 (4.91) 1.6023 (6.11) + test_class_cstring_parse 10.9060 (2.84) 1.0971 (4.19) + test_class_cstring_parse_compiled 9.4050 (2.45) 1.6083 (6.14) + test_class_pascalstring_parse 7.9290 (2.06) 0.4959 (1.89) + test_class_pascalstring_parse_compiled 6.6670 (1.73) 0.6601 (2.52) + test_class_struct_parse 43.5890 (11.33) 4.4993 (17.17) + test_class_struct_parse_compiled 18.7370 (4.87) 2.0198 (7.71) + test_class_sequence_parse 20.7810 (5.40) 2.6298 (10.04) + test_class_sequence_parse_compiled 11.9820 (3.12) 3.2669 (12.47) + test_class_union_parse 91.0570 (23.68) 10.2126 (38.97) + test_class_union_parse_compiled 31.9240 (8.30) 3.5955 (13.72) + test_overall_parse 3,200.7850 (832.23) 224.9197 (858.34) + test_overall_parse_compiled 2,229.9610 (579.81) 118.2029 (451.09) + -------------------------------------------------------------------------------------- + +.. 
+ -------------------------------- benchmark: 158 tests -------------------------------- + Name (time in us) Min StdDev + -------------------------------------------------------------------------------------- + test_class_aligned_build 7.8420 (2.04) 0.8678 (3.31) + test_class_aligned_parse 6.6060 (1.72) 0.6813 (2.60) + test_class_aligned_parse_compiled 5.3540 (1.39) 1.4117 (5.39) + test_class_array_build 326.6060 (84.92) 38.4864 (146.87) + test_class_array_parse 284.7820 (74.05) 31.0403 (118.46) + test_class_array_parse_compiled 73.6430 (19.15) 10.7624 (41.07) + test_class_bitsinteger_build 19.5040 (5.07) 0.9291 (3.55) + test_class_bitsinteger_parse 19.2790 (5.01) 3.8293 (14.61) + test_class_bitsinteger_parse_compiled 17.9910 (4.68) 4.5695 (17.44) + test_class_bitsswapped1_build 20.2650 (5.27) 2.7666 (10.56) + test_class_bitsswapped1_parse 18.8030 (4.89) 3.6720 (14.01) + test_class_bitsswapped1_parse_compiled 18.3760 (4.78) 3.1836 (12.15) + test_class_bitsswapped2_build 860.2690 (223.68) 65.2748 (249.10) + test_class_bitsswapped2_parse 810.8180 (210.82) 113.5936 (433.50) + test_class_bitwise1_build 38.3340 (9.97) 2.8267 (10.79) + test_class_bitwise1_parse 19.0340 (4.95) 1.6937 (6.46) + test_class_bitwise1_parse_compiled 18.3380 (4.77) 1.9169 (7.32) + test_class_bitwise2_build 5,181.2200 (>1000.0) 176.1713 (672.30) + test_class_bitwise2_parse 4,641.4420 (>1000.0) 149.0798 (568.92) + test_class_bytes_build 5.2700 (1.37) 0.3894 (1.49) + test_class_bytes_parse 4.3720 (1.14) 0.2620 (1.0) + test_class_bytes_parse_compiled 4.3770 (1.14) 0.4845 (1.85) + test_class_bytesinteger_build 7.1130 (1.85) 0.5597 (2.14) + test_class_bytesinteger_parse 6.1550 (1.60) 0.8879 (3.39) + test_class_bytesinteger_parse_compiled 5.9690 (1.55) 0.8120 (3.10) + test_class_byteswapped1_build 7.8880 (2.05) 1.6156 (6.17) + test_class_byteswapped1_parse 6.6990 (1.74) 1.4248 (5.44) + test_class_byteswapped1_parse_compiled 5.8140 (1.51) 1.0893 (4.16) + test_class_bytewise1_build 54.3910 (14.14) 3.5353 
(13.49) + test_class_bytewise1_parse 51.2590 (13.33) 4.9621 (18.94) + test_class_bytewise1_parse_compiled 51.1530 (13.30) 5.0922 (19.43) + test_class_bytewise2_build 1,264.2500 (328.72) 76.9591 (293.69) + test_class_bytewise2_parse 1,233.1150 (320.62) 65.5335 (250.09) + test_class_check_build 7.7850 (2.02) 0.9710 (3.71) + test_class_check_parse 7.5500 (1.96) 1.0495 (4.01) + test_class_check_parse_compiled 5.7900 (1.51) 0.7776 (2.97) + test_class_computed_build 6.7760 (1.76) 0.6328 (2.41) + test_class_computed_parse 6.5940 (1.71) 0.6383 (2.44) + test_class_computed_parse_compiled 6.7670 (1.76) 0.7396 (2.82) + test_class_const_build 5.8600 (1.52) 0.6461 (2.47) + test_class_const_parse 4.8930 (1.27) 0.3691 (1.41) + test_class_const_parse_compiled 4.6680 (1.21) 0.6549 (2.50) + test_class_cstring_build 7.7910 (2.03) 32.0498 (122.31) + test_class_cstring_parse 10.9060 (2.84) 1.0971 (4.19) + test_class_cstring_parse_compiled 9.4050 (2.45) 1.6083 (6.14) + test_class_default_build 5.8910 (1.53) 0.7784 (2.97) + test_class_default_parse 5.0430 (1.31) 0.5048 (1.93) + test_class_default_parse_compiled 4.7200 (1.23) 0.7015 (2.68) + test_class_enum_build 6.4310 (1.67) 0.4820 (1.84) + test_class_enum_parse 6.4100 (1.67) 0.2944 (1.12) + test_class_enum_parse_compiled 4.9280 (1.28) 0.5852 (2.23) + test_class_flag_build 4.7740 (1.24) 0.5016 (1.91) + test_class_flag_parse 4.2450 (1.10) 0.8202 (3.13) + test_class_flag_parse_compiled 4.4510 (1.16) 0.7262 (2.77) + test_class_flagsenum_build 9.5940 (2.49) 2.3077 (8.81) + test_class_flagsenum_parse 14.9890 (3.90) 1.1867 (4.53) + test_class_flagsenum_parse_compiled 12.5860 (3.27) 7.8440 (29.93) + test_class_focusedseq_build 27.4290 (7.13) 3.5810 (13.67) + test_class_focusedseq_parse 23.9230 (6.22) 2.9801 (11.37) + test_class_focusedseq_parse_compiled 11.4680 (2.98) 1.8008 (6.87) + test_class_formatfield_build 5.3830 (1.40) 0.3952 (1.51) + test_class_formatfield_parse 4.7820 (1.24) 0.3797 (1.45) + test_class_formatfield_parse_compiled 4.7870 
(1.24) 0.7985 (3.05) + test_class_greedybytes_build 3.9610 (1.03) 0.5677 (2.17) + test_class_greedybytes_parse 3.8460 (1.0) 0.3800 (1.45) + test_class_greedybytes_parse_compiled 3.9150 (1.02) 0.4162 (1.59) + test_class_greedyrange_build 328.9710 (85.54) 17.5818 (67.10) + test_class_greedyrange_parse 325.6610 (84.67) 31.8383 (121.50) + test_class_greedyrange_parse_compiled 300.9270 (78.24) 24.0149 (91.65) + test_class_greedystring_build 5.3440 (1.39) 0.6892 (2.63) + test_class_greedystring_parse 5.0730 (1.32) 0.9543 (3.64) + test_class_greedystring_parse_compiled 4.5540 (1.18) 0.5366 (2.05) + test_class_hex_build 4.6150 (1.20) 0.5106 (1.95) + test_class_hex_parse 5.2830 (1.37) 0.8942 (3.41) + test_class_hex_parse_compiled 3.9050 (1.02) 0.6158 (2.35) + test_class_hexdump_build 4.6340 (1.20) 0.8433 (3.22) + test_class_hexdump_parse 5.0960 (1.33) 1.0297 (3.93) + test_class_hexdump_parse_compiled 3.9120 (1.02) 0.7631 (2.91) + test_class_ifthenelse_build 8.9100 (2.32) 0.9234 (3.52) + test_class_ifthenelse_parse 8.3680 (2.18) 0.7548 (2.88) + test_class_ifthenelse_parse_compiled 6.7390 (1.75) 0.7323 (2.79) + test_class_mapping_build 6.3000 (1.64) 0.9057 (3.46) + test_class_mapping_parse 5.6000 (1.46) 1.6992 (6.48) + test_class_mapping_parse_compiled 4.9730 (1.29) 0.6396 (2.44) + test_class_namedtuple1_build 18.0560 (4.69) 2.1252 (8.11) + test_class_namedtuple1_parse 16.8770 (4.39) 2.5048 (9.56) + test_class_namedtuple1_parse_compiled 9.0800 (2.36) 1.3966 (5.33) + test_class_namedtuple2_build 46.3020 (12.04) 4.8023 (18.33) + test_class_namedtuple2_parse 34.1590 (8.88) 3.9813 (15.19) + test_class_namedtuple2_parse_compiled 16.1740 (4.21) 2.1471 (8.19) + test_class_numpy_build 212.2070 (55.18) 19.0170 (72.57) + test_class_numpy_parse 287.4910 (74.75) 1,033.8723 (>1000.0) + test_class_numpy_parse_compiled 289.1160 (75.17) 31.5770 (120.50) + test_class_padded_build 7.6610 (1.99) 1.0465 (3.99) + test_class_padded_parse 6.5550 (1.70) 0.8192 (3.13) + 
test_class_padded_parse_compiled 5.3810 (1.40) 0.6683 (2.55) + test_class_padding_build 6.1410 (1.60) 0.4382 (1.67) + test_class_padding_parse 5.3390 (1.39) 0.3259 (1.24) + test_class_padding_parse_compiled 4.5490 (1.18) 0.6567 (2.51) + test_class_pascalstring_build 9.0730 (2.36) 0.6574 (2.51) + test_class_pascalstring_parse 7.9290 (2.06) 0.4959 (1.89) + test_class_pascalstring_parse_compiled 6.6670 (1.73) 0.6601 (2.52) + test_class_peek_build 14.8610 (3.86) 1.5169 (5.79) + test_class_peek_parse 19.3210 (5.02) 1.7638 (6.73) + test_class_peek_parse_compiled 11.9050 (3.10) 1.2330 (4.71) + test_class_pickled_build 5.5730 (1.45) 0.8605 (3.28) + test_class_pickled_parse 8.1680 (2.12) 0.8642 (3.30) + test_class_pickled_parse_compiled 8.9110 (2.32) 1.5638 (5.97) + test_class_pointer_build 7.2010 (1.87) 0.3975 (1.52) + test_class_pointer_parse 6.3530 (1.65) 0.6129 (2.34) + test_class_pointer_parse_compiled 5.7300 (1.49) 0.6892 (2.63) + test_class_prefixed_build 7.8600 (2.04) 0.4987 (1.90) + test_class_prefixed_parse 6.8100 (1.77) 0.7110 (2.71) + test_class_prefixed_parse_compiled 6.1950 (1.61) 0.6435 (2.46) + test_class_prefixedarray_build 855.3260 (222.39) 55.4369 (211.56) + test_class_prefixedarray_parse 757.6910 (197.01) 49.8982 (190.42) + test_class_prefixedarray_parse_compiled 184.4760 (47.97) 14.9617 (57.10) + test_class_rawcopy_build1 13.3870 (3.48) 2.1631 (8.25) + test_class_rawcopy_build2 16.8280 (4.38) 3.4464 (13.15) + test_class_rawcopy_parse 14.4990 (3.77) 1.3540 (5.17) + test_class_rawcopy_parse_compiled 14.9130 (3.88) 4.8756 (18.61) + test_class_rebuild_build 5.8890 (1.53) 0.5504 (2.10) + test_class_rebuild_parse 5.0030 (1.30) 0.6272 (2.39) + test_class_rebuild_parse_compiled 4.8300 (1.26) 0.5108 (1.95) + test_class_repeatuntil_build 11.1090 (2.89) 0.8754 (3.34) + test_class_repeatuntil_parse 10.2730 (2.67) 0.8322 (3.18) + test_class_repeatuntil_parse_compiled 7.3020 (1.90) 1.3155 (5.02) + test_class_select_build 19.3270 (5.03) 2.1872 (8.35) + 
test_class_select_parse 5.5500 (1.44) 0.5927 (2.26) + test_class_select_parse_compiled 5.9140 (1.54) 0.9409 (3.59) + test_class_sequence_build 23.9440 (6.23) 3.7300 (14.23) + test_class_sequence_parse 20.7810 (5.40) 2.6298 (10.04) + test_class_sequence_parse_compiled 11.9820 (3.12) 3.2669 (12.47) + test_class_string_build 8.4160 (2.19) 0.5589 (2.13) + test_class_string_parse 21.2270 (5.52) 1.3555 (5.17) + test_class_string_parse_compiled 18.9030 (4.91) 1.6023 (6.11) + test_class_struct_build 49.0800 (12.76) 3.9414 (15.04) + test_class_struct_parse 43.5890 (11.33) 4.4993 (17.17) + test_class_struct_parse_compiled 18.7370 (4.87) 2.0198 (7.71) + test_class_switch_build 9.2500 (2.41) 0.4969 (1.90) + test_class_switch_parse 8.4710 (2.20) 0.7958 (3.04) + test_class_switch_parse_compiled 7.1160 (1.85) 0.7794 (2.97) + test_class_timestamp1_build 9.7510 (2.54) 1.0072 (3.84) + test_class_timestamp1_parse 29.7140 (7.73) 2.7236 (10.39) + test_class_timestamp1_parse_compiled 30.2160 (7.86) 3.5592 (13.58) + test_class_timestamp2_build 100.4570 (26.12) 15.4131 (58.82) + test_class_timestamp2_parse 106.5390 (27.70) 12.0199 (45.87) + test_class_timestamp2_parse_compiled 107.6340 (27.99) 17.3917 (66.37) + test_class_union_build 55.8850 (14.53) 6.5646 (25.05) + test_class_union_parse 91.0570 (23.68) 10.2126 (38.97) + test_class_union_parse_compiled 31.9240 (8.30) 3.5955 (13.72) + test_class_varint_build 14.9650 (3.89) 0.8179 (3.12) + test_class_varint_parse 18.6660 (4.85) 1.6747 (6.39) + test_class_varint_parse_compiled 19.6660 (5.11) 5.0212 (19.16) + test_overall_build 2,848.2370 (740.57) 5,609.2037 (>1000.0) + test_overall_build_compiled 2,852.9260 (741.79) 163.0128 (622.09) + test_overall_parse 3,200.7850 (832.23) 224.9197 (858.34) + test_overall_parse_compiled 2,229.9610 (579.81) 118.2029 (451.09) + -------------------------------------------------------------------------------------- + + +Motivation, part 2 +===================== + +The second part of optimisation is just 
running the generated code on pypy. Since pypy is not using any type annotations, there is nothing to discuss in this chapter. The benchmark reflects the same code as in the previous chapter, but run on PyPy 2.7 rather than CPython 3.6. + +Empirical evidence +--------------------- + +Notice that results are in nanoseconds (10**-9). + +:: + + ------------------------------------- benchmark: 152 tests ------------------------------------ + Name (time in ns) Min StdDev + ----------------------------------------------------------------------------------------------- + test_class_array_parse 11,042.9974 (103.52) 40,792.8559 (46.97) + test_class_array_parse_compiled 9,088.0058 (85.20) 43,001.3909 (49.52) + test_class_greedyrange_parse 14,402.0014 (135.01) 49,834.2047 (57.38) + test_class_greedyrange_parse_compiled 9,801.0059 (91.88) 39,296.4529 (45.25) + test_class_repeatuntil_parse 318.4996 (2.99) 2,469.5524 (2.84) + test_class_repeatuntil_parse_compiled 309.3746 (2.90) 103,425.2134 (119.09) + test_class_string_parse 966.8991 (9.06) 537,241.0095 (618.62) + test_class_string_parse_compiled 726.6994 (6.81) 3,719.2657 (4.28) + test_class_cstring_parse 782.2993 (7.33) 4,111.8970 (4.73) + test_class_cstring_parse_compiled 591.1992 (5.54) 479,164.9746 (551.75) + test_class_pascalstring_parse 465.0911 (4.36) 4,262.4397 (4.91) + test_class_pascalstring_parse_compiled 298.4118 (2.80) 122,279.2150 (140.80) + test_class_struct_parse 2,633.9985 (24.69) 14,654.3095 (16.87) + test_class_struct_parse_compiled 949.7991 (8.90) 4,228.2890 (4.87) + test_class_sequence_parse 1,310.6008 (12.29) 5,811.8046 (6.69) + test_class_sequence_parse_compiled 732.2000 (6.86) 4,703.9483 (5.42) + test_class_union_parse 5,619.9933 (52.69) 30,590.0630 (35.22) + test_class_union_parse_compiled 2,699.9987 (25.31) 15,888.8206 (18.30) + test_overall_parse 1,332,581.9891 (>1000.0) 2,274,995.4192 (>1000.0) + test_overall_parse_compiled 690,380.0095 (>1000.0) 602,697.9721 (694.00) + 
----------------------------------------------------------------------------------------------- + +.. + ------------------------------------- benchmark: 152 tests ------------------------------------ + Name (time in ns) Min StdDev + ----------------------------------------------------------------------------------------------- + test_class_aligned_build 740.5994 (6.94) 4,143.5039 (4.77) + test_class_aligned_parse 602.1000 (5.64) 4,001.4447 (4.61) + test_class_aligned_parse_compiled 237.5240 (2.23) 233,368.4415 (268.72) + test_class_array_build 12,085.9913 (113.30) 4,199,133.4429 (>1000.0) + test_class_array_parse 11,042.9974 (103.52) 40,792.8559 (46.97) + test_class_array_parse_compiled 9,088.0058 (85.20) 43,001.3909 (49.52) + test_class_bitsinteger_build 3,602.4940 (33.77) 1,177,244.9019 (>1000.0) + test_class_bitsinteger_parse 2,823.5008 (26.47) 14,156.0060 (16.30) + test_class_bitsinteger_parse_compiled 2,768.9966 (25.96) 14,832.6464 (17.08) + test_class_bitsswapped1_build 5,726.9935 (53.69) 29,157.1889 (33.57) + test_class_bitsswapped1_parse 6,172.9952 (57.87) 28,735.2233 (33.09) + test_class_bitsswapped1_parse_compiled 5,715.9923 (53.59) 26,115.4525 (30.07) + test_class_bitsswapped2_build 38,265.0032 (358.72) 92,216.9408 (106.19) + test_class_bitsswapped2_parse 36,199.9992 (339.36) 99,672.2831 (114.77) + test_class_bitwise1_build 7,979.0043 (74.80) 18,320.0158 (21.10) + test_class_bitwise1_parse 5,914.0002 (55.44) 15,593.2498 (17.96) + test_class_bitwise1_parse_compiled 5,969.9960 (55.97) 10,953.7787 (12.61) + test_class_bitwise2_build 136,212.0092 (>1000.0) 126,711.5616 (145.91) + test_class_bitwise2_parse 120,290.0021 (>1000.0) 100,256.6237 (115.44) + test_class_bytes_build 106.6699 (1.0) 45,663.4740 (52.58) + test_class_bytes_parse 166.0601 (1.56) 26,090.0331 (30.04) + test_class_bytes_parse_compiled 172.6300 (1.62) 38,715.3059 (44.58) + test_class_bytesinteger_build 440.4998 (4.13) 2,794.5403 (3.22) + test_class_bytesinteger_parse 397.6915 (3.73) 
2,760.2520 (3.18) + test_class_bytesinteger_parse_compiled 404.1537 (3.79) 314,221.4811 (361.82) + test_class_byteswapped1_build 423.0011 (3.97) 439,883.6772 (506.52) + test_class_byteswapped1_parse 700.1989 (6.56) 5,650.5263 (6.51) + test_class_byteswapped1_parse_compiled 467.4551 (4.38) 375,681.4718 (432.59) + test_class_bytewise1_build 13,313.0088 (124.81) 40,142.8640 (46.22) + test_class_bytewise1_parse 13,626.0060 (127.74) 2,380,928.9149 (>1000.0) + test_class_bytewise1_parse_compiled 13,586.0028 (127.36) 35,062.2700 (40.37) + test_class_bytewise2_build 72,109.9932 (676.01) 73,553.4202 (84.70) + test_class_bytewise2_parse 66,791.9958 (626.16) 140,635.6099 (161.94) + test_class_check_build 740.6998 (6.94) 4,307.2706 (4.96) + test_class_check_parse 541.0999 (5.07) 3,440.5007 (3.96) + test_class_check_parse_compiled 545.6997 (5.12) 679,945.6527 (782.95) + test_class_computed_build 679.1000 (6.37) 605,315.9050 (697.01) + test_class_computed_parse 526.0008 (4.93) 3,428.9984 (3.95) + test_class_computed_parse_compiled 552.2001 (5.18) 3,464.2913 (3.99) + test_class_const_build 310.6879 (2.91) 2,745.9160 (3.16) + test_class_const_parse 176.2500 (1.65) 79,386.8928 (91.41) + test_class_const_parse_compiled 182.1501 (1.71) 94,547.7996 (108.87) + test_class_cstring_build 491.0001 (4.60) 3,734.7308 (4.30) + test_class_cstring_parse 782.2993 (7.33) 4,111.8970 (4.73) + test_class_cstring_parse_compiled 591.1992 (5.54) 479,164.9746 (551.75) + test_class_default_build 461.9995 (4.33) 3,437.9897 (3.96) + test_class_default_parse 220.9200 (2.07) 875.7176 (1.01) + test_class_default_parse_compiled 167.3000 (1.57) 115,216.5525 (132.67) + test_class_enum_build 318.2495 (2.98) 329,774.1824 (379.73) + test_class_enum_parse 216.3301 (2.03) 98,506.1576 (113.43) + test_class_enum_parse_compiled 150.8200 (1.41) 56,082.0649 (64.58) + test_class_flag_build 204.2799 (1.92) 130,206.5059 (149.93) + test_class_flag_parse 153.9801 (1.44) 100,694.1426 (115.95) + test_class_flag_parse_compiled 
139.8900 (1.31) 868.4449 (1.0) + test_class_flagsenum_build 573.3993 (5.38) 4,344.7692 (5.00) + test_class_flagsenum_parse 652.1004 (6.11) 422,339.3586 (486.32) + test_class_flagsenum_parse_compiled 464.5461 (4.35) 3,596.9171 (4.14) + test_class_focusedseq_build 2,233.9998 (20.94) 6,533.8875 (7.52) + test_class_focusedseq_parse 1,345.1005 (12.61) 5,739.1458 (6.61) + test_class_focusedseq_parse_compiled 615.0003 (5.77) 3,967.2471 (4.57) + test_class_formatfield_build 282.0557 (2.64) 286,541.4444 (329.95) + test_class_formatfield_parse 237.0500 (2.22) 63,666.5654 (73.31) + test_class_formatfield_parse_compiled 154.2599 (1.45) 35,054.4102 (40.36) + test_class_greedybytes_build 110.4000 (1.03) 89,466.1548 (103.02) + test_class_greedybytes_parse 117.2700 (1.10) 94,205.4030 (108.48) + test_class_greedybytes_parse_compiled 118.3101 (1.11) 88,084.6992 (101.43) + test_class_greedyrange_build 12,186.0066 (114.24) 37,782.4850 (43.51) + test_class_greedyrange_parse 14,402.0014 (135.01) 49,834.2047 (57.38) + test_class_greedyrange_parse_compiled 9,801.0059 (91.88) 39,296.4529 (45.25) + test_class_greedystring_build 348.3331 (3.27) 3,029.8253 (3.49) + test_class_greedystring_parse 473.3645 (4.44) 3,041.7270 (3.50) + test_class_greedystring_parse_compiled 409.9241 (3.84) 387,658.3773 (446.38) + test_class_hex_build 459.6355 (4.31) 4,006.9444 (4.61) + test_class_hex_parse 291.4441 (2.73) 182,038.6025 (209.61) + test_class_hex_parse_compiled 126.4800 (1.19) 84,815.3901 (97.66) + test_class_hexdump_build 450.4157 (4.22) 3,790.8239 (4.37) + test_class_hexdump_parse 284.8335 (2.67) 294,559.8261 (339.18) + test_class_hexdump_parse_compiled 128.8101 (1.21) 78,435.0791 (90.32) + test_class_ifthenelse_build 982.9993 (9.22) 4,688.0488 (5.40) + test_class_ifthenelse_parse 851.1997 (7.98) 580,777.8856 (668.76) + test_class_ifthenelse_parse_compiled 733.0003 (6.87) 4,714.3734 (5.43) + test_class_mapping_build 336.3336 (3.15) 419,990.5974 (483.61) + test_class_mapping_parse 226.8000 (2.13) 
111,247.9039 (128.10) + test_class_mapping_parse_compiled 184.2000 (1.73) 872.1972 (1.00) + test_class_namedtuple1_build 918.4005 (8.61) 3,765.2820 (4.34) + test_class_namedtuple1_parse 673.6998 (6.32) 3,434.7049 (3.96) + test_class_namedtuple1_parse_compiled 610.4994 (5.72) 551,488.8854 (635.03) + test_class_namedtuple2_build 3,212.0006 (30.11) 13,384.9602 (15.41) + test_class_namedtuple2_parse 1,786.3000 (16.75) 4,818.3417 (5.55) + test_class_namedtuple2_parse_compiled 728.0993 (6.83) 3,332.2180 (3.84) + test_class_padded_build 732.6991 (6.87) 3,967.5355 (4.57) + test_class_padded_parse 583.3004 (5.47) 4,356.6780 (5.02) + test_class_padded_parse_compiled 301.4703 (2.83) 305,922.3763 (352.26) + test_class_padding_build 499.1823 (4.68) 3,525.5175 (4.06) + test_class_padding_parse 350.1996 (3.28) 328,502.3785 (378.27) + test_class_padding_parse_compiled 192.7000 (1.81) 82,517.9180 (95.02) + test_class_pascalstring_build 483.4543 (4.53) 243,109.6546 (279.94) + test_class_pascalstring_parse 465.0911 (4.36) 4,262.4397 (4.91) + test_class_pascalstring_parse_compiled 298.4118 (2.80) 122,279.2150 (140.80) + test_class_peek_build 952.7997 (8.93) 6,047.5404 (6.96) + test_class_peek_parse 1,454.3999 (13.63) 774,202.5660 (891.48) + test_class_peek_parse_compiled 438.8183 (4.11) 3,811.7552 (4.39) + test_class_pointer_build 576.9005 (5.41) 3,782.3046 (4.36) + test_class_pointer_parse 377.6430 (3.54) 393,433.4406 (453.03) + test_class_pointer_parse_compiled 210.3799 (1.97) 947.6097 (1.09) + test_class_prefixed_build 888.7000 (8.33) 5,004.2176 (5.76) + test_class_prefixed_parse 757.0008 (7.10) 524,495.2616 (603.95) + test_class_prefixed_parse_compiled 471.9080 (4.42) 439,226.7896 (505.76) + test_class_prefixedarray_build 37,869.9915 (355.02) 59,808.3893 (68.87) + test_class_prefixedarray_parse 29,731.0035 (278.72) 10,591,190.0651 (>1000.0) + test_class_prefixedarray_parse_compiled 22,710.9995 (212.91) 65,049.0162 (74.90) + test_class_rawcopy_build1 1,041.5999 (9.76) 5,312.0368 
(6.12) + test_class_rawcopy_build2 1,513.5010 (14.19) 931,668.4553 (>1000.0) + test_class_rawcopy_parse 1,064.9004 (9.98) 5,628.3455 (6.48) + test_class_rawcopy_parse_compiled 669.7999 (6.28) 4,616.0835 (5.32) + test_class_rebuild_build 409.5006 (3.84) 3,371.2846 (3.88) + test_class_rebuild_parse 225.8090 (2.12) 1,961.0702 (2.26) + test_class_rebuild_parse_compiled 164.7700 (1.54) 82,487.8733 (94.98) + test_class_repeatuntil_build 475.6360 (4.46) 3,568.2374 (4.11) + test_class_repeatuntil_parse 318.4996 (2.99) 2,469.5524 (2.84) + test_class_repeatuntil_parse_compiled 309.3746 (2.90) 103,425.2134 (119.09) + test_class_select_build 7,528.9863 (70.58) 23,358.3203 (26.90) + test_class_select_parse 395.7684 (3.71) 468,021.0341 (538.92) + test_class_select_parse_compiled 194.6000 (1.82) 911.6117 (1.05) + test_class_sequence_build 1,521.9004 (14.27) 6,600.0406 (7.60) + test_class_sequence_parse 1,310.6008 (12.29) 5,811.8046 (6.69) + test_class_sequence_parse_compiled 732.2000 (6.86) 4,703.9483 (5.42) + test_class_string_build 535.1001 (5.02) 289,163.7688 (332.97) + test_class_string_parse 966.8991 (9.06) 537,241.0095 (618.62) + test_class_string_parse_compiled 726.6994 (6.81) 3,719.2657 (4.28) + test_class_struct_build 2,857.5014 (26.79) 16,764.1319 (19.30) + test_class_struct_parse 2,633.9985 (24.69) 14,654.3095 (16.87) + test_class_struct_parse_compiled 949.7991 (8.90) 4,228.2890 (4.87) + test_class_switch_build 1,079.1002 (10.12) 4,754.6705 (5.47) + test_class_switch_parse 948.8998 (8.90) 4,558.0161 (5.25) + test_class_switch_parse_compiled 783.7996 (7.35) 4,640.9683 (5.34) + test_class_timestamp1_build 771.2006 (7.23) 3,534.5051 (4.07) + test_class_timestamp1_parse 2,018.1993 (18.92) 5,448.9309 (6.27) + test_class_timestamp1_parse_compiled 1,970.7004 (18.47) 891,363.4033 (>1000.0) + test_class_timestamp2_build 5,808.9936 (54.46) 28,921.4390 (33.30) + test_class_timestamp2_parse 7,547.0016 (70.75) 38,718.9886 (44.58) + test_class_timestamp2_parse_compiled 7,391.9946 
 (69.30) 36,903.9105 (42.49) + test_class_union_build 3,535.9990 (33.15) 17,829.5208 (20.53) + test_class_union_parse 5,619.9933 (52.69) 30,590.0630 (35.22) + test_class_union_parse_compiled 2,699.9987 (25.31) 15,888.8206 (18.30) + test_class_varint_build 944.5997 (8.86) 5,002.7418 (5.76) + test_class_varint_parse 861.3002 (8.07) 4,343.2995 (5.00) + test_class_varint_parse_compiled 863.2996 (8.09) 4,426.6909 (5.10) + test_overall_build 554,530.0082 (>1000.0) 475,067.7994 (547.03) + test_overall_build_compiled 358,168.0066 (>1000.0) 127,081.1333 (146.33) + test_overall_parse 1,332,581.9891 (>1000.0) 2,274,995.4192 (>1000.0) + test_overall_parse_compiled 690,380.0095 (>1000.0) 602,697.9721 (694.00) + ----------------------------------------------------------------------------------------------- + + +Motivation, part 3 +===================== + +.. warning:: Benchmarks revealed that pypy makes the code run much faster than cython, therefore cython improvements were withdrawn, and compiler now generates pure python code that is compatible with Python 2 including pypy. This chapter is no longer relevant. It remains just for educational purposes. + +This chapter talks about the second half of optimisation, which is due to Cython type annotations and type inference. I should state for the record that I am no expert at Cython, and the following explanations are merely "the way I understand it". Please take that into account when reading it. 
Fourth example: + +:: + + Struct( + "num1" / Int8ul, + "num2" / Int24ul, + "fixedarray1" / Array(3, Int8ul), + "name1" / CString("utf8"), + ) + +:: + + cdef bytes read_bytes(io, int count): + if not count >= 0: raise StreamError + cdef bytes data = io.read(count) + if not len(data) == count: raise StreamError + return data + cdef bytes parse_nullterminatedstring(io, int unitsize, bytes finalunit): + cdef list result = [] + cdef bytes unit + while True: + unit = read_bytes(io, unitsize) + if unit == finalunit: + break + result.append(unit) + return b"".join(result) + def parse_struct_1(io, this): + this = Container(_ = this) + try: + this['num1'] = unpack('= 0`` and ``len(data) == count``. + + +Empirical evidence +--------------------- + +Below micro-benchmarks show the difference between core classes and cython-compiled classes. Only those where performance boost was highest are listed (although they also happen to be the most important), some other classes have little speedup, and some have none. + +Notice that results are in microseconds (10**-6). 
+ +:: + + ------------------------------- benchmark: 152 tests ------------------------------- + Name (time in us) Min StdDev + ------------------------------------------------------------------------------------ + test_class_array_parse 286.5460 (73.85) 42.8831 (89.84) + test_class_array_parse_compiled 30.7200 (7.92) 6.9577 (14.58) + test_class_greedyrange_parse 320.9860 (82.73) 45.9480 (96.26) + test_class_greedyrange_parse_compiled 262.7010 (67.71) 36.4504 (76.36) + test_class_repeatuntil_parse 10.1850 (2.63) 2.4147 (5.06) + test_class_repeatuntil_parse_compiled 6.8880 (1.78) 1.5471 (3.24) + test_class_string_parse 20.4400 (5.27) 4.4044 (9.23) + test_class_string_parse_compiled 9.1470 (2.36) 2.2427 (4.70) + test_class_cstring_parse 11.2290 (2.89) 1.6216 (3.40) + test_class_cstring_parse_compiled 5.6080 (1.45) 1.0321 (2.16) + test_class_pascalstring_parse 7.8560 (2.02) 1.8567 (3.89) + test_class_pascalstring_parse_compiled 5.8910 (1.52) 0.9466 (1.98) + test_class_struct_parse 44.1300 (11.37) 6.8434 (14.34) + test_class_struct_parse_compiled 16.9070 (4.36) 3.0500 (6.39) + test_class_sequence_parse 21.5420 (5.55) 2.6852 (5.63) + test_class_sequence_parse_compiled 10.1530 (2.62) 2.1645 (4.53) + test_class_union_parse 91.9150 (23.69) 10.7812 (22.59) + test_class_union_parse_compiled 22.5970 (5.82) 15.2649 (31.98) + test_overall_parse 2,126.2570 (548.01) 255.0154 (534.27) + test_overall_parse_compiled 1,124.9560 (289.94) 127.4730 (267.06) + ------------------------------------------------------------------------------------ + +.. 
+ ------------------------------- benchmark: 152 tests ------------------------------- + Name (time in us) Min StdDev + ------------------------------------------------------------------------------------ + test_class_aligned_build 7.8110 (2.01) 1.4475 (3.03) + test_class_aligned_parse 6.7560 (1.74) 2.4557 (5.14) + test_class_aligned_parse_compiled 4.7080 (1.21) 1.0038 (2.10) + test_class_array_build 331.7150 (85.49) 45.1915 (94.68) + test_class_array_parse 286.5460 (73.85) 42.8831 (89.84) + test_class_array_parse_compiled 30.7200 (7.92) 6.9577 (14.58) + test_class_bitsinteger_build 19.4150 (5.00) 6.0416 (12.66) + test_class_bitsinteger_parse 19.2520 (4.96) 6.7657 (14.17) + test_class_bitsinteger_parse_compiled 17.4700 (4.50) 11.1148 (23.29) + test_class_bitsswapped1_build 20.0300 (5.16) 3.5605 (7.46) + test_class_bitsswapped1_parse 18.9740 (4.89) 3.1174 (6.53) + test_class_bitsswapped1_parse_compiled 17.4030 (4.49) 3.2099 (6.72) + test_class_bitsswapped2_build 866.5650 (223.34) 99.0145 (207.44) + test_class_bitsswapped2_parse 813.8270 (209.75) 104.6734 (219.29) + test_class_bitwise1_build 38.7430 (9.99) 4.1560 (8.71) + test_class_bitwise1_parse 18.8820 (4.87) 3.8922 (8.15) + test_class_bitwise1_parse_compiled 17.5770 (4.53) 2.1345 (4.47) + test_class_bitwise2_build 5,249.8520 (>1000.0) 247.1093 (517.70) + test_class_bitwise2_parse 4,650.4640 (>1000.0) 605.3646 (>1000.0) + test_class_bytes_build 5.3900 (1.39) 0.7781 (1.63) + test_class_bytes_parse 4.4180 (1.14) 0.4773 (1.0) + test_class_bytes_parse_compiled 4.0220 (1.04) 0.7253 (1.52) + test_class_bytesinteger_build 7.1450 (1.84) 1.4272 (2.99) + test_class_bytesinteger_parse 6.2820 (1.62) 1.4176 (2.97) + test_class_bytesinteger_parse_compiled 5.3420 (1.38) 1.8858 (3.95) + test_class_byteswapped1_build 7.9820 (2.06) 1.5524 (3.25) + test_class_byteswapped1_parse 6.6840 (1.72) 1.2694 (2.66) + test_class_byteswapped1_parse_compiled 4.9890 (1.29) 1.1038 (2.31) + test_class_bytewise1_build 53.7710 (13.86) 5.8007 (12.15) 
+ test_class_bytewise1_parse 49.7540 (12.82) 7.8771 (16.50) + test_class_bytewise1_parse_compiled 48.5480 (12.51) 5.0040 (10.48) + test_class_bytewise2_build 1,270.0850 (327.34) 116.3612 (243.78) + test_class_bytewise2_parse 1,225.2780 (315.79) 99.7644 (209.01) + test_class_check_build 7.9260 (2.04) 1.7875 (3.74) + test_class_check_parse 7.7250 (1.99) 1.7400 (3.65) + test_class_check_parse_compiled 5.8770 (1.51) 1.5456 (3.24) + test_class_computed_build 6.9660 (1.80) 1.0798 (2.26) + test_class_computed_parse 6.6770 (1.72) 1.6214 (3.40) + test_class_computed_parse_compiled 5.6290 (1.45) 0.9689 (2.03) + test_class_const_build 5.9990 (1.55) 1.4849 (3.11) + test_class_const_parse 4.8720 (1.26) 1.1863 (2.49) + test_class_const_parse_compiled 4.2520 (1.10) 0.9856 (2.06) + test_class_cstring_build 7.8570 (2.03) 1.2683 (2.66) + test_class_cstring_parse 11.2290 (2.89) 1.6216 (3.40) + test_class_cstring_parse_compiled 5.6080 (1.45) 1.0321 (2.16) + test_class_default_build 6.0770 (1.57) 1.2640 (2.65) + test_class_default_parse 5.1160 (1.32) 1.1421 (2.39) + test_class_default_parse_compiled 4.4890 (1.16) 1.2474 (2.61) + test_class_enum_build 6.3000 (1.62) 0.9694 (2.03) + test_class_enum_parse 6.3900 (1.65) 0.9849 (2.06) + test_class_enum_parse_compiled 4.5520 (1.17) 0.7292 (1.53) + test_class_flag_build 4.7940 (1.24) 0.6771 (1.42) + test_class_flag_parse 4.3500 (1.12) 0.6541 (1.37) + test_class_flag_parse_compiled 4.1380 (1.07) 0.5723 (1.20) + test_class_flagsenum_build 9.7270 (2.51) 1.1748 (2.46) + test_class_flagsenum_parse 15.2000 (3.92) 2.1840 (4.58) + test_class_flagsenum_parse_compiled 11.6480 (3.00) 1.5491 (3.25) + test_class_focusedseq_build 27.1080 (6.99) 6.3815 (13.37) + test_class_focusedseq_parse 23.6720 (6.10) 3.4153 (7.16) + test_class_focusedseq_parse_compiled 10.7130 (2.76) 2.1026 (4.41) + test_class_formatfield_build 5.3590 (1.38) 1.1223 (2.35) + test_class_formatfield_parse 4.7750 (1.23) 0.8140 (1.71) + test_class_formatfield_parse_compiled 4.4370 (1.14) 
0.9037 (1.89) + test_class_greedybytes_build 4.0550 (1.05) 1.1607 (2.43) + test_class_greedybytes_parse 3.8800 (1.0) 0.5046 (1.06) + test_class_greedybytes_parse_compiled 3.9690 (1.02) 1.1108 (2.33) + test_class_greedyrange_build 332.8790 (85.79) 43.8336 (91.83) + test_class_greedyrange_parse 320.9860 (82.73) 45.9480 (96.26) + test_class_greedyrange_parse_compiled 262.7010 (67.71) 36.4504 (76.36) + test_class_greedystring_build 5.3930 (1.39) 0.7442 (1.56) + test_class_greedystring_parse 5.0800 (1.31) 1.1375 (2.38) + test_class_greedystring_parse_compiled 4.6150 (1.19) 0.9228 (1.93) + test_class_hex_build 4.5730 (1.18) 0.8108 (1.70) + test_class_hex_parse 5.4210 (1.40) 0.9506 (1.99) + test_class_hex_parse_compiled 4.0000 (1.03) 0.8198 (1.72) + test_class_hexdump_build 4.5640 (1.18) 0.8572 (1.80) + test_class_hexdump_parse 5.1660 (1.33) 0.8708 (1.82) + test_class_hexdump_parse_compiled 3.9460 (1.02) 0.8104 (1.70) + test_class_ifthenelse_build 9.0200 (2.32) 3.1983 (6.70) + test_class_ifthenelse_parse 8.5450 (2.20) 4.2003 (8.80) + test_class_ifthenelse_parse_compiled 6.4490 (1.66) 3.5984 (7.54) + test_class_mapping_build 6.1160 (1.58) 0.9536 (2.00) + test_class_mapping_parse 5.5320 (1.43) 0.9137 (1.91) + test_class_mapping_parse_compiled 4.5650 (1.18) 0.8350 (1.75) + test_class_namedtuple1_build 18.3450 (4.73) 2.1664 (4.54) + test_class_namedtuple1_parse 17.1850 (4.43) 2.9482 (6.18) + test_class_namedtuple1_parse_compiled 7.1810 (1.85) 1.0228 (2.14) + test_class_namedtuple2_build 47.7850 (12.32) 6.1995 (12.99) + test_class_namedtuple2_parse 34.4330 (8.87) 3.8498 (8.07) + test_class_namedtuple2_parse_compiled 15.4160 (3.97) 2.5158 (5.27) + test_class_numpy_build 212.5540 (54.78) 27.0343 (56.64) + test_class_numpy_parse 288.5380 (74.37) 45.4344 (95.19) + test_class_numpy_parse_compiled 290.8960 (74.97) 110.2389 (230.95) + test_class_padded_build 7.7810 (2.01) 3.6378 (7.62) + test_class_padded_parse 6.6460 (1.71) 1.2688 (2.66) + test_class_padded_parse_compiled 4.7090 
(1.21) 1.2451 (2.61) + test_class_padding_build 6.1880 (1.59) 1.4536 (3.05) + test_class_padding_parse 5.4070 (1.39) 1.1753 (2.46) + test_class_padding_parse_compiled 4.1200 (1.06) 1.1916 (2.50) + test_class_pascalstring_build 9.1680 (2.36) 1.4623 (3.06) + test_class_pascalstring_parse 7.8560 (2.02) 1.8567 (3.89) + test_class_pascalstring_parse_compiled 5.8910 (1.52) 0.9466 (1.98) + test_class_peek_build 14.8710 (3.83) 2.6207 (5.49) + test_class_peek_parse 19.5870 (5.05) 3.6857 (7.72) + test_class_peek_parse_compiled 10.6000 (2.73) 2.0105 (4.21) + test_class_pickled_build 5.6150 (1.45) 1.2695 (2.66) + test_class_pickled_parse 8.3370 (2.15) 1.5174 (3.18) + test_class_pickled_parse_compiled 8.9810 (2.31) 1.7670 (3.70) + test_class_pointer_build 7.2470 (1.87) 1.3817 (2.89) + test_class_pointer_parse 6.3760 (1.64) 1.2557 (2.63) + test_class_pointer_parse_compiled 5.0970 (1.31) 0.9715 (2.04) + test_class_prefixed_build 7.8970 (2.04) 1.8404 (3.86) + test_class_prefixed_parse 6.7860 (1.75) 1.3916 (2.92) + test_class_prefixed_parse_compiled 5.2350 (1.35) 1.3229 (2.77) + test_class_prefixedarray_build 873.1850 (225.05) 84.7384 (177.53) + test_class_prefixedarray_parse 763.2760 (196.72) 88.0787 (184.53) + test_class_prefixedarray_parse_compiled 79.4790 (20.48) 11.9930 (25.13) + test_class_rawcopy_build1 13.8040 (3.56) 2.1913 (4.59) + test_class_rawcopy_build2 16.9810 (4.38) 2.6092 (5.47) + test_class_rawcopy_parse 15.2890 (3.94) 3.6678 (7.68) + test_class_rawcopy_parse_compiled 14.8570 (3.83) 2.6335 (5.52) + test_class_rebuild_build 6.0380 (1.56) 1.2981 (2.72) + test_class_rebuild_parse 5.1540 (1.33) 0.8264 (1.73) + test_class_rebuild_parse_compiled 4.5160 (1.16) 0.7145 (1.50) + test_class_repeatuntil_build 11.0780 (2.86) 2.4318 (5.09) + test_class_repeatuntil_parse 10.1850 (2.63) 2.4147 (5.06) + test_class_repeatuntil_parse_compiled 6.8880 (1.78) 1.5471 (3.24) + test_class_select_build 19.1100 (4.93) 6.5128 (13.64) + test_class_select_parse 5.6280 (1.45) 3.2641 (6.84) + 
test_class_select_parse_compiled 5.5660 (1.43) 3.7881 (7.94) + test_class_sequence_build 24.5060 (6.32) 5.1873 (10.87) + test_class_sequence_parse 21.5420 (5.55) 2.6852 (5.63) + test_class_sequence_parse_compiled 10.1530 (2.62) 2.1645 (4.53) + test_class_string_build 8.5320 (2.20) 1.8491 (3.87) + test_class_string_parse 20.4400 (5.27) 4.4044 (9.23) + test_class_string_parse_compiled 9.1470 (2.36) 2.2427 (4.70) + test_class_struct_build 49.1730 (12.67) 5.5050 (11.53) + test_class_struct_parse 44.1300 (11.37) 6.8434 (14.34) + test_class_struct_parse_compiled 16.9070 (4.36) 3.0500 (6.39) + test_class_switch_build 9.5110 (2.45) 1.7349 (3.63) + test_class_switch_parse 8.7100 (2.24) 1.9867 (4.16) + test_class_switch_parse_compiled 6.7830 (1.75) 1.1652 (2.44) + test_class_union_build 57.0540 (14.70) 12.0599 (25.27) + test_class_union_parse 91.9150 (23.69) 10.7812 (22.59) + test_class_union_parse_compiled 22.5970 (5.82) 15.2649 (31.98) + test_class_varint_build 15.2000 (3.92) 3.2498 (6.81) + test_class_varint_parse 18.9080 (4.87) 4.2807 (8.97) + test_class_varint_parse_compiled 19.6070 (5.05) 4.0409 (8.47) + test_overall_build 1,970.9570 (507.98) 189.2782 (396.54) + test_overall_build_compiled 1,987.8950 (512.35) 166.3636 (348.54) + test_overall_parse 2,126.2570 (548.01) 255.0154 (534.27) + test_overall_parse_compiled 1,124.9560 (289.94) 127.4730 (267.06) + ------------------------------------------------------------------------------------ + + +Comparison with Kaitai Struct +================================ + +Kaitai Struct is a very respectable competitor, so I believe a benchmark-based comparison should be presented. Construct and Kaitai have very different capabilities: Kaitai supports about a dozen languages, Construct only supports Python, Kaitai offers only basic common features, Construct offers python-only stuff like Numpy and Pickle support, Kaitai does only parsing, Construct does also building. 
In a sense, those libraries are in two different categories (like sumo and karate). There are multiple scenarios where either library would not be usable. + +Example used for comparison: + +:: + + Struct( + "count" / Int32ul, + "items" / Array(this.count, Struct( + "num1" / Int8ul, + "num2" / Int24ul, + "flags" / BitStruct( + "bool1" / Flag, + "num4" / BitsInteger(3), + Padding(4), + ), + "fixedarray1" / Array(3, Int8ul), + "name1" / CString("utf8"), + "name2" / PascalString(Int8ul, "utf8"), + )), + ) + +:: + + meta: + id: comparison_1_kaitai + encoding: utf-8 + endian: le + seq: + - id: count + type: u4 + - id: items + repeat: expr + repeat-expr: count + type: item + types: + item: + seq: + - id: num1 + type: u1 + - id: num2_lo + type: u2 + - id: num2_hi + type: u1 + - id: flags + type: flags + - id: fixedarray1 + repeat: expr + repeat-expr: 3 + type: u1 + - id: name1 + type: strz + - id: len_name2 + type: u1 + - id: name2 + type: str + size: len_name2 + instances: + num2: + value: 'num2_hi << 16 | num2_lo' + types: + flags: + seq: + - id: bool1 + type: b1 + - id: num4 + type: b3 + - id: padding + type: b4 + + +Surprisingly, Kaitai won the benchmark! Honestly, I am shocked and dismayed that it did. The only explanation that I can point to is that Kaitai is parsing structs into class objects (with attributes) while Construct parses into dictionaries (with keys). However, that one detail seems an unlikely explanation for the huge discrepancy in benchmark results. Perhaps there is a flaw in the methodology. But until that is proven, Kaitai gets its respects. Congrats.
+ +:: + + $ python3.6 comparison_1_construct.py + Timeit measurements: + parsing: 0.1024609069 sec/call + parsing compiled: 0.0410809368 sec/call + + $ pypy comparison_1_construct.py + Timeit measurements: + parsing: 0.0108308416 sec/call + parsing compiled: 0.0062594243 sec/call + +:: + + $ python3.6 comparison_1_kaitai.py + Timeit measurements: + parsing: 0.0250326035 sec/call + + $ pypy comparison_1_kaitai.py + Timeit measurements: + parsing: 0.0019435351 sec/call diff --git a/docs/conf.py b/docs/conf.py index 5c3bd382d..d49b903c0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,6 +12,7 @@ # serve to show the default. import sys, os +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -28,6 +29,7 @@ extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest'] autodoc_default_flags = ["members"] +autodoc_member_order = "bysource" # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -43,16 +45,16 @@ # General information about the project. project = u'Construct' -copyright = u'2010, Tomer Filiba' +copyright = u'2023, Arkadiusz Bulski, Tomer Filiba, Corbin Simpson' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = '2.1' +version = '2.10' # The full version, including alpha/beta/rc tags. -release = '2.1' +release = '2.10' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -72,7 +74,7 @@ #default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. 
-#add_function_parentheses = True +add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). @@ -93,12 +95,12 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'default' +html_theme = 'haiku' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +html_theme_options = {"full_logo" : True} # Add any paths that contain custom themes here, relative to this directory. #html_theme_path = [] @@ -112,7 +114,7 @@ # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +html_logo = "_static/construct-logo2.png" # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 @@ -180,8 +182,7 @@ # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ('index', 'construct.tex', u'Construct Documentation', - u'Tomer Filiba', 'manual'), + ('index', 'construct.tex', u'Construct Documentation', u'Arkadiusz Bulski', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of @@ -214,5 +215,5 @@ # (source start file, name, description, authors, manual section). 
man_pages = [ ('index', 'construct', u'Construct Documentation', - [u'Tomer Filiba'], 1) + [u'Arkadiusz Bulski (taken over from Tomer Filiba and Corbin Simpson)'], 1) ] diff --git a/docs/debugging.rst b/docs/debugging.rst index fd3f132a8..d9b03d0d1 100644 --- a/docs/debugging.rst +++ b/docs/debugging.rst @@ -2,155 +2,77 @@ Debugging Construct =================== -Intro -===== -Programming data structures in Construct is much easier than writing the -equivalent procedural code, both in terms of RAD and correctness. However, -sometimes things don't behave the way you expect them to. Yep, a bug. +Programming data structures in Construct is much easier than writing the equivalent procedural code, both in terms of ease-of-use and correctness. However, sometimes things don't behave the way you expect them to. Yep, a bug. + +Most end-user bugs originate from handling the context wrong. Sometimes you forget what nesting level you are at, or you move things around without taking into account the nesting, thus breaking context-based expressions. The two utilities described below should help you out. -Most end-user bugs originate from handling the context wrong. Sometimes you -forget what nesting level you are at, or you move things around without taking -into account the nesting, thus breaking context-based expressions. The two -utilities described below should help you out. Probe -===== - -The Probe simply dumps information to the screen. It will help you inspect the -context tree, the stream, and partially constructed objects, so you can -understand your problem better. It has the same interface as any other field, -and you can just stick it into a Struct, near the place you wish to inspect. -Do note that the printout happens during the construction, before the final -object is ready. - ->>> foo = Struct("foo", -... UBInt8("bar"), -... Probe(), -... UBInt8("baz"), -... 
) ->>> foo.parse("spam spam spam spam bacon and eggs") -Probe -Container: - stream_position = 1 - following_stream_data = - 0000 70 61 6d 20 73 70 61 6d 20 73 70 61 6d 20 73 70 pam spam spam -sp - 0010 61 6d 20 62 61 63 6f 6e 20 61 6e 64 20 65 67 67 am bacon and -egg - 0020 73 s - context = { - '_' : {} - 'bar' : 115 - } - stack = [ - { - 'data' : 'spam spam spam spam bacon and eggs' - 'self' : Struct('foo') - } - { - 'self' : Struct('foo') - 'stream' : - } - { - 'context' : { - '_' : {} - 'bar' : 115 - } - 'obj' : Container: - bar = 115 - 'sc' : Probe('') - 'self' : Struct('foo') - 'stream' : - 'subobj' : 115 - } - { - 'context' : { - '_' : {} - 'bar' : 115 - } - 'self' : Probe('') - 'stream' : - } - ] -Container(bar = 115, baz = 112) +============= +The ``Probe`` simply dumps information to the screen. It will help you inspect the context tree, the stream, and partially constructed objects, so you can understand your problem better. It has the same interface as any other field, and you can just stick it into a ``Struct``, near the place you wish to inspect. Do note that the printout happens during the construction, before the final object is ready. -Debugger -======== +:: -The Debugger is a pdb-based full python debugger. Unlike Probe, Debugger is a -subconstruct (it wraps an inner construct), so you simply put it around the -problematic construct. If no exception occurs, the return value is passed -right through. Otherwise, an interactive debugger pops, letting you tweak -around. + >>> d = Struct( + ... "count" / Byte, + ... "items" / Byte[this.count], + ... Probe(lookahead=32), + ... ) + >>> d.parse(b"\x05abcde\x01\x02\x03") + -------------------------------------------------- + Probe, path is (parsing), into is None + Stream peek: (hexlified) b'010203'... 
+ Container: + count = 5 + items = ListContainer: + 97 + 98 + 99 + 100 + 101 + -------------------------------------------------- + Container(count=5, items=ListContainer([97, 98, 99, 100, 101])) -When an exception occurs while parsing, you can go up (using u) to the level -of the debugger and set self.retval to the desired return value. This allows -you to hot-fix the error. Then use q to quit the debugger prompt and resume -normal execution with the fixed value. However, if you don't set self.retval, -the exception will propagate up. +There is also a feature that looks inside the context and extracts a part of it using a lambda instead of printing the entire context. :: - >>> foo = Struct("foo", - ... UBInt8("bar"), - ... Debugger( - ... Enum(UBInt8("spam"), - ... ABC = 1, - ... DEF = 2, - ... GHI = 3, - ... ) - ... ), - ... UBInt8("eggs"), + >>> d = Struct( + ... "count" / Byte, + ... "items" / Byte[this.count], + ... Probe(this.count), ... ) - >>> - >>> - >>> print foo.parse("\x01\x02\x03") - Container: - bar = 1 - spam = 'DEF' - eggs = 3 - >>> - >>> print foo.parse("\x01\x04\x03") - Debugging exception of MappingAdapter('spam'): - File "d:\projects\construct\debug.py", line 112, in _parse - return self.subcon._parse(stream, context) - File "d:\projects\construct\core.py", line 174, in _parse - return self._decode(self.subcon._parse(stream, context), context) - File "d:\projects\construct\adapters.py", line 77, in _decode - raise MappingError("no decoding mapping for %r" % (obj,)) - MappingError: no decoding mapping for 4 - - (you can set the value of 'self.retval', which will be returned) - > d:\projects\construct\adapters.py(77)_decode() - -> raise MappingError("no decoding mapping for %r" % (obj,)) - (Pdb) - (Pdb) u - > d:\projects\construct\core.py(174)_parse() - -> return self._decode(self.subcon._parse(stream, context), context) - (Pdb) u - > d:\projects\construct\debug.py(115)_parse() - -> self.handle_exc("(you can set the value of 'self.retval', " - (Pdb)
- (Pdb) l - 110 def _parse(self, stream, context): - 111 try: - 112 return self.subcon._parse(stream, context) - 113 except: - 114 self.retval = NotImplemented - 115 -> self.handle_exc("(you can set the value of 'self.retval', - " - 116 "which will be returned)") - 117 if self.retval is NotImplemented: - 118 raise - 119 else: - 120 return self.retval - (Pdb) - (Pdb) self.retval = "QWERTY" + >>> d.parse(b"\x05abcde\x01\x02\x03") + -------------------------------------------------- + Probe, path is (parsing), into is this.count + 5 + -------------------------------------------------- + Container(count=5, items=ListContainer([97, 98, 99, 100, 101])) + + +Debugger +============= + +The ``Debugger`` is a pdb-based full python debugger. Unlike ``Probe``, ``Debugger`` is a subconstruct (it wraps an inner construct), so you simply put it around the problematic construct. If no exception occurs, the return value is passed right through. Otherwise, an interactive debugger pops, letting you tweak around. + +When an exception occurs while parsing, you can go up (using ``u``) to the level of the debugger and set ``self.retval`` to the desired return value. This allows you to hot-fix the error. Then use ``q`` to quit the debugger prompt and resume normal execution with the fixed value. However, if you don't set ``self.retval``, the exception will propagate up. 
+ + +:: + + >>> Debugger(Byte[3]).build([]) + -------------------------------------------------- + Debugging exception of + path is (building) + File "/media/arkadiusz/MAIN/GitHub/construct/construct/debug.py", line 192, in _build + return self.subcon._build(obj, stream, context, path) + File "/media/arkadiusz/MAIN/GitHub/construct/construct/core.py", line 2149, in _build + raise RangeError("expected %d elements, found %d" % (count, len(obj))) + construct.core.RangeError: expected 3 elements, found 0 + + > /media/arkadiusz/MAIN/GitHub/construct/construct/core.py(2149)_build() + -> raise RangeError("expected %d elements, found %d" % (count, len(obj))) (Pdb) q - Container: - bar = 1 - spam = 'QWERTY' - eggs = 3 - >>> + -------------------------------------------------- diff --git a/docs/extending.rst b/docs/extending.rst index 7829019eb..3c36ad266 100644 --- a/docs/extending.rst +++ b/docs/extending.rst @@ -2,86 +2,86 @@ Extending Construct =================== + Adapters ======== -Adapters are the standard way to extend and customize the library. Adapters -operate at the object level (unlike constructs, which operate at the stream -level), and are thus easy to write and more flexible. For more info see, the -adapter tutorial. +Adapters are the standard way to extend and customize the library. Adapters operate at the object level (unlike constructs, which operate at the stream level), and are thus easier to write and are more flexible. For more information see the adapter tutorial. -In order to write custom adapters, implement _encode and _decode:: +In order to write custom adapters, implement ``_decode`` and ``_encode``:: class MyAdapter(Adapter): - def _encode(self, obj, context): - # called at building time to return a modified version of obj - # reverse version of _decode - ... - def _decode(self, obj, context): + def _decode(self, obj, context, path): # called at parsing time to return a modified version of obj - # reverse version of _encode - ... 
+ pass + def _encode(self, obj, context, path): + # called at building time to return a modified version of obj + pass Constructs ========== -Generally speaking, you should not write constructs by yourself: + .. note:: This is a last line of defense. If you are not able to achieve whatever it is you are trying to do with Construct's existing classes, you can always write your own parsing classes. In particular, you can copy-paste existing classes into your source code and modify them slightly. This is the best way to go. -* It's a craft that requires skills and understanding of the internals of the - library (which change over time). -* Adapters should really be all you need and are much more simpler to - implement. -* To make things faster, try using psyco, or write your code in pyrex. The - python-level classes are as fast as it gets, assuming generality. +Generally speaking, you should not write constructs by yourself: +* It's a craft that requires skills and understanding of the internals of the library (which change over time). +* Adapters should really be all you need and are much simpler to implement. +* To make things faster, try using the compilation feature or pypy. The python-level classes are as fast as it gets, assuming generality. -The only reason you might want to write a construct is to achieve something -that's not currently possible. This might be a construct that -computes/corrects the checksum of data... the reason there's no such construct -yet is because I couldn't find an elegant way to do that (although Buffered or -Union may be a good place to start). +The only reason you might want to write a custom class is to achieve something that's not currently possible. This might be a construct that computes/corrects the checksum of data, although that already exists. Or a compression, or hashing. These also exist. But surely there is something that was not invented yet.
If you need a semantics modification to an existing class, you can post a feature request, or copy the code of existing class into your project and modify it. -There are two kinds of constructs: raw construct and subconstructs. +There are at least two kinds of constructs: raw construct and subconstructs. Raw constructs --------------- +--------------------- -Deriving directly of class ``Construct``, raw construct can do as they wish by -implementing ``_parse``, ``_build``, and ``_sizeof``:: +Deriving directly from class ``Construct``, raw constructs can do as they wish by implementing ``_parse``, ``_build`` and ``_sizeof``:: class MyConstruct(Construct): - def _parse(self, stream, context): - # read from the stream (usually not directly) + def _parse(self, stream, context, path): + # read from the stream # return object - ... - def _build(self, obj, stream, context): - # write obj to the stream (usually not directly) - # no return value is necessary - ... - def _sizeof(self, context): - # return computed size, or raise SizeofError if not possible - ... + pass + + def _build(self, obj, stream, context, path): + # write obj to the stream + # return same value (obj) or a modified value + # that will replace the context dictionary entry + pass + + def _sizeof(self, context, path): + # return computed size (when fixed size or depends on context) + # or raise SizeofError (when variable size or unknown) + pass + +Variable size fields typically raise ``SizeofError``, for example ``VarInt`` and ``CString``. Subconstructs -------------- +--------------------- -Deriving of class Subconstruct, subconstructs wrap an inner construct, -inheriting it's properties (name, flags, etc.). In their ``_parse`` and -``_build`` methods, they will call ``self.subcon._parse`` or -``self.subcon._build`` respectively. Most subconstruct do not need to override -``_sizeof``. - -:: +Deriving from class ``Subconstruct``, these wrap an inner construct, inheriting its properties (name and flags). 
In their ``_parse``, ``_build`` and ``_sizeof`` methods, they will call ``self.subcon._parse``, ``self.subcon._build`` and ``self.subcon._sizeof`` respectively. :: class MySubconstruct(Subconstruct): - def _parse(self, stream, context): - obj = self.subcon._parse(stream, context) + def __init__(self, subcon): + self.name = subcon.name + self.subcon = subcon + self.flagbuildnone = subcon.flagbuildnone + + def _parse(self, stream, context, path): + obj = self.subcon._parse(stream, context, path) # do something with obj - # return object - def _build(self, obj, stream, context): + return obj + + def _build(self, obj, stream, context, path): # do something with obj - self.subcon._build(obj, stream, context) - # no return value is necessary + return self.subcon._build(obj, stream, context, path) + # return same value (obj) or a modified value + # that will replace the context dictionary entry + + def _sizeof(self, context, path): + # if not overridden, defers to subcon size + return self.subcon._sizeof(context, path) diff --git a/docs/index.rst b/docs/index.rst index 66a4b4150..d7cde5acc 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,27 +1,106 @@ -.. Construct documentation master file, created by - sphinx-quickstart on Fri Dec 24 05:23:18 2010. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. +.. raw:: html -Welcome to Construct's documentation! -======================================= +
+ Chapters
+ + External links
+ + Personal sites
+ + +
+ + +
+ + +
+
-Contents: + +Construct +========= +.. raw:: html + +
+ + Sticky
+
+ + Version 2.10 was released in January 2020. It removes major neckpains (Embedded and EmbeddedSwitch). See Transition page.
+
+ + Happy to announce collaboration with Kaitai Struct, a multi-platform binary parser.
+
+ + Please use github issues to ask general questions, make feature requests (and vote for them), report issues and bugs, and to submit PRs. Feel free to request any changes that would support your project.
+
+ +
+ + +User Guide +========== .. toctree:: :maxdepth: 2 intro + transition28 + transition29 + transition210 basics + advanced bitwise - adapters meta - string misc - text + streaming + tunneling + lazy + adapters extending debugging - api + compilation + +API Reference +============= + +.. toctree:: + :maxdepth: 2 + + api/abstract + api/exceptions + api/bytes + api/numerics + api/strings + api/mappings + api/structs + api/repeaters + api/special + api/misc + api/conditional + api/align + api/streaming + api/tunneling + api/lazy + api/debugging + api/adapters + + api/core + api/lib Indices and tables ================== @@ -29,4 +108,3 @@ Indices and tables * :ref:`genindex` * :ref:`modindex` * :ref:`search` - diff --git a/docs/intro.rst b/docs/intro.rst index 36c8e209a..09b07515f 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -2,56 +2,102 @@ Introduction ============ -What is Construct? -================== +Construct is a powerful **declarative** and **symmetrical** parser and builder for binary data. -In a nutshell, Construct is a declarative binary parser and builder library. -To break that down into each different part, Construct is... +Instead of writing *imperative code* to parse a piece of data, you declaratively define a *data structure* that describes your data. As this data structure is not code, you can use it in one direction to *parse* data into Pythonic objects, and in the other direction, to *build* objects into binary data. -Declarative ------------ +The library provides both simple, atomic constructs (such as integers of various sizes), as well as composite ones which allow you to form hierarchical and sequential structures of increasing complexity. Construct features **bit and byte granularity**, easy debugging and testing, an **easy-to-extend subclass system**, and lots of primitive constructs to make your work easier: -Construct does not force users to write code in order to create parsers and -builders.
Instead, Construct gives users a **domain-specific language**, or -DSL, for specifying their data structures. +* Fields: raw bytes or numerical types +* Structs and Sequences: combine simpler constructs into more complex ones +* Bitwise: splitting bytes into bit-grained fields +* Adapters: change how data is represented +* Arrays/Ranges: duplicate constructs +* Meta-constructs: use the context (history) to compute the size of data +* If/Switch: branch the computational path based on the context +* On-demand (lazy) parsing: read and parse only the fields that you require +* Pointers: jump from here to there in the data stream +* Tunneling: prefix data with a byte count or compress it -Binary ------- -Construct operates on bytes, not strings, and is specialized for binary data. -While Construct can consume normal text, it is best suited for binary formats. +Example +--------- -Parser and Builder ------------------- +A ``Struct`` is a collection of ordered, named fields:: -Structures declared in Construct are symmetrical and describe both the parser -and the builder. This eliminates the possibility of disparity between the -parsing and building actions, and reduces the amount of code required to -implement a format. + >>> format = Struct( + ... "signature" / Const(b"BMP"), + ... "width" / Int8ub, + ... "height" / Int8ub, + ... "pixels" / Array(this.width * this.height, Byte), + ... ) + >>> format.build(dict(width=3,height=2,pixels=[7,8,9,11,12,13])) + b'BMP\x03\x02\x07\x08\t\x0b\x0c\r' + >>> format.parse(b'BMP\x03\x02\x07\x08\t\x0b\x0c\r') + Container(signature=b'BMP')(width=3)(height=2)(pixels=[7, 8, 9, 11, 12, 13]) -Library -------- +A ``Sequence`` is a collection of ordered fields, and differs from ``Array`` and ``GreedyRange`` in that those two are homogeneous:: -Construct is not a framework. It does not have any dependencies besides the -Python standard library, and does not require users to adapt their code to its -whims.
+ >>> format = Sequence(PascalString(Byte, "utf8"), GreedyRange(Byte)) + >>> format.build([u"lalaland", [255,1,2]]) + b'\x08lalaland\xff\x01\x02' + >>> format.parse(b"\x004361789432197") + ['', [52, 51, 54, 49, 55, 56, 57, 52, 51, 50, 49, 57, 55]] -What is Construct good for? -=========================== Construct has been used to parse: - * Networking formats - * Binary file formats - * Filesystem layouts +* Networking formats like Ethernet, IP, ICMP, IGMP, TCP, UDP, DNS, DHCP +* Binary file formats like Bitmaps, PNG, GIF, EMF, WMF +* Executable binary formats like ELF32, PE32 +* Filesystem layouts like Ext2, Fat16, MBR -And many other things! +See more examples in `current gallery `_ and in `deprecated gallery `_. -What isn't Construct good at? -============================= -As previously mentioned, Construct is not a good choice for parsing text, due -to the typical complexity of text-based grammars and the relative difficulty -of parsing Unicode correctly. While Construct does have a suite of special -text-parsing structures, it was not designed to handle text and is not a good -fit for those applications. +Development and support +------------------------- +Please use `github issues `_ to ask general questions, make feature requests (and vote for them), report issues and bugs, and to submit PRs. Feel free to request any changes that would support your project. + +Main documentation is at `readthedocs `_, which is substantial. Source is at `github `_. Releases are available at `pypi `_. + + +Requirements +-------------- +Construct should run on CPython 3.8 3.9 3.10 3.11 3.12 (and probably beta) and PyPy implementations. PyPy achieves much better performance. + +Following modules are needed only if you want to use certain features: + +* Numpy is optional, if you want to serialize arrays using Numpy protocol. Otherwise arrays can still be serialized using PrefixedArray. +* Arrow is optional, if you want to use Timestamp class.
+* Different Python versions support different compression modules (like gzip lzma), if you want to use Compressed class. +* Ruamel.yaml is optional, if you want to use KaitaiStruct (KSY) exporter. +* Cloudpickle is optional, if you want to serialize the classes. +* LZ4 is optional, if you want to use CompressedLZ4 class. +* Cryptography is optional, if you want to use Encrypted* classes. + + +Installing +------------- + +The library is downloadable and installable from Pypi. Just use standard command-line. There are no hard dependencies, but if you would like to install all supported (not required) modules listed above, you can use the 2nd command-line form. + +* ``pip install construct`` +* ``pip install construct[extras]`` + + +Type Hints / Type Annotations +--------------------------------- + +As an extension to this library there is the `construct-typing `_ library, which provides PEP 561 compliant stub files for this library. It also provides extended adapters to describe complex structures using PEP 526 type annotations for improved static code analysis with mypy. + +* ``pip install construct-typing`` + + +Visual Editor +----------------- + +Another fancy extension to this library is the `construct-editor `_ visual editor for binary blobs that uses Construct parsing classes internally. + +* ``pip install construct-editor`` +* ``construct-editor`` diff --git a/docs/lazy.rst b/docs/lazy.rst new file mode 100644 index 000000000..b7ddf49b6 --- /dev/null +++ b/docs/lazy.rst @@ -0,0 +1,103 @@ +============ +Lazy parsing +============ + +.. warning:: This feature is fully implemented but may not be fully mature. + + +Lazy +--------------- + +This wrapper allows you to do lazy parsing of individual fields inside a normal ``Struct`` (without using ``LazyStruct`` which may not work in every scenario). It is also used by KaitaiStruct compiler to emit `instances` because those are not processed greedily, and they may refer to other not yet parsed fields. 
Those are 2 entirely different applications but semantics are the same. + +>>> d = Lazy(Byte) +>>> x = d.parse(b'\x00') +>>> x +.execute> +>>> x() +0 +>>> d.build(0) +b'\x00' +>>> d.build(x) +b'\x00' +>>> d.sizeof() +1 + + +LazyStruct +--------------- + +Equivalent to ``Struct``, but when this class is parsed, most fields are not parsed (they are skipped if their size can be measured by ``_actualsize`` or ``_sizeof`` method). See its docstring for details. + +Fields are parsed depending on some factors: + +* Some fields like ``Int*``, ``Float*``, ``Bytes(5)``, ``Array(5, Byte)`` or ``Pointer`` are fixed-size and are therefore skipped. Stream is not read. +* Some fields like ``Bytes(this.field)`` are variable-size but their size is known during parsing when there is a corresponding context entry. Those fields are also skipped. Stream is not read. +* Some fields like ``Prefixed``, ``PrefixedArray`` or ``PascalString`` are variable-size but their size can be computed by partially reading the stream. Only first few bytes are read (the lengthfield). +* Other fields like ``VarInt`` need to be parsed. Stream position that is left after the field was parsed is used. +* Some fields may not work properly, due to the fact that this class attempts to skip fields, and parses them only out of necessity. Miscellaneous fields often have size defined as 0, and fixed sized fields are skippable. + +Note there are restrictions: + +* If a field like ``Bytes(this.field)`` references another field in the same struct, you need to access the referenced field first (to trigger its parsing) and then you can access the ``Bytes`` field. Otherwise it would fail due to missing context entry. +* If a field references another field within inner (nested) or outer (super) struct, things may break. Context is nested, but this class was not rigorously tested in that manner. + +Building and sizeof are greedy, like in ``Struct``. 
+ + +LazyArray +--------------- + +Equivalent to ``Array``, but the subcon is not parsed when possible (it gets skipped if the size can be measured by ``_actualsize`` or ``_sizeof`` method). See its docstring for details. The restrictions are identical as in ``LazyStruct``. + + +LazyBound +--------------- + +Field that binds to the subcon only at runtime (during parsing and building, not in the constructor). Useful for recursive data structures, like linked-lists and trees, where a construct needs to refer to itself (while it does not exist yet in the namespace). + +Note that it is possible to obtain same effect without using this class, using a loop. However there are usecases where that is not possible (if remaining nodes cannot be sized-up, and there is data following the recursive structure). There is also a significant difference, namely that ``LazyBound`` actually does greedy parsing while the loop does lazy parsing. See examples. + +To break recursion, use ``If`` field. See examples. + +:: + + d = Struct( + "value" / Byte, + "next" / If(this.value > 0, LazyBound(lambda: d)), + ) + + >>> print(d.parse(b"\x05\x09\x00")) + Container: + value = 5 + next = Container: + value = 9 + next = Container: + value = 0 + next = None + +:: + + d = Struct( + "value" / Byte, + "next" / GreedyBytes, + ) + + data = b"\x05\x09\x00" + while data: + x = d.parse(data) + data = x.next + print(x) + + # print outputs + Container: + value = 5 + next = \t\x00 (total 2) + # print outputs + Container: + value = 9 + next = \x00 (total 1) + # print outputs + Container: + value = 0 + next = (total 0) diff --git a/docs/macros.rst b/docs/macros.rst deleted file mode 100644 index e20aed6ab..000000000 --- a/docs/macros.rst +++ /dev/null @@ -1,5 +0,0 @@ -============================== -``construct.macros`` -- Macros -============================== - -.. 
automodule:: construct.macros diff --git a/docs/meta.rst b/docs/meta.rst index ec17f7d8e..92c44ef29 100644 --- a/docs/meta.rst +++ b/docs/meta.rst @@ -1,293 +1,222 @@ +=========== The Context =========== -Meta constructs are the key to the declarative power of Construct. Meta -constructs are constructs which are affected by the context of the -construction (parsing or building). In other words, meta constructs are -self-referring. -The context is a dictionary that is created during the construction process, -by Structs, and is "propagated" down and up to all constructs along the way, -so that they could use it. It basically represents a mirror image of the -construction tree, as it is altered by the different constructs. Structs -create nested contexts, just as they create nested Containers. + +Meta constructs are the key to the declarative power of Construct. Meta constructs are constructs which are affected by the context of the construction (during parsing and building). The context is a dictionary that is created during the parsing and building process by Structs and Sequences, and is "propagated" down and up to all constructs along the way, so that other members can access other members parsing or building intermediate results. It basically represents a mirror image of the construction tree, as it is altered by the different constructs. Nested structs create nested contexts, just as they create nested containers. In order to see the context, let's try this snippet: ->>> class PrintContext(Construct): -... def _parse(self, stream, context): -... print context -... ->>> foo = Struct("foo", -... Byte("a"), -... Byte("b"), -... PrintContext("c"), -... Struct("bar", -... Byte("a"), -... Byte("b"), -... PrintContext("c"), -... ), -... PrintContext("d"), +>>> d = Struct( +... "a" / Byte, +... Probe(), +... "b" / Byte, +... Probe(), ... 
) ->>> ->>> foo.parse("\x01\x02\x03\x04") -{'_': {'a': 1, 'b': 2}} -{'_': {'a': 3, 'b': 4, '_': {'a': 1, 'c': None, 'b': 2}}} -{'_': {'a': 1, 'c': None, 'b': 2, 'bar': Container(a = 3, b = 4, c = None)}} -Container(a = 1, b = 2, bar = Container(a = 3, b = 4, c = None), c = None, d = -None) ->>> - -As you can see, the context looks different in different points of the -construction. - -You may wonder what does the little underscore ('_') that is found in the -context means. It basically represents the parent node, like the .. in unix -pathnames ("../foo.txt"). We'll use it only when we refer to the context of -upper layers. - -Using the context is easy. All meta constructs take a function as a parameter, -which is usually passed as a lambda function, although "big" functions are -just as good. This function, unless otherwise stated, takes a single parameter -called ctx (short for context), and returns a result calculated from that -context. - ->>> foo = Struct("foo", -... Byte("length"), -... Field("data", lambda ctx: ctx.length * 2 + 1), # <-- calculate -the length of the string +>>> d.parse(b"\x01\x02") +-------------------------------------------------- +Probe, path is (parsing), into is None +Container: + a = 1 +-------------------------------------------------- +-------------------------------------------------- +Probe, path is (parsing), into is None +Container: + a = 1 + b = 2 +-------------------------------------------------- +Container(a=1, b=2) + +As you can see, the context looks different at different points of the construction. + +You may wonder what does the little underscore (``_``) that is found in the context means. It basically represents the parent node, like the ``..`` in unix pathnames (``../foo.txt``). We'll use it only when we refer to the context of upper layers. + +Using the context is easy. All meta constructs take a function as a parameter, which is usually passed as a lambda function, although "big" named functions are just as good. 
This function, unless otherwise stated, takes a single parameter called ``ctx`` (short for context), and returns a result calculated from that context. + +>>> st = Struct( +... "count" / Byte, +... "data" / Bytes(lambda ctx: ctx.count), ... ) ->>> ->>> foo.parse("\x05abcdefghijkXXX") -Container(data = 'abcdefghijk', length = 5) +>>> st.parse(b"\x05abcde") +Container(count=5, data=b'abcde') + +Of course a function can return anything (it does not need to depend on the context): +>>> Computed(lambda ctx: 7) +>>> Computed(lambda ctx: os.urandom(16)) -Of course the function can return anything (it doesn't have to use ctx at -all): ->>> foo = Struct("foo", -... Byte("length"), -... Field("data", lambda ctx: 7), +Nesting +============================ + +And here's how we use the special ``_`` name to get to the upper container in a nested containers situation (which happens when parsing nested ``Struct``). Notice that ``length1`` is on a different (upper) level than ``length2``, therefore it exists within a different up-level container. + +>>> d = Struct( +... "length1" / Byte, +... "inner" / Struct( +... "length2" / Byte, +... "sum" / Computed(lambda ctx: ctx._.length1 + ctx.length2), +... ), ... ) ->>> ->>> foo.parse("\x99abcdefg") -Container(data = 'abcdefg', length = 153) +>>> d.parse(b"12") +Container(length1=49, inner=Container(length2=50, sum=99)) +Context entries can also be passed directly through ``parse`` and ``build`` methods. However, one should take into account that some classes are nesting context (like ``Struct``, ``Sequence``, ``Union``, ``FocusedSeq`` or ``LazyStruct``), so entries passed to these end up on upper level. Compare examples: -And here's how we use the special '_' name to get to the upper layer. Here the -length of the string is calculated as ``length1 + length2``. +>>> d = Bytes(lambda ctx: ctx.n) +>>> d.parse(bytes(100), n=4) +b'\x00\x00\x00\x00' ->>> foo = Struct("foo", -... Byte("length1"), -... Struct("bar", -... Byte("length2"), -...
Field("data", lambda ctx: ctx._.length1 + ctx.length2), -... ) +>>> d = Struct( +... "data" / Bytes(lambda ctx: ctx._.n), ... ) ->>> ->>> foo.parse("\x02\x03abcde") -Container(bar = Container(data = 'abcde', length2 = 3), length1 = 2) +>>> d.parse(bytes(100), n=4) +Container(data=b'\x00\x00\x00\x00') -.. autofunction:: construct.Field -Array ------ +Referring to inlined constructs +============================ -When creating an :ref:`Array `, rather than specifying a constant -length, you can instead specify that it repeats a variable number of times. +If you need to refer to a subcon like ``Enum``, that was inlined in the struct (and therefore wasn't assigned to any variable in the namespace), you can access it as ``Struct`` attribute under the same name. This feature is particularly handy when using ``Enum`` and ``EnumFlag`` classes. ->>> foo = Struct("foo", -... Byte("length"), -... Array(lambda ctx: ctx.length, UBInt16("data")), +>>> d = Struct( +... "animal" / Enum(Byte, giraffe=1), ... ) ->>> ->>> foo.parse("\x03\x00\x01\x00\x02\x00\x03") -Container(data = [1, 2, 3], length = 3) +>>> d.animal.giraffe +EnumIntegerString.new(1, 'giraffe') +If you need to refer to the size of a field, that was inlined in the same struct (and therefore wasn't assigned to any variable in the namespace), you can use a special ``_subcons`` context entry that contains all struct members. Note that you need to use a lambda (because ``this`` expression is not supported). -RepeatUntil ------------ +>>> d = Struct( +... "count" / Byte, +... "data" / Bytes(lambda ctx: ctx.count - ctx._subcons.count.sizeof()), +... ) +>>> d.parse(b"\x05four") +Container(count=5, data=b'four') -A repeater that repeats until a condition is met. The perfect example is -null-terminated strings. Note: for null-terminated strings, use CString. +>>> d = Union(None, +... "chars" / Byte[4], +... "data" / Bytes(lambda ctx: ctx._subcons.chars.sizeof()), +...
) +>>> d.parse(b"\x01\x02\x03\x04") +Container(chars=ListContainer([1, 2, 3, 4]), data=b'\x01\x02\x03\x04') ->>> foo = RepeatUntil(lambda obj, ctx: obj == "\x00", Field("data", 1)) ->>> ->>> foo.parse("abcdef\x00this is another string") -['a', 'b', 'c', 'd', 'e', 'f', '\x00'] ->>> ->>> foo2 = StringAdapter(foo) ->>> foo2.parse("abcdef\x00this is another string") -'abcdef\x00' +Using ``this`` expression +============================ -Switch ------- +Certain classes take a number of parameters and allow a callable to be provided instead of constants. This callable is called at parsing and building, and is provided the current context dictionary. Context is always a ``Container``, not a ``dict``, so it supports attribute access as well as key access. Amazingly, this can get even more fancy. Tomer Filiba provided an even better syntax. The ``this`` singleton object can be used to build a lambda expression. All four examples below are equivalent, but first is recommended: -Branches the construction path based on a condition, similarly to C's switch -statement. +>>> this._.field +>>> lambda this: this._.field +>>> this["_"]["field"] +>>> lambda this: this["_"]["field"] ->>> foo = Struct("foo", -... Enum(Byte("type"), -... INT1 = 1, -... INT2 = 2, -... INT4 = 3, -... STRING = 4, -... ), -... Switch("data", lambda ctx: ctx.type, -... { - ... "INT1" : UBInt8("spam"), - ... "INT2" : UBInt16("spam"), - ... "INT4" : UBInt32("spam"), - ... "STRING" : String("spam", 6), - ... } - ... ) -... ) ->>> ->>> ->>> foo.parse("\x01\x12") -Container(data = 18, type = 'INT1') ->>> ->>> foo.parse("\x02\x12\x34") -Container(data = 4660, type = 'INT2') ->>> ->>> foo.parse("\x03\x12\x34\x56\x78") -Container(data = 305419896, type = 'INT4') ->>> ->>> foo.parse("\x04abcdef") -Container(data = 'abcdef', type = 'STRING') - - -When the condition is not found in the switching table, and a default -construct is not given, an exception is raised (SwitchError). 
In order to -specify a default construct, set default (a keyword argument) when creating -the Switch. - ->>> foo = Struct("foo", -... Byte("type"), -... Switch("data", lambda ctx: ctx.type, -... { - ... 1 : UBInt8("spam"), - ... 2 : UBInt16("spam"), - ... }, - ... default = UBInt8("spam") # <-- sets the default - construct - ... ) -... ) ->>> ->>> foo.parse("\x01\xff") -Container(data = 255, type = 1) ->>> ->>> foo.parse("\x02\xff\xff") -Container(data = 65535, type = 2) ->>> ->>> foo.parse("\x03\xff\xff") # <-- uses the default -construct -Container(data = 255, type = 3) ->>> - - -When you want to ignore/skip errors, you can use the Pass construct, which is -a no-op construct. Pass will simply return None, without reading anything from -the stream. - ->>> foo = Struct("foo", -... Byte("type"), -... Switch("data", lambda ctx: ctx.type, -... { - ... 1 : UBInt8("spam"), - ... 2 : UBInt16("spam"), - ... }, - ... default = Pass - ... ) -... ) ->>> ->>> foo.parse("\x01\xff") -Container(data = 255, type = 1) ->>> ->>> foo.parse("\x02\xff\xff") -Container(data = 65535, type = 2) ->>> ->>> foo.parse("\x03\xff\xff") -Container(data = None, type = 3) - - -Pointer -------- - -Pointer allows for non-sequential construction. The pointer first changes the -stream position, constructs the subconstruct, and restores the original stream -position. -the stream position points the construction -Note: pointers are available only for seekable streams (in-memory and files). -Sockets and pipes do not suppose seeking, so you'll have to first read the -data from the stream, and parse it in-memory. - ->>> foo = Struct("foo", -... Pointer(lambda ctx: 4, Byte("data1")), # <-- data1 is at (absolute) -position 4 -... Pointer(lambda ctx: 7, Byte("data2")), # <-- data2 is at (absolute) -position 7 +Of course, ``this`` expression can be mixed with other calculations. When evaluating, each instance of ``this`` is replaced by context Container which supports attribute access to keys. 
+ +>>> this.width * this.height - this.offset + +When creating an ``Array`` (the ``"items"`` field in the following example), rather than specifying a constant count, you can use a previous field value as count. + +>>> d = Struct( +... "count" / Rebuild(Byte, lambda ctx: len(ctx.items)), +... "items" / Byte[this.count], ... ) ->>> ->>> foo.parse("\x00\x00\x00\x00\x01\x00\x00\x02") -Container(data1 = 1, data2 = 2) - - -Anchor ------- - -Anchor is not really a meta construct, but it strongly coupled with Pointer, -so I chose to list it here. Anchor simply returns the stream position at the -moment it's invoked, so Pointers can "anchor" relative offsets to absolute -stream positions using it. See the following example: - ->>> foo = Struct("foo", -... Byte("padding_length"), -... Padding(lambda ctx: ctx.padding_length), -... Byte("relative_offset"), -... Anchor("absolute_position"), -... Pointer(lambda ctx: ctx.absolute_position + ctx.relative_offset, -... Byte("data") -... ), +>>> d.build(dict(items=[1,2,3,4,5])) +b'\x05\x01\x02\x03\x04\x05' + +Switch can branch the construction path based on previously parsed value. + +>>> d = Struct( +... "type" / Enum(Byte, INT1=1, INT2=2, INT4=3, STRING=4), +... "data" / Switch(this.type, +... { +... "INT1" : Int8ub, +... "INT2" : Int16ub, +... "INT4" : Int32ub, +... "STRING" : CString('ascii'), +... }), ... ) ->>> ->>> foo.parse("\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\xff") -Container(absolute_position = 7, data = 255, padding_length = 5, -relative_offset = 3) - -OnDemand --------- - -OnDemand allows lazy construction, meaning the data is actually parsed (or -built) only when it's requested (demanded). On-demand parsing is very useful -with record-oriented data, where you don't have to actually parse the data -unless it's actually needed. The result of OnDemand is an OnDemandContainer -- -a special object that "remembers" the stream position where its data is found, -and parses it when you access its .value property. 
-Note: lazy construction is available only for seekable streams (in-memory and -files). Sockets and pipes do not suppose seeking, so you'll have to first read -the data from the stream, and parse it in-memory. - ->>> foo = Struct("foo", -... Byte("a"), -... OnDemand(Bytes("bigdata", 20)), # <-- this will be read only on -demand -... Byte("b"), +>>> d.parse(b"\x02\x00\xff") +Container(type=EnumIntegerString.new(2, 'INT2'), data=255) +>>> d.parse(b"\x04abcdef\x00") +Container(type=EnumIntegerString.new(4, 'STRING'), data='abcdef') + + +Using ``len_`` expression +============================ + +There used to be a bit of a hassle when you used built-in functions like ``len``, ``sum``, ``min``, ``max`` or ``abs`` on context items. Built-in ``len`` takes a list and returns an integer but ``len_`` analog takes a lambda and returns a lambda. This allows you to use this kind of shorthand: + +>>> len_(this.items) +>>> lambda ctx: len(ctx.items) + +These can be used in Rebuild wrappers that compute count/length fields from another list-alike field: + +>>> d = Struct( +... "count" / Rebuild(Byte, len_(this.items)), +... "items" / Byte[this.count], ...
) ->>> ->>> x = foo.parse("\x0101234567890123456789\x02") ->>> x -Container(a = 1, b = 2, bigdata = OnDemandContainer()) ->>> ->>> x.bigdata -OnDemandContainer() ->>> x.bigdata.has_value # <-- still unread -False ->>> ->>> x.bigdata.value # <-- demand the data -'01234567890123456789' ->>> ->>> x.bigdata.has_value # <-- already demanded -True ->>> x.bigdata -OnDemandContainer('01234567890123456789') +>>> d.build(dict(items=[1,2,3,4,5])) +b'\x05\x01\x02\x03\x04\x05' + + +Using ``obj_`` expression +============================ + +There is also an analog that takes ``(obj, context)`` or ``(obj, list, context)`` unlike ``this`` singleton which only takes a ``context`` as a single parameter: + +>>> obj_ > 0 +>>> lambda obj,ctx: obj > 0 + +These can be used in at least one construct: + +>>> RepeatUntil(obj_ == 0, Byte).parse(b"aioweqnjkscs\x00") +ListContainer([97, 105, 111, 119, 101, 113, 110, 106, 107, 115, 99, 115, 0]) + + +Using ``list_`` expression +============================ + +.. warning:: The ``list_`` expression is implemented but buggy, using it is not recommended at present time. + +There is also a third expression that takes ``(obj, list, context)`` and computes on the second parameter (the list). In constructs that use lambdas with all 3 parameters, those constructs usually process lists of elements and the 2nd parameter is a list of elements processed so far. + +These can be used in at least one construct: + +>>> RepeatUntil(list_[-1] == 0, Byte).parse(b"aioweqnjkscs\x00") +[97, 105, 111, 119, 101, 113, 110, 106, 107, 115, 99, 115, 0] + +In that example, ``list_`` gets substituted with following, at each iteration. Index ``-1`` means last element: + +:: + + list_ <- [97] + list_ <- [97, 105] + list_ <- [97, 105, 111] + list_ <- [97, 105, 111, 119] + ... + +Known deficiencies +============================ + +Logical ``and``, ``or``, ``not`` operators cannot be used in this expressions. 
You have to either use a lambda or equivalent bitwise operators: + +>>> ~this.flag1 | this.flag2 & this.flag3 +>>> lambda this: not this.flag1 or this.flag2 and this.flag3 + +Contains operator ``in`` cannot be used in this expressions, you have to use a lambda: + +>>> lambda this: this.value in (1, 2, 3) + +Indexing (square brackets) does not work in this expressions. Use a lambda: + +>>> lambda this: this.list[this.index] + +Sizeof method does not work in this expressions. Use a lambda: + +>>> lambda this: this._subcons.<member>.sizeof() + +Lambdas (unlike this expressions) are not compilable. + + diff --git a/docs/misc.rst b/docs/misc.rst index c17d392f9..42d513a05 100644 --- a/docs/misc.rst +++ b/docs/misc.rst @@ -2,271 +2,469 @@ Miscellaneous ============= -Conditional -=========== -Optional --------- +Special +============= -Attempts to parse or build the subconstruct; if it fails, returns a default -value. By default, the default value is ``None``. +Renamed +------- ->>> foo = Optional(UBInt32("foo")) ->>> foo.parse("\x12\x34\x56\x78") -305419896 ->>> print foo.parse("\x12\x34\x56") -None ->>> ->>> foo = Optional(UBInt32("foo"), default = 17) ->>> foo.parse("\x12\x34\x56\x78") -305419896 ->>> foo.parse("\x12\x34\x56") -17 +Adds a name string to a field (which by default is ``None``). This class is only used internally and you should use the ``/`` and ``*`` operators instead. Naming fields is needed when working with ``Struct`` and ``Union``, but also sometimes with ``Sequence`` and ``FocusedSeq``. +:: -If --- + "num" / Byte <--> Renamed(Byte, newname="num") + Byte * "comment" <--> Renamed(Byte, newdocs="comment") + Byte * parsedhook <--> Renamed(Byte, newparsed=parsedhook) -Parses or builds the subconstruct only if a certain condition is met. -Otherwise, returns a default value. By default, the default value is ``None``. ->>> foo = Struct("foo", -... Flag("has_options"), -... If(lambda ctx: ctx["has_options"], -... Bytes("options", 5) -... ) -...
) ->>> ->>> foo.parse("\x01hello") -Container(has_options = True, options = 'hello') ->>> ->>> foo.parse("\x00hello") -Container(has_options = False, options = None) ->>> +Miscellaneous +============= +Const +----- -IfThenElse ----------- +A constant value that is required to exist in the data and match a given value. If the value is not matching, ``ConstError`` is raised. Useful for so called magic numbers, signatures, asserting correct protocol version, etc. -Branches the construction path based on a given condition. If the condition is -met, the ``then_construct`` is used; otherwise the ``else_construct`` is used. +>>> d = Const(b"IHDR") +>>> d.build(None) +b'IHDR' +>>> d.parse(b"JPEG") +construct.core.ConstError: expected b'IHDR' but parsed b'JPEG' ->>> foo = Struct("foo", -... Byte("a"), -... IfThenElse("b", lambda ctx: ctx["a"] > 7, -... UBInt32("foo"), -... UBInt16("bar") -... ), +By default, ``Const`` uses a ``Bytes`` field with size matching the value. However, other fields can also be used: + +>>> d = Const(255, Int32ul) +>>> d.build(None) +b'\xff\x00\x00\x00' + +The shortcoming is that it only works if the amount and exact bytes are known in advance. To check if a "variable" data meets some criterium (not mere equality), you would need the ``Check`` class. There is also ``OneOf`` and ``NoneOf`` class. + + +Computed +-------- + +Represents a value dynamically computed from the context. ``Computed`` does not read or write anything to the stream. It only computes a value (usually by extracting a key from a context dictionary) and returns its computed value as the result. Usually ``Computed`` fields are used for computations on the context dict. Context is explained in a previous chapter. However, ``Computed`` can also produce values based on external environment, ``random`` module, or constants. For example: + +>>> d = Struct( +... "width" / Byte, +... "height" / Byte, +... "total" / Computed(this.width * this.height), ... 
) ->>> ->>> foo.parse("\x09\xaa\xbb\xcc\xdd") # <-- condition is met -Container(a = 9, b = 2864434397L) ->>> foo.parse("\x02\xaa\xbb") # <-- condition is not met -Container(a = 2, b = 43707) +>>> d.parse(b"12") +Container(width=49, height=50, total=2450) +>>> d.build(dict(width=4, height=5)) +b'\x04\x05' +>>> d = Computed(lambda ctx: os.urandom(10)) +>>> d.parse(b"") +b'[\x86\xcc\xf1b\xd9\x10\x0f?\x1a' -Alignment and Padding -===================== -Aligned +Index ------- -Aligns the subconstruct to a given modulus boundary (default is 4). +Fields that are inside ``Array``, ``GreedyRange`` or ``RepeatUntil`` can reference their index within the outer list. This is being effectuated by repeater class maintaining a context entry ``_index`` and updating it between each iteration. Note that some classes do context nesting (like ``Struct``), but they do copy the key over. You can access the key using ``Index`` class, or refer to the context entry directly, using ``this._index`` expression. Some constructions are only possible with direct method, when you want to use the index as parameter of a construct, like in ``Bytes(this._index + 1)``. ->>> foo = Aligned(UBInt8("foo")) ->>> foo.parse("\xff\x00\x00\x00") -255 ->>> foo.build(255) -'\xff\x00\x00\x00' +>>> d = Array(3, Index) +>>> d.parse(b"") +ListContainer([0, 1, 2]) +>>> d = Array(3, Struct("i" / Index)) +>>> d.parse(b"") +ListContainer([Container(i=0), Container(i=1), Container(i=2)]) -AlignedStruct -------------- +>>> d = Array(3, Computed(this._index + 1)) +>>> d.parse(b"") +ListContainer([1, 2, 3]) +>>> d = Array(3, Struct("i" / Computed(this._index + 1))) +>>> d.parse(b"") +ListContainer([Container(i=1), Container(i=2), Container(i=3)]) + + +Rebuild +------- -Automatically aligns all the fields of the Struct to the modulus -boundary. +When there is an array separated from its length field, the ``Rebuild`` wrapper can be used to measure the length of the list when building. 
Note that both the ``len_`` and ``this`` expressions are used as discussed in meta chapter. Only building is affected, parsing is simply deferred to subcon. ->>> foo = AlignedStruct("foo", -... Byte("a"), -... Byte("b"), +>>> d = Struct( +... "count" / Rebuild(Byte, len_(this.items)), +... "items" / Byte[this.count], ... ) ->>> ->>> foo.parse("\x01\x00\x00\x00\x02\x00\x00\x00") -Container(a = 1, b = 2) ->>> foo.build(Container(a=1,b=2)) -'\x01\x00\x00\x00\x02\x00\x00\x00' +>>> d.build(dict(items=[1,2,3])) +b'\x03\x01\x02\x03' +When the count field is directly before the items, ``PrefixedArray`` can be used instead: -Padding +>>> d = PrefixedArray(Byte, Byte) +>>> d.build([1,2,3]) +b'\x03\x01\x02\x03' + + +Default ------- -Padding is a sequence of bytes of bits that contains no data (its value is -discarded), and is necessary only for padding, etc. +Allows a field to have a default value, which comes in handy when building a ``Struct`` from a dict with missing keys. Only building is affected, parsing is simply deferred to subcon. ->>> foo = Struct("foo", -... Byte("a"), -... Padding(2), -... Byte("b"), +>>> d = Struct( +... "a" / Default(Byte, 0), ... ) ->>> ->>> foo.parse("\x01\x00\x00\x02") -Container(a = 1, b = 2) +>>> d.build(dict(a=1)) +b'\x01' +>>> d.build(dict()) +b'\x00' + +Check +----- -Special Constructs -================== +When fields are expected to be coherent in some way but integrity cannot be checked by merely comparing data with constant bytes using ``Const`` field, then a ``Check`` field can be put in place to get a key from context dict and check if the integrity is preserved. For example, maybe there is a count field (implied being non-negative but the field is signed type): -Rename ------- +>>> d = Struct( +... "num" / Int8sb, +... "integrity1" / Check(this.num > 0), +... ) +>>> d.parse(b"\xff") +CheckError: Error in path (parsing) -> integrity1 +check failed during parsing -Renames a construct.
+Or there is a collection and a count provided and the count is expected to match the collection length (which might go out of sync by mistake). Note that ``Rebuild`` is more appropriate but the check is also possible: ->>> foo = Struct("foo", -... Rename("xxx", Byte("yyy")), +>>> d = Struct( +... "count" / Byte, +... "items" / Byte[this.count], +... ) +>>> d.build(dict(count=9090, items=[])) +FormatFieldError: Error in path (building) -> count +struct '>B' error during building, given value 9090 + +>>> d = Struct( +... "integrity" / Check(this.count == len_(this.items)), +... "count" / Byte, +... "items" / Byte[this.count], ... ) ->>> ->>> foo.parse("\x02") -Container(xxx = 2) +>>> d.build(dict(count=9090, items=[])) +CheckError: Error in path (building) -> integrity +check failed during building -Alias ------ +Error +------ -Creates an alias for an existing field of a Struct. +You can also explicitly raise an error, declaratively with a construct. ->>> foo = Struct("foo", -... Byte("a"), -... Alias("b", "a"), -... ) ->>> ->>> foo.parse("\x03") -Container(a = 3, b = 3) +>>> Error.parse(b"") +ExplicitError: Error in path (parsing) +Error field was activated during parsing -Value ------ +FocusedSeq +---------- -Represents a computed value. Value does not read or write anything to the -stream; it only returns its computed value as the result. +When a sequence has some fields that could be omitted like ``Const``, ``Padding`` or ``Terminated``, the user can focus on one particular field that is useful. Only one field can be focused on, and can be referred to by index or name. Other fields must be able to build without a value: ->>> foo = Struct("foo", -... Byte("a"), -... Value("b", lambda ctx: ctx["a"] + 7) +>>> d = FocusedSeq(1 or "num", +... Const(b"MZ"), +... "num" / Byte, +... Terminated, ...
) ->>> ->>> foo.parse("\x02") -Container(a = 2, b = 9) +>>> d.parse(b"MZ\xff") +255 +>>> d.build(255) +b'MZ\xff' -Terminator +Pickled ---------- -Asserts the end of the stream has been reached (so that no more trailing data -is left unparsed). Note: Terminator is a singleton object. Do not try to -"instantiate" it (i.e., ``Terminator()``). +For convenience, arbitrary Python objects can be preserved using the famous pickle protocol. Almost any type can be pickled, but you have to understand that pickle uses its own (homebrew) protocol that is not a standard outside Python. Therefore, you can forget about parsing the binary blobs using other languages. It's useful, but it automates things beyond your understanding. ->>> Terminator.parse("") ->>> Terminator.parse("x") -Traceback (most recent call last): - . - . -construct.extensions.TerminatorError: end of stream not reached +>>> obj = [1, 2.3, {}] +>>> Pickled.build(obj) +b'\x80\x03]q\x00(K\x01G@\x02ffffff}q\x01e.' +>>> Pickled.parse(_) +[1, 2.3, {}] -Pass ----- +Numpy +---------- -A do-nothing construct; useful in Switches and Enums. Note: Pass is a -singleton object. Do not try to "instantiate" it (i.e., ``Pass()``). +Numpy arrays can be preserved and retrieved along with their element type (dtype), dimensions (shape) and items. This is effectuated using the Numpy binary protocol, so parsing blobs produced by this class with other languages (or other frameworks than Numpy for that matter) is not possible. Otherwise you could use ``PrefixedArray`` but this class is more convenient. ->>> print Pass.parse("xyz") -None +>>> import numpy +>>> obj = numpy.asarray([1,2,3]) +>>> Numpy.build(obj) +b"\x93NUMPY\x01\x00F\x00{'descr': '>> foo = Const(Bytes("magic", 6), "FOOBAR") ->>> foo.parse("FOOBAR") -'FOOBAR' ->>> foo.parse("FOOBAX") -Traceback (most recent call last): - . - .
-construct.extensions.ConstError: expected 'FOOBAR', found 'FOOBAX' ->>> +>>> d = NamedTuple("coord", "x y z", Byte[3]) +>>> d = NamedTuple("coord", "x y z", Byte >> Byte >> Byte) +>>> d = NamedTuple("coord", "x y z", "x"/Byte + "y"/Byte + "z"/Byte) +>>> d.parse(b"123") +coord(x=49, y=50, z=51) -Peek ----- +Timestamp +---------- -Parses the subconstruct but restores the stream position afterwards -("peeking"). Note: works only with seekable streams (in-memory and files). +Datetimes can be parsed using the ``Timestamp`` class. It supports modern formats and even the MSDOS one. Note however that this class is not guaranteed to provide "exact" accurate values, due to several reasons explained in the docstring. + +>>> d = Timestamp(Int64ub, 1., 1970) +>>> d.parse(b'\x00\x00\x00\x00ZIz\x00') +<Arrow [2018-01-01T00:00:00+00:00]> + +>>> d = Timestamp(Int32ub, "msdos", "msdos") +>>> d.parse(b'H9\x8c"') +<Arrow [2016-01-25T17:33:04+00:00]> + + +Hex and HexDump +------------------ + +Integers and bytes can be displayed in hex form, for convenience. Note that parsing still results in int-alike and bytes-alike objects, and those results are unmodified, the hex form appears only when pretty-printing. If you want to obtain hexlified bytes, you need to use ``binascii.hexlify()`` on parsed results. + +>>> d = Hex(Int32ub) +>>> obj = d.parse(b"\x00\x00\x01\x02") +>>> obj +258 +>>> print(obj) +0x00000102 + +>>> d = Hex(GreedyBytes) +>>> obj = d.parse(b"\x00\x00\x01\x02") +>>> obj +b'\x00\x00\x01\x02' +>>> print(obj) +unhexlify('00000102') + +>>> d = Hex(RawCopy(Int32ub)) +>>> obj = d.parse(b"\x00\x00\x01\x02") +>>> obj +{'data': b'\x00\x00\x01\x02', + 'length': 4, + 'offset1': 0, + 'offset2': 4, + 'value': 258} +>>> print(obj) +unhexlify('00000102') + +Another variant is hexdumping, which shows the ASCII representation, the hexadecimal representation, and offsets. Functionality is identical. 
+ +>>> d = HexDump(GreedyBytes) +>>> obj = d.parse(b"\x00\x00\x01\x02") +>>> obj +b'\x00\x00\x01\x02' +>>> print(obj) +hexundump(''' +0000 00 00 01 02 .... 
+''') + +>>> d = HexDump(RawCopy(Int32ub)) +>>> obj = d.parse(b"\x00\x00\x01\x02") +>>> obj +{'data': b'\x00\x00\x01\x02', + 'length': 4, + 'offset1': 0, + 'offset2': 4, + 'value': 258} +>>> print(obj) +hexundump(''' +0000 00 00 01 02 .... +''') + +.. warning:: Note that Hex and possibly HexDump do not work correctly within a ``Bitwise`` context. ->>> foo = Struct("foo", -... Byte("a"), -... Peek(Byte("b")), -... Byte("c"), -... ) ->>> foo.parse("\x01\x02") -Container(a = 1, b = 2, c = 2) +Conditional +=========== Union ----- -Treats the same data as multiple constructs (similar to C's union statement). -When building, each subconstruct parses the same data (so you can "look" at -the data in multiple views); when writing, the first subconstruct is used to -build the final result. Note: works only with seekable streams (in-memory and -files). - ->>> foo = Union("foo", -... UBInt32("a"), -... UBInt16("b"), # <-- note that this field is -of a different size -... Struct("c", UBInt16("high"), UBInt16("low")), -... LFloat32("d"), +Treats the same data as multiple constructs (similar to C union statement) so you can "look" at the data in multiple views. + +When parsing, all fields read the same data bytes, but stream remains at initial offset (or rather seeks back to original position after each subcon was parsed), unless parsefrom selects a subcon by index or name. When building, the first subcon that can find an entry in the dict (or builds from ``None``, so it does not require an entry) is automatically selected. + +.. warning:: If you skip ``parsefrom`` parameter then stream will be left back at starting offset, not seeked to any common denominator. + +>>> d = Union(0, +... "raw" / Bytes(8), +... "ints" / Int32ub[2], +... "shorts" / Int16ub[4], +... "chars" / Byte[8], ... 
) ->>> ->>> print foo.parse("\xaa\xbb\xcc\xdd") -Container: - a = 2864434397L - b = 43707 - c = Container: - high = 43707 - low = 52445 - d = -1.8440714901698642e+018 ->>> ->>> foo.build(Container(a = 0x11223344, b=0,c=Container(low=0, high=0),d=0)) # -<-- only "a" is used for building -'\x11"3D' - - -LazyBound ---------- - -A lazy-bound construct; it binds to the construct only at runtime. Useful for -recursive data structures (like linked lists or trees), where a construct -needs to refer to itself (while it doesn't exist yet). - ->>> foo = Struct("foo", -... Flag("has_next"), -... If(lambda ctx: ctx["has_next"], LazyBound("next", lambda: foo)), +>>> d = Union(0, # alternative syntax +... raw=Bytes(8), +... ints=Int32ub[2], +... shorts=Int16ub[4], +... chars=Byte[8], ... ) ->>> ->>> print foo.parse("\x01\x01\x01\x00") +>>> d.parse(b"12345678") Container: - has_next = True - next = Container: - has_next = True - next = Container: - has_next = True - next = Container: - has_next = False - next = None ->>> + raw = b'12345678' (total 8) + ints = ListContainer: + 825373492 + 892745528 + shorts = ListContainer: + 12594 + 13108 + 13622 + 14136 + chars = ListContainer: + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 +>>> d.build(dict(chars=range(8))) +b'\x00\x01\x02\x03\x04\x05\x06\x07' + + +Select +------ + +Attempts to parse or build each of the subcons, in order they were provided. + +:: + + >>> d = Select(Int32ub, CString("utf8")) + >>> d = Select(num=Int32ub, text=CString("utf8")) # alternative syntax + >>> d.build(1) + b'\x00\x00\x00\x01' + >>> d.build("Афон") + b'\xd0\x90\xd1\x84\xd0\xbe\xd0\xbd\x00' + + +Optional +-------- + +Attempts to parse or build the subconstruct. If it fails during parsing, returns a ``None``. If it fails during building, it puts nothing into the stream. 
+ +>>> d = Optional(Int64ul) +>>> d.parse(b"12345678") +4050765991979987505 +>>> d.parse(b"") +None + +>>> d.build(1) +b'\x01\x00\x00\x00\x00\x00\x00\x00' +>>> d.build(None) +b'' + + +If +-- + +Parses or builds the subconstruct only if a certain condition is met. Otherwise, returns a ``None`` when parsing and puts nothing into the stream when building. The condition is a lambda that computes on the context just like in Computed examples. + +>>> d = If(this.x > 0, Byte) +>>> d.build(255, x=1) +b'\xff' +>>> d.build(255, x=0) +b'' + + +IfThenElse +---------- + +Branches the construction path based on a given condition. If the condition is met, the ``thensubcon`` is used, otherwise the ``elsesubcon`` is used. Fields like ``Pass`` and ``Error`` can be used here. Just for your curiosity, ``If`` is just a macro around this class. + +>>> d = IfThenElse(this.x > 0, VarInt, Byte) +>>> d.build(255, x=1) +b'\xff\x01' +>>> d.build(255, x=0) +b'\xff' + +In particular, you can use different subcons for parsing and building. The ``_parsing``, ``_building`` and ``_sizing`` context entries have boolean values that always exist, only one of them that corresponds to current action is set to ``True``. For convenience, those entries are duplicated in ``Struct``, ``Sequence``, ``FocusedSeq`` and ``Union`` nested contexts. You don't need to reach for the top-most entry. This comes in handy when using hackish constructs to achieve some complex semantics that are not available in the core library. + +:: + + d = Struct( + If(this._parsing, ...), + If(this._building, ...), + ) + + +Switch +------ + +Branches the construction based on a return value from a context function. This is a more general implementation than ``IfThenElse``. If no cases match the actual value, it just passes successfully, although that behavior can be overridden. 
+ +>>> d = Switch(this.n, {1: Int8ub, 2: Int16ub, 4: Int32ub}) +>>> d.build(5, n=1) +b'\x05' +>>> d.build(5, n=4) +b'\x00\x00\x00\x05' + +>>> d = Switch(this.n, {}, default=Byte) +>>> d.parse(b"\x01", n=255) +1 +>>> d.build(1, n=255) +b'\x01' + + +StopIf +------ + +Checks for a condition after each element, and stops a ``Struct``, ``Sequence`` or ``GreedyRange`` from parsing or building further. + +:: + + Struct('x'/Byte, StopIf(this.x == 0), 'y'/Byte) + Sequence('x'/Byte, StopIf(this.x == 0), 'y'/Byte) + GreedyRange(FocusedSeq(0, 'x'/Byte, StopIf(this.x == 0))) + + +Alignment and padding +===================== + +Padding +------- + +Adds additional null bytes (a filler) analog to ``Padded`` but without a subcon. This field is usually anonymous inside a ``Struct``. Internally this is just ``Padded(n, Pass)`` where ``n`` is an amount of null bytes. + +>>> d = Padding(4) +>>> d.parse(b"****") +None +>>> d.build(None) +b'\x00\x00\x00\x00' + + +Padded +------ + +Appends additional null bytes after subcon to achieve a fixed length. Note that implementation of this class uses ``stream.tell()`` to find how many bytes were written by the subcon. + +>>> d = Padded(4, Byte) +>>> d.build(255) +b'\xff\x00\x00\x00' + +Similar effect can be obtained using ``FixedSized``, but the implementation is rather different. ``FixedSized`` uses a separate ``BytesIO``, which means that ``Greedy*`` fields should work properly with it (and fail with ``Padded``) and also the stream does not need to be tellable (like pipes sockets etc). + + +Aligned +------- + +Appends additional null bytes after subcon to achieve a given modulus boundary. This implementation also uses ``stream.tell()``. + +>>> d = Aligned(4, Int16ub) +>>> d.build(1) +b'\x00\x01\x00\x00' + + +AlignedStruct +------------- + +Automatically aligns each member to modulus boundary. It does NOT align entire ``Struct``, but each member separately. 
+ +>>> d = AlignedStruct(4, "a"/Int8ub, "b"/Int16ub) +>>> d.build(dict(a=0xFF, b=0xFFFF)) +b'\xff\x00\x00\x00\xff\xff\x00\x00' diff --git a/docs/streaming.rst b/docs/streaming.rst new file mode 100644 index 000000000..f435f2f5d --- /dev/null +++ b/docs/streaming.rst @@ -0,0 +1,77 @@ +=================== +Stream manipulation +=================== + +.. note:: + + Certain constructs are available only for seekable and tellable streams (in-memory and files). Sockets and pipes support neither, so you'll have to first read the data from the stream and parse it in-memory, or use the experimental ``Rebuffered`` wrapper. + + +Field wrappers +============== + +``Pointer`` allows for non-sequential construction. The pointer first moves the stream to a new position, does the construction, and then restores the stream back to the original position. This allows for random access within the stream. + +>>> d = Pointer(8, Bytes(1)) +>>> d.parse(b"abcdefghijkl") +b'i' +>>> d.build(b"Z") +b'\x00\x00\x00\x00\x00\x00\x00\x00Z' + +``Peek`` parses a field but restores the stream position afterwards (it peeks into the stream). Building does nothing, it does NOT defer to subcon. + +>>> d = Sequence(Peek(Int16ul), Peek(Int16ub)) +>>> d.parse(b"\x01\x02") +ListContainer([513, 258]) +>>> d.sizeof() +0 + +``OffsettedEnd`` parses a greedy subcon until EOF plus a negative offset. This way you can read (almost) all data but leave some bytes left for a fixed sized footer. + +>>> d = Struct( +... "header" / Bytes(2), +... "data" / OffsettedEnd(-2, GreedyBytes), +... "footer" / Bytes(2), +... ) +>>> d.parse(b"\x01\x02\x03\x04\x05\x06\x07") +Container(header=b'\x01\x02', data=b'\x03\x04\x05', footer=b'\x06\x07') + + +Pure side effects +================= + +``Seek`` makes a jump within the stream and leaves it there, for other constructs to follow up from that location. It does not read or write anything to the stream by itself. 
+ +>>> d = Sequence(Bytes(10), Seek(5), Byte) +>>> d.build([b"0123456789", None, 255]) +b'01234\xff6789' + +``Tell`` checks the current stream position and returns it. The returned value gets automatically inserted into the context dictionary. It also does not read or write anything to the stream by itself. + +>>> d = Struct("num"/VarInt, "offset"/Tell) +>>> d.parse(b"X") +Container(num=88, offset=1) +>>> d.build(dict(num=88)) +b'X' + + +Other fields +================= + +``Pass`` literally does nothing. It is mostly used internally by ``If(IfThenElse)`` and ``Padding(Padded)``. + +>>> Pass.parse(b"") +None +>>> Pass.build(None) +b'' +>>> Pass.sizeof() +0 + +``Terminated`` only works during parsing. It checks if the stream reached EOF and raises error if not. + +>>> Terminated.parse(b"") +None +>>> Terminated.parse(b"remaining") +TerminatedError: Error in path (parsing) +expected end of stream + diff --git a/docs/string.rst b/docs/string.rst deleted file mode 100644 index a1e6a2aa7..000000000 --- a/docs/string.rst +++ /dev/null @@ -1,11 +0,0 @@ -======= -Strings -======= - -Strings in Construct work very much like strings in other languages. - -.. autofunction:: construct.String - -.. autofunction:: construct.PascalString - -.. autofunction:: construct.CString diff --git a/docs/text.rst b/docs/text.rst deleted file mode 100644 index 0353e3a8d..000000000 --- a/docs/text.rst +++ /dev/null @@ -1,227 +0,0 @@ -============ -Text parsing -============ - -What has text to do with Construct? - -As you already know at this stage of the tutorial, Construct works with binary -data. That is, all sorts of data structures, file formats, and protocols, that -have a well defined binary structure. Construct can be used to parse such data -into objects (which are easier to handle programmatically), or build objects -into binary data. - -But even with the narrow scope of protocols or file formats, there are many -textual protocols such as HTTP, or textual file formats such as RTF. 
As -Construct aims to be a one-parser-to-rule-them-all, I decided to add some -textual construction tools to Construct. - -Nevertheless, I also wrote it so as to show people that Construct is really a -parser (and more). I got lots of mails saying Construct was not really a -parser, in the computer-science sense of the word, since it doesn't define a -grammar. So now it does, mwhahaha. - -But this comes with a reservation: Construct's main target is still binary -data. It is possible to define fully functional context-free grammars with -Construct -- and I did write a grammar for a C-like language -- but if grammar -is what you seek, use a dedicated library. There are lots of possible -optimizations, such as using a tokenizer first or optimized LL/LR parsers, -handling text encoding, etc., which Construct does not (and will not) perform. -That's all for a different library. - -"Parsing HTTP ought to be enough for everyone". - -In order to use the text module, you'll have to explicitly import it. - ->>> from construct.text import * ->>> - - -Matching characters -=================== - -Char ----- - -A single character (1-byte). Note that all characters are assumed to be 8-bit -ASCII. More complex encoding are left for a higher level. - -CharOf ------- - -Matches a character that is one of a set of valid characters. - ->>> digit = CharOf("digit", "0123456789") ->>> digit.parse("6") -'6' ->>> digit.parse("v") -Traceback (most recent call last): - . - . -construct.core.ValidatorError: ('invalid object', 'v') - - -CharNoneOf ----------- - -Matches a character that is not part of the set of invalid characters. - ->>> nonquote = CharNoneOf("quote", '"') ->>> nonquote.parse("x") -'x' ->>> nonquote.parse('"') -Traceback (most recent call last): - . - . -construct.core.ValidatorError: ('invalid object', '"') - - -Literal -------- - -Matches a given literal pattern (i.e., a keyword). - ->>> while_statement = Struct("while_statement", -... Literal("while"), -... 
GreedyRange(CharOf(None, " \t")), -... Word("name"), -... Literal(":") -... ) ->>> while_statement.parse("while True:") -Container(name = 'True') ->>> while_statement.parse("if True:") -Traceback (most recent call last): - . - . -construct.extensions.ConstError: expected 'while', found 'if ' ->>> - - -Select ------- - -Selects the first matching subconstruct, and uses it for parsing or building. -The order of the subconstructs is meaningful. Also note that Select can -operate with seekable streams only (files or in-memory). Raises SelectError if -no matching subconstruct is found. - ->>> c = Select("foo", -... Sequence("hex", Literal("0x"), HexNumber("value")), -... FloatNumber("flt"), -... DecNumber("dec"), -... ) ->>> c.parse("123") -123 ->>> c.parse("0x123") -[291] ->>> c.parse("123.456") -123.456 ->>> ->>> c.build(123) -'123' ->>> c.build([123]) -'0x7b' ->>> c.build(123.456) -'123.456' - - -Constructs for Languages -======================== - -These constructs are provided because they are likely to be very useful with -most common computer languages (C, java, python, ruby, ...) - -QuotedString ------------- - -A quoted string. You can define the starting and ending quote chars, and -escape char. - ->>> q = QuotedString("foo", start_char = "{", end_char = "}", esc_char = "~") ->>> q.parse("{hello world") -Traceback (most recent call last): - . - . -construct.core.EndOfStreamError ->>> q.parse("{hello world}") -'hello world' ->>> q.parse("{this ~} is an escaped ending quote }") -'this } is an escaped ending quote ' ->>> - - -Whitespace ----------- - -Whitespace is a sequence of whitespace chars (by default space and tab) that -has no programmatic meaning. It is only used to separate tokens or to make the -code readable. You can specify ``allow_empty = False``, which means that the -whitespace is mandatory. Otherwise, whitespace is optional. - ->>> Whitespace().parse(" \t") ->>> - - -DecNumber ---------- - -Decimal integral number ((0-9)+). 
Returns a python integer. - ->>> DecNumber("foo").parse("123+456") -123 - - -HexNumber ---------- - -Hexadecimal number ((0-9, A-F, a-f)+). Returns a python integer. - ->>> HexNumber("foo").parse("c0ffee") -12648430 - - -FloatNumber ------------ - -Floating-pointer number ((0-9)+\.(0-9)+). Returns a python float. - ->>> FloatNumber("foo").parse("123.456") -123.456 - - -Word ----- - -A sequence of alpha characters ((A-Z, a-z)+). - ->>> Word("foo").parse("hello world") -'hello' - - -StringUpto ----------- - -A string terminated by some character (similar to CString, but the terminator -char is not consumed). - ->>> StringUpto("foo", "x").parse("hellox") -'hello' - - -Line ----- - -A text line (terminated by ``\r`` or ``\n``) - ->>> Line("foo").parse("hello world\n") -'hello world' - - -Identifier ----------- - -A sequence of alpha-numeric or underscore characters commonly used as -identifiers in programming languages. The first char must be a alpha or -underscore (not number). - ->>> Identifier("foo").parse("fat_boy3 beefed") -'fat_boy3' diff --git a/docs/transition210.rst b/docs/transition210.rst new file mode 100644 index 000000000..0148fbc89 --- /dev/null +++ b/docs/transition210.rst @@ -0,0 +1,55 @@ +================== +Transition to 2.10 +================== + + +Overall +========== + +Dropped support for Python 2.7 and 3.5 (pypy is also supported) + +Bytes GreedyBytes can build from bytearrays (not just bytes) + +Embedded and EmbeddedSwitch were permanently removed + +Exceptions always display path information + +build_file() opens a file for both reading and writing + +BytesInteger BitsInteger can take lambda for swapped parameter + +cloudpickle is now supported and tested for + +ZigZag signed integer encoding from Protocol Buffers added + +FormatField now supports ? 
format string + +CompressedLZ4 tunneling class added + +Windows is now officially supported and tested for + +BytesInteger and BitsInteger are checking numbers are valid + +BitsInteger swapped semantic was fixed + +Compilation covers building as well, parsing slightly improved + +Array GreedyRange RepeatUntil builders use discard option + +Sequence build fixed, no longer skips subcons on short obj + +Lazy class fixed, seeks the not-yet parsed subcon + +BytesInteger BitsInteger cannot process zero length anymore + +OffsettedEnd class added in streaming category + +EncryptedSym EncryptedSymAead classes were added in tunneling category + +StringEncoded now properly raises StringError exceptions on encoding/decoding (bugfix) + +Soft dependency on enum34 is no longer included + +Container class was overhauled (thanks to @denballakh) + +Supported Python runtimes (now 3.8 through 3.12) (thanks to @denballakh) diff --git a/docs/transition28.rst b/docs/transition28.rst new file mode 100644 index 000000000..b90feb620 --- /dev/null +++ b/docs/transition28.rst @@ -0,0 +1,228 @@ +================= +Transition to 2.8 +================= + +Overall +======= + +All fields and complex constructs are now nameless, you need to use / operator to name them. Look at Struct Sequence Range for how to use + >> [] operators to construct larger instances. 
+ + + +Integers and floats +------------------- + +{U,S}{L,B,N}Int{8,16,24,32,64} was made Int{8,16,24,32,64}{u,s}{l,b,n} + +Byte, Short, Int, Long were made aliases to Int{8,16,32,64}ub + +{B,L,N}Float{32,64} was made Float{32,64}{b,l,n} + +Single, Double were made aliases to Float{32,64}b + +VarInt was added + +Bit, Nibble, Octet remain + +All above were made singletons + + + +Fields +------ + +Field was made Bytes (operates on b-strings) + +BytesInteger was added (operates on integers) + +BitField was made BitsInteger (operates on integers) + +GreedyBytes was added + +Flag was made a singleton + +Enum takes the `default` keyword argument (no underscores) + +Enum was fixed, the context value is a string label (not integer). + +FlagsEnum remains + + + +Strings +------- + +String remains + +PascalString argument `length_field=UBInt8` was made `lengthfield` and explicit + +CString dropped `char_field` + +GreedyString dropped `char_field` + +All above use optional `encoding` argument or use global encoding (see ``setglobalstringencoding()``) + + + +Structures and Sequences +------------------------ + +Struct uses syntax like ``Struct("num"/Int32ub, "text"/CString())`` and ``"num"/Int32ub + "text"/CString()`` + +Sequence uses syntax like ``Byte >> Int16ul`` and ``Sequence(Byte, Int16ul)`` + +On Python 3.6 you can also use syntax like ``Struct(num=Int32ub, text=CString())`` and ``Sequence(num=Int32ub, text=CString())`` + + +Ranges and Arrays +----------------- + +Array uses syntax like ``Byte[10]`` and ``Array(10, Byte)`` + +Range uses syntax like ``Byte[5:]`` and ``Byte[:5]`` and ``Range(min=5, max=2**64, Byte)`` + +GreedyRange uses syntax like ``Byte[:]`` and ``GreedyRange(Byte)`` + +PrefixedArray takes explicit `lengthfield` before subcon + +OpenRange and GreedyRange were dropped + +OptionalGreedyRange was renamed to GreedyRange + +RepeatUntil takes 3-argument (last element, list, context) lambda + + + +Lazy collections +---------------- + +LazyStruct LazyRange 
LazySequence were added + +OnDemand returns a parameterless lambda that returns the parsed object + +OnDemandPointer was dropped + +LazyBound remains + + + +Padding and Alignment +--------------------- + +Aligned takes explicit `modulus` before the subcon + +Padded was added, also takes explicit `modulus` before the subcon + +Padding remains + +Padding and Padded dropped `strict` parameter + + + +Optional +-------- + +If dropped `elsevalue` and always returns None + +IfThenElse parameters renamed to `thensubcon` and `elsesubcon` + +Switch remains + +Optional remains + +Union takes explicit `parsefrom` so parsing seeks stream by selected subcon size, or does not seek by default + +Select remains + + + +Miscellaneous and others +------------------------ + +Value was made Computed + +Embed was made Embedded + +Alias was removed + +Magic was made Const + +Const has reordered parameters, like ``Const(b"\\x00")`` and ``Const(0, Int8ub)``. + +Pass remains + +Terminator was renamed Terminated + +OneOf and NoneOf remain + +Filter added + +LengthValueAdapter was made Prefixed, and gained `includelength` option + +Hex added + +HexDumpAdapter was made HexDump + +HexDump builds from hexdumped data, not from raw bytes + +SlicingAdapter and IndexingAdapter were made Slicing and Indexing + +ExprAdapter ExprSymmetricAdapter ExprValidator were added or remain + +SeqOfOne was replaced by FocusedSeq + +Numpy added + +NamedTuple added + +Check added + +Error added + +Default added + +Rebuild added + +StopIf added + + + +Stream manipulation +------------------- + +Bitwise was reimplemented using Restreamed + +Bytewise was added + +Restreamed and Rebuffered were redesigned + +Anchor was made Tell and a singleton + +Seek was added + +Pointer remains, size cannot be computed + +Peek dropped `perform_build` parameter, never builds + + + +Tunneling +--------- + +RawCopy was added, returns both parsed object and raw bytes consumed + +Prefixed was added, allows to put greedy fields inside 
structs and sequences + +ByteSwapped and BitsSwapped were added + +Checksum was added + +Compressed was added + + +Exceptions +----------- + +FocusedError OverwriteError were removed + +FieldError was replaced with StreamError (raised when stream returns less than requested amount) and FormatFieldError (raised by FormatField class, for example if building Float from non-float value and struct.pack complaining). diff --git a/docs/transition29.rst b/docs/transition29.rst new file mode 100644 index 000000000..7aefbffe4 --- /dev/null +++ b/docs/transition29.rst @@ -0,0 +1,149 @@ +================= +Transition to 2.9 +================= + + +Overall +========== + +**Compilation feature for faster performance!** Read `this tutorial chapter `_, particularly its restrictions section. + +**Docstrings of all classes were overhauled.** Check the `Core API pages `_. + + +General classes +------------------- + +All constructs: `parse build sizeof` methods take context entries ONLY as keyword parameters \*\*contextkw (`see tutorial page `_) + +All constructs: `parse_file` and `build_file` methods were added (`see tutorial page `_) + +All constructs: operator * can be used for docstrings and parsed hooks (`see tutorial page `_ and `tutorial page `_) + +All constructs: added `compile` and `benchmark` methods (`see tutorial page `_) + +All constructs: added `parsed` hook/callback (`see tutorial page `_) + +Compiled added (used internally) + +Half was added alongside Single, Double + +String* require explicit encodings, all of them support UTF16 UTF32 encodings, but PaddedString CString dropped some parameters and support only encodings explicitly listed in `possiblestringencodings` (`see tutorial page `_) + +PaddedString CString classes reimplemented using NullTerminated NullStripped + +String* build empty strings into empty bytes (despite for example UTF16 encoding empty string into 2 bytes marker) + +String class renamed to PaddedString + +Enum FlagsEnum can merge labels from 
IntEnum IntFlag, from enum34 module (`see tutorial page `_) + +Enum FlagsEnum dropped `default` parameter but returns integer if no mapping found (`see tutorial page `_) + +Enum FlagsEnum can build from integers and labels, and expose labels as attributes, as bitwisable strings (`see tutorial page `_) + +FlagsEnum had parsing semantics fixed (affecting multi-bit flags) + +Mapping replaced SymmetricMapping, and dropped `default` parameter (`see API page `_) + +Struct Sequence FocusedSeq Union LazyStruct have new embedding semantics (`see tutorial page `_) + +Struct Sequence FocusedSeq Union LazyStruct are exposing subcons, as attributes and in _subcons context entry (`see tutorial page `_) + +Struct Sequence FocusedSeq Union LazyStruct are exposing _ _params _root _parsing _building _sizing _subcons _io _index entries in the context (`see tutorial page `_) + +EmbeddedBitStruct removed, instead use BitStruct with Bytewise-wrapped fields (`see tutorial page `_) + +Array reimplemented without Range, does not use stream.tell() + +Range removed, GreedyRange does not support [:] syntax + +Array GreedyRange RepeatUntil added `discard` parameter (`see tutorial page `_) + +Const has reordered parameters, `value` before `subcon` (`see API page `_) + +Index added, in Miscellaneous (`see tutorial page `_) + +Pickled added, in Miscellaneous (`see tutorial page `_) + +Timestamp added, in Miscellaneous (`see tutorial page `_) + +Hex HexDump reimplemented, return bytes and not hexlified strings (`see tutorial page `_) + +Select dropped `includename` parameter (`see API page `_) + +Select allows to build from none if any of its subcons can + +If IfThenElse parameter `predicate` renamed to `condfunc`, and cannot be embedded (`see API page `_) + +Switch updated, `default` parameter is `Pass` instead of `NoDefault`, dropped `includekey` parameter, and cannot be embedded (`see API page `_) + +EmbeddedSwitch added, in Conditional (`see tutorial page `_) + +StopIf raises `StopFieldError` 
instead of `StopIteration` (`see API page `_) + +Pointer changed size to 0, can be parsed lazily, can also select a stream from context entry + +PrefixedArray parameter `lengthfield` renamed to `countfield` (`see API page `_) + +FixedSized NullTerminated NullStripped added, in Tunneling (`see tutorial page `_) + +RestreamData added, in Tunneling (`see tutorial page `_) + +Transformed added, in Tunneling (`see tutorial page `_) + +ProcessXor and ProcessRotateLeft added, in Tunneling (`see tutorial page `_) + +ExprAdapter Mapping Restreamed changed parameters order (decoders before encoders) + +Adapter changed parameters, added `path` parameter to `_encode _decode _validate` methods (`see tutorial page `_) + +Lazy added, in Lazy equivalents category (`see tutorial page `_) + +LazyStruct LazyArray reimplemented with new lazy parsing semantics (`see tutorial page `_) + +LazySequence LazyRange LazyField(OnDemand) removed + +LazyBound remains, but changed to parameter-less lambda (`see tutorial page `_) + +Probe Debugger updated, ProbeInto removed (`see tutorial page `_) + + +Support classes +-------------------- + +Container updated, uses `globalPrintFullStrings globalPrintFalseFlags globalPrintPrivateEntries` + +Container updated, equality does not check hidden keys like _private or keys order + +FlagsContainer removed + +RestreamedBytesIO supports reading till EOF, enabling GreedyBytes GreedyString inside Bitwise Bytewise + +HexString removed + + +Exceptions +------------- + +Exceptions always display path information + +FieldError was replaced with StreamError (raised when stream returns less than requested amount) and FormatFieldError (raised by FormatField class, for example if building Float from non-float value and struct.pack complains). 
+ +StreamError can be raised by most classes, when the stream is not seekable or tellable + +StringError can be raised by classes like Bytes Const, when bytes are expected but a unicode string is given as build value + +BitIntegerError was replaced by IntegerError + +Struct Sequence can raise IndexError KeyError when dictionaries are missing entries + +RepeatError added + +IndexFieldError added + +CheckError added + +NamedTupleError added + +RawCopyError added diff --git a/docs/tunneling.rst b/docs/tunneling.rst new file mode 100644 index 000000000..cffe69c9e --- /dev/null +++ b/docs/tunneling.rst @@ -0,0 +1,274 @@ +================= +Tunneling tactics +================= + + +Obtaining raw bytes +------------------- + +When some value needs to be processed as both a parsed object and its raw bytes representation, both of these can be obtained using RawCopy. You can build from either the object or raw bytes as well. The parsed dict also happens to contain the stream offsets, if you need to know at which position it resides in the stream or if you need to know its size in bytes. + +When building, if both the ``"value"`` and ``"data"`` keys are present, then the ``"data"`` key is used and the ``"value"`` key is ignored. This is undesirable in the case that you parse some data for the purpose of modifying it and writing it back; in this case, delete the ``"data"`` key when modifying the ``"value"`` key to correctly rebuild the former. 
+ +>>> d = RawCopy(Byte) +>>> d.parse(b"\xff") +Container(data=b'\xff', value=255, offset1=0, offset2=1, length=1) +>>> d.build(dict(data=b"\xff")) +b'\xff' +>>> d.build(dict(value=255)) +b'\xff' + + +Endianness +---------- + +When little endianness is needed, either use integer fields like ``Int*l`` or ``BytesInteger(swapped=True)`` or swap bytes of an arbitrary field: + +:: + + Int24ul <--> ByteSwapped(Int24ub) <--> BytesInteger(3, swapped=True) <--> ByteSwapped(BytesInteger(3)) + +>>> Int24ul.build(0x010203) +b'\x03\x02\x01' + +When bits within each byte need to be swapped, there is another wrapper: + +>>> d = Bitwise(Bytes(8)) +>>> d.parse(b"\x01") +b'\x00\x00\x00\x00\x00\x00\x00\x01' +>>> d = BitsSwapped(Bitwise(Bytes(8))) +>>> d.parse(b"\x01") +b'\x01\x00\x00\x00\x00\x00\x00\x00' + +In case that endianness is determined at parse/build time, you can pass endianness (``swapped`` parameter) by the context: + +>>> d = BytesInteger(2, swapped=this.swapped) +>>> d.build(1, swapped=True) +b'\x01\x00' +>>> d = BitsInteger(16, swapped=this.swapped) +>>> d.build(1, swapped=True) +b'\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + + +Working with bytes subsets +-------------------------------------------- + +``Greedy*`` constructs consume as much data as possible, they read until EOF. This is convenient when building from a list of unknown length but becomes a problem when parsing it back and the list needs to be separated from following data. This can be achieved either by prepending a byte count (see ``Prefixed``) or by prepending an element count (see ``PrefixedArray``): + +``VarInt`` encoding is recommended because it is both compact and never overflows. Also and optionally, the length field can include its own size. If so, length field must be of fixed size. 
+ +>>> d = Prefixed(VarInt, GreedyRange(Int32ul)) +>>> d.parse(b"\x08abcdefgh") +ListContainer([1684234849, 1751606885]) + +>>> d = PrefixedArray(VarInt, Int32ul) +>>> d.parse(b"\x02abcdefgh") +ListContainer([1684234849, 1751606885]) + +There are also other means of restricting constructs to substreamed data. All 3 classes below work by substreaming data, meaning the subcon is not given the original stream but a new ``BytesIO`` made out of pre-read bytes. This allows ``Greedy*`` fields to work properly. + +``FixedSized`` consumes a specified amount and then exposes inner construct to a new stream built out of those bytes. When building, it appends a padding to make a specified total. + +>>> d = FixedSized(10, Byte) +>>> d.parse(b'\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00') +255 + +``FixedSized`` is similar to ``Padded``. The results seem identical but the implementation is entirely different. ``FixedSized`` uses a substream and ``Padded`` uses stream.tell(). Therefore: + +:: + + # valid + FixedSized(10, GreedyBytes) + # UNSAFE + Padded(10, GreedyBytes) + +``NullTerminated`` consumes bytes up to the first occurrence of the term. When building, it just writes the subcon followed by the term. + +>>> d = NullTerminated(Byte) +>>> d.parse(b'\xff\x00') +255 + +.. warning:: + + The term can be multiple bytes, to support string classes with UTF16/32 encodings for example. Be warned however: as reported in Issue #1046, the data read must be a multiple of the term length and the term must start at a unit boundary, otherwise strange things happen when parsing. + +``NullStripped`` consumes bytes till EOF, and for that matter should be restricted by ``Prefixed``, ``FixedSized`` etc, and then strips paddings. Subcon is parsed using a new stream built using those stripped bytes. When building, it just builds the subcon as-is.
+ +>>> d = NullStripped(Byte) +>>> d.parse(b'\xff\x00\x00') +255 + + +Working with different bytes +-------------------------------------------------- + +``RestreamData`` allows you to insert a field that parses some data that came either from some other field, from the context (like ``Bytes``) or some literal hardcoded value in your code. Comes handy when for example, you are testing a large struct by parsing null bytes, but some field is unable to parse null bytes (like ``Numpy``). It substitutes the stream with another data for the purposes of parsing a particular field in a ``Struct``. + +Instead of data itself (bytes object) you can reference another stream (taken from the context like ``this._stream``) or use a Construct that parses into bytes (including those exposed via context like ``this._subcons.field``). + +:: + + >>> d = RestreamData(b"\x01", Int8ub) + >>> d.parse(b"") + 1 + >>> d.build(0) + b'' + +:: + + >>> d = RestreamData(NullTerminated(GreedyBytes), Int16ub) + >>> d.parse(b"\x01\x02\x00") + 258 + + >>> d = RestreamData(FixedSized(2, GreedyBytes), Int16ub) + >>> d.parse(b"\x01\x02\x00") + 258 + +:: + + d = Struct( + "numpy_data" / Computed(b"\x93NUMPY\x01\x00F\x00{'descr': '>> from construct.lib import * + >>> d = Transformed(Bytes(16), bytes2bits, 2, bits2bytes, 2) + >>> d.parse(b"\x00\x00") + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + +``Transformed`` can also process unknown amount of bytes, if that amount is entire data. Decode amount and encode amount are then set to None. 
+ +:: + + >>> from construct.lib import * + >>> d = Transformed(GreedyBytes, bytes2bits, None, bits2bytes, None) + >>> d.parse(b"\x00\x00") + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + +:: + + # Bitwise implementation + try: + size = subcon.sizeof() + macro = Transformed(subcon, bytes2bits, size//8, bits2bytes, size//8) + except SizeofError: + macro = Restreamed(subcon, bytes2bits, 1, bits2bytes, 8, lambda n: n//8) + +``Restreamed`` is similar to ``Transformed``, but the main difference is that ``Transformed`` requires fixed-sized subcon because it reads all bytes in advance, processes them, and then feeds them to the subcon. ``Restreamed`` on the other hand, reads few bytes at a time, the minimum amount on each stream read. Since both are used mostly internally, there is no tutorial how to use it, other than this short code above. + + +Processing data with XOR and ROL +---------------------------------------- + +This chapter is mostly relevant to KaitaiStruct compiler implementation, as following constructs exist mostly for that purpose. + +Data can be transformed by XORing with a single or several bytes, and the key can also be taken from the context at runtime. Key can be of any positive length. + +>>> d = ProcessXor(0xf0 or b'\xf0', Int16ub) +>>> print(hex(d.parse(b"\x00\xff"))) +0xf00f +>>> d.sizeof() +2 + +Data can also be rotated (cycle shifted). Rotation is to the left on parsing, and to the right on building. Amount is in bits, and can be negative to make rotation right instead of left. Group size defines the size of chunks to which rotation is applied. + +>>> d = ProcessRotateLeft(4, 1, Int16ub) +>>> print(hex(d.parse(b'\x0f\xf0'))) +0xf00f +>>> d = ProcessRotateLeft(4, 2, Int16ub) +>>> print(hex(d.parse(b'\x0f\xf0'))) +0xff00 +>>> d.sizeof() +2 + +Note that the classes read entire stream till EOF so they should be wrapped in ``FixedSized``, ``Prefixed`` etc unless you actually want to process the entire remaining stream. 
+ + +Compression and checksumming +---------------------------------------- + +Data can be easily checksummed. Note that the checksum field does not need to be ``Bytes``, and the lambda may return an integer or otherwise. + +:: + + import hashlib + d = Struct( + "fields" / RawCopy(Struct( + Padding(1000), + )), + "checksum" / Checksum(Bytes(64), + lambda data: hashlib.sha512(data).digest(), + this.fields.data), + ) + d.build(dict(fields=dict(value={}))) + +:: + + import hashlib + d = Struct( + "offset" / Tell, + "checksum" / Padding(64), + "fields" / RawCopy(Struct( + Padding(1000), + )), + "checksum" / Pointer(this.offset, Checksum(Bytes(64), + lambda data: hashlib.sha512(data).digest(), + this.fields.data)), + ) + d.build(dict(fields=dict(value={}))) + + +Data can also be easily compressed. Supported encodings include zlib/gzip/bzip2/lzma and the entire codecs module. When parsing, the entire stream is consumed. When building, it puts compressed bytes without marking the end. This construct should be used with ``Prefixed`` or the entire stream. + +>>> d = Prefixed(VarInt, Compressed(GreedyBytes, "zlib")) +>>> d.build(bytes(100)) +b'\x0cx\x9cc`\xa0=\x00\x00\x00d\x00\x01' +>>> len(_) +13 + +LZ4 compression is also supported. It provides less compaction but does it at higher throughputs. This class is also supposed to be used with ``Prefixed`` class. + +>>> d = Prefixed(VarInt, CompressedLZ4(GreedyBytes)) +>>> d.build(bytes(100)) +b'"\x04"M\x18h@d\x00\x00\x00\x00\x00\x00\x00#\x0b\x00\x00\x00\x1f\x00\x01\x00KP\x00\x00\x00\x00\x00\x00\x00\x00\x00' +>>> len(_) +35 + + +Encryption and authentication +---------------------------------------------------- + +Subcons can also be easily encrypted and authenticated in an AEAD manner. Please note that the data sometimes needs to be aligned to a particular block size that depends on the encryption scheme used. + +:: + + >>> from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes + >>> d = Struct( + ...
"iv" / Default(Bytes(16), os.urandom(16)), + ... "enc_data" / EncryptedSym( + ... Aligned(16, + ... Struct( + ... "width" / Int16ul, + ... "height" / Int16ul, + ... ) + ... ), + ... lambda ctx: Cipher(algorithms.AES(ctx._.key), modes.CBC(ctx.iv)) + ... ) + ... ) + >>> key128 = b"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + >>> d.build({"enc_data": {"width": 5, "height": 4}}, key=key128) + b"o\x11i\x98~H\xc9\x1c\x17\x83\xf6|U:\x1a\x86+\x00\x89\xf7\x8e\xc3L\x04\t\xca\x8a\xc8\xc2\xfb'\xc8" + >>> d.parse(b"o\x11i\x98~H\xc9\x1c\x17\x83\xf6|U:\x1a\x86+\x00\x89\xf7\x8e\xc3L\x04\t\xca\x8a\xc8\xc2\xfb'\xc8", key=key128) + Container: + iv = b'o\x11i\x98~H\xc9\x1c\x17\x83\xf6|U:\x1a\x86' (total 16) + enc_data = Container: + width = 5 + height = 4 + diff --git a/gallery/__init__.py b/gallery/__init__.py new file mode 100644 index 000000000..e3c67104e --- /dev/null +++ b/gallery/__init__.py @@ -0,0 +1,2 @@ +from .pe32coff import pe32file +from .ut_index import UTIndex diff --git a/gallery/elf.py b/gallery/elf.py new file mode 100644 index 000000000..ce421251a --- /dev/null +++ b/gallery/elf.py @@ -0,0 +1,380 @@ +from construct import * + + +identifier = Struct( + "signature" / Const(b"\x7fELF"), + "elfclass" / Enum(Byte, ELFCLASSNONE=0, ELFCLASS32=1, ELFCLASS64=2), + "encoding" / Enum(Byte, ELFDATANONE=0, LSB=1, MSB=2), + "version" / Enum(Byte, EV_NONE=0, EV_CURRENT=1), + "osabi" + / Enum( + Byte, + ELFOSABI_NONE=0, + ELFOSABI_HPUX=1, + ELFOSABI_NETBSD=2, + ELFOSABI_GNU=3, + ELFOSABI_SOLARIS=6, + ELFOSABI_AIX=7, + ELFOSABI_IRIX=8, + ELFOSABI_FREEBSD=9, + ELFOSABI_TRU64=10, + ELFOSABI_MODESTO=11, + ELFOSABI_OPENBSD=12, + ELFOSABI_OPENVMS=13, + ELFOSABI_NSK=14, + ELFOSABI_AROS=15, + ELFOSABI_FENIXOS=16, + ELFOSABI_CLOUDABI=17, + ELFOSABI_OPENVOS=18, + ), + "abiversion" / Byte, + Padding(7), +) + + +def program_header(ELFInt32, ELFInt64, is64bit=True): + """Program header structure.""" + Addr = IfThenElse(is64bit, ELFInt64, ELFInt32) + + return Struct( + 
"p_type" + / Enum( + ELFInt32, + PT_NULL=0x00000000, + PT_LOAD=0x00000001, + PT_DYNAMIC=0x00000002, + PT_INTERP=0x00000003, + PT_NOTE=0x00000004, + PT_SHLIB=0x00000005, + PT_PHDR=0x00000006, + PT_TLS=0x00000007, + PT_LDOOS=0x60000000, + PT_HIOS=0x6FFFFFFF, + PT_LOPROC=0x70000000, + PT_HIPROC=0x7FFFFFFF, + ), + "flags_64" + / If( + is64bit, + Enum( + ELFInt32, R__=0x4, RW_=0x6, RWX=0x7, _WX=0x3, __X=0x1, _W_=0x2, R_X=0x5 + ), + ), + "offset" / Addr, + "virtual_address" / Addr, + "physical_address" / Addr, + "size_file" / Addr, + "size_mem" / Addr, + "flags_32" + / If( + (not is64bit), + Enum( + ELFInt32, R__=0x4, RW_=0x6, RWX=0x7, _WX=0x3, __X=0x1, _W_=0x2, R_X=0x5 + ), + ), + "alignment" / Addr, + ) + + +def section_header(ELFInt32, ELFInt64, is64bit=True): + """Section header structure; address-sized fields use ELFInt64 when is64bit is true, otherwise ELFInt32.""" + Addr = IfThenElse(is64bit, ELFInt64, ELFInt32) + + return Struct( + "sh_name_offset" / ELFInt32, + "sh_name" + / Pointer(this._.strtab_data_offset + this.sh_name_offset, CString("utf-8")), + "sh_type" + / Enum( + ELFInt32, + SHT_NULL=0x0, + SHT_PROGBITS=0x1, + SHT_SYMTAB=0x2, + SHT_STRTAB=0x3, + SHT_RELA=0x4, + SHT_HASH=0x5, + SHT_DYNAMIC=0x6, + SHT_NOTE=0x7, + SHT_NOBITS=0x8, + SHT_REL=0x9, + SHT_SHLIB=0x0A, + SHT_DYNSYM=0x0B, + SHT_INIT_ARRAY=0x0E, + SHT_FINI_ARRAY=0x0F, + SHT_PREINIT_ARRAY=0x10, + SHT_GROUP=0x11, + SHT_SYMTAB_SHNDX=0x12, + SHT_NUM=0x13, + SHT_LOOS=0x60000000, + ), + "sh_flags" + / Enum( + Addr, + SHF_WRITE=0x1, + SHF_ALLOC=0x2, + SHF_EXECINSTR=0x4, + SHF_MERGE=0x10, + SHF_STRINGS=0x20, + SHF_INFO_LINK=0x40, + SHF_LINK_ORDER=0x80, + SHF_OS_NONCONFORMING=0x100, + SHF_GROUP=0x200, + SHF_TLS=0x400, + SHF_MASKOS=0x0FF00000, + SHF_MASKPROC=0xF0000000, + SHF_ORDERED=0x4000000, + SHF_EXCLUDE=0x8000000, + ), + "sh_addr" / Addr, + "sh_offset" / Addr, + "sh_size" / Addr, + "sh_link" / ELFInt32, + "sh_info" / ELFInt32, + "sh_addralign" / Addr, + "sh_entsize" / Addr, + ) + + +def body(ELFInt16, ELFInt32, ELFInt64, is64bit=True): + """ELF file structure.""" + p_header =
program_header(ELFInt32, ELFInt64, is64bit) + s_header = section_header(ELFInt32, ELFInt64, is64bit) + + return Struct( + "type" + / Enum( + ELFInt16, + ET_NONE=0, + ET_REL=1, + ET_EXEC=2, + ET_DYN=3, + ET_CORE=4, + ET_LOOS=0xFE00, + ET_HIOS=0xFEFF, + ET_LOPROC=0xFF00, + ET_HIPROC=0xFFFF, + ), + "machine" + / Enum( + ELFInt16, + EM_NONE=0, + EM_M32=1, + EM_SPARC=2, + EM_386=3, + EM_68K=4, + EM_88K=5, + EM_860=7, + EM_MIPS=8, + EM_S370=9, + EM_MIPS_RS3_LE=10, + EM_PARISC=15, + EM_VPP500=17, + EM_SPARC32PLUS=18, + EM_960=19, + EM_PPC=20, + EM_PPC64=21, + EM_S390=22, + EM_V800=36, + EM_FR20=37, + EM_RH32=38, + EM_RCE=39, + EM_ARM=40, + EM_ALPHA=41, + EM_SH=42, + EM_SPARCV9=43, + EM_TRICORE=44, + EM_ARC=45, + EM_H8_300=46, + EM_H8_300H=47, + EM_H8S=48, + EM_H8_500=49, + EM_IA_64=50, + EM_MIPS_X=51, + EM_COLDFIRE=52, + EM_68HC12=53, + EM_MMA=54, + EM_PCP=55, + EM_NCPU=56, + EM_NDR1=57, + EM_STARCORE=58, + EM_ME16=59, + EM_ST100=60, + EM_TINYJ=61, + EM_X86_64=62, + EM_PDSP=63, + EM_PDP10=64, + EM_PDP11=65, + EM_FX66=66, + EM_ST9PLUS=67, + EM_ST7=68, + EM_68HC16=69, + EM_68HC11=70, + EM_68HC08=71, + EM_68HC05=72, + EM_SVX=73, + EM_ST19=74, + EM_VAX=75, + EM_CRIS=76, + EM_JAVELIN=77, + EM_FIREPATH=78, + EM_ZSP=79, + EM_MMIX=80, + EM_HUANY=81, + EM_PRISM=82, + EM_AVR=83, + EM_FR30=84, + EM_D10V=85, + EM_D30V=86, + EM_V850=87, + EM_M32R=88, + EM_MN10300=89, + EM_MN10200=90, + EM_PJ=91, + EM_OPENRISC=92, + EM_ARC_COMPACT=93, + EM_XTENSA=94, + EM_VIDEOCORE=95, + EM_TMM_GPP=96, + EM_NS32K=97, + EM_TPC=98, + EM_SNP1K=99, + EM_ST200=100, + EM_IP2K=101, + EM_MAX=102, + EM_CR=103, + EM_F2MC16=104, + EM_MSP430=105, + EM_BLACKFIN=106, + EM_SE_C33=107, + EM_SEP=108, + EM_ARCA=109, + EM_UNICORE=110, + EM_EXCESS=111, + EM_DXP=112, + EM_ALTERA_NIOS2=113, + EM_CRX=114, + EM_XGATE=115, + EM_C166=116, + EM_M16C=117, + EM_DSPIC30F=118, + EM_CE=119, + EM_M32C=120, + EM_TSK3000=131, + EM_RS08=132, + EM_SHARC=133, + EM_ECOG2=134, + EM_SCORE7=135, + EM_DSP24=136, + EM_VIDEOCORE3=137, + 
EM_LATTICEMICO32=138, + EM_SE_C17=139, + EM_TI_C6000=140, + EM_TI_C2000=141, + EM_TI_C5500=142, + EM_TI_ARP32=143, + EM_TI_PRU=144, + EM_MMDSP_PLUS=160, + EM_CYPRESS_M8C=161, + EM_R32C=162, + EM_TRIMEDIA=163, + EM_HEXAGON=164, + EM_8051=165, + EM_STXP7X=166, + EM_NDS32=167, + EM_ECOG1=168, + EM_MAXQ30=169, + EM_XIMO16=170, + EM_MANIK=171, + EM_CRAYNV2=172, + EM_RX=173, + EM_METAG=174, + EM_MCST_ELBRUS=175, + EM_ECOG16=176, + EM_CR16=177, + EM_ETPU=178, + EM_SLE9X=179, + EM_L10M=180, + EM_K10M=181, + EM_AARCH64=183, + EM_AVR32=185, + EM_STM8=186, + EM_TILE64=187, + EM_TILEPRO=188, + EM_CUDA=190, + EM_TILEGX=191, + EM_CLOUDSHIELD=192, + EM_COREA_1ST=193, + EM_COREA_2ND=194, + EM_ARC_COMPACT2=195, + EM_OPEN8=196, + EM_RL78=197, + EM_VIDEOCORE5=198, + EM_78KOR=199, + EM_56800EX=200, + EM_BA1=201, + EM_BA2=202, + EM_XCORE=203, + EM_MCHP_PIC=204, + EM_INTEL205=205, + EM_INTEL206=206, + EM_INTEL207=207, + EM_INTEL208=208, + EM_INTEL209=209, + EM_KM32=210, + EM_KMX32=211, + EM_KMX16=212, + EM_KMX8=213, + EM_KVARC=214, + EM_CDP=215, + EM_COGE=216, + EM_COOL=217, + EM_NORC=218, + EM_CSR_KALIMBA=219, + EM_Z80=220, + EM_VISIUM=221, + EM_FT32=222, + EM_MOXIE=223, + EM_AMDGPU=224, + EM_RISCV=243, + ), + "version" / Enum(ELFInt32, EV_NONE=0, EV_CURRENT=1), + "entry" / IfThenElse(is64bit, ELFInt64, ELFInt32), + "ph_offset" / IfThenElse(is64bit, ELFInt64, ELFInt32), + "sh_offset" / IfThenElse(is64bit, ELFInt64, ELFInt32), + "flags" / ELFInt32, + "header_size" / ELFInt16, + "ph_entry_size" / ELFInt16, + "ph_count" / ELFInt16, + "sh_entry_size" / ELFInt16, + "sh_count" / ELFInt16, + "strtab_section_index" / ELFInt16, + "strtab_data_offset" + / Pointer( + this.sh_offset + + this.strtab_section_index * this.sh_entry_size + + (24 if is64bit else 16), + ELFInt32, + ), + "program_table" / Pointer(this.ph_offset, p_header[this.ph_count]), + "sections" / Pointer(this.sh_offset, s_header[this.sh_count]), + ) + + +# ELF file structure +elf = Struct( + "identifier" / identifier, + "body" + / 
IfThenElse( + this.identifier.encoding == "LSB", + IfThenElse( + this.identifier.elfclass == "ELFCLASS64", + body(Int16ul, Int32ul, Int64ul, is64bit=True), + body(Int16ul, Int32ul, Int64ul, is64bit=False), + ), + IfThenElse( + this.identifier.elfclass == "ELFCLASS64", + body(Int16ub, Int32ub, Int64ub, is64bit=True), + body(Int16ub, Int32ub, Int64ub, is64bit=False), + ), + ), +) diff --git a/gallery/pe32coff.py b/gallery/pe32coff.py new file mode 100644 index 000000000..cac3501fd --- /dev/null +++ b/gallery/pe32coff.py @@ -0,0 +1,246 @@ +from construct import * + +docs = """ +PE/COFF format as used on Windows to store EXE DLL SYS files. This includes 64-bit and .NET code. + +Microsoft specifications: +https://msdn.microsoft.com/en-us/library/windows/desktop/ms680547(v=vs.85).aspx +https://msdn.microsoft.com/en-us/library/ms809762.aspx +Format tutorial breakdown at: +http://blog.dkbza.org/ +https://drive.google.com/file/d/0B3_wGJkuWLytQmc2di0wajB1Xzg/view +https://drive.google.com/file/d/0B3_wGJkuWLytbnIxY1J5WUs4MEk/view + +Authored by Arkadiusz Bulski, under same license. 
+""" + +msdosheader = Struct( + "signature" / Const(b"MZ"), + "lfanew" / Pointer(0x3c, Int16ul), +) + +coffheader = Struct( + "signature" / Const(b"PE\x00\x00"), + "machine" / Enum(Int16ul, + UNKNOWN = 0x0, + AM33 = 0x1d3, + AMD64 = 0x8664, + ARM = 0x1c0, + ARM64 = 0xaa64, + ARMNT = 0x1c4, + EBC = 0xebc, + I386 = 0x14c, + IA64 = 0x200, + M32R = 0x9041, + MIPS16 = 0x266, + MIPSFPU = 0x366, + MIPSFPU16 = 0x466, + POWERPC = 0x1f0, + POWERPCFP = 0x1f1, + R4000 = 0x166, + RISCV32 = 0x5032, + RISCV64 = 0x5064, + RISCV128 = 0x5128, + SH3 = 0x1a2, + SH3DSP = 0x1a3, + SH4 = 0x1a6, + SH5 = 0x1a8, + THUMB = 0x1c2, + WCEMIPSV2 = 0x169, + ), + "sections_count" / Int16ul, + "created" / Timestamp(Int32ul, 1., 1970), + "symbol_pointer" / Int32ul, #deprecated + "symbol_count" / Int32ul, #deprecated + "optionalheader_size" / Int16ul, + "characteristics" / FlagsEnum(Int16ul, + RELOCS_STRIPPED = 0x0001, + EXECUTABLE_IMAGE = 0x0002, + LINE_NUMS_STRIPPED = 0x0004, #deprecated + LOCAL_SYMS_STRIPPED = 0x0008, #deprecated + AGGRESSIVE_WS_TRIM = 0x0010, #deprecated + LARGE_ADDRESS_AWARE = 0x0020, + RESERVED = 0x0040, #reserved + BYTES_REVERSED_LO = 0x0080, #deprecated + MACHINE_32BIT = 0x0100, + DEBUG_STRIPPED = 0x0200, + REMOVABLE_RUN_FROM_SWAP = 0x0400, + SYSTEM = 0x1000, + DLL = 0x2000, + UNIPROCESSOR_ONLY = 0x4000, + BIG_ENDIAN_MACHINE = 0x8000, #deprecated + ), +) + +plusfield = IfThenElse(this.signature == "PE32plus", Int64ul, Int32ul) + +entriesnames = { + 0 : 'export_table', + 1 : 'import_table', + 2 : 'resource_table', + 3 : 'exception_table', + 4 : 'certificate_table', + 5 : 'base_relocation_table', + 6 : 'debug', + 7 : 'architecture', + 8 : 'global_ptr', + 9 : 'tls_table', + 10 : 'load_config_table', + 11 : 'bound_import', + 12 : 'import_address_table', + 13 : 'delay_import_descriptor', + 14 : 'clr_runtime_header', + 15 : 'reserved', +} + +datadirectory = Struct( + "name" / Computed(lambda this: entriesnames[this._._index]), + "virtualaddress" / Int32ul, + "size" / Int32ul, +) + 
+optionalheader = Struct( + "signature" / Enum(Int16ul, + PE32 = 0x10b, + PE32plus = 0x20b, + ROMIMAGE = 0x107, + ), + "linker_version" / Int8ul[2], + "size_code" / Int32ul, + "size_initialized_data" / Int32ul, + "size_uninitialized_data" / Int32ul, + "entrypoint" / Int32ul, + "base_code" / Int32ul, + "base_data" / If(this.signature == "PE32", Int32ul), + "image_base" / plusfield, + "section_alignment" / Int32ul, + "file_alignment" / Int32ul, + "os_version" / Int16ul[2], + "image_version" / Int16ul[2], + "subsystem_version" / Int16ul[2], + "win32versionvalue" / Int32ul, #deprecated + "image_size" / Int32ul, + "headers_size" / Int32ul, + "checksum" / Int32ul, + "subsystem" / Enum(Int16ul, + UNKNOWN = 0, + NATIVE = 1, + WINDOWS_GUI = 2, + WINDOWS_CUI = 3, + OS2_CUI = 5, + POSIX_CUI = 7, + WINDOWS_NATIVE = 8, + WINDOWS_CE_GUI = 9, + EFI_APPLICATION = 10, + EFI_BOOT_SERVICE_DRIVER = 11, + EFI_RUNTIME_DRIVER = 12, + EFI_ROM = 13, + XBOX = 14, + WINDOWS_BOOT_APPLICATION = 16, + ), + "dll_characteristics" / FlagsEnum(Int16ul, + HIGH_ENTROPY_VA = 0x0020, + DYNAMIC_BASE = 0x0040, + FORCE_INTEGRITY = 0x0080, + NX_COMPAT = 0x0100, + NO_ISOLATION = 0x0200, + NO_SEH = 0x0400, + NO_BIND = 0x0800, + APPCONTAINER = 0x1000, + WDM_DRIVER = 0x2000, + GUARD_CF = 0x4000, + TERMINAL_SERVER_AWARE = 0x8000, + ), + "stack_reserve" / plusfield, + "stack_commit" / plusfield, + "heap_reserve" / plusfield, + "heap_commit" / plusfield, + "loader_flags" / Int32ul, #reserved + "datadirectories_count" / Int32ul, + "datadirectories" / Array(this.datadirectories_count, + datadirectory), +) + +section = Struct( + "name" / PaddedString(8, "utf8"), + "virtual_size" / Int32ul, + "virtual_address" / Int32ul, + "rawdata_size" / Int32ul, + "rawdata_pointer" / Int32ul, + "relocations_pointer" / Int32ul, + "linenumbers_pointer" / Int32ul, + "relocations_count" / Int16ul, + "linenumbers_count" / Int16ul, + "characteristics" / FlagsEnum(Int32ul, + TYPE_REG = 0x00000000, + TYPE_DSECT = 0x00000001, + TYPE_NOLOAD 
= 0x00000002, + TYPE_GROUP = 0x00000004, + TYPE_NO_PAD = 0x00000008, + TYPE_COPY = 0x00000010, + CNT_CODE = 0x00000020, + CNT_INITIALIZED_DATA = 0x00000040, + CNT_UNINITIALIZED_DATA = 0x00000080, + LNK_OTHER = 0x00000100, + LNK_INFO = 0x00000200, + TYPE_OVER = 0x00000400, + LNK_REMOVE = 0x00000800, + LNK_COMDAT = 0x00001000, + MEM_FARDATA = 0x00008000, + MEM_PURGEABLE = 0x00020000, + MEM_16BIT = 0x00020000, + MEM_LOCKED = 0x00040000, + MEM_PRELOAD = 0x00080000, + ALIGN_1BYTES = 0x00100000, + ALIGN_2BYTES = 0x00200000, + ALIGN_4BYTES = 0x00300000, + ALIGN_8BYTES = 0x00400000, + ALIGN_16BYTES = 0x00500000, + ALIGN_32BYTES = 0x00600000, + ALIGN_64BYTES = 0x00700000, + ALIGN_128BYTES = 0x00800000, + ALIGN_256BYTES = 0x00900000, + ALIGN_512BYTES = 0x00A00000, + ALIGN_1024BYTES = 0x00B00000, + ALIGN_2048BYTES = 0x00C00000, + ALIGN_4096BYTES = 0x00D00000, + ALIGN_8192BYTES = 0x00E00000, + LNK_NRELOC_OVFL = 0x01000000, + MEM_DISCARDABLE = 0x02000000, + MEM_NOT_CACHED = 0x04000000, + MEM_NOT_PAGED = 0x08000000, + MEM_SHARED = 0x10000000, + MEM_EXECUTE = 0x20000000, + MEM_READ = 0x40000000, + MEM_WRITE = 0x80000000, + ), + "rawdata" / Pointer(this.rawdata_pointer, + Bytes(lambda this: this.rawdata_size if this.rawdata_pointer else 0)), + "relocations" / Pointer(this.relocations_pointer, + Array(this.relocations_count, Struct( + "virtualaddress" / Int32ul, + "symboltable_index" / Int32ul, + "type" / Int16ul * "complicated platform-dependant Enum", + )) + ), + "linenumbers" / Pointer(this.linenumbers_pointer, + Array(this.linenumbers_count, Struct( + "_type" / Int32ul, + "_linenumber" / Int16ul, + "is_symboltableindex" / Computed(this._linenumber == 0), + "is_linenumber" / Computed(this._linenumber > 0), + "symboltableindex" / If(this.is_symboltableindex, Computed(this._type)), + "linenumber" / If(this.is_linenumber, Computed(this._linenumber)), + "virtualaddress" / If(this.is_linenumber, Computed(this._type)), + )) + ), +) + +pe32file = docs * Struct( + "msdosheader" / 
msdosheader, + Seek(this.msdosheader.lfanew), + "coffheader" / coffheader, + "optionalheader" / If(this.coffheader.optionalheader_size > 0, optionalheader), + "sections_count" / Computed(this.coffheader.sections_count), + "sections" / Array(this.sections_count, section), +) diff --git a/gallery/ut_index.py b/gallery/ut_index.py new file mode 100644 index 000000000..435732b8f --- /dev/null +++ b/gallery/ut_index.py @@ -0,0 +1,72 @@ +from construct import * +from construct.lib import * + + +class UTIndex(Construct): + """ + Format for "Index" objects in Unreal Tournament 1999 packages. + Index objects are variable length signed integers with the following structure: + + +------------------------------------+-------------------------+--------------+ + | Byte 0 | Bytes 1-3 | Byte 4 | + +----------+----------+--------------+----------+--------------+--------------+ + | Sign Bit | More Bit | Data Bits[6] | More Bit | Data Bits[7] | Data Bits[8] | + +----------+----------+--------------+----------+--------------+--------------+ + + If the "More" bit is 0 in any byte, that's the end of the Index. Otherwise, + keep going. There cannot be more than 5 bytes in an Index so Byte 4 doesn't + have a "More" bit. 
+ """ + lengths = {0: 6, 1: 7, 2: 7, 3: 7, 4: 8} + negative_bit = 0x80 + + @staticmethod + def _get_data_mask(length): + return (0xFF ^ (0xFF << length)) & 0xFF + + @staticmethod + def _get_more_bit(length): + return 1 << length + + def _parse(self, stream, context, path): + result = 0 + sign = 1 + i = 0 + depth = 0 + while True: + length = self.lengths[i] + bits = byte2int(stream_read(stream, 1, path)) + mask = self._get_data_mask(length) + data = bits & mask + more = self._get_more_bit(length) & bits + if (i == 0) and (self.negative_bit & bits): + sign = -1 + result |= data << depth + if not more: + break + i += 1 + depth += length + return sign * result + + def _build(self, obj, stream, context, path): + if not isinstance(obj, int): + raise IntegerError("Value is not an integer") + to_write = obj + for i in range(5): + byte = 0 + length = self.lengths[i] + if i == 0: + negative = obj < 0 + byte |= self.negative_bit * negative + if negative: + to_write *= -1 + mask = self._get_data_mask(length) + byte |= to_write & mask + to_write >>= length + more_bit = (to_write > 0) and self._get_more_bit(length) + byte |= more_bit + byte &= 0xFF + stream_write(stream, int2byte(byte), 1, path) + if not more_bit: + break + return obj diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 000000000..f5a52af71 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +pythonpath = . 
+ diff --git a/setup.py b/setup.py index f6c3d414b..5983a98c8 100755 --- a/setup.py +++ b/setup.py @@ -1,17 +1,69 @@ #!/usr/bin/env python - -from setuptools import find_packages, setup +from setuptools import setup +from construct.version import version_string setup( - name="construct", - version="2.06", - packages=find_packages(), - license="Public Domain", - description="a powerful declarative parser for binary data", - long_description=open("README.rst").read(), - url="https://github.com/MostAwesomeDude/construct", - author="Tomer Filiba", - author_email="tomerfiliba at gmail dot com", - maintainer="Corbin Simpson", - maintainer_email="MostAwesomeDude@gmail.com", + name = "construct", + version = version_string, + packages = [ + 'construct', + 'construct.lib', + ], + license = "MIT", + description = "A powerful declarative symmetric parser/builder for binary data", + long_description = open("README.rst").read(), + platforms = ["POSIX", "Windows"], + url = "http://construct.readthedocs.org", + project_urls = { + "Source": "https://github.com/construct/construct", + "Documentation": "https://construct.readthedocs.io/en/latest/", + "Issues": "https://github.com/construct/construct/issues", + }, + author = "Arkadiusz Bulski, Tomer Filiba, Corbin Simpson", + author_email = "arek.bulski@gmail.com, tomerfiliba@gmail.com, MostAwesomeDude@gmail.com", + python_requires = ">=3.8", + install_requires = [], + extras_require = { + "extras": [ + "numpy", + "arrow", + "ruamel.yaml", + "cloudpickle", + "lz4", + "cryptography", + ], + }, + keywords = [ + "construct", + "kaitai", + "declarative", + "data structure", + "struct", + "binary", + "symmetric", + "parser", + "builder", + "parsing", + "building", + "pack", + "unpack", + "packer", + "unpacker", + ], + classifiers = [ + "Development Status :: 5 - Production/Stable", + "License :: OSI Approved :: MIT License", + "Intended Audience :: Developers", + "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: 
Software Development :: Build Tools", + "Topic :: Software Development :: Code Generators", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", + ], ) diff --git a/construct/formats/__init__.py b/tests/__init__.py similarity index 100% rename from construct/formats/__init__.py rename to tests/__init__.py diff --git a/tests/declarativeunittest.py b/tests/declarativeunittest.py new file mode 100644 index 000000000..81fdd1672 --- /dev/null +++ b/tests/declarativeunittest.py @@ -0,0 +1,70 @@ +import pytest + +xfail = pytest.mark.xfail +skip = pytest.mark.skip +skipif = pytest.mark.skipif + +import os, math, random, collections, itertools, io, hashlib, binascii + +from construct import * +from construct.lib import * + +if not ONWINDOWS: + devzero = open("/dev/zero", "rb") + +ident = lambda x: x + +def raises(func, *args, **kw): + try: + return func(*args, **kw) + except Exception as e: + return e.__class__ + +def common(format, datasample, objsample, sizesample=SizeofError, **kw): + # following are implied (re-parse and re-build) + # assert format.parse(format.build(obj)) == obj + # assert format.build(format.parse(data)) == data + obj = format.parse(datasample, **kw) + assert obj == objsample + data = format.build(objsample, **kw) + assert data == datasample + + if isinstance(sizesample, int): + size = format.sizeof(**kw) + assert size == sizesample + else: + size = raises(format.sizeof, **kw) + assert size == sizesample + + # attemps to compile, ignores if compilation fails + # following was added to test compiling functionality + # and implies: format.parse(data) == cformat.parse(data) + # and implies: format.build(obj) == 
cformat.build(obj) + try: + cformat = format.compile() + except Exception: + pass + else: + obj = cformat.parse(datasample, **kw) + assert obj == objsample + data = cformat.build(objsample, **kw) + assert data == datasample + +def commonhex(format, hexdata): + commonbytes(format, binascii.unhexlify(hexdata)) + +def commondumpdeprecated(format, filename): + filename = "tests/deprecated_gallery/blobs/" + filename + with open(filename,'rb') as f: + data = f.read() + commonbytes(format, data) + +def commondump(format, filename): + filename = "tests/gallery/blobs/" + filename + with open(filename,'rb') as f: + data = f.read() + commonbytes(format, data) + +def commonbytes(format, data): + obj = format.parse(data) + data2 = format.build(obj) diff --git a/construct/tests/NOTEPAD.EXE b/tests/deprecated_gallery/blobs/NOTEPAD.EXE similarity index 100% rename from construct/tests/NOTEPAD.EXE rename to tests/deprecated_gallery/blobs/NOTEPAD.EXE diff --git a/construct/tests/bitmap1.bmp b/tests/deprecated_gallery/blobs/bitmap1.bmp similarity index 100% rename from construct/tests/bitmap1.bmp rename to tests/deprecated_gallery/blobs/bitmap1.bmp diff --git a/construct/tests/bitmap24.bmp b/tests/deprecated_gallery/blobs/bitmap24.bmp similarity index 100% rename from construct/tests/bitmap24.bmp rename to tests/deprecated_gallery/blobs/bitmap24.bmp diff --git a/construct/tests/bitmap4.bmp b/tests/deprecated_gallery/blobs/bitmap4.bmp similarity index 100% rename from construct/tests/bitmap4.bmp rename to tests/deprecated_gallery/blobs/bitmap4.bmp diff --git a/construct/tests/bitmap8.bmp b/tests/deprecated_gallery/blobs/bitmap8.bmp similarity index 100% rename from construct/tests/bitmap8.bmp rename to tests/deprecated_gallery/blobs/bitmap8.bmp diff --git a/construct/tests/cap2.cap b/tests/deprecated_gallery/blobs/cap2.cap similarity index 100% rename from construct/tests/cap2.cap rename to tests/deprecated_gallery/blobs/cap2.cap diff --git a/construct/tests/_ctypes_test.so 
b/tests/deprecated_gallery/blobs/ctypes.so similarity index 100% rename from construct/tests/_ctypes_test.so rename to tests/deprecated_gallery/blobs/ctypes.so diff --git a/construct/tests/emf1.emf b/tests/deprecated_gallery/blobs/emf1.emf similarity index 100% rename from construct/tests/emf1.emf rename to tests/deprecated_gallery/blobs/emf1.emf diff --git a/tests/deprecated_gallery/blobs/mbr1 b/tests/deprecated_gallery/blobs/mbr1 new file mode 100644 index 000000000..421d97bdb Binary files /dev/null and b/tests/deprecated_gallery/blobs/mbr1 differ diff --git a/construct/tests/python.exe b/tests/deprecated_gallery/blobs/python.exe similarity index 100% rename from construct/tests/python.exe rename to tests/deprecated_gallery/blobs/python.exe diff --git a/tests/deprecated_gallery/blobs/sample.gif b/tests/deprecated_gallery/blobs/sample.gif new file mode 100644 index 000000000..ade98a624 Binary files /dev/null and b/tests/deprecated_gallery/blobs/sample.gif differ diff --git a/tests/deprecated_gallery/blobs/sample.jpg b/tests/deprecated_gallery/blobs/sample.jpg new file mode 100644 index 000000000..da4439595 Binary files /dev/null and b/tests/deprecated_gallery/blobs/sample.jpg differ diff --git a/tests/deprecated_gallery/blobs/sample.png b/tests/deprecated_gallery/blobs/sample.png new file mode 100644 index 000000000..60d4a2c53 Binary files /dev/null and b/tests/deprecated_gallery/blobs/sample.png differ diff --git a/tests/deprecated_gallery/blobs/snoop1 b/tests/deprecated_gallery/blobs/snoop1 new file mode 100644 index 000000000..0d3b15607 Binary files /dev/null and b/tests/deprecated_gallery/blobs/snoop1 differ diff --git a/construct/tests/sqlite3.dll b/tests/deprecated_gallery/blobs/sqlite3.dll similarity index 100% rename from construct/tests/sqlite3.dll rename to tests/deprecated_gallery/blobs/sqlite3.dll diff --git a/construct/tests/wmf1.wmf b/tests/deprecated_gallery/blobs/wmf1.wmf similarity index 100% rename from construct/tests/wmf1.wmf rename to 
tests/deprecated_gallery/blobs/wmf1.wmf diff --git a/tests/deprecated_gallery/test_formats.py b/tests/deprecated_gallery/test_formats.py new file mode 100644 index 000000000..82db841e1 --- /dev/null +++ b/tests/deprecated_gallery/test_formats.py @@ -0,0 +1,41 @@ +from tests.declarativeunittest import * +from construct import * +from construct.lib import * + +from deprecated_gallery import * + + +def test_png(): + commondumpdeprecated(png_file, "sample.png") + +def test_emf(): + commondumpdeprecated(emf_file, "emf1.emf") + +def test_bitmap(): + commondumpdeprecated(bitmap_file, "bitmap1.bmp") + commondumpdeprecated(bitmap_file, "bitmap4.bmp") + commondumpdeprecated(bitmap_file, "bitmap8.bmp") + commondumpdeprecated(bitmap_file, "bitmap24.bmp") + +def test_wmf(): + commondumpdeprecated(wmf_file, "wmf1.wmf") + +def test_gif(): + commondumpdeprecated(gif_file, "sample.gif") + +def test_mbr(): + commondumpdeprecated(mbr_format, "mbr1") + +def test_cap(): + commondumpdeprecated(cap_file, "cap2.cap") + +def test_snoop(): + commondumpdeprecated(snoop_file, "snoop1") + +def test_pe32(): + commondumpdeprecated(pe32_file, "python.exe") + commondumpdeprecated(pe32_file, "NOTEPAD.EXE") + commondumpdeprecated(pe32_file, "sqlite3.dll") + +def test_elf32(): + commondumpdeprecated(elf32_file, "ctypes.so") diff --git a/tests/deprecated_gallery/test_protocols.py b/tests/deprecated_gallery/test_protocols.py new file mode 100644 index 000000000..f96e9b2bf --- /dev/null +++ b/tests/deprecated_gallery/test_protocols.py @@ -0,0 +1,54 @@ +from tests.declarativeunittest import * +from construct import * +from construct.lib import * + +from deprecated_gallery import * + + +def test_ethernet(): + common(MacAddress, b"abcdef", "61-62-63-64-65-66", 6) + commonhex(ethernet_header, b"0011508c283c0002e34260090800") + +def test_arp(): + commonhex(arp_header, b"00010800060400010002e3426009c0a80204000000000000c0a80201") + commonhex(arp_header, 
b"00010800060400020011508c283cc0a802010002e3426009c0a80204") + +def test_ip4(): + common(IpAddress, b"\x7f\x80\x81\x82", "127.128.129.130", 4) + commonhex(ipv4_header, b"4500003ca0e3000080116185c0a80205d474a126") + +def test_ip6(): + common(Ipv6Address, b"1234567890123456", "31:32:33:34:35:36:37:38:39:30:31:32:33:34:35:36", 16) + commonhex(ipv6_header, b"6ff00000010206803031323334353637383941424344454646454443424139383736353433323130") + +def test_icmp(): + commonhex(icmp_header, b"0800305c02001b006162636465666768696a6b6c6d6e6f7071727374757677616263646566676869") + commonhex(icmp_header, b"0000385c02001b006162636465666768696a6b6c6d6e6f7071727374757677616263646566676869") + commonhex(icmp_header, b"0301000000001122aabbccdd0102030405060708") + +def test_igmp(): + commonhex(igmpv2_header, b"1600FA01EFFFFFFD") + +def test_dhcp4(): + commonhex(dhcp4_header, b"0101060167c05f5a00000000"+b"0102030405060708090a0b0c"+b"0d0e0f10"+b"DEADBEEFBEEF"+b"000000000000000000000000000000000000000000000000000000"+b"000000000000000000000000000000000000000000000000000000"+b"000000000000000000000000000000000000000000000000000000"+b"000000000000000000000000000000000000000000000000000000"+b"000000000000000000000000000000000000000000000000000000"+b"000000000000000000000000000000000000000000000000000000"+b"000000000000000000000000000000000000000000000000000000"+b"00000000000000000000000000"+b"63825363"+b"3501083d0701DEADBEEFBEEF0c04417375733c084d53465420352e"+b"30370d010f03062c2e2f1f2179f92bfc52210117566c616e333338"+b"382b45746865726e6574312f302f32340206f8f0827348f9") + +def test_dhcp6(): + commonbytes(dhcp6_message, b"\x03\x11\x22\x33\x00\x17\x00\x03ABC\x00\x05\x00\x05HELLO") + commonbytes(dhcp6_message, b"\x0c\x040123456789abcdef0123456789abcdef\x00\x09\x00\x0bhello world\x00\x01\x00\x00") + +def test_tcp(): + commonhex(tcp_header, b"0db5005062303fb21836e9e650184470c9bc0000") + +def test_udp(): + commonhex(udp_header, b"0bcc003500280689") + +def test_dns(): + commonhex(dns, 
b"2624010000010000000000000377777706676f6f676c6503636f6d0000010001") + +@xfail(reason="unknown problem, fails during parsing") +def test_dns_part2(): + commonhex(dns, b"2624818000010005000600060377777706676f6f676c6503636f6d0000010001c00c0005000100089065000803777777016cc010c02c0001000100000004000440e9b768c02c0001000100000004000440e9b793c02c0001000100000004000440e9b763c02c0001000100000004000440e9b767c030000200010000a88600040163c030c030000200010000a88600040164c030c030000200010000a88600040165c030c030000200010000a88600040167c030c030000200010000a88600040161c030c030000200010000a88600040162c030c0c00001000100011d0c0004d8ef3509c0d0000100010000ca7c000440e9b309c080000100010000c4c5000440e9a109c0900001000100004391000440e9b709c0a0000100010000ca7c000442660b09c0b00001000100000266000440e9a709") + +def test_ip_stack(): + commonhex(ip_stack, b"0011508c283c001150886b570800450001e971474000800684e4c0a80202525eedda112a0050d98ec61d54fe977d501844705dcc0000474554202f20485454502f312e310d0a486f73743a207777772e707974686f6e2e6f72670d0a557365722d4167656e743a204d6f7a696c6c612f352e30202857696e646f77733b20553b2057696e646f7773204e5420352e313b20656e2d55533b2072763a312e382e302e3129204765636b6f2f32303036303131312046697265666f782f312e352e302e310d0a4163636570743a20746578742f786d6c2c6170706c69636174696f6e2f786d6c2c6170706c69636174696f6e2f7868746d6c2b786d6c2c746578742f68746d6c3b713d302e392c746578742f706c61696e3b713d302e382c696d6167652f706e672c2a2f2a3b713d302e350d0a4163636570742d4c616e67756167653a20656e2d75732c656e3b713d302e350d0a4163636570742d456e636f64696e673a20677a69702c6465666c6174650d0a4163636570742d436861727365743a2049534f2d383835392d312c7574662d383b713d302e372c2a3b713d302e370d0a4b6565702d416c6976653a203330300d0a436f6e6e656374696f6e3a206b6565702d616c6976650d0a507261676d613a206e6f2d63616368650d0a43616368652d436f6e74726f6c3a206e6f2d63616368650d0a0d0a") + commonhex(ip_stack, 
b"0002e3426009001150f2c280080045900598fd22000036063291d149baeec0a8023c00500cc33b8aa7dcc4e588065010ffffcecd0000485454502f312e3120323030204f4b0d0a446174653a204672692c2031352044656320323030362032313a32363a323520474d540d0a5033503a20706f6c6963797265663d22687474703a2f2f7033702e7961686f6f2e636f6d2f7733632f7033702e786d6c222c2043503d2243414f2044535020434f52204355522041444d204445562054414920505341205053442049564169204956446920434f4e692054454c6f204f545069204f55522044454c692053414d69204f54526920554e5269205055426920494e4420504859204f4e4c20554e49205055522046494e20434f4d204e415620494e542044454d20434e542053544120504f4c204845412050524520474f56220d0a43616368652d436f6e74726f6c3a20707269766174650d0a566172793a20557365722d4167656e740d0a5365742d436f6f6b69653a20443d5f796c683d58336f444d54466b64476c6f5a7a567842463954417a49334d5459784e446b4563476c6b417a45784e6a59794d5463314e5463456447567a64414d7742485274634777446157356b5a58677462412d2d3b20706174683d2f3b20646f6d61696e3d2e7961686f6f2e636f6d0d0a436f6e6e656374696f6e3a20636c6f73650d0a5472616e736665722d456e636f64696e673a206368756e6b65640d0a436f6e74656e742d547970653a20746578742f68746d6c3b20636861727365743d7574662d380d0a436f6e74656e742d456e636f64696e673a20677a69700d0a0d0a366263382020200d0a1f8b0800000000000003dcbd6977db38b200faf9fa9cf90f88326dd9b1169212b5d891739cd84ed2936d1277a7d3cbf1a1484a624c910c4979893bbfec7d7bbfec556121012eb29d65e6be7be7762c9240a1502854150a85c2c37b87af9f9c7c7873449e9dbc7c41defcf2f8c5f327a4d1ee76dff79e74bb872787ec43bfa3e9ddeed1ab06692cd234daed762f2e2e3a17bd4e18cfbb276fbb8b74e9f7bb491a7b76da7152a7b1bff110dfed3f5cb896030f4b37b508566dbb9f56def9a4f1240c523748db275791db20367b9a3452f732a5d0f688bdb0e2c44d27bf9c1cb7470830b1632f4a490a3578c18fd6b9c5dec2f7732b2641783109dc0b7268a56e2bd527a931497b93b43f49cd493a98a4c3493a9aa4e349aa6bf01f7cd78d89d6b2ed49b3d9baf223f8b307b5004a67eea627ded2dddadedb78d8656de428f856305f5973779223b0fff05ebbbde1db67082a499289ae0f06863e1c8f4c0639eaccbdd9a3547abf798a1f0ec6c73fafd2e4f151ffd5f1c9e2f9e37ff74e74fbddd941b375ea
db0942b3e3d5723a69f6060373a6cff49e6df586dac8b11c4d1f1afd81319b0df45e6fd4925a6cee6db4dbfb19e225bc1b12e56a098aed9309715c3b74dc5fde3e7f122ea3308061dac22f4018a4f8878367af5f4f2ebcc001a2d187bfffbefeb2477f75026be9269165bb93d92ab0532f0cb68264fbda9b6ddd0b92bfff867f3abe1bccd3c5f675eca6ab3820c1caf7f7be20e05363029f93c8f7d2ad46a7b1bd475ff62614f2de2c8cb7f08537d93a35fed0fe9a4c1af44363fb91beabed790f4f0d0e7a6f67c7dbbe3eedfd01e5bcbffe9a64bf289e00307bb1f7852371dadb133df0c3798efba9d93a1db44e87dbd7d8b4cf50e95c780e304be745389fbbf11ef4cddfdcf4b162d629fa94d7defbe2fa892b3ece2c78d8fb221a84517003476a73dc3ad535d6e22c7fbd0db8cf3a511ca6211d3e28933fed9d8ea54f381f66c0c7f2cb0e4c3898ad2b3b0de3c9e918bf25abc88d6ddf02d65581418f94174addc9ebe94717e67ce557207b6d45f892773ae393adc62af57c18ecd27b46e5aa2feea5b58c7c173e6d94be1d3bd5afa3fcf571d409ded9b1eb06ef3d275d00c36f25f4916c6ed2a911cef88b0e4c0ecfa7a5b627936600b3d28d9bdbe411") diff --git a/tests/gallery/blobs/SharpZipLib0860-dotnet20.dll b/tests/gallery/blobs/SharpZipLib0860-dotnet20.dll new file mode 100644 index 000000000..fe643ebc6 Binary files /dev/null and b/tests/gallery/blobs/SharpZipLib0860-dotnet20.dll differ diff --git a/tests/gallery/blobs/python37-win32.exe b/tests/gallery/blobs/python37-win32.exe new file mode 100644 index 000000000..122beae51 Binary files /dev/null and b/tests/gallery/blobs/python37-win32.exe differ diff --git a/tests/gallery/blobs/python37-win64.exe b/tests/gallery/blobs/python37-win64.exe new file mode 100644 index 000000000..87a02a79e Binary files /dev/null and b/tests/gallery/blobs/python37-win64.exe differ diff --git a/tests/gallery/blobs/sqlite3.dll b/tests/gallery/blobs/sqlite3.dll new file mode 100644 index 000000000..c99ef993c Binary files /dev/null and b/tests/gallery/blobs/sqlite3.dll differ diff --git a/tests/gallery/test_gallery.py b/tests/gallery/test_gallery.py new file mode 100644 index 000000000..8edac1ed0 --- /dev/null +++ b/tests/gallery/test_gallery.py @@ -0,0 +1,37 @@ +from tests.declarativeunittest import 
* +from construct import * +from construct.lib import * + +from gallery import pe32file, UTIndex + + +def test_pe32(): + commondump(pe32file, "python37-win32.exe") + commondump(pe32file, "python37-win64.exe") + commondump(pe32file, "SharpZipLib0860-dotnet20.dll") + commondump(pe32file, "sqlite3.dll") + +def test_utindex(): + d = UTIndex() + test_data = [ + [0x0f], # 0x0f + [0x4f, 0x40], # (0x40 << 6) + 0x0f = 0x100f + [0x8f], # -0x0f + [0xcf, 0x40], # -((0x40 << 6) + 0x0f) = -0x100f + [0x4f, 0x80, 0x40], # (0x40 << 13) + 0x0f = 0x8000f + [0x4f, 0x80, 0x80, 0x40], # (0x40 << 20) + 0x0f = 0x400000f + [0x4f, 0x80, 0x80, 0x80, 0x8f], # 0x8f << 27 + 0x0f = 0x47800000f + ] + expected_values = [ + 0x0f, + 0x100f, + -0x0f, + -0x100f, + 0x8000f, + 0x400000f, + 0x47800000f, + ] + for test, ev in zip(test_data, expected_values): + assert d.parse(bytes(test)) == ev + assert d.build(ev) == bytes(test) + assert raises(d.sizeof) == SizeofError diff --git a/tests/kaitai_comparisons/comparison_1_construct.py b/tests/kaitai_comparisons/comparison_1_construct.py new file mode 100644 index 000000000..a71296310 --- /dev/null +++ b/tests/kaitai_comparisons/comparison_1_construct.py @@ -0,0 +1,31 @@ +from construct import * + +d = Struct( + "count" / Int32ul, + "items" / Array(this.count, Struct( + "num1" / Int8ul, + "num2" / Int24ul, + "flags" / BitStruct( + "bool1" / Flag, + "num4" / BitsInteger(3), + Padding(4), + ), + "fixedarray1" / Array(3, Int8ul), + "name1" / CString("utf8"), + "name2" / PascalString(Int8ul, "utf8"), + )), +) + + +data = d.build(dict(count=1000, items=[dict(num1=0, num2=0, flags=dict(bool1=True, num4=0), fixedarray1=[0,0,0], name1=u"...", name2=u"...") for i in range(1000)])) +with open("blob","wb") as f: + f.write(data) + +# from timeit import timeit +# d.parse(data) +# parsetime = timeit(lambda: d.parse(data), number=1000)/1000 +# print("Timeit measurements:") +# print("parsing: {:.10f} sec/call".format(parsetime)) + +d = d.compile() +print(d.benchmark(data)) 
diff --git a/tests/kaitai_comparisons/comparison_1_kaitai.ksy b/tests/kaitai_comparisons/comparison_1_kaitai.ksy new file mode 100644 index 000000000..cbf377fed --- /dev/null +++ b/tests/kaitai_comparisons/comparison_1_kaitai.ksy @@ -0,0 +1,45 @@ +meta: + id: comparison_1_kaitai + encoding: utf-8 + endian: le +seq: + - id: count + type: u4 + - id: items + repeat: expr + repeat-expr: count + type: item +types: + item: + seq: + - id: num1 + type: u1 + - id: num2_lo + type: u2 + - id: num2_hi + type: u1 + - id: flags + type: flags + - id: fixedarray1 + repeat: expr + repeat-expr: 3 + type: u1 + - id: name1 + type: strz + - id: len_name2 + type: u1 + - id: name2 + type: str + size: len_name2 + instances: + num2: + value: 'num2_hi << 16 | num2_lo' + types: + flags: + seq: + - id: bool1 + type: b1 + - id: num4 + type: b3 + - id: padding + type: b4 diff --git a/tests/kaitai_comparisons/comparison_1_kaitai.py b/tests/kaitai_comparisons/comparison_1_kaitai.py new file mode 100644 index 000000000..ed97262f6 --- /dev/null +++ b/tests/kaitai_comparisons/comparison_1_kaitai.py @@ -0,0 +1,74 @@ +# This is a generated file! 
Please edit source .ksy file and use kaitai-struct-compiler to rebuild + +from pkg_resources import parse_version +from kaitaistruct import __version__ as ks_version, KaitaiStruct, KaitaiStream, BytesIO + + +if parse_version(ks_version) < parse_version('0.7'): + raise Exception("Incompatible Kaitai Struct Python API: 0.7 or later is required, but you have %s" % (ks_version)) + +class Comparison1Kaitai(KaitaiStruct): + def __init__(self, _io, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root if _root else self + self._read() + + def _read(self): + self.count = self._io.read_u4le() + self.items = [None] * (self.count) + for i in range(self.count): + self.items[i] = self._root.Item(self._io, self, self._root) + + + class Item(KaitaiStruct): + def __init__(self, _io, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root if _root else self + self._read() + + def _read(self): + self.num1 = self._io.read_u1() + self.num2_lo = self._io.read_u2le() + self.num2_hi = self._io.read_u1() + # tweaked + self.num2 + self.flags = self._root.Item.Flags(self._io, self, self._root) + self.fixedarray1 = [None] * (3) + for i in range(3): + self.fixedarray1[i] = self._io.read_u1() + + self.name1 = (self._io.read_bytes_term(0, False, True, True)).decode(u"utf-8") + self.len_name2 = self._io.read_u1() + self.name2 = (self._io.read_bytes(self.len_name2)).decode(u"utf-8") + + class Flags(KaitaiStruct): + def __init__(self, _io, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root if _root else self + self._read() + + def _read(self): + self.bool1 = self._io.read_bits_int(1) != 0 + self.num4 = self._io.read_bits_int(3) + self.padding = self._io.read_bits_int(4) + + + @property + def num2(self): + if hasattr(self, '_m_num2'): + return self._m_num2 if hasattr(self, '_m_num2') else None + + self._m_num2 = ((self.num2_hi << 16) | self.num2_lo) + return self._m_num2 if hasattr(self, 
'_m_num2') else None + + +data = open("blob","rb").read() +Comparison1Kaitai.from_bytes(data) + +from timeit import timeit +parsetime = timeit(lambda: Comparison1Kaitai.from_bytes(data), number=1000)/1000 +print("Timeit measurements:") +print("parsing: {:.10f} sec/call".format(parsetime)) diff --git a/tests/lib/test_binary.py b/tests/lib/test_binary.py new file mode 100644 index 000000000..bc7e42f1f --- /dev/null +++ b/tests/lib/test_binary.py @@ -0,0 +1,81 @@ +from tests.declarativeunittest import * +from construct.lib.binary import * + + +def test_integer2bits(): + assert raises(integer2bits, 0, 0, False) == ValueError + assert raises(integer2bits, 0, 0, True) == ValueError + assert integer2bits(19, 5) == b"\x01\x00\x00\x01\x01" + assert integer2bits(19, 8) == b"\x00\x00\x00\x01\x00\x00\x01\x01" + assert integer2bits(-13, 5, True) == b"\x01\x00\x00\x01\x01" + assert integer2bits(-13, 8, True) == b"\x01\x01\x01\x01\x00\x00\x01\x01" + assert raises(integer2bits, 0, -1) == ValueError + assert raises(integer2bits, -1, 8, False) == ValueError + assert raises(integer2bits, -2**64, 8, True) == ValueError + assert raises(integer2bits, 2**64, 8, True) == ValueError + assert raises(integer2bits, -2**64, 8, False) == ValueError + assert raises(integer2bits, 2**64, 8, False) == ValueError + +def test_integer2bytes(): + assert raises(integer2bytes, 0, 0, False) == ValueError + assert raises(integer2bytes, 0, 0, True) == ValueError + assert integer2bytes(0, 4) == b"\x00\x00\x00\x00" + assert integer2bytes(1, 4) == b"\x00\x00\x00\x01" + assert integer2bytes(19, 4) == b'\x00\x00\x00\x13' + assert integer2bytes(255, 1) == b"\xff" + assert integer2bytes(255, 4) == b"\x00\x00\x00\xff" + assert integer2bytes(-1, 4, True) == b"\xff\xff\xff\xff" + assert integer2bytes(-255, 4, True) == b"\xff\xff\xff\x01" + assert raises(integer2bytes, 0, -1) == ValueError + assert raises(integer2bytes, -1, 8, False) == ValueError + assert raises(integer2bytes, -2**64, 4, True) == ValueError + assert 
raises(integer2bytes, 2**64, 4, True) == ValueError + assert raises(integer2bytes, -2**64, 4, False) == ValueError + assert raises(integer2bytes, 2**64, 4, False) == ValueError + +def test_bits2integer(): + assert raises(bits2integer, b"", False) == ValueError + assert raises(bits2integer, b"", True) == ValueError + assert bits2integer(b"\x01\x00\x00\x01\x01", False) == 19 + assert bits2integer(b"\x01\x00\x00\x01\x01", True) == -13 + +def test_bytes2integer(): + assert raises(bytes2integer, b"", False) == ValueError + assert raises(bytes2integer, b"", True) == ValueError + assert bytes2integer(b"\x00") == 0 + assert bytes2integer(b"\x00", True) == 0 + assert bytes2integer(b"\xff") == 255 + assert bytes2integer(b"\xff", True) == -1 + assert bytes2integer(b'\x00\x00\x00\x13', False) == 19 + assert bytes2integer(b'\x00\x00\x00\x13', True) == 19 + +def test_cross_integers(): + for i in [-300,-255,-100,-1,0,1,100,255,300]: + assert bits2integer(integer2bits(i,64,signed=(i<0)),signed=(i<0)) == i + assert bytes2integer(integer2bytes(i,8,signed=(i<0)),signed=(i<0)) == i + assert bits2bytes(integer2bits(i,64,signed=(i<0))) == integer2bytes(i,8,signed=(i<0)) + assert bytes2bits(integer2bytes(i,8,signed=(i<0))) == integer2bits(i,64,signed=(i<0)) + +def test_bytes2bits(): + assert bytes2bits(b"") == b"" + assert bytes2bits(b"ab") == b"\x00\x01\x01\x00\x00\x00\x00\x01\x00\x01\x01\x00\x00\x00\x01\x00" + +def test_bits2bytes(): + assert bits2bytes(b"") == b"" + assert bits2bytes(b"\x00\x01\x01\x00\x00\x00\x00\x01\x00\x01\x01\x00\x00\x00\x01\x00") == b"ab" + assert raises(bits2bytes, b"\x00") == ValueError + assert raises(bits2bytes, b"\x00\x00\x00\x00\x00\x00\x00") == ValueError + +def test_swapbytes(): + assert swapbytes(b"") == b"" + assert swapbytes(b"abcd") == b"dcba" + +def test_swapbytesinbits(): + assert swapbytesinbits(b"") == b"" + assert swapbytesinbits(b"0000000011111111") == b"1111111100000000" + assert raises(swapbytesinbits, b"1") == ValueError + +def 
test_swapbitsinbytes(): + assert swapbitsinbytes(b"") == b"" + assert swapbitsinbytes(b"\xf0") == b"\x0f" + assert swapbitsinbytes(b"\xf0\x00") == b"\x0f\x00" diff --git a/tests/lib/test_bitstream.py b/tests/lib/test_bitstream.py new file mode 100644 index 000000000..9d5c558c9 --- /dev/null +++ b/tests/lib/test_bitstream.py @@ -0,0 +1,55 @@ +from tests.declarativeunittest import * +from construct.lib.bitstream import * + + +def test_restreamed(): + # tested by Bitwise Bytewise ByteSwapped BitsSwapped cases + pass + +def test_rebuffered(): + z = b"0" + + print("sequential read") + bstream = RebufferedBytesIO(io.BytesIO(z*1000)) + assert bstream.read(1000) == z*1000 + + print("random reads") + data = os.urandom(1000) + bstream = RebufferedBytesIO(io.BytesIO(data)) + for i in range(50): + o1 = random.randrange(0, 480) + o2 = random.randrange(520, 1000) + assert bstream.seek(o1) == o1 + assert bstream.tell() == o1 + assert bstream.read(o2-o1) == data[o1:o2] + assert bstream.tell() == o2 + + print("sequential writes") + bstream = RebufferedBytesIO(io.BytesIO()) + for i in range(10): + assert bstream.write(z*100) == 100 + assert bstream.seek(0) == 0 + assert bstream.read(1000) == z*1000 + + print("random writes") + data = os.urandom(1000) + bstream = RebufferedBytesIO(io.BytesIO()) + assert bstream.write(data) == len(data) + for i in range(50): + o1 = random.randrange(0, 480) + o2 = random.randrange(520, 1000) + assert bstream.seek(o1) == o1 + assert bstream.tell() == o1 + assert bstream.write(data[o1:o2]) == o2-o1 + assert bstream.tell() == o2 + assert bstream.seek(0) == 0 + assert bstream.read(len(data)) == data + + print("cutting off trail") + data = os.urandom(1000) + bstream = RebufferedBytesIO(io.BytesIO(data), tailcutoff=50) + for i in range(15): + at = bstream.tell() + assert bstream.read(50) == data[at:at+50] + jumpback = random.randrange(1, 19) + assert bstream.seek(-jumpback, 1) diff --git a/tests/lib/test_containers_dict.py b/tests/lib/test_containers_dict.py 
new file mode 100644 index 000000000..f8622741d --- /dev/null +++ b/tests/lib/test_containers_dict.py @@ -0,0 +1,324 @@ +from tests.declarativeunittest import * +from construct import * +from construct.lib import * + + +def test_getitem(): + c = Container(a=1) + assert c["a"] == 1 + assert c.a == 1 + assert raises(lambda: c.unknownkey) == AttributeError + assert raises(lambda: c["unknownkey"]) == KeyError + +def test_setitem(): + c = Container() + c.a = 1 + assert c["a"] == 1 + assert c.a == 1 + c["a"] = 2 + assert c["a"] == 2 + assert c.a == 2 + +def test_delitem(): + c = Container(a=1, b=2) + del c.a + assert "a" not in c + assert raises(lambda: c.a) == AttributeError + assert raises(lambda: c["a"]) == KeyError + del c["b"] + assert "b" not in c + assert raises(lambda: c.b) == AttributeError + assert raises(lambda: c["b"]) == KeyError + assert c == Container() + assert list(c) == [] + +def test_ctor_empty(): + c = Container() + assert len(c) == 0 + assert list(c.items()) == [] + assert c == Container() + assert c == Container(c) + assert c == Container({}) + assert c == Container([]) + +def test_ctor_chained(): + c = Container(a=1, b=2, c=3, d=4) + assert c == Container(c) + +def test_ctor_dict(): + c = Container(a=1, b=2, c=3, d=4) + c = Container(c) + assert len(c) == 4 + assert list(c.items()) == [('a',1),('b',2),('c',3),('d',4)] + +def test_ctor_seqoftuples(): + c = Container([('a',1),('b',2),('c',3),('d',4)]) + assert len(c) == 4 + assert list(c.items()) == [('a',1),('b',2),('c',3),('d',4)] + +def test_ctor_orderedkw(): + c = Container(a=1, b=2, c=3, d=4) + d = Container(a=1, b=2, c=3, d=4) + assert c == d + assert len(c) == len(d) + assert list(c.items()) == list(d.items()) + +def test_keys(): + c = Container(a=1, b=2, c=3, d=4) + assert list(c.keys()) == ["a","b","c","d"] + +def test_values(): + c = Container(a=1, b=2, c=3, d=4) + assert list(c.values()) == [1,2,3,4] + +def test_items(): + c = Container(a=1, b=2, c=3, d=4) + assert list(c.items()) == 
[("a",1),("b",2),("c",3),("d",4)] + +def test_iter(): + c = Container(a=1, b=2, c=3, d=4) + assert list(c) == list(c.keys()) + +def test_clear(): + c = Container(a=1, b=2, c=3, d=4) + c.clear() + assert c == Container() + assert list(c.items()) == [] + +def test_pop(): + c = Container(a=1, b=2, c=3, d=4) + assert c.pop("b") == 2 + assert c.pop("d") == 4 + assert c.pop("a") == 1 + assert c.pop("c") == 3 + assert raises(c.pop, "missing") == KeyError + assert c == Container() + +def test_popitem(): + c = Container(a=1, b=2, c=3, d=4) + assert c.popitem() == ("d",4) + assert c.popitem() == ("c",3) + assert c.popitem() == ("b",2) + assert c.popitem() == ("a",1) + assert raises(c.popitem) == KeyError + +def test_update_dict(): + c = Container(a=1, b=2, c=3, d=4) + d = Container() + d.update(c) + assert d.a == 1 + assert d.b == 2 + assert d.c == 3 + assert d.d == 4 + assert c == d + assert list(c.items()) == list(d.items()) + +def test_update_seqoftuples(): + c = Container(a=1, b=2, c=3, d=4) + d = Container() + d.update([("a",1),("b",2),("c",3),("d",4)]) + assert d.a == 1 + assert d.b == 2 + assert d.c == 3 + assert d.d == 4 + assert c == d + assert list(c.items()) == list(d.items()) + +def test_copy_method(): + c = Container(a=1) + d = c.copy() + assert c == d + assert c is not d + +def test_copy(): + from copy import copy, deepcopy + + c = Container(a=1) + d = copy(c) + assert c == d + assert c is not d + +def test_deepcopy(): + from copy import copy, deepcopy + + c = Container(a=1) + d = deepcopy(c) + d.a = 2 + assert c != d + assert c is not d + +def test_pickling(): + import pickle + + empty = Container() + empty_unpickled = pickle.loads(pickle.dumps(empty)) + assert empty_unpickled == empty + + nested = Container(a=1,b=Container(),c=3,d=Container(e=4)) + nested_unpickled = pickle.loads(pickle.dumps(nested)) + assert nested_unpickled == nested + +def test_eq_issue_818(): + c = Container(a=1, b=2, c=3, d=4, e=5) + d = Container(a=1, b=2, c=3, d=4, e=5) + assert c == 
c + assert d == d + assert c == d + assert d == c + + a = Container(a=1,b=2) + b = Container(a=1,b=2,c=3) + assert not a == b + assert not b == a + + # c contains internal '_io' field, which shouldn't be considered in the comparison + c = Struct('a' / Int8ul).parse(b'\x01') + d = {'a': 1} + assert c == d + assert d == c + +def test_eq_numpy(): + import numpy + c = Container(arr=numpy.zeros(10, dtype=numpy.uint8)) + d = Container(arr=numpy.zeros(10, dtype=numpy.uint8)) + assert c == d + +def test_ne_issue_818(): + c = Container(a=1, b=2, c=3) + d = Container(a=1, b=2, c=3, d=4, e=5) + assert c != d + assert d != c + +def test_str_repr_empty(): + c = Container() + assert str(c) == "Container: " + assert repr(c) == "Container()" + assert eval(repr(c)) == c + +def test_str_repr(): + c = Container(a=1, b=2, c=3) + assert str(c) == "Container: \n a = 1\n b = 2\n c = 3" + assert repr(c) == "Container(a=1, b=2, c=3)" + assert eval(repr(c)) == c + +def test_str_repr_nested(): + c = Container(a=1,b=2,c=Container()) + assert str(c) == "Container: \n a = 1\n b = 2\n c = Container: " + assert repr(c) == "Container(a=1, b=2, c=Container())" + assert eval(repr(c)) == c + +def test_str_repr_recursive(): + c = Container(a=1,b=2) + c.c = c + assert str(c) == "Container: \n a = 1\n b = 2\n c = " + assert repr(c) == "Container(a=1, b=2, c=)" + +def test_fullstrings(): + setGlobalPrintFullStrings(True) + c = Container(data=b"1234567890") + assert str(c) == "Container: \n data = b'1234567890' (total 10)" + assert repr(c) == "Container(data=b'1234567890')" + c = Container(data=u"1234567890") + assert str(c) == "Container: \n data = '1234567890' (total 10)" + assert repr(c) == "Container(data='1234567890')" + c = Container(data=b"1234567890123456789012345678901234567890") + assert str(c) == "Container: \n data = b'1234567890123456789012345678901234567890' (total 40)" + assert repr(c) == "Container(data=b'1234567890123456789012345678901234567890')" + c = 
Container(data=u"1234567890123456789012345678901234567890") + assert str(c) == "Container: \n data = '1234567890123456789012345678901234567890' (total 40)" + assert repr(c) == "Container(data='1234567890123456789012345678901234567890')" + + setGlobalPrintFullStrings(False) + c = Container(data=b"1234567890") + assert str(c) == "Container: \n data = b'1234567890' (total 10)" + assert repr(c) == "Container(data=b'1234567890')" + c = Container(data=u"1234567890") + assert str(c) == "Container: \n data = '1234567890' (total 10)" + assert repr(c) == "Container(data='1234567890')" + c = Container(data=b"1234567890123456789012345678901234567890") + assert str(c) == "Container: \n data = b'1234567890123456'... (truncated, total 40)" + assert repr(c) == "Container(data=b'1234567890123456789012345678901234567890')" + c = Container(data=u"1234567890123456789012345678901234567890") + assert str(c) == "Container: \n data = '12345678901234567890123456789012'... (truncated, total 40)" + assert repr(c) == "Container(data='1234567890123456789012345678901234567890')" + + setGlobalPrintFullStrings() + +def test_falseflags(): + d = FlagsEnum(Byte, set=1, unset=2) + c = d.parse(b"\x01") + + setGlobalPrintFalseFlags(True) + assert str(c) == "Container: \n set = True\n unset = False" + assert repr(c) == "Container(set=True, unset=False)" + + setGlobalPrintFalseFlags(False) + assert str(c) == "Container: \n set = True" + assert repr(c) == "Container(set=True, unset=False)" + + setGlobalPrintFalseFlags() + +def test_privateentries(): + c = Container(_private = 1) + + setGlobalPrintPrivateEntries(True) + assert str(c) == "Container: \n _private = 1" + assert repr(c) == "Container()" + + setGlobalPrintPrivateEntries(False) + assert str(c) == "Container: " + assert repr(c) == "Container()" + + setGlobalPrintPrivateEntries() + +def test_len_bool(): + c = Container(a=1, b=2, c=3, d=4) + assert len(c) == 4 + assert c + c = Container() + assert len(c) == 0 + assert not c + +def test_in(): + c = 
Container(a=1) + assert "a" in c + assert "b" not in c + +def test_regression_recursionlock(): + print("REGRESSION: recursion_lock() used to leave private keys.") + c = Container() + str(c); repr(c) + assert not c + +def test_method_shadowing_1(): + c = Container() + assert c.update != 42 + c['update'] = 42 + assert c.update == 42 + +def test_method_shadowing_2(): + # TODO: test more possible things that might break if some method is shadowed + # ensure that methods work even if shadowed + import copy + c = Container( + x=42, + items='foo', + keys='bar', + __init__='', + search=lambda *_: 1/0, + update=lambda *_: 1/0, + copy=print, + # __copy__=print, # copy calls these two methods through instance, this will break things if is shadowed + # __deepcopy__=print, + # __class__=int, # this will break a lot of things, this should not be supported + ) + + dir(c) + assert c == copy.copy(c) + assert c == copy.deepcopy(c) + assert c is not copy.copy(c) + assert c is not copy.deepcopy(c) + assert Container.search(c, 'x') == 42 + assert Container.search(c, 'y') == None + pytest.raises(ZeroDivisionError, c.search, 'x') + diff --git a/tests/lib/test_containers_list.py b/tests/lib/test_containers_list.py new file mode 100644 index 000000000..bf63a8fd4 --- /dev/null +++ b/tests/lib/test_containers_list.py @@ -0,0 +1,16 @@ +from tests.declarativeunittest import * +from construct import * +from construct.lib import * + + +def test_str(): + l = ListContainer(range(5)) + assert str(l) == "ListContainer: \n 0\n 1\n 2\n 3\n 4" + assert repr(l) == "ListContainer([0, 1, 2, 3, 4])" + + l = ListContainer(range(5)) + print(repr(str(l))) + print(repr((l))) + l.append(l) + assert str(l) == "ListContainer: \n 0\n 1\n 2\n 3\n 4\n " + assert repr(l) == "ListContainer([0, 1, 2, 3, 4, ])" diff --git a/tests/lib/test_hex.py b/tests/lib/test_hex.py new file mode 100644 index 000000000..31c068b31 --- /dev/null +++ b/tests/lib/test_hex.py @@ -0,0 +1,27 @@ +from tests.declarativeunittest import * +from 
construct.lib.hex import * + + +def test_hexdump(): + for i in range(100): + assert hexundump(hexdump(b"?"*i,32),32) == b"?"*i + +def test_hexundump_issue_882(): + data1 = (hexdump(hexundump( +""" +0000 30 31 32 33 34 35 36 5C 0123456\\ +0008 38 8 + +""", linesize=8), linesize=8)) + + data2 = (hexdump(hexundump( +""" +0000 30 31 32 33 34 35 36 37 01234567 +0008 38 8 + +""", linesize=8), linesize=8)) + + print(data1) + print(data2) + assert data1 == hexdump(b"0123456\\8", 8) + assert data2 == hexdump(b"012345678", 8) diff --git a/tests/lib/test_py3compat.py b/tests/lib/test_py3compat.py new file mode 100644 index 000000000..2035c5fe1 --- /dev/null +++ b/tests/lib/test_py3compat.py @@ -0,0 +1,16 @@ +from tests.declarativeunittest import * +from construct.lib.py3compat import * + + +def test_int_byte(): + assert int2byte(5) == b"\x05" + assert int2byte(255) == b"\xff" + assert byte2int(b"\x05") == 5 + assert byte2int(b"\xff") == 255 + assert all(byte2int(int2byte(i)) == i for i in range(256)) + +def test_str_bytes(): + assert str2bytes("abc") == b"abc" + assert bytes2str(b"abc") == "abc" + assert bytes2str(str2bytes("abc123\n")) == "abc123\n" + assert str2bytes(bytes2str(b"abc123\n")) == b"abc123\n" diff --git a/tests/lib/test_search.py b/tests/lib/test_search.py new file mode 100644 index 000000000..e68e54609 --- /dev/null +++ b/tests/lib/test_search.py @@ -0,0 +1,79 @@ +from tests.declarativeunittest import * +from construct import * +from construct.lib import * + + +d = Struct( + "aa" / Int8ub, + "ab" / Struct( + "aba" / Int8ub, + "abb" / Int8ub, + "abc" / Struct( + "abca" / Int8ub, + "abcb" / Switch(this.abca, { + 1 : Struct("abcb1a" / Int8ub), + 2 : Struct("abcb2a" / Int8ub), + 3 : Struct("abcb3a" / Int8ub), + 4 : Struct("abcb4a" / Int8ub), + }), + ), + ), + "ac" / Int8ub, + "ad" / GreedyRange(Struct("ada" / Int8ub)), +) + +def test_search_sanity(): + obj1 = d.parse(b"\x11\x21\x22\x02\x02\x13\x51\x52") + + assert obj1.search("bb") == None + assert 
obj1.search("abcb") != None + assert obj1.search("ad") != None + assert obj1.search("aa") == 0x11 + assert obj1.search("aba") == 0x21 + assert obj1.search("abb") == 0x22 + assert obj1.search('ac') == 0x13 + +def test_search_functionality(): + obj1 = d.parse(b"\x11\x21\x22\x02\x02\x13\x51\x52") + obj2 = d.parse(b"\x11\x21\x22\x03\x03\x13\x51\x52") + + assert obj1.search('abcb1a') == None + assert obj1.search('abcb3a') == None + assert obj1.search('abcb4a') == None + assert obj1.search('abcb2a') == 0x02 + + assert obj2.search('abcb1a') == None + assert obj2.search('abcb2a') == None + assert obj2.search('abcb4a') == None + assert obj2.search('abcb3a') == 0x03 + + # Return only the first one + assert obj1.search("ada") == 0x51 + +def test_search_regexp(): + obj1 = d.parse(b"\x11\x21\x22\x02\x02\x13\x51\x52") + obj2 = d.parse(b"\x11\x21\x22\x03\x03\x13\x51\x52") + + assert obj1.search('abcb[1-4]a') == 0x02 + assert obj2.search('abcb[1-4]a') == 0x03 + +def test_search_all_sanity(): + obj1 = d.parse(b"\x11\x21\x22\x02\x02\x13\x51\x52") + + assert obj1.search_all("bb") == [] + assert obj1.search_all("ad") != None + assert obj1.search_all("aa") == [0x11] + assert obj1.search_all("aba") == [0x21] + assert obj1.search_all("abb") == [0x22] + assert obj1.search_all('ac') == [0x13] + +def test_search_all_functionality(): + obj1 = d.parse(b"\x11\x21\x22\x02\x02\x13\x51\x52") + + # Return all of them + assert obj1.search_all("ada") == [0x51,0x52] + +def test_search_all_regexp(): + obj1 = d.parse(b"\x11\x21\x22\x02\x02\x13\x51\x52") + + assert obj1.search_all("ab.*") == [0x21, 0x22, 0x02, 0x02] diff --git a/tests/test_benchmarks.py b/tests/test_benchmarks.py new file mode 100644 index 000000000..229b6ee3b --- /dev/null +++ b/tests/test_benchmarks.py @@ -0,0 +1,981 @@ +# -*- coding: utf-8 -*- + +from tests.declarativeunittest import * +from construct import * +from construct.lib import * +from tests.test_compiler import example, exampledata + + +def 
test_class_bytes_parse(benchmark): + d = Bytes(100) + benchmark(d.parse, bytes(100)) + +def test_class_bytes_parse_compiled(benchmark): + d = Bytes(100) + d = d.compile() + benchmark(d.parse, bytes(100)) + +def test_class_bytes_build(benchmark): + d = Bytes(100) + benchmark(d.build, bytes(100)) + +def test_class_bytes_build_compiled(benchmark): + d = Bytes(100) + d = d.compile() + benchmark(d.build, bytes(100)) + +def test_class_greedybytes_parse(benchmark): + d = GreedyBytes + benchmark(d.parse, bytes(100)) + +def test_class_greedybytes_parse_compiled(benchmark): + d = GreedyBytes + d = d.compile() + benchmark(d.parse, bytes(100)) + +def test_class_greedybytes_build(benchmark): + d = GreedyBytes + benchmark(d.build, bytes(100)) + +def test_class_greedybytes_build_compiled(benchmark): + d = GreedyBytes + d = d.compile() + benchmark(d.build, bytes(100)) + +def test_class_bitwise1_parse(benchmark): + d = Bitwise(Bytes(800)) + benchmark(d.parse, bytes(100)) + +def test_class_bitwise1_parse_compiled(benchmark): + d = Bitwise(Bytes(800)) + d = d.compile() + benchmark(d.parse, bytes(100)) + +def test_class_bitwise1_build(benchmark): + d = Bitwise(Bytes(800)) + benchmark(d.build, bytes(800)) + +def test_class_bitwise2_parse(benchmark): + d = Bitwise(RepeatUntil(obj_ == 1, Byte)) + benchmark(d.parse, bytes(99)+b"\x01") + +def test_class_bitwise2_parse_compiled(benchmark): + d = Bitwise(RepeatUntil(obj_ == 1, Byte)) + d = d.compile() + benchmark(d.parse, bytes(99)+b"\x01") + +def test_class_bitwise2_build(benchmark): + d = Bitwise(RepeatUntil(obj_ == 1, Byte)) + benchmark(d.build, [0 if i<800-1 else 1 for i in range(800)]) + +def test_class_bytewise1_parse(benchmark): + d = Bitwise(Bytewise(Bytes(100))) + benchmark(d.parse, bytes(100)) + +def test_class_bytewise1_parse_compiled(benchmark): + d = Bitwise(Bytewise(Bytes(100))) + d = d.compile() + benchmark(d.parse, bytes(100)) + +def test_class_bytewise1_build(benchmark): + d = Bitwise(Bytewise(Bytes(100))) + 
benchmark(d.build, bytes(100)) + +def test_class_bytewise2_parse(benchmark): + d = Bitwise(Bytewise(RepeatUntil(obj_ == 1, Byte))) + benchmark(d.parse, bytes(99)+b"\x01") + +def test_class_bytewise2_parse_compiled(benchmark): + d = Bitwise(Bytewise(RepeatUntil(obj_ == 1, Byte))) + d = d.compile() + benchmark(d.parse, bytes(99)+b"\x01") + +def test_class_bytewise2_build(benchmark): + d = Bitwise(Bytewise(RepeatUntil(obj_ == 1, Byte))) + benchmark(d.build, [0 if i<100-1 else 1 for i in range(100)]) + +def test_class_formatfield_parse(benchmark): + d = FormatField(">", "L") + benchmark(d.parse, bytes(4)) + +def test_class_formatfield_parse_compiled(benchmark): + d = FormatField(">", "L") + d = d.compile() + benchmark(d.parse, bytes(4)) + +def test_class_formatfield_build(benchmark): + d = FormatField(">", "L") + benchmark(d.build, 0) + +def test_class_formatfield_build_compiled(benchmark): + d = FormatField(">", "L") + d = d.compile() + benchmark(d.build, 0) + +def test_class_bytesinteger_parse(benchmark): + d = BytesInteger(16) + benchmark(d.parse, bytes(16)) + +def test_class_bytesinteger_parse_compiled(benchmark): + d = BytesInteger(16) + d = d.compile() + benchmark(d.parse, bytes(16)) + +def test_class_bytesinteger_build(benchmark): + d = BytesInteger(16) + benchmark(d.build, 0) + +def test_class_bytesinteger_build_compiled(benchmark): + d = BytesInteger(16) + d = d.compile() + benchmark(d.build, 0) + +def test_class_bitsinteger_parse(benchmark): + d = Bitwise(BitsInteger(128, swapped=True)) + benchmark(d.parse, bytes(128//8)) + +def test_class_bitsinteger_parse_compiled(benchmark): + d = Bitwise(BitsInteger(128, swapped=True)) + d = d.compile() + benchmark(d.parse, bytes(128//8)) + +def test_class_bitsinteger_build(benchmark): + d = Bitwise(BitsInteger(128, swapped=True)) + benchmark(d.build, 0) + +def test_class_bitsinteger_build_compiled(benchmark): + d = Bitwise(BitsInteger(128, swapped=True)) + d = d.compile() + benchmark(d.build, 0) + +def 
test_class_varint_parse(benchmark): + d = VarInt + benchmark(d.parse, b"\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x10") + +def test_class_varint_parse_compiled(benchmark): + d = VarInt + d = d.compile() + benchmark(d.parse, b"\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x10") + +def test_class_varint_build(benchmark): + d = VarInt + benchmark(d.build, 2**100) + +# ZigZag + +def test_class_paddedstring_parse(benchmark): + d = PaddedString(100, "utf8") + benchmark(d.parse, b'\xd0\x90\xd1\x84\xd0\xbe\xd0\xbd\x00\x00'+bytes(100)) + +def test_class_paddedstring_parse_compiled(benchmark): + d = PaddedString(100, "utf8") + d = d.compile() + benchmark(d.parse, b'\xd0\x90\xd1\x84\xd0\xbe\xd0\xbd\x00\x00'+bytes(100)) + +def test_class_paddedstring_build(benchmark): + d = PaddedString(100, "utf8") + benchmark(d.build, u"Афон") + +def test_class_pascalstring_parse(benchmark): + d = PascalString(Byte, "utf8") + benchmark(d.parse, b'\x08\xd0\x90\xd1\x84\xd0\xbe\xd0\xbd'+bytes(100)) + +def test_class_pascalstring_parse_compiled(benchmark): + d = PascalString(Byte, "utf8") + d = d.compile() + benchmark(d.parse, b'\x08\xd0\x90\xd1\x84\xd0\xbe\xd0\xbd'+bytes(100)) + +def test_class_pascalstring_build(benchmark): + d = PascalString(Byte, "utf8") + benchmark(d.build, u"Афон") + +def test_class_cstring_parse(benchmark): + d = CString("utf8") + benchmark(d.parse, b'\xd0\x90\xd1\x84\xd0\xbe\xd0\xbd\x00'+bytes(100)) + +def test_class_cstring_parse_compiled(benchmark): + d = CString("utf8") + d = d.compile() + benchmark(d.parse, b'\xd0\x90\xd1\x84\xd0\xbe\xd0\xbd\x00'+bytes(100)) + +def test_class_cstring_build(benchmark): + d = CString("utf8") + benchmark(d.build, u"Афон") + +def test_class_greedystring_parse(benchmark): + d = GreedyString("utf8") + benchmark(d.parse, b'\xd0\x90\xd1\x84\xd0\xbe\xd0\xbd\x00'+bytes(100)) + +def test_class_greedystring_parse_compiled(benchmark): + d = GreedyString("utf8") + d = d.compile() + 
benchmark(d.parse, b'\xd0\x90\xd1\x84\xd0\xbe\xd0\xbd\x00'+bytes(100)) + +def test_class_greedystring_build(benchmark): + d = GreedyString("utf8") + benchmark(d.build, u"Афон") + +def test_class_flag_parse(benchmark): + d = Flag + benchmark(d.parse, bytes(1)) + +def test_class_flag_parse_compiled(benchmark): + d = Flag + d = d.compile() + benchmark(d.parse, bytes(1)) + +def test_class_flag_build(benchmark): + d = Flag + benchmark(d.build, False) + +def test_class_flag_build_compiled(benchmark): + d = Flag + d = d.compile() + benchmark(d.build, False) + +def test_class_enum_parse(benchmark): + d = Enum(Byte, zero=0) + benchmark(d.parse, bytes(1)) + +def test_class_enum_parse_compiled(benchmark): + d = Enum(Byte, zero=0) + d = d.compile() + benchmark(d.parse, bytes(1)) + +def test_class_enum_build(benchmark): + d = Enum(Byte, zero=0) + benchmark(d.build, 0) + +def test_class_enum_build_compiled(benchmark): + d = Enum(Byte, zero=0) + d = d.compile() + benchmark(d.build, 0) + +def test_class_flagsenum_parse(benchmark): + d = FlagsEnum(Byte, a=1, b=2, c=4, d=8) + benchmark(d.parse, bytes(1)) + +def test_class_flagsenum_parse_compiled(benchmark): + d = FlagsEnum(Byte, a=1, b=2, c=4, d=8) + d = d.compile() + benchmark(d.parse, bytes(1)) + +def test_class_flagsenum_build(benchmark): + d = FlagsEnum(Byte, a=1, b=2, c=4, d=8) + benchmark(d.build, Container(a=False, b=False, c=False, d=False)) + +def test_class_mapping_parse(benchmark): + x = "object" + d = Mapping(Byte, {x:0}) + benchmark(d.parse, bytes(1)) + +def test_class_mapping_parse_compiled(benchmark): + x = "object" + d = Mapping(Byte, {x:0}) + d = d.compile() + benchmark(d.parse, bytes(1)) + +def test_class_mapping_build(benchmark): + x = "object" + d = Mapping(Byte, {x:0}) + benchmark(d.build, x) + +def test_class_mapping_build_compiled(benchmark): + x = "object" + d = Mapping(Byte, {x:0}) + d = d.compile() + benchmark(d.build, x) + +def test_class_struct_parse(benchmark): + d = Struct("a"/Byte, "b"/Byte, "c"/Byte, 
"d"/Byte, "e"/Byte) + benchmark(d.parse, bytes(5)) + +def test_class_struct_parse_compiled(benchmark): + d = Struct("a"/Byte, "b"/Byte, "c"/Byte, "d"/Byte, "e"/Byte) + d = d.compile() + benchmark(d.parse, bytes(5)) + +def test_class_struct_build(benchmark): + d = Struct("a"/Byte, "b"/Byte, "c"/Byte, "d"/Byte, "e"/Byte) + benchmark(d.build, dict(a=0, b=0, c=0, d=0, e=0)) + +def test_class_struct_build_compiled(benchmark): + d = Struct("a"/Byte, "b"/Byte, "c"/Byte, "d"/Byte, "e"/Byte) + d = d.compile() + benchmark(d.build, dict(a=0, b=0, c=0, d=0, e=0)) + +def test_class_sequence_parse(benchmark): + d = Sequence(Byte, Byte, Byte, Byte, Byte) + benchmark(d.parse, bytes(5)) + +def test_class_sequence_parse_compiled(benchmark): + d = Sequence(Byte, Byte, Byte, Byte, Byte) + d = d.compile() + benchmark(d.parse, bytes(5)) + +def test_class_sequence_build(benchmark): + d = Sequence(Byte, Byte, Byte, Byte, Byte) + benchmark(d.build, [0]*5) + +def test_class_sequence_build_compiled(benchmark): + d = Sequence(Byte, Byte, Byte, Byte, Byte) + d = d.compile() + benchmark(d.build, [0]*5) + +def test_class_array_parse(benchmark): + d = Array(100, Byte) + benchmark(d.parse, bytes(100)) + +def test_class_array_parse_compiled(benchmark): + d = Array(100, Byte) + d = d.compile() + benchmark(d.parse, bytes(100)) + +def test_class_array_build(benchmark): + d = Array(100, Byte) + benchmark(d.build, [0]*100) + +def test_class_array_build_compiled(benchmark): + d = Array(100, Byte) + d = d.compile() + benchmark(d.build, [0]*100) + +def test_class_greedyrange_parse(benchmark): + d = GreedyRange(Byte) + benchmark(d.parse, bytes(100)) + +def test_class_greedyrange_parse_compiled(benchmark): + d = GreedyRange(Byte) + d = d.compile() + benchmark(d.parse, bytes(100)) + +def test_class_greedyrange_build(benchmark): + d = GreedyRange(Byte) + benchmark(d.build, [0]*100) + +def test_class_repeatuntil_parse(benchmark): + d = RepeatUntil(obj_ > 0, Byte) + benchmark(d.parse, bytes(i<100 for i in 
range(100))) + +def test_class_repeatuntil_parse_compiled(benchmark): + d = RepeatUntil(obj_ > 0, Byte) + d = d.compile() + benchmark(d.parse, bytes(i<100 for i in range(100))) + +def test_class_repeatuntil_build(benchmark): + d = RepeatUntil(obj_ > 0, Byte) + benchmark(d.build, [int(i<99) for i in range(100)]) + +def test_class_repeatuntil_build_compiled(benchmark): + d = RepeatUntil(obj_ > 0, Byte) + d = d.compile() + benchmark(d.build, [int(i<99) for i in range(100)]) + +def test_class_const_parse(benchmark): + d = Const(bytes(10)) + benchmark(d.parse, bytes(10)) + +def test_class_const_parse_compiled(benchmark): + d = Const(bytes(10)) + d = d.compile() + benchmark(d.parse, bytes(10)) + +def test_class_const_build(benchmark): + d = Const(bytes(10)) + benchmark(d.build, bytes(10)) + +def test_class_const_build_compiled(benchmark): + d = Const(bytes(10)) + d = d.compile() + benchmark(d.build, bytes(10)) + +def test_class_computed_parse(benchmark): + d = Computed(this.entry) + benchmark(d.parse, bytes(), entry=1) + +def test_class_computed_parse_compiled(benchmark): + d = Computed(this.entry) + d = d.compile() + benchmark(d.parse, bytes(), entry=1) + +def test_class_computed_build(benchmark): + d = Computed(this.entry) + benchmark(d.build, None, entry=1) + +def test_class_computed_build_compiled(benchmark): + d = Computed(this.entry) + d = d.compile() + benchmark(d.build, None, entry=1) + +# - not supported by compiler +# Index + +def test_class_rebuild_parse(benchmark): + d = Rebuild(Int32ub, 0) + benchmark(d.parse, bytes(4)) + +def test_class_rebuild_parse_compiled(benchmark): + d = Rebuild(Int32ub, 0) + d = d.compile() + benchmark(d.parse, bytes(4)) + +def test_class_rebuild_build(benchmark): + d = Rebuild(Int32ub, 0) + benchmark(d.build, None) + +def test_class_rebuild_build_compiled(benchmark): + d = Rebuild(Int32ub, 0) + d = d.compile() + benchmark(d.build, None) + +def test_class_default_parse(benchmark): + d = Default(Int32ub, 0) + benchmark(d.parse, 
bytes(4)) + +def test_class_default_parse_compiled(benchmark): + d = Default(Int32ub, 0) + d = d.compile() + benchmark(d.parse, bytes(4)) + +def test_class_default_build(benchmark): + d = Default(Int32ub, 0) + benchmark(d.build, None) + +def test_class_default_build_compiled(benchmark): + d = Default(Int32ub, 0) + d = d.compile() + benchmark(d.build, None) + +def test_class_check_parse(benchmark): + d = Check(this.entry == 1) + benchmark(d.parse, bytes(), entry=1) + +def test_class_check_parse_compiled(benchmark): + d = Check(this.entry == 1) + d = d.compile() + benchmark(d.parse, bytes(), entry=1) + +def test_class_check_build(benchmark): + d = Check(this.entry == 1) + benchmark(d.build, None, entry=1) + +def test_class_check_build_compiled(benchmark): + d = Check(this.entry == 1) + d = d.compile() + benchmark(d.build, None, entry=1) + +# - raises exception +# Error + +def test_class_focusedseq_parse(benchmark): + d = FocusedSeq("num", Const(bytes(10)), "num"/Int32ub, Terminated) + benchmark(d.parse, bytes(14)) + +def test_class_focusedseq_parse_compiled(benchmark): + d = FocusedSeq("num", Const(bytes(10)), "num"/Int32ub, Terminated) + d = d.compile() + benchmark(d.parse, bytes(14)) + +def test_class_focusedseq_build(benchmark): + d = FocusedSeq("num", Const(bytes(10)), "num"/Int32ub, Terminated) + benchmark(d.build, 0) + +def test_class_focusedseq_build_compiled(benchmark): + d = FocusedSeq("num", Const(bytes(10)), "num"/Int32ub, Terminated) + d = d.compile() + benchmark(d.build, 0) + +def test_class_pickled_parse(benchmark): + d = Pickled + if PY3: + data = b'\x80\x03]q\x00()K\x01G@\x02ffffff}q\x01]q\x02C\x01\x00q\x03X\x00\x00\x00\x00q\x04e.' + else: + data = b"(lp0\n(taI1\naF2.3\na(dp1\na(lp2\naS'1'\np3\naS''\np4\na." + benchmark(d.parse, data) + +def test_class_pickled_parse_compiled(benchmark): + d = Pickled + d = d.compile() + if PY3: + data = b'\x80\x03]q\x00()K\x01G@\x02ffffff}q\x01]q\x02C\x01\x00q\x03X\x00\x00\x00\x00q\x04e.' 
+ else: + data = b"(lp0\n(taI1\naF2.3\na(dp1\na(lp2\naS'1'\np3\naS''\np4\na." + benchmark(d.parse, data) + +def test_class_pickled_build(benchmark): + d = Pickled + if PY3: + data = b'\x80\x03]q\x00()K\x01G@\x02ffffff}q\x01]q\x02C\x01\x00q\x03X\x00\x00\x00\x00q\x04e.' + else: + data = b"(lp0\n(taI1\naF2.3\na(dp1\na(lp2\naS'1'\np3\naS''\np4\na." + benchmark(d.build, d.parse(data)) + +def test_class_numpy_parse(benchmark): + d = Numpy + data = b"\x93NUMPY\x01\x00F\x00{'descr': '", "B"), + "bytesinteger1" / BytesInteger(16, signed=True), + "bytesinteger2" / BytesInteger(16, swapped=True), + "bytesinteger3" / BytesInteger(this.num+1), + "bitsinteger1" / BitsInteger(16, signed=True), + "bitsinteger2" / BitsInteger(16, swapped=True), + "bitsinteger3" / BitsInteger(this.num+1), + "int1" / Byte, + "int2" / Int64ub, + "float1" / Half, + "float2" / Single, + "float3" / Double, + "varint" / VarInt, + "zigzag" / ZigZag, + + "string1" / PaddedString(12, "ascii"), + "string2" / PaddedString(12, "utf8"), + "string3" / PaddedString(12, "utf16"), + "string4" / PaddedString(12, "utf32"), + "pascalstring1" / PascalString(Byte, "ascii"), + "pascalstring2" / PascalString(Byte, "utf8"), + "pascalstring3" / PascalString(Byte, "utf16"), + "pascalstring4" / PascalString(Byte, "utf32"), + "cstring1" / CString("ascii"), + "cstring2" / CString("utf8"), + "cstring3" / CString("utf16"), + "cstring4" / CString("utf32"), + "greedystring1" / Prefixed(Byte, GreedyString("ascii")), + "greedystring2" / Prefixed(Byte, GreedyString("utf8")), + "greedystring3" / Prefixed(Byte, GreedyString("utf16")), + "greedystring4" / Prefixed(Byte, GreedyString("utf32")), + + "flag" / Flag, + "enum1" / Enum(Byte, zero=0), + "enum2" / Enum(Byte), + "flagsenum1" / FlagsEnum(Byte, zero=0, one=1), + "flagsenum2" / FlagsEnum(Byte), + "mapping" / Mapping(Byte, {"zero":0}), + + "struct1" / Struct("field" / Byte, Check(this.field == 0)), + "struct2" / Struct("field" / Byte, StopIf(True), Error), + "sequence1" / 
Sequence(Byte, Byte), + "sequence2" / Sequence("num1" / Byte, "num2" / Byte), + # WARNING: this no longer rebuilds after fixing + # "sequence3" / Sequence("num1" / Byte, "num2" / Byte, StopIf(True), Error), + + "array1" / Array(5, Byte), + "array2" / Array(this.num, Byte), + "greedyrange0" / Prefixed(Byte, GreedyRange(Byte)), + "repeatuntil1" / RepeatUntil(obj_ == 0, Byte), + + "const1" / Const(bytes(4)), + "const2" / Const(0, Int32ub), + "computed1" / Computed("string literal"), + "computed2" / Computed(this.num), + "computedarray" / Computed([1,2,3]), + # WARNING: _index is not supported in compiled classes + # "index1" / Array(3, Index), + # "index2" / RestreamData(b"\x00", GreedyRange(Byte >> Index)), + # "index3" / RestreamData(b"\x00", RepeatUntil(True, Byte >> Index)), + "rebuild" / Rebuild(Byte, len_(this.computedarray)), + "default" / Default(Byte, 0), + Check(this.num == 0), + "check" / Check(this.num == 0), + "error0" / If(False, Error), + "focusedseq1" / FocusedSeq("num", Const(bytes(4)), "num"/Byte), + "focusedseq2_select" / Computed("num"), + "focusedseq2" / FocusedSeq(this._.focusedseq2_select, "num"/Byte), + "pickled_data" / Computed(b"(lp0\n(taI1\naF2.3\na(dp1\na(lp2\naS'1'\np3\naS''\np4\na."), + "pickled" / RestreamData(this.pickled_data, Pickled), + "numpy_data" / Computed(b"\x93NUMPY\x01\x00F\x00{'descr': '> Byte >> Byte), + "namedtuple4" / NamedTuple("coord", "x y z", "x"/Byte + "y"/Byte + "z"/Byte), + "timestamp1" / RestreamData(b'\x00\x00\x00\x00ZIz\x00', Timestamp(Int64ub, 1, 1970)), + "timestamp2" / RestreamData(b'H9\x8c"', Timestamp(Int32ub, "msdos", "msdos")), + "hex1" / Hex(Byte), + "hex2" / Hex(Bytes(1)), + "hex3" / Hex(RawCopy(Byte)), + "hexdump1" / HexDump(Bytes(1)), + "hexdump2" / HexDump(RawCopy(Byte)), + + "union1" / Union(None, "char"/Byte, "short"/Short, "int"/Int), + "union2" / Union(1, "char"/Byte, "short"/Short, "int"/Int), + "union3" / Union(0, "char1"/Byte, "char2"/Byte, "char3"/Byte), + "union4" / Union("char1", 
"char1"/Byte, "char2"/Byte, "char3"/Byte), + "select" / Select(Byte, CString("ascii")), + "optional" / Optional(Byte), + "if1" / If(this.num == 0, Byte), + "ifthenelse" / IfThenElse(this.num == 0, Byte, Byte), + "switch1" / Switch(this.num, {0 : Byte, 255 : Error}), + "switch2" / Switch(this.num, {}), + "switch3" / Switch(this.num, {}, default=Byte), + "stopif0" / StopIf(this.num == 255), + "stopif1" / Struct(StopIf(this._.num == 0), Error), + # WARNING: this no longer rebuilds after fixing + # "stopif2" / Sequence(StopIf(this._.num == 0), Error), + "stopif3" / GreedyRange(StopIf(this.num == 0)), + + "padding" / Padding(2), + "paddedbyte" / Padded(4, Byte), + "alignedbyte" / Aligned(4, Byte), + "alignedstruct" / AlignedStruct(4, "a"/Byte, "b"/Short), + "bitstruct" / BitStruct("a"/Octet), + + "pointer" / Pointer(0, Byte), + "peek" / Peek(Byte), + "seek0" / Seek(0, 1), + "tell" / Tell, + "pass1" / Pass, + "terminated0" / Prefixed(Byte, Terminated), + + "rawcopy1" / RawCopy(Byte), + "rawcopy2" / RawCopy(RawCopy(RawCopy(Byte))), + "bytesswapped" / ByteSwapped(BytesInteger(8)), + "bitsswapped" / BitsSwapped(BytesInteger(8)), + "prefixed1" / Prefixed(Byte, GreedyBytes), + "prefixed2" / RestreamData(b"\x01", Prefixed(Byte, GreedyBytes, includelength=True)), + "prefixedarray" / PrefixedArray(Byte, Byte), + # WARNING: no buildemit yet + "fixedsized" / FixedSized(10, GreedyBytes), + "nullterminated" / RestreamData(b'\x01\x00', NullTerminated(GreedyBytes)), + "nullstripped" / RestreamData(b'\x01\x00', NullStripped(GreedyBytes)), + "restreamdata" / RestreamData(b"\xff", Byte), + "restreamdata_verify" / Check(this.restreamdata == 255), + # Transformed + # Restreamed + # ProcessXor + # ProcessRotateLeft + # Checksum + "compressed_bzip2_data" / Computed(b'BZh91AY&SYSc\x11\x99\x00\x00\x00A\x00@\x00@\x00 \x00!\x00\x82\x83\x17rE8P\x90Sc\x11\x99'), + "compressed_bzip2" / RestreamData(this.compressed_bzip2_data, Compressed(GreedyBytes, "bzip2", level=9)), + # Rebuffered + + # Lazy + # 
LazyStruct + # LazyArray + # LazyBound + + # adapters and validators + + "probe" / Probe(), + "debugger" / Debugger(Byte), + + "items1" / Computed([1,2,3]), + "len1" / Computed(len_(this.items1)), + Check(this.len1 == 3), + + "len2" / Rebuild(Computed(5), len_(this.items2)), + "items2" / Bytes(5), + Check(this.len2 == 5), + + # WARNING: faulty list_ implementation, but compiles into correct code? + # "repeatuntil2" / RepeatUntil(list_ == [0], Byte), + # "repeatuntil3" / RepeatUntil(obj_ == 0, Byte), +) +exampledata = bytes(1000) + + +def test_compiled_example_benchmark(): + d = example.compile(filename="example_compiled.py") + d.benchmark(exampledata, filename="example_benchmark.txt") + +def test_compiled_example_integrity(): + d = example + obj = d.parse(exampledata) + data = d.build(obj) + d = d.compile() + obj2 = d.parse(exampledata) + data2 = d.build(obj) + assert obj == obj2 + assert data == data2 diff --git a/tests/test_core.py b/tests/test_core.py new file mode 100644 index 000000000..e7ef7de77 --- /dev/null +++ b/tests/test_core.py @@ -0,0 +1,2470 @@ +# -*- coding: utf-8 -*- + +from tests.declarativeunittest import * +from construct import * +from construct.lib import * + +def test_bytes(): + d = Bytes(4) + common(d, b"1234", b"1234", 4) + assert d.parse(b"1234567890") == b"1234" + assert raises(d.parse, b"") == StreamError + assert raises(d.build, b"looooooooooooooong") == StreamError + assert d.build(1) == b"\x00\x00\x00\x01" + assert d.build(0x01020304) == b"\x01\x02\x03\x04" + + d = Bytes(this.n) + common(d, b"1234", b"1234", 4, n=4) + assert d.parse(b"1234567890",n=4) == b"1234" + assert d.build(1, n=4) == b"\x00\x00\x00\x01" + assert raises(d.build, b"", n=4) == StreamError + assert raises(d.build, b"toolong", n=4) == StreamError + assert raises(d.sizeof) == SizeofError + assert raises(d.sizeof, n=4) == 4 + +def test_greedybytes(): + common(GreedyBytes, b"1234", b"1234", SizeofError) + +def test_bytes_issue_827(): + d = Bytes(3) + assert 
d.build(bytearray(b'\x01\x02\x03')) == b'\x01\x02\x03' + d = GreedyBytes + assert d.build(bytearray(b'\x01\x02\x03')) == b'\x01\x02\x03' + +def test_bitwise(): + common(Bitwise(Bytes(8)), b"\xff", b"\x01\x01\x01\x01\x01\x01\x01\x01", 1) + common(Bitwise(Array(8,Bit)), b"\xff", [1,1,1,1,1,1,1,1], 1) + common(Bitwise(Array(2,Nibble)), b"\xff", [15,15], 1) + common(Bitwise(Array(1,Octet)), b"\xff", [255], 1) + + common(Bitwise(GreedyBytes), bytes(10), bytes(80), SizeofError) + +def test_bytewise(): + common(Bitwise(Bytewise(Bytes(1))), b"\xff", b"\xff", 1) + common(BitStruct("p1"/Nibble, "num"/Bytewise(Int24ub), "p2"/Nibble), b"\xf0\x10\x20\x3f", Container(p1=15, num=0x010203, p2=15), 4) + common(Bitwise(Sequence(Nibble, Bytewise(Int24ub), Nibble)), b"\xf0\x10\x20\x3f", [0x0f,0x010203,0x0f], 4) + common(Bitwise(Bytewise(GreedyBytes)), bytes(10), bytes(10), SizeofError) + +def test_ints(): + common(Byte, b"\xff", 255, 1) + common(Short, b"\x00\xff", 255, 2) + common(Int, b"\x00\x00\x00\xff", 255, 4) + common(Long, b"\x00\x00\x00\x00\x00\x00\x00\xff", 255, 8) + + common(Int8ub, b"\x01", 0x01, 1) + common(Int16ub, b"\x01\x02", 0x0102, 2) + common(Int32ub, b"\x01\x02\x03\x04", 0x01020304, 4) + common(Int64ub, b"\x01\x02\x03\x04\x05\x06\x07\x08", 0x0102030405060708, 8) + + common(Int8sb, b"\x01", 0x01, 1) + common(Int16sb, b"\x01\x02", 0x0102, 2) + common(Int32sb, b"\x01\x02\x03\x04", 0x01020304, 4) + common(Int64sb, b"\x01\x02\x03\x04\x05\x06\x07\x08", 0x0102030405060708, 8) + common(Int8sb, b"\xff", -1, 1) + common(Int16sb, b"\xff\xff", -1, 2) + common(Int32sb, b"\xff\xff\xff\xff", -1, 4) + common(Int64sb, b"\xff\xff\xff\xff\xff\xff\xff\xff", -1, 8) + + common(Int8ul, b"\x01", 0x01, 1) + common(Int16ul, b"\x01\x02", 0x0201, 2) + common(Int32ul, b"\x01\x02\x03\x04", 0x04030201, 4) + common(Int64ul, b"\x01\x02\x03\x04\x05\x06\x07\x08", 0x0807060504030201, 8) + + common(Int8sl, b"\x01", 0x01, 1) + common(Int16sl, b"\x01\x02", 0x0201, 2) + common(Int32sl, 
b"\x01\x02\x03\x04", 0x04030201, 4) + common(Int64sl, b"\x01\x02\x03\x04\x05\x06\x07\x08", 0x0807060504030201, 8) + common(Int8sl, b"\xff", -1, 1) + common(Int16sl, b"\xff\xff", -1, 2) + common(Int32sl, b"\xff\xff\xff\xff", -1, 4) + common(Int64sl, b"\xff\xff\xff\xff\xff\xff\xff\xff", -1, 8) + +def test_ints24(): + common(Int24ub, b"\x01\x02\x03", 0x010203, 3) + common(Int24ul, b"\x01\x02\x03", 0x030201, 3) + common(Int24sb, b"\xff\xff\xff", -1, 3) + common(Int24sl, b"\xff\xff\xff", -1, 3) + +def test_floats(): + common(Half, b"\x00\x00", 0., 2) + common(Half, b"\x35\x55", 0.333251953125, 2) + common(Single, b"\x00\x00\x00\x00", 0., 4) + common(Single, b"?\x99\x99\x9a", 1.2000000476837158, 4) + common(Double, b"\x00\x00\x00\x00\x00\x00\x00\x00", 0., 8) + common(Double, b"?\xf3333333", 1.2, 8) + +def test_formatfield(): + d = FormatField("<","L") + common(d, b"\x01\x02\x03\x04", 0x04030201, 4) + assert raises(d.parse, b"") == StreamError + assert raises(d.parse, b"\x01\x02") == StreamError + assert raises(d.build, 2**100) == FormatFieldError + assert raises(d.build, 1e9999) == FormatFieldError + assert raises(d.build, "string not int") == FormatFieldError + +def test_formatfield_ints_randomized(): + for endianess,dtype in itertools.product("<>=","bhlqBHLQ"): + d = FormatField(endianess, dtype) + for i in range(100): + obj = random.randrange(0, 256**d.sizeof()//2) + assert d.parse(d.build(obj)) == obj + data = os.urandom(d.sizeof()) + assert d.build(d.parse(data)) == data + +def test_formatfield_floats_randomized(): + # there is a roundoff error because Python float is a C double + # http://stackoverflow.com/questions/39619636/struct-unpackstruct-packfloat-has-roundoff-error + # and analog although that was misplaced + # http://stackoverflow.com/questions/39676482/struct-packstruct-unpackfloat-is-inconsistent-on-py3 + for endianess,dtype in itertools.product("<>=","fd"): + d = FormatField(endianess, dtype) + for i in range(100): + x = random.random()*12345 + if dtype 
== "d": + assert d.parse(d.build(x)) == x + else: + assert abs(d.parse(d.build(x)) - x) < 1e-3 + for i in range(100): + b = os.urandom(d.sizeof()) + if not math.isnan(d.parse(b)): + assert d.build(d.parse(b)) == b + +def test_formatfield_bool_issue_901(): + d = FormatField(">","?") + assert d.parse(b"\x01") == True + assert d.parse(b"\xff") == True + assert d.parse(b"\x00") == False + assert d.build(True) == b"\x01" + assert d.build(False) == b"\x00" + assert d.sizeof() == 1 + +def test_bytesinteger(): + d = BytesInteger(0) + assert raises(d.parse, b"") == IntegerError + assert raises(d.build, 0) == IntegerError + d = BytesInteger(4, signed=True, swapped=False) + common(d, b"\x01\x02\x03\x04", 0x01020304, 4) + common(d, b"\xff\xff\xff\xff", -1, 4) + d = BytesInteger(4, signed=False, swapped=this.swapped) + common(d, b"\x01\x02\x03\x04", 0x01020304, 4, swapped=False) + common(d, b"\x04\x03\x02\x01", 0x01020304, 4, swapped=True) + assert raises(BytesInteger(-1).parse, b"") == IntegerError + assert raises(BytesInteger(-1).build, 0) == IntegerError + assert raises(BytesInteger(8).build, None) == IntegerError + assert raises(BytesInteger(8, signed=False).build, -1) == IntegerError + assert raises(BytesInteger(8, True).build, -2**64) == IntegerError + assert raises(BytesInteger(8, True).build, 2**64) == IntegerError + assert raises(BytesInteger(8, False).build, -2**64) == IntegerError + assert raises(BytesInteger(8, False).build, 2**64) == IntegerError + assert raises(BytesInteger(this.missing).sizeof) == SizeofError + +def test_bitsinteger(): + d = BitsInteger(0) + assert raises(d.parse, b"") == IntegerError + assert raises(d.build, 0) == IntegerError + d = BitsInteger(8) + common(d, b"\x01\x01\x01\x01\x01\x01\x01\x01", 255, 8) + d = BitsInteger(8, signed=True) + common(d, b"\x01\x01\x01\x01\x01\x01\x01\x01", -1, 8) + d = BitsInteger(16, swapped=True) + common(d, b"\x00\x00\x00\x00\x00\x00\x00\x00\x01\x01\x01\x01\x01\x01\x01\x01", 0xff00, 16) + d = BitsInteger(16, 
swapped=this.swapped) + common(d, b"\x01\x01\x01\x01\x01\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00", 0xff00, 16, swapped=False) + common(d, b"\x00\x00\x00\x00\x00\x00\x00\x00\x01\x01\x01\x01\x01\x01\x01\x01", 0xff00, 16, swapped=True) + assert raises(BitsInteger(-1).parse, b"") == IntegerError + assert raises(BitsInteger(-1).build, 0) == IntegerError + assert raises(BitsInteger(5, swapped=True).parse, bytes(5)) == IntegerError + assert raises(BitsInteger(5, swapped=True).build, 0) == IntegerError + assert raises(BitsInteger(8).build, None) == IntegerError + assert raises(BitsInteger(8, signed=False).build, -1) == IntegerError + assert raises(BitsInteger(8, True).build, -2**64) == IntegerError + assert raises(BitsInteger(8, True).build, 2**64) == IntegerError + assert raises(BitsInteger(8, False).build, -2**64) == IntegerError + assert raises(BitsInteger(8, False).build, 2**64) == IntegerError + assert raises(BitsInteger(this.missing).sizeof) == SizeofError + +def test_varint(): + d = VarInt + common(d, b"\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x10", 2**123, SizeofError) + for n in [0,1,5,100,255,256,65535,65536,2**32,2**100]: + assert d.parse(d.build(n)) == n + for n in range(0, 127): + common(d, int2byte(n), n, SizeofError) + assert raises(d.parse, b"") == StreamError + assert raises(d.build, -1) == IntegerError + assert raises(d.build, None) == IntegerError + assert raises(d.sizeof) == SizeofError + +def test_varint_issue_705(): + # no asserts needed + d = Struct('namelen' / VarInt, 'name' / Bytes(this.namelen)) + d.build(Container(namelen = 400, name = bytes(400))) + d = Struct('namelen' / VarInt, Check(this.namelen == 400)) + d.build(dict(namelen=400)) + +def test_zigzag(): + d = ZigZag + common(d, b"\x00", 0) + common(d, b"\x05", -3) + common(d, b"\x06", 3) + for n in [0,1,5,100,255,256,65535,65536,2**32,2**100]: + assert d.parse(d.build(n)) == n + for n in range(0, 63): + common(d, int2byte(n*2), n, SizeofError) + assert 
raises(d.parse, b"") == StreamError + assert raises(d.build, None) == IntegerError + assert raises(d.sizeof) == SizeofError + +def test_zigzag_regression(): + d = ZigZag + assert isinstance(d.parse(b"\x05"), int) + assert isinstance(d.parse(b"\x06"), int) + d = Struct('namelen' / ZigZag, Check(this.namelen == 400)) + # no asserts needed + d.build(dict(namelen=400)) + +def test_paddedstring(): + common(PaddedString(10, "utf8"), b"hello\x00\x00\x00\x00\x00", u"hello", 10) + + d = PaddedString(100, "ascii") + assert d.parse(b"X"*100) == u"X"*100 + assert d.build(u"X"*100) == b"X"*100 + assert raises(d.build, u"X"*200) == PaddingError + + for e,us in [("utf8",1),("utf16",2),("utf_16_le",2),("utf32",4),("utf_32_le",4)]: + s = u"Афон" + data = (s.encode(e)+bytes(100))[:100] + common(PaddedString(100, e), data, s, 100) + s = u"" + data = bytes(100) + common(PaddedString(100, e), data, s, 100) + + for e in ["ascii","utf8","utf16","utf-16-le","utf32","utf-32-le"]: + assert PaddedString(10, e).sizeof() == 10 + assert PaddedString(this.n, e).sizeof(n=10) == 10 + +def test_pascalstring(): + for e,us in [("utf8",1),("utf16",2),("utf_16_le",2),("utf32",4),("utf_32_le",4)]: + for sc in [Byte, Int16ub, Int16ul, VarInt]: + s = u"Афон" + data = sc.build(len(s.encode(e))) + s.encode(e) + common(PascalString(sc, e), data, s) + common(PascalString(sc, e), sc.build(0), u"") + + for e in ["utf8","utf16","utf-16-le","utf32","utf-32-le","ascii"]: + assert raises(PascalString(Byte, e).sizeof) == SizeofError + assert raises(PascalString(VarInt, e).sizeof) == SizeofError + +def test_pascalstring_issue_960(): + d = Select(PascalString(Byte, "ascii")) + assert raises(d.parse, b"\x01\xff") == SelectError + assert raises(d.build, u"Афон") == SelectError + +def test_cstring(): + for e,us in [("utf8",1),("utf16",2),("utf_16_le",2),("utf32",4),("utf_32_le",4)]: + s = u"Афон" + common(CString(e), s.encode(e)+bytes(us), s) + common(CString(e), bytes(us), u"") + + CString("utf8").build(s) == 
b'\xd0\x90\xd1\x84\xd0\xbe\xd0\xbd'+b"\x00" + CString("utf16").build(s) == b'\xff\xfe\x10\x04D\x04>\x04=\x04'+b"\x00\x00" + CString("utf32").build(s) == b'\xff\xfe\x00\x00\x10\x04\x00\x00D\x04\x00\x00>\x04\x00\x00=\x04\x00\x00'+b"\x00\x00\x00\x00" + + for e in ["utf8","utf16","utf-16-le","utf32","utf-32-le","ascii"]: + assert raises(CString(e).sizeof) == SizeofError + +def test_greedystring(): + for e,us in [("utf8",1),("utf16",2),("utf_16_le",2),("utf32",4),("utf_32_le",4)]: + s = u"Афон" + common(GreedyString(e), s.encode(e), s) + common(GreedyString(e), b"", u"") + + for e in ["utf8","utf16","utf-16-le","utf32","utf-32-le","ascii"]: + assert raises(GreedyString(e).sizeof) == SizeofError + +def test_string_encodings(): + # checks that "-" is replaced with "_" + common(GreedyString("utf-8"), b"", u"") + common(GreedyString("utf-8"), b'\xd0\x90\xd1\x84\xd0\xbe\xd0\xbd', u"Афон") + +def test_flag(): + d = Flag + common(d, b"\x00", False, 1) + common(d, b"\x01", True, 1) + assert d.parse(b"\xff") == True + +def test_enum(): + d = Enum(Byte, one=1, two=2, four=4, eight=8) + common(d, b"\x01", "one", 1) + common(d, b"\xff", 255, 1) + assert d.parse(b"\x01") == d.one + assert d.parse(b"\x01") == "one" + assert int(d.parse(b"\x01")) == 1 + assert d.parse(b"\xff") == 255 + assert int(d.parse(b"\xff")) == 255 + assert d.build(8) == b'\x08' + assert d.build(255) == b"\xff" + assert d.build(d.eight) == b'\x08' + assert d.one == "one" + assert int(d.one) == 1 + assert raises(d.build, "unknown") == MappingError + assert raises(lambda: d.missing) == AttributeError + +def test_enum_enum34(): + import enum + class E(enum.IntEnum): + a = 1 + class F(enum.IntEnum): + b = 2 + d = Enum(Byte, E, F) + common(d, b"\x01", "a", 1) + common(d, b"\x02", "b", 1) + +def test_enum_enum36(): + import enum + class E(enum.IntEnum): + a = 1 + class F(enum.IntFlag): + b = 2 + d = Enum(Byte, E, F) + common(d, b"\x01", "a", 1) + common(d, b"\x02", "b", 1) + +def test_enum_issue_298(): + d = Struct( + 
"ctrl" / Enum(Byte, + NAK = 0x15, + STX = 0x02, + ), + Probe(), + "optional" / If(lambda this: this.ctrl == "NAK", Byte), + ) + common(d, b"\x15\xff", Container(ctrl='NAK', optional=255)) + common(d, b"\x02", Container(ctrl='STX', optional=None)) + + # FlagsEnum is not affected by same bug + d = Struct( + "flags" / FlagsEnum(Byte, a=1), + Check(lambda ctx: ctx.flags == Container(_flagsenum=True, a=1)), + ) + common(d, b"\x01", dict(flags=Container(_flagsenum=True, a=True)), 1) + + # Flag is not affected by same bug + d = Struct( + "flag" / Flag, + Check(lambda ctx: ctx.flag == True), + ) + common(d, b"\x01", dict(flag=True), 1) + +def test_enum_issue_677(): + d = Enum(Byte, one=1) + common(d, b"\xff", 255, 1) + common(d, b"\x01", EnumIntegerString.new(1, "one"), 1) + assert isinstance(d.parse(b"\x01"), EnumIntegerString) + d = Enum(Byte, one=1).compile() + common(d, b"\xff", 255, 1) + common(d, b"\x01", EnumIntegerString.new(1, "one"), 1) + assert isinstance(d.parse(b"\x01"), EnumIntegerString) + + d = Struct("e" / Enum(Byte, one=1)) + assert str(d.parse(b"\x01")) == 'Container: \n e = (enum) one 1' + assert str(d.parse(b"\xff")) == 'Container: \n e = (enum) (unknown) 255' + d = Struct("e" / Enum(Byte, one=1)).compile() + assert str(d.parse(b"\x01")) == 'Container: \n e = (enum) one 1' + assert str(d.parse(b"\xff")) == 'Container: \n e = (enum) (unknown) 255' + +@xfail(reason="Cannot implement this in EnumIntegerString.") +def test_enum_issue_992(): + import enum + class E(enum.IntEnum): + a = 1 + class F(enum.IntFlag): + b = 2 + d = Enum(Byte, E, F) + x = d.parse(b"\x01") + assert x == E.a + x = d.parse(b"\x02") + assert x == F.b + +def test_flagsenum(): + d = FlagsEnum(Byte, one=1, two=2, four=4, eight=8) + common(d, b"\x03", Container(_flagsenum=True, one=True, two=True, four=False, eight=False), 1) + assert d.build({}) == b'\x00' + assert d.build(dict(one=True,two=True)) == b'\x03' + assert d.build(8) == b'\x08' + assert d.build(1|2) == b'\x03' + assert 
d.build(255) == b"\xff" + assert d.build(d.eight) == b'\x08' + assert d.build(d.one|d.two) == b'\x03' + assert raises(d.build, dict(unknown=True)) == MappingError + assert raises(d.build, "unknown") == MappingError + assert d.one == "one" + assert d.one|d.two == "one|two" + assert raises(lambda: d.missing) == AttributeError + +def test_flagsenum_enum34(): + import enum + class E(enum.IntEnum): + a = 1 + class F(enum.IntEnum): + b = 2 + d = FlagsEnum(Byte, E, F) + common(d, b"\x01", Container(_flagsenum=True, a=True,b=False), 1) + common(d, b"\x02", Container(_flagsenum=True, a=False,b=True), 1) + common(d, b"\x03", Container(_flagsenum=True, a=True,b=True), 1) + +def test_flagsenum_enum36(): + import enum + class E(enum.IntEnum): + a = 1 + class F(enum.IntFlag): + b = 2 + d = FlagsEnum(Byte, E, F) + common(d, b"\x01", Container(_flagsenum=True, a=True,b=False), 1) + common(d, b"\x02", Container(_flagsenum=True, a=False,b=True), 1) + common(d, b"\x03", Container(_flagsenum=True, a=True,b=True), 1) + +def test_mapping(): + x = object + d = Mapping(Byte, {x:0}) + common(d, b"\x00", x, 1) + +def test_struct(): + common(Struct(), b"", Container(), 0) + common(Struct("a"/Int16ub, "b"/Int8ub), b"\x00\x01\x02", Container(a=1,b=2), 3) + common(Struct("a"/Struct("b"/Byte)), b"\x01", Container(a=Container(b=1)), 1) + common(Struct(Const(b"\x00"), Padding(1), Pass, Terminated), bytes(2), {}, SizeofError) + assert raises(Struct("missingkey"/Byte).build, {}) == KeyError + assert raises(Struct(Bytes(this.missing)).sizeof) == SizeofError + d = Struct(Computed(7), Const(b"JPEG"), Pass, Terminated) + assert d.build(None) == d.build({}) + +def test_struct_nested(): + d = Struct("a"/Byte, "b"/Int16ub, "inner"/Struct("c"/Byte, "d"/Byte)) + common(d, b"\x01\x00\x02\x03\x04", Container(a=1,b=2,inner=Container(c=3,d=4)), 5) + +def test_struct_kwctor(): + d = Struct(a=Byte, b=Byte, c=Byte, d=Byte) + common(d, b"\x01\x02\x03\x04", Container(a=1,b=2,c=3,d=4), 4) + +def 
test_struct_proper_context(): + # adjusted to support new embedding semantics + d = Struct( + "x"/Byte, + "inner"/Struct( + "y"/Byte, + "a"/Computed(this._.x+1), + "b"/Computed(this.y+2), + ), + "c"/Computed(this.x+3), + "d"/Computed(this.inner.y+4), + ) + assert d.parse(b"\x01\x0f") == Container(x=1, inner=Container(y=15, a=2, b=17), c=4, d=19) + +def test_struct_sizeof_context_nesting(): + d = Struct( + "a" / Computed(1), + "inner" / Struct( + "b" / Computed(2), + Check(this._.a == 1), + Check(this.b == 2), + ), + Check(this.a == 1), + Check(this.inner.b == 2), + ) + d.sizeof() + +def test_sequence(): + common(Sequence(), b"", [], 0) + common(Sequence(Int8ub, Int16ub), b"\x01\x00\x02", [1,2], 3) + common(Int8ub >> Int16ub, b"\x01\x00\x02", [1,2], 3) + d = Sequence(Computed(7), Const(b"JPEG"), Pass, Terminated) + assert d.build(None) == d.build([None,None,None,None]) + +def test_sequence_nested(): + d = Sequence(Int8ub, Int16ub, Sequence(Int8ub, Int8ub)) + common(d, b"\x01\x00\x02\x03\x04", [1,2,[3,4]], 5) + +def test_array(): + common(Byte[0], b"", [], 0) + common(Byte[4], b"1234", [49,50,51,52], 4) + + d = Array(3, Byte) + common(d, b"\x01\x02\x03", [1,2,3], 3) + assert d.parse(b"\x01\x02\x03additionalgarbage") == [1,2,3] + assert raises(d.parse, b"") == StreamError + assert raises(d.build, [1,2]) == RangeError + assert raises(d.build, [1,2,3,4,5,6,7,8]) == RangeError + + d = Array(this.n, Byte) + common(d, b"\x01\x02\x03", [1,2,3], 3, n=3) + assert d.parse(b"\x01\x02\x03", n=3) == [1,2,3] + assert d.parse(b"\x01\x02\x03additionalgarbage", n=3) == [1,2,3] + assert raises(d.parse, b"", n=3) == StreamError + assert raises(d.build, [1,2], n=3) == RangeError + assert raises(d.build, [1,2,3,4,5,6,7,8], n=3) == RangeError + assert raises(d.sizeof) == SizeofError + assert raises(d.sizeof, n=3) == 3 + + d = Array(3, Byte, discard=True) + assert d.parse(b"\x01\x02\x03") == [] + assert d.build([1,2,3]) == b"\x01\x02\x03" + assert d.sizeof() == 3 + +@xfail(ONWINDOWS, 
reason="/dev/zero not available on Windows") +def test_array_nontellable(): + assert Array(5, Byte).parse_stream(devzero) == [0,0,0,0,0] + +def test_greedyrange(): + d = GreedyRange(Byte) + common(d, b"", [], SizeofError) + common(d, b"\x01\x02", [1,2], SizeofError) + + d = GreedyRange(Byte, discard=False) + assert d.parse(b"\x01\x02") == [1,2] + assert d.build([1,2]) == b"\x01\x02" + + d = GreedyRange(Byte, discard=True) + assert d.parse(b"\x01\x02") == [] + assert d.build([1,2]) == b"\x01\x02" + +def test_repeatuntil(): + d = RepeatUntil(obj_ == 9, Byte) + common(d, b"\x02\x03\x09", [2,3,9], SizeofError) + assert d.parse(b"\x02\x03\x09additionalgarbage") == [2,3,9] + assert raises(d.parse, b"\x02\x03\x08") == StreamError + assert raises(d.build, [2,3,8]) == RepeatError + + d = RepeatUntil(lambda x,lst,ctx: lst[-2:] == [0,0], Byte) + # d = RepeatUntil(lst_[-2:] == [0,0], Byte) + assert d.parse(b"\x01\x00\x00\xff") == [1,0,0] + assert d.build([1,0,0,4]) == b"\x01\x00\x00" + d = RepeatUntil(True, Byte) + assert d.parse(b"\x00") == [0] + assert d.build([0]) == b"\x00" + + d = RepeatUntil(obj_ == 9, Byte, discard=True) + assert d.parse(b"\x02\x03\x09additionalgarbage") == [] + assert raises(d.parse, b"\x02\x03\x08") == StreamError + assert d.build([2,3,8,9]) == b"\x02\x03\x08\x09" + assert raises(d.build, [2,3,8]) == RepeatError + +def test_const(): + common(Const(b"MZ"), b"MZ", b"MZ", 2) + common(Const(b"MZ", Bytes(2)), b"MZ", b"MZ", 2) + common(Const(255, Int32ul), b"\xff\x00\x00\x00", 255, 4) + assert raises(Const(b"MZ").parse, b"???") == ConstError + assert raises(Const(b"MZ").build, b"???") == ConstError + assert raises(Const(255, Int32ul).parse, b"\x00\x00\x00\x00") == ConstError + assert Struct(Const(b"MZ")).build({}) == b"MZ" + # non-prefixed string literals are unicode on Python 3 + assert raises(lambda: Const("no prefix string")) == StringError + +def test_computed(): + common(Computed(255), b"", 255, 0) + common(Computed(lambda ctx: 255), b"", 255, 0) + 
assert Computed(255).build(None) == b"" + assert Struct(Computed(255)).build({}) == b"" + assert raises(Computed(this.missing).parse, b"") == KeyError + assert raises(Computed(this["missing"]).parse, b"") == KeyError + +@xfail(reason="_index fails during parsing or building, not during compilation") +def test_index(): + d = Array(3, Bytes(this._index+1)) + common(d, b"abbccc", [b"a", b"bb", b"ccc"]) + d = GreedyRange(Bytes(this._index+1)) + common(d, b"abbccc", [b"a", b"bb", b"ccc"]) + d = RepeatUntil(lambda o,l,ctx: ctx._index == 2, Bytes(this._index+1)) + common(d, b"abbccc", [b"a", b"bb", b"ccc"]) + + d = Array(3, Struct("i" / Index)) + common(d, b"", [Container(i=0),Container(i=1),Container(i=2)], 0) + d = GreedyRange(Struct("i" / Index, "d" / Bytes(this.i+1))) + common(d, b"abbccc", [Container(i=0,d=b"a"),Container(i=1,d=b"bb"),Container(i=2,d=b"ccc")]) + d = RepeatUntil(lambda o,l,ctx: ctx._index == 2, Index) + common(d, b"", [0,1,2]) + +def test_rebuild(): + d = Struct( + "count" / Rebuild(Byte, len_(this.items)), + "items"/Byte[this.count], + ) + assert d.parse(b"\x02ab") == Container(count=2, items=[97,98]) + assert d.build(dict(count=None,items=[255])) == b"\x01\xff" + assert d.build(dict(count=-1,items=[255])) == b"\x01\xff" + assert d.build(dict(items=[255])) == b"\x01\xff" + +def test_rebuild_issue_664(): + d = Struct( + "bytes" / Bytes(1), + Check(this.bytes == b"\x00"), + "bytesinteger" / BytesInteger(4), + Check(this.bytesinteger == 255), + "pascalstring" / PascalString(Byte, "utf8"), + Check(this.pascalstring == u"text"), + "enum" / Enum(Byte, label=255), + Check(this.enum == "label"), + "flagsenum" / FlagsEnum(Byte, label=255), + Check(lambda this: this.flagsenum == Container(label=True)), + "upfield" / Computed(200), + "nestedstruct" / Struct( + "nestedfield" / Computed(255), + Check(this._.upfield == 200), + Check(this.nestedfield == 255), + ), + Check(this.upfield == 200), + Check(this.nestedstruct.nestedfield == 255), + "sequence" / 
Sequence(Computed(1), Computed(2), Computed(3), Computed(4)), + Check(this.sequence == [1,2,3,4]), + "array" / Array(4, Byte), + Check(this.array == [1,2,3,4]), + "greedyrange" / GreedyRange(Byte), + Check(this.greedyrange == [1,2,3,4]), + "repeatuntil" / RepeatUntil(obj_ == 4, Byte), + Check(this.repeatuntil == [1,2,3,4]), + # Timestamp + # Union + # IfThenElse + ) + obj = Container( + bytes = 0, + bytesinteger = 255, + pascalstring = u"text", + enum = "label", + flagsenum = dict(label=True), + # nestedstruct = dict(), + # sequence = [1,2,3,4], + array = [1,2,3,4], + greedyrange = [1,2,3,4], + repeatuntil = [1,2,3,4], + ) + # no asserts are needed + d.build(obj) + + +def test_rebuild_custom_function(): + def getlen(this): + return 2 + + template = Struct( "count" / Rebuild(Byte, getlen), "my_items" / Byte[this.count]) + for d in [template, template.compile()]: + assert d.parse(b"\x02ab") == Container(count=2, my_items=[97,98]) + assert d.build(dict(count=None,my_items=[255,255])) == b"\x02\xff\xff" + assert d.build(dict(count=2,my_items=[255,255])) == b"\x02\xff\xff" + assert d.build(dict(my_items=[255,255])) == b"\x02\xff\xff" + +def test_default(): + d = Default(Byte, 0) + common(d, b"\xff", 255, 1) + assert d.build(None) == b"\x00" + +def test_check(): + common(Check(True), b"", None, 0) + common(Check(this.x == 255), b"", None, 0, x=255) + common(Check(len_(this.a) == 3), b"", None, 0, a=[1,2,3]) + assert raises(Check(False).parse, b"") == CheckError + assert raises(Check(this.x == 255).parse, b"", x=0) == CheckError + assert raises(Check(len_(this.a) == 3).parse, b"", a=[]) == CheckError + +def test_error(): + assert raises(Error.parse, b"") == ExplicitError + assert raises(Error.build, None) == ExplicitError + assert ("x"/Int8sb >> IfThenElse(this.x > 0, Int8sb, Error)).parse(b"\x01\x05") == [1,5] + assert raises(("x"/Int8sb >> IfThenElse(this.x > 0, Int8sb, Error)).parse, b"\xff\x05") == ExplicitError + +def test_focusedseq(): + common(FocusedSeq("num", 
Const(b"MZ"), "num"/Byte, Terminated), b"MZ\xff", 255, SizeofError) + common(FocusedSeq(this._.s, Const(b"MZ"), "num"/Byte, Terminated), b"MZ\xff", 255, SizeofError, s="num") + + d = FocusedSeq("missing", Pass) + assert raises(d.parse, b"") == UnboundLocalError + assert raises(d.build, {}) == UnboundLocalError + assert raises(d.sizeof) == 0 + d = FocusedSeq(this.missing, Pass) + assert raises(d.parse, b"") == KeyError + assert raises(d.build, {}) == KeyError + assert raises(d.sizeof) == 0 + +def test_pickled(): + import pickle + obj = [(), 1, 2.3, {}, [], bytes(1), ""] + data = pickle.dumps(obj) + common(Pickled, data, obj) + +def test_numpy(): + import numpy + obj = numpy.array([1,2,3], dtype=numpy.int64) + assert numpy.array_equal(Numpy.parse(Numpy.build(obj)), obj) + +@xfail(reason="docs stated that it throws StreamError, not true at all") +def test_numpy_error(): + import numpy, io + numpy.load(io.BytesIO(b"")) + +def test_namedtuple(): + coord = collections.namedtuple("coord", "x y z") + d = NamedTuple("coord", "x y z", Array(3, Byte)) + common(d, b"123", coord(49,50,51), 3) + d = NamedTuple("coord", "x y z", GreedyRange(Byte)) + common(d, b"123", coord(49,50,51), SizeofError) + d = NamedTuple("coord", "x y z", Struct("x"/Byte, "y"/Byte, "z"/Byte)) + common(d, b"123", coord(49,50,51), 3) + d = NamedTuple("coord", "x y z", Sequence(Byte, Byte, Byte)) + common(d, b"123", coord(49,50,51), 3) + + assert raises(lambda: NamedTuple("coord", "x y z", BitStruct("x"/Byte, "y"/Byte, "z"/Byte))) == NamedTupleError + +def test_timestamp(): + import arrow + d = Timestamp(Int64ub, 1, 1970) + common(d, b'\x00\x00\x00\x00ZIz\x00', arrow.Arrow(2018,1,1), 8) + d = Timestamp(Int64ub, 1, 1904) + common(d, b'\x00\x00\x00\x00\xd6o*\x80', arrow.Arrow(2018,1,1), 8) + d = Timestamp(Int64ub, 10**-7, 1600) + common(d, b'\x01\xd4\xa2.\x1a\xa8\x00\x00', arrow.Arrow(2018,1,1), 8) + d = Timestamp(Int32ub, "msdos", "msdos") + common(d, b'H9\x8c"', arrow.Arrow(2016,1,25,17,33,4), 4) + +def 
test_hex(): + d = Hex(Int32ub) + common(d, b"\x00\x00\x01\x02", 0x0102, 4) + obj = d.parse(b"\x00\x00\x01\x02") + assert str(obj) == "0x00000102" + assert str(obj) == "0x00000102" + + d = Hex(GreedyBytes) + common(d, b"\x00\x00\x01\x02", b"\x00\x00\x01\x02") + common(d, b"", b"") + obj = d.parse(b"\x00\x00\x01\x02") + assert str(obj) == "unhexlify('00000102')" + assert str(obj) == "unhexlify('00000102')" + + d = Hex(RawCopy(Int32ub)) + common(d, b"\x00\x00\x01\x02", dict(data=b"\x00\x00\x01\x02", value=0x0102, offset1=0, offset2=4, length=4), 4) + obj = d.parse(b"\x00\x00\x01\x02") + assert str(obj) == "unhexlify('00000102')" + assert str(obj) == "unhexlify('00000102')" + +def test_hexdump(): + d = HexDump(GreedyBytes) + common(d, b"abcdef", b"abcdef") + common(d, b"", b"") + obj = d.parse(b"\x00\x00\x01\x02") + repr = \ +'''hexundump(""" +0000 00 00 01 02 .... +""") +''' + pass + assert str(obj) == repr + assert str(obj) == repr + + d = HexDump(RawCopy(Int32ub)) + common(d, b"\x00\x00\x01\x02", dict(data=b"\x00\x00\x01\x02", value=0x0102, offset1=0, offset2=4, length=4), 4) + obj = d.parse(b"\x00\x00\x01\x02") + repr = \ +'''hexundump(""" +0000 00 00 01 02 .... 
+""") +''' + assert str(obj) == repr + assert str(obj) == repr + +def test_hexdump_regression_issue_188(): + # Hex HexDump were not inheriting subcon flags + d = Struct(Hex(Const(b"MZ"))) + assert d.parse(b"MZ") == Container() + assert d.build(dict()) == b"MZ" + d = Struct(HexDump(Const(b"MZ"))) + assert d.parse(b"MZ") == Container() + assert d.build(dict()) == b"MZ" + +def test_union(): + d = Union(None, "a"/Bytes(2), "b"/Int16ub) + assert d.parse(b"\x01\x02") == Container(a=b"\x01\x02", b=0x0102) + assert raises(Union(123, Pass).parse, b"") == KeyError + assert raises(Union("missing", Pass).parse, b"") == KeyError + assert d.build(dict(a=b"zz")) == b"zz" + assert d.build(dict(b=0x0102)) == b"\x01\x02" + assert raises(d.build, {}) == UnionError + + d = Union(None, "a"/Bytes(2), "b"/Int16ub, Pass) + assert d.build({}) == b"" + + # build skips parsefrom, invalid or not + assert raises(Union(123, Pass).build, {}) == b"" + assert raises(Union("missing", Pass).build, {}) == b"" + + assert raises(Union(None, Byte).sizeof) == SizeofError + assert raises(Union(None, VarInt).sizeof) == SizeofError + assert raises(Union(0, Byte, VarInt).sizeof) == SizeofError + assert raises(Union(1, Byte, VarInt).sizeof) == SizeofError + assert raises(Union(123, Pass).sizeof) == SizeofError + assert raises(Union("missing", Pass).sizeof) == SizeofError + assert raises(Union(this.missing, Pass).sizeof) == SizeofError + + # regression check, so first subcon is not parsefrom by accident + assert raises(Union, Byte, VarInt) == UnionError + +def test_union_kwctor(): + d = Union(None, a=Int8ub, b=Int16ub, c=Int32ub) + assert d.parse(b"\x01\x02\x03\x04") == Container(a=0x01,b=0x0102,c=0x01020304) + assert d.build(Container(c=0x01020304)) == b"\x01\x02\x03\x04" + +def test_union_issue_348(): + d = Union(None, + Int8=Prefixed(Int16ub, GreedyRange(Int8ub)), + Int16=Prefixed(Int16ub, GreedyRange(Int16ub)), + Int32=Prefixed(Int16ub, GreedyRange(Int32ub)), + ) + assert 
d.parse(b'\x00\x04\x11\x22\x33\x44') == {'Int16': [4386, 13124], 'Int32': [287454020], 'Int8': [17, 34, 51, 68]} + assert d.build(dict(Int16=[4386, 13124])) == b'\x00\x04\x11\x22\x33\x44' + assert d.build(dict(Int32=[287454020])) == b'\x00\x04\x11\x22\x33\x44' + +def test_select(): + d = Select(Int32ub, Int16ub, Int8ub) + common(d, b"\x00\x00\x00\x07", 7) + assert raises(Select(Int32ub, Int16ub).parse, b"") == SelectError + assert raises(Select(Byte).sizeof) == SizeofError + +def test_select_kwctor(): + d = Select(a=Int8ub, b=Int16ub, c=Int32ub) + assert d.parse(b"\x01\x02\x03\x04") == 0x01 + assert d.build(0x01020304) == b"\x01\x02\x03\x04" + +def test_optional(): + d = Optional(Int32ul) + assert d.parse(b"\x01\x00\x00\x00") == 1 + assert d.build(1) == b"\x01\x00\x00\x00" + assert d.parse(b"???") == None + assert d.parse(b"") == None + assert d.build(None) == b"" + assert raises(d.sizeof) == SizeofError + +def test_optional_in_struct_issue_747(): + d = Struct("field" / Optional(Int32ul)) + assert d.parse(b"\x01\x00\x00\x00") == {"field": 1} + assert d.build({"field": 1}) == b"\x01\x00\x00\x00" + assert d.parse(b"???") == {"field": None} + assert d.build({"field": None}) == b"" + assert d.parse(b"") == {"field": None} + assert raises(d.sizeof) == SizeofError + +def test_optional_in_bit_struct_issue_747(): + d = BitStruct("field" / Optional(Octet)) + assert d.parse(b"\x01") == {"field": 1} + assert d.build({"field": 1}) == b"\x01" + assert d.parse(b"???") == {"field": ord("?")} + assert d.build({"field": None}) == b"" + assert d.parse(b"") == {"field": None} + assert raises(d.sizeof) == SizeofError + +def test_select_buildfromnone_issue_747(): + d = Struct("select" / Select(Int32ub, Default(Bytes(3), b"abc"))) + assert d.parse(b"def") == dict(select=b"def") + assert d.parse(b"\x01\x02\x03\x04") == dict(select=0x01020304) + assert d.build(dict(select=b"def")) == b"def" + assert d.build(dict(select=0xbeefcace)) == b"\xbe\xef\xca\xce" + assert d.build(dict()) == b"abc" 
+ + d = Struct("opt" / Optional(Byte)) + assert d.build(dict(opt=1)) == b"\x01" + assert d.build(dict()) == b"" + +def test_if(): + common(If(True, Byte), b"\x01", 1, 1) + common(If(False, Byte), b"", None, 0) + +def test_ifthenelse(): + common(IfThenElse(True, Int8ub, Int16ub), b"\x01", 1, 1) + common(IfThenElse(False, Int8ub, Int16ub), b"\x00\x01", 1, 2) + +def test_switch(): + d = Switch(this.x, {1:Int8ub, 2:Int16ub, 4:Int32ub}) + common(d, b"\x01", 0x01, 1, x=1) + common(d, b"\x01\x02", 0x0102, 2, x=2) + assert d.parse(b"", x=255) == None + assert d.build(None, x=255) == b"" + assert raises(d.sizeof) == SizeofError + assert raises(d.sizeof, x=1) == 1 + + d = Switch(this.x, {}, default=Byte) + common(d, b"\x01", 1, 1, x=255) + +def test_switch_issue_357(): + inner = Struct( + "computed" / Computed(4), + ) + inner2 = Struct( + "computed" / Computed(7), + ) + st1 = Struct( + "a" / inner, + "b" / Switch(5, {1: inner2}, inner), + Probe(), + ) + st2 = Struct( + "a" / inner, + "b" / Switch(5, {}, inner), + Probe(), + ) + assert st1.parse(b"") == st2.parse(b"") + +def test_stopif(): + d = Struct("x"/Byte, StopIf(this.x == 0), "y"/Byte) + common(d, b"\x00", Container(x=0)) + common(d, b"\x01\x02", Container(x=1,y=2)) + + d = Sequence("x"/Byte, StopIf(this.x == 0), "y"/Byte) + common(d, b"\x01\x02", [1,None,2]) + + d = GreedyRange(FocusedSeq("x", "x"/Byte, StopIf(this.x == 0))) + assert d.parse(b"\x01\x00?????") == [1] + assert d.build([]) == b"" + assert d.build([0]) == b"\x00" + assert d.build([1]) == b"\x01" + assert d.build([1,0,2]) == b"\x01\x00" + +def test_padding(): + common(Padding(4), b"\x00\x00\x00\x00", None, 4) + assert raises(Padding, 4, pattern=b"?????") == PaddingError + assert raises(Padding, 4, pattern=u"?") == PaddingError + +def test_padded(): + common(Padded(4, Byte), b"\x01\x00\x00\x00", 1, 4) + assert raises(Padded, 4, Byte, pattern=b"?????") == PaddingError + assert raises(Padded, 4, Byte, pattern=u"?") == PaddingError + assert Padded(4, 
VarInt).sizeof() == 4 + assert Padded(4, Byte[this.missing]).sizeof() == 4 + +def test_aligned(): + common(Aligned(4, Byte), b"\x01\x00\x00\x00", 1, 4) + common(Struct("a"/Aligned(4, Byte), "b"/Byte), b"\x01\x00\x00\x00\x02", Container(a=1, b=2), 5) + assert Aligned(4, Int8ub).build(1) == b"\x01\x00\x00\x00" + assert Aligned(4, Int16ub).build(1) == b"\x00\x01\x00\x00" + assert Aligned(4, Int32ub).build(1) == b"\x00\x00\x00\x01" + assert Aligned(4, Int64ub).build(1) == b"\x00\x00\x00\x00\x00\x00\x00\x01" + d = Aligned(this.m, Byte) + common(d, b"\xff\x00", 255, 2, m=2) + assert raises(d.sizeof) == SizeofError + assert raises(d.sizeof, m=2) == 2 + +def test_alignedstruct(): + d = AlignedStruct(4, "a"/Int8ub, "b"/Int16ub) + common(d, b"\x01\x00\x00\x00\x00\x05\x00\x00", Container(a=1, b=5), 8) + +def test_bitstruct(): + d = BitStruct("a"/BitsInteger(3), "b"/Flag, Padding(3), "c"/Nibble, "d"/BitsInteger(5)) + common(d, b"\xe1\x1f", Container(a=7, b=False, c=8, d=31), 2) + d = BitStruct("a"/BitsInteger(3), "b"/Flag, Padding(3), "c"/Nibble, "sub"/Struct("d"/Nibble, "e"/Bit)) + common(d, b"\xe1\x1f", Container(a=7, b=False, c=8, sub=Container(d=15, e=1)), 2) + +def test_pointer(): + common(Pointer(2, Byte), b"\x00\x00\x07", 7, 0) + common(Pointer(lambda ctx: 2, Byte), b"\x00\x00\x07", 7, 0) + + d = Struct( + 'inner' / Struct(), + 'x' / Pointer(0, Byte, stream=this.inner._io), + ) + assert d.parse(bytes(20)) == Container(inner=Container(), x=0) + + d = Struct ( + 'dummy' / Byte, + 'pointer' / Pointer(1, Byte, relativeOffset=True), + ) + + common(d, b"\xde\x00\xad", Container(dummy=0xde, pointer=0xad), 1) + + d = Struct ( + 'dummy' / Byte, + 'pointer' / Pointer(0, Byte, relativeOffset=True), + ) + + common(d, b"\xde\xad", Container(dummy=0xde, pointer=0xad), 1) + + d = Struct ( + 'dummy' / Byte, + 'pointer' / Pointer(-1, Byte, relativeOffset=True), + ) + + common(d, b"\xde", Container(dummy=0xde, pointer=0xde), 1) + +def test_peek(): + d = Peek(Int8ub) + assert 
d.parse(b"\x01") == 1 + assert d.parse(b"") == None + assert d.build(1) == b"" + assert d.build(None) == b"" + assert d.sizeof() == 0 + d = Peek(VarInt) + assert d.sizeof() == 0 + + d = Struct("a"/Peek(Int8ub), "b"/Int16ub) + common(d, b"\x01\x02", Container(a=0x01, b=0x0102), 2) + +def test_offsettedend(): + d = Struct( + "header" / Bytes(2), + "data" / OffsettedEnd(-2, GreedyBytes), + "footer" / Bytes(2), + ) + common(d, b"\x01\x02\x03\x04\x05\x06\x07", Container(header=b'\x01\x02', data=b'\x03\x04\x05', footer=b'\x06\x07')) + + d = OffsettedEnd(0, Byte) + assert raises(d.sizeof) == SizeofError + +def test_seek(): + d = Seek(5) + assert d.parse(b"") == 5 + assert d.build(None) == b"" + assert (d >> Byte).parse(b"01234x") == [5,120] + assert (d >> Byte).build([5,255]) == b"\x00\x00\x00\x00\x00\xff" + assert (Bytes(10) >> d >> Byte).parse(b"0123456789") == [b"0123456789",5,ord('5')] + assert (Bytes(10) >> d >> Byte).build([b"0123456789",None,255]) == b"01234\xff6789" + assert Struct("data"/Bytes(10), d, "addin"/Byte).parse(b"0123456789") == Container(data=b"0123456789", addin=53) + assert Struct("data"/Bytes(10), d, "addin"/Byte).build(dict(data=b"0123456789",addin=53)) == b"01234\x356789" + assert (Seek(10,1) >> Seek(-5,1) >> Bytes(1)).parse(b"0123456789") == [10,5,b"5"] + assert (Seek(10,1) >> Seek(-5,1) >> Bytes(1)).build([None,None,255]) == b"\x00\x00\x00\x00\x00\xff" + assert raises(d.sizeof) == SizeofError + +def test_tell(): + d = Tell + assert d.parse(b"") == 0 + assert d.build(None) == b"" + assert d.sizeof() == 0 + d = Struct("a"/Tell, "b"/Byte, "c"/Tell) + assert d.parse(b"\xff") == Container(a=0, b=255, c=1) + assert d.build(Container(a=0, b=255, c=1)) == b"\xff" + assert d.build(dict(b=255)) == b"\xff" + +def test_pass(): + common(Pass, b"", None, 0) + common(Struct("empty"/Pass), b"", Container(empty=None), 0) + +def test_terminated(): + common(Terminated, b"", None, SizeofError) + common(Struct(Terminated), b"", Container(), SizeofError) + 
common(BitStruct(Terminated), b"", Container(), SizeofError) + assert raises(Terminated.parse, b"x") == TerminatedError + assert raises(Struct(Terminated).parse, b"x") == TerminatedError + assert raises(BitStruct(Terminated).parse, b"x") == TerminatedError + +def test_rawcopy(): + d = RawCopy(Byte) + assert d.parse(b"\xff") == dict(data=b"\xff", value=255, offset1=0, offset2=1, length=1) + assert d.build(dict(data=b"\xff")) == b"\xff" + assert d.build(dict(value=255)) == b"\xff" + assert d.sizeof() == 1 + d = RawCopy(Padding(1)) + assert d.build(None) == b'\x00' + +def test_rawcopy_issue_289(): + # When you build from a full dict that has all the keys, the if data kicks in, and replaces the context entry with a subset of a dict it had to begin with. + d = Struct( + "raw" / RawCopy(Struct("x"/Byte, "len"/Byte)), + "array" / Byte[this.raw.value.len], + ) + print(d.parse(b"\x01\x02\xff\x00")) + print(d.build(dict(raw=dict(value=dict(x=1, len=2)), array=[0xff, 0x01]))) + print(d.build(d.parse(b"\x01\x02\xff\x00"))) + # this is not buildable, array is not passed and cannot be deduced from raw data + # print(d.build(dict(raw=dict(data=b"\x01\x02\xff\x00")))) + +def test_rawcopy_issue_358(): + # RawCopy overwritten context value with subcon return obj regardless of None + d = Struct("a"/RawCopy(Byte), "check"/Check(this.a.value == 255)) + assert d.build(dict(a=dict(value=255))) == b"\xff" + +def test_rawcopy_issue_888(): + # If you use build_file() on a RawCopy that has only a value defined, then + # RawCopy._build may also attempt to read from the file, which won't work + # if build_file opened the file for writing only. 
+ d = RawCopy(Byte) + d.build_file(dict(value=0), filename="example_888") + +def test_byteswapped(): + d = ByteSwapped(Bytes(5)) + common(d, b"12345", b"54321", 5) + d = ByteSwapped(Struct("a"/Byte, "b"/Byte)) + common(d, b"\x01\x02", Container(a=2, b=1), 2) + +def test_byteswapped_from_issue_70(): + d = ByteSwapped(BitStruct("flag1"/Bit, "flag2"/Bit, Padding(2), "number"/BitsInteger(16), Padding(4))) + assert d.parse(b'\xd0\xbc\xfa') == Container(flag1=1, flag2=1, number=0xabcd) + d = BitStruct("flag1"/Bit, "flag2"/Bit, Padding(2), "number"/BitsInteger(16), Padding(4)) + assert d.parse(b'\xfa\xbc\xd1') == Container(flag1=1, flag2=1, number=0xabcd) + +def test_bitsswapped(): + d = BitsSwapped(Bytes(2)) + common(d, b"\x0f\x01", b"\xf0\x80", 2) + d = Bitwise(Bytes(8)) + common(d, b"\xf2", b'\x01\x01\x01\x01\x00\x00\x01\x00', 1) + d = BitsSwapped(Bitwise(Bytes(8))) + common(d, b"\xf2", b'\x00\x01\x00\x00\x01\x01\x01\x01', 1) + d = BitStruct("a"/Nibble, "b"/Nibble) + common(d, b"\xf1", Container(a=15, b=1), 1) + d = BitsSwapped(BitStruct("a"/Nibble, "b"/Nibble)) + common(d, b"\xf1", Container(a=8, b=15), 1) + +def test_prefixed(): + d = Prefixed(Byte, Int16ul) + assert d.parse(b"\x02\xff\xff??????") == 65535 + assert d.build(65535) == b"\x02\xff\xff" + assert d.sizeof() == 3 + d = Prefixed(VarInt, GreedyBytes) + assert d.parse(b"\x03abc??????") == b"abc" + assert d.build(b"abc") == b'\x03abc' + assert raises(d.sizeof) == SizeofError + d = Prefixed(Byte, Sequence(Peek(Byte), Int16ub, GreedyBytes)) + assert d.parse(b"\x02\x00\xff????????") == [0,255,b''] + + d = Prefixed(Byte, GreedyBytes) + common(d, b"\x0a"+bytes(10), bytes(10), SizeofError) + d = Prefixed(Byte, GreedyString("utf-8")) + common(d, b"\x0a"+bytes(10), u"\x00"*10, SizeofError) + +def test_prefixedarray(): + common(PrefixedArray(Byte,Byte), b"\x02\x0a\x0b", [10,11], SizeofError) + assert PrefixedArray(Byte, Byte).parse(b"\x03\x01\x02\x03") == [1,2,3] + assert PrefixedArray(Byte, Byte).parse(b"\x00") == [] + 
assert PrefixedArray(Byte, Byte).build([1,2,3]) == b"\x03\x01\x02\x03" + assert raises(PrefixedArray(Byte, Byte).parse, b"") == StreamError + assert raises(PrefixedArray(Byte, Byte).parse, b"\x03\x01") == StreamError + assert raises(PrefixedArray(Byte, Byte).sizeof) == SizeofError + +def test_fixedsized(): + d = FixedSized(10, Byte) + common(d, b'\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00', 255, 10) + d = FixedSized(-255, Byte) + assert raises(d.parse, bytes(10)) == PaddingError + assert raises(d.build, 0) == PaddingError + assert raises(d.sizeof) == PaddingError + d = FixedSized(10, GreedyBytes) + common(d, bytes(10), bytes(10), 10) + d = FixedSized(10, GreedyString("utf-8")) + common(d, bytes(10), u"\x00"*10, 10) + +def test_nullterminated(): + d = NullTerminated(Byte) + common(d, b'\xff\x00', 255, SizeofError) + d = NullTerminated(GreedyBytes, include=True) + assert d.parse(b'\xff\x00') == b'\xff\x00' + d = NullTerminated(GreedyBytes, include=False) + assert d.parse(b'\xff\x00') == b'\xff' + d = NullTerminated(GreedyBytes, consume=True) >> GreedyBytes + assert d.parse(b'\xff\x00') == [b'\xff', b''] + d = NullTerminated(GreedyBytes, consume=False) >> GreedyBytes + assert d.parse(b'\xff\x00') == [b'\xff', b'\x00'] + d = NullTerminated(GreedyBytes, require=True) + assert raises(d.parse, b'\xff') == StreamError + d = NullTerminated(GreedyBytes, require=False) + assert d.parse(b'\xff') == b'\xff' + d = NullTerminated(GreedyBytes) + common(d, bytes(1), b"", SizeofError) + d = NullTerminated(GreedyString("utf-8")) + common(d, bytes(1), u"", SizeofError) + d = NullTerminated(GreedyBytes, term=bytes(2)) + common(d, b"\x01\x00\x00\x02\x00\x00", b"\x01\x00\x00\x02", SizeofError) + +def test_nullstripped(): + d = NullStripped(GreedyBytes) + common(d, b'\xff', b'\xff', SizeofError) + assert d.parse(b'\xff\x00\x00') == b'\xff' + assert d.build(b'\xff') == b'\xff' + d = NullStripped(GreedyBytes, pad=b'\x05') + common(d, b'\xff', b'\xff', SizeofError) + assert 
d.parse(b'\xff\x05\x05') == b'\xff' + assert d.build(b'\xff') == b'\xff' + d = NullStripped(GreedyString("utf-8")) + assert d.parse(bytes(10)) == u"" + assert d.build(u"") == b"" + d = NullStripped(GreedyBytes, pad=bytes(2)) + assert d.parse(bytes(10)) == b"" + assert d.parse(bytes(11)) == b"" + +def test_restreamdata(): + d = RestreamData(b"\x01", Int8ub) + common(d, b"", 1, 0) + d = RestreamData(b"", Padding(1)) + assert d.build(None) == b'' + + d = RestreamData(io.BytesIO(b"\x01\x02"), Int16ub) + assert d.parse(b"\x01\x02\x00") == 0x0102 + assert d.build(None) == b'' + + d = RestreamData(NullTerminated(GreedyBytes), Int16ub) + assert d.parse(b"\x01\x02\x00") == 0x0102 + assert d.build(None) == b'' + + d = RestreamData(FixedSized(2, GreedyBytes), Int16ub) + assert d.parse(b"\x01\x02\x00") == 0x0102 + assert d.build(None) == b'' + +@xfail(reason="unknown, either StreamError or KeyError due to this.entire or this._.entire") +def test_restreamdata_issue_701(): + d = Struct( + 'entire' / GreedyBytes, + 'ac' / RestreamData(this.entire, Struct( + 'a' / Byte, + Bytes(len_(this._.entire)-1), + 'c' / Byte, + )), + ) + # StreamError: stream read less then specified amount, expected 1, found 0 + assert d.parse(b'\x01GGGGGGGGGG\x02') == Container(entire=b'\x01GGGGGGGGGG\x02', ac=Container(a=1,b=2)) + + d = FocusedSeq('ac' + 'entire' / GreedyBytes, + 'ac' / RestreamData(this.entire, Struct( + 'a' / Byte, + Bytes(len_(this._.entire)-1), + 'c' / Byte, + )), + ) + # KeyError: 'entire' + assert d.parse(b'\x01GGGGGGGGGG\x02') == Container(a=1,b=2) + +def test_transformed(): + d = Transformed(Bytes(16), bytes2bits, 2, bits2bytes, 2) + common(d, bytes(2), bytes(16), 2) + d = Transformed(GreedyBytes, bytes2bits, None, bits2bytes, None) + common(d, bytes(2), bytes(16), SizeofError) + d = Transformed(GreedyString("utf-8"), bytes2bits, None, bits2bytes, None) + common(d, bytes(2), u"\x00"*16, SizeofError) + +def test_transformed_issue_676(): + d = Struct( + 'inner1' / BitStruct( + 'a' / 
Default(BitsInteger(8), 0), + ), + 'inner2' / BitStruct( + 'a' / Default(BitsInteger(lambda this: 8), 0), + ), + Probe(), + Check(this.inner1.a == 0), + Check(this.inner2.a == 0), + ) + d.build({}) + +def test_restreamed(): + d = Restreamed(Int16ub, ident, 1, ident, 1, ident) + common(d, b"\x00\x01", 1, 2) + d = Restreamed(VarInt, ident, 1, ident, 1, ident) + assert raises(d.sizeof) == SizeofError + d = Restreamed(Bytes(2), lambda b: b*2, 1, lambda b: b[0:1], 1, lambda n: n*2) + common(d, b"aa", b"aa", 4) + +def test_restreamed_partial_read(): + d = Restreamed(Bytes(255), ident, 1, ident, 1, ident) + assert raises(d.parse, b"") == StreamError + +def test_processxor(): + d = ProcessXor(0, Int16ub) + common(d, b"\xf0\x0f", 0xf00f, 2) + d = ProcessXor(0xf0, Int16ub) + common(d, b"\x00\xff", 0xf00f, 2) + d = ProcessXor(bytes(10), Int16ub) + common(d, b"\xf0\x0f", 0xf00f, 2) + d = ProcessXor(b"\xf0\xf0\xf0\xf0\xf0", Int16ub) + common(d, b"\x00\xff", 0xf00f, 2) + + d = ProcessXor(0xf0, GreedyBytes) + common(d, b"\x00\xff", b"\xf0\x0f", SizeofError) + d = ProcessXor(b"\xf0\xf0\xf0\xf0\xf0", GreedyBytes) + common(d, b"\x00\xff", b"\xf0\x0f", SizeofError) + d = ProcessXor(b"X", GreedyString("utf-8")) + common(d, b"\x00", u"X", SizeofError) + d = ProcessXor(b"XXXXX", GreedyString("utf-8")) + common(d, b"\x00", u"X", SizeofError) + +def test_processrotateleft(): + d = ProcessRotateLeft(0, 1, GreedyBytes) + common(d, bytes(10), bytes(10)) + d = ProcessRotateLeft(0, 2, GreedyBytes) + common(d, bytes(10), bytes(10)) + d = ProcessRotateLeft(4, 1, GreedyBytes) + common(d, b'\x0f\xf0', b'\xf0\x0f') + d = ProcessRotateLeft(4, 2, GreedyBytes) + common(d, b'\x0f\xf0', b'\xff\x00') + +def test_checksum(): + d = Struct( + "fields" / RawCopy(Struct( + "a" / Byte, + "b" / Byte, + )), + "checksum" / Checksum(Bytes(64), lambda data: hashlib.sha512(data).digest(), this.fields.data), + ) + + c = hashlib.sha512(b"\x01\x02").digest() + assert d.parse(b"\x01\x02"+c) == 
Container(fields=dict(data=b"\x01\x02", value=Container(a=1, b=2), offset1=0, offset2=2, length=2), checksum=c) + assert d.build(dict(fields=dict(data=b"\x01\x02"))) == b"\x01\x02"+c + assert d.build(dict(fields=dict(value=dict(a=1,b=2)))) == b"\x01\x02"+c + +def test_checksum_nonbytes_issue_323(): + d = Struct( + "vals" / Byte[2], + "checksum" / Checksum(Byte, lambda vals: sum(vals) & 0xFF, this.vals), + ) + assert d.parse(b"\x00\x00\x00") == Container(vals=[0, 0], checksum=0) + assert raises(d.parse, b"\x00\x00\x01") == ChecksumError + +def test_compressed_zlib(): + zeros = bytes(10000) + d = Compressed(GreedyBytes, "zlib") + assert d.parse(d.build(zeros)) == zeros + assert len(d.build(zeros)) < 50 + assert raises(d.sizeof) == SizeofError + d = Compressed(GreedyBytes, "zlib", level=9) + assert d.parse(d.build(zeros)) == zeros + assert len(d.build(zeros)) < 50 + assert raises(d.sizeof) == SizeofError + +def test_compressed_gzip(): + zeros = bytes(10000) + d = Compressed(GreedyBytes, "gzip") + assert d.parse(d.build(zeros)) == zeros + assert len(d.build(zeros)) < 50 + assert raises(d.sizeof) == SizeofError + d = Compressed(GreedyBytes, "gzip", level=9) + assert d.parse(d.build(zeros)) == zeros + assert len(d.build(zeros)) < 50 + assert raises(d.sizeof) == SizeofError + +def test_compressed_bzip2(): + zeros = bytes(10000) + d = Compressed(GreedyBytes, "bzip2") + assert d.parse(d.build(zeros)) == zeros + assert len(d.build(zeros)) < 50 + assert raises(d.sizeof) == SizeofError + d = Compressed(GreedyBytes, "bzip2", level=9) + assert d.parse(d.build(zeros)) == zeros + assert len(d.build(zeros)) < 50 + assert raises(d.sizeof) == SizeofError + +def test_compressed_lzma(): + zeros = bytes(10000) + d = Compressed(GreedyBytes, "lzma") + assert d.parse(d.build(zeros)) == zeros + assert len(d.build(zeros)) < 200 + assert raises(d.sizeof) == SizeofError + d = Compressed(GreedyBytes, "lzma", level=9) + assert d.parse(d.build(zeros)) == zeros + assert len(d.build(zeros)) < 200 + 
assert raises(d.sizeof) == SizeofError + +def test_compressed_prefixed(): + zeros = bytes(10000) + d = Prefixed(VarInt, Compressed(GreedyBytes, "zlib")) + st = Struct("one"/d, "two"/d) + assert st.parse(st.build(Container(one=zeros,two=zeros))) == Container(one=zeros,two=zeros) + assert raises(d.sizeof) == SizeofError + +def test_compressedlz4(): + zeros = bytes(10000) + d = CompressedLZ4(GreedyBytes) + assert d.parse(d.build(zeros)) == zeros + assert len(d.build(zeros)) < 100 + assert raises(d.sizeof) == SizeofError + +@xfail(ONWINDOWS and PYPY, reason="no wheel for 'cryptography' is currently available for pypy on windows") +def test_encryptedsym(): + from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes + key128 = b"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + key256 = b"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + iv = b"\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" + nonce = iv + + # AES 128/256 bit - ECB + d = EncryptedSym(GreedyBytes, lambda ctx: Cipher(algorithms.AES(ctx.key), modes.ECB())) + common(d, b"\xf4\x0f\x54\xb7\x6a\x7a\xf1\xdb\x92\x73\x14\xde\x2f\xa0\x3e\x2d", b'Secret Message..', key=key128, iv=iv) + common(d, b"\x82\x6b\x01\x82\x90\x02\xa1\x9e\x35\x0a\xe2\xc3\xee\x1a\x42\xf5", b'Secret Message..', key=key256, iv=iv) + + # AES 128/256 bit - CBC + d = EncryptedSym(GreedyBytes, lambda ctx: Cipher(algorithms.AES(ctx.key), modes.CBC(ctx.iv))) + common(d, b"\xba\x79\xc2\x62\x22\x08\x29\xb9\xfb\xd3\x90\xc4\x04\xb7\x55\x87", b'Secret Message..', key=key128, iv=iv) + common(d, b"\x60\xc2\x45\x0d\x7e\x41\xd4\xf8\x85\xd4\x8a\x64\xd1\x45\x49\xe3", b'Secret Message..', key=key256, iv=iv) + + # AES 128/256 bit - CTR + d = EncryptedSym(GreedyBytes, lambda ctx: Cipher(algorithms.AES(ctx.key), modes.CTR(ctx.nonce))) + common(d, b"\x80\x78\xb6\x0c\x07\xf5\x0c\x90\xce\xa2\xbf\xcb\x5b\x22\xb9\xb5", b'Secret 
Message..', key=key128, nonce=nonce) + common(d, b"\x6a\xae\x7b\x86\x1a\xa6\xe0\x6a\x49\x02\x02\x1b\xf2\x3c\xd8\x0d", b'Secret Message..', key=key256, nonce=nonce) + + assert raises(EncryptedSym(GreedyBytes, "AES").build, b"") == CipherError + assert raises(EncryptedSym(GreedyBytes, "AES").parse, b"") == CipherError + +@xfail(ONWINDOWS and PYPY, reason="no wheel for 'cryptography' is currently available for pypy on windows") +def test_encryptedsym_cbc_example(): + from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes + d = Struct( + "iv" / Default(Bytes(16), os.urandom(16)), + "enc_data" / EncryptedSym( + Aligned(16, + Struct( + "width" / Int16ul, + "height" / Int16ul + ) + ), + lambda ctx: Cipher(algorithms.AES(ctx._.key), modes.CBC(ctx.iv)) + ) + ) + key128 = b"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + byts = d.build({"enc_data": {"width": 5, "height": 4}}, key=key128) + obj = d.parse(byts, key=key128) + assert obj.enc_data == Container(width=5, height=4) + +@xfail(ONWINDOWS and PYPY, reason="no wheel for 'cryptography' is currently available for pypy on windows") +def test_encryptedsymaead(): + from cryptography.hazmat.primitives.ciphers import aead + key128 = b"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + key256 = b"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + nonce = b"\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" + + # AES 128/256 bit - GCM + d = Struct( + "associated_data" / Bytes(21), + "data" / EncryptedSymAead( + GreedyBytes, + lambda ctx: aead.AESGCM(ctx._.key), + this._.nonce, + this.associated_data + ) + ) + common( + d, + b"This is authenticated\xb6\xd3\x64\x0c\x7a\x31\xaa\x16\xa3\x58\xec\x17\x39\x99\x2e\xf8\x4e\x41\x17\x76\x3f\xd1\x06\x47\x04\x9f\x42\x1c\xf4\xa9\xfd\x99\x9c\xe9", + Container(associated_data=b"This is authenticated", data=b"The secret message"), + key=key128, 
+ nonce=nonce + ) + common( + d, + b"This is authenticated\xde\xb4\x41\x79\xc8\x7f\xea\x8d\x0e\x41\xf6\x44\x2f\x93\x21\xe6\x37\xd1\xd3\x29\xa4\x97\xc3\xb5\xf4\x81\x72\xa1\x7f\x3b\x9b\x53\x24\xe4", + Container(associated_data=b"This is authenticated", data=b"The secret message"), + key=key256, + nonce=nonce + ) + assert raises(EncryptedSymAead(GreedyBytes, "AESGCM", bytes(16)).build, b"") == CipherError + assert raises(EncryptedSymAead(GreedyBytes, "AESGCM", bytes(16)).parse, b"") == CipherError + +@xfail(ONWINDOWS and PYPY, reason="no wheel for 'cryptography' is currently available for pypy on windows") +def test_encryptedsymaead_gcm_example(): + from cryptography.hazmat.primitives.ciphers import aead + d = Struct( + "nonce" / Default(Bytes(16), os.urandom(16)), + "associated_data" / Bytes(21), + "enc_data" / EncryptedSymAead( + GreedyBytes, + lambda ctx: aead.AESGCM(ctx._.key), + this.nonce, + this.associated_data + ) + ) + key128 = b"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + byts = d.build({"associated_data": b"This is authenticated", "enc_data": b"The secret message"}, key=key128) + obj = d.parse(byts, key=key128) + assert obj.enc_data == b"The secret message" + assert obj.associated_data == b"This is authenticated" + +def test_rebuffered(): + data = b"0" * 1000 + assert Rebuffered(Array(1000,Byte)).parse_stream(io.BytesIO(data)) == [48]*1000 + assert Rebuffered(Array(1000,Byte), tailcutoff=50).parse_stream(io.BytesIO(data)) == [48]*1000 + assert Rebuffered(Byte).sizeof() == 1 + assert raises(Rebuffered(Byte).sizeof) == 1 + assert raises(Rebuffered(VarInt).sizeof) == SizeofError + +def test_lazy(): + d = Struct( + 'dup' / Lazy(Computed(this.exists)), + 'exists' / Computed(1), + ) + obj = d.parse(b'') + assert obj.dup() == 1 + + d = Lazy(Byte) + x = d.parse(b'\x00') + assert x() == 0 + assert d.build(0) == b'\x00' + assert d.build(x) == b'\x00' + assert d.sizeof() == 1 + +def test_lazy_issue_938(): + d = Lazy(Prefixed(Byte, Byte)) + func 
= d.parse(b'\x01\x02') + assert func() == 2 + +def test_lazy_seek(): + d = Struct( + "a" / Int8ub, + "b" / Lazy(Bytes(2)), + "c" / Int16ub, + "d" / Lazy(Bytes(4)) + ) + obj = d.parse(b"\x01\x02\x03\x04\x05\x06\x07\x08\x09") + + assert obj.a == 0x01 + assert obj.b() == b'\x02\x03' + assert obj.c == 0x0405 + assert obj.d() == b'\x06\x07\x08\x09' + +def test_lazystruct(): + d = LazyStruct( + "num1" / Int8ub, + "num2" / BytesInteger(1), + "prefixed1" / Prefixed(Byte, Byte), + "prefixed2" / Prefixed(Byte, Byte, includelength=True), + "prefixedarray" / PrefixedArray(Byte, Byte), + ) + obj = d.parse(b"\x00\x00\x01\x00\x02\x00\x01\x00") + assert obj.num1 == obj["num1"] == obj[0] == 0 + assert obj.num2 == obj["num2"] == obj[1] == 0 + assert obj.prefixed1 == obj["prefixed1"] == obj[2] == 0 + assert obj.prefixed2 == obj["prefixed2"] == obj[3] == 0 + assert obj.prefixedarray == obj["prefixedarray"] == obj[4] == [0] + assert len(obj) == 5 + assert list(obj.keys()) == ['num1', 'num2', 'prefixed1', 'prefixed2', 'prefixedarray'] + assert list(obj.values()) == [0, 0, 0, 0, [0]] + assert list(obj.items()) == [('num1', 0), ('num2', 0), ('prefixed1', 0), ('prefixed2', 0), ('prefixedarray', [0])] + assert repr(obj) == "<LazyContainer: 5 items cached, 5 subcons>" + assert str(obj) == "<LazyContainer: 5 items cached, 5 subcons>" + assert d.build(obj) == b"\x00\x00\x01\x00\x02\x00\x01\x00" + assert d.build(Container(obj)) == b"\x00\x00\x01\x00\x02\x00\x01\x00" + assert raises(d.sizeof) == SizeofError + +def test_lazyarray(): + d = LazyArray(5, Int8ub) + obj = d.parse(b"\x00\x01\x02\x03\x04") + assert repr(obj) == "<LazyListContainer: 0 of 5 items cached>" + for i in range(5): + assert obj[i] == i + assert obj[:] == [0,1,2,3,4] + assert obj == [0,1,2,3,4] + assert list(obj) == [0,1,2,3,4] + assert len(obj) == 5 + assert repr(obj) == "<LazyListContainer: 5 of 5 items cached>" + assert str(obj) == "<LazyListContainer: 5 of 5 items cached>" + assert d.build([0,1,2,3,4]) == b"\x00\x01\x02\x03\x04" + assert d.build(ListContainer([0,1,2,3,4])) == b"\x00\x01\x02\x03\x04" + assert d.build(obj) == b"\x00\x01\x02\x03\x04" + assert d.build(obj[:]) == b"\x00\x01\x02\x03\x04" + assert d.sizeof() == 5 +
+ d = LazyArray(5, VarInt) + obj = d.parse(b"\x00\x01\x02\x03\x04") + assert repr(obj) == "<LazyListContainer: 0 of 5 items cached>" + for i in range(5): + assert obj[i] == i + assert obj[:] == [0,1,2,3,4] + assert obj == [0,1,2,3,4] + assert list(obj) == [0,1,2,3,4] + assert len(obj) == 5 + assert repr(obj) == "<LazyListContainer: 5 of 5 items cached>" + assert str(obj) == "<LazyListContainer: 5 of 5 items cached>" + assert d.build([0,1,2,3,4]) == b"\x00\x01\x02\x03\x04" + assert d.build(ListContainer([0,1,2,3,4])) == b"\x00\x01\x02\x03\x04" + assert d.build(obj) == b"\x00\x01\x02\x03\x04" + assert d.build(obj[:]) == b"\x00\x01\x02\x03\x04" + assert raises(d.sizeof) == SizeofError + +def test_lazybound(): + d = LazyBound(lambda: Byte) + common(d, b"\x01", 1) + + d = Struct( + "value" / Byte, + "next" / If(this.value > 0, LazyBound(lambda: d)), + ) + common(d, b"\x05\x09\x00", Container(value=5, next=Container(value=9, next=Container(value=0, next=None)))) + + d = Struct( + "value" / Byte, + "next" / GreedyBytes, + ) + data = b"\x05\x09\x00" + while data: + x = d.parse(data) + data = x.next + print(x) + +def test_expradapter(): + MulDiv = ExprAdapter(Byte, obj_ * 7, obj_ // 7) + assert MulDiv.parse(b"\x06") == 42 + assert MulDiv.build(42) == b"\x06" + assert MulDiv.sizeof() == 1 + + Ident = ExprAdapter(Byte, obj_-1, obj_+1) + assert Ident.parse(b"\x02") == 1 + assert Ident.build(1) == b"\x02" + assert Ident.sizeof() == 1 + +def test_exprsymmetricadapter(): + pass + +def test_exprvalidator(): + One = ExprValidator(Byte, lambda obj,ctx: obj in [1,3,5]) + assert One.parse(b"\x01") == 1 + assert raises(One.parse, b"\xff") == ValidationError + assert One.build(5) == b"\x05" + assert raises(One.build, 255) == ValidationError + assert One.sizeof() == 1 + +def test_ipaddress_adapter_issue_95(): + class IpAddressAdapter(Adapter): + def _encode(self, obj, context, path): + return list(map(int, obj.split("."))) + def _decode(self, obj, context, path): + return "{0}.{1}.{2}.{3}".format(*obj) + IpAddress = IpAddressAdapter(Byte[4]) + + assert IpAddress.parse(b"\x7f\x80\x81\x82") ==
"127.128.129.130" + assert IpAddress.build("127.1.2.3") == b"\x7f\x01\x02\x03" + assert IpAddress.sizeof() == 4 + + IpAddress = ExprAdapter(Byte[4], + encoder = lambda obj,ctx: list(map(int, obj.split("."))), + decoder = lambda obj,ctx: "{0}.{1}.{2}.{3}".format(*obj), ) + + assert IpAddress.parse(b"\x7f\x80\x81\x82") == "127.128.129.130" + assert IpAddress.build("127.1.2.3") == b"\x7f\x01\x02\x03" + assert IpAddress.sizeof() == 4 + +def test_oneof(): + assert OneOf(Byte,[4,5,6,7]).parse(b"\x05") == 5 + assert OneOf(Byte,[4,5,6,7]).build(5) == b"\x05" + assert raises(OneOf(Byte,[4,5,6,7]).parse, b"\x08") == ValidationError + assert raises(OneOf(Byte,[4,5,6,7]).build, 8) == ValidationError + +def test_noneof(): + assert NoneOf(Byte,[4,5,6,7]).parse(b"\x08") == 8 + assert raises(NoneOf(Byte,[4,5,6,7]).parse, b"\x06") == ValidationError + +def test_filter(): + d = Filter(obj_ != 0, GreedyRange(Byte)) + assert d.parse(b"\x00\x02\x00") == [2] + assert d.build([0,1,0,2,0]) == b"\x01\x02" + +def test_slicing(): + d = Slicing(Array(4,Byte), 4, 1, 3, empty=0) + assert d.parse(b"\x01\x02\x03\x04") == [2,3] + assert d.build([2,3]) == b"\x00\x02\x03\x00" + assert d.sizeof() == 4 + +def test_indexing(): + d = Indexing(Array(4,Byte), 4, 2, empty=0) + assert d.parse(b"\x01\x02\x03\x04") == 3 + assert d.build(3) == b"\x00\x00\x03\x00" + assert d.sizeof() == 4 + +def test_probe(): + common(Probe(), b"", None, 0) + common(Probe(lookahead=32), b"", None, 0) + + common(Struct(Probe()), b"", {}, 0) + common(Struct(Probe(lookahead=32)), b"", {}, 0) + common(Struct("value"/Computed(7), Probe(this.value)), b"", dict(value=7), 0) + +def test_debugger(): + common(Debugger(Byte), b"\xff", 255, 1) + +def test_repr(): + assert repr(Byte) == '<FormatField>' + assert repr("num"/Byte) == '<Renamed num <FormatField>>' + assert repr(Default(Byte, 0)) == '<Default +nonbuild <FormatField>>' + assert repr(Struct()) == '<Struct +nonbuild>' + +def test_operators(): + common(Struct("new" / ("old" / Byte)), b"\x01", Container(new=1), 1) + common(Struct(Renamed(Renamed(Byte, newname="old"),
newname="new")), b"\x01", Container(new=1), 1) + + common(Array(4, Byte), b"\x01\x02\x03\x04", [1,2,3,4], 4) + common(Byte[4], b"\x01\x02\x03\x04", [1,2,3,4], 4) + common(Struct("nums" / Byte[4]), b"\x01\x02\x03\x04", Container(nums=[1,2,3,4]), 4) + + common(Int8ub >> Int16ub, b"\x01\x00\x02", [1,2], 3) + common(Int8ub >> Int16ub >> Int32ub, b"\x01\x00\x02\x00\x00\x00\x03", [1,2,3], 7) + common(Int8ub[2] >> Int16ub[2], b"\x01\x02\x00\x03\x00\x04", [[1,2],[3,4]], 6) + + common(Sequence(Int8ub) >> Sequence(Int16ub), b"\x01\x00\x02", [1,2], 3) + common(Struct("count"/Byte, "items"/Byte[this.count], Pass, Terminated), b"\x03\x01\x02\x03", Container(count=3, items=[1,2,3]), SizeofError) + common("count"/Byte + "items"/Byte[this.count] + Pass + Terminated, b"\x03\x01\x02\x03", Container(count=3, items=[1,2,3]), SizeofError) + common(Struct(a=Byte) + Struct(b=Byte), b"\x01\x02", Container(a=1, b=2), 2) + + d = Byte * "description" + assert d.docs == "description" + d = "description" * Byte + assert d.docs == "description" + """ + description + """ * \ + Byte + assert d.docs == "description" + d = Renamed(Renamed(Byte, newdocs="old"), newdocs="new") + assert d.docs == "new" + +def test_operators_issue_87(): + assert ("string_name" / Byte).parse(b"\x01") == 1 + assert (u"unicode_name" / Byte).parse(b"\x01") == 1 + assert (b"bytes_name" / Byte).parse(b"\x01") == 1 + assert (None / Byte).parse(b"\x01") == 1 + +def test_from_issue_76(): + d = Aligned(4, Struct("a"/Byte, "f"/Bytes(lambda ctx: ctx.a))) + common(d, b"\x02\xab\xcd\x00", Container(a=2, f=b"\xab\xcd")) + +def test_from_issue_60(): + Header = Struct( + "type" / Int8ub, + "size" / Switch(lambda ctx: ctx.type, + { + 0: Int8ub, + 1: Int16ub, + 2: Int32ub, + }), + "length" / Tell, + ) + assert Header.parse(b"\x00\x05") == Container(type=0, size=5, length=2) + assert Header.parse(b"\x01\x00\x05") == Container(type=1, size=5, length=3) + assert Header.parse(b"\x02\x00\x00\x00\x05") == Container(type=2, size=5, length=5) + 
assert Header.build(dict(type=0, size=5)) == b"\x00\x05" + assert Header.build(dict(type=1, size=5)) == b"\x01\x00\x05" + assert Header.build(dict(type=2, size=5)) == b"\x02\x00\x00\x00\x05" + +def test_from_issue_171(): + attributes = BitStruct( + "attr" / Aligned(8, Array(3, Struct( + "attrCode" / BitsInteger(16), + "attrValue" / Switch(this.attrCode, { + 34: BitsInteger(8), + 205: BitsInteger(2), + 512: BitsInteger(2), + }), + ))), + ) + blob = b"\x00\x22\x82\x00\xCD\x80\x80\x10" + assert attributes.parse(blob) == Container(attr=[ + Container(attrCode=34, attrValue=130), + Container(attrCode=205, attrValue=2), + Container(attrCode=512, attrValue=1), ]) + +def test_from_issue_175(): + @FuncPath + def comp_(num_array): + return sum(x << ((len(num_array)-1-i)*8) for i,x in enumerate(num_array)) + + test = Struct( + "numArray" / RepeatUntil(obj_ < 128, Byte), + "value" / Computed(comp_(this.numArray)) + ) + assert test.parse(b'\x87\x0f').value == 34575 + +def test_from_issue_71(): + Inner = Struct( + 'name' / PascalString(Byte, "utf8"), + 'occupation' / PascalString(Byte, "utf8"), + ) + Outer = Struct( + 'struct_type' / Int16ub, + 'payload_len' / Int16ub, + 'payload' / RawCopy(Inner), + 'serial' / Int16ub, + 'checksum' / Checksum(Bytes(64), + lambda data: hashlib.sha512(data).digest(), + this.payload.data), + Check(len_(this.payload.data) == this.payload_len), + Terminated, + ) + + payload = Inner.build(Container( + name=u"unknown", + occupation=u"worker", + )) + Outer.build(Container( + struct_type=9001, + payload_len=len(payload), + payload=Container(data=payload), + serial=12345, + )) + +def test_from_issue_231(): + u = Union(0, "raw"/Byte[8], "ints"/Int[2]) + s = Struct("u"/u, "d"/Byte[4]) + + buildret = s.build(dict(u=dict(ints=[1,2]),d=[0,1,2,3])) + assert buildret == b"\x00\x00\x00\x01\x00\x00\x00\x02\x00\x01\x02\x03" + assert s.build(s.parse(buildret)) == buildret + +def test_from_issue_246(): + NumVertices = Bitwise(Aligned(8, Struct( + 'numVx4' / 
BitsInteger(4), + 'numVx8' / If(this.numVx4 == 0, BitsInteger(8)), + 'numVx16' / If(this.numVx4 == 0 & this.numVx8 == 255, BitsInteger(16)), + ))) + common(NumVertices, b'\x02\x30', Container(numVx4=0, numVx8=35, numVx16=None)) + + testBit = BitStruct( + 'a' / BitsInteger(8), + 'b' / If(this.a == 97, BitsInteger(8)) + ) + testByte = Struct( + 'a' / Byte, + 'b' / If(this.a == 97, Byte) + ) + common(testBit, b'ab', Container(a=97, b=98)) + common(testByte, b'ab', Container(a=97, b=98)) + + NumVertices = Union(None, + 'numVx4' / Bitwise(Aligned(8, Struct('num'/ BitsInteger(4) ))), + 'numVx8' / Bitwise(Aligned(8, Struct('num'/ BitsInteger(12)))), + 'numVx16'/ Bitwise(Aligned(8, Struct('num'/ BitsInteger(28)))), + ) + assert NumVertices.parse(b'\x01\x34\x56\x70') == Container(numVx4=Container(num=0), numVx8=Container(num=19), numVx16=Container(num=1262951)) + +def test_from_issue_244(): + class AddIndexes(Adapter): + def _decode(self, obj, context, path): + for i,con in enumerate(obj): + con.index = i + return obj + + d = AddIndexes(Struct("num"/Byte)[4]) + assert d.parse(b"abcd") == [Container(num=97, index=0),Container(num=98, index=1),Container(num=99, index=2),Container(num=100, index=3),] + +def test_from_issue_269(): + d = Struct("enabled" / Byte, If(this.enabled, Padding(2))) + assert d.build(dict(enabled=1)) == b"\x01\x00\x00" + assert d.build(dict(enabled=0)) == b"\x00" + d = Struct("enabled" / Byte, "pad" / If(this.enabled, Padding(2))) + assert d.build(dict(enabled=1)) == b"\x01\x00\x00" + assert d.build(dict(enabled=0)) == b"\x00" + +def test_hanging_issue_280(): + d = BitStruct('a'/BitsInteger(20), 'b'/BitsInteger(12)) + assert raises(d.parse, b'\x00') == StreamError + +def test_from_issue_324(): + d = Struct( + "vals" / Prefixed(Byte, RawCopy( + Struct("a" / Byte[2]), + )), + "checksum" / Checksum( + Byte, + lambda data: sum(data) & 0xFF, + this.vals.data + ), + ) + assert d.build(dict(vals=dict(value=dict(a=[0,1])))) == b"\x02\x00\x01\x01" + assert 
d.build(dict(vals=dict(data=b"\x00\x01"))) == b"\x02\x00\x01\x01" + +def test_from_issue_357(): + inner = Struct( + "computed" / Computed(4), + ) + st1 = Struct( + "a" / inner, + Check(this.a.computed == 4), + ) + st2 = Struct( + "b" / Switch(0, {}, inner), + Check(this.b.computed == 4), + ) + assert st1.build(dict(a={})) == b"" + assert st2.build(dict(b={})) == b"" + +def test_context_is_container(): + d = Struct(Check(lambda ctx: type(ctx) is Container)) + d.parse(b"") + +def test_from_issue_362(): + FORMAT = Struct( + "my_tell" / Tell, + "my_byte" / Byte, + ) + BIT_FORMAT = BitStruct( + "my_tell" / Tell, + "my_bits" / Bit[8], + ) + for i in range(5): + assert FORMAT.parse(b'\x00').my_tell == 0 + for i in range(5): + assert BIT_FORMAT.parse(b'\x00').my_tell == 0 + +@xfail(raises=AttributeError, reason="can't access Enums inside BitStruct") +def test_from_issue_781(): + d = Struct( + "animal" / Enum(Byte, giraffe=1), + ) + + x = d.parse(b"\x01") + assert x.animal == "giraffe" # works + assert x.animal == d.animal.giraffe # works + + d = BitStruct( + "animal" / Enum(BitsInteger(8), giraffe=1), + ) + + x = d.parse(b"\x01") + assert x.animal == "giraffe" # works + assert x.animal == d.animal.giraffe # AttributeError: 'Transformed' object has no attribute 'animal' + +def test_this_expresion_compare_container(): + st = Struct( + "flags" / FlagsEnum(Byte, a=1), + Check(lambda this: this.flags == Container(_flagsenum=True, a=1)), + ) + common(st, b"\x01", dict(flags=Container(_flagsenum=True, a=True)), 1) + +def test_pickling_constructs(): + import cloudpickle + + d = Struct( + "count" / Byte, + "greedybytes" / Prefixed(Byte, GreedyBytes), + "formatfield" / FormatField("=","Q"), + "bytesinteger" / BytesInteger(1), + "varint" / VarInt, + "text1" / PascalString(Byte, "utf8"), + "text2" / CString("utf8"), + "enum" / Enum(Byte, zero=0), + "flagsenum" / FlagsEnum(Byte, zero=0), + "array1" / Byte[5], + "array2" / Byte[this.count], + "greedyrange" / Prefixed(Byte, 
GreedyRange(Byte)), + "if1" / IfThenElse(True, Byte, Byte), + "padding" / Padding(1), + "peek" / Peek(Byte), + "tell" / Tell, + "this1" / Byte[this.count], + "obj_1" / RepeatUntil(obj_ == 0, Byte), + "len_1" / Computed(len_(this.array1)), + ) + data = bytes(100) + + du = cloudpickle.loads(cloudpickle.dumps(d, protocol=-1)) + assert du.parse(data) == d.parse(data) + +def test_pickling_constructs_issue_894(): + import cloudpickle + + fundus_header = Struct( + 'width' / Int32un, + 'height' / Int32un, + 'bits_per_pixel' / Int32un, + 'number_slices' / Int32un, + 'unknown' / PaddedString(4, 'ascii'), + 'size' / Int32un, + 'img' / Int8un, + ) + + cloudpickle.dumps(fundus_header) + +def test_exposing_members_attributes(): + d = Struct( + "animal" / Enum(Byte, giraffe=1), + ) + assert isinstance(d.animal, Renamed) + assert isinstance(d.animal.subcon, Enum) + assert d.animal.giraffe == "giraffe" + + d = Sequence( + "animal" / Enum(Byte, giraffe=1), + ) + assert isinstance(d.animal, Renamed) + assert isinstance(d.animal.subcon, Enum) + assert d.animal.giraffe == "giraffe" + + d = FocusedSeq(0, + "animal" / Enum(Byte, giraffe=1), + ) + assert isinstance(d.animal, Renamed) + assert isinstance(d.animal.subcon, Enum) + assert d.animal.giraffe == "giraffe" + + d = Union(None, + "animal" / Enum(Byte, giraffe=1), + ) + assert isinstance(d.animal, Renamed) + assert isinstance(d.animal.subcon, Enum) + assert d.animal.giraffe == "giraffe" + +def test_exposing_members_context(): + d = Struct( + "count" / Byte, + "data" / Bytes(lambda this: this.count - this._subcons.count.sizeof()), + Check(lambda this: this._subcons.count.sizeof() == 1), + ) + common(d, b"\x05four", Container(count=5, data=b"four")) + + d = Sequence( + "count" / Byte, + "data" / Bytes(lambda this: this.count - this._subcons.count.sizeof()), + Check(lambda this: this._subcons.count.sizeof() == 1), + ) + common(d, b"\x05four", [5,b"four",None]) + + d = FocusedSeq("count", + "count" / Byte, + "data" / Padding(lambda this: 
this.count - this._subcons.count.sizeof()), + Check(lambda this: this._subcons.count.sizeof() == 1), + ) + common(d, b'\x04\x00\x00\x00', 4, SizeofError) + + d = Union(None, + "chars" / Byte[4], + "data" / Bytes(lambda this: this._subcons.chars.sizeof()), + Check(lambda this: this._subcons.chars.sizeof() == 4), + ) + assert d.parse(b"\x01\x02\x03\x04") == dict(chars=[1,2,3,4],data=b"\x01\x02\x03\x04") + +def test_isparsingbuilding(): + d = Struct( + Check(this._parsing & this._._parsing), + Check(~this._building & ~this._._building), + Check(~this._sizing & ~this._._sizing), + ) + d.parse(b'') + d = Struct( + Check(~this._parsing & ~this._._parsing), + Check(this._building & this._._building), + Check(~this._sizing & ~this._._sizing), + ) + d.build(None) + d = Struct( + Check(~this._parsing & ~this._._parsing), + Check(~this._building & ~this._._building), + Check(this._sizing & this._._sizing), + ) + d.sizeof() + # --------------------------------- + d = Sequence( + Check(this._parsing & this._._parsing), + Check(~this._building & ~this._._building), + Check(~this._sizing & ~this._._sizing), + ) + d.parse(b'') + d = Sequence( + Check(~this._parsing & ~this._._parsing), + Check(this._building & this._._building), + Check(~this._sizing & ~this._._sizing), + ) + d.build(None) + d = Sequence( + Check(~this._parsing & ~this._._parsing), + Check(~this._building & ~this._._building), + Check(this._sizing & this._._sizing), + ) + d.sizeof() + # --------------------------------- + d = FocusedSeq("none", + "none" / Pass, + Check(this._parsing & this._._parsing), + Check(~this._building & ~this._._building), + Check(~this._sizing & ~this._._sizing), + ) + d.parse(b'') + d = FocusedSeq("none", + "none" / Pass, + Check(~this._parsing & ~this._._parsing), + Check(this._building & this._._building), + Check(~this._sizing & ~this._._sizing), + ) + d.build(None) + d = FocusedSeq("none", + "none" / Pass, + Check(~this._parsing & ~this._._parsing), + Check(~this._building & 
~this._._building), + Check(this._sizing & this._._sizing), + ) + d.sizeof() + # --------------------------------- + d = Union(None, + "none" / Pass, + Check(this._parsing & this._._parsing), + Check(~this._building & ~this._._building), + Check(~this._sizing & ~this._._sizing), + ) + d.parse(b'') + d = Union(None, + "none" / Pass, + Check(~this._parsing & ~this._._parsing), + Check(this._building & this._._building), + Check(~this._sizing & ~this._._sizing), + ) + d.build(dict(none=None)) + d = Union(None, + "none" / Pass, + Check(~this._parsing & ~this._._parsing), + Check(~this._building & ~this._._building), + Check(this._sizing & this._._sizing), + ) + # doesnt check context because _sizeof just raises the error + assert raises(d.sizeof) == SizeofError + # --------------------------------- + d = LazyStruct( + Check(this._parsing & this._._parsing), + Check(~this._building & ~this._._building), + Check(~this._sizing & ~this._._sizing), + ) + d.parse(b'') + d = LazyStruct( + Check(~this._parsing & ~this._._parsing), + Check(this._building & this._._building), + Check(~this._sizing & ~this._._sizing), + ) + d.build({}) + d = LazyStruct( + Check(~this._parsing & ~this._._parsing), + Check(~this._building & ~this._._building), + Check(this._sizing & this._._sizing), + ) + d.sizeof() + +def test_struct_stream(): + d = Struct( + 'fixed' / FixedSized(10, Struct( + 'data' / GreedyBytes, + # check a substream + Check(lambda this: stream_size(this._io) == 10), + Check(lambda this: stream_iseof(this._io)), + # checks parent original stream + Check(lambda this: stream_size(this._._io) == 20), + Check(lambda this: not stream_iseof(this._._io)), + )), + # checks mid-parsing + Check(lambda this: stream_tell(this._io, None) == 10), + Check(lambda this: stream_size(this._io) == 20), + Check(lambda this: not stream_iseof(this._io)), + 'rest' / GreedyBytes, + # checks after parsed to EOF + Check(lambda this: stream_tell(this._io, None) == 20), + Check(lambda this: 
stream_size(this._io) == 20), + Check(lambda this: stream_iseof(this._io)), + Check(lambda this: stream_seek(this._io, 0, 2, None) == 20), + # checks nested struct stream + Check(lambda this: stream_tell(this.fixed._io, None) == 10), + Check(lambda this: stream_size(this.fixed._io) == 10), + ) + d.parse(bytes(20)) + +def test_struct_root_topmost(): + d = Struct( + 'x' / Computed(1), + 'inner' / Struct( + 'inner2' / Struct( + 'x' / Computed(this._root.x), + 'z' / Computed(this._params.z), + 'zz' / Computed(this._root._.z), + ), + ), + Probe(), + ) + # setGlobalPrintPrivateEntries(True) + # d.parse(b'', z=2) + assert d.parse(b"", z=2) == Container(x=1, inner=Container(inner2=Container(x=1,z=2,zz=2))) + +def test_parsedhook_repeatersdiscard(): + outputs = [] + def printobj(obj, ctx): + outputs.append(obj) + d = GreedyRange(Byte * printobj, discard=True) + assert d.parse(b"\x01\x02\x03") == [] + assert outputs == [1,2,3] + + outputs = [] + def printobj(obj, ctx): + outputs.append(obj) + d = Array(3, Byte * printobj, discard=True) + assert d.parse(b"\x01\x02\x03") == [] + assert outputs == [1,2,3] + + outputs = [] + def printobj(obj, ctx): + outputs.append(obj) + d = RepeatUntil(lambda obj,lst,ctx: ctx._index == 2, Byte * printobj, discard=True) + assert d.parse(b"\x01\x02\x03") == [] + assert outputs == [1,2,3] + +def test_exportksy(): + d = Struct( + "nothing" / Pass * "field docstring", + + "data1" / Bytes(10), + "data2" / GreedyBytes, + + "bitstruct" / BitStruct( + "flag" / Flag, + "padding" / Padding(7), + "int32" / Int32ub, + "int32le" / BytesInteger(4), + "int4a" / Nibble, + "int4b" / BitsInteger(4), + ), + + "int32" / Int32ub, + "float32" / Float32b, + "int32le" / BytesInteger(4, swapped=True), + "varint" / VarInt, + + "string1" / PaddedString(10, "utf8"), + "string2" / PascalString(Byte, "utf8"), + "string3" / CString("utf8"), + "string4" / GreedyString("utf8"), + + "flag" / Flag, + "enum" / Enum(Byte, one=1, two=2), + "flagsenum" / FlagsEnum(Byte, one=1, 
two=2), + + "struct1" / Struct(Byte, "named"/Byte), + "sequence1" / Sequence(Byte, "named"/Byte), + + "array2d" / Array(5, Array(5, Byte)), + "greedyrange" / GreedyRange(Byte), + "repeatuntil" / RepeatUntil(obj_ == 0, Byte), + + "const1" / Const(b"ABCD"), + "const2" / Const(1, Int32ub), + # Computed + # Index + "rebuild" / Rebuild(Byte, 0), + "default" / Default(Byte, 0), + "namedtuple1" / NamedTuple("coord", "x y z", "x"/Byte + "y"/Byte + "z"/Byte), + "namedtuple2" / NamedTuple("coord", "x y z", Byte >> Byte >> Byte), + "namedtuple3" / NamedTuple("coord", "x y z", Byte[3]), + "namedtuple4" / NamedTuple("coord", "x y z", GreedyRange(Byte)), + "timestamp1" / Timestamp(Int32ub, 1, 1970), + "timestamp2" / Timestamp(Int32ub, "msdos", "msdos"), + "hex" / Hex(Int32ub), + "hexdump" / HexDump(Int32ub), + + # Union + "if1" / If(this.num == 0, Byte), + "ifthenelse1" / IfThenElse(this.num == 0, Byte, Byte), + # Switch + + "padding" / Padding(5), + "padded" / Padded(5, Byte), + + "pointer1" / Pointer(0x1000, Int32ub), + "pointer2" / Pointer(this.pointer1, Int32ub), + "pass1" / Pass, + # Terminated + + "prefixed" / Prefixed(Byte, GreedyBytes), + "prefixedarray" / PrefixedArray(Byte, Byte), + # Compressed + ) * \ + "struct docstring" + print(d.export_ksy(filename="example_ksy.ksy")) + +@xfail(reason="both sizeof fail because length is 1 level up than when parsing") +def test_from_issue_692(): + # https://stackoverflow.com/questions/44747202/pythons-construct-sizeof-for-construct-depending-on-its-parent + + AttributeHandleValuePair = Struct( + "handle" / Int16ul, + "value" / GreedyBytes, + ) + AttReadByTypeResponse = Struct( + "length" / Int8ul, # The size in bytes of each handle/value pair + "datalist" / Array(2, FixedSized(this.length, AttributeHandleValuePair)), + ) + assert AttReadByTypeResponse.parse(b"\x04\x01\x02\x03\x04\x01\x02\x03\x04") == Container(length=4,datalist=[dict(handle=0x0201,value=b'\x03\x04'),dict(handle=0x0201,value=b'\x03\x04')]) + assert 
AttReadByTypeResponse.sizeof(length=4) == 1+2*4 + + AttributeHandleValuePair = Struct( + "handle" / Int16ul, + "value" / Bytes(this._.length - 2), + ) + AttReadByTypeResponse = Struct( + "length" / Int8ul, # The size in bytes of each handle/value pair + "datalist" / AttributeHandleValuePair[2], + ) + assert AttReadByTypeResponse.parse(b"\x04\x01\x02\x03\x04\x01\x02\x03\x04") == Container(length=4,datalist=[dict(handle=0x0201,value=b'\x03\x04'),dict(handle=0x0201,value=b'\x03\x04')]) + assert AttReadByTypeResponse.sizeof(length=4) == 1+2*(2+4-2) + +def test_greedyrange_issue_697(): + d = BitStruct( + "rest" / Bytewise(GreedyRange(Byte)), + ) + d.parse(bytes(5)) + +def test_greedybytes_issue_697(): + d = BitStruct( + "rest" / Bytewise(GreedyBytes), + ) + d.parse(bytes(5)) + +def test_hex_issue_709(): + # Make sure, the fix doesn't destroy already working code + d = Hex(Bytes(1)) + obj = d.parse(b"\xff") + assert "unhexlify('ff')" in str(obj) + + d = Struct("x" / Hex(Byte)) + obj = d.parse(b"\xff") + assert "x = 0xFF" in str(obj) + + d = HexDump(Bytes(1)) + obj = d.parse(b"\xff") + assert "hexundump" in str(obj) + + # The following checks only succeed after fixing the issue + d = Struct("x" / Hex(Bytes(1))) + obj = d.parse(b"\xff") + assert "x = unhexlify('ff')" in str(obj) + + d = Struct("x" / HexDump(Bytes(1))) + obj = d.parse(b"\xff") + assert "x = hexundump" in str(obj) + + d = Struct("x" / Struct("y" / Hex(Bytes(1)))) + obj = d.parse(b"\xff") + assert "y = unhexlify('ff')" in str(obj) + +@xfail(reason="Enable to see path information in stream operations") +def test_showpath(): + # trips stream_read + d = Struct("inner"/Struct("x"/Byte)) + d.parse(b"") + +@xfail(reason="Enable to see path information in stream operations") +def test_showpath2(): + x = Struct( + 'foo' / Bytes(1), + 'a' / Struct( + 'foo' / Bytes(1), + 'b' / Struct( + 'foo' / Bytes(1), + 'c' / Struct( + 'foo' / Bytes(1), + 'bar' / Bytes(1) + ) + ) + ) + ) + x.parse(b'\xff' * 5) + x.parse(b'\xff' * 3) 
+ # StreamError: Error in path (parsing) -> a -> b -> c -> foo + # stream read less than specified amount, expected 1, found 0 + +def test_buildfile_issue_737(): + Byte.build_file(Byte.parse(b'\xff'), 'example_737') + assert Byte.parse_file('example_737') == 255 + +@xfail(reason="Context is not properly processed, see #771 and PR #784") +def test_struct_issue_771(): + spec = Struct( + 'a' / Int32ul, + 'b' / Struct( + 'count' / Int32ul, + 'entries' / Byte[this.count] + ) + ) + data = b'\x01\x00\x00\x00\x02\x00\x00\x00\x0a\x0b' + info = spec.parse(data) + assert info == {'a': 1, 'b': {'count': 2, 'entries': [0x0a, 0x0b]}} + assert spec.build(info) == data + assert spec.sizeof(**info) == 10 + +def test_struct_copy(): + import copy + d = Struct( + "a" / Int16ub, + "b" / Int8ub, + ) + d_copy = copy.copy(d) + + common(d, b"\x00\x01\x02", Container(a=1,b=2), 3) + common(d_copy, b"\x00\x01\x02", Container(a=1,b=2), 3) + +def test_switch_issue_913_using_enum(): + enum = Enum(Byte, Zero=0, One=1, Two=2) + mapping = { + enum.Zero: Pass, + enum.One: Int8ul, + enum.Two: Int16ul, + } + + d = Switch(keyfunc = this.x, cases = mapping) + common(d, b"", None, 0, x="Zero") + common(d, b"\xab", 171, 1, x="One") + common(d, b"\x09\x00", 9, 2, x="Two") + +def test_switch_issue_913_using_strings(): + mapping = { + "Zero": Pass, + "One": Int8ul, + "Two": Int16ul, + } + + d = Switch(keyfunc = this.x, cases = mapping) + common(d, b"", None, 0, x="Zero") + common(d, b"\xab", 171, 1, x="One") + common(d, b"\x09\x00", 9, 2, x="Two") + +def test_switch_issue_913_using_integers(): + mapping = { + 0: Pass, + 1: Int8ul, + 2: Int16ul, + } + + d = Switch(keyfunc = this.x, cases = mapping) + common(d, b"", None, 0, x=0) + common(d, b"\xab", 171, 1, x=1) + common(d, b"\x09\x00", 9, 2, x=2) + +@xfail(reason="unfixable defect in the design") +def test_adapters_context_issue_954(): + class IdAdapter(Adapter): + def _decode(self, obj, context, path): + return obj + def _encode(self, obj, context, path): + 
return obj + IdentityAdapter = IdAdapter(Rebuild(Int16ub, len_(this.data))) + TestStruct = Struct("len" / IdentityAdapter, "data" / Bytes(this.len)) + TestStruct.build({"data": b"123456"}) + +def test_nullterminated_longterm_issue_1046(): + d = NullTerminated(GreedyBytes, term=b"END") + assert d.parse(b"xxxEND") == b"xxx" + assert raises(d.parse, b"xENDxx") == StreamError + +def test_compile_binexpr_bitwise_and_issue_1039(): + d = Struct( + "a" / Int8ub, + "cond" / If(this.a & 32, Int8ub), + Terminated, + ) + common(d, b"\x00", {"a": 0, "cond": None}) + common(d, b"\x01", {"a": 1, "cond": None}) + common(d, b" \x05", {"a": 32, "cond": 5}) + +@xfail(reason="unknown problem with Select") +def test_select_issue_1038(): + s = Struct( + "value" / Select(IfThenElse(this._params.ctx == 1, Byte, Short)), + ) + assert s.build(dict(value=9), ctx=1) == b"\x09" + +def test_select_issue_1038_fixed(): + s = Struct( + "value" / Select(If(this._.ctx == 1, Byte), If(this._.ctx == 2, Short)), + ) + assert s.build(dict(value=9), ctx=1) == b"\x09" + +def test_unicode_error(): + d = Select(PaddedString(255, "ascii"), CString("ascii"), PascalString(Byte, "ascii"), GreedyString("ascii"), Pass) + data = u"Афон".encode() + assert d.parse(data) == None + +def test_issue_1014(): + d = Struct( + "version" / Int16ub, + "box" / Prefixed(Int8ub, Struct( + "position" / Tell, + "payload" / GreedyBytes, + )), + ) + assert d.parse(bytes([1,2,3,4,5,6])) == Container(version=0x0102, box=Container(position=3, payload=b'\x04\x05\x06')) diff --git a/tests/test_expr.py b/tests/test_expr.py new file mode 100644 index 000000000..eaf793fd2 --- /dev/null +++ b/tests/test_expr.py @@ -0,0 +1,138 @@ +from tests.declarativeunittest import * +from construct import * +from construct.lib import * + + +def test_path(): + path = Path("path") + x = ~((path.foo * 2 + 3 << 2) % 11) + assert repr(x) == "not ((((path['foo'] * 2) + 3) << 2) % 11)" + assert str(x) == "not ((((path['foo'] * 2) + 3) << 2) % 11)" + assert 
x(dict(foo=7)) == False + +def test_this(): + assert repr(this.x) == "this['x']" + assert str(this.x) == "this['x']" + assert repr(this.x == 0) == "(this['x'] == 0)" + assert str(this.x == 0) == "(this['x'] == 0)" + + this_example = Struct( + # straight-forward usage: instead of passing (lambda ctx: ctx["length"]) use this.length + "length" / Int8ub, + "value" / Bytes(this.length), + # an example of nesting: '_' refers to the parent's scope + "nested" / Struct( + "b1" / Int8ub, + "b2" / Int8ub, + "b3" / Computed(this.b1 * this.b2 + this._.length), + ), + # and conditions work as expected + "condition" / IfThenElse( + this.nested.b1 > 50, + "c1" / Int32ub, + "c2" / Int8ub, + ), + ) + common(this_example, b"\x05helloABXXXX", Container(length=5, value=b'hello', nested=Container(b1=65, b2=66, b3=4295), condition=1482184792)) + +def test_this_getitem(): + d = Struct( + "num" / Int8ub, + "dup1" / Computed(this.num), + "dup2" / Computed(this["num"]), + Check(this.num == 0), + Check(this.dup1 == 0), + Check(this.dup2 == 0), + ) + common(d, b"\x00", Container(num=0, dup1=0, dup2=0), 1) + +def test_functions(): + assert repr(len_(this.x)) == "len_(this['x'])" + assert str(len_(this.x)) == "len_(this['x'])" + assert repr(len_) == "len_" + assert str(len_) == "len_" + + example = Struct( + "items" / Byte[2], + Check(len_(this.items) == 2), + Check(sum_(this.items) == 10), + Check(min_(this.items) == 3), + Check(max_(this.items) == 7), + "nega" / Int8sb, + Check(this.nega == -1), + Check(abs_(this.nega) == 1), + ) + common(example, b"\x03\x07\xff", Container(items=[3,7], nega=-1), 3) + +def test_obj(): + assert repr(obj_) == "obj_" + assert repr(obj_ + 1 == 12) == "((obj_ + 1) == 12)" + assert str(obj_) == "obj_" + assert str(obj_ + 1 == 12) == "((obj_ + 1) == 12)" + + assert (obj_)(1,{}) == 1 + assert (obj_ + 10)(1,{}) == 11 + assert (obj_ == 12)(12,{}) + assert (obj_ != 12)(13,{}) + + assert (obj_)(1,[],{}) == 1 + assert (obj_ + 10)(1,[],{}) == 11 + assert (obj_ == 
12)(12,[],{}) + assert (obj_ != 12)(13,[],{}) + + example = Struct( + "items" / RepeatUntil(obj_ == 255, Byte), + ) + common(example, b"\x03\x07\xff", Container(items=[3,7,255])) + +@xfail(reason="faulty implementation, needs fixing") +def test_list(): + assert repr(list_) == "list_" + assert str(list_) == "list_" + assert repr(list_ == [0, 1, 2]) == "(list_ == [0, 1, 2])" + assert str(list_ == [0, 1, 2]) == "(list_ == [0, 1, 2])" + assert repr(list_[-1]) == "list_[-1]" + assert str(list_[-1]) == "list_[-1]" + assert repr(list_[-1] == 0) == "(list_[-1] == 0)" + assert str(list_[-1] == 0) == "(list_[-1] == 0)" + assert repr(list_[-1] + 1) == "(list_[-1] + 1)" + assert str(list_[-1] + 1) == "(list_[-1] + 1)" + # missing str + + assert (list_)(1,[],{}) == [] + assert (list_[-1])(1,[2,3,4],{}) == 4 + # below fail + assert (list_[-1] + 1)(1,[2,3,4],{}) == 5 + assert (list_[-1] == 4)(1,[2,3,4],{}) == True + assert (len_(list_))(1,[2,3,4],{}) == 3 + assert (len_(list_[:]))(1,[2,3,4],{}) == 3 + + example = Struct( + "items" / RepeatUntil(list_[-1] == 255, Byte), + ) + common(example, b"\x03\x07\xff", Container(items=[3,7,255])) + +@xfail(reason="this expression does not support in operator") +def test_this_in_operator(): + d = Struct( + "if" / If(this.data in [1,2,3], Const(b"4")), + "not" / If(this.data not in [1,2,3], Const(b"5")), + ) + assert d.build(dict(data=1)) == b'4' + assert d.build(dict(data=7)) == b'5' + +def test_lambda_in_operator(): + d = Struct( + "if" / If(lambda ctx: ctx.data in [1,2,3], Const(b"4")), + "not" / If(lambda ctx: ctx.data not in [1,2,3], Const(b"5")), + ) + assert d.build(dict(data=1)) == b'4' + assert d.build(dict(data=7)) == b'5' + +def test_this_shift_operator(): + d = Struct( + "a" / Byte, + "ls" / Computed(this.a << 1), + "rs" / Computed(this.a >> 1), + ) + assert d.parse(b"\x02") == Container(a=2, ls=4, rs=1) diff --git a/tests/test_multiprocessing.py b/tests/test_multiprocessing.py new file mode 100644 index 000000000..b39ad0059 --- 
/dev/null +++ b/tests/test_multiprocessing.py @@ -0,0 +1,24 @@ +from tests.declarativeunittest import * +from construct import * +from construct.lib import * + +def worker(q): + obj = q.get() + print(obj) + +def test_multiprocessing(): + import multiprocessing + + queue = multiprocessing.Queue() + + p = multiprocessing.Process(target=worker, args=(queue,)) + p.start() + + obj = Container(name="test") + print(obj) + queue.put(obj) + + # Wait for the worker to finish + queue.close() + queue.join_thread() + p.join() diff --git a/todo.txt b/todo.txt deleted file mode 100644 index 0f055b141..000000000 --- a/todo.txt +++ /dev/null @@ -1,9 +0,0 @@ -fix aligned bug (test/t1.py) -introduce container displayers (repr, xml, tree-view) -figure out a reasonable way to check and generate crc - - * Discovered that repeater errors are incorrect now. StrictRepeater raises - ArrayError, GreedyRepeater and OptionalGreedyRepeater raise RangeError, and - nobody raises RepeaterError; in fact, it's no longer defined. Should these - be reunified? - * Tests and docs reference names which will go away. We should fix that. diff --git a/version-increment b/version-increment new file mode 100755 index 000000000..a5122dd43 --- /dev/null +++ b/version-increment @@ -0,0 +1,20 @@ +#!/usr/bin/python3 + +import construct +x,y,z = construct.version +z += 1 + +import arrow +date = arrow.utcnow().datetime + +content = f""" +version = ({x},{y},{z}) +version_string = "{x}.{y}.{z}" +release_date = "{date.year}.{date.month:02}.{date.day:02}" +""".lstrip() + +with open("construct/version.py","wt") as f: + f.write(content) + +import os +os.system(f"git commit -a -m 'version uped to {x}.{y}.{z}'")