From 39017d5df21d74e862be058be81051d0401a4c88 Mon Sep 17 00:00:00 2001 From: franz haas Date: Sun, 4 Feb 2024 18:49:46 +0100 Subject: [PATCH 01/32] limit pytest version to <8.0.0 this is due to breaking changes in the way tests get discovered which effect us --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b69943f9..8857958f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -23,7 +23,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install pytest pytest-benchmark numpy arrow ruamel.yaml cloudpickle lz4 + pip install "pytest<8.0.0" pytest-benchmark numpy arrow ruamel.yaml cloudpickle lz4 - name: Install cryptography (but not for pypy on windows) if: ${{ !((matrix.os == 'windows-latest') && (matrix.python-version == 'pypy3.9')) }} run: | From e33d7261d7eb56337342b383b02bbb0361ae680d Mon Sep 17 00:00:00 2001 From: franz haas Date: Sun, 4 Feb 2024 18:58:29 +0100 Subject: [PATCH 02/32] - added test for user supplied function for compiled rebuild --- tests/test_core.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/test_core.py b/tests/test_core.py index 779bd816..eb7068fb 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -641,6 +641,18 @@ def test_rebuild_issue_664(): # no asserts are needed d.build(obj) + +def test_rebuild_custom_function(): + def getlen(this): + return 2 + + template = Struct( "count" / Rebuild(Byte, getlen), "my_items" / Byte[this.count]) + for d in [template, template.compile()]: + assert d.parse(b"\x02ab") == Container(count=2, my_items=[97,98]) + assert d.build(dict(count=None,my_items=[255,255])) == b"\x02\xff\xff" + assert d.build(dict(count=2,my_items=[255,255])) == b"\x02\xff\xff" + assert d.build(dict(my_items=[255,255])) == b"\x02\xff\xff" + def test_default(): d = Default(Byte, 0) common(d, b"\xff", 255, 1) From ea68c34d8eb4760e9d0aaba2d00f67c87c1c9f4a Mon Sep 17 00:00:00 2001 From: franz haas Date: Sun, 4 Feb 2024 19:05:23 +0100 Subject: [PATCH 03/32] - fixed the rebuild with user functione test --- construct/core.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/construct/core.py b/construct/core.py index f56ca163..8f5919f0 100644 --- a/construct/core.py +++ b/construct/core.py @@ -267,6 +267,7 @@ def __init__(self): self.linkedinstances = {} self.linkedparsers = {} self.linkedbuilders = {} + self.userfunction = {} def allocateId(self): self.nextid += 1 @@ -535,6 +536,7 @@ def reuse(obj, func): linkedinstances = {} linkedparsers = {} linkedbuilders = {} + userfunction = {} len_ = len sum_ = sum @@ -564,6 +566,7 @@ def buildall(obj, io, this): module.linkedinstances = code.linkedinstances module.linkedparsers = code.linkedparsers module.linkedbuilders = code.linkedbuilders + module.userfunction = code.userfunction compiled = module.compiled compiled.source = source compiled.module = module @@ -3007,7 +3010,12 @@ def _emitparse(self, code): return self.subcon._compileparse(code) def _emitbuild(self, code): - return f"reuse({repr(self.func)}, lambda obj: ({self.subcon._compilebuild(code)}))" + if isinstance(self.func, ExprMixin) or (not callable(self.func)): + return f"reuse({repr(self.func)}, lambda obj: ({self.subcon._compilebuild(code)}))" + else: + aid = code.allocateId() + code.userfunction[aid] = self.func + return f"reuse(userfunction[{aid}](this), lambda obj: ({self.subcon._compilebuild(code)}))" def _emitseq(self, ksy, bitwise): return self.subcon._compileseq(ksy, bitwise) From 2c7e020c80e1427a7883c270f4a1d1c6542b2624 Mon Sep 17 00:00:00 2001 From: franz haas Date: Wed, 7 Feb 2024 20:03:13 +0100 Subject: [PATCH 04/32] - fixed ifthenelse and switch with regards to user supplied functions --- tests/test_core.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/test_core.py b/tests/test_core.py index eb7068fb..01b53be5 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -878,6 +878,10 @@ def test_if(): def test_ifthenelse(): common(IfThenElse(True, Int8ub, Int16ub), b"\x01", 1, 1) common(IfThenElse(False, Int8ub, Int16ub), b"\x00\x01", 1, 2) + stimulus_with_user_function = IfThenElse(lambda _: False, Int8ub, Int16ub) + for d in [stimulus_with_user_function, stimulus_with_user_function.compile()]: + common(d, b"\x00\x01", 1, 2) + def test_switch(): d = Switch(this.x, {1:Int8ub, 2:Int16ub, 4:Int32ub}) @@ -888,8 +892,18 @@ def test_switch(): assert raises(d.sizeof) == SizeofError assert raises(d.sizeof, x=1) == 1 + dStencil = Switch(lambda this: this["x"], {1:Int8ub, 2:Int16ub, 4:Int32ub}) + for d in [dStencil, dStencil.compile()]: + common(d, b"\x01", 0x01, 1, x=1) + common(d, b"\x01\x02", 0x0102, 2, x=2) + assert d.parse(b"", x=255) == None + assert d.build(None, x=255) == b"" + assert raises(d.sizeof) == SizeofError + assert raises(d.sizeof, x=1) == 1 + d = Switch(this.x, {}, default=Byte) common(d, b"\x01", 1, 1, x=255) + def test_switch_issue_357(): inner = Struct( From 562c5f71450f68c6d738036643bb72d3dd0b5cd6 Mon Sep 17 00:00:00 2001 From: franzhaas Date: Thu, 8 Feb 2024 19:40:19 +0100 Subject: [PATCH 05/32] This change is user visibly changing enums, they cant be comapred to strings any more. Allwoing to mix Enum, int, and str makes it hard to do the compiled code to do the same thing as the interpreted one. I believe it is better for readability of the user code, the compiler and basically everything between to not allow that. Only int and enum types shall be used. --- construct/core.py | 47 ++++++++++++++++++++++++++++++++++++++++++---- tests/test_core.py | 9 +++++---- 2 files changed, 48 insertions(+), 8 deletions(-) diff --git a/construct/core.py b/construct/core.py index 8f5919f0..a2ee7ae9 100644 --- a/construct/core.py +++ b/construct/core.py @@ -5,6 +5,7 @@ from construct.lib import * from construct.expr import * from construct.version import * +import logging #=============================================================================== @@ -1905,6 +1906,8 @@ class EnumIntegerString(str): """Used internally.""" def __repr__(self): + #Eventually this will just be the int value. This makes enums at runtime of + #compiled code just as fast as integers... return "EnumIntegerString.new(%s, %s)" % (self.intvalue, str.__repr__(self), ) def __int__(self): @@ -1916,6 +1919,18 @@ def new(intvalue, stringvalue): ret.intvalue = intvalue return ret + def __eq__(self, other): + if isinstance(other, int): + return (self.intvalue == other) + elif type(other) == type(self): + return (self.intvalue == other.intvalue) + elif isinstance(other, str): + logging.warning("Using a str to compare with a enum value is depricated! this may lead to bugs in the future!") + return str(self) == other + raise NotImplementedError(f"Cont compare {type(self)} to {type(other)} {other}") + + def __hash__(self): + return str(self).__hash__() class Enum(Adapter): r""" @@ -1969,6 +1984,8 @@ def __init__(self, subcon, *merge, **mapping): self.encmapping = {EnumIntegerString.new(v,k):v for k,v in mapping.items()} self.decmapping = {v:EnumIntegerString.new(v,k) for k,v in mapping.items()} self.ksymapping = {v:k for k,v in mapping.items()} + for k,v in mapping.items(): + setattr(self, k, EnumIntegerString.new(v,k)) def __getattr__(self, name): if name in self.encmapping: @@ -1985,6 +2002,8 @@ def _encode(self, obj, context, path): try: if isinstance(obj, int): return obj + if isinstance(obj, str): + logging.warning("Use enum typed values, not strings as enum values...") return self.encmapping[obj] except KeyError: raise MappingError("building failed, no mapping for %r" % (obj,), path=path) @@ -3987,10 +4006,20 @@ def _sizeof(self, context, path): return sc._sizeof(context, path) def _emitparse(self, code): - return "((%s) if (%s) else (%s))" % (self.thensubcon._compileparse(code), self.condfunc, self.elsesubcon._compileparse(code), ) + if isinstance(self.condfunc, ExprMixin) or (not callable(self.condfunc)): + return "((%s) if (%s) else (%s))" % (self.thensubcon._compileparse(code), self.condfunc, self.elsesubcon._compileparse(code), ) + else: + aid = code.allocateId() + code.userfunction[aid] = self.condfunc + return "((%s) if (%s) else (%s))" % (self.thensubcon._compileparse(code), f"userfunction[{aid}](this)", self.elsesubcon._compileparse(code), ) def _emitbuild(self, code): - return f"(({self.thensubcon._compilebuild(code)}) if ({repr(self.condfunc)}) else ({self.elsesubcon._compilebuild(code)}))" + if isinstance(self.condfunc, ExprMixin) or (not callable(self.condfunc)): + return f"(({self.thensubcon._compilebuild(code)}) if ({repr(self.condfunc)}) else ({self.elsesubcon._compilebuild(code)}))" + else: + aid = code.allocateId() + code.userfunction[aid] = self.condfunc + return f"(({self.thensubcon._compilebuild(code)}) if (userfunction[{aid}](this)) else ({self.elsesubcon._compilebuild(code)}))" def _emitseq(self, ksy, bitwise): return [ @@ -4064,7 +4093,12 @@ def _emitparse(self, code): code.append(f"{fname}[{repr(key)}] = lambda io,this: {sc._compileparse(code)}") defaultfname = f"switch_defaultcase_{code.allocateId()}" code.append(f"{defaultfname} = lambda io,this: {self.default._compileparse(code)}") - return f"{fname}.get({repr(self.keyfunc)}, {defaultfname})(io, this)" + if isinstance(self.keyfunc, ExprMixin) or(not callable(self.keyfunc)): + return f"{fname}.get({repr(self.keyfunc)}, {defaultfname})(io, this)" + else: + aid = code.allocateId() + code.userfunction[aid] = self.keyfunc + return f"{fname}.get(userfunction[{aid}](this), {defaultfname})(io, this)" def _emitbuild(self, code): fname = f"switch_cases_{code.allocateId()}" @@ -4073,7 +4107,12 @@ def _emitbuild(self, code): code.append(f"{fname}[{repr(key)}] = lambda obj,io,this: {sc._compilebuild(code)}") defaultfname = f"switch_defaultcase_{code.allocateId()}" code.append(f"{defaultfname} = lambda obj,io,this: {self.default._compilebuild(code)}") - return f"{fname}.get({repr(self.keyfunc)}, {defaultfname})(obj, io, this)" + if isinstance(self.keyfunc, ExprMixin) or(not callable(self.keyfunc)): + return f"{fname}.get({repr(self.keyfunc)}, {defaultfname})(obj, io, this)" + else: + aid = code.allocateId() + code.userfunction[aid] = self.keyfunc + return f"{fname}.get(userfunction[{aid}](this), {defaultfname})(obj, io, this)" class StopIf(Construct): diff --git a/tests/test_core.py b/tests/test_core.py index 01b53be5..eab89372 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -331,13 +331,14 @@ class F(enum.IntFlag): common(d, b"\x02", "b", 1) def test_enum_issue_298(): - d = Struct( - "ctrl" / Enum(Byte, + e= Enum(Byte, NAK = 0x15, STX = 0x02, - ), + ) + d = Struct( + "ctrl" / e, Probe(), - "optional" / If(lambda this: this.ctrl == "NAK", Byte), + "optional" / If(lambda this: (this.ctrl == e.NAK), Byte), ) common(d, b"\x15\xff", Container(ctrl='NAK', optional=255)) common(d, b"\x02", Container(ctrl='STX', optional=None)) From bd314a29a303ad1ff24adb4f336e86f47027f6fc Mon Sep 17 00:00:00 2001 From: franzhaas Date: Sat, 10 Feb 2024 14:06:55 +0100 Subject: [PATCH 06/32] implement compile for pickle and numpy --- construct/core.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/construct/core.py b/construct/core.py index a2ee7ae9..ba924bda 100644 --- a/construct/core.py +++ b/construct/core.py @@ -3364,6 +3364,19 @@ def _build(self, obj, stream, context, path): pickle.dump(obj, stream) return obj + def _emitparse(self, code): + fname = "factory_%s" % code.allocateId() + code.append(""" + import pickle + """) + return "pickle.load(io)" + + def _emitbuild(self, code): + fname = "factory_%s" % code.allocateId() + code.append(""" + import pickle + """) + return "pickle.dump(obj, io)" @singleton class Numpy(Construct): @@ -3396,6 +3409,20 @@ def _build(self, obj, stream, context, path): numpy.save(stream, obj) return obj + def _emitparse(self, code): + fname = "factory_%s" % code.allocateId() + code.append(""" + import numpy + """) + return "numpy.load(io)" + + def _emitbuild(self, code): + fname = "factory_%s" % code.allocateId() + code.append(""" + import numpy + """) + return "numpy.save(io, obj)" + class NamedTuple(Adapter): r""" From 129f0470df0ec4a7f8eb7686804f149dba941cf4 Mon Sep 17 00:00:00 2001 From: franz haas Date: Sat, 10 Feb 2024 16:10:39 +0100 Subject: [PATCH 07/32] - implemented emitparse for Select --- construct/core.py | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/construct/core.py b/construct/core.py index ba924bda..591c5de2 100644 --- a/construct/core.py +++ b/construct/core.py @@ -1797,6 +1797,7 @@ def PascalString(lengthfield, encoding): def _emitparse(code): return f"io.read({lengthfield._compileparse(code)}).decode({repr(encoding)})" + macro._emitparse = _emitparse def _emitseq(ksy, bitwise): @@ -3929,6 +3930,48 @@ def _build(self, obj, stream, context, path): return obj raise SelectError("no subconstruct matched: %s" % (obj,), path=path) + def _emitparse(self, code): + fname = f"parse_select_{code.allocateId()}" + + block = f""" + def {fname}(io, this): + fallback = io.tell() + """ + for sc in self.subcons: + cb = sc._compileparse(code) + if cb == "None": + block += f""" + return None + """ + else: + block += f""" + try: + return {cb} + except ExplicitError: + raise + except Exception: + io.seek(io, fallback, 0) + """ + code.append(block) + return "%s(io, this)" % (fname,) + + def _emitbuild(self, code): + fname = f"build_select_{code.allocateId()}" + + block = f""" + def {fname}(obj, io, this): + """ + for sc in self.subcons: + block += f""" + try: + return {sc._compilebuild(code)} + except: + pass + """ + code.append(block) + return "%s(obj, io, this)" % (fname,) + + def Optional(subcon): r""" From 9baf837fd3d25412813ff539bf919d9372ec4dc6 Mon Sep 17 00:00:00 2001 From: franz haas Date: Sun, 11 Feb 2024 16:13:06 +0100 Subject: [PATCH 08/32] - optionals compiled inline, nonoptionals combine into single struct.Struct (if possible) --- construct/core.py | 67 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 61 insertions(+), 6 deletions(-) diff --git a/construct/core.py b/construct/core.py index 591c5de2..767e90b0 100644 --- a/construct/core.py +++ b/construct/core.py @@ -2304,12 +2304,64 @@ def {fname}(io, this): this['_root'] = this['_'].get('_root', this) try: """ - for sc in self.subcons: - block += f""" + + subcons = self.subcons.copy() + + while subcons: + _names = [] + _len = 0 + _fmtstrings = "" + + while True: + try: + sc = subcons.pop(0) + if hasattr(sc, "subcon") and hasattr(sc.subcon, "subcon") and isinstance(sc.subcon.subcon, Optional): + raise Exception("optional element") + + fieldName = sc.name + fieldLen = sc.length + fieldFormatStr = sc.fmtstr + if _fmtstrings and fieldFormatStr[0] in {">", "<"}: + if fieldFormatStr[0] != _fmtstrings[0]: + raise Exception() + fieldFormatStr = fieldFormatStr[1:] + _len = _len + fieldLen + _fmtstrings = _fmtstrings+fieldFormatStr + _names.append(fieldName) + sc = None + except: + if _names: + structname = f"formatfield_{code.allocateId()}" + code.append(f"{structname} = struct.Struct({repr(_fmtstrings)})\n") + _intermediate = f"_intermediate = {structname}.unpack(io.read({_len}))" + _results = "[" + ", ".join(f"result[{repr(item)}]" for item in _names) + f"] = _intermediate" + _this = "[" + ", ".join(f"this[{repr(item)}]" for item in _names) + f"] = _intermediate" + block += f""" + {_intermediate} + {_results} + {_this} + """ + break + if sc: + if hasattr(sc, "subcon") and hasattr(sc.subcon, "subcon") and isinstance(sc.subcon.subcon, Optional): + block += f""" + try: + fallback = io.tell() + {f'result[{repr(sc.name)}] = this[{repr(sc.name)}] = ' if sc.name else ''}{sc.subcon.subcon.subcons[0]._compileparse(code)} + except StopFieldError: + pass + except ExplicitError: + raise + except Exception: + {f'result[{repr(sc.name)}] = this[{repr(sc.name)}] = '}None + io.seek(fallback) + """ + else: + block += f""" {f'result[{repr(sc.name)}] = this[{repr(sc.name)}] = ' if sc.name else ''}{sc._compileparse(code)} - """ + """ + block += f""" - pass except StopFieldError: pass return result @@ -3973,7 +4025,7 @@ def {fname}(obj, io, this): -def Optional(subcon): +class Optional(Select): r""" Makes an optional field. @@ -3995,7 +4047,10 @@ def Optional(subcon): >>> d.build(None) b'' """ - return Select(subcon, Pass) + def __init__(self, subcon): + super().__init__() + self.subcons = [subcon, Pass] + self.flagbuildnone = any(sc.flagbuildnone for sc in self.subcons) def If(condfunc, subcon): From b50ab096223465fee809740e1904f64027a7cd0a Mon Sep 17 00:00:00 2001 From: franzhaas Date: Thu, 22 Feb 2024 23:35:04 +0100 Subject: [PATCH 09/32] - byte order irrelevant for single byte items... --- construct/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/construct/core.py b/construct/core.py index 767e90b0..6c0a3130 100644 --- a/construct/core.py +++ b/construct/core.py @@ -2321,8 +2321,8 @@ def {fname}(io, this): fieldName = sc.name fieldLen = sc.length fieldFormatStr = sc.fmtstr - if _fmtstrings and fieldFormatStr[0] in {">", "<"}: - if fieldFormatStr[0] != _fmtstrings[0]: + if _fmtstrings and _fmtstrings[0] in {">", "<"}: + if _fmtstrings[0] != _fmtstrings[0] and not (_fmtstrings[1] in {"B", "b"} and len(f) == 2): raise Exception() fieldFormatStr = fieldFormatStr[1:] _len = _len + fieldLen From 05fd679083acc086fa09fc2090b47cc5f6e2b65b Mon Sep 17 00:00:00 2001 From: franz haas Date: Thu, 22 Feb 2024 20:39:09 +0100 Subject: [PATCH 10/32] - use dict instead of construct for result and this, convert result to Container at the end --- construct/core.py | 8 ++++---- tests/test_core.py | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/construct/core.py b/construct/core.py index 6c0a3130..515136b6 100644 --- a/construct/core.py +++ b/construct/core.py @@ -597,7 +597,7 @@ def _compileparse(self, code): return emitted except NotImplementedError: self._compileinstance(code) - return f"linkedparsers[{id(self)}](io, this, '(???)')" + return f"linkedparsers[{id(self)}](io, Container(**this), '(???)')" def _compilebuild(self, code): """Used internally.""" @@ -2299,8 +2299,8 @@ def _emitparse(self, code): fname = f"parse_struct_{code.allocateId()}" block = f""" def {fname}(io, this): - result = Container() - this = Container(_ = this, _params = this['_params'], _root = None, _parsing = True, _building = False, _sizing = False, _subcons = None, _io = io, _index = this.get('_index', None)) + result = dict() + this = dict(_ = this, _params = this['_params'], _root = None, _parsing = True, _building = False, _sizing = False, _subcons = None, _io = io, _index = this.get('_index', None)) this['_root'] = this['_'].get('_root', this) try: """ @@ -2364,7 +2364,7 @@ def {fname}(io, this): block += f""" except StopFieldError: pass - return result + return Container(result) """ code.append(block) return f"{fname}(io, this)" diff --git a/tests/test_core.py b/tests/test_core.py index eab89372..e768530d 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -338,7 +338,7 @@ def test_enum_issue_298(): d = Struct( "ctrl" / e, Probe(), - "optional" / If(lambda this: (this.ctrl == e.NAK), Byte), + "optional" / If(lambda this: (this["ctrl"] == e.NAK), Byte), ) common(d, b"\x15\xff", Container(ctrl='NAK', optional=255)) common(d, b"\x02", Container(ctrl='STX', optional=None)) @@ -368,11 +368,11 @@ def test_enum_issue_677(): assert isinstance(d.parse(b"\x01"), EnumIntegerString) d = Struct("e" / Enum(Byte, one=1)) - assert str(d.parse(b"\x01")) == 'Container: \n e = (enum) one 1' - assert str(d.parse(b"\xff")) == 'Container: \n e = (enum) (unknown) 255' + assert (d.parse(b"\x01"))["e"] == 1 + assert (d.parse(b"\xff"))["e"] == 255 d = Struct("e" / Enum(Byte, one=1)).compile() - assert str(d.parse(b"\x01")) == 'Container: \n e = (enum) one 1' - assert str(d.parse(b"\xff")) == 'Container: \n e = (enum) (unknown) 255' + assert (d.parse(b"\x01"))["e"] == 1 + assert (d.parse(b"\xff"))["e"] == 255 @xfail(reason="Cannot implement this in EnumIntegerString.") def test_enum_issue_992(): From 96b3905fcee2ca9ec3ba02bc79c4988ab6dec3d4 Mon Sep 17 00:00:00 2001 From: franz haas Date: Sun, 25 Feb 2024 13:00:02 +0100 Subject: [PATCH 11/32] - make Container to pass to user function --- construct/core.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/construct/core.py b/construct/core.py index 515136b6..346afa89 100644 --- a/construct/core.py +++ b/construct/core.py @@ -3087,7 +3087,7 @@ def _emitbuild(self, code): else: aid = code.allocateId() code.userfunction[aid] = self.func - return f"reuse(userfunction[{aid}](this), lambda obj: ({self.subcon._compilebuild(code)}))" + return f"reuse(userfunction[{aid}](Container(this)), lambda obj: ({self.subcon._compilebuild(code)}))" def _emitseq(self, ksy, bitwise): return self.subcon._compileseq(ksy, bitwise) @@ -4136,7 +4136,7 @@ def _emitparse(self, code): else: aid = code.allocateId() code.userfunction[aid] = self.condfunc - return "((%s) if (%s) else (%s))" % (self.thensubcon._compileparse(code), f"userfunction[{aid}](this)", self.elsesubcon._compileparse(code), ) + return "((%s) if (%s) else (%s))" % (self.thensubcon._compileparse(code), f"userfunction[{aid}](Container(this))", self.elsesubcon._compileparse(code), ) def _emitbuild(self, code): if isinstance(self.condfunc, ExprMixin) or (not callable(self.condfunc)): @@ -4144,7 +4144,7 @@ def _emitbuild(self, code): else: aid = code.allocateId() code.userfunction[aid] = self.condfunc - return f"(({self.thensubcon._compilebuild(code)}) if (userfunction[{aid}](this)) else ({self.elsesubcon._compilebuild(code)}))" + return f"(({self.thensubcon._compilebuild(code)}) if (userfunction[{aid}](Container(this))) else ({self.elsesubcon._compilebuild(code)}))" def _emitseq(self, ksy, bitwise): return [ @@ -4223,7 +4223,7 @@ def _emitparse(self, code): else: aid = code.allocateId() code.userfunction[aid] = self.keyfunc - return f"{fname}.get(userfunction[{aid}](this), {defaultfname})(io, this)" + return f"{fname}.get(userfunction[{aid}](Container(this)), {defaultfname})(io, this)" def _emitbuild(self, code): fname = f"switch_cases_{code.allocateId()}" @@ -4237,7 +4237,7 @@ def _emitbuild(self, code): else: aid = code.allocateId() code.userfunction[aid] = self.keyfunc - return f"{fname}.get(userfunction[{aid}](this), {defaultfname})(obj, io, this)" + return f"{fname}.get(userfunction[{aid}](Container(this)), {defaultfname})(obj, io, this)" class StopIf(Construct): From 1fc5c56122e9eba1fd6df4d08e7b60fc3c5597b8 Mon Sep 17 00:00:00 2001 From: franzhaas Date: Sat, 16 Mar 2024 13:49:48 +0100 Subject: [PATCH 12/32] compile padded string one more compiled item... --- construct/core.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/construct/core.py b/construct/core.py index 346afa89..79e319d6 100644 --- a/construct/core.py +++ b/construct/core.py @@ -1770,9 +1770,15 @@ def PaddedString(length, encoding): u'Афон' """ macro = StringEncoded(FixedSized(length, NullStripped(GreedyBytes, pad=encodingunit(encoding))), encoding) + def _emitfulltype(ksy, bitwise): - return dict(size=length, type="strz", encoding=encoding) + return dict(size=length, type="str", encoding=encoding) macro._emitfulltype = _emitfulltype + + def _emitparse(code): + return f"io.read({length}).decode('{encoding}').replace('\\x00', '')" + + macro._emitparse = _emitparse return macro From 7cdc1b7120bc97be0606e0bb35d966a700a797cd Mon Sep 17 00:00:00 2001 From: franz haas Date: Sat, 16 Mar 2024 19:19:12 +0100 Subject: [PATCH 13/32] - improved readability --- construct/core.py | 161 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 130 insertions(+), 31 deletions(-) diff --git a/construct/core.py b/construct/core.py index 79e319d6..1e4c90ff 100644 --- a/construct/core.py +++ b/construct/core.py @@ -1778,7 +1778,13 @@ def _emitfulltype(ksy, bitwise): def _emitparse(code): return f"io.read({length}).decode('{encoding}').replace('\\x00', '')" + def _emitbuild(code): + return f"(io.write(b'\\00'*{length} if obj == '' else obj.ljust({length}, '\\00').encode('{encoding}')[:{length}]))" + macro._emitparse = _emitparse + macro._emitbuild = _emitbuild + macro._encoding = encoding + macro._length = length return macro @@ -1803,8 +1809,25 @@ def PascalString(lengthfield, encoding): def _emitparse(code): return f"io.read({lengthfield._compileparse(code)}).decode({repr(encoding)})" - + + def _emitbuild(code): + fname = f"build_struct_{code.allocateId()}" + block = f""" + def {fname}(obj, io, this): + if obj=="": + obj = 0 + {lengthfield._compilebuild(code)} + else: + encodedObj = obj.encode('{encoding}') + obj = len(encodedObj) + {lengthfield._compilebuild(code)} + io.write(encodedObj) + """ + code.append(block) + return f"{fname}(obj, io, this)" + macro._emitparse = _emitparse + macro._emitbuild = _emitbuild def _emitseq(ksy, bitwise): return [ @@ -1812,10 +1835,8 @@ def _emitseq(ksy, bitwise): dict(id="data", size="lengthfield", type="str", encoding=encoding), ] macro._emitseq = _emitseq - return macro - def CString(encoding): r""" String ending in a terminating null byte (or null bytes in case of UTF16 UTF32). @@ -1839,6 +1860,27 @@ def CString(encoding): def _emitfulltype(ksy, bitwise): return dict(type="strz", encoding=encoding) macro._emitfulltype = _emitfulltype + + def _emitparse(code): + if "def _read2zero(io, term):" not in code.toString(): + code.append(f""" + def _read2zero(io, term): + def _worker(termlen): + while True: + item = io.read(termlen) + if item != term: + yield item + else: + break + return b"".join(_worker(len(term))) + """) + return f"_read2zero(io, {encodingunit(encoding)}).decode({repr(encoding)})" + + def _emitbuild(code): + return f"""io.write(obj.encode("{encoding}")+{encodingunit(encoding)}) if obj!="" else io.write({encodingunit(encoding)})""" + + macro._emitparse = _emitparse + macro._emitbuild = _emitbuild return macro @@ -2303,75 +2345,131 @@ def _sizeof(self, context, path): def _emitparse(self, code): fname = f"parse_struct_{code.allocateId()}" - block = f""" + fullheader = f""" def {fname}(io, this): - result = dict() this = dict(_ = this, _params = this['_params'], _root = None, _parsing = True, _building = False, _sizing = False, _subcons = None, _io = io, _index = this.get('_index', None)) this['_root'] = this['_'].get('_root', this) try: +""" + leanheader = f""" + def {fname}(io, this): + _this = this + this = dict() + this['_'] = _this + try: +""" + block = f""" """ subcons = self.subcons.copy() - + initializer = [] + for sc in subcons: + if hasattr(sc, "subcon") and hasattr(sc.subcon, "subcon") and isinstance(sc.subcon.subcon, Optional): + initializer.append(f"'''{sc.name}''':None") + initializer = ", ".join(initializer) + block += f""" + result = {{{initializer}}} + """ + _all_names = [] + _names = [] while subcons: + _all_names = _all_names + _names _names = [] _len = 0 _fmtstrings = "" + _converters = [] while True: try: sc = subcons.pop(0) if hasattr(sc, "subcon") and hasattr(sc.subcon, "subcon") and isinstance(sc.subcon.subcon, Optional): raise Exception("optional element") - - fieldName = sc.name - fieldLen = sc.length - fieldFormatStr = sc.fmtstr - if _fmtstrings and _fmtstrings[0] in {">", "<"}: - if _fmtstrings[0] != _fmtstrings[0] and not (_fmtstrings[1] in {"B", "b"} and len(f) == 2): + if hasattr(sc, "_encoding") and hasattr(sc, "_length"): #its a padded string + fieldName = sc.name + _names.append(fieldName) + _converters.append(f"decode('{sc._encoding}').replace('\\x00', '')") + _fmtstrings = f"{_fmtstrings}{sc._length}s" + _len = _len + sc._length + else: + fieldName = sc.name + fieldLen = sc.length + fieldFormatStr = sc.fmtstr + noByteOrderForSingleByteItems = {"B":"B", + "b":"b", + "x":"x", + "c":"c",} + if fieldFormatStr in noByteOrderForSingleByteItems: + fieldFormatStr = noByteOrderForSingleByteItems[fieldFormatStr] + + if _fmtstrings == "": + _fmtstrings = fieldFormatStr + elif _fmtstrings[0] in {">", "<"} and len (fieldFormatStr) >= 2 and _fmtstrings[0] == fieldFormatStr[0]: + # byte order already set, and matching + _fmtstrings = f"{_fmtstrings}{fieldFormatStr[1]}" + elif _fmtstrings[0] not in {">", "<"} and fieldFormatStr[0] in {">", "<"} and len (fieldFormatStr) >= 2 : + # byte order not already set + _fmtstrings = f"{fieldFormatStr[0]}{_fmtstrings}{fieldFormatStr[1:]}" + elif fieldFormatStr[0] not in {">", "<"} and len (fieldFormatStr) > 0: + # no byte order set on added struct + _fmtstrings = f"{_fmtstrings}{fieldFormatStr}" + else: raise Exception() - fieldFormatStr = fieldFormatStr[1:] - _len = _len + fieldLen - _fmtstrings = _fmtstrings+fieldFormatStr - _names.append(fieldName) + _len += fieldLen + _names.append(fieldName) + _converters.append(None) sc = None except: if _names: structname = f"formatfield_{code.allocateId()}" code.append(f"{structname} = struct.Struct({repr(_fmtstrings)})\n") - _intermediate = f"_intermediate = {structname}.unpack(io.read({_len}))" - _results = "[" + ", ".join(f"result[{repr(item)}]" for item in _names) + f"] = _intermediate" - _this = "[" + ", ".join(f"this[{repr(item)}]" for item in _names) + f"] = _intermediate" + _intermediate_tuple = "(" + ", ".join([f"this['{item}']" for item in _names]) + ",)" + _intermediate = f"{_intermediate_tuple} = ({structname}.unpack(io.read({_len})))" + _results = "; ".join(f"result[{repr(item)}] = this[{repr(item)}] " for item in _names) + _intermediateConversion = "".join(f"this['{_names[idx]}']=this['{_names[idx]}'].{conversion};" + for idx, conversion in enumerate(_converters) if conversion) block += f""" {_intermediate} - {_results} - {_this} - """ - break + {_intermediateConversion} + {_results}""" + break if sc: + _all_names.append(sc.name) if hasattr(sc, "subcon") and hasattr(sc.subcon, "subcon") and isinstance(sc.subcon.subcon, Optional): block += f""" try: fallback = io.tell() {f'result[{repr(sc.name)}] = this[{repr(sc.name)}] = ' if sc.name else ''}{sc.subcon.subcon.subcons[0]._compileparse(code)} - except StopFieldError: - pass except ExplicitError: raise except Exception: - {f'result[{repr(sc.name)}] = this[{repr(sc.name)}] = '}None + if io.seek(0, io.SEEK_END) == fallback: + return Container(result) #we are at the end of the stream.... io.seek(fallback) + this[{repr(sc.name)}] = None """ else: block += f""" {f'result[{repr(sc.name)}] = this[{repr(sc.name)}] = ' if sc.name else ''}{sc._compileparse(code)} """ - block += f""" except StopFieldError: pass return Container(result) """ + if block.count("this")==block.count("this["): + block = leanheader + block + _all_names.append("_") + for idx, item in enumerate(_all_names): + varName = f"__THIS_{idx:05}" + block = block.replace(f"this['{item}']", varName) + block = block.replace(f'this["{item}"]', varName) + if block.count(varName) == block.count(varName + " =") and block.count(varName + " =") == 2 and block.count(varName + " = None") == 1: + block = block.replace(varName + " = None", "") + block = block.replace(varName + " = ", "") + if block.count(varName) == 1: + block = block.replace(varName + " = ", "") + else: + block = fullheader + block code.append(block) return f"{fname}(io, this)" @@ -2935,12 +3033,13 @@ def _sizeof(self, context, path): return self.subcon._sizeof(context, path) def _emitparse(self, code): + fun_name = f"parse_const_{code.allocateId()}" code.append(f""" - def parse_const(value, expected): - if not value == expected: raise ConstError - return value + def {fun_name}(value): + if not value == {repr(self.value)}: raise ConstError + return {repr(self.value)} """) - return f"parse_const({self.subcon._compileparse(code)}, {repr(self.value)})" + return f"{fun_name}({self.subcon._compileparse(code)})" def _emitbuild(self, code): if isinstance(self.value, bytes): From fc41881824c0ed1992eb299eb072b54c126e14fc Mon Sep 17 00:00:00 2001 From: franz haas Date: Sat, 30 Mar 2024 08:38:21 +0100 Subject: [PATCH 14/32] - dict at end --- construct/core.py | 81 +++++++++++++++++++---------------------------- 1 file changed, 32 insertions(+), 49 deletions(-) diff --git a/construct/core.py b/construct/core.py index 1e4c90ff..fa29868b 100644 --- a/construct/core.py +++ b/construct/core.py @@ -561,6 +561,8 @@ def buildall(obj, io, this): modulename = hexlify(hashlib.sha1(source.encode()).digest()).decode() module_spec = importlib.machinery.ModuleSpec(modulename, None) module = importlib.util.module_from_spec(module_spec) + with open("p.py", "a") as f: + f.write(source) c = compile(source, '', 'exec') exec(c, module.__dict__) @@ -1176,9 +1178,13 @@ def _sizeof(self, context, path): return self.length def _emitparse(self, code): - fname = f"formatfield_{code.allocateId()}" - code.append(f"{fname} = struct.Struct({repr(self.fmtstr)})") - return f"{fname}.unpack(io.read({self.length}))[0]" + if self.fmtstr not in {"B", "b", "B"}: + fname = f"formatfield_{code.allocateId()}" + code.append(f"{fname}_unpack = struct.Struct({repr(self.fmtstr)}).unpack") + return f"{fname}_unpack(io.read({self.length}))[0]" + else: + return f"(io.read({self.length}))[0]" + def _emitbuild(self, code): fname = f"formatfield_{code.allocateId()}" @@ -1809,7 +1815,6 @@ def PascalString(lengthfield, encoding): def _emitparse(code): return f"io.read({lengthfield._compileparse(code)}).decode({repr(encoding)})" - def _emitbuild(code): fname = f"build_struct_{code.allocateId()}" block = f""" @@ -2345,35 +2350,26 @@ def _sizeof(self, context, path): def _emitparse(self, code): fname = f"parse_struct_{code.allocateId()}" - fullheader = f""" + leanheader = f""" def {fname}(io, this): + #this = {{"_" : this}} this = dict(_ = this, _params = this['_params'], _root = None, _parsing = True, _building = False, _sizing = False, _subcons = None, _io = io, _index = this.get('_index', None)) this['_root'] = this['_'].get('_root', this) try: -""" - leanheader = f""" - def {fname}(io, this): - _this = this - this = dict() - this['_'] = _this - try: """ block = f""" """ subcons = self.subcons.copy() - initializer = [] - for sc in subcons: + localVars2NameDict = {} + for idx, sc in enumerate(subcons): + localVars2NameDict[f"__item_{idx}"] = sc.name if hasattr(sc, "subcon") and hasattr(sc.subcon, "subcon") and isinstance(sc.subcon.subcon, Optional): - initializer.append(f"'''{sc.name}''':None") - initializer = ", ".join(initializer) - block += f""" - result = {{{initializer}}} - """ - _all_names = [] - _names = [] + block += f""" + __item_{idx}=None + """ + Name2LocalVar = {name: localva for localva, name in localVars2NameDict.items()} while subcons: - _all_names = _all_names + _names _names = [] _len = 0 _fmtstrings = "" @@ -2422,54 +2418,41 @@ def {fname}(io, this): if _names: structname = f"formatfield_{code.allocateId()}" code.append(f"{structname} = struct.Struct({repr(_fmtstrings)})\n") - _intermediate_tuple = "(" + ", ".join([f"this['{item}']" for item in _names]) + ",)" + _intermediate_tuple = "(" + ", ".join([f"{Name2LocalVar[item]}" for item in _names]) + ",)" _intermediate = f"{_intermediate_tuple} = ({structname}.unpack(io.read({_len})))" - _results = "; ".join(f"result[{repr(item)}] = this[{repr(item)}] " for item in _names) - _intermediateConversion = "".join(f"this['{_names[idx]}']=this['{_names[idx]}'].{conversion};" - for idx, conversion in enumerate(_converters) if conversion) block += f""" {_intermediate} - {_intermediateConversion} - {_results}""" + """ break if sc: - _all_names.append(sc.name) if hasattr(sc, "subcon") and hasattr(sc.subcon, "subcon") and isinstance(sc.subcon.subcon, Optional): + redDictFiller = "{"+ ", ".join(f"'{name}':{localVar}" for localVar, name in localVars2NameDict.items() if localVar in block)+ "}" block += f""" try: fallback = io.tell() - {f'result[{repr(sc.name)}] = this[{repr(sc.name)}] = ' if sc.name else ''}{sc.subcon.subcon.subcons[0]._compileparse(code)} + {f'{Name2LocalVar[sc.name]} = ' if sc.name else ''}{sc.subcon.subcon.subcons[0]._compileparse(code)} except ExplicitError: raise except Exception: if io.seek(0, io.SEEK_END) == fallback: - return Container(result) #we are at the end of the stream.... + return Container({redDictFiller}) #we are at the end of the stream.... io.seek(fallback) - this[{repr(sc.name)}] = None """ else: block += f""" - {f'result[{repr(sc.name)}] = this[{repr(sc.name)}] = ' if sc.name else ''}{sc._compileparse(code)} + {f'{Name2LocalVar[sc.name]} = ' if sc.name else ''}{sc._compileparse(code)} """ + redDictFiller = "{"+ ", ".join(f"'{name}':{localVar}" for localVar, name in localVars2NameDict.items() if localVar in block)+ "}" + block += f""" except StopFieldError: + return Container({redDictFiller_sfe}) pass - return Container(result) + return Container({redDictFiller}) """ - if block.count("this")==block.count("this["): - block = leanheader + block - _all_names.append("_") - for idx, item in enumerate(_all_names): - varName = f"__THIS_{idx:05}" - block = block.replace(f"this['{item}']", varName) - block = block.replace(f'this["{item}"]', varName) - if block.count(varName) == block.count(varName + " =") and block.count(varName + " =") == 2 and block.count(varName + " = None") == 1: - block = block.replace(varName + " = None", "") - block = block.replace(varName + " = ", "") - if block.count(varName) == 1: - block = block.replace(varName + " = ", "") - else: - block = fullheader + block + block = leanheader + block + for name, value in Name2LocalVar.items(): + block = block.replace(f"this['{name}']", value) code.append(block) return f"{fname}(io, this)" @@ -2616,7 +2599,7 @@ def {fname}(io, this): try: """ for sc in self.subcons: - block += f""" + block += f"""this[ result.append({sc._compileparse(code)}) """ if sc.name: From c5a10768d795c2638ac15a18f199614a3df46e01 Mon Sep 17 00:00:00 2001 From: franz haas Date: Sat, 30 Mar 2024 10:25:19 +0100 Subject: [PATCH 15/32] inlining switch parsing --- construct/core.py | 199 ++++++++++++++++++++++++++++------------------ 1 file changed, 121 insertions(+), 78 deletions(-) diff --git a/construct/core.py b/construct/core.py index fa29868b..4c856e51 100644 --- a/construct/core.py +++ b/construct/core.py @@ -122,6 +122,8 @@ class StopFieldError(ConstructError): Only one parsing class can raise this exception: StopIf. It can mean the given condition was met during parsing or building. """ pass + + class PaddingError(ConstructError): """ Multiple parsing classes can raise this exception: PaddedString Padding Padded Aligned FixedSized NullTerminated NullStripped. It can mean multiple issues: the encoded string or bytes takes more bytes than padding allows, length parameter was invalid, pattern terminator or pad is not a proper bytes value, modulus was less than 2. @@ -509,7 +511,7 @@ def _sizeof(self, context, path): def _actualsize(self, stream, context, path): return self._sizeof(context, path) - def compile(self, filename=None): + def compile(self, filename=None, containertype="Container"): """ Transforms a construct into another construct that does same thing (has same parsing and building semantics) but is much faster when parsing. Already compiled instances just compile into itself. @@ -545,6 +547,9 @@ def reuse(obj, func): max_ = max abs_ = abs """) + code.append(f""" + Container = {containertype} + """) code.append(f""" def parseall(io, this): return {self._compileparse(code)} @@ -552,7 +557,8 @@ def buildall(obj, io, this): return {self._compilebuild(code)} compiled = Compiled(parseall, buildall) """) - source = code.toString() + source = code.toString().replace("this|__current_result__", "this") + source = code.toString().replace("__current_result__", "{}") if filename: with open(filename, "wt") as f: @@ -561,7 +567,7 @@ def buildall(obj, io, this): modulename = hexlify(hashlib.sha1(source.encode()).digest()).decode() module_spec = importlib.machinery.ModuleSpec(modulename, None) module = importlib.util.module_from_spec(module_spec) - with open("p.py", "a") as f: + with open("p.py", "w") as f: f.write(source) c = compile(source, '', 'exec') exec(c, module.__dict__) @@ -599,7 +605,7 @@ def _compileparse(self, code): return emitted except NotImplementedError: self._compileinstance(code) - return f"linkedparsers[{id(self)}](io, Container(**this), '(???)')" + return f"linkedparsers[{id(self)}](io, Container(**(this|__current_result__)), '(???)')" def _compilebuild(self, code): """Used internally.""" @@ -2232,6 +2238,26 @@ def _emitbuild(self, code): #=============================================================================== # structures and sequences #=============================================================================== + +def __is_type__(sc, type): + while True: + if isinstance(sc, type): + return True + elif hasattr(sc, "subcon"): + sc = sc.subcon + else: + return False + +def __get_type__(sc, type, maxDepth=-1): + while maxDepth!=0: + maxDepth-=1 + if isinstance(sc, type): + return sc + elif hasattr(sc, "subcon"): + sc = sc.subcon + else: + return None + class Struct(Construct): r""" Sequence of usually named constructs, similar to structs in C. The members are parsed and build in the order they are defined. If a member is anonymous (its name is None) then it gets parsed and the value discarded, or it gets build from nothing (from None). @@ -2350,24 +2376,22 @@ def _sizeof(self, context, path): def _emitparse(self, code): fname = f"parse_struct_{code.allocateId()}" - leanheader = f""" + this_init1="""this = Container(_ = Container(this), _params = this['_params'], _root = None, _parsing = True, _building = False, _sizing = False, _subcons = None, _io = io, _index = this.get('_index', None))""" + this_init2="""this['_root'] = this['_'].get('_root', this)""" + block = f""" def {fname}(io, this): - #this = {{"_" : this}} - this = dict(_ = this, _params = this['_params'], _root = None, _parsing = True, _building = False, _sizing = False, _subcons = None, _io = io, _index = this.get('_index', None)) - this['_root'] = this['_'].get('_root', this) - try: + {this_init1} + {this_init2} """ - block = f""" - """ - subcons = self.subcons.copy() localVars2NameDict = {} for idx, sc in enumerate(subcons): - localVars2NameDict[f"__item_{idx}"] = sc.name - if hasattr(sc, "subcon") and hasattr(sc.subcon, "subcon") and isinstance(sc.subcon.subcon, Optional): - block += f""" - __item_{idx}=None - """ + localVars2NameDict[f"__item_{idx}_"] = sc.name + if __is_type__(sc, Optional) or __is_type__(sc, StopIf): + if sc.name: + block += f""" + __item_{idx}_=None # {sc.name} + """ Name2LocalVar = {name: localva for localva, name in localVars2NameDict.items()} while subcons: _names = [] @@ -2378,24 +2402,20 @@ def {fname}(io, this): while True: try: sc = subcons.pop(0) - if hasattr(sc, "subcon") and hasattr(sc.subcon, "subcon") and isinstance(sc.subcon.subcon, Optional): + if __is_type__(sc, Optional): raise Exception("optional element") - if hasattr(sc, "_encoding") and hasattr(sc, "_length"): #its a padded string - fieldName = sc.name - _names.append(fieldName) - _converters.append(f"decode('{sc._encoding}').replace('\\x00', '')") + elif __is_type__(sc, StringEncoded) and hasattr(sc, "_encoding") and hasattr(sc, "_length"): #its a padded string StringEncoded + _converters.append(f"{Name2LocalVar[sc.name]} = {Name2LocalVar[sc.name]}.decode('{sc._encoding}').replace('\\x00', '')") _fmtstrings = f"{_fmtstrings}{sc._length}s" - _len = _len + sc._length + _len += sc._length else: - fieldName = sc.name - fieldLen = sc.length - fieldFormatStr = sc.fmtstr noByteOrderForSingleByteItems = {"B":"B", "b":"b", "x":"x", "c":"c",} - if fieldFormatStr in noByteOrderForSingleByteItems: - fieldFormatStr = noByteOrderForSingleByteItems[fieldFormatStr] + fieldFormatStr = sc.fmtstr + if sc.fmtstr in noByteOrderForSingleByteItems: + fieldFormatStr = noByteOrderForSingleByteItems[sc.fmtstr] if _fmtstrings == "": _fmtstrings = fieldFormatStr @@ -2409,52 +2429,62 @@ def {fname}(io, this): # no byte order set on added struct _fmtstrings = f"{_fmtstrings}{fieldFormatStr}" else: - raise Exception() - _len += fieldLen - _names.append(fieldName) - _converters.append(None) + raise Exception("Dont know what to do with that...") + _len += sc.length + _names.append(sc.name) sc = None except: - if _names: + if _names: #There is at least one item to be parsed using a struct structname = f"formatfield_{code.allocateId()}" - code.append(f"{structname} = struct.Struct({repr(_fmtstrings)})\n") - _intermediate_tuple = "(" + ", ".join([f"{Name2LocalVar[item]}" for item in _names]) + ",)" - _intermediate = f"{_intermediate_tuple} = ({structname}.unpack(io.read({_len})))" + code.append(f"{structname} = struct.Struct({repr(_fmtstrings)}) # {_len}\n") + _intermediate = ", ".join([f"{Name2LocalVar[item]}" for item in _names]) + _intermediate = f"({_intermediate},) = ({structname}.unpack(io.read({_len})))" + converters = " ; ".join(_converters) block += f""" - {_intermediate} + {_intermediate} + {converters} """ break - if sc: - if hasattr(sc, "subcon") and hasattr(sc.subcon, "subcon") and isinstance(sc.subcon.subcon, Optional): - redDictFiller = "{"+ ", ".join(f"'{name}':{localVar}" for localVar, name in localVars2NameDict.items() if localVar in block)+ "}" + if sc: #sc is still to be dealt + redDictFiller = "{"+ ", ".join(f"'{name}':{localVar}" for localVar, name in localVars2NameDict.items() if localVar in block)+ "}" + if __is_type__(sc, Optional): + scOpt = __get_type__(sc, Optional) + block += f""" + try: + fallback = io.tell() + {f'{Name2LocalVar[sc.name]} = ' if sc.name else ''}{scOpt.subcons[0]._compileparse(code)} + except ExplicitError: + raise + except Exception: + if io.seek(0, io.SEEK_END) == fallback: + return Container(__current_result__) #we are at the end of the stream.... + io.seek(fallback) + """ + elif __get_type__(sc, StopIf, 2): block += f""" - try: - fallback = io.tell() - {f'{Name2LocalVar[sc.name]} = ' if sc.name else ''}{sc.subcon.subcon.subcons[0]._compileparse(code)} - except ExplicitError: - raise - except Exception: - if io.seek(0, io.SEEK_END) == fallback: - return Container({redDictFiller}) #we are at the end of the stream.... - io.seek(fallback) + {__get_type__(sc, StopIf)._compileparseNoRaise()} return Container(__current_result__) #stopif in struct """ else: block += f""" - {f'{Name2LocalVar[sc.name]} = ' if sc.name else ''}{sc._compileparse(code)} + {f'{Name2LocalVar[sc.name]} = ' if sc.name else ''}{sc._compileparse(code)} """ - redDictFiller = "{"+ ", ".join(f"'{name}':{localVar}" for localVar, name in localVars2NameDict.items() if localVar in block)+ "}" - + block = block.replace("__current_result__", redDictFiller) + redDictFiller = "{"+ ", ".join(f"'{name}':{localVar}" for localVar, name in localVars2NameDict.items() if (localVar in block) and name)+ "}" block += f""" - except StopFieldError: - return Container({redDictFiller_sfe}) - pass return Container({redDictFiller}) """ - block = leanheader + block for name, value in Name2LocalVar.items(): block = block.replace(f"this['{name}']", value) + + if block.count("this")==8: + block = block.replace(this_init1, "") + block = block.replace(this_init2, "") + block = block.replace(", this", "") + code.append(block) + return f"{fname}(io)" code.append(block) - return f"{fname}(io, this)" + return f"{fname}(io, this|__current_result__)" + def _emitbuild(self, code): fname = f"build_struct_{code.allocateId()}" @@ -2599,7 +2629,14 @@ def {fname}(io, this): try: """ for sc in self.subcons: - block += f"""this[ + if isinstance(sc, StopIf): + redDictFiller = "{"+ ", ".join(f"'{name}':{localVar}" for localVar, name in localVars2NameDict.items() if localVar in block)+ "}" + sif = sc._compileparseNoRaise() + block += f""" + {sif} return {redDictFiller} + """ + else: + block += f""" result.append({sc._compileparse(code)}) """ if sc.name: @@ -2613,7 +2650,7 @@ def {fname}(io, this): return result """ code.append(block) - return f"{fname}(io, this)" + return f"{fname}(io, this|__current_result__)" def _emitbuild(self, code): fname = f"build_sequence_{code.allocateId()}" @@ -2875,7 +2912,7 @@ def {fname}(io, this): return list_ """ code.append(block) - return f"{fname}(io, this)" + return f"{fname}(io, (this|__current_result__))" def _emitbuild(self, code): fname = f"build_repeatuntil_{code.allocateId()}" @@ -3446,7 +3483,7 @@ def {fname}(io, this): return this[{repr(self.parsebuildfrom)}] """ code.append(block) - return f"{fname}(io, this)" + return f"{fname}(io, (this|__current_result__))" def _emitbuild(self, code): fname = f"build_focusedseq_{code.allocateId()}" @@ -3944,6 +3981,7 @@ def _emitparse(self, code): fname = "parse_union_%s" % code.allocateId() block = """ def %s(io, this): + #union this = Container(_ = this, _params = this['_params'], _root = None, _parsing = True, _building = False, _sizing = False, _subcons = None, _io = io, _index = this.get('_index', None)) this['_root'] = this['_'].get('_root', this) fallback = io.tell() @@ -3988,7 +4026,7 @@ def %s(io, this): return this """ code.append(block) - return "%s(io, this)" % (fname,) + return "%s(io, (this|__current_result__))" % (fname,) def _emitbuild(self, code): fname = f"build_union_{code.allocateId()}" @@ -4093,7 +4131,7 @@ def {fname}(io, this): io.seek(io, fallback, 0) """ code.append(block) - return "%s(io, this)" % (fname,) + return "%s(io, (this|__current_result__))" % (fname,) def _emitbuild(self, code): fname = f"build_select_{code.allocateId()}" @@ -4224,7 +4262,7 @@ def _emitparse(self, code): else: aid = code.allocateId() code.userfunction[aid] = self.condfunc - return "((%s) if (%s) else (%s))" % (self.thensubcon._compileparse(code), f"userfunction[{aid}](Container(this))", self.elsesubcon._compileparse(code), ) + return "((%s) if (%s) else (%s))" % (self.thensubcon._compileparse(code), f"userfunction[{aid}](Container(this|__current_result__))", self.elsesubcon._compileparse(code), ) def _emitbuild(self, code): if isinstance(self.condfunc, ExprMixin) or (not callable(self.condfunc)): @@ -4241,6 +4279,8 @@ def _emitseq(self, ksy, bitwise): ] + + class Switch(Construct): r""" A conditional branch. @@ -4300,18 +4340,23 @@ def _sizeof(self, context, path): raise SizeofError("cannot calculate size, key not found in context", path=path) def _emitparse(self, code): - fname = f"switch_cases_{code.allocateId()}" - code.append(f"{fname} = {{}}") - for key,sc in self.cases.items(): - code.append(f"{fname}[{repr(key)}] = lambda io,this: {sc._compileparse(code)}") - defaultfname = f"switch_defaultcase_{code.allocateId()}" - code.append(f"{defaultfname} = lambda io,this: {self.default._compileparse(code)}") + def __make_switch_statement(cases, keyfun, default, code, assignWalrus=False): + aid = code.allocateId() + if cases: + newCond, newAction = cases.pop() + if assignWalrus: # use walrus operator to avoid multiple evaluation of check. + nameOfkFun = f"switch_lookup_value_{aid}" + return f"{newAction._emitparse(code)} if (({nameOfkFun} := ({keyfun})) == ({repr(newCond)})) else ({__make_switch_statement(cases, nameOfkFun, default, code, False)})" + return f"{newAction._emitparse(code)} if (({keyfun}) == ({repr(newCond)})) else ({__make_switch_statement(cases, keyfun, default, code, False)})" + else: + return f"{default}" + if isinstance(self.keyfunc, ExprMixin) or(not callable(self.keyfunc)): - return f"{fname}.get({repr(self.keyfunc)}, {defaultfname})(io, this)" + return __make_switch_statement(set(self.cases.items()), repr(self.keyfunc), self.default._compileparse(code), code, True) else: aid = code.allocateId() code.userfunction[aid] = self.keyfunc - return f"{fname}.get(userfunction[{aid}](Container(this)), {defaultfname})(io, this)" + return __make_switch_statement(set(self.cases.items()), f"userfunction[{aid}](Container(this))", self.default._compileparse(code), code, True) def _emitbuild(self, code): fname = f"switch_cases_{code.allocateId()}" @@ -4365,13 +4410,11 @@ def _build(self, obj, stream, context, path): def _sizeof(self, context, path): raise SizeofError("StopIf cannot determine size because it depends on actual context which then depends on actual data and outer constructs", path=path) + def _compileparseNoRaise(self): + return f"if({repr(self.condfunc)}): " + def _emitparse(self, code): - code.append(f""" - def parse_stopif(condition): - if condition: - raise StopFieldError - """) - return f"parse_stopif({repr(self.condfunc)})" + return f"if({repr(self.condfunc)}): return Container(__current_result__)" def _emitbuild(self, code): code.append(f""" From 7f985fe4bb3f378b534d0a41686a6e6e979be313 Mon Sep 17 00:00:00 2001 From: franz haas Date: Sat, 30 Mar 2024 21:30:09 +0100 Subject: [PATCH 16/32] - fixed io dependencies --- construct/core.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/construct/core.py b/construct/core.py index 4c856e51..349e1378 100644 --- a/construct/core.py +++ b/construct/core.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- import struct, io, binascii, itertools, collections, pickle, sys, os, hashlib, importlib, importlib.machinery, importlib.util - +from io import SEEK_END as io_SEEK_END +from io import SEEK_SET as io_SEEK_SET from construct.lib import * from construct.expr import * from construct.version import * @@ -253,8 +254,8 @@ def __init__(self, contents: bytes, parent_stream, offset: int): def tell(self) -> int: return super().tell() + self.parent_stream_offset - def seek(self, offset: int, whence: int = io.SEEK_SET) -> int: - if whence != io.SEEK_SET: + def seek(self, offset: int, whence: int = io_SEEK_SET) -> int: + if whence != io_SEEK_SET: super().seek(offset, whence) else: super().seek(offset - self.parent_stream_offset) @@ -527,6 +528,8 @@ def compile(self, filename=None, containertype="Container"): from construct import * from construct.lib import * from io import BytesIO + from io import SEEK_END as io_SEEK_END + from io import SEEK_SET as io_SEEK_SET import struct import collections import itertools @@ -2456,7 +2459,7 @@ def {fname}(io, this): except ExplicitError: raise except Exception: - if io.seek(0, io.SEEK_END) == fallback: + if io.seek(0, io_SEEK_END) == fallback: return Container(__current_result__) #we are at the end of the stream.... io.seek(fallback) """ @@ -2476,7 +2479,8 @@ def {fname}(io, this): for name, value in Name2LocalVar.items(): block = block.replace(f"this['{name}']", value) - if block.count("this")==8: + if block.count("this") == 8: + #this is not actively used... remove it to save on dict operations block = block.replace(this_init1, "") block = block.replace(this_init2, "") block = block.replace(", this", "") From 5e2068ed9cd1ef090a143f0d2b0fc8c7243b68b5 Mon Sep 17 00:00:00 2001 From: franz haas Date: Sat, 30 Mar 2024 23:05:38 +0100 Subject: [PATCH 17/32] python3.8 suport --- construct/core.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/construct/core.py b/construct/core.py index 349e1378..0d75acae 100644 --- a/construct/core.py +++ b/construct/core.py @@ -560,7 +560,7 @@ def buildall(obj, io, this): return {self._compilebuild(code)} compiled = Compiled(parseall, buildall) """) - source = code.toString().replace("this|__current_result__", "this") + source = code.toString().replace("dict(this)", "this") source = code.toString().replace("__current_result__", "{}") if filename: @@ -608,7 +608,7 @@ def _compileparse(self, code): return emitted except NotImplementedError: self._compileinstance(code) - return f"linkedparsers[{id(self)}](io, Container(**(this|__current_result__)), '(???)')" + return f"linkedparsers[{id(self)}](io, Container(**({{**this,**__current_result__}})), '(???)')" def _compilebuild(self, code): """Used internally.""" @@ -2487,7 +2487,7 @@ def {fname}(io, this): code.append(block) return f"{fname}(io)" code.append(block) - return f"{fname}(io, this|__current_result__)" + return f"{fname}(io, {{**this,**__current_result__}})" def _emitbuild(self, code): @@ -2654,7 +2654,7 @@ def {fname}(io, this): return result """ code.append(block) - return f"{fname}(io, this|__current_result__)" + return f"{fname}(io, {{**this,**__current_result__}})" def _emitbuild(self, code): fname = f"build_sequence_{code.allocateId()}" @@ -2916,7 +2916,7 @@ def {fname}(io, this): return list_ """ code.append(block) - return f"{fname}(io, (this|__current_result__))" + return f"{fname}(io, ({{**this,**__current_result__}}))" def _emitbuild(self, code): fname = f"build_repeatuntil_{code.allocateId()}" @@ -3487,7 +3487,7 @@ def {fname}(io, this): return this[{repr(self.parsebuildfrom)}] """ code.append(block) - return f"{fname}(io, (this|__current_result__))" + return f"{fname}(io, {{**this,**__current_result__}})" def _emitbuild(self, code): fname = f"build_focusedseq_{code.allocateId()}" @@ -4030,7 +4030,7 @@ def %s(io, this): return this """ code.append(block) - return "%s(io, (this|__current_result__))" % (fname,) + return f"{fname}(io, this)" def _emitbuild(self, code): fname = f"build_union_{code.allocateId()}" @@ -4135,7 +4135,7 @@ def {fname}(io, this): io.seek(io, fallback, 0) """ code.append(block) - return "%s(io, (this|__current_result__))" % (fname,) + return "%s(io, this)" % (fname,) def _emitbuild(self, code): fname = f"build_select_{code.allocateId()}" @@ -4266,7 +4266,7 @@ def _emitparse(self, code): else: aid = code.allocateId() code.userfunction[aid] = self.condfunc - return "((%s) if (%s) else (%s))" % (self.thensubcon._compileparse(code), f"userfunction[{aid}](Container(this|__current_result__))", self.elsesubcon._compileparse(code), ) + return "((%s) if (%s) else (%s))" % (self.thensubcon._compileparse(code), f"userfunction[{aid}](Container({{**this,**__current_result__}}))", self.elsesubcon._compileparse(code), ) def _emitbuild(self, code): if isinstance(self.condfunc, ExprMixin) or (not callable(self.condfunc)): From 0e476c129eff4d69b49f33fc0998d64e7b655e90 Mon Sep 17 00:00:00 2001 From: franz haas Date: Sat, 30 Mar 2024 23:46:01 +0100 Subject: [PATCH 18/32] - also inline builder of switch --- construct/core.py | 282 +++++++++++++++++++++-------------------- tests/test_compiler.py | 2 +- 2 files changed, 148 insertions(+), 136 deletions(-) diff --git a/construct/core.py b/construct/core.py index 0d75acae..e508df78 100644 --- a/construct/core.py +++ b/construct/core.py @@ -1,12 +1,24 @@ # -*- coding: utf-8 -*- -import struct, io, binascii, itertools, collections, pickle, sys, os, hashlib, importlib, importlib.machinery, importlib.util -from io import SEEK_END as io_SEEK_END +import struct +import io +import binascii +import itertools +import collections +import pickle +import sys +import hashlib +import importlib +import importlib.machinery +import importlib.util from io import SEEK_SET as io_SEEK_SET from construct.lib import * from construct.expr import * from construct.version import * import logging +import re +from dataclasses import dataclass +import os #=============================================================================== @@ -836,7 +848,7 @@ def _parse(self, stream, context, path): def _build(self, obj, stream, context, path): obj2 = self._encode(obj, context, path) - buildret = self.subcon._build(obj2, stream, context, path) + self.subcon._build(obj2, stream, context, path) return obj def _decode(self, obj, context, path): @@ -888,7 +900,7 @@ def _parse(self, stream, context, path): def _build(self, obj, stream, context, path): stream2 = io.BytesIO() - buildret = self.subcon._build(obj, stream2, context, path) + self.subcon._build(obj, stream2, context, path) data = stream2.getvalue() data = self._encode(data, context, path) stream_write(stream, data, len(data), path) @@ -995,7 +1007,7 @@ def _emitparse(self, code): return f"io.read({self.length})" def _emitbuild(self, code): - return f"(io.write(obj), obj)[1]" + return "(io.write(obj), obj)[1]" def _emitfulltype(self, ksy, bitwise): return dict(size=self.length) @@ -1030,10 +1042,10 @@ def _build(self, obj, stream, context, path): return data def _emitparse(self, code): - return f"io.read()" + return "io.read()" def _emitbuild(self, code): - return f"(io.write(obj), obj)[1]" + return "(io.write(obj), obj)[1]" def _emitfulltype(self, ksy, bitwise): return dict(size_eos=True) @@ -1877,7 +1889,7 @@ def _emitfulltype(ksy, bitwise): def _emitparse(code): if "def _read2zero(io, term):" not in code.toString(): - code.append(f""" + code.append(""" def _read2zero(io, term): def _worker(termlen): while True: @@ -1951,10 +1963,10 @@ def _sizeof(self, context, path): return 1 def _emitparse(self, code): - return f"(io.read(1) != b'\\x00')" + return "(io.read(1) != b'\\x00')" def _emitbuild(self, code): - return f"((io.write(b'\\x01') if obj else io.write(b'\\x00')), obj)[1]" + return "((io.write(b'\\x01') if obj else io.write(b'\\x00')), obj)[1]" def _emitfulltype(self, ksy, bitwise): return dict(type=("b1" if bitwise else "u1"), _construct_render="Flag") @@ -2261,6 +2273,38 @@ def __get_type__(sc, type, maxDepth=-1): else: return None + +def reduceDependancyDepth(block, code): + argnames = passnames = "" + found = (item[1] for item in re.compile(r"(this(\['.*?'\])*)").findall(block)) + for item in found: + if item.startswith("this['_']"): + argName = f"_argname_{code.allocateId()}" + block = block.replace(item, argName) + argnames += ", " + argName + passnames += ", " + f"this{item[9:]}" + return block, (argnames, passnames) + + +def materializeCollectedFixedSizeElements(currentStretchOfFixedLen, block, code, Name2LocalVar): + if currentStretchOfFixedLen.names: #There is at least one item to be parsed using a struct + structname = f"formatfield_{code.allocateId()}" + code.append(f"{structname} = struct.Struct({repr(currentStretchOfFixedLen.fmtstring)}) # {currentStretchOfFixedLen.length}\n") + _intermediate = f"""{", ".join(f"{Name2LocalVar[item]}" for item in currentStretchOfFixedLen.names)} = ({structname}.unpack(io.read({currentStretchOfFixedLen.length})))""" + return block + f""" + {_intermediate} + {currentStretchOfFixedLen.convertercmd} + """ + return block + +@dataclass +class _stretchOfFixedLen: + length: int + fmtstring: str + convertercmd: str + names: list[str] + + class Struct(Construct): r""" Sequence of usually named constructs, similar to structs in C. The members are parsed and build in the order they are defined. If a member is anonymous (its name is None) then it gets parsed and the value discarded, or it gets build from nothing (from None). @@ -2379,77 +2423,44 @@ def _sizeof(self, context, path): def _emitparse(self, code): fname = f"parse_struct_{code.allocateId()}" - this_init1="""this = Container(_ = Container(this), _params = this['_params'], _root = None, _parsing = True, _building = False, _sizing = False, _subcons = None, _io = io, _index = this.get('_index', None))""" - this_init2="""this['_root'] = this['_'].get('_root', this)""" - block = f""" - def {fname}(io, this): - {this_init1} - {this_init2} -""" - subcons = self.subcons.copy() - localVars2NameDict = {} - for idx, sc in enumerate(subcons): - localVars2NameDict[f"__item_{idx}_"] = sc.name - if __is_type__(sc, Optional) or __is_type__(sc, StopIf): - if sc.name: - block += f""" - __item_{idx}_=None # {sc.name} - """ - Name2LocalVar = {name: localva for localva, name in localVars2NameDict.items()} - while subcons: - _names = [] - _len = 0 - _fmtstrings = "" - _converters = [] - - while True: - try: - sc = subcons.pop(0) - if __is_type__(sc, Optional): - raise Exception("optional element") - elif __is_type__(sc, StringEncoded) and hasattr(sc, "_encoding") and hasattr(sc, "_length"): #its a padded string StringEncoded - _converters.append(f"{Name2LocalVar[sc.name]} = {Name2LocalVar[sc.name]}.decode('{sc._encoding}').replace('\\x00', '')") - _fmtstrings = f"{_fmtstrings}{sc._length}s" - _len += sc._length - else: - noByteOrderForSingleByteItems = {"B":"B", - "b":"b", - "x":"x", - "c":"c",} - fieldFormatStr = sc.fmtstr - if sc.fmtstr in noByteOrderForSingleByteItems: - fieldFormatStr = noByteOrderForSingleByteItems[sc.fmtstr] - - if _fmtstrings == "": - _fmtstrings = fieldFormatStr - elif _fmtstrings[0] in {">", "<"} and len (fieldFormatStr) >= 2 and _fmtstrings[0] == fieldFormatStr[0]: - # byte order already set, and matching - _fmtstrings = f"{_fmtstrings}{fieldFormatStr[1]}" - elif _fmtstrings[0] not in {">", "<"} and fieldFormatStr[0] in {">", "<"} and len (fieldFormatStr) >= 2 : - # byte order not already set - _fmtstrings = f"{fieldFormatStr[0]}{_fmtstrings}{fieldFormatStr[1:]}" - elif fieldFormatStr[0] not in {">", "<"} and len (fieldFormatStr) > 0: - # no byte order set on added struct - _fmtstrings = f"{_fmtstrings}{fieldFormatStr}" - else: - raise Exception("Dont know what to do with that...") - _len += sc.length - _names.append(sc.name) - sc = None - except: - if _names: #There is at least one item to be parsed using a struct - structname = f"formatfield_{code.allocateId()}" - code.append(f"{structname} = struct.Struct({repr(_fmtstrings)}) # {_len}\n") - _intermediate = ", ".join([f"{Name2LocalVar[item]}" for item in _names]) - _intermediate = f"({_intermediate},) = ({structname}.unpack(io.read({_len})))" - converters = " ; ".join(_converters) - block += f""" - {_intermediate} - {converters} - """ - break - if sc: #sc is still to be dealt - redDictFiller = "{"+ ", ".join(f"'{name}':{localVar}" for localVar, name in localVars2NameDict.items() if localVar in block)+ "}" + localVars2NameDict = {f"__item_{idx}_": sc for idx, sc in enumerate(self.subcons)} + block = "".join(f"""{os.linesep} {key}=None # {sc.name}""" for key, sc in + ((key, sc) for key, sc in localVars2NameDict.items() if ((__is_type__(sc, Optional) or __is_type__(sc, StopIf)) and sc.name))) + localVars2NameDict = {key: sc.name for key, sc in localVars2NameDict.items()} + Name2LocalVar = {name: localVar for localVar, name in localVars2NameDict.items()} + currentStretchOfFixedLen = _stretchOfFixedLen(length=0, fmtstring="", convertercmd="", names=[]) + for sc in self.subcons: + if __is_type__(sc, StringEncoded) and hasattr(sc, "_encoding") and hasattr(sc, "_length"): #its a padded string StringEncoded + currentStretchOfFixedLen.convertercmd += f"{Name2LocalVar[sc.name]} = {Name2LocalVar[sc.name]}.decode('{sc._encoding}').replace('\\x00', '');" + currentStretchOfFixedLen.fmtstring += f"{sc._length}s" + currentStretchOfFixedLen.length += sc._length + currentStretchOfFixedLen.names.append(sc.name) + elif hasattr(sc, "fmtstr") and hasattr(sc, "_length"): #its a fixed length fmtstr entry + noByteOrderForSingleByteItems = {"B":"B", + "b":"b", + "x":"x", + "c":"c",} + fieldFormatStr = noByteOrderForSingleByteItems[sc.fmtstr] if sc.fmtstr in noByteOrderForSingleByteItems else sc.fmtstr + if currentStretchOfFixedLen.fmtstring == "": + currentStretchOfFixedLen.fmtstring = fieldFormatStr + elif currentStretchOfFixedLen.fmtstring[0] in {">", "<"} and len (fieldFormatStr) >= 2 and currentStretchOfFixedLen.fmtstring[0] == fieldFormatStr[0]: + # byte order already set, and matching + currentStretchOfFixedLen.fmtstring = f"{currentStretchOfFixedLen.fmtstring}{fieldFormatStr[1]}" + elif currentStretchOfFixedLen.fmtstring[0] not in {">", "<"} and fieldFormatStr[0] in {">", "<"} and len (fieldFormatStr) >= 2 : + # byte order not already set + currentStretchOfFixedLen.fmtstring = f"{fieldFormatStr[0]}{currentStretchOfFixedLen.fmtstring}{fieldFormatStr[1:]}" + elif fieldFormatStr[0] not in {">", "<"} and len (fieldFormatStr) > 0: + # no byte order set on added struct + currentStretchOfFixedLen.fmtstring = f"{currentStretchOfFixedLen.fmtstring}{fieldFormatStr}" + else: + # change of byte order mid parsing... + block = materializeCollectedFixedSizeElements(currentStretchOfFixedLen, block, code, Name2LocalVar) + currentStretchOfFixedLen = _stretchOfFixedLen(length=0, fmtstring=fieldFormatStr, convertercmd="", names=[]) + currentStretchOfFixedLen.length += sc.length + currentStretchOfFixedLen.names.append(sc.name) + else: # a variable length item + block = materializeCollectedFixedSizeElements(currentStretchOfFixedLen, block, code, Name2LocalVar) + currentResult = "{"+ ", ".join(f"'{name}':{localVar}" for localVar, name in localVars2NameDict.items() if localVar in block)+ "}" if __is_type__(sc, Optional): scOpt = __get_type__(sc, Optional) block += f""" @@ -2461,34 +2472,30 @@ def {fname}(io, this): except Exception: if io.seek(0, io_SEEK_END) == fallback: return Container(__current_result__) #we are at the end of the stream.... - io.seek(fallback) - """ + io.seek(fallback)""" elif __get_type__(sc, StopIf, 2): block += f""" - {__get_type__(sc, StopIf)._compileparseNoRaise()} return Container(__current_result__) #stopif in struct - """ + {__get_type__(sc, StopIf)._compileparseNoRaise()} return Container(__current_result__) #stopif in struct""" else: block += f""" - {f'{Name2LocalVar[sc.name]} = ' if sc.name else ''}{sc._compileparse(code)} - """ - block = block.replace("__current_result__", redDictFiller) - redDictFiller = "{"+ ", ".join(f"'{name}':{localVar}" for localVar, name in localVars2NameDict.items() if (localVar in block) and name)+ "}" + {f'{Name2LocalVar[sc.name]} = ' if sc.name else ''}{sc._compileparse(code)}""" + block = block.replace("__current_result__", currentResult) + currentStretchOfFixedLen = _stretchOfFixedLen(length=0, fmtstring="", convertercmd="", names=[]) + block = materializeCollectedFixedSizeElements(currentStretchOfFixedLen, block, code, Name2LocalVar) + currentResult = "{"+ ", ".join(f"'{name}':{localVar}" for localVar, name in localVars2NameDict.items() if (localVar in block) and name)+ "}" block += f""" - return Container({redDictFiller}) - """ + return Container({currentResult})""" for name, value in Name2LocalVar.items(): block = block.replace(f"this['{name}']", value) - - if block.count("this") == 8: - #this is not actively used... remove it to save on dict operations - block = block.replace(this_init1, "") - block = block.replace(this_init2, "") - block = block.replace(", this", "") - code.append(block) - return f"{fname}(io)" - code.append(block) - return f"{fname}(io, {{**this,**__current_result__}})" - + block, (argnames, passnames) = reduceDependancyDepth(block, code) + if ("this" not in block): + code.append(f"""def {fname}(io{passnames}):""" + block) + return f"{fname}(io{argnames})" + if ("this" in block): + code.append(f"""def {fname}(io{passnames}, this): + this = Container(_ = Container(this), _params = this['_params'], _root = None, _parsing = True, _building = False, _sizing = False, _subcons = None, _io = io, _index = this.get('_index', None)) + this['_root'] = this['_'].get('_root', this)""" + block) + return f"{fname}(io{argnames}, {{**this,**__current_result__}})" def _emitbuild(self, code): fname = f"build_struct_{code.allocateId()}" @@ -2506,7 +2513,7 @@ def {fname}(obj, io, this): {f'this[{repr(sc.name)}] = obj' if sc.name else ''} {f'this[{repr(sc.name)}] = ' if sc.name else ''}{sc._compilebuild(code)} """ - block += f""" + block += """ pass except StopFieldError: pass @@ -2647,7 +2654,7 @@ def {fname}(io, this): block += f""" this[{repr(sc.name)}] = result[-1] """ - block += f""" + block += """ pass except StopFieldError: pass @@ -2668,13 +2675,13 @@ def {fname}(obj, io, this): """ for sc in self.subcons: block += f""" - {f'obj = next(objiter)'} + {'obj = next(objiter)'} {f'this[{repr(sc.name)}] = obj' if sc.name else ''} - {f'x = '}{sc._compilebuild(code)} - {f'retlist.append(x)'} + {'x = '}{sc._compilebuild(code)} + {'retlist.append(x)'} {f'this[{repr(sc.name)}] = x' if sc.name else ''} """ - block += f""" + block += """ pass except StopFieldError: pass @@ -2871,7 +2878,8 @@ def _parse(self, stream, context, path): predicate = self.predicate discard = self.discard if not callable(predicate): - predicate = lambda _1,_2,_3: predicate + def predicate(_1, _2, _3): + return predicate obj = ListContainer() for i in itertools.count(): context._index = i @@ -2885,7 +2893,8 @@ def _build(self, obj, stream, context, path): predicate = self.predicate discard = self.discard if not callable(predicate): - predicate = lambda _1,_2,_3: predicate + def predicate(_1, _2, _3): + return predicate partiallist = ListContainer() retlist = ListContainer() for i,e in enumerate(obj): @@ -3316,14 +3325,14 @@ def _sizeof(self, context, path): return 0 def _emitparse(self, code): - code.append(f""" + code.append(""" def parse_check(condition): if not condition: raise CheckError """) return f"parse_check({repr(self.func)})" def _emitbuild(self, code): - code.append(f""" + code.append(""" def build_check(condition): if not condition: raise CheckError """) @@ -3502,11 +3511,11 @@ def {fname}(obj, io, this): for sc in self.subcons: block += f""" {f'obj = {"finalobj" if sc.name == self.parsebuildfrom else "None"}'} - {f'buildret = '}{sc._compilebuild(code)} + {'buildret = '}{sc._compilebuild(code)} {f'this[{repr(sc.name)}] = buildret' if sc.name else ''} {f'{"finalret = buildret" if sc.name == self.parsebuildfrom else ""}'} """ - block += f""" + block += """ pass except StopFieldError: pass @@ -3547,14 +3556,14 @@ def _build(self, obj, stream, context, path): return obj def _emitparse(self, code): - fname = "factory_%s" % code.allocateId() + "factory_%s" % code.allocateId() code.append(""" import pickle """) return "pickle.load(io)" def _emitbuild(self, code): - fname = "factory_%s" % code.allocateId() + "factory_%s" % code.allocateId() code.append(""" import pickle """) @@ -3592,14 +3601,14 @@ def _build(self, obj, stream, context, path): return obj def _emitparse(self, code): - fname = "factory_%s" % code.allocateId() + "factory_%s" % code.allocateId() code.append(""" import numpy """) return "numpy.load(io)" def _emitbuild(self, code): - fname = "factory_%s" % code.allocateId() + "factory_%s" % code.allocateId() code.append(""" import numpy """) @@ -4049,7 +4058,7 @@ def {fname}(obj, io, this): {f'buildret = this[{repr(sc.name)}] = ' if sc.name else ''}{sc._compilebuild(code)} {f'return Container({{ {repr(sc.name)}:buildret }})'} """ - block += f""" + block += """ raise UnionError('cannot build, none of subcons were found in the dictionary') """ code.append(block) @@ -4122,7 +4131,7 @@ def {fname}(io, this): for sc in self.subcons: cb = sc._compileparse(code) if cb == "None": - block += f""" + block += """ return None """ else: @@ -4363,19 +4372,23 @@ def __make_switch_statement(cases, keyfun, default, code, assignWalrus=False): return __make_switch_statement(set(self.cases.items()), f"userfunction[{aid}](Container(this))", self.default._compileparse(code), code, True) def _emitbuild(self, code): - fname = f"switch_cases_{code.allocateId()}" - code.append(f"{fname} = {{}}") - for key,sc in self.cases.items(): - code.append(f"{fname}[{repr(key)}] = lambda obj,io,this: {sc._compilebuild(code)}") - defaultfname = f"switch_defaultcase_{code.allocateId()}" - code.append(f"{defaultfname} = lambda obj,io,this: {self.default._compilebuild(code)}") + def __make_switch_statement(cases, keyfun, default, code, assignWalrus=False): + aid = code.allocateId() + if cases: + newCond, newAction = cases.pop() + if assignWalrus: # use walrus operator to avoid multiple evaluation of check. + nameOfkFun = f"switch_lookup_value_{aid}" + return f"{newAction._emitbuild(code)} if (({nameOfkFun} := ({keyfun})) == ({repr(newCond)})) else ({__make_switch_statement(cases, nameOfkFun, default, code, False)})" + return f"{newAction._emitbuild(code)} if (({keyfun}) == ({repr(newCond)})) else ({__make_switch_statement(cases, keyfun, default, code, False)})" + else: + return f"{default}" + if isinstance(self.keyfunc, ExprMixin) or(not callable(self.keyfunc)): - return f"{fname}.get({repr(self.keyfunc)}, {defaultfname})(obj, io, this)" + return __make_switch_statement(set(self.cases.items()), repr(self.keyfunc), self.default._compilebuild(code), code, True) else: aid = code.allocateId() code.userfunction[aid] = self.keyfunc - return f"{fname}.get(userfunction[{aid}](Container(this)), {defaultfname})(obj, io, this)" - + return __make_switch_statement(set(self.cases.items()), f"userfunction[{aid}](Container(this))", self.default._compilebuild(code), code, True) class StopIf(Construct): r""" @@ -4421,7 +4434,7 @@ def _emitparse(self, code): return f"if({repr(self.condfunc)}): return Container(__current_result__)" def _emitbuild(self, code): - code.append(f""" + code.append(""" def build_stopif(condition): if condition: raise StopFieldError @@ -4733,7 +4746,7 @@ def _sizeof(self, context, path): return 0 def _emitparse(self, code): - code.append(f""" + code.append(""" def parse_pointer(io, offset, func): fallback = io.tell() io.seek(offset, 2 if offset < 0 else 0) @@ -4744,7 +4757,7 @@ def parse_pointer(io, offset, func): return f"parse_pointer(io, {self.offset}, lambda: {self.subcon._compileparse(code)})" def _emitbuild(self, code): - code.append(f""" + code.append(""" def build_pointer(obj, io, offset, func): fallback = io.tell() io.seek(offset, 2 if offset < 0 else 0) @@ -5621,7 +5634,7 @@ def _parse(self, stream, context, path): def _build(self, obj, stream, context, path): stream2 = RestreamedBytesIO(stream, self.decoder, self.decoderunit, self.encoder, self.encoderunit) - buildret = self.subcon._build(obj, stream2, context, path) + self.subcon._build(obj, stream2, context, path) stream2.close() return obj @@ -6039,7 +6052,6 @@ class EncryptedSym(Tunnel): """ def __init__(self, subcon, cipher): - import cryptography super().__init__(subcon) self.cipher = cipher @@ -6049,7 +6061,7 @@ def _evaluate_cipher(self, context, path): if not isinstance(cipher, Cipher): raise CipherError(f"cipher {repr(cipher)} is not a cryptography.hazmat.primitives.ciphers.Cipher object", path=path) if isinstance(cipher.mode, modes.GCM): - raise CipherError(f"AEAD cipher is not supported in this class, use EncryptedSymAead", path=path) + raise CipherError("AEAD cipher is not supported in this class, use EncryptedSymAead", path=path) return cipher def _decode(self, data, context, path): diff --git a/tests/test_compiler.py b/tests/test_compiler.py index 083f3015..1eed7975 100644 --- a/tests/test_compiler.py +++ b/tests/test_compiler.py @@ -174,7 +174,7 @@ # "repeatuntil2" / RepeatUntil(list_ == [0], Byte), # "repeatuntil3" / RepeatUntil(obj_ == 0, Byte), ) -exampledata = bytes(1000) +exampledata = bytes(10000) def test_compiled_example_benchmark(): From 03fd7cea2a9ace814537b5b2598be384048285b6 Mon Sep 17 00:00:00 2001 From: "LIGHT\\F.Haas2" Date: Fri, 12 Apr 2024 22:33:21 +0200 Subject: [PATCH 19/32] - optional parsing dedicated function to avoid exception handling --- construct/core.py | 59 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 52 insertions(+), 7 deletions(-) diff --git a/construct/core.py b/construct/core.py index e508df78..f77d056d 100644 --- a/construct/core.py +++ b/construct/core.py @@ -1226,6 +1226,25 @@ def _emitprimitivetype(self, ksy, bitwise): if format in "fd": assert not bitwise return "f%s%s" % (self.length, "le" if swapped else "be", ) + + def _emitparse_optional(self, block, code, name_of_parsed_item): + structname = f"formatfield_{code.allocateId()}" + code.append(f"{structname} = struct.Struct({repr(self.fmtstr)})") + if name_of_parsed_item: + assignment = f"({name_of_parsed_item},) = {structname}.unpack(readBuf)" + else: + assignment = "" + block += f""" + fallback = io.tell() + readBuf = io.read({self.length}) + readBufLen = len(readBuf) + if readBufLen == {self.length}: + {assignment} + elif readBufLen == 0: + return Container(__current_result__) #we are at the end of the stream.... + else: + io.seek(fallback)""" + return block class BytesInteger(Construct): @@ -1836,6 +1855,29 @@ def PascalString(lengthfield, encoding): def _emitparse(code): return f"io.read({lengthfield._compileparse(code)}).decode({repr(encoding)})" + + def _emitparse_optional(block, code, name_of_parsed_item): + if name_of_parsed_item: + assignment = f"{name_of_parsed_item} = readBuf.decode({repr(encoding)})" + else: + assignment = "pass" + block = lengthfield._emitparse_optional(block, code, "_lenOfPascalString") + block += f""" + fallback = io.tell() + readBuf = io.read(_lenOfPascalString) + readBufLen = len(readBuf) + if readBufLen == _lenOfPascalString: + try: + {assignment} + except: + io.seek(fallback) + elif readBufLen == 0: + return Container(__current_result__) #we are at the end of the stream.... + else: + io.seek(fallback)""" + return block + + def _emitbuild(code): fname = f"build_struct_{code.allocateId()}" block = f""" @@ -1854,6 +1896,7 @@ def {fname}(obj, io, this): macro._emitparse = _emitparse macro._emitbuild = _emitbuild + macro._emitparse_optional = _emitparse_optional def _emitseq(ksy, bitwise): return [ @@ -2290,7 +2333,7 @@ def materializeCollectedFixedSizeElements(currentStretchOfFixedLen, block, code, if currentStretchOfFixedLen.names: #There is at least one item to be parsed using a struct structname = f"formatfield_{code.allocateId()}" code.append(f"{structname} = struct.Struct({repr(currentStretchOfFixedLen.fmtstring)}) # {currentStretchOfFixedLen.length}\n") - _intermediate = f"""{", ".join(f"{Name2LocalVar[item]}" for item in currentStretchOfFixedLen.names)} = ({structname}.unpack(io.read({currentStretchOfFixedLen.length})))""" + _intermediate = f"""({", ".join(f"{Name2LocalVar[item]}" for item in currentStretchOfFixedLen.names)}, ) = ({structname}.unpack(io.read({currentStretchOfFixedLen.length})))""" return block + f""" {_intermediate} {currentStretchOfFixedLen.convertercmd} @@ -2302,7 +2345,7 @@ class _stretchOfFixedLen: length: int fmtstring: str convertercmd: str - names: list[str] + names: list class Struct(Construct): @@ -2435,7 +2478,7 @@ def _emitparse(self, code): currentStretchOfFixedLen.fmtstring += f"{sc._length}s" currentStretchOfFixedLen.length += sc._length currentStretchOfFixedLen.names.append(sc.name) - elif hasattr(sc, "fmtstr") and hasattr(sc, "_length"): #its a fixed length fmtstr entry + elif isinstance(sc, FormatField): #its a fixed length fmtstr entry noByteOrderForSingleByteItems = {"B":"B", "b":"b", "x":"x", @@ -2458,15 +2501,17 @@ def _emitparse(self, code): currentStretchOfFixedLen = _stretchOfFixedLen(length=0, fmtstring=fieldFormatStr, convertercmd="", names=[]) currentStretchOfFixedLen.length += sc.length currentStretchOfFixedLen.names.append(sc.name) - else: # a variable length item + else: # a variable length item, or optional item block = materializeCollectedFixedSizeElements(currentStretchOfFixedLen, block, code, Name2LocalVar) currentResult = "{"+ ", ".join(f"'{name}':{localVar}" for localVar, name in localVars2NameDict.items() if localVar in block)+ "}" if __is_type__(sc, Optional): - scOpt = __get_type__(sc, Optional) - block += f""" + try: + block = sc.subcons[0]._emitparse_optional(block, code, Name2LocalVar[sc.name]) + except AttributeError as e: + block += f""" try: fallback = io.tell() - {f'{Name2LocalVar[sc.name]} = ' if sc.name else ''}{scOpt.subcons[0]._compileparse(code)} + {f'{Name2LocalVar[sc.name]} = ' if sc.name else ''}{sc.subcons[0]._compileparse(code)} except ExplicitError: raise except Exception: From f4a786716369985150519dbbbaa752cecead97e2 Mon Sep 17 00:00:00 2001 From: "LIGHT\\F.Haas2" Date: Tue, 16 Apr 2024 20:26:56 +0200 Subject: [PATCH 20/32] - support pytest 8 --- .github/workflows/main.yml | 2 +- pytest.ini | 2 ++ tests/__init__.py | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 pytest.ini diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8857958f..b69943f9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -23,7 +23,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install "pytest<8.0.0" pytest-benchmark numpy arrow ruamel.yaml cloudpickle lz4 + pip install pytest pytest-benchmark numpy arrow ruamel.yaml cloudpickle lz4 - name: Install cryptography (but not for pypy on windows) if: ${{ !((matrix.os == 'windows-latest') && (matrix.python-version == 'pypy3.9')) }} run: | diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..03f586d4 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +pythonpath = . \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py index e69de29b..2ae28399 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1 @@ +pass From 2176d8f6119657b2c5448d1fa36e981b5ab93d70 Mon Sep 17 00:00:00 2001 From: "LIGHT\\F.Haas2" Date: Tue, 16 Apr 2024 22:22:20 +0200 Subject: [PATCH 21/32] - initial bugy attempt to order computed elements --- construct/core.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/construct/core.py b/construct/core.py index f77d056d..ac263890 100644 --- a/construct/core.py +++ b/construct/core.py @@ -2413,6 +2413,15 @@ def __init__(self, *subcons, **subconskw): self._subcons = Container((sc.name,sc) for sc in self.subcons if sc.name) self.flagbuildnone = all(sc.flagbuildnone for sc in self.subcons) + subcons = [] + for sc in self.subcons: + if __is_type__(sc, Computed) and "this" not in repr(sc.func): + subcons = [sc] + subcons + else: + subcons.append(sc) + + self.subcons = subcons + def __getattr__(self, name): if name in self._subcons: return self._subcons[name] @@ -2472,6 +2481,7 @@ def _emitparse(self, code): localVars2NameDict = {key: sc.name for key, sc in localVars2NameDict.items()} Name2LocalVar = {name: localVar for localVar, name in localVars2NameDict.items()} currentStretchOfFixedLen = _stretchOfFixedLen(length=0, fmtstring="", convertercmd="", names=[]) + for sc in self.subcons: if __is_type__(sc, StringEncoded) and hasattr(sc, "_encoding") and hasattr(sc, "_length"): #its a padded string StringEncoded currentStretchOfFixedLen.convertercmd += f"{Name2LocalVar[sc.name]} = {Name2LocalVar[sc.name]}.decode('{sc._encoding}').replace('\\x00', '');" From 71d334f1a2c724bf0f5c3c703438377682fe37e2 Mon Sep 17 00:00:00 2001 From: "LIGHT\\F.Haas2" Date: Tue, 16 Apr 2024 23:15:57 +0200 Subject: [PATCH 22/32] - sort computed elements --- construct/core.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/construct/core.py b/construct/core.py index ac263890..a401403c 100644 --- a/construct/core.py +++ b/construct/core.py @@ -2413,14 +2413,15 @@ def __init__(self, *subcons, **subconskw): self._subcons = Container((sc.name,sc) for sc in self.subcons if sc.name) self.flagbuildnone = all(sc.flagbuildnone for sc in self.subcons) - subcons = [] - for sc in self.subcons: - if __is_type__(sc, Computed) and "this" not in repr(sc.func): - subcons = [sc] + subcons + computedWithInput = [item for item in self.subcons if __is_type__(item, Computed)] + self.subcons = [item for item in self.subcons if not __is_type__(item, Computed)] + for cIn in computedWithInput: + for idx in range(len(self.subcons)-1, -1, -1): + if f"this['{self.subcons[idx].name}']" in repr(cIn.func): + self.subcons.insert(idx+1, cIn) + break else: - subcons.append(sc) - - self.subcons = subcons + self.subcons.insert(0, cIn) def __getattr__(self, name): if name in self._subcons: From fe2ceb87c427120ffad0c8b296837963a09944b0 Mon Sep 17 00:00:00 2001 From: "LIGHT\\F.Haas2" Date: Wed, 17 Apr 2024 08:03:12 +0200 Subject: [PATCH 23/32] - reordering of Computed elements only works with expressions --- construct/core.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/construct/core.py b/construct/core.py index a401403c..0e5e9601 100644 --- a/construct/core.py +++ b/construct/core.py @@ -2413,15 +2413,18 @@ def __init__(self, *subcons, **subconskw): self._subcons = Container((sc.name,sc) for sc in self.subcons if sc.name) self.flagbuildnone = all(sc.flagbuildnone for sc in self.subcons) - computedWithInput = [item for item in self.subcons if __is_type__(item, Computed)] - self.subcons = [item for item in self.subcons if not __is_type__(item, Computed)] - for cIn in computedWithInput: - for idx in range(len(self.subcons)-1, -1, -1): - if f"this['{self.subcons[idx].name}']" in repr(cIn.func): - self.subcons.insert(idx+1, cIn) - break - else: - self.subcons.insert(0, cIn) + try: + computedWithInput = [item for item in self.subcons if __is_type__(item, Computed)] + self.subcons = [item for item in self.subcons if not __is_type__(item, Computed)] + for cIn in computedWithInput: + for idx in range(len(self.subcons)-1, -1, -1): + if f"this['{self.subcons[idx].name}']" in repr(cIn.func): + self.subcons.insert(idx+1, cIn) + break + else: + self.subcons.insert(0, cIn) + except: + pass def __getattr__(self, name): if name in self._subcons: From b7e52ec0073c9d3a6fd89eaf7e60a1050374fd78 Mon Sep 17 00:00:00 2001 From: "LIGHT\\F.Haas2" Date: Thu, 18 Apr 2024 18:03:45 +0200 Subject: [PATCH 24/32] - fix ordering of Computed items --- construct/core.py | 47 ++++++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/construct/core.py b/construct/core.py index 0e5e9601..f0449e44 100644 --- a/construct/core.py +++ b/construct/core.py @@ -2317,7 +2317,7 @@ def __get_type__(sc, type, maxDepth=-1): return None -def reduceDependancyDepth(block, code): +def __reduceDependancyDepth__(block, code): argnames = passnames = "" found = (item[1] for item in re.compile(r"(this(\['.*?'\])*)").findall(block)) for item in found: @@ -2329,7 +2329,7 @@ def reduceDependancyDepth(block, code): return block, (argnames, passnames) -def materializeCollectedFixedSizeElements(currentStretchOfFixedLen, block, code, Name2LocalVar): +def __materializeCollectedFixedSizeElements__(currentStretchOfFixedLen, block, code, Name2LocalVar): if currentStretchOfFixedLen.names: #There is at least one item to be parsed using a struct structname = f"formatfield_{code.allocateId()}" code.append(f"{structname} = struct.Struct({repr(currentStretchOfFixedLen.fmtstring)}) # {currentStretchOfFixedLen.length}\n") @@ -2340,6 +2340,17 @@ def materializeCollectedFixedSizeElements(currentStretchOfFixedLen, block, code, """ return block + +def __orderComputedParts(computed_in, placeComputed): + for cIn in computed_in: + for idx in range(len(placeComputed)-1, -1, -1): + if f"this['{placeComputed[idx].name}']" in repr(__get_type__(cIn, Computed).func): + placeComputed.insert(idx+1, cIn) + break + else: + placeComputed.insert(0, cIn) + return placeComputed + @dataclass class _stretchOfFixedLen: length: int @@ -2410,21 +2421,19 @@ class Struct(Construct): def __init__(self, *subcons, **subconskw): super().__init__() self.subcons = list(subcons) + list(k/v for k,v in subconskw.items()) - self._subcons = Container((sc.name,sc) for sc in self.subcons if sc.name) - self.flagbuildnone = all(sc.flagbuildnone for sc in self.subcons) - + try: - computedWithInput = [item for item in self.subcons if __is_type__(item, Computed)] - self.subcons = [item for item in self.subcons if not __is_type__(item, Computed)] - for cIn in computedWithInput: - for idx in range(len(self.subcons)-1, -1, -1): - if f"this['{self.subcons[idx].name}']" in repr(cIn.func): - self.subcons.insert(idx+1, cIn) - break - else: - self.subcons.insert(0, cIn) - except: + computed1 = [item for item in self.subcons if __is_type__(item, Computed)] + for _ in range(2): + # The first run orders all items in the order, but the correct start point + # might be in the middle, the second rum moves it to the beginning... + computed = computed1 = __orderComputedParts(computed1, []) + subcons = [item for item in self.subcons if not __is_type__(item, Computed)] + self.subcons = __orderComputedParts(computed, subcons) + except Exception as e: pass + self._subcons = Container((sc.name,sc) for sc in self.subcons if sc.name) + self.flagbuildnone = all(sc.flagbuildnone for sc in self.subcons) def __getattr__(self, name): if name in self._subcons: @@ -2511,12 +2520,12 @@ def _emitparse(self, code): currentStretchOfFixedLen.fmtstring = f"{currentStretchOfFixedLen.fmtstring}{fieldFormatStr}" else: # change of byte order mid parsing... - block = materializeCollectedFixedSizeElements(currentStretchOfFixedLen, block, code, Name2LocalVar) + block = __materializeCollectedFixedSizeElements__(currentStretchOfFixedLen, block, code, Name2LocalVar) currentStretchOfFixedLen = _stretchOfFixedLen(length=0, fmtstring=fieldFormatStr, convertercmd="", names=[]) currentStretchOfFixedLen.length += sc.length currentStretchOfFixedLen.names.append(sc.name) else: # a variable length item, or optional item - block = materializeCollectedFixedSizeElements(currentStretchOfFixedLen, block, code, Name2LocalVar) + block = __materializeCollectedFixedSizeElements__(currentStretchOfFixedLen, block, code, Name2LocalVar) currentResult = "{"+ ", ".join(f"'{name}':{localVar}" for localVar, name in localVars2NameDict.items() if localVar in block)+ "}" if __is_type__(sc, Optional): try: @@ -2540,13 +2549,13 @@ def _emitparse(self, code): {f'{Name2LocalVar[sc.name]} = ' if sc.name else ''}{sc._compileparse(code)}""" block = block.replace("__current_result__", currentResult) currentStretchOfFixedLen = _stretchOfFixedLen(length=0, fmtstring="", convertercmd="", names=[]) - block = materializeCollectedFixedSizeElements(currentStretchOfFixedLen, block, code, Name2LocalVar) + block = __materializeCollectedFixedSizeElements__(currentStretchOfFixedLen, block, code, Name2LocalVar) currentResult = "{"+ ", ".join(f"'{name}':{localVar}" for localVar, name in localVars2NameDict.items() if (localVar in block) and name)+ "}" block += f""" return Container({currentResult})""" for name, value in Name2LocalVar.items(): block = block.replace(f"this['{name}']", value) - block, (argnames, passnames) = reduceDependancyDepth(block, code) + block, (argnames, passnames) = __reduceDependancyDepth__(block, code) if ("this" not in block): code.append(f"""def {fname}(io{passnames}):""" + block) return f"{fname}(io{argnames})" From 66a9f2b4a1a471997e1f71c7bddd7d67789b6008 Mon Sep 17 00:00:00 2001 From: "LIGHT\\F.Haas2" Date: Fri, 19 Apr 2024 21:11:56 +0200 Subject: [PATCH 25/32] - less function calls --- construct/core.py | 66 +++++++++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 22 deletions(-) diff --git a/construct/core.py b/construct/core.py index f0449e44..1e45ab09 100644 --- a/construct/core.py +++ b/construct/core.py @@ -1199,13 +1199,14 @@ def _sizeof(self, context, path): return self.length def _emitparse(self, code): - if self.fmtstr not in {"B", "b", "B"}: + if self.fmtstr in {"B", "B"}: + return f"(io.read({self.length}))[0]" + elif self.fmtstr in {"b", "b"}: + return f"[_temp := (io.read({self.length}))[0], _temp-(_temp&0x80)][0]" + else: fname = f"formatfield_{code.allocateId()}" code.append(f"{fname}_unpack = struct.Struct({repr(self.fmtstr)}).unpack") return f"{fname}_unpack(io.read({self.length}))[0]" - else: - return f"(io.read({self.length}))[0]" - def _emitbuild(self, code): fname = f"formatfield_{code.allocateId()}" @@ -1228,14 +1229,18 @@ def _emitprimitivetype(self, ksy, bitwise): return "f%s%s" % (self.length, "le" if swapped else "be", ) def _emitparse_optional(self, block, code, name_of_parsed_item): - structname = f"formatfield_{code.allocateId()}" - code.append(f"{structname} = struct.Struct({repr(self.fmtstr)})") if name_of_parsed_item: - assignment = f"({name_of_parsed_item},) = {structname}.unpack(readBuf)" + if all(item in {">", "<", "B"} for item in self.fmtstr): + assignment = f"({name_of_parsed_item},) = readBuf" + elif all(item in {">", "<", "b"} for item in self.fmtstr): + assignment = f"({name_of_parsed_item},) = readBuf; name_of_parsed_item = name_of_parsed_item - (name_of_parsed_item&0x80)" + else: + structname = f"formatfield_{code.allocateId()}" + code.append(f"{structname} = struct.Struct({repr(self.fmtstr)})") + assignment = f"({name_of_parsed_item},) = {structname}.unpack(readBuf)" else: assignment = "" block += f""" - fallback = io.tell() readBuf = io.read({self.length}) readBufLen = len(readBuf) if readBufLen == {self.length}: @@ -1243,7 +1248,7 @@ def _emitparse_optional(self, block, code, name_of_parsed_item): elif readBufLen == 0: return Container(__current_result__) #we are at the end of the stream.... else: - io.seek(fallback)""" + io.seek(io.tell()-readBufLen)""" return block @@ -1861,20 +1866,29 @@ def _emitparse_optional(block, code, name_of_parsed_item): assignment = f"{name_of_parsed_item} = readBuf.decode({repr(encoding)})" else: assignment = "pass" - block = lengthfield._emitparse_optional(block, code, "_lenOfPascalString") + if lengthfield.fmtstr in {"B", ">B", "", "<", "B"} for item in (currentStretchOfFixedLen.fmtstring)): + _intermediate = f"""({", ".join(f"{Name2LocalVar[item]}" for item in currentStretchOfFixedLen.names)}, ) = io.read({currentStretchOfFixedLen.length})""" + return block + f""" {_intermediate} {currentStretchOfFixedLen.convertercmd} - """ +""" + else: + structname = f"formatfield_{code.allocateId()}" + code.append(f"{structname} = struct.Struct({repr(currentStretchOfFixedLen.fmtstring)}) # {currentStretchOfFixedLen.length}\n") + _intermediate = f"""({", ".join(f"{Name2LocalVar[item]}" for item in currentStretchOfFixedLen.names)}, ) = ({structname}.unpack(io.read({currentStretchOfFixedLen.length})))""" + return block + f""" + {_intermediate} + {currentStretchOfFixedLen.convertercmd} +""" return block @@ -2496,12 +2518,12 @@ def _emitparse(self, code): currentStretchOfFixedLen = _stretchOfFixedLen(length=0, fmtstring="", convertercmd="", names=[]) for sc in self.subcons: - if __is_type__(sc, StringEncoded) and hasattr(sc, "_encoding") and hasattr(sc, "_length"): #its a padded string StringEncoded + if __is_type__(sc, StringEncoded, 2) and hasattr(sc, "_encoding") and hasattr(sc, "_length"): #its a padded string StringEncoded currentStretchOfFixedLen.convertercmd += f"{Name2LocalVar[sc.name]} = {Name2LocalVar[sc.name]}.decode('{sc._encoding}').replace('\\x00', '');" currentStretchOfFixedLen.fmtstring += f"{sc._length}s" currentStretchOfFixedLen.length += sc._length currentStretchOfFixedLen.names.append(sc.name) - elif isinstance(sc, FormatField): #its a fixed length fmtstr entry + elif __is_type__(sc, FormatField, 3): #its a fixed length fmtstr entry noByteOrderForSingleByteItems = {"B":"B", "b":"b", "x":"x", From fa2de6ca4b4dd164135ace017c1986ed2dd3203e Mon Sep 17 00:00:00 2001 From: "LIGHT\\F.Haas2" Date: Fri, 19 Apr 2024 22:52:15 +0200 Subject: [PATCH 26/32] Revert "- less function calls" This reverts commit 66a9f2b4a1a471997e1f71c7bddd7d67789b6008. --- construct/core.py | 66 ++++++++++++++++------------------------------- 1 file changed, 22 insertions(+), 44 deletions(-) diff --git a/construct/core.py b/construct/core.py index 1e45ab09..f0449e44 100644 --- a/construct/core.py +++ b/construct/core.py @@ -1199,14 +1199,13 @@ def _sizeof(self, context, path): return self.length def _emitparse(self, code): - if self.fmtstr in {"B", "B"}: - return f"(io.read({self.length}))[0]" - elif self.fmtstr in {"b", "b"}: - return f"[_temp := (io.read({self.length}))[0], _temp-(_temp&0x80)][0]" - else: + if self.fmtstr not in {"B", "b", "B"}: fname = f"formatfield_{code.allocateId()}" code.append(f"{fname}_unpack = struct.Struct({repr(self.fmtstr)}).unpack") return f"{fname}_unpack(io.read({self.length}))[0]" + else: + return f"(io.read({self.length}))[0]" + def _emitbuild(self, code): fname = f"formatfield_{code.allocateId()}" @@ -1229,18 +1228,14 @@ def _emitprimitivetype(self, ksy, bitwise): return "f%s%s" % (self.length, "le" if swapped else "be", ) def _emitparse_optional(self, block, code, name_of_parsed_item): + structname = f"formatfield_{code.allocateId()}" + code.append(f"{structname} = struct.Struct({repr(self.fmtstr)})") if name_of_parsed_item: - if all(item in {">", "<", "B"} for item in self.fmtstr): - assignment = f"({name_of_parsed_item},) = readBuf" - elif all(item in {">", "<", "b"} for item in self.fmtstr): - assignment = f"({name_of_parsed_item},) = readBuf; name_of_parsed_item = name_of_parsed_item - (name_of_parsed_item&0x80)" - else: - structname = f"formatfield_{code.allocateId()}" - code.append(f"{structname} = struct.Struct({repr(self.fmtstr)})") - assignment = f"({name_of_parsed_item},) = {structname}.unpack(readBuf)" + assignment = f"({name_of_parsed_item},) = {structname}.unpack(readBuf)" else: assignment = "" block += f""" + fallback = io.tell() readBuf = io.read({self.length}) readBufLen = len(readBuf) if readBufLen == {self.length}: @@ -1248,7 +1243,7 @@ def _emitparse_optional(self, block, code, name_of_parsed_item): elif readBufLen == 0: return Container(__current_result__) #we are at the end of the stream.... else: - io.seek(io.tell()-readBufLen)""" + io.seek(fallback)""" return block @@ -1866,29 +1861,20 @@ def _emitparse_optional(block, code, name_of_parsed_item): assignment = f"{name_of_parsed_item} = readBuf.decode({repr(encoding)})" else: assignment = "pass" - if lengthfield.fmtstr in {"B", ">B", "", "<", "B"} for item in (currentStretchOfFixedLen.fmtstring)): - _intermediate = f"""({", ".join(f"{Name2LocalVar[item]}" for item in currentStretchOfFixedLen.names)}, ) = io.read({currentStretchOfFixedLen.length})""" - return block + f""" + structname = f"formatfield_{code.allocateId()}" + code.append(f"{structname} = struct.Struct({repr(currentStretchOfFixedLen.fmtstring)}) # {currentStretchOfFixedLen.length}\n") + _intermediate = f"""({", ".join(f"{Name2LocalVar[item]}" for item in currentStretchOfFixedLen.names)}, ) = ({structname}.unpack(io.read({currentStretchOfFixedLen.length})))""" + return block + f""" {_intermediate} {currentStretchOfFixedLen.convertercmd} -""" - else: - structname = f"formatfield_{code.allocateId()}" - code.append(f"{structname} = struct.Struct({repr(currentStretchOfFixedLen.fmtstring)}) # {currentStretchOfFixedLen.length}\n") - _intermediate = f"""({", ".join(f"{Name2LocalVar[item]}" for item in currentStretchOfFixedLen.names)}, ) = ({structname}.unpack(io.read({currentStretchOfFixedLen.length})))""" - return block + f""" - {_intermediate} - {currentStretchOfFixedLen.convertercmd} -""" + """ return block @@ -2518,12 +2496,12 @@ def _emitparse(self, code): currentStretchOfFixedLen = _stretchOfFixedLen(length=0, fmtstring="", convertercmd="", names=[]) for sc in self.subcons: - if __is_type__(sc, StringEncoded, 2) and hasattr(sc, "_encoding") and hasattr(sc, "_length"): #its a padded string StringEncoded + if __is_type__(sc, StringEncoded) and hasattr(sc, "_encoding") and hasattr(sc, "_length"): #its a padded string StringEncoded currentStretchOfFixedLen.convertercmd += f"{Name2LocalVar[sc.name]} = {Name2LocalVar[sc.name]}.decode('{sc._encoding}').replace('\\x00', '');" currentStretchOfFixedLen.fmtstring += f"{sc._length}s" currentStretchOfFixedLen.length += sc._length currentStretchOfFixedLen.names.append(sc.name) - elif __is_type__(sc, FormatField, 3): #its a fixed length fmtstr entry + elif isinstance(sc, FormatField): #its a fixed length fmtstr entry noByteOrderForSingleByteItems = {"B":"B", "b":"b", "x":"x", From 1f6089ac5dc20c03b1c1258ec6d885c267036d93 Mon Sep 17 00:00:00 2001 From: "LIGHT\\F.Haas2" Date: Mon, 22 Apr 2024 17:39:04 +0200 Subject: [PATCH 27/32] - less function calls --- construct/core.py | 57 +++++++++++++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/construct/core.py b/construct/core.py index f0449e44..5307f7eb 100644 --- a/construct/core.py +++ b/construct/core.py @@ -1199,12 +1199,15 @@ def _sizeof(self, context, path): return self.length def _emitparse(self, code): - if self.fmtstr not in {"B", "b", "B"}: + if self.fmtstr in {"B", "B"}: + return f"(io.read(1))[0]" + elif self.fmtstr in {"b", "b"}: + return f"[_temp := io.read(1)[0], (_temp&0x7f)-(_temp&0x80)][1]" + else: fname = f"formatfield_{code.allocateId()}" code.append(f"{fname}_unpack = struct.Struct({repr(self.fmtstr)}).unpack") return f"{fname}_unpack(io.read({self.length}))[0]" - else: - return f"(io.read({self.length}))[0]" + def _emitbuild(self, code): @@ -1228,14 +1231,18 @@ def _emitprimitivetype(self, ksy, bitwise): return "f%s%s" % (self.length, "le" if swapped else "be", ) def _emitparse_optional(self, block, code, name_of_parsed_item): - structname = f"formatfield_{code.allocateId()}" - code.append(f"{structname} = struct.Struct({repr(self.fmtstr)})") if name_of_parsed_item: - assignment = f"({name_of_parsed_item},) = {structname}.unpack(readBuf)" + if self.fmtstr in {"B", "B"}: + assignment = f"({name_of_parsed_item},) = readBuf" + if self.fmtstr in {"b", "b"}: + assignment = f"{name_of_parsed_item} = [_tmp = readBuf[0], (_temp&0x7F)-(0x80&_temp)][1]" + else: + structname = f"formatfield_{code.allocateId()}" + code.append(f"{structname} = struct.Struct({repr(self.fmtstr)})") + assignment = f"({name_of_parsed_item},) = {structname}.unpack(readBuf)" else: assignment = "" block += f""" - fallback = io.tell() readBuf = io.read({self.length}) readBufLen = len(readBuf) if readBufLen == {self.length}: @@ -1243,7 +1250,7 @@ def _emitparse_optional(self, block, code, name_of_parsed_item): elif readBufLen == 0: return Container(__current_result__) #we are at the end of the stream.... else: - io.seek(fallback)""" + io.seek(io.tell()-readBufLen)""" return block @@ -1863,18 +1870,18 @@ def _emitparse_optional(block, code, name_of_parsed_item): assignment = "pass" block = lengthfield._emitparse_optional(block, code, "_lenOfPascalString") block += f""" - fallback = io.tell() + readBuf = io.read(_lenOfPascalString) readBufLen = len(readBuf) if readBufLen == _lenOfPascalString: try: {assignment} except: - io.seek(fallback) + io.seek(io.tell()-readBufLen-{lengthfield.length}) elif readBufLen == 0: return Container(__current_result__) #we are at the end of the stream.... else: - io.seek(fallback)""" + io.seek(io.tell()-readBufLen-{lengthfield.length})""" return block @@ -2297,8 +2304,9 @@ def _emitbuild(self, code): # structures and sequences #=============================================================================== -def __is_type__(sc, type): - while True: +def __is_type__(sc, type, maxDepth=-1): + while maxDepth!=0: + maxDepth-=1 if isinstance(sc, type): return True elif hasattr(sc, "subcon"): @@ -2331,10 +2339,17 @@ def __reduceDependancyDepth__(block, code): def __materializeCollectedFixedSizeElements__(currentStretchOfFixedLen, block, code, Name2LocalVar): if currentStretchOfFixedLen.names: #There is at least one item to be parsed using a struct - structname = f"formatfield_{code.allocateId()}" - code.append(f"{structname} = struct.Struct({repr(currentStretchOfFixedLen.fmtstring)}) # {currentStretchOfFixedLen.length}\n") - _intermediate = f"""({", ".join(f"{Name2LocalVar[item]}" for item in currentStretchOfFixedLen.names)}, ) = ({structname}.unpack(io.read({currentStretchOfFixedLen.length})))""" - return block + f""" + if all(item in {">", "<", "B"} for item in (currentStretchOfFixedLen.fmtstring)): + _intermediate = f"""({", ".join(f"{Name2LocalVar[item]}" for item in currentStretchOfFixedLen.names)}, ) = io.read({currentStretchOfFixedLen.length})""" + return block + f""" + {_intermediate} + {currentStretchOfFixedLen.convertercmd} + """ + else: + structname = f"formatfield_{code.allocateId()}" + code.append(f"{structname} = struct.Struct({repr(currentStretchOfFixedLen.fmtstring)}) # {currentStretchOfFixedLen.length}\n") + _intermediate = f"""({", ".join(f"{Name2LocalVar[item]}" for item in currentStretchOfFixedLen.names)}, ) = ({structname}.unpack(io.read({currentStretchOfFixedLen.length})))""" + return block + f""" {_intermediate} {currentStretchOfFixedLen.convertercmd} """ @@ -2502,11 +2517,15 @@ def _emitparse(self, code): currentStretchOfFixedLen.length += sc._length currentStretchOfFixedLen.names.append(sc.name) elif isinstance(sc, FormatField): #its a fixed length fmtstr entry + name = sc.name noByteOrderForSingleByteItems = {"B":"B", "b":"b", "x":"x", "c":"c",} - fieldFormatStr = noByteOrderForSingleByteItems[sc.fmtstr] if sc.fmtstr in noByteOrderForSingleByteItems else sc.fmtstr + if sc.fmtstr in noByteOrderForSingleByteItems: + fieldFormatStr = noByteOrderForSingleByteItems[sc.fmtstr] + else: + fieldFormatStr = sc.fmtstr if currentStretchOfFixedLen.fmtstring == "": currentStretchOfFixedLen.fmtstring = fieldFormatStr elif currentStretchOfFixedLen.fmtstring[0] in {">", "<"} and len (fieldFormatStr) >= 2 and currentStretchOfFixedLen.fmtstring[0] == fieldFormatStr[0]: @@ -2523,7 +2542,7 @@ def _emitparse(self, code): block = __materializeCollectedFixedSizeElements__(currentStretchOfFixedLen, block, code, Name2LocalVar) currentStretchOfFixedLen = _stretchOfFixedLen(length=0, fmtstring=fieldFormatStr, convertercmd="", names=[]) currentStretchOfFixedLen.length += sc.length - currentStretchOfFixedLen.names.append(sc.name) + currentStretchOfFixedLen.names.append(name) else: # a variable length item, or optional item block = __materializeCollectedFixedSizeElements__(currentStretchOfFixedLen, block, code, Name2LocalVar) currentResult = "{"+ ", ".join(f"'{name}':{localVar}" for localVar, name in localVars2NameDict.items() if localVar in block)+ "}" From e050433b31c40705932382931f5ab13cda8af087 Mon Sep 17 00:00:00 2001 From: "LIGHT\\F.Haas2" Date: Mon, 22 Apr 2024 17:46:24 +0200 Subject: [PATCH 28/32] - use reuse less... --- construct/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/construct/core.py b/construct/core.py index 5307f7eb..0dc05cba 100644 --- a/construct/core.py +++ b/construct/core.py @@ -2136,7 +2136,7 @@ def _encode(self, obj, context, path): def _emitparse(self, code): fname = f"factory_{code.allocateId()}" code.append(f"{fname} = {repr(self.decmapping)}") - return f"reuse(({self.subcon._compileparse(code)}), lambda x: {fname}.get(x, EnumInteger(x)))" + return f"[x:={self.subcon._compileparse(code)}, {fname}.get(x, EnumInteger(x))][1]" def _emitbuild(self, code): fname = f"factory_{code.allocateId()}" @@ -2241,7 +2241,7 @@ def _encode(self, obj, context, path): raise MappingError("building failed, unknown label: %r" % (obj,), path=path) def _emitparse(self, code): - return f"reuse(({self.subcon._compileparse(code)}), lambda x: Container({', '.join(f'{k}=bool(x & {v} == {v})' for k,v in self.flags.items()) }))" + return f"[x:=({self.subcon._compileparse(code)}), Container({', '.join(f'{k}=bool(x & {v} == {v})' for k,v in self.flags.items()) })][1]" def _emitseq(self, ksy, bitwise): bitstotal = self.subcon.sizeof() * 8 From f63a80fc700fdca973b72adb236935b1c5667aea Mon Sep 17 00:00:00 2001 From: "LIGHT\\F.Haas2" Date: Mon, 22 Apr 2024 19:12:40 +0200 Subject: [PATCH 29/32] more tests, fix of optional pascalstring --- construct/core.py | 21 +++++++++++---------- tests/test_core.py | 23 +++++++++++++++++++++++ 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/construct/core.py b/construct/core.py index 0dc05cba..e2a805fe 100644 --- a/construct/core.py +++ b/construct/core.py @@ -1234,14 +1234,14 @@ def _emitparse_optional(self, block, code, name_of_parsed_item): if name_of_parsed_item: if self.fmtstr in {"B", "B"}: assignment = f"({name_of_parsed_item},) = readBuf" - if self.fmtstr in {"b", "b"}: + elif self.fmtstr in {"b", "b"}: assignment = f"{name_of_parsed_item} = [_tmp = readBuf[0], (_temp&0x7F)-(0x80&_temp)][1]" else: structname = f"formatfield_{code.allocateId()}" code.append(f"{structname} = struct.Struct({repr(self.fmtstr)})") assignment = f"({name_of_parsed_item},) = {structname}.unpack(readBuf)" else: - assignment = "" + assignment = "pass" block += f""" readBuf = io.read({self.length}) readBufLen = len(readBuf) @@ -1865,19 +1865,20 @@ def _emitparse(code): def _emitparse_optional(block, code, name_of_parsed_item): if name_of_parsed_item: - assignment = f"{name_of_parsed_item} = readBuf.decode({repr(encoding)})" + assignment = f""" + try: + {name_of_parsed_item} = readBuf.decode({repr(encoding)}) + except: + io.seek(io.tell()-readBufLen-{lengthfield.length}) +""" else: assignment = "pass" block = lengthfield._emitparse_optional(block, code, "_lenOfPascalString") block += f""" - readBuf = io.read(_lenOfPascalString) readBufLen = len(readBuf) if readBufLen == _lenOfPascalString: - try: - {assignment} - except: - io.seek(io.tell()-readBufLen-{lengthfield.length}) + {assignment} elif readBufLen == 0: return Container(__current_result__) #we are at the end of the stream.... else: @@ -2516,7 +2517,7 @@ def _emitparse(self, code): currentStretchOfFixedLen.fmtstring += f"{sc._length}s" currentStretchOfFixedLen.length += sc._length currentStretchOfFixedLen.names.append(sc.name) - elif isinstance(sc, FormatField): #its a fixed length fmtstr entry + elif __is_type__(sc, FormatField, 3) and hasattr(sc, "fmtstr"): #its a fixed length fmtstr entry name = sc.name noByteOrderForSingleByteItems = {"B":"B", "b":"b", @@ -4228,7 +4229,7 @@ def {fname}(io, this): except ExplicitError: raise except Exception: - io.seek(io, fallback, 0) + io.seek(fallback) """ code.append(block) return "%s(io, this)" % (fname,) diff --git a/tests/test_core.py b/tests/test_core.py index e768530d..565c2c56 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -387,6 +387,29 @@ class F(enum.IntFlag): x = d.parse(b"\x02") assert x == F.b + +def test_optional_pascal_string(): + d = Struct("opt"/Optional(PascalString(Byte, "ascii"))) + dc = d.compile() + for blob in [b"\x01a", b""]: + assert d.parse(blob) == dc.parse(blob) + assert d.build(dc.parse(blob)) == blob + assert dc.build(d.parse(blob)) == blob + assert dc.build(dc.parse(blob)) == blob + assert d.build(d.parse(blob)) == blob + + for blob in [b"\x01", b"\x01\xff"]: + assert d.parse(blob) == Container(opt=None) + assert dc.parse(blob) == Container(opt=None) + assert dc.parse(b"\x03abc") == Container(opt="abc") + + d = Struct("opt1"/Optional(PascalString(Byte, "ascii")), + "opt2"/Optional(Int32ul)) + dc = d.compile() + for blob in [b"\x0111234", b"\x01\xff12"]: + assert d.parse(blob) == dc.parse(blob) + + def test_flagsenum(): d = FlagsEnum(Byte, one=1, two=2, four=4, eight=8) common(d, b"\x03", Container(_flagsenum=True, one=True, two=True, four=False, eight=False), 1) From d6c113005253651affcc15285479768717463a6d Mon Sep 17 00:00:00 2001 From: franz haas Date: Thu, 25 Apr 2024 20:41:03 +0200 Subject: [PATCH 30/32] - minimised single byte case --- construct/core.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/construct/core.py b/construct/core.py index e2a805fe..156b367e 100644 --- a/construct/core.py +++ b/construct/core.py @@ -1247,10 +1247,15 @@ def _emitparse_optional(self, block, code, name_of_parsed_item): readBufLen = len(readBuf) if readBufLen == {self.length}: {assignment} - elif readBufLen == 0: - return Container(__current_result__) #we are at the end of the stream.... - else: +""" + if self.length > 1: + block += f""" + elif readBufLen > 0: io.seek(io.tell()-readBufLen)""" + block += """ + else: + return Container(__current_result__) #we are at the end of the stream.... + """ return block @@ -4364,7 +4369,7 @@ def _emitparse(self, code): aid = code.allocateId() code.userfunction[aid] = self.condfunc return "((%s) if (%s) else (%s))" % (self.thensubcon._compileparse(code), f"userfunction[{aid}](Container({{**this,**__current_result__}}))", self.elsesubcon._compileparse(code), ) - + def _emitbuild(self, code): if isinstance(self.condfunc, ExprMixin) or (not callable(self.condfunc)): return f"(({self.thensubcon._compilebuild(code)}) if ({repr(self.condfunc)}) else ({self.elsesubcon._compilebuild(code)}))" From dd465f74537109137e05bf6a21add9e6b153a122 Mon Sep 17 00:00:00 2001 From: franz haas Date: Thu, 25 Apr 2024 21:29:06 +0200 Subject: [PATCH 31/32] - more optional compilers --- construct/core.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/construct/core.py b/construct/core.py index 156b367e..b4d95b1b 100644 --- a/construct/core.py +++ b/construct/core.py @@ -1235,7 +1235,7 @@ def _emitparse_optional(self, block, code, name_of_parsed_item): if self.fmtstr in {"B", "B"}: assignment = f"({name_of_parsed_item},) = readBuf" elif self.fmtstr in {"b", "b"}: - assignment = f"{name_of_parsed_item} = [_tmp = readBuf[0], (_temp&0x7F)-(0x80&_temp)][1]" + assignment = f"{name_of_parsed_item} = [_temp := readBuf[0], (_temp&0x7F)-(0x80&_temp)][1]" else: structname = f"formatfield_{code.allocateId()}" code.append(f"{structname} = struct.Struct({repr(self.fmtstr)})") @@ -4369,6 +4369,29 @@ def _emitparse(self, code): aid = code.allocateId() code.userfunction[aid] = self.condfunc return "((%s) if (%s) else (%s))" % (self.thensubcon._compileparse(code), f"userfunction[{aid}](Container({{**this,**__current_result__}}))", self.elsesubcon._compileparse(code), ) + + def _emitparse_optional(self, block, code, name_of_parsed_item): + def _indent(block): + return (f"{os.linesep} ").join(block.split(os.linesep)) + + if isinstance(self.condfunc, ExprMixin) or (not callable(self.condfunc)): + funcString = self.condfunc + else: + aid = code.allocateId() + code.userfunction[aid] = self.condfunc + funcString = f"userfunction[{aid}](Container({{**this,**__current_result__}}))" + block += f""" + if {funcString}: + {_indent(self.thensubcon._emitparse_optional("", code, name_of_parsed_item))} +""" + if self.elsesubcon != Pass: + print(self.elsesubcon) + block += f""" + else: + {_indent(self.elsesubcon._emitparse_optional("", code, name_of_parsed_item))} +""" + return block + def _emitbuild(self, code): if isinstance(self.condfunc, ExprMixin) or (not callable(self.condfunc)): @@ -5102,6 +5125,12 @@ def _emitparse(self, code): def _emitbuild(self, code): return "None" + + def _emitparse_optional(self, block, code, name_of_parsed_item): + block += f""" + {name_of_parsed_item} = None +""" + return block def _emitfulltype(self, ksy, bitwise): return dict(size=0) From 9f9da1e4aaa871f5ed632f30468278fd1c386614 Mon Sep 17 00:00:00 2001 From: franz haas Date: Fri, 26 Apr 2024 20:39:04 +0200 Subject: [PATCH 32/32] - reuse structs reducing line cnt --- construct/core.py | 45 ++++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/construct/core.py b/construct/core.py index b4d95b1b..ae83beb5 100644 --- a/construct/core.py +++ b/construct/core.py @@ -284,11 +284,23 @@ def __init__(self): self.linkedparsers = {} self.linkedbuilders = {} self.userfunction = {} + self._structs = {} def allocateId(self): self.nextid += 1 return self.nextid + def getCachedStruct(self, fmtstr): + fmtstr = repr(fmtstr) + try: + return self._structs[fmtstr] + except KeyError: + fname = f"formatfield_{self.allocateId()}" + self.append(f"{fname} = struct.Struct({fmtstr})") + self._structs[fmtstr] = fname + return fname + + def append(self, block): block = [s for s in block.splitlines() if s.strip()] firstline = block[0] @@ -534,7 +546,7 @@ def compile(self, filename=None, containertype="Container"): """ code = CodeGen() - code.append(""" + code.append(f""" # generated by Construct, this source is for inspection only! do not import! from construct import * @@ -551,20 +563,18 @@ def restream(data, func): def reuse(obj, func): return func(obj) - linkedinstances = {} - linkedparsers = {} - linkedbuilders = {} - userfunction = {} + linkedinstances = {{}} + linkedparsers = {{}} + linkedbuilders = {{}} + userfunction = {{}} len_ = len sum_ = sum min_ = min max_ = max abs_ = abs - """) - code.append(f""" Container = {containertype} - """) + """) code.append(f""" def parseall(io, this): return {self._compileparse(code)} @@ -1204,16 +1214,10 @@ def _emitparse(self, code): elif self.fmtstr in {"b", "b"}: return f"[_temp := io.read(1)[0], (_temp&0x7f)-(_temp&0x80)][1]" else: - fname = f"formatfield_{code.allocateId()}" - code.append(f"{fname}_unpack = struct.Struct({repr(self.fmtstr)}).unpack") - return f"{fname}_unpack(io.read({self.length}))[0]" - - + return f"{code.getCachedStruct(self.fmtstr)}.unpack(io.read({self.length}))[0]" def _emitbuild(self, code): - fname = f"formatfield_{code.allocateId()}" - code.append(f"{fname} = struct.Struct({repr(self.fmtstr)})") - return f"(io.write({fname}.pack(obj)), obj)[1]" + return f"(io.write({code.getCachedStruct(self.fmtstr)}.pack(obj)), obj)[1]" def _emitprimitivetype(self, ksy, bitwise): endianity,format = self.fmtstr @@ -1237,9 +1241,7 @@ def _emitparse_optional(self, block, code, name_of_parsed_item): elif self.fmtstr in {"b", "b"}: assignment = f"{name_of_parsed_item} = [_temp := readBuf[0], (_temp&0x7F)-(0x80&_temp)][1]" else: - structname = f"formatfield_{code.allocateId()}" - code.append(f"{structname} = struct.Struct({repr(self.fmtstr)})") - assignment = f"({name_of_parsed_item},) = {structname}.unpack(readBuf)" + assignment = f"({name_of_parsed_item},) = {code.getCachedStruct(self.fmtstr)}.unpack(readBuf)" else: assignment = "pass" block += f""" @@ -2352,11 +2354,8 @@ def __materializeCollectedFixedSizeElements__(currentStretchOfFixedLen, block, c {currentStretchOfFixedLen.convertercmd} """ else: - structname = f"formatfield_{code.allocateId()}" - code.append(f"{structname} = struct.Struct({repr(currentStretchOfFixedLen.fmtstring)}) # {currentStretchOfFixedLen.length}\n") - _intermediate = f"""({", ".join(f"{Name2LocalVar[item]}" for item in currentStretchOfFixedLen.names)}, ) = ({structname}.unpack(io.read({currentStretchOfFixedLen.length})))""" return block + f""" - {_intermediate} + ({", ".join(f"{Name2LocalVar[item]}" for item in currentStretchOfFixedLen.names)}, ) = {code.getCachedStruct(currentStretchOfFixedLen.fmtstring)}.unpack(io.read({currentStretchOfFixedLen.length})) {currentStretchOfFixedLen.convertercmd} """ return block