From 8792cb1a1c6edcef5b30e475eea555afae1d3aee Mon Sep 17 00:00:00 2001 From: Daniel Buckmaster Date: Sun, 19 Sep 2021 13:24:38 +1000 Subject: [PATCH 1/9] Implement new 'plugin' extension method --- src/Encoder.ts | 5 +++ src/ExtensionCodec.ts | 37 +++++++++++++++++++++ test/ExtensionCodecPlugin.test.ts | 54 +++++++++++++++++++++++++++++++ 3 files changed, 96 insertions(+) create mode 100644 test/ExtensionCodecPlugin.test.ts diff --git a/src/Encoder.ts b/src/Encoder.ts index afea365c..7531a186 100644 --- a/src/Encoder.ts +++ b/src/Encoder.ts @@ -185,6 +185,11 @@ export class Encoder { } private encodeObject(object: unknown, depth: number) { + const pluginRan = this.extensionCodec.tryToEncodePlugin(this, depth, object, this.context); + if (pluginRan) { + return; + } + // try to encode objects with custom codec first of non-primitives const ext = this.extensionCodec.tryToEncode(object, this.context); if (ext != null) { diff --git a/src/ExtensionCodec.ts b/src/ExtensionCodec.ts index d38705ee..517c3a7a 100644 --- a/src/ExtensionCodec.ts +++ b/src/ExtensionCodec.ts @@ -2,6 +2,7 @@ import { ExtData } from "./ExtData"; import { timestampExtension } from "./timestamp"; +import type { Encoder } from "./Encoder"; export type ExtensionDecoderType = ( data: Uint8Array, @@ -11,11 +12,14 @@ export type ExtensionDecoderType = ( export type ExtensionEncoderType = (input: unknown, context: ContextType) => Uint8Array | null; +export type ExtensionEncoderPluginType = (encoder: Encoder, depth: number, input: unknown, context: ContextType) => boolean; + // immutable interfce to ExtensionCodec export type ExtensionCodecType = { // eslint-disable-next-line @typescript-eslint/naming-convention __brand?: ContextType; tryToEncode(object: unknown, context: ContextType): ExtData | null; + tryToEncodePlugin(encoder: Encoder, depth: number, object: unknown, context: ContextType): boolean; decode(data: Uint8Array, extType: number, context: ContextType): unknown; }; @@ -34,6 +38,7 @@ export class 
ExtensionCodec implements ExtensionCodecTy // custom extensions private readonly encoders: Array | undefined | null> = []; private readonly decoders: Array | undefined | null> = []; + private readonly rawEncoders: Array | undefined | null> = []; public constructor() { this.register(timestampExtension); @@ -60,6 +65,24 @@ export class ExtensionCodec implements ExtensionCodecTy } } + public registerPlugin({ + type, + encode, + decode, + }: { + type: number; + encode: ExtensionEncoderPluginType; + decode: ExtensionDecoderType; + }): void { + if (type >= 0) { + // custom extensions + this.rawEncoders[type] = encode; + this.decoders[type] = decode; + } else { + throw new Error("cannot register plugin for builtin type"); + } + } + public tryToEncode(object: unknown, context: ContextType): ExtData | null { // built-in extensions for (let i = 0; i < this.builtInEncoders.length; i++) { @@ -92,6 +115,20 @@ export class ExtensionCodec implements ExtensionCodecTy return null; } + public tryToEncodePlugin(encoder: Encoder, depth: number, object: unknown, context: ContextType): boolean { + for (let i = 0; i < this.rawEncoders.length; i++) { + const encodeExt = this.rawEncoders[i]; + if (encodeExt != null) { + const accepted = encodeExt(encoder, depth, object, context); + if (accepted) { + return true; + } + } + } + + return false; + } + public decode(data: Uint8Array, type: number, context: ContextType): unknown { const decodeExt = type < 0 ? 
this.builtInDecoders[-1 - type] : this.decoders[type]; if (decodeExt) { diff --git a/test/ExtensionCodecPlugin.test.ts b/test/ExtensionCodecPlugin.test.ts new file mode 100644 index 00000000..a7096b94 --- /dev/null +++ b/test/ExtensionCodecPlugin.test.ts @@ -0,0 +1,54 @@ +import assert from "assert"; +import { encode, decode, Encoder, ExtensionCodec, ExtData, decodeAsync } from "../src"; +import { typedArrays } from "../example/typed-arrays-plugin"; + +describe("ExtensionCodecPlugin", () => { + context("custom extension plugin", () => { + const extensionCodec = new ExtensionCodec(); + + // Set + extensionCodec.registerPlugin({ + type: 0, + encode: (encoder: Encoder, depth: number, object: unknown): boolean => { + if (object instanceof Set) { + // This uses the plugin mechanism in a pointless way: simply encoding an extension + // the same as it would have been normally. + const extData = encode([...object]); + encoder["encodeExtension"](new ExtData(0, extData)); + return true; + } + return false; + }, + decode: (data: Uint8Array) => { + const array = decode(data) as Array; + return new Set(array); + }, + }); + + it("encodes and decodes custom data types (synchronously)", () => { + const set = new Set([1, 2, 3]); + const encoded = encode([set], { extensionCodec }); + assert.deepStrictEqual(decode(encoded, { extensionCodec }), [set]); + }); + + it("encodes and decodes custom data types (asynchronously)", async () => { + const set = new Set([1, 2, 3]); + const encoded = encode([set], { extensionCodec }); + const createStream = async function* () { + yield encoded; + }; + assert.deepStrictEqual(await decodeAsync(createStream(), { extensionCodec }), [set]); + }); + }); + + context("typed-arrays-plugin example", () => { + const extensionCodec = new ExtensionCodec(); + extensionCodec.registerPlugin(typedArrays({type: 1})); + + it("encodes and decodes a Float32Array (synchronously)", () => { + const floatArray = new Float32Array([1, 2, 3, 4, 5]); + const encoded = encode({ 
floatArray }, { extensionCodec }); + assert.deepStrictEqual(decode(encoded, { extensionCodec }), { floatArray }); + }); + }); +}); \ No newline at end of file From 24e6e644692e0a4eceb757fbcc91bbd730ac4252 Mon Sep 17 00:00:00 2001 From: Daniel Buckmaster Date: Sun, 19 Sep 2021 13:24:57 +1000 Subject: [PATCH 2/9] Implement TypedArray family plugin with alignment for efficiency --- example/typed-array-plugin-example.ts | 20 ++++++ example/typed-arrays-plugin.ts | 95 +++++++++++++++++++++++++++ 2 files changed, 115 insertions(+) create mode 100644 example/typed-array-plugin-example.ts create mode 100644 example/typed-arrays-plugin.ts diff --git a/example/typed-array-plugin-example.ts b/example/typed-array-plugin-example.ts new file mode 100644 index 00000000..b9fd76d8 --- /dev/null +++ b/example/typed-array-plugin-example.ts @@ -0,0 +1,20 @@ +// ts-node example/typed-array-plugin-example.ts + +import { encode, decode, ExtensionCodec } from "../src"; +import { typedArrays } from "../example/typed-arrays-plugin"; + +const extensionCodec = new ExtensionCodec(); +extensionCodec.registerPlugin(typedArrays({ type: 1 })); + +const int16Array = new Int16Array([-4, 1, 5]); +const float32Array = new Float32Array([1, -2, 3, 1e-9, 5]); +console.log("Object to encode:"); +console.log({ int16Array, float32Array }); + +const encoded = encode({ int16Array, float32Array }, { extensionCodec }); +console.log("\n\nRaw encoded data:"); +console.log(encoded); + +const decoded = decode(encoded, { extensionCodec }); +console.log("\n\nDecoded object:"); +console.log(decoded); \ No newline at end of file diff --git a/example/typed-arrays-plugin.ts b/example/typed-arrays-plugin.ts new file mode 100644 index 00000000..e1100ef2 --- /dev/null +++ b/example/typed-arrays-plugin.ts @@ -0,0 +1,95 @@ +import type { Encoder } from "../src/Encoder"; +import { ensureUint8Array } from "../src/utils/typedArrays"; + +export function typedArrays({type}: {type: number}) { + const TypedArray = 
Object.getPrototypeOf(Int8Array); + + const arrayConstructors = { + Uint8Array, + Int8Array, + Uint16Array, + Int16Array, + Uint32Array, + Int32Array, + BigUint64Array, + BigInt64Array, + Float32Array, + Float64Array, + }; + + const arrayTypeNameToNumber: Map = new Map([ + ["Uint8Array", 1], + ["Int8Array", 255-1], + ["Uint16Array", 2], + ["Int16Array", 255-2], + ["Uint32Array", 3], + ["Int32Array", 255-3], + ["BigUint64Array", 4], + ["BigInt64Array", 255-4], + ["Float32Array", 9], + ["Float64Array", 10], + ]); + + const arrayTypeNumberToName: Map = new Map( + [...arrayTypeNameToNumber.entries()] + .map(entry => entry.reverse() as [number, string]) + ); + + const arrayHeaderSize = 2; + + return { + type, + + encode(encoder: Encoder, depth: number, object: unknown, context: C) { + if (!(object instanceof TypedArray)) { + return false; + } + + const array = object as ArrayBufferView; + const alignment = (array as any).constructor.BYTES_PER_ELEMENT; + const arrayType = arrayTypeNameToNumber.get((array as any).constructor.name)!; + + // Always use ext32 to make things simpler for now + const extHeaderSize = 6; + const unalignedDataStart = encoder["pos"] + extHeaderSize + arrayHeaderSize; + const alignBytes = alignment - (unalignedDataStart % alignment); + const extDataSize = arrayHeaderSize + alignBytes + array.buffer.byteLength; + + // Ext32 header + encoder["writeU8"](0xc9); + encoder["writeU32"](extDataSize); + encoder["writeU8"](type); + + // TypedArray header + encoder["writeU8"](arrayType); // TODO: map typedarray types + encoder["writeU8"](alignBytes); + for (let i = 0; i < alignBytes; i += 1) { + encoder["writeU8"](0); + } + + const bytes = ensureUint8Array(array); + encoder["writeU8a"](bytes); + + return true; + }, + + decode(data: Uint8Array, extensionType: number, context: C) { + if (extensionType !== type) { + return null; + } + + const arrayType = data[0]!; + const alignBytes = data[1]!; + + const ctorName = arrayTypeNumberToName.get(arrayType)!; + const 
ctor = (arrayConstructors as any)[ctorName] as new (...args: any[]) => ArrayBufferView; + const alignment = (ctor as any).BYTES_PER_ELEMENT; + + return new ctor( + data.buffer, + data.byteOffset + arrayHeaderSize + alignBytes, + (data.length - alignBytes - 2) / alignment + ); + }, + }; +} \ No newline at end of file From c9b288503a9e5146d7be22db71c628f6d78de30e Mon Sep 17 00:00:00 2001 From: Daniel Buckmaster Date: Sun, 19 Sep 2021 13:28:23 +1000 Subject: [PATCH 3/9] Move typed arrays into a folder and add a readme --- example/typed-arrays/README.md | 88 +++++++++++++++++++ .../example.ts} | 6 +- .../plugin.ts} | 4 +- 3 files changed, 93 insertions(+), 5 deletions(-) create mode 100644 example/typed-arrays/README.md rename example/{typed-array-plugin-example.ts => typed-arrays/example.ts} (76%) rename example/{typed-arrays-plugin.ts => typed-arrays/plugin.ts} (95%) diff --git a/example/typed-arrays/README.md b/example/typed-arrays/README.md new file mode 100644 index 00000000..aa540047 --- /dev/null +++ b/example/typed-arrays/README.md @@ -0,0 +1,88 @@ +# MessagePack typed arrays + +This is an extension to MessagePack which provides "native" support for JS's TypedArray family. + +## Why? + +The official JS library can already handle TypedArrays by serialising them as binary data, but this has two disadvantages: + +1. You must know, and manually construct, the correct type of array from raw binary data after deserialising. +2. The data is unaligned, which may require copying it into a new array before using it. + +Number 2 is the main reason I was inspired to write an extension to handle these types; I didn't want to give up on the possibility of zero-copy decoding. 
+ +## Spec + +`data` has some internal layout which looks like the following: + +``` ++--------+--------+========+========+ +| artype |AAAAAAAA| align | vals | ++--------+--------+========+========+ +``` + +Where: + +- `artype` is an identifier for the type of array that is stored +- `AAAAAAAA` is an 8-bit unsigned integer +- `align` is a number of bytes equal to the value of `AAAAAAAA`, all of which contain 0 +- `vals` is the binary content of the TypedArray + +The value of `AAAAAAAA`, and therefore the number of bytes in the `align` segment, is determined so that `cont` begins on a byte offset from the _beginning of the encoded MessagePack object_ which correctly aligns `cont` for efficient access. + +If `AAAAAAAA` is 0, then there are no `align` bytes, and `vals` begins immediately after. + +Note that the length of `data`, and therefore the value of `YYYYYYYY_YYYYYYYY` includes _all_ of `artype`, `AAAAAAAA`, `align` and `vals`. + +## Example + +A Float32Array containing 10 values will have a `data` size starting at 42 bytes if there is no alignment: + +- 1 byte of `artype` = `0x??` +- 1 byte of `AAAAAAAA` = 0 +- 0 bytes of `align` +- 40 bytes of `vals` + +A Float32Array should be aligned on 4-byte boundaries, so there may need to be up to 3 bytes of padding. +In that case, the total size of `data` woulb become so this may increase to 45 bytes: + +- 1 byte of `artype` = `0x??` +- 1 byte of `AAAAAAAA` = 3 +- 3 bytes of `align` +- 40 bytes of `vals` + +Since the extension array is wrapped with its own header, there is some additional structure before this content. + +See the [MessagePack spec for extensions](https://github.com/msgpack/msgpack/blob/master/spec.md#ext-format-family). +The content of a TypedArray object is inserted after the extension header. 
+For example, an extension where the size of the encoded array is up to (2^8)-1 bytes will be laid out like this: + +``` ++--------+--------+--------+========+ +| 0xc7 |XXXXXXXX| type | data | ++--------+--------+--------+========+ +``` + +Where: + +- `0xc7` is the `ext 8` header +- `XXXXXXXX` is an 8-bit unsigned integer which represents the length of `data` in bytes +- `type` is the extension type number 0-127 + +So to put the entire example of a 10-entry Float32Array together, it would be represented as: + +``` ++--------+--------+--------+--------+--------+========+========+ +| 0xc7 | 0x2D | type | 0x?? | 0x03 |3 zeros | vals | ++--------+--------+--------+--------+--------+========+========+ +``` + +Where: + +- `0xc7` is the MessagePack type for `ext 8` +- `0x2D` is 45, the length of the TypedArray payload described above +- `type` is the extension type number +- `0x??` is the `artype` number for Float32Array +- `0x03` is the number of alignment bytes +- 3 zeros are required for alignment +- `vals` contains the actual floating-point data diff --git a/example/typed-array-plugin-example.ts b/example/typed-arrays/example.ts similarity index 76% rename from example/typed-array-plugin-example.ts rename to example/typed-arrays/example.ts index b9fd76d8..18175e98 100644 --- a/example/typed-array-plugin-example.ts +++ b/example/typed-arrays/example.ts @@ -1,7 +1,7 @@ -// ts-node example/typed-array-plugin-example.ts +// ts-node example/typed-arrays/example.ts -import { encode, decode, ExtensionCodec } from "../src"; -import { typedArrays } from "../example/typed-arrays-plugin"; +import { encode, decode, ExtensionCodec } from "../../src"; +import { typedArrays } from "./plugin"; const extensionCodec = new ExtensionCodec(); extensionCodec.registerPlugin(typedArrays({ type: 1 })); diff --git a/example/typed-arrays-plugin.ts b/example/typed-arrays/plugin.ts similarity index 95% rename from example/typed-arrays-plugin.ts rename to example/typed-arrays/plugin.ts index
e1100ef2..b3bc181c 100644 --- a/example/typed-arrays-plugin.ts +++ b/example/typed-arrays/plugin.ts @@ -1,5 +1,5 @@ -import type { Encoder } from "../src/Encoder"; -import { ensureUint8Array } from "../src/utils/typedArrays"; +import type { Encoder } from "../../src/Encoder"; +import { ensureUint8Array } from "../../src/utils/typedArrays"; export function typedArrays({type}: {type: number}) { const TypedArray = Object.getPrototypeOf(Int8Array); From 830fec0f4ae120e7185c084cd94143fea3380764 Mon Sep 17 00:00:00 2001 From: Daniel Buckmaster Date: Sun, 19 Sep 2021 13:42:50 +1000 Subject: [PATCH 4/9] Improve readme --- example/typed-arrays/README.md | 44 +++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/example/typed-arrays/README.md b/example/typed-arrays/README.md index aa540047..92b9607e 100644 --- a/example/typed-arrays/README.md +++ b/example/typed-arrays/README.md @@ -7,13 +7,16 @@ This is an extension to MessagePack which provides "native" support for JS's Typ The official JS library can already handle TypedArrays by serialising them as binary data, but this has two disadvantages: 1. You must know, and manually construct, the correct type of array from raw binary data after deserialising. -2. The data is unaligned, which may require copying it into a new array before using it. +2. The data is unaligned, which may require copying it into a new array before using it. (See [about alignment](#about-alignment).) Number 2 is the main reason I was inspired to write an extension to handle these types; I didn't want to give up on the possibility of zero-copy decoding. ## Spec -`data` has some internal layout which looks like the following: +TypedArray support is implemented as a MessagePack [extension](https://github.com/msgpack/msgpack/blob/master/spec.md#ext-format-family). +Extensions are encoded as a header followed by an opaque `data` array. 
+ +This extension fills `data` with an internal layout which looks like the following: ``` +--------+--------+========+========+ @@ -28,12 +31,27 @@ Where: - `align` is a number of bytes equal to the value of `AAAAAAAA`, all of which contain 0 - `vals` is the binary content of the TypedArray -The value of `AAAAAAAA`, and therefore the number of bytes in the `align` segment, is determined so that `cont` begins on a byte offset from the _beginning of the encoded MessagePack object_ which correctly aligns `cont` for efficient access. +The value of `AAAAAAAA`, and therefore the number of bytes in the `align` segment, is determined so that `vals` begins on a byte offset from the _beginning of the encoded MessagePack object_ which correctly aligns `vals` for efficient access. If `AAAAAAAA` is 0, then there are no `align` bytes, and `vals` begins immediately after. Note that the length of `data`, and therefore the value of `YYYYYYYY_YYYYYYYY` includes _all_ of `artype`, `AAAAAAAA`, `align` and `vals`. +### Array types + +| Constructor | `artype` decimal | `artype` hex | +| - | - | - | +| Uint8Array | 1 | 0x01 | +| Int8Array | -1 | 0xfe | +| Uint16Array | 2 | 0x02 | +| Int16Array | -2 | 0xfd | +| Uint32Array | 3 | 0x03 | +| Int32Array | -3 | 0xfc | +| BigUint64Array | 4 | 0x04 | +| BigInt64Array | -4 | 0xfb | +| Float32Array | 9 | 0x09 | +| Float64Array | 10 | 0x0a | + ## Example A Float32Array containing 10 values will have a `data` size starting at 42 bytes if there is no alignment: @@ -86,3 +104,23 @@ Where: - `0x03` is the number of alignment bytes - 3 zeros are required for alignment - `vals` contains the actual floating-point data + +## About alignment + +This [SO question](https://stackoverflow.com/q/7372124) demonstrates the problem: + +```js +new Float32Array(buffer, 31, 6); +``` + +will throw an exception. +When creating any TypedArray, the offset (2nd argument) must be a multiple of the byte length of the element type. 
+In the case of a Float32Array, 31 is not a multiple of 4 so the creation fails. + +As the top answer states, + +> Some architectures do not allow unaligned word accesses, and there are performance penalties on architectures that do allow it such as x86 (though some instructions must be aligned). + +[This post](http://www.songho.ca/misc/alignment/dataalign.html) contains more details. +So the typical approach if you receive some data from a MessagePack buffer which you want to access as a TypedArray is to copy the data out into a new buffer entirely. +Because new buffers are correctly aligned (e.g. their first byte falls on a [max_align_t](https://en.cppreference.com/w/c/types/max_align_t) memory address), and the offset will be 0 for the new buffer, your access will work fine. \ No newline at end of file From fa4def56a725cd6493148abaa61f65718a7748f9 Mon Sep 17 00:00:00 2001 From: Daniel Buckmaster Date: Sun, 19 Sep 2021 13:46:32 +1000 Subject: [PATCH 5/9] artype is known now --- example/typed-arrays/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/example/typed-arrays/README.md b/example/typed-arrays/README.md index 92b9607e..057479fa 100644 --- a/example/typed-arrays/README.md +++ b/example/typed-arrays/README.md @@ -56,7 +56,7 @@ Note that the length of `data`, and therefore the value of `YYYYYYYY_YYYYYYYY` i A Float32Array containing 10 values will have a `data` size starting at 42 bytes if there is no alignment: -- 1 byte of `artype` = `0x??` +- 1 byte of `artype` = `0x09` - 1 byte of `AAAAAAAA` = 0 - 0 bytes of `align` - 40 bytes of `vals` @@ -64,7 +64,7 @@ A Float32Array containing 10 values will have a `data` size starting at 42 bytes A Float32Array should be aligned on 4-byte boundaries, so there may need to be up to 3 bytes of padding. 
In that case, the total size of `data` woulb become so this may increase to 45 bytes: -- 1 byte of `artype` = `0x??` +- 1 byte of `artype` = `0x09` - 1 byte of `AAAAAAAA` = 3 - 3 bytes of `align` - 40 bytes of `vals` @@ -91,7 +91,7 @@ So to put the entire example of a 10-entry Float32Array together, it would be re ``` +--------+--------+--------+--------+--------+========+========+ -| 0xc7 | 0x2D | type | 0x?? | 0x03 |3 zeros | vals | +| 0xc7 | 0x2D | type | 0x09 | 0x03 |3 zeros | vals | +--------+--------+--------+--------+--------+========+========+ ``` @@ -100,7 +100,7 @@ Where: - `0xc7` is the MessagePack type for `ext 8` - `0x2D` is 45, the length of the TypedArray payload described above - `type` is the extension type number -- `0x??` is the `artype` number for Float32Array +- `0x09` is the `artype` number for Float32Array - `0x03` is the number of alignment bytes - 3 zeros are required for alignment - `vals` contains the actual floating-point data From e47c3ad3aa1fd0b463a4950b668130b71f6ae8ee Mon Sep 17 00:00:00 2001 From: Daniel Buckmaster Date: Sun, 19 Sep 2021 13:48:35 +1000 Subject: [PATCH 6/9] Fix typo and rearrange --- example/typed-arrays/README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/example/typed-arrays/README.md b/example/typed-arrays/README.md index 057479fa..e59d964e 100644 --- a/example/typed-arrays/README.md +++ b/example/typed-arrays/README.md @@ -62,16 +62,18 @@ A Float32Array containing 10 values will have a `data` size starting at 42 bytes - 40 bytes of `vals` A Float32Array should be aligned on 4-byte boundaries, so there may need to be up to 3 bytes of padding. 
-In that case, the total size of `data` woulb become so this may increase to 45 bytes: +In that case, the total size of `data` would become 45 bytes: - 1 byte of `artype` = `0x09` - 1 byte of `AAAAAAAA` = 3 - 3 bytes of `align` - 40 bytes of `vals` -Since the extension array is wrapped with its own header, there is some additional structure before this content. +The exact amount of padding depends on what data has been encoded _before_ the TypedArray is encountered. +Since the extension array is wrapped with its own header, there is some additional structure before this content. See the [MessagePack spec for extensions](https://github.com/msgpack/msgpack/blob/master/spec.md#ext-format-family). + The content of a TypedArray object is inserted after the extension header. For example, an extension where the size of the encoded array is up to (2^8)-1 bytes will be laid out like this: From a9a5567d04ea51f88689c6d1a7a7076656c073ee Mon Sep 17 00:00:00 2001 From: Daniel Buckmaster Date: Sun, 19 Sep 2021 14:02:24 +1000 Subject: [PATCH 7/9] Fix test import --- test/ExtensionCodecPlugin.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/ExtensionCodecPlugin.test.ts b/test/ExtensionCodecPlugin.test.ts index a7096b94..b5435d3d 100644 --- a/test/ExtensionCodecPlugin.test.ts +++ b/test/ExtensionCodecPlugin.test.ts @@ -1,6 +1,6 @@ import assert from "assert"; import { encode, decode, Encoder, ExtensionCodec, ExtData, decodeAsync } from "../src"; -import { typedArrays } from "../example/typed-arrays-plugin"; +import { typedArrays } from "../example/typed-arrays/plugin"; describe("ExtensionCodecPlugin", () => { context("custom extension plugin", () => { From ad28160c5ccabdda10740973d0dc665ecb68136b Mon Sep 17 00:00:00 2001 From: Daniel Buckmaster Date: Sun, 19 Sep 2021 17:02:45 +1000 Subject: [PATCH 8/9] e.g.i.e. 
--- example/typed-arrays/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/typed-arrays/README.md b/example/typed-arrays/README.md index e59d964e..111f0998 100644 --- a/example/typed-arrays/README.md +++ b/example/typed-arrays/README.md @@ -125,4 +125,4 @@ As the top answer states, [This post](http://www.songho.ca/misc/alignment/dataalign.html) contains more details. So the typical approach if you receive some data from a MessagePack buffer which you want to access as a TypedArray is to copy the data out into a new buffer entirely. -Because new buffers are correctly aligned (e.g. their first byte falls on a [max_align_t](https://en.cppreference.com/w/c/types/max_align_t) memory address), and the offset will be 0 for the new buffer, your access will work fine. \ No newline at end of file +Because new buffers are correctly aligned (i.e. their first byte falls on a [max_align_t](https://en.cppreference.com/w/c/types/max_align_t) memory address), and the offset will be 0 for the new buffer, your access will work fine. 
From 7f84239463b162358ec11a39ced36fdb878b35a4 Mon Sep 17 00:00:00 2001 From: Daniel Buckmaster Date: Mon, 20 Sep 2021 11:28:44 +1000 Subject: [PATCH 9/9] Refactor 'plugin' mechanism by giving behaviour to ExtData --- example/typed-arrays/example.ts | 2 +- example/typed-arrays/plugin.ts | 134 ++++++++++++++++-------------- src/Encoder.ts | 7 +- src/ExtData.ts | 8 +- src/ExtensionCodec.ts | 50 +++-------- test/ExtensionCodecPlugin.test.ts | 40 +-------- 6 files changed, 91 insertions(+), 150 deletions(-) diff --git a/example/typed-arrays/example.ts b/example/typed-arrays/example.ts index 18175e98..b9cb9f74 100644 --- a/example/typed-arrays/example.ts +++ b/example/typed-arrays/example.ts @@ -4,7 +4,7 @@ import { encode, decode, ExtensionCodec } from "../../src"; import { typedArrays } from "./plugin"; const extensionCodec = new ExtensionCodec(); -extensionCodec.registerPlugin(typedArrays({ type: 1 })); +extensionCodec.register(typedArrays({ type: 1 })); const int16Array = new Int16Array([-4, 1, 5]); const float32Array = new Float32Array([1, -2, 3, 1e-9, 5]); diff --git a/example/typed-arrays/plugin.ts b/example/typed-arrays/plugin.ts index b3bc181c..6f441fd7 100644 --- a/example/typed-arrays/plugin.ts +++ b/example/typed-arrays/plugin.ts @@ -1,76 +1,52 @@ +import { ExtData } from "src/ExtData"; import type { Encoder } from "../../src/Encoder"; import { ensureUint8Array } from "../../src/utils/typedArrays"; -export function typedArrays({type}: {type: number}) { - const TypedArray = Object.getPrototypeOf(Int8Array); - - const arrayConstructors = { - Uint8Array, - Int8Array, - Uint16Array, - Int16Array, - Uint32Array, - Int32Array, - BigUint64Array, - BigInt64Array, - Float32Array, - Float64Array, - }; - - const arrayTypeNameToNumber: Map = new Map([ - ["Uint8Array", 1], - ["Int8Array", 255-1], - ["Uint16Array", 2], - ["Int16Array", 255-2], - ["Uint32Array", 3], - ["Int32Array", 255-3], - ["BigUint64Array", 4], - ["BigInt64Array", 255-4], - ["Float32Array", 9], - 
["Float64Array", 10], - ]); - - const arrayTypeNumberToName: Map = new Map( - [...arrayTypeNameToNumber.entries()] - .map(entry => entry.reverse() as [number, string]) - ); - - const arrayHeaderSize = 2; +const TypedArray = Object.getPrototypeOf(Int8Array); + +const arrayConstructors = { + Uint8Array, + Int8Array, + Uint16Array, + Int16Array, + Uint32Array, + Int32Array, + BigUint64Array, + BigInt64Array, + Float32Array, + Float64Array, +}; + +const arrayTypeNameToNumber: Map = new Map([ + ["Uint8Array", 1], + ["Int8Array", 255-1], + ["Uint16Array", 2], + ["Int16Array", 255-2], + ["Uint32Array", 3], + ["Int32Array", 255-3], + ["BigUint64Array", 4], + ["BigInt64Array", 255-4], + ["Float32Array", 9], + ["Float64Array", 10], +]); + +const arrayTypeNumberToName: Map = new Map( + [...arrayTypeNameToNumber.entries()] + .map(entry => entry.reverse() as [number, string]) +); + +const arrayHeaderSize = 2; +export function typedArrays({type}: {type: number}) { return { type, - encode(encoder: Encoder, depth: number, object: unknown, context: C) { + encode(object: unknown, context: C) { if (!(object instanceof TypedArray)) { - return false; - } - - const array = object as ArrayBufferView; - const alignment = (array as any).constructor.BYTES_PER_ELEMENT; - const arrayType = arrayTypeNameToNumber.get((array as any).constructor.name)!; - - // Always use ext32 to make things simpler for now - const extHeaderSize = 6; - const unalignedDataStart = encoder["pos"] + extHeaderSize + arrayHeaderSize; - const alignBytes = alignment - (unalignedDataStart % alignment); - const extDataSize = arrayHeaderSize + alignBytes + array.buffer.byteLength; - - // Ext32 header - encoder["writeU8"](0xc9); - encoder["writeU32"](extDataSize); - encoder["writeU8"](type); - - // TypedArray header - encoder["writeU8"](arrayType); // TODO: map typedarray types - encoder["writeU8"](alignBytes); - for (let i = 0; i < alignBytes; i += 1) { - encoder["writeU8"](0); + return null; } - const bytes = 
ensureUint8Array(array); - encoder["writeU8a"](bytes); - - return true; + return new TypedArrayExtData(type, object as ArrayBufferView); }, decode(data: Uint8Array, extensionType: number, context: C) { @@ -92,4 +68,36 @@ export function typedArrays({type}: {type: number}) { ); }, }; +} + +class TypedArrayExtData extends ExtData { + constructor(type: number, private readonly array: ArrayBufferView) { + super(type, new Uint8Array()); + } + + override write(encoder: Encoder, depth: number, source: unknown) { + const alignment = (this.array as any).constructor.BYTES_PER_ELEMENT; + const arrayType = arrayTypeNameToNumber.get((this.array as any).constructor.name)!; + + // Always use ext32 to make things simpler for now + const extHeaderSize = 6; + const unalignedDataStart = encoder["pos"] + extHeaderSize + arrayHeaderSize; + const alignBytes = alignment - (unalignedDataStart % alignment); + const extDataSize = arrayHeaderSize + alignBytes + this.array.buffer.byteLength; + + // Ext32 header + encoder["writeU8"](0xc9); + encoder["writeU32"](extDataSize); + encoder["writeU8"](this.type); + + // TypedArray header + encoder["writeU8"](arrayType); // TODO: map typedarray types + encoder["writeU8"](alignBytes); + for (let i = 0; i < alignBytes; i += 1) { + encoder["writeU8"](0); + } + + const bytes = ensureUint8Array(this.array); + encoder["writeU8a"](bytes); + } } \ No newline at end of file diff --git a/src/Encoder.ts b/src/Encoder.ts index 7531a186..dc478dd1 100644 --- a/src/Encoder.ts +++ b/src/Encoder.ts @@ -185,15 +185,10 @@ export class Encoder { } private encodeObject(object: unknown, depth: number) { - const pluginRan = this.extensionCodec.tryToEncodePlugin(this, depth, object, this.context); - if (pluginRan) { - return; - } - // try to encode objects with custom codec first of non-primitives const ext = this.extensionCodec.tryToEncode(object, this.context); if (ext != null) { - this.encodeExtension(ext); + ext.write(this, depth, object); } else if 
(Array.isArray(object)) { this.encodeArray(object, depth); } else if (ArrayBuffer.isView(object)) { diff --git a/src/ExtData.ts b/src/ExtData.ts index f69060e8..c29d9cf4 100644 --- a/src/ExtData.ts +++ b/src/ExtData.ts @@ -1,6 +1,12 @@ +import type { Encoder } from "./Encoder"; + /** * ExtData is used to handle Extension Types that are not registered to ExtensionCodec. */ export class ExtData { constructor(readonly type: number, readonly data: Uint8Array) {} -} + + write(encoder: Encoder, depth: number, source: unknown) { + encoder["encodeExtension"](this); + } +} \ No newline at end of file diff --git a/src/ExtensionCodec.ts b/src/ExtensionCodec.ts index 517c3a7a..c206ec35 100644 --- a/src/ExtensionCodec.ts +++ b/src/ExtensionCodec.ts @@ -2,7 +2,6 @@ import { ExtData } from "./ExtData"; import { timestampExtension } from "./timestamp"; -import type { Encoder } from "./Encoder"; export type ExtensionDecoderType = ( data: Uint8Array, @@ -10,16 +9,13 @@ export type ExtensionDecoderType = ( context: ContextType, ) => unknown; -export type ExtensionEncoderType = (input: unknown, context: ContextType) => Uint8Array | null; - -export type ExtensionEncoderPluginType = (encoder: Encoder, depth: number, input: unknown, context: ContextType) => boolean; +export type ExtensionEncoderType = (input: unknown, context: ContextType) => Uint8Array | ExtData | null; // immutable interfce to ExtensionCodec export type ExtensionCodecType = { // eslint-disable-next-line @typescript-eslint/naming-convention __brand?: ContextType; tryToEncode(object: unknown, context: ContextType): ExtData | null; - tryToEncodePlugin(encoder: Encoder, depth: number, object: unknown, context: ContextType): boolean; decode(data: Uint8Array, extType: number, context: ContextType): unknown; }; @@ -38,7 +34,6 @@ export class ExtensionCodec implements ExtensionCodecTy // custom extensions private readonly encoders: Array | undefined | null> = []; private readonly decoders: Array | undefined | null> = []; - 
private readonly rawEncoders: Array | undefined | null> = []; public constructor() { this.register(timestampExtension); @@ -65,24 +60,6 @@ export class ExtensionCodec implements ExtensionCodecTy } } - public registerPlugin({ - type, - encode, - decode, - }: { - type: number; - encode: ExtensionEncoderPluginType; - decode: ExtensionDecoderType; - }): void { - if (type >= 0) { - // custom extensions - this.rawEncoders[type] = encode; - this.decoders[type] = decode; - } else { - throw new Error("cannot register plugin for builtin type"); - } - } - public tryToEncode(object: unknown, context: ContextType): ExtData | null { // built-in extensions for (let i = 0; i < this.builtInEncoders.length; i++) { @@ -91,7 +68,7 @@ export class ExtensionCodec implements ExtensionCodecTy const data = encodeExt(object, context); if (data != null) { const type = -1 - i; - return new ExtData(type, data); + return ensureExtData(type, data); } } } @@ -103,7 +80,7 @@ export class ExtensionCodec implements ExtensionCodecTy const data = encodeExt(object, context); if (data != null) { const type = i; - return new ExtData(type, data); + return ensureExtData(type, data); } } } @@ -115,20 +92,6 @@ export class ExtensionCodec implements ExtensionCodecTy return null; } - public tryToEncodePlugin(encoder: Encoder, depth: number, object: unknown, context: ContextType): boolean { - for (let i = 0; i < this.rawEncoders.length; i++) { - const encodeExt = this.rawEncoders[i]; - if (encodeExt != null) { - const accepted = encodeExt(encoder, depth, object, context); - if (accepted) { - return true; - } - } - } - - return false; - } - public decode(data: Uint8Array, type: number, context: ContextType): unknown { const decodeExt = type < 0 ? 
this.builtInDecoders[-1 - type] : this.decoders[type]; if (decodeExt) { @@ -139,3 +102,10 @@ export class ExtensionCodec implements ExtensionCodecTy } } } + +function ensureExtData(type: number, ext: Uint8Array | ExtData) { + if (ext instanceof Uint8Array) { + return new ExtData(type, ext); + } + return ext; +} diff --git a/test/ExtensionCodecPlugin.test.ts b/test/ExtensionCodecPlugin.test.ts index b5435d3d..48fe60c9 100644 --- a/test/ExtensionCodecPlugin.test.ts +++ b/test/ExtensionCodecPlugin.test.ts @@ -3,47 +3,9 @@ import { encode, decode, Encoder, ExtensionCodec, ExtData, decodeAsync } from ". import { typedArrays } from "../example/typed-arrays/plugin"; describe("ExtensionCodecPlugin", () => { - context("custom extension plugin", () => { - const extensionCodec = new ExtensionCodec(); - - // Set - extensionCodec.registerPlugin({ - type: 0, - encode: (encoder: Encoder, depth: number, object: unknown): boolean => { - if (object instanceof Set) { - // This uses the plugin mechanism in a pointless way: simply encoding an extension - // the same as it would have been normally. 
- const extData = encode([...object]); - encoder["encodeExtension"](new ExtData(0, extData)); - return true; - } - return false; - }, - decode: (data: Uint8Array) => { - const array = decode(data) as Array; - return new Set(array); - }, - }); - - it("encodes and decodes custom data types (synchronously)", () => { - const set = new Set([1, 2, 3]); - const encoded = encode([set], { extensionCodec }); - assert.deepStrictEqual(decode(encoded, { extensionCodec }), [set]); - }); - - it("encodes and decodes custom data types (asynchronously)", async () => { - const set = new Set([1, 2, 3]); - const encoded = encode([set], { extensionCodec }); - const createStream = async function* () { - yield encoded; - }; - assert.deepStrictEqual(await decodeAsync(createStream(), { extensionCodec }), [set]); - }); - }); - context("typed-arrays-plugin example", () => { const extensionCodec = new ExtensionCodec(); - extensionCodec.registerPlugin(typedArrays({type: 1})); + extensionCodec.register(typedArrays({type: 1})); it("encodes and decodes a Float32Array (synchronously)", () => { const floatArray = new Float32Array([1, 2, 3, 4, 5]);