diff --git a/example/typed-arrays/README.md b/example/typed-arrays/README.md new file mode 100644 index 00000000..111f0998 --- /dev/null +++ b/example/typed-arrays/README.md @@ -0,0 +1,128 @@ +# MessagePack typed arrays + +This is an extension to MessagePack which provides "native" support for JS's TypedArray family. + +## Why? + +The official JS library can already handle TypedArrays by serialising them as binary data, but this has two disadvantages: + +1. You must know, and manually construct, the correct type of array from raw binary data after deserialising. +2. The data is unaligned, which may require copying it into a new array before using it. (See [about alignment](#about-alignment).) + +Number 2 is the main reason I was inspired to write an extension to handle these types; I didn't want to give up on the possibility of zero-copy decoding. + +## Spec + +TypedArray support is implemented as a MessagePack [extension](https://github.com/msgpack/msgpack/blob/master/spec.md#ext-format-family). +Extensions are encoded as a header followed by an opaque `data` array. + +This extension fills `data` with an internal layout which looks like the following: + +``` ++--------+--------+========+========+ +| artype |AAAAAAAA| align | vals | ++--------+--------+========+========+ +``` + +Where: + +- `artype` is an identifier for the type of array that is stored +- `AAAAAAAA` is an 8-bit unsigned integer +- `align` is a number of bytes equal to the value of `AAAAAAAA`, all of which contain 0 +- `vals` is the binary content of the TypedArray + +The value of `AAAAAAAA`, and therefore the number of bytes in the `align` segment, is determined so that `vals` begins on a byte offset from the _beginning of the encoded MessagePack object_ which correctly aligns `vals` for efficient access. + +If `AAAAAAAA` is 0, then there are no `align` bytes, and `vals` begins immediately after. 
+ +Note that the length of `data`, and therefore the value of the length field in the extension header (`XXXXXXXX` for `ext 8`, `YYYYYYYY_YYYYYYYY` for `ext 16`), includes _all_ of `artype`, `AAAAAAAA`, `align` and `vals`. + +### Array types + +| Constructor | `artype` decimal | `artype` hex | +| - | - | - | +| Uint8Array | 1 | 0x01 | +| Int8Array | -1 | 0xfe | +| Uint16Array | 2 | 0x02 | +| Int16Array | -2 | 0xfd | +| Uint32Array | 3 | 0x03 | +| Int32Array | -3 | 0xfc | +| BigUint64Array | 4 | 0x04 | +| BigInt64Array | -4 | 0xfb | +| Float32Array | 9 | 0x09 | +| Float64Array | 10 | 0x0a | + +## Example + +A Float32Array containing 10 values will have a `data` size starting at 42 bytes if there is no alignment: + +- 1 byte of `artype` = `0x09` +- 1 byte of `AAAAAAAA` = 0 +- 0 bytes of `align` +- 40 bytes of `vals` + +A Float32Array should be aligned on 4-byte boundaries, so there may need to be up to 3 bytes of padding. +In that case, the total size of `data` would become 45 bytes: + +- 1 byte of `artype` = `0x09` +- 1 byte of `AAAAAAAA` = 3 +- 3 bytes of `align` +- 40 bytes of `vals` + +The exact amount of padding depends on what data has been encoded _before_ the TypedArray is encountered. + +Since the extension array is wrapped with its own header, there is some additional structure before this content. +See the [MessagePack spec for extensions](https://github.com/msgpack/msgpack/blob/master/spec.md#ext-format-family). + +The content of a TypedArray object is inserted after the extension header. 
+For example, an extension where the size of the encoded array is up to (2^8)-1 bytes will be laid out like this: + +``` ++--------+--------+--------+========+ +| 0xc7 |XXXXXXXX| type | data | ++--------+--------+--------+========+ +``` + +Where: + +- `0xc7` is the `ext 8` header +- `XXXXXXXX` is an 8-bit unsigned integer which represents the length of `data` in bytes +- `type` is the extension type number 0-127 + +So to put the entire example of a 10-entry Float32Array together, it would be represented as: + +``` ++--------+--------+--------+--------+--------+========+========+ +| 0xc7 | 0x2D | type | 0x09 | 0x03 |3 zeros | vals | ++--------+--------+--------+--------+--------+========+========+ +``` + +Where: + +- `0xc7` is the MessagePack type for `ext 8` +- `0x2D` is 45, the length of the TypedArray payload described above +- `type` is the extension type number +- `0x09` is the `artype` number for Float32Array +- `0x03` is the number of alignment bytes +- 3 zeros are required for alignment +- `vals` contains the actual floating-point data + +## About alignment + +This [SO question](https://stackoverflow.com/q/7372124) demonstrates the problem: + +```js +new Float32Array(buffer, 31, 6); +``` + +will throw an exception. +When creating any TypedArray, the offset (2nd argument) must be a multiple of the byte length of the element type. +In the case of a Float32Array, 31 is not a multiple of 4 so the creation fails. + +As the top answer states, + +> Some architectures do not allow unaligned word accesses, and there are performance penalties on architectures that do allow it such as x86 (though some instructions must be aligned). + +[This post](http://www.songho.ca/misc/alignment/dataalign.html) contains more details. +So the typical approach if you receive some data from a MessagePack buffer which you want to access as a TypedArray is to copy the data out into a new buffer entirely. +Because new buffers are correctly aligned (i.e. 
their first byte falls on a [max_align_t](https://en.cppreference.com/w/c/types/max_align_t) memory address), and the offset will be 0 for the new buffer, your access will work fine. diff --git a/example/typed-arrays/example.ts b/example/typed-arrays/example.ts new file mode 100644 index 00000000..b9cb9f74 --- /dev/null +++ b/example/typed-arrays/example.ts @@ -0,0 +1,20 @@ +// ts-node example/typed-arrays/example.ts + +import { encode, decode, ExtensionCodec } from "../../src"; +import { typedArrays } from "./plugin"; + +const extensionCodec = new ExtensionCodec(); +extensionCodec.register(typedArrays({ type: 1 })); + +const int16Array = new Int16Array([-4, 1, 5]); +const float32Array = new Float32Array([1, -2, 3, 1e-9, 5]); +console.log("Object to encode:"); +console.log({ int16Array, float32Array }); + +const encoded = encode({ int16Array, float32Array }, { extensionCodec }); +console.log("\n\nRaw encoded data:"); +console.log(encoded); + +const decoded = decode(encoded, { extensionCodec }); +console.log("\n\nDecoded object:"); +console.log(decoded); \ No newline at end of file diff --git a/example/typed-arrays/plugin.ts b/example/typed-arrays/plugin.ts new file mode 100644 index 00000000..6f441fd7 --- /dev/null +++ b/example/typed-arrays/plugin.ts @@ -0,0 +1,103 @@ +import { ExtData } from "../../src/ExtData"; +import type { Encoder } from "../../src/Encoder"; +import { ensureUint8Array } from "../../src/utils/typedArrays"; + +const TypedArray = Object.getPrototypeOf(Int8Array); /* the shared parent constructor of every typed array class; used for the instanceof check in encode() */ + +const arrayConstructors = { + Uint8Array, + Int8Array, + Uint16Array, + Int16Array, + Uint32Array, + Int32Array, + BigUint64Array, + BigInt64Array, + Float32Array, + Float64Array, +}; + +const arrayTypeNameToNumber: Map = new Map([ /* artype byte written for each constructor; signed integer types are stored as 255 - n */ + ["Uint8Array", 1], + ["Int8Array", 255-1], + ["Uint16Array", 2], + ["Int16Array", 255-2], + ["Uint32Array", 3], + ["Int32Array", 255-3], + ["BigUint64Array", 4], + ["BigInt64Array", 255-4], + ["Float32Array", 9], + ["Float64Array", 10], +]); + +const 
arrayTypeNumberToName: Map = new Map( + [...arrayTypeNameToNumber.entries()] + .map(entry => entry.reverse() as [number, string]) +); + +const arrayHeaderSize = 2; /* bytes of per-array header inside the ext payload: artype + alignment count */ + +export function typedArrays({type}: {type: number}) { + return { + type, + + encode(object: unknown, context: C) { + if (!(object instanceof TypedArray)) { + return null; + } + + return new TypedArrayExtData(type, object as ArrayBufferView); + }, + + decode(data: Uint8Array, extensionType: number, context: C) { + if (extensionType !== type) { + return null; + } + + const arrayType = data[0]!; + const alignBytes = data[1]!; + + const ctorName = arrayTypeNumberToName.get(arrayType)!; + const ctor = (arrayConstructors as any)[ctorName] as new (...args: any[]) => ArrayBufferView; + const alignment = (ctor as any).BYTES_PER_ELEMENT; + + return new ctor( /* zero-copy view over the decoded buffer, starting just past the header and padding */ + data.buffer, + data.byteOffset + arrayHeaderSize + alignBytes, + (data.length - arrayHeaderSize - alignBytes) / alignment + ); + }, + }; +} + +class TypedArrayExtData extends ExtData { + constructor(type: number, private readonly array: ArrayBufferView) { + super(type, new Uint8Array()); + } + + override write(encoder: Encoder, depth: number, source: unknown) { + const alignment = (this.array as any).constructor.BYTES_PER_ELEMENT; + const arrayType = arrayTypeNameToNumber.get((this.array as any).constructor.name)!; + + // Always use ext32 to make things simpler for now + const extHeaderSize = 6; + const unalignedDataStart = encoder["pos"] + extHeaderSize + arrayHeaderSize; + const alignBytes = (alignment - (unalignedDataStart % alignment)) % alignment; /* trailing % makes this 0 when the data is already aligned, instead of a full extra element of padding */ + const extDataSize = arrayHeaderSize + alignBytes + this.array.byteLength; /* byte length of the view itself, not of its whole backing buffer, so the ext length matches the bytes written below */ + + // Ext32 header + encoder["writeU8"](0xc9); + encoder["writeU32"](extDataSize); + encoder["writeU8"](this.type); + + // TypedArray header + encoder["writeU8"](arrayType); // TODO: map typedarray types + encoder["writeU8"](alignBytes); + for (let i = 0; i < alignBytes; i += 1) { + encoder["writeU8"](0); + } + + const bytes = 
ensureUint8Array(this.array); + encoder["writeU8a"](bytes); + } +} \ No newline at end of file diff --git a/src/Encoder.ts b/src/Encoder.ts index afea365c..dc478dd1 100644 --- a/src/Encoder.ts +++ b/src/Encoder.ts @@ -188,7 +188,7 @@ export class Encoder { // try to encode objects with custom codec first of non-primitives const ext = this.extensionCodec.tryToEncode(object, this.context); if (ext != null) { - this.encodeExtension(ext); + ext.write(this, depth, object); } else if (Array.isArray(object)) { this.encodeArray(object, depth); } else if (ArrayBuffer.isView(object)) { diff --git a/src/ExtData.ts b/src/ExtData.ts index f69060e8..c29d9cf4 100644 --- a/src/ExtData.ts +++ b/src/ExtData.ts @@ -1,6 +1,12 @@ +import type { Encoder } from "./Encoder"; + /** * ExtData is used to handle Extension Types that are not registered to ExtensionCodec. */ export class ExtData { constructor(readonly type: number, readonly data: Uint8Array) {} -} + + write(encoder: Encoder, depth: number, source: unknown) { /* default: hand this ExtData to the encoder's own encodeExtension; depth and source are unused here; subclasses may override to write their bytes directly */ + encoder["encodeExtension"](this); + } +} \ No newline at end of file diff --git a/src/ExtensionCodec.ts b/src/ExtensionCodec.ts index d38705ee..c206ec35 100644 --- a/src/ExtensionCodec.ts +++ b/src/ExtensionCodec.ts @@ -9,7 +9,7 @@ export type ExtensionDecoderType = ( context: ContextType, ) => unknown; -export type ExtensionEncoderType = (input: unknown, context: ContextType) => Uint8Array | ExtData | null; // immutable interfce to ExtensionCodec export type ExtensionCodecType = { @@ -68,7 +68,7 @@ export class ExtensionCodec implements ExtensionCodecTy const data = encodeExt(object, context); if (data != null) { const type = -1 - i; - return new ExtData(type, data); + return ensureExtData(type, data); } } } @@ -80,7 +80,7 @@ export class ExtensionCodec implements ExtensionCodecTy const data = encodeExt(object, context); if (data != null) { const type = i; - return new 
ExtData(type, data); + return ensureExtData(type, data); } } } @@ -102,3 +102,10 @@ export class ExtensionCodec implements ExtensionCodecTy } } } + +function ensureExtData(type: number, ext: Uint8Array | ExtData) { /* normalises an encoder result: raw bytes are wrapped in a new ExtData carrying `type`; an ExtData instance is returned unchanged and `type` is ignored */ + if (ext instanceof Uint8Array) { + return new ExtData(type, ext); + } + return ext; +} diff --git a/test/ExtensionCodecPlugin.test.ts b/test/ExtensionCodecPlugin.test.ts new file mode 100644 index 00000000..48fe60c9 --- /dev/null +++ b/test/ExtensionCodecPlugin.test.ts @@ -0,0 +1,16 @@ +import assert from "assert"; +import { encode, decode, Encoder, ExtensionCodec, ExtData, decodeAsync } from "../src"; +import { typedArrays } from "../example/typed-arrays/plugin"; + +describe("ExtensionCodecPlugin", () => { + context("typed-arrays-plugin example", () => { + const extensionCodec = new ExtensionCodec(); + extensionCodec.register(typedArrays({type: 1})); + + it("encodes and decodes a Float32Array (synchronously)", () => { /* round-trip check: decoding the encoded object must deep-equal the input */ + const floatArray = new Float32Array([1, 2, 3, 4, 5]); + const encoded = encode({ floatArray }, { extensionCodec }); + assert.deepStrictEqual(decode(encoded, { extensionCodec }), { floatArray }); + }); + }); +}); \ No newline at end of file