diff --git a/CHANGELOG.md b/CHANGELOG.md index a32b2a01..c1f7a7e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,15 @@ # @digitalbazaar/cborld ChangeLog +## 7.2.0 - 2024-10-xx + +### Added +- Add `async function typeTableLoader({registryEntryId})` option to look up the + `typeTable` to use by id for both `encode` and `decode`. + +### Changed +- Refactor `registryEntryId` encoding and decoding logic to be more readable + and to handle more error and edge cases. This is a work in progress. + ## 7.1.3 - 2024-10-16 ### Fixed diff --git a/lib/decode.js b/lib/decode.js index 96d55507..af9d5e8a 100644 --- a/lib/decode.js +++ b/lib/decode.js @@ -7,6 +7,7 @@ import {CborldError} from './CborldError.js'; import {Converter} from './Converter.js'; import {Decompressor} from './Decompressor.js'; import {inspect} from './util.js'; +import {default as varint} from 'varint'; // 0xd9 == 11011001 // 110 = CBOR major type 6 @@ -23,14 +24,16 @@ const CBORLD_TAG_SECOND_BYTE_LEGACY = 0x05; * @param {object} options - The options to use when decoding CBOR-LD. * @param {Uint8Array} options.cborldBytes - The encoded CBOR-LD bytes to * decode. - * @param {Function} options.documentLoader -The document loader to use when + * @param {Function} options.documentLoader - The document loader to use when * resolving JSON-LD Context URLs. * @param {diagnosticFunction} options.diagnose - A function that, if * provided, is called with diagnostic information. - * @param {Map} options.typeTable - A map of possible value types, including + * @param {Map} [options.typeTable] - A map of possible value types, including * `context`, `url`, `none`, and any JSON-LD type, each of which maps to * another map of values of that type to their associated CBOR-LD integer * values. + * @param {Function} [options.typeTableLoader] - The typeTable loader to use to + * resolve a registryEntryId to a typeTable. 
* @param {Map} options.appContextMap - A map of context string values * to their associated CBOR-LD integer values. For use with legacy * cborldBytes. @@ -40,6 +43,7 @@ const CBORLD_TAG_SECOND_BYTE_LEGACY = 0x05; export async function decode({ cborldBytes, documentLoader, typeTable, + typeTableLoader, diagnose, appContextMap = new Map(), }) { @@ -55,7 +59,7 @@ export async function decode({ 'ERR_NOT_CBORLD', 'CBOR-LD must start with a CBOR major type "Tag" header of `0xd9`.'); } - const {suffix, isLegacy} = _getSuffix({cborldBytes}); + const {suffix, isLegacy, registryEntryId} = _getSuffix({cborldBytes}); const isCompressed = _checkCompressionMode({cborldBytes, isLegacy}); if(!isCompressed) { return cborg.decode(suffix, {useMaps: false}); @@ -68,6 +72,19 @@ export async function decode({ diagnose(inspect(input, {depth: null, colors: true})); } + // lookup typeTable by id if needed + if(!isLegacy) { + if(!typeTable && typeTableLoader) { + typeTable = await typeTableLoader({registryEntryId}); + } + if(!typeTable) { + throw new CborldError( + 'ERR_NO_TYPETABLE', + '"typeTable" not provided or found for registryEntryId ' + + `"${registryEntryId}".`); + } + } + const converter = _createConverter({ isLegacy, typeTable, @@ -126,27 +143,59 @@ function _checkCompressionMode({cborldBytes, isLegacy}) { } function _getSuffix({cborldBytes}) { - const isModern = cborldBytes[1] === CBORLD_TAG_SECOND_BYTE; - const isLegacy = cborldBytes[1] === CBORLD_TAG_SECOND_BYTE_LEGACY; + let index = 1; // start after 0xd9 + const isModern = cborldBytes[index] === CBORLD_TAG_SECOND_BYTE; + const isLegacy = cborldBytes[index] === CBORLD_TAG_SECOND_BYTE_LEGACY; if(!(isModern || isLegacy)) { throw new CborldError( 'ERR_NOT_CBORLD', 'CBOR-LD must either have a second byte of 0x06 or 0x05 (legacy).'); } - const tagValue = cborldBytes[2]; - let index = 3; - if(isModern && tagValue >= 128) { - // FIXME: this assumes tag length <= 31 bytes; throw error if not - // cborldBytes[index + 1] is the header 
byte for the varint bytestring - const varintArrayLength = cborldBytes[index + 1] % 32; - // This sets `index` to the index of the first byte of the second - // array element in `cborldBytes` - index += varintArrayLength + 2; - } + index++; // advance to tag value const {buffer, byteOffset, length} = cborldBytes; + const tagValue = cborldBytes[index]; + let registryEntryId; + if(isModern) { + if(tagValue < 128) { + registryEntryId = tagValue; + // advance to encoded data + index++; + } else { + index++; // advance to array + // check for 2 element array + if(cborldBytes[index] !== 0x82) { + throw new CborldError( + 'ERR_NOT_CBORLD', + 'CBOR-LD large varint encoding error.'); + } + index++; // advance to byte string tag + // first element is tail of varint encoded as byte string + // low 5 bits are byte string length (or exceptions for large values) + const varintArrayLength = cborldBytes[index] % 32; + // don't support unbounded lengths here + if(varintArrayLength >= 24) { + throw new CborldError( + 'ERR_NOT_CBORLD', + 'CBOR-LD encoded registryEntryId too large.'); + } + // FIXME: check for bad 0 length + index++; // advance to byte string data + // create single buffer for id varint initial byte and tail bytes + const varintBytes = new Uint8Array(varintArrayLength + 1); + varintBytes[0] = tagValue; + const tailBytes = cborldBytes.subarray(index, index + varintArrayLength); + varintBytes.set(tailBytes, 1); + // decode id from varint + registryEntryId = varint.decode(varintBytes); + // advance to second array element + index += varintArrayLength; + } + } else { + index++; // skip legacy compression mode byte; advance to encoded data + } const suffix = new Uint8Array(buffer, byteOffset + index, length - index); - return {suffix, isLegacy}; + return {suffix, isLegacy, registryEntryId}; } /** diff --git a/lib/encode.js b/lib/encode.js index 9b5422ce..6b2bb351 100644 --- a/lib/encode.js +++ b/lib/encode.js @@ -30,10 +30,12 @@ const typeEncoders = { * @param {number|string} 
[options.registryEntryId='legacy] - The registry * entry ID for the registry entry associated with the resulting CBOR-LD * payload. For legacy support, use registryEntryId = 'legacy'. - * @param {Map} options.typeTable - A map of possible value types, including + * @param {Map} [options.typeTable] - A map of possible value types, including * `context`, `url`, `none`, and any JSON-LD type, each of which maps to * another map of values of that type to their associated CBOR-LD integer * values. + * @param {Function} [options.typeTableLoader] - The typeTable loader to use to + * resolve a registryEntryId to a typeTable. * @param {diagnosticFunction} options.diagnose - A function that, if * provided, is called with diagnostic information. * @param {Map} options.appContextMap - For use with the legacy value of @@ -46,11 +48,11 @@ const typeEncoders = { export async function encode({ jsonldDocument, documentLoader, registryEntryId = 'legacy', typeTable, + typeTableLoader, diagnose, appContextMap, compressionMode } = {}) { - // validate that an acceptable value for `registryEntryId` was passed if(!((typeof registryEntryId === 'number' && registryEntryId > 0) || registryEntryId === 'legacy')) { @@ -96,6 +98,10 @@ export async function encode({ // output uncompressed CBOR-LD suffix = cborg.encode(jsonldDocument); } else { + // lookup typeTable by id if needed + if(!isLegacy && !typeTable && typeTableLoader) { + typeTable = await typeTableLoader({registryEntryId}); + } const converter = _createConverter({ isLegacy, typeTable, @@ -125,27 +131,34 @@ export async function encode({ return bytes; } -/** - * A diagnostic function that is called with diagnostic information. Typically - * set to `console.log` when debugging. - * - * @callback diagnosticFunction - * @param {string} message - The diagnostic message. 
- */ - function _getPrefix({isLegacy, compressionMode, registryEntryId}) { if(isLegacy) { - return new Uint8Array([0xd9, 0x05, compressionMode]); + return new Uint8Array([ + 0xd9, // CBOR major type 6 + 2 byte tag size + 0x05, // legacy CBOR-LD tag + compressionMode // compression flag + ]); } - const { - varintTagValue, varintByteValue - } = _getVarintStructure(registryEntryId); - if(varintByteValue) { - // Define varintByteValue as first element in 2 element array - // `0x82` means "the following is a 2 element array" - return [...varintTagValue, 0x82, ...varintByteValue]; + if(registryEntryId < 128) { + return new Uint8Array([ + 0xd9, // CBOR major type 6 + 2 byte tag size + 0x06, // non-legacy CBOR-LD tag + registryEntryId // low-value type table id + // encoded document appended in caller + ]); } - return varintTagValue; + const idVarint = varint.encode(registryEntryId); + + return new Uint8Array([ + 0xd9, // CBOR major type 6 + 2 byte tag size + 0x06, // non-legacy CBOR-LD tag + idVarint[0], + ...[ + 0x82, // 2 element array + ...cborg.encode(Uint8Array.from(idVarint.slice(1))) + // encoded document appended as second element in caller + ] + ]); } function _createConverter({ @@ -166,16 +179,10 @@ function _createConverter({ }); } -function _getVarintStructure(registryEntryId) { - let varintTagValue; - let varintByteValue; - if(registryEntryId < 128) { - varintTagValue = new Uint8Array([0xd9, 0x06, registryEntryId]); - varintByteValue = null; - } else { - const varintArray = varint.encode(registryEntryId); - varintTagValue = new Uint8Array([0xd9, 0x06, varintArray[0]]); - varintByteValue = cborg.encode(Uint8Array.from(varintArray.slice(1))); - } - return {varintTagValue, varintByteValue}; -} +/** + * A diagnostic function that is called with diagnostic information. Typically + * set to `console.log` when debugging. + * + * @callback diagnosticFunction + * @param {string} message - The diagnostic message. 
+ */ diff --git a/tests/decode.spec.js b/tests/decode.spec.js index 1e4c8800..2fa053c8 100644 --- a/tests/decode.spec.js +++ b/tests/decode.spec.js @@ -14,16 +14,82 @@ import { TYPE_TABLE, } from '../lib/tables.js'; +function _makeTypeTableLoader(entries) { + const typeTables = new Map(entries); + return async function({registryEntryId}) { + return typeTables.get(registryEntryId); + }; +} + describe('cborld decode', () => { + it('should decode CBOR-LD bytes (direct type table)', + async () => { + const cborldBytes = new Uint8Array([0xd9, 0x06, 0x01, 0xa0]); + const jsonldDocument = await decode({ + cborldBytes, + typeTable: new Map() + }); + expect(jsonldDocument).deep.equal({}); + }); + + it('should decode CBOR-LD bytes (type table loader)', + async () => { + const cborldBytes = new Uint8Array([0xd9, 0x06, 0x01, 0xa0]); + const jsonldDocument = await decode({ + cborldBytes, + typeTableLoader: _makeTypeTableLoader([[0x01, new Map()]]) + }); + expect(jsonldDocument).deep.equal({}); + }); + + it('should fail to decode with no typeTable or typeTableLoader', + async () => { + const cborldBytes = new Uint8Array([0xd9, 0x06, 0x01, 0xa0]); + let result; + let error; + try { + result = await decode({ + cborldBytes + }); + } catch(e) { + error = e; + } + expect(result).to.eql(undefined); + expect(error?.code).to.eql('ERR_NO_TYPETABLE'); + }); + + it('should fail to decode with no typeTableLoader id', + async () => { + const cborldBytes = new Uint8Array([0xd9, 0x06, 0x01, 0xa0]); + let result; + let error; + try { + result = await decode({ + cborldBytes, + typeTableLoader: _makeTypeTableLoader([]) + }); + } catch(e) { + error = e; + } + expect(result).to.eql(undefined); + expect(error?.code).to.eql('ERR_NO_TYPETABLE'); + }); + it('should decode empty document CBOR-LD bytes', async () => { const cborldBytes = new Uint8Array([0xd9, 0x06, 0x01, 0xa0]); - const jsonldDocument = await decode({cborldBytes}); + const jsonldDocument = await decode({ + cborldBytes, + typeTableLoader: 
_makeTypeTableLoader([[0x01, new Map()]]) + }); expect(jsonldDocument).deep.equal({}); }); it('should decode empty JSON-LD document bytes with varint', async () => { const cborldBytes = new Uint8Array([0xd9, 0x06, 0x10, 0xa0]); - const jsonldDocument = await decode({cborldBytes}); + const jsonldDocument = await decode({ + cborldBytes, + typeTableLoader: _makeTypeTableLoader([[0x10, new Map()]]) + }); expect(jsonldDocument).deep.equal({}); }); @@ -31,7 +97,10 @@ describe('cborld decode', () => { async () => { const cborldBytes = new Uint8Array( [0xd9, 0x06, 0x80, 0x82, 0x41, 0x01, 0xa0]); - const jsonldDocument = await decode({cborldBytes}); + const jsonldDocument = await decode({ + cborldBytes, + typeTableLoader: _makeTypeTableLoader([[0x80, new Map()]]) + }); expect(jsonldDocument).deep.equal({}); }); @@ -39,7 +108,10 @@ describe('cborld decode', () => { async () => { const cborldBytes = new Uint8Array( [0xd9, 0x06, 0x80, 0x82, 0x44, 0x94, 0xeb, 0xdc, 0x03, 0xa0]); - const jsonldDocument = await decode({cborldBytes}); + const jsonldDocument = await decode({ + cborldBytes, + typeTableLoader: _makeTypeTableLoader([[1000000000, new Map()]]) + }); expect(jsonldDocument).deep.equal({}); });