Skip to content

Commit

Permalink
Add typeTableLoader support.
Browse files Browse the repository at this point in the history
Added
- Add `async function typeTableLoader({registryEntryId})` option to look
  up the `typeTable` to use by id for both `encode` and `decode`.

Changed
- Refactor `registryEntryId` encoding and decoding logic to be more
  readable and to handle more error and edge cases. This is a work in
  progress.
  • Loading branch information
davidlehn committed Oct 21, 2024
1 parent d512bcc commit 6f67640
Show file tree
Hide file tree
Showing 4 changed files with 190 additions and 52 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
# @digitalbazaar/cborld ChangeLog

## 7.2.0 - 2024-10-xx

### Added
- Add `async function typeTableLoader({registryEntryId})` option to look up the
`typeTable` to use by id for both `encode` and `decode`.

### Changed
- Refactor `registryEntryId` encoding and decoding logic to be more readable
and to handle more error and edge cases. This is a work in progress.

## 7.1.3 - 2024-10-16

### Fixed
Expand Down
81 changes: 65 additions & 16 deletions lib/decode.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import {CborldError} from './CborldError.js';
import {Converter} from './Converter.js';
import {Decompressor} from './Decompressor.js';
import {inspect} from './util.js';
import {default as varint} from 'varint';

// 0xd9 == 11011001
// 110 = CBOR major type 6
Expand All @@ -23,14 +24,16 @@ const CBORLD_TAG_SECOND_BYTE_LEGACY = 0x05;
* @param {object} options - The options to use when decoding CBOR-LD.
* @param {Uint8Array} options.cborldBytes - The encoded CBOR-LD bytes to
* decode.
* @param {Function} options.documentLoader -The document loader to use when
* @param {Function} options.documentLoader - The document loader to use when
* resolving JSON-LD Context URLs.
* @param {diagnosticFunction} options.diagnose - A function that, if
* provided, is called with diagnostic information.
* @param {Map} options.typeTable - A map of possible value types, including
* @param {Map} [options.typeTable] - A map of possible value types, including
* `context`, `url`, `none`, and any JSON-LD type, each of which maps to
* another map of values of that type to their associated CBOR-LD integer
* values.
* @param {Function} [options.typeTableLoader] - The typeTable loader to use to
* resolve a registryEntryId to a typeTable.
* @param {Map} options.appContextMap - A map of context string values
* to their associated CBOR-LD integer values. For use with legacy
* cborldBytes.
Expand All @@ -40,6 +43,7 @@ const CBORLD_TAG_SECOND_BYTE_LEGACY = 0x05;
export async function decode({
cborldBytes, documentLoader,
typeTable,
typeTableLoader,
diagnose,
appContextMap = new Map(),
}) {
Expand All @@ -55,7 +59,7 @@ export async function decode({
'ERR_NOT_CBORLD',
'CBOR-LD must start with a CBOR major type "Tag" header of `0xd9`.');
}
const {suffix, isLegacy} = _getSuffix({cborldBytes});
const {suffix, isLegacy, registryEntryId} = _getSuffix({cborldBytes});
const isCompressed = _checkCompressionMode({cborldBytes, isLegacy});
if(!isCompressed) {
return cborg.decode(suffix, {useMaps: false});
Expand All @@ -68,6 +72,19 @@ export async function decode({
diagnose(inspect(input, {depth: null, colors: true}));
}

// lookup typeTable by id if needed
if(!isLegacy) {
if(!typeTable && typeTableLoader) {
typeTable = await typeTableLoader({registryEntryId});
}
if(!typeTable) {
throw new CborldError(
'ERR_NO_TYPETABLE',
'"typeTable" not provided or found for registryEntryId ' +
`"${registryEntryId}".`);
}
}

const converter = _createConverter({
isLegacy,
typeTable,
Expand Down Expand Up @@ -126,27 +143,59 @@ function _checkCompressionMode({cborldBytes, isLegacy}) {
}

/**
 * Parses the CBOR-LD tag header that follows the leading `0xd9` byte and
 * locates the encoded payload.
 *
 * Header layouts handled here:
 * - legacy: `0xd9 0x05 <compression mode> <payload>`
 * - modern, id < 128: `0xd9 0x06 <id> <payload>`
 * - modern, id >= 128: `0xd9 0x06 <first varint byte> 0x82
 *   <byte string holding the remaining varint bytes> <payload>`
 *
 * @param {object} options - The options to use.
 * @param {Uint8Array} options.cborldBytes - The encoded CBOR-LD bytes; may be
 *   a view with a non-zero `byteOffset` into a larger buffer.
 *
 * @returns {{suffix: Uint8Array, isLegacy: boolean,
 *   registryEntryId: (number|undefined)}} `suffix` is a view (not a copy) of
 *   the bytes after the header; `registryEntryId` is `undefined` for legacy
 *   input.
 * @throws {CborldError} `ERR_NOT_CBORLD` if the header is malformed.
 */
function _getSuffix({cborldBytes}) {
  let index = 1; // start after 0xd9
  const isModern = cborldBytes[index] === CBORLD_TAG_SECOND_BYTE;
  const isLegacy = cborldBytes[index] === CBORLD_TAG_SECOND_BYTE_LEGACY;
  if(!(isModern || isLegacy)) {
    throw new CborldError(
      'ERR_NOT_CBORLD',
      'CBOR-LD must either have a second byte of 0x06 or 0x05 (legacy).');
  }

  index++; // advance to tag value
  const {buffer, byteOffset, length} = cborldBytes;
  const tagValue = cborldBytes[index];
  let registryEntryId;
  if(isModern) {
    if(tagValue < 128) {
      registryEntryId = tagValue;
      // advance to encoded data
      index++;
    } else {
      index++; // advance to array
      // check for 2 element array
      if(cborldBytes[index] !== 0x82) {
        throw new CborldError(
          'ERR_NOT_CBORLD',
          'CBOR-LD large varint encoding error.');
      }
      index++; // advance to byte string header
      // first element is tail of varint encoded as byte string
      const byteStringHeader = cborldBytes[index];
      // high 3 bits are the CBOR major type; must be 2 (byte string)
      if((byteStringHeader >> 5) !== 2) {
        throw new CborldError(
          'ERR_NOT_CBORLD',
          'CBOR-LD large varint encoding error.');
      }
      // low 5 bits are byte string length (or exceptions for large values)
      const varintArrayLength = byteStringHeader % 32;
      // don't support unbounded lengths here
      if(varintArrayLength >= 24) {
        throw new CborldError(
          'ERR_NOT_CBORLD',
          'CBOR-LD encoded registryEntryId too large.');
      }
      // `tagValue >= 128` has its continuation bit set, so at least one
      // more varint byte is required
      if(varintArrayLength === 0) {
        throw new CborldError(
          'ERR_NOT_CBORLD',
          'CBOR-LD large varint encoding error.');
      }
      index++; // advance to byte string data
      // reject input that ends before the declared byte string does
      if(index + varintArrayLength > length) {
        throw new CborldError(
          'ERR_NOT_CBORLD',
          'CBOR-LD large varint encoding error.');
      }
      // create single buffer for id varint initial byte and tail bytes
      const varintBytes = new Uint8Array(varintArrayLength + 1);
      varintBytes[0] = tagValue;
      // `index` is relative to the view, so `byteOffset` must be added when
      // addressing the underlying buffer
      const varintTailBytes = new Uint8Array(
        buffer, byteOffset + index, varintArrayLength);
      varintBytes.set(varintTailBytes, 1);
      // decode id from varint
      registryEntryId = varint.decode(varintBytes);
      // advance to second array element
      index += varintArrayLength;
    }
  } else {
    // legacy: tag value is the compression mode; advance to encoded data
    index++;
  }
  const suffix = new Uint8Array(buffer, byteOffset + index, length - index);
  return {suffix, isLegacy, registryEntryId};
}

/**
Expand Down
71 changes: 39 additions & 32 deletions lib/encode.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,12 @@ const typeEncoders = {
* @param {number|string} [options.registryEntryId='legacy'] - The registry
* entry ID for the registry entry associated with the resulting CBOR-LD
* payload. For legacy support, use registryEntryId = 'legacy'.
* @param {Map} options.typeTable - A map of possible value types, including
* @param {Map} [options.typeTable] - A map of possible value types, including
* `context`, `url`, `none`, and any JSON-LD type, each of which maps to
* another map of values of that type to their associated CBOR-LD integer
* values.
* @param {Function} [options.typeTableLoader] - The typeTable loader to use to
* resolve a registryEntryId to a typeTable.
* @param {diagnosticFunction} options.diagnose - A function that, if
* provided, is called with diagnostic information.
* @param {Map} options.appContextMap - For use with the legacy value of
Expand All @@ -46,11 +48,11 @@ const typeEncoders = {
export async function encode({
jsonldDocument, documentLoader, registryEntryId = 'legacy',
typeTable,
typeTableLoader,
diagnose,
appContextMap,
compressionMode
} = {}) {

// validate that an acceptable value for `registryEntryId` was passed
if(!((typeof registryEntryId === 'number' && registryEntryId > 0) ||
registryEntryId === 'legacy')) {
Expand Down Expand Up @@ -96,6 +98,10 @@ export async function encode({
// output uncompressed CBOR-LD
suffix = cborg.encode(jsonldDocument);
} else {
// lookup typeTable by id if needed
if(!isLegacy && !typeTable && typeTableLoader) {
typeTable = await typeTableLoader({registryEntryId});
}
const converter = _createConverter({
isLegacy,
typeTable,
Expand Down Expand Up @@ -125,27 +131,34 @@ export async function encode({
return bytes;
}

/**
* A diagnostic function that is called with diagnostic information. Typically
* set to `console.log` when debugging.
*
* @callback diagnosticFunction
* @param {string} message - The diagnostic message.
*/

/**
 * Builds the CBOR-LD tag header bytes that precede the encoded document.
 *
 * @param {object} options - The options to use.
 * @param {boolean} options.isLegacy - `true` to emit the legacy `0x05` tag.
 * @param {number} options.compressionMode - The compression flag written as
 *   the third byte of a legacy header; not used otherwise.
 * @param {number} options.registryEntryId - The registry entry ID written
 *   into a non-legacy header; not used for legacy.
 *
 * @returns {Uint8Array} The header bytes; the caller appends the encoded
 *   document.
 */
function _getPrefix({isLegacy, compressionMode, registryEntryId}) {
  if(isLegacy) {
    return new Uint8Array([
      0xd9, // CBOR major type 6 + 2 byte tag size
      0x05, // legacy CBOR-LD tag
      compressionMode // compression flag
    ]);
  }
  if(registryEntryId < 128) {
    return new Uint8Array([
      0xd9, // CBOR major type 6 + 2 byte tag size
      0x06, // non-legacy CBOR-LD tag
      registryEntryId // low-value type table id
      // encoded document appended in caller
    ]);
  }
  // ids >= 128 are varint encoded: the first varint byte takes the tag value
  // slot and the remaining bytes travel as a CBOR byte string
  const [idHead, ...idTail] = varint.encode(registryEntryId);
  return new Uint8Array([
    0xd9, // CBOR major type 6 + 2 byte tag size
    0x06, // non-legacy CBOR-LD tag
    idHead, // first byte of id varint
    0x82, // 2 element array
    // first element: tail of id varint as a byte string
    ...cborg.encode(Uint8Array.from(idTail))
    // encoded document appended as second element in caller
  ]);
}

function _createConverter({
Expand All @@ -166,16 +179,10 @@ function _createConverter({
});
}

// Splits a registry entry id into the 3-byte CBOR-LD tag header and, for ids
// of 128 or more, the CBOR-encoded byte string carrying the rest of the id
// varint. `varintByteValue` is `null` when the id fits in the tag value byte.
function _getVarintStructure(registryEntryId) {
  // single byte ids go directly into the tag value slot
  if(registryEntryId < 128) {
    return {
      varintTagValue: new Uint8Array([0xd9, 0x06, registryEntryId]),
      varintByteValue: null
    };
  }
  // first varint byte goes into the tag value slot; the remaining bytes are
  // CBOR-encoded as a byte string
  const [headByte, ...tailBytes] = varint.encode(registryEntryId);
  return {
    varintTagValue: new Uint8Array([0xd9, 0x06, headByte]),
    varintByteValue: cborg.encode(Uint8Array.from(tailBytes))
  };
}
/**
* A diagnostic function that is called with diagnostic information. Typically
* set to `console.log` when debugging.
*
* @callback diagnosticFunction
* @param {string} message - The diagnostic message.
*/
80 changes: 76 additions & 4 deletions tests/decode.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,32 +14,104 @@ import {
TYPE_TABLE,
} from '../lib/tables.js';

// Test helper: builds an in-memory `typeTableLoader` from `[id, typeTable]`
// pairs. The returned async loader resolves to the table registered for
// `registryEntryId`, or to `undefined` when no table is registered.
function _makeTypeTableLoader(entries) {
  const tablesById = new Map(entries);
  return async ({registryEntryId}) => tablesById.get(registryEntryId);
}

describe('cborld decode', () => {
it('should decode CBOR-LD bytes (direct type table)',
async () => {
const cborldBytes = new Uint8Array([0xd9, 0x06, 0x01, 0xa0]);
const jsonldDocument = await decode({
cborldBytes,
typeTable: new Map()
});
expect(jsonldDocument).deep.equal({});
});

it('should decode CBOR-LD bytes (type table loader)',
async () => {
const cborldBytes = new Uint8Array([0xd9, 0x06, 0x01, 0xa0]);
const jsonldDocument = await decode({
cborldBytes,
typeTableLoader: _makeTypeTableLoader([[0x01, new Map()]])
});
expect(jsonldDocument).deep.equal({});
});

it('should fail to decode with no typeTable or typeTableLoader',
async () => {
const cborldBytes = new Uint8Array([0xd9, 0x06, 0x01, 0xa0]);
let result;
let error;
try {
result = await decode({
cborldBytes
});
} catch(e) {
error = e;
}
expect(result).to.eql(undefined);
expect(error?.code).to.eql('ERR_NO_TYPETABLE');
});

it('should fail to decode with no typeTableLoader id',
async () => {
const cborldBytes = new Uint8Array([0xd9, 0x06, 0x01, 0xa0]);
let result;
let error;
try {
result = await decode({
cborldBytes,
typeTableLoader: _makeTypeTableLoader([])
});
} catch(e) {
error = e;
}
expect(result).to.eql(undefined);
expect(error?.code).to.eql('ERR_NO_TYPETABLE');
});

it('should decode empty document CBOR-LD bytes', async () => {
const cborldBytes = new Uint8Array([0xd9, 0x06, 0x01, 0xa0]);
const jsonldDocument = await decode({cborldBytes});
const jsonldDocument = await decode({
cborldBytes,
typeTableLoader: _makeTypeTableLoader([[0x01, new Map()]])
});
expect(jsonldDocument).deep.equal({});
});

it('should decode empty JSON-LD document bytes with varint', async () => {
const cborldBytes = new Uint8Array([0xd9, 0x06, 0x10, 0xa0]);
const jsonldDocument = await decode({cborldBytes});
const jsonldDocument = await decode({
cborldBytes,
typeTableLoader: _makeTypeTableLoader([[0x10, new Map()]])
});
expect(jsonldDocument).deep.equal({});
});

it('should decode empty JSON-LD document bytes with varint >1 byte',
async () => {
const cborldBytes = new Uint8Array(
[0xd9, 0x06, 0x80, 0x82, 0x41, 0x01, 0xa0]);
const jsonldDocument = await decode({cborldBytes});
const jsonldDocument = await decode({
cborldBytes,
typeTableLoader: _makeTypeTableLoader([[0x80, new Map()]])
});
expect(jsonldDocument).deep.equal({});
});

it('should decode an empty JSON-LD document with multiple byte varint',
async () => {
const cborldBytes = new Uint8Array(
[0xd9, 0x06, 0x80, 0x82, 0x44, 0x94, 0xeb, 0xdc, 0x03, 0xa0]);
const jsonldDocument = await decode({cborldBytes});
const jsonldDocument = await decode({
cborldBytes,
typeTableLoader: _makeTypeTableLoader([[1000000000, new Map()]])
});
expect(jsonldDocument).deep.equal({});
});

Expand Down

0 comments on commit 6f67640

Please sign in to comment.