From abb4afcba5459dd1e478e0c254226c50f2909383 Mon Sep 17 00:00:00 2001 From: Zeke Gabrielse Date: Wed, 10 Jul 2024 21:45:07 -0500 Subject: [PATCH] add comment on export format --- lib/keygen/exporter/v1/exporter.rb | 50 ++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/lib/keygen/exporter/v1/exporter.rb b/lib/keygen/exporter/v1/exporter.rb index 3c755497a..335450a94 100644 --- a/lib/keygen/exporter/v1/exporter.rb +++ b/lib/keygen/exporter/v1/exporter.rb @@ -6,6 +6,56 @@ module Keygen module Exporter module V1 + ## + # The export format consists of: + # + # 1. A leading byte indicating the version of the export format. Right + # now this is always 1, but can be used in the future if we need + # to make changes to the export format. + # 2. A series of chunks, each prefixed by an 8-byte integer, indicating + # the size of the chunk. Each chunk represents packed, optionally + # encrypted, and compressed data. + # + # Chunks are represented with the following structure: + # + # - 8 bytes: size of the chunk data. + # - n bytes: the chunk data. + # + # Chunk data consists of a serialized array of: + # + # - A class name, representing the shared class of the packed records. + # - An array of attribute hashes, representing individual records. + # + # Note on serialization: + # + # - Records are converted into a hash via #attributes_for_export. + # - Chunked and packed with MessagePack. + # - Encrypted with AES-256-GCM, if a secret key is provided. + # - Compressed with zlib. + # + # Note on encryption: + # + # - Each chunk is encrypted individually so that the export can be + # piped, e.g. to stdout or a file, with a low memory footprint. + # + # Note on order: + # + # - The first chunk will always be the exported account. + # - The next chunks will be batches of associations. Right now, + # we export in batches of 1,000. + # + # Example export: + # + # <version><chunk size><chunk data><chunk size><chunk data>[...]
+ # | | | | | + # 1 byte 8 bytes n bytes 8 bytes n bytes + # + # Example chunk: + # + # ['License', [{ ... }, { ... }, ...]] + # | | | + # class attrs attrs + # class Exporter BATCH_SIZE = 1_000