From e46d028aa6c07c62f6bd6df420ba9fb6ca779989 Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Sun, 25 Feb 2024 22:13:36 +0100 Subject: [PATCH 1/8] Chacha20-Poly1305: use string instead of cstruct Performance improvement from 8MB/s to 20MB/s (with 16 byte blocks, on my laptop) --- bench/speed.ml | 25 +++++++++++-- src/chacha20.ml | 75 ++++++++++++++++++++----------------- src/mirage_crypto.mli | 20 ++++++---- src/native.ml | 10 +++-- src/native/chacha.c | 34 +++++------------ src/native/chacha_generic.c | 22 ++--------- src/native/mirage_crypto.h | 3 ++ src/native/misc.c | 8 +++- src/native/misc_sse.c | 10 ++++- src/native/poly1305-donna.c | 12 +++--- src/poly1305.ml | 38 +++++++++++-------- tests/test_cipher.ml | 22 +++++------ tests/test_common.ml | 6 +++ 13 files changed, 160 insertions(+), 125 deletions(-) diff --git a/bench/speed.ml b/bench/speed.ml index 21d24348..03946065 100644 --- a/bench/speed.ml +++ b/bench/speed.ml @@ -22,7 +22,7 @@ module Time = struct end -let burn_period = 2.0 +let burn_period = 3.0 let sizes = [16; 64; 256; 1024; 8192] (* let sizes = [16] *) @@ -38,6 +38,17 @@ let burn f n = let time = Time.time ~n:iters f cs in (iters, time, float (n * iters) /. time) +let burn_str f n = + let cs = Cstruct.to_string (Mirage_crypto_rng.generate n) in + let (t1, i1) = + let rec loop it = + let t = Time.time ~n:it f cs in + if t > 0.2 then (t, it) else loop (it * 10) in + loop 10 in + let iters = int_of_float (float i1 *. burn_period /. t1) in + let time = Time.time ~n:iters f cs in + (iters, time, float (n * iters) /. time) + let mb = 1024. *. 1024. let throughput title f = @@ -48,6 +59,14 @@ let throughput title f = Printf.printf " % 5d: %04f MB/s (%d iters in %.03f s)\n%!" size (bw /. mb) iters time +let throughput_str title f = + Printf.printf "\n* [%s]\n%!" title ; + sizes |> List.iter @@ fun size -> + Gc.full_major () ; + let (iters, time, bw) = burn_str f size in + Printf.printf " % 5d: %04f MB/s (%d iters in %.03f s)\n%!" + size (bw /. mb) iters time + let count_period = 10. let count f n = @@ -370,8 +389,8 @@ let benchmarks = [ bm "chacha20-poly1305" (fun name -> let key = Mirage_crypto.Chacha20.of_secret (Mirage_crypto_rng.generate 32) - and nonce = Mirage_crypto_rng.generate 8 in - throughput name (Mirage_crypto.Chacha20.authenticate_encrypt ~key ~nonce)) ; + and nonce = Cstruct.to_string (Mirage_crypto_rng.generate 8) in + throughput_str name (Mirage_crypto.Chacha20.auth_enc_str ~key ~nonce)) ; bm "aes-128-ecb" (fun name -> let key = AES.ECB.of_secret (Mirage_crypto_rng.generate 16) in diff --git a/src/chacha20.ml b/src/chacha20.ml index b7d844db..b7040c06 100644 --- a/src/chacha20.ml +++ b/src/chacha20.ml @@ -4,23 +4,23 @@ open Uncommon let block = 64 -type key = Cstruct.t +type key = string -let of_secret a = a +let of_secret a = Cstruct.to_string a let chacha20_block state idx key_stream = - Native.Chacha.round 10 state.Cstruct.buffer 0 key_stream.Cstruct.buffer idx + Native.Chacha.round 10 state key_stream idx let init ctr ~key ~nonce = let ctr_off = 48 in - let set_ctr32 b v = Cstruct.LE.set_uint32 b ctr_off v - and set_ctr64 b v = Cstruct.LE.set_uint64 b ctr_off v + let set_ctr32 b v = Bytes.set_int32_le b ctr_off v + and set_ctr64 b v = Bytes.set_int64_le b ctr_off v in - let inc32 b = set_ctr32 b (Int32.add (Cstruct.LE.get_uint32 b ctr_off) 1l) - and inc64 b = set_ctr64 b (Int64.add (Cstruct.LE.get_uint64 b ctr_off) 1L) + let inc32 b = set_ctr32 b (Int32.add (Bytes.get_int32_le b ctr_off) 1l) + and inc64 b = set_ctr64 b (Int64.add (Bytes.get_int64_le b ctr_off) 1L) in let s, key, init_ctr, nonce_off, inc = - match Cstruct.length key, Cstruct.length nonce, Int64.shift_right ctr 32 = 0L with + match String.length key, String.length nonce, Int64.shift_right ctr 32 = 0L with | 32, 12, true -> let ctr = Int64.to_int32 ctr in "expand 32-byte k", key, (fun b -> set_ctr32 b ctr), 52, inc32 @@ -29,81 +29,82 @@ let init ctr ~key ~nonce = | 32, 8, _ -> "expand 32-byte k", key, (fun b -> set_ctr64 b ctr), 56, inc64 | 16, 8, _ -> - let k = Cstruct.append key key in + let k = key ^ key in "expand 16-byte k", k, (fun b -> set_ctr64 b ctr), 56, inc64 | _ -> invalid_arg "Valid parameters are nonce 12 bytes and key 32 bytes \ (counter 32 bit), or nonce 8 byte and key 16 or 32 \ bytes (counter 64 bit)." in - let state = Cstruct.create block in - Cstruct.blit_from_string s 0 state 0 16 ; - Cstruct.blit key 0 state 16 32 ; + let state = Bytes.create block in + Bytes.blit_string s 0 state 0 16 ; + Bytes.blit_string key 0 state 16 32 ; init_ctr state ; - Cstruct.blit nonce 0 state nonce_off (Cstruct.length nonce) ; + Bytes.blit_string nonce 0 state nonce_off (String.length nonce) ; state, inc let crypt ~key ~nonce ?(ctr = 0L) data = let state, inc = init ctr ~key ~nonce in - let l = Cstruct.length data in + let l = String.length data in let block_count = l // block in let len = block * block_count in let last_len = let last = l mod block in if last = 0 then block else last in - let key_stream = Cstruct.create_unsafe len in + let key_stream = Bytes.create len in let rec loop i = function | 0 -> () | 1 -> chacha20_block state i key_stream ; - Native.xor_into data.buffer (data.off + i) key_stream.buffer i last_len + Native.xor_into_bytes data i key_stream i last_len | n -> chacha20_block state i key_stream ; - Native.xor_into data.buffer (data.off + i) key_stream.buffer i block ; + Native.xor_into_bytes data i key_stream i block ; inc state; loop (i + block) (n - 1) in loop 0 block_count ; - Cstruct.sub key_stream 0 l + let res = Bytes.unsafe_to_string key_stream in + if l <> len then String.sub res 0 l else res module P = Poly1305.It let generate_poly1305_key ~key ~nonce = - crypt ~key ~nonce (Cstruct.create 32) + crypt ~key ~nonce (String.make 32 '\000') let mac ~key ~adata ciphertext = let pad16 b = - let len = Cstruct.length b mod 16 in - if len = 0 then Cstruct.empty else Cstruct.create (16 - len) + let len = String.length b mod 16 in + if len = 0 then "" else String.make (16 - len) '\000' and len = - let data = Cstruct.create 16 in - Cstruct.LE.set_uint64 data 0 (Int64.of_int (Cstruct.length adata)); - Cstruct.LE.set_uint64 data 8 (Int64.of_int (Cstruct.length ciphertext)); - data + let data = Bytes.create 16 in + Bytes.set_int64_le data 0 (Int64.of_int (String.length adata)); + Bytes.set_int64_le data 8 (Int64.of_int (String.length ciphertext)); + Bytes.unsafe_to_string data in - let ctx = P.empty ~key in - let ctx = P.feed ctx adata in - let ctx = P.feed ctx (pad16 adata) in - let ctx = P.feed ctx ciphertext in - let ctx = P.feed ctx (pad16 ciphertext) in - let ctx = P.feed ctx len in - P.get ctx + P.macl ~key [ adata ; pad16 adata ; ciphertext ; pad16 ciphertext ; len ] let authenticate_encrypt_tag ~key ~nonce ?(adata = Cstruct.empty) data = + let adata = Cstruct.to_string adata in + let nonce = Cstruct.to_string nonce in + let data = Cstruct.to_string data in let poly1305_key = generate_poly1305_key ~key ~nonce in let ciphertext = crypt ~key ~nonce ~ctr:1L data in let mac = mac ~key:poly1305_key ~adata ciphertext in - ciphertext, mac + Cstruct.of_string ciphertext, Cstruct.of_string mac let authenticate_encrypt ~key ~nonce ?adata data = let cdata, ctag = authenticate_encrypt_tag ~key ~nonce ?adata data in Cstruct.append cdata ctag let authenticate_decrypt_tag ~key ~nonce ?(adata = Cstruct.empty) ~tag data = + let adata = Cstruct.to_string adata in + let nonce = Cstruct.to_string nonce in + let data = Cstruct.to_string data in let poly1305_key = generate_poly1305_key ~key ~nonce in let ctag = mac ~key:poly1305_key ~adata data in let plain = crypt ~key ~nonce ~ctr:1L data in - if Eqaf_cstruct.equal tag ctag then Some plain else None + if Eqaf_cstruct.equal tag (Cstruct.of_string ctag) then Some (Cstruct.of_string plain) else None let authenticate_decrypt ~key ~nonce ?adata data = if Cstruct.length data < P.mac_size then @@ -112,4 +113,10 @@ let authenticate_decrypt ~key ~nonce ?adata data = let cipher, tag = Cstruct.split data (Cstruct.length data - P.mac_size) in authenticate_decrypt_tag ~key ~nonce ?adata ~tag cipher +let auth_enc_str ~key ~nonce ?(adata = "") data = + let poly1305_key = generate_poly1305_key ~key ~nonce in + let ciphertext = crypt ~key ~nonce ~ctr:1L data in + let mac = mac ~key:poly1305_key ~adata ciphertext in + ciphertext ^ mac + let tag_size = P.mac_size diff --git a/src/mirage_crypto.mli b/src/mirage_crypto.mli index 2cc4857d..42d6e36a 100644 --- a/src/mirage_crypto.mli +++ b/src/mirage_crypto.mli @@ -204,7 +204,7 @@ end (** The poly1305 message authentication code *) module Poly1305 : sig - type mac = Cstruct.t + type mac = string type 'a iter = ('a -> unit) -> unit @@ -214,27 +214,30 @@ module Poly1305 : sig val mac_size : int (** [mac_size] is the size of the output. *) - val empty : key:Cstruct.t -> t + val empty : key:string -> t (** [empty] is the empty context with the given [key]. @raise Invalid_argument if key is not 32 bytes. *) - val feed : t -> Cstruct.t -> t + val feed : t -> string -> t (** [feed t msg] adds the information in [msg] to [t]. *) - val feedi : t -> Cstruct.t iter -> t + val feedi : t -> string iter -> t (** [feedi t iter] feeds iter into [t]. *) val get : t -> mac (** [get t] is the mac corresponding to [t]. *) - val mac : key:Cstruct.t -> Cstruct.t -> mac + val mac : key:string -> string -> mac (** [mac ~key msg] is the all-in-one mac computation: [get (feed (empty ~key) msg)]. *) - val maci : key:Cstruct.t -> Cstruct.t iter -> mac + val maci : key:string -> string iter -> mac (** [maci ~key iter] is the all-in-one mac computation: [get (feedi (empty ~key) iter)]. *) + + val macl : key:string -> string list -> mac + (** [macl ~key datas] computes the [mac] of [datas]. *) end (** {1 Symmetric-key cryptography} *) @@ -506,7 +509,7 @@ end module Chacha20 : sig include AEAD - val crypt : key:key -> nonce:Cstruct.t -> ?ctr:int64 -> Cstruct.t -> Cstruct.t + val crypt : key:key -> nonce:string -> ?ctr:int64 -> string -> string (** [crypt ~key ~nonce ~ctr data] generates a ChaCha20 key stream using the [key], and [nonce]. The [ctr] defaults to 0. The generated key stream is of the same length as [data], and the output is the XOR @@ -520,6 +523,9 @@ module Chacha20 : sig IETF mode (and counter fit into 32 bits), or [key] must be either 16 bytes or 32 bytes and [nonce] 8 bytes. *) + + val auth_enc_str : key:key -> nonce:string -> ?adata:string -> + string -> string end (** Streaming ciphers. *) diff --git a/src/native.ml b/src/native.ml index 152008a2..75ae38f1 100644 --- a/src/native.ml +++ b/src/native.ml @@ -31,13 +31,13 @@ module DES = struct end module Chacha = struct - external round : int -> buffer -> off -> buffer -> off -> unit = "mc_chacha_round" [@@noalloc] + external round : int -> bytes -> bytes -> off -> unit = "mc_chacha_round" [@@noalloc] end module Poly1305 = struct - external init : ctx -> buffer -> off -> unit = "mc_poly1305_init" [@@noalloc] - external update : ctx -> buffer -> off -> size -> unit = "mc_poly1305_update" [@@noalloc] - external finalize : ctx -> buffer -> off -> unit = "mc_poly1305_finalize" [@@noalloc] + external init : ctx -> string -> unit = "mc_poly1305_init" [@@noalloc] + external update : ctx -> string -> size -> unit = "mc_poly1305_update" [@@noalloc] + external finalize : ctx -> bytes -> unit = "mc_poly1305_finalize" [@@noalloc] external ctx_size : unit -> int = "mc_poly1305_ctx_size" [@@noalloc] external mac_size : unit -> int = "mc_poly1305_mac_size" [@@noalloc] end @@ -95,6 +95,8 @@ end * Unsolved: bounds-checked XORs are slowing things down considerably... *) external xor_into : buffer -> off -> buffer -> off -> size -> unit = "mc_xor_into" [@@noalloc] +external xor_into_bytes : string -> off -> bytes -> off -> size -> unit = "mc_xor_into_bytes" [@@noalloc] + external count8be : bytes -> buffer -> off -> blocks:size -> unit = "mc_count_8_be" [@@noalloc] external count16be : bytes -> buffer -> off -> blocks:size -> unit = "mc_count_16_be" [@@noalloc] external count16be4 : bytes -> buffer -> off -> blocks:size -> unit = "mc_count_16_be_4" [@@noalloc] diff --git a/src/native/chacha.c b/src/native/chacha.c index 14925e95..1f9b76b2 100644 --- a/src/native/chacha.c +++ b/src/native/chacha.c @@ -2,7 +2,7 @@ #include "mirage_crypto.h" -extern void mc_chacha_core_generic(int count, uint8_t *src, uint8_t *dst); +extern void mc_chacha_core_generic(int count, const uint32_t *src, uint32_t *dst); #ifdef __mc_ACCELERATE__ @@ -13,24 +13,10 @@ static inline void mc_chacha_quarterround(uint32_t *x, int a, int b, int c, int x[c] += x[d]; x[b] = rol32(x[b] ^ x[c], 7); } -static inline uint32_t mc_get_u32_le(uint8_t *input, int offset) { - return input[offset] - | (input[offset + 1] << 8) - | (input[offset + 2] << 16) - | (input[offset + 3] << 24); -} - -static inline void mc_set_u32_le(uint8_t *input, int offset, uint32_t value) { - input[offset] = (uint8_t) value; - input[offset + 1] = (uint8_t) (value >> 8); - input[offset + 2] = (uint8_t) (value >> 16); - input[offset + 3] = (uint8_t) (value >> 24); -} - -static void mc_chacha_core(int count, uint8_t *src, uint8_t *dst) { +static void mc_chacha_core(int count, const uint32_t *src, uint32_t *dst) { uint32_t x[16]; for (int i = 0; i < 16; i++) { - x[i] = mc_get_u32_le(src, i * 4); + x[i] = src[i]; } for (int i = 0; i < count; i++) { mc_chacha_quarterround(x, 0, 4, 8, 12); @@ -45,26 +31,26 @@ static void mc_chacha_core(int count, uint8_t *src, uint8_t *dst) { } for (int i = 0; i < 16; i++) { uint32_t xi = x[i]; - uint32_t hj = mc_get_u32_le(src, i * 4); - mc_set_u32_le(dst, i * 4, xi + hj); + uint32_t hj = src[i]; + dst[i] = xi + hj; } } CAMLprim value -mc_chacha_round(value count, value src, value off1, value dst, value off2) +mc_chacha_round(value count, value src, value dst, value off) { _mc_switch_accel(ssse3, - mc_chacha_core_generic(Int_val(count), _ba_uint8_off(src, off1), _ba_uint8_off(dst, off2)), - mc_chacha_core(Int_val(count), _ba_uint8_off(src, off1), _ba_uint8_off(dst, off2))); + mc_chacha_core_generic(Int_val(count), (const uint32_t *)(String_val(src)), (uint32_t *)(Bytes_val(dst) + Long_val(off))), + mc_chacha_core(Int_val(count), (const uint32_t *)(String_val(src)), (uint32_t *)(Bytes_val(dst) + Long_val(off)))); return Val_unit; } #else //#ifdef __mc_ACCELERATE__ CAMLprim value -mc_chacha_round(value count, value src, value off1, value dst, value off2) +mc_chacha_round(value count, value src, value dst, value off) { - mc_chacha_core_generic(Int_val(count), _ba_uint8_off(src, off1), _ba_uint8_off(dst, off2)); + mc_chacha_core_generic(Int_val(count), (const uint32_t *)(String_val(src)), (uint32_t *)(Bytes_val(dst) + Long_val(off))); return Val_unit; } diff --git a/src/native/chacha_generic.c b/src/native/chacha_generic.c index 11cd3c7e..56729eb0 100644 --- a/src/native/chacha_generic.c +++ b/src/native/chacha_generic.c @@ -9,24 +9,10 @@ static inline void mc_chacha_quarterround(uint32_t *x, int a, int b, int c, int x[c] += x[d]; x[b] = rol32(x[b] ^ x[c], 7); } -static inline uint32_t mc_get_u32_le(uint8_t *input, int offset) { - return input[offset] - | (input[offset + 1] << 8) - | (input[offset + 2] << 16) - | (input[offset + 3] << 24); -} - -static inline void mc_set_u32_le(uint8_t *input, int offset, uint32_t value) { - input[offset] = (uint8_t) value; - input[offset + 1] = (uint8_t) (value >> 8); - input[offset + 2] = (uint8_t) (value >> 16); - input[offset + 3] = (uint8_t) (value >> 24); -} - -void mc_chacha_core_generic(int count, uint8_t *src, uint8_t *dst) { +void mc_chacha_core_generic(int count, const uint32_t *src, uint32_t *dst) { uint32_t x[16]; for (int i = 0; i < 16; i++) { - x[i] = mc_get_u32_le(src, i * 4); + x[i] = src[i]; } for (int i = 0; i < count; i++) { mc_chacha_quarterround(x, 0, 4, 8, 12); @@ -41,8 +27,8 @@ void mc_chacha_core_generic(int count, uint8_t *src, uint8_t *dst) { } for (int i = 0; i < 16; i++) { uint32_t xi = x[i]; - uint32_t hj = mc_get_u32_le(src, i * 4); - mc_set_u32_le(dst, i * 4, xi + hj); + uint32_t hj = src[i]; + dst[i] = xi + hj; } } diff --git a/src/native/mirage_crypto.h b/src/native/mirage_crypto.h index 4e276432..7c70d4c2 100644 --- a/src/native/mirage_crypto.h +++ b/src/native/mirage_crypto.h @@ -115,6 +115,9 @@ mc_ghash_generic (value m, value hash, value src, value off, value len); CAMLprim value mc_xor_into_generic (value b1, value off1, value b2, value off2, value n); +CAMLprim value +mc_xor_into_bytes_generic (value b1, value off1, value b2, value off2, value n); + CAMLprim value mc_count_16_be_4_generic (value ctr, value dst, value off, value blocks); diff --git a/src/native/misc.c b/src/native/misc.c index 13918554..97083d42 100644 --- a/src/native/misc.c +++ b/src/native/misc.c @@ -1,6 +1,6 @@ #include "mirage_crypto.h" -static inline void xor_into (uint8_t *src, uint8_t *dst, size_t n) { +static inline void xor_into (const uint8_t *src, uint8_t *dst, size_t n) { /* see issue #70 #81 for alignment considerations (memcpy used below) */ #ifdef ARCH_64BIT uint64_t s; @@ -59,6 +59,12 @@ mc_xor_into_generic (value b1, value off1, value b2, value off2, value n) { return Val_unit; } +CAMLprim value +mc_xor_into_bytes_generic (value b1, value off1, value b2, value off2, value n) { + xor_into (_st_uint8 (b1) + Long_val(off1), Bytes_val (b2) + Long_val(off2), Int_val (n)); + return Val_unit; +} + #define __export_counter(name, f) \ CAMLprim value name (value ctr, value dst, value off, value blocks) { \ f ( (uint64_t*) Bp_val (ctr), \ diff --git a/src/native/misc_sse.c b/src/native/misc_sse.c index 17be5109..fe322745 100644 --- a/src/native/misc_sse.c +++ b/src/native/misc_sse.c @@ -2,7 +2,7 @@ #ifdef __mc_ACCELERATE__ -static inline void xor_into (uint8_t *src, uint8_t *dst, size_t n) { +static inline void xor_into (const uint8_t *src, uint8_t *dst, size_t n) { /* see issue #70 #81 for alignment considerations (memcpy used below) */ #ifdef ARCH_64BIT __m128i r; @@ -47,6 +47,14 @@ mc_xor_into (value b1, value off1, value b2, value off2, value n) { return Val_unit; } +CAMLprim value +mc_xor_into_bytes (value b1, value off1, value b2, value off2, value n) { + _mc_switch_accel(ssse3, + mc_xor_into_bytes_generic(b1, off1, b2, off2, n), + xor_into (_st_uint8 (b1) + Long_val(off1), Bytes_val (b2) + Long_val(off2), Int_val (n))) + return Val_unit; +} + #define __export_counter(name, f) \ CAMLprim value name (value ctr, value dst, value off, value blocks) { \ _mc_switch_accel(ssse3, \ diff --git a/src/native/poly1305-donna.c b/src/native/poly1305-donna.c index aab3a139..567649ab 100644 --- a/src/native/poly1305-donna.c +++ b/src/native/poly1305-donna.c @@ -54,18 +54,18 @@ poly1305_update(poly1305_context *ctx, const unsigned char *m, size_t bytes) { } //stubs for OCaml -CAMLprim value mc_poly1305_init (value ctx, value key, value off) { - poly1305_init ((poly1305_context *) Bytes_val(ctx), _ba_uint8_off(key, off)); +CAMLprim value mc_poly1305_init (value ctx, value key) { + poly1305_init ((poly1305_context *) Bytes_val(ctx), _st_uint8(key)); return Val_unit; } -CAMLprim value mc_poly1305_update (value ctx, value buf, value off, value len) { - poly1305_update ((poly1305_context *) Bytes_val(ctx), _ba_uint8_off(buf, off), Int_val(len)); +CAMLprim value mc_poly1305_update (value ctx, value buf, value len) { + poly1305_update ((poly1305_context *) Bytes_val(ctx), _st_uint8(buf), Int_val(len)); return Val_unit; } -CAMLprim value mc_poly1305_finalize (value ctx, value mac, value off) { - poly1305_finish ((poly1305_context *) Bytes_val(ctx), _ba_uint8_off(mac, off)); +CAMLprim value mc_poly1305_finalize (value ctx, value mac) { + poly1305_finish ((poly1305_context *) Bytes_val(ctx), Bytes_val(mac)); return Val_unit; } diff --git a/src/poly1305.ml b/src/poly1305.ml index ac72d89c..08df20ab 100644 --- a/src/poly1305.ml +++ b/src/poly1305.ml @@ -1,21 +1,22 @@ module type S = sig - type mac = Cstruct.t + type mac = string type 'a iter = 'a Uncommon.iter type t val mac_size : int - val empty : key:Cstruct.t -> t - val feed : t -> Cstruct.t -> t - val feedi : t -> Cstruct.t iter -> t - val get : t -> Cstruct.t + val empty : key:string -> t + val feed : t -> string -> t + val feedi : t -> string iter -> t + val get : t -> string - val mac : key:Cstruct.t -> Cstruct.t -> mac - val maci : key:Cstruct.t -> Cstruct.t iter -> mac + val mac : key:string -> string -> mac + val maci : key:string -> string iter -> mac + val macl : key:string -> string list -> mac end module It : S = struct - type mac = Cstruct.t + type mac = string type 'a iter = 'a Uncommon.iter module P = Native.Poly1305 @@ -25,14 +26,14 @@ module It : S = struct let dup = Bytes.copy - let empty ~key:{ Cstruct.buffer ; off ; len } = + let empty ~key = let ctx = Bytes.create (P.ctx_size ()) in - if len <> 32 then invalid_arg "Poly1305 key must be 32 bytes" ; - P.init ctx buffer off ; + if String.length key <> 32 then invalid_arg "Poly1305 key must be 32 bytes" ; + P.init ctx key ; ctx - let update ctx { Cstruct.buffer ; off ; len } = - P.update ctx buffer off len + let update ctx data = + P.update ctx data (String.length data) let feed ctx cs = let t = dup ctx in @@ -45,13 +46,18 @@ module It : S = struct t let final ctx = - let res = Cstruct.create mac_size in - P.finalize ctx res.buffer res.off; - res + let res = Bytes.create mac_size in + P.finalize ctx res; + Bytes.unsafe_to_string res let get ctx = final (dup ctx) let mac ~key data = feed (empty ~key) data |> final let maci ~key iter = feedi (empty ~key) iter |> final + + let macl ~key datas = + let ctx = empty ~key in + List.iter (update ctx) datas; + final ctx end diff --git a/tests/test_cipher.ml b/tests/test_cipher.ml index f208c23a..8bae772f 100644 --- a/tests/test_cipher.ml +++ b/tests/test_cipher.ml @@ -429,14 +429,14 @@ let gcm_regressions = let chacha20_cases = - let case msg ?ctr ~key ~nonce ?(input = Cstruct.create 128) output = + let case msg ?ctr ~key ~nonce ?(input = String.make 128 '\000') output = let key = Chacha20.of_secret (vx key) - and nonce = vx nonce - and output = vx output + and nonce = vx_str nonce + and output = vx_str output in - assert_cs_equal ~msg (Chacha20.crypt ~key ~nonce ?ctr input) output + assert_str_equal ~msg (Chacha20.crypt ~key ~nonce ?ctr input) output in - let rfc8439_input = Cstruct.of_string "Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, sunscreen would be it." in + let rfc8439_input = "Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, sunscreen would be it." in let rfc8439_test_2_4_2 _ = let key = "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f" and nonce = "000000000000004a00000000" @@ -467,13 +467,13 @@ let chacha20_cases = 1a e1 0b 59 4f 09 e2 6a 7e 90 2e cb d0 60 06 91|} in assert_cs_equal ~msg:"Chacha20/Poly1305 RFC 8439 2.8.2 encrypt" - (Chacha20.authenticate_encrypt ~key ~nonce ~adata rfc8439_input) + (Chacha20.authenticate_encrypt ~key ~nonce ~adata (Cstruct.of_string rfc8439_input)) output; assert_cs_equal ~msg:"Chacha20/Poly1305 RFC 8439 2.8.2 decrypt" (match Chacha20.authenticate_decrypt ~key ~nonce ~adata output with | Some cs -> cs | None -> assert_failure "Chacha20/poly1305 decryption broken") - rfc8439_input; - let input = Cstruct.(shift (append (create 16) rfc8439_input) 16) in + (Cstruct.of_string rfc8439_input); + let input = Cstruct.(shift (append (create 16) (Cstruct.of_string rfc8439_input)) 16) in assert_cs_equal ~msg:"Chacha20/Poly1305 RFC 8439 2.8.2 encrypt 2" (Chacha20.authenticate_encrypt ~key ~nonce ~adata input) output; @@ -682,12 +682,12 @@ let chacha20_cases = ] let poly1305_rfc8439_2_5_2 _ = - let key = vx "85d6be7857556d337f4452fe42d506a80103808afb0db2fd4abff6af4149f51b" - and data = Cstruct.of_string "Cryptographic Forum Research Group" + let key = Cstruct.to_string (vx "85d6be7857556d337f4452fe42d506a80103808afb0db2fd4abff6af4149f51b") + and data = "Cryptographic Forum Research Group" and output = vx "a8061dc1305136c6c22b8baf0c0127a9" in assert_cs_equal ~msg:"poly 1305 RFC8439 Section 2.5.2" - (Poly1305.mac ~key data) output + (Cstruct.of_string (Poly1305.mac ~key data)) output let empty_cases _ = let open Cipher_block in diff --git a/tests/test_common.ml b/tests/test_common.ml index 972c0743..2ea5349e 100644 --- a/tests/test_common.ml +++ b/tests/test_common.ml @@ -27,6 +27,10 @@ let assert_cs_equal ?msg = assert_equal ~cmp:Cstruct.equal ?msg ~pp_diff:(pp_diff Cstruct.hexdump_pp) +let assert_str_equal ?msg = + assert_equal ~cmp:String.equal ?msg + ~pp_diff:(fun ppf (a, b) -> pp_diff Cstruct.hexdump_pp ppf (Cstruct.of_string a, Cstruct.of_string b)) + let iter_list xs f = List.iter f xs let cases_of f = @@ -36,6 +40,8 @@ let any _ = true let vx = Cstruct.of_hex +let vx_str data = Cstruct.to_string (Cstruct.of_hex data) + let f1_eq ?msg f (a, b) _ = assert_cs_equal ?msg (f (vx a)) (vx b) From 61303844aa0b9be091c171b87e14e9c4d934a4fe Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Sun, 25 Feb 2024 22:48:06 +0100 Subject: [PATCH 2/8] fix --- src/native/bitfn.h | 8 ++++---- src/native/chacha.c | 6 ++---- src/native/chacha_generic.c | 6 ++---- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/native/bitfn.h b/src/native/bitfn.h index 000748a4..33e33df5 100644 --- a/src/native/bitfn.h +++ b/src/native/bitfn.h @@ -99,24 +99,24 @@ static inline uint64_t ror64(uint64_t word, uint32_t shift) return (word >> shift) | (word << (64 - shift)); } -static inline void array_swap32(uint32_t *d, uint32_t *s, uint32_t nb) +static inline void array_swap32(uint32_t *d, const uint32_t *s, uint32_t nb) { while (nb--) *d++ = bitfn_swap32(*s++); } -static inline void array_swap64(uint64_t *d, uint64_t *s, uint32_t nb) +static inline void array_swap64(uint64_t *d, const uint64_t *s, uint32_t nb) { while (nb--) *d++ = bitfn_swap64(*s++); } -static inline void array_copy32(uint32_t *d, uint32_t *s, uint32_t nb) +static inline void array_copy32(uint32_t *d, const uint32_t *s, uint32_t nb) { while (nb--) *d++ = *s++; } -static inline void array_copy64(uint64_t *d, uint64_t *s, uint32_t nb) +static inline void array_copy64(uint64_t *d, const uint64_t *s, uint32_t nb) { while (nb--) *d++ = *s++; } diff --git a/src/native/chacha.c b/src/native/chacha.c index 1f9b76b2..f640c552 100644 --- a/src/native/chacha.c +++ b/src/native/chacha.c @@ -15,9 +15,7 @@ static inline void mc_chacha_quarterround(uint32_t *x, int a, int b, int c, int static void mc_chacha_core(int count, const uint32_t *src, uint32_t *dst) { uint32_t x[16]; - for (int i = 0; i < 16; i++) { - x[i] = src[i]; - } + cpu_to_le32_array(x, src, 16); for (int i = 0; i < count; i++) { mc_chacha_quarterround(x, 0, 4, 8, 12); mc_chacha_quarterround(x, 1, 5, 9, 13); @@ -32,7 +30,7 @@ static void mc_chacha_core(int count, const uint32_t *src, uint32_t *dst) { for (int i = 0; i < 16; i++) { uint32_t xi = x[i]; uint32_t hj = src[i]; - dst[i] = xi + hj; + dst[i] = le32_to_cpu(xi + hj); } } diff --git a/src/native/chacha_generic.c b/src/native/chacha_generic.c index 56729eb0..2f78d1d0 100644 --- a/src/native/chacha_generic.c +++ b/src/native/chacha_generic.c @@ -11,9 +11,7 @@ static inline void mc_chacha_quarterround(uint32_t *x, int a, int b, int c, int void mc_chacha_core_generic(int count, const uint32_t *src, uint32_t *dst) { uint32_t x[16]; - for (int i = 0; i < 16; i++) { - x[i] = src[i]; - } + cpu_to_le32_array(x, src, 16); for (int i = 0; i < count; i++) { mc_chacha_quarterround(x, 0, 4, 8, 12); mc_chacha_quarterround(x, 1, 5, 9, 13); @@ -28,7 +26,7 @@ void mc_chacha_core_generic(int count, const uint32_t *src, uint32_t *dst) { for (int i = 0; i < 16; i++) { uint32_t xi = x[i]; uint32_t hj = src[i]; - dst[i] = xi + hj; + dst[i] = le32_to_cpu(xi + hj); } } From b03bb11853fb619c3c3e330fbda077bc6449db2c Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Sun, 25 Feb 2024 22:54:36 +0100 Subject: [PATCH 3/8] . --- src/native/chacha.c | 2 +- src/native/chacha_generic.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/native/chacha.c b/src/native/chacha.c index f640c552..fc99fdf0 100644 --- a/src/native/chacha.c +++ b/src/native/chacha.c @@ -29,7 +29,7 @@ static void mc_chacha_core(int count, const uint32_t *src, uint32_t *dst) { } for (int i = 0; i < 16; i++) { uint32_t xi = x[i]; - uint32_t hj = src[i]; + uint32_t hj = cpu_to_le32(src[i]); dst[i] = le32_to_cpu(xi + hj); } } diff --git a/src/native/chacha_generic.c b/src/native/chacha_generic.c index 2f78d1d0..030b7807 100644 --- a/src/native/chacha_generic.c +++ b/src/native/chacha_generic.c @@ -25,7 +25,7 @@ void mc_chacha_core_generic(int count, const uint32_t *src, uint32_t *dst) { } for (int i = 0; i < 16; i++) { uint32_t xi = x[i]; - uint32_t hj = src[i]; + uint32_t hj = cpu_to_le32(src[i]); dst[i] = le32_to_cpu(xi + hj); } } From 1e1d1792dcf6568ec50b7fc5b8b11625484e23c2 Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Tue, 27 Feb 2024 14:26:11 +0100 Subject: [PATCH 4/8] apply @reynir suggestions --- src/chacha20.ml | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/chacha20.ml b/src/chacha20.ml index b7040c06..e2fa21b3 100644 --- a/src/chacha20.ml +++ b/src/chacha20.ml @@ -36,36 +36,41 @@ let init ctr ~key ~nonce = bytes (counter 64 bit)." in let state = Bytes.create block in - Bytes.blit_string s 0 state 0 16 ; - Bytes.blit_string key 0 state 16 32 ; + Bytes.unsafe_blit_string s 0 state 0 16 ; + Bytes.unsafe_blit_string key 0 state 16 32 ; init_ctr state ; - Bytes.blit_string nonce 0 state nonce_off (String.length nonce) ; + Bytes.unsafe_blit_string nonce 0 state nonce_off (String.length nonce) ; state, inc let crypt ~key ~nonce ?(ctr = 0L) data = let state, inc = init ctr ~key ~nonce in let l = String.length data in let block_count = l // block in - let len = block * block_count in let last_len = let last = l mod block in if last = 0 then block else last in - let key_stream = Bytes.create len in + let res = Bytes.create l in let rec loop i = function | 0 -> () | 1 -> - chacha20_block state i key_stream ; - Native.xor_into_bytes data i key_stream i last_len + if last_len = block then begin + chacha20_block state i res ; + Native.xor_into_bytes data i res i block + end else begin + let buf = Bytes.create block in + chacha20_block state 0 buf ; + Native.xor_into_bytes data i buf 0 last_len ; + Bytes.unsafe_blit buf 0 res i last_len + end | n -> - chacha20_block state i key_stream ; - Native.xor_into_bytes data i key_stream i block ; + chacha20_block state i res ; + Native.xor_into_bytes data i res i block ; inc state; loop (i + block) (n - 1) in loop 0 block_count ; - let res = Bytes.unsafe_to_string key_stream in - if l <> len then String.sub res 0 l else res + Bytes.unsafe_to_string res module P = Poly1305.It From a68f3ee9634220c52990232803f18bad8025d2ca Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Wed, 28 Feb 2024 19:41:38 +0100 Subject: [PATCH 5/8] Bytes.unsafe_blit_string is only available from ocaml 4.09 on --- mirage-crypto.opam | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mirage-crypto.opam b/mirage-crypto.opam index d6ad51ce..28854561 100644 --- a/mirage-crypto.opam +++ b/mirage-crypto.opam @@ -13,7 +13,7 @@ build: [ ["dune" "subst"] {dev} ["dune" "runtest" "-p" name "-j" jobs] {with-test} ] depends: [ - "ocaml" {>= "4.08.0"} + "ocaml" {>= "4.09.0"} "dune" {>= "2.7"} "dune-configurator" {>= "2.0.0"} "ounit2" {with-test} From d5f6f374cbfee348779e7f544a08bb82cbe585f2 Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Wed, 28 Feb 2024 20:32:56 +0100 Subject: [PATCH 6/8] remove 4.08 CI --- .github/workflows/test.yml | 2 +- .github/workflows/windows.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 47bc3899..2804d7e4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,7 +9,7 @@ jobs: strategy: fail-fast: false matrix: - ocaml-version: ["4.14.1", "4.13.1", "4.12.1", "4.11.2", "4.10.2", "4.09.1", "4.08.1"] + ocaml-version: ["4.14.1", "4.13.1", "4.12.1", "4.11.2", "4.10.2", "4.09.1"] operating-system: [macos-latest, ubuntu-latest] runs-on: ${{ matrix.operating-system }} diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 2c5216be..82d45e2b 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -9,7 +9,7 @@ jobs: strategy: fail-fast: false matrix: - ocaml-version: ["4.14.0", "4.13.1", "4.12.1", "4.11.2", "4.10.2", "4.09.1", "4.08.1"] + ocaml-version: ["4.14.0", "4.13.1", "4.12.1", "4.11.2", "4.10.2", "4.09.1"] operating-system: [windows-latest] runs-on: ${{ matrix.operating-system }} From db4a1d54992cd3859c4f4043e9f055e97287874e Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Wed, 28 Feb 2024 21:19:24 +0100 Subject: [PATCH 7/8] revert some changes --- bench/speed.ml | 25 +++---------------------- src/chacha20.ml | 6 ------ src/mirage_crypto.mli | 3 --- 3 files changed, 3 insertions(+), 31 deletions(-) diff --git a/bench/speed.ml b/bench/speed.ml index 03946065..21d24348 100644 --- a/bench/speed.ml +++ b/bench/speed.ml @@ -22,7 +22,7 @@ module Time = struct end -let burn_period = 3.0 +let burn_period = 2.0 let sizes = [16; 64; 256; 1024; 8192] (* let sizes = [16] *) @@ -38,17 +38,6 @@ let burn f n = let time = Time.time ~n:iters f cs in (iters, time, float (n * iters) /. time) -let burn_str f n = - let cs = Cstruct.to_string (Mirage_crypto_rng.generate n) in - let (t1, i1) = - let rec loop it = - let t = Time.time ~n:it f cs in - if t > 0.2 then (t, it) else loop (it * 10) in - loop 10 in - let iters = int_of_float (float i1 *. burn_period /. t1) in - let time = Time.time ~n:iters f cs in - (iters, time, float (n * iters) /. time) - let mb = 1024. *. 1024. let throughput title f = @@ -59,14 +48,6 @@ let throughput title f = Printf.printf " % 5d: %04f MB/s (%d iters in %.03f s)\n%!" size (bw /. mb) iters time -let throughput_str title f = - Printf.printf "\n* [%s]\n%!" title ; - sizes |> List.iter @@ fun size -> - Gc.full_major () ; - let (iters, time, bw) = burn_str f size in - Printf.printf " % 5d: %04f MB/s (%d iters in %.03f s)\n%!" - size (bw /. mb) iters time - let count_period = 10. let count f n = @@ -389,8 +370,8 @@ let benchmarks = [ bm "chacha20-poly1305" (fun name -> let key = Mirage_crypto.Chacha20.of_secret (Mirage_crypto_rng.generate 32) - and nonce = Cstruct.to_string (Mirage_crypto_rng.generate 8) in - throughput_str name (Mirage_crypto.Chacha20.auth_enc_str ~key ~nonce)) ; + and nonce = Mirage_crypto_rng.generate 8 in + throughput name (Mirage_crypto.Chacha20.authenticate_encrypt ~key ~nonce)) ; bm "aes-128-ecb" (fun name -> let key = AES.ECB.of_secret (Mirage_crypto_rng.generate 16) in diff --git a/src/chacha20.ml b/src/chacha20.ml index e2fa21b3..0e58b8cc 100644 --- a/src/chacha20.ml +++ b/src/chacha20.ml @@ -118,10 +118,4 @@ let authenticate_decrypt ~key ~nonce ?adata data = let cipher, tag = Cstruct.split data (Cstruct.length data - P.mac_size) in authenticate_decrypt_tag ~key ~nonce ?adata ~tag cipher -let auth_enc_str ~key ~nonce ?(adata = "") data = - let poly1305_key = generate_poly1305_key ~key ~nonce in - let ciphertext = crypt ~key ~nonce ~ctr:1L data in - let mac = mac ~key:poly1305_key ~adata ciphertext in - ciphertext ^ mac - let tag_size = P.mac_size diff --git a/src/mirage_crypto.mli b/src/mirage_crypto.mli index 42d6e36a..73987744 100644 --- a/src/mirage_crypto.mli +++ b/src/mirage_crypto.mli @@ -523,9 +523,6 @@ module Chacha20 : sig IETF mode (and counter fit into 32 bits), or [key] must be either 16 bytes or 32 bytes and [nonce] 8 bytes. *) - - val auth_enc_str : key:key -> nonce:string -> ?adata:string -> - string -> string end (** Streaming ciphers. *) From 3b3533c8ce62fe2c2a4c3bdf0d48d907d5f8cecd Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Wed, 28 Feb 2024 21:22:45 +0100 Subject: [PATCH 8/8] minor --- tests/test_cipher.ml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_cipher.ml b/tests/test_cipher.ml index 8bae772f..badfedab 100644 --- a/tests/test_cipher.ml +++ b/tests/test_cipher.ml @@ -682,12 +682,12 @@ let chacha20_cases = ] let poly1305_rfc8439_2_5_2 _ = - let key = Cstruct.to_string (vx "85d6be7857556d337f4452fe42d506a80103808afb0db2fd4abff6af4149f51b") + let key = vx_str "85d6be7857556d337f4452fe42d506a80103808afb0db2fd4abff6af4149f51b" and data = "Cryptographic Forum Research Group" - and output = vx "a8061dc1305136c6c22b8baf0c0127a9" + and output = vx_str "a8061dc1305136c6c22b8baf0c0127a9" in - assert_cs_equal ~msg:"poly 1305 RFC8439 Section 2.5.2" - (Cstruct.of_string (Poly1305.mac ~key data)) output + assert_str_equal ~msg:"poly 1305 RFC8439 Section 2.5.2" + (Poly1305.mac ~key data) output let empty_cases _ = let open Cipher_block in