-
Notifications
You must be signed in to change notification settings - Fork 854
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Zvk: Implement Zvknh[ab], NIST Suite: Vector SHA-2
Implement the instructions part of the Zvknha and Zvknhb sub-extensions: - vsha2ms.vv, message schedule - vsha2ch.vv / vsha2cl.vv, compression rounds A header file for common macros is added. Signed-off-by: Eric Gouriou <ego@rivosinc.com>
- Loading branch information
1 parent
fbd4ca2
commit 00873aa
Showing
5 changed files
with
348 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
// vsha2ch.vv vd, vs2, vs1 | ||
// | ||
// Runs two SHA-2 compression rounds in each loop-body iteration, feeding | ||
// them kw2 and then kw3 from vs1; kw1 and kw0 are extracted but unused. | ||
// The e32 case uses the SHA-256 round macro, the e64 case the SHA-512 one. | ||
|
||
#include "zvknh_ext_macros.h" | ||
|
||
// Ensures VSEW is 32 or 64, and vd doesn't overlap with either vs1 or vs2. | ||
require_vsha2_common_constraints; | ||
|
||
switch (P.VU.vsew) { | ||
case e32: { | ||
// Extra checks that only apply when VSEW is 32. | ||
require_vsha2_vsew32_constraints; | ||
|
||
VI_ZVK_VD_VS1_VS2_EGU32x4_NOVM_LOOP( | ||
{}, | ||
{ | ||
// {c, d, g, h} <- vd | ||
EXTRACT_EGU32x4_WORDS_BE(vd, c, d, g, h); | ||
// {a, b, e, f} <- vs2 | ||
EXTRACT_EGU32x4_WORDS_BE(vs2, a, b, e, f); | ||
// {kw3, kw2, kw1, kw0} <- vs1. "kw" stands for K+W | ||
// Only kw3 and kw2 are consumed below. | ||
EXTRACT_EGU32x4_WORDS_BE(vs1, kw3, kw2, | ||
UNUSED _unused_kw1, UNUSED _unused_kw0); | ||
|
||
// Two consecutive rounds: kw2 first, then kw3. | ||
ZVK_SHA256_COMPRESS(a, b, c, d, e, f, g, h, kw2); | ||
ZVK_SHA256_COMPRESS(a, b, c, d, e, f, g, h, kw3); | ||
|
||
// Update the destination register, vd <- {a, b, e, f}. | ||
// The updated c, d, g, h values are not written back. | ||
SET_EGU32x4_BE(vd, a, b, e, f); | ||
} | ||
); | ||
break; | ||
} | ||
|
||
case e64: { | ||
// Extra checks that only apply when VSEW is 64. | ||
require_vsha2_vsew64_constraints; | ||
|
||
VI_ZVK_VD_VS1_VS2_EGU64x4_NOVM_LOOP( | ||
{}, | ||
{ | ||
// {c, d, g, h} <- vd | ||
EXTRACT_EGU64x4_WORDS_BE(vd, c, d, g, h); | ||
// {a, b, e, f} <- vs2 | ||
EXTRACT_EGU64x4_WORDS_BE(vs2, a, b, e, f); | ||
// {kw3, kw2, kw1, kw0} <- vs1. "kw" stands for K+W | ||
// Only kw3 and kw2 are consumed below. | ||
EXTRACT_EGU64x4_WORDS_BE(vs1, kw3, kw2, | ||
UNUSED _unused_kw1, UNUSED _unused_kw0); | ||
|
||
// Two consecutive rounds: kw2 first, then kw3. | ||
ZVK_SHA512_COMPRESS(a, b, c, d, e, f, g, h, kw2); | ||
ZVK_SHA512_COMPRESS(a, b, c, d, e, f, g, h, kw3); | ||
|
||
// Update the destination register, vd <- {a, b, e, f}. | ||
// The updated c, d, g, h values are not written back. | ||
SET_EGU64x4_BE(vd, a, b, e, f); | ||
} | ||
); | ||
break; | ||
} | ||
|
||
// 'require_vsha2_common_constraints' ensures that | ||
// VSEW is either 32 or 64, so this branch should not be reached. | ||
default: | ||
require(false); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
// vsha2cl.vv vd, vs2, vs1 | ||
// | ||
// Runs two SHA-2 compression rounds in each loop-body iteration, feeding | ||
// them kw0 and then kw1 from vs1; kw3 and kw2 are extracted but unused. | ||
// The e32 case uses the SHA-256 round macro, the e64 case the SHA-512 one. | ||
|
||
#include "zvknh_ext_macros.h" | ||
|
||
// Ensures VSEW is 32 or 64, and vd doesn't overlap with either vs1 or vs2. | ||
require_vsha2_common_constraints; | ||
|
||
switch (P.VU.vsew) { | ||
case e32: { | ||
// Extra checks that only apply when VSEW is 32. | ||
require_vsha2_vsew32_constraints; | ||
|
||
VI_ZVK_VD_VS1_VS2_EGU32x4_NOVM_LOOP( | ||
{}, | ||
{ | ||
// {c, d, g, h} <- vd | ||
EXTRACT_EGU32x4_WORDS_BE(vd, c, d, g, h); | ||
// {a, b, e, f} <- vs2 | ||
EXTRACT_EGU32x4_WORDS_BE(vs2, a, b, e, f); | ||
// {kw3, kw2, kw1, kw0} <- vs1. "kw" stands for K+W | ||
// Only kw1 and kw0 are consumed below. | ||
EXTRACT_EGU32x4_WORDS_BE(vs1, UNUSED _unused_kw3, UNUSED _unused_kw2, | ||
kw1, kw0); | ||
|
||
// Two consecutive rounds: kw0 first, then kw1. | ||
ZVK_SHA256_COMPRESS(a, b, c, d, e, f, g, h, kw0); | ||
ZVK_SHA256_COMPRESS(a, b, c, d, e, f, g, h, kw1); | ||
|
||
// Update the destination register, vd <- {a, b, e, f}. | ||
// The updated c, d, g, h values are not written back. | ||
SET_EGU32x4_BE(vd, a, b, e, f); | ||
} | ||
); | ||
break; | ||
} | ||
|
||
case e64: { | ||
// Extra checks that only apply when VSEW is 64. | ||
require_vsha2_vsew64_constraints; | ||
|
||
VI_ZVK_VD_VS1_VS2_EGU64x4_NOVM_LOOP( | ||
{}, | ||
{ | ||
// {c, d, g, h} <- vd | ||
EXTRACT_EGU64x4_WORDS_BE(vd, c, d, g, h); | ||
// {a, b, e, f} <- vs2 | ||
EXTRACT_EGU64x4_WORDS_BE(vs2, a, b, e, f); | ||
// {kw3, kw2, kw1, kw0} <- vs1. "kw" stands for K+W | ||
// Only kw1 and kw0 are consumed below. | ||
EXTRACT_EGU64x4_WORDS_BE(vs1, UNUSED _unused_kw3, UNUSED _unused_kw2, | ||
kw1, kw0); | ||
|
||
// Two consecutive rounds: kw0 first, then kw1. | ||
ZVK_SHA512_COMPRESS(a, b, c, d, e, f, g, h, kw0); | ||
ZVK_SHA512_COMPRESS(a, b, c, d, e, f, g, h, kw1); | ||
|
||
// Update the destination register, vd <- {a, b, e, f}. | ||
// The updated c, d, g, h values are not written back. | ||
SET_EGU64x4_BE(vd, a, b, e, f); | ||
} | ||
); | ||
break; | ||
} | ||
|
||
// 'require_vsha2_common_constraints' ensures that | ||
// VSEW is either 32 or 64, so this branch should not be reached. | ||
default: | ||
require(false); | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
// vsha2ms.vv vd, vs2, vs1 | ||
// | ||
// SHA-2 message schedule: from the schedule words held in vd, vs2 and vs1, | ||
// computes the next four words w16..w19 and writes them to vd. | ||
// The e32 case uses the SHA-256 schedule macro, the e64 case the SHA-512 one. | ||
|
||
#include "zvknh_ext_macros.h" | ||
|
||
// Ensures VSEW is 32 or 64, and vd doesn't overlap with either vs1 or vs2. | ||
require_vsha2_common_constraints; | ||
|
||
switch (P.VU.vsew) { | ||
case e32: { | ||
// Extra checks that only apply when VSEW is 32. | ||
require_vsha2_vsew32_constraints; | ||
|
||
VI_ZVK_VD_VS1_VS2_EGU32x4_NOVM_LOOP( | ||
{}, | ||
{ | ||
// {w3, w2, w1, w0} <- vd | ||
EXTRACT_EGU32x4_WORDS_BE(vd, w3, w2, w1, w0); | ||
// {w11, w10, w9, w4} <- vs2 | ||
EXTRACT_EGU32x4_WORDS_BE(vs2, w11, w10, w9, w4); | ||
// {w15, w14, w13, w12} <- vs1 | ||
// w13 is not an input to any of the four computations below. | ||
EXTRACT_EGU32x4_WORDS_BE(vs1, w15, w14, UNUSED _unused_w13, w12); | ||
|
||
// w18 and w19 depend on the freshly computed w16 and w17, | ||
// so the four words must be computed in this order. | ||
const uint32_t w16 = ZVK_SHA256_SCHEDULE(w14, w9, w1, w0); | ||
const uint32_t w17 = ZVK_SHA256_SCHEDULE(w15, w10, w2, w1); | ||
const uint32_t w18 = ZVK_SHA256_SCHEDULE(w16, w11, w3, w2); | ||
const uint32_t w19 = ZVK_SHA256_SCHEDULE(w17, w12, w4, w3); | ||
|
||
// Update the destination register, vd <- {w19, w18, w17, w16}. | ||
SET_EGU32x4_BE(vd, w19, w18, w17, w16); | ||
} | ||
); | ||
break; | ||
} | ||
|
||
case e64: { | ||
// Extra checks that only apply when VSEW is 64. | ||
require_vsha2_vsew64_constraints; | ||
|
||
VI_ZVK_VD_VS1_VS2_EGU64x4_NOVM_LOOP( | ||
{}, | ||
{ | ||
// {w3, w2, w1, w0} <- vd | ||
EXTRACT_EGU64x4_WORDS_BE(vd, w3, w2, w1, w0); | ||
// {w11, w10, w9, w4} <- vs2 | ||
EXTRACT_EGU64x4_WORDS_BE(vs2, w11, w10, w9, w4); | ||
// {w15, w14, w13, w12} <- vs1 | ||
// w13 is not an input to any of the four computations below. | ||
EXTRACT_EGU64x4_WORDS_BE(vs1, w15, w14, UNUSED _unused_w13, w12); | ||
|
||
// w18 and w19 depend on the freshly computed w16 and w17, | ||
// so the four words must be computed in this order. | ||
const uint64_t w16 = ZVK_SHA512_SCHEDULE(w14, w9, w1, w0); | ||
const uint64_t w17 = ZVK_SHA512_SCHEDULE(w15, w10, w2, w1); | ||
const uint64_t w18 = ZVK_SHA512_SCHEDULE(w16, w11, w3, w2); | ||
const uint64_t w19 = ZVK_SHA512_SCHEDULE(w17, w12, w4, w3); | ||
|
||
// Update the destination register, vd <- {w19, w18, w17, w16}. | ||
SET_EGU64x4_BE(vd, w19, w18, w17, w16); | ||
} | ||
); | ||
break; | ||
} | ||
|
||
// 'require_vsha2_common_constraints' ensures that | ||
// VSEW is either 32 or 64, so this branch should not be reached. | ||
default: | ||
require(false); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
// Helper macros to help implement instructions defined as part of | ||
// the RISC-V Zvknh[ab] extensions (vector SHA-256/SHA-512 cryptography). | ||
|
||
#include "zvk_ext_macros.h" | ||
|
||
#ifndef RISCV_ZVKNH_EXT_MACROS_H_ | ||
#define RISCV_ZVKNH_EXT_MACROS_H_ | ||
|
||
// Constraints common to all vsha2* instructions, across all VSEW: | ||
// - VSEW is 32 (SHA-256) or 64 (SHA-512) | ||
// - No overlap of vd with vs1 or vs2. This is checked by requiring that | ||
// insn.rd() differs from both insn.rs1() and insn.rs2(). | ||
// | ||
// The constraint that vstart and vl are both EGS (4) aligned | ||
// is checked in the VI_..._EGU32x4_..._LOOP and VI_..._EGU64x4_..._LOOP | ||
// macros. | ||
// | ||
// Expands to a single do { ... } while (false) statement; the caller | ||
// supplies the terminating semicolon. | ||
#define require_vsha2_common_constraints \ | ||
do { \ | ||
require(P.VU.vsew == 32 || P.VU.vsew == 64); \ | ||
require(insn.rd() != insn.rs1()); \ | ||
require(insn.rd() != insn.rs2()); \ | ||
} while (false) | ||
|
||
// Constraints on vsha2 instructions that must be verified when VSEW==32. | ||
// Those are *IN ADDITION* to the constraints checked by | ||
// 'require_vsha2_common_constraints', which is meant to be run earlier. | ||
// | ||
// Applies 'require_zvknh_256' and 'require_egw_fits(128)'; 128 is | ||
// presumably the element group width in bits (4 x 32) - confirm against | ||
// the definition of 'require_egw_fits'. | ||
// | ||
// The constraint that vstart and vl are both EGS (4) aligned | ||
// is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros. | ||
#define require_vsha2_vsew32_constraints \ | ||
do { \ | ||
require_zvknh_256; \ | ||
require_egw_fits(128); \ | ||
} while (false) | ||
|
||
// Constraints on vsha2 instructions that must be verified when VSEW==64. | ||
// Those are *IN ADDITION* to the constraints checked by | ||
// 'require_vsha2_common_constraints', which is meant to be run earlier. | ||
// | ||
// Applies 'require_zvknh_512' and 'require_egw_fits(256)'; 256 is | ||
// presumably the element group width in bits (4 x 64) - confirm against | ||
// the definition of 'require_egw_fits'. | ||
// | ||
// The constraint that vstart and vl are both EGS (4) aligned | ||
// is checked in the VI_ZVK_..._EGU64x4_..._LOOP macros. | ||
#define require_vsha2_vsew64_constraints \ | ||
do { \ | ||
require_zvknh_512; \ | ||
require_egw_fits(256); \ | ||
} while (false) | ||
|
||
// | ||
// SHA-256 and SHA-512 common logic | ||
// | ||
|
||
// Ch(x, y, z) = (xy) ⊕ (~xz) = xy | ~xz | ||
// Bitwise "choose": each result bit comes from Y where the matching bit | ||
// of X is 1, and from Z where it is 0. X is expanded twice, so it should | ||
// be free of side effects. | ||
#define ZVK_SHA_CH(X, Y, Z) (((X) & (Y)) ^ ((~(X)) & (Z))) | ||
|
||
// Maj(x, y, z) = (xy) ⊕ (xz) ⊕ (yz) = xy | xz | yz | ||
// Bitwise majority: each result bit is 1 when at least two of the | ||
// matching bits of X, Y and Z are 1. Each argument is expanded twice, | ||
// so arguments should be free of side effects. | ||
#define ZVK_SHA_MAJ(X, Y, Z) (((X) & (Y)) ^ ((X) & (Z)) ^ ((Y) & (Z))) | ||
|
||
// | ||
// SHA-256 | ||
// | ||
|
||
// SHA-256 sum0(x) = ROTR2(x) ⊕ ROTR13(x) ⊕ ROTR22(x) | ||
// X is expanded three times, so it should be free of side effects. | ||
#define ZVK_SHA256_SUM0(X) \ | ||
(ZVK_ROR32(X, 2) ^ ZVK_ROR32(X, 13) ^ ZVK_ROR32(X, 22)) | ||
|
||
// SHA-256 sum1(x) = ROTR6(x) ⊕ ROTR11(x) ⊕ ROTR25(x) | ||
// X is expanded three times, so it should be free of side effects. | ||
#define ZVK_SHA256_SUM1(X) \ | ||
(ZVK_ROR32(X, 6) ^ ZVK_ROR32(X, 11) ^ ZVK_ROR32(X, 25)) | ||
|
||
// SHA-256 sig0(x) = ROTR7(x) ⊕ ROTR18(x) ⊕ SHR3(x) | ||
// SHR is a plain '>>'. NOTE(review): presumably X is an unsigned 32-bit | ||
// value so that the shift is logical - confirm against callers. | ||
#define ZVK_SHA256_SIG0(X) \ | ||
(ZVK_ROR32(X, 7) ^ ZVK_ROR32(X, 18) ^ ((X) >> 3)) | ||
|
||
// SHA-256 sig1(x) = ROTR17(x) ⊕ ROTR19(x) ⊕ SHR10(x) | ||
// SHR is a plain '>>'. NOTE(review): presumably X is an unsigned 32-bit | ||
// value so that the shift is logical - confirm against callers. | ||
#define ZVK_SHA256_SIG1(X) \ | ||
(ZVK_ROR32(X, 17) ^ ZVK_ROR32(X, 19) ^ ((X) >> 10)) | ||
|
||
// Given the schedule words W[t+0], W[t+1], W[t+9], W[t+14], computes | ||
// W[t+16] = sig1(W[t+14]) + W[t+9] + sig0(W[t+1]) + W[t+0]. | ||
// Note the argument order: W14 comes first and W0 last. | ||
#define ZVK_SHA256_SCHEDULE(W14, W9, W1, W0) \ | ||
(ZVK_SHA256_SIG1(W14) + (W9) + ZVK_SHA256_SIG0(W1) + (W0)) | ||
|
||
// Performs one round of compression (out of the 64 rounds), given the state | ||
// temporaries A,B,C,...,H, and KW, the sum Kt+Wt. | ||
// Updates A,B,C,...,H to their new values. KW is not modified. | ||
// A..H must be assignable lvalues; the expansion is a braced block that | ||
// declares the locals t1 and t2. | ||
// | ||
// Note that some of the logic could be omitted in vsha2c[hl] since | ||
// some of the variables are dropped in each of those. However removing | ||
// those unnecessary updates reduces the opportunities to share this single | ||
// per-round logic and forces us to move further away from how the logic | ||
// is expressed in FIPS PUB 180-4. | ||
#define ZVK_SHA256_COMPRESS(A, B, C, D, E, F, G, H, KW) \ | ||
{ \ | ||
const uint32_t t1 = (H) + ZVK_SHA256_SUM1(E) + \ | ||
ZVK_SHA_CH((E), (F), (G)) + (KW); \ | ||
const uint32_t t2 = ZVK_SHA256_SUM0(A) + ZVK_SHA_MAJ((A), (B), (C)); \ | ||
(H) = (G); \ | ||
(G) = (F); \ | ||
(F) = (E); \ | ||
(E) = (D) + t1; \ | ||
(D) = (C); \ | ||
(C) = (B); \ | ||
(B) = (A); \ | ||
(A) = t1 + t2; \ | ||
} | ||
|
||
// | ||
// SHA-512 | ||
// | ||
|
||
// SHA-512 sum0(x) = ROTR28(x) ⊕ ROTR34(x) ⊕ ROTR39(x) | ||
// X is expanded three times, so it should be free of side effects. | ||
#define ZVK_SHA512_SUM0(X) \ | ||
(ZVK_ROR64(X, 28) ^ ZVK_ROR64(X, 34) ^ ZVK_ROR64(X, 39)) | ||
|
||
// SHA-512 sum1(x) = ROTR14(x) ⊕ ROTR18(x) ⊕ ROTR41(x) | ||
// X is expanded three times, so it should be free of side effects. | ||
#define ZVK_SHA512_SUM1(X) \ | ||
(ZVK_ROR64(X, 14) ^ ZVK_ROR64(X, 18) ^ ZVK_ROR64(X, 41)) | ||
|
||
// SHA-512 sig0(x) = ROTR1(x) ⊕ ROTR8(x) ⊕ SHR7(x) | ||
// SHR is a plain '>>'. NOTE(review): presumably X is an unsigned 64-bit | ||
// value so that the shift is logical - confirm against callers. | ||
#define ZVK_SHA512_SIG0(X) \ | ||
(ZVK_ROR64(X, 1) ^ ZVK_ROR64(X, 8) ^ ((X) >> 7)) | ||
|
||
// SHA-512 sig1(x) = ROTR19(x) ⊕ ROTR61(x) ⊕ SHR6(x) | ||
// SHR is a plain '>>'. NOTE(review): presumably X is an unsigned 64-bit | ||
// value so that the shift is logical - confirm against callers. | ||
#define ZVK_SHA512_SIG1(X) \ | ||
(ZVK_ROR64(X, 19) ^ ZVK_ROR64(X, 61) ^ ((X) >> 6)) | ||
|
||
// Given the schedule words W[t+0], W[t+1], W[t+9], W[t+14], computes | ||
// W[t+16] = sig1(W[t+14]) + W[t+9] + sig0(W[t+1]) + W[t+0]. | ||
// Note the argument order: W14 comes first and W0 last. | ||
#define ZVK_SHA512_SCHEDULE(W14, W9, W1, W0) \ | ||
(ZVK_SHA512_SIG1(W14) + (W9) + ZVK_SHA512_SIG0(W1) + (W0)) | ||
|
||
// Performs one round of compression (out of the 80 rounds), given the state | ||
// temporaries A,B,C,...,H, and KW, the sum Kt+Wt. | ||
// Updates A,B,C,...,H to their new values. KW is not modified. | ||
// A..H must be assignable lvalues; the expansion is a braced block that | ||
// declares the locals t1 and t2. | ||
// | ||
// Note that some of the logic could be omitted in vsha2c[hl] since | ||
// some of the variables are dropped in each of those. However removing | ||
// those unnecessary updates reduces the opportunities to share this single | ||
// per-round logic and forces us to move further away from how the logic | ||
// is expressed in FIPS PUB 180-4. | ||
#define ZVK_SHA512_COMPRESS(A, B, C, D, E, F, G, H, KW) \ | ||
{ \ | ||
const uint64_t t1 = (H) + ZVK_SHA512_SUM1(E) + \ | ||
ZVK_SHA_CH((E), (F), (G)) + (KW); \ | ||
const uint64_t t2 = ZVK_SHA512_SUM0(A) + ZVK_SHA_MAJ((A), (B), (C)); \ | ||
(H) = (G); \ | ||
(G) = (F); \ | ||
(F) = (E); \ | ||
(E) = (D) + t1; \ | ||
(D) = (C); \ | ||
(C) = (B); \ | ||
(B) = (A); \ | ||
(A) = t1 + t2; \ | ||
} | ||
|
||
#endif // RISCV_ZVKNH_EXT_MACROS_H_ |