-
Notifications
You must be signed in to change notification settings - Fork 854
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Zvk: Implement Zvkned, vector AES single round
Implement the Zvkned extension, "NIST Suite: Vector AES Encryption & Decryption (Single Round)". - vaeskf1.vi: AES forward key scheduling, AES-128. - vaeskf2.vi: AES forward key scheduling, AES-256. - vaesz.vs: AES encryption/decryption, 0-th round. - vaesdm.{vs,vv}: AES decryption, middle rounds. - vaesdf.{vs,vv}: AES decryption, final round. - vaesem.{vs,vv}: AES encryption, middle rounds. - vaesef.{vs,vv}: AES encryption, final round. An extension specific header containing common logic is added. Co-authored-by: Stanislaw Kardach <kda@semihalf.com> Signed-off-by: Eric Gouriou <ego@rivosinc.com>
- Loading branch information
1 parent
00873aa
commit eadb0e1
Showing
13 changed files
with
786 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
// vaesdf.vs vd, vs2 | ||
|
||
#include "zvkned_ext_macros.h" | ||
#include "zvk_ext_macros.h" | ||
|
||
// AES decryption, final round, vector-scalar form: each element group of vd | ||
// visited by the loop macro is transformed with the one round key read from | ||
// element group 0 of vs2. | ||
require_vaes_vs_constraints; | ||
|
||
VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( | ||
{}, | ||
// This statement will be executed before the first execution | ||
// of the loop, and only if the loop is going to be entered. | ||
// We cannot use a block ( { ... } ) since we want the variables declared | ||
// here to be visible in the loop block. | ||
// We capture the "scalar", vs2's first element group (index 0), by copy, | ||
// even though the "no overlap" constraint means that vs2 should remain | ||
// constant during the loop. | ||
const EGU8x16_t scalar_key = P.VU.elt_group<EGU8x16_t>(vs2_num, 0);, | ||
{ | ||
// For AES128, AES192, or AES256, state and key are 128b/16B values: | ||
// - vd contains the input state, | ||
// - vs2 (element group 0) contains the round key, | ||
// - vd receives the output state. | ||
// | ||
// While the spec calls for handling the vector as made of EGU32x4 | ||
// element groups (i.e., 4 uint32_t), it is convenient to treat | ||
// AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why | ||
// we extract the operands here instead of using the existing LOOP | ||
// macro that defines/extracts the operand variables as EGU32x4. | ||
EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg); | ||
|
||
// InvShiftRows - Rotate the bytes of each row by 0, 1, 2, 3 positions. | ||
VAES_INV_SHIFT_ROWS(aes_state); | ||
// InvSubBytes - Apply the inverse S-box to every byte in the state. | ||
VAES_INV_SUB_BYTES(aes_state); | ||
// AddRoundKey (which is also InvAddRoundKey as it's xor) | ||
EGU8x16_XOREQ(aes_state, scalar_key); | ||
// InvMixColumns is not performed in the final round. | ||
|
||
// Update the destination register. | ||
// NOTE(review): the trailing 'true' presumably flags the access as a | ||
// write - confirm against the elt_group declaration. | ||
EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true); | ||
EGU8x16_COPY(vd, aes_state); | ||
} | ||
); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
// vaesdf.vv vd, vs2 | ||
|
||
#include "zvkned_ext_macros.h" | ||
#include "zvk_ext_macros.h" | ||
|
||
// AES decryption, final round, vector-vector form: each element group of vd | ||
// is transformed with the round key held in the same element group of vs2. | ||
require_vaes_vv_constraints; | ||
|
||
VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( | ||
{}, | ||
{}, // No PRELOOP. | ||
{ | ||
// For AES128, AES192, or AES256, state and key are 128b/16B values: | ||
// - vd (on input) contains the input state, | ||
// - vs2 contains the input round key, | ||
// - vd (on output) receives the output state. | ||
// | ||
// While the spec calls for handling the vector as made of EGU32x4 | ||
// element groups (i.e., 4 uint32_t), it is convenient to treat | ||
// AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why | ||
// we extract the operands here instead of using the existing LOOP | ||
// macro that defines/extracts the operand variables as EGU32x4. | ||
EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg); | ||
const EGU8x16_t round_key = P.VU.elt_group<EGU8x16_t>(vs2_num, idx_eg); | ||
|
||
// InvShiftRows - Rotate the bytes of each row by 0, 1, 2, 3 positions. | ||
VAES_INV_SHIFT_ROWS(aes_state); | ||
// InvSubBytes - Apply the inverse S-box to every byte in the state. | ||
VAES_INV_SUB_BYTES(aes_state); | ||
// AddRoundKey (which is also InvAddRoundKey as it's xor) | ||
EGU8x16_XOREQ(aes_state, round_key); | ||
// InvMixColumns is not performed in the final round. | ||
|
||
// Update the destination register. | ||
// NOTE(review): the trailing 'true' presumably flags the access as a | ||
// write - confirm against the elt_group declaration. | ||
EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true); | ||
EGU8x16_COPY(vd, aes_state); | ||
} | ||
); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
// vaesdm.vs vd, vs2 | ||
|
||
#include "zvkned_ext_macros.h" | ||
#include "zvk_ext_macros.h" | ||
|
||
// AES decryption, middle round, vector-scalar form: each element group of vd | ||
// visited by the loop macro is transformed with the one round key read from | ||
// element group 0 of vs2. | ||
require_vaes_vs_constraints; | ||
|
||
VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( | ||
{}, | ||
// This statement will be executed before the first execution | ||
// of the loop, and only if the loop is going to be entered. | ||
// We cannot use a block ( { ... } ) since we want the variables declared | ||
// here to be visible in the loop block. | ||
// We capture the "scalar", vs2's first element group (index 0), by copy, | ||
// even though the "no overlap" constraint means that vs2 should remain | ||
// constant during the loop. | ||
const EGU8x16_t scalar_key = P.VU.elt_group<EGU8x16_t>(vs2_num, 0);, | ||
{ | ||
// For AES128, AES192, or AES256, state and key are 128b/16B values: | ||
// - vd (on input) contains the input state, | ||
// - vs2 (element group 0) contains the input round key, | ||
// - vd (on output) receives the output state. | ||
// | ||
// While the spec calls for handling the vector as made of EGU32x4 | ||
// element groups (i.e., 4 uint32_t), it is convenient to treat | ||
// AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why | ||
// we extract the operands here instead of using the existing LOOP | ||
// macro that defines/extracts the operand variables as EGU32x4. | ||
EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg); | ||
|
||
// InvShiftRows - Rotate the bytes of each row by 0, 1, 2, 3 positions. | ||
VAES_INV_SHIFT_ROWS(aes_state); | ||
// InvSubBytes - Apply the inverse S-box to every byte in the state. | ||
VAES_INV_SUB_BYTES(aes_state); | ||
// AddRoundKey (which is also InvAddRoundKey as it's xor) | ||
EGU8x16_XOREQ(aes_state, scalar_key); | ||
// InvMixColumns - applied after the key addition in middle rounds. | ||
VAES_INV_MIX_COLUMNS(aes_state); | ||
|
||
// Update the destination register. | ||
// NOTE(review): the trailing 'true' presumably flags the access as a | ||
// write - confirm against the elt_group declaration. | ||
EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true); | ||
EGU8x16_COPY(vd, aes_state); | ||
} | ||
); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
// vaesdm.vv vd, vs2 | ||
|
||
#include "zvkned_ext_macros.h" | ||
#include "zvk_ext_macros.h" | ||
|
||
// AES decryption, middle round, vector-vector form: each element group of vd | ||
// is transformed with the round key held in the same element group of vs2. | ||
require_vaes_vv_constraints; | ||
|
||
VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( | ||
{}, | ||
{}, // No PRELOOP. | ||
{ | ||
// For AES128, AES192, or AES256, state and key are 128b/16B values: | ||
// - vd contains the input state, | ||
// - vs2 contains the round key, | ||
// - vd receives the output state. | ||
// | ||
// While the spec calls for handling the vector as made of EGU32x4 | ||
// element groups (i.e., 4 uint32_t), it is convenient to treat | ||
// AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why | ||
// we extract the operands here instead of using the existing LOOP | ||
// macro that defines/extracts the operand variables as EGU32x4. | ||
EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg); | ||
const EGU8x16_t round_key = P.VU.elt_group<EGU8x16_t>(vs2_num, idx_eg); | ||
|
||
// InvShiftRows - Rotate the bytes of each row by 0, 1, 2, 3 positions. | ||
VAES_INV_SHIFT_ROWS(aes_state); | ||
// InvSubBytes - Apply the inverse S-box to every byte in the state. | ||
VAES_INV_SUB_BYTES(aes_state); | ||
// AddRoundKey (which is also InvAddRoundKey as it's xor) | ||
EGU8x16_XOREQ(aes_state, round_key); | ||
// InvMixColumns - applied after the key addition in middle rounds. | ||
VAES_INV_MIX_COLUMNS(aes_state); | ||
|
||
// Update the destination register. | ||
// NOTE(review): the trailing 'true' presumably flags the access as a | ||
// write - confirm against the elt_group declaration. | ||
EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true); | ||
EGU8x16_COPY(vd, aes_state); | ||
} | ||
); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
// vaesef.vs vd, vs2 | ||
|
||
#include "zvkned_ext_macros.h" | ||
#include "zvk_ext_macros.h" | ||
|
||
// AES encryption, final round, vector-scalar form: each element group of vd | ||
// visited by the loop macro is transformed with the one round key read from | ||
// element group 0 of vs2. | ||
require_vaes_vs_constraints; | ||
|
||
VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( | ||
{}, | ||
// This statement will be executed before the first execution | ||
// of the loop, and only if the loop is going to be entered. | ||
// We cannot use a block ( { ... } ) since we want the variables declared | ||
// here to be visible in the loop block. | ||
// We capture the "scalar", vs2's first element group (index 0), by copy, | ||
// even though the "no overlap" constraint means that vs2 should remain | ||
// constant during the loop. | ||
const EGU8x16_t scalar_key = P.VU.elt_group<EGU8x16_t>(vs2_num, 0);, | ||
{ | ||
// For AES128, AES192, or AES256, state and key are 128b/16B values: | ||
// - vd contains the input state, | ||
// - vs2 (element group 0) contains the round key, | ||
// - vd receives the output state. | ||
// | ||
// While the spec calls for handling the vector as made of EGU32x4 | ||
// element groups (i.e., 4 uint32_t), it is convenient to treat | ||
// AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why | ||
// we extract the operands here instead of using the existing LOOP | ||
// macro that defines/extracts the operand variables as EGU32x4. | ||
EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg); | ||
|
||
// SubBytes - Apply the S-box to every byte in the state. | ||
VAES_SUB_BYTES(aes_state); | ||
// ShiftRows - Rotate the bytes of each row by 0, 1, 2, 3 positions. | ||
VAES_SHIFT_ROWS(aes_state); | ||
// MixColumns is not performed for the final round. | ||
// AddRoundKey | ||
EGU8x16_XOREQ(aes_state, scalar_key); | ||
|
||
// Update the destination register. | ||
// NOTE(review): the trailing 'true' presumably flags the access as a | ||
// write - confirm against the elt_group declaration. | ||
EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true); | ||
EGU8x16_COPY(vd, aes_state); | ||
} | ||
); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
// vaesef.vv vd, vs2 | ||
|
||
#include "zvkned_ext_macros.h" | ||
#include "zvk_ext_macros.h" | ||
|
||
// AES encryption, final round, vector-vector form: each element group of vd | ||
// is transformed with the round key held in the same element group of vs2. | ||
require_vaes_vv_constraints; | ||
|
||
VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( | ||
{}, | ||
{}, // No PRELOOP. | ||
{ | ||
// For AES128, AES192, or AES256, state and key are 128b/16B values: | ||
// - vd contains the input state, | ||
// - vs2 contains the round key, | ||
// - vd receives the output state. | ||
// | ||
// While the spec calls for handling the vector as made of EGU32x4 | ||
// element groups (i.e., 4 uint32_t), it is convenient to treat | ||
// AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why | ||
// we extract the operands here instead of using the existing LOOP | ||
// macro that defines/extracts the operand variables as EGU32x4. | ||
EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg); | ||
const EGU8x16_t round_key = P.VU.elt_group<EGU8x16_t>(vs2_num, idx_eg); | ||
|
||
// SubBytes - Apply the S-box to every byte in the state. | ||
VAES_SUB_BYTES(aes_state); | ||
// ShiftRows - Rotate the bytes of each row by 0, 1, 2, 3 positions. | ||
VAES_SHIFT_ROWS(aes_state); | ||
// MixColumns is not performed for the final round. | ||
// AddRoundKey | ||
EGU8x16_XOREQ(aes_state, round_key); | ||
|
||
// Update the destination register. | ||
// NOTE(review): the trailing 'true' presumably flags the access as a | ||
// write - confirm against the elt_group declaration. | ||
EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true); | ||
EGU8x16_COPY(vd, aes_state); | ||
} | ||
); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
// vaesem.vs vd, vs2 | ||
|
||
#include "zvkned_ext_macros.h" | ||
#include "zvk_ext_macros.h" | ||
|
||
// AES encryption, middle round, vector-scalar form: each element group of vd | ||
// visited by the loop macro is transformed with the one round key read from | ||
// element group 0 of vs2. | ||
require_vaes_vs_constraints; | ||
|
||
VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( | ||
{}, | ||
// This statement will be executed before the first execution | ||
// of the loop, and only if the loop is going to be entered. | ||
// We cannot use a block ( { ... } ) since we want the variables declared | ||
// here to be visible in the loop block. | ||
// We capture the "scalar", vs2's first element group (index 0), by copy, | ||
// even though the "no overlap" constraint means that vs2 should remain | ||
// constant during the loop. | ||
const EGU8x16_t scalar_key = P.VU.elt_group<EGU8x16_t>(vs2_num, 0);, | ||
{ | ||
// For AES128, AES192, or AES256, state and key are 128b/16B values: | ||
// - vd contains the input state, | ||
// - vs2 (element group 0) contains the round key, | ||
// - vd receives the output state. | ||
// | ||
// While the spec calls for handling the vector as made of EGU32x4 | ||
// element groups (i.e., 4 uint32_t), it is convenient to treat | ||
// AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why | ||
// we extract the operands here instead of using the existing LOOP | ||
// macro that defines/extracts the operand variables as EGU32x4. | ||
EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg); | ||
|
||
// SubBytes - Apply the S-box to every byte in the state. | ||
VAES_SUB_BYTES(aes_state); | ||
// ShiftRows - Rotate the bytes of each row by 0, 1, 2, 3 positions. | ||
VAES_SHIFT_ROWS(aes_state); | ||
// MixColumns | ||
VAES_MIX_COLUMNS(aes_state); | ||
// AddRoundKey | ||
EGU8x16_XOREQ(aes_state, scalar_key); | ||
|
||
// Update the destination register. | ||
// NOTE(review): the trailing 'true' presumably flags the access as a | ||
// write - confirm against the elt_group declaration. | ||
EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true); | ||
EGU8x16_COPY(vd, aes_state); | ||
} | ||
); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
// vaesem.vv vd, vs2 | ||
|
||
#include "zvkned_ext_macros.h" | ||
#include "zvk_ext_macros.h" | ||
|
||
// AES encryption, middle round, vector-vector form: each element group of vd | ||
// is transformed with the round key held in the same element group of vs2. | ||
require_vaes_vv_constraints; | ||
|
||
VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( | ||
{}, | ||
{}, // No PRELOOP. | ||
{ | ||
// For AES128, AES192, or AES256, state and key are 128b/16B values: | ||
// - vd contains the input state, | ||
// - vs2 contains the round key, | ||
// - vd receives the output state. | ||
// | ||
// While the spec calls for handling the vector as made of EGU32x4 | ||
// element groups (i.e., 4 uint32_t), it is convenient to treat | ||
// AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why | ||
// we extract the operands here instead of using the existing LOOP | ||
// macro that defines/extracts the operand variables as EGU32x4. | ||
EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg); | ||
const EGU8x16_t round_key = P.VU.elt_group<EGU8x16_t>(vs2_num, idx_eg); | ||
|
||
// SubBytes - Apply the S-box to every byte in the state. | ||
VAES_SUB_BYTES(aes_state); | ||
// ShiftRows - Rotate the bytes of each row by 0, 1, 2, 3 positions. | ||
VAES_SHIFT_ROWS(aes_state); | ||
// MixColumns | ||
VAES_MIX_COLUMNS(aes_state); | ||
// AddRoundKey | ||
EGU8x16_XOREQ(aes_state, round_key); | ||
|
||
// Update the destination register. | ||
// NOTE(review): the trailing 'true' presumably flags the access as a | ||
// write - confirm against the elt_group declaration. | ||
EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true); | ||
EGU8x16_COPY(vd, aes_state); | ||
} | ||
); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
// vaeskf1.vi vd, vs2, rnd | ||
|
||
#include "zvk_ext_macros.h" | ||
#include "zvkned_ext_macros.h" | ||
|
||
// AES-128 forward key scheduling: computes the next round key in vd from the | ||
// current round key in vs2, using the round number given by the immediate. | ||
require_vaeskf_vi_constraints; | ||
|
||
// There is one round constant for each round number | ||
// between 1 and 10. We index using 'round# -1'. | ||
static constexpr uint8_t kRoundConstants[10] = { | ||
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 | ||
}; | ||
|
||
// For AES128, AES192, or AES256, keys (and state) are handled as | ||
// 128b/16B values. | ||
// | ||
// The Zvkned spec calls for handling the vector as made of EGU32x4 | ||
// element groups (i.e., 4 uint32_t), and FIPS-197 AES specification | ||
// describes the key expansion in terms of manipulations of 32 bit | ||
// words, so using the EGU32x4 is natural. | ||
// | ||
VI_ZVK_VD_VS2_ZIMM5_EGU32x4_NOVM_LOOP( | ||
{}, | ||
// The following statements will be executed before the first execution | ||
// of the loop, and only if the loop is going to be entered. | ||
// We cannot use a block ( { ... } ) since we want the 'round' variable | ||
// declared and defined here to be visible in the loop block. | ||
// Only consider the bottom 4 bits of the immediate. | ||
const reg_t zimm4 = zimm5 & 0xF; | ||
// Normalize the round value to be in [1, 10] by toggling bit 3 | ||
// if outside the range (i.e., +8 or -8): 0 maps to 8, and 11..15 | ||
// map to 3..7. | ||
const reg_t round = ((1 <= zimm4) && (zimm4 <= 10)) ? zimm4 : (zimm4 ^ 0x8); | ||
// 'round - 1' is therefore always a valid index in [0, 9]. | ||
const uint32_t rcon = kRoundConstants[round - 1];, | ||
// Per Element Group body. | ||
{ | ||
// vaeskf1_vi produces key[i+1] in vd; it receives key[i] in vs2, | ||
// i.e., 4x32b values (4 words). | ||
// | ||
// The logic is fairly similar between vaeskf1/vaeskf2, with the following | ||
// differences: | ||
// - in AES-128 (vaeskf1), we get both the 'temp' word and | ||
// the "previous words" w0..w3 from key[i]/vs2. | ||
// - in AES-256 (vaeskf2), we get 'temp' from key[i]/vs2, and | ||
// the "previous words" w0..w3 from key[i-1]/vd. | ||
|
||
// 'temp' is extracted from the last (most significant) word of key[i]. | ||
uint32_t temp = vs2[3]; | ||
temp = (temp >> 8) | (temp << 24); // Rotate right by 8 | ||
// Substitute each of the four bytes of 'temp' through the AES | ||
// encryption S-box, keeping every byte in its position. | ||
temp = (((uint32_t)AES_ENC_SBOX[(temp >> 24) & 0xFF] << 24) | | ||
((uint32_t)AES_ENC_SBOX[(temp >> 16) & 0xFF] << 16) | | ||
((uint32_t)AES_ENC_SBOX[(temp >> 8) & 0xFF] << 8) | | ||
((uint32_t)AES_ENC_SBOX[(temp >> 0) & 0xFF] << 0)); | ||
// The 8-bit round constant is xor-ed into the least significant byte. | ||
temp = temp ^ rcon; | ||
|
||
// "old" words are the w[i-Nk] of FIPS-197. They are extracted | ||
// from vs2, which contains key[i] in AES-128 where Nk=4. | ||
// Each new word is the matching old word xor-ed with the previous | ||
// new word ('temp' for the first one). | ||
const uint32_t w0 = vs2[0] ^ temp; | ||
const uint32_t w1 = vs2[1] ^ w0; | ||
const uint32_t w2 = vs2[2] ^ w1; | ||
const uint32_t w3 = vs2[3] ^ w2; | ||
|
||
// Overwrite vd with k[i+1] from the new words. | ||
SET_EGU32x4_LE(vd, w0, w1, w2, w3); | ||
} | ||
); |
Oops, something went wrong.