-
Notifications
You must be signed in to change notification settings - Fork 854
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Zvk: Implement Zvksed, vector SM4 Block Cipher
Implement the Zvksed sub-extension, "ShangMi Suite: SM4 Block Cipher": - vsm4k.vi, vector SM4 key expansion, - vsm4r.{vs,vv}, vector SM4 rounds. This also introduces a header for common vector SM4 logic. Co-authored-by: Raghav Gupta <rgupta@rivosinc.com> Co-authored-by: Albert Jakieła <aja@semihalf.com> Signed-off-by: Eric Gouriou <ego@rivosinc.com>
- Loading branch information
1 parent
eadb0e1
commit cbb2b1a
Showing
6 changed files
with
206 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
// vsm4k.vi vd, vs2, round# | ||
|
||
#include "zvksed_ext_macros.h" | ||
|
||
// SM4 Constant Key (CK) - section 7.3.2. of the IETF draft.
//
// 32 words, one per SM4 key-expansion round. vsm4k.vi consumes four
// consecutive entries per invocation, starting at index 4 * round.
// Byte j (most significant first) of entry i equals ((4 * i + j) * 7) mod 256.
static constexpr uint32_t zvksed_ck[32] = {
  0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269,
  0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9,
  0xE0E7EEF5, 0xFC030A11, 0x181F262D, 0x343B4249,
  0x50575E65, 0x6C737A81, 0x888F969D, 0xA4ABB2B9,
  0xC0C7CED5, 0xDCE3EAF1, 0xF8FF060D, 0x141B2229,
  0x30373E45, 0x4C535A61, 0x686F767D, 0x848B9299,
  0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209,
  0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279
};
|
||
require_vsm4_constraints; | ||
|
||
VI_ZVK_VD_VS2_ZIMM5_EGU32x4_NOVM_LOOP( | ||
{}, | ||
// The following statements will be executed before the first execution | ||
// of the loop, and only if the loop is going to be entered. | ||
// We cannot use a block ( { ... } ) since we want the 'round' variable | ||
// declared and defined here here to be visible in the loop block. | ||
// Only consider the bottom 3 bits of the immediate, ensuring that | ||
// 'round' is in the valid range [0, 7]. | ||
const reg_t round = zimm5 & 0x7;, | ||
// Per Element Group body. | ||
{ | ||
// {rk0, rk1, rk2, rk3} <- vs2 | ||
EXTRACT_EGU32x4_WORDS_LE(vs2, rk0, rk1, rk2, rk3); | ||
|
||
uint32_t B = rk1 ^ rk2 ^ rk3 ^ zvksed_ck[4 * round]; | ||
uint32_t S = ZVKSED_SUB_BYTES(B); | ||
uint32_t rk4 = ZVKSED_ROUND_KEY(rk0, S); | ||
|
||
B = rk2 ^ rk3 ^ rk4 ^ zvksed_ck[4 * round + 1]; | ||
S = ZVKSED_SUB_BYTES(B); | ||
uint32_t rk5 = ZVKSED_ROUND_KEY(rk1, S); | ||
|
||
B = rk3 ^ rk4 ^ rk5 ^ zvksed_ck[4 * round + 2]; | ||
S = ZVKSED_SUB_BYTES(B); | ||
uint32_t rk6 = ZVKSED_ROUND_KEY(rk2, S); | ||
|
||
B = rk4 ^ rk5 ^ rk6 ^ zvksed_ck[4 * round + 3]; | ||
S = ZVKSED_SUB_BYTES(B); | ||
uint32_t rk7 = ZVKSED_ROUND_KEY(rk3, S); | ||
|
||
// Update the destination register. | ||
SET_EGU32x4_LE(vd, rk4, rk5, rk6, rk7); | ||
} | ||
); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
// vsm4r.vs vd, vs2 | ||
|
||
#include "zvksed_ext_macros.h" | ||
|
||
require_vsm4_constraints; | ||
// No overlap of vd and vs2. | ||
require(insn.rd() != insn.rs2()); | ||
|
||
VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( | ||
{}, | ||
// This statement will be executed before the first execution | ||
// of the loop, and only if the loop is going to be entered. | ||
// We cannot use a block ( { ... } ) since we want the variables declared | ||
// here to be visible in the loop block. | ||
// We capture the "scalar", vs2's first element, by copy, even though | ||
// the "no overlap" constraint means that vs2 should remain constant | ||
// during the loop. | ||
const EGU32x4_t scalar_key = P.VU.elt_group<EGU32x4_t>(vs2_num, 0); | ||
const uint32_t rk0 = scalar_key[0]; | ||
const uint32_t rk1 = scalar_key[1]; | ||
const uint32_t rk2 = scalar_key[2]; | ||
const uint32_t rk3 = scalar_key[3];, | ||
{ | ||
EGU32x4_t &state = P.VU.elt_group<EGU32x4_t>(vd_num, idx_eg, true); | ||
|
||
// {x0, x1,x2, x3} <- vd | ||
EXTRACT_EGU32x4_WORDS_LE(state, x0, x1, x2, x3); | ||
|
||
uint32_t B; | ||
uint32_t S; | ||
|
||
B = x1 ^ x2 ^ x3 ^ rk0; | ||
S = ZVKSED_SUB_BYTES(B); | ||
const uint32_t x4 = ZVKSED_ROUND(x0, S); | ||
|
||
B = x2 ^ x3 ^ x4 ^ rk1; | ||
S = ZVKSED_SUB_BYTES(B); | ||
const uint32_t x5 = ZVKSED_ROUND(x1, S); | ||
|
||
B = x3 ^ x4 ^ x5 ^ rk2; | ||
S = ZVKSED_SUB_BYTES(B); | ||
const uint32_t x6 = ZVKSED_ROUND(x2, S); | ||
|
||
B = x4 ^ x5 ^ x6 ^ rk3; | ||
S = ZVKSED_SUB_BYTES(B); | ||
const uint32_t x7 = ZVKSED_ROUND(x3, S); | ||
|
||
// Update the destination register. | ||
SET_EGU32x4_LE(state, x4, x5, x6, x7); | ||
} | ||
); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
// vsm4r.vv vd, vs2 | ||
|
||
#include "zvksed_ext_macros.h" | ||
|
||
require_vsm4_constraints; | ||
|
||
VI_ZVK_VD_VS2_EGU32x4_NOVM_LOOP( | ||
{}, | ||
{ | ||
// vd = {x0, x1,x2, x3} <- vd | ||
EXTRACT_EGU32x4_WORDS_LE(vd, x0, x1, x2, x3); | ||
// {rk0, rk1, rk2, rk3} <- vs2 | ||
EXTRACT_EGU32x4_WORDS_LE(vs2, rk0, rk1, rk2, rk3); | ||
|
||
uint32_t B; | ||
uint32_t S; | ||
|
||
B = x1 ^ x2 ^ x3 ^ rk0; | ||
S = ZVKSED_SUB_BYTES(B); | ||
const uint32_t x4 = ZVKSED_ROUND(x0, S); | ||
|
||
B = x2 ^ x3 ^ x4 ^ rk1; | ||
S = ZVKSED_SUB_BYTES(B); | ||
const uint32_t x5 = ZVKSED_ROUND(x1, S); | ||
|
||
B = x3 ^ x4 ^ x5 ^ rk2; | ||
S = ZVKSED_SUB_BYTES(B); | ||
const uint32_t x6 = ZVKSED_ROUND(x2, S); | ||
|
||
B = x4 ^ x5 ^ x6 ^ rk3; | ||
S = ZVKSED_SUB_BYTES(B); | ||
const uint32_t x7 = ZVKSED_ROUND(x3, S); | ||
|
||
// Update the destination register. | ||
SET_EGU32x4_LE(vd, x4, x5, x6, x7); | ||
} | ||
); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
// Helper macros and functions to help implement instructions defined as part of | ||
// the RISC-V Zvksed extension (vectorized SM4). | ||
|
||
#include "insns/sm4_common.h" | ||
#include "zvk_ext_macros.h" | ||
|
||
#ifndef RISCV_ZVKSED_MACROS_H_ | ||
#define RISCV_ZVKSED_MACROS_H_ | ||
|
||
// Constraints common to all vsm4* instructions:
//  - Zvksed is enabled
//  - VSEW == 32
//  - EGW (128) <= LMUL * VLEN
//
// The constraint that vstart and vl are both EGS (4) aligned
// is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros.
//
// Wrapped in do { ... } while (false) so the macro behaves as a single
// statement; the caller supplies the trailing semicolon.
#define require_vsm4_constraints \
  do { \
    require_zvksed; \
    require(P.VU.vsew == 32); \
    require_egw_fits(128); \
  } while (false)
|
||
// Returns a uint32_t value constructed from the 4 bytes (uint8_t)
// provided in "Little Endian" (LE) order, i.e., from least significant (B0)
// to most significant (B3).
//
// Each argument is cast to uint32_t before shifting, so the shifts are done
// on an unsigned 32-bit value. Each argument is evaluated exactly once.
#define ZVKSED_U32_FROM_U8_LE(B0, B1, B2, B3) \
  ((((uint32_t)(B0)) <<  0) | \
   (((uint32_t)(B1)) <<  8) | \
   (((uint32_t)(B2)) << 16) | \
   (((uint32_t)(B3)) << 24))
|
||
// Get byte BYTE of the SBox, i.e. the entry of the 'sm4_sbox' table at
// index BYTE.
#define ZVKSED_SBOX(BYTE)  (sm4_sbox[(BYTE)])
|
||
// Given an unsigned integer value 'X' and a byte index,
// returns a uint8_t value for the byte at the given index.
//
// Byte index 0 is the least significant byte. BYTE_IDX is parenthesized so
// that an expression argument such as 'i + 1' is multiplied by 8 as a whole.
#define ZVKSED_EXTRACT_U8(X, BYTE_IDX) ((uint8_t)((X) >> ((BYTE_IDX) * 8)))
|
||
// Apply the nonlinear transformation tau to a 32 bit word B - section 6.2.1.
// of the IETF draft.
//
// Each of the four bytes of B is replaced by its SBox entry; byte positions
// are preserved. B is evaluated four times, so pass an expression without
// side effects.
#define ZVKSED_SUB_BYTES(B) \
  ZVKSED_U32_FROM_U8_LE(ZVKSED_SBOX(ZVKSED_EXTRACT_U8((B), 0)), \
                        ZVKSED_SBOX(ZVKSED_EXTRACT_U8((B), 1)), \
                        ZVKSED_SBOX(ZVKSED_EXTRACT_U8((B), 2)), \
                        ZVKSED_SBOX(ZVKSED_EXTRACT_U8((B), 3)))
|
||
// Perform the linear transformation L to a 32 bit word S and xor it with a 32
// bit word X - section 6.2.2. of the IETF draft.
//
// L(S) = S ^ (S <<< 2) ^ (S <<< 10) ^ (S <<< 18) ^ (S <<< 24), where <<< is
// the rotation performed by ZVK_ROL32. S is evaluated several times, so pass
// an expression without side effects.
#define ZVKSED_ROUND(X, S) \
  ((X) ^ \
   ((S) ^ ZVK_ROL32((S), 2) ^ ZVK_ROL32((S), 10) ^ \
    ZVK_ROL32((S), 18) ^ ZVK_ROL32((S), 24)))
|
||
// Perform the linear transformation L' to a 32 bit word S and xor it with a 32
// bit word X - section 6.2.2. of the IETF draft.
//
// L'(S) = S ^ (S <<< 13) ^ (S <<< 23), where <<< is the rotation performed by
// ZVK_ROL32. S is evaluated several times, so pass an expression without
// side effects.
#define ZVKSED_ROUND_KEY(X, S) \
  ((X) ^ ((S) ^ ZVK_ROL32((S), 13) ^ ZVK_ROL32((S), 23)))
|
||
#endif // RISCV_ZVKSED_MACROS_H_ |