-
Notifications
You must be signed in to change notification settings - Fork 854
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Zvk: Implement Zvksed, vector SM4 Block Cipher
Implement the Zvksed sub-extension, "ShangMi Suite: SM4 Block Cipher": - vsm4k.vi, vector SM4 key expansion, - vsm4r.{vs,vv}, vector SM4 rounds. This also introduces a header for common vector SM4 logic. Co-authored-by: Raghav Gupta <rgupta@rivosinc.com> Co-authored-by: Albert Jakieła <aja@semihalf.com> Signed-off-by: Eric Gouriou <ego@rivosinc.com>
- Loading branch information
1 parent
6d3a082
commit 6878fcd
Showing
6 changed files
with
258 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
// vsm4k.vi vd, vs2, round# | ||
|
||
#include "zvksed_ext_macros.h" | ||
|
||
// Uncomment to enable debug logging of invocations of this instruction. | ||
//#define DLOG_INVOCATION | ||
|
||
// Debug logging support. DLOG() forwards to ZVK_DBG_LOG() only when | ||
// DLOG_INVOCATION is defined; otherwise it expands to a no-op expression. | ||
#if defined(DLOG_INVOCATION) | ||
#define DLOG(...) ZVK_DBG_LOG(__VA_ARGS__) | ||
// Print format/value for both state and round key element groups. | ||
#define PRIxEG PRIxEGU32x4_LE | ||
#define PRVEG(X) PRVEGU32x4_LE(X) | ||
// Print format/value for "v<reg_num>(<Element Group in Hex, Little Endian>)". | ||
// This instruction works on EGU32x4 element groups, so use the x4 helpers | ||
// rather than the EGU32x8 ones. | ||
#define PRI_uR_xEG PRI_uREG_xEGU32x4 | ||
#define PRV_R_EG(reg_num, reg) PRV_REG_EGU32x4_LE(reg_num, reg) | ||
#else | ||
#define DLOG(...) (void)(0) | ||
#endif | ||
|
||
// SM4 Constant Key (CK) - section 7.3.2. of the IETF draft. | ||
// 32 words; byte j (most significant first) of entry i equals | ||
// ((4 * i + j) * 7) mod 256. | ||
// The loop below reads four consecutive entries per invocation, | ||
// zvksed_ck[4 * round + k] for k in [0, 3]. With 'round' in [0, 7] the | ||
// highest index used is 31, the last entry of the table. | ||
static constexpr uint32_t zvksed_ck[32] = { | ||
0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269, | ||
0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9, | ||
0xE0E7EEF5, 0xFC030A11, 0x181F262D, 0x343B4249, | ||
0x50575E65, 0x6C737A81, 0x888F969D, 0xA4ABB2B9, | ||
0xC0C7CED5, 0xDCE3EAF1, 0xF8FF060D, 0x141B2229, | ||
0x30373E45, 0x4C535A61, 0x686F767D, 0x848B9299, | ||
0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209, | ||
0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279 | ||
}; | ||
|
||
// Check the constraints shared by the vsm4* instructions before doing any work. | ||
require_vsm4_constraints; | ||
|
||
// The loop macro receives three arguments, in order: a block holding only the | ||
// invocation debug log, a run of pre-loop statements, and the loop block that | ||
// derives four new round keys from the four words read from vs2. | ||
// Macro arguments are split on commas that are not inside parentheses, so | ||
// code added to these arguments must not introduce such a comma. | ||
VI_ZVK_VD_VS2_ZIMM5_EGU32x4_NOVM_LOOP( | ||
{ | ||
DLOG("-- vsm4k_vi " ZVK_PRI_REGNUMS_VD_VS2_ZIMM5, | ||
ZVK_PRV_REGNUMS_VD_VS2_ZIMM5); | ||
}, | ||
// The following statements will be executed before the first execution | ||
// of the loop, and only if the loop is going to be entered. | ||
// We cannot use a block ( { ... } ) since we want the 'round' variable | ||
// declared and defined here to be visible in the loop block. | ||
// Only consider the bottom 3 bits of the immediate, ensuring that | ||
// 'round' is in the valid range [0, 7]. | ||
const reg_t round = zimm5 & 0x7; | ||
// Debug only: report when upper bits of the immediate were discarded. | ||
if (round != zimm5) { | ||
DLOG("vsm4k: zimm5 %" PRIuREG " => round %" PRIuREG, zimm5, round); | ||
}, | ||
{ | ||
// {rk0, rk1, rk2, rk3} <- vs2 | ||
EXTRACT_EGU32x4_WORDS_LE(vs2, rk0, rk1, rk2, rk3); | ||
|
||
// Each step xors the three most recent keys with the next constant from | ||
// zvksed_ck, substitutes bytes, and folds the result into the oldest key. | ||
// The key produced by one step is an input to the following steps. | ||
uint32_t B = rk1 ^ rk2 ^ rk3 ^ zvksed_ck[4 * round]; | ||
uint32_t S = ZVKSED_SUB_BYTES(B); | ||
uint32_t rk4 = ZVKSED_ROUND_KEY(rk0, S); | ||
|
||
B = rk2 ^ rk3 ^ rk4 ^ zvksed_ck[4 * round + 1]; | ||
S = ZVKSED_SUB_BYTES(B); | ||
uint32_t rk5 = ZVKSED_ROUND_KEY(rk1, S); | ||
|
||
B = rk3 ^ rk4 ^ rk5 ^ zvksed_ck[4 * round + 2]; | ||
S = ZVKSED_SUB_BYTES(B); | ||
uint32_t rk6 = ZVKSED_ROUND_KEY(rk2, S); | ||
|
||
B = rk4 ^ rk5 ^ rk6 ^ zvksed_ck[4 * round + 3]; | ||
S = ZVKSED_SUB_BYTES(B); | ||
uint32_t rk7 = ZVKSED_ROUND_KEY(rk3, S); | ||
|
||
// Update the destination register. | ||
SET_EGU32x4_LE(vd, rk4, rk5, rk6, rk7); | ||
DLOG("= vsm4k_vi %" PRIxEG, PRVEG(vd)); | ||
} | ||
); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
// vsm4r.vs vd, vs2 | ||
|
||
#include "zvksed_ext_macros.h" | ||
|
||
// Uncomment to enable debug logging of invocations of this instruction. | ||
//#define DLOG_INVOCATION | ||
|
||
// Debug logging support. DLOG() forwards to ZVK_DBG_LOG() only when | ||
// DLOG_INVOCATION is defined; otherwise it expands to a no-op expression. | ||
#if defined(DLOG_INVOCATION) | ||
#define DLOG(...) ZVK_DBG_LOG(__VA_ARGS__) | ||
// Print format/value for both state and round key element groups. | ||
#define PRIxEG PRIxEGU32x4_LE | ||
#define PRVEG(X) PRVEGU32x4_LE(X) | ||
// Print format/value for "v<reg_num>(<Element Group in Hex, Little Endian>)" | ||
#define PRI_uR_xEG PRI_uREG_xEGU32x4 | ||
#define PRV_R_EG(reg_num, reg) PRV_REG_EGU32x4_LE(reg_num, reg) | ||
#else | ||
#define DLOG(...) (void)(0) | ||
#endif | ||
|
||
// Check the constraints shared by the vsm4* instructions. | ||
require_vsm4_constraints; | ||
// No overlap of vd and vs2. | ||
// NOTE(review): this compares the two register numbers only; confirm whether | ||
// that is sufficient when vd spans more than one register. | ||
require(insn.rd() != insn.rs2()); | ||
|
||
// The loop macro receives three arguments, in order: a block holding only the | ||
// invocation debug log, a run of pre-loop statements that capture the round | ||
// keys, and the loop block that applies four rounds to one element group of vd. | ||
// Macro arguments are split on commas that are not inside parentheses, so | ||
// code added to these arguments must not introduce such a comma. | ||
VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( | ||
{ | ||
DLOG("vsm4r_vs vd#(%" PRIuREG ") vs2#(%" PRIuREG ")" | ||
" vstart_eg(%" PRIuREG ") vl_eg(%" PRIuREG ")", | ||
vd_num, vs2_num, vstart_eg, vl_eg); | ||
}, | ||
// This statement will be executed before the first execution | ||
// of the loop, and only if the loop is going to be entered. | ||
// We cannot use a block ( { ... } ) since we want the variables declared | ||
// here to be visible in the loop block. | ||
// We capture the "scalar", vs2's first element, by copy, even though | ||
// the "no overlap" constraint means that vs2 should remain constant | ||
// during the loop. | ||
const EGU32x4_t scalar_key = P.VU.elt_group<EGU32x4_t>(vs2_num, 0); | ||
const uint32_t rk0 = scalar_key[0]; | ||
const uint32_t rk1 = scalar_key[1]; | ||
const uint32_t rk2 = scalar_key[2]; | ||
const uint32_t rk3 = scalar_key[3];, | ||
{ | ||
// Reference to element group idx_eg of vd: it supplies the input state | ||
// and receives the result. | ||
// NOTE(review): the trailing 'true' presumably marks the group as | ||
// written - confirm against elt_group's declaration. | ||
EGU32x4_t &state = P.VU.elt_group<EGU32x4_t>(vd_num, idx_eg, true); | ||
|
||
DLOG("vsm4r_vs " PRI_uR_xEG " " PRI_uR_xEG, | ||
PRV_R_EG(vd_num, state), PRV_R_EG(vs2_num, scalar_key)); | ||
|
||
// {x0, x1,x2, x3} <- vd | ||
EXTRACT_EGU32x4_WORDS_LE(state, x0, x1, x2, x3); | ||
|
||
uint32_t B; | ||
uint32_t S; | ||
|
||
// Each round xors the three most recent state words with one round key, | ||
// substitutes bytes, and folds the result into the oldest state word. | ||
// The word produced by one round is an input to the following rounds. | ||
B = x1 ^ x2 ^ x3 ^ rk0; | ||
S = ZVKSED_SUB_BYTES(B); | ||
const uint32_t x4 = ZVKSED_ROUND(x0, S); | ||
|
||
B = x2 ^ x3 ^ x4 ^ rk1; | ||
S = ZVKSED_SUB_BYTES(B); | ||
const uint32_t x5 = ZVKSED_ROUND(x1, S); | ||
|
||
B = x3 ^ x4 ^ x5 ^ rk2; | ||
S = ZVKSED_SUB_BYTES(B); | ||
const uint32_t x6 = ZVKSED_ROUND(x2, S); | ||
|
||
B = x4 ^ x5 ^ x6 ^ rk3; | ||
S = ZVKSED_SUB_BYTES(B); | ||
const uint32_t x7 = ZVKSED_ROUND(x3, S); | ||
|
||
// Update the destination register. | ||
SET_EGU32x4_LE(state, x4, x5, x6, x7); | ||
DLOG("= vsm4r_vs v%" PRIuREG " <- (%" PRIxEG ")", vd_num, PRVEG(state)); | ||
} | ||
); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
// vsm4r.vv vd, vs2 | ||
|
||
#include "zvksed_ext_macros.h" | ||
|
||
// Uncomment to enable debug logging of invocations of this instruction. | ||
//#define DLOG_INVOCATION | ||
|
||
// Debug logging support. DLOG() forwards to ZVK_DBG_LOG() only when | ||
// DLOG_INVOCATION is defined; otherwise it expands to a no-op expression. | ||
#if defined(DLOG_INVOCATION) | ||
#define DLOG(...) ZVK_DBG_LOG(__VA_ARGS__) | ||
// Print format/value for both state and round key element groups. | ||
#define PRIxEG PRIxEGU32x4_LE | ||
#define PRVEG(X) PRVEGU32x4_LE(X) | ||
// Print format/value for "v<reg_num>(<Element Group in Hex, Little Endian>)". | ||
// These are applied to vd and vs2, which this instruction handles as EGU32x4 | ||
// element groups, so use the x4 helpers rather than the EGU32x8 ones. | ||
#define PRI_uR_xEG PRI_uREG_xEGU32x4 | ||
#define PRV_R_EG(reg_num, reg) PRV_REG_EGU32x4_LE(reg_num, reg) | ||
#else | ||
#define DLOG(...) (void)(0) | ||
#endif | ||
|
||
// Check the constraints shared by the vsm4* instructions before doing any work. | ||
require_vsm4_constraints; | ||
|
||
// The loop macro receives two arguments: a block holding only the invocation | ||
// debug log, and the loop block that applies four rounds to vd using the four | ||
// round keys read from vs2. | ||
// Macro arguments are split on commas that are not inside parentheses, so | ||
// code added to these arguments must not introduce such a comma. | ||
VI_ZVK_VD_VS2_EGU32x4_NOVM_LOOP( | ||
{ | ||
DLOG("vsm4r_vv vd#(%" PRIuREG ") vs2#(%" PRIuREG ")" | ||
" vstart_eg(%" PRIuREG ") vl_eg(%" PRIuREG ")", | ||
vd_num, vs2_num, vstart_eg, vl_eg); | ||
}, | ||
{ | ||
DLOG("vsm4r_vv " PRI_uR_xEG " " PRI_uR_xEG, | ||
PRV_R_EG(vd_num, vd), PRV_R_EG(vs2_num, vs2)); | ||
|
||
// vd = {x0, x1,x2, x3} <- vd | ||
EXTRACT_EGU32x4_WORDS_LE(vd, x0, x1, x2, x3); | ||
// {rk0, rk1, rk2, rk3} <- vs2 | ||
EXTRACT_EGU32x4_WORDS_LE(vs2, rk0, rk1, rk2, rk3); | ||
|
||
uint32_t B; | ||
uint32_t S; | ||
|
||
// Each round xors the three most recent state words with one round key, | ||
// substitutes bytes, and folds the result into the oldest state word. | ||
// The word produced by one round is an input to the following rounds. | ||
B = x1 ^ x2 ^ x3 ^ rk0; | ||
S = ZVKSED_SUB_BYTES(B); | ||
const uint32_t x4 = ZVKSED_ROUND(x0, S); | ||
|
||
B = x2 ^ x3 ^ x4 ^ rk1; | ||
S = ZVKSED_SUB_BYTES(B); | ||
const uint32_t x5 = ZVKSED_ROUND(x1, S); | ||
|
||
B = x3 ^ x4 ^ x5 ^ rk2; | ||
S = ZVKSED_SUB_BYTES(B); | ||
const uint32_t x6 = ZVKSED_ROUND(x2, S); | ||
|
||
B = x4 ^ x5 ^ x6 ^ rk3; | ||
S = ZVKSED_SUB_BYTES(B); | ||
const uint32_t x7 = ZVKSED_ROUND(x3, S); | ||
|
||
// Update the destination register. | ||
SET_EGU32x4_LE(vd, x4, x5, x6, x7); | ||
DLOG("= vsm4r_vv v%" PRIuREG " <- (%" PRIxEG ")", vd_num, PRVEG(vd)); | ||
} | ||
); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
// Helper macros and functions to help implement instructions defined as part of | ||
// the RISC-V Zvksed extension (vectorized SM4). | ||
|
||
#include "insns/sm4_common.h" | ||
#include "zvk_ext_macros.h" | ||
|
||
#ifndef RISCV_ZVKSED_MACROS_H_ | ||
#define RISCV_ZVKSED_MACROS_H_ | ||
|
||
// Constraints common to all vsm4* instructions: | ||
// - Zvksed is enabled | ||
// - VSEW == 32 | ||
// - EGW (128) <= LMUL * VLEN | ||
// | ||
// The constraint that vstart and vl are both EGS (4) aligned | ||
// is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros. | ||
// | ||
// Expands to a single do { ... } while (false) statement, so the use site | ||
// supplies the terminating semicolon. | ||
#define require_vsm4_constraints \ | ||
do { \ | ||
require_zvksed; \ | ||
require(P.VU.vsew == 32); \ | ||
require_egw_fits(128); \ | ||
} while (false) | ||
|
||
// Get byte BYTE of the SBox. | ||
// Indexes the sm4_sbox table, which is not defined in this header | ||
// (presumably it comes from insns/sm4_common.h - confirm). | ||
#define ZVKSED_SBOX(BYTE) (sm4_sbox[(BYTE)]) | ||
|
||
// Apply the nonlinear transformation tau to a 32 bit word B - section 6.2.1. | ||
// of the IETF draft. | ||
// The SBox lookup of EXTRACT_U8(B, i) is passed as argument i of | ||
// U32_FROM_U8_LE, for i = 0..3. | ||
// B is expanded four times: pass an expression without side effects. | ||
#define ZVKSED_SUB_BYTES(B) \ | ||
U32_FROM_U8_LE(ZVKSED_SBOX(EXTRACT_U8((B), 0)), \ | ||
ZVKSED_SBOX(EXTRACT_U8((B), 1)), \ | ||
ZVKSED_SBOX(EXTRACT_U8((B), 2)), \ | ||
ZVKSED_SBOX(EXTRACT_U8((B), 3))) | ||
|
||
// Apply the linear transformation L to a 32 bit word S and xor the result | ||
// with a 32 bit word X - section 6.2.2. of the IETF draft. | ||
// L(S) = S ^ ROL32(S, 2) ^ ROL32(S, 10) ^ ROL32(S, 18) ^ ROL32(S, 24). | ||
// S is expanded five times: pass an expression without side effects. | ||
#define ZVKSED_ROUND(X, S) \ | ||
((X) ^ ((S) ^ ROL32((S), 2) ^ ROL32((S), 10) ^ ROL32((S), 18) ^ ROL32((S), 24))) | ||
|
||
// Apply the linear transformation L' to a 32 bit word S and xor the result | ||
// with a 32 bit word X - section 6.2.2. of the IETF draft. | ||
// L'(S) = S ^ ROL32(S, 13) ^ ROL32(S, 23). | ||
// S is expanded three times: pass an expression without side effects. | ||
#define ZVKSED_ROUND_KEY(X, S) \ | ||
((X) ^ ((S) ^ ROL32((S), 13) ^ ROL32((S), 23))) | ||
|
||
#endif // RISCV_ZVKSED_MACROS_H_ |