Re: [PATCH v8 07/14] target/riscv: rvk: add support for zkne/zknd extension in RV64

2022-03-10 Thread Alistair Francis
On Tue, Mar 1, 2022 at 10:03 PM Weiwei Li  wrote:
>
>  - add aes64dsm, aes64ds, aes64im, aes64es, aes64esm, aes64ks2, aes64ks1i 
> instructions
>
> Co-authored-by: Ruibo Lu 
> Co-authored-by: Zewen Ye 
> Signed-off-by: Weiwei Li 
> Signed-off-by: Junqiang Wang 
> Reviewed-by: Richard Henderson 

Acked-by: Alistair Francis 

Alistair

> ---
>  target/riscv/crypto_helper.c| 169 
>  target/riscv/helper.h   |   8 ++
>  target/riscv/insn32.decode  |  12 ++
>  target/riscv/insn_trans/trans_rvk.c.inc |  47 +++
>  4 files changed, 236 insertions(+)
>
> diff --git a/target/riscv/crypto_helper.c b/target/riscv/crypto_helper.c
> index 220d51c742..cb4783a1e9 100644
> --- a/target/riscv/crypto_helper.c
> +++ b/target/riscv/crypto_helper.c
> @@ -102,4 +102,173 @@ target_ulong HELPER(aes32dsi)(target_ulong rs1, 
> target_ulong rs2,
>  {
>  return aes32_operation(shamt, rs1, rs2, false, false);
>  }
> +
> +#define BY(X, I) ((X >> (8 * I)) & 0xFF)
> +
> +#define AES_SHIFROWS_LO(RS1, RS2) ( \
> +(((RS1 >> 24) & 0xFF) << 56) | (((RS2 >> 48) & 0xFF) << 48) | \
> +(((RS2 >> 8) & 0xFF) << 40) | (((RS1 >> 32) & 0xFF) << 32) | \
> +(((RS2 >> 56) & 0xFF) << 24) | (((RS2 >> 16) & 0xFF) << 16) | \
> +(((RS1 >> 40) & 0xFF) << 8) | (((RS1 >> 0) & 0xFF) << 0))
> +
> +#define AES_INVSHIFROWS_LO(RS1, RS2) ( \
> +(((RS2 >> 24) & 0xFF) << 56) | (((RS2 >> 48) & 0xFF) << 48) | \
> +(((RS1 >> 8) & 0xFF) << 40) | (((RS1 >> 32) & 0xFF) << 32) | \
> +(((RS1 >> 56) & 0xFF) << 24) | (((RS2 >> 16) & 0xFF) << 16) | \
> +(((RS2 >> 40) & 0xFF) << 8) | (((RS1 >> 0) & 0xFF) << 0))
> +
> +#define AES_MIXBYTE(COL, B0, B1, B2, B3) ( \
> +BY(COL, B3) ^ BY(COL, B2) ^ AES_GFMUL(BY(COL, B1), 3) ^ \
> +AES_GFMUL(BY(COL, B0), 2))
> +
> +#define AES_MIXCOLUMN(COL) ( \
> +AES_MIXBYTE(COL, 3, 0, 1, 2) << 24 | \
> +AES_MIXBYTE(COL, 2, 3, 0, 1) << 16 | \
> +AES_MIXBYTE(COL, 1, 2, 3, 0) << 8 | AES_MIXBYTE(COL, 0, 1, 2, 3) << 0)
> +
> +#define AES_INVMIXBYTE(COL, B0, B1, B2, B3) ( \
> +AES_GFMUL(BY(COL, B3), 0x9) ^ AES_GFMUL(BY(COL, B2), 0xd) ^ \
> +AES_GFMUL(BY(COL, B1), 0xb) ^ AES_GFMUL(BY(COL, B0), 0xe))
> +
> +#define AES_INVMIXCOLUMN(COL) ( \
> +AES_INVMIXBYTE(COL, 3, 0, 1, 2) << 24 | \
> +AES_INVMIXBYTE(COL, 2, 3, 0, 1) << 16 | \
> +AES_INVMIXBYTE(COL, 1, 2, 3, 0) << 8 | \
> +AES_INVMIXBYTE(COL, 0, 1, 2, 3) << 0)
> +
> +static inline target_ulong aes64_operation(target_ulong rs1, target_ulong 
> rs2,
> +   bool enc, bool mix)
> +{
> +uint64_t RS1 = rs1;
> +uint64_t RS2 = rs2;
> +uint64_t result;
> +uint64_t temp;
> +uint32_t col_0;
> +uint32_t col_1;
> +
> +if (enc) {
> +temp = AES_SHIFROWS_LO(RS1, RS2);
> +temp = (((uint64_t)AES_sbox[(temp >> 0) & 0xFF] << 0) |
> +((uint64_t)AES_sbox[(temp >> 8) & 0xFF] << 8) |
> +((uint64_t)AES_sbox[(temp >> 16) & 0xFF] << 16) |
> +((uint64_t)AES_sbox[(temp >> 24) & 0xFF] << 24) |
> +((uint64_t)AES_sbox[(temp >> 32) & 0xFF] << 32) |
> +((uint64_t)AES_sbox[(temp >> 40) & 0xFF] << 40) |
> +((uint64_t)AES_sbox[(temp >> 48) & 0xFF] << 48) |
> +((uint64_t)AES_sbox[(temp >> 56) & 0xFF] << 56));
> +if (mix) {
> +col_0 = temp & 0xFFFFFFFF;
> +col_1 = temp >> 32;
> +
> +col_0 = AES_MIXCOLUMN(col_0);
> +col_1 = AES_MIXCOLUMN(col_1);
> +
> +result = ((uint64_t)col_1 << 32) | col_0;
> +} else {
> +result = temp;
> +}
> +} else {
> +temp = AES_INVSHIFROWS_LO(RS1, RS2);
> +temp = (((uint64_t)AES_isbox[(temp >> 0) & 0xFF] << 0) |
> +((uint64_t)AES_isbox[(temp >> 8) & 0xFF] << 8) |
> +((uint64_t)AES_isbox[(temp >> 16) & 0xFF] << 16) |
> +((uint64_t)AES_isbox[(temp >> 24) & 0xFF] << 24) |
> +((uint64_t)AES_isbox[(temp >> 32) & 0xFF] << 32) |
> +((uint64_t)AES_isbox[(temp >> 40) & 0xFF] << 40) |
> +((uint64_t)AES_isbox[(temp >> 48) & 0xFF] << 48) |
> +((uint64_t)AES_isbox[(temp >> 56) & 0xFF] << 56));
> +if (mix) {
> +col_0 = temp & 0xFFFFFFFF;
> +col_1 = temp >> 32;
> +
> +col_0 = AES_INVMIXCOLUMN(col_0);
> +col_1 = AES_INVMIXCOLUMN(col_1);
> +
> +result = ((uint64_t)col_1 << 32) | col_0;
> +} else {
> +result = temp;
> +}
> +}
> +
> +return result;
> +}
> +
> +target_ulong HELPER(aes64esm)(target_ulong rs1, target_ulong rs2)
> +{
> +return aes64_operation(rs1, rs2, true, true);
> +}
> +
> +target_ulong HELPER(aes64es)(target_ulong rs1, target_ulong rs2)
> +{
> +return aes64_operation(rs1, rs2, true, false);
> +}
> +
> +target_ulong HELPER(aes64ds)(target_ulong rs1, 

[PATCH v8 07/14] target/riscv: rvk: add support for zkne/zknd extension in RV64

2022-03-01 Thread Weiwei Li
 - add aes64dsm, aes64ds, aes64im, aes64es, aes64esm, aes64ks2, aes64ks1i 
instructions

Co-authored-by: Ruibo Lu 
Co-authored-by: Zewen Ye 
Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 
Reviewed-by: Richard Henderson 
---
 target/riscv/crypto_helper.c| 169 
 target/riscv/helper.h   |   8 ++
 target/riscv/insn32.decode  |  12 ++
 target/riscv/insn_trans/trans_rvk.c.inc |  47 +++
 4 files changed, 236 insertions(+)

diff --git a/target/riscv/crypto_helper.c b/target/riscv/crypto_helper.c
index 220d51c742..cb4783a1e9 100644
--- a/target/riscv/crypto_helper.c
+++ b/target/riscv/crypto_helper.c
@@ -102,4 +102,173 @@ target_ulong HELPER(aes32dsi)(target_ulong rs1, 
target_ulong rs2,
 {
 return aes32_operation(shamt, rs1, rs2, false, false);
 }
+
+#define BY(X, I) ((X >> (8 * I)) & 0xFF)
+
+#define AES_SHIFROWS_LO(RS1, RS2) ( \
+(((RS1 >> 24) & 0xFF) << 56) | (((RS2 >> 48) & 0xFF) << 48) | \
+(((RS2 >> 8) & 0xFF) << 40) | (((RS1 >> 32) & 0xFF) << 32) | \
+(((RS2 >> 56) & 0xFF) << 24) | (((RS2 >> 16) & 0xFF) << 16) | \
+(((RS1 >> 40) & 0xFF) << 8) | (((RS1 >> 0) & 0xFF) << 0))
+
+#define AES_INVSHIFROWS_LO(RS1, RS2) ( \
+(((RS2 >> 24) & 0xFF) << 56) | (((RS2 >> 48) & 0xFF) << 48) | \
+(((RS1 >> 8) & 0xFF) << 40) | (((RS1 >> 32) & 0xFF) << 32) | \
+(((RS1 >> 56) & 0xFF) << 24) | (((RS2 >> 16) & 0xFF) << 16) | \
+(((RS2 >> 40) & 0xFF) << 8) | (((RS1 >> 0) & 0xFF) << 0))
+
+#define AES_MIXBYTE(COL, B0, B1, B2, B3) ( \
+BY(COL, B3) ^ BY(COL, B2) ^ AES_GFMUL(BY(COL, B1), 3) ^ \
+AES_GFMUL(BY(COL, B0), 2))
+
+#define AES_MIXCOLUMN(COL) ( \
+AES_MIXBYTE(COL, 3, 0, 1, 2) << 24 | \
+AES_MIXBYTE(COL, 2, 3, 0, 1) << 16 | \
+AES_MIXBYTE(COL, 1, 2, 3, 0) << 8 | AES_MIXBYTE(COL, 0, 1, 2, 3) << 0)
+
+#define AES_INVMIXBYTE(COL, B0, B1, B2, B3) ( \
+AES_GFMUL(BY(COL, B3), 0x9) ^ AES_GFMUL(BY(COL, B2), 0xd) ^ \
+AES_GFMUL(BY(COL, B1), 0xb) ^ AES_GFMUL(BY(COL, B0), 0xe))
+
+#define AES_INVMIXCOLUMN(COL) ( \
+AES_INVMIXBYTE(COL, 3, 0, 1, 2) << 24 | \
+AES_INVMIXBYTE(COL, 2, 3, 0, 1) << 16 | \
+AES_INVMIXBYTE(COL, 1, 2, 3, 0) << 8 | \
+AES_INVMIXBYTE(COL, 0, 1, 2, 3) << 0)
+
+static inline target_ulong aes64_operation(target_ulong rs1, target_ulong rs2,
+   bool enc, bool mix)
+{
+uint64_t RS1 = rs1;
+uint64_t RS2 = rs2;
+uint64_t result;
+uint64_t temp;
+uint32_t col_0;
+uint32_t col_1;
+
+if (enc) {
+temp = AES_SHIFROWS_LO(RS1, RS2);
+temp = (((uint64_t)AES_sbox[(temp >> 0) & 0xFF] << 0) |
+((uint64_t)AES_sbox[(temp >> 8) & 0xFF] << 8) |
+((uint64_t)AES_sbox[(temp >> 16) & 0xFF] << 16) |
+((uint64_t)AES_sbox[(temp >> 24) & 0xFF] << 24) |
+((uint64_t)AES_sbox[(temp >> 32) & 0xFF] << 32) |
+((uint64_t)AES_sbox[(temp >> 40) & 0xFF] << 40) |
+((uint64_t)AES_sbox[(temp >> 48) & 0xFF] << 48) |
+((uint64_t)AES_sbox[(temp >> 56) & 0xFF] << 56));
+if (mix) {
+col_0 = temp & 0xFFFFFFFF;
+col_1 = temp >> 32;
+
+col_0 = AES_MIXCOLUMN(col_0);
+col_1 = AES_MIXCOLUMN(col_1);
+
+result = ((uint64_t)col_1 << 32) | col_0;
+} else {
+result = temp;
+}
+} else {
+temp = AES_INVSHIFROWS_LO(RS1, RS2);
+temp = (((uint64_t)AES_isbox[(temp >> 0) & 0xFF] << 0) |
+((uint64_t)AES_isbox[(temp >> 8) & 0xFF] << 8) |
+((uint64_t)AES_isbox[(temp >> 16) & 0xFF] << 16) |
+((uint64_t)AES_isbox[(temp >> 24) & 0xFF] << 24) |
+((uint64_t)AES_isbox[(temp >> 32) & 0xFF] << 32) |
+((uint64_t)AES_isbox[(temp >> 40) & 0xFF] << 40) |
+((uint64_t)AES_isbox[(temp >> 48) & 0xFF] << 48) |
+((uint64_t)AES_isbox[(temp >> 56) & 0xFF] << 56));
+if (mix) {
+col_0 = temp & 0xFFFFFFFF;
+col_1 = temp >> 32;
+
+col_0 = AES_INVMIXCOLUMN(col_0);
+col_1 = AES_INVMIXCOLUMN(col_1);
+
+result = ((uint64_t)col_1 << 32) | col_0;
+} else {
+result = temp;
+}
+}
+
+return result;
+}
+
+target_ulong HELPER(aes64esm)(target_ulong rs1, target_ulong rs2)
+{
+return aes64_operation(rs1, rs2, true, true);
+}
+
+target_ulong HELPER(aes64es)(target_ulong rs1, target_ulong rs2)
+{
+return aes64_operation(rs1, rs2, true, false);
+}
+
+target_ulong HELPER(aes64ds)(target_ulong rs1, target_ulong rs2)
+{
+return aes64_operation(rs1, rs2, false, false);
+}
+
+target_ulong HELPER(aes64dsm)(target_ulong rs1, target_ulong rs2)
+{
+return aes64_operation(rs1, rs2, false, true);
+}
+
+target_ulong HELPER(aes64ks2)(target_ulong rs1, target_ulong rs2)
+{
+uint64_t RS1 = rs1;
+uint64_t RS2 = rs2;
+uint32_t rs1_hi =