With this conversion, we will be able to use the same helpers with sve. This also fixes a bug in which we failed to clear the high bits of the SVE register after an AdvSIMD operation.
Signed-off-by: Richard Henderson <richard.hender...@linaro.org> --- target/arm/helper.h | 2 ++ target/arm/translate-a64.h | 2 ++ target/arm/crypto_helper.c | 11 ++++++++ target/arm/translate-a64.c | 53 ++++++++++++++++++-------------------- 4 files changed, 40 insertions(+), 28 deletions(-) diff --git a/target/arm/helper.h b/target/arm/helper.h index 6623b6689a..96cf4464be 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -537,6 +537,8 @@ DEF_HELPER_FLAGS_3(crypto_sm3partw2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) DEF_HELPER_FLAGS_4(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_rax1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h index 84c40377bd..f2250a8dd1 100644 --- a/target/arm/translate-a64.h +++ b/target/arm/translate-a64.h @@ -141,4 +141,6 @@ void arm_gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, int64_t shift, uint32_t opr_sz, uint32_t max_sz); +extern const GVecGen3 rax1_op; + #endif /* TARGET_ARM_TRANSLATE_A64_H */ diff --git a/target/arm/crypto_helper.c b/target/arm/crypto_helper.c index 6bd5a3d2d0..372d8350e4 100644 --- a/target/arm/crypto_helper.c +++ b/target/arm/crypto_helper.c @@ -725,3 +725,14 @@ void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc) } clear_tail(vd, opr_sz, simd_maxsz(desc)); } + +void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + uint64_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 8; ++i) { + d[i] = n[i] ^ rol64(m[i], 1); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index b2adf3a39e..2eb4315b6d 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -14636,6 +14636,29 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) tcg_temp_free_ptr(tcg_rn_ptr); } +static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + tcg_gen_rotli_i64(d, m, 1); + tcg_gen_xor_i64(d, d, n); +} + +static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m) +{ + tcg_gen_rotli_vec(vece, d, m, 1); + tcg_gen_xor_vec(vece, d, d, n); +} + +static const TCGOpcode rax1_opc[] = { INDEX_op_rotli_vec, 0 }; + +const GVecGen3 rax1_op = +{ + .fni8 = gen_rax1_i64, + .fniv = gen_rax1_vec, + .opt_opc = rax1_opc, + .fno = gen_helper_crypto_rax1, + .vece = MO_64, +}; + /* Crypto three-reg SHA512 * 31 21 20 16 15 14 13 12 11 10 9 5 4 0 * +-----------------------+------+---+---+-----+--------+------+------+ @@ -14670,7 +14693,7 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) break; case 3: /* RAX1 */ feature = dc_isar_feature(aa64_sha3, s); - genfn = NULL; + gvecop = &rax1_op; break; default: g_assert_not_reached(); @@ -14706,10 +14729,7 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) if (gvecop) { gen_gvec_op3(s, true, rd, rn, rm, gvecop); - return; - } - - if (genfn) { + } else { TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; tcg_rd_ptr = vec_full_reg_ptr(s, rd); @@ -14721,29 +14741,6 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) tcg_temp_free_ptr(tcg_rd_ptr); tcg_temp_free_ptr(tcg_rn_ptr); tcg_temp_free_ptr(tcg_rm_ptr); - } else { - TCGv_i64 tcg_op1, tcg_op2, tcg_res[2]; - int pass; - - tcg_op1 = tcg_temp_new_i64(); - tcg_op2 = tcg_temp_new_i64(); - tcg_res[0] = tcg_temp_new_i64(); - tcg_res[1] = tcg_temp_new_i64(); - - for (pass = 0; pass < 2; pass++) { - read_vec_element(s, tcg_op1, rn, pass, MO_64); - read_vec_element(s, tcg_op2, rm, pass, MO_64); - - tcg_gen_rotli_i64(tcg_res[pass], tcg_op2, 1); - tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1); - } - write_vec_element(s, tcg_res[0], rd, 0, MO_64); - write_vec_element(s, tcg_res[1], rd, 1, MO_64); - - tcg_temp_free_i64(tcg_op1); - tcg_temp_free_i64(tcg_op2); - tcg_temp_free_i64(tcg_res[0]); - tcg_temp_free_i64(tcg_res[1]); } } -- 2.20.1