On 11/1/21 11:11 PM, liweiwei wrote:
Signed-off-by: liweiwei <liwei...@iscas.ac.cn>
Signed-off-by: wangjunqiang <wangjunqi...@iscas.ac.cn>
You managed to get the whole patch description into the subject line.
Please break it up.
+target_ulong HELPER(grev)(target_ulong rs1, target_ulong rs2)
+{
+ return do_grev(rs1, rs2, TARGET_LONG_BITS);
+}
Are we expecting to see the full grev instruction at any point? If not, we can certainly
implement Zbk more simply.
+target_ulong HELPER(xperm)(target_ulong rs1, target_ulong rs2, uint32_t sz_log2)
+{
+ target_ulong r = 0;
+ target_ulong sz = 1LL << sz_log2;
+ target_ulong mask = (1LL << sz) - 1;
+ for (int i = 0; i < TARGET_LONG_BITS; i += sz) {
+ target_ulong pos = ((rs2 >> i) & mask) << sz_log2;
+ if (pos < sizeof(target_ulong) * 8) {
+ r |= ((rs1 >> pos) & mask) << i;
+ }
+ }
+ return r;
+}
This could become a static inline do_xperm, called from two specific xperm4 and xperm8
helpers; the compiler would then fold all of the sz_log2 stuff into a more efficient
implementation.
+target_ulong HELPER(unshfl)(target_ulong rs1,
+ target_ulong rs2)
+{
+ target_ulong x = rs1;
+ int i, shift;
+ int bits = TARGET_LONG_BITS >> 1;
+ for (i = 0, shift = 1; shift < bits; i++, shift <<= 1) {
+ if (rs2 & shift) {
+ x = do_shuf_stage(x, shuf_masks[i], shuf_masks[i] >> shift, shift);
+ }
+ }
+ return x;
+}
+
+target_ulong HELPER(shfl)(target_ulong rs1,
+ target_ulong rs2)
+{
+ target_ulong x = rs1;
+ int i, shift;
+ shift = TARGET_LONG_BITS >> 2;
+ i = (shift == 8) ? 3 : 4;
+ for (; i >= 0; i--, shift >>= 1) {
+ if (rs2 & shift) {
+ x = do_shuf_stage(x, shuf_masks[i], shuf_masks[i] >> shift, shift);
+ }
+ }
+ return x;
+}
Same comment as for grev: unless we expect to see the full shfl/unshfl instructions, these can be implemented more simply.
+# The encoding for zext.h differs between RV32 and RV64.
+# zext_h_32 denotes the RV32 variant.
+{
+ zext_h_32 0000100 00000 ..... 100 ..... 0110011 @r2
+ pack 0000100 ..... ..... 100 ..... 0110011 @r
+}
Note to self: improve tcg_gen_deposit to notice constant-zero operands, so that the more
general pack (with rs2 = x0) compiles to a zero-extension.
@@ -556,6 +563,81 @@ static bool gen_unary_per_ol(DisasContext *ctx, arg_r2 *a, DisasExtend ext,
return gen_unary(ctx, a, ext, f_tl);
}
+static bool gen_xperm(DisasContext *ctx, arg_r *a, int32_t size)
+{
+ TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv src1 = get_gpr(ctx, a->rs1, EXT_NONE);
+ TCGv src2 = get_gpr(ctx, a->rs2, EXT_NONE);
+
+ TCGv_i32 sz = tcg_const_i32(size);
+ gen_helper_xperm(dest, src1, src2, sz);
+
+ gen_set_gpr(ctx, a->rd, dest);
+ tcg_temp_free_i32(sz);
+ return true;
+}
+
+static bool gen_grevi(DisasContext *ctx, arg_r2 *a, int shamt)
+{
+ TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv src1 = get_gpr(ctx, a->rs1, EXT_NONE);
+
+ if (shamt == (TARGET_LONG_BITS - 8)) {
+ /* rev8, byte swaps */
+ tcg_gen_bswap_tl(dest, src1);
+ } else {
+ TCGv src2 = tcg_temp_new();
+ tcg_gen_movi_tl(src2, shamt);
+ gen_helper_grev(dest, src1, src2);
+ tcg_temp_free(src2);
+ }
+
+ gen_set_gpr(ctx, a->rd, dest);
+ return true;
+}
+
+static void gen_pack(TCGv ret, TCGv src1, TCGv src2)
+{
+ tcg_gen_deposit_tl(ret, src1, src2,
+ TARGET_LONG_BITS / 2,
+ TARGET_LONG_BITS / 2);
+}
+
+static void gen_packh(TCGv ret, TCGv src1, TCGv src2)
+{
+ TCGv t = tcg_temp_new();
+ tcg_gen_ext8u_tl(t, src2);
+ tcg_gen_deposit_tl(ret, src1, t, 8, TARGET_LONG_BITS - 8);
+ tcg_temp_free(t);
+}
+
+static void gen_packw(TCGv ret, TCGv src1, TCGv src2)
+{
+ TCGv t = tcg_temp_new();
+ tcg_gen_ext16s_tl(t, src2);
+ tcg_gen_deposit_tl(ret, src1, t, 16, 48);
+ tcg_temp_free(t);
+}
+
+static bool gen_shufi(DisasContext *ctx, arg_r2 *a, int shamt,
+ void(*func)(TCGv, TCGv, TCGv))
+{
+ if (shamt >= TARGET_LONG_BITS / 2) {
+ return false;
+ }
+
+ TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv src1 = get_gpr(ctx, a->rs1, EXT_NONE);
+ TCGv src2 = tcg_temp_new();
+
+ tcg_gen_movi_tl(src2, shamt);
+ (*func)(dest, src1, src2);
+
+ gen_set_gpr(ctx, a->rd, dest);
+ tcg_temp_free(src2);
+ return true;
+}
All of the gen functions belong in insn_trans/trans_rvb.c.inc.
r~