Re: [PATCH 7/8] target/ppc: Move cmp{rb, eqb}, tw[i], td[i], isel instructions to decodetree.
Hi Richard, On 4/17/24 00:50, Richard Henderson wrote: On 4/15/24 23:39, Chinmay Rath wrote: Moving the following instructions to decodetree specification : cmp{rb, eqb}, t{w, d} : X-form t{w, d}i : D-form isel : A-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath A faithful reorg of the existing code, so, Reviewed-by: Richard Henderson Thank you. Notes for improvement: +static bool trans_CMPRB(DisasContext *ctx, arg_CMPRB *a) +{ + TCGv_i32 src1 = tcg_temp_new_i32(); + TCGv_i32 src2 = tcg_temp_new_i32(); + TCGv_i32 src2lo = tcg_temp_new_i32(); + TCGv_i32 src2hi = tcg_temp_new_i32(); + TCGv_i32 crf = cpu_crf[a->bf]; + + REQUIRE_INSNS_FLAGS2(ctx, ISA300); + tcg_gen_trunc_tl_i32(src1, cpu_gpr[a->ra]); + tcg_gen_trunc_tl_i32(src2, cpu_gpr[a->rb]); + + tcg_gen_andi_i32(src1, src1, 0xFF); + tcg_gen_ext8u_i32(src2lo, src2); + tcg_gen_shri_i32(src2, src2, 8); + tcg_gen_ext8u_i32(src2hi, src2); tcg_gen_extract_i32(src2hi, src2, 8, 8); + + tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1); + tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi); + tcg_gen_and_i32(crf, src2lo, src2hi); + + if (a->l) { + tcg_gen_shri_i32(src2, src2, 8); + tcg_gen_ext8u_i32(src2lo, src2); tcg_gen_extract_i32(src2lo, src2, 16, 8); + tcg_gen_shri_i32(src2, src2, 8); + tcg_gen_ext8u_i32(src2hi, src2); tcg_gen_extract_i32(src2hi, src2, 24, 8); Will update the above in v2. Will implement the below improvements for trap insns as a separate patch later. +/* + * Fixed-Point Trap Instructions + */ + +static bool trans_TW(DisasContext *ctx, arg_TW *a) +{ + TCGv_i32 t0; + + if (check_unconditional_trap(ctx, a->rt)) { + return true; + } + t0 = tcg_constant_i32(a->rt); + gen_helper_TW(tcg_env, cpu_gpr[a->ra], cpu_gpr[a->rb], t0); + return true; +} + +static bool trans_TWI(DisasContext *ctx, arg_TWI *a) +{ + TCGv t0; + TCGv_i32 t1; + + if (check_unconditional_trap(ctx, a->rt)) { + return true; + } + t0 = tcg_constant_tl(a->si); + t1 = tcg_constant_i32(a->rt); + gen_helper_TW(tcg_env, cpu_gpr[a->ra], t0, t1); + return true; +} + +static bool trans_TD(DisasContext *ctx, arg_TD *a) +{ + TCGv_i32 t0; + + REQUIRE_64BIT(ctx); + if (check_unconditional_trap(ctx, a->rt)) { + return true; + } + t0 = tcg_constant_i32(a->rt); + gen_helper_TD(tcg_env, cpu_gpr[a->ra], cpu_gpr[a->rb], t0); + return true; +} + +static bool trans_TDI(DisasContext *ctx, arg_TDI *a) +{ + TCGv t0; + TCGv_i32 t1; + + REQUIRE_64BIT(ctx); + if (check_unconditional_trap(ctx, a->rt)) { + return true; + } + t0 = tcg_constant_tl(a->si); + t1 = tcg_constant_i32(a->rt); + gen_helper_TD(tcg_env, cpu_gpr[a->ra], t0, t1); + return true; +} See target/sparc/translate.c, delay_exception, for a method of implementing compare-and-trap inline with no inline branch penalty. static void do_conditional_trap(DisasContext *ctx, unsigned to, TCGv a, TCGv b) { static const TCGCond ucond[8] = { TCG_COND_NEVER, TCG_COND_GTU, TCG_COND_LTU, TCG_COND_NE, TCG_COND_EQ, TCG_COND_GEU, TCG_COND_LEU, TCG_COND_ALWAYS, }; static const TCGCond scond[8] = { TCG_COND_NEVER, TCG_COND_EQ, TCG_COND_GT, TCG_COND_GE, TCG_COND_LT, TCG_COND_LE, TCG_COND_NE, TCG_COND_ALWAYS, }; TCGCond uc = ucond[to & 7]; TCGCond sc = scond[to >> 2]; /* There is overlap with EQ; we may not need both comparisons. */ if (!(to & 0x18)) { sc = TCG_COND_NEVER; } else if (!(to & 0x03)) { uc = TCG_COND_NEVER; } if (uc == TCG_COND_ALWAYS || sc == TCG_COND_ALWAYS) { unconditional trap; return true; } if (uc == TCG_COND_NEVER && sc == TCG_COND_NEVER) { return true; } e = delay_exception(ctx, POWERPC_EXCP_TRAP); if (uc != TCG_COND_NEVER) { tcg_gen_brcond_tl(uc, a, b, e->lab); } if (sc != TCG_COND_NEVER) { tcg_gen_brcond_tl(sc, a, b, e->lab); } return true; } bool trans_TW(...) { TCGv a = tcg_temp_new(); TCGv b = tcg_temp_new(); /* Note that consistent sign extensions work for unsigned comparisons. */ tcg_gen_exts_i32_tl(a, ra); tcg_gen_exts_i32_tl(b, rb); return do_conditional_trap(ctx, to, a, b); } etc. Thanks, Chinmay r~
Re: [PATCH 7/8] target/ppc: Move cmp{rb, eqb}, tw[i], td[i], isel instructions to decodetree.
On 4/15/24 23:39, Chinmay Rath wrote: Moving the following instructions to decodetree specification : cmp{rb, eqb}, t{w, d} : X-form t{w, d}i: D-form isel: A-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath A faithful reorg of the existing code, so, Reviewed-by: Richard Henderson Notes for improvement: +static bool trans_CMPRB(DisasContext *ctx, arg_CMPRB *a) +{ +TCGv_i32 src1 = tcg_temp_new_i32(); +TCGv_i32 src2 = tcg_temp_new_i32(); +TCGv_i32 src2lo = tcg_temp_new_i32(); +TCGv_i32 src2hi = tcg_temp_new_i32(); +TCGv_i32 crf = cpu_crf[a->bf]; + +REQUIRE_INSNS_FLAGS2(ctx, ISA300); +tcg_gen_trunc_tl_i32(src1, cpu_gpr[a->ra]); +tcg_gen_trunc_tl_i32(src2, cpu_gpr[a->rb]); + +tcg_gen_andi_i32(src1, src1, 0xFF); +tcg_gen_ext8u_i32(src2lo, src2); +tcg_gen_shri_i32(src2, src2, 8); +tcg_gen_ext8u_i32(src2hi, src2); tcg_gen_extract_i32(src2hi, src2, 8, 8); + +tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1); +tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi); +tcg_gen_and_i32(crf, src2lo, src2hi); + +if (a->l) { +tcg_gen_shri_i32(src2, src2, 8); +tcg_gen_ext8u_i32(src2lo, src2); tcg_gen_extract_i32(src2lo, src2, 16, 8); +tcg_gen_shri_i32(src2, src2, 8); +tcg_gen_ext8u_i32(src2hi, src2); tcg_gen_extract_i32(src2hi, src2, 24, 8); +/* + * Fixed-Point Trap Instructions + */ + +static bool trans_TW(DisasContext *ctx, arg_TW *a) +{ +TCGv_i32 t0; + +if (check_unconditional_trap(ctx, a->rt)) { +return true; +} +t0 = tcg_constant_i32(a->rt); +gen_helper_TW(tcg_env, cpu_gpr[a->ra], cpu_gpr[a->rb], t0); +return true; +} + +static bool trans_TWI(DisasContext *ctx, arg_TWI *a) +{ +TCGv t0; +TCGv_i32 t1; + +if (check_unconditional_trap(ctx, a->rt)) { +return true; +} +t0 = tcg_constant_tl(a->si); +t1 = tcg_constant_i32(a->rt); +gen_helper_TW(tcg_env, cpu_gpr[a->ra], t0, t1); +return true; +} + +static bool trans_TD(DisasContext *ctx, arg_TD *a) +{ +TCGv_i32 t0; + +REQUIRE_64BIT(ctx); +if (check_unconditional_trap(ctx, a->rt)) { +return true; +} +t0 = tcg_constant_i32(a->rt); +gen_helper_TD(tcg_env, cpu_gpr[a->ra], cpu_gpr[a->rb], t0); +return true; +} + +static bool trans_TDI(DisasContext *ctx, arg_TDI *a) +{ +TCGv t0; +TCGv_i32 t1; + +REQUIRE_64BIT(ctx); +if (check_unconditional_trap(ctx, a->rt)) { +return true; +} +t0 = tcg_constant_tl(a->si); +t1 = tcg_constant_i32(a->rt); +gen_helper_TD(tcg_env, cpu_gpr[a->ra], t0, t1); +return true; +} See target/sparc/translate.c, delay_exception, for a method of implementing compare-and-trap inline with no inline branch penalty. static void do_conditional_trap(DisasContext *ctx, unsigned to, TCGv a, TCGv b) { static const TCGCond ucond[8] = { TCG_COND_NEVER, TCG_COND_GTU, TCG_COND_LTU, TCG_COND_NE, TCG_COND_EQ,TCG_COND_GEU, TCG_COND_LEU, TCG_COND_ALWAYS, }; static const TCGCond scond[8] = { TCG_COND_NEVER, TCG_COND_EQ, TCG_COND_GT, TCG_COND_GE, TCG_COND_LT,TCG_COND_LE, TCG_COND_NE, TCG_COND_ALWAYS, }; TCGCond uc = ucond[to & 7]; TCGCond sc = scond[to >> 2]; /* There is overlap with EQ; we may not need both comparisons. */ if (!(to & 0x18)) { sc = TCG_COND_NEVER; } else if (!(to & 0x03)) { uc = TCG_COND_NEVER; } if (uc == TCG_COND_ALWAYS || sc == TCG_COND_ALWAYS) { unconditional trap; return true; } if (uc == TCG_COND_NEVER && sc == TCG_COND_NEVER) { return true; } e = delay_exception(ctx, POWERPC_EXCP_TRAP); if (uc != TCG_COND_NEVER) { tcg_gen_brcond_tl(uc, a, b, e->lab); } if (sc != TCG_COND_NEVER) { tcg_gen_brcond_tl(sc, a, b, e->lab); } return true; } bool trans_TW(...) { TCGv a = tcg_temp_new(); TCGv b = tcg_temp_new(); /* Note that consistent sign extensions work for unsigned comparisons. */ tcg_gen_exts_i32_tl(a, ra); tcg_gen_exts_i32_tl(b, rb); return do_conditional_trap(ctx, to, a, b); } etc. r~
[PATCH 7/8] target/ppc: Move cmp{rb, eqb}, tw[i], td[i], isel instructions to decodetree.
Moving the following instructions to decodetree specification : cmp{rb, eqb}, t{w, d} : X-form t{w, d}i: D-form isel: A-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h| 6 +- target/ppc/insn32.decode | 16 +++ target/ppc/excp_helper.c | 4 +- target/ppc/int_helper.c| 2 +- target/ppc/translate.c | 133 + target/ppc/translate/fixedpoint-impl.c.inc | 123 +++ 6 files changed, 148 insertions(+), 136 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index e862bdceaf..05f7ab5f6e 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -1,8 +1,8 @@ DEF_HELPER_FLAGS_3(raise_exception_err, TCG_CALL_NO_WG, noreturn, env, i32, i32) DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, i32) -DEF_HELPER_FLAGS_4(tw, TCG_CALL_NO_WG, void, env, tl, tl, i32) +DEF_HELPER_FLAGS_4(TW, TCG_CALL_NO_WG, void, env, tl, tl, i32) #if defined(TARGET_PPC64) -DEF_HELPER_FLAGS_4(td, TCG_CALL_NO_WG, void, env, tl, tl, i32) +DEF_HELPER_FLAGS_4(TD, TCG_CALL_NO_WG, void, env, tl, tl, i32) #endif DEF_HELPER_4(HASHST, void, env, tl, tl, tl) DEF_HELPER_4(HASHCHK, void, env, tl, tl, tl) @@ -67,7 +67,7 @@ DEF_HELPER_FLAGS_2(PEXTD, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_1(CDTBCD, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_1(CBCDTD, TCG_CALL_NO_RWG_SE, tl, tl) #if defined(TARGET_PPC64) -DEF_HELPER_FLAGS_2(cmpeqb, TCG_CALL_NO_RWG_SE, i32, tl, tl) +DEF_HELPER_FLAGS_2(CMPEQB, TCG_CALL_NO_RWG_SE, i32, tl, tl) DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_2(bpermd, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_3(srad, tl, env, tl, tl) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 509961023b..80a7bb1872 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -23,6 +23,9 @@ &A_tb frt frb rc:bool @A_tb .. frt:5 . frb:5 . . rc:1 &A_tb +&A_tab_bc rt ra rb bc +@A_tab_bc .. rt:5 ra:5 rb:5 bc:5 . . &A_tab_bc + &D rt ra si:int64_t @D .. rt:5 ra:5 si:s16 &D @@ -331,6 +334,19 @@ CMP 01 ... - . . . 00 - @X_bfl CMPL01 ... - . . . 10 - @X_bfl CMPI001011 ... - . . @D_bfs CMPLI 001010 ... - . . @D_bfu +CMPRB 01 ... - . . . 001100 - @X_bfl +CMPEQB 01 ... -- . . 001110 - @X_bf + +### Fixed-Point Trap Instructions + +TW 01 . . . 000100 - @X +TD 01 . . . 0001000100 - @X +TWI 11 . . @D +TDI 10 . . @D + +### Fixed-Point Select Instruction + +ISEL01 . . . . 0 - @A_tab_bc ### Fixed-Point Arithmetic Instructions diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 674c05a2ce..79dd9b82cf 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -2750,7 +2750,7 @@ void helper_rfmci(CPUPPCState *env) } #endif /* !CONFIG_USER_ONLY */ -void helper_tw(CPUPPCState *env, target_ulong arg1, target_ulong arg2, +void helper_TW(CPUPPCState *env, target_ulong arg1, target_ulong arg2, uint32_t flags) { if (!likely(!(((int32_t)arg1 < (int32_t)arg2 && (flags & 0x10)) || @@ -2764,7 +2764,7 @@ void helper_tw(CPUPPCState *env, target_ulong arg1, target_ulong arg2, } #ifdef TARGET_PPC64 -void helper_td(CPUPPCState *env, target_ulong arg1, target_ulong arg2, +void helper_TD(CPUPPCState *env, target_ulong arg1, target_ulong arg2, uint32_t flags) { if (!likely(!(((int64_t)arg1 < (int64_t)arg2 && (flags & 0x10)) || diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 585c2b65d3..d12dcc28e1 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -159,7 +159,7 @@ uint64_t helper_DIVDE(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) /* When you XOR the pattern and there is a match, that byte will be zero */ #define hasvalue(x, n) (haszero((x) ^ pattern(n))) -uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) +uint32_t helper_CMPEQB(target_ulong ra, target_ulong rb) { return hasvalue(rb, ra) ? CRF_GT : 0; } diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 8900da85e5..98e642b19a 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1564,66 +1564,6 @@ static inline void gen_set_Rc0(DisasContext *ctx, TCGv reg) }