Re: [PATCH 7/8] target/ppc: Move cmp{rb, eqb}, tw[i], td[i], isel instructions to decodetree.

2024-04-19 Thread Chinmay Rath

Hi Richard,

On 4/17/24 00:50, Richard Henderson wrote:

On 4/15/24 23:39, Chinmay Rath wrote:

Moving the following instructions to decodetree specification :

cmp{rb, eqb}, t{w, d}    : X-form
t{w, d}i    : D-form
isel    : A-form

The changes were verified by validating that the tcg ops generated by 
those
instructions remain the same, which were captured using the '-d 
in_asm,op' flag.


Signed-off-by: Chinmay Rath 


A faithful reorg of the existing code, so,
Reviewed-by: Richard Henderson 


Thank you.

Notes for improvement:


+static bool trans_CMPRB(DisasContext *ctx, arg_CMPRB *a)
+{
+    TCGv_i32 src1 = tcg_temp_new_i32();
+    TCGv_i32 src2 = tcg_temp_new_i32();
+    TCGv_i32 src2lo = tcg_temp_new_i32();
+    TCGv_i32 src2hi = tcg_temp_new_i32();
+    TCGv_i32 crf = cpu_crf[a->bf];
+
+    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+    tcg_gen_trunc_tl_i32(src1, cpu_gpr[a->ra]);
+    tcg_gen_trunc_tl_i32(src2, cpu_gpr[a->rb]);
+
+    tcg_gen_andi_i32(src1, src1, 0xFF);
+    tcg_gen_ext8u_i32(src2lo, src2);
+    tcg_gen_shri_i32(src2, src2, 8);
+    tcg_gen_ext8u_i32(src2hi, src2);


tcg_gen_extract_i32(src2hi, src2, 8, 8);


+
+    tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1);
+    tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi);
+    tcg_gen_and_i32(crf, src2lo, src2hi);
+
+    if (a->l) {
+    tcg_gen_shri_i32(src2, src2, 8);
+    tcg_gen_ext8u_i32(src2lo, src2);


tcg_gen_extract_i32(src2lo, src2, 16, 8);


+    tcg_gen_shri_i32(src2, src2, 8);
+    tcg_gen_ext8u_i32(src2hi, src2);


tcg_gen_extract_i32(src2hi, src2, 24, 8);


Will update the above in v2.

Will implement the below improvements for trap insns as a separate patch 
later.



+/*
+ * Fixed-Point Trap Instructions
+ */
+
+static bool trans_TW(DisasContext *ctx, arg_TW *a)
+{
+    TCGv_i32 t0;
+
+    if (check_unconditional_trap(ctx, a->rt)) {
+    return true;
+    }
+    t0 = tcg_constant_i32(a->rt);
+    gen_helper_TW(tcg_env, cpu_gpr[a->ra], cpu_gpr[a->rb], t0);
+    return true;
+}
+
+static bool trans_TWI(DisasContext *ctx, arg_TWI *a)
+{
+    TCGv t0;
+    TCGv_i32 t1;
+
+    if (check_unconditional_trap(ctx, a->rt)) {
+    return true;
+    }
+    t0 = tcg_constant_tl(a->si);
+    t1 = tcg_constant_i32(a->rt);
+    gen_helper_TW(tcg_env, cpu_gpr[a->ra], t0, t1);
+    return true;
+}
+
+static bool trans_TD(DisasContext *ctx, arg_TD *a)
+{
+    TCGv_i32 t0;
+
+    REQUIRE_64BIT(ctx);
+    if (check_unconditional_trap(ctx, a->rt)) {
+    return true;
+    }
+    t0 = tcg_constant_i32(a->rt);
+    gen_helper_TD(tcg_env, cpu_gpr[a->ra], cpu_gpr[a->rb], t0);
+    return true;
+}
+
+static bool trans_TDI(DisasContext *ctx, arg_TDI *a)
+{
+    TCGv t0;
+    TCGv_i32 t1;
+
+    REQUIRE_64BIT(ctx);
+    if (check_unconditional_trap(ctx, a->rt)) {
+    return true;
+    }
+    t0 = tcg_constant_tl(a->si);
+    t1 = tcg_constant_i32(a->rt);
+    gen_helper_TD(tcg_env, cpu_gpr[a->ra], t0, t1);
+    return true;
+}


See target/sparc/translate.c, delay_exception, for a method of 
implementing compare-and-trap inline with no inline branch penalty.


static void do_conditional_trap(DisasContext *ctx, unsigned to, TCGv 
a, TCGv b)

{
    static const TCGCond ucond[8] = {
    TCG_COND_NEVER, TCG_COND_GTU, TCG_COND_LTU, TCG_COND_NE,
    TCG_COND_EQ,    TCG_COND_GEU, TCG_COND_LEU, TCG_COND_ALWAYS,
    };
    static const TCGCond scond[8] = {
    TCG_COND_NEVER, TCG_COND_EQ,  TCG_COND_GT,  TCG_COND_GE,
    TCG_COND_LT,    TCG_COND_LE,  TCG_COND_NE, TCG_COND_ALWAYS,
    };

    TCGCond uc = ucond[to & 7];
    TCGCond sc = scond[to >> 2];

    /* There is overlap with EQ; we may not need both comparisons. */
    if (!(to & 0x18)) {
    sc = TCG_COND_NEVER;
    } else if (!(to & 0x03)) {
    uc = TCG_COND_NEVER;
    }

    if (uc == TCG_COND_ALWAYS || sc == TCG_COND_ALWAYS) {
    unconditional trap;
    return true;
    }
    if (uc == TCG_COND_NEVER && sc == TCG_COND_NEVER) {
    return true;
    }

    e = delay_exception(ctx, POWERPC_EXCP_TRAP);

    if (uc != TCG_COND_NEVER) {
    tcg_gen_brcond_tl(uc, a, b, e->lab);
    }
    if (sc != TCG_COND_NEVER) {
    tcg_gen_brcond_tl(sc, a, b, e->lab);
    }
    return true;
}

bool trans_TW(...)
{
    TCGv a = tcg_temp_new();
    TCGv b = tcg_temp_new();

    /* Note that consistent sign extensions work for unsigned 
comparisons. */

    tcg_gen_exts_i32_tl(a, ra);
    tcg_gen_exts_i32_tl(b, rb);
    return do_conditional_trap(ctx, to, a, b);
}

etc.



Thanks,
Chinmay

r~





Re: [PATCH 7/8] target/ppc: Move cmp{rb, eqb}, tw[i], td[i], isel instructions to decodetree.

2024-04-16 Thread Richard Henderson

On 4/15/24 23:39, Chinmay Rath wrote:

Moving the following instructions to decodetree specification :

cmp{rb, eqb}, t{w, d}   : X-form
t{w, d}i: D-form
isel: A-form

The changes were verified by validating that the tcg ops generated by those
instructions remain the same, which were captured using the '-d in_asm,op' flag.

Signed-off-by: Chinmay Rath 


A faithful reorg of the existing code, so,
Reviewed-by: Richard Henderson 

Notes for improvement:


+static bool trans_CMPRB(DisasContext *ctx, arg_CMPRB *a)
+{
+TCGv_i32 src1 = tcg_temp_new_i32();
+TCGv_i32 src2 = tcg_temp_new_i32();
+TCGv_i32 src2lo = tcg_temp_new_i32();
+TCGv_i32 src2hi = tcg_temp_new_i32();
+TCGv_i32 crf = cpu_crf[a->bf];
+
+REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+tcg_gen_trunc_tl_i32(src1, cpu_gpr[a->ra]);
+tcg_gen_trunc_tl_i32(src2, cpu_gpr[a->rb]);
+
+tcg_gen_andi_i32(src1, src1, 0xFF);
+tcg_gen_ext8u_i32(src2lo, src2);
+tcg_gen_shri_i32(src2, src2, 8);
+tcg_gen_ext8u_i32(src2hi, src2);


tcg_gen_extract_i32(src2hi, src2, 8, 8);


+
+tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1);
+tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi);
+tcg_gen_and_i32(crf, src2lo, src2hi);
+
+if (a->l) {
+tcg_gen_shri_i32(src2, src2, 8);
+tcg_gen_ext8u_i32(src2lo, src2);


tcg_gen_extract_i32(src2lo, src2, 16, 8);


+tcg_gen_shri_i32(src2, src2, 8);
+tcg_gen_ext8u_i32(src2hi, src2);


tcg_gen_extract_i32(src2hi, src2, 24, 8);


+/*
+ * Fixed-Point Trap Instructions
+ */
+
+static bool trans_TW(DisasContext *ctx, arg_TW *a)
+{
+TCGv_i32 t0;
+
+if (check_unconditional_trap(ctx, a->rt)) {
+return true;
+}
+t0 = tcg_constant_i32(a->rt);
+gen_helper_TW(tcg_env, cpu_gpr[a->ra], cpu_gpr[a->rb], t0);
+return true;
+}
+
+static bool trans_TWI(DisasContext *ctx, arg_TWI *a)
+{
+TCGv t0;
+TCGv_i32 t1;
+
+if (check_unconditional_trap(ctx, a->rt)) {
+return true;
+}
+t0 = tcg_constant_tl(a->si);
+t1 = tcg_constant_i32(a->rt);
+gen_helper_TW(tcg_env, cpu_gpr[a->ra], t0, t1);
+return true;
+}
+
+static bool trans_TD(DisasContext *ctx, arg_TD *a)
+{
+TCGv_i32 t0;
+
+REQUIRE_64BIT(ctx);
+if (check_unconditional_trap(ctx, a->rt)) {
+return true;
+}
+t0 = tcg_constant_i32(a->rt);
+gen_helper_TD(tcg_env, cpu_gpr[a->ra], cpu_gpr[a->rb], t0);
+return true;
+}
+
+static bool trans_TDI(DisasContext *ctx, arg_TDI *a)
+{
+TCGv t0;
+TCGv_i32 t1;
+
+REQUIRE_64BIT(ctx);
+if (check_unconditional_trap(ctx, a->rt)) {
+return true;
+}
+t0 = tcg_constant_tl(a->si);
+t1 = tcg_constant_i32(a->rt);
+gen_helper_TD(tcg_env, cpu_gpr[a->ra], t0, t1);
+return true;
+}


See target/sparc/translate.c, delay_exception, for a method of implementing 
compare-and-trap inline with no inline branch penalty.


static void do_conditional_trap(DisasContext *ctx, unsigned to, TCGv a, TCGv b)
{
static const TCGCond ucond[8] = {
TCG_COND_NEVER, TCG_COND_GTU, TCG_COND_LTU, TCG_COND_NE,
TCG_COND_EQ,TCG_COND_GEU, TCG_COND_LEU, TCG_COND_ALWAYS,
};
static const TCGCond scond[8] = {
TCG_COND_NEVER, TCG_COND_EQ,  TCG_COND_GT,  TCG_COND_GE,
TCG_COND_LT,TCG_COND_LE,  TCG_COND_NE,  TCG_COND_ALWAYS,
};

TCGCond uc = ucond[to & 7];
TCGCond sc = scond[to >> 2];

/* There is overlap with EQ; we may not need both comparisons. */
if (!(to & 0x18)) {
sc = TCG_COND_NEVER;
} else if (!(to & 0x03)) {
uc = TCG_COND_NEVER;
}

if (uc == TCG_COND_ALWAYS || sc == TCG_COND_ALWAYS) {
unconditional trap;
return true;
}
if (uc == TCG_COND_NEVER && sc == TCG_COND_NEVER) {
return true;
}

e = delay_exception(ctx, POWERPC_EXCP_TRAP);

if (uc != TCG_COND_NEVER) {
tcg_gen_brcond_tl(uc, a, b, e->lab);
}
if (sc != TCG_COND_NEVER) {
tcg_gen_brcond_tl(sc, a, b, e->lab);
}
return true;
}

bool trans_TW(...)
{
TCGv a = tcg_temp_new();
TCGv b = tcg_temp_new();

/* Note that consistent sign extensions work for unsigned comparisons. */
tcg_gen_exts_i32_tl(a, ra);
tcg_gen_exts_i32_tl(b, rb);
return do_conditional_trap(ctx, to, a, b);
}

etc.


r~



[PATCH 7/8] target/ppc: Move cmp{rb, eqb}, tw[i], td[i], isel instructions to decodetree.

2024-04-15 Thread Chinmay Rath
Moving the following instructions to decodetree specification :

cmp{rb, eqb}, t{w, d}   : X-form
t{w, d}i: D-form
isel: A-form

The changes were verified by validating that the tcg ops generated by those
instructions remain the same, which were captured using the '-d in_asm,op' flag.

Signed-off-by: Chinmay Rath 
---
 target/ppc/helper.h|   6 +-
 target/ppc/insn32.decode   |  16 +++
 target/ppc/excp_helper.c   |   4 +-
 target/ppc/int_helper.c|   2 +-
 target/ppc/translate.c | 133 +
 target/ppc/translate/fixedpoint-impl.c.inc | 123 +++
 6 files changed, 148 insertions(+), 136 deletions(-)

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index e862bdceaf..05f7ab5f6e 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -1,8 +1,8 @@
 DEF_HELPER_FLAGS_3(raise_exception_err, TCG_CALL_NO_WG, noreturn, env, i32, 
i32)
 DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, i32)
-DEF_HELPER_FLAGS_4(tw, TCG_CALL_NO_WG, void, env, tl, tl, i32)
+DEF_HELPER_FLAGS_4(TW, TCG_CALL_NO_WG, void, env, tl, tl, i32)
 #if defined(TARGET_PPC64)
-DEF_HELPER_FLAGS_4(td, TCG_CALL_NO_WG, void, env, tl, tl, i32)
+DEF_HELPER_FLAGS_4(TD, TCG_CALL_NO_WG, void, env, tl, tl, i32)
 #endif
 DEF_HELPER_4(HASHST, void, env, tl, tl, tl)
 DEF_HELPER_4(HASHCHK, void, env, tl, tl, tl)
@@ -67,7 +67,7 @@ DEF_HELPER_FLAGS_2(PEXTD, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_1(CDTBCD, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(CBCDTD, TCG_CALL_NO_RWG_SE, tl, tl)
 #if defined(TARGET_PPC64)
-DEF_HELPER_FLAGS_2(cmpeqb, TCG_CALL_NO_RWG_SE, i32, tl, tl)
+DEF_HELPER_FLAGS_2(CMPEQB, TCG_CALL_NO_RWG_SE, i32, tl, tl)
 DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_2(bpermd, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_3(srad, tl, env, tl, tl)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 509961023b..80a7bb1872 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -23,6 +23,9 @@
 &A_tb   frt frb rc:bool
 @A_tb   .. frt:5 . frb:5 . . rc:1   &A_tb
 
+&A_tab_bc   rt ra rb bc
+@A_tab_bc   .. rt:5 ra:5 rb:5 bc:5 . .  &A_tab_bc
+
 &D  rt ra si:int64_t
 @D  .. rt:5 ra:5 si:s16 &D
 
@@ -331,6 +334,19 @@ CMP 01 ... - . . . 00 - 
@X_bfl
 CMPL01 ... - . . . 10 - @X_bfl
 CMPI001011 ... - . .    @D_bfs
 CMPLI   001010 ... - . .    @D_bfu
+CMPRB   01 ... - . . . 001100 - @X_bfl
+CMPEQB  01 ... -- . . 001110 -  @X_bf
+
+### Fixed-Point Trap Instructions
+
+TW  01 . . . 000100 -   @X
+TD  01 . . . 0001000100 -   @X
+TWI 11 . .  @D
+TDI 10 . .  @D
+
+### Fixed-Point Select Instruction
+
+ISEL01 . . . . 0 -  @A_tab_bc
 
 ### Fixed-Point Arithmetic Instructions
 
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 674c05a2ce..79dd9b82cf 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -2750,7 +2750,7 @@ void helper_rfmci(CPUPPCState *env)
 }
 #endif /* !CONFIG_USER_ONLY */
 
-void helper_tw(CPUPPCState *env, target_ulong arg1, target_ulong arg2,
+void helper_TW(CPUPPCState *env, target_ulong arg1, target_ulong arg2,
uint32_t flags)
 {
 if (!likely(!(((int32_t)arg1 < (int32_t)arg2 && (flags & 0x10)) ||
@@ -2764,7 +2764,7 @@ void helper_tw(CPUPPCState *env, target_ulong arg1, 
target_ulong arg2,
 }
 
 #ifdef TARGET_PPC64
-void helper_td(CPUPPCState *env, target_ulong arg1, target_ulong arg2,
+void helper_TD(CPUPPCState *env, target_ulong arg1, target_ulong arg2,
uint32_t flags)
 {
 if (!likely(!(((int64_t)arg1 < (int64_t)arg2 && (flags & 0x10)) ||
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 585c2b65d3..d12dcc28e1 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -159,7 +159,7 @@ uint64_t helper_DIVDE(CPUPPCState *env, uint64_t rau, 
uint64_t rbu, uint32_t oe)
 /* When you XOR the pattern and there is a match, that byte will be zero */
 #define hasvalue(x, n)  (haszero((x) ^ pattern(n)))
 
-uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
+uint32_t helper_CMPEQB(target_ulong ra, target_ulong rb)
 {
 return hasvalue(rb, ra) ? CRF_GT : 0;
 }
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 8900da85e5..98e642b19a 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -1564,66 +1564,6 @@ static inline void gen_set_Rc0(DisasContext *ctx, TCGv 
reg)
 }