Re: [PATCH for-9.1 04/19] target/i386: do not use s->tmp0 and s->tmp4 to compute flags

2024-04-10 Thread Paolo Bonzini
Il mer 10 apr 2024, 08:35 Richard Henderson 
ha scritto:

> On 4/9/24 06:43, Paolo Bonzini wrote:
> > Create a new temporary whenever flags have to use one, instead of using
> > s->tmp0 or s->tmp4.  NULL can now be passed as the scratch register
> > to gen_prepare_*.
> >
> > Signed-off-by: Paolo Bonzini 
> > ---
> >   target/i386/tcg/translate.c | 54 +
> >   1 file changed, 31 insertions(+), 23 deletions(-)
> >
> > diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
> > index 197cccb6c96..debc1b27283 100644
> > --- a/target/i386/tcg/translate.c
> > +++ b/target/i386/tcg/translate.c
> > @@ -947,9 +947,9 @@ static CCPrepare gen_prepare_eflags_c(DisasContext
> *s, TCGv reg)
> >   case CC_OP_SUBB ... CC_OP_SUBQ:
> >   /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
> >   size = s->cc_op - CC_OP_SUBB;
> > -t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
> > -/* If no temporary was used, be careful not to alias t1 and
> t0.  */
> > -t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
> > +/* Be careful not to alias t1 and t0.  */
> > +t1 = gen_ext_tl(NULL, cpu_cc_src, size, false);
> > +t0 = (reg == t1 || !reg) ? tcg_temp_new() : reg;
> >   tcg_gen_mov_tl(t0, s->cc_srcT);
> >   gen_extu(size, t0);
>
> The tcg_temp_new, mov, and extu can be had with gen_ext_tl...
>

There's actually a lot more that can be done now that I looked more closely
at gen_ext_tl. It is fine (modulo bugs elsewhere) to just extend cc_* in
place. In fact this lets the optimizer work better, even allows (rare)
cross tb optimization because it effectively bumps CC_OP_ADD* to
target_long size, and is just as effective in removing tmp0/tmp4.

Paolo


> >   goto add_sub;
> > @@ -957,8 +957,9 @@ static CCPrepare gen_prepare_eflags_c(DisasContext
> *s, TCGv reg)
> >   case CC_OP_ADDB ... CC_OP_ADDQ:
> >   /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
> >   size = s->cc_op - CC_OP_ADDB;
> > -t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
> > -t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
> > +/* Be careful not to alias t1 and t0.  */
> > +t1 = gen_ext_tl(NULL, cpu_cc_src, size, false);
> > +t0 = gen_ext_tl(reg == t1 ? NULL : reg, cpu_cc_dst, size,
> false);
>
> ... like this.
>
> It would be helpful to update the function comments (nothing is 'compute
> ... to reg' in
> these functions).  Future cleanup, perhaps rename 'reg' to 'scratch', or
> remove the
> argument entirely where applicable.
>
> > @@ -1109,11 +1113,13 @@ static CCPrepare gen_prepare_cc(DisasContext *s,
> int b, TCGv reg)
> >   size = s->cc_op - CC_OP_SUBB;
> >   switch (jcc_op) {
> >   case JCC_BE:
> > -tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
> > -gen_extu(size, s->tmp4);
> > -t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
> > -cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
> > -   .reg2 = t0, .use_reg2 = true };
> > +/* Be careful not to alias t1 and t0.  */
> > +t1 = gen_ext_tl(NULL, cpu_cc_src, size, false);
> > +t0 = (reg == t1 || !reg) ? tcg_temp_new() : reg;
> > +tcg_gen_mov_tl(t0, s->cc_srcT);
> > +gen_extu(size, t0);
>
> gen_ext_tl
>
> > +cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = t0,
> > +   .reg2 = t1, .use_reg2 = true };
> >   break;
> >
> >   case JCC_L:
> > @@ -1122,11 +1128,13 @@ static CCPrepare gen_prepare_cc(DisasContext *s,
> int b, TCGv reg)
> >   case JCC_LE:
> >   cond = TCG_COND_LE;
> >   fast_jcc_l:
> > -tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
> > -gen_exts(size, s->tmp4);
> > -t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
> > -cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
> > -   .reg2 = t0, .use_reg2 = true };
> > +/* Be careful not to alias t1 and t0.  */
> > +t1 = gen_ext_tl(NULL, cpu_cc_src, size, true);
> > +t0 = (reg == t1 || !reg) ? tcg_temp_new() : reg;
> > +tcg_gen_mov_tl(t0, s->cc_srcT);
> > +gen_exts(size, t0);
>
> gen_ext_tl
>
> With that,
> Reviewed-by: Richard Henderson 
>
>
> r~
>
>


Re: [PATCH for-9.1 04/19] target/i386: do not use s->tmp0 and s->tmp4 to compute flags

2024-04-10 Thread Richard Henderson

On 4/9/24 06:43, Paolo Bonzini wrote:

Create a new temporary whenever flags have to use one, instead of using
s->tmp0 or s->tmp4.  NULL can now be passed as the scratch register
to gen_prepare_*.

Signed-off-by: Paolo Bonzini 
---
  target/i386/tcg/translate.c | 54 +
  1 file changed, 31 insertions(+), 23 deletions(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 197cccb6c96..debc1b27283 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -947,9 +947,9 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv 
reg)
  case CC_OP_SUBB ... CC_OP_SUBQ:
  /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
  size = s->cc_op - CC_OP_SUBB;
-t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
-/* If no temporary was used, be careful not to alias t1 and t0.  */
-t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
+/* Be careful not to alias t1 and t0.  */
+t1 = gen_ext_tl(NULL, cpu_cc_src, size, false);
+t0 = (reg == t1 || !reg) ? tcg_temp_new() : reg;
  tcg_gen_mov_tl(t0, s->cc_srcT);
  gen_extu(size, t0);


The tcg_temp_new, mov, and extu can be had with gen_ext_tl...


  goto add_sub;
@@ -957,8 +957,9 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv 
reg)
  case CC_OP_ADDB ... CC_OP_ADDQ:
  /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
  size = s->cc_op - CC_OP_ADDB;
-t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
-t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
+/* Be careful not to alias t1 and t0.  */
+t1 = gen_ext_tl(NULL, cpu_cc_src, size, false);
+t0 = gen_ext_tl(reg == t1 ? NULL : reg, cpu_cc_dst, size, false);


... like this.

It would be helpful to update the function comments (nothing is 'compute ... to reg' in 
these functions).  Future cleanup, perhaps rename 'reg' to 'scratch', or remove the 
argument entirely where applicable.



@@ -1109,11 +1113,13 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, 
TCGv reg)
  size = s->cc_op - CC_OP_SUBB;
  switch (jcc_op) {
  case JCC_BE:
-tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
-gen_extu(size, s->tmp4);
-t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
-cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
-   .reg2 = t0, .use_reg2 = true };
+/* Be careful not to alias t1 and t0.  */
+t1 = gen_ext_tl(NULL, cpu_cc_src, size, false);
+t0 = (reg == t1 || !reg) ? tcg_temp_new() : reg;
+tcg_gen_mov_tl(t0, s->cc_srcT);
+gen_extu(size, t0);


gen_ext_tl


+cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = t0,
+   .reg2 = t1, .use_reg2 = true };
  break;
  
  case JCC_L:

@@ -1122,11 +1128,13 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, 
TCGv reg)
  case JCC_LE:
  cond = TCG_COND_LE;
  fast_jcc_l:
-tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
-gen_exts(size, s->tmp4);
-t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
-cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
-   .reg2 = t0, .use_reg2 = true };
+/* Be careful not to alias t1 and t0.  */
+t1 = gen_ext_tl(NULL, cpu_cc_src, size, true);
+t0 = (reg == t1 || !reg) ? tcg_temp_new() : reg;
+tcg_gen_mov_tl(t0, s->cc_srcT);
+gen_exts(size, t0);


gen_ext_tl

With that,
Reviewed-by: Richard Henderson 


r~