Re: [PATCH v2 3/6] tcg/ppc: Hoist common argument loads in tcg_out_op()

2021-01-14 Thread Philippe Mathieu-Daudé
On 1/14/21 1:28 AM, Richard Henderson wrote:
> 
> Let's just drop the hoisting parts and only do the signature parts for now.
> I'd rather think of a way to split up this large function than waste time
> optimizing it.

Agreed :) Thanks!



Re: [PATCH v2 3/6] tcg/ppc: Hoist common argument loads in tcg_out_op()

2021-01-13 Thread Richard Henderson
On 1/13/21 7:24 AM, Philippe Mathieu-Daudé wrote:
>  case INDEX_op_ld8s_i32:
>  case INDEX_op_ld8s_i64:
> -tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
> +tcg_out_mem_long(s, LBZ, LBZX, a0, a1, a2);
>  tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0]));

Missed replacements.

> -a0 = args[0], a1 = args[1], a2 = args[2];
>  if (const_args[2]) {

Missed replacement.

>  do_addi_32:
>  tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
> @@ -2475,7 +2481,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
>  }
>  break;
>  case INDEX_op_sub_i32:
> -a0 = args[0], a1 = args[1], a2 = args[2];
>  if (const_args[1]) {
>  if (const_args[2]) {

And again.

Let's just drop the hoisting parts and only do the signature parts for now.
I'd rather think of a way to split up this large function than waste time
optimizing it.


r~



[PATCH v2 3/6] tcg/ppc: Hoist common argument loads in tcg_out_op()

2021-01-13 Thread Philippe Mathieu-Daudé
Signed-off-by: Philippe Mathieu-Daudé 
---
 tcg/ppc/tcg-target.c.inc | 188 ++-
 1 file changed, 85 insertions(+), 103 deletions(-)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 19a4a12f155..70b747a8a30 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -2357,15 +2357,22 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 tcg_out32(s, BCLR | BO_ALWAYS);
 }
 
-static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
-   const int *const_args)
+static void tcg_out_op(TCGContext *s, TCGOpcode opc,
+   const TCGArg args[TCG_MAX_OP_ARGS],
+   const int const_args[TCG_MAX_OP_ARGS])
 {
 TCGArg a0, a1, a2;
-int c;
+int c, c2;
+
+/* Hoist the loads of the most common arguments.  */
+a0 = args[0];
+a1 = args[1];
+a2 = args[2];
+c2 = const_args[2];
 
 switch (opc) {
 case INDEX_op_exit_tb:
-tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
+tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, a0);
 tcg_out_b(s, 0, tcg_code_gen_epilogue);
 break;
 case INDEX_op_goto_tb:
@@ -2389,11 +2396,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
 /* Indirect jump. */
 tcg_debug_assert(s->tb_jmp_insn_offset == NULL);
 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TB, 0,
-   (intptr_t)(s->tb_jmp_insn_offset + args[0]));
+   (intptr_t)(s->tb_jmp_insn_offset + a0));
 }
 tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
 tcg_out32(s, BCCTR | BO_ALWAYS);
-set_jmp_reset_offset(s, args[0]);
+set_jmp_reset_offset(s, a0);
 if (USE_REG_TB) {
 /* For the unlinked case, need to reset TCG_REG_TB.  */
 tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB,
@@ -2403,7 +2410,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
 case INDEX_op_goto_ptr:
 tcg_out32(s, MTSPR | RS(args[0]) | CTR);
 if (USE_REG_TB) {
-tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, args[0]);
+tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, a0);
 }
 tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
 tcg_out32(s, BCCTR | BO_ALWAYS);
@@ -2424,49 +2431,48 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
 break;
 case INDEX_op_ld8u_i32:
 case INDEX_op_ld8u_i64:
-tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
+tcg_out_mem_long(s, LBZ, LBZX, a0, a1, a2);
 break;
 case INDEX_op_ld8s_i32:
 case INDEX_op_ld8s_i64:
-tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
+tcg_out_mem_long(s, LBZ, LBZX, a0, a1, a2);
 tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0]));
 break;
 case INDEX_op_ld16u_i32:
 case INDEX_op_ld16u_i64:
-tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
+tcg_out_mem_long(s, LHZ, LHZX, a0, a1, a2);
 break;
 case INDEX_op_ld16s_i32:
 case INDEX_op_ld16s_i64:
-tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
+tcg_out_mem_long(s, LHA, LHAX, a0, a1, a2);
 break;
 case INDEX_op_ld_i32:
 case INDEX_op_ld32u_i64:
-tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
+tcg_out_mem_long(s, LWZ, LWZX, a0, a1, a2);
 break;
 case INDEX_op_ld32s_i64:
-tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
+tcg_out_mem_long(s, LWA, LWAX, a0, a1, a2);
 break;
 case INDEX_op_ld_i64:
-tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
+tcg_out_mem_long(s, LD, LDX, a0, a1, a2);
 break;
 case INDEX_op_st8_i32:
 case INDEX_op_st8_i64:
-tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
+tcg_out_mem_long(s, STB, STBX, a0, a1, a2);
 break;
 case INDEX_op_st16_i32:
 case INDEX_op_st16_i64:
-tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
+tcg_out_mem_long(s, STH, STHX, a0, a1, a2);
 break;
 case INDEX_op_st_i32:
 case INDEX_op_st32_i64:
-tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
+tcg_out_mem_long(s, STW, STWX, a0, a1, a2);
 break;
 case INDEX_op_st_i64:
-tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
+tcg_out_mem_long(s, STD, STDX, a0, a1, a2);
 break;
 
 case INDEX_op_add_i32:
-a0 = args[0], a1 = args[1], a2 = args[2];
 if (const_args[2]) {
 do_addi_32:
 tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
@@ -2475,7 +2481,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
 }
 break;
 case INDEX_op_sub_i32:
-a0 = args[0], a1 = args[1], a2 = args[2];