Re: [PATCH v4 099/163] tcg: Convert extrl_i64_i32 to TCGOutOpUnary

2025-04-15 Thread Pierrick Bouvier

On 4/15/25 12:24, Richard Henderson wrote:

Drop the cast from TCGv_i64 to TCGv_i32 in tcg_gen_extrl_i64_i32
and emit extrl_i64_i32 unconditionally.  Move that special case
to tcg_gen_code when we find out if the output is live or dead.
In this way even hosts that canonicalize truncations can make
use of a store directly from the 64-bit host register.

Signed-off-by: Richard Henderson 
---
  tcg/tcg-op.c |  4 +---
  tcg/tcg.c| 35 +++-
  tcg/aarch64/tcg-target.c.inc |  1 -
  tcg/i386/tcg-target.c.inc|  4 
  tcg/loongarch64/tcg-target.c.inc |  2 --
  tcg/mips/tcg-target.c.inc|  2 --
  tcg/ppc/tcg-target.c.inc |  1 -
  tcg/riscv/tcg-target.c.inc   |  2 --
  tcg/s390x/tcg-target.c.inc   |  1 -
  tcg/tci/tcg-target.c.inc |  1 -
  10 files changed, 31 insertions(+), 22 deletions(-)

diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index d3f3c9d248..7ecd1f6c8f 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -2962,11 +2962,9 @@ void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
  {
  if (TCG_TARGET_REG_BITS == 32) {
  tcg_gen_mov_i32(ret, TCGV_LOW(arg));
-} else if (TCG_TARGET_HAS_extr_i64_i32) {
+} else {
  tcg_gen_op2(INDEX_op_extrl_i64_i32, TCG_TYPE_I32,
  tcgv_i32_arg(ret), tcgv_i64_arg(arg));
-} else {
-tcg_gen_mov_i32(ret, (TCGv_i32)arg);
  }
  }
  
diff --git a/tcg/tcg.c b/tcg/tcg.c

index b6c1efa828..84083d133d 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1093,6 +1093,16 @@ static const TCGOutOpUnary outop_extu_i32_i64 = {
  .base.static_constraint = C_O1_I1(r, r),
  .out_rr = tgen_extu_i32_i64,
  };
+
+static void tgen_extrl_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
+{
+tcg_out_extrl_i64_i32(s, a0, a1);
+}
+
+static const TCGOutOpUnary outop_extrl_i64_i32 = {
+.base.static_constraint = C_O1_I1(r, r),
+.out_rr = TCG_TARGET_HAS_extr_i64_i32 ? tgen_extrl_i64_i32 : NULL,
+};
  #endif
  
  /*

@@ -1151,6 +1161,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = {
  OUTOP(INDEX_op_bswap64, TCGOutOpUnary, outop_bswap64),
  OUTOP(INDEX_op_ext_i32_i64, TCGOutOpUnary, outop_exts_i32_i64),
  OUTOP(INDEX_op_extu_i32_i64, TCGOutOpUnary, outop_extu_i32_i64),
+OUTOP(INDEX_op_extrl_i64_i32, TCGOutOpUnary, outop_extrl_i64_i32),
  #endif
  };
  
@@ -2400,12 +2411,12 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)

  case INDEX_op_st_i64:
  case INDEX_op_ext_i32_i64:
  case INDEX_op_extu_i32_i64:
+case INDEX_op_extrl_i64_i32:
  case INDEX_op_deposit_i64:
  return TCG_TARGET_REG_BITS == 64;
  
  case INDEX_op_extract2_i64:

  return TCG_TARGET_HAS_extract2_i64;
-case INDEX_op_extrl_i64_i32:
  case INDEX_op_extrh_i64_i32:
  return TCG_TARGET_HAS_extr_i64_i32;
  case INDEX_op_add2_i64:
@@ -5438,10 +5449,6 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
  /* emit instruction */
  TCGType type = TCGOP_TYPE(op);
  switch (op->opc) {
-case INDEX_op_extrl_i64_i32:
-tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
-break;
-
  case INDEX_op_add:
  case INDEX_op_and:
  case INDEX_op_andc:
@@ -5499,6 +5506,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
  case INDEX_op_bswap64:
  case INDEX_op_ext_i32_i64:
  case INDEX_op_extu_i32_i64:
+case INDEX_op_extrl_i64_i32:
  assert(TCG_TARGET_REG_BITS == 64);
  /* fall through */
  case INDEX_op_ctpop:
@@ -6657,6 +6665,22 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
  TCGOpcode opc = op->opc;
  
  switch (opc) {

+case INDEX_op_extrl_i64_i32:
+assert(TCG_TARGET_REG_BITS == 64);
+/*
+ * If TCG_TYPE_I32 is represented in some canonical form,
+ * e.g. zero or sign-extended, then emit as a unary op.
+ * Otherwise we can treat this as a plain move.
+ * If the output dies, treat this as a plain move, because
+ * this will be implemented with a store.
+ */
+if (TCG_TARGET_HAS_extr_i64_i32) {
+TCGLifeData arg_life = op->life;
+if (!IS_DEAD_ARG(0)) {
+goto do_default;
+}
+}
+/* fall through */
  case INDEX_op_mov:
  case INDEX_op_mov_vec:
  tcg_reg_alloc_mov(s, op);
@@ -6699,6 +6723,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
  }
  /* fall through */
  default:
+do_default:
  /* Sanity check that we've not introduced any unhandled opcodes. */
  tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
TCGOP_FLAGS(op)));
diff --git a/tcg/aarch64/tcg-target.c.i

[PATCH v4 099/163] tcg: Convert extrl_i64_i32 to TCGOutOpUnary

2025-04-15 Thread Richard Henderson
Drop the cast from TCGv_i64 to TCGv_i32 in tcg_gen_extrl_i64_i32
and emit extrl_i64_i32 unconditionally.  Move that special case
to tcg_gen_code when we find out if the output is live or dead.
In this way even hosts that canonicalize truncations can make
use of a store directly from the 64-bit host register.

Signed-off-by: Richard Henderson 
---
 tcg/tcg-op.c |  4 +---
 tcg/tcg.c| 35 +++-
 tcg/aarch64/tcg-target.c.inc |  1 -
 tcg/i386/tcg-target.c.inc|  4 
 tcg/loongarch64/tcg-target.c.inc |  2 --
 tcg/mips/tcg-target.c.inc|  2 --
 tcg/ppc/tcg-target.c.inc |  1 -
 tcg/riscv/tcg-target.c.inc   |  2 --
 tcg/s390x/tcg-target.c.inc   |  1 -
 tcg/tci/tcg-target.c.inc |  1 -
 10 files changed, 31 insertions(+), 22 deletions(-)

diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index d3f3c9d248..7ecd1f6c8f 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -2962,11 +2962,9 @@ void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
 {
 if (TCG_TARGET_REG_BITS == 32) {
 tcg_gen_mov_i32(ret, TCGV_LOW(arg));
-} else if (TCG_TARGET_HAS_extr_i64_i32) {
+} else {
 tcg_gen_op2(INDEX_op_extrl_i64_i32, TCG_TYPE_I32,
 tcgv_i32_arg(ret), tcgv_i64_arg(arg));
-} else {
-tcg_gen_mov_i32(ret, (TCGv_i32)arg);
 }
 }
 
diff --git a/tcg/tcg.c b/tcg/tcg.c
index b6c1efa828..84083d133d 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1093,6 +1093,16 @@ static const TCGOutOpUnary outop_extu_i32_i64 = {
 .base.static_constraint = C_O1_I1(r, r),
 .out_rr = tgen_extu_i32_i64,
 };
+
+static void tgen_extrl_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
+{
+tcg_out_extrl_i64_i32(s, a0, a1);
+}
+
+static const TCGOutOpUnary outop_extrl_i64_i32 = {
+.base.static_constraint = C_O1_I1(r, r),
+.out_rr = TCG_TARGET_HAS_extr_i64_i32 ? tgen_extrl_i64_i32 : NULL,
+};
 #endif
 
 /*
@@ -1151,6 +1161,7 @@ static const TCGOutOp * const all_outop[NB_OPS] = {
 OUTOP(INDEX_op_bswap64, TCGOutOpUnary, outop_bswap64),
 OUTOP(INDEX_op_ext_i32_i64, TCGOutOpUnary, outop_exts_i32_i64),
 OUTOP(INDEX_op_extu_i32_i64, TCGOutOpUnary, outop_extu_i32_i64),
+OUTOP(INDEX_op_extrl_i64_i32, TCGOutOpUnary, outop_extrl_i64_i32),
 #endif
 };
 
@@ -2400,12 +2411,12 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
 case INDEX_op_st_i64:
 case INDEX_op_ext_i32_i64:
 case INDEX_op_extu_i32_i64:
+case INDEX_op_extrl_i64_i32:
 case INDEX_op_deposit_i64:
 return TCG_TARGET_REG_BITS == 64;
 
 case INDEX_op_extract2_i64:
 return TCG_TARGET_HAS_extract2_i64;
-case INDEX_op_extrl_i64_i32:
 case INDEX_op_extrh_i64_i32:
 return TCG_TARGET_HAS_extr_i64_i32;
 case INDEX_op_add2_i64:
@@ -5438,10 +5449,6 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
 /* emit instruction */
 TCGType type = TCGOP_TYPE(op);
 switch (op->opc) {
-case INDEX_op_extrl_i64_i32:
-tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
-break;
-
 case INDEX_op_add:
 case INDEX_op_and:
 case INDEX_op_andc:
@@ -5499,6 +5506,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
 case INDEX_op_bswap64:
 case INDEX_op_ext_i32_i64:
 case INDEX_op_extu_i32_i64:
+case INDEX_op_extrl_i64_i32:
 assert(TCG_TARGET_REG_BITS == 64);
 /* fall through */
 case INDEX_op_ctpop:
@@ -6657,6 +6665,22 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
 TCGOpcode opc = op->opc;
 
 switch (opc) {
+case INDEX_op_extrl_i64_i32:
+assert(TCG_TARGET_REG_BITS == 64);
+/*
+ * If TCG_TYPE_I32 is represented in some canonical form,
+ * e.g. zero or sign-extended, then emit as a unary op.
+ * Otherwise we can treat this as a plain move.
+ * If the output dies, treat this as a plain move, because
+ * this will be implemented with a store.
+ */
+if (TCG_TARGET_HAS_extr_i64_i32) {
+TCGLifeData arg_life = op->life;
+if (!IS_DEAD_ARG(0)) {
+goto do_default;
+}
+}
+/* fall through */
 case INDEX_op_mov:
 case INDEX_op_mov_vec:
 tcg_reg_alloc_mov(s, op);
@@ -6699,6 +6723,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
 }
 /* fall through */
 default:
+do_default:
 /* Sanity check that we've not introduced any unhandled opcodes. */
 tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
   TCGOP_FLAGS(op)));
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 44314f6a0f..8abc5f26da 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/a