On Sun, Sep 28, 2025 at 7:33 PM Richard Henderson <[email protected]> wrote:
>
> The expansions that we chose in tcg-op.c may be less than optimal.
> Delay lowering until optimize, so that we have propagated constants
> and have computed known zero/one masks.
>
> Signed-off-by: Richard Henderson <[email protected]>
> ---

Reviewed-by: Manos Pitsidianakis <[email protected]>

>  tcg/optimize.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++----
>  tcg/tcg-op.c   |  9 ++------
>  2 files changed, 60 insertions(+), 12 deletions(-)
>
> diff --git a/tcg/optimize.c b/tcg/optimize.c
> index 5df57049c2..47fbcd73e3 100644
> --- a/tcg/optimize.c
> +++ b/tcg/optimize.c
> @@ -2161,21 +2161,74 @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
>      uint64_t z2 = t2->z_mask;
>      uint64_t o1 = t1->o_mask;
>      uint64_t o2 = t2->o_mask;
> +    uint64_t zr, or;
>      int shr = op->args[3];
> +    int shl;
>
>      if (ctx->type == TCG_TYPE_I32) {
>          z1 = (uint32_t)z1 >> shr;
>          o1 = (uint32_t)o1 >> shr;
> -        z2 = (uint64_t)((int32_t)z2 << (32 - shr));
> -        o2 = (uint64_t)((int32_t)o2 << (32 - shr));
> +        shl = 32 - shr;
> +        z2 = (uint64_t)((int32_t)z2 << shl);
> +        o2 = (uint64_t)((int32_t)o2 << shl);
>      } else {
>          z1 >>= shr;
>          o1 >>= shr;
> -        z2 <<= 64 - shr;
> -        o2 <<= 64 - shr;
> +        shl = 64 - shr;
> +        z2 <<= shl;
> +        o2 <<= shl;
> +    }
> +    zr = z1 | z2;
> +    or = o1 | o2;
> +
> +    if (zr == or) {
> +        return tcg_opt_gen_movi(ctx, op, op->args[0], zr);
>      }
>
> -    return fold_masks_zo(ctx, op, z1 | z2, o1 | o2);
> +    if (z2 == 0) {
> +        /* High part zeros folds to simple right shift. */
> +        op->opc = INDEX_op_shr;
> +        op->args[2] = arg_new_constant(ctx, shr);
> +    } else if (z1 == 0) {
> +        /* Low part zeros folds to simple left shift. */
> +        op->opc = INDEX_op_shl;
> +        op->args[1] = op->args[2];
> +        op->args[2] = arg_new_constant(ctx, shl);
> +    } else if (!tcg_op_supported(INDEX_op_extract2, ctx->type, 0)) {
> +        TCGArg tmp = arg_new_temp(ctx);
> +        TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_shr, 3);
> +
> +        op2->args[0] = tmp;
> +        op2->args[1] = op->args[1];
> +        op2->args[2] = arg_new_constant(ctx, shr);
> +
> +        if (TCG_TARGET_deposit_valid(ctx->type, shl, shr)) {
> +            /*
> +             * Deposit has more arguments than extract2,
> +             * so we need to create a new TCGOp.
> +             */
> +            op2 = opt_insert_before(ctx, op, INDEX_op_deposit, 5);
> +            op2->args[0] = op->args[0];
> +            op2->args[1] = tmp;
> +            op2->args[2] = op->args[2];
> +            op2->args[3] = shl;
> +            op2->args[4] = shr;
> +
> +            tcg_op_remove(ctx->tcg, op);
> +            op = op2;
> +        } else {
> +            op2 = opt_insert_before(ctx, op, INDEX_op_shl, 3);
> +            op2->args[0] = op->args[0];
> +            op2->args[1] = op->args[2];
> +            op2->args[2] = arg_new_constant(ctx, shl);
> +
> +            op->opc = INDEX_op_or;
> +            op->args[1] = op->args[0];
> +            op->args[2] = tmp;
> +        }
> +    }
> +
> +    return fold_masks_zo(ctx, op, zr, or);
>  }
>
>  static bool fold_exts(OptContext *ctx, TCGOp *op)
> diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
> index abce307f26..4caf77da1e 100644
> --- a/tcg/tcg-op.c
> +++ b/tcg/tcg-op.c
> @@ -1008,13 +1008,8 @@ void tcg_gen_extract2_i32(TCGv_i32 ret, TCGv_i32 al, TCGv_i32 ah,
>          tcg_gen_mov_i32(ret, ah);
>      } else if (al == ah) {
>          tcg_gen_rotri_i32(ret, al, ofs);
> -    } else if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_I32, 0)) {
> -        tcg_gen_op4i_i32(INDEX_op_extract2, ret, al, ah, ofs);
>      } else {
> -        TCGv_i32 t0 = tcg_temp_ebb_new_i32();
> -        tcg_gen_shri_i32(t0, al, ofs);
> -        tcg_gen_deposit_i32(ret, t0, ah, 32 - ofs, ofs);
> -        tcg_temp_free_i32(t0);
> +        tcg_gen_op4i_i32(INDEX_op_extract2, ret, al, ah, ofs);
>      }
>  }
>
> @@ -2711,7 +2706,7 @@ void tcg_gen_extract2_i64(TCGv_i64 ret, TCGv_i64 al, TCGv_i64 ah,
>          tcg_gen_mov_i64(ret, ah);
>      } else if (al == ah) {
>          tcg_gen_rotri_i64(ret, al, ofs);
> -    } else if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_I64, 0)) {
> +    } else if (TCG_TARGET_REG_BITS == 64) {
>          tcg_gen_op4i_i64(INDEX_op_extract2, ret, al, ah, ofs);
>      } else {
>          TCGv_i64 t0 = tcg_temp_ebb_new_i64();
> --
> 2.43.0
>
>
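
For anyone else reading along, here is the identity the new lowering in
fold_extract2() relies on, written out as plain C. This is only my own
sanity-check sketch, not code from the patch; extract2_ref(),
extract2_lowered() and the little test driver are names I made up. It
models the 64-bit case with 0 < ofs < 64, as in the non-trivial paths
above; the 32-bit path is the same with 32 in place of 64.

#include <stdint.h>
#include <stdio.h>

/* Reference model: the low 64 bits of the 128-bit concatenation ah:al,
   shifted right by ofs.  This is what extract2 computes. */
static uint64_t extract2_ref(uint64_t al, uint64_t ah, unsigned ofs)
{
    return (al >> ofs) | (ah << (64 - ofs));
}

/* The same value built the way the patch lowers it when the backend has
   no extract2: shr of the low part, shl of the high part, then or. */
static uint64_t extract2_lowered(uint64_t al, uint64_t ah, unsigned ofs)
{
    uint64_t lo = al >> ofs;          /* INDEX_op_shr */
    uint64_t hi = ah << (64 - ofs);   /* INDEX_op_shl */
    return lo | hi;                   /* INDEX_op_or  */
}

int main(void)
{
    uint64_t al = 0x0123456789abcdefull, ah = 0xfedcba9876543210ull;

    for (unsigned ofs = 1; ofs < 64; ofs++) {
        if (extract2_ref(al, ah, ofs) != extract2_lowered(al, ah, ofs)) {
            printf("mismatch at ofs %u\n", ofs);
            return 1;
        }
    }
    printf("shr/shl/or matches extract2 for all 0 < ofs < 64\n");
    return 0;
}

The deposit variant in the patch writes the shl/or pair as a single
deposit of the low shr bits of ah at bit position shl = 64 - shr of the
shifted low part (args[3] = shl, args[4] = shr), which computes the same
value because the shifted low part has its top shr bits clear.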

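One more note on the zr == or early-out, mostly to convince myself it is
right: a clear bit in z_mask means that bit of the value is known to be
zero, and a set bit in o_mask means it is known to be one, so when the
two masks coincide every bit is known and the op can become a movi of
that value. A toy model follows; the struct and function names are mine,
not QEMU's, and again only the 64-bit case is shown.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Minimal stand-in for the per-temp known-bits tracked in tcg/optimize.c:
   z_mask bit clear -> value bit known 0, o_mask bit set -> value bit known 1. */
typedef struct {
    uint64_t z_mask;
    uint64_t o_mask;
} KnownBits;

/* Combine the two shifted halves the way fold_extract2() now does. */
static KnownBits extract2_known(KnownBits lo, KnownBits hi, unsigned ofs)
{
    KnownBits r;

    r.z_mask = (lo.z_mask >> ofs) | (hi.z_mask << (64 - ofs));
    r.o_mask = (lo.o_mask >> ofs) | (hi.o_mask << (64 - ofs));
    return r;
}

int main(void)
{
    /* Low part is the known constant 0xff, high part is known zero. */
    KnownBits lo = { 0xff, 0xff };
    KnownBits hi = { 0, 0 };
    KnownBits r = extract2_known(lo, hi, 4);

    if (r.z_mask == r.o_mask) {
        /* Every bit known: this is where the patch emits a movi. */
        printf("constant 0x%" PRIx64 "\n", r.z_mask);
    }
    return 0;
}

With both inputs fully known this prints "constant 0xf", which matches
what the tcg_opt_gen_movi() path would produce for that extract2.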