Richard Henderson <richard.hender...@linaro.org> writes:
> AVX512VL has VPROLD and VPROLQ, layered onto the same > opcode as PSHIFTD, but requires EVEX encoding and W. > > Signed-off-by: Richard Henderson <richard.hender...@linaro.org> > --- > tcg/i386/tcg-target.h | 2 +- > tcg/i386/tcg-target.c.inc | 15 +++++++++++++-- > 2 files changed, 14 insertions(+), 3 deletions(-) > > diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h > index 12d098ad6c..38c09fd66c 100644 > --- a/tcg/i386/tcg-target.h > +++ b/tcg/i386/tcg-target.h > @@ -195,7 +195,7 @@ extern bool have_movbe; > #define TCG_TARGET_HAS_not_vec 0 > #define TCG_TARGET_HAS_neg_vec 0 > #define TCG_TARGET_HAS_abs_vec 1 > -#define TCG_TARGET_HAS_roti_vec 0 > +#define TCG_TARGET_HAS_roti_vec have_avx512vl > #define TCG_TARGET_HAS_rots_vec 0 > #define TCG_TARGET_HAS_rotv_vec 0 > #define TCG_TARGET_HAS_shi_vec 1 > diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc > index c4e6f2e5ea..5ab7c4c0fa 100644 > --- a/tcg/i386/tcg-target.c.inc > +++ b/tcg/i386/tcg-target.c.inc > @@ -361,7 +361,7 @@ static bool tcg_target_const_match(int64_t val, TCGType > type, int ct) > #define OPC_PSHUFLW (0x70 | P_EXT | P_SIMDF2) > #define OPC_PSHUFHW (0x70 | P_EXT | P_SIMDF3) > #define OPC_PSHIFTW_Ib (0x71 | P_EXT | P_DATA16) /* /2 /6 /4 */ > -#define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16) /* /2 /6 /4 */ > +#define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16) /* /1 /2 /6 /4 */ > #define OPC_PSHIFTQ_Ib (0x73 | P_EXT | P_DATA16) /* /2 /6 /4 */ > #define OPC_PSLLW (0xf1 | P_EXT | P_DATA16) > #define OPC_PSLLD (0xf2 | P_EXT | P_DATA16) > @@ -2906,6 +2906,14 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode > opc, > insn |= P_VEXW | P_EVEX; > } > sub = 4; > + goto gen_shift; > + case INDEX_op_rotli_vec: > + insn = OPC_PSHIFTD_Ib | P_EVEX; /* VPROL[DQ] */ > + if (vece == MO_64) { > + insn |= P_VEXW; > + } > + sub = 1; > + goto gen_shift; This could just be a /* fall-through */ although given the large amount of gotos the switch statement is gathering I'm not sure it makes too 
much difference. Is there any reason why gen_shift couldn't be pushed into a helper function so we just had: static void tcg_out_vec_shift(s, vece, insn, sub, a0, a1, a2) { tcg_debug_assert(vece != MO_8); if (type == TCG_TYPE_V256) { insn |= P_VEXL; } tcg_out_vex_modrm(s, insn, sub, a0, a1); tcg_out8(s, a2); } ... case INDEX_op_rotli_vec: insn = OPC_PSHIFTD_Ib | P_EVEX; /* VPROL[DQ] */ if (vece == MO_64) { insn |= P_VEXW; } tcg_out_vec_shift(s, vece, insn, 1, a0, a1, a2); break; Surely the compiler would inline if needed (and even if it didn't, is the code generation so critical that we care about a few cycles)? -- Alex Bennée