Richard Henderson <richard.hender...@linaro.org> writes:

> AVX512VL has VPROLD and VPROLQ, layered onto the same
> opcode as PSHIFTD, but requires EVEX encoding and W.
>
> Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
> ---
>  tcg/i386/tcg-target.h     |  2 +-
>  tcg/i386/tcg-target.c.inc | 15 +++++++++++++--
>  2 files changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
> index 12d098ad6c..38c09fd66c 100644
> --- a/tcg/i386/tcg-target.h
> +++ b/tcg/i386/tcg-target.h
> @@ -195,7 +195,7 @@ extern bool have_movbe;
>  #define TCG_TARGET_HAS_not_vec          0
>  #define TCG_TARGET_HAS_neg_vec          0
>  #define TCG_TARGET_HAS_abs_vec          1
> -#define TCG_TARGET_HAS_roti_vec         0
> +#define TCG_TARGET_HAS_roti_vec         have_avx512vl
>  #define TCG_TARGET_HAS_rots_vec         0
>  #define TCG_TARGET_HAS_rotv_vec         0
>  #define TCG_TARGET_HAS_shi_vec          1
> diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
> index c4e6f2e5ea..5ab7c4c0fa 100644
> --- a/tcg/i386/tcg-target.c.inc
> +++ b/tcg/i386/tcg-target.c.inc
> @@ -361,7 +361,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
>  #define OPC_PSHUFLW     (0x70 | P_EXT | P_SIMDF2)
>  #define OPC_PSHUFHW     (0x70 | P_EXT | P_SIMDF3)
>  #define OPC_PSHIFTW_Ib  (0x71 | P_EXT | P_DATA16) /* /2 /6 /4 */
> -#define OPC_PSHIFTD_Ib  (0x72 | P_EXT | P_DATA16) /* /2 /6 /4 */
> +#define OPC_PSHIFTD_Ib  (0x72 | P_EXT | P_DATA16) /* /1 /2 /6 /4 */
>  #define OPC_PSHIFTQ_Ib  (0x73 | P_EXT | P_DATA16) /* /2 /6 /4 */
>  #define OPC_PSLLW       (0xf1 | P_EXT | P_DATA16)
>  #define OPC_PSLLD       (0xf2 | P_EXT | P_DATA16)
> @@ -2906,6 +2906,14 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
>              insn |= P_VEXW | P_EVEX;
>          }
>          sub = 4;
> +        goto gen_shift;
> +    case INDEX_op_rotli_vec:
> +        insn = OPC_PSHIFTD_Ib | P_EVEX;  /* VPROL[DQ] */
> +        if (vece == MO_64) {
> +            insn |= P_VEXW;
> +        }
> +        sub = 1;
> +        goto gen_shift;

This could just be a /* fall-through */, although given the large number
of gotos the switch statement is gathering I'm not sure it makes too
much difference.

Is there any reason why gen_shift couldn't be pushed into a helper
function so we just had:

    static void tcg_out_vec_shift(s, vece, insn, sub, a0, a1, a2) {
        tcg_debug_assert(vece != MO_8);
        if (type == TCG_TYPE_V256) {
            insn |= P_VEXL;
        }
        tcg_out_vex_modrm(s, insn, sub, a0, a1);
        tcg_out8(s, a2);
    }

    ...

    case INDEX_op_rotli_vec:
        insn = OPC_PSHIFTD_Ib | P_EVEX;  /* VPROL[DQ] */
        if (vece == MO_64) {
            insn |= P_VEXW;
        }
        tcg_out_vec_shift(s, vece, insn, 1, a0, a1, a2);
        break;

Surely the compiler would inline it if needed (and even if it didn't, is
the code generation that critical that we care about a few cycles)?


-- 
Alex Bennée

Reply via email to