On Thu, Aug 11, 2016 at 6:54 PM, H.J. Lu <hongjiu...@intel.com> wrote:
> Support TImode CONST_WIDE_INT store generated from piecewise store.
> Need to verify performance impact before enabling TImode CONST_INT
> store for __int128.
>
> Tested on x86-64.  OK for trunk?

OK.

Thanks,
Uros.

> H.J.
> ---
> gcc/
>
>         * config/i386/i386.c (timode_scalar_to_vector_candidate_p): Allow
>         TImode CONST_WIDE_INT store.
>         (timode_scalar_chain::convert_insn): Handle CONST_WIDE_INT store.
>
> gcc/testsuite/
>
>         * gcc.target/i386/pieces-strcpy-1.c: New test.
>         * gcc.target/i386/pieces-strcpy-2.c: Likewise.
> ---
>  gcc/config/i386/i386.c                          | 23 ++++++++++++++++++++---
>  gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c | 15 +++++++++++++++
>  gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c | 15 +++++++++++++++
>  3 files changed, 50 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 93eaab1..d086ede 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -2862,9 +2862,12 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn)
>
>    if (MEM_P (dst))
>      {
> -      /* Check for store.  Only support store from register or standard
> -        SSE constants.  Memory must be aligned or unaligned store is
> -        optimal.  */
> +      /* Check for store.  Memory must be aligned or unaligned store
> +        is optimal.  Only support store from register, standard SSE
> +        constant or CONST_WIDE_INT generated from piecewise store.
> +
> +        ??? Verify performance impact before enabling CONST_INT for
> +        __int128 store.  */
>        if (misaligned_operand (dst, TImode)
>           && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
>         return false;
> @@ -2875,6 +2878,7 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn)
>           return false;
>
>         case REG:
> +       case CONST_WIDE_INT:
>           return true;
>
>         case CONST_INT:
> @@ -3868,6 +3872,19 @@ timode_scalar_chain::convert_insn (rtx_insn *insn)
>        PUT_MODE (src, V1TImode);
>        break;
>
> +    case CONST_WIDE_INT:
> +      if (NONDEBUG_INSN_P (insn))
> +       {
> +         /* Since there are no instructions to store 128-bit constant,
> +            temporary register usage is required.  */
> +         rtx tmp = gen_reg_rtx (V1TImode);
> +         src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
> +         src = validize_mem (force_const_mem (V1TImode, src));
> +         emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
> +         dst = tmp;
> +       }
> +      break;
> +
>      case CONST_INT:
>        switch (standard_sse_constant_p (src, TImode))
>         {
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c
> new file mode 100644
> index 0000000..64b7329
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-1.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
> +
> +extern char *strcpy (char *, const char *);
> +
> +void
> +foo (char *s)
> +{
> +  strcpy (s,
> +         "1234567890abcdef123456abcdef5678123456abcdef567abcdef678"
> +         "1234567");
> +}
> +
> +/* { dg-final { scan-assembler-times "movdqa\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
> new file mode 100644
> index 0000000..7421255
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
> +
> +extern char *strcpy (char *, const char *);
> +
> +void
> +foo (char *s)
> +{
> +  strcpy (s,
> +         "1234567890abcdef123456abcdef5678123456abcdef567abcdef678"
> +         "1234567");
> +}
> +
> +/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> +/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
> --
> 2.7.4
>

Reply via email to