On 8 February 2018 at 14:54, Richard Sandiford
<richard.sandif...@linaro.org> wrote:
> One advantage of the new permute handling compared to the old way is
> that we can now easily take advantage of the vectoriser's divmod patterns
> for SVE.
>
> I realise we're in stage 4, but this is entirely SVE-specific.
>
> Tested on aarch64-linux-gnu and aarch64_be-elf.  OK to install?
>
> Richard
>
Hi Richard,

>
> 2018-02-08  Richard Sandiford  <richard.sandif...@linaro.org>
>
> gcc/
>         * config/aarch64/iterators.md (UNSPEC_SMUL_HIGHPART)
>         (UNSPEC_UMUL_HIGHPART): New constants.
>         (MUL_HIGHPART): New int iterator.
>         (su): Handle UNSPEC_SMUL_HIGHPART and UNSPEC_UMUL_HIGHPART.
>         * config/aarch64/aarch64-sve.md (<su>mul<mode>3_highpart): New
>         define_expand.
>         (*<su>mul<mode>3_highpart): New define_insn.
>
> gcc/testsuite/
>         * gcc.target/aarch64/sve/mul_highpart_1.c: New test.
>         * gcc.target/aarch64/sve/mul_highpart_1_run.c: Likewise.
>
> Index: gcc/config/aarch64/iterators.md
> ===================================================================
> --- gcc/config/aarch64/iterators.md     2018-01-26 15:14:35.386171048 +0000
> +++ gcc/config/aarch64/iterators.md     2018-02-08 13:51:56.252511923 +0000
> @@ -438,6 +438,8 @@ (define_c_enum "unspec"
>      UNSPEC_ANDF                ; Used in aarch64-sve.md.
>      UNSPEC_IORF                ; Used in aarch64-sve.md.
>      UNSPEC_XORF                ; Used in aarch64-sve.md.
> +    UNSPEC_SMUL_HIGHPART ; Used in aarch64-sve.md.
> +    UNSPEC_UMUL_HIGHPART ; Used in aarch64-sve.md.
>      UNSPEC_COND_ADD    ; Used in aarch64-sve.md.
>      UNSPEC_COND_SUB    ; Used in aarch64-sve.md.
>      UNSPEC_COND_SMAX   ; Used in aarch64-sve.md.
> @@ -1467,6 +1469,8 @@ (define_int_iterator UNPACK [UNSPEC_UNPA
>
>  (define_int_iterator UNPACK_UNSIGNED [UNSPEC_UNPACKULO UNSPEC_UNPACKUHI])
>
> +(define_int_iterator MUL_HIGHPART [UNSPEC_SMUL_HIGHPART UNSPEC_UMUL_HIGHPART])
> +
>  (define_int_iterator SVE_COND_INT_OP [UNSPEC_COND_ADD UNSPEC_COND_SUB
>                                       UNSPEC_COND_SMAX UNSPEC_COND_UMAX
>                                       UNSPEC_COND_SMIN UNSPEC_COND_UMIN
> @@ -1558,7 +1562,9 @@ (define_int_attr logicalf_op [(UNSPEC_AN
>  (define_int_attr su [(UNSPEC_UNPACKSHI "s")
>                      (UNSPEC_UNPACKUHI "u")
>                      (UNSPEC_UNPACKSLO "s")
> -                    (UNSPEC_UNPACKULO "u")])
> +                    (UNSPEC_UNPACKULO "u")
> +                    (UNSPEC_SMUL_HIGHPART "s")
> +                    (UNSPEC_UMUL_HIGHPART "u")])
>
>  (define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u")
>                       (UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur")
> Index: gcc/config/aarch64/aarch64-sve.md
> ===================================================================
> --- gcc/config/aarch64/aarch64-sve.md   2018-02-01 11:04:16.723192040 +0000
> +++ gcc/config/aarch64/aarch64-sve.md   2018-02-08 13:51:56.252511923 +0000
> @@ -980,6 +980,34 @@ (define_insn "*msub<mode>3"
>     mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
>  )
>
> +;; Unpredicated highpart multiplication.
> +(define_expand "<su>mul<mode>3_highpart"
> +  [(set (match_operand:SVE_I 0 "register_operand")
> +       (unspec:SVE_I
> +         [(match_dup 3)
> +          (unspec:SVE_I [(match_operand:SVE_I 1 "register_operand")
> +                         (match_operand:SVE_I 2 "register_operand")]
> +                        MUL_HIGHPART)]
> +         UNSPEC_MERGE_PTRUE))]
> +  "TARGET_SVE"
> +  {
> +    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
> +  }
> +)
> +
> +;; Predicated highpart multiplication.
> +(define_insn "*<su>mul<mode>3_highpart"
> +  [(set (match_operand:SVE_I 0 "register_operand" "=w")
> +       (unspec:SVE_I
> +         [(match_operand:<VPRED> 1 "register_operand" "Upl")
> +          (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0")
> +                         (match_operand:SVE_I 3 "register_operand" "w")]
> +                        MUL_HIGHPART)]
> +         UNSPEC_MERGE_PTRUE))]
> +  "TARGET_SVE"
> +  "<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
> +)
> +
>  ;; Unpredicated NEG, NOT and POPCOUNT.
>  (define_expand "<optab><mode>2"
>    [(set (match_operand:SVE_I 0 "register_operand")
> Index: gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1.c
> ===================================================================
> --- /dev/null   2018-02-08 11:17:10.862716283 +0000
> +++ gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1.c       2018-02-08 13:51:56.252511923 +0000
> @@ -0,0 +1,25 @@
> +/* { dg-do assemble { target aarch64_asm_sve_ok } } */
> +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */
> +
> +#include <stdint.h>
> +
> +#define DEF_LOOP(TYPE)                         \
> +void __attribute__ ((noipa))                   \
> +mod_##TYPE (TYPE *dst, TYPE *src, int count)   \
> +{                                              \
> +  for (int i = 0; i < count; ++i)              \
> +    dst[i] = src[i] % 17;                      \
> +}
> +
> +#define TEST_ALL(T) \
> +  T (int32_t) \
> +  T (uint32_t) \
> +  T (int64_t) \
> +  T (uint64_t)
> +
> +TEST_ALL (DEF_LOOP)
> +
> +/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
> Index: gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1_run.c
> ===================================================================
> --- /dev/null   2018-02-08 11:17:10.862716283 +0000
> +++ gcc/testsuite/gcc.target/aarch64/sve/mul_highpart_1_run.c   2018-02-08 13:51:56.253511883 +0000
> @@ -0,0 +1,29 @@
> +/* { dg-do run } */
You forgot to include an effective-target check to prevent the test from
being run on non-SVE-capable HW.

I suppose check_effective_target_aarch64_sve_hw would work, but I'm
not sure it's sufficient to prevent the test from being compiled with old
binutils that do not support SVE: maybe you also need to add
aarch64_asm_sve_ok, as in the other testcase?

Thanks,

Christophe

> +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */
> +
> +#include "mul_highpart_1.c"
> +
> +#define N 79
> +
> +#define TEST_LOOP(TYPE)                                \
> +  {                                            \
> +    TYPE dst[N], src[N];                       \
> +    for (int i = 0; i < N; ++i)                        \
> +      {                                                \
> +       src[i] = i * 7 + i % 3;                 \
> +       if (i % 11 > 7)                         \
> +         src[i] = -src[i];                     \
> +       asm volatile ("" ::: "memory");         \
> +      }                                                \
> +    mod_##TYPE (dst, src, N);                  \
> +    for (int i = 0; i < N; ++i)                        \
> +      if (dst[i] != src[i] % 17)               \
> +       __builtin_abort ();                     \
> +  }
> +
> +int
> +main (void)
> +{
> +  TEST_ALL (TEST_LOOP);
> +  return 0;
> +}

Reply via email to