This patch extends the previous patch, which added vcmp support with MVE, to also handle __fp16. To do this, we update the existing expanders to use the VDQWH iterator and add a new expander, vcond<VH_cvtto><mode>. In the process we create the new iterator and mode attribute this requires (V16 and VH_cvtto), and extend v_cmp_result to cover V4HF and V8HF.
2021-04-26 Christophe Lyon <christophe.l...@linaro.org> gcc/ * config/arm/iterators.md (V16): New iterator. (VH_cvtto): New attribute. (v_cmp_result): Added V4HF and V8HF support. * config/arm/vec-common.md (vec_cmp<mode><v_cmp_result>): Use VDQWH. (vcond<mode><mode>): Likewise. (vcond_mask_<mode><v_cmp_result>): Likewise. (vcond<VH_cvtto><mode>): New expander. gcc/testsuite/ * gcc.target/arm/simd/mve-compare-3.c: New test with GCC vectors. * gcc.target/arm/simd/mve-vcmp-f16.c: New test for auto-vectorization. --- gcc/config/arm/iterators.md | 6 ++++ gcc/config/arm/vec-common.md | 40 ++++++++++++++++------- gcc/testsuite/gcc.target/arm/simd/mve-compare-3.c | 38 +++++++++++++++++++++ gcc/testsuite/gcc.target/arm/simd/mve-vcmp-f16.c | 30 +++++++++++++++++ 4 files changed, 102 insertions(+), 12 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-compare-3.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vcmp-f16.c diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index a128465..3042baf 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -231,6 +231,9 @@ (define_mode_iterator VU [V16QI V8HI V4SI]) ;; Vector modes for 16-bit floating-point support. (define_mode_iterator VH [V8HF V4HF]) +;; Modes with 16-bit elements only. +(define_mode_iterator V16 [V4HI V4HF V8HI V8HF]) + ;; 16-bit floating-point vector modes suitable for moving (includes BFmode). (define_mode_iterator VHFBF [V8HF V4HF V4BF V8BF]) @@ -571,6 +574,8 @@ (define_mode_attr V_cvtto [(V2SI "v2sf") (V2SF "v2si") ;; (Opposite) mode to convert to/from for vector-half mode conversions. (define_mode_attr VH_CVTTO [(V4HI "V4HF") (V4HF "V4HI") (V8HI "V8HF") (V8HF "V8HI")]) +(define_mode_attr VH_cvtto [(V4HI "v4hf") (V4HF "v4hi") + (V8HI "v8hf") (V8HF "v8hi")]) ;; Define element mode for each vector mode. 
(define_mode_attr V_elem [(V8QI "QI") (V16QI "QI") @@ -720,6 +725,7 @@ (define_mode_attr V_cmp_result [(V8QI "V8QI") (V16QI "V16QI") (define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi") (V4HI "v4hi") (V8HI "v8hi") (V2SI "v2si") (V4SI "v4si") + (V4HF "v4hi") (V8HF "v8hi") (DI "di") (V2DI "v2di") (V2SF "v2si") (V4SF "v4si")]) diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md index 034b48b..3fd341c 100644 --- a/gcc/config/arm/vec-common.md +++ b/gcc/config/arm/vec-common.md @@ -366,8 +366,8 @@ (define_expand "vlshr<mode>3" (define_expand "vec_cmp<mode><v_cmp_result>" [(set (match_operand:<V_cmp_result> 0 "s_register_operand") (match_operator:<V_cmp_result> 1 "comparison_operator" - [(match_operand:VDQW 2 "s_register_operand") - (match_operand:VDQW 3 "reg_or_zero_operand")]))] + [(match_operand:VDQWH 2 "s_register_operand") + (match_operand:VDQWH 3 "reg_or_zero_operand")]))] "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT && (!<Is_float_mode> || flag_unsafe_math_optimizations)" @@ -399,13 +399,13 @@ (define_expand "vec_cmpu<mode><mode>" ;; element-wise. 
(define_expand "vcond<mode><mode>" - [(set (match_operand:VDQW 0 "s_register_operand") - (if_then_else:VDQW + [(set (match_operand:VDQWH 0 "s_register_operand") + (if_then_else:VDQWH (match_operator 3 "comparison_operator" - [(match_operand:VDQW 4 "s_register_operand") - (match_operand:VDQW 5 "reg_or_zero_operand")]) - (match_operand:VDQW 1 "s_register_operand") - (match_operand:VDQW 2 "s_register_operand")))] + [(match_operand:VDQWH 4 "s_register_operand") + (match_operand:VDQWH 5 "reg_or_zero_operand")]) + (match_operand:VDQWH 1 "s_register_operand") + (match_operand:VDQWH 2 "s_register_operand")))] "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT && (!<Is_float_mode> || flag_unsafe_math_optimizations)" @@ -430,6 +430,22 @@ (define_expand "vcond<V_cvtto><mode>" DONE; }) +(define_expand "vcond<VH_cvtto><mode>" + [(set (match_operand:<VH_CVTTO> 0 "s_register_operand") + (if_then_else:<VH_CVTTO> + (match_operator 3 "comparison_operator" + [(match_operand:V16 4 "s_register_operand") + (match_operand:V16 5 "reg_or_zero_operand")]) + (match_operand:<VH_CVTTO> 1 "s_register_operand") + (match_operand:<VH_CVTTO> 2 "s_register_operand")))] + "ARM_HAVE_<MODE>_ARITH + && !TARGET_REALLY_IWMMXT + && (!<Is_float_mode> || flag_unsafe_math_optimizations)" +{ + arm_expand_vcond (operands, <V_cmp_result>mode); + DONE; +}) + (define_expand "vcondu<mode><v_cmp_result>" [(set (match_operand:VDQW 0 "s_register_operand") (if_then_else:VDQW @@ -446,11 +462,11 @@ (define_expand "vcondu<mode><v_cmp_result>" }) (define_expand "vcond_mask_<mode><v_cmp_result>" - [(set (match_operand:VDQW 0 "s_register_operand") - (if_then_else:VDQW + [(set (match_operand:VDQWH 0 "s_register_operand") + (if_then_else:VDQWH (match_operand:<V_cmp_result> 3 "s_register_operand") - (match_operand:VDQW 1 "s_register_operand") - (match_operand:VDQW 2 "s_register_operand")))] + (match_operand:VDQWH 1 "s_register_operand") + (match_operand:VDQWH 2 "s_register_operand")))] "ARM_HAVE_<MODE>_ARITH && 
!TARGET_REALLY_IWMMXT" { diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-compare-3.c b/gcc/testsuite/gcc.target/arm/simd/mve-compare-3.c new file mode 100644 index 0000000..76f81e8 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/mve-compare-3.c @@ -0,0 +1,38 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O3 -funsafe-math-optimizations" } */ + +/* float 16 tests. */ + +#ifndef ELEM_TYPE +#define ELEM_TYPE __fp16 +#endif +#ifndef INT_ELEM_TYPE +#define INT_ELEM_TYPE __INT16_TYPE__ +#endif + +#define COMPARE(NAME, OP) \ + int_vec \ + cmp_##NAME##_reg (vec a, vec b) \ + { \ + return a OP b; \ + } + +typedef INT_ELEM_TYPE int_vec __attribute__((vector_size(16))); +typedef ELEM_TYPE vec __attribute__((vector_size(16))); + +COMPARE (eq, ==) +COMPARE (ne, !=) +COMPARE (lt, <) +COMPARE (le, <=) +COMPARE (gt, >) +COMPARE (ge, >=) + +/* eq, ne, lt, le, gt, ge. */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\teq, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tne, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tlt, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tle, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tgt, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tge, q[0-9]+, q[0-9]+\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vcmp-f16.c b/gcc/testsuite/gcc.target/arm/simd/mve-vcmp-f16.c new file mode 100644 index 0000000..dbae2d1 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vcmp-f16.c @@ -0,0 +1,30 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O3 -funsafe-math-optimizations" } */ + +#include <stdint.h> + +#define NB 8 + +#define FUNC(OP, NAME) \ + 
void test_ ## NAME ##_f (__fp16 * __restrict__ dest, __fp16 *a, __fp16 *b) { \ + int i; \ + for (i=0; i<NB; i++) { \ + dest[i] = a[i] OP b[i]; \ + } \ + } + +FUNC(==, vcmpeq) +FUNC(!=, vcmpne) +FUNC(<, vcmplt) +FUNC(<=, vcmple) +FUNC(>, vcmpgt) +FUNC(>=, vcmpge) + +/* { dg-final { scan-assembler-times {\tvcmp.f16\teq, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tne, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tlt, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tle, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tgt, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f16\tge, q[0-9]+, q[0-9]+\n} 1 } } */ -- 2.7.4