Re: [PATCH] Enable vectorization for _Float16 floor/ceil/trunc/nearbyint/rint operations.

2021-10-28 Thread Hongtao Liu via Gcc-patches
On Thu, Oct 28, 2021 at 10:26 AM Hongtao Liu  wrote:
>
> On Mon, Oct 25, 2021 at 4:24 PM liuhongt  wrote:
> >
> >   Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> >   Ok for trunk?
> >
> I'm going to check in this patch if there's no objection.
Committed.
> > gcc/ChangeLog:
> >
> > PR target/102464
> > * config/i386/i386-builtin-types.def (V8HF_FTYPE_V8HF): New
> > function type.
> > (V16HF_FTYPE_V16HF): Ditto.
> > (V32HF_FTYPE_V32HF): Ditto.
> > (V8HF_FTYPE_V8HF_ROUND): Ditto.
> > (V16HF_FTYPE_V16HF_ROUND): Ditto.
> > (V32HF_FTYPE_V32HF_ROUND): Ditto.
> > * config/i386/i386-builtin.def ( IX86_BUILTIN_FLOORPH,
> > IX86_BUILTIN_CEILPH, IX86_BUILTIN_TRUNCPH,
> > IX86_BUILTIN_FLOORPH256, IX86_BUILTIN_CEILPH256,
> > IX86_BUILTIN_TRUNCPH256, IX86_BUILTIN_FLOORPH512,
> > IX86_BUILTIN_CEILPH512, IX86_BUILTIN_TRUNCPH512): New builtin.
> > * config/i386/i386-builtins.c
> > (ix86_builtin_vectorized_function): Enable vectorization for
> > HFmode FLOOR/CEIL/TRUNC operation.
> > * config/i386/i386-expand.c (ix86_expand_args_builtin): Handle
> > new builtins.
> > * config/i386/sse.md (rint<mode>2, nearbyint<mode>2): Extend
> > to vector HFmodes.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/i386/pr102464-vrndscaleph.c: New test.
> > ---
> >  gcc/config/i386/i386-builtin-types.def|   7 ++
> >  gcc/config/i386/i386-builtin.def  |  11 ++
> >  gcc/config/i386/i386-builtins.c   |  42 +++
> >  gcc/config/i386/i386-expand.c |   3 +
> >  gcc/config/i386/sse.md|  12 +-
> >  .../gcc.target/i386/pr102464-vrndscaleph.c| 115 ++
> >  6 files changed, 184 insertions(+), 6 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102464-vrndscaleph.c
> >
> > diff --git a/gcc/config/i386/i386-builtin-types.def 
> > b/gcc/config/i386/i386-builtin-types.def
> > index 4c355c587b5..e33f06ab30b 100644
> > --- a/gcc/config/i386/i386-builtin-types.def
> > +++ b/gcc/config/i386/i386-builtin-types.def
> > @@ -1380,3 +1380,10 @@ DEF_FUNCTION_TYPE (USI, V32HF, V32HF, INT, USI, INT)
> >  DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, UHI, INT)
> >  DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, USI, INT)
> >  DEF_FUNCTION_TYPE (V32HF, V32HF, INT, V32HF, USI, INT)
> > +
> > +DEF_FUNCTION_TYPE (V8HF, V8HF)
> > +DEF_FUNCTION_TYPE (V16HF, V16HF)
> > +DEF_FUNCTION_TYPE (V32HF, V32HF)
> > +DEF_FUNCTION_TYPE_ALIAS (V8HF_FTYPE_V8HF, ROUND)
> > +DEF_FUNCTION_TYPE_ALIAS (V16HF_FTYPE_V16HF, ROUND)
> > +DEF_FUNCTION_TYPE_ALIAS (V32HF_FTYPE_V32HF, ROUND)
> > diff --git a/gcc/config/i386/i386-builtin.def 
> > b/gcc/config/i386/i386-builtin.def
> > index 99217d08d37..d9eee3f373c 100644
> > --- a/gcc/config/i386/i386-builtin.def
> > +++ b/gcc/config/i386/i386-builtin.def
> > @@ -958,6 +958,10 @@ BDESC (OPTION_MASK_ISA_SSE4_1, 0, 
> > CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__buil
> >  BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_roundv2df2, 
> > "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) 
> > V2DF_FTYPE_V2DF)
> >  BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_roundv2df2_vec_pack_sfix, 
> > "__builtin_ia32_roundpd_az_vec_pack_sfix", 
> > IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF)
> >
> > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, 
> > CODE_FOR_avx512fp16_rndscalev8hf, "__builtin_ia32_floorph", 
> > IX86_BUILTIN_FLOORPH, (enum rtx_code) ROUND_FLOOR, (int) 
> > V8HF_FTYPE_V8HF_ROUND)
> > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, 
> > CODE_FOR_avx512fp16_rndscalev8hf, "__builtin_ia32_ceilph", 
> > IX86_BUILTIN_CEILPH, (enum rtx_code) ROUND_CEIL, (int) 
> > V8HF_FTYPE_V8HF_ROUND)
> > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, 
> > CODE_FOR_avx512fp16_rndscalev8hf, "__builtin_ia32_truncph", 
> > IX86_BUILTIN_TRUNCPH, (enum rtx_code) ROUND_TRUNC, (int) 
> > V8HF_FTYPE_V8HF_ROUND)
> > +
> >  BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, 
> > "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) 
> > ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND)
> >  BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, 
> > "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, 
> > (int) V4SF_FTYPE_V4SF_ROUND)
> >  BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, 
> > "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) 
> > ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND)
> > @@ -1090,6 +1094,10 @@ BDESC (OPTION_MASK_ISA_AVX, 0, 
> > CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia3
> >  BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundpd_vec_pack_sfix256, 
> > "__builtin_ia32_floorpd_vec_pack_sfix256", 
> > IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) 
> > V8SI_FTYPE_V4DF_V4DF_ROUND)
> >  BDESC 

Re: [PATCH] Enable vectorization for _Float16 floor/ceil/trunc/nearbyint/rint operations.

2021-10-27 Thread Hongtao Liu via Gcc-patches
On Mon, Oct 25, 2021 at 4:24 PM liuhongt  wrote:
>
>   Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
>   Ok for trunk?
>
I'm going to check in this patch if there's no objection.
> gcc/ChangeLog:
>
> PR target/102464
> * config/i386/i386-builtin-types.def (V8HF_FTYPE_V8HF): New
> function type.
> (V16HF_FTYPE_V16HF): Ditto.
> (V32HF_FTYPE_V32HF): Ditto.
> (V8HF_FTYPE_V8HF_ROUND): Ditto.
> (V16HF_FTYPE_V16HF_ROUND): Ditto.
> (V32HF_FTYPE_V32HF_ROUND): Ditto.
> * config/i386/i386-builtin.def ( IX86_BUILTIN_FLOORPH,
> IX86_BUILTIN_CEILPH, IX86_BUILTIN_TRUNCPH,
> IX86_BUILTIN_FLOORPH256, IX86_BUILTIN_CEILPH256,
> IX86_BUILTIN_TRUNCPH256, IX86_BUILTIN_FLOORPH512,
> IX86_BUILTIN_CEILPH512, IX86_BUILTIN_TRUNCPH512): New builtin.
> * config/i386/i386-builtins.c
> (ix86_builtin_vectorized_function): Enable vectorization for
> HFmode FLOOR/CEIL/TRUNC operation.
> * config/i386/i386-expand.c (ix86_expand_args_builtin): Handle
> new builtins.
> * config/i386/sse.md (rint<mode>2, nearbyint<mode>2): Extend
> to vector HFmodes.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr102464-vrndscaleph.c: New test.
> ---
>  gcc/config/i386/i386-builtin-types.def|   7 ++
>  gcc/config/i386/i386-builtin.def  |  11 ++
>  gcc/config/i386/i386-builtins.c   |  42 +++
>  gcc/config/i386/i386-expand.c |   3 +
>  gcc/config/i386/sse.md|  12 +-
>  .../gcc.target/i386/pr102464-vrndscaleph.c| 115 ++
>  6 files changed, 184 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102464-vrndscaleph.c
>
> diff --git a/gcc/config/i386/i386-builtin-types.def 
> b/gcc/config/i386/i386-builtin-types.def
> index 4c355c587b5..e33f06ab30b 100644
> --- a/gcc/config/i386/i386-builtin-types.def
> +++ b/gcc/config/i386/i386-builtin-types.def
> @@ -1380,3 +1380,10 @@ DEF_FUNCTION_TYPE (USI, V32HF, V32HF, INT, USI, INT)
>  DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, UHI, INT)
>  DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, USI, INT)
>  DEF_FUNCTION_TYPE (V32HF, V32HF, INT, V32HF, USI, INT)
> +
> +DEF_FUNCTION_TYPE (V8HF, V8HF)
> +DEF_FUNCTION_TYPE (V16HF, V16HF)
> +DEF_FUNCTION_TYPE (V32HF, V32HF)
> +DEF_FUNCTION_TYPE_ALIAS (V8HF_FTYPE_V8HF, ROUND)
> +DEF_FUNCTION_TYPE_ALIAS (V16HF_FTYPE_V16HF, ROUND)
> +DEF_FUNCTION_TYPE_ALIAS (V32HF_FTYPE_V32HF, ROUND)
> diff --git a/gcc/config/i386/i386-builtin.def 
> b/gcc/config/i386/i386-builtin.def
> index 99217d08d37..d9eee3f373c 100644
> --- a/gcc/config/i386/i386-builtin.def
> +++ b/gcc/config/i386/i386-builtin.def
> @@ -958,6 +958,10 @@ BDESC (OPTION_MASK_ISA_SSE4_1, 0, 
> CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__buil
>  BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_roundv2df2, 
> "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) 
> V2DF_FTYPE_V2DF)
>  BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_roundv2df2_vec_pack_sfix, 
> "__builtin_ia32_roundpd_az_vec_pack_sfix", 
> IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF)
>
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, 
> CODE_FOR_avx512fp16_rndscalev8hf, "__builtin_ia32_floorph", 
> IX86_BUILTIN_FLOORPH, (enum rtx_code) ROUND_FLOOR, (int) 
> V8HF_FTYPE_V8HF_ROUND)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, 
> CODE_FOR_avx512fp16_rndscalev8hf, "__builtin_ia32_ceilph", 
> IX86_BUILTIN_CEILPH, (enum rtx_code) ROUND_CEIL, (int) V8HF_FTYPE_V8HF_ROUND)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, 
> CODE_FOR_avx512fp16_rndscalev8hf, "__builtin_ia32_truncph", 
> IX86_BUILTIN_TRUNCPH, (enum rtx_code) ROUND_TRUNC, (int) 
> V8HF_FTYPE_V8HF_ROUND)
> +
>  BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, 
> "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, 
> (int) V4SF_FTYPE_V4SF_ROUND)
>  BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, 
> "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, 
> (int) V4SF_FTYPE_V4SF_ROUND)
>  BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, 
> "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, 
> (int) V4SF_FTYPE_V4SF_ROUND)
> @@ -1090,6 +1094,10 @@ BDESC (OPTION_MASK_ISA_AVX, 0, 
> CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia3
>  BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundpd_vec_pack_sfix256, 
> "__builtin_ia32_floorpd_vec_pack_sfix256", 
> IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) 
> V8SI_FTYPE_V4DF_V4DF_ROUND)
>  BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundpd_vec_pack_sfix256, 
> "__builtin_ia32_ceilpd_vec_pack_sfix256", 
> IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) 
> V8SI_FTYPE_V4DF_V4DF_ROUND)
>
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, 
> 

[PATCH] Enable vectorization for _Float16 floor/ceil/trunc/nearbyint/rint operations.

2021-10-25 Thread liuhongt via Gcc-patches
  Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
  Ok for trunk?

gcc/ChangeLog:

PR target/102464
* config/i386/i386-builtin-types.def (V8HF_FTYPE_V8HF): New
function type.
(V16HF_FTYPE_V16HF): Ditto.
(V32HF_FTYPE_V32HF): Ditto.
(V8HF_FTYPE_V8HF_ROUND): Ditto.
(V16HF_FTYPE_V16HF_ROUND): Ditto.
(V32HF_FTYPE_V32HF_ROUND): Ditto.
* config/i386/i386-builtin.def ( IX86_BUILTIN_FLOORPH,
IX86_BUILTIN_CEILPH, IX86_BUILTIN_TRUNCPH,
IX86_BUILTIN_FLOORPH256, IX86_BUILTIN_CEILPH256,
IX86_BUILTIN_TRUNCPH256, IX86_BUILTIN_FLOORPH512,
IX86_BUILTIN_CEILPH512, IX86_BUILTIN_TRUNCPH512): New builtin.
* config/i386/i386-builtins.c
(ix86_builtin_vectorized_function): Enable vectorization for
HFmode FLOOR/CEIL/TRUNC operation.
* config/i386/i386-expand.c (ix86_expand_args_builtin): Handle
new builtins.
* config/i386/sse.md (rint<mode>2, nearbyint<mode>2): Extend
to vector HFmodes.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr102464-vrndscaleph.c: New test.
---
 gcc/config/i386/i386-builtin-types.def|   7 ++
 gcc/config/i386/i386-builtin.def  |  11 ++
 gcc/config/i386/i386-builtins.c   |  42 +++
 gcc/config/i386/i386-expand.c |   3 +
 gcc/config/i386/sse.md|  12 +-
 .../gcc.target/i386/pr102464-vrndscaleph.c| 115 ++
 6 files changed, 184 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102464-vrndscaleph.c

diff --git a/gcc/config/i386/i386-builtin-types.def 
b/gcc/config/i386/i386-builtin-types.def
index 4c355c587b5..e33f06ab30b 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -1380,3 +1380,10 @@ DEF_FUNCTION_TYPE (USI, V32HF, V32HF, INT, USI, INT)
 DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, UHI, INT)
 DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, USI, INT)
 DEF_FUNCTION_TYPE (V32HF, V32HF, INT, V32HF, USI, INT)
+
+DEF_FUNCTION_TYPE (V8HF, V8HF)
+DEF_FUNCTION_TYPE (V16HF, V16HF)
+DEF_FUNCTION_TYPE (V32HF, V32HF)
+DEF_FUNCTION_TYPE_ALIAS (V8HF_FTYPE_V8HF, ROUND)
+DEF_FUNCTION_TYPE_ALIAS (V16HF_FTYPE_V16HF, ROUND)
+DEF_FUNCTION_TYPE_ALIAS (V32HF_FTYPE_V32HF, ROUND)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 99217d08d37..d9eee3f373c 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -958,6 +958,10 @@ BDESC (OPTION_MASK_ISA_SSE4_1, 0, 
CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__buil
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_roundv2df2, 
"__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) 
V2DF_FTYPE_V2DF)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_roundv2df2_vec_pack_sfix, 
"__builtin_ia32_roundpd_az_vec_pack_sfix", 
IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF)
 
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, 
CODE_FOR_avx512fp16_rndscalev8hf, "__builtin_ia32_floorph", 
IX86_BUILTIN_FLOORPH, (enum rtx_code) ROUND_FLOOR, (int) V8HF_FTYPE_V8HF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, 
CODE_FOR_avx512fp16_rndscalev8hf, "__builtin_ia32_ceilph", IX86_BUILTIN_CEILPH, 
(enum rtx_code) ROUND_CEIL, (int) V8HF_FTYPE_V8HF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, 
CODE_FOR_avx512fp16_rndscalev8hf, "__builtin_ia32_truncph", 
IX86_BUILTIN_TRUNCPH, (enum rtx_code) ROUND_TRUNC, (int) V8HF_FTYPE_V8HF_ROUND)
+
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, 
"__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, 
(int) V4SF_FTYPE_V4SF_ROUND)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, 
"__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) 
V4SF_FTYPE_V4SF_ROUND)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps, 
"__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, 
(int) V4SF_FTYPE_V4SF_ROUND)
@@ -1090,6 +1094,10 @@ BDESC (OPTION_MASK_ISA_AVX, 0, 
CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia3
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundpd_vec_pack_sfix256, 
"__builtin_ia32_floorpd_vec_pack_sfix256", 
IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) 
V8SI_FTYPE_V4DF_V4DF_ROUND)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_roundpd_vec_pack_sfix256, 
"__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, 
(enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND)
 
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, 
CODE_FOR_avx512vl_rndscalev16hf, "__builtin_ia32_floorph256", 
IX86_BUILTIN_FLOORPH256, (enum rtx_code) ROUND_FLOOR, (int) 
V16HF_FTYPE_V16HF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, 
CODE_FOR_avx512vl_rndscalev16hf, "__builtin_ia32_ceilph256", 
IX86_BUILTIN_CEILPH256, (enum