Re: [PATCH] i386: Do not sanitize upper part of V2HFmode and V4HFmode reg with -fno-trapping-math [PR110832]
On Thu, Aug 10, 2023 at 2:06 PM Hongtao Liu wrote: > > On Thu, Aug 10, 2023 at 2:01 PM Uros Bizjak via Gcc-patches > wrote: > > > > On Thu, Aug 10, 2023 at 2:49 AM liuhongt wrote: > > > > > > Also add ix86_partial_vec_fp_math to the condition of V2HF/V4HF named > > > patterns in order to avoid generation of partial vector V8HFmode > > > trapping instructions. > > > > > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,} > > > Ok for trunk? > > > > > > gcc/ChangeLog: > > > > > > PR target/110832 > > > * config/i386/mmx.md: (movq_<mode>_to_sse): Also do not > > > sanitize upper part of V4HFmode register with > > > -fno-trapping-math. > > > (<insn>v4hf3): Enable for ix86_partial_vec_fp_math. > > > (divv4hf3): Ditto. > > > (<insn>v2hf3): Ditto. > > > (divv2hf3): Ditto. > > > (movd_v2hf_to_sse): Do not sanitize upper part of V2HFmode > > > register with -fno-trapping-math. > > > > OK. > > > > BTW: I would just like to mention that plenty of instructions can be > > enabled for V4HF/V2HFmode besides arithmetic insns. At least > > conversions, comparisons, FMA and min/max (to name some of them) can > > be enabled by introducing expanders that expand to V8HFmode > > instruction. > Yes, try to support that in GCC14. I would wait for avx10's patch to go in first, so as to avoid extra rebases and conflicts. > > > > Uros. 
> > > > > > --- > > > gcc/config/i386/mmx.md | 20 ++++++++++++++------ > > > 1 file changed, 14 insertions(+), 6 deletions(-) > > > > > > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md > > > index d51b3b9dc71..170432a7128 100644 > > > --- a/gcc/config/i386/mmx.md > > > +++ b/gcc/config/i386/mmx.md > > > @@ -596,7 +596,7 @@ (define_expand "movq_<mode>_to_sse" > > > (match_dup 2)))] > > >"TARGET_SSE2" > > > { > > > - if (<MODE>mode == V2SFmode > > > + if (<MODE>mode != V2SImode > > >&& !flag_trapping_math) > > > { > > >rtx op1 = force_reg (<MODE>mode, operands[1]); > > > @@ -1941,7 +1941,7 @@ (define_expand "<insn>v4hf3" > > > (plusminusmult:V4HF > > > (match_operand:V4HF 1 "nonimmediate_operand") > > > (match_operand:V4HF 2 "nonimmediate_operand")))] > > > - "TARGET_AVX512FP16 && TARGET_AVX512VL" > > > + "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math" > > > { > > >rtx op2 = gen_reg_rtx (V8HFmode); > > >rtx op1 = gen_reg_rtx (V8HFmode); > > > @@ -1961,7 +1961,7 @@ (define_expand "divv4hf3" > > > (div:V4HF > > > (match_operand:V4HF 1 "nonimmediate_operand") > > > (match_operand:V4HF 2 "nonimmediate_operand")))] > > > - "TARGET_AVX512FP16 && TARGET_AVX512VL" > > > + "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math" > > > { > > >rtx op2 = gen_reg_rtx (V8HFmode); > > >rtx op1 = gen_reg_rtx (V8HFmode); > > > @@ -1983,14 +1983,22 @@ (define_expand "movd_v2hf_to_sse" > > > (match_operand:V2HF 1 "nonimmediate_operand")) > > > (match_operand:V8HF 2 "reg_or_0_operand") > > > (const_int 3)))] > > > - "TARGET_SSE") > > > + "TARGET_SSE" > > > +{ > > > + if (!flag_trapping_math && operands[2] == CONST0_RTX (V8HFmode)) > > > + { > > > +rtx op1 = force_reg (V2HFmode, operands[1]); > > > +emit_move_insn (operands[0], lowpart_subreg (V8HFmode, op1, > > > V2HFmode)); > > > +DONE; > > > + } > > > +}) > > > > > > (define_expand "<insn>v2hf3" > > >[(set (match_operand:V2HF 0 "register_operand") > > > (plusminusmult:V2HF > > > (match_operand:V2HF 1 "nonimmediate_operand") > > > 
(match_operand:V2HF 2 "nonimmediate_operand")))] > > > - "TARGET_AVX512FP16 && TARGET_AVX512VL" > > > + "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math" > > > { > > >rtx op2 = gen_reg_rtx (V8HFmode); > > >rtx op1 = gen_reg_rtx (V8HFmode); > > > @@ -2009,7 +2017,7 @@ (define_expand "divv2hf3" > > > (div:V2HF > > > (match_operand:V2HF 1 "nonimmediate_operand") > > > (match_operand:V2HF 2 "nonimmediate_operand")))] > > > - "TARGET_AVX512FP16 && TARGET_AVX512VL" > > > + "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math" > > > { > > >rtx op2 = gen_reg_rtx (V8HFmode); > > >rtx op1 = gen_reg_rtx (V8HFmode); > > > -- > > > 2.31.1 > > > > > > > -- > BR, > Hongtao -- BR, Hongtao
Re: [PATCH] i386: Do not sanitize upper part of V2HFmode and V4HFmode reg with -fno-trapping-math [PR110832]
On Thu, Aug 10, 2023 at 2:01 PM Uros Bizjak via Gcc-patches wrote: > > On Thu, Aug 10, 2023 at 2:49 AM liuhongt wrote: > > > > Also add ix86_partial_vec_fp_math to the condition of V2HF/V4HF named > > patterns in order to avoid generation of partial vector V8HFmode > > trapping instructions. > > > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,} > > Ok for trunk? > > > > gcc/ChangeLog: > > > > PR target/110832 > > * config/i386/mmx.md: (movq_<mode>_to_sse): Also do not > > sanitize upper part of V4HFmode register with > > -fno-trapping-math. > > (<insn>v4hf3): Enable for ix86_partial_vec_fp_math. > > (divv4hf3): Ditto. > > (<insn>v2hf3): Ditto. > > (divv2hf3): Ditto. > > (movd_v2hf_to_sse): Do not sanitize upper part of V2HFmode > > register with -fno-trapping-math. > > OK. > > BTW: I would just like to mention that plenty of instructions can be > enabled for V4HF/V2HFmode besides arithmetic insns. At least > conversions, comparisons, FMA and min/max (to name some of them) can > be enabled by introducing expanders that expand to V8HFmode > instruction. Yes, try to support that in GCC14. > > Uros. 
> > > > --- > > gcc/config/i386/mmx.md | 20 ++++++++++++++------ > > 1 file changed, 14 insertions(+), 6 deletions(-) > > > > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md > > index d51b3b9dc71..170432a7128 100644 > > --- a/gcc/config/i386/mmx.md > > +++ b/gcc/config/i386/mmx.md > > @@ -596,7 +596,7 @@ (define_expand "movq_<mode>_to_sse" > > (match_dup 2)))] > >"TARGET_SSE2" > > { > > - if (<MODE>mode == V2SFmode > > + if (<MODE>mode != V2SImode > >&& !flag_trapping_math) > > { > >rtx op1 = force_reg (<MODE>mode, operands[1]); > > @@ -1941,7 +1941,7 @@ (define_expand "<insn>v4hf3" > > (plusminusmult:V4HF > > (match_operand:V4HF 1 "nonimmediate_operand") > > (match_operand:V4HF 2 "nonimmediate_operand")))] > > - "TARGET_AVX512FP16 && TARGET_AVX512VL" > > + "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math" > > { > >rtx op2 = gen_reg_rtx (V8HFmode); > >rtx op1 = gen_reg_rtx (V8HFmode); > > @@ -1961,7 +1961,7 @@ (define_expand "divv4hf3" > > (div:V4HF > > (match_operand:V4HF 1 "nonimmediate_operand") > > (match_operand:V4HF 2 "nonimmediate_operand")))] > > - "TARGET_AVX512FP16 && TARGET_AVX512VL" > > + "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math" > > { > >rtx op2 = gen_reg_rtx (V8HFmode); > >rtx op1 = gen_reg_rtx (V8HFmode); > > @@ -1983,14 +1983,22 @@ (define_expand "movd_v2hf_to_sse" > > (match_operand:V2HF 1 "nonimmediate_operand")) > > (match_operand:V8HF 2 "reg_or_0_operand") > > (const_int 3)))] > > - "TARGET_SSE") > > + "TARGET_SSE" > > +{ > > + if (!flag_trapping_math && operands[2] == CONST0_RTX (V8HFmode)) > > + { > > +rtx op1 = force_reg (V2HFmode, operands[1]); > > +emit_move_insn (operands[0], lowpart_subreg (V8HFmode, op1, V2HFmode)); > > +DONE; > > + } > > +}) > > > > (define_expand "<insn>v2hf3" > >[(set (match_operand:V2HF 0 "register_operand") > > (plusminusmult:V2HF > > (match_operand:V2HF 1 "nonimmediate_operand") > > (match_operand:V2HF 2 "nonimmediate_operand")))] > > - "TARGET_AVX512FP16 && TARGET_AVX512VL" > > + "TARGET_AVX512FP16 && 
TARGET_AVX512VL && ix86_partial_vec_fp_math" > > { > >rtx op2 = gen_reg_rtx (V8HFmode); > >rtx op1 = gen_reg_rtx (V8HFmode); > > @@ -2009,7 +2017,7 @@ (define_expand "divv2hf3" > > (div:V2HF > > (match_operand:V2HF 1 "nonimmediate_operand") > > (match_operand:V2HF 2 "nonimmediate_operand")))] > > - "TARGET_AVX512FP16 && TARGET_AVX512VL" > > + "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math" > > { > >rtx op2 = gen_reg_rtx (V8HFmode); > >rtx op1 = gen_reg_rtx (V8HFmode); > > -- > > 2.31.1 > > -- BR, Hongtao
Re: [PATCH] i386: Do not sanitize upper part of V2HFmode and V4HFmode reg with -fno-trapping-math [PR110832]
On Thu, Aug 10, 2023 at 2:49 AM liuhongt wrote: > > Also add ix86_partial_vec_fp_math to the condition of V2HF/V4HF named > patterns in order to avoid generation of partial vector V8HFmode > trapping instructions. > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,} > Ok for trunk? > > gcc/ChangeLog: > > PR target/110832 > * config/i386/mmx.md: (movq_<mode>_to_sse): Also do not > sanitize upper part of V4HFmode register with > -fno-trapping-math. > (<insn>v4hf3): Enable for ix86_partial_vec_fp_math. > (divv4hf3): Ditto. > (<insn>v2hf3): Ditto. > (divv2hf3): Ditto. > (movd_v2hf_to_sse): Do not sanitize upper part of V2HFmode > register with -fno-trapping-math. OK. BTW: I would just like to mention that plenty of instructions can be enabled for V4HF/V2HFmode besides arithmetic insns. At least conversions, comparisons, FMA and min/max (to name some of them) can be enabled by introducing expanders that expand to V8HFmode instruction. Uros. > > --- > gcc/config/i386/mmx.md | 20 ++++++++++++++------ > 1 file changed, 14 insertions(+), 6 deletions(-) > > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md > index d51b3b9dc71..170432a7128 100644 > --- a/gcc/config/i386/mmx.md > +++ b/gcc/config/i386/mmx.md > @@ -596,7 +596,7 @@ (define_expand "movq_<mode>_to_sse" > (match_dup 2)))] >"TARGET_SSE2" > { > - if (<MODE>mode == V2SFmode > + if (<MODE>mode != V2SImode >&& !flag_trapping_math) > { >rtx op1 = force_reg (<MODE>mode, operands[1]); > @@ -1941,7 +1941,7 @@ (define_expand "<insn>v4hf3" > (plusminusmult:V4HF > (match_operand:V4HF 1 "nonimmediate_operand") > (match_operand:V4HF 2 "nonimmediate_operand")))] > - "TARGET_AVX512FP16 && TARGET_AVX512VL" > + "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math" > { >rtx op2 = gen_reg_rtx (V8HFmode); >rtx op1 = gen_reg_rtx (V8HFmode); > @@ -1961,7 +1961,7 @@ (define_expand "divv4hf3" > (div:V4HF > (match_operand:V4HF 1 "nonimmediate_operand") > (match_operand:V4HF 2 "nonimmediate_operand")))] > - "TARGET_AVX512FP16 && TARGET_AVX512VL" > + "TARGET_AVX512FP16 && TARGET_AVX512VL && 
ix86_partial_vec_fp_math" > { >rtx op2 = gen_reg_rtx (V8HFmode); >rtx op1 = gen_reg_rtx (V8HFmode); > @@ -1983,14 +1983,22 @@ (define_expand "movd_v2hf_to_sse" > (match_operand:V2HF 1 "nonimmediate_operand")) > (match_operand:V8HF 2 "reg_or_0_operand") > (const_int 3)))] > - "TARGET_SSE") > + "TARGET_SSE" > +{ > + if (!flag_trapping_math && operands[2] == CONST0_RTX (V8HFmode)) > + { > +rtx op1 = force_reg (V2HFmode, operands[1]); > +emit_move_insn (operands[0], lowpart_subreg (V8HFmode, op1, V2HFmode)); > +DONE; > + } > +}) > > (define_expand "<insn>v2hf3" >[(set (match_operand:V2HF 0 "register_operand") > (plusminusmult:V2HF > (match_operand:V2HF 1 "nonimmediate_operand") > (match_operand:V2HF 2 "nonimmediate_operand")))] > - "TARGET_AVX512FP16 && TARGET_AVX512VL" > + "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math" > { >rtx op2 = gen_reg_rtx (V8HFmode); >rtx op1 = gen_reg_rtx (V8HFmode); > @@ -2009,7 +2017,7 @@ (define_expand "divv2hf3" > (div:V2HF > (match_operand:V2HF 1 "nonimmediate_operand") > (match_operand:V2HF 2 "nonimmediate_operand")))] > - "TARGET_AVX512FP16 && TARGET_AVX512VL" > + "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math" > { >rtx op2 = gen_reg_rtx (V8HFmode); >rtx op1 = gen_reg_rtx (V8HFmode); > -- > 2.31.1 >
[PATCH] i386: Do not sanitize upper part of V2HFmode and V4HFmode reg with -fno-trapping-math [PR110832]
Also add ix86_partial_vec_fp_math to the condition of V2HF/V4HF named patterns in order to avoid generation of partial vector V8HFmode trapping instructions. Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,} Ok for trunk? gcc/ChangeLog: PR target/110832 * config/i386/mmx.md: (movq_<mode>_to_sse): Also do not sanitize upper part of V4HFmode register with -fno-trapping-math. (<insn>v4hf3): Enable for ix86_partial_vec_fp_math. (divv4hf3): Ditto. (<insn>v2hf3): Ditto. (divv2hf3): Ditto. (movd_v2hf_to_sse): Do not sanitize upper part of V2HFmode register with -fno-trapping-math. --- gcc/config/i386/mmx.md | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index d51b3b9dc71..170432a7128 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -596,7 +596,7 @@ (define_expand "movq_<mode>_to_sse" (match_dup 2)))] "TARGET_SSE2" { - if (<MODE>mode == V2SFmode + if (<MODE>mode != V2SImode && !flag_trapping_math) { rtx op1 = force_reg (<MODE>mode, operands[1]); @@ -1941,7 +1941,7 @@ (define_expand "<insn>v4hf3" (plusminusmult:V4HF (match_operand:V4HF 1 "nonimmediate_operand") (match_operand:V4HF 2 "nonimmediate_operand")))] - "TARGET_AVX512FP16 && TARGET_AVX512VL" + "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math" { rtx op2 = gen_reg_rtx (V8HFmode); rtx op1 = gen_reg_rtx (V8HFmode); @@ -1961,7 +1961,7 @@ (define_expand "divv4hf3" (div:V4HF (match_operand:V4HF 1 "nonimmediate_operand") (match_operand:V4HF 2 "nonimmediate_operand")))] - "TARGET_AVX512FP16 && TARGET_AVX512VL" + "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math" { rtx op2 = gen_reg_rtx (V8HFmode); rtx op1 = gen_reg_rtx (V8HFmode); @@ -1983,14 +1983,22 @@ (define_expand "movd_v2hf_to_sse" (match_operand:V2HF 1 "nonimmediate_operand")) (match_operand:V8HF 2 "reg_or_0_operand") (const_int 3)))] - "TARGET_SSE") + "TARGET_SSE" +{ + if (!flag_trapping_math && operands[2] == CONST0_RTX (V8HFmode)) + { +rtx op1 = force_reg (V2HFmode, operands[1]); +emit_move_insn 
(operands[0], lowpart_subreg (V8HFmode, op1, V2HFmode)); +DONE; + } +}) (define_expand "<insn>v2hf3" [(set (match_operand:V2HF 0 "register_operand") (plusminusmult:V2HF (match_operand:V2HF 1 "nonimmediate_operand") (match_operand:V2HF 2 "nonimmediate_operand")))] - "TARGET_AVX512FP16 && TARGET_AVX512VL" + "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math" { rtx op2 = gen_reg_rtx (V8HFmode); rtx op1 = gen_reg_rtx (V8HFmode); @@ -2009,7 +2017,7 @@ (define_expand "divv2hf3" (div:V2HF (match_operand:V2HF 1 "nonimmediate_operand") (match_operand:V2HF 2 "nonimmediate_operand")))] - "TARGET_AVX512FP16 && TARGET_AVX512VL" + "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math" { rtx op2 = gen_reg_rtx (V8HFmode); rtx op1 = gen_reg_rtx (V8HFmode); -- 2.31.1