On Mon, Apr 21, 2025 at 2:52 PM liuhongt <hongtao....@intel.com> wrote: > > Since ix86_expand_sse_movcc will simplify constant zero or all-ones operands > into a simple vmov, vpand or vpandn, the current > register_operand/vector_operand predicates can lose some optimization > opportunities. > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. > Ok for trunk? Committed. > > gcc/ChangeLog: > > * config/i386/predicates.md (vector_or_0_or_1s_operand): New > predicate. > (nonimm_or_0_or_1s_operand): Ditto. > * config/i386/sse.md (vcond_mask_<mode><sseintvecmodelower>): > Extend the predicate of operands[1] to accept 0 or allones > operands. > (vcond_mask_<mode><sseintvecmodelower>): Ditto. > (vcond_mask_v1tiv1ti): Ditto. > (vcond_mask_<mode><sseintvecmodelower>): Ditto. > * config/i386/i386.md (mov<mode>cc): Ditto for operands[2] and > operands[3]. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/blendv-to-maxmin.c: New test. > * gcc.target/i386/blendv-to-pand.c: New test. > --- > gcc/config/i386/i386-expand.cc | 6 ++++++ > gcc/config/i386/i386.md | 4 ++-- > gcc/config/i386/predicates.md | 14 ++++++++++++++ > gcc/config/i386/sse.md | 10 +++++----- > gcc/testsuite/gcc.target/i386/blendv-to-maxmin.c | 12 ++++++++++++ > gcc/testsuite/gcc.target/i386/blendv-to-pand.c | 16 ++++++++++++++++ > 6 files changed, 55 insertions(+), 7 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/blendv-to-maxmin.c > create mode 100644 gcc/testsuite/gcc.target/i386/blendv-to-pand.c > > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc > index cdfd94d3c73..ef867fb4f82 100644 > --- a/gcc/config/i386/i386-expand.cc > +++ b/gcc/config/i386/i386-expand.cc > @@ -4138,6 +4138,10 @@ ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code > code, rtx cmp_op0, > return false; > > mode = GET_MODE (dest); > + if (immediate_operand (if_false, mode)) > + if_false = force_reg (mode, if_false); > + if (immediate_operand (if_true, mode)) > + if_true = force_reg (mode, if_true); > > /* We want to check 
HONOR_NANS and HONOR_SIGNED_ZEROS here, > but MODE may be a vector mode and thus not appropriate. */ > @@ -4687,6 +4691,8 @@ ix86_expand_fp_movcc (rtx operands[]) > compare_op = ix86_expand_compare (NE, tmp, const0_rtx); > } > > + operands[2] = force_reg (mode, operands[2]); > + operands[3] = force_reg (mode, operands[3]); > emit_insn (gen_rtx_SET (operands[0], > gen_rtx_IF_THEN_ELSE (mode, compare_op, > operands[2], operands[3]))); > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index f7f790d2aeb..45c2fe5a58a 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -26576,8 +26576,8 @@ (define_expand "mov<mode>cc" > [(set (match_operand:X87MODEF 0 "register_operand") > (if_then_else:X87MODEF > (match_operand 1 "comparison_operator") > - (match_operand:X87MODEF 2 "register_operand") > - (match_operand:X87MODEF 3 "register_operand")))] > + (match_operand:X87MODEF 2 "nonimm_or_0_or_1s_operand") > + (match_operand:X87MODEF 3 "nonimm_or_0_operand")))] > "(TARGET_80387 && TARGET_CMOVE) > || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" > "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;") > diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md > index 3d3848c0a22..4b23e18eaf4 100644 > --- a/gcc/config/i386/predicates.md > +++ b/gcc/config/i386/predicates.md > @@ -1267,6 +1267,14 @@ (define_predicate "vector_or_const_vector_operand" > (match_operand 0 "vector_memory_operand") > (match_code "const_vector"))) > > +; Return true when OP is register_operand, vector_memory_operand, > +; const_vector zero or const_vector all ones. 
> +(define_predicate "vector_or_0_or_1s_operand" > + (ior (match_operand 0 "register_operand") > + (match_operand 0 "vector_memory_operand") > + (match_operand 0 "const0_operand") > + (match_operand 0 "int_float_vector_all_ones_operand"))) > + > (define_predicate "bcst_mem_operand" > (and (match_code "vec_duplicate") > (and (match_test "TARGET_AVX512F") > @@ -1333,6 +1341,12 @@ (define_predicate "nonimm_or_0_operand" > (ior (match_operand 0 "nonimmediate_operand") > (match_operand 0 "const0_operand"))) > > +; Return true when OP is a nonimmediate or zero or all ones. > +(define_predicate "nonimm_or_0_or_1s_operand" > + (ior (match_operand 0 "nonimmediate_operand") > + (match_operand 0 "const0_operand") > + (match_operand 0 "int_float_vector_all_ones_operand"))) > + > ;; Return true for RTX codes that force SImode address. > (define_predicate "SImode_address_operand" > (match_code "subreg,zero_extend,and")) > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index ed5ac1abe80..aa192993b50 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -5138,7 +5138,7 @@ (define_mode_iterator VI_256_AVX2 [(V32QI > "TARGET_AVX2") (V16HI "TARGET_AVX2") > (define_expand "vcond_mask_<mode><sseintvecmodelower>" > [(set (match_operand:VI_256_AVX2 0 "register_operand") > (vec_merge:VI_256_AVX2 > - (match_operand:VI_256_AVX2 1 "nonimmediate_operand") > + (match_operand:VI_256_AVX2 1 "nonimm_or_0_or_1s_operand") > (match_operand:VI_256_AVX2 2 "nonimm_or_0_operand") > (match_operand:<sseintvecmode> 3 "register_operand")))] > "TARGET_AVX" > @@ -5151,7 +5151,7 @@ (define_expand "vcond_mask_<mode><sseintvecmodelower>" > (define_expand "vcond_mask_<mode><sseintvecmodelower>" > [(set (match_operand:VI_128 0 "register_operand") > (vec_merge:VI_128 > - (match_operand:VI_128 1 "vector_operand") > + (match_operand:VI_128 1 "vector_or_0_or_1s_operand") > (match_operand:VI_128 2 "nonimm_or_0_operand") > (match_operand:<sseintvecmode> 3 "register_operand")))] > 
"TARGET_SSE2" > @@ -5164,7 +5164,7 @@ (define_expand "vcond_mask_<mode><sseintvecmodelower>" > (define_expand "vcond_mask_v1tiv1ti" > [(set (match_operand:V1TI 0 "register_operand") > (vec_merge:V1TI > - (match_operand:V1TI 1 "vector_operand") > + (match_operand:V1TI 1 "vector_or_0_or_1s_operand") > (match_operand:V1TI 2 "nonimm_or_0_operand") > (match_operand:V1TI 3 "register_operand")))] > "TARGET_SSE2" > @@ -5177,7 +5177,7 @@ (define_expand "vcond_mask_v1tiv1ti" > (define_expand "vcond_mask_<mode><sseintvecmodelower>" > [(set (match_operand:VF_256 0 "register_operand") > (vec_merge:VF_256 > - (match_operand:VF_256 1 "nonimmediate_operand") > + (match_operand:VF_256 1 "nonimm_or_0_or_1s_operand") > (match_operand:VF_256 2 "nonimm_or_0_operand") > (match_operand:<sseintvecmode> 3 "register_operand")))] > "TARGET_AVX" > @@ -5190,7 +5190,7 @@ (define_expand "vcond_mask_<mode><sseintvecmodelower>" > (define_expand "vcond_mask_<mode><sseintvecmodelower>" > [(set (match_operand:VF_128 0 "register_operand") > (vec_merge:VF_128 > - (match_operand:VF_128 1 "vector_operand") > + (match_operand:VF_128 1 "vector_or_0_or_1s_operand") > (match_operand:VF_128 2 "nonimm_or_0_operand") > (match_operand:<sseintvecmode> 3 "register_operand")))] > "TARGET_SSE" > diff --git a/gcc/testsuite/gcc.target/i386/blendv-to-maxmin.c > b/gcc/testsuite/gcc.target/i386/blendv-to-maxmin.c > new file mode 100644 > index 00000000000..042eb7d8f24 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/blendv-to-maxmin.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=x86-64-v3 -O2 -mfpmath=sse" } */ > +/* { dg-final { scan-assembler-times "vmaxsd" 1 } } */ > + > +double > +foo (double a) > +{ > + if (a > 0.0) > + return a; > + return 0.0; > +} > + > diff --git a/gcc/testsuite/gcc.target/i386/blendv-to-pand.c > b/gcc/testsuite/gcc.target/i386/blendv-to-pand.c > new file mode 100644 > index 00000000000..2896a2b2c95 > --- /dev/null > +++ 
b/gcc/testsuite/gcc.target/i386/blendv-to-pand.c > @@ -0,0 +1,16 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -march=x86-64-v3 -mfpmath=sse" } */ > +/* { dg-final { scan-assembler-not "vblendv" } } */ > + > +void > +foo (float* a, float* b, float* c, float* __restrict d, int n) > +{ > + for (int i = 0; i != n; i++) > + { > + c[i] *= 2.0f; > + if (a[i] > b[i]) > + d[i] = 0.0f; > + else > + d[i] = c[i]; > + } > +} > -- > 2.34.1 >
-- BR, Hongtao