On Wed, Jan 31, 2024 at 12:50 PM Robin Dapp <rdapp....@gmail.com> wrote: > > Hi, > > in PR113607 we see an invalid fold of > > _429 = .COND_SHL (mask_patt_205.47_276, vect_cst__262, vect_cst__262, { 0, > ... }); > vect_prephitmp_129.51_282 = _429; > vect_iftmp.55_287 = VEC_COND_EXPR <mask_patt_209.54_286, > vect_prephitmp_129.51_282, vect_cst__262>; > > to > > Applying pattern match.pd:9607, gimple-match-10.cc:3817 > gimple_simplified to vect_iftmp.55_287 = .COND_SHL (mask_patt_205.47_276, > vect_cst__262, vect_cst__262, { 0, ... }); > > where we essentially use COND_SHL's else instead of VEC_COND_EXPR's. > > This patch adjusts the corresponding match.pd pattern and makes it only > match when the else values are the same. > > That, however, causes the exact test case to fail which this pattern > was introduced for. XFAIL it for now. > > Bootstrapped and regtested on x86. Regtested on riscv. aarch64 > is still running.
OK. Thanks, Richard. > Regards > Robin > > > gcc/ChangeLog: > > PR middle-end/113607 > > * match.pd: Make sure else values match when folding a > vec_cond into a conditional operation. > > gcc/testsuite/ChangeLog: > > * gcc.target/aarch64/sve/pre_cond_share_1.c: XFAIL. > * gcc.target/riscv/rvv/autovec/pr113607-run.c: New test. > * gcc.target/riscv/rvv/autovec/pr113607.c: New test. > --- > gcc/match.pd | 8 +-- > .../gcc.target/aarch64/sve/pre_cond_share_1.c | 2 +- > .../riscv/rvv/autovec/pr113607-run.c | 4 ++ > .../gcc.target/riscv/rvv/autovec/pr113607.c | 49 +++++++++++++++++++ > 4 files changed, 58 insertions(+), 5 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113607-run.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113607.c > > diff --git a/gcc/match.pd b/gcc/match.pd > index e42ecaf9ec7..7c391a8fe20 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -9592,18 +9592,18 @@ and, > > /* Detect simplification for vector condition folding where > > - c = mask1 ? (masked_op mask2 a b) : b > + c = mask1 ? (masked_op mask2 a b els) : els > > into > > - c = masked_op (mask1 & mask2) a b > + c = masked_op (mask1 & mask2) a b els > > where the operation can be partially applied to one operand. */ > > (for cond_op (COND_BINARY) > (simplify > (vec_cond @0 > - (cond_op:s @1 @2 @3 @4) @3) > + (cond_op:s @1 @2 @3 @4) @4) > (cond_op (bit_and @1 @0) @2 @3 @4))) > > /* And same for ternary expressions. */ > @@ -9611,7 +9611,7 @@ and, > (for cond_op (COND_TERNARY) > (simplify > (vec_cond @0 > - (cond_op:s @1 @2 @3 @4 @5) @4) > + (cond_op:s @1 @2 @3 @4 @5) @5) > (cond_op (bit_and @1 @0) @2 @3 @4 @5))) > > /* For pointers @0 and @2 and nonnegative constant offset @1, look for > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pre_cond_share_1.c > b/gcc/testsuite/gcc.target/aarch64/sve/pre_cond_share_1.c > index b51d0f298ea..e4f754d739c 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/pre_cond_share_1.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pre_cond_share_1.c > @@ -129,4 +129,4 @@ fasten_main(size_t group, size_t ntypes, size_t nposes, > size_t natlig, size_t na > } > > /* { dg-final { scan-tree-dump-times {\.COND_MUL} 1 "optimized" } } */ > -/* { dg-final { scan-tree-dump-times {\.VCOND} 1 "optimized" } } */ > +/* { dg-final { scan-tree-dump-times {\.VCOND} 1 "optimized" { xfail *-*-* } > } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113607-run.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113607-run.c > new file mode 100644 > index 00000000000..06074767ce5 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113607-run.c > @@ -0,0 +1,4 @@ > +/* { dg-do run { target { riscv_v && rv64 } } } */ > +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -fdump-tree-optimized" } */ > + > +#include "pr113607.c" > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113607.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113607.c > new file mode 100644 > index 00000000000..70a93665497 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113607.c > @@ -0,0 +1,49 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -fdump-tree-optimized" } */ > + > +struct { > + signed b; > +} c, d = {6}; > + > +short e, f; > +int g[1000]; > +signed char h; > +int i, j; > +long k, l; > + > +long m(long n, long o) { > + if (n < 1 && o == 0) > + return 0; > + return n; > +} > + > +static int p() { > + long q = 0; > + int a = 0; > + for (; e < 2; e += 1) > + g[e * 7 + 1] = -1; > + for (; h < 1; h += 1) { > + k = g[8] || f; > + l = m(g[f * 7 + 1], k); > + a = l; > + j = a < 0 || g[f * 7 + 1] < 0 || g[f * 7 + 1] >= 32 ? a : a << g[f * 7 + > 1]; > + if (j) > + ++q; > + } > + if (q) > + c = d; > + return i; > +} > + > +int main() { > + p(); > + if (c.b != 6) > + __builtin_abort (); > +} > + > +/* We must not fold VEC_COND_EXPR into COND_SHL. > + Therefore, make sure that we still have 2/4 VCOND_MASKs with real else > + value. */ > + > +/* { dg-final { scan-tree-dump-times { = \.VCOND_MASK.\([a-z0-9\._]+, > [a-z0-9\._\{\}, ]+, [0-9\.\{\},]+\);} 0 "optimized" } } */ > +/* { dg-final { scan-tree-dump-times { = \.VCOND_MASK.\([a-z0-9\._]+, > [a-z0-9\._\{\}, ]+, [a-z0-9\._]+\);} 4 "optimized" } } */ > -- > 2.43.0