On 3/1/21 5:00 PM, Stefan Schulze Frielinghaus wrote: > As of commit 3a6e3ad38a17a03ee0139b49a0946e7b9ded1eb1 expressions > x CMP y ? -1 : 0 are folded into x CMP y. Due to this we do not see > shifts anymore after expand in our testcases but comparisons. Thus > replace instructions vesraX by corresponding vchX. Keep testcases > vchX_{lt,ge} where only a relational comparison is done and no shift in > order to keep test coverage for vectorization.
The vcond-shift optimization verified by the testcase is currently implemented in s390_expand_vcond but due to the common code change we go the vec_cmp route now. So we probably should do the same also in s390_expand_vec_compare now. Perhaps like this ... it appears to fix the testcase for me: diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 9d2cee950d0b..9d9f5a0f6f4e 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -6562,6 +6562,7 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond, if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT) { + cmp_op2 = force_operand (cmp_op2, 0); switch (cond) { /* NE a != b -> !(a == b) */ @@ -6600,6 +6601,19 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond, } else { + /* Turn x < 0 into x >> (bits - 1) */ + if (cond == LT && cmp_op2 == CONST0_RTX (mode)) + { + int shift = GET_MODE_BITSIZE (GET_MODE_INNER (mode)) - 1; + rtx res = expand_simple_binop (mode, ASHIFTRT, cmp_op1, + GEN_INT (shift), target, + 0, OPTAB_DIRECT); + if (res != target) + emit_move_insn (target, res); + return; + } + cmp_op2 = force_operand (cmp_op2, 0); + switch (cond) { /* NE: a != b -> !(a == b) */ diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index bc52211c55e5..c80d582a300d 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -1589,7 +1589,7 @@ [(set (match_operand:<TOINTVEC> 0 "register_operand" "") (match_operator:<TOINTVEC> 1 "vcond_comparison_operator" [(match_operand:V_HW 2 "register_operand" "") - (match_operand:V_HW 3 "register_operand" "")]))] + (match_operand:V_HW 3 "nonmemory_operand" "")]))] "TARGET_VX" { s390_expand_vec_compare (operands[0], GET_CODE(operands[1]), operands[2], operands[3]); Andreas > > gcc/testsuite/ChangeLog: > > * gcc.target/s390/vector/vcond-shift.c: Replace vesraX > instructions by corresponding vchX instructions. 
> --- > .../gcc.target/s390/vector/vcond-shift.c | 31 ++++++++++--------- > 1 file changed, 17 insertions(+), 14 deletions(-) > > diff --git a/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c > b/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c > index a6b4e97aa50..9e472aef960 100644 > --- a/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c > +++ b/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c > @@ -3,10 +3,13 @@ > /* { dg-do compile { target { s390*-*-* } } } */ > /* { dg-options "-O3 -march=z13 -mzarch" } */ > > -/* { dg-final { scan-assembler-times "vesraf\t%v.?,%v.?,31" 6 } } */ > -/* { dg-final { scan-assembler-times "vesrah\t%v.?,%v.?,15" 6 } } */ > -/* { dg-final { scan-assembler-times "vesrab\t%v.?,%v.?,7" 6 } } */ > -/* { dg-final { scan-assembler-not "vzero\t*" } } */ > +/* { dg-final { scan-assembler-times "vzero\t" 9 } } */ > +/* { dg-final { scan-assembler-times "vchf\t" 6 } } */ > +/* { dg-final { scan-assembler-times "vesraf\t%v.?,%v.?,1" 2 } } */ > +/* { dg-final { scan-assembler-times "vchh\t" 6 } } */ > +/* { dg-final { scan-assembler-times "vesrah\t%v.?,%v.?,1" 2 } } */ > +/* { dg-final { scan-assembler-times "vchb\t" 6 } } */ > +/* { dg-final { scan-assembler-times "vesrab\t%v.?,%v.?,1" 2 } } */ > /* { dg-final { scan-assembler-times "vesrlf\t%v.?,%v.?,31" 4 } } */ > /* { dg-final { scan-assembler-times "vesrlh\t%v.?,%v.?,15" 4 } } */ > /* { dg-final { scan-assembler-times "vesrlb\t%v.?,%v.?,7" 4 } } */ > @@ -15,19 +18,19 @@ > #define ITER(X) (2 * (16 / sizeof (X[1]))) > > void > -vesraf_div (int *x) > +vchf_vesraf_div (int *x) > { > int i; > int *xx = __builtin_assume_aligned (x, 8); > > /* Should expand to (xx + (xx < 0 ? 1 : 0)) >> 1 > - which in turn should get simplified to (xx + (xx >> 31)) >> 1. */ > + which in turn should get simplified to (xx - (xx < 0)) >> 1. 
*/ > for (i = 0; i < ITER (xx); i++) > xx[i] = xx[i] / 2; > } > > void > -vesrah_div (short *x) > +vchh_vesrah_div (short *x) > { > int i; > short *xx = __builtin_assume_aligned (x, 8); > @@ -38,7 +41,7 @@ vesrah_div (short *x) > > > void > -vesrab_div (signed char *x) > +vchb_vesrab_div (signed char *x) > { > int i; > signed char *xx = __builtin_assume_aligned (x, 8); > @@ -50,7 +53,7 @@ vesrab_div (signed char *x) > > > int > -vesraf_lt (int *x) > +vchf_lt (int *x) > { > int i; > int *xx = __builtin_assume_aligned (x, 8); > @@ -60,7 +63,7 @@ vesraf_lt (int *x) > } > > int > -vesrah_lt (short *x) > +vchh_lt (short *x) > { > int i; > short *xx = __builtin_assume_aligned (x, 8); > @@ -70,7 +73,7 @@ vesrah_lt (short *x) > } > > int > -vesrab_lt (signed char *x) > +vchb_lt (signed char *x) > { > int i; > signed char *xx = __builtin_assume_aligned (x, 8); > @@ -82,7 +85,7 @@ vesrab_lt (signed char *x) > > > int > -vesraf_ge (int *x) > +vchf_ge (int *x) > { > int i; > int *xx = __builtin_assume_aligned (x, 8); > @@ -92,7 +95,7 @@ vesraf_ge (int *x) > } > > int > -vesrah_ge (short *x) > +vchh_ge (short *x) > { > int i; > short *xx = __builtin_assume_aligned (x, 8); > @@ -102,7 +105,7 @@ vesrah_ge (short *x) > } > > int > -vesrab_ge (signed char *x) > +vchb_ge (signed char *x) > { > int i; > signed char *xx = __builtin_assume_aligned (x, 8); >