On Thu, Dec 01, 2016 at 05:26:16PM +0100, Dominik Vogt wrote: > The following patch series adds some patterns for enhanced use of > the r[ixo]sbg instructions on S/390. > > - 0001-* fixes some test regressions with the existing risbg > patterns that are broken because of recent trunk changes. > > - 0002-* adds new patterns for the r[xo]sbg instructions and an > SI mode variant of "extzv". > > For details, please check the commit comments of the patches. All > patches have been bootstrapped on s390x biarch and regression > tested on s390x biarch and s390.
r[xo]sbg patch. Ciao Dominik ^_^ ^_^ -- Dominik Vogt IBM Germany
gcc/ChangeLog * config/s390/s390.md ("extzv<mode>"): Allow GPR mode and rename expander from "extzv" to "extzv<mode>". * ("*extzvdisi<clobbercc_or_nocc>"): New zero_extract pattern. * ("*<risbg_n>_<mode>_ior_and_ze"): New pattern. with a plain (zero_extract:SI). Allow GPR mode. * ("*extract1bit<mode><clobbercc_or_nocc>") ("*extract1bitdi<clobbercc_or_nocc>"): Rename pattern and switch to GPR mode. * ("*r<noxa>sbg_<mode>_ze"): New pattern. gcc/testsuite/ChangeLog * gcc.target/s390/risbg-ll-1.c (f1, f2, f23, f34, f35, f41): Updated tests. * (g1, g2): New tests. * gcc.target/s390/risbg-ll-2.c (f3, f4): Updated tests. * gcc.target/s390/risbg-ll-3.c (g1, g2): New tests. * gcc.target/s390/rosbg-1.c: Add tests for rosbg and rxsbg.
>From 9874c8afb7a61fb98af5b302df9866d25df16b30 Mon Sep 17 00:00:00 2001 From: Dominik Vogt <v...@linux.vnet.ibm.com> Date: Mon, 17 Oct 2016 10:06:16 +0100 Subject: [PATCH 2/2] S/390: New patterns for extzv, risbg and r[ox]sbg. The new extzv-patterns are necessary for the new r[ox]sbg patterns. The new risbg patterns are necessary for the new extzv patterns. --- gcc/config/s390/s390.md | 74 +++++++++++++++++++++------ gcc/testsuite/gcc.target/s390/risbg-ll-1.c | 36 +++++++++++--- gcc/testsuite/gcc.target/s390/risbg-ll-2.c | 6 +-- gcc/testsuite/gcc.target/s390/risbg-ll-3.c | 2 + gcc/testsuite/gcc.target/s390/rosbg-1.c | 80 ++++++++++++++++++++++++++++++ 5 files changed, 175 insertions(+), 23 deletions(-) create mode 100644 gcc/testsuite/gcc.target/s390/rosbg-1.c diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 43b9371..15f0a41 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -3728,26 +3728,24 @@ ; extv instruction patterns ; -; FIXME: This expander needs to be converted from DI to GPR as well -; after resolving some issues with it. - -(define_expand "extzv" +(define_expand "extzv<mode>" [(parallel - [(set (match_operand:DI 0 "register_operand" "=d") - (zero_extract:DI - (match_operand:DI 1 "register_operand" "d") + [(set (match_operand:GPR 0 "register_operand" "=d") + (zero_extract:GPR + (match_operand:GPR 1 "register_operand" "d") (match_operand 2 "const_int_operand" "") ; size (match_operand 3 "const_int_operand" ""))) ; start (clobber (reg:CC CC_REGNUM))])] "TARGET_Z10" { - if (! EXTRACT_ARGS_IN_RANGE (INTVAL (operands[2]), INTVAL (operands[3]), 64)) + if (! EXTRACT_ARGS_IN_RANGE (INTVAL (operands[2]), INTVAL (operands[3]), + GET_MODE_BITSIZE (<MODE>mode))) FAIL; /* Starting with zEC12 there is risbgn not clobbering CC. 
*/ if (TARGET_ZEC12) { emit_move_insn (operands[0], - gen_rtx_ZERO_EXTRACT (DImode, + gen_rtx_ZERO_EXTRACT (<MODE>mode, operands[1], operands[2], operands[3])); @@ -3787,6 +3785,19 @@ [(set_attr "op_type" "RIE") (set_attr "z10prop" "z10_super_E1")]) +(define_insn "*extzvdisi<clobbercc_or_nocc>" + [(set (match_operand:DI 0 "register_operand" "=d") + (zero_extract:DI + (match_operand:SI 1 "register_operand" "d") + (match_operand 2 "const_int_operand" "") ; size + (match_operand 3 "const_int_operand" ""))) ; start + ] + "<z10_or_zEC12_cond> + && EXTRACT_ARGS_IN_RANGE (INTVAL (operands[2]), INTVAL (operands[3]), 32)" + "<risbg_n>\t%0,%1,64-%2,128+63,32+%3+%2" ; dst, src, start, end, shift + [(set_attr "op_type" "RIE") + (set_attr "z10prop" "z10_super_E1")]) + ; 64 bit: (a & -16) | ((b >> 8) & 15) (define_insn "*extzvdi<clobbercc_or_nocc>_lshiftrt" [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+d") @@ -3820,17 +3831,36 @@ [(set_attr "op_type" "RIE") (set_attr "z10prop" "z10_super_E1")]) +(define_insn "*<risbg_n>_<mode>_ior_and_ze" + [(set (match_operand:GPR 0 "register_operand" "=d") + (ior:GPR (and:GPR + (match_operand:GPR 1 "register_operand" "0") + (match_operand:GPR 2 "const_int_operand" "")) + (zero_extract:GPR + (match_operand:GPR 3 "register_operand" "d") + (match_operand 4 "const_int_operand" "") ; size + (match_operand 5 "const_int_operand" "")) ; start + ))] + "<z10_or_zEC12_cond> + && EXTRACT_ARGS_IN_RANGE (INTVAL (operands[4]), INTVAL (operands[5]), + GET_MODE_BITSIZE (<MODE>mode)) + && UINTVAL (operands[2]) == (~(0ULL) << UINTVAL (operands[4]))" + "<risbg_n>\t%0,%3,64-%4,63,<bitoff_plus>%4+%5" + [(set_attr "op_type" "RIE") + (set_attr "z10prop" "z10_super_E1")]) + ; ((int)foo >> 10) & 1; -(define_insn "*extract1bitdi<clobbercc_or_nocc>" +(define_insn "*extract1bit<mode><clobbercc_or_nocc>" [(set (match_operand:DI 0 "register_operand" "=d") - (ne:DI (zero_extract:DI - (match_operand:DI 1 "register_operand" "d") + (ne:DI (zero_extract:GPR + 
(match_operand:GPR 1 "register_operand" "d") (const_int 1) ; size (match_operand 2 "const_int_operand" "")) ; start (const_int 0)))] "<z10_or_zEC12_cond> - && EXTRACT_ARGS_IN_RANGE (1, INTVAL (operands[2]), 64)" - "<risbg_n>\t%0,%1,64-1,128+63,%2+1" ; dst, src, start, end, shift + && EXTRACT_ARGS_IN_RANGE (1, INTVAL (operands[2]), + GET_MODE_BITSIZE (<MODE>mode))" + "<risbg_n>\t%0,%1,63,128+63,<bitoff_plus>%2+1" ; dst, src, start, end, shift [(set_attr "op_type" "RIE") (set_attr "z10prop" "z10_super_E1")]) @@ -4221,6 +4251,22 @@ "r<noxa>sbg\t%0,%1,<bitoff_plus>%2,63,64-%2" [(set_attr "op_type" "RIE")]) +;; a = a | ((b >> const_int) & 15) +;; a = a ^ ((b >> const_int) & 15) +(define_insn "*r<noxa>sbg_<mode>_ze" + [(set (match_operand:GPR 0 "nonimmediate_operand" "=d") + (IXOR:GPR + (zero_extract:GPR (match_operand:GPR 1 "register_operand" "d") + (match_operand 2 "const_int_operand" "") ; size + (match_operand 3 "const_int_operand" "")) ; start + (match_operand:GPR 4 "nonimmediate_operand" "0"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_Z10 + && EXTRACT_ARGS_IN_RANGE (INTVAL (operands[2]), INTVAL (operands[3]), + GET_MODE_BITSIZE (<MODE>mode))" + "r<noxa>sbg\t%0,%1,64-%2,63,<bitoff_plus>%3+%2" + [(set_attr "op_type" "RIE")]) + ;; These two are generated by combine for s.bf &= val. ;; ??? For bitfields smaller than 32-bits, we wind up with SImode ;; shifts and ands, which results in some truly awful patterns diff --git a/gcc/testsuite/gcc.target/s390/risbg-ll-1.c b/gcc/testsuite/gcc.target/s390/risbg-ll-1.c index 17a9000..665dc71 100644 --- a/gcc/testsuite/gcc.target/s390/risbg-ll-1.c +++ b/gcc/testsuite/gcc.target/s390/risbg-ll-1.c @@ -16,7 +16,8 @@ // Test an extraction of bit 0 from a right-shifted value. 
i32 f1 (i32 v_foo) { - /* { dg-final { scan-assembler "f1:\n\trisbg\t%r2,%r2,64-1,128\\\+63,53\\\+1" } } */ + /* { dg-final { scan-assembler "f1:\n\trisbg\t%r2,%r2,64-1,128\\\+63,53\\\+1" { target { lp64 } } } } */ + /* { dg-final { scan-assembler "f1:\n\trisbg\t%r2,%r2,64-1,128\\\+63,32\\\+21\\\+1" { target { ! lp64 } } } } */ i32 v_shr = ((ui32)v_foo) >> 10; i32 v_and = v_shr & 1; return v_and; @@ -26,7 +27,7 @@ i32 f1 (i32 v_foo) i64 f2 (i64 v_foo) { /* { dg-final { scan-assembler "f2:\n\trisbg\t%r2,%r2,64-1,128\\\+63,53\\\+1" { target { lp64 } } } } */ - /* { dg-final { scan-assembler "f2:\n\trisbg\t%r3,%r3,64-1,128\\\+63,53\\\+1\n\tlhi\t%r2,0" { target { ! lp64 } } } } */ + /* { dg-final { scan-assembler "f2:\n\trisbg\t%r3,%r3,64-1,128\\\+63,32\\\+21\\\+1\n\tlhi\t%r2,0" { target { ! lp64 } } } } */ i64 v_shr = ((ui64)v_foo) >> 10; i64 v_and = v_shr & 1; return v_and; @@ -263,7 +264,7 @@ i64 f22 (i64 v_foo) i64 f23 (i64 v_foo) { /* { dg-final { scan-assembler "f23:\n\trisbg\t%r2,%r2,64-8,128\\\+63,54\\\+8" { target { lp64 } } } } */ - /* { dg-final { scan-assembler "f23:\n\trisbg\t%r3,%r3,64-8,128\\\+63,54\\\+8\n\tlhi\t%r2,0" { target { ! lp64 } } } } */ + /* { dg-final { scan-assembler "f23:\n\trisbg\t%r3,%r3,64-8,128\\\+63,32\\\+22\\\+8\n\tlhi\t%r2,0" { target { ! lp64 } } } } */ i64 v_shr = ((ui64)v_foo) >> 2; i64 v_and = v_shr & 255; return v_and; @@ -378,7 +379,8 @@ i64 f33 (i64 v_foo) // Test a case where the AND comes before a shift right. i32 f34 (i32 v_foo) { - /* { dg-final { scan-assembler "f34:\n\trisbg\t%r2,%r2,64-7,128\\\+63,48\\\+7" } } */ + /* { dg-final { scan-assembler "f34:\n\trisbg\t%r2,%r2,64-7,128\\\+63,48\\\+7" { target { lp64 } } } } */ + /* { dg-final { scan-assembler "f34:\n\trisbg\t%r2,%r2,64-7,128\\\+63,32\\\+16\\\+7" { target { ! 
lp64 } } } } */ i32 v_and = v_foo & 65535; i32 v_shl = ((ui32)v_and) >> 9; return v_shl; @@ -388,7 +390,7 @@ i32 f34 (i32 v_foo) i64 f35 (i64 v_foo) { /* { dg-final { scan-assembler "f35:\n\trisbg\t%r2,%r2,64-7,128\\\+63,48\\\+7" { target { lp64 } } } } */ - /* { dg-final { scan-assembler "f35:\n\trisbg\t%r3,%r3,64-7,128\\\+63,48\\\+7\n\tlhi\t%r2,0" { target { ! lp64 } } } } */ + /* { dg-final { scan-assembler "f35:\n\trisbg\t%r3,%r3,64-7,128\\\+63,32\\\+16\\\+7\n\tlhi\t%r2,0" { target { ! lp64 } } } } */ i64 v_and = v_foo & 65535; i64 v_shl = ((ui64)v_and) >> 9; return v_shl; @@ -454,7 +456,7 @@ i64 f40 (i64 v_foo, i64 *v_dest) i64 f41 (i32 v_a) { /* { dg-final { scan-assembler "f41:\n\trisbg\t%r2,%r2,64-28,128\\\+63,34\\\+28" { target { lp64 } } } } */ - /* { dg-final { scan-assembler "f41:\n\trisbg\t%r3,%r2,64-28,128\\\+63,34\\\+28\n\tlhi\t%r2,0" { target { ! lp64 } } } } */ + /* { dg-final { scan-assembler "f41:\n\trisbg\t%r3,%r2,64-28,128\\\+63,32\\\+2\\\+28\n\tlhi\t%r2,0" { target { ! lp64 } } } } */ i32 v_shl = v_a << 2; i32 v_shr = ((ui32)v_shl) >> 4; i64 v_ext = (ui64)v_shr; @@ -496,3 +498,25 @@ i32 f44 (i64 v_x) i32 v_and = v_conv & 10; return v_and; } + +// Check whether risbg is used to extract the lsb of an SI. +i64 g1 (i32 a) +{ + /* { dg-final { scan-assembler "g1:\n\trisbg\t%r2,%r2,63,128\\\+63,0" { target { lp64 } } } } */ + /* { dg-final { scan-assembler "g1:\n\trisbg\t%r3,%r2,63,128\\\+63,0" { target { ! lp64 } } } } */ + i64 b; + + b = !(i64)(a & 1); + return b; +} + +// Check whether risbg is used to extract the second lowest bit of an SI. 
+void g2_foo (i32, i32); +i32 g2 (i32 a, i32 *b) +{ + /* { dg-final { scan-assembler "g2:\n\(\t.*\n\)*\trisbg\t%r3,%r1,62,128\\\+62,0" } } */ + if (a) + g2_foo (0, *b & 2); + + return *b; +} diff --git a/gcc/testsuite/gcc.target/s390/risbg-ll-2.c b/gcc/testsuite/gcc.target/s390/risbg-ll-2.c index 3628192..0c3db1b 100644 --- a/gcc/testsuite/gcc.target/s390/risbg-ll-2.c +++ b/gcc/testsuite/gcc.target/s390/risbg-ll-2.c @@ -34,7 +34,7 @@ i64 f2 (i64 v_a, i64 v_b) // Test a case with two ANDs and a shift. i32 f3 (i32 v_a, i32 v_b) { - /* { dg-final { scan-assembler "f3:\n\trisbg\t%r2,%r3,64-4,63,4\\\+52" } } */ + /* { dg-final { scan-assembler "f3:\n\trisbg\t%r2,%r3,64-4,63,32\\\+4\\\+20" } } */ i32 v_anda = v_a & -16; i32 v_shr = ((ui32)v_b) >> 8; i32 v_andb = v_shr & 15; @@ -45,8 +45,8 @@ i32 f3 (i32 v_a, i32 v_b) // ...and again with i64. i64 f4 (i64 v_a, i64 v_b) { - /* { dg-final { scan-assembler "f4:\n\trisbg\t%r2,%r3,60,60\\\+4-1,128-60-4-8" { target { lp64 } } } } */ - /* { dg-final { scan-assembler "f4:\n\(\t.*\n\)*\trisbg\t%r5,%r5,64-4,128\\\+63,52\\\+4" { target { ! lp64 } } } } */ + /* { dg-final { scan-assembler "f4:\n\trisbg\t%r2,%r3,64-4,63,4\\\+52" { target { lp64 } } } } */ + /* { dg-final { scan-assembler "f4:\n\(\t.*\n\)*\trosbg\t%r3,%r5,64-4,63,52\\\+4" { target { ! 
lp64 } } } } */ i64 v_anda = v_a & -16; i64 v_shr = ((ui64)v_b) >> 8; i64 v_andb = v_shr & 15; diff --git a/gcc/testsuite/gcc.target/s390/risbg-ll-3.c b/gcc/testsuite/gcc.target/s390/risbg-ll-3.c index 838f1ff..4bdb224 100644 --- a/gcc/testsuite/gcc.target/s390/risbg-ll-3.c +++ b/gcc/testsuite/gcc.target/s390/risbg-ll-3.c @@ -5,6 +5,8 @@ /* { dg-do compile { target s390x-*-* } } */ /* { dg-options "-O3 -march=zEC12 -mzarch -fno-asynchronous-unwind-tables" } */ +#define i32 signed int +#define ui32 unsigned int #define i64 signed long long #define ui64 unsigned long long diff --git a/gcc/testsuite/gcc.target/s390/rosbg-1.c b/gcc/testsuite/gcc.target/s390/rosbg-1.c new file mode 100644 index 0000000..3c22794 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/rosbg-1.c @@ -0,0 +1,80 @@ +/* Test use of ROSBG and RXSBG. */ + +/* { dg-do compile { target s390x-*-* } } */ +/* { dg-options "-O3 -march=z10 -mzarch -fno-asynchronous-unwind-tables" } */ + +unsigned long long ior1 (unsigned long long v_a, unsigned long long v_b) +{ + /* { dg-final { scan-assembler "ior1:\n\trosbg\t%r2,%r3,64-4,63,1\\\+4" { target { lp64 } } } } */ + unsigned long long v_shift = (v_b >> 59); + unsigned long long v_and = (v_shift & 15); + unsigned long long v_or = v_a | v_and; + return v_or; +} + +unsigned int ior2 (unsigned int v_a, unsigned int v_b) +{ + /* { dg-final { scan-assembler "ior2:\n\trosbg\t%r2,%r3,64-4,63,32\\\+1\\\+4" } } */ + unsigned int v_shift = (v_b >> 27); + unsigned int v_and = (v_shift & 15); + unsigned int v_or = v_a | v_and; + return v_or; +} + +unsigned short ior3 (unsigned short v_a, unsigned short v_b) +{ + /* { dg-final { scan-assembler "ior3:\n\trosbg\t%r2,%r3,64-4,63,49\\\+4" { target { lp64 } } } } */ + /* { dg-final { scan-assembler "ior3:\n\trosbg\t%r2,%r3,64-4,63,32\\\+17\\\+4" { target { ! 
lp64 } } } } */ + unsigned short v_shift = (v_b >> 11); + unsigned short v_and = (v_shift & 15); + unsigned short v_or = v_a | v_and; + return v_or; +} + +unsigned char ior4 (unsigned char v_a, unsigned char v_b) +{ + /* { dg-final { scan-assembler "ior4:\n\trosbg\t%r2,%r3,64-4,63,57\\\+4" { target { lp64 } } } } */ + /* { dg-final { scan-assembler "ior4:\n\trosbg\t%r2,%r3,64-4,63,32\\\+25\\\+4" { target { ! lp64 } } } } */ + unsigned char v_shift = (v_b >> 3); + unsigned char v_and = (v_shift & 15); + unsigned char v_or = v_a | v_and; + return v_or; +} + +unsigned long long xor1 (unsigned long long v_a, unsigned long long v_b) +{ + /* { dg-final { scan-assembler "xor1:\n\trxsbg\t%r2,%r3,64-4,63,1\\\+4" { target { lp64 } } } } */ + unsigned long long v_shift = (v_b >> 59); + unsigned long long v_and = (v_shift & 15); + unsigned long long v_or = v_a ^ v_and; + return v_or; +} + +unsigned int xor2 (unsigned int v_a, unsigned int v_b) +{ + /* { dg-final { scan-assembler "xor2:\n\trxsbg\t%r2,%r3,64-4,63,32\\\+1\\\+4" } } */ + unsigned int v_shift = (v_b >> 27); + unsigned int v_and = (v_shift & 15); + unsigned int v_or = v_a ^ v_and; + return v_or; +} + +unsigned short xor3 (unsigned short v_a, unsigned short v_b) +{ + /* { dg-final { scan-assembler "xor3:\n\trxsbg\t%r2,%r3,64-4,63,49\\\+4" { target { lp64 } } } } */ + /* { dg-final { scan-assembler "xor3:\n\trxsbg\t%r2,%r3,64-4,63,32\\\+17\\\+4" { target { ! lp64 } } } } */ + unsigned short v_shift = (v_b >> 11); + unsigned short v_and = (v_shift & 15); + unsigned short v_or = v_a ^ v_and; + return v_or; +} + +unsigned char xor4 (unsigned char v_a, unsigned char v_b) +{ + /* { dg-final { scan-assembler "xor4:\n\trxsbg\t%r2,%r3,64-4,63,57\\\+4" { target { lp64 } } } } */ + /* { dg-final { scan-assembler "xor4:\n\trxsbg\t%r2,%r3,64-4,63,32\\\+25\\\+4" { target { ! lp64 } } } } */ + unsigned char v_shift = (v_b >> 3); + unsigned char v_and = (v_shift & 15); + unsigned char v_or = v_a ^ v_and; + return v_or; +} -- 2.3.0