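This patch implements the addv<mode>4, subv<mode>4, and mulv<mode>4 expanders for the s390 backend, so that signed integer add, subtract, and multiply with overflow checking branch directly on the condition code set by the arithmetic instruction.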
Bootstrapped and regression tested on s390x (IBM z14). Committed to mainline. gcc/ChangeLog: 2019-07-24 Andreas Krebbel <kreb...@linux.ibm.com> * config/s390/predicates.md (addv_const_operand): New predicate. * config/s390/s390-modes.def (CCO): New condition code mode. * config/s390/s390.c (s390_match_ccmode_set): Handle E_CCOmode. (s390_branch_condition_mask): Likewise. * config/s390/s390.md ("addv<mode>4", "subv<mode>4") ("mulv<mode>4"): New expanders. ("*addv<mode>3_ccoverflow", "*addv<mode>3_ccoverflow_const") ("*subv<mode>3_ccoverflow", "*mulv<mode>3_ccoverflow"): New pattern definitions. gcc/testsuite/ChangeLog: 2019-07-24 Andreas Krebbel <kreb...@linux.ibm.com> * gcc.target/s390/addsub-signed-overflow-1.c: New test. * gcc.target/s390/addsub-signed-overflow-2.c: New test. * gcc.target/s390/mul-signed-overflow-1.c: New test. * gcc.target/s390/mul-signed-overflow-2.c: New test. --- gcc/config/s390/predicates.md | 6 + gcc/config/s390/s390-modes.def | 14 ++ gcc/config/s390/s390.c | 10 ++ gcc/config/s390/s390.md | 144 +++++++++++++++++++++ .../gcc.target/s390/addsub-signed-overflow-1.c | 81 ++++++++++++ .../gcc.target/s390/addsub-signed-overflow-2.c | 80 ++++++++++++ .../gcc.target/s390/mul-signed-overflow-1.c | 56 ++++++++ .../gcc.target/s390/mul-signed-overflow-2.c | 56 ++++++++ 8 files changed, 447 insertions(+) create mode 100644 gcc/testsuite/gcc.target/s390/addsub-signed-overflow-1.c create mode 100644 gcc/testsuite/gcc.target/s390/addsub-signed-overflow-2.c create mode 100644 gcc/testsuite/gcc.target/s390/mul-signed-overflow-1.c create mode 100644 gcc/testsuite/gcc.target/s390/mul-signed-overflow-2.c diff --git a/gcc/config/s390/predicates.md b/gcc/config/s390/predicates.md index 4d2f8b2..fa15c05 100644 --- a/gcc/config/s390/predicates.md +++ b/gcc/config/s390/predicates.md @@ -585,3 +585,9 @@ return s390_valid_shift_count (op, 0); } ) + +; An integer constant which can be used in a signed add with overflow +; pattern without being reloaded. 
+(define_predicate "addv_const_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= -32768 && INTVAL (op) <= 32767"))) diff --git a/gcc/config/s390/s390-modes.def b/gcc/config/s390/s390-modes.def index 88c8673..7b7c114 100644 --- a/gcc/config/s390/s390-modes.def +++ b/gcc/config/s390/s390-modes.def @@ -31,6 +31,8 @@ FLOAT_MODE (TF, 16, ieee_quad_format); Condition Codes + CC0 CC1 CC2 CC3 + Check for zero CCZ: EQ NE NE NE @@ -57,6 +59,10 @@ CCA: EQ LT GT Overflow CCAP: EQ LT GT LT (AGHI, AHI) CCAN: EQ LT GT GT (AGHI, AHI) +Condition codes for overflow checking resulting from signed adds/subs/mults + +CCO: EQ EQ EQ NE (AGR, AGHI, SGR, MSC, ...) + Condition codes of unsigned adds and subs CCL: EQ NE EQ NE (ALGF/R, ALG/R, AL/R/Y, @@ -98,6 +104,13 @@ If you know whether the used constant is positive or negative you can predict the sign of the result even in case of an overflow. +CCO + +This mode is used to check whether there was an overflow condition in +a signed add, sub, or mul operation. See the addv<mode>4, subv<mode>4, and +mulv<mode>4 patterns.
+ + CCT, CCT1, CCT2, CCT3 If bits of an integer masked with an AND instruction are checked, the test under @@ -204,6 +217,7 @@ CC_MODE (CCZ1); CC_MODE (CCA); CC_MODE (CCAP); CC_MODE (CCAN); +CC_MODE (CCO); CC_MODE (CCL); CC_MODE (CCL1); CC_MODE (CCL2); diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 75b0b5b..24b8a5c 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -1378,6 +1378,7 @@ s390_match_ccmode_set (rtx set, machine_mode req_mode) case E_CCSRmode: case E_CCUmode: case E_CCURmode: + case E_CCOmode: case E_CCLmode: case E_CCL1mode: case E_CCL2mode: @@ -2071,6 +2072,15 @@ s390_branch_condition_mask (rtx code) } break; + case E_CCOmode: + switch (GET_CODE (code)) + { + case EQ: return CC0 | CC1 | CC2; + case NE: return CC3; + default: return -1; + } + break; + case E_CCSmode: switch (GET_CODE (code)) { diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 94a7340..e4516f6 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -5961,6 +5961,83 @@ "agh\t%0,%2" [(set_attr "op_type" "RXY")]) + +; Jump to label OP3 if OP1 + OP2 results in a signed overflow + +; addv_const_operand accepts all constants which can be handled +; without reloads. These will be handled primarily by +; "*addv<mode>3_ccoverflow_const" which doesn't provide a register +; alternative. Hence we have to match the operand exactly. +; For immediates we have to avoid the SIGN_EXTEND around OP2. 
+(define_expand "addv<mode>4" + [(parallel + [(set (reg:CCO CC_REGNUM) + (compare:CCO (plus:<DBL> + (sign_extend:<DBL> (match_operand:GPR 1 "nonimmediate_operand")) + (match_dup 4)) + (sign_extend:<DBL> (plus:GPR (match_dup 1) + (match_operand:GPR 2 "general_operand"))))) + (set (match_operand:GPR 0 "nonimmediate_operand") + (plus:GPR (match_dup 1) (match_dup 2)))]) + (set (pc) + (if_then_else (ne (reg:CCO CC_REGNUM) (const_int 0)) + (label_ref (match_operand 3)) + (pc)))] + "" +{ + if (CONSTANT_P (operands[2]) + && !addv_const_operand (operands[2], GET_MODE (operands[2]))) + operands[2] = force_reg (<GPR:MODE>mode, operands[2]); + + if (GET_MODE (operands[2]) != VOIDmode) + operands[4] = gen_rtx_SIGN_EXTEND (<DBL>mode, operands[2]); + else + /* This is what CSE does when propagating a constant into the pattern. */ + operands[4] = simplify_unary_operation (SIGN_EXTEND, <GPR:DBL>mode, operands[2], <GPR:MODE>mode); +}) + +; ark, agrk, ar, ahi, ahik, aghik, a, ay, agr, aghi, ag, asi, agsi +(define_insn "*addv<mode>3_ccoverflow" + [(set (reg CC_REGNUM) + (compare (plus:<DBL> + (sign_extend:<DBL> (match_operand:GPR 1 "nonimmediate_operand" "%0,d,0,d,0,0,0")) + (sign_extend:<DBL> (match_operand:GPR 2 "general_operand" " d,d,K,K,R,T,C"))) + (sign_extend:<DBL> (plus:GPR (match_dup 1) (match_dup 2))))) + (set (match_operand:GPR 0 "nonimmediate_operand" "=d,d,d,d,d,d,S") + (plus:GPR (match_dup 1) (match_dup 2)))] + "s390_match_ccmode (insn, CCOmode)" + "@ + a<g>r\t%0,%2 + a<g>rk\t%0,%1,%2 + a<g>hi\t%0,%h2 + a<g>hik\t%0,%1,%h2 + a<g>\t%0,%2 + a<y>\t%0,%2 + a<g>si\t%0,%c2" + [(set_attr "op_type" "RR<E>,RRF,RI,RIE,RX<Y>,RXY,SIY") + (set_attr "cpu_facility" "*,z196,*,z196,*,longdisp,z10") + (set_attr "z10prop" "z10_super_E1,*,z10_super_E1,*, + z10_super_E1,z10_super_E1,z10_super_E1")]) + +; ahi, aghi, ahik, aghik, asi, agsi +(define_insn "*addv<mode>3_ccoverflow_const" + [(set (reg CC_REGNUM) + (compare (plus:<DBL> + (sign_extend:<DBL> (match_operand:GPR 1 "nonimmediate_operand" 
"%0,d,0")) + (match_operand:<DBL> 2 "addv_const_operand" "K,K,C")) + (sign_extend:<DBL> (plus:GPR (match_dup 1) (match_dup 2))))) + (set (match_operand:GPR 0 "nonimmediate_operand" "=d,d,S") + (plus:GPR (match_dup 1) (match_dup 2)))] + "s390_match_ccmode (insn, CCOmode)" + "@ + a<g>hi\t%0,%h2 + a<g>hik\t%0,%1,%h2 + a<g>si\t%0,%c2" + [(set_attr "op_type" "RI,RIE,SIY") + (set_attr "cpu_facility" "*,z196,z10") + (set_attr "z10prop" "z10_super_E1,*,z10_super_E1")]) + + ; ; add(tf|df|sf|td|dd)3 instruction pattern(s). ; @@ -6370,6 +6447,41 @@ "sgh\t%0,%2" [(set_attr "op_type" "RXY")]) +; Jump to label OP3 if OP1 - OP2 results in a signed overflow +(define_expand "subv<mode>4" + [(parallel + [(set (reg:CCO CC_REGNUM) + (compare:CCO (minus:<DBL> + (sign_extend:<DBL> (match_operand:GPR 1 "nonimmediate_operand")) + (sign_extend:<DBL> (match_operand:GPR 2 "nonimmediate_operand"))) + (sign_extend:<DBL> (minus:GPR (match_dup 1) (match_dup 2))))) + (set (match_operand:GPR 0 "nonimmediate_operand") + (minus:GPR (match_dup 1) (match_dup 2)))]) + (set (pc) + (if_then_else (ne (reg:CCO CC_REGNUM) (const_int 0)) + (label_ref (match_operand 3)) + (pc)))] + "") + +; sr, s, sy, sgr, sg, srk, sgrk +(define_insn "*subv<mode>3_ccoverflow" + [(set (reg CC_REGNUM) + (compare (minus:<DBL> + (sign_extend:<DBL> (match_operand:GPR 1 "nonimmediate_operand" "0,d,0,0")) + (sign_extend:<DBL> (match_operand:GPR 2 "nonimmediate_operand" "d,d,R,T"))) + (sign_extend:<DBL> (minus:GPR (match_dup 1) (match_dup 2))))) + (set (match_operand:GPR 0 "register_operand" "=d,d,d,d") + (minus:GPR (match_dup 1) (match_dup 2)))] + "s390_match_ccmode (insn, CCOmode)" + "@ + s<g>r\t%0,%2 + s<g>rk\t%0,%1,%2 + s<g>\t%0,%2 + s<y>\t%0,%2" + [(set_attr "op_type" "RR<E>,RRF,RX<Y>,RXY") + (set_attr "cpu_facility" "*,z196,*,longdisp") + (set_attr "z10prop" "z10_super_c_E1,*,z10_super_E1,z10_super_E1")]) + ; ; sub(tf|df|sf|td|dd)3 instruction pattern(s). 
@@ -6888,6 +7000,38 @@ (set_attr "type" "imulsi") (set_attr "cpu_facility" "*,*,z10")]) +; Jump to label OP3 if OP1 * OP2 results in a signed overflow +(define_expand "mulv<mode>4" + [(parallel + [(set (reg:CCO CC_REGNUM) + (compare:CCO (mult:<DBL> + (sign_extend:<DBL> (match_operand:GPR 1 "register_operand")) + (sign_extend:<DBL> (match_operand:GPR 2 "nonimmediate_operand"))) + (sign_extend:<DBL> (mult:GPR (match_dup 1) (match_dup 2))))) + (set (match_operand:GPR 0 "register_operand") + (mult:GPR (match_dup 1) (match_dup 2)))]) + (set (pc) + (if_then_else (ne (reg:CCO CC_REGNUM) (const_int 0)) + (label_ref (match_operand 3)) + (pc)))] + "TARGET_Z14") + +; msrkc, msc, msgrkc, msgc +(define_insn "*mulv<mode>3_ccoverflow" + [(set (reg CC_REGNUM) + (compare (mult:<DBL> + (sign_extend:<DBL> (match_operand:GPR 1 "register_operand" "%d,0")) + (sign_extend:<DBL> (match_operand:GPR 2 "nonimmediate_operand" " d,T"))) + (sign_extend:<DBL> (mult:GPR (match_dup 1) (match_dup 2))))) + (set (match_operand:GPR 0 "register_operand" "=d,d") + (mult:GPR (match_dup 1) (match_dup 2)))] + "s390_match_ccmode (insn, CCOmode) && TARGET_Z14" + "@ + ms<g>rkc\t%0,%1,%2 + ms<g>c\t%0,%2" + [(set_attr "op_type" "RRF,RXY")]) + + ; ; umul instruction pattern(s). 
; diff --git a/gcc/testsuite/gcc.target/s390/addsub-signed-overflow-1.c b/gcc/testsuite/gcc.target/s390/addsub-signed-overflow-1.c new file mode 100644 index 0000000..367dbcb --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/addsub-signed-overflow-1.c @@ -0,0 +1,81 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -mzarch --save-temps" } */ + +#include <stddef.h> +#include <limits.h> + +int __attribute__((noinline,noclone)) +sadd (int a, int b, int *res) +{ + return __builtin_sadd_overflow(a, b, res); +} + +int __attribute__((noinline,noclone)) +ssub (int a, int b, int *res) +{ + return __builtin_ssub_overflow(a, b, res); +} + + +int __attribute__((noinline,noclone)) +saddl (long a, long b, long *res) +{ + return __builtin_saddl_overflow(a, b, res); +} + +int __attribute__((noinline,noclone)) +ssubl (long a, long b, long *res) +{ + return __builtin_ssubl_overflow(a, b, res); +} + + +int __attribute__((noinline,noclone)) +saddll (long long a, long long b, long long *res) +{ + return __builtin_saddll_overflow(a, b, res); +} + +int __attribute__((noinline,noclone)) +ssubll (long long a, long long b, long long *res) +{ + return __builtin_ssubll_overflow(a, b, res); +} + + +/* With the attribute at least main always uses the same instructions + regardless of the -march setting. This is necessary for the + scan-assembler-times directive below. */ +int __attribute__ ((target("arch=z10"))) +main () +{ + int ret = 0; + int result; + long lresult; + long long llresult; + + ret += !!sadd (INT_MAX, 1, &result); + ret += !!ssub (INT_MIN, 1, &result); + ret += !!saddl (LONG_MAX, 1, &lresult); + ret += !!ssubl (LONG_MIN, 1, &lresult); + ret += !!saddll (LLONG_MAX, 1, &llresult); + ret += !!ssubll (LLONG_MIN, 1, &llresult); + + if (ret != 6) + __builtin_abort (); + + return 0; +} + +/* Check that no compare or bitop instructions are emitted. 
*/ +/* { dg-final { scan-assembler-not "\tcr" } } */ +/* { dg-final { scan-assembler-not "\txr" } } */ +/* { dg-final { scan-assembler-not "\tnr" } } */ +/* { dg-final { scan-assembler-not "\tcgr" } } */ +/* { dg-final { scan-assembler-not "\txgr" } } */ +/* { dg-final { scan-assembler-not "\tngr" } } */ +/* On 31 bit the long long variants use risbgn to merge the 32 bit + regs into a 64 bit reg. */ +/* { dg-final { scan-assembler-not "\trisbg" { target { lp64 } } } } */ +/* Just one for the ret != 6 comparison. */ +/* { dg-final { scan-assembler-times "ci" 1 } } */ diff --git a/gcc/testsuite/gcc.target/s390/addsub-signed-overflow-2.c b/gcc/testsuite/gcc.target/s390/addsub-signed-overflow-2.c new file mode 100644 index 0000000..230ad4a --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/addsub-signed-overflow-2.c @@ -0,0 +1,80 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -mzarch --save-temps" } */ + +#include <stddef.h> +#include <limits.h> + +int __attribute__((noinline,noclone)) +sadd (int a, int *res) +{ + return __builtin_sadd_overflow(a, -1, res); +} + +int __attribute__((noinline,noclone)) +ssub (int a, int *res) +{ + return __builtin_ssub_overflow(a, -1, res); +} + + +int __attribute__((noinline,noclone)) +saddl (long a, long *res) +{ + return __builtin_saddl_overflow(a, -1, res); +} + +int __attribute__((noinline,noclone)) +ssubl (long a, long *res) +{ + return __builtin_ssubl_overflow(a, -1, res); +} + + +int __attribute__((noinline,noclone)) +saddll (long long a, long long *res) +{ + return __builtin_saddll_overflow(a, -1, res); +} + +int __attribute__((noinline,noclone)) +ssubll (long long a, long long *res) +{ + return __builtin_ssubll_overflow(a, -1, res); +} + +/* With the attribute at least main always uses the same instructions + regardless of the -march setting. This is necessary for the + scan-assembler-times directive below. 
*/ +int __attribute__ ((target("arch=z10"))) +main () +{ + int ret = 0; + int result; + long lresult; + long long llresult; + + ret += !!sadd (INT_MIN, &result); + ret += !!ssub (INT_MIN, &result); + ret += !!saddl (LONG_MIN, &lresult); + ret += !!ssubl (LONG_MIN, &lresult); + ret += !!saddll (LLONG_MIN, &llresult); + ret += !!ssubll (LLONG_MIN, &llresult); + + if (ret != 3) + __builtin_abort (); + + return 0; +} + +/* Check that no compare or bitop instructions are emitted. */ +/* { dg-final { scan-assembler-not "\tcr" } } */ +/* { dg-final { scan-assembler-not "\txr" } } */ +/* { dg-final { scan-assembler-not "\tnr" } } */ +/* { dg-final { scan-assembler-not "\tcgr" } } */ +/* { dg-final { scan-assembler-not "\txgr" } } */ +/* { dg-final { scan-assembler-not "\tngr" } } */ +/* On 31 bit the long long variants use risbgn to merge the 32 bit + regs into a 64 bit reg. */ +/* { dg-final { scan-assembler-not "\trisbg" { target { lp64 } } } } */ +/* Just one for the ret != 3 comparison. */ +/* { dg-final { scan-assembler-times "ci" 1 } } */ diff --git a/gcc/testsuite/gcc.target/s390/mul-signed-overflow-1.c b/gcc/testsuite/gcc.target/s390/mul-signed-overflow-1.c new file mode 100644 index 0000000..b3db60f --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/mul-signed-overflow-1.c @@ -0,0 +1,56 @@ +/* { dg-do run } */ +/* z14 only because we need msrkc, msc, msgrkc, msgc */ +/* { dg-options "-O3 -march=z14 -mzarch --save-temps" } */ + +#include <stddef.h> +#include <limits.h> + +int __attribute__((noinline,noclone)) +smul (int a, int b, int *res) +{ + return __builtin_smul_overflow(a, b, res); +} + +int __attribute__((noinline,noclone)) +smull (long a, long b, long *res) +{ + return __builtin_smull_overflow(a, b, res); +} + +int __attribute__((noinline,noclone)) +smulll (long long a, long long b, long long *res) +{ + return __builtin_smulll_overflow(a, b, res); +} + + +int +main () +{ + int ret = 0; + int result; + long lresult; + long long llresult; + + ret += !!smul 
(INT_MAX, 2, &result); + ret += !!smull (LONG_MAX, 2, &lresult); + ret += !!smulll (LLONG_MAX, 2, &llresult); + + if (ret != 3) + __builtin_abort (); + + return 0; +} + +/* Check that no compare or bitop instructions are emitted. */ +/* { dg-final { scan-assembler-not "\tcr" } } */ +/* { dg-final { scan-assembler-not "\txr" } } */ +/* { dg-final { scan-assembler-not "\tnr" } } */ +/* { dg-final { scan-assembler-not "\tcgr" } } */ +/* { dg-final { scan-assembler-not "\txgr" } } */ +/* { dg-final { scan-assembler-not "\tngr" } } */ +/* On 31 bit the long long variants use risbgn to merge the 32 bit + regs into a 64 bit reg. */ +/* { dg-final { scan-assembler-not "\trisbg" { target { lp64 } } } } */ +/* Just one for the ret != 3 comparison. */ +/* { dg-final { scan-assembler-times "ci" 1 } } */ diff --git a/gcc/testsuite/gcc.target/s390/mul-signed-overflow-2.c b/gcc/testsuite/gcc.target/s390/mul-signed-overflow-2.c new file mode 100644 index 0000000..76b3fa6 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/mul-signed-overflow-2.c @@ -0,0 +1,56 @@ +/* { dg-do run } */ +/* z14 only because we need msrkc, msc, msgrkc, msgc */ +/* { dg-options "-O3 -march=z14 -mzarch --save-temps" } */ + +#include <stddef.h> +#include <limits.h> + +int __attribute__((noinline,noclone)) +smul (int a, int *res) +{ + return __builtin_smul_overflow(a, -1, res); +} + +int __attribute__((noinline,noclone)) +smull (long a, long *res) +{ + return __builtin_smull_overflow(a, -1, res); +} + +int __attribute__((noinline,noclone)) +smulll (long long a, long long *res) +{ + return __builtin_smulll_overflow(a, -1, res); +} + + +int +main () +{ + int ret = 0; + int result; + long lresult; + long long llresult; + + ret += !!smul (INT_MIN, &result); + ret += !!smull (LONG_MIN, &lresult); + ret += !!smulll (LLONG_MIN, &llresult); + + if (ret != 3) + __builtin_abort (); + + return 0; +} + +/* Check that no compare or bitop instructions are emitted. 
*/ +/* { dg-final { scan-assembler-not "\tcr" } } */ +/* { dg-final { scan-assembler-not "\txr" } } */ +/* { dg-final { scan-assembler-not "\tnr" } } */ +/* { dg-final { scan-assembler-not "\tcgr" } } */ +/* { dg-final { scan-assembler-not "\txgr" } } */ +/* { dg-final { scan-assembler-not "\tngr" } } */ +/* On 31 bit the long long variants use risbgn to merge the 32 bit + regs into a 64 bit reg. */ +/* { dg-final { scan-assembler-not "\trisbg" { target { lp64 } } } } */ +/* Just one for the ret != 3 comparison. */ +/* { dg-final { scan-assembler-times "ci" 1 } } */ -- 2.7.4