Hi all,

Currently, when evaluating expressions like (a ? 24 : 25), we move both 24 and 25 into registers and perform a csel on them. This misses the opportunity to instead move just 24 into a register and then perform a csinc, saving an instruction and a register use. The same applies to csneg and csinv.
This patch implements that idea by allowing such pairs of immediates in *cmov<mode>_insn and adding an early splitter that performs the necessary transformation. The testcase included in the patch demonstrates the kind of opportunities that are now picked up. With this patch I see about 9.6% more csinc instructions being generated for SPEC2006 and the generated code looks objectively better (i.e. fewer mov-immediates and slightly lower register pressure). Bootstrapped and tested on aarch64. Ok for trunk? Thanks, Kyrill 2015-07-10 Kyrylo Tkachov <kyrylo.tkac...@arm.com> * config/aarch64/aarch64.md (*cmov<mode>_insn): Move stricter check for operands 3 and 4 to pattern predicate. Allow immediates that can be expressed as csinc/csneg/csinv. New define_split. (*csinv3<mode>_insn): Rename to... (csinv3<mode>_insn): ... This. * config/aarch64/aarch64.h (AARCH64_IMMS_OK_FOR_CSNEG): New macro. (AARCH64_IMMS_OK_FOR_CSINC): Likewise. (AARCH64_IMMS_OK_FOR_CSINV): Likewise. * config/aarch64/aarch64.c (aarch64_imms_ok_for_cond_op_1): New function. (aarch64_imms_ok_for_cond_op): Likewise. * config/aarch64/aarch64-protos.h (aarch64_imms_ok_for_cond_op_1): Declare prototype. (aarch64_imms_ok_for_cond_op): Likewise. 2015-07-10 Kyrylo Tkachov <kyrylo.tkac...@arm.com> * gcc.target/aarch64/cond-op-imm_1.c: New test.
commit eed5af149229609215327b62b7b3b4018adc6f3f Author: Kyrylo Tkachov <kyrylo.tkac...@arm.com> Date: Wed Jul 8 10:22:20 2015 +0100 [AArch64] Improve csinc/csneg/csinv opportunities on immediates diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 4fe437f..6e3781e 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -254,6 +254,8 @@ bool aarch64_float_const_zero_rtx_p (rtx); bool aarch64_function_arg_regno_p (unsigned); bool aarch64_gen_movmemqi (rtx *); bool aarch64_gimple_fold_builtin (gimple_stmt_iterator *); +bool aarch64_imms_ok_for_cond_op_1 (rtx, rtx); +bool aarch64_imms_ok_for_cond_op (rtx, rtx, machine_mode); bool aarch64_is_extend_from_extract (machine_mode, rtx, rtx); bool aarch64_is_long_call_p (rtx); bool aarch64_label_mentioned_p (rtx); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 0d81921..8babefb 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -3268,6 +3268,36 @@ aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode) return aarch64_bitmask_imm (val, mode); } +/* Helper for aarch64_imms_ok_for_cond_op. */ + +bool +aarch64_imms_ok_for_cond_op_1 (rtx a, rtx b) +{ + return AARCH64_IMMS_OK_FOR_CSNEG (a, b) + || AARCH64_IMMS_OK_FOR_CSINC (a, b) + || AARCH64_IMMS_OK_FOR_CSINV (a, b); +} + +/* Return true if A and B are CONST_INT rtxes that can appear in + the two arms of an IF_THEN_ELSE used for a CSINC, CSNEG or CSINV + operation in mode MODE. This is used in the splitter below + *cmov<mode>_insn in aarch64.md. */ + +bool +aarch64_imms_ok_for_cond_op (rtx a, rtx b, machine_mode mode) +{ + if (!CONST_INT_P (a) || !CONST_INT_P (b)) + return false; + + /* No need to do smart splitting with constant 0. We can just do + normal csinc, csneg, csinv on {w,x}zr. 
*/ + if (a == const0_rtx || b == const0_rtx) + return false; + + return aarch64_imms_ok_for_cond_op_1 (a, b) + || aarch64_imms_ok_for_cond_op_1 (b, a); +} + static bool aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) { diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 7c31376..e7aecd1 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -678,6 +678,15 @@ do { \ /* Maximum bytes moved by a single instruction (load/store pair). */ #define MOVE_MAX (UNITS_PER_WORD * 2) +/* Check if CONST_INTs A and B can be used as two arguments to CSNEG. */ +#define AARCH64_IMMS_OK_FOR_CSNEG(A, B) (INTVAL (A) == -INTVAL (B)) + +/* Check if CONST_INTs A and B can be used as two arguments to CSINC. */ +#define AARCH64_IMMS_OK_FOR_CSINC(A, B) (INTVAL (A) == (INTVAL (B) + 1)) + +/* Check if CONST_INTs A and B can be used as two arguments to CSINV. */ +#define AARCH64_IMMS_OK_FOR_CSINV(A, B) (INTVAL (A) == ~INTVAL (B)) + /* The base cost overhead of a memcpy call, for MOVE_RATIO and friends. 
*/ #define AARCH64_CALL_RATIO 8 diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 1e343fa..358f89b 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -2848,10 +2848,14 @@ (define_insn "*cmov<mode>_insn" (if_then_else:ALLI (match_operator 1 "aarch64_comparison_operator" [(match_operand 2 "cc_register" "") (const_int 0)]) - (match_operand:ALLI 3 "aarch64_reg_zero_or_m1_or_1" "rZ,rZ,UsM,rZ,Ui1,UsM,Ui1") - (match_operand:ALLI 4 "aarch64_reg_zero_or_m1_or_1" "rZ,UsM,rZ,Ui1,rZ,UsM,Ui1")))] - "!((operands[3] == const1_rtx && operands[4] == constm1_rtx) - || (operands[3] == constm1_rtx && operands[4] == const1_rtx))" + (match_operand:ALLI 3 "aarch64_reg_or_imm" "rZ,rZ,UsM,rZ,Ui1,UsM,Ui1") + (match_operand:ALLI 4 "aarch64_reg_or_imm" "rZ,UsM,rZ,Ui1,rZ,UsM,Ui1")))] + "(aarch64_reg_zero_or_m1_or_1 (operands[3], <MODE>mode) + && aarch64_reg_zero_or_m1_or_1 (operands[4], <MODE>mode) + && !((operands[3] == const1_rtx && operands[4] == constm1_rtx) + || (operands[3] == constm1_rtx && operands[4] == const1_rtx))) + || (!reload_completed && (<MODE>mode == SImode || <MODE>mode == DImode) + && aarch64_imms_ok_for_cond_op (operands[3], operands[4], <MODE>mode))" ;; Final two alternatives should be unreachable, but included for completeness "@ csel\\t%<w>0, %<w>3, %<w>4, %m1 @@ -2864,6 +2868,55 @@ (define_insn "*cmov<mode>_insn" [(set_attr "type" "csel")] ) +;; This interacts with the *cmov<mode>_insn pattern above and is used to +;; optimize cases like: +;; mov xa, #imm1 +;; mov xb, #imm2 +;; csel xc, xa, xb, <cond> +;; into: +;; mov xa, #imm1 +;; cs{inc,inv,neg} xc, xa, xa, <cond> +;; when imm1 and imm2 have that relationship. 
+(define_split + [(set (match_operand:GPI 0 "register_operand" "") + (if_then_else:GPI + (match_operator 1 "aarch64_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (match_operand:GPI 3 "const_int_operand" "") + (match_operand:GPI 4 "const_int_operand" "")))] + "!reload_completed + && aarch64_imms_ok_for_cond_op (operands[3], operands[4], <MODE>mode)" + [(const_int 0)] + { + + bool swap_p = !aarch64_imms_ok_for_cond_op_1 (operands[3], + operands[4]); + + if (swap_p) + std::swap (operands[3], operands[4]); + + rtx tmp = gen_reg_rtx (<MODE>mode); + emit_move_insn (tmp, operands[4]); + + enum rtx_code code = GET_CODE (operands[1]); + if (swap_p) + code = REVERSE_CONDITION (code, GET_MODE (operands[2])); + + rtx comp = gen_rtx_fmt_ee (code, VOIDmode, operands[2], const0_rtx); + + if (AARCH64_IMMS_OK_FOR_CSINC (operands[3], operands[4])) + emit_insn (gen_csinc3<mode>_insn (operands[0], comp, tmp, tmp)); + else if (AARCH64_IMMS_OK_FOR_CSNEG (operands[3], operands[4])) + emit_insn (gen_csneg3<mode>_insn (operands[0], comp, tmp, tmp)); + else if (AARCH64_IMMS_OK_FOR_CSINV (operands[3], operands[4])) + emit_insn (gen_csinv3<mode>_insn (operands[0], comp, tmp, tmp)); + else + gcc_unreachable (); + + DONE; + } +) + ;; zero_extend version of above (define_insn "*cmovsi_insn_uxtw" [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r,r,r") @@ -2998,7 +3051,7 @@ (define_insn "csinc3<mode>_insn" [(set_attr "type" "csel")] ) -(define_insn "*csinv3<mode>_insn" +(define_insn "csinv3<mode>_insn" [(set (match_operand:GPI 0 "register_operand" "=r") (if_then_else:GPI (match_operand 1 "aarch64_comparison_operation" "") diff --git a/gcc/testsuite/gcc.target/aarch64/cond-op-imm_1.c b/gcc/testsuite/gcc.target/aarch64/cond-op-imm_1.c new file mode 100644 index 0000000..1cd1494 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/cond-op-imm_1.c @@ -0,0 +1,138 @@ +/* { dg-do run } */ +/* { dg-options "-save-temps -O2 -fno-inline" } */ + +extern void abort 
(void); + +#define N 30 +#define M 25089992 + +int +fooincsi (int a) +{ + return a ? N : (N + 1); +} + +/* { dg-final { scan-assembler "csinc\tw\[0-9\]*.*" } } */ + +int +foonegsi (int a) +{ + return a ? N : -N; +} + +/* { dg-final { scan-assembler "csneg\tw\[0-9\]*.*" } } */ + + +int +fooinvsi (int a) +{ + return a ? N : ~N; +} + +/* { dg-final { scan-assembler "csinv\tw\[0-9\]*.*" } } */ + +long +fooincdi (long a) +{ + return a ? N : (N + 1); +} + +long +largefooinc (long a) +{ + return a ? M : (M + 1); +} + +/* { dg-final { scan-assembler "csinc\tx\[0-9\]*.*" } } */ + +long +foonegdi (long a) +{ + return a ? N : -N; +} + +long +largefooneg (long a) +{ + return a ? M : -M; +} + +/* { dg-final { scan-assembler "csneg\tx\[0-9\]*.*" } } */ + +long +fooinvdi (long a) +{ + return a ? N : ~N; +} + +long +largefooinv (long a) +{ + return a ? M : ~M; +} + +/* { dg-final { scan-assembler "csinv\tx\[0-9\]*.*" } } */ + + +int +main (void) +{ + if (fooincsi (1) != N) + abort (); + + if (fooincsi (0) != (N + 1)) + abort (); + + if (foonegsi (1) != N) + abort (); + + if (foonegsi (0) != -N) + abort (); + + if (fooinvsi (1) != N) + abort (); + + if (fooinvsi (0) != ~N) + abort (); + + if (fooincdi (1) != N) + abort (); + + if (fooincdi (0) != (N + 1)) + abort (); + + if (foonegdi (1) != N) + abort (); + + if (foonegdi (0) != -N) + abort (); + + if (fooinvdi (1) != N) + abort (); + + if (fooinvdi (0) != ~N) + abort (); + + if (largefooinc (0) != (M + 1)) + abort (); + + if (largefooinv (0) != ~M) + abort (); + + if (largefooneg (0) != -M) + abort (); + + if (largefooinc (1) != M) + abort (); + + if (largefooinv (1) != M) + abort (); + + if (largefooneg (1) != M) + abort (); + + return 0; +} + +/* { dg-final { scan-assembler-not "csel\tx\[0-9\]*.*" } } */ +/* { dg-final { scan-assembler-not "csel\tw\[0-9\]*.*" } } */