For sub-int types (short, char), the sign/zero extension between add+1 and csel prevents combine from matching the csinc3 pattern.
Add four peephole2 patterns that sink the extension past the conditional
select, exposing the cinc opportunity:

Patterns A and B handle the simple case with a CC-mode compare (4-insn
window, saves 1 insn).  Both instruction orderings (cmp/add/extend/csel
and add/cmp/extend/csel) are handled.

  Before:                      After:
    sxth  w0, w0                 sxth  w0, w0
    cmp   w0, w1                 cmp   w0, w1
    add   w1, w0, #1             csinc w0, w0, w0, ge
    sxth  w1, w1                 sxth  w0, w0
    csel  w0, w0, w1, ge

Patterns C and D handle the case where the compare uses CC_SWP mode with
the sign/zero-extend folded into the compare instruction (5-insn window,
saves 2 insns).  This occurs when the value being compared was produced
by a preceding arithmetic operation (e.g. val += other) rather than
arriving directly as a function argument.

  Before:                      After:
    add   w1, w0, w1             add   w1, w0, w1
    sxth  w0, w1                 cmp   w2, w1, sxth
    cmp   w2, w1, sxth           csinc w0, w1, w1, le
    add   w1, w1, 1              sxth  w0, w0
    sxth  w1, w1
    csel  w0, w0, w1, le

Co-authored-by: Konstantinos Eleftheriou <[email protected]>

gcc/ChangeLog:

	* config/aarch64/aarch64-protos.h (aarch64_peep_cinc_extend_p)
	(aarch64_peep_cinc_extend_swp_p, aarch64_emit_cinc_extend): Declare.
	* config/aarch64/aarch64.cc (aarch64_peep_cinc_extend_p)
	(aarch64_peep_cinc_extend_swp_p, aarch64_emit_cinc_extend): New
	functions.
	* config/aarch64/aarch64.md: Add peephole2 patterns to sink
	sign/zero extension past conditional select to expose csinc3.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/csinc-4.c: New test.
	* gcc.target/aarch64/csinc-4-neg.c: New test.
	* gcc.target/aarch64/csinc-4-run.c: New test.

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index d1f2873f208b..9973501651f7 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1122,6 +1122,10 @@ void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);
 bool aarch64_gen_adjusted_ldpstp (rtx *, bool, machine_mode, RTX_CODE);
 void aarch64_finish_ldpstp_peephole (rtx *, bool,
 				     enum rtx_code = (enum rtx_code)0);
+bool aarch64_peep_cinc_extend_p (rtx *);
+bool aarch64_peep_cinc_extend_swp_p (rtx *);
+void aarch64_emit_cinc_extend (machine_mode, rtx, rtx, bool, rtx, rtx,
+			       rtx, machine_mode, machine_mode, rtx_code);
 
 void aarch64_expand_sve_vec_cmp_int (rtx, rtx_code, rtx, rtx);
 void aarch64_expand_sve_vec_cmp_float (rtx, rtx_code, rtx, rtx);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index f2ecb0ee8cb7..dec208d6d214 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -29890,6 +29890,138 @@ aarch64_finish_ldpstp_peephole (rtx *operands, bool load_p, enum rtx_code code)
     }
 }
 
+/* Return true if the first insn of the current peephole2 window is the
+   compare, i.e. it sets the condition-code register.  The predicates
+   below use this to tell apart the two orderings that share them.  */
+
+static bool
+aarch64_peep_cmp_first_p (void)
+{
+  rtx set = single_set (peep2_next_insn (0));
+  return (set
+	  && REG_P (SET_DEST (set))
+	  && REGNO (SET_DEST (set)) == CC_REGNUM);
+}
+
+/* Predicate for peephole2 patterns A and B (CC-mode, 4-insn window).
+   Returns true when the operands satisfy the conditions for sinking a
+   sign/zero extension past a csel to expose a cinc.  Operand roles:
+     0   add destination (must differ from source 1)
+     1   value being compared and incremented
+     2   extend destination (must differ from operand 1)
+     3   second compare operand
+     4   csel destination
+     7   narrow source of the extend (must alias operand 0)
+     8/9 csel arms (one must be operand 1, the other operand 2)
+
+   In pattern B the add precedes the compare, so operand 3 must not
+   mention operand 0 there: the original compare would read the
+   incremented value, which the replacement sequence never computes.
+
+   NOTE(review): the replacement extends the csinc result on both arms,
+   so it is only equivalent when operand 1 already holds a value that is
+   sign/zero-extended from SHORT.  Nothing in this predicate checks
+   that; confirm it holds before relying on patterns A and B.  */
+
+bool
+aarch64_peep_cinc_extend_p (rtx *operands)
+{
+  return (!rtx_equal_p (operands[0], operands[1])
+	  && !rtx_equal_p (operands[1], operands[2])
+	  && (aarch64_peep_cmp_first_p ()
+	      || !reg_overlap_mentioned_p (operands[0], operands[3]))
+	  && true_regnum (operands[7]) == true_regnum (operands[0])
+	  && (rtx_equal_p (operands[0], operands[4])
+	      || peep2_reg_dead_p (4, operands[0]))
+	  && (rtx_equal_p (operands[2], operands[4])
+	      || peep2_reg_dead_p (4, operands[2]))
+	  && ((rtx_equal_p (operands[8], operands[1])
+	       && rtx_equal_p (operands[9], operands[2]))
+	      || (rtx_equal_p (operands[8], operands[2])
+		  && rtx_equal_p (operands[9], operands[1]))));
+}
+
+/* Predicate for peephole2 patterns C and D (CC_SWP-mode, 5-insn window).
+   Returns true when the operands satisfy the conditions for sinking a
+   sign/zero extension past a csel to expose a cinc.  Operand roles:
+     0   add destination (must differ from operand 2)
+     1   value being incremented (must alias operand 6)
+     2   first extend destination (must differ from operand 7)
+     3   second compare operand
+     4   csel destination
+     6   narrow source of the first extend (must alias operand 1)
+     7   second extend destination
+     8/9 csel arms (one must be operand 2, the other operand 7)
+     10  narrow source of the second extend (must alias operand 0)
+
+   Operand 0 must differ from operand 2 so that the add does not clobber
+   the unincremented arm.  In pattern C the first extend precedes the
+   compare, so operand 3 must not mention operand 2 there: the original
+   compare would read a value the replacement sequence never computes.  */
+
+bool
+aarch64_peep_cinc_extend_swp_p (rtx *operands)
+{
+  return (true_regnum (operands[1]) == true_regnum (operands[6])
+	  && true_regnum (operands[10]) == true_regnum (operands[0])
+	  && !rtx_equal_p (operands[0], operands[2])
+	  && !rtx_equal_p (operands[2], operands[7])
+	  && (aarch64_peep_cmp_first_p ()
+	      || !reg_overlap_mentioned_p (operands[2], operands[3]))
+	  && (rtx_equal_p (operands[0], operands[4])
+	      || peep2_reg_dead_p (5, operands[0]))
+	  && (rtx_equal_p (operands[2], operands[4])
+	      || peep2_reg_dead_p (5, operands[2]))
+	  && (rtx_equal_p (operands[7], operands[4])
+	      || peep2_reg_dead_p (5, operands[7]))
+	  && ((rtx_equal_p (operands[8], operands[2])
+	       && rtx_equal_p (operands[9], operands[7]))
+	      || (rtx_equal_p (operands[8], operands[7])
+		  && rtx_equal_p (operands[9], operands[2]))));
+}
+
+/* Helper for the four peephole2 patterns that sink a sign/zero extension
+   past a conditional select to expose a cinc opportunity (patterns A-D in
+   aarch64.md).
+
+   Emits three instructions:
+   (1) A compare that sets CC_REG (in mode CC_MODE) from COMPARE_LHS and
+       COMPARE_RHS.
+   (2) A csinc that writes BASE or BASE+1 into RESULT.  MOD_IS_TRUE says
+       whether the incremented value was the true arm of the original
+       csel; if it is false, the code of COND_OP is reversed so that the
+       increment is taken exactly when COND_OP is false.
+   (3) A sign/zero extension (EXT_CODE) of the narrow view (SHORT_MODE)
+       of RESULT back into RESULT (in GPI_MODE).
+
+   CC_MODE is CCmode for patterns A/B and CC_SWPmode for patterns C/D.  */
+
+void
+aarch64_emit_cinc_extend (machine_mode cc_mode,
+			  rtx compare_lhs, rtx compare_rhs,
+			  bool mod_is_true,
+			  rtx cond_op, rtx result, rtx base,
+			  machine_mode gpi_mode, machine_mode short_mode,
+			  rtx_code ext_code)
+{
+  rtx cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
+  enum rtx_code code = GET_CODE (cond_op);
+  if (!mod_is_true)
+    code = reverse_condition (code);
+  rtx cond = gen_rtx_fmt_ee (code, VOIDmode, cc_reg, const0_rtx);
+
+  emit_insn (gen_rtx_SET (cc_reg,
+			  gen_rtx_COMPARE (cc_mode, compare_lhs,
+					   compare_rhs)));
+  if (gpi_mode == SImode)
+    emit_insn (gen_csinc3si_insn (result, cond, base, base));
+  else
+    emit_insn (gen_csinc3di_insn (result, cond, base, base));
+
+  rtx narrow = gen_lowpart (short_mode, result);
+  convert_move (result, narrow, ext_code == ZERO_EXTEND);
+}
+
 /* Taking X and Y to be HOST_WIDE_INT pointers, return the result of a
    comparison between the two.  */
 int
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index e44b1cd9eefa..6195bc3aa6f6 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5033,6 +5033,148 @@ (define_insn "csinc3<mode>_insn"
   [(set_attr "type" "csel")]
 )
 
+;; Sink sign/zero extension past conditional select to expose cinc.
+;; For sub-int types, the extension between add+1 and csel prevents
+;; combine from matching csinc3.  These peephole2 patterns reorder to:
+;;   cmp, cinc, extend
+;; saving one or two instructions.
+;;
+;; NOTE(review): this assumes the unchanged arm is already within SHORT
+;; range (e.g. sign/zero-extended on function entry or by a prior narrowing
+;; op), making extend(truncate(x)) after cinc a no-op on that arm; confirm.
+;;
+;; Patterns A and B handle the simple case with a CC-mode compare
+;; (4-insn window, saves 1 insn).
+;;
+;; Patterns C and D handle the case where the compare uses CC_SWP mode
+;; with the extend folded into the compare instruction (5-insn window,
+;; saves 2 insns).  This occurs when the value being compared was
+;; produced by a preceding arithmetic operation (e.g. val += other)
+;; rather than arriving as a function argument.
+;;
+;; Pattern A: cmp, add, extend, csel -> cmp, cinc, extend
+(define_peephole2
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC (match_operand:GPI 1 "register_operand")
+		    (match_operand:GPI 3 "aarch64_plus_operand")))
+   (set (match_operand:GPI 0 "register_operand")
+	(plus:GPI (match_dup 1)
+		  (const_int 1)))
+   (set (match_operand:GPI 2 "register_operand")
+	(ANY_EXTEND:GPI (match_operand:SHORT 7 "register_operand")))
+   (set (match_operand:GPI 4 "register_operand")
+	(if_then_else:GPI
+	  (match_operator 5 "aarch64_comparison_operator"
+	    [(reg:CC CC_REGNUM) (const_int 0)])
+	  (match_operand:GPI 8 "register_operand")
+	  (match_operand:GPI 9 "register_operand")))]
+  "aarch64_peep_cinc_extend_p (operands)"
+  [(const_int 0)]
+  {
+    rtx_code ext_code = '<ANY_EXTEND:su>' == 's' ? SIGN_EXTEND : ZERO_EXTEND;
+    bool mod_is_true = rtx_equal_p (operands[8], operands[2]);
+    aarch64_emit_cinc_extend (CCmode, operands[1], operands[3],
+			      mod_is_true, operands[5], operands[4],
+			      operands[1], <GPI:MODE>mode, <SHORT:MODE>mode,
+			      ext_code);
+    DONE;
+  }
+)
+
+;; Pattern B: add, cmp, extend, csel -> cmp, cinc, extend
+(define_peephole2
+  [(set (match_operand:GPI 0 "register_operand")
+	(plus:GPI (match_operand:GPI 1 "register_operand")
+		  (const_int 1)))
+   (set (reg:CC CC_REGNUM)
+	(compare:CC (match_dup 1)
+		    (match_operand:GPI 3 "aarch64_plus_operand")))
+   (set (match_operand:GPI 2 "register_operand")
+	(ANY_EXTEND:GPI (match_operand:SHORT 7 "register_operand")))
+   (set (match_operand:GPI 4 "register_operand")
+	(if_then_else:GPI
+	  (match_operator 5 "aarch64_comparison_operator"
+	    [(reg:CC CC_REGNUM) (const_int 0)])
+	  (match_operand:GPI 8 "register_operand")
+	  (match_operand:GPI 9 "register_operand")))]
+  "aarch64_peep_cinc_extend_p (operands)"
+  [(const_int 0)]
+  {
+    rtx_code ext_code = '<ANY_EXTEND:su>' == 's' ? SIGN_EXTEND : ZERO_EXTEND;
+    bool mod_is_true = rtx_equal_p (operands[8], operands[2]);
+    aarch64_emit_cinc_extend (CCmode, operands[1], operands[3],
+			      mod_is_true, operands[5], operands[4],
+			      operands[1], <GPI:MODE>mode, <SHORT:MODE>mode,
+			      ext_code);
+    DONE;
+  }
+)
+
+;; Pattern C: extend, cmp_swp, add, extend, csel -> cmp_swp, cinc, extend
+(define_peephole2
+  [(set (match_operand:GPI 2 "register_operand")
+	(ANY_EXTEND:GPI (match_operand:SHORT 6 "register_operand")))
+   (set (reg:CC_SWP CC_REGNUM)
+	(compare:CC_SWP (ANY_EXTEND:GPI (match_dup 6))
+			(match_operand:GPI 3 "register_operand")))
+   (set (match_operand:GPI 0 "register_operand")
+	(plus:GPI (match_operand:GPI 1 "register_operand")
+		  (const_int 1)))
+   (set (match_operand:GPI 7 "register_operand")
+	(ANY_EXTEND:GPI (match_operand:SHORT 10 "register_operand")))
+   (set (match_operand:GPI 4 "register_operand")
+	(if_then_else:GPI
+	  (match_operator 5 "aarch64_comparison_operator"
+	    [(reg:CC_SWP CC_REGNUM) (const_int 0)])
+	  (match_operand:GPI 8 "register_operand")
+	  (match_operand:GPI 9 "register_operand")))]
+  "aarch64_peep_cinc_extend_swp_p (operands)"
+  [(const_int 0)]
+  {
+    rtx_code ext_code = '<ANY_EXTEND:su>' == 's' ? SIGN_EXTEND : ZERO_EXTEND;
+    rtx ext = gen_rtx_fmt_e (ext_code, <GPI:MODE>mode, operands[6]);
+    bool mod_is_true = rtx_equal_p (operands[8], operands[7]);
+    aarch64_emit_cinc_extend (CC_SWPmode, ext, operands[3],
+			      mod_is_true, operands[5], operands[4],
+			      operands[1], <GPI:MODE>mode, <SHORT:MODE>mode,
+			      ext_code);
+    DONE;
+  }
+)
+
+;; Pattern D: cmp_swp, extend, add, extend, csel -> cmp_swp, cinc, extend
+(define_peephole2
+  [(set (reg:CC_SWP CC_REGNUM)
+	(compare:CC_SWP (ANY_EXTEND:GPI
+			  (match_operand:SHORT 6 "register_operand"))
+			(match_operand:GPI 3 "register_operand")))
+   (set (match_operand:GPI 2 "register_operand")
+	(ANY_EXTEND:GPI (match_dup 6)))
+   (set (match_operand:GPI 0 "register_operand")
+	(plus:GPI (match_operand:GPI 1 "register_operand")
+		  (const_int 1)))
+   (set (match_operand:GPI 7 "register_operand")
+	(ANY_EXTEND:GPI (match_operand:SHORT 10 "register_operand")))
+   (set (match_operand:GPI 4 "register_operand")
+	(if_then_else:GPI
+	  (match_operator 5 "aarch64_comparison_operator"
+	    [(reg:CC_SWP CC_REGNUM) (const_int 0)])
+	  (match_operand:GPI 8 "register_operand")
+	  (match_operand:GPI 9 "register_operand")))]
+  "aarch64_peep_cinc_extend_swp_p (operands)"
+  [(const_int 0)]
+  {
+    rtx_code ext_code = '<ANY_EXTEND:su>' == 's' ? SIGN_EXTEND : ZERO_EXTEND;
+    rtx ext = gen_rtx_fmt_e (ext_code, <GPI:MODE>mode, operands[6]);
+    bool mod_is_true = rtx_equal_p (operands[8], operands[7]);
+    aarch64_emit_cinc_extend (CC_SWPmode, ext, operands[3],
+			      mod_is_true, operands[5], operands[4],
+			      operands[1], <GPI:MODE>mode, <SHORT:MODE>mode,
+			      ext_code);
+    DONE;
+  }
+)
+
 (define_insn "*csinv3<mode>_insn"
   [(set (match_operand:GPI 0 "register_operand" "=r")
         (if_then_else:GPI
diff --git a/gcc/testsuite/gcc.target/aarch64/csinc-4-neg.c b/gcc/testsuite/gcc.target/aarch64/csinc-4-neg.c
new file mode 100644
index 000000000000..1ac153d9904e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/csinc-4-neg.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Negative tests: cases that should NOT be transformed to cinc by the
+   sub-int peephole2 patterns in csinc-4.c.  These verify the patterns
+   do not fire on non-matching code shapes.  */
+
+/* Increment by 2 instead of 1: not a cinc candidate.  */
+int neg_add2 (short val, int clipval) {
+  if (val < clipval) val += 2;
+  return val;
+}
+
+/* Decrement instead of increment: not a cinc candidate.  */
+int neg_dec (short val, int clipval) {
+  if (val < clipval) val--;
+  return val;
+}
+
+/* Full-width int: combine already handles this, no peephole2 needed.
+   Verify we don't regress -- cinc should still appear via combine.  */
+int pos_fullwidth (int val, int clipval) {
+  if (val < clipval) val++;
+  return val;
+}
+
+/* Conditional assignment (not increment): csel, not cinc.  */
+int neg_assign (short val, int clipval, short other) {
+  if (val < clipval) val = other;
+  return val;
+}
+
+/* The add-by-2 and decrement cases should use csel, not cinc.
+   The conditional assignment should also use csel.
+   The full-width case should still get cinc via combine.  */
+
+/* { dg-final { scan-assembler-times "cinc\tw" 1 } } */
+/* { dg-final { scan-assembler-times "csel\tw" 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/csinc-4-run.c b/gcc/testsuite/gcc.target/aarch64/csinc-4-run.c
new file mode 100644
index 000000000000..0dd720331aca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/csinc-4-run.c
@@ -0,0 +1,243 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+/* Runtime boundary-value tests for cinc peephole2 with sub-int types.
+   Complements the assembly-scan tests in csinc-4.c.  */
+
+#include <limits.h>
+
+__attribute__((noinline))
+int f_short (short val, int clipval) {
+  if (val < clipval) val++;
+  return val;
+}
+
+__attribute__((noinline))
+int f_schar (signed char val, int clipval) {
+  if (val < clipval) val++;
+  return val;
+}
+
+__attribute__((noinline))
+int f_ushort (unsigned short val, unsigned int clipval) {
+  if (val < clipval) val++;
+  return val;
+}
+
+__attribute__((noinline))
+int f_uchar (unsigned char val, unsigned int clipval) {
+  if (val < clipval) val++;
+  return val;
+}
+
+__attribute__((noinline))
+int f_short_3arg (short val, short other, int clipval) {
+  val += other;
+  if (val < clipval) val++;
+  return val;
+}
+
+__attribute__((noinline))
+int f_schar_3arg (signed char val, signed char other, int clipval) {
+  val += other;
+  if (val < clipval) val++;
+  return val;
+}
+
+__attribute__((noinline))
+int f_ushort_3arg (unsigned short val, unsigned short other, unsigned int clipval) {
+  val += other;
+  if (val < clipval) val++;
+  return val;
+}
+
+__attribute__((noinline))
+int f_uchar_3arg (unsigned char val, unsigned char other, unsigned int clipval) {
+  val += other;
+  if (val < clipval) val++;
+  return val;
+}
+
+int
+main (void)
+{
+  /* --- f_short: if (val < clipval) val++ --- */
+
+  /* val < clipval: increment happens.  */
+  if (f_short (5, 10) != 6)
+    __builtin_abort ();
+
+  /* val == clipval: no increment.  */
+  if (f_short (10, 10) != 10)
+    __builtin_abort ();
+
+  /* val > clipval: no increment.  */
+  if (f_short (15, 10) != 15)
+    __builtin_abort ();
+
+  /* val == clipval - 1: increment makes val == clipval.  */
+  if (f_short (9, 10) != 10)
+    __builtin_abort ();
+
+  /* val == clipval == SHRT_MAX: not less than clipval, so no increment.  */
+  if (f_short (SHRT_MAX, SHRT_MAX) != SHRT_MAX)
+    __builtin_abort ();
+
+  /* val at SHRT_MAX - 1, clipval = SHRT_MAX: increment.  */
+  if (f_short (SHRT_MAX - 1, SHRT_MAX) != SHRT_MAX)
+    __builtin_abort ();
+
+  /* val at SHRT_MIN, clipval = SHRT_MIN: no increment.  */
+  if (f_short (SHRT_MIN, SHRT_MIN) != SHRT_MIN)
+    __builtin_abort ();
+
+  /* val at SHRT_MIN, clipval = 0: increment.  */
+  if (f_short (SHRT_MIN, 0) != SHRT_MIN + 1)
+    __builtin_abort ();
+
+  /* Negative values.  */
+  if (f_short (-5, -3) != -4)
+    __builtin_abort ();
+
+  if (f_short (-3, -5) != -3)
+    __builtin_abort ();
+
+  /* --- f_schar: if (val < clipval) val++ --- */
+
+  if (f_schar (5, 10) != 6)
+    __builtin_abort ();
+
+  if (f_schar (10, 10) != 10)
+    __builtin_abort ();
+
+  if (f_schar (SCHAR_MAX, SCHAR_MAX) != SCHAR_MAX)
+    __builtin_abort ();
+
+  if (f_schar (SCHAR_MAX - 1, SCHAR_MAX) != SCHAR_MAX)
+    __builtin_abort ();
+
+  if (f_schar (SCHAR_MIN, SCHAR_MIN) != SCHAR_MIN)
+    __builtin_abort ();
+
+  if (f_schar (SCHAR_MIN, 0) != SCHAR_MIN + 1)
+    __builtin_abort ();
+
+  /* --- f_ushort: if (val < clipval) val++ --- */
+
+  if (f_ushort (5, 10) != 6)
+    __builtin_abort ();
+
+  if (f_ushort (10, 10) != 10)
+    __builtin_abort ();
+
+  if (f_ushort (USHRT_MAX, USHRT_MAX) != USHRT_MAX)
+    __builtin_abort ();
+
+  if (f_ushort (USHRT_MAX - 1, USHRT_MAX) != USHRT_MAX)
+    __builtin_abort ();
+
+  if (f_ushort (0, 1) != 1)
+    __builtin_abort ();
+
+  if (f_ushort (0, 0) != 0)
+    __builtin_abort ();
+
+  /* --- f_uchar: if (val < clipval) val++ --- */
+
+  if (f_uchar (5, 10) != 6)
+    __builtin_abort ();
+
+  if (f_uchar (10, 10) != 10)
+    __builtin_abort ();
+
+  if (f_uchar (UCHAR_MAX, UCHAR_MAX) != UCHAR_MAX)
+    __builtin_abort ();
+
+  if (f_uchar (UCHAR_MAX - 1, UCHAR_MAX) != UCHAR_MAX)
+    __builtin_abort ();
+
+  /* --- f_short_3arg: val += other; if (val < clipval) val++ --- */
+
+  /* Simple case: 3 + 4 = 7 < 10 -> 8.  */
+  if (f_short_3arg (3, 4, 10) != 8)
+    __builtin_abort ();
+
+  /* Sum == clipval: no increment.  */
+  if (f_short_3arg (5, 5, 10) != 10)
+    __builtin_abort ();
+
+  /* Sum > clipval: no increment.  */
+  if (f_short_3arg (6, 5, 10) != 11)
+    __builtin_abort ();
+
+  /* Overflow wraps within short range: 32000 + 1000 overflows to -32536,
+     which is < 0, so increment to -32535.  */
+  if (f_short_3arg (32000, 1000, 0) != -32535)
+    __builtin_abort ();
+
+  /* No overflow, negative result: -100 + 50 = -50 < 0 -> -49.  */
+  if (f_short_3arg (-100, 50, 0) != -49)
+    __builtin_abort ();
+
+  /* No overflow, negative result >= clip: -100 + 50 = -50 >= -50 -> -50.  */
+  if (f_short_3arg (-100, 50, -50) != -50)
+    __builtin_abort ();
+
+  /* --- f_schar_3arg: val += other; if (val < clipval) val++ --- */
+
+  if (f_schar_3arg (3, 4, 10) != 8)
+    __builtin_abort ();
+
+  if (f_schar_3arg (5, 5, 10) != 10)
+    __builtin_abort ();
+
+  /* Overflow wraps: 100 + 100 = -56 (signed char), < 0 -> -55.  */
+  if (f_schar_3arg (100, 100, 0) != -55)
+    __builtin_abort ();
+
+  /* Negative: -50 + 20 = -30 < 0 -> -29.  */
+  if (f_schar_3arg (-50, 20, 0) != -29)
+    __builtin_abort ();
+
+  /* --- f_ushort_3arg: val += other; if (val < clipval) val++ --- */
+
+  /* Simple: 3 + 4 = 7 < 10 -> 8.  */
+  if (f_ushort_3arg (3, 4, 10) != 8)
+    __builtin_abort ();
+
+  /* Sum == clipval: no increment.  */
+  if (f_ushort_3arg (5, 5, 10) != 10)
+    __builtin_abort ();
+
+  /* Sum > clipval: no increment.  */
+  if (f_ushort_3arg (6, 5, 10) != 11)
+    __builtin_abort ();
+
+  /* Wrap: USHRT_MAX + 1 = 0 (unsigned short wraps), 0 < 1 -> 1.  */
+  if (f_ushort_3arg (USHRT_MAX, 1, 1) != 1)
+    __builtin_abort ();
+
+  /* No wrap, sum == USHRT_MAX: no increment.  */
+  if (f_ushort_3arg (USHRT_MAX - 1, 1, USHRT_MAX) != USHRT_MAX)
+    __builtin_abort ();
+
+  /* --- f_uchar_3arg: val += other; if (val < clipval) val++ --- */
+
+  /* Simple: 3 + 4 = 7 < 10 -> 8.  */
+  if (f_uchar_3arg (3, 4, 10) != 8)
+    __builtin_abort ();
+
+  /* Sum == clipval: no increment.  */
+  if (f_uchar_3arg (5, 5, 10) != 10)
+    __builtin_abort ();
+
+  /* Wrap: UCHAR_MAX + 1 = 0 (unsigned char wraps), 0 < 1 -> 1.  */
+  if (f_uchar_3arg (UCHAR_MAX, 1, 1) != 1)
+    __builtin_abort ();
+
+  /* No wrap, sum == UCHAR_MAX: no increment.  */
+  if (f_uchar_3arg (UCHAR_MAX - 1, 1, UCHAR_MAX) != UCHAR_MAX)
+    __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/csinc-4.c b/gcc/testsuite/gcc.target/aarch64/csinc-4.c
new file mode 100644
index 000000000000..570c7497eac3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/csinc-4.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Test that csinc is emitted for conditional increment of sub-int types
+   where sign/zero extension would otherwise block the pattern.  */
+
+int f_short (short val, int clipval) {
+  if (val < clipval) val++;
+  return val;
+}
+
+int f_schar (signed char val, int clipval) {
+  if (val < clipval) val++;
+  return val;
+}
+
+int f_ushort (unsigned short val, unsigned int clipval) {
+  if (val < clipval) val++;
+  return val;
+}
+
+int f_uchar (unsigned char val, unsigned int clipval) {
+  if (val < clipval) val++;
+  return val;
+}
+
+/* Three-argument variants where val += other precedes the conditional
+   increment.  The addition produces a full-width result, causing
+   the compare to use CC_SWP mode with a folded sign/zero-extend.  */
+
+int f_short_3arg (short val, short other, int clipval) {
+  val += other;
+  if (val < clipval) val++;
+  return val;
+}
+
+int f_schar_3arg (signed char val, signed char other, int clipval) {
+  val += other;
+  if (val < clipval) val++;
+  return val;
+}
+
+/* { dg-final { scan-assembler-times "csinc\tw" 6 } } */
+/* { dg-final { scan-assembler-not "csel\tw" } } */
-- 
2.34.1
