[PATCH GCC17-stage1] aarch64: Add peephole2 to sink extension past csel for cinc

Philipp Tomsich Thu, 12 Mar 2026 09:40:34 -0700

For sub-int types (short, char), the sign/zero extension between
add+1 and csel prevents combine from matching the csinc3 pattern.


Add four peephole2 patterns that sink the extension past the
conditional select, exposing the cinc opportunity:

Patterns A and B handle the simple case with a CC-mode compare
(4-insn window, saves 1 insn).  Both instruction orderings
(cmp/add/extend/csel and add/cmp/extend/csel) are handled.

Before:                      After:
  sxth  w0, w0                 sxth  w0, w0
  cmp   w0, w1                 cmp   w0, w1
  add   w1, w0, #1             csinc w0, w0, w0, ge
  sxth  w1, w1                 sxth  w0, w0
  csel  w0, w0, w1, ge

Patterns C and D handle the case where the compare uses CC_SWP mode
with the sign/zero-extend folded into the compare instruction (5-insn
window, saves 2 insns).  This occurs when the value being compared was
produced by a preceding arithmetic operation (e.g. val += other)
rather than arriving directly as a function argument.

Before:                      After:
  add   w1, w0, w1             add   w1, w0, w1
  sxth  w0, w1                 cmp   w2, w1, sxth
  cmp   w2, w1, sxth           csinc w0, w1, w1, le
  add   w1, w1, 1              sxth  w0, w0
  sxth  w1, w1
  csel  w0, w0, w1, le

Co-authored-by: Konstantinos Eleftheriou <[email protected]>

gcc/ChangeLog:

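        * config/aarch64/aarch64-protos.h (aarch64_peep_cinc_extend_p)
        (aarch64_peep_cinc_extend_swp_p, aarch64_emit_cinc_extend): Declare.
        * config/aarch64/aarch64.cc (aarch64_peep_cinc_extend_p)
        (aarch64_peep_cinc_extend_swp_p, aarch64_emit_cinc_extend): New
        functions.
        * config/aarch64/aarch64.md: Add peephole2 patterns to sink
        sign/zero extension past conditional select to expose csinc3.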

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/csinc-4.c: New test.
        * gcc.target/aarch64/csinc-4-neg.c: New test.
        * gcc.target/aarch64/csinc-4-run.c: New test.

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index d1f2873f208b..9973501651f7 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1122,6 +1122,10 @@ void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);
 bool aarch64_gen_adjusted_ldpstp (rtx *, bool, machine_mode, RTX_CODE);
 void aarch64_finish_ldpstp_peephole (rtx *, bool,
                                     enum rtx_code = (enum rtx_code)0);
+bool aarch64_peep_cinc_extend_p (rtx *);
+bool aarch64_peep_cinc_extend_swp_p (rtx *);
+void aarch64_emit_cinc_extend (machine_mode, rtx, rtx, bool, rtx, rtx,
+                              rtx, machine_mode, machine_mode, rtx_code);
 
 void aarch64_expand_sve_vec_cmp_int (rtx, rtx_code, rtx, rtx);
 void aarch64_expand_sve_vec_cmp_float (rtx, rtx_code, rtx, rtx);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index f2ecb0ee8cb7..dec208d6d214 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -29890,6 +29890,143 @@ aarch64_finish_ldpstp_peephole (rtx *operands, bool load_p, enum rtx_code code)
     }
 }
 
+/* Condition for peephole2 patterns A and B in aarch64.md (CC-mode compare,
+   4-insn window).  Return true if OPERANDS permit sinking the sign/zero
+   extension past the csel so that a cinc is exposed.  Operand roles:
+     0  add destination (must differ from operand 1)
+     1  value being compared and incremented
+     2  extend destination
+     4  csel destination
+     7  narrow source of the extend (must alias operand 0)
+     8/9  csel arms (one must be operand 1, the other operand 2)
+   Operand 3 (second compare operand) and operand 5 (csel condition) are
+   not inspected here.  */
+
+bool
+aarch64_peep_cinc_extend_p (rtx *operands)
+{
+  rtx add_dest = operands[0];
+  rtx base = operands[1];
+  rtx ext_dest = operands[2];
+  rtx csel_dest = operands[4];
+
+  /* Operand 1 is a csel arm and the base of the replacement csinc, so the
+     add must not overwrite it.  */
+  if (rtx_equal_p (add_dest, base))
+    return false;
+
+  /* The extend must consume the add result.  */
+  if (true_regnum (operands[7]) != true_regnum (add_dest))
+    return false;
+
+  /* Neither temporary is set by the replacement, so each must be the csel
+     destination or dead at the end of the 4-insn window.  */
+  if (!rtx_equal_p (add_dest, csel_dest) && !peep2_reg_dead_p (4, add_dest))
+    return false;
+  if (!rtx_equal_p (ext_dest, csel_dest) && !peep2_reg_dead_p (4, ext_dest))
+    return false;
+
+  /* The csel must select between operand 1 and the extended increment;
+     either arm order is accepted.  */
+  bool base_first = (rtx_equal_p (operands[8], base)
+                     && rtx_equal_p (operands[9], ext_dest));
+  bool base_last = (rtx_equal_p (operands[8], ext_dest)
+                    && rtx_equal_p (operands[9], base));
+  return base_first || base_last;
+}
+
+/* Condition for peephole2 patterns C and D in aarch64.md (CC_SWP-mode
+   compare with the extend folded in, 5-insn window).  Return true if
+   OPERANDS permit sinking the sign/zero extension past the csel so that
+   a cinc is exposed.  Operand roles:
+     0  add destination
+     1  value being incremented (must alias operand 6)
+     2  first extend destination
+     4  csel destination
+     6  narrow source of the first extend
+     7  second extend destination
+     8/9  csel arms (one must be operand 2, the other operand 7)
+     10 narrow source of the second extend (must alias operand 0)
+   Operand 3 (second compare operand) and operand 5 (csel condition) are
+   not inspected here.  */
+
+bool
+aarch64_peep_cinc_extend_swp_p (rtx *operands)
+{
+  rtx csel_dest = operands[4];
+  rtx first_ext = operands[2];
+  rtx second_ext = operands[7];
+
+  /* The first extend must narrow the register being incremented and the
+     second extend must narrow the add result.  */
+  if (true_regnum (operands[1]) != true_regnum (operands[6])
+      || true_regnum (operands[10]) != true_regnum (operands[0]))
+    return false;
+
+  /* The add result and both extend results are not set by the
+     replacement, so each must be the csel destination or dead at the end
+     of the 5-insn window.  */
+  static const int temps[] = { 0, 2, 7 };
+  for (int opno : temps)
+    if (!rtx_equal_p (operands[opno], csel_dest)
+        && !peep2_reg_dead_p (5, operands[opno]))
+      return false;
+
+  /* The csel must select between the two extend results; either arm
+     order is accepted.  */
+  bool first_then = (rtx_equal_p (operands[8], first_ext)
+                     && rtx_equal_p (operands[9], second_ext));
+  bool second_then = (rtx_equal_p (operands[8], second_ext)
+                      && rtx_equal_p (operands[9], first_ext));
+  return first_then || second_then;
+}
+
+/* Emit the replacement sequence for the four peephole2 patterns in
+   aarch64.md (patterns A-D) that sink a sign/zero extension past a
+   conditional select to expose a cinc.
+
+   Three instructions are emitted:
+     (1) A compare of COMPARE_LHS with COMPARE_RHS that sets CC_REGNUM in
+        mode CC_MODE.
+     (2) A csinc that writes BASE or BASE+1 into RESULT.  Its condition
+        uses the comparison code of COND_OP; when MOD_IS_TRUE is false the
+        code is reversed first.  MOD_IS_TRUE says whether the incremented
+        value was on the "then" arm of the original csel.
+     (3) A sign/zero extension (per EXT_CODE) of the SHORT_MODE low part of
+        RESULT back into RESULT, which has mode GPI_MODE.
+
+   CC_MODE is CCmode for patterns A/B and CC_SWPmode for patterns C/D.
+   GPI_MODE equal to SImode selects the SImode csinc; any other value
+   selects the DImode one.  */
+
+void
+aarch64_emit_cinc_extend (machine_mode cc_mode,
+                          rtx compare_lhs, rtx compare_rhs,
+                          bool mod_is_true,
+                          rtx cond_op, rtx result, rtx base,
+                          machine_mode gpi_mode, machine_mode short_mode,
+                          rtx_code ext_code)
+{
+  rtx flags = gen_rtx_REG (cc_mode, CC_REGNUM);
+  rtx_code cmp_code = (mod_is_true
+                       ? GET_CODE (cond_op)
+                       : reverse_condition (GET_CODE (cond_op)));
+  rtx cond = gen_rtx_fmt_ee (cmp_code, VOIDmode, flags, const0_rtx);
+
+  /* (1) Recreate the compare.  */
+  rtx compare = gen_rtx_COMPARE (cc_mode, compare_lhs, compare_rhs);
+  emit_insn (gen_rtx_SET (flags, compare));
+
+  /* (2) Conditional increment of BASE into RESULT.  */
+  emit_insn (gpi_mode == SImode
+             ? gen_csinc3si_insn (result, cond, base, base)
+             : gen_csinc3di_insn (result, cond, base, base));
+
+  /* (3) Re-extend the narrow low part of RESULT in place.  */
+  rtx low_part = gen_lowpart (short_mode, result);
+  convert_move (result, low_part, ext_code == ZERO_EXTEND);
+}
+
 /* Taking X and Y to be HOST_WIDE_INT pointers, return the result of a
    comparison between the two.  */
 int
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index e44b1cd9eefa..6195bc3aa6f6 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5033,6 +5033,161 @@ (define_insn "csinc3<mode>_insn"
   [(set_attr "type" "csel")]
 )
 
+;; Sink sign/zero extension past conditional select to expose cinc.
+;; For sub-int types, the extension between add+1 and csel prevents
+;; combine from matching csinc3.  These peephole2 patterns reorder to:
+;;   cmp, cinc, extend
+;; saving one or two instructions.
+;;
+;; The replacement applies extend (truncate (x)) to the csinc result on
+;; both arms, so it is only valid if that is a no-op on the unchanged arm.
+;; In patterns C and D the unchanged arm is operand 2, which the matched
+;; window itself sets to extend (operand 6), so this holds by construction.
+;; In patterns A and B the unchanged arm is operand 1, which is assumed to
+;; be within SHORT range already (sign/zero-extended on function entry or
+;; by a prior narrowing op).
+;; NOTE(review): patterns A and B do not check that assumption.  A source
+;; such as "x < y ? (short) (x + 1) : x" with int x looks like it matches
+;; and would then yield (short) x on the unchanged arm -- please confirm.
+;;
+;; Patterns A and B handle the simple case with a CC-mode compare
+;; (4-insn window, saves 1 insn).
+;;
+;; Patterns C and D handle the case where the compare uses CC_SWP mode
+;; with the extend folded into the compare instruction (5-insn window,
+;; saves 2 insns).  This occurs when the value being compared was
+;; produced by a preceding arithmetic operation (e.g. val += other)
+;; rather than arriving as a function argument.
+;;
+;; Pattern A: cmp, add, extend, csel -> cmp, cinc, extend
+(define_peephole2
+  [(set (reg:CC CC_REGNUM)
+       (compare:CC (match_operand:GPI 1 "register_operand")
+                   (match_operand:GPI 3 "aarch64_plus_operand")))
+   (set (match_operand:GPI 0 "register_operand")
+       (plus:GPI (match_dup 1)
+                 (const_int 1)))
+   (set (match_operand:GPI 2 "register_operand")
+       (ANY_EXTEND:GPI (match_operand:SHORT 7 "register_operand")))
+   (set (match_operand:GPI 4 "register_operand")
+       (if_then_else:GPI
+         (match_operator 5 "aarch64_comparison_operator"
+           [(reg:CC CC_REGNUM) (const_int 0)])
+         (match_operand:GPI 8 "register_operand")
+         (match_operand:GPI 9 "register_operand")))]
+  "aarch64_peep_cinc_extend_p (operands)"
+  [(const_int 0)]
+  {
+    rtx_code ext_code = '<ANY_EXTEND:su>' == 's' ? SIGN_EXTEND : ZERO_EXTEND;
+    bool mod_is_true = rtx_equal_p (operands[8], operands[2]);
+    aarch64_emit_cinc_extend (CCmode, operands[1], operands[3],
+                              mod_is_true, operands[5], operands[4],
+                              operands[1], <GPI:MODE>mode, <SHORT:MODE>mode,
+                              ext_code);
+    DONE;
+  }
+)
+
+;; Pattern B: add, cmp, extend, csel -> cmp, cinc, extend
+;; The add precedes the compare here and is dropped by the replacement, so
+;; the compare's second operand must not read the add destination.
+(define_peephole2
+  [(set (match_operand:GPI 0 "register_operand")
+       (plus:GPI (match_operand:GPI 1 "register_operand")
+                 (const_int 1)))
+   (set (reg:CC CC_REGNUM)
+       (compare:CC (match_dup 1)
+                   (match_operand:GPI 3 "aarch64_plus_operand")))
+   (set (match_operand:GPI 2 "register_operand")
+       (ANY_EXTEND:GPI (match_operand:SHORT 7 "register_operand")))
+   (set (match_operand:GPI 4 "register_operand")
+       (if_then_else:GPI
+         (match_operator 5 "aarch64_comparison_operator"
+           [(reg:CC CC_REGNUM) (const_int 0)])
+         (match_operand:GPI 8 "register_operand")
+         (match_operand:GPI 9 "register_operand")))]
+  "!reg_overlap_mentioned_p (operands[0], operands[3])
+   && aarch64_peep_cinc_extend_p (operands)"
+  [(const_int 0)]
+  {
+    rtx_code ext_code = '<ANY_EXTEND:su>' == 's' ? SIGN_EXTEND : ZERO_EXTEND;
+    bool mod_is_true = rtx_equal_p (operands[8], operands[2]);
+    aarch64_emit_cinc_extend (CCmode, operands[1], operands[3],
+                              mod_is_true, operands[5], operands[4],
+                              operands[1], <GPI:MODE>mode, <SHORT:MODE>mode,
+                              ext_code);
+    DONE;
+  }
+)
+
+;; Pattern C: extend, cmp_swp, add, extend, csel -> cmp_swp, cinc, extend
+;; The first extend precedes the compare here and is dropped by the
+;; replacement, so the compare must not read the first extend destination.
+(define_peephole2
+  [(set (match_operand:GPI 2 "register_operand")
+       (ANY_EXTEND:GPI (match_operand:SHORT 6 "register_operand")))
+   (set (reg:CC_SWP CC_REGNUM)
+       (compare:CC_SWP (ANY_EXTEND:GPI (match_dup 6))
+                       (match_operand:GPI 3 "register_operand")))
+   (set (match_operand:GPI 0 "register_operand")
+       (plus:GPI (match_operand:GPI 1 "register_operand")
+                 (const_int 1)))
+   (set (match_operand:GPI 7 "register_operand")
+       (ANY_EXTEND:GPI (match_operand:SHORT 10 "register_operand")))
+   (set (match_operand:GPI 4 "register_operand")
+       (if_then_else:GPI
+         (match_operator 5 "aarch64_comparison_operator"
+           [(reg:CC_SWP CC_REGNUM) (const_int 0)])
+         (match_operand:GPI 8 "register_operand")
+         (match_operand:GPI 9 "register_operand")))]
+  "!reg_overlap_mentioned_p (operands[2], operands[3])
+   && aarch64_peep_cinc_extend_swp_p (operands)"
+  [(const_int 0)]
+  {
+    rtx_code ext_code = '<ANY_EXTEND:su>' == 's' ? SIGN_EXTEND : ZERO_EXTEND;
+    rtx ext = gen_rtx_fmt_e (ext_code, <GPI:MODE>mode, operands[6]);
+    bool mod_is_true = rtx_equal_p (operands[8], operands[7]);
+    aarch64_emit_cinc_extend (CC_SWPmode, ext, operands[3],
+                              mod_is_true, operands[5], operands[4],
+                              operands[1], <GPI:MODE>mode, <SHORT:MODE>mode,
+                              ext_code);
+    DONE;
+  }
+)
+
+;; Pattern D: cmp_swp, extend, add, extend, csel -> cmp_swp, cinc, extend
+(define_peephole2
+  [(set (reg:CC_SWP CC_REGNUM)
+       (compare:CC_SWP (ANY_EXTEND:GPI
+                         (match_operand:SHORT 6 "register_operand"))
+                       (match_operand:GPI 3 "register_operand")))
+   (set (match_operand:GPI 2 "register_operand")
+       (ANY_EXTEND:GPI (match_dup 6)))
+   (set (match_operand:GPI 0 "register_operand")
+       (plus:GPI (match_operand:GPI 1 "register_operand")
+                 (const_int 1)))
+   (set (match_operand:GPI 7 "register_operand")
+       (ANY_EXTEND:GPI (match_operand:SHORT 10 "register_operand")))
+   (set (match_operand:GPI 4 "register_operand")
+       (if_then_else:GPI
+         (match_operator 5 "aarch64_comparison_operator"
+           [(reg:CC_SWP CC_REGNUM) (const_int 0)])
+         (match_operand:GPI 8 "register_operand")
+         (match_operand:GPI 9 "register_operand")))]
+  "aarch64_peep_cinc_extend_swp_p (operands)"
+  [(const_int 0)]
+  {
+    rtx_code ext_code = '<ANY_EXTEND:su>' == 's' ? SIGN_EXTEND : ZERO_EXTEND;
+    rtx ext = gen_rtx_fmt_e (ext_code, <GPI:MODE>mode, operands[6]);
+    bool mod_is_true = rtx_equal_p (operands[8], operands[7]);
+    aarch64_emit_cinc_extend (CC_SWPmode, ext, operands[3],
+                              mod_is_true, operands[5], operands[4],
+                              operands[1], <GPI:MODE>mode, <SHORT:MODE>mode,
+                              ext_code);
+    DONE;
+  }
+)
+
 (define_insn "*csinv3<mode>_insn"
   [(set (match_operand:GPI 0 "register_operand" "=r")
         (if_then_else:GPI
diff --git a/gcc/testsuite/gcc.target/aarch64/csinc-4-neg.c b/gcc/testsuite/gcc.target/aarch64/csinc-4-neg.c
new file mode 100644
index 000000000000..1ac153d9904e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/csinc-4-neg.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Negative tests: cases that should NOT be transformed to csinc by the
+   sub-int peephole2 patterns that csinc-4.c exercises.  These verify the
+   patterns do not fire on non-matching code shapes.  */
+
+/* Increment by 2 instead of 1: not a cinc candidate.  */
+int neg_add2 (short val, int clipval) {
+  if (val < clipval) val += 2;
+  return val;
+}
+
+/* Decrement instead of increment: not a cinc candidate.  */
+int neg_dec (short val, int clipval) {
+  if (val < clipval) val--;
+  return val;
+}
+
+/* Full-width int: combine already handles this, no peephole2 needed.
+   Verify we don't regress -- cinc should still appear via combine.  */
+int pos_fullwidth (int val, int clipval) {
+  if (val < clipval) val++;
+  return val;
+}
+
+/* Conditional assignment (not increment): csel, not cinc.  */
+int neg_assign (short val, int clipval, short other) {
+  if (val < clipval) val = other;
+  return val;
+}
+
+/* The add-by-2 and decrement cases should use csel, not cinc.
+   The conditional assignment should also use csel.
+   The full-width case should still get cinc via combine.
+   The peephole2 replacement is printed as csinc, so none may appear.  */
+
+/* { dg-final { scan-assembler-times "cinc\tw" 1 } } */
+/* { dg-final { scan-assembler-times "csel\tw" 3 } } */
+/* { dg-final { scan-assembler-not "csinc\tw" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/csinc-4-run.c b/gcc/testsuite/gcc.target/aarch64/csinc-4-run.c
new file mode 100644
index 000000000000..0dd720331aca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/csinc-4-run.c
@@ -0,0 +1,276 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+/* Runtime boundary-value tests for cinc peephole2 with sub-int types.
+   Complements the assembly-scan tests in csinc-4.c.  */
+
+#include <limits.h>
+
+__attribute__((noinline))
+int f_short (short val, int clipval) {
+  if (val < clipval) val++;
+  return val;
+}
+
+__attribute__((noinline))
+int f_schar (signed char val, int clipval) {
+  if (val < clipval) val++;
+  return val;
+}
+
+__attribute__((noinline))
+int f_ushort (unsigned short val, unsigned int clipval) {
+  if (val < clipval) val++;
+  return val;
+}
+
+__attribute__((noinline))
+int f_uchar (unsigned char val, unsigned int clipval) {
+  if (val < clipval) val++;
+  return val;
+}
+
+__attribute__((noinline))
+int f_short_3arg (short val, short other, int clipval) {
+  val += other;
+  if (val < clipval) val++;
+  return val;
+}
+
+__attribute__((noinline))
+int f_schar_3arg (signed char val, signed char other, int clipval) {
+  val += other;
+  if (val < clipval) val++;
+  return val;
+}
+
+__attribute__((noinline))
+int f_ushort_3arg (unsigned short val, unsigned short other, unsigned int clipval) {
+  val += other;
+  if (val < clipval) val++;
+  return val;
+}
+
+__attribute__((noinline))
+int f_uchar_3arg (unsigned char val, unsigned char other, unsigned int clipval) {
+  val += other;
+  if (val < clipval) val++;
+  return val;
+}
+
+int
+main (void)
+{
+  /* --- f_short: if (val < clipval) val++ --- */
+
+  /* val < clipval: increment happens.  */
+  if (f_short (5, 10) != 6)
+    __builtin_abort ();
+
+  /* val == clipval: no increment.  */
+  if (f_short (10, 10) != 10)
+    __builtin_abort ();
+
+  /* val > clipval: no increment.  */
+  if (f_short (15, 10) != 15)
+    __builtin_abort ();
+
+  /* val == clipval - 1: increment makes val == clipval.  */
+  if (f_short (9, 10) != 10)
+    __builtin_abort ();
+
+  /* val == clipval == SHRT_MAX: no increment.  */
+  if (f_short (SHRT_MAX, SHRT_MAX) != SHRT_MAX)
+    __builtin_abort ();
+
+  /* val at SHRT_MAX - 1, clipval = SHRT_MAX: increment.  */
+  if (f_short (SHRT_MAX - 1, SHRT_MAX) != SHRT_MAX)
+    __builtin_abort ();
+
+  /* Increment overflows short: the result wraps to SHRT_MIN, so the
+     extension after the increment must not be lost.  */
+  if (f_short (SHRT_MAX, INT_MAX) != SHRT_MIN)
+    __builtin_abort ();
+
+  /* val at SHRT_MIN, clipval = SHRT_MIN: no increment.  */
+  if (f_short (SHRT_MIN, SHRT_MIN) != SHRT_MIN)
+    __builtin_abort ();
+
+  /* val at SHRT_MIN, clipval = 0: increment.  */
+  if (f_short (SHRT_MIN, 0) != SHRT_MIN + 1)
+    __builtin_abort ();
+
+  /* Negative values.  */
+  if (f_short (-5, -3) != -4)
+    __builtin_abort ();
+
+  if (f_short (-3, -5) != -3)
+    __builtin_abort ();
+
+  /* --- f_schar: if (val < clipval) val++ --- */
+
+  if (f_schar (5, 10) != 6)
+    __builtin_abort ();
+
+  if (f_schar (10, 10) != 10)
+    __builtin_abort ();
+
+  if (f_schar (SCHAR_MAX, SCHAR_MAX) != SCHAR_MAX)
+    __builtin_abort ();
+
+  if (f_schar (SCHAR_MAX - 1, SCHAR_MAX) != SCHAR_MAX)
+    __builtin_abort ();
+
+  /* Increment overflows signed char: wraps to SCHAR_MIN.  */
+  if (f_schar (SCHAR_MAX, INT_MAX) != SCHAR_MIN)
+    __builtin_abort ();
+
+  if (f_schar (SCHAR_MIN, SCHAR_MIN) != SCHAR_MIN)
+    __builtin_abort ();
+
+  if (f_schar (SCHAR_MIN, 0) != SCHAR_MIN + 1)
+    __builtin_abort ();
+
+  /* --- f_ushort: if (val < clipval) val++ --- */
+
+  if (f_ushort (5, 10) != 6)
+    __builtin_abort ();
+
+  if (f_ushort (10, 10) != 10)
+    __builtin_abort ();
+
+  if (f_ushort (USHRT_MAX, USHRT_MAX) != USHRT_MAX)
+    __builtin_abort ();
+
+  if (f_ushort (USHRT_MAX - 1, USHRT_MAX) != USHRT_MAX)
+    __builtin_abort ();
+
+  /* Increment overflows unsigned short: wraps to 0.  */
+  if (f_ushort (USHRT_MAX, UINT_MAX) != 0)
+    __builtin_abort ();
+
+  if (f_ushort (0, 1) != 1)
+    __builtin_abort ();
+
+  if (f_ushort (0, 0) != 0)
+    __builtin_abort ();
+
+  /* --- f_uchar: if (val < clipval) val++ --- */
+
+  if (f_uchar (5, 10) != 6)
+    __builtin_abort ();
+
+  if (f_uchar (10, 10) != 10)
+    __builtin_abort ();
+
+  if (f_uchar (UCHAR_MAX, UCHAR_MAX) != UCHAR_MAX)
+    __builtin_abort ();
+
+  if (f_uchar (UCHAR_MAX - 1, UCHAR_MAX) != UCHAR_MAX)
+    __builtin_abort ();
+
+  /* Increment overflows unsigned char: wraps to 0.  */
+  if (f_uchar (UCHAR_MAX, UINT_MAX) != 0)
+    __builtin_abort ();
+
+  /* --- f_short_3arg: val += other; if (val < clipval) val++ --- */
+
+  /* Simple case: 3 + 4 = 7 < 10 -> 8.  */
+  if (f_short_3arg (3, 4, 10) != 8)
+    __builtin_abort ();
+
+  /* Sum == clipval: no increment.  */
+  if (f_short_3arg (5, 5, 10) != 10)
+    __builtin_abort ();
+
+  /* Sum > clipval: no increment.  */
+  if (f_short_3arg (6, 5, 10) != 11)
+    __builtin_abort ();
+
+  /* Overflow wraps within short range: 32000 + 1000 overflows to -32536,
+     which is < 0, so increment to -32535.  */
+  if (f_short_3arg (32000, 1000, 0) != -32535)
+    __builtin_abort ();
+
+  /* No overflow, negative result: -100 + 50 = -50 < 0 -> -49.  */
+  if (f_short_3arg (-100, 50, 0) != -49)
+    __builtin_abort ();
+
+  /* No overflow, negative result >= clip: -100 + 50 = -50 >= -50 -> -50.  */
+  if (f_short_3arg (-100, 50, -50) != -50)
+    __builtin_abort ();
+
+  /* Sum fits but the increment overflows short: wraps to SHRT_MIN.  */
+  if (f_short_3arg (SHRT_MAX, 0, INT_MAX) != SHRT_MIN)
+    __builtin_abort ();
+
+  /* --- f_schar_3arg: val += other; if (val < clipval) val++ --- */
+
+  if (f_schar_3arg (3, 4, 10) != 8)
+    __builtin_abort ();
+
+  if (f_schar_3arg (5, 5, 10) != 10)
+    __builtin_abort ();
+
+  /* Overflow wraps: 100 + 100 = -56 (signed char), < 0 -> -55.  */
+  if (f_schar_3arg (100, 100, 0) != -55)
+    __builtin_abort ();
+
+  /* Negative: -50 + 20 = -30 < 0 -> -29.  */
+  if (f_schar_3arg (-50, 20, 0) != -29)
+    __builtin_abort ();
+
+  /* Sum fits but the increment overflows signed char: wraps to SCHAR_MIN.  */
+  if (f_schar_3arg (SCHAR_MAX, 0, INT_MAX) != SCHAR_MIN)
+    __builtin_abort ();
+
+  /* --- f_ushort_3arg: val += other; if (val < clipval) val++ --- */
+
+  /* Simple: 3 + 4 = 7 < 10 -> 8.  */
+  if (f_ushort_3arg (3, 4, 10) != 8)
+    __builtin_abort ();
+
+  /* Sum == clipval: no increment.  */
+  if (f_ushort_3arg (5, 5, 10) != 10)
+    __builtin_abort ();
+
+  /* Sum > clipval: no increment.  */
+  if (f_ushort_3arg (6, 5, 10) != 11)
+    __builtin_abort ();
+
+  /* Wrap: USHRT_MAX + 1 = 0 (unsigned short wraps), 0 < 1 -> 1.  */
+  if (f_ushort_3arg (USHRT_MAX, 1, 1) != 1)
+    __builtin_abort ();
+
+  /* No wrap, sum == USHRT_MAX: no increment.  */
+  if (f_ushort_3arg (USHRT_MAX - 1, 1, USHRT_MAX) != USHRT_MAX)
+    __builtin_abort ();
+
+  /* Sum fits but the increment overflows unsigned short: wraps to 0.  */
+  if (f_ushort_3arg (USHRT_MAX, 0, UINT_MAX) != 0)
+    __builtin_abort ();
+
+  /* --- f_uchar_3arg: val += other; if (val < clipval) val++ --- */
+
+  /* Simple: 3 + 4 = 7 < 10 -> 8.  */
+  if (f_uchar_3arg (3, 4, 10) != 8)
+    __builtin_abort ();
+
+  /* Sum == clipval: no increment.  */
+  if (f_uchar_3arg (5, 5, 10) != 10)
+    __builtin_abort ();
+
+  /* Wrap: UCHAR_MAX + 1 = 0 (unsigned char wraps), 0 < 1 -> 1.  */
+  if (f_uchar_3arg (UCHAR_MAX, 1, 1) != 1)
+    __builtin_abort ();
+
+  /* No wrap, sum == UCHAR_MAX: no increment.  */
+  if (f_uchar_3arg (UCHAR_MAX - 1, 1, UCHAR_MAX) != UCHAR_MAX)
+    __builtin_abort ();
+
+  /* Sum fits but the increment overflows unsigned char: wraps to 0.  */
+  if (f_uchar_3arg (UCHAR_MAX, 0, UINT_MAX) != 0)
+    __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/csinc-4.c b/gcc/testsuite/gcc.target/aarch64/csinc-4.c
new file mode 100644
index 000000000000..570c7497eac3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/csinc-4.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Test that a conditional increment of a sub-int type is emitted as csinc
+   (cinc) even though a sign/zero extension sits between add and csel.  */
+
+int f_short (short val, int clipval) {
+  if (val < clipval) val++;
+  return val;
+}
+
+int f_schar (signed char val, int clipval) {
+  if (val < clipval) val++;
+  return val;
+}
+
+int f_ushort (unsigned short val, unsigned int clipval) {
+  if (val < clipval) val++;
+  return val;
+}
+
+int f_uchar (unsigned char val, unsigned int clipval) {
+  if (val < clipval) val++;
+  return val;
+}
+
+/* Three-argument variants where val += other precedes the conditional
+   increment.  The addition produces a full-width result, causing
+   the compare to use CC_SWP mode with a folded sign/zero-extend.  */
+
+int f_short_3arg (short val, short other, int clipval) {
+  val += other;
+  if (val < clipval) val++;
+  return val;
+}
+
+int f_schar_3arg (signed char val, signed char other, int clipval) {
+  val += other;
+  if (val < clipval) val++;
+  return val;
+}
+
+/* { dg-final { scan-assembler-times "csinc\tw" 6 } } */
+/* { dg-final { scan-assembler-not "csel\tw" } } */
-- 
2.34.1

[PATCH GCC17-stage1] aarch64: Add peephole2 to sink extension past csel for cinc

Reply via email to