The following example:
#define N 640
int a[N] = {};
int b[N] = {};
int c[N] = {};
void f1 (int d)
{
for (int i = 0; i < N; i++)
{
b[i] += a[i];
if (a[i] != d)
break;
}
}
today generates with
-Ofast -march=armv8-a+sve --param aarch64-autovec-preference=asimd-only
.L6:
ldr q30, [x3, x1]
cmeq v31.4s, v30.4s, v27.4s
not v31.16b, v31.16b
umaxp v31.4s, v31.4s, v31.4s
fmov x4, d31
cbz x4, .L2
Where we use an Adv. SIMD compare and a reduction sequence to implement
early break. This patch implements the new optabs vec_cbranch_any and
vec_cbranch_all in order to replace the Adv. SIMD compare and reduction with
an SVE flag-setting compare.
With this patch the above generates:
ptrue p7.b, vl16
.L6:
ldr q30, [x3, x1]
cmpne p15.s, p7/z, z30.s, z27.s
b.none .L2
This optab could also be used for optimizing the Adv. SIMD Sequence when SVE
is not available. I have a separate patch for that and will send depending on
if this approach is accepted or not.
Note that for floating-point we still need the ptest as floating point SVE
compares don't set flags. In addition because SVE doesn't have a CMTST
equivalent instruction we have to do an explicit AND before the compares.
These two cases don't have a speed advantage, but do have a codesize one
so I've left them enabled.
When compiling with -ffast-math we can however use an SVE
integer comparison when comparing FP values for equality. This then saves
the PTEST there and equality is the most common form.
This patch also eliminates PTEST on normal SVE compare and branch through
the introduction of new optabs cond_vec_cbranch_any and cond_vec_cbranch_all.
In the example
void f1 ()
{
for (int i = 0; i < N; i++)
{
b[i] += a[i];
if (a[i] > 0)
break;
}
}
when compiled for SVE we generate:
ld1w z28.s, p7/z, [x4, x0, lsl 2]
cmpgt p14.s, p7/z, z28.s, #0
ptest p15, p14.b
b.none .L3
Where the ptest isn't needed since the branch only cares about the Z and NZ
flags.
GCC today supports eliding this through the pattern *cmp<cmp_op><mode>_ptest
however this pattern only supports the removal when the outermost context is a
CMP where the predicate is inside the condition itself.
This typically only happens for an unpredicated CMP as a ptrue will be generated
during expand.
In the case above, at the GIMPLE level we have
mask_patt_14.15_57 = vect__2.11_52 > { 0, ... };
vec_mask_and_58 = loop_mask_48 & mask_patt_14.15_57;
if (vec_mask_and_58 != { 0, ... })
goto <bb 5>; [5.50%]
else
goto <bb 6>; [94.50%]
where the loop mask is applied to the compare as an AND.
The loop mask is moved into the compare by the pattern *cmp<cmp_op><mode>_and
which moves the mask inside if the current mask is a ptrue since
p && true -> p.
However this happens after combine, and so we can't both move the predicate
inside AND eliminate the ptests.
To fix this the middle-end will now rewrite the mask into the compare optab
and indicate that only the CC flags are required. This allows us to simply
not generate the ptest at all, rather than trying to eliminate it later on.
After this patch we generate
ld1w z28.s, p7/z, [x4, x0, lsl 2]
cmpgt p14.s, p7/z, z28.s, #0
b.none .L3
Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
Ok for master?
Thanks,
Tamar
gcc/ChangeLog:
PR target/118974
* config/aarch64/aarch64-simd.md (xor<mode>3<vczle><vczbe>): Rename to ...
(@xor<mode>3<vczle><vczbe>): .. this.
(cbranch<mode>4): Update comments.
(<optab><mode>): New.
* config/aarch64/aarch64-sve.md (cbranch<mode>4): Update comment.
(<optab><mode>): New.
(aarch64_ptest<mode>): Rename to ...
(@aarch64_ptest<mode>): .. this.
* config/aarch64/iterators.md (UNSPEC_CMP_ALL, UNSPEC_CMP_ANY,
UNSPEC_COND_CMP_ALL, UNSPEC_COND_CMP_ANY): New.
(optabs): Add them.
(CBRANCH_CMP, COND_CBRANCH_CMP, cbranch_op): New.
* config/aarch64/predicates.md (aarch64_cbranch_compare_operation): New.
gcc/testsuite/ChangeLog:
PR target/118974
* gcc.target/aarch64/sve/pr119351.c: Update codegen.
* gcc.target/aarch64/sve/vect-early-break-cbranch.c: Likewise.
* gcc.target/aarch64/vect-early-break-cbranch.c: Likewise.
* gcc.target/aarch64/sve/vect-early-break-cbranch_2.c: New test.
* gcc.target/aarch64/sve/vect-early-break-cbranch_3.c: New test.
* gcc.target/aarch64/sve/vect-early-break-cbranch_4.c: New test.
* gcc.target/aarch64/sve/vect-early-break-cbranch_5.c: New test.
* gcc.target/aarch64/sve/vect-early-break-cbranch_7.c: New test.
* gcc.target/aarch64/sve/vect-early-break-cbranch_8.c: New test.
* gcc.target/aarch64/vect-early-break-cbranch_2.c: New test.
* gcc.target/aarch64/vect-early-break-cbranch_3.c: New test.
---
diff --git a/gcc/config/aarch64/aarch64-simd.md
b/gcc/config/aarch64/aarch64-simd.md
index
e7c459dceb3a9d5aa05501278d85f9cc7ac0eeab..3aecef7470b4309f42831228f8cdf0f684cd22fe
100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1155,7 +1155,7 @@ (define_insn "ior<mode>3<vczle><vczbe>"
)
;; For EOR (vector, register) and SVE EOR (vector, immediate)
-(define_insn "xor<mode>3<vczle><vczbe>"
+(define_insn "@xor<mode>3<vczle><vczbe>"
[(set (match_operand:VDQ_I 0 "register_operand")
(xor:VDQ_I (match_operand:VDQ_I 1 "register_operand")
(match_operand:VDQ_I 2 "aarch64_reg_or_xor_imm")))]
@@ -4156,6 +4156,8 @@ (define_expand "vcond_mask_<mode><v_int_equiv>"
;; Patterns comparing two vectors and conditionally jump
+;; Define cbranch on masks. This optab is only called for BOOLEAN_VECTOR_TYPE_P
+;; which allows optimizing compares with zero.
(define_expand "cbranch<mode>4"
[(set (pc)
(if_then_else
@@ -4196,6 +4198,89 @@ (define_expand "cbranch<mode>4"
DONE;
})
+;; Define vec_cbranch_any and vec_cbranch_all
+;; Vector comparison and branch for Adv. SIMD Integer types using SVE
+;; instructions.
+(define_expand "<optab><mode>"
+ [(set (pc)
+ (unspec:VALL
+ [(if_then_else
+ (match_operator 0 "aarch64_cbranch_compare_operation"
+ [(match_operand:VALL 1 "register_operand")
+ (match_operand:VALL 2 "aarch64_simd_reg_or_zero")])
+ (label_ref (match_operand 3 ""))
+ (pc))]
+ CBRANCH_CMP))]
+ "TARGET_SIMD"
+{
+ auto code = GET_CODE (operands[0]);
+ if (TARGET_SVE)
+ {
+ machine_mode full_mode = aarch64_full_sve_mode (<VEL>mode).require ();
+
+ /* Since SVE Vector FP compares don't set flags, when comparing against
+ equality we can use the integer variant which sets flags since
+ IEEE equality is just bitwise equality. */
+ if (FLOAT_MODE_P (full_mode)
+ && aarch64_equality_operator (operands[0], <MODE>mode))
+ full_mode = related_int_vector_mode (full_mode).require ();
+ rtx in1 = force_lowpart_subreg (full_mode, operands[1], <MODE>mode);
+ rtx in2;
+ if (CONST0_RTX (<MODE>mode) == operands[2])
+ in2 = CONST0_RTX (full_mode);
+ else
+ in2 = force_lowpart_subreg (full_mode, operands[2], <MODE>mode);
+
+ unsigned lanes
+ = exact_div (GET_MODE_BITSIZE (<MODE>mode), 8).to_constant ();
+ machine_mode pred_mode = aarch64_sve_pred_mode (full_mode);
+ rtx ptrue = aarch64_ptrue_reg (VNx16BImode, lanes);
+ rtx hint = gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode);
+
+ rtx tmp = gen_reg_rtx (pred_mode);
+ rtx cast_ptrue = gen_lowpart (pred_mode, ptrue);
+
+ if (FLOAT_MODE_P (full_mode))
+ {
+ aarch64_expand_sve_vec_cmp<sve_cmp_suff> (tmp, code, in1, in2);
+ emit_insn (gen_and3 (pred_mode, tmp, tmp, cast_ptrue));
+ emit_insn (gen_aarch64_ptest (pred_mode, ptrue, cast_ptrue, hint,
+ tmp));
+ }
+ else
+ emit_insn (gen_aarch64_pred_cmp_ptest (code, full_mode, tmp, ptrue, in1,
+ in2, cast_ptrue, hint,
+ cast_ptrue, hint));
+
+ rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+ rtx cmp_reg = gen_rtx_<cbranch_op> (VOIDmode, cc_reg, const0_rtx);
+ emit_jump_insn (gen_aarch64_bcond (cmp_reg, cc_reg, operands[3]));
+ DONE;
+ }
+
+ rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
+ emit_insn (gen_vec_cmp<mode><v_int_equiv> (tmp, operands[0], operands[1],
+ operands[2]));
+
+ /* For 128-bit vectors we need a reduction to 64-bit first. */
+ if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
+ {
+ /* Always reduce using a V4SI. */
+ rtx reduc = gen_lowpart (V4SImode, tmp);
+ rtx res = gen_reg_rtx (V4SImode);
+ emit_insn (gen_aarch64_umaxpv4si (res, reduc, reduc));
+ emit_move_insn (tmp, gen_lowpart (<V_INT_EQUIV>mode, res));
+ }
+
+ rtx val = gen_reg_rtx (DImode);
+ emit_move_insn (val, gen_lowpart (DImode, tmp));
+
+ rtx cc_reg = aarch64_gen_compare_reg (<cbranch_op>, val, const0_rtx);
+ rtx cmp_rtx = gen_rtx_fmt_ee (<cbranch_op>, DImode, cc_reg, const0_rtx);
+ emit_jump_insn (gen_aarch64_bcond (cmp_rtx, cc_reg, operands[3]));
+ DONE;
+})
+
;; Patterns comparing two vectors to produce a mask.
(define_expand "vec_cmp<mode><mode>"
diff --git a/gcc/config/aarch64/aarch64-sve.md
b/gcc/config/aarch64/aarch64-sve.md
index
4648aa67e0c360076cf4444c4e0ac55babda34e6..2a2caa2066ce07959fc97ccf4a78632551dda07a
100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -9761,7 +9761,8 @@ (define_insn "@vcond_mask_<mode><mode>"
;; - PTEST
;; -------------------------------------------------------------------------
-;; Branch based on predicate equality or inequality.
+;; Branch based on predicate equality or inequality. This allows PTEST to be
+;; combined with other flag setting instructions like ORR -> ORRS.
(define_expand "cbranch<mode>4"
[(set (pc)
(if_then_else
@@ -9790,8 +9791,120 @@ (define_expand "cbranch<mode>4"
}
)
+;; Define vec_cbranch_any and vec_cbranch_all
+;; Branch based on predicate equality or inequality.
+(define_expand "<optab><mode>"
+ [(set (pc)
+ (unspec:PRED_ALL
+ [(if_then_else
+ (match_operator 0 "aarch64_equality_operator"
+ [(match_operand:PRED_ALL 1 "register_operand")
+ (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
+ (label_ref (match_operand 3 ""))
+ (pc))]
+ CBRANCH_CMP))]
+ ""
+ {
+ rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>));
+ rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue);
+ rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
+ rtx pred;
+ if (operands[2] == CONST0_RTX (<MODE>mode))
+ pred = operands[1];
+ else
+ {
+ pred = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1],
+ operands[2]));
+ }
+ emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred));
+
+ rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+ rtx cmp_reg = gen_rtx_<cbranch_op> (VOIDmode, cc_reg, const0_rtx);
+ emit_jump_insn (gen_aarch64_bcond (cmp_reg, cc_reg, operands[3]));
+ DONE;
+ }
+)
+
+;; Define cond_vec_cbranch_any and cond_vec_cbranch_all
+;; Vector comparison and branch for SVE Floating point types, only for EQ or
+;; NE comparisons, which allows us to use integer compares and avoid the ptest.
+(define_expand "<optab><mode>"
+ [(set (pc)
+ (unspec:SVE_F
+ [(if_then_else
+ (match_operator 0 "aarch64_equality_operator"
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")
+ (match_operand:<VPRED> 4 "aarch64_simd_imm_zero")])
+ (label_ref (match_operand 5 ""))
+ (pc))]
+ COND_CBRANCH_CMP))]
+ "flag_unsafe_math_optimizations"
+{
+ auto code = GET_CODE (operands[0]);
+ machine_mode full_mode = related_int_vector_mode (<MODE>mode).require ();
+ rtx in1 = force_lowpart_subreg (full_mode, operands[2], <MODE>mode);
+ rtx in2;
+ if (CONST0_RTX (<MODE>mode) == operands[3])
+ in2 = CONST0_RTX (full_mode);
+ else
+ in2 = force_lowpart_subreg (full_mode, operands[3], <MODE>mode);
+
+ rtx res = gen_reg_rtx (<VPRED>mode);
+ rtx gp = gen_lowpart (VNx16BImode, operands[1]);
+ rtx cast_gp = operands[1];
+ rtx flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
+
+ emit_insn (gen_aarch64_pred_cmp_ptest (code, full_mode, res, gp, in1, in2,
+ cast_gp, flag, cast_gp, flag));
+
+ rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+ rtx cmp_reg = gen_rtx_<cbranch_op> (VOIDmode, cc_reg, const0_rtx);
+ emit_jump_insn (gen_aarch64_bcond (cmp_reg, cc_reg, operands[5]));
+ DONE;
+})
+
+
+;; Define cond_vec_cbranch_any and cond_vec_cbranch_all
+;; Vector comparison and branch for SVE Integer types.  The predicated
+;; flag-setting integer compare is emitted directly and the branch consumes
+;; its flags, so no separate ptest is generated.
+(define_expand "<optab><mode>"
+ [(set (pc)
+ (unspec:SVE_I
+ [(if_then_else
+ (match_operator 0 "aarch64_comparison_operator"
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SVE_I 2 "register_operand")
+ (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero")
+ (match_operand:<VPRED> 4 "aarch64_simd_imm_zero")])
+ (label_ref (match_operand 5 ""))
+ (pc))]
+ COND_CBRANCH_CMP))]
+ ""
+{
+ auto code = GET_CODE (operands[0]);
+ rtx in1 = operands[2];
+ rtx in2 = operands[3];
+
+ rtx res = gen_reg_rtx (<VPRED>mode);
+ rtx gp = gen_lowpart (VNx16BImode, operands[1]);
+ rtx cast_gp = operands[1];
+ rtx flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
+
+ emit_insn (gen_aarch64_pred_cmp_ptest (code, <MODE>mode, res, gp, in1, in2,
+ cast_gp, flag, cast_gp, flag));
+
+ rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+ rtx cmp_reg = gen_rtx_<cbranch_op> (VOIDmode, cc_reg, const0_rtx);
+ emit_jump_insn (gen_aarch64_bcond (cmp_reg, cc_reg, operands[5]));
+ DONE;
+})
+
;; See "Description of UNSPEC_PTEST" above for details.
-(define_insn "aarch64_ptest<mode>"
+(define_insn "@aarch64_ptest<mode>"
[(set (reg:CC_NZC CC_REGNUM)
(unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa")
(match_operand 1)
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index
517b2808b5f725db81709122848817aaafff1f34..7a1f8e7aed4a56dec70c1a056c1512bd0da7ff56
100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -837,6 +837,10 @@ (define_c_enum "unspec"
UNSPEC_SSHLL ; Used in aarch64-simd.md.
UNSPEC_USHLL ; Used in aarch64-simd.md.
UNSPEC_ADDP ; Used in aarch64-simd.md.
+ UNSPEC_CMP_ALL ; Used in aarch64-simd.md.
+ UNSPEC_CMP_ANY ; Used in aarch64-simd.md.
+ UNSPEC_COND_CMP_ALL ; Used in aarch64-sve.md.
+ UNSPEC_COND_CMP_ANY ; Used in aarch64-sve.md.
UNSPEC_TBL ; Used in vector permute patterns.
UNSPEC_TBLQ ; Used in vector permute patterns.
UNSPEC_TBX ; Used in vector permute patterns.
@@ -2612,6 +2616,12 @@ (define_mode_attr vpred [(VNx16QI "vnx16bi") (VNx8QI
"vnx8bi")
(VNx16SI "vnx4bi") (VNx16SF "vnx4bi")
(VNx8DI "vnx2bi") (VNx8DF "vnx2bi")])
+;; Map mode to suffix for using an SVE comparison
+(define_mode_attr sve_cmp_suff [(V8QI "_int") (V16QI "_int")
+ (V4HI "_int") (V8HI "_int") (V2SI "_int")
+ (V4SI "_int") (V2DI "_int")
+ (V2SF "_float") (V4SF "_float") (V2DF "_float")])
+
(define_mode_attr VDOUBLE [(VNx16QI "VNx32QI")
(VNx8HI "VNx16HI") (VNx8HF "VNx16HF")
(VNx8BF "VNx16BF")
@@ -3272,6 +3282,9 @@ (define_int_iterator HADD [UNSPEC_SHADD UNSPEC_UHADD])
(define_int_iterator RHADD [UNSPEC_SRHADD UNSPEC_URHADD])
+(define_int_iterator CBRANCH_CMP [UNSPEC_CMP_ALL UNSPEC_CMP_ANY])
+(define_int_iterator COND_CBRANCH_CMP [UNSPEC_COND_CMP_ALL UNSPEC_COND_CMP_ANY])
+
(define_int_iterator BSL_DUP [1 2])
(define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT])
@@ -4215,7 +4228,16 @@ (define_int_attr optab [(UNSPEC_ANDF "and")
(UNSPEC_COND_SCVTF "float")
(UNSPEC_COND_SMAX "smax")
(UNSPEC_COND_SMIN "smin")
- (UNSPEC_COND_UCVTF "floatuns")])
+ (UNSPEC_COND_UCVTF "floatuns")
+ (UNSPEC_CMP_ALL "vec_cbranch_all")
+ (UNSPEC_CMP_ANY "vec_cbranch_any")
+ (UNSPEC_COND_CMP_ALL "cond_vec_cbranch_all")
+ (UNSPEC_COND_CMP_ANY "cond_vec_cbranch_any")])
+
+(define_int_attr cbranch_op [(UNSPEC_CMP_ALL "EQ")
+ (UNSPEC_CMP_ANY "NE")
+ (UNSPEC_COND_CMP_ALL "EQ")
+ (UNSPEC_COND_CMP_ANY "NE")])
(define_int_attr fmaxmin [(UNSPEC_FMAX "fmax_nan")
(UNSPEC_FMAXNM "fmax")
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index
42304cef4391e15598bcd22da590c8663f3ffaa5..4b0a4f79253fcc28192cb70ce46d487875b25a7a
100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -462,6 +462,19 @@ (define_special_predicate "aarch64_comparison_operation"
(define_special_predicate "aarch64_equality_operator"
(match_code "eq,ne"))
+(define_special_predicate "aarch64_cbranch_compare_operation"
+ (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,unordered,
+ ordered,unlt,unle,unge,ungt")
+{
+ if (TARGET_SVE)
+ return true;
+
+ if (!TARGET_SIMD)
+ return false;
+
+ return true;
+})
+
(define_special_predicate "aarch64_carry_operation"
(match_code "ltu,geu")
{
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
index
85aab355f95f83e1fa65d280f14fb8ade7f7e658..1ebc735a82f4a59d8eccff39346e46a449b4729a
100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
@@ -14,7 +14,6 @@ int x[N] __attribute__((aligned(32)));
** ...
** ld1w z[0-9]+.s, p[0-9]+/z, \[x[0-9], x[0-9], lsl 2\]
** cmple p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
** ...
*/
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c
b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c
index
d7cef1105410be04ed67d1d3b800746267f205a8..48fb407ccee769b008e0de87e25dda71fe235d20
100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c
@@ -8,8 +8,7 @@ int b[N] = {0};
** f1:
** ...
** cmpgt p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
-** b.(any|none) \.L[0-9]+
+** b(\.?eq|\.none) \.L[0-9]+
** ...
*/
void f1 ()
@@ -25,8 +24,7 @@ void f1 ()
** f2:
** ...
** cmpge p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
-** b.(any|none) \.L[0-9]+
+** b(\.?eq|\.none) \.L[0-9]+
** ...
*/
void f2 ()
@@ -42,8 +40,7 @@ void f2 ()
** f3:
** ...
** cmpeq p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
-** b.(any|none) \.L[0-9]+
+** b(\.?eq|\.none) \.L[0-9]+
** ...
*/
void f3 ()
@@ -59,8 +56,7 @@ void f3 ()
** f4:
** ...
** cmpne p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
-** b.(any|none) \.L[0-9]+
+** b(\.?eq|\.none) \.L[0-9]+
** ...
*/
void f4 ()
@@ -76,8 +72,7 @@ void f4 ()
** f5:
** ...
** cmplt p[0-9]+.s, p7/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
-** b.(any|none) .L[0-9]+
+** b(\.?eq|\.none) .L[0-9]+
** ...
*/
void f5 ()
@@ -93,8 +88,7 @@ void f5 ()
** f6:
** ...
** cmple p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
-** b.(any|none) \.L[0-9]+
+** b(\.?eq|\.none) \.L[0-9]+
** ...
*/
void f6 ()
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_2.c
b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_2.c
new file mode 100644
index
0000000000000000000000000000000000000000..7c996a893d99f7462c980140d12ab2711738597c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_2.c
@@ -0,0 +1,132 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O3 -fno-schedule-insns -fno-reorder-blocks
-fno-schedule-insns2 --param aarch64-autovec-preference=asimd-only" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdio.h>
+
+#define N 640
+#ifndef TYPE
+#define TYPE int
+#endif
+#ifndef FMT
+#define FMT "d"
+#endif
+
+
+TYPE a[N] = {0};
+TYPE b[N] = {0};
+
+char *curr_test;
+
+/* Macro to define a function with a specific comparison */
+#define DEFINE_TEST_FUNC(NAME, OP) \
+ __attribute__((noipa)) \
+ void NAME(void) { \
+ for (int i = 0; i < N; i++) { \
+ b[i] += a[i]; \
+ if (a[i] OP 0) \
+ break; \
+ } \
+ }
+
+/* Generate the six comparisons functions using the macro. */
+DEFINE_TEST_FUNC(f1, >)
+DEFINE_TEST_FUNC(f2, >=)
+DEFINE_TEST_FUNC(f3, ==)
+DEFINE_TEST_FUNC(f4, !=)
+DEFINE_TEST_FUNC(f5, <)
+DEFINE_TEST_FUNC(f6, <=)
+
+__attribute__((noreturn))
+static inline void __abort_trace (const char *m, int i, TYPE result, TYPE
expected)
+{
+ printf ("*** [%s] FAIL AT %s:%d in %s - expected %" FMT " but got %" FMT "
at pos %d\n",
+ m, __FILE__, __LINE__, curr_test, expected, result, i);
+ __builtin_abort ();
+}
+
+/* Array setup macro. */
+#define RESET_ARRAYS(_aval, _idx, _force, _bval) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = 0; i < N; ++i) { \
+ a[i] = _aval; \
+ b[i] = _bval; \
+ } \
+ if (_idx >= 0 && _idx < N) \
+ a[_idx] = _force; \
+ } while (0)
+
+/* Value check macros. */
+#define CHECK_EQ(_i, _val) \
+ do { \
+ if (b[_i] != _val) \
+ __abort_trace ("single", _i, b[_i], _val); \
+ } while (0)
+
+#define CHECK_RANGE_EQ(_start, _end, _val) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = _start; i < _end; ++i) \
+ if (b[i] != _val) \
+ __abort_trace ("range", i, b[i], _val); \
+ } while (0)
+
+#define str(s) #s
+#define TEST_FUNC(_func, _aval, _idx, _force, _bval, _check_stmt) \
+ do { \
+ curr_test = str (_func); \
+ RESET_ARRAYS((_aval), (_idx), (_force), (_bval)); \
+ _func(); \
+ _check_stmt; \
+ } while (0)
+
+int main(void) {
+ /* Break on random intervals. */
+ TEST_FUNC (f1, 1, 0, 1, 10, CHECK_EQ (0, 11); CHECK_EQ (1, 10));
+ TEST_FUNC (f2, -1, 5, 0, 10, CHECK_EQ (0, 9); CHECK_EQ (5, 10));
+ TEST_FUNC (f3, 3, 3, 0, 0, CHECK_EQ (0, 3); CHECK_EQ (3, 0));
+ TEST_FUNC (f4, 0, 4, 1, 1, CHECK_EQ (4, 2); CHECK_EQ (5, 1));
+ TEST_FUNC (f5, 1, 6, -1, 5, CHECK_EQ (6, 4); CHECK_EQ (7, 5));
+ TEST_FUNC (f6, 2, 10, 0, 7, CHECK_EQ (10, 7); CHECK_EQ (11, 7));
+
+ /* Break on last iteration. */
+ TEST_FUNC (f1, 0, N-1, 1, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 2));
+
+ TEST_FUNC (f2, -5, N-1, 0, 9,
+ CHECK_RANGE_EQ (0, N-1, 4); CHECK_EQ (N-1, 9));
+
+ TEST_FUNC (f3, 2, N-1, 0, 0,
+ CHECK_RANGE_EQ(0, N-1, 2); CHECK_EQ (N-1, 0));
+
+ TEST_FUNC (f4, 0, N-1, 2, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f5, 2, N-1, -3, 6,
+ CHECK_RANGE_EQ (0, N-1, 8); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f6, 5, N-1, 0, 7,
+ CHECK_RANGE_EQ (0, N-1, 12); CHECK_EQ (N-1, 7));
+
+ /* Condition never met — full loop executes. */
+ TEST_FUNC (f1, 0, -1, 0, 2,
+ CHECK_RANGE_EQ (0, N, 2));
+
+ TEST_FUNC (f2, -2, -1, 0, 5,
+ CHECK_RANGE_EQ (0, N, 3));
+
+ TEST_FUNC (f3, 1, -1, 0, 0,
+ CHECK_RANGE_EQ (0, N, 1));
+
+ TEST_FUNC (f4, 0, -1, 0, 7,
+ CHECK_RANGE_EQ (0, N, 7));
+
+ TEST_FUNC (f5, 1, -1, 0, 4,
+ CHECK_RANGE_EQ (0, N, 5));
+
+ TEST_FUNC (f6, 5, -1, 0, 3,
+ CHECK_RANGE_EQ (0, N, 8));
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_3.c
b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_3.c
new file mode 100644
index
0000000000000000000000000000000000000000..75927abb09479712760a60b0f6a11135d9be9502
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_3.c
@@ -0,0 +1,132 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O3 -fno-schedule-insns -fno-reorder-blocks
-fno-schedule-insns2 --param aarch64-autovec-preference=sve-only" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdio.h>
+
+#define N 640
+#ifndef TYPE
+#define TYPE int
+#endif
+#ifndef FMT
+#define FMT "d"
+#endif
+
+
+TYPE a[N] = {0};
+TYPE b[N] = {0};
+
+char *curr_test;
+
+/* Macro to define a function with a specific comparison */
+#define DEFINE_TEST_FUNC(NAME, OP) \
+ __attribute__((noipa)) \
+ void NAME(void) { \
+ for (int i = 0; i < N; i++) { \
+ b[i] += a[i]; \
+ if (a[i] OP 0) \
+ break; \
+ } \
+ }
+
+/* Generate the six comparisons functions using the macro. */
+DEFINE_TEST_FUNC(f1, >)
+DEFINE_TEST_FUNC(f2, >=)
+DEFINE_TEST_FUNC(f3, ==)
+DEFINE_TEST_FUNC(f4, !=)
+DEFINE_TEST_FUNC(f5, <)
+DEFINE_TEST_FUNC(f6, <=)
+
+__attribute__((noreturn))
+static inline void __abort_trace (const char *m, int i, TYPE result, TYPE
expected)
+{
+ printf ("*** [%s] FAIL AT %s:%d in %s - expected %" FMT " but got %" FMT "
at pos %d\n",
+ m, __FILE__, __LINE__, curr_test, expected, result, i);
+ __builtin_abort ();
+}
+
+/* Array setup macro. */
+#define RESET_ARRAYS(_aval, _idx, _force, _bval) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = 0; i < N; ++i) { \
+ a[i] = _aval; \
+ b[i] = _bval; \
+ } \
+ if (_idx >= 0 && _idx < N) \
+ a[_idx] = _force; \
+ } while (0)
+
+/* Value check macros. */
+#define CHECK_EQ(_i, _val) \
+ do { \
+ if (b[_i] != _val) \
+ __abort_trace ("single", _i, b[_i], _val); \
+ } while (0)
+
+#define CHECK_RANGE_EQ(_start, _end, _val) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = _start; i < _end; ++i) \
+ if (b[i] != _val) \
+ __abort_trace ("range", i, b[i], _val); \
+ } while (0)
+
+#define str(s) #s
+#define TEST_FUNC(_func, _aval, _idx, _force, _bval, _check_stmt) \
+ do { \
+ curr_test = str (_func); \
+ RESET_ARRAYS((_aval), (_idx), (_force), (_bval)); \
+ _func(); \
+ _check_stmt; \
+ } while (0)
+
+int main(void) {
+ /* Break on random intervals. */
+ TEST_FUNC (f1, 1, 0, 1, 10, CHECK_EQ (0, 11); CHECK_EQ (1, 10));
+ TEST_FUNC (f2, -1, 5, 0, 10, CHECK_EQ (0, 9); CHECK_EQ (5, 10));
+ TEST_FUNC (f3, 3, 3, 0, 0, CHECK_EQ (0, 3); CHECK_EQ (3, 0));
+ TEST_FUNC (f4, 0, 4, 1, 1, CHECK_EQ (4, 2); CHECK_EQ (5, 1));
+ TEST_FUNC (f5, 1, 6, -1, 5, CHECK_EQ (6, 4); CHECK_EQ (7, 5));
+ TEST_FUNC (f6, 2, 10, 0, 7, CHECK_EQ (10, 7); CHECK_EQ (11, 7));
+
+ /* Break on last iteration. */
+ TEST_FUNC (f1, 0, N-1, 1, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 2));
+
+ TEST_FUNC (f2, -5, N-1, 0, 9,
+ CHECK_RANGE_EQ (0, N-1, 4); CHECK_EQ (N-1, 9));
+
+ TEST_FUNC (f3, 2, N-1, 0, 0,
+ CHECK_RANGE_EQ(0, N-1, 2); CHECK_EQ (N-1, 0));
+
+ TEST_FUNC (f4, 0, N-1, 2, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f5, 2, N-1, -3, 6,
+ CHECK_RANGE_EQ (0, N-1, 8); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f6, 5, N-1, 0, 7,
+ CHECK_RANGE_EQ (0, N-1, 12); CHECK_EQ (N-1, 7));
+
+ /* Condition never met — full loop executes. */
+ TEST_FUNC (f1, 0, -1, 0, 2,
+ CHECK_RANGE_EQ (0, N, 2));
+
+ TEST_FUNC (f2, -2, -1, 0, 5,
+ CHECK_RANGE_EQ (0, N, 3));
+
+ TEST_FUNC (f3, 1, -1, 0, 0,
+ CHECK_RANGE_EQ (0, N, 1));
+
+ TEST_FUNC (f4, 0, -1, 0, 7,
+ CHECK_RANGE_EQ (0, N, 7));
+
+ TEST_FUNC (f5, 1, -1, 0, 4,
+ CHECK_RANGE_EQ (0, N, 5));
+
+ TEST_FUNC (f6, 5, -1, 0, 3,
+ CHECK_RANGE_EQ (0, N, 8));
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_4.c
b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_4.c
new file mode 100644
index
0000000000000000000000000000000000000000..68a5d0a09fdfd5664562e1bac3b5e586a37b3a6f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_4.c
@@ -0,0 +1,132 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O3 -fno-schedule-insns -fno-reorder-blocks
-fno-schedule-insns2 --param aarch64-autovec-preference=asimd-only" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdio.h>
+
+#define N 640
+#ifndef TYPE
+#define TYPE float
+#endif
+#ifndef FMT
+#define FMT ".6f"
+#endif
+
+
+TYPE a[N] = {0};
+TYPE b[N] = {0};
+
+char *curr_test;
+
+/* Macro to define a function with a specific comparison */
+#define DEFINE_TEST_FUNC(NAME, OP) \
+ __attribute__((noipa)) \
+ void NAME(void) { \
+ for (int i = 0; i < N; i++) { \
+ b[i] += a[i]; \
+ if (a[i] OP 0) \
+ break; \
+ } \
+ }
+
+/* Generate the six comparisons functions using the macro. */
+DEFINE_TEST_FUNC(f1, >)
+DEFINE_TEST_FUNC(f2, >=)
+DEFINE_TEST_FUNC(f3, ==)
+DEFINE_TEST_FUNC(f4, !=)
+DEFINE_TEST_FUNC(f5, <)
+DEFINE_TEST_FUNC(f6, <=)
+
+__attribute__((noreturn))
+static inline void __abort_trace (const char *m, int i, TYPE result, TYPE
expected)
+{
+ printf ("*** [%s] FAIL AT %s:%d in %s - expected %" FMT " but got %" FMT "
at pos %d\n",
+ m, __FILE__, __LINE__, curr_test, expected, result, i);
+ __builtin_abort ();
+}
+
+/* Array setup macro. */
+#define RESET_ARRAYS(_aval, _idx, _force, _bval) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = 0; i < N; ++i) { \
+ a[i] = _aval; \
+ b[i] = _bval; \
+ } \
+ if (_idx >= 0 && _idx < N) \
+ a[_idx] = _force; \
+ } while (0)
+
+/* Value check macros. */
+#define CHECK_EQ(_i, _val) \
+ do { \
+ if (b[_i] != _val) \
+ __abort_trace ("single", _i, b[_i], _val); \
+ } while (0)
+
+#define CHECK_RANGE_EQ(_start, _end, _val) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = _start; i < _end; ++i) \
+ if (b[i] != _val) \
+ __abort_trace ("range", i, b[i], _val); \
+ } while (0)
+
+#define str(s) #s
+#define TEST_FUNC(_func, _aval, _idx, _force, _bval, _check_stmt) \
+ do { \
+ curr_test = str (_func); \
+ RESET_ARRAYS((_aval), (_idx), (_force), (_bval)); \
+ _func(); \
+ _check_stmt; \
+ } while (0)
+
+int main(void) {
+ /* Break on random intervals. */
+ TEST_FUNC (f1, 1, 0, 1, 10, CHECK_EQ (0, 11); CHECK_EQ (1, 10));
+ TEST_FUNC (f2, -1, 5, 0, 10, CHECK_EQ (0, 9); CHECK_EQ (5, 10));
+ TEST_FUNC (f3, 3, 3, 0, 0, CHECK_EQ (0, 3); CHECK_EQ (3, 0));
+ TEST_FUNC (f4, 0, 4, 1, 1, CHECK_EQ (4, 2); CHECK_EQ (5, 1));
+ TEST_FUNC (f5, 1, 6, -1, 5, CHECK_EQ (6, 4); CHECK_EQ (7, 5));
+ TEST_FUNC (f6, 2, 10, 0, 7, CHECK_EQ (10, 7); CHECK_EQ (11, 7));
+
+ /* Break on last iteration. */
+ TEST_FUNC (f1, 0, N-1, 1, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 2));
+
+ TEST_FUNC (f2, -5, N-1, 0, 9,
+ CHECK_RANGE_EQ (0, N-1, 4); CHECK_EQ (N-1, 9));
+
+ TEST_FUNC (f3, 2, N-1, 0, 0,
+ CHECK_RANGE_EQ(0, N-1, 2); CHECK_EQ (N-1, 0));
+
+ TEST_FUNC (f4, 0, N-1, 2, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f5, 2, N-1, -3, 6,
+ CHECK_RANGE_EQ (0, N-1, 8); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f6, 5, N-1, 0, 7,
+ CHECK_RANGE_EQ (0, N-1, 12); CHECK_EQ (N-1, 7));
+
+ /* Condition never met — full loop executes. */
+ TEST_FUNC (f1, 0, -1, 0, 2,
+ CHECK_RANGE_EQ (0, N, 2));
+
+ TEST_FUNC (f2, -2, -1, 0, 5,
+ CHECK_RANGE_EQ (0, N, 3));
+
+ TEST_FUNC (f3, 1, -1, 0, 0,
+ CHECK_RANGE_EQ (0, N, 1));
+
+ TEST_FUNC (f4, 0, -1, 0, 7,
+ CHECK_RANGE_EQ (0, N, 7));
+
+ TEST_FUNC (f5, 1, -1, 0, 4,
+ CHECK_RANGE_EQ (0, N, 5));
+
+ TEST_FUNC (f6, 5, -1, 0, 3,
+ CHECK_RANGE_EQ (0, N, 8));
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_5.c
b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_5.c
new file mode 100644
index
0000000000000000000000000000000000000000..52d95e9a71b7999376731e47a224936d4253ac2f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_5.c
@@ -0,0 +1,132 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O3 -fno-schedule-insns -fno-reorder-blocks
-fno-schedule-insns2 --param aarch64-autovec-preference=sve-only" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdio.h>
+
+#define N 640
+#ifndef TYPE
+#define TYPE float
+#endif
+#ifndef FMT
+#define FMT ".6f"
+#endif
+
+
+TYPE a[N] = {0};
+TYPE b[N] = {0};
+
+char *curr_test;
+
+/* Macro to define a function with a specific comparison */
+#define DEFINE_TEST_FUNC(NAME, OP) \
+ __attribute__((noipa)) \
+ void NAME(void) { \
+ for (int i = 0; i < N; i++) { \
+ b[i] += a[i]; \
+ if (a[i] OP 0) \
+ break; \
+ } \
+ }
+
+/* Generate the six comparisons functions using the macro. */
+DEFINE_TEST_FUNC(f1, >)
+DEFINE_TEST_FUNC(f2, >=)
+DEFINE_TEST_FUNC(f3, ==)
+DEFINE_TEST_FUNC(f4, !=)
+DEFINE_TEST_FUNC(f5, <)
+DEFINE_TEST_FUNC(f6, <=)
+
+__attribute__((noreturn))
+static inline void __abort_trace (const char *m, int i, TYPE result, TYPE
expected)
+{
+ printf ("*** [%s] FAIL AT %s:%d in %s - expected %" FMT " but got %" FMT "
at pos %d\n",
+ m, __FILE__, __LINE__, curr_test, expected, result, i);
+ __builtin_abort ();
+}
+
+/* Array setup macro. */
+#define RESET_ARRAYS(_aval, _idx, _force, _bval) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = 0; i < N; ++i) { \
+ a[i] = _aval; \
+ b[i] = _bval; \
+ } \
+ if (_idx >= 0 && _idx < N) \
+ a[_idx] = _force; \
+ } while (0)
+
+/* Value check macros. */
+#define CHECK_EQ(_i, _val) \
+ do { \
+ if (b[_i] != _val) \
+ __abort_trace ("single", _i, b[_i], _val); \
+ } while (0)
+
+#define CHECK_RANGE_EQ(_start, _end, _val) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = _start; i < _end; ++i) \
+ if (b[i] != _val) \
+ __abort_trace ("range", i, b[i], _val); \
+ } while (0)
+
+#define str(s) #s
+#define TEST_FUNC(_func, _aval, _idx, _force, _bval, _check_stmt) \
+ do { \
+ curr_test = str (_func); \
+ RESET_ARRAYS((_aval), (_idx), (_force), (_bval)); \
+ _func(); \
+ _check_stmt; \
+ } while (0)
+
+int main(void) {
+ /* Break on random intervals. */
+ TEST_FUNC (f1, 1, 0, 1, 10, CHECK_EQ (0, 11); CHECK_EQ (1, 10));
+ TEST_FUNC (f2, -1, 5, 0, 10, CHECK_EQ (0, 9); CHECK_EQ (5, 10));
+ TEST_FUNC (f3, 3, 3, 0, 0, CHECK_EQ (0, 3); CHECK_EQ (3, 0));
+ TEST_FUNC (f4, 0, 4, 1, 1, CHECK_EQ (4, 2); CHECK_EQ (5, 1));
+ TEST_FUNC (f5, 1, 6, -1, 5, CHECK_EQ (6, 4); CHECK_EQ (7, 5));
+ TEST_FUNC (f6, 2, 10, 0, 7, CHECK_EQ (10, 7); CHECK_EQ (11, 7));
+
+ /* Break on last iteration. */
+ TEST_FUNC (f1, 0, N-1, 1, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 2));
+
+ TEST_FUNC (f2, -5, N-1, 0, 9,
+ CHECK_RANGE_EQ (0, N-1, 4); CHECK_EQ (N-1, 9));
+
+ TEST_FUNC (f3, 2, N-1, 0, 0,
+ CHECK_RANGE_EQ(0, N-1, 2); CHECK_EQ (N-1, 0));
+
+ TEST_FUNC (f4, 0, N-1, 2, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f5, 2, N-1, -3, 6,
+ CHECK_RANGE_EQ (0, N-1, 8); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f6, 5, N-1, 0, 7,
+ CHECK_RANGE_EQ (0, N-1, 12); CHECK_EQ (N-1, 7));
+
+ /* Condition never met — full loop executes. */
+ TEST_FUNC (f1, 0, -1, 0, 2,
+ CHECK_RANGE_EQ (0, N, 2));
+
+ TEST_FUNC (f2, -2, -1, 0, 5,
+ CHECK_RANGE_EQ (0, N, 3));
+
+ TEST_FUNC (f3, 1, -1, 0, 0,
+ CHECK_RANGE_EQ (0, N, 1));
+
+ TEST_FUNC (f4, 0, -1, 0, 7,
+ CHECK_RANGE_EQ (0, N, 7));
+
+ TEST_FUNC (f5, 1, -1, 0, 4,
+ CHECK_RANGE_EQ (0, N, 5));
+
+ TEST_FUNC (f6, 5, -1, 0, 3,
+ CHECK_RANGE_EQ (0, N, 8));
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_7.c
b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_7.c
new file mode 100644
index
0000000000000000000000000000000000000000..20cbb685b17a3312b30ce0124a36cdfaf3b3a02d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_7.c
@@ -0,0 +1,147 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 --param aarch64-autovec-preference=asimd-only" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdio.h>
+#include <math.h>
+
+#define N 640
+#ifndef TYPE
+#define TYPE float
+#endif
+#ifndef FMT
+#define FMT ".6f"
+#endif
+
+TYPE a[N] = {0};
+TYPE b[N] = {0};
+
+char *curr_test;
+
+/* Macro to define a function with a specific comparison */
+#define DEFINE_TEST_FUNC(NAME, OP) \
+ __attribute__((noipa)) \
+ void NAME(void) { \
+ for (int i = 0; i < N; i++) { \
+ b[i] += a[i]; \
+ if (a[i] OP 0) \
+ break; \
+ } \
+ }
+
+/* Generate comparison functions */
+DEFINE_TEST_FUNC(f1, >)
+DEFINE_TEST_FUNC(f2, >=)
+DEFINE_TEST_FUNC(f3, ==)
+DEFINE_TEST_FUNC(f4, !=)
+DEFINE_TEST_FUNC(f5, <)
+DEFINE_TEST_FUNC(f6, <=)
+
+/* Example unordered-sensitive loop: breaks if a[i] is unordered with 0 */
+__attribute__((noipa))
+void f7(void) {
+ for (int i = 0; i < N; i++) {
+ b[i] += a[i];
+ if (__builtin_isunordered(a[i], 0.0f))
+ break;
+ }
+}
+
+__attribute__((noreturn))
+static inline void __abort_trace(const char *m, int i, TYPE result, TYPE expected) {
+ printf("*** [%s] FAIL AT %s:%d in %s - expected %" FMT " but got %" FMT " at pos %d\n",
+ m, __FILE__, __LINE__, curr_test, expected, result, i);
+ __builtin_abort();
+}
+
+/* Array setup */
+#define RESET_ARRAYS(_aval, _idx, _force, _bval) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = 0; i < N; ++i) { \
+ a[i] = _aval; \
+ b[i] = _bval; \
+ } \
+ if (_idx >= 0 && _idx < N) \
+ a[_idx] = _force; \
+ } while (0)
+
+/* Floating-point checks: fail on a NaN mismatch or, for non-NaN, on inequality. */
+#define CHECK_EQ(_i, _val) do { \
+ if (__builtin_isnan (_val) != __builtin_isnan (b[_i]) \
+ || (!__builtin_isnan (_val) && b[_i] != _val)) \
+ __abort_trace ("single", _i, b[_i], _val); \
+} while (0)
+
+#define CHECK_RANGE_EQ(_start, _end, _val) do { \
+ _Pragma("GCC novector") \
+ for (int i = _start; i < _end; ++i) \
+ if (__builtin_isnan (_val) != __builtin_isnan (b[i]) \
+ || (!__builtin_isnan (_val) && b[i] != _val)) \
+ __abort_trace ("range", i, b[i], _val); \
+} while (0)
+
+#define str(s) #s
+#define TEST_FUNC(_func, _aval, _idx, _force, _bval, _check_stmt) \
+ do { \
+ curr_test = str (_func); \
+ RESET_ARRAYS((_aval), (_idx), (_force), (_bval)); \
+ _func(); \
+ _check_stmt; \
+ } while (0)
+
+int main(void) {
+ /* Break on random intervals. */
+ TEST_FUNC(f1, 1.0f, 0, 1.0f, 10.0f, CHECK_EQ(0, 11.0f); CHECK_EQ(1, 10.0f));
+ TEST_FUNC(f2, -1.0f, 5, 0.0f, 10.0f, CHECK_EQ(0, 9.0f); CHECK_EQ(5, 10.0f));
+ TEST_FUNC(f3, 3.0f, 3, 0.0f, 0.0f, CHECK_EQ(0, 3.0f); CHECK_EQ(3, 0.0f));
+ TEST_FUNC(f4, 0.0f, 4, 1.0f, 1.0f, CHECK_EQ(4, 2.0f); CHECK_EQ(5, 1.0f));
+ TEST_FUNC(f5, 1.0f, 6, -1.0f, 5.0f, CHECK_EQ(6, 4.0f); CHECK_EQ(7, 5.0f));
+ TEST_FUNC(f6, 2.0f, 10, 0.0f, 7.0f, CHECK_EQ(10, 7.0f); CHECK_EQ(11, 7.0f));
+
+ /* Break on last iteration. */
+ TEST_FUNC(f1, 0.0f, N - 1, 1.0f, 1.0f,
+ CHECK_RANGE_EQ(0, N - 1, 1.0f); CHECK_EQ(N - 1, 2.0f));
+
+ TEST_FUNC(f2, -5.0f, N - 1, 0.0f, 9.0f,
+ CHECK_RANGE_EQ(0, N - 1, 4.0f); CHECK_EQ(N - 1, 9.0f));
+
+ TEST_FUNC(f3, 2.0f, N - 1, 0.0f, 0.0f,
+ CHECK_RANGE_EQ(0, N - 1, 2.0f); CHECK_EQ(N - 1, 0.0f));
+
+ TEST_FUNC(f4, 0.0f, N - 1, 2.0f, 1.0f,
+ CHECK_RANGE_EQ(0, N - 1, 1.0f); CHECK_EQ(N - 1, 3.0f));
+
+ TEST_FUNC(f5, 2.0f, N - 1, -3.0f, 6.0f,
+ CHECK_RANGE_EQ(0, N - 1, 8.0f); CHECK_EQ(N - 1, 3.0f));
+
+ TEST_FUNC(f6, 5.0f, N - 1, 0.0f, 7.0f,
+ CHECK_RANGE_EQ(0, N - 1, 12.0f); CHECK_EQ(N - 1, 7.0f));
+
+ /* Condition never met — full loop executes. */
+ TEST_FUNC(f1, 0.0f, -1, 0.0f, 2.0f,
+ CHECK_RANGE_EQ(0, N, 2.0f));
+
+ TEST_FUNC(f2, -2.0f, -1, 0.0f, 5.0f,
+ CHECK_RANGE_EQ(0, N, 3.0f));
+
+ TEST_FUNC(f3, 1.0f, -1, 0.0f, 0.0f,
+ CHECK_RANGE_EQ(0, N, 1.0f));
+
+ TEST_FUNC(f4, 0.0f, -1, 0.0f, 7.0f,
+ CHECK_RANGE_EQ(0, N, 7.0f));
+
+ TEST_FUNC(f5, 1.0f, -1, 0.0f, 4.0f,
+ CHECK_RANGE_EQ(0, N, 5.0f));
+
+ TEST_FUNC(f6, 5.0f, -1, 0.0f, 3.0f,
+ CHECK_RANGE_EQ(0, N, 8.0f));
+
+#if !defined(__FAST_MATH__)
+ /* Unordered break (NAN in a[i]) */
+ TEST_FUNC(f7, 1.0f, 123, NAN, 2.0f,
+ CHECK_RANGE_EQ(0, 123, 3.0f); CHECK_EQ(123, NAN));
+#endif
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_8.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_8.c
new file mode 100644
index 0000000000000000000000000000000000000000..d951d649ac6d3c3030f7b5e04055f8065908d87b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_8.c
@@ -0,0 +1,132 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 --param aarch64-autovec-preference=asimd-only" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdio.h>
+
+#define N 640
+#ifndef TYPE
+#define TYPE float
+#endif
+#ifndef FMT
+#define FMT ".6f"
+#endif
+
+
+TYPE a[N] = {0};
+TYPE b[N] = {0};
+
+char *curr_test;
+
+/* Macro to define a function with a specific comparison */
+#define DEFINE_TEST_FUNC(NAME, OP) \
+ __attribute__((noipa)) \
+ void NAME(void) { \
+ for (int i = 0; i < N; i++) { \
+ b[i] += a[i]; \
+ if (a[i] OP 0) \
+ break; \
+ } \
+ }
+
+/* Generate the six comparison functions using the macro. */
+DEFINE_TEST_FUNC(f1, >)
+DEFINE_TEST_FUNC(f2, >=)
+DEFINE_TEST_FUNC(f3, ==)
+DEFINE_TEST_FUNC(f4, !=)
+DEFINE_TEST_FUNC(f5, <)
+DEFINE_TEST_FUNC(f6, <=)
+
+__attribute__((noreturn))
+static inline void __abort_trace (const char *m, int i, TYPE result, TYPE expected)
+{
+ printf ("*** [%s] FAIL AT %s:%d in %s - expected %" FMT " but got %" FMT " at pos %d\n",
+ m, __FILE__, __LINE__, curr_test, expected, result, i);
+ __builtin_abort ();
+}
+
+/* Array setup macro. */
+#define RESET_ARRAYS(_aval, _idx, _force, _bval) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = 0; i < N; ++i) { \
+ a[i] = _aval; \
+ b[i] = _bval; \
+ } \
+ if (_idx >= 0 && _idx < N) \
+ a[_idx] = _force; \
+ } while (0)
+
+/* Value check macros. */
+#define CHECK_EQ(_i, _val) \
+ do { \
+ if (b[_i] != _val) \
+ __abort_trace ("single", _i, b[_i], _val); \
+ } while (0)
+
+#define CHECK_RANGE_EQ(_start, _end, _val) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = _start; i < _end; ++i) \
+ if (b[i] != _val) \
+ __abort_trace ("range", i, b[i], _val); \
+ } while (0)
+
+#define str(s) #s
+#define TEST_FUNC(_func, _aval, _idx, _force, _bval, _check_stmt) \
+ do { \
+ curr_test = str (_func); \
+ RESET_ARRAYS((_aval), (_idx), (_force), (_bval)); \
+ _func(); \
+ _check_stmt; \
+ } while (0)
+
+int main(void) {
+ /* Break on random intervals. */
+ TEST_FUNC (f1, 1, 0, 1, 10, CHECK_EQ (0, 11); CHECK_EQ (1, 10));
+ TEST_FUNC (f2, -1, 5, 0, 10, CHECK_EQ (0, 9); CHECK_EQ (5, 10));
+ TEST_FUNC (f3, 3, 3, 0, 0, CHECK_EQ (0, 3); CHECK_EQ (3, 0));
+ TEST_FUNC (f4, 0, 4, 1, 1, CHECK_EQ (4, 2); CHECK_EQ (5, 1));
+ TEST_FUNC (f5, 1, 6, -1, 5, CHECK_EQ (6, 4); CHECK_EQ (7, 5));
+ TEST_FUNC (f6, 2, 10, 0, 7, CHECK_EQ (10, 7); CHECK_EQ (11, 7));
+
+ /* Break on last iteration. */
+ TEST_FUNC (f1, 0, N-1, 1, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 2));
+
+ TEST_FUNC (f2, -5, N-1, 0, 9,
+ CHECK_RANGE_EQ (0, N-1, 4); CHECK_EQ (N-1, 9));
+
+ TEST_FUNC (f3, 2, N-1, 0, 0,
+ CHECK_RANGE_EQ(0, N-1, 2); CHECK_EQ (N-1, 0));
+
+ TEST_FUNC (f4, 0, N-1, 2, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f5, 2, N-1, -3, 6,
+ CHECK_RANGE_EQ (0, N-1, 8); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f6, 5, N-1, 0, 7,
+ CHECK_RANGE_EQ (0, N-1, 12); CHECK_EQ (N-1, 7));
+
+ /* Condition never met — full loop executes. */
+ TEST_FUNC (f1, 0, -1, 0, 2,
+ CHECK_RANGE_EQ (0, N, 2));
+
+ TEST_FUNC (f2, -2, -1, 0, 5,
+ CHECK_RANGE_EQ (0, N, 3));
+
+ TEST_FUNC (f3, 1, -1, 0, 0,
+ CHECK_RANGE_EQ (0, N, 1));
+
+ TEST_FUNC (f4, 0, -1, 0, 7,
+ CHECK_RANGE_EQ (0, N, 7));
+
+ TEST_FUNC (f5, 1, -1, 0, 4,
+ CHECK_RANGE_EQ (0, N, 5));
+
+ TEST_FUNC (f6, 5, -1, 0, 3,
+ CHECK_RANGE_EQ (0, N, 8));
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch.c b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch.c
index 673b781eb6d092f6311409797b20a971f4fae247..ca4ef498485f68dbb8a178750db3f552cf7a66d0 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch.c
@@ -50,7 +50,6 @@ void f2 ()
/*
** f3:
** ...
-** cmeq v[0-9]+.4s, v[0-9]+.4s, #0
** umaxp v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
** fmov x[0-9]+, d[0-9]+
** cbn?z x[0-9]+, \.L[0-9]+
@@ -69,7 +68,6 @@ void f3 ()
/*
** f4:
** ...
-** cmtst v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
** umaxp v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
** fmov x[0-9]+, d[0-9]+
** cbn?z x[0-9]+, \.L[0-9]+
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_2.c b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..d5cb1946873f5e720c6e12e0dd4991582764f18f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_2.c
@@ -0,0 +1,105 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 --param aarch64-autovec-preference=asimd-only" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+sve"
+
+#define N 640
+int a[N] = {0};
+int b[N] = {0};
+/*
+** f1:
+** ...
+** cmpgt p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f1 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] > 0)
+ break;
+ }
+}
+/*
+** f2:
+** ...
+** cmpge p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f2 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] >= 0)
+ break;
+ }
+}
+/*
+** f3:
+** ...
+** cmpeq p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f3 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] == 0)
+ break;
+ }
+}
+/*
+** f4:
+** ...
+** cmpne p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f4 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] != 0)
+ break;
+ }
+}
+/*
+** f5:
+** ...
+** cmplt p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f5 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] < 0)
+ break;
+ }
+}
+/*
+** f6:
+** ...
+** cmple p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f6 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] <= 0)
+ break;
+ }
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_3.c b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..9312db675eb428fc2fb775ec45e9e71e636fabdf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_3.c
@@ -0,0 +1,110 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 --param aarch64-autovec-preference=asimd-only" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+sve"
+
+#define N 640
+float a[N] = {0};
+float b[N] = {0};
+
+/*
+** f1:
+** ...
+** fcmgt p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0.0
+** ptest p[0-9]+, p[0-9]+\.b
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f1 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] > 0)
+ break;
+ }
+}
+/*
+** f2:
+** ...
+** fcmge p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0.0
+** ptest p[0-9]+, p[0-9]+\.b
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f2 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] >= 0)
+ break;
+ }
+}
+/*
+** f3:
+** ...
+** cmpeq p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f3 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] == 0)
+ break;
+ }
+}
+/*
+** f4:
+** ...
+** cmpne p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f4 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] != 0)
+ break;
+ }
+}
+/*
+** f5:
+** ...
+** fcmlt p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0.0
+** ptest p[0-9]+, p[0-9]+\.b
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f5 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] < 0)
+ break;
+ }
+}
+/*
+** f6:
+** ...
+** fcmle p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0.0
+** ptest p[0-9]+, p[0-9]+\.b
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f6 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] <= 0)
+ break;
+ }
+}
--
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e7c459dceb3a9d5aa05501278d85f9cc7ac0eeab..3aecef7470b4309f42831228f8cdf0f684cd22fe 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1155,7 +1155,7 @@ (define_insn "ior<mode>3<vczle><vczbe>"
)
;; For EOR (vector, register) and SVE EOR (vector, immediate)
-(define_insn "xor<mode>3<vczle><vczbe>"
+(define_insn "@xor<mode>3<vczle><vczbe>"
[(set (match_operand:VDQ_I 0 "register_operand")
(xor:VDQ_I (match_operand:VDQ_I 1 "register_operand")
(match_operand:VDQ_I 2 "aarch64_reg_or_xor_imm")))]
@@ -4156,6 +4156,8 @@ (define_expand "vcond_mask_<mode><v_int_equiv>"
;; Patterns comparing two vectors and conditionally jump
+;; Define cbranch on masks. This optab is only called for BOOLEAN_VECTOR_TYPE_P
+;; which allows optimizing compares with zero.
(define_expand "cbranch<mode>4"
[(set (pc)
(if_then_else
@@ -4196,6 +4198,89 @@ (define_expand "cbranch<mode>4"
DONE;
})
+;; Define vec_cbranch_any and vec_cbranch_all
+;; Vector comparison and branch for Adv. SIMD integer and floating-point
+;; types, using SVE instructions when TARGET_SVE and Adv. SIMD otherwise.
+(define_expand "<optab><mode>"
+ [(set (pc)
+ (unspec:VALL
+ [(if_then_else
+ (match_operator 0 "aarch64_cbranch_compare_operation"
+ [(match_operand:VALL 1 "register_operand")
+ (match_operand:VALL 2 "aarch64_simd_reg_or_zero")])
+ (label_ref (match_operand 3 ""))
+ (pc))]
+ CBRANCH_CMP))]
+ "TARGET_SIMD"
+{
+ auto code = GET_CODE (operands[0]);
+ if (TARGET_SVE)
+ {
+ machine_mode full_mode = aarch64_full_sve_mode (<VEL>mode).require ();
+
+ /* SVE FP compares don't set flags, so EQ/NE use the flag-setting integer
+ compare. NOTE(review): bitwise equality differs from IEEE for NaN and
+ +/-0.0; there is no fast-math guard here, unlike the SVE_F expander. */
+ if (FLOAT_MODE_P (full_mode)
+ && aarch64_equality_operator (operands[0], <MODE>mode))
+ full_mode = related_int_vector_mode (full_mode).require ();
+ rtx in1 = force_lowpart_subreg (full_mode, operands[1], <MODE>mode);
+ rtx in2;
+ if (CONST0_RTX (<MODE>mode) == operands[2])
+ in2 = CONST0_RTX (full_mode);
+ else
+ in2 = force_lowpart_subreg (full_mode, operands[2], <MODE>mode);
+
+ unsigned lanes
+ = exact_div (GET_MODE_BITSIZE (<MODE>mode), 8).to_constant ();
+ machine_mode pred_mode = aarch64_sve_pred_mode (full_mode);
+ rtx ptrue = aarch64_ptrue_reg (VNx16BImode, lanes);
+ rtx hint = gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode);
+
+ rtx tmp = gen_reg_rtx (pred_mode);
+ rtx cast_ptrue = gen_lowpart (pred_mode, ptrue);
+
+ if (FLOAT_MODE_P (full_mode))
+ {
+ aarch64_expand_sve_vec_cmp<sve_cmp_suff> (tmp, code, in1, in2);
+ emit_insn (gen_and3 (pred_mode, tmp, tmp, cast_ptrue));
+ emit_insn (gen_aarch64_ptest (pred_mode, ptrue, cast_ptrue, hint,
+ tmp));
+ }
+ else
+ emit_insn (gen_aarch64_pred_cmp_ptest (code, full_mode, tmp, ptrue, in1,
+ in2, cast_ptrue, hint,
+ cast_ptrue, hint));
+
+ rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+ rtx cmp_reg = gen_rtx_<cbranch_op> (VOIDmode, cc_reg, const0_rtx);
+ emit_jump_insn (gen_aarch64_bcond (cmp_reg, cc_reg, operands[3]));
+ DONE;
+ }
+
+ rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
+ emit_insn (gen_vec_cmp<mode><v_int_equiv> (tmp, operands[0], operands[1],
+ operands[2]));
+
+ /* For 128-bit vectors we need a reduction to 64-bit first. */
+ if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
+ {
+ /* Always reduce using a V4SI. */
+ rtx reduc = gen_lowpart (V4SImode, tmp);
+ rtx res = gen_reg_rtx (V4SImode);
+ emit_insn (gen_aarch64_umaxpv4si (res, reduc, reduc));
+ emit_move_insn (tmp, gen_lowpart (<V_INT_EQUIV>mode, res));
+ }
+
+ rtx val = gen_reg_rtx (DImode);
+ emit_move_insn (val, gen_lowpart (DImode, tmp));
+
+ rtx cc_reg = aarch64_gen_compare_reg (<cbranch_op>, val, const0_rtx);
+ rtx cmp_rtx = gen_rtx_fmt_ee (<cbranch_op>, DImode, cc_reg, const0_rtx);
+ emit_jump_insn (gen_aarch64_bcond (cmp_rtx, cc_reg, operands[3]));
+ DONE;
+})
+
;; Patterns comparing two vectors to produce a mask.
(define_expand "vec_cmp<mode><mode>"
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 4648aa67e0c360076cf4444c4e0ac55babda34e6..2a2caa2066ce07959fc97ccf4a78632551dda07a 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -9761,7 +9761,8 @@ (define_insn "@vcond_mask_<mode><mode>"
;; - PTEST
;; -------------------------------------------------------------------------
-;; Branch based on predicate equality or inequality.
+;; Branch based on predicate equality or inequality. This allows PTEST to be
+;; combined with other flag setting instructions like ORR -> ORRS.
(define_expand "cbranch<mode>4"
[(set (pc)
(if_then_else
@@ -9790,8 +9791,120 @@ (define_expand "cbranch<mode>4"
}
)
+;; Define vec_cbranch_any and vec_cbranch_all
+;; Branch based on predicate equality or inequality.
+(define_expand "<optab><mode>"
+ [(set (pc)
+ (unspec:PRED_ALL
+ [(if_then_else
+ (match_operator 0 "aarch64_equality_operator"
+ [(match_operand:PRED_ALL 1 "register_operand")
+ (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
+ (label_ref (match_operand 3 ""))
+ (pc))]
+ CBRANCH_CMP))]
+ ""
+ {
+ rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>));
+ rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue);
+ rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
+ rtx pred;
+ if (operands[2] == CONST0_RTX (<MODE>mode))
+ pred = operands[1];
+ else
+ {
+ pred = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1],
+ operands[2]));
+ }
+ emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred));
+
+ rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+ rtx cmp_reg = gen_rtx_<cbranch_op> (VOIDmode, cc_reg, const0_rtx);
+ emit_jump_insn (gen_aarch64_bcond (cmp_reg, cc_reg, operands[3]));
+ DONE;
+ }
+)
+
+;; Define cond_vec_cbranch_any and cond_vec_cbranch_all
+;; Vector comparison and branch for SVE floating-point types, for EQ and NE
+;; only, using flag-setting integer compares so that no PTEST is needed.
+(define_expand "<optab><mode>"
+ [(set (pc)
+ (unspec:SVE_F
+ [(if_then_else
+ (match_operator 0 "aarch64_equality_operator"
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")
+ (match_operand:<VPRED> 4 "aarch64_simd_imm_zero")])
+ (label_ref (match_operand 5 ""))
+ (pc))]
+ COND_CBRANCH_CMP))]
+ "flag_unsafe_math_optimizations"
+{
+ auto code = GET_CODE (operands[0]);
+ machine_mode full_mode = related_int_vector_mode (<MODE>mode).require ();
+ rtx in1 = force_lowpart_subreg (full_mode, operands[2], <MODE>mode);
+ rtx in2;
+ if (CONST0_RTX (<MODE>mode) == operands[3])
+ in2 = CONST0_RTX (full_mode);
+ else
+ in2 = force_lowpart_subreg (full_mode, operands[3], <MODE>mode);
+
+ rtx res = gen_reg_rtx (<VPRED>mode);
+ rtx gp = gen_lowpart (VNx16BImode, operands[1]);
+ rtx cast_gp = operands[1];
+ rtx flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
+
+ emit_insn (gen_aarch64_pred_cmp_ptest (code, full_mode, res, gp, in1, in2,
+ cast_gp, flag, cast_gp, flag));
+
+ rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+ rtx cmp_reg = gen_rtx_<cbranch_op> (VOIDmode, cc_reg, const0_rtx);
+ emit_jump_insn (gen_aarch64_bcond (cmp_reg, cc_reg, operands[5]));
+ DONE;
+})
+
+
+;; Define cond_vec_cbranch_any and cond_vec_cbranch_all
+;; Vector comparison and branch for SVE integer types. Any comparison
+;; accepted by aarch64_comparison_operator is handled with a flag-setting
+;; integer compare, which avoids a separate PTEST.
+(define_expand "<optab><mode>"
+ [(set (pc)
+ (unspec:SVE_I
+ [(if_then_else
+ (match_operator 0 "aarch64_comparison_operator"
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SVE_I 2 "register_operand")
+ (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero")
+ (match_operand:<VPRED> 4 "aarch64_simd_imm_zero")])
+ (label_ref (match_operand 5 ""))
+ (pc))]
+ COND_CBRANCH_CMP))]
+ ""
+{
+ auto code = GET_CODE (operands[0]);
+ rtx in1 = operands[2];
+ rtx in2 = operands[3];
+
+ rtx res = gen_reg_rtx (<VPRED>mode);
+ rtx gp = gen_lowpart (VNx16BImode, operands[1]);
+ rtx cast_gp = operands[1];
+ rtx flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
+
+ emit_insn (gen_aarch64_pred_cmp_ptest (code, <MODE>mode, res, gp, in1, in2,
+ cast_gp, flag, cast_gp, flag));
+
+ rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+ rtx cmp_reg = gen_rtx_<cbranch_op> (VOIDmode, cc_reg, const0_rtx);
+ emit_jump_insn (gen_aarch64_bcond (cmp_reg, cc_reg, operands[5]));
+ DONE;
+})
+
;; See "Description of UNSPEC_PTEST" above for details.
-(define_insn "aarch64_ptest<mode>"
+(define_insn "@aarch64_ptest<mode>"
[(set (reg:CC_NZC CC_REGNUM)
(unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa")
(match_operand 1)
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 517b2808b5f725db81709122848817aaafff1f34..7a1f8e7aed4a56dec70c1a056c1512bd0da7ff56 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -837,6 +837,10 @@ (define_c_enum "unspec"
UNSPEC_SSHLL ; Used in aarch64-simd.md.
UNSPEC_USHLL ; Used in aarch64-simd.md.
UNSPEC_ADDP ; Used in aarch64-simd.md.
+ UNSPEC_CMP_ALL ; Used in aarch64-simd.md and aarch64-sve.md.
+ UNSPEC_CMP_ANY ; Used in aarch64-simd.md and aarch64-sve.md.
+ UNSPEC_COND_CMP_ALL ; Used in aarch64-sve.md.
+ UNSPEC_COND_CMP_ANY ; Used in aarch64-sve.md.
UNSPEC_TBL ; Used in vector permute patterns.
UNSPEC_TBLQ ; Used in vector permute patterns.
UNSPEC_TBX ; Used in vector permute patterns.
@@ -2612,6 +2616,12 @@ (define_mode_attr vpred [(VNx16QI "vnx16bi") (VNx8QI "vnx8bi")
(VNx16SI "vnx4bi") (VNx16SF "vnx4bi")
(VNx8DI "vnx2bi") (VNx8DF "vnx2bi")])
+;; Map mode to suffix for using an SVE comparison
+(define_mode_attr sve_cmp_suff [(V8QI "_int") (V16QI "_int")
+ (V4HI "_int") (V8HI "_int") (V2SI "_int")
+ (V4SI "_int") (V2DI "_int")
+ (V2SF "_float") (V4SF "_float") (V2DF "_float")])
+
(define_mode_attr VDOUBLE [(VNx16QI "VNx32QI")
(VNx8HI "VNx16HI") (VNx8HF "VNx16HF")
(VNx8BF "VNx16BF")
@@ -3272,6 +3282,9 @@ (define_int_iterator HADD [UNSPEC_SHADD UNSPEC_UHADD])
(define_int_iterator RHADD [UNSPEC_SRHADD UNSPEC_URHADD])
+(define_int_iterator CBRANCH_CMP [UNSPEC_CMP_ALL UNSPEC_CMP_ANY])
+(define_int_iterator COND_CBRANCH_CMP [UNSPEC_COND_CMP_ALL UNSPEC_COND_CMP_ANY])
+
(define_int_iterator BSL_DUP [1 2])
(define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT])
@@ -4215,7 +4228,16 @@ (define_int_attr optab [(UNSPEC_ANDF "and")
(UNSPEC_COND_SCVTF "float")
(UNSPEC_COND_SMAX "smax")
(UNSPEC_COND_SMIN "smin")
- (UNSPEC_COND_UCVTF "floatuns")])
+ (UNSPEC_COND_UCVTF "floatuns")
+ (UNSPEC_CMP_ALL "vec_cbranch_all")
+ (UNSPEC_CMP_ANY "vec_cbranch_any")
+ (UNSPEC_COND_CMP_ALL "cond_vec_cbranch_all")
+ (UNSPEC_COND_CMP_ANY "cond_vec_cbranch_any")])
+
+(define_int_attr cbranch_op [(UNSPEC_CMP_ALL "EQ")
+ (UNSPEC_CMP_ANY "NE")
+ (UNSPEC_COND_CMP_ALL "EQ")
+ (UNSPEC_COND_CMP_ANY "NE")])
(define_int_attr fmaxmin [(UNSPEC_FMAX "fmax_nan")
(UNSPEC_FMAXNM "fmax")
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 42304cef4391e15598bcd22da590c8663f3ffaa5..4b0a4f79253fcc28192cb70ce46d487875b25a7a 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -462,6 +462,19 @@ (define_special_predicate "aarch64_comparison_operation"
(define_special_predicate "aarch64_equality_operator"
(match_code "eq,ne"))
+(define_special_predicate "aarch64_cbranch_compare_operation"
+ (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,unordered,
+ ordered,unlt,unle,unge,ungt")
+{
+ if (TARGET_SVE)
+ return true;
+
+ if (!TARGET_SIMD)
+ return false;
+
+ return true;
+})
+
(define_special_predicate "aarch64_carry_operation"
(match_code "ltu,geu")
{
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
index 85aab355f95f83e1fa65d280f14fb8ade7f7e658..1ebc735a82f4a59d8eccff39346e46a449b4729a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
@@ -14,7 +14,6 @@ int x[N] __attribute__((aligned(32)));
** ...
** ld1w z[0-9]+.s, p[0-9]+/z, \[x[0-9], x[0-9], lsl 2\]
** cmple p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
** ...
*/
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c
index d7cef1105410be04ed67d1d3b800746267f205a8..48fb407ccee769b008e0de87e25dda71fe235d20 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c
@@ -8,8 +8,7 @@ int b[N] = {0};
** f1:
** ...
** cmpgt p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
-** b.(any|none) \.L[0-9]+
+** b(\.?eq|\.none) \.L[0-9]+
** ...
*/
void f1 ()
@@ -25,8 +24,7 @@ void f1 ()
** f2:
** ...
** cmpge p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
-** b.(any|none) \.L[0-9]+
+** b(\.?eq|\.none) \.L[0-9]+
** ...
*/
void f2 ()
@@ -42,8 +40,7 @@ void f2 ()
** f3:
** ...
** cmpeq p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
-** b.(any|none) \.L[0-9]+
+** b(\.?eq|\.none) \.L[0-9]+
** ...
*/
void f3 ()
@@ -59,8 +56,7 @@ void f3 ()
** f4:
** ...
** cmpne p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
-** b.(any|none) \.L[0-9]+
+** b(\.?eq|\.none) \.L[0-9]+
** ...
*/
void f4 ()
@@ -76,8 +72,7 @@ void f4 ()
** f5:
** ...
** cmplt p[0-9]+.s, p7/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
-** b.(any|none) .L[0-9]+
+** b(\.?eq|\.none) .L[0-9]+
** ...
*/
void f5 ()
@@ -93,8 +88,7 @@ void f5 ()
** f6:
** ...
** cmple p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
-** b.(any|none) \.L[0-9]+
+** b(\.?eq|\.none) \.L[0-9]+
** ...
*/
void f6 ()
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_2.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..7c996a893d99f7462c980140d12ab2711738597c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_2.c
@@ -0,0 +1,132 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O3 -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 --param aarch64-autovec-preference=asimd-only" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdio.h>
+
+#define N 640
+#ifndef TYPE
+#define TYPE int
+#endif
+#ifndef FMT
+#define FMT "d"
+#endif
+
+
+TYPE a[N] = {0};
+TYPE b[N] = {0};
+
+char *curr_test;
+
+/* Macro to define a function with a specific comparison */
+#define DEFINE_TEST_FUNC(NAME, OP) \
+ __attribute__((noipa)) \
+ void NAME(void) { \
+ for (int i = 0; i < N; i++) { \
+ b[i] += a[i]; \
+ if (a[i] OP 0) \
+ break; \
+ } \
+ }
+
+/* Generate the six comparison functions using the macro. */
+DEFINE_TEST_FUNC(f1, >)
+DEFINE_TEST_FUNC(f2, >=)
+DEFINE_TEST_FUNC(f3, ==)
+DEFINE_TEST_FUNC(f4, !=)
+DEFINE_TEST_FUNC(f5, <)
+DEFINE_TEST_FUNC(f6, <=)
+
+__attribute__((noreturn))
+static inline void __abort_trace (const char *m, int i, TYPE result, TYPE expected)
+{
+ printf ("*** [%s] FAIL AT %s:%d in %s - expected %" FMT " but got %" FMT " at pos %d\n",
+ m, __FILE__, __LINE__, curr_test, expected, result, i);
+ __builtin_abort ();
+}
+
+/* Array setup macro. */
+#define RESET_ARRAYS(_aval, _idx, _force, _bval) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = 0; i < N; ++i) { \
+ a[i] = _aval; \
+ b[i] = _bval; \
+ } \
+ if (_idx >= 0 && _idx < N) \
+ a[_idx] = _force; \
+ } while (0)
+
+/* Value check macros. */
+#define CHECK_EQ(_i, _val) \
+ do { \
+ if (b[_i] != _val) \
+ __abort_trace ("single", _i, b[_i], _val); \
+ } while (0)
+
+#define CHECK_RANGE_EQ(_start, _end, _val) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = _start; i < _end; ++i) \
+ if (b[i] != _val) \
+ __abort_trace ("range", i, b[i], _val); \
+ } while (0)
+
+#define str(s) #s
+#define TEST_FUNC(_func, _aval, _idx, _force, _bval, _check_stmt) \
+ do { \
+ curr_test = str (_func); \
+ RESET_ARRAYS((_aval), (_idx), (_force), (_bval)); \
+ _func(); \
+ _check_stmt; \
+ } while (0)
+
+int main(void) {
+ /* Break on random intervals. */
+ TEST_FUNC (f1, 1, 0, 1, 10, CHECK_EQ (0, 11); CHECK_EQ (1, 10));
+ TEST_FUNC (f2, -1, 5, 0, 10, CHECK_EQ (0, 9); CHECK_EQ (5, 10));
+ TEST_FUNC (f3, 3, 3, 0, 0, CHECK_EQ (0, 3); CHECK_EQ (3, 0));
+ TEST_FUNC (f4, 0, 4, 1, 1, CHECK_EQ (4, 2); CHECK_EQ (5, 1));
+ TEST_FUNC (f5, 1, 6, -1, 5, CHECK_EQ (6, 4); CHECK_EQ (7, 5));
+ TEST_FUNC (f6, 2, 10, 0, 7, CHECK_EQ (10, 7); CHECK_EQ (11, 7));
+
+ /* Break on last iteration. */
+ TEST_FUNC (f1, 0, N-1, 1, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 2));
+
+ TEST_FUNC (f2, -5, N-1, 0, 9,
+ CHECK_RANGE_EQ (0, N-1, 4); CHECK_EQ (N-1, 9));
+
+ TEST_FUNC (f3, 2, N-1, 0, 0,
+ CHECK_RANGE_EQ(0, N-1, 2); CHECK_EQ (N-1, 0));
+
+ TEST_FUNC (f4, 0, N-1, 2, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f5, 2, N-1, -3, 6,
+ CHECK_RANGE_EQ (0, N-1, 8); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f6, 5, N-1, 0, 7,
+ CHECK_RANGE_EQ (0, N-1, 12); CHECK_EQ (N-1, 7));
+
+ /* Condition never met — full loop executes. */
+ TEST_FUNC (f1, 0, -1, 0, 2,
+ CHECK_RANGE_EQ (0, N, 2));
+
+ TEST_FUNC (f2, -2, -1, 0, 5,
+ CHECK_RANGE_EQ (0, N, 3));
+
+ TEST_FUNC (f3, 1, -1, 0, 0,
+ CHECK_RANGE_EQ (0, N, 1));
+
+ TEST_FUNC (f4, 0, -1, 0, 7,
+ CHECK_RANGE_EQ (0, N, 7));
+
+ TEST_FUNC (f5, 1, -1, 0, 4,
+ CHECK_RANGE_EQ (0, N, 5));
+
+ TEST_FUNC (f6, 5, -1, 0, 3,
+ CHECK_RANGE_EQ (0, N, 8));
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_3.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..75927abb09479712760a60b0f6a11135d9be9502
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_3.c
@@ -0,0 +1,132 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O3 -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 --param aarch64-autovec-preference=sve-only" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdio.h>
+
+#define N 640
+#ifndef TYPE
+#define TYPE int
+#endif
+#ifndef FMT
+#define FMT "d"
+#endif
+
+
+TYPE a[N] = {0};
+TYPE b[N] = {0};
+
+char *curr_test;
+
+/* Macro to define a function with a specific comparison */
+#define DEFINE_TEST_FUNC(NAME, OP) \
+ __attribute__((noipa)) \
+ void NAME(void) { \
+ for (int i = 0; i < N; i++) { \
+ b[i] += a[i]; \
+ if (a[i] OP 0) \
+ break; \
+ } \
+ }
+
+/* Generate the six comparison functions using the macro. */
+DEFINE_TEST_FUNC(f1, >)
+DEFINE_TEST_FUNC(f2, >=)
+DEFINE_TEST_FUNC(f3, ==)
+DEFINE_TEST_FUNC(f4, !=)
+DEFINE_TEST_FUNC(f5, <)
+DEFINE_TEST_FUNC(f6, <=)
+
+__attribute__((noreturn))
+static inline void __abort_trace (const char *m, int i, TYPE result, TYPE expected)
+{
+ printf ("*** [%s] FAIL AT %s:%d in %s - expected %" FMT " but got %" FMT " at pos %d\n",
+ m, __FILE__, __LINE__, curr_test, expected, result, i);
+ __builtin_abort ();
+}
+
+/* Array setup macro. */
+#define RESET_ARRAYS(_aval, _idx, _force, _bval) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = 0; i < N; ++i) { \
+ a[i] = _aval; \
+ b[i] = _bval; \
+ } \
+ if (_idx >= 0 && _idx < N) \
+ a[_idx] = _force; \
+ } while (0)
+
+/* Value check macros. */
+#define CHECK_EQ(_i, _val) \
+ do { \
+ if (b[_i] != _val) \
+ __abort_trace ("single", _i, b[_i], _val); \
+ } while (0)
+
+#define CHECK_RANGE_EQ(_start, _end, _val) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = _start; i < _end; ++i) \
+ if (b[i] != _val) \
+ __abort_trace ("range", i, b[i], _val); \
+ } while (0)
+
+#define str(s) #s
+#define TEST_FUNC(_func, _aval, _idx, _force, _bval, _check_stmt) \
+ do { \
+ curr_test = str (_func); \
+ RESET_ARRAYS((_aval), (_idx), (_force), (_bval)); \
+ _func(); \
+ _check_stmt; \
+ } while (0)
+
+int main(void) {
+ /* Break on random intervals. */
+ TEST_FUNC (f1, 1, 0, 1, 10, CHECK_EQ (0, 11); CHECK_EQ (1, 10));
+ TEST_FUNC (f2, -1, 5, 0, 10, CHECK_EQ (0, 9); CHECK_EQ (5, 10));
+ TEST_FUNC (f3, 3, 3, 0, 0, CHECK_EQ (0, 3); CHECK_EQ (3, 0));
+ TEST_FUNC (f4, 0, 4, 1, 1, CHECK_EQ (4, 2); CHECK_EQ (5, 1));
+ TEST_FUNC (f5, 1, 6, -1, 5, CHECK_EQ (6, 4); CHECK_EQ (7, 5));
+ TEST_FUNC (f6, 2, 10, 0, 7, CHECK_EQ (10, 7); CHECK_EQ (11, 7));
+
+ /* Break on last iteration. */
+ TEST_FUNC (f1, 0, N-1, 1, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 2));
+
+ TEST_FUNC (f2, -5, N-1, 0, 9,
+ CHECK_RANGE_EQ (0, N-1, 4); CHECK_EQ (N-1, 9));
+
+ TEST_FUNC (f3, 2, N-1, 0, 0,
+ CHECK_RANGE_EQ(0, N-1, 2); CHECK_EQ (N-1, 0));
+
+ TEST_FUNC (f4, 0, N-1, 2, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f5, 2, N-1, -3, 6,
+ CHECK_RANGE_EQ (0, N-1, 8); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f6, 5, N-1, 0, 7,
+ CHECK_RANGE_EQ (0, N-1, 12); CHECK_EQ (N-1, 7));
+
+ /* Condition never met — full loop executes. */
+ TEST_FUNC (f1, 0, -1, 0, 2,
+ CHECK_RANGE_EQ (0, N, 2));
+
+ TEST_FUNC (f2, -2, -1, 0, 5,
+ CHECK_RANGE_EQ (0, N, 3));
+
+ TEST_FUNC (f3, 1, -1, 0, 0,
+ CHECK_RANGE_EQ (0, N, 1));
+
+ TEST_FUNC (f4, 0, -1, 0, 7,
+ CHECK_RANGE_EQ (0, N, 7));
+
+ TEST_FUNC (f5, 1, -1, 0, 4,
+ CHECK_RANGE_EQ (0, N, 5));
+
+ TEST_FUNC (f6, 5, -1, 0, 3,
+ CHECK_RANGE_EQ (0, N, 8));
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_4.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..68a5d0a09fdfd5664562e1bac3b5e586a37b3a6f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_4.c
@@ -0,0 +1,132 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O3 -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 --param aarch64-autovec-preference=asimd-only" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdio.h>
+
+#define N 640
+#ifndef TYPE
+#define TYPE float
+#endif
+#ifndef FMT
+#define FMT ".6f"
+#endif
+
+
+TYPE a[N] = {0};
+TYPE b[N] = {0};
+
+char *curr_test;
+
+/* Macro to define a function with a specific comparison */
+#define DEFINE_TEST_FUNC(NAME, OP) \
+ __attribute__((noipa)) \
+ void NAME(void) { \
+ for (int i = 0; i < N; i++) { \
+ b[i] += a[i]; \
+ if (a[i] OP 0) \
+ break; \
+ } \
+ }
+
+/* Generate the six comparison functions using the macro. */
+DEFINE_TEST_FUNC(f1, >)
+DEFINE_TEST_FUNC(f2, >=)
+DEFINE_TEST_FUNC(f3, ==)
+DEFINE_TEST_FUNC(f4, !=)
+DEFINE_TEST_FUNC(f5, <)
+DEFINE_TEST_FUNC(f6, <=)
+
+__attribute__((noreturn))
+static inline void __abort_trace (const char *m, int i, TYPE result, TYPE expected)
+{
+ printf ("*** [%s] FAIL AT %s:%d in %s - expected %" FMT " but got %" FMT " at pos %d\n",
+ m, __FILE__, __LINE__, curr_test, expected, result, i);
+ __builtin_abort ();
+}
+
+/* Array setup macro. */
+#define RESET_ARRAYS(_aval, _idx, _force, _bval) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = 0; i < N; ++i) { \
+ a[i] = _aval; \
+ b[i] = _bval; \
+ } \
+ if (_idx >= 0 && _idx < N) \
+ a[_idx] = _force; \
+ } while (0)
+
+/* Value check macros. */
+#define CHECK_EQ(_i, _val) \
+ do { \
+ if (b[_i] != _val) \
+ __abort_trace ("single", _i, b[_i], _val); \
+ } while (0)
+
+#define CHECK_RANGE_EQ(_start, _end, _val) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = _start; i < _end; ++i) \
+ if (b[i] != _val) \
+ __abort_trace ("range", i, b[i], _val); \
+ } while (0)
+
+#define str(s) #s
+#define TEST_FUNC(_func, _aval, _idx, _force, _bval, _check_stmt) \
+ do { \
+ curr_test = str (_func); \
+ RESET_ARRAYS((_aval), (_idx), (_force), (_bval)); \
+ _func(); \
+ _check_stmt; \
+ } while (0)
+
+int main(void) {
+ /* Break on random intervals. */
+ TEST_FUNC (f1, 1, 0, 1, 10, CHECK_EQ (0, 11); CHECK_EQ (1, 10));
+ TEST_FUNC (f2, -1, 5, 0, 10, CHECK_EQ (0, 9); CHECK_EQ (5, 10));
+ TEST_FUNC (f3, 3, 3, 0, 0, CHECK_EQ (0, 3); CHECK_EQ (3, 0));
+ TEST_FUNC (f4, 0, 4, 1, 1, CHECK_EQ (4, 2); CHECK_EQ (5, 1));
+ TEST_FUNC (f5, 1, 6, -1, 5, CHECK_EQ (6, 4); CHECK_EQ (7, 5));
+ TEST_FUNC (f6, 2, 10, 0, 7, CHECK_EQ (10, 7); CHECK_EQ (11, 7));
+
+ /* Break on last iteration. */
+ TEST_FUNC (f1, 0, N-1, 1, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 2));
+
+ TEST_FUNC (f2, -5, N-1, 0, 9,
+ CHECK_RANGE_EQ (0, N-1, 4); CHECK_EQ (N-1, 9));
+
+ TEST_FUNC (f3, 2, N-1, 0, 0,
+ CHECK_RANGE_EQ(0, N-1, 2); CHECK_EQ (N-1, 0));
+
+ TEST_FUNC (f4, 0, N-1, 2, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f5, 2, N-1, -3, 6,
+ CHECK_RANGE_EQ (0, N-1, 8); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f6, 5, N-1, 0, 7,
+ CHECK_RANGE_EQ (0, N-1, 12); CHECK_EQ (N-1, 7));
+
+ /* Condition never met — full loop executes. */
+ TEST_FUNC (f1, 0, -1, 0, 2,
+ CHECK_RANGE_EQ (0, N, 2));
+
+ TEST_FUNC (f2, -2, -1, 0, 5,
+ CHECK_RANGE_EQ (0, N, 3));
+
+ TEST_FUNC (f3, 1, -1, 0, 0,
+ CHECK_RANGE_EQ (0, N, 1));
+
+ TEST_FUNC (f4, 0, -1, 0, 7,
+ CHECK_RANGE_EQ (0, N, 7));
+
+ TEST_FUNC (f5, 1, -1, 0, 4,
+ CHECK_RANGE_EQ (0, N, 5));
+
+ TEST_FUNC (f6, 5, -1, 0, 3,
+ CHECK_RANGE_EQ (0, N, 8));
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_5.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_5.c
new file mode 100644
index 0000000000000000000000000000000000000000..52d95e9a71b7999376731e47a224936d4253ac2f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_5.c
@@ -0,0 +1,132 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O3 -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 --param aarch64-autovec-preference=sve-only" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdio.h>
+
+#define N 640
+#ifndef TYPE
+#define TYPE float
+#endif
+#ifndef FMT
+#define FMT ".6f"
+#endif
+
+
+TYPE a[N] = {0};
+TYPE b[N] = {0};
+
+char *curr_test;
+
+/* Macro to define a function with a specific comparison */
+#define DEFINE_TEST_FUNC(NAME, OP) \
+ __attribute__((noipa)) \
+ void NAME(void) { \
+ for (int i = 0; i < N; i++) { \
+ b[i] += a[i]; \
+ if (a[i] OP 0) \
+ break; \
+ } \
+ }
+
+/* Generate the six comparison functions using the macro. */
+DEFINE_TEST_FUNC(f1, >)
+DEFINE_TEST_FUNC(f2, >=)
+DEFINE_TEST_FUNC(f3, ==)
+DEFINE_TEST_FUNC(f4, !=)
+DEFINE_TEST_FUNC(f5, <)
+DEFINE_TEST_FUNC(f6, <=)
+
+__attribute__((noreturn))
+static inline void __abort_trace (const char *m, int i, TYPE result, TYPE expected)
+{
+ printf ("*** [%s] FAIL AT %s:%d in %s - expected %" FMT " but got %" FMT " at pos %d\n",
+ m, __FILE__, __LINE__, curr_test, expected, result, i);
+ __builtin_abort ();
+}
+
+/* Array setup macro. */
+#define RESET_ARRAYS(_aval, _idx, _force, _bval) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = 0; i < N; ++i) { \
+ a[i] = _aval; \
+ b[i] = _bval; \
+ } \
+ if (_idx >= 0 && _idx < N) \
+ a[_idx] = _force; \
+ } while (0)
+
+/* Value check macros. */
+#define CHECK_EQ(_i, _val) \
+ do { \
+ if (b[_i] != _val) \
+ __abort_trace ("single", _i, b[_i], _val); \
+ } while (0)
+
+#define CHECK_RANGE_EQ(_start, _end, _val) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = _start; i < _end; ++i) \
+ if (b[i] != _val) \
+ __abort_trace ("range", i, b[i], _val); \
+ } while (0)
+
+#define str(s) #s
+#define TEST_FUNC(_func, _aval, _idx, _force, _bval, _check_stmt) \
+ do { \
+ curr_test = str (_func); \
+ RESET_ARRAYS((_aval), (_idx), (_force), (_bval)); \
+ _func(); \
+ _check_stmt; \
+ } while (0)
+
+int main(void) {
+ /* Break on random intervals. */
+ TEST_FUNC (f1, 1, 0, 1, 10, CHECK_EQ (0, 11); CHECK_EQ (1, 10));
+ TEST_FUNC (f2, -1, 5, 0, 10, CHECK_EQ (0, 9); CHECK_EQ (5, 10));
+ TEST_FUNC (f3, 3, 3, 0, 0, CHECK_EQ (0, 3); CHECK_EQ (3, 0));
+ TEST_FUNC (f4, 0, 4, 1, 1, CHECK_EQ (4, 2); CHECK_EQ (5, 1));
+ TEST_FUNC (f5, 1, 6, -1, 5, CHECK_EQ (6, 4); CHECK_EQ (7, 5));
+ TEST_FUNC (f6, 2, 10, 0, 7, CHECK_EQ (10, 7); CHECK_EQ (11, 7));
+
+ /* Break on last iteration. */
+ TEST_FUNC (f1, 0, N-1, 1, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 2));
+
+ TEST_FUNC (f2, -5, N-1, 0, 9,
+ CHECK_RANGE_EQ (0, N-1, 4); CHECK_EQ (N-1, 9));
+
+ TEST_FUNC (f3, 2, N-1, 0, 0,
+ CHECK_RANGE_EQ(0, N-1, 2); CHECK_EQ (N-1, 0));
+
+ TEST_FUNC (f4, 0, N-1, 2, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f5, 2, N-1, -3, 6,
+ CHECK_RANGE_EQ (0, N-1, 8); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f6, 5, N-1, 0, 7,
+ CHECK_RANGE_EQ (0, N-1, 12); CHECK_EQ (N-1, 7));
+
+ /* Condition never met — full loop executes. */
+ TEST_FUNC (f1, 0, -1, 0, 2,
+ CHECK_RANGE_EQ (0, N, 2));
+
+ TEST_FUNC (f2, -2, -1, 0, 5,
+ CHECK_RANGE_EQ (0, N, 3));
+
+ TEST_FUNC (f3, 1, -1, 0, 0,
+ CHECK_RANGE_EQ (0, N, 1));
+
+ TEST_FUNC (f4, 0, -1, 0, 7,
+ CHECK_RANGE_EQ (0, N, 7));
+
+ TEST_FUNC (f5, 1, -1, 0, 4,
+ CHECK_RANGE_EQ (0, N, 5));
+
+ TEST_FUNC (f6, 5, -1, 0, 3,
+ CHECK_RANGE_EQ (0, N, 8));
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_7.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_7.c
new file mode 100644
index 0000000000000000000000000000000000000000..20cbb685b17a3312b30ce0124a36cdfaf3b3a02d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_7.c
@@ -0,0 +1,147 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 --param aarch64-autovec-preference=asimd-only" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdio.h>
+#include <math.h>
+
+#define N 640
+#ifndef TYPE
+#define TYPE float
+#endif
+#ifndef FMT
+#define FMT ".6f"
+#endif
+
+TYPE a[N] = {0};
+TYPE b[N] = {0};
+
+char *curr_test;
+
+/* Macro to define a function with a specific comparison */
+#define DEFINE_TEST_FUNC(NAME, OP) \
+ __attribute__((noipa)) \
+ void NAME(void) { \
+ for (int i = 0; i < N; i++) { \
+ b[i] += a[i]; \
+ if (a[i] OP 0) \
+ break; \
+ } \
+ }
+
+/* Generate comparison functions */
+DEFINE_TEST_FUNC(f1, >)
+DEFINE_TEST_FUNC(f2, >=)
+DEFINE_TEST_FUNC(f3, ==)
+DEFINE_TEST_FUNC(f4, !=)
+DEFINE_TEST_FUNC(f5, <)
+DEFINE_TEST_FUNC(f6, <=)
+
+/* Example unordered-sensitive loop: breaks if a[i] is unordered with 0 */
+__attribute__((noipa))
+void f7(void) {
+ for (int i = 0; i < N; i++) {
+ b[i] += a[i];
+ if (__builtin_isunordered(a[i], 0.0f))
+ break;
+ }
+}
+
+__attribute__((noreturn))
+static inline void __abort_trace(const char *m, int i, TYPE result, TYPE expected) {
+ printf("*** [%s] FAIL AT %s:%d in %s - expected %" FMT " but got %" FMT " at pos %d\n",
+ m, __FILE__, __LINE__, curr_test, expected, result, i);
+ __builtin_abort();
+}
+
+/* Array setup */
+#define RESET_ARRAYS(_aval, _idx, _force, _bval) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = 0; i < N; ++i) { \
+ a[i] = _aval; \
+ b[i] = _bval; \
+ } \
+ if (_idx >= 0 && _idx < N) \
+ a[_idx] = _force; \
+ } while (0)
+
+/* FP checks: abort if NaN-ness differs or a non-NaN expected value mismatches b[]; NaN matches NaN. */
+#define CHECK_EQ(_i, _val) do { \
+ if (!__builtin_isnan (_val) != !__builtin_isnan (b[_i]) /* exactly one is NaN */ \
+ || (!__builtin_isnan (_val) && b[_i] != (_val))) /* ordinary value mismatch */ \
+ __abort_trace ("single", _i, b[_i], _val); \
+} while (0)
+
+#define CHECK_RANGE_EQ(_start, _end, _val) do { \
+ _Pragma("GCC novector") \
+ for (int i = _start; i < _end; ++i) \
+ if (!__builtin_isnan (_val) != !__builtin_isnan (b[i]) /* exactly one is NaN */ \
+ || (!__builtin_isnan (_val) && b[i] != (_val))) /* ordinary value mismatch */ \
+ __abort_trace ("range", i, b[i], _val); \
+} while (0)
+
+#define str(s) #s
+#define TEST_FUNC(_func, _aval, _idx, _force, _bval, _check_stmt) \
+ do { \
+ curr_test = str (_func); \
+ RESET_ARRAYS((_aval), (_idx), (_force), (_bval)); \
+ _func(); \
+ _check_stmt; \
+ } while (0)
+
+int main(void) {
+ /* Break on random intervals. */
+ TEST_FUNC(f1, 1.0f, 0, 1.0f, 10.0f, CHECK_EQ(0, 11.0f); CHECK_EQ(1, 10.0f));
+ TEST_FUNC(f2, -1.0f, 5, 0.0f, 10.0f, CHECK_EQ(0, 9.0f); CHECK_EQ(5, 10.0f));
+ TEST_FUNC(f3, 3.0f, 3, 0.0f, 0.0f, CHECK_EQ(0, 3.0f); CHECK_EQ(3, 0.0f));
+ TEST_FUNC(f4, 0.0f, 4, 1.0f, 1.0f, CHECK_EQ(4, 2.0f); CHECK_EQ(5, 1.0f));
+ TEST_FUNC(f5, 1.0f, 6, -1.0f, 5.0f, CHECK_EQ(6, 4.0f); CHECK_EQ(7, 5.0f));
+ TEST_FUNC(f6, 2.0f, 10, 0.0f, 7.0f, CHECK_EQ(10, 7.0f); CHECK_EQ(11, 7.0f));
+
+ /* Break on last iteration. */
+ TEST_FUNC(f1, 0.0f, N - 1, 1.0f, 1.0f,
+ CHECK_RANGE_EQ(0, N - 1, 1.0f); CHECK_EQ(N - 1, 2.0f));
+
+ TEST_FUNC(f2, -5.0f, N - 1, 0.0f, 9.0f,
+ CHECK_RANGE_EQ(0, N - 1, 4.0f); CHECK_EQ(N - 1, 9.0f));
+
+ TEST_FUNC(f3, 2.0f, N - 1, 0.0f, 0.0f,
+ CHECK_RANGE_EQ(0, N - 1, 2.0f); CHECK_EQ(N - 1, 0.0f));
+
+ TEST_FUNC(f4, 0.0f, N - 1, 2.0f, 1.0f,
+ CHECK_RANGE_EQ(0, N - 1, 1.0f); CHECK_EQ(N - 1, 3.0f));
+
+ TEST_FUNC(f5, 2.0f, N - 1, -3.0f, 6.0f,
+ CHECK_RANGE_EQ(0, N - 1, 8.0f); CHECK_EQ(N - 1, 3.0f));
+
+ TEST_FUNC(f6, 5.0f, N - 1, 0.0f, 7.0f,
+ CHECK_RANGE_EQ(0, N - 1, 12.0f); CHECK_EQ(N - 1, 7.0f));
+
+ /* Condition never met — full loop executes. */
+ TEST_FUNC(f1, 0.0f, -1, 0.0f, 2.0f,
+ CHECK_RANGE_EQ(0, N, 2.0f));
+
+ TEST_FUNC(f2, -2.0f, -1, 0.0f, 5.0f,
+ CHECK_RANGE_EQ(0, N, 3.0f));
+
+ TEST_FUNC(f3, 1.0f, -1, 0.0f, 0.0f,
+ CHECK_RANGE_EQ(0, N, 1.0f));
+
+ TEST_FUNC(f4, 0.0f, -1, 0.0f, 7.0f,
+ CHECK_RANGE_EQ(0, N, 7.0f));
+
+ TEST_FUNC(f5, 1.0f, -1, 0.0f, 4.0f,
+ CHECK_RANGE_EQ(0, N, 5.0f));
+
+ TEST_FUNC(f6, 5.0f, -1, 0.0f, 3.0f,
+ CHECK_RANGE_EQ(0, N, 8.0f));
+
+#if !defined(__FAST_MATH__)
+ /* Unordered break (NAN in a[i]) */
+ TEST_FUNC(f7, 1.0f, 123, NAN, 2.0f,
+ CHECK_RANGE_EQ(0, 123, 3.0f); CHECK_EQ(123, NAN));
+#endif
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_8.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_8.c
new file mode 100644
index 0000000000000000000000000000000000000000..d951d649ac6d3c3030f7b5e04055f8065908d87b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_8.c
@@ -0,0 +1,132 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 --param aarch64-autovec-preference=asimd-only" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdio.h>
+
+#define N 640
+#ifndef TYPE
+#define TYPE float
+#endif
+#ifndef FMT
+#define FMT ".6f"
+#endif
+
+
+TYPE a[N] = {0};
+TYPE b[N] = {0};
+
+char *curr_test;
+
+/* Macro to define a function with a specific comparison */
+#define DEFINE_TEST_FUNC(NAME, OP) \
+ __attribute__((noipa)) \
+ void NAME(void) { \
+ for (int i = 0; i < N; i++) { \
+ b[i] += a[i]; \
+ if (a[i] OP 0) \
+ break; \
+ } \
+ }
+
+/* Generate the six comparison functions using the macro. */
+DEFINE_TEST_FUNC(f1, >)
+DEFINE_TEST_FUNC(f2, >=)
+DEFINE_TEST_FUNC(f3, ==)
+DEFINE_TEST_FUNC(f4, !=)
+DEFINE_TEST_FUNC(f5, <)
+DEFINE_TEST_FUNC(f6, <=)
+
+__attribute__((noreturn))
+static inline void __abort_trace (const char *m, int i, TYPE result, TYPE expected)
+{
+ printf ("*** [%s] FAIL AT %s:%d in %s - expected %" FMT " but got %" FMT " at pos %d\n",
+ m, __FILE__, __LINE__, curr_test, expected, result, i);
+ __builtin_abort ();
+}
+
+/* Array setup macro. */
+#define RESET_ARRAYS(_aval, _idx, _force, _bval) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = 0; i < N; ++i) { \
+ a[i] = _aval; \
+ b[i] = _bval; \
+ } \
+ if (_idx >= 0 && _idx < N) \
+ a[_idx] = _force; \
+ } while (0)
+
+/* Value check macros. */
+#define CHECK_EQ(_i, _val) \
+ do { \
+ if (b[_i] != _val) \
+ __abort_trace ("single", _i, b[_i], _val); \
+ } while (0)
+
+#define CHECK_RANGE_EQ(_start, _end, _val) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = _start; i < _end; ++i) \
+ if (b[i] != _val) \
+ __abort_trace ("range", i, b[i], _val); \
+ } while (0)
+
+#define str(s) #s
+#define TEST_FUNC(_func, _aval, _idx, _force, _bval, _check_stmt) \
+ do { \
+ curr_test = str (_func); \
+ RESET_ARRAYS((_aval), (_idx), (_force), (_bval)); \
+ _func(); \
+ _check_stmt; \
+ } while (0)
+
+int main(void) {
+ /* Break on random intervals. */
+ TEST_FUNC (f1, 1, 0, 1, 10, CHECK_EQ (0, 11); CHECK_EQ (1, 10));
+ TEST_FUNC (f2, -1, 5, 0, 10, CHECK_EQ (0, 9); CHECK_EQ (5, 10));
+ TEST_FUNC (f3, 3, 3, 0, 0, CHECK_EQ (0, 3); CHECK_EQ (3, 0));
+ TEST_FUNC (f4, 0, 4, 1, 1, CHECK_EQ (4, 2); CHECK_EQ (5, 1));
+ TEST_FUNC (f5, 1, 6, -1, 5, CHECK_EQ (6, 4); CHECK_EQ (7, 5));
+ TEST_FUNC (f6, 2, 10, 0, 7, CHECK_EQ (10, 7); CHECK_EQ (11, 7));
+
+ /* Break on last iteration. */
+ TEST_FUNC (f1, 0, N-1, 1, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 2));
+
+ TEST_FUNC (f2, -5, N-1, 0, 9,
+ CHECK_RANGE_EQ (0, N-1, 4); CHECK_EQ (N-1, 9));
+
+ TEST_FUNC (f3, 2, N-1, 0, 0,
+ CHECK_RANGE_EQ(0, N-1, 2); CHECK_EQ (N-1, 0));
+
+ TEST_FUNC (f4, 0, N-1, 2, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f5, 2, N-1, -3, 6,
+ CHECK_RANGE_EQ (0, N-1, 8); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f6, 5, N-1, 0, 7,
+ CHECK_RANGE_EQ (0, N-1, 12); CHECK_EQ (N-1, 7));
+
+ /* Condition never met — full loop executes. */
+ TEST_FUNC (f1, 0, -1, 0, 2,
+ CHECK_RANGE_EQ (0, N, 2));
+
+ TEST_FUNC (f2, -2, -1, 0, 5,
+ CHECK_RANGE_EQ (0, N, 3));
+
+ TEST_FUNC (f3, 1, -1, 0, 0,
+ CHECK_RANGE_EQ (0, N, 1));
+
+ TEST_FUNC (f4, 0, -1, 0, 7,
+ CHECK_RANGE_EQ (0, N, 7));
+
+ TEST_FUNC (f5, 1, -1, 0, 4,
+ CHECK_RANGE_EQ (0, N, 5));
+
+ TEST_FUNC (f6, 5, -1, 0, 3,
+ CHECK_RANGE_EQ (0, N, 8));
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch.c b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch.c
index 673b781eb6d092f6311409797b20a971f4fae247..ca4ef498485f68dbb8a178750db3f552cf7a66d0 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch.c
@@ -50,7 +50,6 @@ void f2 ()
/*
** f3:
** ...
-** cmeq v[0-9]+.4s, v[0-9]+.4s, #0
** umaxp v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
** fmov x[0-9]+, d[0-9]+
** cbn?z x[0-9]+, \.L[0-9]+
@@ -69,7 +68,6 @@ void f3 ()
/*
** f4:
** ...
-** cmtst v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
** umaxp v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
** fmov x[0-9]+, d[0-9]+
** cbn?z x[0-9]+, \.L[0-9]+
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_2.c b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..d5cb1946873f5e720c6e12e0dd4991582764f18f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_2.c
@@ -0,0 +1,105 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 --param aarch64-autovec-preference=asimd-only" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+sve"
+
+#define N 640
+int a[N] = {0};
+int b[N] = {0};
+/*
+** f1:
+** ...
+** cmpgt p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f1 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] > 0)
+ break;
+ }
+}
+/*
+** f2:
+** ...
+** cmpge p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f2 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] >= 0)
+ break;
+ }
+}
+/*
+** f3:
+** ...
+** cmpeq p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f3 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] == 0)
+ break;
+ }
+}
+/*
+** f4:
+** ...
+** cmpne p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f4 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] != 0)
+ break;
+ }
+}
+/*
+** f5:
+** ...
+** cmplt p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f5 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] < 0)
+ break;
+ }
+}
+/*
+** f6:
+** ...
+** cmple p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f6 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] <= 0)
+ break;
+ }
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_3.c b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..9312db675eb428fc2fb775ec45e9e71e636fabdf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_3.c
@@ -0,0 +1,110 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 --param aarch64-autovec-preference=asimd-only" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+sve"
+
+#define N 640
+float a[N] = {0};
+float b[N] = {0};
+
+/*
+** f1:
+** ...
+** fcmgt p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0.0
+** ptest p[0-9]+, p[0-9]+\.b
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f1 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] > 0)
+ break;
+ }
+}
+/*
+** f2:
+** ...
+** fcmge p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0.0
+** ptest p[0-9]+, p[0-9]+\.b
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f2 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] >= 0)
+ break;
+ }
+}
+/*
+** f3:
+** ...
+** cmpeq p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f3 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] == 0)
+ break;
+ }
+}
+/*
+** f4:
+** ...
+** cmpne p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f4 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] != 0)
+ break;
+ }
+}
+/*
+** f5:
+** ...
+** fcmlt p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0.0
+** ptest p[0-9]+, p[0-9]+\.b
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f5 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] < 0)
+ break;
+ }
+}
+/*
+** f6:
+** ...
+** fcmle p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0.0
+** ptest p[0-9]+, p[0-9]+\.b
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f6 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] <= 0)
+ break;
+ }
+}