The SVE ACLE patches need to introduce a new CC_NZC mode for the conditions that can be tested after a PTRUE. In particular, LT needs to map to "mi"/"first" and GE to "pl"/"nfrst", instead of the normal CC mapping (in which LT maps to "lt" and GE to "ge", whose SVE aliases are "tstop" and "tcont").
Another advantage of using a separate mode is that we can print the SVE names of the conditions, which makes the output a bit easier to read. It therefore seems like an independent improvement that can go in now. The patch also avoids using (compare X (const_int 0)), because that gets folded away when used with LTU and GEU ("cc"/"last" and "cs"/"nlast"). Just using an unspec should be OK. The full set of conditions can't be tested without other SVE ACLE patches. Tested on aarch64-linux-gnu (with and without SVE). Applied as r272427. Richard 2019-06-18 Richard Sandiford <richard.sandif...@arm.com> gcc/ * config/aarch64/aarch64-modes.def (CC_NZC): New CC_MODE. * config/aarch64/aarch64-sve.md (*<optab><mode>3_cc) (ptest_ptrue<mode>, while_ult<GPI:mode><PRED_ALL:mode>) (*while_ult<GPI:mode><PRED_ALL:mode>_cc, *cmp<cmp_op><mode>) (*cmp<cmp_op><mode>_ptest, *cmp<cmp_op><mode>_cc) (*pred_cmp<cmp_op><mode>_combine, *pred_cmp<cmp_op><mode>) (vec_cmp<mode><vpred>, vec_cmpu<mode><vpred>, cbranch<mode>4): Use CC_NZC instead of CC. * config/aarch64/aarch64.md (condjump): Print a '.' in SVE conditions. * config/aarch64/aarch64.c (aarch64_sve_condition_codes): New variable. (aarch64_print_operand): Handle E_CC_NZCmode. (aarch64_emit_sve_ptrue_op_cc): Use gen_set_clobber_cc_nzc instead of gen_set_clobber_cc. gcc/testsuite/ * gcc.target/aarch64/sve/struct_vect_18.c: Allow branches to contain dots. * gcc.target/aarch64/sve/struct_vect_19.c: Likewise. * gcc.target/aarch64/sve/struct_vect_20.c: Likewise. * gcc.target/aarch64/sve/struct_vect_21.c: Likewise. * gcc.target/aarch64/sve/struct_vect_22.c: Likewise. * gcc.target/aarch64/sve/struct_vect_23.c: Likewise. * gcc.target/aarch64/sve/unroll-1.c: Likewise. * gcc.target/aarch64/sve/while_1.c: Check for b.any. 
Index: gcc/config/aarch64/aarch64-modes.def =================================================================== --- gcc/config/aarch64/aarch64-modes.def 2019-03-08 18:15:38.220734572 +0000 +++ gcc/config/aarch64/aarch64-modes.def 2019-06-18 15:44:25.158766687 +0100 @@ -33,6 +33,8 @@ CC_MODE (CCFP); CC_MODE (CCFPE); CC_MODE (CC_SWP); +CC_MODE (CC_NZC); /* Only N, Z and C bits of condition flags are valid. + (Used with SVE predicate tests.) */ CC_MODE (CC_NZ); /* Only N and Z bits of condition flags are valid. */ CC_MODE (CC_Z); /* Only Z bit of condition flags is valid. */ CC_MODE (CC_C); /* C represents unsigned overflow of a simple addition. */ Index: gcc/config/aarch64/aarch64-sve.md =================================================================== --- gcc/config/aarch64/aarch64-sve.md 2019-06-18 15:43:09.591393527 +0100 +++ gcc/config/aarch64/aarch64-sve.md 2019-06-18 15:44:25.158766687 +0100 @@ -1172,16 +1172,15 @@ (define_insn "pred_<optab><mode>3" ;; UNSPEC_PTEST_PTRUE is logically redundant, but means that the tested ;; value is structurally equivalent to rhs of the second set. (define_insn "*<optab><mode>3_cc" - [(set (reg:CC CC_REGNUM) - (compare:CC - (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upa") - (and:PRED_ALL - (LOGICAL:PRED_ALL - (match_operand:PRED_ALL 2 "register_operand" "Upa") - (match_operand:PRED_ALL 3 "register_operand" "Upa")) - (match_dup 1))] - UNSPEC_PTEST_PTRUE) - (const_int 0))) + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:PRED_ALL 1 "register_operand" "Upa") + (and:PRED_ALL + (LOGICAL:PRED_ALL + (match_operand:PRED_ALL 2 "register_operand" "Upa") + (match_operand:PRED_ALL 3 "register_operand" "Upa")) + (match_dup 1))] + UNSPEC_PTEST_PTRUE)) (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3)) (match_dup 1)))] @@ -1320,12 +1319,11 @@ (define_expand "<ASHIFT:optab><mode>3" ;; the constant. 
We would use a separate unspec code for PTESTs involving ;; GPs that might not be PTRUEs. (define_insn "ptest_ptrue<mode>" - [(set (reg:CC CC_REGNUM) - (compare:CC - (unspec:SI [(match_operand:PRED_ALL 0 "register_operand" "Upa") - (match_operand:PRED_ALL 1 "register_operand" "Upa")] - UNSPEC_PTEST_PTRUE) - (const_int 0)))] + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:PRED_ALL 0 "register_operand" "Upa") + (match_operand:PRED_ALL 1 "register_operand" "Upa")] + UNSPEC_PTEST_PTRUE))] "TARGET_SVE" "ptest\t%0, %1.b" ) @@ -1337,7 +1335,7 @@ (define_insn "while_ult<GPI:mode><PRED_A (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] UNSPEC_WHILE_LO)) - (clobber (reg:CC CC_REGNUM))] + (clobber (reg:CC_NZC CC_REGNUM))] "TARGET_SVE" "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2" ) @@ -1346,15 +1344,14 @@ (define_insn "while_ult<GPI:mode><PRED_A ;; Handle the case in which both results are useful. The GP operand ;; to the PTEST isn't needed, so we allow it to be anything. 
(define_insn_and_rewrite "*while_ult<GPI:mode><PRED_ALL:mode>_cc" - [(set (reg:CC CC_REGNUM) - (compare:CC - (unspec:SI [(match_operand:PRED_ALL 1) - (unspec:PRED_ALL - [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ") - (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")] - UNSPEC_WHILE_LO)] - UNSPEC_PTEST_PTRUE) - (const_int 0))) + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:PRED_ALL 1) + (unspec:PRED_ALL + [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ") + (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")] + UNSPEC_WHILE_LO)] + UNSPEC_PTEST_PTRUE)) (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") (unspec:PRED_ALL [(match_dup 2) (match_dup 3)] @@ -1378,7 +1375,7 @@ (define_insn "*cmp<cmp_op><mode>" (match_operand:SVE_I 2 "register_operand" "w, w") (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] UNSPEC_MERGE_PTRUE)) - (clobber (reg:CC CC_REGNUM))] + (clobber (reg:CC_NZC CC_REGNUM))] "TARGET_SVE" "@ cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 @@ -1388,18 +1385,16 @@ (define_insn "*cmp<cmp_op><mode>" ;; Integer comparisons predicated with a PTRUE in which only the flags result ;; is interesting. 
(define_insn "*cmp<cmp_op><mode>_ptest" - [(set (reg:CC CC_REGNUM) - (compare:CC - (unspec:SI - [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") - (unspec:<VPRED> - [(match_dup 1) - (SVE_INT_CMP:<VPRED> - (match_operand:SVE_I 2 "register_operand" "w, w") - (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] - UNSPEC_MERGE_PTRUE)] - UNSPEC_PTEST_PTRUE) - (const_int 0))) + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") + (unspec:<VPRED> + [(match_dup 1) + (SVE_INT_CMP:<VPRED> + (match_operand:SVE_I 2 "register_operand" "w, w") + (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] + UNSPEC_MERGE_PTRUE)] + UNSPEC_PTEST_PTRUE)) (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))] "TARGET_SVE" "@ @@ -1410,18 +1405,16 @@ (define_insn "*cmp<cmp_op><mode>_ptest" ;; Integer comparisons predicated with a PTRUE in which both the flag and ;; predicate results are interesting. 
(define_insn "*cmp<cmp_op><mode>_cc" - [(set (reg:CC CC_REGNUM) - (compare:CC - (unspec:SI - [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") - (unspec:<VPRED> - [(match_dup 1) - (SVE_INT_CMP:<VPRED> - (match_operand:SVE_I 2 "register_operand" "w, w") - (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] - UNSPEC_MERGE_PTRUE)] - UNSPEC_PTEST_PTRUE) - (const_int 0))) + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") + (unspec:<VPRED> + [(match_dup 1) + (SVE_INT_CMP:<VPRED> + (match_operand:SVE_I 2 "register_operand" "w, w") + (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] + UNSPEC_MERGE_PTRUE)] + UNSPEC_PTEST_PTRUE)) (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") (unspec:<VPRED> [(match_dup 1) @@ -1449,7 +1442,7 @@ (define_insn_and_split "*pred_cmp<cmp_op (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] UNSPEC_MERGE_PTRUE) (match_operand:<VPRED> 4 "register_operand" "Upl, Upl"))) - (clobber (reg:CC CC_REGNUM))] + (clobber (reg:CC_NZC CC_REGNUM))] "TARGET_SVE" "#" "&& 1" @@ -1460,7 +1453,7 @@ (define_insn_and_split "*pred_cmp<cmp_op (match_dup 2) (match_dup 3)) (match_dup 4))) - (clobber (reg:CC CC_REGNUM))])] + (clobber (reg:CC_NZC CC_REGNUM))])] ) ;; Predicated integer comparisons. 
@@ -1471,7 +1464,7 @@ (define_insn "*pred_cmp<cmp_op><mode>" (match_operand:SVE_I 2 "register_operand" "w, w") (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w")) (match_operand:<VPRED> 1 "register_operand" "Upl, Upl"))) - (clobber (reg:CC CC_REGNUM))] + (clobber (reg:CC_NZC CC_REGNUM))] "TARGET_SVE" "@ cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 @@ -1684,7 +1677,7 @@ (define_expand "vec_cmp<mode><vpred>" (match_operator:<VPRED> 1 "comparison_operator" [(match_operand:SVE_I 2 "register_operand") (match_operand:SVE_I 3 "nonmemory_operand")])) - (clobber (reg:CC CC_REGNUM))])] + (clobber (reg:CC_NZC CC_REGNUM))])] "TARGET_SVE" { aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), @@ -1702,7 +1695,7 @@ (define_expand "vec_cmpu<mode><vpred>" (match_operator:<VPRED> 1 "comparison_operator" [(match_operand:SVE_I 2 "register_operand") (match_operand:SVE_I 3 "nonmemory_operand")])) - (clobber (reg:CC CC_REGNUM))])] + (clobber (reg:CC_NZC CC_REGNUM))])] "TARGET_SVE" { aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), @@ -1749,7 +1742,7 @@ (define_expand "cbranch<mode>4" operands[2])); } emit_insn (gen_ptest_ptrue<mode> (ptrue, pred)); - operands[1] = gen_rtx_REG (CCmode, CC_REGNUM); + operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM); operands[2] = const0_rtx; } ) Index: gcc/config/aarch64/aarch64.md =================================================================== --- gcc/config/aarch64/aarch64.md 2019-05-30 18:34:35.946485479 +0100 +++ gcc/config/aarch64/aarch64.md 2019-06-18 15:44:25.166766620 +0100 @@ -534,10 +534,14 @@ (define_insn "condjump" (pc)))] "" { + /* GCC's traditional style has been to use "beq" instead of "b.eq", etc., + but the "." is required for SVE conditions. 
*/ + bool use_dot_p = GET_MODE (operands[1]) == CC_NZCmode; if (get_attr_length (insn) == 8) - return aarch64_gen_far_branch (operands, 2, "Lbcond", "b%M0\\t"); + return aarch64_gen_far_branch (operands, 2, "Lbcond", + use_dot_p ? "b.%M0\\t" : "b%M0\\t"); else - return "b%m0\\t%l2"; + return use_dot_p ? "b.%m0\\t%l2" : "b%m0\\t%l2"; } [(set_attr "type" "branch") (set (attr "length") @@ -7121,10 +7125,10 @@ (define_insn "bti_jc" ) ;; Helper for aarch64.c code. -(define_expand "set_clobber_cc" +(define_expand "set_clobber_cc_nzc" [(parallel [(set (match_operand 0) (match_operand 1)) - (clobber (reg:CC CC_REGNUM))])]) + (clobber (reg:CC_NZC CC_REGNUM))])]) ;; Hard speculation barrier. (define_insn "speculation_barrier" Index: gcc/config/aarch64/aarch64.c =================================================================== --- gcc/config/aarch64/aarch64.c 2019-06-18 15:42:40.863631835 +0100 +++ gcc/config/aarch64/aarch64.c 2019-06-18 15:44:25.162766653 +0100 @@ -1308,6 +1308,13 @@ static const char * const aarch64_condit "hi", "ls", "ge", "lt", "gt", "le", "al", "nv" }; +/* The preferred condition codes for SVE conditions. */ +static const char *const aarch64_sve_condition_codes[] = +{ + "none", "any", "nlast", "last", "first", "nfrst", "vs", "vc", + "pmore", "plast", "tcont", "tstop", "gt", "le", "al", "nv" +}; + /* Generate code to enable conditional branches in functions over 1 MiB. 
*/ const char * aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest, @@ -7401,6 +7408,21 @@ aarch64_get_condition_code_1 (machine_mo } break; + case E_CC_NZCmode: + switch (comp_code) + { + case NE: return AARCH64_NE; /* = any */ + case EQ: return AARCH64_EQ; /* = none */ + case GE: return AARCH64_PL; /* = nfrst */ + case LT: return AARCH64_MI; /* = first */ + case GEU: return AARCH64_CS; /* = nlast */ + case GTU: return AARCH64_HI; /* = pmore */ + case LEU: return AARCH64_LS; /* = plast */ + case LTU: return AARCH64_CC; /* = last */ + default: return -1; + } + break; + case E_CC_NZmode: switch (comp_code) { @@ -7734,7 +7756,10 @@ aarch64_print_operand (FILE *f, rtx x, i gcc_assert (cond_code >= 0); if (code == 'M') cond_code = AARCH64_INVERSE_CONDITION_CODE (cond_code); - fputs (aarch64_condition_codes[cond_code], f); + if (GET_MODE (XEXP (x, 0)) == CC_NZCmode) + fputs (aarch64_sve_condition_codes[cond_code], f); + else + fputs (aarch64_condition_codes[cond_code], f); } break; @@ -17059,7 +17084,7 @@ aarch64_emit_sve_ptrue_op_cc (rtx target rtx unspec = gen_rtx_UNSPEC (GET_MODE (target), gen_rtvec (2, ptrue, op), UNSPEC_MERGE_PTRUE); - rtx_insn *insn = emit_insn (gen_set_clobber_cc (target, unspec)); + rtx_insn *insn = emit_insn (gen_set_clobber_cc_nzc (target, unspec)); set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op)); } Index: gcc/testsuite/gcc.target/aarch64/sve/struct_vect_18.c =================================================================== --- gcc/testsuite/gcc.target/aarch64/sve/struct_vect_18.c 2019-03-08 18:14:29.764994797 +0000 +++ gcc/testsuite/gcc.target/aarch64/sve/struct_vect_18.c 2019-06-18 15:44:25.178766521 +0100 @@ -46,4 +46,4 @@ TEST (test) /* { dg-final { scan-assembler-times {\tstr\td} 1 } } */ /* The only branches should be in the vectorized loop. 
*/ -/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 4 } } */ +/* { dg-final { scan-assembler-times {\tb[.a-z]+\t} 4 } } */ Index: gcc/testsuite/gcc.target/aarch64/sve/struct_vect_19.c =================================================================== --- gcc/testsuite/gcc.target/aarch64/sve/struct_vect_19.c 2019-03-08 18:14:29.768994780 +0000 +++ gcc/testsuite/gcc.target/aarch64/sve/struct_vect_19.c 2019-06-18 15:44:25.178766521 +0100 @@ -46,4 +46,4 @@ TEST (test) /* Each function should have three branches: one directly to the exit (n <= 0), one to the single scalar epilogue iteration (n == 1), and one branch-back for the vectorized loop. */ -/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 12 } } */ +/* { dg-final { scan-assembler-times {\tb[.a-z]+\t} 12 } } */ Index: gcc/testsuite/gcc.target/aarch64/sve/struct_vect_20.c =================================================================== --- gcc/testsuite/gcc.target/aarch64/sve/struct_vect_20.c 2019-03-08 18:14:29.768994780 +0000 +++ gcc/testsuite/gcc.target/aarch64/sve/struct_vect_20.c 2019-06-18 15:44:25.178766521 +0100 @@ -46,4 +46,4 @@ TEST (test) /* { dg-final { scan-assembler-times {\tstr\td} 1 } } */ /* The only branches should be in the vectorized loop. */ -/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 4 } } */ +/* { dg-final { scan-assembler-times {\tb[.a-z]+\t} 4 } } */ Index: gcc/testsuite/gcc.target/aarch64/sve/struct_vect_21.c =================================================================== --- gcc/testsuite/gcc.target/aarch64/sve/struct_vect_21.c 2019-03-08 18:14:29.772994767 +0000 +++ gcc/testsuite/gcc.target/aarch64/sve/struct_vect_21.c 2019-06-18 15:44:25.178766521 +0100 @@ -46,4 +46,4 @@ TEST (test) /* Each function should have three branches: one directly to the exit (n <= 0), one to the single scalar epilogue iteration (n == 1), and one branch-back for the vectorized loop. 
*/ -/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 12 } } */ +/* { dg-final { scan-assembler-times {\tb[.a-z]+\t} 12 } } */ Index: gcc/testsuite/gcc.target/aarch64/sve/struct_vect_22.c =================================================================== --- gcc/testsuite/gcc.target/aarch64/sve/struct_vect_22.c 2019-03-08 18:14:29.776994751 +0000 +++ gcc/testsuite/gcc.target/aarch64/sve/struct_vect_22.c 2019-06-18 15:44:25.178766521 +0100 @@ -46,4 +46,4 @@ TEST (test) /* { dg-final { scan-assembler-times {\tstr\td} 1 } } */ /* The only branches should be in the vectorized loop. */ -/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 4 } } */ +/* { dg-final { scan-assembler-times {\tb[.a-z]+\t} 4 } } */ Index: gcc/testsuite/gcc.target/aarch64/sve/struct_vect_23.c =================================================================== --- gcc/testsuite/gcc.target/aarch64/sve/struct_vect_23.c 2019-03-08 18:14:29.780994734 +0000 +++ gcc/testsuite/gcc.target/aarch64/sve/struct_vect_23.c 2019-06-18 15:44:25.178766521 +0100 @@ -46,4 +46,4 @@ TEST (test) /* Each function should have three branches: one directly to the exit (n <= 0), one to the single scalar epilogue iteration (n == 1), and one branch-back for the vectorized loop. 
*/ -/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 12 } } */ +/* { dg-final { scan-assembler-times {\tb[.a-z]+\t} 12 } } */ Index: gcc/testsuite/gcc.target/aarch64/sve/unroll-1.c =================================================================== --- gcc/testsuite/gcc.target/aarch64/sve/unroll-1.c 2019-03-08 18:14:29.768994780 +0000 +++ gcc/testsuite/gcc.target/aarch64/sve/unroll-1.c 2019-06-18 15:44:25.178766521 +0100 @@ -10,4 +10,4 @@ fully_peel_me (double *x) x[i] = x[i] * 2; } -/* { dg-final { scan-assembler-times {b..\t\.L.\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tb[.a-z]+\t} 1 } } */ Index: gcc/testsuite/gcc.target/aarch64/sve/while_1.c =================================================================== --- gcc/testsuite/gcc.target/aarch64/sve/while_1.c 2019-06-18 09:35:52.949885761 +0100 +++ gcc/testsuite/gcc.target/aarch64/sve/while_1.c 2019-06-18 15:44:25.178766521 +0100 @@ -42,3 +42,4 @@ TEST_ALL (ADD_LOOP) /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x0, x[0-9]+, lsl 2\]\n} 3 } } */ /* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x0, x[0-9]+, lsl 3\]\n} 3 } } */ /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x0, x[0-9]+, lsl 3\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tb\.any\t} 10 } } */