On 12/10/19 5:03 PM, Kyrill Tkachov wrote: > Hi Stam, > > On 11/15/19 5:26 PM, Stam Markianos-Wright wrote: >> Pinging with more correct maintainers this time :) >> >> Also would need to backport to gcc7,8,9, but need to get this approved >> first! >> > > Sorry for the delay.
Same here now! Sorry totally forget about this in the lead up to Xmas! Done the changes marked below and also removed the unnecessary extra #defines from the test. > > >> Thank you, >> Stam >> >> >> -------- Forwarded Message -------- >> Subject: Re: [PATCH][GCC][ARM] Arm generates out of range conditional >> branches in Thumb2 (PR91816) >> Date: Mon, 21 Oct 2019 10:37:09 +0100 >> From: Stam Markianos-Wright <stam.markianos-wri...@arm.com> >> To: Ramana Radhakrishnan <ramana....@googlemail.com> >> CC: gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>, nd <n...@arm.com>, >> James Greenhalgh <james.greenha...@arm.com>, Richard Earnshaw >> <richard.earns...@arm.com> >> >> >> >> On 10/13/19 4:23 PM, Ramana Radhakrishnan wrote: >> >> >> >> Patch bootstrapped and regression tested on arm-none-linux-gnueabihf, >> >> however, on my native Aarch32 setup the test times out when run as part >> >> of a big "make check-gcc" regression, but not when run individually. >> >> >> >> 2019-10-11 Stamatis Markianos-Wright <stam.markianos-wri...@arm.com> >> >> >> >> * config/arm/arm.md: Update b<cond> for Thumb2 range checks. >> >> * config/arm/arm.c: New function arm_gen_far_branch. >> >> * config/arm/arm-protos.h: New function arm_gen_far_branch >> >> prototype. >> >> >> >> gcc/testsuite/ChangeLog: >> >> >> >> 2019-10-11 Stamatis Markianos-Wright <stam.markianos-wri...@arm.com> >> >> >> >> * testsuite/gcc.target/arm/pr91816.c: New test. >> > >> >> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h >> >> index f995974f9bb..1dce333d1c3 100644 >> >> --- a/gcc/config/arm/arm-protos.h >> >> +++ b/gcc/config/arm/arm-protos.h >> >> @@ -570,4 +570,7 @@ void arm_parse_option_features (sbitmap, const >> cpu_arch_option *, >> >> >> >> void arm_initialize_isa (sbitmap, const enum isa_feature *); >> >> >> >> +const char * arm_gen_far_branch (rtx *, int,const char * , const char *); >> >> + >> >> + >> > >> > Lets get the nits out of the way. >> > >> > Unnecessary extra new line, need a space between int and const above. >> > >> > >> >> .Fixed! >> >> >> #endif /* ! GCC_ARM_PROTOS_H */ >> >> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c >> >> index 39e1a1ef9a2..1a693d2ddca 100644 >> >> --- a/gcc/config/arm/arm.c >> >> +++ b/gcc/config/arm/arm.c >> >> @@ -32139,6 +32139,31 @@ arm_run_selftests (void) >> >> } >> >> } /* Namespace selftest. */ >> >> >> >> + >> >> +/* Generate code to enable conditional branches in functions over 1 MiB. >> >> */ >> >> +const char * >> >> +arm_gen_far_branch (rtx * operands, int pos_label, const char * dest, >> >> + const char * branch_format) >> > >> > Not sure if this is some munging from the attachment but check >> > vertical alignment of parameters. >> > >> >> .Fixed! >> >> >> +{ >> >> + rtx_code_label * tmp_label = gen_label_rtx (); >> >> + char label_buf[256]; >> >> + char buffer[128]; >> >> + ASM_GENERATE_INTERNAL_LABEL (label_buf, dest , \ >> >> + CODE_LABEL_NUMBER (tmp_label)); >> >> + const char *label_ptr = arm_strip_name_encoding (label_buf); >> >> + rtx dest_label = operands[pos_label]; >> >> + operands[pos_label] = tmp_label; >> >> + >> >> + snprintf (buffer, sizeof (buffer), "%s%s", branch_format , label_ptr); >> >> + output_asm_insn (buffer, operands); >> >> + >> >> + snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, >> >> label_ptr); >> >> + operands[pos_label] = dest_label; >> >> + output_asm_insn (buffer, operands); >> >> + return ""; >> >> +} >> >> + >> >> + >> > >> > Unnecessary extra newline. >> > >> >> .Fixed! >> >> >> #undef TARGET_RUN_TARGET_SELFTESTS >> >> #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests >> >> #endif /* CHECKING_P */ >> >> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md >> >> index f861c72ccfc..634fd0a59da 100644 >> >> --- a/gcc/config/arm/arm.md >> >> +++ b/gcc/config/arm/arm.md >> >> @@ -6686,9 +6686,16 @@ >> >> ;; And for backward branches we have >> >> ;; (neg_range - neg_base_offs + pc_offs) = (neg_range - (-2 or -4) + >> >>4). >> >> ;; >> >> +;; In 16-bit Thumb these ranges are: >> >> ;; For a 'b' pos_range = 2046, neg_range = -2048 giving >> >>(-2040->2048). >> >> ;; For a 'b<cond>' pos_range = 254, neg_range = -256 giving (-250 >> >>->256). >> >> >> >> +;; In 32-bit Thumb these ranges are: >> >> +;; For a 'b' +/- 16MB is not checked for. >> >> +;; For a 'b<cond>' pos_range = 1048574, neg_range = -1048576 giving >> >> +;; (-1048568 -> 1048576). >> >> + >> >> + >> > >> > Unnecessary extra newline. >> > >> >> .Fixed! >> >> >> (define_expand "cbranchsi4" >> >> [(set (pc) (if_then_else >> >> (match_operator 0 "expandable_comparison_operator" >> >> @@ -6947,22 +6954,42 @@ >> >> (pc)))] >> >> "TARGET_32BIT" >> >> "* >> >> - if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) >> >> - { >> >> - arm_ccfsm_state += 2; >> >> - return \"\"; >> >> - } >> >> - return \"b%d1\\t%l0\"; >> >> + if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) >> >> + { >> >> + arm_ccfsm_state += 2; >> >> + return \"\"; >> >> + } >> >> + switch (get_attr_length (insn)) >> >> + { >> >> + // Thumb2 16-bit b{cond} >> >> + case 2: >> >> + >> >> + // Thumb2 32-bit b{cond} >> >> + case 4: return \"b%d1\\t%l0\";break; >> >> + >> >> + // Thumb2 b{cond} out of range. Use unconditional branch. >> >> + case 8: return arm_gen_far_branch \ >> >> + (operands, 0, \"Lbcond\", \"b%D1\t\"); >> >> + break; >> >> + >> >> + // A32 b{cond} >> >> + default: return \"b%d1\\t%l0\"; >> >> + } >> > >> > Please fix indentation here. >> > >> >> .Fixed together with below changes. >> >> >> " >> >> [(set_attr "conds" "use") >> >> (set_attr "type" "branch") >> >> (set (attr "length") >> >> - (if_then_else >> >> - (and (match_test "TARGET_THUMB2") >> >> - (and (ge (minus (match_dup 0) (pc)) (const_int -250)) >> >> - (le (minus (match_dup 0) (pc)) (const_int 256)))) >> >> - (const_int 2) >> >> - (const_int 4)))] >> >> + (if_then_else (match_test "TARGET_THUMB2") >> >> + (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250)) >> >> + (le (minus (match_dup 0) (pc)) (const_int 256))) >> >> + (const_int 2) >> >> + (if_then_else (and (ge (minus (match_dup 0) (pc)) >> >> + (const_int -1048568)) >> >> + (le (minus (match_dup 0) (pc)) (const_int 1048576))) >> >> + (const_int 4) >> >> + (const_int 8))) >> >> + (const_int 10))) >> >> + ] >> > >> > This conditional is unreadable and is getting quite complex. >> > >> > Please fix the indentation and add some comments to indicate when >> > this is 2, 4, 8, 10 above the pattern and ask for the comment to >> > be in sync with this. >> > >> > How did we end up with length 10 ? That indicates 2 4 byte instructions >> > and a 2 byte instruction ? You are handling lengths 2, 4, 8 above in >> > the switch - is length 10 going to be a single A32 b<cond> instruction ? >> > >> > What am I missing ? >> > >> > >> >> Ah sorry, I had not realised that the "length" related to the number of >> bytes in the instruction, so I just used it as a variable to then check >> in the switch(). >> And yes, you are correct in assuming that length 10 would have been the >> A32 b<cond> version. >> So the mapping I had in mind was: >> 2-> Thumb2 b<cond> - narrow 16bit version >> 4-> Thumb2 b<cond> - wide 32bit version >> 8-> Thumb2 b - "far branch". >> 10-> A32 b<cond> >> >> The new version that maintains the "length=number of bytes" would be: >> >> 2-> Thumb2 b<cond> - narrow 16bit version >> 4-> Thumb2 b<cond> - wide 32bit version OR A32 b<cond> >> 6-> Thumb2 "far branch" made up from one b<cond> to a very close Lbcond >> label (so 16 bits) and one b for 32 bits. (so 2+4 == 6) >> >> I've gone ahead and done this in the new proposed patch. Let me know if >> it's ok! (also I changed the first check to !TARGET_THUMB2 - this makes >> it slightly more readable). I'm still not sure about this, so any >> suggestions are welcome! >> >> > >> >> ) >> >> >> >> (define_insn "*arm_cond_branch_reversed" >> >> @@ -6978,17 +7005,36 @@ >> >> arm_ccfsm_state += 2; >> >> return \"\"; >> >> } >> >> - return \"b%D1\\t%l0\"; >> >> + switch (get_attr_length (insn)) >> >> + { >> >> + // Thumb2 16-bit b{cond} >> >> + case 2: >> >> + >> >> + // Thumb2 32-bit b{cond} >> >> + case 4: return \"b%D1\\t%l0\";break; >> >> + >> >> + // Thumb2 b{cond} out of range. Use unconditional branch. >> >> + case 8: return arm_gen_far_branch \ >> >> + (operands, 0, \"Lbcond\", \"b%d1\t\"); >> >> + break; >> >> + // A32 b{cond} >> >> + default: return \"b%D1\\t%l0\"; >> >> + } >> >> " >> >> [(set_attr "conds" "use") >> >> (set_attr "type" "branch") >> >> (set (attr "length") >> >> - (if_then_else >> >> - (and (match_test "TARGET_THUMB2") >> >> - (and (ge (minus (match_dup 0) (pc)) (const_int -250)) >> >> - (le (minus (match_dup 0) (pc)) (const_int 256)))) >> >> - (const_int 2) >> >> - (const_int 4)))] >> >> + (if_then_else (match_test "TARGET_THUMB2") >> >> + (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250)) >> >> + (le (minus (match_dup 0) (pc)) (const_int 256))) >> >> + (const_int 2) >> >> + (if_then_else (and (ge (minus (match_dup 0) (pc)) >> >> + (const_int -1048568)) >> >> + (le (minus (match_dup 0) (pc)) (const_int 1048576))) >> >> + (const_int 4) >> >> + (const_int 8))) >> >> + (const_int 10))) >> >> + ] >> > >> > Same comments as above apply here too. >> > >> >> Same as above. >> >> Thank you for the feedback and apologies for being a clueless :) >> >> And, of course, let me know of any problems or queries! >> >> Cheers, >> Stam >> >> > Ramana >> > >> >> ) >> >> >> >> >> >> diff --git a/gcc/testsuite/gcc.target/arm/pr91816.c >> b/gcc/testsuite/gcc.target/arm/pr91816.c >> >> new file mode 100644 >> >> index 00000000000..176bf61780b >> >> --- /dev/null >> >> +++ b/gcc/testsuite/gcc.target/arm/pr91816.c >> >> @@ -0,0 +1,102 @@ >> >> +/* { dg-do compile } */ >> >> +/* { dg-options "-march=armv7-a -mthumb -mfpu=vfpv3-d16" } */ >> >> +int printf(const char *, ...); >> >> + >> >> +__attribute__((noinline,noclone)) void f1(int a) >> >> +{ >> >> + if (a) { >> >> +#define HW0 printf("Hello World!\n"); >> >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >> >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >> >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >> >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >> >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >> >> + HW0 >> >> + } >> >> +} >> >> + >> >> +__attribute__((noinline,noclone)) void f2(int a) >> >> +{ >> >> + if (a) { >> >> +#define HW0 printf("Hello World!\n"); >> >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >> >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >> >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >> >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >> >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >> >> + HW3 >> >> + } >> >> +} >> >> + >> >> + >> >> +__attribute__((noinline,noclone)) void f3(int a) >> >> +{ >> >> + if (a) { >> >> +#define HW0 printf("Hello World!\n"); >> >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >> >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >> >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >> >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >> >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >> >> + HW5 >> >> + } >> >> +} >> >> + >> >> +__attribute__((noinline,noclone)) void f4(int a) >> >> +{ >> >> + if (a==1) { >> >> +#define HW0 printf("Hello World!\n"); >> >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >> >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >> >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >> >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >> >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >> >> + HW0 >> >> + } >> >> +} >> >> + >> >> +__attribute__((noinline,noclone)) void f5(int a) >> >> +{ >> >> + if (a==1) { >> >> +#define HW0 printf("Hello World!\n"); >> >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >> >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >> >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >> >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >> >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >> >> + HW3 >> >> + } >> >> +} >> >> + >> >> + >> >> +__attribute__((noinline,noclone)) void f6(int a) >> >> +{ >> >> + if (a==1) { >> >> +#define HW0 printf("Hello World!\n"); >> >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >> >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >> >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >> >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >> >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >> >> + HW5 >> >> + } >> >> +} >> >> + >> >> + >> >> +int main(void) >> >> +{ >> >> + f1(0); >> >> + f2(0); >> >> + f3(0); >> >> + f4(0); >> >> + f5(0); >> >> + f6(0); >> >> + return 0; >> >> +} >> >> + >> >> + >> >> +/* { dg-final { scan-assembler-times "beq\\t.L\[0-9\]" 2 } } */ >> >> +/* { dg-final { scan-assembler-times "beq\\t.Lbcond\[0-9\]" 1 } } */ >> >> +/* { dg-final { scan-assembler-times "bne\\t.L\[0-9\]" 2 } } */ >> >> +/* { dg-final { scan-assembler-times "bne\\t.Lbcond\[0-9\]" 1 } } */ >> >> +/* { dg-final { scan-assembler-times "b\\t.L\[0-9\]" 2 } } */ >> > >> > > 1.patch > > diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h > index f995974f9bb..59ec219da3d 100644 > --- a/gcc/config/arm/arm-protos.h > +++ b/gcc/config/arm/arm-protos.h > @@ -570,4 +570,6 @@ void arm_parse_option_features (sbitmap, const > cpu_arch_option *, > > void arm_initialize_isa (sbitmap, const enum isa_feature *); > > +const char * arm_gen_far_branch (rtx *, int, const char *, const char *); > + > #endif /* ! GCC_ARM_PROTOS_H */ > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > index 39e1a1ef9a2..7a69ddb6b7b 100644 > --- a/gcc/config/arm/arm.c > +++ b/gcc/config/arm/arm.c > @@ -32139,6 +32139,30 @@ arm_run_selftests (void) > } > } /* Namespace selftest. */ > > + > +/* Generate code to enable conditional branches in functions over 1 MiB. */ > > > Please document the function parameters in this comment as other functions in > this file (try to) do. Done :) > > > +const char * > +arm_gen_far_branch (rtx * operands, int pos_label, const char * dest, > + const char * branch_format) > +{ > + rtx_code_label * tmp_label = gen_label_rtx (); > + char label_buf[256]; > + char buffer[128]; > + ASM_GENERATE_INTERNAL_LABEL (label_buf, dest , \ > + CODE_LABEL_NUMBER (tmp_label)); > + const char *label_ptr = arm_strip_name_encoding (label_buf); > + rtx dest_label = operands[pos_label]; > + operands[pos_label] = tmp_label; > + > + snprintf (buffer, sizeof (buffer), "%s%s", branch_format , label_ptr); > + output_asm_insn (buffer, operands); > + > + snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr); > + operands[pos_label] = dest_label; > + output_asm_insn (buffer, operands); > + return ""; > +} > + > #undef TARGET_RUN_TARGET_SELFTESTS > #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests > #endif /* CHECKING_P */ > diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md > index f861c72ccfc..7e5e1489214 100644 > --- a/gcc/config/arm/arm.md > +++ b/gcc/config/arm/arm.md > @@ -6686,9 +6686,15 @@ > ;; And for backward branches we have > ;; (neg_range - neg_base_offs + pc_offs) = (neg_range - (-2 or -4) + 4). > ;; > +;; In 16-bit Thumb these ranges are: > ;; For a 'b' pos_range = 2046, neg_range = -2048 giving (-2040->2048). > ;; For a 'b<cond>' pos_range = 254, neg_range = -256 giving (-250 ->256). > > +;; In 32-bit Thumb these ranges are: > +;; For a 'b' ± 16MB is not checked for. > +;; For a 'b<cond>' pos_range = 1048574, neg_range = -1048576 giving > +;; (-1048568 -> 1048576). > + > (define_expand "cbranchsi4" > [(set (pc) (if_then_else > (match_operator 0 "expandable_comparison_operator" > @@ -6946,23 +6952,56 @@ > (label_ref (match_operand 0 "" "")) > (pc)))] > "TARGET_32BIT" > - "* > - if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) > - { > - arm_ccfsm_state += 2; > - return \"\"; > - } > - return \"b%d1\\t%l0\"; > - " > + { > + if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) > + { > + arm_ccfsm_state += 2; > + return ""; > + } > + switch (get_attr_length (insn)) > + { > + /* Thumb2 16-bit b{cond}. */ > + case 2: > + > + /* Thumb2 32-bit b{cond} or A32 b{cond}. */ > + case 4: return "b%d1\t%l0"; > + break; > + > + /* Thumb2 b{cond} out of range. Use 16-bit b{cond} and > + unconditional branch b. */ > + default: return arm_gen_far_branch \ > + (operands, 0, "Lbcond", "b%D1\t"); > + } > > > The indentation here is wrong. Please look at how other switch statements are > written in the backend for guidance: 2 space indentation, new line after the > cases etc. Done > > + } > [(set_attr "conds" "use") > (set_attr "type" "branch") > (set (attr "length") > - (if_then_else > - (and (match_test "TARGET_THUMB2") > - (and (ge (minus (match_dup 0) (pc)) (const_int -250)) > - (le (minus (match_dup 0) (pc)) (const_int 256)))) > - (const_int 2) > - (const_int 4)))] > + (if_then_else (match_test "!TARGET_THUMB2") > + > + ;;Target is not Thumb2, therefore is A32. Generate b{cond}. > + (const_int 4) > + > + ;; Check if target is within 16-bit Thumb2 b{cond} range. > + (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250)) > + (le (minus (match_dup 0) (pc)) (const_int 256))) > + > + ;; Target is Thumb2, within narrow range. > + ;; Generate b{cond}. > + (const_int 2) > + > + ;; Check if target is within 32-bit Thumb2 b{cond} range. > + (if_then_else (and (ge (minus (match_dup 0) > + (pc))(const_int -1048568)) > + (le (minus (match_dup 0) > + (pc)) (const_int 1048576))) > + > + ;; Target is Thumb2, within wide range. > + ;; Generate b{cond} > + (const_int 4) > + ;; Target is Thumb2, out of range. > + ;; Generate narrow b{cond} and unconditional branch b. > + (const_int 6))))) > + ] > > > Likewise on the indentation. Done, sorry about that! > > ) > > (define_insn "*arm_cond_branch_reversed" > @@ -6972,23 +7011,56 @@ > (pc) > (label_ref (match_operand 0 "" ""))))] > "TARGET_32BIT" > - "* > - if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) > - { > - arm_ccfsm_state += 2; > - return \"\"; > - } > - return \"b%D1\\t%l0\"; > - " > + { > + if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) > + { > + arm_ccfsm_state += 2; > + return ""; > + } > + switch (get_attr_length (insn)) > + { > + /* Thumb2 16-bit b{cond}. */ > + case 2: > + > + /* Thumb2 32-bit b{cond} or A32 b{cond}. */ > + case 4: return "b%D1\t%l0"; > + break; > + > + /* Thumb2 b{cond} out of range. Use 16-bit b{cond} and > + unconditional branch b. */ > + default: return arm_gen_far_branch \ > + (operands, 0, "Lbcond", "b%d1\t"); > + } > > > > > + } > [(set_attr "conds" "use") > (set_attr "type" "branch") > (set (attr "length") > - (if_then_else > - (and (match_test "TARGET_THUMB2") > - (and (ge (minus (match_dup 0) (pc)) (const_int -250)) > - (le (minus (match_dup 0) (pc)) (const_int 256)))) > - (const_int 2) > - (const_int 4)))] > + (if_then_else (match_test "!TARGET_THUMB2") > + > + ;;Target is not Thumb2, therefore is A32. Generate b{cond}. > + (const_int 4) > + > + ;; Check if target is within 16-bit Thumb2 b{cond} range. > + (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250)) > + (le (minus (match_dup 0) (pc)) (const_int 256))) > + > + ;; Target is Thumb2, within narrow range. > + ;; Generate b{cond}. > + (const_int 2) > + > + ;; Check if target is within 32-bit Thumb2 b{cond} range. > + (if_then_else (and (ge (minus (match_dup 0) > + (pc))(const_int -1048568)) > + (le (minus (match_dup 0) > + (pc)) (const_int 1048576))) > + > + ;; Target is Thumb2, within wide range. > + ;; Generate b{cond}. > + (const_int 4) > + ;; Target is Thumb2, out of range. > + ;; Generate narrow b{cond} and unconditional branch b. > + (const_int 6))))) > + ] > ) > > > Otherwise this looks reasonable to me. Ramana, did you have any further > comments > on the patch? > Thanks, > Kyrill > > > > diff --git a/gcc/testsuite/gcc.target/arm/pr91816.c > b/gcc/testsuite/gcc.target/arm/pr91816.c > new file mode 100644 > index 00000000000..176bf61780b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/pr91816.c > @@ -0,0 +1,102 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=armv7-a -mthumb -mfpu=vfpv3-d16" } */ > +int printf(const char *, ...); > + > +__attribute__((noinline,noclone)) void f1(int a) > +{ > + if (a) { > +#define HW0 printf("Hello World!\n"); > +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 > +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 > +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 > +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 > +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 > + HW0 > + } > +} > + > +__attribute__((noinline,noclone)) void f2(int a) > +{ > + if (a) { > +#define HW0 printf("Hello World!\n"); > +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 > +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 > +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 > +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 > +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 > + HW3 > + } > +} > + > + > +__attribute__((noinline,noclone)) void f3(int a) > +{ > + if (a) { > +#define HW0 printf("Hello World!\n"); > +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 > +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 > +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 > +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 > +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 > + HW5 > + } > +} > + > +__attribute__((noinline,noclone)) void f4(int a) > +{ > + if (a==1) { > +#define HW0 printf("Hello World!\n"); > +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 > +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 > +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 > +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 > +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 > + HW0 > + } > +} > + > +__attribute__((noinline,noclone)) void f5(int a) > +{ > + if (a==1) { > +#define HW0 printf("Hello World!\n"); > +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 > +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 > +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 > +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 > +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 > + HW3 > + } > +} > + > + > +__attribute__((noinline,noclone)) void f6(int a) > +{ > + if (a==1) { > +#define HW0 printf("Hello World!\n"); > +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 > +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 > +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 > +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 > +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 > + HW5 > + } > +} > + > + > +int main(void) > +{ > + f1(0); > + f2(0); > + f3(0); > + f4(0); > + f5(0); > + f6(0); > + return 0; > +} > + > + > +/* { dg-final { scan-assembler-times "beq\\t.L\[0-9\]" 2 } } */ > +/* { dg-final { scan-assembler-times "beq\\t.Lbcond\[0-9\]" 1 } } */ > +/* { dg-final { scan-assembler-times "bne\\t.L\[0-9\]" 2 } } */ > +/* { dg-final { scan-assembler-times "bne\\t.Lbcond\[0-9\]" 1 } } */ > +/* { dg-final { scan-assembler-times "b\\t.L\[0-9\]" 2 } } */ > >
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 7c4b1003844a1cdbed008e233753663bc8df3beb..8895becc639057b6394df4d3966b960ace5e97db 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -576,4 +576,6 @@ void arm_parse_option_features (sbitmap, const cpu_arch_option *, void arm_initialize_isa (sbitmap, const enum isa_feature *); +const char * arm_gen_far_branch (rtx *, int, const char * , const char *); + #endif /* ! GCC_ARM_PROTOS_H */ diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 07231d722b978b5c99eb5a27d8ad8ece3d6c80fd..ee5de169f3ebdd5e3381144156f136ac5b3af887 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -32626,6 +32626,40 @@ arm_run_selftests (void) } } /* Namespace selftest. */ + +/* Generate code to enable conditional branches in functions over 1 MiB. + Parameters are: + operands: is the operands list of the asm insn (see arm_cond_branch or + arm_cond_branch_reversed). + pos_label: is an index into the operands array where operands[pos_label] is + the asm label of the final jump destination. + dest: is a string which is used to generate the asm label of the intermediate + destination + branch_format: is a string denoting the intermediate branch format, e.g. + "beq", "bne", etc. */ + +const char * +arm_gen_far_branch (rtx * operands, int pos_label, const char * dest, + const char * branch_format) +{ + rtx_code_label * tmp_label = gen_label_rtx (); + char label_buf[256]; + char buffer[128]; + ASM_GENERATE_INTERNAL_LABEL (label_buf, dest , \ + CODE_LABEL_NUMBER (tmp_label)); + const char *label_ptr = arm_strip_name_encoding (label_buf); + rtx dest_label = operands[pos_label]; + operands[pos_label] = tmp_label; + + snprintf (buffer, sizeof (buffer), "%s%s", branch_format , label_ptr); + output_asm_insn (buffer, operands); + + snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr); + operands[pos_label] = dest_label; + output_asm_insn (buffer, operands); + return ""; +} + #undef TARGET_RUN_TARGET_SELFTESTS #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests #endif /* CHECKING_P */ diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index f89a2d412df8afe621241958b29a8a7d58dce284..fb1d4547e5c990cc719e2cac5bfe5c2ce8fcb2a3 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -7546,9 +7546,15 @@ ;; And for backward branches we have ;; (neg_range - neg_base_offs + pc_offs) = (neg_range - (-2 or -4) + 4). ;; +;; In 16-bit Thumb these ranges are: ;; For a 'b' pos_range = 2046, neg_range = -2048 giving (-2040->2048). ;; For a 'b<cond>' pos_range = 254, neg_range = -256 giving (-250 ->256). +;; In 32-bit Thumb these ranges are: +;; For a 'b' +/- 16MB is not checked for. +;; For a 'b<cond>' pos_range = 1048574, neg_range = -1048576 giving +;; (-1048568 -> 1048576). + (define_expand "cbranchsi4" [(set (pc) (if_then_else (match_operator 0 "expandable_comparison_operator" @@ -7721,23 +7727,50 @@ (label_ref (match_operand 0 "" "")) (pc)))] "TARGET_32BIT" - "* - if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) + { + if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) { arm_ccfsm_state += 2; - return \"\"; + return ""; } - return \"b%d1\\t%l0\"; - " + switch (get_attr_length (insn)) + { + case 2: /* Thumb2 16-bit b{cond}. */ + case 4: /* Thumb2 32-bit b{cond} or A32 b{cond}. */ + return "b%d1\t%l0"; + break; + + /* Thumb2 b{cond} out of range. Use 16-bit b{cond} and + unconditional branch b. */ + default: return arm_gen_far_branch (operands, 0, "Lbcond", "b%D1\t"); + } + } [(set_attr "conds" "use") (set_attr "type" "branch") (set (attr "length") - (if_then_else - (and (match_test "TARGET_THUMB2") - (and (ge (minus (match_dup 0) (pc)) (const_int -250)) - (le (minus (match_dup 0) (pc)) (const_int 256)))) - (const_int 2) - (const_int 4)))] + (if_then_else (match_test "!TARGET_THUMB2") + + ;;Target is not Thumb2, therefore is A32. Generate b{cond}. + (const_int 4) + + ;; Check if target is within 16-bit Thumb2 b{cond} range. + (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250)) + (le (minus (match_dup 0) (pc)) (const_int 256))) + + ;; Target is Thumb2, within narrow range. + ;; Generate b{cond}. + (const_int 2) + + ;; Check if target is within 32-bit Thumb2 b{cond} range. + (if_then_else (and (ge (minus (match_dup 0) (pc))(const_int -1048568)) + (le (minus (match_dup 0) (pc)) (const_int 1048576))) + + ;; Target is Thumb2, within wide range. + ;; Generate b{cond} + (const_int 4) + ;; Target is Thumb2, out of range. + ;; Generate narrow b{cond} and unconditional branch b. + (const_int 6)))))] ) (define_insn "*arm_cond_branch_reversed" @@ -7747,23 +7780,50 @@ (pc) (label_ref (match_operand 0 "" ""))))] "TARGET_32BIT" - "* - if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) + { + if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) { arm_ccfsm_state += 2; - return \"\"; + return ""; } - return \"b%D1\\t%l0\"; - " + switch (get_attr_length (insn)) + { + case 2: /* Thumb2 16-bit b{cond}. */ + case 4: /* Thumb2 32-bit b{cond} or A32 b{cond}. */ + return "b%D1\t%l0"; + break; + + /* Thumb2 b{cond} out of range. Use 16-bit b{cond} and + unconditional branch b. */ + default: return arm_gen_far_branch (operands, 0, "Lbcond", "b%d1\t"); + } + } [(set_attr "conds" "use") (set_attr "type" "branch") (set (attr "length") - (if_then_else - (and (match_test "TARGET_THUMB2") - (and (ge (minus (match_dup 0) (pc)) (const_int -250)) - (le (minus (match_dup 0) (pc)) (const_int 256)))) - (const_int 2) - (const_int 4)))] + (if_then_else (match_test "!TARGET_THUMB2") + + ;;Target is not Thumb2, therefore is A32. Generate b{cond}. + (const_int 4) + + ;; Check if target is within 16-bit Thumb2 b{cond} range. + (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250)) + (le (minus (match_dup 0) (pc)) (const_int 256))) + + ;; Target is Thumb2, within narrow range. + ;; Generate b{cond}. + (const_int 2) + + ;; Check if target is within 32-bit Thumb2 b{cond} range. + (if_then_else (and (ge (minus (match_dup 0) (pc))(const_int -1048568)) + (le (minus (match_dup 0) (pc)) (const_int 1048576))) + + ;; Target is Thumb2, within wide range. + ;; Generate b{cond}. + (const_int 4) + ;; Target is Thumb2, out of range. + ;; Generate narrow b{cond} and unconditional branch b. + (const_int 6)))))] ) diff --git a/gcc/testsuite/gcc.target/arm/pr91816.c b/gcc/testsuite/gcc.target/arm/pr91816.c new file mode 100644 index 0000000000000000000000000000000000000000..757c897e9c0db32709227b3fdf1b4a8033428232 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/pr91816.c @@ -0,0 +1,61 @@ +/* { dg-do compile } */ +/* { dg-options "-march=armv7-a -mthumb -mfpu=vfpv3-d16" } */ +int printf(const char *, ...); + +#define HW0 printf("Hello World!\n"); +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 + +__attribute__((noinline,noclone)) void f1 (int a) +{ + if (a) { HW0 } +} + +__attribute__((noinline,noclone)) void f2 (int a) +{ + if (a) { HW3 } +} + + +__attribute__((noinline,noclone)) void f3 (int a) +{ + if (a) { HW5 } +} + +__attribute__((noinline,noclone)) void f4 (int a) +{ + if (a == 1) { HW0 } +} + +__attribute__((noinline,noclone)) void f5 (int a) +{ + if (a == 1) { HW3 } +} + + +__attribute__((noinline,noclone)) void f6 (int a) +{ + if (a == 1) { HW5 } +} + + +int main(void) +{ + f1(0); + f2(0); + f3(0); + f4(0); + f5(0); + f6(0); + return 0; +} + + +/* { dg-final { scan-assembler-times "beq\\t.L\[0-9\]" 2 } } */ +/* { dg-final { scan-assembler-times "beq\\t.Lbcond\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-times "bne\\t.L\[0-9\]" 2 } } */ +/* { dg-final { scan-assembler-times "bne\\t.Lbcond\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-times "b\\t.L\[0-9\]" 2 } } */