On 11/15/19 5:26 PM, Stam Markianos-Wright wrote: > Pinging with more correct maintainers this time :) > > Also would need to backport to gcc7,8,9, but need to get this approved > first! > > Thank you, > Stam > > > -------- Forwarded Message -------- > Subject: Re: [PATCH][GCC][ARM] Arm generates out of range conditional > branches in Thumb2 (PR91816) > Date: Mon, 21 Oct 2019 10:37:09 +0100 > From: Stam Markianos-Wright <stam.markianos-wri...@arm.com> > To: Ramana Radhakrishnan <ramana....@googlemail.com> > CC: gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>, nd <n...@arm.com>, > James Greenhalgh <james.greenha...@arm.com>, Richard Earnshaw > <richard.earns...@arm.com> > > > > On 10/13/19 4:23 PM, Ramana Radhakrishnan wrote: >>> >>> Patch bootstrapped and regression tested on arm-none-linux-gnueabihf, >>> however, on my native Aarch32 setup the test times out when run as part >>> of a big "make check-gcc" regression, but not when run individually. >>> >>> 2019-10-11 Stamatis Markianos-Wright <stam.markianos-wri...@arm.com> >>> >>> * config/arm/arm.md: Update b<cond> for Thumb2 range checks. >>> * config/arm/arm.c: New function arm_gen_far_branch. >>> * config/arm/arm-protos.h: New function arm_gen_far_branch >>> prototype. >>> >>> gcc/testsuite/ChangeLog: >>> >>> 2019-10-11 Stamatis Markianos-Wright <stam.markianos-wri...@arm.com> >>> >>> * testsuite/gcc.target/arm/pr91816.c: New test. >> >>> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h >>> index f995974f9bb..1dce333d1c3 100644 >>> --- a/gcc/config/arm/arm-protos.h >>> +++ b/gcc/config/arm/arm-protos.h >>> @@ -570,4 +570,7 @@ void arm_parse_option_features (sbitmap, const >>> cpu_arch_option *, >>> void arm_initialize_isa (sbitmap, const enum isa_feature *); >>> +const char * arm_gen_far_branch (rtx *, int,const char * , const >>> char *); >>> + >>> + >> >> Lets get the nits out of the way. >> >> Unnecessary extra new line, need a space between int and const above. >> >> > > .Fixed! > >>> #endif /* ! GCC_ARM_PROTOS_H */ >>> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c >>> index 39e1a1ef9a2..1a693d2ddca 100644 >>> --- a/gcc/config/arm/arm.c >>> +++ b/gcc/config/arm/arm.c >>> @@ -32139,6 +32139,31 @@ arm_run_selftests (void) >>> } >>> } /* Namespace selftest. */ >>> + >>> +/* Generate code to enable conditional branches in functions over 1 >>> MiB. */ >>> +const char * >>> +arm_gen_far_branch (rtx * operands, int pos_label, const char * dest, >>> + const char * branch_format) >> >> Not sure if this is some munging from the attachment but check >> vertical alignment of parameters. >> > > .Fixed! > >>> +{ >>> + rtx_code_label * tmp_label = gen_label_rtx (); >>> + char label_buf[256]; >>> + char buffer[128]; >>> + ASM_GENERATE_INTERNAL_LABEL (label_buf, dest , \ >>> + CODE_LABEL_NUMBER (tmp_label)); >>> + const char *label_ptr = arm_strip_name_encoding (label_buf); >>> + rtx dest_label = operands[pos_label]; >>> + operands[pos_label] = tmp_label; >>> + >>> + snprintf (buffer, sizeof (buffer), "%s%s", branch_format , >>> label_ptr); >>> + output_asm_insn (buffer, operands); >>> + >>> + snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, >>> label_ptr); >>> + operands[pos_label] = dest_label; >>> + output_asm_insn (buffer, operands); >>> + return ""; >>> +} >>> + >>> + >> >> Unnecessary extra newline. >> > > .Fixed! > >>> #undef TARGET_RUN_TARGET_SELFTESTS >>> #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests >>> #endif /* CHECKING_P */ >>> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md >>> index f861c72ccfc..634fd0a59da 100644 >>> --- a/gcc/config/arm/arm.md >>> +++ b/gcc/config/arm/arm.md >>> @@ -6686,9 +6686,16 @@ >>> ;; And for backward branches we have >>> ;; (neg_range - neg_base_offs + pc_offs) = (neg_range - (-2 or >>> -4) + 4). >>> ;; >>> +;; In 16-bit Thumb these ranges are: >>> ;; For a 'b' pos_range = 2046, neg_range = -2048 giving >>> (-2040->2048). >>> ;; For a 'b<cond>' pos_range = 254, neg_range = -256 giving (-250 >>> ->256). >>> +;; In 32-bit Thumb these ranges are: >>> +;; For a 'b' +/- 16MB is not checked for. >>> +;; For a 'b<cond>' pos_range = 1048574, neg_range = -1048576 giving >>> +;; (-1048568 -> 1048576). >>> + >>> + >> >> Unnecessary extra newline. >> > > .Fixed! > >>> (define_expand "cbranchsi4" >>> [(set (pc) (if_then_else >>> (match_operator 0 "expandable_comparison_operator" >>> @@ -6947,22 +6954,42 @@ >>> (pc)))] >>> "TARGET_32BIT" >>> "* >>> - if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) >>> - { >>> - arm_ccfsm_state += 2; >>> - return \"\"; >>> - } >>> - return \"b%d1\\t%l0\"; >>> + if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) >>> + { >>> + arm_ccfsm_state += 2; >>> + return \"\"; >>> + } >>> + switch (get_attr_length (insn)) >>> + { >>> + // Thumb2 16-bit b{cond} >>> + case 2: >>> + >>> + // Thumb2 32-bit b{cond} >>> + case 4: return \"b%d1\\t%l0\";break; >>> + >>> + // Thumb2 b{cond} out of range. Use unconditional branch. >>> + case 8: return arm_gen_far_branch \ >>> + (operands, 0, \"Lbcond\", \"b%D1\t\"); >>> + break; >>> + >>> + // A32 b{cond} >>> + default: return \"b%d1\\t%l0\"; >>> + } >> >> Please fix indentation here. >> > > .Fixed together with below changes. > >>> " >>> [(set_attr "conds" "use") >>> (set_attr "type" "branch") >>> (set (attr "length") >>> - (if_then_else >>> - (and (match_test "TARGET_THUMB2") >>> - (and (ge (minus (match_dup 0) (pc)) (const_int -250)) >>> - (le (minus (match_dup 0) (pc)) (const_int 256)))) >>> - (const_int 2) >>> - (const_int 4)))] >>> + (if_then_else (match_test "TARGET_THUMB2") >>> + (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250)) >>> + (le (minus (match_dup 0) (pc)) (const_int 256))) >>> + (const_int 2) >>> + (if_then_else (and (ge (minus (match_dup 0) (pc)) >>> + (const_int -1048568)) >>> + (le (minus (match_dup 0) (pc)) (const_int 1048576))) >>> + (const_int 4) >>> + (const_int 8))) >>> + (const_int 10))) >>> + ] >> >> This conditional is unreadable and is getting quite complex. >> >> Please fix the indentation and add some comments to indicate when >> this is 2, 4, 8, 10 above the pattern and ask for the comment to >> be in sync with this. >> >> How did we end up with length 10 ? That indicates 2 4 byte instructions >> and a 2 byte instruction ? You are handling lengths 2, 4, 8 above in >> the switch - is length 10 going to be a single A32 b<cond> instruction ? >> >> What am I missing ? >> >> > > Ah sorry, I had not realised that the "length" related to the number of > bytes in the instruction, so I just used it as a variable to then check > in the switch(). > And yes, you are correct in assuming that length 10 would have been the > A32 b<cond> version. > So the mapping I had in mind was: > 2-> Thumb2 b<cond> - narrow 16bit version > 4-> Thumb2 b<cond> - wide 32bit version > 8-> Thumb2 b - "far branch". > 10-> A32 b<cond> > > The new version that maintains the "length=number of bytes" would be: > > 2-> Thumb2 b<cond> - narrow 16bit version > 4-> Thumb2 b<cond> - wide 32bit version OR A32 b<cond> > 6-> Thumb2 "far branch" made up from one b<cond> to a very close Lbcond > label (so 16 bits) and one b for 32 bits. (so 2+4 == 6) > > I've gone ahead and done this in the new proposed patch. Let me know if > it's ok! (also I changed the first check to !TARGET_THUMB2 - this makes > it slightly more readable). I'm still not sure about this, so any > suggestions are welcome!
Ping :) > >> >>> ) >>> (define_insn "*arm_cond_branch_reversed" >>> @@ -6978,17 +7005,36 @@ >>> arm_ccfsm_state += 2; >>> return \"\"; >>> } >>> - return \"b%D1\\t%l0\"; >>> + switch (get_attr_length (insn)) >>> + { >>> + // Thumb2 16-bit b{cond} >>> + case 2: >>> + >>> + // Thumb2 32-bit b{cond} >>> + case 4: return \"b%D1\\t%l0\";break; >>> + >>> + // Thumb2 b{cond} out of range. Use unconditional branch. >>> + case 8: return arm_gen_far_branch \ >>> + (operands, 0, \"Lbcond\", \"b%d1\t\"); >>> + break; >>> + // A32 b{cond} >>> + default: return \"b%D1\\t%l0\"; >>> + } >>> " >>> [(set_attr "conds" "use") >>> (set_attr "type" "branch") >>> (set (attr "length") >>> - (if_then_else >>> - (and (match_test "TARGET_THUMB2") >>> - (and (ge (minus (match_dup 0) (pc)) (const_int -250)) >>> - (le (minus (match_dup 0) (pc)) (const_int 256)))) >>> - (const_int 2) >>> - (const_int 4)))] >>> + (if_then_else (match_test "TARGET_THUMB2") >>> + (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250)) >>> + (le (minus (match_dup 0) (pc)) (const_int 256))) >>> + (const_int 2) >>> + (if_then_else (and (ge (minus (match_dup 0) (pc)) >>> + (const_int -1048568)) >>> + (le (minus (match_dup 0) (pc)) (const_int 1048576))) >>> + (const_int 4) >>> + (const_int 8))) >>> + (const_int 10))) >>> + ] >> >> Same comments as above apply here too. >> > > Same as above. Ping :) > > Thank you for the feedback and apologies for being a clueless :) > > And, of course, let me know of any problems or queries! > > Cheers, > Stam > >> Ramana >> >>> ) >>> >>> diff --git a/gcc/testsuite/gcc.target/arm/pr91816.c >>> b/gcc/testsuite/gcc.target/arm/pr91816.c >>> new file mode 100644 >>> index 00000000000..176bf61780b >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/arm/pr91816.c >>> @@ -0,0 +1,102 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-options "-march=armv7-a -mthumb -mfpu=vfpv3-d16" } */ >>> +int printf(const char *, ...); >>> + >>> +__attribute__((noinline,noclone)) void f1(int a) >>> +{ >>> + if (a) { >>> +#define HW0 printf("Hello World!\n"); >>> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >>> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >>> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >>> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >>> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >>> + HW0 >>> + } >>> +} >>> + >>> +__attribute__((noinline,noclone)) void f2(int a) >>> +{ >>> + if (a) { >>> +#define HW0 printf("Hello World!\n"); >>> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >>> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >>> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >>> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >>> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >>> + HW3 >>> + } >>> +} >>> + >>> + >>> +__attribute__((noinline,noclone)) void f3(int a) >>> +{ >>> + if (a) { >>> +#define HW0 printf("Hello World!\n"); >>> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >>> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >>> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >>> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >>> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >>> + HW5 >>> + } >>> +} >>> + >>> +__attribute__((noinline,noclone)) void f4(int a) >>> +{ >>> + if (a==1) { >>> +#define HW0 printf("Hello World!\n"); >>> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >>> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >>> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >>> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >>> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >>> + HW0 >>> + } >>> +} >>> + >>> +__attribute__((noinline,noclone)) void f5(int a) >>> +{ >>> + if (a==1) { >>> +#define HW0 printf("Hello World!\n"); >>> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >>> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >>> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >>> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >>> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >>> + HW3 >>> + } >>> +} >>> + >>> + >>> +__attribute__((noinline,noclone)) void f6(int a) >>> +{ >>> + if (a==1) { >>> +#define HW0 printf("Hello World!\n"); >>> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >>> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >>> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >>> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >>> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >>> + HW5 >>> + } >>> +} >>> + >>> + >>> +int main(void) >>> +{ >>> + f1(0); >>> + f2(0); >>> + f3(0); >>> + f4(0); >>> + f5(0); >>> + f6(0); >>> + return 0; >>> +} >>> + >>> + >>> +/* { dg-final { scan-assembler-times "beq\\t.L\[0-9\]" 2 } } */ >>> +/* { dg-final { scan-assembler-times "beq\\t.Lbcond\[0-9\]" 1 } } */ >>> +/* { dg-final { scan-assembler-times "bne\\t.L\[0-9\]" 2 } } */ >>> +/* { dg-final { scan-assembler-times "bne\\t.Lbcond\[0-9\]" 1 } } */ >>> +/* { dg-final { scan-assembler-times "b\\t.L\[0-9\]" 2 } } */ >> > >