On 1/16/20 4:06 PM, Stam Markianos-Wright wrote:
>
>
> On 1/8/20 3:18 PM, Stam Markianos-Wright wrote:
>>
>>
>> On 12/10/19 5:03 PM, Kyrill Tkachov wrote:
>>> Hi Stam,
>>>
>>> On 11/15/19 5:26 PM, Stam Markianos-Wright wrote:
>>>> Pinging with more correct maintainers this time :)
>>>>
>>>> Also would need to backport to gcc7,8,9, but need to get this approved
>>>> first!
>>>>
>>>
>>> Sorry for the delay.
>>
>> Same here now! Sorry totally forget about this in the lead up to Xmas!
>>
>> Done the changes marked below and also removed the unnecessary extra
>> #defines
>> from the test.
>
> Ping :)
>
> Cheers,
> Stam
>
>>
>>>
>>>
>>>> Thank you,
>>>> Stam
>>>>
>>>>
>>>> -------- Forwarded Message --------
>>>> Subject: Re: [PATCH][GCC][ARM] Arm generates out of range conditional
>>>> branches in Thumb2 (PR91816)
>>>> Date: Mon, 21 Oct 2019 10:37:09 +0100
>>>> From: Stam Markianos-Wright <stam.markianos-wri...@arm.com>
>>>> To: Ramana Radhakrishnan <ramana....@googlemail.com>
>>>> CC: gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>, nd <n...@arm.com>,
>>>> James Greenhalgh <james.greenha...@arm.com>, Richard Earnshaw
>>>> <richard.earns...@arm.com>
>>>>
>>>>
>>>>
>>>> On 10/13/19 4:23 PM, Ramana Radhakrishnan wrote:
>>>> >>
>>>> >> Patch bootstrapped and regression tested on arm-none-linux-gnueabihf,
>>>> >> however, on my native Aarch32 setup the test times out when run as part
>>>> >> of a big "make check-gcc" regression, but not when run individually.
>>>> >>
>>>> >> 2019-10-11 Stamatis Markianos-Wright <stam.markianos-wri...@arm.com>
>>>> >>
>>>> >> * config/arm/arm.md: Update b<cond> for Thumb2 range checks.
>>>> >> * config/arm/arm.c: New function arm_gen_far_branch.
>>>> >> * config/arm/arm-protos.h: New function arm_gen_far_branch
>>>> >> prototype.
>>>> >>
>>>> >> gcc/testsuite/ChangeLog:
>>>> >>
>>>> >> 2019-10-11 Stamatis Markianos-Wright <stam.markianos-wri...@arm.com>
>>>> >>
>>>> >> * testsuite/gcc.target/arm/pr91816.c: New test.
>>>> >
>>>> >> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
>>>> >> index f995974f9bb..1dce333d1c3 100644
>>>> >> --- a/gcc/config/arm/arm-protos.h
>>>> >> +++ b/gcc/config/arm/arm-protos.h
>>>> >> @@ -570,4 +570,7 @@ void arm_parse_option_features (sbitmap, const
>>>> cpu_arch_option *,
>>>> >>
>>>> >> void arm_initialize_isa (sbitmap, const enum isa_feature *);
>>>> >>
>>>> >> +const char * arm_gen_far_branch (rtx *, int,const char * , const char
>>>> >> *);
>>>> >> +
>>>> >> +
>>>> >
>>>> > Lets get the nits out of the way.
>>>> >
>>>> > Unnecessary extra new line, need a space between int and const above.
>>>> >
>>>> >
>>>>
>>>> .Fixed!
>>>>
>>>> >> #endif /* ! GCC_ARM_PROTOS_H */
>>>> >> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
>>>> >> index 39e1a1ef9a2..1a693d2ddca 100644
>>>> >> --- a/gcc/config/arm/arm.c
>>>> >> +++ b/gcc/config/arm/arm.c
>>>> >> @@ -32139,6 +32139,31 @@ arm_run_selftests (void)
>>>> >> }
>>>> >> } /* Namespace selftest. */
>>>> >>
>>>> >> +
>>>> >> +/* Generate code to enable conditional branches in functions over 1
>>>> MiB. */
>>>> >> +const char *
>>>> >> +arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
>>>> >> + const char * branch_format)
>>>> >
>>>> > Not sure if this is some munging from the attachment but check
>>>> > vertical alignment of parameters.
>>>> >
>>>>
>>>> .Fixed!
>>>>
>>>> >> +{
>>>> >> + rtx_code_label * tmp_label = gen_label_rtx ();
>>>> >> + char label_buf[256];
>>>> >> + char buffer[128];
>>>> >> + ASM_GENERATE_INTERNAL_LABEL (label_buf, dest , \
>>>> >> + CODE_LABEL_NUMBER (tmp_label));
>>>> >> + const char *label_ptr = arm_strip_name_encoding (label_buf);
>>>> >> + rtx dest_label = operands[pos_label];
>>>> >> + operands[pos_label] = tmp_label;
>>>> >> +
>>>> >> + snprintf (buffer, sizeof (buffer), "%s%s", branch_format ,
>>>> >> label_ptr);
>>>> >> + output_asm_insn (buffer, operands);
>>>> >> +
>>>> >> + snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label,
>>>> label_ptr);
>>>> >> + operands[pos_label] = dest_label;
>>>> >> + output_asm_insn (buffer, operands);
>>>> >> + return "";
>>>> >> +}
>>>> >> +
>>>> >> +
>>>> >
>>>> > Unnecessary extra newline.
>>>> >
>>>>
>>>> .Fixed!
>>>>
>>>> >> #undef TARGET_RUN_TARGET_SELFTESTS
>>>> >> #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
>>>> >> #endif /* CHECKING_P */
>>>> >> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
>>>> >> index f861c72ccfc..634fd0a59da 100644
>>>> >> --- a/gcc/config/arm/arm.md
>>>> >> +++ b/gcc/config/arm/arm.md
>>>> >> @@ -6686,9 +6686,16 @@
>>>> >> ;; And for backward branches we have
>>>> >> ;; (neg_range - neg_base_offs + pc_offs) = (neg_range - (-2 or -4)
>>>> >>+ 4).
>>>> >> ;;
>>>> >> +;; In 16-bit Thumb these ranges are:
>>>> >> ;; For a 'b' pos_range = 2046, neg_range = -2048 giving
>>>> (-2040->2048).
>>>> >> ;; For a 'b<cond>' pos_range = 254, neg_range = -256 giving (-250
>>>> >>->256).
>>>> >>
>>>> >> +;; In 32-bit Thumb these ranges are:
>>>> >> +;; For a 'b' +/- 16MB is not checked for.
>>>> >> +;; For a 'b<cond>' pos_range = 1048574, neg_range = -1048576 giving
>>>> >> +;; (-1048568 -> 1048576).
>>>> >> +
>>>> >> +
>>>> >
>>>> > Unnecessary extra newline.
>>>> >
>>>>
>>>> .Fixed!
>>>>
>>>> >> (define_expand "cbranchsi4"
>>>> >> [(set (pc) (if_then_else
>>>> >> (match_operator 0 "expandable_comparison_operator"
>>>> >> @@ -6947,22 +6954,42 @@
>>>> >> (pc)))]
>>>> >> "TARGET_32BIT"
>>>> >> "*
>>>> >> - if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2)
>>>> >> - {
>>>> >> - arm_ccfsm_state += 2;
>>>> >> - return \"\";
>>>> >> - }
>>>> >> - return \"b%d1\\t%l0\";
>>>> >> + if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2)
>>>> >> + {
>>>> >> + arm_ccfsm_state += 2;
>>>> >> + return \"\";
>>>> >> + }
>>>> >> + switch (get_attr_length (insn))
>>>> >> + {
>>>> >> + // Thumb2 16-bit b{cond}
>>>> >> + case 2:
>>>> >> +
>>>> >> + // Thumb2 32-bit b{cond}
>>>> >> + case 4: return \"b%d1\\t%l0\";break;
>>>> >> +
>>>> >> + // Thumb2 b{cond} out of range. Use unconditional branch.
>>>> >> + case 8: return arm_gen_far_branch \
>>>> >> + (operands, 0, \"Lbcond\", \"b%D1\t\");
>>>> >> + break;
>>>> >> +
>>>> >> + // A32 b{cond}
>>>> >> + default: return \"b%d1\\t%l0\";
>>>> >> + }
>>>> >
>>>> > Please fix indentation here.
>>>> >
>>>>
>>>> .Fixed together with below changes.
>>>>
>>>> >> "
>>>> >> [(set_attr "conds" "use")
>>>> >> (set_attr "type" "branch")
>>>> >> (set (attr "length")
>>>> >> - (if_then_else
>>>> >> - (and (match_test "TARGET_THUMB2")
>>>> >> - (and (ge (minus (match_dup 0) (pc)) (const_int -250))
>>>> >> - (le (minus (match_dup 0) (pc)) (const_int 256))))
>>>> >> - (const_int 2)
>>>> >> - (const_int 4)))]
>>>> >> + (if_then_else (match_test "TARGET_THUMB2")
>>>> >> + (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250))
>>>> >> + (le (minus (match_dup 0) (pc)) (const_int 256)))
>>>> >> + (const_int 2)
>>>> >> + (if_then_else (and (ge (minus (match_dup 0) (pc))
>>>> >> + (const_int -1048568))
>>>> >> + (le (minus (match_dup 0) (pc)) (const_int
>>>> >> 1048576)))
>>>> >> + (const_int 4)
>>>> >> + (const_int 8)))
>>>> >> + (const_int 10)))
>>>> >> + ]
>>>> >
>>>> > This conditional is unreadable and is getting quite complex.
>>>> >
>>>> > Please fix the indentation and add some comments to indicate when
>>>> > this is 2, 4, 8, 10 above the pattern and ask for the comment to
>>>> > be in sync with this.
>>>> >
>>>> > How did we end up with length 10 ? That indicates 2 4 byte instructions
>>>> > and a 2 byte instruction ? You are handling lengths 2, 4, 8 above in
>>>> > the switch - is length 10 going to be a single A32 b<cond> instruction ?
>>>> >
>>>> > What am I missing ?
>>>> >
>>>> >
>>>>
>>>> Ah sorry, I had not realised that the "length" related to the number of
>>>> bytes in the instruction, so I just used it as a variable to then check
>>>> in the switch().
>>>> And yes, you are correct in assuming that length 10 would have been the
>>>> A32 b<cond> version.
>>>> So the mapping I had in mind was:
>>>> 2-> Thumb2 b<cond> - narrow 16bit version
>>>> 4-> Thumb2 b<cond> - wide 32bit version
>>>> 8-> Thumb2 b - "far branch".
>>>> 10-> A32 b<cond>
>>>>
>>>> The new version that maintains the "length=number of bytes" would be:
>>>>
>>>> 2-> Thumb2 b<cond> - narrow 16bit version
>>>> 4-> Thumb2 b<cond> - wide 32bit version OR A32 b<cond>
>>>> 6-> Thumb2 "far branch" made up from one b<cond> to a very close Lbcond
>>>> label (so 16 bits) and one b for 32 bits. (so 2+4 == 6)
>>>>
>>>> I've gone ahead and done this in the new proposed patch. Let me know if
>>>> it's ok! (also I changed the first check to !TARGET_THUMB2 - this makes
>>>> it slightly more readable). I'm still not sure about this, so any
>>>> suggestions are welcome!
>>>>
>>>> >
>>>> >> )
>>>> >>
>>>> >> (define_insn "*arm_cond_branch_reversed"
>>>> >> @@ -6978,17 +7005,36 @@
>>>> >> arm_ccfsm_state += 2;
>>>> >> return \"\";
>>>> >> }
>>>> >> - return \"b%D1\\t%l0\";
>>>> >> + switch (get_attr_length (insn))
>>>> >> + {
>>>> >> + // Thumb2 16-bit b{cond}
>>>> >> + case 2:
>>>> >> +
>>>> >> + // Thumb2 32-bit b{cond}
>>>> >> + case 4: return \"b%D1\\t%l0\";break;
>>>> >> +
>>>> >> + // Thumb2 b{cond} out of range. Use unconditional branch.
>>>> >> + case 8: return arm_gen_far_branch \
>>>> >> + (operands, 0, \"Lbcond\", \"b%d1\t\");
>>>> >> + break;
>>>> >> + // A32 b{cond}
>>>> >> + default: return \"b%D1\\t%l0\";
>>>> >> + }
>>>> >> "
>>>> >> [(set_attr "conds" "use")
>>>> >> (set_attr "type" "branch")
>>>> >> (set (attr "length")
>>>> >> - (if_then_else
>>>> >> - (and (match_test "TARGET_THUMB2")
>>>> >> - (and (ge (minus (match_dup 0) (pc)) (const_int -250))
>>>> >> - (le (minus (match_dup 0) (pc)) (const_int 256))))
>>>> >> - (const_int 2)
>>>> >> - (const_int 4)))]
>>>> >> + (if_then_else (match_test "TARGET_THUMB2")
>>>> >> + (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250))
>>>> >> + (le (minus (match_dup 0) (pc)) (const_int 256)))
>>>> >> + (const_int 2)
>>>> >> + (if_then_else (and (ge (minus (match_dup 0) (pc))
>>>> >> + (const_int -1048568))
>>>> >> + (le (minus (match_dup 0) (pc)) (const_int 1048576)))
>>>> >> + (const_int 4)
>>>> >> + (const_int 8)))
>>>> >> + (const_int 10)))
>>>> >> + ]
>>>> >
>>>> > Same comments as above apply here too.
>>>> >
>>>>
>>>> Same as above.
>>>>
>>>> Thank you for the feedback and apologies for being a clueless :)
>>>>
>>>> And, of course, let me know of any problems or queries!
>>>>
>>>> Cheers,
>>>> Stam
>>>>
>>>> > Ramana
>>>> >
>>>> >> )
>>>> >>
>>>> >>
>>>> >> diff --git a/gcc/testsuite/gcc.target/arm/pr91816.c
>>>> b/gcc/testsuite/gcc.target/arm/pr91816.c
>>>> >> new file mode 100644
>>>> >> index 00000000000..176bf61780b
>>>> >> --- /dev/null
>>>> >> +++ b/gcc/testsuite/gcc.target/arm/pr91816.c
>>>> >> @@ -0,0 +1,102 @@
>>>> >> +/* { dg-do compile } */
>>>> >> +/* { dg-options "-march=armv7-a -mthumb -mfpu=vfpv3-d16" } */
>>>> >> +int printf(const char *, ...);
>>>> >> +
>>>> >> +__attribute__((noinline,noclone)) void f1(int a)
>>>> >> +{
>>>> >> + if (a) {
>>>> >> +#define HW0 printf("Hello World!\n");
>>>> >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>>> >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>>> >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>>> >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>>> >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>>> >> + HW0
>>>> >> + }
>>>> >> +}
>>>> >> +
>>>> >> +__attribute__((noinline,noclone)) void f2(int a)
>>>> >> +{
>>>> >> + if (a) {
>>>> >> +#define HW0 printf("Hello World!\n");
>>>> >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>>> >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>>> >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>>> >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>>> >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>>> >> + HW3
>>>> >> + }
>>>> >> +}
>>>> >> +
>>>> >> +
>>>> >> +__attribute__((noinline,noclone)) void f3(int a)
>>>> >> +{
>>>> >> + if (a) {
>>>> >> +#define HW0 printf("Hello World!\n");
>>>> >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>>> >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>>> >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>>> >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>>> >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>>> >> + HW5
>>>> >> + }
>>>> >> +}
>>>> >> +
>>>> >> +__attribute__((noinline,noclone)) void f4(int a)
>>>> >> +{
>>>> >> + if (a==1) {
>>>> >> +#define HW0 printf("Hello World!\n");
>>>> >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>>> >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>>> >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>>> >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>>> >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>>> >> + HW0
>>>> >> + }
>>>> >> +}
>>>> >> +
>>>> >> +__attribute__((noinline,noclone)) void f5(int a)
>>>> >> +{
>>>> >> + if (a==1) {
>>>> >> +#define HW0 printf("Hello World!\n");
>>>> >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>>> >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>>> >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>>> >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>>> >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>>> >> + HW3
>>>> >> + }
>>>> >> +}
>>>> >> +
>>>> >> +
>>>> >> +__attribute__((noinline,noclone)) void f6(int a)
>>>> >> +{
>>>> >> + if (a==1) {
>>>> >> +#define HW0 printf("Hello World!\n");
>>>> >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>>> >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>>> >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>>> >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>>> >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>>> >> + HW5
>>>> >> + }
>>>> >> +}
>>>> >> +
>>>> >> +
>>>> >> +int main(void)
>>>> >> +{
>>>> >> + f1(0);
>>>> >> + f2(0);
>>>> >> + f3(0);
>>>> >> + f4(0);
>>>> >> + f5(0);
>>>> >> + f6(0);
>>>> >> + return 0;
>>>> >> +}
>>>> >> +
>>>> >> +
>>>> >> +/* { dg-final { scan-assembler-times "beq\\t.L\[0-9\]" 2 } } */
>>>> >> +/* { dg-final { scan-assembler-times "beq\\t.Lbcond\[0-9\]" 1 } } */
>>>> >> +/* { dg-final { scan-assembler-times "bne\\t.L\[0-9\]" 2 } } */
>>>> >> +/* { dg-final { scan-assembler-times "bne\\t.Lbcond\[0-9\]" 1 } } */
>>>> >> +/* { dg-final { scan-assembler-times "b\\t.L\[0-9\]" 2 } } */
>>>> >
>>>>
>>>
>>> 1.patch
>>>
>>> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
>>> index f995974f9bb..59ec219da3d 100644
>>> --- a/gcc/config/arm/arm-protos.h
>>> +++ b/gcc/config/arm/arm-protos.h
>>> @@ -570,4 +570,6 @@ void arm_parse_option_features (sbitmap, const
>>> cpu_arch_option *,
>>>
>>> void arm_initialize_isa (sbitmap, const enum isa_feature *);
>>>
>>> +const char * arm_gen_far_branch (rtx *, int, const char *, const char *);
>>> +
>>> #endif /* ! GCC_ARM_PROTOS_H */
>>> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
>>> index 39e1a1ef9a2..7a69ddb6b7b 100644
>>> --- a/gcc/config/arm/arm.c
>>> +++ b/gcc/config/arm/arm.c
>>> @@ -32139,6 +32139,30 @@ arm_run_selftests (void)
>>> }
>>> } /* Namespace selftest. */
>>>
>>> +
>>> +/* Generate code to enable conditional branches in functions over 1 MiB.
>>> */
>>>
>>>
>>> Please document the function parameters in this comment as other functions
>>> in
>>> this file (try to) do.
>>
>> Done :)
>>>
>>>
>>> +const char *
>>> +arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
>>> + const char * branch_format)
>>> +{
>>> + rtx_code_label * tmp_label = gen_label_rtx ();
>>> + char label_buf[256];
>>> + char buffer[128];
>>> + ASM_GENERATE_INTERNAL_LABEL (label_buf, dest , \
>>> + CODE_LABEL_NUMBER (tmp_label));
>>> + const char *label_ptr = arm_strip_name_encoding (label_buf);
>>> + rtx dest_label = operands[pos_label];
>>> + operands[pos_label] = tmp_label;
>>> +
>>> + snprintf (buffer, sizeof (buffer), "%s%s", branch_format , label_ptr);
>>> + output_asm_insn (buffer, operands);
>>> +
>>> + snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label,
>>> label_ptr);
>>> + operands[pos_label] = dest_label;
>>> + output_asm_insn (buffer, operands);
>>> + return "";
>>> +}
>>> +
>>> #undef TARGET_RUN_TARGET_SELFTESTS
>>> #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
>>> #endif /* CHECKING_P */
>>> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
>>> index f861c72ccfc..7e5e1489214 100644
>>> --- a/gcc/config/arm/arm.md
>>> +++ b/gcc/config/arm/arm.md
>>> @@ -6686,9 +6686,15 @@
>>> ;; And for backward branches we have
>>> ;; (neg_range - neg_base_offs + pc_offs) = (neg_range - (-2 or -4) + 4).
>>> ;;
>>> +;; In 16-bit Thumb these ranges are:
>>> ;; For a 'b' pos_range = 2046, neg_range = -2048 giving
>>> (-2040->2048).
>>> ;; For a 'b<cond>' pos_range = 254, neg_range = -256 giving (-250
>>> ->256).
>>>
>>> +;; In 32-bit Thumb these ranges are:
>>> +;; For a 'b' ± 16MB is not checked for.
>>> +;; For a 'b<cond>' pos_range = 1048574, neg_range = -1048576 giving
>>> +;; (-1048568 -> 1048576).
>>> +
>>> (define_expand "cbranchsi4"
>>> [(set (pc) (if_then_else
>>> (match_operator 0 "expandable_comparison_operator"
>>> @@ -6946,23 +6952,56 @@
>>> (label_ref (match_operand 0 "" ""))
>>> (pc)))]
>>> "TARGET_32BIT"
>>> - "*
>>> - if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2)
>>> - {
>>> - arm_ccfsm_state += 2;
>>> - return \"\";
>>> - }
>>> - return \"b%d1\\t%l0\";
>>> - "
>>> + {
>>> + if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2)
>>> + {
>>> + arm_ccfsm_state += 2;
>>> + return "";
>>> + }
>>> + switch (get_attr_length (insn))
>>> + {
>>> + /* Thumb2 16-bit b{cond}. */
>>> + case 2:
>>> +
>>> + /* Thumb2 32-bit b{cond} or A32 b{cond}. */
>>> + case 4: return "b%d1\t%l0";
>>> + break;
>>> +
>>> + /* Thumb2 b{cond} out of range. Use 16-bit b{cond} and
>>> + unconditional branch b. */
>>> + default: return arm_gen_far_branch \
>>> + (operands, 0, "Lbcond", "b%D1\t");
>>> + }
>>>
>>>
>>> The indentation here is wrong. Please look at how other switch statements
>>> are
>>> written in the backend for guidance: 2 space indentation, new line after
>>> the
>>> cases etc.
>>
>> Done
>>>
>>> + }
>>> [(set_attr "conds" "use")
>>> (set_attr "type" "branch")
>>> (set (attr "length")
>>> - (if_then_else
>>> - (and (match_test "TARGET_THUMB2")
>>> - (and (ge (minus (match_dup 0) (pc)) (const_int -250))
>>> - (le (minus (match_dup 0) (pc)) (const_int 256))))
>>> - (const_int 2)
>>> - (const_int 4)))]
>>> + (if_then_else (match_test "!TARGET_THUMB2")
>>> +
>>> + ;;Target is not Thumb2, therefore is A32. Generate b{cond}.
>>> + (const_int 4)
>>> +
>>> + ;; Check if target is within 16-bit Thumb2 b{cond} range.
>>> + (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250))
>>> + (le (minus (match_dup 0) (pc)) (const_int 256)))
>>> +
>>> + ;; Target is Thumb2, within narrow range.
>>> + ;; Generate b{cond}.
>>> + (const_int 2)
>>> +
>>> + ;; Check if target is within 32-bit Thumb2 b{cond} range.
>>> + (if_then_else (and (ge (minus (match_dup 0)
>>> + (pc))(const_int -1048568))
>>> + (le (minus (match_dup 0)
>>> + (pc)) (const_int 1048576)))
>>> +
>>> + ;; Target is Thumb2, within wide range.
>>> + ;; Generate b{cond}
>>> + (const_int 4)
>>> + ;; Target is Thumb2, out of range.
>>> + ;; Generate narrow b{cond} and unconditional branch b.
>>> + (const_int 6)))))
>>> + ]
>>>
>>>
>>> Likewise on the indentation.
>>
>> Done, sorry about that!
>>>
>>> )
>>>
>>> (define_insn "*arm_cond_branch_reversed"
>>> @@ -6972,23 +7011,56 @@
>>> (pc)
>>> (label_ref (match_operand 0 "" ""))))]
>>> "TARGET_32BIT"
>>> - "*
>>> - if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2)
>>> - {
>>> - arm_ccfsm_state += 2;
>>> - return \"\";
>>> - }
>>> - return \"b%D1\\t%l0\";
>>> - "
>>> + {
>>> + if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2)
>>> + {
>>> + arm_ccfsm_state += 2;
>>> + return "";
>>> + }
>>> + switch (get_attr_length (insn))
>>> + {
>>> + /* Thumb2 16-bit b{cond}. */
>>> + case 2:
>>> +
>>> + /* Thumb2 32-bit b{cond} or A32 b{cond}. */
>>> + case 4: return "b%D1\t%l0";
>>> + break;
>>> +
>>> + /* Thumb2 b{cond} out of range. Use 16-bit b{cond} and
>>> + unconditional branch b. */
>>> + default: return arm_gen_far_branch \
>>> + (operands, 0, "Lbcond", "b%d1\t");
>>> + }
>>>
>>>
>>>
>>>
>>> + }
>>> [(set_attr "conds" "use")
>>> (set_attr "type" "branch")
>>> (set (attr "length")
>>> - (if_then_else
>>> - (and (match_test "TARGET_THUMB2")
>>> - (and (ge (minus (match_dup 0) (pc)) (const_int -250))
>>> - (le (minus (match_dup 0) (pc)) (const_int 256))))
>>> - (const_int 2)
>>> - (const_int 4)))]
>>> + (if_then_else (match_test "!TARGET_THUMB2")
>>> +
>>> + ;;Target is not Thumb2, therefore is A32. Generate b{cond}.
>>> + (const_int 4)
>>> +
>>> + ;; Check if target is within 16-bit Thumb2 b{cond} range.
>>> + (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250))
>>> + (le (minus (match_dup 0) (pc)) (const_int 256)))
>>> +
>>> + ;; Target is Thumb2, within narrow range.
>>> + ;; Generate b{cond}.
>>> + (const_int 2)
>>> +
>>> + ;; Check if target is within 32-bit Thumb2 b{cond} range.
>>> + (if_then_else (and (ge (minus (match_dup 0)
>>> + (pc))(const_int -1048568))
>>> + (le (minus (match_dup 0)
>>> + (pc)) (const_int 1048576)))
>>> +
>>> + ;; Target is Thumb2, within wide range.
>>> + ;; Generate b{cond}.
>>> + (const_int 4)
>>> + ;; Target is Thumb2, out of range.
>>> + ;; Generate narrow b{cond} and unconditional branch b.
>>> + (const_int 6)))))
>>> + ]
>>> )
>>>
>>>
>>> Otherwise this looks reasonable to me. Ramana, did you have any further
>>> comments on the patch?
>>> Thanks,
>>> Kyrill
>>>
>>>
>>>
>>> diff --git a/gcc/testsuite/gcc.target/arm/pr91816.c
>>> b/gcc/testsuite/gcc.target/arm/pr91816.c
>>> new file mode 100644
>>> index 00000000000..176bf61780b
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/arm/pr91816.c
>>> @@ -0,0 +1,102 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-options "-march=armv7-a -mthumb -mfpu=vfpv3-d16" } */
>>> +int printf(const char *, ...);
>>> +
>>> +__attribute__((noinline,noclone)) void f1(int a)
>>> +{
>>> + if (a) {
>>> +#define HW0 printf("Hello World!\n");
>>> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>> + HW0
>>> + }
>>> +}
>>> +
>>> +__attribute__((noinline,noclone)) void f2(int a)
>>> +{
>>> + if (a) {
>>> +#define HW0 printf("Hello World!\n");
>>> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>> + HW3
>>> + }
>>> +}
>>> +
>>> +
>>> +__attribute__((noinline,noclone)) void f3(int a)
>>> +{
>>> + if (a) {
>>> +#define HW0 printf("Hello World!\n");
>>> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>> + HW5
>>> + }
>>> +}
>>> +
>>> +__attribute__((noinline,noclone)) void f4(int a)
>>> +{
>>> + if (a==1) {
>>> +#define HW0 printf("Hello World!\n");
>>> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>> + HW0
>>> + }
>>> +}
>>> +
>>> +__attribute__((noinline,noclone)) void f5(int a)
>>> +{
>>> + if (a==1) {
>>> +#define HW0 printf("Hello World!\n");
>>> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>> + HW3
>>> + }
>>> +}
>>> +
>>> +
>>> +__attribute__((noinline,noclone)) void f6(int a)
>>> +{
>>> + if (a==1) {
>>> +#define HW0 printf("Hello World!\n");
>>> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>> + HW5
>>> + }
>>> +}
>>> +
>>> +
>>> +int main(void)
>>> +{
>>> + f1(0);
>>> + f2(0);
>>> + f3(0);
>>> + f4(0);
>>> + f5(0);
>>> + f6(0);
>>> + return 0;
>>> +}
>>> +
>>> +
>>> +/* { dg-final { scan-assembler-times "beq\\t.L\[0-9\]" 2 } } */
>>> +/* { dg-final { scan-assembler-times "beq\\t.Lbcond\[0-9\]" 1 } } */
>>> +/* { dg-final { scan-assembler-times "bne\\t.L\[0-9\]" 2 } } */
>>> +/* { dg-final { scan-assembler-times "bne\\t.Lbcond\[0-9\]" 1 } } */
>>> +/* { dg-final { scan-assembler-times "b\\t.L\[0-9\]" 2 } } */
>>>
>>>