On 11/15/19 5:26 PM, Stam Markianos-Wright wrote:
> Pinging with more correct maintainers this time :)
> 
> Also would need to backport to gcc7,8,9, but need to get this approved 
> first!
> 
> Thank you,
> Stam
> 
> 
> -------- Forwarded Message --------
> Subject: Re: [PATCH][GCC][ARM] Arm generates out of range conditional 
> branches in Thumb2 (PR91816)
> Date: Mon, 21 Oct 2019 10:37:09 +0100
> From: Stam Markianos-Wright <stam.markianos-wri...@arm.com>
> To: Ramana Radhakrishnan <ramana....@googlemail.com>
> CC: gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>, nd <n...@arm.com>, 
> James Greenhalgh <james.greenha...@arm.com>, Richard Earnshaw 
> <richard.earns...@arm.com>
> 
> 
> 
> On 10/13/19 4:23 PM, Ramana Radhakrishnan wrote:
>>>
>>> Patch bootstrapped and regression tested on arm-none-linux-gnueabihf,
>>> however, on my native Aarch32 setup the test times out when run as part
>>> of a big "make check-gcc" regression, but not when run individually.
>>>
>>> 2019-10-11  Stamatis Markianos-Wright <stam.markianos-wri...@arm.com>
>>>
>>>     * config/arm/arm.md: Update b<cond> for Thumb2 range checks.
>>>     * config/arm/arm.c: New function arm_gen_far_branch.
>>>        * config/arm/arm-protos.h: New function arm_gen_far_branch
>>>     prototype.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>> 2019-10-11  Stamatis Markianos-Wright <stam.markianos-wri...@arm.com>
>>>
>>>        * testsuite/gcc.target/arm/pr91816.c: New test.
>>
>>> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
>>> index f995974f9bb..1dce333d1c3 100644
>>> --- a/gcc/config/arm/arm-protos.h
>>> +++ b/gcc/config/arm/arm-protos.h
>>> @@ -570,4 +570,7 @@ void arm_parse_option_features (sbitmap, const 
>>> cpu_arch_option *,
>>>   void arm_initialize_isa (sbitmap, const enum isa_feature *);
>>> +const char * arm_gen_far_branch (rtx *, int,const char * , const 
>>> char *);
>>> +
>>> +
>>
>> Lets get the nits out of the way.
>>
>> Unnecessary extra new line, need a space between int and const above.
>>
>>
> 
> .Fixed!
> 
>>>   #endif /* ! GCC_ARM_PROTOS_H */
>>> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
>>> index 39e1a1ef9a2..1a693d2ddca 100644
>>> --- a/gcc/config/arm/arm.c
>>> +++ b/gcc/config/arm/arm.c
>>> @@ -32139,6 +32139,31 @@ arm_run_selftests (void)
>>>   }
>>>   } /* Namespace selftest.  */
>>> +
>>> +/* Generate code to enable conditional branches in functions over 1 
>>> MiB.  */
>>> +const char *
>>> +arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
>>> +            const char * branch_format)
>>
>> Not sure if this is some munging from the attachment but check
>> vertical alignment of parameters.
>>
> 
> .Fixed!
> 
>>> +{
>>> +  rtx_code_label * tmp_label = gen_label_rtx ();
>>> +  char label_buf[256];
>>> +  char buffer[128];
>>> +  ASM_GENERATE_INTERNAL_LABEL (label_buf, dest , \
>>> +            CODE_LABEL_NUMBER (tmp_label));
>>> +  const char *label_ptr = arm_strip_name_encoding (label_buf);
>>> +  rtx dest_label = operands[pos_label];
>>> +  operands[pos_label] = tmp_label;
>>> +
>>> +  snprintf (buffer, sizeof (buffer), "%s%s", branch_format , 
>>> label_ptr);
>>> +  output_asm_insn (buffer, operands);
>>> +
>>> +  snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, 
>>> label_ptr);
>>> +  operands[pos_label] = dest_label;
>>> +  output_asm_insn (buffer, operands);
>>> +  return "";
>>> +}
>>> +
>>> +
>>
>> Unnecessary extra newline.
>>
> 
> .Fixed!
> 
>>>   #undef TARGET_RUN_TARGET_SELFTESTS
>>>   #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
>>>   #endif /* CHECKING_P */
>>> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
>>> index f861c72ccfc..634fd0a59da 100644
>>> --- a/gcc/config/arm/arm.md
>>> +++ b/gcc/config/arm/arm.md
>>> @@ -6686,9 +6686,16 @@
>>>   ;; And for backward branches we have
>>>   ;;   (neg_range - neg_base_offs + pc_offs) = (neg_range - (-2 or 
>>> -4) + 4).
>>>   ;;
>>> +;; In 16-bit Thumb these ranges are:
>>>   ;; For a 'b'       pos_range = 2046, neg_range = -2048 giving 
>>> (-2040->2048).
>>>   ;; For a 'b<cond>' pos_range = 254,  neg_range = -256  giving (-250 
>>> ->256).
>>> +;; In 32-bit Thumb these ranges are:
>>> +;; For a 'b'       +/- 16MB is not checked for.
>>> +;; For a 'b<cond>' pos_range = 1048574,  neg_range = -1048576  giving
>>> +;; (-1048568 -> 1048576).
>>> +
>>> +
>>
>> Unnecessary extra newline.
>>
> 
> .Fixed!
> 
>>>   (define_expand "cbranchsi4"
>>>     [(set (pc) (if_then_else
>>>             (match_operator 0 "expandable_comparison_operator"
>>> @@ -6947,22 +6954,42 @@
>>>                 (pc)))]
>>>     "TARGET_32BIT"
>>>     "*
>>> -  if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2)
>>> -    {
>>> -      arm_ccfsm_state += 2;
>>> -      return \"\";
>>> -    }
>>> -  return \"b%d1\\t%l0\";
>>> +     if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2)
>>> +      {
>>> +    arm_ccfsm_state += 2;
>>> +    return \"\";
>>> +      }
>>> +     switch (get_attr_length (insn))
>>> +      {
>>> +    // Thumb2 16-bit b{cond}
>>> +    case 2:
>>> +
>>> +    // Thumb2 32-bit b{cond}
>>> +    case 4: return \"b%d1\\t%l0\";break;
>>> +
>>> +    // Thumb2 b{cond} out of range.  Use unconditional branch.
>>> +    case 8: return arm_gen_far_branch \
>>> +        (operands, 0, \"Lbcond\", \"b%D1\t\");
>>> +    break;
>>> +
>>> +    // A32 b{cond}
>>> +    default: return \"b%d1\\t%l0\";
>>> +      }
>>
>> Please fix indentation here.
>>
> 
> .Fixed together with below changes.
> 
>>>     "
>>>     [(set_attr "conds" "use")
>>>      (set_attr "type" "branch")
>>>      (set (attr "length")
>>> -    (if_then_else
>>> -       (and (match_test "TARGET_THUMB2")
>>> -        (and (ge (minus (match_dup 0) (pc)) (const_int -250))
>>> -             (le (minus (match_dup 0) (pc)) (const_int 256))))
>>> -       (const_int 2)
>>> -       (const_int 4)))]
>>> +    (if_then_else (match_test "TARGET_THUMB2")
>>> +    (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250))
>>> +    (le (minus (match_dup 0) (pc)) (const_int 256)))
>>> +    (const_int 2)
>>> +    (if_then_else (and (ge (minus (match_dup 0) (pc))
>>> +                        (const_int -1048568))
>>> +            (le (minus (match_dup 0) (pc)) (const_int 1048576)))
>>> +    (const_int 4)
>>> +    (const_int 8)))
>>> +    (const_int 10)))
>>> +   ]
>>
>> This conditional is unreadable and is getting quite complex.
>>
>> Please fix the indentation and add some comments to indicate when
>> this is 2, 4, 8, 10 above the pattern and ask for the comment to
>> be in sync with this.
>>
>> How did we end up with length 10 ? That indicates 2 4 byte instructions
>> and a 2 byte instruction ? You are handling lengths 2, 4, 8 above in
>> the switch - is length 10 going to be a single A32 b<cond> instruction ?
>>
>> What am I missing ?
>>
>>
> 
> Ah sorry, I had not realised that the "length" related to the number of 
> bytes in the instruction, so I just used it as a variable to then check 
> in the switch().
> And yes, you are correct in assuming that length 10 would have been the 
> A32 b<cond> version.
> So the mapping I had in mind was:
> 2->  Thumb2 b<cond> - narrow 16bit version
> 4->  Thumb2 b<cond> - wide 32bit version
> 8->  Thumb2 b       - "far branch".
> 10-> A32 b<cond>
> 
> The new version that maintains the "length=number of bytes" would be:
> 
> 2->  Thumb2 b<cond> - narrow 16bit version
> 4->  Thumb2 b<cond> - wide 32bit version OR A32 b<cond>
> 6->  Thumb2 "far branch" made up from one b<cond> to a very close Lbcond 
> label (so 16 bits) and one b for 32 bits. (so 2+4 == 6)
> 
> I've gone ahead and done this in the new proposed patch. Let me know if 
> it's ok! (also I changed the first check to !TARGET_THUMB2 - this makes 
> it slightly more readable). I'm still not sure about this, so any 
> suggestions are welcome!

Ping :)

> 
>>
>>>   )
>>>   (define_insn "*arm_cond_branch_reversed"
>>> @@ -6978,17 +7005,36 @@
>>>         arm_ccfsm_state += 2;
>>>         return \"\";
>>>       }
>>> -  return \"b%D1\\t%l0\";
>>> +     switch (get_attr_length (insn))
>>> +      {
>>> +    // Thumb2 16-bit b{cond}
>>> +    case 2:
>>> +
>>> +    // Thumb2 32-bit b{cond}
>>> +    case 4: return \"b%D1\\t%l0\";break;
>>> +
>>> +    // Thumb2 b{cond} out of range.  Use unconditional branch.
>>> +    case 8: return arm_gen_far_branch \
>>> +        (operands, 0, \"Lbcond\", \"b%d1\t\");
>>> +        break;
>>> +    // A32 b{cond}
>>> +    default: return \"b%D1\\t%l0\";
>>> +       }
>>>     "
>>>     [(set_attr "conds" "use")
>>>      (set_attr "type" "branch")
>>>      (set (attr "length")
>>> -    (if_then_else
>>> -       (and (match_test "TARGET_THUMB2")
>>> -        (and (ge (minus (match_dup 0) (pc)) (const_int -250))
>>> -             (le (minus (match_dup 0) (pc)) (const_int 256))))
>>> -       (const_int 2)
>>> -       (const_int 4)))]
>>> +    (if_then_else (match_test "TARGET_THUMB2")
>>> +    (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250))
>>> +        (le (minus (match_dup 0) (pc)) (const_int 256)))
>>> +    (const_int 2)
>>> +    (if_then_else (and (ge (minus (match_dup 0) (pc))
>>> +                            (const_int -1048568))
>>> +        (le (minus (match_dup 0) (pc)) (const_int 1048576)))
>>> +    (const_int 4)
>>> +    (const_int 8)))
>>> +    (const_int 10)))
>>> +   ]
>>
>> Same comments as above apply here too.
>>
> 
> Same as above.

Ping :)

> 
> Thank you for the feedback and apologies for being a clueless :)
> 
> And, of course, let me know of any problems or queries!
> 
> Cheers,
> Stam
> 
>> Ramana
>>
>>>   )
>>>   
>>> diff --git a/gcc/testsuite/gcc.target/arm/pr91816.c 
>>> b/gcc/testsuite/gcc.target/arm/pr91816.c
>>> new file mode 100644
>>> index 00000000000..176bf61780b
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/arm/pr91816.c
>>> @@ -0,0 +1,102 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-options "-march=armv7-a -mthumb -mfpu=vfpv3-d16" }  */
>>> +int printf(const char *, ...);
>>> +
>>> +__attribute__((noinline,noclone)) void f1(int a)
>>> +{
>>> +    if (a) {
>>> +#define HW0    printf("Hello World!\n");
>>> +#define HW1    HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>> +#define HW2    HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>> +#define HW3    HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>> +#define HW4    HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>> +#define HW5    HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>> +        HW0
>>> +    }
>>> +}
>>> +
>>> +__attribute__((noinline,noclone)) void f2(int a)
>>> +{
>>> +    if (a) {
>>> +#define HW0    printf("Hello World!\n");
>>> +#define HW1    HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>> +#define HW2    HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>> +#define HW3    HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>> +#define HW4    HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>> +#define HW5    HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>> +        HW3
>>> +    }
>>> +}
>>> +
>>> +
>>> +__attribute__((noinline,noclone)) void f3(int a)
>>> +{
>>> +    if (a) {
>>> +#define HW0    printf("Hello World!\n");
>>> +#define HW1    HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>> +#define HW2    HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>> +#define HW3    HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>> +#define HW4    HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>> +#define HW5    HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>> +        HW5
>>> +    }
>>> +}
>>> +
>>> +__attribute__((noinline,noclone)) void f4(int a)
>>> +{
>>> +    if (a==1) {
>>> +#define HW0    printf("Hello World!\n");
>>> +#define HW1    HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>> +#define HW2    HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>> +#define HW3    HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>> +#define HW4    HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>> +#define HW5    HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>> +        HW0
>>> +    }
>>> +}
>>> +
>>> +__attribute__((noinline,noclone)) void f5(int a)
>>> +{
>>> +    if (a==1) {
>>> +#define HW0    printf("Hello World!\n");
>>> +#define HW1    HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>> +#define HW2    HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>> +#define HW3    HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>> +#define HW4    HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>> +#define HW5    HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>> +        HW3
>>> +    }
>>> +}
>>> +
>>> +
>>> +__attribute__((noinline,noclone)) void f6(int a)
>>> +{
>>> +    if (a==1) {
>>> +#define HW0    printf("Hello World!\n");
>>> +#define HW1    HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>> +#define HW2    HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>> +#define HW3    HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>> +#define HW4    HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>> +#define HW5    HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>> +        HW5
>>> +    }
>>> +}
>>> +
>>> +
>>> +int main(void)
>>> +{
>>> +    f1(0);
>>> +    f2(0);
>>> +    f3(0);
>>> +    f4(0);
>>> +    f5(0);
>>> +    f6(0);
>>> +    return 0;
>>> +}
>>> +
>>> +
>>> +/* { dg-final { scan-assembler-times "beq\\t.L\[0-9\]" 2 } } */
>>> +/* { dg-final { scan-assembler-times "beq\\t.Lbcond\[0-9\]" 1 } } */
>>> +/* { dg-final { scan-assembler-times "bne\\t.L\[0-9\]" 2 } } */
>>> +/* { dg-final { scan-assembler-times "bne\\t.Lbcond\[0-9\]" 1 } } */
>>> +/* { dg-final { scan-assembler-times "b\\t.L\[0-9\]" 2 } } */
>>
> 
> 

Reply via email to