Re: [PATCHv2, ARM, libgcc] New aeabi_idiv function for armv6-m

2016-07-12 Thread Andre Vieira (lists)
On 11/07/16 18:09, Andre Vieira (lists) wrote:
> On 06/07/16 11:52, Andre Vieira (lists) wrote:
>> On 01/07/16 14:40, Ramana Radhakrishnan wrote:
>>>
>>>
>>> On 13/10/15 18:01, Andre Vieira wrote:
 This patch ports the aeabi_idiv routine from Linaro Cortex-Strings 
 (https://git.linaro.org/toolchain/cortex-strings.git), which was 
 contributed by ARM under Free BSD license.

 The new aeabi_idiv routine is used to replace the one in 
 libgcc/config/arm/lib1funcs.S. This replacement happens within the Thumb1 
 wrapper. The new routine is under LGPLv3 license.
>>>
>>> This is not under LGPLv3 . It is under GPLv3 with the runtime library 
>>> exception license, there's a difference. Assuming your licensing 
>>> expectation is ok  read on for more of a review.
>>>

 The main advantage of this version is that it can improve the performance 
 of the aeabi_idiv function for Thumb1. This solution will also increase 
 the code size. So it will only be used if __OPTIMIZE_SIZE__ is not defined.

 Make check passed for armv6-m.

 libgcc/ChangeLog:
 2015-08-10  Hale Wang  
 Andre Vieira  

   * config/arm/lib1funcs.S: Add new wrapper.

 0001-integer-division.patch


 From 832a3d6af6f06399f70b5a4ac3727d55960c93b7 Mon Sep 17 00:00:00 2001
 From: Andre Simoes Dias Vieira 
 Date: Fri, 21 Aug 2015 14:23:28 +0100
 Subject: [PATCH] new wrapper idivmod

 ---
  libgcc/config/arm/lib1funcs.S | 250 
 --
  1 file changed, 217 insertions(+), 33 deletions(-)

 diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
 index 
 252efcbd5385cc58a5ce1e48c6816d36a6f4c797..c9e544114590da8cde88382bea0f67206e593816
  100644
 --- a/libgcc/config/arm/lib1funcs.S
 +++ b/libgcc/config/arm/lib1funcs.S
 @@ -306,34 +306,12 @@ LSYM(Lend_fde):
  #ifdef __ARM_EABI__
  .macro THUMB_LDIV0 name signed
  #if defined(__ARM_ARCH_6M__)
 -  .ifc \signed, unsigned
 -  cmp r0, #0
 -  beq 1f
 -  mov r0, #0
 -  mvn r0, r0  @ 0x
 -1:
 -  .else
 -  cmp r0, #0
 -  beq 2f
 -  blt 3f
 +
 +  push{r0, lr}
mov r0, #0
 -  mvn r0, r0
 -  lsr r0, r0, #1  @ 0x7fff
 -  b   2f
 -3:mov r0, #0x80
 -  lsl r0, r0, #24 @ 0x8000
 -2:
 -  .endif
 -  push{r0, r1, r2}
 -  ldr r0, 4f
 -  adr r1, 4f
 -  add r0, r1
 -  str r0, [sp, #8]
 -  @ We know we are not on armv4t, so pop pc is safe.
 -  pop {r0, r1, pc}
 -  .align  2
 -4:
 -  .word   __aeabi_idiv0 - 4b
 +  bl  SYM(__aeabi_idiv0)
 +  pop {r1, pc}
 +
>>>
>>> I'd still retain the comment about pop pc here because there's often a 
>>> misconception of merging armv4t and armv6m code.
>>>
  #elif defined(__thumb2__)
.syntax unified
.ifc \signed, unsigned
 @@ -945,7 +923,170 @@ LSYM(Lover7):
add dividend, work
.endif
  LSYM(Lgot_result):
 -.endm 
 +.endm
 +
 +#if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__)
 +/* If performance is preferred, the following functions are provided.  */
 +
>>>
>>> Comment above #if please and also check elsewhere in patch.
>>>
 +/* Branch to div(n), and jump to label if curbit is lo than divisior.  */
 +.macro BranchToDiv n, label
 +  lsr curbit, dividend, \n
 +  cmp curbit, divisor
 +  blo \label
 +.endm
 +
 +/* Body of div(n).  Shift the divisor in n bits and compare the divisor
 +   and dividend.  Update the dividend as the substruction result.  */
 +.macro DoDiv n
 +  lsr curbit, dividend, \n
 +  cmp curbit, divisor
 +  bcc 1f
 +  lsl curbit, divisor, \n
 +  sub dividend, dividend, curbit
 +
 +1:adc result, result
 +.endm
 +
 +/* The body of division with positive divisor.  Unless the divisor is very
 +   big, shift it up in multiples of four bits, since this is the amount of
 +   unwinding in the main division loop.  Continue shifting until the 
 divisor
 +   is larger than the dividend.  */
 +.macro THUMB1_Div_Positive
 +  mov result, #0
 +  BranchToDiv #1, LSYM(Lthumb1_div1)
 +  BranchToDiv #4, LSYM(Lthumb1_div4)
 +  BranchToDiv #8, LSYM(Lthumb1_div8)
 +  BranchToDiv #12, LSYM(Lthumb1_div12)
 +  BranchToDiv #16, LSYM(Lthumb1_div16)
 +LSYM(Lthumb1_div_large_positive):
 +  mov result, #0xff
 +  lsl divisor, divisor, #8
 +  rev result, result
 +  lsr curbit, dividend, #16
 +  cmp curbit, divisor
 +  blo 1f
 +  asr result, #8

Re: [PATCHv2, ARM, libgcc] New aeabi_idiv function for armv6-m

2016-07-11 Thread Andre Vieira (lists)
On 06/07/16 11:52, Andre Vieira (lists) wrote:
> On 01/07/16 14:40, Ramana Radhakrishnan wrote:
>>
>>
>> On 13/10/15 18:01, Andre Vieira wrote:
>>> This patch ports the aeabi_idiv routine from Linaro Cortex-Strings 
>>> (https://git.linaro.org/toolchain/cortex-strings.git), which was 
>>> contributed by ARM under Free BSD license.
>>>
>>> The new aeabi_idiv routine is used to replace the one in 
>>> libgcc/config/arm/lib1funcs.S. This replacement happens within the Thumb1 
>>> wrapper. The new routine is under LGPLv3 license.
>>
>> This is not under LGPLv3 . It is under GPLv3 with the runtime library 
>> exception license, there's a difference. Assuming your licensing expectation 
>> is ok  read on for more of a review.
>>
>>>
>>> The main advantage of this version is that it can improve the performance 
>>> of the aeabi_idiv function for Thumb1. This solution will also increase the 
>>> code size. So it will only be used if __OPTIMIZE_SIZE__ is not defined.
>>>
>>> Make check passed for armv6-m.
>>>
>>> libgcc/ChangeLog:
>>> 2015-08-10  Hale Wang  
>>> Andre Vieira  
>>>
>>>   * config/arm/lib1funcs.S: Add new wrapper.
>>>
>>> 0001-integer-division.patch
>>>
>>>
>>> From 832a3d6af6f06399f70b5a4ac3727d55960c93b7 Mon Sep 17 00:00:00 2001
>>> From: Andre Simoes Dias Vieira 
>>> Date: Fri, 21 Aug 2015 14:23:28 +0100
>>> Subject: [PATCH] new wrapper idivmod
>>>
>>> ---
>>>  libgcc/config/arm/lib1funcs.S | 250 
>>> --
>>>  1 file changed, 217 insertions(+), 33 deletions(-)
>>>
>>> diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
>>> index 
>>> 252efcbd5385cc58a5ce1e48c6816d36a6f4c797..c9e544114590da8cde88382bea0f67206e593816
>>>  100644
>>> --- a/libgcc/config/arm/lib1funcs.S
>>> +++ b/libgcc/config/arm/lib1funcs.S
>>> @@ -306,34 +306,12 @@ LSYM(Lend_fde):
>>>  #ifdef __ARM_EABI__
>>>  .macro THUMB_LDIV0 name signed
>>>  #if defined(__ARM_ARCH_6M__)
>>> -   .ifc \signed, unsigned
>>> -   cmp r0, #0
>>> -   beq 1f
>>> -   mov r0, #0
>>> -   mvn r0, r0  @ 0x
>>> -1:
>>> -   .else
>>> -   cmp r0, #0
>>> -   beq 2f
>>> -   blt 3f
>>> +
>>> +   push{r0, lr}
>>> mov r0, #0
>>> -   mvn r0, r0
>>> -   lsr r0, r0, #1  @ 0x7fff
>>> -   b   2f
>>> -3: mov r0, #0x80
>>> -   lsl r0, r0, #24 @ 0x8000
>>> -2:
>>> -   .endif
>>> -   push{r0, r1, r2}
>>> -   ldr r0, 4f
>>> -   adr r1, 4f
>>> -   add r0, r1
>>> -   str r0, [sp, #8]
>>> -   @ We know we are not on armv4t, so pop pc is safe.
>>> -   pop {r0, r1, pc}
>>> -   .align  2
>>> -4:
>>> -   .word   __aeabi_idiv0 - 4b
>>> +   bl  SYM(__aeabi_idiv0)
>>> +   pop {r1, pc}
>>> +
>>
>> I'd still retain the comment about pop pc here because there's often a 
>> misconception of merging armv4t and armv6m code.
>>
>>>  #elif defined(__thumb2__)
>>> .syntax unified
>>> .ifc \signed, unsigned
>>> @@ -945,7 +923,170 @@ LSYM(Lover7):
>>> add dividend, work
>>>.endif
>>>  LSYM(Lgot_result):
>>> -.endm  
>>> +.endm
>>> +
>>> +#if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__)
>>> +/* If performance is preferred, the following functions are provided.  */
>>> +
>>
>> Comment above #if please and also check elsewhere in patch.
>>
>>> +/* Branch to div(n), and jump to label if curbit is lo than divisior.  */
>>> +.macro BranchToDiv n, label
>>> +   lsr curbit, dividend, \n
>>> +   cmp curbit, divisor
>>> +   blo \label
>>> +.endm
>>> +
>>> +/* Body of div(n).  Shift the divisor in n bits and compare the divisor
>>> +   and dividend.  Update the dividend as the substruction result.  */
>>> +.macro DoDiv n
>>> +   lsr curbit, dividend, \n
>>> +   cmp curbit, divisor
>>> +   bcc 1f
>>> +   lsl curbit, divisor, \n
>>> +   sub dividend, dividend, curbit
>>> +
>>> +1: adc result, result
>>> +.endm
>>> +
>>> +/* The body of division with positive divisor.  Unless the divisor is very
>>> +   big, shift it up in multiples of four bits, since this is the amount of
>>> +   unwinding in the main division loop.  Continue shifting until the 
>>> divisor
>>> +   is larger than the dividend.  */
>>> +.macro THUMB1_Div_Positive
>>> +   mov result, #0
>>> +   BranchToDiv #1, LSYM(Lthumb1_div1)
>>> +   BranchToDiv #4, LSYM(Lthumb1_div4)
>>> +   BranchToDiv #8, LSYM(Lthumb1_div8)
>>> +   BranchToDiv #12, LSYM(Lthumb1_div12)
>>> +   BranchToDiv #16, LSYM(Lthumb1_div16)
>>> +LSYM(Lthumb1_div_large_positive):
>>> +   mov result, #0xff
>>> +   lsl divisor, divisor, #8
>>> +   rev result, result
>>> +   lsr curbit, dividend, #16
>>> +   cmp curbit, divisor
>>> +   blo 1f
>>> +   asr result, #8
>>> +   lsl divisor, divisor, #8
>>> +   beq LSYM(Ldivbyzero_waypoint)
>>> +
>>> +1: lsr curbit, dividend, #12
>>> +   cmp curbit, divisor

Re: [PATCHv2, ARM, libgcc] New aeabi_idiv function for armv6-m

2016-07-06 Thread Andre Vieira (lists)
On 01/07/16 14:40, Ramana Radhakrishnan wrote:
> 
> 
> On 13/10/15 18:01, Andre Vieira wrote:
>> This patch ports the aeabi_idiv routine from Linaro Cortex-Strings 
>> (https://git.linaro.org/toolchain/cortex-strings.git), which was contributed 
>> by ARM under Free BSD license.
>>
>> The new aeabi_idiv routine is used to replace the one in 
>> libgcc/config/arm/lib1funcs.S. This replacement happens within the Thumb1 
>> wrapper. The new routine is under LGPLv3 license.
> 
> This is not under LGPLv3 . It is under GPLv3 with the runtime library 
> exception license, there's a difference. Assuming your licensing expectation 
> is ok  read on for more of a review.
> 
>>
>> The main advantage of this version is that it can improve the performance of 
>> the aeabi_idiv function for Thumb1. This solution will also increase the 
>> code size. So it will only be used if __OPTIMIZE_SIZE__ is not defined.
>>
>> Make check passed for armv6-m.
>>
>> libgcc/ChangeLog:
>> 2015-08-10  Hale Wang  
>> Andre Vieira  
>>
>>   * config/arm/lib1funcs.S: Add new wrapper.
>>
>> 0001-integer-division.patch
>>
>>
>> From 832a3d6af6f06399f70b5a4ac3727d55960c93b7 Mon Sep 17 00:00:00 2001
>> From: Andre Simoes Dias Vieira 
>> Date: Fri, 21 Aug 2015 14:23:28 +0100
>> Subject: [PATCH] new wrapper idivmod
>>
>> ---
>>  libgcc/config/arm/lib1funcs.S | 250 
>> --
>>  1 file changed, 217 insertions(+), 33 deletions(-)
>>
>> diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
>> index 
>> 252efcbd5385cc58a5ce1e48c6816d36a6f4c797..c9e544114590da8cde88382bea0f67206e593816
>>  100644
>> --- a/libgcc/config/arm/lib1funcs.S
>> +++ b/libgcc/config/arm/lib1funcs.S
>> @@ -306,34 +306,12 @@ LSYM(Lend_fde):
>>  #ifdef __ARM_EABI__
>>  .macro THUMB_LDIV0 name signed
>>  #if defined(__ARM_ARCH_6M__)
>> -.ifc \signed, unsigned
>> -cmp r0, #0
>> -beq 1f
>> -mov r0, #0
>> -mvn r0, r0  @ 0x
>> -1:
>> -.else
>> -cmp r0, #0
>> -beq 2f
>> -blt 3f
>> +
>> +push{r0, lr}
>>  mov r0, #0
>> -mvn r0, r0
>> -lsr r0, r0, #1  @ 0x7fff
>> -b   2f
>> -3:  mov r0, #0x80
>> -lsl r0, r0, #24 @ 0x8000
>> -2:
>> -.endif
>> -push{r0, r1, r2}
>> -ldr r0, 4f
>> -adr r1, 4f
>> -add r0, r1
>> -str r0, [sp, #8]
>> -@ We know we are not on armv4t, so pop pc is safe.
>> -pop {r0, r1, pc}
>> -.align  2
>> -4:
>> -.word   __aeabi_idiv0 - 4b
>> +bl  SYM(__aeabi_idiv0)
>> +pop {r1, pc}
>> +
> 
> I'd still retain the comment about pop pc here because there's often a 
> misconception of merging armv4t and armv6m code.
> 
>>  #elif defined(__thumb2__)
>>  .syntax unified
>>  .ifc \signed, unsigned
>> @@ -945,7 +923,170 @@ LSYM(Lover7):
>>  add dividend, work
>>.endif
>>  LSYM(Lgot_result):
>> -.endm   
>> +.endm
>> +
>> +#if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__)
>> +/* If performance is preferred, the following functions are provided.  */
>> +
> 
> Comment above #if please and also check elsewhere in patch.
> 
>> +/* Branch to div(n), and jump to label if curbit is lo than divisior.  */
>> +.macro BranchToDiv n, label
>> +lsr curbit, dividend, \n
>> +cmp curbit, divisor
>> +blo \label
>> +.endm
>> +
>> +/* Body of div(n).  Shift the divisor in n bits and compare the divisor
>> +   and dividend.  Update the dividend as the substruction result.  */
>> +.macro DoDiv n
>> +lsr curbit, dividend, \n
>> +cmp curbit, divisor
>> +bcc 1f
>> +lsl curbit, divisor, \n
>> +sub dividend, dividend, curbit
>> +
>> +1:  adc result, result
>> +.endm
>> +
>> +/* The body of division with positive divisor.  Unless the divisor is very
>> +   big, shift it up in multiples of four bits, since this is the amount of
>> +   unwinding in the main division loop.  Continue shifting until the divisor
>> +   is larger than the dividend.  */
>> +.macro THUMB1_Div_Positive
>> +mov result, #0
>> +BranchToDiv #1, LSYM(Lthumb1_div1)
>> +BranchToDiv #4, LSYM(Lthumb1_div4)
>> +BranchToDiv #8, LSYM(Lthumb1_div8)
>> +BranchToDiv #12, LSYM(Lthumb1_div12)
>> +BranchToDiv #16, LSYM(Lthumb1_div16)
>> +LSYM(Lthumb1_div_large_positive):
>> +mov result, #0xff
>> +lsl divisor, divisor, #8
>> +rev result, result
>> +lsr curbit, dividend, #16
>> +cmp curbit, divisor
>> +blo 1f
>> +asr result, #8
>> +lsl divisor, divisor, #8
>> +beq LSYM(Ldivbyzero_waypoint)
>> +
>> +1:  lsr curbit, dividend, #12
>> +cmp curbit, divisor
>> +blo LSYM(Lthumb1_div12)
>> +b   LSYM(Lthumb1_div16)
>> +LSYM(Lthumb1_div_loop):
>> +lsr divisor, divisor, 

Re: [PATCHv2, ARM, libgcc] New aeabi_idiv function for armv6-m

2016-07-01 Thread Ramana Radhakrishnan


On 13/10/15 18:01, Andre Vieira wrote:
> This patch ports the aeabi_idiv routine from Linaro Cortex-Strings 
> (https://git.linaro.org/toolchain/cortex-strings.git), which was contributed 
> by ARM under Free BSD license.
> 
> The new aeabi_idiv routine is used to replace the one in 
> libgcc/config/arm/lib1funcs.S. This replacement happens within the Thumb1 
> wrapper. The new routine is under LGPLv3 license.

This is not under LGPLv3. It is under GPLv3 with the runtime library exception 
license; there's a difference. Assuming your licensing expectation is ok, 
read on for more of a review.

> 
> The main advantage of this version is that it can improve the performance of 
> the aeabi_idiv function for Thumb1. This solution will also increase the code 
> size. So it will only be used if __OPTIMIZE_SIZE__ is not defined.
> 
> Make check passed for armv6-m.
> 
> libgcc/ChangeLog:
> 2015-08-10  Hale Wang  
> Andre Vieira  
> 
>   * config/arm/lib1funcs.S: Add new wrapper.
> 
> 0001-integer-division.patch
> 
> 
> From 832a3d6af6f06399f70b5a4ac3727d55960c93b7 Mon Sep 17 00:00:00 2001
> From: Andre Simoes Dias Vieira 
> Date: Fri, 21 Aug 2015 14:23:28 +0100
> Subject: [PATCH] new wrapper idivmod
> 
> ---
>  libgcc/config/arm/lib1funcs.S | 250 
> --
>  1 file changed, 217 insertions(+), 33 deletions(-)
> 
> diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
> index 
> 252efcbd5385cc58a5ce1e48c6816d36a6f4c797..c9e544114590da8cde88382bea0f67206e593816
>  100644
> --- a/libgcc/config/arm/lib1funcs.S
> +++ b/libgcc/config/arm/lib1funcs.S
> @@ -306,34 +306,12 @@ LSYM(Lend_fde):
>  #ifdef __ARM_EABI__
>  .macro THUMB_LDIV0 name signed
>  #if defined(__ARM_ARCH_6M__)
> - .ifc \signed, unsigned
> - cmp r0, #0
> - beq 1f
> - mov r0, #0
> - mvn r0, r0  @ 0x
> -1:
> - .else
> - cmp r0, #0
> - beq 2f
> - blt 3f
> +
> + push{r0, lr}
>   mov r0, #0
> - mvn r0, r0
> - lsr r0, r0, #1  @ 0x7fff
> - b   2f
> -3:   mov r0, #0x80
> - lsl r0, r0, #24 @ 0x8000
> -2:
> - .endif
> - push{r0, r1, r2}
> - ldr r0, 4f
> - adr r1, 4f
> - add r0, r1
> - str r0, [sp, #8]
> - @ We know we are not on armv4t, so pop pc is safe.
> - pop {r0, r1, pc}
> - .align  2
> -4:
> - .word   __aeabi_idiv0 - 4b
> + bl  SYM(__aeabi_idiv0)
> + pop {r1, pc}
> +

I'd still retain the comment about pop pc here because there's often a 
misconception of merging armv4t and armv6m code.

>  #elif defined(__thumb2__)
>   .syntax unified
>   .ifc \signed, unsigned
> @@ -945,7 +923,170 @@ LSYM(Lover7):
>   add dividend, work
>.endif
>  LSYM(Lgot_result):
> -.endm
> +.endm
> +
> +#if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__)
> +/* If performance is preferred, the following functions are provided.  */
> +

Comment above #if please and also check elsewhere in patch.

> +/* Branch to div(n), and jump to label if curbit is lo than divisior.  */
> +.macro BranchToDiv n, label
> + lsr curbit, dividend, \n
> + cmp curbit, divisor
> + blo \label
> +.endm
> +
> +/* Body of div(n).  Shift the divisor in n bits and compare the divisor
> +   and dividend.  Update the dividend as the substruction result.  */
> +.macro DoDiv n
> + lsr curbit, dividend, \n
> + cmp curbit, divisor
> + bcc 1f
> + lsl curbit, divisor, \n
> + sub dividend, dividend, curbit
> +
> +1:   adc result, result
> +.endm
> +
> +/* The body of division with positive divisor.  Unless the divisor is very
> +   big, shift it up in multiples of four bits, since this is the amount of
> +   unwinding in the main division loop.  Continue shifting until the divisor
> +   is larger than the dividend.  */
> +.macro THUMB1_Div_Positive
> + mov result, #0
> + BranchToDiv #1, LSYM(Lthumb1_div1)
> + BranchToDiv #4, LSYM(Lthumb1_div4)
> + BranchToDiv #8, LSYM(Lthumb1_div8)
> + BranchToDiv #12, LSYM(Lthumb1_div12)
> + BranchToDiv #16, LSYM(Lthumb1_div16)
> +LSYM(Lthumb1_div_large_positive):
> + mov result, #0xff
> + lsl divisor, divisor, #8
> + rev result, result
> + lsr curbit, dividend, #16
> + cmp curbit, divisor
> + blo 1f
> + asr result, #8
> + lsl divisor, divisor, #8
> + beq LSYM(Ldivbyzero_waypoint)
> +
> +1:   lsr curbit, dividend, #12
> + cmp curbit, divisor
> + blo LSYM(Lthumb1_div12)
> + b   LSYM(Lthumb1_div16)
> +LSYM(Lthumb1_div_loop):
> + lsr divisor, divisor, #8
> +LSYM(Lthumb1_div16):
> + Dodiv   #15
> + Dodiv   #14
> + Dodiv   #13
> + Dodiv   #12
> +LSYM(Lthumb1_div12):
> + 

Re: [PING^5][PATCHv2, ARM, libgcc] New aeabi_idiv function for armv6-m

2016-06-23 Thread Andre Vieira (lists)
Ping.

On 08/06/16 15:35, Andre Vieira (lists) wrote:
> Ping.
> 
> On 19/05/16 11:19, Andre Vieira (lists) wrote:
>> Ping for GCC-7, patch applies cleanly, passed make check for cortex-m0.
>>
>> Might be worth mentioning that this patch has been used in three
>> releases of the GNU ARM embedded toolchain, using GCC versions 4.9 and
>> 5, and no issues have been reported so far.
>>
>> On 25/01/16 17:15, Andre Vieira (lists) wrote:
>>> Ping.
>>>
>>> On 27/10/15 17:03, Andre Vieira wrote:
 Ping.

 BR,
 Andre

 On 13/10/15 18:01, Andre Vieira wrote:
> This patch ports the aeabi_idiv routine from Linaro Cortex-Strings
> (https://git.linaro.org/toolchain/cortex-strings.git), which was
> contributed by ARM under Free BSD license.
>
> The new aeabi_idiv routine is used to replace the one in
> libgcc/config/arm/lib1funcs.S. This replacement happens within the
> Thumb1 wrapper. The new routine is under LGPLv3 license.
>
> The main advantage of this version is that it can improve the
> performance of the aeabi_idiv function for Thumb1. This solution will
> also increase the code size. So it will only be used if
> __OPTIMIZE_SIZE__ is not defined.
>
> Make check passed for armv6-m.
>
> libgcc/ChangeLog:
> 2015-08-10  Hale Wang  
>   Andre Vieira  
>
> * config/arm/lib1funcs.S: Add new wrapper.
>
>>>
>>
> 



Re: [PING^4][PATCHv2, ARM, libgcc] New aeabi_idiv function for armv6-m

2016-06-08 Thread Andre Vieira (lists)
Ping.

On 19/05/16 11:19, Andre Vieira (lists) wrote:
> Ping for GCC-7, patch applies cleanly, passed make check for cortex-m0.
> 
> Might be worth mentioning that this patch has been used in three
> releases of the GNU ARM embedded toolchain, using GCC versions 4.9 and
> 5, and no issues have been reported so far.
> 
> On 25/01/16 17:15, Andre Vieira (lists) wrote:
>> Ping.
>>
>> On 27/10/15 17:03, Andre Vieira wrote:
>>> Ping.
>>>
>>> BR,
>>> Andre
>>>
>>> On 13/10/15 18:01, Andre Vieira wrote:
 This patch ports the aeabi_idiv routine from Linaro Cortex-Strings
 (https://git.linaro.org/toolchain/cortex-strings.git), which was
 contributed by ARM under Free BSD license.

 The new aeabi_idiv routine is used to replace the one in
 libgcc/config/arm/lib1funcs.S. This replacement happens within the
 Thumb1 wrapper. The new routine is under LGPLv3 license.

 The main advantage of this version is that it can improve the
 performance of the aeabi_idiv function for Thumb1. This solution will
 also increase the code size. So it will only be used if
 __OPTIMIZE_SIZE__ is not defined.

 Make check passed for armv6-m.

 libgcc/ChangeLog:
 2015-08-10  Hale Wang  
   Andre Vieira  

 * config/arm/lib1funcs.S: Add new wrapper.

>>
> 



[PING^3][PATCHv2, ARM, libgcc] New aeabi_idiv function for armv6-m

2016-05-19 Thread Andre Vieira (lists)
Ping for GCC-7, patch applies cleanly, passed make check for cortex-m0.

Might be worth mentioning that this patch has been used in three
releases of the GNU ARM embedded toolchain, using GCC versions 4.9 and
5, and no issues have been reported so far.

On 25/01/16 17:15, Andre Vieira (lists) wrote:
> Ping.
> 
> On 27/10/15 17:03, Andre Vieira wrote:
>> Ping.
>>
>> BR,
>> Andre
>>
>> On 13/10/15 18:01, Andre Vieira wrote:
>>> This patch ports the aeabi_idiv routine from Linaro Cortex-Strings
>>> (https://git.linaro.org/toolchain/cortex-strings.git), which was
>>> contributed by ARM under Free BSD license.
>>>
>>> The new aeabi_idiv routine is used to replace the one in
>>> libgcc/config/arm/lib1funcs.S. This replacement happens within the
>>> Thumb1 wrapper. The new routine is under LGPLv3 license.
>>>
>>> The main advantage of this version is that it can improve the
>>> performance of the aeabi_idiv function for Thumb1. This solution will
>>> also increase the code size. So it will only be used if
>>> __OPTIMIZE_SIZE__ is not defined.
>>>
>>> Make check passed for armv6-m.
>>>
>>> libgcc/ChangeLog:
>>> 2015-08-10  Hale Wang  
>>>   Andre Vieira  
>>>
>>> * config/arm/lib1funcs.S: Add new wrapper.
>>>
> 



Re: [PING^2][PATCHv2, ARM, libgcc] New aeabi_idiv function for armv6-m

2016-01-25 Thread Andre Vieira (lists)

Ping.

On 27/10/15 17:03, Andre Vieira wrote:

Ping.

BR,
Andre

On 13/10/15 18:01, Andre Vieira wrote:

This patch ports the aeabi_idiv routine from Linaro Cortex-Strings
(https://git.linaro.org/toolchain/cortex-strings.git), which was
contributed by ARM under Free BSD license.

The new aeabi_idiv routine is used to replace the one in
libgcc/config/arm/lib1funcs.S. This replacement happens within the
Thumb1 wrapper. The new routine is under LGPLv3 license.

The main advantage of this version is that it can improve the
performance of the aeabi_idiv function for Thumb1. This solution will
also increase the code size. So it will only be used if
__OPTIMIZE_SIZE__ is not defined.

Make check passed for armv6-m.

libgcc/ChangeLog:
2015-08-10  Hale Wang  
  Andre Vieira  

* config/arm/lib1funcs.S: Add new wrapper.



[arm-embedded][PATCHv2, ARM, libgcc] New aeabi_idiv function for armv6-m

2015-12-07 Thread Thomas Preud'homme
We decided to apply this to ARM/embedded-5-branch.

Best regards,

Thomas

> -Original Message-
> From: gcc-patches-ow...@gcc.gnu.org [mailto:gcc-patches-
> ow...@gcc.gnu.org] On Behalf Of Andre Vieira
> Sent: Wednesday, October 28, 2015 1:03 AM
> To: gcc-patches@gcc.gnu.org
> Subject: Re: [PING][PATCHv2, ARM, libgcc] New aeabi_idiv function for
> armv6-m
> 
> Ping.
> 
> BR,
> Andre
> 
> On 13/10/15 18:01, Andre Vieira wrote:
> > This patch ports the aeabi_idiv routine from Linaro Cortex-Strings
> > (https://git.linaro.org/toolchain/cortex-strings.git), which was
> > contributed by ARM under Free BSD license.
> >
> > The new aeabi_idiv routine is used to replace the one in
> > libgcc/config/arm/lib1funcs.S. This replacement happens within the
> > Thumb1 wrapper. The new routine is under LGPLv3 license.
> >
> > The main advantage of this version is that it can improve the
> > performance of the aeabi_idiv function for Thumb1. This solution will
> > also increase the code size. So it will only be used if
> > __OPTIMIZE_SIZE__ is not defined.
> >
> > Make check passed for armv6-m.
> >
> > libgcc/ChangeLog:
> > 2015-08-10  Hale Wang  <hale.w...@arm.com>
> >   Andre Vieira  <andre.simoesdiasvie...@arm.com>
> >
> > * config/arm/lib1funcs.S: Add new wrapper.
> >





Re: [PING][PATCHv2, ARM, libgcc] New aeabi_idiv function for armv6-m

2015-10-27 Thread Andre Vieira

Ping.

BR,
Andre

On 13/10/15 18:01, Andre Vieira wrote:

This patch ports the aeabi_idiv routine from Linaro Cortex-Strings
(https://git.linaro.org/toolchain/cortex-strings.git), which was
contributed by ARM under Free BSD license.

The new aeabi_idiv routine is used to replace the one in
libgcc/config/arm/lib1funcs.S. This replacement happens within the
Thumb1 wrapper. The new routine is under LGPLv3 license.

The main advantage of this version is that it can improve the
performance of the aeabi_idiv function for Thumb1. This solution will
also increase the code size. So it will only be used if
__OPTIMIZE_SIZE__ is not defined.

Make check passed for armv6-m.

libgcc/ChangeLog:
2015-08-10  Hale Wang  
  Andre Vieira  

* config/arm/lib1funcs.S: Add new wrapper.





[PATCHv2, ARM, libgcc] New aeabi_idiv function for armv6-m

2015-10-13 Thread Andre Vieira
This patch ports the aeabi_idiv routine from Linaro Cortex-Strings 
(https://git.linaro.org/toolchain/cortex-strings.git), which was 
contributed by ARM under Free BSD license.


The new aeabi_idiv routine is used to replace the one in 
libgcc/config/arm/lib1funcs.S. This replacement happens within the 
Thumb1 wrapper. The new routine is under LGPLv3 license.


The main advantage of this version is that it can improve the 
performance of the aeabi_idiv function for Thumb1. This solution will 
also increase the code size. So it will only be used if 
__OPTIMIZE_SIZE__ is not defined.


Make check passed for armv6-m.

libgcc/ChangeLog:
2015-08-10  Hale Wang  
Andre Vieira  

  * config/arm/lib1funcs.S: Add new wrapper.
From 832a3d6af6f06399f70b5a4ac3727d55960c93b7 Mon Sep 17 00:00:00 2001
From: Andre Simoes Dias Vieira 
Date: Fri, 21 Aug 2015 14:23:28 +0100
Subject: [PATCH] new wrapper idivmod

---
 libgcc/config/arm/lib1funcs.S | 250 --
 1 file changed, 217 insertions(+), 33 deletions(-)

diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
index 252efcbd5385cc58a5ce1e48c6816d36a6f4c797..c9e544114590da8cde88382bea0f67206e593816 100644
--- a/libgcc/config/arm/lib1funcs.S
+++ b/libgcc/config/arm/lib1funcs.S
@@ -306,34 +306,12 @@ LSYM(Lend_fde):
 #ifdef __ARM_EABI__
 .macro THUMB_LDIV0 name signed
 #if defined(__ARM_ARCH_6M__)
-	.ifc \signed, unsigned
-	cmp	r0, #0
-	beq	1f
-	mov	r0, #0
-	mvn	r0, r0		@ 0xffffffff
-1:
-	.else
-	cmp	r0, #0
-	beq	2f
-	blt	3f
+
+	push	{r0, lr}
 	mov	r0, #0
-	mvn	r0, r0
-	lsr	r0, r0, #1	@ 0x7fffffff
-	b	2f
-3:	mov	r0, #0x80
-	lsl	r0, r0, #24	@ 0x80000000
-2:
-	.endif
-	push	{r0, r1, r2}
-	ldr	r0, 4f
-	adr	r1, 4f
-	add	r0, r1
-	str	r0, [sp, #8]
-	@ We know we are not on armv4t, so pop pc is safe.
-	pop	{r0, r1, pc}
-	.align	2
-4:
-	.word	__aeabi_idiv0 - 4b
+	bl	SYM(__aeabi_idiv0)
+	pop	{r1, pc}
+
 #elif defined(__thumb2__)
 	.syntax unified
 	.ifc \signed, unsigned
@@ -945,7 +923,170 @@ LSYM(Lover7):
 	add	dividend, work
   .endif
 LSYM(Lgot_result):
-.endm	
+.endm
+
+#if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__)
+/* If performance is preferred, the following functions are provided.  */
+
+/* Branch to div(n), and jump to label if curbit is lower than divisor.  */
+.macro BranchToDiv n, label
+	lsr	curbit, dividend, \n
+	cmp	curbit, divisor
+	blo	\label
+.endm
+
+/* Body of div(n).  Shift the divisor in n bits and compare the divisor
+   and dividend.  Update the dividend as the subtraction result.  */
+.macro DoDiv n
+	lsr	curbit, dividend, \n
+	cmp	curbit, divisor
+	bcc	1f
+	lsl	curbit, divisor, \n
+	sub	dividend, dividend, curbit
+
+1:	adc	result, result
+.endm
+
+/* The body of division with positive divisor.  Unless the divisor is very
+   big, shift it up in multiples of four bits, since this is the amount of
+   unwinding in the main division loop.  Continue shifting until the divisor
+   is larger than the dividend.  */
+.macro THUMB1_Div_Positive
+	mov	result, #0
+	BranchToDiv #1, LSYM(Lthumb1_div1)
+	BranchToDiv #4, LSYM(Lthumb1_div4)
+	BranchToDiv #8, LSYM(Lthumb1_div8)
+	BranchToDiv #12, LSYM(Lthumb1_div12)
+	BranchToDiv #16, LSYM(Lthumb1_div16)
+LSYM(Lthumb1_div_large_positive):
+	mov	result, #0xff
+	lsl	divisor, divisor, #8
+	rev	result, result
+	lsr	curbit, dividend, #16
+	cmp	curbit, divisor
+	blo	1f
+	asr	result, #8
+	lsl	divisor, divisor, #8
+	beq	LSYM(Ldivbyzero_waypoint)
+
+1:	lsr	curbit, dividend, #12
+	cmp	curbit, divisor
+	blo	LSYM(Lthumb1_div12)
+	b	LSYM(Lthumb1_div16)
+LSYM(Lthumb1_div_loop):
+	lsr	divisor, divisor, #8
+LSYM(Lthumb1_div16):
+	Dodiv	#15
+	Dodiv	#14
+	Dodiv	#13
+	Dodiv	#12
+LSYM(Lthumb1_div12):
+	Dodiv	#11
+	Dodiv	#10
+	Dodiv	#9
+	Dodiv	#8
+	bcs	LSYM(Lthumb1_div_loop)
+LSYM(Lthumb1_div8):
+	Dodiv	#7
+	Dodiv	#6
+	Dodiv	#5
+LSYM(Lthumb1_div5):
+	Dodiv	#4
+LSYM(Lthumb1_div4):
+	Dodiv	#3
+LSYM(Lthumb1_div3):
+	Dodiv	#2
+LSYM(Lthumb1_div2):
+	Dodiv	#1
+LSYM(Lthumb1_div1):
+	sub	divisor, dividend, divisor
+	bcs	1f
+	cpy	divisor, dividend
+
+1:	adc	result, result
+	cpy	dividend, result
+	RET
+
+LSYM(Ldivbyzero_waypoint):
+	b	LSYM(Ldiv0)
+.endm
+
+/* The body of division with negative divisor.  Similar to
+   THUMB1_Div_Positive except that the shift steps are in multiples
+   of six bits.  */
+.macro THUMB1_Div_Negative
+	lsr	result, divisor, #31
+	beq	1f
+	neg	divisor, divisor
+
+1:	asr	curbit, dividend, #32
+	bcc	2f
+	neg	dividend, dividend
+
+2:	eor	curbit, result
+	mov	result, #0
+	cpy	ip, curbit
+	BranchToDiv #4, LSYM(Lthumb1_div_negative4)
+	BranchToDiv #8, LSYM(Lthumb1_div_negative8)
+LSYM(Lthumb1_div_large):
+	mov	result, #0xfc
+	lsl	divisor, divisor, #6
+	rev	result, result
+	lsr	curbit, dividend, #8
+	cmp	curbit, divisor
+	blo	LSYM(Lthumb1_div_negative8)
+
+	lsl	divisor, divisor, #6
+	asr	result, result, #6
+	cmp	curbit, divisor
+	blo	LSYM(Lthumb1_div_negative8)
+
+	lsl	divisor,