On 01/07/16 14:40, Ramana Radhakrishnan wrote: > > > On 13/10/15 18:01, Andre Vieira wrote: >> This patch ports the aeabi_idiv routine from Linaro Cortex-Strings >> (https://git.linaro.org/toolchain/cortex-strings.git), which was contributed >> by ARM under Free BSD license. >> >> The new aeabi_idiv routine is used to replace the one in >> libgcc/config/arm/lib1funcs.S. This replacement happens within the Thumb1 >> wrapper. The new routine is under LGPLv3 license. > > This is not under LGPLv3 . It is under GPLv3 with the runtime library > exception license, there's a difference. Assuming your licensing expectation > is ok .... read on for more of a review. > >> >> The main advantage of this version is that it can improve the performance of >> the aeabi_idiv function for Thumb1. This solution will also increase the >> code size. So it will only be used if __OPTIMIZE_SIZE__ is not defined. >> >> Make check passed for armv6-m. >> >> libgcc/ChangeLog: >> 2015-08-10 Hale Wang <hale.w...@arm.com> >> Andre Vieira <andre.simoesdiasvie...@arm.com> >> >> * config/arm/lib1funcs.S: Add new wrapper. 
>> >> 0001-integer-division.patch >> >> >> From 832a3d6af6f06399f70b5a4ac3727d55960c93b7 Mon Sep 17 00:00:00 2001 >> From: Andre Simoes Dias Vieira <andsi...@arm.com> >> Date: Fri, 21 Aug 2015 14:23:28 +0100 >> Subject: [PATCH] new wrapper idivmod >> >> --- >> libgcc/config/arm/lib1funcs.S | 250 >> ++++++++++++++++++++++++++++++++++++------ >> 1 file changed, 217 insertions(+), 33 deletions(-) >> >> diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S >> index >> 252efcbd5385cc58a5ce1e48c6816d36a6f4c797..c9e544114590da8cde88382bea0f67206e593816 >> 100644 >> --- a/libgcc/config/arm/lib1funcs.S >> +++ b/libgcc/config/arm/lib1funcs.S >> @@ -306,34 +306,12 @@ LSYM(Lend_fde): >> #ifdef __ARM_EABI__ >> .macro THUMB_LDIV0 name signed >> #if defined(__ARM_ARCH_6M__) >> - .ifc \signed, unsigned >> - cmp r0, #0 >> - beq 1f >> - mov r0, #0 >> - mvn r0, r0 @ 0xffffffff >> -1: >> - .else >> - cmp r0, #0 >> - beq 2f >> - blt 3f >> + >> + push {r0, lr} >> mov r0, #0 >> - mvn r0, r0 >> - lsr r0, r0, #1 @ 0x7fffffff >> - b 2f >> -3: mov r0, #0x80 >> - lsl r0, r0, #24 @ 0x80000000 >> -2: >> - .endif >> - push {r0, r1, r2} >> - ldr r0, 4f >> - adr r1, 4f >> - add r0, r1 >> - str r0, [sp, #8] >> - @ We know we are not on armv4t, so pop pc is safe. >> - pop {r0, r1, pc} >> - .align 2 >> -4: >> - .word __aeabi_idiv0 - 4b >> + bl SYM(__aeabi_idiv0) >> + pop {r1, pc} >> + > > I'd still retain the comment about pop pc here because there's often a > misconception of merging armv4t and armv6m code. > >> #elif defined(__thumb2__) >> .syntax unified >> .ifc \signed, unsigned >> @@ -945,7 +923,170 @@ LSYM(Lover7): >> add dividend, work >> .endif >> LSYM(Lgot_result): >> -.endm >> +.endm >> + >> +#if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__) >> +/* If performance is preferred, the following functions are provided. */ >> + > > Comment above #if please and also check elsewhere in patch. > >> +/* Branch to div(n), and jump to label if curbit is lo than divisior. 
*/ >> +.macro BranchToDiv n, label >> + lsr curbit, dividend, \n >> + cmp curbit, divisor >> + blo \label >> +.endm >> + >> +/* Body of div(n). Shift the divisor in n bits and compare the divisor >> + and dividend. Update the dividend as the substruction result. */ >> +.macro DoDiv n >> + lsr curbit, dividend, \n >> + cmp curbit, divisor >> + bcc 1f >> + lsl curbit, divisor, \n >> + sub dividend, dividend, curbit >> + >> +1: adc result, result >> +.endm >> + >> +/* The body of division with positive divisor. Unless the divisor is very >> + big, shift it up in multiples of four bits, since this is the amount of >> + unwinding in the main division loop. Continue shifting until the divisor >> + is larger than the dividend. */ >> +.macro THUMB1_Div_Positive >> + mov result, #0 >> + BranchToDiv #1, LSYM(Lthumb1_div1) >> + BranchToDiv #4, LSYM(Lthumb1_div4) >> + BranchToDiv #8, LSYM(Lthumb1_div8) >> + BranchToDiv #12, LSYM(Lthumb1_div12) >> + BranchToDiv #16, LSYM(Lthumb1_div16) >> +LSYM(Lthumb1_div_large_positive): >> + mov result, #0xff >> + lsl divisor, divisor, #8 >> + rev result, result >> + lsr curbit, dividend, #16 >> + cmp curbit, divisor >> + blo 1f >> + asr result, #8 >> + lsl divisor, divisor, #8 >> + beq LSYM(Ldivbyzero_waypoint) >> + >> +1: lsr curbit, dividend, #12 >> + cmp curbit, divisor >> + blo LSYM(Lthumb1_div12) >> + b LSYM(Lthumb1_div16) >> +LSYM(Lthumb1_div_loop): >> + lsr divisor, divisor, #8 >> +LSYM(Lthumb1_div16): >> + Dodiv #15 >> + Dodiv #14 >> + Dodiv #13 >> + Dodiv #12 >> +LSYM(Lthumb1_div12): >> + Dodiv #11 >> + Dodiv #10 >> + Dodiv #9 >> + Dodiv #8 >> + bcs LSYM(Lthumb1_div_loop) >> +LSYM(Lthumb1_div8): >> + Dodiv #7 >> + Dodiv #6 >> + Dodiv #5 >> +LSYM(Lthumb1_div5): >> + Dodiv #4 >> +LSYM(Lthumb1_div4): >> + Dodiv #3 >> +LSYM(Lthumb1_div3): >> + Dodiv #2 >> +LSYM(Lthumb1_div2): >> + Dodiv #1 >> +LSYM(Lthumb1_div1): >> + sub divisor, dividend, divisor >> + bcs 1f >> + cpy divisor, dividend >> + >> +1: adc result, result >> + cpy dividend, 
result >> + RET >> + >> +LSYM(Ldivbyzero_waypoint): >> + b LSYM(Ldiv0) >> +.endm >> + >> +/* The body of division with negative divisor. Similar with >> + THUMB1_Div_Positive except that the shift steps are in multiples >> + of six bits. */ >> +.macro THUMB1_Div_Negative >> + lsr result, divisor, #31 >> + beq 1f >> + neg divisor, divisor >> + >> +1: asr curbit, dividend, #32 >> + bcc 2f >> + neg dividend, dividend >> + >> +2: eor curbit, result >> + mov result, #0 >> + cpy ip, curbit >> + BranchToDiv #4, LSYM(Lthumb1_div_negative4) >> + BranchToDiv #8, LSYM(Lthumb1_div_negative8) >> +LSYM(Lthumb1_div_large): >> + mov result, #0xfc >> + lsl divisor, divisor, #6 >> + rev result, result >> + lsr curbit, dividend, #8 >> + cmp curbit, divisor >> + blo LSYM(Lthumb1_div_negative8) >> + >> + lsl divisor, divisor, #6 >> + asr result, result, #6 >> + cmp curbit, divisor >> + blo LSYM(Lthumb1_div_negative8) >> + >> + lsl divisor, divisor, #6 >> + asr result, result, #6 >> + cmp curbit, divisor >> + blo LSYM(Lthumb1_div_negative8) >> + >> + lsl divisor, divisor, #6 >> + beq LSYM(Ldivbyzero_negative) >> + asr result, result, #6 >> + b LSYM(Lthumb1_div_negative8) >> +LSYM(Lthumb1_div_negative_loop): >> + lsr divisor, divisor, #6 >> +LSYM(Lthumb1_div_negative8): >> + DoDiv #7 >> + DoDiv #6 >> + DoDiv #5 >> + DoDiv #4 >> +LSYM(Lthumb1_div_negative4): >> + DoDiv #3 >> + DoDiv #2 >> + bcs LSYM(Lthumb1_div_negative_loop) >> + DoDiv #1 >> + sub divisor, dividend, divisor >> + bcs 1f >> + cpy divisor, dividend >> + >> +1: cpy curbit, ip >> + adc result, result >> + asr curbit, curbit, #1 >> + cpy dividend, result >> + bcc 2f >> + neg dividend, dividend >> + cmp curbit, #0 >> + >> +2: bpl 3f >> + neg divisor, divisor >> + >> +3: RET >> + >> +LSYM(Ldivbyzero_negative): >> + cpy curbit, ip >> + asr curbit, curbit, #1 >> + bcc LSYM(Ldiv0) >> + neg dividend, dividend >> +.endm >> +#endif /* ARM Thumb version. 
*/ >> + >> /* ------------------------------------------------------------------------ >> */ >> /* Start of the Real Functions */ >> /* ------------------------------------------------------------------------ >> */ >> @@ -955,6 +1096,7 @@ LSYM(Lgot_result): >> >> FUNC_START udivsi3 >> FUNC_ALIAS aeabi_uidiv udivsi3 >> +#if defined(__OPTIMIZE_SIZE__) >> >> cmp divisor, #0 >> beq LSYM(Ldiv0) >> @@ -972,6 +1114,14 @@ LSYM(udivsi3_skip_div0_test): >> pop { work } >> RET >> >> +#else >> + /* Implementation of aeabi_uidiv for ARMv6m. This version is only >> + used in ARMv6-M when we need an efficient implementation. */ >> +LSYM(udivsi3_skip_div0_test): >> + THUMB1_Div_Positive >> + >> +#endif /* __OPTIMIZE_SIZE__ */ >> + >> #elif defined(__ARM_ARCH_EXT_IDIV__) >> >> ARM_FUNC_START udivsi3 >> @@ -1023,12 +1173,21 @@ LSYM(udivsi3_skip_div0_test): >> FUNC_START aeabi_uidivmod >> cmp r1, #0 >> beq LSYM(Ldiv0) >> +# if defined(__OPTIMIZE_SIZE__) >> push {r0, r1, lr} >> bl LSYM(udivsi3_skip_div0_test) >> POP {r1, r2, r3} >> mul r2, r0 >> sub r1, r1, r2 >> bx r3 >> +# else >> + /* Both the quotient and remainder are calculated simultaneously >> + in THUMB1_Div_Positive. There is no need to calculate the >> + remainder again here. */ >> + b LSYM(udivsi3_skip_div0_test) >> + RET >> +# endif /* __OPTIMIZE_SIZE__ */ >> + >> #elif defined(__ARM_ARCH_EXT_IDIV__) >> ARM_FUNC_START aeabi_uidivmod >> cmp r1, #0 >> @@ -1084,7 +1243,7 @@ LSYM(Lover10): >> RET >> >> #else /* ARM version. 
*/ >> - >> + >> FUNC_START umodsi3 >> >> subs r2, r1, #1 @ compare divisor with 1 >> @@ -1109,8 +1268,9 @@ LSYM(Lover10): >> >> #if defined(__prefer_thumb__) >> >> - FUNC_START divsi3 >> + FUNC_START divsi3 >> FUNC_ALIAS aeabi_idiv divsi3 >> +#if defined(__OPTIMIZE_SIZE__) >> >> cmp divisor, #0 >> beq LSYM(Ldiv0) >> @@ -1133,7 +1293,7 @@ LSYM(Lover11): >> blo LSYM(Lgot_result) >> >> THUMB_DIV_MOD_BODY 0 >> - >> + >> mov r0, result >> mov work, ip >> cmp work, #0 >> @@ -1142,6 +1302,21 @@ LSYM(Lover11): >> LSYM(Lover12): >> pop { work } >> RET >> +#else >> + /* Implementation of aeabi_idiv for ARMv6m. This version is only >> + used in ARMv6-M when we need an efficient implementation. */ >> +LSYM(divsi3_skip_div0_test): >> + cpy curbit, dividend >> + orr curbit, divisor >> + bmi LSYM(Lthumb1_div_negative) >> + >> +LSYM(Lthumb1_div_positive): >> + THUMB1_Div_Positive >> + >> +LSYM(Lthumb1_div_negative): >> + THUMB1_Div_Negative >> + >> +#endif /* __OPTIMIZE_SIZE__ */ >> >> #elif defined(__ARM_ARCH_EXT_IDIV__) >> >> @@ -1154,8 +1329,8 @@ LSYM(Lover12): >> RET >> >> #else /* ARM/Thumb-2 version. */ >> - >> - ARM_FUNC_START divsi3 >> + >> + ARM_FUNC_START divsi3 >> ARM_FUNC_ALIAS aeabi_idiv divsi3 >> >> cmp r1, #0 >> @@ -1209,12 +1384,21 @@ LSYM(divsi3_skip_div0_test): >> FUNC_START aeabi_idivmod >> cmp r1, #0 >> beq LSYM(Ldiv0) >> +# if defined(__OPTIMIZE_SIZE__) >> push {r0, r1, lr} >> bl LSYM(divsi3_skip_div0_test) >> POP {r1, r2, r3} >> mul r2, r0 >> sub r1, r1, r2 >> bx r3 >> +# else >> + /* Both the quotient and remainder are calculated simultaneously >> + in THUMB1_Div_Positive and THUMB1_Div_Negative. There is no >> + need to calculate the remainder again here. */ >> + b LSYM(divsi3_skip_div0_test) >> + RET >> +# endif /* __OPTIMIZE_SIZE__ */ >> + >> #elif defined(__ARM_ARCH_EXT_IDIV__) >> ARM_FUNC_START aeabi_idivmod >> cmp r1, #0 >> -- 1.9.1 >> > > Otherwise OK if no regressions and the following request passes. 
> > Can you ensure that libgcc for one ARM state and one Thumb2 state non-v6m > configuration gives identical binaries with and without your patch? > > regards > Ramana > Hi Ramana,
Thank you for the comments. Sorry about the license; it must have been a mix-up somewhere. I put back the 'pop pc is safe' assembly comment, and I moved some comments before the #if and #else as requested. I left some in place because they did not apply to the whole block, but only to the first assembly instruction after the #if/else. I checked that the assembly generated for libgcc was the same with and without the patch for armv7-a in ARM mode and armv7-m in Thumb mode. Is this OK? Cheers, Andre libgcc/ChangeLog: 2016-07-06 Hale Wang <hale.w...@arm.com> Andre Vieira <andre.simoesdiasvie...@arm.com> * config/arm/lib1funcs.S: Add new wrapper.
>From b5b129e698b9e7446907d3da1fbce0236b09b67c Mon Sep 17 00:00:00 2001 From: Andre Simoes Dias Vieira <andsi...@arm.com> Date: Mon, 16 May 2016 18:34:52 +0100 Subject: [PATCH 1/2] integer division --- libgcc/config/arm/lib1funcs.S | 250 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 218 insertions(+), 32 deletions(-) diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S index 375a5135110895faa44267ebee045fd315515027..8a245b7a9333b249f120cd7e7ecc77248b150610 100644 --- a/libgcc/config/arm/lib1funcs.S +++ b/libgcc/config/arm/lib1funcs.S @@ -306,34 +306,13 @@ LSYM(Lend_fde): #ifdef __ARM_EABI__ .macro THUMB_LDIV0 name signed #if defined(__ARM_ARCH_6M__) - .ifc \signed, unsigned - cmp r0, #0 - beq 1f - mov r0, #0 - mvn r0, r0 @ 0xffffffff -1: - .else - cmp r0, #0 - beq 2f - blt 3f + + push {r0, lr} mov r0, #0 - mvn r0, r0 - lsr r0, r0, #1 @ 0x7fffffff - b 2f -3: mov r0, #0x80 - lsl r0, r0, #24 @ 0x80000000 -2: - .endif - push {r0, r1, r2} - ldr r0, 4f - adr r1, 4f - add r0, r1 - str r0, [sp, #8] + bl SYM(__aeabi_idiv0) @ We know we are not on armv4t, so pop pc is safe. - pop {r0, r1, pc} - .align 2 -4: - .word __aeabi_idiv0 - 4b + pop {r1, pc} + #elif defined(__thumb2__) .syntax unified .ifc \signed, unsigned @@ -945,7 +924,170 @@ LSYM(Lover7): add dividend, work .endif LSYM(Lgot_result): -.endm +.endm + +/* If performance is preferred, the following functions are provided. */ +#if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__) + +/* Branch to div(n), and jump to label if curbit is lo than divisior. */ +.macro BranchToDiv n, label + lsr curbit, dividend, \n + cmp curbit, divisor + blo \label +.endm + +/* Body of div(n). Shift the divisor in n bits and compare the divisor + and dividend. Update the dividend as the substruction result. 
*/ +.macro DoDiv n + lsr curbit, dividend, \n + cmp curbit, divisor + bcc 1f + lsl curbit, divisor, \n + sub dividend, dividend, curbit + +1: adc result, result +.endm + +/* The body of division with positive divisor. Unless the divisor is very + big, shift it up in multiples of four bits, since this is the amount of + unwinding in the main division loop. Continue shifting until the divisor + is larger than the dividend. */ +.macro THUMB1_Div_Positive + mov result, #0 + BranchToDiv #1, LSYM(Lthumb1_div1) + BranchToDiv #4, LSYM(Lthumb1_div4) + BranchToDiv #8, LSYM(Lthumb1_div8) + BranchToDiv #12, LSYM(Lthumb1_div12) + BranchToDiv #16, LSYM(Lthumb1_div16) +LSYM(Lthumb1_div_large_positive): + mov result, #0xff + lsl divisor, divisor, #8 + rev result, result + lsr curbit, dividend, #16 + cmp curbit, divisor + blo 1f + asr result, #8 + lsl divisor, divisor, #8 + beq LSYM(Ldivbyzero_waypoint) + +1: lsr curbit, dividend, #12 + cmp curbit, divisor + blo LSYM(Lthumb1_div12) + b LSYM(Lthumb1_div16) +LSYM(Lthumb1_div_loop): + lsr divisor, divisor, #8 +LSYM(Lthumb1_div16): + Dodiv #15 + Dodiv #14 + Dodiv #13 + Dodiv #12 +LSYM(Lthumb1_div12): + Dodiv #11 + Dodiv #10 + Dodiv #9 + Dodiv #8 + bcs LSYM(Lthumb1_div_loop) +LSYM(Lthumb1_div8): + Dodiv #7 + Dodiv #6 + Dodiv #5 +LSYM(Lthumb1_div5): + Dodiv #4 +LSYM(Lthumb1_div4): + Dodiv #3 +LSYM(Lthumb1_div3): + Dodiv #2 +LSYM(Lthumb1_div2): + Dodiv #1 +LSYM(Lthumb1_div1): + sub divisor, dividend, divisor + bcs 1f + cpy divisor, dividend + +1: adc result, result + cpy dividend, result + RET + +LSYM(Ldivbyzero_waypoint): + b LSYM(Ldiv0) +.endm + +/* The body of division with negative divisor. Similar with + THUMB1_Div_Positive except that the shift steps are in multiples + of six bits. 
*/ +.macro THUMB1_Div_Negative + lsr result, divisor, #31 + beq 1f + neg divisor, divisor + +1: asr curbit, dividend, #32 + bcc 2f + neg dividend, dividend + +2: eor curbit, result + mov result, #0 + cpy ip, curbit + BranchToDiv #4, LSYM(Lthumb1_div_negative4) + BranchToDiv #8, LSYM(Lthumb1_div_negative8) +LSYM(Lthumb1_div_large): + mov result, #0xfc + lsl divisor, divisor, #6 + rev result, result + lsr curbit, dividend, #8 + cmp curbit, divisor + blo LSYM(Lthumb1_div_negative8) + + lsl divisor, divisor, #6 + asr result, result, #6 + cmp curbit, divisor + blo LSYM(Lthumb1_div_negative8) + + lsl divisor, divisor, #6 + asr result, result, #6 + cmp curbit, divisor + blo LSYM(Lthumb1_div_negative8) + + lsl divisor, divisor, #6 + beq LSYM(Ldivbyzero_negative) + asr result, result, #6 + b LSYM(Lthumb1_div_negative8) +LSYM(Lthumb1_div_negative_loop): + lsr divisor, divisor, #6 +LSYM(Lthumb1_div_negative8): + DoDiv #7 + DoDiv #6 + DoDiv #5 + DoDiv #4 +LSYM(Lthumb1_div_negative4): + DoDiv #3 + DoDiv #2 + bcs LSYM(Lthumb1_div_negative_loop) + DoDiv #1 + sub divisor, dividend, divisor + bcs 1f + cpy divisor, dividend + +1: cpy curbit, ip + adc result, result + asr curbit, curbit, #1 + cpy dividend, result + bcc 2f + neg dividend, dividend + cmp curbit, #0 + +2: bpl 3f + neg divisor, divisor + +3: RET + +LSYM(Ldivbyzero_negative): + cpy curbit, ip + asr curbit, curbit, #1 + bcc LSYM(Ldiv0) + neg dividend, dividend +.endm +#endif /* ARM Thumb version. */ + /* ------------------------------------------------------------------------ */ /* Start of the Real Functions */ /* ------------------------------------------------------------------------ */ @@ -955,6 +1097,7 @@ LSYM(Lgot_result): FUNC_START udivsi3 FUNC_ALIAS aeabi_uidiv udivsi3 +#if defined(__OPTIMIZE_SIZE__) cmp divisor, #0 beq LSYM(Ldiv0) @@ -972,6 +1115,14 @@ LSYM(udivsi3_skip_div0_test): pop { work } RET +/* Implementation of aeabi_uidiv for ARMv6m. 
This version is only + used in ARMv6-M when we need an efficient implementation. */ +#else +LSYM(udivsi3_skip_div0_test): + THUMB1_Div_Positive + +#endif /* __OPTIMIZE_SIZE__ */ + #elif defined(__ARM_ARCH_EXT_IDIV__) ARM_FUNC_START udivsi3 @@ -1023,12 +1174,21 @@ LSYM(udivsi3_skip_div0_test): FUNC_START aeabi_uidivmod cmp r1, #0 beq LSYM(Ldiv0) +# if defined(__OPTIMIZE_SIZE__) push {r0, r1, lr} bl LSYM(udivsi3_skip_div0_test) POP {r1, r2, r3} mul r2, r0 sub r1, r1, r2 bx r3 +# else + /* Both the quotient and remainder are calculated simultaneously + in THUMB1_Div_Positive. There is no need to calculate the + remainder again here. */ + b LSYM(udivsi3_skip_div0_test) + RET +# endif /* __OPTIMIZE_SIZE__ */ + #elif defined(__ARM_ARCH_EXT_IDIV__) ARM_FUNC_START aeabi_uidivmod cmp r1, #0 @@ -1084,7 +1244,7 @@ LSYM(Lover10): RET #else /* ARM version. */ - + FUNC_START umodsi3 subs r2, r1, #1 @ compare divisor with 1 @@ -1109,8 +1269,9 @@ LSYM(Lover10): #if defined(__prefer_thumb__) - FUNC_START divsi3 + FUNC_START divsi3 FUNC_ALIAS aeabi_idiv divsi3 +#if defined(__OPTIMIZE_SIZE__) cmp divisor, #0 beq LSYM(Ldiv0) @@ -1133,7 +1294,7 @@ LSYM(Lover11): blo LSYM(Lgot_result) THUMB_DIV_MOD_BODY 0 - + mov r0, result mov work, ip cmp work, #0 @@ -1143,6 +1304,22 @@ LSYM(Lover12): pop { work } RET +/* Implementation of aeabi_idiv for ARMv6m. This version is only + used in ARMv6-M when we need an efficient implementation. */ +#else +LSYM(divsi3_skip_div0_test): + cpy curbit, dividend + orr curbit, divisor + bmi LSYM(Lthumb1_div_negative) + +LSYM(Lthumb1_div_positive): + THUMB1_Div_Positive + +LSYM(Lthumb1_div_negative): + THUMB1_Div_Negative + +#endif /* __OPTIMIZE_SIZE__ */ + #elif defined(__ARM_ARCH_EXT_IDIV__) ARM_FUNC_START divsi3 @@ -1154,8 +1331,8 @@ LSYM(Lover12): RET #else /* ARM/Thumb-2 version. 
*/ - - ARM_FUNC_START divsi3 + + ARM_FUNC_START divsi3 ARM_FUNC_ALIAS aeabi_idiv divsi3 cmp r1, #0 @@ -1209,12 +1386,21 @@ LSYM(divsi3_skip_div0_test): FUNC_START aeabi_idivmod cmp r1, #0 beq LSYM(Ldiv0) +# if defined(__OPTIMIZE_SIZE__) push {r0, r1, lr} bl LSYM(divsi3_skip_div0_test) POP {r1, r2, r3} mul r2, r0 sub r1, r1, r2 bx r3 +# else + /* Both the quotient and remainder are calculated simultaneously + in THUMB1_Div_Positive and THUMB1_Div_Negative. There is no + need to calculate the remainder again here. */ + b LSYM(divsi3_skip_div0_test) + RET +# endif /* __OPTIMIZE_SIZE__ */ + #elif defined(__ARM_ARCH_EXT_IDIV__) ARM_FUNC_START aeabi_idivmod cmp r1, #0 -- 1.9.1