Ping? > -----Original Message----- > From: Thomas Preud'homme [mailto:thomas.preudho...@arm.com] > Sent: Thursday, April 30, 2015 3:19 PM > To: Thomas Preud'homme; Richard Earnshaw; 'gcc-patches@gcc.gnu.org'; > Marcus Shawcroft; Ramana Radhakrishnan > (ramana.radhakrish...@arm.com) > Subject: RE: [PATCH 2/3, ARM, libgcc, ping6] Code size optimization for > the fmul/fdiv and dmul/ddiv function in libgcc > > Here is an updated patch that prefixes local symbols with __ for more > safety. > They appear in the symtab as local so it is not strictly necessary but one is > never too cautious. Being local, they also do not generate any PLT entry. > They appear only because the jumps are from one section to another > (which is the whole purpose of this patch) and thus need a static > relocation. > > I hope this revised version addresses all your concerns. > > ChangeLog entry is unchanged: > > *** gcc/libgcc/ChangeLog *** > > 2015-04-30 Tony Wang <tony.w...@arm.com> > > * config/arm/ieee754-sf.S: Expose symbols around fragment > boundaries as function symbols. > * config/arm/ieee754-df.S: Same with above > > diff --git a/libgcc/config/arm/ieee754-df.S b/libgcc/config/arm/ieee754- > df.S > index c1468dc..39b0028 100644 > --- a/libgcc/config/arm/ieee754-df.S > +++ b/libgcc/config/arm/ieee754-df.S > @@ -559,7 +559,7 @@ ARM_FUNC_ALIAS aeabi_l2d floatdidf > > #ifdef L_arm_muldivdf3 > > -ARM_FUNC_START muldf3 > +ARM_FUNC_START muldf3, function_section > ARM_FUNC_ALIAS aeabi_dmul muldf3 > do_push {r4, r5, r6, lr} > > @@ -571,7 +571,7 @@ ARM_FUNC_ALIAS aeabi_dmul muldf3 > COND(and,s,ne) r5, ip, yh, lsr #20 > teqne r4, ip > teqne r5, ip > - bleq LSYM(Lml_s) > + bleq __Lml_s > > @ Add exponents together > add r4, r4, r5 > @@ -689,7 +689,7 @@ ARM_FUNC_ALIAS aeabi_dmul muldf3 > subs ip, r4, #(254 - 1) > do_it hi > cmphi ip, #0x700 > - bhi LSYM(Lml_u) > + bhi __Lml_u > > @ Round the result, merge final exponent. 
> cmp lr, #0x80000000 > @@ -716,9 +716,12 @@ LSYM(Lml_1): > mov lr, #0 > subs r4, r4, #1 > > -LSYM(Lml_u): > + FUNC_END aeabi_dmul > + FUNC_END muldf3 > + > +ARM_SYM_START __Lml_u > @ Overflow? > - bgt LSYM(Lml_o) > + bgt __Lml_o > > @ Check if denormalized result is possible, otherwise return > signed 0. > cmn r4, #(53 + 1) > @@ -778,10 +781,11 @@ LSYM(Lml_u): > do_it eq > biceq xl, xl, r3, lsr #31 > RETLDM "r4, r5, r6" > + SYM_END __Lml_u > > @ One or both arguments are denormalized. > @ Scale them leftwards and preserve sign bit. > -LSYM(Lml_d): > +ARM_SYM_START __Lml_d > teq r4, #0 > bne 2f > and r6, xh, #0x80000000 > @@ -804,8 +808,9 @@ LSYM(Lml_d): > beq 3b > orr yh, yh, r6 > RET > + SYM_END __Lml_d > > -LSYM(Lml_s): > +ARM_SYM_START __Lml_s > @ Isolate the INF and NAN cases away > teq r4, ip > and r5, ip, yh, lsr #20 > @@ -817,10 +822,11 @@ LSYM(Lml_s): > orrs r6, xl, xh, lsl #1 > do_it ne > COND(orr,s,ne) r6, yl, yh, lsl #1 > - bne LSYM(Lml_d) > + bne __Lml_d > + SYM_END __Lml_s > > @ Result is 0, but determine sign anyway. > -LSYM(Lml_z): > +ARM_SYM_START __Lml_z > eor xh, xh, yh > and xh, xh, #0x80000000 > mov xl, #0 > @@ -832,41 +838,42 @@ LSYM(Lml_z): > moveq xl, yl > moveq xh, yh > COND(orr,s,ne) r6, yl, yh, lsl #1 > - beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN > + beq __Lml_n @ 0 * INF or INF * 0 -> NAN > teq r4, ip > bne 1f > orrs r6, xl, xh, lsl #12 > - bne LSYM(Lml_n) @ NAN * <anything> -> NAN > + bne __Lml_n @ NAN * <anything> -> NAN > 1: teq r5, ip > - bne LSYM(Lml_i) > + bne __Lml_i > orrs r6, yl, yh, lsl #12 > do_it ne, t > movne xl, yl > movne xh, yh > - bne LSYM(Lml_n) @ <anything> * NAN -> NAN > + bne __Lml_n @ <anything> * NAN -> NAN > + SYM_END __Lml_z > > @ Result is INF, but we need to determine its sign. > -LSYM(Lml_i): > +ARM_SYM_START __Lml_i > eor xh, xh, yh > + SYM_END __Lml_i > > @ Overflow: return INF (sign already in xh). 
> -LSYM(Lml_o): > +ARM_SYM_START __Lml_o > and xh, xh, #0x80000000 > orr xh, xh, #0x7f000000 > orr xh, xh, #0x00f00000 > mov xl, #0 > RETLDM "r4, r5, r6" > + SYM_END __Lml_o > > @ Return a quiet NAN. > -LSYM(Lml_n): > +ARM_SYM_START __Lml_n > orr xh, xh, #0x7f000000 > orr xh, xh, #0x00f80000 > RETLDM "r4, r5, r6" > + SYM_END __Lml_n > > - FUNC_END aeabi_dmul > - FUNC_END muldf3 > - > -ARM_FUNC_START divdf3 > +ARM_FUNC_START divdf3 function_section > ARM_FUNC_ALIAS aeabi_ddiv divdf3 > > do_push {r4, r5, r6, lr} > @@ -985,7 +992,7 @@ ARM_FUNC_ALIAS aeabi_ddiv divdf3 > subs ip, r4, #(254 - 1) > do_it hi > cmphi ip, #0x700 > - bhi LSYM(Lml_u) > + bhi __Lml_u > > @ Round the result, merge final exponent. > subs ip, r5, yh > @@ -1009,13 +1016,13 @@ LSYM(Ldv_1): > orr xh, xh, #0x00100000 > mov lr, #0 > subs r4, r4, #1 > - b LSYM(Lml_u) > + b __Lml_u > > @ Result mightt need to be denormalized: put remainder bits > @ in lr for rounding considerations. > LSYM(Ldv_u): > orr lr, r5, r6 > - b LSYM(Lml_u) > + b __Lml_u > > @ One or both arguments is either INF, NAN or zero. > LSYM(Ldv_s): > @@ -1023,34 +1030,34 @@ LSYM(Ldv_s): > teq r4, ip > do_it eq > teqeq r5, ip > - beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN > + beq __Lml_n @ INF/NAN / INF/NAN -> NAN > teq r4, ip > bne 1f > orrs r4, xl, xh, lsl #12 > - bne LSYM(Lml_n) @ NAN / <anything> -> NAN > + bne __Lml_n @ NAN / <anything> -> NAN > teq r5, ip > - bne LSYM(Lml_i) @ INF / <anything> -> INF > + bne __Lml_i @ INF / <anything> -> INF > mov xl, yl > mov xh, yh > - b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN > + b __Lml_n @ INF / (INF or NAN) -> NAN > 1: teq r5, ip > bne 2f > orrs r5, yl, yh, lsl #12 > - beq LSYM(Lml_z) @ <anything> / INF -> 0 > + beq __Lml_z @ <anything> / INF -> 0 > mov xl, yl > mov xh, yh > - b LSYM(Lml_n) @ <anything> / NAN -> NAN > + b __Lml_n @ <anything> / NAN -> NAN > 2: @ If both are nonzero, we need to normalize and resume above. 
> orrs r6, xl, xh, lsl #1 > do_it ne > COND(orr,s,ne) r6, yl, yh, lsl #1 > - bne LSYM(Lml_d) > + bne __Lml_d > @ One or both arguments are 0. > orrs r4, xl, xh, lsl #1 > - bne LSYM(Lml_i) @ <non_zero> / 0 -> INF > + bne __Lml_i @ <non_zero> / 0 -> INF > orrs r5, yl, yh, lsl #1 > - bne LSYM(Lml_z) @ 0 / <non_zero> -> 0 > - b LSYM(Lml_n) @ 0 / 0 -> NAN > + bne __Lml_z @ 0 / <non_zero> -> 0 > + b __Lml_n @ 0 / 0 -> NAN > > FUNC_END aeabi_ddiv > FUNC_END divdf3 > diff --git a/libgcc/config/arm/ieee754-sf.S b/libgcc/config/arm/ieee754- > sf.S > index bc44d4e..7c2ab8b 100644 > --- a/libgcc/config/arm/ieee754-sf.S > +++ b/libgcc/config/arm/ieee754-sf.S > @@ -418,7 +418,7 @@ ARM_FUNC_ALIAS aeabi_l2f floatdisf > > #ifdef L_arm_muldivsf3 > > -ARM_FUNC_START mulsf3 > +ARM_FUNC_START mulsf3, function_section > ARM_FUNC_ALIAS aeabi_fmul mulsf3 > > @ Mask out exponents, trap any zero/denormal/INF/NAN. > @@ -428,7 +428,7 @@ ARM_FUNC_ALIAS aeabi_fmul mulsf3 > COND(and,s,ne) r3, ip, r1, lsr #23 > teqne r2, ip > teqne r3, ip > - beq LSYM(Lml_s) > + beq __Lml_s > LSYM(Lml_x): > > @ Add exponents together > @@ -490,7 +490,7 @@ LSYM(Lml_x): > @ Apply exponent bias, check for under/overflow. > sbc r2, r2, #127 > cmp r2, #(254 - 1) > - bhi LSYM(Lml_u) > + bhi __Lml_u > > @ Round the result, merge final exponent. > cmp r3, #0x80000000 > @@ -518,9 +518,12 @@ LSYM(Lml_1): > mov r3, #0 > subs r2, r2, #1 > > -LSYM(Lml_u): > + FUNC_END aeabi_fmul > + FUNC_END mulsf3 > + > +ARM_SYM_START __Lml_u > @ Overflow? > - bgt LSYM(Lml_o) > + bgt __Lml_o > > @ Check if denormalized result is possible, otherwise return > signed 0. > cmn r2, #(24 + 1) > @@ -540,10 +543,11 @@ LSYM(Lml_u): > do_it eq > biceq r0, r0, ip, lsr #31 > RET > + SYM_END __Lml_u > > @ One or both arguments are denormalized. > @ Scale them leftwards and preserve sign bit. 
> -LSYM(Lml_d): > +ARM_SYM_START __Lml_d > teq r2, #0 > and ip, r0, #0x80000000 > 1: do_it eq, tt > @@ -561,8 +565,9 @@ LSYM(Lml_d): > beq 2b > orr r1, r1, ip > b LSYM(Lml_x) > + SYM_END __Lml_d > > -LSYM(Lml_s): > +ARM_SYM_START __Lml_s > @ Isolate the INF and NAN cases away > and r3, ip, r1, lsr #23 > teq r2, ip > @@ -574,10 +579,11 @@ LSYM(Lml_s): > bics ip, r0, #0x80000000 > do_it ne > COND(bic,s,ne) ip, r1, #0x80000000 > - bne LSYM(Lml_d) > + bne __Lml_d > + SYM_END __Lml_s > > @ Result is 0, but determine sign anyway. > -LSYM(Lml_z): > +ARM_SYM_START __Lml_z > eor r0, r0, r1 > bic r0, r0, #0x7fffffff > RET > @@ -589,39 +595,41 @@ LSYM(Lml_z): > moveq r0, r1 > teqne r1, #0x0 > teqne r1, #0x80000000 > - beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN > + beq __Lml_n @ 0 * INF or INF * 0 -> NAN > teq r2, ip > bne 1f > movs r2, r0, lsl #9 > - bne LSYM(Lml_n) @ NAN * <anything> -> NAN > + bne __Lml_n @ NAN * <anything> -> NAN > 1: teq r3, ip > - bne LSYM(Lml_i) > + bne __Lml_i > movs r3, r1, lsl #9 > do_it ne > movne r0, r1 > - bne LSYM(Lml_n) @ <anything> * NAN -> NAN > + bne __Lml_n @ <anything> * NAN -> NAN > + SYM_END __Lml_z > > @ Result is INF, but we need to determine its sign. > -LSYM(Lml_i): > +ARM_SYM_START __Lml_i > eor r0, r0, r1 > + SYM_END __Lml_i > > @ Overflow: return INF (sign already in r0). > -LSYM(Lml_o): > +ARM_SYM_START __Lml_o > and r0, r0, #0x80000000 > orr r0, r0, #0x7f000000 > orr r0, r0, #0x00800000 > RET > + SYM_END __Lml_o > > @ Return a quiet NAN. > -LSYM(Lml_n): > +ARM_SYM_START __Lml_n > orr r0, r0, #0x7f000000 > orr r0, r0, #0x00c00000 > RET > + SYM_END __Lml_n > > - FUNC_END aeabi_fmul > - FUNC_END mulsf3 > > -ARM_FUNC_START divsf3 > +ARM_FUNC_START divsf3 function_section > ARM_FUNC_ALIAS aeabi_fdiv divsf3 > > @ Mask out exponents, trap any zero/denormal/INF/NAN. > @@ -684,7 +692,7 @@ LSYM(Ldv_x): > > @ Check exponent for under/overflow. 
> cmp r2, #(254 - 1) > - bhi LSYM(Lml_u) > + bhi __Lml_u > > @ Round the result, merge final exponent. > cmp r3, r1 > @@ -706,7 +714,7 @@ LSYM(Ldv_1): > orr r0, r0, #0x00800000 > mov r3, #0 > subs r2, r2, #1 > - b LSYM(Lml_u) > + b __Lml_u > > @ One or both arguments are denormalized. > @ Scale them leftwards and preserve sign bit. > @@ -735,17 +743,17 @@ LSYM(Ldv_s): > teq r2, ip > bne 1f > movs r2, r0, lsl #9 > - bne LSYM(Lml_n) @ NAN / <anything> -> NAN > + bne __Lml_n @ NAN / <anything> -> NAN > teq r3, ip > - bne LSYM(Lml_i) @ INF / <anything> -> INF > + bne __Lml_i @ INF / <anything> -> INF > mov r0, r1 > - b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN > + b __Lml_n @ INF / (INF or NAN) -> NAN > 1: teq r3, ip > bne 2f > movs r3, r1, lsl #9 > - beq LSYM(Lml_z) @ <anything> / INF -> 0 > + beq __Lml_z @ <anything> / INF -> 0 > mov r0, r1 > - b LSYM(Lml_n) @ <anything> / NAN -> NAN > + b __Lml_n @ <anything> / NAN -> NAN > 2: @ If both are nonzero, we need to normalize and resume above. > bics ip, r0, #0x80000000 > do_it ne > @@ -753,10 +761,10 @@ LSYM(Ldv_s): > bne LSYM(Ldv_d) > @ One or both arguments are zero. > bics r2, r0, #0x80000000 > - bne LSYM(Lml_i) @ <non_zero> / 0 -> INF > + bne __Lml_i @ <non_zero> / 0 -> INF > bics r3, r1, #0x80000000 > - bne LSYM(Lml_z) @ 0 / <non_zero> -> 0 > - b LSYM(Lml_n) @ 0 / 0 -> NAN > + bne __Lml_z @ 0 / <non_zero> -> 0 > + b __Lml_n @ 0 / 0 -> NAN > > FUNC_END aeabi_fdiv > FUNC_END divsf3 > > > Is this ok for trunk? > > Best regards, > > Thomas