The Thumb versions of these functions are each 1-2 instructions smaller
and faster, and branchless when the IT instruction is available.

The ARM versions were converted to the "xxl/xxh" big-endian register
naming convention, but are otherwise unchanged.

gcc/libgcc/ChangeLog:
2022-10-09 Daniel Engel <g...@danielengel.com>

        * config/arm/eabi/lshift.S (__ashldi3, __ashrdi3, __lshrdi3):
        Reduced code size on Thumb architectures;
        updated big-endian register naming convention to "xxl/xxh".
---
 libgcc/config/arm/eabi/lshift.S | 338 +++++++++++++++++++++-----------
 1 file changed, 228 insertions(+), 110 deletions(-)

diff --git a/libgcc/config/arm/eabi/lshift.S b/libgcc/config/arm/eabi/lshift.S
index 6e79d96c118..365350dfb2d 100644
--- a/libgcc/config/arm/eabi/lshift.S
+++ b/libgcc/config/arm/eabi/lshift.S
@@ -1,123 +1,241 @@
-/* Copyright (C) 1995-2022 Free Software Foundation, Inc.
+/* lshift.S: ARM optimized 64-bit integer shift
 
-This file is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 3, or (at your option) any
-later version.
+   Copyright (C) 2018-2022 Free Software Foundation, Inc.
+   Contributed by Daniel Engel, Senva Inc (g...@danielengel.com)
 
-This file is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
 
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
 
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
 
 
 #ifdef L_lshrdi3
 
-       FUNC_START lshrdi3
-       FUNC_ALIAS aeabi_llsr lshrdi3
-       
-#ifdef __thumb__
-       lsrs    al, r2
-       movs    r3, ah
-       lsrs    ah, r2
-       mov     ip, r3
-       subs    r2, #32
-       lsrs    r3, r2
-       orrs    al, r3
-       negs    r2, r2
-       mov     r3, ip
-       lsls    r3, r2
-       orrs    al, r3
-       RET
-#else
-       subs    r3, r2, #32
-       rsb     ip, r2, #32
-       movmi   al, al, lsr r2
-       movpl   al, ah, lsr r3
-       orrmi   al, al, ah, lsl ip
-       mov     ah, ah, lsr r2
-       RET
-#endif
-       FUNC_END aeabi_llsr
-       FUNC_END lshrdi3
-
-#endif
-       
+// long long __aeabi_llsr(long long, int)
+// Logical shift right the 64 bit value in $r1:$r0 by the count in $r2.
+// The result is only guaranteed for shifts in the range of '0' to '63'.
+// Uses $r3 as scratch space.
+FUNC_START_SECTION aeabi_llsr .text.sorted.libgcc.lshrdi3
+FUNC_ALIAS lshrdi3 aeabi_llsr
+    CFI_START_FUNCTION
+
+  #if defined(__thumb__) && __thumb__
+
+        // Save a copy for the remainder.
+        movs    r3,     xxh
+
+        // Assume a simple shift.
+        lsrs    xxl,    r2
+        lsrs    xxh,    r2
+
+        // Test if the shift distance is larger than 1 word.
+        subs    r2,     #32
+
+    #ifdef __HAVE_FEATURE_IT
+        do_it   lo,te
+
+        // The remainder is opposite the main shift, (32 - x) bits.
+        rsblo   r2,     #0
+        lsllo   r3,     r2
+
+        // The remainder shift extends into the hi word.
+        lsrhs   r3,     r2
+
+    #else /* !__HAVE_FEATURE_IT */
+        bhs     LLSYM(__llsr_large)
+
+        // The remainder is opposite the main shift, (32 - x) bits.
+        rsbs    r2,     #0
+        lsls    r3,     r2
+
+        // Cancel any remaining shift.
+        eors    r2,     r2
+
+      LLSYM(__llsr_large):
+        // Apply any remaining shift to the hi word.
+        lsrs    r3,     r2
+
+    #endif /* !__HAVE_FEATURE_IT */
+
+        // Merge remainder and result.
+        adds    xxl,    r3
+        RET
+
+  #else /* !__thumb__ */
+
+        subs    r3,     r2,     #32
+        rsb     ip,     r2,     #32
+        movmi   xxl,    xxl,    lsr r2
+        movpl   xxl,    xxh,    lsr r3
+        orrmi   xxl,    xxl,    xxh,    lsl ip
+        mov     xxh,    xxh,    lsr r2
+        RET
+
+  #endif /* !__thumb__ */
+
+
+    CFI_END_FUNCTION
+FUNC_END lshrdi3
+FUNC_END aeabi_llsr
+
+#endif /* L_lshrdi3 */
+
+
 #ifdef L_ashrdi3
-       
-       FUNC_START ashrdi3
-       FUNC_ALIAS aeabi_lasr ashrdi3
-       
-#ifdef __thumb__
-       lsrs    al, r2
-       movs    r3, ah
-       asrs    ah, r2
-       subs    r2, #32
-       @ If r2 is negative at this point the following step would OR
-       @ the sign bit into all of AL.  That's not what we want...
-       bmi     1f
-       mov     ip, r3
-       asrs    r3, r2
-       orrs    al, r3
-       mov     r3, ip
-1:
-       negs    r2, r2
-       lsls    r3, r2
-       orrs    al, r3
-       RET
-#else
-       subs    r3, r2, #32
-       rsb     ip, r2, #32
-       movmi   al, al, lsr r2
-       movpl   al, ah, asr r3
-       orrmi   al, al, ah, lsl ip
-       mov     ah, ah, asr r2
-       RET
-#endif
-
-       FUNC_END aeabi_lasr
-       FUNC_END ashrdi3
-
-#endif
+
+// long long __aeabi_lasr(long long, int)
+// Arithmetic shift right the 64 bit value in $r1:$r0 by the count in $r2.
+// The result is only guaranteed for shifts in the range of '0' to '63'.
+// Uses $r3 as scratch space.
+FUNC_START_SECTION aeabi_lasr .text.sorted.libgcc.ashrdi3
+FUNC_ALIAS ashrdi3 aeabi_lasr
+    CFI_START_FUNCTION
+
+  #if defined(__thumb__) && __thumb__
+
+        // Save a copy for the remainder.
+        movs    r3,     xxh
+
+        // Assume a simple shift.
+        lsrs    xxl,    r2
+        asrs    xxh,    r2
+
+        // Test if the shift distance is larger than 1 word.
+        subs    r2,     #32
+
+    #ifdef __HAVE_FEATURE_IT
+        do_it   lo,te
+
+        // The remainder is opposite the main shift, (32 - x) bits.
+        rsblo   r2,     #0
+        lsllo   r3,     r2
+
+        // The remainder shift extends into the hi word.
+        asrhs   r3,     r2
+
+    #else /* !__HAVE_FEATURE_IT */
+        bhs     LLSYM(__lasr_large)
+
+        // The remainder is opposite the main shift, (32 - x) bits.
+        rsbs    r2,     #0
+        lsls    r3,     r2
+
+        // Cancel any remaining shift.
+        eors    r2,     r2
+
+      LLSYM(__lasr_large):
+        // Apply any remaining shift to the hi word.
+        asrs    r3,     r2
+
+    #endif /* !__HAVE_FEATURE_IT */
+
+        // Merge remainder and result.
+        adds    xxl,    r3
+        RET
+
+  #else /* !__thumb__ */
+
+        subs    r3,     r2,     #32
+        rsb     ip,     r2,     #32
+        movmi   xxl,    xxl,    lsr r2
+        movpl   xxl,    xxh,    asr r3
+        orrmi   xxl,    xxl,    xxh,    lsl ip
+        mov     xxh,    xxh,    asr r2
+        RET
+
+  #endif /* !__thumb__ */
+
+    CFI_END_FUNCTION
+FUNC_END ashrdi3
+FUNC_END aeabi_lasr
+
+#endif /* L_ashrdi3 */
+
 
 #ifdef L_ashldi3
 
-       FUNC_START ashldi3
-       FUNC_ALIAS aeabi_llsl ashldi3
-       
-#ifdef __thumb__
-       lsls    ah, r2
-       movs    r3, al
-       lsls    al, r2
-       mov     ip, r3
-       subs    r2, #32
-       lsls    r3, r2
-       orrs    ah, r3
-       negs    r2, r2
-       mov     r3, ip
-       lsrs    r3, r2
-       orrs    ah, r3
-       RET
-#else
-       subs    r3, r2, #32
-       rsb     ip, r2, #32
-       movmi   ah, ah, lsl r2
-       movpl   ah, al, lsl r3
-       orrmi   ah, ah, al, lsr ip
-       mov     al, al, lsl r2
-       RET
-#endif
-       FUNC_END aeabi_llsl
-       FUNC_END ashldi3
-
-#endif
+// long long __aeabi_llsl(long long, int)
+// Logical shift left the 64 bit value in $r1:$r0 by the count in $r2.
+// The result is only guaranteed for shifts in the range of '0' to '63'.
+// Uses $r3 as scratch space.
+.section .text.sorted.libgcc.ashldi3,"x"
+FUNC_START_SECTION aeabi_llsl .text.sorted.libgcc.ashldi3
+FUNC_ALIAS ashldi3 aeabi_llsl
+    CFI_START_FUNCTION
+
+  #if defined(__thumb__) && __thumb__
+
+        // Save a copy for the remainder.
+        movs    r3,     xxl
+
+        // Assume a simple shift.
+        lsls    xxl,    r2
+        lsls    xxh,    r2
+
+        // Test if the shift distance is larger than 1 word.
+        subs    r2,     #32
+
+    #ifdef __HAVE_FEATURE_IT
+        do_it   lo,te
+
+        // The remainder is opposite the main shift, (32 - x) bits.
+        rsblo   r2,     #0
+        lsrlo   r3,     r2
+
+        // The remainder shift extends into the hi word.
+        lslhs   r3,     r2
+
+    #else /* !__HAVE_FEATURE_IT */
+        bhs     LLSYM(__llsl_large)
+
+        // The remainder is opposite the main shift, (32 - x) bits.
+        rsbs    r2,     #0
+        lsrs    r3,     r2
+
+        // Cancel any remaining shift.
+        eors    r2,     r2
+
+      LLSYM(__llsl_large):
+        // Apply any remaining shift to the hi word.
+        lsls    r3,     r2
+
+    #endif /* !__HAVE_FEATURE_IT */
+
+        // Merge remainder and result.
+        adds    xxh,    r3
+        RET
+
+  #else /* !__thumb__ */
+
+        subs    r3,     r2,     #32
+        rsb     ip,     r2,     #32
+        movmi   xxh,    xxh,    lsl r2
+        movpl   xxh,    xxl,    lsl r3
+        orrmi   xxh,    xxh,    xxl,    lsr ip
+        mov     xxl,    xxl,    lsl r2
+        RET
+
+  #endif /* !__thumb__ */
+
+    CFI_END_FUNCTION
+FUNC_END ashldi3
+FUNC_END aeabi_llsl
+
+#endif /* L_ashldi3 */
+
+
 
-- 
2.34.1

Reply via email to