Author: adconrad
Date: 2014-04-28 02:42:40 +0000 (Mon, 28 Apr 2014)
New Revision: 6028

Added:
   glibc-package/trunk/debian/patches/arm/cvs-arm-always-blx.diff
   glibc-package/trunk/debian/patches/arm/cvs-memcpy-align.diff
Modified:
   glibc-package/trunk/debian/changelog
   glibc-package/trunk/debian/patches/series
Log:
debian/patches/arm/cvs-{memcpy-align.diff,arm-always-blx.diff}: Backport
ifunc memcpy routines from 2.19 to fix alignment issues and computed-jump
calculations for ARM_ALWAYS_BX, should fix memcpy on our ArmadaXP buildds.

Modified: glibc-package/trunk/debian/changelog
===================================================================
--- glibc-package/trunk/debian/changelog        2014-04-27 20:47:05 UTC (rev 
6027)
+++ glibc-package/trunk/debian/changelog        2014-04-28 02:42:40 UTC (rev 
6028)
@@ -25,6 +25,11 @@
   * kfreebsd/local-sysdeps.diff: update to revision 5460 (from glibc-bsd).
   * kfreebsd/local-fbtl.diff: likewise
 
+  [ Adam Conrad ]
+  * debian/patches/arm/cvs-{memcpy-align.patch,arm-always-blx.diff}: Backport
+    ifunc memcpy routines from 2.19 to fix alignment issues and computed-jump
+    calculations for ARM_ALWAYS_BX, should fix memcpy on our ArmadaXP buildds.
+
  -- Aurelien Jarno <[email protected]>  Sun, 02 Mar 2014 16:19:49 +0100
 
 eglibc (2.18-4) unstable; urgency=high

Added: glibc-package/trunk/debian/patches/arm/cvs-arm-always-blx.diff
===================================================================
--- glibc-package/trunk/debian/patches/arm/cvs-arm-always-blx.diff              
                (rev 0)
+++ glibc-package/trunk/debian/patches/arm/cvs-arm-always-blx.diff      
2014-04-28 02:42:40 UTC (rev 6028)
@@ -0,0 +1,74 @@
+commit 068dcfd6758b2f50445d40cfe9d10e4284bd0635
+Author: Roland McGrath <[email protected]>
+Date:   Fri Nov 22 11:39:20 2013 -0800
+
+    ARM: Fix memcpy computed-jump calculations for ARM_ALWAYS_BX case.
+
+2013-11-22  Roland McGrath  <[email protected]>
+
+       * sysdeps/arm/armv7/multiarch/memcpy_impl.S
+       [ARM_ALWAYS_BX] (dispatch_helper): Fix PC computation to properly
+       account for instructions after the reference to PC given that 'bx'
+       might actually be expanded to multiple instructions.
+       * sysdeps/arm/arm-features.h (ARM_BX_NINSNS): Macro removed.
+
+diff --git a/ports/sysdeps/arm/arm-features.h 
b/ports/sysdeps/arm/arm-features.h
+index 1d4b0f1..336b690 100644
+--- a/ports/sysdeps/arm/arm-features.h
++++ b/ports/sysdeps/arm/arm-features.h
+@@ -53,14 +53,6 @@
+ # define ARM_BX_ALIGN_LOG2    2
+ #endif
+ 
+-/* The number of instructions that 'bx' expands to.  A more-specific
+-   arm-features.h that defines 'bx' as a macro should define this to the
+-   number instructions it expands to.  This is used only in a context
+-   where the 'bx' expansion won't cross an ARM_BX_ALIGN_LOG2 boundary.  */
+-#ifndef ARM_BX_NINSNS
+-# define ARM_BX_NINSNS                1
+-#endif
+-
+ /* An OS-specific arm-features.h file may define ARM_NO_INDEX_REGISTER to
+    indicate that the two-register addressing modes must never be used.  */
+ 
+diff --git a/ports/sysdeps/arm/armv7/multiarch/memcpy_impl.S 
b/ports/sysdeps/arm/armv7/multiarch/memcpy_impl.S
+index ad43a3d..44cecb0 100644
+--- a/ports/sysdeps/arm/armv7/multiarch/memcpy_impl.S
++++ b/ports/sysdeps/arm/armv7/multiarch/memcpy_impl.S
+@@ -127,25 +127,26 @@
+       .purgem dispatch_step
+       .endm
+ #else
+-# if ARM_BX_ALIGN_LOG2 < 4
++# if ARM_BX_ALIGN_LOG2 < 3
+ #  error case not handled
+ # endif
+       .macro dispatch_helper steps, log2_bytes_per_step
+-      .p2align ARM_BX_ALIGN_LOG2
+       /* TMP1 gets (max_bytes - bytes_to_copy), where max_bytes is
+          (STEPS << LOG2_BYTES_PER_STEP).
+-         So this is (steps_to_skip << LOG2_BYTES_PER_STEP).  */
+-      rsb     tmp1, tmp1, #(\steps << \log2_bytes_per_step)
+-      /* Pad so that the add;bx pair immediately precedes an alignment
+-         boundary.  Hence, TMP1=0 will run all the steps.  */
+-      .rept (1 << (ARM_BX_ALIGN_LOG2 - 2)) - (2 + ARM_BX_NINSNS)
+-      nop
+-      .endr
++         So this is (steps_to_skip << LOG2_BYTES_PER_STEP).
++         Then it needs further adjustment to compensate for the
++         distance between the PC value taken below (0f + PC_OFS)
++         and the first step's instructions (1f).  */
++      rsb     tmp1, tmp1, #((\steps << \log2_bytes_per_step) \
++                            + ((1f - PC_OFS - 0f) \
++                               >> (ARM_BX_ALIGN_LOG2 - \log2_bytes_per_step)))
+       /* Shifting down LOG2_BYTES_PER_STEP gives us the number of
+          steps to skip, then shifting up ARM_BX_ALIGN_LOG2 gives us
+          the (byte) distance to add to the PC.  */
+-      add     tmp1, pc, tmp1, lsl #(ARM_BX_ALIGN_LOG2 - \log2_bytes_per_step)
++0:    add     tmp1, pc, tmp1, lsl #(ARM_BX_ALIGN_LOG2 - \log2_bytes_per_step)
+       bx      tmp1
++      .p2align ARM_BX_ALIGN_LOG2
++1:
+       .endm
+ 
+       .macro dispatch_7_dword

Added: glibc-package/trunk/debian/patches/arm/cvs-memcpy-align.diff
===================================================================
--- glibc-package/trunk/debian/patches/arm/cvs-memcpy-align.diff                
                (rev 0)
+++ glibc-package/trunk/debian/patches/arm/cvs-memcpy-align.diff        
2014-04-28 02:42:40 UTC (rev 6028)
@@ -0,0 +1,67 @@
+commit cd90698b541046c22544c2c057a4676368fd1d7f
+Author: Will Newton <[email protected]>
+Date:   Wed Aug 7 14:15:52 2013 +0100
+
+    ARM: Improve armv7 memcpy performance.
+    
+    Only enter the aligned copy loop with buffers that can be 8-byte
+    aligned. This improves performance slightly on Cortex-A9 and
+    Cortex-A15 cores for large copies with buffers that are 4-byte
+    aligned but not 8-byte aligned.
+    
+    ports/ChangeLog.arm:
+    
+    2013-09-16  Will Newton  <[email protected]>
+    
+       * sysdeps/arm/armv7/multiarch/memcpy_impl.S: Tighten check
+       on entry to aligned copy loop to improve performance.
+
+2013-09-16  Will Newton  <[email protected]>
+
+       * sysdeps/arm/armv7/multiarch/memcpy_impl.S: Tighten check
+       on entry to aligned copy loop to improve performance.
+
+diff --git a/ports/sysdeps/arm/armv7/multiarch/memcpy_impl.S 
b/ports/sysdeps/arm/armv7/multiarch/memcpy_impl.S
+index 3decad6..ad43a3d 100644
+--- a/ports/sysdeps/arm/armv7/multiarch/memcpy_impl.S
++++ b/ports/sysdeps/arm/armv7/multiarch/memcpy_impl.S
+@@ -24,7 +24,6 @@
+     ARMv6 (ARMv7-a if using Neon)
+     ARM state
+     Unaligned accesses
+-    LDRD/STRD support unaligned word accesses
+ 
+  */
+ 
+@@ -369,8 +368,8 @@ ENTRY(memcpy)
+       cfi_adjust_cfa_offset (FRAME_SIZE)
+       cfi_rel_offset (tmp2, 0)
+       cfi_remember_state
+-      and     tmp2, src, #3
+-      and     tmp1, dst, #3
++      and     tmp2, src, #7
++      and     tmp1, dst, #7
+       cmp     tmp1, tmp2
+       bne     .Lcpy_notaligned
+ 
+@@ -381,9 +380,9 @@ ENTRY(memcpy)
+       vmov.f32        s0, s0
+ #endif
+ 
+-      /* SRC and DST have the same mutual 32-bit alignment, but we may
++      /* SRC and DST have the same mutual 64-bit alignment, but we may
+          still need to pre-copy some bytes to get to natural alignment.
+-         We bring DST into full 64-bit alignment.  */
++         We bring SRC and DST into full 64-bit alignment.  */
+       lsls    tmp2, dst, #29
+       beq     1f
+       rsbs    tmp2, tmp2, #0
+@@ -515,7 +514,7 @@ ENTRY(memcpy)
+ 
+ .Ltail63aligned:                      /* Count in tmp2.  */
+       /* Copy up to 7 d-words of data.  Similar to Ltail63unaligned, but
+-         we know that the src and dest are 32-bit aligned so we can use
++         we know that the src and dest are 64-bit aligned so we can use
+          LDRD/STRD to improve efficiency.  */
+       /* TMP2 is now negative, but we don't care about that.  The bottom
+          six bits still tell us how many bytes are left to copy.  */

Modified: glibc-package/trunk/debian/patches/series
===================================================================
--- glibc-package/trunk/debian/patches/series   2014-04-27 20:47:05 UTC (rev 
6027)
+++ glibc-package/trunk/debian/patches/series   2014-04-28 02:42:40 UTC (rev 
6028)
@@ -60,6 +60,8 @@
 arm/cvs-arm__longjmp-thumb.diff
 arm/cvs-arm__sigsetjmp-thumb.diff
 arm/cvs-arm-pointer-mangle-frame.diff
+arm/cvs-arm-always-blx.diff
+arm/cvs-memcpy-align.diff
 
 arm64/cvs-arm64-sigcontext.diff
 arm64/cvs-arm64-relocs.diff


-- 
To UNSUBSCRIBE, email to [email protected]
with a subject of "unsubscribe". Trouble? Contact [email protected]
Archive: https://lists.debian.org/[email protected]

Reply via email to