On 7 September 2016 at 15:52, Ard Biesheuvel <[email protected]> wrote: > This adds ARM support to BaseMemoryLibOptDxe, partially based on the > cortex-strings library (ScanMem) and the existing CopyMem() implementation > from BaseMemoryLibStm in ArmPkg. > > All string routines are accelerated except ScanMem16, ScanMem32, > ScanMem64 and IsZeroBuffer, which can wait for another day. (Very few > occurrences exist in the codebase) > > Contributed-under: TianoCore Contribution Agreement 1.0 > Signed-off-by: Ard Biesheuvel <[email protected]> > Reviewed-by: Liming Gao <[email protected]> > --- > MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.S | 138 > ++++++++++++++++ > MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.asm | 140 > ++++++++++++++++ > MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.S | 172 > ++++++++++++++++++++ > MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.asm | 147 > +++++++++++++++++ > MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMem.S | 146 > +++++++++++++++++ > MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMem.asm | 147 > +++++++++++++++++ > MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMemGeneric.c | 142 > ++++++++++++++++ > MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.S | 75 +++++++++ > MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.asm | 81 +++++++++ > MdePkg/Library/BaseMemoryLibOptDxe/BaseMemoryLibOptDxe.inf | 30 ++-- > 10 files changed, 1204 insertions(+), 14 deletions(-) > [..] > diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.S > b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.S > new file mode 100644 > index 000000000000..914fdd60ea52 > --- /dev/null > +++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.S > @@ -0,0 +1,75 @@ > +#------------------------------------------------------------------------------ > +# > +# Copyright (c) 2016, Linaro Ltd. 
All rights reserved.<BR> > +# > +# This program and the accompanying materials are licensed and made available > +# under the terms and conditions of the BSD License which accompanies this > +# distribution. The full text of the license may be found at > +# http://opensource.org/licenses/bsd-license.php > +# > +# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, > +# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR > IMPLIED. > +# > +#------------------------------------------------------------------------------ > + > + .text > + .thumb > + .syntax unified > + > +ASM_GLOBAL ASM_PFX(InternalMemZeroMem) > +ASM_PFX(InternalMemZeroMem): > + movs r2, #0 > + > +ASM_GLOBAL ASM_PFX(InternalMemSetMem) > +ASM_PFX(InternalMemSetMem): > + uxtb r2, r2 > + orr r2, r2, r2, lsl #8 > + > +ASM_GLOBAL ASM_PFX(InternalMemSetMem16) > +ASM_PFX(InternalMemSetMem16): > + uxth r2, r2 > + orr r2, r2, r2, lsl #16 > + > +ASM_GLOBAL ASM_PFX(InternalMemSetMem32) > +ASM_PFX(InternalMemSetMem32): > + mov r3, r2 > + > +ASM_GLOBAL ASM_PFX(InternalMemSetMem64) > +ASM_PFX(InternalMemSetMem64): > + push {r0, lr} > + add ip, r0, r1 // ip := dst + length > + adds r0, r0, #16 // advance the output pointer by 16 bytes > + cmp r1, #16 // fewer than 16 bytes of input? > + blt 2f > + > + str r2, [r0, #-16] // potentially unaligned store of 4 bytes > + str r3, [r0, #-12] // potentially unaligned store of 4 bytes > + str r2, [r0, #-8] // potentially unaligned store of 4 bytes > + str r3, [r0, #-4] // potentially unaligned store of 4 bytes > + bic r0, r0, #15 // align output pointer > + beq 1f > + > +0: adds r0, r0, #16 // advance the output pointer by 16 bytes > + subs r1, ip, r0 // past the output? > + blt 2f // break out of the loop > + strd r2, r3, [r0, #-16] // aligned store of 16 bytes > + strd r2, r3, [r0, #-8] > + bne 0b // goto beginning of loop > +1: pop {r0, pc} > + > +2: and r1, r1, #0xf > + cmp r1, #0x4 // between 4 and 15 bytes? 
> + blt 3f > + cmp r1, #0x8 // between 8 and 15 bytes? > + str r2, [r0, #-16] // overlapping store of 4 + (4 + 4) + 4 > bytes > + itt ge > + strge r3, [r0, #-12] > + strge r2, [ip, #-8] > + str r3, [ip, #-4] > + pop {r0, pc} > + > +3: cmp r1, #2 // 2 or 3 bytes? > + strb r2, [r0, #-16] // store 1 byte > + it ge > + strhge r2, [ip, #-2] // store 2 bytes > + pop {r0, pc} > diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.asm > b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.asm > new file mode 100644 > index 000000000000..14fecd93a96c > --- /dev/null > +++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.asm > @@ -0,0 +1,81 @@ > +;------------------------------------------------------------------------------ > +; > +; Copyright (c) 2016, Linaro Ltd. All rights reserved.<BR> > +; > +; This program and the accompanying materials are licensed and made available > +; under the terms and conditions of the BSD License which accompanies this > +; distribution. The full text of the license may be found at > +; http://opensource.org/licenses/bsd-license.php > +; > +; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, > +; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR > IMPLIED. > +; > +;------------------------------------------------------------------------------ > + > + EXPORT InternalMemZeroMem > + EXPORT InternalMemSetMem > + EXPORT InternalMemSetMem16 > + EXPORT InternalMemSetMem32 > + EXPORT InternalMemSetMem64 > + > + AREA SetMem, CODE, READONLY > + THUMB > + > +InternalMemZeroMem > + movs r2, #0 > + > +InternalMemSetMem > + uxtb r2, r2 > + orr r2, r2, r2, lsl #8 > + > +InternalMemSetMem16 > + uxth r2, r2 > + orr r2, r2, r2, lsl #16 > + > +InternalMemSetMem32 > + mov r3, r2 > + > +InternalMemSetMem64 > + push {r0, lr} > + add ip, r0, r1 ; ip := dst + length > + adds r0, r0, #16 ; advance the output pointer by 16 bytes > + cmp r1, #16 ; fewer than 16 bytes of input? 
> + blt L2 > + > + str r2, [r0, #-16] ; potentially unaligned store of 4 bytes > + str r3, [r0, #-12] ; potentially unaligned store of 4 bytes > + str r2, [r0, #-8] ; potentially unaligned store of 4 bytes > + str r3, [r0, #-4] ; potentially unaligned store of 4 bytes > + bic r0, r0, #15 ; align output pointer > + beq L1 > + > +L0 > + adds r0, r0, #16 ; advance the output pointer by 16 bytes > + subs r1, ip, r0 ; past the output? > + blt L2 ; break out of the loop > + strd r2, r3, [r0, #-16] ; aligned store of 16 bytes > + strd r2, r3, [r0, #-8] > + bne L0 ; goto beginning of loop > +L1 > + pop {r0, pc} > + > +L2 > + and r1, r1, #0xf > + cmp r1, #0x4 ; between 4 and 15 bytes? > + blt L3 > + cmp r1, #0x8 ; between 8 and 15 bytes? > + str r2, [r0, #-16] ; overlapping store of 4 + (4 + 4) + 4 > bytes > + itt ge > + strge r3, [r0, #-12] > + strge r2, [ip, #-8]
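The 'ge' condition could be changed to 'gt' in all three instructions above (the 'itt' and the two conditional 'str' instructions) while keeping the same functionality: when exactly 8 bytes remain, the unconditional stores to [r0, #-16] and [ip, #-4] already cover all of them, so the two conditional stores are only needed for 9 to 15 bytes. I can change that before committing. -- Ard. _______________________________________________ edk2-devel mailing list [email protected] https://lists.01.org/mailman/listinfo/edk2-devel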

