On 7 September 2016 at 15:52, Ard Biesheuvel <[email protected]> wrote:
> This adds ARM support to BaseMemoryLibOptDxe, partially based on the
> cortex-strings library (ScanMem) and the existing CopyMem() implementation
> from BaseMemoryLibStm in ArmPkg.
>
> All string routines are accelerated except ScanMem16, ScanMem32,
> ScanMem64 and IsZeroBuffer, which can wait for another day. (Very few
> occurrences exist in the codebase)
>
> Contributed-under: TianoCore Contribution Agreement 1.0
> Signed-off-by: Ard Biesheuvel <[email protected]>
> Reviewed-by: Liming Gao <[email protected]>
> ---
>  MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.S        | 138 ++++++++++++++++
>  MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.asm      | 140 ++++++++++++++++
>  MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.S           | 172 ++++++++++++++++++++
>  MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.asm         | 147 +++++++++++++++++
>  MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMem.S           | 146 +++++++++++++++++
>  MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMem.asm         | 147 +++++++++++++++++
>  MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMemGeneric.c    | 142 ++++++++++++++++
>  MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.S            |  75 +++++++++
>  MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.asm          |  81 +++++++++
>  MdePkg/Library/BaseMemoryLibOptDxe/BaseMemoryLibOptDxe.inf |  30 ++--
>  10 files changed, 1204 insertions(+), 14 deletions(-)
>
[..]
> diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.S b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.S
> new file mode 100644
> index 000000000000..914fdd60ea52
> --- /dev/null
> +++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.S
> @@ -0,0 +1,75 @@
> +#------------------------------------------------------------------------------
> +#
> +# Copyright (c) 2016, Linaro Ltd. All rights reserved.<BR>
> +#
> +# This program and the accompanying materials are licensed and made available
> +# under the terms and conditions of the BSD License which accompanies this
> +# distribution.  The full text of the license may be found at
> +# http://opensource.org/licenses/bsd-license.php
> +#
> +# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
> +# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
> +#
> +#------------------------------------------------------------------------------
> +
> +    .text
> +    .thumb
> +    .syntax unified
> +
> +ASM_GLOBAL ASM_PFX(InternalMemZeroMem)
> +ASM_PFX(InternalMemZeroMem):
> +    movs    r2, #0                  // fill value := 0
> +    // falls through into InternalMemSetMem with r2 = 0
> +ASM_GLOBAL ASM_PFX(InternalMemSetMem)
> +ASM_PFX(InternalMemSetMem):
> +    uxtb    r2, r2                  // keep only the low byte of the fill value
> +    orr     r2, r2, r2, lsl #8      // replicate that byte into bits 15:8
> +    // falls through into InternalMemSetMem16 with a 16-bit pattern in r2
> +ASM_GLOBAL ASM_PFX(InternalMemSetMem16)
> +ASM_PFX(InternalMemSetMem16):
> +    uxth    r2, r2                  // keep only the low halfword of the fill value
> +    orr     r2, r2, r2, lsl #16     // replicate that halfword into bits 31:16
> +    // falls through into InternalMemSetMem32 with a 32-bit pattern in r2
> +ASM_GLOBAL ASM_PFX(InternalMemSetMem32)
> +ASM_PFX(InternalMemSetMem32):
> +    mov     r3, r2                  // r3 := r2, so the r2/r3 pair stored below holds the pattern twice
> +
> +ASM_GLOBAL ASM_PFX(InternalMemSetMem64)
> +ASM_PFX(InternalMemSetMem64):
> +    push    {r0, lr}                // save dst and lr; every exit pops them back into r0 and pc
> +    add     ip, r0, r1              // ip := dst + length
> +    adds    r0, r0, #16             // advance the output pointer by 16 bytes
> +    cmp     r1, #16                 // fewer than 16 bytes of input?
> +    blt     2f                      // yes (signed compare): short buffer, handled at 2
> +
> +    str     r2, [r0, #-16]          // potentially unaligned store of 4 bytes
> +    str     r3, [r0, #-12]          // potentially unaligned store of 4 bytes
> +    str     r2, [r0, #-8]           // potentially unaligned store of 4 bytes
> +    str     r3, [r0, #-4]           // potentially unaligned store of 4 bytes
> +    bic     r0, r0, #15             // align output pointer (down to a 16-byte boundary)
> +    beq     1f                      // flags are still those of 'cmp r1, #16': exactly 16 bytes, done
> +
> +0:  adds    r0, r0, #16             // advance the output pointer by 16 bytes
> +    subs    r1, ip, r0              // past the output? (r1 := end - r0, negative if so)
> +    blt     2f                      // break out of the loop
> +    strd    r2, r3, [r0, #-16]      // aligned store of 16 bytes (two 8-byte strd)
> +    strd    r2, r3, [r0, #-8]
> +    bne     0b                      // flags from subs: r1 != 0, so more to fill; loop again
> +1:  pop     {r0, pc}                // r0 := saved dst, return
> +
> +2:  and     r1, r1, #0xf            // r1 := r1 & 15, used below as the byte count left between r0 - 16 and ip
> +    cmp     r1, #0x4                // between 4 and 15 bytes?
> +    blt     3f                      // no: 3 bytes or fewer, handled at 3
> +    cmp     r1, #0x8                // between 8 and 15 bytes?
> +    str     r2, [r0, #-16]          // overlapping store of 4 + (4 + 4) + 4 
> bytes
> +    itt     ge                      // the next two stores run only when r1 >= 8
> +    strge   r3, [r0, #-12]          // second word, anchored at the start of the remainder
> +    strge   r2, [ip, #-8]           // second-to-last word, anchored at the end of the buffer
> +    str     r3, [ip, #-4]           // last word, anchored at the end of the buffer
> +    pop     {r0, pc}                // r0 := saved dst, return
> +
> +3:  cmp     r1, #2                  // 2 or 3 bytes?
> +    strb    r2, [r0, #-16]          // store 1 byte, unconditionally. NOTE(review): also runs when r1 == 0; presumably callers never pass a zero length - confirm
> +    it      ge                      // the halfword store runs only when r1 >= 2
> +    strhge  r2, [ip, #-2]           // store 2 bytes, anchored at the end of the buffer
> +    pop     {r0, pc}                // r0 := saved dst, return
> diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.asm b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.asm
> new file mode 100644
> index 000000000000..14fecd93a96c
> --- /dev/null
> +++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.asm
> @@ -0,0 +1,81 @@
> +;------------------------------------------------------------------------------
> +;
> +; Copyright (c) 2016, Linaro Ltd. All rights reserved.<BR>
> +;
> +; This program and the accompanying materials are licensed and made available
> +; under the terms and conditions of the BSD License which accompanies this
> +; distribution.  The full text of the license may be found at
> +; http://opensource.org/licenses/bsd-license.php
> +;
> +; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
> +; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
> +;
> +;------------------------------------------------------------------------------
> +
> +    EXPORT  InternalMemZeroMem
> +    EXPORT  InternalMemSetMem
> +    EXPORT  InternalMemSetMem16
> +    EXPORT  InternalMemSetMem32
> +    EXPORT  InternalMemSetMem64
> +
> +    AREA    SetMem, CODE, READONLY
> +    THUMB
> +
> +InternalMemZeroMem
> +    movs    r2, #0                  ; fill value := 0
> +    ; falls through into InternalMemSetMem with r2 = 0
> +InternalMemSetMem
> +    uxtb    r2, r2                  ; keep only the low byte of the fill value
> +    orr     r2, r2, r2, lsl #8      ; replicate that byte into bits 15:8
> +    ; falls through into InternalMemSetMem16 with a 16-bit pattern in r2
> +InternalMemSetMem16
> +    uxth    r2, r2                  ; keep only the low halfword of the fill value
> +    orr     r2, r2, r2, lsl #16     ; replicate it into bits 31:16; must be lsl, since after uxth the upper half is zero and lsr #16 would OR in 0
> +    ; falls through into InternalMemSetMem32 with a 32-bit pattern in r2
> +InternalMemSetMem32
> +    mov     r3, r2                  ; r3 := r2, so the r2/r3 pair stored below holds the pattern twice
> +
> +InternalMemSetMem64
> +    push    {r0, lr}
> +    add     ip, r0, r1              ; ip := dst + length
> +    adds    r0, r0, #16             ; advance the output pointer by 16 bytes
> +    cmp     r1, #16                 ; fewer than 16 bytes of input?
> +    blt     L2
> +
> +    str     r2, [r0, #-16]          ; potentially unaligned store of 4 bytes
> +    str     r3, [r0, #-12]          ; potentially unaligned store of 4 bytes
> +    str     r2, [r0, #-8]           ; potentially unaligned store of 4 bytes
> +    str     r3, [r0, #-4]           ; potentially unaligned store of 4 bytes
> +    bic     r0, r0, #15             ; align output pointer
> +    beq     L1
> +
> +L0
> +    adds    r0, r0, #16             ; advance the output pointer by 16 bytes
> +    subs    r1, ip, r0              ; past the output?
> +    blt     L2                      ; break out of the loop
> +    strd    r2, r3, [r0, #-16]      ; aligned store of 16 bytes
> +    strd    r2, r3, [r0, #-8]
> +    bne     L0                      ; goto beginning of loop
> +L1
> +    pop     {r0, pc}
> +
> +L2
> +    and     r1, r1, #0xf
> +    cmp     r1, #0x4                ; between 4 and 15 bytes?
> +    blt     L3
> +    cmp     r1, #0x8                ; between 8 and 15 bytes?
> +    str     r2, [r0, #-16]          ; overlapping store of 4 + (4 + 4) + 4 
> bytes
> +    itt     ge
> +    strge   r3, [r0, #-12]
> +    strge   r2, [ip, #-8]

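The 'ge' condition on the three instructions above (itt/strge/strge) could be
changed to 'gt' while keeping the same functionality: when exactly 8 bytes
remain, the two unconditional stores at [r0, #-16] and [ip, #-4] already cover
them. I can change that before committing.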

-- 
Ard.
_______________________________________________
edk2-devel mailing list
[email protected]
https://lists.01.org/mailman/listinfo/edk2-devel

Reply via email to