Re: [edk2] [PATCH 2/2] MdeModulePkg/EbcDxe: cleanup
On 5 August 2016 at 21:24, Daniil Egranovwrote: > Hi Ard, > > > > On 08/01/2016 09:22 AM, Ard Biesheuvel wrote: >> >> - indentation >> - premature optimization of the thunking code, i.e., align function >> prototypes >>so we don't have to move arguments around or duplicate them on the >> stack, >>and perform tail calls where possible >> - adhere to calling convention (stack frame layout) >> - replace instruction buffer with a fixed struct, so that we no longer >> have >>to traverse it to find the entry point slots >> >> Contributed-under: TianoCore Contribution Agreement 1.0 >> Signed-off-by: Ard Biesheuvel >> --- >> MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S | 188 >> ++- >> MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c | 193 >> ++-- >> 2 files changed, 159 insertions(+), 222 deletions(-) >> >> diff --git a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S >> b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S >> index 3c1a461f5e87..d0a5a4c5a37d 100644 >> --- a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S >> +++ b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S >> @@ -1,10 +1,12 @@ >> ///** @file >> // >> -//This code provides low level routines that support the Virtual >> Machine >> -// for option ROMs. >> +// This code provides low level routines that support the Virtual >> Machine >> +// for option ROMs. >> // >> -// Copyright (c) 2015, The Linux Foundation. All rights reserved. >> +// Copyright (c) 2016, Linaro, Ltd. All rights reserved. >> +// Copyright (c) 2015, The Linux Foundation. All rights reserved. >> // Copyright (c) 2007 - 2014, Intel Corporation. All rights >> reserved. >> +// >> // This program and the accompanying materials >> // are licensed and made available under the terms and conditions of >> the BSD License >> // which accompanies this distribution. The full text of the license >> may be found at >> @@ -15,9 +17,10 @@ >> // >> //**/ >> -ASM_GLOBAL ASM_PFX(CopyMem); >> -ASM_GLOBAL ASM_PFX(EbcInterpret); >> -ASM_GLOBAL ASM_PFX(ExecuteEbcImageEntryPoint); >> +ASM_GLOBAL ASM_PFX(EbcLLEbcInterpret); >> +ASM_GLOBAL ASM_PFX(EbcLLExecuteEbcImageEntryPoint); >> + >> +ASM_GLOBAL ASM_PFX(mEbcInstructionBufferTemplate); > > The ASM_GLOBAL ASM_PFX(EbcLLCALLEXNative); is missing here. The linking > fails without it. > Thanks for the report. I had a single patch and split it into two, and apparently, I was sloppy >> >> // >> // EbcLLCALLEX >> @@ -30,102 +33,121 @@ ASM_GLOBAL ASM_PFX(ExecuteEbcImageEntryPoint); >> // >> >> // >> // UINTN EbcLLCALLEXNative(UINTN FuncAddr, UINTN NewStackPointer, VOID >> *FramePtr) >> -ASM_GLOBAL ASM_PFX(EbcLLCALLEXNative); >> ASM_PFX(EbcLLCALLEXNative): >> - stp x19, x20, [sp, #-16]! >> - stp x29, x30, [sp, #-16]! >> - >> - mov x19, x0 >> - mov x20, sp >> - sub x2, x2, x1 // Length = NewStackPointer-FramePtr >> - sub sp, sp, x2 >> - sub sp, sp, #64 // Make sure there is room for at least 8 args in >> the new stack >> - mov x0, sp >> - >> - bl CopyMem // Sp, NewStackPointer, Length >> - >> - ldp x0, x1, [sp], #16 >> - ldp x2, x3, [sp], #16 >> - ldp x4, x5, [sp], #16 >> - ldp x6, x7, [sp], #16 >> - >> - blr x19 >> - >> - mov sp, x20 >> - ldp x29, x30, [sp], #16 >> - ldp x19, x20, [sp], #16 >> - >> - ret >> +mov x8, x0 // Preserve x0 >> +mov x9, x1 // Preserve x1 >> + >> +// >> +// If the EBC stack frame is smaller than or equal to 64 bytes, we >> know there >> +// are no stacked arguments #9 and beyond that we need to copy to the >> native >> +// stack. In this case, we can perform a tail call which is much more >> +// efficient, since there is no need to touch the native stack at >> all. >> +// >> +sub x3, x2, x1 // Length = NewStackPointer - >> FramePtr >> +cmp x3, #64 >> +b.gt1f >> + >> +adr x0, 0f >> +sub x0, x0, x3, lsr #1 >> +br x0 >> + >> +ldr x7, [x9, #56] >> +ldr x6, [x9, #48] >> +ldr x5, [x9, #40] >> +ldr x4, [x9, #32] >> +ldr x3, [x9, #24] >> +ldr x2, [x9, #16] >> +ldr x1, [x9, #8] >> +ldr x0, [x9] >> + >> +0: br x8 >> + >> +// >> +// More than 64 bytes: we need to build the full native stack frame >> and copy >> +// the part of the VM stack exceeding 64 bytes (which may contain >> stacked >> +// arguments) to the native stack >> +// >> +1: stp x29, x30, [sp, #-16]! >> +mov x29, sp >> + >> +// >> +// Ensure that the stack pointer remains 16 byte aligned, >> +// even if the
Re: [edk2] [PATCH 2/2] MdeModulePkg/EbcDxe: cleanup
Hi Ard, On 08/01/2016 09:22 AM, Ard Biesheuvel wrote: - indentation - premature optimization of the thunking code, i.e., align function prototypes so we don't have to move arguments around or duplicate them on the stack, and perform tail calls where possible - adhere to calling convention (stack frame layout) - replace instruction buffer with a fixed struct, so that we no longer have to traverse it to find the entry point slots Contributed-under: TianoCore Contribution Agreement 1.0 Signed-off-by: Ard Biesheuvel--- MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S | 188 ++- MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c | 193 ++-- 2 files changed, 159 insertions(+), 222 deletions(-) diff --git a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S index 3c1a461f5e87..d0a5a4c5a37d 100644 --- a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S +++ b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S @@ -1,10 +1,12 @@ ///** @file // -//This code provides low level routines that support the Virtual Machine -// for option ROMs. +// This code provides low level routines that support the Virtual Machine +// for option ROMs. // -// Copyright (c) 2015, The Linux Foundation. All rights reserved. +// Copyright (c) 2016, Linaro, Ltd. All rights reserved. +// Copyright (c) 2015, The Linux Foundation. All rights reserved. // Copyright (c) 2007 - 2014, Intel Corporation. All rights reserved. +// // This program and the accompanying materials // are licensed and made available under the terms and conditions of the BSD License // which accompanies this distribution. The full text of the license may be found at @@ -15,9 +17,10 @@ // //**/ -ASM_GLOBAL ASM_PFX(CopyMem); -ASM_GLOBAL ASM_PFX(EbcInterpret); -ASM_GLOBAL ASM_PFX(ExecuteEbcImageEntryPoint); +ASM_GLOBAL ASM_PFX(EbcLLEbcInterpret); +ASM_GLOBAL ASM_PFX(EbcLLExecuteEbcImageEntryPoint); + +ASM_GLOBAL ASM_PFX(mEbcInstructionBufferTemplate); The ASM_GLOBAL ASM_PFX(EbcLLCALLEXNative); is missing here. The linking fails without it. // // EbcLLCALLEX @@ -30,102 +33,121 @@ ASM_GLOBAL ASM_PFX(ExecuteEbcImageEntryPoint); // // // UINTN EbcLLCALLEXNative(UINTN FuncAddr, UINTN NewStackPointer, VOID *FramePtr) -ASM_GLOBAL ASM_PFX(EbcLLCALLEXNative); ASM_PFX(EbcLLCALLEXNative): - stp x19, x20, [sp, #-16]! - stp x29, x30, [sp, #-16]! - - mov x19, x0 - mov x20, sp - sub x2, x2, x1 // Length = NewStackPointer-FramePtr - sub sp, sp, x2 - sub sp, sp, #64 // Make sure there is room for at least 8 args in the new stack - mov x0, sp - - bl CopyMem // Sp, NewStackPointer, Length - - ldp x0, x1, [sp], #16 - ldp x2, x3, [sp], #16 - ldp x4, x5, [sp], #16 - ldp x6, x7, [sp], #16 - - blr x19 - - mov sp, x20 - ldp x29, x30, [sp], #16 - ldp x19, x20, [sp], #16 - - ret +mov x8, x0 // Preserve x0 +mov x9, x1 // Preserve x1 + +// +// If the EBC stack frame is smaller than or equal to 64 bytes, we know there +// are no stacked arguments #9 and beyond that we need to copy to the native +// stack. In this case, we can perform a tail call which is much more +// efficient, since there is no need to touch the native stack at all. +// +sub x3, x2, x1 // Length = NewStackPointer - FramePtr +cmp x3, #64 +b.gt1f + +adr x0, 0f +sub x0, x0, x3, lsr #1 +br x0 + +ldr x7, [x9, #56] +ldr x6, [x9, #48] +ldr x5, [x9, #40] +ldr x4, [x9, #32] +ldr x3, [x9, #24] +ldr x2, [x9, #16] +ldr x1, [x9, #8] +ldr x0, [x9] + +0: br x8 + +// +// More than 64 bytes: we need to build the full native stack frame and copy +// the part of the VM stack exceeding 64 bytes (which may contain stacked +// arguments) to the native stack +// +1: stp x29, x30, [sp, #-16]! +mov x29, sp + +// +// Ensure that the stack pointer remains 16 byte aligned, +// even if the size of the VM stack frame is not a multiple of 16 +// +add x1, x1, #64 // Skip over [potential] reg params +tbz x3, #3, 2f // Multiple of 16? +ldr x4, [x2, #-8]! // No? Then push one word +str x4, [sp, #-16]! // ... but use two slots +b 3f + +2: ldp x4, x5, [x2, #-16]! +stp x4, x5, [sp, #-16]! +3: cmp x2, x1 +b.gt2b + +ldp x0, x1, [x9] +ldp x2, x3, [x9, #16] +ldp x4, x5, [x9, #32] +ldp x6, x7, [x9, #48] + +blr x8 + +
[edk2] [PATCH 2/2] MdeModulePkg/EbcDxe: cleanup
- indentation - premature optimization of the thunking code, i.e., align function prototypes so we don't have to move arguments around or duplicate them on the stack, and perform tail calls where possible - adhere to calling convention (stack frame layout) - replace instruction buffer with a fixed struct, so that we no longer have to traverse it to find the entry point slots Contributed-under: TianoCore Contribution Agreement 1.0 Signed-off-by: Ard Biesheuvel--- MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S | 188 ++- MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c | 193 ++-- 2 files changed, 159 insertions(+), 222 deletions(-) diff --git a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S index 3c1a461f5e87..d0a5a4c5a37d 100644 --- a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S +++ b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S @@ -1,10 +1,12 @@ ///** @file // -//This code provides low level routines that support the Virtual Machine -// for option ROMs. +// This code provides low level routines that support the Virtual Machine +// for option ROMs. // -// Copyright (c) 2015, The Linux Foundation. All rights reserved. +// Copyright (c) 2016, Linaro, Ltd. All rights reserved. +// Copyright (c) 2015, The Linux Foundation. All rights reserved. // Copyright (c) 2007 - 2014, Intel Corporation. All rights reserved. +// // This program and the accompanying materials // are licensed and made available under the terms and conditions of the BSD License // which accompanies this distribution. The full text of the license may be found at @@ -15,9 +17,10 @@ // //**/ -ASM_GLOBAL ASM_PFX(CopyMem); -ASM_GLOBAL ASM_PFX(EbcInterpret); -ASM_GLOBAL ASM_PFX(ExecuteEbcImageEntryPoint); +ASM_GLOBAL ASM_PFX(EbcLLEbcInterpret); +ASM_GLOBAL ASM_PFX(EbcLLExecuteEbcImageEntryPoint); + +ASM_GLOBAL ASM_PFX(mEbcInstructionBufferTemplate); // // EbcLLCALLEX @@ -30,102 +33,121 @@ ASM_GLOBAL ASM_PFX(ExecuteEbcImageEntryPoint); // // // UINTN EbcLLCALLEXNative(UINTN FuncAddr, UINTN NewStackPointer, VOID *FramePtr) -ASM_GLOBAL ASM_PFX(EbcLLCALLEXNative); ASM_PFX(EbcLLCALLEXNative): - stp x19, x20, [sp, #-16]! - stp x29, x30, [sp, #-16]! - - mov x19, x0 - mov x20, sp - sub x2, x2, x1 // Length = NewStackPointer-FramePtr - sub sp, sp, x2 - sub sp, sp, #64 // Make sure there is room for at least 8 args in the new stack - mov x0, sp - - bl CopyMem // Sp, NewStackPointer, Length - - ldp x0, x1, [sp], #16 - ldp x2, x3, [sp], #16 - ldp x4, x5, [sp], #16 - ldp x6, x7, [sp], #16 - - blr x19 - - mov sp, x20 - ldp x29, x30, [sp], #16 - ldp x19, x20, [sp], #16 - - ret +mov x8, x0 // Preserve x0 +mov x9, x1 // Preserve x1 + +// +// If the EBC stack frame is smaller than or equal to 64 bytes, we know there +// are no stacked arguments #9 and beyond that we need to copy to the native +// stack. In this case, we can perform a tail call which is much more +// efficient, since there is no need to touch the native stack at all. +// +sub x3, x2, x1 // Length = NewStackPointer - FramePtr +cmp x3, #64 +b.gt1f + +adr x0, 0f +sub x0, x0, x3, lsr #1 +br x0 + +ldr x7, [x9, #56] +ldr x6, [x9, #48] +ldr x5, [x9, #40] +ldr x4, [x9, #32] +ldr x3, [x9, #24] +ldr x2, [x9, #16] +ldr x1, [x9, #8] +ldr x0, [x9] + +0: br x8 + +// +// More than 64 bytes: we need to build the full native stack frame and copy +// the part of the VM stack exceeding 64 bytes (which may contain stacked +// arguments) to the native stack +// +1: stp x29, x30, [sp, #-16]! +mov x29, sp + +// +// Ensure that the stack pointer remains 16 byte aligned, +// even if the size of the VM stack frame is not a multiple of 16 +// +add x1, x1, #64 // Skip over [potential] reg params +tbz x3, #3, 2f // Multiple of 16? +ldr x4, [x2, #-8]! // No? Then push one word +str x4, [sp, #-16]! // ... but use two slots +b 3f + +2: ldp x4, x5, [x2, #-16]! +stp x4, x5, [sp, #-16]! +3: cmp x2, x1 +b.gt2b + +ldp x0, x1, [x9] +ldp x2, x3, [x9, #16] +ldp x4, x5, [x9, #32] +ldp x6, x7, [x9, #48] + +blr x8 + +mov sp, x29 +ldp x29, x30, [sp], #16 +ret // // EbcLLEbcInterpret // // This