Re: [edk2] [PATCH 2/2] MdeModulePkg/EbcDxe: cleanup

2016-08-05 Thread Ard Biesheuvel
On 5 August 2016 at 21:24, Daniil Egranov  wrote:
> Hi Ard,
>
>
>
> On 08/01/2016 09:22 AM, Ard Biesheuvel wrote:
>>
>> - indentation
>> - premature optimization of the thunking code, i.e., align function
>> prototypes
>>so we don't have to move arguments around or duplicate them on the
>> stack,
>>and perform tail calls where possible
>> - adhere to calling convention (stack frame layout)
>> - replace instruction buffer with a fixed struct, so that we no longer
>> have
>>to traverse it to find the entry point slots
>>
>> Contributed-under: TianoCore Contribution Agreement 1.0
>> Signed-off-by: Ard Biesheuvel 
>> ---
>>   MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S | 188
>> ++-
>>   MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c  | 193
>> ++--
>>   2 files changed, 159 insertions(+), 222 deletions(-)
>>
>> diff --git a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
>> b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
>> index 3c1a461f5e87..d0a5a4c5a37d 100644
>> --- a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
>> +++ b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
>> @@ -1,10 +1,12 @@
>>   ///** @file
>>   //
>> -//This code provides low level routines that support the Virtual
>> Machine
>> -//   for option ROMs.
>> +//  This code provides low level routines that support the Virtual
>> Machine
>> +//  for option ROMs.
>>   //
>> -//  Copyright (c) 2015, The Linux Foundation. All rights reserved.
>> +//  Copyright (c) 2016, Linaro, Ltd. All rights reserved.
>> +//  Copyright (c) 2015, The Linux Foundation. All rights reserved.
>>   //  Copyright (c) 2007 - 2014, Intel Corporation. All rights
>> reserved.
>> +//
>>   //  This program and the accompanying materials
>>   //  are licensed and made available under the terms and conditions of
>> the BSD License
>>   //  which accompanies this distribution.  The full text of the license
>> may be found at
>> @@ -15,9 +17,10 @@
>>   //
>>   //**/
>>   -ASM_GLOBAL ASM_PFX(CopyMem);
>> -ASM_GLOBAL ASM_PFX(EbcInterpret);
>> -ASM_GLOBAL ASM_PFX(ExecuteEbcImageEntryPoint);
>> +ASM_GLOBAL ASM_PFX(EbcLLEbcInterpret);
>> +ASM_GLOBAL ASM_PFX(EbcLLExecuteEbcImageEntryPoint);
>> +
>> +ASM_GLOBAL ASM_PFX(mEbcInstructionBufferTemplate);
>
> The ASM_GLOBAL ASM_PFX(EbcLLCALLEXNative); is missing here. The linking
> fails without it.
>

Thanks for the report. I had a single patch and split it into two, and
apparently, I was sloppy

>>
>> //
>>   // EbcLLCALLEX
>> @@ -30,102 +33,121 @@ ASM_GLOBAL ASM_PFX(ExecuteEbcImageEntryPoint);
>>   //
>>
>> //
>>   // UINTN EbcLLCALLEXNative(UINTN FuncAddr, UINTN NewStackPointer, VOID
>> *FramePtr)
>> -ASM_GLOBAL ASM_PFX(EbcLLCALLEXNative);
>>   ASM_PFX(EbcLLCALLEXNative):
>> -  stp  x19, x20, [sp, #-16]!
>> -  stp  x29, x30, [sp, #-16]!
>> -
>> -  mov  x19, x0
>> -  mov  x20, sp
>> -  sub  x2, x2, x1   // Length = NewStackPointer-FramePtr
>> -  sub  sp, sp, x2
>> -  sub  sp, sp, #64  // Make sure there is room for at least 8 args in
>> the new stack
>> -  mov  x0, sp
>> -
>> -  bl   CopyMem  // Sp, NewStackPointer, Length
>> -
>> -  ldp  x0, x1, [sp], #16
>> -  ldp  x2, x3, [sp], #16
>> -  ldp  x4, x5, [sp], #16
>> -  ldp  x6, x7, [sp], #16
>> -
>> -  blr  x19
>> -
>> -  mov  sp,  x20
>> -  ldp  x29, x30, [sp], #16
>> -  ldp  x19, x20, [sp], #16
>> -
>> -  ret
>> +mov x8, x0 // Preserve x0
>> +mov x9, x1 // Preserve x1
>> +
>> +//
>> +// If the EBC stack frame is smaller than or equal to 64 bytes, we
>> know there
>> +// are no stacked arguments #9 and beyond that we need to copy to the
>> native
>> +// stack. In this case, we can perform a tail call which is much more
>> +// efficient, since there is no need to touch the native stack at
>> all.
>> +//
>> +sub x3, x2, x1  // Length = NewStackPointer -
>> FramePtr
>> +cmp x3, #64
>> +b.gt1f
>> +
>> +adr x0, 0f
>> +sub x0, x0, x3, lsr #1
>> +br  x0
>> +
>> +ldr x7, [x9, #56]
>> +ldr x6, [x9, #48]
>> +ldr x5, [x9, #40]
>> +ldr x4, [x9, #32]
>> +ldr x3, [x9, #24]
>> +ldr x2, [x9, #16]
>> +ldr x1, [x9, #8]
>> +ldr x0, [x9]
>> +
>> +0:  br  x8
>> +
>> +//
>> +// More than 64 bytes: we need to build the full native stack frame
>> and copy
>> +// the part of the VM stack exceeding 64 bytes (which may contain
>> stacked
>> +// arguments) to the native stack
>> +//
>> +1:  stp x29, x30, [sp, #-16]!
>> +mov x29, sp
>> +
>> +//
>> +// Ensure that the stack pointer remains 16 byte aligned,
>> +// even if the 

Re: [edk2] [PATCH 2/2] MdeModulePkg/EbcDxe: cleanup

2016-08-05 Thread Daniil Egranov

Hi Ard,


On 08/01/2016 09:22 AM, Ard Biesheuvel wrote:

- indentation
- premature optimization of the thunking code, i.e., align function prototypes
   so we don't have to move arguments around or duplicate them on the stack,
   and perform tail calls where possible
- adhere to calling convention (stack frame layout)
- replace instruction buffer with a fixed struct, so that we no longer have
   to traverse it to find the entry point slots

Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Ard Biesheuvel 
---
  MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S | 188 ++-
  MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c  | 193 ++--
  2 files changed, 159 insertions(+), 222 deletions(-)

diff --git a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S 
b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
index 3c1a461f5e87..d0a5a4c5a37d 100644
--- a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
+++ b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
@@ -1,10 +1,12 @@
  ///** @file
  //
-//This code provides low level routines that support the Virtual Machine
-//   for option ROMs.
+//  This code provides low level routines that support the Virtual Machine
+//  for option ROMs.
  //
-//  Copyright (c) 2015, The Linux Foundation. All rights reserved.
+//  Copyright (c) 2016, Linaro, Ltd. All rights reserved.
+//  Copyright (c) 2015, The Linux Foundation. All rights reserved.
  //  Copyright (c) 2007 - 2014, Intel Corporation. All rights reserved.
+//
  //  This program and the accompanying materials
  //  are licensed and made available under the terms and conditions of the BSD 
License
  //  which accompanies this distribution.  The full text of the license may be 
found at
@@ -15,9 +17,10 @@
  //
  //**/
  
-ASM_GLOBAL ASM_PFX(CopyMem);

-ASM_GLOBAL ASM_PFX(EbcInterpret);
-ASM_GLOBAL ASM_PFX(ExecuteEbcImageEntryPoint);
+ASM_GLOBAL ASM_PFX(EbcLLEbcInterpret);
+ASM_GLOBAL ASM_PFX(EbcLLExecuteEbcImageEntryPoint);
+
+ASM_GLOBAL ASM_PFX(mEbcInstructionBufferTemplate);
The ASM_GLOBAL ASM_PFX(EbcLLCALLEXNative); is missing here. The linking 
fails without it.
  
  //

  // EbcLLCALLEX
@@ -30,102 +33,121 @@ ASM_GLOBAL ASM_PFX(ExecuteEbcImageEntryPoint);
  //
  //
  // UINTN EbcLLCALLEXNative(UINTN FuncAddr, UINTN NewStackPointer, VOID 
*FramePtr)
-ASM_GLOBAL ASM_PFX(EbcLLCALLEXNative);
  ASM_PFX(EbcLLCALLEXNative):
-  stp  x19, x20, [sp, #-16]!
-  stp  x29, x30, [sp, #-16]!
-
-  mov  x19, x0
-  mov  x20, sp
-  sub  x2, x2, x1   // Length = NewStackPointer-FramePtr
-  sub  sp, sp, x2
-  sub  sp, sp, #64  // Make sure there is room for at least 8 args in the 
new stack
-  mov  x0, sp
-
-  bl   CopyMem  // Sp, NewStackPointer, Length
-
-  ldp  x0, x1, [sp], #16
-  ldp  x2, x3, [sp], #16
-  ldp  x4, x5, [sp], #16
-  ldp  x6, x7, [sp], #16
-
-  blr  x19
-
-  mov  sp,  x20
-  ldp  x29, x30, [sp], #16
-  ldp  x19, x20, [sp], #16
-
-  ret
+mov x8, x0 // Preserve x0
+mov x9, x1 // Preserve x1
+
+//
+// If the EBC stack frame is smaller than or equal to 64 bytes, we know 
there
+// are no stacked arguments #9 and beyond that we need to copy to the 
native
+// stack. In this case, we can perform a tail call which is much more
+// efficient, since there is no need to touch the native stack at all.
+//
+sub x3, x2, x1  // Length = NewStackPointer - FramePtr
+cmp x3, #64
+b.gt1f
+
+adr x0, 0f
+sub x0, x0, x3, lsr #1
+br  x0
+
+ldr x7, [x9, #56]
+ldr x6, [x9, #48]
+ldr x5, [x9, #40]
+ldr x4, [x9, #32]
+ldr x3, [x9, #24]
+ldr x2, [x9, #16]
+ldr x1, [x9, #8]
+ldr x0, [x9]
+
+0:  br  x8
+
+//
+// More than 64 bytes: we need to build the full native stack frame and 
copy
+// the part of the VM stack exceeding 64 bytes (which may contain stacked
+// arguments) to the native stack
+//
+1:  stp x29, x30, [sp, #-16]!
+mov x29, sp
+
+//
+// Ensure that the stack pointer remains 16 byte aligned,
+// even if the size of the VM stack frame is not a multiple of 16
+//
+add x1, x1, #64 // Skip over [potential] reg params
+tbz x3, #3, 2f  // Multiple of 16?
+ldr x4, [x2, #-8]!  // No? Then push one word
+str x4, [sp, #-16]! // ... but use two slots
+b   3f
+
+2:  ldp x4, x5, [x2, #-16]!
+stp x4, x5, [sp, #-16]!
+3:  cmp x2, x1
+b.gt2b
+
+ldp x0, x1, [x9]
+ldp x2, x3, [x9, #16]
+ldp x4, x5, [x9, #32]
+ldp x6, x7, [x9, #48]
+
+blr x8
+
+

[edk2] [PATCH 2/2] MdeModulePkg/EbcDxe: cleanup

2016-08-01 Thread Ard Biesheuvel
- indentation
- premature optimization of the thunking code, i.e., align function prototypes
  so we don't have to move arguments around or duplicate them on the stack,
  and perform tail calls where possible
- adhere to calling convention (stack frame layout)
- replace instruction buffer with a fixed struct, so that we no longer have
  to traverse it to find the entry point slots

Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Ard Biesheuvel 
---
 MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S | 188 ++-
 MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c  | 193 ++--
 2 files changed, 159 insertions(+), 222 deletions(-)

diff --git a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S 
b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
index 3c1a461f5e87..d0a5a4c5a37d 100644
--- a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
+++ b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
@@ -1,10 +1,12 @@
 ///** @file
 //
-//This code provides low level routines that support the Virtual Machine
-//   for option ROMs.
+//  This code provides low level routines that support the Virtual Machine
+//  for option ROMs.
 //
-//  Copyright (c) 2015, The Linux Foundation. All rights reserved.
+//  Copyright (c) 2016, Linaro, Ltd. All rights reserved.
+//  Copyright (c) 2015, The Linux Foundation. All rights reserved.
 //  Copyright (c) 2007 - 2014, Intel Corporation. All rights reserved.
+//
 //  This program and the accompanying materials
 //  are licensed and made available under the terms and conditions of the BSD 
License
 //  which accompanies this distribution.  The full text of the license may be 
found at
@@ -15,9 +17,10 @@
 //
 //**/
 
-ASM_GLOBAL ASM_PFX(CopyMem);
-ASM_GLOBAL ASM_PFX(EbcInterpret);
-ASM_GLOBAL ASM_PFX(ExecuteEbcImageEntryPoint);
+ASM_GLOBAL ASM_PFX(EbcLLEbcInterpret);
+ASM_GLOBAL ASM_PFX(EbcLLExecuteEbcImageEntryPoint);
+
+ASM_GLOBAL ASM_PFX(mEbcInstructionBufferTemplate);
 
 //
 // EbcLLCALLEX
@@ -30,102 +33,121 @@ ASM_GLOBAL ASM_PFX(ExecuteEbcImageEntryPoint);
 //
 //
 // UINTN EbcLLCALLEXNative(UINTN FuncAddr, UINTN NewStackPointer, VOID 
*FramePtr)
-ASM_GLOBAL ASM_PFX(EbcLLCALLEXNative);
 ASM_PFX(EbcLLCALLEXNative):
-  stp  x19, x20, [sp, #-16]!
-  stp  x29, x30, [sp, #-16]!
-
-  mov  x19, x0
-  mov  x20, sp
-  sub  x2, x2, x1   // Length = NewStackPointer-FramePtr
-  sub  sp, sp, x2
-  sub  sp, sp, #64  // Make sure there is room for at least 8 args in the 
new stack
-  mov  x0, sp
-
-  bl   CopyMem  // Sp, NewStackPointer, Length
-
-  ldp  x0, x1, [sp], #16
-  ldp  x2, x3, [sp], #16
-  ldp  x4, x5, [sp], #16
-  ldp  x6, x7, [sp], #16
-
-  blr  x19
-
-  mov  sp,  x20
-  ldp  x29, x30, [sp], #16
-  ldp  x19, x20, [sp], #16
-
-  ret
+mov x8, x0 // Preserve x0
+mov x9, x1 // Preserve x1
+
+//
+// If the EBC stack frame is smaller than or equal to 64 bytes, we know 
there
+// are no stacked arguments #9 and beyond that we need to copy to the 
native
+// stack. In this case, we can perform a tail call which is much more
+// efficient, since there is no need to touch the native stack at all.
+//
+sub x3, x2, x1  // Length = NewStackPointer - FramePtr
+cmp x3, #64
+b.gt1f
+
+adr x0, 0f
+sub x0, x0, x3, lsr #1
+br  x0
+
+ldr x7, [x9, #56]
+ldr x6, [x9, #48]
+ldr x5, [x9, #40]
+ldr x4, [x9, #32]
+ldr x3, [x9, #24]
+ldr x2, [x9, #16]
+ldr x1, [x9, #8]
+ldr x0, [x9]
+
+0:  br  x8
+
+//
+// More than 64 bytes: we need to build the full native stack frame and 
copy
+// the part of the VM stack exceeding 64 bytes (which may contain stacked
+// arguments) to the native stack
+//
+1:  stp x29, x30, [sp, #-16]!
+mov x29, sp
+
+//
+// Ensure that the stack pointer remains 16 byte aligned,
+// even if the size of the VM stack frame is not a multiple of 16
+//
+add x1, x1, #64 // Skip over [potential] reg params
+tbz x3, #3, 2f  // Multiple of 16?
+ldr x4, [x2, #-8]!  // No? Then push one word
+str x4, [sp, #-16]! // ... but use two slots
+b   3f
+
+2:  ldp x4, x5, [x2, #-16]!
+stp x4, x5, [sp, #-16]!
+3:  cmp x2, x1
+b.gt2b
+
+ldp x0, x1, [x9]
+ldp x2, x3, [x9, #16]
+ldp x4, x5, [x9, #32]
+ldp x6, x7, [x9, #48]
+
+blr x8
+
+mov sp, x29
+ldp x29, x30, [sp], #16
+ret
 
 //
 // EbcLLEbcInterpret
 //
 // This