Add a module that initializes a CPU for the SMM environment and
installs the first-level SMI handler.  This module, along with the
SMM IPL and SMM Core, provides the services required for
DXE_SMM_DRIVERS to register hardware and software SMI handlers.

CPU-specific features are abstracted through the SmmCpuFeaturesLib.

Platform-specific features are abstracted through the
SmmCpuPlatformHookLib.

Several PCDs are added to enable/disable features and configure
settings for the PiSmmCpuDxeSmm module.
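
For illustration only (not part of this patch): once these pieces are
dispatched, a DXE_SMM_DRIVER can register a root SMI handler through
the PI SMM System Table (gSmst, from SmmServicesTableLib), using the
standard EFI_SMM_HANDLER_ENTRY_POINT2 prototype. ExampleSmiHandler and
the surrounding scaffolding are hypothetical names, a minimal sketch:

  EFI_STATUS
  EFIAPI
  ExampleSmiHandler (
    IN EFI_HANDLE  DispatchHandle,
    IN CONST VOID  *Context         OPTIONAL,
    IN OUT VOID    *CommBuffer      OPTIONAL,
    IN OUT UINTN   *CommBufferSize  OPTIONAL
    )
  {
    //
    // Service the SMI here; CommBuffer, if any, carries data from the
    // non-SMM caller.
    //
    return EFI_SUCCESS;
  }

  EFI_STATUS  Status;
  EFI_HANDLE  DispatchHandle;

  //
  // A NULL HandlerType GUID registers a root SMI handler, which is
  // called on every SMI.
  //
  DispatchHandle = NULL;
  Status = gSmst->SmiHandlerRegister (ExampleSmiHandler, NULL, &DispatchHandle);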

Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Michael Kinney <[email protected]>
---
 UefiCpuPkg/PiSmmCpuDxeSmm/X64/MpFuncs.S        | 204 ++++++++
 UefiCpuPkg/PiSmmCpuDxeSmm/X64/MpFuncs.asm      | 206 ++++++++
 UefiCpuPkg/PiSmmCpuDxeSmm/X64/PageTbl.c        | 692 +++++++++++++++++++++++++
 UefiCpuPkg/PiSmmCpuDxeSmm/X64/Semaphore.c      |  64 +++
 UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmiEntry.S       | 217 ++++++++
 UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmiEntry.asm     | 221 ++++++++
 UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmiException.S   | 610 ++++++++++++++++++++++
 UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmiException.asm | 413 +++++++++++++++
 UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmmInit.S        | 141 +++++
 UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmmInit.asm      | 132 +++++
 UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmmProfileArch.c | 316 +++++++++++
 UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmmProfileArch.h | 105 ++++
 12 files changed, 3321 insertions(+)
 create mode 100644 UefiCpuPkg/PiSmmCpuDxeSmm/X64/MpFuncs.S
 create mode 100644 UefiCpuPkg/PiSmmCpuDxeSmm/X64/MpFuncs.asm
 create mode 100644 UefiCpuPkg/PiSmmCpuDxeSmm/X64/PageTbl.c
 create mode 100644 UefiCpuPkg/PiSmmCpuDxeSmm/X64/Semaphore.c
 create mode 100644 UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmiEntry.S
 create mode 100644 UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmiEntry.asm
 create mode 100644 UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmiException.S
 create mode 100644 UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmiException.asm
 create mode 100644 UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmmInit.S
 create mode 100644 UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmmInit.asm
 create mode 100644 UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmmProfileArch.c
 create mode 100644 UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmmProfileArch.h

diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/X64/MpFuncs.S b/UefiCpuPkg/PiSmmCpuDxeSmm/X64/MpFuncs.S
new file mode 100644
index 0000000..d7cbc8c
--- /dev/null
+++ b/UefiCpuPkg/PiSmmCpuDxeSmm/X64/MpFuncs.S
@@ -0,0 +1,204 @@
+#------------------------------------------------------------------------------
+#
+# Copyright (c) 2006 - 2015, Intel Corporation. All rights reserved.<BR>
+# This program and the accompanying materials
+# are licensed and made available under the terms and conditions of the BSD License
+# which accompanies this distribution.  The full text of the license may be found at
+# http://opensource.org/licenses/bsd-license.php.
+#
+# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+#
+# Module Name:
+#
+#   MpFuncs.S
+#
+# Abstract:
+#
+#   This is the assembly code for Multi-processor S3 support
+#
+#------------------------------------------------------------------------------
+
+.equ                   VacantFlag,       0x0
+.equ                   NotVacantFlag,    0xff
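+
+# VacantFlag and NotVacantFlag are the two states of the byte lock at
+# LockLocation; the lock serializes the APs through TestLock/ProgramStack/
+# Releaselock below while each AP carves out its stack.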
+
+.equ                   LockLocation,              RendezvousFunnelProcEnd - RendezvousFunnelProcStart
+.equ                   StackStartAddressLocation, RendezvousFunnelProcEnd - RendezvousFunnelProcStart + 0x08
+.equ                   StackSizeLocation,         RendezvousFunnelProcEnd - RendezvousFunnelProcStart + 0x10
+.equ                   CProcedureLocation,        RendezvousFunnelProcEnd - RendezvousFunnelProcStart + 0x18
+.equ                   GdtrLocation,              RendezvousFunnelProcEnd - RendezvousFunnelProcStart + 0x20
+.equ                   IdtrLocation,              RendezvousFunnelProcEnd - RendezvousFunnelProcStart + 0x2A
+.equ                   BufferStartLocation,       RendezvousFunnelProcEnd - RendezvousFunnelProcStart + 0x34
+.equ                   Cr3OffsetLocation,         RendezvousFunnelProcEnd - RendezvousFunnelProcStart + 0x38
+
+#-------------------------------------------------------------------------------------
+#RendezvousFunnelProc  procedure follows. All APs execute their procedure. This
+#procedure serializes all the AP processors through an Init sequence. It must be
+#noted that APs arrive here very raw...ie: real mode, no stack.
+#ALSO THIS PROCEDURE IS EXECUTED BY APs ONLY ON 16 BIT MODE. HENCE THIS PROC
+#IS IN MACHINE CODE.
+#-------------------------------------------------------------------------------------
+#RendezvousFunnelProc (&WakeUpBuffer,MemAddress);
+
+.code:
+
+ASM_GLOBAL ASM_PFX(RendezvousFunnelProc)
+ASM_PFX(RendezvousFunnelProc):
+RendezvousFunnelProcStart:
+
+# At this point CS = 0x(vv00) and ip= 0x0.
+
+        .byte 0x8c,0xc8               # mov        ax,  cs
+        .byte 0x8e,0xd8               # mov        ds,  ax
+        .byte 0x8e,0xc0               # mov        es,  ax
+        .byte 0x8e,0xd0               # mov        ss,  ax
+        .byte 0x33,0xc0               # xor        ax,  ax
+        .byte 0x8e,0xe0               # mov        fs,  ax
+        .byte 0x8e,0xe8               # mov        gs,  ax
+
+flat32Start:
+
+        .byte 0xBE
+        .word BufferStartLocation
+        .byte 0x66,0x8B,0x14          # mov        edx,dword ptr [si]          ; EDX is keeping the start address of wakeup buffer
+
+        .byte 0xBE
+        .word Cr3OffsetLocation
+        .byte 0x66,0x8B,0xC           # mov        ecx,dword ptr [si]          ; ECX is keeping the value of CR3
+
+        .byte 0xBE
+        .word GdtrLocation
+        .byte 0x66                    # db         66h
+        .byte 0x2E,0xF,0x1,0x14       # lgdt       fword ptr cs:[si]
+
+        .byte 0xBE
+        .word IdtrLocation
+        .byte 0x66                    # db         66h
+        .byte 0x2E,0xF,0x1,0x1C       # lidt       fword ptr cs:[si]
+
+        .byte 0x33,0xC0               # xor        ax,  ax
+        .byte 0x8E,0xD8               # mov        ds,  ax
+
+        .byte 0xF,0x20,0xC0           # mov        eax, cr0                    ; Get control register 0
+        .byte 0x66,0x83,0xC8,0x1      # or         eax, 000000001h             ; Set PE bit (bit #0)
+        .byte 0xF,0x22,0xC0           # mov        cr0, eax
+
+FLAT32_JUMP:
+
+        .byte 0x66,0x67,0xEA          # far jump
+        .long 0x0                     # 32-bit offset
+        .word 0x20                    # 16-bit selector
+
+PMODE_ENTRY:                          # protected mode entry point
+
+        .byte 0x66,0xB8,0x18,0x0      # mov        ax,  18h
+        .byte 0x66,0x8E,0xD8          # mov        ds,  ax
+        .byte 0x66,0x8E,0xC0          # mov        es,  ax
+        .byte 0x66,0x8E,0xE0          # mov        fs,  ax
+        .byte 0x66,0x8E,0xE8          # mov        gs,  ax
+        .byte 0x66,0x8E,0xD0          # mov        ss,  ax                     ; Flat mode setup.
+
+        .byte 0xF,0x20,0xE0           # mov        eax, cr4
+        .byte 0xF,0xBA,0xE8,0x5       # bts        eax, 5
+        .byte 0xF,0x22,0xE0           # mov        cr4, eax
+
+        .byte 0xF,0x22,0xD9           # mov        cr3, ecx
+
+        .byte 0x8B,0xF2               # mov        esi, edx                    ; Save wakeup buffer address
+
+        .byte 0xB9
+        .long 0xC0000080              # mov        ecx, 0c0000080h             ; EFER MSR number.
+        .byte 0xF,0x32                # rdmsr                                  ; Read EFER.
+        .byte 0xF,0xBA,0xE8,0x8       # bts        eax, 8                      ; Set LME=1.
+        .byte 0xF,0x30                # wrmsr                                  ; Write EFER.
+
+        .byte 0xF,0x20,0xC0           # mov        eax, cr0                    ; Read CR0.
+        .byte 0xF,0xBA,0xE8,0x1F      # bts        eax, 31                     ; Set PG=1.
+        .byte 0xF,0x22,0xC0           # mov        cr0, eax                    ; Write CR0.
+
+LONG_JUMP:
+
+        .byte 0x67,0xEA               # far jump
+        .long 0x0                     # 32-bit offset
+        .word 0x38                    # 16-bit selector
+
+LongModeStart:
+
+        movw        $0x30,%ax
+        .byte       0x66
+        movw        %ax,%ds
+        .byte       0x66
+        movw        %ax,%es
+        .byte       0x66
+        movw        %ax,%ss
+
+        movl %esi,%edi
+        addl $LockLocation, %edi
+        movb $NotVacantFlag, %al
+TestLock:
+        xchgb (%edi), %al
+        cmpb $NotVacantFlag, %al
+        jz   TestLock
+
+ProgramStack:
+
+        movl %esi,%edi
+        addl $StackSizeLocation, %edi
+        movq (%edi), %rax
+        movl %esi,%edi
+        addl $StackStartAddressLocation, %edi
+        addq (%edi), %rax
+        movq %rax, %rsp
+        movq %rax, (%edi)
+
+Releaselock:
+
+        movb $VacantFlag, %al
+        movl %esi,%edi
+        addl $LockLocation, %edi
+        xchgb (%edi), %al
+
+        #
+        # Call assembly function to initialize FPU.
+        #
+        movabsq     $ASM_PFX(InitializeFloatingPointUnits), %rax
+        subq        $0x20, %rsp
+        call        *%rax
+        addq        $0x20, %rsp
+        #
+        # Call C Function
+        #
+        movl        %esi,%edi
+        addl        $CProcedureLocation, %edi
+        movq        (%edi), %rax
+
+        testq       %rax, %rax
+        jz          GoToSleep
+
+        subq        $0x20, %rsp
+        call        *%rax
+        addq        $0x20, %rsp
+
+GoToSleep:
+        cli
+        hlt
+        jmp         .-2
+
+RendezvousFunnelProcEnd:
+
+
+#-------------------------------------------------------------------------------------
+#  AsmGetAddressMap (&AddressMap);
+#-------------------------------------------------------------------------------------
+# comments here for definition of address map
+ASM_GLOBAL ASM_PFX(AsmGetAddressMap)
+ASM_PFX(AsmGetAddressMap):
+        movabsq      $RendezvousFunnelProcStart, %rax
+        movq         %rax, (%rcx)
+        movq         $(PMODE_ENTRY - RendezvousFunnelProcStart), 0x08(%rcx)
+        movq         $(FLAT32_JUMP - RendezvousFunnelProcStart), 0x10(%rcx)
+        movq         $(RendezvousFunnelProcEnd - RendezvousFunnelProcStart), 0x18(%rcx)
+        movq         $(LongModeStart - RendezvousFunnelProcStart), 0x20(%rcx)
+        movq         $(LONG_JUMP - RendezvousFunnelProcStart), 0x28(%rcx)
+        ret
+
diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/X64/MpFuncs.asm b/UefiCpuPkg/PiSmmCpuDxeSmm/X64/MpFuncs.asm
new file mode 100644
index 0000000..ff7c3c0
--- /dev/null
+++ b/UefiCpuPkg/PiSmmCpuDxeSmm/X64/MpFuncs.asm
@@ -0,0 +1,206 @@
+;------------------------------------------------------------------------------
+;
+; Copyright (c) 2006 - 2015, Intel Corporation. All rights reserved.<BR>
+; This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution.  The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php.
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+; Module Name:
+;
+;   MpFuncs.asm
+; 
+; Abstract:
+; 
+;   This is the assembly code for Multi-processor S3 support
+;
+;-------------------------------------------------------------------------------
+
+EXTERN  InitializeFloatingPointUnits:PROC
+
+VacantFlag             Equ   00h
+NotVacantFlag          Equ   0ffh
+
+LockLocation                  equ        RendezvousFunnelProcEnd - RendezvousFunnelProcStart
+StackStartAddressLocation     equ        LockLocation + 08h
+StackSizeLocation             equ        LockLocation + 10h
+CProcedureLocation            equ        LockLocation + 18h
+GdtrLocation                  equ        LockLocation + 20h
+IdtrLocation                  equ        LockLocation + 2Ah
+BufferStartLocation           equ        LockLocation + 34h
+Cr3OffsetLocation             equ        LockLocation + 38h
+
+;-------------------------------------------------------------------------------------
+;RendezvousFunnelProc  procedure follows. All APs execute their procedure. This
+;procedure serializes all the AP processors through an Init sequence. It must be
+;noted that APs arrive here very raw...ie: real mode, no stack.
+;ALSO THIS PROCEDURE IS EXECUTED BY APs ONLY ON 16 BIT MODE. HENCE THIS PROC
+;IS IN MACHINE CODE.
+;-------------------------------------------------------------------------------------
+;RendezvousFunnelProc (&WakeUpBuffer,MemAddress);
+
+;text      SEGMENT
+.code
+
+RendezvousFunnelProc   PROC  
+RendezvousFunnelProcStart::
+
+; At this point CS = 0x(vv00) and ip= 0x0.
+
+        db 8ch,  0c8h                 ; mov        ax,  cs
+        db 8eh,  0d8h                 ; mov        ds,  ax
+        db 8eh,  0c0h                 ; mov        es,  ax
+        db 8eh,  0d0h                 ; mov        ss,  ax 
+        db 33h,  0c0h                 ; xor        ax,  ax
+        db 8eh,  0e0h                 ; mov        fs,  ax
+        db 8eh,  0e8h                 ; mov        gs,  ax
+
+flat32Start::
+
+        db 0BEh
+        dw BufferStartLocation        ; mov        si, BufferStartLocation
+        db 66h,  8Bh, 14h             ; mov        edx,dword ptr [si]          ; EDX is keeping the start address of wakeup buffer
+
+        db 0BEh
+        dw Cr3OffsetLocation          ; mov        si, Cr3Location
+        db 66h,  8Bh, 0Ch             ; mov        ecx,dword ptr [si]          ; ECX is keeping the value of CR3
+        
+        db 0BEh
+        dw GdtrLocation               ; mov        si, GdtrProfile
+        db 66h                        ; db         66h
+        db 2Eh,  0Fh, 01h, 14h        ; lgdt       fword ptr cs:[si]
+
+        db 0BEh
+        dw IdtrLocation               ; mov        si, IdtrProfile
+        db 66h                        ; db         66h
+        db 2Eh,  0Fh, 01h, 1Ch        ; lidt       fword ptr cs:[si]
+        
+        db 33h,  0C0h                 ; xor        ax,  ax
+        db 8Eh,  0D8h                 ; mov        ds,  ax
+        
+        db 0Fh,  20h, 0C0h            ; mov        eax, cr0                    ; Get control register 0
+        db 66h,  83h, 0C8h, 01h       ; or         eax, 000000001h             ; Set PE bit (bit #0)
+        db 0Fh,  22h, 0C0h            ; mov        cr0, eax
+
+FLAT32_JUMP::
+
+        db 66h,  67h, 0EAh            ; far jump
+        dd 0h                         ; 32-bit offset
+        dw 20h                        ; 16-bit selector
+
+PMODE_ENTRY::                         ; protected mode entry point
+
+        db 66h,  0B8h, 18h,  00h      ; mov        ax,  18h
+        db 66h,  8Eh,  0D8h           ; mov        ds,  ax
+        db 66h,  8Eh,  0C0h           ; mov        es,  ax
+        db 66h,  8Eh,  0E0h           ; mov        fs,  ax
+        db 66h,  8Eh,  0E8h           ; mov        gs,  ax
+        db 66h,  8Eh,  0D0h           ; mov        ss,  ax                     ; Flat mode setup.
+
+        db 0Fh,  20h,  0E0h           ; mov        eax, cr4
+        db 0Fh,  0BAh, 0E8h, 05h      ; bts        eax, 5
+        db 0Fh,  22h,  0E0h           ; mov        cr4, eax
+
+        db 0Fh,  22h,  0D9h           ; mov        cr3, ecx
+        
+        db 8Bh,  0F2h                 ; mov        esi, edx                    ; Save wakeup buffer address
+        
+        db 0B9h
+        dd 0C0000080h                 ; mov        ecx, 0c0000080h             ; EFER MSR number.
+        db 0Fh,  32h                  ; rdmsr                                  ; Read EFER.
+        db 0Fh,  0BAh, 0E8h, 08h      ; bts        eax, 8                      ; Set LME=1.
+        db 0Fh,  30h                  ; wrmsr                                  ; Write EFER.
+        
+        db 0Fh,  20h,  0C0h           ; mov        eax, cr0                    ; Read CR0.
+        db 0Fh,  0BAh, 0E8h, 1Fh      ; bts        eax, 31                     ; Set PG=1.
+        db 0Fh,  22h,  0C0h           ; mov        cr0, eax                    ; Write CR0.
+
+LONG_JUMP::
+        
+        db 67h,  0EAh                 ; far jump
+        dd 0h                         ; 32-bit offset
+        dw 38h                        ; 16-bit selector
+        
+LongModeStart::
+
+        mov         ax,  30h
+        mov         ds,  ax
+        mov         es,  ax
+        mov         ss,  ax
+
+        mov  edi, esi
+        add  edi, LockLocation
+        mov  al,  NotVacantFlag
+TestLock::
+        xchg byte ptr [edi], al
+        cmp  al, NotVacantFlag
+        jz   TestLock
+
+ProgramStack::
+
+        mov  edi, esi
+        add  edi, StackSizeLocation
+        mov  rax, qword ptr [edi]
+        mov  edi, esi
+        add  edi, StackStartAddressLocation
+        add  rax, qword ptr [edi]
+        mov  rsp, rax
+        mov  qword ptr [edi], rax
+
+Releaselock::
+
+        mov  al,  VacantFlag
+        mov  edi, esi
+        add  edi, LockLocation
+        xchg byte ptr [edi], al
+
+        ;
+        ; Call assembly function to initialize FPU.
+        ;
+        mov         rax, InitializeFloatingPointUnits
+        sub         rsp, 20h
+        call        rax
+        add         rsp, 20h
+
+        ;
+        ; Call C Function
+        ;
+        mov         edi, esi
+        add         edi, CProcedureLocation
+        mov         rax, qword ptr [edi]
+
+        test        rax, rax
+        jz          GoToSleep
+
+        sub         rsp, 20h
+        call        rax
+        add         rsp, 20h
+        
+GoToSleep::
+        cli
+        hlt
+        jmp         $-2
+        
+RendezvousFunnelProcEnd::
+RendezvousFunnelProc   ENDP
+
+
+;-------------------------------------------------------------------------------------
+;  AsmGetAddressMap (&AddressMap);
+;-------------------------------------------------------------------------------------
+; comments here for definition of address map
+AsmGetAddressMap   PROC  
+        mov         rax, offset RendezvousFunnelProcStart
+        mov         qword ptr [rcx], rax
+        mov         qword ptr [rcx+8h], PMODE_ENTRY - RendezvousFunnelProcStart
+        mov         qword ptr [rcx+10h], FLAT32_JUMP - RendezvousFunnelProcStart
+        mov         qword ptr [rcx+18h], RendezvousFunnelProcEnd - RendezvousFunnelProcStart
+        mov         qword ptr [rcx+20h], LongModeStart - RendezvousFunnelProcStart
+        mov         qword ptr [rcx+28h], LONG_JUMP - RendezvousFunnelProcStart
+        ret
+        
+AsmGetAddressMap   ENDP
+
+END
diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/X64/PageTbl.c b/UefiCpuPkg/PiSmmCpuDxeSmm/X64/PageTbl.c
new file mode 100644
index 0000000..bbb2ee3
--- /dev/null
+++ b/UefiCpuPkg/PiSmmCpuDxeSmm/X64/PageTbl.c
@@ -0,0 +1,692 @@
+/** @file
+Page Fault (#PF) handler for X64 processors
+
+Copyright (c) 2009 - 2015, Intel Corporation. All rights reserved.<BR>
+This program and the accompanying materials
+are licensed and made available under the terms and conditions of the BSD License
+which accompanies this distribution.  The full text of the license may be found at
+http://opensource.org/licenses/bsd-license.php
+
+THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+
+**/
+
+#include "PiSmmCpuDxeSmm.h"
+
+#define PAGE_TABLE_PAGES            8
+#define ACC_MAX_BIT                 BIT3
+LIST_ENTRY                          mPagePool = INITIALIZE_LIST_HEAD_VARIABLE (mPagePool);
+SPIN_LOCK                           mPFLock;
+BOOLEAN                             m1GPageTableSupport = FALSE;
+
+/**
+  Check if 1-GByte pages are supported by the processor or not.
+
+  @retval TRUE   1-GByte pages are supported.
+  @retval FALSE  1-GByte pages are not supported.
+
+**/
+BOOLEAN
+Is1GPageSupport (
+  VOID
+  )
+{
+  UINT32         RegEax;
+  UINT32         RegEdx;
+
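+  //
+  // CPUID leaf 80000001h, EDX bit 26 (Page1GB), reports 1-GByte page support.
+  //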
+  AsmCpuid (0x80000000, &RegEax, NULL, NULL, NULL);
+  if (RegEax >= 0x80000001) {
+    AsmCpuid (0x80000001, NULL, NULL, NULL, &RegEdx);
+    if ((RegEdx & BIT26) != 0) {
+      return TRUE;
+    }
+  }
+  return FALSE;
+}
+
+/**
+  Set sub-entries number in entry.
+  
+  @param[in, out] Entry        Pointer to entry
+  @param[in]      SubEntryNum  Sub-entries number based on 0:
+                               0 means there is 1 sub-entry under this entry;
+                               0x1ff means there are 512 sub-entries under this entry.
+
+**/
+VOID
+SetSubEntriesNum (
+  IN OUT UINT64               *Entry,
+  IN     UINT64               SubEntryNum
+  )
+{
+  //
+  // Sub-entries number is saved in BIT52 to BIT60 (reserved field) in Entry
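+  // (the processor ignores these bits in the entry, so they are free for software use)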
+  //
+  *Entry = BitFieldWrite64 (*Entry, 52, 60, SubEntryNum);
+}
+
+/**
+  Return sub-entries number in entry.
+  
+  @param[in] Entry        Pointer to entry
+
+  @return Sub-entries number based on 0:
+          0 means there is 1 sub-entry under this entry;
+          0x1ff means there are 512 sub-entries under this entry.
+**/
+UINT64
+GetSubEntriesNum (
+  IN UINT64            *Entry
+  )
+{
+  //
+  // Sub-entries number is saved in BIT52 to BIT60 (reserved field) in Entry
+  //
+  return BitFieldRead64 (*Entry, 52, 60);
+}
+
+/**
+  Create PageTable for SMM use.
+
+  @return The address of PML4 (to set CR3).
+
+**/
+UINT32
+SmmInitPageTable (
+  VOID
+  )
+{
+  EFI_PHYSICAL_ADDRESS              Pages;
+  UINT64                            *PTEntry;
+  LIST_ENTRY                        *FreePage;
+  UINTN                             Index;
+  UINTN                             PageFaultHandlerHookAddress;
+  IA32_IDT_GATE_DESCRIPTOR          *IdtEntry;
+
+  //
+  // Initialize spin lock
+  //
+  InitializeSpinLock (&mPFLock);
+
+  m1GPageTableSupport = Is1GPageSupport ();
+  //
+  // Generate PAE page table for the first 4GB memory space
+  //
+  Pages = Gen4GPageTable (PAGE_TABLE_PAGES + 1);
+
+  //
+  // Set IA32_PG_PMNT bit to mask this entry
+  //
+  PTEntry = (UINT64*)(UINTN)Pages;
+  for (Index = 0; Index < 4; Index++) {
+    PTEntry[Index] |= IA32_PG_PMNT;
+  }
+
+  //
+  // Fill Page-Table-Level4 (PML4) entry
+  //
+  PTEntry = (UINT64*)(UINTN)(Pages - EFI_PAGES_TO_SIZE (PAGE_TABLE_PAGES + 1));
+  *PTEntry = Pages + IA32_PG_P;
+  ZeroMem (PTEntry + 1, EFI_PAGE_SIZE - sizeof (*PTEntry));
+  //
+  // Set sub-entries number 
+  //
+  SetSubEntriesNum (PTEntry, 3);
+
+  //
+  // Add remaining pages to page pool
+  //
+  FreePage = (LIST_ENTRY*)(PTEntry + EFI_PAGE_SIZE / sizeof (*PTEntry));
+  while ((UINTN)FreePage < Pages) {
+    InsertTailList (&mPagePool, FreePage);
+    FreePage += EFI_PAGE_SIZE / sizeof (*FreePage);
+  }
+
+  if (FeaturePcdGet (PcdCpuSmmProfileEnable)) {
+    //
+    // Set own Page Fault entry instead of the default one, because SMM Profile
+    // feature depends on IRET instruction to do Single Step
+    //
+    PageFaultHandlerHookAddress = (UINTN)PageFaultIdtHandlerSmmProfile;
+    IdtEntry  = (IA32_IDT_GATE_DESCRIPTOR *) gcSmiIdtr.Base;
+    IdtEntry += EXCEPT_IA32_PAGE_FAULT;
+    IdtEntry->Bits.OffsetLow      = (UINT16)PageFaultHandlerHookAddress;
+    IdtEntry->Bits.Reserved_0     = 0;
+    IdtEntry->Bits.GateType       = IA32_IDT_GATE_TYPE_INTERRUPT_32;
+    IdtEntry->Bits.OffsetHigh     = (UINT16)(PageFaultHandlerHookAddress >> 16);
+    IdtEntry->Bits.OffsetUpper    = (UINT32)(PageFaultHandlerHookAddress >> 32);
+    IdtEntry->Bits.Reserved_1     = 0;
+  } else {
+    //
+    // Register Smm Page Fault Handler
+    //
+    SmmRegisterExceptionHandler (&mSmmCpuService, EXCEPT_IA32_PAGE_FAULT, SmiPFHandler);
+  }
+
+  //
+  // Additional SMM IDT initialization for SMM stack guard
+  //
+  if (FeaturePcdGet (PcdCpuSmmStackGuard)) {
+    InitializeIDTSmmStackGuard ();
+  }
+
+  //
+  // Return the address of PML4 (to set CR3)
+  //
+  return (UINT32)(UINTN)PTEntry;
+}
+
+/**
+  Set access record in entry.
+  
+  @param[in, out] Entry        Pointer to entry
+  @param[in]      Acc          Access record value
+
+**/
+VOID
+SetAccNum (
+  IN OUT UINT64               *Entry,
+  IN     UINT64               Acc
+  )
+{
+  //
+  // Access record is saved in BIT9 to BIT11 (reserved field) in Entry
+  //
+  *Entry = BitFieldWrite64 (*Entry, 9, 11, Acc);
+}
+
+/**
+  Return access record in entry.
+  
+  @param[in] Entry        Pointer to entry
+
+  @return Access record value.
+
+**/
+UINT64
+GetAccNum (
+  IN UINT64            *Entry
+  )
+{
+  //
+  // Access record is saved in BIT9 to BIT11 (reserved field) in Entry
+  //
+  return BitFieldRead64 (*Entry, 9, 11);
+}
+
+/**
+  Return and update the access record in entry.
+
+  @param[in, out]  Entry    Pointer to entry
+
+  @return Access record value.
+
+**/
+UINT64
+GetAndUpdateAccNum (
+  IN OUT UINT64      *Entry
+  )
+{
+  UINT64         Acc;
+ 
+  Acc = GetAccNum (Entry);
+  if ((*Entry & IA32_PG_A) != 0) {
+    //
+    // If this entry has been accessed, clear access flag in Entry and update access record
+    // to the initial value 7, adding ACC_MAX_BIT is to make it larger than others
+    //
+    *Entry &= ~(UINT64)(UINTN)IA32_PG_A;
+    SetAccNum (Entry, 0x7);
+    return (0x7 + ACC_MAX_BIT);
+  } else {
+    if (Acc != 0) {
+      //
+      // If the access record is not the smallest value 0, minus 1 and update the access record field
+      //
+      SetAccNum (Entry, Acc - 1);
+    }
+  }
+  return Acc;
+}
+
+/**
+  Reclaim free pages for PageFault handler.
+
+  Search the whole entry tree to find the leaf entry that has the smallest
+  access record value. Insert the page pointed to by this leaf entry into the
+  page pool, and check whether its upper-level entries also need to be
+  inserted into the page pool.
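+
+  Note: the access record behaves as a pseudo-LRU counter: a recently
+  accessed entry is refreshed to the maximum record value while the other
+  entries decay toward zero, so the page reclaimed here is the least
+  recently used one.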
+
+**/
+VOID
+ReclaimPages (
+  VOID
+  )
+{
+  UINT64                       *Pml4;
+  UINT64                       *Pdpt;
+  UINT64                       *Pdt;
+  UINTN                        Pml4Index;
+  UINTN                        PdptIndex;
+  UINTN                        PdtIndex;
+  UINTN                        MinPml4;
+  UINTN                        MinPdpt;
+  UINTN                        MinPdt;
+  UINT64                       MinAcc;
+  UINT64                       Acc;
+  UINT64                       SubEntriesNum;
+  BOOLEAN                      PML4EIgnore;
+  BOOLEAN                      PDPTEIgnore;
+  UINT64                       *ReleasePageAddress;
+
+  Pml4 = NULL;
+  Pdpt = NULL;
+  Pdt  = NULL;
+  MinAcc  = (UINT64)-1;
+  MinPml4 = (UINTN)-1;
+  MinPdpt = (UINTN)-1;
+  MinPdt  = (UINTN)-1;
+  Acc     = 0;
+  ReleasePageAddress = 0;
+
+  //
+  // First, find the leaf entry that has the smallest access record value
+  //
+  Pml4 = (UINT64*)(UINTN)(AsmReadCr3 () & gPhyMask);
+  for (Pml4Index = 0; Pml4Index < EFI_PAGE_SIZE / sizeof (*Pml4); Pml4Index++) {
+    if ((Pml4[Pml4Index] & IA32_PG_P) == 0 || (Pml4[Pml4Index] & IA32_PG_PMNT) != 0) {
+      //
+      // If the PML4 entry is not present or is masked, skip it
+      //
+      continue;
+    }
+    Pdpt = (UINT64*)(UINTN)(Pml4[Pml4Index] & gPhyMask);
+    PML4EIgnore = FALSE;
+    for (PdptIndex = 0; PdptIndex < EFI_PAGE_SIZE / sizeof (*Pdpt); PdptIndex++) {
+      if ((Pdpt[PdptIndex] & IA32_PG_P) == 0 || (Pdpt[PdptIndex] & IA32_PG_PMNT) != 0) {
+        //
+        // If the PDPT entry is not present or is masked, skip it
+        //
+        if ((Pdpt[PdptIndex] & IA32_PG_PMNT) != 0) {
+          //
+          // If the PDPT entry is masked, we will ignore checking the PML4 entry
+          //
+          PML4EIgnore = TRUE;
+        }
+        continue;
+      }
+      if ((Pdpt[PdptIndex] & IA32_PG_PS) == 0) {
+        //
+        // It's not a 1-GByte page entry; it should be a PDPT entry,
+        // so we will not check the PML4 entry any more
+        //
+        PML4EIgnore = TRUE;
+        Pdt =  (UINT64*)(UINTN)(Pdpt[PdptIndex] & gPhyMask);
+        PDPTEIgnore = FALSE;
+        for (PdtIndex = 0; PdtIndex < EFI_PAGE_SIZE / sizeof(*Pdt); PdtIndex++) {
+          if ((Pdt[PdtIndex] & IA32_PG_P) == 0 || (Pdt[PdtIndex] & IA32_PG_PMNT) != 0) {
+            //
+            // If the PD entry is not present or is masked, skip it
+            //
+            if ((Pdt[PdtIndex] & IA32_PG_PMNT) != 0) {
+              //
+              // If the PD entry is masked, we will not check the PDPT entry any more
+              //
+              PDPTEIgnore = TRUE;
+            }
+            continue;
+          } 
+          if ((Pdt[PdtIndex] & IA32_PG_PS) == 0) {
+            //
+            // It's not a 2-MByte page entry; it should be a PD entry,
+            // and we will find the entry that has the smallest access record value
+            //
+            PDPTEIgnore = TRUE;
+            Acc = GetAndUpdateAccNum (Pdt + PdtIndex);
+            if (Acc < MinAcc) {
+              //
+              // If the PD entry has the smallest access record value,
+              // save the Page address to be released
+              //
+              MinAcc  = Acc;
+              MinPml4 = Pml4Index;
+              MinPdpt = PdptIndex;
+              MinPdt  = PdtIndex;
+              ReleasePageAddress = Pdt + PdtIndex;
+            }
+          }
+        }
+        if (!PDPTEIgnore) {
+          //
+          // If this PDPT entry has no PDT entries pointing to 4-KByte pages,
+          // it should only have entries that point to 2-MByte pages
+          //
+          Acc = GetAndUpdateAccNum (Pdpt + PdptIndex);
+          if (Acc < MinAcc) {
+            //
+            // If the PDPT entry has the smallest access record value,
+            // save the Page address to be released
+            //
+            MinAcc  = Acc;
+            MinPml4 = Pml4Index;
+            MinPdpt = PdptIndex;
+            MinPdt  = (UINTN)-1;
+            ReleasePageAddress = Pdpt + PdptIndex;
+          }
+        }
+      }
+    }
+    if (!PML4EIgnore) {
+      //
+      // If the PML4 entry has no PDPT entries pointing to 2-MByte pages,
+      // it should only have entries that point to 1-GByte pages
+      //
+      Acc = GetAndUpdateAccNum (Pml4 + Pml4Index);
+      if (Acc < MinAcc) {
+        //
+        // If the PML4 entry has the smallest access record value,
+        // save the Page address to be released
+        //
+        MinAcc  = Acc;
+        MinPml4 = Pml4Index;
+        MinPdpt = (UINTN)-1;
+        MinPdt  = (UINTN)-1;
+        ReleasePageAddress = Pml4 + Pml4Index; 
+      }
+    }
+  }
+  //
+  // Make sure one PML4/PDPT/PD entry is selected
+  //
+  ASSERT (MinAcc != (UINT64)-1);
+
+  //
+  // Secondly, insert the page pointed by this entry into page pool and clear this entry
+  //
+  InsertTailList (&mPagePool, (LIST_ENTRY*)(UINTN)(*ReleasePageAddress & gPhyMask));
+  *ReleasePageAddress = 0;
+ 
+  //
+  // Lastly, check whether this entry's upper-level entries also need to be
+  // inserted into the page pool
+  //
+  while (TRUE) {
+    if (MinPdt != (UINTN)-1) {
+      //
+      // If 4 KByte Page Table is released, check the PDPT entry
+      //
+      Pdpt = (UINT64*)(UINTN)(Pml4[MinPml4] & gPhyMask);
+      SubEntriesNum = GetSubEntriesNum(Pdpt + MinPdpt);
+      if (SubEntriesNum == 0) {
+        //
+        // Release the empty Page Directory table if there was no more 4-KByte Page Table entry,
+        // and clear the Page Directory entry
+        //
+        InsertTailList (&mPagePool, (LIST_ENTRY*)(UINTN)(Pdpt[MinPdpt] & gPhyMask));
+        Pdpt[MinPdpt] = 0;
+        //
+        // Go on checking the PML4 table
+        //
+        MinPdt = (UINTN)-1;
+        continue;
+      }
+      //
+      // Update the sub-entries field in the PDPT entry and exit
+      //
+      SetSubEntriesNum (Pdpt + MinPdpt, SubEntriesNum - 1);
+      break;
+    }
+    if (MinPdpt != (UINTN)-1) {
+      //
+      // One 2MB Page Table is released or Page Directory table is released, check the PML4 entry
+      //
+      SubEntriesNum = GetSubEntriesNum (Pml4 + MinPml4);
+      if (SubEntriesNum == 0) {
+        //
+        // Release the empty PML4 table if there was no more 1-GByte Page Table entry,
+        // and clear the PML4 entry
+        //
+        InsertTailList (&mPagePool, (LIST_ENTRY*)(UINTN)(Pml4[MinPml4] & gPhyMask));
+        Pml4[MinPml4] = 0;
+        MinPdpt = (UINTN)-1;
+        continue;
+      }
+      //
+      // Update the sub-entries field in the PML4 entry and exit
+      //
+      SetSubEntriesNum (Pml4 + MinPml4, SubEntriesNum - 1);
+      break;
+    }
+    //
+    // PML4 table has been released before, so exit
+    //
+    break;
+  }
+}
+
+/**
+  Allocate free Page for PageFault handler use.
+
+  @return Page address.
+
+**/
+UINT64
+AllocPage (
+  VOID
+  )
+{
+  UINT64                            RetVal;
+
+  if (IsListEmpty (&mPagePool)) {
+    //
+    // If page pool is empty, reclaim the used pages and insert one into page pool
+    //
+    ReclaimPages ();
+  }
+
+  //
+  // Get one free page and remove it from page pool
+  //
+  RetVal = (UINT64)(UINTN)mPagePool.ForwardLink;
+  RemoveEntryList (mPagePool.ForwardLink);
+  //
+  // Clean this page and return
+  //
+  ZeroMem ((VOID*)(UINTN)RetVal, EFI_PAGE_SIZE);
+  return RetVal;
+}
+
+/**
+  Page Fault handler for SMM use.
+
+**/
+VOID
+SmiDefaultPFHandler (
+  VOID
+  )
+{
+  UINT64                            *PageTable;
+  UINT64                            *Pml4;
+  UINT64                            PFAddress;
+  UINTN                             StartBit;
+  UINTN                             EndBit;
+  UINT64                            PTIndex;
+  UINTN                             Index;
+  SMM_PAGE_SIZE_TYPE                PageSize;
+  UINTN                             NumOfPages;
+  UINTN                             PageAttribute;
+  EFI_STATUS                        Status;
+  UINT64                            *UpperEntry;
+
+  //
+  // Set default SMM page attribute
+  //
+  PageSize = SmmPageSize2M;
+  NumOfPages = 1;
+  PageAttribute = 0;
+
+  EndBit = 0;
+  Pml4 = (UINT64*)(AsmReadCr3 () & gPhyMask);
+  PFAddress = AsmReadCr2 ();
+
+  Status = GetPlatformPageTableAttribute (PFAddress, &PageSize, &NumOfPages, &PageAttribute);
+  //
+  // If the platform does not support page table attributes, set the default SMM page attribute
+  //
+  if (Status != EFI_SUCCESS) {
+    PageSize = SmmPageSize2M;
+    NumOfPages = 1;
+    PageAttribute = 0;
+  }
+  if (PageSize >= MaxSmmPageSizeType) {
+    PageSize = SmmPageSize2M; 
+  }
+  if (NumOfPages > 512) {
+    NumOfPages = 512;
+  }
+
+  switch (PageSize) {
+  case SmmPageSize4K:
+    //
+    // BIT12 to BIT20 is Page Table index
+    //
+    EndBit = 12;
+    break;
+  case SmmPageSize2M:
+    //
+    // BIT21 to BIT29 is Page Directory index
+    //
+    EndBit = 21;
+    PageAttribute |= (UINTN)IA32_PG_PS;
+    break;
+  case SmmPageSize1G:
+    if (!m1GPageTableSupport) {
+      DEBUG ((EFI_D_ERROR, "1-GByte pages is not supported!"));
+      ASSERT (FALSE);
+    }
+    //
+    // BIT30 to BIT38 is Page Directory Pointer Table index
+    //
+    EndBit = 30;
+    PageAttribute |= (UINTN)IA32_PG_PS;
+    break; 
+  default:
+    ASSERT (FALSE);
+  }
+
+  //
+  // If execute-disable is enabled, set NX bit
+  //
+  if (mXdEnabled) {
+    PageAttribute |= IA32_PG_NX;
+  }
+
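+  //
+  // Walk the page table from the PML4 down: bits 47:39 of the faulting
+  // address index the PML4, and each lower level consumes the next 9 bits
+  // until EndBit (the level that maps the chosen page size) is reached.
+  //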
+  for (Index = 0; Index < NumOfPages; Index++) {
+    PageTable  = Pml4;
+    UpperEntry = NULL;
+    for (StartBit = 39; StartBit > EndBit; StartBit -= 9) {
+      PTIndex = BitFieldRead64 (PFAddress, StartBit, StartBit + 8);
+      if ((PageTable[PTIndex] & IA32_PG_P) == 0) {
+        //
+        // If the entry is not present, allocate one page from page pool for it
+        //
+        PageTable[PTIndex] = AllocPage () | IA32_PG_RW | IA32_PG_P;
+      } else {
+        //
+        // Save the upper entry address
+        //
+        UpperEntry = PageTable + PTIndex;
+      }
+      //
+      // BIT9 to BIT11 of entry is used to save access record,
+      // initialize value is 7
+      //
+      PageTable[PTIndex] |= (UINT64)IA32_PG_A;
+      SetAccNum (PageTable + PTIndex, 7);
+      PageTable = (UINT64*)(UINTN)(PageTable[PTIndex] & gPhyMask);
+    }
+
+    PTIndex = BitFieldRead64 (PFAddress, StartBit, StartBit + 8);
+    if ((PageTable[PTIndex] & IA32_PG_P) != 0) {
+      //
+      // Check if the entry already exists; this issue may occur when page
+      // entries of different sizes are created under the same entry
+      //
+      DEBUG ((EFI_D_ERROR, "PageTable = %lx, PTIndex = %x, PageTable[PTIndex] 
= %lx\n", PageTable, PTIndex, PageTable[PTIndex]));
+      DEBUG ((EFI_D_ERROR, "New page table overlapped with old page 
table!\n"));
+      ASSERT (FALSE);
+    }
+    //
+    // Fill the new entry
+    //
+    PageTable[PTIndex] = (PFAddress & gPhyMask & ~((1ull << EndBit) - 1)) |
+                         PageAttribute | IA32_PG_A | IA32_PG_RW | IA32_PG_P;
+    if (UpperEntry != NULL) {
+      SetSubEntriesNum (UpperEntry, GetSubEntriesNum (UpperEntry) + 1);
+    }
+    //
+    // Get the next page address if we need to create more page tables
+    //    
+    PFAddress += (1ull << EndBit);
+  }
+}
+
+/**
+  The Page Fault handler wrapper for SMM use.
+
+  @param  InterruptType    Defines the type of interrupt or exception that
+                           occurred on the processor. This parameter is
+                           processor architecture specific.
+  @param  SystemContext    A pointer to the processor context when
+                           the interrupt occurred on the processor.
+**/
+VOID
+EFIAPI
+SmiPFHandler (
+    IN EFI_EXCEPTION_TYPE   InterruptType,
+    IN EFI_SYSTEM_CONTEXT   SystemContext
+  )
+{
+  UINTN             PFAddress;
+
+  ASSERT (InterruptType == EXCEPT_IA32_PAGE_FAULT);
+  
+  AcquireSpinLock (&mPFLock);
+
+  PFAddress = AsmReadCr2 ();
+
+  //
+  // If a page fault occurs in SMRAM range, it should be in an SMM stack guard page.
+  //
+  if ((FeaturePcdGet (PcdCpuSmmStackGuard)) && 
+      (PFAddress >= mCpuHotPlugData.SmrrBase) && 
+      (PFAddress < (mCpuHotPlugData.SmrrBase + mCpuHotPlugData.SmrrSize))) {
+    DEBUG ((EFI_D_ERROR, "SMM stack overflow!\n"));
+    CpuDeadLoop ();
+  }
+
+  //
+  // If a page fault occurs outside SMM range
+  //
+  if ((PFAddress < mCpuHotPlugData.SmrrBase) || 
+      (PFAddress >= mCpuHotPlugData.SmrrBase + mCpuHotPlugData.SmrrSize)) {
+    if ((SystemContext.SystemContextX64->ExceptionData & IA32_PF_EC_ID) != 0) {
+      DEBUG ((EFI_D_ERROR, "Code executed on IP(0x%lx) out of SMM range after 
SMM is locked!\n", PFAddress));
+      DEBUG_CODE (
+        DumpModuleInfoByIp (*(UINTN *)(UINTN)SystemContext.SystemContextX64->Rsp);
+      );
+      CpuDeadLoop ();
+    }
+  }
+
+  if (FeaturePcdGet (PcdCpuSmmProfileEnable)) {
+    SmmProfilePFHandler (
+      SystemContext.SystemContextX64->Rip,
+      SystemContext.SystemContextX64->ExceptionData
+      );
+  } else {
+    SmiDefaultPFHandler ();
+  }
+  
+  ReleaseSpinLock (&mPFLock);
+}
diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/X64/Semaphore.c b/UefiCpuPkg/PiSmmCpuDxeSmm/X64/Semaphore.c
new file mode 100644
index 0000000..1af7465
--- /dev/null
+++ b/UefiCpuPkg/PiSmmCpuDxeSmm/X64/Semaphore.c
@@ -0,0 +1,64 @@
+/** @file
+Semaphore mechanism to indicate to the BSP that an AP has exited SMM
+after SMBASE relocation.
+
+Copyright (c) 2009 - 2015, Intel Corporation. All rights reserved.<BR>
+This program and the accompanying materials
+are licensed and made available under the terms and conditions of the BSD License
+which accompanies this distribution.  The full text of the license may be found at
+http://opensource.org/licenses/bsd-license.php
+
+THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+
+**/
+
+#include "PiSmmCpuDxeSmm.h"
+
+extern  UINT32    mSmmRelocationOriginalAddressPtr32;
+extern  UINT32    mRebasedFlagAddr32;
+
+UINTN             mSmmRelocationOriginalAddress;
+volatile BOOLEAN  *mRebasedFlag;
+
+/**
+AP Semaphore operation in 32-bit mode while BSP runs in 64-bit mode.
+**/
+VOID
+SmmRelocationSemaphoreComplete32 (
+  VOID
+  );
+
+/**
+  Hook return address of SMM Save State so that semaphore code
+  can be executed immediately after AP exits SMM to indicate to
+  the BSP that an AP has exited SMM after SMBASE relocation.
+
+  @param CpuIndex  The processor index.
+**/
+VOID
+SemaphoreHook (
+  IN UINTN             CpuIndex,
+  IN volatile BOOLEAN  *RebasedFlag
+  )
+{
+  SMRAM_SAVE_STATE_MAP  *CpuState;
+  UINTN                 TempValue;
+
+  mRebasedFlag       = RebasedFlag;
+  mRebasedFlagAddr32 = (UINT32)(UINTN)mRebasedFlag;
+
+  CpuState = (SMRAM_SAVE_STATE_MAP *)(UINTN)(SMM_DEFAULT_SMBASE + SMRAM_SAVE_STATE_MAP_OFFSET);
+  mSmmRelocationOriginalAddress = HookReturnFromSmm (
+                                    CpuIndex,
+                                    CpuState,
+                                    (UINT64)(UINTN)&SmmRelocationSemaphoreComplete32,
+                                    (UINT64)(UINTN)&SmmRelocationSemaphoreComplete
+                                    );
+
+  //
+  // Use temp value to fix ICC compiler warning
+  // 
+  TempValue = (UINTN)&mSmmRelocationOriginalAddress;
+  mSmmRelocationOriginalAddressPtr32 = (UINT32)TempValue;
+}
diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmiEntry.S b/UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmiEntry.S
new file mode 100644
index 0000000..8315593
--- /dev/null
+++ b/UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmiEntry.S
@@ -0,0 +1,217 @@
+#------------------------------------------------------------------------------
+#
+# Copyright (c) 2009 - 2015, Intel Corporation. All rights reserved.<BR>
+# This program and the accompanying materials
+# are licensed and made available under the terms and conditions of the BSD License
+# which accompanies this distribution.  The full text of the license may be found at
+# http://opensource.org/licenses/bsd-license.php.
+#
+# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+#
+# Module Name:
+#
+#   SmiEntry.S
+#
+# Abstract:
+#
+#   Code template of the SMI handler for a particular processor
+#
+#------------------------------------------------------------------------------
+
+ASM_GLOBAL  ASM_PFX(gcSmiHandlerTemplate)
+ASM_GLOBAL  ASM_PFX(gcSmiHandlerSize)
+ASM_GLOBAL  ASM_PFX(gSmiCr3)
+ASM_GLOBAL  ASM_PFX(gSmiStack)
+ASM_GLOBAL  ASM_PFX(gSmbase)
+ASM_GLOBAL  ASM_PFX(FeaturePcdGet (PcdCpuSmmDebug))
+ASM_GLOBAL  ASM_PFX(gSmiHandlerIdtr)
+
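+#
+# Note: gSmbase, gSmiStack, and gSmiCr3 below are 4-byte placeholders
+# (.space 4) that are expected to be patched per processor by C code before
+# this template is copied to each processor's SMI entry point in SMRAM.
+#
+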
+#
+# Constants relating to PROCESSOR_SMM_DESCRIPTOR
+#
+.equ            DSC_OFFSET, 0xfb00
+.equ            DSC_GDTPTR, 0x30
+.equ            DSC_GDTSIZ, 0x38
+.equ            DSC_CS, 14
+.equ            DSC_DS, 16
+.equ            DSC_SS, 18
+.equ            DSC_OTHERSEG, 20
+#
+# Constants relating to CPU State Save Area
+#
+.equ            SSM_DR6,   0xffd0
+.equ            SSM_DR7,   0xffc8
+
+.equ            PROTECT_MODE_CS, 0x08
+.equ            PROTECT_MODE_DS, 0x20
+.equ            LONG_MODE_CS, 0x38
+.equ            TSS_SEGMENT, 0x40
+.equ            GDT_SIZE, 0x50
+
+    .text
+
+ASM_PFX(gcSmiHandlerTemplate):
+
+_SmiEntryPoint:
+    #
+    # The encoding of BX in 16-bit addressing mode is the same as of RDI in 64-
+    # bit addressing mode. And that coincidence has been used in the following
+    # "64-bit like" 16-bit code. Be aware that once RDI is referenced as a
+    # base address register, it is actually BX that is referenced.
+    #
+    .byte 0xbb                          # mov bx, imm16
+    .word _GdtDesc - _SmiEntryPoint + 0x8000
+    #
+    # fix GDT descriptor
+    #
+    .byte 0x2e,0xa1                     # mov ax, cs:[offset16]
+    .word      DSC_OFFSET + DSC_GDTSIZ
+    .byte 0x48                          # dec ax
+    .byte 0x2e
+    movl    %eax, (%rdi)                # mov cs:[bx], ax
+    .byte 0x66,0x2e,0xa1                # mov eax, cs:[offset16]
+    .word      DSC_OFFSET + DSC_GDTPTR
+    .byte 0x2e
+    movw    %ax, 2(%rdi)
+    .byte 0x66,0x2e
+    lgdt    (%rdi)
+    #
+    # Patch ProtectedMode Segment
+    #
+    .byte 0xb8
+    .word PROTECT_MODE_CS
+    .byte 0x2e
+    movl    %eax, -2(%rdi)
+    #
+    # Patch ProtectedMode entry
+    #
+    .byte 0x66, 0xbf                    # mov edi, SMBASE
+ASM_PFX(gSmbase): .space 4
+    lea     ((ProtectedMode - _SmiEntryPoint) + 0x8000)(%edi), %ax
+    .byte 0x2e
+    movw    %ax, -6(%rdi)
+    #
+    # Switch into ProtectedMode
+    #
+    movq    %cr0, %rbx
+    .byte 0x66
+    andl    $0x9ffafff3, %ebx
+    .byte 0x66
+    orl     $0x00000023, %ebx
+
+    movq    %rbx, %cr0
+    .byte 0x66, 0xea
+    .space 6
+
+_GdtDesc:    .space  6
+
+ProtectedMode:
+    movw    $PROTECT_MODE_DS, %ax
+    movl    %eax, %ds
+    movl    %eax, %es
+    movl    %eax, %fs
+    movl    %eax, %gs
+    movl    %eax, %ss
+    .byte   0xbc                       # mov esp, imm32
+ASM_PFX(gSmiStack):   .space  4
+    jmp     ProtFlatMode
+
+ProtFlatMode:
+    .byte   0xb8
+ASM_PFX(gSmiCr3):    .space  4
+    movq    %rax, %cr3
+    movl    $0x668,%eax                 # as cr4.PGE is not set here, refresh cr3
+    movq    %rax, %cr4                  # in PreModifyMtrrs() to flush TLB.
+# Load TSS
+    subl    $8, %esp                    # reserve room in stack
+    sgdt    (%rsp)
+    movl    2(%rsp), %eax               # eax = GDT base
+    addl    $8, %esp
+    movl    %eax, %edx
+    addl    $GDT_SIZE, %edx
+    movb    %dl, (TSS_SEGMENT + 2)(%rax)
+    movb    %dh, (TSS_SEGMENT + 3)(%rax)
+    .byte   0xc1, 0xea, 0x10             # shr     edx, 16
+    movb    %dl, (TSS_SEGMENT + 4)(%rax)
+    movb    %dh, (TSS_SEGMENT + 7)(%rax)
+    movl    %eax, %edx
+    movb    $0x89, %dl
+    movb    %dl, (TSS_SEGMENT + 5)(%rax) # clear busy flag
+    movl    $TSS_SEGMENT, %eax
+    ltr     %ax
+
+    #
+    # Switch to LongMode
+    #
+    pushq    $LONG_MODE_CS                # push cs hardcode here
+    call     Base                         # push return address for retf later
+Base:
+    addl    $(LongMode - Base), (%rsp)  # offset for far retf, seg is the 1st arg
+    movl    $0xc0000080, %ecx
+    rdmsr
+    orb     $1,%ah
+    wrmsr
+    movq    %cr0, %rbx
+    btsl    $31, %ebx
+    movq    %rbx, %cr0
+    retf
+LongMode:                               # long mode (64-bit code) starts here
+    movabsq $ASM_PFX(gSmiHandlerIdtr), %rax
+    lidt    (%rax)
+    lea     (DSC_OFFSET)(%rdi), %ebx
+    movw    DSC_DS(%rbx), %ax
+    movl    %eax,%ds
+    movw    DSC_OTHERSEG(%rbx), %ax
+    movl    %eax,%es
+    movl    %eax,%fs
+    movl    %eax,%gs
+    movw    DSC_SS(%rbx), %ax
+    movl    %eax,%ss
+#   jmp     _SmiHandler                 ; instruction is not needed
+
+_SmiHandler:
+    movabsq $ASM_PFX(FeaturePcdGet (PcdCpuSmmDebug)), %rax
+    cmpb    $0, (%rax)
+    jz      L1
+
+    .byte   0x48, 0x8b, 0x0d            # mov rcx, [rip + disp32]
+    .long   SSM_DR6 - (. + 4 - _SmiEntryPoint + 0x8000)
+    .byte   0x48, 0x8b, 0x15            # mov rdx, [rip + disp32]
+    .long   SSM_DR7 - (. + 4 - _SmiEntryPoint + 0x8000)
+    movq    %rcx, %dr6
+    movq    %rdx, %dr7
+L1:
+
+    movabsq $ASM_PFX(SmiRendezvous), %rax
+    movq    (%rsp), %rcx
+    # Save FP registers
+
+    subq    $0x208, %rsp
+    .byte   0x48                        # FXSAVE64
+    fxsave  (%rsp)
+
+    addq    $-0x20, %rsp
+    call    *%rax
+    addq    $0x20, %rsp
+
+    #
+    # Restore FP registers
+    #
+    .byte   0x48                        # FXRSTOR64
+    fxrstor (%rsp)
+
+    movabsq $ASM_PFX(FeaturePcdGet (PcdCpuSmmDebug)), %rax
+    cmpb    $0, (%rax)
+    jz      L2
+
+    movq    %dr7, %rdx
+    movq    %dr6, %rcx
+    .byte   0x48, 0x89, 0x15            # mov [rip + disp32], rdx
+    .long   SSM_DR7 - (. + 4 - _SmiEntryPoint + 0x8000)
+    .byte   0x48, 0x89, 0x0d            # mov [rip + disp32], rcx
+    .long   SSM_DR6 - (. + 4 - _SmiEntryPoint + 0x8000)
+L2:
+    rsm
+
+ASM_PFX(gcSmiHandlerSize):    .word      . - _SmiEntryPoint
diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmiEntry.asm b/UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmiEntry.asm
new file mode 100644
index 0000000..a1a7d3e
--- /dev/null
+++ b/UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmiEntry.asm
@@ -0,0 +1,221 @@
+;------------------------------------------------------------------------------
+;
+; Copyright (c) 2009 - 2015, Intel Corporation. All rights reserved.<BR>
+; This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution.  The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php.
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+; Module Name:
+;
+;   SmiEntry.asm
+;
+; Abstract:
+;
+;   Code template of the SMI handler for a particular processor
+;
+;-------------------------------------------------------------------------------
+
+;
+; Variables referenced by C code
+;
+EXTERNDEF   SmiRendezvous:PROC
+EXTERNDEF   gcSmiHandlerTemplate:BYTE
+EXTERNDEF   gcSmiHandlerSize:WORD
+EXTERNDEF   gSmiCr3:DWORD
+EXTERNDEF   gSmiStack:DWORD
+EXTERNDEF   gSmbase:DWORD
+EXTERNDEF   FeaturePcdGet (PcdCpuSmmDebug):BYTE
+EXTERNDEF   gSmiHandlerIdtr:FWORD
+
+
+;
+; Constants relating to PROCESSOR_SMM_DESCRIPTOR
+;
+DSC_OFFSET    EQU     0fb00h
+DSC_GDTPTR    EQU     30h
+DSC_GDTSIZ    EQU     38h
+DSC_CS        EQU     14
+DSC_DS        EQU     16
+DSC_SS        EQU     18
+DSC_OTHERSEG  EQU     20
+;
+; Constants relating to CPU State Save Area
+;
+SSM_DR6         EQU     0ffd0h
+SSM_DR7         EQU     0ffc8h
+
+PROTECT_MODE_CS EQU     08h
+PROTECT_MODE_DS EQU     20h
+LONG_MODE_CS    EQU     38h
+TSS_SEGMENT     EQU     40h
+GDT_SIZE        EQU     50h
+
+    .code
+
+gcSmiHandlerTemplate    LABEL   BYTE
+
+_SmiEntryPoint:
+    ;
+    ; The encoding of BX in 16-bit addressing mode is the same as of RDI in 64-
+    ; bit addressing mode. And that coincidence has been used in the following
+    ; "64-bit like" 16-bit code. Be aware that once RDI is referenced as a
+    ; base address register, it is actually BX that is referenced.
+    ;
+    DB      0bbh                        ; mov bx, imm16
+    DW      offset _GdtDesc - _SmiEntryPoint + 8000h  ; bx = GdtDesc offset
+; fix GDT descriptor
+    DB      2eh, 0a1h                   ; mov ax, cs:[offset16]
+    DW      DSC_OFFSET + DSC_GDTSIZ
+    DB      48h                         ; dec ax
+    DB      2eh
+    mov     [rdi], eax                  ; mov cs:[bx], ax
+    DB      66h, 2eh, 0a1h              ; mov eax, cs:[offset16]
+    DW      DSC_OFFSET + DSC_GDTPTR
+    DB      2eh
+    mov     [rdi + 2], ax               ; mov cs:[bx + 2], eax
+    DB      66h, 2eh
+    lgdt    fword ptr [rdi]             ; lgdt fword ptr cs:[bx]
+; Patch ProtectedMode Segment
+    DB      0b8h                        ; mov ax, imm16
+    DW      PROTECT_MODE_CS             ; set AX for segment directly
+    DB      2eh
+    mov     [rdi - 2], eax              ; mov cs:[bx - 2], ax
+; Patch ProtectedMode entry
+    DB      66h, 0bfh                   ; mov edi, SMBASE
+gSmbase    DD    ?
+    lea     ax, [edi + (@ProtectedMode - _SmiEntryPoint) + 8000h]
+    DB      2eh
+    mov     [rdi - 6], ax               ; mov cs:[bx - 6], eax
+; Switch into @ProtectedMode
+    mov     rbx, cr0
+    DB      66h
+    and     ebx, 9ffafff3h
+    DB      66h
+    or      ebx, 00000023h
+
+    mov     cr0, rbx
+    DB      66h, 0eah
+    DD      ?
+    DW      ?
+
+_GdtDesc    FWORD   ?
+@ProtectedMode:
+    mov     ax, PROTECT_MODE_DS
+    mov     ds, ax
+    mov     es, ax
+    mov     fs, ax
+    mov     gs, ax
+    mov     ss, ax
+    DB      0bch                   ; mov esp, imm32
+gSmiStack   DD      ?
+    jmp     ProtFlatMode
+
+ProtFlatMode:
+    DB      0b8h                        ; mov eax, offset gSmiCr3
+gSmiCr3     DD      ?
+    mov     cr3, rax
+    mov     eax, 668h                   ; as cr4.PGE is not set here, refresh cr3
+    mov     cr4, rax                    ; in PreModifyMtrrs() to flush TLB.
+; Load TSS
+    sub     esp, 8                      ; reserve room in stack
+    sgdt    fword ptr [rsp]
+    mov     eax, [rsp + 2]              ; eax = GDT base
+    add     esp, 8
+    mov     edx, eax
+    add     edx, GDT_SIZE
+    mov     [rax + TSS_SEGMENT + 2], dl
+    mov     [rax + TSS_SEGMENT + 3], dh
+    DB      0c1h, 0eah, 10h             ; shr     edx, 16
+    mov     [rax + TSS_SEGMENT + 4], dl
+    mov     [rax + TSS_SEGMENT + 7], dh
+    mov     edx, eax
+    mov     dl, 89h
+    mov     [rax + TSS_SEGMENT + 5], dl ; clear busy flag
+    mov     eax, TSS_SEGMENT
+    ltr     ax
+
+; Switch into @LongMode
+    push    LONG_MODE_CS                ; push cs hardcode here
+    call    Base                       ; push return address for retf later
+Base:
+    add     dword ptr [rsp], @LongMode - Base; offset for far retf, seg is the 1st arg
+    mov     ecx, 0c0000080h
+    rdmsr
+    or      ah, 1
+    wrmsr
+    mov     rbx, cr0
+    bts     ebx, 31
+    mov     cr0, rbx
+    retf
+@LongMode:                              ; long mode (64-bit code) starts here
+    mov     rax, offset gSmiHandlerIdtr
+    lidt    fword ptr [rax]
+    lea     ebx, [rdi + DSC_OFFSET]
+    mov     ax, [rbx + DSC_DS]
+    mov     ds, eax
+    mov     ax, [rbx + DSC_OTHERSEG]
+    mov     es, eax
+    mov     fs, eax
+    mov     gs, eax
+    mov     ax, [rbx + DSC_SS]
+    mov     ss, eax
+;   jmp     _SmiHandler                 ; instruction is not needed
+
+_SmiHandler:
+;
+; The following lines restore DR6 & DR7 before running C code. They are useful
+; when you want to enable hardware breakpoints in SMM.
+;
+; NOTE: These lines might not be appreciated in runtime since they might
+;       conflict with OS debugging facilities. Turn them off in RELEASE.
+;
+    mov     rax, offset FeaturePcdGet (PcdCpuSmmDebug) ;Get absolute address. Avoid RIP relative addressing
+    cmp     byte ptr [rax], 0
+    jz      @1
+
+    DB      48h, 8bh, 0dh               ; mov rcx, [rip + disp32]
+    DD      SSM_DR6 - ($ + 4 - _SmiEntryPoint + 8000h)
+    DB      48h, 8bh, 15h               ; mov rdx, [rip + disp32]
+    DD      SSM_DR7 - ($ + 4 - _SmiEntryPoint + 8000h)
+    mov     dr6, rcx
+    mov     dr7, rdx
+@1:
+    mov     rcx, [rsp]                  ; rcx <- CpuIndex
+    mov     rax, SmiRendezvous          ; rax <- absolute addr of SmiRendezvous
+
+    ;
+    ; Save FP registers
+    ;
+    sub     rsp, 208h
+    DB      48h                         ; FXSAVE64
+    fxsave  [rsp]
+    
+    add     rsp, -20h
+    call    rax
+    add     rsp, 20h
+
+    ;
+    ; Restore FP registers
+    ;
+    DB      48h                         ; FXRSTOR64
+    fxrstor [rsp]  
+ 
+    mov     rax, offset FeaturePcdGet (PcdCpuSmmDebug) ;Get absolute address. Avoid RIP relative addressing
+    cmp     byte ptr [rax], 0
+    jz      @2
+
+    mov     rdx, dr7
+    mov     rcx, dr6
+    DB      48h, 89h, 15h               ; mov [rip + disp32], rdx
+    DD      SSM_DR7 - ($ + 4 - _SmiEntryPoint + 8000h)
+    DB      48h, 89h, 0dh               ; mov [rip + disp32], rcx
+    DD      SSM_DR6 - ($ + 4 - _SmiEntryPoint + 8000h)
+@2:
+    rsm
+
+gcSmiHandlerSize    DW      $ - _SmiEntryPoint
+
+    END
diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmiException.S b/UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmiException.S
new file mode 100644
index 0000000..9793528
--- /dev/null
+++ b/UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmiException.S
@@ -0,0 +1,610 @@
+#------------------------------------------------------------------------------
+#
+# Copyright (c) 2009 - 2015, Intel Corporation. All rights reserved.<BR>
+# This program and the accompanying materials
+# are licensed and made available under the terms and conditions of the BSD License
+# which accompanies this distribution.  The full text of the license may be found at
+# http://opensource.org/licenses/bsd-license.php.
+#
+# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+#
+# Module Name:
+#
+#   SmiException.S
+#
+# Abstract:
+#
+#   Exception handlers used in SM mode
+#
+#------------------------------------------------------------------------------
+
+ASM_GLOBAL  ASM_PFX(SmiPFHandler)
+ASM_GLOBAL  ASM_PFX(gSmiMtrrs)
+ASM_GLOBAL  ASM_PFX(gcSmiIdtr)
+ASM_GLOBAL  ASM_PFX(gcSmiGdtr)
+ASM_GLOBAL  ASM_PFX(gcPsd)
+
+    .data
+
+NullSeg:    .quad 0                     # reserved by architecture
+CodeSeg32:
+            .word -1                    # LimitLow
+            .word 0                     # BaseLow
+            .byte 0                     # BaseMid
+            .byte 0x9b
+            .byte 0xcf                  # LimitHigh
+            .byte 0                     # BaseHigh
+ProtModeCodeSeg32:
+            .word -1                    # LimitLow
+            .word 0                     # BaseLow
+            .byte 0                     # BaseMid
+            .byte 0x9b
+            .byte 0xcf                  # LimitHigh
+            .byte 0                     # BaseHigh
+ProtModeSsSeg32:
+            .word -1                    # LimitLow
+            .word 0                     # BaseLow
+            .byte 0                     # BaseMid
+            .byte 0x93
+            .byte 0xcf                  # LimitHigh
+            .byte 0                     # BaseHigh
+DataSeg32:
+            .word -1                    # LimitLow
+            .word 0                     # BaseLow
+            .byte 0                     # BaseMid
+            .byte 0x93
+            .byte 0xcf                  # LimitHigh
+            .byte 0                     # BaseHigh
+CodeSeg16:
+            .word -1
+            .word 0
+            .byte 0
+            .byte 0x9b
+            .byte 0x8f
+            .byte 0
+DataSeg16:
+            .word -1
+            .word 0
+            .byte 0
+            .byte 0x93
+            .byte 0x8f
+            .byte 0
+CodeSeg64:
+            .word -1                    # LimitLow
+            .word 0                     # BaseLow
+            .byte 0                     # BaseMid
+            .byte 0x9b
+            .byte 0xaf                  # LimitHigh
+            .byte 0                     # BaseHigh
+# TSS segment (on X64 the TSS descriptor is 16 bytes)
+TssSeg:
+            .word TSS_DESC_SIZE         # LimitLow
+            .word 0                     # BaseLow
+            .byte 0                     # BaseMid
+            .byte 0x89
+            .byte 0xDB                  # LimitHigh
+            .byte 0                     # BaseHigh
+            .long 0                     # BaseUpper
+            .long 0                     # Reserved
+.equ  GDT_SIZE, . - NullSeg
+
+TssDescriptor:
+            .space 104, 0
+.equ  TSS_DESC_SIZE, .- TssDescriptor
+
+#
+# This structure serves as a template for all processors.
+#
+ASM_PFX(gcPsd):
+            .ascii  "PSDSIG  "
+            .word      PSD_SIZE
+            .word 2
+            .word      1 << 2
+            .word      CODE_SEL
+            .word      DATA_SEL
+            .word      DATA_SEL
+            .word      DATA_SEL
+            .word 0
+            .quad 0
+            .quad 0
+            .quad 0                     # fixed in InitializeMpServiceData()
+            .quad      NullSeg
+            .long      GDT_SIZE
+            .long 0
+            .space 24, 0
+            .quad      ASM_PFX(gSmiMtrrs)
+.equ  PSD_SIZE,  . - ASM_PFX(gcPsd)
+
+#
+# CODE & DATA segments for SMM runtime
+#
+.equ  CODE_SEL,    CodeSeg64 - NullSeg
+.equ  DATA_SEL,    DataSeg32 - NullSeg
+.equ  CODE32_SEL,  CodeSeg32 - NullSeg
+
+ASM_PFX(gcSmiGdtr):
+    .word      GDT_SIZE - 1
+    .quad      NullSeg
+
+ASM_PFX(gcSmiIdtr):
+    .word      IDT_SIZE - 1
+    .quad      _SmiIDT
+
+
+#
+# Here is the IDT. There are 32 (not 255) entries in it since only
+# processor-generated exceptions will be handled.
+#
+_SmiIDT:
+# The following gate descriptor repeats 32 times:
+.rept 32
+    .word 0                             # Offset 0:15
+    .word CODE_SEL
+    .byte 0                             # Unused
+    .byte 0x8e                          # Interrupt Gate, Present
+    .word 0                             # Offset 16:31
+    .quad 0                             # Offset 32:63
+.endr
+
+_SmiIDTEnd:
+
+.equ  IDT_SIZE, (_SmiIDTEnd - _SmiIDT)
+
+    .text
+
+#------------------------------------------------------------------------------
+# PageFaultIdtHandlerSmmProfile is the page fault exception handler used when
+# SMM profiling is enabled.
+#
+# Stack frame would be as follows as specified in IA32 manuals:
+# +---------------------+ <-- 16-byte aligned ensured by processor
+# +    Old SS           +
+# +---------------------+
+# +    Old RSP          +
+# +---------------------+
+# +    RFlags           +
+# +---------------------+
+# +    CS               +
+# +---------------------+
+# +    RIP              +
+# +---------------------+
+# +    Error Code       +
+# +---------------------+
+# +   Vector Number     +
+# +---------------------+
+# +    RBP              +
+# +---------------------+ <-- RBP, 16-byte aligned
+#
+# RSP set to an odd multiple of 8 on entry means an error code is PRESENT
+#------------------------------------------------------------------------------
+ASM_GLOBAL ASM_PFX(PageFaultIdtHandlerSmmProfile)
+ASM_PFX(PageFaultIdtHandlerSmmProfile):
+    pushq   $0x0e                    # Page Fault
+    .byte   0x40, 0xf6, 0xc4, 0x08     # test spl, 8 (hand-encoded; some assemblers do not accept the spl register)
+    jnz     L1
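+    # No error code was pushed by the CPU in this case: duplicate the vector
+    # number just pushed into a new top-of-stack slot and overwrite the old
+    # slot with a zero error code so the frame layout matches the error-code
+    # case.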
+    pushq   (%rsp)
+    movq    $0, 8(%rsp)
+L1:
+    pushq   %rbp
+    movq    %rsp, %rbp
+
+    #
+    # The stack pointer is 16-byte aligned here, so the
+    # EFI_FX_SAVE_STATE_X64 member of EFI_SYSTEM_CONTEXT_X64
+    # is 16-byte aligned as well.
+    #
+
+## UINT64  Rdi, Rsi, Rbp, Rsp, Rbx, Rdx, Rcx, Rax;
+## UINT64  R8, R9, R10, R11, R12, R13, R14, R15;
+    pushq   %r15
+    pushq   %r14
+    pushq   %r13
+    pushq   %r12
+    pushq   %r11
+    pushq   %r10
+    pushq   %r9
+    pushq   %r8
+    pushq   %rax
+    pushq   %rcx
+    pushq   %rdx
+    pushq   %rbx
+    pushq   48(%rbp)                   # RSP
+    pushq   (%rbp)                     # RBP
+    pushq   %rsi
+    pushq   %rdi
+
+## UINT64  Gs, Fs, Es, Ds, Cs, Ss;  ensure high 16 bits of each is zero
+    movzwq 56(%rbp), %rax
+    pushq   %rax                       # for ss
+    movzwq  32(%rbp), %rax
+    pushq   %rax                       # for cs
+    movq    %ds, %rax
+    pushq   %rax
+    movq    %es, %rax
+    pushq   %rax
+    movq    %fs, %rax
+    pushq   %rax
+    movq    %gs, %rax
+    pushq   %rax
+
+## UINT64  Rip;
+    pushq   24(%rbp)
+
+## UINT64  Gdtr[2], Idtr[2];
+    subq    $16, %rsp
+    sidt    (%rsp)
+    subq    $16, %rsp
+    sgdt    (%rsp)
+
+## UINT64  Ldtr, Tr;
+    xorq    %rax, %rax
+    strw    %ax
+    pushq   %rax
+    sldtw   %ax
+    pushq   %rax
+
+## UINT64  RFlags;
+    pushq   40(%rbp)
+
+## UINT64  Cr0, Cr1, Cr2, Cr3, Cr4, Cr8;
+    movq    %cr8, %rax
+    pushq   %rax
+    movq    %cr4, %rax
+    orq     $0x208, %rax
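+    # (0x208 sets CR4.DE, bit 3, and CR4.OSFXSR, bit 9)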
+    movq    %rax, %cr4
+    pushq   %rax
+    movq    %cr3, %rax
+    pushq   %rax
+    movq    %cr2, %rax
+    pushq   %rax
+    xorq    %rax, %rax
+    pushq   %rax
+    movq    %cr0, %rax
+    pushq   %rax
+
+## UINT64  Dr0, Dr1, Dr2, Dr3, Dr6, Dr7;
+    movq    %dr7, %rax
+    pushq   %rax
+    movq    %dr6, %rax
+    pushq   %rax
+    movq    %dr3, %rax
+    pushq   %rax
+    movq    %dr2, %rax
+    pushq   %rax
+    movq    %dr1, %rax
+    pushq   %rax
+    movq    %dr0, %rax
+    pushq   %rax
+
+## FX_SAVE_STATE_X64 FxSaveState;
+
+    subq    $512, %rsp
+    movq    %rsp, %rdi
+    .byte   0xf, 0xae, 0x7                  # fxsave [rdi]
+
+# The UEFI x64 calling convention requires that the Direction flag in EFLAGS is clear
+    cld
+
+## UINT32  ExceptionData;
+    pushq   16(%rbp)
+
+## call into exception handler
+    movq    8(%rbp), %rcx
+    movabsq $ASM_PFX(SmiPFHandler), %rax
+
+## Prepare parameter and call
+    movq    %rsp, %rdx
+    #
+    # Per X64 calling convention, allocate maximum parameter stack space
+    # and make sure RSP is 16-byte aligned
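+    # (4 * 8 bytes of shadow space for the register parameters, plus 8 bytes
+    # so RSP is 16-byte aligned again after the ExceptionData push above)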
+    #
+    subq    $4 * 8 + 8, %rsp
+    call    *%rax
+    addq    $4 * 8 + 8, %rsp
+    jmp     L5
+
+L5:
+## UINT64  ExceptionData;
+    addq    $8, %rsp
+
+## FX_SAVE_STATE_X64 FxSaveState;
+
+    movq    %rsp, %rsi
+    .byte   0xf, 0xae, 0xe                 # fxrstor [rsi]
+    addq    $512, %rsp
+
+## UINT64  Dr0, Dr1, Dr2, Dr3, Dr6, Dr7;
+## Skip restoration of DRx registers to support debuggers
+## that set breakpoints in interrupt/exception context
+    addq    $8 * 6, %rsp 
+
+## UINT64  Cr0, Cr1, Cr2, Cr3, Cr4, Cr8;
+    popq    %rax
+    movq    %rax, %cr0
+    addq    $8, %rsp                      # not for Cr1
+    popq    %rax
+    movq    %rax, %cr2
+    popq    %rax
+    movq    %rax, %cr3
+    popq    %rax
+    movq    %rax, %cr4
+    popq    %rax
+    movq    %rax, %cr8
+
+## UINT64  RFlags;
+    popq    40(%rbp)
+
+## UINT64  Ldtr, Tr;
+## UINT64  Gdtr[2], Idtr[2];
+## Best not let anyone mess with these particular registers...
+    addq    $48, %rsp
+
+## UINT64  Rip;
+    popq    24(%rbp)
+
+## UINT64  Gs, Fs, Es, Ds, Cs, Ss;
+    popq    %rax
+    # mov   gs, rax ; not for gs
+    popq    %rax
+    # mov   fs, rax ; not for fs
+    # (X64 will not use fs and gs, so we do not restore it)
+    popq    %rax
+    movq    %rax, %es
+    popq    %rax
+    movq    %rax, %ds
+    popq    32(%rbp)                      # for cs
+    popq    56(%rbp)                      # for ss
+
+## UINT64  Rdi, Rsi, Rbp, Rsp, Rbx, Rdx, Rcx, Rax;
+## UINT64  R8, R9, R10, R11, R12, R13, R14, R15;
+    popq    %rdi
+    popq    %rsi
+    addq    $8, %rsp                      # not for rbp
+    popq    48(%rbp)                      # for rsp
+    popq    %rbx
+    popq    %rdx
+    popq    %rcx
+    popq    %rax
+    popq    %r8
+    popq    %r9
+    popq    %r10
+    popq    %r11
+    popq    %r12
+    popq    %r13
+    popq    %r14
+    popq    %r15
+
+    movq    %rbp, %rsp
+
+# Enable TF bit after page fault handler runs
+    btsl    $8, 40(%rsp)                 #RFLAGS
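+# The resulting single-step debug exception is used to restore the original
+# page table entries recorded by RestorePageTableAbove4G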
+
+    popq    %rbp
+    addq    $16, %rsp                    # skip INT# & ErrCode
+    iretq
+
+ASM_GLOBAL ASM_PFX(InitializeIDTSmmStackGuard)
+ASM_PFX(InitializeIDTSmmStackGuard):
+# If SMM Stack Guard feature is enabled, set the IST field of
+# the interrupt gate for Page Fault Exception to be 1
+#
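+# Each IDT gate descriptor is 16 bytes, so vector 14 (#PF) is at offset
+# 14 * 16; the 3-bit IST index lives in byte 4 of the descriptor.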
+    movabsq  $_SmiIDT + 14 * 16, %rax
+    movb     $1, 4(%rax)
+    ret
diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmiException.asm b/UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmiException.asm
new file mode 100644
index 0000000..e6f0f45
--- /dev/null
+++ b/UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmiException.asm
@@ -0,0 +1,413 @@
+;------------------------------------------------------------------------------ ;
+; Copyright (c) 2009 - 2015, Intel Corporation. All rights reserved.<BR>
+; This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution.  The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php.
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+; Module Name:
+;
+;   SmiException.asm
+;
+; Abstract:
+;
+;   Exception handlers used in SM mode
+;
+;-------------------------------------------------------------------------------
+
+EXTERNDEF   SmiPFHandler:PROC
+EXTERNDEF   gSmiMtrrs:QWORD
+EXTERNDEF   gcSmiIdtr:FWORD
+EXTERNDEF   gcSmiGdtr:FWORD
+EXTERNDEF   gcPsd:BYTE
+
+    .const
+
+NullSeg     DQ      0                   ; reserved by architecture
+CodeSeg32   LABEL   QWORD
+            DW      -1                  ; LimitLow
+            DW      0                   ; BaseLow
+            DB      0                   ; BaseMid
+            DB      9bh
+            DB      0cfh                ; LimitHigh
+            DB      0                   ; BaseHigh
+ProtModeCodeSeg32   LABEL   QWORD
+            DW      -1                  ; LimitLow
+            DW      0                   ; BaseLow
+            DB      0                   ; BaseMid
+            DB      9bh
+            DB      0cfh                ; LimitHigh
+            DB      0                   ; BaseHigh
+ProtModeSsSeg32     LABEL   QWORD
+            DW      -1                  ; LimitLow
+            DW      0                   ; BaseLow
+            DB      0                   ; BaseMid
+            DB      93h
+            DB      0cfh                ; LimitHigh
+            DB      0                   ; BaseHigh
+DataSeg32   LABEL   QWORD
+            DW      -1                  ; LimitLow
+            DW      0                   ; BaseLow
+            DB      0                   ; BaseMid
+            DB      93h
+            DB      0cfh                ; LimitHigh
+            DB      0                   ; BaseHigh
+CodeSeg16   LABEL   QWORD
+            DW      -1
+            DW      0
+            DB      0
+            DB      9bh
+            DB      8fh
+            DB      0
+DataSeg16   LABEL   QWORD
+            DW      -1
+            DW      0
+            DB      0
+            DB      93h
+            DB      8fh
+            DB      0
+CodeSeg64   LABEL   QWORD
+            DW      -1                  ; LimitLow
+            DW      0                   ; BaseLow
+            DB      0                   ; BaseMid
+            DB      9bh
+            DB      0afh                ; LimitHigh
+            DB      0                   ; BaseHigh
+; TSS segment (on X64 the TSS descriptor is 16 bytes)
+TssSeg      LABEL   QWORD
+            DW      TSS_DESC_SIZE       ; LimitLow
+            DW      0                   ; BaseLow
+            DB      0                   ; BaseMid
+            DB      89h
+            DB      080h                ; LimitHigh
+            DB      0                   ; BaseHigh
+            DD      0                   ; BaseUpper
+            DD      0                   ; Reserved
+GDT_SIZE = $ - offset NullSeg
+
+; Create TSS Descriptor just after GDT
+TssDescriptor LABEL BYTE
+            DD      0                   ; Reserved
+            DQ      0                   ; RSP0
+            DQ      0                   ; RSP1
+            DQ      0                   ; RSP2
+            DD      0                   ; Reserved
+            DD      0                   ; Reserved
+            DQ      0                   ; IST1
+            DQ      0                   ; IST2
+            DQ      0                   ; IST3
+            DQ      0                   ; IST4
+            DQ      0                   ; IST5
+            DQ      0                   ; IST6
+            DQ      0                   ; IST7
+            DD      0                   ; Reserved
+            DD      0                   ; Reserved
+            DW      0                   ; Reserved
+            DW      0                   ; I/O Map Base Address
+TSS_DESC_SIZE = $ - offset TssDescriptor
+
+;
+; This structure serves as a template for all processors.
+;
+gcPsd     LABEL   BYTE
+            DB      'PSDSIG  '
+            DW      PSD_SIZE
+            DW      2
+            DW      1 SHL 2
+            DW      CODE_SEL
+            DW      DATA_SEL
+            DW      DATA_SEL
+            DW      DATA_SEL
+            DW      0
+            DQ      0
+            DQ      0
+            DQ      0                   ; fixed in InitializeMpServiceData()
+            DQ      offset NullSeg
+            DD      GDT_SIZE
+            DD      0
+            DB      24 dup (0)
+            DQ      offset gSmiMtrrs
+PSD_SIZE  = $ - offset gcPsd
+
+;
+; CODE & DATA segments for SMM runtime
+;
+CODE_SEL    = offset CodeSeg64 - offset NullSeg
+DATA_SEL    = offset DataSeg32 - offset NullSeg
+CODE32_SEL  = offset CodeSeg32 - offset NullSeg
+
+gcSmiGdtr   LABEL   FWORD
+    DW      GDT_SIZE - 1
+    DQ      offset NullSeg
+
+gcSmiIdtr   LABEL   FWORD
+    DW      IDT_SIZE - 1
+    DQ      offset _SmiIDT
+
+    .data
+
+;
+; Here is the IDT. There are 32 (not 255) entries in it since only
+; processor-generated exceptions will be handled.
+;
+_SmiIDT:
+REPEAT      32
+    DW      0                           ; Offset 0:15
+    DW      CODE_SEL                    ; Segment selector
+    DB      0                           ; Unused
+    DB      8eh                         ; Interrupt Gate, Present
+    DW      0                           ; Offset 16:31
+    DQ      0                           ; Offset 32:63
+            ENDM
+_SmiIDTEnd:
+
+IDT_SIZE = (offset _SmiIDTEnd - offset _SmiIDT)
+
+    .code
+
+;------------------------------------------------------------------------------
+; PageFaultIdtHandlerSmmProfile is the page fault exception handler used when
+; SMM profiling is enabled.
+;
+; Stack frame would be as follows as specified in IA32 manuals:
+;
+; +---------------------+ <-- 16-byte aligned ensured by processor
+; +    Old SS           +
+; +---------------------+
+; +    Old RSP          +
+; +---------------------+
+; +    RFlags           +
+; +---------------------+
+; +    CS               +
+; +---------------------+
+; +    RIP              +
+; +---------------------+
+; +    Error Code       +
+; +---------------------+
+; +   Vector Number     +
+; +---------------------+
+; +    RBP              +
+; +---------------------+ <-- RBP, 16-byte aligned
+;
+; RSP set to an odd multiple of 8 on entry means an error code is PRESENT
+;------------------------------------------------------------------------------
+PageFaultIdtHandlerSmmProfile    PROC
+    push    0eh                         ; Page Fault
+    test    spl, 8                      ; odd multiple of 8 => ErrCode present
+    jnz     @F
+    push    [rsp]                       ; duplicate INT# if no ErrCode
+    mov     qword ptr [rsp + 8], 0
+@@:
+    push    rbp
+    mov     rbp, rsp
+
+    ;
+    ; The stack pointer is 16-byte aligned here, so the
+    ; EFI_FX_SAVE_STATE_X64 member of EFI_SYSTEM_CONTEXT_X64
+    ; is 16-byte aligned as well.
+    ;
+
+;; UINT64  Rdi, Rsi, Rbp, Rsp, Rbx, Rdx, Rcx, Rax;
+;; UINT64  R8, R9, R10, R11, R12, R13, R14, R15;
+    push    r15
+    push    r14
+    push    r13
+    push    r12
+    push    r11
+    push    r10
+    push    r9
+    push    r8
+    push    rax
+    push    rcx
+    push    rdx
+    push    rbx
+    push    qword ptr [rbp + 48]  ; RSP
+    push    qword ptr [rbp]       ; RBP
+    push    rsi
+    push    rdi
+
+;; UINT64  Gs, Fs, Es, Ds, Cs, Ss;  ensure high 16 bits of each is zero
+    movzx   rax, word ptr [rbp + 56]
+    push    rax                      ; for ss
+    movzx   rax, word ptr [rbp + 32]
+    push    rax                      ; for cs
+    mov     rax, ds
+    push    rax
+    mov     rax, es
+    push    rax
+    mov     rax, fs
+    push    rax
+    mov     rax, gs
+    push    rax
+
+;; UINT64  Rip;
+    push    qword ptr [rbp + 24]
+
+;; UINT64  Gdtr[2], Idtr[2];
+    sub     rsp, 16
+    sidt    fword ptr [rsp]
+    sub     rsp, 16
+    sgdt    fword ptr [rsp]
+
+;; UINT64  Ldtr, Tr;
+    xor     rax, rax
+    str     ax
+    push    rax
+    sldt    ax
+    push    rax
+
+;; UINT64  RFlags;
+    push    qword ptr [rbp + 40]
+
+;; UINT64  Cr0, Cr1, Cr2, Cr3, Cr4, Cr8;
+    mov     rax, cr8
+    push    rax
+    mov     rax, cr4
+    or      rax, 208h
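+    ; (208h sets CR4.DE, bit 3, and CR4.OSFXSR, bit 9)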
+    mov     cr4, rax
+    push    rax
+    mov     rax, cr3
+    push    rax
+    mov     rax, cr2
+    push    rax
+    xor     rax, rax
+    push    rax
+    mov     rax, cr0
+    push    rax
+
+;; UINT64  Dr0, Dr1, Dr2, Dr3, Dr6, Dr7;
+    mov     rax, dr7
+    push    rax
+    mov     rax, dr6
+    push    rax
+    mov     rax, dr3
+    push    rax
+    mov     rax, dr2
+    push    rax
+    mov     rax, dr1
+    push    rax
+    mov     rax, dr0
+    push    rax
+
+;; FX_SAVE_STATE_X64 FxSaveState;
+
+    sub rsp, 512
+    mov rdi, rsp
+    db 0fh, 0aeh, 00000111y ;fxsave [rdi]
+
+; The UEFI x64 calling convention requires that the Direction flag in EFLAGS is clear
+    cld
+
+;; UINT32  ExceptionData;
+    push    qword ptr [rbp + 16]
+
+;; call into exception handler
+    mov     rcx, [rbp + 8]
+    mov     rax, SmiPFHandler
+
+;; Prepare parameter and call
+    mov     rdx, rsp
+    ;
+    ; Per X64 calling convention, allocate maximum parameter stack space
+    ; and make sure RSP is 16-byte aligned
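+    ; (4 * 8 bytes of shadow space for the register parameters, plus 8 bytes
+    ; so RSP is 16-byte aligned again after the ExceptionData push above)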
+    ;
+    sub     rsp, 4 * 8 + 8
+    call    rax
+    add     rsp, 4 * 8 + 8
+    jmp     @F
+
+@@:
+;; UINT64  ExceptionData;
+    add     rsp, 8
+
+;; FX_SAVE_STATE_X64 FxSaveState;
+
+    mov rsi, rsp
+    db 0fh, 0aeh, 00001110y ; fxrstor [rsi]
+    add rsp, 512
+
+;; UINT64  Dr0, Dr1, Dr2, Dr3, Dr6, Dr7;
+;; Skip restoration of DRx registers to support debuggers
+;; that set breakpoints in interrupt/exception context
+    add     rsp, 8 * 6
+
+;; UINT64  Cr0, Cr1, Cr2, Cr3, Cr4, Cr8;
+    pop     rax
+    mov     cr0, rax
+    add     rsp, 8   ; not for Cr1
+    pop     rax
+    mov     cr2, rax
+    pop     rax
+    mov     cr3, rax
+    pop     rax
+    mov     cr4, rax
+    pop     rax
+    mov     cr8, rax
+
+;; UINT64  RFlags;
+    pop     qword ptr [rbp + 40]
+
+;; UINT64  Ldtr, Tr;
+;; UINT64  Gdtr[2], Idtr[2];
+;; Best not let anyone mess with these particular registers...
+    add     rsp, 48
+
+;; UINT64  Rip;
+    pop     qword ptr [rbp + 24]
+
+;; UINT64  Gs, Fs, Es, Ds, Cs, Ss;
+    pop     rax
+    ; mov     gs, rax ; not for gs
+    pop     rax
+    ; mov     fs, rax ; not for fs
+    ; (X64 will not use fs and gs, so we do not restore it)
+    pop     rax
+    mov     es, rax
+    pop     rax
+    mov     ds, rax
+    pop     qword ptr [rbp + 32]  ; for cs
+    pop     qword ptr [rbp + 56]  ; for ss
+
+;; UINT64  Rdi, Rsi, Rbp, Rsp, Rbx, Rdx, Rcx, Rax;
+;; UINT64  R8, R9, R10, R11, R12, R13, R14, R15;
+    pop     rdi
+    pop     rsi
+    add     rsp, 8               ; not for rbp
+    pop     qword ptr [rbp + 48] ; for rsp
+    pop     rbx
+    pop     rdx
+    pop     rcx
+    pop     rax
+    pop     r8
+    pop     r9
+    pop     r10
+    pop     r11
+    pop     r12
+    pop     r13
+    pop     r14
+    pop     r15
+
+    mov     rsp, rbp
+
+; Enable TF bit after page fault handler runs
+    bts     dword ptr [rsp + 40], 8  ;RFLAGS
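+; The resulting single-step debug exception is used to restore the original
+; page table entries recorded by RestorePageTableAbove4G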
+
+    pop     rbp
+    add     rsp, 16           ; skip INT# & ErrCode
+    iretq
+PageFaultIdtHandlerSmmProfile ENDP
+
+InitializeIDTSmmStackGuard   PROC
+;
+; If SMM Stack Guard feature is enabled, set the IST field of
+; the interrupt gate for Page Fault Exception to be 1
+;
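+; Each IDT gate descriptor is 16 bytes, so vector 14 (#PF) is at offset
+; 14 * 16; the 3-bit IST index lives in byte 4 of the descriptor.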
+    lea     rax, _SmiIDT + 14 * 16
+    mov     byte ptr [rax + 4], 1
+    ret
+InitializeIDTSmmStackGuard   ENDP
+
+    END
diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmmInit.S b/UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmmInit.S
new file mode 100644
index 0000000..5ace1a6
--- /dev/null
+++ b/UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmmInit.S
@@ -0,0 +1,141 @@
+#------------------------------------------------------------------------------
+#
+# Copyright (c) 2009 - 2015, Intel Corporation. All rights reserved.<BR>
+# This program and the accompanying materials
+# are licensed and made available under the terms and conditions of the BSD License
+# which accompanies this distribution.  The full text of the license may be found at
+# http://opensource.org/licenses/bsd-license.php.
+#
+# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+#
+# Module Name:
+#
+#   SmmInit.S
+#
+# Abstract:
+#
+#   Functions for relocating SMBASE's for all processors
+#
+#------------------------------------------------------------------------------
+
+ASM_GLOBAL   ASM_PFX(gSmmCr0)
+ASM_GLOBAL   ASM_PFX(gSmmCr3)
+ASM_GLOBAL   ASM_PFX(gSmmCr4)
+ASM_GLOBAL   ASM_PFX(gSmmJmpAddr)
+ASM_GLOBAL   ASM_PFX(gcSmmInitTemplate)
+ASM_GLOBAL   ASM_PFX(gcSmmInitSize)
+ASM_GLOBAL   ASM_PFX(mRebasedFlagAddr32)
+ASM_GLOBAL   ASM_PFX(SmmRelocationSemaphoreComplete)
+ASM_GLOBAL   ASM_PFX(SmmRelocationSemaphoreComplete32)
+ASM_GLOBAL   ASM_PFX(mSmmRelocationOriginalAddressPtr32)
+ASM_GLOBAL   ASM_PFX(gSmmInitStack)
+ASM_GLOBAL   ASM_PFX(gcSmiInitGdtr)
+
+
+    .text
+
+ASM_PFX(gcSmiInitGdtr):
+            .word      0
+            .quad      0
+
+SmmStartup:
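+# SmmStartup is entered while the CPU is still in the 16-bit SMM entry
+# environment, hence the 66h operand-size prefixes. gSmmCr3, gSmmCr4, gSmmCr0
+# and gSmmInitStack are placeholders that the PiSmmCpuDxeSmm C code patches
+# with the actual values before any SMI is triggered.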
+    .byte 0x66,0xb8                     # mov eax, imm32
+ASM_PFX(gSmmCr3):    .space     4
+    movq    %rax, %cr3
+    .byte 0x66,0x2e
+    lgdt    (ASM_PFX(gcSmiInitGdtr) - SmmStartup)(%ebp)
+    .byte 0x66,0xb8                     # mov eax, imm32
+ASM_PFX(gSmmCr4):    .space     4
+    orb     $2, %ah                     # enable XMM registers access
+    movq    %rax, %cr4
+    .byte 0x66
+    movl    $0xc0000080,%ecx            # IA32_EFER MSR
+    rdmsr
+    orb     $1,%ah                      # set LME bit
+    wrmsr
+    .byte 0x66,0xb8                     # mov eax, imm32
+ASM_PFX(gSmmCr0):    .space     4
+    movq    %rax, %cr0
+    .byte 0x66,0xea                     # far jmp to long mode
+ASM_PFX(gSmmJmpAddr): .quad      LongMode
+LongMode:                               # long-mode starts here
+    .byte 0x48,0xbc                     # mov rsp, imm64
+ASM_PFX(gSmmInitStack):  .space  8
+    andw  $0xfff0, %sp                  # make sure RSP is 16-byte aligned
+    #
+    # According to the X64 calling convention, XMM0~5 are volatile; we need to
+    # save them before calling the C function.
+    #
+    subq     $0x60, %rsp
+    movdqa   %xmm0, 0x0(%rsp) 
+    movdqa   %xmm1, 0x10(%rsp) 
+    movdqa   %xmm2, 0x20(%rsp) 
+    movdqa   %xmm3, 0x30(%rsp) 
+    movdqa   %xmm4, 0x40(%rsp) 
+    movdqa   %xmm5, 0x50(%rsp) 
+
+
+    addq  $-0x20, %rsp
+    call  ASM_PFX(SmmInitHandler)
+    addq  $0x20, %rsp
+    #
+    # Restore XMM0~5 after calling the C function.
+    #
+    movdqa  0x0(%rsp), %xmm0
+    movdqa  0x10(%rsp), %xmm1
+    movdqa  0x20(%rsp), %xmm2
+    movdqa  0x30(%rsp), %xmm3
+    movdqa  0x40(%rsp), %xmm4
+    movdqa  0x50(%rsp), %xmm5
+
+    rsm
+
+ASM_PFX(gcSmmInitTemplate):
+
+_SmmInitTemplate:
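+# This template is copied to the default SMI entry point (SMBASE + 8000h).
+# It loads the flat address of SmmStartup from L1, then subtracts the default
+# SMBASE (30000h) because the near jump through %bp below is relative to the
+# CS base, which still equals the default SMBASE at this point.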
+    .byte 0x66,0x2e,0x8b,0x2e           # mov ebp, cs:[@F]
+    .word L1 - _SmmInitTemplate + 0x8000
+    .byte 0x66, 0x81, 0xed, 0, 0, 3, 0  # sub ebp, 0x30000
+    jmp     *%bp                        # jmp ebp actually
+L1:
+    .quad      SmmStartup
+
+ASM_PFX(gcSmmInitSize):   .word  . - ASM_PFX(gcSmmInitTemplate)
+
+ASM_PFX(SmmRelocationSemaphoreComplete):
+    # Create a simple stack frame to store RAX and the original RSM location
+    pushq   %rax  # Used to store return address
+    pushq   %rax
+
+    # Load the original RSM location onto stack
+    movabsq $ASM_PFX(mSmmRelocationOriginalAddress), %rax
+    movq    (%rax), %rax
+    movq    %rax, 0x08(%rsp)
+
+    # Update rebase flag
+    movabsq $ASM_PFX(mRebasedFlag), %rax
+    movq    (%rax), %rax
+    movb    $1, (%rax)
+
+    # Restore RAX and return to the original RSM location
+    popq    %rax
+    retq
+
+#
+# Semaphore code running in 32-bit mode
+#
+ASM_PFX(SmmRelocationSemaphoreComplete32):
+    #
+    # movb $1, ()
+    #
+    .byte   0xc6, 0x05
+ASM_PFX(mRebasedFlagAddr32):
+    .long   0
+    .byte   1
+    #
+    # jmpd ()
+    #
+    .byte   0xff, 0x25
+ASM_PFX(mSmmRelocationOriginalAddressPtr32):
+    .long   0
diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmmInit.asm b/UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmmInit.asm
new file mode 100644
index 0000000..25a0447
--- /dev/null
+++ b/UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmmInit.asm
@@ -0,0 +1,132 @@
+;------------------------------------------------------------------------------ ;
+; Copyright (c) 2009 - 2015, Intel Corporation. All rights reserved.<BR>
+; This program and the accompanying materials
+; are licensed and made available under the terms and conditions of the BSD License
+; which accompanies this distribution.  The full text of the license may be found at
+; http://opensource.org/licenses/bsd-license.php.
+;
+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+;
+; Module Name:
+;
+;   SmmInit.Asm
+;
+; Abstract:
+;
+;   Functions for relocating SMBASE's for all processors
+;
+;-------------------------------------------------------------------------------
+
+EXTERNDEF   SmmInitHandler:PROC
+EXTERNDEF   gSmmCr0:DWORD
+EXTERNDEF   gSmmCr3:DWORD
+EXTERNDEF   gSmmCr4:DWORD
+EXTERNDEF   gSmmJmpAddr:QWORD
+EXTERNDEF   gcSmmInitTemplate:BYTE
+EXTERNDEF   gcSmmInitSize:WORD
+EXTERNDEF   mRebasedFlag:PTR BYTE
+EXTERNDEF   mSmmRelocationOriginalAddress:QWORD
+EXTERNDEF   mRebasedFlagAddr32:DWORD
+EXTERNDEF   mSmmRelocationOriginalAddressPtr32:DWORD
+EXTERNDEF   gSmmInitStack:QWORD
+EXTERNDEF   gcSmiInitGdtr:FWORD
+
+    .code
+
+gcSmiInitGdtr   LABEL   FWORD
+            DW      0
+            DQ      0
+
+SmmStartup  PROC
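+; SmmStartup is entered while the CPU is still in the 16-bit SMM entry
+; environment, hence the 66h operand-size prefixes. gSmmCr3, gSmmCr4, gSmmCr0
+; and gSmmInitStack are placeholders that the PiSmmCpuDxeSmm C code patches
+; with the actual values before any SMI is triggered.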
+    DB      66h, 0b8h                   ; mov eax, imm32
+gSmmCr3     DD      ?
+    mov     cr3, rax
+    DB      66h, 2eh
+    lgdt    fword ptr [ebp + (offset gcSmiInitGdtr - SmmStartup)]
+    DB      66h, 0b8h                   ; mov eax, imm32
+gSmmCr4     DD      ?
+    or      ah,  2                      ; enable XMM registers access
+    mov     cr4, rax
+    DB      66h
+    mov     ecx, 0c0000080h             ; IA32_EFER MSR
+    rdmsr
+    or      ah, 1                       ; set LME bit
+    wrmsr
+    DB      66h, 0b8h                   ; mov eax, imm32
+gSmmCr0     DD      ?
+    mov     cr0, rax                    ; enable protected mode & paging
+    DB      66h, 0eah                   ; far jmp to long mode
+gSmmJmpAddr DQ      @LongMode
+@LongMode:                              ; long-mode starts here
+    DB      48h, 0bch                   ; mov rsp, imm64
+gSmmInitStack   DQ      ?
+    and     sp, 0fff0h                  ; make sure RSP is 16-byte aligned
+    ;
+    ; According to the X64 calling convention, XMM0~5 are volatile; we need to
+    ; save them before calling the C function.
+    ;
+    sub     rsp, 60h
+    movdqa  [rsp], xmm0
+    movdqa  [rsp + 10h], xmm1
+    movdqa  [rsp + 20h], xmm2
+    movdqa  [rsp + 30h], xmm3
+    movdqa  [rsp + 40h], xmm4
+    movdqa  [rsp + 50h], xmm5
+
+    add     rsp, -20h   
+    call    SmmInitHandler
+    add     rsp, 20h
+
+    ;
+    ; Restore XMM0~5 after calling the C function.
+    ;
+    movdqa  xmm0, [rsp]
+    movdqa  xmm1, [rsp + 10h]
+    movdqa  xmm2, [rsp + 20h]
+    movdqa  xmm3, [rsp + 30h]
+    movdqa  xmm4, [rsp + 40h]
+    movdqa  xmm5, [rsp + 50h]    
+
+    rsm
+SmmStartup  ENDP
+
+gcSmmInitTemplate   LABEL   BYTE
+
+_SmmInitTemplate    PROC
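+; This template is copied to the default SMI entry point (SMBASE + 8000h).
+; It loads the flat address of SmmStartup from @L1, then subtracts the default
+; SMBASE (30000h) because the near jump through bp below is relative to the
+; CS base, which still equals the default SMBASE at this point.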
+    DB      66h, 2eh, 8bh, 2eh          ; mov ebp, cs:[@F]
+    DW      @L1 - _SmmInitTemplate + 8000h
+    DB      66h, 81h, 0edh, 00h, 00h, 03h, 00  ; sub ebp, 30000h
+    jmp     bp                          ; jmp ebp actually
+@L1:
+    DQ      SmmStartup
+_SmmInitTemplate    ENDP
+
+gcSmmInitSize   DW  $ - gcSmmInitTemplate
+
+SmmRelocationSemaphoreComplete PROC
+    push    rax
+    mov     rax, mRebasedFlag
+    mov     byte ptr [rax], 1
+    pop     rax
+    jmp     [mSmmRelocationOriginalAddress]
+SmmRelocationSemaphoreComplete ENDP
+
+;
+; Semaphore code running in 32-bit mode
+;
+SmmRelocationSemaphoreComplete32 PROC
+    ;
+    ; mov byte ptr [], 1
+    ;
+    db      0c6h, 05h
+mRebasedFlagAddr32    dd      0
+    db      1
+    ;
+    ; jmp dword ptr []
+    ;
+    db      0ffh, 25h
+mSmmRelocationOriginalAddressPtr32    dd      0
+SmmRelocationSemaphoreComplete32 ENDP
+
+    END
diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmmProfileArch.c b/UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmmProfileArch.c
new file mode 100644
index 0000000..81faf48
--- /dev/null
+++ b/UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmmProfileArch.c
@@ -0,0 +1,316 @@
+/** @file
+X64 processor specific functions to enable SMM profile.
+
+Copyright (c) 2012 - 2015, Intel Corporation. All rights reserved.<BR>
+This program and the accompanying materials
+are licensed and made available under the terms and conditions of the BSD License
+which accompanies this distribution.  The full text of the license may be found at
+http://opensource.org/licenses/bsd-license.php
+
+THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+
+**/
+
+#include "PiSmmCpuDxeSmm.h"
+#include "SmmProfileInternal.h"
+
+//
+// Current page index.
+//
+UINTN                     mPFPageIndex;
+
+//
+// Pool for dynamically creating page tables in the page fault handler.
+//
+UINT64                    mPFPageBuffer;
+
+//
+// Store the uplink information for each page being used.
+//
+UINT64                    *mPFPageUplink[MAX_PF_PAGE_COUNT];
+
+/**
+  Create SMM page table for S3 path.
+  
+**/
+VOID
+InitSmmS3Cr3 (
+  VOID
+  )
+{
+  EFI_PHYSICAL_ADDRESS              Pages;
+  UINT64                            *PTEntry;
+
+  //
+  // Generate PAE page table for the first 4GB memory space
+  //
+  Pages = Gen4GPageTable (1);
+
+  //
+  // Fill Page-Table-Level4 (PML4) entry
+  //
+  PTEntry = (UINT64*)(UINTN)(Pages - EFI_PAGES_TO_SIZE (1));
+  *PTEntry = Pages + IA32_PG_P;
+  ZeroMem (PTEntry + 1, EFI_PAGE_SIZE - sizeof (*PTEntry));
+
+  //
+  // Return the address of PML4 (to set CR3)
+  //
+  mSmmS3ResumeState->SmmS3Cr3 = (UINT32)(UINTN)PTEntry;
+
+  return ;
+}
+
+/**
+  Allocate pages used to create 4KB pages from a 2MB page when a page fault happens.
+  
+**/
+VOID
+InitPagesForPFHandler (
+  VOID
+  )
+{
+  VOID          *Address;
+  
+  //
+  // Pre-Allocate memory for page fault handler
+  //
+  Address = AllocatePages (MAX_PF_PAGE_COUNT);
+  ASSERT (Address != NULL);
+
+  mPFPageBuffer =  (UINT64)(UINTN) Address;
+  mPFPageIndex = 0; 
+  ZeroMem ((VOID *) (UINTN) mPFPageBuffer, EFI_PAGE_SIZE * MAX_PF_PAGE_COUNT);
+  ZeroMem (mPFPageUplink, sizeof (mPFPageUplink));
+  
+  return;
+}
+
+/**
+  Allocate one page used to create 4KB pages from a 2MB page.
+
+  @param  Uplink   The address of the Page-Directory entry.
+  
+**/
+VOID
+AcquirePage (
+  UINT64          *Uplink
+  )
+{
+  UINT64          Address;
+
+  //
+  // Get the buffer
+  //
+  Address = mPFPageBuffer + EFI_PAGES_TO_SIZE (mPFPageIndex);
+  ZeroMem ((VOID *) (UINTN) Address, EFI_PAGE_SIZE);
+
+  //
+  // Cut the previous uplink if it exists and wasn't overwritten
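+  // (pages are handed out round-robin, so this slot may still be linked into
+  // the page table from an earlier page fault)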
+  //
+  if ((mPFPageUplink[mPFPageIndex] != NULL) && ((*mPFPageUplink[mPFPageIndex] & PHYSICAL_ADDRESS_MASK) == Address)) {
+    *mPFPageUplink[mPFPageIndex] = 0;
+  }
+
+  //
+  // Link & Record the current uplink
+  //
+  *Uplink = Address | IA32_PG_P | IA32_PG_RW;
+  mPFPageUplink[mPFPageIndex] = Uplink;
+
+  mPFPageIndex = (mPFPageIndex + 1) % MAX_PF_PAGE_COUNT;
+}
+
+/**
+  Update the page table to map the memory correctly so that the instruction
+  which caused the page fault can execute successfully. It also saves the
+  original page table entry to be restored in the single-step exception.
+
+  @param  PageTable           PageTable Address.
+  @param  PFAddress           The memory address which caused the page fault exception.
+  @param  CpuIndex            The index of the processor.
+  @param  ErrorCode           The Error code of the exception.
+  @param  IsValidPFAddress    The flag indicates if SMM profile data needs to be added.
+
+**/
+VOID
+RestorePageTableAbove4G (
+  UINT64        *PageTable,
+  UINT64        PFAddress,
+  UINTN         CpuIndex,
+  UINTN         ErrorCode,
+  BOOLEAN       *IsValidPFAddress
+  )
+{
+  UINTN         PTIndex;
+  UINT64        Address;  
+  BOOLEAN       Nx;
+  BOOLEAN       Existed;
+  UINTN         Index;
+  UINTN         PFIndex;
+
+  ASSERT ((PageTable != NULL) && (IsValidPFAddress != NULL));
+
+  //
+  // The page fault address is above 4GB.
+  //
+
+  //
+  // Check if the page fault address already exists in the page table.
+  // If it exists in the page table but a page fault was still generated,
+  // there are 2 possible reasons: 1. the present flag is set to 0;
+  // 2. instruction fetch in a protected memory range.
+  //
+  Existed = FALSE;
+  PageTable = (UINT64*)(AsmReadCr3 () & PHYSICAL_ADDRESS_MASK);
+  PTIndex = BitFieldRead64 (PFAddress, 39, 47);
+  if ((PageTable[PTIndex] & IA32_PG_P) != 0) {
+    // PML4E
+    PageTable = (UINT64*)(UINTN)(PageTable[PTIndex] & PHYSICAL_ADDRESS_MASK);
+    PTIndex = BitFieldRead64 (PFAddress, 30, 38);
+    if ((PageTable[PTIndex] & IA32_PG_P) != 0) {
+      // PDPTE
+      PageTable = (UINT64*)(UINTN)(PageTable[PTIndex] & PHYSICAL_ADDRESS_MASK);
+      PTIndex = BitFieldRead64 (PFAddress, 21, 29);
+      // PD
+      if ((PageTable[PTIndex] & IA32_PG_PS) != 0) {
+        //
+        // 2MB page
+        //
+        Address = (UINT64)(PageTable[PTIndex] & PHYSICAL_ADDRESS_MASK);
+        if ((Address & PHYSICAL_ADDRESS_MASK & ~((1ull << 21) - 1)) == (PFAddress & PHYSICAL_ADDRESS_MASK & ~((1ull << 21) - 1))) {
+          Existed = TRUE;
+        }
+      } else {
+        //
+        // 4KB page
+        //
+        PageTable = (UINT64*)(UINTN)(PageTable[PTIndex] & PHYSICAL_ADDRESS_MASK);
+        if (PageTable != NULL) {
+          //
+          // When there is a valid entry mapping a 4KB page, there is no need to create a new entry mapping 2MB.
+          //
+          PTIndex = BitFieldRead64 (PFAddress, 12, 20);
+          Address = (UINT64)(PageTable[PTIndex] & PHYSICAL_ADDRESS_MASK);
+          if ((Address & PHYSICAL_ADDRESS_MASK & ~((1ull << 12) - 1)) == (PFAddress & PHYSICAL_ADDRESS_MASK & ~((1ull << 12) - 1))) {
+            Existed = TRUE;
+          }
+        }
+      }
+    }
+  }
+  
+  //
+  // If the page entry does not exist in the page table at all, create a new entry.
+  //
+  if (!Existed) { 
+
+    if (IsAddressValid (PFAddress, &Nx)) {
+      //
+      // The page fault address above 4GB is in a protected range but caused a
+      // page fault exception. Create a page entry for this address and mark the
+      // page table entry as present/rw and execution-disabled; this access is
+      // not saved into the SMM profile data.
+      //
+      *IsValidPFAddress = TRUE;
+    }      
+    
+    //
+    // Create one entry in page table for page fault address.
+    //
+    SmiDefaultPFHandler ();
+    //
+    // Find the page table entry created just now.
+    //
+    PageTable = (UINT64*)(AsmReadCr3 () & PHYSICAL_ADDRESS_MASK);
+    PFAddress = AsmReadCr2 ();
+    // PML4E  
+    PTIndex = BitFieldRead64 (PFAddress, 39, 47);
+    PageTable = (UINT64*)(UINTN)(PageTable[PTIndex] & PHYSICAL_ADDRESS_MASK);
+    // PDPTE
+    PTIndex = BitFieldRead64 (PFAddress, 30, 38);
+    PageTable = (UINT64*)(UINTN)(PageTable[PTIndex] & PHYSICAL_ADDRESS_MASK);
+    // PD
+    PTIndex = BitFieldRead64 (PFAddress, 21, 29);
+    Address = PageTable[PTIndex] & PHYSICAL_ADDRESS_MASK;
+    //
+    // Check if the 2MB-page entry needs to be changed to a 4KB-page entry.
+    //
+    if (IsAddressSplit (Address)) {
+      AcquirePage (&PageTable[PTIndex]);
+
+      // PTE
+      PageTable = (UINT64*)(UINTN)(PageTable[PTIndex] & PHYSICAL_ADDRESS_MASK);
+      for (Index = 0; Index < 512; Index++) {
+        PageTable[Index] = Address | IA32_PG_RW | IA32_PG_P;
+        if (!IsAddressValid (Address, &Nx)) {
+          PageTable[Index] = PageTable[Index] & (INTN)(INT32)(~(IA32_PG_RW | IA32_PG_P));
+        }
+        if (Nx && mXdSupported) {
+          PageTable[Index] = PageTable[Index] | IA32_PG_NX;
+        }
+        if (Address == (PFAddress & PHYSICAL_ADDRESS_MASK & ~((1ull << 12) - 1))) {
+          PTIndex = Index;
+        }
+        Address += SIZE_4KB;
+      } // end for PT
+    } else {
+      //
+      // Update 2MB page entry.
+      //
+      if (!IsAddressValid (Address, &Nx)) {
+        //
+        // Patch to remove present flag and rw flag.
+        //
+        PageTable[PTIndex] = PageTable[PTIndex] & (INTN)(INT32)(~(IA32_PG_RW | IA32_PG_P));
+      }
+      //
+      // Set XD bit to 1
+      //
+      if (Nx && mXdSupported) {
+        PageTable[PTIndex] = PageTable[PTIndex] | IA32_PG_NX;
+      }    
+    }
+  }
+
+  //
+  // Record old entries with non-present status.
+  // Old entries include the memory the instruction is at and the memory the
+  // instruction accesses.
+  //
+  ASSERT (mPFEntryCount[CpuIndex] < MAX_PF_ENTRY_COUNT);
+  if (mPFEntryCount[CpuIndex] < MAX_PF_ENTRY_COUNT) {
+    PFIndex = mPFEntryCount[CpuIndex];
+    mLastPFEntryValue[CpuIndex][PFIndex]   = PageTable[PTIndex];
+    mLastPFEntryPointer[CpuIndex][PFIndex] = &PageTable[PTIndex];
+    mPFEntryCount[CpuIndex]++;
+  }
+                 
+  //
+  // Add the present flag or clear the XD flag so the faulting instruction succeeds when re-executed.
+  //
+  PageTable[PTIndex] |= (UINT64)(IA32_PG_RW | IA32_PG_P);
+  if ((ErrorCode & IA32_PF_EC_ID) != 0) {
+    //
+    // If page fault is caused by instruction fetch, clear XD bit in the entry.
+    //
+    PageTable[PTIndex] &= ~IA32_PG_NX;
+  }
+  
+  return;
+} 
+
+/**
+  Clear TF in FLAGS.
+
+  @param  SystemContext    A pointer to the processor context when
+                           the interrupt occurred on the processor.
+
+**/
+VOID
+ClearTrapFlag (
+  IN OUT EFI_SYSTEM_CONTEXT   SystemContext
+  )
+{
+  SystemContext.SystemContextX64->Rflags &= (UINTN) ~BIT8;
+}
diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmmProfileArch.h b/UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmmProfileArch.h
new file mode 100644
index 0000000..40de858
--- /dev/null
+++ b/UefiCpuPkg/PiSmmCpuDxeSmm/X64/SmmProfileArch.h
@@ -0,0 +1,105 @@
+/** @file
+X64 processor specific header file to enable SMM profile.
+
+Copyright (c) 2012 - 2015, Intel Corporation. All rights reserved.<BR>
+This program and the accompanying materials
+are licensed and made available under the terms and conditions of the BSD License
+which accompanies this distribution.  The full text of the license may be found at
+http://opensource.org/licenses/bsd-license.php
+
+THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+
+**/
+
+#ifndef _SMM_PROFILE_ARCH_H_
+#define _SMM_PROFILE_ARCH_H_
+
+#pragma pack (1)
+
+typedef struct _MSR_DS_AREA_STRUCT {
+  UINT64  BTSBufferBase;
+  UINT64  BTSIndex;
+  UINT64  BTSAbsoluteMaximum;
+  UINT64  BTSInterruptThreshold;
+  UINT64  PEBSBufferBase;
+  UINT64  PEBSIndex;
+  UINT64  PEBSAbsoluteMaximum;
+  UINT64  PEBSInterruptThreshold;
+  UINT64  PEBSCounterReset[2];
+  UINT64  Reserved;
+} MSR_DS_AREA_STRUCT;
+
+typedef struct _BRANCH_TRACE_RECORD {
+  UINT64  LastBranchFrom;
+  UINT64  LastBranchTo;
+  UINT64  Rsvd0 : 4;
+  UINT64  BranchPredicted : 1;
+  UINT64  Rsvd1 : 59;
+} BRANCH_TRACE_RECORD;
+
+typedef struct _PEBS_RECORD {
+  UINT64  Rflags;
+  UINT64  LinearIP;
+  UINT64  Rax;
+  UINT64  Rbx;
+  UINT64  Rcx;
+  UINT64  Rdx;
+  UINT64  Rsi;
+  UINT64  Rdi;
+  UINT64  Rbp;
+  UINT64  Rsp;
+  UINT64  R8;
+  UINT64  R9;
+  UINT64  R10;
+  UINT64  R11;
+  UINT64  R12;
+  UINT64  R13;
+  UINT64  R14;
+  UINT64  R15;
+} PEBS_RECORD;
+
+#pragma pack ()
+
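+//
+// Mask of bits 51:12 - the physical page frame number portion of a
+// page table entry.
+//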
+#define PHYSICAL_ADDRESS_MASK       ((1ull << 52) - SIZE_4KB)
+
+/**
+  Update the page table to map the memory correctly so that the instruction
+  which caused the page fault can execute successfully. It also saves the
+  original page table entry to be restored in the single-step exception.
+
+  @param  PageTable           PageTable Address.
+  @param  PFAddress           The memory address which caused the page fault exception.
+  @param  CpuIndex            The index of the processor.
+  @param  ErrorCode           The Error code of the exception.
+  @param  IsValidPFAddress    The flag indicates if SMM profile data needs to be added.
+
+**/
+VOID
+RestorePageTableAbove4G (
+  UINT64        *PageTable,
+  UINT64        PFAddress,
+  UINTN         CpuIndex,
+  UINTN         ErrorCode,
+  BOOLEAN       *IsValidPFAddress
+  );
+
+/**
+  Create SMM page table for S3 path.
+  
+**/
+VOID
+InitSmmS3Cr3 (
+  VOID
+  );
+
+/**
+  Allocate pages used to create 4KB pages from a 2MB page when a page fault happens.
+  
+**/
+VOID
+InitPagesForPFHandler (
+  VOID
+  );
+
+#endif // _SMM_PROFILE_ARCH_H_
-- 
1.9.5.msysgit.1
