From: Sai Praneeth <sai.praneeth.prak...@intel.com>

EFI regions can broadly be divided into three types:
1. EFI_BOOT_SERVICES_<CODE/DATA> regions
2. EFI_RUNTIME_SERVICES_<CODE/DATA> regions
3. Other EFI regions like EFI_LOADER_<CODE/DATA> etc.

As per the UEFI specification, after the call to ExitBootServices(),
accesses by firmware to any memory region other than
EFI_RUNTIME_SERVICES_<CODE/DATA> regions are considered illegal. Buggy
firmware could trigger these illegal accesses during boot time or at
runtime (i.e. when the kernel is up and running). Presently, the kernel
can fix up illegal accesses to EFI_BOOT_SERVICES_<CODE/DATA> regions
*only* during the kernel boot phase. If firmware triggers illegal
accesses to *any* other EFI region during kernel boot, the kernel
panics; if this happens during kernel runtime, the kernel hangs.

The kernel panics/hangs because the memory region requested by firmware
isn't mapped, which causes a page fault in ring 0 that the kernel fails
to handle, leading to die(). To save the kernel from hanging, add a page
fault handler which detects illegal accesses by firmware and:
1. If the illegally accessed region is EFI_BOOT_SERVICES_<CODE/DATA>,
the kernel fixes it up by mapping the requested region.
2. If it is any other region (e.g. EFI_CONVENTIONAL_MEMORY or
EFI_LOADER_<CODE/DATA>), the kernel exits firmware context and
disables EFI Runtime Services, so that we will never again call buggy
firmware.

Illegal accesses to EFI_BOOT_SERVICES_<CODE/DATA> and to other regions
are dealt with differently in the EFI page fault handler because,
presently, EFI_BOOT_SERVICES_<CODE/DATA> regions are reserved by the
kernel during boot, and hence it's OK to dynamically map these regions
in the page fault handler. We cannot reserve other EFI regions like
EFI_CONVENTIONAL_MEMORY and EFI_LOADER_<CODE/DATA> as they are very
large, and reserving them would make the kernel un-bootable. Hence, we
take a different approach (exiting firmware context) in dealing with
page faults on these regions.

The EFI-specific page fault handler offers us two advantages:
1. Avoid panics/hangs caused by buggy firmware.
2. Shout loudly that the firmware is buggy, which saves us from being
blamed for a fault that is not ours.

Finally, this new mapping will not impact a reboot from kexec, as kexec
is only concerned about runtime memory regions.

Signed-off-by: Sai Praneeth Prakhya <sai.praneeth.prak...@intel.com>
Suggested-by: Matt Fleming <m...@codeblueprint.co.uk>
Based-on-code-from: Ricardo Neri <ricardo.n...@intel.com>
Cc: Al Stone <ast...@redhat.com>
Cc: Lee Chun-Yi <j...@suse.com>
Cc: Borislav Petkov <b...@alien8.de>
Cc: Bhupesh Sharma <bhsha...@redhat.com>
Cc: Ard Biesheuvel <ard.biesheu...@linaro.org>
---
 arch/x86/include/asm/efi.h              |  22 ++++-
 arch/x86/mm/fault.c                     |   9 ++
 arch/x86/platform/efi/quirks.c          | 140 ++++++++++++++++++++++++++++++++
 drivers/firmware/efi/runtime-wrappers.c |   6 ++
 4 files changed, 176 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 47202b9e1b8e..1285caccdff4 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -90,8 +90,20 @@ struct efi_scratch {
                efi_switch_mm(&efi_mm);                                 \
 })
 
+/*
+ * Call the EFI service p->f with the given arguments through efi_call().
+ * Evaluates to EFI_ABORTED if exited_fw_ctx is set once the call is over
+ * (an illegal access by firmware made the kernel exit firmware context),
+ * otherwise evaluates to the status returned by firmware.
+ */
 #define arch_efi_call_virt(p, f, args...)                              \
-       efi_call((void *)p->f, args)                                    \
+({                                                                     \
+       efi_status_t __s;                                               \
+                                                                       \
+       __s = efi_call((void *)p->f, args);                             \
+       if (exited_fw_ctx)                                              \
+               __s = EFI_ABORTED;                                      \
+                                                                       \
+       __s;                                                            \
+})
 
 #define arch_efi_call_virt_teardown()                                  \
 ({                                                                     \
@@ -124,6 +136,7 @@ extern void __iomem *__efi_init_fixup efi_ioremap(unsigned long addr,
 extern u64 xmm_regs_rsp;
 extern u64 core_regs_rsp;
 extern u64 exit_fw_ctx_rip;
+extern bool exited_fw_ctx;
 extern struct efi_scratch efi_scratch;
 extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
 extern int __init efi_memblock_x86_reserve_range(void);
@@ -147,8 +160,15 @@ extern void efi_switch_mm(struct mm_struct *mm);
 
 #ifdef CONFIG_EFI_WARN_ON_ILLEGAL_ACCESSES
 extern void __init efi_save_original_memmap(void);
+extern int efi_illegal_accesses_fixup(unsigned long phys_addr,
+                                     struct pt_regs *regs);
 #else
 static inline void __init efi_save_original_memmap(void) { }
+static inline int efi_illegal_accesses_fixup(unsigned long phys_addr,
+                                            struct pt_regs *regs)
+{
+       return 0;       /* stub for !CONFIG_EFI_WARN_ON_ILLEGAL_ACCESSES: no fault is ever handled here */
+}
 #endif /* CONFIG_EFI_WARN_ON_ILLEGAL_ACCESSES */
 
 struct efi_setup_data {
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 2aafa6ab6103..bc0507cc90ba 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -16,6 +16,7 @@
 #include <linux/prefetch.h>            /* prefetchw                    */
 #include <linux/context_tracking.h>    /* exception_enter(), ...       */
 #include <linux/uaccess.h>             /* faulthandler_disabled()      */
+#include <linux/efi.h>                 /* fixup for buggy UEFI firmware*/
 
 #include <asm/cpufeature.h>            /* boot_cpu_has, ...            */
 #include <asm/traps.h>                 /* dotraplinkage, ...           */
@@ -24,6 +25,7 @@
 #include <asm/vsyscall.h>              /* emulate_vsyscall             */
 #include <asm/vm86.h>                  /* struct vm86                  */
 #include <asm/mmu_context.h>           /* vma_pkey()                   */
+#include <asm/efi.h>                   /* fixup for buggy UEFI firmware*/
 
 #define CREATE_TRACE_POINTS
 #include <asm/trace/exceptions.h>
@@ -790,6 +792,13 @@ no_context(struct pt_regs *regs, unsigned long error_code,
                return;
 
        /*
+        * Buggy firmware could trigger illegal accesses to some EFI regions,
+        * try to fixup or recover from such faults.
+        */
+       if (efi_illegal_accesses_fixup(address, regs))
+               return;
+
+       /*
         * Oops. The kernel tried to access some bad page. We'll have to
         * terminate things with extreme prejudice:
         */
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index a3a1ae6a2562..a09d9f754eee 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -82,6 +82,7 @@ static bool efi_no_storage_paranoia;
 u64 xmm_regs_rsp __aligned(16) = 0;
 u64 core_regs_rsp __aligned(16) = 0;
 u64 exit_fw_ctx_rip __aligned(16) = 0;
+bool exited_fw_ctx;
 
 /*
  * Some firmware implementations refuse to boot if there's insufficient
@@ -706,4 +707,143 @@ void __init efi_save_original_memmap(void)
 
        original_memory_map_present = true;
 }
+
+/*
+ * Search the original EFI memory map for the descriptor covering the
+ * given physical address, i.e. the entry whose range starts at its
+ * phys_addr and spans num_pages EFI pages. Returns a pointer to that
+ * descriptor, or NULL if no entry in the map covers the address.
+ */
+static efi_memory_desc_t *efi_get_md(unsigned long phys_addr)
+{
+       efi_memory_desc_t *desc;
+       u64 start, end;
+
+       for_each_efi_memory_desc_in_map(&original_memory_map, desc) {
+               start = desc->phys_addr;
+               end = start + (desc->num_pages << EFI_PAGE_SHIFT);
+
+               if (phys_addr >= start && phys_addr < end)
+                       return desc;
+       }
+
+       return NULL;
+}
+
+/*
+ * Detect illegal accesses by firmware and
+ * 1.  If the illegally accessed region is EFI_BOOT_SERVICES_<CODE/DATA>,
+ *  fix it up by mapping the requested region.
+ * 2.  If any other region (Eg: EFI_CONVENTIONAL_MEMORY or
+ *  EFI_LOADER_<CODE/DATA>), then exit firmware context and disable EFI
+ *  Runtime Services, so that we will never again call buggy firmware.
+ *
+ * @phys_addr: address that faulted; it is looked up in the original EFI
+ *  memory map to find the region that was accessed.
+ * @regs: register state at the time of the fault; sp and ip are
+ *  rewritten when we exit firmware context.
+ *
+ * @return: Return 1, if successfully detected that the page fault is
+ * caused by firmware. After detecting we either fix it or exit firmware
+ * context. Return 0 otherwise.
+ *
+ * NOTE(review): the caller in no_context() passes its faulting "address"
+ * here and it is used as a physical address for the memory map lookup -
+ * confirm that the two are interchangeable in firmware context.
+ */
+int efi_illegal_accesses_fixup(unsigned long phys_addr, struct pt_regs *regs)
+{
+       char buf[64];
+       efi_memory_desc_t *md;
+       unsigned long long phys_addr_end, size_in_MB;
+
+       /*
+        * This handler should fix faults caused *only* by firmware. So make
+        * sure that we are indeed in firmware context. These global
+        * variables are set every time we call firmware and are cleared on
+        * exit.
+        */
+       if (xmm_regs_rsp == 0 || exit_fw_ctx_rip == 0)
+               return 0;
+
+       /*
+        * While in efi_pgd, we shouldn't fault on any addresses between
+        * 0x0000 - 0x0fff as they are always mapped. phys_addr is unsigned,
+        * so only the upper bound needs to be checked.
+        */
+       if (phys_addr <= 0x0fff)
+               return 0;
+
+       /*
+        * We need original memory map to retrieve memory descriptor that we
+        * faulted on. So, check if we succeeded in saving original memory
+        * map passed by firmware.
+        */
+       if (!original_memory_map_present) {
+               pr_info("Original memory map not found, aborting fixing illegal "
+                       "access by firmware\n");
+               return 0;
+       }
+
+       /*
+        * EFI Memory map could sometimes have holes, eg: SMRAM.
+        * So, make sure we have a valid memory descriptor describing the
+        * physical address we faulted on.
+        */
+       md = efi_get_md(phys_addr);
+       if (!md) {
+               pr_info("Failed to find EFI memory descriptor for PA: 0x%lx\n",
+                       phys_addr);
+               return 0;
+       }
+
+       /*
+        * EFI_RUNTIME_SERVICES_<CODE/DATA> regions are mapped into efi_pgd
+        * by kernel during boot and hence should never page fault. Accesses
+        * to these regions by firmware are legal, so this fault is not ours
+        * to handle.
+        */
+       if (md->type == EFI_RUNTIME_SERVICES_CODE ||
+           md->type == EFI_RUNTIME_SERVICES_DATA) {
+               pr_info("Kernel shouldn't page fault on accesses to "
+                       "EFI_RUNTIME_SERVICES_<CODE/DATA> regions\n");
+               return 0;
+       }
+
+       /*
+        * Now we are sure that an illegal access by firmware has caused
+        * page fault. Print stack trace and memory descriptor - useful to
+        * know which EFI Runtime Service is buggy and what it tried to
+        * access.
+        */
+       phys_addr_end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1;
+       size_in_MB = md->num_pages >> (20 - EFI_PAGE_SHIFT);
+       WARN(1, FW_BUG "Detected illegal access by Firmware at PA: 0x%lx\n",
+            phys_addr);
+       pr_info("EFI Memory Descriptor for offending PA is:\n");
+       pr_info("%s range=[0x%016llx-0x%016llx] (%lluMB)\n",
+               efi_md_typeattr_format(buf, sizeof(buf), md), md->phys_addr,
+               phys_addr_end, size_in_MB);
+
+       /*
+        * Fix illegal access to EFI_BOOT_SERVICES_<CODE/DATA> regions by
+        * creating VA->PA mappings. Further accesses to these regions will
+        * not page fault.
+        */
+       if (md->type == EFI_BOOT_SERVICES_CODE ||
+           md->type == EFI_BOOT_SERVICES_DATA) {
+               efi_map_region(md);
+               pr_info("Fixed illegal access at PA: 0x%lx\n", phys_addr);
+               return 1;
+       }
+
+       /*
+        * We didn't fault on EFI_RUNTIME_SERVICES_<CODE/DATA> or
+        * EFI_BOOT_SERVICES_<CODE/DATA> regions. This means that the
+        * firmware has illegally accessed some other EFI region which can't
+        * be fixed. Hence, exit firmware context.
+        *
+        * Remember that we were in firmware context before faulting, hence,
+        * regs->ip points to the firmware instruction that we faulted on
+        * and regs->sp has some value set by firmware.
+        *
+        * Exiting firmware context means changing regs->ip to the
+        * instruction that gets executed when firmware returns and regs->sp
+        * to the RSP value before calling firmware. We don't need to worry
+        * about other registers messed up by firmware as they are restored
+        * in efi_stub_64.S
+        */
+       regs->sp = xmm_regs_rsp;
+       regs->ip = exit_fw_ctx_rip;
+       exited_fw_ctx = true;
+       clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+       pr_info("Exited Firmware context and disabled EFI Runtime Services\n");
+       return 1;
+}
 #endif /* CONFIG_EFI_WARN_ON_ILLEGAL_ACCESSES */
diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index aa66cbf23512..0d1fe1da3a3a 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -94,6 +94,11 @@ struct efi_runtime_work {
        struct efi_runtime_work efi_rts_work;                           \
        efi_rts_work.status = EFI_ABORTED;                              \
                                                                        \
+       if (!efi_enabled(EFI_RUNTIME_SERVICES)) {                       \
+               pr_err("Aborting! EFI Runtime Services disabled\n");    \
+               goto exit;                                              \
+       }                                                               \
+                                                                       \
        init_completion(&efi_rts_work.efi_rts_comp);                    \
        INIT_WORK_ONSTACK(&efi_rts_work.work, efi_call_rts);            \
        efi_rts_work.arg1 = _arg1;                                      \
@@ -112,6 +117,7 @@ struct efi_runtime_work {
        else                                                            \
                pr_err("Failed to queue work to efi_rts_wq.\n");        \
                                                                        \
+exit:                                                                  \
        efi_rts_work.status;                                            \
 })
 
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-efi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to