[PATCH 08/11] efi: Allow drivers to reserve boot services forever

2016-06-23 Thread Matt Fleming
Today, it is not possible for drivers to reserve EFI boot services for
access after efi_free_boot_services() has been called on x86. For
ARM/arm64 it can be done simply by calling memblock_reserve().

Having this ability for all three architectures is desirable for a
couple of reasons,

  1) It saves drivers copying data out of those regions
  2) kexec reboot can now make use of things like ESRT

Instead of using the standard memblock_reserve() which is insufficient
to reserve the region on x86 (see efi_reserve_boot_services()), a new
API is introduced in this patch; efi_mem_reserve().

efi.memmap now always represents which EFI memory regions are
available. On x86 the EFI boot services regions that have not been
reserved via efi_mem_reserve() will be removed from efi.memmap during
efi_free_boot_services().

This has implications for kexec, since it is not possible for a newly
kexec'd kernel to access the same boot services regions that the
initial boot kernel had access to unless they are reserved by every
kexec kernel in the chain.

Cc: Ard Biesheuvel 
Cc: Leif Lindholm 
Cc: Peter Jones 
Cc: Borislav Petkov 
Cc: Mark Rutland 
Cc: Dave Young 
Signed-off-by: Matt Fleming 
---
 arch/x86/platform/efi/quirks.c | 121 +
 drivers/firmware/efi/efi.c |  30 ++
 include/linux/efi.h|   1 +
 3 files changed, 141 insertions(+), 11 deletions(-)

diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 570c33683a26..3dae771266a6 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -164,6 +164,71 @@ efi_status_t efi_query_variable_store(u32 attributes, 
unsigned long size,
 EXPORT_SYMBOL_GPL(efi_query_variable_store);
 
 /*
+ * The UEFI specification makes it clear that the operating system is
+ * free to do whatever it wants with boot services code after
+ * ExitBootServices() has been called. Ignoring this recommendation a
+ * significant bunch of EFI implementations continue calling into boot
+ * services code (SetVirtualAddressMap). In order to work around such
+ * buggy implementations we reserve boot services region during EFI
+ * init and make sure it stays executable. Then, after
+ * SetVirtualAddressMap(), it is discarded.
+ *
+ * However, some boot services regions contain data that is required
+ * by drivers, so we need to track which memory ranges can never be
+ * freed. This is done by tagging those regions with the
+ * EFI_MEMORY_RUNTIME attribute.
+ *
+ * Any driver that wants to mark a region as reserved must use
+ * efi_mem_reserve() which will insert a new EFI memory descriptor
+ * into efi.memmap (splitting existing regions if necessary) and tag
+ * it with EFI_MEMORY_RUNTIME.
+ */
+void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size)
+{
+   phys_addr_t new_phys, new_size;
+   struct efi_mem_range mr;
+   efi_memory_desc_t md;
+   int num_entries;
+   void *new;
+
+   if (efi_mem_desc_lookup(addr, &md)) {
+   pr_err("Failed to lookup EFI memory descriptor for %pa\n", 
&addr);
+   return;
+   }
+
+   if (addr + size > md.phys_addr + (md.num_pages << EFI_PAGE_SHIFT)) {
+   pr_err("Region spans EFI memory descriptors, %pa\n", &addr);
+   return;
+   }
+
+   mr.range.start = addr;
+   mr.range.end = addr + size;
+   mr.attribute = md.attribute | EFI_MEMORY_RUNTIME;
+
+   num_entries = efi_memmap_split_count(&md, &mr.range);
+   num_entries += efi.memmap.nr_map;
+
+   new_size = efi.memmap.desc_size * num_entries;
+
+   new_phys = memblock_alloc(new_size, 0);
+   if (!new_phys) {
+   pr_err("Could not allocate boot services memmap\n");
+   return;
+   }
+
+   new = early_memremap(new_phys, new_size);
+   if (!new) {
+   pr_err("Failed to map new boot services memmap\n");
+   return;
+   }
+
+   efi_memmap_insert(&efi.memmap, new, &mr);
+   early_memunmap(new, new_size);
+
+   efi_memmap_install(new_phys, num_entries);
+}
+
+/*
  * Helper function for efi_reserve_boot_services() to figure out if we
  * can free regions in efi_free_boot_services().
  *
@@ -184,15 +249,6 @@ static bool can_free_region(u64 start, u64 size)
return true;
 }
 
-/*
- * The UEFI specification makes it clear that the operating system is free to 
do
- * whatever it wants with boot services code after ExitBootServices() has been
- * called. Ignoring this recommendation a significant bunch of EFI 
implementations 
- * continue calling into boot services code (SetVirtualAddressMap). In order 
to 
- * work around such buggy implementations we reserve boot services region 
during 
- * EFI init and make sure it stays executable. Then, after 
SetVirtualAddressMap(), it
-* is discarded.
-*/
 void __init 

[PATCH 08/11] efi: Allow drivers to reserve boot services forever

2016-06-23 Thread Matt Fleming
Today, it is not possible for drivers to reserve EFI boot services for
access after efi_free_boot_services() has been called on x86. For
ARM/arm64 it can be done simply by calling memblock_reserve().

Having this ability for all three architectures is desirable for a
couple of reasons,

  1) It saves drivers copying data out of those regions
  2) kexec reboot can now make use of things like ESRT

Instead of using the standard memblock_reserve() which is insufficient
to reserve the region on x86 (see efi_reserve_boot_services()), a new
API is introduced in this patch; efi_mem_reserve().

efi.memmap now always represents which EFI memory regions are
available. On x86 the EFI boot services regions that have not been
reserved via efi_mem_reserve() will be removed from efi.memmap during
efi_free_boot_services().

This has implications for kexec, since it is not possible for a newly
kexec'd kernel to access the same boot services regions that the
initial boot kernel had access to unless they are reserved by every
kexec kernel in the chain.

Cc: Ard Biesheuvel 
Cc: Leif Lindholm 
Cc: Peter Jones 
Cc: Borislav Petkov 
Cc: Mark Rutland 
Cc: Dave Young 
Signed-off-by: Matt Fleming 
---
 arch/x86/platform/efi/quirks.c | 121 +
 drivers/firmware/efi/efi.c |  30 ++
 include/linux/efi.h|   1 +
 3 files changed, 141 insertions(+), 11 deletions(-)

diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 570c33683a26..3dae771266a6 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -164,6 +164,71 @@ efi_status_t efi_query_variable_store(u32 attributes, 
unsigned long size,
 EXPORT_SYMBOL_GPL(efi_query_variable_store);
 
 /*
+ * The UEFI specification makes it clear that the operating system is
+ * free to do whatever it wants with boot services code after
+ * ExitBootServices() has been called. Ignoring this recommendation a
+ * significant bunch of EFI implementations continue calling into boot
+ * services code (SetVirtualAddressMap). In order to work around such
+ * buggy implementations we reserve boot services region during EFI
+ * init and make sure it stays executable. Then, after
+ * SetVirtualAddressMap(), it is discarded.
+ *
+ * However, some boot services regions contain data that is required
+ * by drivers, so we need to track which memory ranges can never be
+ * freed. This is done by tagging those regions with the
+ * EFI_MEMORY_RUNTIME attribute.
+ *
+ * Any driver that wants to mark a region as reserved must use
+ * efi_mem_reserve() which will insert a new EFI memory descriptor
+ * into efi.memmap (splitting existing regions if necessary) and tag
+ * it with EFI_MEMORY_RUNTIME.
+ */
+void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size)
+{
+   phys_addr_t new_phys, new_size;
+   struct efi_mem_range mr;
+   efi_memory_desc_t md;
+   int num_entries;
+   void *new;
+
+   if (efi_mem_desc_lookup(addr, &md)) {
+   pr_err("Failed to lookup EFI memory descriptor for %pa\n", 
&addr);
+   return;
+   }
+
+   if (addr + size > md.phys_addr + (md.num_pages << EFI_PAGE_SHIFT)) {
+   pr_err("Region spans EFI memory descriptors, %pa\n", &addr);
+   return;
+   }
+
+   mr.range.start = addr;
+   mr.range.end = addr + size;
+   mr.attribute = md.attribute | EFI_MEMORY_RUNTIME;
+
+   num_entries = efi_memmap_split_count(&md, &mr.range);
+   num_entries += efi.memmap.nr_map;
+
+   new_size = efi.memmap.desc_size * num_entries;
+
+   new_phys = memblock_alloc(new_size, 0);
+   if (!new_phys) {
+   pr_err("Could not allocate boot services memmap\n");
+   return;
+   }
+
+   new = early_memremap(new_phys, new_size);
+   if (!new) {
+   pr_err("Failed to map new boot services memmap\n");
+   return;
+   }
+
+   efi_memmap_insert(&efi.memmap, new, &mr);
+   early_memunmap(new, new_size);
+
+   efi_memmap_install(new_phys, num_entries);
+}
+
+/*
  * Helper function for efi_reserve_boot_services() to figure out if we
  * can free regions in efi_free_boot_services().
  *
@@ -184,15 +249,6 @@ static bool can_free_region(u64 start, u64 size)
return true;
 }
 
-/*
- * The UEFI specification makes it clear that the operating system is free to 
do
- * whatever it wants with boot services code after ExitBootServices() has been
- * called. Ignoring this recommendation a significant bunch of EFI 
implementations 
- * continue calling into boot services code (SetVirtualAddressMap). In order 
to 
- * work around such buggy implementations we reserve boot services region 
during 
- * EFI init and make sure it stays executable. Then, after 
SetVirtualAddressMap(), it
-* is discarded.
-*/
 void __init efi_reserve_boot_services(void)
 {
efi_memory_desc_t *md;
@@ -249,22 +305,65 @@ void __init efi_reserve_boot_services(void)
 
 void __init