On 01/11/14 at 09:49pm, Borislav Petkov wrote:
> From: Borislav Petkov <b...@suse.de>
> 
> Currently, running SetVirtualAddressMap() and passing the physical
> address of the virtual map array was working only by a lucky coincidence
> because the memory was present in the EFI page table too. Until Toshi
> went and booted this on a big HP box - the krealloc() manner of resizing
> the memmap we're doing did allocate from such physical addresses which
> were not mapped anymore and boom:
> 
> http://lkml.kernel.org/r/1386806463.1791.295.ca...@misato.fc.hp.com
> 
> One way to take care of that issue is to reimplement the krealloc thing
> but with pages. We start with contiguous pages of order 1, i.e. 2 pages,
> and when we deplete that memory (shouldn't happen all that often but you
> know firmware) we realloc the next power-of-two pages.
> 
> Having the pages, it is much more handy and easy to map them into the
> EFI page table with the already existing mapping code which we're using
> for building the virtual mappings.
> 
> And, it doesn't matter all that much how much pages we've used as we're
> freeing them right after they've fulfilled their purpose at the end of
> the function anyway.
> 
> Reported-by: Toshi Kani <toshi.k...@hp.com>
> Signed-off-by: Borislav Petkov <b...@suse.de>
> ---
>  arch/x86/include/asm/efi.h     |  3 +-
>  arch/x86/platform/efi/efi.c    | 62 
> ++++++++++++++++++++++++++++++------------
>  arch/x86/platform/efi/efi_32.c |  6 +++-
>  arch/x86/platform/efi/efi_64.c | 31 +++++++++++++++++++--
>  4 files changed, 80 insertions(+), 22 deletions(-)
> 
> diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
> index 3b978c472d08..0e7973f7492e 100644
> --- a/arch/x86/include/asm/efi.h
> +++ b/arch/x86/include/asm/efi.h
> @@ -130,7 +130,8 @@ extern void efi_memory_uc(u64 addr, unsigned long size);
>  extern void __init efi_map_region(efi_memory_desc_t *md);
>  extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
>  extern void efi_sync_low_kernel_mappings(void);
> -extern void efi_setup_page_tables(void);
> +extern int efi_setup_page_tables(unsigned long pa_memmap, unsigned 
> num_pages);
> +extern void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned 
> num_pages);
>  extern void __init old_map_region(efi_memory_desc_t *md);
>  
>  struct efi_setup_data {
> diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
> index c34be4ce94c9..65a8c969db87 100644
> --- a/arch/x86/platform/efi/efi.c
> +++ b/arch/x86/platform/efi/efi.c
> @@ -948,14 +948,36 @@ static void __init efi_map_regions_fixed(void)
>  
>  }
>  
> +static void *realloc_pages(void *old_memmap, int old_shift)
> +{
> +     void *ret;
> +
> +     ret = (void *)__get_free_pages(GFP_KERNEL, old_shift + 1);
> +     if (!ret)
> +             goto out;
> +
> +     /*
> +      * A first-time allocation doesn't have anything to copy.
> +      */
> +     if (!old_memmap)
> +             return ret;
> +
> +     memcpy(ret, old_memmap, PAGE_SIZE << old_shift);
> +
> +out:
> +     __free_pages(old_memmap, old_shift);
> +     return ret;
> +}
> +
>  /*
> - * Map efi memory ranges for runtime serivce and update new_memmap with 
> virtual
> - * addresses.
> + * Map the efi memory ranges of the runtime services and update new_mmap with
> + * virtual addresses.
>   */
> -static void * __init efi_map_regions(int *count)
> +static void * __init efi_map_regions(int *count, int *pg_shift)
>  {
> +     void *p, *new_memmap = NULL;
> +     unsigned long left = 0;
>       efi_memory_desc_t *md;
> -     void *p, *tmp, *new_memmap = NULL;
>  
>       for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
>               md = p;
> @@ -970,20 +992,23 @@ static void * __init efi_map_regions(int *count)
>               efi_map_region(md);
>               get_systab_virt_addr(md);
>  
> -             tmp = krealloc(new_memmap, (*count + 1) * memmap.desc_size,
> -                            GFP_KERNEL);
> -             if (!tmp)
> -                     goto out;
> -             new_memmap = tmp;
> +             if (left < memmap.desc_size) {
> +                     new_memmap = realloc_pages(new_memmap, *pg_shift);
> +                     if (!new_memmap)
> +                             return NULL;
> +
> +                     left += PAGE_SIZE << *pg_shift;
> +                     (*pg_shift)++;
> +             }
> +
>               memcpy(new_memmap + (*count * memmap.desc_size), md,
>                      memmap.desc_size);
> +
> +             left -= memmap.desc_size;

It would be better to add a safeguard check for desc_size here, even though
desc_size > PAGE_SIZE is currently impossible, right?

>               (*count)++;
>       }
>  
>       return new_memmap;
> -out:
> -     kfree(new_memmap);
> -     return NULL;
>  }
>  
>  /*
> @@ -1009,9 +1034,9 @@ out:
>   */
>  void __init efi_enter_virtual_mode(void)
>  {
> -     efi_status_t status;
> +     int err, count = 0, pg_shift = 0;
>       void *new_memmap = NULL;
> -     int err, count = 0;
> +     efi_status_t status;
>  
>       efi.systab = NULL;
>  
> @@ -1028,7 +1053,7 @@ void __init efi_enter_virtual_mode(void)
>               efi_map_regions_fixed();
>       } else {
>               efi_merge_regions();
> -             new_memmap = efi_map_regions(&count);
> +             new_memmap = efi_map_regions(&count, &pg_shift);
>               if (!new_memmap) {
>                       pr_err("Error reallocating memory, EFI runtime 
> non-functional!\n");
>                       return;
> @@ -1041,7 +1066,9 @@ void __init efi_enter_virtual_mode(void)
>  
>       BUG_ON(!efi.systab);
>  
> -     efi_setup_page_tables();
> +     if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift))
> +             return;
> +
>       efi_sync_low_kernel_mappings();
>       dump_pagetable();
>  
> @@ -1083,7 +1110,8 @@ void __init efi_enter_virtual_mode(void)
>       if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX))
>               runtime_code_page_mkexec();
>  
> -     kfree(new_memmap);
> +     efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift);
> +     __free_pages(new_memmap, pg_shift);
>  
>       /* clean DUMMY object */
>       efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
> diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
> index 249b183cf417..d58a2015e22d 100644
> --- a/arch/x86/platform/efi/efi_32.c
> +++ b/arch/x86/platform/efi/efi_32.c
> @@ -40,7 +40,11 @@
>  static unsigned long efi_rt_eflags;
>  
>  void efi_sync_low_kernel_mappings(void) {}
> -void efi_setup_page_tables(void) {}
> +int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
> +{
> +     return 0;
> +}
> +void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) {}
>  
>  void __init efi_map_region(efi_memory_desc_t *md)
>  {
> diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
> index 6284f158a47d..3d66844ea156 100644
> --- a/arch/x86/platform/efi/efi_64.c
> +++ b/arch/x86/platform/efi/efi_64.c
> @@ -137,12 +137,37 @@ void efi_sync_low_kernel_mappings(void)
>               sizeof(pgd_t) * num_pgds);
>  }
>  
> -void efi_setup_page_tables(void)
> +int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
>  {
> +     pgd_t *pgd;
> +
> +     if (efi_enabled(EFI_OLD_MEMMAP))
> +             return 0;
> +
> +     /*
> +      * It can happen that the physical address of new_memmap lands in memory
> +      * which is not mapped in the EFI page table. Therefore we need to go
> +      * and ident-map those pages containing the map before calling
> +      * phys_efi_set_virtual_address_map().
> +      */
> +     if (kernel_map_pages_in_pgd(pgd, pa_memmap, pa_memmap, num_pages, 
> _PAGE_NX)) {
> +             pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap);
> +             return 1;
> +     }
> +
>       efi_scratch.efi_pgt = (pgd_t *)(unsigned 
> long)real_mode_header->trampoline_pgd;
> +     efi_scratch.use_pgd = true;
>  
> -     if (!efi_enabled(EFI_OLD_MEMMAP))
> -             efi_scratch.use_pgd = true;
> +     pgd = __va(efi_scratch.efi_pgt);
> +
> +     return 0;
> +}
> +
> +void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
> +{
> +     pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
> +
> +     kernel_unmap_pages_in_pgd(pgd, pa_memmap, num_pages);
>  }
>  
>  static void __init __map_region(efi_memory_desc_t *md, u64 va)
> -- 
> 1.8.5.2.192.g7794a68
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to