On 04/19/19 at 04:34pm, Kairui Song wrote:
>  /* Locates and clears a region for a new top level page table. */
>  void initialize_identity_maps(void)
>  {
> -     /* If running as an SEV guest, the encryption mask is required. */
> -     set_sev_encryption_mask();
> -
> -     /* Exclude the encryption mask from __PHYSICAL_MASK */
> -     physical_mask &= ~sme_me_mask;
> -
> -     /* Init mapping_info with run-time function/buffer pointers. */
> -     mapping_info.alloc_pgt_page = alloc_pgt_page;
> -     mapping_info.context = &pgt_data;
> -     mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask;
> -     mapping_info.kernpg_flag = _KERNPG_TABLE;
> -
> -     /*
> -      * It should be impossible for this not to already be true,
> -      * but since calling this a second time would rewind the other
> -      * counters, let's just make sure this is reset too.
> -      */
> -     pgt_data.pgt_buf_offset = 0;
> -
> -     /*
> -      * If we came here via startup_32(), cr3 will be _pgtable already
> -      * and we must append to the existing area instead of entirely
> -      * overwriting it.
> -      *
> -      * With 5-level paging, we use '_pgtable' to allocate the p4d page 
> table,
> -      * the top-level page table is allocated separately.
> -      *
> -      * p4d_offset(top_level_pgt, 0) would cover both the 4- and 5-level
> -      * cases. On 4-level paging it's equal to 'top_level_pgt'.
> -      */
> -     top_level_pgt = read_cr3_pa();
> -     if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) {
> -             debug_putstr("booted via startup_32()\n");
> -             pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
> -             pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE;
> -             memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
> -     } else {
> -             debug_putstr("booted via startup_64()\n");
> -             pgt_data.pgt_buf = _pgtable;
> -             pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
> -             memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
> +     top_level_pgt = early_boot_top_pgt;
> +     if ((p4d_t *)top_level_pgt != (p4d_t *)_pgtable)
>               top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data);

Kairui, will you post these changes as a separate patchset later on?
I don't understand the purpose of these code changes. E.g. here, I
don't know why you introduce a new variable, early_boot_top_pgt, and
allocate the page table, even though both were already done in the old
initialize_identity_maps().

Thanks
Baoquan

> -     }
>  }
>  
>  /*
> @@ -141,8 +41,7 @@ void add_identity_map(unsigned long start, unsigned long 
> size)
>               return;
>  
>       /* Build the mapping. */
> -     kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt,
> -                               start, end);
> +     add_identity_map_pgd(start, end, top_level_pgt);
>  }
>  
>  /*
> diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
> index c0d6c560df69..6b3548080d15 100644
> --- a/arch/x86/boot/compressed/misc.c
> +++ b/arch/x86/boot/compressed/misc.c
> @@ -345,6 +345,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, 
> memptr heap,
>       const unsigned long kernel_total_size = VO__end - VO__text;
>       unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
>  
> +     initialize_pgtable_alloc();
> +
>       /* Retain x86 boot parameters pointer passed from startup_32/64. */
>       boot_params = rmode;
>  
> diff --git a/arch/x86/boot/compressed/pgtable.h 
> b/arch/x86/boot/compressed/pgtable.h
> index 6ff7e81b5628..443df2b65fbf 100644
> --- a/arch/x86/boot/compressed/pgtable.h
> +++ b/arch/x86/boot/compressed/pgtable.h
> @@ -16,5 +16,16 @@ extern unsigned long *trampoline_32bit;
>  
>  extern void trampoline_32bit_src(void *return_ptr);
>  
> +extern struct alloc_pgt_data pgt_data;
> +
> +extern unsigned long early_boot_top_pgt;
> +
> +void *alloc_pgt_page(void *context);
> +
> +int add_identity_map_pgd(unsigned long pstart,
> +                      unsigned long pend, unsigned long pgd);
> +
> +void initialize_pgtable_alloc(void);
> +
>  #endif /* __ASSEMBLER__ */
>  #endif /* BOOT_COMPRESSED_PAGETABLE_H */
> diff --git a/arch/x86/boot/compressed/pgtable_64.c 
> b/arch/x86/boot/compressed/pgtable_64.c
> index f8debf7aeb4c..cd36cf9e6a5c 100644
> --- a/arch/x86/boot/compressed/pgtable_64.c
> +++ b/arch/x86/boot/compressed/pgtable_64.c
> @@ -1,9 +1,30 @@
> +/*
> + * Since we're dealing with identity mappings, physical and virtual
> + * addresses are the same, so override these defines which are ultimately
> + * used by the headers in misc.h.
> + */
> +#define __pa(x)  ((unsigned long)(x))
> +#define __va(x)  ((void *)((unsigned long)(x)))
> +
> +/* No PAGE_TABLE_ISOLATION support needed either: */
> +#undef CONFIG_PAGE_TABLE_ISOLATION
> +
> +#include "misc.h"
> +#include "pgtable.h"
> +#include "../string.h"
> +
>  #include <linux/efi.h>
>  #include <asm/e820/types.h>
>  #include <asm/processor.h>
>  #include <asm/efi.h>
> -#include "pgtable.h"
> -#include "../string.h"
> +
> +/* For handling early ident mapping */
> +#include <asm/init.h>
> +#include <asm/pgtable.h>
> +/* Use the static base for this part of the boot process */
> +#undef __PAGE_OFFSET
> +#define __PAGE_OFFSET __PAGE_OFFSET_BASE
> +#include "../../mm/ident_map.c"
>  
>  /*
>   * __force_order is used by special_insns.h asm code to force instruction
> @@ -14,6 +35,28 @@
>   */
>  unsigned long __force_order;
>  
> +/* Used to track our page table allocation area. */
> +struct alloc_pgt_data {
> +     unsigned char *pgt_buf;
> +     unsigned long pgt_buf_size;
> +     unsigned long pgt_buf_offset;
> +};
> +
> +/* Used to track our allocated page tables. */
> +struct alloc_pgt_data pgt_data;
> +
> +/* Track the first loaded boot page table. */
> +unsigned long early_boot_top_pgt;
> +
> +phys_addr_t physical_mask = (1ULL << __PHYSICAL_MASK_SHIFT) - 1;
> +
> +/*
> + * Mapping information structure passed to kernel_ident_mapping_init().
> + * Due to relocation, pointers must be assigned at run time not build time.
> + */
> +static struct x86_mapping_info mapping_info;
> +
> +/* For handling trampoline. */
>  #define BIOS_START_MIN               0x20000U        /* 128K, less than this 
> is insane */
>  #define BIOS_START_MAX               0x9f000U        /* 640K, absolute 
> maximum */
>  
> @@ -202,3 +245,87 @@ void cleanup_trampoline(void *pgtable)
>       /* Restore trampoline memory */
>       memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE);
>  }
> +
> +/*
> + * Allocates space for a page table entry, using struct alloc_pgt_data
> + * above. Besides the local callers, this is used as the allocation
> + * callback in mapping_info below.
> + */
> +void *alloc_pgt_page(void *context)
> +{
> +     struct alloc_pgt_data *pages = (struct alloc_pgt_data *)context;
> +     unsigned char *entry;
> +
> +     /* Validate there is space available for a new page. */
> +     if (pages->pgt_buf_offset >= pages->pgt_buf_size) {
> +             debug_putstr("out of pgt_buf in " __FILE__ "!?\n");
> +             debug_putaddr(pages->pgt_buf_offset);
> +             debug_putaddr(pages->pgt_buf_size);
> +             return NULL;
> +     }
> +
> +     entry = pages->pgt_buf + pages->pgt_buf_offset;
> +     pages->pgt_buf_offset += PAGE_SIZE;
> +
> +     return entry;
> +}
> +
> +/* Locates and clears a region for update or create page table. */
> +void initialize_pgtable_alloc(void)
> +{
> +     /* If running as an SEV guest, the encryption mask is required. */
> +     set_sev_encryption_mask();
> +
> +     /* Exclude the encryption mask from __PHYSICAL_MASK */
> +     physical_mask &= ~sme_me_mask;
> +
> +     /* Init mapping_info with run-time function/buffer pointers. */
> +     mapping_info.alloc_pgt_page = alloc_pgt_page;
> +     mapping_info.context = &pgt_data;
> +     mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask;
> +     mapping_info.kernpg_flag = _KERNPG_TABLE;
> +
> +     /*
> +      * It should be impossible for this not to already be true,
> +      * but since calling this a second time would rewind the other
> +      * counters, let's just make sure this is reset too.
> +      */
> +     pgt_data.pgt_buf_offset = 0;
> +
> +     /*
> +      * If we came here via startup_32(), cr3 will be _pgtable already
> +      * and we must append to the existing area instead of entirely
> +      * overwriting it.
> +      *
> +      * With 5-level paging, we use '_pgtable' to allocate the p4d page
> +      * table, the top-level page table is allocated separately.
> +      *
> +      * p4d_offset(early_boot_top_pgt, 0) would cover both the 4- and 5-level
> +      * cases. On 4-level paging it's equal to 'early_boot_top_pgt'.
> +      */
> +
> +     early_boot_top_pgt = read_cr3_pa();
> +     early_boot_top_pgt = (unsigned long)p4d_offset(
> +                     (pgd_t *)early_boot_top_pgt, 0);
> +     if ((p4d_t *)early_boot_top_pgt == (p4d_t *)_pgtable) {
> +             debug_putstr("booted via startup_32()\n");
> +             pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
> +             pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE;
> +             memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
> +     } else {
> +             debug_putstr("booted via startup_64()\n");
> +             pgt_data.pgt_buf = _pgtable;
> +             pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
> +             memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
> +     }
> +}
> +
> +/*
> + * Helper for mapping extra memory region in very early stage
> + * before extract and execute the actual kernel
> + */
> +int add_identity_map_pgd(unsigned long pstart, unsigned long pend,
> +                      unsigned long pgd)
> +{
> +     kernel_ident_mapping_init(&mapping_info, (pgd_t *)pgd, pstart, pend);
> +}
> diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
> index 680c320363db..fb37eb98b65d 100644
> --- a/arch/x86/include/asm/boot.h
> +++ b/arch/x86/include/asm/boot.h
> @@ -33,6 +33,8 @@
>  #ifdef CONFIG_X86_64
>  # define BOOT_STACK_SIZE     0x4000
>  
> +/* Reserve one page for possible extra mapping requirement */
> +# define BOOT_EXTRA_PGT_SIZE (1*4096)
>  # define BOOT_INIT_PGT_SIZE  (6*4096)
>  # ifdef CONFIG_RANDOMIZE_BASE
>  /*
> @@ -43,12 +45,12 @@
>   * Total is 19 pages.
>   */
>  #  ifdef CONFIG_X86_VERBOSE_BOOTUP
> -#   define BOOT_PGT_SIZE     (19*4096)
> +#   define BOOT_PGT_SIZE     ((19 * 4096) + BOOT_EXTRA_PGT_SIZE)
>  #  else /* !CONFIG_X86_VERBOSE_BOOTUP */
> -#   define BOOT_PGT_SIZE     (17*4096)
> +#   define BOOT_PGT_SIZE     ((17 * 4096) + BOOT_EXTRA_PGT_SIZE)
>  #  endif
>  # else /* !CONFIG_RANDOMIZE_BASE */
> -#  define BOOT_PGT_SIZE              BOOT_INIT_PGT_SIZE
> +#  define BOOT_PGT_SIZE              (BOOT_INIT_PGT_SIZE + 
> BOOT_EXTRA_PGT_SIZE)
>  # endif
>  
>  #else /* !CONFIG_X86_64 */
> -- 
> 2.20.1
> 

_______________________________________________
kexec mailing list
[email protected]
http://lists.infradead.org/mailman/listinfo/kexec

Reply via email to