On Thu, Jun 20, 2024 at 10:23:59PM +0000, Ashish Kalra wrote:
> From: Ashish Kalra <ashish.ka...@amd.com>
> 
> SNP guests allocate shared buffers to perform I/O. It is done by
> allocating pages normally from the buddy allocator and converting them
> to shared with set_memory_decrypted().
> 
> The second kernel has no idea what memory is converted this way. It only

The second, kexec-ed, kernel...

> sees E820_TYPE_RAM.

...

> @@ -92,6 +94,9 @@ static struct ghcb *boot_ghcb __section(".data");
>  /* Bitmap of SEV features supported by the hypervisor */
>  static u64 sev_hv_features __ro_after_init;
>  
> +/* Last address to be switched to private during kexec */
> +static unsigned long kexec_last_addr_to_make_private;

This is particularly yucky.

AFAIU, you want to:

1. Skip GHCB addresses when making all pages private

2. Once you're done converting, in snp_kexec_finish() go over the GHCB
   addresses and convert them explicitly, one-by-one, without this silly
   variable

>  /* #VC handler runtime per-CPU data */
>  struct sev_es_runtime_data {
>       struct ghcb ghcb_page;
> @@ -1010,6 +1015,169 @@ void snp_accept_memory(phys_addr_t start, phys_addr_t 
> end)
>       set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
>  }
>  
> +static bool set_pte_enc(pte_t *kpte, int level, void *va)

Code duplication: __set_clr_pte_enc().

You need to refactor the code instead of adding yet another generic-looking
helper just because it is easier this way.

> +{
> +     pte_t new_pte;
> +
> +     if (pte_none(*kpte))
> +             return false;
> +
> +     /*
> +      * Change the physical page attribute from C=0 to C=1. Flush the
> +      * caches to ensure that data gets accessed with the correct C-bit.
> +      */
> +     if (pte_present(*kpte))
> +             clflush_cache_range(va, page_level_size(level));
> +
> +     new_pte = __pte(cc_mkenc(pte_val(*kpte)));
> +     set_pte_atomic(kpte, new_pte);
> +
> +     return true;
> +}
> +
> +static bool make_pte_private(pte_t *pte, unsigned long addr, int pages, int 
> level)
> +{
> +     struct sev_es_runtime_data *data;
> +     struct ghcb *ghcb;
> +
> +     data = this_cpu_read(runtime_data);
> +     ghcb = &data->ghcb_page;
> +
> +     /* Check for GHCB for being part of a PMD range. */
> +     if ((unsigned long)ghcb >= addr &&
> +         (unsigned long)ghcb <= (addr + (pages * PAGE_SIZE))) {
> +             /*
> +              * Ensure that the current cpu's GHCB is made private

s/cpu/CPU/g

There are multiple places.

> +              * at the end of unshared loop so that we continue to use the
> +              * optimized GHCB protocol and not force the switch to
> +              * MSR protocol till the very end.
> +              */
> +             pr_debug("setting boot_ghcb to NULL for this cpu ghcb\n");
> +             kexec_last_addr_to_make_private = addr;
> +             return true;
> +     }
> +
> +     if (!set_pte_enc(pte, level, (void *)addr))
> +             return false;
> +
> +     snp_set_memory_private(addr, pages);
> +
> +     return true;
> +}
> +
> +static void unshare_all_memory(void)
> +{
> +     unsigned long addr, end;
> +
> +     /*
> +      * Walk direct mapping and convert all shared memory back to private,

This ends with a ','. Is anything more coming?

> +      */
> +
> +     addr = PAGE_OFFSET;
> +     end  = PAGE_OFFSET + get_max_mapped();
> +
> +     while (addr < end) {
> +             unsigned long size;
> +             unsigned int level;
> +             pte_t *pte;
> +
> +             pte = lookup_address(addr, &level);
> +             size = page_level_size(level);
> +
> +             /*
> +              * pte_none() check is required to skip physical memory holes 
> in direct mapped.

This sentence needs to be written for humans. And come to think of it, you can
simply drop it: lookup_address() can return NULL, so you just need to check
its retval.

> +              */
> +             if (pte && pte_decrypted(*pte) && !pte_none(*pte)) {
> +                     int pages = size / PAGE_SIZE;
> +
> +                     if (!make_pte_private(pte, addr, pages, level)) {
> +                             pr_err("Failed to unshare range %#lx-%#lx\n",
> +                                    addr, addr + size);

Might as well terminate the guest here.

> +                     }
> +
> +             }
> +
> +             addr += size;
> +     }
> +     __flush_tlb_all();
> +
> +}
> +
> +static void unshare_all_bss_decrypted_memory(void)

Why is this a separate function and not part of unshare_all_memory()?

> +{
> +     unsigned long vaddr, vaddr_end;
> +     unsigned int level;
> +     unsigned int npages;
> +     pte_t *pte;
> +
> +     vaddr = (unsigned long)__start_bss_decrypted;
> +     vaddr_end = (unsigned long)__start_bss_decrypted_unused;
> +     npages = (vaddr_end - vaddr) >> PAGE_SHIFT;
> +     for (; vaddr < vaddr_end; vaddr += PAGE_SIZE) {
> +             pte = lookup_address(vaddr, &level);
> +             if (!pte || !pte_decrypted(*pte) || pte_none(*pte))
> +                     continue;
> +
> +             set_pte_enc(pte, level, (void *)vaddr);
> +     }
> +     vaddr = (unsigned long)__start_bss_decrypted;
> +     snp_set_memory_private(vaddr, npages);
> +}

...

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

Reply via email to