On Thu, Jun 20, 2024 at 10:23:59PM +0000, Ashish Kalra wrote: > From: Ashish Kalra <ashish.ka...@amd.com> > > SNP guests allocate shared buffers to perform I/O. It is done by > allocating pages normally from the buddy allocator and converting them > to shared with set_memory_decrypted(). > > The second kernel has no idea what memory is converted this way. It only
The second, kexec-ed, kernel... > sees E820_TYPE_RAM. ... > @@ -92,6 +94,9 @@ static struct ghcb *boot_ghcb __section(".data"); > /* Bitmap of SEV features supported by the hypervisor */ > static u64 sev_hv_features __ro_after_init; > > +/* Last address to be switched to private during kexec */ > +static unsigned long kexec_last_addr_to_make_private; This is particularly yucky. AFAIU, you want to: 1. Skip GHCB addresses when making all pages private 2. Once you're done converting, in snp_kexec_finish() go over the GHCB addresses and convert them explicitly, one-by-one, without this silly variable > /* #VC handler runtime per-CPU data */ > struct sev_es_runtime_data { > struct ghcb ghcb_page; > @@ -1010,6 +1015,169 @@ void snp_accept_memory(phys_addr_t start, phys_addr_t > end) > set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE); > } > > +static bool set_pte_enc(pte_t *kpte, int level, void *va) Code duplication: __set_clr_pte_enc(). You need to refactor the code instead of adding just another, generic-looking helper just because it is easier this way. > +{ > + pte_t new_pte; > + > + if (pte_none(*kpte)) > + return false; > + > + /* > + * Change the physical page attribute from C=0 to C=1. Flush the > + * caches to ensure that data gets accessed with the correct C-bit. > + */ > + if (pte_present(*kpte)) > + clflush_cache_range(va, page_level_size(level)); > + > + new_pte = __pte(cc_mkenc(pte_val(*kpte))); > + set_pte_atomic(kpte, new_pte); > + > + return true; > +} > + > +static bool make_pte_private(pte_t *pte, unsigned long addr, int pages, int > level) > +{ > + struct sev_es_runtime_data *data; > + struct ghcb *ghcb; > + > + data = this_cpu_read(runtime_data); > + ghcb = &data->ghcb_page; > + > + /* Check for GHCB for being part of a PMD range. */ > + if ((unsigned long)ghcb >= addr && > + (unsigned long)ghcb <= (addr + (pages * PAGE_SIZE))) { > + /* > + * Ensure that the current cpu's GHCB is made private s/cpu/CPU/g There are multiple places. 
> + * at the end of unshared loop so that we continue to use the > + * optimized GHCB protocol and not force the switch to > + * MSR protocol till the very end. > + */ > + pr_debug("setting boot_ghcb to NULL for this cpu ghcb\n"); > + kexec_last_addr_to_make_private = addr; > + return true; > + } > + > + if (!set_pte_enc(pte, level, (void *)addr)) > + return false; > + > + snp_set_memory_private(addr, pages); > + > + return true; > +} > + > +static void unshare_all_memory(void) > +{ > + unsigned long addr, end; > + > + /* > + * Walk direct mapping and convert all shared memory back to private, This ends with a ','. Is anything more coming? > + */ > + > + addr = PAGE_OFFSET; > + end = PAGE_OFFSET + get_max_mapped(); > + > + while (addr < end) { > + unsigned long size; > + unsigned int level; > + pte_t *pte; > + > + pte = lookup_address(addr, &level); > + size = page_level_size(level); > + > + /* > + * pte_none() check is required to skip physical memory holes > in direct mapped. This sentence needs to be written for humans. And come to think of it, you can simply drop it. lookup_address() can return NULL so you simply need to check its retval. > + */ > + if (pte && pte_decrypted(*pte) && !pte_none(*pte)) { > + int pages = size / PAGE_SIZE; > + > + if (!make_pte_private(pte, addr, pages, level)) { > + pr_err("Failed to unshare range %#lx-%#lx\n", > + addr, addr + size); Might as well terminate the guest here. > + } > + > + } > + > + addr += size; > + } > + __flush_tlb_all(); > + > +} > + > +static void unshare_all_bss_decrypted_memory(void) Why is this a separate function and not part of unshare_all_memory()? 
> +{ > + unsigned long vaddr, vaddr_end; > + unsigned int level; > + unsigned int npages; > + pte_t *pte; > + > + vaddr = (unsigned long)__start_bss_decrypted; > + vaddr_end = (unsigned long)__start_bss_decrypted_unused; > + npages = (vaddr_end - vaddr) >> PAGE_SHIFT; > + for (; vaddr < vaddr_end; vaddr += PAGE_SIZE) { > + pte = lookup_address(vaddr, &level); > + if (!pte || !pte_decrypted(*pte) || pte_none(*pte)) > + continue; > + > + set_pte_enc(pte, level, (void *)vaddr); > + } > + vaddr = (unsigned long)__start_bss_decrypted; > + snp_set_memory_private(vaddr, npages); > +} ... -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec