On Fri, Feb 23, 2024 at 11:39:07AM -0800, Dave Hansen wrote:
> On 2/12/24 02:44, Kirill A. Shutemov wrote:
> > +static void tdx_kexec_stop_conversion(bool crash)
> > +{
> > +   /* Stop new private<->shared conversions */
> > +   conversion_allowed = false;
> > +
> > +   /*
> > +    * Make sure conversion_allowed is cleared before checking
> > +    * conversions_in_progress.
> > +    */
> > +   barrier();
> > +
> > +   /*
> > +    * Crash kernel reaches here with interrupts disabled: can't wait for
> > +    * conversions to finish.
> > +    *
> > +    * If race happened, just report and proceed.
> > +    */
> > +   if (!crash) {
> > +           unsigned long timeout;
> > +
> > +           /*
> > +            * Wait for in-flight conversions to complete.
> > +            *
> > +            * Do not wait more than 30 seconds.
> > +            */
> > +           timeout = 30 * USEC_PER_SEC;
> > +           while (atomic_read(&conversions_in_progress) && timeout--)
> > +                   udelay(1);
> > +   }
> > +
> > +   if (atomic_read(&conversions_in_progress))
> > +           pr_warn("Failed to finish shared<->private conversions\n");
> > +}
> 
> I'd really prefer we find a way to do this with actual locks, especially
> 'conversion_allowed'.
> 
> This is _awfully_ close to being able to be handled by a rwsem where the
> readers are the converters and tdx_kexec_stop_conversion() takes a write.

Okay, here's what I came up with. It needs more testing.

Any comments?
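
The idea, condensed from the diff below: conversions become readers on a new
mem_enc_lock rwsem and the kexec stop path becomes the writer. In the crash
case the writer only trylocks, so nothing sleeps with interrupts off, and a
converter that loses the race gets -EBUSY. Roughly (a sketch of the pattern,
not a separate implementation):

	static DECLARE_RWSEM(mem_enc_lock);

	/* kexec side: writer; in the crash case don't sleep, just try */
	bool stop_memory_enc_conversion(bool wait)
	{
		if (!wait)
			return down_write_trylock(&mem_enc_lock);

		down_write(&mem_enc_lock);
		return true;
	}

	/* conversion side: readers back off once kexec holds the lock */
	static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
	{
		int ret = 0;

		if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
			if (!down_read_trylock(&mem_enc_lock))
				return -EBUSY;

			ret = __set_memory_enc_pgtable(addr, numpages, enc);

			up_read(&mem_enc_lock);
		}

		return ret;
	}

One side effect is that set_memory_encrypted()/set_memory_decrypted() can now
return -EBUSY to their callers once the kexec path has taken the write lock.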

diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index fd212c9bad89..5eb0dac33f37 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -6,8 +6,10 @@
 
 #include <linux/cpufeature.h>
 #include <linux/debugfs.h>
+#include <linux/delay.h>
 #include <linux/export.h>
 #include <linux/io.h>
+#include <linux/kexec.h>
 #include <asm/coco.h>
 #include <asm/tdx.h>
 #include <asm/vmx.h>
@@ -15,6 +17,7 @@
 #include <asm/insn.h>
 #include <asm/insn-eval.h>
 #include <asm/pgtable.h>
+#include <asm/set_memory.h>
 
 /* MMIO direction */
 #define EPT_READ       0
@@ -837,6 +840,65 @@ static int tdx_enc_status_change_finish(unsigned long vaddr, int numpages,
        return 0;
 }
 
+static void tdx_kexec_stop_conversion(bool crash)
+{
+       /* Stop new private<->shared conversions */
+       if (!stop_memory_enc_conversion(!crash))
+               pr_warn("Failed to finish shared<->private conversions\n");
+}
+
+static void tdx_kexec_unshare_mem(void)
+{
+       unsigned long addr, end;
+       long found = 0, shared;
+
+       /*
+        * Walk the direct mapping and convert all shared memory back to private.
+        */
+
+       addr = PAGE_OFFSET;
+       end  = PAGE_OFFSET + get_max_mapped();
+
+       while (addr < end) {
+               unsigned long size;
+               unsigned int level;
+               pte_t *pte;
+
+               pte = lookup_address(addr, &level);
+               size = page_level_size(level);
+
+               if (pte && pte_decrypted(*pte)) {
+                       int pages = size / PAGE_SIZE;
+
+                       /*
+                        * Touching memory with shared bit set triggers implicit
+                        * conversion to shared.
+                        *
+                        * Make sure nobody touches the shared range from
+                        * now on.
+                        */
+                       set_pte(pte, __pte(0));
+
+                       if (!tdx_enc_status_changed(addr, pages, true)) {
+                               pr_err("Failed to unshare range %#lx-%#lx\n",
+                                      addr, addr + size);
+                       }
+
+                       found += pages;
+               }
+
+               addr += size;
+       }
+
+       __flush_tlb_all();
+
+       shared = atomic_long_read(&nr_shared);
+       if (shared != found) {
+               pr_err("shared page accounting is off\n");
+               pr_err("nr_shared = %ld, nr_found = %ld\n", shared, found);
+       }
+}
+
 void __init tdx_early_init(void)
 {
        struct tdx_module_args args = {
@@ -896,6 +958,9 @@ void __init tdx_early_init(void)
        x86_platform.guest.enc_cache_flush_required  = tdx_cache_flush_required;
        x86_platform.guest.enc_tlb_flush_required    = tdx_tlb_flush_required;
 
+       x86_platform.guest.enc_kexec_stop_conversion = tdx_kexec_stop_conversion;
+       x86_platform.guest.enc_kexec_unshare_mem     = tdx_kexec_unshare_mem;
+
        /*
         * TDX intercepts the RDMSR to read the X2APIC ID in the parallel
         * bringup low level code. That raises #VE which cannot be handled
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index a5e89641bd2d..9d4a8e548820 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -48,8 +48,11 @@ int set_memory_wc(unsigned long addr, int numpages);
 int set_memory_wb(unsigned long addr, int numpages);
 int set_memory_np(unsigned long addr, int numpages);
 int set_memory_4k(unsigned long addr, int numpages);
+
+bool stop_memory_enc_conversion(bool wait);
 int set_memory_encrypted(unsigned long addr, int numpages);
 int set_memory_decrypted(unsigned long addr, int numpages);
+
 int set_memory_np_noalias(unsigned long addr, int numpages);
 int set_memory_nonglobal(unsigned long addr, int numpages);
 int set_memory_global(unsigned long addr, int numpages);
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 0d2267ad4e0e..e074b2aca970 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -2176,12 +2176,32 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
        return ret;
 }
 
+static DECLARE_RWSEM(mem_enc_lock);
+
+bool stop_memory_enc_conversion(bool wait)
+{
+       if (!wait)
+               return down_write_trylock(&mem_enc_lock);
+
+       down_write(&mem_enc_lock);
+
+       return true;
+}
+
 static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
 {
-       if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
-               return __set_memory_enc_pgtable(addr, numpages, enc);
+       int ret = 0;
 
-       return 0;
+       if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
+               if (!down_read_trylock(&mem_enc_lock))
+                       return -EBUSY;
+
+               ret = __set_memory_enc_pgtable(addr, numpages, enc);
+
+               up_read(&mem_enc_lock);
+       }
+
+       return ret;
 }
 
 int set_memory_encrypted(unsigned long addr, int numpages)
-- 
  Kiryl Shutsemau / Kirill A. Shutemov
