The unmapping part also wants to cover UNUSABLE regions, and unmapping
will now also be necessary for space outside the low 16Mb (wherever Xen
is placed).

While there, limit the scopes of involved variables.

Fixes: e4dd91ea85a3 ("x86: Ensure RAM holes really are not mapped in Xen's ongoing 1:1 physmap")
Fixes: 7cd7f2f5e116 ("x86/boot: Remove the preconstructed low 16M superpage mappings")
Signed-off-by: Jan Beulich <jbeul...@suse.com>
---
Originally I had the unmap cover the entire range up to 4Gb, which made
this ACPI mapping issue more pronounced: Mappings done by
acpi_dmar_init(), erst_init(), and acpi_hest_init() may wrongly be
undone this way. With things limited to the initial mapping range, the
risk of an overlap with some area which needs to remain mapped is lower,
but it's not gone.
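
For illustration only (a stand-alone sketch, not part of the patch; the
__pa(_end) value and macro spellings below are made up): this is how the
new loop's upper bound confines unmapping to the range head.S may have
mapped, rather than reaching all the way up to 4Gb.

#include <stdio.h>

#define PAGE_SHIFT        12
#define PFN_DOWN(x)       ((x) >> PAGE_SHIFT)
#define PAGE_ALIGN_2M(x)  (((x) + (1UL << 21) - 1) & ~((1UL << 21) - 1))

int main(void)
{
    unsigned long pa_end = 0x011c3000UL; /* hypothetical __pa(_end) */
    unsigned long rlimit_pfn = PFN_DOWN(PAGE_ALIGN_2M(pa_end));

    /* 0x011c3000 rounds up to 0x01200000, i.e. PFN 0x1200. */
    printf("unmap limit: PFN %#lx\n", rlimit_pfn);

    return 0;
}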

As the excess mappings could, at least in principle, also cause other
issues (like mapping a range WB which must strictly be non-cacheable), I
wonder whether we can actually get away with those excess mappings,
despite properly tearing them down in arch_init_memory() (directmap) and
__start_xen() (Xen image space). Options would appear to be to move
_end[] to a 2Mb-aligned boundary (or at least to extend the PT_LOAD
segment to end at the next 2Mb boundary), or to use 4k mappings for the
tail part of .bss. Any of these would then also eliminate the remaining
concern here.
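
To put a number on "excess mappings", here's a stand-alone sketch (not
Xen code; the end address is made up) of how many 4k pages beyond _end a
trailing 2Mb super-page covers, i.e. what any of the options above would
avoid mapping in the first place:

#include <stdio.h>

#define PAGE_SIZE  0x1000UL
#define SUPERPAGE  0x200000UL /* 2Mb */

int main(void)
{
    unsigned long pa_end = 0x011c3000UL; /* hypothetical __pa(_end) */
    unsigned long excess = (SUPERPAGE - (pa_end & (SUPERPAGE - 1))) &
                           (SUPERPAGE - 1);

    printf("excess past _end: %#lx bytes (%lu 4k pages)\n",
           excess, excess / PAGE_SIZE);

    return 0;
}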

Extending the PT_LOAD segment (in mkelf32) would apparently allow doing
away with the hackery introduced by 773ded42218d ("Move cpu0_stack out
of Xen text section and into BSS"), to "work around" a supposed linker
bug (which really was a bug in the linker script imo). The extra command
line argument then wouldn't be needed anymore.
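
Purely as a hedged sketch of what the mkelf32 side might look like (a
hypothetical helper, not the tool's actual code): pad a PT_LOAD
segment's p_memsz so the segment ends on the next 2Mb boundary.

#include <elf.h>

#define SUPERPAGE 0x200000UL /* 2Mb */

/* Round the given segment's end address up to a 2Mb boundary. */
static void pad_load_segment(Elf32_Phdr *phdr)
{
    if ( phdr->p_type == PT_LOAD )
    {
        unsigned long end = phdr->p_paddr + phdr->p_memsz;
        unsigned long aligned = (end + SUPERPAGE - 1) & ~(SUPERPAGE - 1);

        phdr->p_memsz += aligned - end;
    }
}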

--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -275,8 +275,6 @@ static void __init assign_io_page(struct
 
 void __init arch_init_memory(void)
 {
-    unsigned long i, pfn, rstart_pfn, rend_pfn, iostart_pfn, ioend_pfn;
-
     /*
      * Basic guest-accessible flags:
      *   PRESENT, R/W, USER, A/D, AVAIL[0,1,2], AVAIL_HIGH, NX (if available).
@@ -292,12 +290,55 @@ void __init arch_init_memory(void)
      * case the low 1MB.
      */
     BUG_ON(pvh_boot && trampoline_phys != 0x1000);
-    for ( i = 0; i < 0x100; i++ )
+    for ( unsigned int i = 0; i < MB(1) >> PAGE_SHIFT; i++ )
         assign_io_page(mfn_to_page(_mfn(i)));
 
-    /* Any areas not specified as RAM by the e820 map are considered I/O. */
-    for ( i = 0, pfn = 0; pfn < max_page; i++ )
+    /*
+     * Any areas not specified as RAM by the e820 map want to have no mappings.
+     * We may have established some by mapping more than necessary in head.S,
+     * due to the use of super-pages there.
+     */
+    for ( unsigned long i = 0, pfn = 0,
+                        rlimit_pfn = PFN_DOWN(PAGE_ALIGN_2M(__pa(_end)));
+          pfn < rlimit_pfn; i++ )
     {
+        unsigned long rstart_pfn, rend_pfn, start_pfn;
+
+        while ( i < e820.nr_map &&
+                e820.map[i].type != E820_RAM )
+            i++;
+
+        if ( i >= e820.nr_map )
+        {
+            /* No more RAM regions: Unmap right to upper boundary. */
+            rstart_pfn = rend_pfn = rlimit_pfn;
+        }
+        else
+        {
+            /* Unmap just up to the next RAM region. */
+            rstart_pfn = min(rlimit_pfn, PFN_UP(e820.map[i].addr));
+            rend_pfn   = max(rstart_pfn,
+                             PFN_DOWN(e820.map[i].addr + e820.map[i].size));
+        }
+
+        /* NB: _start is already 2Mb-aligned. */
+        start_pfn = max(pfn, PFN_DOWN(__pa(_start)));
+        if ( start_pfn < rstart_pfn )
+            destroy_xen_mappings((unsigned long)mfn_to_virt(start_pfn),
+                                 (unsigned long)mfn_to_virt(rstart_pfn));
+
+        /* Skip the RAM region. */
+        pfn = rend_pfn;
+    }
+
+    /*
+     * Any areas not specified as RAM or UNUSABLE by the e820 map are
+     * considered I/O.
+     */
+    for ( unsigned long i = 0, pfn = 0; pfn < max_page; i++ )
+    {
+        unsigned long rstart_pfn, rend_pfn;
+
         while ( (i < e820.nr_map) &&
                 (e820.map[i].type != E820_RAM) &&
                 (e820.map[i].type != E820_UNUSABLE) )
@@ -317,17 +358,6 @@ void __init arch_init_memory(void)
                                PFN_DOWN(e820.map[i].addr + e820.map[i].size));
         }
 
-        /*
-         * Make sure any Xen mappings of RAM holes above 1MB are blown away.
-         * In particular this ensures that RAM holes are respected even in
-         * the statically-initialised 1-16MB mapping area.
-         */
-        iostart_pfn = max_t(unsigned long, pfn, 1UL << (20 - PAGE_SHIFT));
-        ioend_pfn = min(rstart_pfn, 16UL << (20 - PAGE_SHIFT));
-        if ( iostart_pfn < ioend_pfn )
-            destroy_xen_mappings((unsigned long)mfn_to_virt(iostart_pfn),
-                                 (unsigned long)mfn_to_virt(ioend_pfn));
-
         /* Mark as I/O up to next RAM region. */
         for ( ; pfn < rstart_pfn; pfn++ )
         {
@@ -365,6 +395,7 @@ void __init arch_init_memory(void)
                     const l3_pgentry_t *l3idle = map_l3t_from_l4e(
                             idle_pg_table[l4_table_offset(split_va)]);
                     l3_pgentry_t *l3tab = map_domain_page(l3mfn);
+                    unsigned int i;
 
                     for ( i = 0; i < l3_table_offset(split_va); ++i )
                         l3tab[i] = l3idle[i];
