Currently, guests need to explicitly map their shared info and
grant table through hypercalls. This has proved to be complicated:
these pages have traditionally been placed in the Xen PCI Platform
device BAR, which has caused confusion regarding MTRR [1].

Introduce an optional feature to let the toolstack map these pages in
advance, and expose their location through the memory map (i.e. E820).

When this feature is active, guests are expected not to invalidate such
mappings (e.g. by calling xen_remove_from_physmap), as they may be in use
by the operating system or firmware (e.g. OVMF).

Moreover, this feature makes it possible to have a working guest without
having to expose physmap manipulation to it.

[1] 
https://lore.kernel.org/xen-devel/20250610162930.89055-1-roger....@citrix.com/

Signed-off-by: Teddy Astie <teddy.as...@vates.tech>
---
 .../x86/fixed-memory-layout.pandoc            | 24 ++++++
 docs/guest-guide/x86/index.rst                |  1 +
 tools/include/xen-tools/common-macros.h       |  4 +
 tools/libs/guest/xg_dom_x86.c                 | 84 +++++++++++++++++++
 tools/libs/light/libxl_create.c               |  1 +
 tools/libs/light/libxl_types.idl              |  1 +
 tools/libs/light/libxl_x86.c                  | 71 ++++++++++++++++
 tools/xl/xl_parse.c                           |  1 +
 xen/include/public/arch-x86/hvm/start_info.h  |  7 ++
 9 files changed, 194 insertions(+)
 create mode 100644 docs/guest-guide/x86/fixed-memory-layout.pandoc

diff --git a/docs/guest-guide/x86/fixed-memory-layout.pandoc 
b/docs/guest-guide/x86/fixed-memory-layout.pandoc
new file mode 100644
index 0000000000..b394dc1cff
--- /dev/null
+++ b/docs/guest-guide/x86/fixed-memory-layout.pandoc
@@ -0,0 +1,24 @@
+# Fixed memory layout
+
+When creating a guest with "fixed_mem_layout", the guest has additional special
+pages mapped in advance. These mappings may be required in some cases due to FastABI
+not providing the ability for the guest to modify its physical memory layout (which is
+usually used to map the shared info page or the grant table).
+
+The guest is informed of such mappings and their location through the memory map using
+Xen-specific memory map types (in the OEM range of the ACPI specification).
+
+These mappings are expected to stay stable; therefore, the guest shouldn't try to
+modify them, as they may be actively used by the guest firmware or operating system.
+
+    /* Xen-specific types (OEM-specific range of the ACPI spec) */
+    #define XEN_HVM_MEMMAP_TYPE_SHARED_INFO   0xF0000001 /* Shared info page */
+    #define XEN_HVM_MEMMAP_TYPE_GRANT_TABLE   0xF0000002 /* Grant table pages */
+    #define XEN_HVM_MEMMAP_TYPE_GNTTAB_STATUS 0xF0000003 /* Grant table status pages (v2) */
+    #define XEN_HVM_MEMMAP_TYPE_FOREIGN_REG   0xF0000004 /* Suitable region for grant mappings */
+                                                         /* and foreign mappings */
+
+These mappings are likely to be placed outside the 4G range.
+
+XEN_HVM_MEMMAP_TYPE_FOREIGN_REG is a special region which doesn't contain any mapping,
+but is safe to use for placing grant and foreign mappings.
diff --git a/docs/guest-guide/x86/index.rst b/docs/guest-guide/x86/index.rst
index 502968490d..6927271e53 100644
--- a/docs/guest-guide/x86/index.rst
+++ b/docs/guest-guide/x86/index.rst
@@ -7,3 +7,4 @@ x86
    :maxdepth: 2
 
    hypercall-abi
+   fixed-memory-layout
diff --git a/tools/include/xen-tools/common-macros.h 
b/tools/include/xen-tools/common-macros.h
index 0088208c2e..8b286b3da5 100644
--- a/tools/include/xen-tools/common-macros.h
+++ b/tools/include/xen-tools/common-macros.h
@@ -72,6 +72,10 @@
 #define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
 #endif
 
+#ifndef DIV_ROUNDUP
+#define DIV_ROUNDUP(n, d) (((n) + (d) - 1) / (d)) /* n / d, rounded up */
+#endif /* DIV_ROUNDUP */
+
 #define MASK_EXTR(v, m) (((v) & (m)) / ((m) & -(m)))
 #define MASK_INSR(v, m) (((v) * ((m) & -(m))) & (m))
 
diff --git a/tools/libs/guest/xg_dom_x86.c b/tools/libs/guest/xg_dom_x86.c
index cba01384ae..ab1c5063a4 100644
--- a/tools/libs/guest/xg_dom_x86.c
+++ b/tools/libs/guest/xg_dom_x86.c
@@ -624,6 +624,82 @@ static void build_hvm_info(void *hvm_info_page, struct 
xc_dom_image *dom)
     hvm_info->checksum = -sum;
 }
 
+/*
+ * Map the special regions (shared info, grant table, grant table status)
+ * described by the domain's E820 at the guest addresses given there.
+ *
+ * Returns 0 on success; non-zero if the grant table size cannot be queried,
+ * a region size does not match the grant table, or a mapping fails.
+ */
+static int prepare_fixed_special_regions(xc_interface *xch,
+                                         struct xc_dom_image *dom)
+{
+    int rc;
+    unsigned int i, j;
+    uint32_t domid = dom->guest_domid;
+    gnttab_query_size_t gnttab_query = { .dom = domid };
+    size_t gnttab_frame_count, gnttab_status_frame_count;
+
+    rc = xc_gnttab_query_size(xch, &gnttab_query);
+    if ( rc != 0 || gnttab_query.status != GNTST_okay )
+    {
+        DOMPRINTF("Unable to query grant table size.");
+        /* The call itself may succeed while reporting a failure status. */
+        return rc ? rc : -EINVAL;
+    }
+
+    gnttab_frame_count = gnttab_query.max_nr_frames;
+    /* Pages needed to hold one grant_status_t per v2 grant entry. */
+    gnttab_status_frame_count = DIV_ROUNDUP(
+        gnttab_frame_count * (XC_DOM_PAGE_SIZE(dom) / sizeof(grant_entry_v2_t)),
+        XC_DOM_PAGE_SIZE(dom) / sizeof(grant_status_t));
+
+    for ( i = 0; i < dom->e820_entries; i++ )
+    {
+        const struct e820entry *entry = &dom->e820[i];
+        uint64_t gfn = entry->addr >> PAGE_SHIFT;
+
+        switch ( entry->type )
+        {
+        case XEN_HVM_MEMMAP_TYPE_SHARED_INFO:
+            rc = xc_domain_add_to_physmap(xch, domid, XENMAPSPACE_shared_info,
+                                          0, gfn);
+            break;
+        case XEN_HVM_MEMMAP_TYPE_GRANT_TABLE:
+            if ( gnttab_frame_count != entry->size >> PAGE_SHIFT )
+            {
+                DOMPRINTF("Invalid grant table memmap region size");
+                return -EINVAL;
+            }
+            /* Separate index: reusing i would corrupt the E820 walk. */
+            for ( j = 0; rc == 0 && j < gnttab_frame_count; j++ )
+                rc = xc_domain_add_to_physmap(xch, domid,
+                                              XENMAPSPACE_grant_table, j,
+                                              gfn + j);
+            break;
+        case XEN_HVM_MEMMAP_TYPE_GNTTAB_STATUS:
+            if ( gnttab_status_frame_count != entry->size >> PAGE_SHIFT )
+            {
+                DOMPRINTF("Invalid grant table status memmap region size");
+                return -EINVAL;
+            }
+            for ( j = 0; rc == 0 && j < gnttab_status_frame_count; j++ )
+                rc = xc_domain_add_to_physmap(xch, domid,
+                                              XENMAPSPACE_grant_table,
+                                              j | XENMAPIDX_grant_table_status,
+                                              gfn + j);
+            break;
+        default:
+            break;
+        }
+
+        if ( rc != 0 )
+            break;
+    }
+
+    return rc;
+}
+
 static int alloc_magic_pages_hvm(struct xc_dom_image *dom)
 {
     unsigned long i;
@@ -718,6 +794,14 @@ static int alloc_magic_pages_hvm(struct xc_dom_image *dom)
         goto out;
     }
 
+    rc = prepare_fixed_special_regions(xch, dom);
+
+    if ( rc != 0 )
+    {
+        DOMPRINTF("Unable to prepare fixed special regions");
+        goto out;
+    }
+
     /*
      * Identity-map page table is required for running with CR0.PG=0 when
      * using Intel EPT. Create a 32-bit non-PAE page directory of superpages.
diff --git a/tools/libs/light/libxl_create.c b/tools/libs/light/libxl_create.c
index 8a85fba1cf..5e23e122fc 100644
--- a/tools/libs/light/libxl_create.c
+++ b/tools/libs/light/libxl_create.c
@@ -2363,6 +2363,7 @@ int libxl_domain_create_restore(libxl_ctx *ctx, 
libxl_domain_config *d_config,
      * configuration.
      */
     libxl_defbool_setdefault(&d_config->b_info.arch_x86.msr_relaxed, true);
+    libxl_defbool_setdefault(&d_config->b_info.arch_x86.fixed_mem_layout, 
false);
     libxl_defbool_setdefault(&d_config->b_info.u.hvm.pirq, true);
 
     return do_domain_create(ctx, d_config, domid, restore_fd, send_back_fd,
diff --git a/tools/libs/light/libxl_types.idl b/tools/libs/light/libxl_types.idl
index a3a79d12b2..74edfdebc1 100644
--- a/tools/libs/light/libxl_types.idl
+++ b/tools/libs/light/libxl_types.idl
@@ -727,6 +727,7 @@ libxl_domain_build_info = Struct("domain_build_info",[
                                ("nr_spis", uint32, {'init_val': 
'LIBXL_NR_SPIS_DEFAULT'}),
                               ])),
     ("arch_x86", Struct(None, [("msr_relaxed", libxl_defbool),
+                               ("fixed_mem_layout", libxl_defbool),
                               ])),
     # Alternate p2m is not bound to any architecture or guest type, as it is
     # supported by x86 HVM and ARM support is planned.
diff --git a/tools/libs/light/libxl_x86.c b/tools/libs/light/libxl_x86.c
index 60d4e8661c..de19f722ab 100644
--- a/tools/libs/light/libxl_x86.c
+++ b/tools/libs/light/libxl_x86.c
@@ -1,6 +1,7 @@
 #include "libxl_internal.h"
 #include "libxl_arch.h"
 #include <xen/arch-x86/cpuid.h>
+#include <xen/arch-x86/hvm/start_info.h>
 
 int libxl__arch_domain_prepare_config(libxl__gc *gc,
                                       libxl_domain_config *d_config,
@@ -50,6 +51,10 @@ static const char *e820_names(int type)
         case E820_ACPI: return "ACPI";
         case E820_NVS: return "ACPI NVS";
         case E820_UNUSABLE: return "Unusable";
+        case XEN_HVM_MEMMAP_TYPE_SHARED_INFO: return "HVM Shared Info";
+        case XEN_HVM_MEMMAP_TYPE_GRANT_TABLE: return "HVM Grant Table";
+        case XEN_HVM_MEMMAP_TYPE_GNTTAB_STATUS: return "HVM Grant Status";
+        case XEN_HVM_MEMMAP_TYPE_FOREIGN_REG: return "HVM Foreign mapping 
region";
         default: break;
     }
     return "Unknown";
@@ -686,10 +691,31 @@ static int domain_construct_memmap(libxl__gc *gc,
     /* We always own at least one lowmem entry. */
     unsigned int e820_entries = 1;
     struct e820entry *e820 = NULL;
+    uint64_t highmem_start = ((uint64_t)1 << 32);
     uint64_t highmem_size =
                     dom->highmem_end ? dom->highmem_end - (1ull << 32) : 0;
     uint32_t lowmem_start = dom->device_model ? GUEST_LOW_MEM_START_DEFAULT : 
0;
     unsigned page_size = XC_DOM_PAGE_SIZE(dom);
+    /* Special region starts at the first 1G boundary after the highmem */
+    uint64_t special_region_start =
+        (highmem_start + highmem_size + GB(1) - 1) & ~(GB(1) - 1);
+    uint64_t special_region_offset = special_region_start;
+    size_t gnttab_frame_count, gnttab_status_frame_count;
+    gnttab_query_size_t gnttab_query;
+
+    gnttab_query.dom = domid;
+    rc = xc_gnttab_query_size(dom->xch, &gnttab_query);
+
+    if (rc != 0 || gnttab_query.status != GNTST_okay)
+    {
+        gnttab_frame_count = 0;
+        gnttab_status_frame_count = 0;
+    }
+
+    gnttab_frame_count = gnttab_query.max_nr_frames;
+    gnttab_status_frame_count = DIV_ROUNDUP(
+        gnttab_frame_count * (page_size / sizeof(grant_entry_v2_t)),
+        page_size / sizeof(grant_status_t));
 
     /* Add all rdm entries. */
     for (i = 0; i < d_config->num_rdms; i++)
@@ -703,6 +729,16 @@ static int domain_construct_memmap(libxl__gc *gc,
     /* If we should have a highmem range. */
     if (highmem_size)
         e820_entries++;
+    
+    if (libxl_defbool_val(d_config->b_info.arch_x86.fixed_mem_layout))
+    {
+        e820_entries++; /* XEN_HVM_MEMMAP_TYPE_SHARED_INFO */
+        if ( gnttab_frame_count )
+            e820_entries++; /* XEN_HVM_MEMMAP_TYPE_GRANT_TABLE */
+        if (d_config->b_info.max_grant_version >= 2 && 
gnttab_status_frame_count)
+            e820_entries++; /* XEN_HVM_MEMMAP_TYPE_GNTTAB_STATUS status */
+        e820_entries++; /* XEN_HVM_MEMMAP_TYPE_FOREIGN_REG */
+    }
 
     for (i = 0; i < MAX_ACPI_MODULES; i++)
         if (dom->acpi_modules[i].length)
@@ -769,6 +805,40 @@ static int domain_construct_memmap(libxl__gc *gc,
         e820[nr].type = E820_RAM;
     }
 
+    /* Special regions */
+    if (libxl_defbool_val(d_config->b_info.arch_x86.fixed_mem_layout))
+    {
+        e820[nr].type = XEN_HVM_MEMMAP_TYPE_SHARED_INFO;
+        e820[nr].addr = special_region_offset;
+        e820[nr].size = page_size;
+        special_region_offset += e820[nr].size;
+        nr++;
+
+        if ( gnttab_frame_count )
+        {
+            e820[nr].type = XEN_HVM_MEMMAP_TYPE_GRANT_TABLE;
+            e820[nr].addr = special_region_offset;
+            e820[nr].size = gnttab_frame_count * page_size;
+            special_region_offset += e820[nr].size;
+            nr++;
+        }
+
+        if (d_config->b_info.max_grant_version >= 2 && 
gnttab_status_frame_count)
+        {
+            e820[nr].type = XEN_HVM_MEMMAP_TYPE_GNTTAB_STATUS;
+            e820[nr].addr = special_region_offset;
+            e820[nr].size = gnttab_status_frame_count * page_size;
+            special_region_offset += e820[nr].size;
+            nr++;
+        }
+
+        e820[nr].type = XEN_HVM_MEMMAP_TYPE_FOREIGN_REG;
+        e820[nr].addr = special_region_offset;
+        e820[nr].size = MB(512);
+        special_region_offset += e820[nr].size;
+        nr++;
+    }
+
     if (xc_domain_set_memory_map(CTX->xch, domid, e820, e820_entries) != 0) {
         rc = ERROR_FAIL;
         goto out;
@@ -819,6 +889,7 @@ int libxl__arch_domain_build_info_setdefault(libxl__gc *gc,
     libxl_defbool_setdefault(&b_info->acpi, true);
     libxl_defbool_setdefault(&b_info->arch_x86.msr_relaxed, false);
     libxl_defbool_setdefault(&b_info->trap_unmapped_accesses, false);
+    libxl_defbool_setdefault(&b_info->arch_x86.fixed_mem_layout, false);
 
     if (b_info->type == LIBXL_DOMAIN_TYPE_HVM) {
         /*
diff --git a/tools/xl/xl_parse.c b/tools/xl/xl_parse.c
index 7e11c62ba0..a74cc577e9 100644
--- a/tools/xl/xl_parse.c
+++ b/tools/xl/xl_parse.c
@@ -2992,6 +2992,7 @@ skip_usbdev:
                     "WARNING: msr_relaxed will be removed in future 
versions.\n"
                     "If it fixes an issue you are having please report to "
                     "xen-devel@lists.xenproject.org.\n");
+    xlu_cfg_get_defbool(config, "fixed_mem_layout", 
&b_info->arch_x86.fixed_mem_layout, 0);
 
     xlu_cfg_get_defbool(config, "vpmu", &b_info->vpmu, 0);
 
diff --git a/xen/include/public/arch-x86/hvm/start_info.h 
b/xen/include/public/arch-x86/hvm/start_info.h
index e33557c0b4..0b3dfe91af 100644
--- a/xen/include/public/arch-x86/hvm/start_info.h
+++ b/xen/include/public/arch-x86/hvm/start_info.h
@@ -99,6 +99,13 @@
 #define XEN_HVM_MEMMAP_TYPE_DISABLED  6
 #define XEN_HVM_MEMMAP_TYPE_PMEM      7
 
+/* Xen-specific types (OEM-specific range of the ACPI spec) */
+#define XEN_HVM_MEMMAP_TYPE_SHARED_INFO   0xF0000001 /* Shared info page */
+#define XEN_HVM_MEMMAP_TYPE_GRANT_TABLE   0xF0000002 /* Grant table pages */
+#define XEN_HVM_MEMMAP_TYPE_GNTTAB_STATUS 0xF0000003 /* Grant table status pages (v2) */
+#define XEN_HVM_MEMMAP_TYPE_FOREIGN_REG   0xF0000004 /* Suitable region for grant mappings */
+                                                     /* and foreign mappings */
+
 /*
  * C representation of the x86/HVM start info layout.
  *
-- 
2.50.1



Teddy Astie | Vates XCP-ng Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech


Reply via email to