Introduce the utilities needed to build and manage RISC-V guest page
tables and MMIO mappings via map_regions_p2mt() and
guest_physmap_add_entry().
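
As a rough usage sketch (the call site and the GFN/MFN values below are
made up purely for illustration):

    /* Map 16 MMIO pages for a passed-through device. */
    rc = map_regions_p2mt(d, _gfn(0x10000), 16, _mfn(0x20000),
                          p2m_mmio_direct_dev);

    /* Back one 2M guest superpage (order 9) with RAM. */
    rc = guest_physmap_add_entry(d, gfn, mfn, 9, p2m_ram_rw);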

To implement the p2m mapping functionality, the following is introduced:
- Define the P2M root level/order and entry count.
- Introduce a radix tree for p2m types, as there aren't enough free bits
  in the PTE, along with helpers (p2m_type_radix_{get,set}()) to deal
  with them.
- Introduce p2m_is_*() helpers, as the pte_is_*() helpers check the valid
  bit in the PTE whereas the p2m type has to be checked instead
  (see the comment above p2m_is_valid() for details).
- Introduce a helper to set a p2m PTE's permissions: p2m_set_permission().
- Introduce a helper to create a p2m entry from an MFN, a p2m_type_t and
  a p2m_access_t: p2m_entry_from_mfn().
- Introduce a helper to generate a table entry with correct attributes:
  page_to_p2m_table().
- Introduce a p2m page allocation function: p2m_alloc_page().
- Introduce functions to write/remove p2m entries: p2m_{write,remove}_pte().
- Introduce a function to allocate a p2m table: p2m_create_table().
- Introduce functions used to free p2m entries.
- Introduce a function for table walking: p2m_next_level().
- Introduce a function to insert an entry into the p2m: p2m_set_entry().
- Introduce superpage splitting: p2m_split_superpage().
- Introduce page table type defines (PGT_{none,writable_page}, etc.).

Signed-off-by: Oleksii Kurochko <oleksii.kuroc...@gmail.com>
---
 xen/arch/riscv/include/asm/mm.h   |  32 +-
 xen/arch/riscv/include/asm/p2m.h  |  17 +-
 xen/arch/riscv/include/asm/page.h |  11 +
 xen/arch/riscv/p2m.c              | 780 ++++++++++++++++++++++++++++++
 4 files changed, 829 insertions(+), 11 deletions(-)

diff --git a/xen/arch/riscv/include/asm/mm.h b/xen/arch/riscv/include/asm/mm.h
index 972ec45448..c1e4519839 100644
--- a/xen/arch/riscv/include/asm/mm.h
+++ b/xen/arch/riscv/include/asm/mm.h
@@ -12,6 +12,7 @@
 #include <xen/sections.h>
 #include <xen/types.h>
 
+#include <asm/cmpxchg.h>
 #include <asm/page-bits.h>
 
 extern vaddr_t directmap_virt_start;
@@ -229,9 +230,21 @@ static inline bool arch_mfns_in_directmap(unsigned long mfn, unsigned long nr)
 #define PGT_writable_page PG_mask(1, 1)  /* has writable mappings?         */
 #define PGT_type_mask     PG_mask(1, 1)  /* Bits 31 or 63.                 */
 
-/* Count of uses of this frame as its current type. */
-#define PGT_count_width   PG_shift(2)
-#define PGT_count_mask    ((1UL << PGT_count_width) - 1)
+/* 10-bit count of uses of this frame as its current type. */
+#define PGT_count_mask    PG_mask(0x3FF, 10)
+
+/*
+ * Sv32 has a 22-bit GFN, while Sv{39,48,57} have a 44-bit GFN. Therefore
+ * 10 bits can be used for the count in all modes. Using the same split of
+ * `type_info` for every MMU mode avoids an extra #ifdef in this header:
+ * if the count were given the maximum possible width in each
+ * configuration, separate sets of PGT_count_* and PGT_gfn_* values would
+ * be needed.
+ */
+#define PGT_gfn_width     PG_shift(10)
+#define PGT_gfn_mask      (BIT(PGT_gfn_width, UL) - 1)
+
+#define PGT_INVALID_XENHEAP_GFN   _gfn(PGT_gfn_mask)
 
 /*
  * Page needs to be scrubbed. Since this bit can only be set on a page that is
@@ -283,6 +296,19 @@ static inline bool arch_mfns_in_directmap(unsigned long mfn, unsigned long nr)
 
 #define PFN_ORDER(pg) ((pg)->v.free.order)
 
+static inline void page_set_xenheap_gfn(struct page_info *p, gfn_t gfn)
+{
+    gfn_t gfn_ = gfn_eq(gfn, INVALID_GFN) ? PGT_INVALID_XENHEAP_GFN : gfn;
+    unsigned long x, nx, y = p->u.inuse.type_info;
+
+    ASSERT(is_xen_heap_page(p));
+
+    do {
+        x = y;
+        nx = (x & ~PGT_gfn_mask) | gfn_x(gfn_);
+    } while ( (y = cmpxchg(&p->u.inuse.type_info, x, nx)) != x );
+}
+
 extern unsigned char cpu0_boot_stack[];
 
 void setup_initial_pagetables(void);
diff --git a/xen/arch/riscv/include/asm/p2m.h b/xen/arch/riscv/include/asm/p2m.h
index 0fcfec7f03..9f4633501f 100644
--- a/xen/arch/riscv/include/asm/p2m.h
+++ b/xen/arch/riscv/include/asm/p2m.h
@@ -9,8 +9,13 @@
 #include <xen/rwlock.h>
 #include <xen/types.h>
 
+#include <asm/page.h>
 #include <asm/page-bits.h>
 
+#define P2M_ROOT_LEVEL  HYP_PT_ROOT_LEVEL
+#define P2M_ROOT_ORDER  XEN_PT_LEVEL_ORDER(P2M_ROOT_LEVEL)
+#define P2M_ROOT_PAGES  (1U << P2M_ROOT_ORDER)
+
 #define paddr_bits PADDR_BITS
 
 /* Get host p2m table */
@@ -145,14 +150,10 @@ static inline int guest_physmap_mark_populate_on_demand(struct domain *d,
     return -EOPNOTSUPP;
 }
 
-static inline int guest_physmap_add_entry(struct domain *d,
-                                          gfn_t gfn, mfn_t mfn,
-                                          unsigned long page_order,
-                                          p2m_type_t t)
-{
-    BUG_ON("unimplemented");
-    return -EINVAL;
-}
+int guest_physmap_add_entry(struct domain *d,
+                            gfn_t gfn, mfn_t mfn,
+                            unsigned long page_order,
+                            p2m_type_t t);
 
 /* Untyped version for RAM only, for compatibility */
 static inline int __must_check
diff --git a/xen/arch/riscv/include/asm/page.h b/xen/arch/riscv/include/asm/page.h
index cb3dea309c..a5c6f5140d 100644
--- a/xen/arch/riscv/include/asm/page.h
+++ b/xen/arch/riscv/include/asm/page.h
@@ -22,6 +22,7 @@
 #define XEN_PT_LEVEL_SIZE(lvl)      (_AT(paddr_t, 1) << XEN_PT_LEVEL_SHIFT(lvl))
 #define XEN_PT_LEVEL_MAP_MASK(lvl)  (~(XEN_PT_LEVEL_SIZE(lvl) - 1))
 #define XEN_PT_LEVEL_MASK(lvl)      (VPN_MASK << XEN_PT_LEVEL_SHIFT(lvl))
+#define XEN_PT_ENTRIES              (_AT(unsigned int, 1) << PAGETABLE_ORDER)
 
 /*
  * PTE format:
@@ -69,10 +70,20 @@
 #define PTE_PMBT_NOCACHE    BIT(61, UL)
 #define PTE_PMBT_IO         BIT(62, UL)
 
+enum pbmt_type_t {
+    pbmt_pma,
+    pbmt_nc,
+    pbmt_io,
+    pbmt_rsvd,
+    pbmt_max,
+};
+
 #define PTE_ACCESS_MASK (PTE_READABLE | PTE_WRITABLE | PTE_EXECUTABLE)
 
 #define PTE_PBMT_MASK   (PTE_PMBT_NOCACHE | PTE_PMBT_IO)
 
+#define P2M_CLEAR_PERM(p2m_pte) ((p2m_pte).pte & ~PTE_ACCESS_MASK)
+
 /* Calculate the offsets into the pagetables for a given VA */
 #define pt_linear_offset(lvl, va)   ((va) >> XEN_PT_LEVEL_SHIFT(lvl))
 
diff --git a/xen/arch/riscv/p2m.c b/xen/arch/riscv/p2m.c
index a890870391..84cb3d28af 100644
--- a/xen/arch/riscv/p2m.c
+++ b/xen/arch/riscv/p2m.c
@@ -135,6 +135,37 @@ static int p2m_alloc_table(struct domain *d)
     return 0;
 }
 
+static p2m_type_t p2m_type_radix_get(struct p2m_domain *p2m, pte_t pte)
+{
+    void *ptr;
+
+    ptr = radix_tree_lookup(&p2m->p2m_type, pte.pte);
+
+    if ( !ptr )
+        return p2m_invalid;
+
+    return radix_tree_ptr_to_int(ptr);
+}
+
+static int p2m_type_radix_set(struct p2m_domain *p2m, pte_t pte, p2m_type_t t)
+{
+    int rc;
+
+    rc = radix_tree_insert(&p2m->p2m_type, pte.pte,
+                           radix_tree_int_to_ptr(t));
+    if ( rc == -EEXIST )
+    {
+        /* If a setting already exists, change it to the new one */
+        radix_tree_replace_slot(
+            radix_tree_lookup_slot(
+                &p2m->p2m_type, pte.pte),
+            radix_tree_int_to_ptr(t));
+        rc = 0;
+    }
+
+    return rc;
+}
+
 int p2m_init(struct domain *d)
 {
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
@@ -233,3 +264,752 @@ int p2m_set_allocation(struct domain *d, unsigned long pages, bool *preempted)
 
     return 0;
 }
+
+/*
+ * Find and map the root page table. The caller is responsible for
+ * unmapping the table.
+ *
+ * The function will return NULL if the offset of the root table is
+ * invalid.
+ */
+static pte_t *p2m_get_root_pointer(struct p2m_domain *p2m, gfn_t gfn)
+{
+    unsigned long root_table;
+
+    root_table = gfn_x(gfn) >> XEN_PT_LEVEL_ORDER(P2M_ROOT_LEVEL);
+    if ( root_table >= P2M_ROOT_PAGES )
+        return NULL;
+
+    return __map_domain_page(p2m->root + root_table);
+}
+
+/*
+ * In the case of the P2M, the valid bit is used for another purpose. Use
+ * the type to check whether an entry is valid.
+ */
+static inline bool p2m_is_valid(struct p2m_domain *p2m, pte_t pte)
+{
+    return p2m_type_radix_get(p2m, pte) != p2m_invalid;
+}
+
+/*
+ * The pte_is_* helpers check the valid bit set in the PTE, but here the
+ * p2m type has to be checked instead (see the comment above
+ * p2m_is_valid()).
+ * Provide our own overlays which use the p2m type to determine validity.
+ */
+static inline bool p2m_is_mapping(struct p2m_domain *p2m, pte_t pte)
+{
+    return p2m_is_valid(p2m, pte) && (pte.pte & PTE_ACCESS_MASK);
+}
+
+static inline bool p2m_is_superpage(struct p2m_domain *p2m, pte_t pte,
+                                    unsigned int level)
+{
+    return p2m_is_valid(p2m, pte) && (pte.pte & PTE_ACCESS_MASK) &&
+           (level > 0);
+}
+
+static void p2m_set_permission(pte_t *e, p2m_type_t t, p2m_access_t a)
+{
+    /* First apply type permissions */
+    switch ( t )
+    {
+    case p2m_ram_rw:
+        e->bits.r = 1;
+        e->bits.w = 1;
+        e->bits.x = 1;
+
+        break;
+
+    case p2m_mmio_direct_dev:
+        e->bits.r = 1;
+        e->bits.w = 1;
+        e->bits.x = 0;
+        break;
+
+    case p2m_invalid:
+        e->bits.r = 0;
+        e->bits.w = 0;
+        e->bits.x = 0;
+        break;
+
+    default:
+        BUG();
+        break;
+    }
+
+    /* Then restrict with access permissions */
+    switch ( a )
+    {
+    case p2m_access_rwx:
+        break;
+    case p2m_access_wx:
+        e->bits.r = 0;
+        break;
+    case p2m_access_rw:
+        e->bits.x = 0;
+        break;
+    case p2m_access_w:
+        e->bits.r = 0;
+        e->bits.x = 0;
+        break;
+    case p2m_access_rx:
+    case p2m_access_rx2rw:
+        e->bits.w = 0;
+        break;
+    case p2m_access_x:
+        e->bits.r = 0;
+        e->bits.w = 0;
+        break;
+    case p2m_access_r:
+        e->bits.w = 0;
+        e->bits.x = 0;
+        break;
+    case p2m_access_n:
+    case p2m_access_n2rwx:
+        e->bits.r = 0;
+        e->bits.w = 0;
+        e->bits.x = 0;
+        break;
+    default:
+        BUG();
+        break;
+    }
+}
+
+static pte_t p2m_entry_from_mfn(struct p2m_domain *p2m, mfn_t mfn,
+                                p2m_type_t t, p2m_access_t a)
+{
+    pte_t e = (pte_t) {
+        .bits.v = 1,
+    };
+
+    switch ( t )
+    {
+    case p2m_mmio_direct_dev:
+        e.bits.pbmt = pbmt_io;
+        break;
+
+    default:
+        e.bits.pbmt = pbmt_pma;
+        break;
+    }
+
+    p2m_set_permission(&e, t, a);
+
+    ASSERT(!(mfn_to_maddr(mfn) & ~PADDR_MASK));
+
+    pte_set_mfn(&e, mfn);
+
+    BUG_ON(p2m_type_radix_set(p2m, e, t));
+
+    return e;
+}
+
+/* Generate table entry with correct attributes. */
+static pte_t page_to_p2m_table(struct p2m_domain *p2m, struct page_info *page)
+{
+    /*
+     * Since this function generates a table entry, according to "Encoding
+     * of PTE R/W/X fields," the entry's r, w, and x fields must be set to 0
+     * to point to the next level of the page table.
+     * Therefore, to ensure that an entry is a page table entry,
+     * `p2m_access_n2rwx` is passed to p2m_entry_from_mfn() as the access
+     * value, which overrides whatever was passed as `p2m_type_t` and
+     * guarantees that the entry is a page table entry by setting
+     * r = w = x = 0.
+     */
+    return p2m_entry_from_mfn(p2m, page_to_mfn(page), p2m_ram_rw,
+                              p2m_access_n2rwx);
+}
+
+static struct page_info *p2m_alloc_page(struct domain *d)
+{
+    struct page_info *pg;
+
+    /*
+     * For the hardware domain, there should be no limit on the number of
+     * pages that can be allocated, so that the kernel may take advantage
+     * of the extended regions. Hence, allocate p2m pages for the hardware
+     * domain from the heap.
+     */
+    if ( is_hardware_domain(d) )
+    {
+        pg = alloc_domheap_page(d, MEMF_no_owner);
+        if ( pg == NULL )
+            printk(XENLOG_G_ERR "Failed to allocate P2M pages for hwdom.\n");
+    }
+    else
+    {
+        spin_lock(&d->arch.paging.lock);
+        pg = page_list_remove_head(&d->arch.paging.p2m_freelist);
+        spin_unlock(&d->arch.paging.lock);
+    }
+
+    return pg;
+}
+
+static inline void p2m_write_pte(pte_t *p, pte_t pte, bool clean_pte)
+{
+    write_pte(p, pte);
+    if ( clean_pte )
+        clean_dcache_va_range(p, sizeof(*p));
+}
+
+static inline void p2m_remove_pte(pte_t *p, bool clean_pte)
+{
+    pte_t pte;
+
+    memset(&pte, 0x00, sizeof(pte));
+    p2m_write_pte(p, pte, clean_pte);
+}
+
+/* Allocate a new page table page and hook it in via the given entry. */
+static int p2m_create_table(struct p2m_domain *p2m, pte_t *entry)
+{
+    struct page_info *page;
+    pte_t *p;
+
+    ASSERT(!p2m_is_valid(p2m, *entry));
+
+    page = p2m_alloc_page(p2m->domain);
+    if ( page == NULL )
+        return -ENOMEM;
+
+    page_list_add(page, &p2m->pages);
+
+    p = __map_domain_page(page);
+    clear_page(p);
+
+    unmap_domain_page(p);
+
+    p2m_write_pte(entry, page_to_p2m_table(p2m, page), p2m->clean_pte);
+
+    return 0;
+}
+
+#define GUEST_TABLE_MAP_NONE 0
+#define GUEST_TABLE_MAP_NOMEM 1
+#define GUEST_TABLE_SUPER_PAGE 2
+#define GUEST_TABLE_NORMAL 3
+
+/*
+ * Take the currently mapped table, find the corresponding GFN entry,
+ * and map the next table, if available. The previous table will be
+ * unmapped if the next level was mapped (e.g. GUEST_TABLE_NORMAL
+ * returned).
+ *
+ * The `alloc_tbl` parameter indicates whether intermediate tables should
+ * be allocated when not present.
+ *
+ * Return values:
+ *  GUEST_TABLE_MAP_NONE: the next table is not present and allocating one
+ *                        isn't allowed.
+ *  GUEST_TABLE_MAP_NOMEM: allocating a new page failed.
+ *  GUEST_TABLE_NORMAL: the next level was mapped normally.
+ *  GUEST_TABLE_SUPER_PAGE: the entry points to a superpage.
+ */
+static int p2m_next_level(struct p2m_domain *p2m, bool alloc_tbl,
+                          unsigned int level, pte_t **table,
+                          unsigned int offset)
+{
+    pte_t *entry;
+    int ret;
+    mfn_t mfn;
+
+    entry = *table + offset;
+
+    if ( !p2m_is_valid(p2m, *entry) )
+    {
+        if ( !alloc_tbl )
+            return GUEST_TABLE_MAP_NONE;
+
+        ret = p2m_create_table(p2m, entry);
+        if ( ret )
+            return GUEST_TABLE_MAP_NOMEM;
+    }
+
+    /* The function p2m_next_level() is never called at the last level */
+    ASSERT(level != 0);
+    if ( p2m_is_mapping(p2m, *entry) )
+        return GUEST_TABLE_SUPER_PAGE;
+
+    mfn = mfn_from_pte(*entry);
+
+    unmap_domain_page(*table);
+    *table = map_domain_page(mfn);
+
+    return GUEST_TABLE_NORMAL;
+}
+
+static bool p2m_split_superpage(struct p2m_domain *p2m, pte_t *entry,
+                                unsigned int level, unsigned int target,
+                                const unsigned int *offsets)
+{
+    struct page_info *page;
+    unsigned int i;
+    pte_t pte, *table;
+    bool rv = true;
+
+    /* Convenience aliases */
+    mfn_t mfn = pte_get_mfn(*entry);
+    unsigned int next_level = level - 1;
+    unsigned int level_order = XEN_PT_LEVEL_ORDER(next_level);
+
+    /*
+     * This should only be called with target != level and the entry is
+     * a superpage.
+     */
+    ASSERT(level > target);
+    ASSERT(p2m_is_superpage(p2m, *entry, level));
+
+    page = p2m_alloc_page(p2m->domain);
+    if ( !page )
+        return false;
+
+    page_list_add(page, &p2m->pages);
+    table = __map_domain_page(page);
+
+    /*
+     * We are either splitting a level-2 1G superpage into 512 level-1 2M
+     * entries, or a level-1 2M superpage into 512 level-0 4K entries.
+     */
+    for ( i = 0; i < XEN_PT_ENTRIES; i++ )
+    {
+        pte_t *new_entry = table + i;
+
+        /*
+         * Use the content of the superpage entry and override
+         * the necessary fields, so that the correct permissions are kept.
+         */
+        pte = *entry;
+        pte_set_mfn(&pte, mfn_add(mfn, i << level_order));
+
+        write_pte(new_entry, pte);
+    }
+
+    /*
+     * Shatter the superpage further, down to the level at which the
+     * changes are to be made.
+     * This is done outside the loop to avoid having to check, for every
+     * entry, whether the offset requires further shattering.
+     */
+    if ( next_level != target )
+        rv = p2m_split_superpage(p2m, table + offsets[next_level],
+                                 level - 1, target, offsets);
+
+    /* TODO: Why is the cache clean needed here rather than in the caller? */
+    if ( p2m->clean_pte )
+        clean_dcache_va_range(table, PAGE_SIZE);
+
+    unmap_domain_page(table);
+
+    /*
+     * Even if we failed, we should install the newly allocated PTE
+     * entry. The caller will be in charge of freeing the sub-tree.
+     */
+    p2m_write_pte(entry, page_to_p2m_table(p2m, page), p2m->clean_pte);
+
+    return rv;
+}
+
+static void p2m_put_foreign_page(struct page_info *pg)
+{
+    /*
+     * It's safe to do the put_page here because page_alloc will
+     * flush the TLBs if the page is reallocated before the end of
+     * this loop.
+     */
+    put_page(pg);
+}
+
+/* Put any references on the single 4K page referenced by mfn. */
+static void p2m_put_4k_page(mfn_t mfn, p2m_type_t type)
+{
+    /* TODO: Handle other p2m types */
+    if ( p2m_is_foreign(type) )
+    {
+        ASSERT(mfn_valid(mfn));
+        p2m_put_foreign_page(mfn_to_page(mfn));
+    }
+    /* Detect the xenheap page and mark the stored GFN as invalid. */
+    else if ( p2m_is_ram(type) && is_xen_heap_mfn(mfn) )
+        page_set_xenheap_gfn(mfn_to_page(mfn), INVALID_GFN);
+}
+
+/* Put any references on the superpage referenced by mfn. */
+static void p2m_put_2m_superpage(mfn_t mfn, p2m_type_t type)
+{
+    struct page_info *pg;
+    unsigned int i;
+
+    /*
+     * TODO: Handle other p2m types, but be aware that any change to handle
+     * other types will require an update to the relinquish code, to handle
+     * preemption.
+     */
+    if ( !p2m_is_foreign(type) )
+        return;
+
+    ASSERT(mfn_valid(mfn));
+
+    pg = mfn_to_page(mfn);
+
+    for ( i = 0; i < XEN_PT_ENTRIES; i++, pg++ )
+        p2m_put_foreign_page(pg);
+}
+
+/* Put any references on the page referenced by pte. */
+static void p2m_put_page(struct p2m_domain *p2m, const pte_t pte,
+                         unsigned int level)
+{
+    mfn_t mfn = pte_get_mfn(pte);
+    p2m_type_t p2m_type = p2m_type_radix_get(p2m, pte);
+
+    ASSERT(p2m_is_valid(p2m, pte));
+
+    /*
+     * TODO: Level-2 superpages (1G) are currently not handled. Xen is not
+     * preemptible, and freeing the memory behind such a big mapping could
+     * turn into a very long operation, so some extra work is needed to
+     * handle them.
+     */
+    if ( level == 1 )
+        return p2m_put_2m_superpage(mfn, p2m_type);
+    else if ( level == 0 )
+        return p2m_put_4k_page(mfn, p2m_type);
+}
+
+static void p2m_free_page(struct domain *d, struct page_info *pg)
+{
+    if ( is_hardware_domain(d) )
+        free_domheap_page(pg);
+    else
+    {
+        spin_lock(&d->arch.paging.lock);
+        page_list_add_tail(pg, &d->arch.paging.p2m_freelist);
+        spin_unlock(&d->arch.paging.lock);
+    }
+}
+
+/* Free pte sub-tree behind an entry */
+static void p2m_free_entry(struct p2m_domain *p2m,
+                           pte_t entry, unsigned int level)
+{
+    unsigned int i;
+    pte_t *table;
+    mfn_t mfn;
+    struct page_info *pg;
+
+    /* Nothing to do if the entry is invalid. */
+    if ( !p2m_is_valid(p2m, entry) )
+        return;
+
+    if ( p2m_is_superpage(p2m, entry, level) || (level == 0) )
+    {
+#ifdef CONFIG_IOREQ_SERVER
+        /*
+         * If this gets called then either the entry was replaced by an entry
+         * with a different base (valid case) or the shattering of a superpage
+         * has failed (error case).
+         * So, at worst, a spurious mapcache invalidation might be sent.
+         */
+        if ( p2m_is_ram(p2m_type_radix_get(p2m, entry)) &&
+             domain_has_ioreq_server(p2m->domain) )
+            ioreq_request_mapcache_invalidate(p2m->domain);
+#endif
+
+        p2m_put_page(p2m, entry, level);
+
+        return;
+    }
+
+    table = map_domain_page(pte_get_mfn(entry));
+    for ( i = 0; i < XEN_PT_ENTRIES; i++ )
+        p2m_free_entry(p2m, *(table + i), level - 1);
+
+    unmap_domain_page(table);
+
+    /*
+     * Make sure all the references in the TLB have been removed before
+     * freeing the intermediate page table.
+     * XXX: Should we defer the free of the page table to avoid the
+     * flush?
+     */
+    p2m_tlb_flush_sync(p2m);
+
+    mfn = pte_get_mfn(entry);
+    ASSERT(mfn_valid(mfn));
+
+    pg = mfn_to_page(mfn);
+
+    page_list_del(pg, &p2m->pages);
+    p2m_free_page(p2m->domain, pg);
+}
+
+/*
+ * Insert an entry in the p2m. This should be called with a mapping
+ * equal to a page/superpage.
+ */
+static int __p2m_set_entry(struct p2m_domain *p2m,
+                           gfn_t sgfn,
+                           unsigned int page_order,
+                           mfn_t smfn,
+                           p2m_type_t t,
+                           p2m_access_t a)
+{
+    unsigned int level;
+    unsigned int target = page_order / PAGETABLE_ORDER;
+    pte_t *entry, *table, orig_pte;
+    int rc;
+    /* A mapping is removed if the MFN is invalid. */
+    bool removing_mapping = mfn_eq(smfn, INVALID_MFN);
+    DECLARE_OFFSETS(offsets, gfn_to_gaddr(sgfn));
+
+    ASSERT(p2m_is_write_locked(p2m));
+
+    /*
+     * Check that the target level is valid: only 4K, 2M and 1G mappings
+     * are supported.
+     */
+    ASSERT(target <= 2);
+
+    table = p2m_get_root_pointer(p2m, sgfn);
+    if ( !table )
+        return -EINVAL;
+
+    for ( level = P2M_ROOT_LEVEL; level > target; level-- )
+    {
+        /*
+         * Don't try to allocate intermediate page table if the mapping
+         * is about to be removed.
+         */
+        rc = p2m_next_level(p2m, !removing_mapping,
+                            level, &table, offsets[level]);
+        if ( (rc == GUEST_TABLE_MAP_NONE) || (rc == GUEST_TABLE_MAP_NOMEM) )
+        {
+            /*
+             * We are here because p2m_next_level() has failed to map
+             * the intermediate page table (e.g. the table does not exist
+             * and the p2m tree is read-only). It is a valid case when
+             * removing a mapping, as it may not exist in the page table.
+             * In this case, just ignore it.
+             */
+            rc = removing_mapping ? 0 : -ENOENT;
+            goto out;
+        }
+        else if ( rc != GUEST_TABLE_NORMAL )
+            break;
+    }
+
+    entry = table + offsets[level];
+
+    /*
+     * If we are here with level > target, we must be at a leaf node,
+     * and we need to break up the superpage.
+     */
+    if ( level > target )
+    {
+        /* We need to split the original page. */
+        pte_t split_pte = *entry;
+
+        ASSERT(p2m_is_superpage(p2m, *entry, level));
+
+        if ( !p2m_split_superpage(p2m, &split_pte, level, target, offsets) )
+        {
+            /* Free the allocated sub-tree */
+            p2m_free_entry(p2m, split_pte, level);
+
+            rc = -ENOMEM;
+            goto out;
+        }
+
+        /* Follow the break-before-make sequence to update the entry. */
+        p2m_remove_pte(entry, p2m->clean_pte);
+        p2m_force_tlb_flush_sync(p2m);
+
+        p2m_write_pte(entry, split_pte, p2m->clean_pte);
+
+        /* Then move down to the level where the real changes are made. */
+        for ( ; level > target; level-- )
+        {
+            rc = p2m_next_level(p2m, true, level, &table, offsets[level]);
+
+            /*
+             * The entry should be found and be either a table or, if
+             * level 0 is not the target, a superpage.
+             */
+            ASSERT(rc == GUEST_TABLE_NORMAL ||
+                   (rc == GUEST_TABLE_SUPER_PAGE && target > 0));
+        }
+
+        entry = table + offsets[level];
+    }
+
+    /*
+     * We should always arrive here at the correct level, because all the
+     * intermediate tables have been installed if necessary.
+     */
+    ASSERT(level == target);
+
+    orig_pte = *entry;
+
+    /*
+     * The access type should always be p2m_access_rwx when the mapping
+     * is removed.
+     */
+    ASSERT(!mfn_eq(INVALID_MFN, smfn) || (a == p2m_access_rwx));
+
+    /*
+     * Always remove the entry in order to follow the break-before-make
+     * sequence when updating the translation table.
+     */
+    if ( pte_is_valid(orig_pte) || removing_mapping )
+        p2m_remove_pte(entry, p2m->clean_pte);
+
+    if ( removing_mapping )
+        /* Flush can be deferred if the entry is removed */
+        p2m->need_flush |= !!pte_is_valid(orig_pte);
+    else
+    {
+        pte_t pte = p2m_entry_from_mfn(p2m, smfn, t, a);
+
+        /*
+         * It is necessary to flush the TLB before writing the new entry
+         * to keep coherency when the previous entry was valid.
+         *
+         * However, the flush can be deferred when only the permissions
+         * are changed (e.g. in the case of memaccess).
+         */
+        if ( pte_is_valid(orig_pte) )
+        {
+            if ( P2M_CLEAR_PERM(pte) != P2M_CLEAR_PERM(orig_pte) )
+                p2m_force_tlb_flush_sync(p2m);
+            else
+                p2m->need_flush = true;
+        }
+
+        p2m_write_pte(entry, pte, p2m->clean_pte);
+
+        p2m->max_mapped_gfn = gfn_max(p2m->max_mapped_gfn,
+                                      gfn_add(sgfn, (1UL << page_order) - 1));
+        p2m->lowest_mapped_gfn = gfn_min(p2m->lowest_mapped_gfn, sgfn);
+    }
+
+#ifdef CONFIG_HAS_PASSTHROUGH
+    if ( is_iommu_enabled(p2m->domain) &&
+         (pte_is_valid(orig_pte) || pte_is_valid(*entry)) )
+    {
+        unsigned int flush_flags = 0;
+
+        if ( pte_is_valid(orig_pte) )
+            flush_flags |= IOMMU_FLUSHF_modified;
+        if ( pte_is_valid(*entry) )
+            flush_flags |= IOMMU_FLUSHF_added;
+
+        rc = iommu_iotlb_flush(p2m->domain, _dfn(gfn_x(sgfn)),
+                               1UL << page_order, flush_flags);
+    }
+    else
+#endif
+        rc = 0;
+
+    /*
+     * Free the entry only if the original pte was valid and the base
+     * is different (to avoid freeing when only the permissions changed).
+     */
+    if ( p2m_is_valid(p2m, orig_pte) &&
+         !mfn_eq(pte_get_mfn(*entry), pte_get_mfn(orig_pte)) )
+        p2m_free_entry(p2m, orig_pte, level);
+
+ out:
+    unmap_domain_page(table);
+
+    return rc;
+}
+
+int p2m_set_entry(struct p2m_domain *p2m,
+                  gfn_t sgfn,
+                  unsigned long nr,
+                  mfn_t smfn,
+                  p2m_type_t t,
+                  p2m_access_t a)
+{
+    int rc = 0;
+
+    /*
+     * Any reference taken by the P2M mappings (e.g. foreign mapping) will
+     * be dropped in relinquish_p2m_mapping(). As the P2M will still be
+     * accessible afterwards, we need to prevent mappings from being added
+     * while the domain is dying.
+     */
+    if ( unlikely(p2m->domain->is_dying) )
+        return -ENOMEM;
+
+    while ( nr )
+    {
+        unsigned long mask;
+        unsigned long order = 0;
+        /* 1GB, 2MB and 4KB mappings are supported. */
+        unsigned int i = (P2M_ROOT_LEVEL > 2) ? 2 : P2M_ROOT_LEVEL;
+
+        /*
+         * Don't take the MFN into account when removing a mapping (i.e.
+         * INVALID_MFN), to calculate the correct target order.
+         *
+         * XXX: Support superpage mappings if nr is not aligned to a
+         * superpage size.
+         */
+        mask = !mfn_eq(smfn, INVALID_MFN) ? mfn_x(smfn) : 0;
+        mask |= gfn_x(sgfn) | nr;
+
+        for ( ; i != 0; i-- )
+        {
+            if ( !(mask & (BIT(XEN_PT_LEVEL_ORDER(i), UL) - 1)) )
+            {
+                order = XEN_PT_LEVEL_ORDER(i);
+                break;
+            }
+        }
+
+        rc = __p2m_set_entry(p2m, sgfn, order, smfn, t, a);
+        if ( rc )
+            break;
+
+        sgfn = gfn_add(sgfn, (1 << order));
+        if ( !mfn_eq(smfn, INVALID_MFN) )
+            smfn = mfn_add(smfn, (1 << order));
+
+        nr -= (1 << order);
+    }
+
+    return rc;
+}
+
+static int p2m_insert_mapping(struct domain *d, gfn_t start_gfn,
+                              unsigned long nr, mfn_t mfn, p2m_type_t t)
+{
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    int rc;
+
+    p2m_write_lock(p2m);
+    rc = p2m_set_entry(p2m, start_gfn, nr, mfn, t, p2m->default_access);
+    p2m_write_unlock(p2m);
+
+    return rc;
+}
+
+int map_regions_p2mt(struct domain *d,
+                     gfn_t gfn,
+                     unsigned long nr,
+                     mfn_t mfn,
+                     p2m_type_t p2mt)
+{
+    return p2m_insert_mapping(d, gfn, nr, mfn, p2mt);
+}
+
+int guest_physmap_add_entry(struct domain *d,
+                            gfn_t gfn,
+                            mfn_t mfn,
+                            unsigned long page_order,
+                            p2m_type_t t)
+{
+    return p2m_insert_mapping(d, gfn, (1 << page_order), mfn, t);
+}
-- 
2.49.0

