On 24.11.2025 13:33, Oleksii Kurochko wrote:
> @@ -374,24 +399,107 @@ static struct page_info *p2m_alloc_page(struct p2m_domain *p2m)
> return pg;
> }
>
> -static int p2m_set_type(pte_t *pte, p2m_type_t t)
> +/*
> + * `pte` – PTE entry for which the type `t` will be stored.
> + *
> + * If `t` is `p2m_ext_storage`, both `ctx` and `p2m` must be provided;
> + * otherwise, only p2m may be NULL.
> + */
> +static void p2m_set_type(pte_t *pte, p2m_type_t t,
> + struct p2m_pte_ctx *ctx,
> + struct p2m_domain *p2m)
I assume you keep struct p2m_domain * separate from ctx here because the
"get" counterpart wouldn't need it. The same is true for the level member,
though. I therefore wonder whether putting p2m in pte_ctx as well wouldn't
make for an overall cleaner interface. (But this is truly just a thought; I
don't mean to insist here.)
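Just to illustrate what I have in mind (untested sketch; field types guessed
from the rest of the patch, so take it with a grain of salt):

    struct p2m_pte_ctx {
        struct page_info *pt_page;
        unsigned int index;
        unsigned int level;
        struct p2m_domain *p2m;    /* would replace the separate parameter */
    };

    static void p2m_set_type(pte_t *pte, p2m_type_t t,
                             const struct p2m_pte_ctx *ctx);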
> {
> - int rc = 0;
> + struct page_info **md_pg;
> + struct md_t *metadata = NULL;
>
> - if ( t > p2m_first_external )
> - panic("unimplemeted\n");
> - else
> - pte->pte |= MASK_INSR(t, P2M_TYPE_PTE_BITS_MASK);
> + ASSERT(p2m);
>
> - return rc;
> + /*
> + * It is sufficient to compare ctx->index with PAGETABLE_ENTRIES because,
> + * even for the p2m root page table (which is a 16 KB page allocated as
> + * four 4 KB pages), calc_offset() guarantees that the page-table index
> + * will always fall within the range [0, 511].
> + */
> + ASSERT(ctx && ctx->index < PAGETABLE_ENTRIES);
> +
> + /*
> + * At the moment, p2m_get_root_pointer() returns one of four possible p2m
> + * root pages, so there is no need to search for the correct ->pt_page
> + * here.
> + * Non-root page tables are 4 KB pages, so simply using ->pt_page is
> + * sufficient.
> + */
> + md_pg = &ctx->pt_page->v.md.pg;
> +
> + if ( !*md_pg && (t >= p2m_first_external) )
> + {
> + BUG_ON(ctx->level > P2M_MAX_SUPPORTED_LEVEL_MAPPING);
> +
> + if ( ctx->level <= P2M_MAX_SUPPORTED_LEVEL_MAPPING )
> + {
> + /*
> + * Since p2m_alloc_page() initializes an allocated page with zeros, p2m_invalid
> + * is expected to have the value 0 as well. This ensures that if a metadata
> + * page is accessed before being properly initialized, the correct type
> + * (p2m_invalid in this case) will be returned.
> + */
Nit: Line length.
Also imo "properly initialized" is ambiguous. The clearing of the page can
already
count as such. No access to the page may happen ahead of this clearing.
> + BUILD_BUG_ON(p2m_invalid);
> +
> + *md_pg = p2m_alloc_page(p2m);
> + if ( !*md_pg )
> + {
> + printk("%pd: can't allocate metadata page\n", p2m->domain);
> + domain_crash(p2m->domain);
> +
> + return;
> + }
> + }
> + }
> +
> + if ( *md_pg )
> + metadata = __map_domain_page(*md_pg);
> +
> + if ( t >= p2m_first_external )
> + {
> + metadata[ctx->index].type = t;
> +
> + t = p2m_ext_storage;
> + }
> + else if ( metadata )
> + metadata[ctx->index].type = p2m_invalid;
> +
> + pte->pte |= MASK_INSR(t, P2M_TYPE_PTE_BITS_MASK);
> +
> + unmap_domain_page(metadata);
> }
Just to mention (towards future work): Once a metadata page goes back to being
entirely zero-filled, it could as well be unhooked and returned to the pool.
Not doing so may mean detaining an unused page indefinitely.
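Roughly (untested sketch; it assumes a p2m_free_page() counterpart to
p2m_alloc_page() and that struct md_t holds nothing besides the type, so both
names/assumptions would need adjusting to what you actually have):

    else if ( metadata )
    {
        unsigned int i;

        metadata[ctx->index].type = p2m_invalid;

        /* If no entry holds an external type anymore, drop the page. */
        for ( i = 0; i < PAGETABLE_ENTRIES; i++ )
            if ( metadata[i].type != p2m_invalid )
                break;

        if ( i == PAGETABLE_ENTRIES )
        {
            unmap_domain_page(metadata);
            /* The trailing unmap then is a no-op, as for the !*md_pg case. */
            metadata = NULL;
            p2m_free_page(p2m, *md_pg);
            *md_pg = NULL;
        }
    }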
> -static p2m_type_t p2m_get_type(const pte_t pte)
> +/*
> + * `pte` -> PTE entry that stores the PTE's type.
> + *
> + * If the PTE's type is `p2m_ext_storage`, `ctx` should be provided;
> + * otherwise it could be NULL.
> + */
> +static p2m_type_t p2m_get_type(const pte_t pte, const struct p2m_pte_ctx *ctx)
> {
> p2m_type_t type = MASK_EXTR(pte.pte, P2M_TYPE_PTE_BITS_MASK);
>
> + /*
> + * Since the PTE is initialized with all zeros by default, p2m_invalid must
> + * have the value 0. This ensures that if p2m_get_type() is called for a GFN
> + * that hasn't been initialized, the correct type (p2m_invalid in this case)
> + * will be returned. It also guarantees that metadata won't be touched when
> + * the GFN hasn't been initialized.
> + */
> + BUILD_BUG_ON(p2m_invalid);
I don't think the comment and BUILD_BUG_ON() need repeating here. That's
relevant only when (zero-)initializing the page.
> if ( type == p2m_ext_storage )
> - panic("unimplemented\n");
> + {
> + const struct md_t *md = __map_domain_page(ctx->pt_page->v.md.pg);
> +
> + type = md[ctx->index].type;
In exchange you may want to assert here that the type found is
>= p2m_first_external (as - supposedly - guaranteed by p2m_set_type()).
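I.e. something along the lines of:

    type = md[ctx->index].type;
    ASSERT(type >= p2m_first_external);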
> @@ -792,6 +952,13 @@ static bool p2m_split_superpage(struct p2m_domain *p2m, pte_t *entry,
> pte = *entry;
> pte_set_mfn(&pte, mfn_add(mfn, i << level_order));
>
> + if ( MASK_EXTR(pte.pte, P2M_TYPE_PTE_BITS_MASK) == p2m_ext_storage )
> + {
> + p2m_pte_ctx.index = i;
> +
> + p2m_set_type(&pte, old_type, &p2m_pte_ctx, p2m);
In order to re-use p2m_pte_ctx across multiple iterations without fully
re-initializing it, you want the respective parameter of p2m_set_type() to be
pointer-to-const.
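I.e., keeping the interface otherwise as it is in this patch, merely
constifying the pointer (prototype only):

    static void p2m_set_type(pte_t *pte, p2m_type_t t,
                             const struct p2m_pte_ctx *ctx,
                             struct p2m_domain *p2m);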
> @@ -894,13 +1061,21 @@ static int p2m_set_entry(struct p2m_domain *p2m,
> {
> /* We need to split the original page. */
> pte_t split_pte = *entry;
> + struct page_info *tbl_pg = mfn_to_page(domain_page_map_to_mfn(table));
>
> ASSERT(pte_is_superpage(*entry, level));
>
> - if ( !p2m_split_superpage(p2m, &split_pte, level, target, offsets) )
> + if ( !p2m_split_superpage(p2m, &split_pte, level, target, offsets,
> + tbl_pg) )
> {
> + struct p2m_pte_ctx tmp_ctx = {
> + .pt_page = tbl_pg,
> + .index = offsets[level],
> + .level = level,
> + };
This, ...
> @@ -938,7 +1113,13 @@ static int p2m_set_entry(struct p2m_domain *p2m,
> p2m_clean_pte(entry, p2m->clean_dcache);
> else
> {
> - pte_t pte = p2m_pte_from_mfn(mfn, t, false);
> + struct p2m_pte_ctx tmp_ctx = {
> + .pt_page = mfn_to_page(domain_page_map_to_mfn(table)),
> + .index = offsets[level],
> + .level = level,
> + };
... this, and ...
> @@ -974,7 +1155,15 @@ static int p2m_set_entry(struct p2m_domain *p2m,
> if ( pte_is_valid(orig_pte) &&
> (!pte_is_valid(*entry) ||
> !mfn_eq(pte_get_mfn(*entry), pte_get_mfn(orig_pte))) )
> - p2m_free_subtree(p2m, orig_pte, level);
> + {
> + struct p2m_pte_ctx tmp_ctx = {
> + .pt_page = mfn_to_page(domain_page_map_to_mfn(table)),
> + .index = offsets[level],
> + .level = level,
> + };
... this initializer are identical. Perhaps (sorry) it wasn't a good idea
after all to move the context variable from function scope to the narrower
ones?
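What I mean is a single instance at function scope again, e.g. (untested,
exact placement left to you):

    struct p2m_pte_ctx pte_ctx;

    ...

    /* Once table, level, and offsets[] are settled after the walk: */
    pte_ctx = (struct p2m_pte_ctx){
        .pt_page = mfn_to_page(domain_page_map_to_mfn(table)),
        .index = offsets[level],
        .level = level,
    };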
Jan