Hi Jason,

On Mon, May 05, 2025 at 11:18:37AM -0300, Jason Gunthorpe wrote:
> map is slightly complicated because it has to handle a number of special
> edge cases:
>  - Overmapping a previously shared table with an OA - requires validating
>    and freeing the possibly empty tables
>  - Doing the above across an entire to-be-created contiguous entry
>  - Installing a new shared table level concurrently with another thread
>  - Expanding the table by adding more top levels
> 
> Table expansion is a unique feature of AMDv1, this version is quite
> similar except we handle racing concurrent lockless map. The table top
> pointer and starting level are encoded in a single uintptr_t which ensures
> we can READ_ONCE() without tearing. Any op will do the READ_ONCE() and use
> that fixed point as its starting point. Concurrent expansion is handled
> with a table global spinlock.
> 
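(Aside, for readers new to this trick: below is a minimal sketch of the
single-word encoding described above. The helper names and exact bit layout
are made up for illustration; the real encoding is behind _pt_top_set() and
_pt_top_range() in the patch.)

#include <linux/types.h>

/*
 * The top table is naturally aligned, so its low bits are free to carry
 * the starting level.  One READ_ONCE() of the combined word then snapshots
 * both the pointer and the level without tearing.
 */
#define EX_TOP_LEVEL_MASK	0x7UL

static inline uintptr_t ex_top_encode(void *table, unsigned int level)
{
	return (uintptr_t)table | (level & EX_TOP_LEVEL_MASK);
}

static inline unsigned int ex_top_level(uintptr_t top_of_table)
{
	return top_of_table & EX_TOP_LEVEL_MASK;
}

static inline void *ex_top_table(uintptr_t top_of_table)
{
	return (void *)(top_of_table & ~EX_TOP_LEVEL_MASK);
}
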
> When inserting a new table entry, map checks that the entire portion of the
> table is empty. This includes freeing any empty lower tables that will be
> overwritten by an OA. A separate free list is used while checking and
> collecting all the empty lower tables so that writing the new entry is
> uninterrupted, either the new entry fully writes or nothing changes.
> 
> A special fast path for PAGE_SIZE is implemented that does a direct walk
> to the leaf level and installs a single entry. This gives ~15% improvement
> for iommu_map() when mapping lists of single pages.
> 
> This version sits under the iommu_domain_ops as map_pages() but does not
> require the external page size calculation. The implementation is actually
> map_range() and can do arbitrary ranges, internally handling all the
> validation and supporting any arrangement of page sizes. A future series
> can optimize iommu_map() to take advantage of this.
> 
> Signed-off-by: Jason Gunthorpe <j...@nvidia.com>
> ---
>  drivers/iommu/generic_pt/iommu_pt.h | 481 ++++++++++++++++++++++++++++
>  include/linux/generic_pt/iommu.h    |  58 ++++
>  2 files changed, 539 insertions(+)
> 
> diff --git a/drivers/iommu/generic_pt/iommu_pt.h b/drivers/iommu/generic_pt/iommu_pt.h
> index aa15fb79abf24a..7a680017f35bff 100644
> --- a/drivers/iommu/generic_pt/iommu_pt.h
> +++ b/drivers/iommu/generic_pt/iommu_pt.h
> @@ -90,6 +90,23 @@ static __maybe_unused int make_range_u64(struct pt_common *common,
>               ret;                                                     \
>       })
>  
> +static inline unsigned int compute_best_pgsize(struct pt_state *pts,
> +                                            pt_oaddr_t oa)
> +{
> +     struct pt_iommu *iommu_table = iommu_from_common(pts->range->common);
> +
> +     if (!pt_can_have_leaf(pts))
> +             return 0;
> +
> +     /*
> +      * The page size is limited by the domain's bitmap. This allows the core
> +      * code to reduce the supported page sizes by changing the bitmap.
> +      */
> +     return pt_compute_best_pgsize(pt_possible_sizes(pts) &
> +                                           iommu_table->domain.pgsize_bitmap,
> +                                   pts->range->va, pts->range->last_va, oa);
> +}
> +
> +static __always_inline int __do_iova_to_phys(struct pt_range *range, void *arg,
>                                            unsigned int level,
>                                            struct pt_table_p *table,
> @@ -189,6 +206,463 @@ static inline struct pt_table_p *table_alloc_top(struct pt_common *common,
>               log2_to_int(pt_top_memsize_lg2(common, top_of_table)));
>  }
>  
> +/* Allocate an interior table */
> +static inline struct pt_table_p *table_alloc(const struct pt_state 
> *parent_pts,
> +                                          gfp_t gfp)
> +{
> +     struct pt_iommu *iommu_table =
> +             iommu_from_common(parent_pts->range->common);
> +     struct pt_state child_pts =
> +             pt_init(parent_pts->range, parent_pts->level - 1, NULL);
> +
> +     return iommu_alloc_pages_node_sz(
> +             iommu_table->nid, gfp,
> +             log2_to_int(pt_num_items_lg2(&child_pts) +
> +                         ilog2(PT_ENTRY_WORD_SIZE)));
> +}
> +
> +static inline int pt_iommu_new_table(struct pt_state *pts,
> +                                  struct pt_write_attrs *attrs)
> +{
> +     struct pt_table_p *table_mem;
> +     phys_addr_t phys;
> +
> +     /* Given PA/VA/length can't be represented */
> +     if (unlikely(!pt_can_have_table(pts)))
> +             return -ENXIO;
> +
> +     table_mem = table_alloc(pts, attrs->gfp);
> +     if (IS_ERR(table_mem))
> +             return PTR_ERR(table_mem);
> +
> +     phys = virt_to_phys(table_mem);
> +     if (!pt_install_table(pts, phys, attrs)) {
> +             iommu_free_pages(table_mem);
> +             return -EAGAIN;
> +     }
> +
> +     if (IS_ENABLED(CONFIG_DEBUG_GENERIC_PT)) {
> +             /*
> +              * The underlying table can't store the physical table address.
> +              * This happens when kunit testing tables outside their normal
> +              * environment where a CPU might be limited.
> +              */
> +             pt_load_single_entry(pts);
> +             if (PT_WARN_ON(pt_table_pa(pts) != phys)) {
> +                     pt_clear_entry(pts, ilog2(1));
> +                     iommu_free_pages(table_mem);
> +                     return -EINVAL;
> +             }
> +     }
> +
> +     pts->table_lower = table_mem;
> +     return 0;
> +}
> +
> +struct pt_iommu_map_args {
> +     struct iommu_iotlb_gather *iotlb_gather;
> +     struct pt_write_attrs attrs;
> +     pt_oaddr_t oa;
> +     unsigned int leaf_pgsize_lg2;
> +     unsigned int leaf_level;
> +};
> +
> +/*
> + * Check that the items in a contiguous block are all empty. This will
> + * recursively check any tables in the block to validate they are empty and
> + * accumulate them on the free list. Makes no change on failure. On success
> + * caller must fill the items.
> + */
> +static int clear_contig(const struct pt_state *start_pts,
> +                     struct iommu_iotlb_gather *iotlb_gather,
> +                     unsigned int step, unsigned int pgsize_lg2)
> +{
> +     struct pt_iommu *iommu_table =
> +             iommu_from_common(start_pts->range->common);
> +     struct pt_range range = *start_pts->range;
> +     struct pt_state pts =
> +             pt_init(&range, start_pts->level, start_pts->table);
> +     struct pt_iommu_collect_args collect = {
> +             .free_list = IOMMU_PAGES_LIST_INIT(collect.free_list),
> +     };
> +     pt_vaddr_t start_va = range.va;
> +     int ret;
> +
> +     pts.index = start_pts->index;
> +     pts.end_index = start_pts->index + step;
> +     for (; _pt_iter_load(&pts); pt_next_entry(&pts)) {
> +             if (pts.type == PT_ENTRY_TABLE) {
> +                     ret = pt_walk_descend_all(&pts, __collect_tables,
> +                                               &collect);
> +                     if (ret)
> +                             return ret;
> +
> +                     iommu_pages_list_add(&collect.free_list,
> +                                          pt_table_ptr(&pts));
> +             } else if (pts.type != PT_ENTRY_EMPTY) {
> +                     return -EADDRINUSE;
> +             }
> +     }
> +
> +     if (!iommu_pages_list_empty(&collect.free_list)) {
> +             gather_range(iotlb_gather, iommu_table, start_va,
> +                          range.va - start_va);
> +             iommu_pages_list_splice(&collect.free_list,
> +                                     &iotlb_gather->freelist);
> +     }
> +     return 0;
> +}
> +
> +static int __map_range_leaf(struct pt_range *range, void *arg,
> +                         unsigned int level, struct pt_table_p *table)
> +{
> +     struct pt_state pts = pt_init(range, level, table);
> +     struct pt_iommu_map_args *map = arg;
> +     unsigned int leaf_pgsize_lg2 = map->leaf_pgsize_lg2;
> +     unsigned int start_index;
> +     pt_oaddr_t oa = map->oa;
> +     unsigned int step;
> +     bool need_contig;
> +
> +     PT_WARN_ON(map->leaf_level != level);
> +     PT_WARN_ON(!pt_can_have_leaf(&pts));
> +
> +     step = log2_to_int_t(unsigned int,
> +                          leaf_pgsize_lg2 - pt_table_item_lg2sz(&pts));
> +     need_contig = leaf_pgsize_lg2 != pt_table_item_lg2sz(&pts);
> +
> +     _pt_iter_first(&pts);
> +     start_index = pts.index;
> +     do {
> +             pts.type = pt_load_entry_raw(&pts);
> +             if (pts.type != PT_ENTRY_EMPTY || need_contig) {
> +                     int ret;
> +
> +                     if (pts.index != start_index)
> +                             pt_index_to_va(&pts);
> +                     ret = clear_contig(&pts, map->iotlb_gather, step,
> +                                        leaf_pgsize_lg2);
> +                     if (ret) {
> +                             map->oa = oa;
> +                             return ret;
> +                     }
> +             }
> +
> +             PT_WARN_ON(compute_best_pgsize(&pts, oa) != leaf_pgsize_lg2);
> +
> +             pt_install_leaf_entry(&pts, oa, leaf_pgsize_lg2, &map->attrs);
> +
> +             oa += log2_to_int(leaf_pgsize_lg2);
> +             pts.index += step;
> +     } while (pts.index < pts.end_index);
> +
> +     map->oa = oa;
> +     return 0;
> +}
> +
> +static int __map_range(struct pt_range *range, void *arg, unsigned int level,
> +                    struct pt_table_p *table)
> +{
> +     struct pt_state pts = pt_init(range, level, table);
> +     struct pt_iommu_map_args *map = arg;
> +     int ret;
> +
> +     PT_WARN_ON(map->leaf_level == level);
> +     PT_WARN_ON(!pt_can_have_table(&pts));
> +
> +     _pt_iter_first(&pts);
> +
> +     /* Descend to a child table */
> +     do {
> +             pts.type = pt_load_entry_raw(&pts);
> +
> +             if (pts.type != PT_ENTRY_TABLE) {
> +                     if (pts.type != PT_ENTRY_EMPTY)
> +                             return -EADDRINUSE;
> +                     ret = pt_iommu_new_table(&pts, &map->attrs);
> +                     if (ret) {
> +                             /*
> +                              * Racing with another thread installing a table
> +                              */
> +                             if (ret == -EAGAIN)
> +                                     continue;
> +                             return ret;
> +                     }
> +             } else {
> +                     pts.table_lower = pt_table_ptr(&pts);
> +             }
> +
> +             /*
> +              * The already present table can possibly be shared with another
> +              * concurrent map.
> +              */
> +             if (map->leaf_level == level - 1)
> +                     ret = pt_descend(&pts, arg, __map_range_leaf);
> +             else
> +                     ret = pt_descend(&pts, arg, __map_range);
> +             if (ret)
> +                     return ret;
> +
> +             pts.index++;
> +             pt_index_to_va(&pts);
> +             if (pts.index >= pts.end_index)
> +                     break;
> +             pts.type = pt_load_entry_raw(&pts);
> +     } while (true);
> +     return 0;
> +}
> +
> +static __always_inline int __do_map_single_page(struct pt_range *range,
> +                                             void *arg, unsigned int level,
> +                                             struct pt_table_p *table,
> +                                             pt_level_fn_t descend_fn)
> +{
> +     struct pt_state pts = pt_init(range, level, table);
> +     struct pt_iommu_map_args *map = arg;
> +
> +     pts.type = pt_load_single_entry(&pts);
> +     if (level == 0) {
> +             if (pts.type != PT_ENTRY_EMPTY)
> +                     return -EADDRINUSE;
> +             pt_install_leaf_entry(&pts, map->oa, PAGE_SHIFT,
> +                                   &map->attrs);
> +             map->oa += PAGE_SIZE;
> +             return 0;
> +     }
> +     if (pts.type != PT_ENTRY_TABLE)
> +             return -EAGAIN;
> +     return pt_descend(&pts, arg, descend_fn);
> +}
> +PT_MAKE_LEVELS(__map_single_page, __do_map_single_page);
> +
> +/*
> + * Add a table to the top, increasing the top level as much as necessary to
> + * encompass range.
> + */
> +static int increase_top(struct pt_iommu *iommu_table, struct pt_range *range,
> +                     struct pt_iommu_map_args *map)
> +{
> +     struct iommu_pages_list free_list = IOMMU_PAGES_LIST_INIT(free_list);
> +     struct pt_common *common = common_from_iommu(iommu_table);
> +     uintptr_t top_of_table = READ_ONCE(common->top_of_table);
> +     uintptr_t new_top_of_table = top_of_table;
> +     struct pt_table_p *table_mem;
> +     unsigned int new_level;
> +     spinlock_t *domain_lock;
> +     unsigned long flags;
> +     int ret;
> +
> +     while (true) {
> +             struct pt_range top_range =
> +                     _pt_top_range(common, new_top_of_table);
> +             struct pt_state pts = pt_init_top(&top_range);
> +
> +             top_range.va = range->va;
> +             top_range.last_va = range->last_va;
> +
> +             if (!pt_check_range(&top_range) && map->leaf_level <= pts.level)
> +                     break;
> +
> +             pts.level++;
> +             if (pts.level > PT_MAX_TOP_LEVEL ||
> +                 pt_table_item_lg2sz(&pts) >= common->max_vasz_lg2) {
> +                     ret = -ERANGE;
> +                     goto err_free;
> +             }
> +
> +             new_level = pts.level;
> +             table_mem = table_alloc_top(
> +                     common, _pt_top_set(NULL, pts.level), map->attrs.gfp);
> +             if (IS_ERR(table_mem))
> +                     return PTR_ERR(table_mem);

For subsequent iterations of the while loop, shouldn't this be a goto err_free
rather than a direct return? Otherwise the tables already queued on free_list
would be leaked, correct?

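Something along these lines, perhaps (sketch only, untested):

	table_mem = table_alloc_top(
		common, _pt_top_set(NULL, pts.level), map->attrs.gfp);
	if (IS_ERR(table_mem)) {
		/* free_list may already hold tables from earlier iterations */
		ret = PTR_ERR(table_mem);
		goto err_free;
	}
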
> +             iommu_pages_list_add(&free_list, table_mem);
> +
> +             /* The new table links to the lower table always at index 0 */
> +             top_range.va = 0;
> +             top_range.top_level = new_level;
> +             pts.table_lower = pts.table;
> +             pts.table = table_mem;
> +             pt_load_single_entry(&pts);
> +             PT_WARN_ON(pts.index != 0);
> +             pt_install_table(&pts, virt_to_phys(pts.table_lower),
> +                              &map->attrs);
> +             new_top_of_table = _pt_top_set(pts.table, pts.level);
> +     }
> +
> +     /*
> +      * top_of_table is write locked by the spinlock, but readers can use
> +      * READ_ONCE() to get the value. Since we encode both the level and the
> +      * pointer in one quantum, the lockless reader will always see something
> +      * valid. The HW must be updated to the new level under the spinlock
> +      * before top_of_table is updated so that concurrent readers don't map
> +      * into the new level until it is fully functional. If another thread
> +      * already updated it while we were working then throw everything away
> +      * and try again.
> +      */
> +     domain_lock = iommu_table->hw_flush_ops->get_top_lock(iommu_table);
> +     spin_lock_irqsave(domain_lock, flags);
> +     if (common->top_of_table != top_of_table) {
> +             spin_unlock_irqrestore(domain_lock, flags);
> +             ret = -EAGAIN;
> +             goto err_free;
> +     }
> +
> +     /*
> +      * We do not issue any flushes for change_top on the expectation that
> +      * any walk cache will not become a problem by adding another layer to
> +      * the tree. Misses will rewalk from the updated top pointer, hits
> +      * continue to be correct. Negative caching is fine too since all the
> +      * new IOVA added by the new top is non-present.
> +      */
> +     iommu_table->hw_flush_ops->change_top(
> +             iommu_table, virt_to_phys(table_mem), new_level);
> +     WRITE_ONCE(common->top_of_table, new_top_of_table);
> +     spin_unlock_irqrestore(domain_lock, flags);
> +     return 0;
> +
> +err_free:
> +     iommu_put_pages_list(&free_list);
> +     return ret;
> +}
> +
> +static int check_map_range(struct pt_iommu *iommu_table, struct pt_range *range,
> +                        struct pt_iommu_map_args *map)
> +{
> +     struct pt_common *common = common_from_iommu(iommu_table);
> +     int ret;
> +
> +     do {
> +             ret = pt_check_range(range);
> +             if (!pt_feature(common, PT_FEAT_DYNAMIC_TOP))
> +                     return ret;
> +
> +             if (!ret && map->leaf_level <= range->top_level)
> +                     break;
> +
> +             ret = increase_top(iommu_table, range, map);
> +             if (ret && ret != -EAGAIN)
> +                     return ret;
> +
> +             /* Reload the new top */
> +             *range = pt_make_range(common, range->va, range->last_va);
> +     } while (ret);
> +     PT_WARN_ON(pt_check_range(range));
> +     return 0;
> +}
> +
> +/**
> + * map_range() - Install translation for an IOVA range
> + * @iommu_table: Table to manipulate
> + * @iova: IO virtual address to start
> + * @paddr: Physical/Output address to start
> + * @len: Length of the range starting from @iova
> + * @prot: A bitmap of IOMMU_READ/WRITE/CACHE/NOEXEC/MMIO
> + * @gfp: GFP flags for any memory allocations
> + * @gather: Gather struct that must be flushed on return
> + *
> + * The range starting at IOVA will have paddr installed into it. The range is
> + * automatically segmented into optimally sized table entries, and can have any
> + * valid alignment.
> + *
> + * On error the caller will probably want to invoke unmap on the range from iova
> + * up to the amount indicated by @mapped to return the table back to an
> + * unchanged state.
> + *
> + * Context: The caller must hold a write range lock that includes the whole
> + * range.
> + *
> + * Returns: -ERRNO on failure, 0 on success. The number of bytes of VA that were
> + * mapped are added to @mapped; @mapped is not zeroed first.
> + */
> +int DOMAIN_NS(map_pages)(struct iommu_domain *domain, unsigned long iova,
> +                      phys_addr_t paddr, size_t pgsize, size_t pgcount,
> +                      int prot, gfp_t gfp, size_t *mapped)
> +{
> +     struct pt_iommu *iommu_table =
> +             container_of(domain, struct pt_iommu, domain);
> +     pt_vaddr_t pgsize_bitmap = iommu_table->domain.pgsize_bitmap;
> +     struct pt_common *common = common_from_iommu(iommu_table);
> +     struct iommu_iotlb_gather iotlb_gather;
> +     pt_vaddr_t len = pgsize * pgcount;
> +     struct pt_iommu_map_args map = {
> +             .iotlb_gather = &iotlb_gather,
> +             .oa = paddr,
> +             .leaf_pgsize_lg2 = log2_ffs(pgsize),
> +     };
> +     bool single_page = false;
> +     struct pt_range range;
> +     int ret;
> +
> +     iommu_iotlb_gather_init(&iotlb_gather);
> +
> +     if (WARN_ON(!(prot & (IOMMU_READ | IOMMU_WRITE))))
> +             return -EINVAL;
> +
> +     /* Check the paddr doesn't exceed what the table can store */
> +     if ((sizeof(pt_oaddr_t) > sizeof(paddr) && paddr > PT_VADDR_MAX) ||
> +         (common->max_oasz_lg2 != PT_VADDR_MAX_LG2 &&
> +          oalog2_div(paddr, common->max_oasz_lg2)))
> +             return -ERANGE;
> +
> +     ret = pt_iommu_set_prot(common, &map.attrs, prot);
> +     if (ret)
> +             return ret;
> +     map.attrs.gfp = gfp;
> +
> +     ret = make_range_no_check(common, &range, iova, len);
> +     if (ret)
> +             return ret;
> +
> +     /* Calculate target page size and level for the leaves */
> +     if (pt_has_system_page(common) && pgsize == PAGE_SIZE && pgcount == 1) {
> +             PT_WARN_ON(!(pgsize_bitmap & PAGE_SIZE));
> +             if (log2_mod(iova | paddr, PAGE_SHIFT))
> +                     return -ENXIO;
> +             map.leaf_pgsize_lg2 = PAGE_SHIFT;
> +             map.leaf_level = 0;
> +             single_page = true;
> +     } else {
> +             map.leaf_pgsize_lg2 = pt_compute_best_pgsize(
> +                     pgsize_bitmap, range.va, range.last_va, paddr);
> +             if (!map.leaf_pgsize_lg2)
> +                     return -ENXIO;
> +             map.leaf_level =
> +                     pt_pgsz_lg2_to_level(common, map.leaf_pgsize_lg2);
> +     }
> +
> +     ret = check_map_range(iommu_table, &range, &map);
> +     if (ret)
> +             return ret;
> +
> +     PT_WARN_ON(map.leaf_level > range.top_level);
> +
> +     do {
> +             if (single_page) {
> +                     ret = pt_walk_range(&range, __map_single_page, &map);
> +                     if (ret != -EAGAIN)
> +                             break;
> +             }
> +
> +             if (map.leaf_level == range.top_level)
> +                     ret = pt_walk_range(&range, __map_range_leaf, &map);
> +             else
> +                     ret = pt_walk_range(&range, __map_range, &map);
> +     } while (false);
> +
> +     /*
> +      * Table levels were freed and replaced with large items, flush any walk
> +      * cache that may refer to the freed levels.
> +      */
> +     if (!iommu_pages_list_empty(&iotlb_gather.freelist))
> +             iommu_iotlb_sync(&iommu_table->domain, &iotlb_gather);
> +
> +     /* Bytes successfully mapped */
> +     PT_WARN_ON(!ret && map.oa - paddr != len);
> +     *mapped += map.oa - paddr;
> +     return ret;
> +}
> +EXPORT_SYMBOL_NS_GPL(DOMAIN_NS(map_pages), "GENERIC_PT_IOMMU");
> +
>  struct pt_unmap_args {
>       struct iommu_pages_list free_list;
>       pt_vaddr_t unmapped;
> @@ -448,6 +922,7 @@ static void pt_iommu_zero(struct pt_iommu_table *fmt_table)
>       memset_after(fmt_table, 0, iommu.domain);
>  
>       /* The caller can initialize some of these values */
> +     iommu_table->hw_flush_ops = cfg.hw_flush_ops;
>       iommu_table->nid = cfg.nid;
>  }
>  
> @@ -483,6 +958,12 @@ int pt_iommu_init(struct pt_iommu_table *fmt_table,
>       if (ret)
>               return ret;
>  
> +     if (pt_feature(common, PT_FEAT_DYNAMIC_TOP) &&
> +         WARN_ON(!iommu_table->hw_flush_ops ||
> +                 !iommu_table->hw_flush_ops->change_top ||
> +                 !iommu_table->hw_flush_ops->get_top_lock))
> +             return -EINVAL;
> +
>       if (pt_feature(common, PT_FEAT_SIGN_EXTEND) &&
>           (pt_feature(common, PT_FEAT_FULL_VA) ||
>            pt_feature(common, PT_FEAT_DYNAMIC_TOP)))
> diff --git a/include/linux/generic_pt/iommu.h b/include/linux/generic_pt/iommu.h
> index 862d224c59281e..fd0c598526ce57 100644
> --- a/include/linux/generic_pt/iommu.h
> +++ b/include/linux/generic_pt/iommu.h
> @@ -11,6 +11,7 @@
>  
>  struct iommu_iotlb_gather;
>  struct pt_iommu_ops;
> +struct pt_iommu_flush_ops;
>  
>  /**
>   * DOC: IOMMU Radix Page Table
> @@ -43,6 +44,12 @@ struct pt_iommu {
>        */
>       const struct pt_iommu_ops *ops;
>  
> +     /**
> +      * @hw_flush_ops - Function pointers provided by the HW driver to flush
> +      * HW caches after changes to the page table.
> +      */
> +     const struct pt_iommu_flush_ops *hw_flush_ops;
> +
>       /**
>        * @nid - Node ID to use for table memory allocations. The iommu driver
>        * may want to set the NID to the device's NID, if there are multiple
> @@ -84,6 +91,52 @@ struct pt_iommu_ops {
>       void (*deinit)(struct pt_iommu *iommu_table);
>  };
>  
> +/**
> + * struct pt_iommu_flush_ops - HW IOTLB cache flushing operations
> + *
> + * The IOMMU driver should implement these using container_of(iommu_table) to
> + * get to its iommu_domain derived structure. All ops can be called in atomic
> + * contexts as they are buried under DMA API calls.
> + */
> +struct pt_iommu_flush_ops {
> +     /**
> +      * change_top() - Update the top of table pointer
> +      * @iommu_table: Table to operate on
> +      * @top_paddr: New CPU physical address of the top pointer
> +      * @top_level: IOMMU PT level of the new top
> +      *
> +      * Called under the get_top_lock() spinlock. The driver must update all
> +      * HW references to this domain with a new top address and
> +      * configuration. On return mappings placed in the new top must be
> +      * reachable by the HW.
> +      *
> +      * top_level encodes the level in IOMMU PT format, level 0 is the
> +      * smallest page size increasing from there. This has to be translated
> +      * to any HW specific format. During this call the new top will not be
> +      * visible to any other API.
> +      *
> +      * This op is only used by PT_FEAT_DYNAMIC_TOP, and is required if
> +      * enabled.
> +      */
> +     void (*change_top)(struct pt_iommu *iommu_table, phys_addr_t top_paddr,
> +                        unsigned int top_level);
> +
> +     /**
> +      * get_top_lock() - Return a lock to hold when changing the table top
> +      * @iommu_table: Table to operate on
> +      *
> +      * The returned lock prevents a stale top of the page table from being
> +      * stored in HW. The lock will be held prior
> +      * to calling change_top() and released once the top is fully visible.
> +      *
> +      * Typically this would be a lock that protects the iommu_domain's
> +      * attachment list.
> +      *
> +      * This op is only used by PT_FEAT_DYNAMIC_TOP, and is required if
> +      * enabled.
> +      */
> +     spinlock_t *(*get_top_lock)(struct pt_iommu *iommu_table);
> +};
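
To make the expected driver wiring concrete, an implementation of these two
ops would presumably look roughly like the following. struct my_domain, its
fields and my_hw_set_top() are invented for illustration only:

#include <linux/spinlock.h>
#include <linux/generic_pt/iommu.h>

struct my_domain {
	struct pt_iommu iommu_pt;	/* hypothetical embedding */
	spinlock_t attach_lock;		/* protects the attachment list */
};

/* Stand-in for whatever programs the HW device table entries */
static void my_hw_set_top(struct my_domain *dom, phys_addr_t top_paddr,
			  unsigned int top_level)
{
	/* driver specific: update DTE/STE/context entries, then sync HW */
}

static void my_change_top(struct pt_iommu *iommu_table, phys_addr_t top_paddr,
			  unsigned int top_level)
{
	struct my_domain *dom =
		container_of(iommu_table, struct my_domain, iommu_pt);

	/*
	 * Reprogram every device entry that references this domain so the
	 * HW walks from the new top before map_pages() installs anything
	 * under it.
	 */
	my_hw_set_top(dom, top_paddr, top_level);
}

static spinlock_t *my_get_top_lock(struct pt_iommu *iommu_table)
{
	struct my_domain *dom =
		container_of(iommu_table, struct my_domain, iommu_pt);

	/* The same lock that serializes attach/detach for this domain */
	return &dom->attach_lock;
}

static const struct pt_iommu_flush_ops my_flush_ops = {
	.change_top = my_change_top,
	.get_top_lock = my_get_top_lock,
};
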
> +
>  static inline void pt_iommu_deinit(struct pt_iommu *iommu_table)
>  {
>       iommu_table->ops->deinit(iommu_table);
> @@ -114,6 +167,10 @@ struct pt_iommu_cfg {
> #define IOMMU_PROTOTYPES(fmt)                                                \
>       phys_addr_t pt_iommu_##fmt##_iova_to_phys(struct iommu_domain *domain, \
>                                                 dma_addr_t iova);            \
> +     int pt_iommu_##fmt##_map_pages(struct iommu_domain *domain,            \
> +                                    unsigned long iova, phys_addr_t paddr,  \
> +                                    size_t pgsize, size_t pgcount,          \
> +                                    int prot, gfp_t gfp, size_t *mapped);   \
>       size_t pt_iommu_##fmt##_unmap_pages(                                   \
>               struct iommu_domain *domain, unsigned long iova,               \
>               size_t pgsize, size_t pgcount,                                 \
> @@ -136,6 +193,7 @@ struct pt_iommu_cfg {
>   */
>  #define IOMMU_PT_DOMAIN_OPS(fmt)                        \
>       .iova_to_phys = &pt_iommu_##fmt##_iova_to_phys, \
> +     .map_pages = &pt_iommu_##fmt##_map_pages,       \
>       .unmap_pages = &pt_iommu_##fmt##_unmap_pages
>  
>  /*
> -- 
> 2.43.0
> 
