On Thu, Oct 23, 2025 at 11:21 AM Jason Gunthorpe <[email protected]> wrote: > > iova_to_phys is a performance path for the DMA API and iommufd, implement > it using an unrolled get_user_pages() like function waterfall scheme. > > The implementation itself is fairly trivial. > > Tested-by: Alejandro Jimenez <[email protected]> > Reviewed-by: Kevin Tian <[email protected]> > Signed-off-by: Jason Gunthorpe <[email protected]> > --- > drivers/iommu/generic_pt/iommu_pt.h | 105 ++++++++++++++++++++++++++++ > include/linux/generic_pt/iommu.h | 19 +++-- > 2 files changed, 119 insertions(+), 5 deletions(-) > > diff --git a/drivers/iommu/generic_pt/iommu_pt.h > b/drivers/iommu/generic_pt/iommu_pt.h > index 564f2d3a6e11e1..5ff1b887928a46 100644 > --- a/drivers/iommu/generic_pt/iommu_pt.h > +++ b/drivers/iommu/generic_pt/iommu_pt.h > @@ -17,6 +17,111 @@ > > #define DOMAIN_NS(op) CONCATENATE(CONCATENATE(pt_iommu_, PTPFX), op) > > +static int make_range_ul(struct pt_common *common, struct pt_range *range, > + unsigned long iova, unsigned long len) > +{ > + unsigned long last; > + > + if (unlikely(len == 0)) > + return -EINVAL; > + > + if (check_add_overflow(iova, len - 1, &last)) > + return -EOVERFLOW; > + > + *range = pt_make_range(common, iova, last); > + if (sizeof(iova) > sizeof(range->va)) { > + if (unlikely(range->va != iova || range->last_va != last)) > + return -EOVERFLOW; > + } > + return 0; > +} > + > +static __maybe_unused int make_range_u64(struct pt_common *common, > + struct pt_range *range, u64 iova, > + u64 len) > +{ > + if (unlikely(iova > ULONG_MAX || len > ULONG_MAX)) > + return -EOVERFLOW; > + return make_range_ul(common, range, iova, len); > +} > + > +/* > + * Some APIs use unsigned long, while others use dma_addr_t as the type. > Dispatch > + * to the correct validation based on the type. 
> + */ > +#define make_range_no_check(common, range, iova, len) \ > + ({ \ > + int ret; \ > + if (sizeof(iova) > sizeof(unsigned long) || \ > + sizeof(len) > sizeof(unsigned long)) \ > + ret = make_range_u64(common, range, iova, len); \ > + else \ > + ret = make_range_ul(common, range, iova, len); \ > + ret; \ > + }) > + > +#define make_range(common, range, iova, len) \ > + ({ \ > + int ret = make_range_no_check(common, range, iova, len); \ > + if (!ret) \ > + ret = pt_check_range(range); \ > + ret; \ > + }) > + > +static __always_inline int __do_iova_to_phys(struct pt_range *range, void > *arg, > + unsigned int level, > + struct pt_table_p *table, > + pt_level_fn_t descend_fn) > +{ > + struct pt_state pts = pt_init(range, level, table); > + pt_oaddr_t *res = arg; > + > + switch (pt_load_single_entry(&pts)) { > + case PT_ENTRY_EMPTY: > + return -ENOENT; > + case PT_ENTRY_TABLE: > + return pt_descend(&pts, arg, descend_fn); > + case PT_ENTRY_OA: > + *res = pt_entry_oa_exact(&pts); > + return 0; > + } > + return -ENOENT; > +} > +PT_MAKE_LEVELS(__iova_to_phys, __do_iova_to_phys); > + > +/** > + * iova_to_phys() - Return the output address for the given IOVA > + * @iommu_table: Table to query > + * @iova: IO virtual address to query > + * > + * Determine the output address from the given IOVA. @iova may have any > + * alignment, the returned physical will be adjusted with any sub page > offset. > + * > + * Context: The caller must hold a read range lock that includes @iova. > + * > + * Return: 0 if there is no translation for the given iova. 
> + */ > +phys_addr_t DOMAIN_NS(iova_to_phys)(struct iommu_domain *domain, > + dma_addr_t iova) > +{ > + struct pt_iommu *iommu_table = > + container_of(domain, struct pt_iommu, domain); > + struct pt_range range; > + pt_oaddr_t res; > + int ret; > + > + ret = make_range(common_from_iommu(iommu_table), &range, iova, 1); > + if (ret) > + return ret; > + > + ret = pt_walk_range(&range, __iova_to_phys, &res); > + /* PHYS_ADDR_MAX would be a better error code */ > + if (ret) > + return 0; > + return res; > +} > +EXPORT_SYMBOL_NS_GPL(DOMAIN_NS(iova_to_phys), "GENERIC_PT_IOMMU"); > + > struct pt_iommu_collect_args { > struct iommu_pages_list free_list; > }; > diff --git a/include/linux/generic_pt/iommu.h > b/include/linux/generic_pt/iommu.h > index dc731fe003d153..5622856e199881 100644 > --- a/include/linux/generic_pt/iommu.h > +++ b/include/linux/generic_pt/iommu.h > @@ -116,11 +116,13 @@ struct pt_iommu_cfg { > }; > > /* Generate the exported function signatures from iommu_pt.h */ > -#define IOMMU_PROTOTYPES(fmt) \ > - int pt_iommu_##fmt##_init(struct pt_iommu_##fmt *table, \ > - const struct pt_iommu_##fmt##_cfg *cfg, \ > - gfp_t gfp); \ > - void pt_iommu_##fmt##_hw_info(struct pt_iommu_##fmt *table, \ > +#define IOMMU_PROTOTYPES(fmt) > \ > + phys_addr_t pt_iommu_##fmt##_iova_to_phys(struct iommu_domain > *domain, \ > + dma_addr_t iova); > \ > + int pt_iommu_##fmt##_init(struct pt_iommu_##fmt *table, > \ > + const struct pt_iommu_##fmt##_cfg *cfg, > \ > + gfp_t gfp); > \ > + void pt_iommu_##fmt##_hw_info(struct pt_iommu_##fmt *table, > \ > struct pt_iommu_##fmt##_hw_info *info) > #define IOMMU_FORMAT(fmt, member) \ > struct pt_iommu_##fmt { \ > @@ -129,6 +131,13 @@ struct pt_iommu_cfg { > }; \ > IOMMU_PROTOTYPES(fmt) > > +/* > + * A driver uses IOMMU_PT_DOMAIN_OPS to populate the iommu_domain_ops for the > + * iommu_pt > + */ > +#define IOMMU_PT_DOMAIN_OPS(fmt) \ > + .iova_to_phys = &pt_iommu_##fmt##_iova_to_phys, > + > /* > * The driver should setup its domain struct 
like > * union { > -- > 2.43.0 > >
Reviewed-by: Samiullah Khawaja <[email protected]>
