Signed-off-by: Shivaprasad G Bhat <[email protected]>
References:
1 : https://github.com/shivaprasadbhat/qemu/tree/iommufd-wip
---
arch/powerpc/include/asm/iommu.h | 2
arch/powerpc/kernel/iommu.c | 181 +++++++++++++++++++++++++
arch/powerpc/platforms/powernv/pci-ioda-tce.c | 4 -
arch/powerpc/platforms/powernv/pci-ioda.c | 4 -
arch/powerpc/platforms/powernv/pci.h | 2
arch/powerpc/platforms/pseries/iommu.c | 6 -
drivers/vfio/Kconfig | 4 -
7 files changed, 190 insertions(+), 13 deletions(-)
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index eafdd63cd6c4..1dc72fbb89e7 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -46,7 +46,7 @@ struct iommu_table_ops {
long index, long npages,
unsigned long uaddr,
enum dma_data_direction direction,
- unsigned long attrs);
+ unsigned long attrs, bool is_phys);
#ifdef CONFIG_IOMMU_API
/*
* Exchanges existing TCE with new TCE plus direction bits;
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 0ce71310b7d9..e6543480c461 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -365,7 +365,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct
iommu_table *tbl,
/* Put the TCEs in the HW table */
build_fail = tbl->it_ops->set(tbl, entry, npages,
(unsigned long)page &
- IOMMU_PAGE_MASK(tbl), direction, attrs);
+ IOMMU_PAGE_MASK(tbl), direction, attrs,
false);
/* tbl->it_ops->set() only returns non-zero for transient errors.
* Clean up the table bitmap in this case and return
@@ -539,7 +539,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table
*tbl,
/* Insert into HW table */
build_fail = tbl->it_ops->set(tbl, entry, npages,
vaddr & IOMMU_PAGE_MASK(tbl),
- direction, attrs);
+ direction, attrs, false);
if(unlikely(build_fail))
goto failure;
@@ -1201,7 +1201,15 @@ spapr_tce_blocked_iommu_attach_dev(struct iommu_domain *platform_domain,
* also sets the dma_api ops
*/
table_group = iommu_group_get_iommudata(grp);
+
+ if (old && old->type == IOMMU_DOMAIN_DMA) {
+ ret = table_group->ops->unset_window(table_group, 0);
+ if (ret)
+ goto exit;
+ }
+
ret = table_group->ops->take_ownership(table_group, dev);
+exit:
iommu_group_put(grp);
return ret;
@@ -1260,6 +1268,167 @@ static struct iommu_group
*spapr_tce_iommu_device_group(struct device *dev)
return hose->controller_ops.device_group(hose, pdev);
}
+/*
+ * Driver-private wrapper around the generic iommu_domain for sPAPR TCE
+ * paging domains. The embedded @domain is what the IOMMU core sees;
+ * to_ppc64_domain() recovers the wrapper from it.
+ */
+struct ppc64_domain {
+ /* Generic domain handed back to the IOMMU core. */
+ struct iommu_domain domain;
+ /* Device recorded by the attach_dev callback (single device for now). */
+ struct device *device; /* Make it a list */
+ /* TCE table backing this domain; used by map/unmap/iova_to_phys. */
+ struct iommu_table *table;
+ /*
+  * Initialised at allocation time; no visible code takes it yet.
+  * NOTE(review): presumably meant to protect the future device list.
+  */
+ spinlock_t list_lock;
+ /* NOTE(review): not referenced by any code visible in this patch. */
+ struct rcu_head rcu;
+};
+
+/* Map a generic iommu_domain back to the ppc64_domain that embeds it. */
+static struct ppc64_domain *to_ppc64_domain(struct iommu_domain *dom)
+{
+	struct ppc64_domain *wrapper;
+
+	wrapper = container_of(dom, struct ppc64_domain, domain);
+
+	return wrapper;
+}
+
+/*
+ * Free a paging domain allocated by spapr_tce_domain_alloc_paging().
+ *
+ * The allocator takes a reference on the backing TCE table with
+ * iommu_tce_table_get(); drop it here so it does not leak every time a
+ * domain is destroyed.
+ *
+ * NOTE(review): the reference handed back by ->create_table() and the
+ * window programmed with ->set_window() are not torn down here because the
+ * table group is not reachable from the domain yet - confirm where that
+ * teardown is meant to live.
+ */
+static void spapr_tce_domain_free(struct iommu_domain *domain)
+{
+	struct ppc64_domain *ppc64_domain = to_ppc64_domain(domain);
+
+	if (ppc64_domain->table)
+		iommu_tce_table_put(ppc64_domain->table);
+
+	kfree(ppc64_domain);
+}
+
+static const struct iommu_ops spapr_tce_iommu_ops;
+
+/*
+ * Allocate a paging domain for @dev.
+ *
+ * Creates a TCE table sized for the (hard-coded) default window, programs
+ * it as window 0 of the device's table group and wraps it in a
+ * ppc64_domain.
+ *
+ * Returns the embedded iommu_domain on success, or NULL on any failure.
+ * Every failure path releases the group reference and whatever was
+ * allocated before the failure.
+ */
+static struct iommu_domain *spapr_tce_domain_alloc_paging(struct device *dev)
+{
+	/* Just the default window hardcode for now */
+	const u32 page_shift = 0xc;
+	const u64 window_size = 0x40000000;
+	struct iommu_group *grp = iommu_group_get(dev);
+	struct iommu_table_group *table_group;
+	struct ppc64_domain *ppc64_domain;
+	struct iommu_table *ptbl = NULL;
+	long ret;
+
+	if (!grp)
+		return NULL;
+
+	table_group = iommu_group_get_iommudata(grp);
+	if (!table_group)
+		goto err_put_group;
+
+	ppc64_domain = kzalloc(sizeof(*ppc64_domain), GFP_KERNEL);
+	if (!ppc64_domain)
+		goto err_put_group;
+
+	ret = table_group->ops->create_table(table_group, 0, page_shift,
+					     window_size, 1, &ptbl);
+	if (ret || !ptbl)
+		goto err_free_domain;
+
+	ret = table_group->ops->set_window(table_group, 0, ptbl);
+	if (ret)
+		goto err_put_table;
+
+	/* Reference held on behalf of the domain. */
+	iommu_tce_table_get(ptbl);
+	ppc64_domain->table = ptbl; /* REVISIT: Single device for now */
+
+	iommu_group_put(grp);
+
+	ppc64_domain->domain.pgsize_bitmap = SZ_4K;
+	ppc64_domain->domain.geometry.force_aperture = true;
+	ppc64_domain->domain.geometry.aperture_start = 0;
+	/* aperture_end is the last valid address, i.e. inclusive */
+	ppc64_domain->domain.geometry.aperture_end = window_size - 1;
+	ppc64_domain->domain.ops = spapr_tce_iommu_ops.default_domain_ops;
+
+	spin_lock_init(&ppc64_domain->list_lock);
+
+	return &ppc64_domain->domain;
+
+err_put_table:
+	/*
+	 * Drop the reference handed back by ->create_table().
+	 * NOTE(review): mirrors how the VFIO SPAPR TCE driver disposes of a
+	 * table whose window could not be set - confirm for pseries.
+	 */
+	iommu_tce_table_put(ptbl);
+err_free_domain:
+	kfree(ppc64_domain);
+err_put_group:
+	iommu_group_put(grp);
+	return NULL;
+}
+
+/*
+ * Clear @pgcount TCEs of size @pgsize starting at @iova.
+ *
+ * Only 4K IOMMU pages are supported. The callback returns size_t, so a
+ * negative errno cannot be used to report failure (it would be read as a
+ * huge "bytes unmapped" count); 0 is returned instead.
+ *
+ * Returns the number of bytes unmapped, or 0 if @pgsize is unsupported.
+ * @gather is unused.
+ */
+static size_t spapr_tce_iommu_unmap_pages(struct iommu_domain *domain,
+					  unsigned long iova,
+					  size_t pgsize, size_t pgcount,
+					  struct iommu_iotlb_gather *gather)
+{
+	struct ppc64_domain *ppc64_domain = to_ppc64_domain(domain);
+	struct iommu_table *tbl = ppc64_domain->table;
+	unsigned long tcenum;
+
+	if (pgsize != SZ_4K)
+		return 0;
+
+	tcenum = iova >> tbl->it_page_shift;
+
+	tbl->it_ops->clear(tbl, tcenum, pgcount);
+
+	return pgsize * pgcount;
+}
+
+/*
+ * Translate @iova to the physical address recorded in the domain's TCE
+ * table.
+ *
+ * The TCE's low bits (permission/direction bits) are masked off and the
+ * offset of @iova within its IOMMU page is carried over, so a non
+ * page-aligned @iova yields the matching physical byte address.
+ *
+ * Returns 0 when the TCE reads back as empty.
+ */
+static phys_addr_t spapr_tce_iommu_iova_to_phys(struct iommu_domain *domain,
+						dma_addr_t iova)
+{
+	struct ppc64_domain *ppc64_domain = to_ppc64_domain(domain);
+	struct iommu_table *tbl = ppc64_domain->table;
+	unsigned long tcenum = iova >> tbl->it_page_shift;
+	phys_addr_t offset_mask = ((phys_addr_t)1 << tbl->it_page_shift) - 1;
+	phys_addr_t tceval;
+
+	tceval = tbl->it_ops->get(tbl, tcenum);
+	if (!tceval)
+		return 0;
+
+	/* Ignore the direction bits, keep the in-page offset of the IOVA */
+	return (tceval & ~offset_mask) | (iova & offset_mask);
+}
+
+/*
+ * Install @pgcount TCEs of size @pgsize mapping @iova to the physical
+ * address @paddr.
+ *
+ * @prot is translated to a DMA direction from the device's point of view:
+ *   IOMMU_READ | IOMMU_WRITE -> DMA_BIDIRECTIONAL
+ *   IOMMU_READ only          -> DMA_TO_DEVICE   (device reads memory)
+ *   IOMMU_WRITE only         -> DMA_FROM_DEVICE (device writes memory)
+ * A request with neither permission is rejected.
+ *
+ * On success *@mapped (if non-NULL) is set to the number of bytes mapped
+ * and 0 is returned. Returns -EINVAL for unsupported page size, bad
+ * alignment, missing permissions or a range outside the aperture, and a
+ * negative error if the platform ->set() hook fails.
+ */
+static int spapr_tce_iommu_map_pages(struct iommu_domain *domain,
+				     unsigned long iova, phys_addr_t paddr,
+				     size_t pgsize, size_t pgcount,
+				     int prot, gfp_t gfp, size_t *mapped)
+{
+	struct ppc64_domain *ppc64_domain = to_ppc64_domain(domain);
+	struct iommu_table *tbl = ppc64_domain->table;
+	enum dma_data_direction direction;
+	unsigned long tcenum;
+	size_t size;
+	int ret;
+
+	/* Validate pgsize before doing any arithmetic with it */
+	if (pgsize != SZ_4K)
+		return -EINVAL;
+
+	size = pgsize * pgcount;
+
+	if (iova < ppc64_domain->domain.geometry.aperture_start ||
+	    (iova + size - 1) > ppc64_domain->domain.geometry.aperture_end)
+		return -EINVAL;
+
+	if (!IS_ALIGNED(iova | paddr, pgsize))
+		return -EINVAL;
+
+	switch (prot & (IOMMU_READ | IOMMU_WRITE)) {
+	case IOMMU_READ | IOMMU_WRITE:
+		direction = DMA_BIDIRECTIONAL;
+		break;
+	case IOMMU_READ:
+		direction = DMA_TO_DEVICE;
+		break;
+	case IOMMU_WRITE:
+		direction = DMA_FROM_DEVICE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	tcenum = iova >> tbl->it_page_shift;
+
+	/* Put the TCEs in the HW table; paddr is already physical */
+	ret = tbl->it_ops->set(tbl, tcenum, pgcount,
+			       paddr, direction, 0, true);
+	if (ret)
+		return ret < 0 ? ret : -EIO;
+
+	if (mapped)
+		*mapped = size;
+
+	return 0;
+}
+
+/*
+ * attach_dev callback for paging domains: remember @dev in the domain.
+ * A NULL @domain is tolerated and treated as a no-op. @old is unused.
+ * Always returns 0.
+ */
+static int spapr_tce_iommu_attach_device(struct iommu_domain *domain,
+					 struct device *dev,
+					 struct iommu_domain *old)
+{
+	/* REVISIT */
+	if (domain) {
+		/* REVISIT: Check table group, list handling */
+		to_ppc64_domain(domain)->device = dev;
+	}
+
+	return 0;
+}
+
+
static const struct iommu_ops spapr_tce_iommu_ops = {
.default_domain = &spapr_tce_platform_domain,
.blocked_domain = &spapr_tce_blocked_domain,
@@ -1267,6 +1436,14 @@ static const struct iommu_ops spapr_tce_iommu_ops = {
.probe_device = spapr_tce_iommu_probe_device,
.release_device = spapr_tce_iommu_release_device,
.device_group = spapr_tce_iommu_device_group,
+ .domain_alloc_paging = spapr_tce_domain_alloc_paging,
+ .default_domain_ops = &(const struct iommu_domain_ops) {
+ .attach_dev = spapr_tce_iommu_attach_device,
+ .map_pages = spapr_tce_iommu_map_pages,
+ .unmap_pages = spapr_tce_iommu_unmap_pages,
+ .iova_to_phys = spapr_tce_iommu_iova_to_phys,
+ .free = spapr_tce_domain_free,
+ }
};
static struct attribute *spapr_tce_iommu_attrs[] = {
diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c
b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
index e96324502db0..8800bf86d17a 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
@@ -123,10 +123,10 @@ static __be64 *pnv_tce(struct iommu_table *tbl, bool
user, long idx, bool alloc)
int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
unsigned long uaddr, enum dma_data_direction direction,
- unsigned long attrs)
+ unsigned long attrs, bool is_phys)
{
u64 proto_tce = iommu_direction_to_tce_perm(direction);
- u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
+ u64 rpn = !is_phys ? __pa(uaddr) >> tbl->it_page_shift : uaddr >>
tbl->it_page_shift;
long i;
if (proto_tce & TCE_PCI_WRITE)
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c
b/arch/powerpc/platforms/powernv/pci-ioda.c
index b0c1d9d16fb5..610146a63e3b 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1241,10 +1241,10 @@ static void pnv_pci_ioda2_tce_invalidate(struct
iommu_table *tbl,
static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
long npages, unsigned long uaddr,
enum dma_data_direction direction,
- unsigned long attrs)
+ unsigned long attrs, bool is_phys)
{
int ret = pnv_tce_build(tbl, index, npages, uaddr, direction,
- attrs);
+ attrs, is_phys);
if (!ret)
pnv_pci_ioda2_tce_invalidate(tbl, index, npages);
diff --git a/arch/powerpc/platforms/powernv/pci.h
b/arch/powerpc/platforms/powernv/pci.h
index 42075501663b..3579ecd55d00 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -300,7 +300,7 @@ extern void pe_level_printk(const struct pnv_ioda_pe *pe,
const char *level,
extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
unsigned long uaddr, enum dma_data_direction direction,
- unsigned long attrs);
+ unsigned long attrs, bool is_phys);
extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages);
extern int pnv_tce_xchg(struct iommu_table *tbl, long index,
unsigned long *hpa, enum dma_data_direction *direction);
diff --git a/arch/powerpc/platforms/pseries/iommu.c
b/arch/powerpc/platforms/pseries/iommu.c
index eec333dd2e59..8c6f9f18e462 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -122,7 +122,7 @@ static void iommu_pseries_free_group(struct
iommu_table_group *table_group,
static int tce_build_pSeries(struct iommu_table *tbl, long index,
long npages, unsigned long uaddr,
enum dma_data_direction direction,
- unsigned long attrs)
+ unsigned long attrs, bool false)
{
u64 proto_tce;
__be64 *tcep;
@@ -250,7 +250,7 @@ static DEFINE_PER_CPU(__be64 *, tce_page);
static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
long npages, unsigned long uaddr,
enum dma_data_direction direction,
- unsigned long attrs)
+ unsigned long attrs, bool is_phys)
{
u64 rc = 0;
u64 proto_tce;
@@ -287,7 +287,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table
*tbl, long tcenum,
__this_cpu_write(tce_page, tcep);
}
- rpn = __pa(uaddr) >> tceshift;
+ rpn = !is_phys ? __pa(uaddr) >> tceshift : uaddr >> tceshift;
proto_tce = TCE_PCI_READ;
if (direction != DMA_TO_DEVICE)
proto_tce |= TCE_PCI_WRITE;
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index ceae52fd7586..9929aa78a5da 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -4,7 +4,7 @@ menuconfig VFIO
select IOMMU_API
depends on IOMMUFD || !IOMMUFD
select INTERVAL_TREE
- select VFIO_GROUP if SPAPR_TCE_IOMMU || IOMMUFD=n
+ select VFIO_GROUP if IOMMUFD=n
select VFIO_DEVICE_CDEV if !VFIO_GROUP
select VFIO_CONTAINER if IOMMUFD=n
help
@@ -16,7 +16,7 @@ menuconfig VFIO
if VFIO
config VFIO_DEVICE_CDEV
bool "Support for the VFIO cdev /dev/vfio/devices/vfioX"
- depends on IOMMUFD && !SPAPR_TCE_IOMMU
+ depends on IOMMUFD
default !VFIO_GROUP
help
The VFIO device cdev is another way for userspace to get device