Re: [PATCH v5 25/29] powerpc/powernv/ioda: Define and implement DMA table/window management callbacks
On Wed, 2015-03-11 at 19:54 +1100, Alexey Kardashevskiy wrote: +/* Page size flags for ibm,query-pe-dma-window */ +#define DDW_PGSIZE_4K 0x01 +#define DDW_PGSIZE_64K 0x02 +#define DDW_PGSIZE_16M 0x04 +#define DDW_PGSIZE_32M 0x08 +#define DDW_PGSIZE_64M 0x10 +#define DDW_PGSIZE_128M 0x20 +#define DDW_PGSIZE_256M 0x40 +#define DDW_PGSIZE_16G 0x80 +#define DDW_PGSIZE_MASK 0xFF + struct iommu_table_group { #ifdef CONFIG_IOMMU_API struct iommu_group *group; #endif + /* Some key properties of IOMMU */ + __u32 tce32_start; + __u32 tce32_size; + __u32 max_dynamic_windows_supported; + __u32 max_levels; + __u32 flags; Just realized that due to their static nature, they are better to be in iommu_table_group_ops, will fix it in v6. Ugh ? I dislike mixing function pointers and other fields, even if static. If you *really* want to separate them make them a struct iommu_table_info and declare a const member. Otherwise don't bother and leave them where they are. + struct iommu_table tables[IOMMU_TABLE_GROUP_MAX_TABLES]; struct iommu_table_group_ops *ops; }; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index ed60b38..07857c4 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -48,6 +48,7 @@ #include pci.h #define POWERNV_IOMMU_DEFAULT_LEVELS 1 +#define POWERNV_IOMMU_MAX_LEVELS 5 extern void ioda_eeh_tvt_print(struct pnv_phb *phb); @@ -1155,11 +1156,14 @@ static void pnv_ioda1_tce_free_vm(struct iommu_table *tbl, long index, pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false); } +static void pnv_pci_free_table(struct iommu_table *tbl); + struct iommu_table_ops pnv_ioda1_iommu_ops = { .set = pnv_ioda1_tce_build_vm, .exchange = pnv_ioda1_tce_xchg_vm, .clear = pnv_ioda1_tce_free_vm, .get = pnv_tce_get, + .free = pnv_pci_free_table }; static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, @@ -1317,6 +1321,11 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb 
*phb, TCE_PCI_SWINV_PAIR); } tbl-it_ops = pnv_ioda1_iommu_ops; + pe-table_group.tce32_start = tbl-it_offset tbl-it_page_shift; + pe-table_group.tce32_size = tbl-it_size tbl-it_page_shift; + pe-table_group.max_dynamic_windows_supported = 0; + pe-table_group.max_levels = 0; + pe-table_group.flags = 0; iommu_init_table(tbl, phb-hose-node); iommu_register_group(pe-table_group, phb-hose-global_number, pe-pe_number); @@ -1401,7 +1410,7 @@ static __be64 *pnv_alloc_tce_table(int nid, } static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group, - __u32 page_shift, __u64 window_size, __u32 levels, + int num, __u32 page_shift, __u64 window_size, __u32 levels, struct iommu_table *tbl) { struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, @@ -1428,8 +1437,8 @@ static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group, shift = ROUND_UP(ilog2(window_size) - page_shift, levels) / levels; shift += 3; shift = max_t(unsigned, shift, IOMMU_PAGE_SHIFT_4K); - pr_info(Creating TCE table %08llx, %d levels, TCE table size = %lx\n, - window_size, levels, 1UL shift); + pr_info(Creating TCE table #%d %08llx, %d levels, TCE table size = %lx\n, + num, window_size, levels, 1UL shift); tbl-it_level_size = 1ULL (shift - 3); left = tce_table_size; @@ -1440,11 +1449,10 @@ static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group, tbl-it_indirect_levels = levels - 1; /* Setup linux iommu table */ - pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0, - page_shift); + pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, + num ? 
pe-tce_bypass_base : 0, page_shift); tbl-it_ops = pnv_ioda2_iommu_ops; - iommu_init_table(tbl, nid); return 0; } @@ -1461,8 +1469,21 @@ static void pnv_pci_free_table(struct iommu_table *tbl) iommu_reset_table(tbl, ioda2); } +static inline void pnv_pci_ioda2_tvt_invalidate(unsigned int pe_number, + unsigned long it_index) +{ + __be64 __iomem *invalidate = (__be64 __iomem *)it_index; + /* 01xb - invalidate TCEs that match the specified PE# */ + unsigned long addr = (0x4ull 60) | (pe_number 0xFF); + + if (!it_index) + return; + + __raw_writeq(cpu_to_be64(addr), invalidate); +} + static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, - struct iommu_table
Re: [PATCH v5 25/29] powerpc/powernv/ioda: Define and implement DMA table/window management callbacks
On 03/10/2015 01:07 AM, Alexey Kardashevskiy wrote: This extends iommu_table_group_ops by a set of callbacks to support dynamic DMA windows management. query() returns IOMMU capabilities such as default DMA window address and supported number of DMA windows and TCE table levels. create_table() creates a TCE table with specific parameters. It receives iommu_table_group to know nodeid in order to allocate TCE table memory closer to the PHB. The exact format of the allocated multi-level table might also be specific to the PHB model (not the case now though). This callback puts the DMA window offset on a PCI bus into the just-created table. set_window() sets the window at the specified TVT index on the PHB. unset_window() unsets the window from the specified TVT. This adds a free() callback to iommu_table_ops to free the memory (potentially a tree of tables) allocated for the TCE table. create_table() and free() are supposed to be called once per VFIO container and set_window()/unset_window() are supposed to be called for every group in a container. 
Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru --- arch/powerpc/include/asm/iommu.h| 32 +++ arch/powerpc/platforms/powernv/pci-ioda.c | 87 - arch/powerpc/platforms/powernv/pci-p5ioc2.c | 14 - 3 files changed, 115 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 4007432..04f72ac 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -62,6 +62,8 @@ struct iommu_table_ops { long index, long npages); unsigned long (*get)(struct iommu_table *tbl, long index); void (*flush)(struct iommu_table *tbl); + + void (*free)(struct iommu_table *tbl); }; /* These are used by VIO */ @@ -148,12 +150,42 @@ struct iommu_table_group_ops { */ void (*set_ownership)(struct iommu_table_group *table_group, bool enable); + + long (*create_table)(struct iommu_table_group *table_group, + int num, + __u32 page_shift, + __u64 window_size, + __u32 levels, + struct iommu_table *tbl); + long (*set_window)(struct iommu_table_group *table_group, + int num, + struct iommu_table *tblnew); + long (*unset_window)(struct iommu_table_group *table_group, + int num); }; +/* Page size flags for ibm,query-pe-dma-window */ +#define DDW_PGSIZE_4K 0x01 +#define DDW_PGSIZE_64K 0x02 +#define DDW_PGSIZE_16M 0x04 +#define DDW_PGSIZE_32M 0x08 +#define DDW_PGSIZE_64M 0x10 +#define DDW_PGSIZE_128M 0x20 +#define DDW_PGSIZE_256M 0x40 +#define DDW_PGSIZE_16G 0x80 +#define DDW_PGSIZE_MASK 0xFF + struct iommu_table_group { #ifdef CONFIG_IOMMU_API struct iommu_group *group; #endif + /* Some key properties of IOMMU */ + __u32 tce32_start; + __u32 tce32_size; + __u32 max_dynamic_windows_supported; + __u32 max_levels; + __u32 flags; Just realized that due to their static nature, they are better to be in iommu_table_group_ops, will fix it in v6. 
+ struct iommu_table tables[IOMMU_TABLE_GROUP_MAX_TABLES]; struct iommu_table_group_ops *ops; }; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index ed60b38..07857c4 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -48,6 +48,7 @@ #include pci.h #define POWERNV_IOMMU_DEFAULT_LEVELS 1 +#define POWERNV_IOMMU_MAX_LEVELS 5 extern void ioda_eeh_tvt_print(struct pnv_phb *phb); @@ -1155,11 +1156,14 @@ static void pnv_ioda1_tce_free_vm(struct iommu_table *tbl, long index, pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false); } +static void pnv_pci_free_table(struct iommu_table *tbl); + struct iommu_table_ops pnv_ioda1_iommu_ops = { .set = pnv_ioda1_tce_build_vm, .exchange = pnv_ioda1_tce_xchg_vm, .clear = pnv_ioda1_tce_free_vm, .get = pnv_tce_get, + .free = pnv_pci_free_table }; static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, @@ -1317,6 +1321,11 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, TCE_PCI_SWINV_PAIR); } tbl-it_ops = pnv_ioda1_iommu_ops; + pe-table_group.tce32_start = tbl-it_offset tbl-it_page_shift; + pe-table_group.tce32_size = tbl-it_size tbl-it_page_shift; + pe-table_group.max_dynamic_windows_supported = 0; + pe-table_group.max_levels = 0; + pe-table_group.flags = 0; iommu_init_table(tbl, phb-hose-node); iommu_register_group(pe-table_group, phb-hose-global_number,
[PATCH v5 25/29] powerpc/powernv/ioda: Define and implement DMA table/window management callbacks
This extends iommu_table_group_ops by a set of callbacks to support dynamic DMA windows management. query() returns IOMMU capabilities such as default DMA window address and supported number of DMA windows and TCE table levels. create_table() creates a TCE table with specific parameters. it receives iommu_table_group to know nodeid in order to allocate TCE table memory closer to the PHB. The exact format of allocated multi-level table might be also specific to the PHB model (not the case now though). This callback puts the DMA window offset on a PCI bus into just created table. set_window() sets the window at specified TVT index on PHB. unset_window() unsets the window from specified TVT. This adds a free() callback to iommu_table_ops to free the memory (potentially a tree of tables) allocated for the TCE table. create_table() and free() are supposed to be called once per VFIO container and set_window()/unset_window() are supposed to be called for every group in a container. Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru --- arch/powerpc/include/asm/iommu.h| 32 +++ arch/powerpc/platforms/powernv/pci-ioda.c | 87 - arch/powerpc/platforms/powernv/pci-p5ioc2.c | 14 - 3 files changed, 115 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 4007432..04f72ac 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -62,6 +62,8 @@ struct iommu_table_ops { long index, long npages); unsigned long (*get)(struct iommu_table *tbl, long index); void (*flush)(struct iommu_table *tbl); + + void (*free)(struct iommu_table *tbl); }; /* These are used by VIO */ @@ -148,12 +150,42 @@ struct iommu_table_group_ops { */ void (*set_ownership)(struct iommu_table_group *table_group, bool enable); + + long (*create_table)(struct iommu_table_group *table_group, + int num, + __u32 page_shift, + __u64 window_size, + __u32 levels, + struct iommu_table *tbl); + long (*set_window)(struct 
iommu_table_group *table_group, + int num, + struct iommu_table *tblnew); + long (*unset_window)(struct iommu_table_group *table_group, + int num); }; +/* Page size flags for ibm,query-pe-dma-window */ +#define DDW_PGSIZE_4K 0x01 +#define DDW_PGSIZE_64K 0x02 +#define DDW_PGSIZE_16M 0x04 +#define DDW_PGSIZE_32M 0x08 +#define DDW_PGSIZE_64M 0x10 +#define DDW_PGSIZE_128M 0x20 +#define DDW_PGSIZE_256M 0x40 +#define DDW_PGSIZE_16G 0x80 +#define DDW_PGSIZE_MASK 0xFF + struct iommu_table_group { #ifdef CONFIG_IOMMU_API struct iommu_group *group; #endif + /* Some key properties of IOMMU */ + __u32 tce32_start; + __u32 tce32_size; + __u32 max_dynamic_windows_supported; + __u32 max_levels; + __u32 flags; + struct iommu_table tables[IOMMU_TABLE_GROUP_MAX_TABLES]; struct iommu_table_group_ops *ops; }; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index ed60b38..07857c4 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -48,6 +48,7 @@ #include pci.h #define POWERNV_IOMMU_DEFAULT_LEVELS 1 +#define POWERNV_IOMMU_MAX_LEVELS 5 extern void ioda_eeh_tvt_print(struct pnv_phb *phb); @@ -1155,11 +1156,14 @@ static void pnv_ioda1_tce_free_vm(struct iommu_table *tbl, long index, pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false); } +static void pnv_pci_free_table(struct iommu_table *tbl); + struct iommu_table_ops pnv_ioda1_iommu_ops = { .set = pnv_ioda1_tce_build_vm, .exchange = pnv_ioda1_tce_xchg_vm, .clear = pnv_ioda1_tce_free_vm, .get = pnv_tce_get, + .free = pnv_pci_free_table }; static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, @@ -1317,6 +1321,11 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, TCE_PCI_SWINV_PAIR); } tbl-it_ops = pnv_ioda1_iommu_ops; + pe-table_group.tce32_start = tbl-it_offset tbl-it_page_shift; + pe-table_group.tce32_size = tbl-it_size tbl-it_page_shift; + pe-table_group.max_dynamic_windows_supported = 0; + 
pe-table_group.max_levels = 0; + pe-table_group.flags = 0; iommu_init_table(tbl, phb-hose-node); iommu_register_group(pe-table_group, phb-hose-global_number, pe-pe_number); @@ -1401,7 +1410,7 @@ static __be64 *pnv_alloc_tce_table(int nid, } static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group, -