Export PowerPC DMA window information (both default 2GB and Dynamic larger window) to user space via sysfs. Each of these DMA windows has attributes like size of the window, page size backing the window, mode, etc. Each of these attributes is exported for user space consumption as a file.
PowerPC Host Bridge (PHB) can have multiple devices/functions sharing the same DMA window. For each PHB, iommu registration creates an iommu device under "/sys/devices/virtual/iommu". These devices will have 2 groups created to export Default and DDW attributes. Reviewed-by: Brian King <[email protected]> Signed-off-by: Gaurav Batra <[email protected]> --- .../arch/powerpc/dma_window_attributes.rst | 65 +++++ arch/powerpc/include/asm/iommu.h | 20 ++ arch/powerpc/kernel/iommu.c | 235 ++++++++++++++++++ arch/powerpc/platforms/pseries/iommu.c | 156 ++++++++++++ 4 files changed, 476 insertions(+) create mode 100644 Documentation/arch/powerpc/dma_window_attributes.rst diff --git a/Documentation/arch/powerpc/dma_window_attributes.rst b/Documentation/arch/powerpc/dma_window_attributes.rst new file mode 100644 index 000000000000..8bd9aec8539d --- /dev/null +++ b/Documentation/arch/powerpc/dma_window_attributes.rst @@ -0,0 +1,65 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===================== +DMA Window Attributes +===================== + +In PowerPC architecture there are 2 types of DMA windows - + +1. Default 2GB DMA window which is backed by 4K page size +2. A bigger Dynamic DMA Window (DDW) which is backed by larger page size + (64K or 2MB) + +A dedicated device will have both the DMA windows instantiated but an SR-IOV +device will only have the bigger Dynamic DMA Window. + +The attributes of these 2 DMA windows are exported to user space via sysfs. +Each IOMMU isolation unit will have its directory created under +/sys/devices/virtual/iommu. + +As an example, iommu-phb0001 + +Under each IOMMU isolation unit, there will be a group of attributes for +"Default 2GB DMA Window" and "Dynamic DMA Window" - spapr-tce-dma and +spapr-tce-ddw respectively. 
+ +Attributes under each group + +spapr-tce-ddw: +direct_address dynamic_address dynamic_size window_type +direct_size dynamic_pages_mapped page_size + +spapr-tce-dma: +dynamic_address dynamic_pages_mapped dynamic_size page_size + + +The bigger Dynamic DMA Window is configured into pre-mapped and/or dynamically +allocated TCEs. If the DDW is in "Hybrid" mode, then both the Direct +(pre-mapped) and Dynamic part of the DMA window will have valid values. Hybrid +mode is valid only for SR-IOV devices. + +DMA Window properties: + +direct_address Starting address of the pre-mapped DMA window +direct_size Size of the pre-mapped DMA Window +dynamic_address Starting address of the dynamic allocations +dynamic_size Size of the dynamic allocation window +dynamic_pages_mapped Pages mapped for DMA by dynamic allocations +page_size Page size backing the DMA window +window_type Type of the DMA Window (Direct/Dynamic/Hybrid) + + +An example of DDW attributes for an SR-IOV device:: + + $ cd /sys/devices/virtual/iommu/iommu-phb0001/spapr-tce-ddw + + $ grep . 
* + + direct_address:0x800000000000000 <-- Starting addr of pre-mapped Window + direct_size:137438953472 <-- Size of pre-mapped Window (128GB) + dynamic_address:0x800002000000000 <-- Starting addr of Dynamic allocations + dynamic_size:412316860416 <-- Size of dynamic allocation window (384GB) + dynamic_pages_mapped:270 <-- Pages mapped by dynamic allocations + page_size:2097152 <-- DMA window page size (2MB) + window_type:Hybrid <-- window has both pre-mapped and + dynamic sections diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index eafdd63cd6c4..e644c6e95301 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -90,6 +90,7 @@ struct iommu_pool { unsigned long start; unsigned long end; unsigned long hint; + unsigned long inuse; spinlock_t lock; } ____cacheline_aligned_in_smp; @@ -319,5 +320,24 @@ extern unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir); extern const struct dma_map_ops dma_iommu_ops; +/* used by sysfs when querying Dynamic/Default DMA Window data */ +struct dma_win_data { + u32 win_pgsize; + u64 direct_addr; + u64 direct_size; + u64 dynamic_addr; + u64 dynamic_size; + u32 dynamic_tces_inuse; + char win_type[15]; +}; + +#define SPAPR_SUCCESS 0 +#define SPAPR_NODMAWIN -1 +#define SPAPR_NODDWWIN -2 +#define SPAPR_ERROR -3 + +extern int gather_ddw_info(struct device *dev, struct dma_win_data *data); +extern int gather_dma_info(struct device *dev, struct dma_win_data *data); + #endif /* __KERNEL__ */ #endif /* _ASM_IOMMU_H */ diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 0ce71310b7d9..e3cf3701dd6e 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -339,6 +339,9 @@ static unsigned long iommu_range_alloc(struct device *dev, if (handle) *handle = end; + /* update use count */ + pool->inuse += npages; + spin_unlock_irqrestore(&(pool->lock), flags); return n; @@ -452,6 +455,7 @@ static void __iommu_free(struct 
iommu_table *tbl, dma_addr_t dma_addr, tbl->it_ops->clear(tbl, entry, npages); spin_lock_irqsave(&(pool->lock), flags); + pool->inuse -= npages; bitmap_clear(tbl->it_map, free_entry, npages); spin_unlock_irqrestore(&(pool->lock), flags); } @@ -759,6 +763,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid, p->start = tbl->poolsize * i; p->hint = p->start; p->end = p->start + tbl->poolsize; + p->inuse = 0; } p = &tbl->large_pool; @@ -766,6 +771,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid, p->start = tbl->poolsize * i; p->hint = p->start; p->end = tbl->it_size; + p->inuse = 0; iommu_table_clear(tbl); @@ -1269,6 +1275,233 @@ static const struct iommu_ops spapr_tce_iommu_ops = { .device_group = spapr_tce_iommu_device_group, }; +static inline const char *dma_win_error(int err) +{ + switch (err) { + case SPAPR_ERROR: + return "Error"; + case SPAPR_NODMAWIN: + return "No Default DMA Window Found"; + case SPAPR_NODDWWIN: + return "No Dynamic DMA Window Found"; + default: + return "Unknown Result"; + } +} + +static ssize_t ddw_direct_address_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int rc = 0; + struct dma_win_data data; + + rc = gather_ddw_info(dev, &data); + + if (rc == SPAPR_SUCCESS) + return sysfs_emit(buf, "%#llx\n", data.direct_addr); + else + return sysfs_emit(buf, "%s\n", dma_win_error(rc)); +} + +static ssize_t ddw_dynamic_address_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int rc = 0; + struct dma_win_data data; + + rc = gather_ddw_info(dev, &data); + + if (rc == SPAPR_SUCCESS) + return sysfs_emit(buf, "%#llx\n", data.dynamic_addr); + else + return sysfs_emit(buf, "%s\n", dma_win_error(rc)); +} + +static ssize_t ddw_direct_size_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int rc = 0; + struct dma_win_data data; + + rc = gather_ddw_info(dev, &data); + + if (rc == SPAPR_SUCCESS) + return sysfs_emit(buf, "%lld\n", 
data.direct_size); + else + return sysfs_emit(buf, "%s\n", dma_win_error(rc)); +} + +static ssize_t ddw_dynamic_size_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int rc = 0; + struct dma_win_data data; + + rc = gather_ddw_info(dev, &data); + + if (rc == SPAPR_SUCCESS) + return sysfs_emit(buf, "%lld\n", data.dynamic_size); + else + return sysfs_emit(buf, "%s\n", dma_win_error(rc)); +} + +static ssize_t ddw_page_size_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int rc = 0; + struct dma_win_data data; + + rc = gather_ddw_info(dev, &data); + + if (rc == SPAPR_SUCCESS) + return sysfs_emit(buf, "%d\n", data.win_pgsize); + else + return sysfs_emit(buf, "%s\n", dma_win_error(rc)); +} + +static ssize_t ddw_window_type_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int rc = 0; + struct dma_win_data data; + + rc = gather_ddw_info(dev, &data); + + if (rc == SPAPR_SUCCESS) + return sysfs_emit(buf, "%s\n", data.win_type); + else + return sysfs_emit(buf, "%s\n", dma_win_error(rc)); +} + +static ssize_t ddw_dynamic_pages_mapped_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int rc = 0; + struct dma_win_data data; + + rc = gather_ddw_info(dev, &data); + + if (rc == SPAPR_SUCCESS) + return sysfs_emit(buf, "%d\n", data.dynamic_tces_inuse); + else + return sysfs_emit(buf, "%s\n", dma_win_error(rc)); +} + +static ssize_t dma_dynamic_address_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int rc = 0; + struct dma_win_data data; + + rc = gather_dma_info(dev, &data); + + if (rc == SPAPR_SUCCESS) + return sysfs_emit(buf, "%#llx\n", data.dynamic_addr); + else + return sysfs_emit(buf, "%s\n", dma_win_error(rc)); +} + +static ssize_t dma_dynamic_size_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int rc = 0; + struct dma_win_data data; + + rc = gather_dma_info(dev, &data); + + if (rc == SPAPR_SUCCESS) + return 
sysfs_emit(buf, "%lld\n", data.dynamic_size); + else + return sysfs_emit(buf, "%s\n", dma_win_error(rc)); +} + +static ssize_t dma_page_size_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int rc = 0; + struct dma_win_data data; + + rc = gather_dma_info(dev, &data); + + if (rc == SPAPR_SUCCESS) + return sysfs_emit(buf, "%d\n", data.win_pgsize); + else + return sysfs_emit(buf, "%s\n", dma_win_error(rc)); +} + +static ssize_t dma_dynamic_pages_mapped_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int rc = 0; + struct dma_win_data data; + + rc = gather_dma_info(dev, &data); + + if (rc == SPAPR_SUCCESS) + return sysfs_emit(buf, "%d\n", data.dynamic_tces_inuse); + else + return sysfs_emit(buf, "%s\n", dma_win_error(rc)); +} + +#define DEVICE_ATTR_DDW(_name) \ + struct device_attribute dev_attr_ddw_##_name = \ + __ATTR(_name, 0444, ddw_##_name##_show, NULL) +#define DEVICE_ATTR_DMA(_name) \ + struct device_attribute dev_attr_dma_##_name = \ + __ATTR(_name, 0444, dma_##_name##_show, NULL) + +static DEVICE_ATTR_DDW(direct_address); +static DEVICE_ATTR_DDW(direct_size); +static DEVICE_ATTR_DDW(page_size); +static DEVICE_ATTR_DDW(window_type); +static DEVICE_ATTR_DDW(dynamic_address); +static DEVICE_ATTR_DDW(dynamic_size); +static DEVICE_ATTR_DDW(dynamic_pages_mapped); +static DEVICE_ATTR_DMA(dynamic_address); +static DEVICE_ATTR_DMA(dynamic_size); +static DEVICE_ATTR_DMA(page_size); +static DEVICE_ATTR_DMA(dynamic_pages_mapped); + +static struct attribute *spapr_tce_ddw_attrs[] = { + &dev_attr_ddw_direct_address.attr, + &dev_attr_ddw_direct_size.attr, + &dev_attr_ddw_page_size.attr, + &dev_attr_ddw_window_type.attr, + &dev_attr_ddw_dynamic_address.attr, + &dev_attr_ddw_dynamic_size.attr, + &dev_attr_ddw_dynamic_pages_mapped.attr, + NULL, +}; + +static struct attribute *spapr_tce_dma_attrs[] = { + &dev_attr_dma_dynamic_address.attr, + &dev_attr_dma_dynamic_size.attr, + &dev_attr_dma_page_size.attr, + 
&dev_attr_dma_dynamic_pages_mapped.attr, + NULL, +}; + +static struct attribute_group spapr_tce_ddw_group = { + .name = "spapr-tce-ddw", + .attrs = spapr_tce_ddw_attrs, +}; + +static struct attribute_group spapr_tce_dma_group = { + .name = "spapr-tce-dma", + .attrs = spapr_tce_dma_attrs, +}; + static struct attribute *spapr_tce_iommu_attrs[] = { NULL, }; @@ -1280,6 +1513,8 @@ static struct attribute_group spapr_tce_iommu_group = { static const struct attribute_group *spapr_tce_iommu_groups[] = { &spapr_tce_iommu_group, + &spapr_tce_ddw_group, + &spapr_tce_dma_group, NULL, }; diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 5497b130e026..5d04b50ae265 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -837,6 +837,162 @@ static struct device_node *pci_dma_find(struct device_node *dn, return rdn; } +static unsigned long iommu_table_inuse_tces(struct iommu_table *tbl) +{ + struct iommu_pool *pool; + unsigned long ntces = 0; + + /* Number of TCEs in-use */ + for (int i = 0; i < tbl->nr_pools; i++) { + pool = &tbl->pools[i]; + ntces += pool->inuse; + } + + pool = &tbl->large_pool; + ntces += pool->inuse; + + return ntces; +} + +/* Get DDW information for the device */ +int gather_ddw_info(struct device *dev, struct dma_win_data *data) +{ + struct iommu_device *iommu; + struct pci_controller *phb; + struct device_node *dn; + struct pci_dn *pci; + const __be32 *prop = NULL; + bool ddw_direct = false; + bool found = false; + struct iommu_table *tbl; + u32 pgshift; + struct dynamic_dma_window_prop *p; + + memset(data, 0, sizeof(*data)); + + iommu = dev_get_drvdata(dev); + phb = container_of(iommu, struct pci_controller, iommu); + dn = phb->dn; + + if (!dn) + return SPAPR_ERROR; + + pci = PCI_DN(dn); + if (!pci || !pci->table_group) + return SPAPR_ERROR; + + /* Find DDW */ + prop = of_get_property(dn, DIRECT64_PROPNAME, NULL); + if (prop) { + ddw_direct = true; + found = true; + 
} else { + prop = of_get_property(dn, DMA64_PROPNAME, NULL); + if (prop) + found = true; + } + + /* NO DDW */ + if (!found) + return SPAPR_NODDWWIN; + + p = (struct dynamic_dma_window_prop *)prop; + + pgshift = be32_to_cpu(p->tce_shift); + if (pgshift != 0xc && pgshift != 0x10 && pgshift != 0x15) + data->win_pgsize = 0; + else + data->win_pgsize = 1 << pgshift; + + /* Check if DDW has table associated with it. Having a table associated with + * DDW is indicative that is has some dynamic TCE allocations. In this case the + * DDW can be fully Dynamic or in Hybrid mode. For SR-IOV DDW is on index 0, + * for dedicated adapter on index 1. + */ + found = false; + for (int i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + tbl = pci->table_group->tables[i]; + + if (tbl && tbl->it_index == be32_to_cpu(p->liobn)) { + found = true; + break; + } + } + + /* set the parameters depnding on the DDW type */ + if (ddw_direct && found) { /* Hybrid */ + data->direct_addr = be64_to_cpu(p->dma_base); + data->dynamic_size = (u64)(tbl->it_size << tbl->it_page_shift); + + data->dynamic_addr = data->direct_addr + + (u64)(1UL << be32_to_cpu(p->window_shift)) + - data->dynamic_size; + + data->direct_size = data->dynamic_addr - data->direct_addr; + data->dynamic_tces_inuse = iommu_table_inuse_tces(tbl); + + sprintf(data->win_type, "%s", "Hybrid"); + } else if (ddw_direct && !found) { /* Direct */ + data->direct_addr = be64_to_cpu(p->dma_base); + data->direct_size = (u64)(1UL << be32_to_cpu(p->window_shift)); + + sprintf(data->win_type, "%s", "Direct"); + } else { /* Dynamic */ + data->dynamic_addr = be64_to_cpu(p->dma_base); + data->dynamic_size = (u64)(1UL << be32_to_cpu(p->window_shift)); + data->dynamic_tces_inuse = iommu_table_inuse_tces(tbl); + + sprintf(data->win_type, "%s", "Dynamic"); + } + + return SPAPR_SUCCESS; +} + +/* Get DDW information for the device */ +int gather_dma_info(struct device *dev, struct dma_win_data *data) +{ + struct iommu_device *iommu; + struct pci_controller 
*phb; + struct device_node *dn; + struct pci_dn *pci; + const __be32 *prop = NULL; + struct iommu_table *tbl; + unsigned long offset, size, liobn; + + memset(data, 0, sizeof(*data)); + + iommu = dev_get_drvdata(dev); + phb = container_of(iommu, struct pci_controller, iommu); + dn = phb->dn; + + if (!dn) + return SPAPR_ERROR; + + pci = PCI_DN(dn); + if (!pci || !pci->table_group) + return SPAPR_ERROR; + + /* search for default DMA window */ + prop = of_get_property(dn, "ibm,dma-window", NULL); + + if (!prop) + return SPAPR_NODMAWIN; + + /* default DMA Window is always at index 0 */ + tbl = pci->table_group->tables[0]; + if (!tbl) + return SPAPR_ERROR; + + of_parse_dma_window(dn, prop, &liobn, &offset, &size); + + data->dynamic_addr = offset; + data->dynamic_size = size; + data->win_pgsize = 1ULL << IOMMU_PAGE_SHIFT_4K; + data->dynamic_tces_inuse = iommu_table_inuse_tces(tbl); + + return SPAPR_SUCCESS; +} + static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) { struct iommu_table *tbl; base-commit: 192c0159402e6bfbe13de6f8379546943297783d -- 2.39.3
