Export PowerPC DMA window information (both default 2GB and Dynamic
larger window) to user space via sysfs. Each of these DMA windows has
attributes like size of the window, page size backing the window, mode,
etc. Each of these attributes is exported for user space consumption as a
file.

PowerPC Host Bridge (PHB) can have multiple devices/functions sharing
the same DMA window. For each PHB, iommu registration creates an iommu
device under "/sys/devices/virtual/iommu".

These devices will have 2 groups created to export Default and DDW
attributes.

Reviewed-by: Brian King <[email protected]>
Signed-off-by: Gaurav Batra <[email protected]>
---
 .../arch/powerpc/dma_window_attributes.rst    |  65 +++++
 arch/powerpc/include/asm/iommu.h              |  20 ++
 arch/powerpc/kernel/iommu.c                   | 235 ++++++++++++++++++
 arch/powerpc/platforms/pseries/iommu.c        | 156 ++++++++++++
 4 files changed, 476 insertions(+)
 create mode 100644 Documentation/arch/powerpc/dma_window_attributes.rst

diff --git a/Documentation/arch/powerpc/dma_window_attributes.rst 
b/Documentation/arch/powerpc/dma_window_attributes.rst
new file mode 100644
index 000000000000..8bd9aec8539d
--- /dev/null
+++ b/Documentation/arch/powerpc/dma_window_attributes.rst
@@ -0,0 +1,65 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+DMA Window Attributes
+=====================
+
+In PowerPC architecture there are 2 types of DMA windows -
+
+1. Default 2GB DMA window which is backed by 4K page size
+2. A bigger Dynamic DMA Window (DDW) which is backed by larger page size
+   (64K or 2MB)
+
+A dedicated device will have both the DMA windows instantiated but an SR-IOV
+device will only have the bigger Dynamic DMA Window.
+
+The attributes of these 2 DMA windows are exported to user space via sysfs.
+Each IOMMU isolation unit will have its directory created under
+/sys/devices/virtual/iommu.
+
+As an example, iommu-phb0001
+
+Under each IOMMU isolation unit, there will be a group of attributes for
+"Default 2GB DMA Window" and "Dynamic DMA Window" - spapr-tce-dma and
+spapr-tce-ddw respectively.
+
+Attributes under each group
+
+spapr-tce-ddw:
+direct_address  dynamic_address       dynamic_size  window_type
+direct_size     dynamic_pages_mapped  page_size
+
+spapr-tce-dma:
+dynamic_address  dynamic_pages_mapped  dynamic_size  page_size
+
+
+The bigger Dynamic DMA Window is configured into pre-mapped and/or dynamically
+allocated TCEs. If the DDW is in "Hybrid" mode, then both the Direct
+(pre-mapped) and Dynamic part of the DMA window will have valid values. Hybrid
+mode is valid only for SR-IOV devices.
+
+DMA Window properties:
+
+direct_address              Starting address of the pre-mapped DMA window
+direct_size                 Size of the pre-mapped DMA Window
+dynamic_address             Starting address of the dynamic allocations
+dynamic_size                Size of the dynamic allocation window
+dynamic_pages_mapped        Pages mapped for DMA by dynamic allocations
+page_size                   Page size backing the DMA window
+window_type                 Type of the DMA Window (Direct/Dynamic/Hybrid)
+
+
+An example of DDW attributes for an SR-IOV device::
+
+    $ cd /sys/devices/virtual/iommu/iommu-phb0001/spapr-tce-ddw
+
+    $ grep . *
+
+    direct_address:0x800000000000000   <-- Starting addr of pre-mapped Window
+    direct_size:137438953472           <-- Size of pre-mapped Window (128GB)
+    dynamic_address:0x800002000000000  <-- Starting addr of Dynamic allocations
+    dynamic_size:412316860416          <-- Size of dynamic allocation window (384GB)
+    dynamic_pages_mapped:270           <-- Pages mapped by dynamic allocations
+    page_size:2097152                  <-- DMA window page size (2MB)
+    window_type:Hybrid                 <-- window has both pre-mapped and
+                                           dynamic sections
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index eafdd63cd6c4..e644c6e95301 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -90,6 +90,7 @@ struct iommu_pool {
        unsigned long start;
        unsigned long end;
        unsigned long hint;
+       unsigned long inuse;
        spinlock_t lock;
 } ____cacheline_aligned_in_smp;
 
@@ -319,5 +320,24 @@ extern unsigned long iommu_direction_to_tce_perm(enum 
dma_data_direction dir);
 
 extern const struct dma_map_ops dma_iommu_ops;
 
+/* used by sysfs when querying Dynamic/Default DMA Window data */
+struct dma_win_data {
+       u32     win_pgsize;             /* page size backing the window, in bytes; 0 if unrecognised */
+       u64     direct_addr;            /* start address of the pre-mapped (direct) part */
+       u64     direct_size;            /* size of the pre-mapped (direct) part */
+       u64     dynamic_addr;           /* start address of the dynamically allocated part */
+       u64     dynamic_size;           /* size of the dynamically allocated part */
+       u32     dynamic_tces_inuse;     /* sum of the per-pool in-use counters of the table */
+       char    win_type[15];           /* "Hybrid", "Direct" or "Dynamic"; empty for the default window */
+};
+
+/*
+ * Result codes returned by gather_ddw_info() and gather_dma_info().
+ * The negative values are parenthesized so that the macros expand safely
+ * inside larger expressions.
+ */
+#define SPAPR_SUCCESS       0
+#define SPAPR_NODMAWIN      (-1)
+#define SPAPR_NODDWWIN      (-2)
+#define SPAPR_ERROR         (-3)
+
+/* Fill @data with the Dynamic / Default DMA window details of @dev's PHB */
+extern int gather_ddw_info(struct device *dev, struct dma_win_data *data);
+extern int gather_dma_info(struct device *dev, struct dma_win_data *data);
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_IOMMU_H */
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 0ce71310b7d9..e3cf3701dd6e 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -339,6 +339,9 @@ static unsigned long iommu_range_alloc(struct device *dev,
        if (handle)
                *handle = end;
 
+       /* update use count */
+       pool->inuse += npages;
+
        spin_unlock_irqrestore(&(pool->lock), flags);
 
        return n;
@@ -452,6 +455,7 @@ static void __iommu_free(struct iommu_table *tbl, 
dma_addr_t dma_addr,
        tbl->it_ops->clear(tbl, entry, npages);
 
        spin_lock_irqsave(&(pool->lock), flags);
+       pool->inuse -= npages;
        bitmap_clear(tbl->it_map, free_entry, npages);
        spin_unlock_irqrestore(&(pool->lock), flags);
 }
@@ -759,6 +763,7 @@ struct iommu_table *iommu_init_table(struct iommu_table 
*tbl, int nid,
                p->start = tbl->poolsize * i;
                p->hint = p->start;
                p->end = p->start + tbl->poolsize;
+               p->inuse = 0;
        }
 
        p = &tbl->large_pool;
@@ -766,6 +771,7 @@ struct iommu_table *iommu_init_table(struct iommu_table 
*tbl, int nid,
        p->start = tbl->poolsize * i;
        p->hint = p->start;
        p->end = tbl->it_size;
+       p->inuse = 0;
 
        iommu_table_clear(tbl);
 
@@ -1269,6 +1275,233 @@ static const struct iommu_ops spapr_tce_iommu_ops = {
        .device_group = spapr_tce_iommu_device_group,
 };
 
+/*
+ * Translate a non-success SPAPR_* result code into the text that the sysfs
+ * show callbacks emit in place of a value. Any code without a dedicated
+ * message (including SPAPR_SUCCESS) maps to "Unknown Result".
+ */
+static inline const char *dma_win_error(int err)
+{
+       if (err == SPAPR_ERROR)
+               return "Error";
+
+       if (err == SPAPR_NODMAWIN)
+               return "No Default DMA Window Found";
+
+       if (err == SPAPR_NODDWWIN)
+               return "No Dynamic DMA Window Found";
+
+       return "Unknown Result";
+}
+
+/*
+ * sysfs show: start address of the pre-mapped part of the DDW in hex, or
+ * the dma_win_error() text when gather_ddw_info() does not succeed.
+ */
+static ssize_t ddw_direct_address_show(struct device *dev,
+                                       struct device_attribute *attr,
+                                       char *buf)
+{
+       struct dma_win_data win;
+       int status = gather_ddw_info(dev, &win);
+
+       if (status != SPAPR_SUCCESS)
+               return sysfs_emit(buf, "%s\n", dma_win_error(status));
+
+       return sysfs_emit(buf, "%#llx\n", win.direct_addr);
+}
+
+/*
+ * sysfs show: start address of the dynamically allocated part of the DDW
+ * in hex, or the dma_win_error() text when gather_ddw_info() does not
+ * succeed.
+ */
+static ssize_t ddw_dynamic_address_show(struct device *dev,
+                                        struct device_attribute *attr,
+                                        char *buf)
+{
+       struct dma_win_data win;
+       int status = gather_ddw_info(dev, &win);
+
+       if (status != SPAPR_SUCCESS)
+               return sysfs_emit(buf, "%s\n", dma_win_error(status));
+
+       return sysfs_emit(buf, "%#llx\n", win.dynamic_addr);
+}
+
+/*
+ * sysfs show: size of the pre-mapped part of the DDW in decimal, or the
+ * dma_win_error() text when gather_ddw_info() does not succeed.
+ */
+static ssize_t ddw_direct_size_show(struct device *dev,
+                                    struct device_attribute *attr,
+                                    char *buf)
+{
+       struct dma_win_data data;
+       int rc;
+
+       rc = gather_ddw_info(dev, &data);
+       if (rc != SPAPR_SUCCESS)
+               return sysfs_emit(buf, "%s\n", dma_win_error(rc));
+
+       /* direct_size is a u64, so it must be printed as unsigned */
+       return sysfs_emit(buf, "%llu\n", data.direct_size);
+}
+
+/*
+ * sysfs show: size of the dynamically allocated part of the DDW in
+ * decimal, or the dma_win_error() text when gather_ddw_info() does not
+ * succeed.
+ */
+static ssize_t ddw_dynamic_size_show(struct device *dev,
+                                     struct device_attribute *attr,
+                                     char *buf)
+{
+       struct dma_win_data data;
+       int rc;
+
+       rc = gather_ddw_info(dev, &data);
+       if (rc != SPAPR_SUCCESS)
+               return sysfs_emit(buf, "%s\n", dma_win_error(rc));
+
+       /* dynamic_size is a u64, so it must be printed as unsigned */
+       return sysfs_emit(buf, "%llu\n", data.dynamic_size);
+}
+
+/*
+ * sysfs show: page size backing the DDW in decimal (0 when the TCE shift
+ * is not one of the recognised values), or the dma_win_error() text when
+ * gather_ddw_info() does not succeed.
+ */
+static ssize_t ddw_page_size_show(struct device *dev,
+                                  struct device_attribute *attr,
+                                  char *buf)
+{
+       struct dma_win_data data;
+       int rc;
+
+       rc = gather_ddw_info(dev, &data);
+       if (rc != SPAPR_SUCCESS)
+               return sysfs_emit(buf, "%s\n", dma_win_error(rc));
+
+       /* win_pgsize is a u32, so it must be printed as unsigned */
+       return sysfs_emit(buf, "%u\n", data.win_pgsize);
+}
+
+/*
+ * sysfs show: the DDW type string stored by gather_ddw_info(), or the
+ * dma_win_error() text when gather_ddw_info() does not succeed.
+ */
+static ssize_t ddw_window_type_show(struct device *dev,
+                                    struct device_attribute *attr,
+                                    char *buf)
+{
+       struct dma_win_data win;
+       int status = gather_ddw_info(dev, &win);
+
+       if (status != SPAPR_SUCCESS)
+               return sysfs_emit(buf, "%s\n", dma_win_error(status));
+
+       return sysfs_emit(buf, "%s\n", win.win_type);
+}
+
+/*
+ * sysfs show: number of TCEs currently in use in the dynamic part of the
+ * DDW, or the dma_win_error() text when gather_ddw_info() does not
+ * succeed.
+ */
+static ssize_t ddw_dynamic_pages_mapped_show(struct device *dev,
+                                             struct device_attribute *attr,
+                                             char *buf)
+{
+       struct dma_win_data data;
+       int rc;
+
+       rc = gather_ddw_info(dev, &data);
+       if (rc != SPAPR_SUCCESS)
+               return sysfs_emit(buf, "%s\n", dma_win_error(rc));
+
+       /* dynamic_tces_inuse is a u32, so it must be printed as unsigned */
+       return sysfs_emit(buf, "%u\n", data.dynamic_tces_inuse);
+}
+
+/*
+ * sysfs show: start address of the default DMA window in hex, or the
+ * dma_win_error() text when gather_dma_info() does not succeed.
+ */
+static ssize_t dma_dynamic_address_show(struct device *dev,
+                                        struct device_attribute *attr,
+                                        char *buf)
+{
+       struct dma_win_data win;
+       int status = gather_dma_info(dev, &win);
+
+       if (status != SPAPR_SUCCESS)
+               return sysfs_emit(buf, "%s\n", dma_win_error(status));
+
+       return sysfs_emit(buf, "%#llx\n", win.dynamic_addr);
+}
+
+/*
+ * sysfs show: size of the default DMA window in decimal, or the
+ * dma_win_error() text when gather_dma_info() does not succeed.
+ */
+static ssize_t dma_dynamic_size_show(struct device *dev,
+                                     struct device_attribute *attr,
+                                     char *buf)
+{
+       struct dma_win_data data;
+       int rc;
+
+       rc = gather_dma_info(dev, &data);
+       if (rc != SPAPR_SUCCESS)
+               return sysfs_emit(buf, "%s\n", dma_win_error(rc));
+
+       /* dynamic_size is a u64, so it must be printed as unsigned */
+       return sysfs_emit(buf, "%llu\n", data.dynamic_size);
+}
+
+/*
+ * sysfs show: page size backing the default DMA window in decimal, or the
+ * dma_win_error() text when gather_dma_info() does not succeed.
+ */
+static ssize_t dma_page_size_show(struct device *dev,
+                                  struct device_attribute *attr,
+                                  char *buf)
+{
+       struct dma_win_data data;
+       int rc;
+
+       rc = gather_dma_info(dev, &data);
+       if (rc != SPAPR_SUCCESS)
+               return sysfs_emit(buf, "%s\n", dma_win_error(rc));
+
+       /* win_pgsize is a u32, so it must be printed as unsigned */
+       return sysfs_emit(buf, "%u\n", data.win_pgsize);
+}
+
+/*
+ * sysfs show: number of TCEs currently in use in the default DMA window,
+ * or the dma_win_error() text when gather_dma_info() does not succeed.
+ */
+static ssize_t dma_dynamic_pages_mapped_show(struct device *dev,
+                                             struct device_attribute *attr,
+                                             char *buf)
+{
+       struct dma_win_data data;
+       int rc;
+
+       rc = gather_dma_info(dev, &data);
+       if (rc != SPAPR_SUCCESS)
+               return sysfs_emit(buf, "%s\n", dma_win_error(rc));
+
+       /* dynamic_tces_inuse is a u32, so it must be printed as unsigned */
+       return sysfs_emit(buf, "%u\n", data.dynamic_tces_inuse);
+}
+
+/*
+ * Both windows expose attributes with the same file names (page_size,
+ * dynamic_size, ...), so the variable and show-callback names carry a
+ * ddw_ / dma_ prefix while the sysfs file name stays unprefixed.
+ */
+#define DEVICE_ATTR_DDW(_attr)                                         \
+       struct device_attribute dev_attr_ddw_##_attr =                  \
+               __ATTR(_attr, 0444, ddw_##_attr##_show, NULL)
+
+#define DEVICE_ATTR_DMA(_attr)                                         \
+       struct device_attribute dev_attr_dma_##_attr =                  \
+               __ATTR(_attr, 0444, dma_##_attr##_show, NULL)
+
+/* Dynamic DMA Window attributes */
+static DEVICE_ATTR_DDW(direct_address);
+static DEVICE_ATTR_DDW(direct_size);
+static DEVICE_ATTR_DDW(dynamic_address);
+static DEVICE_ATTR_DDW(dynamic_size);
+static DEVICE_ATTR_DDW(dynamic_pages_mapped);
+static DEVICE_ATTR_DDW(page_size);
+static DEVICE_ATTR_DDW(window_type);
+
+/* Default DMA window attributes */
+static DEVICE_ATTR_DMA(dynamic_address);
+static DEVICE_ATTR_DMA(dynamic_size);
+static DEVICE_ATTR_DMA(dynamic_pages_mapped);
+static DEVICE_ATTR_DMA(page_size);
+
+/* Attribute files of the Dynamic DMA Window group; NULL-terminated */
+static struct attribute *spapr_tce_ddw_attrs[] = {
+       &dev_attr_ddw_direct_address.attr,
+       &dev_attr_ddw_direct_size.attr,
+       &dev_attr_ddw_page_size.attr,
+       &dev_attr_ddw_window_type.attr,
+       &dev_attr_ddw_dynamic_address.attr,
+       &dev_attr_ddw_dynamic_size.attr,
+       &dev_attr_ddw_dynamic_pages_mapped.attr,
+       NULL,
+};
+
+/* Attribute files of the default DMA window group; NULL-terminated */
+static struct attribute *spapr_tce_dma_attrs[] = {
+       &dev_attr_dma_dynamic_address.attr,
+       &dev_attr_dma_dynamic_size.attr,
+       &dev_attr_dma_page_size.attr,
+       &dev_attr_dma_dynamic_pages_mapped.attr,
+       NULL,
+};
+
+/*
+ * "spapr-tce-ddw" sysfs directory holding the Dynamic DMA Window
+ * attributes. Never modified after build time, hence const.
+ */
+static const struct attribute_group spapr_tce_ddw_group = {
+       .name = "spapr-tce-ddw",
+       .attrs = spapr_tce_ddw_attrs,
+};
+
+/*
+ * "spapr-tce-dma" sysfs directory holding the default DMA window
+ * attributes. Never modified after build time, hence const.
+ */
+static const struct attribute_group spapr_tce_dma_group = {
+       .name = "spapr-tce-dma",
+       .attrs = spapr_tce_dma_attrs,
+};
+
 static struct attribute *spapr_tce_iommu_attrs[] = {
        NULL,
 };
@@ -1280,6 +1513,8 @@ static struct attribute_group spapr_tce_iommu_group = {
 
 static const struct attribute_group *spapr_tce_iommu_groups[] = {
        &spapr_tce_iommu_group,
+       &spapr_tce_ddw_group,
+       &spapr_tce_dma_group,
        NULL,
 };
 
diff --git a/arch/powerpc/platforms/pseries/iommu.c 
b/arch/powerpc/platforms/pseries/iommu.c
index 5497b130e026..5d04b50ae265 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -837,6 +837,162 @@ static struct device_node *pci_dma_find(struct 
device_node *dn,
        return rdn;
 }
 
+/*
+ * Sum the in-use counters of every allocation pool of @tbl: the
+ * tbl->nr_pools regular pools plus the large pool. The counters are read
+ * without taking the pool locks.
+ */
+static unsigned long iommu_table_inuse_tces(struct iommu_table *tbl)
+{
+       unsigned long total = tbl->large_pool.inuse;
+       int idx;
+
+       for (idx = 0; idx < tbl->nr_pools; idx++)
+               total += tbl->pools[idx].inuse;
+
+       return total;
+}
+
+/*
+ * Get Dynamic DMA Window (DDW) information for the PHB behind an iommu
+ * sysfs device.
+ *
+ * @dev:  device whose drvdata is the struct iommu_device embedded in the
+ *        PHB's struct pci_controller
+ * @data: output; zeroed on entry, contents are only meaningful when
+ *        SPAPR_SUCCESS is returned
+ *
+ * Returns SPAPR_SUCCESS on success, SPAPR_NODDWWIN when the PHB node has
+ * neither a DIRECT64_PROPNAME nor a DMA64_PROPNAME property, and
+ * SPAPR_ERROR when the PHB data is incomplete or the property is
+ * malformed.
+ *
+ * NOTE(review): the property is read without any DDW lock held here;
+ * confirm it cannot be removed concurrently by a window teardown.
+ */
+int gather_ddw_info(struct device *dev, struct dma_win_data *data)
+{
+       const struct dynamic_dma_window_prop *p;
+       struct iommu_table *tbl = NULL;
+       struct iommu_device *iommu;
+       struct pci_controller *phb;
+       struct device_node *dn;
+       struct pci_dn *pci;
+       bool ddw_direct = true;
+       u32 pgshift, winshift;
+       u64 win_size;
+       int len = 0;
+
+       memset(data, 0, sizeof(*data));
+
+       /* container_of() on a NULL drvdata would yield a bogus PHB pointer */
+       iommu = dev_get_drvdata(dev);
+       if (!iommu)
+               return SPAPR_ERROR;
+
+       phb = container_of(iommu, struct pci_controller, iommu);
+       dn = phb->dn;
+       if (!dn)
+               return SPAPR_ERROR;
+
+       pci = PCI_DN(dn);
+       if (!pci || !pci->table_group)
+               return SPAPR_ERROR;
+
+       /* Find DDW: the direct-capable property first, then the dynamic one */
+       p = of_get_property(dn, DIRECT64_PROPNAME, &len);
+       if (!p) {
+               ddw_direct = false;
+               p = of_get_property(dn, DMA64_PROPNAME, &len);
+       }
+
+       /* NO DDW */
+       if (!p)
+               return SPAPR_NODDWWIN;
+
+       /* do not read past a truncated property */
+       if (len < (int)sizeof(*p))
+               return SPAPR_ERROR;
+
+       /* only 4K, 64K and 2M pages are reported; anything else stays 0 */
+       pgshift = be32_to_cpu(p->tce_shift);
+       if (pgshift == 0xc || pgshift == 0x10 || pgshift == 0x15)
+               data->win_pgsize = 1U << pgshift;
+
+       /* shifting a 64-bit value by 64 or more is undefined */
+       winshift = be32_to_cpu(p->window_shift);
+       if (winshift >= 64)
+               return SPAPR_ERROR;
+       win_size = 1ULL << winshift;
+
+       /*
+        * Check if DDW has a table associated with it. Having a table
+        * associated with DDW is indicative that it has some dynamic TCE
+        * allocations. In this case the DDW can be fully Dynamic or in
+        * Hybrid mode. For SR-IOV DDW is on index 0, for dedicated adapter
+        * on index 1.
+        */
+       for (int i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+               struct iommu_table *t = pci->table_group->tables[i];
+
+               if (t && t->it_index == be32_to_cpu(p->liobn)) {
+                       tbl = t;
+                       break;
+               }
+       }
+
+       /* a dynamic-only window cannot be described without its table */
+       if (!ddw_direct && !tbl)
+               return SPAPR_ERROR;
+
+       /* set the parameters depending on the DDW type */
+       if (ddw_direct && tbl) {            /* Hybrid */
+               /* the dynamic part occupies the top of the window */
+               data->direct_addr = be64_to_cpu(p->dma_base);
+               data->dynamic_size = (u64)tbl->it_size << tbl->it_page_shift;
+               data->dynamic_addr = data->direct_addr + win_size
+                                    - data->dynamic_size;
+               data->direct_size = data->dynamic_addr - data->direct_addr;
+               data->dynamic_tces_inuse = iommu_table_inuse_tces(tbl);
+
+               snprintf(data->win_type, sizeof(data->win_type), "%s", "Hybrid");
+       } else if (ddw_direct) {            /* Direct */
+               data->direct_addr = be64_to_cpu(p->dma_base);
+               data->direct_size = win_size;
+
+               snprintf(data->win_type, sizeof(data->win_type), "%s", "Direct");
+       } else {                            /* Dynamic */
+               data->dynamic_addr = be64_to_cpu(p->dma_base);
+               data->dynamic_size = win_size;
+               data->dynamic_tces_inuse = iommu_table_inuse_tces(tbl);
+
+               snprintf(data->win_type, sizeof(data->win_type), "%s", "Dynamic");
+       }
+
+       return SPAPR_SUCCESS;
+}
+
+/*
+ * Get default DMA window information for the PHB behind an iommu sysfs
+ * device.
+ *
+ * @dev:  device whose drvdata is the struct iommu_device embedded in the
+ *        PHB's struct pci_controller
+ * @data: output; zeroed on entry, contents are only meaningful when
+ *        SPAPR_SUCCESS is returned. Only the dynamic_* and win_pgsize
+ *        fields are filled in.
+ *
+ * Returns SPAPR_SUCCESS on success, SPAPR_NODMAWIN when the PHB node has
+ * no "ibm,dma-window" property or the table at index 0 does not belong to
+ * that window, and SPAPR_ERROR when the PHB data is incomplete.
+ */
+int gather_dma_info(struct device *dev, struct dma_win_data *data)
+{
+       unsigned long offset, size, liobn;
+       struct iommu_device *iommu;
+       struct pci_controller *phb;
+       struct iommu_table *tbl;
+       struct device_node *dn;
+       struct pci_dn *pci;
+       const __be32 *prop;
+
+       memset(data, 0, sizeof(*data));
+
+       /* container_of() on a NULL drvdata would yield a bogus PHB pointer */
+       iommu = dev_get_drvdata(dev);
+       if (!iommu)
+               return SPAPR_ERROR;
+
+       phb = container_of(iommu, struct pci_controller, iommu);
+       dn = phb->dn;
+       if (!dn)
+               return SPAPR_ERROR;
+
+       pci = PCI_DN(dn);
+       if (!pci || !pci->table_group)
+               return SPAPR_ERROR;
+
+       /* search for default DMA window */
+       prop = of_get_property(dn, "ibm,dma-window", NULL);
+       if (!prop)
+               return SPAPR_NODMAWIN;
+
+       of_parse_dma_window(dn, prop, &liobn, &offset, &size);
+
+       /* default DMA Window is always at index 0 */
+       tbl = pci->table_group->tables[0];
+       if (!tbl)
+               return SPAPR_ERROR;
+
+       /*
+        * Index 0 can also hold a DDW table; do not report another
+        * window's usage as the default window's.
+        * NOTE(review): assumes it_index holds the window's LIOBN, as the
+        * DDW table lookup in gather_ddw_info() does - confirm.
+        */
+       if (tbl->it_index != liobn)
+               return SPAPR_NODMAWIN;
+
+       data->dynamic_addr = offset;
+       data->dynamic_size = size;
+       data->win_pgsize = 1U << IOMMU_PAGE_SHIFT_4K;
+       data->dynamic_tces_inuse = iommu_table_inuse_tces(tbl);
+
+       return SPAPR_SUCCESS;
+}
+
 static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
 {
        struct iommu_table *tbl;

base-commit: 192c0159402e6bfbe13de6f8379546943297783d
-- 
2.39.3


Reply via email to