Currently dmar_fault handler only prints a message in the dmesg. This
commit introduces counters - how many faults have happened, and
exposes them via sysfs. Each pci device will have an entry
'dmar_faults' reading from which will give user 3 lines
  remap: xxx
  read: xxx
  write: xxx

This functionality is targeted for health monitoring daemons.

Signed-off-by: Yuri Volchkov <volch...@amazon.de>
---
 drivers/iommu/dmar.c        | 133 +++++++++++++++++++++++++++++++-----
 drivers/pci/pci-sysfs.c     |  20 ++++++
 include/linux/intel-iommu.h |   3 +
 include/linux/pci.h         |  11 +++
 4 files changed, 150 insertions(+), 17 deletions(-)

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index eecd6a421667..0749873e3e41 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1107,6 +1107,7 @@ static void free_iommu(struct intel_iommu *iommu)
        }
 
        if (iommu->irq) {
+               destroy_workqueue(iommu->fault_wq);
                if (iommu->pr_irq) {
                        free_irq(iommu->pr_irq, iommu);
                        dmar_free_hwirq(iommu->pr_irq);
@@ -1672,9 +1673,46 @@ void dmar_msi_read(int irq, struct msi_msg *msg)
        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
 }
 
-static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
-               u8 fault_reason, int pasid, u16 source_id,
-               unsigned long long addr)
+struct dmar_fault_info {
+       struct work_struct work;
+       struct intel_iommu *iommu;
+       int type;
+       int pasid;
+       u16 source_id;
+       unsigned long long addr;
+       u8 fault_reason;
+};
+
+static struct kmem_cache *dmar_fault_info_cache;
+int __init dmar_fault_info_cache_init(void)
+{
+       int ret = 0;
+
+       dmar_fault_info_cache =
+               kmem_cache_create("dmar_fault_info",
+                                 sizeof(struct dmar_fault_info), 0,
+                                 SLAB_HWCACHE_ALIGN, NULL);
+       if (!dmar_fault_info_cache) {
+               pr_err("Couldn't create dmar_fault_info cache\n");
+               ret = -ENOMEM;
+       }
+
+       return ret;
+}
+
+static inline void *alloc_dmar_fault_info(void)
+{
+       return kmem_cache_alloc(dmar_fault_info_cache, GFP_ATOMIC);
+}
+
+static inline void free_dmar_fault_info(void *vaddr)
+{
+       kmem_cache_free(dmar_fault_info_cache, vaddr);
+}
+
+static int dmar_fault_dump_one(struct intel_iommu *iommu, int type,
+                              u8 fault_reason, int pasid, u16 source_id,
+                              unsigned long long addr)
 {
        const char *reason;
        int fault_type;
@@ -1695,6 +1733,57 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, 
int type,
        return 0;
 }
 
+static int dmar_fault_handle_one(struct dmar_fault_info *info)
+{
+       struct pci_dev *pdev;
+       u8 devfn;
+       atomic_t *pcnt;
+
+       devfn = PCI_DEVFN(PCI_SLOT(info->source_id), PCI_FUNC(info->source_id));
+       pdev = pci_get_domain_bus_and_slot(info->iommu->segment,
+                                          PCI_BUS_NUM(info->source_id), devfn);
+
+       if (!pdev)
+               return -ENXIO;
+
+       if (info->fault_reason == INTR_REMAP)
+               pcnt = &pdev->faults_cnt.remap;
+       else if (info->type)
+               pcnt = &pdev->faults_cnt.read;
+       else
+               pcnt = &pdev->faults_cnt.write;
+
+       atomic_inc(pcnt);
+       return 0;
+}
+
+static void dmar_fault_handle_work(struct work_struct *work)
+{
+       struct dmar_fault_info *info;
+
+       info = container_of(work, struct dmar_fault_info, work);
+
+       dmar_fault_handle_one(info);
+       free_dmar_fault_info(info);
+}
+
+static int dmar_fault_schedule_one(struct intel_iommu *iommu, int type,
+                                  u8 fault_reason, int pasid, u16 source_id,
+                                  unsigned long long addr)
+{
+       struct dmar_fault_info *info = alloc_dmar_fault_info();
+
+       INIT_WORK(&info->work, dmar_fault_handle_work);
+       info->iommu = iommu;
+       info->type = type;
+       info->fault_reason = fault_reason;
+       info->pasid = pasid;
+       info->source_id = source_id;
+       info->addr = addr;
+
+       return queue_work(iommu->fault_wq, &info->work);
+}
+
 #define PRIMARY_FAULT_REG_LEN (16)
 irqreturn_t dmar_fault(int irq, void *dev_id)
 {
@@ -1733,20 +1822,18 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
                if (!(data & DMA_FRCD_F))
                        break;
 
-               if (!ratelimited) {
-                       fault_reason = dma_frcd_fault_reason(data);
-                       type = dma_frcd_type(data);
+               fault_reason = dma_frcd_fault_reason(data);
+               type = dma_frcd_type(data);
 
-                       pasid = dma_frcd_pasid_value(data);
-                       data = readl(iommu->reg + reg +
-                                    fault_index * PRIMARY_FAULT_REG_LEN + 8);
-                       source_id = dma_frcd_source_id(data);
+               pasid = dma_frcd_pasid_value(data);
+               data = readl(iommu->reg + reg +
+                            fault_index * PRIMARY_FAULT_REG_LEN + 8);
+               source_id = dma_frcd_source_id(data);
 
-                       pasid_present = dma_frcd_pasid_present(data);
-                       guest_addr = dmar_readq(iommu->reg + reg +
+               pasid_present = dma_frcd_pasid_present(data);
+               guest_addr = dmar_readq(iommu->reg + reg +
                                        fault_index * PRIMARY_FAULT_REG_LEN);
-                       guest_addr = dma_frcd_page_addr(guest_addr);
-               }
+               guest_addr = dma_frcd_page_addr(guest_addr);
 
                /* clear the fault */
                writel(DMA_FRCD_F, iommu->reg + reg +
@@ -1756,9 +1843,13 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
 
                if (!ratelimited)
                        /* Using pasid -1 if pasid is not present */
-                       dmar_fault_do_one(iommu, type, fault_reason,
-                                         pasid_present ? pasid : -1,
-                                         source_id, guest_addr);
+                       dmar_fault_dump_one(iommu, type, fault_reason,
+                                           pasid_present ? pasid : -1,
+                                           source_id, guest_addr);
+               if (irq != -1)
+                       dmar_fault_schedule_one(iommu, type, fault_reason,
+                                               pasid_present ? pasid : -1,
+                                               source_id, guest_addr);
 
                fault_index++;
                if (fault_index >= cap_num_fault_regs(iommu->cap))
@@ -1784,6 +1875,10 @@ int dmar_set_interrupt(struct intel_iommu *iommu)
        if (iommu->irq)
                return 0;
 
+       iommu->fault_wq = alloc_ordered_workqueue("fault_%s", 0, iommu->name);
+       if (!iommu->fault_wq)
+               return -ENOMEM;
+
        irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
        if (irq > 0) {
                iommu->irq = irq;
@@ -1803,6 +1898,9 @@ int __init enable_drhd_fault_handling(void)
        struct dmar_drhd_unit *drhd;
        struct intel_iommu *iommu;
 
+       if (dmar_fault_info_cache_init())
+               return -1;
+
        /*
         * Enable fault control interrupt.
         */
@@ -1813,6 +1911,7 @@ int __init enable_drhd_fault_handling(void)
                if (ret) {
                        pr_err("DRHD %Lx: failed to enable fault, interrupt, 
ret %d\n",
                               (unsigned long long)drhd->reg_base_addr, ret);
+                       kmem_cache_destroy(dmar_fault_info_cache);
                        return -1;
                }
 
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 07bd84c50238..d01514fca6d1 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -263,6 +263,23 @@ static ssize_t ari_enabled_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(ari_enabled);
 
+#ifdef CONFIG_DMAR_TABLE
+static ssize_t dmar_faults_show(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+       int remap, read, write;
+
+       remap = atomic_xchg(&pdev->faults_cnt.remap, 0);
+       read = atomic_xchg(&pdev->faults_cnt.read, 0);
+       write = atomic_xchg(&pdev->faults_cnt.write, 0);
+
+       return sprintf(buf, "remap: %d\nread: %d\nwrite: %d\n", remap, read,
+                      write);
+}
+static DEVICE_ATTR_RO(dmar_faults);
+#endif
+
 static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
                             char *buf)
 {
@@ -623,6 +640,9 @@ static struct attribute *pci_dev_attrs[] = {
 #endif
        &dev_attr_driver_override.attr,
        &dev_attr_ari_enabled.attr,
+#ifdef CONFIG_DMAR_TABLE
+       &dev_attr_dmar_faults.attr,
+#endif
        NULL,
 };
 
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index ed11ef594378..f8963c833fb0 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -552,6 +552,9 @@ struct intel_iommu {
        struct iommu_device iommu;  /* IOMMU core code handle */
        int             node;
        u32             flags;      /* Software defined flags */
+#ifdef CONFIG_DMAR_TABLE
+       struct workqueue_struct *fault_wq; /* Fault handler */
+#endif
 };
 
 /* PCI domain-device relationship */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 14efa2586a8c..d9cc94fbf0ee 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -286,6 +286,14 @@ struct pci_vpd;
 struct pci_sriov;
 struct pci_p2pdma;
 
+#ifdef CONFIG_DMAR_TABLE
+struct pci_dmar_faults_cnt {
+       atomic_t read; /* How many read faults occurred */
+       atomic_t write; /* How many write faults occurred */
+       atomic_t remap; /* How many remap faults occurred */
+};
+#endif
+
 /* The pci_dev structure describes PCI devices */
 struct pci_dev {
        struct list_head bus_list;      /* Node in per-bus list */
@@ -468,6 +476,9 @@ struct pci_dev {
        char            *driver_override; /* Driver name to force a match */
 
        unsigned long   priv_flags;     /* Private flags for the PCI driver */
+#ifdef CONFIG_DMAR_TABLE
+       struct pci_dmar_faults_cnt faults_cnt; /* Dmar fault statistics */
+#endif
 };
 
 static inline struct pci_dev *pci_physfn(struct pci_dev *dev)
-- 
2.23.0




Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Ralf Herbrich
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879



Reply via email to