[Patch V2 06/13] platform-msi: Add device MSI infrastructure

2021-02-26 Thread Megha Dey
From: Thomas Gleixner 

Add device specific MSI domain infrastructure for devices which have their
own resource management and interrupt chip. These devices are not related
to PCI and contrary to platform MSI they do not share a common resource and
interrupt chip. They provide their own domain specific resource management
and interrupt chip.

This utilizes the new alloc/free override in a non evil way which avoids
having yet another set of specialized alloc/free functions. Just using
msi_domain_alloc/free_irqs() is sufficient.

While initially it was suggested and tried to piggyback device MSI on
platform MSI, the better variant is to reimplement platform MSI on top of
device MSI.

Reviewed-by: Tony Luck 
Signed-off-by: Thomas Gleixner 
Signed-off-by: Megha Dey 
---
 drivers/base/platform-msi.c | 131 
 include/linux/irqdomain.h   |   1 +
 include/linux/msi.h |  24 
 kernel/irq/Kconfig  |   4 ++
 4 files changed, 160 insertions(+)

diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 9d9ccfc..6127b3b 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -419,3 +419,134 @@ int platform_msi_domain_alloc(struct irq_domain *domain, 
unsigned int virq,
 
return err;
 }
+
+#ifdef CONFIG_DEVICE_MSI
+/*
+ * Device specific MSI domain infrastructure for devices which have their
+ * own resource management and interrupt chip. These devices are not
+ * related to PCI and contrary to platform MSI they do not share a common
+ * resource and interrupt chip. They provide their own domain specific
+ * resource management and interrupt chip.
+ */
+
+static void device_msi_free_msi_entries(struct device *dev)
+{
+   struct list_head *msi_list = dev_to_msi_list(dev);
+   struct msi_desc *entry, *tmp;
+
+   list_for_each_entry_safe(entry, tmp, msi_list, list) {
+   list_del(&entry->list);
+   free_msi_entry(entry);
+   }
+}
+
+/**
+ * device_msi_free_irqs - Free MSI interrupts assigned to a device
+ * @dev:   Pointer to the device
+ *
+ * Frees the interrupt and the MSI descriptors.
+ */
+static void device_msi_free_irqs(struct irq_domain *domain, struct device *dev)
+{
+   __msi_domain_free_irqs(domain, dev);
+   device_msi_free_msi_entries(dev);
+}
+
+/**
+ * device_msi_alloc_irqs - Allocate MSI interrupts for a device
+ * @dev:   Pointer to the device
+ * @nvec:  Number of vectors
+ *
+ * Allocates the required number of MSI descriptors and the corresponding
+ * interrupt descriptors.
+ */
+static int device_msi_alloc_irqs(struct irq_domain *domain, struct device 
*dev, int nvec)
+{
+   int i, ret = -ENOMEM;
+
+   for (i = 0; i < nvec; i++) {
+   struct msi_desc *entry = alloc_msi_entry(dev, 1, NULL);
+
+   if (!entry)
+   goto fail;
+   list_add_tail(&entry->list, dev_to_msi_list(dev));
+   }
+
+   ret = __msi_domain_alloc_irqs(domain, dev, nvec);
+   if (!ret)
+   return 0;
+fail:
+   device_msi_free_msi_entries(dev);
+   return ret;
+}
+
+static void device_msi_update_dom_ops(struct msi_domain_info *info)
+{
+   if (!info->ops->domain_alloc_irqs)
+   info->ops->domain_alloc_irqs = device_msi_alloc_irqs;
+   if (!info->ops->domain_free_irqs)
+   info->ops->domain_free_irqs = device_msi_free_irqs;
+   if (!info->ops->msi_prepare)
+   info->ops->msi_prepare = arch_msi_prepare;
+}
+
+/**
+ * device_msi_create_irq_domain - Create an irq domain for devices
+ * @fn:    Firmware node of the interrupt controller
+ * @info:  MSI domain info to configure the new domain
+ * @parent:Parent domain
+ */
+struct irq_domain *device_msi_create_irq_domain(struct fwnode_handle *fn,
+   struct msi_domain_info *info,
+   struct irq_domain *parent)
+{
+   struct irq_domain *domain;
+
+   if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
+   platform_msi_update_chip_ops(info);
+
+   if (info->flags & MSI_FLAG_USE_DEF_DOM_OPS)
+   device_msi_update_dom_ops(info);
+
+   msi_domain_set_default_info_flags(info);
+
+   domain = msi_create_irq_domain(fn, info, parent);
+   if (domain)
+   irq_domain_update_bus_token(domain, DOMAIN_BUS_DEVICE_MSI);
+   return domain;
+}
+
+#ifdef CONFIG_PCI
+#include 
+
+/**
+ * pci_subdevice_msi_create_irq_domain - Create an irq domain for subdevices
+ * @pdev:  Pointer to PCI device for which the subdevice domain is created
+ * @info:  MSI domain info to configure the new domain
+ */
+struct irq_domain *pci_subdevice_msi_create_irq_domain(struct pci_dev *pdev,
+  struct msi_domain_info 
*info)
+{
+   struct irq_

[Patch V2 11/13] platform-msi: Add platform check for subdevice irq domain

2021-02-26 Thread Megha Dey
From: Lu Baolu 

The pci_subdevice_msi_create_irq_domain() should fail if the underlying
platform is not able to support IMS (Interrupt Message Storage). Otherwise,
the isolation of interrupt is not guaranteed.

For x86, IMS is only supported on bare metal for now. We could enable it
in the virtualization environments in the future if interrupt HYPERCALL
domain is supported or the hardware has the capability of interrupt
isolation for subdevices.

Cc: David Woodhouse 
Cc: Leon Romanovsky 
Cc: Kevin Tian 
Suggested-by: Thomas Gleixner 
Link: https://lore.kernel.org/linux-pci/87pn4nk7nn@nanos.tec.linutronix.de/
Link: https://lore.kernel.org/linux-pci/877dqrnzr3@nanos.tec.linutronix.de/
Link: https://lore.kernel.org/linux-pci/877dqqmc2h@nanos.tec.linutronix.de/
Reviewed-by: Tony Luck 
Signed-off-by: Lu Baolu 
Signed-off-by: Megha Dey 
---
 arch/x86/pci/common.c   | 72 +
 drivers/base/platform-msi.c |  8 +
 include/linux/msi.h |  2 ++
 3 files changed, 82 insertions(+)

diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 3507f45..64daa6a 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -12,6 +12,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include 
 #include 
@@ -724,3 +726,73 @@ struct pci_dev *pci_real_dma_dev(struct pci_dev *dev)
return dev;
 }
 #endif
+
+/*
+ * We want to figure out which context we are running in. But the hardware
+ * does not introduce a reliable way (instruction, CPUID leaf, MSR, whatever)
+ * which can be manipulated by the VMM to let the OS figure out where it runs.
+ * So we go with the below probably on_bare_metal() function as a replacement
+ * for definitely on_bare_metal() to go forward only for the very simple reason
+ * that this is the only option we have.
+ */
+static const char * const vmm_vendor_name[] = {
+   "QEMU", "Bochs", "KVM", "Xen", "VMware", "VMW", "VMware Inc.",
+   "innotek GmbH", "Oracle Corporation", "Parallels", "BHYVE"
+};
+
+static void read_type0_virtual_machine(const struct dmi_header *dm, void *p)
+{
+   u8 *data = (u8 *)dm + 0x13;
+
+   /* BIOS Information (Type 0) */
+   if (dm->type != 0 || dm->length < 0x14)
+   return;
+
+   /* Bit 4 of BIOS Characteristics Extension Byte 2 */
+   if (*data & BIT(4))
+   *((bool *)p) = true;
+}
+
+static bool smbios_virtual_machine(void)
+{
+   bool bit_present = false;
+
+   dmi_walk(read_type0_virtual_machine, &bit_present);
+
+   return bit_present;
+}
+
+static bool on_bare_metal(struct device *dev)
+{
+   int i;
+
+   if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+   return false;
+
+   if (smbios_virtual_machine())
+   return false;
+
+   if (iommu_capable(dev->bus, IOMMU_CAP_VIOMMU_HINT))
+   return false;
+
+   for (i = 0; i < ARRAY_SIZE(vmm_vendor_name); i++)
+   if (dmi_match(DMI_SYS_VENDOR, vmm_vendor_name[i]))
+   return false;
+
+   pr_info("System running on bare metal, report to bugzilla.kernel.org if 
not the case.");
+
+   return true;
+}
+
+bool arch_support_pci_device_msi(struct pci_dev *pdev)
+{
+   /*
+* When we are running in a VMM context, the device IMS could only be
+* enabled when the underlying hardware supports interrupt isolation
+* of the subdevice, or any mechanism (trap, hypercall) is added so
+* that changes in the interrupt message store could be managed by the
+* VMM. For now, we only support the device IMS when we are running on
+* the bare metal.
+*/
+   return on_bare_metal(&pdev->dev);
+}
diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 6127b3b..c4a0d9c 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -519,6 +519,11 @@ struct irq_domain *device_msi_create_irq_domain(struct 
fwnode_handle *fn,
 #ifdef CONFIG_PCI
 #include 
 
+bool __weak arch_support_pci_device_msi(struct pci_dev *pdev)
+{
+   return false;
+}
+
 /**
  * pci_subdevice_msi_create_irq_domain - Create an irq domain for subdevices
  * @pdev:  Pointer to PCI device for which the subdevice domain is created
@@ -530,6 +535,9 @@ struct irq_domain 
*pci_subdevice_msi_create_irq_domain(struct pci_dev *pdev,
struct irq_domain *domain, *pdev_msi;
struct fwnode_handle *fn;
 
+   if (!arch_support_pci_device_msi(pdev))
+   return NULL;
+
/*
 * Retrieve the MSI domain of the underlying PCI device's MSI
 * domain. The PCI device domain's parent domain is also the parent
diff --git a/include/linux/msi.h b/include/linux/msi.h
index e915932..24abec0 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -489,6 +489,8 @@ struct irq_d

[Patch V2 09/13] iommu/vt-d: Add DEV-MSI support

2021-02-26 Thread Megha Dey
Add required support in the interrupt remapping driver for devices
which generate dev-msi interrupts and use the intel remapping
domain as the parent domain. Set the source-id of all dev-msi
interrupt requests to the parent PCI device associated with it.

Reviewed-by: Tony Luck 
Signed-off-by: Megha Dey 
---
 drivers/iommu/intel/irq_remapping.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel/irq_remapping.c 
b/drivers/iommu/intel/irq_remapping.c
index 611ef52..2a55e54 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -1282,6 +1282,9 @@ static void intel_irq_remapping_prepare_irte(struct 
intel_ir_data *data,
case X86_IRQ_ALLOC_TYPE_PCI_MSIX:
set_msi_sid(irte, msi_desc_to_pci_dev(info->desc));
break;
+   case X86_IRQ_ALLOC_TYPE_DEV_MSI:
+   set_msi_sid(irte, to_pci_dev(info->desc->dev->parent));
+   break;
default:
BUG_ON(1);
break;
@@ -1325,7 +1328,8 @@ static int intel_irq_remapping_alloc(struct irq_domain 
*domain,
if (!info || !iommu)
return -EINVAL;
if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_PCI_MSI &&
-   info->type != X86_IRQ_ALLOC_TYPE_PCI_MSIX)
+   info->type != X86_IRQ_ALLOC_TYPE_PCI_MSIX &&
+   info->type != X86_IRQ_ALLOC_TYPE_DEV_MSI)
return -EINVAL;
 
/*
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch V2 00/13] Introduce dev-msi and interrupt message store

2021-02-26 Thread Megha Dey
ore
   calling dev_msi_alloc_irqs()
6. dev_msi_alloc/free_irqs() cannot be used for PCI devices
7. Followed the generic layering scheme: infrastructure bits->arch 
bits->enabling bits

V4:
1. Make interrupt remapping code more readable
2. Add flush writes to unmask/write and reset ims slots
3. Interrupt Message Storm-> Interrupt Message Store
4. Merge in pasid programming code.

Stage 3: Standalone dev-msi and IMS driver series
-
V1:(Changes from Stage 2 V4)[6]
1. Split dev-msi/IMS code from Dave Jiang’s IDXD patch series
2. Set the source-id of all dev-msi interrupt requests to the parent PCI device
3. Separated core irq code from IMS related code
4. Added missing set_desc ops to the IMS msi_domain_ops
5. Added more details in the commit message-test case for auxiliary interrupt 
data
6. Updated the copyright year from 2020 to 2021
7. Updated cover letter
8. Add platform check for subdevice irq domain (Lu Baolu):
   V1->V2:
   - V1 patches:[4]
   - Rename probably_on_bare_metal() with on_bare_metal();
   - Some vendors might use the same name for both bare metal and virtual
 environment. Before we add vendor specific code to distinguish
 between them, let's return false in on_bare_metal(). This won't
 introduce any regression. The only impact is that the coming new
 platform msi feature won't be supported until the vendor specific code
 is provided.
   V2->V3:
   - V2 patches:[5]
   - Add all identified heuristics so far

V1->V2:
1. s/arch_support_pci_device_ims/arch_support_pci_device_msi/g
2. Remove CONFIG_DEVICE_MSI in arch/x86/pci/common.c
3. Added helper functions to get linux IRQ and dev-msi HW IRQ numbers
4. Change the caching mode logic from dynamic to static

Dave Jiang (1):
  genirq/msi: Provide helpers to return Linux IRQ/dev_msi hw IRQ number

Lu Baolu (2):
  iommu: Add capability IOMMU_CAP_VIOMMU_HINT
  platform-msi: Add platform check for subdevice irq domain

Megha Dey (3):
  genirq: Set auxiliary data for an interrupt
  iommu/vt-d: Add DEV-MSI support
  irqchip: Add IMS (Interrupt Message Store) driver

Thomas Gleixner (7):
  x86/irq: Add DEV_MSI allocation type
  x86/msi: Rename and rework pci_msi_prepare() to cover non-PCI MSI
  platform-msi: Provide default irq_chip:: Ack
  genirq/proc: Take buslock on affinity write
  genirq/msi: Provide and use msi_domain_set_default_info_flags()
  platform-msi: Add device MSI infrastructure
  irqdomain/msi: Provide msi_alloc/free_store() callbacks

 arch/x86/include/asm/hw_irq.h   |   1 +
 arch/x86/include/asm/msi.h  |   4 +-
 arch/x86/kernel/apic/msi.c  |  27 +++--
 arch/x86/pci/common.c   |  72 
 drivers/base/platform-msi.c | 141 
 drivers/iommu/amd/iommu.c   |   2 +
 drivers/iommu/intel/iommu.c |   5 +
 drivers/iommu/intel/irq_remapping.c |   6 +-
 drivers/iommu/virtio-iommu.c|   9 ++
 drivers/irqchip/Kconfig |  14 +++
 drivers/irqchip/Makefile|   1 +
 drivers/irqchip/irq-ims-msi.c   | 211 
 drivers/pci/controller/pci-hyperv.c |   2 +-
 drivers/pci/msi.c   |   7 +-
 include/linux/interrupt.h   |   2 +
 include/linux/iommu.h   |   2 +
 include/linux/irq.h |   4 +
 include/linux/irqchip/irq-ims-msi.h |  68 
 include/linux/irqdomain.h   |   1 +
 include/linux/msi.h |  41 +++
 kernel/irq/Kconfig  |   4 +
 kernel/irq/manage.c |  38 ++-
 kernel/irq/msi.c|  79 ++
 23 files changed, 722 insertions(+), 19 deletions(-)
 create mode 100644 drivers/irqchip/irq-ims-msi.c
 create mode 100644 include/linux/irqchip/irq-ims-msi.h

-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

[Patch V2 13/13] genirq/msi: Provide helpers to return Linux IRQ/dev_msi hw IRQ number

2021-02-26 Thread Megha Dey
From: Dave Jiang 

Add new helpers to get the Linux IRQ number and device specific index
for given device-relative vector so that the drivers don't need to
allocate their own arrays to keep track of the vectors and hwirq for
the multi vector device MSI case.

Reviewed-by: Tony Luck 
Signed-off-by: Dave Jiang 
Signed-off-by: Megha Dey 
---
 include/linux/msi.h |  2 ++
 kernel/irq/msi.c| 44 
 2 files changed, 46 insertions(+)

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 24abec0..d60a6ba 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -451,6 +451,8 @@ struct irq_domain *platform_msi_create_irq_domain(struct 
fwnode_handle *fwnode,
 int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec,
   irq_write_msi_msg_t write_msi_msg);
 void platform_msi_domain_free_irqs(struct device *dev);
+int msi_irq_vector(struct device *dev, unsigned int nr);
+int dev_msi_hwirq(struct device *dev, unsigned int nr);
 
 /* When an MSI domain is used as an intermediate domain */
 int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 047b59d..f2a8f55 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -581,4 +581,48 @@ struct msi_domain_info *msi_get_domain_info(struct 
irq_domain *domain)
return (struct msi_domain_info *)domain->host_data;
 }
 
+/**
+ * msi_irq_vector - Get the Linux IRQ number of a device vector
+ * @dev: device to operate on
+ * @nr: device-relative interrupt vector index (0-based).
+ *
+ * Returns the Linux IRQ number of a device vector.
+ */
+int msi_irq_vector(struct device *dev, unsigned int nr)
+{
+   struct msi_desc *entry;
+   int i = 0;
+
+   for_each_msi_entry(entry, dev) {
+   if (i == nr)
+   return entry->irq;
+   i++;
+   }
+   WARN_ON_ONCE(1);
+   return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(msi_irq_vector);
+
+/**
+ * dev_msi_hwirq - Get the device MSI hw IRQ number of a device vector
+ * @dev: device to operate on
+ * @nr: device-relative interrupt vector index (0-based).
+ *
+ * Return the dev_msi hw IRQ number of a device vector.
+ */
+int dev_msi_hwirq(struct device *dev, unsigned int nr)
+{
+   struct msi_desc *entry;
+   int i = 0;
+
+   for_each_msi_entry(entry, dev) {
+   if (i == nr)
+   return entry->device_msi.hwirq;
+   i++;
+   }
+   WARN_ON_ONCE(1);
+   return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(dev_msi_hwirq);
+
 #endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch V2 05/13] genirq/msi: Provide and use msi_domain_set_default_info_flags()

2021-02-26 Thread Megha Dey
From: Thomas Gleixner 

MSI interrupts have some common flags which should be set not only for
PCI/MSI interrupts.

Move the PCI/MSI flag setting into a common function so it can be reused.

Reviewed-by: Tony Luck 
Signed-off-by: Thomas Gleixner 
Signed-off-by: Megha Dey 
---
 drivers/pci/msi.c   |  7 +--
 include/linux/msi.h |  1 +
 kernel/irq/msi.c| 24 
 3 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 3162f88..20d2512 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -1492,12 +1492,7 @@ struct irq_domain *pci_msi_create_irq_domain(struct 
fwnode_handle *fwnode,
if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
pci_msi_domain_update_chip_ops(info);
 
-   info->flags |= MSI_FLAG_ACTIVATE_EARLY;
-   if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE))
-   info->flags |= MSI_FLAG_MUST_REACTIVATE;
-
-   /* PCI-MSI is oneshot-safe */
-   info->chip->flags |= IRQCHIP_ONESHOT_SAFE;
+   msi_domain_set_default_info_flags(info);
 
domain = msi_create_irq_domain(fwnode, info, parent);
if (!domain)
diff --git a/include/linux/msi.h b/include/linux/msi.h
index f3e54d2..f6e52de 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -454,6 +454,7 @@ int platform_msi_domain_alloc(struct irq_domain *domain, 
unsigned int virq,
 void platform_msi_domain_free(struct irq_domain *domain, unsigned int virq,
  unsigned int nvec);
 void *platform_msi_get_host_data(struct irq_domain *domain);
+void msi_domain_set_default_info_flags(struct msi_domain_info *info);
 #endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */
 
 #ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index b338d62..c54316d 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -70,6 +70,30 @@ void get_cached_msi_msg(unsigned int irq, struct msi_msg 
*msg)
 EXPORT_SYMBOL_GPL(get_cached_msi_msg);
 
 #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
+void msi_domain_set_default_info_flags(struct msi_domain_info *info)
+{
+   /* Required so that a device latches a valid MSI message on startup */
+   info->flags |= MSI_FLAG_ACTIVATE_EARLY;
+
+   /*
+* Interrupt reservation mode allows to steer the MSI message of an
+* inactive device to a special (usually spurious interrupt) target.
+* This allows to prevent interrupt vector exhaustion e.g. on x86.
+* But (PCI)MSI interrupts are activated early - see above - so the
+* interrupt request/startup sequence would not try to allocate a
+* usable vector which means that the device interrupts would end
+* up on the special vector and issue spurious interrupt messages.
+* Setting the reactivation flag ensures that when the interrupt
+* is requested the activation is invoked again so that a real
+* vector can be allocated.
+*/
+   if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE))
+   info->flags |= MSI_FLAG_MUST_REACTIVATE;
+
+   /* MSI is oneshot-safe at least in theory */
+   info->chip->flags |= IRQCHIP_ONESHOT_SAFE;
+}
+
 static inline void irq_chip_write_msi_msg(struct irq_data *data,
  struct msi_msg *msg)
 {
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch V2 07/13] irqdomain/msi: Provide msi_alloc/free_store() callbacks

2021-02-26 Thread Megha Dey
From: Thomas Gleixner 

For devices which don't have a standard storage for MSI messages like the
upcoming IMS (Interrupt Message Store) it's required to allocate storage
space before allocating interrupts and after freeing them.

This could be achieved with the existing callbacks, but that would be
awkward because they operate on msi_alloc_info_t which is not uniform
across architectures. Also these callbacks are invoked per interrupt but
the allocation might have bulk requirements depending on the device.

As such devices can operate on different architectures it is simpler to
have separate callbacks which operate on struct device. The resulting
storage information has to be stored in struct msi_desc so the underlying
irq chip implementation can retrieve it for the relevant operations.

Reviewed-by: Tony Luck 
Signed-off-by: Thomas Gleixner 
Signed-off-by: Megha Dey 
---
 include/linux/msi.h |  8 
 kernel/irq/msi.c| 11 +++
 2 files changed, 19 insertions(+)

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 46e879c..e915932 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -323,6 +323,10 @@ struct msi_domain_info;
  * function.
  * @domain_free_irqs:  Optional function to override the default free
  * function.
+ * @msi_alloc_store:   Optional callback to allocate storage in a device
+ * specific non-standard MSI store
+ * @msi_free_store:    Optional callback to free storage in a device
+ * specific non-standard MSI store
  *
  * @get_hwirq, @msi_init and @msi_free are callbacks used by
  * msi_create_irq_domain() and related interfaces
@@ -372,6 +376,10 @@ struct msi_domain_ops {
 struct device *dev, int nvec);
void(*domain_free_irqs)(struct irq_domain *domain,
struct device *dev);
+   int (*msi_alloc_store)(struct irq_domain *domain,
+  struct device *dev, int nvec);
+   void(*msi_free_store)(struct irq_domain *domain,
+ struct device *dev);
 };
 
 /**
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index c54316d..047b59d 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -434,6 +434,12 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, 
struct device *dev,
if (ret)
return ret;
 
+   if (ops->msi_alloc_store) {
+   ret = ops->msi_alloc_store(domain, dev, nvec);
+   if (ret)
+   return ret;
+   }
+
for_each_msi_entry(desc, dev) {
ops->set_desc(&arg, desc);
 
@@ -529,6 +535,8 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct 
device *dev,
 
 void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev)
 {
+   struct msi_domain_info *info = domain->host_data;
+   struct msi_domain_ops *ops = info->ops;
struct msi_desc *desc;
 
for_each_msi_entry(desc, dev) {
@@ -542,6 +550,9 @@ void __msi_domain_free_irqs(struct irq_domain *domain, 
struct device *dev)
desc->irq = 0;
}
}
+
+   if (ops->msi_free_store)
+   ops->msi_free_store(domain, dev);
 }
 
 /**
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch V2 03/13] platform-msi: Provide default irq_chip:: Ack

2021-02-26 Thread Megha Dey
From: Thomas Gleixner 

For the upcoming device MSI support it's required to have a default
irq_chip::ack implementation (irq_chip_ack_parent) so the drivers do not
need to care.

Reviewed-by: Tony Luck 
Signed-off-by: Thomas Gleixner 
Signed-off-by: Megha Dey 
---
 drivers/base/platform-msi.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 2c1e2e0..9d9ccfc 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -101,6 +101,8 @@ static void platform_msi_update_chip_ops(struct 
msi_domain_info *info)
chip->irq_mask = irq_chip_mask_parent;
if (!chip->irq_unmask)
chip->irq_unmask = irq_chip_unmask_parent;
+   if (!chip->irq_ack)
+   chip->irq_ack = irq_chip_ack_parent;
if (!chip->irq_eoi)
chip->irq_eoi = irq_chip_eoi_parent;
if (!chip->irq_set_affinity)
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch V2 12/13] irqchip: Add IMS (Interrupt Message Store) driver

2021-02-26 Thread Megha Dey
Generic IMS(Interrupt Message Store) irq chips and irq domain
implementations for IMS based devices which store the interrupt messages
in an array in device memory.

Allocation and freeing of interrupts happens via the generic
msi_domain_alloc/free_irqs() interface. No special purpose IMS magic
required as long as the interrupt domain is stored in the underlying
device struct. The irq_set_auxdata() is used to program the pasid into
the IMS entry.

[Megha: Fixed compile time errors
Added necessary dependencies to IMS_MSI_ARRAY config
Fixed polarity of IMS_VECTOR_CTRL
Added reads after writes to flush writes to device
Added set_desc ops to IMS msi domain ops
Tested the IMS infrastructure with the IDXD driver]

Reviewed-by: Tony Luck 
Signed-off-by: Thomas Gleixner 
Signed-off-by: Megha Dey 
---
 drivers/irqchip/Kconfig |  14 +++
 drivers/irqchip/Makefile|   1 +
 drivers/irqchip/irq-ims-msi.c   | 211 
 include/linux/irqchip/irq-ims-msi.h |  68 
 4 files changed, 294 insertions(+)
 create mode 100644 drivers/irqchip/irq-ims-msi.c
 create mode 100644 include/linux/irqchip/irq-ims-msi.h

diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index e74fa20..2fb0c24 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -586,4 +586,18 @@ config MST_IRQ
help
  Support MStar Interrupt Controller.
 
+config IMS_MSI
+   depends on PCI
+   select DEVICE_MSI
+   bool
+
+config IMS_MSI_ARRAY
+   bool "IMS Interrupt Message Store MSI controller for device memory 
storage arrays"
+   depends on PCI
+   select IMS_MSI
+   select GENERIC_MSI_IRQ_DOMAIN
+   help
+ Support for IMS Interrupt Message Store MSI controller
+ with IMS slot storage in a slot array in device memory
+
 endmenu
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index c59b95a..e903201 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -113,3 +113,4 @@ obj-$(CONFIG_LOONGSON_PCH_MSI)  += 
irq-loongson-pch-msi.o
 obj-$(CONFIG_MST_IRQ)  += irq-mst-intc.o
 obj-$(CONFIG_SL28CPLD_INTC)+= irq-sl28cpld.o
 obj-$(CONFIG_MACH_REALTEK_RTL) += irq-realtek-rtl.o
+obj-$(CONFIG_IMS_MSI)  += irq-ims-msi.o
diff --git a/drivers/irqchip/irq-ims-msi.c b/drivers/irqchip/irq-ims-msi.c
new file mode 100644
index 000..fa23207
--- /dev/null
+++ b/drivers/irqchip/irq-ims-msi.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0
+// (C) Copyright 2021 Thomas Gleixner 
+/*
+ * Shared interrupt chips and irq domains for IMS devices
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#ifdef CONFIG_IMS_MSI_ARRAY
+
+struct ims_array_data {
+   struct ims_array_info   info;
+   unsigned long   map[0];
+};
+
+static inline void iowrite32_and_flush(u32 value, void __iomem *addr)
+{
+   iowrite32(value, addr);
+   ioread32(addr);
+}
+
+static void ims_array_mask_irq(struct irq_data *data)
+{
+   struct msi_desc *desc = irq_data_get_msi_desc(data);
+   struct ims_slot __iomem *slot = desc->device_msi.priv_iomem;
+   u32 __iomem *ctrl = &slot->ctrl;
+
+   iowrite32_and_flush(ioread32(ctrl) | IMS_CTRL_VECTOR_MASKBIT, ctrl);
+}
+
+static void ims_array_unmask_irq(struct irq_data *data)
+{
+   struct msi_desc *desc = irq_data_get_msi_desc(data);
+   struct ims_slot __iomem *slot = desc->device_msi.priv_iomem;
+   u32 __iomem *ctrl = &slot->ctrl;
+
+   iowrite32_and_flush(ioread32(ctrl) & ~IMS_CTRL_VECTOR_MASKBIT, ctrl);
+}
+
+static void ims_array_write_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+   struct msi_desc *desc = irq_data_get_msi_desc(data);
+   struct ims_slot __iomem *slot = desc->device_msi.priv_iomem;
+
+   iowrite32(msg->address_lo, &slot->address_lo);
+   iowrite32(msg->address_hi, &slot->address_hi);
+   iowrite32_and_flush(msg->data, &slot->data);
+}
+
+static int ims_array_set_auxdata(struct irq_data *data, unsigned int which,
+u64 auxval)
+{
+   struct msi_desc *desc = irq_data_get_msi_desc(data);
+   struct ims_slot __iomem *slot = desc->device_msi.priv_iomem;
+   u32 val, __iomem *ctrl = &slot->ctrl;
+
+   if (which != IMS_AUXDATA_CONTROL_WORD)
+   return -EINVAL;
+   if (auxval & ~(u64)IMS_CONTROL_WORD_AUXMASK)
+   return -EINVAL;
+
+   val = ioread32(ctrl) & IMS_CONTROL_WORD_IRQMASK;
+   iowrite32_and_flush(val | (u32)auxval, ctrl);
+   return 0;
+}
+
+static const struct irq_chip ims_array_msi_controller = {
+   .name   = "IMS",
+   .irq_mask   = ims_array_mask_irq,
+   .irq_unmask = ims_array_unmask_irq,
+   .irq_write_msi_msg  = ims_array_write_msi_msg,
+   

[Patch V2 04/13] genirq/proc: Take buslock on affinity write

2021-02-26 Thread Megha Dey
From: Thomas Gleixner 

Until now interrupt chips which support setting affinity are not locking
the associated bus lock for two reasons:

 - All chips which support affinity setting do not use buslock because they
   can just operate directly on the hardware.

 - All chips which use buslock do not support affinity setting because
   their interrupt chips are not capable. These chips are usually connected
   over a bus like I2C, SPI etc. and have an interrupt output which is
   connected to a CPU interrupt of some sort. So there is no way to set the
   affinity on the chip itself.

Upcoming hardware which is PCIE based sports a non standard MSI(X) variant
which stores the MSI message in RAM which is associated to e.g. a device
queue. The device manages this RAM and writes have to be issued via command
queues or similar mechanisms which is obviously not possible from interrupt
disabled, raw spinlock held context.

The buslock mechanism of irq chips can be utilized to support that. The
affinity write to the chip writes to shadow state, marks it pending and the
irq chip's irq_bus_sync_unlock() callback handles the command queue and
waits for completion similar to the other chip operations on I2C or SPI
buses.

Change the locking in irq_set_affinity() to bus_lock/unlock to help with
that. There are a few other callers than the proc interface, but none of
them is affected by this change as none of them affects an irq chip with
bus lock support.

Reviewed-by: Tony Luck 
Signed-off-by: Thomas Gleixner 
Signed-off-by: Megha Dey 
---
 kernel/irq/manage.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index dec3f73..85ede4e 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -443,16 +443,16 @@ int irq_update_affinity_desc(unsigned int irq,
 
 int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, bool 
force)
 {
-   struct irq_desc *desc = irq_to_desc(irq);
+   struct irq_desc *desc;
unsigned long flags;
int ret;
 
+   desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL);
if (!desc)
return -EINVAL;
 
-   raw_spin_lock_irqsave(&desc->lock, flags);
ret = irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask, force);
-   raw_spin_unlock_irqrestore(&desc->lock, flags);
+   irq_put_desc_busunlock(desc, flags);
return ret;
 }
 
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch V2 10/13] iommu: Add capability IOMMU_CAP_VIOMMU_HINT

2021-02-26 Thread Megha Dey
From: Lu Baolu 

Some IOMMU specifications define some kind of hint mechanism, through
which BIOS can imply that OS runs in a virtualized environment. For
example, the caching mode defined in VT-d spec and NpCache capability
defined in the AMD IOMMU specification. This hint could also be used
outside of the IOMMU subsystem, where it could be used with other known
means (CPUID, smbios) to sense whether Linux is running in a virtualized
environment. Add a capability bit so that it could be used there.

Cc: Joerg Roedel 
Reviewed-by: Tony Luck 
Signed-off-by: Lu Baolu 
Signed-off-by: Megha Dey 
---
 drivers/iommu/amd/iommu.c| 2 ++
 drivers/iommu/intel/iommu.c  | 5 +
 drivers/iommu/virtio-iommu.c | 9 +
 include/linux/iommu.h| 2 ++
 4 files changed, 18 insertions(+)

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index a69a8b5..a912318 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2140,6 +2140,8 @@ static bool amd_iommu_capable(enum iommu_cap cap)
return (irq_remapping_enabled == 1);
case IOMMU_CAP_NOEXEC:
return false;
+   case IOMMU_CAP_VIOMMU_HINT:
+   return amd_iommu_np_cache;
default:
break;
}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index ee09323..55fa198 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -294,6 +294,7 @@ static inline void context_clear_entry(struct context_entry 
*context)
  */
 static struct dmar_domain *si_domain;
 static int hw_pass_through = 1;
+static int intel_caching_mode;
 
 #define for_each_domain_iommu(idx, domain) \
for (idx = 0; idx < g_num_of_iommus; idx++) \
@@ -3253,6 +3254,8 @@ static int __init init_dmars(void)
 
if (!ecap_pass_through(iommu->ecap))
hw_pass_through = 0;
+   if (cap_caching_mode(iommu->cap))
+   intel_caching_mode = 1;
intel_svm_check(iommu);
}
 
@@ -5113,6 +5116,8 @@ static bool intel_iommu_capable(enum iommu_cap cap)
return domain_update_iommu_snooping(NULL) == 1;
if (cap == IOMMU_CAP_INTR_REMAP)
return irq_remapping_enabled == 1;
+   if (cap == IOMMU_CAP_VIOMMU_HINT)
+   return intel_caching_mode;
 
return false;
 }
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 2bfdd57..e4941ca 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -931,7 +931,16 @@ static int viommu_of_xlate(struct device *dev, struct 
of_phandle_args *args)
return iommu_fwspec_add_ids(dev, args->args, 1);
 }
 
+static bool viommu_capable(enum iommu_cap cap)
+{
+   if (cap == IOMMU_CAP_VIOMMU_HINT)
+   return true;
+
+   return false;
+}
+
 static struct iommu_ops viommu_ops = {
+   .capable= viommu_capable,
.domain_alloc   = viommu_domain_alloc,
.domain_free= viommu_domain_free,
.attach_dev = viommu_attach_dev,
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 5e7fe51..9d0ade4 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -94,6 +94,8 @@ enum iommu_cap {
   transactions */
IOMMU_CAP_INTR_REMAP,   /* IOMMU supports interrupt isolation */
IOMMU_CAP_NOEXEC,   /* IOMMU_NOEXEC flag */
+   IOMMU_CAP_VIOMMU_HINT,  /* IOMMU can detect a hint for running in
+  VM */
 };
 
 /*
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch V2 08/13] genirq: Set auxiliary data for an interrupt

2021-02-26 Thread Megha Dey
Introduce a new function pointer in the irq_chip structure (irq_set_auxdata)
which is responsible for updating data which is stored in a shared register
or data storage. For example, the idxd driver uses the auxiliary data API
to enable/set and disable the PASID field in the IMS entry (introduced
in a later patch); such data is not typically present in an MSI entry.

Reviewed-by: Tony Luck 
Signed-off-by: Megha Dey 
---
 include/linux/interrupt.h |  2 ++
 include/linux/irq.h   |  4 
 kernel/irq/manage.c   | 32 
 3 files changed, 38 insertions(+)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 967e257..461ed1c 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -496,6 +496,8 @@ extern int irq_get_irqchip_state(unsigned int irq, enum 
irqchip_irq_state which,
 extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state 
which,
 bool state);
 
+int irq_set_auxdata(unsigned int irq, unsigned int which, u64 val);
+
 #ifdef CONFIG_IRQ_FORCED_THREADING
 # ifdef CONFIG_PREEMPT_RT
 #  define force_irqthreads (true)
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 2efde6a..fc19f32 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -491,6 +491,8 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data 
*d)
  * irq_request_resources
  * @irq_compose_msi_msg:   optional to compose message content for MSI
  * @irq_write_msi_msg: optional to write message content for MSI
+ * @irq_set_auxdata:   Optional function to update auxiliary data e.g. in
+ * shared registers
  * @irq_get_irqchip_state: return the internal state of an interrupt
  * @irq_set_irqchip_state: set the internal state of a interrupt
  * @irq_set_vcpu_affinity: optional to target a vCPU in a virtual machine
@@ -538,6 +540,8 @@ struct irq_chip {
void(*irq_compose_msi_msg)(struct irq_data *data, struct 
msi_msg *msg);
void(*irq_write_msi_msg)(struct irq_data *data, struct 
msi_msg *msg);
 
+   int (*irq_set_auxdata)(struct irq_data *data, unsigned int 
which, u64 auxval);
+
int (*irq_get_irqchip_state)(struct irq_data *data, enum 
irqchip_irq_state which, bool *state);
int (*irq_set_irqchip_state)(struct irq_data *data, enum 
irqchip_irq_state which, bool state);
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 85ede4e..68ff559 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -2860,3 +2860,35 @@ bool irq_check_status_bit(unsigned int irq, unsigned int 
bitmask)
return res;
 }
 EXPORT_SYMBOL_GPL(irq_check_status_bit);
+
+/**
+ * irq_set_auxdata - Set auxiliary data
+ * @irq:   Interrupt to update
+ * @which: Selector which data to update
+ * @val:   Auxiliary data value
+ *
+ * Function to update auxiliary data for an interrupt, e.g. to update data
+ * which is stored in a shared register or data storage (e.g. IMS).
+ */
+int irq_set_auxdata(unsigned int irq, unsigned int which, u64 val)
+{
+   struct irq_desc *desc;
+   struct irq_data *data;
+   unsigned long flags;
+   int res = -ENODEV;
+
+   desc = irq_get_desc_buslock(irq, &flags, 0);
+   if (!desc)
+   return -EINVAL;
+
+   for (data = &desc->irq_data; data; data = irqd_get_parent_data(data)) {
+   if (data->chip->irq_set_auxdata) {
+   res = data->chip->irq_set_auxdata(data, which, val);
+   break;
+   }
+   }
+
+   irq_put_desc_busunlock(desc, flags);
+   return res;
+}
+EXPORT_SYMBOL_GPL(irq_set_auxdata);
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch V2 02/13] x86/msi: Rename and rework pci_msi_prepare() to cover non-PCI MSI

2021-02-26 Thread Megha Dey
From: Thomas Gleixner 

Rename it to x86_msi_prepare() and handle the allocation type setup
depending on the device type.

Add a new arch_msi_prepare define which will be utilized by the upcoming
device MSI support. Define it to NULL if not provided by an architecture
in the generic MSI header.

One arch specific function for MSI support is truly enough.

Reviewed-by: Tony Luck 
Signed-off-by: Thomas Gleixner 
Signed-off-by: Megha Dey 
---
 arch/x86/include/asm/msi.h  |  4 +++-
 arch/x86/kernel/apic/msi.c  | 27 ---
 drivers/pci/controller/pci-hyperv.c |  2 +-
 include/linux/msi.h |  4 
 4 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h
index b85147d..9bd214e 100644
--- a/arch/x86/include/asm/msi.h
+++ b/arch/x86/include/asm/msi.h
@@ -6,9 +6,11 @@
 
 typedef struct irq_alloc_info msi_alloc_info_t;
 
-int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
+int x86_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
msi_alloc_info_t *arg);
 
+#define arch_msi_prepare   x86_msi_prepare
+
 /* Structs and defines for the X86 specific MSI message format */
 
 typedef struct x86_msi_data {
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 44ebe25..84b16c7 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -153,26 +153,39 @@ static struct irq_chip pci_msi_controller = {
.flags  = IRQCHIP_SKIP_SET_WAKE,
 };
 
-int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
-   msi_alloc_info_t *arg)
+static void pci_msi_prepare(struct device *dev, msi_alloc_info_t *arg)
 {
-   struct pci_dev *pdev = to_pci_dev(dev);
-   struct msi_desc *desc = first_pci_msi_entry(pdev);
+   struct msi_desc *desc = first_msi_entry(dev);
 
-   init_irq_alloc_info(arg, NULL);
if (desc->msi_attrib.is_msix) {
arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX;
} else {
arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSI;
arg->flags |= X86_IRQ_ALLOC_CONTIGUOUS_VECTORS;
}
+}
+
+static void dev_msi_prepare(struct device *dev, msi_alloc_info_t *arg)
+{
+   arg->type = X86_IRQ_ALLOC_TYPE_DEV_MSI;
+}
+
+int x86_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
+   msi_alloc_info_t *arg)
+{
+   init_irq_alloc_info(arg, NULL);
+
+   if (dev_is_pci(dev))
+   pci_msi_prepare(dev, arg);
+   else
+   dev_msi_prepare(dev, arg);
 
return 0;
 }
-EXPORT_SYMBOL_GPL(pci_msi_prepare);
+EXPORT_SYMBOL_GPL(x86_msi_prepare);
 
 static struct msi_domain_ops pci_msi_domain_ops = {
-   .msi_prepare= pci_msi_prepare,
+   .msi_prepare= x86_msi_prepare,
 };
 
 static struct msi_domain_info pci_msi_domain_info = {
diff --git a/drivers/pci/controller/pci-hyperv.c 
b/drivers/pci/controller/pci-hyperv.c
index 27a17a1..ac4fe8b7 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -1546,7 +1546,7 @@ static struct irq_chip hv_msi_irq_chip = {
 };
 
 static struct msi_domain_ops hv_msi_ops = {
-   .msi_prepare= pci_msi_prepare,
+   .msi_prepare= arch_msi_prepare,
.msi_free   = hv_msi_free,
 };
 
diff --git a/include/linux/msi.h b/include/linux/msi.h
index aef35fd..f3e54d2 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -473,4 +473,8 @@ static inline struct irq_domain 
*pci_msi_get_device_domain(struct pci_dev *pdev)
 }
 #endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */
 
+#ifndef arch_msi_prepare
+# define arch_msi_prepare  NULL
+#endif
+
 #endif /* LINUX_MSI_H */
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch V2 01/13] x86/irq: Add DEV_MSI allocation type

2021-02-26 Thread Megha Dey
From: Thomas Gleixner 

For the upcoming device MSI support a new allocation type is
required.

Reviewed-by: Tony Luck 
Signed-off-by: Thomas Gleixner 
Signed-off-by: Megha Dey 
---
 arch/x86/include/asm/hw_irq.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index d465ece..0531b9c 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -41,6 +41,7 @@ enum irq_alloc_type {
X86_IRQ_ALLOC_TYPE_DMAR,
X86_IRQ_ALLOC_TYPE_AMDVI,
X86_IRQ_ALLOC_TYPE_UV,
+   X86_IRQ_ALLOC_TYPE_DEV_MSI,
 };
 
 struct ioapic_alloc_info {
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 12/12] irqchip: Add IMS (Interrupt Message Store) driver

2021-02-03 Thread Megha Dey
From: Thomas Gleixner 

Add generic IMS (Interrupt Message Store) irq chip and irq domain
implementations for IMS-based devices which store the interrupt messages
in an array in device memory.

Allocation and freeing of interrupts happens via the generic
msi_domain_alloc/free_irqs() interface. No special purpose IMS magic
required as long as the interrupt domain is stored in the underlying
device struct. The irq_set_auxdata() is used to program the pasid into
the IMS entry.

[Megha : Fixed compile time errors
 Added necessary dependencies to IMS_MSI_ARRAY config
 Fixed polarity of IMS_VECTOR_CTRL
 Added reads after writes to flush writes to device
 Added set_desc ops to IMS msi domain ops
 Tested the IMS infrastructure with the IDXD driver]

Reviewed-by: Tony Luck 
Signed-off-by: Thomas Gleixner 
Signed-off-by: Megha Dey 
---
 drivers/irqchip/Kconfig |  14 +++
 drivers/irqchip/Makefile|   1 +
 drivers/irqchip/irq-ims-msi.c   | 211 
 include/linux/irqchip/irq-ims-msi.h |  68 
 4 files changed, 294 insertions(+)
 create mode 100644 drivers/irqchip/irq-ims-msi.c
 create mode 100644 include/linux/irqchip/irq-ims-msi.h

diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index b147f22..b50c821 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -590,4 +590,18 @@ config MST_IRQ
help
  Support MStar Interrupt Controller.
 
+config IMS_MSI
+   depends on PCI
+   select DEVICE_MSI
+   bool
+
+config IMS_MSI_ARRAY
+   bool "IMS Interrupt Message Store MSI controller for device memory 
storage arrays"
+   depends on PCI
+   select IMS_MSI
+   select GENERIC_MSI_IRQ_DOMAIN
+   help
+ Support for IMS Interrupt Message Store MSI controller
+ with IMS slot storage in a slot array in device memory
+
 endmenu
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 0ac93bf..658a6bd 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -113,3 +113,4 @@ obj-$(CONFIG_LOONGSON_PCH_PIC)  += 
irq-loongson-pch-pic.o
 obj-$(CONFIG_LOONGSON_PCH_MSI) += irq-loongson-pch-msi.o
 obj-$(CONFIG_MST_IRQ)  += irq-mst-intc.o
 obj-$(CONFIG_SL28CPLD_INTC)+= irq-sl28cpld.o
+obj-$(CONFIG_IMS_MSI)  += irq-ims-msi.o
diff --git a/drivers/irqchip/irq-ims-msi.c b/drivers/irqchip/irq-ims-msi.c
new file mode 100644
index 000..fa23207
--- /dev/null
+++ b/drivers/irqchip/irq-ims-msi.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0
+// (C) Copyright 2021 Thomas Gleixner 
+/*
+ * Shared interrupt chips and irq domains for IMS devices
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#ifdef CONFIG_IMS_MSI_ARRAY
+
+struct ims_array_data {
+   struct ims_array_info   info;
+   unsigned long   map[0];
+};
+
+static inline void iowrite32_and_flush(u32 value, void __iomem *addr)
+{
+   iowrite32(value, addr);
+   ioread32(addr);
+}
+
+static void ims_array_mask_irq(struct irq_data *data)
+{
+   struct msi_desc *desc = irq_data_get_msi_desc(data);
+   struct ims_slot __iomem *slot = desc->device_msi.priv_iomem;
+   u32 __iomem *ctrl = &slot->ctrl;
+
+   iowrite32_and_flush(ioread32(ctrl) | IMS_CTRL_VECTOR_MASKBIT, ctrl);
+}
+
+static void ims_array_unmask_irq(struct irq_data *data)
+{
+   struct msi_desc *desc = irq_data_get_msi_desc(data);
+   struct ims_slot __iomem *slot = desc->device_msi.priv_iomem;
+   u32 __iomem *ctrl = &slot->ctrl;
+
+   iowrite32_and_flush(ioread32(ctrl) & ~IMS_CTRL_VECTOR_MASKBIT, ctrl);
+}
+
+static void ims_array_write_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+   struct msi_desc *desc = irq_data_get_msi_desc(data);
+   struct ims_slot __iomem *slot = desc->device_msi.priv_iomem;
+
+   iowrite32(msg->address_lo, &slot->address_lo);
+   iowrite32(msg->address_hi, &slot->address_hi);
+   iowrite32_and_flush(msg->data, &slot->data);
+}
+
+static int ims_array_set_auxdata(struct irq_data *data, unsigned int which,
+u64 auxval)
+{
+   struct msi_desc *desc = irq_data_get_msi_desc(data);
+   struct ims_slot __iomem *slot = desc->device_msi.priv_iomem;
+   u32 val, __iomem *ctrl = &slot->ctrl;
+
+   if (which != IMS_AUXDATA_CONTROL_WORD)
+   return -EINVAL;
+   if (auxval & ~(u64)IMS_CONTROL_WORD_AUXMASK)
+   return -EINVAL;
+
+   val = ioread32(ctrl) & IMS_CONTROL_WORD_IRQMASK;
+   iowrite32_and_flush(val | (u32)auxval, ctrl);
+   return 0;
+}
+
+static const struct irq_chip ims_array_msi_controller = {
+   .name   = "IMS",
+   .irq_mask   = ims_array_mask_irq,
+   .irq_unmask = ims_array_unmask_irq,
+   .irq_write_msi_msg  = ims_array

[PATCH 06/12] platform-msi: Add device MSI infrastructure

2021-02-03 Thread Megha Dey
From: Thomas Gleixner 

Add device specific MSI domain infrastructure for devices which have their
own resource management and interrupt chip. These devices are not related
to PCI and contrary to platform MSI they do not share a common resource and
interrupt chip. They provide their own domain specific resource management
and interrupt chip.

This utilizes the new alloc/free override in a non evil way which avoids
having yet another set of specialized alloc/free functions. Just using
msi_domain_alloc/free_irqs() is sufficient.

While initially it was suggested and tried to piggyback device MSI on
platform MSI, the better variant is to reimplement platform MSI on top of
device MSI.

Signed-off-by: Thomas Gleixner 
Signed-off-by: Megha Dey 
---
 drivers/base/platform-msi.c | 131 
 include/linux/irqdomain.h   |   1 +
 include/linux/msi.h |  24 
 kernel/irq/Kconfig  |   4 ++
 4 files changed, 160 insertions(+)

diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 9d9ccfc..6127b3b 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -419,3 +419,134 @@ int platform_msi_domain_alloc(struct irq_domain *domain, 
unsigned int virq,
 
return err;
 }
+
+#ifdef CONFIG_DEVICE_MSI
+/*
+ * Device specific MSI domain infrastructure for devices which have their
+ * own resource management and interrupt chip. These devices are not
+ * related to PCI and contrary to platform MSI they do not share a common
+ * resource and interrupt chip. They provide their own domain specific
+ * resource management and interrupt chip.
+ */
+
+static void device_msi_free_msi_entries(struct device *dev)
+{
+   struct list_head *msi_list = dev_to_msi_list(dev);
+   struct msi_desc *entry, *tmp;
+
+   list_for_each_entry_safe(entry, tmp, msi_list, list) {
+   list_del(&entry->list);
+   free_msi_entry(entry);
+   }
+}
+
+/**
+ * device_msi_free_irqs - Free MSI interrupts assigned to a device
+ * @dev:   Pointer to the device
+ *
+ * Frees the interrupt and the MSI descriptors.
+ */
+static void device_msi_free_irqs(struct irq_domain *domain, struct device *dev)
+{
+   __msi_domain_free_irqs(domain, dev);
+   device_msi_free_msi_entries(dev);
+}
+
+/**
+ * device_msi_alloc_irqs - Allocate MSI interrupts for a device
+ * @dev:   Pointer to the device
+ * @nvec:  Number of vectors
+ *
+ * Allocates the required number of MSI descriptors and the corresponding
+ * interrupt descriptors.
+ */
+static int device_msi_alloc_irqs(struct irq_domain *domain, struct device 
*dev, int nvec)
+{
+   int i, ret = -ENOMEM;
+
+   for (i = 0; i < nvec; i++) {
+   struct msi_desc *entry = alloc_msi_entry(dev, 1, NULL);
+
+   if (!entry)
+   goto fail;
+   list_add_tail(&entry->list, dev_to_msi_list(dev));
+   }
+
+   ret = __msi_domain_alloc_irqs(domain, dev, nvec);
+   if (!ret)
+   return 0;
+fail:
+   device_msi_free_msi_entries(dev);
+   return ret;
+}
+
+static void device_msi_update_dom_ops(struct msi_domain_info *info)
+{
+   if (!info->ops->domain_alloc_irqs)
+   info->ops->domain_alloc_irqs = device_msi_alloc_irqs;
+   if (!info->ops->domain_free_irqs)
+   info->ops->domain_free_irqs = device_msi_free_irqs;
+   if (!info->ops->msi_prepare)
+   info->ops->msi_prepare = arch_msi_prepare;
+}
+
+/**
+ * device_msi_create_irq_domain - Create an irq domain for devices
+ * @fn:    Firmware node of the interrupt controller
+ * @info:  MSI domain info to configure the new domain
+ * @parent:Parent domain
+ */
+struct irq_domain *device_msi_create_irq_domain(struct fwnode_handle *fn,
+   struct msi_domain_info *info,
+   struct irq_domain *parent)
+{
+   struct irq_domain *domain;
+
+   if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
+   platform_msi_update_chip_ops(info);
+
+   if (info->flags & MSI_FLAG_USE_DEF_DOM_OPS)
+   device_msi_update_dom_ops(info);
+
+   msi_domain_set_default_info_flags(info);
+
+   domain = msi_create_irq_domain(fn, info, parent);
+   if (domain)
+   irq_domain_update_bus_token(domain, DOMAIN_BUS_DEVICE_MSI);
+   return domain;
+}
+
+#ifdef CONFIG_PCI
+#include 
+
+/**
+ * pci_subdevice_msi_create_irq_domain - Create an irq domain for subdevices
+ * @pdev:  Pointer to PCI device for which the subdevice domain is created
+ * @info:  MSI domain info to configure the new domain
+ */
+struct irq_domain *pci_subdevice_msi_create_irq_domain(struct pci_dev *pdev,
+  struct msi_domain_info 
*info)
+{
+   struct irq_domain *domain, *pdev_msi;
+  

[PATCH 09/12] iommu/vt-d: Add DEV-MSI support

2021-02-03 Thread Megha Dey
Add the required support to the interrupt remapping driver for devices
which generate dev-msi interrupts and use the Intel remapping
domain as the parent domain. Set the source-id of all dev-msi
interrupt requests to that of the parent PCI device of the requesting device.

Reviewed-by: Tony Luck 
Signed-off-by: Megha Dey 
---
 drivers/iommu/intel/irq_remapping.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel/irq_remapping.c 
b/drivers/iommu/intel/irq_remapping.c
index 685200a..18f1b53 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -1278,6 +1278,9 @@ static void intel_irq_remapping_prepare_irte(struct 
intel_ir_data *data,
case X86_IRQ_ALLOC_TYPE_PCI_MSIX:
set_msi_sid(irte, msi_desc_to_pci_dev(info->desc));
break;
+   case X86_IRQ_ALLOC_TYPE_DEV_MSI:
+   set_msi_sid(irte, to_pci_dev(info->desc->dev->parent));
+   break;
default:
BUG_ON(1);
break;
@@ -1321,7 +1324,8 @@ static int intel_irq_remapping_alloc(struct irq_domain 
*domain,
if (!info || !iommu)
return -EINVAL;
if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_PCI_MSI &&
-   info->type != X86_IRQ_ALLOC_TYPE_PCI_MSIX)
+   info->type != X86_IRQ_ALLOC_TYPE_PCI_MSIX &&
+   info->type != X86_IRQ_ALLOC_TYPE_DEV_MSI)
return -EINVAL;
 
/*
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 11/12] platform-msi: Add platform check for subdevice irq domain

2021-02-03 Thread Megha Dey
From: Lu Baolu 

pci_subdevice_msi_create_irq_domain() should fail if the underlying
platform is not able to support IMS (Interrupt Message Store). Otherwise,
the isolation of interrupts is not guaranteed.

For x86, IMS is only supported on bare metal for now. We could enable it
in virtualized environments in the future if an interrupt hypercall
domain is supported or the hardware has the capability of interrupt
isolation for subdevices.

Cc: David Woodhouse 
Cc: Leon Romanovsky 
Cc: Kevin Tian 
Suggested-by: Thomas Gleixner 
Link: https://lore.kernel.org/linux-pci/87pn4nk7nn@nanos.tec.linutronix.de/
Link: https://lore.kernel.org/linux-pci/877dqrnzr3@nanos.tec.linutronix.de/
Link: https://lore.kernel.org/linux-pci/877dqqmc2h@nanos.tec.linutronix.de/
Signed-off-by: Lu Baolu 
Signed-off-by: Megha Dey 
---
 arch/x86/pci/common.c   | 74 +
 drivers/base/platform-msi.c |  8 +
 include/linux/msi.h |  1 +
 3 files changed, 83 insertions(+)

diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 3507f45..263ccf6 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -12,6 +12,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include 
 #include 
@@ -724,3 +726,75 @@ struct pci_dev *pci_real_dma_dev(struct pci_dev *dev)
return dev;
 }
 #endif
+
+#ifdef CONFIG_DEVICE_MSI
+/*
+ * We want to figure out which context we are running in. But the hardware
+ * does not introduce a reliable way (instruction, CPUID leaf, MSR, whatever)
+ * which can be manipulated by the VMM to let the OS figure out where it runs.
+ * So we go with the below probably on_bare_metal() function as a replacement
+ * for definitely on_bare_metal() to go forward only for the very simple reason
+ * that this is the only option we have.
+ */
+static const char * const vmm_vendor_name[] = {
+   "QEMU", "Bochs", "KVM", "Xen", "VMware", "VMW", "VMware Inc.",
+   "innotek GmbH", "Oracle Corporation", "Parallels", "BHYVE"
+};
+
+static void read_type0_virtual_machine(const struct dmi_header *dm, void *p)
+{
+   u8 *data = (u8 *)dm + 0x13;
+
+   /* BIOS Information (Type 0) */
+   if (dm->type != 0 || dm->length < 0x14)
+   return;
+
+   /* Bit 4 of BIOS Characteristics Extension Byte 2*/
+   if (*data & BIT(4))
+   *((bool *)p) = true;
+}
+
+static bool smbios_virtual_machine(void)
+{
+   bool bit_present = false;
+
+   dmi_walk(read_type0_virtual_machine, &bit_present);
+
+   return bit_present;
+}
+
+static bool on_bare_metal(struct device *dev)
+{
+   int i;
+
+   if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+   return false;
+
+   if (smbios_virtual_machine())
+   return false;
+
+   if (iommu_capable(dev->bus, IOMMU_CAP_VIOMMU_HINT))
+   return false;
+
+   for (i = 0; i < ARRAY_SIZE(vmm_vendor_name); i++)
+   if (dmi_match(DMI_SYS_VENDOR, vmm_vendor_name[i]))
+   return false;
+
+   pr_info("System running on bare metal, report to bugzilla.kernel.org if 
not the case.");
+
+   return true;
+}
+
+bool arch_support_pci_device_ims(struct pci_dev *pdev)
+{
+   /*
+* When we are running in a VMM context, the device IMS could only be
+* enabled when the underlying hardware supports interrupt isolation
+* of the subdevice, or any mechanism (trap, hypercall) is added so
+* that changes in the interrupt message store could be managed by the
+* VMM. For now, we only support the device IMS when we are running on
+* the bare metal.
+*/
+   return on_bare_metal(&pdev->dev);
+}
+#endif
diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 6127b3b..d5ae26f 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -519,6 +519,11 @@ struct irq_domain *device_msi_create_irq_domain(struct 
fwnode_handle *fn,
 #ifdef CONFIG_PCI
 #include 
 
+bool __weak arch_support_pci_device_ims(struct pci_dev *pdev)
+{
+   return false;
+}
+
 /**
  * pci_subdevice_msi_create_irq_domain - Create an irq domain for subdevices
  * @pdev:  Pointer to PCI device for which the subdevice domain is created
@@ -530,6 +535,9 @@ struct irq_domain 
*pci_subdevice_msi_create_irq_domain(struct pci_dev *pdev,
struct irq_domain *domain, *pdev_msi;
struct fwnode_handle *fn;
 
+   if (!arch_support_pci_device_ims(pdev))
+   return NULL;
+
/*
 * Retrieve the MSI domain of the underlying PCI device's MSI
 * domain. The PCI device domain's parent domain is also the parent
diff --git a/include/linux/msi.h b/include/linux/msi.h
index a6b419d..fa02542 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -478,6 +478,7 @

[PATCH 02/12] x86/msi: Rename and rework pci_msi_prepare() to cover non-PCI MSI

2021-02-03 Thread Megha Dey
From: Thomas Gleixner 

Rename it to x86_msi_prepare() and handle the allocation type setup
depending on the device type.

Add a new arch_msi_prepare define which will be utilized by the upcoming
device MSI support. Define it to NULL if not provided by an architecture
in the generic MSI header.

One arch specific function for MSI support is truly enough.

Signed-off-by: Thomas Gleixner 
Signed-off-by: Megha Dey 
---
 arch/x86/include/asm/msi.h  |  4 +++-
 arch/x86/kernel/apic/msi.c  | 27 ---
 drivers/pci/controller/pci-hyperv.c |  2 +-
 include/linux/msi.h |  4 
 4 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h
index b85147d..9bd214e 100644
--- a/arch/x86/include/asm/msi.h
+++ b/arch/x86/include/asm/msi.h
@@ -6,9 +6,11 @@
 
 typedef struct irq_alloc_info msi_alloc_info_t;
 
-int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
+int x86_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
msi_alloc_info_t *arg);
 
+#define arch_msi_prepare   x86_msi_prepare
+
 /* Structs and defines for the X86 specific MSI message format */
 
 typedef struct x86_msi_data {
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 44ebe25..84b16c7 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -153,26 +153,39 @@ static struct irq_chip pci_msi_controller = {
.flags  = IRQCHIP_SKIP_SET_WAKE,
 };
 
-int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
-   msi_alloc_info_t *arg)
+static void pci_msi_prepare(struct device *dev, msi_alloc_info_t *arg)
 {
-   struct pci_dev *pdev = to_pci_dev(dev);
-   struct msi_desc *desc = first_pci_msi_entry(pdev);
+   struct msi_desc *desc = first_msi_entry(dev);
 
-   init_irq_alloc_info(arg, NULL);
if (desc->msi_attrib.is_msix) {
arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX;
} else {
arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSI;
arg->flags |= X86_IRQ_ALLOC_CONTIGUOUS_VECTORS;
}
+}
+
+static void dev_msi_prepare(struct device *dev, msi_alloc_info_t *arg)
+{
+   arg->type = X86_IRQ_ALLOC_TYPE_DEV_MSI;
+}
+
+int x86_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
+   msi_alloc_info_t *arg)
+{
+   init_irq_alloc_info(arg, NULL);
+
+   if (dev_is_pci(dev))
+   pci_msi_prepare(dev, arg);
+   else
+   dev_msi_prepare(dev, arg);
 
return 0;
 }
-EXPORT_SYMBOL_GPL(pci_msi_prepare);
+EXPORT_SYMBOL_GPL(x86_msi_prepare);
 
 static struct msi_domain_ops pci_msi_domain_ops = {
-   .msi_prepare= pci_msi_prepare,
+   .msi_prepare= x86_msi_prepare,
 };
 
 static struct msi_domain_info pci_msi_domain_info = {
diff --git a/drivers/pci/controller/pci-hyperv.c 
b/drivers/pci/controller/pci-hyperv.c
index 6db8d96..bfb47c2 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -1546,7 +1546,7 @@ static struct irq_chip hv_msi_irq_chip = {
 };
 
 static struct msi_domain_ops hv_msi_ops = {
-   .msi_prepare= pci_msi_prepare,
+   .msi_prepare= arch_msi_prepare,
.msi_free   = hv_msi_free,
 };
 
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 360a0a7..89acc76 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -467,4 +467,8 @@ static inline struct irq_domain 
*pci_msi_get_device_domain(struct pci_dev *pdev)
 }
 #endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */
 
+#ifndef arch_msi_prepare
+# define arch_msi_prepare  NULL
+#endif
+
 #endif /* LINUX_MSI_H */
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 10/12] iommu: Add capability IOMMU_CAP_VIOMMU_HINT

2021-02-03 Thread Megha Dey
From: Lu Baolu 

Some IOMMU specifications define a hint mechanism through which the BIOS
can indicate that the OS runs in a virtualized environment, for example
the caching mode defined in the VT-d spec and the NpCache capability
defined in the AMD IOMMU specification. This hint can also be used
outside of the IOMMU subsystem, where it can be combined with other known
means (CPUID, SMBIOS) to sense whether Linux is running in a virtualized
environment. Add a capability bit so that it can be used there.

Signed-off-by: Lu Baolu 
Signed-off-by: Megha Dey 
---
 drivers/iommu/amd/iommu.c|  2 ++
 drivers/iommu/intel/iommu.c  | 20 
 drivers/iommu/virtio-iommu.c |  9 +
 include/linux/iommu.h|  2 ++
 4 files changed, 33 insertions(+)

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index f0adbc4..a851f37 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2646,6 +2646,8 @@ static bool amd_iommu_capable(enum iommu_cap cap)
return (irq_remapping_enabled == 1);
case IOMMU_CAP_NOEXEC:
return false;
+   case IOMMU_CAP_VIOMMU_HINT:
+   return amd_iommu_np_cache;
default:
break;
}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 06b00b5..905d6aa 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -5094,12 +5094,32 @@ static inline bool nested_mode_support(void)
return ret;
 }
 
+static inline bool caching_mode_supported(void)
+{
+   struct dmar_drhd_unit *drhd;
+   struct intel_iommu *iommu;
+   bool ret = false;
+
+   rcu_read_lock();
+   for_each_active_iommu(iommu, drhd) {
+   if (cap_caching_mode(iommu->cap)) {
+   ret = true;
+   break;
+   }
+   }
+   rcu_read_unlock();
+
+   return ret;
+}
+
 static bool intel_iommu_capable(enum iommu_cap cap)
 {
if (cap == IOMMU_CAP_CACHE_COHERENCY)
return domain_update_iommu_snooping(NULL) == 1;
if (cap == IOMMU_CAP_INTR_REMAP)
return irq_remapping_enabled == 1;
+   if (cap == IOMMU_CAP_VIOMMU_HINT)
+   return caching_mode_supported();
 
return false;
 }
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 2bfdd57..e4941ca 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -931,7 +931,16 @@ static int viommu_of_xlate(struct device *dev, struct 
of_phandle_args *args)
return iommu_fwspec_add_ids(dev, args->args, 1);
 }
 
+static bool viommu_capable(enum iommu_cap cap)
+{
+   if (cap == IOMMU_CAP_VIOMMU_HINT)
+   return true;
+
+   return false;
+}
+
 static struct iommu_ops viommu_ops = {
+   .capable= viommu_capable,
.domain_alloc   = viommu_domain_alloc,
.domain_free= viommu_domain_free,
.attach_dev = viommu_attach_dev,
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index b3f0e20..5e62bcc 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -94,6 +94,8 @@ enum iommu_cap {
   transactions */
IOMMU_CAP_INTR_REMAP,   /* IOMMU supports interrupt isolation */
IOMMU_CAP_NOEXEC,   /* IOMMU_NOEXEC flag */
+   IOMMU_CAP_VIOMMU_HINT,  /* IOMMU can detect a hit for running in
+  VM */
 };
 
 /*
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 07/12] irqdomain/msi: Provide msi_alloc/free_store() callbacks

2021-02-03 Thread Megha Dey
From: Thomas Gleixner 

For devices which don't have a standard storage for MSI messages like the
upcoming IMS (Interrupt Message Store) it's required to allocate storage
space before allocating interrupts and after freeing them.

This could be achieved with the existing callbacks, but that would be
awkward because they operate on msi_alloc_info_t which is not uniform
across architectures. Also these callbacks are invoked per interrupt but
the allocation might have bulk requirements depending on the device.

As such devices can operate on different architectures it is simpler to
have separate callbacks which operate on struct device. The resulting
storage information has to be stored in struct msi_desc so the underlying
irq chip implementation can retrieve it for the relevant operations.

Signed-off-by: Thomas Gleixner 
Signed-off-by: Megha Dey 
---
 include/linux/msi.h |  8 
 kernel/irq/msi.c| 11 +++
 2 files changed, 19 insertions(+)

diff --git a/include/linux/msi.h b/include/linux/msi.h
index fbf2258..a6b419d 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -317,6 +317,10 @@ struct msi_domain_info;
  * function.
  * @domain_free_irqs:  Optional function to override the default free
  * function.
+ * @msi_alloc_store:   Optional callback to allocate storage in a device
+ * specific non-standard MSI store
+ * @msi_free_store:    Optional callback to free storage in a device
+ * specific non-standard MSI store
  *
  * @get_hwirq, @msi_init and @msi_free are callbacks used by
  * msi_create_irq_domain() and related interfaces
@@ -366,6 +370,10 @@ struct msi_domain_ops {
 struct device *dev, int nvec);
void(*domain_free_irqs)(struct irq_domain *domain,
struct device *dev);
+   int (*msi_alloc_store)(struct irq_domain *domain,
+  struct device *dev, int nvec);
+   void(*msi_free_store)(struct irq_domain *domain,
+ struct device *dev);
 };
 
 /**
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 3697909..d70d92e 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -434,6 +434,12 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, 
struct device *dev,
if (ret)
return ret;
 
+   if (ops->msi_alloc_store) {
+   ret = ops->msi_alloc_store(domain, dev, nvec);
+   if (ret)
+   return ret;
+   }
+
for_each_msi_entry(desc, dev) {
ops->set_desc(, desc);
 
@@ -533,6 +539,8 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct 
device *dev,
 
 void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev)
 {
+   struct msi_domain_info *info = domain->host_data;
+   struct msi_domain_ops *ops = info->ops;
struct msi_desc *desc;
 
for_each_msi_entry(desc, dev) {
@@ -546,6 +554,9 @@ void __msi_domain_free_irqs(struct irq_domain *domain, 
struct device *dev)
desc->irq = 0;
}
}
+
+   if (ops->msi_free_store)
+   ops->msi_free_store(domain, dev);
 }
 
 /**
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 00/12] Introduce dev-msi and interrupt message store

2021-02-03 Thread Megha Dey
sed for PCI devices
7. Followed the generic layering scheme: infrastructure bits->arch 
bits->enabling bits

V4:
1. Make interrupt remapping code more readable
2. Add flush writes to unmask/write and reset ims slots
3. Interrupt Message Storm-> Interrupt Message Store
4. Merge in pasid programming code.

Stage 3: Standalone dev-msi and IMS driver series
-
V1:(Changes from Stage 2 V4)
1. Split dev-msi/IMS code from Dave Jiang’s IDXD patch series
2. Set the source-id of all dev-msi interrupt requests to the parent PCI device
3. Separated core irq code from IMS related code
4. Added missing set_desc ops to the IMS msi_domain_ops
5. Added more details in the commit message-test case for auxiliary interrupt
data
6. Updated the copyright year from 2020 to 2021
7. Updated cover letter
8. Add platform check for subdevice irq domain (Lu Baolu):
   V1->V2:
   - V1 patches:[4]
   - Rename probably_on_bare_metal() with on_bare_metal();
   - Some vendors might use the same name for both bare metal and virtual
 environment. Before we add vendor specific code to distinguish
 between them, let's return false in on_bare_metal(). This won't
 introduce any regression. The only impact is that the coming new
 platform msi feature won't be supported until the vendor specific code
 is provided.
   V2->V3:
   - V2 patches:[5]
   - Add all identified heuristics so far

[1]: 
https://software.intel.com/en-us/download/intel-scalable-io-virtualization-technical-specification
[2]: 
https://lore.kernel.org/dmaengine/160408357912.912050.17005584526266191420.st...@djiang5-desk3.ch.intel.com/
[3]: 
https://lore.kernel.org/lkml/1568338328-22458-1-git-send-email-megha@linux.intel.com/
[4]: 
https://lore.kernel.org/linux-pci/20201210004624.345282-1-baolu...@linux.intel.com/
[5]: 
https://lore.kernel.org/linux-pci/20210106022749.2769057-1-baolu...@linux.intel.com/

Lu Baolu (2):
  iommu: Add capability IOMMU_CAP_VIOMMU_HINT
  platform-msi: Add platform check for subdevice irq domain

Megha Dey (2):
  genirq: Set auxiliary data for an interrupt
  iommu/vt-d: Add DEV-MSI support

Thomas Gleixner (8):
  x86/irq: Add DEV_MSI allocation type
  x86/msi: Rename and rework pci_msi_prepare() to cover non-PCI MSI
  platform-msi: Provide default irq_chip:: Ack
  genirq/proc: Take buslock on affinity write
  genirq/msi: Provide and use msi_domain_set_default_info_flags()
  platform-msi: Add device MSI infrastructure
  irqdomain/msi: Provide msi_alloc/free_store() callbacks
  irqchip: Add IMS (Interrupt Message Store) driver

 arch/x86/include/asm/hw_irq.h   |   1 +
 arch/x86/include/asm/msi.h  |   4 +-
 arch/x86/kernel/apic/msi.c  |  27 +++--
 arch/x86/pci/common.c   |  74 +
 drivers/base/platform-msi.c | 141 
 drivers/iommu/amd/iommu.c   |   2 +
 drivers/iommu/intel/iommu.c |  20 
 drivers/iommu/intel/irq_remapping.c |   6 +-
 drivers/iommu/virtio-iommu.c|   9 ++
 drivers/irqchip/Kconfig |  14 +++
 drivers/irqchip/Makefile|   1 +
 drivers/irqchip/irq-ims-msi.c   | 211 
 drivers/pci/controller/pci-hyperv.c |   2 +-
 drivers/pci/msi.c   |   7 +-
 include/linux/interrupt.h   |   2 +
 include/linux/iommu.h   |   2 +
 include/linux/irq.h |   4 +
 include/linux/irqchip/irq-ims-msi.h |  68 
 include/linux/irqdomain.h   |   1 +
 include/linux/msi.h |  38 +++
 kernel/irq/Kconfig  |   4 +
 kernel/irq/manage.c |  38 ++-
 kernel/irq/msi.c|  35 ++
 23 files changed, 692 insertions(+), 19 deletions(-)
 create mode 100644 drivers/irqchip/irq-ims-msi.c
 create mode 100644 include/linux/irqchip/irq-ims-msi.h

-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

[PATCH 08/12] genirq: Set auxiliary data for an interrupt

2021-02-03 Thread Megha Dey
Introduce a new function pointer in the irq_chip structure (irq_set_auxdata)
which is responsible for updating data which is stored in a shared register
or data storage. For example, the idxd driver uses the auxiliary data API
to enable/set and disable the PASID field that is in the IMS entry
(introduced in a later patch); such data is not typically present in an MSI
entry.

Reviewed-by: Tony Luck 
Signed-off-by: Megha Dey 
---
 include/linux/interrupt.h |  2 ++
 include/linux/irq.h   |  4 
 kernel/irq/manage.c   | 32 
 3 files changed, 38 insertions(+)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index bb8ff90..d3f419b 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -496,6 +496,8 @@ extern int irq_get_irqchip_state(unsigned int irq, enum 
irqchip_irq_state which,
 extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state 
which,
 bool state);
 
+int irq_set_auxdata(unsigned int irq, unsigned int which, u64 val);
+
 #ifdef CONFIG_IRQ_FORCED_THREADING
 # ifdef CONFIG_PREEMPT_RT
 #  define force_irqthreads (true)
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 4aeb1c4..568cdf5 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -491,6 +491,8 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data 
*d)
  * irq_request_resources
  * @irq_compose_msi_msg:   optional to compose message content for MSI
  * @irq_write_msi_msg: optional to write message content for MSI
+ * @irq_set_auxdata:   Optional function to update auxiliary data e.g. in
+ * shared registers
  * @irq_get_irqchip_state: return the internal state of an interrupt
  * @irq_set_irqchip_state: set the internal state of a interrupt
  * @irq_set_vcpu_affinity: optional to target a vCPU in a virtual machine
@@ -538,6 +540,8 @@ struct irq_chip {
void(*irq_compose_msi_msg)(struct irq_data *data, struct 
msi_msg *msg);
void(*irq_write_msi_msg)(struct irq_data *data, struct 
msi_msg *msg);
 
+   int (*irq_set_auxdata)(struct irq_data *data, unsigned int 
which, u64 auxval);
+
int (*irq_get_irqchip_state)(struct irq_data *data, enum 
irqchip_irq_state which, bool *state);
int (*irq_set_irqchip_state)(struct irq_data *data, enum 
irqchip_irq_state which, bool state);
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 85ede4e..68ff559 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -2860,3 +2860,35 @@ bool irq_check_status_bit(unsigned int irq, unsigned int 
bitmask)
return res;
 }
 EXPORT_SYMBOL_GPL(irq_check_status_bit);
+
+/**
+ * irq_set_auxdata - Set auxiliary data
+ * @irq:   Interrupt to update
+ * @which: Selector which data to update
+ * @val:   Auxiliary data value
+ *
+ * Function to update auxiliary data for an interrupt, e.g. to update data
+ * which is stored in a shared register or data storage (e.g. IMS).
+ */
+int irq_set_auxdata(unsigned int irq, unsigned int which, u64 val)
+{
+   struct irq_desc *desc;
+   struct irq_data *data;
+   unsigned long flags;
+   int res = -ENODEV;
+
+   desc = irq_get_desc_buslock(irq, , 0);
+   if (!desc)
+   return -EINVAL;
+
+   for (data = >irq_data; data; data = irqd_get_parent_data(data)) {
+   if (data->chip->irq_set_auxdata) {
+   res = data->chip->irq_set_auxdata(data, which, val);
+   break;
+   }
+   }
+
+   irq_put_desc_busunlock(desc, flags);
+   return res;
+}
+EXPORT_SYMBOL_GPL(irq_set_auxdata);
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 05/12] genirq/msi: Provide and use msi_domain_set_default_info_flags()

2021-02-03 Thread Megha Dey
From: Thomas Gleixner 

MSI interrupts have some common flags which should be set not only for
PCI/MSI interrupts.

Move the PCI/MSI flag setting into a common function so it can be reused.

Signed-off-by: Thomas Gleixner 
Signed-off-by: Megha Dey 
---
 drivers/pci/msi.c   |  7 +--
 include/linux/msi.h |  1 +
 kernel/irq/msi.c| 24 
 3 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 3162f88..20d2512 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -1492,12 +1492,7 @@ struct irq_domain *pci_msi_create_irq_domain(struct 
fwnode_handle *fwnode,
if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
pci_msi_domain_update_chip_ops(info);
 
-   info->flags |= MSI_FLAG_ACTIVATE_EARLY;
-   if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE))
-   info->flags |= MSI_FLAG_MUST_REACTIVATE;
-
-   /* PCI-MSI is oneshot-safe */
-   info->chip->flags |= IRQCHIP_ONESHOT_SAFE;
+   msi_domain_set_default_info_flags(info);
 
domain = msi_create_irq_domain(fwnode, info, parent);
if (!domain)
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 89acc76..d7a7f7d 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -448,6 +448,7 @@ int platform_msi_domain_alloc(struct irq_domain *domain, 
unsigned int virq,
 void platform_msi_domain_free(struct irq_domain *domain, unsigned int virq,
  unsigned int nvec);
 void *platform_msi_get_host_data(struct irq_domain *domain);
+void msi_domain_set_default_info_flags(struct msi_domain_info *info);
 #endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */
 
 #ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index dc0e2d7..3697909 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -70,6 +70,30 @@ void get_cached_msi_msg(unsigned int irq, struct msi_msg 
*msg)
 EXPORT_SYMBOL_GPL(get_cached_msi_msg);
 
 #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
+void msi_domain_set_default_info_flags(struct msi_domain_info *info)
+{
+   /* Required so that a device latches a valid MSI message on startup */
+   info->flags |= MSI_FLAG_ACTIVATE_EARLY;
+
+   /*
+* Interrupt reservation mode allows to steer the MSI message of an
+* inactive device to a special (usually spurious interrupt) target.
+* This allows to prevent interrupt vector exhaustion e.g. on x86.
+* But (PCI)MSI interrupts are activated early - see above - so the
+* interrupt request/startup sequence would not try to allocate a
+* usable vector which means that the device interrupts would end
+* up on the special vector and issue spurious interrupt messages.
+* Setting the reactivation flag ensures that when the interrupt
+* is requested the activation is invoked again so that a real
+* vector can be allocated.
+*/
+   if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE))
+   info->flags |= MSI_FLAG_MUST_REACTIVATE;
+
+   /* MSI is oneshot-safe at least in theory */
+   info->chip->flags |= IRQCHIP_ONESHOT_SAFE;
+}
+
 static inline void irq_chip_write_msi_msg(struct irq_data *data,
  struct msi_msg *msg)
 {
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 01/12] x86/irq: Add DEV_MSI allocation type

2021-02-03 Thread Megha Dey
From: Thomas Gleixner 

For the upcoming device MSI support a new allocation type is
required.

Signed-off-by: Thomas Gleixner 
Signed-off-by: Megha Dey 
---
 arch/x86/include/asm/hw_irq.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index d465ece..0531b9c 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -41,6 +41,7 @@ enum irq_alloc_type {
X86_IRQ_ALLOC_TYPE_DMAR,
X86_IRQ_ALLOC_TYPE_AMDVI,
X86_IRQ_ALLOC_TYPE_UV,
+   X86_IRQ_ALLOC_TYPE_DEV_MSI,
 };
 
 struct ioapic_alloc_info {
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 03/12] platform-msi: Provide default irq_chip:: Ack

2021-02-03 Thread Megha Dey
From: Thomas Gleixner 

For the upcoming device MSI support it's required to have a default
irq_chip::ack implementation (irq_chip_ack_parent) so the drivers do not
need to care.

Signed-off-by: Thomas Gleixner 
Signed-off-by: Megha Dey 
---
 drivers/base/platform-msi.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 2c1e2e0..9d9ccfc 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -101,6 +101,8 @@ static void platform_msi_update_chip_ops(struct 
msi_domain_info *info)
chip->irq_mask = irq_chip_mask_parent;
if (!chip->irq_unmask)
chip->irq_unmask = irq_chip_unmask_parent;
+   if (!chip->irq_ack)
+   chip->irq_ack = irq_chip_ack_parent;
if (!chip->irq_eoi)
chip->irq_eoi = irq_chip_eoi_parent;
if (!chip->irq_set_affinity)
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 04/12] genirq/proc: Take buslock on affinity write

2021-02-03 Thread Megha Dey
From: Thomas Gleixner 

Until now interrupt chips which support setting affinity are not locking
the associated bus lock for two reasons:

 - All chips which support affinity setting do not use buslock because they
   can just operate directly on the hardware.

 - All chips which use buslock do not support affinity setting because
   their interrupt chips are not capable. These chips are usually connected
   over a bus like I2C, SPI etc. and have an interrupt output which is
   connected to a CPU interrupt of some sort. So there is no way to set the
   affinity on the chip itself.

Upcoming hardware which is PCIE based sports a non standard MSI(X) variant
which stores the MSI message in RAM which is associated to e.g. a device
queue. The device manages this RAM and writes have to be issued via command
queues or similar mechanisms which is obviously not possible from interrupt
disabled, raw spinlock held context.

The buslock mechanism of irq chips can be utilized to support that. The
affinity write to the chip writes to shadow state, marks it pending and the
irq chip's irq_bus_sync_unlock() callback handles the command queue and
waits for completion similar to the other chip operations on I2C or SPI
busses.

Change the locking in irq_set_affinity() to bus_lock/unlock to help with
that. There are a few other callers than the proc interface, but none of
them is affected by this change as none of them affects an irq chip with
bus lock support.

Signed-off-by: Thomas Gleixner 
Signed-off-by: Megha Dey 
---
 kernel/irq/manage.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index dec3f73..85ede4e 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -443,16 +443,16 @@ int irq_update_affinity_desc(unsigned int irq,
 
 int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, bool 
force)
 {
-   struct irq_desc *desc = irq_to_desc(irq);
+   struct irq_desc *desc;
unsigned long flags;
int ret;
 
+   desc = irq_get_desc_buslock(irq, , IRQ_GET_DESC_CHECK_GLOBAL);
if (!desc)
return -EINVAL;
 
-   raw_spin_lock_irqsave(>lock, flags);
ret = irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask, force);
-   raw_spin_unlock_irqrestore(>lock, flags);
+   irq_put_desc_busunlock(desc, flags);
return ret;
 }
 
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch V1 1/2] iommu/vt-d: Fix debugfs register reads

2020-03-09 Thread Megha Dey
Commit 6825d3ea6cde ("iommu/vt-d: Add debugfs support to show register
contents") dumps the register contents for all IOMMU devices.

Currently, a 64 bit read(dmar_readq) is done for all the IOMMU registers,
even though some of the registers are 32 bits, which is incorrect.

Use the correct read function variant (dmar_readl/dmar_readq) while 
reading the contents of 32/64 bit registers respectively.

Signed-off-by: Megha Dey 
---
 drivers/iommu/intel-iommu-debugfs.c | 40 +++--
 include/linux/intel-iommu.h |  2 ++
 2 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/intel-iommu-debugfs.c 
b/drivers/iommu/intel-iommu-debugfs.c
index c1257be..0a77919 100644
--- a/drivers/iommu/intel-iommu-debugfs.c
+++ b/drivers/iommu/intel-iommu-debugfs.c
@@ -33,38 +33,42 @@ struct iommu_regset {
 
 #define IOMMU_REGSET_ENTRY(_reg_)  \
{ DMAR_##_reg_##_REG, __stringify(_reg_) }
-static const struct iommu_regset iommu_regs[] = {
+
+static const struct iommu_regset iommu_regs_32[] = {
IOMMU_REGSET_ENTRY(VER),
-   IOMMU_REGSET_ENTRY(CAP),
-   IOMMU_REGSET_ENTRY(ECAP),
IOMMU_REGSET_ENTRY(GCMD),
IOMMU_REGSET_ENTRY(GSTS),
-   IOMMU_REGSET_ENTRY(RTADDR),
-   IOMMU_REGSET_ENTRY(CCMD),
IOMMU_REGSET_ENTRY(FSTS),
IOMMU_REGSET_ENTRY(FECTL),
IOMMU_REGSET_ENTRY(FEDATA),
IOMMU_REGSET_ENTRY(FEADDR),
IOMMU_REGSET_ENTRY(FEUADDR),
-   IOMMU_REGSET_ENTRY(AFLOG),
IOMMU_REGSET_ENTRY(PMEN),
IOMMU_REGSET_ENTRY(PLMBASE),
IOMMU_REGSET_ENTRY(PLMLIMIT),
+   IOMMU_REGSET_ENTRY(ICS),
+   IOMMU_REGSET_ENTRY(PRS),
+   IOMMU_REGSET_ENTRY(PECTL),
+   IOMMU_REGSET_ENTRY(PEDATA),
+   IOMMU_REGSET_ENTRY(PEADDR),
+   IOMMU_REGSET_ENTRY(PEUADDR),
+};
+
+static const struct iommu_regset iommu_regs_64[] = {
+   IOMMU_REGSET_ENTRY(CAP),
+   IOMMU_REGSET_ENTRY(ECAP),
+   IOMMU_REGSET_ENTRY(RTADDR),
+   IOMMU_REGSET_ENTRY(CCMD),
+   IOMMU_REGSET_ENTRY(AFLOG),
IOMMU_REGSET_ENTRY(PHMBASE),
IOMMU_REGSET_ENTRY(PHMLIMIT),
IOMMU_REGSET_ENTRY(IQH),
IOMMU_REGSET_ENTRY(IQT),
IOMMU_REGSET_ENTRY(IQA),
-   IOMMU_REGSET_ENTRY(ICS),
IOMMU_REGSET_ENTRY(IRTA),
IOMMU_REGSET_ENTRY(PQH),
IOMMU_REGSET_ENTRY(PQT),
IOMMU_REGSET_ENTRY(PQA),
-   IOMMU_REGSET_ENTRY(PRS),
-   IOMMU_REGSET_ENTRY(PECTL),
-   IOMMU_REGSET_ENTRY(PEDATA),
-   IOMMU_REGSET_ENTRY(PEADDR),
-   IOMMU_REGSET_ENTRY(PEUADDR),
IOMMU_REGSET_ENTRY(MTRRCAP),
IOMMU_REGSET_ENTRY(MTRRDEF),
IOMMU_REGSET_ENTRY(MTRR_FIX64K_0),
@@ -127,10 +131,16 @@ static int iommu_regset_show(struct seq_file *m, void 
*unused)
 * by adding the offset to the pointer (virtual address).
 */
raw_spin_lock_irqsave(>register_lock, flag);
-   for (i = 0 ; i < ARRAY_SIZE(iommu_regs); i++) {
-   value = dmar_readq(iommu->reg + iommu_regs[i].offset);
+   for (i = 0 ; i < ARRAY_SIZE(iommu_regs_32); i++) {
+   value = dmar_readl(iommu->reg + 
iommu_regs_32[i].offset);
+   seq_printf(m, "%-16s\t0x%02x\t\t0x%016llx\n",
+  iommu_regs_32[i].regs, 
iommu_regs_32[i].offset,
+  value);
+   }
+   for (i = 0 ; i < ARRAY_SIZE(iommu_regs_64); i++) {
+   value = dmar_readq(iommu->reg + 
iommu_regs_64[i].offset);
seq_printf(m, "%-16s\t0x%02x\t\t0x%016llx\n",
-  iommu_regs[i].regs, iommu_regs[i].offset,
+  iommu_regs_64[i].regs, 
iommu_regs_64[i].offset,
   value);
}
raw_spin_unlock_irqrestore(>register_lock, flag);
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 4a16b39..980234a 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -123,6 +123,8 @@
 
 #define dmar_readq(a) readq(a)
 #define dmar_writeq(a,v) writeq(v,a)
+#define dmar_readl(a) readl(a)
+#define dmar_writel(a, v) writel(v, a)
 
 #define DMAR_VER_MAJOR(v)  (((v) & 0xf0) >> 4)
 #define DMAR_VER_MINOR(v)  ((v) & 0x0f)
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch V1 0/2] iommu/vtd: Fixes to the IOMMU debugfs

2020-03-09 Thread Megha Dey
This patchset aims to fix some of the existing issues in the iommu debugfs.

The first patch aims to fix the debugfs register reads by using the correct
read function variant while reading the contents of iommu registers while
the second patch ensures the debugfs directory is populated even if DMA
remapping is disabled.

Megha Dey (2):
  iommu/vt-d: Fix debugfs register reads
  iommu/vt-d: Populate debugfs if IOMMUs are detected

 drivers/iommu/intel-iommu-debugfs.c | 53 ++---
 drivers/iommu/intel-iommu.c |  4 ++-
 include/linux/intel-iommu.h |  2 ++
 3 files changed, 43 insertions(+), 16 deletions(-)

-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch V1 2/2] iommu/vt-d: Populate debugfs if IOMMUs are detected

2020-03-09 Thread Megha Dey
Currently, the intel iommu debugfs directory(/sys/kernel/debug/iommu/intel)
gets populated only when DMA remapping is enabled (dmar_disabled = 0)
irrespective of whether interrupt remapping is enabled or not.

Instead, populate the intel iommu debugfs directory if any IOMMUs are
detected.

Signed-off-by: Megha Dey 
---
 drivers/iommu/intel-iommu-debugfs.c | 13 +
 drivers/iommu/intel-iommu.c |  4 +++-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel-iommu-debugfs.c 
b/drivers/iommu/intel-iommu-debugfs.c
index 0a77919..8d24c4d 100644
--- a/drivers/iommu/intel-iommu-debugfs.c
+++ b/drivers/iommu/intel-iommu-debugfs.c
@@ -282,9 +282,15 @@ static int dmar_translation_struct_show(struct seq_file 
*m, void *unused)
 {
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
+   u32 sts;
 
rcu_read_lock();
for_each_active_iommu(iommu, drhd) {
+   sts = dmar_readl(iommu->reg + DMAR_GSTS_REG);
+   if (!(sts & DMA_GSTS_TES)) {
+   seq_puts(m, "DMA Remapping is not enabled\n");
+   return 0;
+   }
root_tbl_walk(m, iommu);
seq_putc(m, '\n');
}
@@ -425,6 +431,7 @@ static int ir_translation_struct_show(struct seq_file *m, 
void *unused)
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
u64 irta;
+   u32 sts;
 
rcu_read_lock();
for_each_active_iommu(iommu, drhd) {
@@ -434,6 +441,12 @@ static int ir_translation_struct_show(struct seq_file *m, 
void *unused)
seq_printf(m, "Remapped Interrupt supported on IOMMU: %s\n",
   iommu->name);
 
+   sts = dmar_readl(iommu->reg + DMAR_GSTS_REG);
+   if (!(sts & DMA_GSTS_IRES)) {
+   seq_puts(m, "Interrupt Remapping is not enabled\n");
+   return 0;
+   }
+
if (iommu->ir_table) {
irta = virt_to_phys(iommu->ir_table->base);
seq_printf(m, " IR table address:%llx\n", irta);
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 9dc3767..0bf0ba7 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -5123,6 +5123,9 @@ int __init intel_iommu_init(void)
 
down_write(_global_lock);
 
+   if (!no_iommu)
+   intel_iommu_debugfs_init();
+
if (no_iommu || dmar_disabled) {
/*
 * We exit the function here to ensure IOMMU's remapping and
@@ -5215,7 +5218,6 @@ int __init intel_iommu_init(void)
pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
 
intel_iommu_enabled = 1;
-   intel_iommu_debugfs_init();
 
return 0;
 
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu