[PATCH V2 1/3] iommu/vt-d: Modify the format of intel DMAR tables dump

2019-05-10 Thread Sai Praneeth Prakhya
Presently, "/sys/kernel/debug/iommu/intel/dmar_translation_struct" file
dumps DMAR tables in the below format

IOMMU dmar2: Root Table Address:4362cc000
Root Table Entries:
 Bus: 0 H: 0 L: 4362f0001
 Context Table Entries for Bus: 0
  Entry B:D.F   HighLow
  160   00:14.0 102 4362ef001
  184   00:17.0 302 435ec4001
  248   00:1f.0 202 43631

This format has a few shortcomings like
1. When extended for dumping scalable mode DMAR table it will quickly be
   very clumsy, making it unreadable.
2. It has information like the Bus number and Entry which are basically
   part of B:D.F, hence are a repetition and are not so useful.

So, change it to a new format which could be easily extended to dump
scalable mode DMAR table. The new format looks as below:

IOMMU dmar2: Root Table Address: 0x436f7d000
B.D.F   Root_entry  Context_entry
00:14.0 0x:0x000436fbd001   
0x0102:0x000436fbc001
00:17.0 0x:0x000436fbd001   
0x0302:0x000436af4001
00:1f.0 0x:0x000436fbd001   
0x0202:0x000436fcd001

Cc: Joerg Roedel 
Cc: Ashok Raj 
Cc: Lu Baolu 
Cc: Sohil Mehta 
Cc: David Woodhouse 
Cc: Jacob Pan 
Cc: Andy Shevchenko 
Reviewed-by: Lu Baolu 
Reviewed-by: Andy Shevchenko 
Signed-off-by: Sai Praneeth Prakhya 
---
 drivers/iommu/intel-iommu-debugfs.c | 65 +++--
 1 file changed, 41 insertions(+), 24 deletions(-)

diff --git a/drivers/iommu/intel-iommu-debugfs.c 
b/drivers/iommu/intel-iommu-debugfs.c
index 7fabf9b1c2dc..3f5399b5e6c0 100644
--- a/drivers/iommu/intel-iommu-debugfs.c
+++ b/drivers/iommu/intel-iommu-debugfs.c
@@ -14,6 +14,13 @@
 
 #include 
 
+struct tbl_walk {
+   u16 bus;
+   u16 devfn;
+   struct root_entry *rt_entry;
+   struct context_entry *ctx_entry;
+};
+
 struct iommu_regset {
int offset;
const char *regs;
@@ -131,16 +138,25 @@ static int iommu_regset_show(struct seq_file *m, void 
*unused)
 }
 DEFINE_SHOW_ATTRIBUTE(iommu_regset);
 
-static void ctx_tbl_entry_show(struct seq_file *m, struct intel_iommu *iommu,
-  int bus)
+static inline void print_tbl_walk(struct seq_file *m)
 {
-   struct context_entry *context;
-   int devfn;
+   struct tbl_walk *tbl_wlk = m->private;
 
-   seq_printf(m, " Context Table Entries for Bus: %d\n", bus);
-   seq_puts(m, "  Entry\tB:D.F\tHigh\tLow\n");
+   seq_printf(m, 
"%02x:%02x.%x\t0x%016llx:0x%016llx\t0x%016llx:0x%016llx\n",
+  tbl_wlk->bus, PCI_SLOT(tbl_wlk->devfn),
+  PCI_FUNC(tbl_wlk->devfn), tbl_wlk->rt_entry->hi,
+  tbl_wlk->rt_entry->lo, tbl_wlk->ctx_entry->hi,
+  tbl_wlk->ctx_entry->lo);
+}
+
+static void ctx_tbl_walk(struct seq_file *m, struct intel_iommu *iommu, u16 
bus)
+{
+   struct context_entry *context;
+   u16 devfn;
 
for (devfn = 0; devfn < 256; devfn++) {
+   struct tbl_walk tbl_wlk = {0};
+
context = iommu_context_addr(iommu, bus, devfn, 0);
if (!context)
return;
@@ -148,33 +164,34 @@ static void ctx_tbl_entry_show(struct seq_file *m, struct 
intel_iommu *iommu,
if (!context_present(context))
continue;
 
-   seq_printf(m, "  %-5d\t%02x:%02x.%x\t%-6llx\t%llx\n", devfn,
-  bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
-  context[0].hi, context[0].lo);
+   tbl_wlk.bus = bus;
+   tbl_wlk.devfn = devfn;
+   tbl_wlk.rt_entry = &iommu->root_entry[bus];
+   tbl_wlk.ctx_entry = context;
+   m->private = &tbl_wlk;
+
+   print_tbl_walk(m);
}
 }
 
-static void root_tbl_entry_show(struct seq_file *m, struct intel_iommu *iommu)
+static void root_tbl_walk(struct seq_file *m, struct intel_iommu *iommu)
 {
unsigned long flags;
-   int bus;
+   u16 bus;
 
	spin_lock_irqsave(&iommu->lock, flags);
-   seq_printf(m, "IOMMU %s: Root Table Address:%llx\n", iommu->name,
+   seq_printf(m, "IOMMU %s: Root Table Address: 0x%llx\n", iommu->name,
   (u64)virt_to_phys(iommu->root_entry));
-   seq_puts(m, "Root Table Entries:\n");
-
-   for (bus = 0; bus < 256; bus++) {
-   if (!(iommu->root_entry[bus].lo & 1))
-   continue;
+   seq_puts(m, "B.D.F\tRoot_entry\t\t\t\tContext_entry\n");
 
-   seq_printf(m, " Bus: %d H: %llx L: %llx\n", bus,
-  iommu->root_entry[bus].hi,
-  iommu->root_entry[bus].lo);
+   /*
+* No need to check if the root entry is present or not because
+* iommu_context_addr() performs the same check before returning
+* context entry.
+*/
+   for (bus = 0; bus < 256; bus++)
+   ctx_tbl_walk(m, iommu, bus);
 
-   

[PATCH V2 3/3] iommu/vt-d: Add debugfs support to show scalable mode DMAR table internals

2019-05-10 Thread Sai Praneeth Prakhya
A DMAR table walk would typically follow the below process.
1. Bus number is used to index into root table which points to a context
   table.
2. Device number and Function number are used together to index into
   context table which then points to a pasid directory.
3. PASID[19:6] is used to index into PASID directory which points to a
   PASID table.
4. PASID[5:0] is used to index into PASID table which points to all levels
   of page tables.

Whenever a user opens the file
"/sys/kernel/debug/iommu/intel/dmar_translation_struct", the above
described DMAR table walk is performed and the contents of the table are
dumped into the file. The dump could be handy while dealing with devices
that use PASID.

Example of such dump:
cat /sys/kernel/debug/iommu/intel/dmar_translation_struct

(Please note that because of 80 char limit, entries that should have been
in the same line are broken into different lines)

IOMMU dmar0: Root Table Address: 0x436f7c000
B.D.F   Root_entry  Context_entry
PASID   PASID_table_entry
00:0a.0 0x:0x00044dd3f001   
0x0010:0x000435460e1d
0   0x00044d6e1089:0x0003:0x0001
00:0a.0 0x:0x00044dd3f001   
0x0010:0x000435460e1d
1   0x0049:0x0001:0x03c0e001

Note that the above format is followed even for legacy DMAR table dump
which doesn't support PASID and hence in such cases PASID is defaulted to
-1 indicating that PASID and its related fields are invalid.

Cc: Joerg Roedel 
Cc: Ashok Raj 
Cc: Lu Baolu 
Cc: Sohil Mehta 
Cc: David Woodhouse 
Cc: Jacob Pan 
Cc: Andy Shevchenko 
Reviewed-by: Lu Baolu 
Reviewed-by: Andy Shevchenko 
Signed-off-by: Sai Praneeth Prakhya 
---
 drivers/iommu/intel-iommu-debugfs.c | 80 +++--
 1 file changed, 76 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/intel-iommu-debugfs.c 
b/drivers/iommu/intel-iommu-debugfs.c
index 3f5399b5e6c0..f1b0f82373bb 100644
--- a/drivers/iommu/intel-iommu-debugfs.c
+++ b/drivers/iommu/intel-iommu-debugfs.c
@@ -14,11 +14,15 @@
 
 #include 
 
+#include "intel-pasid.h"
+
 struct tbl_walk {
u16 bus;
u16 devfn;
+   u32 pasid;
struct root_entry *rt_entry;
struct context_entry *ctx_entry;
+   struct pasid_entry *pasid_tbl_entry;
 };
 
 struct iommu_regset {
@@ -142,21 +146,82 @@ static inline void print_tbl_walk(struct seq_file *m)
 {
struct tbl_walk *tbl_wlk = m->private;
 
-   seq_printf(m, 
"%02x:%02x.%x\t0x%016llx:0x%016llx\t0x%016llx:0x%016llx\n",
+   seq_printf(m, 
"%02x:%02x.%x\t0x%016llx:0x%016llx\t0x%016llx:0x%016llx\t",
   tbl_wlk->bus, PCI_SLOT(tbl_wlk->devfn),
   PCI_FUNC(tbl_wlk->devfn), tbl_wlk->rt_entry->hi,
   tbl_wlk->rt_entry->lo, tbl_wlk->ctx_entry->hi,
   tbl_wlk->ctx_entry->lo);
+
+   /*
+* A legacy mode DMAR doesn't support PASID, hence default it to -1
+* indicating that it's invalid. Also, default all PASID related fields
+* to 0.
+*/
+   if (!tbl_wlk->pasid_tbl_entry)
+   seq_printf(m, "%-6d\t0x%016llx:0x%016llx:0x%016llx\n", -1,
+  (u64)0, (u64)0, (u64)0);
+   else
+   seq_printf(m, "%-6d\t0x%016llx:0x%016llx:0x%016llx\n",
+  tbl_wlk->pasid, tbl_wlk->pasid_tbl_entry->val[0],
+  tbl_wlk->pasid_tbl_entry->val[1],
+  tbl_wlk->pasid_tbl_entry->val[2]);
+}
+
+static void pasid_tbl_walk(struct seq_file *m, struct pasid_entry *tbl_entry,
+  u16 dir_idx)
+{
+   struct tbl_walk *tbl_wlk = m->private;
+   u8 tbl_idx;
+
+   for (tbl_idx = 0; tbl_idx < PASID_TBL_ENTRIES; tbl_idx++) {
+   if (pasid_pte_is_present(tbl_entry)) {
+   tbl_wlk->pasid_tbl_entry = tbl_entry;
+   tbl_wlk->pasid = (dir_idx << PASID_PDE_SHIFT) + tbl_idx;
+   print_tbl_walk(m);
+   }
+
+   tbl_entry++;
+   }
+}
+
+static void pasid_dir_walk(struct seq_file *m, u64 pasid_dir_ptr,
+  u16 pasid_dir_size)
+{
+   struct pasid_dir_entry *dir_entry = phys_to_virt(pasid_dir_ptr);
+   struct pasid_entry *pasid_tbl;
+   u16 dir_idx;
+
+   for (dir_idx = 0; dir_idx < pasid_dir_size; dir_idx++) {
+   pasid_tbl = get_pasid_table_from_pde(dir_entry);
+   if (pasid_tbl)
+   pasid_tbl_walk(m, pasid_tbl, dir_idx);
+
+   dir_entry++;
+   }
 }
 
 static void ctx_tbl_walk(struct seq_file *m, struct intel_iommu *iommu, u16 
bus)
 {
struct context_entry *context;
-   u16 devfn;
+   u16 devfn, pasid_dir_size;
+   u64 pasid_dir_ptr;
 
for (devfn = 0; devfn < 256; devfn++) {
struct tbl_walk tbl_wlk = {0};
 
+

[PATCH V2 2/3] iommu/vt-d: Introduce macros useful for dumping DMAR table

2019-05-10 Thread Sai Praneeth Prakhya
A scalable mode DMAR table walk would involve looking at bits in each stage
of walk, like,
1. Is PASID enabled in the context entry?
2. What's the size of PASID directory?
3. Is the PASID directory entry present?
4. Is the PASID table entry present?
5. Number of PASID table entries?

Hence, add these macros that will later be used during this walk.
Apart from adding new macros, move existing macros (like
pasid_pde_is_present() and get_pasid_table_from_pde()) from pasid.c file
to pasid.h header file so that they could be reused.

Cc: Joerg Roedel 
Cc: Ashok Raj 
Cc: Lu Baolu 
Cc: Sohil Mehta 
Cc: David Woodhouse 
Cc: Jacob Pan 
Cc: Andy Shevchenko 
Reviewed-by: Lu Baolu 
Reviewed-by: Andy Shevchenko 
Signed-off-by: Sai Praneeth Prakhya 
---
 drivers/iommu/intel-pasid.c | 17 -
 drivers/iommu/intel-pasid.h | 26 ++
 2 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
index 03b12d2ee213..0be00ff53d25 100644
--- a/drivers/iommu/intel-pasid.c
+++ b/drivers/iommu/intel-pasid.c
@@ -167,23 +167,6 @@ int intel_pasid_alloc_table(struct device *dev)
return 0;
 }
 
-/* Get PRESENT bit of a PASID directory entry. */
-static inline bool
-pasid_pde_is_present(struct pasid_dir_entry *pde)
-{
-   return READ_ONCE(pde->val) & PASID_PTE_PRESENT;
-}
-
-/* Get PASID table from a PASID directory entry. */
-static inline struct pasid_entry *
-get_pasid_table_from_pde(struct pasid_dir_entry *pde)
-{
-   if (!pasid_pde_is_present(pde))
-   return NULL;
-
-   return phys_to_virt(READ_ONCE(pde->val) & PDE_PFN_MASK);
-}
-
 void intel_pasid_free_table(struct device *dev)
 {
struct device_domain_info *info;
diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h
index 23537b3f34e3..fc8cd8f17de1 100644
--- a/drivers/iommu/intel-pasid.h
+++ b/drivers/iommu/intel-pasid.h
@@ -18,6 +18,10 @@
 #define PDE_PFN_MASK   PAGE_MASK
 #define PASID_PDE_SHIFT6
 #define MAX_NR_PASID_BITS  20
+#define PASID_TBL_ENTRIES  BIT(PASID_PDE_SHIFT)
+
+#define is_pasid_enabled(entry)(((entry)->lo >> 3) & 0x1)
+#define get_pasid_dir_size(entry)  (1 << ((((entry)->lo >> 9) & 0x7) + 7))
 
 /*
  * Domain ID reserved for pasid entries programmed for first-level
@@ -49,6 +53,28 @@ struct pasid_table {
struct list_headdev;/* device list */
 };
 
+/* Get PRESENT bit of a PASID directory entry. */
+static inline bool pasid_pde_is_present(struct pasid_dir_entry *pde)
+{
+   return READ_ONCE(pde->val) & PASID_PTE_PRESENT;
+}
+
+/* Get PASID table from a PASID directory entry. */
+static inline struct pasid_entry *
+get_pasid_table_from_pde(struct pasid_dir_entry *pde)
+{
+   if (!pasid_pde_is_present(pde))
+   return NULL;
+
+   return phys_to_virt(READ_ONCE(pde->val) & PDE_PFN_MASK);
+}
+
+/* Get PRESENT bit of a PASID table entry. */
+static inline bool pasid_pte_is_present(struct pasid_entry *pte)
+{
+   return READ_ONCE(pte->val[0]) & PASID_PTE_PRESENT;
+}
+
 extern u32 intel_pasid_max_id;
 int intel_pasid_alloc_id(void *ptr, int start, int end, gfp_t gfp);
 void intel_pasid_free_id(int pasid);
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH V2 0/3] Add debugfs support to show scalable mode DMAR table

2019-05-10 Thread Sai Praneeth Prakhya
Presently, "/sys/kernel/debug/iommu/intel/dmar_translation_struct" file dumps
only legacy DMAR table which consists of root table and context table. Scalable
mode DMAR table adds PASID directory and PASID table. Hence, add support to dump
these tables as well.

Directly extending the present dumping format for PASID tables will make the
output look clumsy. Hence, the first patch modifies the present format to a
tabular format. The second patch introduces macros that are used during PASID
table walk and the third patch actually adds support to dump scalable mode DMAR
table.

Changes from V1 to V2:
--
1. Make my name consistent in "From" and "Signed-off-by"
2. Fix comment regarding scalable mode context entries
3. Add reviewed by tags

Sai Praneeth Prakhya (3):
  iommu/vt-d: Modify the format of intel DMAR tables dump
  iommu/vt-d: Introduce macros useful for dumping DMAR table
  iommu/vt-d: Add debugfs support to show scalable mode DMAR table
internals

 drivers/iommu/intel-iommu-debugfs.c | 137 +---
 drivers/iommu/intel-pasid.c |  17 -
 drivers/iommu/intel-pasid.h |  26 +++
 3 files changed, 139 insertions(+), 41 deletions(-)

Cc: Joerg Roedel 
Cc: Ashok Raj 
Cc: Lu Baolu 
Cc: Sohil Mehta 
Cc: David Woodhouse 
Cc: Jacob Pan 
Cc: Andy Shevchenko 
Reviewed-by: Lu Baolu 
Reviewed-by: Andy Shevchenko 
Signed-off-by: Sai Praneeth Prakhya 

-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC] iommu: arm-smmu: stall support

2019-05-10 Thread Rob Clark
On Fri, Sep 22, 2017 at 2:58 AM Jean-Philippe Brucker
 wrote:
>
> On 22/09/17 10:02, Joerg Roedel wrote:
> > On Tue, Sep 19, 2017 at 10:23:43AM -0400, Rob Clark wrote:
> >> I would like to decide in the IRQ whether or not to queue work or not,
> >> because when we get a gpu fault, we tend to get 1000's of gpu faults
> >> all at once (and I really only need to handle the first one).  I
> >> suppose that could also be achieved by having a special return value
> >> from the fault handler to say "call me again from a wq"..
> >>
> >> Note that in the drm driver I already have a suitable wq to queue the
> >> work, so it really doesn't buy me anything to have the iommu driver
> >> toss things off to a wq for me.  Might be a different situation for
> >> other drivers (but I guess mostly other drivers are using iommu API
> >> indirectly via dma-mapping?)
> >
> > Okay, so since you are the only user for now, we don't need a
> > work-queue. But I still want the ->resume call-back to be hidden in the
> > iommu code and not be exposed to users.
> >
> > We already have per-domain fault-handlers, so the best solution for now
> > is to call ->resume from report_iommu_fault() when the fault-handler
> > returns a special value.
>
> The problem is that report_iommu_fault is called from IRQ context by the
> SMMU driver, so the device driver callback cannot sleep.
>
> So if the device driver needs to be able to sleep between fault report and
> resume, as I understand Rob needs for writing debugfs, we can either:
>
> * call report_iommu_fault from higher up, in a thread or workqueue.
> * split the fault reporting as this patch proposes. The exact same
>   mechanism is needed for the vSVM work by Intel: in order to inject fault
>   into the guest, they would like to have an atomic notifier registered by
>   VFIO for passing down the Page Request, and a new function in the IOMMU
>   API to resume/complete the fault.
>

So I was thinking about this topic again.. I would still like to get
some sort of async resume so that I can wire up GPU cmdstream/state
logging on iommu fault (without locally resurrecting and rebasing this
patch and drm/msm side changes each time I need to debug iommu
faults)..

And I do still prefer the fault cb in irq (or not requiring it in
wq)..  but on thinking about it, the two ideas aren't entirely
conflicting, ie. add some flags either when we register handler[1], or
they could be handled thru domain_set_attr, like:

 _EXPLICIT_RESUME - iommu API user calls iommu_domain_resume(),
potentialy from wq/thread after fault handler returns
 _HANDLER_SLEEPS  - iommu core handles the wq, and calls ops->resume()
internally

In both cases, from the iommu driver PoV it just implements
iommu_ops::resume().. in first case it is called via iommu user either
from the fault handler or at some point later (ie. wq or thread).

I don't particularly need the _HANDLER_SLEEPS case (unless I can't
convince anyone that iommu_domamin_resume() called from outside iommu
core is a good idea).. so probably I wouldn't wire up the wq plumbing
for the _HANDLER_SLEEPS case unless someone really wanted me to.

Since there are more iommu drivers, than places that register fault
handlers, I like the idea that in either case, from the driver PoV, it
is just implementing the resume callback.

[1] currently I only see a few places where fault handlers are
registered, so changing iommu_set_fault_handler() is really not much
churn

BR,
-R
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH] iommu/intel: fix variable 'iommu' set but not used

2019-05-10 Thread Qian Cai
The commit cf04eee8bf0e ("iommu/vt-d: Include ACPI devices in iommu=pt")
added for_each_active_iommu() in iommu_prepare_static_identity_mapping()
but never used the each element, i.e, "drhd->iommu".

drivers/iommu/intel-iommu.c: In function
'iommu_prepare_static_identity_mapping':
drivers/iommu/intel-iommu.c:3037:22: warning: variable 'iommu' set but not
used [-Wunused-but-set-variable]
  struct intel_iommu *iommu;

Fixed the warning by passing "drhd->iommu" directly to
for_each_active_iommu() which all subsequent self-assignments should be
ignored by a compiler anyway.

Signed-off-by: Qian Cai 
---
 drivers/iommu/intel-iommu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index a209199f3af6..86e1ddcb4a8e 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3034,7 +3034,6 @@ static int __init 
iommu_prepare_static_identity_mapping(int hw)
 {
struct pci_dev *pdev = NULL;
struct dmar_drhd_unit *drhd;
-   struct intel_iommu *iommu;
struct device *dev;
int i;
int ret = 0;
@@ -3045,7 +3044,7 @@ static int __init 
iommu_prepare_static_identity_mapping(int hw)
return ret;
}
 
-   for_each_active_iommu(iommu, drhd)
+   for_each_active_iommu(drhd->iommu, drhd)
for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, 
dev) {
struct acpi_device_physical_node *pn;
struct acpi_device *adev;
-- 
1.8.3.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 0/3] Add debugfs support to show scalable mode DMAR table

2019-05-10 Thread Andy Shevchenko
On Thu, May 09, 2019 at 11:41:42AM -0700, Sai Praneeth Prakhya wrote:
> From: Sai Praneeth 
> 
> Presently, "/sys/kernel/debug/iommu/intel/dmar_translation_struct" file dumps
> only legacy DMAR table which consists of root table and context table. 
> Scalable
> mode DMAR table adds PASID directory and PASID table. Hence, add support to 
> dump
> these tables as well.
> 
> Directly extending the present dumping format for PASID tables will make the
> output look clumsy. Hence, the first patch modifies the present format to a
> tabular format. The second patch introduces macros that are used during PASID
> table walk and the third patch actually adds support to dump scalable mode 
> DMAR
> table.

FWIW,
Reviewed-by: Andy Shevchenko 

Since it's debugfs the format is not carved in stone.

> 
> Sai Praneeth (3):
>   iommu/vt-d: Modify the format of intel DMAR tables dump
>   iommu/vt-d: Introduce macros useful for dumping DMAR table
>   iommu/vt-d: Add debugfs support to show scalable mode DMAR table
> internals
> 
>  drivers/iommu/intel-iommu-debugfs.c | 132 
> +---
>  drivers/iommu/intel-pasid.c |  17 -
>  drivers/iommu/intel-pasid.h |  26 +++
>  3 files changed, 134 insertions(+), 41 deletions(-)
> 
> Cc: Joerg Roedel 
> Cc: Ashok Raj 
> Cc: Lu Baolu 
> Cc: Sohil Mehta 
> Cc: David Woodhouse 
> Cc: Jacob Pan 
> Cc: Andy Shevchenko 
> Signed-off-by: Sai Praneeth Prakhya 
> 
> -- 
> 2.7.4
> 

-- 
With Best Regards,
Andy Shevchenko


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/4] s390: pci: Exporting access to CLP PCI function and PCI group

2019-05-10 Thread Pierre Morel

On 10/05/2019 12:21, Robin Murphy wrote:

On 10/05/2019 09:22, Pierre Morel wrote:

For the generic implementation of VFIO PCI we need to retrieve
the hardware configuration for the PCI functions and the
PCI function groups.

We modify the internal function using CLP Query PCI function and
CLP query PCI function group so that they can be called from
outside the S390 architecture PCI code and prefix the two
functions with "zdev" to make clear that they can be called
knowing only the associated zdevice.

Signed-off-by: Pierre Morel 
Reviewed-by: Sebastian Ott 
---
  arch/s390/include/asm/pci.h |  3 ++
  arch/s390/pci/pci_clp.c | 72 
-

  2 files changed, 41 insertions(+), 34 deletions(-)

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 305befd..e66b246 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -261,4 +261,7 @@ cpumask_of_pcibus(const struct pci_bus *bus)
  #endif /* CONFIG_NUMA */
+int zdev_query_pci_fngrp(struct zpci_dev *zdev,
+ struct clp_req_rsp_query_pci_grp *rrb);
+int zdev_query_pci_fn(struct zpci_dev *zdev, struct 
clp_req_rsp_query_pci *rrb);

  #endif
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 3a36b07..4ae5d77 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -113,32 +113,18 @@ static void clp_store_query_pci_fngrp(struct 
zpci_dev *zdev,

  }
  }
-static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid)
+int zdev_query_pci_fngrp(struct zpci_dev *zdev,
+ struct clp_req_rsp_query_pci_grp *rrb)
  {
-    struct clp_req_rsp_query_pci_grp *rrb;
-    int rc;
-
-    rrb = clp_alloc_block(GFP_KERNEL);
-    if (!rrb)
-    return -ENOMEM;
-
  memset(rrb, 0, sizeof(*rrb));
  rrb->request.hdr.len = sizeof(rrb->request);
  rrb->request.hdr.cmd = CLP_QUERY_PCI_FNGRP;
  rrb->response.hdr.len = sizeof(rrb->response);
-    rrb->request.pfgid = pfgid;
+    rrb->request.pfgid = zdev->pfgid;
-    rc = clp_req(rrb, CLP_LPS_PCI);
-    if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
-    clp_store_query_pci_fngrp(zdev, &rrb->response);
-    else {
-    zpci_err("Q PCI FGRP:\n");
-    zpci_err_clp(rrb->response.hdr.rsp, rc);
-    rc = -EIO;
-    }
-    clp_free_block(rrb);
-    return rc;
+    return clp_req(rrb, CLP_LPS_PCI);
  }
+EXPORT_SYMBOL(zdev_query_pci_fngrp);


AFAICS it's only the IOMMU driver itself which needs to call these. That 
can't be built as a module, so you shouldn't need explicit exports - the 
header declaration is enough.


Robin.


This is right and seeing the pointer type only zPCI and s390iommu can 
make use of it.

If nobody has another point of view I will remove the export on the
next iteration.

Thanks,
Pierre




  static int clp_store_query_pci_fn(struct zpci_dev *zdev,
    struct clp_rsp_query_pci *response)
@@ -174,32 +160,50 @@ static int clp_store_query_pci_fn(struct 
zpci_dev *zdev,

  return 0;
  }
-static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh)
+int zdev_query_pci_fn(struct zpci_dev *zdev, struct 
clp_req_rsp_query_pci *rrb)

+{
+
+    memset(rrb, 0, sizeof(*rrb));
+    rrb->request.hdr.len = sizeof(rrb->request);
+    rrb->request.hdr.cmd = CLP_QUERY_PCI_FN;
+    rrb->response.hdr.len = sizeof(rrb->response);
+    rrb->request.fh = zdev->fh;
+
+    return clp_req(rrb, CLP_LPS_PCI);
+}
+EXPORT_SYMBOL(zdev_query_pci_fn);
+
+static int clp_query_pci(struct zpci_dev *zdev)
  {
  struct clp_req_rsp_query_pci *rrb;
+    struct clp_req_rsp_query_pci_grp *grrb;
  int rc;
  rrb = clp_alloc_block(GFP_KERNEL);
  if (!rrb)
  return -ENOMEM;
-    memset(rrb, 0, sizeof(*rrb));
-    rrb->request.hdr.len = sizeof(rrb->request);
-    rrb->request.hdr.cmd = CLP_QUERY_PCI_FN;
-    rrb->response.hdr.len = sizeof(rrb->response);
-    rrb->request.fh = fh;
-
-    rc = clp_req(rrb, CLP_LPS_PCI);
-    if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
-    rc = clp_store_query_pci_fn(zdev, &rrb->response);
-    if (rc)
-    goto out;
-    rc = clp_query_pci_fngrp(zdev, rrb->response.pfgid);
-    } else {
+    rc = zdev_query_pci_fn(zdev, rrb);
+    if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
  zpci_err("Q PCI FN:\n");
  zpci_err_clp(rrb->response.hdr.rsp, rc);
  rc = -EIO;
+    goto out;
  }
+    rc = clp_store_query_pci_fn(zdev, &rrb->response);
+    if (rc)
+    goto out;
+
+    grrb = (struct clp_req_rsp_query_pci_grp *)rrb;
+    rc = zdev_query_pci_fngrp(zdev, grrb);
+    if (rc || grrb->response.hdr.rsp != CLP_RC_OK) {
+    zpci_err("Q PCI FGRP:\n");
+    zpci_err_clp(grrb->response.hdr.rsp, rc);
+    rc = -EIO;
+    goto out;
+    }
+    clp_store_query_pci_fngrp(zdev, &grrb->response);
+
  out:
  clp_free_block(rrb);
  return rc;
@@ -219,7 +223,7 @@ int clp_add_pci_device(u32 fid, u32 fh, int 
configured)

  zdev->fid = fid;
  /* Query function 

Re: [PATCH v7 13/23] iommu/smmuv3: Implement attach/detach_pasid_table

2019-05-10 Thread Auger Eric
Hi Robin,

On 5/8/19 4:38 PM, Robin Murphy wrote:
> On 08/04/2019 13:19, Eric Auger wrote:
>> On attach_pasid_table() we program STE S1 related info set
>> by the guest into the actual physical STEs. At minimum
>> we need to program the context descriptor GPA and compute
>> whether the stage1 is translated/bypassed or aborted.
>>
>> Signed-off-by: Eric Auger 
>>
>> ---
>> v6 -> v7:
>> - check versions and comment the fact we don't need to take
>>    into account s1dss and s1fmt
>> v3 -> v4:
>> - adapt to changes in iommu_pasid_table_config
>> - different programming convention at s1_cfg/s2_cfg/ste.abort
>>
>> v2 -> v3:
>> - callback now is named set_pasid_table and struct fields
>>    are laid out differently.
>>
>> v1 -> v2:
>> - invalidate the STE before changing them
>> - hold init_mutex
>> - handle new fields
>> ---
>>   drivers/iommu/arm-smmu-v3.c | 121 
>>   1 file changed, 121 insertions(+)
>>
>> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
>> index e22e944ffc05..1486baf53425 100644
>> --- a/drivers/iommu/arm-smmu-v3.c
>> +++ b/drivers/iommu/arm-smmu-v3.c
>> @@ -2207,6 +2207,125 @@ static void arm_smmu_put_resv_regions(struct
>> device *dev,
>>   kfree(entry);
>>   }
>>   +static int arm_smmu_attach_pasid_table(struct iommu_domain *domain,
>> +   struct iommu_pasid_table_config *cfg)
>> +{
>> +    struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
>> +    struct arm_smmu_master_data *entry;
>> +    struct arm_smmu_s1_cfg *s1_cfg;
>> +    struct arm_smmu_device *smmu;
>> +    unsigned long flags;
>> +    int ret = -EINVAL;
>> +
>> +    if (cfg->format != IOMMU_PASID_FORMAT_SMMUV3)
>> +    return -EINVAL;
>> +
>> +    if (cfg->version != PASID_TABLE_CFG_VERSION_1 ||
>> +    cfg->smmuv3.version != PASID_TABLE_SMMUV3_CFG_VERSION_1)
>> +    return -EINVAL;
>> +
>> +    mutex_lock(&smmu_domain->init_mutex);
>> +
>> +    smmu = smmu_domain->smmu;
>> +
>> +    if (!smmu)
>> +    goto out;
>> +
>> +    if (!((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
>> +  (smmu->features & ARM_SMMU_FEAT_TRANS_S2))) {
>> +    dev_info(smmu_domain->smmu->dev,
>> + "does not implement two stages\n");
>> +    goto out;
>> +    }
> 
> That check is redundant (and frankly looks a little bit spammy). If the
> one below is not enough, there is a problem elsewhere - if it's possible
> for smmu_domain->stage to ever get set to ARM_SMMU_DOMAIN_NESTED without
> both stages of translation present, we've already gone fundamentally wrong.

Makes sense. Moved that check to arm_smmu_domain_finalise() instead and
remove redundant ones.
> 
>> +
>> +    if (smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED)
>> +    goto out;
>> +
>> +    switch (cfg->config) {
>> +    case IOMMU_PASID_CONFIG_ABORT:
>> +    spin_lock_irqsave(&smmu_domain->devices_lock, flags);
>> +    list_for_each_entry(entry, &smmu_domain->devices, list) {
>> +    entry->ste.s1_cfg = NULL;
>> +    entry->ste.abort = true;
>> +    arm_smmu_install_ste_for_dev(entry->dev->iommu_fwspec);
>> +    }
>> +    spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
>> +    ret = 0;
>> +    break;
>> +    case IOMMU_PASID_CONFIG_BYPASS:
>> +    spin_lock_irqsave(&smmu_domain->devices_lock, flags);
>> +    list_for_each_entry(entry, &smmu_domain->devices, list) {
>> +    entry->ste.s1_cfg = NULL;
>> +    entry->ste.abort = false;
>> +    arm_smmu_install_ste_for_dev(entry->dev->iommu_fwspec);
>> +    }
>> +    spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
>> +    ret = 0;
>> +    break;
>> +    case IOMMU_PASID_CONFIG_TRANSLATE:
>> +    /*
>> + * we currently support a single CD so s1fmt and s1dss
>> + * fields are also ignored
>> + */
>> +    if (cfg->pasid_bits)
>> +    goto out;
>> +
>> +    s1_cfg = &smmu_domain->s1_cfg;
>> +    s1_cfg->cdptr_dma = cfg->base_ptr;
>> +
>> +    spin_lock_irqsave(&smmu_domain->devices_lock, flags);
>> +    list_for_each_entry(entry, &smmu_domain->devices, list) {
>> +    entry->ste.s1_cfg = s1_cfg;
> 
> Either we reject valid->valid transitions outright, or we need to remove
> and invalidate the existing S1 context from the STE at this point, no?
I agree. I added this in arm_smmu_write_strtab_ent().

> 
>> +    entry->ste.abort = false;
>> +    arm_smmu_install_ste_for_dev(entry->dev->iommu_fwspec);
>> +    }
>> +    spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
>> +    ret = 0;
>> +    break;
>> +    default:
>> +    break;
>> +    }
>> +out:
>> +    mutex_unlock(&smmu_domain->init_mutex);
>> +    return ret;
>> +}
>> +
>> +static void arm_smmu_detach_pasid_table(struct iommu_domain *domain)
>> +{
>> +    struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
>> +    struct arm_smmu_master_data *entry;
>> +    struct arm_smmu_device 

Re: [PATCH v7 06/23] iommu: Introduce bind/unbind_guest_msi

2019-05-10 Thread Auger Eric
Hi Robin,
On 5/8/19 3:59 PM, Robin Murphy wrote:
> On 08/04/2019 13:18, Eric Auger wrote:
>> On ARM, MSI are translated by the SMMU. An IOVA is allocated
>> for each MSI doorbell. If both the host and the guest are exposed
>> with SMMUs, we end up with 2 different IOVAs allocated by each.
>> guest allocates an IOVA (gIOVA) to map onto the guest MSI
>> doorbell (gDB). The Host allocates another IOVA (hIOVA) to map
>> onto the physical doorbell (hDB).
>>
>> So we end up with 2 untied mappings:
>>   S1    S2
>> gIOVA    ->    gDB
>>    hIOVA    ->    hDB
>>
>> Currently the PCI device is programmed by the host with hIOVA
>> as MSI doorbell. So this does not work.
>>
>> This patch introduces an API to pass gIOVA/gDB to the host so
>> that gIOVA can be reused by the host instead of re-allocating
>> a new IOVA. So the goal is to create the following nested mapping:
>>
>>   S1    S2
>> gIOVA    ->    gDB ->    hDB
>>
>> and program the PCI device with gIOVA MSI doorbell.
>>
>> In case we have several devices attached to this nested domain
>> (devices belonging to the same group), they cannot be isolated
>> on guest side either. So they should also end up in the same domain
>> on guest side. We will enforce that all the devices attached to
>> the host iommu domain use the same physical doorbell and similarly
>> a single virtual doorbell mapping gets registered (1 single
>> virtual doorbell is used on guest as well).
>>
>> Signed-off-by: Eric Auger 
>>
>> ---
>> v6 -> v7:
>> - remove the device handle parameter.
>> - Add comments saying there can only be a single MSI binding
>>    registered per iommu_domain
>> v5 -> v6:
>> -fix compile issue when IOMMU_API is not set
>>
>> v3 -> v4:
>> - add unbind
>>
>> v2 -> v3:
>> - add a struct device handle
>> ---
>>   drivers/iommu/iommu.c | 37 +
>>   include/linux/iommu.h | 23 +++
>>   2 files changed, 60 insertions(+)
>>
>> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
>> index 6d6cb4005ca5..0d160bbd6f81 100644
>> --- a/drivers/iommu/iommu.c
>> +++ b/drivers/iommu/iommu.c
>> @@ -1575,6 +1575,43 @@ static void __iommu_detach_device(struct
>> iommu_domain *domain,
>>   trace_detach_device_from_domain(dev);
>>   }
>>   +/**
>> + * iommu_bind_guest_msi - Passes the stage1 GIOVA/GPA mapping of a
>> + * virtual doorbell
>> + *
>> + * @domain: iommu domain the stage 1 mapping will be attached to
>> + * @iova: iova allocated by the guest
>> + * @gpa: guest physical address of the virtual doorbell
>> + * @size: granule size used for the mapping
>> + *
>> + * The associated IOVA can be reused by the host to create a nested
>> + * stage2 binding mapping translating into the physical doorbell used
>> + * by the devices attached to the domain.
>> + *
>> + * All devices within the domain must share the same physical doorbell.
>> + * A single MSI GIOVA/GPA mapping can be attached to an iommu_domain.
>> + */
>> +
>> +int iommu_bind_guest_msi(struct iommu_domain *domain,
>> + dma_addr_t giova, phys_addr_t gpa, size_t size)
>> +{
>> +    if (unlikely(!domain->ops->bind_guest_msi))
>> +    return -ENODEV;
>> +
>> +    return domain->ops->bind_guest_msi(domain, giova, gpa, size);
>> +}
>> +EXPORT_SYMBOL_GPL(iommu_bind_guest_msi);
>> +
>> +void iommu_unbind_guest_msi(struct iommu_domain *domain,
>> +    dma_addr_t iova)
>> +{
>> +    if (unlikely(!domain->ops->unbind_guest_msi))
>> +    return;
>> +
>> +    domain->ops->unbind_guest_msi(domain, iova);
>> +}
>> +EXPORT_SYMBOL_GPL(iommu_unbind_guest_msi);
>> +
>>   void iommu_detach_device(struct iommu_domain *domain, struct device
>> *dev)
>>   {
>>   struct iommu_group *group;
>> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
>> index 7c7c6bad1420..a2f3f964ead2 100644
>> --- a/include/linux/iommu.h
>> +++ b/include/linux/iommu.h
>> @@ -192,6 +192,8 @@ struct iommu_resv_region {
>>    * @attach_pasid_table: attach a pasid table
>>    * @detach_pasid_table: detach the pasid table
>>    * @cache_invalidate: invalidate translation caches
>> + * @bind_guest_msi: provides a stage1 giova/gpa MSI doorbell mapping
>> + * @unbind_guest_msi: withdraw a stage1 giova/gpa MSI doorbell mapping
>>    * @pgsize_bitmap: bitmap of all possible supported page sizes
>>    */
>>   struct iommu_ops {
>> @@ -243,6 +245,10 @@ struct iommu_ops {
>>   int (*cache_invalidate)(struct iommu_domain *domain, struct
>> device *dev,
>>   struct iommu_cache_invalidate_info *inv_info);
>>   +    int (*bind_guest_msi)(struct iommu_domain *domain,
>> +  dma_addr_t giova, phys_addr_t gpa, size_t size);
>> +    void (*unbind_guest_msi)(struct iommu_domain *domain, dma_addr_t
>> giova);
>> +
>>   unsigned long pgsize_bitmap;
>>   };
>>   @@ -356,6 +362,11 @@ extern void iommu_detach_pasid_table(struct
>> iommu_domain *domain);
>>   extern int iommu_cache_invalidate(struct 

Re: [PATCH] of/device: add blacklist for iommu dma_ops

2019-05-10 Thread Rob Clark
On Tue, Dec 4, 2018 at 2:29 PM Rob Herring  wrote:
>
> On Sat, Dec 1, 2018 at 10:54 AM Rob Clark  wrote:
> >
> > This solves a problem we see with drm/msm, caused by getting
> > iommu_dma_ops while we attach our own domain and manage it directly at
> > the iommu API level:
> >
> >   [0038] user address but active_mm is swapper
> >   Internal error: Oops: 9605 [#1] PREEMPT SMP
> >   Modules linked in:
> >   CPU: 7 PID: 70 Comm: kworker/7:1 Tainted: GW 4.19.3 #90
> >   Hardware name: xxx (DT)
> >   Workqueue: events deferred_probe_work_func
> >   pstate: 80c9 (Nzcv daif +PAN +UAO)
> >   pc : iommu_dma_map_sg+0x7c/0x2c8
> >   lr : iommu_dma_map_sg+0x40/0x2c8
> >   sp : ff80095eb4f0
> >   x29: ff80095eb4f0 x28: 
> >   x27: ffc0f9431578 x26: 
> >   x25:  x24: 0003
> >   x23: 0001 x22: ffc0fa9ac010
> >   x21:  x20: ffc0fab40980
> >   x19: ffc0fab40980 x18: 0003
> >   x17: 01c4 x16: 0007
> >   x15: 000e x14: 
> >   x13:  x12: 0028
> >   x11: 0101010101010101 x10: 7f7f7f7f7f7f7f7f
> >   x9 :  x8 : ffc0fab409a0
> >   x7 :  x6 : 0002
> >   x5 : 0001 x4 : 
> >   x3 : 0001 x2 : 0002
> >   x1 : ffc0f9431578 x0 : 
> >   Process kworker/7:1 (pid: 70, stack limit = 0x17d08ffb)
> >   Call trace:
> >iommu_dma_map_sg+0x7c/0x2c8
> >__iommu_map_sg_attrs+0x70/0x84
> >get_pages+0x170/0x1e8
> >msm_gem_get_iova+0x8c/0x128
> >_msm_gem_kernel_new+0x6c/0xc8
> >msm_gem_kernel_new+0x4c/0x58
> >dsi_tx_buf_alloc_6g+0x4c/0x8c
> >msm_dsi_host_modeset_init+0xc8/0x108
> >msm_dsi_modeset_init+0x54/0x18c
> >_dpu_kms_drm_obj_init+0x430/0x474
> >dpu_kms_hw_init+0x5f8/0x6b4
> >msm_drm_bind+0x360/0x6c8
> >try_to_bring_up_master.part.7+0x28/0x70
> >component_master_add_with_match+0xe8/0x124
> >msm_pdev_probe+0x294/0x2b4
> >platform_drv_probe+0x58/0xa4
> >really_probe+0x150/0x294
> >driver_probe_device+0xac/0xe8
> >__device_attach_driver+0xa4/0xb4
> >bus_for_each_drv+0x98/0xc8
> >__device_attach+0xac/0x12c
> >device_initial_probe+0x24/0x30
> >bus_probe_device+0x38/0x98
> >deferred_probe_work_func+0x78/0xa4
> >process_one_work+0x24c/0x3dc
> >worker_thread+0x280/0x360
> >kthread+0x134/0x13c
> >ret_from_fork+0x10/0x18
> >   Code: d284 91000725 6b17039f 5400048a (f9401f40)
> >   ---[ end trace f22dda57f3648e2c ]---
> >   Kernel panic - not syncing: Fatal exception
> >   SMP: stopping secondary CPUs
> >   Kernel Offset: disabled
> >   CPU features: 0x0,22802a18
> >   Memory Limit: none
> >
> > The problem is that when drm/msm does it's own iommu_attach_device(),
> > now the domain returned by iommu_get_domain_for_dev() is drm/msm's
> > domain, and it doesn't have domain->iova_cookie.
> >
> > We kind of avoided this problem prior to sdm845/dpu because the iommu
> > was attached to the mdp node in dt, which is a child of the toplevel
> > mdss node (which corresponds to the dev passed in dma_map_sg()).  But
> > with sdm845, now the iommu is attached at the mdss level so we hit the
> > iommu_dma_ops in dma_map_sg().
> >
> > But auto allocating/attaching a domain before the driver is probed was
> > already a blocking problem for enabling per-context pagetables for the
> > GPU.  This problem is also now solved with this patch.
> >
> > Fixes: 97890ba9289c dma-mapping: detect and configure IOMMU in 
> > of_dma_configure
> > Tested-by: Douglas Anderson 
> > Signed-off-by: Rob Clark 
> > ---
> > This is an alternative/replacement for [1].  What it lacks in elegance
> > it makes up for in practicality ;-)
> >
> > [1] https://patchwork.freedesktop.org/patch/264930/
> >
> >  drivers/of/device.c | 22 ++
> >  1 file changed, 22 insertions(+)
> >
> > diff --git a/drivers/of/device.c b/drivers/of/device.c
> > index 5957cd4fa262..15ffee00fb22 100644
> > --- a/drivers/of/device.c
> > +++ b/drivers/of/device.c
> > @@ -72,6 +72,14 @@ int of_device_add(struct platform_device *ofdev)
> > return device_add(>dev);
> >  }
> >
> > +static const struct of_device_id iommu_blacklist[] = {
> > +   { .compatible = "qcom,mdp4" },
> > +   { .compatible = "qcom,mdss" },
> > +   { .compatible = "qcom,sdm845-mdss" },
> > +   { .compatible = "qcom,adreno" },
> > +   {}
> > +};
>
> Not completely clear to whether this is still needed or not, but this
> really won't scale. Why can't the driver for these devices override
> whatever has been setup by default?
>

fwiw, at the moment it is not needed, but it will become needed again
to implement per-context pagetables (although I suppose for this we
only need to blacklist qcom,adreno and not also the display nodes).

The 

Re: [PATCH v7 12/23] iommu/smmuv3: Get prepared for nested stage support

2019-05-10 Thread Auger Eric
Hi Robin,

On 5/8/19 4:24 PM, Robin Murphy wrote:
> On 08/04/2019 13:19, Eric Auger wrote:
>> To allow nested stage support, we need to store both
>> stage 1 and stage 2 configurations (and remove the former
>> union).
>>
>> A nested setup is characterized by both s1_cfg and s2_cfg
>> set.
>>
>> We introduce a new ste.abort field that will be set upon
>> guest stage1 configuration passing. If s1_cfg is NULL and
>> ste.abort is set, traffic can't pass. If ste.abort is not set,
>> S1 is bypassed.
>>
>> arm_smmu_write_strtab_ent() is modified to write both stage
>> fields in the STE and deal with the abort field.
>>
>> In nested mode, only stage 2 is "finalized" as the host does
>> not own/configure the stage 1 context descriptor, guest does.
>>
>> Signed-off-by: Eric Auger 
>>
>> ---
>>
>> v4 -> v5:
>> - reset ste.abort on detach
>>
>> v3 -> v4:
>> - s1_cfg.nested_abort and nested_bypass removed.
>> - s/ste.nested/ste.abort
>> - arm_smmu_write_strtab_ent modifications with introduction
>>    of local abort, bypass and translate local variables
>> - comment updated
>>
>> v1 -> v2:
>> - invalidate the STE before moving from a live STE config to another
>> - add the nested_abort and nested_bypass fields
>> ---
>>   drivers/iommu/arm-smmu-v3.c | 35 ---
>>   1 file changed, 20 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
>> index 21d027695181..e22e944ffc05 100644
>> --- a/drivers/iommu/arm-smmu-v3.c
>> +++ b/drivers/iommu/arm-smmu-v3.c
>> @@ -211,6 +211,7 @@
>>   #define STRTAB_STE_0_CFG_BYPASS    4
>>   #define STRTAB_STE_0_CFG_S1_TRANS    5
>>   #define STRTAB_STE_0_CFG_S2_TRANS    6
>> +#define STRTAB_STE_0_CFG_NESTED    7
>>     #define STRTAB_STE_0_S1FMT    GENMASK_ULL(5, 4)
>>   #define STRTAB_STE_0_S1FMT_LINEAR    0
>> @@ -514,6 +515,7 @@ struct arm_smmu_strtab_ent {
>>    * configured according to the domain type.
>>    */
>>   bool    assigned;
>> +    bool    abort;
>>   struct arm_smmu_s1_cfg    *s1_cfg;
>>   struct arm_smmu_s2_cfg    *s2_cfg;
>>   };
>> @@ -628,10 +630,8 @@ struct arm_smmu_domain {
>>   bool    non_strict;
>>     enum arm_smmu_domain_stage    stage;
>> -    union {
>> -    struct arm_smmu_s1_cfg    s1_cfg;
>> -    struct arm_smmu_s2_cfg    s2_cfg;
>> -    };
>> +    struct arm_smmu_s1_cfg    s1_cfg;
>> +    struct arm_smmu_s2_cfg    s2_cfg;
>>     struct iommu_domain    domain;
>>   @@ -1108,12 +1108,13 @@ static void arm_smmu_write_strtab_ent(struct
>> arm_smmu_device *smmu, u32 sid,
>>     __le64 *dst, struct arm_smmu_strtab_ent *ste)
>>   {
>>   /*
>> - * This is hideously complicated, but we only really care about
>> - * three cases at the moment:
>> + * We care about the following transitions:
>>    *
>>    * 1. Invalid (all zero) -> bypass/fault (init)
>> - * 2. Bypass/fault -> translation/bypass (attach)
>> - * 3. Translation/bypass -> bypass/fault (detach)
>> + * 2. Bypass/fault -> single stage translation/bypass (attach)
>> + * 3. single stage Translation/bypass -> bypass/fault (detach)
>> + * 4. S2 -> S1 + S2 (attach_pasid_table)
>> + * 5. S1 + S2 -> S2 (detach_pasid_table)
>>    *
>>    * Given that we can't update the STE atomically and the SMMU
>>    * doesn't read the thing in a defined order, that leaves us
>> @@ -1124,7 +1125,7 @@ static void arm_smmu_write_strtab_ent(struct
>> arm_smmu_device *smmu, u32 sid,
>>    * 3. Update Config, sync
>>    */
>>   u64 val = le64_to_cpu(dst[0]);
>> -    bool ste_live = false;
>> +    bool abort, bypass, translate, ste_live = false;
>>   struct arm_smmu_cmdq_ent prefetch_cmd = {
>>   .opcode    = CMDQ_OP_PREFETCH_CFG,
>>   .prefetch    = {
>> @@ -1138,11 +1139,11 @@ static void arm_smmu_write_strtab_ent(struct
>> arm_smmu_device *smmu, u32 sid,
>>   break;
>>   case STRTAB_STE_0_CFG_S1_TRANS:
>>   case STRTAB_STE_0_CFG_S2_TRANS:
>> +    case STRTAB_STE_0_CFG_NESTED:
>>   ste_live = true;
>>   break;
>>   case STRTAB_STE_0_CFG_ABORT:
>> -    if (disable_bypass)
>> -    break;
>> +    break;
>>   default:
>>   BUG(); /* STE corruption */
>>   }
>> @@ -1152,8 +1153,13 @@ static void arm_smmu_write_strtab_ent(struct
>> arm_smmu_device *smmu, u32 sid,
>>   val = STRTAB_STE_0_V;
>>     /* Bypass/fault */
>> -    if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) {
>> -    if (!ste->assigned && disable_bypass)
>> +
>> +    abort = (!ste->assigned && disable_bypass) || ste->abort;
>> +    translate = ste->s1_cfg || ste->s2_cfg;
>> +    bypass = !abort && !translate;
>> +
>> +    if (abort || bypass) {
>> +    if (abort)
>>   val |= FIELD_PREP(STRTAB_STE_0_CFG,
>> 

Re: [ARM SMMU] Dynamic StreamID allocation

2019-05-10 Thread Jean-Philippe Brucker
On 10/05/2019 13:33, Pankaj Bansal wrote:
> Hi Will/Robin/Joerg,
> 
> I am s/w engineer from NXP India Pvt. Ltd.
> We are using SMMU-V3 in one of NXP SOC.
> I have a question about the SMMU Stream ID allocation in linux.
> 
> Right now the Stream IDs allocated to a device are mapped via device tree to 
> the device.
> https://elixir.bootlin.com/linux/latest/source/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt#L39
> 
> As the device tree is passed from bootloader to linux, we detect all the 
> stream IDs needed by a device in bootloader and add their IDs in respective 
> device nodes.
> For each PCIE Endpoint (a unique BDF (Bus Device Function)) on PCIE bus, we 
> are assigning a unique Stream ID in bootloader.
> 
> However, this poses an issue with PCIE hot plug.
> If we plug in a pcie device while linux is running, a unique BDF is assigned 
> to the device, for which there is no stream ID in device tree.
> 
> How can this problem be solved in linux?

Assuming the streamID associated to a BDF is predictable (streamID = BDF
+ constant), using the iommu-map property should just work:

https://elixir.bootlin.com/linux/latest/source/Documentation/devicetree/bindings/pci/pci-iommu.txt

It describes the streamIDs of all possible BDFs, including hotplugged
functions.

Thanks,
Jean

> 
> Is there a way to assign (and revoke) stream IDs at run time?
> 
> Regards,
> Pankaj Bansal
> 
> 
> ___
> linux-arm-kernel mailing list
> linux-arm-ker...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> 

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[ARM SMMU] Dynamic StreamID allocation

2019-05-10 Thread Pankaj Bansal
Hi Will/Robin/Joerg,

I am s/w engineer from NXP India Pvt. Ltd.
We are using SMMU-V3 in one of NXP SOC.
I have a question about the SMMU Stream ID allocation in linux.

Right now the Stream IDs allocated to a device are mapped via device tree to 
the device.
https://elixir.bootlin.com/linux/latest/source/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt#L39

As the device tree is passed from bootloader to linux, we detect all the stream 
IDs needed by a device in bootloader and add their IDs in respective device 
nodes.
For each PCIE Endpoint (a unique BDF (Bus Device Function)) on PCIE bus, we are 
assigning a unique Stream ID in bootloader.

However, this poses an issue with PCIE hot plug.
If we plug in a pcie device while linux is running, a unique BDF is assigned to 
the device, for which there is no stream ID in device tree.

How can this problem be solved in linux?

Is there a way to assign (and revoke) stream IDs at run time?

Regards,
Pankaj Bansal

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


RE: [ARM SMMU] Dynamic StreamID allocation

2019-05-10 Thread Pankaj Bansal
Correction: we use ARM SMMU-500.
Corresponding bindings are : 
https://elixir.bootlin.com/linux/latest/source/Documentation/devicetree/bindings/iommu/arm,smmu.txt#L49

The #iommu-cells is 1 in our SOC: 
https://elixir.bootlin.com/linux/latest/source/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi#L693

Regards,
Pankaj Bansal

> -Original Message-
> From: Pankaj Bansal
> Sent: Friday, 10 May, 2019 06:04 PM
> To: Will Deacon ; Robin Murphy
> ; Joerg Roedel 
> Cc: linux-arm-ker...@lists.infradead.org; iommu@lists.linux-foundation.org;
> Varun Sethi ; Nipun Gupta 
> Subject: [ARM SMMU] Dynamic StreamID allocation
> 
> Hi Will/Robin/Joerg,
> 
> I am s/w engineer from NXP India Pvt. Ltd.
> We are using SMMU-V3 in one of NXP SOC.
> I have a question about the SMMU Stream ID allocation in linux.
> 
> Right now the Stream IDs allocated to a device are mapped via device tree to
> the device.
> https://elixir.bootlin.com/linux/latest/source/Documentation/devicetree/bindin
> gs/iommu/arm,smmu-v3.txt#L39
> 
> As the device tree is passed from bootloader to linux, we detect all the 
> stream
> IDs needed by a device in bootloader and add their IDs in respective device
> nodes.
> For each PCIE Endpoint (a unique BDF (Bus Device Function)) on PCIE bus, we 
> are
> assigning a unique Stream ID in bootloader.
> 
> However, this poses an issue with PCIE hot plug.
> If we plug in a pcie device while linux is running, a unique BDF is assigned 
> to the
> device, for which there is no stream ID in device tree.
> 
> How can this problem be solved in linux?
> 
> Is there a way to assign (and revoke) stream IDs at run time?
> 
> Regards,
> Pankaj Bansal

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 0/4] Retrieving zPCI specific info with VFIO

2019-05-10 Thread Pierre Morel
Using the PCI VFIO interface allows userland, a.k.a. QEMU, to retrieve
ZPCI specific information without knowing Z specific identifiers
like the function ID or the function handle of the zPCI function
hidden behind the PCI interface. 
  
By using the VFIO_IOMMU_GET_INFO ioctl we enter the vfio_iommu_type1
ioctl callback and can insert there the treatment for a new Z specific
capability.
  
Once in vfio_iommu_type1 we can retrieve the real iommu device,
s390_iommu and call the get_attr iommu operation's callback
in which we can retrieve the zdev device and start clp operations
to retrieve Z specific values the guest driver is concerned with.
 
To share the code with arch/s390/pci/pci_clp.c the original functions
in pci_clp.c to query PCI functions and PCI functions group are
modified so that they can be exported.

A new function clp_query_pci() replaces clp_query_pci_fn() and
the previous calls to clp_query_pci_fn() and clp_query_pci_fngrp()
are replaced with calls to zdev_query_pci_fn() and zdev_query_pci_fngrp()
using a zdev pointer as argument.

These two functions are exported to be used from out of the s390_iommu
code.

Pierre Morel (4):
  s390: pci: Exporting access to CLP PCI function and PCI group
  vfio: vfio_iommu_type1: Define VFIO_IOMMU_INFO_CAPABILITIES
  s390: iommu: Adding get attributes for s390_iommu
  vfio: vfio_iommu_type1: implement VFIO_IOMMU_INFO_CAPABILITIES

 arch/s390/include/asm/pci.h |  3 ++
 arch/s390/pci/pci_clp.c | 72 ---
 drivers/iommu/s390-iommu.c  | 77 +
 drivers/vfio/vfio_iommu_type1.c | 95 -
 include/linux/iommu.h   |  4 ++
 include/uapi/linux/vfio.h   | 10 +
 6 files changed, 226 insertions(+), 35 deletions(-)

-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/4] s390: pci: Exporting access to CLP PCI function and PCI group

2019-05-10 Thread Pierre Morel
For the generic implementation of VFIO PCI we need to retrieve
the hardware configuration for the PCI functions and the
PCI function groups.

We modify the internal function using CLP Query PCI function and
CLP query PCI function group so that they can be called from
outside the S390 architecture PCI code and prefix the two
functions with "zdev" to make clear that they can be called
knowing only the associated zdevice.

Signed-off-by: Pierre Morel 
Reviewed-by: Sebastian Ott 
---
 arch/s390/include/asm/pci.h |  3 ++
 arch/s390/pci/pci_clp.c | 72 -
 2 files changed, 41 insertions(+), 34 deletions(-)

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 305befd..e66b246 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -261,4 +261,7 @@ cpumask_of_pcibus(const struct pci_bus *bus)
 
 #endif /* CONFIG_NUMA */
 
+int zdev_query_pci_fngrp(struct zpci_dev *zdev,
+struct clp_req_rsp_query_pci_grp *rrb);
+int zdev_query_pci_fn(struct zpci_dev *zdev, struct clp_req_rsp_query_pci 
*rrb);
 #endif
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 3a36b07..4ae5d77 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -113,32 +113,18 @@ static void clp_store_query_pci_fngrp(struct zpci_dev 
*zdev,
}
 }
 
-static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid)
+int zdev_query_pci_fngrp(struct zpci_dev *zdev,
+struct clp_req_rsp_query_pci_grp *rrb)
 {
-   struct clp_req_rsp_query_pci_grp *rrb;
-   int rc;
-
-   rrb = clp_alloc_block(GFP_KERNEL);
-   if (!rrb)
-   return -ENOMEM;
-
memset(rrb, 0, sizeof(*rrb));
rrb->request.hdr.len = sizeof(rrb->request);
rrb->request.hdr.cmd = CLP_QUERY_PCI_FNGRP;
rrb->response.hdr.len = sizeof(rrb->response);
-   rrb->request.pfgid = pfgid;
+   rrb->request.pfgid = zdev->pfgid;
 
-   rc = clp_req(rrb, CLP_LPS_PCI);
-   if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
-   clp_store_query_pci_fngrp(zdev, &rrb->response);
-   else {
-   zpci_err("Q PCI FGRP:\n");
-   zpci_err_clp(rrb->response.hdr.rsp, rc);
-   rc = -EIO;
-   }
-   clp_free_block(rrb);
-   return rc;
+   return clp_req(rrb, CLP_LPS_PCI);
 }
+EXPORT_SYMBOL(zdev_query_pci_fngrp);
 
 static int clp_store_query_pci_fn(struct zpci_dev *zdev,
  struct clp_rsp_query_pci *response)
@@ -174,32 +160,50 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev,
return 0;
 }
 
-static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh)
+int zdev_query_pci_fn(struct zpci_dev *zdev, struct clp_req_rsp_query_pci *rrb)
+{
+
+   memset(rrb, 0, sizeof(*rrb));
+   rrb->request.hdr.len = sizeof(rrb->request);
+   rrb->request.hdr.cmd = CLP_QUERY_PCI_FN;
+   rrb->response.hdr.len = sizeof(rrb->response);
+   rrb->request.fh = zdev->fh;
+
+   return clp_req(rrb, CLP_LPS_PCI);
+}
+EXPORT_SYMBOL(zdev_query_pci_fn);
+
+static int clp_query_pci(struct zpci_dev *zdev)
 {
struct clp_req_rsp_query_pci *rrb;
+   struct clp_req_rsp_query_pci_grp *grrb;
int rc;
 
rrb = clp_alloc_block(GFP_KERNEL);
if (!rrb)
return -ENOMEM;
 
-   memset(rrb, 0, sizeof(*rrb));
-   rrb->request.hdr.len = sizeof(rrb->request);
-   rrb->request.hdr.cmd = CLP_QUERY_PCI_FN;
-   rrb->response.hdr.len = sizeof(rrb->response);
-   rrb->request.fh = fh;
-
-   rc = clp_req(rrb, CLP_LPS_PCI);
-   if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
-   rc = clp_store_query_pci_fn(zdev, &rrb->response);
-   if (rc)
-   goto out;
-   rc = clp_query_pci_fngrp(zdev, rrb->response.pfgid);
-   } else {
+   rc = zdev_query_pci_fn(zdev, rrb);
+   if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
zpci_err("Q PCI FN:\n");
zpci_err_clp(rrb->response.hdr.rsp, rc);
rc = -EIO;
+   goto out;
}
+   rc = clp_store_query_pci_fn(zdev, &rrb->response);
+   if (rc)
+   goto out;
+
+   grrb = (struct clp_req_rsp_query_pci_grp *)rrb;
+   rc = zdev_query_pci_fngrp(zdev, grrb);
+   if (rc || grrb->response.hdr.rsp != CLP_RC_OK) {
+   zpci_err("Q PCI FGRP:\n");
+   zpci_err_clp(grrb->response.hdr.rsp, rc);
+   rc = -EIO;
+   goto out;
+   }
+   clp_store_query_pci_fngrp(zdev, &rrb->response);
+
 out:
clp_free_block(rrb);
return rc;
@@ -219,7 +223,7 @@ int clp_add_pci_device(u32 fid, u32 fh, int configured)
zdev->fid = fid;
 
/* Query function properties and update zdev */
-   rc = clp_query_pci_fn(zdev, fh);
+   rc = clp_query_pci(zdev);
if (rc)
goto error;
 
-- 
2.7.4


[PATCH 3/4] s390: iommu: Adding get attributes for s390_iommu

2019-05-10 Thread Pierre Morel
We add "get attributes" to the S390 iommu operations to retrieve the S390
specific attributes through the call of zPCI dedicated CLP functions.

Signed-off-by: Pierre Morel 
---
 drivers/iommu/s390-iommu.c | 77 ++
 include/linux/iommu.h  |  4 +++
 2 files changed, 81 insertions(+)

diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 22d4db3..98082f0 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -363,6 +363,82 @@ void zpci_destroy_iommu(struct zpci_dev *zdev)
	iommu_device_sysfs_remove(&zdev->iommu_dev);
 }
 
+struct zpci_dev *get_zpci(struct s390_domain *s390_domain)
+{
+   struct s390_domain_device *domain_device;
+
+   domain_device = list_first_entry(&s390_domain->devices,
+struct s390_domain_device, list);
+   if (!domain_device)
+   return NULL;
+   return domain_device->zdev;
+}
+
+static int s390_domain_get_fn(struct iommu_domain *domain, void *data)
+{
+   struct zpci_dev *zdev;
+   struct clp_req_rsp_query_pci *rrb;
+   int rc;
+
+   zdev = get_zpci(to_s390_domain(domain));
+   if (!zdev)
+   return -ENODEV;
+   rrb = (struct clp_req_rsp_query_pci *)
+ __get_free_pages(GFP_KERNEL, get_order(CLP_BLK_SIZE));
+   if (!rrb)
+   return -ENOMEM;
+   rc = zdev_query_pci_fn(zdev, rrb);
+
+   if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
+   memcpy(data, &rrb->response, sizeof(struct clp_rsp_query_pci));
+   else
+   rc = -EIO;
+   free_pages((unsigned long) rrb, get_order(CLP_BLK_SIZE));
+   return rc;
+}
+
+static int s390_domain_get_grp(struct iommu_domain *domain, void *data)
+{
+   struct zpci_dev *zdev;
+   struct clp_req_rsp_query_pci_grp *rrb;
+   int rc;
+
+   zdev = get_zpci(to_s390_domain(domain));
+   if (!zdev)
+   return -ENODEV;
+   rrb = (struct clp_req_rsp_query_pci_grp *)
+ __get_free_pages(GFP_KERNEL, get_order(CLP_BLK_SIZE));
+   if (!rrb)
+   return -ENOMEM;
+
+   rc = zdev_query_pci_fngrp(zdev, rrb);
+   if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
+   memcpy(data, &rrb->response,
+  sizeof(struct clp_rsp_query_pci_grp));
+   else
+   rc = -EIO;
+
+   free_pages((unsigned long) rrb, get_order(CLP_BLK_SIZE));
+   return rc;
+}
+
+static int s390_domain_get_attr(struct iommu_domain *domain,
+   enum iommu_attr attr, void *data)
+{
+   switch (attr) {
+   case DOMAIN_ATTR_ZPCI_FN_SIZE:
+   return sizeof(struct clp_rsp_query_pci);
+   case DOMAIN_ATTR_ZPCI_GRP_SIZE:
+   return sizeof(struct clp_rsp_query_pci_grp);
+   case DOMAIN_ATTR_ZPCI_FN:
+   return s390_domain_get_fn(domain, data);
+   case DOMAIN_ATTR_ZPCI_GRP:
+   return s390_domain_get_grp(domain, data);
+   default:
+   return -ENODEV;
+   }
+}
+
 static const struct iommu_ops s390_iommu_ops = {
.capable = s390_iommu_capable,
.domain_alloc = s390_domain_alloc,
@@ -376,6 +452,7 @@ static const struct iommu_ops s390_iommu_ops = {
.remove_device = s390_iommu_remove_device,
.device_group = generic_device_group,
.pgsize_bitmap = S390_IOMMU_PGSIZES,
+   .domain_get_attr = s390_domain_get_attr,
 };
 
 static int __init s390_iommu_init(void)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index ffbbc7e..ebdcac4 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -125,6 +125,10 @@ enum iommu_attr {
DOMAIN_ATTR_FSL_PAMUV1,
DOMAIN_ATTR_NESTING,/* two stages of translation */
DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
+   DOMAIN_ATTR_ZPCI_FN_SIZE,
+   DOMAIN_ATTR_ZPCI_GRP_SIZE,
+   DOMAIN_ATTR_ZPCI_FN,
+   DOMAIN_ATTR_ZPCI_GRP,
DOMAIN_ATTR_MAX,
 };
 
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 2/4] vfio: vfio_iommu_type1: Define VFIO_IOMMU_INFO_CAPABILITIES

2019-05-10 Thread Pierre Morel
To use the VFIO_IOMMU_GET_INFO to retrieve IOMMU specific information,
we define a new flag VFIO_IOMMU_INFO_CAPABILITIES in the
vfio_iommu_type1_info structure and the associated capability
information block.

Signed-off-by: Pierre Morel 
---
 include/uapi/linux/vfio.h | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 8f10748..8f68e0f 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -715,6 +715,16 @@ struct vfio_iommu_type1_info {
__u32   flags;
 #define VFIO_IOMMU_INFO_PGSIZES (1 << 0)   /* supported page sizes info */
__u64   iova_pgsizes;   /* Bitmap of supported page sizes */
+#define VFIO_IOMMU_INFO_CAPABILITIES (1 << 1)  /* support capabilities info */
+   __u64   cap_offset; /* Offset within info struct of first cap */
+};
+
+#define VFIO_IOMMU_INFO_CAP_QFN1
+#define VFIO_IOMMU_INFO_CAP_QGRP   2
+
+struct vfio_iommu_type1_info_block {
+   struct vfio_info_cap_header header;
+   __u32 data[];
 };
 
 #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


RE: [PATCH 3/3] iommu/vt-d: Add debugfs support to show scalable mode DMAR table internals

2019-05-10 Thread Prakhya, Sai Praneeth
> >   static void ctx_tbl_walk(struct seq_file *m, struct intel_iommu *iommu, 
> > u16
> bus)
> >   {
> > struct context_entry *context;
> > -   u16 devfn;
> > +   u16 devfn, pasid_dir_size;
> > +   u64 pasid_dir_ptr;
> >
> > for (devfn = 0; devfn < 256; devfn++) {
> > struct tbl_walk tbl_wlk = {0};
> >
> > +   /*
> > +* Scalable mode root entry points to upper context table and
> > +* lower context table. Each scalable mode context table has
> > +* 128 context entries, whereas legacy mode context table has
> > +* 256 context entries. So for scalable mode, devfn > 127 is
> > +* invalid. But, iommu_context_addr() inherently handles this by
> 
> This comment is a bit misleading. :-)
> 
> devfn > 127 is also valid for scalable mode. The context entries for former 
> 128
> devices are in the lower scalable-mode context-table, while the latter 128
> devices in upper scalable-mode context-table.
> This has been handled in iommu_context_addr(), so the caller doesn't need to
> worry about this.

Yes.. that makes sense. Will correct it in V2.

Regards,
Sai
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


RE: [PATCH 0/3] Add debugfs support to show scalable mode DMAR table

2019-05-10 Thread Prakhya, Sai Praneeth
> Hi Sai,
> 
> On 5/10/19 2:41 AM, Sai Praneeth Prakhya wrote:
> > From: Sai Praneeth 
> >
> > Presently, "/sys/kernel/debug/iommu/intel/dmar_translation_struct"
> > file dumps only legacy DMAR table which consists of root table and
> > context table. Scalable mode DMAR table adds PASID directory and PASID
> > table. Hence, add support to dump these tables as well.
> >
> > Directly extending the present dumping format for PASID tables will
> > make the output look clumsy. Hence, the first patch modifies the
> > present format to a tabular format. The second patch introduces macros
> > that are used during PASID table walk and the third patch actually
> > adds support to dump scalable mode DMAR table.
> >
> > Sai Praneeth (3):
> >iommu/vt-d: Modify the format of intel DMAR tables dump
> >iommu/vt-d: Introduce macros useful for dumping DMAR table
> >iommu/vt-d: Add debugfs support to show scalable mode DMAR table
> >  internals
> 
> This patch set looks good to me in general. One minor suggestion is that the
> author name and signed-of-by name should be consistent for all patches.

Thanks for the suggestion. Sorry! that I missed it.
Will quickly send a V2 fixing it.

Regards,
Sai
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu