Re: [PATCH] iommu/arm-smmu-v2: ThunderX(errata-23399) mis-extends 64bit registers

2015-07-30 Thread Chalamarla, Tirumalesh
Does something like this look good?

+#ifdef CONFIG_64BIT
+#define smmu_writeq(reg64, addr)   writeq_relaxed((reg64), (addr))
+#else
+#define smmu_writeq(reg64, addr)   \
+   writel_relaxed(((reg64) >> 32), ((addr) + 4));  \
+   writel_relaxed((reg64), (addr))
+
+
 /* Configuration registers */
 #define ARM_SMMU_GR0_sCR0  0x0
 #define sCR0_CLIENTPD  (1 << 0)
@@ -226,7 +234,7 @@
 #define TTBCR2_SEP_SHIFT   15
 #define TTBCR2_SEP_UPSTREAM(0x7 << TTBCR2_SEP_SHIFT)

-#define TTBRn_HI_ASID_SHIFT16
+#define TTBRn_ASID_SHIFT   48

 #define FSR_MULTI  (1 << 31)
 #define FSR_SS (1 << 30)
@@ -719,6 +727,7 @@ static void arm_smmu_init_context_bank(struct 
arm_smmu_domain *smmu_domain,
   struct io_pgtable_cfg *pgtbl_cfg)
 {
u32 reg;
+   u64 reg64;
bool stage1;
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
struct arm_smmu_device *smmu = smmu_domain->smmu;
@@ -762,22 +771,16 @@ static void arm_smmu_init_context_bank(struct 
arm_smmu_domain *smmu_domain,

/* TTBRs */
if (stage1) {
-   reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
-   writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
-   reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0] >> 32;
-   reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
-   writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI);
-
-   reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
-   writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_LO);
-   reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1] >> 32;
-   reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
-   writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_HI);
+   reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
+   reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) << TTBRn_ASID_SHIFT;
+   smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR0_LO);
+
+   reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
+   reg64 |= ARM_SMMU_CB_ASID(cfg) << TTBRn_ASID_SHIFT;
+   smmu_writeq(reg, cb_base + ARM_SMMU_CB_TTBR1_LO);
} else {
-   reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
-   writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
-   reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr >> 32;
-   writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI);
+   reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
+   smmu_writeq(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
}

/* TTBCR */
@@ -1236,10 +1239,8 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct 
iommu_domain *domain,
u32 reg = iova & ~0xfff;
writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
} else {
-   u32 reg = iova & ~0xfff;
-   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
-   reg = ((u64)iova & ~0xfff) >> 32;
-   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_HI);
+   u64 reg = iova & ~0xfff;
+   smmu_writeq(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
}

if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
~

On Jul 30, 2015, at 12:07 PM, Chalamarla, Tirumalesh 
mailto:tchalama...@caviumnetworks.com>> wrote:


On Jul 30, 2015, at 11:45 AM, Will Deacon 
mailto:will.dea...@arm.com>> wrote:

Hello,

On Thu, Jul 30, 2015 at 06:55:06PM +0100, 
tchalama...@caviumnetworks.com wrote:
From: Tirumalesh Chalamarla 
mailto:tchalama...@caviumnetworks.com>>

The SMMU architecture defines two different behaviors when 64-bit
registers are written with 32-bit writes.  The first behavior causes
zero extension into the upper 32-bits.  The second behavior splits a
64-bit register into "normal" 32-bit register pairs.

On some passes of ThunderX,
the following registers are incorrectly zero-extended when they should
instead behave as normal 32-bit register pairs:

 SMMU()_(S)GFAR
 SMMU()_NSGFAR
 SMMU()_CB()_TTBR0
 SMMU()_CB()_TTBR1
 SMMU()_CB()_FAR

Signed-off-by: Tirumalesh Chalamarla 
mailto:tchalama...@caviumnetworks.com>>
---
drivers/iommu/arm-smmu.c | 51 ++--
1 file changed, 36 insertions(+), 15 deletions(-)

[...]

@@ -762,22 +766,39 @@ static void arm_smmu_init_context_bank(struct 
arm_smmu_domain *smmu_domain,

/* TTBRs */
if (stage1) {
- reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
- writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
- reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0] >> 32;
- reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
- writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI);
-
- reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
- writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_LO);
- reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1] >> 32;

Re: [PATCH] iommu/arm-smmu-v2: ThunderX(errata-23399) mis-extends 64bit registers

2015-07-30 Thread Robin Murphy

On 30/07/15 19:45, Will Deacon wrote:

Hello,

On Thu, Jul 30, 2015 at 06:55:06PM +0100, tchalama...@caviumnetworks.com wrote:

From: Tirumalesh Chalamarla 

The SMMU architecture defines two different behaviors when 64-bit
registers are written with 32-bit writes.  The first behavior causes
zero extension into the upper 32-bits.  The second behavior splits a
64-bit register into "normal" 32-bit register pairs.

On some passes of ThunderX,
the following registers are incorrectly zero-extended when they should
instead behave as normal 32-bit register pairs:

   SMMU()_(S)GFAR
   SMMU()_NSGFAR
   SMMU()_CB()_TTBR0
   SMMU()_CB()_TTBR1
   SMMU()_CB()_FAR

Signed-off-by: Tirumalesh Chalamarla 
---
  drivers/iommu/arm-smmu.c | 51 ++--
  1 file changed, 36 insertions(+), 15 deletions(-)


[...]


@@ -762,22 +766,39 @@ static void arm_smmu_init_context_bank(struct 
arm_smmu_domain *smmu_domain,

/* TTBRs */
if (stage1) {
-   reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
-   writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
-   reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0] >> 32;
-   reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
-   writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI);
-
-   reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
-   writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_LO);
-   reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1] >> 32;
-   reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
-   writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_HI);
+   if (smmu->options & ARM_SMMU_OPT_64BIT_WRITES_ONLY) {
+   reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
+   reg64 |= ((u64) ARM_SMMU_CB_ASID(cfg)) <<
+   (TTBRn_HI_ASID_SHIFT + 32);
+   writeq_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
+
+   reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
+   reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) <<
+   (TTBRn_HI_ASID_SHIFT + 32);
+   writeq_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_LO);
+   } else {
+   reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
+   reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0] >> 32;
+   reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI);
+
+   reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_LO);
+   reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1] >> 32;
+   reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_HI);
+   }


I'm fine with making this sort of change if you need it, but this is pretty
ugly. Worse, it won't compile for 32-bit ARM.

How about we add a wrapper around these, say smmu_writeq(...), which can
then either expand to 2x writel_relaxed or 1x writeq_relaxed depending on
CONFIG_64BIT and your erratum workaround?


Would we even need a specific erratum workaround then? I don't see an 
issue with always using writeq on 64-bit, and there's not much we can do 
for 32-bit if it has no way to write the upper half of the TTBR on this 
thing.


Robin.



Don't forget to update the ATOS code too (so you need to write the high word
first).

Will
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu



___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/arm-smmu-v2: ThunderX(errata-23399) mis-extends 64bit registers

2015-07-30 Thread Chalamarla, Tirumalesh

On Jul 30, 2015, at 11:45 AM, Will Deacon 
mailto:will.dea...@arm.com>> wrote:

Hello,

On Thu, Jul 30, 2015 at 06:55:06PM +0100, 
tchalama...@caviumnetworks.com wrote:
From: Tirumalesh Chalamarla 
mailto:tchalama...@caviumnetworks.com>>

The SMMU architecture defines two different behaviors when 64-bit
registers are written with 32-bit writes.  The first behavior causes
zero extension into the upper 32-bits.  The second behavior splits a
64-bit register into "normal" 32-bit register pairs.

On some passes of ThunderX,
the following registers are incorrectly zero-extended when they should
instead behave as normal 32-bit register pairs:

 SMMU()_(S)GFAR
 SMMU()_NSGFAR
 SMMU()_CB()_TTBR0
 SMMU()_CB()_TTBR1
 SMMU()_CB()_FAR

Signed-off-by: Tirumalesh Chalamarla 
mailto:tchalama...@caviumnetworks.com>>
---
drivers/iommu/arm-smmu.c | 51 ++--
1 file changed, 36 insertions(+), 15 deletions(-)

[...]

@@ -762,22 +766,39 @@ static void arm_smmu_init_context_bank(struct 
arm_smmu_domain *smmu_domain,

/* TTBRs */
if (stage1) {
- reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
- writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
- reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0] >> 32;
- reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
- writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI);
-
- reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
- writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_LO);
- reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1] >> 32;
- reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
- writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_HI);
+ if (smmu->options & ARM_SMMU_OPT_64BIT_WRITES_ONLY) {
+ reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
+ reg64 |= ((u64) ARM_SMMU_CB_ASID(cfg)) <<
+ (TTBRn_HI_ASID_SHIFT + 32);
+ writeq_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
+
+ reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
+ reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) <<
+ (TTBRn_HI_ASID_SHIFT + 32);
+ writeq_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_LO);
+ } else {
+ reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
+ writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
+ reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0] >> 32;
+ reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
+ writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI);
+
+ reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
+ writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_LO);
+ reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1] >> 32;
+ reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
+ writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_HI);
+ }

I'm fine with making this sort of change if you need it, but this is pretty
ugly. Worse, it won't compile for 32-bit ARM.

How about we add a wrapper around these, say smmu_writeq(...), which can
then either expand to 2x writel_relaxed or 1x writeq_relaxed depending on
CONFIG_64BIT and your erratum workaround?

I think we don’t even need to restrict it to the workaround.
We can just use CONFIG_64BIT and smmu_writeq.

If that's fine, I will repost the patch.

Thanks,
Tirumalesh.
Don't forget to update the ATOS code too (so you need to write the high word
first).

Will

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH] iommu/arm-smmu-v2: ThunderX(errata-23399) mis-extends 64bit registers

2015-07-30 Thread Will Deacon
Hello,

On Thu, Jul 30, 2015 at 06:55:06PM +0100, tchalama...@caviumnetworks.com wrote:
> From: Tirumalesh Chalamarla 
> 
> The SMMU architecture defines two different behaviors when 64-bit
> registers are written with 32-bit writes.  The first behavior causes
> zero extension into the upper 32-bits.  The second behavior splits a
> 64-bit register into "normal" 32-bit register pairs.
> 
> On some passes of ThunderX,
> the following registers are incorrectly zero-extended when they should
> instead behave as normal 32-bit register pairs:
> 
>   SMMU()_(S)GFAR
>   SMMU()_NSGFAR
>   SMMU()_CB()_TTBR0
>   SMMU()_CB()_TTBR1
>   SMMU()_CB()_FAR
> 
> Signed-off-by: Tirumalesh Chalamarla 
> ---
>  drivers/iommu/arm-smmu.c | 51 
> ++--
>  1 file changed, 36 insertions(+), 15 deletions(-)

[...]

> @@ -762,22 +766,39 @@ static void arm_smmu_init_context_bank(struct 
> arm_smmu_domain *smmu_domain,
>  
>   /* TTBRs */
>   if (stage1) {
> - reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
> - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
> - reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0] >> 32;
> - reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
> - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI);
> -
> - reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
> - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_LO);
> - reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1] >> 32;
> - reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
> - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_HI);
> + if (smmu->options & ARM_SMMU_OPT_64BIT_WRITES_ONLY) {
> + reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
> + reg64 |= ((u64) ARM_SMMU_CB_ASID(cfg)) <<
> + (TTBRn_HI_ASID_SHIFT + 32);
> + writeq_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
> +
> + reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
> + reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) <<
> + (TTBRn_HI_ASID_SHIFT + 32);
> + writeq_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_LO);
> + } else {
> + reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
> + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
> + reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0] >> 32;
> + reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
> + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI);
> +
> + reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
> + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_LO);
> + reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1] >> 32;
> + reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
> + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_HI);
> + }

I'm fine with making this sort of change if you need it, but this is pretty
ugly. Worse, it won't compile for 32-bit ARM.

How about we add a wrapper around these, say smmu_writeq(...), which can
then either expand to 2x writel_relaxed or 1x writeq_relaxed depending on
CONFIG_64BIT and your erratum workaround?

Don't forget to update the ATOS code too (so you need to write the high word
first).

Will
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [Bugfix]

2015-07-30 Thread Alex Deucher
On Thu, Jul 30, 2015 at 12:44 PM, Jiang Liu  wrote:
> Hi Alexander, Mark, Alex,
> Could you please apply the debug patch and send me back
> the dmesg? Please also boot with the kernel parameter "apic=debug".

See attached.

Thanks,

Alex

>
> Hi Mark,
> It seems that this regression is caused by the support for multiple-MSI,
> but I have no PCI card supporting multiple-MSI at hand. So may I remotely
> access your system from the Intel internal network?  That would definitely
> speed up the fix.
> Thanks!
> Gerry
>
> Signed-off-by: Jiang Liu 
> ---
>  drivers/iommu/amd_iommu.c   |   15 +--
>  drivers/iommu/intel_irq_remapping.c |4 
>  2 files changed, 17 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
> index a57e9b749895..c039ed9333a4 100644
> --- a/drivers/iommu/amd_iommu.c
> +++ b/drivers/iommu/amd_iommu.c
> @@ -3916,8 +3916,8 @@ static void irq_remapping_prepare_irte(struct 
> amd_ir_data *data,
> union irte *irte = &data->irte_entry;
> struct IO_APIC_route_entry *entry;
>
> -   data->irq_2_irte.devid = devid;
> -   data->irq_2_irte.index = index + sub_handle;
> +   irte_info->devid = devid;
> +   irte_info->index = index + sub_handle;
>
> /* Setup IRTE for IOMMU */
> irte->val = 0;
> @@ -3926,6 +3926,10 @@ static void irq_remapping_prepare_irte(struct 
> amd_ir_data *data,
> irte->fields.destination = irq_cfg->dest_apicid;
> irte->fields.dm  = apic->irq_dest_mode;
> irte->fields.valid   = 1;
> +   if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
> +   pr_warn("irqdomain: IRTE%d vector %d APICID%d data%p cfg%p\n",
> +   irte_info->index, irte->fields.vector,
> +   irte->fields.destination, data, irq_cfg);
>
> switch (info->type) {
> case X86_IRQ_ALLOC_TYPE_IOAPIC:
> @@ -3972,6 +3976,9 @@ static int irq_remapping_alloc(struct irq_domain 
> *domain, unsigned int virq,
> info->type != X86_IRQ_ALLOC_TYPE_MSIX)
> return -EINVAL;
>
> +   if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
> +   pr_warn("irqdomain: allocate %d MSI IRQ, VIRQ%d\n", nr_irqs, 
> virq);
> +
> /*
>  * With IRQ remapping enabled, don't need contiguous CPU vectors
>  * to support multiple MSI interrupts.
> @@ -3986,6 +3993,8 @@ static int irq_remapping_alloc(struct irq_domain 
> *domain, unsigned int virq,
> ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
> if (ret < 0)
> return ret;
> +   if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
> +   pr_warn("irqdomain: allocate parent returns %d\n", ret);
>
> ret = -ENOMEM;
> data = kzalloc(sizeof(*data), GFP_KERNEL);
> @@ -4005,6 +4014,8 @@ static int irq_remapping_alloc(struct irq_domain 
> *domain, unsigned int virq,
> kfree(data);
> goto out_free_parent;
> }
> +   if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
> +   pr_warn("irqdomain: allocate IRTE index %d\n", index);
>
> for (i = 0; i < nr_irqs; i++) {
> irq_data = irq_domain_get_irq_data(domain, virq + i);
> diff --git a/drivers/iommu/intel_irq_remapping.c 
> b/drivers/iommu/intel_irq_remapping.c
> index f15692a410c7..bb093bc6b334 100644
> --- a/drivers/iommu/intel_irq_remapping.c
> +++ b/drivers/iommu/intel_irq_remapping.c
> @@ -1234,6 +1234,8 @@ static void intel_irq_remapping_prepare_irte(struct 
> intel_ir_data *data,
>   MSI_ADDR_IR_SHV |
>   MSI_ADDR_IR_INDEX1(index) |
>   MSI_ADDR_IR_INDEX2(index);
> +   if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
> +   pr_warn("irqdomain: allocate index%d, subhandle%d, 
> irte%llx,%llx\n", index, sub_handle, irte->high, irte->low);
> break;
>
> default:
> @@ -1305,6 +1307,8 @@ static int intel_irq_remapping_alloc(struct irq_domain 
> *domain,
> kfree(data);
> goto out_free_parent;
> }
> +   if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
> +   pr_warn("irqdomain: allocate VIRQ%d, count%d, index%d\n", 
> virq, nr_irqs, index);
>
> for (i = 0; i < nr_irqs; i++) {
> irq_data = irq_domain_get_irq_data(domain, virq + i);
> --
> 1.7.10.4
>


dmesg.apic-debug
Description: Binary data
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

[Debug Patch] Collect more information about the regression

2015-07-30 Thread Jiang Liu
Hi Alexander, Mark, Alex,
Could you please apply the debug patch and send me back
the dmesg? Please also boot with the kernel parameter "apic=debug".

Hi Mark,
It seems that this regression is caused by the support for multiple-MSI,
but I have no PCI card supporting multiple-MSI at hand. So may I remotely
access your system from the Intel internal network?  That would definitely
speed up the fix.
Thanks!
Gerry

Signed-off-by: Jiang Liu 
---
 drivers/iommu/amd_iommu.c   |   15 +--
 drivers/iommu/intel_irq_remapping.c |4 
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index a57e9b749895..c039ed9333a4 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3916,8 +3916,8 @@ static void irq_remapping_prepare_irte(struct amd_ir_data 
*data,
union irte *irte = &data->irte_entry;
struct IO_APIC_route_entry *entry;
 
-   data->irq_2_irte.devid = devid;
-   data->irq_2_irte.index = index + sub_handle;
+   irte_info->devid = devid;
+   irte_info->index = index + sub_handle;
 
/* Setup IRTE for IOMMU */
irte->val = 0;
@@ -3926,6 +3926,10 @@ static void irq_remapping_prepare_irte(struct 
amd_ir_data *data,
irte->fields.destination = irq_cfg->dest_apicid;
irte->fields.dm  = apic->irq_dest_mode;
irte->fields.valid   = 1;
+   if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
+   pr_warn("irqdomain: IRTE%d vector %d APICID%d data%p cfg%p\n",
+   irte_info->index, irte->fields.vector,
+   irte->fields.destination, data, irq_cfg);
 
switch (info->type) {
case X86_IRQ_ALLOC_TYPE_IOAPIC:
@@ -3972,6 +3976,9 @@ static int irq_remapping_alloc(struct irq_domain *domain, 
unsigned int virq,
info->type != X86_IRQ_ALLOC_TYPE_MSIX)
return -EINVAL;
 
+   if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
+   pr_warn("irqdomain: allocate %d MSI IRQ, VIRQ%d\n", nr_irqs, 
virq);
+
/*
 * With IRQ remapping enabled, don't need contiguous CPU vectors
 * to support multiple MSI interrupts.
@@ -3986,6 +3993,8 @@ static int irq_remapping_alloc(struct irq_domain *domain, 
unsigned int virq,
ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
if (ret < 0)
return ret;
+   if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
+   pr_warn("irqdomain: allocate parent returns %d\n", ret);
 
ret = -ENOMEM;
data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -4005,6 +4014,8 @@ static int irq_remapping_alloc(struct irq_domain *domain, 
unsigned int virq,
kfree(data);
goto out_free_parent;
}
+   if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
+   pr_warn("irqdomain: allocate IRTE index %d\n", index);
 
for (i = 0; i < nr_irqs; i++) {
irq_data = irq_domain_get_irq_data(domain, virq + i);
diff --git a/drivers/iommu/intel_irq_remapping.c 
b/drivers/iommu/intel_irq_remapping.c
index f15692a410c7..bb093bc6b334 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -1234,6 +1234,8 @@ static void intel_irq_remapping_prepare_irte(struct 
intel_ir_data *data,
  MSI_ADDR_IR_SHV |
  MSI_ADDR_IR_INDEX1(index) |
  MSI_ADDR_IR_INDEX2(index);
+   if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
+   pr_warn("irqdomain: allocate index%d, subhandle%d, 
irte%llx,%llx\n", index, sub_handle, irte->high, irte->low);
break;
 
default:
@@ -1305,6 +1307,8 @@ static int intel_irq_remapping_alloc(struct irq_domain 
*domain,
kfree(data);
goto out_free_parent;
}
+   if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
+   pr_warn("irqdomain: allocate VIRQ%d, count%d, index%d\n", virq, 
nr_irqs, index);
 
for (i = 0; i < nr_irqs; i++) {
irq_data = irq_domain_get_irq_data(domain, virq + i);
-- 
1.7.10.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [Bugfix]

2015-07-30 Thread Jiang Liu
Hi all,
Sorry, I sent the email too quickly. I will resend it with the correct title.
Thanks!
Gerry

On 2015/7/31 0:44, Jiang Liu wrote:
> Hi Alexander, Mark, Alex,
>   Could you please apply the debug patch and send me back
> the dmesg? Please also boot with the kernel parameter "apic=debug".
> 
> Hi Mark,
>   It seems that this regression is caused by the support for multiple-MSI,
> but I have no PCI card supporting multiple-MSI at hand. So may I remotely
> access your system from the Intel internal network?  That would definitely
> speed up the fix.
> Thanks!
> Gerry
> 
> Signed-off-by: Jiang Liu 
> ---
>  drivers/iommu/amd_iommu.c   |   15 +--
>  drivers/iommu/intel_irq_remapping.c |4 
>  2 files changed, 17 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
> index a57e9b749895..c039ed9333a4 100644
> --- a/drivers/iommu/amd_iommu.c
> +++ b/drivers/iommu/amd_iommu.c
> @@ -3916,8 +3916,8 @@ static void irq_remapping_prepare_irte(struct 
> amd_ir_data *data,
>   union irte *irte = &data->irte_entry;
>   struct IO_APIC_route_entry *entry;
>  
> - data->irq_2_irte.devid = devid;
> - data->irq_2_irte.index = index + sub_handle;
> + irte_info->devid = devid;
> + irte_info->index = index + sub_handle;
>  
>   /* Setup IRTE for IOMMU */
>   irte->val = 0;
> @@ -3926,6 +3926,10 @@ static void irq_remapping_prepare_irte(struct 
> amd_ir_data *data,
>   irte->fields.destination = irq_cfg->dest_apicid;
>   irte->fields.dm  = apic->irq_dest_mode;
>   irte->fields.valid   = 1;
> + if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
> + pr_warn("irqdomain: IRTE%d vector %d APICID%d data%p cfg%p\n",
> + irte_info->index, irte->fields.vector,
> + irte->fields.destination, data, irq_cfg);
>  
>   switch (info->type) {
>   case X86_IRQ_ALLOC_TYPE_IOAPIC:
> @@ -3972,6 +3976,9 @@ static int irq_remapping_alloc(struct irq_domain 
> *domain, unsigned int virq,
>   info->type != X86_IRQ_ALLOC_TYPE_MSIX)
>   return -EINVAL;
>  
> + if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
> + pr_warn("irqdomain: allocate %d MSI IRQ, VIRQ%d\n", nr_irqs, 
> virq);
> +
>   /*
>* With IRQ remapping enabled, don't need contiguous CPU vectors
>* to support multiple MSI interrupts.
> @@ -3986,6 +3993,8 @@ static int irq_remapping_alloc(struct irq_domain 
> *domain, unsigned int virq,
>   ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
>   if (ret < 0)
>   return ret;
> + if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
> + pr_warn("irqdomain: allocate parent returns %d\n", ret);
>  
>   ret = -ENOMEM;
>   data = kzalloc(sizeof(*data), GFP_KERNEL);
> @@ -4005,6 +4014,8 @@ static int irq_remapping_alloc(struct irq_domain 
> *domain, unsigned int virq,
>   kfree(data);
>   goto out_free_parent;
>   }
> + if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
> + pr_warn("irqdomain: allocate IRTE index %d\n", index);
>  
>   for (i = 0; i < nr_irqs; i++) {
>   irq_data = irq_domain_get_irq_data(domain, virq + i);
> diff --git a/drivers/iommu/intel_irq_remapping.c 
> b/drivers/iommu/intel_irq_remapping.c
> index f15692a410c7..bb093bc6b334 100644
> --- a/drivers/iommu/intel_irq_remapping.c
> +++ b/drivers/iommu/intel_irq_remapping.c
> @@ -1234,6 +1234,8 @@ static void intel_irq_remapping_prepare_irte(struct 
> intel_ir_data *data,
> MSI_ADDR_IR_SHV |
> MSI_ADDR_IR_INDEX1(index) |
> MSI_ADDR_IR_INDEX2(index);
> + if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
> + pr_warn("irqdomain: allocate index%d, subhandle%d, 
> irte%llx,%llx\n", index, sub_handle, irte->high, irte->low);
>   break;
>  
>   default:
> @@ -1305,6 +1307,8 @@ static int intel_irq_remapping_alloc(struct irq_domain 
> *domain,
>   kfree(data);
>   goto out_free_parent;
>   }
> + if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
> + pr_warn("irqdomain: allocate VIRQ%d, count%d, index%d\n", virq, 
> nr_irqs, index);
>  
>   for (i = 0; i < nr_irqs; i++) {
>   irq_data = irq_domain_get_irq_data(domain, virq + i);
> 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Bugfix]

2015-07-30 Thread Jiang Liu
Hi Alexander, Mark, Alex,
Could you please apply the debug patch and send me back
the dmesg? Please also boot with the kernel parameter "apic=debug".

Hi Mark,
It seems that this regression is caused by the support for multiple-MSI,
but I have no PCI card supporting multiple-MSI at hand. So may I remotely
access your system from the Intel internal network?  That would definitely
speed up the fix.
Thanks!
Gerry

Signed-off-by: Jiang Liu 
---
 drivers/iommu/amd_iommu.c   |   15 +--
 drivers/iommu/intel_irq_remapping.c |4 
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index a57e9b749895..c039ed9333a4 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3916,8 +3916,8 @@ static void irq_remapping_prepare_irte(struct amd_ir_data 
*data,
union irte *irte = &data->irte_entry;
struct IO_APIC_route_entry *entry;
 
-   data->irq_2_irte.devid = devid;
-   data->irq_2_irte.index = index + sub_handle;
+   irte_info->devid = devid;
+   irte_info->index = index + sub_handle;
 
/* Setup IRTE for IOMMU */
irte->val = 0;
@@ -3926,6 +3926,10 @@ static void irq_remapping_prepare_irte(struct 
amd_ir_data *data,
irte->fields.destination = irq_cfg->dest_apicid;
irte->fields.dm  = apic->irq_dest_mode;
irte->fields.valid   = 1;
+   if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
+   pr_warn("irqdomain: IRTE%d vector %d APICID%d data%p cfg%p\n",
+   irte_info->index, irte->fields.vector,
+   irte->fields.destination, data, irq_cfg);
 
switch (info->type) {
case X86_IRQ_ALLOC_TYPE_IOAPIC:
@@ -3972,6 +3976,9 @@ static int irq_remapping_alloc(struct irq_domain *domain, 
unsigned int virq,
info->type != X86_IRQ_ALLOC_TYPE_MSIX)
return -EINVAL;
 
+   if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
+   pr_warn("irqdomain: allocate %d MSI IRQ, VIRQ%d\n", nr_irqs, 
virq);
+
/*
 * With IRQ remapping enabled, don't need contiguous CPU vectors
 * to support multiple MSI interrupts.
@@ -3986,6 +3993,8 @@ static int irq_remapping_alloc(struct irq_domain *domain, 
unsigned int virq,
ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
if (ret < 0)
return ret;
+   if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
+   pr_warn("irqdomain: allocate parent returns %d\n", ret);
 
ret = -ENOMEM;
data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -4005,6 +4014,8 @@ static int irq_remapping_alloc(struct irq_domain *domain, 
unsigned int virq,
kfree(data);
goto out_free_parent;
}
+   if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
+   pr_warn("irqdomain: allocate IRTE index %d\n", index);
 
for (i = 0; i < nr_irqs; i++) {
irq_data = irq_domain_get_irq_data(domain, virq + i);
diff --git a/drivers/iommu/intel_irq_remapping.c 
b/drivers/iommu/intel_irq_remapping.c
index f15692a410c7..bb093bc6b334 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -1234,6 +1234,8 @@ static void intel_irq_remapping_prepare_irte(struct 
intel_ir_data *data,
  MSI_ADDR_IR_SHV |
  MSI_ADDR_IR_INDEX1(index) |
  MSI_ADDR_IR_INDEX2(index);
+   if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
+   pr_warn("irqdomain: allocate index%d, subhandle%d, 
irte%llx,%llx\n", index, sub_handle, irte->high, irte->low);
break;
 
default:
@@ -1305,6 +1307,8 @@ static int intel_irq_remapping_alloc(struct irq_domain 
*domain,
kfree(data);
goto out_free_parent;
}
+   if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
+   pr_warn("irqdomain: allocate VIRQ%d, count%d, index%d\n", virq, 
nr_irqs, index);
 
for (i = 0; i < nr_irqs; i++) {
irq_data = irq_domain_get_irq_data(domain, virq + i);
-- 
1.7.10.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/amd: Allow non-ATS devices in IOMMUv2 domains

2015-07-30 Thread Joerg Roedel
Hi Oded,

On Thu, Jul 30, 2015 at 01:08:46PM +0300, Oded Gabbay wrote:
> Why are amdkfd's devices considered non-IOMMUv2-capable devices?
> After all, they contain the IOMMUv2 H/W and use it constantly for HSA.

With 4.2 the AMD IOMMU driver relies on iommu groups from the iommu core
code. With that, all functions of a multi-function device will be in one
group (and have to be in one domain). This means that the audio device
which comes with every HSA GPU is in the same group, and this device
lacks the IOMMUv2 features.


Joerg

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 0/5] AMD IOMMU Fixes for v4.2-rc4

2015-07-30 Thread Oded Gabbay
On Tue, Jul 28, 2015 at 5:58 PM, Joerg Roedel  wrote:
> Hi,
>
> here are a couple of fixes for the AMD IOMMU driver for
> issues found recently. The issues were introduced by the
> default-domain conversion in this development cycle. I plan
> to send these fixes upstream by the end of the week.
>
> Please review!
>
> Thanks,
>
> Joerg
>
> Joerg Roedel (5):
>   iommu/amd: Use iommu_attach_group()
>   iommu/amd: Use iommu core for passthrough mode
>   iommu/amd: Allow non-IOMMUv2 devices in IOMMUv2 domains
>   iommu/amd: Use swiotlb in passthrough mode
>   iommu/amd: Set global dma_ops if swiotlb is disabled
>
>  drivers/iommu/amd_iommu.c  | 91 
> +++---
>  drivers/iommu/amd_iommu_init.c | 10 +
>  drivers/iommu/amd_iommu_v2.c   | 24 +--
>  3 files changed, 45 insertions(+), 80 deletions(-)
>
> --
> 1.9.1
>
> ___
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu

This series, with the additional patch, is:
Tested-by: "Oded Gabbay "
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/amd: Allow non-ATS devices in IOMMUv2 domains

2015-07-30 Thread Oded Gabbay
On Thu, Jul 30, 2015 at 1:05 PM, Joerg Roedel  wrote:
> From: Joerg Roedel 
>
> With the grouping of multi-function devices a non-ATS
> capable device might also end up in the same domain as an
> IOMMUv2 capable device.
> So handle this situation gracefully and don't consider it a
> bug anymore.
>
> Signed-off-by: Joerg Roedel 
> ---
>  drivers/iommu/amd_iommu.c | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
> index fa9508b..658ee39 100644
> --- a/drivers/iommu/amd_iommu.c
> +++ b/drivers/iommu/amd_iommu.c
> @@ -3318,7 +3318,12 @@ static int __flush_pasid(struct protection_domain 
> *domain, int pasid,
> struct amd_iommu *iommu;
> int qdep;
>
> -   BUG_ON(!dev_data->ats.enabled);
> +   /*
> +  There might be non-IOMMUv2 capable devices in an IOMMUv2
> +* domain.
> +*/
> +   if (!dev_data->ats.enabled)
> +   continue;
>
> qdep  = dev_data->ats.qdep;
> iommu = amd_iommu_rlookup_table[dev_data->devid];
> --
> 1.9.1
>
This series, with the additional patch, is:
Tested-by: "Oded Gabbay "
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/amd: Allow non-ATS devices in IOMMUv2 domains

2015-07-30 Thread Oded Gabbay
Joerg,
Why are amdkfd's devices considered non-IOMMUv2-capable devices?
After all, they contain the IOMMUv2 H/W and use it constantly for HSA.

Oded

On Thu, Jul 30, 2015 at 1:05 PM, Joerg Roedel  wrote:
> From: Joerg Roedel 
>
> With the grouping of multi-function devices a non-ATS
> capable device might also end up in the same domain as an
> IOMMUv2 capable device.
> So handle this situation gracefully and don't consider it a
> bug anymore.
>
> Signed-off-by: Joerg Roedel 
> ---
>  drivers/iommu/amd_iommu.c | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
> index fa9508b..658ee39 100644
> --- a/drivers/iommu/amd_iommu.c
> +++ b/drivers/iommu/amd_iommu.c
> @@ -3318,7 +3318,12 @@ static int __flush_pasid(struct protection_domain 
> *domain, int pasid,
> struct amd_iommu *iommu;
> int qdep;
>
> -   BUG_ON(!dev_data->ats.enabled);
> +   /*
> +  There might be non-IOMMUv2 capable devices in an IOMMUv2
> +* domain.
> +*/
> +   if (!dev_data->ats.enabled)
> +   continue;
>
> qdep  = dev_data->ats.qdep;
> iommu = amd_iommu_rlookup_table[dev_data->devid];
> --
> 1.9.1
>
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH] iommu/amd: Allow non-ATS devices in IOMMUv2 domains

2015-07-30 Thread Joerg Roedel
From: Joerg Roedel 

With the grouping of multi-function devices a non-ATS
capable device might also end up in the same domain as an
IOMMUv2 capable device.
So handle this situation gracefully and don't consider it a
bug anymore.

Signed-off-by: Joerg Roedel 
---
 drivers/iommu/amd_iommu.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index fa9508b..658ee39 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3318,7 +3318,12 @@ static int __flush_pasid(struct protection_domain 
*domain, int pasid,
struct amd_iommu *iommu;
int qdep;
 
-   BUG_ON(!dev_data->ats.enabled);
+   /*
+* There might be non-IOMMUv2 capable devices in an IOMMUv2
+* domain.
+*/
+   if (!dev_data->ats.enabled)
+   continue;
 
qdep  = dev_data->ats.qdep;
iommu = amd_iommu_rlookup_table[dev_data->devid];
-- 
1.9.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 0/5] AMD IOMMU Fixes for v4.2-rc4

2015-07-30 Thread Oded Gabbay
Hi Joerg,

I tested your patches on Kaveri, and while they do fix the initial
setup between amdkfd and IOMMU, the kernel crashes when an HSA process
tries to do any simple action that involves the kernel, such as
creating a compute queue.

So while I suppose the above patches should be included, the new IOMMU
feature as a whole is still broken for amdkfd.

This is the log I get from dmesg:


[  116.592829] systemd-logind[2221]: New session 1 of user ogabbay.
[  136.933560] [ cut here ]
[  136.933845] kernel BUG at drivers/iommu/amd_iommu.c:3321!
[  136.934168] invalid opcode:  [#1] SMP
[  136.934424] Modules linked in: amdkfd amd_iommu_v2 radeon
drm_kms_helper ttm fuse
[  136.934919] CPU: 1 PID: 3894 Comm: kfdtest Not tainted 4.2.0-rc3-kfd+ #9
[  136.935319] Hardware name: AMD BALLINA/Ballina, BIOS
WBL3B20N_Weekly_13_11_2 11/20/2013
[  136.935796] task: 8800a0c48000 ti: 880095f28000 task.ti:
880095f28000
[  136.936242] RIP: 0010:[]  []
__flush_pasid+0xd0/0x153
[  136.936740] RSP: 0018:880095f2bb98  EFLAGS: 00010046
[  136.937055] RAX:  RBX: 8800bd89b138 RCX: 4712e4da
[  136.937480] RDX: 8800ba7c4d40 RSI: 0001 RDI: 0a88
[  136.937906] RBP: 880095f2bbf8 R08: 4712f017 R09: 0b40
[  136.938331] R10: 8800a0c48000 R11: 8800a0c48688 R12: f000
[  136.938755] R13: 8800bd89b148 R14: 7fff R15: 8800bd89b101
[  136.939180] FS:  7f2dbbb14780() GS:88011ec8()
knlGS:
[  136.939662] CS:  0010 DS:  ES:  CR0: 80050033
[  136.940003] CR2: 00b77000 CR3: 95e15000 CR4: 000406e0
[  136.940427] Stack:
[  136.940544]  880095f2bc18 bd89b190 8801
8800bd89b190
[  136.941009]  300e0001 7007 880095f2bc08
95e15001
[  136.941474]  8800bd89b190 0001 0296
8800bd89b138
[  136.941937] Call Trace:
[  136.942084]  [] __amd_iommu_flush_tlb+0x24/0x26
[  136.942447]  [] amd_iommu_domain_set_gcr3+0x85/0xa5
[  136.942833]  [] ? _raw_spin_unlock_irqrestore+0x41/0x46
[  136.943241]  [] amd_iommu_bind_pasid+0x197/0x20c
[amd_iommu_v2]
[  136.943695]  []
kfd_bind_process_to_device+0x59/0x74 [amdkfd]
[  136.944144]  [] ?
kfd_ioctl_create_queue+0x375/0x4e8 [amdkfd]
[  136.944583]  [] kfd_ioctl_create_queue+0x3ae/0x4e8 [amdkfd]
[  136.945013]  [] ?
kfd_ioctl_destroy_queue+0x73/0x73 [amdkfd]
[  136.945448]  [] kfd_ioctl+0x1be/0x2b5 [amdkfd]
[  136.945808]  [] ? inode_has_perm+0x2b/0x30
[  136.946146]  [] do_vfs_ioctl+0x455/0x4dd
[  136.946474]  [] ? selinux_file_ioctl+0xaf/0xb4
[  136.946835]  [] ? security_file_ioctl+0x35/0x49
[  136.947198]  [] SyS_ioctl+0x5a/0x7f
[  136.947499]  [] entry_SYSCALL_64_fastpath+0x12/0x6f
[  136.947881] Code: b6 6d b0 48 8b 45 b0 48 8b 53 10 0f b6 c4 41 c1
e5 10 c1 e0 10 44 89 6d b0 4c 8d 6b 10 89 45 ac 4c 39 ea 74 68 80 7a
44 00 75 02 <0f> 0b 0f b7 72 40 48 8b 0d a3 57 76 01 48 8b 3c f1 8b 4a
48 48
[  136.949408] RIP  [] __flush_pasid+0xd0/0x153
[  136.949762]  RSP 
[  137.116301] ---[ end trace 2b10f64971a81bd2 ]---

Oded

On Tue, Jul 28, 2015 at 5:58 PM, Joerg Roedel  wrote:
> Hi,
>
> here are a couple of fixes for the AMD IOMMU driver for
> issues found recently. The issues were introduced by the
> default-domain conversion in this development cycle. I plan
> to send these fixes upstream by the end of the week.
>
> Please review!
>
> Thanks,
>
> Joerg
>
> Joerg Roedel (5):
>   iommu/amd: Use iommu_attach_group()
>   iommu/amd: Use iommu core for passthrough mode
>   iommu/amd: Allow non-IOMMUv2 devices in IOMMUv2 domains
>   iommu/amd: Use swiotlb in passthrough mode
>   iommu/amd: Set global dma_ops if swiotlb is disabled
>
>  drivers/iommu/amd_iommu.c  | 91 
> +++---
>  drivers/iommu/amd_iommu_init.c | 10 +
>  drivers/iommu/amd_iommu_v2.c   | 24 +--
>  3 files changed, 45 insertions(+), 80 deletions(-)
>
> --
> 1.9.1
>
> ___
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu