Re: [PATCH v7 00/12] Fix kdump faults on system with amd iommu

2016-12-23 Thread Baoquan He
Hi Joerg,

Ping!

Could you help review this version? I'm not sure whether it can still
make the v4.10 merge window.

Thanks
Baoquan

On 11/25/16 at 01:13pm, Baoquan He wrote:
> This is v7 post.
> 
> The principle of the fix is similar to intel iommu. Just defer the assignment
> of device to domain to device driver init. In this version of post, a new
> call-back is_attach_deferred is added to iommu-ops, it's used to check whether
> we need defer the domain attach/detach in iommu-core code.
> 
> v5:
> bnx2 NIC can't reset itself during driver init. Post patch to reset
> it during driver init. IO_PAGE_FAULT can't be seen anymore.
> 
> Below is link of v5 post.
> 
> https://lists.linuxfoundation.org/pipermail/iommu/2016-September/018527.html
> 
> v5->v6:
> According to Joerg's comments made several below main changes:
> - Add sanity check when copy old dev tables.
> 
> - If a device is set up with guest translations (DTE.GV=1), then don't
>   copy that information but move the device over to an empty guest-cr3
>   table and handle the faults in the PPR log (which just answer them
>   with INVALID).
> 
> v6->v7:
> Two main changes are made according to Joerg's suggestion:
> - Add is_attach_deferred call-back to iommu-ops. With this domain
>   can be deferred to device driver init cleanly.
> 
> - Allocate memory below 4G for dev table if translation pre-enabled.
>   AMD engineer pointed out that it's unsafe to update the device-table
>   while iommu is enabled. device-table pointer update is split up into
>   two 32bit writes in the IOMMU hardware. So updating it while the IOMMU
>   is enabled could have some nasty side effects.
> 
> Baoquan He (12):
>   iommu/amd: Detect pre enabled translation
>   iommu/amd: add several helper function
>   iommu/amd: Define bit fields for DTE particularly
>   iommu/amd: Add function copy_dev_tables
>   iommu/amd: copy old trans table from old kernel
>   iommu: Add is_attach_deferred call-back to iommu-ops
>   iommu/amd: Use is_attach_deferred call-back
>   iommu/amd: Add sanity check of irq remap information of old dev table
> entry
>   iommu/amd: Don't copy GCR3 table root pointer
>   iommu/amd: Clear out the GV flag when handle deferred domain attach
>   iommu: Assign the direct mapped domain to group->domain
>   iommu/amd: Allocate memory below 4G for dev table if translation
> pre-enabled
> 
>  drivers/iommu/amd_iommu.c   |  78 +---
>  drivers/iommu/amd_iommu_init.c  | 201 
> +---
>  drivers/iommu/amd_iommu_proto.h |   2 +
>  drivers/iommu/amd_iommu_types.h |  53 ++-
>  drivers/iommu/amd_iommu_v2.c|  18 +++-
>  drivers/iommu/iommu.c   |   9 ++
>  include/linux/iommu.h   |   1 +
>  7 files changed, 313 insertions(+), 49 deletions(-)
> 
> -- 
> 2.5.5
> 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v7 RESEND 02/12] iommu/amd: add several helper function

2017-01-01 Thread Baoquan He
Move per iommu enabling code into a wrapper function early_enable_iommu().
This can make later kdump change easier.

And also add iommu_disable_command_buffer and iommu_disable_event_buffer
for later usage.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c | 42 +++---
 1 file changed, 31 insertions(+), 11 deletions(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index fc45e92..ef743ca 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -626,6 +626,14 @@ static void iommu_enable_command_buffer(struct amd_iommu 
*iommu)
amd_iommu_reset_cmd_buffer(iommu);
 }
 
+/*
+ * This function disables the command buffer
+ */
+static void iommu_disable_command_buffer(struct amd_iommu *iommu)
+{
+   iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
+}
+
 static void __init free_command_buffer(struct amd_iommu *iommu)
 {
free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
@@ -658,6 +666,14 @@ static void iommu_enable_event_buffer(struct amd_iommu 
*iommu)
iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
 }
 
+/*
+ * This function disables the event buffer
+ */
+static void iommu_disable_event_buffer(struct amd_iommu *iommu)
+{
+   iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
+}
+
 static void __init free_event_buffer(struct amd_iommu *iommu)
 {
free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
@@ -2027,6 +2043,19 @@ static void iommu_enable_ga(struct amd_iommu *iommu)
 #endif
 }
 
+static void early_enable_iommu(struct amd_iommu *iommu)
+{
+   iommu_disable(iommu);
+   iommu_init_flags(iommu);
+   iommu_set_device_table(iommu);
+   iommu_enable_command_buffer(iommu);
+   iommu_enable_event_buffer(iommu);
+   iommu_set_exclusion_range(iommu);
+   iommu_enable_ga(iommu);
+   iommu_enable(iommu);
+   iommu_flush_all_caches(iommu);
+}
+
 /*
  * This function finally enables all IOMMUs found in the system after
  * they have been initialized
@@ -2035,17 +2064,8 @@ static void early_enable_iommus(void)
 {
struct amd_iommu *iommu;
 
-   for_each_iommu(iommu) {
-   iommu_disable(iommu);
-   iommu_init_flags(iommu);
-   iommu_set_device_table(iommu);
-   iommu_enable_command_buffer(iommu);
-   iommu_enable_event_buffer(iommu);
-   iommu_set_exclusion_range(iommu);
-   iommu_enable_ga(iommu);
-   iommu_enable(iommu);
-   iommu_flush_all_caches(iommu);
-   }
+   for_each_iommu(iommu)
+   early_enable_iommu(iommu);
 
 #ifdef CONFIG_IRQ_REMAP
if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v7 RESEND 01/12] iommu/amd: Detect pre enabled translation

2017-01-01 Thread Baoquan He
Add functions to check whether translation is already enabled in IOMMU.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c  | 25 +
 drivers/iommu/amd_iommu_proto.h |  1 +
 drivers/iommu/amd_iommu_types.h |  4 
 3 files changed, 30 insertions(+)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 6799cf9..fc45e92 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -252,6 +252,26 @@ static int amd_iommu_enable_interrupts(void);
 static int __init iommu_go_to_state(enum iommu_init_state state);
 static void init_device_table_dma(void);
 
+
+bool translation_pre_enabled(struct amd_iommu *iommu)
+{
+   return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
+}
+
+static void clear_translation_pre_enabled(struct amd_iommu *iommu)
+{
+   iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
+}
+
+static void init_translation_status(struct amd_iommu *iommu)
+{
+   u32 ctrl;
+
+   ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
+   if (ctrl & (1<<CONTROL_IOMMU_EN))
+           iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
+}
+
 static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu,
u8 bank, u8 cntr, u8 fxn,
u64 *value, bool is_write);
@@ -1390,6 +1410,11 @@ static int __init init_iommu_one(struct amd_iommu 
*iommu, struct ivhd_header *h)
 
iommu->int_enabled = false;
 
+   init_translation_status(iommu);
+
+   if (translation_pre_enabled(iommu))
+   pr_warn("Translation is already enabled - trying to copy 
translation structures\n");
+
ret = init_iommu_from_acpi(iommu, h);
if (ret)
return ret;
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index 7eb60c1..9560183 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -93,4 +93,5 @@ static inline bool iommu_feature(struct amd_iommu *iommu, u64 
f)
return !!(iommu->features & f);
 }
 
+extern bool translation_pre_enabled(struct amd_iommu *iommu);
 #endif /* _ASM_X86_AMD_IOMMU_PROTO_H  */
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 0d91785..2bbc19d 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -416,6 +416,7 @@ extern struct kmem_cache *amd_iommu_irq_cache;
 #define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL)
 
 
+
 /*
  * This struct is used to pass information about
  * incoming PPR faults around.
@@ -434,6 +435,8 @@ struct iommu_domain;
 struct irq_domain;
 struct amd_irte_ops;
 
+#define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED  (1 << 0)
+
 /*
  * This structure contains generic data for  IOMMU protection domains
  * independent of their use.
@@ -566,6 +569,7 @@ struct amd_iommu {
struct amd_irte_ops *irte_ops;
 #endif
 
+   u32 flags;
volatile u64 __aligned(8) cmd_sem;
 };
 
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v7 RESEND 05/12] iommu/amd: copy old trans table from old kernel

2017-01-01 Thread Baoquan He
Several things need to be done here:
- If the iommu is pre-enabled in a normal (non-kdump) kernel, just disable
  it and print a warning.

- If copying the dev table of the old kernel fails, continue to proceed as
  a normal kernel does.

- Disable and re-enable the event/cmd buffers, install the copied DTE table
  into the register, and detect and enable guest vapic.

- Flush all caches.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c | 51 --
 1 file changed, 44 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index fddc6e6..93f0ac2 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 
+#include <linux/crash_dump.h>
 #include "amd_iommu_proto.h"
 #include "amd_iommu_types.h"
 #include "irq_remapping.h"
@@ -1482,9 +1483,12 @@ static int __init init_iommu_one(struct amd_iommu 
*iommu, struct ivhd_header *h)
iommu->int_enabled = false;
 
init_translation_status(iommu);
-
-   if (translation_pre_enabled(iommu))
-   pr_warn("Translation is already enabled - trying to copy 
translation structures\n");
+   if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
+   iommu_disable(iommu);
+   clear_translation_pre_enabled(iommu);
+   pr_warn("Translation was enabled for IOMMU:%d but we are not in 
kdump mode\n",
+   iommu->index);
+   }
 
ret = init_iommu_from_acpi(iommu, h);
if (ret)
@@ -1976,8 +1980,7 @@ static int __init init_memory_definitions(struct 
acpi_table_header *table)
 }
 
 /*
- * Init the device table to not allow DMA access for devices and
- * suppress all page faults
+ * Init the device table to not allow DMA access for devices
  */
 static void init_device_table_dma(void)
 {
@@ -2118,9 +2121,43 @@ static void early_enable_iommu(struct amd_iommu *iommu)
 static void early_enable_iommus(void)
 {
struct amd_iommu *iommu;
+   bool is_pre_enabled = false;
 
-   for_each_iommu(iommu)
-   early_enable_iommu(iommu);
+   for_each_iommu(iommu) {
+   if (translation_pre_enabled(iommu)) {
+   is_pre_enabled = true;
+   break;
+   }
+   }
+
+   if (!is_pre_enabled) {
+   for_each_iommu(iommu)
+   early_enable_iommu(iommu);
+   } else {
+   pr_warn("Translation is already enabled - trying to copy 
translation structures\n");
+   if (copy_dev_tables()) {
+   pr_err("Failed to copy DEV table from previous 
kernel.\n");
+   /*
+* If failed to copy dev tables from old kernel, 
continue to proceed
+* as it does in normal kernel.
+*/
+   for_each_iommu(iommu) {
+   clear_translation_pre_enabled(iommu);
+   early_enable_iommu(iommu);
+   }
+   } else {
+   pr_info("Copied DEV table from previous kernel.\n");
+   for_each_iommu(iommu) {
+   iommu_disable_command_buffer(iommu);
+   iommu_disable_event_buffer(iommu);
+   iommu_enable_command_buffer(iommu);
+   iommu_enable_event_buffer(iommu);
+   iommu_enable_ga(iommu);
+   iommu_set_device_table(iommu);
+   iommu_flush_all_caches(iommu);
+   }
+   }
+   }
 
 #ifdef CONFIG_IRQ_REMAP
if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v7 RESEND 04/12] iommu/amd: Add function copy_dev_tables

2017-01-01 Thread Baoquan He
Add function copy_dev_tables to copy the old DEV table entries of the panicked
kernel to the newly allocated DEV table. Since all iommus share the same DTE
table, the copy only needs to be done once, after the physical address of the
old DEV table is retrieved from the iommu register. Besides, we also need to:

  - Check whether all IOMMUs actually use the same device table with the
    same size.

  - Verify that the size of the old device table is the expected size.

  - Reserve the old domain ids occupied in the 1st kernel to avoid touching
    the old io-page tables. Then in-flight DMA can continue looking them up.

Also define the macro DEV_DOMID_MASK to replace the magic number 0xffffULL,
because it needs to be reused in copy_dev_tables.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c   |  2 +-
 drivers/iommu/amd_iommu_init.c  | 55 +
 drivers/iommu/amd_iommu_types.h |  1 +
 3 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 350bcd9..91ddec1 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1840,7 +1840,7 @@ static void set_dte_entry(u16 devid, struct 
protection_domain *domain, bool ats)
flags|= tmp;
}
 
-   flags &= ~(0xffffUL);
+   flags &= ~DEV_DOMID_MASK;
flags |= domain->id;
 
amd_iommu_dev_table[devid].data[1]  = flags;
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index ef743ca..fddc6e6 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -835,6 +835,61 @@ static int get_dev_entry_bit(u16 devid, u8 bit)
 }
 
 
+static int copy_dev_tables(void)
+{
+   struct dev_table_entry *old_devtb = NULL;
+   u32 lo, hi, devid, old_devtb_size;
+   phys_addr_t old_devtb_phys;
+   u64 entry, last_entry = 0;
+   struct amd_iommu *iommu;
+   u16 dom_id, dte_v;
+   static int copied;
+
+   for_each_iommu(iommu) {
+   if (!translation_pre_enabled(iommu)) {
+   pr_err("IOMMU:%d is not pre-enabled!/n",
+   iommu->index);
+   return -1;
+   }
+
+   /* All IOMMUs should use the same device table with the same 
size */
+   lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
+   hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
+   entry = (((u64) hi) << 32) + lo;
+   if (last_entry && last_entry != entry) {
+   pr_err("IOMMU:%d should use the same dev table as 
others!/n",
+   iommu->index);
+   return -1;
+   }
+   last_entry = entry;
+
+   old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
+   if (old_devtb_size != dev_table_size) {
+   pr_err("The device table size of IOMMU:%d is not 
expected!/n",
+   iommu->index);
+   return -1;
+   }
+
+   old_devtb_phys = entry & PAGE_MASK;
+   old_devtb = memremap(old_devtb_phys, dev_table_size, 
MEMREMAP_WB);
+   if (!old_devtb)
+   return -1;
+
+   if (copied)
+   continue;
+   for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
+   amd_iommu_dev_table[devid] = old_devtb[devid];
+   dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
+   dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
+   if (dte_v && dom_id)
+   __set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
+   }
+   memunmap(old_devtb);
+   copied = 1;
+   }
+   return 0;
+}
+
 void amd_iommu_apply_erratum_63(u16 devid)
 {
int sysmgt;
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 6a4378f..79ec841 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -336,6 +336,7 @@
 #define DTE_FLAG_MASK  (0x3ffULL << 32)
 #define DTE_GLX_SHIFT  (56)
 #define DTE_GLX_MASK   (3)
+#define DEV_DOMID_MASK 0xffffULL
 
 #define DTE_GCR3_VAL_A(x)  (((x) >> 12) & 0x7ULL)
 #define DTE_GCR3_VAL_B(x)  (((x) >> 15) & 0x0ULL)
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v7 RESEND 00/12] Fix kdump faults on system with amd iommu

2017-01-01 Thread Baoquan He
This is a resend of the v7 post, rebased on v4.10-rc2 of Linus's tree. It only
adjusts the order of several patches and adds the Suggested-by tags that were
forgotten in the previous post.

The principle of the fix is similar to that of the intel iommu: defer the
assignment of a device to a domain until device driver init. In this version,
a new call-back, is_attach_deferred, is added to iommu-ops; it is used to check
whether we need to defer the domain attach/detach in the iommu-core code.

v5:
bnx2 NIC can't reset itself during driver init. Post patch to reset
it during driver init. IO_PAGE_FAULT can't be seen anymore.

Below is link of v5 post.
https://lists.linuxfoundation.org/pipermail/iommu/2016-September/018527.html

v5->v6:
According to Joerg's comments, the following main changes were made:
- Add sanity check when copy old dev tables.

- If a device is set up with guest translations (DTE.GV=1), then don't
  copy that information but move the device over to an empty guest-cr3
  table and handle the faults in the PPR log (which just answer them
  with INVALID).

v6->v7:
Two main changes are made according to Joerg's suggestion:
- Add is_attach_deferred call-back to iommu-ops. With this, the domain attach
  can be deferred to device driver init cleanly.

- Allocate memory below 4G for dev table if translation pre-enabled.
  AMD engineer pointed out that it's unsafe to update the device-table
  while iommu is enabled. device-table pointer update is split up into
  two 32bit writes in the IOMMU hardware. So updating it while the IOMMU
  is enabled could have some nasty side effects.

Baoquan He (12):
  iommu/amd: Detect pre enabled translation
  iommu/amd: add several helper function
  iommu/amd: Define bit fields for DTE particularly
  iommu/amd: Add function copy_dev_tables
  iommu/amd: copy old trans table from old kernel
  iommu: Add is_attach_deferred call-back to iommu-ops
  iommu/amd: Use is_attach_deferred call-back
  iommu/amd: Add sanity check of irq remap information of old dev table
entry
  iommu: Assign the direct mapped domain to group->domain
  iommu/amd: Allocate memory below 4G for dev table if translation
pre-enabled
  iommu/amd: Don't copy GCR3 table root pointer
  iommu/amd: Clear out the GV flag when handle deferred domain attach

 drivers/iommu/amd_iommu.c   |  78 +---
 drivers/iommu/amd_iommu_init.c  | 201 +---
 drivers/iommu/amd_iommu_proto.h |   2 +
 drivers/iommu/amd_iommu_types.h |  53 ++-
 drivers/iommu/amd_iommu_v2.c|  18 +++-
 drivers/iommu/iommu.c   |   9 ++
 include/linux/iommu.h   |   1 +
 7 files changed, 313 insertions(+), 49 deletions(-)

-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v7 RESEND 03/12] iommu/amd: Define bit fields for DTE particularly

2017-01-01 Thread Baoquan He
In the amd-vi spec, several bits of the IO PTE fields and DTE fields are
similar, so both of them can share the same macro definitions. However,
defining their respective bit fields separately makes the code more readable.
So do it in this patch.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c   |  8 
 drivers/iommu/amd_iommu_types.h | 18 ++
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 019e027..350bcd9 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1478,9 +1478,9 @@ static int iommu_map_page(struct protection_domain *dom,
 
if (count > 1) {
__pte = PAGE_SIZE_PTE(phys_addr, page_size);
-   __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC;
+   __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
} else
-   __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC;
+   __pte = phys_addr | IOMMU_PTE_PR | IOMMU_PTE_FC;
 
if (prot & IOMMU_PROT_IR)
__pte |= IOMMU_PTE_IR;
@@ -1807,7 +1807,7 @@ static void set_dte_entry(u16 devid, struct 
protection_domain *domain, bool ats)
 
pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
-   pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
+   pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV;
 
flags = amd_iommu_dev_table[devid].data[1];
 
@@ -1850,7 +1850,7 @@ static void set_dte_entry(u16 devid, struct 
protection_domain *domain, bool ats)
 static void clear_dte_entry(u16 devid)
 {
/* remove entry from the device table seen by the hardware */
-   amd_iommu_dev_table[devid].data[0]  = IOMMU_PTE_P | IOMMU_PTE_TV;
+   amd_iommu_dev_table[devid].data[0]  = DTE_FLAG_V | DTE_FLAG_TV;
amd_iommu_dev_table[devid].data[1] &= DTE_FLAG_MASK;
 
amd_iommu_apply_erratum_63(devid);
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 2bbc19d..6a4378f 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -265,7 +265,7 @@
 #define PM_LEVEL_INDEX(x, a)   (((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL)
 #define PM_LEVEL_ENC(x)(((x) << 9) & 0xe00ULL)
 #define PM_LEVEL_PDE(x, a) ((a) | PM_LEVEL_ENC((x)) | \
-IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW)
+IOMMU_PTE_PR | IOMMU_PTE_IR | IOMMU_PTE_IW)
 #define PM_PTE_LEVEL(pte)  (((pte) >> 9) & 0x7ULL)
 
 #define PM_MAP_4k  0
@@ -314,13 +314,23 @@
 #define PTE_LEVEL_PAGE_SIZE(level) \
(1ULL << (12 + (9 * (level
 
-#define IOMMU_PTE_P  (1ULL << 0)
-#define IOMMU_PTE_TV (1ULL << 1)
+/*
+ * Bit value definition for I/O PTE fields
+ */
+#define IOMMU_PTE_PR (1ULL << 0)
 #define IOMMU_PTE_U  (1ULL << 59)
 #define IOMMU_PTE_FC (1ULL << 60)
 #define IOMMU_PTE_IR (1ULL << 61)
 #define IOMMU_PTE_IW (1ULL << 62)
 
+/*
+ * Bit value definition for DTE fields
+ */
+#define DTE_FLAG_V  (1ULL << 0)
+#define DTE_FLAG_TV (1ULL << 1)
+#define DTE_FLAG_IR (1ULL << 61)
+#define DTE_FLAG_IW (1ULL << 62)
+
 #define DTE_FLAG_IOTLB (1ULL << 32)
 #define DTE_FLAG_GV(1ULL << 55)
 #define DTE_FLAG_MASK  (0x3ffULL << 32)
@@ -342,7 +352,7 @@
 #define GCR3_VALID 0x01ULL
 
 #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
-#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
+#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_PR)
 #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK))
 #define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07)
 
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v7 RESEND 06/12] iommu: Add is_attach_deferred call-back to iommu-ops

2017-01-01 Thread Baoquan He
This new call-back will be used to check whether the domain attach needs to be
deferred for now. If yes, the domain attach/detach will return directly.

Suggested-by: Joerg Roedel 
Signed-off-by: Baoquan He 
---
 drivers/iommu/iommu.c | 8 
 include/linux/iommu.h | 1 +
 2 files changed, 9 insertions(+)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index dbe7f65..e5cb9b1 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1096,6 +1096,10 @@ static int __iommu_attach_device(struct iommu_domain 
*domain,
 struct device *dev)
 {
int ret;
+   if ((domain->ops->is_attach_deferred != NULL) &&
+   domain->ops->is_attach_deferred(domain, dev))
+   return 0;
+
if (unlikely(domain->ops->attach_dev == NULL))
return -ENODEV;
 
@@ -1137,6 +1141,10 @@ EXPORT_SYMBOL_GPL(iommu_attach_device);
 static void __iommu_detach_device(struct iommu_domain *domain,
  struct device *dev)
 {
+   if ((domain->ops->is_attach_deferred != NULL) &&
+   domain->ops->is_attach_deferred(domain, dev))
+   return;
+
if (unlikely(domain->ops->detach_dev == NULL))
return;
 
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 0ff5111..5aa3742 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -200,6 +200,7 @@ struct iommu_ops {
u32 (*domain_get_windows)(struct iommu_domain *domain);
 
int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
+   bool (*is_attach_deferred)(struct iommu_domain *domain, struct device 
*dev);
 
unsigned long pgsize_bitmap;
 };
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v7 RESEND 07/12] iommu/amd: Use is_attach_deferred call-back

2017-01-01 Thread Baoquan He
Implement call-back is_attach_deferred and use it to defer the
domain attach from iommu driver init to device driver init when
iommu is pre-enabled in kdump kernel.

Suggested-by: Joerg Roedel 
Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c | 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 91ddec1..2005d26 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -138,6 +138,7 @@ struct iommu_dev_data {
 PPR completions */
u32 errata;   /* Bitmap for errata to apply */
bool use_vapic;   /* Enable device to use vapic mode */
+   bool defer_attach;
 };
 
 /*
@@ -340,12 +341,17 @@ static u16 get_alias(struct device *dev)
 static struct iommu_dev_data *find_dev_data(u16 devid)
 {
struct iommu_dev_data *dev_data;
+   struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
 
dev_data = search_dev_data(devid);
 
-   if (dev_data == NULL)
+   if (dev_data == NULL) {
dev_data = alloc_dev_data(devid);
 
+   if (translation_pre_enabled(iommu))
+   dev_data->defer_attach = true;
+   }
+
return dev_data;
 }
 
@@ -2317,11 +2323,18 @@ static void queue_add(struct dma_ops_domain *dma_dom,
 static struct protection_domain *get_domain(struct device *dev)
 {
struct protection_domain *domain;
+   struct iommu_domain *io_domain;
 
if (!check_device(dev))
return ERR_PTR(-EINVAL);
 
domain = get_dev_data(dev)->domain;
+   if (domain == NULL && get_dev_data(dev)->defer_attach) {
+   get_dev_data(dev)->defer_attach = false;
+   io_domain = iommu_get_domain_for_dev(dev);
+   domain = to_pdomain(io_domain);
+   attach_device(dev, domain);
+   }
if (!dma_ops_domain(domain))
return ERR_PTR(-EBUSY);
 
@@ -3217,6 +3230,13 @@ static void amd_iommu_apply_dm_region(struct device *dev,
WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL);
 }
 
+static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
+struct device *dev)
+{
+   struct iommu_dev_data *dev_data = dev->archdata.iommu;
+   return dev_data->defer_attach;
+}
+
 static const struct iommu_ops amd_iommu_ops = {
.capable = amd_iommu_capable,
.domain_alloc = amd_iommu_domain_alloc,
@@ -3233,6 +3253,7 @@ static const struct iommu_ops amd_iommu_ops = {
.get_dm_regions = amd_iommu_get_dm_regions,
.put_dm_regions = amd_iommu_put_dm_regions,
.apply_dm_region = amd_iommu_apply_dm_region,
+   .is_attach_deferred = amd_iommu_is_attach_deferred,
.pgsize_bitmap  = AMD_IOMMU_PGSIZES,
 };
 
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v7 RESEND 10/12] iommu/amd: Allocate memory below 4G for dev table if translation pre-enabled

2017-01-01 Thread Baoquan He
AMD pointed out it's unsafe to update the device-table while iommu
is enabled. It turns out that device-table pointer update is split
up into two 32bit writes in the IOMMU hardware. So updating it while
the IOMMU is enabled could have some nasty side effects.

The only way to work around this is to allocate the device-table below
4GB if translation is pre-enabled in the kdump kernel. If the allocation
fails, keep using the old one.

Suggested-by: Joerg Roedel 
Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 9186f8b..73b74ba 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -2139,11 +2139,23 @@ static void early_enable_iommu(struct amd_iommu *iommu)
  */
 static void early_enable_iommus(void)
 {
+   struct dev_table_entry *dev_tbl;
struct amd_iommu *iommu;
bool is_pre_enabled = false;
 
for_each_iommu(iommu) {
if (translation_pre_enabled(iommu)) {
+   gfp_t gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32;;
+
+   dev_tbl = (void *)__get_free_pages(gfp_flag,
+   get_order(dev_table_size));
+   if (dev_tbl != NULL) {
+   memcpy(dev_tbl, amd_iommu_dev_table, 
dev_table_size);
+   free_pages((unsigned long)amd_iommu_dev_table,
+   get_order(dev_table_size));
+   amd_iommu_dev_table = dev_tbl;
+   }
+
is_pre_enabled = true;
break;
}
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v7 RESEND 12/12] iommu/amd: Clear out the GV flag when handle deferred domain attach

2017-01-01 Thread Baoquan He
When handling a deferred domain attach, we need to check whether the domain
is v2. If it is not, we should clear out the GV flag, which could have been
copied from the old device table entry.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index bc3711e..8d00fc3 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1841,6 +1841,11 @@ static void clear_dte_entry(u16 devid)
amd_iommu_apply_erratum_63(devid);
 }
 
+static void clear_dte_flag_gv(u16 devid)
+{
+   amd_iommu_dev_table[devid].data[0] &= (~DTE_FLAG_GV);
+}
+
 static void do_attach(struct iommu_dev_data *dev_data,
  struct protection_domain *domain)
 {
@@ -2301,6 +2306,7 @@ static void queue_add(struct dma_ops_domain *dma_dom,
  */
 static struct protection_domain *get_domain(struct device *dev)
 {
+   struct iommu_dev_data *dev_data = get_dev_data(dev);
struct protection_domain *domain;
struct iommu_domain *io_domain;
 
@@ -2308,11 +2314,21 @@ static struct protection_domain *get_domain(struct 
device *dev)
return ERR_PTR(-EINVAL);
 
domain = get_dev_data(dev)->domain;
-   if (domain == NULL && get_dev_data(dev)->defer_attach) {
+   if (domain == NULL && dev_data->defer_attach) {
+   u16 alias = amd_iommu_alias_table[dev_data->devid];
get_dev_data(dev)->defer_attach = false;
io_domain = iommu_get_domain_for_dev(dev);
domain = to_pdomain(io_domain);
attach_device(dev, domain);
+   /*
+* If the deferred attached domain is not v2, should clear out
+* the old GV flag.
+*/
+   if (!(domain->flags & PD_IOMMUV2_MASK)) {
+   clear_dte_flag_gv(dev_data->devid);
+   if (alias != dev_data->devid)
+   clear_dte_flag_gv(dev_data->devid);
+   }
}
if (!dma_ops_domain(domain))
return ERR_PTR(-EBUSY);
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v7 RESEND 09/12] iommu: Assign the direct mapped domain to group->domain

2017-01-01 Thread Baoquan He
In iommu_request_dm_for_dev(), the devices of the group have all been attached
to the newly created direct mapped domain. We should store that domain into
group->domain so that it works for iommu_get_domain_for_dev() and
get_domain().

Signed-off-by: Baoquan He 
---
 drivers/iommu/iommu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index e5cb9b1..7ac0293 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1625,6 +1625,7 @@ int iommu_request_dm_for_dev(struct device *dev)
if (group->default_domain)
iommu_domain_free(group->default_domain);
group->default_domain = dm_domain;
+   group->domain = dm_domain;
 
pr_info("Using direct mapping for device %s\n", dev_name(dev));
 
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v7 RESEND 11/12] iommu/amd: Don't copy GCR3 table root pointer

2017-01-01 Thread Baoquan He
When the iommu is pre-enabled in the kdump kernel and a device is set up with
guest translations (DTE.GV=1), don't copy the GCR3 table root pointer;
instead move the device over to an empty guest-cr3 table and handle the
faults in the PPR log (answering them with INVALID). After all, these
PPR faults are recoverable for the device, and we should not allow the
device to change the old kernel's data when we don't have to.

Suggested-by: Joerg Roedel 
Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c   | 26 +++---
 drivers/iommu/amd_iommu_init.c  | 11 +++
 drivers/iommu/amd_iommu_proto.h |  1 +
 drivers/iommu/amd_iommu_types.h | 22 ++
 drivers/iommu/amd_iommu_v2.c| 18 +-
 5 files changed, 54 insertions(+), 24 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index c23f3d11..bc3711e 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -120,28 +120,6 @@ int amd_iommu_max_glx_val = -1;
 static struct dma_map_ops amd_iommu_dma_ops;
 
 /*
- * This struct contains device specific data for the IOMMU
- */
-struct iommu_dev_data {
-   struct list_head list;/* For domain->dev_list */
-   struct list_head dev_data_list;   /* For global dev_data_list */
-   struct protection_domain *domain; /* Domain the device is bound to */
-   u16 devid;/* PCI Device ID */
-   u16 alias;/* Alias Device ID */
-   bool iommu_v2;/* Device can make use of IOMMUv2 */
-   bool passthrough; /* Device is identity mapped */
-   struct {
-   bool enabled;
-   int qdep;
-   } ats;/* ATS state */
-   bool pri_tlp; /* PASID TLB required for
-PPR completions */
-   u32 errata;   /* Bitmap for errata to apply */
-   bool use_vapic;   /* Enable device to use vapic mode */
-   bool defer_attach;
-};
-
-/*
  * general struct to manage commands send to an IOMMU
  */
 struct iommu_cmd {
@@ -355,10 +333,11 @@ static struct iommu_dev_data *find_dev_data(u16 devid)
return dev_data;
 }
 
-static struct iommu_dev_data *get_dev_data(struct device *dev)
+struct iommu_dev_data *get_dev_data(struct device *dev)
 {
return dev->archdata.iommu;
 }
+EXPORT_SYMBOL(get_dev_data);
 
 /*
 * Find or create an IOMMU group for a acpihid device.
@@ -2380,6 +2359,7 @@ static int dir2prot(enum dma_data_direction direction)
else
return 0;
 }
+
 /*
  * This function contains common code for mapping of a physically
  * contiguous memory region into DMA address space. It is used by all
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 73b74ba..08d8d81 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -205,6 +205,7 @@ u16 *amd_iommu_alias_table;
  * for a specific device. It is also indexed by the PCI device id.
  */
 struct amd_iommu **amd_iommu_rlookup_table;
+EXPORT_SYMBOL(amd_iommu_rlookup_table);
 
 /*
  * This table is used to find the irq remapping table for a given device id
@@ -258,6 +259,7 @@ bool translation_pre_enabled(struct amd_iommu *iommu)
 {
return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
 }
+EXPORT_SYMBOL(translation_pre_enabled);
 
 static void clear_translation_pre_enabled(struct amd_iommu *iommu)
 {
@@ -845,6 +847,7 @@ static int copy_dev_tables(void)
struct amd_iommu *iommu;
u16 dom_id, dte_v, irq_v;
static int copied;
+   u64 tmp;
 
for_each_iommu(iommu) {
if (!translation_pre_enabled(iommu)) {
@@ -888,6 +891,14 @@ static int copy_dev_tables(void)
amd_iommu_dev_table[devid].data[1]
= old_devtb[devid].data[1];
__set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
+   /* If gcr3 table existed, mask it out */
+   if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
+   tmp = DTE_GCR3_VAL_B(~0ULL) << 
DTE_GCR3_SHIFT_B;
+   tmp |= DTE_GCR3_VAL_C(~0ULL) << 
DTE_GCR3_SHIFT_C;
+   amd_iommu_dev_table[devid].data[1] &= 
~tmp;
+   tmp = DTE_GCR3_VAL_A(~0ULL) << 
DTE_GCR3_SHIFT_A;
+   amd_iommu_dev_table[devid].data[0] &= 
~tmp;
+   }
}
 
irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index 9560183..d6a2c36 100644
--- a/drivers/iommu/amd_iommu_pro

[PATCH v7 RESEND 08/12] iommu/amd: Add sanity check of irq remap information of old dev table entry

2017-01-01 Thread Baoquan He
Firstly, split the dev table entry copy into an address translation part
and an irq remapping part, because these two parts can be configured to
be available independently.

Secondly check if IntCtl and IntTabLen are 10b and 1000b if they are
set.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c   |  5 -
 drivers/iommu/amd_iommu_init.c  | 25 ++---
 drivers/iommu/amd_iommu_types.h |  8 
 3 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 2005d26..c23f3d11 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3642,11 +3642,6 @@ EXPORT_SYMBOL(amd_iommu_device_info);
 
 static struct irq_chip amd_ir_chip;
 
-#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
-#define DTE_IRQ_REMAP_INTCTL(2ULL << 60)
-#define DTE_IRQ_TABLE_LEN   (8ULL << 1)
-#define DTE_IRQ_REMAP_ENABLE1ULL
-
 static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table)
 {
u64 dte;
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 93f0ac2..9186f8b 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -838,12 +838,12 @@ static int get_dev_entry_bit(u16 devid, u8 bit)
 
 static int copy_dev_tables(void)
 {
+   u64 int_ctl, int_tab_len, entry, last_entry = 0;
struct dev_table_entry *old_devtb = NULL;
u32 lo, hi, devid, old_devtb_size;
phys_addr_t old_devtb_phys;
-   u64 entry, last_entry = 0;
struct amd_iommu *iommu;
-   u16 dom_id, dte_v;
+   u16 dom_id, dte_v, irq_v;
static int copied;
 
for_each_iommu(iommu) {
@@ -882,8 +882,27 @@ static int copy_dev_tables(void)
amd_iommu_dev_table[devid] = old_devtb[devid];
dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
-   if (dte_v && dom_id)
+   if (dte_v && dom_id) {
+   amd_iommu_dev_table[devid].data[0]
+   = old_devtb[devid].data[0];
+   amd_iommu_dev_table[devid].data[1]
+   = old_devtb[devid].data[1];
__set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
+   }
+
+   irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
+   int_ctl = old_devtb[devid].data[2] & 
DTE_IRQ_REMAP_INTCTL_MASK;
+   int_tab_len = old_devtb[devid].data[2] & 
DTE_IRQ_TABLE_LEN_MASK;
+   if (irq_v && (int_ctl || int_tab_len)) {
+   if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
+(int_tab_len != DTE_IRQ_TABLE_LEN)) {
+   pr_err("Wrong old irq remapping flag: 
%#x\n", devid);
+   return -1;
+   }
+
+   amd_iommu_dev_table[devid].data[2]
+   = old_devtb[devid].data[2];
+   }
}
memunmap(old_devtb);
copied = 1;
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 79ec841..b5ae18e 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -250,6 +250,14 @@
 
 #define GA_GUEST_NR0x1
 
+/* Bit value definition for dte irq remapping fields*/
+#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
+#define DTE_IRQ_REMAP_INTCTL_MASK  (0x3ULL << 60)
+#define DTE_IRQ_TABLE_LEN_MASK (0xfULL << 1)
+#define DTE_IRQ_REMAP_INTCTL(2ULL << 60)
+#define DTE_IRQ_TABLE_LEN   (8ULL << 1)
+#define DTE_IRQ_REMAP_ENABLE1ULL
+
 #define PAGE_MODE_NONE0x00
 #define PAGE_MODE_1_LEVEL 0x01
 #define PAGE_MODE_2_LEVEL 0x02
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/vt-d: Make sure IOMMUs are off when intel_iommu=off

2017-04-11 Thread Baoquan He
Hi Joerg,

Do you plan to merge this one as urgent?

There's bug created about this issue on rhel, it would be great if it
can be put in next or merged so that we can back port it.

Thanks
Baoquan

On 03/29/17 at 05:00pm, Joerg Roedel wrote:
> From: Joerg Roedel 
> 
> When booting into a kexec kernel with intel_iommu=off, and
> the previous kernel had intel_iommu=on, the IOMMU hardware
> is still enabled and gets not disabled by the new kernel.
> 
> This causes the boot to fail because DMA is blocked by the
> hardware. Disable the IOMMUs when we find it enabled in the
> kexec kernel and boot with intel_iommu=off.
> 
> Signed-off-by: Joerg Roedel 
> ---
>  drivers/iommu/intel-iommu.c | 18 +-
>  1 file changed, 17 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index d412a31..1662288 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -4730,6 +4730,15 @@ static int intel_iommu_cpu_dead(unsigned int cpu)
>   return 0;
>  }
>  
> +static void intel_disable_iommus(void)
> +{
> + struct intel_iommu *iommu = NULL;
> + struct dmar_drhd_unit *drhd;
> +
> + for_each_iommu(iommu, drhd)
> + iommu_disable_translation(iommu);
> +}
> +
>  static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
>  {
>   return container_of(dev, struct intel_iommu, iommu.dev);
> @@ -4840,8 +4849,15 @@ int __init intel_iommu_init(void)
>   goto out_free_dmar;
>   }
>  
> - if (no_iommu || dmar_disabled)
> + if (no_iommu || dmar_disabled) {
> + /*
> +  * Make sure the IOMMUs are switched off, even when we
> +  * boot into a kexec kernel and the previous kernel left
> +  * them enabled
> +  */
> + intel_disable_iommus();
>   goto out_free_dmar;
> + }
>  
>   if (list_empty(&dmar_rmrr_units))
>   pr_info("No RMRR found\n");
> -- 
> 1.9.1
> 
> ___
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/vt-d: Make sure IOMMUs are off when intel_iommu=off

2017-04-12 Thread Baoquan He
On 04/12/17 at 11:54pm, Joerg Roedel wrote:
> Hi Baoquan,
> 
> On Wed, Apr 12, 2017 at 09:40:56AM +0800, Baoquan He wrote:
> > Do you plan to merge this one as urgent?
> > 
> > There's bug created about this issue on rhel, it would be great if it
> > can be put in next or merged so that we can back port it.
> 
> No, I am not sending this for v4.11, because this issue existed forever
> and is no regression. I queued it for v4.12 and the commit-id in the
> iommu-tree will be the same as upstream, if you need that for your
> backport.

Yeah, agree. The commit-id should be enough. Thanks a lot!

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/amd: Suppress IO_PAGE_FAULTs in kdump kernel

2017-06-23 Thread Baoquan He
Hi dear Joerg,

On 06/16/17 at 10:15am, Joerg Roedel wrote:
> From: Joerg Roedel 
> 
> When booting into a kdump kernel, suppress IO_PAGE_FAULTs by
> default for all devices. But allow the faults again when a
> domain is assigned to a device.

I have two bugs at hand reported by customer, saying their system hang
with amd iommu on. I remember I borrowed the system and found it hang very
early so that no one knew what's happened. One time it printed several lines
of boot message and I found it's amd iommu system, adding amd_iommu=off
to make the system boot normally.

And with the kdump fix of amd iommu patchset applied, kdump kernel boots
well. So maybe suppressing the fault message is not enough.

Thanks
Baoquan

> 
> Signed-off-by: Joerg Roedel 
> ---
>  drivers/iommu/amd_iommu.c   | 3 ++-
>  drivers/iommu/amd_iommu_init.c  | 9 +
>  drivers/iommu/amd_iommu_types.h | 1 +
>  3 files changed, 12 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
> index 80efa72..623ab53 100644
> --- a/drivers/iommu/amd_iommu.c
> +++ b/drivers/iommu/amd_iommu.c
> @@ -2050,7 +2050,8 @@ static void set_dte_entry(u16 devid, struct 
> protection_domain *domain, bool ats)
>   flags|= tmp;
>   }
>  
> - flags &= ~(0xffffUL);
> +
> + flags &= ~(DTE_FLAG_SA | 0xffffULL);
>   flags |= domain->id;
>  
>   amd_iommu_dev_table[devid].data[1]  = flags;
> diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
> index 5a11328..d9f5ddd 100644
> --- a/drivers/iommu/amd_iommu_init.c
> +++ b/drivers/iommu/amd_iommu_init.c
> @@ -29,6 +29,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -1898,6 +1899,14 @@ static void init_device_table_dma(void)
>   for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
>   set_dev_entry_bit(devid, DEV_ENTRY_VALID);
>   set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
> + /*
> +  * In kdump kernels in-flight DMA from the old kernel might
> +  * cause IO_PAGE_FAULTs. There are no reports that a kdump
> +  * actually failed because of that, so just disable fault
> +  * reporting in the hardware to get rid of the messages
> +  */
> + if (is_kdump_kernel())
> + set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT);
>   }
>  }
>  
> diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
> index 4de8f41..4cad9b3 100644
> --- a/drivers/iommu/amd_iommu_types.h
> +++ b/drivers/iommu/amd_iommu_types.h
> @@ -322,6 +322,7 @@
>  #define IOMMU_PTE_IW (1ULL << 62)
>  
>  #define DTE_FLAG_IOTLB   (1ULL << 32)
> +#define DTE_FLAG_SA  (1ULL << 34)
>  #define DTE_FLAG_GV  (1ULL << 55)
>  #define DTE_FLAG_MASK(0x3ffULL << 32)
>  #define DTE_GLX_SHIFT(56)
> -- 
> 2.7.4
> 
> ___
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/amd: Suppress IO_PAGE_FAULTs in kdump kernel

2017-06-23 Thread Baoquan He
Hi Joerg,

On 06/23/17 at 04:57pm, Baoquan He wrote:
> Hi dear Joerg,
> 
> On 06/16/17 at 10:15am, Joerg Roedel wrote:
> > From: Joerg Roedel 
> > 
> > When booting into a kdump kernel, suppress IO_PAGE_FAULTs by
> > default for all devices. But allow the faults again when a
> > domain is assigned to a device.
> 
> I have two bugs at hand reported by customer, saying their system hang
> with amd iommu on. I remember I borrowed the system and found it hang very
> early so that no one knew what's happened. One time it printed several lines
> of boot message and I found it's amd iommu system, adding amd_iommu=off
> to make the system boot normally.
> 
> And with the kdump fix of amd iommu patchset applied, kdump kernel boots
> well. So maybe suppressing the fault message is not enough.

Do you think whether it's necessary to continue my kdump fix of amd iommu
patchset? Seems my last post was in Jan this year. I know you are very
busy on fixing bugs and reviewing tons of patches. Without your
guidance and reviewing, I absolutely can't make it. So I would like to
hear your suggestions and idea.

I focused on kaslr issues recently, now most of them have been
fixed. My boss discussed with me about the next plan. If you have other
plan, I can sync it to our team about the status of upstream.

Thanks
Baoquan


> > ---
> >  drivers/iommu/amd_iommu.c   | 3 ++-
> >  drivers/iommu/amd_iommu_init.c  | 9 +
> >  drivers/iommu/amd_iommu_types.h | 1 +
> >  3 files changed, 12 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
> > index 80efa72..623ab53 100644
> > --- a/drivers/iommu/amd_iommu.c
> > +++ b/drivers/iommu/amd_iommu.c
> > @@ -2050,7 +2050,8 @@ static void set_dte_entry(u16 devid, struct 
> > protection_domain *domain, bool ats)
> > flags|= tmp;
> > }
> >  
> > -   flags &= ~(0xffffUL);
> > +
> > +   flags &= ~(DTE_FLAG_SA | 0xffffULL);
> > flags |= domain->id;
> >  
> > amd_iommu_dev_table[devid].data[1]  = flags;
> > diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
> > index 5a11328..d9f5ddd 100644
> > --- a/drivers/iommu/amd_iommu_init.c
> > +++ b/drivers/iommu/amd_iommu_init.c
> > @@ -29,6 +29,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >  #include 
> > @@ -1898,6 +1899,14 @@ static void init_device_table_dma(void)
> > for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
> > set_dev_entry_bit(devid, DEV_ENTRY_VALID);
> > set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
> > +   /*
> > +* In kdump kernels in-flight DMA from the old kernel might
> > +* cause IO_PAGE_FAULTs. There are no reports that a kdump
> > +* actually failed because of that, so just disable fault
> > +* reporting in the hardware to get rid of the messages
> > +*/
> > +   if (is_kdump_kernel())
> > +   set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT);
> > }
> >  }
> >  
> > diff --git a/drivers/iommu/amd_iommu_types.h 
> > b/drivers/iommu/amd_iommu_types.h
> > index 4de8f41..4cad9b3 100644
> > --- a/drivers/iommu/amd_iommu_types.h
> > +++ b/drivers/iommu/amd_iommu_types.h
> > @@ -322,6 +322,7 @@
> >  #define IOMMU_PTE_IW (1ULL << 62)
> >  
> >  #define DTE_FLAG_IOTLB (1ULL << 32)
> > +#define DTE_FLAG_SA(1ULL << 34)
> >  #define DTE_FLAG_GV(1ULL << 55)
> >  #define DTE_FLAG_MASK  (0x3ffULL << 32)
> >  #define DTE_GLX_SHIFT  (56)
> > -- 
> > 2.7.4
> > 
> > ___
> > iommu mailing list
> > iommu@lists.linux-foundation.org
> > https://lists.linuxfoundation.org/mailman/listinfo/iommu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/amd: Suppress IO_PAGE_FAULTs in kdump kernel

2017-06-26 Thread Baoquan He
On 06/26/17 at 12:07pm, Joerg Roedel wrote:
> Hi Baoquan,
> 
> On Fri, Jun 23, 2017 at 07:43:10PM +0800, Baoquan He wrote:
> > Do you think whether it's necessary to continue my kdump fix of amd iommu
> > patchset? Seems my last post was in Jan this year. I know you are very
> > busy on fixing bugs and reviewing tons of patches. Without your
> > guidance and reviewing, I absolutely can't make it. So I would like to
> > hear your suggestions and idea.
> > 
> > I focused on kaslr issues recently, now most of them have been
> > fixed. My boss discussed with me about the next plan. If you have other
> > plan, I can sync it to our team about the status of upstream.
> 
> Sorry for my silence on the patches, I have not yet found the time to
> look deeply into them. I am still interested in them, so how about you
> do a rebase/repost after the next merge window and then I will take the
> time for a more in depth review and discussion?

Thanks a lot for reply. Totally understood, so many patches need be
reviewed, and so many different iommu types.

Sure, let me do it right away. I may need a little time to recall the
details, go through the spec and code again. It won't take too long.

Thanks
Baoquan
> 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/amd: Suppress IO_PAGE_FAULTs in kdump kernel

2017-07-20 Thread Baoquan He
Hi Joerg,

On 06/26/17 at 12:07pm, Joerg Roedel wrote:
> Hi Baoquan,
> 
> On Fri, Jun 23, 2017 at 07:43:10PM +0800, Baoquan He wrote:
> > Do you think whether it's necessary to continue my kdump fix of amd iommu
> > patchset? Seems my last post was in Jan this year. I know you are very
> > busy on fixing bugs and reviewing tons of patches. Without your
> > guidance and reviewing, I absolutely can't make it. So I would like to
> > hear your suggestions and idea.
> > 
> > I focused on kaslr issues recently, now most of them have been
> > fixed. My boss discussed with me about the next plan. If you have other
> > plan, I can sync it to our team about the status of upstream.
> 
> Sorry for my silence on the patches, I have not yet found the time to
> look deeply into them. I am still interested in them, so how about you
> do a rebase/repost after the next merge window and then I will take the
> time for a more in depth review and discussion?

I have rebased the amd iommu fix patches of kdump kernel on the latest
upstream kernel, can I send them to you to have a look? Or just send to
iommu and lkml mailing list?

Thanks
Baoquan
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v8 01/13] iommu/amd: Detect pre enabled translation

2017-07-21 Thread Baoquan He
Add functions to check whether translation is already enabled in IOMMU.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c  | 24 
 drivers/iommu/amd_iommu_proto.h |  1 +
 drivers/iommu/amd_iommu_types.h |  4 
 3 files changed, 29 insertions(+)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 5cc597b383c7..e39857ce6481 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -258,6 +258,25 @@ static int amd_iommu_enable_interrupts(void);
 static int __init iommu_go_to_state(enum iommu_init_state state);
 static void init_device_table_dma(void);
 
+bool translation_pre_enabled(struct amd_iommu *iommu)
+{
+   return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
+}
+
+static void clear_translation_pre_enabled(struct amd_iommu *iommu)
+{
+   iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
+}
+
+static void init_translation_status(struct amd_iommu *iommu)
+{
+   u32 ctrl;
+
+   ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
+   if (ctrl & (1<<CONTROL_IOMMU_EN))
+   iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
+}
+
 static inline void update_last_devid(u16 devid)
 {
if (devid > amd_iommu_last_bdf)
@@ -1399,6 +1418,11 @@ static int __init init_iommu_one(struct amd_iommu 
*iommu, struct ivhd_header *h)
 
iommu->int_enabled = false;
 
+   init_translation_status(iommu);
+
+   if (translation_pre_enabled(iommu))
+   pr_warn("Translation is already enabled - trying to copy 
translation structures\n");
+
ret = init_iommu_from_acpi(iommu, h);
if (ret)
return ret;
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index 466260f8a1df..a9666d2005bb 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -87,4 +87,5 @@ static inline bool iommu_feature(struct amd_iommu *iommu, u64 
f)
return !!(iommu->features & f);
 }
 
+extern bool translation_pre_enabled(struct amd_iommu *iommu);
 #endif /* _ASM_X86_AMD_IOMMU_PROTO_H  */
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 294a409e283b..d15966b62b33 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -417,6 +417,7 @@ extern struct kmem_cache *amd_iommu_irq_cache;
 #define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL)
 
 
+
 /*
  * This struct is used to pass information about
  * incoming PPR faults around.
@@ -435,6 +436,8 @@ struct iommu_domain;
 struct irq_domain;
 struct amd_irte_ops;
 
+#define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED  (1 << 0)
+
 /*
  * This structure contains generic data for  IOMMU protection domains
  * independent of their use.
@@ -569,6 +572,7 @@ struct amd_iommu {
struct amd_irte_ops *irte_ops;
 #endif
 
+   u32 flags;
volatile u64 __aligned(8) cmd_sem;
 };
 
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v8 00/13] Fix the on-flight DMA issue on system with amd iommu

2017-07-21 Thread Baoquan He
When the kernel panics and jumps into the kdump kernel, DMA started by the
1st kernel is not stopped; this is called on-flight DMA. The current code
disables the iommu, builds a new translation table and attaches the device
to it. This will cause:

 1. IO_PAGE_FAULT warning message can be seen.
 2. transfer data to or from incorrect areas of memory.

Sometimes it causes dump failure or a kernel hang.

The principle of the fix is to defer the assignment of a device to a domain
until the device driver initialization stage. A new call-back,
is_attach_deferred(), is added to iommu-ops; it checks whether we need to
defer the domain attach/detach in iommu-core code. If deferral is needed,
just return directly from the amd iommu attach/detach function. The
attachment will be done in the device driver initialization stage when
calling get_domain().

Change history:
v7->v8:
Rebase patchset v7 on the latest v4.13-rc1.

- And re-enable printing IO_PAGE_FAULT message in kdump kernel.
- Only disable iommu if amd_iommu=off is specified in kdump kernel.


v6->v7:
Two main changes are made according to Joerg's suggestion:
- Add is_attach_deferred call-back to iommu-ops. With this domain
  can be deferred to device driver init cleanly.

- Allocate memory below 4G for dev table if translation pre-enabled.
  AMD engineer pointed out that it's unsafe to update the device-table
  while iommu is enabled. device-table pointer update is split up into
  two 32bit writes in the IOMMU hardware. So updating it while the IOMMU
  is enabled could have some nasty side effects.

v5->v6:
According to Joerg's comments made several below main changes:
- Add sanity check when copy old dev tables.

- If a device is set up with guest translations (DTE.GV=1), then don't
  copy that information but move the device over to an empty guest-cr3
  table and handle the faults in the PPR log (which just answer them
  with INVALID).

v5:
bnx2 NIC can't reset itself during driver init. Post patch to reset
it during driver init. IO_PAGE_FAULT can't be seen anymore.

Below is link of v5 post.
https://lists.linuxfoundation.org/pipermail/iommu/2016-September/018527.html


Baoquan He (12):
  iommu/amd: Detect pre enabled translation
  iommu/amd: add several helper functions
  Revert "iommu/amd: Suppress IO_PAGE_FAULTs in kdump kernel"
  iommu/amd: Define bit fields for DTE particularly
  iommu/amd: Add function copy_dev_tables()
  iommu/amd: copy old trans table from old kernel
  iommu/amd: Do sanity check for irq remap of old dev table entry
  iommu: Add is_attach_deferred call-back to iommu-ops
  iommu/amd: Use is_attach_deferred call-back
  iommu/amd: Allocate memory below 4G for dev table if translation
pre-enabled
  iommu/amd: Don't copy GCR3 table root pointer
  iommu/amd: Clear out the GV flag when handle deferred domain attach

root (1):
  iommu/amd: Disable iommu only if amd_iommu=off is specified

 drivers/iommu/amd_iommu.c   |  81 ---
 drivers/iommu/amd_iommu_init.c  | 212 
 drivers/iommu/amd_iommu_proto.h |   2 +
 drivers/iommu/amd_iommu_types.h |  56 ++-
 drivers/iommu/amd_iommu_v2.c|  18 +++-
 drivers/iommu/iommu.c   |   8 ++
 include/linux/iommu.h   |   1 +
 7 files changed, 315 insertions(+), 63 deletions(-)

-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v8 03/13] Revert "iommu/amd: Suppress IO_PAGE_FAULTs in kdump kernel"

2017-07-21 Thread Baoquan He
This reverts commit 54bd63570484167cb13edf81e31fff107b879981.

We still need the IO_PAGE_FAULT message to report errors after the
issue of on-flight DMA in the kdump kernel is fixed.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c   | 3 +--
 drivers/iommu/amd_iommu_init.c  | 9 -
 drivers/iommu/amd_iommu_types.h | 1 -
 3 files changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 688e77576e5a..e8a6d8109564 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2086,8 +2086,7 @@ static void set_dte_entry(u16 devid, struct 
protection_domain *domain, bool ats)
flags|= tmp;
}
 
-
-   flags &= ~(DTE_FLAG_SA | 0xffffULL);
+   flags &= ~(0xffffUL);
flags |= domain->id;
 
amd_iommu_dev_table[devid].data[1]  = flags;
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 4ca6e3257d92..f6da5fe03b31 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -29,7 +29,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -1942,14 +1941,6 @@ static void init_device_table_dma(void)
for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
set_dev_entry_bit(devid, DEV_ENTRY_VALID);
set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
-   /*
-* In kdump kernels in-flight DMA from the old kernel might
-* cause IO_PAGE_FAULTs. There are no reports that a kdump
-* actually failed because of that, so just disable fault
-* reporting in the hardware to get rid of the messages
-*/
-   if (is_kdump_kernel())
-   set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT);
}
 }
 
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index d15966b62b33..608e81ca5e92 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -322,7 +322,6 @@
 #define IOMMU_PTE_IW (1ULL << 62)
 
 #define DTE_FLAG_IOTLB (1ULL << 32)
-#define DTE_FLAG_SA(1ULL << 34)
 #define DTE_FLAG_GV(1ULL << 55)
 #define DTE_FLAG_MASK  (0x3ffULL << 32)
 #define DTE_GLX_SHIFT  (56)
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v8 02/13] iommu/amd: add several helper functions

2017-07-21 Thread Baoquan He
Move the code that enables a single iommu into a wrapper function,
early_enable_iommu(). This makes the later kdump changes easier.

And also add iommu_disable_command_buffer and iommu_disable_event_buffer
for later usage.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c | 42 +++---
 1 file changed, 31 insertions(+), 11 deletions(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index e39857ce6481..4ca6e3257d92 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -634,6 +634,14 @@ static void iommu_enable_command_buffer(struct amd_iommu 
*iommu)
amd_iommu_reset_cmd_buffer(iommu);
 }
 
+/*
+ * This function disables the command buffer
+ */
+static void iommu_disable_command_buffer(struct amd_iommu *iommu)
+{
+   iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
+}
+
 static void __init free_command_buffer(struct amd_iommu *iommu)
 {
free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
@@ -666,6 +674,14 @@ static void iommu_enable_event_buffer(struct amd_iommu 
*iommu)
iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
 }
 
+/*
+ * This function disables the event log buffer
+ */
+static void iommu_disable_event_buffer(struct amd_iommu *iommu)
+{
+   iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
+}
+
 static void __init free_event_buffer(struct amd_iommu *iommu)
 {
free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
@@ -2046,6 +2062,19 @@ static void iommu_enable_ga(struct amd_iommu *iommu)
 #endif
 }
 
+static void early_enable_iommu(struct amd_iommu *iommu)
+{
+   iommu_disable(iommu);
+   iommu_init_flags(iommu);
+   iommu_set_device_table(iommu);
+   iommu_enable_command_buffer(iommu);
+   iommu_enable_event_buffer(iommu);
+   iommu_set_exclusion_range(iommu);
+   iommu_enable_ga(iommu);
+   iommu_enable(iommu);
+   iommu_flush_all_caches(iommu);
+}
+
 /*
  * This function finally enables all IOMMUs found in the system after
  * they have been initialized
@@ -2054,17 +2083,8 @@ static void early_enable_iommus(void)
 {
struct amd_iommu *iommu;
 
-   for_each_iommu(iommu) {
-   iommu_disable(iommu);
-   iommu_init_flags(iommu);
-   iommu_set_device_table(iommu);
-   iommu_enable_command_buffer(iommu);
-   iommu_enable_event_buffer(iommu);
-   iommu_set_exclusion_range(iommu);
-   iommu_enable_ga(iommu);
-   iommu_enable(iommu);
-   iommu_flush_all_caches(iommu);
-   }
+   for_each_iommu(iommu)
+   early_enable_iommu(iommu);
 
 #ifdef CONFIG_IRQ_REMAP
if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v8 04/13] iommu/amd: Define bit fields for DTE particularly

2017-07-21 Thread Baoquan He
In the AMD-Vi spec several bits of the IO PTE fields and DTE fields are
similar, so both of them can share the same macro definitions. However,
defining them separately makes the code more readable. Do it now.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c   |  8 
 drivers/iommu/amd_iommu_types.h | 18 ++
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index e8a6d8109564..e5a03f259986 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1537,9 +1537,9 @@ static int iommu_map_page(struct protection_domain *dom,
 
if (count > 1) {
__pte = PAGE_SIZE_PTE(phys_addr, page_size);
-   __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC;
+   __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
} else
-   __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC;
+   __pte = phys_addr | IOMMU_PTE_PR | IOMMU_PTE_FC;
 
if (prot & IOMMU_PROT_IR)
__pte |= IOMMU_PTE_IR;
@@ -2053,7 +2053,7 @@ static void set_dte_entry(u16 devid, struct 
protection_domain *domain, bool ats)
 
pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
-   pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
+   pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV;
 
flags = amd_iommu_dev_table[devid].data[1];
 
@@ -2096,7 +2096,7 @@ static void set_dte_entry(u16 devid, struct 
protection_domain *domain, bool ats)
 static void clear_dte_entry(u16 devid)
 {
/* remove entry from the device table seen by the hardware */
-   amd_iommu_dev_table[devid].data[0]  = IOMMU_PTE_P | IOMMU_PTE_TV;
+   amd_iommu_dev_table[devid].data[0]  = DTE_FLAG_V | DTE_FLAG_TV;
amd_iommu_dev_table[devid].data[1] &= DTE_FLAG_MASK;
 
amd_iommu_apply_erratum_63(devid);
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 608e81ca5e92..ea36af19b5b9 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -265,7 +265,7 @@
 #define PM_LEVEL_INDEX(x, a)   (((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL)
 #define PM_LEVEL_ENC(x)(((x) << 9) & 0xe00ULL)
 #define PM_LEVEL_PDE(x, a) ((a) | PM_LEVEL_ENC((x)) | \
-IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW)
+IOMMU_PTE_PR | IOMMU_PTE_IR | IOMMU_PTE_IW)
 #define PM_PTE_LEVEL(pte)  (((pte) >> 9) & 0x7ULL)
 
 #define PM_MAP_4k  0
@@ -314,13 +314,23 @@
 #define PTE_LEVEL_PAGE_SIZE(level) \
(1ULL << (12 + (9 * (level
 
-#define IOMMU_PTE_P  (1ULL << 0)
-#define IOMMU_PTE_TV (1ULL << 1)
+/*
+ * Bit value definition for I/O PTE fields
+ */
+#define IOMMU_PTE_PR (1ULL << 0)
 #define IOMMU_PTE_U  (1ULL << 59)
 #define IOMMU_PTE_FC (1ULL << 60)
 #define IOMMU_PTE_IR (1ULL << 61)
 #define IOMMU_PTE_IW (1ULL << 62)
 
+/*
+ * Bit value definition for DTE fields
+ */
+#define DTE_FLAG_V  (1ULL << 0)
+#define DTE_FLAG_TV (1ULL << 1)
+#define DTE_FLAG_IR (1ULL << 61)
+#define DTE_FLAG_IW (1ULL << 62)
+
 #define DTE_FLAG_IOTLB (1ULL << 32)
 #define DTE_FLAG_GV(1ULL << 55)
 #define DTE_FLAG_MASK  (0x3ffULL << 32)
@@ -342,7 +352,7 @@
 #define GCR3_VALID 0x01ULL
 
 #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
-#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
+#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_PR)
 #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK))
 #define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07)
 
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v8 05/13] iommu/amd: Add function copy_dev_tables()

2017-07-21 Thread Baoquan He
Add function copy_dev_tables() to copy the old DEV table entries of the
panicked kernel to the newly allocated DEV table. Since all IOMMUs share the
same DTE table, the copy only needs to be done once. Besides, we also need to:

  - Check whether all IOMMUs actually use the same device table with the same 
size

  - Verify that the size of the old device table is the expected size.

  - Reserve the old domain IDs used by the 1st kernel to avoid touching the
    old io-page tables, so that in-flight DMA can continue to look them up.

Also define the macro DEV_DOMID_MASK to replace the magic number 0xffffULL,
so that it can be reused in copy_dev_tables().

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c   |  2 +-
 drivers/iommu/amd_iommu_init.c  | 55 +
 drivers/iommu/amd_iommu_types.h |  1 +
 3 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index e5a03f259986..4d00f1bda900 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2086,7 +2086,7 @@ static void set_dte_entry(u16 devid, struct 
protection_domain *domain, bool ats)
flags|= tmp;
}
 
-   flags &= ~(0xUL);
+   flags &= ~DEV_DOMID_MASK;
flags |= domain->id;
 
amd_iommu_dev_table[devid].data[1]  = flags;
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index f6da5fe03b31..c58f091ce232 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -842,6 +842,61 @@ static int get_dev_entry_bit(u16 devid, u8 bit)
 }
 
 
+static int copy_dev_tables(void)
+{
+   struct dev_table_entry *old_devtb = NULL;
+   u32 lo, hi, devid, old_devtb_size;
+   phys_addr_t old_devtb_phys;
+   u64 entry, last_entry = 0;
+   struct amd_iommu *iommu;
+   u16 dom_id, dte_v;
+   static int copied;
+
+   for_each_iommu(iommu) {
+   if (!translation_pre_enabled(iommu)) {
+   pr_err("IOMMU:%d is not pre-enabled!/n",
+   iommu->index);
+   return -1;
+   }
+
+   /* All IOMMUs should use the same device table with the same 
size */
+   lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
+   hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
+   entry = (((u64) hi) << 32) + lo;
+   if (last_entry && last_entry != entry) {
+   pr_err("IOMMU:%d should use the same dev table as 
others!/n",
+   iommu->index);
+   return -1;
+   }
+   last_entry = entry;
+
+   old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
+   if (old_devtb_size != dev_table_size) {
+   pr_err("The device table size of IOMMU:%d is not 
expected!/n",
+   iommu->index);
+   return -1;
+   }
+
+   old_devtb_phys = entry & PAGE_MASK;
+   old_devtb = memremap(old_devtb_phys, dev_table_size, 
MEMREMAP_WB);
+   if (!old_devtb)
+   return -1;
+
+   if (copied)
+   continue;
+   for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
+   amd_iommu_dev_table[devid] = old_devtb[devid];
+   dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
+   dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
+   if (dte_v && dom_id)
+   __set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
+   }
+   memunmap(old_devtb);
+   copied = 1;
+   }
+   return 0;
+}
+
 void amd_iommu_apply_erratum_63(u16 devid)
 {
int sysmgt;
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index ea36af19b5b9..1c06bcc06f5c 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -336,6 +336,7 @@
 #define DTE_FLAG_MASK  (0x3ffULL << 32)
 #define DTE_GLX_SHIFT  (56)
 #define DTE_GLX_MASK   (3)
+#define DEV_DOMID_MASK 0xULL
 
 #define DTE_GCR3_VAL_A(x)  (((x) >> 12) & 0x7ULL)
 #define DTE_GCR3_VAL_B(x)  (((x) >> 15) & 0x0ULL)
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v8 06/13] iommu/amd: copy old trans table from old kernel

2017-07-21 Thread Baoquan He
Several things need to be done here:
- If the IOMMU is pre-enabled in a normal kernel, just disable it and
  print a warning.

- If copying the dev table of the old kernel fails, continue to proceed
  as in a normal kernel.

- Disable and re-enable the event/cmd buffers, install the copied DTE
  table into the register, and detect and enable guest vAPIC.

- Flush all caches.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c | 51 --
 1 file changed, 44 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index c58f091ce232..aa9b5918d11f 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 
+#include 
 #include "amd_iommu_proto.h"
 #include "amd_iommu_types.h"
 #include "irq_remapping.h"
@@ -1489,9 +1490,12 @@ static int __init init_iommu_one(struct amd_iommu 
*iommu, struct ivhd_header *h)
iommu->int_enabled = false;
 
init_translation_status(iommu);
-
-   if (translation_pre_enabled(iommu))
-   pr_warn("Translation is already enabled - trying to copy 
translation structures\n");
+   if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
+   iommu_disable(iommu);
+   clear_translation_pre_enabled(iommu);
+   pr_warn("Translation was enabled for IOMMU:%d but we are not in 
kdump mode\n",
+   iommu->index);
+   }
 
ret = init_iommu_from_acpi(iommu, h);
if (ret)
@@ -1986,8 +1990,7 @@ static int __init init_memory_definitions(struct 
acpi_table_header *table)
 }
 
 /*
- * Init the device table to not allow DMA access for devices and
- * suppress all page faults
+ * Init the device table to not allow DMA access for devices
  */
 static void init_device_table_dma(void)
 {
@@ -2128,9 +2131,43 @@ static void early_enable_iommu(struct amd_iommu *iommu)
 static void early_enable_iommus(void)
 {
struct amd_iommu *iommu;
+   bool is_pre_enabled = false;
 
-   for_each_iommu(iommu)
-   early_enable_iommu(iommu);
+   for_each_iommu(iommu) {
+   if (translation_pre_enabled(iommu)) {
+   is_pre_enabled = true;
+   break;
+   }
+   }
+
+   if (!is_pre_enabled) {
+   for_each_iommu(iommu)
+   early_enable_iommu(iommu);
+   } else {
+   pr_warn("Translation is already enabled - trying to copy 
translation structures\n");
+   if (copy_dev_tables()) {
+   pr_err("Failed to copy DEV table from previous 
kernel.\n");
+   /*
+* If failed to copy dev tables from old kernel, 
continue to proceed
+* as it does in normal kernel.
+*/
+   for_each_iommu(iommu) {
+   clear_translation_pre_enabled(iommu);
+   early_enable_iommu(iommu);
+   }
+   } else {
+   pr_info("Copied DEV table from previous kernel.\n");
+   for_each_iommu(iommu) {
+   iommu_disable_command_buffer(iommu);
+   iommu_disable_event_buffer(iommu);
+   iommu_enable_command_buffer(iommu);
+   iommu_enable_event_buffer(iommu);
+   iommu_enable_ga(iommu);
+   iommu_set_device_table(iommu);
+   iommu_flush_all_caches(iommu);
+   }
+   }
+   }
 
 #ifdef CONFIG_IRQ_REMAP
if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v8 11/13] iommu/amd: Don't copy GCR3 table root pointer

2017-07-21 Thread Baoquan He
When the IOMMU is pre-enabled in the kdump kernel and a device is set up
with guest translations (DTE.GV=1), don't copy the GCR3 table root pointer.
Instead, move the device over to an empty guest-cr3 table and handle the
faults in the PPR log (which answers them with INVALID). After all, these
PPR faults are recoverable for the device, and we should not allow the
device to change the old kernel's data when we don't have to.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c   | 28 +++-
 drivers/iommu/amd_iommu_init.c  | 11 +++
 drivers/iommu/amd_iommu_proto.h |  1 +
 drivers/iommu/amd_iommu_types.h | 24 
 drivers/iommu/amd_iommu_v2.c| 18 +-
 5 files changed, 56 insertions(+), 26 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 4ff413b34b51..46d077784da0 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -103,30 +103,6 @@ int amd_iommu_max_glx_val = -1;
 static const struct dma_map_ops amd_iommu_dma_ops;
 
 /*
- * This struct contains device specific data for the IOMMU
- */
-struct iommu_dev_data {
-   struct list_head list;/* For domain->dev_list */
-   struct list_head dev_data_list;   /* For global dev_data_list */
-   struct protection_domain *domain; /* Domain the device is bound to */
-   u16 devid;/* PCI Device ID */
-   u16 alias;/* Alias Device ID */
-   bool iommu_v2;/* Device can make use of IOMMUv2 */
-   bool passthrough; /* Device is identity mapped */
-   struct {
-   bool enabled;
-   int qdep;
-   } ats;/* ATS state */
-   bool pri_tlp; /* PASID TLB required for
-PPR completions */
-   u32 errata;   /* Bitmap for errata to apply */
-   bool use_vapic;   /* Enable device to use vapic mode */
-   bool defer_attach;
-
-   struct ratelimit_state rs;/* Ratelimit IOPF messages */
-};
-
-/*
  * general struct to manage commands send to an IOMMU
  */
 struct iommu_cmd {
@@ -386,10 +362,11 @@ static struct iommu_dev_data *find_dev_data(u16 devid)
return dev_data;
 }
 
-static struct iommu_dev_data *get_dev_data(struct device *dev)
+struct iommu_dev_data *get_dev_data(struct device *dev)
 {
return dev->archdata.iommu;
 }
+EXPORT_SYMBOL(get_dev_data);
 
 /*
 * Find or create an IOMMU group for a acpihid device.
@@ -2540,6 +2517,7 @@ static int dir2prot(enum dma_data_direction direction)
else
return 0;
 }
+
 /*
  * This function contains common code for mapping of a physically
  * contiguous memory region into DMA address space. It is used by all
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index d7c301d0d672..8b4bac978062 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -209,6 +209,7 @@ u16 *amd_iommu_alias_table;
  * for a specific device. It is also indexed by the PCI device id.
  */
 struct amd_iommu **amd_iommu_rlookup_table;
+EXPORT_SYMBOL(amd_iommu_rlookup_table);
 
 /*
  * This table is used to find the irq remapping table for a given device id
@@ -262,6 +263,7 @@ bool translation_pre_enabled(struct amd_iommu *iommu)
 {
return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
 }
+EXPORT_SYMBOL(translation_pre_enabled);
 
 static void clear_translation_pre_enabled(struct amd_iommu *iommu)
 {
@@ -852,6 +854,7 @@ static int copy_dev_tables(void)
struct amd_iommu *iommu;
u16 dom_id, dte_v, irq_v;
static int copied;
+   u64 tmp;
 
for_each_iommu(iommu) {
if (!translation_pre_enabled(iommu)) {
@@ -895,6 +898,14 @@ static int copy_dev_tables(void)
amd_iommu_dev_table[devid].data[1]
= old_devtb[devid].data[1];
__set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
+   /* If gcr3 table existed, mask it out */
+   if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
+   tmp = DTE_GCR3_VAL_B(~0ULL) << 
DTE_GCR3_SHIFT_B;
+   tmp |= DTE_GCR3_VAL_C(~0ULL) << 
DTE_GCR3_SHIFT_C;
+   amd_iommu_dev_table[devid].data[1] &= 
~tmp;
+   tmp = DTE_GCR3_VAL_A(~0ULL) << 
DTE_GCR3_SHIFT_A;
+   amd_iommu_dev_table[devid].data[0] &= 
~tmp;
+   }
}
 
irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_io

[PATCH v8 12/13] iommu/amd: Clear out the GV flag when handle deferred domain attach

2017-07-21 Thread Baoquan He
When handling a deferred domain attach, we need to check whether the
domain is v2. If it is not, we should clear out the GV flag, which could
have been copied from the old device table entry.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 46d077784da0..98aaccecbb76 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2085,6 +2085,11 @@ static void clear_dte_entry(u16 devid)
amd_iommu_apply_erratum_63(devid);
 }
 
+static void clear_dte_flag_gv(u16 devid)
+{
+   amd_iommu_dev_table[devid].data[0] &= (~DTE_FLAG_GV);
+}
+
 static void do_attach(struct iommu_dev_data *dev_data,
  struct protection_domain *domain)
 {
@@ -2459,6 +2464,7 @@ static struct iommu_group *amd_iommu_device_group(struct 
device *dev)
  */
 static struct protection_domain *get_domain(struct device *dev)
 {
+   struct iommu_dev_data *dev_data = get_dev_data(dev);
struct protection_domain *domain;
struct iommu_domain *io_domain;
 
@@ -2466,11 +2472,21 @@ static struct protection_domain *get_domain(struct 
device *dev)
return ERR_PTR(-EINVAL);
 
domain = get_dev_data(dev)->domain;
-   if (domain == NULL && get_dev_data(dev)->defer_attach) {
+   if (domain == NULL && dev_data->defer_attach) {
+   u16 alias = amd_iommu_alias_table[dev_data->devid];
get_dev_data(dev)->defer_attach = false;
io_domain = iommu_get_domain_for_dev(dev);
domain = to_pdomain(io_domain);
attach_device(dev, domain);
+   /*
+* If the deferred attached domain is not v2, should clear out
+* the old GV flag.
+*/
+   if (!(domain->flags & PD_IOMMUV2_MASK)) {
+   clear_dte_flag_gv(dev_data->devid);
+   if (alias != dev_data->devid)
+   clear_dte_flag_gv(dev_data->devid);
+   }
}
if (!dma_ops_domain(domain))
return ERR_PTR(-EBUSY);
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v8 13/13] iommu/amd: Disable iommu only if amd_iommu=off is specified

2017-07-21 Thread Baoquan He
From: root 

It's OK to disable the IOMMU in a normal kernel, but there's no need
to disable it in the kdump kernel now that the in-flight DMA issue has
been fixed.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 8b4bac978062..880f693c809b 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -2494,7 +2494,8 @@ static int __init early_amd_iommu_init(void)
goto out;
 
/* Disable any previously enabled IOMMUs */
-   disable_iommus();
+   if (amd_iommu_disabled)
+   disable_iommus();
 
if (amd_iommu_irq_remap)
amd_iommu_irq_remap = check_ioapic_information();
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v8 09/13] iommu/amd: Use is_attach_deferred call-back

2017-07-21 Thread Baoquan He
Implement call-back is_attach_deferred and use it to defer the
domain attach from iommu driver init to device driver init when
iommu is pre-enabled in kdump kernel.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c | 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 5508f57a3e4f..4ff413b34b51 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -121,6 +121,7 @@ struct iommu_dev_data {
 PPR completions */
u32 errata;   /* Bitmap for errata to apply */
bool use_vapic;   /* Enable device to use vapic mode */
+   bool defer_attach;
 
struct ratelimit_state rs;/* Ratelimit IOPF messages */
 };
@@ -371,12 +372,17 @@ static u16 get_alias(struct device *dev)
 static struct iommu_dev_data *find_dev_data(u16 devid)
 {
struct iommu_dev_data *dev_data;
+   struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
 
dev_data = search_dev_data(devid);
 
-   if (dev_data == NULL)
+   if (dev_data == NULL) {
dev_data = alloc_dev_data(devid);
 
+   if (translation_pre_enabled(iommu))
+   dev_data->defer_attach = true;
+   }
+
return dev_data;
 }
 
@@ -2477,11 +2483,18 @@ static struct iommu_group 
*amd_iommu_device_group(struct device *dev)
 static struct protection_domain *get_domain(struct device *dev)
 {
struct protection_domain *domain;
+   struct iommu_domain *io_domain;
 
if (!check_device(dev))
return ERR_PTR(-EINVAL);
 
domain = get_dev_data(dev)->domain;
+   if (domain == NULL && get_dev_data(dev)->defer_attach) {
+   get_dev_data(dev)->defer_attach = false;
+   io_domain = iommu_get_domain_for_dev(dev);
+   domain = to_pdomain(io_domain);
+   attach_device(dev, domain);
+   }
if (!dma_ops_domain(domain))
return ERR_PTR(-EBUSY);
 
@@ -3372,6 +3385,13 @@ static void amd_iommu_apply_resv_region(struct device 
*dev,
WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL);
 }
 
+static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
+struct device *dev)
+{
+   struct iommu_dev_data *dev_data = dev->archdata.iommu;
+   return dev_data->defer_attach;
+}
+
 const struct iommu_ops amd_iommu_ops = {
.capable = amd_iommu_capable,
.domain_alloc = amd_iommu_domain_alloc,
@@ -3388,6 +3408,7 @@ const struct iommu_ops amd_iommu_ops = {
.get_resv_regions = amd_iommu_get_resv_regions,
.put_resv_regions = amd_iommu_put_resv_regions,
.apply_resv_region = amd_iommu_apply_resv_region,
+   .is_attach_deferred = amd_iommu_is_attach_deferred,
.pgsize_bitmap  = AMD_IOMMU_PGSIZES,
 };
 
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v8 07/13] iommu/amd: Do sanity check for irq remap of old dev table entry

2017-07-21 Thread Baoquan He
First, split the dev table entry copy into an address translation part
and an irq remapping part, because these two parts can be enabled
independently.

Second, check that IntCtl and IntTabLen are 10b and 1000b respectively if
they are set.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c   |  5 -
 drivers/iommu/amd_iommu_init.c  | 25 ++---
 drivers/iommu/amd_iommu_types.h |  8 
 3 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 4d00f1bda900..5508f57a3e4f 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3776,11 +3776,6 @@ EXPORT_SYMBOL(amd_iommu_device_info);
 
 static struct irq_chip amd_ir_chip;
 
-#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
-#define DTE_IRQ_REMAP_INTCTL(2ULL << 60)
-#define DTE_IRQ_TABLE_LEN   (8ULL << 1)
-#define DTE_IRQ_REMAP_ENABLE1ULL
-
 static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table)
 {
u64 dte;
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index aa9b5918d11f..052fa4a977d8 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -845,12 +845,12 @@ static int get_dev_entry_bit(u16 devid, u8 bit)
 
 static int copy_dev_tables(void)
 {
+   u64 int_ctl, int_tab_len, entry, last_entry = 0;
struct dev_table_entry *old_devtb = NULL;
u32 lo, hi, devid, old_devtb_size;
phys_addr_t old_devtb_phys;
-   u64 entry, last_entry = 0;
struct amd_iommu *iommu;
-   u16 dom_id, dte_v;
+   u16 dom_id, dte_v, irq_v;
static int copied;
 
for_each_iommu(iommu) {
@@ -889,8 +889,27 @@ static int copy_dev_tables(void)
amd_iommu_dev_table[devid] = old_devtb[devid];
dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
-   if (dte_v && dom_id)
+   if (dte_v && dom_id) {
+   amd_iommu_dev_table[devid].data[0]
+   = old_devtb[devid].data[0];
+   amd_iommu_dev_table[devid].data[1]
+   = old_devtb[devid].data[1];
__set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
+   }
+
+   irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
+   int_ctl = old_devtb[devid].data[2] & 
DTE_IRQ_REMAP_INTCTL_MASK;
+   int_tab_len = old_devtb[devid].data[2] & 
DTE_IRQ_TABLE_LEN_MASK;
+   if (irq_v && (int_ctl || int_tab_len)) {
+   if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
+(int_tab_len != DTE_IRQ_TABLE_LEN)) {
+   pr_err("Wrong old irq remapping flag: 
%#x\n", devid);
+   return -1;
+   }
+
+   amd_iommu_dev_table[devid].data[2]
+   = old_devtb[devid].data[2];
+   }
}
memunmap(old_devtb);
copied = 1;
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 1c06bcc06f5c..7149fa52063f 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -250,6 +250,14 @@
 
 #define GA_GUEST_NR0x1
 
+/* Bit value definition for dte irq remapping fields*/
+#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
+#define DTE_IRQ_REMAP_INTCTL_MASK  (0x3ULL << 60)
+#define DTE_IRQ_TABLE_LEN_MASK (0xfULL << 1)
+#define DTE_IRQ_REMAP_INTCTL(2ULL << 60)
+#define DTE_IRQ_TABLE_LEN   (8ULL << 1)
+#define DTE_IRQ_REMAP_ENABLE1ULL
+
 #define PAGE_MODE_NONE0x00
 #define PAGE_MODE_1_LEVEL 0x01
 #define PAGE_MODE_2_LEVEL 0x02
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v8 08/13] iommu: Add is_attach_deferred call-back to iommu-ops

2017-07-21 Thread Baoquan He
This new call-back will be used to check whether the domain attach needs
to be deferred for now. If so, the domain attach/detach returns directly.

Signed-off-by: Baoquan He 
---
 drivers/iommu/iommu.c | 8 
 include/linux/iommu.h | 1 +
 2 files changed, 9 insertions(+)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 3f6ea160afed..86581b115b92 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1283,6 +1283,10 @@ static int __iommu_attach_device(struct iommu_domain 
*domain,
 struct device *dev)
 {
int ret;
+   if ((domain->ops->is_attach_deferred != NULL) &&
+   domain->ops->is_attach_deferred(domain, dev))
+   return 0;
+
if (unlikely(domain->ops->attach_dev == NULL))
return -ENODEV;
 
@@ -1324,6 +1328,10 @@ EXPORT_SYMBOL_GPL(iommu_attach_device);
 static void __iommu_detach_device(struct iommu_domain *domain,
  struct device *dev)
 {
+   if ((domain->ops->is_attach_deferred != NULL) &&
+   domain->ops->is_attach_deferred(domain, dev))
+   return;
+
if (unlikely(domain->ops->detach_dev == NULL))
return;
 
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 2cb54adc4a33..63983c9e6c3a 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -225,6 +225,7 @@ struct iommu_ops {
u32 (*domain_get_windows)(struct iommu_domain *domain);
 
int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
+   bool (*is_attach_deferred)(struct iommu_domain *domain, struct device 
*dev);
 
unsigned long pgsize_bitmap;
 };
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v8 10/13] iommu/amd: Allocate memory below 4G for dev table if translation pre-enabled

2017-07-21 Thread Baoquan He
AMD pointed out it's unsafe to update the device-table while iommu
is enabled. It turns out that device-table pointer update is split
up into two 32bit writes in the IOMMU hardware. So updating it while
the IOMMU is enabled could have some nasty side effects.

The only way to work around this is to allocate the device-table below
4GB if translation is pre-enabled in kdump kernel. If allocation failed,
still use the old one.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 052fa4a977d8..d7c301d0d672 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -2149,11 +2149,23 @@ static void early_enable_iommu(struct amd_iommu *iommu)
  */
 static void early_enable_iommus(void)
 {
+   struct dev_table_entry *dev_tbl;
struct amd_iommu *iommu;
bool is_pre_enabled = false;
 
for_each_iommu(iommu) {
if (translation_pre_enabled(iommu)) {
+   gfp_t gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32;;
+
+   dev_tbl = (void *)__get_free_pages(gfp_flag,
+   get_order(dev_table_size));
+   if (dev_tbl != NULL) {
+   memcpy(dev_tbl, amd_iommu_dev_table, 
dev_table_size);
+   free_pages((unsigned long)amd_iommu_dev_table,
+   get_order(dev_table_size));
+   amd_iommu_dev_table = dev_tbl;
+   }
+
is_pre_enabled = true;
break;
}
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v8 11/13] iommu/amd: Don't copy GCR3 table root pointer

2017-07-23 Thread Baoquan He
On 07/24/17 at 06:33am, kbuild test robot wrote:
> Hi Baoquan,
> 
> [auto build test WARNING on iommu/next]
> [also build test WARNING on v4.13-rc1]
> [cannot apply to next-20170721]
> [if your patch is applied to the wrong git tree, please drop us a note to 
> help improve the system]
> 
> url:
> https://github.com/0day-ci/linux/commits/Baoquan-He/Fix-the-on-flight-DMA-issue-on-system-with-amd-iommu/20170724-060048
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git next
> config: x86_64-randconfig-x005-201730 (attached as .config)
> compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
> reproduce:
> # save the attached .config to linux build tree
> make ARCH=x86_64 
> 
> Note: it may well be a FALSE warning. FWIW you are at least aware of it now.
> http://gcc.gnu.org/wiki/Better_Uninitialized_Warnings
> 
> All warnings (new ones prefixed by >>):
> 
>drivers//iommu/amd_iommu_v2.c: In function 'ppr_notifier':
> >> drivers//iommu/amd_iommu_v2.c:566:6: warning: 'ret' may be used 
> >> uninitialized in this function [-Wmaybe-uninitialized]
>  int ret;
>  ^~~
> 
> vim +/ret +566 drivers//iommu/amd_iommu_v2.c

Thanks. ppr_notifier() should return NOTIFY_DONE anyway when a PPR fault is
handled in the kdump kernel, since the GCR3 table root pointer has been
made NULL intentionally.

I will fold this into patch 11/13 when a repost is needed.


>From 742d8a51d8832e12884800840c4ebe802767d808 Mon Sep 17 00:00:00 2001
From: Baoquan He 
Date: Mon, 24 Jul 2017 07:48:10 +0800
Subject: [PATCH] iommu/amd: The ppr faults handled in kdump kernel should
 return NOTIFY_DONE

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_v2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index 0245d414a7b3..e705fac89cb4 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -578,6 +578,7 @@ static int ppr_notifier(struct notifier_block *nb, unsigned 
long e, void *data)
dev_data = get_dev_data(&pdev->dev);
 
/* In kdump kernel pci dev is not initialized yet -> send INVALID */
+   ret = NOTIFY_DONE;
if (translation_pre_enabled(amd_iommu_rlookup_table[devid])
&& dev_data->defer_attach) {
amd_iommu_complete_ppr(pdev, iommu_fault->pasid,
@@ -585,7 +586,6 @@ static int ppr_notifier(struct notifier_block *nb, unsigned 
long e, void *data)
goto out;
}
 
-   ret = NOTIFY_DONE;
dev_state = get_device_state(iommu_fault->device_id);
if (dev_state == NULL)
goto out;
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2] intel-iommu: Quiesce devices before disabling IOMMU

2013-09-23 Thread Baoquan He
On 09/18/13 at 03:09pm, Takao Indoh wrote:
> This patch quiesces devices before disabling IOMMU on boot to stop
> ongoing DMA. In intel_iommu_init(), check context entries and if there
> is entry whose present bit is set then reset corresponding device.
> 
> When IOMMU is already enabled on boot, it is disabled and new DMAR table
> is created and then re-enabled in intel_iommu_init(). This causes DMAR
> faults if there are in-flight DMAs.
> 
> This causes problem on kdump. Devices are working in first kernel, and
> after switching to second kernel and initializing IOMMU, many DMAR
> faults occur and it causes problems like driver error or PCI SERR, at
> last kdump fails. This patch fixes this problem.
> 
> Changelog:
> v2:
> - Add CONTEXT_ENTRY_NR
> 
> v1:
> https://lkml.org/lkml/2013/8/21/71
> 
> Signed-off-by: Takao Indoh 
> ---
>  drivers/iommu/intel-iommu.c |   56 
> ++-
>  1 files changed, 55 insertions(+), 1 deletions(-)
> 
> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index eec0d3e..d0e8aff 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -224,6 +224,7 @@ struct context_entry {
>   u64 lo;
>   u64 hi;
>  };
> +#define CONTEXT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct context_entry))
>  
>  static inline bool context_present(struct context_entry *context)
>  {
> @@ -3663,6 +3664,56 @@ static struct notifier_block device_nb = {
>   .notifier_call = device_notifier,
>  };
>  
> +/* Reset PCI devices if its entry exists in DMAR table */
> +static void __init iommu_reset_devices(struct intel_iommu *iommu, u16 
> segment)
> +{
> + u64 addr;
> + struct root_entry *root;
> + struct context_entry *context;
> + int bus, devfn;
> + struct pci_dev *dev;
> +
> + addr = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
> + if (!addr)
> + return;
> +
> + /*
> +  *  In the case of kdump, ioremap is needed because root-entry table
> +  *  exists in first kernel's memory area which is not mapped in second
> +  *  kernel
> +  */
> + root = (struct root_entry *)ioremap(addr, PAGE_SIZE);
> + if (!root)
> + return;
> +
> + for (bus = 0; bus < ROOT_ENTRY_NR; bus++) {
> + if (!root_present(&root[bus]))
> + continue;
> +
> + context = (struct context_entry *)ioremap(
> + root[bus].val & VTD_PAGE_MASK, PAGE_SIZE);
> + if (!context)
> + continue;
> +
> + for (devfn = 0; devfn < CONTEXT_ENTRY_NR; devfn++) {
> + if (!context_present(&context[devfn]))
> + continue;
> +
> + dev = pci_get_domain_bus_and_slot(segment, bus, devfn);
> + if (!dev)
> + continue;
> +
> + if (!pci_reset_bus(dev->bus)) /* go to next bus */
> + break;
> + else /* Try per-function reset */
> + pci_reset_function(dev);
> +
> + }
> + iounmap(context);
> + }
> + iounmap(root);
> +}
> +
>  int __init intel_iommu_init(void)
>  {
>   int ret = 0;
> @@ -3687,8 +3738,11 @@ int __init intel_iommu_init(void)
>   continue;
>  
>   iommu = drhd->iommu;
> - if (iommu->gcmd & DMA_GCMD_TE)
> + if (iommu->gcmd & DMA_GCMD_TE) {
> + if (reset_devices)
> + iommu_reset_devices(iommu, drhd->segment);
>   iommu_disable_translation(iommu);
> + }
>   }
>  
>   if (dmar_dev_scope_init() < 0) {


Looks good to me, thanks!
Acked-by: Baoquan He 

> -- 
> 1.7.1
> 
> 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] intel-iommu: Quiesce devices before disabling IOMMU

2013-09-23 Thread Baoquan He
Hi,
Patch is great and works on my HP-z420.
There are several small concerns, please see inline comments.

On 08/21/13 at 04:15pm, Takao Indoh wrote:
> This patch quiesces devices before disabling IOMMU on boot to stop
> ongoing DMA. In intel_iommu_init(), check context entries and if there
> is entry whose present bit is set then reset corresponding device.
> 
> When IOMMU is already enabled on boot, it is disabled and new DMAR table
> is created and then re-enabled in intel_iommu_init(). This causes DMAR
> faults if there are in-flight DMAs.
> 
> This causes problem on kdump. Devices are working in first kernel, and
> after switching to second kernel and initializing IOMMU, many DMAR faults
> occur and it causes problems like driver error or PCI SERR, at last
> kdump fails. This patch fixes this problem.
> 
> Signed-off-by: Takao Indoh 
> 
> 
> NOTE:
> To reset devices this patch uses bus reset interface introduced by
> following commits in PCI "next" branch.
> 
> 64e8674fbe6bc848333a9b7e19f8cc019dde9eab
> 5c32b35b004f5ef70dcf62bbc42b8bed1e50b471
> 2e35afaefe64946caaecfacaf7fb568e46185e88
> 608c388122c72e1bf11ba8113434eb3d0c40c32d
> 77cb985ad4acbe66a92ead1bb826deffa47dd33f
> 090a3c5322e900f468b3205b76d0837003ad57b2
> a6cbaadea0af9b4aa6eee2882f2aa761ab91a4f8
> de0c548c33429cc78fd47a3c190c6d00b0e4e441
> 1b95ce8fc9c12fdb60047f2f9950f29e76e7c66d
> ---
>  drivers/iommu/intel-iommu.c |   55 
> ++-
>  1 files changed, 54 insertions(+), 1 deletions(-)
> 
> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index eec0d3e..efb98eb 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -3663,6 +3663,56 @@ static struct notifier_block device_nb = {
>   .notifier_call = device_notifier,
>  };
>  
> +/* Reset PCI devices if its entry exists in DMAR table */
> +static void __init iommu_reset_devices(struct intel_iommu *iommu, u16 
> segment)
> +{
> + u64 addr;
> + struct root_entry *root;
> + struct context_entry *context;
> + int bus, devfn;
> + struct pci_dev *dev;
> +
> + addr = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
> + if (!addr)
> + return;
> +
> + /*
> +  *  In the case of kdump, ioremap is needed because root-entry table
> +  *  exists in first kernel's memory area which is not mapped in second
> +  *  kernel
> +  */
> + root = (struct root_entry *)ioremap(addr, PAGE_SIZE);
> + if (!root)
> + return;
> +
> + for (bus = 0; bus < ROOT_ENTRY_NR; bus++) {
> + if (!root_present(&root[bus]))
> + continue;
> +
> + context = (struct context_entry *)ioremap(
> + root[bus].val & VTD_PAGE_MASK, PAGE_SIZE);
> + if (!context)
> + continue;
> +
> + for (devfn = 0; devfn < ROOT_ENTRY_NR; devfn++) {
For the context_entry table, the context_entry currently has the same size
as root_entry, so it's also correct to use ROOT_ENTRY_NR. But for
future extension and to remove confusion, can we define a new macro for
calculating the size of the context_entry table, e.g. CONTEXT_ENTRY_NR?

> + if (!context_present(&context[devfn]))
> + continue;
> +
> + dev = pci_get_domain_bus_and_slot(segment, bus, devfn);
> + if (!dev)
> + continue;
> +
> + if (!pci_reset_bus(dev->bus)) /* go to next bus */

Here we have already got the specific dev, so why don't we just call
pci_reset_function? If we call pci_reset_bus here, will it repeat resetting
devices on the same bus many times?

> + break;
> + else /* Try per-function reset */
> + pci_reset_function(dev);
> +
> + }
> + iounmap(context);
> + }
> + iounmap(root);
> +}
> +
>  int __init intel_iommu_init(void)
>  {
>   int ret = 0;
> @@ -3687,8 +3737,11 @@ int __init intel_iommu_init(void)
>   continue;
>  
>   iommu = drhd->iommu;
> - if (iommu->gcmd & DMA_GCMD_TE)
> + if (iommu->gcmd & DMA_GCMD_TE) {
> + if (reset_devices)
> + iommu_reset_devices(iommu, drhd->segment);

I remember the double reset issue Vivek was concerned about in the old
patch. Here the iommu reset is done at the very beginning of
pci_iommu_init, which is after the bus subsystem init, so it means there is
only the iommu reset here, am I right?

If yes, I think this patch is clear and logic is simple.

BTW, what's the status of Alex's PCI patchset which this patch depends
on?

Baoquan
Thanks

>   iommu_disable_translation(iommu);
> + }
>   }
>  
>   if (dmar_dev_scope_init() < 0) {
> -- 
> 1.7.1
> 
> 
> 
> ___
> kexec mailing list
> ke...@lists.infradead.org
> http://

Re: [RFC PATCH] Crashdump Accepting Active IOMMU

2013-11-20 Thread Baoquan He
Hi Bill,

I have tested this prototype patch on hp z420, it works very well.
Looking forward to your formal patch set. I can help test and review.

Baoquan
Thanks

On 11/18/13 at 11:30pm, Sumner, William wrote:
> Thank you for testing this RFC patch.  It is great to have confirmation that 
> the code works in a different test environment.
> 
> You asked: "What is the status of this patch?"
> I have made a few changes since the RFC version of this patch:
> 
> 1. Consolidated all of the operational code into the "copy..." functions.  
> The "process..." functions were primarily used for diagnostics and 
> exploration; however, there was a small amount of operational code that used 
> the "process..." functions. This operational code has been moved into the 
> "copy..." functions.
> 
> 2. Removed the "Process ..." functions and the diagnostic code that ran on 
> that function set.  This removed about 1/4 of the code -- which this 
> operational patch no longer needs.  These portions of the RFC patch could be 
> formatted as a separate patch and submitted independently at a later date.
> 
> 3. Re-formatted the code to the Linux Coding Standards.  The checkpatch 
> script still finds some lines to complain about; however these lines are 
> either (1) lines that I did not change, or (2) lines that only changed by 
> adding a level of indent which pushed them over 80-characters, or (3) new 
> lines whose intent is far clearer when longer than 80-characters (allowed by 
> the Linux Coding Standards.)
> 
> 4. Updated the remaining debug print to be significantly more flexible.  This 
> allows control over the amount of debug print to the console -- which can 
> vary widely.
> 
> 5. Fixed a couple of minor bugs found by testing on a machine with a very 
> large IO configuration.
> 
> 
> You asked: " Do you have a plan to post new version?"
> Yes.  I am in the process of dividing the code into a set of 6 or 7 patches, 
> and completing the due-diligence on these patches before submitting them.
> 
> Bill
> 
> -Original Message-
> From: Takao Indoh [mailto:indou.ta...@jp.fujitsu.com]
> Sent: Tuesday, November 12, 2013 12:45 AM
> To: Sumner, William; bhelg...@google.com; alex.william...@redhat.com; 
> ddut...@redhat.com
> Cc: linux-...@vger.kernel.org; ke...@lists.infradead.org; 
> linux-ker...@vger.kernel.org; iommu@lists.linux-foundation.org; 
> ishii.hiron...@jp.fujitsu.com; dw...@infradead.org
> Subject: Re: [RFC PATCH] Crashdump Accepting Active IOMMU
> 
> Hi Bill,
> 
> What is the status of this patch? It works and DMA problems on kdump are
> solved as far as I tested. Do you have a plan to post new version?
> 
> Thanks,
> Takao Indoh
> 
> (2013/09/27 8:25), Sumner, William wrote:
> > This Request For Comment submission is primarily to solicit comments on a 
> > concept for how kdump can handle legacy DMA IO leftover from the panicked 
> > kernel and comments on early prototype code to implement it.  Some level of 
> > interest was noted when I proposed this concept in June; however, for 
> > generating serious discussion there is no substitute for a working 
> > prototype.
> >
> > This concept modifies the behavior of the iommu in the (new) crashdump 
> > kernel:
> > 1. to accept the iommu hardware in an active state,
> > 2. to leave the current translations in-place so that legacy DMA will 
> > continue using its current buffers until the device drivers in the 
> > crashdump kernel initialize and initialize their devices,
> > 3. to use different portions of the iova address ranges for the device 
> > drivers in the crashdump kernel than the iova ranges that were in-use at 
> > the time of the panic.
> >
> > Advantages of this concept:
> > 1. All manipulation of the IO-device is done by the Linux device-driver for 
> > that device.
> > 2. This concept behaves in a very similar manner to operation without an 
> > active iommu.
> > 3. Any activity between the IO-device and its RMRR areas is handled by the 
> > device-driver in the same manner as during a non-kdump boot.
> > 4. If an IO-device has no driver in the kdump kernel, it is simply left 
> > alone.  This supports the practice of creating a special kdump kernel 
> > without drivers for any devices that are not required for taking a 
> > crashdump.
> >
> >
> >
> > About the early-prototype code in the patch below:
> > --
> > 1. It works on one machine that reproduced the original problem -- still 
> > need to test it on a lot of other machines with various IO configurations.
> >
> > 2. Currently implemented for intel-iommu architecture only,
> >
> > 3. It is based near TOT from kernel.org.  The TOT version of 'crash' reads 
> > the dump that is produced.
> >
> > 4. It is definitely prototype-only and not yet ready to propose as a patch 
> > for inclusion into Linux proper.
> >
> > 5. Although this patch is not yet intended for incorporation into 
> > mainstream Linux, it should install and operate for an

Re: [PATCHv2 0/6] Crashdump Accepting Active IOMMU

2013-12-19 Thread Baoquan He

I have reviewed and tested this patchset. Without it the DMAR error
always occurred as below. With this patchset, no error is reported and
kdump can work successfully.

This patchset is awesome: it gets to the root of the problem when enabling
intel-iommu in kdump and fixes it. And from the code, no harm would come to
the 1st kernel; surely people's review can guarantee this better and is
very important.

Hi Bill,

Thanks a lot for your effort. 

I have several comments. I think for a formal patch the debug print
needs to be removed. Or you can split it from this patchset and post it
separately. Then people who review and test your patchset can use the
debug utility.

A bug was found when I tested it. Please see the inline comment.

Baoquan
Thanks


On 12/19/13 at 07:49pm, Bill Sumner wrote:
> 
> Changes from previous submission:
> 1. Moved up to Linux kernel top-of-tree of this week.
> 2. Expanded the comments section of each patch so that the documentation
>will appear in the commit logs. (Requested by Alex Williamson and one
>internal reviewer)
> 3. Corrected one line in the table which drives the diagnostic printing
>of the IOMMU registers (Requested by one internal reviewer)
> 
> Notes:
> 1. This patch set is ready for extensive testing by the Linux community,
>for discussion, and hopefully for acceptance into Linux mainstream.
> 2. In order to support this testing, I have left a modest amount of
>debug print enabled.  For testing, the amount of print can be increased
>or decreased easily; and for production, can be completely turned-off.
>Please see the bit-flags in struct 'pr_dbg'
> 
> Testing:
> 1. This patch set was re-tested on the machine that reproduced the problem
>a. Without this patch set, crashdump console sees a large number of:
>   "dmar: DMAR:[DMA Write] Request device [04:00.0] fault addr fff69000
>   "DMAR:[fault reason 01] Present bit in root entry is clear"
>b. With this patch set, none of the above messages are seen.
> 2. This patch set has not yet been tested with hardware pass-through enabled.
> 
> 
> Patch 0 file from previous submission:
> The following series implements a fix for:
> A kdump problem about DMA that has been discussed for a long time. That is,
> when a kernel panics and boots into the kdump kernel, DMA started by the 
> panicked kernel is not stopped before the kdump kernel is booted and the 
> kdump kernel disables the IOMMU while this DMA continues.  This causes the
> IOMMU to stop translating the DMA addresses as IOVAs and begin to treat them 
> as physical memory addresses -- which causes the DMA to either:
> (1) generate DMAR errors or (2) generate PCI SERR errors or (3) transfer  
> data to or from incorrect areas of memory. Often this causes the dump to fail.
> 
> This patch set modifies the behavior of the iommu in the (new) crashdump 
> kernel: 
> 1. to accept the iommu hardware in an active state, 
> 2. to leave the current translations in-place so that legacy DMA will continue
>using its current buffers until the device drivers in the crashdump kernel
>initialize and initialize their devices,
> 3. to use different portions of the iova address ranges for the device drivers
>in the crashdump kernel than the iova ranges that were in-use at the time
>of the panic.  
> 
> Advantages of this approach:
> 1. All manipulation of the IO-device is done by the Linux device-driver
>for that device.
> 2. This approach behaves in a manner very similar to operation without an
>active iommu.
> 3. Any activity between the IO-device and its RMRR areas is handled by the
>device-driver in the same manner as during a non-kdump boot.
> 4. If an IO-device has no driver in the kdump kernel, it is simply left alone.
>This supports the practice of creating a special kdump kernel without
>drivers for any devices that are not required for taking a crashdump. 
> 
> Changes since the RFC version of this patch:
> 1. Consolidated all of the operational code into the "copy..." functions.
>The "process..." functions were primarily used for diagnostics and
>exploration; however, there was a small amount of operational code that
>used the "process..." functions.
>This operational code has been moved into the "copy..." functions.
> 
> 2. Removed the "Process ..." functions and the diagnostic code that ran
>on that function set.  This removed about 1/4 of the code -- which this
>operational patch set no longer needs.  These portions of the RFC patch
>could be formatted as a separate patch and submitted independently
>at a later date. 
> 
> 3. Re-formatted the code to the Linux Coding Standards.
>The checkpatch script still finds some lines to complain about;
>however most of these lines are either (1) lines that I did not change,
>or (2) lines that only changed by adding a level of indent which pushed
>them over 80-characters, or (3) new lines whose intent is far clearer when
>longer

Re: [PATCHv2 4/6] Crashdump-Accepting-Active-IOMMU-Copy-Translations

2013-12-19 Thread Baoquan He
On 12/19/13 at 07:49pm, Bill Sumner wrote:

> +static int copy_page_addr(u64 page_addr, u32 shift, u32 bus, u32 devfn,
> + u64 pte, struct dmar_domain *domain,
> + void *parms)
> +{
> + struct copy_page_addr_parms *ppap = parms;
> +
> + u64 page_size = ((u64)1 << shift);  /* page_size */
> + u64 pfn_lo; /* For reserving IOVA range */
> + u64 pfn_hi; /* For reserving IOVA range */
> + struct iova *iova_p;/* For reserving IOVA range */
> +
> + if (!ppap) {
> + pr_err("ERROR: ppap is NULL: 0x%3.3x(%3.3d) DevFn: 
> 0x%3.3x(%3.3d) Page: 0x%16.16llx Size: 0x%16.16llx(%lld)\n",
> + bus, bus, devfn, devfn,  page_addr,
> + page_size, page_size);
> + return 0;
> + }
> +

> + /* Prepare for a new page */
> + ppap->first = 0;/* Not first-time anymore */
> + ppap->bus   = bus;
> + ppap->devfn = devfn;
> + ppap->shift = shift;
> + ppap->pte   = pte;
> + ppap->next_addr = page_addr + page_size; /* Next-expected page_addr */
> +
> + ppap->page_addr = page_addr;/* Addr(new page) */
> + ppap->page_size = page_size;/* Size(new page) */
> +
> + ppap->domain= domain;   /* adr(domain for the new range) */

Here I am confused. ppap is used to collect the copied ranges and
necessary information. To my understanding, this domain is the
dmar_domain which the 1st device is on. When ppap->last is set to 1,
there may be many domains among the whole collected range. I just don't
feel good about this. Would it be OK to define a specific lock for the ppap
structure? Please correct me if I am wrong.

> +
> + return 0;
> +}
> +

> +
> +
> +
> +static int copy_context_entry(struct intel_iommu *iommu, u32 bus, u32 devfn,
> +   void *ppap, struct context_entry *ce)
> +{
> + int ret = 0;/* Integer Return Code */
> + u32 shift = 0;  /* bits to shift page_addr  */
> + u64 page_addr = 0;  /* Address of translated page */
> + struct dma_pte *pgt_old_phys;   /* Adr(page_table in the old kernel) */
> + struct dma_pte *pgt_new_phys;   /* Adr(page_table in the new kernel) */
> + unsigned long asr;  /* New asr value for new context */
> + u8  t;  /* Translation-type from context */
> + u8  aw; /* Address-width from context */
> + u32 aw_shift[8] = {
> + 12+9+9, /* [000b] 30-bit AGAW (2-level page table) */
> + 12+9+9+9,   /* [001b] 39-bit AGAW (3-level page table) */
> + 12+9+9+9+9, /* [010b] 48-bit AGAW (4-level page table) */
> + 12+9+9+9+9+9,   /* [011b] 57-bit AGAW (5-level page table) */
> + 12+9+9+9+9+9+9, /* [100b] 64-bit AGAW (6-level page table) */
> + 0,  /* [111b] Reserved */
> + 0,  /* [110b] Reserved */
> + 0,  /* [111b] Reserved */
> + };
> +
> + struct dmar_domain *domain = NULL;  /* To hold domain & device */
> + /*values from old kernel */
> + struct device_domain_info *info = NULL; /* adr(new for this device) */
> + struct device_domain_info *i = NULL;/* iterator for foreach */
> +
> +
 +  /* info->segment = segment;  May need this later */
> + info->bus = bus;
> + info->devfn = devfn;
> + info->iommu = iommu;
> +
> + list_for_each_entry(i, &device_domain_values_list[iommu->seq_id],
> + global) {
> + if (i->domain->id == (int) context_get_did(ce)) {
> + domain = i->domain;
> + pr_debug("CTXT B:D:F:%2.2x:%2.2x:%1.1x Found 
> did=0x%4.4x\n",
> + bus, devfn >> 3, devfn & 0x7, i->domain->id);
> + break;
> + }
> + }
> +
> + if (!domain) {
> + domain = alloc_domain();
> + if (!domain) {
> + ret = -ENOMEM;
> + goto exit;
> + }
> + INIT_LIST_HEAD(&domain->devices);
> + domain->id = (int) context_get_did(ce);
> + domain->agaw = (int) context_get_aw(ce);
> + domain->pgd = NULL;

Here the domain is created and initialized, but its member iovad is not
initialized. This will cause a deadlock on domain->iovad.iova_rbtree_lock
because its initial value is random, and the rbtree operation will
crash. This happens in reserve_iova, which is invoked from copy_page_addr
at high frequency.

I added 1 line of code as below and it works well.

init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
> +
> + pr_debug("CTXT Allocated new list entry, did:%d\n",
>

Re: [PATCHv2 4/6] Crashdump-Accepting-Active-IOMMU-Copy-Translations

2013-12-19 Thread Baoquan He
On 12/20/13 at 02:04pm, Baoquan He wrote:
> On 12/19/13 at 07:49pm, Bill Sumner wrote:
> 
> > +static int copy_page_addr(u64 page_addr, u32 shift, u32 bus, u32 devfn,
> > +   u64 pte, struct dmar_domain *domain,
> > +   void *parms)
> > +{
> > +   struct copy_page_addr_parms *ppap = parms;
> > +
> > +   u64 page_size = ((u64)1 << shift);  /* page_size */
> > +   u64 pfn_lo; /* For reserving IOVA range */
> > +   u64 pfn_hi; /* For reserving IOVA range */
> > +   struct iova *iova_p;/* For reserving IOVA range */
> > +
> > +   if (!ppap) {
> > +   pr_err("ERROR: ppap is NULL: 0x%3.3x(%3.3d) DevFn: 
> > 0x%3.3x(%3.3d) Page: 0x%16.16llx Size: 0x%16.16llx(%lld)\n",
> > +   bus, bus, devfn, devfn,  page_addr,
> > +   page_size, page_size);
> > +   return 0;
> > +   }
> > +
> 
> > +   /* Prepare for a new page */
> > +   ppap->first = 0;/* Not first-time anymore */
> > +   ppap->bus   = bus;
> > +   ppap->devfn = devfn;
> > +   ppap->shift = shift;
> > +   ppap->pte   = pte;
> > +   ppap->next_addr = page_addr + page_size; /* Next-expected page_addr */
> > +
> > +   ppap->page_addr = page_addr;/* Addr(new page) */
> > +   ppap->page_size = page_size;/* Size(new page) */
> > +
> > +   ppap->domain= domain;   /* adr(domain for the new range) */
> 
> Here I am confused, ppap is used to collect the copied ranges and
> necessary information. To my understanding, this domain is the
> dmar_domain which the 1st device is on. When ppat->last is set to 1,
> among the whole collected range, there may be many domains. I just feel
> not good for this. Is it OK to define a specific lock for ppap
> structure, possibly? Please correct me if I am wrong.

Well, I checked it again. It's not about the lock. It's just that all
addresses are recorded in one dmar_domain.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCHv3 0/6] Crashdump Accepting Active IOMMU

2014-01-24 Thread Baoquan He
Tested this patchset on my local HP Z420 workstation, and it works very
well.

Hi Bill,

Thanks for your effort.

There are several concerns from me.

Firstly, I think the patch log needs to be rearranged. The patchset cover
letter can contain information that briefly explains the why and how. If you
think this is very useful, it can be split and put into the patch logs.

Then for each patch, the patch log should be an accurate summary that
describes why and how this patch does what it does. If you feel several
patches are correlated, they may need to be merged.

Secondly, each patch could get a separate subject which tells what this
patch really does. Even when they are merged into the kernel git tree, each
of them is an independent commit. People can take or depend on only one of
them. Actually, I don't like the current patch subjects.

Thirdly, this patchset will be part of intel-iommu, though it only
works for the kdump kernel. As a subsystem, the style needs to be
consistent. I like the debug method which introduces a struct pr_debug;
however, maintainers may not like it, because a debug utility may bloat the
code and affect people's review. Personally I like refined code: the less
there is, the easier it is to review. Or put it as an independent patch at
the end of the patchset, and let the maintainer decide whether it's OK to
pull in.

Sorry to say so much. I think this solution is truly the right way. As
you know, it's a big problem for kdump when intel-iommu is active in the 1st
kernel. Because of this bug, many machines with intel-iommu have to be
set to intel-iommu=off, and the performance is affected very much.

Baoquan
Thanks

On 01/10/14 at 03:07pm, Bill Sumner wrote:
> v2->v3:
> 1. Commented-out "#define DEBUG 1" to eliminate debug messages
> 2. Updated the comments about changes in each version in all patches in the 
> set.
> 3. Fixed: one-line added to Copy-Translations" patch to initialize the iovad
>   struct as recommended by Baoquan He [b...@redhat.com]
>   init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
> 
> v1->v2:
> The following series implements a fix for:
> A kdump problem about DMA that has been discussed for a long time. That is,
> when a kernel panics and boots into the kdump kernel, DMA started by the 
> panicked kernel is not stopped before the kdump kernel is booted and the 
> kdump kernel disables the IOMMU while this DMA continues.  This causes the
> IOMMU to stop translating the DMA addresses as IOVAs and begin to treat them 
> as physical memory addresses -- which causes the DMA to either:
> (1) generate DMAR errors or (2) generate PCI SERR errors or (3) transfer  
> data to or from incorrect areas of memory. Often this causes the dump to fail.
> 
 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: hpsa driver bug crack kernel down!

2014-04-10 Thread Baoquan He
On 04/10/14 at 04:34pm, Jiang Liu wrote:
> Hi Baoquan,
>   Could you please help to give output of "lspci -"?
> Is device "hpsa :03:00.0" a legacy PCI device(non-PCIe)?
> It may have relationship with IOMMU driver.
> Thanks!
> Gerry

Hi,

I just saw your mail now. Do you still need the output of "lspci -"
on my test machine? 

In fact, I didn't see the DMAR error related to intel vt-d issues.

If the output is helpful, I can make a latest build to do this.

Thanks
Baoquan

> 
> On 2014/4/10 12:03, Bjorn Helgaas wrote:
> > [+cc Joerg, iommu list]
> > 
> > On Wed, Apr 9, 2014 at 6:19 PM, Davidlohr Bueso  wrote:
> >> On Wed, 2014-04-09 at 16:50 -0700, James Bottomley wrote:
> >>> On Wed, 2014-04-09 at 16:40 -0700, Davidlohr Bueso wrote:
> >>>> On Wed, 2014-04-09 at 16:10 -0700, James Bottomley wrote:
> >>>>> On Wed, 2014-04-09 at 16:08 -0700, James Bottomley wrote:
> >>>>>> [+linux-scsi]
> >>>>>> On Wed, 2014-04-09 at 15:49 -0700, Davidlohr Bueso wrote:
> >>>>>>> On Wed, 2014-04-09 at 10:39 +0800, Baoquan He wrote:
> >>>>>>>> Hi,
> >>>>>>>>
> >>>>>>>> The kernel is 3.14.0+ which is pulled just now.
> >>>>>>>
> >>>>>>> Cc'ing more people.
> >>>>>>>
> >>>>>>> While the hpsa driver appears to be involved in some way, I'm sure if
> >>>>>>> this is a related issue, but as of today's pull I'm getting another
> >>>>>>> problem that causes my DL980 not to come up.
> >>>>>>>
> >>>>>>> *Massive* amounts of:
> >>>>>>>
> >>>>>>> DMAR:[fault reason 02] Present bit in context entry is clear
> >>>>>>> dmar: DRHD: handling fault status reg 602
> >>>>>>> dmar: DMAR:[DMA Read] Request device [02:00.0] fault addr 7f61e000
> >>>>>>>
> >>>>>>> Then:
> >>>>>>>
> >>>>>>> hpsa :03:00.0: Controller lockup detected: 0x
> >>>>>>> ...
> >>>>>>> Workqueue: events hpsa_monitor_ctlr_worker [hpsa]
> >>>>>>> ...
> >>>>>>>
> >>>>>>> Screenshot of the actual LOCKUP:
> >>>>>>> http://stgolabs.net/hpsa-hard-lockup-3.14+.png
> >>>>>>>
> >>>>>>> While I haven't bisected, things worked fine until at least until 
> >>>>>>> commit
> >>>>>>> 39de65aa2c3e (April 2nd).
> >>>>>>>
> >>>>>>> Any ideas?
> >>>>>>
> >>>>>> Well, it's either a DMA remapping issue or a hpsa one.  Your assertion
> >>>>>> that everything worked fine until 39de65aa2c3e would tend to vindicate
> >>>>>> hpsa,
> >>>>
> >>>> Hmm here you mean DMA, right?
> >>>
> >>> No, it vindicates the hpsa changes ... they don't seem to be causing
> >>> problems until something goes wrong with dma remapping.
> >>>
> >>>>> because all the hpsa changes went in before that under
> >>>>> Missing crucial info:
> >>>>>
> >>>>> commit 1a0b6abaea78f73d9bc0a2f6df2d9e4c917cade1
> >>>>>
> >>>>>> Merge: 3e75c6d b2bff6c
> >>>>>> Author: Linus Torvalds 
> >>>>>> Date:   Tue Apr 1 18:49:04 2014 -0700
> >>>>>>
> >>>>>> Merge tag 'scsi-misc' of
> >>>>>> git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
> >>>>>>
> >>>>>> can you revalidate that this commit works OK just to make sure?
> >>>>
> >>>> Ok so I don't see those DMA messages and system starts just fine. I'm
> >>>> thinking perhaps something broke after the IO mmu stuff in commit
> >>>> 3f583bc21977a608908b83d03ee2250426a5695c... could this be indirectly
> >>>> causing the CPU stalls and just blame hpsa in the path as a side effect?
> >>>>
> >>>> /me goes out to try the commit.
> >>>
> >>> That's my guess.  The DMAR messages are DMA remapping issues caused in
> >>> the IOMMU.  If I had to guess, I'd say the DMAR fault message is
> >>> indicating the IOMMU is calling for a mapping address before it can
> >>> satisfy the driver read request, which is causing the hang apparently in
> >>> the hpsa driver.
> >>>
> >>> I've added linux-pci to the cc; I think they deal with iommu issues on
> >>> x86.
> >>
> >> So that merge commit appears to be the culprit, I see both the DMA
> >> messages and the lockup blaming hpsa...
> > 
> > My understanding so far (please correct me if I'm wrong):
> > 
> > 39de65aa2c3e OK ("Merge branch 'i2c/for-next'")
> > 1a0b6abaea78 OK ("Merge tag 'scsi-misc'")
> > 3f583bc21977 BAD ("Merge tag 'iommu-updates-v3.15'")
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to majord...@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at  http://www.tux.org/lkml/
> > 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] hpsa: fix uninitialized trans_support in hpsa_put_ctlr_into_performant_mode()

2014-04-10 Thread Baoquan He
This patch works for me.

Tested-by: Baoquan He 

Thanks
Baoquan

On 04/10/14 at 05:17pm, scame...@beardog.cce.hp.com wrote:
> 
> Without this, you'll see a null pointer dereference in
> hpsa_enter_performant_mode().
> 
> Signed-off-by: Stephen M. Cameron 
> ---
>  drivers/scsi/hpsa.c |4 
>  1 files changed, 4 insertions(+), 0 deletions(-)
> 
> diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
> index 8cf4a0c..ef4dfdd 100644
> --- a/drivers/scsi/hpsa.c
> +++ b/drivers/scsi/hpsa.c
> @@ -7463,6 +7463,10 @@ static void hpsa_put_ctlr_into_performant_mode(struct 
> ctlr_info *h)
>   if (hpsa_simple_mode)
>   return;
>  
> + trans_support = readl(&(h->cfgtable->TransportSupport));
> + if (!(trans_support & PERFORMANT_MODE))
> + return;
> +
>   /* Check for I/O accelerator mode support */
>   if (trans_support & CFGTBL_Trans_io_accel1) {
>   transMethod |= CFGTBL_Trans_io_accel1 |
> -- 
> 1.7.1
> 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: hpsa driver bug crack kernel down!

2014-04-10 Thread Baoquan He
On 04/10/14 at 04:34pm, Jiang Liu wrote:
> Hi Baoquan,
>   Could you please help to give output of "lspci -"?
> Is device "hpsa :03:00.0" a legacy PCI device(non-PCIe)?
> It may have relationship with IOMMU driver.
> Thanks!
> Gerry

Well, the machine the bug was reported on is an AMD machine, and it doesn't
have the IOMMU problem. David saw some DMAR errors, so his should
be an Intel machine which uses VT-d.

> 
> On 2014/4/10 12:03, Bjorn Helgaas wrote:
> > [+cc Joerg, iommu list]
> > 
> > On Wed, Apr 9, 2014 at 6:19 PM, Davidlohr Bueso  wrote:
> >> On Wed, 2014-04-09 at 16:50 -0700, James Bottomley wrote:
> >>> On Wed, 2014-04-09 at 16:40 -0700, Davidlohr Bueso wrote:
> >>>> On Wed, 2014-04-09 at 16:10 -0700, James Bottomley wrote:
> >>>>> On Wed, 2014-04-09 at 16:08 -0700, James Bottomley wrote:
> >>>>>> [+linux-scsi]
> >>>>>> On Wed, 2014-04-09 at 15:49 -0700, Davidlohr Bueso wrote:
> >>>>>>> On Wed, 2014-04-09 at 10:39 +0800, Baoquan He wrote:
> >>>>>>>> Hi,
> >>>>>>>>
> >>>>>>>> The kernel is 3.14.0+ which is pulled just now.
> >>>>>>>
> >>>>>>> Cc'ing more people.
> >>>>>>>
> >>>>>>> While the hpsa driver appears to be involved in some way, I'm not sure if
> >>>>>>> this is a related issue, but as of today's pull I'm getting another
> >>>>>>> problem that causes my DL980 not to come up.
> >>>>>>>
> >>>>>>> *Massive* amounts of:
> >>>>>>>
> >>>>>>> DMAR:[fault reason 02] Present bit in context entry is clear
> >>>>>>> dmar: DRHD: handling fault status reg 602
> >>>>>>> dmar: DMAR:[DMA Read] Request device [02:00.0] fault addr 7f61e000
> >>>>>>>
> >>>>>>> Then:
> >>>>>>>
> >>>>>>> hpsa :03:00.0: Controller lockup detected: 0x
> >>>>>>> ...
> >>>>>>> Workqueue: events hpsa_monitor_ctlr_worker [hpsa]
> >>>>>>> ...
> >>>>>>>
> >>>>>>> Screenshot of the actual LOCKUP:
> >>>>>>> http://stgolabs.net/hpsa-hard-lockup-3.14+.png
> >>>>>>>
> >>>>>>> While I haven't bisected, things worked fine at least until 
> >>>>>>> commit
> >>>>>>> 39de65aa2c3e (April 2nd).
> >>>>>>>
> >>>>>>> Any ideas?
> >>>>>>
> >>>>>> Well, it's either a DMA remapping issue or a hpsa one.  Your assertion
> >>>>>> that everything worked fine until 39de65aa2c3e would tend to vindicate
> >>>>>> hpsa,
> >>>>
> >>>> Hmm here you mean DMA, right?
> >>>
> >>> No, it vindicates the hpsa changes ... they don't seem to be causing
> >>> problems until something goes wrong with dma remapping.
> >>>
> >>>>> because all the hpsa changes went in before that under
> >>>>> Missing crucial info:
> >>>>>
> >>>>> commit 1a0b6abaea78f73d9bc0a2f6df2d9e4c917cade1
> >>>>>
> >>>>>> Merge: 3e75c6d b2bff6c
> >>>>>> Author: Linus Torvalds 
> >>>>>> Date:   Tue Apr 1 18:49:04 2014 -0700
> >>>>>>
> >>>>>> Merge tag 'scsi-misc' of
> >>>>>> git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
> >>>>>>
> >>>>>> can you revalidate that this commit works OK just to make sure?
> >>>>
> >>>> Ok so I don't see those DMA messages and system starts just fine. I'm
> >>>> thinking perhaps something broke after the IO mmu stuff in commit
> >>>> 3f583bc21977a608908b83d03ee2250426a5695c... could this be indirectly
> >>>> causing the CPU stalls and just blame hpsa in the path as a side effect?
> >>>>
> >>>> /me goes out to try the commit.
> >>>
> >>> That's my guess.  The DMAR messages are DMA remapping issues caused in
> >>> the IOMMU.  If I had to guess, I'd say the DMAR fault message is
> >>> indicating the IOMMU is calling for a mapping address before it can
> >>> satisfy the driver read request, which is causing the hang apparently in
> >>> the hpsa driver.
> >>>
> >>> I've added linux-pci to the cc; I think they deal with iommu issues on
> >>> x86.
> >>
> >> So that merge commit appears to be the culprit, I see both the DMA
> >> messages and the lockup blaming hpsa...
> > 
> > My understanding so far (please correct me if I'm wrong):
> > 
> > 39de65aa2c3e OK ("Merge branch 'i2c/for-next'")
> > 1a0b6abaea78 OK ("Merge tag 'scsi-misc'")
> > 3f583bc21977 BAD ("Merge tag 'iommu-updates-v3.15'")
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to majord...@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at  http://www.tux.org/lkml/
> > 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 2/8] Consolidate all .h lines at front of intel-iommu.c file

2014-04-19 Thread Baoquan He
Hi Bill,

Could you rebase on Linus's latest kernel tree, since there are several
changes in intel-iommu.c? This patch can't be applied because of the commit
below. I think the patch needs to be rebased on Linus's latest tree before
posting, so that people can apply your patch without conflicts.

commit b94e4117f8c4ffb591b1e462364d725e3a1c63c4
Author: Jiang Liu 
Date:   Wed Feb 19 14:07:25 2014 +0800

iommu/vt-d: Move private structures and variables into intel-iommu.c

Move private structures and variables into intel-iommu.c, which will
help to simplify locking policy for hotplug. Also delete redundant
declarations.

Signed-off-by: Jiang Liu 
Signed-off-by: Joerg Roedel 


Thanks
Baoquan

On 04/15/14 at 05:09pm, Bill Sumner wrote:
>  In intel-iommu.c, move downward the few lines near the
>  front that should not move to an intel-iommu-private.h
>  file (mostly data-item definitions) This leaves a
>  consolidated block of the lines that would move to an
>  intel-iommu-private.h file at the front of the file.
> 
> 
> Signed-off-by: Bill Sumner 
> ---
>  drivers/iommu/intel-iommu.c | 74 
> +++--
>  1 file changed, 38 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index 8abcb6c..22298e9 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -165,17 +165,6 @@ static inline unsigned long virt_to_dma_pfn(void *p)
>   return page_to_dma_pfn(virt_to_page(p));
>  }
>  
> -/* global iommu list, set NULL for ignored DMAR units */
> -static struct intel_iommu **g_iommus;
> -
> -static void __init check_tylersburg_isoch(void);
> -static int rwbf_quirk;
> -
> -/*
> - * set to 1 to panic kernel if can't successfully enable VT-d
> - * (used when kernel is launched w/ TXT)
> - */
> -static int force_on;
>  
>  /*
>   * 0: Present
> @@ -314,15 +303,6 @@ static inline int first_pte_in_page(struct dma_pte *pte)
>   return !((unsigned long)pte & ~VTD_PAGE_MASK);
>  }
>  
> -/*
> - * This domain is a statically identity mapping domain.
> - *   1. This domain creats a static 1:1 mapping to all usable memory.
> - *   2. It maps to each iommu if successful.
> - *   3. Each iommu mapps to this domain if successful.
> - */
> -static struct dmar_domain *si_domain;
> -static int hw_pass_through = 1;
> -
>  /* devices under the same p2p bridge are owned in one domain */
>  #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
>  
> @@ -380,6 +360,44 @@ struct device_domain_info {
>   struct dmar_domain *domain; /* pointer to domain */
>  };
>  
> +static inline void *alloc_pgtable_page(int node)
> +{
> + struct page *page;
> + void *vaddr = NULL;
> +
> + page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
> + if (page)
> + vaddr = page_address(page);
> + return vaddr;
> +}
> +
> +static inline void free_pgtable_page(void *vaddr)
> +{
> + free_page((unsigned long)vaddr);
> +}
> +
> +
> +static void __init check_tylersburg_isoch(void);
> +
> +/* global iommu list, set NULL for ignored DMAR units */
> +static struct intel_iommu **g_iommus;
> +static int rwbf_quirk;
> +
> +/*
> + * set to 1 to panic kernel if can't successfully enable VT-d
> + * (used when kernel is launched w/ TXT)
> + */
> +static int force_on;
> +
> +/*
> + * This domain is a statically identity mapping domain.
> + *   1. This domain creats a static 1:1 mapping to all usable memory.
> + *   2. It maps to each iommu if successful.
> + *   3. Each iommu mapps to this domain if successful.
> + */
> +static struct dmar_domain *si_domain;
> +static int hw_pass_through = 1;
> +
>  static void flush_unmaps_timeout(unsigned long data);
>  
>  static DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
> @@ -468,22 +486,6 @@ static struct kmem_cache *iommu_domain_cache;
>  static struct kmem_cache *iommu_devinfo_cache;
>  static struct kmem_cache *iommu_iova_cache;
>  
> -static inline void *alloc_pgtable_page(int node)
> -{
> - struct page *page;
> - void *vaddr = NULL;
> -
> - page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
> - if (page)
> - vaddr = page_address(page);
> - return vaddr;
> -}
> -
> -static inline void free_pgtable_page(void *vaddr)
> -{
> - free_page((unsigned long)vaddr);
> -}
> -
>  static inline void *alloc_domain_mem(void)
>  {
>   return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
> -- 
> Bill Sumner 
> 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] swiotlb: Allow swiotlb to live at pre-defined address

2020-03-30 Thread Baoquan He
On 03/30/20 at 10:42pm, Alexander Graf wrote:
> 
> 
> On 30.03.20 15:40, Konrad Rzeszutek Wilk wrote:
> > 
> > 
> > 
> > On Mon, Mar 30, 2020 at 02:06:01PM +0800, Kairui Song wrote:
> > > On Sat, Mar 28, 2020 at 7:57 PM Dave Young  wrote:
> > > > 
> > > > On 03/26/20 at 05:29pm, Alexander Graf wrote:
> > > > > The swiotlb is a very convenient fallback mechanism for bounce 
> > > > > buffering of
> > > > > DMAable data. It is usually used for the compatibility case where 
> > > > > devices
> > > > > can only DMA to a "low region".
> > > > > 
> > > > > However, in some scenarios this "low region" may be bound even more
> > > > > heavily. For example, there are embedded system where only an SRAM 
> > > > > region
> > > > > is shared between device and CPU. There are also heterogeneous 
> > > > > computing
> > > > > scenarios where only a subset of RAM is cache coherent between the
> > > > > components of the system. There are partitioning hypervisors, where
> > > > > a "control VM" that implements device emulation has limited view into 
> > > > > a
> > > > > partition's memory for DMA capabilities due to safety concerns.
> > > > > 
> > > > > This patch adds a command line driven mechanism to move all DMA 
> > > > > memory into
> > > > > a predefined shared memory region which may or may not be part of the
> > > > > physical address layout of the Operating System.
> > > > > 
> > > > > Ideally, the typical path to set this configuration would be through 
> > > > > Device
> > > > > Tree or ACPI, but neither of the two mechanisms is standardized yet. 
> > > > > Also,
> > > > > in the x86 MicroVM use case, we have neither ACPI nor Device Tree, but
> > > > > instead configure the system purely through kernel command line 
> > > > > options.
> > > > > 
> > > > > I'm sure other people will find the functionality useful going forward
> > > > > though and extend it to be triggered by DT/ACPI in the future.
> > > > 
> > > > Hmm, we have a use case for kdump, this maybe useful.  For example
> > > > swiotlb is enabled by default if AMD SME/SEV is active, and in kdump
> > > > kernel we have to increase the crashkernel reserved size for the extra
> > > > swiotlb requirement.  I wonder if we can just reuse the old kernel's
> > > > swiotlb region and pass the addr to kdump kernel.
> > > > 
> > > 
> > > Yes, definitely helpful for kdump kernel. This can help reduce the
> > > crashkernel value.
> > > 
> > > Previously I was thinking about something similar, play around the
> > > e820 entry passed to kdump and let it place swiotlb in wanted region.
> > > Simply remap it like in this patch looks much cleaner.
> > > 
> > > If this patch is acceptable, one more patch is needed to expose the
> > > swiotlb in iomem, so kexec-tools can pass the right kernel cmdline to
> > > second kernel.
> > 
> > > We seem to be passing a lot of data to kexec.. Perhaps something
> > of a unified way since we seem to have a lot of things to pass - disabling
> > IOMMU, ACPI RSDT address, and then this.
> > 
> > CC-ing Anthony who is working on something - would you by any chance
> > have a doc on this?
> 
> 
> I see in general 2 use cases here:
> 
> 
> 1) Allow for a generic mechanism to have the full system, individual buses,
> devices or functions of a device go through a particular, self-contained
> bounce buffer.
> 
> This sounds like the holy grail to a lot of problems. It would solve typical
> embedded scenarios where you only have a shared SRAM. It solves the safety
> case (to some extent) where you need to ensure that one device interaction
> doesn't conflict with another device interaction. It also solves the problem
> I've tried to solve with the patch here.
> 
> It's unfortunately a lot harder than the patch I sent, so it will take me
> some time to come up with a working patch set.. I suppose starting with a DT
> binding only is sensible. Worst case, x86 does also support DT ...
> 
> (And yes, I'm always happy to review patches if someone else beats me to it)
> 
> 
> 2) Reuse the SWIOTLB from the previous boot on kexec/kdump
> 
> I see little direct relation to SEV here. The only reason SEV makes it more
> relevant, is that you need to have an SWIOTLB region available with SEV
> while without you could live with a disabled IOMMU.
> 
> However, I can definitely understand how you would want to have a way to
> tell the new kexec'ed kernel where the old SWIOTLB was, so it can reuse its
> memory for its own SWIOTLB. That way, you don't have to reserve another 64MB
> of RAM for kdump.
> 
> What I'm curious on is whether we need to be as elaborate. Can't we just
> pass the old SWIOTLB as free memory to the new kexec'ed kernel and
> everything else will fall into place? All that would take is a bit of
> shuffling on the e820 table pass-through to the kexec'ed kernel, no?

Swiotlb memory has to be contiguous. We can't guarantee that region
won't be touched by kernel allocation before swiotlb init. Then we may
not have a chance to get a contiguous re

Re: [PATCH 0/5] iommu/vt-d: Fix crash dump failure caused by legacy DMA/IO

2014-10-22 Thread Baoquan He
Hi Zhenhua,

I tested your latest patch on 3.18.0-rc1+, and there are still some DMAR
errors. I remember it worked well with Bill's original patchset.


0console [earlya[0.00] allocate tes of page_cg  'a ong[
0.00] tsc: Fast TSC calibration using PIT
0031] Calibrating delay loop (skipped), value calculated using timer
frequency.. 5586.77 BogoMIPS (lpj=2793386)
[0.010682] pid_max: default: 32768 minimum: 301
[0.015317] ACPI: Core revision 20140828
[0.044598] ACPI: All ACPI Tables successfully acquired
[0.126450] Security Framework initialized
[0.130569] SELinux:  Initializing.
[0.135211] Dentry cache hash table entries: 2097152 (order: 12,
16777216 bytes)
[0.145731] Inode-cache hash table entries: 1048576 (order: 11,
8388608 bytes)
[0.154249] Mount-cache hash table entries: 32768 (order: 6, 262144
bytes)
[0.161163] Mountpoint-cache hash table entries: 32768 (order: 6,
262144 bytes)
[0.168731] Initializing cgroup subsys memory
[0.173110] Initializing cgroup subsys devices
[0.177570] Initializing cgroup subsys freezer
[0.182026] Initializing cgroup subsys net_cls
[0.186483] Initializing cgroup subsys blkio
[0.190763] Initializing cgroup subsys perf_event
[0.195479] Initializing cgroup subsys hugetlb
[0.199955] CPU: Physical Processor ID: 0
[0.203972] CPU: Processor Core ID: 0
[0.207649] ENERGY_PERF_BIAS: Set to 'normal', was 'performance'
[0.207649] ENERGY_PERF_BIAS: View and update with
x86_energy_perf_policy(8)
[0.220704] mce: CPU supports 16 MCE banks
[0.224832] CPU0: Thermal monitoring enabled (TM1)
[0.229658] Last level iTLB entries: 4KB 512, 2MB 8, 4MB 8
[0.229658] Last level dTLB entries: 4KB 512, 2MB 32, 4MB 32, 1GB 0
[0.241537] Freeing SMP alternatives memory: 24K (81e8 -
81e86000)
[0.250740] ftrace: allocating 27051 entries in 106 pages
[0.268137] dmar: Host address width 46
[0.271986] dmar: DRHD base: 0x00dfffc000 flags: 0x1
[0.277314] dmar: IOMMU 0: reg_base_addr dfffc000 ver 1:0 cap
d2078c106f0462 ecap f020fe
[0.285423] dmar: RMRR base: 0x00cba11000 end: 0x00cba27fff
[0.291703] dmar: ATSR flags: 0x0
[0.295122] IOAPIC id 0 under DRHD base  0xdfffc000 IOMMU 0
[0.300704] IOAPIC id 2 under DRHD base  0xdfffc000 IOMMU 0
[0.306281] HPET id 0 under DRHD base 0xdfffc000
[0.311011] Enabled IRQ remapping in xapic mode
[0.316070] ..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1
[0.332096] smpboot: CPU0: Intel(R) Xeon(R) CPU E5-1603 0 @ 2.80GHz
(fam: 06, model: 2d, stepping: 07)
[0.341495] Performance Events: PEBS fmt1+, 16-deep LBR, SandyBridge
events, full-width counters, Intel PMU driver.
[0.352047] perf_event_intel: PEBS disabled due to CPU errata, please
upgrade microcode
[0.360060] ... version:3
[0.364081] ... bit width:  48
[0.368182] ... generic registers:  8
[0.372196] ... value mask: 
[0.377513] ... max period: 
[0.382829] ... fixed-purpose events:   3
[0.386842] ... event mask: 000700ff
[0.393368] x86: Booting SMP configuration:
[0.397563]  node  #0, CPUs:  #1
[0.414672] NMI watchdog: enabled on all CPUs, permanently consumes
one hw-PMU counter.
[0.422957]  #2 #3
[0.451320] x86: Booted up 1 node, 4 CPUs
[0.455539] smpboot: Total of 4 processors activated (22347.08
BogoMIPS)
[0.466369] devtmpfs: initialized
[0.472993] PM: Registering ACPI NVS region [mem
0xcb75-0xcb7dafff] (569344 bytes)
[0.480930] PM: Registering ACPI NVS region [mem
0xcbaad000-0xcbaaefff] (8192 bytes)
[0.488689] PM: Registering ACPI NVS region [mem
0xcbabb000-0xcbacdfff] (77824 bytes)
[0.496535] PM: Registering ACPI NVS region [mem
0xcbb56000-0xcbb5dfff] (32768 bytes)
[0.504380] PM: Registering ACPI NVS region [mem
0xcbb71000-0xcbff] (4780032 bytes)
[0.513294] atomic64_test: passed for x86-64 platform with CX8 and
with SSE
[0.520272] pinctrl core: initialized pinctrl subsystem
[0.525549] RTC time:  9:52:43, date: 10/22/14
[0.530096] NET: Registered protocol family 16
[0.539573] cpuidle: using governor menu
[0.543583] ACPI: bus type PCI registered
[0.547608] acpiphp: ACPI Hot Plug PCI Controller Driver version: 0.5
[0.554133] PCI: MMCONFIG for domain  [bus 00-ff] at [mem
0xe000-0xefff] (base 0xe000)
[0.563457] PCI: MMCONFIG at [mem 0xe000-0xefff] reserved in
E820
[0.570548] PCI: Using configuration type 1 for base access
[0.582492] ACPI: Added _OSI(Module Device)
[0.586683] ACPI: Added _OSI(Processor Device)
[0.591140] ACPI: Added _OSI(3.0 _SCP Extensions)
[0.595849] ACPI: Added _OSI(Processor Aggregator Device)
[0.608829] ACPI: Executed 1 blocks of module-level executable AML
code
[0.670857] ACPI: Interpreter enabled
[0.674537] ACPI Exception: AE_

Re: [PATCH 0/5] iommu/vt-d: Fix crash dump failure caused by legacy DMA/IO

2014-10-22 Thread Baoquan He
On 10/22/14 at 10:22am, Li, Zhen-Hua wrote:
> 
> Hi Baoquan,
> I tested it on 3.17, it does not have these faults. There are little 
> differences between this version and Bill's last version.
> 
> I will test it on 3.18.0-rc1+ on my system and let you know the result.
> 
> And could you send me the result of "lspci -vvv " on your system?

I have pasted them here.

[~]$ lspci -vvv
00:00.0 Host bridge: Intel Corporation Xeon E5/Core i7 DMI2 (rev 07)
Subsystem: Hewlett-Packard Company Device 1589
Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop-
ParErr- Stepping- SERR- FastB2B- DisINTx-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort-
SERR- 

00:01.0 PCI bridge: Intel Corporation Xeon E5/Core i7 IIO PCI Express
Root Port 1a (rev 07) (prog-if 00 [Normal decode])
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop-
ParErr+ Stepping- SERR+ FastB2B- DisINTx-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort-
SERR- TAbort-
Reset- FastB2B-
PriDiscTmr- SecDiscTmr- DiscTmrStat- DiscTmrSERREn-
Capabilities: 
Kernel driver in use: pcieport

00:02.0 PCI bridge: Intel Corporation Xeon E5/Core i7 IIO PCI Express
Root Port 2a (rev 07) (prog-if 00 [Normal decode])
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop-
ParErr+ Stepping- SERR+ FastB2B- DisINTx-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort-
SERR- TAbort-
Reset- FastB2B-
PriDiscTmr- SecDiscTmr- DiscTmrStat- DiscTmrSERREn-
Capabilities: 
Kernel driver in use: pcieport

00:03.0 PCI bridge: Intel Corporation Xeon E5/Core i7 IIO PCI Express
Root Port 3a in PCI Express Mode (rev 07) (prog-if 00 [Normal decode])
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop-
ParErr+ Stepping- SERR+ FastB2B- DisINTx-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort-
SERR- TAbort-
Reset- FastB2B-
PriDiscTmr- SecDiscTmr- DiscTmrStat- DiscTmrSERREn-
Capabilities: 
Kernel driver in use: pcieport

00:05.0 System peripheral: Intel Corporation Xeon E5/Core i7 Address
Map, VTd_Misc, System Management (rev 07)
Subsystem: Hewlett-Packard Company Device 1589
Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop-
ParErr- Stepping- SERR- FastB2B- DisINTx-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort-
SERR- 

00:05.2 System peripheral: Intel Corporation Xeon E5/Core i7 Control
Status and Global Errors (rev 07)
Subsystem: Hewlett-Packard Company Device 1589
Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop-
ParErr- Stepping- SERR- FastB2B- DisINTx-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort-
SERR- 

00:05.4 PIC: Intel Corporation Xeon E5/Core i7 I/O APIC (rev 07)
(prog-if 20 [IO(X)-APIC])
Subsystem: Intel Corporation Xeon E5/Core i7 I/O APIC
Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop-
ParErr- Stepping- SERR- FastB2B- DisINTx-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort-
SERR- 

00:11.0 PCI bridge: Intel Corporation C600/X79 series chipset PCI
Express Virtual Root Port (rev 05) (prog-if 00 [Normal decode])
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop-
ParErr- Stepping- SERR+ FastB2B- DisINTx-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort-
SERR- TAbort-
Reset- FastB2B-
PriDiscTmr- SecDiscTmr- DiscTmrStat- DiscTmrSERREn-
Capabilities: 
Kernel driver in use: pcieport

00:16.0 Communication controller: Intel Corporation C600/X79 series
chipset MEI Controller #1 (rev 05)
Subsystem: Hewlett-Packard Company Device 1589
Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop-
ParErr- Stepping- SERR- FastB2B- DisINTx+
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort-
SERR- 
Kernel driver in use: mei_me

00:16.2 IDE interface: Intel Corporation C600/X79 series chipset IDE-r
Controller (rev 05) (prog-if 85 [Master SecO PriO])
Subsystem: Hewlett-Packard Company Device 1589
Control: I/O+ Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop-
ParErr- Stepping- SERR- FastB2B- DisINTx-
Status: Cap+ 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=fast >TAbort-
SERR- 
Kernel driver in use: ata_generic

00:16.3 Serial controller: Intel Corporation C600/X79 series chipset KT
Controller (rev 05) (prog-if 02 [16550])
Subsystem: Hewlett-Packard Company Device 1589
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop-
ParErr- Stepping- SERR- FastB2B- DisINTx-
Status: Cap+ 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=fast >TAbort-
SERR- 
Kernel driver in use: serial

00:19.0 Ethernet controller: Intel Corporation 82579LM Gigabit Network
Connection (rev 05)
Subsystem: Hewlett-Packard Company Device 1589
Control: I/O+ Mem+ BusMaster+ SpecCy

Re: [PATCH 0/5] iommu/vt-d: Fix crash dump failure caused by legacy DMA/IO

2014-10-27 Thread Baoquan He
On 10/27/14 at 03:29pm, Li, ZhenHua wrote:
> Hi Baoquan,
> I failed in testing this patchset for 3.18.0-rc1, this upstream
> 3.18.0-rc1 kernel cannot boot on my system, have not found out the
> reason.
> 
> Could you please test this patchset on 3.17.0 to see whether it has
> these faults?
> 
> Thanks
> Zhenhua

It failed on 3.17.0 too; see the log below:


[0.103751] Mount-cache hash table entries: 512 (order: 0, 4096
bytes)
[0.110285] Mountpoint-cache hash table entries: 512 (order: 0, 4096
bytes)
[0.117549] Initializing cgroup subsys memory
[0.121917] Initializing cgroup subsys devices
[0.126367] Initializing cgroup subsys freezer
[0.130817] Initializing cgroup subsys net_cls
[0.135265] Initializing cgroup subsys blkio
[0.139545] Initializing cgroup subsys perf_event
[0.144254] Initializing cgroup subsys hugetlb
[0.148741] CPU: Physical Processor ID: 0
[0.152751] CPU: Processor Core ID: 1
[0.156427] Last level iTLB entries: 4KB 512, 2MB 8, 4MB 8
[0.156427] Last level dTLB entries: 4KB 512, 2MB 32, 4MB 32, 1GB 0
[0.180040] Freeing SMP alternatives memory: 24K (ade7a000 -
ade8)
[0.190787] ftrace: allocating 26881 entries in 106 pages
[0.222955] dmar: Host address width 46
[0.226796] dmar: DRHD base: 0x00dfffc000 flags: 0x1
[0.232128] dmar: IOMMU 0: reg_base_addr dfffc000 ver 1:0 cap
d2078c106f0462 ecap f020fe
[0.240223] dmar: RMRR base: 0x00cba11000 end: 0x00cba27fff
[0.246495] dmar: ATSR flags: 0x0
[0.249921] IOAPIC id 0 under DRHD base  0xdfffc000 IOMMU 0
[0.255499] IOAPIC id 2 under DRHD base  0xdfffc000 IOMMU 0
[0.261076] HPET id 0 under DRHD base 0xdfffc000
[0.265899] Enabled IRQ remapping in xapic mode
[0.271030] ..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1
[0.287077] smpboot: CPU0: Intel(R) Xeon(R) CPU E5-1603 0 @ 2.80GHz
(fam: 06, model: 2d, stepping: 07)
[0.296535] Performance Events: PEBS fmt1+, 16-deep LBR, SandyBridge
events, full-width counters, Broken BIOS detected, complain to your
hardware vendor.
[0.310427] [Firmware Bug]: the BIOS has corrupted hw-PMU resources
(MSR 38d is b0)
[0.318087] Intel PMU driver.
[0.321065] ... version:3
[0.325077] ... bit width:  48
[0.329180] ... generic registers:  8
[0.333198] ... value mask: 
[0.338516] ... max period: 
[0.343834] ... fixed-purpose events:   3
[0.347848] ... event mask: 000700ff
[0.355607] x86: Booted up 1 node, 1 CPUs
[0.359627] smpboot: Total of 1 processors activated (5586.06
BogoMIPS)
[0.366281] NMI watchdog: enabled on all CPUs, permanently consumes
one hw-PMU counter.
[0.377496] devtmpfs: initialized
[0.386629] PM: Registering ACPI NVS region [mem
0xcb75-0xcb7dafff] (569344 bytes)
[0.394583] PM: Registering ACPI NVS region [mem
0xcbaad000-0xcbaaefff] (8192 bytes)
[0.402337] PM: Registering ACPI NVS region [mem
0xcbabb000-0xcbacdfff] (77824 bytes)
[0.410169] PM: Registering ACPI NVS region [mem
0xcbb56000-0xcbb5dfff] (32768 bytes)
[0.418005] PM: Registering ACPI NVS region [mem
0xcbb71000-0xcbff] (4780032 bytes)
[0.427905] atomic64_test: passed for x86-64 platform with CX8 and
with SSE
[0.434883] pinctrl core: initialized pinctrl subsystem
[0.440171] RTC time: 10:38:17, date: 10/27/14
[0.444783] NET: Registered protocol family 16
[0.449652] cpuidle: using governor menu
[0.453820] ACPI: bus type PCI registered
[0.457841] acpiphp: ACPI Hot Plug PCI Controller Driver version: 0.5
[0.464406] PCI: MMCONFIG for domain  [bus 00-ff] at [mem
0xe000-0xefff] (base 0xe000)
[0.473718] PCI: MMCONFIG at [mem 0xe000-0xefff] reserved in
E820
[0.481119] PCI: Using configuration type 1 for base access
[0.489116] ACPI: Added _OSI(Module Device)
[0.493313] ACPI: Added _OSI(Processor Device)
[0.497768] ACPI: Added _OSI(3.0 _SCP Extensions)
[0.502477] ACPI: Added _OSI(Processor Aggregator Device)
[0.521054] ACPI: Executed 1 blocks of module-level executable AML
code
[0.653647] ACPI: Interpreter enabled
[0.657334] ACPI Exception: AE_NOT_FOUND, While evaluating Sleep
State [\_S1_] (20140724/hwxface-580)
[0.10] ACPI Exception: AE_NOT_FOUND, While evaluating Sleep
State [\_S2_] (20140724/hwxface-580)
[0.675902] ACPI: (supports S0 S3 S4 S5)
[0.679833] ACPI: Using IOAPIC for interrupt routing
[0.684858] PCI: Using host bridge windows from ACPI; if necessary,
use "pci=nocrs" and report a bug
[0.695495] [Firmware Bug]: ACPI: BIOS _OSI(Linux) query ignored
[0.717663] ACPI: PCI Root Bridge [PCI0] (domain  [bus 00-7f])
[0.723860] acpi PNP0A08:00: _OSC: OS supports [ExtendedConfig ASPM
ClockPM Segments MSI]
[0.732282] acpi PNP0A08:00: _OSC: platform does not support
[PCIeCapability]
[0.739533] acpi PNP0A0

Re: [PATCH 1/1] x86/iommu: fix incorrect bit operations in setting values

2014-11-13 Thread Baoquan He
On 11/13/14 at 05:06pm, Li, ZhenHua wrote:
> Minfei,
> Thanks for your testing.
> On my system, I got error messages:
> 
> [8.019096] usb usb2: New USB device strings: Mfr=3, Product=2,
> SerialNumber=1
> [8.019617] dmar: DRHD: handling fault status reg 102
> [8.019621] dmar: DMAR:[DMA Read] Request device [01:00.0] fault
> addr fff6a000
> [8.019621] DMAR:[fault reason 06] PTE Read access is not set
> [8.019627] dmar: DRHD: handling fault status reg 202
> [8.019631] dmar: DMAR:[DMA Read] Request device [21:00.0] fault
> addr fff6a000
> [8.019631] DMAR:[fault reason 06] PTE Read access is not set
> [8.019638] dmar: DRHD: handling fault status reg 202
> [8.019641] dmar: DMAR:[DMA Read] Request device [41:00.0] fault
> addr fff6a000
> [8.019641] DMAR:[fault reason 06] PTE Read access is not set
> [8.019647] dmar: DRHD: handling fault status reg 202
> [8.019651] dmar: DMAR:[DMA Read] Request device [61:00.0] fault
> addr fff6a000
> [8.019651] DMAR:[fault reason 06] PTE Read access is not set
> 
> And this patch can fix this.
> 
> 
> The reason you do not get error messages may be there is no ongoing DMA
> request on your PCI devices when kdump kernel boots, I am not sure of
> this.

I think Minfei means that he applied this patch, so he got no error messages.
The patches he listed include this one:

0006-x86-iommu-fix-incorrect-bit-operations-in-setting-va.patch

Hi Zhenhua, 

Below is an excerpt from Joerg's comments, summarizing the conclusion that he
and David reached in their discussion. The 1st step is the same as your
patches; what do you think about the 2nd step?

1. If the VT-d driver finds the IOMMU enabled, it reuses its
   root-context table. This way the IOMMU must not be disabled
   and re-enabled, eliminating the race we have now.
   Other data structures like the context-entries are copied
   over from the old kernel.  We basically keep all mappings
   from the old kernel, allowing any in-flight DMA to succeed.
   No memory or disk data corruption.
   The issue here is, that we modify data from the old kernel
   which is about to be dumped. But there are ways to work
   around that.

2. When a device driver issues the first dma_map command for a
   device, we assign a new and empty page-table, thus removing
   all mappings from the old kernel for the device.
   Rationale is, that at this point the device driver should
   have reset the device to a point where all in-flight DMA is
   canceled.

Thanks
Baoquan

> 
> Zhenhua
> On 11/12/2014 07:28 PM, Minfei Huang wrote:
> >The kdump starts 2nd kernel without any error message when I use
> >3.18.0-rc4 merged last 6 patchs. The following is the message which 2nd
> >kernel prints during booting.
> >
> >Patchset:
> > 0001-iommu-vt-d-Update-iommu_attach_domain-and-its-caller.patch
> > 0002-iommu-vt-d-Items-required-for-kdump.patch
> > 0003-iommu-vt-d-data-types-and-functions-used-for-kdump.patch
> > 0004-iommu-vt-d-Add-domain-id-functions.patch
> > 0005-iommu-vt-d-enable-kdump-support-in-iommu-module.patch
> > 0006-x86-iommu-fix-incorrect-bit-operations-in-setting-va.patch
> >
> >log:
> >[1.689604] IOMMU intel_iommu_in_crashdump = true
> >[1.694310] IOMMU Skip disabling iommu hardware translations
> >[1.699987] DMAR: No ATSR found
> >[1.703151] IOMMU Copying translate tables from panicked kernel
> >[1.710786] IOMMU: root_new_virt:0x8800296ec000 phys:0x296ec000
> >[1.717401] IOMMU:0 Domain ids from panicked kernel:
> >[1.722364] DID did:13(0x000d)
> >[1.725424] DID did:12(0x000c)
> >[1.728482] DID did:11(0x000b)
> >[1.731542] DID did:10(0x000a)
> >[1.734603] DID did:6(0x0006)
> >[1.737574] DID did:7(0x0007)
> >[1.740549] DID did:5(0x0005)
> >[1.743522] DID did:9(0x0009)
> >[1.746495] DID did:8(0x0008)
> >[1.749467] DID did:4(0x0004)
> >[1.752439] DID did:3(0x0003)
> >[1.755413] DID did:2(0x0002)
> >[1.758385] DID did:0(0x)
> >[1.761357] DID did:1(0x0001)
> >[1.764331] 
> >[1.769302] IOMMU 0 0xfed5: using Queued invalidation
> >
> >On 11/05/14 at 03:30pm, Li, Zhen-Hua wrote:
> >>The function context_set_address_root() and set_root_value are setting new
> >>address in a wrong way, and this patch is trying to fix this problem.
> >>
> >>According to Intel Vt-d specs(Feb 2011, Revision 1.3), Chapter 9.1 and 9.2,
> >>field ctp in root entry is using bits 12:63, field asr in context entry is
> >>using bits 12:63.
> >>
> >>To set these fields, the following functions are used:
> >>static inline void context_set_address_root(struct context_entry *context,
> >> unsigned long value);
> >>and
> >>static inline void set_root_value(struct root_entry *root, unsigned long 
> >>value)
> >>
> >>But they are using an invalid method to set these field

Re: [PATCH 0/5] iommu/vt-d: Fix crash dump failure caused by legacy DMA/IO

2014-12-10 Thread Baoquan He
Hi Joerg, ZhenHua,

This issue happens on AMD iommu too, do you have any plans or
thoughts on that?

Thanks
Baoquan

On 11/17/14 at 02:38pm, Joerg Roedel wrote:
> On Fri, Nov 14, 2014 at 02:27:44PM +0800, Li, ZhenHua wrote:
> > I am working following  your directions:
> > 
> > 1.  If the VT-d driver finds the IOMMU enabled, it reuses its root entry
> > table, and do NOT disable-enable iommu. Other data will be copied.
> > 
> > 2. When a device driver issues the first dma_map command for a
> > device, we assign a new and empty page-table, thus removing all
> > mappings from the old kernel for the device.
> > 
> > Please let me know if I get something wrong.
> 
> Yes, this sounds right. Happily waiting for patches :)
> 
> 
>   Joerg
> 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 0/5] iommu/vt-d: Fix crash dump failure caused by legacy DMA/IO

2014-12-15 Thread Baoquan He
On 12/12/14 at 05:11pm, Joerg Roedel wrote:
> On Fri, Dec 12, 2014 at 10:25:31AM +0800, Li, ZhenHua wrote:
> > Sorry I have no plan yet.
> > Could you send me your logs on your AMD system?
> 
> > On 12/10/2014 04:46 PM, Baoquan He wrote:
> > >This issue happens on AMD iommu too, do you have any plans or
> > >thoughts on that?
> 
> I think the best approach for now is to get a proof-of-concept on the
> VT-d driver. If it works there the way we expect, we can implement the
> same handling in the AMD driver. But I see no reason to hold back the
> VT-d patches until it is also fixed for AMD systems.

Yes, I agree with you. I just wanted to raise this issue upstream.

Thanks, Joerg.

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 0/5] iommu/vt-d: Fix crash dump failure caused by legacy DMA/IO

2014-12-15 Thread Baoquan He
On 12/12/14 at 10:25am, Li, ZhenHua wrote:
> Sorry I have no plan yet.
> Could you send me your logs on your AMD system?

Sure, please check the attachment. The AMD iommu seems to behave a little
differently. On the machine I reserved for testing, it always hangs the system
during bootup. As Joerg said, for now we can just note this issue; vt-d is
still the first priority.

[root@hp-sl4545g7-01 ~]# echo c >/proc/sysrq-trigger 
[ 4178.349174] SysRq : Trigger a crash
[ 4178.367887] BUG: unable to handle kernel NULL pointer dereference at 
  (null)
[ 4178.413933] IP: [] sysrq_handle_crash+0x16/0x20
[ 4178.449637] PGD 8219eb067 PUD 824b4f067 PMD 0 
[ 4178.475360] Oops: 0002 [#1] SMP 
[ 4178.494112] Modules linked in: cfg80211 sg rfkill kvm_amd kvm nfsd igb 
crct10dif_pclmul crc32_pclmul ptp crc32c_intel ghash_clmulni_intel auth_rpcgss 
aesni_intel pps_core dca ipmi_si hpwdt sp5100_tco lrw nfs_acl gf128mul hpilo 
pcspkr ipmi_msghandler serio_raw fam15h_power amd64_edac_mod glue_helper 
i2c_piix4 edac_mce_amd k10temp lockd edac_core ablk_helper cryptd shpchp sunrpc 
xfs libcrc32c radeon i2c_algo_bit drm_kms_helper ttm sd_mod ata_generic 
crc_t10dif pata_acpi ahci drm crct10dif_common libahci pata_atiixp libata hpsa 
i2c_core dm_mirror dm_region_hash dm_log dm_mod
[ 4178.785689] CPU: 1 PID: 1872 Comm: bash Not tainted 3.14.0+ #44
[ 4178.819575] Hardware name: HP ProLiant SL4545 G7/, BIOS A31 12/08/2012
[ 4178.856427] task: 88081ab9b680 ti: 88082036 task.ti: 
88082036
[ 4178.899369] RIP: 0010:[]  [] 
sysrq_handle_crash+0x16/0x20
[ 4178.947941] RSP: 0018:880820361e80  EFLAGS: 00010046
[ 4178.977106] RAX: 000f RBX: 81a0b6c0 RCX: 
[ 4179.018848] RDX:  RSI: 88083ea0e708 RDI: 0063
[ 4179.059550] RBP: 880820361e80 R08: 0092 R09: 051b
[ 4179.099603] R10: 051a R11: 0003 R12: 0063
[ 4179.140142] R13: 0246 R14: 0007 R15: 
[ 4179.179475] FS:  7f4edd746740() GS:88083ea0() 
knlGS:
[ 4179.225437] CS:  0010 DS:  ES:  CR0: 80050033
[ 4179.256725] CR2:  CR3: 000821b75000 CR4: 000406e0
[ 4179.297414] Stack:
[ 4179.308744]  880820361eb8 813a8882 0002 
7f4edd744000
[ 4179.349142]  880820361f48 0002  
880820361ed0
[ 4179.390914]  813a8d8f 88082405fa40 880820361ef0 
81236d5d
[ 4179.440766] Call Trace:
[ 4179.453918]  [] __handle_sysrq+0xa2/0x170
[ 4179.485923]  [] write_sysrq_trigger+0x2f/0x40
[ 4179.518602]  [] proc_reg_write+0x3d/0x80
[ 4179.548870]  [] vfs_write+0xba/0x1e0
[ 4179.578130]  [] SyS_write+0x55/0xd0
[ 4179.606615]  [] system_call_fastpath+0x16/0x1b
[ 4179.640318] Code: 65 34 75 e5 4c 89 ef e8 d9 f7 ff ff eb db 0f 1f 80 00 00 
00 00 66 66 66 66 90 55 c7 05 a0 d5 5b 00 01 00 00 00 48 89 e5 0f ae f8  04 
25 00 00 00 00 01 5d c3 66 66 66 66 90 55 31 c0 c7 05 1e 
[ 4179.748149] RIP  [] sysrq_handle_crash+0x16/0x20
[ 4179.783683]  RSP 
[ 4179.802762] CR2: 
[0.00] Initializing cgroup subsys cpuset
[0.00] Initializing cgroup subsys cpu
[0.00] Initializing cgroup subsys cpuacct
[0.00] Linux version 3.14.0+ (b...@ibm-x3755m3-02.lab.bos.redhat.com) 
(gcc version 4.8.2 20140120 (Red Hat 4.8.2-13) (GCC) ) #44 SMP Thu Apr 10 
22:28:02 EDT 2014
[0.00] Command line: BOOT_IMAGE=/vmlinuz-3.14.0+ 
root=/dev/mapper/rhel_hp--sl4545g7--01-root ro console=ttyS1,115200n81 
rd.lvm.lv=rhel_hp-sl4545g7-01/root rd.lvm.lv=rhel_hp-sl4545g7-01/swap 
vconsole.font=latarcyrheb-sun16 vconsole.keymap=us LANG=en_US.UTF-8 irqpoll 
nr_cpus=1 reset_devices cgroup_disable=memory mce=off numa=off 
udev.children-max=2 panic=10 rootflags=nofail acpi_no_memhotplug 
disable_cpu_apicid=0 memmap=exactmap memmap=592K@4K memmap=523672K@311296K 
elfcorehdr=834968K memmap=192K#3141752K
[0.00] e820: BIOS-provided physical RAM map:
[0.00] BIOS-e820: [mem 0x0100-0x00094fff] usable
[0.00] BIOS-e820: [mem 0x00095000-0x00095bff] reserved
[0.00] BIOS-e820: [mem 0x00098000-0x0009] reserved
[0.00] BIOS-e820: [mem 0x000f-0x000f] reserved
[0.00] BIOS-e820: [mem 0x0010-0xbfc1dfff] usable
[ [mem 0xbfc1e000-0xbfc4dfff] ACPI data
[0.00] BIOS-e820: [mem 0xbfc4e000-0xbfc4efff] usable
[0.00] BIOS-e820: [mem 0xbfc4f000-0xcfff] reserved
[0.00] BIOS-e820: [mem 0xfec0-0xfee0] reserved
[0.00] BIOS-e820: [mem 0xff80-0x] reserved
[0.00] BIOS-e820: [mem 0x0001-0x00083effefff] usable
[0.00] e820: last_pfn = 0x83efff max_arch_pfn = 0x4
[0.00] NX (Execute

Re: [PATCH 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel

2015-01-05 Thread Baoquan He
Hi Zhenhua,

I just tested your patchset based on 3.19.0-rc2+, and found several dmar
faults and intr-remap faults. They do not seem to be the same as Takao's;
please check the attachment.

Thanks
Baoquan


[root@dhcp-16-105 ~]# kdumpctl restart
kexec: failed to unloaded kdump kernel
Stopping kdump: [FAILED]
+ /sbin/kexec -p '--command-line=BOOT_IMAGE=/vmlinuz-3.19.0-rc2+ 
root=UUID=f170152e-de83-46ee-9546-8ccd53f9753b ro rd.md=0 rd.lvm=0 rd.dm=0 
vconsole.keymap=us rd.luk+
+ set +x
kexec: loaded kdump kernel
Starting kdump: [OK]
[root@dhcp-16-105 ~]# echo c >/proc/sysrq-trigger 
[  722.444549] SysRq : Trigger a crash
[  722.448133] BUG: unable to handle kernel NULL pointer dereference at 
  (null)
[  722.456044] IP: [] sysrq_handle_crash+0x16/0x20
[  722.462179] PGD 404720067 PUD 41afd6067 PMD 0 
[  722.466719] Oops: 0002 [#1] SMP 
[  722.46] Modules linked in: fuse ipt_MASQUERADE nf_nat_masquerade_ipv4 
xt_CHECKSUM tun ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 cfg80211 xt_conntrack 
ebtable_nc
[  722.570165] CPU: 0 PID: 1979 Comm: bash Not tainted 3.19.0-rc2+ #93
[  722.576453] Hardware name: Hewlett-Packard HP Z420 Workstation/1589, BIOS 
J61 v01.02 03/09/2012
[  722.585176] task: 88041d6a9b60 ti: 88040882c000 task.ti: 
88040882c000
[  722.592699] RIP: 0010:[]  [] 
sysrq_handle_crash+0x16/0x20
[  722.601292] RSP: 0018:88040882fe58  EFLAGS: 00010246
[  722.606618] RAX: 000f RBX: 81cafdc0 RCX: 
[  722.613776] RDX:  RSI: 88042fc0e6f8 RDI: 0063
[  722.620932] RBP: 88040882fe58 R08: 00c2 R09: 81eec85c
[  722.628098] R10: 040f R11: 040e R12: 0063
[  722.635249] R13:  R14: 0007 R15: 
[  722.642415] FS:  7f0cca07b740() GS:88042fc0() 
knlGS:
[  722.650529] CS:  0010 DS:  ES:  CR0: 80050033
[  722.656294] CR2:  CR3: 0004095e4000 CR4: 000407f0
[  722.663444] Stack:
[  722.665471]  88040882fe88 8145abc7 0002 
7f0cca092000
[  722.672960]  0002 88040882ff48 88040882fea8 
8145b073
[  722.680440]  0001 8803ee48b300 88040882fed8 
8126ebcd
[  722.687927] Call Trace:
[  722.690390]  [] __handle_sysrq+0x107/0x170
[  722.696079]  [] write_sysrq_trigger+0x33/0x40
[  722.702029]  [] proc_reg_write+0x3d/0x80
[  722.707536]  [] vfs_write+0xb7/0x1f0
[  722.712697]  [] ? do_audit_syscall_entry+0x6c/0x70
[  722.719084]  [] SyS_write+0x55/0xd0
[  722.724159]  [] system_call_fastpath+0x12/0x17
[  722.730176] Code: c1 f7 ff ff eb db 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 
00 00 66 66 66 66 90 55 c7 05 b4 e9 a8 00 01 00 00 00 48 89 e5 0f ae f8  04 
25 00 0 
[  722.750274] RIP  [] sysrq_handle_crash+0x16/0x20
[  722.756509]  RSP 
[  722.760023] CR2: 
h
 [0.00] ERROR: earlyprintk= earlyser already used
[0.00] debug: ignoring loglevel setting.
[0.00] NX (Execute Disable) protection: active
[0.00] SMBIOS 2.7 present.
[0.00] DMI: Hewlett-Packard HP Z420 Workstation/1589, BIOS J61 v01.02 
03/09/2012
[0.00] e820: update [mem 0x-0x0fff] usable ==> reserved
[0.00] e820: remove [mem 0x000a-0x000f] usable
[0.00] e820: last_pfn = 0x38000 max_arch_pfn = 0x4
[0.00] MTRR default type: write-back
[0.00] MTRR fixed ranges enabled:
[0.00]   0-9 write-back
[0.00]   A-B uncachable
[0.00]   C-DBFFF write-protect
[0.00]   DC000-E9FFF uncachable
[0.00]   EA000-F write-protect
[0.00] MTRR variable ranges enabled:
[0.00]   0 base CC00 mask 3C00 uncachable
[0.00]   1 base D000 mask 3000 uncachable
[0.00]   2 base E000 mask 3FFFE000 uncachable
[0.00]   3 base 00043000 mask 3000 uncachable
[0.00]   4 disabled
[0.00]   5 disabled
[0.00]   6 disabled
[0.00]   7 disabled
[0.00]   8 disabled
[0.00]   9 disabled
[0.00] PAT configuration [0-7]: WB  WC  UC- UC  WB  WC  UC- UC  
[0.00] x2apic enabled by BIOS, switching to x2apic ops
[0.00] found SMP MP-table at [mem 0x000f4bc0-0x000f4bcf] mapped at 
[880f4bc0]
[0.00] Base memory trampoline at [8809] 9 size 24576
[0.00] Using GB pages for direct mapping
[0.00] init_memory_mapping: [mem 0x-0x000f]
[0.00]  [mem 0x-0x000f] page 4k
[0.00] BRK [0x36ffa000, 0x36ffafff] PGTABLE
[0.00] BRK [0x36ffb000, 0x36ffbfff] PGTABLE
[0.00] BRK [0x36ffc000, 0x36ffcfff] PGTABLE
[0.00] init_memory_mapping: [mem 0x37c0-0x37df]
[0.00]  [mem 0x37c0-0x37df] page 2M
[0.00] init_memory_mapping: [mem 0x3400-0x3

Re: [PATCH v7 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel

2015-01-06 Thread Baoquan He
On 01/07/15 at 12:11pm, Li, ZhenHua wrote:
> Many thanks to Takao Indoh and Baoquan He, for your testing on more
> different systems.
> 
> The calling of flush functions are added to this version.
> 
> The usage of __iommu_flush_cache function :
> 1. Fixes a dump on Takao's system.
> 2. Reduces the count of faults on Baoquan's system.

I am testing the version you sent to me yesterday afternoon. Is that
different from this patchset? I found that your patchset may reserve a big
contiguous memory region under 896M, which causes the crashkernel
reservation to fail when I set crashkernel=320M. The reason I increased
the crashkernel reservation to 320M is that 256M is not enough and causes OOM
when that patchset is tested.

I am checking what happened.


Thanks
Baoquan

> 
> Regards
> Zhenhua
> 
> On 01/07/2015 12:04 PM, Li, Zhen-Hua wrote:
> >This patchset is an update of Bill Sumner's patchset, implements a fix for:
> >If a kernel boots with intel_iommu=on on a system that supports intel vt-d,
> >when a panic happens, the kdump kernel will boot with these faults:
> >
> > dmar: DRHD: handling fault status reg 102
> > dmar: DMAR:[DMA Read] Request device [01:00.0] fault addr fff8
> > DMAR:[fault reason 01] Present bit in root entry is clear
> >
> > dmar: DRHD: handling fault status reg 2
> > dmar: INTR-REMAP: Request device [[61:00.0] fault index 42
> > INTR-REMAP:[fault reason 34] Present field in the IRTE entry is clear
> >
> >On some system, the interrupt remapping fault will also happen even if the
> >intel_iommu is not set to on, because the interrupt remapping will be enabled
> >when x2apic is needed by the system.
> >
> >The cause of the DMA fault is described in Bill's original version, and the
> >INTR-Remap fault is caused by a similar reason. In short, the initialization
> >of vt-d drivers causes the in-flight DMA and interrupt requests get wrong
> >response.
> >
> >To fix this problem, we modifies the behaviors of the intel vt-d in the
> >crashdump kernel:
> >
> >For DMA Remapping:
> >1. To accept the vt-d hardware in an active state,
> >2. Do not disable and re-enable the translation, keep it enabled.
> >3. Use the old root entry table, do not rewrite the RTA register.
> >4. Malloc and use new context entry table and page table, copy data from the
> >old ones that used by the old kernel.
> >5. to use different portions of the iova address ranges for the device 
> >drivers
> >in the crashdump kernel than the iova ranges that were in-use at the time
> >of the panic.
> >6. After device driver is loaded, when it issues the first dma_map command,
> >free the dmar_domain structure for this device, and generate a new one, 
> > so
> >that the device can be assigned a new and empty page table.
> >7. When a new context entry table is generated, we also save its address to
> >the old root entry table.
> >
> >For Interrupt Remapping:
> >1. To accept the vt-d hardware in an active state,
> >2. Do not disable and re-enable the interrupt remapping, keep it enabled.
> >3. Use the old interrupt remapping table, do not rewrite the IRTA register.
> >4. When ioapic entry is setup, the interrupt remapping table is changed, and
> >the updated data will be stored to the old interrupt remapping table.
> >
> >Advantages of this approach:
> >1. All manipulation of the IO-device is done by the Linux device-driver
> >for that device.
> >2. This approach behaves in a manner very similar to operation without an
> >active iommu.
> >3. Any activity between the IO-device and its RMRR areas is handled by the
> >device-driver in the same manner as during a non-kdump boot.
> >4. If an IO-device has no driver in the kdump kernel, it is simply left 
> >alone.
> >This supports the practice of creating a special kdump kernel without
> >drivers for any devices that are not required for taking a crashdump.
> >5. Minimal code-changes among the existing mainline intel vt-d code.
> >
> >Summary of changes in this patch set:
> >1. Added some useful function for root entry table in code intel-iommu.c
> >2. Added new members to struct root_entry and struct irte;
> >3. Functions to load old root entry table to iommu->root_entry from the 
> >memory
> >of old kernel.
> >4. Functions to malloc new context entry table and page table and copy the 
> >data
> >from the old ones to the malloced new ones.
> >5. Functions to enable support for DMA remapping in kdump kernel.
> >6. Functions to load 

Re: [PATCH v7 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel

2015-01-07 Thread Baoquan He
On 01/07/15 at 01:25pm, Li, ZhenHua wrote:
> It is same as the last one I send to you yesterday.
> 
> The continuous memory that needed for data in this patchset:
> RE: PAGE_SIZE, 4096 Bytes;
> IRTE: 65536 * 16 ; 1M Bytes;
> 
> It should use same memory as the old versions of this patchset. The
> changes for the last version do not need more memory.

Hi Zhenhua,

It was my mistake: I didn't strip the debug info from the modules, so the
initramfs was bloated. I have just tested the latest version; it works well
and the dump is successful. No dmar faults or intr-remap faults are seen
any more, good job!

Thanks
Baoquan


> 
> Regards
> Zhenhua
> 
> On 01/07/2015 01:02 PM, Baoquan He wrote:
> >On 01/07/15 at 12:11pm, Li, ZhenHua wrote:
> >>Many thanks to Takao Indoh and Baoquan He, for your testing on more
> >>different systems.
> >>
> >>The calling of flush functions are added to this version.
> >>
> >>The usage of __iommu_flush_cache function :
> >>1. Fixes a dump on Takao's system.
> >>2. Reduces the count of faults on Baoquan's system.
> >
> >I am testing the version you sent to me yesterday afternoon. Is that
> >different from this patchset? I found that your patchset may reserve a big
> >contiguous memory region under 896M, which causes the crashkernel
> >reservation to fail when I set crashkernel=320M. The reason I increased
> >the crashkernel reservation to 320M is that 256M is not enough and causes OOM
> >when that patchset is tested.
> >
> >I am checking what happened.
> >
> >
> >Thanks
> >Baoquan
> >
> >>
> >>Regards
> >>Zhenhua
> >>
> >>On 01/07/2015 12:04 PM, Li, Zhen-Hua wrote:
> >>>This patchset is an update of Bill Sumner's patchset, implements a fix for:
> >>>If a kernel boots with intel_iommu=on on a system that supports intel vt-d,
> >>>when a panic happens, the kdump kernel will boot with these faults:
> >>>
> >>> dmar: DRHD: handling fault status reg 102
> >>> dmar: DMAR:[DMA Read] Request device [01:00.0] fault addr fff8
> >>> DMAR:[fault reason 01] Present bit in root entry is clear
> >>>
> >>> dmar: DRHD: handling fault status reg 2
> >>> dmar: INTR-REMAP: Request device [[61:00.0] fault index 42
> >>> INTR-REMAP:[fault reason 34] Present field in the IRTE entry is clear
> >>>
> >>>On some system, the interrupt remapping fault will also happen even if the
> >>>intel_iommu is not set to on, because the interrupt remapping will be 
> >>>enabled
> >>>when x2apic is needed by the system.
> >>>
> >>>The cause of the DMA fault is described in Bill's original version, and the
> >>>INTR-Remap fault is caused by a similar reason. In short, the 
> >>>initialization
> >>>of vt-d drivers causes the in-flight DMA and interrupt requests get wrong
> >>>response.
> >>>
> >>>To fix this problem, we modifies the behaviors of the intel vt-d in the
> >>>crashdump kernel:
> >>>
> >>>For DMA Remapping:
> >>>1. To accept the vt-d hardware in an active state,
> >>>2. Do not disable and re-enable the translation, keep it enabled.
> >>>3. Use the old root entry table, do not rewrite the RTA register.
> >>>4. Malloc and use new context entry table and page table, copy data from 
> >>>the
> >>>old ones that used by the old kernel.
> >>>5. to use different portions of the iova address ranges for the device 
> >>>drivers
> >>>in the crashdump kernel than the iova ranges that were in-use at the 
> >>> time
> >>>of the panic.
> >>>6. After device driver is loaded, when it issues the first dma_map command,
> >>>free the dmar_domain structure for this device, and generate a new 
> >>> one, so
> >>>that the device can be assigned a new and empty page table.
> >>>7. When a new context entry table is generated, we also save its address to
> >>>the old root entry table.
> >>>
> >>>For Interrupt Remapping:
> >>>1. To accept the vt-d hardware in an active state,
> >>>2. Do not disable and re-enable the interrupt remapping, keep it enabled.
> >>>3. Use the old interrupt remapping table, do not rewrite the IRTA register.
> >>>4. When ioapic entry is setup, the interrupt remapping table is changed, 
> >>>and
> >&g

Re: [PATCH v8 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel

2015-01-12 Thread Baoquan He
> >Zhenhua's updates:
> > https://lkml.org/lkml/2014/10/21/134
> > https://lkml.org/lkml/2014/12/15/121
> > https://lkml.org/lkml/2014/12/22/53
> > https://lkml.org/lkml/2015/1/6/1166
> >
> >Changelog[v8]:
> > 1. Add a missing __iommu_flush_cache in function copy_page_table.
> >
> >Changelog[v7]:
> > 1. Use __iommu_flush_cache to flush the data to hardware.
> >
> >Changelog[v6]:
> > 1. Use "unsigned long" as type of physical address.
> > 2. Use new function unmap_device_dma to unmap the old dma.
> > 3. Some small incorrect bits order for aw shift.
> >
> >Changelog[v5]:
> > 1. Do not disable and re-enable translation and interrupt remapping.
> > 2. Use old root entry table.
> > 3. Use old interrupt remapping table.
> > 4. New functions to copy data from old kernel, and save to old kernel 
> > mem.
> > 5. New functions to save updated root entry table and irte table.
> > 6. Use intel_unmap to unmap the old dma;
> >     7. Allocate new pages while driver is being loaded.
> >
> >Changelog[v4]:
> > 1. Cut off the patches that move some defines and functions to new 
> > files.
> > 2. Reduce the number of patches to five, making it easier to read.
> > 3. Changed the name of functions, make them consistent with current 
> > context
> >get/set functions.
> > 4. Add change to function __iommu_attach_domain.
> >
> >Changelog[v3]:
> > 1. Commented-out "#define DEBUG 1" to eliminate debug messages.
> > 2. Updated the comments about changes in each version.
> > 3. Fixed: one-line added to Copy-Translations patch to initialize the 
> > iovad
> >   struct as recommended by Baoquan He [b...@redhat.com]
> >   init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
> >
> >Changelog[v2]:
> > The following series implements a fix for:
> > A kdump problem about DMA that has been discussed for a long time. That 
> > is,
> > when a kernel panics and boots into the kdump kernel, DMA started by the
> > panicked kernel is not stopped before the kdump kernel is booted and the
> > kdump kernel disables the IOMMU while this DMA continues.  This causes 
> > the
> > IOMMU to stop translating the DMA addresses as IOVAs and begin to treat
> > them as physical memory addresses -- which causes the DMA to either:
> > (1) generate DMAR errors or
> > (2) generate PCI SERR errors or
> > (3) transfer data to or from incorrect areas of memory. Often this
> > causes the dump to fail.
> >
> >Changelog[v1]:
> > The original version.
> >
> >Changed in this version:
> >1. Do not disable and re-enable translation and interrupt remapping.
> >2. Use old root entry table.
> >3. Use old interrupt remapping table.
> >4. Use "unsigned long" as physical address.
> >5. Use intel_unmap to unmap the old dma;
> >
> >Baoquan He  helps testing this patchset.
> >Takao Indoh  gives valuable suggestions.
> >
> >   iommu/vt-d: Update iommu_attach_domain() and its callers
> >   iommu/vt-d: Items required for kdump
> >   iommu/vt-d: Add domain-id functions
> >   iommu/vt-d: functions to copy data from old mem
> >   iommu/vt-d: Add functions to load and save old re
> >   iommu/vt-d: datatypes and functions used for kdump
> >   iommu/vt-d: enable kdump support in iommu module
> >   iommu/vt-d: assign new page table for dma_map
> >   iommu/vt-d: Copy functions for irte
> >   iommu/vt-d: Use old irte in kdump kernel
> >
> >Signed-off-by: Bill Sumner 
> >Signed-off-by: Li, Zhen-Hua 
> >Signed-off-by: Takao Indoh 
> >Tested-by: Baoquan He 
> >---
> >  drivers/iommu/intel-iommu.c | 1054 
> > +--
> >  drivers/iommu/intel_irq_remapping.c |  104 +++-
> >  include/linux/intel-iommu.h |   18 +
> >  3 files changed, 1134 insertions(+), 42 deletions(-)
> >
> 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v8 02/10] iommu/vt-d: Items required for kdump

2015-01-13 Thread Baoquan He
On 01/12/15 at 10:29am, Vivek Goyal wrote:
> On Mon, Jan 12, 2015 at 04:22:08PM +0100, Joerg Roedel wrote:
> > It looks like you are still copying the io-page-tables from the old
> > kernel into the kdump kernel, is that right? With the approach that was
> > proposed you only need to copy over the context entries 1-1. They are
> > still pointing to the page-tables in the old kernels memory (which is
> > just fine).
> 
> Kdump has the notion of backup region. Where certain parts of old kernels
> memory can be moved to a different location (first 640K on x86 as of now)
> and new kernel can make use of this memory now.

Hi Vivek,

I am a bit confused about the backup region. Taking x86 as an example, we
usually copy the first 640K to a backup region. This first 640K is then used
as a usable memory region in the 2nd kernel, since its content has been copied
to the backup region. And that backup region is taken from the crashkernel
reserved memory and is not passed to the 2nd kernel as a usable memory region.

Did you mean that the old page table could fall into the first 640K memory
region, or into the reserved backup region? As I understand it, the backup
region is taken from crashkernel memory, which is not used by any process of
the 1st kernel.

Thanks
Baoquan

> 
> So we will have to just make sure that no parts of this old page table
> fall into backup region.
> 
> Thanks
> Vivek
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v8 06/10] iommu/vt-d: datatypes and functions used for kdump

2015-01-14 Thread Baoquan He
On 01/12/15 at 03:06pm, Li, Zhen-Hua wrote:
> +/*
> + * Interface to the "copy translation tables" set of functions
> + * from mainline code.
> + */
> +static int intel_iommu_load_translation_tables(struct dmar_drhd_unit *drhd,
> + int g_num_of_iommus)

The argument g_num_of_iommus has the same name as the global variable; it
would be better to rename it to num_of_iommus. It could even be removed, since
you can just use the global variable g_num_of_iommus in this function.

The drhd argument could be replaced by the intel_iommu, because no other
member of drhd is needed.

> +{
> + struct intel_iommu *iommu;  /* Virt(iommu hardware registers) */
> + unsigned long long q;   /* quadword scratch */
> + int ret = 0;/* Integer return code */
> + int i = 0;  /* Loop index */
> + unsigned long flags;
> +
> + /* Structure so copy_page_addr() can accumulate things
> +  * over multiple calls and returns
> +  */
> + struct copy_page_addr_parms ppa_parms = copy_page_addr_parms_init;
> + struct copy_page_addr_parms *ppap = &ppa_parms;
> +
> +
> + iommu = drhd->iommu;
> + q = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
> + if (!q)
> + return -1;
> +
> + /* If (list needs initializing) do it here */

This initialization should not be here, because it is not specific to this
drhd. It should be done in init_dmars().

> + if (!domain_values_list) {
> + domain_values_list =
> +  kcalloc(g_num_of_iommus, sizeof(struct list_head),
> + GFP_KERNEL);
> +
> + if (!domain_values_list) {
> + pr_err("Allocation failed for domain_values_list 
> array\n");
> + return -ENOMEM;
> + }
> + for (i = 0; i < g_num_of_iommus; i++)
> + INIT_LIST_HEAD(&domain_values_list[i]);
> + }
> +
> + spin_lock_irqsave(&iommu->lock, flags);
> +
> + /* Load the root-entry table from the old kernel
> +  * foreach context_entry_table in root_entry
> +  *foreach context_entry in context_entry_table
> +  *   foreach level-1 page_table_entry in context_entry
> +  *  foreach level-2 page_table_entry in level 1 page_table_entry
> +  * Above pattern continues up to 6 levels of page tables
> +  *Sanity-check the entry
> +  *Process the bus, devfn, page_address, page_size
> +  */
> + if (!iommu->root_entry) {
> + iommu->root_entry =
> + (struct root_entry *)alloc_pgtable_page(iommu->node);
> + if (!iommu->root_entry) {
> + spin_unlock_irqrestore(&iommu->lock, flags);
> + return -ENOMEM;
> + }
> + }
> +
> + iommu->root_entry_old_phys = q & VTD_PAGE_MASK;
> + if (!iommu->root_entry_old_phys) {
> + pr_err("Could not read old root entry address.");
> + return -1;
> + }
> +
> + iommu->root_entry_old_virt = ioremap_cache(iommu->root_entry_old_phys,
> + VTD_PAGE_SIZE);
> + if (!iommu->root_entry_old_virt) {
> + pr_err("Could not map the old root entry.");
> + return -ENOMEM;
> + }
> +
> + __iommu_load_old_root_entry(iommu);
> + ret = copy_root_entry_table(iommu, ppap);
> + __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
> + __iommu_update_old_root_entry(iommu, -1);
> +
> + spin_unlock_irqrestore(&iommu->lock, flags);
> +
> + __iommu_free_mapped_mem();
> +
> + if (ret)
> + return ret;
> +
> + ppa_parms.last = 1;
> + copy_page_addr(0, 0, 0, 0, 0, NULL, ppap);
> +
> + return 0;
> +}
> +
>  #endif /* CONFIG_CRASH_DUMP */
> -- 
> 2.0.0-rc0
> 
> 
> ___
> kexec mailing list
> ke...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v8 06/10] iommu/vt-d: datatypes and functions used for kdump

2015-01-14 Thread Baoquan He
On 01/15/15 at 01:45pm, Li, ZhenHua wrote:
> Hi Baoquan,
> Thank you very much for your review. But according to the latest
> discussion, the page tables will not be copied from old kernel. We keep
> using the old page tables before driver is loaded. So there are many
> changes in next version;

Oh, yes. So please ignore this comment.

> 
> See my comments.
> 
> On 01/15/2015 11:28 AM, Baoquan He wrote:
> >On 01/12/15 at 03:06pm, Li, Zhen-Hua wrote:
> >>+/*
> >>+ * Interface to the "copy translation tables" set of functions
> >>+ * from mainline code.
> >>+ */
> >>+static int intel_iommu_load_translation_tables(struct dmar_drhd_unit *drhd,
> >>+   int g_num_of_iommus)
> >
> >The argument g_num_of_iommus is the same as the global variable, it's better
> >to rename it as num_of_iommus. And even it can be removed since you can
> >just use the global variable g_num_of_iommus in this function.
> >
> >Argument drhd can be intel_iommu because no other member variable in
> >drhd is needed.
> 
> This function is no longer used. So forget the parameters.
> 
> >
> >>+{
> >>+   struct intel_iommu *iommu;  /* Virt(iommu hardware registers) */
> >>+   unsigned long long q;   /* quadword scratch */
> >>+   int ret = 0;/* Integer return code */
> >>+   int i = 0;  /* Loop index */
> >>+   unsigned long flags;
> >>+
> >>+   /* Structure so copy_page_addr() can accumulate things
> >>+* over multiple calls and returns
> >>+*/
> >>+   struct copy_page_addr_parms ppa_parms = copy_page_addr_parms_init;
> >>+   struct copy_page_addr_parms *ppap = &ppa_parms;
> >>+
> >>+
> >>+   iommu = drhd->iommu;
> >>+   q = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
> >>+   if (!q)
> >>+   return -1;
> >>+
> >>+   /* If (list needs initializing) do it here */
> >
> >This initializing should not be here, because it's not only for this
> >drhd. It should be done in init_dmars().
> >
> Yes you are right. Though the variable domain_values_list will not be
> used in next version, I think I need to check if there are any other
> similar problems.
> 
> >>+   if (!domain_values_list) {
> >>+   domain_values_list =
> >>+kcalloc(g_num_of_iommus, sizeof(struct list_head),
> >>+   GFP_KERNEL);
> >>+
> >>+   if (!domain_values_list) {
> >>+   pr_err("Allocation failed for domain_values_list 
> >>array\n");
> >>+   return -ENOMEM;
> >>+   }
> >>+   for (i = 0; i < g_num_of_iommus; i++)
> >>+   INIT_LIST_HEAD(&domain_values_list[i]);
> >>+   }
> >>+
> >>+   spin_lock_irqsave(&iommu->lock, flags);
> >>+
> >>+   /* Load the root-entry table from the old kernel
> >>+* foreach context_entry_table in root_entry
> >>+*foreach context_entry in context_entry_table
> >>+*   foreach level-1 page_table_entry in context_entry
> >>+*  foreach level-2 page_table_entry in level 1 page_table_entry
> >>+* Above pattern continues up to 6 levels of page tables
> >>+*Sanity-check the entry
> >>+*Process the bus, devfn, page_address, page_size
> >>+*/
> >>+   if (!iommu->root_entry) {
> >>+   iommu->root_entry =
> >>+   (struct root_entry *)alloc_pgtable_page(iommu->node);
> >>+   if (!iommu->root_entry) {
> >>+   spin_unlock_irqrestore(&iommu->lock, flags);
> >>+   return -ENOMEM;
> >>+   }
> >>+   }
> >>+
> >>+   iommu->root_entry_old_phys = q & VTD_PAGE_MASK;
> >>+   if (!iommu->root_entry_old_phys) {
> >>+   pr_err("Could not read old root entry address.");
> >>+   return -1;
> >>+   }
> >>+
> >>+   iommu->root_entry_old_virt = ioremap_cache(iommu->root_entry_old_phys,
> >>+   VTD_PAGE_SIZE);
> >>+   if (!iommu->root_entry_old_virt) {
> >>+   pr_err("Could not map the old root entry.");
> >>+   return -ENOMEM;
> >>+   }
> >>+
> >>+   __iommu_load_old_root_entry(iommu);
> >>+   ret = copy_root_entry_table(iommu, ppap);
> >>+   __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
> >>+   __iommu_update_old_root_entry(iommu, -1);
> >>+
> >>+   spin_unlock_irqrestore(&iommu->lock, flags);
> >>+
> >>+   __iommu_free_mapped_mem();
> >>+
> >>+   if (ret)
> >>+   return ret;
> >>+
> >>+   ppa_parms.last = 1;
> >>+   copy_page_addr(0, 0, 0, 0, 0, NULL, ppap);
> >>+
> >>+   return 0;
> >>+}
> >>+
> >>  #endif /* CONFIG_CRASH_DUMP */
> >>--
> >>2.0.0-rc0
> >>
> >>
> >>___
> >>kexec mailing list
> >>ke...@lists.infradead.org
> >>http://lists.infradead.org/mailman/listinfo/kexec
> 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v9 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel

2015-04-04 Thread Baoquan He
On 04/03/15 at 05:21pm, Dave Young wrote:
> On 04/03/15 at 05:01pm, Li, ZhenHua wrote:
> > Hi Dave,
> > 
> > There may be some possibilities that the old iommu data is corrupted by
> > some other modules. Currently we do not have a better solution for the
> > dmar faults.
> > 
> > But I think when this happens, we need to fix the module that corrupted
> > the old iommu data. I once met a similar problem in normal kernel, the
> > queue used by the qi_* functions was written again by another module.
> > The fix was in that module, not in iommu module.
> 
> It is too late, there will be no chance to save vmcore then.
> 
> Also if it is possible to continue corrupt other area of oldmem because
> of using old iommu tables then it will cause more problems.
> 
> So I think the tables at least need some verification before being used.
> 

Yes, it's good to think about this, and verification is also an
interesting idea. kexec/kdump does a sha256 calculation on the loaded kernel
and then verifies it again in purgatory when a panic happens. This checks
whether any code has stomped into the region reserved for kexec/kernel and
corrupted the loaded kernel.

If we decide to do this, it should be an enhancement to the current
patchset, not a change of approach. Since this patchset is getting very
close to what the maintainers expect, maybe it can be merged first and
the enhancement considered afterwards. After all, without this patchset
vt-d often raises error messages or hangs.

By the way I tested this patchset it works very well on my HP z420 work
station.

Thanks
Baoquan
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v10 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel

2015-04-24 Thread Baoquan He
On 04/15/15 at 02:48pm, Dave Young wrote:
> On 04/15/15 at 01:47pm, Li, ZhenHua wrote:
> > On 04/15/2015 08:57 AM, Dave Young wrote:
> > >Again, I think it is bad to use old page table, below issues need consider:
> > >1) make sure old page table are reliable across crash
> > >2) do not allow writing oldmem after crash
> > >
> > >Please correct me if I'm wrong, or if above is not doable I think I will 
> > >vote for
> > >resetting pci bus.
> > >
> > >Thanks
> > >Dave
> > >
> > Hi Dave,
> > 
> > When updating the context tables, we have to write their address to root
> > tables, this will cause writing to old mem.
> > 
> > Resetting the pci bus has been discussed, please check this:
> > http://lists.infradead.org/pipermail/kexec/2014-October/012752.html
> > https://lkml.org/lkml/2014/10/21/890

I support this patchset.

We should not fear oldmem, since the reserved crashkernel region is similar.
No one can guarantee that crazy code won't step into the crashkernel
region just because the 1st kernel says it's reserved for the kdump kernel.
Likewise, the root table and context tables are not built to let legal code
damage them. Both carry the risk of being corrupted; to try our
best to get a dumped vmcore, the risk is worth taking.

And the PCI-reset approach has been NACKed by David Woodhouse, the
maintainer of intel iommu, because any place to call the PCI-reset
code is ugly, whether before the kdump kernel or in it. And as he said, if a
certain device misbehaves, why should we blame all devices? We should fix
the device that misbehaves.

As for me, periodically poked by customers asking how the iommu fix is
going, I really think this patchset is good enough. Are we going to do
nothing just because there is a risk with a tiny probability, or because
it is not perfect enough? I don't think people would agree. Otherwise kdump
could have been killed when its author proposed it, since the reserved
crashkernel region is risky and could be corrupted by the 1st kernel.

Anyway, let's compromise a little. At worst it can be reverted if it's
not satisfactory.

Personal opinion.

By the way, I tested it and it works well on my HP z420 workstation.

Thanks
Baoquan


> 
> I know one reason to use old pgtable is this looks better because it fixes the
> real problem, but it is not a good way if it introduce more problems because 
> of
> it have to use oldmem. I will be glad if this is not a problem but I have not
> been convinced.
> 
> OTOH, there's many types of iommu, intel, amd, a lot of other types. They need
> their own fixes, so it looks not that elegant.
> 
> For pci reset, it is not perfect, but it has another advantage, the patch is
> simpler. The problem I see from the old discusssion is, reset bus in 2nd 
> kernel
> is acceptable but it does not fix things on sparc platform. AFAIK current 
> reported
> problems are intel and amd iommu, at least pci reset stuff does not make it 
> worse.
> 
> Thanks
> Dave
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v10 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel

2015-04-24 Thread Baoquan He
On 04/24/15 at 04:25pm, Dave Young wrote:
> Hi, Baoquan
> 
> > I support this patchset.
> > 
> > We should not fear oldmem since reserved crashkernel region is similar.
> > No one can guarantee that any crazy code won't step into crashkernel
> > region just because 1st kernel says it's reserved for kdump kernel. Here
> > the root table and context tables are also not built to allow legal code
> > to damage. Both of them has the risk to be corrupted, for trying our
> > best to get a dumped vmcore the risk is worth being taken.
> 
> old mem is mapped in 1st kernel so compare with the reserved crashkernel
> they are more likely to be corrupted. they are totally different. 

Could you explain how and why they are different? Would wrong code
specifically pick the root tables and context tables to damage when it
has totally lost control?

> 
> > 
> > And the resetting pci way has been NACKed by David Woodhouse, the
> > maintainer of intel iommu. Because the place calling the resetting pci
> > code is ugly before kdump kernel or in kdump kernel. And as he said a
> > certain device made mistakes why we blame on all devices. We should fix
> > that device who made mistakes. 
> 
> Resetting pci bus is not ugly than fixing a problem with risk and to fix
> the problem it introduced in the future.

There's a problem, so we fix the problem. If that's uglier, I need to redefine
'ugly' in my personal dictionary. You mean the problem it could introduce
is that wrong code will damage the root table and context tables; why don't
we fix that wrong code instead of blaming the innocent context tables? Do you
mean these tables deserve to be damaged by wrong code?

> 
> I know it is late to speak out, but sorry I still object and have to NACK this
> oldmem approach from my point.
> 
> Thanks
> Dave
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v10 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel

2015-04-28 Thread Baoquan He
On 04/24/15 at 04:49pm, Dave Young wrote:
> On 04/24/15 at 04:35pm, Baoquan He wrote:
> > On 04/24/15 at 04:25pm, Dave Young wrote:
> > > Hi, Baoquan
> > > 
> > > > I support this patchset.
> > > > 
> > > > We should not fear oldmem since reserved crashkernel region is similar.
> > > > No one can guarantee that any crazy code won't step into crashkernel
> > > > region just because 1st kernel says it's reserved for kdump kernel. Here
> > > > the root table and context tables are also not built to allow legal code
> > > > to damage. Both of them has the risk to be corrupted, for trying our
> > > > best to get a dumped vmcore the risk is worth being taken.
> > > 
> > > old mem is mapped in 1st kernel so compare with the reserved crashkernel
> > > they are more likely to be corrupted. they are totally different. 
> > 
> > Could you tell how and why they are different? Wrong code will choose
> > > root tables and context tables to damage when they totally lose
> > control?
> 
> iommu will map io address to system ram, right? not to reserved ram, but
> yes I'm assuming the page table is right, but I was worrying they are 
> corrupted
> while kernel panic is happening.

OK, I think we may need to think more about reusing the old context
tables. Currently dmar faults cause error or warning messages, and
occasionally cause a system with iommu to hang in the kdump kernel. I don't
know what will happen if the old root tables or context tables are corrupted
by evil code. The kdump kernel, which uses a similar mechanism, has a
verification step: when the kdump kernel is loaded into the reserved
crashkernel region a sha256 sum is calculated, and it is verified when
jumping into the kdump kernel after a panic. If corrupted context tables
would bring a worse result, then we need to consider giving this up,
changing back to the old way, and trying to dump despite the error messages.

Hi Zhenhua,

I don't know what your plan is for verifying whether the old root tables
or old context tables are corrupted. Have you experimented with what
happens if the old tables are corrupted on purpose?

I am fine if you just put this on a TODO list, since it is truly a
rare case. But it may be necessary to mention it in the patch log.

Thanks
Baoquan

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch v4 1/9] iommu/amd: clean up the cmpxchg64 invocation

2016-05-24 Thread Baoquan He
Use cmpxchg64() the way it is designed to be used, by comparing its return
value with the expected old value, and keep the call consistent with other
places.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 5efadad..9ec7cad 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1287,7 +1287,8 @@ static u64 *alloc_pte(struct protection_domain *domain,
 
__npte = PM_LEVEL_PDE(level, virt_to_phys(page));
 
-   if (cmpxchg64(pte, __pte, __npte)) {
+   /* pte could have been changed somewhere. */
+   if (cmpxchg64(pte, __pte, __npte) != __pte) {
free_page((unsigned long)page);
continue;
}
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch v4 0/9] *** Fix kdump failure in system with amd iommu***

2016-05-24 Thread Baoquan He
Hi Joerg,

Recently I have had time to continue the work of fixing AMD IOMMU faults in
the kdump kernel. The situation is that I tried to make the change at the same
point in time as the Intel iommu fix does, but the Ethernet NIC still triggers
IO_PAGE_FAULT messages. I have 2 machines, one with AMD IOMMU v1 and one with
v2; the test results are similar, and there are always IO_PAGE_FAULT messages
for the Ethernet network card. I have no idea why this happens, even though I
did it just like the Intel IOMMU fix.

In this v4 change, I just made changes as follows:

1) Several clean up patches when I reviewed AMD IOMMU code.
2) Detect if it's pre-enabled in kdump kernel.
3) Copy dev tables in kdump kernel. Since dev table is per device we just
   need to copy the content in dev table, the io page table pointer and
   irq table pointer are contained in each dev table entry.
4) Reserved the domain id which has been allocated in 1st kernel
5) Do not re-enable or re-init the dev table entry bits or control bit
   of AMD IOMMU.

I am posting this to the mailing list in the hope that we can figure out what
needs to be changed further.

v3->v4:
1) Define several PTE/DTE bit macros according to Zongshun's
   comments.
2) Studied the implementation of the vt-d fix done by Joerg and made the
   AMD IOMMU change similarly.

Baoquan HE (4):
  iommu/amd: add early_enable_iommu() helper function
  iommu/amd: copy old trans table from old kernel
  iommu/amd: Do not initialize dev tables again in kdump
  iommu/amd: Check the validation of irq table and domain id

Baoquan He (5):
  iommu/amd: clean up the cmpxchg64 invocation
  iommu/amd: Use standard bitmap operation to set bitmap
  iommu/amd: Detect pre enabled translation
  iommu/amd: Define bit fields for DTE particularly
  iommu/amd: Add function copy_dev_tables

 drivers/iommu/amd_iommu.c   |  21 +++
 drivers/iommu/amd_iommu_init.c  | 136 +++-
 drivers/iommu/amd_iommu_types.h |  29 +++--
 3 files changed, 156 insertions(+), 30 deletions(-)

-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch v4 3/9] iommu/amd: Detect pre enabled translation

2016-05-24 Thread Baoquan He
Add functions to check whether translation is already enabled in IOMMU.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c  | 25 +
 drivers/iommu/amd_iommu_types.h |  4 
 2 files changed, 29 insertions(+)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 8361367d..9e1dfcb 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -229,6 +229,26 @@ static int amd_iommu_enable_interrupts(void);
 static int __init iommu_go_to_state(enum iommu_init_state state);
 static void init_device_table_dma(void);
 
+
+static bool translation_pre_enabled(struct amd_iommu *iommu)
+{
+   return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
+}
+
+static void clear_translation_pre_enabled(struct amd_iommu *iommu)
+{
+iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
+}
+
+static void init_translation_status(struct amd_iommu *iommu)
+{
+   u32 ctrl;
+
+   ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
+   if (ctrl & (1<<CONTROL_IOMMU_EN))
+       iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
+}
+
 static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu,
u8 bank, u8 cntr, u8 fxn,
u64 *value, bool is_write);
@@ -1101,6 +1121,11 @@ static int __init init_iommu_one(struct amd_iommu 
*iommu, struct ivhd_header *h)
 
iommu->int_enabled = false;
 
+   init_translation_status(iommu);
+
+   if (translation_pre_enabled())
+   pr_warn("Translation is already enabled - trying to copy 
translation structures\n");
+
ret = init_iommu_from_acpi(iommu, h);
if (ret)
return ret;
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 9d32b20..01783cc 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -384,6 +384,7 @@ extern struct kmem_cache *amd_iommu_irq_cache;
 #define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL)
 
 
+
 /*
  * This struct is used to pass information about
  * incoming PPR faults around.
@@ -401,6 +402,8 @@ struct amd_iommu_fault {
 struct iommu_domain;
 struct irq_domain;
 
+#define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED  (1 << 0)
+
 /*
  * This structure contains generic data for  IOMMU protection domains
  * independent of their use.
@@ -525,6 +528,7 @@ struct amd_iommu {
struct irq_domain *ir_domain;
struct irq_domain *msi_domain;
 #endif
+   u32 flags;
 };
 
 struct devid_map {
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch v4 2/9] iommu/amd: Use standard bitmap operation to set bitmap

2016-05-24 Thread Baoquan He
It will be more readable than the old setting.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c  | 2 +-
 drivers/iommu/amd_iommu_init.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 9ec7cad..cc636e6 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2016,7 +2016,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
 * mark the first page as allocated so we never return 0 as
 * a valid dma-address. So we can use 0 as error value
 */
-   dma_dom->aperture[0]->bitmap[0] = 1;
+   __set_bit(0, dma_dom->aperture[0]->bitmap);
 
for_each_possible_cpu(cpu)
*per_cpu_ptr(dma_dom->next_index, cpu) = 0;
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index bf4959f..8361367d 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1908,8 +1909,7 @@ static int __init early_amd_iommu_init(void)
 * never allocate domain 0 because its used as the non-allocated and
 * error value placeholder
 */
-   amd_iommu_pd_alloc_bitmap[0] = 1;
-
+__set_bit(0, amd_iommu_pd_alloc_bitmap);
spin_lock_init(&amd_iommu_pd_lock);
 
/*
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch v4 5/9] iommu/amd: Define bit fields for DTE particularly

2016-05-24 Thread Baoquan He
In the AMD-Vi spec several bits of the I/O PTE fields and DTE fields are
similar, so both of them can share the same macro definitions. However,
defining their respective bit fields separately makes the code more
readable, so do that in this patch.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c   | 10 +-
 drivers/iommu/amd_iommu_types.h | 19 +++
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index cc636e6..1c916cc 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1401,9 +1401,9 @@ static int iommu_map_page(struct protection_domain *dom,
 
if (count > 1) {
__pte = PAGE_SIZE_PTE(phys_addr, page_size);
-   __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC;
+   __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
} else
-   __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC;
+   __pte = phys_addr | IOMMU_PTE_PR | IOMMU_PTE_FC;
 
if (prot & IOMMU_PROT_IR)
__pte |= IOMMU_PTE_IR;
@@ -2048,7 +2048,7 @@ static void set_dte_entry(u16 devid, struct 
protection_domain *domain, bool ats)
 
pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
-   pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
+   pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV;
 
flags = amd_iommu_dev_table[devid].data[1];
 
@@ -2091,7 +2091,7 @@ static void set_dte_entry(u16 devid, struct 
protection_domain *domain, bool ats)
 static void clear_dte_entry(u16 devid)
 {
/* remove entry from the device table seen by the hardware */
-   amd_iommu_dev_table[devid].data[0]  = IOMMU_PTE_P | IOMMU_PTE_TV;
+   amd_iommu_dev_table[devid].data[0]  = DTE_FLAG_V | DTE_FLAG_TV;
amd_iommu_dev_table[devid].data[1] &= DTE_FLAG_MASK;
 
amd_iommu_apply_erratum_63(devid);
@@ -2533,7 +2533,7 @@ static dma_addr_t dma_ops_domain_map(struct 
dma_ops_domain *dom,
if (!pte)
return DMA_ERROR_CODE;
 
-   __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
+   __pte = paddr | IOMMU_PTE_PR | IOMMU_PTE_FC;
 
if (direction == DMA_TO_DEVICE)
__pte |= IOMMU_PTE_IR;
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 01783cc..7796edf 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -239,7 +239,7 @@
 #define PM_LEVEL_INDEX(x, a)   (((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL)
 #define PM_LEVEL_ENC(x)(((x) << 9) & 0xe00ULL)
 #define PM_LEVEL_PDE(x, a) ((a) | PM_LEVEL_ENC((x)) | \
-IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW)
+IOMMU_PTE_PR | IOMMU_PTE_IR | IOMMU_PTE_IW)
 #define PM_PTE_LEVEL(pte)  (((pte) >> 9) & 0x7ULL)
 
 #define PM_MAP_4k  0
@@ -288,13 +288,24 @@
 #define PTE_LEVEL_PAGE_SIZE(level) \
(1ULL << (12 + (9 * (level))))
 
-#define IOMMU_PTE_P  (1ULL << 0)
-#define IOMMU_PTE_TV (1ULL << 1)
+/*
+ * Bit value definition for I/O PTE fields
+ */
+#define IOMMU_PTE_PR (1ULL << 0)
 #define IOMMU_PTE_U  (1ULL << 59)
 #define IOMMU_PTE_FC (1ULL << 60)
 #define IOMMU_PTE_IR (1ULL << 61)
 #define IOMMU_PTE_IW (1ULL << 62)
 
+
+/*
+ * Bit value definition for DTE fields
+ */
+#define DTE_FLAG_V  (1ULL << 0)
+#define DTE_FLAG_TV (1ULL << 1)
+#define DTE_FLAG_IR (1ULL << 61)
+#define DTE_FLAG_IW (1ULL << 62)
+
 #define DTE_FLAG_IOTLB (1ULL << 32)
 #define DTE_FLAG_GV(1ULL << 55)
 #define DTE_FLAG_MASK  (0x3ffULL << 32)
@@ -316,7 +327,7 @@
 #define GCR3_VALID 0x01ULL
 
 #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
-#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
+#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_PR)
 #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK))
 #define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07)
 
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch v4 6/9] iommu/amd: Add function copy_dev_tables

2016-05-24 Thread Baoquan He
Add function copy_dev_tables() to copy the old DTEs of the 1st kernel to
the new device table. Since all IOMMUs share the same device table, the
copy only needs to be done once, after the physical address of the
old device table has been read from the IOMMU register. In addition, the
domain IDs in use in the 1st kernel need to be reserved in order to avoid
touching the old translation tables.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c   |  2 +-
 drivers/iommu/amd_iommu_init.c  | 38 ++
 drivers/iommu/amd_iommu_types.h |  1 +
 3 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 1c916cc..f3bd7fd 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2081,7 +2081,7 @@ static void set_dte_entry(u16 devid, struct 
protection_domain *domain, bool ats)
flags|= tmp;
}
 
-   flags &= ~(0xffffUL);
+   flags &= ~DEV_DOMID_MASK;
flags |= domain->id;
 
amd_iommu_dev_table[devid].data[1]  = flags;
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 9c1aa54..71c7ac9 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -664,6 +664,44 @@ static int get_dev_entry_bit(u16 devid, u8 bit)
 }
 
 
+static int copy_dev_tables(void)
+{
+   u64 entry;
+   u32 lo, hi, devid;
+   phys_addr_t old_devtb_phys;
+   struct dev_table_entry *old_devtb;
+   u16 dom_id, dte_v;
+   struct amd_iommu *iommu;
+   static int copied;
+
+for_each_iommu(iommu) {
+   if (!translation_pre_enabled()) {
+   pr_err("IOMMU:%d is not pre-enabled!/n", iommu->index);
+   return -1;
+   }
+
+   if (copied)
+   continue;
+
+lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
+hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
+entry = (((u64) hi) << 32) + lo;
+old_devtb_phys = entry & PAGE_MASK;
+old_devtb = memremap(old_devtb_phys, dev_table_size, 
MEMREMAP_WB);
+for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
+amd_iommu_dev_table[devid] = old_devtb[devid];
+dom_id = amd_iommu_dev_table[devid].data[1] & 
DEV_DOMID_MASK;
+   dte_v = amd_iommu_dev_table[devid].data[0] & DTE_FLAG_V;
+   if (!dte_v)
+   continue;
+__set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
+}
+   memunmap(old_devtb);
+   copied = 1;
+}
+   return 0;
+}
+
 void amd_iommu_apply_erratum_63(u16 devid)
 {
int sysmgt;
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 7796edf..34acd73 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -311,6 +311,7 @@
 #define DTE_FLAG_MASK  (0x3ffULL << 32)
 #define DTE_GLX_SHIFT  (56)
 #define DTE_GLX_MASK   (3)
+#define DEV_DOMID_MASK 0xffffULL
 
 #define DTE_GCR3_VAL_A(x)  (((x) >> 12) & 0x00007ULL)
 #define DTE_GCR3_VAL_B(x)  (((x) >> 15) & 0x0ffffULL)
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch v4 8/9] iommu/amd: Do not initialize dev tables again in kdump

2016-05-24 Thread Baoquan He
From: Baoquan HE 

The initialization has already been done in the normal kernel, so skip it in
the kdump kernel. Also clean up the function comment.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 66a1fa5..47e5972 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -1432,7 +1432,11 @@ static int __init amd_iommu_init_pci(void)
break;
}
 
-   init_device_table_dma();
+   for_each_iommu(iommu) {
+   if ( !translation_pre_enabled(iommu) )
+   init_device_table_dma();
+   break;
+   }
 
for_each_iommu(iommu)
iommu_flush_all_caches(iommu);
@@ -1612,8 +1616,7 @@ static int __init init_memory_definitions(struct 
acpi_table_header *table)
 }
 
 /*
- * Init the device table to not allow DMA access for devices and
- * suppress all page faults
+ * Init the device table to not allow DMA access for devices.
  */
 static void init_device_table_dma(void)
 {
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch v4 4/9] iommu/amd: add early_enable_iommu() helper function

2016-05-24 Thread Baoquan He
From: Baoquan HE 

This can make later kdump change easier.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c | 24 ++--
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 9e1dfcb..9c1aa54 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -1673,6 +1673,18 @@ static void iommu_apply_resume_quirks(struct amd_iommu 
*iommu)
   iommu->stored_addr_lo | 1);
 }
 
+static void early_enable_iommu(struct amd_iommu *iommu)
+{
+   iommu_disable(iommu);
+   iommu_init_flags(iommu);
+   iommu_set_device_table(iommu);
+   iommu_enable_command_buffer(iommu);
+   iommu_enable_event_buffer(iommu);
+   iommu_set_exclusion_range(iommu);
+   iommu_enable(iommu);
+   iommu_flush_all_caches(iommu);
+}
+
 /*
  * This function finally enables all IOMMUs found in the system after
  * they have been initialized
@@ -1681,16 +1693,8 @@ static void early_enable_iommus(void)
 {
struct amd_iommu *iommu;
 
-   for_each_iommu(iommu) {
-   iommu_disable(iommu);
-   iommu_init_flags(iommu);
-   iommu_set_device_table(iommu);
-   iommu_enable_command_buffer(iommu);
-   iommu_enable_event_buffer(iommu);
-   iommu_set_exclusion_range(iommu);
-   iommu_enable(iommu);
-   iommu_flush_all_caches(iommu);
-   }
+   for_each_iommu(iommu)
+   early_enable_iommu(iommu);
 }
 
 static void enable_iommus_v2(void)
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch v4 7/9] iommu/amd: copy old trans table from old kernel

2016-05-24 Thread Baoquan He
From: Baoquan HE 

Several things need to be done here:
1) Initialize amd_iommu_dev_table because it was set several times
   since kdump kernel reboot. We don't need the set because we will
   copy the content from old kernel.
2) Re-enable event/cmd buffer
3) Install the DTE table to reg
4) Flush all caches

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c | 47 +-
 1 file changed, 42 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 71c7ac9..66a1fa5 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -35,7 +35,7 @@
 #include 
 #include 
 #include 
-
+#include 
 #include "amd_iommu_proto.h"
 #include "amd_iommu_types.h"
 #include "irq_remapping.h"
@@ -675,7 +675,7 @@ static int copy_dev_tables(void)
static int copied;
 
 for_each_iommu(iommu) {
-   if (!translation_pre_enabled()) {
+   if (!translation_pre_enabled(iommu)) {
pr_err("IOMMU:%d is not pre-enabled!/n", iommu->index);
return -1;
}
@@ -1160,8 +1160,13 @@ static int __init init_iommu_one(struct amd_iommu 
*iommu, struct ivhd_header *h)
iommu->int_enabled = false;
 
init_translation_status(iommu);
+   if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
+clear_translation_pre_enabled(iommu);
+pr_warn("Translation was enabled for IOMMU:%d but we 
are not in kdump mode\n",
+iommu->index);
+}
 
-   if (translation_pre_enabled())
+   if (translation_pre_enabled(iommu))
pr_warn("Translation is already enabled - trying to copy 
translation structures\n");
 
ret = init_iommu_from_acpi(iommu, h);
@@ -1730,9 +1735,41 @@ static void early_enable_iommu(struct amd_iommu *iommu)
 static void early_enable_iommus(void)
 {
struct amd_iommu *iommu;
+   bool is_pre_enabled=false;
 
-   for_each_iommu(iommu)
-   early_enable_iommu(iommu);
+   for_each_iommu(iommu) {
+   if ( translation_pre_enabled(iommu) ) {
+   is_pre_enabled = true;
+   break;
+   }
+   }
+
+   if ( !is_pre_enabled) {
+   for_each_iommu(iommu)
+   early_enable_iommu(iommu);
+   } else {
+   if (copy_dev_tables()) {
+   pr_err("Failed to copy translation tables from previous 
kernel.\n");
+   /*
+* If failed to copy dev tables from old kernel, 
continue to proceed
+* as it does in normal kernel.
+*/
+   for_each_iommu(iommu) {
+   clear_translation_pre_enabled(iommu);
+   early_enable_iommu(iommu);
+   }
+   } else {
+   pr_info("Copied translation tables from previous 
kernel.\n");
+   for_each_iommu(iommu) {
+   iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
+   iommu_feature_disable(iommu, 
CONTROL_EVT_LOG_EN);
+   iommu_enable_command_buffer(iommu);
+   iommu_enable_event_buffer(iommu);
+   iommu_set_device_table(iommu);
+   iommu_flush_all_caches(iommu);
+   }
+   }
+   }
 }
 
 static void enable_iommus_v2(void)
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch v4 9/9] iommu/amd: Check the validation of irq table and domain id

2016-05-24 Thread Baoquan He
From: Baoquan HE 

If they are not valid, just skip reserving the old domain id.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c   | 4 
 drivers/iommu/amd_iommu_init.c  | 5 +++--
 drivers/iommu/amd_iommu_types.h | 5 +
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index f3bd7fd..40c4a05 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3697,10 +3697,6 @@ struct amd_ir_data {
 
 static struct irq_chip amd_ir_chip;
 
-#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
-#define DTE_IRQ_REMAP_INTCTL(2ULL << 60)
-#define DTE_IRQ_TABLE_LEN   (8ULL << 1)
-#define DTE_IRQ_REMAP_ENABLE1ULL
 
 static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table)
 {
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 47e5972..263704a 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -670,7 +670,7 @@ static int copy_dev_tables(void)
u32 lo, hi, devid;
phys_addr_t old_devtb_phys;
struct dev_table_entry *old_devtb;
-   u16 dom_id, dte_v;
+   u16 dom_id, dte_v, irq_v;
struct amd_iommu *iommu;
static int copied;
 
@@ -692,7 +692,8 @@ static int copy_dev_tables(void)
 amd_iommu_dev_table[devid] = old_devtb[devid];
 dom_id = amd_iommu_dev_table[devid].data[1] & 
DEV_DOMID_MASK;
dte_v = amd_iommu_dev_table[devid].data[0] & DTE_FLAG_V;
-   if (!dte_v)
+   irq_v = amd_iommu_dev_table[devid].data[2] & 
DTE_IRQ_REMAP_ENABLE;
+   if (!dte_v || !irq_v || !dom_id)
continue;
 __set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
 }
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 34acd73..08340f5 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -224,6 +224,11 @@
 
 #define PPR_REQ_FAULT  0x01
 
+#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
+#define DTE_IRQ_REMAP_INTCTL(2ULL << 60)
+#define DTE_IRQ_TABLE_LEN   (8ULL << 1)
+#define DTE_IRQ_REMAP_ENABLE1ULL
+
 #define PAGE_MODE_NONE0x00
 #define PAGE_MODE_1_LEVEL 0x01
 #define PAGE_MODE_2_LEVEL 0x02
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [Patch v4 0/9] *** Fix kdump failure in system with amd iommu***

2016-05-25 Thread Baoquan He
Hi Joerg,

Attached are the console logs of the normal kernel and the kdump kernel on
a test machine I have at hand, along with the output of lspci -vt and lspci
-vvv. Previously I tried to defer the call to set_dte_entry() until the
device driver calls __map_single() to actually allocate coherent
memory or do the mapping, but that did not seem to work. I am surprised by
the simplicity and effectiveness of the Intel IOMMU fix, but can't figure
out what I have missed.




-[:00]-+-00.0  Advanced Micro Devices, Inc. [AMD] Family 15h (Models 
30h-3fh) Processor Root Complex
   +-00.2  Advanced Micro Devices, Inc. [AMD] Family 15h (Models 
30h-3fh) I/O Memory Management Unit
   +-01.0  Advanced Micro Devices, Inc. [AMD/ATI] Kaveri [Radeon R7 
Graphics]
   +-01.1  Advanced Micro Devices, Inc. [AMD/ATI] Kaveri HDMI/DP Audio 
Controller
   +-02.0  Advanced Micro Devices, Inc. [AMD] Device 1424
   +-03.0  Advanced Micro Devices, Inc. [AMD] Device 1424
   +-03.1-[01]00.0  Realtek Semiconductor Co., Ltd. 
RTL8111/8168/8411 PCI Express Gigabit Ethernet Controller
   +-04.0  Advanced Micro Devices, Inc. [AMD] Device 1424
   +-10.0  Advanced Micro Devices, Inc. [AMD] FCH USB XHCI Controller
   +-10.1  Advanced Micro Devices, Inc. [AMD] FCH USB XHCI Controller
   +-11.0  Advanced Micro Devices, Inc. [AMD] FCH SATA Controller [IDE 
mode]
   +-12.0  Advanced Micro Devices, Inc. [AMD] FCH USB OHCI Controller
   +-12.2  Advanced Micro Devices, Inc. [AMD] FCH USB EHCI Controller
   +-13.0  Advanced Micro Devices, Inc. [AMD] FCH USB OHCI Controller
   +-13.2  Advanced Micro Devices, Inc. [AMD] FCH USB EHCI Controller
   +-14.0  Advanced Micro Devices, Inc. [AMD] FCH SMBus Controller
   +-14.1  Advanced Micro Devices, Inc. [AMD] FCH IDE Controller
   +-14.2  Advanced Micro Devices, Inc. [AMD] FCH Azalia Controller
   +-14.3  Advanced Micro Devices, Inc. [AMD] FCH LPC Bridge
   +-14.4-[02]--
   +-14.5  Advanced Micro Devices, Inc. [AMD] FCH USB OHCI Controller
   +-18.0  Advanced Micro Devices, Inc. [AMD] Family 15h (Models 
30h-3fh) Processor Function 0
   +-18.1  Advanced Micro Devices, Inc. [AMD] Family 15h (Models 
30h-3fh) Processor Function 1
   +-18.2  Advanced Micro Devices, Inc. [AMD] Family 15h (Models 
30h-3fh) Processor Function 2
   +-18.3  Advanced Micro Devices, Inc. [AMD] Family 15h (Models 
30h-3fh) Processor Function 3
   +-18.4  Advanced Micro Devices, Inc. [AMD] Family 15h (Models 
30h-3fh) Processor Function 4
   \-18.5  Advanced Micro Devices, Inc. [AMD] Family 15h (Models 
30h-3fh) Processor Function 5
[0.00] Linux version 4.6.0-rc7+ (b...@dhcp-129-10.nay.redhat.com) (gcc 
version 5.1.1 20150618 (Red Hat 5.1.1-4) (GCC) ) #34 SMP Tue May 24 17:6
[0.00] Command line: BOOT_IMAGE=/vmlinuz-4.6.0-rc7+ 
root=/dev/mapper/fedora_dhcp--129--10-root ro rd.lvm.lv=fedora_dhcp-129-10/root 
rd.lvm.lvl
[0.00] x86/fpu: xstate_offset[2]:  576, xstate_sizes[2]:  256
[0.00] x86/fpu: Supporting XSAVE feature 0x001: 'x87 floating point 
registers'
[0.00] x86/fpu: Supporting XSAVE feature 0x002: 'SSE registers'
[0.00] x86/fpu: Supporting XSAVE feature 0x004: 'AVX registers'
[0.00] x86/fpu: Enabled xstate features 0x7, context size is 832 bytes, 
using 'standard' format.
[0.00] x86/fpu: Using 'eager' FPU context switches.
[0.00] e820: BIOS-provided physical RAM map:
[0.00] BIOS-e820: [mem 0x-0x0009fbff] usable
[0.00] BIOS-e820: [mem 0x0009fc00-0x0009] reserved
[0.00] BIOS-e820: [mem 0x000e-0x000f] reserved
[0.00] BIOS-e820: [mem 0x0010-0x7cc99fff] usable
[0.00] BIOS-e820: [mem 0x7cc9a000-0x7ccc9fff] reserved
[0.00] BIOS-e820: [mem 0x7ccca000-0x7cf9] usable
[0.00] BIOS-e820: [mem 0x7cfa-0x7d06dfff] ACPI NVS
[0.00] BIOS-e820: [mem 0x7d06e000-0x7e1c7fff] reserved
[0.00] BIOS-e820: [mem 0x7e1c8000-0x7e1c8fff] usable
[0.00] BIOS-e820: [mem 0x7e1c9000-0x7e3cefff] ACPI NVS
[0.00] BIOS-e820: [mem 0x7e3cf000-0x7e850fff] usable
[0.00] BIOS-e820: [mem 0x7e851000-0x7efe1fff] reserved
[0.00] BIOS-e820: [mem 0x7efe2000-0x7eff] usable
[0.00] BIOS-e820: [mem 0xfec0-0xfec01fff] reserved
[0.00] BIOS-e820: [mem 0xfec1-0xfec10fff] reserved
[0.00] BIOS-e820: [mem 0xfed0-0xfed00fff] reserved
[0.00] BIOS-e820: [mem 0xfed4-0xfed44fff] reserved
[0.00] BIOS-e820: [mem 0xfed8-0xfed8] reserved
[0.00] 

Re: [Patch v4 0/9] *** Fix kdump failure in system with amd iommu***

2016-05-25 Thread Baoquan He
Sorry, the log of 'lspci -vvv' was not attached correctly. Re-attaching it here.

00:00.0 Host bridge: Advanced Micro Devices, Inc. [AMD] Family 15h (Models 
30h-3fh) Processor Root Complex
Subsystem: Advanced Micro Devices, Inc. [AMD] Family 15h (Models 
30h-3fh) Processor Root Complex
Control: I/O- Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- 
Stepping- SERR- FastB2B- DisINTx-
Status: Cap- 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- TAbort- SERR- 

00:01.0 VGA compatible controller: Advanced Micro Devices, Inc. [AMD/ATI] 
Kaveri [Radeon R7 Graphics] (prog-if 00 [VGA controller])
Subsystem: Advanced Micro Devices, Inc. [AMD/ATI] Device 0123
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- 
Stepping- SERR- FastB2B- DisINTx+
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- 
Kernel driver in use: radeon
Kernel modules: radeon

00:01.1 Audio device: Advanced Micro Devices, Inc. [AMD/ATI] Kaveri HDMI/DP 
Audio Controller
Subsystem: Advanced Micro Devices, Inc. [AMD/ATI] Device 0123
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- 
Stepping- SERR- FastB2B- DisINTx+
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- 
Kernel driver in use: snd_hda_intel
Kernel modules: snd_hda_intel

00:02.0 Host bridge: Advanced Micro Devices, Inc. [AMD] Device 1424
Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- 
Stepping- SERR- FastB2B- DisINTx-
Status: Cap- 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- TAbort- SERR- TAbort- SERR- TAbort- Reset- FastB2B-
PriDiscTmr- SecDiscTmr- DiscTmrStat- DiscTmrSERREn-
Capabilities: 
Kernel driver in use: pcieport
Kernel modules: shpchp

00:04.0 Host bridge: Advanced Micro Devices, Inc. [AMD] Device 1424
Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- 
Stepping- SERR- FastB2B- DisINTx-
Status: Cap- 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- TAbort- SERR- 
Kernel driver in use: xhci_hcd

00:10.1 USB controller: Advanced Micro Devices, Inc. [AMD] FCH USB XHCI 
Controller (rev 09) (prog-if 30 [XHCI])
Subsystem: Gigabyte Technology Co., Ltd Device 5004
Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- 
Stepping- SERR- FastB2B- DisINTx+
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- 
Kernel driver in use: xhci_hcd

00:11.0 SATA controller: Advanced Micro Devices, Inc. [AMD] FCH SATA Controller 
[IDE mode] (rev 40) (prog-if 01 [AHCI 1.0])
Subsystem: Gigabyte Technology Co., Ltd Device b002
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- 
Stepping- SERR- FastB2B- DisINTx-
Status: Cap+ 66MHz+ UDF- FastB2B- ParErr- DEVSEL=medium >TAbort- 
SERR- 
Kernel driver in use: ahci

00:12.0 USB controller: Advanced Micro Devices, Inc. [AMD] FCH USB OHCI 
Controller (rev 11) (prog-if 10 [OHCI])
Subsystem: Gigabyte Technology Co., Ltd Device 5004
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- 
Stepping- SERR- FastB2B- DisINTx-
Status: Cap- 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- 
SERR- TAbort- 
SERR- 
Kernel driver in use: ehci-pci

00:13.0 USB controller: Advanced Micro Devices, Inc. [AMD] FCH USB OHCI 
Controller (rev 11) (prog-if 10 [OHCI])
Subsystem: Gigabyte Technology Co., Ltd Device 5004
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- 
Stepping- SERR- FastB2B- DisINTx-
Status: Cap- 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- 
SERR- TAbort- 
SERR- 
Kernel driver in use: ehci-pci

00:14.0 SMBus: Advanced Micro Devices, Inc. [AMD] FCH SMBus Controller (rev 16)
Subsystem: Advanced Micro Devices, Inc. [AMD] FCH SMBus Controller
Control: I/O+ Mem+ BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- 
Stepping- SERR- FastB2B- DisINTx+
Status: Cap- 66MHz+ UDF- FastB2B- ParErr- DEVSEL=medium >TAbort- 
SERR- TAbort- 
SERR- TAbort- SERR- 
Kernel driver in use: snd_hda_intel
Kernel modules: snd_hda_intel

00:14.3 ISA bridge: Advanced Micro Devices, Inc. [AMD] FCH LPC Bridge (rev 11)
Subsystem: Advanced Micro Devices, Inc. [AMD] FCH LPC Bridge
Control: I/O+ Mem+ BusMaster+ SpecCycle+ MemWINV- VGASnoop- ParErr- 
Stepping- SERR- FastB2B- DisINTx-
Status: Cap- 66MHz+ UDF- FastB2B- ParErr- DEVSEL=medium >TAbort- 
SERR- TAbort- 
SERR- TAbort- 
Reset- FastB2B-
PriDiscTmr- SecDiscTmr- DiscTmrStat- DiscTmrSERREn-

00:14.5 USB controller: Advanced Micro Devices, Inc. [AMD] FCH USB OHCI 
Controller (rev 11) (prog-if 10 [OHCI])
Subsystem: Gigabyte Technology Co., Ltd Device 5004
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- 
Stepping- SERR- 

Re: [Patch v4 3/9] iommu/amd: Detect pre enabled translation

2016-05-29 Thread Baoquan He
On 05/28/16 at 08:49pm, Wan Zongshun wrote:
> 
> 
>  Original Message 
> >@@ -1101,6 +1121,11 @@ static int __init init_iommu_one(struct amd_iommu 
> >*iommu, struct ivhd_header *h)
> >
> > iommu->int_enabled = false;
> >
> >+init_translation_status(iommu);
> >+
> >+if (translation_pre_enabled())
> >+pr_warn("Translation is already enabled - trying to copy 
> >translation structures\n");
> >+
> 
> You missed this 'iommu' parameter here, even I saw you fixed it in
> another patch, but please keep each patch to be meaningful.

Hi Zongshun,

Thanks for reviewing this patchset and great comments, will remember and
update with change.

Yes, translation_pre_enabled() in this patchset needs a parameter "struct
amd_iommu*". 

In fact I am still debugging and trying to figure out what else needs to be
done to stop the IO_PAGE_FAULT from happening on the ethernet network card.
I kept changing the code and adjusting the patches. Up to now I still
haven't figured out why. There must be something I didn't notice and
everything will be fine as soon as I close that valve. With my
understand pci bug scanning will detect each pci device and do the
initialization job like setting configuration space registers and
control registers. After that we can safely re-init the pci device and
re-install the new io-page tables, and this is how Joerg has done for
vt-d fix for kdump if my understanding is correct. And I tried to do
like that in this patchset, don't know why it doesn't work.

As you know, in the previous post I thought the final initialization of the
device should be done when its driver probes and does the mapping
job, so I tried to re-install the io-page tables at that time. That didn't
seem to work either, so I abandoned that approach.

Sorry for this rough post, will pay attention and make a formal post if
there's new update.

Thanks
Baoquan
> 
> > ret = init_iommu_from_acpi(iommu, h);
> > if (ret)
> > return ret;
> >diff --git a/drivers/iommu/amd_iommu_types.h 
> >b/drivers/iommu/amd_iommu_types.h
> >index 9d32b20..01783cc 100644
> >--- a/drivers/iommu/amd_iommu_types.h
> >+++ b/drivers/iommu/amd_iommu_types.h
> >@@ -384,6 +384,7 @@ extern struct kmem_cache *amd_iommu_irq_cache;
> > #define APERTURE_PAGE_INDEX(a)  (((a) >> 21) & 0x3fULL)
> >
> >
> >+
> > /*
> >  * This struct is used to pass information about
> >  * incoming PPR faults around.
> >@@ -401,6 +402,8 @@ struct amd_iommu_fault {
> > struct iommu_domain;
> > struct irq_domain;
> >
> >+#define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED  (1 << 0)
> >+
> > /*
> >  * This structure contains generic data for  IOMMU protection domains
> >  * independent of their use.
> >@@ -525,6 +528,7 @@ struct amd_iommu {
> > struct irq_domain *ir_domain;
> > struct irq_domain *msi_domain;
> > #endif
> >+u32 flags;
> > };
> >
> > struct devid_map {
> >
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [Patch v4 3/9] iommu/amd: Detect pre enabled translation

2016-05-29 Thread Baoquan He
On 05/30/16 at 11:24am, Baoquan He wrote:
> On 05/28/16 at 08:49pm, Wan Zongshun wrote:
> In fact I am still debugging and trying to figure out what need be done
> further to stop the IO_PAGE_FAULT happened on ethernet network card. I
> kept changing code and adjust the patches. Up to now seems I still
> didn't figure out why. There must be something I didn't notice and
> everything will be fine as soon as I close that valve. With my
> understand pci bug scanning will detect each pci device and do the
 |_ bus
> initialization job like setting configuration space registers and
> control registers. After that we can safely re-init the pci device and
> re-install the new io-page tables, and this is how Joerg has done for
> vt-d fix for kdump if my understanding is correct. And I tried to do
> like that in this patchset, don't know why it doesn't work.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [Patch v4 6/9] iommu/amd: Add function copy_dev_tables

2016-05-29 Thread Baoquan He
On 05/28/16 at 09:08pm, Wan Zongshun wrote:
> >+static int copy_dev_tables(void)
> >+{
> >+u64 entry;
> >+u32 lo, hi, devid;
> >+phys_addr_t old_devtb_phys;
> >+struct dev_table_entry *old_devtb;
> >+u16 dom_id, dte_v;
> >+struct amd_iommu *iommu;
> >+static int copied;
> >+
> >+for_each_iommu(iommu) {
> >+if (!translation_pre_enabled()) {
> >+pr_err("IOMMU:%d is not pre-enabled!/n", iommu->index);
> >+return -1;
> >+}
> 
> If one iommu is not pre-enabled, all iommus will be exit the copy.

Currently the amd iommu driver makes all iommus share a single device table.
When writing this code, I struggled to find a way to make this
look better. Say we have two iommus A and B on a system, and A is detected
to be pre-enabled but B is not; I couldn't think of a good way to handle
that. Any suggestions?

> 
> >+
> >+if (copied)
> >+continue;
> >+
> >+lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
> >+hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
> >+entry = (((u64) hi) << 32) + lo;
> >+old_devtb_phys = entry & PAGE_MASK;
> >+old_devtb = memremap(old_devtb_phys, dev_table_size, 
> >MEMREMAP_WB);
> >+for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
> >+amd_iommu_dev_table[devid] = old_devtb[devid];
> >+dom_id = amd_iommu_dev_table[devid].data[1] & 
> >DEV_DOMID_MASK;
> >+dte_v = amd_iommu_dev_table[devid].data[0] & DTE_FLAG_V;
> >+if (!dte_v)
> >+continue;
> >+__set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
> >+}
> >+memunmap(old_devtb);
> >+copied = 1;
> >+}
> >+return 0;
> >+}
> >+
> > void amd_iommu_apply_erratum_63(u16 devid)
> > {
> > int sysmgt;
> >diff --git a/drivers/iommu/amd_iommu_types.h 
> >b/drivers/iommu/amd_iommu_types.h
> >index 7796edf..34acd73 100644
> >--- a/drivers/iommu/amd_iommu_types.h
> >+++ b/drivers/iommu/amd_iommu_types.h
> >@@ -311,6 +311,7 @@
> > #define DTE_FLAG_MASK   (0x3ffULL << 32)
> > #define DTE_GLX_SHIFT   (56)
> > #define DTE_GLX_MASK(3)
> >+#define DEV_DOMID_MASK  0xffffULL
> >
> > #define DTE_GCR3_VAL_A(x)   (((x) >> 12) & 0x7ULL)
> > #define DTE_GCR3_VAL_B(x)   (((x) >> 15) & 0x0ULL)
> >
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [Patch v4 9/9] iommu/amd: Check the validation of irq table and domain id

2016-05-29 Thread Baoquan He
On 05/28/16 at 09:30pm, Wan Zongshun wrote:
> 
> 
>  Original Message 
> >From: Baoquan HE 
> >diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
> >index f3bd7fd..40c4a05 100644
> >--- a/drivers/iommu/amd_iommu.c
> >+++ b/drivers/iommu/amd_iommu.c
> >@@ -3697,10 +3697,6 @@ struct amd_ir_data {
> >
> > static struct irq_chip amd_ir_chip;
> >
> >-#define DTE_IRQ_PHYS_ADDR_MASK  (((1ULL << 45)-1) << 6)
> >-#define DTE_IRQ_REMAP_INTCTL(2ULL << 60)
> >-#define DTE_IRQ_TABLE_LEN   (8ULL << 1)
> >-#define DTE_IRQ_REMAP_ENABLE1ULL
> 
> At least, you should give reason comments to why you want move it.
> 
> Any drivers files you want to use them as well?

Yes, sorry for this. I have several test machines. I made changes on
them and generated patches. On the machine I posted from, this patch was
not arranged well.

> 
> >
> > static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table)
> > {
> >diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
> >index 47e5972..263704a 100644
> >--- a/drivers/iommu/amd_iommu_init.c
> >+++ b/drivers/iommu/amd_iommu_init.c
> >@@ -670,7 +670,7 @@ static int copy_dev_tables(void)
> > u32 lo, hi, devid;
> > phys_addr_t old_devtb_phys;
> > struct dev_table_entry *old_devtb;
> >-u16 dom_id, dte_v;
> >+u16 dom_id, dte_v, irq_v;
> > struct amd_iommu *iommu;
> > static int copied;
> >
> >@@ -692,7 +692,8 @@ static int copy_dev_tables(void)
> > amd_iommu_dev_table[devid] = old_devtb[devid];
> > dom_id = amd_iommu_dev_table[devid].data[1] & 
> > DEV_DOMID_MASK;
> > dte_v = amd_iommu_dev_table[devid].data[0] & DTE_FLAG_V;
> >-if (!dte_v)
> >+irq_v = amd_iommu_dev_table[devid].data[2] & 
> >DTE_IRQ_REMAP_ENABLE;
> >+if (!dte_v || !irq_v || !dom_id)
> > continue;
> > __set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
> > }
> >diff --git a/drivers/iommu/amd_iommu_types.h 
> >b/drivers/iommu/amd_iommu_types.h
> >index 34acd73..08340f5 100644
> >--- a/drivers/iommu/amd_iommu_types.h
> >+++ b/drivers/iommu/amd_iommu_types.h
> >@@ -224,6 +224,11 @@
> >
> > #define PPR_REQ_FAULT   0x01
> >
> >+#define DTE_IRQ_PHYS_ADDR_MASK  (((1ULL << 45)-1) << 6)
> >+#define DTE_IRQ_REMAP_INTCTL(2ULL << 60)
> >+#define DTE_IRQ_TABLE_LEN   (8ULL << 1)
> >+#define DTE_IRQ_REMAP_ENABLE1ULL
> >+
> > #define PAGE_MODE_NONE0x00
> > #define PAGE_MODE_1_LEVEL 0x01
> > #define PAGE_MODE_2_LEVEL 0x02
> >
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 2/4] iommu/amd: Use standard bitmap operation to set bitmap

2016-09-15 Thread Baoquan He
It will be more readable and safer than the old setting.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 59741ea..3e810c6 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -2136,7 +2137,7 @@ static int __init early_amd_iommu_init(void)
 * never allocate domain 0 because its used as the non-allocated and
 * error value placeholder
 */
-   amd_iommu_pd_alloc_bitmap[0] = 1;
+   __set_bit(0, amd_iommu_pd_alloc_bitmap);
 
spin_lock_init(&amd_iommu_pd_lock);
 
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/4] iommu/amd: clean up the cmpxchg64 invocation

2016-09-15 Thread Baoquan He
Use cmpxchg64() the way it is designed to be used and keep it consistent
with other places.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 96de97a..160fc6a 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1274,7 +1274,8 @@ static u64 *alloc_pte(struct protection_domain *domain,
 
__npte = PM_LEVEL_PDE(level, virt_to_phys(page));
 
-   if (cmpxchg64(pte, __pte, __npte)) {
+   /* pte could have been changed somewhere. */
+   if (cmpxchg64(pte, __pte, __npte) != __pte) {
free_page((unsigned long)page);
continue;
}
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 3/4] iommu/amd: Free domain id when free a domain of struct dma_ops_domain

2016-09-15 Thread Baoquan He
The current code fails to free the domain id when freeing a domain of
struct dma_ops_domain.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 160fc6a..a9f78c2 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1655,6 +1655,9 @@ static void dma_ops_domain_free(struct dma_ops_domain 
*dom)
 
free_pagetable(&dom->domain);
 
+   if (dom->domain.id)
+   domain_id_free(dom->domain.id);
+
kfree(dom);
 }
 
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 0/4] iommu/amd: Clean up patches

2016-09-15 Thread Baoquan He
These issues were found while I was trying to fix the kdump failure on
systems with an AMD iommu. They are packed into this separate patchset since
they are related neither to the kdump issue nor to each other.

Baoquan He (4):
  iommu/amd: clean up the cmpxchg64 invocation
  iommu/amd: Use standard bitmap operation to set bitmap
  iommu/amd: Free domain id when free a domain of struct dma_ops_domain
  iommu/amd: No need to wait iommu completion if no dte irq entry change

 drivers/iommu/amd_iommu.c  | 12 
 drivers/iommu/amd_iommu_init.c |  3 ++-
 2 files changed, 10 insertions(+), 5 deletions(-)

-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 4/4] iommu/amd: No need to wait iommu completion if no dte irq entry change

2016-09-15 Thread Baoquan He
This is a clean up. In get_irq_table(), iommu_completion_wait() only needs
to be called if the DTE entry is changed. Otherwise there is no need to do it.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index a9f78c2..461c2fe 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3581,7 +3581,7 @@ static struct irq_remap_table *get_irq_table(u16 devid, 
bool ioapic)
 
table = irq_lookup_table[devid];
if (table)
-   goto out;
+   goto out_unlock;
 
alias = amd_iommu_alias_table[devid];
table = irq_lookup_table[alias];
@@ -3595,7 +3595,7 @@ static struct irq_remap_table *get_irq_table(u16 devid, 
bool ioapic)
/* Nothing there yet, allocate new irq remapping table */
table = kzalloc(sizeof(*table), GFP_ATOMIC);
if (!table)
-   goto out;
+   goto out_unlock;
 
/* Initialize table spin-lock */
spin_lock_init(&table->lock);
@@ -3608,7 +3608,7 @@ static struct irq_remap_table *get_irq_table(u16 devid, 
bool ioapic)
if (!table->table) {
kfree(table);
table = NULL;
-   goto out;
+   goto out_unlock;
}
 
memset(table->table, 0, MAX_IRQS_PER_TABLE * sizeof(u32));
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 1/8] iommu/amd: Detect pre enabled translation

2016-09-15 Thread Baoquan He
Add functions to check whether translation is already enabled in IOMMU.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c  | 25 +
 drivers/iommu/amd_iommu_proto.h |  1 +
 drivers/iommu/amd_iommu_types.h |  4 
 3 files changed, 30 insertions(+)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 59741ea..9bf1a04 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -247,6 +247,26 @@ static int amd_iommu_enable_interrupts(void);
 static int __init iommu_go_to_state(enum iommu_init_state state);
 static void init_device_table_dma(void);
 
+
+bool translation_pre_enabled(struct amd_iommu *iommu)
+{
+   return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
+}
+
+static void clear_translation_pre_enabled(struct amd_iommu *iommu)
+{
+iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
+}
+
+static void init_translation_status(struct amd_iommu *iommu)
+{
+   u32 ctrl;
+
+   ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
+   if (ctrl & (1<<CONTROL_IOMMU_EN))
+           iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
+}
+
 static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu,
u8 bank, u8 cntr, u8 fxn,
u64 *value, bool is_write);
@@ -1283,6 +1303,11 @@ static int __init init_iommu_one(struct amd_iommu 
*iommu, struct ivhd_header *h)
 
iommu->int_enabled = false;
 
+   init_translation_status(iommu);
+
+   if (translation_pre_enabled(iommu))
+   pr_warn("Translation is already enabled - trying to copy 
translation structures\n");
+
ret = init_iommu_from_acpi(iommu, h);
if (ret)
return ret;
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index 0bd9eb3..f066e01 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -98,4 +98,5 @@ static inline bool iommu_feature(struct amd_iommu *iommu, u64 
f)
return !!(iommu->features & f);
 }
 
+extern bool translation_pre_enabled(struct amd_iommu *iommu);
 #endif /* _ASM_X86_AMD_IOMMU_PROTO_H  */
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index caf5e38..7781953 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -384,6 +384,7 @@ extern struct kmem_cache *amd_iommu_irq_cache;
 #define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL)
 
 
+
 /*
  * This struct is used to pass information about
  * incoming PPR faults around.
@@ -401,6 +402,8 @@ struct amd_iommu_fault {
 struct iommu_domain;
 struct irq_domain;
 
+#define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED  (1 << 0)
+
 /*
  * This structure contains generic data for  IOMMU protection domains
  * independent of their use.
@@ -524,6 +527,7 @@ struct amd_iommu {
struct irq_domain *ir_domain;
struct irq_domain *msi_domain;
 #endif
+   u32 flags;
 };
 
 #define ACPIHID_UID_LEN 256
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 2/8] iommu/amd: add early_enable_iommu() wrapper function

2016-09-15 Thread Baoquan He
Move per iommu enabling code into a wrapper function early_enable_iommu().
This can make later kdump change easier.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c | 24 ++--
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 9bf1a04..77c44c8 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -1887,6 +1887,18 @@ static void iommu_apply_resume_quirks(struct amd_iommu 
*iommu)
   iommu->stored_addr_lo | 1);
 }
 
+static void early_enable_iommu(struct amd_iommu *iommu)
+{
+   iommu_disable(iommu);
+   iommu_init_flags(iommu);
+   iommu_set_device_table(iommu);
+   iommu_enable_command_buffer(iommu);
+   iommu_enable_event_buffer(iommu);
+   iommu_set_exclusion_range(iommu);
+   iommu_enable(iommu);
+   iommu_flush_all_caches(iommu);
+}
+
 /*
  * This function finally enables all IOMMUs found in the system after
  * they have been initialized
@@ -1895,16 +1907,8 @@ static void early_enable_iommus(void)
 {
struct amd_iommu *iommu;
 
-   for_each_iommu(iommu) {
-   iommu_disable(iommu);
-   iommu_init_flags(iommu);
-   iommu_set_device_table(iommu);
-   iommu_enable_command_buffer(iommu);
-   iommu_enable_event_buffer(iommu);
-   iommu_set_exclusion_range(iommu);
-   iommu_enable(iommu);
-   iommu_flush_all_caches(iommu);
-   }
+   for_each_iommu(iommu)
+   early_enable_iommu(iommu);
 }
 
 static void enable_iommus_v2(void)
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 5/8] iommu/amd: copy old trans table from old kernel

2016-09-15 Thread Baoquan He
Several things need to be done here:
1) If the iommu is pre-enabled in a normal (non-kdump) kernel, just disable
   it and print a warning.
2) If copying the dev table of the old kernel fails, continue to proceed
   as a normal kernel does.
3) Re-enable the event/cmd buffers and install the new DTE table to the reg.
4) Flush all caches.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c | 44 +++---
 1 file changed, 41 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index ce49641..47a8fc9 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -34,7 +34,7 @@
 #include 
 #include 
 #include 
-
+#include 
 #include "amd_iommu_proto.h"
 #include "amd_iommu_types.h"
 #include "irq_remapping.h"
@@ -1344,6 +1344,12 @@ static int __init init_iommu_one(struct amd_iommu 
*iommu, struct ivhd_header *h)
iommu->int_enabled = false;
 
init_translation_status(iommu);
+   if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
+   iommu_disable(iommu);
+   clear_translation_pre_enabled(iommu);
+   pr_warn("Translation was enabled for IOMMU:%d but we are not in 
kdump mode\n",
+   iommu->index);
+   }
 
if (translation_pre_enabled(iommu))
pr_warn("Translation is already enabled - trying to copy 
translation structures\n");
@@ -1946,9 +1952,41 @@ static void early_enable_iommu(struct amd_iommu *iommu)
 static void early_enable_iommus(void)
 {
struct amd_iommu *iommu;
+   bool is_pre_enabled=false;
 
-   for_each_iommu(iommu)
-   early_enable_iommu(iommu);
+   for_each_iommu(iommu) {
+   if ( translation_pre_enabled(iommu) ) {
+   is_pre_enabled = true;
+   break;
+   }
+   }
+
+   if ( !is_pre_enabled) {
+   for_each_iommu(iommu)
+   early_enable_iommu(iommu);
+   } else {
+   if (copy_dev_tables()) {
+   pr_err("Failed to copy DEV table from previous 
kernel.\n");
+   /*
+* If failed to copy dev tables from old kernel, 
continue to proceed
+* as it does in normal kernel.
+*/
+   for_each_iommu(iommu) {
+   clear_translation_pre_enabled(iommu);
+   early_enable_iommu(iommu);
+   }
+   } else {
+   pr_info("Copied DEV table from previous kernel.\n");
+   for_each_iommu(iommu) {
+   iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
+   iommu_feature_disable(iommu, 
CONTROL_EVT_LOG_EN);
+   iommu_enable_command_buffer(iommu);
+   iommu_enable_event_buffer(iommu);
+   iommu_set_device_table(iommu);
+   iommu_flush_all_caches(iommu);
+   }
+   }
+   }
 }
 
 static void enable_iommus_v2(void)
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 3/8] iommu/amd: Define bit fields for DTE particularly

2016-09-15 Thread Baoquan He
In the AMD-Vi spec several bits of the IO PTE fields and DTE fields are
similar, so both of them can share the same macro definitions. However,
defining their respective bit fields separately makes the code more
readable. So do it in this patch.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c   |  8 
 drivers/iommu/amd_iommu_types.h | 18 ++
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 96de97a..995b050 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1388,9 +1388,9 @@ static int iommu_map_page(struct protection_domain *dom,
 
if (count > 1) {
__pte = PAGE_SIZE_PTE(phys_addr, page_size);
-   __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC;
+   __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
} else
-   __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC;
+   __pte = phys_addr | IOMMU_PTE_PR | IOMMU_PTE_FC;
 
if (prot & IOMMU_PROT_IR)
__pte |= IOMMU_PTE_IR;
@@ -1714,7 +1714,7 @@ static void set_dte_entry(u16 devid, struct 
protection_domain *domain, bool ats)
 
pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
-   pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
+   pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV;
 
flags = amd_iommu_dev_table[devid].data[1];
 
@@ -1757,7 +1757,7 @@ static void set_dte_entry(u16 devid, struct 
protection_domain *domain, bool ats)
 static void clear_dte_entry(u16 devid)
 {
/* remove entry from the device table seen by the hardware */
-   amd_iommu_dev_table[devid].data[0]  = IOMMU_PTE_P | IOMMU_PTE_TV;
+   amd_iommu_dev_table[devid].data[0]  = DTE_FLAG_V | DTE_FLAG_TV;
amd_iommu_dev_table[devid].data[1] &= DTE_FLAG_MASK;
 
amd_iommu_apply_erratum_63(devid);
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 7781953..809944a 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -239,7 +239,7 @@
 #define PM_LEVEL_INDEX(x, a)   (((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL)
 #define PM_LEVEL_ENC(x)(((x) << 9) & 0xe00ULL)
 #define PM_LEVEL_PDE(x, a) ((a) | PM_LEVEL_ENC((x)) | \
-IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW)
+IOMMU_PTE_PR | IOMMU_PTE_IR | IOMMU_PTE_IW)
 #define PM_PTE_LEVEL(pte)  (((pte) >> 9) & 0x7ULL)
 
 #define PM_MAP_4k  0
@@ -288,13 +288,23 @@
 #define PTE_LEVEL_PAGE_SIZE(level) \
(1ULL << (12 + (9 * (level
 
-#define IOMMU_PTE_P  (1ULL << 0)
-#define IOMMU_PTE_TV (1ULL << 1)
+/*
+ * Bit value definition for I/O PTE fields
+ */
+#define IOMMU_PTE_PR (1ULL << 0)
 #define IOMMU_PTE_U  (1ULL << 59)
 #define IOMMU_PTE_FC (1ULL << 60)
 #define IOMMU_PTE_IR (1ULL << 61)
 #define IOMMU_PTE_IW (1ULL << 62)
 
+/*
+ * Bit value definition for DTE fields
+ */
+#define DTE_FLAG_V  (1ULL << 0)
+#define DTE_FLAG_TV (1ULL << 1)
+#define DTE_FLAG_IR (1ULL << 61)
+#define DTE_FLAG_IW (1ULL << 62)
+
 #define DTE_FLAG_IOTLB (1ULL << 32)
 #define DTE_FLAG_GV(1ULL << 55)
 #define DTE_FLAG_MASK  (0x3ffULL << 32)
@@ -316,7 +326,7 @@
 #define GCR3_VALID 0x01ULL
 
 #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
-#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
+#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_PR)
 #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK))
 #define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07)
 
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 6/8] iommu/amd: Do not re-enable dev table entries in kdump

2016-09-15 Thread Baoquan He
This enabling should have been done in normal kernel. It's unnecessary
to enable it again in kdump kernel.

And clean up the function comments of init_device_table_dma.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 47a8fc9..8d5db2e 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -1651,7 +1651,12 @@ static int __init amd_iommu_init_pci(void)
 */
ret = amd_iommu_init_api();
 
-   init_device_table_dma();
+   for_each_iommu(iommu) {
+   if ( !translation_pre_enabled(iommu) ) {
+   init_device_table_dma();
+   break;
+   }
+   }
 
for_each_iommu(iommu)
iommu_flush_all_caches(iommu);
@@ -1829,8 +1834,7 @@ static int __init init_memory_definitions(struct 
acpi_table_header *table)
 }
 
 /*
- * Init the device table to not allow DMA access for devices and
- * suppress all page faults
+ * Init the device table to not allow DMA access for devices.
  */
 static void init_device_table_dma(void)
 {
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 4/8] iommu/amd: Add function copy_dev_tables

2016-09-15 Thread Baoquan He
Add function copy_dev_tables to copy the old DEV table entries of the panicked
kernel to the newly allocated DEV table. Since all iommus share the same DTE
table, the copy only needs to be done once, as long as the physical address of
the old DEV table is retrieved from the iommu reg. Besides, the old domain ids
occupied in the 1st kernel need to be reserved to avoid touching the old io-page
tables, so that in-flight DMA can continue looking them up.

And define macro DEV_DOMID_MASK to replace the magic number 0xffffULL because
it needs to be reused in copy_dev_tables.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c   |  2 +-
 drivers/iommu/amd_iommu_init.c  | 40 
 drivers/iommu/amd_iommu_types.h |  1 +
 3 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 995b050..fcb69ff 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1747,7 +1747,7 @@ static void set_dte_entry(u16 devid, struct 
protection_domain *domain, bool ats)
flags|= tmp;
}
 
-   flags &= ~(0xUL);
+   flags &= ~DEV_DOMID_MASK;
flags |= domain->id;
 
amd_iommu_dev_table[devid].data[1]  = flags;
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 77c44c8..ce49641 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -717,6 +717,46 @@ static int get_dev_entry_bit(u16 devid, u8 bit)
 }
 
 
+static int copy_dev_tables(void)
+{
+   u64 entry;
+   u32 lo, hi, devid;
+   phys_addr_t old_devtb_phys;
+   struct dev_table_entry *old_devtb = NULL;
+   u16 dom_id, dte_v;
+   struct amd_iommu *iommu;
+   static int copied;
+
+for_each_iommu(iommu) {
+   if (!translation_pre_enabled(iommu)) {
+   pr_err("IOMMU:%d is not pre-enabled!/n", iommu->index);
+   return -1;
+   }
+
+   if (copied)
+   continue;
+
+lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
+hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
+entry = (((u64) hi) << 32) + lo;
+old_devtb_phys = entry & PAGE_MASK;
+old_devtb = memremap(old_devtb_phys, dev_table_size, 
MEMREMAP_WB);
+   if (!old_devtb)
+   return -1;
+for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
+amd_iommu_dev_table[devid] = old_devtb[devid];
+dom_id = amd_iommu_dev_table[devid].data[1] & 
DEV_DOMID_MASK;
+   dte_v = amd_iommu_dev_table[devid].data[0] & DTE_FLAG_V;
+   if (!dte_v || !dom_id)
+   continue;
+__set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
+}
+   memunmap(old_devtb);
+   copied = 1;
+}
+   return 0;
+}
+
 void amd_iommu_apply_erratum_63(u16 devid)
 {
int sysmgt;
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 809944a..a1ccede 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -310,6 +310,7 @@
 #define DTE_FLAG_MASK  (0x3ffULL << 32)
 #define DTE_GLX_SHIFT  (56)
 #define DTE_GLX_MASK   (3)
+#define DEV_DOMID_MASK 0xULL
 
 #define DTE_GCR3_VAL_A(x)  (((x) >> 12) & 0x7ULL)
 #define DTE_GCR3_VAL_B(x)  (((x) >> 15) & 0x0ULL)
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 7/8] iommu/amd: Don't update domain info to dte entry at iommu init stage

2016-09-15 Thread Baoquan He
AMD iommu creates a protection domain and assigns each device to it during
the iommu driver initialization stage. This happens just after the system pci
bus scanning stage, and much earlier than the device driver init stage. So
at this time, if in the kdump kernel, the domain info, especially pte_root,
can't be updated to the dte entry. We should wait until the device driver init
stage.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index fcb69ff..6c37300 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -137,6 +137,7 @@ struct iommu_dev_data {
bool pri_tlp; /* PASID TLB required for
 PPR completions */
u32 errata;   /* Bitmap for errata to apply */
+   bool domain_updated;
 };
 
 /*
@@ -1708,6 +1709,15 @@ static void set_dte_entry(u16 devid, struct 
protection_domain *domain, bool ats)
 {
u64 pte_root = 0;
u64 flags = 0;
+   struct iommu_dev_data *dev_data;
+   struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
+
+   dev_data = find_dev_data(devid);
+if (!dev_data)
+return;
+
+   if (translation_pre_enabled(iommu) && !dev_data->domain_updated)
+   return;
 
if (domain->mode != PAGE_MODE_NONE)
pte_root = virt_to_phys(domain->pt_root);
@@ -1756,6 +1766,14 @@ static void set_dte_entry(u16 devid, struct 
protection_domain *domain, bool ats)
 
 static void clear_dte_entry(u16 devid)
 {
+   struct iommu_dev_data *dev_data;
+   struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
+
+   dev_data = find_dev_data(devid);
+if (!dev_data)
+return;
+   if (translation_pre_enabled(iommu) && !dev_data->domain_updated)
+   return;
/* remove entry from the device table seen by the hardware */
amd_iommu_dev_table[devid].data[0]  = DTE_FLAG_V | DTE_FLAG_TV;
amd_iommu_dev_table[devid].data[1] &= DTE_FLAG_MASK;
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 8/8] iommu/amd: Update domain info to dte entry during device driver init

2016-09-15 Thread Baoquan He
All devices are supposed to reset themselves at the device driver
initialization stage. At this time, if in the kdump kernel, the in-flight DMA
will be stopped because of the device reset. It's the best time to update the
protection domain info, especially pte_root, to the dte entry which the device
relates to.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c | 21 +
 1 file changed, 21 insertions(+)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 6c37300..00b64ee 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2310,6 +2310,10 @@ static dma_addr_t __map_single(struct device *dev,
unsigned int pages;
int prot = 0;
int i;
+   struct iommu_dev_data *dev_data = get_dev_data(dev);
+   struct protection_domain *domain = get_domain(dev);
+   u16 alias = amd_iommu_alias_table[dev_data->devid];
+   struct amd_iommu *iommu = amd_iommu_rlookup_table[dev_data->devid];
 
pages = iommu_num_pages(paddr, size, PAGE_SIZE);
paddr &= PAGE_MASK;
@@ -2319,6 +2323,13 @@ static dma_addr_t __map_single(struct device *dev,
goto out;
 
prot = dir2prot(direction);
+   if (translation_pre_enabled(iommu) && !dev_data->domain_updated) {
+   dev_data->domain_updated = true;
+   set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled);
+   if (alias != dev_data->devid)
+   set_dte_entry(alias, domain, dev_data->ats.enabled);
+   device_flush_dte(dev_data);
+   }
 
start = address;
for (i = 0; i < pages; ++i) {
@@ -2470,6 +2481,9 @@ static int map_sg(struct device *dev, struct scatterlist 
*sglist,
struct scatterlist *s;
unsigned long address;
u64 dma_mask;
+   struct iommu_dev_data *dev_data = get_dev_data(dev);
+   u16 alias = amd_iommu_alias_table[dev_data->devid];
+   struct amd_iommu *iommu = amd_iommu_rlookup_table[dev_data->devid];
 
domain = get_domain(dev);
if (IS_ERR(domain))
@@ -2485,6 +2499,13 @@ static int map_sg(struct device *dev, struct scatterlist 
*sglist,
goto out_err;
 
prot = dir2prot(direction);
+   if (translation_pre_enabled(iommu) && !dev_data->domain_updated) {
+   dev_data->domain_updated = true;
+   set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled);
+   if (alias != dev_data->devid)
+   set_dte_entry(alias, domain, dev_data->ats.enabled);
+   device_flush_dte(dev_data);
+   }
 
/* Map all sg entries */
for_each_sg(sglist, s, nelems, i) {
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 0/8] Fix kdump faults on system with amd iommu

2016-09-15 Thread Baoquan He
This is v5 post. In fact the solution in v3 was correct. I was just unlucky
to get an AMD machine with a bnx2 NIC which can't reset itself during driver
init. It made me doubt my understanding of the fix.
Now with the fix below the AMD machine with the bnx2 NIC can also dump
successfully and no IO_PAGE_FAULT is seen any more. The network maintainer
has now picked it up.

bnx2: Reset device during driver initialization
https://www.mail-archive.com/netdev@vger.kernel.org/msg127336.html

The principle of the fix is similar to intel iommu: just defer the assignment
of device to domain to device driver init. But there's a difference from
intel iommu. AMD iommu creates the protection domain and assigns the device
to the domain in the iommu driver init stage. So in this patchset I just allow
the assignment of device to domain at the software level, but defer updating
the domain info, especially the pte_root, to the dev table entry until the
device driver init stage.


Baoquan He (8):
  iommu/amd: Detect pre enabled translation
  iommu/amd: add early_enable_iommu() wrapper function
  iommu/amd: Define bit fields for DTE particularly
  iommu/amd: Add function copy_dev_tables
  iommu/amd: copy old trans table from old kernel
  iommu/amd: Do not re-enable dev table entries in kdump
  iommu/amd: Don't update domain info to dte entry at iommu init stage
  iommu/amd: Update domain info to dte entry during device driver init

 drivers/iommu/amd_iommu.c   |  49 +--
 drivers/iommu/amd_iommu_init.c  | 135 
 drivers/iommu/amd_iommu_proto.h |   1 +
 drivers/iommu/amd_iommu_types.h |  23 +--
 4 files changed, 187 insertions(+), 21 deletions(-)

-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH RESEND 4/4] iommu/amd: No need to wait iommu completion if no dte irq entry change

2016-09-19 Thread Baoquan He
This is a clean up. In get_irq_table(), iommu_completion_wait() only needs to
be called if the DTE entry is changed. Otherwise there is no need to do it.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index b5b117b..a2479d0 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3634,7 +3634,7 @@ static struct irq_remap_table *get_irq_table(u16 devid, 
bool ioapic)
 
table = irq_lookup_table[devid];
if (table)
-   goto out;
+   goto out_unlock;
 
alias = amd_iommu_alias_table[devid];
table = irq_lookup_table[alias];
@@ -3648,7 +3648,7 @@ static struct irq_remap_table *get_irq_table(u16 devid, 
bool ioapic)
/* Nothing there yet, allocate new irq remapping table */
table = kzalloc(sizeof(*table), GFP_ATOMIC);
if (!table)
-   goto out;
+   goto out_unlock;
 
/* Initialize table spin-lock */
spin_lock_init(&table->lock);
@@ -3661,7 +3661,7 @@ static struct irq_remap_table *get_irq_table(u16 devid, 
bool ioapic)
if (!table->table) {
kfree(table);
table = NULL;
-   goto out;
+   goto out_unlock;
}
 
if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
-- 
2.5.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 0/4] iommu/amd: Clean up patches

2016-09-19 Thread Baoquan He
On 09/19/16 at 04:20pm, Joerg Roedel wrote:
> Hi Baoquan,
> 
> On Thu, Sep 15, 2016 at 04:50:49PM +0800, Baoquan He wrote:
> > These were found out when I tried to fix the kdump failure on system
> > with AMD iommu. Pack them into this patchset since they are not related
> > to the kdump issue and each other.
> > 
> > Baoquan He (4):
> >   iommu/amd: clean up the cmpxchg64 invocation
> >   iommu/amd: Use standard bitmap operation to set bitmap
> >   iommu/amd: Free domain id when free a domain of struct dma_ops_domain
> >   iommu/amd: No need to wait iommu completion if no dte irq entry change
> 
> I applied patches 1-3, but patch 4 had some conflicts. Can you please
> rebase this patch to my x86/amd branch and resend?

Finished, please try the resent post.

Thanks
Baoquan
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH RESEND 4/4] iommu/amd: No need to wait iommu completion if no dte irq entry change

2016-09-20 Thread Baoquan He
On 09/20/16 at 11:56am, Joerg Roedel wrote:
> On Tue, Sep 20, 2016 at 09:05:34AM +0800, Baoquan He wrote:
> > This is a clean up. In get_irq_table() only if DTE entry is changed
> > iommu_completion_wait() need be called. Otherwise no need to do it.
> > 
> > Signed-off-by: Baoquan He 
> > ---
> >  drivers/iommu/amd_iommu.c | 6 +++---
> >  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> Applied, thanks.

Thanks!
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v5 4/8] iommu/amd: Add function copy_dev_tables

2016-09-21 Thread Baoquan He
Hi Joerg,

Thanks for your review and great suggestions!

On 09/20/16 at 01:58pm, Joerg Roedel wrote:
> Hi Baoquan,
> 
> On Thu, Sep 15, 2016 at 11:03:22PM +0800, Baoquan He wrote:
> > +static int copy_dev_tables(void)
> > +{
> > +   u64 entry;
> > +   u32 lo, hi, devid;
> > +   phys_addr_t old_devtb_phys;
> > +   struct dev_table_entry *old_devtb = NULL;
> > +   u16 dom_id, dte_v;
> > +   struct amd_iommu *iommu;
> > +   static int copied;
> 
> Please order this by line-length, longer lines first.

Will do.

> 
> > +for_each_iommu(iommu) {
> > +   if (!translation_pre_enabled(iommu)) {
> > +   pr_err("IOMMU:%d is not pre-enabled!/n", iommu->index);
> > +   return -1;
> > +   }
> > +
> > +   if (copied)
> > +   continue;
> > +
> > +lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
> > +hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
> > +entry = (((u64) hi) << 32) + lo;
> > +old_devtb_phys = entry & PAGE_MASK;
> > +old_devtb = memremap(old_devtb_phys, dev_table_size, 
> > MEMREMAP_WB);
> > +   if (!old_devtb)
> > +   return -1;
> > +for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
> > +amd_iommu_dev_table[devid] = old_devtb[devid];
> > +dom_id = amd_iommu_dev_table[devid].data[1] & 
> > DEV_DOMID_MASK;
> > +   dte_v = amd_iommu_dev_table[devid].data[0] & DTE_FLAG_V;
> > +   if (!dte_v || !dom_id)
> > +   continue;
> > +__set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
> > +}
> > +   memunmap(old_devtb);
> > +   copied = 1;
> > +}
> 
> This loop need more refinement and sanity checking code. I suggest using
> two loops and do the sanity checking in the first one. The sanity checks
> should do:
> 
>   * Check whether all IOMMUs actually use the same device table
> with the same size
> 
>   * Verify that the size of the old device table is the expected
> size.

Will do.

> 
>   * Also sanity check the irq-remapping information and remapping
> table sizes.

Will do. Since this needs the irq DTE_IRQ_ macros which are defined
in amd_iommu.c, I plan to move them into amd_iommu_types.h, and then do
the irq-remapping checks. These can be done in another patch.

> 
> If any of these checks fail, just bail out of copying.
> 
> What is further needed it some more selection on what is copied from the
> old kernel. There is no need to copy all the GCR3 root-pointer
> information. If a device is set up with guest translations (DTE.GV=1),
> then don't copy that information but move the device over to an empty
> guest-cr3 table and handle the faults in the PPR log (which should just
> answer them with INVALID). After all these PPR faults are recoverable
> for the device and we should not allow the device to change old-kernels
> data when we don't have to.

The current fix is the simplest and cleanest. Because the in-flight DMAs
have continued transferring data since the system crash, including guest
translations, we may not need to care about it and can just let them continue
flying a little longer until the device is reset. Since you have suggested it,
I will try to make another patch for this issue, and we can see the effect.

Thanks
Baoquan
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v5 5/8] iommu/amd: copy old trans table from old kernel

2016-09-21 Thread Baoquan He
On 09/20/16 at 02:40pm, Joerg Roedel wrote:
> On Thu, Sep 15, 2016 at 11:03:23PM +0800, Baoquan He wrote:
> > Here several things need be done:
> > 1) If iommu is pre-enabled in a normal kernel, just disable it and print
> >warning.
> > 2) If failed to copy dev table of old kernel, continue to proceed as
> >it does in normal kernel.
> > 3) Re-enable event/cmd buffer and install the new DTE table to reg.
> > 4) Flush all caches
> > 
> > Signed-off-by: Baoquan He 
> > ---
> >  drivers/iommu/amd_iommu_init.c | 44 
> > +++---
> >  1 file changed, 41 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
> > index ce49641..47a8fc9 100644
> > --- a/drivers/iommu/amd_iommu_init.c
> > +++ b/drivers/iommu/amd_iommu_init.c
> > @@ -34,7 +34,7 @@
> >  #include 
> >  #include 
> >  #include 
> > -
> > +#include 
> 
> Please keep that empty line, it is there for readability.

Thanks, will change.

> 
> >  #include "amd_iommu_proto.h"
> >  #include "amd_iommu_types.h"
> >  #include "irq_remapping.h"
> > @@ -1344,6 +1344,12 @@ static int __init init_iommu_one(struct amd_iommu 
> > *iommu, struct ivhd_header *h)
> > iommu->int_enabled = false;
> >  
> > init_translation_status(iommu);
> > +   if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
> > +   iommu_disable(iommu);
> > +   clear_translation_pre_enabled(iommu);
> > +   pr_warn("Translation was enabled for IOMMU:%d but we are not in 
> > kdump mode\n",
> > +   iommu->index);
> > +   }
> >  
> > if (translation_pre_enabled(iommu))
> > pr_warn("Translation is already enabled - trying to copy 
> > translation structures\n");
> > @@ -1946,9 +1952,41 @@ static void early_enable_iommu(struct amd_iommu 
> > *iommu)
> >  static void early_enable_iommus(void)
> >  {
> > struct amd_iommu *iommu;
> > +   bool is_pre_enabled=false;
> >  
> > -   for_each_iommu(iommu)
> > -   early_enable_iommu(iommu);
> > +   for_each_iommu(iommu) {
> > +   if ( translation_pre_enabled(iommu) ) {
> 
> Coding style, too many spaces. There is more of that below.

Will change.

> 
> > +   is_pre_enabled = true;
> > +   break;
> > +   }
> > +   }
> > +
> > +   if ( !is_pre_enabled) {
> > +   for_each_iommu(iommu)
> > +   early_enable_iommu(iommu);
> > +   } else {
> > +   if (copy_dev_tables()) {
> > +   pr_err("Failed to copy DEV table from previous 
> > kernel.\n");
> > +   /*
> > +* If failed to copy dev tables from old kernel, 
> > continue to proceed
> > +* as it does in normal kernel.
> > +*/
> > +   for_each_iommu(iommu) {
> > +   clear_translation_pre_enabled(iommu);
> > +   early_enable_iommu(iommu);
> > +   }
> > +   } else {
> > +   pr_info("Copied DEV table from previous kernel.\n");
> > +   for_each_iommu(iommu) {
> > +   iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
> > +   iommu_feature_disable(iommu, 
> > CONTROL_EVT_LOG_EN);
> 
> Could you move that into new helpers (iommu_disable_command_buffer...)?

Yes, sure, will do.

> 
> > +   iommu_enable_command_buffer(iommu);
> > +   iommu_enable_event_buffer(iommu);
> > +   iommu_set_device_table(iommu);
> > +   iommu_flush_all_caches(iommu);
> > +   }
> > +   }
> > +   }
> >  }
> >  
> >  static void enable_iommus_v2(void)
> > -- 
> > 2.5.5
> > 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


  1   2   3   4   >