[PATCH v7 26/36] x86/CPU/AMD: Make the microcode level available earlier in the boot

2017-06-16 Thread Tom Lendacky
Move the setting of the cpuinfo_x86.microcode field from init_amd() to
early_init_amd() so that it is available earlier in the boot process. This
avoids having to read MSR_AMD64_PATCH_LEVEL directly during early boot.

Signed-off-by: Tom Lendacky 
---
 arch/x86/kernel/cpu/amd.c |8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 5bdcbd4..fdcf305 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -547,8 +547,12 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
 
 static void early_init_amd(struct cpuinfo_x86 *c)
 {
+   u32 dummy;
+
early_init_amd_mc(c);
 
+   rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
+
/*
 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
 * with P/T states and does not stop in deep C-states
@@ -746,8 +750,6 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
 
 static void init_amd(struct cpuinfo_x86 *c)
 {
-   u32 dummy;
-
early_init_amd(c);
 
/*
@@ -809,8 +811,6 @@ static void init_amd(struct cpuinfo_x86 *c)
if (c->x86 > 0x11)
set_cpu_cap(c, X86_FEATURE_ARAT);
 
-   rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
-
/* 3DNow or LM implies PREFETCHW */
if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH))
if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
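
A note on the helper used here: rdmsr_safe() differs from a plain rdmsr()
in that a faulting RDMSR is caught by an exception fixup and reported
through the return value rather than crashing the boot. A minimal sketch of
a defensive call under that contract (the patch above deliberately ignores
the return value, since the MSR exists on all supported AMD CPUs):

        u32 rev = 0, dummy;

        /* rdmsr_safe() returns 0 on success, non-zero if the RDMSR faulted. */
        if (rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &rev, &dummy) == 0)
                c->microcode = rev;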



[PATCH v7 28/36] x86, realmode: Check for memory encryption on the APs

2017-06-16 Thread Tom Lendacky
Add support to check if memory encryption is active in the kernel and that
it has been enabled on the AP. If memory encryption is active in the kernel
but has not been enabled on the AP, then set the memory encryption bit (bit
23) of MSR_K8_SYSCFG to enable memory encryption on that AP and allow the
AP to continue starting up.

Reviewed-by: Borislav Petkov 
Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/realmode.h  |   12 
 arch/x86/realmode/init.c |4 
 arch/x86/realmode/rm/trampoline_64.S |   24 
 3 files changed, 40 insertions(+)

diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index 230e190..90d9152 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -1,6 +1,15 @@
 #ifndef _ARCH_X86_REALMODE_H
 #define _ARCH_X86_REALMODE_H
 
+/*
+ * Flag bit definitions for use with the flags field of the trampoline header
+ * in the CONFIG_X86_64 variant.
+ */
+#define TH_FLAGS_SME_ACTIVE_BIT    0
+#define TH_FLAGS_SME_ACTIVE        BIT(TH_FLAGS_SME_ACTIVE_BIT)
+
+#ifndef __ASSEMBLY__
+
 #include <linux/types.h>
 #include <asm/io.h>
 
@@ -38,6 +47,7 @@ struct trampoline_header {
u64 start;
u64 efer;
u32 cr4;
+   u32 flags;
 #endif
 };
 
@@ -69,4 +79,6 @@ static inline size_t real_mode_size_needed(void)
 void set_real_mode_mem(phys_addr_t mem, size_t size);
 void reserve_real_mode(void);
 
+#endif /* __ASSEMBLY__ */
+
 #endif /* _ARCH_X86_REALMODE_H */
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index d6ddc7e..1f71980 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -108,6 +108,10 @@ static void __init setup_real_mode(void)
trampoline_cr4_features = &trampoline_header->cr4;
*trampoline_cr4_features = mmu_cr4_features;
 
+   trampoline_header->flags = 0;
+   if (sme_active())
+   trampoline_header->flags |= TH_FLAGS_SME_ACTIVE;
+
trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
trampoline_pgd[0] = trampoline_pgd_entry.pgd;
trampoline_pgd[511] = init_top_pgt[511].pgd;
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index dac7b20..614fd70 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -30,6 +30,7 @@
 #include <asm/msr.h>
 #include <asm/segment.h>
 #include <asm/processor-flags.h>
+#include <asm/realmode.h>
 #include "realmode.h"
 
.text
@@ -92,6 +93,28 @@ ENTRY(startup_32)
movl%edx, %fs
movl%edx, %gs
 
+   /*
+* Check for memory encryption support. This is a safety net in
+* case BIOS hasn't done the necessary step of setting the bit in
+* the MSR for this AP. If SME is active and we've gotten this far
+* then it is safe for us to set the MSR bit and continue. If we
+* don't we'll eventually crash trying to execute encrypted
+* instructions.
+*/
+   bt  $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags
+   jnc .Ldone
+   movl$MSR_K8_SYSCFG, %ecx
+   rdmsr
+   bts $MSR_K8_SYSCFG_MEM_ENCRYPT_BIT, %eax
+   jc  .Ldone
+
+   /*
+* Memory encryption is enabled but the SME enable bit for this
+* CPU has not been set.  It is safe to set it, so do so.
+*/
+   wrmsr
+.Ldone:
+
movlpa_tr_cr4, %eax
movl%eax, %cr4  # Enable PAE mode
 
@@ -147,6 +170,7 @@ GLOBAL(trampoline_header)
tr_start:   .space  8
GLOBAL(tr_efer) .space  8
GLOBAL(tr_cr4)  .space  4
+   GLOBAL(tr_flags).space  4
 END(trampoline_header)
 
 #include "trampoline_common.S"



[PATCH v7 27/36] iommu/amd: Allow the AMD IOMMU to work with memory encryption

2017-06-16 Thread Tom Lendacky
The IOMMU is programmed with physical addresses for the various tables
and buffers that are used to communicate between the device and the
driver. When the driver allocates this memory, it is encrypted. In order
for the IOMMU to access the memory as encrypted, the encryption mask needs
to be included in these physical addresses during configuration.

The PTE entries created by the IOMMU should also include the encryption
mask so that when the device behind the IOMMU performs a DMA, the DMA
will be performed to encrypted memory.

Signed-off-by: Tom Lendacky 
---
 drivers/iommu/amd_iommu.c   |   30 --
 drivers/iommu/amd_iommu_init.c  |   34 --
 drivers/iommu/amd_iommu_proto.h |   10 ++
 drivers/iommu/amd_iommu_types.h |2 +-
 4 files changed, 55 insertions(+), 21 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 63cacf5..912008c 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -544,7 +544,7 @@ static void dump_dte_entry(u16 devid)
 
 static void dump_command(unsigned long phys_addr)
 {
-   struct iommu_cmd *cmd = phys_to_virt(phys_addr);
+   struct iommu_cmd *cmd = iommu_phys_to_virt(phys_addr);
int i;
 
for (i = 0; i < 4; ++i)
@@ -865,11 +865,13 @@ static void copy_cmd_to_buffer(struct amd_iommu *iommu,
 
 static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
 {
+   u64 paddr = iommu_virt_to_phys((void *)address);
+
WARN_ON(address & 0x7ULL);
 
memset(cmd, 0, sizeof(*cmd));
-   cmd->data[0] = lower_32_bits(__pa(address)) | CMD_COMPL_WAIT_STORE_MASK;
-   cmd->data[1] = upper_32_bits(__pa(address));
+   cmd->data[0] = lower_32_bits(paddr) | CMD_COMPL_WAIT_STORE_MASK;
+   cmd->data[1] = upper_32_bits(paddr);
cmd->data[2] = 1;
CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
 }
@@ -1328,7 +1330,7 @@ static bool increase_address_space(struct protection_domain *domain,
return false;
 
*pte = PM_LEVEL_PDE(domain->mode,
-   virt_to_phys(domain->pt_root));
+   iommu_virt_to_phys(domain->pt_root));
domain->pt_root  = pte;
domain->mode+= 1;
domain->updated  = true;
@@ -1365,7 +1367,7 @@ static u64 *alloc_pte(struct protection_domain *domain,
if (!page)
return NULL;
 
-   __npte = PM_LEVEL_PDE(level, virt_to_phys(page));
+   __npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page));
 
/* pte could have been changed somewhere. */
if (cmpxchg64(pte, __pte, __npte) != __pte) {
@@ -1481,10 +1483,10 @@ static int iommu_map_page(struct protection_domain *dom,
return -EBUSY;
 
if (count > 1) {
-   __pte = PAGE_SIZE_PTE(phys_addr, page_size);
+   __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size);
__pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC;
} else
-   __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC;
+   __pte = __sme_set(phys_addr) | IOMMU_PTE_P | IOMMU_PTE_FC;
 
if (prot & IOMMU_PROT_IR)
__pte |= IOMMU_PTE_IR;
@@ -1700,7 +1702,7 @@ static void free_gcr3_tbl_level1(u64 *tbl)
if (!(tbl[i] & GCR3_VALID))
continue;
 
-   ptr = __va(tbl[i] & PAGE_MASK);
+   ptr = iommu_phys_to_virt(tbl[i] & PAGE_MASK);
 
free_page((unsigned long)ptr);
}
@@ -1715,7 +1717,7 @@ static void free_gcr3_tbl_level2(u64 *tbl)
if (!(tbl[i] & GCR3_VALID))
continue;
 
-   ptr = __va(tbl[i] & PAGE_MASK);
+   ptr = iommu_phys_to_virt(tbl[i] & PAGE_MASK);
 
free_gcr3_tbl_level1(ptr);
}
@@ -1807,7 +1809,7 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
u64 flags = 0;
 
if (domain->mode != PAGE_MODE_NONE)
-   pte_root = virt_to_phys(domain->pt_root);
+   pte_root = iommu_virt_to_phys(domain->pt_root);
 
pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
@@ -1819,7 +1821,7 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
flags |= DTE_FLAG_IOTLB;
 
if (domain->flags & PD_IOMMUV2_MASK) {
-   u64 gcr3 = __pa(domain->gcr3_tbl);
+   u64 gcr3 = iommu_virt_to_phys(domain->gcr3_tbl);
u64 glx  = domain->glx;
u64 tmp;
 
@@ -3470,10 +3472,10 @@ static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc)
if (root == NULL)
return NULL;
 
-   *pte = 
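
The diff is truncated by the archive at this point; the remaining hunks
cover amd_iommu_init.c, amd_iommu_types.h and the new helpers in
amd_iommu_proto.h. Judging from their use above, the helpers are presumably
thin wrappers of this shape (sketch, not the verbatim patch):

        /* Apply or strip the SME mask around the usual phys/virt helpers. */
        static inline u64 iommu_virt_to_phys(void *vaddr)
        {
                return (u64)__sme_set(virt_to_phys(vaddr));
        }

        static inline void *iommu_phys_to_virt(unsigned long paddr)
        {
                return phys_to_virt(__sme_clr(paddr));
        }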

[PATCH v7 25/36] swiotlb: Add warnings for use of bounce buffers with SME

2017-06-16 Thread Tom Lendacky
Add warnings to let the user know when bounce buffers are being used for
DMA when SME is active.  Since the bounce buffers are not in encrypted
memory, these notifications allow the user to determine an appropriate
action, if necessary.  Actions can range from utilizing an IOMMU, to
replacing the device with another device that can support 64-bit DMA, to
ignoring the message if the device isn't used much.

Signed-off-by: Tom Lendacky 
---
 include/linux/dma-mapping.h |   11 +++
 include/linux/mem_encrypt.h |8 
 lib/swiotlb.c   |3 +++
 3 files changed, 22 insertions(+)

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 4f3eece..ee2307e 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include <linux/mem_encrypt.h>
 
 /**
  * List of possible attributes associated with a DMA mapping. The semantics
@@ -577,6 +578,11 @@ static inline int dma_set_mask(struct device *dev, u64 mask)
 
if (!dev->dma_mask || !dma_supported(dev, mask))
return -EIO;
+
+   /* Since mask is unsigned, this can only be true if SME is active */
+   if (mask < sme_dma_mask())
+   dev_warn(dev, "SME is active, device will require DMA bounce buffers\n");
+
*dev->dma_mask = mask;
return 0;
 }
@@ -596,6 +602,11 @@ static inline int dma_set_coherent_mask(struct device *dev, u64 mask)
 {
if (!dma_supported(dev, mask))
return -EIO;
+
+   /* Since mask is unsigned, this can only be true if SME is active */
+   if (mask < sme_dma_mask())
+   dev_warn(dev, "SME is active, device will require DMA bounce buffers\n");
+
dev->coherent_dma_mask = mask;
return 0;
 }
diff --git a/include/linux/mem_encrypt.h b/include/linux/mem_encrypt.h
index 837c66b..2168002 100644
--- a/include/linux/mem_encrypt.h
+++ b/include/linux/mem_encrypt.h
@@ -30,6 +30,14 @@ static inline bool sme_active(void)
return !!sme_me_mask;
 }
 
+static inline u64 sme_dma_mask(void)
+{
+   if (!sme_me_mask)
+   return 0ULL;
+
+   return ((u64)sme_me_mask << 1) - 1;
+}
+
 /*
  * The __sme_set() and __sme_clr() macros are useful for adding or removing
  * the encryption mask from a value (e.g. when dealing with pagetable
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 04ac91a..8c6c83e 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -507,6 +507,9 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
if (no_iotlb_memory)
panic("Can not allocate SWIOTLB buffer earlier and can't now 
provide you with the DMA bounce buffer");
 
+   if (sme_active())
+   pr_warn_once("SME is active and system is using DMA bounce buffers\n");
+
mask = dma_get_seg_boundary(hwdev);
 
tbl_dma_addr &= mask;
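
The arithmetic in sme_dma_mask() is terse: sme_me_mask contains a single
physical-address bit, so shifting it left once and subtracting one yields
an all-ones mask covering every address bit up to and including the
encryption bit. Worked through with an assumed bit position (the real
position is reported by CPUID 0x8000001F and may differ):

        /* Assume the SME encryption bit is physical address bit 47. */
        u64 sme_me_mask = 1ULL << 47;           /* 0x0000800000000000 */
        u64 floor = (sme_me_mask << 1) - 1;     /* 0x0000ffffffffffff */

        /*
         * floor equals DMA_BIT_MASK(48): any device DMA mask smaller than
         * this cannot address encrypted memory directly and will bounce,
         * which is exactly what the dev_warn() calls above check for.
         */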



[PATCH v7 24/36] x86, swiotlb: Add memory encryption support

2017-06-16 Thread Tom Lendacky
Since DMA addresses will effectively look like 48-bit addresses when the
memory encryption mask is set, SWIOTLB is needed if the DMA mask of the
device performing the DMA does not support 48 bits. SWIOTLB will be
initialized to create decrypted bounce buffers for use by these devices.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/dma-mapping.h |5 ++-
 arch/x86/include/asm/mem_encrypt.h |5 +++
 arch/x86/kernel/pci-dma.c  |   11 +--
 arch/x86/kernel/pci-nommu.c|2 +
 arch/x86/kernel/pci-swiotlb.c  |   15 +-
 arch/x86/mm/mem_encrypt.c  |   22 +++
 include/linux/swiotlb.h|1 +
 init/main.c|   10 +++
 lib/swiotlb.c  |   54 +++-
 9 files changed, 108 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 08a0838..191f9a5 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include <linux/mem_encrypt.h>
 
 #ifdef CONFIG_ISA
 # define ISA_DMA_BIT_MASK DMA_BIT_MASK(24)
@@ -62,12 +63,12 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 
 static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
-   return paddr;
+   return __sme_set(paddr);
 }
 
 static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
 {
-   return daddr;
+   return __sme_clr(daddr);
 }
 #endif /* CONFIG_X86_DMA_REMAP */
 
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 3ffa5fa..af835cf 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -33,6 +33,11 @@ void __init sme_early_decrypt(resource_size_t paddr,
 
 void __init sme_enable(void);
 
+/* Architecture __weak replacement functions */
+void __init mem_encrypt_init(void);
+
+void swiotlb_set_mem_attributes(void *vaddr, unsigned long size);
+
 #else  /* !CONFIG_AMD_MEM_ENCRYPT */
 
 #define sme_me_mask    0UL
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 3a216ec..72d96d4 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -93,9 +93,12 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
if (gfpflags_allow_blocking(flag)) {
page = dma_alloc_from_contiguous(dev, count, get_order(size),
 flag);
-   if (page && page_to_phys(page) + size > dma_mask) {
-   dma_release_from_contiguous(dev, page, count);
-   page = NULL;
+   if (page) {
+   addr = phys_to_dma(dev, page_to_phys(page));
+   if (addr + size > dma_mask) {
+   dma_release_from_contiguous(dev, page, count);
+   page = NULL;
+   }
}
}
/* fallback */
@@ -104,7 +107,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
if (!page)
return NULL;
 
-   addr = page_to_phys(page);
+   addr = phys_to_dma(dev, page_to_phys(page));
if (addr + size > dma_mask) {
__free_pages(page, get_order(size));
 
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index a88952e..98b576a 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -30,7 +30,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
 enum dma_data_direction dir,
 unsigned long attrs)
 {
-   dma_addr_t bus = page_to_phys(page) + offset;
+   dma_addr_t bus = phys_to_dma(dev, page_to_phys(page)) + offset;
WARN_ON(size == 0);
if (!check_addr("map_single", dev, bus, size))
return DMA_ERROR_CODE;
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 1e23577..6770775 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -6,12 +6,14 @@
 #include 
 #include 
 #include 
+#include <linux/mem_encrypt.h>
 
 #include 
 #include 
 #include 
 #include 
 #include 
+
 int swiotlb __read_mostly;
 
 void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
@@ -79,8 +81,8 @@ int __init pci_swiotlb_detect_override(void)
  pci_swiotlb_late_init);
 
 /*
- * if 4GB or more detected (and iommu=off not set) return 1
- * and set swiotlb to 1.
+ * If 4GB or more detected (and iommu=off not set) or if SME is active
+ * then set swiotlb to 1 and return 1.
  */
 int __init pci_swiotlb_detect_4gb(void)
 {
@@ -89,6 +91,15 @@ int __init pci_swiotlb_detect_4gb(void)
if (!no_iommu && max_possible_pfn > MAX_DMA32_PFN)
swiotlb = 1;
 #endif
+
+   /*
+* If SME is active then swiotlb will be 
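
The phys_to_dma()/dma_to_phys() changes amount to OR-ing the encryption
mask in on the way to the device and stripping it on the way back. The
helpers come from the earlier patches in this series and behave essentially
like this (paraphrased; sme_me_mask is 0 when SME is inactive, making both
no-ops):

        #define __sme_set(x)    ((x) | sme_me_mask)     /* mark encrypted */
        #define __sme_clr(x)    ((x) & ~sme_me_mask)    /* strip the mask */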

[PATCH v7 22/36] x86/mm: Add support for changing the memory encryption attribute

2017-06-16 Thread Tom Lendacky
Add support for changing the memory encryption attribute for one or more
memory pages. This will be useful when we have to change the AP trampoline
area to not be encrypted, or when we need to change the SWIOTLB area to
not be encrypted in support of devices that can't support the encryption
mask range.

Reviewed-by: Borislav Petkov 
Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/set_memory.h |3 ++
 arch/x86/mm/pageattr.c|   62 +
 2 files changed, 65 insertions(+)

diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index eaec6c3..cd71273 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -11,6 +11,7 @@
  * Executability : eXeutable, NoteXecutable
  * Read/Write: ReadOnly, ReadWrite
  * Presence  : NotPresent
+ * Encryption: Encrypted, Decrypted
  *
  * Within a category, the attributes are mutually exclusive.
  *
@@ -42,6 +43,8 @@
 int set_memory_wb(unsigned long addr, int numpages);
 int set_memory_np(unsigned long addr, int numpages);
 int set_memory_4k(unsigned long addr, int numpages);
+int set_memory_encrypted(unsigned long addr, int numpages);
+int set_memory_decrypted(unsigned long addr, int numpages);
 
 int set_memory_array_uc(unsigned long *addr, int addrinarray);
 int set_memory_array_wc(unsigned long *addr, int addrinarray);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index e7d3866..d9e09fb 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1769,6 +1769,68 @@ int set_memory_4k(unsigned long addr, int numpages)
__pgprot(0), 1, 0, NULL);
 }
 
+static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
+{
+   struct cpa_data cpa;
+   unsigned long start;
+   int ret;
+
+   /* Nothing to do if the SME is not active */
+   if (!sme_active())
+   return 0;
+
+   /* Should not be working on unaligned addresses */
+   if (WARN_ONCE(addr & ~PAGE_MASK, "misaligned address: %#lx\n", addr))
+   addr &= PAGE_MASK;
+
+   start = addr;
+
+   memset(&cpa, 0, sizeof(cpa));
+   cpa.vaddr = &addr;
+   cpa.numpages = numpages;
+   cpa.mask_set = enc ? __pgprot(_PAGE_ENC) : __pgprot(0);
+   cpa.mask_clr = enc ? __pgprot(0) : __pgprot(_PAGE_ENC);
+   cpa.pgd = init_mm.pgd;
+
+   /* Must avoid aliasing mappings in the highmem code */
+   kmap_flush_unused();
+   vm_unmap_aliases();
+
+   /*
+* Before changing the encryption attribute, we need to flush caches.
+*/
+   if (static_cpu_has(X86_FEATURE_CLFLUSH))
+   cpa_flush_range(start, numpages, 1);
+   else
+   cpa_flush_all(1);
+
+   ret = __change_page_attr_set_clr(&cpa, 1);
+
+   /*
+* After changing the encryption attribute, we need to flush TLBs
+* again in case any speculative TLB caching occurred (but no need
+* to flush caches again).  We could just use cpa_flush_all(), but
+* in case TLB flushing gets optimized in the cpa_flush_range()
+* path use the same logic as above.
+*/
+   if (static_cpu_has(X86_FEATURE_CLFLUSH))
+   cpa_flush_range(start, numpages, 0);
+   else
+   cpa_flush_all(0);
+
+   return ret;
+}
+
+int set_memory_encrypted(unsigned long addr, int numpages)
+{
+   return __set_memory_enc_dec(addr, numpages, true);
+}
+
+int set_memory_decrypted(unsigned long addr, int numpages)
+{
+   return __set_memory_enc_dec(addr, numpages, false);
+}
+
 int set_pages_uc(struct page *page, int numpages)
 {
unsigned long addr = (unsigned long)page_address(page);
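
A typical caller pattern for the new interfaces, as the SWIOTLB and
trampoline patches elsewhere in this series use them ('buffer' and
'num_pages' are placeholders; error handling abbreviated):

        unsigned long vaddr = (unsigned long)buffer;
        int ret;

        /* Share the pages with devices: clear _PAGE_ENC on them. */
        ret = set_memory_decrypted(vaddr, num_pages);
        if (ret)
                return ret;

        /* ... use the buffer for unencrypted access / DMA ... */

        /* Make the pages private (encrypted) again afterwards. */
        ret = set_memory_encrypted(vaddr, num_pages);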



[PATCH v7 21/36] x86/mm: Add support to access persistent memory in the clear

2017-06-16 Thread Tom Lendacky
Persistent memory is expected to persist across reboots. The encryption
key used by SME will change across reboots, which will result in corrupted
persistent memory.  Persistent memory is handed out by block devices
through memory remapping functions, so be sure not to map this memory as
encrypted.

Reviewed-by: Borislav Petkov 
Signed-off-by: Tom Lendacky 
---
 arch/x86/mm/ioremap.c |   31 ++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index f3fa007..0254b78 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -428,17 +428,46 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
  * Examine the physical address to determine if it is an area of memory
  * that should be mapped decrypted.  If the memory is not part of the
  * kernel usable area it was accessed and created decrypted, so these
- * areas should be mapped decrypted.
+ * areas should be mapped decrypted. And since the encryption key can
+ * change across reboots, persistent memory should also be mapped
+ * decrypted.
  */
 static bool memremap_should_map_decrypted(resource_size_t phys_addr,
  unsigned long size)
 {
+   int is_pmem;
+
+   /*
+* Check if the address is part of a persistent memory region.
+* This check covers areas added by E820, EFI and ACPI.
+*/
+   is_pmem = region_intersects(phys_addr, size, IORESOURCE_MEM,
+   IORES_DESC_PERSISTENT_MEMORY);
+   if (is_pmem != REGION_DISJOINT)
+   return true;
+
+   /*
+* Check if the non-volatile attribute is set for an EFI
+* reserved area.
+*/
+   if (efi_enabled(EFI_BOOT)) {
+   switch (efi_mem_type(phys_addr)) {
+   case EFI_RESERVED_TYPE:
+   if (efi_mem_attributes(phys_addr) & EFI_MEMORY_NV)
+   return true;
+   break;
+   default:
+   break;
+   }
+   }
+
/* Check if the address is outside kernel usable area */
switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) {
case E820_TYPE_RESERVED:
case E820_TYPE_ACPI:
case E820_TYPE_NVS:
case E820_TYPE_UNUSABLE:
+   case E820_TYPE_PRAM:
return true;
default:
break;
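
The region_intersects() return convention makes the pmem check above read
tersely; spelled out (the REGION_* values are part of the kernel's resource
API):

        int rc = region_intersects(phys_addr, size, IORESOURCE_MEM,
                                   IORES_DESC_PERSISTENT_MEMORY);

        /*
         * REGION_INTERSECTS and REGION_MIXED both mean at least part of
         * the range is persistent memory; only REGION_DISJOINT means none
         * of it is.  Treating anything other than REGION_DISJOINT as pmem
         * errs on the side of mapping the range decrypted.
         */
        if (rc != REGION_DISJOINT)
                return true;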



[PATCH v7 18/36] x86/efi: Update EFI pagetable creation to work with SME

2017-06-16 Thread Tom Lendacky
When SME is active, pagetable entries created for EFI need to have the
encryption mask set as necessary.

When the new pagetable pages are allocated they are mapped encrypted. So,
update the efi_pgt value that will be used in cr3 to include the encryption
mask so that the PGD table can be read successfully. The pagetable pages,
as well as the kernel, are also added to the pagetable mapping as encrypted.
All other EFI mappings are mapped decrypted (tables, etc.).

Reviewed-by: Borislav Petkov 
Signed-off-by: Tom Lendacky 
---
 arch/x86/platform/efi/efi_64.c |   15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 9bf72f5..12e8388 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -327,7 +327,7 @@ void efi_sync_low_kernel_mappings(void)
 
 int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 {
-   unsigned long pfn, text;
+   unsigned long pfn, text, pf;
struct page *page;
unsigned npages;
pgd_t *pgd;
@@ -335,7 +335,12 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
if (efi_enabled(EFI_OLD_MEMMAP))
return 0;
 
-   efi_scratch.efi_pgt = (pgd_t *)__pa(efi_pgd);
+   /*
+* Since the PGD is encrypted, set the encryption mask so that when
+* this value is loaded into cr3 the PGD will be decrypted during
+* the pagetable walk.
+*/
+   efi_scratch.efi_pgt = (pgd_t *)__sme_pa(efi_pgd);
pgd = efi_pgd;
 
/*
@@ -345,7 +350,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 * phys_efi_set_virtual_address_map().
 */
pfn = pa_memmap >> PAGE_SHIFT;
-   if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX | _PAGE_RW)) {
+   pf = _PAGE_NX | _PAGE_RW | _PAGE_ENC;
+   if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, pf)) {
pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap);
return 1;
}
@@ -388,7 +394,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
text = __pa(_text);
pfn = text >> PAGE_SHIFT;
 
-   if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, _PAGE_RW)) {
+   pf = _PAGE_RW | _PAGE_ENC;
+   if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, pf)) {
pr_err("Failed to map kernel text 1:1\n");
return 1;
}
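
__sme_pa() is the physical-address counterpart of the __sme_set() helper
used elsewhere in the series; per the earlier patches it is defined roughly
as (paraphrased sketch):

        /*
         * Physical address of x with the encryption bit ORed in, so the
         * hardware page-table walk treats the page as encrypted.
         */
        #define __sme_pa(x)             (__pa(x) | sme_me_mask)
        #define __sme_pa_nodebug(x)     (__pa_nodebug(x) | sme_me_mask)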



[PATCH v7 17/36] efi: Update efi_mem_type() to return an error rather than 0

2017-06-16 Thread Tom Lendacky
The efi_mem_type() function currently returns 0, which maps to
EFI_RESERVED_TYPE, if the function is unable to find a memmap entry for
the supplied physical address. Returning EFI_RESERVED_TYPE implies that
a memmap entry exists, when it doesn't.  Instead of returning 0, change
the function to return a negative error value when no memmap entry is
found.

Reviewed-by: Borislav Petkov 
Signed-off-by: Tom Lendacky 
---
 arch/ia64/kernel/efi.c  |4 ++--
 arch/x86/platform/efi/efi.c |6 +++---
 include/linux/efi.h |2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 1212956..8141600 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -757,14 +757,14 @@ static void __init handle_palo(unsigned long phys_addr)
return 0;
 }
 
-u32
+int
 efi_mem_type (unsigned long phys_addr)
 {
efi_memory_desc_t *md = efi_memory_descriptor(phys_addr);
 
if (md)
return md->type;
-   return 0;
+   return -EINVAL;
 }
 
 u64
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index f084d87..6217b23 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -1035,12 +1035,12 @@ void __init efi_enter_virtual_mode(void)
 /*
  * Convenience functions to obtain memory types and attributes
  */
-u32 efi_mem_type(unsigned long phys_addr)
+int efi_mem_type(unsigned long phys_addr)
 {
efi_memory_desc_t *md;
 
if (!efi_enabled(EFI_MEMMAP))
-   return 0;
+   return -ENOTSUPP;
 
for_each_efi_memory_desc(md) {
 if ((md->phys_addr <= phys_addr) &&
     (phys_addr < (md->phys_addr +
                   (md->num_pages << EFI_PAGE_SHIFT))))
 return md->type;
return md->type;
}
-   return 0;
+   return -EINVAL;
 }
 
 static int __init arch_parse_efi_cmdline(char *str)
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 8e24f09..4e47f78 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -985,7 +985,7 @@ static inline void efi_esrt_init(void) { }
 extern int efi_config_parse_tables(void *config_tables, int count, int sz,
   efi_config_table_type_t *arch_tables);
 extern u64 efi_get_iobase (void);
-extern u32 efi_mem_type (unsigned long phys_addr);
+extern int efi_mem_type(unsigned long phys_addr);
 extern u64 efi_mem_attributes (unsigned long phys_addr);
 extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size);
 extern int __init efi_uart_console_only (void);
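
With a signed return value, callers can now distinguish a genuinely
reserved entry from a missing one. A sketch of the caller pattern this
enables (shape assumed; the ioremap changes in patch 21 of this series use
a switch statement to the same effect):

        int type = efi_mem_type(phys_addr);

        if (type < 0)           /* -ENOTSUPP or -EINVAL: no memmap entry */
                return false;

        if (type == EFI_RESERVED_TYPE &&
            (efi_mem_attributes(phys_addr) & EFI_MEMORY_NV))
                return true;    /* genuinely reserved and non-volatile */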



[PATCH v7 16/36] efi: Add an EFI table address match function

2017-06-16 Thread Tom Lendacky
Add a function that will determine if a supplied physical address matches
the address of an EFI table.

Reviewed-by: Borislav Petkov 
Signed-off-by: Tom Lendacky 
---
 drivers/firmware/efi/efi.c |   33 +
 include/linux/efi.h|7 +++
 2 files changed, 40 insertions(+)

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index b372aad..983675d 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -55,6 +55,25 @@ struct efi __read_mostly efi = {
 };
 EXPORT_SYMBOL(efi);
 
+static unsigned long *efi_tables[] = {
+   &efi.mps,
+   &efi.acpi,
+   &efi.acpi20,
+   &efi.smbios,
+   &efi.smbios3,
+   &efi.sal_systab,
+   &efi.boot_info,
+   &efi.hcdp,
+   &efi.uga,
+   &efi.uv_systab,
+   &efi.fw_vendor,
+   &efi.runtime,
+   &efi.config_table,
+   &efi.esrt,
+   &efi.properties_table,
+   &efi.mem_attr_table,
+};
+
 static bool disable_runtime;
 static int __init setup_noefi(char *arg)
 {
@@ -854,6 +873,20 @@ int efi_status_to_err(efi_status_t status)
return err;
 }
 
+bool efi_is_table_address(unsigned long phys_addr)
+{
+   unsigned int i;
+
+   if (phys_addr == EFI_INVALID_TABLE_ADDR)
+   return false;
+
+   for (i = 0; i < ARRAY_SIZE(efi_tables); i++)
+   if (*(efi_tables[i]) == phys_addr)
+   return true;
+
+   return false;
+}
+
 #ifdef CONFIG_KEXEC
 static int update_efi_random_seed(struct notifier_block *nb,
  unsigned long code, void *unused)
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 8269bcb..8e24f09 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1091,6 +1091,8 @@ static inline bool efi_enabled(int feature)
return test_bit(feature, &efi.flags) != 0;
 }
 extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused);
+
+extern bool efi_is_table_address(unsigned long phys_addr);
 #else
 static inline bool efi_enabled(int feature)
 {
@@ -1104,6 +1106,11 @@ static inline bool efi_enabled(int feature)
 {
return false;
 }
+
+static inline bool efi_is_table_address(unsigned long phys_addr)
+{
+   return false;
+}
 #endif
 
 extern int efi_status_to_err(efi_status_t status);
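
The match function is used elsewhere in this series when deciding how to
map boot data: the EFI tables were written by the firmware before the
kernel enabled SME, so they must not be mapped with the encryption bit set.
A sketch of the intended call-site shape (assumed, following the
decrypted-mapping logic shown in the other patches on this page):

        /* Firmware wrote the EFI tables unencrypted; map them that way. */
        if (efi_is_table_address(phys_addr))
                return true;    /* map decrypted */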



[PATCH v7 15/36] x86/boot/e820: Add support to determine the E820 type of an address

2017-06-16 Thread Tom Lendacky
Add a function that will return the E820 type associated with an address
range.

Reviewed-by: Borislav Petkov 
Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/e820/api.h |2 ++
 arch/x86/kernel/e820.c  |   26 +++---
 2 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h
index 8e0f8b8..3641f5f 100644
--- a/arch/x86/include/asm/e820/api.h
+++ b/arch/x86/include/asm/e820/api.h
@@ -38,6 +38,8 @@
 extern void e820__reallocate_tables(void);
 extern void e820__register_nosave_regions(unsigned long limit_pfn);
 
+extern int  e820__get_entry_type(u64 start, u64 end);
+
 /*
  * Returns true iff the specified range [start,end) is completely contained inside
  * the ISA region.
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index d78a586..46c9b65 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -84,7 +84,8 @@ bool e820__mapped_any(u64 start, u64 end, enum e820_type type)
  * Note: this function only works correctly once the E820 table is sorted and
  * not-overlapping (at least for the range specified), which is the case normally.
  */
-bool __init e820__mapped_all(u64 start, u64 end, enum e820_type type)
+static struct e820_entry *__e820__mapped_all(u64 start, u64 end,
+enum e820_type type)
 {
int i;
 
@@ -110,9 +111,28 @@ bool __init e820__mapped_all(u64 start, u64 end, enum e820_type type)
 * coverage of the desired range exists:
 */
if (start >= end)
-   return 1;
+   return entry;
}
-   return 0;
+
+   return NULL;
+}
+
+/*
+ * This function checks if the entire <start,end> range is mapped with type.
+ */
+bool __init e820__mapped_all(u64 start, u64 end, enum e820_type type)
+{
+   return __e820__mapped_all(start, end, type);
+}
+
+/*
+ * This function returns the type associated with the range <start,end>.
+ */
+int e820__get_entry_type(u64 start, u64 end)
+{
+   struct e820_entry *entry = __e820__mapped_all(start, end, 0);
+
+   return entry ? entry->type : -EINVAL;
 }
 
 /*
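
A usage sketch (illustrative, not from the patch): the new helper makes "what
kind of memory is this?" checks one-liners, with the -EINVAL return covering
ranges that are not uniformly typed.

	#include <asm/e820/api.h>
	#include <asm/e820/types.h>

	/* Hedged sketch: true only if [start,end) is entirely E820 RAM. */
	static bool range_is_plain_ram(u64 start, u64 end)
	{
		return e820__get_entry_type(start, end) == E820_TYPE_RAM;
	}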



[PATCH v7 14/36] x86/mm: Insure that boot memory areas are mapped properly

2017-06-16 Thread Tom Lendacky
The boot data and command line data are present in memory in a decrypted
state and are copied early in the boot process.  The early page fault
support will map these areas as encrypted, so before attempting to copy
them, add decrypted mappings so the data is accessed properly when copied.

For the initrd, encrypt this data in place. Since the initrd area will
later be mapped as encrypted, the data will then be accessed properly.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/mem_encrypt.h |6 +++
 arch/x86/include/asm/pgtable.h |3 ++
 arch/x86/kernel/head64.c   |   30 +--
 arch/x86/kernel/setup.c            |    9 +
 arch/x86/mm/kasan_init_64.c        |    2 +
 arch/x86/mm/mem_encrypt.c  |   70 
 6 files changed, 115 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 6508ec9..3ffa5fa 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -26,6 +26,9 @@ void __init sme_early_encrypt(resource_size_t paddr,
 void __init sme_early_decrypt(resource_size_t paddr,
  unsigned long size);
 
+void __init sme_map_bootdata(char *real_mode_data);
+void __init sme_unmap_bootdata(char *real_mode_data);
+
 void __init sme_early_init(void);
 
 void __init sme_enable(void);
@@ -39,6 +42,9 @@ static inline void __init sme_early_encrypt(resource_size_t paddr,
 static inline void __init sme_early_decrypt(resource_size_t paddr,
unsigned long size) { }
 
+static inline void __init sme_map_bootdata(char *real_mode_data) { }
+static inline void __init sme_unmap_bootdata(char *real_mode_data) { }
+
 static inline void __init sme_early_init(void) { }
 
 static inline void __init sme_enable(void) { }
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index c6452cb..bbeae4a 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -23,6 +23,9 @@
 #ifndef __ASSEMBLY__
 #include <asm/x86_init.h>
 
+extern pgd_t early_top_pgt[PTRS_PER_PGD];
+int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
+
 void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
 void ptdump_walk_pgd_level_checkwx(void);
 
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 5d7363a..9e94ed2 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -34,7 +34,6 @@
 /*
  * Manage page tables very early on.
  */
-extern pgd_t early_top_pgt[PTRS_PER_PGD];
 extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
 static unsigned int __initdata next_early_pgt;
 pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
@@ -161,13 +160,13 @@ static void __init reset_early_page_tables(void)
 }
 
 /* Create a new PMD entry */
-int __init early_make_pgtable(unsigned long address)
+int __init __early_make_pgtable(unsigned long address, pmdval_t pmd)
 {
unsigned long physaddr = address - __PAGE_OFFSET;
pgdval_t pgd, *pgd_p;
p4dval_t p4d, *p4d_p;
pudval_t pud, *pud_p;
-   pmdval_t pmd, *pmd_p;
+   pmdval_t *pmd_p;
 
/* Invalid address or early pgt is done ?  */
if (physaddr >= MAXMEM || read_cr3_pa() != __pa_nodebug(early_top_pgt))
@@ -226,12 +225,21 @@ int __init early_make_pgtable(unsigned long address)
memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
		*pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
}
-   pmd = (physaddr & PMD_MASK) + early_pmd_flags;
pmd_p[pmd_index(address)] = pmd;
 
return 0;
 }
 
+int __init early_make_pgtable(unsigned long address)
+{
+   unsigned long physaddr = address - __PAGE_OFFSET;
+   pmdval_t pmd;
+
+   pmd = (physaddr & PMD_MASK) + early_pmd_flags;
+
+   return __early_make_pgtable(address, pmd);
+}
+
 /* Don't add a printk in there. printk relies on the PDA which is not initialized
    yet. */
 static void __init clear_bss(void)
@@ -254,6 +262,12 @@ static void __init copy_bootdata(char *real_mode_data)
char * command_line;
unsigned long cmd_line_ptr;
 
+   /*
+* If SME is active, this will create decrypted mappings of the
+* boot data in advance of the copy operations.
+*/
+   sme_map_bootdata(real_mode_data);
+
	memcpy(&boot_params, real_mode_data, sizeof boot_params);
	sanitize_boot_params(&boot_params);
cmd_line_ptr = get_cmd_line_ptr();
@@ -261,6 +275,14 @@ static void __init copy_bootdata(char *real_mode_data)
command_line = __va(cmd_line_ptr);
memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
}
+
+   /*
+* The old boot data is no longer needed and won't be reserved,
+* freeing up that memory for use by the system. If SME is active,
+* we need to 

[PATCH v7 11/36] x86/mm: Add SME support for read_cr3_pa()

2017-06-16 Thread Tom Lendacky
The cr3 register entry can contain the SME encryption mask that indicates
the PGD is encrypted.  The encryption mask should not be used when
creating a virtual address from the cr3 register, so remove the SME
encryption mask in the read_cr3_pa() function.

During early boot SME will need to use a native version of read_cr3_pa(),
so create native_read_cr3_pa().

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/processor-flags.h |3 ++-
 arch/x86/include/asm/processor.h   |5 +
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 79aa2f9..cb6999c 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -2,6 +2,7 @@
 #define _ASM_X86_PROCESSOR_FLAGS_H
 
 #include <uapi/asm/processor-flags.h>
+#include <linux/mem_encrypt.h>
 
 #ifdef CONFIG_VM86
 #define X86_VM_MASK	X86_EFLAGS_VM
@@ -33,7 +34,7 @@
  */
 #ifdef CONFIG_X86_64
 /* Mask off the address space ID bits. */
-#define CR3_ADDR_MASK 0x7FFFFFFFFFFFF000ull
+#define CR3_ADDR_MASK __sme_clr(0x7FFFFFFFFFFFF000ull)
 #define CR3_PCID_MASK 0xFFFull
 #else
 /*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 8010c97..ab878bd 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -240,6 +240,11 @@ static inline unsigned long read_cr3_pa(void)
return __read_cr3() & CR3_ADDR_MASK;
 }
 
+static inline unsigned long native_read_cr3_pa(void)
+{
+   return __native_read_cr3() & CR3_ADDR_MASK;
+}
+
 static inline void load_cr3(pgd_t *pgdir)
 {
write_cr3(__sme_pa(pgdir));
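
A sketch of why the mask matters (not part of the patch): the kernel routinely
turns CR3 back into a pointer, and the SME bit must be gone before the value is
treated as a physical address.

	/* Without __sme_clr() in CR3_ADDR_MASK this __va() would be bogus. */
	pgd_t *pgd = (pgd_t *)__va(read_cr3_pa());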



[PATCH v7 12/36] x86/mm: Extend early_memremap() support with additional attrs

2017-06-16 Thread Tom Lendacky
Add early_memremap() support to be able to specify encrypted and
decrypted mappings with and without write-protection. The use of
write-protection is necessary when encrypting data "in place". The
write-protect attribute is considered cacheable for loads, but not
stores. This implies that the hardware will never give the core a
dirty line with this memtype.

Reviewed-by: Borislav Petkov 
Signed-off-by: Tom Lendacky 
---
 arch/x86/Kconfig |4 +++
 arch/x86/include/asm/fixmap.h        |   13 ++
 arch/x86/include/asm/pgtable_types.h |    8 ++
 arch/x86/mm/ioremap.c                |   44 ++
 include/asm-generic/early_ioremap.h  |2 ++
 mm/early_ioremap.c   |   10 
 6 files changed, 81 insertions(+)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cf74791..0b09b88 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1433,6 +1433,10 @@ config AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT
  If set to N, then the encryption of system memory can be
  activated with the mem_encrypt=on command line option.
 
+config ARCH_USE_MEMREMAP_PROT
+   def_bool y
+   depends on AMD_MEM_ENCRYPT
+
 # Common NUMA Features
 config NUMA
bool "Numa Memory Allocation and Scheduler Support"
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index d9ff226..dcd9fb5 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -164,6 +164,19 @@ static inline void __set_fixmap(enum fixed_addresses idx,
  */
 #define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_IO_NOCACHE
 
+/*
+ * Early memremap routines used for in-place encryption. The mappings created
+ * by these routines are intended to be used as temporary mappings.
+ */
+void __init *early_memremap_encrypted(resource_size_t phys_addr,
+ unsigned long size);
+void __init *early_memremap_encrypted_wp(resource_size_t phys_addr,
+unsigned long size);
+void __init *early_memremap_decrypted(resource_size_t phys_addr,
+ unsigned long size);
+void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
+unsigned long size);
+
 #include <asm-generic/fixmap.h>
 
 #define __late_set_fixmap(idx, phys, flags) __set_fixmap(idx, phys, flags)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index de32ca3..32095af 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -161,6 +161,7 @@ enum page_cache_mode {
 
 #define _PAGE_CACHE_MASK   (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)
 #define _PAGE_NOCACHE  (cachemode2protval(_PAGE_CACHE_MODE_UC))
+#define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP))
 
 #define PAGE_NONE  __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
 #define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
@@ -189,6 +190,7 @@ enum page_cache_mode {
 #define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER)
 #define __PAGE_KERNEL_LARGE	(__PAGE_KERNEL | _PAGE_PSE)
 #define __PAGE_KERNEL_LARGE_EXEC   (__PAGE_KERNEL_EXEC | _PAGE_PSE)
+#define __PAGE_KERNEL_WP   (__PAGE_KERNEL | _PAGE_CACHE_WP)
 
 #define __PAGE_KERNEL_IO   (__PAGE_KERNEL)
 #define __PAGE_KERNEL_IO_NOCACHE   (__PAGE_KERNEL_NOCACHE)
@@ -202,6 +204,12 @@ enum page_cache_mode {
 #define _KERNPG_TABLE  (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED |\
 _PAGE_DIRTY | _PAGE_ENC)
 
+#define __PAGE_KERNEL_ENC  (__PAGE_KERNEL | _PAGE_ENC)
+#define __PAGE_KERNEL_ENC_WP   (__PAGE_KERNEL_WP | _PAGE_ENC)
+
+#define __PAGE_KERNEL_NOENC	(__PAGE_KERNEL)
+#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP)
+
 #define PAGE_KERNEL	__pgprot(__PAGE_KERNEL | _PAGE_ENC)
 #define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO | _PAGE_ENC)
 #define PAGE_KERNEL_EXEC   __pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC)
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index a382ba9..4feda83 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -422,6 +422,50 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
 }
 
+#ifdef CONFIG_ARCH_USE_MEMREMAP_PROT
+/* Remap memory with encryption */
+void __init *early_memremap_encrypted(resource_size_t phys_addr,
+ unsigned long size)
+{
+   return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC);
+}
+
+/*
+ * Remap memory with encryption and write-protected - cannot be called
+ * before pat_init() is called
+ */
+void __init *early_memremap_encrypted_wp(resource_size_t phys_addr,
+unsigned long size)
+{
+   /* Be sure the write-protect PAT entry is set for write-protect */
+	if (__pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] != _PAGE_CACHE_MODE_WP)
+		return NULL;
+
+	return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC_WP);
+}
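
A hedged usage sketch ('paddr' is illustrative; real callers arrive in the next
patch): peek at data that is still in the clear, without changing how the page
is mapped elsewhere.

	void *p = early_memremap_decrypted(paddr, PAGE_SIZE);

	if (p) {
		/* read the plaintext contents here */
		early_memunmap(p, PAGE_SIZE);
	}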

[PATCH v7 13/36] x86/mm: Add support for early encrypt/decrypt of memory

2017-06-16 Thread Tom Lendacky
Add support to be able to either encrypt or decrypt data in place during
the early stages of booting the kernel. This does not change the memory
encryption attribute - it is used for ensuring that data present in either
an encrypted or decrypted memory area is in the proper state (for example
the initrd will have been loaded by the boot loader and will not be
encrypted, but the memory that it resides in is marked as encrypted).

Reviewed-by: Borislav Petkov 
Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/mem_encrypt.h |   10 +
 arch/x86/mm/mem_encrypt.c  |   76 
 2 files changed, 86 insertions(+)

diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index faae4e1..6508ec9 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -21,6 +21,11 @@
 
 extern unsigned long sme_me_mask;
 
+void __init sme_early_encrypt(resource_size_t paddr,
+ unsigned long size);
+void __init sme_early_decrypt(resource_size_t paddr,
+ unsigned long size);
+
 void __init sme_early_init(void);
 
 void __init sme_enable(void);
@@ -29,6 +34,11 @@
 
 #define sme_me_mask	0UL
 
+static inline void __init sme_early_encrypt(resource_size_t paddr,
+   unsigned long size) { }
+static inline void __init sme_early_decrypt(resource_size_t paddr,
+   unsigned long size) { }
+
 static inline void __init sme_early_init(void) { }
 
 static inline void __init sme_enable(void) { }
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index b2d1cdf..b7671b9 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -17,6 +17,9 @@
 
 #include <linux/mm.h>
 
+#include <asm/tlbflush.h>
+#include <asm/fixmap.h>
+
 /*
  * Since SME related variables are set early in the boot process they must
  * reside in the .data section so as not to be zeroed out when the .bss
@@ -25,6 +28,79 @@
 unsigned long sme_me_mask __section(.data) = 0;
 EXPORT_SYMBOL_GPL(sme_me_mask);
 
+/* Buffer used for early in-place encryption by BSP, no locking needed */
+static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE);
+
+/*
+ * This routine does not change the underlying encryption setting of the
+ * page(s) that map this memory. It assumes that eventually the memory is
+ * meant to be accessed as either encrypted or decrypted but the contents
+ * are currently not in the desired state.
+ *
+ * This routine follows the steps outlined in the AMD64 Architecture
+ * Programmer's Manual Volume 2, Section 7.10.8 Encrypt-in-Place.
+ */
+static void __init __sme_early_enc_dec(resource_size_t paddr,
+  unsigned long size, bool enc)
+{
+   void *src, *dst;
+   size_t len;
+
+   if (!sme_me_mask)
+   return;
+
+   local_flush_tlb();
+   wbinvd();
+
+   /*
+	 * There are a limited number of early mapping slots, so map (at most)
+	 * one page at a time.
+*/
+   while (size) {
+   len = min_t(size_t, sizeof(sme_early_buffer), size);
+
+   /*
+* Create mappings for the current and desired format of
+* the memory. Use a write-protected mapping for the source.
+*/
+   src = enc ? early_memremap_decrypted_wp(paddr, len) :
+   early_memremap_encrypted_wp(paddr, len);
+
+   dst = enc ? early_memremap_encrypted(paddr, len) :
+   early_memremap_decrypted(paddr, len);
+
+   /*
+* If a mapping can't be obtained to perform the operation,
+* then eventual access of that area in the desired mode
+* will cause a crash.
+*/
+   BUG_ON(!src || !dst);
+
+   /*
+* Use a temporary buffer, of cache-line multiple size, to
+* avoid data corruption as documented in the APM.
+*/
+   memcpy(sme_early_buffer, src, len);
+   memcpy(dst, sme_early_buffer, len);
+
+   early_memunmap(dst, len);
+   early_memunmap(src, len);
+
+   paddr += len;
+   size -= len;
+   }
+}
+
+void __init sme_early_encrypt(resource_size_t paddr, unsigned long size)
+{
+   __sme_early_enc_dec(paddr, size, true);
+}
+
+void __init sme_early_decrypt(resource_size_t paddr, unsigned long size)
+{
+   __sme_early_enc_dec(paddr, size, false);
+}
+
 void __init sme_early_init(void)
 {
unsigned int i;
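
Later in the series this pair is used on the initrd, which the boot loader left
in the clear inside a region the kernel will treat as encrypted. A hedged
sketch of that call (symbol names illustrative):

	/* Encrypt the initrd contents in place so that reads through the
	 * encrypted kernel mapping return the original data. */
	sme_early_encrypt(ramdisk_image, ramdisk_size);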



[PATCH v7 06/36] x86/mm: Add Secure Memory Encryption (SME) support

2017-06-16 Thread Tom Lendacky
Add support for Secure Memory Encryption (SME). This initial support
provides a Kconfig entry to build the SME support into the kernel and
defines the memory encryption mask that will be used in subsequent
patches to mark pages as encrypted.

Reviewed-by: Borislav Petkov 
Signed-off-by: Tom Lendacky 
---
 arch/x86/Kconfig   |   26 ++
 arch/x86/include/asm/mem_encrypt.h |   30 ++
 arch/x86/mm/Makefile   |1 +
 arch/x86/mm/mem_encrypt.c  |   21 +
 include/linux/mem_encrypt.h        |   35 +++
 5 files changed, 113 insertions(+)
 create mode 100644 arch/x86/include/asm/mem_encrypt.h
 create mode 100644 arch/x86/mm/mem_encrypt.c
 create mode 100644 include/linux/mem_encrypt.h

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7c991d0..cf74791 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1407,6 +1407,32 @@ config X86_DIRECT_GBPAGES
  supports them), so don't confuse the user by printing
  that we have them enabled.
 
+config ARCH_HAS_MEM_ENCRYPT
+   def_bool y
+   depends on X86
+
+config AMD_MEM_ENCRYPT
+   bool "AMD Secure Memory Encryption (SME) support"
+   depends on X86_64 && CPU_SUP_AMD
+   ---help---
+ Say yes to enable support for the encryption of system memory.
+ This requires an AMD processor that supports Secure Memory
+ Encryption (SME).
+
+config AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT
+   bool "Activate AMD Secure Memory Encryption (SME) by default"
+   default y
+   depends on AMD_MEM_ENCRYPT
+   ---help---
+ Say yes to have system memory encrypted by default if running on
+ an AMD processor that supports Secure Memory Encryption (SME).
+
+ If set to Y, then the encryption of system memory can be
+ deactivated with the mem_encrypt=off command line option.
+
+ If set to N, then the encryption of system memory can be
+ activated with the mem_encrypt=on command line option.
+
 # Common NUMA Features
 config NUMA
bool "Numa Memory Allocation and Scheduler Support"
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
new file mode 100644
index 000..a105796
--- /dev/null
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -0,0 +1,30 @@
+/*
+ * AMD Memory Encryption Support
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __X86_MEM_ENCRYPT_H__
+#define __X86_MEM_ENCRYPT_H__
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+
+extern unsigned long sme_me_mask;
+
+#else  /* !CONFIG_AMD_MEM_ENCRYPT */
+
+#define sme_me_mask	0UL
+
+#endif /* CONFIG_AMD_MEM_ENCRYPT */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __X86_MEM_ENCRYPT_H__ */
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 0fbdcb6..a94a7b6 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -39,3 +39,4 @@ obj-$(CONFIG_X86_INTEL_MPX)   += mpx.o
 obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
 obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
 
+obj-$(CONFIG_AMD_MEM_ENCRYPT)  += mem_encrypt.o
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
new file mode 100644
index 000..b99d469
--- /dev/null
+++ b/arch/x86/mm/mem_encrypt.c
@@ -0,0 +1,21 @@
+/*
+ * AMD Memory Encryption Support
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/mm.h>
+
+/*
+ * Since SME related variables are set early in the boot process they must
+ * reside in the .data section so as not to be zeroed out when the .bss
+ * section is later cleared.
+ */
+unsigned long sme_me_mask __section(.data) = 0;
+EXPORT_SYMBOL_GPL(sme_me_mask);
diff --git a/include/linux/mem_encrypt.h b/include/linux/mem_encrypt.h
new file mode 100644
index 000..59769f7
--- /dev/null
+++ b/include/linux/mem_encrypt.h
@@ -0,0 +1,35 @@
+/*
+ * AMD Memory Encryption Support
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __MEM_ENCRYPT_H__
+#define __MEM_ENCRYPT_H__
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_ARCH_HAS_MEM_ENCRYPT
+
+#include <asm/mem_encrypt.h>
+
+#else  /* !CONFIG_ARCH_HAS_MEM_ENCRYPT */
+
+#define sme_me_mask	0UL
+
+#endif /* CONFIG_ARCH_HAS_MEM_ENCRYPT */
+
+static inline bool sme_active(void)
+{
+	return !!sme_me_mask;
+}
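
A sketch of how later patches are expected to consume the mask (illustrative
only): it is OR-ed into page table entries, and since it is 0 when SME is
inactive the same expression is safe on every system.

	/* Mark an early PMD entry encrypted; a no-op without SME. */
	pmd_entry = (physaddr & PMD_MASK) + early_pmd_flags + sme_me_mask;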

[PATCH v7 09/36] x86/mm: Simplify p[gum]d_page() macros

2017-06-16 Thread Tom Lendacky
Create a pgd_pfn() macro similar to the p[um]d_pfn() macros and then
use the p[gum]d_pfn() macros in the p[gum]d_page() macros instead of
duplicating the code.

Reviewed-by: Borislav Petkov 
Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/pgtable.h |   16 +---
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 77037b6..b64ea52 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -195,6 +195,11 @@ static inline unsigned long p4d_pfn(p4d_t p4d)
return (p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT;
 }
 
+static inline unsigned long pgd_pfn(pgd_t pgd)
+{
+   return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT;
+}
+
 static inline int p4d_large(p4d_t p4d)
 {
/* No 512 GiB pages yet */
@@ -704,8 +709,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
  * Currently stuck as a macro due to indirect forward reference to
  * linux/mmzone.h's __section_mem_map_addr() definition:
  */
-#define pmd_page(pmd)  \
-   pfn_to_page((pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT)
+#define pmd_page(pmd)  pfn_to_page(pmd_pfn(pmd))
 
 /*
  * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD]
@@ -773,8 +777,7 @@ static inline unsigned long pud_page_vaddr(pud_t pud)
  * Currently stuck as a macro due to indirect forward reference to
  * linux/mmzone.h's __section_mem_map_addr() definition:
  */
-#define pud_page(pud)  \
-   pfn_to_page((pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT)
+#define pud_page(pud)  pfn_to_page(pud_pfn(pud))
 
 /* Find an entry in the second-level page table.. */
 static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
@@ -824,8 +827,7 @@ static inline unsigned long p4d_page_vaddr(p4d_t p4d)
  * Currently stuck as a macro due to indirect forward reference to
  * linux/mmzone.h's __section_mem_map_addr() definition:
  */
-#define p4d_page(p4d)  \
-   pfn_to_page((p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT)
+#define p4d_page(p4d)  pfn_to_page(p4d_pfn(p4d))
 
 /* Find an entry in the third-level page table.. */
 static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
@@ -859,7 +861,7 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
  * Currently stuck as a macro due to indirect forward reference to
  * linux/mmzone.h's __section_mem_map_addr() definition:
  */
-#define pgd_page(pgd)  pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)
+#define pgd_page(pgd)  pfn_to_page(pgd_pfn(pgd))
 
 /* to find an entry in a page-table-directory. */
 static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
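
As a sanity check, the rewrite is behavior-preserving; for PMDs the old and new
forms expand to the same thing:

	/* before */ pfn_to_page((pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT)
	/* after  */ pfn_to_page(pmd_pfn(pmd))

with pmd_pfn() already defined as exactly that mask-and-shift.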



[PATCH v7 08/36] x86/mm: Add support to enable SME in early boot processing

2017-06-16 Thread Tom Lendacky
Add support to the early boot code to use Secure Memory Encryption (SME).
Since the kernel has been loaded into memory in a decrypted state, encrypt
the kernel in place and update the early pagetables with the memory
encryption mask so that new pagetable entries will use memory encryption.

The routines to set the encryption mask and perform the encryption are
stub routines for now with functionality to be added in a later patch.

Because the routines must be available to head_64.S, mem_encrypt.c is
always built; #ifdefs in mem_encrypt.c provide either the real
functionality or stub routines depending on CONFIG_AMD_MEM_ENCRYPT.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/mem_encrypt.h |8 +++
 arch/x86/kernel/head64.c   |   33 +-
 arch/x86/kernel/head_64.S  |   39 ++--
 arch/x86/mm/Makefile   |4 +---
 arch/x86/mm/mem_encrypt.c  |   24 ++
 5 files changed, 93 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index a105796..988b336 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -15,16 +15,24 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/init.h>
+
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 
 extern unsigned long sme_me_mask;
 
+void __init sme_enable(void);
+
 #else  /* !CONFIG_AMD_MEM_ENCRYPT */
 
 #define sme_me_mask	0UL
 
+static inline void __init sme_enable(void) { }
+
 #endif /* CONFIG_AMD_MEM_ENCRYPT */
 
+unsigned long sme_get_me_mask(void);
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __X86_MEM_ENCRYPT_H__ */
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 2b2ac38..95979c3 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include <linux/mem_encrypt.h>
 
 #include 
 #include 
@@ -46,6 +47,7 @@ static void __init *fixup_pointer(void *ptr, unsigned long physaddr)
 void __init __startup_64(unsigned long physaddr)
 {
unsigned long load_delta, *p;
+   unsigned long pgtable_flags;
pgdval_t *pgd;
p4dval_t *p4d;
pudval_t *pud;
@@ -66,6 +68,12 @@ void __init __startup_64(unsigned long physaddr)
if (load_delta & ~PMD_PAGE_MASK)
for (;;);
 
+   /* Activate Secure Memory Encryption (SME) if supported and enabled */
+   sme_enable();
+
+   /* Include the SME encryption mask in the fixup value */
+   load_delta += sme_get_me_mask();
+
/* Fixup the physical addresses in the page table */
 
	pgd = fixup_pointer(&early_top_pgt, physaddr);
@@ -92,28 +100,30 @@ void __init __startup_64(unsigned long physaddr)
 
pud = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
pmd = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
+   pgtable_flags = _KERNPG_TABLE + sme_get_me_mask();
 
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
		p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
 
i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
-   pgd[i + 0] = (pgdval_t)p4d + _KERNPG_TABLE;
-   pgd[i + 1] = (pgdval_t)p4d + _KERNPG_TABLE;
+   pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
+   pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;
 
i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D;
-   p4d[i + 0] = (pgdval_t)pud + _KERNPG_TABLE;
-   p4d[i + 1] = (pgdval_t)pud + _KERNPG_TABLE;
+   p4d[i + 0] = (pgdval_t)pud + pgtable_flags;
+   p4d[i + 1] = (pgdval_t)pud + pgtable_flags;
} else {
i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
-   pgd[i + 0] = (pgdval_t)pud + _KERNPG_TABLE;
-   pgd[i + 1] = (pgdval_t)pud + _KERNPG_TABLE;
+   pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
+   pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
}
 
i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD;
-   pud[i + 0] = (pudval_t)pmd + _KERNPG_TABLE;
-   pud[i + 1] = (pudval_t)pmd + _KERNPG_TABLE;
+   pud[i + 0] = (pudval_t)pmd + pgtable_flags;
+   pud[i + 1] = (pudval_t)pmd + pgtable_flags;
 
pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
+   pmd_entry += sme_get_me_mask();
pmd_entry +=  physaddr;
 
for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) {
@@ -134,9 +144,12 @@ void __init __startup_64(unsigned long physaddr)
pmd[i] += load_delta;
}
 
-   /* Fixup phys_base */
+   /*
+* Fixup phys_base - remove the memory encryption mask to obtain
+* the true physical address.
+*/
	p = fixup_pointer(&phys_base, physaddr);
-   *p += load_delta;
+   *p += load_delta - sme_get_me_mask();
 }
 
 /* Wipe all early page tables except for the kernel symbol map */
diff 
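
The mem_encrypt.c portion of this message is cut off above; a sketch of the
accessor the header declares, consistent with how the mask is used in
__startup_64() (hedged, since the actual body is truncated here):

	/* Return the SME encryption mask (0 if SME is not active). */
	unsigned long sme_get_me_mask(void)
	{
		return sme_me_mask;
	}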

[PATCH v7 07/36] x86/mm: Don't use phys_to_virt in ioremap() if SME is active

2017-06-16 Thread Tom Lendacky
Currently there is a check if the address being mapped is in the ISA
range (is_ISA_range()), and if it is then phys_to_virt() is used to
perform the mapping.  When SME is active, however, this will result
in the mapping having the encryption bit set when it is expected that
an ioremap() should not have the encryption bit set. So only use the
phys_to_virt() function if SME is not active.

Reviewed-by: Borislav Petkov 
Signed-off-by: Tom Lendacky 
---
 arch/x86/mm/ioremap.c |7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 4c1b5fd..a382ba9 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include <linux/mem_encrypt.h>
 
 #include 
 #include 
@@ -106,9 +107,11 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
}
 
/*
-* Don't remap the low PCI/ISA area, it's always mapped..
+* Don't remap the low PCI/ISA area, it's always mapped.
+*   But if SME is active, skip this so that the encryption bit
+*   doesn't get set.
 */
-   if (is_ISA_range(phys_addr, last_addr))
+   if (is_ISA_range(phys_addr, last_addr) && !sme_active())
return (__force void __iomem *)phys_to_virt(phys_addr);
 
/*
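
The net effect, sketched (address and size illustrative): with SME active even
an ISA-range mapping goes through the regular ioremap path, so the mapping is
created without the encryption bit.

	/* Legacy VGA window: no longer short-circuited via phys_to_virt(). */
	void __iomem *vga = ioremap(0xa0000, 0x10000);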



[PATCH v7 04/36] x86/CPU/AMD: Add the Secure Memory Encryption CPU feature

2017-06-16 Thread Tom Lendacky
Update the CPU features to include identifying and reporting on the
Secure Memory Encryption (SME) feature.  SME is identified by CPUID
0x8000001f, but requires BIOS support to enable it (set bit 23 of
MSR_K8_SYSCFG).  Only show the SME feature as available if reported by
CPUID and enabled by BIOS.

Reviewed-by: Borislav Petkov 
Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/cpufeatures.h |1 +
 arch/x86/include/asm/msr-index.h   |2 ++
 arch/x86/kernel/cpu/amd.c  |   13 +
 arch/x86/kernel/cpu/scattered.c|1 +
 4 files changed, 17 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index 2701e5f..2b692df 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -196,6 +196,7 @@
 
 #define X86_FEATURE_HW_PSTATE  ( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+#define X86_FEATURE_SME		( 7*32+10) /* AMD Secure Memory Encryption */
 
 #define X86_FEATURE_INTEL_PPIN	( 7*32+14) /* Intel Processor Inventory Number */
 #define X86_FEATURE_INTEL_PT   ( 7*32+15) /* Intel Processor Trace */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 18b1623..460ac01 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -352,6 +352,8 @@
 #define MSR_K8_TOP_MEM1	0xc001001a
 #define MSR_K8_TOP_MEM2	0xc001001d
 #define MSR_K8_SYSCFG  0xc0010010
+#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT  23
+#define MSR_K8_SYSCFG_MEM_ENCRYPT  BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT)
 #define MSR_K8_INT_PENDING_MSG 0xc0010055
 /* C1E active bits in int pending message */
 #define K8_INTP_C1E_ACTIVE_MASK	0x18000000
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bb5abe8..c47ceee 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -611,6 +611,19 @@ static void early_init_amd(struct cpuinfo_x86 *c)
 */
if (cpu_has_amd_erratum(c, amd_erratum_400))
set_cpu_bug(c, X86_BUG_AMD_E400);
+
+   /*
+* BIOS support is required for SME. If BIOS has not enabled SME
+* then don't advertise the feature (set in scattered.c)
+*/
+   if (cpu_has(c, X86_FEATURE_SME)) {
+   u64 msr;
+
+   /* Check if SME is enabled */
+   rdmsrl(MSR_K8_SYSCFG, msr);
+   if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+   clear_cpu_cap(c, X86_FEATURE_SME);
+   }
 }
 
 static void init_amd_k8(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 23c2350..05459ad 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -31,6 +31,7 @@ struct cpuid_bit {
	{ X86_FEATURE_HW_PSTATE,	CPUID_EDX,  7, 0x80000007, 0 },
	{ X86_FEATURE_CPB,		CPUID_EDX,  9, 0x80000007, 0 },
	{ X86_FEATURE_PROC_FEEDBACK,	CPUID_EDX, 11, 0x80000007, 0 },
+	{ X86_FEATURE_SME,		CPUID_EAX,  0, 0x8000001f, 0 },
{ 0, 0, 0, 0, 0 }
 };
 



[PATCH v7 01/36] x86: Document AMD Secure Memory Encryption (SME)

2017-06-16 Thread Tom Lendacky
Create a Documentation entry to describe the AMD Secure Memory
Encryption (SME) feature and add documentation for the mem_encrypt=
kernel parameter.

Reviewed-by: Borislav Petkov 
Signed-off-by: Tom Lendacky 
---
 Documentation/admin-guide/kernel-parameters.txt |   11 
 Documentation/x86/amd-memory-encryption.txt |   68 +++
 2 files changed, 79 insertions(+)
 create mode 100644 Documentation/x86/amd-memory-encryption.txt

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index ee5c65a..9edc0b7 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2197,6 +2197,17 @@
memory contents and reserves bad memory
regions that are detected.
 
+   mem_encrypt=[X86-64] AMD Secure Memory Encryption (SME) control
+   Valid arguments: on, off
+   Default (depends on kernel configuration option):
+ on  (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=y)
+ off (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=n)
+   mem_encrypt=on: Activate SME
			mem_encrypt=off:	Do not activate SME
+
+   Refer to Documentation/x86/amd-memory-encryption.txt
+   for details on when memory encryption can be activated.
+
mem_sleep_default=  [SUSPEND] Default system suspend mode:
s2idle  - Suspend-To-Idle
shallow - Power-On Suspend or equivalent (if supported)
diff --git a/Documentation/x86/amd-memory-encryption.txt 
b/Documentation/x86/amd-memory-encryption.txt
new file mode 100644
index 0000000..f512ab7
--- /dev/null
+++ b/Documentation/x86/amd-memory-encryption.txt
@@ -0,0 +1,68 @@
+Secure Memory Encryption (SME) is a feature found on AMD processors.
+
+SME provides the ability to mark individual pages of memory as encrypted using
+the standard x86 page tables.  A page that is marked encrypted will be
+automatically decrypted when read from DRAM and encrypted when written to
+DRAM.  SME can therefore be used to protect the contents of DRAM from physical
+attacks on the system.
+
+A page is encrypted when a page table entry has the encryption bit set (see
+below on how to determine its position).  The encryption bit can also be
+specified in the cr3 register, allowing the PGD table to be encrypted. Each
+successive level of page tables can also be encrypted by setting the encryption
+bit in the page table entry that points to the next table. This allows the full
+page table hierarchy to be encrypted. Note, this means that just because the
+encryption bit is set in cr3, it doesn't imply the full hierarchy is encrypted.
+Each page table entry in the hierarchy needs to have the encryption bit set to
+achieve that. So, theoretically, you could have the encryption bit set in cr3
+so that the PGD is encrypted, but not set the encryption bit in the PGD entry
+for a PUD, which results in the PUD pointed to by that entry not being
+encrypted.
+
+Support for SME can be determined through the CPUID instruction. The CPUID
+function 0x8000001f reports information related to SME:
+
+	0x8000001f[eax]:
+	Bit[0] indicates support for SME
+	0x8000001f[ebx]:
+   Bits[5:0]  pagetable bit number used to activate memory
+  encryption
+   Bits[11:6] reduction in physical address space, in bits, when
+  memory encryption is enabled (this only affects
+  system physical addresses, not guest physical
+  addresses)
+
+If support for SME is present, MSR 0xc0010010 (MSR_K8_SYSCFG) can be used to
+determine if SME is enabled and/or to enable memory encryption:
+
+   0xc0010010:
+   Bit[23]   0 = memory encryption features are disabled
+ 1 = memory encryption features are enabled
+
+Linux relies on BIOS to set this bit if BIOS has determined that the reduction
+in the physical address space as a result of enabling memory encryption (see
+CPUID information above) will not conflict with the address space resource
+requirements for the system.  If this bit is not set upon Linux startup then
+Linux itself will not set it and memory encryption will not be possible.
+
+The state of SME in the Linux kernel can be documented as follows:
+   - Supported:
+ The CPU supports SME (determined through CPUID instruction).
+
+   - Enabled:
+ Supported and bit 23 of MSR_K8_SYSCFG is set.
+
+   - Active:
+ Supported, Enabled and the Linux kernel is actively applying
+ the encryption bit to page table entries (the SME mask in the
+ kernel is non-zero).
+
+SME can also be enabled and activated in the BIOS. If SME is enabled and

[PATCH v7 02/36] x86/mm/pat: Set write-protect cache mode for full PAT support

2017-06-16 Thread Tom Lendacky
For processors that support PAT, set the write-protect cache mode
(_PAGE_CACHE_MODE_WP) entry to the actual write-protect value (0x05).

Acked-by: Borislav Petkov 
Signed-off-by: Tom Lendacky 
---
 arch/x86/mm/pat.c |6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 9b78685..6753d9c 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -295,7 +295,7 @@ static void init_cache_modes(void)
  * pat_init - Initialize PAT MSR and PAT table
  *
  * This function initializes PAT MSR and PAT table with an OS-defined value
- * to enable additional cache attributes, WC and WT.
+ * to enable additional cache attributes, WC, WT and WP.
  *
  * This function must be called on all CPUs using the specific sequence of
  * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this
@@ -356,7 +356,7 @@ void pat_init(void)
 *  010    2    UC-: _PAGE_CACHE_MODE_UC_MINUS
 *  011    3    UC : _PAGE_CACHE_MODE_UC
 *  100    4    WB : Reserved
-*  101    5    WC : Reserved
+*  101    5    WP : _PAGE_CACHE_MODE_WP
 *  110    6    UC-: Reserved
 *  111    7    WT : _PAGE_CACHE_MODE_WT
 *
@@ -364,7 +364,7 @@ void pat_init(void)
 * corresponding types in the presence of PAT errata.
 */
pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
- PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, WT);
+ PAT(4, WB) | PAT(5, WP) | PAT(6, UC_MINUS) | PAT(7, WT);
}
 
if (!boot_cpu_done) {



Re: hexagon: build error in -next due to 'mm: memcontrol: per-lruvec stats infrastructure'

2017-06-16 Thread Johannes Weiner
Hi Guenter,

On Wed, Jun 14, 2017 at 12:26:46AM -0700, Guenter Roeck wrote:
> Hi,
> 
> I see the following build error in -next when building hexagon images.
> 
>   CC  arch/hexagon/kernel/asm-offsets.s
> In file included from ./include/linux/memcontrol.h:30:0,
>  from ./include/linux/swap.h:8,
>  from ./arch/hexagon/include/asm/pgtable.h:27,
>  from ./include/linux/mm.h:70,
>  from arch/hexagon/kernel/asm-offsets.c:28:
> ./include/linux/vmstat.h: In function '__inc_zone_page_state':
> ./include/linux/vmstat.h:294:2: error: implicit declaration of function 
> 'page_zone' [-Werror=implicit-function-declaration]
> ./include/linux/vmstat.h:294:2: warning: passing argument 1 of 
> '__inc_zone_state' makes pointer from integer without a cast [enabled by 
> default]
> ./include/linux/vmstat.h:267:20: note: expected 'struct zone *' but argument 
> is of type 'int'

vmstat.h depends on definitions in mm.h, but mm.h through the above
chain includes vmstat.h first. It worked in my x86 test because x86
pgtable.h doesn't include swap.h.

The headers are a bit of a mess. memcontrol.h is supposed to be a
lower level header than mm.h and vmstat.h, yet the new accounting
functions depend on mm.h definitions.

Let's move the lruvec accounting infra to vmstat.h and shuffle
memcontrol.h into the stack under mm.h and vmstat.h.

Does the following fix the hexagon build?

---

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index da9360885260..10042ada06e6 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -26,8 +26,6 @@
 #include 
 #include 
 #include 
-#include 
-#include 
 #include 
 #include 
 
@@ -536,78 +534,6 @@ static inline void mod_memcg_page_state(struct page *page,
mod_memcg_state(page->mem_cgroup, idx, val);
 }
 
-static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
- enum node_stat_item idx)
-{
-   struct mem_cgroup_per_node *pn;
-   long val = 0;
-   int cpu;
-
-   if (mem_cgroup_disabled())
-   return node_page_state(lruvec_pgdat(lruvec), idx);
-
-   pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-   for_each_possible_cpu(cpu)
-   val += per_cpu(pn->lruvec_stat->count[idx], cpu);
-
-   if (val < 0)
-   val = 0;
-
-   return val;
-}
-
-static inline void __mod_lruvec_state(struct lruvec *lruvec,
- enum node_stat_item idx, int val)
-{
-   struct mem_cgroup_per_node *pn;
-
-   __mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
-   if (mem_cgroup_disabled())
-   return;
-   pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-   __mod_memcg_state(pn->memcg, idx, val);
-   __this_cpu_add(pn->lruvec_stat->count[idx], val);
-}
-
-static inline void mod_lruvec_state(struct lruvec *lruvec,
-   enum node_stat_item idx, int val)
-{
-   struct mem_cgroup_per_node *pn;
-
-   mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
-   if (mem_cgroup_disabled())
-   return;
-   pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-   mod_memcg_state(pn->memcg, idx, val);
-   this_cpu_add(pn->lruvec_stat->count[idx], val);
-}
-
-static inline void __mod_lruvec_page_state(struct page *page,
-  enum node_stat_item idx, int val)
-{
-   struct mem_cgroup_per_node *pn;
-
-   __mod_node_page_state(page_pgdat(page), idx, val);
-   if (mem_cgroup_disabled() || !page->mem_cgroup)
-   return;
-   __mod_memcg_state(page->mem_cgroup, idx, val);
-   pn = page->mem_cgroup->nodeinfo[page_to_nid(page)];
-   __this_cpu_add(pn->lruvec_stat->count[idx], val);
-}
-
-static inline void mod_lruvec_page_state(struct page *page,
-enum node_stat_item idx, int val)
-{
-   struct mem_cgroup_per_node *pn;
-
-   mod_node_page_state(page_pgdat(page), idx, val);
-   if (mem_cgroup_disabled() || !page->mem_cgroup)
-   return;
-   mod_memcg_state(page->mem_cgroup, idx, val);
-   pn = page->mem_cgroup->nodeinfo[page_to_nid(page)];
-   this_cpu_add(pn->lruvec_stat->count[idx], val);
-}
-
 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
gfp_t gfp_mask,
unsigned long *total_scanned);
@@ -835,36 +761,6 @@ static inline void mod_memcg_page_state(struct page *page,
 {
 }
 
-static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
- enum node_stat_item idx)
-{
-   return node_page_state(lruvec_pgdat(lruvec), idx);
-}
-
-static inline void __mod_lruvec_state(struct lruvec *lruvec,
- 

Re: [RFC PATCH 00/13] Switchtec NTB Support

2017-06-16 Thread Serge Semin
On Fri, Jun 16, 2017 at 11:08:52AM -0600, Logan Gunthorpe  
wrote:
> 
> 
> On 16/06/17 10:33 AM, Serge Semin wrote:
> > New NTB API is going to be merged to mainline kernel within next features
> > merge window, so it's really recommended to use that API for new hardware.
> > Could you please rebase your driver on top of the tree?
> > https://github.com/jonmason/ntb.git
> 
> Yes, Allen's already pointed this out. I'll be sure to fix it up before
> a final submission.
> 
> > According to the NTB philosophy, it's not good to have any hardware
> > emulation within hardware driver. Hardware driver must reflect the only
> > hardware abilities, nothing else. Could you please get rid of Scratchpad
> > emulation and add messaging as new NTB API has got a proper callback
> > functions for it?
> 
> I disagree completely. Practicality trumps philosophy in every case. I
> need emulated scratchpads for ntb_transport to work and I'm not getting
> rid of it (thus breaking things that work well) just because of your
> philosophical beliefs.
> 

It's what the NTB API was created for: to have a set of functions to access
NTB devices in a similar way. These aren't my beliefs, it's the way it was
created. I agree it can be optional, but it shouldn't be made the basis
of the driver. It is called an NTB "hardware" driver after all, not an "emulating" or
"abstracting" driver.
ntb_transport could work without Scratchpads, if it's properly altered to
use NTB messaging. This should be the way to make things compatible, but not
making the hardware driver suitable for just one ntb_transport.

> > Hmmm, I haven't seen the actual code (see my last comment), but
> > according to my impression of API, it's impossible to have memory window
> > accessed while NTB link is down, but Scratchpads still can be used.
> > In this case, if you got Scratchpads emulated over memory windows,
> > you must have got NTB link enabled before NTB device is registered, which
> > makes ntb_link_* methods kind of being useless unless Switchtec hardware
> > supports NTB link getting up/down for individual memory windows...
> 
> Nothing in-kernel actually uses the peer's scratchpads while the link is
> down and all clients seem specifically designed to wait until the link
> event to set them. So I think you're either wrong about that rule or we
> should change the rule going forward.
> 
> I'm not sure what you're referring to about the link stuff; as
> implemented, our link management works just fine.
> 
> > New NTB API is updated so to have access to any of peer ports. IDT driver
> > has got a special translation table to access peer functionality just by
> > providing an index to corresponding API callback. You can use it as
> > reference to have Switchtec driver accordingly altered. It would be vastly
> > useful to have one more multi-port hardware driver in the tree.
> 
> Yes, I expect we will get there eventually, it doesn't sound like much
> work. However, it's client support that's really going to make this
> interesting and worthwhile. That seems like the real challenge right now.
> 
> > If I'm not mistaken, these patches can be combined the way so to have
> > just two big functionally split patches:
> > 1) NTB: Microsemy Switchtec switch management driver alterations for NTB
> > 2) NTB: Add Microsemi Switchtec PCIe-switches support
> > It would really simplify the review. Could you please combine them?
> 
> Seems like every time I make a submission, someone either wants patches
> to be smaller and split up more or bigger and combined. I happen to
> agree with the people who prefer smaller patches and I think these
> provide good chunks for reviewers to look at. So, no, I'm not going to
> change this. Feel free to apply the patches to a git tree or view it on
> our github branch if you want to see the code combined.

It's not just my whim; it's the way it's usually done.
https://kernelnewbies.org/PatchPhilosophy

Cite from there:
"Each patch should group changes into a logical sequence. Bug fixes must
come first in the patchset, then new features. This is because we need to be
able to backport bug fixes to older kernels, and they should not depend on
new features."

You grouped the patches according to your logical view or development
progress (I don't know for sure), but it's not obvious to reviewers.
From my perspective your new Microsemi Switchtec NTB driver is just one
feature. I don't know who would think differently so as to split the solid
driver up for review. The Switchtec management driver alteration might be the
same - just one fix. It's much easier for you to have your commits squashed
than for me to look at your git tree, then get back to your patchset looking
for a necessary piece of the patch and commenting on it there.

Regards,
-Sergey

> 
> Thanks,
> 
> Logan
> 
> -- 
> You received this message because you are subscribed to the Google Groups 
> "linux-ntb" group.
> To unsubscribe from this group and stop receiving emails from 

Re: [PATCH v9 5/7] vfio: Define vfio based dma-buf operations

2017-06-16 Thread Kirti Wankhede


On 6/16/2017 10:09 PM, Alex Williamson wrote:
> On Fri, 16 Jun 2017 19:02:30 +0530
> Kirti Wankhede  wrote:
> 
>> On 6/16/2017 2:08 AM, Alex Williamson wrote:
>>> On Thu, 15 Jun 2017 18:00:38 +0200
>>> Gerd Hoffmann  wrote:
>>>   
   Hi,
  
>> +struct vfio_dmabuf_mgr_plane_info {
>> +__u64 start;
>> +__u64 drm_format_mod;
>> +__u32 drm_format;
>> +__u32 width;
>> +__u32 height;
>> +__u32 stride;
>> +__u32 size;
>> +__u32 x_pos;
>> +__u32 y_pos;
>> +__u32 padding;
>> +};
>> +
>
> This structure is generic, can remove dmabuf from its name,
> vfio_plane_info or vfio_vgpu_surface_info since this will only be
> used
> by vgpu.

 Agree.  
>>>
>>> I'm not sure I agree regarding the vgpu statement, maybe this is not
>>> dmabuf specific, but what makes it vgpu specific?  We need to separate
>>> our current usage plans from what it's actually describing and I don't
>>> see that it describes anything vgpu specific.
>>>
>> +struct vfio_dmabuf_mgr_query_plane {
>> +__u32 argsz;
>> +__u32 flags;
>> +struct vfio_dmabuf_mgr_plane_info plane_info;
>> +__u32 plane_id;
>> +};
>> +
>> +#define VFIO_DMABUF_MGR_QUERY_PLANE _IO(VFIO_TYPE, VFIO_BASE + 15)
>> +
>
> This same interface can be used to query surface/plane information
> for
> both, dmabuf and region, case. Here also 'DMABUF' can be removed and
> define flags if you want to differentiate query for 'dmabuf' and
> 'region'.

 Hmm, any specific reason why you want to use an ioctl for that?  I would
 simply place a "struct vfio_dmabuf_mgr_plane_info" (or whatever the
 final name will be) at the start of the region.  
>>>
>>> Right, these are ioctls on the dmabuf mgr fd, not the vfio device fd,
>>> if you're exposing a region with the info I wouldn't think you'd want
>>> the hassle of managing this separate fd when you could do something
>>> like Gerd suggests with defining the first page of the regions as
>>> containing the structure.  
>>
>> My suggestion was to use vfio device fd for this ioctl and have dmabuf
>> mgr fd as member in above query_plane structure, for region type it
>> would be set to 0.
>> Yes there is other way to query surface information as Gerd suggested,
>> but my point is: if a ioctl is being add, it could be used for both
>> types, dmabuf and region.
> 
> I think this suggests abandoning the dmabuf manager fd entirely.  That's
> not necessarily a bad thing, but I don't think the idea of the dmabuf
> manager fd stands if we push one of its primary reasons for existing
> back to the device fd.  Reading though previous posts, I think we
> embraced the dmabuf manager as a separate fd primarily for
> consolidation and the potential to use it as a notification point, the
> latter being only theoretically useful.
> 
> So perhaps this becomes:
> 
> struct vfio_device_gfx_plane_info {
>   __u64 start;
>   __u64 drm_format_mod;
>   __u32 drm_format;
>   __u32 width;
>   __u32 height;
>   __u32 stride;
>   __u32 size;
>   __u32 x_pos;
>   __u32 y_pos;
> };
> 
> struct vfio_device_query_gfx_plane {
>   __u32 argsz;
>   __u32 flags;
> #define VFIO_GFX_PLANE_FLAGS_REGION_ID(1 << 0)
> #define VFIO_GFX_PLANE_FLAGS_PLANE_ID (1 << 1)
>   struct vfio_device_gfx_plane_info plane_info;
>   __u32 id; 
> };
> 
> The flag defines the data in the id field as either referring to a
> region (perhaps there could be multiple regions with only one active)
> or a plane ID, which is acquired separately, such as via a dmabuf fd.
> This would be retrieved via an optional VFIO_DEVICE_QUERY_GFX_PLANE
> ioctl on the vfio device, implemented in the vendor driver.
> 
> Would the above, along with the already defined mechanism for defining
> device specific regions, account for NVIDIA's needs?
> 

Yes, works for region type solution that we would go with.

Thanks,
Kirti

> For dmabuf users, we'd still need a new ioctl to get the dmabuf fd.  We
> could either create a specific ioctl for this (ex.
> VFIO_DEVICE_GET_DMABUF_FD) or we could create a shared, generic GET_FD
> interface on the device.
> 
>>>  Maybe you could even allow mmap of that page
>>> to reduce the overhead of getting the current state.
>>
>> Can't mmap that page to get surface information. There is no way to
>> synchronize between QEMU reading this mmapped region and vendor driver
>> writing it. There could be a race condition between these two operations. Reads
>> on this page should be trapped and blocking, so that the surface in that
>> region is only updated when it's asked for.
>>
>>> For the sake of
>>> userspace, I'd hope we'd still use the same structure for either the
>>> ioctl or region mapping.  I'm not really in favor of declaring that
>>> this particular 

Re: [PATCH] arc: implement DMA_ATTR_NO_KERNEL_MAPPING

2017-06-16 Thread Vineet Gupta

On 06/16/2017 12:11 AM, Christoph Hellwig wrote:

This way allocations like the NVMe HMB don't consume iomap space

Signed-off-by: Christoph Hellwig 
---

Note: compile tested only, I stumbled over this when researching dma api
quirks for HMB support.

  arch/arc/mm/dma.c | 42 +-
  1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index 2a07e6ecafbd..d8999ac88879 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -28,13 +28,22 @@ static void *arc_dma_alloc(struct device *dev, size_t size,
struct page *page;
phys_addr_t paddr;
void *kvaddr;
-   int need_coh = 1, need_kvaddr = 0;
+   bool need_cache_sync = true, need_kvaddr = false;
  
  	page = alloc_pages(gfp, order);

if (!page)
return NULL;
  
  	/*

+* Memory without a kernel mapping doesn't need a kernel virtual address.
+* But we do the initial cache flush to make sure we don't write back
+* from a previous mapping into the now device-owned memory.
+*/
+   if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
+   need_cache_sync = true;
+   need_kvaddr = false;


This is re-setting to init values. I do understand that a reasonable compiler will 
elide those away. And maybe keeping them here just clarifies the semantics more. 
So ok !




+
+   /*
 * IOC relies on all data (even coherent DMA data) being in cache
 * Thus allocate normal cached memory
 *
@@ -45,17 +54,19 @@ static void *arc_dma_alloc(struct device *dev, size_t size,
 *   -For coherent data, Read/Write to buffers terminate early in cache
 *   (vs. always going to memory - thus are faster)
 */
-   if ((is_isa_arcv2() && ioc_enable) ||
-   (attrs & DMA_ATTR_NON_CONSISTENT))
-   need_coh = 0;
+   } else if ((is_isa_arcv2() && ioc_enable) ||
+  (attrs & DMA_ATTR_NON_CONSISTENT)) {
+   need_cache_sync = false;
+   need_kvaddr = true;


The conditions here can't really help decide need_kvaddr so best to leave it out 
and not use the "else if" construct. Let the various conditions set and reset 
these 2 knobs based on what is over-riding. e.g. DMA_ATTR_NO_KERNEL_MAPPING seems 
like an optimization hint from a subsys, but might be needed after all given the 
constraints of the architecture.



  
  	/*

 * - A coherent buffer needs MMU mapping to enforce non-cachability
 * - A highmem page needs a virtual handle (hence MMU mapping)
 *   independent of cachability
 */
-   if (PageHighMem(page) || need_coh)
-   need_kvaddr = 1;
+   } else if (PageHighMem(page)) {
+   need_kvaddr = true;
+   }


Again why "else if".

Also need_coh mandates a kvaddr on ARC (despite DMA_ATTR_NO_KERNEL_MAPPING) since 
the uncached kernel mmu mapping is how you get the coherent semantics.


Also now I think about it, I don't like the name change from need_coh to 
need_cache_sync. conceptually we have dma_alloc_coherent() -> arc_dma_alloc()

to get coherent mem and those semantics are only guaranteed with a kernel 
mapping.

  
  	/* This is linear addr (0x8000_0000 based) */

paddr = page_to_phys(page);
@@ -83,7 +94,7 @@ static void *arc_dma_alloc(struct device *dev, size_t size,
 * Currently flush_cache_vmap nukes the L1 cache completely which
 * will be optimized as a separate commit
 */
-   if (need_coh)
+   if (need_cache_sync)
dma_cache_wback_inv(paddr, size);
  
  	return kvaddr;

@@ -93,14 +104,19 @@ static void arc_dma_free(struct device *dev, size_t size, 
void *vaddr,
dma_addr_t dma_handle, unsigned long attrs)
  {
phys_addr_t paddr = plat_dma_to_phys(dev, dma_handle);
-   struct page *page = virt_to_page(paddr);
-   int is_non_coh = 1;
  
-	is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT) ||

-   (is_isa_arcv2() && ioc_enable);
+   if (!(attrs & DMA_ATTR_NO_KERNEL_MAPPING)) {


Again by similar reasoning as above, arch can be forced to ignore 
DMA_ATTR_NO_KERNEL_MAPPING so it alone can't be used to decide whether the mapping 
exists or not.




+   struct page *page = virt_to_page(paddr);
+   bool need_iounmap = true;
+
+   if (!PageHighMem(page) &&
+   ((is_isa_arcv2() && ioc_enable) ||
+(attrs & DMA_ATTR_NON_CONSISTENT)))
+   need_iounmap = false;
  
-	if (PageHighMem(page) || !is_non_coh)

-   iounmap((void __force __iomem *)vaddr);
+   if (need_iounmap)
+   iounmap((void __force __iomem *)vaddr);
+   }
  
  	__free_pages(page, get_order(size));

  }





[PATCH 03/44] dmaengine: ioat: don't use DMA_ERROR_CODE

2017-06-16 Thread Christoph Hellwig
DMA_ERROR_CODE is not a public API and will go away.  Instead properly
unwind based on the loop counter.
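
As a standalone illustration of the unwind idiom (the names here are
generic, not the driver's): on a mapping failure, i holds the number of
pages mapped so far, so walking it back down unmaps exactly those and
nothing else.

	int i;

	for (i = 0; i < n; i++) {
		dma[i] = dma_map_page(dev, pages[i], 0, PAGE_SIZE,
				      DMA_TO_DEVICE);
		if (dma_mapping_error(dev, dma[i]))
			goto unwind;
	}
	return 0;

unwind:
	/* undo only the i successful mappings */
	while (--i >= 0)
		dma_unmap_page(dev, dma[i], PAGE_SIZE, DMA_TO_DEVICE);
	return -ENOMEM;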

Signed-off-by: Christoph Hellwig 
Acked-by: Dave Jiang 
Acked-By: Vinod Koul 
---
 drivers/dma/ioat/init.c | 24 +++-
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c
index 6ad4384b3fa8..ed8ed1192775 100644
--- a/drivers/dma/ioat/init.c
+++ b/drivers/dma/ioat/init.c
@@ -839,8 +839,6 @@ static int ioat_xor_val_self_test(struct ioatdma_device 
*ioat_dma)
goto free_resources;
}
 
-   for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
-   dma_srcs[i] = DMA_ERROR_CODE;
for (i = 0; i < IOAT_NUM_SRC_TEST; i++) {
dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
   DMA_TO_DEVICE);
@@ -910,8 +908,6 @@ static int ioat_xor_val_self_test(struct ioatdma_device 
*ioat_dma)
 
xor_val_result = 1;
 
-   for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
-   dma_srcs[i] = DMA_ERROR_CODE;
for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) {
dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
   DMA_TO_DEVICE);
@@ -965,8 +961,6 @@ static int ioat_xor_val_self_test(struct ioatdma_device 
*ioat_dma)
op = IOAT_OP_XOR_VAL;
 
xor_val_result = 0;
-   for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
-   dma_srcs[i] = DMA_ERROR_CODE;
for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) {
dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
   DMA_TO_DEVICE);
@@ -1017,18 +1011,14 @@ static int ioat_xor_val_self_test(struct ioatdma_device 
*ioat_dma)
goto free_resources;
 dma_unmap:
if (op == IOAT_OP_XOR) {
-   if (dest_dma != DMA_ERROR_CODE)
-   dma_unmap_page(dev, dest_dma, PAGE_SIZE,
-  DMA_FROM_DEVICE);
-   for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
-   if (dma_srcs[i] != DMA_ERROR_CODE)
-   dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
-  DMA_TO_DEVICE);
+   while (--i >= 0)
+   dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
+  DMA_TO_DEVICE);
+   dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
} else if (op == IOAT_OP_XOR_VAL) {
-   for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
-   if (dma_srcs[i] != DMA_ERROR_CODE)
-   dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
-  DMA_TO_DEVICE);
+   while (--i >= 0)
+   dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
+  DMA_TO_DEVICE);
}
 free_resources:
dma->device_free_chan_resources(dma_chan);
-- 
2.11.0



[PATCH 05/44] drm/armada: don't abuse DMA_ERROR_CODE

2017-06-16 Thread Christoph Hellwig
dev_addr isn't even a dma_addr_t, and DMA_ERROR_CODE has never been
a valid driver API.  Add a bool mapped flag instead.

Signed-off-by: Christoph Hellwig 
---
 drivers/gpu/drm/armada/armada_fb.c  | 2 +-
 drivers/gpu/drm/armada/armada_gem.c | 5 ++---
 drivers/gpu/drm/armada/armada_gem.h | 1 +
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/armada/armada_fb.c 
b/drivers/gpu/drm/armada/armada_fb.c
index 2a7eb6817c36..92e6b08ea64a 100644
--- a/drivers/gpu/drm/armada/armada_fb.c
+++ b/drivers/gpu/drm/armada/armada_fb.c
@@ -133,7 +133,7 @@ static struct drm_framebuffer *armada_fb_create(struct 
drm_device *dev,
}
 
/* Framebuffer objects must have a valid device address for scanout */
-   if (obj->dev_addr == DMA_ERROR_CODE) {
+   if (!obj->mapped) {
ret = -EINVAL;
goto err_unref;
}
diff --git a/drivers/gpu/drm/armada/armada_gem.c 
b/drivers/gpu/drm/armada/armada_gem.c
index d6c2a5d190eb..a76ca21d063b 100644
--- a/drivers/gpu/drm/armada/armada_gem.c
+++ b/drivers/gpu/drm/armada/armada_gem.c
@@ -175,6 +175,7 @@ armada_gem_linear_back(struct drm_device *dev, struct 
armada_gem_object *obj)
 
obj->phys_addr = obj->linear->start;
obj->dev_addr = obj->linear->start;
+   obj->mapped = true;
}
 
DRM_DEBUG_DRIVER("obj %p phys %#llx dev %#llx\n", obj,
@@ -205,7 +206,6 @@ armada_gem_alloc_private_object(struct drm_device *dev, 
size_t size)
return NULL;
 
drm_gem_private_object_init(dev, &obj->obj, size);
-   obj->dev_addr = DMA_ERROR_CODE;
 
DRM_DEBUG_DRIVER("alloc private obj %p size %zu\n", obj, size);
 
@@ -229,8 +229,6 @@ static struct armada_gem_object 
*armada_gem_alloc_object(struct drm_device *dev,
return NULL;
}
 
-   obj->dev_addr = DMA_ERROR_CODE;
-
mapping = obj->obj.filp->f_mapping;
mapping_set_gfp_mask(mapping, GFP_HIGHUSER | __GFP_RECLAIMABLE);
 
@@ -610,5 +608,6 @@ int armada_gem_map_import(struct armada_gem_object *dobj)
return -EINVAL;
}
dobj->dev_addr = sg_dma_address(dobj->sgt->sgl);
+   dobj->mapped = true;
return 0;
 }
diff --git a/drivers/gpu/drm/armada/armada_gem.h 
b/drivers/gpu/drm/armada/armada_gem.h
index b88d2b9853c7..6e524e0676bb 100644
--- a/drivers/gpu/drm/armada/armada_gem.h
+++ b/drivers/gpu/drm/armada/armada_gem.h
@@ -16,6 +16,7 @@ struct armada_gem_object {
void*addr;
phys_addr_t phys_addr;
resource_size_t dev_addr;
+   boolmapped;
struct drm_mm_node  *linear;/* for linear backed */
struct page *page;  /* for page backed */
struct sg_table *sgt;   /* for imported */
-- 
2.11.0



[PATCH 06/44] iommu/dma: don't rely on DMA_ERROR_CODE

2017-06-16 Thread Christoph Hellwig
DMA_ERROR_CODE is not a public API and will go away soon.  The dma-iommu
driver already implements a proper ->mapping_error method, so it's only
using the value internally.  Add a new local define using the value from
arm64, which is the only current user of dma-iommu.
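
For reference, callers detect a failed mapping portably via
dma_mapping_error(), which is what lets each dma_map_ops implementation
keep its sentinel private.  A sketch (not part of this patch):

	dma_addr_t addr = dma_map_page(dev, page, 0, PAGE_SIZE,
				       DMA_TO_DEVICE);

	if (dma_mapping_error(dev, addr))	/* dispatches to ->mapping_error */
		return -ENOMEM;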

Signed-off-by: Christoph Hellwig 
---
 drivers/iommu/dma-iommu.c | 18 ++
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 62618e77bedc..9403336f1fa6 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -31,6 +31,8 @@
 #include 
 #include 
 
+#define IOMMU_MAPPING_ERROR	0
+
 struct iommu_dma_msi_page {
struct list_headlist;
dma_addr_t  iova;
@@ -500,7 +502,7 @@ void iommu_dma_free(struct device *dev, struct page 
**pages, size_t size,
 {
__iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle, size);
__iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
-   *handle = DMA_ERROR_CODE;
+   *handle = IOMMU_MAPPING_ERROR;
 }
 
 /**
@@ -533,7 +535,7 @@ struct page **iommu_dma_alloc(struct device *dev, size_t 
size, gfp_t gfp,
dma_addr_t iova;
unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
 
-   *handle = DMA_ERROR_CODE;
+   *handle = IOMMU_MAPPING_ERROR;
 
min_size = alloc_sizes & -alloc_sizes;
if (min_size < PAGE_SIZE) {
@@ -627,11 +629,11 @@ static dma_addr_t __iommu_dma_map(struct device *dev, 
phys_addr_t phys,
 
iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
if (!iova)
-   return DMA_ERROR_CODE;
+   return IOMMU_MAPPING_ERROR;
 
if (iommu_map(domain, iova, phys - iova_off, size, prot)) {
iommu_dma_free_iova(cookie, iova, size);
-   return DMA_ERROR_CODE;
+   return IOMMU_MAPPING_ERROR;
}
return iova + iova_off;
 }
@@ -671,7 +673,7 @@ static int __finalise_sg(struct device *dev, struct 
scatterlist *sg, int nents,
 
s->offset += s_iova_off;
s->length = s_length;
-   sg_dma_address(s) = DMA_ERROR_CODE;
+   sg_dma_address(s) = IOMMU_MAPPING_ERROR;
sg_dma_len(s) = 0;
 
/*
@@ -714,11 +716,11 @@ static void __invalidate_sg(struct scatterlist *sg, int 
nents)
int i;
 
for_each_sg(sg, s, nents, i) {
-   if (sg_dma_address(s) != DMA_ERROR_CODE)
+   if (sg_dma_address(s) != IOMMU_MAPPING_ERROR)
s->offset += sg_dma_address(s);
if (sg_dma_len(s))
s->length = sg_dma_len(s);
-   sg_dma_address(s) = DMA_ERROR_CODE;
+   sg_dma_address(s) = IOMMU_MAPPING_ERROR;
sg_dma_len(s) = 0;
}
 }
@@ -836,7 +838,7 @@ void iommu_dma_unmap_resource(struct device *dev, 
dma_addr_t handle,
 
 int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
-   return dma_addr == DMA_ERROR_CODE;
+   return dma_addr == IOMMU_MAPPING_ERROR;
 }
 
 static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
-- 
2.11.0



[PATCH 04/44] drm/exynos: don't use DMA_ERROR_CODE

2017-06-16 Thread Christoph Hellwig
DMA_ERROR_CODE already isn't a valid API for drivers to use and will
go away soon.  exynos_drm_fb_dma_addr uses it as an error return when
the passed-in index is invalid, but the callers never check for it
and instead pass the address straight to the hardware.

Add a WARN_ON instead and just return 0.

Signed-off-by: Christoph Hellwig 
---
 drivers/gpu/drm/exynos/exynos_drm_fb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c 
b/drivers/gpu/drm/exynos/exynos_drm_fb.c
index c77a5aced81a..d48fd7c918f8 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fb.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c
@@ -181,8 +181,8 @@ dma_addr_t exynos_drm_fb_dma_addr(struct drm_framebuffer 
*fb, int index)
 {
struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb);
 
-   if (index >= MAX_FB_BUFFER)
-   return DMA_ERROR_CODE;
+   if (WARN_ON_ONCE(index >= MAX_FB_BUFFER))
+   return 0;
 
return exynos_fb->dma_addr[index];
 }
-- 
2.11.0



[PATCH 07/44] xen-swiotlb: consolidate xen_swiotlb_dma_ops

2017-06-16 Thread Christoph Hellwig
ARM and x86 had duplicated versions of the dma_ops structure; the
only difference is that x86 hasn't wired up the set_dma_mask,
mmap, and get_sgtable ops yet.  On x86 all of them are identical
to the generic versions, so they aren't needed but are harmless.

All the symbols used only for xen_swiotlb_dma_ops can now be marked
static as well.

Signed-off-by: Christoph Hellwig 
Reviewed-by: Konrad Rzeszutek Wilk 
---
 arch/arm/xen/mm.c  | 17 
 arch/x86/xen/pci-swiotlb-xen.c | 14 ---
 drivers/xen/swiotlb-xen.c  | 93 ++
 include/xen/swiotlb-xen.h  | 62 +---
 4 files changed, 49 insertions(+), 137 deletions(-)

diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index f0325d96b97a..785d2a562a23 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -185,23 +185,6 @@ EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
 const struct dma_map_ops *xen_dma_ops;
 EXPORT_SYMBOL(xen_dma_ops);
 
-static const struct dma_map_ops xen_swiotlb_dma_ops = {
-   .alloc = xen_swiotlb_alloc_coherent,
-   .free = xen_swiotlb_free_coherent,
-   .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu,
-   .sync_single_for_device = xen_swiotlb_sync_single_for_device,
-   .sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu,
-   .sync_sg_for_device = xen_swiotlb_sync_sg_for_device,
-   .map_sg = xen_swiotlb_map_sg_attrs,
-   .unmap_sg = xen_swiotlb_unmap_sg_attrs,
-   .map_page = xen_swiotlb_map_page,
-   .unmap_page = xen_swiotlb_unmap_page,
-   .dma_supported = xen_swiotlb_dma_supported,
-   .set_dma_mask = xen_swiotlb_set_dma_mask,
-   .mmap = xen_swiotlb_dma_mmap,
-   .get_sgtable = xen_swiotlb_get_sgtable,
-};
-
 int __init xen_mm_init(void)
 {
struct gnttab_cache_flush cflush;
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index 42b08f8fc2ca..37c6056a7bba 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -18,20 +18,6 @@
 
 int xen_swiotlb __read_mostly;
 
-static const struct dma_map_ops xen_swiotlb_dma_ops = {
-   .alloc = xen_swiotlb_alloc_coherent,
-   .free = xen_swiotlb_free_coherent,
-   .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu,
-   .sync_single_for_device = xen_swiotlb_sync_single_for_device,
-   .sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu,
-   .sync_sg_for_device = xen_swiotlb_sync_sg_for_device,
-   .map_sg = xen_swiotlb_map_sg_attrs,
-   .unmap_sg = xen_swiotlb_unmap_sg_attrs,
-   .map_page = xen_swiotlb_map_page,
-   .unmap_page = xen_swiotlb_unmap_page,
-   .dma_supported = xen_swiotlb_dma_supported,
-};
-
 /*
  * pci_xen_swiotlb_detect - set xen_swiotlb to 1 if necessary
  *
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 8dab0d3dc172..a0f006daab48 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -295,7 +295,8 @@ int __ref xen_swiotlb_init(int verbose, bool early)
free_pages((unsigned long)xen_io_tlb_start, order);
return rc;
 }
-void *
+
+static void *
 xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
   dma_addr_t *dma_handle, gfp_t flags,
   unsigned long attrs)
@@ -346,9 +347,8 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t 
size,
memset(ret, 0, size);
return ret;
 }
-EXPORT_SYMBOL_GPL(xen_swiotlb_alloc_coherent);
 
-void
+static void
 xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
  dma_addr_t dev_addr, unsigned long attrs)
 {
@@ -369,8 +369,6 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t 
size, void *vaddr,
 
xen_free_coherent_pages(hwdev, size, vaddr, (dma_addr_t)phys, attrs);
 }
-EXPORT_SYMBOL_GPL(xen_swiotlb_free_coherent);
-
 
 /*
  * Map a single buffer of the indicated size for DMA in streaming mode.  The
@@ -379,7 +377,7 @@ EXPORT_SYMBOL_GPL(xen_swiotlb_free_coherent);
  * Once the device is given the dma address, the device owns this memory until
  * either xen_swiotlb_unmap_page or xen_swiotlb_dma_sync_single is performed.
  */
-dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
+static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir,
unsigned long attrs)
@@ -429,7 +427,6 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct 
page *page,
 
return DMA_ERROR_CODE;
 }
-EXPORT_SYMBOL_GPL(xen_swiotlb_map_page);
 
 /*
  * Unmap a single streaming mode DMA translation.  The dma_addr and size must
@@ -467,13 +464,12 @@ static void xen_unmap_single(struct device *hwdev, 
dma_addr_t dev_addr,
dma_mark_clean(phys_to_virt(paddr), size);
 }
 
-void xen_swiotlb_unmap_page(struct device *hwdev, 

[PATCH 09/44] c6x: remove DMA_ERROR_CODE

2017-06-16 Thread Christoph Hellwig
Signed-off-by: Christoph Hellwig 
---
 arch/c6x/include/asm/dma-mapping.h | 5 -
 1 file changed, 5 deletions(-)

diff --git a/arch/c6x/include/asm/dma-mapping.h 
b/arch/c6x/include/asm/dma-mapping.h
index aca9f755e4f8..05daf1038111 100644
--- a/arch/c6x/include/asm/dma-mapping.h
+++ b/arch/c6x/include/asm/dma-mapping.h
@@ -12,11 +12,6 @@
 #ifndef _ASM_C6X_DMA_MAPPING_H
 #define _ASM_C6X_DMA_MAPPING_H
 
-/*
- * DMA errors are defined by all-bits-set in the DMA address.
- */
-#define DMA_ERROR_CODE ~0
-
 extern const struct dma_map_ops c6x_dma_ops;
 
 static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-- 
2.11.0



[PATCH 08/44] xen-swiotlb: implement ->mapping_error

2017-06-16 Thread Christoph Hellwig
DMA_ERROR_CODE is going to go away, so don't rely on it.

Signed-off-by: Christoph Hellwig 
Reviewed-by: Konrad Rzeszutek Wilk 
---
 drivers/xen/swiotlb-xen.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index a0f006daab48..c3a04b2d7532 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -67,6 +67,8 @@ static unsigned long dma_alloc_coherent_mask(struct device 
*dev,
 }
 #endif
 
+#define XEN_SWIOTLB_ERROR_CODE (~(dma_addr_t)0x0)
+
 static char *xen_io_tlb_start, *xen_io_tlb_end;
 static unsigned long xen_io_tlb_nslabs;
 /*
@@ -410,7 +412,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, 
struct page *page,
map = swiotlb_tbl_map_single(dev, start_dma_addr, phys, size, dir,
 attrs);
if (map == SWIOTLB_MAP_ERROR)
-   return DMA_ERROR_CODE;
+   return XEN_SWIOTLB_ERROR_CODE;
 
dev_addr = xen_phys_to_bus(map);
xen_dma_map_page(dev, pfn_to_page(map >> PAGE_SHIFT),
@@ -425,7 +427,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, 
struct page *page,
attrs |= DMA_ATTR_SKIP_CPU_SYNC;
swiotlb_tbl_unmap_single(dev, map, size, dir, attrs);
 
-   return DMA_ERROR_CODE;
+   return XEN_SWIOTLB_ERROR_CODE;
 }
 
 /*
@@ -715,6 +717,11 @@ xen_swiotlb_get_sgtable(struct device *dev, struct 
sg_table *sgt,
return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size);
 }
 
+static int xen_swiotlb_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+   return dma_addr == XEN_SWIOTLB_ERROR_CODE;
+}
+
 const struct dma_map_ops xen_swiotlb_dma_ops = {
.alloc = xen_swiotlb_alloc_coherent,
.free = xen_swiotlb_free_coherent,
@@ -730,4 +737,5 @@ const struct dma_map_ops xen_swiotlb_dma_ops = {
.set_dma_mask = xen_swiotlb_set_dma_mask,
.mmap = xen_swiotlb_dma_mmap,
.get_sgtable = xen_swiotlb_get_sgtable,
+   .mapping_error  = xen_swiotlb_mapping_error,
 };
-- 
2.11.0



[PATCH 10/44] ia64: remove DMA_ERROR_CODE

2017-06-16 Thread Christoph Hellwig
All ia64 dma_mapping_ops instances already have a mapping_error member.

Signed-off-by: Christoph Hellwig 
---
 arch/ia64/include/asm/dma-mapping.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/ia64/include/asm/dma-mapping.h 
b/arch/ia64/include/asm/dma-mapping.h
index 73ec3c6f4cfe..3ce5ab4339f3 100644
--- a/arch/ia64/include/asm/dma-mapping.h
+++ b/arch/ia64/include/asm/dma-mapping.h
@@ -12,8 +12,6 @@
 
 #define ARCH_HAS_DMA_GET_REQUIRED_MASK
 
-#define DMA_ERROR_CODE 0
-
 extern const struct dma_map_ops *dma_ops;
 extern struct ia64_machine_vector ia64_mv;
 extern void set_iommu_machvec(void);
-- 
2.11.0



[PATCH 11/44] m32r: remove DMA_ERROR_CODE

2017-06-16 Thread Christoph Hellwig
dma-noop is the only dma_mapping_ops instance for m32r and does not return
errors.

Signed-off-by: Christoph Hellwig 
---
 arch/m32r/include/asm/dma-mapping.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/m32r/include/asm/dma-mapping.h 
b/arch/m32r/include/asm/dma-mapping.h
index c01d9f52d228..aff3ae8b62f7 100644
--- a/arch/m32r/include/asm/dma-mapping.h
+++ b/arch/m32r/include/asm/dma-mapping.h
@@ -8,8 +8,6 @@
 #include 
 #include 
 
-#define DMA_ERROR_CODE (~(dma_addr_t)0x0)
-
 static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
	return &dma_noop_ops;
-- 
2.11.0



Re: [PATCH v4 4/5] watchdog: provide watchdog_reconfigure() for arch watchdogs

2017-06-16 Thread Andrew Morton
On Fri, 16 Jun 2017 16:57:14 +1000 Nicholas Piggin  wrote:

> After reconfiguring watchdog sysctls etc., architecture specific
> watchdogs may not get all their parameters updated.
> 
> watchdog_reconfigure() can be implemented to pull the new values
> in and set the arch NMI watchdog.
> 

I'll update the title and changelog to say "watchdog_nmi_reconfigure".

> --- a/kernel/watchdog.c
> +++ b/kernel/watchdog.c
> @@ -123,6 +123,11 @@ void __weak watchdog_nmi_disable(unsigned int cpu)
>  {
>  }
>  
> +void __weak watchdog_nmi_reconfigure(void)
> +{
> +}

Can we please get some documentation in here describing what it's for? 
How arch maintainers might use this?  When and why it is called, what
it must do?  etc.
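
Something along these lines, perhaps (the wording is illustrative only,
not from the patch):

/**
 * watchdog_nmi_reconfigure - propagate updated watchdog parameters
 *
 * Weak stub, overridden by architectures that drive their own NMI
 * watchdog.  Called after the watchdog sysctls (enabled state,
 * threshold, cpumask) have changed, so the arch implementation can
 * pull in the new values and restart or retune its watchdog.
 */
void __weak watchdog_nmi_reconfigure(void)
{
}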




[PATCH 01/44] firmware/ivc: use dma_mapping_error

2017-06-16 Thread Christoph Hellwig
DMA_ERROR_CODE is not supposed to be used by drivers.

Signed-off-by: Christoph Hellwig 
Acked-by: Thierry Reding 
---
 drivers/firmware/tegra/ivc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/firmware/tegra/ivc.c b/drivers/firmware/tegra/ivc.c
index 29ecfd815320..a01461d63f68 100644
--- a/drivers/firmware/tegra/ivc.c
+++ b/drivers/firmware/tegra/ivc.c
@@ -646,12 +646,12 @@ int tegra_ivc_init(struct tegra_ivc *ivc, struct device 
*peer, void *rx,
if (peer) {
ivc->rx.phys = dma_map_single(peer, rx, queue_size,
  DMA_BIDIRECTIONAL);
-   if (ivc->rx.phys == DMA_ERROR_CODE)
+   if (dma_mapping_error(peer, ivc->rx.phys))
return -ENOMEM;
 
ivc->tx.phys = dma_map_single(peer, tx, queue_size,
  DMA_BIDIRECTIONAL);
-   if (ivc->tx.phys == DMA_ERROR_CODE) {
+   if (dma_mapping_error(peer, ivc->tx.phys)) {
dma_unmap_single(peer, ivc->rx.phys, queue_size,
 DMA_BIDIRECTIONAL);
return -ENOMEM;
-- 
2.11.0



[PATCH 12/44] microblaze: remove DMA_ERROR_CODE

2017-06-16 Thread Christoph Hellwig
microblaze does not return errors for dma_map_page.

Signed-off-by: Christoph Hellwig 
---
 arch/microblaze/include/asm/dma-mapping.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/microblaze/include/asm/dma-mapping.h 
b/arch/microblaze/include/asm/dma-mapping.h
index 3fad5e722a66..e15cd2f76e23 100644
--- a/arch/microblaze/include/asm/dma-mapping.h
+++ b/arch/microblaze/include/asm/dma-mapping.h
@@ -28,8 +28,6 @@
 #include 
 #include 
 
-#define DMA_ERROR_CODE (~(dma_addr_t)0x0)
-
 #define __dma_alloc_coherent(dev, gfp, size, handle)   NULL
 #define __dma_free_coherent(size, addr)((void)0)
 
-- 
2.11.0



[PATCH 15/44] xtensa: remove DMA_ERROR_CODE

2017-06-16 Thread Christoph Hellwig
xtensa already implements the mapping_error method for its only
dma_map_ops instance.

Signed-off-by: Christoph Hellwig 
---
 arch/xtensa/include/asm/dma-mapping.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/xtensa/include/asm/dma-mapping.h 
b/arch/xtensa/include/asm/dma-mapping.h
index c6140fa8c0be..269738dc9d1d 100644
--- a/arch/xtensa/include/asm/dma-mapping.h
+++ b/arch/xtensa/include/asm/dma-mapping.h
@@ -16,8 +16,6 @@
 #include 
 #include 
 
-#define DMA_ERROR_CODE (~(dma_addr_t)0x0)
-
 extern const struct dma_map_ops xtensa_dma_map_ops;
 
 static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-- 
2.11.0



[PATCH 16/44] arm64: remove DMA_ERROR_CODE

2017-06-16 Thread Christoph Hellwig
The dma alloc interface returns an error by returning NULL, and the
mapping interfaces rely on the mapping_error method, which the dummy
ops already implement correctly.

Thus remove the DMA_ERROR_CODE define.
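
In other words, callers already have everything they need; a sketch:

	dma_addr_t handle;
	void *cpu = dma_alloc_coherent(dev, size, &handle, GFP_KERNEL);

	if (!cpu)	/* failure is signalled by NULL, not via *handle */
		return -ENOMEM;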

Signed-off-by: Christoph Hellwig 
Reviewed-by: Robin Murphy 
---
 arch/arm64/include/asm/dma-mapping.h | 1 -
 arch/arm64/mm/dma-mapping.c  | 3 +--
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/dma-mapping.h 
b/arch/arm64/include/asm/dma-mapping.h
index 5392dbeffa45..cf8fc8f05580 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -24,7 +24,6 @@
 #include 
 #include 
 
-#define DMA_ERROR_CODE (~(dma_addr_t)0)
 extern const struct dma_map_ops dummy_dma_ops;
 
 static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 3216e098c058..147fbb907a2f 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -184,7 +184,6 @@ static void *__dma_alloc(struct device *dev, size_t size,
 no_map:
__dma_free_coherent(dev, size, ptr, *dma_handle, attrs);
 no_mem:
-   *dma_handle = DMA_ERROR_CODE;
return NULL;
 }
 
@@ -487,7 +486,7 @@ static dma_addr_t __dummy_map_page(struct device *dev, 
struct page *page,
   enum dma_data_direction dir,
   unsigned long attrs)
 {
-   return DMA_ERROR_CODE;
+   return 0;
 }
 
 static void __dummy_unmap_page(struct device *dev, dma_addr_t dev_addr,
-- 
2.11.0



Re: [PATCH] usb: gadget: bdc: 64-bit pointer capability check

2017-06-16 Thread Florian Fainelli
On 06/15/2017 02:09 AM, Srinath Mannam wrote:
> Corrected the register to check the 64-bit pointer
> capability state. The 64-bit pointer implementation capability
> was being checked in the wrong register, which caused BDC
> enumeration failure with 64-bit memory addresses.
> 
> Fixes: efed421a94e6 ("usb: gadget: Add UDC driver for
> Broadcom USB3.0 device controller IP BDC")
> 
> Signed-off-by: Srinath Mannam 

Reviewed-by: Florian Fainelli 
-- 
Florian


[PATCH 13/44] openrisc: remove DMA_ERROR_CODE

2017-06-16 Thread Christoph Hellwig
openrisc does not return errors for dma_map_page.

Signed-off-by: Christoph Hellwig 
---
 arch/openrisc/include/asm/dma-mapping.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/openrisc/include/asm/dma-mapping.h 
b/arch/openrisc/include/asm/dma-mapping.h
index 0c0075f17145..a4ea139c2ef9 100644
--- a/arch/openrisc/include/asm/dma-mapping.h
+++ b/arch/openrisc/include/asm/dma-mapping.h
@@ -26,8 +26,6 @@
 #include 
 #include 
 
-#define DMA_ERROR_CODE (~(dma_addr_t)0x0)
-
 extern const struct dma_map_ops or1k_dma_map_ops;
 
 static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-- 
2.11.0



[PATCH 18/44] iommu/amd: implement ->mapping_error

2017-06-16 Thread Christoph Hellwig
DMA_ERROR_CODE is going to go away, so don't rely on it.

Signed-off-by: Christoph Hellwig 
---
 drivers/iommu/amd_iommu.c | 18 +-
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 63cacf5d6cf2..d41280e869de 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -54,6 +54,8 @@
 #include "amd_iommu_types.h"
 #include "irq_remapping.h"
 
+#define AMD_IOMMU_MAPPING_ERROR	0
+
 #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
 
 #define LOOP_TIMEOUT   10
@@ -2394,7 +2396,7 @@ static dma_addr_t __map_single(struct device *dev,
paddr &= PAGE_MASK;
 
address = dma_ops_alloc_iova(dev, dma_dom, pages, dma_mask);
-   if (address == DMA_ERROR_CODE)
+   if (address == AMD_IOMMU_MAPPING_ERROR)
goto out;
 
prot = dir2prot(direction);
@@ -2431,7 +2433,7 @@ static dma_addr_t __map_single(struct device *dev,
 
dma_ops_free_iova(dma_dom, address, pages);
 
-   return DMA_ERROR_CODE;
+   return AMD_IOMMU_MAPPING_ERROR;
 }
 
 /*
@@ -2483,7 +2485,7 @@ static dma_addr_t map_page(struct device *dev, struct 
page *page,
if (PTR_ERR(domain) == -EINVAL)
return (dma_addr_t)paddr;
else if (IS_ERR(domain))
-   return DMA_ERROR_CODE;
+   return AMD_IOMMU_MAPPING_ERROR;
 
dma_mask = *dev->dma_mask;
dma_dom = to_dma_ops_domain(domain);
@@ -2560,7 +2562,7 @@ static int map_sg(struct device *dev, struct scatterlist 
*sglist,
npages = sg_num_pages(dev, sglist, nelems);
 
address = dma_ops_alloc_iova(dev, dma_dom, npages, dma_mask);
-   if (address == DMA_ERROR_CODE)
+   if (address == AMD_IOMMU_MAPPING_ERROR)
goto out_err;
 
prot = dir2prot(direction);
@@ -2683,7 +2685,7 @@ static void *alloc_coherent(struct device *dev, size_t 
size,
*dma_addr = __map_single(dev, dma_dom, page_to_phys(page),
 size, DMA_BIDIRECTIONAL, dma_mask);
 
-   if (*dma_addr == DMA_ERROR_CODE)
+   if (*dma_addr == AMD_IOMMU_MAPPING_ERROR)
goto out_free;
 
return page_address(page);
@@ -2732,6 +2734,11 @@ static int amd_iommu_dma_supported(struct device *dev, 
u64 mask)
return check_device(dev);
 }
 
+static int amd_iommu_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+   return dma_addr == AMD_IOMMU_MAPPING_ERROR;
+}
+
 static const struct dma_map_ops amd_iommu_dma_ops = {
.alloc  = alloc_coherent,
.free   = free_coherent,
@@ -2740,6 +2747,7 @@ static const struct dma_map_ops amd_iommu_dma_ops = {
.map_sg = map_sg,
.unmap_sg   = unmap_sg,
.dma_supported  = amd_iommu_dma_supported,
+   .mapping_error  = amd_iommu_mapping_error,
 };
 
 static int init_reserved_iova_ranges(void)
-- 
2.11.0



[PATCH 20/44] sparc: implement ->mapping_error

2017-06-16 Thread Christoph Hellwig
DMA_ERROR_CODE is going to go away, so don't rely on it.

Signed-off-by: Christoph Hellwig 
Acked-by: David S. Miller 
---
 arch/sparc/include/asm/dma-mapping.h |  2 --
 arch/sparc/kernel/iommu.c| 12 +---
 arch/sparc/kernel/iommu_common.h |  2 ++
 arch/sparc/kernel/pci_sun4v.c| 14 ++
 4 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/arch/sparc/include/asm/dma-mapping.h 
b/arch/sparc/include/asm/dma-mapping.h
index 69cc627779f2..b8e8dfcd065d 100644
--- a/arch/sparc/include/asm/dma-mapping.h
+++ b/arch/sparc/include/asm/dma-mapping.h
@@ -5,8 +5,6 @@
 #include 
 #include 
 
-#define DMA_ERROR_CODE (~(dma_addr_t)0x0)
-
 #define HAVE_ARCH_DMA_SUPPORTED 1
 int dma_supported(struct device *dev, u64 mask);
 
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index c63ba99ca551..dafa316d978d 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -314,7 +314,7 @@ static dma_addr_t dma_4u_map_page(struct device *dev, 
struct page *page,
 bad_no_ctx:
if (printk_ratelimit())
WARN_ON(1);
-   return DMA_ERROR_CODE;
+   return SPARC_MAPPING_ERROR;
 }
 
 static void strbuf_flush(struct strbuf *strbuf, struct iommu *iommu,
@@ -547,7 +547,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
 
if (outcount < incount) {
outs = sg_next(outs);
-   outs->dma_address = DMA_ERROR_CODE;
+   outs->dma_address = SPARC_MAPPING_ERROR;
outs->dma_length = 0;
}
 
@@ -573,7 +573,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
 IOMMU_ERROR_CODE);
 
-   s->dma_address = DMA_ERROR_CODE;
+   s->dma_address = SPARC_MAPPING_ERROR;
s->dma_length = 0;
}
if (s == outs)
@@ -741,6 +741,11 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev,
spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
+static int dma_4u_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+   return dma_addr == SPARC_MAPPING_ERROR;
+}
+
 static const struct dma_map_ops sun4u_dma_ops = {
.alloc  = dma_4u_alloc_coherent,
.free   = dma_4u_free_coherent,
@@ -750,6 +755,7 @@ static const struct dma_map_ops sun4u_dma_ops = {
.unmap_sg   = dma_4u_unmap_sg,
.sync_single_for_cpu= dma_4u_sync_single_for_cpu,
.sync_sg_for_cpu= dma_4u_sync_sg_for_cpu,
+   .mapping_error  = dma_4u_mapping_error,
 };
 
const struct dma_map_ops *dma_ops = &sun4u_dma_ops;
diff --git a/arch/sparc/kernel/iommu_common.h b/arch/sparc/kernel/iommu_common.h
index 828493329f68..5ea5c192b1d9 100644
--- a/arch/sparc/kernel/iommu_common.h
+++ b/arch/sparc/kernel/iommu_common.h
@@ -47,4 +47,6 @@ static inline int is_span_boundary(unsigned long entry,
return iommu_is_span_boundary(entry, nr, shift, boundary_size);
 }
 
+#define SPARC_MAPPING_ERROR (~(dma_addr_t)0x0)
+
 #endif /* _IOMMU_COMMON_H */
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index 68bec7c97cb8..8e2a56f4c03a 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -412,12 +412,12 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
 bad:
if (printk_ratelimit())
WARN_ON(1);
-   return DMA_ERROR_CODE;
+   return SPARC_MAPPING_ERROR;
 
 iommu_map_fail:
local_irq_restore(flags);
iommu_tbl_range_free(tbl, bus_addr, npages, IOMMU_ERROR_CODE);
-   return DMA_ERROR_CODE;
+   return SPARC_MAPPING_ERROR;
 }
 
 static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
@@ -590,7 +590,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
 
if (outcount < incount) {
outs = sg_next(outs);
-   outs->dma_address = DMA_ERROR_CODE;
+   outs->dma_address = SPARC_MAPPING_ERROR;
outs->dma_length = 0;
}
 
@@ -607,7 +607,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
iommu_tbl_range_free(tbl, vaddr, npages,
 IOMMU_ERROR_CODE);
/* XXX demap? XXX */
-   s->dma_address = DMA_ERROR_CODE;
+   s->dma_address = SPARC_MAPPING_ERROR;
s->dma_length = 0;
}
if (s == outs)
@@ -669,6 +669,11 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
local_irq_restore(flags);
 }
 
+static int dma_4v_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+   return dma_addr == SPARC_MAPPING_ERROR;
+}
+
 static const struct 

[PATCH 14/44] sh: remove DMA_ERROR_CODE

2017-06-16 Thread Christoph Hellwig
sh does not return errors for dma_map_page.
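
An ops instance whose mappings cannot fail also needs no ->mapping_error
callback once this series lands: dma_mapping_error() reports success when
the hook is left NULL. A hedged sketch of such an always-succeeding
instance, in kernel context (the names are hypothetical, not sh's actual
code):

/* sketch under the assumption that ->map_page cannot fail:
 * no sentinel value, no ->mapping_error hook needed */
static dma_addr_t direct_map_page(struct device *dev, struct page *page,
                                  unsigned long offset, size_t size,
                                  enum dma_data_direction dir,
                                  unsigned long attrs)
{
        return page_to_phys(page) + offset;     /* 1:1, always valid */
}

static const struct dma_map_ops direct_example_ops = {
        .map_page       = direct_map_page,
        /* .mapping_error left NULL: dma_mapping_error() returns 0 */
};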

Signed-off-by: Christoph Hellwig 
---
 arch/sh/include/asm/dma-mapping.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/sh/include/asm/dma-mapping.h b/arch/sh/include/asm/dma-mapping.h
index d99008af5f73..9b06be07db4d 100644
--- a/arch/sh/include/asm/dma-mapping.h
+++ b/arch/sh/include/asm/dma-mapping.h
@@ -9,8 +9,6 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
return dma_ops;
 }
 
-#define DMA_ERROR_CODE 0
-
 void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
enum dma_data_direction dir);
 
-- 
2.11.0



[PATCH 19/44] s390: implement ->mapping_error

2017-06-16 Thread Christoph Hellwig
s390 can also use noop_dma_ops, and while that currently does not return
errors, it will in the future.  Implementing the mapping_error method
is the proper way to have per-ops error conditions.
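
Each conversion in the series follows the same recipe: pick a sentinel
the allocator can never return, hand it back from the map routines, and
compare against it in the new callback. A condensed sketch with
hypothetical "foo" names (foo_alloc_iova is an assumed helper, not a
real API):

#define FOO_MAPPING_ERROR       (~(dma_addr_t)0x0)

static dma_addr_t foo_map_page(struct device *dev, struct page *page,
                               unsigned long offset, size_t size,
                               enum dma_data_direction dir,
                               unsigned long attrs)
{
        dma_addr_t addr = foo_alloc_iova(dev, size);    /* assumed helper */

        if (!addr)
                return FOO_MAPPING_ERROR;       /* error stays inside the ops */
        return addr + offset;
}

static int foo_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
        return dma_addr == FOO_MAPPING_ERROR;
}

static const struct dma_map_ops foo_dma_ops = {
        .map_page       = foo_map_page,
        .mapping_error  = foo_mapping_error,    /* queried by dma_mapping_error() */
};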

Signed-off-by: Christoph Hellwig 
Acked-by: Gerald Schaefer 
---
 arch/s390/include/asm/dma-mapping.h |  2 --
 arch/s390/pci/pci_dma.c | 18 +-
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h
index 3108b8dbe266..512ad0eaa11a 100644
--- a/arch/s390/include/asm/dma-mapping.h
+++ b/arch/s390/include/asm/dma-mapping.h
@@ -8,8 +8,6 @@
 #include 
 #include 
 
-#define DMA_ERROR_CODE (~(dma_addr_t) 0x0)
-
 extern const struct dma_map_ops s390_pci_dma_ops;
 
 static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 9081a57fa340..ea623faab525 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -14,6 +14,8 @@
 #include 
 #include 
 
+#define S390_MAPPING_ERROR (~(dma_addr_t) 0x0)
+
 static struct kmem_cache *dma_region_table_cache;
 static struct kmem_cache *dma_page_table_cache;
 static int s390_iommu_strict;
@@ -281,7 +283,7 @@ static dma_addr_t dma_alloc_address(struct device *dev, int size)
 
 out_error:
spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
-   return DMA_ERROR_CODE;
+   return S390_MAPPING_ERROR;
 }
 
 static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size)
@@ -329,7 +331,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
/* This rounds up number of pages based on size and offset */
nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
dma_addr = dma_alloc_address(dev, nr_pages);
-   if (dma_addr == DMA_ERROR_CODE) {
+   if (dma_addr == S390_MAPPING_ERROR) {
ret = -ENOSPC;
goto out_err;
}
@@ -352,7 +354,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
 out_err:
zpci_err("map error:\n");
zpci_err_dma(ret, pa);
-   return DMA_ERROR_CODE;
+   return S390_MAPPING_ERROR;
 }
 
 static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
@@ -429,7 +431,7 @@ static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
int ret;
 
dma_addr_base = dma_alloc_address(dev, nr_pages);
-   if (dma_addr_base == DMA_ERROR_CODE)
+   if (dma_addr_base == S390_MAPPING_ERROR)
return -ENOMEM;
 
dma_addr = dma_addr_base;
@@ -476,7 +478,7 @@ static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
for (i = 1; i < nr_elements; i++) {
s = sg_next(s);
 
-   s->dma_address = DMA_ERROR_CODE;
+   s->dma_address = S390_MAPPING_ERROR;
s->dma_length = 0;
 
if (s->offset || (size & ~PAGE_MASK) ||
@@ -525,6 +527,11 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
s->dma_length = 0;
}
 }
+   
+static int s390_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+   return dma_addr == S390_MAPPING_ERROR;
+}
 
 int zpci_dma_init_device(struct zpci_dev *zdev)
 {
@@ -657,6 +664,7 @@ const struct dma_map_ops s390_pci_dma_ops = {
.unmap_sg   = s390_dma_unmap_sg,
.map_page   = s390_dma_map_pages,
.unmap_page = s390_dma_unmap_pages,
+   .mapping_error  = s390_mapping_error,
/* if we support direct DMA this must be conditional */
.is_phys= 0,
/* dma_supported is unconditionally true without a callback */
-- 
2.11.0



[PATCH 22/44] x86/pci-nommu: implement ->mapping_error

2017-06-16 Thread Christoph Hellwig
DMA_ERROR_CODE is going to go away, so don't rely on it.

Signed-off-by: Christoph Hellwig 
---
 arch/x86/kernel/pci-nommu.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index a88952ef371c..085fe6ce4049 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -11,6 +11,8 @@
 #include 
 #include 
 
+#define NOMMU_MAPPING_ERROR 0
+
 static int
 check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
 {
@@ -33,7 +35,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
dma_addr_t bus = page_to_phys(page) + offset;
WARN_ON(size == 0);
if (!check_addr("map_single", dev, bus, size))
-   return DMA_ERROR_CODE;
+   return NOMMU_MAPPING_ERROR;
flush_write_buffers();
return bus;
 }
@@ -88,6 +90,11 @@ static void nommu_sync_sg_for_device(struct device *dev,
flush_write_buffers();
 }
 
+static int nommu_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+   return dma_addr == NOMMU_MAPPING_ERROR;
+}
+
 const struct dma_map_ops nommu_dma_ops = {
.alloc  = dma_generic_alloc_coherent,
.free   = dma_generic_free_coherent,
@@ -96,4 +103,5 @@ const struct dma_map_ops nommu_dma_ops = {
.sync_single_for_device = nommu_sync_single_for_device,
.sync_sg_for_device = nommu_sync_sg_for_device,
.is_phys= 1,
+   .mapping_error  = nommu_mapping_error,
 };
-- 
2.11.0



[PATCH 23/44] x86/calgary: implement ->mapping_error

2017-06-16 Thread Christoph Hellwig
DMA_ERROR_CODE is going to go away, so don't rely on it.

Signed-off-by: Christoph Hellwig 
---
 arch/x86/kernel/pci-calgary_64.c | 24 
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index fda7867046d0..e75b490f2b0b 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -50,6 +50,8 @@
 #include 
 #include 
 
+#define CALGARY_MAPPING_ERROR  0
+
 #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT
 int use_calgary __read_mostly = 1;
 #else
@@ -252,7 +254,7 @@ static unsigned long iommu_range_alloc(struct device *dev,
if (panic_on_overflow)
panic("Calgary: fix the allocator.\n");
else
-   return DMA_ERROR_CODE;
+   return CALGARY_MAPPING_ERROR;
}
}
 
@@ -272,10 +274,10 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
 
entry = iommu_range_alloc(dev, tbl, npages);
 
-   if (unlikely(entry == DMA_ERROR_CODE)) {
+   if (unlikely(entry == CALGARY_MAPPING_ERROR)) {
pr_warn("failed to allocate %u pages in iommu %p\n",
npages, tbl);
-   return DMA_ERROR_CODE;
+   return CALGARY_MAPPING_ERROR;
}
 
/* set the return dma address */
@@ -295,7 +297,7 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
unsigned long flags;
 
/* were we called with bad_dma_address? */
-   badend = DMA_ERROR_CODE + (EMERGENCY_PAGES * PAGE_SIZE);
+   badend = CALGARY_MAPPING_ERROR + (EMERGENCY_PAGES * PAGE_SIZE);
if (unlikely(dma_addr < badend)) {
WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA "
   "address 0x%Lx\n", dma_addr);
@@ -380,7 +382,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE);
 
entry = iommu_range_alloc(dev, tbl, npages);
-   if (entry == DMA_ERROR_CODE) {
+   if (entry == CALGARY_MAPPING_ERROR) {
/* makes sure unmap knows to stop */
s->dma_length = 0;
goto error;
@@ -398,7 +400,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
 error:
calgary_unmap_sg(dev, sg, nelems, dir, 0);
for_each_sg(sg, s, nelems, i) {
-   sg->dma_address = DMA_ERROR_CODE;
+   sg->dma_address = CALGARY_MAPPING_ERROR;
sg->dma_length = 0;
}
return 0;
@@ -453,7 +455,7 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size,
 
/* set up tces to cover the allocated range */
mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL);
-   if (mapping == DMA_ERROR_CODE)
+   if (mapping == CALGARY_MAPPING_ERROR)
goto free;
*dma_handle = mapping;
return ret;
@@ -478,6 +480,11 @@ static void calgary_free_coherent(struct device *dev, size_t size,
free_pages((unsigned long)vaddr, get_order(size));
 }
 
+static int calgary_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+   return dma_addr == CALGARY_MAPPING_ERROR;
+}
+
 static const struct dma_map_ops calgary_dma_ops = {
.alloc = calgary_alloc_coherent,
.free = calgary_free_coherent,
@@ -485,6 +492,7 @@ static const struct dma_map_ops calgary_dma_ops = {
.unmap_sg = calgary_unmap_sg,
.map_page = calgary_map_page,
.unmap_page = calgary_unmap_page,
+   .mapping_error = calgary_mapping_error,
 };
 
 static inline void __iomem * busno_to_bbar(unsigned char num)
@@ -732,7 +740,7 @@ static void __init calgary_reserve_regions(struct pci_dev *dev)
struct iommu_table *tbl = pci_iommu(dev->bus);
 
/* reserve EMERGENCY_PAGES from bad_dma_address and up */
-   iommu_range_reserve(tbl, DMA_ERROR_CODE, EMERGENCY_PAGES);
+   iommu_range_reserve(tbl, CALGARY_MAPPING_ERROR, EMERGENCY_PAGES);
 
/* avoid the BIOS/VGA first 640KB-1MB region */
/* for CalIOC2 - avoid the entire first MB */
-- 
2.11.0



Re: [PATCH v4 2/5] watchdog: introduce arch_touch_nmi_watchdog()

2017-06-16 Thread Andrew Morton
On Fri, 16 Jun 2017 16:57:12 +1000 Nicholas Piggin  wrote:

> For architectures that define HAVE_NMI_WATCHDOG, instead of having
> them provide the complete touch_nmi_watchdog() function, just have
> them provide arch_touch_nmi_watchdog().
> 
> This gives the generic code more flexibility in implementing this
> function, and arch implementations don't miss out on touching the
> softlockup watchdog or other generic details.
> 
> ...
>
> --- a/arch/blackfin/include/asm/nmi.h
> +++ b/arch/blackfin/include/asm/nmi.h
> @@ -9,4 +9,6 @@
>  
>  #include 
>  
> +extern void arch_touch_nmi_watchdog(void);

Do we actually need to add this to the arch header files...

>
> ...
>
> --- a/include/linux/nmi.h
> +++ b/include/linux/nmi.h
> @@ -6,6 +6,9 @@
>  
>  #include 
>  #include 
> +#if defined(CONFIG_HAVE_NMI_WATCHDOG)
> +#include 
> +#endif
>  
>  #ifdef CONFIG_LOCKUP_DETECTOR
>  extern void touch_softlockup_watchdog_sched(void);
> @@ -58,6 +61,18 @@ static inline void reset_hung_task_detector(void)
>  #define NMI_WATCHDOG_ENABLED  (1 << NMI_WATCHDOG_ENABLED_BIT)
>  #define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT)
>  
> +#if defined(CONFIG_HARDLOCKUP_DETECTOR)
> +extern void hardlockup_detector_disable(void);
> +#else
> +static inline void hardlockup_detector_disable(void) {}
> +#endif
> +
> +#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
> +extern void arch_touch_nmi_watchdog(void);
> +#else
> +static inline void arch_touch_nmi_watchdog(void) {}
> +#endif
> +

given that we have a global declaration here?
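
For context, the generic wrapper the quoted patch enables would look
roughly like this (a sketch of the intent, not the posted code):

/* include/linux/nmi.h, sketched: the arch supplies only the NMI part,
 * the wrapper adds the generic pieces no arch should have to repeat. */
static inline void touch_nmi_watchdog(void)
{
        arch_touch_nmi_watchdog();      /* arch hook, or an empty stub */
        touch_softlockup_watchdog();    /* generic bookkeeping */
}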




[PATCH 21/44] powerpc: implement ->mapping_error

2017-06-16 Thread Christoph Hellwig
DMA_ERROR_CODE is going to go away, so don't rely on it.  Instead
define a ->mapping_error method for all IOMMU based dma operation
instances.  The direct ops don't ever return an error and don't
need a ->mapping_error method.
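
After the whole series the generic dispatcher reduces to roughly the
following (a sketch, not part of this diff): ops that implement the hook
decide for themselves, and ops that cannot fail, like the direct ops
here, simply report success.

static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
        const struct dma_map_ops *ops = get_dma_ops(dev);

        debug_dma_mapping_error(dev, dma_addr);
        if (ops->mapping_error)
                return ops->mapping_error(dev, dma_addr);
        return 0;       /* no hook: this ops instance cannot fail */
}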

Signed-off-by: Christoph Hellwig 
Acked-by: Michael Ellerman 
---
 arch/powerpc/include/asm/dma-mapping.h |  4 
 arch/powerpc/include/asm/iommu.h   |  4 
 arch/powerpc/kernel/dma-iommu.c|  6 ++
 arch/powerpc/kernel/iommu.c| 28 ++--
 arch/powerpc/platforms/cell/iommu.c|  1 +
 arch/powerpc/platforms/pseries/vio.c   |  3 ++-
 6 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
index 181a095468e4..73aedbe6c977 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -17,10 +17,6 @@
 #include 
 #include 
 
-#ifdef CONFIG_PPC64
-#define DMA_ERROR_CODE (~(dma_addr_t)0x0)
-#endif
-
 /* Some dma direct funcs must be visible for use in other dma_ops */
 extern void *__dma_direct_alloc_coherent(struct device *dev, size_t size,
 dma_addr_t *dma_handle, gfp_t flag,
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 8a8ce220d7d0..20febe0b7f32 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -139,6 +139,8 @@ struct scatterlist;
 
 #ifdef CONFIG_PPC64
 
+#define IOMMU_MAPPING_ERROR (~(dma_addr_t)0x0)
+
 static inline void set_iommu_table_base(struct device *dev,
struct iommu_table *base)
 {
@@ -238,6 +240,8 @@ static inline int __init tce_iommu_bus_notifier_init(void)
 }
 #endif /* !CONFIG_IOMMU_API */
 
+int dma_iommu_mapping_error(struct device *dev, dma_addr_t dma_addr);
+
 #else
 
 static inline void *get_iommu_table_base(struct device *dev)
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index fb7cbaa37658..8f7abf9baa63 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -105,6 +105,11 @@ static u64 dma_iommu_get_required_mask(struct device *dev)
return mask;
 }
 
+int dma_iommu_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+   return dma_addr == IOMMU_MAPPING_ERROR;
+}
+
 struct dma_map_ops dma_iommu_ops = {
.alloc  = dma_iommu_alloc_coherent,
.free   = dma_iommu_free_coherent,
@@ -115,5 +120,6 @@ struct dma_map_ops dma_iommu_ops = {
.map_page   = dma_iommu_map_page,
.unmap_page = dma_iommu_unmap_page,
.get_required_mask  = dma_iommu_get_required_mask,
+   .mapping_error  = dma_iommu_mapping_error,
 };
 EXPORT_SYMBOL(dma_iommu_ops);
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index f2b724cd9e64..233ca3fe4754 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -198,11 +198,11 @@ static unsigned long iommu_range_alloc(struct device *dev,
if (unlikely(npages == 0)) {
if (printk_ratelimit())
WARN_ON(1);
-   return DMA_ERROR_CODE;
+   return IOMMU_MAPPING_ERROR;
}
 
if (should_fail_iommu(dev))
-   return DMA_ERROR_CODE;
+   return IOMMU_MAPPING_ERROR;
 
/*
 * We don't need to disable preemption here because any CPU can
@@ -278,7 +278,7 @@ static unsigned long iommu_range_alloc(struct device *dev,
} else {
/* Give up */
spin_unlock_irqrestore(&(pool->lock), flags);
-   return DMA_ERROR_CODE;
+   return IOMMU_MAPPING_ERROR;
}
}
 
@@ -310,13 +310,13 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
  unsigned long attrs)
 {
unsigned long entry;
-   dma_addr_t ret = DMA_ERROR_CODE;
+   dma_addr_t ret = IOMMU_MAPPING_ERROR;
int build_fail;
 
entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order);
 
-   if (unlikely(entry == DMA_ERROR_CODE))
-   return DMA_ERROR_CODE;
+   if (unlikely(entry == IOMMU_MAPPING_ERROR))
+   return IOMMU_MAPPING_ERROR;
 
entry += tbl->it_offset;/* Offset into real TCE table */
ret = entry << tbl->it_page_shift;  /* Set the return dma address */
@@ -328,12 +328,12 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
 
/* tbl->it_ops->set() only returns non-zero for transient errors.
 * Clean up the table bitmap in this case and return
-* DMA_ERROR_CODE. For all other errors the functionality is
+* IOMMU_MAPPING_ERROR. For all other errors the functionality is
 * not altered.

<    1   2   3   4   5   6   7   8   9   10   >