Re: [PATCH v6 25/34] swiotlb: Add warnings for use of bounce buffers with SME

2017-06-07 Thread kbuild test robot
Hi Tom,

[auto build test ERROR on linus/master]
[also build test ERROR on v4.12-rc4 next-20170607]
[cannot apply to tip/x86/core]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:
https://github.com/0day-ci/linux/commits/Tom-Lendacky/x86-Secure-Memory-Encryption-AMD/20170608-104147
config: sparc-defconfig (attached as .config)
compiler: sparc-linux-gcc (GCC) 6.2.0
reproduce:
wget https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=sparc 

All errors (new ones prefixed by >>):

   In file included from include/linux/dma-mapping.h:13:0,
from include/linux/skbuff.h:34,
from include/linux/filter.h:12,
from kernel//bpf/core.c:24:
>> include/linux/mem_encrypt.h:16:29: fatal error: asm/mem_encrypt.h: No such file or directory
    #include <asm/mem_encrypt.h>
                                ^
   compilation terminated.

vim +16 include/linux/mem_encrypt.h

2d7c2ec4 Tom Lendacky 2017-06-07  10   * published by the Free Software Foundation.
2d7c2ec4 Tom Lendacky 2017-06-07  11   */
2d7c2ec4 Tom Lendacky 2017-06-07  12  
2d7c2ec4 Tom Lendacky 2017-06-07  13  #ifndef __MEM_ENCRYPT_H__
2d7c2ec4 Tom Lendacky 2017-06-07  14  #define __MEM_ENCRYPT_H__
2d7c2ec4 Tom Lendacky 2017-06-07  15  
2d7c2ec4 Tom Lendacky 2017-06-07 @16  #include <asm/mem_encrypt.h>
2d7c2ec4 Tom Lendacky 2017-06-07  17  
2d7c2ec4 Tom Lendacky 2017-06-07  18  #endif /* __MEM_ENCRYPT_H__ */

:: The code at line 16 was first introduced by commit
:: 2d7c2ec4c60e83432b27bfb32042706f404d4158 x86/mm: Add Secure Memory Encryption (SME) support

:: TO: Tom Lendacky <thomas.lenda...@amd.com>
:: CC: 0day robot <fengguang...@intel.com>
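One way a cross-architecture breakage like this is usually avoided is to guard the asm include behind a generic fallback. The following is an illustrative sketch only, not necessarily the fix that was adopted; the guard on CONFIG_AMD_MEM_ENCRYPT is an assumption, though the series does carry an include/asm-generic/mem_encrypt.h fallback:

        /* include/linux/mem_encrypt.h -- sketch of a guarded include */
        #ifndef __MEM_ENCRYPT_H__
        #define __MEM_ENCRYPT_H__

        #ifdef CONFIG_AMD_MEM_ENCRYPT
        #include <asm/mem_encrypt.h>
        #else
        #include <asm-generic/mem_encrypt.h>
        #endif

        #endif /* __MEM_ENCRYPT_H__ */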

---
0-DAY kernel test infrastructure            Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation



[PATCH v3 0/2] acpi/iort, numa: Add numa node mapping for smmuv3 devices

2017-06-07 Thread Ganapatrao Kulkarni
ARM IORT specification (rev. C) has added a provision to define the proximity
domain in the SMMUv3 IORT table. Add the required code to parse the Proximity
Domain and set the numa_node of SMMUv3 platform devices.

v3:
  - Addressed Lorenzo Pieralisi comment.

v2:
  - Changed as per Lorenzo Pieralisi and Hanjun Guo suggestions.

v1:
  - Initial patch

Ganapatrao Kulkarni (2):
  acpica: iort: Update SMMUv3 header for proximity domain mapping
  acpi/iort: numa: Add numa node mapping for smmuv3 devices

 drivers/acpi/arm64/iort.c | 28 ++--
 include/acpi/actbl2.h |  4 
 2 files changed, 30 insertions(+), 2 deletions(-)

-- 
1.8.1.4



[PATCH v3 2/2] acpi/iort: numa: Add numa node mapping for smmuv3 devices

2017-06-07 Thread Ganapatrao Kulkarni
Add code to parse proximity domain in SMMUv3 IORT table to
set numa node mapping for smmuv3 devices.

Signed-off-by: Ganapatrao Kulkarni 
---
 drivers/acpi/arm64/iort.c | 28 ++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index bba2b59..e804386 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -882,6 +882,23 @@ static bool __init arm_smmu_v3_is_coherent(struct 
acpi_iort_node *node)
return smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE;
 }
 
+/*
+ * set numa proximity domain for smmuv3 device
+ */
+static void  __init arm_smmu_v3_set_proximity(struct acpi_iort_node *node,
+   struct device *dev)
+{
+   struct acpi_iort_smmu_v3 *smmu;
+
+   smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
+   if (smmu->flags & ACPI_IORT_SMMU_V3_PXM_VALID) {
+   set_dev_node(dev, acpi_map_pxm_to_node(smmu->pxm));
+   pr_info("SMMUV3[%llx] Mapped to Proximity domain %d\n",
+   smmu->base_address,
+   smmu->pxm);
+   }
+}
+
 static int __init arm_smmu_count_resources(struct acpi_iort_node *node)
 {
struct acpi_iort_smmu *smmu;
@@ -951,20 +968,24 @@ struct iort_iommu_config {
int (*iommu_count_resources)(struct acpi_iort_node *node);
void (*iommu_init_resources)(struct resource *res,
 struct acpi_iort_node *node);
+   void (*iommu_set_proximity)(struct acpi_iort_node *node,
+struct device *dev);
 };
 
 static const struct iort_iommu_config iort_arm_smmu_v3_cfg __initconst = {
.name = "arm-smmu-v3",
.iommu_is_coherent = arm_smmu_v3_is_coherent,
.iommu_count_resources = arm_smmu_v3_count_resources,
-   .iommu_init_resources = arm_smmu_v3_init_resources
+   .iommu_init_resources = arm_smmu_v3_init_resources,
+   .iommu_set_proximity = arm_smmu_v3_set_proximity
 };
 
 static const struct iort_iommu_config iort_arm_smmu_cfg __initconst = {
.name = "arm-smmu",
.iommu_is_coherent = arm_smmu_is_coherent,
.iommu_count_resources = arm_smmu_count_resources,
-   .iommu_init_resources = arm_smmu_init_resources
+   .iommu_init_resources = arm_smmu_init_resources,
+   .iommu_set_proximity = NULL
 };
 
 static __init
@@ -1002,6 +1023,9 @@ static int __init iort_add_smmu_platform_device(struct 
acpi_iort_node *node)
if (!pdev)
return -ENOMEM;
 
+   if (ops->iommu_set_proximity)
+   ops->iommu_set_proximity(node, &pdev->dev);
+
count = ops->iommu_count_resources(node);
 
r = kcalloc(count, sizeof(*r), GFP_KERNEL);
-- 
1.8.1.4



[PATCH v3 1/2] acpica: iort: Update SMMUv3 header for proximity domain mapping

2017-06-07 Thread Ganapatrao Kulkarni
ARM IORT specification (rev. C) has added two new fields to define
proximity domain for the SMMUv3 node in the IORT table.

Proximity Domain Valid:
Set to 1 if the value provided in the Proximity Domain field is
valid. Set to 0 otherwise.

Proximity domain:
If the Proximity Domain Valid flag is set to 1, this entry
provides the proximity domain to which this SMMU
instance belongs.

Update header file to reflect this.

Signed-off-by: Ganapatrao Kulkarni 
---
 include/acpi/actbl2.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index 4b306a6..389e91f 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -805,6 +805,9 @@ struct acpi_iort_smmu_v3 {
u32 pri_gsiv;
u32 gerr_gsiv;
u32 sync_gsiv;
+   u8 pxm;
+   u8 reserved1;
+   u16 reserved2;
 };
 
 /* Values for Model field above */
@@ -817,6 +820,7 @@ struct acpi_iort_smmu_v3 {
 
 #define ACPI_IORT_SMMU_V3_COHACC_OVERRIDE   (1)
 #define ACPI_IORT_SMMU_V3_HTTU_OVERRIDE (1<<1)
+#define ACPI_IORT_SMMU_V3_PXM_VALID (1<<3)
 
 
/***
  *
-- 
1.8.1.4



Re: [PATCH 03/12] intel-ipu3: Add DMA API implementation

2017-06-07 Thread Tomasz Figa
Hi Alan,

On Thu, Jun 8, 2017 at 2:45 AM, Alan Cox  wrote:
>> > +   struct ipu3_mmu *mmu = to_ipu3_mmu(dev);
>> > +   dma_addr_t daddr = iommu_iova_to_phys(mmu->domain, dma_handle);
>> > +
>> > +   clflush_cache_range(phys_to_virt(daddr), size);
>>
>> You might need to consider another IOMMU on the way here. Generally,
>> given that daddr is your MMU DMA address (not necessarily CPU physical
>> address), you should be able to call
>>
>> dma_sync_single_for_cpu(dev, daddr, size, dir)
>
> The system IOMMU (if enabled) may be cache coherent - and on x86 would be,
> so it doesn't think it needs to do anything for cache synchronization
> and the dma_sync won't actually do any work.

I'm not very familiar with x86, but typically I found coherency to be
an attribute of the DMA master (i.e. if it is connected to a coherent
memory port).

Looking at all the IPU3 code, it looks like the whole PCI device is
non-coherent for some reason (e.g. you can see implicit cache flushes
for page tables). So I would have expected that a non-coherent variant
of x86 dma_ops is used for the PCI struct device, which would do cache
maintenance in its dma_sync_* ops.
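Concretely, the replacement suggested above swaps the explicit cache flush for the streaming DMA API, roughly as follows (a sketch; "dev" and DMA_FROM_DEVICE are placeholders for whatever device and direction the original mapping used):

        /* instead of: clflush_cache_range(phys_to_virt(daddr), size); */
        dma_sync_single_for_cpu(dev, daddr, size, DMA_FROM_DEVICE);

The point is that dma_sync_single_for_cpu() is a no-op on coherent configurations and does the required maintenance on non-coherent ones.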

Best regards,
Tomasz


Re: [PATCH v6 00/34] x86: Secure Memory Encryption (AMD)

2017-06-07 Thread Nick Sarnie
On Wed, Jun 7, 2017 at 3:13 PM, Tom Lendacky  wrote:
> This patch series provides support for AMD's new Secure Memory Encryption (SME)
> feature.
>
> SME can be used to mark individual pages of memory as encrypted through the
> page tables. A page of memory that is marked encrypted will be automatically
> decrypted when read from DRAM and will be automatically encrypted when
> written to DRAM. Details on SME can be found in the links below.
>
> The SME feature is identified through a CPUID function and enabled through
> the SYSCFG MSR. Once enabled, page table entries will determine how the
> memory is accessed. If a page table entry has the memory encryption mask set,
> then that memory will be accessed as encrypted memory. The memory encryption
> mask (as well as other related information) is determined from settings
> returned through the same CPUID function that identifies the presence of the
> feature.
>
> The approach that this patch series takes is to encrypt everything possible
> starting early in the boot where the kernel is encrypted. Using the page
> table macros the encryption mask can be incorporated into all page table
> entries and page allocations. By updating the protection map, userspace
> allocations are also marked encrypted. Certain data must be accounted for
> as having been placed in memory before SME was enabled (EFI, initrd, etc.)
> and accessed accordingly.
>
> This patch series is a pre-cursor to another AMD processor feature called
> Secure Encrypted Virtualization (SEV). The support for SEV will build upon
> the SME support and will be submitted later. Details on SEV can be found
> in the links below.
>
> The following links provide additional detail:
>
> AMD Memory Encryption whitepaper:
>
> http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/12/AMD_Memory_Encryption_Whitepaper_v7-Public.pdf
>
> AMD64 Architecture Programmer's Manual:
>    http://support.amd.com/TechDocs/24593.pdf
>    SME is section 7.10
>    SEV is section 15.34
>
> ---
>
> This patch series is based off of the master branch of tip.
>   Commit 53614fbd7961 ("Merge branch 'WIP.x86/fpu'")
>
> Source code is also available at https://github.com/codomania/tip/tree/sme-v6
>
>
> Still to do:
> - Kdump support, including using memremap() instead of ioremap_cache()
>
> Changes since v5:
> - Added support for 5-level paging
> - Added IOMMU support
> - Created a generic asm/mem_encrypt.h in order to remove a bunch of
>   #ifndef/#define entries
> - Removed changes to the __va() macro and defined a function to return
>   the true physical address in cr3
> - Removed sysfs support as it was determined not to be needed
> - General code cleanup based on feedback
> - General cleanup of patch subjects and descriptions
>
> Changes since v4:
> - Re-worked mapping of setup data to not use a fixed list. Rather, check
>   dynamically whether the requested early_memremap()/memremap() call
>   needs to be mapped decrypted.
> - Moved SME cpu feature into scattered features
> - Moved some declarations into header files
> - Cleared the encryption mask from the __PHYSICAL_MASK so that users
>   of macros such as pmd_pfn_mask() don't have to worry/know about the
>   encryption mask
> - Updated some return types and values related to EFI and e820 functions
>   so that an error could be returned
> - During cpu shutdown, removed cache disabling and added a check for kexec
>   in progress to use wbinvd followed immediately by halt in order to avoid
>   any memory corruption
> - Update how persistent memory is identified
> - Added a function to find command line arguments and their values
> - Added sysfs support
> - General code cleanup based on feedback
> - General cleanup of patch subjects and descriptions
>
>
> Changes since v3:
> - Broke out some of the patches into smaller individual patches
> - Updated Documentation
> - Added a message to indicate why the IOMMU was disabled
> - Updated CPU feature support for SME by taking into account whether
>   BIOS has enabled SME
> - Eliminated redundant functions
> - Added some warning messages for DMA usage of bounce buffers when SME
>   is active
> - Added support for persistent memory
> - Added support to determine when setup data is being mapped and be sure
>   to map it un-encrypted
> - Added CONFIG support to set the default action of whether to activate
>   SME if it is supported/enabled
> - Added support for (re)booting with kexec
>
> Changes since v2:
> - Updated Documentation
> - Make the encryption mask available outside of arch/x86 through a
>   standard include file
> - Conversion of assembler routines to C where possible (not everything
>   could be converted, e.g. the routine that does the actual encryption
>   needs to be copied into a safe location and it is difficult to
>   determine the actual length of the function in order to copy it)
> - Fix SME feature use of scattered CPUID feature
> - Creation of SME specific functions for things like 
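The page-table-macro approach described in the cover letter boils down to helpers of roughly this shape. This is an illustrative sketch; the names mirror pgprot_encrypted()/pgprot_decrypted() used elsewhere in the series rather than quoting the exact patch:

        /* fold the encryption mask into page protection bits */
        #define pgprot_encrypted(prot)  __pgprot(pgprot_val(prot) | sme_me_mask)
        #define pgprot_decrypted(prot)  __pgprot(pgprot_val(prot) & ~sme_me_mask)

Running the protection_map[] entries through pgprot_encrypted() is what lets userspace allocations pick up the mask automatically.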

Re: [PATCH v6 26/34] iommu/amd: Allow the AMD IOMMU to work with memory encryption

2017-06-07 Thread Nick Sarnie
On Wed, Jun 7, 2017 at 3:17 PM, Tom Lendacky  wrote:
> The IOMMU is programmed with physical addresses for the various tables
> and buffers that are used to communicate between the device and the
> driver. When the driver allocates this memory it is encrypted. In order
> for the IOMMU to access the memory as encrypted the encryption mask needs
> to be included in these physical addresses during configuration.
>
> The PTE entries created by the IOMMU should also include the encryption
> mask so that when the device behind the IOMMU performs a DMA, the DMA
> will be performed to encrypted memory.
>
> Signed-off-by: Tom Lendacky 
> ---
>  arch/x86/include/asm/mem_encrypt.h |7 +++
>  arch/x86/mm/mem_encrypt.c  |   30 ++
>  drivers/iommu/amd_iommu.c  |   36 
> +++-
>  drivers/iommu/amd_iommu_init.c |   18 --
>  drivers/iommu/amd_iommu_proto.h|   10 ++
>  drivers/iommu/amd_iommu_types.h|2 +-
>  include/asm-generic/mem_encrypt.h  |5 +
>  7 files changed, 84 insertions(+), 24 deletions(-)
>
> diff --git a/arch/x86/include/asm/mem_encrypt.h 
> b/arch/x86/include/asm/mem_encrypt.h
> index c7a2525..d86e544 100644
> --- a/arch/x86/include/asm/mem_encrypt.h
> +++ b/arch/x86/include/asm/mem_encrypt.h
> @@ -31,6 +31,8 @@ void __init sme_early_decrypt(resource_size_t paddr,
>
>  void __init sme_early_init(void);
>
> +bool sme_iommu_supported(void);
> +
>  /* Architecture __weak replacement functions */
>  void __init mem_encrypt_init(void);
>
> @@ -62,6 +64,11 @@ static inline void __init sme_early_init(void)
>  {
>  }
>
> +static inline bool sme_iommu_supported(void)
> +{
> +   return true;
> +}
> +
>  #endif /* CONFIG_AMD_MEM_ENCRYPT */
>
>  static inline bool sme_active(void)
> diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
> index 5d7c51d..018b58a 100644
> --- a/arch/x86/mm/mem_encrypt.c
> +++ b/arch/x86/mm/mem_encrypt.c
> @@ -197,6 +197,36 @@ void __init sme_early_init(void)
> protection_map[i] = pgprot_encrypted(protection_map[i]);
>  }
>
> +bool sme_iommu_supported(void)
> +{
> +   struct cpuinfo_x86 *c = &boot_cpu_data;
> +
> +   if (!sme_me_mask || (c->x86 != 0x17))
> +   return true;
> +
> +   /* For Fam17h, a specific level of support is required */
> +   switch (c->microcode & 0xf000) {
> +   case 0x:
> +   return false;
> +   case 0x1000:
> +   switch (c->microcode & 0x0f00) {
> +   case 0x:
> +   return false;
> +   case 0x0100:
> +   if ((c->microcode & 0xff) < 0x26)
> +   return false;
> +   break;
> +   case 0x0200:
> +   if ((c->microcode & 0xff) < 0x05)
> +   return false;
> +   break;
> +   }
> +   break;
> +   }
> +
> +   return true;
> +}
> +
>  /* Architecture __weak replacement functions */
>  void __init mem_encrypt_init(void)
>  {
> diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
> index 63cacf5..94eb130 100644
> --- a/drivers/iommu/amd_iommu.c
> +++ b/drivers/iommu/amd_iommu.c
> @@ -544,7 +544,7 @@ static void dump_dte_entry(u16 devid)
>
>  static void dump_command(unsigned long phys_addr)
>  {
> -   struct iommu_cmd *cmd = phys_to_virt(phys_addr);
> +   struct iommu_cmd *cmd = iommu_phys_to_virt(phys_addr);
> int i;
>
> for (i = 0; i < 4; ++i)
> @@ -863,13 +863,15 @@ static void copy_cmd_to_buffer(struct amd_iommu *iommu,
> writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
>  }
>
> -static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
> +static void build_completion_wait(struct iommu_cmd *cmd, volatile u64 *sem)
>  {
> +   u64 address = iommu_virt_to_phys((void *)sem);
> +
> WARN_ON(address & 0x7ULL);
>
> memset(cmd, 0, sizeof(*cmd));
> -   cmd->data[0] = lower_32_bits(__pa(address)) | 
> CMD_COMPL_WAIT_STORE_MASK;
> -   cmd->data[1] = upper_32_bits(__pa(address));
> +   cmd->data[0] = lower_32_bits(address) | CMD_COMPL_WAIT_STORE_MASK;
> +   cmd->data[1] = upper_32_bits(address);
> cmd->data[2] = 1;
> CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
>  }
> @@ -1033,7 +1035,7 @@ static int __iommu_queue_command_sync(struct amd_iommu 
> *iommu,
>
> iommu->cmd_sem = 0;
>
> -   build_completion_wait(&sync_cmd, (u64)&iommu->cmd_sem);
> +   build_completion_wait(&sync_cmd, &iommu->cmd_sem);
> copy_cmd_to_buffer(iommu, &sync_cmd, tail);
>
> if ((ret = wait_on_sem(&iommu->cmd_sem)) != 0)
> @@ -1083,7 +1085,7 @@ static int iommu_completion_wait(struct amd_iommu 
> *iommu)
> return 0;
>
>
> -   build_completion_wait(&cmd, (u64)&iommu->cmd_sem);
> +   
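The amd_iommu_proto.h hunk that adds the address helpers is cut off above. Their assumed shape, a sketch based on the __sme_set()/__sme_clr() helpers this series introduces, would be roughly:

        /* sketch, not the verbatim hunk */
        static inline u64 iommu_virt_to_phys(void *vaddr)
        {
                return __sme_set(virt_to_phys(vaddr));
        }

        static inline void *iommu_phys_to_virt(unsigned long paddr)
        {
                return phys_to_virt(__sme_clr(paddr));
        }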

Re: [PATCH 02/12] intel-ipu3: mmu: implement driver

2017-06-07 Thread Sakari Ailus
Hi Tomasz,

On Tue, Jun 06, 2017 at 07:13:19PM +0900, Tomasz Figa wrote:
> Hi Yong, Tuukka,
> 
> +CC IOMMU ML and Joerg. (Technically you should resend this patch
> including them.)

Thanks!

> 
> On Tue, Jun 6, 2017 at 5:39 AM, Yong Zhi  wrote:
...
> > diff --git a/drivers/media/pci/intel/ipu3/Kconfig 
> > b/drivers/media/pci/intel/ipu3/Kconfig
> > index 2a895d6..ab2edcb 100644
> > --- a/drivers/media/pci/intel/ipu3/Kconfig
> > +++ b/drivers/media/pci/intel/ipu3/Kconfig
> > @@ -15,3 +15,14 @@ config VIDEO_IPU3_CIO2
> > Say Y or M here if you have a Skylake/Kaby Lake SoC with MIPI CSI-2
> > connected camera.
> > The module will be called ipu3-cio2.
> > +
> > +config INTEL_IPU3_MMU
> > +   tristate "Intel ipu3-mmu driver"
> > +   select IOMMU_API
> > +   select IOMMU_IOVA
> > +   ---help---
> > + For IPU3, this option enables its MMU driver to translate its internal
> > + virtual address to 39 bits wide physical address for 64GBytes space access.
> > +
> > + Say Y here if you have Skylake/Kaby Lake SoC with IPU3.
> > + Say N if un-sure.
> 
> Is the MMU optional? I.e. can you still use the IPU3 without the MMU
> driver? If no, then it doesn't make sense to flood the user with
> meaningless choice and the driver could simply be selected by other
> IPU3 drivers.

There are other IPUs that contain the same hardware, so they would
presumably use the same driver.

> 
> And the other way around, is the IPU3 MMU driver useful for anything
> else than IPU3? If no (but yes for the above), then it should depend
> on some other IPU3 drivers being enabled, as otherwise it would just
> confuse the user.

Very likely not.

For now I think it'd be fine to have the driver separate from the rest of
the IPU3 but without a separate Kconfig option.

> 
> > diff --git a/drivers/media/pci/intel/ipu3/Makefile 
> > b/drivers/media/pci/intel/ipu3/Makefile
> > index 20186e3..2b669df 100644
> > --- a/drivers/media/pci/intel/ipu3/Makefile
> > +++ b/drivers/media/pci/intel/ipu3/Makefile
> > @@ -1 +1,2 @@
> >  obj-$(CONFIG_VIDEO_IPU3_CIO2) += ipu3-cio2.o
> > +obj-$(CONFIG_INTEL_IPU3_MMU) += ipu3-mmu.o
> > diff --git a/drivers/media/pci/intel/ipu3/ipu3-mmu.c 
> > b/drivers/media/pci/intel/ipu3/ipu3-mmu.c
> > new file mode 100644
> > index 000..a9fb116
> > --- /dev/null
> > +++ b/drivers/media/pci/intel/ipu3/ipu3-mmu.c

...

> > +/**
> > + * ipu3_mmu_alloc_page_table - get page to fill entries with dummy defaults
> > + * @d: mapping domain to be worked on
> > + * @l1: True for L1 page table, false for L2 page table.
> > + *
> > + * Index of L1 page table points to L2 tbl
> > + *
> > + * Return: Pointer to allocated page table
> > + * or NULL on failure.
> > + */
> > +static uint32_t *ipu3_mmu_alloc_page_table(struct ipu3_mmu_domain *d, bool 
> > l1)
> > +{
> > +   uint32_t *pt = (uint32_t *)__get_free_page(GFP_KERNEL);
> 
> Style: I believe u32 is preferred in the kernel.

There are some 3 users of uint32_t alone in the kernel. I'd say it
should be fine. (I'm not trying saying it'd be more common than u32
though.)

> > +   DMA_BIT_MASK(IPU3_MMU_ADDRESS_BITS);
> > +   mmu_dom->domain.geometry.force_aperture = true;
> > +
> > +   ptr = (void *)__get_free_page(GFP_KERNEL);
> > +   if (!ptr)
> > +   goto fail_get_page;
> > +   mmu_dom->dummy_page = virt_to_phys(ptr) >> IPU3_MMU_PAGE_SHIFT;
> 
> Is virt_to_phys() correct here? I'm not an expert on x86 systems, but
> since this is a PCI device, there might be some other memory mapping
> involved.

In theory yes --- if the IPU3 were behind an IOMMU managed by the Linux
kernel. That kind of configuration wouldn't make much sense and any
attempt to use such a configuration would probably fall apart with
the assumption of single dma_ops, too.

I have to say I'm not certain if anything else than kernel configuration
would prevent this though.

-- 
Regards,

Sakari Ailus
sakari.ai...@linux.intel.com


[PATCH v6 34/34] x86/mm: Add support to make use of Secure Memory Encryption

2017-06-07 Thread Tom Lendacky
Add support to check if SME has been enabled and if memory encryption
should be activated (checking of command line option based on the
configuration of the default state).  If memory encryption is to be
activated, then the encryption mask is set and the kernel is encrypted
"in place."

Signed-off-by: Tom Lendacky 
---
 arch/x86/kernel/head_64.S |1 
 arch/x86/mm/mem_encrypt.c |   93 +++--
 2 files changed, 89 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 1fe944b..660bf8e 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -97,6 +97,7 @@ startup_64:
 * Save the returned mask in %r12 for later use.
 */
	push	%rsi
+	movq	%rsi, %rdi
	call	sme_enable
	pop	%rsi
	movq	%rax, %r12
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 6129477..d624058 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -12,6 +12,7 @@
 
 #include 
 #include 
+#include 
 
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 
@@ -22,10 +23,23 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
+
+static char sme_cmdline_arg[] __initdata = "mem_encrypt";
+static char sme_cmdline_on[]  __initdata = "on";
+static char sme_cmdline_off[] __initdata = "off";
+
+/*
+ * Some SME functions run very early causing issues with the stack-protector
+ * support. Provide a way to turn off this support on a per-function basis.
+ */
+#define SME_NOSTACKP __attribute__((__optimize__("no-stack-protector")))
 
 /*
  * Since SME related variables are set early in the boot process they must
@@ -237,6 +251,8 @@ void __init mem_encrypt_init(void)
 
/* Call into SWIOTLB to update the SWIOTLB DMA buffers */
swiotlb_update_mem_attributes();
+
+   pr_info("AMD Secure Memory Encryption (SME) active\n");
 }
 
 void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
@@ -564,8 +580,75 @@ void __init sme_encrypt_kernel(void)
native_write_cr3(native_read_cr3());
 }
 
-unsigned long __init sme_enable(void)
+unsigned long __init SME_NOSTACKP sme_enable(struct boot_params *bp)
 {
+   const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off;
+   unsigned int eax, ebx, ecx, edx;
+   bool active_by_default;
+   unsigned long me_mask;
+   char buffer[16];
+   u64 msr;
+
+   /* Check for the SME support leaf */
+   eax = 0x80000000;
+   ecx = 0;
+   native_cpuid(&eax, &ebx, &ecx, &edx);
+   if (eax < 0x8000001f)
+   goto out;
+
+   /*
+* Check for the SME feature:
+*   CPUID Fn8000_001F[EAX] - Bit 0
+* Secure Memory Encryption support
+*   CPUID Fn8000_001F[EBX] - Bits 5:0
+* Pagetable bit position used to indicate encryption
+*/
+   eax = 0x8000001f;
+   ecx = 0;
+   native_cpuid(&eax, &ebx, &ecx, &edx);
+   if (!(eax & 1))
+   goto out;
+
+   me_mask = 1UL << (ebx & 0x3f);
+
+   /* Check if SME is enabled */
+   msr = __rdmsr(MSR_K8_SYSCFG);
+   if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+   goto out;
+
+   /*
+* Fixups have not been applied to phys_base yet and we're running
+* identity mapped, so we must obtain the address to the SME command
+* line argument data using rip-relative addressing.
+*/
+   asm ("lea sme_cmdline_arg(%%rip), %0"
+: "=r" (cmdline_arg)
+: "p" (sme_cmdline_arg));
+   asm ("lea sme_cmdline_on(%%rip), %0"
+: "=r" (cmdline_on)
+: "p" (sme_cmdline_on));
+   asm ("lea sme_cmdline_off(%%rip), %0"
+: "=r" (cmdline_off)
+: "p" (sme_cmdline_off));
+
+   if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT))
+   active_by_default = true;
+   else
+   active_by_default = false;
+
+   cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
+((u64)bp->ext_cmd_line_ptr << 32));
+
+   cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer));
+
+   if (strncmp(buffer, cmdline_on, sizeof(buffer)) == 0)
+   sme_me_mask = me_mask;
+   else if (strncmp(buffer, cmdline_off, sizeof(buffer)) == 0)
+   sme_me_mask = 0;
+   else
+   sme_me_mask = active_by_default ? me_mask : 0;
+
+out:
return sme_me_mask;
 }
 
@@ -576,9 +659,9 @@ unsigned long sme_get_me_mask(void)
 
 #else  /* !CONFIG_AMD_MEM_ENCRYPT */
 
-void __init sme_encrypt_kernel(void)   { }
-unsigned long __init sme_enable(void)  { return 0; }
+void __init sme_encrypt_kernel(void)   { }
+unsigned long __init sme_enable(struct boot_params *bp){ return 0; }
 
-unsigned long sme_get_me_mask(void){ return 0; }
+unsigned long sme_get_me_mask(void)  
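Given the sme_cmdline_* strings parsed by sme_enable() above, the behaviour can be chosen on the kernel command line, for example:

        mem_encrypt=on      # activate SME when the CPU and BIOS support it
        mem_encrypt=off     # stay unencrypted even with CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=y

If the option is absent, the CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT setting decides the default.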

[PATCH v6 33/34] x86/boot: Add early cmdline parsing for options with arguments

2017-06-07 Thread Tom Lendacky
Add a cmdline_find_option() function to look for cmdline options that
take arguments. The argument is returned in a supplied buffer and the
argument length (regardless of whether it fits in the supplied buffer)
is returned, with -1 indicating not found.
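A minimal usage sketch of the new helper (illustrative; boot_command_line stands in for whichever command-line pointer the caller has, and the option name mirrors the SME enable path elsewhere in this series):

        char buffer[16];
        int len;

        len = cmdline_find_option(boot_command_line, "mem_encrypt",
                                  buffer, sizeof(buffer));
        if (len < 0)
                ;                       /* option not present */
        else if (!strncmp(buffer, "on", sizeof(buffer)))
                ;                       /* mem_encrypt=on was given */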

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/cmdline.h |2 +
 arch/x86/lib/cmdline.c |  105 
 2 files changed, 107 insertions(+)

diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h
index e01f7f7..84ae170 100644
--- a/arch/x86/include/asm/cmdline.h
+++ b/arch/x86/include/asm/cmdline.h
@@ -2,5 +2,7 @@
 #define _ASM_X86_CMDLINE_H
 
 int cmdline_find_option_bool(const char *cmdline_ptr, const char *option);
+int cmdline_find_option(const char *cmdline_ptr, const char *option,
+   char *buffer, int bufsize);
 
 #endif /* _ASM_X86_CMDLINE_H */
diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c
index 5cc78bf..3261abb 100644
--- a/arch/x86/lib/cmdline.c
+++ b/arch/x86/lib/cmdline.c
@@ -104,7 +104,112 @@ static inline int myisspace(u8 c)
return 0;   /* Buffer overrun */
 }
 
+/*
+ * Find a non-boolean option (i.e. option=argument). In accordance with
+ * standard Linux practice, if this option is repeated, this returns the
+ * last instance on the command line.
+ *
+ * @cmdline: the cmdline string
+ * @max_cmdline_size: the maximum size of cmdline
+ * @option: option string to look for
+ * @buffer: memory buffer to return the option argument
+ * @bufsize: size of the supplied memory buffer
+ *
+ * Returns the length of the argument (regardless of if it was
+ * truncated to fit in the buffer), or -1 on not found.
+ */
+static int
+__cmdline_find_option(const char *cmdline, int max_cmdline_size,
+ const char *option, char *buffer, int bufsize)
+{
+   char c;
+   int pos = 0, len = -1;
+   const char *opptr = NULL;
+   char *bufptr = buffer;
+   enum {
+   st_wordstart = 0,   /* Start of word/after whitespace */
+   st_wordcmp, /* Comparing this word */
+   st_wordskip,/* Miscompare, skip */
+   st_bufcpy,  /* Copying this to buffer */
+   } state = st_wordstart;
+
+   if (!cmdline)
+   return -1;  /* No command line */
+
+   /*
+* This 'pos' check ensures we do not overrun
+* a non-NULL-terminated 'cmdline'
+*/
+   while (pos++ < max_cmdline_size) {
+   c = *(char *)cmdline++;
+   if (!c)
+   break;
+
+   switch (state) {
+   case st_wordstart:
+   if (myisspace(c))
+   break;
+
+   state = st_wordcmp;
+   opptr = option;
+   /* fall through */
+
+   case st_wordcmp:
+   if ((c == '=') && !*opptr) {
+   /*
+* We matched all the way to the end of the
+* option we were looking for, prepare to
+* copy the argument.
+*/
+   len = 0;
+   bufptr = buffer;
+   state = st_bufcpy;
+   break;
+   } else if (c == *opptr++) {
+   /*
+* We are currently matching, so continue
+* to the next character on the cmdline.
+*/
+   break;
+   }
+   state = st_wordskip;
+   /* fall through */
+
+   case st_wordskip:
+   if (myisspace(c))
+   state = st_wordstart;
+   break;
+
+   case st_bufcpy:
+   if (myisspace(c)) {
+   state = st_wordstart;
+   } else {
+   /*
+* Increment len, but don't overrun the
+* supplied buffer and leave room for the
+* NULL terminator.
+*/
+   if (++len < bufsize)
+   *bufptr++ = c;
+   }
+   break;
+   }
+   }
+
+   if (bufsize)
+   *bufptr = '\0';
+
+   return len;
+}
+
 int cmdline_find_option_bool(const char *cmdline, const char *option)
 {
return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option);
 }
+
+int cmdline_find_option(const char *cmdline, const char *option, char *buffer,
+ 

[PATCH v6 30/34] x86/mm, kexec: Allow kexec to be used with SME

2017-06-07 Thread Tom Lendacky
Provide support so that kexec can be used to boot a kernel when SME is
enabled.

Support is needed to allocate pages for kexec without encryption.  This
is needed in order to be able to reboot in the kernel in the same manner
as originally booted.

Additionally, when shutting down all of the CPUs we need to be sure to
flush the caches and then halt. This is needed when booting from a state
where SME was not active into a state where SME is active (or vice-versa).
Without these steps, it is possible for cache lines to exist for the same
physical location but tagged both with and without the encryption bit. This
can cause random memory corruption when caches are flushed depending on
which cacheline is written last.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/init.h  |1 +
 arch/x86/include/asm/kexec.h |8 
 arch/x86/include/asm/pgtable_types.h |1 +
 arch/x86/kernel/machine_kexec_64.c   |   35 +-
 arch/x86/kernel/process.c|   17 +++--
 arch/x86/mm/ident_map.c  |   12 
 include/linux/kexec.h|   14 ++
 kernel/kexec_core.c  |6 ++
 8 files changed, 87 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 474eb8c..05c4aa0 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -7,6 +7,7 @@ struct x86_mapping_info {
unsigned long page_flag; /* page flag for PMD or PUD entry */
unsigned long offset;/* ident mapping offset */
bool direct_gbpages; /* PUD level 1GB page support */
+   unsigned long kernpg_flag;   /* kernel pagetable flag override */
 };
 
 int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 70ef205..e8183ac 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -207,6 +207,14 @@ struct kexec_entry64_regs {
uint64_t r15;
uint64_t rip;
 };
+
+extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages,
+  gfp_t gfp);
+#define arch_kexec_post_alloc_pages arch_kexec_post_alloc_pages
+
+extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages);
+#define arch_kexec_pre_free_pages arch_kexec_pre_free_pages
+
 #endif
 
 typedef void crash_vmclear_fn(void);
diff --git a/arch/x86/include/asm/pgtable_types.h 
b/arch/x86/include/asm/pgtable_types.h
index ce8cb1c..0f326f4 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -213,6 +213,7 @@ enum page_cache_mode {
 #define PAGE_KERNEL__pgprot(__PAGE_KERNEL | _PAGE_ENC)
 #define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO | _PAGE_ENC)
 #define PAGE_KERNEL_EXEC   __pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC)
+#define PAGE_KERNEL_EXEC_NOENC __pgprot(__PAGE_KERNEL_EXEC)
 #define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX | _PAGE_ENC)
 #define PAGE_KERNEL_NOCACHE__pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
 #define PAGE_KERNEL_LARGE  __pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
diff --git a/arch/x86/kernel/machine_kexec_64.c 
b/arch/x86/kernel/machine_kexec_64.c
index 6f5ca4e..35e069a 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -87,7 +87,7 @@ static int init_transition_pgtable(struct kimage *image, 
pgd_t *pgd)
set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
}
pte = pte_offset_kernel(pmd, vaddr);
-   set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
+   set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC));
return 0;
 err:
free_transition_pgtable(image);
@@ -115,6 +115,7 @@ static int init_pgtable(struct kimage *image, unsigned long 
start_pgtable)
.alloc_pgt_page = alloc_pgt_page,
.context= image,
.page_flag  = __PAGE_KERNEL_LARGE_EXEC,
+   .kernpg_flag= _KERNPG_TABLE_NOENC,
};
unsigned long mstart, mend;
pgd_t *level4p;
@@ -602,3 +603,35 @@ void arch_kexec_unprotect_crashkres(void)
 {
kexec_mark_crashkres(false);
 }
+
+int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp)
+{
+   int ret;
+
+   if (sme_active()) {
+   /*
+* If SME is active we need to be sure that kexec pages are
+* not encrypted because when we boot to the new kernel the
+* pages won't be accessed encrypted (initially).
+*/
+   ret = set_memory_decrypted((unsigned long)vaddr, pages);
+   if (ret)
+   return ret;
+
+   if (gfp & __GFP_ZERO)
+   memset(vaddr, 0, pages * PAGE_SIZE);
+
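The hunk is truncated here; its free-path counterpart described in the commit message (restore the encryption attribute before the pages are handed back) would look roughly like the sketch below. This is an assumed shape, not necessarily the exact merged code:

        void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages)
        {
                /* re-encrypt the pages that the allocation path decrypted */
                if (sme_active())
                        set_memory_encrypted((unsigned long)vaddr, pages);
        }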

[PATCH v6 32/34] x86/mm: Add support to encrypt the kernel in-place

2017-06-07 Thread Tom Lendacky
Add the support to encrypt the kernel in-place. This is done by creating
new page mappings for the kernel - a decrypted write-protected mapping
and an encrypted mapping. The kernel is encrypted by copying it through
a temporary buffer.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/mem_encrypt.h |6 +
 arch/x86/mm/Makefile   |2 
 arch/x86/mm/mem_encrypt.c  |  314 
 arch/x86/mm/mem_encrypt_boot.S |  150 +
 4 files changed, 472 insertions(+)
 create mode 100644 arch/x86/mm/mem_encrypt_boot.S

diff --git a/arch/x86/include/asm/mem_encrypt.h 
b/arch/x86/include/asm/mem_encrypt.h
index d86e544..e0a8edc 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -21,6 +21,12 @@
 
 extern unsigned long sme_me_mask;
 
+void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr,
+unsigned long decrypted_kernel_vaddr,
+unsigned long kernel_len,
+unsigned long encryption_wa,
+unsigned long encryption_pgd);
+
 void __init sme_early_encrypt(resource_size_t paddr,
  unsigned long size);
 void __init sme_early_decrypt(resource_size_t paddr,
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 88ee454..47b26ea 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -38,3 +38,5 @@ obj-$(CONFIG_NUMA_EMU)+= numa_emulation.o
 obj-$(CONFIG_X86_INTEL_MPX)+= mpx.o
 obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
 obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
+
+obj-$(CONFIG_AMD_MEM_ENCRYPT)  += mem_encrypt_boot.o
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 018b58a..6129477 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -24,6 +24,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 /*
  * Since SME related variables are set early in the boot process they must
@@ -246,8 +248,320 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned 
long size)
set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
 }
 
+static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
+unsigned long end)
+{
+   unsigned long pgd_start, pgd_end, pgd_size;
+   pgd_t *pgd_p;
+
+   pgd_start = start & PGDIR_MASK;
+   pgd_end = end & PGDIR_MASK;
+
+   pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1);
+   pgd_size *= sizeof(pgd_t);
+
+   pgd_p = pgd_base + pgd_index(start);
+
+   memset(pgd_p, 0, pgd_size);
+}
+
+#ifndef CONFIG_X86_5LEVEL
+#define native_make_p4d(_x)(p4d_t) { .pgd = native_make_pgd(_x) }
+#endif
+
+#define PGD_FLAGS  _KERNPG_TABLE_NOENC
+#define P4D_FLAGS  _KERNPG_TABLE_NOENC
+#define PUD_FLAGS  _KERNPG_TABLE_NOENC
+#define PMD_FLAGS  (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
+
+static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
+unsigned long vaddr, pmdval_t pmd_val)
+{
+   pgd_t *pgd_p;
+   p4d_t *p4d_p;
+   pud_t *pud_p;
+   pmd_t *pmd_p;
+
+   pgd_p = pgd_base + pgd_index(vaddr);
+   if (native_pgd_val(*pgd_p)) {
+   if (IS_ENABLED(CONFIG_X86_5LEVEL))
+   p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & 
~PTE_FLAGS_MASK);
+   else
+   pud_p = (pud_t *)(native_pgd_val(*pgd_p) & 
~PTE_FLAGS_MASK);
+   } else {
+   pgd_t pgd;
+
+   if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+   p4d_p = pgtable_area;
+   memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
+   pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
+
+   pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
+   } else {
+   pud_p = pgtable_area;
+   memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
+   pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+
+   pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
+   }
+   native_set_pgd(pgd_p, pgd);
+   }
+
+   if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+   p4d_p += p4d_index(vaddr);
+   if (native_p4d_val(*p4d_p)) {
+   pud_p = (pud_t *)(native_p4d_val(*p4d_p) & 
~PTE_FLAGS_MASK);
+   } else {
+   p4d_t p4d;
+
+   pud_p = pgtable_area;
+   memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
+   pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+
+   p4d = native_make_p4d((p4dval_t)pud_p + P4D_FLAGS);
+   native_set_p4d(p4d_p, p4d);
+   }
+   }
+
+   pud_p += pud_index(vaddr);
+   if 

[PATCH v6 31/34] x86/mm: Use proper encryption attributes with /dev/mem

2017-06-07 Thread Tom Lendacky
When accessing memory using /dev/mem (or /dev/kmem) use the proper
encryption attributes when mapping the memory.

To ensure the proper attributes are applied when reading or writing
/dev/mem, update the xlate_dev_mem_ptr() function to use memremap()
which will essentially perform the same steps of applying __va for
RAM or using ioremap() if not RAM.

To ensure the proper attributes are applied when mmapping /dev/mem,
update phys_mem_access_prot() to call phys_mem_access_encrypted(),
a new function which will check if the memory should be mapped encrypted
or not. If it is not to be mapped encrypted then the VMA protection
value is updated to remove the encryption bit.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/io.h |3 +++
 arch/x86/mm/ioremap.c |   18 +-
 arch/x86/mm/pat.c |3 +++
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 9eac5a5..db163d7 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -385,4 +385,7 @@ extern bool arch_memremap_can_ram_remap(resource_size_t 
offset, size_t size,
unsigned long flags);
 #define arch_memremap_can_ram_remap arch_memremap_can_ram_remap
 
+extern bool phys_mem_access_encrypted(unsigned long phys_addr,
+ unsigned long size);
+
 #endif /* _ASM_X86_IO_H */
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 99cda55..56dd5b2 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -404,12 +404,10 @@ void *xlate_dev_mem_ptr(phys_addr_t phys)
unsigned long offset = phys & ~PAGE_MASK;
void *vaddr;
 
-   /* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
-   if (page_is_ram(start >> PAGE_SHIFT))
-   return __va(phys);
+   /* memremap() maps if RAM, otherwise falls back to ioremap() */
+   vaddr = memremap(start, PAGE_SIZE, MEMREMAP_WB);
 
-   vaddr = ioremap_cache(start, PAGE_SIZE);
-   /* Only add the offset on success and return NULL if the ioremap() 
failed: */
+   /* Only add the offset on success and return NULL if memremap() failed 
*/
if (vaddr)
vaddr += offset;
 
@@ -418,10 +416,7 @@ void *xlate_dev_mem_ptr(phys_addr_t phys)
 
 void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
 {
-   if (page_is_ram(phys >> PAGE_SHIFT))
-   return;
-
-   iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
+   memunmap((void *)((unsigned long)addr & PAGE_MASK));
 }
 
 /*
@@ -630,6 +625,11 @@ pgprot_t __init 
early_memremap_pgprot_adjust(resource_size_t phys_addr,
return prot;
 }
 
+bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size)
+{
+   return arch_memremap_can_ram_remap(phys_addr, size, 0);
+}
+
 #ifdef CONFIG_ARCH_USE_MEMREMAP_PROT
 /* Remap memory with encryption */
 void __init *early_memremap_encrypted(resource_size_t phys_addr,
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 6753d9c..b970c95 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -748,6 +748,9 @@ void arch_io_free_memtype_wc(resource_size_t start, 
resource_size_t size)
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot)
 {
+   if (!phys_mem_access_encrypted(pfn << PAGE_SHIFT, size))
+   vma_prot = pgprot_decrypted(vma_prot);
+
return vma_prot;
 }
 



[PATCH v6 28/34] x86, drm, fbdev: Do not specify encrypted memory for video mappings

2017-06-07 Thread Tom Lendacky
Since video memory needs to be accessed decrypted, be sure that the
memory encryption mask is not set for the video ranges.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/vga.h   |   14 +-
 arch/x86/mm/pageattr.c   |2 ++
 drivers/gpu/drm/drm_gem.c|2 ++
 drivers/gpu/drm/drm_vm.c |4 
 drivers/gpu/drm/ttm/ttm_bo_vm.c  |7 +--
 drivers/gpu/drm/udl/udl_fb.c |4 
 drivers/video/fbdev/core/fbmem.c |   12 
 7 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/vga.h b/arch/x86/include/asm/vga.h
index c4b9dc2..9f42bee 100644
--- a/arch/x86/include/asm/vga.h
+++ b/arch/x86/include/asm/vga.h
@@ -7,12 +7,24 @@
 #ifndef _ASM_X86_VGA_H
 #define _ASM_X86_VGA_H
 
+#include 
+
 /*
  * On the PC, we can just recalculate addresses and then
  * access the videoram directly without any black magic.
+ * To support memory encryption however, we need to access
+ * the videoram as decrypted memory.
  */
 
-#define VGA_MAP_MEM(x, s) (unsigned long)phys_to_virt(x)
+#define VGA_MAP_MEM(x, s)  \
+({ \
+   unsigned long start = (unsigned long)phys_to_virt(x);   \
+   \
+   if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) \
+   set_memory_decrypted(start, (s) >> PAGE_SHIFT); \
+   \
+   start;  \
+})
 
 #define vga_readb(x) (*(x))
 #define vga_writeb(x, y) (*(y) = (x))
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index d9e09fb..13fc5db 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1825,11 +1825,13 @@ int set_memory_encrypted(unsigned long addr, int 
numpages)
 {
return __set_memory_enc_dec(addr, numpages, true);
 }
+EXPORT_SYMBOL_GPL(set_memory_encrypted);
 
 int set_memory_decrypted(unsigned long addr, int numpages)
 {
return __set_memory_enc_dec(addr, numpages, false);
 }
+EXPORT_SYMBOL_GPL(set_memory_decrypted);
 
 int set_pages_uc(struct page *page, int numpages)
 {
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index b1e28c9..019f48c 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -928,6 +929,7 @@ int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned 
long obj_size,
vma->vm_ops = dev->driver->gem_vm_ops;
vma->vm_private_data = obj;
vma->vm_page_prot = 
pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+   vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
 
/* Take a ref for this mapping of the object, so that the fault
 * handler can dereference the mmap offset's pointer to the object.
diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c
index 1170b32..ed4bcbf 100644
--- a/drivers/gpu/drm/drm_vm.c
+++ b/drivers/gpu/drm/drm_vm.c
@@ -40,6 +40,7 @@
 #include 
 #include 
 #endif
+#include 
 #include 
 #include "drm_internal.h"
 #include "drm_legacy.h"
@@ -58,6 +59,9 @@ static pgprot_t drm_io_prot(struct drm_local_map *map,
 {
pgprot_t tmp = vm_get_page_prot(vma->vm_flags);
 
+   /* We don't want graphics memory to be mapped encrypted */
+   tmp = pgprot_decrypted(tmp);
+
 #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__)
if (map->type == _DRM_REGISTERS && !(map->flags & _DRM_WRITE_COMBINING))
tmp = pgprot_noncached(tmp);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 9f53df9..622dab6 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -39,6 +39,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define TTM_BO_VM_NUM_PREFAULT 16
 
@@ -230,9 +231,11 @@ static int ttm_bo_vm_fault(struct vm_fault *vmf)
 * first page.
 */
for (i = 0; i < TTM_BO_VM_NUM_PREFAULT; ++i) {
-   if (bo->mem.bus.is_iomem)
+   if (bo->mem.bus.is_iomem) {
+   /* Iomem should not be marked encrypted */
+   cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot);
pfn = bdev->driver->io_mem_pfn(bo, page_offset);
-   else {
+   } else {
page = ttm->pages[page_offset];
if (unlikely(!page && i == 0)) {
retval = VM_FAULT_OOM;
diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c
index 4a65003..92e1690 100644
--- a/drivers/gpu/drm/udl/udl_fb.c
+++ b/drivers/gpu/drm/udl/udl_fb.c
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -169,6 +170,9 @@ static int 

[PATCH v6 27/34] x86, realmode: Check for memory encryption on the APs

2017-06-07 Thread Tom Lendacky
Add support to check if memory encryption is active in the kernel and that
it has been enabled on the AP. If memory encryption is active in the kernel
but has not been enabled on the AP, then set the memory encryption bit (bit
23) of MSR_K8_SYSCFG to enable memory encryption on that AP and allow the
AP to continue start up.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/realmode.h  |   12 
 arch/x86/realmode/init.c |4 
 arch/x86/realmode/rm/trampoline_64.S |   24 
 3 files changed, 40 insertions(+)

diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index 230e190..90d9152 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -1,6 +1,15 @@
 #ifndef _ARCH_X86_REALMODE_H
 #define _ARCH_X86_REALMODE_H
 
+/*
+ * Flag bit definitions for use with the flags field of the trampoline header
+ * in the CONFIG_X86_64 variant.
+ */
+#define TH_FLAGS_SME_ACTIVE_BIT0
+#define TH_FLAGS_SME_ACTIVEBIT(TH_FLAGS_SME_ACTIVE_BIT)
+
+#ifndef __ASSEMBLY__
+
 #include 
 #include 
 
@@ -38,6 +47,7 @@ struct trampoline_header {
u64 start;
u64 efer;
u32 cr4;
+   u32 flags;
 #endif
 };
 
@@ -69,4 +79,6 @@ static inline size_t real_mode_size_needed(void)
 void set_real_mode_mem(phys_addr_t mem, size_t size);
 void reserve_real_mode(void);
 
+#endif /* __ASSEMBLY__ */
+
 #endif /* _ARCH_X86_REALMODE_H */
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 195ba29..60373d0 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -101,6 +101,10 @@ static void __init setup_real_mode(void)
trampoline_cr4_features = &trampoline_header->cr4;
*trampoline_cr4_features = mmu_cr4_features;
 
+   trampoline_header->flags = 0;
+   if (sme_active())
+   trampoline_header->flags |= TH_FLAGS_SME_ACTIVE;
+
trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
trampoline_pgd[0] = trampoline_pgd_entry.pgd;
trampoline_pgd[511] = init_level4_pgt[511].pgd;
diff --git a/arch/x86/realmode/rm/trampoline_64.S 
b/arch/x86/realmode/rm/trampoline_64.S
index dac7b20..614fd70 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -30,6 +30,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "realmode.h"
 
.text
@@ -92,6 +93,28 @@ ENTRY(startup_32)
	movl	%edx, %fs
	movl	%edx, %gs
 
+   /*
+* Check for memory encryption support. This is a safety net in
+* case BIOS hasn't done the necessary step of setting the bit in
+* the MSR for this AP. If SME is active and we've gotten this far
+* then it is safe for us to set the MSR bit and continue. If we
+* don't we'll eventually crash trying to execute encrypted
+* instructions.
+*/
+   bt  $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags
+   jnc .Ldone
+	movl	$MSR_K8_SYSCFG, %ecx
+   rdmsr
+   bts $MSR_K8_SYSCFG_MEM_ENCRYPT_BIT, %eax
+   jc  .Ldone
+
+   /*
+* Memory encryption is enabled but the SME enable bit for this
+* CPU has not been set.  It is safe to set it, so do so.
+*/
+   wrmsr
+.Ldone:
+
	movl	pa_tr_cr4, %eax
	movl	%eax, %cr4	# Enable PAE mode
 
@@ -147,6 +170,7 @@ GLOBAL(trampoline_header)
tr_start:   .space  8
GLOBAL(tr_efer) .space  8
GLOBAL(tr_cr4)  .space  4
+   GLOBAL(tr_flags).space  4
 END(trampoline_header)
 
 #include "trampoline_common.S"
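In C terms, the trampoline check added above is roughly equivalent to the following sketch (illustrative only; the real code has to run in the real-mode/32-bit trampoline, hence the assembly):

        if (trampoline_header->flags & TH_FLAGS_SME_ACTIVE) {
                u64 msr = __rdmsr(MSR_K8_SYSCFG);

                /* set the SME enable bit only if BIOS has not done it */
                if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
                        native_wrmsrl(MSR_K8_SYSCFG,
                                      msr | MSR_K8_SYSCFG_MEM_ENCRYPT);
        }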



[PATCH v6 25/34] swiotlb: Add warnings for use of bounce buffers with SME

2017-06-07 Thread Tom Lendacky
Add warnings to let the user know when bounce buffers are being used for
DMA when SME is active.  Since the bounce buffers are not in encrypted
memory, these notifications are to allow the user to determine some
appropriate action - if necessary.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/mem_encrypt.h |8 
 include/asm-generic/mem_encrypt.h  |5 +
 include/linux/dma-mapping.h|9 +
 lib/swiotlb.c  |3 +++
 4 files changed, 25 insertions(+)

diff --git a/arch/x86/include/asm/mem_encrypt.h 
b/arch/x86/include/asm/mem_encrypt.h
index f1215a4..c7a2525 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -69,6 +69,14 @@ static inline bool sme_active(void)
return !!sme_me_mask;
 }
 
+static inline u64 sme_dma_mask(void)
+{
+   if (!sme_me_mask)
+   return 0ULL;
+
+   return ((u64)sme_me_mask << 1) - 1;
+}
+
 /*
  * The __sme_pa() and __sme_pa_nodebug() macros are meant for use when
  * writing to or comparing values from the cr3 register.  Having the
diff --git a/include/asm-generic/mem_encrypt.h 
b/include/asm-generic/mem_encrypt.h
index b55c3f9..fb02ff0 100644
--- a/include/asm-generic/mem_encrypt.h
+++ b/include/asm-generic/mem_encrypt.h
@@ -22,6 +22,11 @@ static inline bool sme_active(void)
return false;
 }
 
+static inline u64 sme_dma_mask(void)
+{
+   return 0ULL;
+}
+
 /*
  * The __sme_set() and __sme_clr() macros are useful for adding or removing
  * the encryption mask from a value (e.g. when dealing with pagetable
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 4f3eece..e2c5fda 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /**
  * List of possible attributes associated with a DMA mapping. The semantics
@@ -577,6 +578,10 @@ static inline int dma_set_mask(struct device *dev, u64 
mask)
 
if (!dev->dma_mask || !dma_supported(dev, mask))
return -EIO;
+
+   if (sme_active() && (mask < sme_dma_mask()))
+   dev_warn(dev, "SME is active, device will require DMA bounce 
buffers\n");
+
*dev->dma_mask = mask;
return 0;
 }
@@ -596,6 +601,10 @@ static inline int dma_set_coherent_mask(struct device 
*dev, u64 mask)
 {
if (!dma_supported(dev, mask))
return -EIO;
+
+   if (sme_active() && (mask < sme_dma_mask()))
+   dev_warn(dev, "SME is active, device will require DMA bounce 
buffers\n");
+
dev->coherent_dma_mask = mask;
return 0;
 }
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 74d6557..f78906a 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -509,6 +509,9 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
if (no_iotlb_memory)
panic("Can not allocate SWIOTLB buffer earlier and can't now 
provide you with the DMA bounce buffer");
 
+   if (sme_active())
+   pr_warn_once("SME is active and system is using DMA bounce 
buffers\n");
+
mask = dma_get_seg_boundary(hwdev);
 
tbl_dma_addr &= mask;
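As a worked example of the sme_dma_mask() check above: if the encryption bit is bit 47, sme_me_mask is 1ULL << 47 and sme_dma_mask() evaluates to ((1ULL << 47) << 1) - 1, i.e. a 48-bit DMA mask. Any device whose mask is narrower than that triggers the dev_warn() and its DMA will be bounced through SWIOTLB.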



[PATCH v6 24/34] x86, swiotlb: Add memory encryption support

2017-06-07 Thread Tom Lendacky
Since DMA addresses will effectively look like 48-bit addresses when the
memory encryption mask is set, SWIOTLB is needed if the DMA mask of the
device performing the DMA does not support 48-bits. SWIOTLB will be
initialized to create decrypted bounce buffers for use by these devices.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/dma-mapping.h |5 ++-
 arch/x86/include/asm/mem_encrypt.h |5 +++
 arch/x86/kernel/pci-dma.c  |   11 +--
 arch/x86/kernel/pci-nommu.c|2 +
 arch/x86/kernel/pci-swiotlb.c  |   15 --
 arch/x86/mm/mem_encrypt.c  |   22 ++
 include/linux/swiotlb.h|1 +
 init/main.c|   13 
 lib/swiotlb.c  |   56 +++-
 9 files changed, 113 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/dma-mapping.h 
b/arch/x86/include/asm/dma-mapping.h
index 08a0838..d75430a 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #ifdef CONFIG_ISA
 # define ISA_DMA_BIT_MASK DMA_BIT_MASK(24)
@@ -62,12 +63,12 @@ static inline bool dma_capable(struct device *dev, 
dma_addr_t addr, size_t size)
 
 static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
-   return paddr;
+   return __sme_set(paddr);
 }
 
 static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
 {
-   return daddr;
+   return __sme_clr(daddr);
 }
 #endif /* CONFIG_X86_DMA_REMAP */
 
diff --git a/arch/x86/include/asm/mem_encrypt.h 
b/arch/x86/include/asm/mem_encrypt.h
index 61a7049..f1215a4 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -31,6 +31,11 @@ void __init sme_early_decrypt(resource_size_t paddr,
 
 void __init sme_early_init(void);
 
+/* Architecture __weak replacement functions */
+void __init mem_encrypt_init(void);
+
+void swiotlb_set_mem_attributes(void *vaddr, unsigned long size);
+
 #else  /* !CONFIG_AMD_MEM_ENCRYPT */
 
 #define sme_me_mask0UL
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 3a216ec..72d96d4 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -93,9 +93,12 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t 
size,
if (gfpflags_allow_blocking(flag)) {
page = dma_alloc_from_contiguous(dev, count, get_order(size),
 flag);
-   if (page && page_to_phys(page) + size > dma_mask) {
-   dma_release_from_contiguous(dev, page, count);
-   page = NULL;
+   if (page) {
+   addr = phys_to_dma(dev, page_to_phys(page));
+   if (addr + size > dma_mask) {
+   dma_release_from_contiguous(dev, page, count);
+   page = NULL;
+   }
}
}
/* fallback */
@@ -104,7 +107,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t 
size,
if (!page)
return NULL;
 
-   addr = page_to_phys(page);
+   addr = phys_to_dma(dev, page_to_phys(page));
if (addr + size > dma_mask) {
__free_pages(page, get_order(size));
 
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index a88952e..98b576a 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -30,7 +30,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct 
page *page,
 enum dma_data_direction dir,
 unsigned long attrs)
 {
-   dma_addr_t bus = page_to_phys(page) + offset;
+   dma_addr_t bus = phys_to_dma(dev, page_to_phys(page)) + offset;
WARN_ON(size == 0);
if (!check_addr("map_single", dev, bus, size))
return DMA_ERROR_CODE;
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 1e23577..cc1e106 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -12,6 +12,8 @@
 #include 
 #include 
 #include 
+#include 
+
 int swiotlb __read_mostly;
 
 void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
@@ -79,8 +81,8 @@ int __init pci_swiotlb_detect_override(void)
  pci_swiotlb_late_init);
 
 /*
- * if 4GB or more detected (and iommu=off not set) return 1
- * and set swiotlb to 1.
+ * If 4GB or more detected (and iommu=off not set) or if SME is active
+ * then set swiotlb to 1 and return 1.
  */
 int __init pci_swiotlb_detect_4gb(void)
 {
@@ -89,6 +91,15 @@ int __init pci_swiotlb_detect_4gb(void)
if (!no_iommu && max_possible_pfn > MAX_DMA32_PFN)
swiotlb = 1;
 #endif
+
+   /*
+* If SME is active then swiotlb will be set to 1 so that bounce
+   

[PATCH v6 23/34] x86, realmode: Decrypt trampoline area if memory encryption is active

2017-06-07 Thread Tom Lendacky
When Secure Memory Encryption is enabled, the trampoline area must not
be encrypted. A CPU running in real mode will not be able to decrypt
memory that has been encrypted because it will not be able to use addresses
with the memory encryption mask.

A recent change that added a new system_state value exposed a warning
issued by early_ioremap() when the system_state was not SYSTEM_BOOTING.
At the stage where the trampoline area is decrypted, the system_state is
now SYSTEM_SCHEDULING. The check was changed to issue a warning if the
system_state is greater than or equal to SYSTEM_RUNNING.
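
The relaxed check relies on the ordering of the system_states enum; roughly,
as of this series:

  enum system_states {
          SYSTEM_BOOTING,
          SYSTEM_SCHEDULING,      /* the newly added state */
          SYSTEM_RUNNING,
          SYSTEM_HALT,
          SYSTEM_POWER_OFF,
          SYSTEM_RESTART,
  };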

Signed-off-by: Tom Lendacky 
---
 arch/x86/realmode/init.c |   11 +++
 mm/early_ioremap.c   |2 +-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index a163a90..195ba29 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -6,6 +6,7 @@
 #include 
 #include 
 #include 
+#include 
 
 struct real_mode_header *real_mode_header;
 u32 *trampoline_cr4_features;
@@ -130,6 +131,16 @@ static void __init set_real_mode_permissions(void)
unsigned long text_start =
(unsigned long) __va(real_mode_header->text_start);
 
+   /*
+* If SME is active, the trampoline area will need to be in
+* decrypted memory in order to bring up other processors
+* successfully.
+*/
+   if (sme_active()) {
+   sme_early_decrypt(__pa(base), size);
+   set_memory_decrypted((unsigned long)base, size >> PAGE_SHIFT);
+   }
+
set_memory_nx((unsigned long) base, size >> PAGE_SHIFT);
set_memory_ro((unsigned long) base, ro_size >> PAGE_SHIFT);
set_memory_x((unsigned long) text_start, text_size >> PAGE_SHIFT);
diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c
index b1dd4a9..01d13ae 100644
--- a/mm/early_ioremap.c
+++ b/mm/early_ioremap.c
@@ -110,7 +110,7 @@ static int __init check_early_ioremap_leak(void)
enum fixed_addresses idx;
int i, slot;
 
-   WARN_ON(system_state != SYSTEM_BOOTING);
+   WARN_ON(system_state >= SYSTEM_RUNNING);
 
slot = -1;
for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {



[PATCH v6 21/34] x86/mm: Add support to access persistent memory in the clear

2017-06-07 Thread Tom Lendacky
Persistent memory is expected to persist across reboots. The encryption
key used by SME will change across reboots, which will result in corrupted
persistent memory.  Persistent memory is handed out by block devices
through memory remapping functions, so be sure not to map this memory as
encrypted.
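
A mapping helper can then adjust the page protection based on this check; an
illustrative sketch using the pgprot helpers added earlier in the series:

  pgprot_t prot = PAGE_KERNEL;

  if (memremap_should_map_decrypted(phys_addr, size))
          prot = pgprot_decrypted(prot);  /* strip the encryption mask */
  else
          prot = pgprot_encrypted(prot);  /* keep the encryption mask */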

Signed-off-by: Tom Lendacky 
---
 arch/x86/mm/ioremap.c |   31 ++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 34ed59d..99cda55 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -428,17 +428,46 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
  * Examine the physical address to determine if it is an area of memory
  * that should be mapped decrypted.  If the memory is not part of the
  * kernel usable area it was accessed and created decrypted, so these
- * areas should be mapped decrypted.
+ * areas should be mapped decrypted. And since the encryption key can
+ * change across reboots, persistent memory should also be mapped
+ * decrypted.
  */
 static bool memremap_should_map_decrypted(resource_size_t phys_addr,
  unsigned long size)
 {
+   int is_pmem;
+
+   /*
+* Check if the address is part of a persistent memory region.
+* This check covers areas added by E820, EFI and ACPI.
+*/
+   is_pmem = region_intersects(phys_addr, size, IORESOURCE_MEM,
+   IORES_DESC_PERSISTENT_MEMORY);
+   if (is_pmem != REGION_DISJOINT)
+   return true;
+
+   /*
+* Check if the non-volatile attribute is set for an EFI
+* reserved area.
+*/
+   if (efi_enabled(EFI_BOOT)) {
+   switch (efi_mem_type(phys_addr)) {
+   case EFI_RESERVED_TYPE:
+   if (efi_mem_attributes(phys_addr) & EFI_MEMORY_NV)
+   return true;
+   break;
+   default:
+   break;
+   }
+   }
+
/* Check if the address is outside kernel usable area */
switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) {
case E820_TYPE_RESERVED:
case E820_TYPE_ACPI:
case E820_TYPE_NVS:
case E820_TYPE_UNUSABLE:
+   case E820_TYPE_PRAM:
return true;
default:
break;



[PATCH v6 20/34] x86, mpparse: Use memremap to map the mpf and mpc data

2017-06-07 Thread Tom Lendacky
The SMP MP-table is built by UEFI and placed in memory in a decrypted
state. These tables are accessed using a mix of early_memremap(),
early_memunmap(), phys_to_virt() and virt_to_phys(). Change all accesses
to use early_memremap()/early_memunmap(). This allows for proper setting
of the encryption mask so that the data can be successfully accessed when
SME is active.
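
The access pattern this converges on is the usual map/use/unmap sequence; a
minimal sketch:

  struct mpf_intel *mpf;

  mpf = early_memremap(mpf_base, sizeof(*mpf));
  if (!mpf)
          return;                         /* mapping failed */
  /* ... read the MP floating pointer structure ... */
  early_memunmap(mpf, sizeof(*mpf));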

Signed-off-by: Tom Lendacky 
---
 arch/x86/kernel/mpparse.c |   98 -
 1 file changed, 70 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index fd37f39..44b5d582 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -429,7 +429,7 @@ static inline void __init construct_default_ISA_mptable(int 
mpc_default_type)
}
 }
 
-static struct mpf_intel *mpf_found;
+static unsigned long mpf_base;
 
 static unsigned long __init get_mpc_size(unsigned long physptr)
 {
@@ -451,6 +451,7 @@ static int __init check_physptr(struct mpf_intel *mpf, 
unsigned int early)
 
size = get_mpc_size(mpf->physptr);
mpc = early_memremap(mpf->physptr, size);
+
/*
 * Read the physical hardware table.  Anything here will
 * override the defaults.
@@ -497,12 +498,12 @@ static int __init check_physptr(struct mpf_intel *mpf, 
unsigned int early)
  */
 void __init default_get_smp_config(unsigned int early)
 {
-   struct mpf_intel *mpf = mpf_found;
+   struct mpf_intel *mpf;
 
if (!smp_found_config)
return;
 
-   if (!mpf)
+   if (!mpf_base)
return;
 
if (acpi_lapic && early)
@@ -515,6 +516,12 @@ void __init default_get_smp_config(unsigned int early)
if (acpi_lapic && acpi_ioapic)
return;
 
+   mpf = early_memremap(mpf_base, sizeof(*mpf));
+   if (!mpf) {
+   pr_err("MPTABLE: mpf early_memremap() failed\n");
+   return;
+   }
+
pr_info("Intel MultiProcessor Specification v1.%d\n",
mpf->specification);
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
@@ -529,7 +536,7 @@ void __init default_get_smp_config(unsigned int early)
/*
 * Now see if we need to read further.
 */
-   if (mpf->feature1 != 0) {
+   if (mpf->feature1) {
if (early) {
/*
 * local APIC has default address
@@ -542,8 +549,10 @@ void __init default_get_smp_config(unsigned int early)
construct_default_ISA_mptable(mpf->feature1);
 
} else if (mpf->physptr) {
-   if (check_physptr(mpf, early))
+   if (check_physptr(mpf, early)) {
+   early_memunmap(mpf, sizeof(*mpf));
return;
+   }
} else
BUG();
 
@@ -552,6 +561,8 @@ void __init default_get_smp_config(unsigned int early)
/*
 * Only use the first configuration found.
 */
+
+   early_memunmap(mpf, sizeof(*mpf));
 }
 
 static void __init smp_reserve_memory(struct mpf_intel *mpf)
@@ -561,15 +572,16 @@ static void __init smp_reserve_memory(struct mpf_intel 
*mpf)
 
 static int __init smp_scan_config(unsigned long base, unsigned long length)
 {
-   unsigned int *bp = phys_to_virt(base);
+   unsigned int *bp;
struct mpf_intel *mpf;
-   unsigned long mem;
+   int ret = 0;
 
apic_printk(APIC_VERBOSE, "Scan for SMP in [mem %#010lx-%#010lx]\n",
base, base + length - 1);
BUILD_BUG_ON(sizeof(*mpf) != 16);
 
while (length > 0) {
+   bp = early_memremap(base, length);
mpf = (struct mpf_intel *)bp;
if ((*bp == SMP_MAGIC_IDENT) &&
(mpf->length == 1) &&
@@ -579,24 +591,26 @@ static int __init smp_scan_config(unsigned long base, 
unsigned long length)
 #ifdef CONFIG_X86_LOCAL_APIC
smp_found_config = 1;
 #endif
-   mpf_found = mpf;
+   mpf_base = base;
 
-   pr_info("found SMP MP-table at [mem %#010llx-%#010llx] 
mapped at [%p]\n",
-   (unsigned long long) virt_to_phys(mpf),
-   (unsigned long long) virt_to_phys(mpf) +
-   sizeof(*mpf) - 1, mpf);
+   pr_info("found SMP MP-table at [mem %#010lx-%#010lx] 
mapped at [%p]\n",
+   base, base + sizeof(*mpf) - 1, mpf);
 
-   mem = virt_to_phys(mpf);
-   memblock_reserve(mem, sizeof(*mpf));
+   memblock_reserve(base, sizeof(*mpf));
if (mpf->physptr)
smp_reserve_memory(mpf);
 
-   return 1;
+   ret = 1;
}
-   bp += 4;
+   

[PATCH v6 18/34] x86/efi: Update EFI pagetable creation to work with SME

2017-06-07 Thread Tom Lendacky
When SME is active, pagetable entries created for EFI need to have the
encryption mask set as necessary.

When the new pagetable pages are allocated they are mapped encrypted. So,
update the efi_pgt value that will be used in cr3 to include the encryption
mask so that the PGD table can be read successfully. The pagetable pages, as
well as the kernel, are also added to the pagetable mapping as encrypted.
All other EFI mappings are mapped decrypted (tables, etc.).

Signed-off-by: Tom Lendacky 
---
 arch/x86/platform/efi/efi_64.c |   15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index eb8dff1..ed37fa3 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -327,7 +327,7 @@ void efi_sync_low_kernel_mappings(void)
 
 int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 {
-   unsigned long pfn, text;
+   unsigned long pfn, text, pf;
struct page *page;
unsigned npages;
pgd_t *pgd;
@@ -335,7 +335,12 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, 
unsigned num_pages)
if (efi_enabled(EFI_OLD_MEMMAP))
return 0;
 
-   efi_scratch.efi_pgt = (pgd_t *)__pa(efi_pgd);
+   /*
+* Since the PGD is encrypted, set the encryption mask so that when
+* this value is loaded into cr3 the PGD will be decrypted during
+* the pagetable walk.
+*/
+   efi_scratch.efi_pgt = (pgd_t *)__sme_pa(efi_pgd);
pgd = efi_pgd;
 
/*
@@ -345,7 +350,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, 
unsigned num_pages)
 * phys_efi_set_virtual_address_map().
 */
pfn = pa_memmap >> PAGE_SHIFT;
-   if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX | 
_PAGE_RW)) {
+   pf = _PAGE_NX | _PAGE_RW | _PAGE_ENC;
+   if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, pf)) {
pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap);
return 1;
}
@@ -388,7 +394,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, 
unsigned num_pages)
text = __pa(_text);
pfn = text >> PAGE_SHIFT;
 
-   if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, _PAGE_RW)) {
+   pf = _PAGE_RW | _PAGE_ENC;
+   if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, pf)) {
pr_err("Failed to map kernel text 1:1\n");
return 1;
}



[PATCH v6 16/34] efi: Add an EFI table address match function

2017-06-07 Thread Tom Lendacky
Add a function that will determine if a supplied physical address matches
the address of an EFI table.
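
A later patch is expected to use this when deciding how firmware data should
be mapped; roughly, inside a "should this be mapped decrypted?" check
(illustrative, not the exact call site):

  /* EFI tables were written before SME was active, so map them decrypted */
  if (efi_is_table_address(phys_addr))
          return true;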

Signed-off-by: Tom Lendacky 
---
 drivers/firmware/efi/efi.c |   33 +
 include/linux/efi.h|7 +++
 2 files changed, 40 insertions(+)

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index b372aad..983675d 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -55,6 +55,25 @@ struct efi __read_mostly efi = {
 };
 EXPORT_SYMBOL(efi);
 
+static unsigned long *efi_tables[] = {
+   ,
+   ,
+   ,
+   ,
+   ,
+   _systab,
+   _info,
+   ,
+   ,
+   _systab,
+   _vendor,
+   ,
+   _table,
+   ,
+   _table,
+   _attr_table,
+};
+
 static bool disable_runtime;
 static int __init setup_noefi(char *arg)
 {
@@ -854,6 +873,20 @@ int efi_status_to_err(efi_status_t status)
return err;
 }
 
+bool efi_is_table_address(unsigned long phys_addr)
+{
+   unsigned int i;
+
+   if (phys_addr == EFI_INVALID_TABLE_ADDR)
+   return false;
+
+   for (i = 0; i < ARRAY_SIZE(efi_tables); i++)
+   if (*(efi_tables[i]) == phys_addr)
+   return true;
+
+   return false;
+}
+
 #ifdef CONFIG_KEXEC
 static int update_efi_random_seed(struct notifier_block *nb,
  unsigned long code, void *unused)
diff --git a/include/linux/efi.h b/include/linux/efi.h
index ec36f42..504fa85 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1079,6 +1079,8 @@ static inline bool efi_enabled(int feature)
return test_bit(feature, ) != 0;
 }
 extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused);
+
+extern bool efi_is_table_address(unsigned long phys_addr);
 #else
 static inline bool efi_enabled(int feature)
 {
@@ -1092,6 +1094,11 @@ static inline bool efi_enabled(int feature)
 {
return false;
 }
+
+static inline bool efi_is_table_address(unsigned long phys_addr)
+{
+   return false;
+}
 #endif
 
 extern int efi_status_to_err(efi_status_t status);



[PATCH v6 17/34] efi: Update efi_mem_type() to return an error rather than 0

2017-06-07 Thread Tom Lendacky
The efi_mem_type() function currently returns a 0, which maps to
EFI_RESERVED_TYPE, if the function is unable to find a memmap entry for
the supplied physical address. Returning EFI_RESERVED_TYPE implies that
a memmap entry exists, when it doesn't.  Instead of returning 0, change
the function to return a negative error value when no memmap entry is
found.
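
Callers can then tell a real EFI_RESERVED_TYPE entry apart from "no entry at
all"; a hedged sketch of the intended usage, inside a helper that decides
whether an address is firmware-reserved:

  int type = efi_mem_type(phys_addr);

  if (type < 0)
          return false;                   /* no memmap entry for this address */
  return type == EFI_RESERVED_TYPE;       /* a genuine reserved region */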

Signed-off-by: Tom Lendacky 
---
 arch/ia64/kernel/efi.c  |4 ++--
 arch/x86/platform/efi/efi.c |6 +++---
 include/linux/efi.h |2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 1212956..8141600 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -757,14 +757,14 @@ static void __init handle_palo(unsigned long phys_addr)
return 0;
 }
 
-u32
+int
 efi_mem_type (unsigned long phys_addr)
 {
efi_memory_desc_t *md = efi_memory_descriptor(phys_addr);
 
if (md)
return md->type;
-   return 0;
+   return -EINVAL;
 }
 
 u64
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 43b96f5..a6a26cc 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -1034,12 +1034,12 @@ void __init efi_enter_virtual_mode(void)
 /*
  * Convenience functions to obtain memory types and attributes
  */
-u32 efi_mem_type(unsigned long phys_addr)
+int efi_mem_type(unsigned long phys_addr)
 {
efi_memory_desc_t *md;
 
if (!efi_enabled(EFI_MEMMAP))
-   return 0;
+   return -ENOTSUPP;
 
for_each_efi_memory_desc(md) {
if ((md->phys_addr <= phys_addr) &&
@@ -1047,7 +1047,7 @@ u32 efi_mem_type(unsigned long phys_addr)
  (md->num_pages << EFI_PAGE_SHIFT
return md->type;
}
-   return 0;
+   return -EINVAL;
 }
 
 static int __init arch_parse_efi_cmdline(char *str)
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 504fa85..8bcb271 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -973,7 +973,7 @@ static inline void efi_esrt_init(void) { }
 extern int efi_config_parse_tables(void *config_tables, int count, int sz,
   efi_config_table_type_t *arch_tables);
 extern u64 efi_get_iobase (void);
-extern u32 efi_mem_type (unsigned long phys_addr);
+extern int efi_mem_type(unsigned long phys_addr);
 extern u64 efi_mem_attributes (unsigned long phys_addr);
 extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size);
 extern int __init efi_uart_console_only (void);



[PATCH v6 13/34] x86/mm: Add support for early encrypt/decrypt of memory

2017-06-07 Thread Tom Lendacky
Add support to be able to either encrypt or decrypt data in place during
the early stages of booting the kernel. This does not change the memory
encryption attribute - it is used for ensuring that data present in either
an encrypted or decrypted memory area is in the proper state (for example
the initrd will have been loaded by the boot loader and will not be
encrypted, but the memory that it resides in is marked as encrypted).
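
As a usage sketch for the initrd case mentioned above (the variable names
here are illustrative):

  /* the initrd was loaded decrypted but will be mapped encrypted later,
   * so convert its contents in place */
  sme_early_encrypt(initrd_start_phys, initrd_size);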

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/mem_encrypt.h |   15 +++
 arch/x86/mm/mem_encrypt.c  |   76 
 2 files changed, 91 insertions(+)

diff --git a/arch/x86/include/asm/mem_encrypt.h 
b/arch/x86/include/asm/mem_encrypt.h
index f1c4c29..7c395cf 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -21,12 +21,27 @@
 
 extern unsigned long sme_me_mask;
 
+void __init sme_early_encrypt(resource_size_t paddr,
+ unsigned long size);
+void __init sme_early_decrypt(resource_size_t paddr,
+ unsigned long size);
+
 void __init sme_early_init(void);
 
 #else  /* !CONFIG_AMD_MEM_ENCRYPT */
 
 #define sme_me_mask0UL
 
+static inline void __init sme_early_encrypt(resource_size_t paddr,
+   unsigned long size)
+{
+}
+
+static inline void __init sme_early_decrypt(resource_size_t paddr,
+   unsigned long size)
+{
+}
+
 static inline void __init sme_early_init(void)
 {
 }
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 8ca93e5..18c0887 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -17,6 +17,9 @@
 
 #include 
 
+#include 
+#include 
+
 /*
  * Since SME related variables are set early in the boot process they must
  * reside in the .data section so as not to be zeroed out when the .bss
@@ -25,6 +28,79 @@
 unsigned long sme_me_mask __section(.data) = 0;
 EXPORT_SYMBOL_GPL(sme_me_mask);
 
+/* Buffer used for early in-place encryption by BSP, no locking needed */
+static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE);
+
+/*
+ * This routine does not change the underlying encryption setting of the
+ * page(s) that map this memory. It assumes that eventually the memory is
+ * meant to be accessed as either encrypted or decrypted but the contents
+ * are currently not in the desired state.
+ *
+ * This routine follows the steps outlined in the AMD64 Architecture
+ * Programmer's Manual Volume 2, Section 7.10.8 Encrypt-in-Place.
+ */
+static void __init __sme_early_enc_dec(resource_size_t paddr,
+  unsigned long size, bool enc)
+{
+   void *src, *dst;
+   size_t len;
+
+   if (!sme_me_mask)
+   return;
+
+   local_flush_tlb();
+   wbinvd();
+
+   /*
+* There are limited number of early mapping slots, so map (at most)
+* one page at time.
+*/
+   while (size) {
+   len = min_t(size_t, sizeof(sme_early_buffer), size);
+
+   /*
+* Create mappings for the current and desired format of
+* the memory. Use a write-protected mapping for the source.
+*/
+   src = enc ? early_memremap_decrypted_wp(paddr, len) :
+   early_memremap_encrypted_wp(paddr, len);
+
+   dst = enc ? early_memremap_encrypted(paddr, len) :
+   early_memremap_decrypted(paddr, len);
+
+   /*
+* If a mapping can't be obtained to perform the operation,
+* then eventual access of that area in the desired mode
+* will cause a crash.
+*/
+   BUG_ON(!src || !dst);
+
+   /*
+* Use a temporary buffer, of cache-line multiple size, to
+* avoid data corruption as documented in the APM.
+*/
+   memcpy(sme_early_buffer, src, len);
+   memcpy(dst, sme_early_buffer, len);
+
+   early_memunmap(dst, len);
+   early_memunmap(src, len);
+
+   paddr += len;
+   size -= len;
+   }
+}
+
+void __init sme_early_encrypt(resource_size_t paddr, unsigned long size)
+{
+   __sme_early_enc_dec(paddr, size, true);
+}
+
+void __init sme_early_decrypt(resource_size_t paddr, unsigned long size)
+{
+   __sme_early_enc_dec(paddr, size, false);
+}
+
 void __init sme_early_init(void)
 {
unsigned int i;



[PATCH v6 14/34] x86/mm: Insure that boot memory areas are mapped properly

2017-06-07 Thread Tom Lendacky
The boot data and command line data are present in memory in a decrypted
state and are copied early in the boot process.  The early page fault
support will map these areas as encrypted, so before attempting to copy
them, add decrypted mappings so the data is accessed properly when copied.

For the initrd, encrypt this data in place. Since the initrd area will later
be mapped as encrypted, the data will then be accessed properly.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/mem_encrypt.h |   11 +
 arch/x86/include/asm/pgtable.h |3 +
 arch/x86/kernel/head64.c   |   30 --
 arch/x86/kernel/setup.c|9 
 arch/x86/mm/mem_encrypt.c  |   77 
 5 files changed, 126 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/mem_encrypt.h 
b/arch/x86/include/asm/mem_encrypt.h
index 7c395cf..61a7049 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -26,6 +26,9 @@ void __init sme_early_encrypt(resource_size_t paddr,
 void __init sme_early_decrypt(resource_size_t paddr,
  unsigned long size);
 
+void __init sme_map_bootdata(char *real_mode_data);
+void __init sme_unmap_bootdata(char *real_mode_data);
+
 void __init sme_early_init(void);
 
 #else  /* !CONFIG_AMD_MEM_ENCRYPT */
@@ -42,6 +45,14 @@ static inline void __init sme_early_decrypt(resource_size_t 
paddr,
 {
 }
 
+static inline void __init sme_map_bootdata(char *real_mode_data)
+{
+}
+
+static inline void __init sme_unmap_bootdata(char *real_mode_data)
+{
+}
+
 static inline void __init sme_early_init(void)
 {
 }
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 3f789ec..16657e7 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -23,6 +23,9 @@
 #ifndef __ASSEMBLY__
 #include 
 
+extern pgd_t early_level4_pgt[PTRS_PER_PGD];
+int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
+
 void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
 void ptdump_walk_pgd_level_checkwx(void);
 
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 00ae2c5..f1fe5df 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -34,7 +34,6 @@
 /*
  * Manage page tables very early on.
  */
-extern pgd_t early_level4_pgt[PTRS_PER_PGD];
 extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
 static unsigned int __initdata next_early_pgt = 2;
 pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
@@ -48,12 +47,12 @@ static void __init reset_early_page_tables(void)
 }
 
 /* Create a new PMD entry */
-int __init early_make_pgtable(unsigned long address)
+int __init __early_make_pgtable(unsigned long address, pmdval_t pmd)
 {
unsigned long physaddr = address - __PAGE_OFFSET;
pgdval_t pgd, *pgd_p;
pudval_t pud, *pud_p;
-   pmdval_t pmd, *pmd_p;
+   pmdval_t *pmd_p;
 
/* Invalid address or early pgt is done ?  */
if (physaddr >= MAXMEM || read_cr3_pa() != 
__pa_nodebug(early_level4_pgt))
@@ -95,12 +94,21 @@ int __init early_make_pgtable(unsigned long address)
memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
*pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + 
_KERNPG_TABLE;
}
-   pmd = (physaddr & PMD_MASK) + early_pmd_flags;
pmd_p[pmd_index(address)] = pmd;
 
return 0;
 }
 
+int __init early_make_pgtable(unsigned long address)
+{
+   unsigned long physaddr = address - __PAGE_OFFSET;
+   pmdval_t pmd;
+
+   pmd = (physaddr & PMD_MASK) + early_pmd_flags;
+
+   return __early_make_pgtable(address, pmd);
+}
+
 /* Don't add a printk in there. printk relies on the PDA which is not 
initialized 
yet. */
 static void __init clear_bss(void)
@@ -123,6 +131,12 @@ static void __init copy_bootdata(char *real_mode_data)
char * command_line;
unsigned long cmd_line_ptr;
 
+   /*
+* If SME is active, this will create decrypted mappings of the
+* boot data in advance of the copy operations.
+*/
+   sme_map_bootdata(real_mode_data);
+
memcpy(_params, real_mode_data, sizeof boot_params);
sanitize_boot_params(_params);
cmd_line_ptr = get_cmd_line_ptr();
@@ -130,6 +144,14 @@ static void __init copy_bootdata(char *real_mode_data)
command_line = __va(cmd_line_ptr);
memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
}
+
+   /*
+* The old boot data is no longer needed and won't be reserved,
+* freeing up that memory for use by the system. If SME is active,
+* we need to remove the mappings that were created so that the
+* memory doesn't remain mapped as decrypted.
+*/
+   sme_unmap_bootdata(real_mode_data);
 }
 
 asmlinkage __visible void __init 

[PATCH v6 10/34] x86, x86/mm, x86/xen, olpc: Use __va() against just the physical address in cr3

2017-06-07 Thread Tom Lendacky
The cr3 register entry can contain the SME encryption bit that indicates
the PGD is encrypted.  The encryption bit should not be used when creating
a virtual address for the PGD table.

Create a new function, read_cr3_pa(), that will extract the physical
address from the cr3 register. This function is then used where a virtual
address of the PGD needs to be created/used from the cr3 register.
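
The pattern applied at each call site is simply (sketch):

  /* get a usable virtual address of the current PGD, encryption bit stripped */
  pgd_t *pgd = __va(read_cr3_pa());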

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/special_insns.h |9 +
 arch/x86/kernel/head64.c |2 +-
 arch/x86/mm/fault.c  |   10 +-
 arch/x86/mm/ioremap.c|2 +-
 arch/x86/platform/olpc/olpc-xo1-pm.c |2 +-
 arch/x86/power/hibernate_64.c|2 +-
 arch/x86/xen/mmu_pv.c|6 +++---
 7 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/special_insns.h 
b/arch/x86/include/asm/special_insns.h
index 12af3e3..d8e8ace 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -234,6 +234,15 @@ static inline void clwb(volatile void *__p)
 
 #define nop() asm volatile ("nop")
 
+static inline unsigned long native_read_cr3_pa(void)
+{
+   return (native_read_cr3() & PHYSICAL_PAGE_MASK);
+}
+
+static inline unsigned long read_cr3_pa(void)
+{
+   return (read_cr3() & PHYSICAL_PAGE_MASK);
+}
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 43b7002..dc03624 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -55,7 +55,7 @@ int __init early_make_pgtable(unsigned long address)
pmdval_t pmd, *pmd_p;
 
/* Invalid address or early pgt is done ?  */
-   if (physaddr >= MAXMEM || read_cr3() != __pa_nodebug(early_level4_pgt))
+   if (physaddr >= MAXMEM || read_cr3_pa() != 
__pa_nodebug(early_level4_pgt))
return -1;
 
 again:
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 8ad91a0..2a1fa10c 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -346,7 +346,7 @@ static noinline int vmalloc_fault(unsigned long address)
 * Do _not_ use "current" here. We might be inside
 * an interrupt in the middle of a task switch..
 */
-   pgd_paddr = read_cr3();
+   pgd_paddr = read_cr3_pa();
pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
if (!pmd_k)
return -1;
@@ -388,7 +388,7 @@ static bool low_pfn(unsigned long pfn)
 
 static void dump_pagetable(unsigned long address)
 {
-   pgd_t *base = __va(read_cr3());
+   pgd_t *base = __va(read_cr3_pa());
pgd_t *pgd = [pgd_index(address)];
p4d_t *p4d;
pud_t *pud;
@@ -451,7 +451,7 @@ static noinline int vmalloc_fault(unsigned long address)
 * happen within a race in page table update. In the later
 * case just flush:
 */
-   pgd = (pgd_t *)__va(read_cr3()) + pgd_index(address);
+   pgd = (pgd_t *)__va(read_cr3_pa()) + pgd_index(address);
pgd_ref = pgd_offset_k(address);
if (pgd_none(*pgd_ref))
return -1;
@@ -555,7 +555,7 @@ static int bad_address(void *p)
 
 static void dump_pagetable(unsigned long address)
 {
-   pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK);
+   pgd_t *base = __va(read_cr3_pa());
pgd_t *pgd = base + pgd_index(address);
p4d_t *p4d;
pud_t *pud;
@@ -700,7 +700,7 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long 
address)
pgd_t *pgd;
pte_t *pte;
 
-   pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK);
+   pgd = __va(read_cr3_pa());
pgd += pgd_index(address);
 
pte = lookup_address_in_pgd(pgd, address, );
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 2a0fa89..e6305dd 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -427,7 +427,7 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
 static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
 {
/* Don't assume we're using swapper_pg_dir at this point */
-   pgd_t *base = __va(read_cr3());
+   pgd_t *base = __va(read_cr3_pa());
pgd_t *pgd = [pgd_index(addr)];
p4d_t *p4d = p4d_offset(pgd, addr);
pud_t *pud = pud_offset(p4d, addr);
diff --git a/arch/x86/platform/olpc/olpc-xo1-pm.c 
b/arch/x86/platform/olpc/olpc-xo1-pm.c
index c5350fd..0668aaf 100644
--- a/arch/x86/platform/olpc/olpc-xo1-pm.c
+++ b/arch/x86/platform/olpc/olpc-xo1-pm.c
@@ -77,7 +77,7 @@ static int xo1_power_state_enter(suspend_state_t pm_state)
 
 asmlinkage __visible int xo1_do_sleep(u8 sleep_state)
 {
-   void *pgd_addr = __va(read_cr3());
+   void *pgd_addr = __va(read_cr3_pa());
 
/* Program wakeup mask (using dword access to CS5536_PM1_EN) */
outl(wakeup_mask << 16, acpi_base + CS5536_PM1_STS);
diff --git a/arch/x86/power/hibernate_64.c 

[PATCH v6 11/34] x86/mm: Provide general kernel support for memory encryption

2017-06-07 Thread Tom Lendacky
Changes to the existing page table macros will allow the SME support to
be enabled in a simple fashion with minimal changes to files that use these
macros.  Since the memory encryption mask will now be part of the regular
pagetable macros, we introduce two new macros (_PAGE_TABLE_NOENC and
_KERNPG_TABLE_NOENC) to allow for early pagetable creation/initialization
without the encryption mask before SME becomes active.  Two new pgprot()
macros are defined to allow setting or clearing the page encryption mask.

The FIXMAP_PAGE_NOCACHE define is introduced for use with MMIO.  SME does
not support encryption for MMIO areas so this define removes the encryption
mask from the page attribute.

Two new macros are introduced (__sme_pa() / __sme_pa_nodebug()) to allow
creating a physical address with the encryption mask.  These are used when
working with the cr3 register so that the PGD can be encrypted. The current
__va() macro is updated so that the virtual address is generated based off
of the physical address without the encryption mask thus allowing the same
virtual address to be generated regardless of whether encryption is enabled
for that physical location or not.

Also, an early initialization function is added for SME.  If SME is active,
this function:
 - Updates the early_pmd_flags so that early page faults create mappings
   with the encryption mask.
 - Updates the __supported_pte_mask to include the encryption mask.
 - Updates the protection_map entries to include the encryption mask so
   that user-space allocations will automatically have the encryption mask
   applied.
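
For reference, the new pgprot and physical-address helpers mentioned above are
expected to reduce to applying or clearing the mask; a sketch, not necessarily
the exact definitions:

  #define _PAGE_ENC               (_AT(pteval_t, sme_me_mask))
  #define pgprot_encrypted(prot)  __pgprot(__sme_set(pgprot_val(prot)))
  #define pgprot_decrypted(prot)  __pgprot(__sme_clr(pgprot_val(prot)))
  #define __sme_pa(x)             (__pa(x) | sme_me_mask)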

Signed-off-by: Tom Lendacky 
---
 arch/x86/boot/compressed/pagetable.c |7 +
 arch/x86/include/asm/fixmap.h|7 +
 arch/x86/include/asm/mem_encrypt.h   |   25 +++
 arch/x86/include/asm/page_types.h|2 +-
 arch/x86/include/asm/pgtable.h   |9 +++
 arch/x86/include/asm/pgtable_types.h |   45 ++
 arch/x86/include/asm/processor.h |3 ++
 arch/x86/kernel/espfix_64.c  |2 +-
 arch/x86/kernel/head64.c |   10 +++-
 arch/x86/kernel/head_64.S|   18 +++---
 arch/x86/mm/kasan_init_64.c  |4 ++-
 arch/x86/mm/mem_encrypt.c|   18 ++
 arch/x86/mm/pageattr.c   |3 ++
 include/asm-generic/mem_encrypt.h|8 ++
 include/asm-generic/pgtable.h|8 ++
 15 files changed, 138 insertions(+), 31 deletions(-)

diff --git a/arch/x86/boot/compressed/pagetable.c 
b/arch/x86/boot/compressed/pagetable.c
index 1d78f17..05455ff 100644
--- a/arch/x86/boot/compressed/pagetable.c
+++ b/arch/x86/boot/compressed/pagetable.c
@@ -15,6 +15,13 @@
 #define __pa(x)  ((unsigned long)(x))
 #define __va(x)  ((void *)((unsigned long)(x)))
 
+/*
+ * The pgtable.h and mm/ident_map.c includes make use of the SME related
+ * information which is not used in the compressed image support. Un-define
+ * the SME support to avoid any compile and link errors.
+ */
+#undef CONFIG_AMD_MEM_ENCRYPT
+
 #include "misc.h"
 
 /* These actually do the work of building the kernel identity maps. */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index b65155c..d9ff226 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -157,6 +157,13 @@ static inline void __set_fixmap(enum fixed_addresses idx,
 }
 #endif
 
+/*
+ * FIXMAP_PAGE_NOCACHE is used for MMIO. Memory encryption is not
+ * supported for MMIO addresses, so make sure that the memory encryption
+ * mask is not part of the page attributes.
+ */
+#define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_IO_NOCACHE
+
 #include 
 
 #define __late_set_fixmap(idx, phys, flags) __set_fixmap(idx, phys, flags)
diff --git a/arch/x86/include/asm/mem_encrypt.h 
b/arch/x86/include/asm/mem_encrypt.h
index 5008fd9..f1c4c29 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -15,14 +15,22 @@
 
 #ifndef __ASSEMBLY__
 
+#include 
+
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 
 extern unsigned long sme_me_mask;
 
+void __init sme_early_init(void);
+
 #else  /* !CONFIG_AMD_MEM_ENCRYPT */
 
 #define sme_me_mask0UL
 
+static inline void __init sme_early_init(void)
+{
+}
+
 #endif /* CONFIG_AMD_MEM_ENCRYPT */
 
 static inline bool sme_active(void)
@@ -30,6 +38,23 @@ static inline bool sme_active(void)
return !!sme_me_mask;
 }
 
+/*
+ * The __sme_pa() and __sme_pa_nodebug() macros are meant for use when
+ * writing to or comparing values from the cr3 register.  Having the
+ * encryption mask set in cr3 enables the PGD entry to be encrypted and
+ * avoid special case handling of PGD allocations.
+ */
+#define __sme_pa(x)(__pa(x) | sme_me_mask)
+#define __sme_pa_nodebug(x)(__pa_nodebug(x) | sme_me_mask)
+
+/*
+ * The __sme_set() and __sme_clr() macros are useful for adding or removing
+ * the encryption mask from a value (e.g. when dealing with 

[PATCH v6 07/34] x86/mm: Don't use phys_to_virt in ioremap() if SME is active

2017-06-07 Thread Tom Lendacky
Currently there is a check if the address being mapped is in the ISA
range (is_ISA_range()), and if it is then phys_to_virt() is used to
perform the mapping.  When SME is active, however, this will result
in the mapping having the encryption bit set when it is expected that
an ioremap() should not have the encryption bit set. So only use the
phys_to_virt() function if SME is not active.

Signed-off-by: Tom Lendacky 
---
 arch/x86/mm/ioremap.c |7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index bbc558b..2a0fa89 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "physaddr.h"
 
@@ -106,9 +107,11 @@ static void __iomem *__ioremap_caller(resource_size_t 
phys_addr,
}
 
/*
-* Don't remap the low PCI/ISA area, it's always mapped..
+* Don't remap the low PCI/ISA area, it's always mapped.
+*   But if SME is active, skip this so that the encryption bit
+*   doesn't get set.
 */
-   if (is_ISA_range(phys_addr, last_addr))
+   if (is_ISA_range(phys_addr, last_addr) && !sme_active())
return (__force void __iomem *)phys_to_virt(phys_addr);
 
/*



[PATCH v6 08/34] x86/mm: Add support to enable SME in early boot processing

2017-06-07 Thread Tom Lendacky
Add support to the early boot code to use Secure Memory Encryption (SME).
Since the kernel has been loaded into memory in a decrypted state, encrypt
the kernel in place and update the early pagetables with the memory
encryption mask so that new pagetable entries will use memory encryption.

The routines to set the encryption mask and perform the encryption are
stub routines for now with functionality to be added in a later patch.

Because the routines need to be available to head_64.S, mem_encrypt.c is
always built, and #ifdefs in mem_encrypt.c provide
functionality or stub routines depending on CONFIG_AMD_MEM_ENCRYPT.
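
The stub routines mentioned above are assumed to look roughly like this for
now (a sketch; the actual signatures in the patch may differ):

  void __init sme_encrypt_kernel(void)
  {
  }

  unsigned long __init sme_enable(void)
  {
          return sme_me_mask;
  }

  unsigned long __init sme_get_me_mask(void)
  {
          return sme_me_mask;
  }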

Signed-off-by: Tom Lendacky 
---
 arch/x86/kernel/head_64.S |   61 -
 arch/x86/mm/Makefile  |4 +--
 arch/x86/mm/mem_encrypt.c |   26 +++
 3 files changed, 86 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index ac9d327..222630c 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -91,6 +91,23 @@ startup_64:
jnz bad_address
 
/*
+* Activate Secure Memory Encryption (SME), if supported and enabled.
+* The real_mode_data address is in %rsi and that register can be
+* clobbered by the called function so be sure to save it.
+* Save the returned mask in %r12 for later use.
+*/
+   push%rsi
+   callsme_enable
+   pop %rsi
+   movq%rax, %r12
+
+   /*
+* Add the memory encryption mask to %rbp to include it in the page
+* table fixups.
+*/
+   addq%r12, %rbp
+
+   /*
 * Fixup the physical addresses in the page table
 */
addq%rbp, early_level4_pgt + (L4_START_KERNEL*8)(%rip)
@@ -113,6 +130,7 @@ startup_64:
shrq$PGDIR_SHIFT, %rax
 
leaq(PAGE_SIZE + _KERNPG_TABLE)(%rbx), %rdx
+   addq%r12, %rdx
movq%rdx, 0(%rbx,%rax,8)
movq%rdx, 8(%rbx,%rax,8)
 
@@ -129,6 +147,7 @@ startup_64:
movq%rdi, %rax
shrq$PMD_SHIFT, %rdi
addq$(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL), %rax
+   addq%r12, %rax
leaq(_end - 1)(%rip), %rcx
shrq$PMD_SHIFT, %rcx
subq%rdi, %rcx
@@ -142,6 +161,12 @@ startup_64:
decl%ecx
jnz 1b
 
+   /*
+* Determine if any fixups are required. This includes fixups
+* based on where the kernel was loaded and whether SME is
+* active. If %rbp is zero, then we can skip both the fixups
+* and the call to encrypt the kernel.
+*/
test %rbp, %rbp
jz .Lskip_fixup
 
@@ -162,11 +187,30 @@ startup_64:
cmp %r8, %rdi
jne 1b
 
-   /* Fixup phys_base */
+   /*
+* Fixup phys_base - remove the memory encryption mask from %rbp
+* to obtain the true physical address.
+*/
+   subq%r12, %rbp
addq%rbp, phys_base(%rip)
 
+   /*
+* Encrypt the kernel if SME is active.
+* The real_mode_data address is in %rsi and that register can be
+* clobbered by the called function so be sure to save it.
+*/
+   push%rsi
+   callsme_encrypt_kernel
+   pop %rsi
+
 .Lskip_fixup:
+   /*
+* The encryption mask is in %r12. We ADD this to %rax to be sure
+* that the encryption mask is part of the value that will be
+* stored in %cr3.
+*/
movq$(early_level4_pgt - __START_KERNEL_map), %rax
+   addq%r12, %rax
jmp 1f
 ENTRY(secondary_startup_64)
/*
@@ -186,7 +230,20 @@ ENTRY(secondary_startup_64)
/* Sanitize CPU configuration */
call verify_cpu
 
-   movq$(init_level4_pgt - __START_KERNEL_map), %rax
+   /*
+* Get the SME encryption mask.
+*  The encryption mask will be returned in %rax so we do an ADD
+*  below to be sure that the encryption mask is part of the
+*  value that will be stored in %cr3.
+*
+* The real_mode_data address is in %rsi and that register can be
+* clobbered by the called function so be sure to save it.
+*/
+   push%rsi
+   callsme_get_me_mask
+   pop %rsi
+
+   addq$(init_level4_pgt - __START_KERNEL_map), %rax
 1:
 
/* Enable PAE mode and PGE */
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 44d4d21..88ee454 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -2,7 +2,7 @@
 KCOV_INSTRUMENT_tlb.o  := n
 
 obj-y  :=  init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o 
\
-   pat.o pgtable.o physaddr.o gup.o setup_nx.o tlb.o
+   pat.o pgtable.o physaddr.o gup.o setup_nx.o tlb.o mem_encrypt.o
 
 # Make sure __phys_addr has no stackprotector
 nostackp := $(call cc-option, -fno-stack-protector)

[PATCH v6 06/34] x86/mm: Add Secure Memory Encryption (SME) support

2017-06-07 Thread Tom Lendacky
Add support for Secure Memory Encryption (SME). This initial support
provides a Kconfig entry to build the SME support into the kernel and
defines the memory encryption mask that will be used in subsequent
patches to mark pages as encrypted.

Signed-off-by: Tom Lendacky 
---
 arch/x86/Kconfig   |   22 ++
 arch/x86/include/asm/mem_encrypt.h |   35 +++
 arch/x86/mm/Makefile   |1 +
 arch/x86/mm/mem_encrypt.c  |   21 +
 include/asm-generic/mem_encrypt.h  |   27 +++
 include/linux/mem_encrypt.h|   18 ++
 6 files changed, 124 insertions(+)
 create mode 100644 arch/x86/include/asm/mem_encrypt.h
 create mode 100644 arch/x86/mm/mem_encrypt.c
 create mode 100644 include/asm-generic/mem_encrypt.h
 create mode 100644 include/linux/mem_encrypt.h

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4ccfacc..11f2fdb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1407,6 +1407,28 @@ config X86_DIRECT_GBPAGES
  supports them), so don't confuse the user by printing
  that we have them enabled.
 
+config AMD_MEM_ENCRYPT
+   bool "AMD Secure Memory Encryption (SME) support"
+   depends on X86_64 && CPU_SUP_AMD
+   ---help---
+ Say yes to enable support for the encryption of system memory.
+ This requires an AMD processor that supports Secure Memory
+ Encryption (SME).
+
+config AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT
+   bool "Activate AMD Secure Memory Encryption (SME) by default"
+   default y
+   depends on AMD_MEM_ENCRYPT
+   ---help---
+ Say yes to have system memory encrypted by default if running on
+ an AMD processor that supports Secure Memory Encryption (SME).
+
+ If set to Y, then the encryption of system memory can be
+ deactivated with the mem_encrypt=off command line option.
+
+ If set to N, then the encryption of system memory can be
+ activated with the mem_encrypt=on command line option.
+
 # Common NUMA Features
 config NUMA
bool "Numa Memory Allocation and Scheduler Support"
diff --git a/arch/x86/include/asm/mem_encrypt.h 
b/arch/x86/include/asm/mem_encrypt.h
new file mode 100644
index 000..5008fd9
--- /dev/null
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -0,0 +1,35 @@
+/*
+ * AMD Memory Encryption Support
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __X86_MEM_ENCRYPT_H__
+#define __X86_MEM_ENCRYPT_H__
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+
+extern unsigned long sme_me_mask;
+
+#else  /* !CONFIG_AMD_MEM_ENCRYPT */
+
+#define sme_me_mask0UL
+
+#endif /* CONFIG_AMD_MEM_ENCRYPT */
+
+static inline bool sme_active(void)
+{
+   return !!sme_me_mask;
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __X86_MEM_ENCRYPT_H__ */
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 96d2b84..44d4d21 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -39,3 +39,4 @@ obj-$(CONFIG_X86_INTEL_MPX)   += mpx.o
 obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
 obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
 
+obj-$(CONFIG_AMD_MEM_ENCRYPT)  += mem_encrypt.o
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
new file mode 100644
index 000..b99d469
--- /dev/null
+++ b/arch/x86/mm/mem_encrypt.c
@@ -0,0 +1,21 @@
+/*
+ * AMD Memory Encryption Support
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include 
+
+/*
+ * Since SME related variables are set early in the boot process they must
+ * reside in the .data section so as not to be zeroed out when the .bss
+ * section is later cleared.
+ */
+unsigned long sme_me_mask __section(.data) = 0;
+EXPORT_SYMBOL_GPL(sme_me_mask);
diff --git a/include/asm-generic/mem_encrypt.h 
b/include/asm-generic/mem_encrypt.h
new file mode 100644
index 000..563c918
--- /dev/null
+++ b/include/asm-generic/mem_encrypt.h
@@ -0,0 +1,27 @@
+/*
+ * AMD Memory Encryption Support
+ *
+ * Copyright (C) 2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_GENERIC_MEM_ENCRYPT_H__
+#define __ASM_GENERIC_MEM_ENCRYPT_H__
+
+#ifndef __ASSEMBLY__
+

[PATCH v6 04/34] x86/CPU/AMD: Add the Secure Memory Encryption CPU feature

2017-06-07 Thread Tom Lendacky
Update the CPU features to include identifying and reporting on the
Secure Memory Encryption (SME) feature.  SME is identified by CPUID
0x8000001f, but requires BIOS support to enable it (set bit 23 of
MSR_K8_SYSCFG).  Only show the SME feature as available if reported by
CPUID and enabled by BIOS.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/cpufeatures.h |1 +
 arch/x86/include/asm/msr-index.h   |2 ++
 arch/x86/kernel/cpu/amd.c  |   13 +
 arch/x86/kernel/cpu/scattered.c|1 +
 4 files changed, 17 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index 2701e5f..2b692df 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -196,6 +196,7 @@
 
 #define X86_FEATURE_HW_PSTATE  ( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+#define X86_FEATURE_SME( 7*32+10) /* AMD Secure Memory 
Encryption */
 
 #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number 
*/
 #define X86_FEATURE_INTEL_PT   ( 7*32+15) /* Intel Processor Trace */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 18b1623..460ac01 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -352,6 +352,8 @@
 #define MSR_K8_TOP_MEM10xc001001a
 #define MSR_K8_TOP_MEM20xc001001d
 #define MSR_K8_SYSCFG  0xc0010010
+#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT  23
+#define MSR_K8_SYSCFG_MEM_ENCRYPT  BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT)
 #define MSR_K8_INT_PENDING_MSG 0xc0010055
 /* C1E active bits in int pending message */
 #define K8_INTP_C1E_ACTIVE_MASK0x1800
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bb5abe8..c47ceee 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -611,6 +611,19 @@ static void early_init_amd(struct cpuinfo_x86 *c)
 */
if (cpu_has_amd_erratum(c, amd_erratum_400))
set_cpu_bug(c, X86_BUG_AMD_E400);
+
+   /*
+* BIOS support is required for SME. If BIOS has not enabled SME
+* then don't advertise the feature (set in scattered.c)
+*/
+   if (cpu_has(c, X86_FEATURE_SME)) {
+   u64 msr;
+
+   /* Check if SME is enabled */
+   rdmsrl(MSR_K8_SYSCFG, msr);
+   if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+   clear_cpu_cap(c, X86_FEATURE_SME);
+   }
 }
 
 static void init_amd_k8(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 23c2350..05459ad 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -31,6 +31,7 @@ struct cpuid_bit {
{ X86_FEATURE_HW_PSTATE,CPUID_EDX,  7, 0x80000007, 0 },
{ X86_FEATURE_CPB,  CPUID_EDX,  9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK,CPUID_EDX, 11, 0x80000007, 0 },
+   { X86_FEATURE_SME,  CPUID_EAX,  0, 0x8000001f, 0 },
{ 0, 0, 0, 0, 0 }
 };
 



[PATCH v6 05/34] x86/CPU/AMD: Handle SME reduction in physical address size

2017-06-07 Thread Tom Lendacky
When Secure Memory Encryption (SME) is enabled, the physical address
space is reduced. Adjust the x86_phys_bits value to reflect this
reduction.
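
The reduction value comes from the same CPUID leaf that reports SME support;
the layout assumed here (sketch):

  /* CPUID 0x8000001f:
   *   EAX[0]    - SME supported
   *   EBX[5:0]  - page table bit position of the encryption mask
   *   EBX[11:6] - physical address bit reduction when SME is enabled
   */
  u32 ebx = cpuid_ebx(0x8000001f);
  unsigned int phys_bits_reduction = (ebx >> 6) & 0x3f;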

Signed-off-by: Tom Lendacky 
---
 arch/x86/kernel/cpu/amd.c |   10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index c47ceee..5bdcbd4 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -613,15 +613,19 @@ static void early_init_amd(struct cpuinfo_x86 *c)
set_cpu_bug(c, X86_BUG_AMD_E400);
 
/*
-* BIOS support is required for SME. If BIOS has not enabled SME
-* then don't advertise the feature (set in scattered.c)
+* BIOS support is required for SME. If BIOS has enabled SME then
+* adjust x86_phys_bits by the SME physical address space reduction
+* value. If BIOS has not enabled SME then don't advertise the
+* feature (set in scattered.c).
 */
if (cpu_has(c, X86_FEATURE_SME)) {
u64 msr;
 
/* Check if SME is enabled */
rdmsrl(MSR_K8_SYSCFG, msr);
-   if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+   if (msr & MSR_K8_SYSCFG_MEM_ENCRYPT)
+   c->x86_phys_bits -= (cpuid_ebx(0x8000001f) >> 6) & 0x3f;
+   else
clear_cpu_cap(c, X86_FEATURE_SME);
}
 }



[PATCH v6 03/34] x86, mpparse, x86/acpi, x86/PCI, x86/dmi, SFI: Use memremap for RAM mappings

2017-06-07 Thread Tom Lendacky
The ioremap() function is intended for mapping MMIO. For RAM, the
memremap() function should be used. Convert calls from ioremap() to
memremap() when re-mapping RAM.

This will be used later by SME to control how the encryption mask is
applied to memory mappings, with certain memory locations being mapped
decrypted vs encrypted.
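
The conversion is mechanical; each RAM mapping ends up following this pattern
(illustrative):

  void *p;

  p = memremap(pa, len, MEMREMAP_WB);     /* was: ioremap_cache(pa, len) */
  if (!p)
          return -ENOMEM;
  /* ... access the RAM-backed data ... */
  memunmap(p);                            /* was: iounmap(p) */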

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/dmi.h   |8 
 arch/x86/kernel/acpi/boot.c  |6 +++---
 arch/x86/kernel/kdebugfs.c   |   34 +++---
 arch/x86/kernel/ksysfs.c |   28 ++--
 arch/x86/kernel/mpparse.c|   10 +-
 arch/x86/pci/common.c|4 ++--
 drivers/firmware/dmi-sysfs.c |5 +++--
 drivers/firmware/pcdp.c  |4 ++--
 drivers/sfi/sfi_core.c   |   22 +++---
 9 files changed, 55 insertions(+), 66 deletions(-)

diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h
index 3c69fed..a8e15b0 100644
--- a/arch/x86/include/asm/dmi.h
+++ b/arch/x86/include/asm/dmi.h
@@ -13,9 +13,9 @@ static __always_inline __init void *dmi_alloc(unsigned len)
 }
 
 /* Use early IO mappings for DMI because it's initialized early */
-#define dmi_early_remapearly_ioremap
-#define dmi_early_unmapearly_iounmap
-#define dmi_remap  ioremap_cache
-#define dmi_unmap  iounmap
+#define dmi_early_remapearly_memremap
+#define dmi_early_unmapearly_memunmap
+#define dmi_remap(_x, _l)  memremap(_x, _l, MEMREMAP_WB)
+#define dmi_unmap(_x)  memunmap(_x)
 
 #endif /* _ASM_X86_DMI_H */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 6bb6806..850160a 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -115,7 +115,7 @@
 #defineACPI_INVALID_GSIINT_MIN
 
 /*
- * This is just a simple wrapper around early_ioremap(),
+ * This is just a simple wrapper around early_memremap(),
  * with sanity checks for phys == 0 and size == 0.
  */
 char *__init __acpi_map_table(unsigned long phys, unsigned long size)
@@ -124,7 +124,7 @@ char *__init __acpi_map_table(unsigned long phys, unsigned 
long size)
if (!phys || !size)
return NULL;
 
-   return early_ioremap(phys, size);
+   return early_memremap(phys, size);
 }
 
 void __init __acpi_unmap_table(char *map, unsigned long size)
@@ -132,7 +132,7 @@ void __init __acpi_unmap_table(char *map, unsigned long 
size)
if (!map || !size)
return;
 
-   early_iounmap(map, size);
+   early_memunmap(map, size);
 }
 
 #ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index 38b6458..fd6f8fb 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -33,7 +33,6 @@ static ssize_t setup_data_read(struct file *file, char __user 
*user_buf,
struct setup_data_node *node = file->private_data;
unsigned long remain;
loff_t pos = *ppos;
-   struct page *pg;
void *p;
u64 pa;
 
@@ -47,18 +46,13 @@ static ssize_t setup_data_read(struct file *file, char 
__user *user_buf,
count = node->len - pos;
 
pa = node->paddr + sizeof(struct setup_data) + pos;
-   pg = pfn_to_page((pa + count - 1) >> PAGE_SHIFT);
-   if (PageHighMem(pg)) {
-   p = ioremap_cache(pa, count);
-   if (!p)
-   return -ENXIO;
-   } else
-   p = __va(pa);
+   p = memremap(pa, count, MEMREMAP_WB);
+   if (!p)
+   return -ENOMEM;
 
remain = copy_to_user(user_buf, p, count);
 
-   if (PageHighMem(pg))
-   iounmap(p);
+   memunmap(p);
 
if (remain)
return -EFAULT;
@@ -109,7 +103,6 @@ static int __init create_setup_data_nodes(struct dentry 
*parent)
struct setup_data *data;
int error;
struct dentry *d;
-   struct page *pg;
u64 pa_data;
int no = 0;
 
@@ -126,16 +119,12 @@ static int __init create_setup_data_nodes(struct dentry 
*parent)
goto err_dir;
}
 
-   pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT);
-   if (PageHighMem(pg)) {
-   data = ioremap_cache(pa_data, sizeof(*data));
-   if (!data) {
-   kfree(node);
-   error = -ENXIO;
-   goto err_dir;
-   }
-   } else
-   data = __va(pa_data);
+   data = memremap(pa_data, sizeof(*data), MEMREMAP_WB);
+   if (!data) {
+   kfree(node);
+   error = -ENOMEM;
+   goto err_dir;
+   }
 
node->paddr = pa_data;
node->type = 

[PATCH v6 02/34] x86/mm/pat: Set write-protect cache mode for full PAT support

2017-06-07 Thread Tom Lendacky
For processors that support PAT, set the write-protect cache mode
(_PAGE_CACHE_MODE_WP) entry to the actual write-protect value (0x05).

Acked-by: Borislav Petkov 
Signed-off-by: Tom Lendacky 
---
 arch/x86/mm/pat.c |6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 9b78685..6753d9c 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -295,7 +295,7 @@ static void init_cache_modes(void)
  * pat_init - Initialize PAT MSR and PAT table
  *
  * This function initializes PAT MSR and PAT table with an OS-defined value
- * to enable additional cache attributes, WC and WT.
+ * to enable additional cache attributes, WC, WT and WP.
  *
  * This function must be called on all CPUs using the specific sequence of
  * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this
@@ -356,7 +356,7 @@ void pat_init(void)
 *  0102UC-: _PAGE_CACHE_MODE_UC_MINUS
 *  0113UC : _PAGE_CACHE_MODE_UC
 *  1004WB : Reserved
-*  1015WC : Reserved
+*  1015WP : _PAGE_CACHE_MODE_WP
 *  1106UC-: Reserved
 *  1117WT : _PAGE_CACHE_MODE_WT
 *
@@ -364,7 +364,7 @@ void pat_init(void)
 * corresponding types in the presence of PAT errata.
 */
pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
- PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, WT);
+ PAT(4, WB) | PAT(5, WP) | PAT(6, UC_MINUS) | PAT(7, WT);
}
 
if (!boot_cpu_done) {



[PATCH v6 00/34] x86: Secure Memory Encryption (AMD)

2017-06-07 Thread Tom Lendacky
This patch series provides support for AMD's new Secure Memory Encryption (SME)
feature.

SME can be used to mark individual pages of memory as encrypted through the
page tables. A page of memory that is marked encrypted will be automatically
decrypted when read from DRAM and will be automatically encrypted when
written to DRAM. Details on SME can be found in the links below.

The SME feature is identified through a CPUID function and enabled through
the SYSCFG MSR. Once enabled, page table entries will determine how the
memory is accessed. If a page table entry has the memory encryption mask set,
then that memory will be accessed as encrypted memory. The memory encryption
mask (as well as other related information) is determined from settings
returned through the same CPUID function that identifies the presence of the
feature.
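
Conceptually, the mask is just an extra address bit carried in every page
table entry; for example (the bit position is implementation-specific and
reported by CPUID, so this is illustrative only):

  /* a kernel mapping with the encryption bit included */
  pteval_t pte = (pfn << PAGE_SHIFT) | __PAGE_KERNEL | sme_me_mask;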

The approach that this patch series takes is to encrypt everything possible
starting early in the boot where the kernel is encrypted. Using the page
table macros the encryption mask can be incorporated into all page table
entries and page allocations. By updating the protection map, userspace
allocations are also marked encrypted. Certain data must be accounted for
as having been placed in memory before SME was enabled (EFI, initrd, etc.)
and accessed accordingly.

This patch series is a precursor to another AMD processor feature called
Secure Encrypted Virtualization (SEV). The support for SEV will build upon
the SME support and will be submitted later. Details on SEV can be found
in the links below.

The following links provide additional detail:

AMD Memory Encryption whitepaper:
   
http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/12/AMD_Memory_Encryption_Whitepaper_v7-Public.pdf

AMD64 Architecture Programmer's Manual:
   http://support.amd.com/TechDocs/24593.pdf
   SME is section 7.10
   SEV is section 15.34

---

This patch series is based off of the master branch of tip.
  Commit 53614fbd7961 ("Merge branch 'WIP.x86/fpu'")

Source code is also available at https://github.com/codomania/tip/tree/sme-v6


Still to do:
- Kdump support, including using memremap() instead of ioremap_cache()

Changes since v5:
- Added support for 5-level paging
- Added IOMMU support
- Created a generic asm/mem_encrypt.h in order to remove a bunch of
  #ifndef/#define entries
- Removed changes to the __va() macro and defined a function to return
  the true physical address in cr3
- Removed sysfs support as it was determined not to be needed
- General code cleanup based on feedback
- General cleanup of patch subjects and descriptions

Changes since v4:
- Re-worked mapping of setup data to not use a fixed list. Rather, check
  dynamically whether the requested early_memremap()/memremap() call
  needs to be mapped decrypted.
- Moved SME cpu feature into scattered features
- Moved some declarations into header files
- Cleared the encryption mask from the __PHYSICAL_MASK so that users
  of macros such as pmd_pfn_mask() don't have to worry/know about the
  encryption mask
- Updated some return types and values related to EFI and e820 functions
  so that an error could be returned
- During cpu shutdown, removed cache disabling and added a check for kexec
  in progress to use wbinvd followed immediately by halt in order to avoid
  any memory corruption
- Update how persistent memory is identified
- Added a function to find command line arguments and their values
- Added sysfs support
- General code cleanup based on feedback
- General cleanup of patch subjects and descriptions


Changes since v3:
- Broke out some of the patches into smaller individual patches
- Updated Documentation
- Added a message to indicate why the IOMMU was disabled
- Updated CPU feature support for SME by taking into account whether
  BIOS has enabled SME
- Eliminated redundant functions
- Added some warning messages for DMA usage of bounce buffers when SME
  is active
- Added support for persistent memory
- Added support to determine when setup data is being mapped and be sure
  to map it un-encrypted
- Added CONFIG support to set the default action of whether to activate
  SME if it is supported/enabled
- Added support for (re)booting with kexec

Changes since v2:
- Updated Documentation
- Make the encryption mask available outside of arch/x86 through a
  standard include file
- Conversion of assembler routines to C where possible (not everything
  could be converted, e.g. the routine that does the actual encryption
  needs to be copied into a safe location and it is difficult to
  determine the actual length of the function in order to copy it)
- Fix SME feature use of scattered CPUID feature
- Creation of SME specific functions for things like encrypting
  the setup data, ramdisk, etc.
- New take on early_memremap / memremap encryption support
- Additional support for accessing video buffers (fbdev/gpu) as
  un-encrypted
- Disable IOMMU for now - need to investigate further in relation to
  how it needs to be programmed 

[PATCH v6 01/34] x86: Document AMD Secure Memory Encryption (SME)

2017-06-07 Thread Tom Lendacky
Create a Documentation entry to describe the AMD Secure Memory
Encryption (SME) feature and add documentation for the mem_encrypt=
kernel parameter.

Reviewed-by: Borislav Petkov 
Signed-off-by: Tom Lendacky 
---
 Documentation/admin-guide/kernel-parameters.txt |   11 
 Documentation/x86/amd-memory-encryption.txt |   68 +++
 2 files changed, 79 insertions(+)
 create mode 100644 Documentation/x86/amd-memory-encryption.txt

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index 4e4c340..abb65da 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2188,6 +2188,17 @@
memory contents and reserves bad memory
regions that are detected.
 
+   mem_encrypt=[X86-64] AMD Secure Memory Encryption (SME) control
+   Valid arguments: on, off
+   Default (depends on kernel configuration option):
+ on  (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=y)
+ off (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=n)
+   mem_encrypt=on: Activate SME
+   mem_encrypt=off:Do not activate SME
+
+   Refer to Documentation/x86/amd-memory-encryption.txt
+   for details on when memory encryption can be activated.
+
mem_sleep_default=  [SUSPEND] Default system suspend mode:
s2idle  - Suspend-To-Idle
shallow - Power-On Suspend or equivalent (if supported)
diff --git a/Documentation/x86/amd-memory-encryption.txt 
b/Documentation/x86/amd-memory-encryption.txt
new file mode 100644
index 000..f512ab7
--- /dev/null
+++ b/Documentation/x86/amd-memory-encryption.txt
@@ -0,0 +1,68 @@
+Secure Memory Encryption (SME) is a feature found on AMD processors.
+
+SME provides the ability to mark individual pages of memory as encrypted using
+the standard x86 page tables.  A page that is marked encrypted will be
+automatically decrypted when read from DRAM and encrypted when written to
+DRAM.  SME can therefore be used to protect the contents of DRAM from physical
+attacks on the system.
+
+A page is encrypted when a page table entry has the encryption bit set (see
+below on how to determine its position).  The encryption bit can also be
+specified in the cr3 register, allowing the PGD table to be encrypted. Each
+successive level of page tables can also be encrypted by setting the encryption
+bit in the page table entry that points to the next table. This allows the full
+page table hierarchy to be encrypted. Note, this means that just because the
encryption bit is set in cr3, doesn't imply the full hierarchy is encrypted.
+Each page table entry in the hierarchy needs to have the encryption bit set to
+achieve that. So, theoretically, you could have the encryption bit set in cr3
+so that the PGD is encrypted, but not set the encryption bit in the PGD entry
+for a PUD which results in the PUD pointed to by that entry to not be
+encrypted.
+
+Support for SME can be determined through the CPUID instruction. The CPUID
+function 0x801f reports information related to SME:
+
+   0x801f[eax]:
+   Bit[0] indicates support for SME
+   0x801f[ebx]:
+   Bits[5:0]  pagetable bit number used to activate memory
+  encryption
+   Bits[11:6] reduction in physical address space, in bits, when
+  memory encryption is enabled (this only affects
+  system physical addresses, not guest physical
+  addresses)
+
+If support for SME is present, MSR 0xc0010010 (MSR_K8_SYSCFG) can be used to
+determine if SME is enabled and/or to enable memory encryption:
+
+   0xc0010010:
+   Bit[23]   0 = memory encryption features are disabled
+ 1 = memory encryption features are enabled
+
+Linux relies on BIOS to set this bit if BIOS has determined that the reduction
+in the physical address space as a result of enabling memory encryption (see
+CPUID information above) will not conflict with the address space resource
+requirements for the system.  If this bit is not set upon Linux startup then
+Linux itself will not set it and memory encryption will not be possible.
+
+The state of SME in the Linux kernel can be documented as follows:
+   - Supported:
+ The CPU supports SME (determined through CPUID instruction).
+
+   - Enabled:
+ Supported and bit 23 of MSR_K8_SYSCFG is set.
+
+   - Active:
+ Supported, Enabled and the Linux kernel is actively applying
+ the encryption bit to page table entries (the SME mask in the
+ kernel is non-zero).
+
+SME can also be enabled and 
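
(Illustration only, not part of the patch: the documented CPUID leaf and
SYSCFG MSR bit could be probed from kernel code roughly as below, assuming
x86-64 context where cpuid_eax() and rdmsrl() are available.)

	#include <asm/processor.h>
	#include <asm/msr.h>

	static bool sme_enabled_by_bios(void)
	{
		unsigned long long syscfg;

		/* CPUID 0x8000001f, EAX Bit[0]: SME supported */
		if (!(cpuid_eax(0x8000001f) & 1))
			return false;

		/* MSR 0xc0010010 (MSR_K8_SYSCFG), Bit[23]: encryption enabled */
		rdmsrl(0xc0010010, syscfg);
		return syscfg & (1ULL << 23);
	}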

Re: [PATCH 03/12] intel-ipu3: Add DMA API implementation

2017-06-07 Thread Alan Cox
> > +   struct ipu3_mmu *mmu = to_ipu3_mmu(dev);
> > +   dma_addr_t daddr = iommu_iova_to_phys(mmu->domain, dma_handle);
> > +
> > +   clflush_cache_range(phys_to_virt(daddr), size);  
> 
> You might need to consider another IOMMU on the way here. Generally,
> given that daddr is your MMU DMA address (not necessarily CPU physical
> address), you should be able to call
> 
> dma_sync_single_for_cpu(, daddr, size, dir)

The system IOMMU (if enabled) may be cache coherent - and on x86 it would be -
so the DMA layer doesn't think it needs to do anything for cache
synchronization, and the dma_sync won't actually do any work.
 
Alan


Re: [RFCv2 2/2] iommu/arm-smmu-v3:Enable ACPI based HiSilicon erratum 161010801

2017-06-07 Thread Lorenzo Pieralisi
On Tue, Jun 06, 2017 at 03:01:36PM +, Shameerali Kolothum Thodi wrote:
> Hi Lorenzo,
> 
> > -Original Message-
> > From: Lorenzo Pieralisi [mailto:lorenzo.pieral...@arm.com]
> > Sent: Tuesday, June 06, 2017 2:56 PM
> > To: Shameerali Kolothum Thodi
> > Cc: marc.zyng...@arm.com; sudeep.ho...@arm.com; will.dea...@arm.com;
> > robin.mur...@arm.com; hanjun@linaro.org; Gabriele Paoloni; John
> > Garry; iommu@lists.linux-foundation.org; linux-arm-
> > ker...@lists.infradead.org; linux-a...@vger.kernel.org; de...@acpica.org;
> > Linuxarm; Wangzhou (B); Guohanjun (Hanjun Guo)
> > Subject: Re: [RFCv2 2/2] iommu/arm-smmu-v3:Enable ACPI based HiSilicon
> > erratum 161010801
> > 
> > On Wed, May 31, 2017 at 03:32:13PM +0100, shameer wrote:
> > > The HiSilicon erratum 161010801 describes the limitation of HiSilicon
> > > platforms Hip06/Hip07 to support the SMMU mappings for MSI
> > transactions.
> > >
> > > On these platforms GICv3 ITS translator is presented with the deviceID
> > > by extending the MSI payload data to 64 bits to include the deviceID.
> > > Hence, the PCIe controller on this platforms has to differentiate the
> > > MSI payload against other DMA payload and has to modify the MSI
> > payload.
> > > This basically makes it difficult for this platforms to have a SMMU
> > > translation for MSI.
> > >
> > > This patch implements a ACPI table based quirk to reserve the hw msi
> > > regions in the smmu-v3 driver which means these address regions will
> > > not be translated and will be excluded from iova allocations.
> > >
> > > The HW ITS address region associated with the dev is retrieved using a
> > > new helper function added in the IORT code.
> > 
> > Remove or rephrase last paragraph, it reads as if you are adding an IORT
> > helper function in this patch but you actually aren't.
> 
> Thanks for going through this patch series. I will remove this in next 
> version.
> 
> > > Signed-off-by: shameer 
> > > ---
> > >  drivers/iommu/arm-smmu-v3.c | 49
> > > ++---
> > >  1 file changed, 46 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-
> > v3.c
> > > index abe4b88..3767526 100644
> > > --- a/drivers/iommu/arm-smmu-v3.c
> > > +++ b/drivers/iommu/arm-smmu-v3.c
> > > @@ -597,6 +597,7 @@ struct arm_smmu_device {
> > >   u32 features;
> > >
> > >  #define ARM_SMMU_OPT_SKIP_PREFETCH   (1 << 0)
> > > +#define ARM_SMMU_OPT_RESV_HW_MSI (1 << 1)
> > >   u32 options;
> > >
> > >   struct arm_smmu_cmdqcmdq;
> > > @@ -1755,6 +1756,38 @@ static bool arm_smmu_sid_in_range(struct
> > > arm_smmu_device *smmu, u32 sid)
> > >
> > >  static struct iommu_ops arm_smmu_ops;
> > >
> > > +#ifdef CONFIG_ACPI
> > > +static struct iommu_resv_region *arm_smmu_acpi_alloc_hw_msi(struct
> > > +device *dev) {
> > > + struct iommu_resv_region *region;
> > > + struct  irq_domain *irq_dom;
> > > + int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
> > > + u64 base;
> > 
> > phys_addr_t
> 
> Ok.
> 
> > > + irq_dom = pci_msi_get_device_domain(to_pci_dev(dev));
> > > + if (irq_dom) {
> > > + int ret;
> > > + u32 rid;
> > > +
> > > + rid = pci_msi_domain_get_msi_rid(irq_dom,
> > to_pci_dev(dev));
> > > + ret = iort_dev_find_its_base(dev, rid, 0, &base);
> > 
> > Well, here we use ITS id 0 which is fine as long as code in IORT uses the 
> > same
> > policy for getting the irq_domain (ie we want to reserve the ITS address
> > space that is actually used by the device to send IRQs not a a different 
> > one) it
> > is just a heads-up because I find this confusing.
> 
> Ok. Just to make it clear, 0 is the index into the ITS identifier list.
> I noted that iort_get_device_domain() uses index 0 while retrieving the ITS 
> identifier.
> May be use the same approach here as well? ie, remove the index from function 
> call?
> 
> I am not sure how we can get the index info, though theoretically it is
> possible for the ITS group node to have multiple ITSs.

Yes, it would be ideal to avoid the look-up through the ITS index and
just reuse the ITS node associated with the MSI domain because I do not
want this quirk to force the ITS domain allocation policy (what I mean
I do not want to be tied to index 0 if for any reason we change
the allocation in IORT for normal ITS<->device mapping).

I will have a further look to see if we can improve the code to
this extent.

> > > + if (!ret) {
> > > + dev_info(dev, "SMMUv3:HW MSI resv addr
> > 0x%pa\n", &base);
> > > + region = iommu_alloc_resv_region(base, SZ_128K,
> > > +  prot,
> > IOMMU_RESV_MSI);
> > > + return region;
> > > + }
> > > + }
> > > +
> > > + return NULL;
> > > +}
> > > +#else
> > > +static struct iommu_resv_region 

Re: [PATCH v2 2/2] acpi/iort: numa: Add numa node mapping for smmuv3 devices

2017-06-07 Thread Lorenzo Pieralisi
On Tue, Jun 06, 2017 at 04:17:45PM +0530, Ganapatrao Kulkarni wrote:
> Add code to parse proximity domain in SMMUv3 IORT table to
> set numa node mapping for smmuv3 devices.
> 
> Signed-off-by: Ganapatrao Kulkarni 
> ---
>  drivers/acpi/arm64/iort.c | 20 
>  1 file changed, 20 insertions(+)
> 
> diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
> index bba2b59..b4f328f 100644
> --- a/drivers/acpi/arm64/iort.c
> +++ b/drivers/acpi/arm64/iort.c
> @@ -882,6 +882,23 @@ static bool __init arm_smmu_v3_is_coherent(struct 
> acpi_iort_node *node)
>   return smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE;
>  }
>  
> +/*
> + * set numa proximity domain for smmuv3 device
> + */
> +static void  __init iort_set_proximity(struct acpi_iort_node *node,
> + struct device *dev)
> +{
> + struct acpi_iort_smmu_v3 *smmu;
> +
> + smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
> + if (smmu->flags & ACPI_IORT_SMMU_V3_PXM_VALID) {
> + set_dev_node(dev, acpi_map_pxm_to_node(smmu->pxm));
> + pr_info("SMMUV3[%llx] Mapped to Proximity domain %d\n",
> + smmu->base_address,
> + smmu->pxm);
> + }
> +}
> +
>  static int __init arm_smmu_count_resources(struct acpi_iort_node *node)
>  {
>   struct acpi_iort_smmu *smmu;
> @@ -1002,6 +1019,9 @@ static int __init iort_add_smmu_platform_device(struct 
> acpi_iort_node *node)
>   if (!pdev)
>   return -ENOMEM;
>  
> + if (node->type == ACPI_IORT_NODE_SMMU_V3)
> + iort_set_proximity(node, &pdev->dev);

Nit: while at it you may add an ops hook to set the proximity
(NULL for SMMU v2) and call it if present, it is just to make
the code adding devices more uniform.
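
Something along these lines, purely as a sketch of that suggestion (the hook
name is made up, the other fields approximate the existing per-node ops in
iort.c):

	struct iort_iommu_config {
		const char *name;
		int (*iommu_init)(struct acpi_iort_node *node);
		bool (*iommu_is_coherent)(struct acpi_iort_node *node);
		int (*iommu_count_resources)(struct acpi_iort_node *node);
		void (*iommu_init_resources)(struct resource *res,
					     struct acpi_iort_node *node);
		/* new, optional: left NULL for SMMU v1/v2 */
		void (*iommu_set_proximity)(struct device *dev,
					    struct acpi_iort_node *node);
	};

and then iort_add_smmu_platform_device() just does:

	if (ops->iommu_set_proximity)
		ops->iommu_set_proximity(&pdev->dev, node);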

Who is queueing these patches ? I am asking to make sure we sort
the ACPICA dependency (there are other IORT/ACPICA patches to consider
too).

Thanks,
Lorenzo

>   count = ops->iommu_count_resources(node);
>  
>   r = kcalloc(count, sizeof(*r), GFP_KERNEL);
> -- 
> 1.8.1.4
> 


[PATCH 5/7] iommu/amd: Add flush counters to struct dma_ops_domain

2017-06-07 Thread Joerg Roedel
From: Joerg Roedel 

The counters are increased every time the TLB for a given
domain is flushed. We also store the current value of that
counter into newly added entries of the flush-queue, so that
we can tell whether this entry is already flushed.
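
(A worked example of the scheme, not part of the patch: suppose
flush_start_cnt == flush_finish_cnt == 4 and a flush begins, so start
becomes 5. An entry queued while that flush is in flight records
counter = 5. When the flush completes, finish becomes 5 and the entry is
still kept, since that flush may have raced with the unmap that produced
it. Only once a later flush completes, making finish >= 6, does the check
below succeed and the IOVA gets freed.)

	/* Sketch of the invariant used when draining the ring buffer. */
	static inline bool entry_flushed(u64 entry_counter, u64 finish_cnt)
	{
		return entry_counter < finish_cnt;
	}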

Signed-off-by: Joerg Roedel 
---
 drivers/iommu/amd_iommu.c | 52 +++
 1 file changed, 52 insertions(+)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 9aa2735..1ad2866 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -139,6 +139,7 @@ static void detach_device(struct device *dev);
 struct flush_queue_entry {
unsigned long iova_pfn;
unsigned long pages;
+   u64 counter; /* Flush counter when this entry was added to the queue */
 };
 
 struct flush_queue {
@@ -158,6 +159,27 @@ struct dma_ops_domain {
struct iova_domain iovad;
 
struct flush_queue __percpu *flush_queue;
+
+   /*
+* We need two counter here to be race-free wrt. IOTLB flushing and
+* adding entries to the flush queue.
+*
+* The flush_start_cnt is incremented _before_ the IOTLB flush starts.
+* New entries added to the flush ring-buffer get their 'counter' value
+* from here. This way we can make sure that entries added to the queue
+* (or other per-cpu queues of the same domain) while the TLB is about
+* to be flushed are not considered to be flushed already.
+*/
+   atomic64_t flush_start_cnt;
+
+   /*
+* The flush_finish_cnt is incremented when an IOTLB flush is complete.
+* This value is always smaller than flush_start_cnt. The queue_add
+* function frees all IOVAs that have a counter value smaller than
+* flush_finish_cnt. This makes sure that we only free IOVAs that are
+* flushed out of the IOTLB of the domain.
+*/
+   atomic64_t flush_finish_cnt;
 };
 
 static struct iova_domain reserved_iova_ranges;
@@ -1749,6 +1771,9 @@ static int dma_ops_domain_alloc_flush_queue(struct 
dma_ops_domain *dom)
 {
int cpu;
 
+   atomic64_set(&dom->flush_start_cnt,  0);
+   atomic64_set(&dom->flush_finish_cnt, 0);
+
dom->flush_queue = alloc_percpu(struct flush_queue);
if (!dom->flush_queue)
return -ENOMEM;
@@ -1816,22 +1841,48 @@ static inline unsigned queue_ring_add(struct 
flush_queue *queue)
return idx;
 }
 
+static inline void queue_ring_remove_head(struct flush_queue *queue)
+{
+   assert_spin_locked(&queue->lock);
+   queue->head = (queue->head + 1) % FLUSH_QUEUE_SIZE;
+}
+
 static void queue_add(struct dma_ops_domain *dom,
  unsigned long address, unsigned long pages)
 {
struct flush_queue *queue;
unsigned long flags;
+   u64 counter;
int idx;
 
pages = __roundup_pow_of_two(pages);
address >>= PAGE_SHIFT;
 
+   counter = atomic64_read(&dom->flush_finish_cnt);
+
queue = get_cpu_ptr(dom->flush_queue);
spin_lock_irqsave(&queue->lock, flags);
 
+   queue_ring_for_each(idx, queue) {
+   /*
+* This assumes that counter values in the ring-buffer are
+* monotonously rising.
+*/
+   if (queue->entries[idx].counter >= counter)
+   break;
+
+   free_iova_fast(&dom->iovad,
+  queue->entries[idx].iova_pfn,
+  queue->entries[idx].pages);
+
+   queue_ring_remove_head(queue);
+   }
+
if (queue_ring_full(queue)) {
+   atomic64_inc(&dom->flush_start_cnt);
domain_flush_tlb(&dom->domain);
domain_flush_complete(&dom->domain);
+   atomic64_inc(&dom->flush_finish_cnt);
queue_release(dom, queue);
}
 
@@ -1839,6 +1890,7 @@ static void queue_add(struct dma_ops_domain *dom,
 
queue->entries[idx].iova_pfn = address;
queue->entries[idx].pages= pages;
+   queue->entries[idx].counter  = atomic64_read(&dom->flush_start_cnt);
 
spin_unlock_irqrestore(&queue->lock, flags);
put_cpu_ptr(dom->flush_queue);
-- 
2.7.4



[PATCH 6/7] iommu/amd: Add per-domain timer to flush per-cpu queues

2017-06-07 Thread Joerg Roedel
From: Joerg Roedel 

Add a timer to each dma_ops domain so that we flush unused
IOTLB entries regularly, even if the queues don't get full
all the time.

Signed-off-by: Joerg Roedel 
---
 drivers/iommu/amd_iommu.c | 84 +--
 1 file changed, 67 insertions(+), 17 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 1ad2866..2bdfabf 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -180,6 +180,13 @@ struct dma_ops_domain {
 * flushed out of the IOTLB of the domain.
 */
atomic64_t flush_finish_cnt;
+
+   /*
+* Timer to make sure we don't keep IOVAs around unflushed
+* for too long
+*/
+   struct timer_list flush_timer;
+   atomic_t flush_timer_on;
 };
 
 static struct iova_domain reserved_iova_ranges;
@@ -1806,6 +1813,14 @@ static int dma_ops_domain_alloc_flush_queue(struct 
dma_ops_domain *dom)
return 0;
 }
 
+static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom)
+{
+   atomic64_inc(&dom->flush_start_cnt);
+   domain_flush_tlb(&dom->domain);
+   domain_flush_complete(&dom->domain);
+   atomic64_inc(&dom->flush_finish_cnt);
+}
+
 static inline bool queue_ring_full(struct flush_queue *queue)
 {
assert_spin_locked(&queue->lock);
@@ -1847,22 +1862,12 @@ static inline void queue_ring_remove_head(struct 
flush_queue *queue)
queue->head = (queue->head + 1) % FLUSH_QUEUE_SIZE;
 }
 
-static void queue_add(struct dma_ops_domain *dom,
- unsigned long address, unsigned long pages)
+static void queue_ring_free_flushed(struct dma_ops_domain *dom,
+   struct flush_queue *queue)
 {
-   struct flush_queue *queue;
-   unsigned long flags;
-   u64 counter;
+   u64 counter = atomic64_read(&dom->flush_finish_cnt);
int idx;
 
-   pages = __roundup_pow_of_two(pages);
-   address >>= PAGE_SHIFT;
-
-   counter = atomic64_read(&dom->flush_finish_cnt);
-
-   queue = get_cpu_ptr(dom->flush_queue);
-   spin_lock_irqsave(&queue->lock, flags);
-
queue_ring_for_each(idx, queue) {
/*
 * This assumes that counter values in the ring-buffer are
@@ -1877,12 +1882,25 @@ static void queue_add(struct dma_ops_domain *dom,
 
queue_ring_remove_head(queue);
}
+}
+
+static void queue_add(struct dma_ops_domain *dom,
+ unsigned long address, unsigned long pages)
+{
+   struct flush_queue *queue;
+   unsigned long flags;
+   int idx;
+
+   pages = __roundup_pow_of_two(pages);
+   address >>= PAGE_SHIFT;
+
+   queue = get_cpu_ptr(dom->flush_queue);
+   spin_lock_irqsave(&queue->lock, flags);
+
+   queue_ring_free_flushed(dom, queue);
 
if (queue_ring_full(queue)) {
-   atomic64_inc(&dom->flush_start_cnt);
-   domain_flush_tlb(&dom->domain);
-   domain_flush_complete(&dom->domain);
-   atomic64_inc(&dom->flush_finish_cnt);
+   dma_ops_domain_flush_tlb(dom);
queue_release(dom, queue);
}
 
@@ -1893,9 +1911,33 @@ static void queue_add(struct dma_ops_domain *dom,
queue->entries[idx].counter  = atomic64_read(&dom->flush_start_cnt);
 
spin_unlock_irqrestore(&queue->lock, flags);
+
+   if (atomic_cmpxchg(&dom->flush_timer_on, 0, 1) == 0)
+   mod_timer(&dom->flush_timer, jiffies + msecs_to_jiffies(10));
+
put_cpu_ptr(dom->flush_queue);
 }
 
+static void queue_flush_timeout(unsigned long data)
+{
+   struct dma_ops_domain *dom = (struct dma_ops_domain *)data;
+   int cpu;
+
+   atomic_set(>flush_timer_on, 0);
+
+   dma_ops_domain_flush_tlb(dom);
+
+   for_each_possible_cpu(cpu) {
+   struct flush_queue *queue;
+   unsigned long flags;
+
+   queue = per_cpu_ptr(dom->flush_queue, cpu);
+   spin_lock_irqsave(&queue->lock, flags);
+   queue_ring_free_flushed(dom, queue);
+   spin_unlock_irqrestore(&queue->lock, flags);
+   }
+}
+
 /*
  * Free a domain, only used if something went wrong in the
  * allocation path and we need to free an already allocated page table
@@ -1907,6 +1949,9 @@ static void dma_ops_domain_free(struct dma_ops_domain 
*dom)
 
del_domain_from_list(&dom->domain);
 
+   if (timer_pending(&dom->flush_timer))
+   del_timer(&dom->flush_timer);
+
dma_ops_domain_free_flush_queue(dom);
 
put_iova_domain(&dom->iovad);
@@ -1950,6 +1995,11 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
if (dma_ops_domain_alloc_flush_queue(dma_dom))
goto free_dma_dom;
 
+   setup_timer(&dma_dom->flush_timer, queue_flush_timeout,
+   (unsigned long)dma_dom);
+
+   atomic_set(&dma_dom->flush_timer_on, 0);
+
add_domain_to_list(&dma_dom->domain);
 
return dma_dom;
-- 
2.7.4


[PATCH 7/7] iommu/amd: Remove queue_release() function

2017-06-07 Thread Joerg Roedel
From: Joerg Roedel 

We can use queue_ring_free_flushed() instead, so remove this
redundancy.

Signed-off-by: Joerg Roedel 
---
 drivers/iommu/amd_iommu.c | 28 
 1 file changed, 8 insertions(+), 20 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 2bdfabf..80efa72 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1831,21 +1831,6 @@ static inline bool queue_ring_full(struct flush_queue 
*queue)
 #define queue_ring_for_each(i, q) \
for (i = (q)->head; i != (q)->tail; i = (i + 1) % FLUSH_QUEUE_SIZE)
 
-static void queue_release(struct dma_ops_domain *dom,
- struct flush_queue *queue)
-{
-   unsigned i;
-
-   assert_spin_locked(&queue->lock);
-
-   queue_ring_for_each(i, queue)
-   free_iova_fast(&dom->iovad,
-  queue->entries[i].iova_pfn,
-  queue->entries[i].pages);
-
-   queue->head = queue->tail = 0;
-}
-
 static inline unsigned queue_ring_add(struct flush_queue *queue)
 {
unsigned idx = queue->tail;
@@ -1897,12 +1882,15 @@ static void queue_add(struct dma_ops_domain *dom,
queue = get_cpu_ptr(dom->flush_queue);
spin_lock_irqsave(&queue->lock, flags);
 
-   queue_ring_free_flushed(dom, queue);
-
-   if (queue_ring_full(queue)) {
+   /*
+* When ring-queue is full, flush the entries from the IOTLB so
+* that we can free all entries with queue_ring_free_flushed()
+* below.
+*/
+   if (queue_ring_full(queue))
dma_ops_domain_flush_tlb(dom);
-   queue_release(dom, queue);
-   }
+
+   queue_ring_free_flushed(dom, queue);
 
idx = queue_ring_add(queue);
 
-- 
2.7.4



[PATCH 4/7] iommu/amd: Add locking to per-domain flush-queue

2017-06-07 Thread Joerg Roedel
From: Joerg Roedel 

With locking we can safely access the flush-queues of other
cpus.

Signed-off-by: Joerg Roedel 
---
 drivers/iommu/amd_iommu.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 6a5c858..9aa2735 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -144,6 +144,7 @@ struct flush_queue_entry {
 struct flush_queue {
struct flush_queue_entry *entries;
unsigned head, tail;
+   spinlock_t lock;
 };
 
 /*
@@ -1773,6 +1774,8 @@ static int dma_ops_domain_alloc_flush_queue(struct 
dma_ops_domain *dom)
dma_ops_domain_free_flush_queue(dom);
return -ENOMEM;
}
+
+   spin_lock_init(&queue->lock);
}
 
return 0;
@@ -1780,6 +1783,8 @@ static int dma_ops_domain_alloc_flush_queue(struct 
dma_ops_domain *dom)
 
 static inline bool queue_ring_full(struct flush_queue *queue)
 {
+   assert_spin_locked(&queue->lock);
+
return (((queue->tail + 1) % FLUSH_QUEUE_SIZE) == queue->head);
 }
 
@@ -1791,6 +1796,8 @@ static void queue_release(struct dma_ops_domain *dom,
 {
unsigned i;
 
+   assert_spin_locked(&queue->lock);
+
queue_ring_for_each(i, queue)
free_iova_fast(&dom->iovad,
   queue->entries[i].iova_pfn,
@@ -1803,6 +1810,7 @@ static inline unsigned queue_ring_add(struct flush_queue 
*queue)
 {
unsigned idx = queue->tail;
 
+   assert_spin_locked(&queue->lock);
queue->tail = (idx + 1) % FLUSH_QUEUE_SIZE;
 
return idx;
@@ -1812,12 +1820,14 @@ static void queue_add(struct dma_ops_domain *dom,
  unsigned long address, unsigned long pages)
 {
struct flush_queue *queue;
+   unsigned long flags;
int idx;
 
pages = __roundup_pow_of_two(pages);
address >>= PAGE_SHIFT;
 
queue = get_cpu_ptr(dom->flush_queue);
+   spin_lock_irqsave(&queue->lock, flags);
 
if (queue_ring_full(queue)) {
domain_flush_tlb(&dom->domain);
@@ -1830,6 +1840,7 @@ static void queue_add(struct dma_ops_domain *dom,
queue->entries[idx].iova_pfn = address;
queue->entries[idx].pages= pages;
 
+   spin_unlock_irqrestore(&queue->lock, flags);
put_cpu_ptr(dom->flush_queue);
 }
 
-- 
2.7.4



[PATCH 3/7] iommu/amd: Make use of the per-domain flush queue

2017-06-07 Thread Joerg Roedel
From: Joerg Roedel 

Fill the flush-queue on unmap and only flush the IOMMU and
device TLBs when a per-cpu queue gets full.

Signed-off-by: Joerg Roedel 
---
 drivers/iommu/amd_iommu.c | 60 +++
 1 file changed, 56 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 71c688a..6a5c858 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1778,6 +1778,61 @@ static int dma_ops_domain_alloc_flush_queue(struct 
dma_ops_domain *dom)
return 0;
 }
 
+static inline bool queue_ring_full(struct flush_queue *queue)
+{
+   return (((queue->tail + 1) % FLUSH_QUEUE_SIZE) == queue->head);
+}
+
+#define queue_ring_for_each(i, q) \
+   for (i = (q)->head; i != (q)->tail; i = (i + 1) % FLUSH_QUEUE_SIZE)
+
+static void queue_release(struct dma_ops_domain *dom,
+ struct flush_queue *queue)
+{
+   unsigned i;
+
+   queue_ring_for_each(i, queue)
+   free_iova_fast(&dom->iovad,
+  queue->entries[i].iova_pfn,
+  queue->entries[i].pages);
+
+   queue->head = queue->tail = 0;
+}
+
+static inline unsigned queue_ring_add(struct flush_queue *queue)
+{
+   unsigned idx = queue->tail;
+
+   queue->tail = (idx + 1) % FLUSH_QUEUE_SIZE;
+
+   return idx;
+}
+
+static void queue_add(struct dma_ops_domain *dom,
+ unsigned long address, unsigned long pages)
+{
+   struct flush_queue *queue;
+   int idx;
+
+   pages = __roundup_pow_of_two(pages);
+   address >>= PAGE_SHIFT;
+
+   queue = get_cpu_ptr(dom->flush_queue);
+
+   if (queue_ring_full(queue)) {
+   domain_flush_tlb(&dom->domain);
+   domain_flush_complete(&dom->domain);
+   queue_release(dom, queue);
+   }
+
+   idx = queue_ring_add(queue);
+
+   queue->entries[idx].iova_pfn = address;
+   queue->entries[idx].pages= pages;
+
+   put_cpu_ptr(dom->flush_queue);
+}
+
 /*
  * Free a domain, only used if something went wrong in the
  * allocation path and we need to free an already allocated page table
@@ -2426,10 +2481,7 @@ static void __unmap_single(struct dma_ops_domain 
*dma_dom,
domain_flush_tlb(&dma_dom->domain);
domain_flush_complete(&dma_dom->domain);
} else {
-   /* Keep the if() around, we need it later again */
-   dma_ops_free_iova(dma_dom, dma_addr, pages);
-   domain_flush_tlb(&dma_dom->domain);
-   domain_flush_complete(&dma_dom->domain);
+   queue_add(dma_dom, dma_addr, pages);
}
 }
 
-- 
2.7.4



[PATCH 1/7] iommu/amd: Rip out old queue flushing code

2017-06-07 Thread Joerg Roedel
From: Joerg Roedel 

The queue flushing is pretty inefficient when it flushes the
queues for all cpus at once. Further it flushes all domains
from all IOMMUs for all CPUs, which is overkill as well.

Rip it out to make room for something more efficient.

Signed-off-by: Joerg Roedel 
---
 drivers/iommu/amd_iommu.c | 143 ++
 1 file changed, 6 insertions(+), 137 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 63cacf5..6304a6e 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -89,25 +89,6 @@ LIST_HEAD(ioapic_map);
 LIST_HEAD(hpet_map);
 LIST_HEAD(acpihid_map);
 
-#define FLUSH_QUEUE_SIZE 256
-
-struct flush_queue_entry {
-   unsigned long iova_pfn;
-   unsigned long pages;
-   struct dma_ops_domain *dma_dom;
-};
-
-struct flush_queue {
-   spinlock_t lock;
-   unsigned next;
-   struct flush_queue_entry *entries;
-};
-
-static DEFINE_PER_CPU(struct flush_queue, flush_queue);
-
-static atomic_t queue_timer_on;
-static struct timer_list queue_timer;
-
 /*
  * Domain for untranslated devices - only allocated
  * if iommu=pt passed on kernel cmd line.
@@ -2225,92 +2206,6 @@ static struct iommu_group *amd_iommu_device_group(struct 
device *dev)
  *
  */
 
-static void __queue_flush(struct flush_queue *queue)
-{
-   struct protection_domain *domain;
-   unsigned long flags;
-   int idx;
-
-   /* First flush TLB of all known domains */
-   spin_lock_irqsave(&amd_iommu_pd_lock, flags);
-   list_for_each_entry(domain, &amd_iommu_pd_list, list)
-   domain_flush_tlb(domain);
-   spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
-
-   /* Wait until flushes have completed */
-   domain_flush_complete(NULL);
-
-   for (idx = 0; idx < queue->next; ++idx) {
-   struct flush_queue_entry *entry;
-
-   entry = queue->entries + idx;
-
-   free_iova_fast(&entry->dma_dom->iovad,
-   entry->iova_pfn,
-   entry->pages);
-
-   /* Not really necessary, just to make sure we catch any bugs */
-   entry->dma_dom = NULL;
-   }
-
-   queue->next = 0;
-}
-
-static void queue_flush_all(void)
-{
-   int cpu;
-
-   for_each_possible_cpu(cpu) {
-   struct flush_queue *queue;
-   unsigned long flags;
-
-   queue = per_cpu_ptr(&flush_queue, cpu);
-   spin_lock_irqsave(&queue->lock, flags);
-   if (queue->next > 0)
-   __queue_flush(queue);
-   spin_unlock_irqrestore(&queue->lock, flags);
-   }
-}
-
-static void queue_flush_timeout(unsigned long unsused)
-{
-   atomic_set(&queue_timer_on, 0);
-   queue_flush_all();
-}
-
-static void queue_add(struct dma_ops_domain *dma_dom,
- unsigned long address, unsigned long pages)
-{
-   struct flush_queue_entry *entry;
-   struct flush_queue *queue;
-   unsigned long flags;
-   int idx;
-
-   pages = __roundup_pow_of_two(pages);
-   address >>= PAGE_SHIFT;
-
-   queue = get_cpu_ptr(&flush_queue);
-   spin_lock_irqsave(&queue->lock, flags);
-
-   if (queue->next == FLUSH_QUEUE_SIZE)
-   __queue_flush(queue);
-
-   idx   = queue->next++;
-   entry = queue->entries + idx;
-
-   entry->iova_pfn = address;
-   entry->pages= pages;
-   entry->dma_dom  = dma_dom;
-
-   spin_unlock_irqrestore(&queue->lock, flags);
-
-   if (atomic_cmpxchg(&queue_timer_on, 0, 1) == 0)
-   mod_timer(&queue_timer, jiffies + msecs_to_jiffies(10));
-
-   put_cpu_ptr(&flush_queue);
-}
-
-
 /*
  * In the dma_ops path we only have the struct device. This function
  * finds the corresponding IOMMU, the protection domain and the
@@ -2462,7 +2357,10 @@ static void __unmap_single(struct dma_ops_domain 
*dma_dom,
domain_flush_tlb(&dma_dom->domain);
domain_flush_complete(&dma_dom->domain);
} else {
-   queue_add(dma_dom, dma_addr, pages);
+   /* Keep the if() around, we need it later again */
+   dma_ops_free_iova(dma_dom, dma_addr, pages);
+   domain_flush_tlb(&dma_dom->domain);
+   domain_flush_complete(&dma_dom->domain);
}
 }
 
@@ -2797,7 +2695,7 @@ static int init_reserved_iova_ranges(void)
 
 int __init amd_iommu_init_api(void)
 {
-   int ret, cpu, err = 0;
+   int ret, err = 0;
 
ret = iova_cache_get();
if (ret)
@@ -2807,18 +2705,6 @@ int __init amd_iommu_init_api(void)
if (ret)
return ret;
 
-   for_each_possible_cpu(cpu) {
-   struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu);
-
-   queue->entries = kzalloc(FLUSH_QUEUE_SIZE *
-sizeof(*queue->entries),
-

[PATCH 2/7] iommu/amd: Add per-domain flush-queue data structures

2017-06-07 Thread Joerg Roedel
From: Joerg Roedel 

Make the flush-queue per dma-ops domain and add code to
allocate and free the flush-queues.

Signed-off-by: Joerg Roedel 
---
 drivers/iommu/amd_iommu.c | 69 +++
 1 file changed, 69 insertions(+)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 6304a6e..71c688a 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -134,6 +134,18 @@ static void update_domain(struct protection_domain 
*domain);
 static int protection_domain_init(struct protection_domain *domain);
 static void detach_device(struct device *dev);
 
+#define FLUSH_QUEUE_SIZE 256
+
+struct flush_queue_entry {
+   unsigned long iova_pfn;
+   unsigned long pages;
+};
+
+struct flush_queue {
+   struct flush_queue_entry *entries;
+   unsigned head, tail;
+};
+
 /*
  * Data container for a dma_ops specific protection domain
  */
@@ -143,6 +155,8 @@ struct dma_ops_domain {
 
/* IOVA RB-Tree */
struct iova_domain iovad;
+
+   struct flush_queue __percpu *flush_queue;
 };
 
 static struct iova_domain reserved_iova_ranges;
@@ -1714,6 +1728,56 @@ static void free_gcr3_table(struct protection_domain 
*domain)
free_page((unsigned long)domain->gcr3_tbl);
 }
 
+static void dma_ops_domain_free_flush_queue(struct dma_ops_domain *dom)
+{
+   int cpu;
+
+   for_each_possible_cpu(cpu) {
+   struct flush_queue *queue;
+
+   queue = per_cpu_ptr(dom->flush_queue, cpu);
+   kfree(queue->entries);
+   }
+
+   free_percpu(dom->flush_queue);
+
+   dom->flush_queue = NULL;
+}
+
+static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom)
+{
+   int cpu;
+
+   dom->flush_queue = alloc_percpu(struct flush_queue);
+   if (!dom->flush_queue)
+   return -ENOMEM;
+
+   /* First make sure everything is cleared */
+   for_each_possible_cpu(cpu) {
+   struct flush_queue *queue;
+
+   queue = per_cpu_ptr(dom->flush_queue, cpu);
+   queue->head= 0;
+   queue->tail= 0;
+   queue->entries = NULL;
+   }
+
+   /* Now start doing the allocation */
+   for_each_possible_cpu(cpu) {
+   struct flush_queue *queue;
+
+   queue = per_cpu_ptr(dom->flush_queue, cpu);
+   queue->entries = kzalloc(FLUSH_QUEUE_SIZE * 
sizeof(*queue->entries),
+GFP_KERNEL);
+   if (!queue->entries) {
+   dma_ops_domain_free_flush_queue(dom);
+   return -ENOMEM;
+   }
+   }
+
+   return 0;
+}
+
 /*
  * Free a domain, only used if something went wrong in the
  * allocation path and we need to free an already allocated page table
@@ -1725,6 +1789,8 @@ static void dma_ops_domain_free(struct dma_ops_domain 
*dom)
 
del_domain_from_list(&dom->domain);
 
+   dma_ops_domain_free_flush_queue(dom);
+
put_iova_domain(&dom->iovad);
 
free_pagetable(&dom->domain);
@@ -1763,6 +1829,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
/* Initialize reserved ranges */
copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad);
 
+   if (dma_ops_domain_alloc_flush_queue(dma_dom))
+   goto free_dma_dom;
+
add_domain_to_list(&dma_dom->domain);
 
return dma_dom;
-- 
2.7.4



Re: [PATCH v1 3/3] iommu/amd: Optimize the IOMMU queue flush

2017-06-07 Thread Joerg Roedel
Hey Tom,

On Wed, Jun 07, 2017 at 09:03:15AM -0500, Tom Lendacky wrote:
> I was able to run your patches in combination with the first two patches
> that I submitted and the results look good.  Let me know if you'd like
> me to resubmit the series minus the third patch.

Thanks a lot for testing the patches! You don't need to resubmit them, I
take the first two of this set and apply them along with my patches to
the iommu tree.


Thanks,

Joerg



Re: [PATCH v1 3/3] iommu/amd: Optimize the IOMMU queue flush

2017-06-07 Thread Tom Lendacky

On 6/6/2017 8:36 AM, Tom Lendacky wrote:

On 6/6/2017 7:05 AM, Joerg Roedel wrote:

Hey Tom,


Hi Joerg,



On Mon, Jun 05, 2017 at 02:52:35PM -0500, Tom Lendacky wrote:
After reducing the amount of MMIO performed by the IOMMU during 
operation,

perf data shows that flushing the TLB for all protection domains during
DMA unmapping is a performance issue. It is not necessary to flush the
TLBs for all protection domains, only the protection domains associated
with iova's on the flush queue.

Create a separate queue that tracks the protection domains associated 
with

the iova's on the flush queue. This new queue optimizes the flushing of
TLBs to the required protection domains.

Reviewed-by: Arindam Nath 
Signed-off-by: Tom Lendacky 
---
  drivers/iommu/amd_iommu.c |   56 
-

  1 file changed, 50 insertions(+), 6 deletions(-)


I also did a major rewrite of the AMD IOMMU queue handling and flushing
code last week. It is functionally complete and I am currently testing,
documenting it, and cleaning it up. I pushed the current state of it to

git://git.kernel.org/pub/scm/linux/kernel/git/joro/linux.git 
amd-iommu


It's quite intrusive as it implements a per-domain flush-queue, and uses
a ring-buffer instead of a real queue. But you see the details in the
code.

Can you please have a look and give it a test in your setup?


I'll try and look at this as soon as I can... I'm sharing the test
setup and I might not be able to get access again for a day or two.



I was able to run your patches in combination with the first two patches
that I submitted and the results look good.  Let me know if you'd like
me to resubmit the series minus the third patch.

Thanks,
Tom


Thanks,
Tom




Thanks,

Joerg




Re: [PATCH 03/12] intel-ipu3: Add DMA API implementation

2017-06-07 Thread Tomasz Figa
Hi Yong,

+Robin, Joerg, IOMMU ML

Please see my comments inline.

On Tue, Jun 6, 2017 at 5:39 AM, Yong Zhi  wrote:
> IPU3 mmu based DMA mapping driver
>
> Signed-off-by: Yong Zhi 
> ---
>  drivers/media/pci/intel/ipu3/Kconfig   |   6 +
>  drivers/media/pci/intel/ipu3/Makefile  |   1 +
>  drivers/media/pci/intel/ipu3/ipu3-dmamap.c | 408 
> +
>  drivers/media/pci/intel/ipu3/ipu3-dmamap.h |  20 ++
>  4 files changed, 435 insertions(+)
>  create mode 100644 drivers/media/pci/intel/ipu3/ipu3-dmamap.c
>  create mode 100644 drivers/media/pci/intel/ipu3/ipu3-dmamap.h
>
> diff --git a/drivers/media/pci/intel/ipu3/Kconfig 
> b/drivers/media/pci/intel/ipu3/Kconfig
> index ab2edcb..2030be7 100644
> --- a/drivers/media/pci/intel/ipu3/Kconfig
> +++ b/drivers/media/pci/intel/ipu3/Kconfig
> @@ -26,3 +26,9 @@ config INTEL_IPU3_MMU
>
>   Say Y here if you have Skylake/Kaby Lake SoC with IPU3.
>   Say N if un-sure.
> +
> +config INTEL_IPU3_DMAMAP
> +   bool "Intel ipu3 DMA mapping driver"
> +   select IOMMU_IOVA
> +   ---help---
> + This is IPU3 IOMMU domain specific DMA driver.
> diff --git a/drivers/media/pci/intel/ipu3/Makefile 
> b/drivers/media/pci/intel/ipu3/Makefile
> index 2b669df..2c2a035 100644
> --- a/drivers/media/pci/intel/ipu3/Makefile
> +++ b/drivers/media/pci/intel/ipu3/Makefile
> @@ -1,2 +1,3 @@
>  obj-$(CONFIG_VIDEO_IPU3_CIO2) += ipu3-cio2.o
>  obj-$(CONFIG_INTEL_IPU3_MMU) += ipu3-mmu.o
> +obj-$(CONFIG_INTEL_IPU3_DMAMAP) += ipu3-dmamap.o
> diff --git a/drivers/media/pci/intel/ipu3/ipu3-dmamap.c 
> b/drivers/media/pci/intel/ipu3/ipu3-dmamap.c
> new file mode 100644
> index 000..74704d9
> --- /dev/null
> +++ b/drivers/media/pci/intel/ipu3/ipu3-dmamap.c
> @@ -0,0 +1,408 @@
> +/*
> + * Copyright (c) 2017 Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version
> + * 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + */
> +#include 
> +#include 
> +#include 
> +#include 
> +#include "ipu3-mmu.h"
> +
> +/* Begin of things adapted from arch/arm/mm/dma-mapping.c */

ARM's DMA ops are not a good example of today's coding standards.
There are already generic DMA mapping helpers available in
drivers/iommu/dma-iommu.c and drivers/base/dma-*. (Hmm, I remember
writing this already, déjà vu maybe...)
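
For reference, a rough sketch of the generic helpers being referred to (the
wrapper name here is made up, and exact usage depends on the kernel
version; assumes CONFIG_IOMMU_DMA):

	#include <linux/dma-iommu.h>

	static int ipu3_setup_dma_domain(struct iommu_domain *domain,
					 struct device *dev,
					 dma_addr_t base, u64 size)
	{
		int ret;

		/* Allocate the per-domain IOVA cookie used by dma-iommu.c */
		ret = iommu_get_dma_cookie(domain);
		if (ret)
			return ret;

		/* Hook the generic IOVA allocator up to this domain/device */
		return iommu_dma_init_domain(domain, base, size, dev);
	}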

> +static void ipu3_dmamap_clear_buffer(struct page *page, size_t size,
> +unsigned long attrs)
> +{
> +   /*
> +* Ensure that the allocated pages are zeroed, and that any data
> +* lurking in the kernel direct-mapped region is invalidated.
> +*/
> +   if (PageHighMem(page)) {
> +   while (size > 0) {
> +   void *ptr = kmap_atomic(page);
> +
> +   memset(ptr, 0, PAGE_SIZE);
> +   if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
> +   clflush_cache_range(ptr, PAGE_SIZE);
> +   kunmap_atomic(ptr);
> +   page++;
> +   size -= PAGE_SIZE;
> +   }
> +   } else {
> +   void *ptr = page_address(page);
> +
> +   memset(ptr, 0, size);
> +   if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
> +   clflush_cache_range(ptr, size);
> +   }
> +}
> +
> +/**
> + * ipu3_dmamap_alloc_buffer - allocate buffer based on attributes
> + * @dev: struct device pointer
> + * @size: size of buffer in bytes
> + * @gfp: specify the free page type
> + * @attrs: defined in linux/dma-attrs.h
> + *
> + * This is a helper function for physical page allocation
> + *
> + * Return array representing buffer from alloc_pages() on success
> + * or NULL on failure
> + *
> + * Must be freed with ipu3_dmamap_free_buffer.
> + */
> +static struct page **ipu3_dmamap_alloc_buffer(struct device *dev, size_t 
> size,
> + gfp_t gfp, unsigned long attrs)
> +{
> +   struct page **pages;
> +   int count = size >> PAGE_SHIFT;
> +   int array_size = count * sizeof(struct page *);
> +   int i = 0;
> +
> +   /* Allocate mem for array of page ptrs */
> +   if (array_size <= PAGE_SIZE)
> +   pages = kzalloc(array_size, GFP_KERNEL);
> +   else
> +   pages = vzalloc(array_size);
> +   if (!pages)
> +   return NULL;
> +
> +   gfp |= __GFP_NOWARN;
> +
> +   while (count) {
> +   int j, order = __fls(count);
> +
> +   pages[i] = alloc_pages(gfp, order);
> +   

Re: [PATCH 02/12] intel-ipu3: mmu: implement driver

2017-06-07 Thread Tomasz Figa
Hi Yong, Tuukka,

Continuing from yesterday. Please see comments inline.

> On Tue, Jun 6, 2017 at 5:39 AM, Yong Zhi  wrote:
[snip]
>> +   ptr = ipu3_mmu_alloc_page_table(mmu_dom, false);
>> +   if (!ptr)
>> +   goto fail_page_table;
>> +
>> +   /*
>> +* We always map the L1 page table (a single page as well as
>> +* the L2 page tables).
>> +*/
>> +   mmu_dom->dummy_l2_tbl = virt_to_phys(ptr) >> IPU3_MMU_PAGE_SHIFT;
>> +   mmu_dom->pgtbl = ipu3_mmu_alloc_page_table(mmu_dom, true);
>> +   if (!mmu_dom->pgtbl)
>> +   goto fail_page_table;
>> +
>> +   spin_lock_init(_dom->lock);
>> +   return &mmu_dom->domain;
>> +
>> +fail_page_table:
>> +   free_page((unsigned long)TBL_VIRT_ADDR(mmu_dom->dummy_page));
>> +   free_page((unsigned long)TBL_VIRT_ADDR(mmu_dom->dummy_l2_tbl));
>> +fail_get_page:
>> +   kfree(mmu_dom);
>> +   return NULL;
>> +}
>> +
>> +static void ipu3_mmu_domain_free(struct iommu_domain *dom)
>> +{
>> +   struct ipu3_mmu_domain *mmu_dom =
>> +   container_of(dom, struct ipu3_mmu_domain, domain);
>> +   uint32_t l1_idx;
>> +
>> +   for (l1_idx = 0; l1_idx < IPU3_MMU_L1PT_PTES; l1_idx++)
>> +   if (mmu_dom->pgtbl[l1_idx] != mmu_dom->dummy_l2_tbl)
>> +   free_page((unsigned long)
>> + TBL_VIRT_ADDR(mmu_dom->pgtbl[l1_idx]));
>> +
>> +   free_page((unsigned long)TBL_VIRT_ADDR(mmu_dom->dummy_page));
>> +   free_page((unsigned long)TBL_VIRT_ADDR(mmu_dom->dummy_l2_tbl));

I might be overly paranoid, but reading back kernel virtual pointers
from device accessible memory doesn't seem safe to me. Other drivers
keep kernel pointers of page tables in a dedicated array (it's only 8K
of memory, but much better safety).
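
A sketch of that scheme (field names made up): keep the CPU virtual address
of every L2 table in a plain array and never convert back from the
device-visible physical address:

	struct ipu3_mmu_domain {
		u32 *pgtbl;				/* L1 table (device view)  */
		u32 *l2pt_virt[IPU3_MMU_L1PT_PTES];	/* CPU pointers, ~8K total */
		/* ... */
	};

	/* domain_free() then walks the shadow array instead of TBL_VIRT_ADDR() */
	for (l1_idx = 0; l1_idx < IPU3_MMU_L1PT_PTES; l1_idx++)
		if (mmu_dom->l2pt_virt[l1_idx])
			free_page((unsigned long)mmu_dom->l2pt_virt[l1_idx]);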

>> +   free_page((unsigned long)mmu_dom->pgtbl);
>> +   kfree(mmu_dom);
>> +}
>> +
>> +/**
>> + * ipu3_mmu_map - mapping iova allocated cache to phy addr
>> + * @domain: iommu domain
>> + * @iova: virtual address
>> + * @paddr: physical address
>> + * @size: size to be mapped
>> + * Allocate L2 pgt if needed and establish the mapping between
>> + * iova address space and pfn
>> + *
>> + * Return: 0 for success
>> + * or negative on failure.
>> + */
>> +static int ipu3_mmu_map(struct iommu_domain *domain, unsigned long iova,
>> +   phys_addr_t paddr, size_t size, int prot)
>> +{
>> +   struct ipu3_mmu_domain *mmu_dom =
>> +   container_of(domain, struct ipu3_mmu_domain, domain);

Please add a static inline function for this conversion.
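
Something like this (sketch):

	static inline struct ipu3_mmu_domain *to_ipu3_mmu_domain(struct iommu_domain *dom)
	{
		return container_of(dom, struct ipu3_mmu_domain, domain);
	}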

>> +   uint32_t iova_start = round_down(iova, IPU3_MMU_PAGE_SIZE);
>> +   uint32_t iova_end = ALIGN(iova + size, IPU3_MMU_PAGE_SIZE);
>> +   uint32_t l1_idx = iova >> IPU3_MMU_L1PT_SHIFT;
>> +   uint32_t l1_entry = mmu_dom->pgtbl[l1_idx];
>> +   uint32_t *l2_pt;
>> +   uint32_t l2_idx;
>> +   unsigned long flags;
>> +
>> +   /* map to single PAGE */
>> +   WARN_ON(size != IPU3_MMU_PAGE_SIZE);

If we already check this, we could fail as well, i.e.

if (WARN_ON(size != IPU3_MMU_PAGE_SIZE))
return -EINVAL;

>> +
>> +   dev_dbg(mmu_dom->mmu->dev,
>> +   "mapping iova 0x%8.8x--0x%8.8x, size %zu at paddr 0x%pa\n",
>> +   iova_start, iova_end, size, &paddr);
>> +   dev_dbg(mmu_dom->mmu->dev,
>> +   "mapping l2 page table for l1 index %u (iova 0x%8.8lx)\n",
>> +   l1_idx, iova);
>> +
>> +   if (l1_entry == mmu_dom->dummy_l2_tbl) {
>> +   uint32_t *l2_virt = ipu3_mmu_alloc_page_table(mmu_dom, 
>> false);
>> +
>> +   if (!l2_virt)
>> +   return -ENOMEM;
>> +
>> +   l1_entry = virt_to_phys(l2_virt) >> IPU3_MMU_PAGE_SHIFT;
>> +   dev_dbg(mmu_dom->mmu->dev,
>> +   "allocated page for l1_idx %u\n", l1_idx);
>> +
>> +   spin_lock_irqsave(&mmu_dom->lock, flags);
>> +   if (mmu_dom->pgtbl[l1_idx] == mmu_dom->dummy_l2_tbl) {
>> +   mmu_dom->pgtbl[l1_idx] = l1_entry;
>> +   clflush_cache_range(&mmu_dom->pgtbl[l1_idx],
>> +   sizeof(mmu_dom->pgtbl[l1_idx]));
>> +   } else {
>> +   spin_unlock_irqrestore(&mmu_dom->lock, flags);
>> +   free_page((unsigned long)TBL_VIRT_ADDR(l1_entry));
>> +   spin_lock_irqsave(&mmu_dom->lock, flags);
>> +   }
>> +   } else {
>> +   spin_lock_irqsave(&mmu_dom->lock, flags);
>> +   }
>> +
>> +   l2_pt = TBL_VIRT_ADDR(mmu_dom->pgtbl[l1_idx]);
>> +
>> +   dev_dbg(mmu_dom->mmu->dev, "l2_pt at %p\n", l2_pt);
>> +
>> +   paddr = ALIGN(paddr, IPU3_MMU_PAGE_SIZE);
>> +
>> +   l2_idx = (iova_start & IPU3_MMU_L2PT_MASK) >> IPU3_MMU_L2PT_SHIFT;
>> +
>> +   dev_dbg(mmu_dom->mmu->dev,
>> +   "l2_idx %u, 

Re: Device address specific mapping of arm,mmu-500

2017-06-07 Thread Ray Jui via iommu

Hi Robin,


On 6/6/2017 3:02 AM, Robin Murphy wrote:

I've currently got some experimental patches pushed out here:

 git://linux-arm.org/linux-rm  iommu/pgtable

So far, there's still one silly bug (which doesn't affect DMA ops usage)
and an awkward race for non-coherent table walks which will need
resolving before I have anything to post properly, which I hope will be
within the next couple of weeks. In the meantime, though, it already
seems to work well enough in practice, so any feedback is welcome!

Robin.
Excellent! I'm going to find time to test it out (likely next week). 
I'll report back the test result after that.


Thanks,

Ray