[PATCH] iommu/of: Handle PCI aliases properly

2017-05-31 Thread Robin Murphy
When a PCI device has DMA quirks, we need to ensure that an upstream
IOMMU knows about all possible aliases, since the presence of a DMA
quirk does not preclude the device still also emitting transactions
(e.g. MSIs) on its 'real' RID. Similarly, the rules for bridge aliasing
are relatively complex, and some bridges may only take ownership of
transactions under particular transient circumstances, leading again to
multiple RIDs potentially being seen at the IOMMU for the given device.

Take all this into account in the OF code by translating every RID
produced by the alias walk, not just whichever one comes out last.
Happily, this also makes things tidy enough that we can reduce the
number of both total lines of code, and confusing levels of indirection,
by pulling the "iommus"/"iommu-map" parsing helpers back in-line again.

Signed-off-by: Robin Murphy 
---

This applies on top of the fix currently queued in the IOMMU tree:
"iommu/of: Ignore all errors except EPROBE_DEFER"

 drivers/iommu/of_iommu.c | 104 +--
 1 file changed, 47 insertions(+), 57 deletions(-)

diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 19779b88a479..1fbd7a142b30 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -140,75 +140,39 @@ static const struct iommu_ops
return ops;
 }
 
-static int __get_pci_rid(struct pci_dev *pdev, u16 alias, void *data)
-{
-   struct of_phandle_args *iommu_spec = data;
+struct of_pci_iommu_alias_info {
+   struct device *dev;
+   struct device_node *np;
+};
 
-   iommu_spec->args[0] = alias;
-   return iommu_spec->np == pdev->bus->dev.of_node;
-}
-
-static const struct iommu_ops
-*of_pci_iommu_init(struct pci_dev *pdev, struct device_node *bridge_np)
+static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data)
 {
+   struct of_pci_iommu_alias_info *info = data;
const struct iommu_ops *ops;
-   struct of_phandle_args iommu_spec;
+   struct of_phandle_args iommu_spec = { .args_count = 1 };
int err;
 
-   /*
-* Start by tracing the RID alias down the PCI topology as
-* far as the host bridge whose OF node we have...
-* (we're not even attempting to handle multi-alias devices yet)
-*/
-   iommu_spec.args_count = 1;
-   iommu_spec.np = bridge_np;
-   pci_for_each_dma_alias(pdev, __get_pci_rid, &iommu_spec);
-   /*
-* ...then find out what that becomes once it escapes the PCI
-* bus into the system beyond, and which IOMMU it ends up at.
-*/
-   iommu_spec.np = NULL;
-   err = of_pci_map_rid(bridge_np, iommu_spec.args[0], "iommu-map",
+   err = of_pci_map_rid(info->np, alias, "iommu-map",
 "iommu-map-mask", _spec.np,
 iommu_spec.args);
if (err)
-   return err == -ENODEV ? NULL : ERR_PTR(err);
-
-   ops = of_iommu_xlate(&pdev->dev, &iommu_spec);
+   return err == -ENODEV ? 1 : err;
 
+   ops = of_iommu_xlate(info->dev, &iommu_spec);
of_node_put(iommu_spec.np);
-   return ops;
-}
 
-static const struct iommu_ops
-*of_platform_iommu_init(struct device *dev, struct device_node *np)
-{
-   struct of_phandle_args iommu_spec;
-   const struct iommu_ops *ops = NULL;
-   int idx = 0;
+   if (IS_ERR(ops))
+   return PTR_ERR(ops);
 
-   /*
-* We don't currently walk up the tree looking for a parent IOMMU.
-* See the `Notes:' section of
-* Documentation/devicetree/bindings/iommu/iommu.txt
-*/
-   while (!of_parse_phandle_with_args(np, "iommus", "#iommu-cells",
-  idx, &iommu_spec)) {
-   ops = of_iommu_xlate(dev, &iommu_spec);
-   of_node_put(iommu_spec.np);
-   idx++;
-   if (IS_ERR_OR_NULL(ops))
-   break;
-   }
-
-   return ops;
+   return info->np == pdev->bus->dev.of_node;
 }
 
 const struct iommu_ops *of_iommu_configure(struct device *dev,
   struct device_node *master_np)
 {
-   const struct iommu_ops *ops;
+   const struct iommu_ops *ops = NULL;
struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+   int err;
 
if (!master_np)
return NULL;
@@ -221,18 +185,44 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
iommu_fwspec_free(dev);
}
 
-   if (dev_is_pci(dev))
-   ops = of_pci_iommu_init(to_pci_dev(dev), master_np);
-   else
-   ops = of_platform_iommu_init(dev, master_np);
+   /*
+* We don't currently walk up the tree looking for a parent IOMMU.
+* See the `Notes:' section of
+* Documentation/devicetree/bindings/iommu/iommu.txt
+*/
+   if (dev_is_pci(dev)) {
+   struct of_pci_iommu_alias_info info 
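
Purely as a hypothetical illustration of the approach described in the
commit message (this is not the patch hunk itself, and the example_ name
is made up): the PCI path of of_iommu_configure() ends up driving
of_pci_iommu_init() once per RID produced by the alias walk, roughly
along these lines:

	static const struct iommu_ops *
	example_pci_path(struct device *dev, struct device_node *master_np)
	{
		struct of_pci_iommu_alias_info info = {
			.dev = dev,
			.np = master_np,
		};
		int err;

		/* of_pci_iommu_init() is called for every alias the walk produces */
		err = pci_for_each_dma_alias(to_pci_dev(dev),
					     of_pci_iommu_init, &info);
		if (err < 0)
			return ERR_PTR(err);

		/* Successful of_iommu_xlate() calls populate dev->iommu_fwspec */
		return dev->iommu_fwspec ? dev->iommu_fwspec->ops : NULL;
	}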

Re: Device address specific mapping of arm,mmu-500

2017-05-31 Thread Ray Jui via iommu
Hi Will,

On 5/31/17 5:44 AM, Will Deacon wrote:
> On Tue, May 30, 2017 at 11:13:36PM -0700, Ray Jui wrote:
>> I did a little more digging myself and I think I now understand what you
>> meant by identity mapping, i.e., configuring the MMU-500 with 1:1 mapping
>> between the DMA address and the IOVA address.
>>
>> I think that should work. In the end, due to this MSI write parsing issue in
>> our PCIe controller, the reason to use IOMMU is to allow the cache
>> attributes (AxCACHE) of the MSI writes towards GICv3 ITS to be modified by
>> the IOMMU to be device type, while leaving the rest of inbound reads/writes
>> from/to DDR with more optimized cache attributes setting, to allow I/O
>> coherency to be still enabled for the PCIe controller. In fact, the PCIe
>> controller itself is fully capable of DMA to/from the full address space of
>> our SoC including both DDR and any device memory.
>>
>> The 1:1 mapping will still pose some translation overhead like you
>> suggested; however, the overhead of allocating page tables and locking will
>> be gone. This sounds like the best possible option I have currently.
> 
> It might end up being pretty invasive to work around a hardware bug, so
> we'll have to see what it looks like. Ideally, we could just use the SMMU
> for everything as-is and work on clawing back the lost performance (it
> should be possible to get ~95% of the perf if we sort out the locking, which
> we *are* working on).
> 

If 95% of performance can be achieved by fixing the locking in the
driver, then that's great news.

If you have anything that you want me to help test, feel free to send it
out. I will be more than happy to help test it and let you know about
the performance numbers. :)

>> May I ask, how do I start to try to get this identity mapping to work as an
>> experiment and proof of concept? Any pointer or advise is highly appreciated
>> as you can see I'm not very experienced with this. I found Will recently
>> added the IOMMU_DOMAIN_IDENTITY support to the arm-smmu driver. But I
>> suppose that is to bypass the SMMU completely, instead of still going
>> through the MMU with 1:1 translation. Is my understanding correct?
> 
> Yes, I don't think IOMMU_DOMAIN_IDENTITY is what you need because you
> actually need per-page control of memory attributes.
> 
> Robin might have a better idea, but I think you'll have to hack dma-iommu.c
> so that you can have a version of the DMA ops that:
> 
>   * Initialises the identity map (I guess as normal WB cacheable?)
>   * Reserves and maps the MSI region appropriately
>   * Just returns the physical address for the dma address for map requests
> (return error for the MSI region)
>   * Does nothing for unmap requests
> 
> But my strong preference would be to fix the locking overhead from the
> SMMU so that the perf hit is acceptable.

Yes, I agree, we want to be able to use the SMMU the intended way. Do
you have a timeline on when the locking issue may be fixed (or
improved)? Depending on the timeline, on our side, we may still need to
go for identity mapping as a temporary solution until the fix is available.

> 
> Will
> 

Thanks,

Ray


Re: [PATCH 2/7] dt-bindings: PCI: Describe ATS property for root complex nodes

2017-05-31 Thread Rob Herring
On Wed, May 24, 2017 at 07:01:38PM +0100, Jean-Philippe Brucker wrote:
> Address Translation Service (ATS) is an extension to PCIe allowing
> endpoints to manage their own IOTLB, called Address Translation Cache
> (ATC). Instead of having every memory transaction processed by the IOMMU,
> the endpoint can first send an Address Translation Request for an IOVA,
> obtain the corresponding Physical Address from the IOMMU and store it in
> its ATC. Subsequent transactions to this memory region can be performed on
> the PA, in which case they are marked 'translated' and (partially) bypass
> the IOMMU.
> 
> Since the extension uses fields that were previously reserved in the
> PCIe Transaction Layer Packet, it seems ill-advised to enable it on a
> system that doesn't fully support ATS.
> 
> To "old" root complexes that simply ignored the new AT field, an Address
> Translation Request will look exactly like a Memory Read Request, so the
> root bridge will forward a memory read to the IOMMU instead of a
> translation request. If the access succeeds, the RC will send a Read
> Completion, which looks like a Translation Completion, back to the
> endpoint. As a result the endpoint might end up storing the content of
> memory instead of a physical address in its ATC. In reality, it's more
> likely that the size fields will be invalid and either end will detect the
> error, but in any case, it is undesirable.
> 
> Add a way for firmware to tell the OS that ATS is supported by the PCI
> root complex.

Can't firmware have already enabled ATS? Often for things like this, not 
present means "use firmware setting".

> 
> Signed-off-by: Jean-Philippe Brucker 
> ---
>  Documentation/devicetree/bindings/pci/pci-iommu.txt | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/Documentation/devicetree/bindings/pci/pci-iommu.txt b/Documentation/devicetree/bindings/pci/pci-iommu.txt
> index 0def586fdcdf..f21a68ec471a 100644
> --- a/Documentation/devicetree/bindings/pci/pci-iommu.txt
> +++ b/Documentation/devicetree/bindings/pci/pci-iommu.txt
> @@ -44,6 +44,14 @@ Optional properties
>  - iommu-map-mask: A mask to be applied to each Requester ID prior to being
>mapped to an IOMMU specifier per the iommu-map property.
>  
> +- ats-supported: if present, the root complex supports the Address
> +  Translation Service (ATS). It is able to interpret the AT field in PCIe
> +  Transaction Layer Packets, and forward Translation Completions or
> +  Invalidation Requests to endpoints.

Why can't this be based on the compatible strings?

Rob


Re: [PATCH 2/7] dt-bindings: PCI: Describe ATS property for root complex nodes

2017-05-31 Thread Rob Herring
On Tue, May 30, 2017 at 11:58:50AM +0100, Jean-Philippe Brucker wrote:
> On 30/05/17 11:01, Joerg Roedel wrote:
> > On Wed, May 24, 2017 at 07:01:38PM +0100, Jean-Philippe Brucker wrote:
> >> +- ats-supported: if present, the root complex supports the Address
> >> +  Translation Service (ATS). It is able to interpret the AT field in PCIe
> >> +  Transaction Layer Packets, and forward Translation Completions or
> >> +  Invalidation Requests to endpoints.
> >> +
> >> +  Device drivers should not enable ATS in endpoints unless this property
> >> +  is present.
> > 
> > Device drivers should never enable ATS, this is the job of the IOMMU
> > driver. This should be clarified in the text.
> 
> Right, I will change this.

Driver details should not be in DT to begin with.

Rob


Re: [PATCH v5 28/32] x86/mm, kexec: Allow kexec to be used with SME

2017-05-31 Thread Borislav Petkov
On Wed, May 31, 2017 at 11:03:52PM +0800, Xunlei Pang wrote:
> For kdump case, it will be put in some reserved crash memory allocated
> by kexec-tools, and passed the corresponding start address of the
> allocated reserved crash memory to kdump kernel via "elfcorehdr=",
> please see kernel functions setup_elfcorehdr() and vmcore_init() for
> how it is parsed by kdump kernel.

... which could be a great way to pass the SME status to the second
kernel without any funky sysfs games.

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.


Re: [PATCH 0/7] Add PCI ATS support to SMMUv3

2017-05-31 Thread Nate Watterson

Hi Jean-Philippe,

On 5/24/2017 2:01 PM, Jean-Philippe Brucker wrote:

PCIe devices can implement their own TLB, named Address Translation Cache
(ATC). In order to support Address Translation Service (ATS), the
following changes are needed in software:

* Enable ATS on endpoints when the system supports it. Both PCI root
   complex and associated SMMU must implement the ATS protocol.

* When unmapping an IOVA, send an ATC invalidate request to the endpoint
   in addition to the usual SMMU IOTLB invalidations.

I previously sent this as part of a lengthy RFC [1] adding SVM (ATS +
PASID + PRI) support to SMMUv3. The next PASID/PRI version is almost
ready, but isn't likely to get merged because it needs hardware testing,
so I will send it later. PRI depends on ATS, but ATS should be useful on
its own.

Without PASID and PRI, ATS is used for accelerating transactions. Instead
of having all memory accesses go through SMMU translation, the endpoint
can translate IOVA->PA once, store the result in its ATC, then issue
subsequent transactions using the PA, partially bypassing the SMMU. So in
theory it should be faster while keeping the advantages of an IOMMU,
namely scatter-gather and access control.
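
As a rough, hypothetical sketch of the endpoint-enabling half of this
(it uses only the long-standing pci_enable_ats() helper, nothing added
by this series; the rc_supports_ats flag stands in for whatever DT or
IORT mechanism reports root complex support):

	static int example_enable_ats(struct pci_dev *pdev, bool rc_supports_ats)
	{
		int ret;

		/* Only enable ATS if the root complex can handle AT requests */
		if (!rc_supports_ats)
			return -ENXIO;

		/* The second argument is the Smallest Translation Unit (a page shift) */
		ret = pci_enable_ats(pdev, PAGE_SHIFT);
		if (ret)
			dev_warn(&pdev->dev, "could not enable ATS (%d)\n", ret);

		return ret;
	}

The other half, issuing an ATC invalidation alongside the usual IOTLB
invalidation when an IOVA is unmapped, is what the SMMUv3 patches in
this series add.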

The ATS patches can now be tested on some hardware, even though the lack
of compatible PCI endpoints makes it difficult to assess what performance
optimizations we need. That's why the ATS implementation is a bit rough at
the moment, and we will work on optimizing things like invalidation ranges
later.


Sinan and I have tested this series on a QDF2400 development platform
using a PCIe exerciser card as the ATS capable endpoint. We were able
to verify that ATS requests complete with a valid translated address
and that DMA transactions using the pre-translated address "bypass"
the SMMU. Testing ATC invalidations was a bit more difficult as we
could not figure out how to get the exerciser card to automatically
send the completion message. We ended up having to write a debugger
script that would monitor the CMDQ and tell the exerciser to send
the completion when a hanging CMD_SYNC following a CMD_ATC_INV was
detected. Hopefully we'll get some real ATS capable endpoints to
test with soon.



Since the RFC [1]:
* added DT and ACPI patches,
* added invalidate-all on domain detach,
* removed smmu_group again,
* removed invalidation print from the fast path,
* disabled tagged pointers for good,
* some style changes.

These patches are based on Linux v4.12-rc2

[1] https://www.spinics.net/lists/linux-pci/msg58650.html

Jean-Philippe Brucker (7):
   PCI: Move ATS declarations outside of CONFIG_PCI
   dt-bindings: PCI: Describe ATS property for root complex nodes
   iommu/of: Check ATS capability in root complex nodes
   ACPI/IORT: Check ATS capability in root complex nodes
   iommu/arm-smmu-v3: Link domains and devices
   iommu/arm-smmu-v3: Add support for PCI ATS
   iommu/arm-smmu-v3: Disable tagged pointers

  .../devicetree/bindings/pci/pci-iommu.txt  |   8 +
  drivers/acpi/arm64/iort.c  |  10 +
  drivers/iommu/arm-smmu-v3.c| 258 -
  drivers/iommu/of_iommu.c   |   8 +
  include/linux/iommu.h  |   4 +
  include/linux/pci.h|  26 +--
  6 files changed, 293 insertions(+), 21 deletions(-)



--
Qualcomm Datacenter Technologies as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux 
Foundation Collaborative Project.


Re: [PATCH v5 28/32] x86/mm, kexec: Allow kexec to be used with SME

2017-05-31 Thread Xunlei Pang
On 05/31/2017 at 01:46 AM, Tom Lendacky wrote:
> On 5/25/2017 11:17 PM, Xunlei Pang wrote:
>> On 04/19/2017 at 05:21 AM, Tom Lendacky wrote:
>>> Provide support so that kexec can be used to boot a kernel when SME is
>>> enabled.
>>>
>>> Support is needed to allocate pages for kexec without encryption.  This
>>> is needed in order to be able to reboot in the kernel in the same manner
>>> as originally booted.
>>
>> Hi Tom,
>>
>> Looks like kdump will break, I didn't see the similar handling for kdump
>> cases, see kernel:
>>  kimage_alloc_crash_control_pages(), kimage_load_crash_segment(), etc.
>
>> We need to support kdump with SME, kdump 
>> kernel/initramfs/purgatory/elfcorehdr/etc
>> are all loaded into the reserved memory(see crashkernel=X) by userspace 
>> kexec-tools.
>> I think a straightforward way would be to mark the whole reserved memory 
>> range without
>> encryption before loading all the kexec segments for kdump, I guess we can 
>> handle this
>> easily in arch_kexec_unprotect_crashkres().
>
> Yes, that would work.
>
>>
>> Moreover, now that "elfcorehdr=X" is left as decrypted, it needs to be 
>> remapped to the
>> encrypted data.
>
> This is an area that I'm not familiar with, so I don't completely
> understand the flow in regards to where/when/how the ELF headers are
> copied and what needs to be done.
>
> Can you elaborate a bit on this?

"elfcorehdr" is generated by userspace 
kexec-tools(git://git.kernel.org/pub/scm/utils/kernel/kexec/kexec-tools.git), 
it's
actually ELF CORE header data(elf header, PT_LOAD/PT_NOTE program header), see 
kexec/crashdump-elf.c::FUNC().

For the kdump case, it is placed in some reserved crash memory allocated by
kexec-tools, and the start address of that reserved crash memory is passed
to the kdump kernel via "elfcorehdr="; please see the kernel functions
setup_elfcorehdr() and vmcore_init() for how it is parsed by the kdump kernel.
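
For reference, a simplified sketch of that existing parsing (the real
code is setup_elfcorehdr() in kernel/crash_dump.c; the example_ names
here only mark this as an illustration):

	static unsigned long long example_elfcorehdr_addr;
	static unsigned long long example_elfcorehdr_size;

	/* Accepts either "elfcorehdr=ADDR" or "elfcorehdr=SIZE@ADDR" */
	static int __init example_setup_elfcorehdr(char *arg)
	{
		char *end;

		if (!arg)
			return -EINVAL;

		example_elfcorehdr_addr = memparse(arg, &end);
		if (*end == '@') {
			example_elfcorehdr_size = example_elfcorehdr_addr;
			example_elfcorehdr_addr = memparse(end + 1, &end);
		}
		return end > arg ? 0 : -EINVAL;
	}
	early_param("elfcorehdr", example_setup_elfcorehdr);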

Regards,
Xunlei

>>
>>>
>>> Additionally, when shutting down all of the CPUs we need to be sure to
>>> flush the caches and then halt. This is needed when booting from a state
>>> where SME was not active into a state where SME is active (or vice-versa).
>>> Without these steps, it is possible for cache lines to exist for the same
>>> physical location but tagged both with and without the encryption bit. This
>>> can cause random memory corruption when caches are flushed depending on
>>> which cacheline is written last.
>>>
>>> Signed-off-by: Tom Lendacky 
>>> ---
>>>   arch/x86/include/asm/init.h  |1 +
>>>   arch/x86/include/asm/irqflags.h  |5 +
>>>   arch/x86/include/asm/kexec.h |8 
>>>   arch/x86/include/asm/pgtable_types.h |1 +
>>>   arch/x86/kernel/machine_kexec_64.c   |   35 
>>> +-
>>>   arch/x86/kernel/process.c|   26 +++--
>>>   arch/x86/mm/ident_map.c  |   11 +++
>>>   include/linux/kexec.h|   14 ++
>>>   kernel/kexec_core.c  |7 +++
>>>   9 files changed, 101 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
>>> index 737da62..b2ec511 100644
>>> --- a/arch/x86/include/asm/init.h
>>> +++ b/arch/x86/include/asm/init.h
>>> @@ -6,6 +6,7 @@ struct x86_mapping_info {
>>>   void *context; /* context for alloc_pgt_page */
>>>   unsigned long pmd_flag; /* page flag for PMD entry */
>>>   unsigned long offset; /* ident mapping offset */
>>> +unsigned long kernpg_flag; /* kernel pagetable flag override */
>>>   };
>>> int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t 
>>> *pgd_page,
>>> diff --git a/arch/x86/include/asm/irqflags.h 
>>> b/arch/x86/include/asm/irqflags.h
>>> index ac7692d..38b5920 100644
>>> --- a/arch/x86/include/asm/irqflags.h
>>> +++ b/arch/x86/include/asm/irqflags.h
>>> @@ -58,6 +58,11 @@ static inline __cpuidle void native_halt(void)
>>>   asm volatile("hlt": : :"memory");
>>>   }
>>>   +static inline __cpuidle void native_wbinvd_halt(void)
>>> +{
>>> +asm volatile("wbinvd; hlt" : : : "memory");
>>> +}
>>> +
>>>   #endif
>>> #ifdef CONFIG_PARAVIRT
>>> diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
>>> index 70ef205..e8183ac 100644
>>> --- a/arch/x86/include/asm/kexec.h
>>> +++ b/arch/x86/include/asm/kexec.h
>>> @@ -207,6 +207,14 @@ struct kexec_entry64_regs {
>>>   uint64_t r15;
>>>   uint64_t rip;
>>>   };
>>> +
>>> +extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages,
>>> +   gfp_t gfp);
>>> +#define arch_kexec_post_alloc_pages arch_kexec_post_alloc_pages
>>> +
>>> +extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages);
>>> +#define arch_kexec_pre_free_pages arch_kexec_pre_free_pages
>>> +
>>>   #endif
>>> typedef void crash_vmclear_fn(void);
>>> diff --git 

[RFCv2 1/2] acpi:iort: Add new helper function to retrieve ITS base addr from dev IORT node

2017-05-31 Thread shameer
This provides a helper function to find and retrieve the ITS
base address from the ID mappings array reference of a device
IORT node (if any).

This is used in the subsequent patch to retrieve the ITS base
address associated with a PCI device's IORT node.

Signed-off-by: shameer 
---
 drivers/acpi/arm64/iort.c| 47 +---
 drivers/irqchip/irq-gic-v3-its.c |  3 ++-
 include/linux/acpi_iort.h|  8 ++-
 3 files changed, 53 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index c5fecf9..12d7347 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -34,6 +34,7 @@
 struct iort_its_msi_chip {
struct list_headlist;
struct fwnode_handle*fw_node;
+   u64 base_addr;
u32 translation_id;
 };
 
@@ -132,13 +133,14 @@ typedef acpi_status (*iort_find_node_callback)
 
 /**
  * iort_register_domain_token() - register domain token and related ITS ID
- * to the list from where we can get it back later on.
+ * and base address to the list from where we can get it back later on.
  * @trans_id: ITS ID.
  * @fw_node: Domain token.
  *
  * Returns: 0 on success, -ENOMEM if no memory when allocating list element
  */
-int iort_register_domain_token(int trans_id, struct fwnode_handle *fw_node)
+int iort_register_domain_token(int trans_id, u64 base,
+  struct fwnode_handle *fw_node)
 {
struct iort_its_msi_chip *its_msi_chip;
 
@@ -148,6 +150,7 @@ int iort_register_domain_token(int trans_id, struct fwnode_handle *fw_node)
 
its_msi_chip->fw_node = fw_node;
its_msi_chip->translation_id = trans_id;
+   its_msi_chip->base_addr = base;
 
	spin_lock(&iort_msi_chip_lock);
	list_add(&its_msi_chip->list, &iort_msi_chip_list);
@@ -370,7 +373,6 @@ static struct acpi_iort_node *iort_node_map_id(struct acpi_iort_node *node,
 
if (!node->mapping_offset || !node->mapping_count)
goto fail_map;
-
map = ACPI_ADD_PTR(struct acpi_iort_id_mapping, node,
   node->mapping_offset);
 
@@ -491,6 +493,45 @@ int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id)
return -ENODEV;
 }
 
+int iort_dev_find_its_base(struct device *dev, u32 req_id,
+  unsigned int idx, u64 *its_base)
+{
+   struct acpi_iort_its_group *its;
+   struct acpi_iort_node *node;
+   struct iort_its_msi_chip *its_msi_chip;
+   u32  trans_id;
+
+   node = iort_find_dev_node(dev);
+   if (!node)
+   return -ENXIO;
+
+   node = iort_node_map_id(node, req_id, NULL, IORT_MSI_TYPE);
+   if (!node)
+   return -ENXIO;
+
+   /* Move to ITS specific data */
+   its = (struct acpi_iort_its_group *)node->node_data;
+   if (idx > its->its_count) {
+   dev_err(dev, "requested ITS ID index [%d] is greater than available [%d]\n",
+   idx, its->its_count);
+   return -ENXIO;
+   }
+
+   trans_id = its->identifiers[idx];
+
+   spin_lock(&iort_msi_chip_lock);
+   list_for_each_entry(its_msi_chip, &iort_msi_chip_list, list) {
+   if (its_msi_chip->translation_id == trans_id) {
+   *its_base = its_msi_chip->base_addr;
+   spin_unlock(&iort_msi_chip_lock);
+   return 0;
+   }
+   }
+   spin_unlock(&iort_msi_chip_lock);
+
+   return -ENXIO;
+}
+
 /**
  * iort_dev_find_its_id() - Find the ITS identifier for a device
  * @dev: The device.
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 45ea1933..c45a2ad 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -1854,7 +1854,8 @@ static int __init gic_acpi_parse_madt_its(struct acpi_subtable_header *header,
return -ENOMEM;
}
 
-   err = iort_register_domain_token(its_entry->translation_id, dom_handle);
+   err = iort_register_domain_token(its_entry->translation_id, res.start,
+dom_handle);
if (err) {
pr_err("ITS@%pa: Unable to register GICv3 ITS domain token (ITS 
ID %d) to IORT\n",
   , its_entry->translation_id);
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index 3ff9ace..bf7b53d 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -26,7 +26,8 @@
 #define IORT_IRQ_MASK(irq) (irq & 0xffffffffULL)
 #define IORT_IRQ_TRIGGER_MASK(irq) ((irq >> 32) & 0xffffffffULL)
 
-int iort_register_domain_token(int trans_id, struct fwnode_handle *fw_node);
+int iort_register_domain_token(int trans_id, u64 base,
+  struct fwnode_handle *fw_node);
 void iort_deregister_domain_token(int trans_id);
 struct 

[RFCv2 2/2] iommu/arm-smmu-v3:Enable ACPI based HiSilicon erratum 161010801

2017-05-31 Thread shameer
The HiSilicon erratum 161010801 describes the limitation of the HiSilicon
Hip06/Hip07 platforms, which cannot support SMMU mappings for MSI
transactions.

On these platforms the GICv3 ITS translator is presented with the deviceID
by extending the MSI payload data to 64 bits to include the deviceID.
Hence, the PCIe controller on these platforms has to differentiate the
MSI payload from other DMA payloads and has to modify the MSI payload.
This basically makes it difficult for these platforms to have SMMU
translation for MSI.

This patch implements an ACPI table based quirk to reserve the HW MSI
regions in the SMMU-v3 driver, which means these address regions will
not be translated and will be excluded from IOVA allocations.

The HW ITS address region associated with the device is retrieved
using a new helper function added in the IORT code.

Signed-off-by: shameer 
---
 drivers/iommu/arm-smmu-v3.c | 49 ++---
 1 file changed, 46 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index abe4b88..3767526 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -597,6 +597,7 @@ struct arm_smmu_device {
u32 features;
 
 #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
+#define ARM_SMMU_OPT_RESV_HW_MSI   (1 << 1)
u32 options;
 
struct arm_smmu_cmdqcmdq;
@@ -1755,6 +1756,38 @@ static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
 
 static struct iommu_ops arm_smmu_ops;
 
+#ifdef CONFIG_ACPI
+static struct iommu_resv_region *arm_smmu_acpi_alloc_hw_msi(struct device *dev)
+{
+   struct iommu_resv_region *region;
+   struct  irq_domain *irq_dom;
+   int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
+   u64 base;
+
+   irq_dom = pci_msi_get_device_domain(to_pci_dev(dev));
+   if (irq_dom) {
+   int ret;
+   u32 rid;
+
+   rid = pci_msi_domain_get_msi_rid(irq_dom, to_pci_dev(dev));
+   ret = iort_dev_find_its_base(dev, rid, 0, &base);
+   if (!ret) {
+   dev_info(dev, "SMMUv3:HW MSI resv addr 0x%pa\n", &base);
+   region = iommu_alloc_resv_region(base, SZ_128K,
+prot, IOMMU_RESV_MSI);
+   return region;
+   }
+   }
+
+   return NULL;
+}
+#else
+static struct iommu_resv_region *arm_smmu_acpi_alloc_hw_msi(struct device *dev)
+{
+   return NULL;
+}
+#endif
+
 static int arm_smmu_add_device(struct device *dev)
 {
int i, ret;
@@ -1903,11 +1936,20 @@ static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
 static void arm_smmu_get_resv_regions(struct device *dev,
  struct list_head *head)
 {
-   struct iommu_resv_region *region;
+   struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+   struct iommu_resv_region *region = NULL;
int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
+   struct arm_smmu_device *smmu;
+
+   smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
 
-   region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
-prot, IOMMU_RESV_SW_MSI);
+   if (smmu && (smmu->options & ARM_SMMU_OPT_RESV_HW_MSI) &&
+ dev_is_pci(dev))
+   region = arm_smmu_acpi_alloc_hw_msi(dev);
+
+   if (!region)
+   region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
+prot, IOMMU_RESV_SW_MSI);
if (!region)
return;
 
@@ -2611,6 +2653,7 @@ static void parse_driver_acpi_options(struct acpi_iort_smmu_v3 *iort_smmu,
switch (iort_smmu->model) {
case ACPI_IORT_SMMU_HISILICON_HI161X:
smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
+   smmu->options |= ARM_SMMU_OPT_RESV_HW_MSI;
break;
default:
break;
-- 
1.9.1




[RFCv2 0/2] iommu/smmu-v3: Workaround for hisilicon 161010801 erratum(reserve HW MSI)

2017-05-31 Thread shameer
On certain HiSilicon platforms (Hip06/Hip07) the GIC ITS and
PCIe RC deviate from the standard implementation, and this breaks
PCIe MSI functionality when the SMMU is enabled.

The HiSilicon erratum 161010801 describes this limitation of certain
HiSilicon platforms, which cannot support SMMU mappings for MSI
transactions. On these platforms the GICv3 ITS translator is presented
with the deviceID by extending the MSI payload data to 64 bits to include
the deviceID. Hence, the PCIe controller on these platforms has to
differentiate the MSI payload from other DMA payloads and has to modify
the MSI payload. This basically makes it difficult for these platforms
to have SMMU translation for MSI.

This patch series implements an ACPI table based quirk to reserve the HW
MSI regions in the SMMU-v3 driver, which means these address regions will
not be translated and will be excluded from IOVA allocations.

To implement this quirk, the following changes are incorporated:

1. Added a generic helper function to the IORT code to retrieve the
   associated ITS base address from a device IORT node.
2. Added a quirk to SMMUv3 to retrieve the HW ITS address and replace
   the default SW MSI reserve address based on the IORT SMMU model.

This is based on the following patches:
1. https://patchwork.kernel.org/patch/9740733/
2. https://patchwork.kernel.org/patch/9730491/

Thanks,
Shameer

RFC v1 ---> v2
 Based on Robin's review comments:
 :Removed the generic erratum framework.
 :Using IORT/MADT tables to retrieve the ITS base addr instead
  of vendor specific CSRT table.

shameer (2):
  acpi:iort: Add new helper function to retrieve ITS base addr from IORT
node
  iommu/arm-smmu-v3:Enable ACPI based HiSilicon erratum 161010801

 drivers/acpi/arm64/iort.c| 47 +++---
 drivers/iommu/arm-smmu-v3.c  | 49 +---
 drivers/irqchip/irq-gic-v3-its.c |  3 ++-
 include/linux/acpi_iort.h|  8 ++-
 4 files changed, 99 insertions(+), 8 deletions(-)

-- 
1.9.1




Re: [PATCH v5 32/32] x86/mm: Add support to make use of Secure Memory Encryption

2017-05-31 Thread Borislav Petkov
On Wed, May 31, 2017 at 08:37:50AM -0500, Tom Lendacky wrote:
> I like keeping the command line option and the values together. It may
> not look the greatest but I like it more than defining the command line
> option in head_64.S and passing it in as an argument.
> 
> OTOH, I don't think the rip-relative addressing was that bad, I can
> always go back to that...

Yeah, no nice solution here. Having gone full circle, the rip-relative
thing doesn't look all that bad, all of a sudden. I'd let you decide
what to do...

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.


Re: [RFC,20/30] iommu/arm-smmu-v3: Enable PCI PASID in masters

2017-05-31 Thread Sinan Kaya
Hi Jean-Philippe,

On 2/27/2017 2:54 PM, Jean-Philippe Brucker wrote:
> Enable PASID for PCI devices that support it.
> 
> Signed-off-by: Jean-Philippe Brucker 
> ---
>  drivers/iommu/arm-smmu-v3.c | 66 
> ++---
>  1 file changed, 63 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index 499dc1cd07eb..37fd061405e9 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -730,6 +730,8 @@ struct arm_smmu_master_data {
>  
>   struct arm_smmu_stream  *streams;
>   struct rb_root  contexts;
> +
> + u32 avail_contexts;
>  };
>  

I know that you are doing some code restructuring. As I was looking at the 
amdkfd
driver, I realized that there is quite a bit of PASID, ATS and PRI work here 
that
can be plumbed into the framework you are building. 

https://github.com/torvalds/linux/tree/master/drivers/gpu/drm/amd/amdkfd

I wanted to share this with you in case you were not already aware of it.
Functions of interest are listed below, followed by a brief usage sketch:

amd_iommu_init_device
amd_iommu_free_device
amd_iommu_bind_pasid
amd_iommu_set_invalid_ppr_cb
amd_iommu_unbind_pasid
amd_iommu_device_info
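
A hypothetical usage sketch of that API (signatures as declared in
include/linux/amd-iommu.h; the PASID count and value below are made up):

	static int example_bind(struct pci_dev *pdev, struct task_struct *task)
	{
		int ret;

		/* Tell the IOMMUv2 layer how many PASIDs this device may use */
		ret = amd_iommu_init_device(pdev, 16);
		if (ret)
			return ret;

		/* Attach PASID 1 to the given process's address space */
		ret = amd_iommu_bind_pasid(pdev, 1, task);
		if (ret)
			amd_iommu_free_device(pdev);

		return ret;
	}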

Sinan

-- 
Sinan Kaya
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm 
Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux 
Foundation Collaborative Project.


Re: [PATCH v5 32/32] x86/mm: Add support to make use of Secure Memory Encryption

2017-05-31 Thread Tom Lendacky

On 5/31/2017 3:49 AM, Borislav Petkov wrote:

On Tue, May 30, 2017 at 10:37:03AM -0500, Tom Lendacky wrote:

I can define the command line option and the "on" and "off" values as
character buffers in the function and initialize them on a per character
basis (using a static string causes the same issues as referencing a
string constant), i.e.:

char cmdline_arg[] = {'m', 'e', 'm', '_', 'e', 'n', 'c', 'r', 'y', 'p', 't', 
'\0'};
char cmdline_off[] = {'o', 'f', 'f', '\0'};
char cmdline_on[] = {'o', 'n', '\0'};

It doesn't look the greatest, but it works and removes the need for the
rip-relative addressing.


Well, I'm not thrilled about this one either. It's like being between a
rock and a hard place. :-\

On the one hand, we need the encryption mask before we do the fixups and
OTOH we need to do the fixups in order to access the strings properly.
Yuck.

Well, the only thing I can think of right now is maybe define
"mem_encrypt=" at the end of head_64.S and pass it in from asm to
sme_enable() and then do the "on"/"off" comparsion with local char
buffers. That could make it less ugly...


I like keeping the command line option and the values together. It may
not look the greatest but I like it more than defining the command line
option in head_64.S and passing it in as an argument.

OTOH, I don't think the rip-relative addressing was that bad, I can
always go back to that...

Thanks,
Tom






Re: [PATCH v5 29/32] x86/mm: Add support to encrypt the kernel in-place

2017-05-31 Thread Tom Lendacky

On 5/31/2017 4:51 AM, Borislav Petkov wrote:

On Tue, May 30, 2017 at 11:39:07AM -0500, Tom Lendacky wrote:

Yes, it's from objtool:

arch/x86/mm/mem_encrypt_boot.o: warning: objtool: .text+0xd2: return
instruction outside of a callable function


Oh, well, let's make it a global symbol then. Who knows, we might have
to live-patch it someday :-)


Can do.

Thanks,
Tom



---
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index fb58f9f953e3..7720b0050840 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -47,9 +47,9 @@ ENTRY(sme_encrypt_execute)
 	movq	%rdx, %r12		/* Kernel length */
 
 	/* Copy encryption routine into the workarea */
-	movq	%rax, %rdi		/* Workarea encryption routine */
-	leaq	.Lenc_start(%rip), %rsi	/* Encryption routine */
-	movq	$(.Lenc_stop - .Lenc_start), %rcx	/* Encryption routine length */
+	movq	%rax, %rdi			/* Workarea encryption routine */
+	leaq	__enc_copy(%rip), %rsi		/* Encryption routine */
+	movq	$(.L__enc_copy_end - __enc_copy), %rcx	/* Encryption routine length */
 	rep	movsb
 
 	/* Setup registers for call */
@@ -70,8 +70,7 @@ ENTRY(sme_encrypt_execute)
 	ret
 ENDPROC(sme_encrypt_execute)
 
-.Lenc_start:
-ENTRY(sme_enc_routine)
+ENTRY(__enc_copy)
 /*
  * Routine used to encrypt kernel.
  *   This routine must be run outside of the kernel proper since
@@ -147,5 +146,5 @@ ENTRY(sme_enc_routine)
 	wrmsr
 
 	ret
-ENDPROC(sme_enc_routine)
-.Lenc_stop:
+.L__enc_copy_end:
+ENDPROC(__enc_copy)




Re: Device address specific mapping of arm,mmu-500

2017-05-31 Thread Will Deacon
On Tue, May 30, 2017 at 11:13:36PM -0700, Ray Jui wrote:
> I did a little more digging myself and I think I now understand what you
> meant by identity mapping, i.e., configuring the MMU-500 with 1:1 mapping
> between the DMA address and the IOVA address.
> 
> I think that should work. In the end, due to this MSI write parsing issue in
> our PCIe controller, the reason to use IOMMU is to allow the cache
> attributes (AxCACHE) of the MSI writes towards GICv3 ITS to be modified by
> the IOMMU to be device type, while leaving the rest of inbound reads/writes
> from/to DDR with more optimized cache attributes setting, to allow I/O
> coherency to be still enabled for the PCIe controller. In fact, the PCIe
> controller itself is fully capable of DMA to/from the full address space of
> our SoC including both DDR and any device memory.
> 
> The 1:1 mapping will still pose some translation overhead like you
> suggested; however, the overhead of allocating page tables and locking will
> be gone. This sounds like the best possible option I have currently.

It might end up being pretty invasive to work around a hardware bug, so
we'll have to see what it looks like. Ideally, we could just use the SMMU
for everything as-is and work on clawing back the lost performance (it
should be possible to get ~95% of the perf if we sort out the locking, which
we *are* working on).

> May I ask, how do I start to try to get this identity mapping to work as an
> experiment and proof of concept? Any pointer or advise is highly appreciated
> as you can see I'm not very experienced with this. I found Will recently
> added the IOMMU_DOMAIN_IDENTITY support to the arm-smmu driver. But I
> suppose that is to bypass the SMMU completely, instead of still going
> through the MMU with 1:1 translation. Is my understanding correct?

Yes, I don't think IOMMU_DOMAIN_IDENTITY is what you need because you
actually need per-page control of memory attributes.

Robin might have a better idea, but I think you'll have to hack dma-iommu.c
so that you can have a version of the DMA ops that:

  * Initialises the identity map (I guess as normal WB cacheable?)
  * Reserves and maps the MSI region appropriately
  * Just returns the physical address for the dma address for map requests
(return error for the MSI region)
  * Does nothing for unmap requests

But my strong preference would be to fix the locking overhead from the
SMMU so that the perf hit is acceptable.
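
A very rough, hypothetical sketch of the shape of such ops, assuming a
pre-built 1:1 (identity) mapping and a reserved MSI window at made-up
addresses, might look like:

	#define EXAMPLE_MSI_BASE	0x08000000UL	/* made-up reserved window */
	#define EXAMPLE_MSI_SIZE	0x00100000UL
	#define EXAMPLE_DMA_ERROR	(~(dma_addr_t)0)

	static dma_addr_t identity_map_page(struct device *dev, struct page *page,
					    unsigned long offset, size_t size,
					    enum dma_data_direction dir,
					    unsigned long attrs)
	{
		phys_addr_t phys = page_to_phys(page) + offset;

		/* Refuse anything overlapping the MSI hole in the identity map */
		if (phys < EXAMPLE_MSI_BASE + EXAMPLE_MSI_SIZE &&
		    phys + size > EXAMPLE_MSI_BASE)
			return EXAMPLE_DMA_ERROR;

		/* Identity mapping: the DMA address is the physical address */
		return (dma_addr_t)phys;
	}

	static void identity_unmap_page(struct device *dev, dma_addr_t handle,
					size_t size, enum dma_data_direction dir,
					unsigned long attrs)
	{
		/* Nothing to do: the 1:1 mapping is never torn down */
	}

	static const struct dma_map_ops identity_dma_ops = {
		.map_page	= identity_map_page,
		.unmap_page	= identity_unmap_page,
	};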

Will


Re: [PATCH v2 1/2] ACPICA: IORT: Update SMMU models for IORT rev. C

2017-05-31 Thread Robin Murphy
On 30/05/17 10:12, Joerg Roedel wrote:
> On Mon, May 22, 2017 at 04:06:37PM +0100, Robin Murphy wrote:
>> IORT revision C has been published with a number of new SMMU
>> implementation identifiers. Since IORT doesn't have any way of falling
>> back to a more generic model code, we really need Linux to know about
>> these before vendors start updating their firmware tables to use them.
>>
>> CC: Rafael J. Wysocki 
>> CC: Robert Moore 
>> CC: Lv Zheng 
>> Acked-by: Robert Richter 
>> Tested-by: Robert Richter 
>> Signed-off-by: Robin Murphy 
>> ---
>>
>> v2: Update more comments, add Robert's tags.
> 
> I generally prefer 'Fixes'-tags, can you please add them too?

This patch isn't a fix, though; it's merely adding some new stuff from a
new release of the IORT spec.

Either way, since I discovered we do actually have approval to
contribute to ACPICA directly, I now intend to route the header change
that way per Rafael's preference[1] - patch 2/2 will take care of itself
in the meantime.

Thanks,
Robin.

[1]:https://www.mail-archive.com/iommu@lists.linux-foundation.org/msg17602.html



Re: [PATCH v5 17/32] x86/mm: Add support to access boot related data in the clear

2017-05-31 Thread Borislav Petkov
On Tue, May 30, 2017 at 11:46:52AM -0500, Tom Lendacky wrote:
> Check if you have CONFIG_DEBUG_SECTION_MISMATCH=y

$ grep MISM .config
CONFIG_DEBUG_SECTION_MISMATCH=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y

Still no joy.

Can you give me your .config?

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.


Re: [PATCH v5 28/32] x86/mm, kexec: Allow kexec to be used with SME

2017-05-31 Thread Borislav Petkov
On Tue, May 30, 2017 at 12:46:14PM -0500, Tom Lendacky wrote:
> This is an area that I'm not familiar with, so I don't completely
> understand the flow in regards to where/when/how the ELF headers are
> copied and what needs to be done.

So my suggestion is still to put kexec/kdump on the backburner for now
and concentrate on the 30-ish patchset first. Once they're done, we can
start dealing with it. Ditto with the IOMMU side of things. One thing at
a time.

Thanks.

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.


Re: [PATCH v5 29/32] x86/mm: Add support to encrypt the kernel in-place

2017-05-31 Thread Borislav Petkov
On Tue, May 30, 2017 at 11:39:07AM -0500, Tom Lendacky wrote:
> Yes, it's from objtool:
> 
> arch/x86/mm/mem_encrypt_boot.o: warning: objtool: .text+0xd2: return
> instruction outside of a callable function

Oh, well, let's make it a global symbol then. Who knows, we might have
to live-patch it someday :-)

---
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index fb58f9f953e3..7720b0050840 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -47,9 +47,9 @@ ENTRY(sme_encrypt_execute)
 	movq	%rdx, %r12		/* Kernel length */
 
 	/* Copy encryption routine into the workarea */
-	movq	%rax, %rdi		/* Workarea encryption routine */
-	leaq	.Lenc_start(%rip), %rsi	/* Encryption routine */
-	movq	$(.Lenc_stop - .Lenc_start), %rcx	/* Encryption routine length */
+	movq	%rax, %rdi			/* Workarea encryption routine */
+	leaq	__enc_copy(%rip), %rsi		/* Encryption routine */
+	movq	$(.L__enc_copy_end - __enc_copy), %rcx	/* Encryption routine length */
 	rep	movsb
 
 	/* Setup registers for call */
@@ -70,8 +70,7 @@ ENTRY(sme_encrypt_execute)
 	ret
 ENDPROC(sme_encrypt_execute)
 
-.Lenc_start:
-ENTRY(sme_enc_routine)
+ENTRY(__enc_copy)
 /*
  * Routine used to encrypt kernel.
  *   This routine must be run outside of the kernel proper since
@@ -147,5 +146,5 @@ ENTRY(sme_enc_routine)
 	wrmsr
 
 	ret
-ENDPROC(sme_enc_routine)
-.Lenc_stop:
+.L__enc_copy_end:
+ENDPROC(__enc_copy)

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.