Re: [PATCH 2/4] iommu: Implement common IOMMU ops for DMA mapping

2015-05-29 Thread Yingjoe Chen

Hi Robin,

More info, hope this make it clearer. We are calling dma_map_sg_attrs
with the following 2 sg. With IOMMU, we are expecting it got merged into
1 contiguous va range, but instead we get 2 va range.

sg0 dma_address 0xfeeddc00 size 0x400, offset 0xc00
sg1 dma_address 0xfeede000 size 0x1000, offset 0x0

Joe.C

On Fri, 2015-05-29 at 13:26 +0800, Yong Wu wrote:
 Hi Robin,
 Thanks.
 
 While we test venc in v4l2, we get a problem:
 When we enter the funtion[0], it will be break unexpectedly in the
 funcion[1] while the offset of sg table is not zero. It is ok if the
 offset is zero. Then I add more log in dma-iommu.c, please help check
 below.
 All we tested it based on dma v2. and have not tested it on v3 yet.
 The code of iommu-map-sg seems the same. if it's fixed in v3, I'm very
 sorry. The map_sg in mtk-iommu use default_iommu_map_sg.
 Any question please tell me, Thanks very much. 
 
 [0]http://lxr.free-electrons.com/source/drivers/media/v4l2-core/videobuf2-dma-contig.c#L564

 [1]http://lxr.free-electrons.com/source/drivers/media/v4l2-core/videobuf2-dma-contig.c#L70
 
 
 On Wed, 2015-05-27 at 15:09 +0100, Robin Murphy wrote:
  Taking inspiration from the existing arch/arm code, break out some
  generic functions to interface the DMA-API to the IOMMU-API. This will
  do the bulk of the heavy lifting for IOMMU-backed dma-mapping.
  
  Signed-off-by: Robin Murphy robin.mur...@arm.com
  ---
   drivers/iommu/Kconfig |   7 +
   drivers/iommu/Makefile|   1 +
   drivers/iommu/dma-iommu.c | 560 
  ++
   include/linux/dma-iommu.h |  94 
   4 files changed, 662 insertions(+)
   create mode 100644 drivers/iommu/dma-iommu.c
   create mode 100644 include/linux/dma-iommu.h
  
 [snip]
  +static int __finalise_sg(struct device *dev, struct scatterlist *sg, int 
  nents,
  +   dma_addr_t dma_addr)
  +{
  +   struct scatterlist *s, *seg = sg;
  +   unsigned long seg_mask = dma_get_seg_boundary(dev);
  +   unsigned int max_len = dma_get_max_seg_size(dev);
  +   unsigned int seg_len = 0, seg_dma = 0;
  +   int i, count = 1;
  +
  +   for_each_sg(sg, s, nents, i) {
  +   /* Un-swizzling the fields here, hence the naming mismatch */
  +   unsigned int s_offset = sg_dma_address(s);
  +   unsigned int s_length = sg_dma_len(s);
  +   unsigned int s_dma_len = s-length;
  +
  +   s-offset = s_offset;
  +   s-length = s_length;
  +   sg_dma_address(s) = DMA_ERROR_CODE;
  +   sg_dma_len(s) = 0;
  +
  +   if (seg_len  (seg_dma + seg_len == dma_addr + s_offset) 
  +   (seg_len + s_dma_len = max_len) 
  +   ((seg_dma  seg_mask) = seg_mask - (seg_len + s_length))
  +  ) {
  +   sg_dma_len(seg) += s_dma_len;
  +   } else {
  +   if (seg_len) {
  +   seg = sg_next(seg);
  +   count++;
  +   }
  +   sg_dma_len(seg) = s_dma_len;
  +   sg_dma_address(seg) = dma_addr + s_offset;
Here the value of sg_dma_address have added s_offset, but
 sg_dma_len(seg) still is s_dma_len.
In the first loop, s_dma_len is from s-length which is alignd by
 s_length = iova_align(iovad, s_length + s_offset); in
 the interface iommu_dma_map_sg.
  +
  +   seg_len = s_offset;
  +   seg_dma = dma_addr + s_offset;
  +   }
  +   seg_len += s_length;
  +   dma_addr += s_dma_len;
  +   }
  +   return count;
  +}
  +
  +static void __invalidate_sg(struct scatterlist *sg, int nents)
  +{
  +   struct scatterlist *s;
  +   int i;
  +
  +   for_each_sg(sg, s, nents, i) {
  +   if (sg_dma_address(s) != DMA_ERROR_CODE)
  +   s-offset = sg_dma_address(s);
  +   if (sg_dma_len(s))
  +   s-length = sg_dma_len(s);
  +   sg_dma_address(s) = DMA_ERROR_CODE;
  +   sg_dma_len(s) = 0;
  +   }
  +}
  +
  +int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
  +   int nents, int prot, bool coherent)
  +{
  +   struct iommu_dma_domain *dom = arch_get_dma_domain(dev);
  +   struct iova_domain *iovad = dom-iovad;
  +   struct iova *iova;
  +   struct scatterlist *s;
  +   dma_addr_t dma_addr;
  +   size_t iova_len = 0;
  +   int i;
  +
  +   /*
  +* Work out how much IOVA space we need, and align the segments to
  +* IOVA granules for the IOMMU driver to handle. With some clever
  +* trickery we can modify the list in a reversible manner.
  +*/
  +   for_each_sg(sg, s, nents, i) {
  +   size_t s_offset = iova_offset(iovad, s-offset);
  +   size_t s_length = s-length;
  +
  +   sg_dma_address(s) = s-offset;
  +   sg_dma_len(s) = s_length;
  +   s-offset -= s_offset;
  +   s_length = iova_align(iovad, s_length + s_offset);
  +   s-length = 

Re: [PATCH 2/4] iommu: Implement common IOMMU ops for DMA mapping

2015-05-29 Thread Robin Murphy

On 29/05/15 06:26, Yong Wu wrote:

Hi Robin,
 Thanks.

 While we test venc in v4l2, we get a problem:


Thanks as always for testing!


 When we enter the funtion[0], it will be break unexpectedly in the
funcion[1] while the offset of sg table is not zero. It is ok if the
offset is zero. Then I add more log in dma-iommu.c, please help check
below.


Bah, non-zero offset was about the only scatterlist case I wasn't able 
to get out of the EHCI driver...



 All we tested it based on dma v2. and have not tested it on v3 yet.
The code of iommu-map-sg seems the same. if it's fixed in v3, I'm very
sorry. The map_sg in mtk-iommu use default_iommu_map_sg.
 Any question please tell me, Thanks very much.

[0]http://lxr.free-electrons.com/source/drivers/media/v4l2-core/videobuf2-dma-contig.c#L564
 
[1]http://lxr.free-electrons.com/source/drivers/media/v4l2-core/videobuf2-dma-contig.c#L70


On Wed, 2015-05-27 at 15:09 +0100, Robin Murphy wrote:

Taking inspiration from the existing arch/arm code, break out some
generic functions to interface the DMA-API to the IOMMU-API. This will
do the bulk of the heavy lifting for IOMMU-backed dma-mapping.

Signed-off-by: Robin Murphy robin.mur...@arm.com
---
  drivers/iommu/Kconfig |   7 +
  drivers/iommu/Makefile|   1 +
  drivers/iommu/dma-iommu.c | 560 ++
  include/linux/dma-iommu.h |  94 
  4 files changed, 662 insertions(+)
  create mode 100644 drivers/iommu/dma-iommu.c
  create mode 100644 include/linux/dma-iommu.h


[snip]

+static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
+   dma_addr_t dma_addr)
+{
+   struct scatterlist *s, *seg = sg;
+   unsigned long seg_mask = dma_get_seg_boundary(dev);
+   unsigned int max_len = dma_get_max_seg_size(dev);
+   unsigned int seg_len = 0, seg_dma = 0;
+   int i, count = 1;
+
+   for_each_sg(sg, s, nents, i) {
+   /* Un-swizzling the fields here, hence the naming mismatch */
+   unsigned int s_offset = sg_dma_address(s);
+   unsigned int s_length = sg_dma_len(s);
+   unsigned int s_dma_len = s-length;
+
+   s-offset = s_offset;
+   s-length = s_length;
+   sg_dma_address(s) = DMA_ERROR_CODE;
+   sg_dma_len(s) = 0;
+
+   if (seg_len  (seg_dma + seg_len == dma_addr + s_offset) 
+   (seg_len + s_dma_len = max_len) 
+   ((seg_dma  seg_mask) = seg_mask - (seg_len + s_length))
+  ) {
+   sg_dma_len(seg) += s_dma_len;
+   } else {
+   if (seg_len) {
+   seg = sg_next(seg);
+   count++;
+   }
+   sg_dma_len(seg) = s_dma_len;


Does changing this to
sg_dma_len(seg) = s_dma_len - s_offset;

make things behave as expected? Since the new unmap code no longer 
relies on adding up the individual segments to equal the IOVA size I 
don't _think_ that'll break anything here.


Robin.

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 2/4] iommu: Implement common IOMMU ops for DMA mapping

2015-05-28 Thread Yong Wu
Hi Robin,
Thanks.

While we test venc in v4l2, we get a problem:
When we enter the funtion[0], it will be break unexpectedly in the
funcion[1] while the offset of sg table is not zero. It is ok if the
offset is zero. Then I add more log in dma-iommu.c, please help check
below.
All we tested it based on dma v2. and have not tested it on v3 yet.
The code of iommu-map-sg seems the same. if it's fixed in v3, I'm very
sorry. The map_sg in mtk-iommu use default_iommu_map_sg.
Any question please tell me, Thanks very much. 

[0]http://lxr.free-electrons.com/source/drivers/media/v4l2-core/videobuf2-dma-contig.c#L564
 
[1]http://lxr.free-electrons.com/source/drivers/media/v4l2-core/videobuf2-dma-contig.c#L70


On Wed, 2015-05-27 at 15:09 +0100, Robin Murphy wrote:
 Taking inspiration from the existing arch/arm code, break out some
 generic functions to interface the DMA-API to the IOMMU-API. This will
 do the bulk of the heavy lifting for IOMMU-backed dma-mapping.
 
 Signed-off-by: Robin Murphy robin.mur...@arm.com
 ---
  drivers/iommu/Kconfig |   7 +
  drivers/iommu/Makefile|   1 +
  drivers/iommu/dma-iommu.c | 560 
 ++
  include/linux/dma-iommu.h |  94 
  4 files changed, 662 insertions(+)
  create mode 100644 drivers/iommu/dma-iommu.c
  create mode 100644 include/linux/dma-iommu.h
 
[snip]
 +static int __finalise_sg(struct device *dev, struct scatterlist *sg, int 
 nents,
 + dma_addr_t dma_addr)
 +{
 + struct scatterlist *s, *seg = sg;
 + unsigned long seg_mask = dma_get_seg_boundary(dev);
 + unsigned int max_len = dma_get_max_seg_size(dev);
 + unsigned int seg_len = 0, seg_dma = 0;
 + int i, count = 1;
 +
 + for_each_sg(sg, s, nents, i) {
 + /* Un-swizzling the fields here, hence the naming mismatch */
 + unsigned int s_offset = sg_dma_address(s);
 + unsigned int s_length = sg_dma_len(s);
 + unsigned int s_dma_len = s-length;
 +
 + s-offset = s_offset;
 + s-length = s_length;
 + sg_dma_address(s) = DMA_ERROR_CODE;
 + sg_dma_len(s) = 0;
 +
 + if (seg_len  (seg_dma + seg_len == dma_addr + s_offset) 
 + (seg_len + s_dma_len = max_len) 
 + ((seg_dma  seg_mask) = seg_mask - (seg_len + s_length))
 +) {
 + sg_dma_len(seg) += s_dma_len;
 + } else {
 + if (seg_len) {
 + seg = sg_next(seg);
 + count++;
 + }
 + sg_dma_len(seg) = s_dma_len;
 + sg_dma_address(seg) = dma_addr + s_offset;
   Here the value of sg_dma_address have added s_offset, but
sg_dma_len(seg) still is s_dma_len.
   In the first loop, s_dma_len is from s-length which is alignd by
s_length = iova_align(iovad, s_length + s_offset); in
the interface iommu_dma_map_sg.
 +
 + seg_len = s_offset;
 + seg_dma = dma_addr + s_offset;
 + }
 + seg_len += s_length;
 + dma_addr += s_dma_len;
 + }
 + return count;
 +}
 +
 +static void __invalidate_sg(struct scatterlist *sg, int nents)
 +{
 + struct scatterlist *s;
 + int i;
 +
 + for_each_sg(sg, s, nents, i) {
 + if (sg_dma_address(s) != DMA_ERROR_CODE)
 + s-offset = sg_dma_address(s);
 + if (sg_dma_len(s))
 + s-length = sg_dma_len(s);
 + sg_dma_address(s) = DMA_ERROR_CODE;
 + sg_dma_len(s) = 0;
 + }
 +}
 +
 +int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
 + int nents, int prot, bool coherent)
 +{
 + struct iommu_dma_domain *dom = arch_get_dma_domain(dev);
 + struct iova_domain *iovad = dom-iovad;
 + struct iova *iova;
 + struct scatterlist *s;
 + dma_addr_t dma_addr;
 + size_t iova_len = 0;
 + int i;
 +
 + /*
 +  * Work out how much IOVA space we need, and align the segments to
 +  * IOVA granules for the IOMMU driver to handle. With some clever
 +  * trickery we can modify the list in a reversible manner.
 +  */
 + for_each_sg(sg, s, nents, i) {
 + size_t s_offset = iova_offset(iovad, s-offset);
 + size_t s_length = s-length;
 +
 + sg_dma_address(s) = s-offset;
 + sg_dma_len(s) = s_length;
 + s-offset -= s_offset;
 + s_length = iova_align(iovad, s_length + s_offset);
 + s-length = s_length;
At the begging, s-length is the length of valid data. but it's aligned
here.
 +
 + iova_len += s_length;
 + }
 +
 + iova = __alloc_iova(dev, iova_len, coherent);
 + if (!iova)
 + goto out_restore_sg;
 +
 + /*
 +  * We'll leave any physical concatenation to the IOMMU driver's
 +  * implementation - it 

[PATCH 2/4] iommu: Implement common IOMMU ops for DMA mapping

2015-05-27 Thread Robin Murphy
Taking inspiration from the existing arch/arm code, break out some
generic functions to interface the DMA-API to the IOMMU-API. This will
do the bulk of the heavy lifting for IOMMU-backed dma-mapping.

Signed-off-by: Robin Murphy robin.mur...@arm.com
---
 drivers/iommu/Kconfig |   7 +
 drivers/iommu/Makefile|   1 +
 drivers/iommu/dma-iommu.c | 560 ++
 include/linux/dma-iommu.h |  94 
 4 files changed, 662 insertions(+)
 create mode 100644 drivers/iommu/dma-iommu.c
 create mode 100644 include/linux/dma-iommu.h

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 1ae4e54..9107b6e 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -48,6 +48,13 @@ config OF_IOMMU
def_bool y
depends on OF  IOMMU_API
 
+# IOMMU-agnostic DMA-mapping layer
+config IOMMU_DMA
+   bool
+   depends on NEED_SG_DMA_LENGTH
+   select IOMMU_API
+   select IOMMU_IOVA
+
 config FSL_PAMU
bool Freescale IOMMU support
depends on PPC32
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 080ffab..574b241 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -1,6 +1,7 @@
 obj-$(CONFIG_IOMMU_API) += iommu.o
 obj-$(CONFIG_IOMMU_API) += iommu-traces.o
 obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
+obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
 obj-$(CONFIG_IOMMU_IOVA) += iova.o
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
new file mode 100644
index 000..5b2b065
--- /dev/null
+++ b/drivers/iommu/dma-iommu.c
@@ -0,0 +1,560 @@
+/*
+ * A fairly generic DMA-API to IOMMU-API glue layer.
+ *
+ * Copyright (C) 2014-2015 ARM Ltd.
+ *
+ * based in part on arch/arm/mm/dma-mapping.c:
+ * Copyright (C) 2000-2004 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/.
+ */
+
+#define pr_fmt(fmt)%s:  fmt, __func__
+
+#include linux/device.h
+#include linux/dma-iommu.h
+#include linux/huge_mm.h
+#include linux/iommu.h
+#include linux/iova.h
+#include linux/mm.h
+
+int iommu_dma_init(void)
+{
+   return iommu_iova_cache_init();
+}
+
+struct iommu_dma_domain {
+   struct iommu_domain *domain;
+   struct iova_domain *iovad;
+   struct kref kref;
+};
+
+/**
+ * iommu_dma_create_domain - Create a DMA mapping domain
+ * @ops: iommu_ops representing the IOMMU backing this domain. It is down to
+ *   the IOMMU driver whether a domain may span multiple IOMMU instances
+ * @base: IOVA at which the mappable address space starts
+ * @size: Size of IOVA space
+ *
+ * @base and @size should be exact multiples of IOMMU page granularity to
+ * avoid rounding surprises. If necessary, we reserve the page at address 0
+ * to ensure it is an invalid IOVA.
+ *
+ * Return: Pointer to a domain initialised with the given IOVA range,
+ * or NULL on failure. If successful, the caller holds an initial
+ * reference, which may be released with iommu_dma_release_domain()
+ * once a device is attached.
+ */
+struct iommu_dma_domain *iommu_dma_create_domain(const struct iommu_ops *ops,
+   dma_addr_t base, u64 size)
+{
+   struct iommu_dma_domain *dom;
+   struct iommu_domain *domain;
+   struct iova_domain *iovad;
+   unsigned long order, base_pfn, end_pfn;
+
+   dom = kzalloc(sizeof(*dom), GFP_KERNEL);
+   if (!dom)
+   return NULL;
+   /*
+* HACK(sort of): These domains currently belong to this layer and are
+* opaque from outside it, so they are unmanaged by the IOMMU API
+* itself. Once we have default domain support worked out, then we can
+* turn things inside out and put these inside managed IOMMU domains...
+*/
+   domain = ops-domain_alloc(IOMMU_DOMAIN_UNMANAGED);
+   if (!domain)
+   goto out_free_dma_domain;
+
+   domain-ops = ops;
+   domain-type = IOMMU_DOMAIN_UNMANAGED;
+
+   /* Use the smallest supported page size for IOVA granularity */
+   order = __ffs(ops-pgsize_bitmap);
+   base_pfn = max_t(unsigned long, 1, base  order);
+   end_pfn = (base + size - 1)  order;
+
+   /* Check the domain allows at least some access to the device... */
+   if (domain-geometry.force_aperture) {
+   if (base  domain-geometry.aperture_end ||
+   base + size =