Re: [PATCH v2 1/3] drm/prime: Iterate SG DMA addresses separately

2018-04-17 Thread Deucher, Alexander
+ Suravee


From: Robin Murphy <robin.mur...@arm.com>
Sent: Tuesday, April 17, 2018 2:22:46 PM
To: Koenig, Christian; amd-...@lists.freedesktop.org; 
dri-devel@lists.freedesktop.org
Cc: ok...@codeaurora.org; Deucher, Alexander
Subject: Re: [PATCH v2 1/3] drm/prime: Iterate SG DMA addresses separately

On 17/04/18 17:29, Christian König wrote:
> Am 17.04.2018 um 17:58 schrieb Robin Murphy:
>> For dma_map_sg(), DMA API implementations are free to merge consecutive
>> segments into a single DMA mapping if conditions are suitable, thus the
>> resulting DMA addresses which drm_prime_sg_to_page_addr_arrays()
>> iterates may be packed into fewer entries than ttm->sg->nents implies.
>>
>> The current implementation does not account for this, meaning that its
>> callers either have to reject the 0 < count < nents case or risk getting
>> bogus DMA addresses beyond the first segment. Fortunately this is quite
>> easy to handle without having to rejig structures to also store the
>> mapped count, since the total DMA length should still be equal to the
>> total buffer length. All we need is a second scatterlist cursor to
>> iterate through the DMA addresses independently of the page addresses.
>>
>> Signed-off-by: Robin Murphy <robin.mur...@arm.com>
>
> Reviewed-by: Christian König <christian.koe...@amd.com> for the whole
> series.

Thanks Christian.

FWIW, the following *completely untested* hack should in theory give the
AMD IOMMU similar segment-merging behaviour to the arm64 IOMMU DMA ops,
if that helps widen the scope for testing/investigation (I have neither
an AMD/ATI graphics card nor a PCIe-capable arm64 box to hand just at
the moment).

Robin.

->8-
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 2a99f0f14795..60b0e495b567 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2489,11 +2489,11 @@ static int map_sg(struct device *dev, struct
scatterlist *sglist,
   int nelems, enum dma_data_direction direction,
   unsigned long attrs)
  {
-   int mapped_pages = 0, npages = 0, prot = 0, i;
+   int mapped_pages = 0, npages = 0, prot = 0, i, count;
 struct protection_domain *domain;
 struct dma_ops_domain *dma_dom;
-   struct scatterlist *s;
-   unsigned long address;
+   struct scatterlist *s, *d;
+   unsigned long address, max_seg;
 u64 dma_mask;

 domain = get_domain(dev);
@@ -2535,7 +2535,28 @@ static int map_sg(struct device *dev, struct
scatterlist *sglist,
 s->dma_length   = s->length;
 }

-   return nelems;
+   d = sglist;
+   max_seg = dma_get_max_seg_size(dev);
+   count = 1;
+   nelems -= 1;
+   for_each_sg(sg_next(sglist), s, nelems, i) {
+   dma_addr_t s_dma_addr = s->dma_address;
+   unsigned int s_dma_len = s->dma_length;
+
+   s->dma_address = 0;
+   s->dma_length = 0;
+   if (s_dma_addr == d->dma_address + d->dma_length &&
+   d->dma_length + s_dma_len <= max_seg) {
+   d->dma_length += s_dma_len;
+   } else {
+   d = sg_next(d);
+   d->dma_address = s_dma_addr;
+   d->dma_length = s_dma_len;
+   count++;
+   }
+   }
+
+   return count;

  out_unmap:
 pr_err("%s: IOMMU mapping error in map_sg (io-pages: %d)\n",
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH v2 1/3] drm/prime: Iterate SG DMA addresses separately

2018-04-17 Thread Robin Murphy

On 17/04/18 17:29, Christian König wrote:

Am 17.04.2018 um 17:58 schrieb Robin Murphy:

For dma_map_sg(), DMA API implementations are free to merge consecutive
segments into a single DMA mapping if conditions are suitable, thus the
resulting DMA addresses which drm_prime_sg_to_page_addr_arrays()
iterates may be packed into fewer entries than ttm->sg->nents implies.

The current implementation does not account for this, meaning that its
callers either have to reject the 0 < count < nents case or risk getting
bogus DMA addresses beyond the first segment. Fortunately this is quite
easy to handle without having to rejig structures to also store the
mapped count, since the total DMA length should still be equal to the
total buffer length. All we need is a second scatterlist cursor to
iterate through the DMA addresses independently of the page addresses.

Signed-off-by: Robin Murphy 


Reviewed-by: Christian König  for the whole 
series.


Thanks Christian.

FWIW, the following *completely untested* hack should in theory give the 
AMD IOMMU similar segment-merging behaviour to the arm64 IOMMU DMA ops, 
if that helps widen the scope for testing/investigation (I have neither 
an AMD/ATI graphics card nor a PCIe-capable arm64 box to hand just at 
the moment).


Robin.

->8-
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 2a99f0f14795..60b0e495b567 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2489,11 +2489,11 @@ static int map_sg(struct device *dev, struct 
scatterlist *sglist,

  int nelems, enum dma_data_direction direction,
  unsigned long attrs)
 {
-   int mapped_pages = 0, npages = 0, prot = 0, i;
+   int mapped_pages = 0, npages = 0, prot = 0, i, count;
struct protection_domain *domain;
struct dma_ops_domain *dma_dom;
-   struct scatterlist *s;
-   unsigned long address;
+   struct scatterlist *s, *d;
+   unsigned long address, max_seg;
u64 dma_mask;

domain = get_domain(dev);
@@ -2535,7 +2535,28 @@ static int map_sg(struct device *dev, struct 
scatterlist *sglist,

s->dma_length   = s->length;
}

-   return nelems;
+   d = sglist;
+   max_seg = dma_get_max_seg_size(dev);
+   count = 1;
+   nelems -= 1;
+   for_each_sg(sg_next(sglist), s, nelems, i) {
+   dma_addr_t s_dma_addr = s->dma_address;
+   unsigned int s_dma_len = s->dma_length;
+
+   s->dma_address = 0;
+   s->dma_length = 0;
+   if (s_dma_addr == d->dma_address + d->dma_length &&
+   d->dma_length + s_dma_len <= max_seg) {
+   d->dma_length += s_dma_len;
+   } else {
+   d = sg_next(d);
+   d->dma_address = s_dma_addr;
+   d->dma_length = s_dma_len;
+   count++;
+   }
+   }
+
+   return count;

 out_unmap:
pr_err("%s: IOMMU mapping error in map_sg (io-pages: %d)\n",
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH v2 1/3] drm/prime: Iterate SG DMA addresses separately

2018-04-17 Thread Christian König

Am 17.04.2018 um 17:58 schrieb Robin Murphy:

For dma_map_sg(), DMA API implementations are free to merge consecutive
segments into a single DMA mapping if conditions are suitable, thus the
resulting DMA addresses which drm_prime_sg_to_page_addr_arrays()
iterates may be packed into fewer entries than ttm->sg->nents implies.

The current implementation does not account for this, meaning that its
callers either have to reject the 0 < count < nents case or risk getting
bogus DMA addresses beyond the first segment. Fortunately this is quite
easy to handle without having to rejig structures to also store the
mapped count, since the total DMA length should still be equal to the
total buffer length. All we need is a second scatterlist cursor to
iterate through the DMA addresses independently of the page addresses.

Signed-off-by: Robin Murphy 


Reviewed-by: Christian König  for the whole 
series.



---

v2: Remember to iterate dma_len correctly as well.

  drivers/gpu/drm/drm_prime.c | 15 ++++++++++++---
  1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 7856a9b3f8a8..b8ca06ea7d80 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -933,16 +933,18 @@ int drm_prime_sg_to_page_addr_arrays(struct sg_table 
*sgt, struct page **pages,
 dma_addr_t *addrs, int max_entries)
  {
unsigned count;
-   struct scatterlist *sg;
+   struct scatterlist *sg, *dma_sg;
struct page *page;
-   u32 len, index;
+   u32 len, dma_len, index;
dma_addr_t addr;
  
  	index = 0;

+   dma_sg = sgt->sgl;
+   dma_len = sg_dma_len(dma_sg);
+   addr = sg_dma_address(dma_sg);
for_each_sg(sgt->sgl, sg, sgt->nents, count) {
len = sg->length;
page = sg_page(sg);
-   addr = sg_dma_address(sg);
  
  		while (len > 0) {

if (WARN_ON(index >= max_entries))
@@ -955,8 +957,15 @@ int drm_prime_sg_to_page_addr_arrays(struct sg_table *sgt, 
struct page **pages,
page++;
addr += PAGE_SIZE;
len -= PAGE_SIZE;
+   dma_len -= PAGE_SIZE;
index++;
}
+
+   if (dma_len == 0) {
+   dma_sg = sg_next(dma_sg);
+   dma_len = sg_dma_len(dma_sg);
+   addr = sg_dma_address(dma_sg);
+   }
}
return 0;
  }


___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH v2 1/3] drm/prime: Iterate SG DMA addresses separately

2018-04-17 Thread Robin Murphy
For dma_map_sg(), DMA API implementations are free to merge consecutive
segments into a single DMA mapping if conditions are suitable, thus the
resulting DMA addresses which drm_prime_sg_to_page_addr_arrays()
iterates may be packed into fewer entries than ttm->sg->nents implies.

The current implementation does not account for this, meaning that its
callers either have to reject the 0 < count < nents case or risk getting
bogus DMA addresses beyond the first segment. Fortunately this is quite
easy to handle without having to rejig structures to also store the
mapped count, since the total DMA length should still be equal to the
total buffer length. All we need is a second scatterlist cursor to
iterate through the DMA addresses independently of the page addresses.

Signed-off-by: Robin Murphy 
---

v2: Remember to iterate dma_len correctly as well.

 drivers/gpu/drm/drm_prime.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 7856a9b3f8a8..b8ca06ea7d80 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -933,16 +933,18 @@ int drm_prime_sg_to_page_addr_arrays(struct sg_table 
*sgt, struct page **pages,
 dma_addr_t *addrs, int max_entries)
 {
unsigned count;
-   struct scatterlist *sg;
+   struct scatterlist *sg, *dma_sg;
struct page *page;
-   u32 len, index;
+   u32 len, dma_len, index;
dma_addr_t addr;
 
index = 0;
+   dma_sg = sgt->sgl;
+   dma_len = sg_dma_len(dma_sg);
+   addr = sg_dma_address(dma_sg);
for_each_sg(sgt->sgl, sg, sgt->nents, count) {
len = sg->length;
page = sg_page(sg);
-   addr = sg_dma_address(sg);
 
while (len > 0) {
if (WARN_ON(index >= max_entries))
@@ -955,8 +957,15 @@ int drm_prime_sg_to_page_addr_arrays(struct sg_table *sgt, 
struct page **pages,
page++;
addr += PAGE_SIZE;
len -= PAGE_SIZE;
+   dma_len -= PAGE_SIZE;
index++;
}
+
+   if (dma_len == 0) {
+   dma_sg = sg_next(dma_sg);
+   dma_len = sg_dma_len(dma_sg);
+   addr = sg_dma_address(dma_sg);
+   }
}
return 0;
 }
-- 
2.16.1.dirty

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel