Hi Tomasz,

Thanks for the patch.

On Thursday 22 March 2012 11:02:23 Laurent Pinchart wrote:
> From: Tomasz Stanislawski <t.stanislaws at samsung.com>
> 
> This patch combines updates and fixes to dma-contig allocator.
> Moreover the allocator code was refactored.
> The most important changes are:
> - functions were reordered
> - move compression of scatterlist to separete function
> - add support for multichunk but contiguous scatterlists
> - simplified implementation of vb2-dma-contig context structure
> - let mmap method to use dma_mmap_writecombine
> - add support for scatterlist in userptr mode
> 
> Signed-off-by: Marek Szyprowski <m.szyprowski at samsung.com>
>       [mmap method]
> Signed-off-by: Andrzej Pietrasiewicz <andrzej.p at samsung.com>
>       [scatterlist in userptr mode]
> Signed-off-by: Kamil Debski <k.debski at samsung.com>
>       [bugfixing]
> Signed-off-by: Tomasz Stanislawski <t.stanislaws at samsung.com>
>       [core refactoring, helper functions]
> Signed-off-by: Kyungmin Park <kyungmin.park at samsung.com>
> ---
>  drivers/media/video/videobuf2-dma-contig.c |  400 +++++++++++++++++++++++++---
>  1 files changed, 365 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/media/video/videobuf2-dma-contig.c b/drivers/media/video/videobuf2-dma-contig.c
> index c898e6f..9965465 100644
> --- a/drivers/media/video/videobuf2-dma-contig.c
> +++ b/drivers/media/video/videobuf2-dma-contig.c
> @@ -10,9 +10,12 @@
>   * the Free Software Foundation.
>   */
> 
> +#include <linux/dma-buf.h>
> +#include <linux/dma-mapping.h>
>  #include <linux/module.h>
> +#include <linux/scatterlist.h>
> +#include <linux/sched.h>
>  #include <linux/slab.h>
> -#include <linux/dma-mapping.h>
> 
>  #include <media/videobuf2-core.h>
>  #include <media/videobuf2-memops.h>
> @@ -22,16 +25,115 @@ struct vb2_dc_buf {
>       void                            *vaddr;
>       unsigned long                   size;
>       dma_addr_t                      dma_addr;
> +     struct sg_table                 *dma_sgt;
> +     enum dma_data_direction         dma_dir;
> 
>       /* MMAP related */
>       struct vb2_vmarea_handler       handler;
>       atomic_t                        refcount;
> +     struct sg_table                 *sgt_base;
> 
>       /* USERPTR related */
>       struct vm_area_struct           *vma;
>  };
> 
>  /*********************************************/
> +/*        scatterlist table functions        */
> +/*********************************************/
> +
> +static struct sg_table *vb2_dc_pages_to_sgt(struct page **pages,
> +     unsigned long n_pages, size_t offset, size_t offset2)
> +{
> +     struct sg_table *sgt;
> +     int i, j; /* loop counters */
> +     int cur_page, chunks;
> +     int ret;
> +     struct scatterlist *s;
> +
> +     sgt = kzalloc(sizeof *sgt, GFP_KERNEL);
> +     if (!sgt)
> +             return ERR_PTR(-ENOMEM);
> +
> +     /* compute number of chunks */
> +     chunks = 1;
> +     for (i = 1; i < n_pages; ++i)
> +             if (pages[i] != pages[i - 1] + 1)
> +                     ++chunks;
> +
> +     ret = sg_alloc_table(sgt, chunks, GFP_KERNEL);
> +     if (ret) {
> +             kfree(sgt);
> +             return ERR_PTR(-ENOMEM);
> +     }
> +
> +     /* merging chunks and putting them into the scatterlist */
> +     cur_page = 0;
> +     for_each_sg(sgt->sgl, s, sgt->orig_nents, i) {
> +             size_t size = PAGE_SIZE;
> +
> +             for (j = cur_page + 1; j < n_pages; ++j) {
> +                     if (pages[j] != pages[j - 1] + 1)
> +                             break;
> +                     size += PAGE_SIZE;
> +             }
> +
> +             /* cut offset if chunk starts at the first page */
> +             if (cur_page == 0)
> +                     size -= offset;
> +             /* cut offset2 if chunk ends at the last page */
> +             if (j == n_pages)
> +                     size -= offset2;
> +
> +             sg_set_page(s, pages[cur_page], size, offset);
> +             offset = 0;
> +             cur_page = j;
> +     }
> +
> +     return sgt;
> +}
> +
> +static void vb2_dc_release_sgtable(struct sg_table *sgt)
> +{
> +     sg_free_table(sgt);
> +     kfree(sgt);
> +}
> +
> +static void vb2_dc_put_sgtable(struct sg_table *sgt, int dirty)
> +{
> +     struct scatterlist *s;
> +     int i, j;
> +
> +     for_each_sg(sgt->sgl, s, sgt->nents, i) {
> +             struct page *page = sg_page(s);
> +             int n_pages = PAGE_ALIGN(s->offset + s->length) >> PAGE_SHIFT;
> +
> +             for (j = 0; j < n_pages; ++j, ++page) {
> +                     if (dirty)
> +                             set_page_dirty_lock(page);
> +                     put_page(page);
> +             }
> +     }
> +
> +     vb2_dc_release_sgtable(sgt);
> +}
> +
> +static unsigned long vb2_dc_get_contiguous_size(struct sg_table *sgt)
> +{
> +     struct scatterlist *s;
> +     dma_addr_t expected = sg_dma_address(sgt->sgl);
> +     int i;
> +     unsigned long size = 0;
> +
> +     for_each_sg(sgt->sgl, s, sgt->nents, i) {
> +             if (sg_dma_address(s) != expected)
> +                     break;
> +             expected = sg_dma_address(s) + sg_dma_len(s);
> +             size += sg_dma_len(s);
> +     }
> +     return size;
> +}
> +
> +/*********************************************/
>  /*         callbacks for all buffers         */
>  /*********************************************/
> 
> @@ -45,8 +147,6 @@ static void *vb2_dc_cookie(void *buf_priv)
>  static void *vb2_dc_vaddr(void *buf_priv)
>  {
>       struct vb2_dc_buf *buf = buf_priv;
> -     if (!buf)
> -             return 0;
> 
>       return buf->vaddr;
>  }
> @@ -58,6 +158,28 @@ static unsigned int vb2_dc_num_users(void *buf_priv)
>       return atomic_read(&buf->refcount);
>  }
> 
> +static void vb2_dc_prepare(void *buf_priv)
> +{
> +     struct vb2_dc_buf *buf = buf_priv;
> +     struct sg_table *sgt = buf->dma_sgt;
> +
> +     if (!sgt)
> +             return;
> +
> +     dma_sync_sg_for_device(buf->dev, sgt->sgl, sgt->nents, buf->dma_dir);
> +}
> +
> +static void vb2_dc_finish(void *buf_priv)
> +{
> +     struct vb2_dc_buf *buf = buf_priv;
> +     struct sg_table *sgt = buf->dma_sgt;
> +
> +     if (!sgt)
> +             return;
> +
> +     dma_sync_sg_for_cpu(buf->dev, sgt->sgl, sgt->nents, buf->dma_dir);
> +}
> +
>  /*********************************************/
>  /*        callbacks for MMAP buffers         */
>  /*********************************************/
> @@ -66,31 +188,70 @@ static void vb2_dc_put(void *buf_priv)
>  {
>       struct vb2_dc_buf *buf = buf_priv;
> 
> -     if (atomic_dec_and_test(&buf->refcount)) {
> -             dma_free_coherent(buf->dev, buf->size, buf->vaddr,
> -                               buf->dma_addr);
> -             kfree(buf);
> -     }
> +     if (!atomic_dec_and_test(&buf->refcount))
> +             return;
> +
> +     vb2_dc_release_sgtable(buf->sgt_base);
> +     dma_free_coherent(buf->dev, buf->size, buf->vaddr,
> +             buf->dma_addr);
> +     kfree(buf);
>  }
> 
>  static void *vb2_dc_alloc(void *alloc_ctx, unsigned long size)
>  {
>       struct device *dev = alloc_ctx;
>       struct vb2_dc_buf *buf;
> +     int ret;
> +     int n_pages;
> +     struct page **pages = NULL;
> 
>       buf = kzalloc(sizeof *buf, GFP_KERNEL);
>       if (!buf)
>               return ERR_PTR(-ENOMEM);
> 
> -     buf->vaddr = dma_alloc_coherent(dev, size, &buf->dma_addr, GFP_KERNEL);
> +     buf->dev = dev;
> +     buf->size = size;
> +     buf->vaddr = dma_alloc_coherent(buf->dev, buf->size, &buf->dma_addr,
> +             GFP_KERNEL);
> +
> +     ret = -ENOMEM;
>       if (!buf->vaddr) {
> -             dev_err(dev, "dma_alloc_coherent of size %ld failed\n", size);
> -             kfree(buf);
> -             return ERR_PTR(-ENOMEM);
> +             dev_err(dev, "dma_alloc_coherent of size %ld failed\n",
> +                     size);
> +             goto fail_buf;
>       }
> 
> -     buf->dev = dev;
> -     buf->size = size;
> +     WARN_ON((unsigned long)buf->vaddr & ~PAGE_MASK);
> +     WARN_ON(buf->dma_addr & ~PAGE_MASK);
> +
> +     n_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
> +
> +     pages = kmalloc(n_pages * sizeof pages[0], GFP_KERNEL);
> +     if (!pages) {
> +             printk(KERN_ERR "failed to alloc page table\n");
> +             goto fail_dma;
> +     }
> +
> +     ret = dma_get_pages(dev, buf->vaddr, buf->dma_addr, pages, n_pages);

As the only purpose of this is to retrieve a list of pages that will be used
to create a single-entry sgt, wouldn't it be possible to shortcut the code and
get the physical address of the buffer directly?
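
Something along these lines (completely untested, and assuming the coherent
allocation is backed by struct pages, which is probably the reason
dma_get_pages() exists in the first place; the function name below is made
up):

	static struct sg_table *vb2_dc_sgt_from_coherent(void *vaddr, size_t size)
	{
		struct sg_table *sgt;
		int ret;

		sgt = kzalloc(sizeof *sgt, GFP_KERNEL);
		if (!sgt)
			return ERR_PTR(-ENOMEM);

		ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
		if (ret) {
			kfree(sgt);
			return ERR_PTR(ret);
		}

		/* a single entry covering the whole physically contiguous buffer */
		sg_set_page(sgt->sgl, virt_to_page(vaddr), PAGE_ALIGN(size), 0);

		return sgt;
	}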

> +     if (ret < 0) {
> +             printk(KERN_ERR "failed to get buffer pages from DMA API\n");
> +             goto fail_pages;
> +     }
> +     if (ret != n_pages) {
> +             ret = -EFAULT;
> +             printk(KERN_ERR "failed to get all pages from DMA API\n");
> +             goto fail_pages;
> +     }
> +
> +     buf->sgt_base = vb2_dc_pages_to_sgt(pages, n_pages, 0, 0);
> +     if (IS_ERR(buf->sgt_base)) {
> +             ret = PTR_ERR(buf->sgt_base);
> +             printk(KERN_ERR "failed to prepare sg table\n");
> +             goto fail_pages;
> +     }

buf->sgt_base isn't used in this patch. I would then move the buf->sgt_base
creation code to the patch that uses it, or to a separate patch just before
that one.

> +
> +     /* pages are no longer needed */
> +     kfree(pages);
> 
>       buf->handler.refcount = &buf->refcount;
>       buf->handler.put = vb2_dc_put;
> @@ -99,59 +260,226 @@ static void *vb2_dc_alloc(void *alloc_ctx, unsigned long size)
>       atomic_inc(&buf->refcount);
> 
>       return buf;
> +
> +fail_pages:
> +     kfree(pages);
> +
> +fail_dma:
> +     dma_free_coherent(buf->dev, buf->size, buf->vaddr, buf->dma_addr);
> +
> +fail_buf:
> +     kfree(buf);
> +
> +     return ERR_PTR(ret);
>  }
> 
>  static int vb2_dc_mmap(void *buf_priv, struct vm_area_struct *vma)
>  {
>       struct vb2_dc_buf *buf = buf_priv;
> +     int ret;
> +
> +     /*
> +      * dma_mmap_* uses vm_pgoff as in-buffer offset, but we want to
> +      * map whole buffer
> +      */
> +     vma->vm_pgoff = 0;
> +
> +     ret = dma_mmap_writecombine(buf->dev, vma, buf->vaddr,
> +             buf->dma_addr, buf->size);
> 
> -     if (!buf) {
> -             printk(KERN_ERR "No buffer to map\n");
> -             return -EINVAL;
> +     if (ret) {
> +             printk(KERN_ERR "Remapping memory failed, error: %d\n", ret);
> +             return ret;
>       }
> 
> -     return vb2_mmap_pfn_range(vma, buf->dma_addr, buf->size,
> -                               &vb2_common_vm_ops, &buf->handler);
> +     vma->vm_flags           |= VM_DONTEXPAND | VM_RESERVED;
> +     vma->vm_private_data    = &buf->handler;
> +     vma->vm_ops             = &vb2_common_vm_ops;
> +
> +     vma->vm_ops->open(vma);
> +
> +     printk(KERN_DEBUG "%s: mapped dma addr 0x%08lx at 0x%08lx, size %ld\n",
> +             __func__, (unsigned long)buf->dma_addr, vma->vm_start,
> +             buf->size);
> +
> +     return 0;
>  }
> 
>  /*********************************************/
>  /*       callbacks for USERPTR buffers       */
>  /*********************************************/
> 
> +static inline int vma_is_io(struct vm_area_struct *vma)
> +{
> +     return !!(vma->vm_flags & (VM_IO | VM_PFNMAP));

Isn't VM_PFNMAP enough ? Wouldn't it be possible (at least in theory) to get a 
discontinuous physical range with VM_IO ?

> +}
> +
> +static int vb2_dc_get_pages(unsigned long start, struct page **pages,
> +     int n_pages, struct vm_area_struct **copy_vma, int write)
> +{
> +     struct vm_area_struct *vma;
> +     int n = 0; /* number of get pages */
> +     int ret = -EFAULT;
> +
> +     /* entering critical section for mm access */
> +     down_read(&current->mm->mmap_sem);

This will generate AB-BA deadlock warnings if lockdep is enabled. This 
function is called with the queue lock held, and the mmap() handler which 
takes the queue lock is called with current->mm->mmap_sem held.

This is a known issue with videobuf2, not specific to this patch. The warning 
is usually a false positive (which we still need to fix, as it worries users), 
but can become a real issue if an MMAP queue and a USERPTR queue are created 
by a driver with the same queue lock.
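
Spelled out, the two paths end up taking the locks in opposite order:

	QBUF on a USERPTR queue:  queue lock -> down_read(&current->mm->mmap_sem)
	mmap() on an MMAP queue:  mmap_sem (taken by the mm core) -> queue lock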

> +     vma = find_vma(current->mm, start);
> +     if (!vma) {
> +             printk(KERN_ERR "no vma for address %lu\n", start);
> +             goto cleanup;
> +     }
> +
> +     if (vma_is_io(vma)) {
> +             unsigned long pfn;
> +
> +             if (vma->vm_end - start < n_pages * PAGE_SIZE) {
> +                     printk(KERN_ERR "vma is too small\n");
> +                     goto cleanup;
> +             }
> +
> +             for (n = 0; n < n_pages; ++n, start += PAGE_SIZE) {
> +                     ret = follow_pfn(vma, start, &pfn);
> +                     if (ret) {
> +                             printk(KERN_ERR "no page for address %lu\n",
> +                                     start);
> +                             goto cleanup;
> +                     }
> +                     pages[n] = pfn_to_page(pfn);
> +                     get_page(pages[n]);

This worries me. When the VM_PFNMAP flag is set, the memory pages are not
backed by a struct page. Creating a struct page pointer out of it can be an
acceptable hack (for instance to store a page in a scatterlist with
sg_set_page() and then retrieve its physical address with sg_phys()), but you
should not expect the struct page to be valid for anything else. Calling
get_page() on it will likely crash.

> +             }
> +     } else {
> +             n = get_user_pages(current, current->mm, start & PAGE_MASK,
> +                     n_pages, write, 1, pages, NULL);
> +             if (n != n_pages) {
> +                     printk(KERN_ERR "got only %d of %d user pages\n",
> +                             n, n_pages);
> +                     goto cleanup;
> +             }
> +     }
> +
> +     *copy_vma = vb2_get_vma(vma);
> +     if (!*copy_vma) {
> +             printk(KERN_ERR "failed to copy vma\n");
> +             ret = -ENOMEM;
> +             goto cleanup;
> +     }

Do we really need to make a copy of the VMA? The only reason we store a
pointer to it is to check the flags in vb2_dc_put_userptr(). We could store
the flags instead and avoid the vb2_get_vma()/vb2_put_vma() calls altogether.
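
Roughly (untested, the vm_flags field name is made up):

	/* in struct vb2_dc_buf, replacing the struct vm_area_struct pointer */
	unsigned long			vm_flags;

	/* in vb2_dc_get_userptr(), while mmap_sem is still held */
	buf->vm_flags = vma->vm_flags;

	/* in vb2_dc_put_userptr() */
	vb2_dc_put_sgtable(sgt, !(buf->vm_flags & (VM_IO | VM_PFNMAP)));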

> +
> +     /* leaving critical section for mm access */
> +     up_read(&current->mm->mmap_sem);
> +
> +     return 0;
> +
> +cleanup:
> +     up_read(&current->mm->mmap_sem);
> +
> +     /* putting user pages if used, can be done without the lock */
> +     while (n)
> +             put_page(pages[--n]);
> +
> +     return ret;
> +}
> +
>  static void *vb2_dc_get_userptr(void *alloc_ctx, unsigned long vaddr,
> -                                     unsigned long size, int write)
> +     unsigned long size, int write)
>  {
>       struct vb2_dc_buf *buf;
> -     struct vm_area_struct *vma;
> -     dma_addr_t dma_addr = 0;
> -     int ret;
> +     unsigned long start, end, offset, offset2;
> +     struct page **pages;
> +     int n_pages;
> +     int ret = 0;
> +     struct sg_table *sgt;
> +     unsigned long contig_size;
> 
>       buf = kzalloc(sizeof *buf, GFP_KERNEL);
>       if (!buf)
>               return ERR_PTR(-ENOMEM);
> 
> -     ret = vb2_get_contig_userptr(vaddr, size, &vma, &dma_addr);
> +     buf->dev = alloc_ctx;
> +     buf->dma_dir = write ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
> +
> +     start = (unsigned long)vaddr & PAGE_MASK;
> +     offset = (unsigned long)vaddr & ~PAGE_MASK;
> +     end = PAGE_ALIGN((unsigned long)vaddr + size);
> +     offset2 = end - (unsigned long)vaddr - size;
> +     n_pages = (end - start) >> PAGE_SHIFT;
> +
> +     pages = kmalloc(n_pages * sizeof pages[0], GFP_KERNEL);
> +     if (!pages) {
> +             ret = -ENOMEM;
> +             printk(KERN_ERR "failed to allocate pages table\n");
> +             goto fail_buf;
> +     }
> +
> +     /* extract page list from userspace mapping */
> +     ret = vb2_dc_get_pages(start, pages, n_pages, &buf->vma, write);
>       if (ret) {
> -             printk(KERN_ERR "Failed acquiring VMA for vaddr 0x%08lx\n",
> -                             vaddr);
> -             kfree(buf);
> -             return ERR_PTR(ret);
> +             printk(KERN_ERR "failed to get user pages\n");
> +             goto fail_pages;
> +     }
> +
> +     sgt = vb2_dc_pages_to_sgt(pages, n_pages, offset, offset2);
> +     if (!sgt) {
> +             printk(KERN_ERR "failed to create scatterlist table\n");
> +             ret = -ENOMEM;
> +             goto fail_get_pages;
>       }

This looks overly complex to me. You create a multi-chunk sgt out of the user
pointer address and map it completely, and then check whether it starts with a
big enough contiguous chunk. Why don't you create an sgt with a single
contiguous chunk then? In the VM_PFNMAP case you could check whether the area
is contiguous when you follow the PFNs, stop at the first discontinuity, and
create an sgt with a single element right there. You would then need to call
vb2_dc_pages_to_sgt() in the normal case only, and stop at the first
discontinuity as well.
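
For the VM_PFNMAP case that could look roughly like this (untested):

	unsigned long pfn, next;
	int n;

	ret = follow_pfn(vma, start, &pfn);
	if (ret)
		goto cleanup;

	/* count how many pages are physically contiguous from the start */
	for (n = 1; n < n_pages; ++n) {
		if (follow_pfn(vma, start + n * PAGE_SIZE, &next) ||
		    next != pfn + n)
			break;
	}

	/* [pfn, pfn + n) then goes into a single scatterlist entry */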

> 
> +     /* pages are no longer needed */
> +     kfree(pages);
> +     pages = NULL;
> +
> +     sgt->nents = dma_map_sg(buf->dev, sgt->sgl, sgt->orig_nents,
> +             buf->dma_dir);
> +     if (sgt->nents <= 0) {
> +             printk(KERN_ERR "failed to map scatterlist\n");
> +             ret = -EIO;
> +             goto fail_sgt;
> +     }
> +
> +     contig_size = vb2_dc_get_contiguous_size(sgt);
> +     if (contig_size < size) {
> +             printk(KERN_ERR "contiguous mapping is too small %lu/%lu\n",
> +                     contig_size, size);
> +             ret = -EFAULT;
> +             goto fail_map_sg;
> +     }
> +
> +     buf->dma_addr = sg_dma_address(sgt->sgl);
>       buf->size = size;
> -     buf->dma_addr = dma_addr;
> -     buf->vma = vma;
> +     buf->dma_sgt = sgt;
> +
> +     atomic_inc(&buf->refcount);
> 
>       return buf;
> +
> +fail_map_sg:
> +     dma_unmap_sg(buf->dev, sgt->sgl, sgt->nents, buf->dma_dir);

I think this will break in the VM_PFNMAP case on non-coherent architectures.
arm_dma_unmap_page() will call __dma_page_dev_to_cpu() in that case, which can
dereference struct page. As explained above, the struct page isn't valid with
VM_PFNMAP. I haven't checked the dma_map_sg() and dma_sync_sg_*() calls, but
chances are they might break as well.

> +
> +fail_sgt:
> +     vb2_dc_put_sgtable(sgt, 0);
> +
> +fail_get_pages:
> +     while (pages && n_pages)
> +             put_page(pages[--n_pages]);
> +     vb2_put_vma(buf->vma);
> +
> +fail_pages:
> +     kfree(pages); /* kfree is NULL-proof */
> +
> +fail_buf:
> +     kfree(buf);
> +
> +     return ERR_PTR(ret);
>  }
> 
> -static void vb2_dc_put_userptr(void *mem_priv)
> +static void vb2_dc_put_userptr(void *buf_priv)
>  {
> -     struct vb2_dc_buf *buf = mem_priv;
> -
> -     if (!buf)
> -             return;
> +     struct vb2_dc_buf *buf = buf_priv;
> +     struct sg_table *sgt = buf->dma_sgt;
> 
> +     dma_unmap_sg(buf->dev, sgt->sgl, sgt->orig_nents, buf->dma_dir);
> +     vb2_dc_put_sgtable(sgt, !vma_is_io(buf->vma));
>       vb2_put_vma(buf->vma);
>       kfree(buf);
>  }
> @@ -168,6 +496,8 @@ const struct vb2_mem_ops vb2_dma_contig_memops = {
>       .mmap           = vb2_dc_mmap,
>       .get_userptr    = vb2_dc_get_userptr,
>       .put_userptr    = vb2_dc_put_userptr,
> +     .prepare        = vb2_dc_prepare,
> +     .finish         = vb2_dc_finish,
>       .num_users      = vb2_dc_num_users,
>  };
>  EXPORT_SYMBOL_GPL(vb2_dma_contig_memops);

-- 
Regards,

Laurent Pinchart
