Re: [Linaro-mm-sig] [RFCv1 2/4] v4l:vb2: add support for shared buffer (dma_buf)

2012-01-20 Thread Laurent Pinchart
Hi Sumit,

On Thursday 05 January 2012 11:41:56 Sumit Semwal wrote:
 This patch adds support for DMABUF memory type in videobuf2. It calls
 relevant APIs of dma_buf for v4l reqbuf / qbuf / dqbuf operations.
 
 For this version, the support is for videobuf2 as a user of the shared
 buffer; so the allocation of the buffer is done outside of V4L2. [A sample
 allocator of dma-buf shared buffer is given at [1]]
 
 [1]: Rob Clark's DRM:
https://github.com/robclark/kernel-omap4/commits/drmplane-dmabuf
 
 Signed-off-by: Tomasz Stanislawski t.stanisl...@samsung.com
[original work in the PoC for buffer sharing]
 Signed-off-by: Sumit Semwal sumit.sem...@ti.com
 Signed-off-by: Sumit Semwal sumit.sem...@linaro.org
 ---
  drivers/media/video/videobuf2-core.c |  186 ++-
  include/media/videobuf2-core.h   |   30 ++
  2 files changed, 215 insertions(+), 1 deletions(-)
 
 diff --git a/drivers/media/video/videobuf2-core.c
 b/drivers/media/video/videobuf2-core.c index 95a3f5e..6cd2f97 100644
 --- a/drivers/media/video/videobuf2-core.c
 +++ b/drivers/media/video/videobuf2-core.c
 @@ -107,6 +107,27 @@ static void __vb2_buf_userptr_put(struct vb2_buffer
 *vb) }
 
  /**
 + * __vb2_buf_dmabuf_put() - release memory associated with
 + * a DMABUF shared buffer
 + */
 +static void __vb2_buf_dmabuf_put(struct vb2_buffer *vb)
 +{
 + struct vb2_queue *q = vb-vb2_queue;
 + unsigned int plane;
 +
 + for (plane = 0; plane  vb-num_planes; ++plane) {
 + void *mem_priv = vb-planes[plane].mem_priv;
 +
 + if (mem_priv) {
 + call_memop(q, plane, detach_dmabuf, mem_priv);
 + dma_buf_put(vb-planes[plane].dbuf);
 + vb-planes[plane].dbuf = NULL;
 + vb-planes[plane].mem_priv = NULL;
 + }
 + }
 +}
 +
 +/**
   * __setup_offsets() - setup unique offsets (cookies) for every plane in
   * every buffer on the queue
   */
 @@ -228,6 +249,8 @@ static void __vb2_free_mem(struct vb2_queue *q,
 unsigned int buffers) /* Free MMAP buffers or release USERPTR buffers */
   if (q-memory == V4L2_MEMORY_MMAP)
   __vb2_buf_mem_free(vb);
 + if (q-memory == V4L2_MEMORY_DMABUF)
 + __vb2_buf_dmabuf_put(vb);
   else
   __vb2_buf_userptr_put(vb);
   }
 @@ -350,6 +373,13 @@ static int __fill_v4l2_buffer(struct vb2_buffer *vb,
 struct v4l2_buffer *b) */
   memcpy(b-m.planes, vb-v4l2_planes,
   b-length * sizeof(struct v4l2_plane));
 +
 + if (q-memory == V4L2_MEMORY_DMABUF) {
 + unsigned int plane;
 + for (plane = 0; plane  vb-num_planes; ++plane) {
 + b-m.planes[plane].m.fd = 0;
 + }
 + }
   } else {
   /*
* We use length and offset in v4l2_planes array even for
 @@ -361,6 +391,8 @@ static int __fill_v4l2_buffer(struct vb2_buffer *vb,
 struct v4l2_buffer *b) b-m.offset = vb-v4l2_planes[0].m.mem_offset;
   else if (q-memory == V4L2_MEMORY_USERPTR)
   b-m.userptr = vb-v4l2_planes[0].m.userptr;
 + else if (q-memory == V4L2_MEMORY_DMABUF)
 + b-m.fd = 0;
   }
 
   /*
 @@ -452,6 +484,21 @@ static int __verify_mmap_ops(struct vb2_queue *q)
  }
 
  /**
 + * __verify_dmabuf_ops() - verify that all memory operations required for
 + * DMABUF queue type have been provided
 + */
 +static int __verify_dmabuf_ops(struct vb2_queue *q)
 +{
 + if (!(q-io_modes  VB2_DMABUF) || !q-mem_ops-attach_dmabuf
 + || !q-mem_ops-detach_dmabuf
 + || !q-mem_ops-map_dmabuf
 + || !q-mem_ops-unmap_dmabuf)
 + return -EINVAL;
 +
 + return 0;
 +}
 +
 +/**
   * vb2_reqbufs() - Initiate streaming
   * @q:   videobuf2 queue
   * @req: struct passed from userspace to vidioc_reqbufs handler in driver
 @@ -485,6 +532,7 @@ int vb2_reqbufs(struct vb2_queue *q, struct
 v4l2_requestbuffers *req) }
 
   if (req-memory != V4L2_MEMORY_MMAP
 +  req-memory != V4L2_MEMORY_DMABUF
req-memory != V4L2_MEMORY_USERPTR) {
   dprintk(1, reqbufs: unsupported memory type\n);
   return -EINVAL;
 @@ -514,6 +562,11 @@ int vb2_reqbufs(struct vb2_queue *q, struct
 v4l2_requestbuffers *req) return -EINVAL;
   }
 
 + if (req-memory == V4L2_MEMORY_DMABUF  __verify_dmabuf_ops(q)) {
 + dprintk(1, reqbufs: DMABUF for current setup unsupported\n);
 + return -EINVAL;
 + }
 +
   if (req-count == 0 || q-num_buffers != 0 || q-memory != req-memory) 
 {
   /*
* We already have buffers allocated, so first check if they
 @@ -621,7 +674,8 @@ int vb2_create_bufs(struct vb2_queue *q, struct
 v4l2_create_buffers 

Re: [Linaro-mm-sig] [RFCv1 2/4] v4l:vb2: add support for shared buffer (dma_buf)

2012-01-20 Thread Laurent Pinchart
Hi Sumit,

On Monday 16 January 2012 06:33:31 Semwal, Sumit wrote:
 On Sun, Jan 15, 2012 at 2:08 AM, Sakari Ailus sakari.ai...@iki.fi wrote:
  Hi Sumit,
  
  Thanks for the patch!
 
 Hi Sakari,
 
 Thanks for reviewing this :)
 
  snip
  Shouldn't the buffer mapping only be done at the first call to
  __qbuf_dmabuf()? On latter calls, the cache would need to be handled
  according to presence of V4L2_BUF_FLAG_NO_CACHE_CLEAN /
  V4L2_BUF_FLAG_NO_CACHE_INVALIDATE in v4l2_buffer.
 
 Well, the 'map / unmap' implementation is by design exporter-specific; so
 the exporter of the buffer may choose to, depending on the use case,
 'map-and-keep' on first call to map_dmabuf, and do actual unmap only at
 'release' time. This will mean that the {map,unmap}_dmabuf calls will be
 used mostly for 'access-bracketing' between multiple users, and not for
 actual map/unmap each time.
 Again, the framework is flexible enough to allow exporters to actually
 map/unmap as required (think cases where backing-storage migration might be
 needed while buffers are in use - in that case, when all current users have
 called unmap_XXX() on a buffer, it can be migrated, and the next map_XXX()
 calls could give different mappings back to the users).
 The kernel 'users' of dma-buf [in case of this RFC, v4l2] should not
 ideally need to worry about the actual mapping/unmapping that is done; the
 buffer exporter in a particular use-case should be able to handle it.

I'm afraid it's more complex than that. Your patch calls q-mem_ops-
map_dmabuf() at every VIDIOC_QBUF call. The function will call 
dma_buf_map_attachment(), which could cache the mapping somehow (even though 
that triggers an alarm somewhere in my brain, deciding in the exporter how to 
do so will likely cause issues - I'll try to sort my thoughts out on this), 
but it will also be responsible for mapping the sg list to the V4L2 device 
IOMMU (not for dma-contig obviously, but this code is in videobuf2-core.c). 
This is an expensive operation that we don't want to perform at every 
QBUF/DQBUF.

V4L2 uses streaming DMA mappings, partly for performance reasons. That's 
something dma-buf will likely need to support. Or you could argue that 
streaming DMA mappings are broken by design on some platform anyway, but then 
I'll expect a proposal for an efficient replacement :-)

 snip
 
  Same here, except reverse: this only should be done when the buffer is
  destroyed --- either when the user explicitly calls reqbufs(0) or when
  the file handle owning this buffer is being closed.
  
  Mapping buffers at every prepare_buf and unmapping them in dqbuf is
  prohibitively expensive. Same goes for many other APIs than V4L2, I
  think.
  
  I wonder if the means to do this exists already.
  
  I have to admit I haven't followed the dma_buf discussion closely so I
  might be missing something relevant here.
 
 Hope the above explanation helps. Please do not hesitate to contact if you
 need more details.

-- 
Regards,

Laurent Pinchart
--
To unsubscribe from this list: send the line unsubscribe linux-media in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Linaro-mm-sig] [RFCv1 2/4] v4l:vb2: add support for shared buffer (dma_buf)

2012-01-15 Thread Semwal, Sumit
On Mon, Jan 16, 2012 at 11:03 AM, Semwal, Sumit sumit.sem...@ti.com wrote:

 On Sun, Jan 15, 2012 at 2:08 AM, Sakari Ailus sakari.ai...@iki.fi wrote:

 Hi Sumit,

 Thanks for the patch!

Hi Sakari,

Thanks for reviewing this :)


 snip
 Shouldn't the buffer mapping only be done at the first call to
 __qbuf_dmabuf()? On latter calls, the cache would need to be handled
 according to presence of V4L2_BUF_FLAG_NO_CACHE_CLEAN /
 V4L2_BUF_FLAG_NO_CACHE_INVALIDATE in v4l2_buffer.

Well, the 'map / unmap' implementation is by design exporter-specific;
so the exporter of the buffer may choose to, depending on the use
case, 'map-and-keep' on first call to map_dmabuf, and do actual unmap
only at 'release' time. This will mean that the {map,unmap}_dmabuf
calls will be used mostly for 'access-bracketing' between multiple
users, and not for actual map/unmap each time.
Again, the framework is flexible enough to allow exporters to actually
map/unmap as required (think cases where backing-storage migration
might be needed while buffers are in use - in that case, when all
current users have called unmap_XXX() on a buffer, it can be migrated,
and the next map_XXX() calls could give different mappings back to the
users).
 The kernel 'users' of dma-buf [in case of this RFC, v4l2] should not ideally 
 need to worry about the actual mapping/unmapping that is done; the buffer 
 exporter in a particular use-case should be able to handle it.
 snip

 Same here, except reverse: this only should be done when the buffer is
 destroyed --- either when the user explicitly calls reqbufs(0) or when
 the file handle owning this buffer is being closed.

 Mapping buffers at every prepare_buf and unmapping them in dqbuf is
 prohibitively expensive. Same goes for many other APIs than V4L2, I think.

 I wonder if the means to do this exists already.

 I have to admit I haven't followed the dma_buf discussion closely so I
 might be missing something relevant here.

Hope the above explanation helps. Please do not hesitate to contact if
you need more details.


 Kind regards,

 --
 Sakari Ailus
 sakari.ai...@iki.fi

Best regards,
~Sumit.
--
To unsubscribe from this list: send the line unsubscribe linux-media in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Linaro-mm-sig] [RFCv1 2/4] v4l:vb2: add support for shared buffer (dma_buf)

2012-01-14 Thread Sakari Ailus
Hi Sumit,

Thanks for the patch!

Sumit Semwal wrote:
...
 @@ -962,6 +1030,109 @@ static int __qbuf_mmap(struct vb2_buffer *vb, const 
 struct v4l2_buffer *b)
  }
  
  /**
 + * __qbuf_dmabuf() - handle qbuf of a DMABUF buffer
 + */
 +static int __qbuf_dmabuf(struct vb2_buffer *vb, const struct v4l2_buffer *b)
 +{
 + struct v4l2_plane planes[VIDEO_MAX_PLANES];
 + struct vb2_queue *q = vb-vb2_queue;
 + void *mem_priv;
 + unsigned int plane;
 + int ret;
 + int write = !V4L2_TYPE_IS_OUTPUT(q-type);
 +
 + /* Verify and copy relevant information provided by the userspace */
 + ret = __fill_vb2_buffer(vb, b, planes);
 + if (ret)
 + return ret;
 +
 + for (plane = 0; plane  vb-num_planes; ++plane) {
 + struct dma_buf *dbuf = dma_buf_get(planes[plane].m.fd);
 +
 + if (IS_ERR_OR_NULL(dbuf)) {
 + dprintk(1, qbuf: invalid dmabuf fd for 
 + plane %d\n, plane);
 + ret = PTR_ERR(dbuf);
 + goto err;
 + }
 +
 + /* this doesn't get filled in until __fill_vb2_buffer(),
 +  * since it isn't known until after dma_buf_get()..
 +  */
 + planes[plane].length = dbuf-size;
 +
 + /* Skip the plane if already verified */
 + if (dbuf == vb-planes[plane].dbuf) {
 + dma_buf_put(dbuf);
 + continue;
 + }
 +
 + dprintk(3, qbuf: buffer description for plane %d changed, 
 + reattaching dma buf\n, plane);
 +
 + /* Release previously acquired memory if present */
 + if (vb-planes[plane].mem_priv) {
 + call_memop(q, plane, detach_dmabuf,
 + vb-planes[plane].mem_priv);
 + dma_buf_put(vb-planes[plane].dbuf);
 + }
 +
 + vb-planes[plane].mem_priv = NULL;
 +
 + /* Acquire each plane's memory */
 + mem_priv = q-mem_ops-attach_dmabuf(
 + q-alloc_ctx[plane], dbuf);
 + if (IS_ERR(mem_priv)) {
 + dprintk(1, qbuf: failed acquiring dmabuf 
 + memory for plane %d\n, plane);
 + ret = PTR_ERR(mem_priv);
 + goto err;
 + }
 +
 + vb-planes[plane].dbuf = dbuf;
 + vb-planes[plane].mem_priv = mem_priv;
 + }
 +
 + /* TODO: This pins the buffer(s) with  dma_buf_map_attachment()).. but
 +  * really we want to do this just before the DMA, not while queueing
 +  * the buffer(s)..
 +  */
 + for (plane = 0; plane  vb-num_planes; ++plane) {
 + ret = q-mem_ops-map_dmabuf(
 + vb-planes[plane].mem_priv, write);
 + if (ret) {
 + dprintk(1, qbuf: failed mapping dmabuf 
 + memory for plane %d\n, plane);
 + goto err;
 + }
 + }

Shouldn't the buffer mapping only be done at the first call to
__qbuf_dmabuf()? On latter calls, the cache would need to be handled
according to presence of V4L2_BUF_FLAG_NO_CACHE_CLEAN /
V4L2_BUF_FLAG_NO_CACHE_INVALIDATE in v4l2_buffer.

 + /*
 +  * Call driver-specific initialization on the newly acquired buffer,
 +  * if provided.
 +  */
 + ret = call_qop(q, buf_init, vb);
 + if (ret) {
 + dprintk(1, qbuf: buffer initialization failed\n);
 + goto err;
 + }
 +
 + /*
 +  * Now that everything is in order, copy relevant information
 +  * provided by userspace.
 +  */
 + for (plane = 0; plane  vb-num_planes; ++plane)
 + vb-v4l2_planes[plane] = planes[plane];
 +
 + return 0;
 +err:
 + /* In case of errors, release planes that were already acquired */
 + __vb2_buf_dmabuf_put(vb);
 +
 + return ret;
 +}
 +
 +/**
   * __enqueue_in_driver() - enqueue a vb2_buffer in driver for processing
   */
  static void __enqueue_in_driver(struct vb2_buffer *vb)
 @@ -985,6 +1156,9 @@ static int __buf_prepare(struct vb2_buffer *vb, const 
 struct v4l2_buffer *b)
   case V4L2_MEMORY_USERPTR:
   ret = __qbuf_userptr(vb, b);
   break;
 + case V4L2_MEMORY_DMABUF:
 + ret = __qbuf_dmabuf(vb, b);
 + break;
   default:
   WARN(1, Invalid queue type\n);
   ret = -EINVAL;
 @@ -1284,6 +1458,7 @@ int vb2_dqbuf(struct vb2_queue *q, struct v4l2_buffer 
 *b, bool nonblocking)
  {
   struct vb2_buffer *vb = NULL;
   int ret;
 + unsigned int plane;
  
   if (q-fileio) {
   dprintk(1, dqbuf: file io in progress\n);
 @@ -1307,6 +1482,15 @@ int vb2_dqbuf(struct vb2_queue *q, struct v4l2_buffer 
 *b, bool nonblocking)
   return ret;
   }
  
 + /* TODO: this unpins the