Re: [PATCH v6 07/10] media: uvcvideo: Move decode processing to process context

2018-12-04 Thread Laurent Pinchart
Hi Kieran,

Thank you for the patch.

On Friday, 9 November 2018 19:05:30 EET Kieran Bingham wrote:
> Newer high definition cameras, and cameras with multiple lenses such as
> the range of stereo-vision cameras now available have ever increasing
> data rates.
> 
> The inclusion of a variable length packet header in URB packets mean
> that we must memcpy the frame data out to our destination 'manually'.
> This can result in data rates of up to 2 gigabits per second being
> processed.
> 
> To improve efficiency, and maximise throughput, handle the URB decode
> processing through a work queue to move it from interrupt context, and
> allow multiple processors to work on URBs in parallel.
> 
> Signed-off-by: Kieran Bingham 

Reviewed-by: Laurent Pinchart 

> 
> ---
> v2:
>  - Lock full critical section of usb_submit_urb()
> 
> v3:
>  - Fix race on submitting uvc_video_decode_data_work() to work queue.
>  - Rename uvc_decode_op -> uvc_copy_op (Generic to encode/decode)
>  - Rename decodes -> copy_operations
>  - Don't queue work if there is no async task
>  - obtain copy op structure directly in uvc_video_decode_data()
>  - uvc_video_decode_data_work() -> uvc_video_copy_data_work()
> 
> v4:
>  - Provide for_each_uvc_urb()
>  - Simplify fix for shutdown race to flush queue before freeing URBs
>  - Rebase to v4.16-rc4 (linux-media/master) adjusting for metadata
>conflicts.
> 
> v5:
>  - Rebase to media/v4.20-2
>  - Use GFP_KERNEL allocation in uvc_video_copy_data_work()
>  - Fix function documentation for uvc_video_copy_data_work()
>  - Add periods to the end of sentences
>  - Rename 'decode' variable to 'op' in uvc_video_decode_data()
>  - Move uvc_urb->async_operations initialisation to before use
>  - Move async workqueue to match uvc_streaming lifetime instead of
>streamon/streamoff
> 
> v6:
>  - Utilise the new streaming object lifetime functions to perform
>allocation and destruction of the async workqueue.
> 
>  drivers/media/usb/uvc/uvc_driver.c |  11 +++-
>  drivers/media/usb/uvc/uvc_video.c  | 104 +++---
>  drivers/media/usb/uvc/uvcvideo.h   |  28 -
>  3 files changed, 119 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/media/usb/uvc/uvc_driver.c
> b/drivers/media/usb/uvc/uvc_driver.c index afb44d1c9d04..b62cbd800111
> 100644
> --- a/drivers/media/usb/uvc/uvc_driver.c
> +++ b/drivers/media/usb/uvc/uvc_driver.c
> @@ -401,6 +401,9 @@ static struct uvc_streaming *uvc_stream_by_id(struct
> uvc_device *dev, int id)
> 
>  static void uvc_stream_delete(struct uvc_streaming *stream)
>  {
> + if (stream->async_wq)
> + destroy_workqueue(stream->async_wq);
> +
>   mutex_destroy(>mutex);
> 
>   usb_put_intf(stream->intf);
> @@ -425,6 +428,14 @@ static struct uvc_streaming *uvc_stream_new(struct
> uvc_device *dev, stream->intf = usb_get_intf(intf);
>   stream->intfnum = intf->cur_altsetting->desc.bInterfaceNumber;
> 
> + /* Allocate a stream specific work queue for asynchronous tasks. */
> + stream->async_wq = alloc_workqueue("uvcvideo", WQ_UNBOUND | WQ_HIGHPRI,
> +0);
> + if (!stream->async_wq) {
> + uvc_stream_delete(stream);
> + return NULL;
> + }
> +
>   return stream;
>  }
> 
> diff --git a/drivers/media/usb/uvc/uvc_video.c
> b/drivers/media/usb/uvc/uvc_video.c index 7a7779e1b466..e19bdf089cc4 100644
> --- a/drivers/media/usb/uvc/uvc_video.c
> +++ b/drivers/media/usb/uvc/uvc_video.c
> @@ -1094,21 +1094,54 @@ static int uvc_video_decode_start(struct
> uvc_streaming *stream, return data[0];
>  }
> 
> -static void uvc_video_decode_data(struct uvc_streaming *stream,
> +/*
> + * uvc_video_decode_data_work: Asynchronous memcpy processing
> + *
> + * Copy URB data to video buffers in process context, releasing buffer
> + * references and requeuing the URB when done.
> + */
> +static void uvc_video_copy_data_work(struct work_struct *work)
> +{
> + struct uvc_urb *uvc_urb = container_of(work, struct uvc_urb, work);
> + unsigned int i;
> + int ret;
> +
> + for (i = 0; i < uvc_urb->async_operations; i++) {
> + struct uvc_copy_op *op = _urb->copy_operations[i];
> +
> + memcpy(op->dst, op->src, op->len);
> +
> + /* Release reference taken on this buffer. */
> + uvc_queue_buffer_release(op->buf);
> + }
> +
> + ret = usb_submit_urb(uvc_urb->urb, GFP_KERNEL);
> + if (ret < 0)
> + uvc_printk(KERN_ERR, "Failed to resubmit video URB (%d).\n",
> +ret);
> +}
> +
> +static void uvc_video_decode_data(struct uvc_urb *uvc_urb,
>   struct uvc_buffer *buf, const u8 *data, int len)
>  {
> - unsigned int maxlen, nbytes;
> - void *mem;
> + unsigned int active_op = uvc_urb->async_operations;
> + struct uvc_copy_op *op = _urb->copy_operations[active_op];
> + unsigned int maxlen;
> 
>   if (len <= 0)
>   return;
> 
> - 

[PATCH v6 07/10] media: uvcvideo: Move decode processing to process context

2018-11-09 Thread Kieran Bingham
Newer high definition cameras, and cameras with multiple lenses such as
the range of stereo-vision cameras now available have ever increasing
data rates.

The inclusion of a variable length packet header in URB packets mean
that we must memcpy the frame data out to our destination 'manually'.
This can result in data rates of up to 2 gigabits per second being
processed.

To improve efficiency, and maximise throughput, handle the URB decode
processing through a work queue to move it from interrupt context, and
allow multiple processors to work on URBs in parallel.

Signed-off-by: Kieran Bingham 

---
v2:
 - Lock full critical section of usb_submit_urb()

v3:
 - Fix race on submitting uvc_video_decode_data_work() to work queue.
 - Rename uvc_decode_op -> uvc_copy_op (Generic to encode/decode)
 - Rename decodes -> copy_operations
 - Don't queue work if there is no async task
 - obtain copy op structure directly in uvc_video_decode_data()
 - uvc_video_decode_data_work() -> uvc_video_copy_data_work()

v4:
 - Provide for_each_uvc_urb()
 - Simplify fix for shutdown race to flush queue before freeing URBs
 - Rebase to v4.16-rc4 (linux-media/master) adjusting for metadata
   conflicts.

v5:
 - Rebase to media/v4.20-2
 - Use GFP_KERNEL allocation in uvc_video_copy_data_work()
 - Fix function documentation for uvc_video_copy_data_work()
 - Add periods to the end of sentences
 - Rename 'decode' variable to 'op' in uvc_video_decode_data()
 - Move uvc_urb->async_operations initialisation to before use
 - Move async workqueue to match uvc_streaming lifetime instead of
   streamon/streamoff

v6:
 - Utilise the new streaming object lifetime functions to perform
   allocation and destruction of the async workqueue.

 drivers/media/usb/uvc/uvc_driver.c |  11 +++-
 drivers/media/usb/uvc/uvc_video.c  | 104 +++---
 drivers/media/usb/uvc/uvcvideo.h   |  28 -
 3 files changed, 119 insertions(+), 24 deletions(-)

diff --git a/drivers/media/usb/uvc/uvc_driver.c 
b/drivers/media/usb/uvc/uvc_driver.c
index afb44d1c9d04..b62cbd800111 100644
--- a/drivers/media/usb/uvc/uvc_driver.c
+++ b/drivers/media/usb/uvc/uvc_driver.c
@@ -401,6 +401,9 @@ static struct uvc_streaming *uvc_stream_by_id(struct 
uvc_device *dev, int id)
 
 static void uvc_stream_delete(struct uvc_streaming *stream)
 {
+   if (stream->async_wq)
+   destroy_workqueue(stream->async_wq);
+
mutex_destroy(>mutex);
 
usb_put_intf(stream->intf);
@@ -425,6 +428,14 @@ static struct uvc_streaming *uvc_stream_new(struct 
uvc_device *dev,
stream->intf = usb_get_intf(intf);
stream->intfnum = intf->cur_altsetting->desc.bInterfaceNumber;
 
+   /* Allocate a stream specific work queue for asynchronous tasks. */
+   stream->async_wq = alloc_workqueue("uvcvideo", WQ_UNBOUND | WQ_HIGHPRI,
+  0);
+   if (!stream->async_wq) {
+   uvc_stream_delete(stream);
+   return NULL;
+   }
+
return stream;
 }
 
diff --git a/drivers/media/usb/uvc/uvc_video.c 
b/drivers/media/usb/uvc/uvc_video.c
index 7a7779e1b466..e19bdf089cc4 100644
--- a/drivers/media/usb/uvc/uvc_video.c
+++ b/drivers/media/usb/uvc/uvc_video.c
@@ -1094,21 +1094,54 @@ static int uvc_video_decode_start(struct uvc_streaming 
*stream,
return data[0];
 }
 
-static void uvc_video_decode_data(struct uvc_streaming *stream,
+/*
+ * uvc_video_decode_data_work: Asynchronous memcpy processing
+ *
+ * Copy URB data to video buffers in process context, releasing buffer
+ * references and requeuing the URB when done.
+ */
+static void uvc_video_copy_data_work(struct work_struct *work)
+{
+   struct uvc_urb *uvc_urb = container_of(work, struct uvc_urb, work);
+   unsigned int i;
+   int ret;
+
+   for (i = 0; i < uvc_urb->async_operations; i++) {
+   struct uvc_copy_op *op = _urb->copy_operations[i];
+
+   memcpy(op->dst, op->src, op->len);
+
+   /* Release reference taken on this buffer. */
+   uvc_queue_buffer_release(op->buf);
+   }
+
+   ret = usb_submit_urb(uvc_urb->urb, GFP_KERNEL);
+   if (ret < 0)
+   uvc_printk(KERN_ERR, "Failed to resubmit video URB (%d).\n",
+  ret);
+}
+
+static void uvc_video_decode_data(struct uvc_urb *uvc_urb,
struct uvc_buffer *buf, const u8 *data, int len)
 {
-   unsigned int maxlen, nbytes;
-   void *mem;
+   unsigned int active_op = uvc_urb->async_operations;
+   struct uvc_copy_op *op = _urb->copy_operations[active_op];
+   unsigned int maxlen;
 
if (len <= 0)
return;
 
-   /* Copy the video data to the buffer. */
maxlen = buf->length - buf->bytesused;
-   mem = buf->mem + buf->bytesused;
-   nbytes = min((unsigned int)len, maxlen);
-   memcpy(mem, data, nbytes);
-   buf->bytesused += nbytes;
+
+   /* Take a buffer reference for async work.