On Mon Aug 5 16:55:35 2024 +0200, Jacopo Mondi wrote:
> Currently the 'pispbe_schedule()' function does two things:
> 
> 1) Tries to assemble a job by inspecting all the video node queues
>    to make sure all the required buffers are available
> 2) Submit the job to the hardware
> 
> The pispbe_schedule() function is called at:
> 
> - video device start_streaming() time
> - video device qbuf() time
> - irq handler
> 
> As assembling a job requires inspecting all queues, it is a rather
> time consuming operation which is better not run in IRQ context.
> 
> To avoid executing the time consuming job creation in interrupt
> context split the job creation and job scheduling in two distinct
> operations. When a well-formed job is created, append it to the
> newly introduced 'pispbe->job_queue' where it will be dequeued from
> by the scheduling routine.
> 
> As the per-node 'ready_queue' buffer list is only accessed in vb2 ops
> callbacks, protected by the node->queue_lock mutex, it is not necessary
> to guard it with a dedicated spinlock so drop it. Also use the
> spin_lock_irq() variant in all functions not called from an IRQ context
> where the spin_lock_irqsave() version was used.
> 
> Reviewed-by: Naushir Patuck <na...@raspberrypi.com>
> Reviewed-by: Laurent Pinchart <laurent.pinch...@ideasonboard.com>
> Signed-off-by: Jacopo Mondi <jacopo.mo...@ideasonboard.com>
> Signed-off-by: Hans Verkuil <hverk...@xs4all.nl>

Patch committed.

Thanks,
Hans Verkuil

 .../media/platform/raspberrypi/pisp_be/pisp_be.c   | 161 +++++++++++----------
 1 file changed, 87 insertions(+), 74 deletions(-)

---

diff --git a/drivers/media/platform/raspberrypi/pisp_be/pisp_be.c 
b/drivers/media/platform/raspberrypi/pisp_be/pisp_be.c
index 92c452891d6c..ccc6cb99868b 100644
--- a/drivers/media/platform/raspberrypi/pisp_be/pisp_be.c
+++ b/drivers/media/platform/raspberrypi/pisp_be/pisp_be.c
@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
+#include <linux/slab.h>
 #include <media/v4l2-device.h>
 #include <media/v4l2-ioctl.h>
 #include <media/videobuf2-dma-contig.h>
@@ -161,8 +162,6 @@ struct pispbe_node {
        struct mutex node_lock;
        /* vb2_queue lock */
        struct mutex queue_lock;
-       /* Protect pispbe_node->ready_queue and pispbe_buffer->ready_list */
-       spinlock_t ready_lock;
        struct list_head ready_queue;
        struct vb2_queue queue;
        struct v4l2_format format;
@@ -190,6 +189,8 @@ struct pispbe_hw_enables {
 
 /* Records a job configuration and memory addresses. */
 struct pispbe_job_descriptor {
+       struct list_head queue;
+       struct pispbe_buffer *buffers[PISPBE_NUM_NODES];
        dma_addr_t hw_dma_addrs[N_HW_ADDRESSES];
        struct pisp_be_tiles_config *config;
        struct pispbe_hw_enables hw_enables;
@@ -215,8 +216,10 @@ struct pispbe_dev {
        unsigned int sequence;
        u32 streaming_map;
        struct pispbe_job queued_job, running_job;
-       spinlock_t hw_lock; /* protects "hw_busy" flag and streaming_map */
+       /* protects "hw_busy" flag, streaming_map and job_queue */
+       spinlock_t hw_lock;
        bool hw_busy; /* non-zero if a job is queued or is being started */
+       struct list_head job_queue;
        int irq;
        u32 hw_version;
        u8 done, started;
@@ -440,42 +443,48 @@ static void pispbe_xlate_addrs(struct pispbe_dev *pispbe,
  * For Output0, Output1, Tdn and Stitch, a buffer only needs to be
  * available if the blocks are enabled in the config.
  *
- * Needs to be called with hw_lock held.
+ * If all the buffers required to form a job are available, append the
+ * job descriptor to the job queue to be later queued to the HW.
  *
  * Returns 0 if a job has been successfully prepared, < 0 otherwise.
  */
-static int pispbe_prepare_job(struct pispbe_dev *pispbe,
-                             struct pispbe_job_descriptor *job)
+static int pispbe_prepare_job(struct pispbe_dev *pispbe)
 {
+       struct pispbe_job_descriptor __free(kfree) *job = NULL;
        struct pispbe_buffer *buf[PISPBE_NUM_NODES] = {};
+       unsigned int streaming_map;
        unsigned int config_index;
        struct pispbe_node *node;
-       unsigned long flags;
 
-       lockdep_assert_held(&pispbe->hw_lock);
+       lockdep_assert_irqs_enabled();
 
-       memset(job, 0, sizeof(struct pispbe_job_descriptor));
+       scoped_guard(spinlock_irq, &pispbe->hw_lock) {
+               static const u32 mask = BIT(CONFIG_NODE) | BIT(MAIN_INPUT_NODE);
 
-       if (((BIT(CONFIG_NODE) | BIT(MAIN_INPUT_NODE)) &
-               pispbe->streaming_map) !=
-                       (BIT(CONFIG_NODE) | BIT(MAIN_INPUT_NODE)))
-               return -ENODEV;
+               if ((pispbe->streaming_map & mask) != mask)
+                       return -ENODEV;
+
+               /*
+                * Take a copy of streaming_map: nodes activated after this
+                * point are ignored when preparing this job.
+                */
+               streaming_map = pispbe->streaming_map;
+       }
+
+       job = kzalloc(sizeof(*job), GFP_KERNEL);
+       if (!job)
+               return -ENOMEM;
 
        node = &pispbe->node[CONFIG_NODE];
-       spin_lock_irqsave(&node->ready_lock, flags);
        buf[CONFIG_NODE] = list_first_entry_or_null(&node->ready_queue,
                                                    struct pispbe_buffer,
                                                    ready_list);
-       if (buf[CONFIG_NODE]) {
-               list_del(&buf[CONFIG_NODE]->ready_list);
-               pispbe->queued_job.buf[CONFIG_NODE] = buf[CONFIG_NODE];
-       }
-       spin_unlock_irqrestore(&node->ready_lock, flags);
-
-       /* Exit early if no config buffer has been queued. */
        if (!buf[CONFIG_NODE])
                return -ENODEV;
 
+       list_del(&buf[CONFIG_NODE]->ready_list);
+       job->buffers[CONFIG_NODE] = buf[CONFIG_NODE];
+
        config_index = buf[CONFIG_NODE]->vb.vb2_buf.index;
        job->config = &pispbe->config[config_index];
        job->tiles = pispbe->config_dma_addr +
@@ -495,7 +504,7 @@ static int pispbe_prepare_job(struct pispbe_dev *pispbe,
                        continue;
 
                buf[i] = NULL;
-               if (!(pispbe->streaming_map & BIT(i)))
+               if (!(streaming_map & BIT(i)))
                        continue;
 
                if ((!(rgb_en & PISP_BE_RGB_ENABLE_OUTPUT0) &&
@@ -522,25 +531,28 @@ static int pispbe_prepare_job(struct pispbe_dev *pispbe,
                node = &pispbe->node[i];
 
                /* Pull a buffer from each V4L2 queue to form the queued job */
-               spin_lock_irqsave(&node->ready_lock, flags);
                buf[i] = list_first_entry_or_null(&node->ready_queue,
                                                  struct pispbe_buffer,
                                                  ready_list);
                if (buf[i]) {
                        list_del(&buf[i]->ready_list);
-                       pispbe->queued_job.buf[i] = buf[i];
+                       job->buffers[i] = buf[i];
                }
-               spin_unlock_irqrestore(&node->ready_lock, flags);
 
                if (!buf[i] && !ignore_buffers)
                        goto err_return_buffers;
        }
 
-       pispbe->queued_job.valid = true;
-
        /* Convert buffers to DMA addresses for the hardware */
        pispbe_xlate_addrs(pispbe, job, buf);
 
+       scoped_guard(spinlock_irq, &pispbe->hw_lock) {
+               list_add_tail(&job->queue, &pispbe->job_queue);
+       }
+
+       /* Set job to NULL to avoid automatic release due to __free(). */
+       job = NULL;
+
        return 0;
 
 err_return_buffers:
@@ -551,33 +563,37 @@ err_return_buffers:
                        continue;
 
                /* Return the buffer to the ready_list queue */
-               spin_lock_irqsave(&n->ready_lock, flags);
                list_add(&buf[i]->ready_list, &n->ready_queue);
-               spin_unlock_irqrestore(&n->ready_lock, flags);
        }
 
-       memset(&pispbe->queued_job, 0, sizeof(pispbe->queued_job));
-
        return -ENODEV;
 }
 
 static void pispbe_schedule(struct pispbe_dev *pispbe, bool clear_hw_busy)
 {
-       struct pispbe_job_descriptor job;
-       unsigned long flags;
-       int ret;
+       struct pispbe_job_descriptor *job;
+
+       scoped_guard(spinlock_irqsave, &pispbe->hw_lock) {
+               if (clear_hw_busy)
+                       pispbe->hw_busy = false;
 
-       spin_lock_irqsave(&pispbe->hw_lock, flags);
+               if (pispbe->hw_busy)
+                       return;
 
-       if (clear_hw_busy)
-               pispbe->hw_busy = false;
+               job = list_first_entry_or_null(&pispbe->job_queue,
+                                              struct pispbe_job_descriptor,
+                                              queue);
+               if (!job)
+                       return;
 
-       if (pispbe->hw_busy)
-               goto unlock_and_return;
+               list_del(&job->queue);
 
-       ret = pispbe_prepare_job(pispbe, &job);
-       if (ret)
-               goto unlock_and_return;
+               for (unsigned int i = 0; i < PISPBE_NUM_NODES; i++)
+                       pispbe->queued_job.buf[i] = job->buffers[i];
+               pispbe->queued_job.valid = true;
+
+               pispbe->hw_busy = true;
+       }
 
        /*
         * We can kick the job off without the hw_lock, as this can
@@ -585,16 +601,8 @@ static void pispbe_schedule(struct pispbe_dev *pispbe, 
bool clear_hw_busy)
         * only when the following job has been queued and an interrupt
         * is rised.
         */
-       pispbe->hw_busy = true;
-       spin_unlock_irqrestore(&pispbe->hw_lock, flags);
-
-       pispbe_queue_job(pispbe, &job);
-
-       return;
-
-unlock_and_return:
-       /* No job has been queued, just release the lock and return. */
-       spin_unlock_irqrestore(&pispbe->hw_lock, flags);
+       pispbe_queue_job(pispbe, job);
+       kfree(job);
 }
 
 static void pispbe_isr_jobdone(struct pispbe_dev *pispbe,
@@ -846,18 +854,16 @@ static void pispbe_node_buffer_queue(struct vb2_buffer 
*buf)
                container_of(vbuf, struct pispbe_buffer, vb);
        struct pispbe_node *node = vb2_get_drv_priv(buf->vb2_queue);
        struct pispbe_dev *pispbe = node->pispbe;
-       unsigned long flags;
 
        dev_dbg(pispbe->dev, "%s: for node %s\n", __func__, NODE_NAME(node));
-       spin_lock_irqsave(&node->ready_lock, flags);
        list_add_tail(&buffer->ready_list, &node->ready_queue);
-       spin_unlock_irqrestore(&node->ready_lock, flags);
 
        /*
         * Every time we add a buffer, check if there's now some work for the hw
         * to do.
         */
-       pispbe_schedule(pispbe, false);
+       if (!pispbe_prepare_job(pispbe))
+               pispbe_schedule(pispbe, false);
 }
 
 static int pispbe_node_start_streaming(struct vb2_queue *q, unsigned int count)
@@ -865,17 +871,16 @@ static int pispbe_node_start_streaming(struct vb2_queue 
*q, unsigned int count)
        struct pispbe_node *node = vb2_get_drv_priv(q);
        struct pispbe_dev *pispbe = node->pispbe;
        struct pispbe_buffer *buf, *tmp;
-       unsigned long flags;
        int ret;
 
        ret = pm_runtime_resume_and_get(pispbe->dev);
        if (ret < 0)
                goto err_return_buffers;
 
-       spin_lock_irqsave(&pispbe->hw_lock, flags);
-       node->pispbe->streaming_map |=  BIT(node->id);
-       node->pispbe->sequence = 0;
-       spin_unlock_irqrestore(&pispbe->hw_lock, flags);
+       scoped_guard(spinlock_irq, &pispbe->hw_lock) {
+               node->pispbe->streaming_map |=  BIT(node->id);
+               node->pispbe->sequence = 0;
+       }
 
        dev_dbg(pispbe->dev, "%s: for node %s (count %u)\n",
                __func__, NODE_NAME(node), count);
@@ -883,17 +888,16 @@ static int pispbe_node_start_streaming(struct vb2_queue 
*q, unsigned int count)
                node->pispbe->streaming_map);
 
        /* Maybe we're ready to run. */
-       pispbe_schedule(pispbe, false);
+       if (!pispbe_prepare_job(pispbe))
+               pispbe_schedule(pispbe, false);
 
        return 0;
 
 err_return_buffers:
-       spin_lock_irqsave(&pispbe->hw_lock, flags);
        list_for_each_entry_safe(buf, tmp, &node->ready_queue, ready_list) {
                list_del(&buf->ready_list);
                vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_QUEUED);
        }
-       spin_unlock_irqrestore(&pispbe->hw_lock, flags);
 
        return ret;
 }
@@ -902,8 +906,9 @@ static void pispbe_node_stop_streaming(struct vb2_queue *q)
 {
        struct pispbe_node *node = vb2_get_drv_priv(q);
        struct pispbe_dev *pispbe = node->pispbe;
+       struct pispbe_job_descriptor *job, *temp;
        struct pispbe_buffer *buf;
-       unsigned long flags;
+       LIST_HEAD(tmp_list);
 
        /*
         * Now this is a bit awkward. In a simple M2M device we could just wait
@@ -915,11 +920,7 @@ static void pispbe_node_stop_streaming(struct vb2_queue *q)
         * This may return buffers out of order.
         */
        dev_dbg(pispbe->dev, "%s: for node %s\n", __func__, NODE_NAME(node));
-       spin_lock_irqsave(&pispbe->hw_lock, flags);
        do {
-               unsigned long flags1;
-
-               spin_lock_irqsave(&node->ready_lock, flags1);
                buf = list_first_entry_or_null(&node->ready_queue,
                                               struct pispbe_buffer,
                                               ready_list);
@@ -927,15 +928,26 @@ static void pispbe_node_stop_streaming(struct vb2_queue 
*q)
                        list_del(&buf->ready_list);
                        vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_ERROR);
                }
-               spin_unlock_irqrestore(&node->ready_lock, flags1);
        } while (buf);
-       spin_unlock_irqrestore(&pispbe->hw_lock, flags);
 
        vb2_wait_for_all_buffers(&node->queue);
 
-       spin_lock_irqsave(&pispbe->hw_lock, flags);
+       spin_lock_irq(&pispbe->hw_lock);
        pispbe->streaming_map &= ~BIT(node->id);
-       spin_unlock_irqrestore(&pispbe->hw_lock, flags);
+
+       if (pispbe->streaming_map == 0) {
+               /*
+                * If all nodes have stopped streaming release all jobs
+                * without holding the lock.
+                */
+               list_splice_init(&pispbe->job_queue, &tmp_list);
+       }
+       spin_unlock_irq(&pispbe->hw_lock);
+
+       list_for_each_entry_safe(job, temp, &tmp_list, queue) {
+               list_del(&job->queue);
+               kfree(job);
+       }
 
        pm_runtime_mark_last_busy(pispbe->dev);
        pm_runtime_put_autosuspend(pispbe->dev);
@@ -1393,7 +1405,6 @@ static int pispbe_init_node(struct pispbe_dev *pispbe, 
unsigned int id)
        mutex_init(&node->node_lock);
        mutex_init(&node->queue_lock);
        INIT_LIST_HEAD(&node->ready_queue);
-       spin_lock_init(&node->ready_lock);
 
        node->format.type = node->buf_type;
        pispbe_node_def_fmt(node);
@@ -1677,6 +1688,8 @@ static int pispbe_probe(struct platform_device *pdev)
        if (!pispbe)
                return -ENOMEM;
 
+       INIT_LIST_HEAD(&pispbe->job_queue);
+
        dev_set_drvdata(&pdev->dev, pispbe);
        pispbe->dev = &pdev->dev;
        platform_set_drvdata(pdev, pispbe);

Reply via email to