The job_lock mutex guarded dev->in_flight_job across ethosu_job_run(),
the threaded IRQ handler, and ethosu_job_timedout(). However, the DRM
scheduler already provides most of the serialization required, which
makes this mutex redundant.

Analyzing the different scenarios:

  1. run_job() and timedout_job() are mutually exclusive scheduler
     callbacks, so the scheduler itself serializes them.

  2. run_job() and the IRQ handler are implicitly serialized, but they can
     overlap in time: dma_fence_signal() synchronously queues the next
     run_job onto submit_wq, and the worker can execute run_job(next) on
     another CPU before the IRQ thread finishes. The mutex previously kept
     the IRQ's trailing "in_flight_job = NULL" from racing run_job(next)'s
     "in_flight_job = next" store.

     The handler is now restructured to clear in_flight_job before calling
     dma_fence_signal(), so any run_job(next) woken by the signal observes
     NULL.

  3. timedout_job() and the IRQ handler can also overlap if the hardware
     completes the timed-out job near the timeout boundary, since
     drm_sched_stop()'s cancel_work_sync() only synchronizes with the
     scheduler's workqueue. The IRQ handler saves in_flight_job into a
     local with READ_ONCE() before dereferencing ->done_fence, and
     run_job()/timedout_job() publish the field with WRITE_ONCE(). This
     prevents the compiler from reloading the pointer, and keeps the IRQ
     thread operating on the same job for the duration of the handler even
     if timedout_job() concurrently clears the field.

Drop the mutex along with its initialization.

Signed-off-by: Maíra Canal <[email protected]>
---

Hi,

I noticed this pattern while reviewing "[PATCH v2] accel: ethosu: Add
performance counter support" and I had the impression this could be a
nice code simplification. However, I don't have the hardware to test it,
so this was only "compile-tested".

Best regards,
- Maíra

 drivers/accel/ethosu/ethosu_device.h |  2 --
 drivers/accel/ethosu/ethosu_job.c    | 22 ++++++++--------------
 2 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/drivers/accel/ethosu/ethosu_device.h 
b/drivers/accel/ethosu/ethosu_device.h
index b189fa783d6a..c2c59681a019 100644
--- a/drivers/accel/ethosu/ethosu_device.h
+++ b/drivers/accel/ethosu/ethosu_device.h
@@ -173,8 +173,6 @@ struct ethosu_device {
        struct drm_ethosu_npu_info npu_info;
 
        struct ethosu_job *in_flight_job;
-       /* For in_flight_job and ethosu_job_hw_submit() */
-       struct mutex job_lock;
 
        /* For dma_fence */
        spinlock_t fence_lock;
diff --git a/drivers/accel/ethosu/ethosu_job.c 
b/drivers/accel/ethosu/ethosu_job.c
index 418463c03bfb..5a9bd017f3bc 100644
--- a/drivers/accel/ethosu/ethosu_job.c
+++ b/drivers/accel/ethosu/ethosu_job.c
@@ -194,10 +194,8 @@ static struct dma_fence *ethosu_job_run(struct 
drm_sched_job *sched_job)
                       dev->fence_context, ++dev->emit_seqno);
        dma_fence_get(fence);
 
-       scoped_guard(mutex, &dev->job_lock) {
-               dev->in_flight_job = job;
-               ethosu_job_hw_submit(dev, job);
-       }
+       WRITE_ONCE(dev->in_flight_job, job);
+       ethosu_job_hw_submit(dev, job);
 
        return fence;
 }
@@ -205,6 +203,7 @@ static struct dma_fence *ethosu_job_run(struct 
drm_sched_job *sched_job)
 static void ethosu_job_handle_irq(struct ethosu_device *dev)
 {
        u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS);
+       struct ethosu_job *job;
 
        if (status & (STATUS_BUS_STATUS | STATUS_CMD_PARSE_ERR)) {
                dev_err(dev->base.dev, "Error IRQ - %x\n", status);
@@ -212,11 +211,10 @@ static void ethosu_job_handle_irq(struct ethosu_device 
*dev)
                return;
        }
 
-       scoped_guard(mutex, &dev->job_lock) {
-               if (dev->in_flight_job) {
-                       dma_fence_signal(dev->in_flight_job->done_fence);
-                       dev->in_flight_job = NULL;
-               }
+       job = READ_ONCE(dev->in_flight_job);
+       if (job) {
+               WRITE_ONCE(dev->in_flight_job, NULL);
+               dma_fence_signal(job->done_fence);
        }
 }
 
@@ -272,8 +270,7 @@ static enum drm_gpu_sched_stat ethosu_job_timedout(struct 
drm_sched_job *bad)
 
        drm_sched_stop(&dev->sched, bad);
 
-       scoped_guard(mutex, &dev->job_lock)
-               dev->in_flight_job = NULL;
+       WRITE_ONCE(dev->in_flight_job, NULL);
 
        /* Proceed with reset now. */
        pm_runtime_force_suspend(dev->base.dev);
@@ -304,9 +301,6 @@ int ethosu_job_init(struct ethosu_device *edev)
        int ret;
 
        spin_lock_init(&edev->fence_lock);
-       ret = devm_mutex_init(dev, &edev->job_lock);
-       if (ret)
-               return ret;
        ret = devm_mutex_init(dev, &edev->sched_lock);
        if (ret)
                return ret;
-- 
2.54.0

Reply via email to