This patch adds an IRQ handler for completions of command submissions (CS)
whose jobs are sent on H/W queues.
The patch adds a CS shadow queue, from which the handler retrieves the
CS, and a dedicated workqueue, on which the handler queues a work item
that frees the CS jobs.

Signed-off-by: Tomer Tayar <[email protected]>
---
 drivers/misc/habanalabs/command_submission.c | 16 +++++++
 drivers/misc/habanalabs/device.c             | 27 +++++++++++-
 drivers/misc/habanalabs/habanalabs.h         | 18 ++++++++
 drivers/misc/habanalabs/hw_queue.c           |  2 +
 drivers/misc/habanalabs/irq.c                | 46 ++++++++++++++++++++
 5 files changed, 107 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c
index 25dc7308da19..b995a02a31dd 100644
--- a/drivers/misc/habanalabs/command_submission.c
+++ b/drivers/misc/habanalabs/command_submission.c
@@ -267,6 +267,8 @@ static void cs_do_release(struct kref *ref)
 
        hl_ctx_put(cs->ctx);
 
+       hdev->shadow_cs_queue[cs->sequence & (HL_MAX_PENDING_CS - 1)] = NULL;
+
        if (cs->timedout)
                dma_fence_set_error(cs->fence, -ETIMEDOUT);
        else if (cs->aborted)
@@ -391,6 +393,7 @@ void hl_cs_rollback_all(struct hl_device *hdev)
 
        /* flush all completions */
        flush_workqueue(hdev->cq_wq);
+       flush_workqueue(hdev->cs_cmplt_wq);
 
        /* Make sure we don't have leftovers in the H/W queues mirror list */
        list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list,
@@ -415,6 +418,16 @@ static void job_wq_completion(struct work_struct *work)
        free_job(hdev, job);
 }
 
+static void cs_completion(struct work_struct *work) /* finish_work handler */
+{
+       struct hl_cs *cs = container_of(work, struct hl_cs, finish_work);
+       struct hl_device *hdev = cs->ctx->hdev;
+       struct hl_cs_job *job, *tmp;
+
+       list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
+               free_job(hdev, job); /* job may go away here, hence _safe */
+}
+
 static int validate_queue_index(struct hl_device *hdev,
                                struct hl_cs_chunk *chunk,
                                enum hl_queue_type *queue_type,
@@ -625,6 +638,9 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
                goto free_cs_object;
        }
 
+       if (job->queue_type == QUEUE_TYPE_HW)
+               INIT_WORK(&cs->finish_work, cs_completion);
+
        rc = hl_hw_queue_schedule_cs(cs);
        if (rc) {
                dev_err(hdev->dev,
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
index 2f5a4da707e7..6c13f05c3120 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/device.c
@@ -284,11 +284,19 @@ static int device_early_init(struct hl_device *hdev)
                goto free_cq_wq;
        }
 
+       hdev->cs_cmplt_wq = alloc_workqueue("hl-cs-completions", WQ_UNBOUND, 0);
+       if (!hdev->cs_cmplt_wq) {
+               dev_err(hdev->dev,
+                       "Failed to allocate CS completions workqueue\n");
+               rc = -ENOMEM;
+               goto free_eq_wq;
+       }
+
        hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
                                        GFP_KERNEL);
        if (!hdev->hl_chip_info) {
                rc = -ENOMEM;
-               goto free_eq_wq;
+               goto free_cs_cmplt_wq;
        }
 
        hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE,
@@ -314,6 +322,8 @@ static int device_early_init(struct hl_device *hdev)
 
 free_chip_info:
        kfree(hdev->hl_chip_info);
+free_cs_cmplt_wq:
+       destroy_workqueue(hdev->cs_cmplt_wq);
 free_eq_wq:
        destroy_workqueue(hdev->eq_wq);
 free_cq_wq:
@@ -346,6 +356,7 @@ static void device_early_fini(struct hl_device *hdev)
        kfree(hdev->idle_busy_ts_arr);
        kfree(hdev->hl_chip_info);
 
+       destroy_workqueue(hdev->cs_cmplt_wq);
        destroy_workqueue(hdev->eq_wq);
        destroy_workqueue(hdev->cq_wq);
 
@@ -1138,6 +1149,14 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
                }
        }
 
+       hdev->shadow_cs_queue = kmalloc_array(HL_MAX_PENDING_CS,
+                                               sizeof(*hdev->shadow_cs_queue),
+                                               GFP_KERNEL | __GFP_ZERO);
+       if (!hdev->shadow_cs_queue) {
+               rc = -ENOMEM;
+               goto cq_fini;
+       }
+
        /*
         * Initialize the event queue. Must be done before hw_init,
         * because there the address of the event queue is being
@@ -1146,7 +1165,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
        rc = hl_eq_init(hdev, &hdev->event_queue);
        if (rc) {
                dev_err(hdev->dev, "failed to initialize event queue\n");
-               goto cq_fini;
+               goto free_shadow_cs_queue;
        }
 
        /* MMU S/W must be initialized before kernel context is created */
@@ -1269,6 +1288,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
        hl_mmu_fini(hdev);
 eq_fini:
        hl_eq_fini(hdev, &hdev->event_queue);
+free_shadow_cs_queue:
+       kfree(hdev->shadow_cs_queue);
 cq_fini:
        for (i = 0 ; i < cq_ready_cnt ; i++)
                hl_cq_fini(hdev, &hdev->completion_queue[i]);
@@ -1383,6 +1404,8 @@ void hl_device_fini(struct hl_device *hdev)
 
        hl_eq_fini(hdev, &hdev->event_queue);
 
+       kfree(hdev->shadow_cs_queue);
+
        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
                hl_cq_fini(hdev, &hdev->completion_queue[i]);
        kfree(hdev->completion_queue);
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index c1af83f96415..2efb5e1e62cb 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -722,6 +722,7 @@ struct hl_userptr {
  * @job_lock: spinlock for the CS's jobs list. Needed for free_job.
  * @refcount: reference counter for usage of the CS.
  * @fence: pointer to the fence object of this CS.
+ * @finish_work: workqueue object to run when CS is completed by H/W.
  * @work_tdr: delayed work node for TDR.
  * @mirror_node : node in device mirror list of command submissions.
  * @debugfs_list: node in debugfs list of command submissions.
@@ -741,6 +742,7 @@ struct hl_cs {
        spinlock_t              job_lock;
        struct kref             refcount;
        struct dma_fence        *fence;
+       struct work_struct      finish_work;
        struct delayed_work     work_tdr;
        struct list_head        mirror_node;
        struct list_head        debugfs_list;
@@ -1203,8 +1205,12 @@ struct hl_device_idle_busy_ts {
  * @asic_name: ASIC specific nmae.
  * @asic_type: ASIC specific type.
  * @completion_queue: array of hl_cq.
+ * @shadow_cs_queue: pointer to a shadow queue that holds pointers to
+ *                   outstanding command submissions.
  * @cq_wq: work queue of completion queues for executing work in process context
  * @eq_wq: work queue of event queue for executing work in process context.
+ * @cs_cmplt_wq: work queue of CS completions for executing work in process
+ *               context.
  * @kernel_ctx: Kernel driver context structure.
  * @kernel_queues: array of hl_hw_queue.
  * @hw_queues_mirror_list: CS mirror list for TDR.
@@ -1284,8 +1290,10 @@ struct hl_device {
        char                            asic_name[16];
        enum hl_asic_type               asic_type;
        struct hl_cq                    *completion_queue;
+       struct hl_cs                    **shadow_cs_queue;
        struct workqueue_struct         *cq_wq;
        struct workqueue_struct         *eq_wq;
+       struct workqueue_struct         *cs_cmplt_wq;
        struct hl_ctx                   *kernel_ctx;
        struct hl_hw_queue              *kernel_queues;
        struct list_head                hw_queues_mirror_list;
@@ -1359,6 +1367,15 @@ struct hl_device {
        u8                              pldm;
 };
 
+/**
+ * struct hl_cs_irq_info - IRQ info passed as arg to the CS completion handler.
+ * @hdev: pointer to habanalabs device structure.
+ * @relative_idx: CS completion relative interrupt index (0-based).
+ */
+struct hl_cs_irq_info {
+       struct hl_device *hdev;
+       int relative_idx; /* handler masks it to index shadow_cs_queue */
+};
 
 /*
  * IOCTLs
@@ -1470,6 +1487,7 @@ void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q);
 void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q);
 irqreturn_t hl_irq_handler_cq(int irq, void *arg);
 irqreturn_t hl_irq_handler_eq(int irq, void *arg);
+irqreturn_t hl_irq_handler_cs_cmplt(int irq, void *arg);
 u32 hl_cq_inc_ptr(u32 ptr);
 
 int hl_asid_init(struct hl_device *hdev);
diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c
index a1205ae47250..7b80e571a27c 100644
--- a/drivers/misc/habanalabs/hw_queue.c
+++ b/drivers/misc/habanalabs/hw_queue.c
@@ -469,6 +469,8 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
                goto unroll_cq_resv;
        }
 
+       hdev->shadow_cs_queue[cs->sequence & (HL_MAX_PENDING_CS - 1)] = cs;
+
        spin_lock(&hdev->hw_queues_mirror_lock);
        list_add_tail(&cs->mirror_node, &hdev->hw_queues_mirror_list);
 
diff --git a/drivers/misc/habanalabs/irq.c b/drivers/misc/habanalabs/irq.c
index fac65fbd70e8..93fa13218dd4 100644
--- a/drivers/misc/habanalabs/irq.c
+++ b/drivers/misc/habanalabs/irq.c
@@ -205,6 +205,52 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg)
        return IRQ_HANDLED;
 }
 
+/*
+ * hl_irq_handler_cs_cmplt() - irq handler for CS completions.
+ * @irq: IRQ number
+ * @arg: pointer to hl_cs_irq_info structure.
+ */
+irqreturn_t hl_irq_handler_cs_cmplt(int irq, void *arg)
+{
+       struct hl_cs_irq_info *cs_irq_info = arg;
+       struct hl_device *hdev = cs_irq_info->hdev;
+       struct hl_cs *cs;
+       struct hl_cs_job *job;
+       struct hl_cq *cq;
+       int relative_idx = cs_irq_info->relative_idx;
+
+       if (hdev->disabled) {
+               dev_dbg(hdev->dev,
+                       "Device disabled but received IRQ %d for CS completion\n",
+                       irq);
+               goto out;
+       }
+
+       cs = hdev->shadow_cs_queue[relative_idx & (HL_MAX_PENDING_CS - 1)];
+       if (!cs) {
+               dev_warn(hdev->dev,
+                       "No pointer to CS in shadow array at index %d\n",
+                       relative_idx);
+               goto out;
+       }
+
+       /*
+        * Return the CQ slots before queueing the work: the work frees the
+        * jobs, so cs->job_list must not be walked once the work may run.
+        * Parallel IRQ handlers of different CSs may access the same CQs;
+        * no locking is needed because "free_slots_cnt" is atomic. The CI
+        * counters are not updated, as the H/W queue type does not use them.
+        */
+       list_for_each_entry(job, &cs->job_list, cs_node) {
+               cq = &hdev->completion_queue[job->hw_queue_id];
+               atomic_inc(&cq->free_slots_cnt);
+       }
+
+       queue_work(hdev->cs_cmplt_wq, &cs->finish_work);
+out:
+       return IRQ_HANDLED;
+}
+
 /*
  * hl_cq_init - main initialization function for an cq object
  *
-- 
2.17.1

Reply via email to