From: Ofir Bitton <obit...@habana.ai>

Refactor the sync stream implementation by adding more structures for
better readability. In addition, reduce the allocated resources.

Signed-off-by: Ofir Bitton <obit...@habana.ai>
Reviewed-by: Oded Gabbay <ogab...@kernel.org>
Signed-off-by: Oded Gabbay <ogab...@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs.h |  78 +++++---
 drivers/misc/habanalabs/common/hw_queue.c   | 197 +++++++++++---------
 drivers/misc/habanalabs/gaudi/gaudi.c       |  37 ++--
 drivers/misc/habanalabs/goya/goya.c         |   4 +-
 4 files changed, 180 insertions(+), 136 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index d12f11bdaacf..58b4097235d9 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -68,9 +68,6 @@
 #define HL_RSVD_SOBS                   4
 #define HL_RSVD_MONS                   2
 
-#define HL_RSVD_SOBS_IN_USE            2
-#define HL_RSVD_MONS_IN_USE            1
-
 #define HL_MAX_SOB_VAL                 (1 << 15)
 
 #define IS_POWER_OF_2(n)               (n != 0 && ((n & (n - 1)) == 0))
@@ -80,6 +77,22 @@
 
 #define HL_MAX_DCORES                  4
 
+/**
+ * struct hl_gen_wait_properties - properties for generating a wait CB
+ * @data: command buffer
+ * @q_idx: queue id is used to extract fence register address
+ * @sob_id: SOB id to use in this wait CB
+ * @sob_val: SOB value to wait for
+ * @mon_id: monitor to use in this wait CB
+ */
+struct hl_gen_wait_properties {
+       void    *data;
+       u32     q_idx;
+       u16     sob_id;
+       u16     sob_val;
+       u16     mon_id;
+};
+
 /**
  * struct pgt_info - MMU hop page info.
  * @node: hash linked-list node for the pgts shadow hash of pgts.
@@ -502,9 +515,27 @@ struct hl_cs_job;
 #define HL_CPU_ACCESSIBLE_MEM_SIZE     SZ_2M
 
 /**
- * struct hl_hw_queue - describes a H/W transport queue.
+ * struct hl_sync_stream_properties -
+ *     describes a H/W queue sync stream properties
  * @hw_sob: array of the used H/W SOBs by this H/W queue.
+ * @next_sob_val: the next value to use for the currently used SOB.
+ * @base_sob_id: the base SOB id of the SOBs used by this queue.
+ * @base_mon_id: the base MON id of the MONs used by this queue.
+ * @curr_sob_offset: the id offset to the currently used SOB from the
+ *                   HL_RSVD_SOBS that are being used by this queue.
+ */
+struct hl_sync_stream_properties {
+       struct hl_hw_sob        hw_sob[HL_RSVD_SOBS];
+       u16                     next_sob_val;
+       u16                     base_sob_id;
+       u16                     base_mon_id;
+       u8                      curr_sob_offset;
+};
+
+/**
+ * struct hl_hw_queue - describes a H/W transport queue.
  * @shadow_queue: pointer to a shadow queue that holds pointers to jobs.
+ * @sync_stream_prop: sync stream queue properties
  * @queue_type: type of queue.
  * @kernel_address: holds the queue's kernel virtual address.
  * @bus_address: holds the queue's DMA address.
@@ -514,33 +545,24 @@ struct hl_cs_job;
  * @cq_id: the id for the corresponding CQ for this H/W queue.
  * @msi_vec: the IRQ number of the H/W queue.
  * @int_queue_len: length of internal queue (number of entries).
- * @next_sob_val: the next value to use for the currently used SOB.
- * @base_sob_id: the base SOB id of the SOBs used by this queue.
- * @base_mon_id: the base MON id of the MONs used by this queue.
  * @valid: is the queue valid (we have array of 32 queues, not all of them
  *         exist).
- * @curr_sob_offset: the id offset to the currently used SOB from the
- *                   HL_RSVD_SOBS that are being used by this queue.
  * @supports_sync_stream: True if queue supports sync stream
  */
 struct hl_hw_queue {
-       struct hl_hw_sob        hw_sob[HL_RSVD_SOBS];
-       struct hl_cs_job        **shadow_queue;
-       enum hl_queue_type      queue_type;
-       u64                     kernel_address;
-       dma_addr_t              bus_address;
-       u32                     pi;
-       atomic_t                ci;
-       u32                     hw_queue_id;
-       u32                     cq_id;
-       u32                     msi_vec;
-       u16                     int_queue_len;
-       u16                     next_sob_val;
-       u16                     base_sob_id;
-       u16                     base_mon_id;
-       u8                      valid;
-       u8                      curr_sob_offset;
-       u8                      supports_sync_stream;
+       struct hl_cs_job                        **shadow_queue;
+       struct hl_sync_stream_properties        sync_stream_prop;
+       enum hl_queue_type                      queue_type;
+       u64                                     kernel_address;
+       dma_addr_t                              bus_address;
+       u32                                     pi;
+       atomic_t                                ci;
+       u32                                     hw_queue_id;
+       u32                                     cq_id;
+       u32                                     msi_vec;
+       u16                                     int_queue_len;
+       u8                                      valid;
+       u8                                      supports_sync_stream;
 };
 
 /**
@@ -823,8 +845,8 @@ struct hl_asic_funcs {
        u32 (*get_signal_cb_size)(struct hl_device *hdev);
        u32 (*get_wait_cb_size)(struct hl_device *hdev);
        void (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id);
-       void (*gen_wait_cb)(struct hl_device *hdev, void *data, u16 sob_id,
-                               u16 sob_val, u16 mon_id, u32 q_idx);
+       void (*gen_wait_cb)(struct hl_device *hdev,
+                       struct hl_gen_wait_properties *prop);
        void (*reset_sob)(struct hl_device *hdev, void *data);
        void (*set_dma_mask_from_fw)(struct hl_device *hdev);
        u64 (*get_device_time)(struct hl_device *hdev);
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index adb61f9eb2b5..613681c2cdcc 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -389,6 +389,89 @@ static void hw_queue_schedule_job(struct hl_cs_job *job)
        ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
 }
 
+static void init_signal_cs(struct hl_device *hdev,
+               struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
+{
+       struct hl_sync_stream_properties *prop;
+       struct hl_hw_sob *hw_sob;
+       u32 q_idx;
+
+       q_idx = job->hw_queue_id;
+       prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
+       hw_sob = &prop->hw_sob[prop->curr_sob_offset];
+
+       cs_cmpl->hw_sob = hw_sob;
+       cs_cmpl->sob_val = prop->next_sob_val++;
+
+       dev_dbg(hdev->dev,
+               "generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
+               cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);
+
+       hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
+                               cs_cmpl->hw_sob->sob_id);
+
+       kref_get(&hw_sob->kref);
+
+       /* check for wraparound */
+       if (prop->next_sob_val == HL_MAX_SOB_VAL) {
+               /*
+                * Decrement as we reached the max value.
+                * The release function won't be called here as we've
+                * just incremented the refcount.
+                */
+               kref_put(&hw_sob->kref, hl_sob_reset_error);
+               prop->next_sob_val = 1;
+               /* move to the next SOB, wrapping around HL_RSVD_SOBS */
+               prop->curr_sob_offset =
+                       (prop->curr_sob_offset + 1) % HL_RSVD_SOBS;
+
+               dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
+                               prop->curr_sob_offset, q_idx);
+       }
+}
+
+static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
+               struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
+{
+       struct hl_cs_compl *signal_cs_cmpl;
+       struct hl_sync_stream_properties *prop;
+       struct hl_gen_wait_properties wait_prop;
+       u32 q_idx;
+
+       q_idx = job->hw_queue_id;
+       prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
+
+       signal_cs_cmpl = container_of(cs->signal_fence,
+                                       struct hl_cs_compl,
+                                       base_fence);
+
+       /* copy the SOB id and value of the signal CS */
+       cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
+       cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
+
+       dev_dbg(hdev->dev,
+               "generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n",
+               cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
+               prop->base_mon_id, q_idx);
+
+       wait_prop.data = (void *) job->patched_cb;
+       wait_prop.sob_id = cs_cmpl->hw_sob->sob_id;
+       wait_prop.sob_val = cs_cmpl->sob_val;
+       wait_prop.mon_id = prop->base_mon_id;
+       wait_prop.q_idx = q_idx;
+       hdev->asic_funcs->gen_wait_cb(hdev, &wait_prop);
+
+       kref_get(&cs_cmpl->hw_sob->kref);
+       /*
+        * Must put the signal fence after the SOB refcnt increment so
+        * the SOB refcnt won't turn 0 and reset the SOB before the
+        * wait CS was submitted.
+        */
+       mb();
+       hl_fence_put(cs->signal_fence);
+       cs->signal_fence = NULL;
+}
+
 /*
  * init_signal_wait_cs - initialize a signal/wait CS
  * @cs: pointer to the signal/wait CS
@@ -399,84 +482,18 @@ static void init_signal_wait_cs(struct hl_cs *cs)
 {
        struct hl_ctx *ctx = cs->ctx;
        struct hl_device *hdev = ctx->hdev;
-       struct hl_hw_queue *hw_queue;
+       struct hl_cs_job *job;
        struct hl_cs_compl *cs_cmpl =
                        container_of(cs->fence, struct hl_cs_compl, base_fence);
 
-       struct hl_hw_sob *hw_sob;
-       struct hl_cs_job *job;
-       u32 q_idx;
-
        /* There is only one job in a signal/wait CS */
        job = list_first_entry(&cs->job_list, struct hl_cs_job,
                                cs_node);
-       q_idx = job->hw_queue_id;
-       hw_queue = &hdev->kernel_queues[q_idx];
-
-       if (cs->type & CS_TYPE_SIGNAL) {
-               hw_sob = &hw_queue->hw_sob[hw_queue->curr_sob_offset];
-
-               cs_cmpl->hw_sob = hw_sob;
-               cs_cmpl->sob_val = hw_queue->next_sob_val++;
-
-               dev_dbg(hdev->dev,
-                       "generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
-                       cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);
-
-               hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
-                                       cs_cmpl->hw_sob->sob_id);
-
-               kref_get(&hw_sob->kref);
-
-               /* check for wraparound */
-               if (hw_queue->next_sob_val == HL_MAX_SOB_VAL) {
-                       /*
-                        * Decrement as we reached the max value.
-                        * The release function won't be called here as we've
-                        * just incremented the refcount.
-                        */
-                       kref_put(&hw_sob->kref, hl_sob_reset_error);
-                       hw_queue->next_sob_val = 1;
-                       /* only two SOBs are currently in use */
-                       hw_queue->curr_sob_offset =
-                                       (hw_queue->curr_sob_offset + 1) %
-                                               HL_RSVD_SOBS_IN_USE;
-
-                       dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
-                                       hw_queue->curr_sob_offset, q_idx);
-               }
-       } else if (cs->type & CS_TYPE_WAIT) {
-               struct hl_cs_compl *signal_cs_cmpl;
-
-               signal_cs_cmpl = container_of(cs->signal_fence,
-                                               struct hl_cs_compl,
-                                               base_fence);
-
-               /* copy the the SOB id and value of the signal CS */
-               cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
-               cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
 
-               dev_dbg(hdev->dev,
-                       "generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n",
-                       cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
-                       hw_queue->base_mon_id, q_idx);
-
-               hdev->asic_funcs->gen_wait_cb(hdev, job->patched_cb,
-                                               cs_cmpl->hw_sob->sob_id,
-                                               cs_cmpl->sob_val,
-                                               hw_queue->base_mon_id,
-                                               q_idx);
-
-               kref_get(&cs_cmpl->hw_sob->kref);
-               /*
-                * Must put the signal fence after the SOB refcnt increment so
-                * the SOB refcnt won't turn 0 and reset the SOB before the
-                * wait CS was submitted.
-                */
-               mb();
-               hl_fence_put(cs->signal_fence);
-               cs->signal_fence = NULL;
-       }
+       if (cs->type & CS_TYPE_SIGNAL)
+               init_signal_cs(hdev, job, cs_cmpl);
+       else if (cs->type & CS_TYPE_WAIT)
+               init_wait_cs(hdev, cs, job, cs_cmpl);
 }
 
 /*
@@ -720,22 +737,28 @@ static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
 
 static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
 {
-       struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
+       struct hl_sync_stream_properties *sync_stream_prop;
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct hl_hw_sob *hw_sob;
-       int sob, queue_idx = hdev->sync_stream_queue_idx++;
+       int sob, queue_idx;
+
+       if (!hdev->kernel_queues[q_idx].supports_sync_stream)
+               return;
+
+       sync_stream_prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
+       queue_idx = hdev->sync_stream_queue_idx++;
 
-       hw_queue->base_sob_id =
-               prop->sync_stream_first_sob + queue_idx * HL_RSVD_SOBS;
-       hw_queue->base_mon_id =
-               prop->sync_stream_first_mon + queue_idx * HL_RSVD_MONS;
-       hw_queue->next_sob_val = 1;
-       hw_queue->curr_sob_offset = 0;
+       sync_stream_prop->base_sob_id = prop->sync_stream_first_sob +
+                       (queue_idx * HL_RSVD_SOBS);
+       sync_stream_prop->base_mon_id = prop->sync_stream_first_mon +
+                       (queue_idx * HL_RSVD_MONS);
+       sync_stream_prop->next_sob_val = 1;
+       sync_stream_prop->curr_sob_offset = 0;
 
        for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
-               hw_sob = &hw_queue->hw_sob[sob];
+               hw_sob = &sync_stream_prop->hw_sob[sob];
                hw_sob->hdev = hdev;
-               hw_sob->sob_id = hw_queue->base_sob_id + sob;
+               hw_sob->sob_id = sync_stream_prop->base_sob_id + sob;
                hw_sob->q_idx = q_idx;
                kref_init(&hw_sob->kref);
        }
@@ -743,15 +766,16 @@ static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
 
 static void sync_stream_queue_reset(struct hl_device *hdev, u32 q_idx)
 {
-       struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
+       struct hl_sync_stream_properties *prop =
+                       &hdev->kernel_queues[q_idx].sync_stream_prop;
 
        /*
         * In case we got here due to a stuck CS, the refcnt might be bigger
         * than 1 and therefore we reset it.
         */
-       kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref);
-       hw_queue->curr_sob_offset = 0;
-       hw_queue->next_sob_val = 1;
+       kref_init(&prop->hw_sob[prop->curr_sob_offset].kref);
+       prop->curr_sob_offset = 0;
+       prop->next_sob_val = 1;
 }
 
 /*
@@ -794,8 +818,7 @@ static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
                break;
        }
 
-       if (q->supports_sync_stream)
-               sync_stream_queue_init(hdev, q->hw_queue_id);
+       sync_stream_queue_init(hdev, q->hw_queue_id);
 
        if (rc)
                return rc;
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 9d9d22c4452c..46dced9d1eec 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -472,9 +472,11 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
        prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
 
        prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
-                       num_sync_stream_queues * HL_RSVD_SOBS;
+                       prop->sync_stream_first_sob +
+                       (num_sync_stream_queues * HL_RSVD_SOBS);
        prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
-                       num_sync_stream_queues * HL_RSVD_MONS;
+                       prop->sync_stream_first_mon +
+                       (num_sync_stream_queues * HL_RSVD_MONS);
 
        return 0;
 }
@@ -6472,16 +6474,16 @@ static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
        return pkt_size;
 }
 
-static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
-                       u16 sob_val, u16 mon_id, u32 q_idx)
+static void gaudi_gen_wait_cb(struct hl_device *hdev,
+               struct hl_gen_wait_properties *prop)
 {
-       struct hl_cb *cb = (struct hl_cb *) data;
+       struct hl_cb *cb = (struct hl_cb *) prop->data;
        void *buf = (void *) (uintptr_t) cb->kernel_address;
        u64 monitor_base, fence_addr = 0;
        u32 size = 0;
        u16 msg_addr_offset;
 
-       switch (q_idx) {
+       switch (prop->q_idx) {
        case GAUDI_QUEUE_ID_DMA_0_0:
                fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_0;
                break;
@@ -6521,7 +6523,7 @@ static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
        default:
                /* queue index should be valid here */
                dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
-                               q_idx);
+                               prop->q_idx);
                return;
        }
 
@@ -6534,17 +6536,15 @@ static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
        monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
 
        /* First monitor config packet: low address of the sync */
-       msg_addr_offset =
-               (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
-                               monitor_base;
+       msg_addr_offset = (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 +
+                       prop->mon_id * 4) - monitor_base;
 
        size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
                                        msg_addr_offset);
 
        /* Second monitor config packet: high address of the sync */
-       msg_addr_offset =
-               (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
-                               monitor_base;
+       msg_addr_offset = (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 +
+                       prop->mon_id * 4) - monitor_base;
 
        size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
                                        msg_addr_offset);
@@ -6553,18 +6553,17 @@ static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
         * Third monitor config packet: the payload, i.e. what to write when the
         * sync triggers
         */
-       msg_addr_offset =
-               (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
-                               monitor_base;
+       msg_addr_offset = (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 +
+                       prop->mon_id * 4) - monitor_base;
 
        size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
 
        /* Fourth monitor config packet: bind the monitor to a sync object */
        msg_addr_offset =
-               (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
+               (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) -
                                monitor_base;
-       size += gaudi_add_arm_monitor_pkt(buf + size, sob_id, sob_val,
-                                               msg_addr_offset);
+       size += gaudi_add_arm_monitor_pkt(buf + size, prop->sob_id,
+                       prop->sob_val, msg_addr_offset);
 
        /* Fence packet */
        size += gaudi_add_fence_pkt(buf + size);
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index f41fe748f1ca..cd1366f10fbe 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -5297,8 +5297,8 @@ static void goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
 
 }
 
-static void goya_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
-                       u16 sob_val, u16 mon_id, u32 q_idx)
+static void goya_gen_wait_cb(struct hl_device *hdev,
+               struct hl_gen_wait_properties *prop)
 {
 
 }
-- 
2.17.1

Reply via email to