From: Ofir Bitton <obit...@habana.ai>

Support advanced monitor functionality so that a single monitor can
watch more than one SOB. In addition, extend all CB generation
functions with a buffer offset, so that packets generated by different
functions can be placed in the same CB.

Signed-off-by: Ofir Bitton <obit...@habana.ai>
Reviewed-by: Oded Gabbay <ogab...@kernel.org>
Signed-off-by: Oded Gabbay <ogab...@kernel.org>
---
 .../habanalabs/common/command_submission.c    |  32 ++++
 drivers/misc/habanalabs/common/habanalabs.h   |  16 +-
 drivers/misc/habanalabs/common/hw_queue.c     |   6 +-
 drivers/misc/habanalabs/gaudi/gaudi.c         | 137 ++++++++++++------
 drivers/misc/habanalabs/goya/goya.c           |   9 +-
 5 files changed, 143 insertions(+), 57 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 9d49dd1558af..0d82c7dd93d0 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -38,6 +38,38 @@ void hl_sob_reset_error(struct kref *ref)
                        hw_sob->q_idx, hw_sob->sob_id);
 }
 
+/**
+ * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
+ * @sob_base: sob base id
+ * @sob_mask: sob user mask, each bit represents a sob offset from sob base
+ * @mask: generated mask (bitwise inverse of the selected sobs), set on success
+ *
+ * Return: 0 if given parameters are valid, -EINVAL otherwise
+ */
+int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
+{
+       int i;
+
+       if (sob_mask == 0)
+               return -EINVAL;
+
+       if (sob_mask == 0x1) {
+               *mask = ~(1 << (sob_base & 0x7));
+       } else {
+               /* find the msb of sob_mask to verify the sob range is valid */
+               for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--)
+                       if (BIT(i) & sob_mask)
+                               break;
+
+               if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1))
+                       return -EINVAL;
+
+               *mask = ~sob_mask;
+       }
+
+       return 0;
+}
+
 static void hl_fence_release(struct kref *kref)
 {
        struct hl_fence *fence =
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 58b4097235d9..7307e0b88b44 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -77,20 +77,26 @@
 
 #define HL_MAX_DCORES                  4
 
+#define HL_MAX_SOBS_PER_MONITOR        8
+
 /**
  * struct hl_gen_wait_properties - properties for generating a wait CB
  * @data: command buffer
  * @q_idx: queue id is used to extract fence register address
- * @sob_id: SOB id to use in this wait CB
+ * @size: offset in command buffer at which the wait packets are written
+ * @sob_base: SOB base to use in this wait CB
  * @sob_val: SOB value to wait for
  * @mon_id: monitor to use in this wait CB
+ * @sob_mask: each bit represents a SOB offset from sob_base to be used
  */
 struct hl_gen_wait_properties {
        void    *data;
        u32     q_idx;
-       u16     sob_id;
+       u32     size;
+       u16     sob_base;
        u16     sob_val;
        u16     mon_id;
+       u8      sob_mask;
 };
 
 /**
@@ -844,8 +850,9 @@ struct hl_asic_funcs {
        int (*load_boot_fit_to_device)(struct hl_device *hdev);
        u32 (*get_signal_cb_size)(struct hl_device *hdev);
        u32 (*get_wait_cb_size)(struct hl_device *hdev);
-       void (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id);
-       void (*gen_wait_cb)(struct hl_device *hdev,
+       u32 (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id,
+                       u32 size);
+       u32 (*gen_wait_cb)(struct hl_device *hdev,
                        struct hl_gen_wait_properties *prop);
        void (*reset_sob)(struct hl_device *hdev, void *data);
        void (*set_dma_mask_from_fw)(struct hl_device *hdev);
@@ -1927,6 +1934,7 @@ void hl_cs_rollback_all(struct hl_device *hdev);
 struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
                enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
 void hl_sob_reset_error(struct kref *ref);
+int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask);
 void hl_fence_put(struct hl_fence *fence);
 void hl_fence_get(struct hl_fence *fence);
 
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index 613681c2cdcc..ca625789d78d 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -408,7 +408,7 @@ static void init_signal_cs(struct hl_device *hdev,
                cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);
 
        hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
-                               cs_cmpl->hw_sob->sob_id);
+                               cs_cmpl->hw_sob->sob_id, 0);
 
        kref_get(&hw_sob->kref);
 
@@ -455,10 +455,12 @@ static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
                prop->base_mon_id, q_idx);
 
        wait_prop.data = (void *) job->patched_cb;
-       wait_prop.sob_id = cs_cmpl->hw_sob->sob_id;
+       wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
+       wait_prop.sob_mask = 0x1;
        wait_prop.sob_val = cs_cmpl->sob_val;
        wait_prop.mon_id = prop->base_mon_id;
        wait_prop.q_idx = q_idx;
+       wait_prop.size = 0;
        hdev->asic_funcs->gen_wait_cb(hdev, &wait_prop);
 
        kref_get(&cs_cmpl->hw_sob->kref);
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 46dced9d1eec..930b26b1f445 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -6380,14 +6380,16 @@ static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
                        sizeof(struct packet_msg_prot) * 2;
 }
 
-static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
+static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
+               u32 size)
 {
        struct hl_cb *cb = (struct hl_cb *) data;
        struct packet_msg_short *pkt;
-       u32 value, ctl;
+       u32 value, ctl, pkt_size = sizeof(*pkt);
 
-       pkt = (struct packet_msg_short *) (uintptr_t) cb->kernel_address;
-       memset(pkt, 0, sizeof(*pkt));
+       pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address +
+                                                                       size);
+       memset(pkt, 0, pkt_size);
 
        /* Inc by 1, Mode ADD */
        value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
@@ -6403,6 +6405,8 @@ static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
 
        pkt->value = cpu_to_le32(value);
        pkt->ctl = cpu_to_le32(ctl);
+
+       return size + pkt_size;
 }
 
 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
@@ -6425,21 +6429,42 @@ static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
        return pkt_size;
 }
 
-static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id,
-                                       u16 sob_val, u16 addr)
+static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
+               struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
+               u16 sob_val, u16 mon_id)
 {
+       u64 monitor_base;
        u32 ctl, value, pkt_size = sizeof(*pkt);
-       u8 mask = ~(1 << (sob_id & 0x7));
+       u16 msg_addr_offset;
+       u8 mask;
+
+       if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
+               dev_err(hdev->dev,
+                       "sob_base %u (mask %#x) is not valid\n",
+                       sob_base, sob_mask);
+               return 0;
+       }
+
+       /*
+        * monitor_base should be the content of the base0 address registers,
+        * so it will be added to the msg short offsets
+        */
+       monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
+
+       msg_addr_offset =
+               (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
+                               monitor_base;
 
        memset(pkt, 0, pkt_size);
 
-       value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_id / 8);
+       /* Monitor config packet: bind the monitor to a sync object */
+       value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
        value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
        value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
                        0); /* GREATER OR EQUAL*/
        value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
 
-       ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
+       ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
@@ -6474,60 +6499,61 @@ static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
        return pkt_size;
 }
 
-static void gaudi_gen_wait_cb(struct hl_device *hdev,
-               struct hl_gen_wait_properties *prop)
+static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
 {
-       struct hl_cb *cb = (struct hl_cb *) prop->data;
-       void *buf = (void *) (uintptr_t) cb->kernel_address;
-       u64 monitor_base, fence_addr = 0;
-       u32 size = 0;
-       u16 msg_addr_offset;
+       u32 offset;
 
-       switch (prop->q_idx) {
+       switch (queue_id) {
        case GAUDI_QUEUE_ID_DMA_0_0:
-               fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_0;
+               offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
                break;
        case GAUDI_QUEUE_ID_DMA_0_1:
-               fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_1;
+               offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
                break;
        case GAUDI_QUEUE_ID_DMA_0_2:
-               fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_2;
+               offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
                break;
        case GAUDI_QUEUE_ID_DMA_0_3:
-               fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_3;
+               offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
                break;
        case GAUDI_QUEUE_ID_DMA_1_0:
-               fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_0;
+               offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
                break;
        case GAUDI_QUEUE_ID_DMA_1_1:
-               fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_1;
+               offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
                break;
        case GAUDI_QUEUE_ID_DMA_1_2:
-               fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_2;
+               offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
                break;
        case GAUDI_QUEUE_ID_DMA_1_3:
-               fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_3;
+               offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
                break;
        case GAUDI_QUEUE_ID_DMA_5_0:
-               fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_0;
+               offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
                break;
        case GAUDI_QUEUE_ID_DMA_5_1:
-               fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_1;
+               offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
                break;
        case GAUDI_QUEUE_ID_DMA_5_2:
-               fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_2;
+               offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
                break;
        case GAUDI_QUEUE_ID_DMA_5_3:
-               fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_3;
+               offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
                break;
        default:
-               /* queue index should be valid here */
-               dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
-                               prop->q_idx);
-               return;
+               return -EINVAL;
        }
 
-       fence_addr += CFG_BASE;
+       *addr = CFG_BASE + offset;
+
+       return 0;
+}
+
+static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
+{
+       u64 monitor_base;
+       u32 size = 0;
+       u16 msg_addr_offset;
 
        /*
         * monitor_base should be the content of the base0 address registers,
@@ -6536,15 +6562,17 @@ static void gaudi_gen_wait_cb(struct hl_device *hdev,
        monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
 
        /* First monitor config packet: low address of the sync */
-       msg_addr_offset = (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 +
-                       prop->mon_id * 4) - monitor_base;
+       msg_addr_offset =
+               (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
+                               monitor_base;
 
        size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
                                        msg_addr_offset);
 
        /* Second monitor config packet: high address of the sync */
-       msg_addr_offset = (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 +
-                       prop->mon_id * 4) - monitor_base;
+       msg_addr_offset =
+               (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
+                               monitor_base;
 
        size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
                                        msg_addr_offset);
@@ -6553,20 +6581,35 @@ static void gaudi_gen_wait_cb(struct hl_device *hdev,
         * Third monitor config packet: the payload, i.e. what to write when the
         * sync triggers
         */
-       msg_addr_offset = (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 +
-                       prop->mon_id * 4) - monitor_base;
+       msg_addr_offset =
+               (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
+                               monitor_base;
 
        size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
 
-       /* Fourth monitor config packet: bind the monitor to a sync object */
-       msg_addr_offset =
-               (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) -
-                               monitor_base;
-       size += gaudi_add_arm_monitor_pkt(buf + size, prop->sob_id,
-                       prop->sob_val, msg_addr_offset);
+       return size;
+}
+
+static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
+               struct hl_gen_wait_properties *prop)
+{
+       struct hl_cb *cb = (struct hl_cb *) prop->data;
+       void *buf = (void *) (uintptr_t) cb->kernel_address;
+       u64 fence_addr = 0;
+       u32 size = prop->size; /* offset in the CB to start writing at */
 
-       /* Fence packet */
+       if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
+               dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
+                               prop->q_idx);
+               return 0; /* nothing was written to the CB */
+       }
+
+       size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
+       size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
+                       prop->sob_mask, prop->sob_val, prop->mon_id);
        size += gaudi_add_fence_pkt(buf + size);
+
+       return size; /* offset just past the last packet written */
 }
 
 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index cd1366f10fbe..e8bf0b79cd67 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -5292,15 +5292,16 @@ static u32 goya_get_wait_cb_size(struct hl_device *hdev)
        return 0;
 }
 
-static void goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
+static u32 goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
+               u32 size)
 {
-
+       return 0;
 }
 
-static void goya_gen_wait_cb(struct hl_device *hdev,
+static u32 goya_gen_wait_cb(struct hl_device *hdev,
                struct hl_gen_wait_properties *prop)
 {
-
+       return 0;
 }
 
 static void goya_reset_sob(struct hl_device *hdev, void *data)
-- 
2.17.1

Reply via email to