This commit handles the signature error CQE generated by
the HW (if one occurred) and stores it on the QP signature
error list.

Once the user gets the completion for the transaction, he
must check for signature errors on the signature memory region
using a new lightweight verb, ib_check_sig_status, and, if one
exists, retrieve the signature error information.

If the user does not check for signature errors (i.e. does
not call ib_check_sig_status), he will not be allowed to use
the memory region for another signature operation
(the REG_SIG_MR work request will fail).

Signed-off-by: Sagi Grimberg <sa...@mellanox.com>
---
 drivers/infiniband/hw/mlx5/cq.c      |   49 ++++++++++++++++++++++++++++++++++
 drivers/infiniband/hw/mlx5/main.c    |    1 +
 drivers/infiniband/hw/mlx5/mlx5_ib.h |    2 +
 drivers/infiniband/hw/mlx5/mr.c      |   34 +++++++++++++++++++++++
 drivers/infiniband/hw/mlx5/qp.c      |   14 +++++++++-
 include/linux/mlx5/cq.h              |    1 +
 include/linux/mlx5/device.h          |   17 ++++++++++++
 include/linux/mlx5/driver.h          |    2 +
 8 files changed, 119 insertions(+), 1 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 344ab03..c1d4029 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -351,6 +351,34 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct 
mlx5_cqe64 *cqe64,
        qp->sq.last_poll = tail;
 }
 
+/*
+ * Decode a signature error CQE into @item.
+ * Maps the HW syndrome onto an ib_sig_err type and converts the
+ * big-endian CQE fields to CPU byte order.
+ */
+static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
+                            struct ib_sig_err *item)
+{
+       const u16 synd = be16_to_cpu(cqe->syndrome);
+
+       /* Guards are the upper 16 bits of the transport signature words */
+       item->expected_guard = be32_to_cpu(cqe->expected_trans_sig) >> 16;
+       item->actual_guard = be32_to_cpu(cqe->actual_trans_sig) >> 16;
+       item->expected_logical_block = be32_to_cpu(cqe->expected_reftag);
+       item->actual_logical_block = be32_to_cpu(cqe->actual_reftag);
+       item->sig_err_offset = be64_to_cpu(cqe->err_offset);
+       item->qpn = be32_to_cpu(cqe->qpn);
+       item->key = be32_to_cpu(cqe->mkey);
+
+       /* Any other syndrome leaves item->err_type untouched */
+       if (synd == 13)
+               item->err_type = IB_SIG_BAD_CRC;
+       else if (synd == 12)
+               item->err_type = IB_SIG_BAD_APPTAG;
+       else if (synd == 11)
+               item->err_type = IB_SIG_BAD_REFTAG;
+}
+
 static int mlx5_poll_one(struct mlx5_ib_cq *cq,
                         struct mlx5_ib_qp **cur_qp,
                         struct ib_wc *wc)
@@ -360,12 +388,15 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
        struct mlx5_cqe64 *cqe64;
        struct mlx5_core_qp *mqp;
        struct mlx5_ib_wq *wq;
+       struct mlx5_sig_err_cqe *sig_err_cqe;
+       struct ib_sig_err *err_item;
        uint8_t opcode;
        uint32_t qpn;
        u16 wqe_ctr;
        void *cqe;
        int idx;
 
+repoll:
        cqe = next_cqe_sw(cq);
        if (!cqe)
                return -EAGAIN;
@@ -449,6 +480,24 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
                        }
                }
                break;
+       case MLX5_CQE_SIG_ERR:
+               sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
+               err_item = kzalloc(sizeof(*err_item), GFP_ATOMIC);
+               if (!err_item) {
+                       mlx5_ib_err(dev, "Failed to allocate sig_err item\n");
+                       return -ENOMEM;
+               }
+
+               get_sig_err_item(sig_err_cqe, err_item);
+
+               mlx5_ib_dbg(dev, "Got SIGERR on key: 0x%x\n",
+                           err_item->key);
+
+               spin_lock(&(*cur_qp)->sig_err_lock);
+               list_add(&err_item->list, &(*cur_qp)->sig_err_list);
+               spin_unlock(&(*cur_qp)->sig_err_lock);
+
+               goto repoll;
        }
 
        return 0;
diff --git a/drivers/infiniband/hw/mlx5/main.c 
b/drivers/infiniband/hw/mlx5/main.c
index 2e67a37..f3c7111 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1409,6 +1409,7 @@ static int init_one(struct pci_dev *pdev,
        dev->ib_dev.alloc_fast_reg_mr   = mlx5_ib_alloc_fast_reg_mr;
        dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
        dev->ib_dev.free_fast_reg_page_list  = mlx5_ib_free_fast_reg_page_list;
+       dev->ib_dev.check_sig_status    = mlx5_ib_check_sig_status;
 
        if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) {
                dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h 
b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 1d5793e..73b8cf0 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -533,6 +533,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
 int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int 
*shift);
 void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context);
+int mlx5_ib_check_sig_status(struct ib_mr *sig_mr,
+                            struct ib_sig_err *sig_err);
 
 static inline void init_query_mad(struct ib_smp *mad)
 {
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 2f6758c..de0e102 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -979,6 +979,8 @@ struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
                mr->sig->psv_memory.psv_idx = psv_index[0];
                mr->sig->psv_wire.psv_idx = psv_index[1];
                mr->sig->sig_status_collected = true;
+               /* Next UMR, Arm SIGERR */
+               ++mr->sig->sig_err;
        }
 
        err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in));
@@ -1125,3 +1127,35 @@ void mlx5_ib_free_fast_reg_page_list(struct 
ib_fast_reg_page_list *page_list)
        kfree(mfrpl->ibfrpl.page_list);
        kfree(mfrpl);
 }
+
+/* Dequeue the signature error queued for sig_mr (if any) into sig_err.
+ * Returns 1 only when the error's key equals sig_mr->lkey exactly, else 0. */
+int mlx5_ib_check_sig_status(struct ib_mr *sig_mr,
+                            struct ib_sig_err *sig_err)
+{
+       struct mlx5_ib_mr *mmr = to_mmr(sig_mr);
+       struct mlx5_ib_qp *mqp = mmr->sig->last_qp;
+       struct ib_sig_err *err_item;
+       int ret = 0;
+
+       if (list_empty(&mqp->sig_err_list))
+               goto out;
+       spin_lock(&mqp->sig_err_lock);
+       list_for_each_entry(err_item, &mqp->sig_err_list, list)
+               if ((sig_mr->lkey >> 8) == (err_item->key >> 8)) {
+                       list_del(&err_item->list);
+                       spin_unlock(&mqp->sig_err_lock);
+                       memcpy(sig_err, err_item, sizeof(*err_item));
+                       /* Use err_item before kfree(), not after */
+                       if ((sig_mr->lkey & 0xff) == (err_item->key & 0xff))
+                               ret = 1;
+                       kfree(err_item);
+                       /* Increment SW number of sig errors */
+                       ++mmr->sig->sig_err;
+                       goto out;
+               }
+       spin_unlock(&mqp->sig_err_lock);
+out:
+       mmr->sig->sig_status_collected = true;
+       return ret;
+}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 971d434..7663102 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -993,6 +993,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, 
struct mlx5_ib_qp *qp)
 {
        struct mlx5_ib_cq *send_cq, *recv_cq;
        struct mlx5_modify_qp_mbox_in *in;
+       struct ib_sig_err *item, *tmp;
        int err;
 
        in = kzalloc(sizeof(*in), GFP_KERNEL);
@@ -1004,6 +1005,12 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, 
struct mlx5_ib_qp *qp)
                        mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET 
failed\n",
                                     qp->mqp.qpn);
 
+       if (qp->signature_en && !list_empty(&qp->sig_err_list))
+               list_for_each_entry_safe(item, tmp, &qp->sig_err_list, list) {
+                       list_del(&item->list);
+                       kfree(item);
+               }
+
        get_cqs(qp, &send_cq, &recv_cq);
 
        if (qp->create_type == MLX5_QP_KERNEL) {
@@ -2154,6 +2161,7 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg 
*seg,
 {
        struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr;
        u32 sig_key = sig_mr->rkey;
+       u8 sigerr = to_mmr(sig_mr)->sig->sig_err & 1;
 
        memset(seg, 0, sizeof(*seg));
 
@@ -2161,7 +2169,7 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg 
*seg,
        seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) |
                                   MLX5_ACCESS_MODE_KLM;
        seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
-       seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL |
+       seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
                                    MLX5_MKEY_BSF_EN | pdn);
        seg->start_addr = 0;
        seg->len = cpu_to_be64(length);
@@ -2195,6 +2203,10 @@ static int set_sig_umr_wr(struct ib_send_wr *wr, struct 
mlx5_ib_qp *qp,
            unlikely(!sig_mr->sig->sig_status_collected))
                return -EINVAL;
 
+
+       /* record last active qp */
+       sig_mr->sig->last_qp = qp;
+
        /* length of the protected region, data + protection */
        region_len = wr->wr.sig_handover.data_size +
                     wr->wr.sig_handover.prot_size;
diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index 3db67f7..e1974b0 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -80,6 +80,7 @@ enum {
        MLX5_CQE_RESP_SEND_IMM  = 3,
        MLX5_CQE_RESP_SEND_INV  = 4,
        MLX5_CQE_RESIZE_CQ      = 0xff, /* TBD */
+       MLX5_CQE_SIG_ERR        = 12,
        MLX5_CQE_REQ_ERR        = 13,
        MLX5_CQE_RESP_ERR       = 14,
 };
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index aef7eed..8c43971 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -544,6 +544,23 @@ struct mlx5_cqe64 {
        u8              op_own;
 };
 
+struct mlx5_sig_err_cqe { /* view of a mlx5_cqe64 for MLX5_CQE_SIG_ERR */
+       u8              rsvd0[16];
+       __be32          expected_trans_sig; /* expected guard in bits 31:16 */
+       __be32          actual_trans_sig; /* actual guard in bits 31:16 */
+       __be32          expected_reftag; /* read as expected_logical_block */
+       __be32          actual_reftag; /* read as actual_logical_block */
+       __be16          syndrome; /* 13: CRC, 12: apptag, 11: reftag */
+       u8              rsvd22[2];
+       __be32          mkey; /* reported as ib_sig_err.key */
+       __be64          err_offset; /* reported as ib_sig_err.sig_err_offset */
+       u8              rsvd30[8];
+       __be32          qpn; /* reported as ib_sig_err.qpn */
+       u8              rsvd38[2];
+       u8              signature;
+       u8              opcode;
+};
+
 struct mlx5_wqe_srq_next_seg {
        u8                      rsvd0[2];
        __be16                  next_wqe_index;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index e7eb977..6839e24 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -415,7 +415,9 @@ struct mlx5_core_sig_ctx {
        struct mlx5_core_psv    psv_wire;
        struct mlx5_core_mr    *data_mr;
        struct mlx5_core_mr    *prot_mr;
+       struct mlx5_ib_qp      *last_qp;
        bool                    sig_status_collected;
+       u8                      sig_err;
 };
 
 struct mlx5_core_mr {
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to