Both the responder and the completer can sleep to handle page faults when used
with ODP, because the page-fault handler may be invoked when they access user
MRs. Work items must be scheduled in such cases so that these packets are
processed in a context that is allowed to sleep.

Signed-off-by: Daisuke Matsuda <matsuda-dais...@fujitsu.com>
---
 drivers/infiniband/sw/rxe/rxe_comp.c | 20 ++++++++++++++++++--
 drivers/infiniband/sw/rxe/rxe_loc.h  |  4 ++--
 drivers/infiniband/sw/rxe/rxe_recv.c |  4 ++--
 drivers/infiniband/sw/rxe/rxe_resp.c | 15 ++++++++++-----
 4 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 046bbacce37c..421f4ffe51a3 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -124,13 +124,29 @@ void retransmit_timer(struct timer_list *t)
        }
 }
 
-void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
+void rxe_comp_queue_pkt(struct rxe_pkt_info *pkt, struct sk_buff *skb)
 {
+       struct rxe_qp *qp = pkt->qp;
        int must_sched;
 
        skb_queue_tail(&qp->resp_pkts, skb);
 
-       must_sched = skb_queue_len(&qp->resp_pkts) > 1;
+       /* Schedule a work item if processing READ or ATOMIC acks.
+        * In these cases, completer may sleep to access ODP-enabled MRs.
+        */
+       switch (pkt->opcode) {
+       case IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY:
+       case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST:
+       case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
+       case IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST:
+       case IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE:
+               must_sched = 1;
+               break;
+
+       default:
+               must_sched = skb_queue_len(&qp->resp_pkts) > 1;
+       }
+
        if (must_sched != 0)
                rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_COMPLETER_SCHED);
 
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 948ce4902b10..d567aa65b5e0 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -176,9 +176,9 @@ int rxe_icrc_init(struct rxe_dev *rxe);
 int rxe_icrc_check(struct sk_buff *skb, struct rxe_pkt_info *pkt);
 void rxe_icrc_generate(struct sk_buff *skb, struct rxe_pkt_info *pkt);
 
-void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb);
+void rxe_resp_queue_pkt(struct rxe_pkt_info *pkt, struct sk_buff *skb);
 
-void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb);
+void rxe_comp_queue_pkt(struct rxe_pkt_info *pkt, struct sk_buff *skb);
 
 static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp)
 {
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 434a693cd4a5..01d07572a56f 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -174,9 +174,9 @@ static int hdr_check(struct rxe_pkt_info *pkt)
 static inline void rxe_rcv_pkt(struct rxe_pkt_info *pkt, struct sk_buff *skb)
 {
        if (pkt->mask & RXE_REQ_MASK)
-               rxe_resp_queue_pkt(pkt->qp, skb);
+               rxe_resp_queue_pkt(pkt, skb);
        else
-               rxe_comp_queue_pkt(pkt->qp, skb);
+               rxe_comp_queue_pkt(pkt, skb);
 }
 
 static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index d9134a00a529..991550baef8c 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -85,15 +85,20 @@ static char *resp_state_name[] = {
 };
 
 /* rxe_recv calls here to add a request packet to the input queue */
-void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
+void rxe_resp_queue_pkt(struct rxe_pkt_info *pkt, struct sk_buff *skb)
 {
-       int must_sched;
-       struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
+       int must_sched = 1;
+       struct rxe_qp *qp = pkt->qp;
 
        skb_queue_tail(&qp->req_pkts, skb);
 
-       must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) ||
-                       (skb_queue_len(&qp->req_pkts) > 1);
+       /* responder can sleep to access an ODP-enabled MR. Always schedule
+        * work items for non-zero-byte operations, RDMA READ, and ATOMIC
+        * operations.
+        */
+       if ((skb_queue_len(&qp->req_pkts) == 1) && (payload_size(pkt) == 0)
+           && !(pkt->mask & RXE_READ_OR_ATOMIC_MASK))
+               must_sched = 0;
 
        if (must_sched)
                rxe_sched_task(&qp->resp.task);
-- 
2.31.1

