Currently a DREP is only sent in response to a DREQ if a connection
has been found matching the DREQ, and it is in the proper state.  Once
a DREP is sent, the local connection moves into timewait.  Duplicate
DREQs received while in this state result in re-sending the DREP.

However, it's likely that the local connection will enter and exit
timewait before the remote side times out a lost DREP and resends a DREQ.
There are a couple possible solutions to this.  One is to increase how
long a connection remains in timewait, by multiplying its wait time by
max_cm_retries.  This can greatly increase the timewait state before a QP
can be re-used when CM messages are not lost.

An alternative is to send a DREP in response to a DREQ, even if a local
connection is not found, which is what this patch does.

Signed-off-by: Sean Hefty <[EMAIL PROTECTED]>
---
Index: cm.c
===================================================================
--- cm.c        (revision 9490)
+++ cm.c        (working copy)
@@ -1900,6 +1900,32 @@ out:     spin_unlock_irqrestore(&cm_id_priv-
 }
 EXPORT_SYMBOL(ib_send_cm_drep);
 
+static int cm_issue_drep(struct cm_port *port,
+                        struct ib_mad_recv_wc *mad_recv_wc)
+{
+       struct ib_mad_send_buf *msg = NULL;
+       struct cm_dreq_msg *dreq_msg;
+       struct cm_drep_msg *drep_msg;
+       int ret;
+
+       ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
+       if (ret)
+               return ret;
+
+       dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad;
+       drep_msg = (struct cm_drep_msg *) msg->mad;
+
+       cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
+       drep_msg->remote_comm_id = dreq_msg->local_comm_id;
+       drep_msg->local_comm_id = dreq_msg->remote_comm_id;
+
+       ret = ib_post_send_mad(msg, NULL);
+       if (ret)
+               cm_free_msg(msg);
+
+       return ret;
+}
+
 static int cm_dreq_handler(struct cm_work *work)
 {
        struct cm_id_private *cm_id_priv;
@@ -1911,8 +1937,10 @@ static int cm_dreq_handler(struct cm_wor
        dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
        cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
                                   dreq_msg->local_comm_id);
-       if (!cm_id_priv)
+       if (!cm_id_priv) {
+               cm_issue_drep(work->port, work->mad_recv_wc);
                return -EINVAL;
+       }
 
        work->cm_event.private_data = &dreq_msg->private_data;
 


_______________________________________________
openib-general mailing list
[email protected]
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to