This patch causes the ib_mad driver to discard busy responses from the SA
while the matching request still has retries left, so that a busy response is
effectively treated as a timeout and the request is retried later.

This ensures that naïve IB applications cannot overwhelm the SA with queries, 
which could happen when a cluster is being rebooted, or when a large HPC 
application is started.

Note that this patch modifies the same code as the MAD user RMPP patch, so it
cannot be applied cleanly without that patch.

Signed-off-by: Michael Heinz <[email protected]>

----

diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c 
index efca783..05f2930 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -1815,9 +1815,20 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
         */
        /* Complete corresponding request */
        if (ib_response_mad(mad_recv_wc->recv_buf.mad)) {
+               u16 busy = __be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.status) &
+                                       IB_MGMT_MAD_STATUS_BUSY;
+
                spin_lock_irqsave(&mad_agent_priv->lock, flags);
                mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
                if (mad_send_wr) {
+                       if (busy && mad_send_wr->retries_left) {
+                               /* Just let the query timeout and have it requeued later */
+                               spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+                               ib_free_recv_mad(mad_recv_wc);
+                               deref_mad_agent(mad_agent_priv);
+                               printk(KERN_NOTICE PFX "Response returned with MAD_STATUS_BUSY\n");
+                               return;
+                       }
                        ib_mark_mad_done(mad_send_wr);
                        spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 2651e93..e9dc4cc 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -77,6 +77,15 @@
 
 #define IB_MGMT_MAX_METHODS                    128
 
+/* MAD Status field bit masks */
+#define IB_MGMT_MAD_STATUS_SUCCESS                             0x0000
+#define IB_MGMT_MAD_STATUS_BUSY                                0x0001
+#define IB_MGMT_MAD_STATUS_REDIRECT_REQD                               0x0002
+#define IB_MGMT_MAD_STATUS_BAD_VERSION                         0x0004
+#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD                  0x0008  
+#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB   0x000c
+#define IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE                        0x001c
+
 /* RMPP information */
 #define IB_MGMT_RMPP_VERSION                   1
 #define IB_MGMT_RMPP_PASSTHRU                  255
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to