This patch builds upon a discussion we had earlier this year on adding a backoff function when retrying MAD sends after a timeout.
This patch does NOT implement the ABI/API changes that would be needed to take
advantage of the new features, but it lays the groundwork for doing so. In
addition, it provides a new module parameter that allows the administrator to
coerce existing code into using the new capability.
First, I've added a new field called "randomized_wait" to the ib_mad_send_buf
structure. If this parameter is set, each time the WR times out, the
timeout for the next retry is set to (send_buf->timeout_ms +
(511 << send_buf->retries) - (random32() & 511)). In other words, because
send_buf->retries is incremented before the new timeout is computed, the
first retry adds roughly 0.5 to 1 second to the timeout, the second roughly
1.5 to 2 seconds, the third roughly 3.5 to 4 seconds, et cetera -- a backoff
that doubles on each retry, reduced by up to ~0.5 second of random jitter.
(NOTE: an earlier version of this description quoted ranges starting at
0-0.5 s, which did not match the code below.) In
addition, a new field, total_timeout, has been added to
ib_mad_send_wr_private and is initialized to (send_buf->timeout_ms *
send_buf->retries). Retries cannot exceed this total time, even though that
will mean a lower number of retry attempts.
Finally, I've added a module parameter to coerce all mad work requests to use
this feature if desired.
parm: randomized_wait:When true, use a randomized backoff algorithm
to control retries for timeouts. (int)
--------
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index ef1304f..3b03f1c 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -42,6 +42,11 @@
#include "smi.h"
#include "agent.h"
+#include <linux/random.h>
+
+#define MAD_MIN_TIMEOUT_MS 511
+#define MAD_RAND_TIMEOUT_MS 511
+
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("kernel IB MAD API");
MODULE_AUTHOR("Hal Rosenstock");
@@ -55,6 +60,10 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in
number of work requests
module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work
requests");
+int mad_randomized_wait = 0;
+module_param_named(randomized_wait, mad_randomized_wait, int, 0444);
+MODULE_PARM_DESC(randomized_wait, "When true, use a randomized backoff
algorithm to control retries for timeouts.");
+
static struct kmem_cache *ib_mad_cache;
static struct list_head ib_mad_port_list;
@@ -1102,11 +1111,18 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
}
mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
+
+ mad_send_wr->randomized_wait = mad_randomized_wait ||
send_buf->randomized_wait;
+ mad_send_wr->total_timeout =
msecs_to_jiffies(send_buf->timeout_ms) * send_buf->retries;
+
/* Timeout will be updated after send completes */
mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
+
mad_send_wr->max_retries = send_buf->retries;
mad_send_wr->retries_left = send_buf->retries;
+
send_buf->retries = 0;
+
/* Reference for work request to QP + response */
mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
mad_send_wr->status = IB_WC_SUCCESS;
@@ -1803,6 +1819,7 @@ static void ib_mad_complete_recv(struct
ib_mad_agent_private *mad_agent_priv,
/* Complete corresponding request */
if (ib_response_mad(mad_recv_wc->recv_buf.mad)) {
+
spin_lock_irqsave(&mad_agent_priv->lock, flags);
mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
if (!mad_send_wr) {
@@ -1811,6 +1828,7 @@ static void ib_mad_complete_recv(struct
ib_mad_agent_private *mad_agent_priv,
deref_mad_agent(mad_agent_priv);
return;
}
+
ib_mark_mad_done(mad_send_wr);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
@@ -2429,14 +2447,33 @@ static int retry_send(struct ib_mad_send_wr_private
*mad_send_wr)
{
int ret;
- if (!mad_send_wr->retries_left)
+ if (!mad_send_wr->retries_left || (mad_send_wr->total_timeout == 0))
return -ETIMEDOUT;
mad_send_wr->retries_left--;
mad_send_wr->send_buf.retries++;
- mad_send_wr->timeout =
msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
+ if (mad_send_wr->randomized_wait) {
+ mad_send_wr->timeout =
msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms +
+ (MAD_MIN_TIMEOUT_MS<<mad_send_wr->send_buf.retries) -
+ (random32()&MAD_RAND_TIMEOUT_MS));
+ if (mad_send_wr->timeout > mad_send_wr->total_timeout) {
+ mad_send_wr->timeout = mad_send_wr->total_timeout;
+ mad_send_wr->total_timeout = 0;
+ } else {
+ mad_send_wr->total_timeout -= mad_send_wr->timeout;
+ }
+ } else {
+ mad_send_wr->timeout =
msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
+ }
+ printk(KERN_DEBUG PFX "Retrying send %p: retries: %u, retries_left: %u,
timeout: %lu, total_timeout: %lu\n",
+ mad_send_wr,
+ mad_send_wr->send_buf.retries,
+ mad_send_wr->retries_left,
+ mad_send_wr->timeout,
+ mad_send_wr->total_timeout);
+
if (mad_send_wr->mad_agent_priv->agent.rmpp_version) {
ret = ib_retry_rmpp(mad_send_wr);
switch (ret) {
diff --git a/drivers/infiniband/core/mad_priv.h
b/drivers/infiniband/core/mad_priv.h
index 9430ab4..01fb7ed 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -132,8 +132,10 @@ struct ib_mad_send_wr_private {
struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
__be64 tid;
unsigned long timeout;
+ unsigned long total_timeout;
int max_retries;
int retries_left;
+ int randomized_wait;
int retry;
int refcount;
enum ib_wc_status status;
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index d3b9401..c3d6efb 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -77,6 +77,15 @@
#define IB_MGMT_MAX_METHODS 128
+/* MAD Status field bit masks */
+#define IB_MGMT_MAD_STATUS_SUCCESS
0x0000
+#define IB_MGMT_MAD_STATUS_BUSY
0x0001
+#define IB_MGMT_MAD_STATUS_REDIRECT_REQD 0x0002
+#define IB_MGMT_MAD_STATUS_BAD_VERSION 0x0004
+#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD 0x0008
+#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB 0x000c
+#define IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE 0x001c
+
/* RMPP information */
#define IB_MGMT_RMPP_VERSION 1
@@ -246,6 +255,7 @@ struct ib_mad_send_buf {
int seg_count;
int seg_size;
int timeout_ms;
+ int randomized_wait;
int retries;
};
randomized_mad_timeout.patch
Description: randomized_mad_timeout.patch
