Hi Roland, Sean,
How about the attached fix to Sean patch?
Ishai
Roland Dreier wrote:
This all looks rather fishy:
> +/*
> + * Limit CM msg timeouts to something reasonable.
> + * 8 seconds, with up to 15 retries, gives per msg timeout of 2 min.
> + */
> +#define IB_CM_MAX_TIMEOUT 21
OK... (although 8 seconds seems a little short -- it seems a somewhat
longer timeout could be legitimate on a very busy fabric across a WAN
or something like that)
but then...
> + timeout = min(IB_CM_MAX_TIMEOUT,
> + cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
> + cm_convert_to_ms(cm_id_priv->av.packet_life_time));
should the IB_CM_MAX_TIMEOUT be inside a cm_convert_to_ms() too?
and similarly...
> - cm_id_priv->timeout_ms = param->timeout_ms;
> + cm_id_priv->timeout_ms = min(IB_CM_MAX_TIMEOUT, param->timeout_ms);
is timeout_ms misnamed, or did we just limit all timeouts to 21 msecs?
...and other places in the patch seem to have similar problems.
Also, I would like to see warning messages like
ib_cm: Possibly bogus timeout of xx (yyyyyy msecs) in REP from GID zzzz
printed in the kernel log so people realize they have broken SRP
targets or whatever.
- R.
_______________________________________________
openib-general mailing list
[email protected]
http://openib.org/mailman/listinfo/openib-general
To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Limit the timeout that the ib_cm will wait to receive a response to
a message, to avoid excessively large (on the order of hours) timeout
values. This prevents consuming resources tracking requests for
extended periods of time.
This helps correct for a bug in SRP Engenio target sending a large
value (>1 hour) as service timeout.
Signed-off-by: Sean Hefty <[EMAIL PROTECTED]>
Signed-off-by: Ishai Rabinovitz <[EMAIL PROTECTED]>
Index: gen2_devel_kernel/drivers/infiniband/core/cm.c
===================================================================
--- gen2_devel_kernel.orig/drivers/infiniband/core/cm.c 2007-03-01
19:11:48.000000000 +0200
+++ gen2_devel_kernel/drivers/infiniband/core/cm.c 2007-03-01
19:27:05.000000000 +0200
@@ -54,6 +54,15 @@ MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
MODULE_LICENSE("Dual BSD/GPL");
+#define DRV_NAME "ib_cm"
+#define PFX DRV_NAME ": "
+
+/*
+ * Limit CM msg timeouts to something reasonable.
+ * 8 seconds, with up to 15 retries, gives per msg timeout of 2 min.
+ */
+#define IB_CM_MAX_TIMEOUT 21
+
static void cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device);
@@ -887,13 +896,26 @@ static void cm_format_req(struct cm_req_
cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
cm_req_set_resp_res(req_msg, param->responder_resources);
cm_req_set_init_depth(req_msg, param->initiator_depth);
- cm_req_set_remote_resp_timeout(req_msg,
- param->remote_cm_response_timeout);
+ if ((u8) IB_CM_MAX_TIMEOUT >= param->remote_cm_response_timeout)
+ cm_req_set_remote_resp_timeout(req_msg,
+ param->remote_cm_response_timeout);
+ else {
+ cm_req_set_remote_resp_timeout(req_msg,
+ (u8) IB_CM_MAX_TIMEOUT);
+ printk(KERN_WARNING PFX "Given remote_cm_response_timeout is "
+ "too long, setting it to default value");
+ }
cm_req_set_qp_type(req_msg, param->qp_type);
cm_req_set_flow_ctrl(req_msg, param->flow_control);
cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
- cm_req_set_local_resp_timeout(req_msg,
- param->local_cm_response_timeout);
+ if ((u8) IB_CM_MAX_TIMEOUT >= param->local_cm_response_timeout)
+ cm_req_set_local_resp_timeout(req_msg,
+ param->local_cm_response_timeout);
+ else {
+ cm_req_set_local_resp_timeout(req_msg, (u8) IB_CM_MAX_TIMEOUT);
+ printk(KERN_WARNING PFX "Given local_cm_response_timeout is "
+ "too long, setting it to default value");
+ }
cm_req_set_retry_count(req_msg, param->retry_count);
req_msg->pkey = param->primary_path->pkey;
cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
@@ -1003,6 +1025,12 @@ int ib_send_cm_req(struct ib_cm_id *cm_i
param->primary_path->packet_life_time) * 2 +
cm_convert_to_ms(
param->remote_cm_response_timeout);
+ if (cm_id_priv->timeout_ms > cm_convert_to_ms(IB_CM_MAX_TIMEOUT)) {
+ cm_id_priv->timeout_ms = cm_convert_to_ms(IB_CM_MAX_TIMEOUT);
+ printk(KERN_WARNING PFX "Calculated timeout (using "
+ "remote_cm_response_timeout and packet_life_time is "
+ "too long, setting it to default value");
+ }
cm_id_priv->max_cm_retries = param->max_cm_retries;
cm_id_priv->initiator_depth = param->initiator_depth;
cm_id_priv->responder_resources = param->responder_resources;
@@ -1402,6 +1430,11 @@ static int cm_req_handler(struct cm_work
cm_id_priv->tid = req_msg->hdr.tid;
cm_id_priv->timeout_ms = cm_convert_to_ms(
cm_req_get_local_resp_timeout(req_msg));
+ if (cm_id_priv->timeout_ms > cm_convert_to_ms(IB_CM_MAX_TIMEOUT)) {
+ cm_id_priv->timeout_ms = cm_convert_to_ms(IB_CM_MAX_TIMEOUT);
+ printk(KERN_WARNING PFX "The received local response timeout "
+ "is too long, setting it to default value");
+ }
cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
@@ -2305,7 +2338,12 @@ static int cm_mra_handler(struct cm_work
cm_mra_get_service_timeout(mra_msg);
timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
cm_convert_to_ms(cm_id_priv->av.packet_life_time);
-
+ if (timeout > cm_convert_to_ms(IB_CM_MAX_TIMEOUT)) {
+ timeout = cm_convert_to_ms(IB_CM_MAX_TIMEOUT);
+ printk(KERN_WARNING PFX "Calculated timeout (using "
+ "the received service_timeout and packet_life_time is "
+ "too long, setting it to default value");
+ }
spin_lock_irqsave(&cm_id_priv->lock, flags);
switch (cm_id_priv->id.state) {
case IB_CM_REQ_SENT:
@@ -2707,7 +2745,13 @@ int ib_send_cm_sidr_req(struct ib_cm_id
cm_id->service_id = param->service_id;
cm_id->service_mask = __constant_cpu_to_be64(~0ULL);
- cm_id_priv->timeout_ms = param->timeout_ms;
+ if (cm_id_priv->timeout_ms > cm_convert_to_ms(IB_CM_MAX_TIMEOUT))
+ cm_id_priv->timeout_ms = param->timeout_ms;
+ else {
+ cm_id_priv->timeout_ms = cm_convert_to_ms(IB_CM_MAX_TIMEOUT);
+ printk(KERN_WARNING PFX "Given timeout to ib_send_cm_sidr_req "
+ "is too long, setting it to default value");
+ }
cm_id_priv->max_cm_retries = param->max_cm_retries;
ret = cm_alloc_msg(cm_id_priv, &msg);
if (ret)
_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general
To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general