Hi Roland, Sean,

How about the attached fix to Sean patch?

Ishai

Roland Dreier wrote:

This all looks rather fishy:

> +/*
> + * Limit CM msg timeouts to something reasonable.
> + * 8 seconds, with up to 15 retries, gives per msg timeout of 2 min.
> + */
> +#define IB_CM_MAX_TIMEOUT 21

OK... (although 8 seconds seems a little short -- it seems a somewhat
longer timeout could be legitimate on a very busy fabric across a WAN
or something like that)

but then...

> +  timeout = min(IB_CM_MAX_TIMEOUT,
> +                cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
> +                cm_convert_to_ms(cm_id_priv->av.packet_life_time));

should the IB_CM_MAX_TIMEOUT be inside a cm_convert_to_ms() too?
and similarly...

> -  cm_id_priv->timeout_ms = param->timeout_ms;
> +  cm_id_priv->timeout_ms = min(IB_CM_MAX_TIMEOUT, param->timeout_ms);

is timeout_ms misnamed, or did we just limit all timeouts to 21 msecs?

...and other places in the patch seem to have similar problems.

Also, I would like to see warning messages like

   ib_cm: Possibly bogus timeout of xx (yyyyyy msecs) in REP from GID zzzz

printed in the kernel log so people realize they have broken SRP
targets or whatever.

- R.

_______________________________________________
openib-general mailing list
[email protected]
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Limit the timeout that the ib_cm will wait to receive a response to
a message, to avoid excessively large (on the order of hours) timeout
values.  This prevents consuming resources tracking requests for
extended periods of time.

This helps correct for a bug in SRP Engenio target sending a large
value (>1 hour) as service timeout.

Signed-off-by: Sean Hefty <[EMAIL PROTECTED]>
Signed-off-by: Ishai Rabinovitz <[EMAIL PROTECTED]>

Index: gen2_devel_kernel/drivers/infiniband/core/cm.c
===================================================================
--- gen2_devel_kernel.orig/drivers/infiniband/core/cm.c 2007-03-01 
19:11:48.000000000 +0200
+++ gen2_devel_kernel/drivers/infiniband/core/cm.c      2007-03-01 
19:27:05.000000000 +0200
@@ -54,6 +54,15 @@ MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("InfiniBand CM");
 MODULE_LICENSE("Dual BSD/GPL");
 
+#define DRV_NAME        "ib_cm"
+#define PFX             DRV_NAME ": "
+
+/*
+ * Limit CM msg timeouts to something reasonable.
+ * 8 seconds, with up to 15 retries, gives per msg timeout of 2 min.
+ */
+#define IB_CM_MAX_TIMEOUT 21
+
 static void cm_add_one(struct ib_device *device);
 static void cm_remove_one(struct ib_device *device);
 
@@ -887,13 +896,26 @@ static void cm_format_req(struct cm_req_
        cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
        cm_req_set_resp_res(req_msg, param->responder_resources);
        cm_req_set_init_depth(req_msg, param->initiator_depth);
-       cm_req_set_remote_resp_timeout(req_msg,
-                                      param->remote_cm_response_timeout);
+       if ((u8) IB_CM_MAX_TIMEOUT >= param->remote_cm_response_timeout)
+               cm_req_set_remote_resp_timeout(req_msg,
+                       param->remote_cm_response_timeout);
+       else {
+               cm_req_set_remote_resp_timeout(req_msg,
+                                              (u8) IB_CM_MAX_TIMEOUT);
+               printk(KERN_WARNING PFX "Given remote_cm_response_timeout is "
+                      "too long, setting it to default value");
+       }
        cm_req_set_qp_type(req_msg, param->qp_type);
        cm_req_set_flow_ctrl(req_msg, param->flow_control);
        cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
-       cm_req_set_local_resp_timeout(req_msg,
-                                     param->local_cm_response_timeout);
+       if ((u8) IB_CM_MAX_TIMEOUT >= param->local_cm_response_timeout)
+               cm_req_set_local_resp_timeout(req_msg,
+                       param->local_cm_response_timeout);
+       else {
+               cm_req_set_local_resp_timeout(req_msg, (u8) IB_CM_MAX_TIMEOUT);
+               printk(KERN_WARNING PFX "Given local_cm_response_timeout is "
+                      "too long, setting it to default value");
+       }
        cm_req_set_retry_count(req_msg, param->retry_count);
        req_msg->pkey = param->primary_path->pkey;
        cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
@@ -1003,6 +1025,12 @@ int ib_send_cm_req(struct ib_cm_id *cm_i
                                    param->primary_path->packet_life_time) * 2 +
                                 cm_convert_to_ms(
                                    param->remote_cm_response_timeout);
+       if (cm_id_priv->timeout_ms > cm_convert_to_ms(IB_CM_MAX_TIMEOUT)) {
+               cm_id_priv->timeout_ms = cm_convert_to_ms(IB_CM_MAX_TIMEOUT);
+               printk(KERN_WARNING PFX "Calculated timeout (using "
+                      "remote_cm_response_timeout and packet_life_time is "
+                      "too long, setting it to default value");
+       }
        cm_id_priv->max_cm_retries = param->max_cm_retries;
        cm_id_priv->initiator_depth = param->initiator_depth;
        cm_id_priv->responder_resources = param->responder_resources;
@@ -1402,6 +1430,11 @@ static int cm_req_handler(struct cm_work
        cm_id_priv->tid = req_msg->hdr.tid;
        cm_id_priv->timeout_ms = cm_convert_to_ms(
                                        cm_req_get_local_resp_timeout(req_msg));
+       if (cm_id_priv->timeout_ms > cm_convert_to_ms(IB_CM_MAX_TIMEOUT)) {
+               cm_id_priv->timeout_ms = cm_convert_to_ms(IB_CM_MAX_TIMEOUT);
+               printk(KERN_WARNING PFX "The received local response timeout "
+                      "is too long, setting it to default value");
+       }
        cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
        cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
        cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
@@ -2305,7 +2338,12 @@ static int cm_mra_handler(struct cm_work
                                        cm_mra_get_service_timeout(mra_msg);
        timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
                  cm_convert_to_ms(cm_id_priv->av.packet_life_time);
-
+       if (timeout > cm_convert_to_ms(IB_CM_MAX_TIMEOUT)) {
+               timeout = cm_convert_to_ms(IB_CM_MAX_TIMEOUT);
+               printk(KERN_WARNING PFX "Calculated timeout (using "
+                      "the received service_timeout and packet_life_time is "
+                      "too long, setting it to default value");
+       }
        spin_lock_irqsave(&cm_id_priv->lock, flags);
        switch (cm_id_priv->id.state) {
        case IB_CM_REQ_SENT:
@@ -2707,7 +2745,13 @@ int ib_send_cm_sidr_req(struct ib_cm_id 
 
        cm_id->service_id = param->service_id;
        cm_id->service_mask = __constant_cpu_to_be64(~0ULL);
-       cm_id_priv->timeout_ms = param->timeout_ms;
+       if (cm_id_priv->timeout_ms > cm_convert_to_ms(IB_CM_MAX_TIMEOUT))
+               cm_id_priv->timeout_ms = param->timeout_ms;
+       else {
+               cm_id_priv->timeout_ms = cm_convert_to_ms(IB_CM_MAX_TIMEOUT);
+               printk(KERN_WARNING PFX "Given timeout to ib_send_cm_sidr_req "
+                      "is too long, setting it to default value");
+       }
        cm_id_priv->max_cm_retries = param->max_cm_retries;
        ret = cm_alloc_msg(cm_id_priv, &msg);
        if (ret)
_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to