On 4/23/2014 1:15 PM, Hefty, Sean wrote:
In the first patch of the series (IB/mlx4: send a IB_EVENT_DEVICE_FATAL to users during PCI error injection), when mlx4 sends the IB_EVENT_DEVICE_FATAL event, I pass the port number in that field; that is how I was able to make it work. Any other suggestions are welcome.
Index: b/drivers/infiniband/core/cm.c =================================================================== --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -161,6 +161,7 @@ struct cm_port { struct ib_mad_agent *mad_agent; struct kobject port_obj; u8 port_num; + u8 device_fatal; struct cm_counter_group counter_group[CM_COUNTER_GROUPS]; };@@ -168,6 +169,7 @@ struct cm_device { struct list_head list; struct ib_device *ib_device; struct device *device; + struct ib_event_handler event_handler; u8 ack_delay; struct cm_port *port[0]; }; @@ -258,6 +260,10 @@ static int cm_alloc_msg(struct cm_id_pri struct ib_mad_agent *mad_agent; struct ib_mad_send_buf *m; struct ib_ah *ah; + struct cm_port *port = cm_id_priv->av.port; + + if (port->device_fatal) + return -EIO; mad_agent = cm_id_priv->av.port->mad_agent; ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr); @@ -290,6 +296,9 @@ static int cm_alloc_response_msg(struct struct ib_mad_send_buf *m; struct ib_ah *ah; + if (port->device_fatal) + return -EIO; + ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc, mad_recv_wc->recv_buf.grh, port->port_num); if (IS_ERR(ah)) @@ -3764,6 +3773,33 @@ static void cm_remove_port_fs(struct cm_ kobject_put(&port->port_obj); } +static void ib_cm_event_handler(struct ib_event_handler *handler, + struct ib_event *event) +{ + struct cm_device *cm_dev; + struct cm_port *port = NULL; + + cm_dev = container_of(handler, struct cm_device, event_handler); + switch (event->event) { + case IB_EVENT_PORT_ACTIVE: + port = cm_dev->port[event->element.port_num - 1]; + if (port == NULL) + return; + if (port->port_num == event->element.port_num) + port->device_fatal = 0; + break; + case 
IB_EVENT_DEVICE_FATAL: + port = cm_dev->port[event->element.port_num - 1]; + if (port == NULL) + return; + if (port->port_num == event->element.port_num) + port->device_fatal = 1; + break;
This is a device-level event, not a port event. The port_num value may not be valid.
Thanks
-- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to [email protected] More majordomo info at http://vger.kernel.org/majordomo-info.html
-- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to [email protected] More majordomo info at http://vger.kernel.org/majordomo-info.html
