The following patch separates the interactions between QP 0 and 1 in the MAD code.  
Each QP now has its own queuing, locking, completion handling, error handling, etc.

I have a list of several changes for the MAD code that I will try to get to 
tomorrow.  Please let me know if you have any questions.

- Sean

Index: access/ib_mad_priv.h
===================================================================
--- access/ib_mad_priv.h        (revision 880)
+++ access/ib_mad_priv.h        (working copy)
@@ -95,6 +95,7 @@
 
 struct ib_mad_agent_private {
        struct list_head agent_list;
+       struct ib_mad_qp_info *qp_info;
        struct ib_mad_agent agent;
        struct ib_mad_reg_req *reg_req;
        u8 rmpp_version;
@@ -115,17 +116,27 @@
        struct ib_mad_mgmt_method_table *method_table[MAX_MGMT_CLASS];
 };
 
-struct ib_mad_thread_private {
+struct ib_mad_qp_info {
+       struct ib_mad_port_private *port_priv;
+       struct ib_qp *qp;
+       struct ib_cq *cq;
+
+       spinlock_t       send_list_lock;
+       struct list_head send_posted_mad_list;
+       int              send_posted_mad_count;
+
+       spinlock_t       recv_list_lock;
+       struct list_head recv_posted_mad_list;
+       int              recv_posted_mad_count;
+
+       struct task_struct      *mad_thread;
        wait_queue_head_t       wait;
 };
 
 struct ib_mad_port_private {
        struct list_head port_list;
-       struct task_struct *mad_thread;
        struct ib_device *device;
        int port_num;
-       struct ib_qp *qp[IB_MAD_QPS_SUPPORTED];
-       struct ib_cq *cq;
        struct ib_pd *pd;
        struct ib_mr *mr;
 
@@ -133,15 +144,7 @@
        struct ib_mad_mgmt_class_table *version[MAX_MGMT_VERSION];
        struct list_head agent_list;
 
-       spinlock_t send_list_lock;
-       struct list_head send_posted_mad_list;
-       int send_posted_mad_count;
-
-       spinlock_t recv_list_lock;
-       struct list_head recv_posted_mad_list[IB_MAD_QPS_SUPPORTED];
-       int recv_posted_mad_count[IB_MAD_QPS_SUPPORTED];
-
-       struct ib_mad_thread_private mad_thread_private;
+       struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
 };
 
 #endif /* __IB_MAD_PRIV_H__ */
Index: access/ib_mad.c
===================================================================
--- access/ib_mad.c     (revision 880)
+++ access/ib_mad.c     (working copy)
@@ -81,12 +81,10 @@
 static int add_mad_reg_req(struct ib_mad_reg_req *mad_reg_req,
                           struct ib_mad_agent_private *priv);
 static void remove_mad_reg_req(struct ib_mad_agent_private *priv); 
-static int ib_mad_port_restart(struct ib_mad_port_private *priv);
-static int ib_mad_post_receive_mad(struct ib_mad_port_private *port_priv,
-                                  struct ib_qp *qp);
-static int ib_mad_post_receive_mads(struct ib_mad_port_private *priv);
+static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info);
+static int ib_mad_post_receive_mad(struct ib_mad_qp_info *qp_info);
 static inline u8 convert_mgmt_class(u8 mgmt_class);
-
+static int ib_mad_restart_qp(struct ib_mad_qp_info *qp_info);
 
 /*
  * ib_register_mad_agent - Register to send/receive MADs
@@ -205,11 +203,12 @@
        memset(mad_agent_priv, 0, sizeof *mad_agent_priv);
        mad_agent_priv->reg_req = reg_req;
        mad_agent_priv->rmpp_version = rmpp_version;
+       mad_agent_priv->qp_info = &port_priv->qp_info[qp_type];
        mad_agent_priv->agent.device = device;
        mad_agent_priv->agent.recv_handler = recv_handler;
        mad_agent_priv->agent.send_handler = send_handler;
        mad_agent_priv->agent.context = context;
-       mad_agent_priv->agent.qp = port_priv->qp[qp_type];
+       mad_agent_priv->agent.qp = port_priv->qp_info[qp_type].qp;
        mad_agent_priv->agent.hi_tid = ++ib_mad_client_id;
 
        ret2 = add_mad_reg_req(mad_reg_req, mad_agent_priv);
@@ -287,6 +286,7 @@
        struct ib_send_wr       *cur_send_wr, *next_send_wr;
        struct ib_send_wr       wr;
        struct ib_send_wr       *bad_wr;
+       struct ib_mad_agent_private *mad_agent_priv;
        struct ib_mad_send_wr_private *mad_send_wr;
        unsigned long flags;
 
@@ -297,6 +297,9 @@
                return -EINVAL;
        }
 
+       mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, 
+                                     agent);
+
        /* Walk list of send WRs and post each on send list */
        cur_send_wr = send_wr;
        while (cur_send_wr) {
@@ -330,19 +333,22 @@
                wr.send_flags = IB_SEND_SIGNALED; /* cur_send_wr->send_flags ? */
 
                /* Link send WR into posted send MAD list */
-               spin_lock_irqsave(&((struct ib_mad_port_private *)mad_agent->device->mad)->send_list_lock, flags);
+               spin_lock_irqsave(&mad_agent_priv->qp_info->send_list_lock, flags);
                list_add_tail(&mad_send_wr->send_list,
-                             &((struct ib_mad_port_private *)mad_agent->device->mad)->send_posted_mad_list);
-               ((struct ib_mad_port_private *)mad_agent->device->mad)->send_posted_mad_count++;
-               spin_unlock_irqrestore(&((struct ib_mad_port_private *)mad_agent->device->mad)->send_list_lock, flags);
+                             &mad_agent_priv->qp_info->send_posted_mad_list);
+               mad_agent_priv->qp_info->send_posted_mad_count++;
+               spin_unlock_irqrestore(&mad_agent_priv->qp_info->send_list_lock,
+                                      flags);
 
                ret = ib_post_send(mad_agent->qp, &wr, &bad_wr);
                if (ret) {
                        /* Unlink from posted send MAD list */
-                       spin_unlock_irqrestore(&((struct ib_mad_port_private *)mad_agent->device->mad)->send_list_lock, flags);
+                       spin_unlock_irqrestore(
+                               &mad_agent_priv->qp_info->send_list_lock, flags);
                        list_del(&mad_send_wr->send_list);
-                       ((struct ib_mad_port_private *)mad_agent->device->mad)->send_posted_mad_count--;
-                       spin_unlock_irqrestore(&((struct ib_mad_port_private *)mad_agent->device->mad)->send_list_lock, flags);
+                       mad_agent_priv->qp_info->send_posted_mad_count--;
+                       spin_unlock_irqrestore(
+                               &mad_agent_priv->qp_info->send_list_lock, flags);
                        *bad_send_wr = cur_send_wr;
                        printk(KERN_NOTICE "ib_post_mad_send failed\n");
                        return ret;             
@@ -361,19 +367,32 @@
 void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc)
 {
        struct ib_mad_recv_buf *entry;
-       struct ib_mad_private *buffer = (struct ib_mad_private *)mad_recv_wc;
+       struct ib_mad_private_header *mad_private_header;
+       struct ib_mad_private *mad_private;
 
        /*
         * Walk receive buffer list associated with this WC
         * No need to remove them from list of receive buffers
         */
+       mad_private_header = container_of(mad_recv_wc,
+                                         struct ib_mad_private_header,
+                                         recv_wc);
+       mad_private = container_of(mad_private_header,
+                                  struct ib_mad_private,
+                                  header);
+       
        list_for_each_entry(entry, &mad_recv_wc->recv_buf->list, list) {
                /* Free previous receive buffer */
-               kmem_cache_free(ib_mad_cache, buffer);
-               buffer = (void *)entry - sizeof(struct ib_mad_private_header);
+               kmem_cache_free(ib_mad_cache, mad_private);
+               mad_private_header = container_of(mad_recv_wc,
+                                                 struct ib_mad_private_header,
+                                                 recv_wc);
+               mad_private = container_of(mad_private_header,
+                                          struct ib_mad_private,
+                                          header);
        }
        /* Free last buffer */
-       kmem_cache_free(ib_mad_cache, buffer);
+       kmem_cache_free(ib_mad_cache, mad_private);
 }
 EXPORT_SYMBOL(ib_free_recv_mad);
 
@@ -567,20 +586,6 @@
        }
 }
 
-static int convert_qpnum(u32 qp_num)
-{
-       /* 
-        * No redirection currently!!!
-        * QP0 and QP1 only
-        * Ultimately, will need table of QP numbers and table index
-        * as QP numbers will not be packed once redirection supported
-        */
-       if (qp_num > 1) {
-               printk(KERN_ERR "QP number %d invalid\n", qp_num);
-       }
-       return qp_num;
-}
-
 static int response_mad(struct ib_mad *mad)
 {
        /* Trap represses are responses although response bit is reset */
@@ -622,7 +627,7 @@
        /* Whether MAD was solicited determines type of routing to MAD client */
        if (solicited) {
                /* Routing is based on high 32 bits of transaction ID of MAD  */
-               hi_tid = mad->mad_hdr.tid >> 32;
+               hi_tid = (u32)(mad->mad_hdr.tid >> 32);
                list_for_each_entry(entry, &port_priv->agent_list, agent_list) {
                        if (entry->agent.hi_tid == hi_tid) {
                                mad_agent = entry;
@@ -631,7 +636,7 @@
                }
                if (!mad_agent) {
                        printk(KERN_ERR "No client 0x%x for received MAD\n",
-                              (u32)(mad->mad_hdr.tid >> 32));
+                              hi_tid);
                        goto ret;
                }
        } else {
@@ -643,12 +648,14 @@
                }
                version = port_priv->version[mad->mad_hdr.class_version];
                if (!version) {
-                       printk(KERN_ERR "MAD received for class version %d with no client\n", mad->mad_hdr.class_version);
+                       printk(KERN_ERR "MAD received for class version %d with no client\n",
+                              mad->mad_hdr.class_version);
                        goto ret;
                }
                class = version->method_table[convert_mgmt_class(mad->mad_hdr.mgmt_class)];     
                if (!class) {
-                       printk(KERN_ERR "MAD receive for class %d with no client\n", mad->mad_hdr.mgmt_class);
+                       printk(KERN_ERR "MAD receive for class %d with no client\n",
+                              mad->mad_hdr.mgmt_class);
                        goto ret;
                }
                mad_agent = class->agent[mad->mad_hdr.method & ~IB_MGMT_METHOD_RESP];           
@@ -684,48 +691,43 @@
        return valid;
 }
 
-static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
+static void ib_mad_recv_done_handler(struct ib_mad_qp_info *qp_info,
                                     struct ib_wc *wc)
 {
        struct ib_mad_private *recv;
        unsigned long flags;
-       u32 qp_num;
        struct ib_mad_agent_private *mad_agent;
        int solicited;
 
-       /* For receive, WC WRID is the QP number */
-       qp_num = wc->wr_id;
-
        /* 
         * Completion corresponds to first entry on 
         * posted MAD receive list based on WRID in completion
         */
-       spin_lock_irqsave(&port_priv->recv_list_lock, flags);
-       if (!list_empty(&port_priv->recv_posted_mad_list[convert_qpnum(qp_num)])) {
-               recv = list_entry(&port_priv->recv_posted_mad_list[convert_qpnum(qp_num)],
+       spin_lock_irqsave(&qp_info->recv_list_lock, flags);
+       if (!list_empty(&qp_info->recv_posted_mad_list)) {
+               recv = list_entry(&qp_info->recv_posted_mad_list,
                                  struct ib_mad_private,
                                  header.recv_buf.list);
 
                /* Remove from posted receive MAD list */
                list_del(&recv->header.recv_buf.list);
-
-               port_priv->recv_posted_mad_count[convert_qpnum(qp_num)]--;
+               qp_info->recv_posted_mad_count--;
 
        } else {
-               printk(KERN_ERR "Receive completion WR ID 0x%Lx on QP %d with no posted receive\n", wc->wr_id, qp_num); 
-               spin_unlock_irqrestore(&port_priv->recv_list_lock, flags);
+               printk(KERN_ERR "Receive completion with no posted receive\n");
+               spin_unlock_irqrestore(&qp_info->recv_list_lock, flags);
                return;
        }
-       spin_unlock_irqrestore(&port_priv->recv_list_lock, flags);
+       spin_unlock_irqrestore(&qp_info->recv_list_lock, flags);
 
-       pci_unmap_single(port_priv->device->dma_device,
+       pci_unmap_single(qp_info->port_priv->device->dma_device,
                         pci_unmap_addr(&recv->header, mapping),
                         sizeof(struct ib_mad_private) - sizeof(struct ib_mad_private_header),
                         PCI_DMA_FROMDEVICE);
 
        /* Setup MAD receive work completion from "normal" work completion */
        recv->header.recv_wc.wc = wc;
-       recv->header.recv_wc.mad_len = sizeof(struct ib_mad); /* Should this be based on wc->byte_len ? Also, RMPP !!! */
+       recv->header.recv_wc.mad_len = sizeof(struct ib_mad); /* ignore GRH size */
        recv->header.recv_wc.recv_buf = &recv->header.recv_buf;
 
        /* Setup MAD receive buffer */
@@ -738,15 +740,15 @@
        }
 
        /* Validate MAD */
-       if (!validate_mad(recv->header.recv_buf.mad, qp_num))
-               goto ret;
+       if (!validate_mad(recv->header.recv_buf.mad, qp_info->qp->qp_num))
+               return;
 
        /* Determine corresponding MAD agent for incoming receive MAD */
-       spin_lock_irqsave(&port_priv->reg_lock, flags);
+       spin_lock_irqsave(&qp_info->port_priv->reg_lock, flags);
        /* First, determine whether MAD was solicited */
        solicited = solicited_mad(recv->header.recv_buf.mad);
        /* Now, find the mad agent */
-       mad_agent = find_mad_agent(port_priv,
+       mad_agent = find_mad_agent(qp_info->port_priv,
                                   recv->header.recv_buf.mad,
                                   solicited);
        if (!mad_agent) {
@@ -757,49 +759,40 @@
                        printk(KERN_DEBUG "Currently unsupported solicited MAD received\n");
                }
 
+               /* Release locking before callback... */
                /* Invoke receive callback */   
                mad_agent->agent.recv_handler(&mad_agent->agent,
                                              &recv->header.recv_wc);
        }
-       spin_unlock_irqrestore(&port_priv->reg_lock, flags);
-
-       /* Post another receive request for this QP */
-       ib_mad_post_receive_mad(port_priv, port_priv->qp[qp_num]);
-
-ret:
-       return;
+       spin_unlock_irqrestore(&qp_info->port_priv->reg_lock, flags);
 }
 
-static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
+static void ib_mad_send_done_handler(struct ib_mad_qp_info *qp_info,
                                     struct ib_wc *wc)
 {
        struct ib_mad_send_wr_private *send_wr;
        unsigned long flags;
 
        /* Completion corresponds to first entry on posted MAD send list */
-       spin_lock_irqsave(&port_priv->send_list_lock, flags);
-       if (!list_empty(&port_priv->send_posted_mad_list)) {
-               send_wr = list_entry(&port_priv->send_posted_mad_list,
+       spin_lock_irqsave(&qp_info->send_list_lock, flags);
+       if (!list_empty(&qp_info->send_posted_mad_list)) {
+               send_wr = list_entry(&qp_info->send_posted_mad_list,
                                     struct ib_mad_send_wr_private,
                                     send_list);
 
-               if (send_wr->wr_id != wc->wr_id) {
-                       printk(KERN_ERR "Send completion WR ID 0x%Lx doesn't match posted send WR ID 0x%Lx\n", wc->wr_id, send_wr->wr_id);
-                       
-                       goto error;
-               }
-
                /* Check whether timeout was requested !!! */
 
                /* Remove from posted send MAD list */
                list_del(&send_wr->send_list);
-               port_priv->send_posted_mad_count--;
+               qp_info->send_posted_mad_count--;
 
        } else {
-               printk(KERN_ERR "Send completion  WR ID 0x%Lx but send list is empty\n", wc->wr_id);
+               printk(KERN_ERR "Send completion WR ID 0x%Lx but send list is empty\n", wc->wr_id);
                goto error;
        }
-       spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
+       spin_unlock_irqrestore(&qp_info->send_list_lock, flags);
+
+       /* Synchronize with deregistration... */
 
        /* Restore client wr_id in WC */
        wc->wr_id = send_wr->wr_id;
@@ -811,20 +804,19 @@
        return;
 
 error:
-       spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
+       spin_unlock_irqrestore(&qp_info->send_list_lock, flags);
        return;
 }
 
 /*
  * IB MAD completion callback
  */
-static void ib_mad_completion_handler(struct ib_mad_port_private *port_priv)
+static void ib_mad_completion_handler(struct ib_mad_qp_info *qp_info)
 {
        struct ib_wc wc;
        int err_status = 0;
 
-       while (!ib_poll_cq(port_priv->cq, 1, &wc)) {
-               printk(KERN_DEBUG "Completion - WR ID = 0x%Lx\n", wc.wr_id);
+       while (!ib_poll_cq(qp_info->cq, 1, &wc)) {
 
                if (wc.status != IB_WC_SUCCESS) {
                        switch (wc.opcode) {
@@ -846,10 +838,11 @@
 
                switch (wc.opcode) {
                case IB_WC_SEND:
-                       ib_mad_send_done_handler(port_priv, &wc);
+                       ib_mad_send_done_handler(qp_info, &wc);
                        break;
                case IB_WC_RECV:
-                       ib_mad_recv_done_handler(port_priv, &wc);
+                       ib_mad_recv_done_handler(qp_info, &wc);
+                       ib_mad_post_receive_mad(qp_info);
                        break;
                default:
                        printk(KERN_ERR "Wrong Opcode: %d\n", wc.opcode);
@@ -861,76 +854,43 @@
        }
 
        if (err_status) {
-               ib_mad_port_restart(port_priv);
+               ib_mad_restart_qp(qp_info);
        } else {
-               ib_mad_post_receive_mads(port_priv);
-               ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
+               ib_req_notify_cq(qp_info->cq, IB_CQ_NEXT_COMP);
        }
 }
 
 /*
  * IB MAD thread
  */
-static int ib_mad_thread(void *param)
+static int ib_mad_thread_handler(void *param)
 {
-       struct ib_mad_port_private *port_priv = param;
-       struct ib_mad_thread_private *mad_thread_priv = &port_priv->mad_thread_private;
+       struct ib_mad_qp_info *qp_info = param;
        int ret;
 
        while (1) {
                while (!signal_pending(current)) {
-                       ret = wait_event_interruptible(mad_thread_priv->wait, 0);
+                       ret = wait_event_interruptible(qp_info->wait, 0);
                        if (ret) {
                                printk(KERN_ERR "ib_mad thread exiting\n");
                                return 0;
                        }
 
-                       ib_mad_completion_handler(port_priv);
-
+                       ib_mad_completion_handler(qp_info);
                }
        }
 }
 
-/*
- * Initialize the IB MAD thread
- */
-static int ib_mad_thread_init(struct ib_mad_port_private *port_priv)
-{
-       struct ib_mad_thread_private *mad_thread_priv = &port_priv->mad_thread_private;
-
-       init_waitqueue_head(&mad_thread_priv->wait);
-
-       port_priv->mad_thread = kthread_create(ib_mad_thread,
-                                              port_priv,
-                                              "ib_mad-%-6s-%-2d",
-                                              port_priv->device->name,
-                                              port_priv->port_num);
-       if (IS_ERR(port_priv->mad_thread)) {
-               printk(KERN_ERR "Couldn't start mad thread for %s port %d\n",
-                      port_priv->device->name, port_priv->port_num);
-               return 1;
-       }       
-       return 0;
-}
-
-/*
- * Stop the IB MAD thread
- */
-static void ib_mad_thread_stop(struct ib_mad_port_private *port_priv)
-{
-       kthread_stop(port_priv->mad_thread);    /* !!! */
-}
 
 static void ib_mad_thread_completion_handler(struct ib_cq *cq)
 {
-       struct ib_mad_port_private *port_priv = cq->cq_context;
-       struct ib_mad_thread_private *mad_thread_priv = &port_priv->mad_thread_private;
+       struct ib_mad_qp_info *qp_info;
 
-       wake_up_interruptible(&mad_thread_priv->wait);
+       qp_info = (struct ib_mad_qp_info*)cq->cq_context;
+       wake_up_interruptible(&qp_info->wait);
 }
 
-static int ib_mad_post_receive_mad(struct ib_mad_port_private *port_priv,
-                                  struct ib_qp *qp)
+static int ib_mad_post_receive_mad(struct ib_mad_qp_info *qp_info)
 {
        struct ib_mad_private *mad_priv;
        struct ib_sge sg_list;
@@ -955,43 +915,42 @@
        }
 
        /* Setup scatter list */
-       sg_list.addr = pci_map_single(port_priv->device->dma_device,
+       sg_list.addr = pci_map_single(qp_info->port_priv->device->dma_device,
                                      &mad_priv->grh,
                                      sizeof *mad_priv - sizeof mad_priv->header, 
                                      PCI_DMA_FROMDEVICE);
        sg_list.length = sizeof *mad_priv - sizeof mad_priv->header;
-       sg_list.lkey = (*port_priv->mr).lkey;
+       sg_list.lkey = qp_info->port_priv->mr->lkey;
 
        /* Setup receive WR */
        recv_wr.next = NULL;
        recv_wr.sg_list = &sg_list;
        recv_wr.num_sge = 1;
        recv_wr.recv_flags = IB_RECV_SIGNALED;
-       recv_wr.wr_id = qp->qp_num; /* 32 bits left */
 
        /* Link receive WR into posted receive MAD list */
-       spin_lock_irqsave(&port_priv->recv_list_lock, flags);
+       spin_lock_irqsave(&qp_info->recv_list_lock, flags);
        list_add_tail(&mad_priv->header.recv_buf.list,
-                     &port_priv->recv_posted_mad_list[convert_qpnum(qp->qp_num)]);
-       port_priv->recv_posted_mad_count[convert_qpnum(qp->qp_num)]++;
-       spin_unlock_irqrestore(&port_priv->recv_list_lock, flags);
+                     &qp_info->recv_posted_mad_list);
+       qp_info->recv_posted_mad_count++;
+       spin_unlock_irqrestore(&qp_info->recv_list_lock, flags);
 
        pci_unmap_addr_set(&mad_priv->header, mapping, sg_list.addr);
 
        /* Now, post receive WR */
-       ret = ib_post_recv(qp, &recv_wr, &bad_recv_wr);
+       ret = ib_post_recv(qp_info->qp, &recv_wr, &bad_recv_wr);
        if (ret) {
 
-               pci_unmap_single(port_priv->device->dma_device,
+               pci_unmap_single(qp_info->port_priv->device->dma_device,
                                 pci_unmap_addr(&mad_priv->header, mapping),
                                 sizeof *mad_priv - sizeof mad_priv->header,
                                 PCI_DMA_FROMDEVICE);
 
                /* Unlink from posted receive MAD list */
-               spin_lock_irqsave(&port_priv->recv_list_lock, flags);
+               spin_lock_irqsave(&qp_info->recv_list_lock, flags);
                list_del(&mad_priv->header.recv_buf.list);
-               port_priv->recv_posted_mad_count[convert_qpnum(qp->qp_num)]--;
-               spin_unlock_irqrestore(&port_priv->recv_list_lock, flags);
+               qp_info->recv_posted_mad_count--;
+               spin_unlock_irqrestore(&qp_info->recv_list_lock, flags);
 
                kmem_cache_free(ib_mad_cache, mad_priv);
                printk(KERN_NOTICE "ib_post_recv failed ret = %d\n", ret);
@@ -1004,65 +963,61 @@
 /*
  * Allocate receive MADs and post receive WRs for them 
  */
-static int ib_mad_post_receive_mads(struct ib_mad_port_private *port_priv)
+static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info)
 {
-       int i, j;
+       int i, ret = 0;
 
-       for (i = 0; i < IB_MAD_QP_RECV_SIZE; i++) {
-               for (j = 0; j < IB_MAD_QPS_CORE; j++) {
-                       if (ib_mad_post_receive_mad(port_priv,
-                                                   port_priv->qp[j])) {
-                               printk(KERN_ERR "receive post %d failed on %s port %d\n",
-                                      i + 1, port_priv->device->name,
-                                      port_priv->port_num);
-                       }
+       for (i = qp_info->recv_posted_mad_count; i < IB_MAD_QP_RECV_SIZE; i++) {
+               ret = ib_mad_post_receive_mad(qp_info);
+               if (ret) {
+                       printk(KERN_ERR "receive post %d failed on %s port %d\n",
+                               i + 1, qp_info->port_priv->device->name,
+                               qp_info->port_priv->port_num);
+                       break;
                }
        }
 
-       return 0;
+       return ret;
 }
 
 /*
  * Return all the posted receive MADs
  */
-static void ib_mad_return_posted_recv_mads(struct ib_mad_port_private *port_priv)
+static void ib_mad_return_posted_recv_mads(struct ib_mad_qp_info *qp_info)
 {
-       int i;
        unsigned long flags;
 
-       for (i = 0; i < IB_MAD_QPS_SUPPORTED; i++) {
-               spin_lock_irqsave(&port_priv->recv_list_lock, flags);
-               while (!list_empty(&port_priv->recv_posted_mad_list[i])) {
-
-                       /* PCI mapping !!! */
+       spin_lock_irqsave(&qp_info->recv_list_lock, flags);
+       while (!list_empty(&qp_info->recv_posted_mad_list)) {
 
-               }
-               INIT_LIST_HEAD(&port_priv->recv_posted_mad_list[i]);
-               port_priv->recv_posted_mad_count[i] = 0;
-               spin_unlock_irqrestore(&port_priv->recv_list_lock, flags);
+               /* PCI mapping !!! */
+               list_del(&qp_info->recv_posted_mad_list);
        }
+       INIT_LIST_HEAD(&qp_info->recv_posted_mad_list);
+       qp_info->recv_posted_mad_count = 0;
+       spin_unlock_irqrestore(&qp_info->recv_list_lock, flags);
 }
 
 /*
  * Return all the posted send MADs
  */
-static void ib_mad_return_posted_send_mads(struct ib_mad_port_private *port_priv)
+static void ib_mad_return_posted_send_mads(struct ib_mad_qp_info *qp_info)
 {
        unsigned long flags;
 
-       spin_lock_irqsave(&port_priv->send_list_lock, flags);
-       while (!list_empty(&port_priv->send_posted_mad_list)) {
+       spin_lock_irqsave(&qp_info->send_list_lock, flags);
+       while (!list_empty(&qp_info->send_posted_mad_list)) {
 
                /* PCI mapping ? */
 
-               list_del(&port_priv->send_posted_mad_list);
+               list_del(&qp_info->send_posted_mad_list);
 
                /* Call completion handler with some status ? */
 
        }
-       INIT_LIST_HEAD(&port_priv->send_posted_mad_list);
-       port_priv->send_posted_mad_count = 0;
-       spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
+       INIT_LIST_HEAD(&qp_info->send_posted_mad_list);
+       qp_info->send_posted_mad_count = 0;
+       spin_unlock_irqrestore(&qp_info->send_list_lock, flags);
 }
 
 /*
@@ -1087,13 +1042,12 @@
         * one is needed for the Reset to Init transition.
         */
        attr->pkey_index = 0;
-       attr->port_num = port_num;
        /* QKey is 0 for QP0 */
        if (qp->qp_num == 0)
                attr->qkey = 0;
        else
                attr->qkey = IB_QP1_QKEY;
-       attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY;
+       attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY;
 
        ret = ib_modify_qp(qp, attr, attr_mask, &qp_cap);
        kfree(attr);
@@ -1182,93 +1136,180 @@
 }
 
 /*
- * Start the port
+ * Halt operations on the specified QP.
  */
-static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
+static void ib_mad_stop_qp(struct ib_mad_qp_info *qp_info)
 {
-       int ret, i, ret2;
+       int ret;
 
-       for (i = 0; i < IB_MAD_QPS_CORE; i++) {
-               ret = ib_mad_change_qp_state_to_init(port_priv->qp[i],
-                                                    port_priv->port_num);
-               if (ret) {
-                       printk(KERN_ERR "Could not change QP%d state to INIT\n", i);
-                       return ret;
-               }
+       ret = ib_mad_change_qp_state_to_reset(qp_info->qp);
+       if (ret) {
+               printk(KERN_ERR "ib_mad_qp_stop: Could not change %s port %d QP%d state to RESET\n",
+                      qp_info->port_priv->device->name,
+                      qp_info->port_priv->port_num, qp_info->qp->qp_num);
+       }
+
+       ib_mad_return_posted_recv_mads(qp_info);
+       ib_mad_return_posted_send_mads(qp_info);
+}
+
+/*
+ * Start operations on the specified QP.
+ */
+static int ib_mad_start_qp(struct ib_mad_qp_info *qp_info)
+{
+       int ret;
+
+       ret = ib_mad_change_qp_state_to_init(qp_info->qp,
+                                            qp_info->port_priv->port_num);
+       if (ret) {
+               printk(KERN_ERR "Could not change QP%d state to INIT\n",
+                      qp_info->qp->qp_num);
+               return ret;
        }
 
-       ret = ib_mad_post_receive_mads(port_priv);
+       ret = ib_mad_post_receive_mads(qp_info);
        if (ret) {
                printk(KERN_ERR "Could not post receive requests\n");
                goto error;
        }
 
-       ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
+       ret = ib_mad_change_qp_state_to_rtr(qp_info->qp);
        if (ret) {
-               printk(KERN_ERR "Failed to request completion notification\n");
+               printk(KERN_ERR "Could not change QP%d state to RTR\n",
+                      qp_info->qp->qp_num);
                goto error;
        }
 
-       for (i = 0; i < IB_MAD_QPS_CORE; i++) {
-               ret = ib_mad_change_qp_state_to_rtr(port_priv->qp[i]);
-               if (ret) {
-                       printk(KERN_ERR "Could not change QP%d state to RTR\n", i);
-                       goto error;
-               }
+       ret = ib_mad_change_qp_state_to_rts(qp_info->qp);
+       if (ret) {
+               printk(KERN_ERR "Could not change QP%d state to RTS\n",
+                      qp_info->qp->qp_num);
+               goto error;
+       }
 
-               ret = ib_mad_change_qp_state_to_rts(port_priv->qp[i]);
-               if (ret) {
-                       printk(KERN_ERR "Could not change QP%d state to RTS\n", i);
-                       goto error;
-               }
+       /* Don't report receive completions until we're ready to send. */
+       ret = ib_req_notify_cq(qp_info->cq, IB_CQ_NEXT_COMP);
+       if (ret) {
+               printk(KERN_ERR "Failed to request completion notification\n");
+               goto error;
        }
 
        return 0;
-error:
-       ib_mad_return_posted_recv_mads(port_priv);
-       for (i = 0; i < IB_MAD_QPS_CORE; i++) {
-               ret2 = ib_mad_change_qp_state_to_reset(port_priv->qp[i]);
-               if (ret2) {
-                       printk(KERN_ERR "ib_mad_port_start: Could not change QP%d state to RESET\n", i);
-               }
-       }
 
+error:
+       ib_mad_stop_qp(qp_info);
        return ret;
 }
 
 /*
- * Stop the port
+ * Restart operations on the specified QP.
  */
-static void ib_mad_port_stop(struct ib_mad_port_private *port_priv)
+static int ib_mad_restart_qp(struct ib_mad_qp_info *qp_info)
 {
-       int i, ret;
+       int ret;
 
-       for (i = 0; i < IB_MAD_QPS_CORE; i++) {
-               ret = ib_mad_change_qp_state_to_reset(port_priv->qp[i]);
-               if (ret) {
-                       printk(KERN_ERR "ib_mad_port_stop: Could not change %s port %d QP%d state to RESET\n",
-                              port_priv->device->name, port_priv->port_num, i);
-               }
-       }
+       /* Need to synchronize this against user's posting MADs... */
+       ib_mad_stop_qp(qp_info);
+       ret = ib_mad_start_qp(qp_info);
+       if (ret) {
+               printk(KERN_ERR "Could not restart %s port %d QP %d\n",
+                      qp_info->port_priv->device->name,
+                      qp_info->port_priv->port_num, qp_info->qp->qp_num);
+       }       
+
+       return ret;
+}
+
+
+static void ib_mad_destroy_qp(struct ib_mad_qp_info *qp_info)
+{
+       /* Stop processing completions. */
+       kthread_stop(qp_info->mad_thread);
+       ib_mad_stop_qp(qp_info);
 
-       ib_mad_return_posted_recv_mads(port_priv);
-       ib_mad_return_posted_send_mads(port_priv);
+       ib_destroy_qp(qp_info->qp);
+       ib_destroy_cq(qp_info->cq);
 }
 
-/*
- * Restart the port
- */
-static int ib_mad_port_restart(struct ib_mad_port_private *port_priv)
+static int ib_mad_init_qp(struct ib_mad_port_private *port_priv,
+                         struct ib_mad_qp_info *qp_info,
+                         enum ib_qp_type qp_type)
 {
-       int ret;
+       int ret, cq_size;
+       struct ib_qp_init_attr qp_init_attr;
+       struct ib_qp_cap qp_cap;
 
-       ib_mad_port_stop(port_priv);
-       ret = ib_mad_port_start(port_priv);
-       if (ret) {
-               printk(KERN_ERR "Could not restart %s port %d\n",
-                       port_priv->device->name, port_priv->port_num);
+       qp_info->port_priv = port_priv;
+
+       /* Allocate CQ */
+       cq_size = IB_MAD_QP_SEND_SIZE + IB_MAD_QP_RECV_SIZE;
+       qp_info->cq = ib_create_cq(port_priv->device,
+                                  (ib_comp_handler)ib_mad_thread_completion_handler,
+                                  NULL, qp_info, cq_size);
+       if (IS_ERR(qp_info->cq)) {
+               printk(KERN_ERR "Could not create ib_mad CQ\n");
+               return PTR_ERR(qp_info->cq);
+       }
+
+       /* Allocate QP (an ERR_PTR on failure: do not dereference) */
+       memset(&qp_init_attr, 0, sizeof qp_init_attr);
+       qp_init_attr.send_cq = qp_info->cq;
+       qp_init_attr.recv_cq = qp_info->cq;
+       qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+       qp_init_attr.rq_sig_type = IB_SIGNAL_ALL_WR;
+       qp_init_attr.cap.max_send_wr = IB_MAD_QP_SEND_SIZE;
+       qp_init_attr.cap.max_recv_wr = IB_MAD_QP_RECV_SIZE;
+       qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG;
+       qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG;
+       qp_init_attr.qp_type = qp_type;
+       qp_init_attr.port_num = port_priv->port_num;
+
+       qp_info->qp = ib_create_qp(port_priv->pd, &qp_init_attr, &qp_cap);
+       if (IS_ERR(qp_info->qp)) {
+               printk(KERN_ERR "Could not create ib_mad QP (type %d)\n",
+                      qp_type);
+               ret = PTR_ERR(qp_info->qp);
+               goto error1;
+       }
+
+       spin_lock_init(&qp_info->send_list_lock);
+       INIT_LIST_HEAD(&qp_info->send_posted_mad_list);
+       qp_info->send_posted_mad_count = 0;
+
+       spin_lock_init(&qp_info->recv_list_lock);
+       INIT_LIST_HEAD(&qp_info->recv_posted_mad_list);
+       qp_info->recv_posted_mad_count = 0;
+
+       /* Startup the completion thread. */
+       init_waitqueue_head(&qp_info->wait);
+       qp_info->mad_thread = kthread_create(ib_mad_thread_handler,
+                                            qp_info,
+                                            "ib_mad-%-6s-%-2d-%-4d",
+                                            qp_info->port_priv->device->name,
+                                            qp_info->port_priv->port_num,
+                                            qp_info->qp->qp_num);
+       if (IS_ERR(qp_info->mad_thread)) {
+               printk(KERN_ERR "Couldn't start mad thread for %s port %d\n",
+                      qp_info->port_priv->device->name,
+                      qp_info->port_priv->port_num);
+               ret = PTR_ERR(qp_info->mad_thread);
+               goto error2;
         }       
 
+       /* Start the QP. */
+       ret = ib_mad_start_qp(qp_info);
+       if (ret)
+               goto error3;
+
+       return 0;
+
+error3:
+       kthread_stop(qp_info->mad_thread);
+error2:
+       ib_destroy_qp(qp_info->qp);
+error1:
+       ib_destroy_cq(qp_info->cq);
        return ret;
 }
 
@@ -1278,14 +1319,12 @@
  */
 static int ib_mad_port_open(struct ib_device *device, int port_num)
 {
-       int ret, cq_size, i;
+       int ret, i, qp;
        u64 iova = 0;
        struct ib_phys_buf buf_list = {
                .addr = 0,
                .size = (unsigned long) high_memory - PAGE_OFFSET
        };
-       struct ib_qp_init_attr qp_init_attr;
-       struct ib_qp_cap qp_cap;
        struct ib_mad_port_private *entry, *port_priv = NULL;
        unsigned long flags;
 
@@ -1320,21 +1359,11 @@
                port_priv->version[i] = NULL;
        }
 
-       cq_size = IB_MAD_QP_SEND_SIZE + IB_MAD_QP_RECV_SIZE;
-       port_priv->cq = ib_create_cq(port_priv->device,
-                                    (ib_comp_handler) ib_mad_thread_completion_handler,
-                                    NULL, port_priv, cq_size);
-       if (IS_ERR(port_priv->cq)) {
-               printk(KERN_ERR "Could not create ib_mad CQ\n");
-               ret = PTR_ERR(port_priv->cq);
-               goto error3;
-       }
-
        port_priv->pd = ib_alloc_pd(device);
        if (IS_ERR(port_priv->pd)) {
                printk(KERN_ERR "Could not create ib_mad PD\n");
                ret = PTR_ERR(port_priv->pd);
-               goto error4;
+               goto error1;
        }
 
        port_priv->mr = ib_reg_phys_mr(port_priv->pd, &buf_list, 1,
@@ -1342,58 +1371,19 @@
        if (IS_ERR(port_priv->mr)) {
                printk(KERN_ERR "Could not register ib_mad MR\n");
                ret = PTR_ERR(port_priv->mr);
-               goto error5;
+               goto error2;
        }
 
-       for (i = 0; i < IB_MAD_QPS_CORE; i++) {
-               memset(&qp_init_attr, 0, sizeof qp_init_attr);
-               qp_init_attr.send_cq = port_priv->cq;
-               qp_init_attr.recv_cq = port_priv->cq;
-               qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
-               qp_init_attr.rq_sig_type = IB_SIGNAL_ALL_WR;
-               qp_init_attr.cap.max_send_wr = IB_MAD_QP_SEND_SIZE;
-               qp_init_attr.cap.max_recv_wr = IB_MAD_QP_RECV_SIZE;
-               qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG;
-               qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG;
-               if (i == 0)
-                       qp_init_attr.qp_type = IB_QPT_SMI;
-               else
-                       qp_init_attr.qp_type = IB_QPT_GSI;
-               qp_init_attr.port_num = port_priv->port_num;
-               port_priv->qp[i] = ib_create_qp(port_priv->pd, &qp_init_attr,
-                                               &qp_cap);
-               if (IS_ERR(port_priv->qp[i])) {
-                       printk(KERN_ERR "Could not create ib_mad QP%d\n", i);
-                       ret = PTR_ERR(port_priv->qp[i]);
-                       if (i == 0)
-                               goto error6;            
-                       else
-                               goto error7;
-                       }
-               printk(KERN_DEBUG "Created ib_mad QP %d\n",
-                      port_priv->qp[i]->qp_num);
+       for (qp = 0; qp < IB_MAD_QPS_CORE; qp++) {
+               ret = ib_mad_init_qp(port_priv,
+                                    &port_priv->qp_info[qp],
+                                    qp ? IB_QPT_GSI : IB_QPT_SMI);
+               if (ret)
+                       goto error3;
        }
 
        spin_lock_init(&port_priv->reg_lock);
-       spin_lock_init(&port_priv->recv_list_lock);
-       spin_lock_init(&port_priv->send_list_lock);
        INIT_LIST_HEAD(&port_priv->agent_list);
-       INIT_LIST_HEAD(&port_priv->send_posted_mad_list);
-       port_priv->send_posted_mad_count = 0;
-       for (i = 0; i < IB_MAD_QPS_SUPPORTED; i++) {
-               INIT_LIST_HEAD(&port_priv->recv_posted_mad_list[i]);
-               port_priv->recv_posted_mad_count[i] = 0;
-       }
-
-       ret = ib_mad_thread_init(port_priv);
-       if (ret)
-               goto error8;
-
-       ret = ib_mad_port_start(port_priv);
-       if (ret) {
-               printk(KERN_ERR "Couldn't start port\n");
-               goto error8;
-       }
 
        spin_lock_irqsave(&ib_mad_port_list_lock, flags);
        list_add_tail(&port_priv->port_list, &ib_mad_port_list);
@@ -1401,17 +1391,14 @@
 
        return 0;
 
-error8:
-       ib_destroy_qp(port_priv->qp[1]);
-error7:
-       ib_destroy_qp(port_priv->qp[0]);
-error6:
+error3:
+       while (qp > 0) {
+               ib_mad_destroy_qp(&port_priv->qp_info[--qp]);
+       }
        ib_dereg_mr(port_priv->mr);
-error5:
+error2:
        ib_dealloc_pd(port_priv->pd);
-error4:
-       ib_destroy_cq(port_priv->cq);
-error3:
+error1:
        kfree(port_priv);
 
        return ret;
@@ -1426,6 +1413,7 @@
 {
        struct ib_mad_port_private *entry, *port_priv = NULL;
        unsigned long flags;
+       int i;
 
        spin_lock_irqsave(&ib_mad_port_list_lock, flags);
        list_for_each_entry(entry, &ib_mad_port_list, port_list) {
@@ -1444,13 +1432,12 @@
        list_del(&port_priv->port_list);
        spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
 
-       ib_mad_port_stop(port_priv);
-       ib_mad_thread_stop(port_priv);
-       ib_destroy_qp(port_priv->qp[1]);
-       ib_destroy_qp(port_priv->qp[0]);
+       for (i = 0; i < IB_MAD_QPS_CORE; i++) {
+               ib_mad_destroy_qp(&port_priv->qp_info[i]);
+       }
+
        ib_dereg_mr(port_priv->mr);
        ib_dealloc_pd(port_priv->pd);
-       ib_destroy_cq(port_priv->cq);
        /* Handle deallocation of MAD registration tables!!! */
 
        kfree(port_priv);
@@ -1461,7 +1448,7 @@
 
 static void ib_mad_init_device(struct ib_device *device)
 {
-       int ret, num_ports, cur_port, i, ret2;
+       int ret, num_ports, i, ret2;
        struct ib_device_attr device_attr;
 
        ret = ib_query_device(device, &device_attr);
@@ -1472,16 +1459,14 @@
 
        if (device->node_type == IB_NODE_SWITCH) {
                num_ports = 1;
-               cur_port = 0;
        } else {
                num_ports = device_attr.phys_port_cnt;
-               cur_port = 1;
        }
-       for (i = 0; i < num_ports; i++, cur_port++) {
-               ret = ib_mad_port_open(device, cur_port);
+       for (i = 0; i < num_ports; i++) {
+               ret = ib_mad_port_open(device, i+1);
                if (ret) {
                        printk(KERN_ERR "Could not open %s port %d\n",
-                              device->name, cur_port);
+                              device->name, i+1);
                        goto error_device_open;
                }
        }
@@ -1490,11 +1475,10 @@
 
 error_device_open:
        while (i > 0) {
-               cur_port--;
-               ret2 = ib_mad_port_close(device, cur_port);
+               ret2 = ib_mad_port_close(device, i);
                if (ret2) {
                        printk(KERN_ERR "Could not close %s port %d\n",
-                              device->name, cur_port);
+                              device->name, i);
                }
                i--;
        }
@@ -1505,7 +1489,7 @@
 
 static void ib_mad_remove_device(struct ib_device *device)
 {
-       int ret, i, num_ports, cur_port, ret2;
+       int ret, i, num_ports, ret2;
        struct ib_device_attr device_attr;
 
        ret = ib_query_device(device, &device_attr);
@@ -1516,16 +1500,14 @@
 
        if (device->node_type == IB_NODE_SWITCH) {
                num_ports = 1;
-               cur_port = 0;
        } else {
                num_ports = device_attr.phys_port_cnt;
-               cur_port = 1;
        }
-       for (i = 0; i < num_ports; i++, cur_port++) {
-               ret2 = ib_mad_port_close(device, cur_port);
+       for (i = 0; i < num_ports; i++) {
+               ret2 = ib_mad_port_close(device, i+1);
                if (ret2) {
                        printk(KERN_ERR "Could not close %s port %d\n",
-                              device->name, cur_port);
+                              device->name, i+1);
                        if (!ret)
                                ret = ret2;
                }

_______________________________________________
openib-general mailing list
[EMAIL PROTECTED]
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to