In my zeal to remove reference counting, I missed a race between ipoib_cm_destroy_tx() and ipoib_cm_tx_start(). I'm working on patch v3 and should have it ready in a day or two.
On Mon, 2010-03-01 at 17:50 -0800, Ralph Campbell wrote: > When using connected mode, ipoib_cm_create_tx() kmallocs a > struct ipoib_cm_tx which contains pointers to ipoib_neigh and > ipoib_path. If the paths are flushed or the struct neighbour is > destroyed, the pointers held by struct ipoib_cm_tx can reference > freed memory. > > Signed-off-by: Ralph Campbell <[email protected]> > --- > > I was able to remove the kref_t added to struct ipoib_path and ipoib_neigh. > The key is to delete references to ipoib_neigh and ipoib_path > when ipoib_neigh_cleanup() is called. > > drivers/infiniband/ulp/ipoib/ipoib.h | 24 ++- > drivers/infiniband/ulp/ipoib/ipoib_cm.c | 105 +++++----- > drivers/infiniband/ulp/ipoib/ipoib_main.c | 267 > +++++++++++------------- > drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 95 +++------ > 4 files changed, 222 insertions(+), 269 deletions(-) > > diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h > b/drivers/infiniband/ulp/ipoib/ipoib.h > index 753a983..84bb561 100644 > --- a/drivers/infiniband/ulp/ipoib/ipoib.h > +++ b/drivers/infiniband/ulp/ipoib/ipoib.h > @@ -415,9 +415,7 @@ static inline struct ipoib_neigh **to_ipoib_neigh(struct > neighbour *neigh) > INFINIBAND_ALEN, sizeof(void *)); > } > > -struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh, > - struct net_device *dev); > -void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh); > +void ipoib_neigh_flush(struct ipoib_neigh *neigh); > > extern struct workqueue_struct *ipoib_workqueue; > > @@ -464,7 +462,8 @@ void ipoib_dev_cleanup(struct net_device *dev); > > void ipoib_mcast_join_task(struct work_struct *work); > void ipoib_mcast_carrier_on_task(struct work_struct *work); > -void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff > *skb); > +void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff > *skb, > + struct ipoib_neigh *neigh); > > void ipoib_mcast_restart_task(struct work_struct *work); > int ipoib_mcast_start_thread(struct net_device *dev); > @@ -567,9 +566,10 @@ void ipoib_cm_dev_stop(struct net_device *dev); > int ipoib_cm_dev_init(struct net_device *dev); > int ipoib_cm_add_mode_attr(struct net_device *dev); > void ipoib_cm_dev_cleanup(struct net_device *dev); > -struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct > ipoib_path *path, > - struct ipoib_neigh *neigh); > +void ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path, > + struct ipoib_neigh *neigh); > void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx); > +void ipoib_cm_flush_path(struct net_device *dev, struct ipoib_path *path); > void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb, > unsigned int mtu); > void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc); > @@ -646,10 +646,10 @@ void ipoib_cm_dev_cleanup(struct net_device *dev) > } > > static inline > -struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct > ipoib_path *path, > - struct ipoib_neigh *neigh) > +void ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path, > + struct ipoib_neigh *neigh) > { > - return NULL; > + return; > } > > static inline > @@ -659,6 +659,12 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx) > } > > static inline > +void ipoib_cm_flush_path(struct net_device *dev, struct ipoib_path *path) > +{ > + return; > +} > + > +static inline > int ipoib_cm_add_mode_attr(struct net_device *dev) > { > return 0; > diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c > b/drivers/infiniband/ulp/ipoib/ipoib_cm.c > index 30bdf42..4ec4ebc 100644 > --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c > +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c > @@ -794,31 +794,14 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, > struct ib_wc *wc) > > if (wc->status != IB_WC_SUCCESS && > wc->status != IB_WC_WR_FLUSH_ERR) { > - struct ipoib_neigh *neigh; > - > ipoib_dbg(priv, "failed cm send event " > "(status=%d, wrid=%d vend_err %x)\n", > wc->status, wr_id, wc->vendor_err); > > spin_lock_irqsave(&priv->lock, flags); > - neigh = tx->neigh; > - > - if (neigh) { > - neigh->cm = NULL; > - list_del(&neigh->list); > - if (neigh->ah) > - ipoib_put_ah(neigh->ah); > - ipoib_neigh_free(dev, neigh); > - > - tx->neigh = NULL; > - } > - > - if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { > - list_move(&tx->list, &priv->cm.reap_list); > - queue_work(ipoib_workqueue, &priv->cm.reap_task); > - } > > clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags); > + ipoib_cm_destroy_tx(tx); > > spin_unlock_irqrestore(&priv->lock, flags); > } > @@ -1199,7 +1182,6 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, > struct ipoib_cm_tx *tx = cm_id->context; > struct ipoib_dev_priv *priv = netdev_priv(tx->dev); > struct net_device *dev = priv->dev; > - struct ipoib_neigh *neigh; > unsigned long flags; > int ret; > > @@ -1221,22 +1203,8 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, > ipoib_dbg(priv, "CM error %d.\n", event->event); > netif_tx_lock_bh(dev); > spin_lock_irqsave(&priv->lock, flags); > - neigh = tx->neigh; > - > - if (neigh) { > - neigh->cm = NULL; > - list_del(&neigh->list); > - if (neigh->ah) > - ipoib_put_ah(neigh->ah); > - ipoib_neigh_free(dev, neigh); > - > - tx->neigh = NULL; > - } > > - if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { > - list_move(&tx->list, &priv->cm.reap_list); > - queue_work(ipoib_workqueue, &priv->cm.reap_task); > - } > + ipoib_cm_destroy_tx(tx); > > spin_unlock_irqrestore(&priv->lock, flags); > netif_tx_unlock_bh(dev); > @@ -1248,35 +1216,61 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, > return 0; > } > > -struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct > ipoib_path *path, > - struct ipoib_neigh *neigh) > +void ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path, > + struct ipoib_neigh *neigh) > { > struct ipoib_dev_priv *priv = netdev_priv(dev); > struct ipoib_cm_tx *tx; > > tx = kzalloc(sizeof *tx, GFP_ATOMIC); > if (!tx) > - return NULL; > + return; > > neigh->cm = tx; > tx->neigh = neigh; > tx->path = path; > tx->dev = dev; > list_add(&tx->list, &priv->cm.start_list); > - set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags); > queue_work(ipoib_workqueue, &priv->cm.start_task); > - return tx; > } > > +/* > + * Note: this is called with netif_tx_lock_bh() and priv->lock held. > + */ > void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx) > { > - struct ipoib_dev_priv *priv = netdev_priv(tx->dev); > - if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { > + struct ipoib_neigh *neigh = tx->neigh; > + > + if (neigh) { > + struct ipoib_dev_priv *priv = netdev_priv(tx->dev); > + > + neigh->cm = NULL; > + tx->neigh = NULL; > list_move(&tx->list, &priv->cm.reap_list); > queue_work(ipoib_workqueue, &priv->cm.reap_task); > ipoib_dbg(priv, "Reap connection for gid %pI6\n", > - tx->neigh->dgid.raw); > - tx->neigh = NULL; > + neigh->dgid.raw); > + } > +} > + > +/* > + * Search the list of connections to be started and remove any entries > + * which match the path being destroyed. > + * > + * This should be called with netif_tx_lock_bh() and priv->lock held. > + */ > +void ipoib_cm_flush_path(struct net_device *dev, struct ipoib_path *path) > +{ > + struct ipoib_dev_priv *priv = netdev_priv(dev); > + struct ipoib_cm_tx *tx; > + > + list_for_each_entry(tx, &priv->cm.start_list, list) { > + tx = list_entry(priv->cm.start_list.next, typeof(*tx), list); > + if (tx->path == path) { > + tx->path = NULL; > + list_del_init(&tx->list); > + break; > + } > } > } > > @@ -1300,27 +1294,32 @@ static void ipoib_cm_tx_start(struct work_struct > *work) > p = list_entry(priv->cm.start_list.next, typeof(*p), list); > list_del_init(&p->list); > neigh = p->neigh; > - qpn = IPOIB_QPN(neigh->neighbour->ha); > memcpy(&pathrec, &p->path->pathrec, sizeof pathrec); > + p->path = NULL; > + /* > + * ipoib_neigh_cleanup() may have been called while waiting > + * on the priv->cm.start_list. > + */ > + if (neigh->neighbour) > + qpn = IPOIB_QPN(neigh->neighbour->ha); > + else > + qpn = 0; > > spin_unlock_irqrestore(&priv->lock, flags); > netif_tx_unlock_bh(dev); > > - ret = ipoib_cm_tx_init(p, qpn, &pathrec); > + if (qpn) > + ret = ipoib_cm_tx_init(p, qpn, &pathrec); > + else > + ret = -1; > > netif_tx_lock_bh(dev); > spin_lock_irqsave(&priv->lock, flags); > > if (ret) { > neigh = p->neigh; > - if (neigh) { > + if (neigh) > neigh->cm = NULL; > - list_del(&neigh->list); > - if (neigh->ah) > - ipoib_put_ah(neigh->ah); > - ipoib_neigh_free(dev, neigh); > - } > - list_del(&p->list); > kfree(p); > } > } > @@ -1342,7 +1341,7 @@ static void ipoib_cm_tx_reap(struct work_struct *work) > > while (!list_empty(&priv->cm.reap_list)) { > p = list_entry(priv->cm.reap_list.next, typeof(*p), list); > - list_del(&p->list); > + list_del_init(&p->list); > spin_unlock_irqrestore(&priv->lock, flags); > netif_tx_unlock_bh(dev); > ipoib_cm_tx_destroy(p); > diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c > b/drivers/infiniband/ulp/ipoib/ipoib_main.c > index df3eb8c..89c02a2 100644 > --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c > +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c > @@ -91,6 +91,9 @@ struct workqueue_struct *ipoib_workqueue; > > struct ib_sa_client ipoib_sa_client; > > +static struct ipoib_neigh *neigh_alloc(struct neighbour *neighbour, > + struct net_device *dev); > + > static void ipoib_add_one(struct ib_device *device); > static void ipoib_remove_one(struct ib_device *device); > > @@ -231,7 +234,7 @@ static struct ipoib_path *__path_find(struct net_device > *dev, void *gid) > return NULL; > } > > -static int __path_add(struct net_device *dev, struct ipoib_path *path) > +static void __path_add(struct net_device *dev, struct ipoib_path *path) > { > struct ipoib_dev_priv *priv = netdev_priv(dev); > struct rb_node **n = &priv->path_tree.rb_node; > @@ -249,44 +252,26 @@ static int __path_add(struct net_device *dev, struct > ipoib_path *path) > n = &pn->rb_left; > else if (ret > 0) > n = &pn->rb_right; > - else > - return -EEXIST; > + else /* Should never happen since we always search first */ > + return; > } > > rb_link_node(&path->rb_node, pn, n); > rb_insert_color(&path->rb_node, &priv->path_tree); > > list_add_tail(&path->list, &priv->path_list); > - > - return 0; > } > > -static void path_free(struct net_device *dev, struct ipoib_path *path) > +static void path_free(struct ipoib_path *path) > { > - struct ipoib_dev_priv *priv = netdev_priv(dev); > struct ipoib_neigh *neigh, *tn; > struct sk_buff *skb; > - unsigned long flags; > > while ((skb = __skb_dequeue(&path->queue))) > dev_kfree_skb_irq(skb); > > - spin_lock_irqsave(&priv->lock, flags); > - > - list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) { > - /* > - * It's safe to call ipoib_put_ah() inside priv->lock > - * here, because we know that path->ah will always > - * hold one more reference, so ipoib_put_ah() will > - * never do more than decrement the ref count. > - */ > - if (neigh->ah) > - ipoib_put_ah(neigh->ah); > - > - ipoib_neigh_free(dev, neigh); > - } > - > - spin_unlock_irqrestore(&priv->lock, flags); > + list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) > + ipoib_neigh_flush(neigh); > > if (path->ah) > ipoib_put_ah(path->ah); > @@ -387,10 +372,11 @@ void ipoib_flush_paths(struct net_device *dev) > list_for_each_entry_safe(path, tp, &remove_list, list) { > if (path->query) > ib_sa_cancel_query(path->query_id, path->query); > + ipoib_cm_flush_path(dev, path); > spin_unlock_irqrestore(&priv->lock, flags); > netif_tx_unlock_bh(dev); > wait_for_completion(&path->done); > - path_free(dev, path); > + path_free(path); > netif_tx_lock_bh(dev); > spin_lock_irqsave(&priv->lock, flags); > } > @@ -460,19 +446,13 @@ static void path_rec_completion(int status, > memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, > sizeof(union ib_gid)); > > - if (ipoib_cm_enabled(dev, neigh->neighbour)) { > - if (!ipoib_cm_get(neigh)) > - ipoib_cm_set(neigh, > ipoib_cm_create_tx(dev, > - > path, > - > neigh)); > - if (!ipoib_cm_get(neigh)) { > - list_del(&neigh->list); > - if (neigh->ah) > - ipoib_put_ah(neigh->ah); > - ipoib_neigh_free(dev, neigh); > - continue; > - } > - } > + /* > + * If connected mode is enabled but not started, > + * start getting a connection. > + */ > + if (ipoib_cm_enabled(dev, neigh->neighbour) && > + !ipoib_cm_get(neigh)) > + ipoib_cm_create_tx(dev, path, neigh); > > while ((skb = __skb_dequeue(&neigh->queue))) > __skb_queue_tail(&skqueue, skb); > @@ -520,6 +500,8 @@ static struct ipoib_path *path_rec_create(struct > net_device *dev, void *gid) > path->pathrec.numb_path = 1; > path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class; > > + __path_add(dev, path); > + > return path; > } > > @@ -554,29 +536,23 @@ static int path_rec_start(struct net_device *dev, > return 0; > } > > -static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) > +static void neigh_add_path(struct sk_buff *skb, struct net_device *dev, > + struct ipoib_neigh *neigh) > { > struct ipoib_dev_priv *priv = netdev_priv(dev); > struct ipoib_path *path; > - struct ipoib_neigh *neigh; > + struct neighbour *n; > unsigned long flags; > > - neigh = ipoib_neigh_alloc(skb_dst(skb)->neighbour, skb->dev); > - if (!neigh) { > - ++dev->stats.tx_dropped; > - dev_kfree_skb_any(skb); > - return; > - } > + n = skb_dst(skb)->neighbour; > > spin_lock_irqsave(&priv->lock, flags); > > - path = __path_find(dev, skb_dst(skb)->neighbour->ha + 4); > + path = __path_find(dev, n->ha + 4); > if (!path) { > - path = path_rec_create(dev, skb_dst(skb)->neighbour->ha + 4); > + path = path_rec_create(dev, n->ha + 4); > if (!path) > - goto err_path; > - > - __path_add(dev, path); > + goto err_drop; > } > > list_add_tail(&neigh->list, &path->neigh_list); > @@ -587,16 +563,11 @@ static void neigh_add_path(struct sk_buff *skb, struct > net_device *dev) > memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, > sizeof(union ib_gid)); > > - if (ipoib_cm_enabled(dev, neigh->neighbour)) { > + if (ipoib_cm_enabled(dev, n)) { > + if (!ipoib_cm_get(neigh)) > + ipoib_cm_create_tx(dev, path, neigh); > if (!ipoib_cm_get(neigh)) > - ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, > path, neigh)); > - if (!ipoib_cm_get(neigh)) { > - list_del(&neigh->list); > - if (neigh->ah) > - ipoib_put_ah(neigh->ah); > - ipoib_neigh_free(dev, neigh); > goto err_drop; > - } > if (skb_queue_len(&neigh->queue) < > IPOIB_MAX_PATH_REC_QUEUE) > __skb_queue_tail(&neigh->queue, skb); > else { > @@ -606,12 +577,10 @@ static void neigh_add_path(struct sk_buff *skb, struct > net_device *dev) > } > } else { > spin_unlock_irqrestore(&priv->lock, flags); > - ipoib_send(dev, skb, path->ah, > IPOIB_QPN(skb_dst(skb)->neighbour->ha)); > + ipoib_send(dev, skb, path->ah, IPOIB_QPN(n->ha)); > return; > } > } else { > - neigh->ah = NULL; > - > if (!path->query && path_rec_start(dev, path)) > goto err_list; > > @@ -622,10 +591,7 @@ static void neigh_add_path(struct sk_buff *skb, struct > net_device *dev) > return; > > err_list: > - list_del(&neigh->list); > - > -err_path: > - ipoib_neigh_free(dev, neigh); > + list_del_init(&neigh->list); > err_drop: > ++dev->stats.tx_dropped; > dev_kfree_skb_any(skb); > @@ -633,20 +599,22 @@ err_drop: > spin_unlock_irqrestore(&priv->lock, flags); > } > > -static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev) > +static void path_lookup(struct sk_buff *skb, struct net_device *dev, > + struct ipoib_neigh *neigh) > { > - struct ipoib_dev_priv *priv = netdev_priv(skb->dev); > + struct ipoib_dev_priv *priv; > > /* Look up path record for unicasts */ > if (skb_dst(skb)->neighbour->ha[4] != 0xff) { > - neigh_add_path(skb, dev); > + neigh_add_path(skb, dev, neigh); > return; > } > > /* Add in the P_Key for multicasts */ > + priv = netdev_priv(dev); > skb_dst(skb)->neighbour->ha[8] = (priv->pkey >> 8) & 0xff; > skb_dst(skb)->neighbour->ha[9] = priv->pkey & 0xff; > - ipoib_mcast_send(dev, skb_dst(skb)->neighbour->ha + 4, skb); > + ipoib_mcast_send(dev, skb_dst(skb)->neighbour->ha + 4, skb, neigh); > } > > static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, > @@ -659,35 +627,13 @@ static void unicast_arp_send(struct sk_buff *skb, > struct net_device *dev, > spin_lock_irqsave(&priv->lock, flags); > > path = __path_find(dev, phdr->hwaddr + 4); > - if (!path || !path->valid) { > - int new_path = 0; > - > - if (!path) { > - path = path_rec_create(dev, phdr->hwaddr + 4); > - new_path = 1; > - } > - if (path) { > - /* put pseudoheader back on for next time */ > - skb_push(skb, sizeof *phdr); > - __skb_queue_tail(&path->queue, skb); > - > - if (!path->query && path_rec_start(dev, path)) { > - spin_unlock_irqrestore(&priv->lock, flags); > - if (new_path) > - path_free(dev, path); > - return; > - } else > - __path_add(dev, path); > - } else { > - ++dev->stats.tx_dropped; > - dev_kfree_skb_any(skb); > - } > - > - spin_unlock_irqrestore(&priv->lock, flags); > - return; > + if (!path) { > + path = path_rec_create(dev, phdr->hwaddr + 4); > + if (!path) > + goto drop; > } > > - if (path->ah) { > + if (path->valid && path->ah) { > ipoib_dbg(priv, "Send unicast ARP to %04x\n", > be16_to_cpu(path->pathrec.dlid)); > > @@ -699,12 +645,16 @@ static void unicast_arp_send(struct sk_buff *skb, > struct net_device *dev, > /* put pseudoheader back on for next time */ > skb_push(skb, sizeof *phdr); > __skb_queue_tail(&path->queue, skb); > - } else { > - ++dev->stats.tx_dropped; > - dev_kfree_skb_any(skb); > - } > + } else > + goto drop; > > spin_unlock_irqrestore(&priv->lock, flags); > + return; > + > +drop: > + ++dev->stats.tx_dropped; > + dev_kfree_skb_any(skb); > + spin_unlock_irqrestore(&priv->lock, flags); > } > > static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) > @@ -714,31 +664,23 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct > net_device *dev) > unsigned long flags; > > if (likely(skb_dst(skb) && skb_dst(skb)->neighbour)) { > - if (unlikely(!*to_ipoib_neigh(skb_dst(skb)->neighbour))) { > - ipoib_path_lookup(skb, dev); > + neigh = *to_ipoib_neigh(skb_dst(skb)->neighbour); > + > + if (unlikely(!neigh)) { > + neigh = neigh_alloc(skb_dst(skb)->neighbour, > skb->dev); > + if (!neigh) { > + ++dev->stats.tx_dropped; > + dev_kfree_skb_any(skb); > + } else > + path_lookup(skb, dev, neigh); > return NETDEV_TX_OK; > } > > - neigh = *to_ipoib_neigh(skb_dst(skb)->neighbour); > - > if (unlikely((memcmp(&neigh->dgid.raw, > skb_dst(skb)->neighbour->ha + 4, > sizeof(union ib_gid))) || > (neigh->dev != dev))) { > - spin_lock_irqsave(&priv->lock, flags); > - /* > - * It's safe to call ipoib_put_ah() inside > - * priv->lock here, because we know that > - * path->ah will always hold one more reference, > - * so ipoib_put_ah() will never do more than > - * decrement the ref count. > - */ > - if (neigh->ah) > - ipoib_put_ah(neigh->ah); > - list_del(&neigh->list); > - ipoib_neigh_free(dev, neigh); > - spin_unlock_irqrestore(&priv->lock, flags); > - ipoib_path_lookup(skb, dev); > + path_lookup(skb, dev, neigh); > return NETDEV_TX_OK; > } > > @@ -748,7 +690,8 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct > net_device *dev) > return NETDEV_TX_OK; > } > } else if (neigh->ah) { > - ipoib_send(dev, skb, neigh->ah, > IPOIB_QPN(skb_dst(skb)->neighbour->ha)); > + ipoib_send(dev, skb, neigh->ah, > + IPOIB_QPN(skb_dst(skb)->neighbour->ha)); > return NETDEV_TX_OK; > } > > @@ -770,7 +713,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct > net_device *dev) > phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff; > phdr->hwaddr[9] = priv->pkey & 0xff; > > - ipoib_mcast_send(dev, phdr->hwaddr + 4, skb); > + ipoib_mcast_send(dev, phdr->hwaddr + 4, skb, NULL); > } else { > /* unicast GID -- should be ARP or RARP reply */ > > @@ -847,34 +790,45 @@ static void ipoib_neigh_cleanup(struct neighbour *n) > { > struct ipoib_neigh *neigh; > struct ipoib_dev_priv *priv = netdev_priv(n->dev); > + struct sk_buff *skb; > unsigned long flags; > - struct ipoib_ah *ah = NULL; > + > + spin_lock_irqsave(&priv->lock, flags); > > neigh = *to_ipoib_neigh(n); > - if (neigh) > - priv = netdev_priv(neigh->dev); > - else > + if (!neigh) { > + spin_unlock_irqrestore(&priv->lock, flags); > return; > + } > + *to_ipoib_neigh(n) = NULL; > + neigh->neighbour = NULL; > + > ipoib_dbg(priv, > "neigh_cleanup for %06x %pI6\n", > IPOIB_QPN(n->ha), > n->ha + 4); > > - spin_lock_irqsave(&priv->lock, flags); > + if (ipoib_cm_get(neigh)) > + ipoib_cm_destroy_tx(ipoib_cm_get(neigh)); > > - if (neigh->ah) > - ah = neigh->ah; > - list_del(&neigh->list); > - ipoib_neigh_free(n->dev, neigh); > + if (!list_empty(&neigh->list)) > + list_del_init(&neigh->list); > > spin_unlock_irqrestore(&priv->lock, flags); > > - if (ah) > - ipoib_put_ah(ah); > + while ((skb = __skb_dequeue(&neigh->queue))) { > + ++n->dev->stats.tx_dropped; > + dev_kfree_skb_any(skb); > + } > + > + if (neigh->ah) > + ipoib_put_ah(neigh->ah); > + > + kfree(neigh); > } > > -struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour, > - struct net_device *dev) > +static struct ipoib_neigh *neigh_alloc(struct neighbour *neighbour, > + struct net_device *dev) > { > struct ipoib_neigh *neigh; > > @@ -882,27 +836,58 @@ struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour > *neighbour, > if (!neigh) > return NULL; > > + neigh->ah = NULL; > neigh->neighbour = neighbour; > neigh->dev = dev; > memset(&neigh->dgid.raw, 0, sizeof (union ib_gid)); > *to_ipoib_neigh(neighbour) = neigh; > skb_queue_head_init(&neigh->queue); > + INIT_LIST_HEAD(&neigh->list); > ipoib_cm_set(neigh, NULL); > > return neigh; > } > > -void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh) > +/* > + * This is called when flushing the path or multicast GID from the > + * struct ipoib_neigh. ipoib_start_xmit() will then try to reinitialize > + * the address the next time it is called. > + * Note that the "neigh" pointer passed should not be used after calling > this. > + */ > +void ipoib_neigh_flush(struct ipoib_neigh *neigh) > { > + struct net_device *dev = neigh->dev; > + struct ipoib_dev_priv *priv = netdev_priv(dev); > + struct sk_buff_head skqueue; > struct sk_buff *skb; > - *to_ipoib_neigh(neigh->neighbour) = NULL; > - while ((skb = __skb_dequeue(&neigh->queue))) { > - ++dev->stats.tx_dropped; > - dev_kfree_skb_any(skb); > - } > + struct ipoib_ah *ah; > + unsigned long flags; > + > + skb_queue_head_init(&skqueue); > + > + netif_tx_lock_bh(dev); > + spin_lock_irqsave(&priv->lock, flags); > + > + while ((skb = __skb_dequeue(&neigh->queue))) > + __skb_queue_tail(&skqueue, skb); > + > if (ipoib_cm_get(neigh)) > ipoib_cm_destroy_tx(ipoib_cm_get(neigh)); > - kfree(neigh); > + ah = neigh->ah; > + neigh->ah = NULL; > + memset(&neigh->dgid.raw, 0, sizeof(union ib_gid)); > + list_del_init(&neigh->list); > + > + spin_unlock_irqrestore(&priv->lock, flags); > + netif_tx_unlock_bh(dev); > + > + while ((skb = __skb_dequeue(&skqueue))) { > + ++dev->stats.tx_dropped; > + dev_kfree_skb_irq(skb); > + } > + > + if (ah) > + ipoib_put_ah(ah); > } > > static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms > *parms) > diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c > b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c > index 8763c1e..0ae4ad2 100644 > --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c > +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c > @@ -67,35 +67,21 @@ struct ipoib_mcast_iter { > static void ipoib_mcast_free(struct ipoib_mcast *mcast) > { > struct net_device *dev = mcast->dev; > - struct ipoib_dev_priv *priv = netdev_priv(dev); > struct ipoib_neigh *neigh, *tmp; > int tx_dropped = 0; > > ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n", > mcast->mcmember.mgid.raw); > > - spin_lock_irq(&priv->lock); > - > - list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) { > - /* > - * It's safe to call ipoib_put_ah() inside priv->lock > - * here, because we know that mcast->ah will always > - * hold one more reference, so ipoib_put_ah() will > - * never do more than decrement the ref count. > - */ > - if (neigh->ah) > - ipoib_put_ah(neigh->ah); > - ipoib_neigh_free(dev, neigh); > - } > - > - spin_unlock_irq(&priv->lock); > + list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) > + ipoib_neigh_flush(neigh); > > if (mcast->ah) > ipoib_put_ah(mcast->ah); > > while (!skb_queue_empty(&mcast->pkt_queue)) { > ++tx_dropped; > - dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); > + dev_kfree_skb_irq(skb_dequeue(&mcast->pkt_queue)); > } > > netif_tx_lock_bh(dev); > @@ -149,7 +135,7 @@ static struct ipoib_mcast *__ipoib_mcast_find(struct > net_device *dev, void *mgid > return NULL; > } > > -static int __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast > *mcast) > +static void __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast > *mcast) > { > struct ipoib_dev_priv *priv = netdev_priv(dev); > struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL; > @@ -167,14 +153,12 @@ static int __ipoib_mcast_add(struct net_device *dev, > struct ipoib_mcast *mcast) > n = &pn->rb_left; > else if (ret > 0) > n = &pn->rb_right; > - else > - return -EEXIST; > + else /* Should never happen since we always search first */ > + return; > } > > rb_link_node(&mcast->rb_node, pn, n); > rb_insert_color(&mcast->rb_node, &priv->multicast_tree); > - > - return 0; > } > > static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, > @@ -654,7 +638,8 @@ static int ipoib_mcast_leave(struct net_device *dev, > struct ipoib_mcast *mcast) > return 0; > } > > -void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff > *skb) > +void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff > *skb, > + struct ipoib_neigh *neigh) > { > struct ipoib_dev_priv *priv = netdev_priv(dev); > struct ipoib_mcast *mcast; > @@ -664,11 +649,8 @@ void ipoib_mcast_send(struct net_device *dev, void > *mgid, struct sk_buff *skb) > > if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags) || > !priv->broadcast || > - !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { > - ++dev->stats.tx_dropped; > - dev_kfree_skb_any(skb); > - goto unlock; > - } > + !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) > + goto drop; > > mcast = __ipoib_mcast_find(dev, mgid); > if (!mcast) { > @@ -680,9 +662,7 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, > struct sk_buff *skb) > if (!mcast) { > ipoib_warn(priv, "unable to allocate memory for " > "multicast structure\n"); > - ++dev->stats.tx_dropped; > - dev_kfree_skb_any(skb); > - goto out; > + goto drop; > } > > set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags); > @@ -692,48 +672,33 @@ void ipoib_mcast_send(struct net_device *dev, void > *mgid, struct sk_buff *skb) > } > > if (!mcast->ah) { > - if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) > - skb_queue_tail(&mcast->pkt_queue, skb); > - else { > - ++dev->stats.tx_dropped; > - dev_kfree_skb_any(skb); > - } > - > + if (skb_queue_len(&mcast->pkt_queue) >= IPOIB_MAX_MCAST_QUEUE) > + goto drop; > + skb_queue_tail(&mcast->pkt_queue, skb); > if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) > ipoib_dbg_mcast(priv, "no address vector, " > "but multicast join already > started\n"); > else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) > ipoib_mcast_sendonly_join(mcast); > - > - /* > - * If lookup completes between here and out:, don't > - * want to send packet twice. > - */ > - mcast = NULL; > - } > - > -out: > - if (mcast && mcast->ah) { > - if (skb_dst(skb) && > - skb_dst(skb)->neighbour && > - !*to_ipoib_neigh(skb_dst(skb)->neighbour)) { > - struct ipoib_neigh *neigh = > ipoib_neigh_alloc(skb_dst(skb)->neighbour, > - > skb->dev); > - > - if (neigh) { > - kref_get(&mcast->ah->ref); > - neigh->ah = mcast->ah; > - list_add_tail(&neigh->list, > &mcast->neigh_list); > - } > + } else { > + if (neigh && list_empty(&neigh->list)) { > + kref_get(&mcast->ah->ref); > + neigh->ah = mcast->ah; > + memcpy(neigh->dgid.raw, mgid, sizeof(union ib_gid)); > + list_add_tail(&neigh->list, &mcast->neigh_list); > } > - > spin_unlock_irqrestore(&priv->lock, flags); > ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN); > return; > } > > -unlock: > spin_unlock_irqrestore(&priv->lock, flags); > + return; > + > +drop: > + spin_unlock_irqrestore(&priv->lock, flags); > + ++dev->stats.tx_dropped; > + dev_kfree_skb_any(skb); > } > > void ipoib_mcast_dev_flush(struct net_device *dev) > @@ -741,16 +706,14 @@ void ipoib_mcast_dev_flush(struct net_device *dev) > struct ipoib_dev_priv *priv = netdev_priv(dev); > LIST_HEAD(remove_list); > struct ipoib_mcast *mcast, *tmcast; > - unsigned long flags; > > ipoib_dbg_mcast(priv, "flushing multicast list\n"); > > - spin_lock_irqsave(&priv->lock, flags); > + spin_lock_irq(&priv->lock); > > list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) { > - list_del(&mcast->list); > rb_erase(&mcast->rb_node, &priv->multicast_tree); > - list_add_tail(&mcast->list, &remove_list); > + list_move_tail(&mcast->list, &remove_list); > } > > if (priv->broadcast) { > @@ -759,7 +722,7 @@ void ipoib_mcast_dev_flush(struct net_device *dev) > priv->broadcast = NULL; > } > > - spin_unlock_irqrestore(&priv->lock, flags); > + spin_unlock_irq(&priv->lock); > > list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { > ipoib_mcast_leave(dev, mcast); > -- > 1.6.0.6 > > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in > the body of a message to [email protected] > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to [email protected] More majordomo info at http://vger.kernel.org/majordomo-info.html
