Hi Mohan, Sorry for my late answer, but now I finally could look into this. First, I think you should rebase this on Tung's optimization patch for the node timer, "tipc: improve function tipc_node_timeout()". I was ready to deliver that one last Monday, but then net-next was closed right in my face ☹ tipc_node_timeout() is in reality a time critical function, because of the frequency and the O(N^2) scaling when running many VMs or containers on the same node. This is also why I suggest some changes to you patch. See below.
> Subject: FW: [net-next 1/1] tipc: Auto removal of peer down node instance > > A peer node is considered down if there are no active links (or) lost contact > to the node. In current implementation, a peer node instance is deleted > either if > a) TIPC module is removed (or) > b) Application can use a netlink/iproute2 interface to delete a specific down > node. > > Thus, a down node instance lives in the system forever, unless the > application explicitly removes it. > > We fix this by deleting the nodes which are down for a specified amount of > time (5 minutes). > Existing node supervision timer is used to achieve this. > > Signed-off-by: GhantaKrishnamurthy MohanKrishna > <[email protected]> > --- > net/tipc/node.c | 77 > ++++++++++++++++++++++++++++++++++++++++++++++++--------- > 1 file changed, 66 insertions(+), 11 deletions(-) > > diff --git a/net/tipc/node.c b/net/tipc/node.c index > b71e4e376bb9..e08f78a72f03 100644 > --- a/net/tipc/node.c > +++ b/net/tipc/node.c > @@ -45,6 +45,7 @@ > #include "netlink.h" > > #define INVALID_NODE_SIG 0x10000 > +#define NODE_CLEANUP_AFTER 0x493E0 You should define this one in decimal, to make it easier to understand. > > /* Flags used to take different actions according to flag type > * TIPC_NOTIFY_NODE_DOWN: notify node is down @@ -96,6 +97,7 @@ > struct tipc_bclink_entry { > * @link_id: local and remote bearer ids of changing link, if any > * @publ_list: list of publications > * @rcu: rcu struct for tipc_node > + * @down_at: indicates the time for deleting a down node > */ > struct tipc_node { > u32 addr; > @@ -121,6 +123,7 @@ struct tipc_node { > unsigned long keepalive_intv; > struct timer_list timer; > struct rcu_head rcu; > + unsigned long down_at; I don't remember what I originally suggested here, but "delete_at" would be more logical name. > }; > > /* Node FSM states and events: > @@ -160,6 +163,7 @@ static struct tipc_node *tipc_node_find(struct net > *net, u32 addr); static struct tipc_node *tipc_node_find_by_id(struct net > *net, u8 *id); static void tipc_node_put(struct tipc_node *node); static > bool > node_is_up(struct tipc_node *n); > +static void tipc_node_delete_from_list(struct tipc_node *node); > > struct tipc_sock_conn { > u32 port; > @@ -369,6 +373,7 @@ static struct tipc_node *tipc_node_create(struct net > *net, u32 addr, > for (i = 0; i < MAX_BEARERS; i++) > spin_lock_init(&n->links[i].lock); > n->state = SELF_DOWN_PEER_LEAVING; > + n->down_at = jiffies + msecs_to_jiffies(NODE_CLEANUP_AFTER); > n->signature = INVALID_NODE_SIG; > n->active_links[0] = INVALID_BEARER_ID; > n->active_links[1] = INVALID_BEARER_ID; @@ -412,11 +417,16 @@ > static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link > *l) > tipc_link_set_abort_limit(l, tol / n->keepalive_intv); } > > -static void tipc_node_delete(struct tipc_node *node) > +static void tipc_node_delete_from_list(struct tipc_node *node) > { > list_del_rcu(&node->list); > hlist_del_rcu(&node->hash); > tipc_node_put(node); > +} > + > +static void tipc_node_delete(struct tipc_node *node) { > + tipc_node_delete_from_list(node); > > del_timer_sync(&node->timer); > tipc_node_put(node); > @@ -523,6 +533,53 @@ void tipc_node_remove_conn(struct net *net, u32 > dnode, u32 port) > tipc_node_put(node); > } > > +static void tipc_node_clear_links(struct tipc_node *node) { > + int i; > + > + for (i = 0; i < MAX_BEARERS; i++) { > + struct tipc_link_entry *le = &node->links[i]; > + > + if (le->link) { > + kfree(le->link); > + le->link = NULL; > + node->link_cnt--; > + } > + } > +} > + > +/* tipc_node_cleanup - delete nodes that does not > + * have active links for NODE_CLEANUP_AFTER time */ static int > +tipc_node_cleanup(struct tipc_node *peer) { > + struct tipc_net *tn = tipc_net(peer->net); > + int err = 0; bool deleted = false; There is no error case here, so this function should only return a boolean: true => cleanup was done, false => not cleaned up. I would simplify this function as follows: > + > + spin_lock_bh(&tn->node_list_lock); > + tipc_node_write_lock(peer); if (!node_is_up(peer) && time_after(jiffies, peer->down_at)) { tipc_node_clear_links(peer); tipc_node_delete_from_list(peer); deleted = true; } tipc_node_write_unlock(peer); > + spin_unlock_bh(&tn->node_list_lock); > + return deleted; > +} > + > /* tipc_node_timeout - handle expiration of node timer > */ > static void tipc_node_timeout(struct timer_list *t) @@ -533,6 +590,12 @@ > static void tipc_node_timeout(struct timer_list *t) > int bearer_id; > int rc = 0; > > + if (!tipc_node_cleanup(n)) { > + /*Removing the reference of Timer*/ > + tipc_node_put(n); > + return; > + } The following would be more efficient: if (!node_is_up(n) && tipc_node_cleanup(n)) { tipc_node_put(n); return; } Note that you still have to do the node_is_up() test inside the function and write lock, because the node state may change before the write lock is grabbed. BR ///jon > + > __skb_queue_head_init(&xmitq); > > for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { @@ - > 1150,6 +1213,7 @@ static void node_lost_contact(struct tipc_node *n, > uint i; > > pr_debug("Lost contact with %x\n", n->addr); > + n->down_at = jiffies + msecs_to_jiffies(NODE_CLEANUP_AFTER); > > /* Clean up broadcast state */ > tipc_bcast_remove_peer(n->net, n->bc_entry.link); @@ -1719,7 > +1783,6 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) > struct tipc_node *peer; > u32 addr; > int err; > - int i; > > /* We identify the peer by its net */ > if (!info->attrs[TIPC_NLA_NET]) > @@ -1754,15 +1817,7 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct > genl_info *info) > goto err_out; > } > > - for (i = 0; i < MAX_BEARERS; i++) { > - struct tipc_link_entry *le = &peer->links[i]; > - > - if (le->link) { > - kfree(le->link); > - le->link = NULL; > - peer->link_cnt--; > - } > - } > + tipc_node_clear_links(peer); > tipc_node_write_unlock(peer); > tipc_node_delete(peer); > > -- > 2.1.4 ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot _______________________________________________ tipc-discussion mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/tipc-discussion
