On 05/23/2018 10:38 PM, GhantaKrishnamurthy MohanKrishna wrote:
> A peer node is considered down if there are no
> active links (or) lost contact to the node. In current implementation,
> a peer node instance is deleted either if
> a) TIPC module is removed (or)
> b) Application can use a netlink/iproute2 interface to delete a
> specific down node.
> 
> Thus, a down node instance lives in the system forever, unless the
> application explicitly removes it.

Per my understanding, this is not a flaw, instead the current behavior
might be a deliberate design. With the current design, once a lost node
is recovered from dead state within a short period, it's not necessary
to allocate its corresponding node object. As far as I remember, this
behavior exists a very long time, at since 1.7.X

On the contrary, if we automatically delete dead node object when node
is down, it's not necessary to manually remove it through netlink from
user space. In other words, we think this approach is better, we should
obsolete the TIPC iproute2 command of deleting dead node.

Jon,

Can you please kindly advise which method is better?

Thanks,
Ying

> 
> We fix this by deleting the nodes which are down for
> a specified amount of time (5 minutes).
> Existing node supervision timer is used to achieve this.
> 
> Signed-off-by: GhantaKrishnamurthy MohanKrishna 
> <[email protected]>
> ---
>  net/tipc/node.c | 77 
> ++++++++++++++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 66 insertions(+), 11 deletions(-)
> 
> diff --git a/net/tipc/node.c b/net/tipc/node.c
> index b71e4e376bb9..e08f78a72f03 100644
> --- a/net/tipc/node.c
> +++ b/net/tipc/node.c
> @@ -45,6 +45,7 @@
>  #include "netlink.h"
>  
>  #define INVALID_NODE_SIG     0x10000
> +#define NODE_CLEANUP_AFTER   0x493E0
>  
>  /* Flags used to take different actions according to flag type
>   * TIPC_NOTIFY_NODE_DOWN: notify node is down
> @@ -96,6 +97,7 @@ struct tipc_bclink_entry {
>   * @link_id: local and remote bearer ids of changing link, if any
>   * @publ_list: list of publications
>   * @rcu: rcu struct for tipc_node
> + * @down_at: indicates the time for deleting a down node
>   */
>  struct tipc_node {
>       u32 addr;
> @@ -121,6 +123,7 @@ struct tipc_node {
>       unsigned long keepalive_intv;
>       struct timer_list timer;
>       struct rcu_head rcu;
> +     unsigned long down_at;
>  };
>  
>  /* Node FSM states and events:
> @@ -160,6 +163,7 @@ static struct tipc_node *tipc_node_find(struct net *net, 
> u32 addr);
>  static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id);
>  static void tipc_node_put(struct tipc_node *node);
>  static bool node_is_up(struct tipc_node *n);
> +static void tipc_node_delete_from_list(struct tipc_node *node);
>  
>  struct tipc_sock_conn {
>       u32 port;
> @@ -369,6 +373,7 @@ static struct tipc_node *tipc_node_create(struct net 
> *net, u32 addr,
>       for (i = 0; i < MAX_BEARERS; i++)
>               spin_lock_init(&n->links[i].lock);
>       n->state = SELF_DOWN_PEER_LEAVING;
> +     n->down_at = jiffies + msecs_to_jiffies(NODE_CLEANUP_AFTER);
>       n->signature = INVALID_NODE_SIG;
>       n->active_links[0] = INVALID_BEARER_ID;
>       n->active_links[1] = INVALID_BEARER_ID;
> @@ -412,11 +417,16 @@ static void tipc_node_calculate_timer(struct tipc_node 
> *n, struct tipc_link *l)
>       tipc_link_set_abort_limit(l, tol / n->keepalive_intv);
>  }
>  
> -static void tipc_node_delete(struct tipc_node *node)
> +static void tipc_node_delete_from_list(struct tipc_node *node)
>  {
>       list_del_rcu(&node->list);
>       hlist_del_rcu(&node->hash);
>       tipc_node_put(node);
> +}
> +
> +static void tipc_node_delete(struct tipc_node *node)
> +{
> +     tipc_node_delete_from_list(node);
>  
>       del_timer_sync(&node->timer);
>       tipc_node_put(node);
> @@ -523,6 +533,53 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, 
> u32 port)
>       tipc_node_put(node);
>  }
>  
> +static void  tipc_node_clear_links(struct tipc_node *node)
> +{
> +     int i;
> +
> +     for (i = 0; i < MAX_BEARERS; i++) {
> +             struct tipc_link_entry *le = &node->links[i];
> +
> +             if (le->link) {
> +                     kfree(le->link);
> +                     le->link = NULL;
> +                     node->link_cnt--;
> +             }
> +     }
> +}
> +
> +/* tipc_node_cleanup - delete nodes that does not
> + * have active links for NODE_CLEANUP_AFTER time
> + */
> +static int tipc_node_cleanup(struct tipc_node *peer)
> +{
> +     struct tipc_net *tn = tipc_net(peer->net);
> +     int err = 0;
> +
> +     spin_lock_bh(&tn->node_list_lock);
> +     tipc_node_write_lock(peer);
> +
> +     if (node_is_up(peer)) {
> +             tipc_node_write_unlock(peer);
> +             err = -EBUSY;
> +             goto err_out;
> +     }
> +
> +     if (!time_after(jiffies, peer->down_at)) {
> +             tipc_node_write_unlock(peer);
> +             err = -EBUSY;
> +             goto err_out;
> +     }
> +
> +     tipc_node_clear_links(peer);
> +     tipc_node_write_unlock(peer);
> +     tipc_node_delete_from_list(peer);
> +
> +err_out:
> +     spin_unlock_bh(&tn->node_list_lock);
> +     return err;
> +}
> +
>  /* tipc_node_timeout - handle expiration of node timer
>   */
>  static void tipc_node_timeout(struct timer_list *t)
> @@ -533,6 +590,12 @@ static void tipc_node_timeout(struct timer_list *t)
>       int bearer_id;
>       int rc = 0;
>  
> +     if (!tipc_node_cleanup(n)) {
> +             /*Removing the reference of Timer*/
> +             tipc_node_put(n);
> +             return;
> +     }
> +
>       __skb_queue_head_init(&xmitq);
>  
>       for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
> @@ -1150,6 +1213,7 @@ static void node_lost_contact(struct tipc_node *n,
>       uint i;
>  
>       pr_debug("Lost contact with %x\n", n->addr);
> +     n->down_at = jiffies + msecs_to_jiffies(NODE_CLEANUP_AFTER);
>  
>       /* Clean up broadcast state */
>       tipc_bcast_remove_peer(n->net, n->bc_entry.link);
> @@ -1719,7 +1783,6 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct 
> genl_info *info)
>       struct tipc_node *peer;
>       u32 addr;
>       int err;
> -     int i;
>  
>       /* We identify the peer by its net */
>       if (!info->attrs[TIPC_NLA_NET])
> @@ -1754,15 +1817,7 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct 
> genl_info *info)
>               goto err_out;
>       }
>  
> -     for (i = 0; i < MAX_BEARERS; i++) {
> -             struct tipc_link_entry *le = &peer->links[i];
> -
> -             if (le->link) {
> -                     kfree(le->link);
> -                     le->link = NULL;
> -                     peer->link_cnt--;
> -             }
> -     }
> +     tipc_node_clear_links(peer);
>       tipc_node_write_unlock(peer);
>       tipc_node_delete(peer);
>  
> 

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
tipc-discussion mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/tipc-discussion

Reply via email to