Hi Hoang,
You are complicating this too much. The 'syn' and 'rcast' bits should remain 
internal to the rcast/bcast code in bcast.c, and we should not touch them 
elsewhere.

Instead, you should add a bit to the NAME_DISTR header that marks whether the 
message is bcast/rcast or bulk. For compatibility, this bit should be inverted, 
so that 1 means bcast/rcast ('not_bulk' or similar) and 0 means 'bulk'. Then, 
you add another bit, used in bulk messages only, meaning 'not_last'.
The last bulk message has this bit set to zero, and at that moment you can set 
link->named_sync and deliver the deferred queue.

As long as the link's named_sync flag is not set, you sort incoming messages as 
follows:

- Incoming !not_bulk/not_last messages (bulk, but not the last one) go to the 
head of the named_deferred queue.
- Incoming not_bulk messages go to the tail of the named_deferred queue, 
irrespective of the not_last bit (which should always be zero anyway).
- The incoming !not_bulk/!not_last message (the last bulk one) is sorted in 
after the last !not_bulk/not_last message in the queue, and named_sync is set.

Because all messages from old nodes will be !not_bulk/!not_last (0/0), those 
will never go into the deferred queue, and named_sync will be set from the 
start, so backwards compatibility is guaranteed.
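
To make the sorting concrete, here is a rough sketch of how the receive side
could look. This is only an illustration, not a tested implementation: it
reuses the field names from your patch (named_sync, defer_namedq, namedq) and
assumes hypothetical accessors msg_is_not_bulk()/msg_is_not_last() for the two
proposed header bits; the real names and bit positions are up to you.

static void tipc_named_rcv_sort(struct tipc_link *l, struct sk_buff *skb)
{
	struct tipc_msg *hdr = buf_msg(skb);
	struct sk_buff *_skb, *last_bulk = NULL;

	if (l->named_sync) {
		/* Already synchronized: deliver directly */
		skb_queue_tail(l->namedq, skb);
		return;
	}
	if (msg_is_not_bulk(hdr)) {
		/* bcast/rcast arriving before sync is complete: defer it */
		skb_queue_tail(&l->defer_namedq, skb);
		return;
	}
	if (msg_is_not_last(hdr)) {
		/* Bulk, but not the last one: keep it ahead of any deferred
		 * bcast/rcast messages
		 */
		skb_queue_head(&l->defer_namedq, skb);
		return;
	}
	/* Last bulk message (0/0): sort it in after the last deferred bulk
	 * message, set named_sync and release the whole deferred queue
	 */
	skb_queue_walk(&l->defer_namedq, _skb) {
		if (!msg_is_not_bulk(buf_msg(_skb)))
			last_bulk = _skb;
	}
	if (last_bulk)
		skb_append(last_bulk, skb, &l->defer_namedq);
	else
		skb_queue_head(&l->defer_namedq, skb);
	l->named_sync = true;
	skb_queue_splice_tail_init(&l->defer_namedq, l->namedq);
}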

I leave it to you to find better names for those bits.

I think that should solve our problem in a reasonably simple way.

BR
///jon

> -----Original Message-----
> From: Hoang Le <hoang.h...@dektech.com.au>
> Sent: 14-Nov-19 03:09
> To: Jon Maloy <jon.ma...@ericsson.com>; ma...@donjonn.com; 
> tipc-discussion@lists.sourceforge.net
> Cc: 'Ying Xue' <ying....@windriver.com>
> Subject: RE: [net-next] tipc: update a binding service via broadcast
> 
> Hi Jon,
> 
> Please take a look at v2. The mechanism is the same as the one I used before 
> in commit:
> c55c8edafa91 ("tipc: smooth change between replicast and broadcast")
> However, in this case we handle only one direction:  replicast -> broadcast.
> Hence, it is still backward compatible.
> 
> [...]
> From ae2ee6a7064de3ec1dc2c7df2db241d22b0d129f Mon Sep 17 00:00:00 2001
> From: Hoang Le <hoang.h...@dektech.com.au>
> Date: Wed, 13 Nov 2019 14:01:03 +0700
> Subject: [PATCH] tipc: update a binding service via broadcast
> 
> Currently, updating the binding table (adding a service binding to the name
> table or withdrawing one) is done over replicast. However, when scaling a
> cluster up to more than 100 nodes/containers, this method is less efficient
> because it loops through the nodes in the cluster one by one.
> 
> It is worth using broadcast to update a binding service instead, so that the
> binding table is updated on all nodes in one shot.
> 
> The mechanism is backward compatible because only the sending side changes.
> 
> v2: resolve synchronization problem when switching from unicast to
> broadcast
> 
> Signed-off-by: Hoang Le <hoang.h...@dektech.com.au>
> ---
>  net/tipc/bcast.c      | 13 +++++++++++++
>  net/tipc/bcast.h      |  2 ++
>  net/tipc/link.c       | 16 ++++++++++++++++
>  net/tipc/name_distr.c |  8 ++++++++
>  net/tipc/name_table.c |  9 ++++++---
>  5 files changed, 45 insertions(+), 3 deletions(-)
> 
> diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
> index f41096a759fa..18431fa897ab 100644
> --- a/net/tipc/bcast.c
> +++ b/net/tipc/bcast.c
> @@ -843,3 +843,16 @@ void tipc_mcast_filter_msg(struct net *net, struct 
> sk_buff_head *defq,
>                 __skb_queue_tail(inputq, _skb);
>         }
>  }
> +
> +int tipc_bcast_named_publish(struct net *net, struct sk_buff *skb)
> +{
> +       struct sk_buff_head xmitq;
> +       u16 cong_link_cnt;
> +       int rc = 0;
> +
> +       __skb_queue_head_init(&xmitq);
> +       __skb_queue_tail(&xmitq, skb);
> +       rc = tipc_bcast_xmit(net, &xmitq, &cong_link_cnt);
> +       __skb_queue_purge(&xmitq);
> +       return rc;
> +}
> diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
> index dadad953e2be..a100da3800fc 100644
> --- a/net/tipc/bcast.h
> +++ b/net/tipc/bcast.h
> @@ -101,6 +101,8 @@ int tipc_bclink_reset_stats(struct net *net);
>  u32 tipc_bcast_get_broadcast_mode(struct net *net);
>  u32 tipc_bcast_get_broadcast_ratio(struct net *net);
> 
> +int tipc_bcast_named_publish(struct net *net, struct sk_buff *skb);
> +
>  void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq,
>                            struct sk_buff_head *inputq);
> 
> diff --git a/net/tipc/link.c b/net/tipc/link.c
> index fb72031228c9..22f1854435df 100644
> --- a/net/tipc/link.c
> +++ b/net/tipc/link.c
> @@ -187,6 +187,9 @@ struct tipc_link {
>         struct tipc_link *bc_sndlink;
>         u8 nack_state;
>         bool bc_peer_is_up;
> +       bool named_sync;
> +       struct sk_buff_head defer_namedq;
> +
> 
>         /* Statistics */
>         struct tipc_stats stats;
> @@ -363,6 +366,7 @@ void tipc_link_remove_bc_peer(struct tipc_link *snd_l,
>         trace_tipc_link_reset(rcv_l, TIPC_DUMP_ALL, "bclink removed!");
>         tipc_link_reset(rcv_l);
>         rcv_l->state = LINK_RESET;
> +       rcv_l->named_sync = false;
>         if (!snd_l->ackers) {
>                 trace_tipc_link_reset(snd_l, TIPC_DUMP_ALL, "zero ackers!");
>                 tipc_link_reset(snd_l);
> @@ -508,6 +512,7 @@ bool tipc_link_create(struct net *net, char *if_name, int 
> bearer_id,
>         __skb_queue_head_init(&l->failover_deferdq);
>         skb_queue_head_init(&l->wakeupq);
>         skb_queue_head_init(l->inputq);
> +       __skb_queue_head_init(&l->defer_namedq);
>         return true;
>  }
> 
> @@ -932,6 +937,8 @@ void tipc_link_reset(struct tipc_link *l)
>         l->silent_intv_cnt = 0;
>         l->rst_cnt = 0;
>         l->bc_peer_is_up = false;
> +       l->named_sync = false;
> +       __skb_queue_purge(&l->defer_namedq);
>         memset(&l->mon_state, 0, sizeof(l->mon_state));
>         tipc_link_reset_stats(l);
>  }
> @@ -1210,6 +1217,15 @@ static bool tipc_data_input(struct tipc_link *l, 
> struct sk_buff *skb,
>                 return true;
>         case NAME_DISTRIBUTOR:
>                 l->bc_rcvlink->state = LINK_ESTABLISHED;
> +               if (msg_is_syn(hdr)) {
> +                       l->bc_rcvlink->named_sync = true;
> +                       skb_queue_splice_tail_init(&l->defer_namedq, l->namedq);

I think you would leak the skb here. It is neither added to a queue nor freed.
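
Assuming the dummy 'syn' message carries nothing that name_distr actually needs
to see, one way to plug the leak would be to consume it explicitly, e.g.:

		if (msg_is_syn(hdr)) {
			l->bc_rcvlink->named_sync = true;
			skb_queue_splice_tail_init(&l->defer_namedq, l->namedq);
			kfree_skb(skb);  /* dummy syn marker is consumed here */
			return true;
		}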

> +                       return true;
> +               }
> +               if (msg_is_rcast(hdr) && !l->bc_rcvlink->named_sync) {
> +                       skb_queue_tail(&l->defer_namedq, skb);
> +                       return true;
> +               }
>                 skb_queue_tail(l->namedq, skb);
>                 return true;
>         case MSG_BUNDLER:
> diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
> index 5feaf3b67380..419b3f0f102d 100644
> --- a/net/tipc/name_distr.c
> +++ b/net/tipc/name_distr.c
> @@ -180,6 +180,14 @@ static void named_distribute(struct net *net, struct 
> sk_buff_head *list,
>                 skb_trim(skb, INT_H_SIZE + (msg_dsz - msg_rem));
>                 __skb_queue_tail(list, skb);
>         }
> +
> +       /* Allocate dummy message in order to synchronize w/bcast */
> +       skb = named_prepare_buf(net, PUBLICATION, 0, dnode);
> +       if (skb) {
> +               /* Preparing for 'synching' header */
> +               msg_set_syn(buf_msg(skb), 1);
> +               __skb_queue_tail(list, skb);
> +       }
>  }
> 
>  /**
> diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
> index 66a65c2cdb23..4ba6d73e5c4c 100644
> --- a/net/tipc/name_table.c
> +++ b/net/tipc/name_table.c
> @@ -632,8 +632,10 @@ struct publication *tipc_nametbl_publish(struct net 
> *net, u32 type, u32 lower,
>  exit:
>         spin_unlock_bh(&tn->nametbl_lock);
> 
> -       if (skb)
> -               tipc_node_broadcast(net, skb);
> +       if (skb) {
> +               msg_set_is_rcast(buf_msg(skb), true);
> +               tipc_bcast_named_publish(net, skb);
> +       }
>         return p;
>  }
> 
> @@ -664,7 +666,8 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 
> lower,
>         spin_unlock_bh(&tn->nametbl_lock);
> 
>         if (skb) {
> -               tipc_node_broadcast(net, skb);
> +               msg_set_is_rcast(buf_msg(skb), true);
> +               tipc_bcast_named_publish(net, skb);
>                 return 1;
>         }
>         return 0;
> --
> 2.20.1
> 
> -----Original Message-----
> From: Jon Maloy <jon.ma...@ericsson.com>
> Sent: Wednesday, November 13, 2019 7:02 PM
> To: Hoang Huu Le <hoang.h...@dektech.com.au>; ma...@donjonn.com; tipc-
> discuss...@lists.sourceforge.net
> Cc: 'Ying Xue' <ying....@windriver.com>
> Subject: RE: [net-next] tipc: update a binding service via broadcast
> 
> Hi Hoang,
> This is good, but you have missed the point about the synchronization problem 
> I have been talking about.
> 
> 1) A new node comes up
> 2) The "bulk" binding table update is sent as a series of packets over the 
> new unicast link. This may take some time.
> 3) The owner of one of the bindings in the bulk (on this node) does unbind.
> 4) This is sent as a broadcast withdraw to all nodes, and arrives at the 
> newly connected node before the last packets of the unicast bulk.
> 5) Since there is no corresponding publication in the peer node's binding 
> table yet, the withdraw is ignored.
> 6) The last bulk unicasts arrive at the new peer, and the now invalid 
> publication is added to its binding
> table.
> 7) This publication will stay there forever.
> 
> We need to find a way to synchronize so that we know that all the bulk 
> publications are in place in the
> binding table before any broadcast publications/withdraws can be accepted.
> Obviously, we could create a backlog queue in the name table, but I hope we 
> can find a simpler and neater
> solution.
> 
> Regards
> ///jon
> 
> > -----Original Message-----
> > From: Hoang Le <hoang.h...@dektech.com.au>
> > Sent: 13-Nov-19 02:35
> > To: Jon Maloy <jon.ma...@ericsson.com>; ma...@donjonn.com; 
> > tipc-discussion@lists.sourceforge.net
> > Subject: [net-next] tipc: update a binding service via broadcast
> >
> > Currently, updating the binding table (adding a service binding to the
> > name table or withdrawing one) is done over replicast. However, when
> > scaling a cluster up to more than 100 nodes/containers, this method is
> > less efficient because it loops through the nodes in the cluster one by
> > one.
> >
> > It is worth using broadcast to update a binding service instead, so that
> > the binding table is updated on all nodes in one shot.
> >
> > The mechanism is backward compatible because only the sending side changes.
> >
> > Signed-off-by: Hoang Le <hoang.h...@dektech.com.au>
> > ---
> >  net/tipc/bcast.c      | 13 +++++++++++++
> >  net/tipc/bcast.h      |  2 ++
> >  net/tipc/name_table.c |  4 ++--
> >  3 files changed, 17 insertions(+), 2 deletions(-)
> >
> > diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
> > index f41096a759fa..18431fa897ab 100644
> > --- a/net/tipc/bcast.c
> > +++ b/net/tipc/bcast.c
> > @@ -843,3 +843,16 @@ void tipc_mcast_filter_msg(struct net *net, struct 
> > sk_buff_head *defq,
> >             __skb_queue_tail(inputq, _skb);
> >     }
> >  }
> > +
> > +int tipc_bcast_named_publish(struct net *net, struct sk_buff *skb)
> > +{
> > +   struct sk_buff_head xmitq;
> > +   u16 cong_link_cnt;
> > +   int rc = 0;
> > +
> > +   __skb_queue_head_init(&xmitq);
> > +   __skb_queue_tail(&xmitq, skb);
> > +   rc = tipc_bcast_xmit(net, &xmitq, &cong_link_cnt);
> > +   __skb_queue_purge(&xmitq);
> > +   return rc;
> > +}
> > diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
> > index dadad953e2be..a100da3800fc 100644
> > --- a/net/tipc/bcast.h
> > +++ b/net/tipc/bcast.h
> > @@ -101,6 +101,8 @@ int tipc_bclink_reset_stats(struct net *net);
> >  u32 tipc_bcast_get_broadcast_mode(struct net *net);
> >  u32 tipc_bcast_get_broadcast_ratio(struct net *net);
> >
> > +int tipc_bcast_named_publish(struct net *net, struct sk_buff *skb);
> > +
> >  void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq,
> >                        struct sk_buff_head *inputq);
> >
> > diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
> > index 66a65c2cdb23..9e9c61f7c999 100644
> > --- a/net/tipc/name_table.c
> > +++ b/net/tipc/name_table.c
> > @@ -633,7 +633,7 @@ struct publication *tipc_nametbl_publish(struct net 
> > *net, u32 type, u32 lower,
> >     spin_unlock_bh(&tn->nametbl_lock);
> >
> >     if (skb)
> > -           tipc_node_broadcast(net, skb);
> > +           tipc_bcast_named_publish(net, skb);
> >     return p;
> >  }
> >
> > @@ -664,7 +664,7 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, 
> > u32 lower,
> >     spin_unlock_bh(&tn->nametbl_lock);
> >
> >     if (skb) {
> > -           tipc_node_broadcast(net, skb);
> > +           tipc_bcast_named_publish(net, skb);
> >             return 1;
> >     }
> >     return 0;
> > --
> > 2.20.1
> 


