Hi Hoang,
You are complicating this too much. The 'syn' and 'rcast' bits should remain
internal for the bcast.c rcast/bcast code, and we should not mess with those
elsewhere.
Instead, you should add a bit in the NAME_DISTR header that marks if the
message is bcast/rcast or bulk. For compatibility, this bit should be inverted,
so that 1 means bcast/rcast ('not_bulk' or similar) and 0 means 'bulk'. Then,
you add another bit, in bulk messages only, meaning 'not_last'.
The last bulk message has this bit set to zero, and at that moment you can set
the link->named_sync and deliver the deferred queue.
As long as the link->named_sync bit is not set, you sort the messages as follows:
- Incoming !not_bulk/not_last messages go to the head of the named_deferred
queue.
- Incoming not_bulk messages go to the tail of the named_deferred queue,
irrespective of the not_last bit (which should be always zero anyway)
- Incoming !not_bulk/!not_last message is sorted in after the last
!not_bulk/not_last message in the queue, and named_sync is set.
Because all old messages will be !not_bulk/!not_last (0/0) those will never go
into the deferred queue, and named_sync will be set from the start, so we have
guaranteed backwards compatibility.
I leave it to you to find better names for those bits.
I think that should solve our problem in a reasonably simple way.
BR
///jon
> -----Original Message-----
> From: Hoang Le <[email protected]>
> Sent: 14-Nov-19 03:09
> To: Jon Maloy <[email protected]>; [email protected];
> [email protected]
> Cc: 'Ying Xue' <[email protected]>
> Subject: RE: [net-next] tipc: update a binding service via broadcast
>
> Hi Jon,
>
> Please take a look at v2. The mechanism looks the same as I did before in
> commit:
> c55c8edafa91 ("tipc: smooth change between replicast and broadcast")
> However, in this case we handle only one direction: replicast -> broadcast.
> Then, it is still backward compatible.
>
> [...]
> From ae2ee6a7064de3ec1dc2c7df2db241d22b0d129f Mon Sep 17 00:00:00 2001
> From: Hoang Le <[email protected]>
> Date: Wed, 13 Nov 2019 14:01:03 +0700
> Subject: [PATCH] tipc: update a binding service via broadcast
>
> Currently, updating binding table (add service binding to
> name table/withdraw a service binding) is being sent over replicast.
> However, if we are scaling up clusters to > 100 nodes/containers this
> method is less affection because of looping through nodes in a cluster one
> by one.
>
> It is worth to use broadcast to update a binding service. Then binding
> table updates in all nodes for one shot.
>
> The mechanism is backward compatible because of sending side changing.
>
> v2: resolve synchronization problem when switching from unicast to
> broadcast
>
> Signed-off-by: Hoang Le <[email protected]>
> ---
> net/tipc/bcast.c | 13 +++++++++++++
> net/tipc/bcast.h | 2 ++
> net/tipc/link.c | 16 ++++++++++++++++
> net/tipc/name_distr.c | 8 ++++++++
> net/tipc/name_table.c | 9 ++++++---
> 5 files changed, 45 insertions(+), 3 deletions(-)
>
> diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
> index f41096a759fa..18431fa897ab 100644
> --- a/net/tipc/bcast.c
> +++ b/net/tipc/bcast.c
> @@ -843,3 +843,16 @@ void tipc_mcast_filter_msg(struct net *net, struct
> sk_buff_head *defq,
> __skb_queue_tail(inputq, _skb);
> }
> }
> +
> +int tipc_bcast_named_publish(struct net *net, struct sk_buff *skb)
> +{
> + struct sk_buff_head xmitq;
> + u16 cong_link_cnt;
> + int rc = 0;
> +
> + __skb_queue_head_init(&xmitq);
> + __skb_queue_tail(&xmitq, skb);
> + rc = tipc_bcast_xmit(net, &xmitq, &cong_link_cnt);
> + __skb_queue_purge(&xmitq);
> + return rc;
> +}
> diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
> index dadad953e2be..a100da3800fc 100644
> --- a/net/tipc/bcast.h
> +++ b/net/tipc/bcast.h
> @@ -101,6 +101,8 @@ int tipc_bclink_reset_stats(struct net *net);
> u32 tipc_bcast_get_broadcast_mode(struct net *net);
> u32 tipc_bcast_get_broadcast_ratio(struct net *net);
>
> +int tipc_bcast_named_publish(struct net *net, struct sk_buff *skb);
> +
> void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq,
> struct sk_buff_head *inputq);
>
> diff --git a/net/tipc/link.c b/net/tipc/link.c
> index fb72031228c9..22f1854435df 100644
> --- a/net/tipc/link.c
> +++ b/net/tipc/link.c
> @@ -187,6 +187,9 @@ struct tipc_link {
> struct tipc_link *bc_sndlink;
> u8 nack_state;
> bool bc_peer_is_up;
> + bool named_sync;
> + struct sk_buff_head defer_namedq;
> +
>
> /* Statistics */
> struct tipc_stats stats;
> @@ -363,6 +366,7 @@ void tipc_link_remove_bc_peer(struct tipc_link *snd_l,
> trace_tipc_link_reset(rcv_l, TIPC_DUMP_ALL, "bclink removed!");
> tipc_link_reset(rcv_l);
> rcv_l->state = LINK_RESET;
> + rcv_l->named_sync = false;
> if (!snd_l->ackers) {
> trace_tipc_link_reset(snd_l, TIPC_DUMP_ALL, "zero ackers!");
> tipc_link_reset(snd_l);
> @@ -508,6 +512,7 @@ bool tipc_link_create(struct net *net, char *if_name, int
> bearer_id,
> __skb_queue_head_init(&l->failover_deferdq);
> skb_queue_head_init(&l->wakeupq);
> skb_queue_head_init(l->inputq);
> + __skb_queue_head_init(&l->defer_namedq);
> return true;
> }
>
> @@ -932,6 +937,8 @@ void tipc_link_reset(struct tipc_link *l)
> l->silent_intv_cnt = 0;
> l->rst_cnt = 0;
> l->bc_peer_is_up = false;
> + l->named_sync = false;
> + __skb_queue_purge(&l->defer_namedq);
> memset(&l->mon_state, 0, sizeof(l->mon_state));
> tipc_link_reset_stats(l);
> }
> @@ -1210,6 +1217,15 @@ static bool tipc_data_input(struct tipc_link *l,
> struct sk_buff *skb,
> return true;
> case NAME_DISTRIBUTOR:
> l->bc_rcvlink->state = LINK_ESTABLISHED;
> + if (msg_is_syn(hdr)) {
> + l->bc_rcvlink->named_sync = true;
> + skb_queue_splice_tail_init(&l->defer_namedq,
> l->namedq);
I think you would leak the skb here: it is neither added to any queue nor
freed (no kfree_skb()).
> + return true;
> + }
> + if (msg_is_rcast(hdr) && !l->bc_rcvlink->named_sync) {
> + skb_queue_tail(&l->defer_namedq, skb);
> + return true;
> + }
> skb_queue_tail(l->namedq, skb);
> return true;
> case MSG_BUNDLER:
> diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
> index 5feaf3b67380..419b3f0f102d 100644
> --- a/net/tipc/name_distr.c
> +++ b/net/tipc/name_distr.c
> @@ -180,6 +180,14 @@ static void named_distribute(struct net *net, struct
> sk_buff_head *list,
> skb_trim(skb, INT_H_SIZE + (msg_dsz - msg_rem));
> __skb_queue_tail(list, skb);
> }
> +
> + /* Allocate dummy message in order to synchronize w/bcast */
> + skb = named_prepare_buf(net, PUBLICATION, 0, dnode);
> + if (skb) {
> + /* Preparing for 'synching' header */
> + msg_set_syn(buf_msg(skb), 1);
> + __skb_queue_tail(list, skb);
> + }
> }
>
> /**
> diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
> index 66a65c2cdb23..4ba6d73e5c4c 100644
> --- a/net/tipc/name_table.c
> +++ b/net/tipc/name_table.c
> @@ -632,8 +632,10 @@ struct publication *tipc_nametbl_publish(struct net
> *net, u32 type, u32 lower,
> exit:
> spin_unlock_bh(&tn->nametbl_lock);
>
> - if (skb)
> - tipc_node_broadcast(net, skb);
> + if (skb) {
> + msg_set_is_rcast(buf_msg(skb), true);
> + tipc_bcast_named_publish(net, skb);
> + }
> return p;
> }
>
> @@ -664,7 +666,8 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32
> lower,
> spin_unlock_bh(&tn->nametbl_lock);
>
> if (skb) {
> - tipc_node_broadcast(net, skb);
> + msg_set_is_rcast(buf_msg(skb), true);
> + tipc_bcast_named_publish(net, skb);
> return 1;
> }
> return 0;
> --
> 2.20.1
>
> -----Original Message-----
> From: Jon Maloy <[email protected]>
> Sent: Wednesday, November 13, 2019 7:02 PM
> To: Hoang Huu Le <[email protected]>; [email protected]; tipc-
> [email protected]
> Cc: 'Ying Xue' <[email protected]>
> Subject: RE: [net-next] tipc: update a binding service via broadcast
>
> Hi Hoang,
> This is good, but you have missed the point about the synchronization problem
> I have been talking about.
>
> 1) A new node comes up
> 2) The "bulk" binding table update is sent, as a series of packets over the
> new unicast link. This may take
> some time.
> 3) The owner of one of the bindings in the bulk (on this node) does unbind.
> 4) This is sent as broadcast withdraw to all nodes, and arrives before the
> last packets of the unicast bulk to
> the newly connected node.
> 5) Since there is no corresponding publication in the peer node's binding
> table yet, the withdraw is ignored.
> 6) The last bulk unicasts arrive at the new peer, and the now invalid
> publication is added to its binding
> table.
> 7) This publication will stay there forever.
>
> We need to find a way to synchronize so that we know that all the bulk
> publications are in place in the
> binding table before any broadcast publications/withdraws can be accepted.
> Obviously, we could create a backlog queue in the name table, but I hope we
> can find a simpler and neater
> solution.
>
> Regards
> ///jon
>
> > -----Original Message-----
> > From: Hoang Le <[email protected]>
> > Sent: 13-Nov-19 02:35
> > To: Jon Maloy <[email protected]>; [email protected];
> > [email protected]
> > Subject: [net-next] tipc: update a binding service via broadcast
> >
> > Currently, updating binding table (add service binding to
> > name table/withdraw a service binding) is being sent over replicast.
> > However, if we are scaling up clusters to > 100 nodes/containers this
> > method is less affection because of looping through nodes in a cluster one
> > by one.
> >
> > It is worth to use broadcast to update a binding service. Then binding
> > table updates in all nodes for one shot.
> >
> > The mechanism is backward compatible because of sending side changing.
> >
> > Signed-off-by: Hoang Le <[email protected]>
> > ---
> > net/tipc/bcast.c | 13 +++++++++++++
> > net/tipc/bcast.h | 2 ++
> > net/tipc/name_table.c | 4 ++--
> > 3 files changed, 17 insertions(+), 2 deletions(-)
> >
> > diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
> > index f41096a759fa..18431fa897ab 100644
> > --- a/net/tipc/bcast.c
> > +++ b/net/tipc/bcast.c
> > @@ -843,3 +843,16 @@ void tipc_mcast_filter_msg(struct net *net, struct
> > sk_buff_head *defq,
> > __skb_queue_tail(inputq, _skb);
> > }
> > }
> > +
> > +int tipc_bcast_named_publish(struct net *net, struct sk_buff *skb)
> > +{
> > + struct sk_buff_head xmitq;
> > + u16 cong_link_cnt;
> > + int rc = 0;
> > +
> > + __skb_queue_head_init(&xmitq);
> > + __skb_queue_tail(&xmitq, skb);
> > + rc = tipc_bcast_xmit(net, &xmitq, &cong_link_cnt);
> > + __skb_queue_purge(&xmitq);
> > + return rc;
> > +}
> > diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
> > index dadad953e2be..a100da3800fc 100644
> > --- a/net/tipc/bcast.h
> > +++ b/net/tipc/bcast.h
> > @@ -101,6 +101,8 @@ int tipc_bclink_reset_stats(struct net *net);
> > u32 tipc_bcast_get_broadcast_mode(struct net *net);
> > u32 tipc_bcast_get_broadcast_ratio(struct net *net);
> >
> > +int tipc_bcast_named_publish(struct net *net, struct sk_buff *skb);
> > +
> > void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq,
> > struct sk_buff_head *inputq);
> >
> > diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
> > index 66a65c2cdb23..9e9c61f7c999 100644
> > --- a/net/tipc/name_table.c
> > +++ b/net/tipc/name_table.c
> > @@ -633,7 +633,7 @@ struct publication *tipc_nametbl_publish(struct net
> > *net, u32 type, u32 lower,
> > spin_unlock_bh(&tn->nametbl_lock);
> >
> > if (skb)
> > - tipc_node_broadcast(net, skb);
> > + tipc_bcast_named_publish(net, skb);
> > return p;
> > }
> >
> > @@ -664,7 +664,7 @@ int tipc_nametbl_withdraw(struct net *net, u32 type,
> > u32 lower,
> > spin_unlock_bh(&tn->nametbl_lock);
> >
> > if (skb) {
> > - tipc_node_broadcast(net, skb);
> > + tipc_bcast_named_publish(net, skb);
> > return 1;
> > }
> > return 0;
> > --
> > 2.20.1
>
_______________________________________________
tipc-discussion mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/tipc-discussion