> > [Ying] In theory it's not very good for us to use a write lock on the
> > receive path. At least it indicates our locking policy is not designed
> > very well. In the following code, we need to hold the write lock because
> > we have to update "state->peer_gen". Is it possible to not update
> > state->peer_gen on the receive path? Or is it possible to use another
> > approach to avoid the update? If so, I think we can make "mon->lock"
> > perform better. For example, mon->lock could be used to protect only the
> > mon->peers[] list instead of protecting the peers themselves, which
> > would give us fine-grained locking of mon->lock. Meanwhile, we could add
> > a new lock in the peer struct to protect each peer's members.
>
> I considered using an RCU lock, and I considered Thomas Graf's
> rhashtable, but both proved unfit because of the strong requirement on
> consistency between the list structure and the contents of each
> individual peer. This is the same thing here: there is no external API
> call (add/remove/up/down/rcv) that does not affect several peers
> simultaneously, and all of them are absolutely dependent on having a
> consistent list during their execution.
>
> To put it otherwise: the list is not just a collection of peers where
> each peer can be changed individually, in parallel with the other peers.
> The order and number of peers in the list is part of the algorithm and
> is, along with the content of the other peers, essential for generating
> correct content in each peer.
>
> But I also suspect you misunderstand the working of peer_gen. As long as
> the peer domain (generation) has not changed, we never need to grab the
> write lock, i.e., 99.99% of the time. And when it has changed, we do a
> lot more than just update peer_gen. We need to update the directly
> affected peer, but also the "head_map" of its domain members, and that
> while being certain that nobody else is modifying the list or its member
> peers at the same time. So we still need to grab the write lock, as I
> see it.
>
> If you see something I don't see here, please explain it better.
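If I read the above right, the receive path boils down to roughly the
following sketch (condensed for illustration only, not the actual patch
code; mon_update_domain() is a made-up placeholder for the real peer,
domain record and head_map updates):

    static void mon_rcv_sketch(struct tipc_monitor *mon,
                               struct tipc_mon_state *state, u16 ndgen)
    {
            /* Fast path: most received state messages carry no new
             * domain generation, so no exclusive access is needed.
             */
            if (!more(ndgen, state->peer_gen))
                    return;

            /* Slow path: the peer's domain has changed. The affected
             * peer, its domain record and the head_map of its domain
             * members must be updated as one consistent unit, hence
             * the write lock.
             */
            write_lock_bh(&mon->lock);
            state->peer_gen = ndgen;
            mon_update_domain(mon, state);  /* placeholder */
            write_unlock_bh(&mon->lock);
    }

That is, the write lock is the exception, not the rule.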
Thanks for the explanation. You are right; the current locking policy is
reasonable for us.

Regards,
Ying

> BR
> ///jon
>
> > +	peer = get_peer(mon, addr);
> > +	if (!peer)
> > +		goto exit;
> > +	if (!more(ndgen, state->peer_gen))
> > +		goto exit;
> > +	state->peer_gen = ndgen;
> > +	if (!peer->is_up)
> > +		goto exit;
> > +
> > +	/* Transform and store received domain record */
> > +	dom = peer->domain;
> > +	if (!dom || (dom->len < ndlen)) {
> > +		kfree(dom);
> > +		dom = kmalloc(ndlen, GFP_ATOMIC);
> > +		peer->domain = dom;
> > +		if (!dom)
> > +			goto exit;
> > +	}
> > +	dom->len = ndlen;
> > +	dom->gen = ndgen;
> > +	dom->member_cnt = nmember_cnt;
> > +	dom->up_map = be64_to_cpu(ndom->up_map);
> > +	for (i = 0; i < nmember_cnt; i++)
> > +		dom->members[i] = ntohl(ndom->members[i]);
> > +
> > +	/* Update peers affected by this domain record */
> > +	mon_match_domain(mon, peer);
> > +	peer->confirm = 0;
> > +	mon_assign_roles(mon, peer_head(peer));
> > +exit:
> > +	write_unlock_bh(&mon->lock);
> > +}
> > +
> > +void tipc_mon_prep(struct net *net, void *data, int *dlen,
> > +		   struct tipc_mon_state *state, int bearer_id)
> > +{
> > +	struct tipc_net *tn = tipc_net(net);
> > +	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
> > +	struct tipc_mon_domain *dom = data;
> > +	u16 gen = state->gen;
> > +
> > +	if (mon->peer_cnt <= tn->mon_threshold) {
> > +		*dlen = 0;
> > +		return;
> > +	}
> > +	if (!less(state->ack_gen, gen) || mon->disabled) {
> > +		*dlen = dom_rec_len(dom, 0);
> > +		dom->len = htons(dom_rec_len(dom, 0));
> > +		dom->gen = htons(gen);
> > +		dom->ack_gen = htons(state->peer_gen);
> > +		dom->member_cnt = 0;
> > +		return;
> > +	}
> > +	read_lock_bh(&mon->lock);
> > +	*dlen = ntohs(mon->cache.len);
> > +	memcpy(data, &mon->cache, *dlen);
> > +	read_unlock_bh(&mon->lock);
> > +	dom->ack_gen = htons(state->peer_gen);
> > +}
> > +
> > +bool tipc_mon_is_probed(struct net *net, u32 addr,
> > +			struct tipc_mon_state *state,
> > +			int bearer_id)
> > +{
> > +	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
> > +	struct tipc_peer *peer;
> > +
> > +	if (mon->disabled)
> > +		return false;
> > +
> > +	if (!state->probed &&
> > +	    !less(state->list_gen, mon->list_gen) &&
> > +	    !less(state->ack_gen, state->gen))
> > +		return false;
> > +
> > +	read_lock_bh(&mon->lock);
> > +	peer = get_peer(mon, addr);
> > +	if (peer) {
> > +		state->probed = less(state->gen, mon->dom_gen);
> > +		state->probed |= less(state->ack_gen, state->gen);
> > +		state->probed |= peer->confirm;
> > +		peer->confirm = 0;
> > +		state->monitored = peer->is_local;
> > +		state->monitored |= peer->is_head;
> > +		state->monitored |= !peer->head_map;
> > +		state->list_gen = mon->list_gen;
> > +		state->gen = mon->dom_gen;
> > +	}
> > +	read_unlock_bh(&mon->lock);
> > +	return state->probed || state->monitored;
> > +}
> > +
> > +int tipc_mon_create(struct net *net, int bearer_id)
> > +{
> > +	struct tipc_net *tn = tipc_net(net);
> > +	struct tipc_monitor *mon;
> > +	struct tipc_peer *self;
> > +	struct tipc_mon_domain *dom;
> > +
> > +	if (tn->monitors[bearer_id])
> > +		return 0;
> > +
> > +	mon = kzalloc(sizeof(*mon), GFP_ATOMIC);
> > +	self = kzalloc(sizeof(*self), GFP_ATOMIC);
> > +	dom = kzalloc(sizeof(*dom), GFP_ATOMIC);
> > +	if (!mon || !self || !dom) {
> > +		kfree(mon);
> > +		kfree(self);
> > +		kfree(dom);
> > +		return -ENOMEM;
> > +	}
> > +	tn->monitors[bearer_id] = mon;
> > +	rwlock_init(&mon->lock);
> > +	mon->net = net;
> > +	mon->peer_cnt = 1;
> > +	mon->self = self;
> > +	self->domain = dom;
> > +	self->addr = tipc_own_addr(net);
> > +	self->is_up = true;
> > +	self->is_head = true;
> > +	INIT_LIST_HEAD(&self->list);
> > +	return 0;
> > +}
> > +
> > +void tipc_mon_disable(struct net *net, int bearer_id)
> > +{
> > +	tipc_monitor(net, bearer_id)->disabled = true;
> > +}
> > +
> > +void tipc_mon_delete(struct net *net, int bearer_id)
> > +{
> > +	struct tipc_net *tn = tipc_net(net);
> > +	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
> > +	struct tipc_peer *self = get_self(net, bearer_id);
> > +	struct tipc_peer *peer, *tmp;
> > +
> > +	write_lock_bh(&mon->lock);
> > +	tn->monitors[bearer_id] = NULL;

[Ying] Here tn->monitors[bearer_id] is set to NULL.

> > +	list_for_each_entry_safe(peer, tmp, &self->list, list) {
> > +		list_del(&peer->list);
> > +		hlist_del(&peer->hash);
> > +		kfree(peer->domain);
> > +		kfree(peer);
> > +	}
> > +	kfree(self->domain);
> > +	kfree(self);
> > +	write_unlock_bh(&mon->lock);
> > +	tn->monitors[bearer_id] = NULL;

[Ying] This sets tn->monitors[bearer_id] to NULL again!

Regards,
Ying

> > +	kfree(mon);
> > +}
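For clarity, the tail of tipc_mon_delete() could simply drop the
redundant second assignment, since the array slot is already cleared
under the write lock (a sketch against the quoted code, untested):

    write_lock_bh(&mon->lock);
    tn->monitors[bearer_id] = NULL;  /* cleared once, under the lock */
    list_for_each_entry_safe(peer, tmp, &self->list, list) {
            list_del(&peer->list);
            hlist_del(&peer->hash);
            kfree(peer->domain);
            kfree(peer);
    }
    kfree(self->domain);
    kfree(self);
    write_unlock_bh(&mon->lock);
    /* the second "tn->monitors[bearer_id] = NULL;" is dropped here */
    kfree(mon);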
> > diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h
> > new file mode 100644
> > index 0000000..7a25541
> > --- /dev/null
> > +++ b/net/tipc/monitor.h
> > @@ -0,0 +1,72 @@
> > +/*
> > + * net/tipc/monitor.h
> > + *
> > + * Copyright (c) 2015, Ericsson AB
> > + * All rights reserved.
> > + *
> > + * Redistribution and use in source and binary forms, with or without
> > + * modification, are permitted provided that the following conditions are met:
> > + *
> > + * 1. Redistributions of source code must retain the above copyright
> > + *    notice, this list of conditions and the following disclaimer.
> > + * 2. Redistributions in binary form must reproduce the above copyright
> > + *    notice, this list of conditions and the following disclaimer in the
> > + *    documentation and/or other materials provided with the distribution.
> > + * 3. Neither the names of the copyright holders nor the names of its
> > + *    contributors may be used to endorse or promote products derived from
> > + *    this software without specific prior written permission.
> > + *
> > + * Alternatively, this software may be distributed under the terms of the
> > + * GNU General Public License ("GPL") version 2 as published by the Free
> > + * Software Foundation.
> > + *
> > + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
> > + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> > + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> > + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
> > + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
> > + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
> > + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
> > + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
> > + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
> > + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
> > + * POSSIBILITY OF SUCH DAMAGE.
> > + */
> > +
> > +#ifndef _TIPC_MONITOR_H
> > +#define _TIPC_MONITOR_H
> > +
> > +/* struct tipc_mon_state: link instance's cache of monitor list and
> > + * domain state
> > + * @list_gen: current generation of this node's monitor list
> > + * @gen: current generation of this node's local domain
> > + * @peer_gen: most recent domain generation received from peer
> > + * @ack_gen: most recent generation of self's domain acked by peer
> > + * @monitored: peer endpoint should be continuously monitored
> > + * @probed: peer endpoint should be temporarily probed for potential loss
> > + */
> > +struct tipc_mon_state {
> > +	u16 list_gen;
> > +	u16 gen;
> > +	u16 peer_gen;
> > +	u16 ack_gen;
> > +	bool monitored;
> > +	bool probed;
> > +};
> > +
> > +int tipc_mon_create(struct net *net, int bearer_id);
> > +void tipc_mon_disable(struct net *net, int bearer_id);
> > +void tipc_mon_delete(struct net *net, int bearer_id);
> > +
> > +void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id);
> > +void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id);
> > +void tipc_mon_prep(struct net *net, void *data, int *dlen,
> > +		   struct tipc_mon_state *state, int bearer_id);
> > +void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
> > +		  struct tipc_mon_state *state, int bearer_id);
> > +bool tipc_mon_is_probed(struct net *net, u32 addr,
> > +			struct tipc_mon_state *state,
> > +			int bearer_id);
> > +void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id);
> > +
> > +extern const int tipc_max_domain_size;
> > +
> > +#endif
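As an aside: all the generation fields in tipc_mon_state above are u16
counters compared with TIPC's wraparound-safe less()/more() helpers. A
standalone illustration of that kind of half-range comparison follows
(gen_less()/gen_more() are hypothetical rebuilds for this example, not
the in-tree helpers):

    #include <stdint.h>
    #include <stdio.h>

    /* Half-range comparison: "right" is newer than "left" if it lies
     * fewer than 32768 steps ahead of it, modulo 65536.
     */
    static int gen_less(uint16_t left, uint16_t right)
    {
            return left != right && (uint16_t)(right - left) < 32768;
    }

    static int gen_more(uint16_t left, uint16_t right)
    {
            return gen_less(right, left);
    }

    int main(void)
    {
            /* generation 1 is newer than 65535 once the counter wraps */
            printf("%d\n", gen_more(1, 65535));  /* prints 1 */
            printf("%d\n", gen_more(65534, 2));  /* prints 0 */
            return 0;
    }

This is why checks like "if (!more(ndgen, state->peer_gen))" keep
working across generation counter wraparound.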
> > diff --git a/net/tipc/node.c b/net/tipc/node.c
> > index 68d9f7b..43f2d78 100644
> > --- a/net/tipc/node.c
> > +++ b/net/tipc/node.c
> > @@ -40,6 +40,7 @@
> >  #include "name_distr.h"
> >  #include "socket.h"
> >  #include "bcast.h"
> > +#include "monitor.h"
> >  #include "discover.h"
> >  #include "netlink.h"
> >
> > @@ -191,16 +192,6 @@ int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel)
> >  	tipc_node_put(n);
> >  	return mtu;
> >  }
> > -/*
> > - * A trivial power-of-two bitmask technique is used for speed, since this
> > - * operation is done for every incoming TIPC packet. The number of hash table
> > - * entries has been chosen so that no hash chain exceeds 8 nodes and will
> > - * usually be much smaller (typically only a single node).
> > - */
> > -static unsigned int tipc_hashfn(u32 addr)
> > -{
> > -	return addr & (NODE_HTABLE_SIZE - 1);
> > -}
> >
> >  static void tipc_node_kref_release(struct kref *kref)
> >  {
> > @@ -265,6 +256,7 @@ static void tipc_node_write_unlock(struct tipc_node *n)
> >  	u32 addr = 0;
> >  	u32 flags = n->action_flags;
> >  	u32 link_id = 0;
> > +	u32 bearer_id;
> >  	struct list_head *publ_list;
> >
> >  	if (likely(!flags)) {
> > @@ -274,6 +266,7 @@ static void tipc_node_write_unlock(struct tipc_node *n)
> >
> >  	addr = n->addr;
> >  	link_id = n->link_id;
> > +	bearer_id = link_id & 0xffff;
> >  	publ_list = &n->publ_list;
> >
> >  	n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP |
> > @@ -287,13 +280,16 @@ static void tipc_node_write_unlock(struct tipc_node *n)
> >  	if (flags & TIPC_NOTIFY_NODE_UP)
> >  		tipc_named_node_up(net, addr);
> >
> > -	if (flags & TIPC_NOTIFY_LINK_UP)
> > +	if (flags & TIPC_NOTIFY_LINK_UP) {
> > +		tipc_mon_peer_up(net, addr, bearer_id);
> >  		tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr,
> >  				     TIPC_NODE_SCOPE, link_id, addr);
> > -
> > -	if (flags & TIPC_NOTIFY_LINK_DOWN)
> > +	}
> > +	if (flags & TIPC_NOTIFY_LINK_DOWN) {
> > +		tipc_mon_peer_down(net, addr, bearer_id);
> >  		tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr,
> >  				      link_id, addr);
> > +	}
> >  }
> >
> >  struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
> > @@ -674,6 +670,7 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
> >  	struct tipc_link *l = le->link;
> >  	struct tipc_media_addr *maddr;
> >  	struct sk_buff_head xmitq;
> > +	int old_bearer_id = bearer_id;
> >
> >  	if (!l)
> >  		return;
> > @@ -693,6 +690,8 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
> >  		tipc_link_fsm_evt(l, LINK_RESET_EVT);
> >  	}
> >  	tipc_node_write_unlock(n);
> > +	if (delete)
> > +		tipc_mon_remove_peer(n->net, n->addr, old_bearer_id);
> >  	tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr);
> >  	tipc_sk_rcv(n->net, &le->inputq);
> >  }
> > --
> > 1.9.1