Thanks for looking over this. I agree with you that simply changing it to take a write lock would be simpler, but let me explain my thinking.
* Any thread that calls xlate_actions() could account stats for a bond. This could be handlers or revalidators. * Handler threads account the first set of stats for a packet. (This happens any time we see a new flow, including after flow eviction) * Revalidator threads account for subsequent stats. (This happens regularly, and may involve multiple threads for the same bond) * The bond rwlock, if used to protect statistics, stops all progress for these threads when accounting. * Stats are something that are changed frequently, so giving these a more granular lock may provide better parallelism. Looking closer, there are two things that this patch achieves: (1) Allow statistics to be attributed to two different bonds at the same time. (2) Reduce the critical section for threads that attribute stats to the same bond. I think that (1) is only likely to make a large difference in configurations with several bonds. AFAIK this is not common. (2) is more relevant, although I haven't actually observed this to be a problem. You'd have more idea whether this is a bottleneck than me. What do you think? On 14 April 2014 19:07, Andy Zhou <az...@nicira.com> wrote: > It could be I am still missing something here. It seems most of the > writes are protected by the write lock, with the exception > of bond_recirculation_account(), which is the main issue this patch > fixes. So would it be simpler to just make it acquire the write lock > instead of read lock? Unless it is changed recently, my understanding > is that only the run loops that runs in the main thread will access > the bond > stats. > > On Sun, Apr 13, 2014 at 10:30 PM, Joe Stringer <j...@wand.net.nz> wrote: > > From: Joe Stringer <joestrin...@nicira.com> > > > > dcf00ba35a0 (ofproto/bond: Implement bond megaflow using recirculation) > > added bond account functions that protected the statistics with only > > readlocks. This patch adds a mutex to ensure that they are updated > > correctly, and adds threadsafety annotations to the relevant functions. > > > > Signed-off-by: Joe Stringer <joestrin...@nicira.com> > > --- > > ofproto/bond.c | 77 > ++++++++++++++++++++++++++++++++++++++++++++++++-------- > > 1 file changed, 67 insertions(+), 10 deletions(-) > > > > diff --git a/ofproto/bond.c b/ofproto/bond.c > > index 8554955..49cef0b 100644 > > --- a/ofproto/bond.c > > +++ b/ofproto/bond.c > > @@ -62,14 +62,20 @@ static struct hmap *const all_bonds > OVS_GUARDED_BY(rwlock) = &all_bonds__; > > * "struct bond" has an array of BOND_BUCKETS of these. */ > > struct bond_entry { > > struct bond_slave *slave; /* Assigned slave, NULL if unassigned. > */ > > - uint64_t tx_bytes; /* Count of bytes recently transmitted. > */ > > struct list list_node; /* In bond_slave's 'entries' list. */ > > - > > - /* Recirculation. */ > > struct rule *pr_rule; /* Post recirculation rule for this > entry.*/ > > - uint64_t pr_tx_bytes; /* Record the rule tx_bytes to figure > out > > - the delta to update the tx_bytes > entry > > - above.*/ > > + > > + /* Bond Statistics. > > + * > > + * Any reader or writer of 'tx_bytes' or 'pr_tx_bytes' must hold > 'rwlock' > > + * to prevent the bond_entry from disappearing. > > + * Readers and writers with a readlock on 'rwlock' must also acquire > > + * 'mutex' before reading or modifying the statistics. */ > > + struct ovs_mutex mutex OVS_ACQ_AFTER(rwlock); > > + uint64_t tx_bytes OVS_GUARDED; /* Count of bytes recently > transmitted.*/ > > + uint64_t pr_tx_bytes OVS_GUARDED; /* Record the rule tx_bytes to > figure out > > + the delta to update the > tx_bytes entry > > + above. */ > > }; > > > > /* A bond slave, that is, one of the links comprising a bond. */ > > @@ -151,7 +157,12 @@ struct bond_pr_rule_op { > > struct rule *pr_rule; > > }; > > > > +static void bond_entries_init(struct bond_entry **entryp) > > + OVS_REQ_WRLOCK(rwlock); > > static void bond_entry_reset(struct bond *) OVS_REQ_WRLOCK(rwlock); > > +static void bond_entries_uninit__(struct bond_entry **entryp) > > + OVS_REQ_WRLOCK(rwlock); > > +static void bond_entries_uninit(struct bond_entry **entryp); > > static struct bond_slave *bond_slave_lookup(struct bond *, const void > *slave_) > > OVS_REQ_RDLOCK(rwlock); > > static void bond_enable_slave(struct bond_slave *, bool enable) > > @@ -271,7 +282,7 @@ bond_unref(struct bond *bond) > > hmap_destroy(&bond->slaves); > > > > ovs_mutex_destroy(&bond->mutex); > > - free(bond->hash); > > + bond_entries_uninit(&bond->hash); > > free(bond->name); > > > > HMAP_FOR_EACH_SAFE(pr_op, next_op, hmap_node, &bond->pr_rule_ops) { > > @@ -859,9 +870,11 @@ bond_entry_account(struct bond_entry *entry, > uint64_t rule_tx_bytes) > > if (entry->slave) { > > uint64_t delta; > > > > + ovs_mutex_lock(&entry->mutex); > > delta = rule_tx_bytes - entry->pr_tx_bytes; > > entry->tx_bytes += delta; > > entry->pr_tx_bytes = rule_tx_bytes; > > + ovs_mutex_unlock(&entry->mutex); > > } > > } > > > > @@ -958,6 +971,7 @@ bond_slave_from_bal_node(struct list *bal) > OVS_REQ_RDLOCK(rwlock) > > > > static void > > log_bals(struct bond *bond, const struct list *bals) > > + OVS_REQ_RDLOCK(rwlock) > > { > > if (VLOG_IS_DBG_ENABLED()) { > > struct ds ds = DS_EMPTY_INITIALIZER; > > @@ -981,8 +995,10 @@ log_bals(struct bond *bond, const struct list *bals) > > if (&e->list_node != list_front(&slave->entries)) { > > ds_put_cstr(&ds, " + "); > > } > > + ovs_mutex_lock(&e->mutex); > > ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64"kB", > > e - bond->hash, e->tx_bytes / 1024); > > + ovs_mutex_unlock(&e->mutex); > > } > > ds_put_cstr(&ds, ")"); > > } > > @@ -995,6 +1011,7 @@ log_bals(struct bond *bond, const struct list *bals) > > /* Shifts 'hash' from its current slave to 'to'. */ > > static void > > bond_shift_load(struct bond_entry *hash, struct bond_slave *to) > > + OVS_REQ_WRLOCK(rwlock) > > { > > struct bond_slave *from = hash->slave; > > struct bond *bond = from->bond; > > @@ -1026,6 +1043,7 @@ bond_shift_load(struct bond_entry *hash, struct > bond_slave *to) > > * shift away small hashes or large hashes. */ > > static struct bond_entry * > > choose_entry_to_migrate(const struct bond_slave *from, uint64_t > to_tx_bytes) > > + OVS_REQ_WRLOCK(rwlock) > > { > > struct bond_entry *e; > > > > @@ -1567,20 +1585,59 @@ bond_init(void) > > } > > > > static void > > +bond_entries_init(struct bond_entry **entryp) > > +{ > > + int i; > > + size_t hash_len; > > + struct bond_entry *e; > > + > > + hash_len = BOND_BUCKETS * sizeof **entryp; > > + *entryp = e = xmalloc(hash_len); > > + for (i = 0; i < BOND_BUCKETS; i++) { > > + ovs_mutex_init(&e[i].mutex); > > + } > > +} > > + > > +static void > > +bond_entries_uninit__(struct bond_entry **entryp) > > +{ > > + int i; > > + struct bond_entry *e; > > + > > + if (!*entryp) { > > + return; > > + } > > + > > + e = *entryp; > > + for (i = 0; i < BOND_BUCKETS; i++) { > > + ovs_mutex_destroy(&e[i].mutex); > > + } > > + free(*entryp); > > + *entryp = NULL; > > +} > > + > > +static void > > +bond_entries_uninit(struct bond_entry **entryp) > > +{ > > + ovs_rwlock_wrlock(&rwlock); > > + bond_entries_uninit__(entryp); > > + ovs_rwlock_unlock(&rwlock); > > +} > > + > > +static void > > bond_entry_reset(struct bond *bond) > > { > > if (bond->balance != BM_AB) { > > size_t hash_len = BOND_BUCKETS * sizeof *bond->hash; > > > > if (!bond->hash) { > > - bond->hash = xmalloc(hash_len); > > + bond_entries_init(&bond->hash); > > } > > memset(bond->hash, 0, hash_len); > > > > bond->next_rebalance = time_msec() + bond->rebalance_interval; > > } else { > > - free(bond->hash); > > - bond->hash = NULL; > > + bond_entries_uninit__(&bond->hash); > > } > > } > > > > -- > > 1.7.10.4 > > >
_______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev