Thanks for looking over this. I agree with you that simply changing it to
take a write lock would be simpler, but let me explain my thinking.

* Any thread that calls xlate_actions() could account stats for a bond.
This could be handlers or revalidators.
* Handler threads account the first set of stats for a packet. (This
happens any time we see a new flow, including after flow eviction)
* Revalidator threads account for subsequent stats. (This happens
regularly, and may involve multiple threads for the same bond)
* The bond rwlock, if used to protect statistics, stops all progress for
these threads when accounting.
* Stats are something that are changed frequently, so giving these a more
granular lock may provide better parallelism.

Looking closer, there are two things that this patch achieves:
(1) Allow statistics to be attributed to two different bonds at the same
time.
(2) Reduce the critical section for threads that attribute stats to the
same bond.

I think that (1) is only likely to make a large difference in
configurations with several bonds. AFAIK this is not common.
(2) is more relevant, although I haven't actually observed this to be a
problem. You'd have more idea whether this is a bottleneck than me.

What do you think?

On 14 April 2014 19:07, Andy Zhou <az...@nicira.com> wrote:

> It could be I am still missing something here.  It seems most of the
> writes are protected by the write lock, with the exception
> of bond_recirculation_account(), which is the main issue this patch
> fixes. So would it be simpler to just make it acquire the write lock
> instead of read lock?  Unless it is changed recently, my understanding
> is that only the run loops that runs in the main thread will access
> the bond
> stats.
>
> On Sun, Apr 13, 2014 at 10:30 PM, Joe Stringer <j...@wand.net.nz> wrote:
> > From: Joe Stringer <joestrin...@nicira.com>
> >
> > dcf00ba35a0 (ofproto/bond: Implement bond megaflow using recirculation)
> > added bond account functions that protected the statistics with only
> > readlocks. This patch adds a mutex to ensure that they are updated
> > correctly, and adds threadsafety annotations to the relevant functions.
> >
> > Signed-off-by: Joe Stringer <joestrin...@nicira.com>
> > ---
> >  ofproto/bond.c |   77
> ++++++++++++++++++++++++++++++++++++++++++++++++--------
> >  1 file changed, 67 insertions(+), 10 deletions(-)
> >
> > diff --git a/ofproto/bond.c b/ofproto/bond.c
> > index 8554955..49cef0b 100644
> > --- a/ofproto/bond.c
> > +++ b/ofproto/bond.c
> > @@ -62,14 +62,20 @@ static struct hmap *const all_bonds
> OVS_GUARDED_BY(rwlock) = &all_bonds__;
> >   * "struct bond" has an array of BOND_BUCKETS of these. */
> >  struct bond_entry {
> >      struct bond_slave *slave;   /* Assigned slave, NULL if unassigned.
> */
> > -    uint64_t tx_bytes;          /* Count of bytes recently transmitted.
> */
> >      struct list list_node;      /* In bond_slave's 'entries' list. */
> > -
> > -    /* Recirculation. */
> >      struct rule *pr_rule;       /* Post recirculation rule for this
> entry.*/
> > -    uint64_t pr_tx_bytes;       /* Record the rule tx_bytes to figure
> out
> > -                                   the delta to update the tx_bytes
> entry
> > -                                   above.*/
> > +
> > +    /* Bond Statistics.
> > +     *
> > +     * Any reader or writer of 'tx_bytes' or 'pr_tx_bytes' must hold
> 'rwlock'
> > +     * to prevent the bond_entry from disappearing.
> > +     * Readers and writers with a readlock on 'rwlock' must also acquire
> > +     * 'mutex' before reading or modifying the statistics. */
> > +    struct ovs_mutex mutex OVS_ACQ_AFTER(rwlock);
> > +    uint64_t tx_bytes OVS_GUARDED;    /* Count of bytes recently
> transmitted.*/
> > +    uint64_t pr_tx_bytes OVS_GUARDED; /* Record the rule tx_bytes to
> figure out
> > +                                         the delta to update the
> tx_bytes entry
> > +                                         above. */
> >  };
> >
> >  /* A bond slave, that is, one of the links comprising a bond. */
> > @@ -151,7 +157,12 @@ struct bond_pr_rule_op {
> >      struct rule *pr_rule;
> >  };
> >
> > +static void bond_entries_init(struct bond_entry **entryp)
> > +    OVS_REQ_WRLOCK(rwlock);
> >  static void bond_entry_reset(struct bond *) OVS_REQ_WRLOCK(rwlock);
> > +static void bond_entries_uninit__(struct bond_entry **entryp)
> > +    OVS_REQ_WRLOCK(rwlock);
> > +static void bond_entries_uninit(struct bond_entry **entryp);
> >  static struct bond_slave *bond_slave_lookup(struct bond *, const void
> *slave_)
> >      OVS_REQ_RDLOCK(rwlock);
> >  static void bond_enable_slave(struct bond_slave *, bool enable)
> > @@ -271,7 +282,7 @@ bond_unref(struct bond *bond)
> >      hmap_destroy(&bond->slaves);
> >
> >      ovs_mutex_destroy(&bond->mutex);
> > -    free(bond->hash);
> > +    bond_entries_uninit(&bond->hash);
> >      free(bond->name);
> >
> >      HMAP_FOR_EACH_SAFE(pr_op, next_op, hmap_node, &bond->pr_rule_ops) {
> > @@ -859,9 +870,11 @@ bond_entry_account(struct bond_entry *entry,
> uint64_t rule_tx_bytes)
> >      if (entry->slave) {
> >          uint64_t delta;
> >
> > +        ovs_mutex_lock(&entry->mutex);
> >          delta = rule_tx_bytes - entry->pr_tx_bytes;
> >          entry->tx_bytes += delta;
> >          entry->pr_tx_bytes = rule_tx_bytes;
> > +        ovs_mutex_unlock(&entry->mutex);
> >      }
> >  }
> >
> > @@ -958,6 +971,7 @@ bond_slave_from_bal_node(struct list *bal)
> OVS_REQ_RDLOCK(rwlock)
> >
> >  static void
> >  log_bals(struct bond *bond, const struct list *bals)
> > +    OVS_REQ_RDLOCK(rwlock)
> >  {
> >      if (VLOG_IS_DBG_ENABLED()) {
> >          struct ds ds = DS_EMPTY_INITIALIZER;
> > @@ -981,8 +995,10 @@ log_bals(struct bond *bond, const struct list *bals)
> >                      if (&e->list_node != list_front(&slave->entries)) {
> >                          ds_put_cstr(&ds, " + ");
> >                      }
> > +                    ovs_mutex_lock(&e->mutex);
> >                      ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64"kB",
> >                                    e - bond->hash, e->tx_bytes / 1024);
> > +                    ovs_mutex_unlock(&e->mutex);
> >                  }
> >                  ds_put_cstr(&ds, ")");
> >              }
> > @@ -995,6 +1011,7 @@ log_bals(struct bond *bond, const struct list *bals)
> >  /* Shifts 'hash' from its current slave to 'to'. */
> >  static void
> >  bond_shift_load(struct bond_entry *hash, struct bond_slave *to)
> > +    OVS_REQ_WRLOCK(rwlock)
> >  {
> >      struct bond_slave *from = hash->slave;
> >      struct bond *bond = from->bond;
> > @@ -1026,6 +1043,7 @@ bond_shift_load(struct bond_entry *hash, struct
> bond_slave *to)
> >   * shift away small hashes or large hashes. */
> >  static struct bond_entry *
> >  choose_entry_to_migrate(const struct bond_slave *from, uint64_t
> to_tx_bytes)
> > +    OVS_REQ_WRLOCK(rwlock)
> >  {
> >      struct bond_entry *e;
> >
> > @@ -1567,20 +1585,59 @@ bond_init(void)
> >  }
> >
> >  static void
> > +bond_entries_init(struct bond_entry **entryp)
> > +{
> > +    int i;
> > +    size_t hash_len;
> > +    struct bond_entry *e;
> > +
> > +    hash_len = BOND_BUCKETS * sizeof **entryp;
> > +    *entryp = e = xmalloc(hash_len);
> > +    for (i = 0; i < BOND_BUCKETS; i++) {
> > +        ovs_mutex_init(&e[i].mutex);
> > +    }
> > +}
> > +
> > +static void
> > +bond_entries_uninit__(struct bond_entry **entryp)
> > +{
> > +    int i;
> > +    struct bond_entry *e;
> > +
> > +    if (!*entryp) {
> > +        return;
> > +    }
> > +
> > +    e = *entryp;
> > +    for (i = 0; i < BOND_BUCKETS; i++) {
> > +        ovs_mutex_destroy(&e[i].mutex);
> > +    }
> > +    free(*entryp);
> > +    *entryp = NULL;
> > +}
> > +
> > +static void
> > +bond_entries_uninit(struct bond_entry **entryp)
> > +{
> > +    ovs_rwlock_wrlock(&rwlock);
> > +    bond_entries_uninit__(entryp);
> > +    ovs_rwlock_unlock(&rwlock);
> > +}
> > +
> > +static void
> >  bond_entry_reset(struct bond *bond)
> >  {
> >      if (bond->balance != BM_AB) {
> >          size_t hash_len = BOND_BUCKETS * sizeof *bond->hash;
> >
> >          if (!bond->hash) {
> > -            bond->hash = xmalloc(hash_len);
> > +            bond_entries_init(&bond->hash);
> >          }
> >          memset(bond->hash, 0, hash_len);
> >
> >          bond->next_rebalance = time_msec() + bond->rebalance_interval;
> >      } else {
> > -        free(bond->hash);
> > -        bond->hash = NULL;
> > +        bond_entries_uninit__(&bond->hash);
> >      }
> >  }
> >
> > --
> > 1.7.10.4
> >
>
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to