On Wed, Mar 2, 2022 at 10:50 AM Adrian Moreno <amore...@redhat.com> wrote:
>
> Thanks Mike,
>
> On 3/1/22 20:14, Mike Pattrick wrote:
> > Add additional logging for debug and info level with a focus on code
> > related to bond members coming up, go down, and changing.
> >
> > Several existing log messages were modified to handle sub 1kB log
> > messages with more grace. Instead of reporting 0kB of traffic load
> > shifting from one member to another, we now suppress these messages at
> > the INFO level and display exact byte count at the debug level.
> >
> > Signed-off-by: Mike Pattrick <m...@redhat.com>
> > ---
> >   lib/lacp.c     | 15 +++++++++++
> >   ofproto/bond.c | 72 ++++++++++++++++++++++++++++++++++++++++----------
> >   2 files changed, 73 insertions(+), 14 deletions(-)
> >
> > diff --git a/lib/lacp.c b/lib/lacp.c
> > index 89d711225..fa01cbbfe 100644
> > --- a/lib/lacp.c
> > +++ b/lib/lacp.c
> > @@ -387,6 +387,12 @@ lacp_process_packet(struct lacp *lacp, const void 
> > *member_,
> >           goto out;
> >       }
> >
> > +    if (member->status == LACP_DEFAULTED) {
> > +        VLOG_INFO("%s: defaulted member %s reestablished connection with 
> > LACP partner.",
> > +                 lacp->name,
> > +                 member->name);
>
> Nit: I think ovs coding conventions suggest you need an extra space in the 
> above
> two lines.
>
>
> > +    }
> > +
> >       member->status = LACP_CURRENT;
> >       tx_rate = lacp->fast ? LACP_FAST_TIME_TX : LACP_SLOW_TIME_TX;
> >       timer_set_duration(&member->rx, LACP_RX_MULTIPLIER * tx_rate);
> > @@ -465,9 +471,15 @@ lacp_member_register(struct lacp *lacp, void *member_,
> >           if (!lacp->key_member) {
> >               lacp->key_member = member;
> >           }
> > +
> > +        VLOG_DBG("LACP member %s added.", s->name);
> >       }
> >
> >       if (!member->name || strcmp(s->name, member->name)) {
> > +        if (member->name) {
> > +            VLOG_DBG("LACP member %s renamed from %s.", s->name, 
> > member->name);
> > +        }
> > +
> >           free(member->name);
> >           member->name = xstrdup(s->name);
> >       }
> > @@ -531,6 +543,9 @@ lacp_member_carrier_changed(const struct lacp *lacp, 
> > const void *member_,
> >           member->count_carrier_changed++;
> >       }
> >
> > +    VLOG_DBG("LACP member %s changed state to %s.",
> > +             member->name, carrier_up ? "up" : "down");
> > +
> >   out:
> >       lacp_unlock();
> >   }
> > diff --git a/ofproto/bond.c b/ofproto/bond.c
> > index cdfdf0b9d..55e335bfa 100644
> > --- a/ofproto/bond.c
> > +++ b/ofproto/bond.c
> > @@ -855,6 +855,8 @@ bond_check_admissibility(struct bond *bond, const void 
> > *member_,
> >       ovs_rwlock_rdlock(&rwlock);
> >       member = bond_member_lookup(bond, member_);
> >       if (!member) {
> > +        VLOG_DBG_RL(&rl, "bond %s: lookup for member on bond failed",
> > +                    bond->name);
> >           goto out;
> >       }
> >
> > @@ -1127,8 +1129,13 @@ log_bals(struct bond *bond, const struct ovs_list 
> > *bals)
> >               if (ds.length) {
> >                   ds_put_char(&ds, ',');
> >               }
> > -            ds_put_format(&ds, " %s %"PRIu64"kB",
> > -                          member->name, member->tx_bytes / 1024);
> > +            if (member->tx_bytes > 1024) {
> > +                ds_put_format(&ds, " %s %"PRIu64"kB",
> > +                              member->name, member->tx_bytes / 1024);
> > +            } else {
> > +                ds_put_format(&ds, " %s %"PRIu64"B",
> > +                              member->name, member->tx_bytes);
> > +            }
> >
> >               if (!member->enabled) {
> >                   ds_put_cstr(&ds, " (disabled)");
> > @@ -1141,13 +1148,20 @@ log_bals(struct bond *bond, const struct ovs_list 
> > *bals)
> >                       if (&e->list_node != 
> > ovs_list_front(&member->entries)) {
> >                           ds_put_cstr(&ds, " + ");
> >                       }
> > -                    ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64"kB",
> > -                                  e - bond->hash, e->tx_bytes / 1024);
> > +                    if (e->tx_bytes > 1024) {
> > +                        ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64"kB",
> > +                                      e - bond->hash, e->tx_bytes / 1024);
> > +                    } else {
> > +                        ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64"B",
> > +                                      e - bond->hash, e->tx_bytes);
> > +                    }
> >                   }
> >                   ds_put_cstr(&ds, ")");
> >               }
> >           }
> > -        VLOG_DBG("bond %s:%s", bond->name, ds_cstr(&ds));
> > +        if (ds.length) {
> > +            VLOG_DBG("bond %s:%s", bond->name, ds_cstr(&ds));
> > +        }
> >           ds_destroy(&ds);
> >       }
> >   }
> > @@ -1161,13 +1175,23 @@ bond_shift_load(struct bond_entry *hash, struct 
> > bond_member *to)
> >       struct bond *bond = from->bond;
> >       uint64_t delta = hash->tx_bytes;
> >
> > -    VLOG_INFO("bond %s: shift %"PRIu64"kB of load (with hash %"PRIdPTR") "
> > -              "from %s to %s (now carrying %"PRIu64"kB and "
> > -              "%"PRIu64"kB load, respectively)",
> > -              bond->name, delta / 1024, hash - bond->hash,
> > -              from->name, to->name,
> > -              (from->tx_bytes - delta) / 1024,
> > -              (to->tx_bytes + delta) / 1024);
> > +    if (delta > 1024) {
> > +        VLOG_INFO("bond %s: shift %"PRIu64"kB of load (with hash 
> > %"PRIdPTR") "
> > +                  "from %s to %s (now carrying %"PRIu64"kB and "
> > +                  "%"PRIu64"kB load, respectively)",
> > +                  bond->name, delta / 1024, hash - bond->hash,
> > +                  from->name, to->name,
> > +                  (from->tx_bytes - delta) / 1024,
> > +                  (to->tx_bytes + delta) / 1024);
> > +    } else {
> > +        VLOG_DBG("bond %s: shift %"PRIu64"B of load (with hash %"PRIdPTR") 
> > "
> > +                 "from %s to %s (now carrying %"PRIu64"kB and "
> > +                 "%"PRIu64"kB load, respectively)",
> > +                 bond->name, delta, hash - bond->hash,
> > +                 from->name, to->name,
> > +                 (from->tx_bytes - delta) / 1024,
> > +                 (to->tx_bytes + delta) / 1024);
> > +    }
> >
>
> Both versions should have the same log level. Original was INFO but I'm
> wondering if it's too verbose for an INFO. IIUC, this could be printed 256 
> times
> at a single rebalancing round.
>
> Should we consider summarized version for INFO level?
>
> Maybe one that sums up a migration, something like: "shifted N Kbps from 
> member
> X (which now has Tx Kbps) to member Y (which now has Ty Kbps)". That should 
> only
> be printed (N_Members -1) times at most on each rebalancing round.
> Or one that sums up the entire rebalancing round and prints the new tx_bytes 
> of
> each member, only printed once per rebalancing round.

Yes, I think that aggregating the messages could really improve this
log message. In a production environment with stochastic network
traffic this could really really produce a lot of spurious messages.
I'll work on a new version.

>
>
> >       /* Shift load away from 'from' to 'to'. */
> >       from->tx_bytes -= delta;
> > @@ -1345,6 +1369,12 @@ bond_rebalance(struct bond *bond)
> >               insert_bal(&bals, member);
> >           }
> >       }
> > +    if (ovs_list_is_empty(&bals)) {
> > +        VLOG_INFO("Bond %s cannot be balanced because no members are 
> > enabled.",
> > +                  bond->name);
> > +        goto done;
> > +    }
> > +
>
> If both members get disabled (or before they get added to the bond), this 
> could
> be printed in INFO level every second. Not sure if that's desirable 
> considering
> we would already log:
>
>          VLOG_INFO_RL(&rl, "bond %s: all members disabled", bond->name);
>
> in "bond_choose_active_member()". What do you think?
>
>
> >       log_bals(bond, &bals);
> >
> >       /* Shift load from the most-loaded members to the least-loaded 
> > members. */
> > @@ -1565,6 +1595,9 @@ bond_print_details(struct ds *ds, const struct bond 
> > *bond)
> >               if (be_tx_k) {
> >                   ds_put_format(ds, "  hash %d: %"PRIu64" kB load\n",
> >                             hash, be_tx_k);
> > +            } else if (be->tx_bytes) {
> > +                ds_put_format(ds, "  hash %d: %"PRIu64" B load\n",
> > +                          hash, be->tx_bytes);
> >               }
> >
> >               /* XXX How can we list the MACs assigned to hashes of SLB 
> > bonds? */
> > @@ -1959,7 +1992,7 @@ choose_output_member(const struct bond *bond, const 
> > struct flow *flow,
> >            * unsuccussful. If lacp_fallback_ab is enabled use active-
> >            * backup mode else drop all traffic. */
> >           if (!bond->lacp_fallback_ab) {
> > -            return NULL;
> > +            goto error;
> >           }
> >           balance = BM_AB;
> >       }
> > @@ -1971,7 +2004,7 @@ choose_output_member(const struct bond *bond, const 
> > struct flow *flow,
> >       case BM_TCP:
> >           if (bond->lacp_status != LACP_NEGOTIATED) {
> >               /* Must have LACP negotiations for TCP balanced bonds. */
> > -            return NULL;
> > +            goto error;
> >           }
> >           if (wc) {
> >               flow_mask_hash_fields(flow, wc, 
> > NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP);
> > @@ -1990,6 +2023,17 @@ choose_output_member(const struct bond *bond, const 
> > struct flow *flow,
> >       default:
> >           OVS_NOT_REACHED();
> >       }
> > +error:
> > +    if (VLOG_IS_DBG_ENABLED()) {
> > +        struct ds ds = DS_EMPTY_INITIALIZER;
> > +        flow_format(&ds, flow, NULL);
> > +        VLOG_DBG("Cannot choose output member on bond %s for flow %s with 
> > LACP status %s",
> > +                 bond->name,
> > +                 ds_cstr(&ds),
> > +                 lacp_status_description(bond->lacp_status));
> > +        ds_destroy(&ds);
> > +    }
> > +    return NULL;
>
> Use rate limiting here?
>
> >   }
> >
> >   static struct bond_member *
>
> --
> Adrián Moreno
>

-M

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to