On Tue, Aug 30, 2022 at 8:52 AM Ilya Maximets <[email protected]> wrote:
>
> On 6/22/22 16:28, Mike Pattrick wrote:
> > Add additional logging for debug and info level with a focus on code
> > related to bond members coming up, go down, and changing.
> >
> > Several existing log messages were modified to handle sub 1kB log
> > messages with more grace. Instead of reporting 0kB of traffic load
> > shifting from one member to another, we now suppress these messages at
> > the INFO level and display exact byte count at the debug level.
> >
> > Signed-off-by: Mike Pattrick <[email protected]>
> >
> > --
> >
> > Since v4:
> >  - Removed prefixes
> >  - Removed unrequired conditional
> >
> > Since v3:
> >  - Removed log if bond members were significantly deviant
> >  - Normalized log prefixes, including LACP
> >  - Added details to the member enable/disable log
> >
> > Since v2:
> >  - Normalized log capitalization and periods
> >  - Summarized info level hash transfers
> >  - Added log if bond members were significantly deviant
> >
> > Signed-off-by: Mike Pattrick <[email protected]>
> > ---
>
> Thanks for the patch, Mike!
>
> See some comments inline, besides the ones that David already made.
>
> Best regards, Ilya Maximets.

Thank you for the reviews, I'll send in a new version.

Cheers,
M

>
> >  lib/lacp.c     |  26 +++++++++---
> >  ofproto/bond.c | 110 +++++++++++++++++++++++++++++++++----------------
> >  2 files changed, 96 insertions(+), 40 deletions(-)
> >
> > diff --git a/lib/lacp.c b/lib/lacp.c
> > index 3252f17eb..bca347129 100644
> > --- a/lib/lacp.c
> > +++ b/lib/lacp.c
> > @@ -367,12 +367,12 @@ lacp_process_packet(struct lacp *lacp, const void 
> > *member_,
> >              break;
> >          case SUBTYPE_MARKER:
> >              member->count_rx_pdus_marker++;
> > -            VLOG_DBG("%s: received a LACP marker PDU.", lacp->name);
> > +            VLOG_DBG("%s: received a LACP marker PDU", lacp->name);
> >              goto out;
> >          case SUBTYPE_UNUSED:
> >          default:
> >              member->count_rx_pdus_bad++;
> > -            VLOG_WARN_RL(&rl, "%s: received an unparsable LACP PDU.",
> > +            VLOG_WARN_RL(&rl, "%s: received an unparsable LACP PDU",
> >                           lacp->name);
> >              goto out;
> >      }
> > @@ -382,11 +382,16 @@ lacp_process_packet(struct lacp *lacp, const void 
> > *member_,
> >       * trigger re-checking of L1 state. */
> >      if (!member->carrier_up) {
> >          VLOG_INFO_RL(&rl, "%s: carrier state is DOWN,"
> > -                     " dropping received LACP PDU.", member->name);
> > +                     " dropping received LACP PDU", member->name);
> >          seq_change(connectivity_seq_get());
> >          goto out;
> >      }
> >
> > +    if (member->status == LACP_DEFAULTED) {
> > +        VLOG_INFO("%s: defaulted member %s reestablished connection with"
> > +                  " LACP partner", lacp->name, member->name);
> > +    }
> > +
> >      member->status = LACP_CURRENT;
> >      tx_rate = lacp->fast ? LACP_FAST_TIME_TX : LACP_SLOW_TIME_TX;
> >      timer_set_duration(&member->rx, LACP_RX_MULTIPLIER * tx_rate);
> > @@ -465,9 +470,16 @@ lacp_member_register(struct lacp *lacp, void *member_,
> >          if (!lacp->key_member) {
> >              lacp->key_member = member;
> >          }
> > +
> > +        VLOG_DBG("%s: member %s added", lacp->name, s->name);
> >      }
> >
> >      if (!member->name || strcmp(s->name, member->name)) {
> > +        if (member->name) {
> > +            VLOG_DBG("%s: member %s renamed from %s", lacp->name,
> > +                     s->name, member->name);
> > +        }
> > +
> >          free(member->name);
> >          member->name = xstrdup(s->name);
> >      }
> > @@ -531,6 +543,9 @@ lacp_member_carrier_changed(const struct lacp *lacp, 
> > const void *member_,
> >          member->count_carrier_changed++;
> >      }
> >
> > +    VLOG_DBG("%s: member %s changed state to %s", lacp->name,
> > +             member->name, carrier_up ? "up" : "down");
> > +
> >  out:
> >      lacp_unlock();
> >  }
> > @@ -710,8 +725,9 @@ lacp_update_attached(struct lacp *lacp) 
> > OVS_REQUIRES(mutex)
> >          /* XXX: In the future allow users to configure the expected system 
> > ID.
> >           * For now just special case loopback. */
> >          if (eth_addr_equals(member->partner.sys_id, member->lacp->sys_id)) 
> > {
> > -            VLOG_WARN_RL(&rl, "member %s: Loopback detected. Interface is "
> > -                         "connected to its own bond", member->name);
> > +            VLOG_WARN_RL(&rl, "%s: member %s: Loopback detected. "
> > +                         "Interface is connected to its own bond",
> > +                         lacp->name, member->name);
> >              continue;
> >          }
> >
> > diff --git a/ofproto/bond.c b/ofproto/bond.c
> > index 845f69e21..13702950d 100644
> > --- a/ofproto/bond.c
> > +++ b/ofproto/bond.c
> > @@ -386,7 +386,8 @@ update_recirc_rules__(struct bond *bond)
> >                  char *err_s = match_to_string(&pr_op->match, NULL,
> >                                                RECIRC_RULE_PRIORITY);
> >
> > -                VLOG_ERR("failed to add post recirculation flow %s", 
> > err_s);
> > +                VLOG_ERR("%s: failed to add post recirculation flow %s",
> > +                         bond->name, err_s);
> >                  free(err_s);
> >              }
> >              break;
> > @@ -399,7 +400,8 @@ update_recirc_rules__(struct bond *bond)
> >                  char *err_s = match_to_string(&pr_op->match, NULL,
> >                                                RECIRC_RULE_PRIORITY);
> >
> > -                VLOG_ERR("failed to remove post recirculation flow %s", 
> > err_s);
> > +                VLOG_ERR("%s: failed to remove post recirculation flow "
> > +                         "%s", bond->name, err_s);
>
> There seems to be enough space to keep the '%s' on a previous line.
>
> >                  free(err_s);
> >              }
> >
> > @@ -496,7 +498,7 @@ bond_reconfigure(struct bond *bond, const struct 
> > bond_settings *s)
> >          if (s->use_lb_output_action &&
> >              !ovs_lb_output_action_supported(bond->ofproto)) {
> >              VLOG_WARN("%s: Datapath does not support 'lb_output' action, "
> > -                      "disabled.", bond->name);
> > +                      "disabled", bond->name);
> >          } else {
> >              bond->use_lb_output_action = s->use_lb_output_action;
> >              if (!bond->use_lb_output_action) {
> > @@ -855,6 +857,8 @@ bond_check_admissibility(struct bond *bond, const void 
> > *member_,
> >      ovs_rwlock_rdlock(&rwlock);
> >      member = bond_member_lookup(bond, member_);
> >      if (!member) {
> > +        VLOG_DBG_RL(&rl, "%s: lookup for member on bond failed",
> > +                    bond->name);
> >          goto out;
> >      }
> >
> > @@ -874,7 +878,7 @@ bond_check_admissibility(struct bond *bond, const void 
> > *member_,
> >           * main thread to run LACP state machine and enable the member. */
> >          verdict = (member->enabled || member->may_enable) ? BV_ACCEPT : 
> > BV_DROP;
> >          if (!member->enabled && member->may_enable) {
> > -            VLOG_DBG_RL(&rl, "bond %s: member %s: "
> > +            VLOG_DBG_RL(&rl, "%s: member %s: "
> >                          "main thread has not yet enabled member",
> >                          bond->name, bond->active_member->name);
> >          }
> > @@ -913,7 +917,7 @@ bond_check_admissibility(struct bond *bond, const void 
> > *member_,
> >          /* Drop all packets which arrive on backup members.  This is 
> > similar to
> >           * how Linux bonding handles active-backup bonds. */
> >          if (bond->active_member != member) {
> > -            VLOG_DBG_RL(&rl, "bond %s: member %s: active-backup bond 
> > received "
> > +            VLOG_DBG_RL(&rl, "%s: member %s: active-backup bond received "
> >                          "packet on backup member destined for " 
> > ETH_ADDR_FMT,
> >                          bond->name, member->name, ETH_ADDR_ARGS(eth_dst));
> >              goto out;
> > @@ -935,7 +939,7 @@ bond_check_admissibility(struct bond *bond, const void 
> > *member_,
> >      OVS_NOT_REACHED();
> >  out:
> >      if (member && (verdict != BV_ACCEPT)) {
> > -        VLOG_DBG_RL(&rl, "bond %s: member %s: "
> > +        VLOG_DBG_RL(&rl, "%s: member %s: "
> >                      "admissibility verdict is to drop pkt%s, "
> >                      "active member: %s, may_enable: %s, enabled: %s, "
> >                      "LACP status: %s",
> > @@ -1127,8 +1131,13 @@ log_bals(struct bond *bond, const struct ovs_list 
> > *bals)
> >              if (ds.length) {
> >                  ds_put_char(&ds, ',');
> >              }
> > -            ds_put_format(&ds, " %s %"PRIu64"kB",
> > -                          member->name, member->tx_bytes / 1024);
> > +            if (member->tx_bytes > 1024) {
> > +                ds_put_format(&ds, " %s %"PRIu64" kB",
> > +                              member->name, member->tx_bytes / 1024);
> > +            } else {
> > +                ds_put_format(&ds, " %s %"PRIu64" B",
>
> Many interface names end with numbers, and things like "dpdk0 0 B"
> are barely readable, IMHO.  Could you, please, remove the extra space
> between the number and 'kB'/'B' here and other places?
>
> > +                              member->name, member->tx_bytes);
> > +            }
> >
> >              if (!member->enabled) {
> >                  ds_put_cstr(&ds, " (disabled)");
> > @@ -1141,13 +1150,20 @@ log_bals(struct bond *bond, const struct ovs_list 
> > *bals)
> >                      if (&e->list_node != ovs_list_front(&member->entries)) 
> > {
> >                          ds_put_cstr(&ds, " + ");
> >                      }
> > -                    ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64"kB",
> > -                                  e - bond->hash, e->tx_bytes / 1024);
> > +                    if (e->tx_bytes > 1024) {
> > +                        ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64" kB",
> > +                                      e - bond->hash, e->tx_bytes / 1024);
> > +                    } else {
> > +                        ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64" B",
> > +                                      e - bond->hash, e->tx_bytes);
> > +                    }
> >                  }
> >                  ds_put_cstr(&ds, ")");
> >              }
> >          }
> > -        VLOG_DBG("bond %s:%s", bond->name, ds_cstr(&ds));
> > +        if (ds.length) {
> > +            VLOG_DBG("%s: %s", bond->name, ds_cstr(&ds));
> > +        }
> >          ds_destroy(&ds);
> >      }
> >  }
> > @@ -1161,14 +1177,6 @@ bond_shift_load(struct bond_entry *hash, struct 
> > bond_member *to)
> >      struct bond *bond = from->bond;
> >      uint64_t delta = hash->tx_bytes;
> >
> > -    VLOG_INFO("bond %s: shift %"PRIu64"kB of load (with hash %"PRIdPTR") "
> > -              "from %s to %s (now carrying %"PRIu64"kB and "
> > -              "%"PRIu64"kB load, respectively)",
> > -              bond->name, delta / 1024, hash - bond->hash,
> > -              from->name, to->name,
> > -              (from->tx_bytes - delta) / 1024,
> > -              (to->tx_bytes + delta) / 1024);
> > -
> >      /* Shift load away from 'from' to 'to'. */
> >      from->tx_bytes -= delta;
> >      to->tx_bytes += delta;
> > @@ -1345,6 +1353,7 @@ bond_rebalance(struct bond *bond)
> >              insert_bal(&bals, member);
> >          }
> >      }
> > +
> >      log_bals(bond, &bals);
> >
> >      /* Shift load from the most-loaded members to the least-loaded 
> > members. */
> > @@ -1356,6 +1365,7 @@ bond_rebalance(struct bond *bond)
> >          uint64_t overload;
> >
> >          overload = from->tx_bytes - to->tx_bytes;
> > +
> >          if (overload < to->tx_bytes >> 5 || overload < 100000) {
> >              /* The extra load on 'from' (and all less-loaded members), 
> > compared
> >               * to that of 'to' (the least-loaded member), is less than 
> > ~3%, or
> > @@ -1388,6 +1398,12 @@ bond_rebalance(struct bond *bond)
> >              reinsert_bal(&bals, from);
> >              reinsert_bal(&bals, to);
> >          }
> > +        VLOG_INFO("%s: shifted %"PRIuSIZE" hashes totaling %"PRIu64
> > +                  " kB from %s (%"PRIu64" kB) to %s (%"PRIu64" kB)",
> > +                  bond->name, cnt,
> > +                  (overload + to->tx_bytes - from->tx_bytes) >> 1,
>
> It would be easier to understand if we will just divide by 2 here.
>
> > +                  from->name, from->tx_bytes, to->name, to->tx_bytes);
>
> These are in bytes, but reported as kB.
>
> > +
> >          rebalanced = true;
> >      }
> >
> > @@ -1565,6 +1581,9 @@ bond_print_details(struct ds *ds, const struct bond 
> > *bond)
> >              if (be_tx_k) {
> >                  ds_put_format(ds, "  hash %d: %"PRIu64" kB load\n",
> >                            hash, be_tx_k);
> > +            } else if (be->tx_bytes) {
> > +                ds_put_format(ds, "  hash %d: %"PRIu64" B load\n",
> > +                          hash, be->tx_bytes);
> >              }
> >
> >              /* XXX How can we list the MACs assigned to hashes of SLB 
> > bonds? */
> > @@ -1691,7 +1710,7 @@ bond_unixctl_set_active_member(struct unixctl_conn 
> > *conn,
> >      if (bond->active_member != member) {
> >          bond->bond_revalidate = true;
> >          bond->active_member = member;
> > -        VLOG_INFO("bond %s: active member is now %s",
> > +        VLOG_INFO("%s: active member is now %s",
> >                    bond->name, member->name);
> >          bond->send_learning_packets = true;
> >          unixctl_command_reply(conn, "done");
> > @@ -1869,7 +1888,7 @@ bond_enable_member(struct bond_member *member, bool 
> > enable)
> >          }
> >          ovs_mutex_unlock(&member->bond->mutex);
> >
> > -        VLOG_INFO("member %s: %s", member->name,
> > +        VLOG_INFO("%s: member %s %s", bond->name, member->name,
> >                    member->enabled ? "enabled" : "disabled");
> >      }
> >  }
> > @@ -1879,23 +1898,31 @@ bond_link_status_update(struct bond_member *member)
> >  {
> >      struct bond *bond = member->bond;
> >      bool up;
> > +    bool carrier = netdev_get_carrier(member->netdev);
>
> Reverse xmass tree, please.   It might make sense to initialize it
> on a separate line.
>
> >
> > -    up = netdev_get_carrier(member->netdev) && member->may_enable;
> > +    up = carrier && member->may_enable;
> >      if ((up == member->enabled) != (member->delay_expires == LLONG_MAX)) {
> > +        char * msg;
>
> const char *msg;
>
> An empty line after the declaration would be nice.
>
> >          static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
> > -        VLOG_INFO_RL(&rl, "member %s: link state %s",
> > -                     member->name, up ? "up" : "down");
> > +        if (!carrier) {
> > +            msg = "carrier state";
> > +        } else {
> > +            msg = "link state";
> > +        }
> > +        VLOG_INFO_RL(&rl, "%s: member %s %s %s", bond->name,
> > +                     member->name, msg, up ? "up" : "down");
> >          if (up == member->enabled) {
> >              member->delay_expires = LLONG_MAX;
> > -            VLOG_INFO_RL(&rl, "member %s: will not be %s",
> > -                         member->name, up ? "disabled" : "enabled");
> > +            VLOG_INFO_RL(&rl, "%s: member %s will not be %s",
> > +                         bond->name, member->name,
> > +                         up ? "disabled" : "enabled");
> >          } else {
> >              int delay = up ? bond->updelay : bond->downdelay;
> >              member->delay_expires = time_msec() + delay;
> >              if (delay) {
> > -                VLOG_INFO_RL(&rl, "member %s: will be %s if it stays %s "
> > -                             "for %d ms",
> > -                             member->name,
> > +                VLOG_INFO_RL(&rl, "%s: member %s will be %s"
> > +                             " if it stays %s for %d ms",
> > +                             bond->name, member->name,
> >                               up ? "enabled" : "disabled",
> >                               up ? "up" : "down",
> >                               delay);
> > @@ -1952,14 +1979,15 @@ choose_output_member(const struct bond *bond, const 
> > struct flow *flow,
> >  {
> >      struct bond_entry *e;
> >      int balance;
> > +    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>
> Reverse xmass tree.
>
> >
> >      balance = bond->balance;
> >      if (bond->lacp_status == LACP_CONFIGURED) {
> >          /* LACP has been configured on this bond but negotiations were
> > -         * unsuccussful. If lacp_fallback_ab is enabled use active-
> > +         * unsuccessful. If lacp_fallback_ab is enabled use active-
> >           * backup mode else drop all traffic. */
> >          if (!bond->lacp_fallback_ab) {
> > -            return NULL;
> > +            goto error;
> >          }
> >          balance = BM_AB;
> >      }
> > @@ -1971,7 +1999,7 @@ choose_output_member(const struct bond *bond, const 
> > struct flow *flow,
> >      case BM_TCP:
> >          if (bond->lacp_status != LACP_NEGOTIATED) {
> >              /* Must have LACP negotiations for TCP balanced bonds. */
> > -            return NULL;
> > +            goto error;
> >          }
> >          if (wc) {
> >              flow_mask_hash_fields(flow, wc, 
> > NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP);
> > @@ -1990,6 +2018,18 @@ choose_output_member(const struct bond *bond, const 
> > struct flow *flow,
> >      default:
> >          OVS_NOT_REACHED();
> >      }
> > +error:
> > +    if (VLOG_IS_DBG_ENABLED()) {
>
> if (!VLOG_DROP_DBG(&rl)) {
>
> > +        struct ds ds = DS_EMPTY_INITIALIZER;
>
> Empty line after the declaration.
>
> > +        flow_format(&ds, flow, NULL);
> > +        VLOG_DBG_RL(&rl, "%s: cannot choose output member "
>
> And remove the RL here.
>
> > +                    "for flow %s with LACP status %s",
> > +                    bond->name,
> > +                    ds_cstr(&ds),
> > +                    lacp_status_description(bond->lacp_status));
> > +        ds_destroy(&ds);
> > +    }
> > +    return NULL;
> >  }
> >
> >  static struct bond_member *
> > @@ -2039,10 +2079,10 @@ bond_choose_active_member(struct bond *bond)
> >      bond->active_member = bond_choose_member(bond);
> >      if (bond->active_member) {
> >          if (bond->active_member->enabled) {
> > -            VLOG_INFO_RL(&rl, "bond %s: active member is now %s",
> > +            VLOG_INFO_RL(&rl, "%s: active member is now %s",
> >                           bond->name, bond->active_member->name);
> >          } else {
> > -            VLOG_INFO_RL(&rl, "bond %s: active member is now %s, skipping "
> > +            VLOG_INFO_RL(&rl, "%s: active member is now %s, skipping "
> >                           "remaining %lld ms updelay (since no member was "
> >                           "enabled)", bond->name, bond->active_member->name,
> >                           bond->active_member->delay_expires - time_msec());
> > @@ -2056,7 +2096,7 @@ bond_choose_active_member(struct bond *bond)
> >          }
> >      } else if (old_active_member) {
> >          bond_active_member_changed(bond);
> > -        VLOG_INFO_RL(&rl, "bond %s: all members disabled", bond->name);
> > +        VLOG_INFO_RL(&rl, "%s: all members disabled", bond->name);
> >      }
> >  }
> >
>

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to