On 5/13/20 3:19 AM, Numan Siddique wrote:


On Fri, May 8, 2020 at 2:55 AM Mark Michelson <[email protected] <mailto:[email protected]>> wrote:

    Group mod messages have the possibility of growing very large if OVN
    installs a load balancer with a great many backends. The current
    approach is to send a single ADD message with the entire group contents.
    If the size of this message exceeds UINT16_MAX, then OpenFlow cannot
    properly express the length of the message since the OpenFlow header's
    length is limited to 16 bits.

    This patch solves the problem by breaking the message into pieces. The
    first piece is an ADD, and subsequent messages are INSERT_BUCKET
    messages. This way, we end up being able to express the entire size of
    the group through multiple OpenFlow messages.

    Signed-off-by: Mark Michelson <[email protected]
    <mailto:[email protected]>>


Hi Mark,

Thanks for addressing this.

The patch LGTM. But there seems to be some memory leak when  I ran with valgrind.

Can you please take a look.

    ---
      controller/ofctrl.c | 70 ++++++++++++++++++++++++++++++++++++++++++---
      tests/ovn.at <http://ovn.at>        | 29 +++++++++++++++++++
      2 files changed, 95 insertions(+), 4 deletions(-)

    diff --git a/controller/ofctrl.c b/controller/ofctrl.c
    index 4b51cd86e..2b98dd6b1 100644
    --- a/controller/ofctrl.c
    +++ b/controller/ofctrl.c
    @@ -930,10 +930,72 @@ encode_group_mod(const struct
    ofputil_group_mod *gm)
      }

      static void
    -add_group_mod(const struct ofputil_group_mod *gm, struct ovs_list
    *msgs)
    +add_group_mod(struct ofputil_group_mod *gm, struct ovs_list *msgs)
      {
          struct ofpbuf *msg = encode_group_mod(gm);
    -    ovs_list_push_back(msgs, &msg->list_node);
    +    if (msg->size <= UINT16_MAX) {
    +        ovs_list_push_back(msgs, &msg->list_node);
    +        return;
    +    }
    +    /* This group mod request is too large to fit in a single OF
    message
    +     * since the header can only specify a 16-bit size. We need to
    break
    +     * this into multiple group_mods requests.
    +     */
    +
    +    /* Pull the first bucket. All buckets are approximately the
    same length
    +     * since they contain near-identical actions. Using its length
    can give
    +     * us a good approximation of how many buckets we can fit in a
    single
    +     * OF message.
    +     */
    +    ofpraw_pull_assert(msg);
    +    struct ofp15_group_mod *ogm = ofpbuf_pull(msg, sizeof(*ogm));
    +    struct ofp15_bucket *of_bucket = ofpbuf_pull(msg,
    sizeof(*of_bucket));
    +    uint16_t bucket_size = ntohs(of_bucket->len);
    +
    +    ofpbuf_uninit(msg);
    +
    +    /* Dividing by 2 here ensures that just in case there are
    variations in
    +     * the size of the buckets, we absolutely will not put too many
    in our
    +     * new group_mod message.
    +     */
    +    size_t max_buckets = ((UINT16_MAX - sizeof *ogm) / bucket_size)
    / 2;
    +
    +    ovs_assert(max_buckets < ovs_list_size(&gm->buckets));
    +
    +    uint16_t command = OFPGC15_INSERT_BUCKET;
    +    if (gm->command == OFPGC15_DELETE ||
    +        gm->command == OFPGC15_REMOVE_BUCKET) {
    +        command = OFPGC15_REMOVE_BUCKET;
    +    }
    +    struct ofputil_group_mod split = {
    +        .command = command,
    +        .type = gm->type,
    +        .group_id = gm->group_id,
    +        .command_bucket_id = OFPG15_BUCKET_LAST,
    +    };
    +    ovs_list_init(&split.buckets);
    +
    +    size_t i = 0;
    +    struct ofputil_bucket *bucket;
    +    LIST_FOR_EACH (bucket, list_node, &gm->buckets) {
    +        if (i++ < max_buckets) {
    +            continue;
    +        }
    +        break;
    +    }
    +
    +    ovs_list_splice(&split.buckets, &bucket->list_node, &gm->buckets);
    +
    +    struct ofpbuf *orig = encode_group_mod(gm);
    +    ovs_list_push_back(msgs, &orig->list_node);
    +
    +    /* We call this recursively just in case our new
    +     * INSERT_BUCKET/REMOVE_BUCKET group_mod is still too
    +     * long for an OF message. This will allow for it to
    +     * be broken into pieces, too.
    +     */
    +    add_group_mod(&split, msgs);
    +    ofputil_uninit_group_mod(&split);


Do we need to also do  ofputil_uninit_group_mod(gm) ?

No, this is not necessary. The ofputil_group_mod structures are always freed in the same function where they are allocated. In other words, gm is always freed by the function that calls add_group_mod().

valgrind is complaining about the ofpbuf *msg allocation at the beginning of add_group_mod(). The problem is that I'm calling ofpbuf_uninit() on it when I should be calling ofpbuf_delete(). Making that change gets rid of the memory leak.



Thanks
Numan

      }


    @@ -1124,7 +1186,7 @@ ofctrl_put(struct ovn_desired_flow_table
    *flow_table,
              char *group_string = xasprintf("group_id=%"PRIu32",%s",
                                             desired->table_id,
                                             desired->name);
    -        char *error = parse_ofp_group_mod_str(&gm, OFPGC11_ADD,
    group_string,
    +        char *error = parse_ofp_group_mod_str(&gm, OFPGC15_ADD,
    group_string,
                                                    NULL, NULL,
    &usable_protocols);
              if (!error) {
                  add_group_mod(&gm, &msgs);
    @@ -1243,7 +1305,7 @@ ofctrl_put(struct ovn_desired_flow_table
    *flow_table,
              enum ofputil_protocol usable_protocols;
              char *group_string = xasprintf("group_id=%"PRIu32"",
                                             installed->table_id);
    -        char *error = parse_ofp_group_mod_str(&gm, OFPGC11_DELETE,
    +        char *error = parse_ofp_group_mod_str(&gm, OFPGC15_DELETE,
                                                    group_string, NULL,
    NULL,
                                                    &usable_protocols);
              if (!error) {
    diff --git a/tests/ovn.at <http://ovn.at> b/tests/ovn.at <http://ovn.at>
    index 52d994972..f39fda2e4 100644
    --- a/tests/ovn.at <http://ovn.at>
    +++ b/tests/ovn.at <http://ovn.at>
    @@ -19179,3 +19179,32 @@ OVN_CHECK_PACKETS([hv1/vif1-tx.pcap],
    [expected])

      OVN_CLEANUP([hv1])
      AT_CLEANUP
    +
    +AT_SETUP([ovn -- Big Load Balancer])
    +ovn_start
    +
    +ovn-nbctl ls-add ls1
    +ovn-nbctl lsp-add ls1 lsp1
    +
    +net_add n1
    +sim_add hv1
    +
    +as hv1
    +ovs-vsctl add-br br-phys
    +ovn_attach n1 br-phys 192.168.0.1
    +ovs-vsctl add-port br-int p1 -- set Interface p1
    external-ids:iface-id=lsp1
    +
    +IPS=192.169.0.1:80 <http://192.169.0.1:80>
    +for i in `seq 1 9` ; do
    +    for j in `seq 1 254` ; do
    +        IPS=${IPS},192.169.$i.$j:80
    +    done
    +done
    +
    +ovn-nbctl lb-add lb0 172.172.0.1:8080 <http://172.172.0.1:8080>
    "${IPS}"
    +ovn-nbctl --wait=hv ls-lb-add ls1 lb0
    +
    +AT_CHECK([test 2287 = `ovs-ofctl dump-group-stats br-int | grep -o
    bucket | wc -l`])
    +
    +OVN_CLEANUP([hv1])
    +AT_CLEANUP
-- 2.25.4

    _______________________________________________
    dev mailing list
    [email protected] <mailto:[email protected]>
    https://mail.openvswitch.org/mailman/listinfo/ovs-dev


_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to