Hi Mark,

A couple more things caught my attention. Otherwise than that it LGTM.

On Fri, Nov 03, 2017 at 09:24 PM GMT, Mark Michelson wrote:
> This change adds three new options to the Northbound
> Logical_Router_Port's ipv6_ra_configs option:
>
> * send_periodic: If set to "true", then OVN will send periodic router
> advertisements out of this router port.
> * max_interval: The maximum amount of time to wait between sending
> periodic router advertisements.
> * min_interval: The minimum amount of time to wait between sending
> periodic router advertisements.
>
> When send_periodic is true, then IPv6 RA configs, as well as some layer
> 2 and layer 3 information about the router port, are copied to the
> southbound database. From there, ovn-controller can use this information
> to know when to send periodic RAs and what to send in them.
>
> Because periodic RAs originate from each ovn-controller, the new
> keep-local flag is set on the packet so that ports don't receive an
> overabundance of RAs.
>
> Signed-off-by: Mark Michelson <mmich...@redhat.com>
> ---
>  lib/packets.h            |   7 +
>  ovn/controller/pinctrl.c | 341 
> +++++++++++++++++++++++++++++++++++++++++++++++
>  ovn/northd/ovn-northd.c  |  65 +++++++++
>  ovn/ovn-nb.xml           |  19 +++
>  tests/ovn-northd.at      | 110 +++++++++++++++
>  tests/ovn.at             | 150 +++++++++++++++++++++
>  6 files changed, 692 insertions(+)
>
> diff --git a/lib/packets.h b/lib/packets.h
> index 057935cbf..b8249047f 100644
> --- a/lib/packets.h
> +++ b/lib/packets.h
> @@ -976,6 +976,7 @@ BUILD_ASSERT_DECL(ND_PREFIX_OPT_LEN == sizeof(struct 
> ovs_nd_prefix_opt));
>
>  /* Neighbor Discovery option: MTU. */
>  #define ND_MTU_OPT_LEN 8
> +#define ND_MTU_DEFAULT 0
>  struct ovs_nd_mtu_opt {
>      uint8_t  type;      /* ND_OPT_MTU */
>      uint8_t  len;       /* Always 1. */
> @@ -1015,6 +1016,12 @@ BUILD_ASSERT_DECL(RA_MSG_LEN == sizeof(struct 
> ovs_ra_msg));
>  #define ND_RA_MANAGED_ADDRESS 0x80
>  #define ND_RA_OTHER_CONFIG    0x40
>
> +/* Defaults based on MaxRtrInterval and MinRtrInterval from RFC 4861 section
> + * 6.2.1
> + */
> +#define ND_RA_MAX_INTERVAL_DEFAULT 600
> +#define ND_RA_MIN_INTERVAL_DEFAULT(max) ((max) >= 9 ? (max) / 3 : (max) * 3 
> / 4)
> +
>  /*
>   * Use the same struct for MLD and MLD2, naming members as the defined 
> fields in
>   * in the corresponding version of the protocol, though they are reserved in 
> the
> diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c
> index 29b2dde0c..428d5048f 100644
> --- a/ovn/controller/pinctrl.c
> +++ b/ovn/controller/pinctrl.c
> @@ -48,6 +48,7 @@
>  #include "socket-util.h"
>  #include "timeval.h"
>  #include "vswitch-idl.h"
> +#include "lflow.h"
>
>  VLOG_DEFINE_THIS_MODULE(pinctrl);
>
> @@ -88,6 +89,11 @@ static void pinctrl_handle_put_nd_ra_opts(
>  static void pinctrl_handle_nd_ns(const struct flow *ip_flow,
>                                   const struct match *md,
>                                   struct ofpbuf *userdata);
> +static void init_ipv6_ras(void);
> +static void destroy_ipv6_ras(void);
> +static void ipv6_ra_wait(void);
> +static void send_ipv6_ras(const struct controller_ctx *ctx,
> +    struct hmap *local_datapaths);
>
>  COVERAGE_DEFINE(pinctrl_drop_put_mac_binding);
>
> @@ -98,6 +104,7 @@ pinctrl_init(void)
>      conn_seq_no = 0;
>      init_put_mac_bindings();
>      init_send_garps();
> +    init_ipv6_ras();
>  }
>
>  static ovs_be32
> @@ -1083,8 +1090,340 @@ pinctrl_run(struct controller_ctx *ctx,
>      run_put_mac_bindings(ctx);
>      send_garp_run(ctx, br_int, chassis, chassis_index, local_datapaths,
>                    active_tunnels);
> +    send_ipv6_ras(ctx, local_datapaths);
>  }
>
> +/* Table of ipv6_ra_state structures, keyed on logical port name */
> +static struct shash ipv6_ras;
> +
> +/* Next IPV6 RA in seconds. */
> +static long long int send_ipv6_ra_time;
> +
> +struct ipv6_network_prefix {
> +    struct in6_addr addr;
> +    unsigned int prefix_len;
> +};
> +
> +struct ipv6_ra_config {
> +    time_t min_interval;
> +    time_t max_interval;
> +    struct eth_addr eth_src;
> +    struct eth_addr eth_dst;
> +    struct in6_addr ipv6_src;
> +    struct in6_addr ipv6_dst;
> +    ovs_be32 mtu;
> +    uint8_t mo_flags; /* Managed/Other flags for RAs */
> +    uint8_t la_flags; /* On-link/autonomous flags for address prefixes */
> +    struct ipv6_network_prefix *prefixes;
> +    int n_prefixes;
> +};
> +
> +struct ipv6_ra_state {
> +    long long int next_announce;
> +    struct ipv6_ra_config *config;
> +    int64_t port_key;
> +    int64_t metadata;
> +    bool deleteme;
> +};
> +
> +static void
> +init_ipv6_ras(void)
> +{
> +    shash_init(&ipv6_ras);
> +    send_ipv6_ra_time = LLONG_MAX;
> +}
> +
> +static void
> +ipv6_ra_config_delete(struct ipv6_ra_config *config)
> +{
> +    if (config) {
> +        free(config->prefixes);
> +    }
> +    free(config);
> +}
> +
> +static void
> +ipv6_ra_delete(struct ipv6_ra_state *ra)
> +{
> +    if (ra) {
> +        ipv6_ra_config_delete(ra->config);
> +    }
> +    free(ra);
> +}
> +
> +static void
> +destroy_ipv6_ras(void)
> +{
> +    struct shash_node *iter, *next;
> +    SHASH_FOR_EACH_SAFE (iter, next, &ipv6_ras) {
> +        struct ipv6_ra_state *ra = iter->data;
> +        ipv6_ra_delete(ra);
> +        shash_delete(&ipv6_ras, iter);
> +    }
> +    shash_destroy(&ipv6_ras);
> +}
> +
> +static struct ipv6_ra_config *
> +ipv6_ra_update_config(const struct sbrec_port_binding *pb)
> +{
> +    struct ipv6_ra_config *config;
> +
> +    config = xzalloc(sizeof *config);
> +
> +    config->max_interval = smap_get_int(&pb->options, "ipv6_ra_max_interval",
> +            ND_RA_MAX_INTERVAL_DEFAULT);
> +    config->min_interval = smap_get_int(&pb->options, "ipv6_ra_min_interval",
> +            ND_RA_MIN_INTERVAL_DEFAULT(config->max_interval));
> +    config->mtu = htonl(smap_get_int(&pb->options, "ipv6_ra_mtu",
> +            ND_MTU_DEFAULT));
> +    config->la_flags = ND_PREFIX_ON_LINK;
> +
> +    const char *address_mode = smap_get(&pb->options, 
> "ipv6_ra_address_mode");
> +    if (!address_mode) {
> +        VLOG_WARN("No address mode specified");
> +        goto fail;
> +    }
> +    if (!strcmp(address_mode, "dhcpv6_stateless")) {
> +        config->mo_flags = IPV6_ND_RA_FLAG_OTHER_ADDR_CONFIG;
> +    } else if (!strcmp(address_mode, "dhcpv6_stateful")) {
> +        config->mo_flags = IPV6_ND_RA_FLAG_MANAGED_ADDR_CONFIG;
> +    } else if (!strcmp(address_mode, "slaac")) {
> +        config->la_flags |= ND_PREFIX_AUTONOMOUS_ADDRESS;
> +    } else {
> +        VLOG_WARN("Invalid address mode %s", address_mode);
> +        goto fail;
> +    }
> +
> +    const char *prefixes = smap_get(&pb->options, "ipv6_ra_prefixes");
> +    if (prefixes) {
> +        char *prefixes_copy = xstrdup(prefixes);
> +        char *prefix;
> +
> +        /* Prefixes are in the format:
> +         * addr/len, where
> +         * addr is the network prefix
> +         * and len is the prefix length
> +         */
> +        while ((prefix = strsep(&prefixes_copy, " "))) {
> +            unsigned int prefix_len;
> +            struct in6_addr prefix_addr;
> +            char *error;
> +
> +            error = ipv6_parse_cidr(prefix, &prefix_addr, &prefix_len);
> +            if (error) {
> +                VLOG_WARN("Error parsing IPv6 prefix: %s", error);
> +                continue;
> +            }
> +
> +            config->n_prefixes++;
> +            config->prefixes = xrealloc(config->prefixes,
> +                    config->n_prefixes * sizeof *config->prefixes);
> +
> +            struct ipv6_network_prefix *network;
> +            network = &config->prefixes[config->n_prefixes - 1];
> +            network->addr = prefix_addr;
> +            network->prefix_len = prefix_len;
> +        }
> +        free (prefixes_copy);
> +    }
> +
> +    /* All nodes multicast addresses */
> +    ovs_assert(eth_addr_from_string("33:33:00:00:00:01", &config->eth_dst));
> +    ovs_assert(ipv6_parse("ff02::1", &config->ipv6_dst));
> +
> +    const char *eth_addr = smap_get(&pb->options, "ipv6_ra_src_eth");
> +    if (!eth_addr || !eth_addr_from_string(eth_addr, &config->eth_src)) {
> +        VLOG_WARN("Invalid ethernet source %s", eth_addr);
> +        goto fail;
> +    }
> +    const char *ip_addr = smap_get(&pb->options, "ipv6_ra_src_addr");
> +    if (!ip_addr || !ipv6_parse(ip_addr, &config->ipv6_src)) {
> +        VLOG_WARN("Invalid IP source %s", ip_addr);
> +        goto fail;
> +    }
> +
> +    return config;
> +
> +fail:
> +    ipv6_ra_config_delete(config);
> +    return NULL;
> +}
> +
> +static long long int
> +ipv6_ra_calc_next_announce(time_t min_interval, time_t max_interval)
> +{
> +    long long int min_interval_ms = min_interval * 1000;
> +    long long int max_interval_ms = max_interval * 1000;
> +
> +    return time_msec() + min_interval_ms +
> +        random_range(max_interval_ms - min_interval_ms);
> +}
> +
> +static void
> +put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits,
> +         struct ofpbuf *ofpacts)
> +{
> +    struct ofpact_set_field *sf = ofpact_put_set_field(ofpacts,
> +                                                       mf_from_id(dst), NULL,
> +                                                       NULL);
> +    ovs_be64 n_value = htonll(value);
> +    bitwise_copy(&n_value, 8, 0, sf->value, sf->field->n_bytes, ofs, n_bits);
> +    bitwise_one(ofpact_set_field_mask(sf), sf->field->n_bytes, ofs, n_bits);
> +}
> +
> +#define RA_DEFAULT_VALID_LIFETIME 2592000
> +#define RA_DEFAULT_PREFERRED_LIFETIME 604800

These defaults also come from RFC 4861:
- AdvValidLifetime (30 days),
- AdvPreferredLifetime (7 days)

Maybe we should annotate them too? Otherwise they look like arbitrary
values. Or just group all values that come from this RFC in one place?

> +static time_t
> +ipv6_ra_send(struct ipv6_ra_state *ra)
> +{
> +    if (time_msec() < ra->next_announce) {
> +        return ra->next_announce;
> +    }
> +
> +    uint64_t packet_stub[128 / 8];
> +    struct dp_packet packet;
> +    dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
> +    compose_nd_ra(&packet, ra->config->eth_src, ra->config->eth_dst,
> +            &ra->config->ipv6_src, &ra->config->ipv6_dst,
> +            255, ra->config->mo_flags, 0, 0, 0, ra->config->mtu);
> +
> +    for (int i = 0; i < ra->config->n_prefixes; ++i) {
> +        ovs_be128 addr;
> +        memcpy(&addr, &ra->config->prefixes[i].addr, sizeof addr);
> +        packet_put_ra_prefix_opt(&packet, ra->config->prefixes[i].prefix_len,
> +                ra->config->la_flags, htonl(RA_DEFAULT_VALID_LIFETIME),
> +                htonl(RA_DEFAULT_PREFERRED_LIFETIME), addr);
> +    }
> +
> +    uint64_t ofpacts_stub[4096 / 8];
> +    struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
> +    enum ofp_version version = rconn_get_version(swconn);
> +
> +    /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */
> +    uint32_t dp_key = ra->metadata;
> +    uint32_t port_key = ra->port_key;
> +    put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts);
> +    put_load(port_key, MFF_LOG_INPORT, 0, 32, &ofpacts);
> +    put_load(1, MFF_LOG_FLAGS, MLF_KEEP_LOCAL_BIT, 1, &ofpacts);
> +    struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts);
> +    resubmit->in_port = OFPP_CONTROLLER;
> +    resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE;
> +
> +    struct ofputil_packet_out po = {
> +        .packet = dp_packet_data(&packet),
> +        .packet_len = dp_packet_size(&packet),
> +        .buffer_id = UINT32_MAX,
> +        .ofpacts = ofpacts.data,
> +        .ofpacts_len = ofpacts.size,
> +    };
> +
> +    match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
> +    enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
> +    queue_msg(ofputil_encode_packet_out(&po, proto));
> +    dp_packet_uninit(&packet);
> +    ofpbuf_uninit(&ofpacts);
> +
> +    ra->next_announce = ipv6_ra_calc_next_announce(ra->config->min_interval,
> +            ra->config->max_interval);
> +
> +    return ra->next_announce;
> +}
> +
> +static void
> +ipv6_ra_wait(void)
> +{
> +    poll_timer_wait_until(send_ipv6_ra_time);
> +}
> +
> +static void
> +send_ipv6_ras(const struct controller_ctx *ctx OVS_UNUSED,
> +    struct hmap *local_datapaths)
> +{
> +    struct shash_node *iter, *iter_next;
> +
> +    send_ipv6_ra_time = LLONG_MAX;
> +
> +    SHASH_FOR_EACH (iter, &ipv6_ras) {
> +        struct ipv6_ra_state *ra = iter->data;
> +        ra->deleteme = true;
> +    }
> +
> +    const struct local_datapath *ld;
> +    HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
> +
> +        struct sbrec_port_binding *lpval;
> +        const struct sbrec_port_binding *pb;
> +        struct ovsdb_idl_index_cursor cursor;
> +
> +        lpval = sbrec_port_binding_index_init_row(ctx->ovnsb_idl,
> +                                                  &sbrec_table_port_binding);
> +        sbrec_port_binding_index_set_datapath(lpval, ld->datapath);
> +        ovsdb_idl_initialize_cursor(ctx->ovnsb_idl, 
> &sbrec_table_port_binding,
> +                                    "lport-by-datapath", &cursor);
> +        SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, &cursor, lpval) {
> +            if (!smap_get_bool(&pb->options, "ipv6_ra_send_periodic", 
> false)) {
> +                continue;
> +            }
> +
> +            const char *peer_s;
> +            peer_s = smap_get(&pb->options, "peer");
> +            if (!peer_s) {
> +                continue;
> +            }
> +
> +            const struct sbrec_port_binding *peer;
> +            peer = lport_lookup_by_name(ctx->ovnsb_idl, peer_s);
> +            if (!peer) {
> +                continue;
> +            }
> +
> +            struct ipv6_ra_config *config;
> +            config = ipv6_ra_update_config(pb);
> +            if (!config) {
> +                continue;
> +            }
> +
> +            struct ipv6_ra_state *ra;
> +            ra = shash_find_data(&ipv6_ras, pb->logical_port);
> +            if (!ra) {
> +                ra = xzalloc(sizeof *ra);
> +                ra->config = config;
> +                ra->next_announce = ipv6_ra_calc_next_announce(
> +                        ra->config->min_interval,
> +                        ra->config->max_interval);
> +                shash_add(&ipv6_ras, pb->logical_port, ra);
> +            } else {
> +                ipv6_ra_config_delete(ra->config);
> +                ra->config = config;
> +            }
> +
> +            /* Peer is the logical switch port that the logical
> +             * router port is connected to. The RA is injected
> +             * into that logical switch port.
> +             */
> +            ra->port_key = peer->tunnel_key;
> +            ra->metadata = peer->datapath->tunnel_key;
> +            ra->deleteme = false;
> +
> +            long long int next_ra;
> +            next_ra = ipv6_ra_send(ra);
> +            if (send_ipv6_ra_time > next_ra) {
> +                send_ipv6_ra_time = next_ra;
> +            }
> +        }
> +    }
> +
> +    /* Remove those that are no longer in the SB database */
> +    SHASH_FOR_EACH_SAFE (iter, iter_next, &ipv6_ras) {
> +        struct ipv6_ra_state *ra = iter->data;
> +        if (ra->deleteme) {
> +            shash_delete(&ipv6_ras, iter);
> +            ipv6_ra_delete(ra);
> +        }
> +    }
> +}
> +
> +
>  void
>  pinctrl_wait(struct controller_ctx *ctx)
>  {
> @@ -1092,6 +1431,7 @@ pinctrl_wait(struct controller_ctx *ctx)
>      rconn_run_wait(swconn);
>      rconn_recv_wait(swconn);
>      send_garp_wait();
> +    ipv6_ra_wait();
>  }
>
>  void
> @@ -1100,6 +1440,7 @@ pinctrl_destroy(void)
>      rconn_destroy(swconn);
>      destroy_put_mac_bindings();
>      destroy_send_garps();
> +    destroy_ipv6_ras();
>  }
>  
>  /* Implementation of the "put_arp" and "put_nd" OVN actions.  These
> diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
> index b4c971320..6dbd5ef15 100644
> --- a/ovn/northd/ovn-northd.c
> +++ b/ovn/northd/ovn-northd.c
> @@ -4445,6 +4445,66 @@ add_router_lb_flow(struct hmap *lflows, struct 
> ovn_datapath *od,
>      ds_destroy(&undnat_match);
>  }
>
> +#define ND_RA_MAX_INTERVAL_MAX 1800
> +#define ND_RA_MAX_INTERVAL_MIN 4
> +
> +#define ND_RA_MIN_INTERVAL_MAX(max) ((max) * 3 / 4)
> +#define ND_RA_MIN_INTERVAL_MIN 3
> +
> +static void
> +copy_ra_to_sb(struct ovn_port *op, const char *address_mode)
> +{
> +    struct smap options;
> +    smap_clone(&options, &op->sb->options);
> +
> +    smap_add(&options, "ipv6_ra_send_periodic", "true");
> +    smap_add(&options, "ipv6_ra_address_mode", address_mode);
> +
> +    int max_interval = smap_get_int(&op->nbrp->ipv6_ra_configs,
> +            "max_interval", ND_RA_MAX_INTERVAL_DEFAULT);
> +    if (max_interval > ND_RA_MAX_INTERVAL_MAX) {
> +        max_interval = ND_RA_MAX_INTERVAL_MAX;
> +    }
> +    if (max_interval < ND_RA_MAX_INTERVAL_MIN) {
> +        max_interval = ND_RA_MAX_INTERVAL_MIN;
> +    }
> +    smap_add_format(&options, "ipv6_ra_max_interval", "%d", max_interval);
> +
> +    int min_interval = smap_get_int(&op->nbrp->ipv6_ra_configs,
> +            "min_interval", ND_RA_MIN_INTERVAL_DEFAULT(max_interval));
> +    if (min_interval > ND_RA_MIN_INTERVAL_MAX(max_interval)) {
> +        min_interval = ND_RA_MIN_INTERVAL_MAX(max_interval);
> +    }
> +    if (min_interval < ND_RA_MIN_INTERVAL_MIN) {
> +        min_interval = ND_RA_MIN_INTERVAL_MIN;
> +    }
> +    smap_add_format(&options, "ipv6_ra_min_interval", "%d", min_interval);
> +
> +    int mtu = smap_get_int(&op->nbrp->ipv6_ra_configs, "mtu", 
> ND_MTU_DEFAULT);
> +    if (mtu) {
> +        smap_add_format(&options, "ipv6_ra_mtu", "%d", mtu);
> +    }
> +
> +    struct ds s = DS_EMPTY_INITIALIZER;
> +    for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; ++i) {
> +        struct ipv6_netaddr *addrs = &op->lrp_networks.ipv6_addrs[i];
> +        if (in6_is_lla(&addrs->network)) {
> +            smap_add(&options, "ipv6_ra_src_addr", addrs->addr_s);
> +            continue;
> +        }
> +        ds_put_format(&s, "%s/%u ", addrs->network_s, addrs->plen);
> +    }
> +    /* Remove trailing space */
> +    ds_chomp(&s, ' ');
> +    smap_add(&options, "ipv6_ra_prefixes", ds_cstr(&s));
> +    ds_destroy(&s);
> +
> +    smap_add(&options, "ipv6_ra_src_eth", op->lrp_networks.ea_s);
> +
> +    sbrec_port_binding_set_options(op->sb, &options);
> +    smap_destroy(&options);
> +}
> +
>  static void
>  build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
>                      struct hmap *lflows)
> @@ -5428,6 +5488,11 @@ build_lrouter_flows(struct hmap *datapaths, struct 
> hmap *ports,
>              continue;
>          }
>
> +        if (smap_get_bool(&op->nbrp->ipv6_ra_configs, "send_periodic",
> +                          false)) {
> +            copy_ra_to_sb(op, address_mode);
> +        }
> +
>          ds_clear(&match);
>          ds_put_format(&match, "inport == %s && ip6.dst == ff02::2 && nd_rs",
>                                op->json_key);

Shouldn't we check if MTU is at least 1280, if present, before copying
it to SB DB? Like it is done a few lines later in build_lrouter_flows()
after this hunk.

[...]

Thanks,
Jakub
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to