Re: [ovs-dev] [PATCH ovs V2 09/21] dpif-netlink: Dump netdevs flows on flow dump
On 05/01/2017 23:27, Joe Stringer wrote: On 25 December 2016 at 03:39, Paul Blakey wrote: While dumping flows, dump flows that were offloaded to netdev and parse them back to dpif flow. Signed-off-by: Paul Blakey Reviewed-by: Roi Dayan --- lib/dpif-netlink.c | 179 + 1 file changed, 179 insertions(+) diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 36f2888..3d8940e 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -38,6 +38,7 @@ #include "flow.h" #include "fat-rwlock.h" #include "netdev.h" +#include "netdev-provider.h" #include "netdev-linux.h" #include "netdev-vport.h" #include "netlink-conntrack.h" @@ -55,6 +56,7 @@ #include "unaligned.h" #include "util.h" #include "openvswitch/vlog.h" +#include "openvswitch/match.h" VLOG_DEFINE_THIS_MODULE(dpif_netlink); #ifdef _WIN32 @@ -68,6 +70,8 @@ enum { MAX_PORTS = USHRT_MAX }; * missing if we have old headers. */ #define ETH_FLAG_LRO (1 << 15)/* LRO is enabled */ +#define FLOW_DUMP_MAX_BATCH 50 + struct dpif_netlink_dp { /* Generic Netlink header. */ uint8_t cmd; @@ -1355,6 +1359,10 @@ struct dpif_netlink_flow_dump { struct dpif_flow_dump up; struct nl_dump nl_dump; atomic_int status; +struct netdev_flow_dump **netdev_dumps; +int netdev_num; +int netdev_given; +struct ovs_mutex netdev_lock; Could you add a brief comment above these variables that describes their use? (It's also common in OVS code to mention that, eg, netdev_lock protects the following elements. ) If there's a more descriptive name than "netdev_num", like netdev_max_dumps or something then please use that instead. At a glance, "given" and "num" don't provide particularly much context about how they relate to each other or to the dump. 
}; static struct dpif_netlink_flow_dump * @@ -1363,6 +1371,34 @@ dpif_netlink_flow_dump_cast(struct dpif_flow_dump *dump) return CONTAINER_OF(dump, struct dpif_netlink_flow_dump, up); } +static void start_netdev_dump(const struct dpif *dpif_, + struct dpif_netlink_flow_dump *dump) { + +if (!netdev_flow_api_enabled) { +dump->netdev_num = 0; +return; +} Typically for style we still place all variable declarations at the top of a function, in a christmas tree long lines to short lines, before functional code like this. + +struct netdev_list_element *element; +struct ovs_list port_list; +int ports = netdev_hmap_port_get_list(dpif_->dpif_class, &port_list); +int i = 0; + +dump->netdev_dumps = +ports ? xzalloc(sizeof(struct netdev_flow_dump *) * ports) : 0; Can this be sizeof(dump->netdev_dumps)? +dump->netdev_num = ports; +dump->netdev_given = 0; + +LIST_FOR_EACH(element, node, &port_list) { +dump->netdev_dumps[i] = netdev_flow_dump_create(element->netdev); +dump->netdev_dumps[i]->port = element->port_no; +i++; +} As a matter of style, it's easier to see that this loop is bounded by 'ports' (and that number is correct) if it's structured as for (i = 0; i < ports; i++) { element = get_next_node; ... } Also, it seems that even if the netdev doesn't support flow_dump, we allocate a netdev_flow_dump and add it to the netdev_dumps here.. perhaps we could/should skip it for these netdevs instead? +netdev_port_list_del(&port_list); + +ovs_mutex_init(&dump->netdev_lock); I don't see a corresponding ovs_mutex_destroy() call for this. 
+} + static struct dpif_flow_dump * dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse) { @@ -1387,6 +1423,8 @@ dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse) atomic_init(&dump->status, 0); dump->up.terse = terse; +start_netdev_dump(dpif_, dump); + return &dump->up; } @@ -1397,6 +1435,16 @@ dpif_netlink_flow_dump_destroy(struct dpif_flow_dump *dump_) unsigned int nl_status = nl_dump_done(&dump->nl_dump); int dump_status; +if (netdev_flow_api_enabled) { +for (int i = 0; i < dump->netdev_num; i++) { +int err = netdev_flow_dump_destroy(dump->netdev_dumps[i]); +if (err != 0 && err != EOPNOTSUPP) { +VLOG_ERR("failed dumping netdev: %s", ovs_strerror(err)); +} +} +free(dump->netdev_dumps); +} You don't really need to check for netdev_flow_api_enabled here; netdev_num will be 0 if it is disabled, so that for loop turns into a no-op; then you could initialize dump->netdev_dumps to NULL in that case and unconditionally free it. It's a bit simpler to read the code if you don't have to think about whether or not hardware offloads are enabled. + /* No other thread has access to 'dump' at this point. */ atomic_read_relaxed(&dump->status, &dump_status); free(dump); @@ -1410,6 +1458,11 @@ struct dpif_netlink_flow_dump_thread { struct dpif_flow_stats stats; struct ofpbuf nl_fl
Re: [ovs-dev] [PATCH ovs V2 09/21] dpif-netlink: Dump netdevs flows on flow dump
On 10 January 2017 at 03:45, Paul Blakey wrote: >>> +struct netdev_list_element *element; >>> +struct ovs_list port_list; >>> +int ports = netdev_hmap_port_get_list(dpif_->dpif_class, >>> &port_list); >>> +int i = 0; >>> + >>> +dump->netdev_dumps = >>> +ports ? xzalloc(sizeof(struct netdev_flow_dump *) * ports) : 0; >> >> Can this be sizeof(dump->netdev_dumps)? > > Do you mean sizeof(*dump->netdev_dumps), or sizeof(dump->netdev_dumps[0]), if > so yes. Yes that's what I meant. >>> @@ -1466,6 +1521,90 @@ dpif_netlink_flow_to_dpif_flow(struct dpif *dpif, >>> struct dpif_flow *dpif_flow, >>> dpif_netlink_flow_get_stats(datapath_flow, &dpif_flow->stats); >>> } >>> >>> +static void >>> +dpif_netlink_advance_netdev_dump(struct dpif_netlink_flow_dump_thread >>> *thread) >>> +{ >>> +struct dpif_netlink_flow_dump *dump = thread->dump; >>> + >>> +ovs_mutex_lock(&dump->netdev_lock); >>> +/* if we haven't finished (dumped everything) */ >>> +if (dump->netdev_given < dump->netdev_num) { >>> +/* if we are the first to find that given dump is finished >>> + * (for race condition, e.g 3 finish dump 0 at the same time) */ >> >> Why is there a race condition here if this is executed under netdev_lock? > > The design is such that all threads are working together on the first dump > to the last, in order. (at first they are all on dump 0), > and when one thread finds that the given dump is finished, they all move to > the next. > As the comment tried to explain, if 3 (or 2+) threads are working on the > first dump, dump 0, > (thread->netdev_cur_dump == 0) and finish at the same time, they all call > advance func. > Now the first one to get the lock advances the shared given dump, which > signifies the highest dump we have given > (and all lower dumps have finished). The rest now enter and we check if the > dump they have found to be > finished is higher than the new one that was given, if not they catch up, so > now all of them will work on dump 1. 
> > The race is that if 2 or more threads worked on the same dump and finished > at the same time, > if we just increased netdev_given without checking (thread->cur == given) > for both of them, > we would have increased given twice and skipped one dump. Thanks for the explanation. I think that the code would benefit from having this written somewhere - for instance, above the dpif_netlink_advance_netdev_dump() function. ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH ovs V2 09/21] dpif-netlink: Dump netdevs flows on flow dump
On 05/01/2017 23:27, Joe Stringer wrote: On 25 December 2016 at 03:39, Paul Blakey wrote: While dumping flows, dump flows that were offloaded to netdev and parse them back to dpif flow. Signed-off-by: Paul Blakey Reviewed-by: Roi Dayan --- lib/dpif-netlink.c | 179 + 1 file changed, 179 insertions(+) diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 36f2888..3d8940e 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -38,6 +38,7 @@ #include "flow.h" #include "fat-rwlock.h" #include "netdev.h" +#include "netdev-provider.h" #include "netdev-linux.h" #include "netdev-vport.h" #include "netlink-conntrack.h" @@ -55,6 +56,7 @@ #include "unaligned.h" #include "util.h" #include "openvswitch/vlog.h" +#include "openvswitch/match.h" VLOG_DEFINE_THIS_MODULE(dpif_netlink); #ifdef _WIN32 @@ -68,6 +70,8 @@ enum { MAX_PORTS = USHRT_MAX }; * missing if we have old headers. */ #define ETH_FLAG_LRO (1 << 15)/* LRO is enabled */ +#define FLOW_DUMP_MAX_BATCH 50 + struct dpif_netlink_dp { /* Generic Netlink header. */ uint8_t cmd; @@ -1355,6 +1359,10 @@ struct dpif_netlink_flow_dump { struct dpif_flow_dump up; struct nl_dump nl_dump; atomic_int status; +struct netdev_flow_dump **netdev_dumps; +int netdev_num; +int netdev_given; +struct ovs_mutex netdev_lock; Could you add a brief comment above these variables that describes their use? (It's also common in OVS code to mention that, eg, netdev_lock protects the following elements. ) If there's a more descriptive name than "netdev_num", like netdev_max_dumps or something then please use that instead. At a glance, "given" and "num" don't provide particularly much context about how they relate to each other or to the dump. sure, thanks. 
}; static struct dpif_netlink_flow_dump * @@ -1363,6 +1371,34 @@ dpif_netlink_flow_dump_cast(struct dpif_flow_dump *dump) return CONTAINER_OF(dump, struct dpif_netlink_flow_dump, up); } +static void start_netdev_dump(const struct dpif *dpif_, + struct dpif_netlink_flow_dump *dump) { + +if (!netdev_flow_api_enabled) { +dump->netdev_num = 0; +return; +} Typically for style we still place all variable declarations at the top of a function, in a christmas tree long lines to short lines, before functional code like this. + +struct netdev_list_element *element; +struct ovs_list port_list; +int ports = netdev_hmap_port_get_list(dpif_->dpif_class, &port_list); +int i = 0; + +dump->netdev_dumps = +ports ? xzalloc(sizeof(struct netdev_flow_dump *) * ports) : 0; Can this be sizeof(dump->netdev_dumps)? Do you mean sizeof(*dump-netdev_dumps), or sizeof(dump->netdev_dumps[0]), if so yes. +dump->netdev_num = ports; +dump->netdev_given = 0; + +LIST_FOR_EACH(element, node, &port_list) { +dump->netdev_dumps[i] = netdev_flow_dump_create(element->netdev); +dump->netdev_dumps[i]->port = element->port_no; +i++; +} As a matter of style, it's easier to see that this loop is bounded by 'ports' (and that number is correct) if it's structured as for (i = 0; i < ports; i++) { element = get_next_node; ... } Also, it seems that even if the netdev doesn't support flow_dump, we allocate a netdev_flow_dump and add it to the netdev_dumps here.. perhaps we could/should skip it for these netdevs instead? +netdev_port_list_del(&port_list); + +ovs_mutex_init(&dump->netdev_lock); I don't see a corresponding ovs_mutex_destroy() call for this. Good catch, thanks. 
+} + static struct dpif_flow_dump * dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse) { @@ -1387,6 +1423,8 @@ dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse) atomic_init(&dump->status, 0); dump->up.terse = terse; +start_netdev_dump(dpif_, dump); + return &dump->up; } @@ -1397,6 +1435,16 @@ dpif_netlink_flow_dump_destroy(struct dpif_flow_dump *dump_) unsigned int nl_status = nl_dump_done(&dump->nl_dump); int dump_status; +if (netdev_flow_api_enabled) { +for (int i = 0; i < dump->netdev_num; i++) { +int err = netdev_flow_dump_destroy(dump->netdev_dumps[i]); +if (err != 0 && err != EOPNOTSUPP) { +VLOG_ERR("failed dumping netdev: %s", ovs_strerror(err)); +} +} +free(dump->netdev_dumps); +} You don't really need to check for netdev_flow_api_enabled here; netdev_num will be 0 if it is disabled, so that for loop turns into a no-op; then you could initialize dump->netdev_dumps to NULL in that case and unconditionally free it. It's a bit simpler to read the code if you don't have to think about whether or not hardware offloads are enabled. + /* No other thread has access to 'dump' at this point. */ atomic_read_relaxed(&dump->status,
Re: [ovs-dev] [PATCH ovs V2 09/21] dpif-netlink: Dump netdevs flows on flow dump
On 25 December 2016 at 03:39, Paul Blakey wrote: > While dumping flows, dump flows that were offloaded to > netdev and parse them back to dpif flow. > > Signed-off-by: Paul Blakey > Reviewed-by: Roi Dayan > --- > lib/dpif-netlink.c | 179 > + > 1 file changed, 179 insertions(+) > > diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c > index 36f2888..3d8940e 100644 > --- a/lib/dpif-netlink.c > +++ b/lib/dpif-netlink.c > @@ -38,6 +38,7 @@ > #include "flow.h" > #include "fat-rwlock.h" > #include "netdev.h" > +#include "netdev-provider.h" > #include "netdev-linux.h" > #include "netdev-vport.h" > #include "netlink-conntrack.h" > @@ -55,6 +56,7 @@ > #include "unaligned.h" > #include "util.h" > #include "openvswitch/vlog.h" > +#include "openvswitch/match.h" > > VLOG_DEFINE_THIS_MODULE(dpif_netlink); > #ifdef _WIN32 > @@ -68,6 +70,8 @@ enum { MAX_PORTS = USHRT_MAX }; > * missing if we have old headers. */ > #define ETH_FLAG_LRO (1 << 15)/* LRO is enabled */ > > +#define FLOW_DUMP_MAX_BATCH 50 > + > struct dpif_netlink_dp { > /* Generic Netlink header. */ > uint8_t cmd; > @@ -1355,6 +1359,10 @@ struct dpif_netlink_flow_dump { > struct dpif_flow_dump up; > struct nl_dump nl_dump; > atomic_int status; > +struct netdev_flow_dump **netdev_dumps; > +int netdev_num; > +int netdev_given; > +struct ovs_mutex netdev_lock; Could you add a brief comment above these variables that describes their use? (It's also common in OVS code to mention that, eg, netdev_lock protects the following elements. ) If there's a more descriptive name than "netdev_num", like netdev_max_dumps or something then please use that instead. At a glance, "given" and "num" don't provide particularly much context about how they relate to each other or to the dump. 
> }; > > static struct dpif_netlink_flow_dump * > @@ -1363,6 +1371,34 @@ dpif_netlink_flow_dump_cast(struct dpif_flow_dump > *dump) > return CONTAINER_OF(dump, struct dpif_netlink_flow_dump, up); > } > > +static void start_netdev_dump(const struct dpif *dpif_, > + struct dpif_netlink_flow_dump *dump) { > + > +if (!netdev_flow_api_enabled) { > +dump->netdev_num = 0; > +return; > +} Typically for style we still place all variable declarations at the top of a function, in a christmas tree long lines to short lines, before functional code like this. > + > +struct netdev_list_element *element; > +struct ovs_list port_list; > +int ports = netdev_hmap_port_get_list(dpif_->dpif_class, &port_list); > +int i = 0; > + > +dump->netdev_dumps = > +ports ? xzalloc(sizeof(struct netdev_flow_dump *) * ports) : 0; Can this be sizeof(dump->netdev_dumps)? > +dump->netdev_num = ports; > +dump->netdev_given = 0; > + > +LIST_FOR_EACH(element, node, &port_list) { > +dump->netdev_dumps[i] = netdev_flow_dump_create(element->netdev); > +dump->netdev_dumps[i]->port = element->port_no; > +i++; > +} As a matter of style, it's easier to see that this loop is bounded by 'ports' (and that number is correct) if it's structured as for (i = 0; i < ports; i++) { element = get_next_node; ... } Also, it seems that even if the netdev doesn't support flow_dump, we allocate a netdev_flow_dump and add it to the netdev_dumps here.. perhaps we could/should skip it for these netdevs instead? > +netdev_port_list_del(&port_list); > + > +ovs_mutex_init(&dump->netdev_lock); I don't see a corresponding ovs_mutex_destroy() call for this. 
> +} > + > static struct dpif_flow_dump * > dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse) > { > @@ -1387,6 +1423,8 @@ dpif_netlink_flow_dump_create(const struct dpif *dpif_, > bool terse) > atomic_init(&dump->status, 0); > dump->up.terse = terse; > > +start_netdev_dump(dpif_, dump); > + > return &dump->up; > } > > @@ -1397,6 +1435,16 @@ dpif_netlink_flow_dump_destroy(struct dpif_flow_dump > *dump_) > unsigned int nl_status = nl_dump_done(&dump->nl_dump); > int dump_status; > > +if (netdev_flow_api_enabled) { > +for (int i = 0; i < dump->netdev_num; i++) { > +int err = netdev_flow_dump_destroy(dump->netdev_dumps[i]); > +if (err != 0 && err != EOPNOTSUPP) { > +VLOG_ERR("failed dumping netdev: %s", ovs_strerror(err)); > +} > +} > +free(dump->netdev_dumps); > +} You don't really need to check for netdev_flow_api_enabled here; netdev_num will be 0 if it is disabled, so that for loop turns into a no-op; then you could initialize dump->netdev_dumps to NULL in that case and unconditionally free it. It's a bit simpler to read the code if you don't have to think about whether or not hardware offloads are enabled. > + > /* No other thread has access to 'dump' at this point. */ > atomic_read_relaxed(&dump->
[ovs-dev] [PATCH ovs V2 09/21] dpif-netlink: Dump netdevs flows on flow dump
While dumping flows, dump flows that were offloaded to netdev and parse them back to dpif flow. Signed-off-by: Paul Blakey Reviewed-by: Roi Dayan --- lib/dpif-netlink.c | 179 + 1 file changed, 179 insertions(+) diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 36f2888..3d8940e 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -38,6 +38,7 @@ #include "flow.h" #include "fat-rwlock.h" #include "netdev.h" +#include "netdev-provider.h" #include "netdev-linux.h" #include "netdev-vport.h" #include "netlink-conntrack.h" @@ -55,6 +56,7 @@ #include "unaligned.h" #include "util.h" #include "openvswitch/vlog.h" +#include "openvswitch/match.h" VLOG_DEFINE_THIS_MODULE(dpif_netlink); #ifdef _WIN32 @@ -68,6 +70,8 @@ enum { MAX_PORTS = USHRT_MAX }; * missing if we have old headers. */ #define ETH_FLAG_LRO (1 << 15)/* LRO is enabled */ +#define FLOW_DUMP_MAX_BATCH 50 + struct dpif_netlink_dp { /* Generic Netlink header. */ uint8_t cmd; @@ -1355,6 +1359,10 @@ struct dpif_netlink_flow_dump { struct dpif_flow_dump up; struct nl_dump nl_dump; atomic_int status; +struct netdev_flow_dump **netdev_dumps; +int netdev_num; +int netdev_given; +struct ovs_mutex netdev_lock; }; static struct dpif_netlink_flow_dump * @@ -1363,6 +1371,34 @@ dpif_netlink_flow_dump_cast(struct dpif_flow_dump *dump) return CONTAINER_OF(dump, struct dpif_netlink_flow_dump, up); } +static void start_netdev_dump(const struct dpif *dpif_, + struct dpif_netlink_flow_dump *dump) { + +if (!netdev_flow_api_enabled) { +dump->netdev_num = 0; +return; +} + +struct netdev_list_element *element; +struct ovs_list port_list; +int ports = netdev_hmap_port_get_list(dpif_->dpif_class, &port_list); +int i = 0; + +dump->netdev_dumps = +ports ? 
xzalloc(sizeof(struct netdev_flow_dump *) * ports) : 0; +dump->netdev_num = ports; +dump->netdev_given = 0; + +LIST_FOR_EACH(element, node, &port_list) { +dump->netdev_dumps[i] = netdev_flow_dump_create(element->netdev); +dump->netdev_dumps[i]->port = element->port_no; +i++; +} +netdev_port_list_del(&port_list); + +ovs_mutex_init(&dump->netdev_lock); +} + static struct dpif_flow_dump * dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse) { @@ -1387,6 +1423,8 @@ dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse) atomic_init(&dump->status, 0); dump->up.terse = terse; +start_netdev_dump(dpif_, dump); + return &dump->up; } @@ -1397,6 +1435,16 @@ dpif_netlink_flow_dump_destroy(struct dpif_flow_dump *dump_) unsigned int nl_status = nl_dump_done(&dump->nl_dump); int dump_status; +if (netdev_flow_api_enabled) { +for (int i = 0; i < dump->netdev_num; i++) { +int err = netdev_flow_dump_destroy(dump->netdev_dumps[i]); +if (err != 0 && err != EOPNOTSUPP) { +VLOG_ERR("failed dumping netdev: %s", ovs_strerror(err)); +} +} +free(dump->netdev_dumps); +} + /* No other thread has access to 'dump' at this point. */ atomic_read_relaxed(&dump->status, &dump_status); free(dump); @@ -1410,6 +1458,11 @@ struct dpif_netlink_flow_dump_thread { struct dpif_flow_stats stats; struct ofpbuf nl_flows; /* Always used to store flows. */ struct ofpbuf *nl_actions; /* Used if kernel does not supply actions. 
*/ +struct odputil_keybuf keybuf[FLOW_DUMP_MAX_BATCH]; +struct odputil_keybuf maskbuf[FLOW_DUMP_MAX_BATCH]; +struct odputil_keybuf actbuf[FLOW_DUMP_MAX_BATCH]; +int netdev_cur_dump; +bool netdev_done; }; static struct dpif_netlink_flow_dump_thread * @@ -1429,6 +1482,8 @@ dpif_netlink_flow_dump_thread_create(struct dpif_flow_dump *dump_) thread->dump = dump; ofpbuf_init(&thread->nl_flows, NL_DUMP_BUFSIZE); thread->nl_actions = NULL; +thread->netdev_cur_dump = 0; +thread->netdev_done = !(thread->netdev_cur_dump < dump->netdev_num); return &thread->up; } @@ -1466,6 +1521,90 @@ dpif_netlink_flow_to_dpif_flow(struct dpif *dpif, struct dpif_flow *dpif_flow, dpif_netlink_flow_get_stats(datapath_flow, &dpif_flow->stats); } +static void +dpif_netlink_advance_netdev_dump(struct dpif_netlink_flow_dump_thread *thread) +{ +struct dpif_netlink_flow_dump *dump = thread->dump; + +ovs_mutex_lock(&dump->netdev_lock); +/* if we haven't finished (dumped everything) */ +if (dump->netdev_given < dump->netdev_num) { +/* if we are the first to find that given dump is finished + * (for race condition, e.g 3 finish dump 0 at the same time) */ +if (thread->netdev_cur_dump == dump->netdev_given) { +thread->netdev_cur_dump = ++dump->netdev_given; +/* did we just finish the last dump? done. */ +