[ovs-dev] [PATCH v2] tnl-ports: Remove netdevs in netdev_hash when deleted

2017-06-13 Thread fukaige
Start a virtual machine with its backend tap device attached to a brought up 
linux bridge.
If we delete the linux bridge when vm is still running, we'll get the following 
error when
trying to create a ovs bridge with the same name.

The reason is that ovs-router subsystem add the linux bridge into netdev_shash, 
but does
not remove it when the bridge is deleted in the situation. When the bridge is 
deleted, ovs
will receive a RTM_DELLINK msg, take this chance to remove the bridge in 
netdev_shash.

ovs-vsctl: Error detected while setting up 'br-eth'.  See ovs-vswitchd log for 
details.

ovs-vswitchd log:
2017-05-11T03:45:25.293Z|00026|ofproto_dpif|INFO|system@ovs-system: Datapath 
supports recirculation
2017-05-11T03:45:25.293Z|00027|ofproto_dpif|INFO|system@ovs-system: MPLS label 
stack length probed as 1
2017-05-11T03:45:25.293Z|00028|ofproto_dpif|INFO|system@ovs-system: Datapath 
supports unique flow ids
2017-05-11T03:45:25.293Z|00029|ofproto_dpif|INFO|system@ovs-system: Datapath 
supports ct_state
2017-05-11T03:45:25.293Z|00030|ofproto_dpif|INFO|system@ovs-system: Datapath 
supports ct_zone
2017-05-11T03:45:25.293Z|00031|ofproto_dpif|INFO|system@ovs-system: Datapath 
supports ct_mark
2017-05-11T03:45:25.293Z|00032|ofproto_dpif|INFO|system@ovs-system: Datapath 
supports ct_label
2017-05-11T03:45:25.364Z|1|ofproto_dpif_upcall(handler226)|INFO|received 
packet on unassociated datapath port 0
2017-05-11T03:45:25.368Z|00033|netdev_linux|WARN|ethtool command ETHTOOL_GFLAGS 
on network device br-eth failed: No such device
2017-05-11T03:45:25.368Z|00034|dpif|WARN|system@ovs-system: failed to add 
br-eth as port: No such device
2017-05-11T03:45:25.368Z|00035|bridge|INFO|bridge br-eth: using datapath ID 
2a51cf9f2841
2017-05-11T03:45:25.368Z|00036|connmgr|INFO|br-eth: added service controller 
"punix:/var/run/openvswitch/br-eth.mgmt"

Signed-off-by: fukaige 
---
 lib/tnl-ports.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/lib/tnl-ports.c b/lib/tnl-ports.c
index bcf4b94..75b5909 100644
--- a/lib/tnl-ports.c
+++ b/lib/tnl-ports.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "classifier.h"
 #include "dynamic-string.h"
@@ -33,6 +34,7 @@
 #include "ovs-thread.h"
 #include "unixctl.h"
 #include "util.h"
+#include "rtnetlink.h"
 
 static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
 static struct classifier cls;   /* Tunnel ports. */
@@ -463,11 +465,19 @@ tnl_port_map_run(void)
 ovs_mutex_unlock();
 }
 
+static void
+rtnetlink_del_cb(const struct rtnetlink_change *change, void *aux OVS_UNUSED)
+{
+if(change->nlmsg_type == RTM_DELLINK)
+tnl_port_map_delete_ipdev(change->ifname);
+}
+
 void
 tnl_port_map_init(void)
 {
 classifier_init(, flow_segment_u64s);
 list_init(_list);
 list_init(_list);
+rtnetlink_notifier_create(rtnetlink_del_cb, NULL);
 unixctl_command_register("tnl/ports/show", "-v", 0, 1, tnl_port_show, 
NULL);
 }
-- 
1.8.3.1


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] ovs-ofctl: Avoid read overrun in ofperr_decode_msg().

2017-06-13 Thread Lance Richardson
> From: "Ben Pfaff" 
> To: "Jarno Rajahalme" 
> Cc: d...@openvswitch.org, "Lance Richardson" 
> Sent: Tuesday, 13 June, 2017 8:10:22 PM
> Subject: Re: [PATCH] ovs-ofctl: Avoid read overrun in ofperr_decode_msg().
> 
> Thanks.
> 
> Lance, are you OK with this solution?

Fine with me, it does seem less error-prone to keep the whole packet.

Thanks,

   Lance
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] ovs-ofctl: New option "--no-stats" for "ovs-ofctl dump-flows".

2017-06-13 Thread Ben Pfaff
It's pretty common to want to omit statistics from output, to make it
easier to read.  This commit adds an ovs-ofctl option to make that easy.

A lot of the OVS internal tests could use this, too, in place of
ofctl_strip.  This commit adopts it for a subset.

CC: Aaron Conole 
Signed-off-by: Ben Pfaff 
---
 NEWS|  8 +++---
 include/openvswitch/ofp-print.h |  2 +-
 lib/ofp-print.c | 50 ++
 ovn/utilities/ovn-sbctl.c   |  6 ++---
 ovn/utilities/ovn-trace.c   | 10 +++
 tests/bundle.at |  3 +--
 tests/learn.at  | 59 ++---
 utilities/ovs-ofctl.8.in| 14 --
 utilities/ovs-ofctl.c   | 12 ++---
 9 files changed, 87 insertions(+), 77 deletions(-)

diff --git a/NEWS b/NEWS
index b526646810f8..bc39aab5c4d7 100644
--- a/NEWS
+++ b/NEWS
@@ -1,8 +1,10 @@
 Post-v2.7.0
 -
-   - ovs-ofctl can now accept and display port names in place of numbers.  By
- default it always accepts names and in interactive use it displays them;
- use --names or --no-names to override.  See ovs-ofctl(8) for details.
+   - ovs-ofctl:
+ * ovs-ofctl can now accept and display port names in place of numbers.  By
+   default it always accepts names and in interactive use it displays them;
+   use --names or --no-names to override.  See ovs-ofctl(8) for details.
+ * "ovs-ofctl dump-flows" now accepts --no-stats to omit flow statistics.
- Tunnels:
  * Added support to set packet mark for tunnel endpoint using
`egress_pkt_mark` OVSDB option.
diff --git a/include/openvswitch/ofp-print.h b/include/openvswitch/ofp-print.h
index 4893d44b4b48..20f049a37f65 100644
--- a/include/openvswitch/ofp-print.h
+++ b/include/openvswitch/ofp-print.h
@@ -61,7 +61,7 @@ void ofp_print_table_features(
 const struct ofputil_table_stats *prev_stats);
 
 void ofp_print_flow_stats(struct ds *, const struct ofputil_flow_stats *,
-  const struct ofputil_port_map *);
+  const struct ofputil_port_map *, bool show_stats);
 
 #ifdef  __cplusplus
 }
diff --git a/lib/ofp-print.c b/lib/ofp-print.c
index 423df31027d9..b1c412ea4c21 100644
--- a/lib/ofp-print.c
+++ b/lib/ofp-print.c
@@ -1682,21 +1682,34 @@ ofp_print_flow_stats_request(struct ds *string, const 
struct ofp_header *oh,
 match_format(, port_map, string, OFP_DEFAULT_PRIORITY);
 }
 
+/* Appends a textual form of 'fs' to 'string', translating port numbers to
+ * names using 'port_map' (if provided).  If 'show_stats' is true, the output
+ * includes the flow duration, packet and byte counts, and its idle and hard
+ * ages, otherwise they are omitted. */
 void
 ofp_print_flow_stats(struct ds *string, const struct ofputil_flow_stats *fs,
- const struct ofputil_port_map *port_map)
+ const struct ofputil_port_map *port_map, bool show_stats)
 {
-ds_put_format(string, " %scookie=%s0x%"PRIx64", %sduration=%s",
-  colors.param, colors.end, ntohll(fs->cookie),
-  colors.param, colors.end);
-
-ofp_print_duration(string, fs->duration_sec, fs->duration_nsec);
-ds_put_format(string, ", %stable=%s%"PRIu8", ",
-  colors.special, colors.end, fs->table_id);
-ds_put_format(string, "%sn_packets=%s%"PRIu64", ",
-  colors.param, colors.end, fs->packet_count);
-ds_put_format(string, "%sn_bytes=%s%"PRIu64", ",
-  colors.param, colors.end, fs->byte_count);
+if (show_stats || fs->cookie) {
+ds_put_format(string, "%scookie=%s0x%"PRIx64", ",
+  colors.param, colors.end, ntohll(fs->cookie));
+}
+if (show_stats) {
+ds_put_format(string, "%sduration=%s", colors.param, colors.end);
+ofp_print_duration(string, fs->duration_sec, fs->duration_nsec);
+ds_put_cstr(string, ", ");
+}
+
+if (show_stats || fs->table_id) {
+ds_put_format(string, "%stable=%s%"PRIu8", ",
+  colors.special, colors.end, fs->table_id);
+}
+if (show_stats) {
+ds_put_format(string, "%sn_packets=%s%"PRIu64", ",
+  colors.param, colors.end, fs->packet_count);
+ds_put_format(string, "%sn_bytes=%s%"PRIu64", ",
+  colors.param, colors.end, fs->byte_count);
+}
 if (fs->idle_timeout != OFP_FLOW_PERMANENT) {
 ds_put_format(string, "%sidle_timeout=%s%"PRIu16", ",
   colors.param, colors.end, fs->idle_timeout);
@@ -1712,17 +1725,20 @@ ofp_print_flow_stats(struct ds *string, const struct 
ofputil_flow_stats *fs,
 ds_put_format(string, "%simportance=%s%"PRIu16", ",
   colors.param, colors.end, fs->importance);
 }
-if (fs->idle_age >= 0) {
+if (show_stats && fs->idle_age >= 0) {
 

Re: [ovs-dev] [PATCH] ovs-ofctl: Avoid read overrun in ofperr_decode_msg().

2017-06-13 Thread Ben Pfaff
Thanks.

Lance, are you OK with this solution?

On Tue, Jun 13, 2017 at 04:23:01PM -0700, Jarno Rajahalme wrote:
> Seems like I leaped from the fact that error message’s payload must contain 
> at least 64 bytes of the message causing the error (or, less, if the message 
> length was less than 64), to the erroneous notion that the whole error 
> message would only need 64 bytes of storage. Thanks for fixing this.
> 
> Acked-by: Jarno Rajahlame >
> 
> > On Jun 13, 2017, at 4:04 PM, Ben Pfaff  wrote:
> > 
> > vconn_add_bundle_error() was keeping at most 64 bytes of an OpenFlow
> > error message, then it was passing it to ofperr_decode_msg(), which assumed
> > that the full message was available.  This led to a buffer overread.
> > There's no good reason why it was only keeping the first 64 bytes, so this
> > commit changes it to keep the whole error message, sidestepping the
> > problem.
> > 
> > struct vconn_bundle_error only existed for this special case, so remove it
> > in favor of a chain of ofpbufs.
> > 
> > Found via gcc's address sanitizer.
> > 
> > Reported-by: Lance Richardson 
> > CC: Jarno Rajahalme 
> > Fixes: 506c1ddb3404 ("vconn: Better bundle error management.")
> > Signed-off-by: Ben Pfaff 
> > ---
> > include/openvswitch/vconn.h | 12 
> > lib/vconn.c | 25 -
> > utilities/ovs-ofctl.c   | 10 ++
> > 3 files changed, 14 insertions(+), 33 deletions(-)
> > 
> > diff --git a/include/openvswitch/vconn.h b/include/openvswitch/vconn.h
> > index 40ca9edfe868..90f9bad2c1c9 100644
> > --- a/include/openvswitch/vconn.h
> > +++ b/include/openvswitch/vconn.h
> > @@ -61,18 +61,6 @@ int vconn_dump_flows(struct vconn *, const struct 
> > ofputil_flow_stats_request *,
> >  enum ofputil_protocol,
> >  struct ofputil_flow_stats **fsesp, size_t *n_fsesp);
> > 
> > -/* Bundle errors must be free()d by the caller. */
> > -struct vconn_bundle_error {
> > -struct ovs_list list_node;
> > -
> > -/* OpenFlow header and some of the message contents for error 
> > reporting. */
> > -union {
> > -struct ofp_header ofp_msg;
> > -uint8_t ofp_msg_data[64];
> > -};
> > -};
> > -
> > -/* Bundle errors must be free()d by the caller. */
> > int vconn_bundle_transact(struct vconn *, struct ovs_list *requests,
> >   uint16_t bundle_flags,
> >   struct ovs_list *errors);
> > diff --git a/lib/vconn.c b/lib/vconn.c
> > index 6997eaa96e2c..8a9f0ca8fa96 100644
> > --- a/lib/vconn.c
> > +++ b/lib/vconn.c
> > @@ -744,18 +744,6 @@ vconn_recv_block(struct vconn *vconn, struct ofpbuf 
> > **msgp)
> > return retval;
> > }
> > 
> > -static void
> > -vconn_add_bundle_error(const struct ofp_header *oh, struct ovs_list 
> > *errors)
> > -{
> > -if (errors) {
> > -struct vconn_bundle_error *err = xmalloc(sizeof *err);
> > -size_t len = ntohs(oh->length);
> > -
> > -memcpy(err->ofp_msg_data, oh, MIN(len, sizeof err->ofp_msg_data));
> > -ovs_list_push_back(errors, >list_node);
> > -}
> > -}
> > -
> > static int
> > vconn_recv_xid__(struct vconn *vconn, ovs_be32 xid, struct ofpbuf **replyp,
> >  struct ovs_list *errors)
> > @@ -781,13 +769,13 @@ vconn_recv_xid__(struct vconn *vconn, ovs_be32 xid, 
> > struct ofpbuf **replyp,
> > 
> > error = ofptype_decode(, oh);
> > if (!error && type == OFPTYPE_ERROR) {
> > -vconn_add_bundle_error(oh, errors);
> > +ovs_list_push_back(errors, >list_node);
> > } else {
> > VLOG_DBG_RL(_ofmsg_rl, "%s: received reply with xid 
> > %08"PRIx32
> > " != expected %08"PRIx32,
> > vconn->name, ntohl(recv_xid), ntohl(xid));
> > +ofpbuf_delete(reply);
> > }
> > -ofpbuf_delete(reply);
> > }
> > }
> > 
> > @@ -1078,7 +1066,8 @@ vconn_bundle_reply_validate(struct ofpbuf *reply,
> > }
> > 
> > if (type == OFPTYPE_ERROR) {
> > -vconn_add_bundle_error(oh, errors);
> > +struct ofpbuf *copy = ofpbuf_clone(reply);
> > +ovs_list_push_back(errors, >list_node);
> > return ofperr_decode_msg(oh, NULL);
> > }
> > if (type != OFPTYPE_BUNDLE_CONTROL) {
> > @@ -1150,13 +1139,13 @@ vconn_recv_error(struct vconn *vconn, struct 
> > ovs_list *errors)
> > oh = reply->data;
> > ofperr = ofptype_decode(, oh);
> > if (!ofperr && type == OFPTYPE_ERROR) {
> > -vconn_add_bundle_error(oh, errors);
> > +ovs_list_push_back(errors, >list_node);
> > } else {
> > VLOG_DBG_RL(_ofmsg_rl,
> > "%s: received unexpected reply with xid 
> > %08"PRIx32,
> > vconn->name, ntohl(oh->xid));
> > +

Re: [ovs-dev] [PATCH] ovs-ofctl: Avoid read overrun in ofperr_decode_msg().

2017-06-13 Thread Jarno Rajahalme
Seems like I leaped from the fact that error message’s payload must contain at 
least 64 bytes of the message causing the error (or, less, if the message 
length was less than 64), to the erroneous notion that the whole error message 
would only need 64 bytes of storage. Thanks for fixing this.

Acked-by: Jarno Rajahlame >

> On Jun 13, 2017, at 4:04 PM, Ben Pfaff  wrote:
> 
> vconn_add_bundle_error() was keeping at most 64 bytes of an OpenFlow
> error message, then it was passing it to ofperr_decode_msg(), which assumed
> that the full message was available.  This led to a buffer overread.
> There's no good reason why it was only keeping the first 64 bytes, so this
> commit changes it to keep the whole error message, sidestepping the
> problem.
> 
> struct vconn_bundle_error only existed for this special case, so remove it
> in favor of a chain of ofpbufs.
> 
> Found via gcc's address sanitizer.
> 
> Reported-by: Lance Richardson 
> CC: Jarno Rajahalme 
> Fixes: 506c1ddb3404 ("vconn: Better bundle error management.")
> Signed-off-by: Ben Pfaff 
> ---
> include/openvswitch/vconn.h | 12 
> lib/vconn.c | 25 -
> utilities/ovs-ofctl.c   | 10 ++
> 3 files changed, 14 insertions(+), 33 deletions(-)
> 
> diff --git a/include/openvswitch/vconn.h b/include/openvswitch/vconn.h
> index 40ca9edfe868..90f9bad2c1c9 100644
> --- a/include/openvswitch/vconn.h
> +++ b/include/openvswitch/vconn.h
> @@ -61,18 +61,6 @@ int vconn_dump_flows(struct vconn *, const struct 
> ofputil_flow_stats_request *,
>  enum ofputil_protocol,
>  struct ofputil_flow_stats **fsesp, size_t *n_fsesp);
> 
> -/* Bundle errors must be free()d by the caller. */
> -struct vconn_bundle_error {
> -struct ovs_list list_node;
> -
> -/* OpenFlow header and some of the message contents for error reporting. 
> */
> -union {
> -struct ofp_header ofp_msg;
> -uint8_t ofp_msg_data[64];
> -};
> -};
> -
> -/* Bundle errors must be free()d by the caller. */
> int vconn_bundle_transact(struct vconn *, struct ovs_list *requests,
>   uint16_t bundle_flags,
>   struct ovs_list *errors);
> diff --git a/lib/vconn.c b/lib/vconn.c
> index 6997eaa96e2c..8a9f0ca8fa96 100644
> --- a/lib/vconn.c
> +++ b/lib/vconn.c
> @@ -744,18 +744,6 @@ vconn_recv_block(struct vconn *vconn, struct ofpbuf 
> **msgp)
> return retval;
> }
> 
> -static void
> -vconn_add_bundle_error(const struct ofp_header *oh, struct ovs_list *errors)
> -{
> -if (errors) {
> -struct vconn_bundle_error *err = xmalloc(sizeof *err);
> -size_t len = ntohs(oh->length);
> -
> -memcpy(err->ofp_msg_data, oh, MIN(len, sizeof err->ofp_msg_data));
> -ovs_list_push_back(errors, >list_node);
> -}
> -}
> -
> static int
> vconn_recv_xid__(struct vconn *vconn, ovs_be32 xid, struct ofpbuf **replyp,
>  struct ovs_list *errors)
> @@ -781,13 +769,13 @@ vconn_recv_xid__(struct vconn *vconn, ovs_be32 xid, 
> struct ofpbuf **replyp,
> 
> error = ofptype_decode(, oh);
> if (!error && type == OFPTYPE_ERROR) {
> -vconn_add_bundle_error(oh, errors);
> +ovs_list_push_back(errors, >list_node);
> } else {
> VLOG_DBG_RL(_ofmsg_rl, "%s: received reply with xid %08"PRIx32
> " != expected %08"PRIx32,
> vconn->name, ntohl(recv_xid), ntohl(xid));
> +ofpbuf_delete(reply);
> }
> -ofpbuf_delete(reply);
> }
> }
> 
> @@ -1078,7 +1066,8 @@ vconn_bundle_reply_validate(struct ofpbuf *reply,
> }
> 
> if (type == OFPTYPE_ERROR) {
> -vconn_add_bundle_error(oh, errors);
> +struct ofpbuf *copy = ofpbuf_clone(reply);
> +ovs_list_push_back(errors, >list_node);
> return ofperr_decode_msg(oh, NULL);
> }
> if (type != OFPTYPE_BUNDLE_CONTROL) {
> @@ -1150,13 +1139,13 @@ vconn_recv_error(struct vconn *vconn, struct ovs_list 
> *errors)
> oh = reply->data;
> ofperr = ofptype_decode(, oh);
> if (!ofperr && type == OFPTYPE_ERROR) {
> -vconn_add_bundle_error(oh, errors);
> +ovs_list_push_back(errors, >list_node);
> } else {
> VLOG_DBG_RL(_ofmsg_rl,
> "%s: received unexpected reply with xid 
> %08"PRIx32,
> vconn->name, ntohl(oh->xid));
> +ofpbuf_delete(reply);
> }
> -ofpbuf_delete(reply);
> }
> } while (!error);
> }
> @@ -1209,6 +1198,8 @@ vconn_bundle_add_msg(struct vconn *vconn, struct 
> ofputil_bundle_ctrl_msg *bc,
> return error;
> }
> 
> +/* Appends ofpbufs for received errors, if any, to 'errors'.  The caller must
> + * free the 

[ovs-dev] [PATCH 4/5] dpif-netdev: Add adaptive CD mechanism

2017-06-13 Thread yipeng1 . wang
From: Yipeng Wang 

When there are only one subtable in the megaflow cache, CD does not benefit.
In such case, CD actually hurts the performance because of the extra CD lookup
process.

This patch implements an adaptive turn on/off CD mechanism. The average
iterated subtable count will be collected for every 5 seconds, and depending
on this count, CD will be turned on or off during run-time.

This patch depends on previous patches.

Signed-off-by: Yipeng Wang 
Signed-off-by: Charlie Tai 
Co-authored-by: Charlie Tai 
Signed-off-by: Sameh Gobriel 
Co-authored-by: Sameh Gobriel 
Signed-off-by: Ren Wang 
Co-authored-by: Ren Wang 
Signed-off-by: Antonio Fischetti 
Co-authored-by: Antonio Fischetti 
---
 lib/dpif-netdev.c | 160 +++---
 1 file changed, 105 insertions(+), 55 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 691fbad..0f71099 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -220,6 +220,9 @@ typedef uint16_t cd_sig_t;
  */
 #define SUB_PTR_LEN 256
 
+/* Time in ms between the decisions of turning on or off CD. */
+#define DPCLS_CD_OPTIMIZATION_INTERVAL 5000
+
 /* The bucket struct for cuckoo distributor*/
 struct cd_bucket {
 cd_sig_t sig[CD_ENTRIES];   /* 2-byte long signature. */
@@ -243,6 +246,7 @@ struct dpcls {
 struct cmap subtables_map;
 struct pvector subtables;
 struct cd_cache *cdtable;   /* The cuckoo distributor. */
+uint8_t cd_on;/* Turn on or off CD during runtime. */
 struct dpcls_subtable *sub_ptrs[SUB_PTR_LEN]; /* Subtable pointer array. */
 };
 
@@ -608,6 +612,8 @@ struct dp_netdev_pmd_thread {
 struct cmap classifiers;
 /* Periodically sort subtable vectors according to hit frequencies */
 long long int next_optimization;
+/* Periodically decide if we should turn on/off CD */
+long long int next_cd_optimization;
 
 /* Statistics. */
 struct dp_netdev_pmd_stats stats;
@@ -2784,12 +2790,14 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
 cmap_insert(>flow_table, CONST_CAST(struct cmap_node *, >node),
 dp_netdev_flow_hash(>ufid));
 /* Insert to CD here. */
-if (OVS_LIKELY(key != NULL)) {
-struct dpcls_subtable *subtable = dpcls_find_subtable(cls, );
-int index = find_index_in_sub_ptrs(cls, subtable);
+if (cls->cd_on) {
+if (OVS_LIKELY(key != NULL)) {
+struct dpcls_subtable *subtable = dpcls_find_subtable(cls, );
+int index = find_index_in_sub_ptrs(cls, subtable);
 
-if (index != 0) {
-cd_insert(cls->cdtable, key, index);
+if (index != 0) {
+cd_insert(cls->cdtable, key, index);
+}
 }
 }
 
@@ -5945,6 +5953,12 @@ struct dpcls_subtable {
 struct cmap rules;   /* Contains "struct dpcls_rule"s. */
 uint32_t hit_cnt;/* Number of match hits in subtable in current
 optimization interval. */
+uint32_t access_cnt; /* With CD implemented, hit_cnt should be subtable
+  * hits that miss in CD, so the ranking mechanism
+  * which is based on hit_cnt still works properly.
+  * We have the access_cnt as total access count to
+  * each subtable to consider if we should turn on
+  * or turn off CD. */
 struct netdev_flow_key mask; /* Wildcards for fields (const). */
 /* 'mask' must be the last field, additional space is allocated here. */
 };
@@ -5964,6 +5978,7 @@ dpcls_init(struct dpcls *cls)
 VLOG_ERR("Create cuckoo distributor failed");
 }
 cd_init(cls->cdtable);
+cls->cd_on = 1;
 for (i = 0; i < SUB_PTR_LEN; i++) {
 cls->sub_ptrs[i] = 0;
 }
@@ -6011,6 +6026,7 @@ dpcls_create_subtable(struct dpcls *cls, const struct 
netdev_flow_key *mask)
- sizeof subtable->mask.mf + mask->len);
 cmap_init(>rules);
 subtable->hit_cnt = 0;
+subtable->access_cnt = 0;
 netdev_flow_key_clone(>mask, mask);
 cmap_insert(>subtables_map, >cmap_node, mask->hash);
 /* Add the new subtable at the end of the pvector (with no hits yet) */
@@ -6073,6 +6089,34 @@ dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread 
*pmd)
 pmd->next_optimization = now + DPCLS_OPTIMIZATION_INTERVAL;
 }
 }
+if (now > pmd->next_cd_optimization) {
+CMAP_FOR_EACH (cls, node, >classifiers) {
+struct pvector *pvec = >subtables;
+struct dpcls_subtable *subtable;
+float avg_table_cnt = 0;
+int cnt = 0;
+uint32_t total = 0;
+uint32_t sum = 0;
+PVECTOR_FOR_EACH (subtable,pvec) {
+sum += subtable->access_cnt * cnt;
+total += subtable->access_cnt;
+

[ovs-dev] [PATCH 5/5] unit-test: Add a delay for CD initialization.

2017-06-13 Thread yipeng1 . wang
From: Yipeng Wang 

This patch adds a delay during test 1215 for considering CD
initialization time.

Signed-off-by: Yipeng Wang 
Signed-off-by: Charlie Tai 
Co-authored-by: Charlie Tai 
Signed-off-by: Sameh Gobriel 
Co-authored-by: Sameh Gobriel 
Signed-off-by: Ren Wang 
Co-authored-by: Ren Wang 
Signed-off-by: Antonio Fischetti 
Co-authored-by: Antonio Fischetti 
---
 tests/ofproto-dpif.at | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index e222866..8b850a4 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -9494,6 +9494,9 @@ AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
 dnl Start a new connection from port 1.
 AT_CHECK([ovs-appctl netdev-dummy/receive p1 
'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.1.1.1,dst=10.1.1.2,proto=17,tos=0,ttl=64,frag=no),udp(src=1,dst=2)'])
 
+# cuckoo distributor requires time for initilization, add sleep
+sleep 2
+
 AT_CHECK([cat ovs-vswitchd.log | strip_ufid | filter_flow_install], [0], [dnl
 recirc_id(0),in_port(1),eth_type(0x0800),ipv4(proto=17,frag=no), 
actions:ct(commit)
 ])
-- 
1.9.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 3/5] dpif-netdev: Add CD statistics

2017-06-13 Thread yipeng1 . wang
From: Yipeng Wang 

This patch adds CD hit and miss statistics to dp_stat_type. PMD stats will
show the total CD hit and miss counts. This patch depends on the first patch.

Signed-off-by: Yipeng Wang 
Signed-off-by: Charlie Tai 
Co-authored-by: Charlie Tai 
Signed-off-by: Sameh Gobriel 
Co-authored-by: Sameh Gobriel 
Signed-off-by: Ren Wang 
Co-authored-by: Ren Wang 
Signed-off-by: Antonio Fischetti 
Co-authored-by: Antonio Fischetti 
---
 lib/dpif-netdev.c | 36 ++--
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 23b3e42..691fbad 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -263,7 +263,7 @@ static void dpcls_remove(struct dpcls *, struct dpcls_rule 
*);
 static bool dpcls_lookup(struct dpcls *cls,
  const struct netdev_flow_key keys[],
  struct dpcls_rule **rules, size_t cnt,
- int *num_lookups_p);
+ int *num_lookups_p, int *cd_hit);
 static inline struct dpcls_subtable *
 dpcls_find_subtable(struct dpcls *cls, const struct netdev_flow_key *mask);
 
@@ -387,6 +387,8 @@ enum dp_stat_type {
 DP_STAT_LOST,   /* Packets not passed up to the client. */
 DP_STAT_LOOKUP_HIT, /* Number of subtable lookups for flow table
hits */
+DP_STAT_CD_HIT, /* Packets that hit CD. */
+DP_STAT_CD_MISS,/* Packets that miss CD. */
 DP_N_STATS
 };
 
@@ -848,7 +850,8 @@ pmd_info_show_stats(struct ds *reply,
 stats[i] = 0;
 }
 
-if (i != DP_STAT_LOST) {
+if (i != DP_STAT_LOST && i != DP_STAT_LOOKUP_HIT
+&& i != DP_STAT_CD_MISS && i != DP_STAT_CD_HIT) {
 /* Lost packets are already included in DP_STAT_MISS */
 total_packets += stats[i];
 }
@@ -885,6 +888,10 @@ pmd_info_show_stats(struct ds *reply,
   : 0,
   stats[DP_STAT_MISS], stats[DP_STAT_LOST]);
 
+ds_put_format(reply,
+  "\tCD hits:%llu\n\tCD miss:%llu\n",
+  stats[DP_STAT_CD_HIT], stats[DP_STAT_CD_MISS]);
+
 if (total_cycles == 0) {
 return;
 }
@@ -2486,7 +2493,7 @@ find_index_in_sub_ptrs(struct dpcls *cls,
 static struct dp_netdev_flow *
 dp_netdev_pmd_lookup_flow(struct dp_netdev_pmd_thread *pmd,
   const struct netdev_flow_key *key,
-  int *lookup_num_p)
+  int *lookup_num_p, int *cd_hit)
 {
 struct dpcls *cls;
 struct dpcls_rule *rule;
@@ -2495,7 +2502,7 @@ dp_netdev_pmd_lookup_flow(struct dp_netdev_pmd_thread 
*pmd,
 
 cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port);
 if (OVS_LIKELY(cls)) {
-dpcls_lookup(cls, key, , 1, lookup_num_p);
+dpcls_lookup(cls, key, , 1, lookup_num_p, cd_hit);
 netdev_flow = dp_netdev_flow_cast(rule);
 }
 return netdev_flow;
@@ -2848,7 +2855,7 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
 }
 
 ovs_mutex_lock(>flow_mutex);
-netdev_flow = dp_netdev_pmd_lookup_flow(pmd, key, NULL);
+netdev_flow = dp_netdev_pmd_lookup_flow(pmd, key, NULL, NULL);
 if (!netdev_flow) {
 if (put->flags & DPIF_FP_CREATE) {
 if (cmap_count(>flow_table) < MAX_FLOWS) {
@@ -5048,7 +5055,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
  * to be locking everyone out of making flow installs.  If we
  * move to a per-core classifier, it would be reasonable. */
 ovs_mutex_lock(>flow_mutex);
-netdev_flow = dp_netdev_pmd_lookup_flow(pmd, key, NULL);
+netdev_flow = dp_netdev_pmd_lookup_flow(pmd, key, NULL, NULL);
 if (OVS_LIKELY(!netdev_flow)) {
 netdev_flow = dp_netdev_flow_add(pmd, key, , ,
  add_actions->data,
@@ -5080,6 +5087,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
 struct dp_netdev *dp = pmd->dp;
 int miss_cnt = 0, lost_cnt = 0;
 int lookup_cnt = 0, add_lookup_cnt;
+int cd_hit = 0, add_cd_hit;
 bool any_miss;
 size_t i;
 
@@ -5090,7 +5098,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
 /* Get the classifier for the in_port */
 cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port);
 if (OVS_LIKELY(cls)) {
-any_miss = !dpcls_lookup(cls, keys, rules, cnt, _cnt);
+any_miss = !dpcls_lookup(cls, keys, rules, cnt, _cnt, _hit);
 } else {
 any_miss = true;
 memset(rules, 0, sizeof(rules));
@@ -5113,9 +5121,10 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
  * a rule covering this flow.  In this case, it's a lot cheaper
  * to catch it here than execute a miss. */
 netdev_flow = dp_netdev_pmd_lookup_flow(pmd, [i],
-_lookup_cnt);
+  

[ovs-dev] [PATCH 1/5] dpif-netdev: Basic CD feature with scalar lookup.

2017-06-13 Thread yipeng1 . wang
From: Yipeng Wang 

Cuckoo distributor (CD) is a double-hash function hash table, that helps
redirect packets to their corresponding subtables to avoid the sequential
search of megaflow subtables.  This is another layer of cache to cache flows
and their corresponding subtable indexes.  Different from a hash table, CD
can have certain false positive rate (since the full key is not stored for
space efficiency).  Our CD design was partially inspired by earlier concepts
proposed in "simTable"[1] and "Cuckoo Filter"[2], and DPDK's cuckoo hash
implementation.

For the current implementation, the design does not allow displacing items
when a bucket is full, which is different from the behavior of a cuckoo hash
table.  The advantage is that we do not need to store two signatures so that
the struct is more compact.  We use 16 entries per bucket for the
convenience of vector lookup.

Each classifier has its own cuckoo distributor.

Evaluation:
We create set of rules with various src IP. We feed traffic containing 1M
flows with various src IP and dst IP. All the flows hit 10/20/30
rules creating 10/20/30 subtables.

The table below shows the preliminary continuous testing results (full line
speed test) we collected with a uni-directional phy-to-phy setup. The
machine we tested on is a Xeon E5 server running with 2.2GHz cores. OvS
runs with 1 PMD. We use Spirent as the hardware traffic generator.

Scalar CD results:
1M flows:
no.subtable: 10  20  30
cd-ovs   3658328 3028111 2863329
orig_ovs 2683455 1646227 1240501
speedup  1.36x   1.84x   2.31x

[1] H. Lee and B. Lee, Approaches for improving tuple space search-based
table lookup, ICTC '15
[2] B. Fan, D. G. Andersen, M. Kaminsky, and M. D. Mitzenmacher,
Cuckoo Filter: Practically Better Than Bloom, CoNEXT '14

Signed-off-by: Yipeng Wang 
Signed-off-by: Charlie Tai 
Co-authored-by: Charlie Tai 
Signed-off-by: Sameh Gobriel 
Co-authored-by: Sameh Gobriel 
Signed-off-by: Ren Wang 
Co-authored-by: Ren Wang 
Signed-off-by: Antonio Fischetti 
Co-authored-by: Antonio Fischetti 
---
 lib/dpif-netdev.c | 421 +-
 1 file changed, 419 insertions(+), 2 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 2f224db..c697e78 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -172,6 +172,66 @@ struct emc_cache {
  i__ < EM_FLOW_HASH_SEGS;\
  i__++, srch_hash__ >>= EM_FLOW_HASH_SHIFT)
 
+
+/* Cuckoo distributor (CD) is a double-hash function hash table, that helps
+ * redirect packets to their corresponding subtables to avoid the sequential
+ * search of megaflow subtables.  This is another layer of cache to cache flows
+ * and their corresponding subtable indexes.  Different from a hash table, CD
+ * can have certain false positive rate (since the full key is not stored for
+ * space efficiency).  Our CD design was partially inspired by earlier concepts
+ * proposed in "simTable"[1] and "Cuckoo Filter"[2], and DPDK's cuckoo hash
+ * implementation.
+ *
+ * For the current implementation, the design does not allow displacing items
+ * when a bucket is full, which is different from the behavior of a cuckoo hash
+ * table.  The advantage is that we do not need to store two signatures so that
+ * the struct is more compact.  We use 16 entries per bucket for the
+ * convenience of vector lookup.
+ *
+ * Each classifier has its own cuckoo distributor.
+ *
+ * [1] H. Lee and B. Lee, Approaches for improving tuple space search-based
+ * table lookup, ICTC '15
+ * [2] B. Fan, D. G. Andersen, M. Kaminsky, and M. D. Mitzenmacher,
+ * Cuckoo Filter: Practically Better Than Bloom, CoNEXT '14
+ */
+
+#define CD_NUM_BUCKETS (1 << 16)
+#define CD_BUCKET_MASK (CD_NUM_BUCKETS - 1)
+/* Number of entries per bucket. */
+#define CD_ENTRIES 16
+#define CD_ENTRY_MASK (CD_ENTRIES - 1)
+
+/* These two seeds are used for hashing out two bucket locations. */
+#define CD_PRIM_SEED 10
+#define CD_SEC_SEED 20
+
+/* This bit is used to choose which bucket to replace CD's entry
+ * in cd_insert. */
+#define CD_BKT_BIT (1 << CD_ENTRIES)
+
+/* Two byte signature and same length of mask. */
+typedef uint16_t cd_sig_t;
+#define CD_SIG_MASK 0x
+
+/* Length of Subtable pointer array for cuckoo distributor to index subtables.
+ * The size of the table is at most 2^16-1 entires because the CD's entry
+ * provides 2 bytes for indexing currently.
+ */
+#define SUB_PTR_LEN 256
+
+/* The bucket struct for cuckoo distributor*/
+struct cd_bucket {
+cd_sig_t sig[CD_ENTRIES];   /* 2-byte long signature. */
+uint16_t table_index[CD_ENTRIES];   /* index to subtable pointer array. */
+} __attribute__ ((packed));
+
+
+struct cd_cache {
+struct cd_bucket buckets[CD_NUM_BUCKETS]; /* buckets array. */
+} __attribute__ ((aligned (64)));
+
+
 /* Simple non-wildcarding single-priority classifier. */
 
 /* 

[ovs-dev] [PATCH 2/5] dpif-netdev: Add AVX2 implementation for CD lookup.

2017-06-13 Thread yipeng1 . wang
From: Yipeng Wang 

This patch adds the AVX2 implementation during CD lookup. 16 entries of a
bucket will be compared together with the lookup key. This patch depends
on the first patch.

evaluation:
We setup the testing enviornment same to the previous patch. The AVX2
CD implementation's results are shown below.

AVX2 data:
1M flows:
no.subtable: 10  20  30
cd-ovs   3895961 3170530 2968555
orig-ovs 2683455 1646227 1240501
speedup  1.45x   1.92x   2.39x

Signed-off-by: Yipeng Wang 
Signed-off-by: Charlie Tai 
Co-authored-by: Charlie Tai 
Signed-off-by: Sameh Gobriel 
Co-authored-by: Sameh Gobriel 
Signed-off-by: Ren Wang 
Co-authored-by: Ren Wang 
Signed-off-by: Antonio Fischetti 
Co-authored-by: Antonio Fischetti 
---
 lib/dpif-netdev.c | 64 ++-
 1 file changed, 63 insertions(+), 1 deletion(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index c697e78..23b3e42 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2291,7 +2291,37 @@ cd_lookup_bulk_pipe(struct dpcls *cls, const struct 
netdev_flow_key keys[],
 
 rte_prefetch0(prim_bkt1);
 rte_prefetch0(sec_bkt1);
+#ifdef __AVX2__
+prim_hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+_mm256_load_si256((__m256i const *)prim_bkt0->sig),
+_mm256_set1_epi16(temp_sig0)));
 
+
+sec_hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+_mm256_load_si256((__m256i const *)sec_bkt0->sig),
+_mm256_set1_epi16(temp_sig0)));
+
+if (prim_hitmask) {
+loc = raw_ctz(prim_hitmask) / 2;
+data[i-1] =
+ prim_bkt0->table_index[loc];
+if (data[i-1] != 0 && cls->sub_ptrs[data[i-1]] != 0) {
+hits |= 1 << (i - 1);
+prim_bkt0 = prim_bkt1;
+sec_bkt0 = sec_bkt1;
+temp_sig0 = temp_sig1;
+continue;
+}
+}
+
+if (sec_hitmask) {
+loc = raw_ctz(sec_hitmask) / 2;
+data[i-1] = sec_bkt0->table_index[loc];
+if (data[i-1] != 0 && cls->sub_ptrs[data[i-1]] != 0) {
+   hits |= 1 << (i - 1);
+}
+}
+#else
 unsigned int j;
 prim_hitmask = 0;
 sec_hitmask = 0;
@@ -2320,12 +2350,42 @@ cd_lookup_bulk_pipe(struct dpcls *cls, const struct 
netdev_flow_key keys[],
 hits |= 1 << (i - 1);
 }
 }
-
+#endif
 prim_bkt0 = prim_bkt1;
 sec_bkt0 = sec_bkt1;
 temp_sig0 = temp_sig1;
 }
 
+#ifdef __AVX2__
+prim_hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+_mm256_load_si256((__m256i const *)prim_bkt0->sig),
+_mm256_set1_epi16(temp_sig0)));
+
+
+sec_hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+_mm256_load_si256((__m256i const *)sec_bkt0->sig),
+_mm256_set1_epi16(temp_sig0)));
+
+if (prim_hitmask) {
+loc = raw_ctz(prim_hitmask) / 2;
+data[i-1] = prim_bkt0->table_index[loc];
+if (data[i-1] != 0 && cls->sub_ptrs[data[i-1]] != 0) {
+hits |= 1 << (i - 1);
+if (hit_mask != NULL) {
+*hit_mask = hits;
+}
+return;
+}
+ }
+
+if (sec_hitmask) {
+loc = raw_ctz(sec_hitmask) / 2;
+data[i-1] = sec_bkt0->table_index[loc];
+if (data[i-1] != 0 && cls->sub_ptrs[data[i-1]] != 0) {
+   hits |= 1 << (i - 1);
+}
+}
+#else
 unsigned int j;
 prim_hitmask = 0;
 sec_hitmask = 0;
@@ -2355,9 +2415,11 @@ cd_lookup_bulk_pipe(struct dpcls *cls, const struct 
netdev_flow_key keys[],
 }
 }
 
+#endif
 if (hit_mask != NULL) {
 *hit_mask = hits;
 }
+
 }
 
 static int
-- 
1.9.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 0/5] dpif-netdev: Cuckoo-Distributor implementation

2017-06-13 Thread yipeng1 . wang
From: Yipeng Wang 

The Datapath Classifier uses tuple space search for flow classification.
The rules are arranged into a set of tuples/subtables (each with a
distinct mask).  Each subtable is implemented as a hash table and lookup
is done with flow keys formed by selecting the bits from the packet header
based on each subtable's mask. Tuple space search will sequentially search
each subtable until a match is found. With a large number of subtables, a
sequential search of the subtables could consume a lot of CPU cycles. In
a testbench with a uniform traffic pattern equally distributed across 20
subtables, we measured that up to 65% of total execution time is attributed
to the megaflow cache lookup.

This patch presents the idea of the two-layer hierarchical lookup, where a
low overhead first level of indirection is accessed first, we call this
level cuckoo distributor (CD). If a flow key has been inserted in the flow
table the first level will indicate with high probability that which
subtable to look into. A lookup is performed on the second level (the
target subtable) to retrieve the result. If the key doesn’t have a match,
then we revert back to the sequential search of subtables. The patch is
partially inspired by earlier concepts proposed in "simTable"[1] and
"Cuckoo Filter"[2], and DPDK's Cuckoo Hash implementation.

This patch can improve the already existing Subtable Ranking when traffic
data has high entropy. Subtable Ranking helps minimize the number of
traversed subtables when most of the traffic hit the same subtable.
However, in the case of high entropy traffic such as traffic coming from
a physical port, multiple subtables could be hit with a similar frequency.
In this case the average subtable lookups per hit would be much greater
than 1. In addition, CD can adaptively turn off when it finds the traffic
mostly hit one subtable. Thus, CD will not be an overhead when Subtable
Ranking works well.

Scheme:

 ---
|  CD   |
 ---
   \
\
 -  - -
|sub  ||sub  |...|sub  |
|table||table|   |table|
 -  - -

Evaluation:

We create set of rules with various src IP. We feed traffic containing various
numbers of flows with various src IP and dst IP. All the flows hit 10/20/30
rules creating 10/20/30 subtables.

The table below shows the preliminary continuous testing results (full line
speed test) we collected with a uni-directional phy-to-phy setup. The
machine we tested on is a Xeon E5 server running with 2.2GHz cores. OvS
runs with 1 PMD. We use Spirent as the hardware traffic generator.

AVX2 data:
20k flows:
no.subtable: 10  20  30
cd-ovs   4267332 3478251 3126763
orig-ovs 3260883 2174551 1689981
speedup  1.31x   1.60x   1.85x

100k flows:
no.subtable: 10  20  30
cd-ovs   4015783 3276100 2970645
orig-ovs 2692882 1711955 1302321
speedup  1.49x   1.91x   2.28x

1M flows:
no.subtable: 10  20  30
cd-ovs   3895961 3170530 2968555
orig-ovs 2683455 1646227 1240501
speedup  1.45x   1.92x   2.39x

Scalar data:
1M flows:
no.subtable: 10  20  30
cd-ovs   3658328 3028111 2863329
orig_ovs 2683455 1646227 1240501
speedup  1.36x   1.84x   2.31x

[1] H. Lee and B. Lee, Approaches for improving tuple space search-based
table lookup, ICTC '15
[2] B. Fan, D. G. Andersen, M. Kaminsky, and M. D. Mitzenmacher,
Cuckoo Filter: Practically Better Than Bloom, CoNEXT '14

This patch set is created based on commit
a13784ba95efeb5a1f77253df40d433a1ce60087

The previous RFC on mailing list are at:
https://mail.openvswitch.org/pipermail/ovs-dev/2017-May/331834.html
https://mail.openvswitch.org/pipermail/ovs-dev/2017-April/330570.html

Signed-off-by: Yipeng Wang 
Signed-off-by: Charlie Tai 
Co-authored-by: Charlie Tai 
Signed-off-by: Sameh Gobriel 
Co-authored-by: Sameh Gobriel 
Signed-off-by: Ren Wang 
Co-authored-by: Ren Wang 
Signed-off-by: Antonio Fischetti 
Co-authored-by: Antonio Fischetti 


Yipeng Wang (5):
  dpif-netdev: Basic CD feature with scalar lookup.
  dpif-netdev: Add AVX2 implementation for CD lookup.
  dpif-netdev: Add CD statistics
  dpif-netdev: Add adaptive CD mechanism
  unit-test: Add a delay for CD initialization.

 lib/dpif-netdev.c | 566 +-
 tests/ofproto-dpif.at |   3 +
 2 files changed, 558 insertions(+), 11 deletions(-)

-- 
1.9.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] ofp-errors: avoid read overrun in ofperr_decode_msg()

2017-06-13 Thread Ben Pfaff
On Tue, Jun 13, 2017 at 02:31:22PM -0400, Lance Richardson wrote:
> vconn_add_bundle_error() stores a maximum of 64 bytes of an
> OpenFlow packet, however ofperr_decode_msg() assumes that the
> entire packet is present. This leads to a buffer read overrun
> when the the packet is copied to another buffer using the full
> packet size.
> 
> Fix by adding a parameter to ofperr_decode_msg() indicating the
> size of the buffer containing the OpenFlow packet.
> 
> Found via gcc's address sanitizer.
> 
> Fixes: 506c1ddb3404 ("vconn: Better bundle error management.")
> Signed-off-by: Lance Richardson 

I'm not sure why we keep just the first 64 bytes.  It seems actually
easier to just keep the whole thing:
https://patchwork.ozlabs.org/patch/775500/
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] ovs-ofctl: Avoid read overrun in ofperr_decode_msg().

2017-06-13 Thread Ben Pfaff
vconn_add_bundle_error() was keeping at most 64 bytes of an OpenFlow
error message, then it was passing it to ofperr_decode_msg(), which assumed
that the full message was available.  This led to a buffer overread.
There's no good reason why it was only keeping the first 64 bytes, so this
commit changes it to keep the whole error message, sidestepping the
problem.

struct vconn_bundle_error only existed for this special case, so remove it
in favor of a chain of ofpbufs.

Found via gcc's address sanitizer.

Reported-by: Lance Richardson 
CC: Jarno Rajahalme 
Fixes: 506c1ddb3404 ("vconn: Better bundle error management.")
Signed-off-by: Ben Pfaff 
---
 include/openvswitch/vconn.h | 12 
 lib/vconn.c | 25 -
 utilities/ovs-ofctl.c   | 10 ++
 3 files changed, 14 insertions(+), 33 deletions(-)

diff --git a/include/openvswitch/vconn.h b/include/openvswitch/vconn.h
index 40ca9edfe868..90f9bad2c1c9 100644
--- a/include/openvswitch/vconn.h
+++ b/include/openvswitch/vconn.h
@@ -61,18 +61,6 @@ int vconn_dump_flows(struct vconn *, const struct 
ofputil_flow_stats_request *,
  enum ofputil_protocol,
  struct ofputil_flow_stats **fsesp, size_t *n_fsesp);
 
-/* Bundle errors must be free()d by the caller. */
-struct vconn_bundle_error {
-struct ovs_list list_node;
-
-/* OpenFlow header and some of the message contents for error reporting. */
-union {
-struct ofp_header ofp_msg;
-uint8_t ofp_msg_data[64];
-};
-};
-
-/* Bundle errors must be free()d by the caller. */
 int vconn_bundle_transact(struct vconn *, struct ovs_list *requests,
   uint16_t bundle_flags,
   struct ovs_list *errors);
diff --git a/lib/vconn.c b/lib/vconn.c
index 6997eaa96e2c..8a9f0ca8fa96 100644
--- a/lib/vconn.c
+++ b/lib/vconn.c
@@ -744,18 +744,6 @@ vconn_recv_block(struct vconn *vconn, struct ofpbuf **msgp)
 return retval;
 }
 
-static void
-vconn_add_bundle_error(const struct ofp_header *oh, struct ovs_list *errors)
-{
-if (errors) {
-struct vconn_bundle_error *err = xmalloc(sizeof *err);
-size_t len = ntohs(oh->length);
-
-memcpy(err->ofp_msg_data, oh, MIN(len, sizeof err->ofp_msg_data));
-ovs_list_push_back(errors, >list_node);
-}
-}
-
 static int
 vconn_recv_xid__(struct vconn *vconn, ovs_be32 xid, struct ofpbuf **replyp,
  struct ovs_list *errors)
@@ -781,13 +769,13 @@ vconn_recv_xid__(struct vconn *vconn, ovs_be32 xid, 
struct ofpbuf **replyp,
 
 error = ofptype_decode(, oh);
 if (!error && type == OFPTYPE_ERROR) {
-vconn_add_bundle_error(oh, errors);
+ovs_list_push_back(errors, >list_node);
 } else {
 VLOG_DBG_RL(_ofmsg_rl, "%s: received reply with xid %08"PRIx32
 " != expected %08"PRIx32,
 vconn->name, ntohl(recv_xid), ntohl(xid));
+ofpbuf_delete(reply);
 }
-ofpbuf_delete(reply);
 }
 }
 
@@ -1078,7 +1066,8 @@ vconn_bundle_reply_validate(struct ofpbuf *reply,
 }
 
 if (type == OFPTYPE_ERROR) {
-vconn_add_bundle_error(oh, errors);
+struct ofpbuf *copy = ofpbuf_clone(reply);
+ovs_list_push_back(errors, >list_node);
 return ofperr_decode_msg(oh, NULL);
 }
 if (type != OFPTYPE_BUNDLE_CONTROL) {
@@ -1150,13 +1139,13 @@ vconn_recv_error(struct vconn *vconn, struct ovs_list 
*errors)
 oh = reply->data;
 ofperr = ofptype_decode(, oh);
 if (!ofperr && type == OFPTYPE_ERROR) {
-vconn_add_bundle_error(oh, errors);
+ovs_list_push_back(errors, >list_node);
 } else {
 VLOG_DBG_RL(_ofmsg_rl,
 "%s: received unexpected reply with xid %08"PRIx32,
 vconn->name, ntohl(oh->xid));
+ofpbuf_delete(reply);
 }
-ofpbuf_delete(reply);
 }
 } while (!error);
 }
@@ -1209,6 +1198,8 @@ vconn_bundle_add_msg(struct vconn *vconn, struct 
ofputil_bundle_ctrl_msg *bc,
 return error;
 }
 
+/* Appends ofpbufs for received errors, if any, to 'errors'.  The caller must
+ * free the received errors. */
 int
 vconn_bundle_transact(struct vconn *vconn, struct ovs_list *requests,
   uint16_t flags, struct ovs_list *errors)
diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c
index dca9be3a5995..95989eb11d16 100644
--- a/utilities/ovs-ofctl.c
+++ b/utilities/ovs-ofctl.c
@@ -699,16 +699,18 @@ static void
 bundle_print_errors(struct ovs_list *errors, struct ovs_list *requests,
 const char *vconn_name)
 {
-struct vconn_bundle_error *error, *next;
+struct ofpbuf *error, *next;
 struct ofpbuf *bmsg;
 
 INIT_CONTAINER(bmsg, requests, list_node);
 
 

[ovs-dev] Empleados Conflictivos y Difíciles

2017-06-13 Thread Detener oportunamente conductas Tóxicas
DIRIGIDO A: Directivos, gerentes, supervisores y en general a todo líder que
tenga personal a su cargo.

Cómo Trabajar con Empleados Difíciles y Conflictivos


Mientras que la mayoría de los trabajadores no dan en absoluto problemas, se 
llevan bien con los demás y hacen su trabajo eficientemente, 
los “trabajadores conflictivos” rompen el ritmo de trabajo, hacen difícil la 
comunicación entre todos, incomodan a otros compañeros y 
superiores o simplemente resisten de modo pasivo; tienen actitudes que irritan 
a todos y con nada están a gusto.

Usted Aprenderá a :
 
- Identificar al personal conflictivo o difícil.
- Aprender técnicas para resolver problemas.
- Aprenderá a poner un alto a los problemas generados por empleados 
conflictivos.
- Detener oportunamente conductas “Tóxicas”


Los cuatro problemas principales que los directivos atraviesan con sus 
colaboradores más difíciles son las personalidades difíciles, 
las malas actitudes, el bajo desempeño y los problemas de comunicación


¿Requiere la información a la Brevedad?
responda este email con la palabra: Empleados.
Junto con los siguientes datos: 

Nombre:

Teléfono:

Empresa:

centro telefónico: 018002129393
 

Lic. Arturo López
Coordinador de Evento


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v1 1/4] ovn: l3ha, handling of multiple gateways

2017-06-13 Thread Russell Bryant
On Tue, Jun 13, 2017 at 4:40 PM, Russell Bryant  wrote:
> On Fri, Jun 2, 2017 at 8:31 AM,   wrote:
>> From: Miguel Angel Ajo 
>>
>> This patch handles multiple gateways with priorities in chassisredirect
>> ports, any gateway with a chassis redirect port will implement the
>> rules to de-encapsulate incomming packets for such port.
>>
>> And hosts targetting a remote chassisredirect port will setup a
>> bundle(active_backup, ..) action to each tunnel port, in the given
>> priority order.
>>
>> Signed-off-by: Miguel Angel Ajo 
>> ---
>>  ovn/controller/binding.c|   9 +--
>>  ovn/controller/lflow.c  |   6 +-
>>  ovn/controller/lport.c  | 119 
>> 
>>  ovn/controller/lport.h  |  28 ++
>>  ovn/controller/ovn-controller.c |   5 +-
>>  ovn/controller/physical.c   | 114 --
>>  6 files changed, 255 insertions(+), 26 deletions(-)
>
> Some high level comments to start ...
>
> Ideally with a patch series, each patch should be applicable on its
> own.  With this patch applied, some tests are failing for me.
>
> Documentation should also be included with whatever patch first
> introduces functionality, so I'd expect docs on the updated
> redirect-chassis format here.
>
> Please read over
> Documentation/internals/contributing/coding-style.rst.  There are some
> minor style issues throughout the patch.  I can point them out in a
> more detailed pass.
>
> The patch makes me wonder if we should introduce a more structured
> format for specifying chassis associated with a router port.  It feels
> like we're encoding too much in a single option string.  Maybe we
> should add a new "chassis" column to Logical_Router_Port, that can
> include a list of chassis, which would have to be a new record type in
> OVN northbound, containing much less info than the southbound
> counterpart.  We'd have to add a similar new column to the
> Port_Binding table in OVN southbound.  I'm curious what you and others
> think about this, or if the parsed option string is fine.

Sorry, I replied to v1, but all comments apply to v2.

-- 
Russell Bryant
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v1 1/4] ovn: l3ha, handling of multiple gateways

2017-06-13 Thread Russell Bryant
On Fri, Jun 2, 2017 at 8:31 AM,   wrote:
> From: Miguel Angel Ajo 
>
> This patch handles multiple gateways with priorities in chassisredirect
> ports, any gateway with a chassis redirect port will implement the
> rules to de-encapsulate incomming packets for such port.
>
> And hosts targetting a remote chassisredirect port will setup a
> bundle(active_backup, ..) action to each tunnel port, in the given
> priority order.
>
> Signed-off-by: Miguel Angel Ajo 
> ---
>  ovn/controller/binding.c|   9 +--
>  ovn/controller/lflow.c  |   6 +-
>  ovn/controller/lport.c  | 119 
> 
>  ovn/controller/lport.h  |  28 ++
>  ovn/controller/ovn-controller.c |   5 +-
>  ovn/controller/physical.c   | 114 --
>  6 files changed, 255 insertions(+), 26 deletions(-)

Some high level comments to start ...

Ideally with a patch series, each patch should be applicable on its
own.  With this patch applied, some tests are failing for me.

Documentation should also be included with whatever patch first
introduces functionality, so I'd expect docs on the updated
redirect-chassis format here.

Please read over
Documentation/internals/contributing/coding-style.rst.  There are some
minor style issues throughout the patch.  I can point them out in a
more detailed pass.

The patch makes me wonder if we should introduce a more structured
format for specifying chassis associated with a router port.  It feels
like we're encoding too much in a single option string.  Maybe we
should add a new "chassis" column to Logical_Router_Port, that can
include a list of chassis, which would have to be a new record type in
OVN northbound, containing much less info than the southbound
counterpart.  We'd have to add a similar new column to the
Port_Binding table in OVN southbound.  I'm curious what you and others
think about this, or if the parsed option string is fine.

-- 
Russell Bryant
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] OVN Load Balancing Feedback

2017-06-13 Thread Ethan Jackson
Sure that makes sense.  I guess what's a little weird is that the load
balancer lives on the logical switch instead of the logical router.
Typically, when I think of a load balancer in the real world, it *is*
a router that just happens to rewrite IP addresses.  It wouldn't be a
router behind a switch that rewrites IPs, but not macs.

That said, not a huge deal ... it seems to work for us.  My only worry
is that another team wouldn't be able to figure it out.

Ethan

On Tue, Jun 13, 2017 at 12:30 PM, Guru Shetty  wrote:
>
>
> On 12 June 2017 at 16:54, Kevin Lin  wrote:
>>
>> Hello, My name is Kevin Lin and I work with Ethan on Quilt (quilt.io). We
>> just started using load balancing for the project -- Ethan wanted me to
>> write to
>> you all with feedback on the load balancer, and to get some feedback on
>> our
>> approach.
>>
>> For context, we have a number of containers connected to a single logical
>> switch. We would like to create load balancers across groups of these
>> containers. The load balancer should have it's own IP and MAC address, and
>> be accessible from any of the containers connected to the switch.
>>
>> The point that confused me about the load balancer was that it only
>> rewrites
>> the IP addresses, and doesn't handle MAC addresses for you. As a
>> result, it's very easy to set up a load balancer on a logical switch, that
>> changes the IP to the appropriate desitination container, but can't
>> respond
>> ARPs or rewrite MAC addresses. This got us thinking, that what we really
>> want
>> is a load balancer attached to a logical router, but the documentation
>> seems to
>> indicate that this can't be set up without using a gateway.
>>
>> Anyways, we got it working, but we ended up with this rather convoluted
>> design:
>> - The load balancer is associated with the logical switch.
>> - A logical router is connected to the logical switch. That just responds
>> to
>>   ARPs and forwards traffic sent to it back onto the logical switch (after
>>   rewriting the MAC).
>> - The IP of the load balancer is associated with this logical router's
>> port.
>
>
> This is how we use it for kubernetes too. The general thought process I went
> with was that you won't have just a logical switch without some connected
> gateway (either logical router or external default gateway) and we offload
> the hairpin to the connected router. This is how kube-proxy works in google
> cloud too. I did not want to add the additional complexity of ARP responses
> from the load-balancer.
>
>>
>>
>> This way, when containers ARP for and send traffic to a load balanced IP,
>> it
>> gets routed to the logical router, but the load balancer rules rewrite the
>> destination IP. The router then receives this packet, and routes it
>> through to
>> the rewritten IP. It works, but it's a bit of an unnatural hack.
>>
>> Hope this is helpful,
>> Kevin
>>
>



-- 
Ethan J. Jackson
quilt.io
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] sandbox: disable ssl for backup ovn southbound db

2017-06-13 Thread Ben Pfaff
On Tue, Jun 13, 2017 at 01:51:06PM -0400, Lance Richardson wrote:
> Since the sandbox environment was changed to enable SSL usage for
> OVN_Southbound connections, the backup southbound server emits
> the log message "socket_util|ERR|6642: bind: Address already in use"
> every 2.5 seconds.
> 
> Fix by configuring the backup db server to not use remote configuration
> from the database (the unix: socket can still be used, as was the
> case before SSL was enabled).
> 
> Fixes: 0ced2a5c5e47 ("sandbox: use ssl for ovn-controller to sb db 
> connection")
> Signed-off-by: Lance Richardson 

Thanks, applied to master.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [patch_v1] conntrack: Reset nat_info in un_nat conns.

2017-06-13 Thread Ben Pfaff
On Tue, Jun 13, 2017 at 11:20:54AM -0700, Greg Rose wrote:
> On 06/13/2017 07:46 AM, Darrell Ball wrote:
> >Un-nat conns have no nat_info as do default conns.
> >However, un-nat conns are originally templated from the
> >corresponding default conns and therefore need to
> >have their nat_info explicitly nulled.  This
> >otherwise exposes a double free if conntrack_destroy()
> >were to be used to destroy the connection tracker.  This
> >would apply to cleaning the datapath after testing.
> >
> >Fixes: 286de2729955 ("dpdk: Userspace Datapath: Introduce NAT Support.")
> >Signed-off-by: Darrell Ball 
> >---
> >  lib/conntrack.c | 1 +
> >  1 file changed, 1 insertion(+)
> >
> >diff --git a/lib/conntrack.c b/lib/conntrack.c
> >index 146edd7..90b154a 100644
> >--- a/lib/conntrack.c
> >+++ b/lib/conntrack.c
> >@@ -573,6 +573,7 @@ conn_not_found(struct conntrack *ct, struct dp_packet 
> >*pkt,
> >  nc->conn_type == CT_CONN_TYPE_DEFAULT) {
> >  *nc = *conn_for_un_nat_copy;
> >  conn_for_un_nat_copy->conn_type = CT_CONN_TYPE_UN_NAT;
> >+conn_for_un_nat_copy->nat_info = NULL;
> >  }
> >  ct_rwlock_unlock(>nat_resources_lock);
> >
> >
> I don't have a way to test this right at the moment but it's pretty simple 
> and looks good to me.
> 
> Thanks Darrell!
> 
> Acked-by: Greg Rose 

Thanks Darrell and Greg, I applied this to master.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 7/8] netdev-dpdk: Configurable retries while enqueuing to vHost User ports.

2017-06-13 Thread Bodireddy, Bhanuprakash
Hi Kevin,

>On 06/07/2017 10:21 AM, Bhanuprakash Bodireddy wrote:
>> This commit adds "vhost-enque-retry" where in the number of retries
>> performed while enqueuing packets to vHostUser ports can be configured
>> in ovsdb.
>>
>> Currently number of retries are set to '8' and a retry is performed
>> when atleast some packets have been successfully sent on previous
>attempt.
>> While this approach works well, it causes throughput drop when
>> multiple vHost User ports are servied by same PMD thread.
>
>Hi Bhanu,
>
>You are saying the approach works well but you are changing the default
>behaviour. It would be good to explain a bit more about the negative effects
>of changing the default and compare that against the positive effects, so
>everyone gets a balanced view. If you have measurements that would be
>even better.

This issue was discussed earlier at different forums (OvS-DPDK day during 2016 
fall conference and community call) about the negative effect of retries on 
vHost User ports. Giving a bit of background for others interested in this 
problem:

In OvS 2.5 Release: 
The retries on the vHost User ports were performed until a timeout(~100 micro 
seconds)  is reached. 
The problem with that approach was If the guest is connected and isn't actively 
processing its queues, it could potentially impact the performance of 
neighboring guests (other vHost User ports) provided the same PMD thread is 
servicing them all.  It was reported by me and you indeed provided the fix in 
2.6

In OvS 2.6 Release:
Timeout logic is removed and retry logic is introduced. Here a maximum up to 
'8' retries can be performed provided atleast one packet is transmitted 
successfully in the previous attempt.  

Problem:
Take the case where there are few VMs (with 3 vHost User ports each) serviced 
by same PMD thread. Some of the VMs are forwarding at high rates(using dpdk 
based app) and the remaining are slow VMs doing kernel forwarding in the guest. 
In this case the PMD would spend significant cycles for slower VMs and may end 
up doing maximum of 8 retries all the time.  However, in some cases  doing a 
retry immediately isn't of much value as there may not be any free descriptors 
available. 

Also if there are more slow ports, the packets can potentially get tail dropped 
at the NIC as PMD is busy processing the packets and doing retries. I don't 
have numbers right now to back this problem but can do some tests next week to 
assess the impact with and without retries. Also adding jan here who wanted the 
retry logic to be configurable.

Regards,
Bhanuprakash. 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] OVN Load Balancing Feedback

2017-06-13 Thread Guru Shetty
On 12 June 2017 at 16:54, Kevin Lin  wrote:

> Hello, My name is Kevin Lin and I work with Ethan on Quilt (quilt.io). We
> just started using load balancing for the project -- Ethan wanted me to
> write to
> you all with feedback on the load balancer, and to get some feedback on our
> approach.
>
> For context, we have a number of containers connected to a single logical
> switch. We would like to create load balancers across groups of these
> containers. The load balancer should have it's own IP and MAC address, and
> be accessible from any of the containers connected to the switch.
>
> The point that confused me about the load balancer was that it only
> rewrites
> the IP addresses, and doesn't handle MAC addresses for you. As a
> result, it's very easy to set up a load balancer on a logical switch, that
> changes the IP to the appropriate desitination container, but can't respond
> ARPs or rewrite MAC addresses. This got us thinking, that what we really
> want
> is a load balancer attached to a logical router, but the documentation
> seems to
> indicate that this can't be set up without using a gateway.
>
> Anyways, we got it working, but we ended up with this rather convoluted
> design:
> - The load balancer is associated with the logical switch.
> - A logical router is connected to the logical switch. That just responds
> to
>   ARPs and forwards traffic sent to it back onto the logical switch (after
>   rewriting the MAC).
> - The IP of the load balancer is associated with this logical router's
> port.
>

This is how we use it for kubernetes too. The general thought process I
went with was that you won't have just a logical switch without some
connected gateway (either logical router or external default gateway) and
we offload the hairpin to the connected router. This is how kube-proxy
works in google cloud too. I did not want to add the additional complexity
of ARP responses from the load-balancer.


>
> This way, when containers ARP for and send traffic to a load balanced IP,
> it
> gets routed to the logical router, but the load balancer rules rewrite the
> destination IP. The router then receives this packet, and routes it
> through to
> the rewritten IP. It works, but it's a bit of an unnatural hack.
>
> Hope this is helpful,
> Kevin
>
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [RFC PATCH 00/21] Add OVS DPDK keep-alive functionality

2017-06-13 Thread Aaron Conole
Bhanuprakash Bodireddy  writes:

> Keepalive feature is aimed at achieving Fastpath Service Assurance
> in OVS-DPDK deployments. It adds support for monitoring the packet
> processing cores(PMD thread cores) by dispatching heartbeats at regular
> intervals. Incase of heartbeat misses additional health checks are
> enabled on the PMD thread to detect the failure and the same shall be
> reported to higher level fault management systems/frameworks.
>
> The implementation uses OVSDB for reporting the datapath status and the
> health of the PMD threads. Any external monitoring application can read
> the status from OVSDB at regular intervals (or) subscribe to the updates
> in OVSDB so that they get notified when the changes happen on OVSDB.
>
> POSIX shared memory object is created and initialized for storing the
> status of the PMD threads. This is initialized by main thread(vswitchd)
> as part of init process and will be periodically updated by 'keepalive'
> thread. keepalive feature can be enabled through below OVSDB settings.
>
> enable-keepalive=true
>   - Keepalive feature is disabled by default.
>
> keepalive-interval="5000"
>   - Timer interval in milliseconds for monitoring the packet
> processing cores.
>
> keepalive-shm-name="/ovs_keepalive_shm_name"
>   - Shared memory block name where the events shall be updated.
>
> When KA is enabled, 'ovs-keepalive' thread shall be spawned that wakes
> up at regular intervals to update the timestamp and status of pmd cores
> in shared memory region. This information shall be read by vswitchd thread
> and write the status in to 'keepalive' column of Open_vSwitch table in OVSDB.
>
> An external monitoring framework like collectd with ovs events support
> can read (or) subscribe to the datapath status changes in ovsdb. When the 
> state
> is updated, the collectd shall be notified and will eventually relay the 
> status
> to ceilometer service running in the controller. Below is the high level
> overview of deployment model.
>
> Compute NodeControllerCompute Node
>
> Collectd  <--> Ceilometer <>   Collectd
>
> OvS DPDK   OvS DPDK
>
> +-+
> | VM  |
> +--+--+
> \---+---/
> |
> +--+---+   ++--+ +--+---+
> | OVS  |-> |   ovsevents plugin| --> |   collectd   |
> +--+---+   ++--+ +--+---+
>
> +--+-+ +---++ |
> | Ceilometer | <-- | collectd ceilometer plugin |  <---
> +--+-+ +---++
>
> Performance impact
> --
>   No noticeable performance or latency impact is observed with
>   KA feature enabled.
>
> Bhanuprakash Bodireddy (21):
>
> [10] Patches help update OVSDB with keepalive status
>
>   vswitch.xml: Add keepalive support.
>   ovsschema: Introduce 'keepalive' column in Open_vSwitch.
>   dpdk: Add helper functions for DPDK datapath keepalive.
>   process: Retrieve process status.
>   Keepalive: Add initial keepalive support.
>   bridge: Invoke keepalive framework.
>   keepalive: Add more helper functions to KA framework.
>   dpif-netdev: Register packet processing cores to KA framework.
>   dpif-netdev: Dispatch heartbeats for DPDK datapath.
>   keepalive: Retrieve PMD status periodically.
>   bridge: Update keepalive status in ovsdb
>
>   keepalive: Add support to query keepalive statistics.
>   keepalive: Add support to query keepalive status.
>   dpif-netdev: Add helper function to check false positives.
>
> [5] Following patches add additional health checks in case of heartbeat
> failure. The following can still be improved and WIP.
>
>   dpif-netdev: Add additional datapath health checks.
>   keepalive: Check the link status as part of PMD health checks.
>   keepalive: Check the packet statisitcs as part of PMD health checks.
>   keepalive: Check the PMD cycle stats as part of PMD health checks.
>   netdev-dpdk: Enable PMD health checks on heartbeat failure.
>
>   keepalive: Display extended Keepalive status.
>   Documentation: Update DPDK doc with Keepalive feature.
>

Hi Bhanu,

I've been playing with this a little bit;  is it too late to consider
tracking 'threads' instead of 'cores'?  I'm not sure what it means for a
particular core ID to be 'healthy' - but I know what 'pmd24' not
responding means.

Additionally, I'd suggest keeping words like 'healthy', and 'unhealthy'
out of it.  I'd basically just have this keepalive report things on the
thread you *know* - last time it poked your status register (and you can
also track things like cpu utilization, etc, if you'd like).  Then let
your higher level thing that reads ceilometer make those "healthy"
determinations.  After all, sometimes 0% utilization is 

Re: [ovs-dev] [PATCH V2] netdev-dpdk: use rte_eth_dev_set_mtu

2017-06-13 Thread Aaron Conole
Mark Kavanagh  writes:

> DPDK provides an API to set the MTU of compatible physical devices -
> rte_eth_dev_set_mtu(). Prior to DPDK v16.07 however, this API was not
> implemented in some DPDK PMDs (i40e, specifically). To allow the use
> of jumbo frames with affected NICs in OvS-DPDK, MTU configuration was
> achieved by setting the jumbo frame flag, and corresponding maximum
> permitted Rx frame size, in an rte_eth_conf structure for the NIC
> port, and subsequently invoking rte_eth_dev_configure() with that
> configuration.
>
> However, that method does not set the MTU field of the underlying DPDK
> structure (rte_eth_dev) for the corresponding physical device;
> consequently, rte_eth_dev_get_mtu() reports the incorrect MTU for an
> OvS-DPDK phy device with non-standard MTU.
>
> Resolve this issue by invoking rte_eth_dev_set_mtu() when setting up
> or modifying the MTU of a DPDK phy port.
>
> Fixes: 0072e93 ("netdev-dpdk: add support for jumbo frames")
> Reported-by: Aaron Conole 
> Reported-by: Vipin Varghese 
> Signed-off-by: Mark Kavanagh 
> ---

I won't have a chance to test this near-term, but it looks correct to
me.  Thanks for this work, Mark!

Reviewed-by: Aaron Conole 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 0/8] netdev-dpdk: Use intermediate queue during packet transmission.

2017-06-13 Thread Bodireddy, Bhanuprakash
Hi Eelco

>Hi Bhanu,
>
>Went over the full patch set, and the changes look good to me.
>All my previous concerns are addressed, and therefore I'm acking this series.

Thanks for reviewing the series and acking it.

>
>I do have one small remark regarding the dpdk_tx_queue struct, see
>individual patch email.

I agree with what you suggested.
I have to send out v2 anyways as Ben suggested to rename the API from 
netdev_txq_drain() to netdev_txq_flush(). I will factor in your suggestion in 
V2. 

>
>Here are some numbers with this patch on a none tuned system, single run.
>This just to make sure we still benefit with both patches applied.
>
>Throughput for PV scenario, with 64 byte packets
>
>Number
>flows   MASTER With PATCH
>=====
>   10  4,531,4247,884,607
>   32  3,137,3006,367,643
>   50  2,552,7256,649,985
>  100  2,473,8355,876,677
>  500  2,308,8405,265,986
>1000  2,380,7555,001,081
>
>
>Throughput for PVP scenario, with 64 byte packets
>
>Number
>flows   MASTER With PATCH
>=====
>   10  2,309,2543,800,747
>   32  1,626,3803,324,561
>   50  1,538,8793,092,792
>  100  1,429,0282,887,488
>  500  1,271,7732,537,624
>1000  1,268,4302,442,405
>
>Latency test
>
>  MASTER
>  ===
>  Pkt size  min(ns)  avg(ns)  max(ns)
>   512  9,94712,381   264,131
>  1024  7,662 9,445   194,463
>  1280  7,790 9,115   196,059
>  1518  8,103 9,599   197,646
>
>  PATCH
>  =
>  Pkt size  min(ns)  avg(ns)  max(ns)
>   512  10,195   12,551   199,699
>  1024  7,838 9,612   206,378
>  1280  8,151 9,575   187,848
>  1518  8,095 9,643   198,552
>
>
>Throughput for PP scenario, with 64 byte packets:
>
>Number
>flows   MASTER With PATCH
>=====
>   10  7,430,6168,853,037
>   32  4,770,1906,774,006
>   50  4,736,2597,336,776
>  100  4,699,2376,146,151
>  500  3,870,0195,242,781
>1000  3,853,8835,121,911
>
>
>Latency test
>
>  MASTER
>  ===
>  Pkt size  min(ns)  avg(ns)  max(ns)
>   512  4,8875,596165,246
>  1024  5,8016,447170,842
>  1280  6,3557,056159,056
>  1518  6,8607,634160,860
>
>  PATCH
>  =
>  Pkt size  min(ns)  avg(ns)  max(ns)
>   512  4,7835,521158,134
>  1024  5,8016,359170,859
>  1280  6,3156,878150,301
>  1518  6,5797,398143,068
>
>
>Acked-by: Eelco Chaudron 

Thanks for your time in testing and sharing the numbers here.

Bhanuprakash.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] redhat: make the rpm aware of the lock file

2017-06-13 Thread Ben Pfaff
On Tue, Jun 13, 2017 at 02:26:03PM -0400, Aaron Conole wrote:
> Currently, the db lockfile will cause the openvswitch directory to
> linger after uninstall because the rpm database isn't aware that it
> should be treated as part of the system.  This commit informs the rpmdb
> properly as a 'ghost' so that when the package is uninstalled, it will
> be removed automatically.  This means that if no extra files exist in
> /etc/openvswitch, the whole directory will be removed from /etc/.
> 
> Acked-by: Flavio Leitner 
> Reviewed-by: Markos Chandras 
> Signed-off-by: Aaron Conole 

Applied to master and branch-2.7, thanks!
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] ofp-errors: avoid read overrun in ofperr_decode_msg()

2017-06-13 Thread Lance Richardson
vconn_add_bundle_error() stores a maximum of 64 bytes of an
OpenFlow packet, however ofperr_decode_msg() assumes that the
entire packet is present. This leads to a buffer read overrun
when the the packet is copied to another buffer using the full
packet size.

Fix by adding a parameter to ofperr_decode_msg() indicating the
size of the buffer containing the OpenFlow packet.

Found via gcc's address sanitizer.

Fixes: 506c1ddb3404 ("vconn: Better bundle error management.")
Signed-off-by: Lance Richardson 
---
 include/openvswitch/ofp-errors.h | 1 +
 lib/ofp-errors.c | 6 --
 lib/ofp-print.c  | 2 +-
 lib/vconn.c  | 2 +-
 ovn/controller/ofctrl.c  | 5 +++--
 utilities/ovs-ofctl.c| 3 ++-
 6 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/include/openvswitch/ofp-errors.h b/include/openvswitch/ofp-errors.h
index aeb58e0..1b28084 100644
--- a/include/openvswitch/ofp-errors.h
+++ b/include/openvswitch/ofp-errors.h
@@ -809,6 +809,7 @@ bool ofperr_is_valid(enum ofperr);
 enum ofperr ofperr_from_name(const char *);
 
 enum ofperr ofperr_decode_msg(const struct ofp_header *,
+  size_t ofp_header_buf_len,
   struct ofpbuf *payload);
 struct ofpbuf *ofperr_encode_reply(enum ofperr, const struct ofp_header *);
 struct ofpbuf *ofperr_encode_hello(enum ofperr, enum ofp_version ofp_version,
diff --git a/lib/ofp-errors.c b/lib/ofp-errors.c
index adc9295..8f65c51 100644
--- a/lib/ofp-errors.c
+++ b/lib/ofp-errors.c
@@ -282,7 +282,8 @@ ofperr_get_code(enum ofperr error, enum ofp_version version)
  * aligned).  The caller must free the payload (with ofpbuf_uninit()) when it
  * is no longer needed.  On failure, '*payload' is cleared. */
 enum ofperr
-ofperr_decode_msg(const struct ofp_header *oh, struct ofpbuf *payload)
+ofperr_decode_msg(const struct ofp_header *oh, size_t oh_buflen,
+  struct ofpbuf *payload)
 {
 const struct ofp_error_msg *oem;
 enum ofpraw raw;
@@ -294,7 +295,8 @@ ofperr_decode_msg(const struct ofp_header *oh, struct 
ofpbuf *payload)
 }
 
 /* Pull off the error message. */
-struct ofpbuf b = ofpbuf_const_initializer(oh, ntohs(oh->length));
+size_t oh_size = MIN(oh_buflen, ntohs(oh->length));
+struct ofpbuf b = ofpbuf_const_initializer(oh, oh_size);
 enum ofperr error = ofpraw_pull(, );
 if (error) {
 return 0;
diff --git a/lib/ofp-print.c b/lib/ofp-print.c
index 423df31..a341ce0 100644
--- a/lib/ofp-print.c
+++ b/lib/ofp-print.c
@@ -1596,7 +1596,7 @@ ofp_print_error_msg(struct ds *string, const struct 
ofp_header *oh,
 enum ofperr error;
 char *s;
 
-error = ofperr_decode_msg(oh, );
+error = ofperr_decode_msg(oh, len, );
 if (!error) {
 ds_put_cstr(string, "***decode error***");
 ds_put_hex_dump(string, oh + 1, len - sizeof *oh, 0, true);
diff --git a/lib/vconn.c b/lib/vconn.c
index 6997eaa..32cb80d 100644
--- a/lib/vconn.c
+++ b/lib/vconn.c
@@ -1079,7 +1079,7 @@ vconn_bundle_reply_validate(struct ofpbuf *reply,
 
 if (type == OFPTYPE_ERROR) {
 vconn_add_bundle_error(oh, errors);
-return ofperr_decode_msg(oh, NULL);
+return ofperr_decode_msg(oh, ntohs(oh->length), NULL);
 }
 if (type != OFPTYPE_BUNDLE_CONTROL) {
 return OFPERR_OFPBRC_BAD_TYPE;
diff --git a/ovn/controller/ofctrl.c b/ovn/controller/ofctrl.c
index 277d3d7..b9988d1 100644
--- a/ovn/controller/ofctrl.c
+++ b/ovn/controller/ofctrl.c
@@ -283,8 +283,9 @@ recv_S_TLV_TABLE_REQUESTED(const struct ofp_header *oh, 
enum ofptype type,
  ofperr_to_string(error));
 }
 } else if (type == OFPTYPE_ERROR) {
+enum ofperr error = ofperr_decode_msg(oh, ntohs(oh->length), NULL);
 VLOG_ERR("switch refused to allocate Geneve option (%s)",
- ofperr_to_string(ofperr_decode_msg(oh, NULL)));
+ ofperr_to_string(error));
 } else {
 char *s = ofp_to_string(oh, ntohs(oh->length), NULL, 1);
 VLOG_ERR("unexpected reply to TLV table request (%s)", s);
@@ -327,7 +328,7 @@ recv_S_TLV_TABLE_MOD_SENT(const struct ofp_header *oh, enum 
ofptype type,
 } else if (oh->xid == xid2 && type == OFPTYPE_BARRIER_REPLY) {
 state = S_CLEAR_FLOWS;
 } else if (oh->xid == xid && type == OFPTYPE_ERROR) {
-enum ofperr error = ofperr_decode_msg(oh, NULL);
+enum ofperr error = ofperr_decode_msg(oh, ntohs(oh->length), NULL);
 if (error == OFPERR_NXTTMFC_ALREADY_MAPPED ||
 error == OFPERR_NXTTMFC_DUP_ENTRY) {
 VLOG_INFO("raced with another controller adding "
diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c
index dca9be3..d60e5d5 100644
--- a/utilities/ovs-ofctl.c
+++ b/utilities/ovs-ofctl.c
@@ -708,7 +708,8 @@ bundle_print_errors(struct ovs_list *errors, struct 
ovs_list *requests,
 enum ofperr ofperr;
 struct 

[ovs-dev] [PATCH] redhat: make the rpm aware of the lock file

2017-06-13 Thread Aaron Conole
Currently, the db lockfile will cause the openvswitch directory to
linger after uninstall because the rpm database isn't aware that it
should be treated as part of the system.  This commit informs the rpmdb
properly as a 'ghost' so that when the package is uninstalled, it will
be removed automatically.  This means that if no extra files exist in
/etc/openvswitch, the whole directory will be removed from /etc/.

Acked-by: Flavio Leitner 
Reviewed-by: Markos Chandras 
Signed-off-by: Aaron Conole 
---
 rhel/openvswitch-fedora.spec.in | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/rhel/openvswitch-fedora.spec.in b/rhel/openvswitch-fedora.spec.in
index 9fc5f27..f822ad3 100644
--- a/rhel/openvswitch-fedora.spec.in
+++ b/rhel/openvswitch-fedora.spec.in
@@ -273,6 +273,7 @@ rm -rf $RPM_BUILD_ROOT/%{_datadir}/openvswitch/python/
 install -d -m 0755 $RPM_BUILD_ROOT/%{_sharedstatedir}/openvswitch
 
 touch $RPM_BUILD_ROOT%{_sysconfdir}/openvswitch/conf.db
+touch $RPM_BUILD_ROOT%{_sysconfdir}/openvswitch/.conf.db.~lock~
 touch $RPM_BUILD_ROOT%{_sysconfdir}/openvswitch/system-id.conf
 
 install -p -m 644 -D selinux/openvswitch-custom.pp \
@@ -481,6 +482,7 @@ fi
 %{_sysconfdir}/bash_completion.d/ovs-vsctl-bashcomp.bash
 %dir %{_sysconfdir}/openvswitch
 %config %ghost %{_sysconfdir}/openvswitch/conf.db
+%ghost %{_sysconfdir}/openvswitch/.conf.db.~lock~
 %config %ghost %{_sysconfdir}/openvswitch/system-id.conf
 %config(noreplace) %{_sysconfdir}/sysconfig/openvswitch
 %config(noreplace) %{_sysconfdir}/logrotate.d/openvswitch
-- 
2.9.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [patch_v1] conntrack: Reset nat_info in un_nat conns.

2017-06-13 Thread Greg Rose

On 06/13/2017 07:46 AM, Darrell Ball wrote:

Un-nat conns have no nat_info as do default conns.
However, un-nat conns are originally templated from the
corresponding default conns and therefore need to
have their nat_info explicitly nulled.  This
otherwise exposes a double free if conntrack_destroy()
were to be used to destroy the connection tracker.  This
would apply to cleaning the datapath after testing.

Fixes: 286de2729955 ("dpdk: Userspace Datapath: Introduce NAT Support.")
Signed-off-by: Darrell Ball 
---
  lib/conntrack.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/lib/conntrack.c b/lib/conntrack.c
index 146edd7..90b154a 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -573,6 +573,7 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
  nc->conn_type == CT_CONN_TYPE_DEFAULT) {
  *nc = *conn_for_un_nat_copy;
  conn_for_un_nat_copy->conn_type = CT_CONN_TYPE_UN_NAT;
+conn_for_un_nat_copy->nat_info = NULL;
  }
  ct_rwlock_unlock(>nat_resources_lock);



I don't have a way to test this right at the moment but it's pretty simple and 
looks good to me.

Thanks Darrell!

Acked-by: Greg Rose 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] sandbox: disable ssl for backup ovn southbound db

2017-06-13 Thread Lance Richardson
Since the sandbox environment was changed to enable SSL usage for
OVN_Southbound connections, the backup southbound server emits
the log message "socket_util|ERR|6642: bind: Address already in use"
every 2.5 seconds.

Fix by configuring the backup db server to not use remote configuration
from the database (the unix: socket can still be used, as was the
case before SSL was enabled).

Fixes: 0ced2a5c5e47 ("sandbox: use ssl for ovn-controller to sb db connection")
Signed-off-by: Lance Richardson 
---
 tutorial/ovs-sandbox | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tutorial/ovs-sandbox b/tutorial/ovs-sandbox
index 15a3a0a..258ea9b 100755
--- a/tutorial/ovs-sandbox
+++ b/tutorial/ovs-sandbox
@@ -372,7 +372,6 @@ if $ovn; then
 rungdb $gdb_ovsdb $gdb_ovsdb_ex ovsdb-server --detach --no-chdir \
 --pidfile="$sandbox"/ovnsb_db2.pid  -vconsole:off \
 --log-file="$sandbox"/ovnsb_db2.log \
---remote=db:OVN_Southbound,SB_Global,connections \
 --private-key=db:OVN_Southbound,SSL,private_key \
 --certificate=db:OVN_Southbound,SSL,certificate \
 --ca-cert=db:OVN_Southbound,SSL,ca_cert \
-- 
2.9.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 1/4] ovn: l3ha, handling of multiple gateways

2017-06-13 Thread Russell Bryant
On Tue, Jun 13, 2017 at 1:22 PM, Guru Shetty  wrote:
>
>
> On 13 June 2017 at 10:17, Russell Bryant  wrote:
>>
>> On Fri, Jun 9, 2017 at 10:46 AM, Guru Shetty  wrote:
>> > On 8 June 2017 at 14:39, Ben Pfaff  wrote:
>> >
>> >> On Thu, Jun 08, 2017 at 02:05:05PM +, majop...@redhat.com wrote:
>> >> > From: Miguel Angel Ajo 
>> >> >
>> >> > This patch handles multiple gateways with priorities in
>> >> > chassisredirect
>> >> > ports, any gateway with a chassis redirect port will implement the
>> >> > rules to de-encapsulate incomming packets for such port.
>> >> >
>> >> > And hosts targetting a remote chassisredirect port will setup a
>> >> > bundle(active_backup, ..) action to each tunnel port, in the given
>> >> > priority order.
>> >> >
>> >> > Signed-off-by: Miguel Angel Ajo 
>> >>
>> >> I feel unqualified to fully and properly review this series.  Guru, is
>> >> it something you'd feel able to take a look at?  Is anyone else
>> >> planning
>> >> to review this?
>> >>
>> >
>> > I will have a go at it.
>>
>> Thanks a lot, Guru.  Since this is important for OpenStack, let me
>> know if you won't have time to review and I'll make time to help.  I
>> think you're a better reviewer for this one, though.
>
> Russell,
>  If you want to do the initial round, please go ahead. Since you know more
> about the usecase and how it is to be used by OpenStack, your review will
> help.

OK - I'll do a pass on them.

-- 
Russell Bryant
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] sandbox: ovn rbac support for sandbox environment

2017-06-13 Thread Russell Bryant
On Tue, Jun 13, 2017 at 1:24 PM, Ben Pfaff  wrote:
> On Tue, Jun 13, 2017 at 01:13:31PM -0400, Russell Bryant wrote:
>> On Mon, Jun 12, 2017 at 6:42 PM, Lance Richardson  
>> wrote:
>> > Enable OVN_Southbound RBAC by default in the sandbox environment,
>> > provide a new option "--no-ovn-rbac" to disable it.
>> >
>> > Signed-off-by: Lance Richardson 
>> > ---
>> >  tutorial/ovs-sandbox | 17 +
>> >  1 file changed, 13 insertions(+), 4 deletions(-)
>>
>> Thanks for the patch.  It seems to work fine for me.  I applied this to 
>> master.
>>
>> I noticed that the backup southbound database is generating a lot of
>> errors in the log, though it's unrelated to this patch.  I believe it
>> was caused by the earlier patch to make ovs-sandbox use SSL by
>> default.
>>
>> 2017-06-13T17:04:07.433Z|7|socket_util|ERR|6642: bind: Address
>> already in use
>> 2017-06-13T17:04:07.433Z|8|ovsdb_jsonrpc_server|ERR|pssl:6642:
>> listen failed: Address already in use
>>
>> I'm not sure the best way to clean this up.  Perhaps we could just run
>> each southbound db in its own net namespace.
>
> Maybe run it on a different port?

I was thinking it wouldn't work because we're setting up the ssl
connection in the OVN_Southbound db, which is replicated between the
two, so they're both trying to listen on the same port.  If we can set
up the connection only as command line args instead of through the db,
a different port would work.

-- 
Russell Bryant
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] sandbox: ovn rbac support for sandbox environment

2017-06-13 Thread Lance Richardson
> From: "Russell Bryant" 
> To: "Lance Richardson" 
> Cc: "ovs dev" 
> Sent: Tuesday, 13 June, 2017 1:13:31 PM
> Subject: Re: [ovs-dev] [PATCH] sandbox: ovn rbac support for sandbox 
> environment
> 
> On Mon, Jun 12, 2017 at 6:42 PM, Lance Richardson 
> wrote:
> > Enable OVN_Southbound RBAC by default in the sandbox environment,
> > provide a new option "--no-ovn-rbac" to disable it.
> >
> > Signed-off-by: Lance Richardson 
> > ---
> >  tutorial/ovs-sandbox | 17 +
> >  1 file changed, 13 insertions(+), 4 deletions(-)
> 
> Thanks for the patch.  It seems to work fine for me.  I applied this to
> master.
> 
> I noticed that the backup southbound database is generating a lot of
> errors in the log, though it's unrelated to this patch.  I believe it
> was caused by the earlier patch to make ovs-sandbox use SSL by
> default.
> 
> 2017-06-13T17:04:07.433Z|7|socket_util|ERR|6642: bind: Address
> already in use
> 2017-06-13T17:04:07.433Z|8|ovsdb_jsonrpc_server|ERR|pssl:6642:
> listen failed: Address already in use
> 

I see.. before SSL was enabled, a separate AF_UNIX socket was used
for each southbound db, with SSL enabled we do have a collision.

It's not clear to me what you can do with the backup db in the
sandbox environment other than use ovn-sbctl to inspect its
contents as mentioned in the script. If that's it, maybe we
could simply remove this option when starting the backup ovsdb-server
(the AF_UNIX socket is still available):

--remote=db:OVN_Southbound,SB_Global,connections \

> I'm not sure the best way to clean this up.  Perhaps we could just run
> each southbound db in its own net namespace.

Hopefully we can find a way that avoids needing administrative privileges...

> 
> --
> Russell Bryant
> 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] sandbox: ovn rbac support for sandbox environment

2017-06-13 Thread Ben Pfaff
On Tue, Jun 13, 2017 at 01:13:31PM -0400, Russell Bryant wrote:
> On Mon, Jun 12, 2017 at 6:42 PM, Lance Richardson  wrote:
> > Enable OVN_Southbound RBAC by default in the sandbox environment,
> > provide a new option "--no-ovn-rbac" to disable it.
> >
> > Signed-off-by: Lance Richardson 
> > ---
> >  tutorial/ovs-sandbox | 17 +
> >  1 file changed, 13 insertions(+), 4 deletions(-)
> 
> Thanks for the patch.  It seems to work fine for me.  I applied this to 
> master.
> 
> I noticed that the backup southbound database is generating a lot of
> errors in the log, though it's unrelated to this patch.  I believe it
> was caused by the earlier patch to make ovs-sandbox use SSL by
> default.
> 
> 2017-06-13T17:04:07.433Z|7|socket_util|ERR|6642: bind: Address
> already in use
> 2017-06-13T17:04:07.433Z|8|ovsdb_jsonrpc_server|ERR|pssl:6642:
> listen failed: Address already in use
> 
> I'm not sure the best way to clean this up.  Perhaps we could just run
> each southbound db in its own net namespace.

Maybe run it on a different port?
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 1/4] ovn: l3ha, handling of multiple gateways

2017-06-13 Thread Guru Shetty
On 13 June 2017 at 10:17, Russell Bryant  wrote:

> On Fri, Jun 9, 2017 at 10:46 AM, Guru Shetty  wrote:
> > On 8 June 2017 at 14:39, Ben Pfaff  wrote:
> >
> >> On Thu, Jun 08, 2017 at 02:05:05PM +, majop...@redhat.com wrote:
> >> > From: Miguel Angel Ajo 
> >> >
> >> > This patch handles multiple gateways with priorities in
> chassisredirect
> >> > ports, any gateway with a chassis redirect port will implement the
> >> > rules to de-encapsulate incomming packets for such port.
> >> >
> >> > And hosts targetting a remote chassisredirect port will setup a
> >> > bundle(active_backup, ..) action to each tunnel port, in the given
> >> > priority order.
> >> >
> >> > Signed-off-by: Miguel Angel Ajo 
> >>
> >> I feel unqualified to fully and properly review this series.  Guru, is
> >> it something you'd feel able to take a look at?  Is anyone else planning
> >> to review this?
> >>
> >
> > I will have a go at it.
>
> Thanks a lot, Guru.  Since this is important for OpenStack, let me
> know if you won't have time to review and I'll make time to help.  I
> think you're a better reviewer for this one, though.
>
Russell,
 If you want to do the initial round, please go ahead. Since you know more
about the usecase and how it is to be used by OpenStack, your review will
help.


>
> --
> Russell Bryant
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 1/4] ovn: l3ha, handling of multiple gateways

2017-06-13 Thread Russell Bryant
On Fri, Jun 9, 2017 at 10:46 AM, Guru Shetty  wrote:
> On 8 June 2017 at 14:39, Ben Pfaff  wrote:
>
>> On Thu, Jun 08, 2017 at 02:05:05PM +, majop...@redhat.com wrote:
>> > From: Miguel Angel Ajo 
>> >
>> > This patch handles multiple gateways with priorities in chassisredirect
>> > ports, any gateway with a chassis redirect port will implement the
>> > rules to de-encapsulate incomming packets for such port.
>> >
>> > And hosts targetting a remote chassisredirect port will setup a
>> > bundle(active_backup, ..) action to each tunnel port, in the given
>> > priority order.
>> >
>> > Signed-off-by: Miguel Angel Ajo 
>>
>> I feel unqualified to fully and properly review this series.  Guru, is
>> it something you'd feel able to take a look at?  Is anyone else planning
>> to review this?
>>
>
> I will have a go at it.

Thanks a lot, Guru.  Since this is important for OpenStack, let me
know if you won't have time to review and I'll make time to help.  I
think you're a better reviewer for this one, though.

-- 
Russell Bryant
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] sandbox: ovn rbac support for sandbox environment

2017-06-13 Thread Russell Bryant
On Mon, Jun 12, 2017 at 6:42 PM, Lance Richardson  wrote:
> Enable OVN_Southbound RBAC by default in the sandbox environment,
> provide a new option "--no-ovn-rbac" to disable it.
>
> Signed-off-by: Lance Richardson 
> ---
>  tutorial/ovs-sandbox | 17 +
>  1 file changed, 13 insertions(+), 4 deletions(-)

Thanks for the patch.  It seems to work fine for me.  I applied this to master.

I noticed that the backup southbound database is generating a lot of
errors in the log, though it's unrelated to this patch.  I believe it
was caused by the earlier patch to make ovs-sandbox use SSL by
default.

2017-06-13T17:04:07.433Z|7|socket_util|ERR|6642: bind: Address
already in use
2017-06-13T17:04:07.433Z|8|ovsdb_jsonrpc_server|ERR|pssl:6642:
listen failed: Address already in use

I'm not sure the best way to clean this up.  Perhaps we could just run
each southbound db in its own net namespace.

-- 
Russell Bryant
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] treewide: undefined behavior, passing null in nonnull parameters

2017-06-13 Thread Lance Richardson
> From: "Lance Richardson" 
> To: d...@openvswitch.org
> Sent: Tuesday, 13 June, 2017 12:57:38 PM
> Subject: [ovs-dev] [PATCH] treewide: undefined behavior,  passing null in 
> nonnull parameters
> 
> Eliminate a number of instances of undefined behavior related to
> passing NULL in parameters having "nonnull" annotations.
> 
> Found with gcc's undefined behavior sanitizer.
> 
> Signed-off-by: Lance Richardson 
> ---

This patch addresses all ubsan errors of the "null vs. nonnull" flavor.
The remaining errors are:

$ grep runtime tests/testsuite.dir/*/testsuite.log
tests/testsuite.dir/0044/testsuite.log:+tests/test-hash.c:59:40: runtime error: 
shift exponent 64 is too large for 64-bit type 'long unsigned int'
tests/testsuite.dir/0057/testsuite.log:+tests/test-util.c:88:23: runtime error: 
left shift of 1 by 31 places cannot be represented in type 'int'
tests/testsuite.dir/0062/testsuite.log:+tests/test-util.c:49:29: runtime error: 
left shift of 1 by 31 places cannot be represented in type 'int'
tests/testsuite.dir/0062/testsuite.log:+tests/test-util.c:52:30: runtime error: 
left shift of 1 by 31 places cannot be represented in type 'int'
tests/testsuite.dir/0062/testsuite.log:+tests/test-util.c:52:42: runtime error: 
left shift of 1 by 31 places cannot be represented in type 'int'
tests/testsuite.dir/0062/testsuite.log:+tests/test-util.c:52:48: runtime error: 
signed integer overflow: -2147483648 - 1 cannot be represented in type 'int'
tests/testsuite.dir/0062/testsuite.log:+tests/test-util.c:55:50: runtime error: 
left shift of 1 by 31 places cannot be represented in type 'int'
tests/testsuite.dir/0062/testsuite.log:+tests/test-util.c:55:67: runtime error: 
left shift of 1 by 31 places cannot be represented in type 'int'
tests/testsuite.dir/0062/testsuite.log:+tests/test-util.c:55:56: runtime error: 
signed integer overflow: -2147483648 - 1 cannot be represented in type 'int'
tests/testsuite.dir/0432/testsuite.log:+lib/odp-util.c:5440:24: runtime error: 
load of misaligned address 0x01d91f7c for type 'const union ovs_u128', 
which requires 8 byte alignment
tests/testsuite.dir/0435/testsuite.log:+lib/odp-util.c:511:65: runtime error: 
member access within misaligned address 0x019aa9b2 for type 'const struct 
ip6_hdr', which requires 4 byte alignment
tests/testsuite.dir/0435/testsuite.log:+lib/odp-util.c:511:24: runtime error: 
member access within misaligned address 0x019aa9b2 for type 'const struct 
ip6_hdr', which requires 4 byte alignment
tests/testsuite.dir/0435/testsuite.log:+lib/odp-util.c:510:68: runtime error: 
member access within misaligned address 0x019aa9b2 for type 'const struct 
ip6_hdr', which requires 4 byte alignment
tests/testsuite.dir/0435/testsuite.log:+lib/odp-util.c:510:23: runtime error: 
member access within misaligned address 0x019aa9b2 for type 'const struct 
ip6_hdr', which requires 4 byte alignment

Running the undefined behavior sanitizer is a matter of essentially:

   ./configure CFLAGS=-fsanitize=undefined
   make check

Possibly requiring either:

   yum install libubsan
or
   apt-get install libubsan0

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] treewide: undefined behavior, passing null in nonnull parameters

2017-06-13 Thread Ben Pfaff
On Tue, Jun 13, 2017 at 12:57:38PM -0400, Lance Richardson wrote:
> Eliminate a number of instances of undefined behavior related to
> passing NULL in parameters having "nonnull" annotations.
> 
> Found with gcc's undefined behavior sanitizer.
> 
> Signed-off-by: Lance Richardson 

Thanks, applied to master.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] treewide: undefined behavior, passing null in nonnull parameters

2017-06-13 Thread Lance Richardson
Eliminate a number of instances of undefined behavior related to
passing NULL in parameters having "nonnull" annotations.

Found with gcc's undefined behavior sanitizer.

Signed-off-by: Lance Richardson 
---
 lib/netlink.c |  5 -
 lib/ofpbuf.c  |  2 +-
 lib/svec.c|  4 +++-
 lib/util.c|  2 +-
 lib/util.h| 21 +
 ovn/utilities/ovn-nbctl.c | 18 ++
 6 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/lib/netlink.c b/lib/netlink.c
index 3da22a1..0246131 100644
--- a/lib/netlink.c
+++ b/lib/netlink.c
@@ -241,7 +241,10 @@ void
 nl_msg_put_unspec(struct ofpbuf *msg, uint16_t type,
   const void *data, size_t size)
 {
-memcpy(nl_msg_put_unspec_uninit(msg, type, size), data, size);
+void *ptr;
+
+ptr = nl_msg_put_unspec_uninit(msg, type, size);
+nullable_memcpy(ptr, data, size);
 }
 
 /* Appends a Netlink attribute of the given 'type' and no payload to 'msg'.
diff --git a/lib/ofpbuf.c b/lib/ofpbuf.c
index 3019c4a..f4a9040 100644
--- a/lib/ofpbuf.c
+++ b/lib/ofpbuf.c
@@ -375,7 +375,7 @@ void *
 ofpbuf_put_zeros(struct ofpbuf *b, size_t size)
 {
 void *dst = ofpbuf_put_uninit(b, size);
-memset(dst, 0, size);
+nullable_memset(dst, 0, size);
 return dst;
 }
 
diff --git a/lib/svec.c b/lib/svec.c
index aad04e3..297a60c 100644
--- a/lib/svec.c
+++ b/lib/svec.c
@@ -127,7 +127,9 @@ compare_strings(const void *a_, const void *b_)
 void
 svec_sort(struct svec *svec)
 {
-qsort(svec->names, svec->n, sizeof *svec->names, compare_strings);
+if (svec->n) {
+qsort(svec->names, svec->n, sizeof *svec->names, compare_strings);
+}
 }
 
 void
diff --git a/lib/util.c b/lib/util.c
index b2a1f8a..85b9350 100644
--- a/lib/util.c
+++ b/lib/util.c
@@ -132,7 +132,7 @@ void *
 xmemdup(const void *p_, size_t size)
 {
 void *p = xmalloc(size);
-memcpy(p, p_, size);
+nullable_memcpy(p, p_, size);
 return p;
 }
 
diff --git a/lib/util.h b/lib/util.h
index d2374b2..c2d1c3f 100644
--- a/lib/util.h
+++ b/lib/util.h
@@ -135,6 +135,27 @@ void free_cacheline(void *);
 void ovs_strlcpy(char *dst, const char *src, size_t size);
 void ovs_strzcpy(char *dst, const char *src, size_t size);
 
+/* The C standards say that neither the 'dst' nor 'src' argument to
+ * memcpy() may be null, even if 'n' is zero.  This wrapper tolerates
+ * the null case. */
+static inline void
+nullable_memcpy(void *dst, const void *src, size_t n)
+{
+if (n) {
+memcpy(dst, src, n);
+}
+}
+
+/* The C standards say that the 'dst' argument to memset may not be
+ * null, even if 'n' is zero.  This wrapper tolerates the null case. */
+static inline void
+nullable_memset(void *dst, int c, size_t n)
+{
+if (n) {
+memset(dst, c, n);
+}
+}
+
 /* Copy string SRC to DST, but no more bytes than the shorter of DST or SRC.
  * DST and SRC must both be char arrays, not pointers, and with GNU C, this
  * raises a compiler error if either DST or SRC is a pointer instead of an
diff --git a/ovn/utilities/ovn-nbctl.c b/ovn/utilities/ovn-nbctl.c
index bd0160a..bea010a 100644
--- a/ovn/utilities/ovn-nbctl.c
+++ b/ovn/utilities/ovn-nbctl.c
@@ -920,7 +920,7 @@ nbctl_lsp_add(struct ctl_context *ctx)
 nbrec_logical_switch_verify_ports(ls);
 struct nbrec_logical_switch_port **new_ports = xmalloc(sizeof *new_ports *
 (ls->n_ports + 1));
-memcpy(new_ports, ls->ports, sizeof *new_ports * ls->n_ports);
+nullable_memcpy(new_ports, ls->ports, sizeof *new_ports * ls->n_ports);
 new_ports[ls->n_ports] = CONST_CAST(struct nbrec_logical_switch_port *,
  lsp);
 nbrec_logical_switch_set_ports(ls, new_ports, ls->n_ports + 1);
@@ -1379,7 +1379,7 @@ nbctl_acl_add(struct ctl_context *ctx)
 /* Insert the acl into the logical switch. */
 nbrec_logical_switch_verify_acls(ls);
 struct nbrec_acl **new_acls = xmalloc(sizeof *new_acls * (ls->n_acls + 1));
-memcpy(new_acls, ls->acls, sizeof *new_acls * ls->n_acls);
+nullable_memcpy(new_acls, ls->acls, sizeof *new_acls * ls->n_acls);
 new_acls[ls->n_acls] = acl;
 nbrec_logical_switch_set_acls(ls, new_acls, ls->n_acls + 1);
 free(new_acls);
@@ -1697,7 +1697,8 @@ nbctl_lr_lb_add(struct ctl_context *ctx)
 struct nbrec_load_balancer **new_lbs
 = xmalloc(sizeof *new_lbs * (lr->n_load_balancer + 1));
 
-memcpy(new_lbs, lr->load_balancer, sizeof *new_lbs * lr->n_load_balancer);
+nullable_memcpy(new_lbs, lr->load_balancer,
+sizeof *new_lbs * lr->n_load_balancer);
 new_lbs[lr->n_load_balancer] = CONST_CAST(struct nbrec_load_balancer *,
 new_lb);
 nbrec_logical_router_set_load_balancer(lr, new_lbs,
@@ -1793,7 +1794,8 @@ nbctl_ls_lb_add(struct ctl_context *ctx)
 struct nbrec_load_balancer **new_lbs
 = xmalloc(sizeof 

Re: [ovs-dev] [PATCH] byte-order: Fix undefined behavior of BYTES_TO_BE32.

2017-06-13 Thread Ben Pfaff
On Tue, Jun 13, 2017 at 11:34:25AM -0400, Lance Richardson wrote:
> > From: "Ben Pfaff" 
> > To: "Lance Richardson" 
> > Cc: d...@openvswitch.org
> > Sent: Tuesday, 13 June, 2017 11:17:26 AM
> > Subject: Re: [PATCH] byte-order: Fix undefined behavior of BYTES_TO_BE32.
> > 
> > On Tue, Jun 13, 2017 at 09:09:44AM -0400, Lance Richardson wrote:
> > > > From: "Ben Pfaff" 
> > > > To: d...@openvswitch.org
> > > > Cc: "Ben Pfaff" , "Lance Richardson" 
> > > > Sent: Tuesday, 13 June, 2017 12:51:14 AM
> > > > Subject: [PATCH] byte-order: Fix undefined behavior of BYTES_TO_BE32.
> > > > 
> > > > A left shift that would produce a result that is not representable
> > > > by the type of the expression's result has "undefined behavior"
> > > > according to the C language standard. Avoid this by casting values
> > > > that could set the upper bit to unsigned types.
> > > > 
> > > > Also document and convert a macro to a function.
> > > > 
> > > > While we're at it, delete the unused macro BE16S_TO_BE32.
> > > > 
> > > > Found via gcc's undefined behavior sanitizer.
> > > > 
> > > > Reported-by: Lance Richardson 
> > > > Signed-off-by: Ben Pfaff 
> > > > ---
> > > >  lib/byte-order.h | 21 +
> > > >  lib/flow.c   |  2 +-
> > > >  2 files changed, 14 insertions(+), 9 deletions(-)
> > > > 
> > > 
> > > Looks good.
> > > 
> > > Acked-by: Lance Richardson 
> > 
> > Thanks.  I applied this to master.
> > 
> > Do you think that it is worthwhile to apply this to older branches?
> > 
> 
> I would guess not... the only danger here would be if the compiler
> incorrectly optimized something based on inferring that the high-order
> bit of the value being shifted 24 bits has to be zero because there
> would be "undefined behavior" if it were set. From looking through
> the code, there doesn't seem to be any real exposure.

OK, thanks.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] hash: Remove conflicting hash_finish() prototype.

2017-06-13 Thread Ben Pfaff
Thanks, I applied this to master.

On Tue, Jun 13, 2017 at 04:28:19PM +, Darrell Ball wrote:
> correction, the hash tests do pass in instrinsic mode
> 
> Acked-by: Darrell Ball 
> 
> 
> 
> On 6/13/17, 9:17 AM, "Darrell Ball"  wrote:
> 
> I have been looking to fixing this when I saw the e-mail from Antonio
> Using intrinsic mode is failing the “hash functions” test (#44),
> which I suppose is better than never having being able to build.
> 
>  
> 
> 
> 
> On 6/13/17, 8:31 AM, "ovs-dev-boun...@openvswitch.org on behalf of Ben 
> Pfaff"  wrote:
> 
> Normally, hash_finish() is declared as:
> static inline uint32_t hash_finish(uint32_t hash, uint32_t final)
> 
> When __SSE4_2__ && __x86_64__, it is declared as:
> static inline uint32_t hash_finish(uint64_t hash, uint64_t final)
> 
> A recent commit added an unneeded prototype in the first form, which 
> caused
> an error due to the redeclaration of a different type when the second 
> form
> was actually used.  This removes the prototype, fixing the problem.
> 
> It may not be a great idea to have two different forms for this 
> function,
> but it's long standing and so I don't want to change it immediately 
> without
> proper consideration.
> 
> Reported-by: "Fischetti, Antonio" 
> Fixes: 67702b79d845 ("hash: New helper functions for adding words in 
> a buffer to a hash.")
> Signed-off-by: Ben Pfaff 
> ---
>  lib/hash.h | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/lib/hash.h b/lib/hash.h
> index 7dffeaa9cacc..a642a1e97954 100644
> --- a/lib/hash.h
> +++ b/lib/hash.h
> @@ -92,7 +92,6 @@ static inline uint32_t mhash_finish(uint32_t hash)
>  
>  static inline uint32_t hash_add(uint32_t hash, uint32_t data);
>  static inline uint32_t hash_add64(uint32_t hash, uint64_t data);
> -static inline uint32_t hash_finish(uint32_t hash, uint32_t final);
>  
>  static inline uint32_t hash_add_words(uint32_t, const uint32_t *, 
> size_t);
>  static inline uint32_t hash_add_words64(uint32_t, const uint64_t *, 
> size_t);
> -- 
> 2.10.2
> 
> ___
> dev mailing list
> d...@openvswitch.org
> 
> https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev=DwICAg=uilaK90D4TOVoH58JNXRgQ=BVhFA09CGX7JQ5Ih-uZnsw=I31eLoUgkJs498Dx_N-zRLMemNQMUQon8J-2Cr8Wehk=A8aQngnjfcomJtsIQAQAEJTWG5Tja7XLlQ6G-iFNZEQ=
>  
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] hash: Remove conflicting hash_finish() prototype.

2017-06-13 Thread Darrell Ball
correction, the hash tests do pass in instrinsic mode

Acked-by: Darrell Ball 



On 6/13/17, 9:17 AM, "Darrell Ball"  wrote:

I have been looking to fixing this when I saw the e-mail from Antonio
Using intrinsic mode is failing the “hash functions” test (#44),
which I suppose is better than never having being able to build.

 



On 6/13/17, 8:31 AM, "ovs-dev-boun...@openvswitch.org on behalf of Ben 
Pfaff"  wrote:

Normally, hash_finish() is declared as:
static inline uint32_t hash_finish(uint32_t hash, uint32_t final)

When __SSE4_2__ && __x86_64__, it is declared as:
static inline uint32_t hash_finish(uint64_t hash, uint64_t final)

A recent commit added an unneeded prototype in the first form, which 
caused
an error due to the redeclaration of a different type when the second 
form
was actually used.  This removes the prototype, fixing the problem.

It may not be a great idea to have two different forms for this 
function,
but it's long standing and so I don't want to change it immediately 
without
proper consideration.

Reported-by: "Fischetti, Antonio" 
Fixes: 67702b79d845 ("hash: New helper functions for adding words in a 
buffer to a hash.")
Signed-off-by: Ben Pfaff 
---
 lib/hash.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/hash.h b/lib/hash.h
index 7dffeaa9cacc..a642a1e97954 100644
--- a/lib/hash.h
+++ b/lib/hash.h
@@ -92,7 +92,6 @@ static inline uint32_t mhash_finish(uint32_t hash)
 
 static inline uint32_t hash_add(uint32_t hash, uint32_t data);
 static inline uint32_t hash_add64(uint32_t hash, uint64_t data);
-static inline uint32_t hash_finish(uint32_t hash, uint32_t final);
 
 static inline uint32_t hash_add_words(uint32_t, const uint32_t *, 
size_t);
 static inline uint32_t hash_add_words64(uint32_t, const uint64_t *, 
size_t);
-- 
2.10.2

___
dev mailing list
d...@openvswitch.org

https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev=DwICAg=uilaK90D4TOVoH58JNXRgQ=BVhFA09CGX7JQ5Ih-uZnsw=I31eLoUgkJs498Dx_N-zRLMemNQMUQon8J-2Cr8Wehk=A8aQngnjfcomJtsIQAQAEJTWG5Tja7XLlQ6G-iFNZEQ=
 










___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] hash: Remove conflicting hash_finish() prototype.

2017-06-13 Thread Darrell Ball
I have been looking to fixing this when I saw the e-mail from Antonio
Using intrinsic mode is failing the “hash functions” test (#44),
which I suppose is better than never having being able to build.

 



On 6/13/17, 8:31 AM, "ovs-dev-boun...@openvswitch.org on behalf of Ben Pfaff" 
 wrote:

Normally, hash_finish() is declared as:
static inline uint32_t hash_finish(uint32_t hash, uint32_t final)

When __SSE4_2__ && __x86_64__, it is declared as:
static inline uint32_t hash_finish(uint64_t hash, uint64_t final)

A recent commit added an unneeded prototype in the first form, which caused
an error due to the redeclaration of a different type when the second form
was actually used.  This removes the prototype, fixing the problem.

It may not be a great idea to have two different forms for this function,
but it's long standing and so I don't want to change it immediately without
proper consideration.

Reported-by: "Fischetti, Antonio" 
Fixes: 67702b79d845 ("hash: New helper functions for adding words in a 
buffer to a hash.")
Signed-off-by: Ben Pfaff 
---
 lib/hash.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/hash.h b/lib/hash.h
index 7dffeaa9cacc..a642a1e97954 100644
--- a/lib/hash.h
+++ b/lib/hash.h
@@ -92,7 +92,6 @@ static inline uint32_t mhash_finish(uint32_t hash)
 
 static inline uint32_t hash_add(uint32_t hash, uint32_t data);
 static inline uint32_t hash_add64(uint32_t hash, uint64_t data);
-static inline uint32_t hash_finish(uint32_t hash, uint32_t final);
 
 static inline uint32_t hash_add_words(uint32_t, const uint32_t *, size_t);
 static inline uint32_t hash_add_words64(uint32_t, const uint64_t *, 
size_t);
-- 
2.10.2

___
dev mailing list
d...@openvswitch.org

https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev=DwICAg=uilaK90D4TOVoH58JNXRgQ=BVhFA09CGX7JQ5Ih-uZnsw=I31eLoUgkJs498Dx_N-zRLMemNQMUQon8J-2Cr8Wehk=A8aQngnjfcomJtsIQAQAEJTWG5Tja7XLlQ6G-iFNZEQ=
 








___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 2/6] redhat: replace python3 with python package macro

2017-06-13 Thread Aaron Conole
Aaron Conole  writes:

> Flavio Leitner  writes:
>
>> On Sat, Jun 03, 2017 at 11:09:57AM -0400, Aaron Conole wrote:
>>> According to the packaging guidelines found at
>>> https://fedoraproject.org/wiki/PackagingDrafts:Python3EPEL, when
>>> specifying a python3 package, use the %{python3_pkgversion} macro to get
>>> the appropriate suffix.
>>
>> This looks incomplete because the package's name remains python3-openvswitch
>> where it should have been python%{python3_pkgversion}-openvswitch.
>> Same issue with the requires for that subpackage.
>
> Makes sense.  I'll fix it up, and test.
>
> Thanks Flavio!
>

With a clean Fedora system, I don't see the error that I was getting
that led me to change this.

Additionally, some asking around has led me to understand
that those guidelines are for EPEL packages (openvswitch is not an EPEL
package).  And it seems like they may not even be current.

I'm going to drop this patch from my submission.  Sorry for the noise.

-Aaron
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [RFC] treewide: undefined behavior, passing null in nonnull parameters

2017-06-13 Thread Lance Richardson
> From: "Ben Pfaff" 
> To: "Lance Richardson" 
> Cc: d...@openvswitch.org
> Sent: Tuesday, 13 June, 2017 11:21:16 AM
> Subject: Re: [ovs-dev] [RFC] treewide: undefined behavior, passing null in 
> nonnull parameters
> 
> On Tue, Jun 13, 2017 at 08:21:23AM -0400, Lance Richardson wrote:
> > > From: "Ben Pfaff" 
> > > To: "Lance Richardson" 
> > > Cc: d...@openvswitch.org
> > > Sent: Tuesday, 13 June, 2017 1:06:12 AM
> > > Subject: Re: [ovs-dev] [RFC] treewide: undefined behavior, passing null
> > > in nonnull parameters
> > > 
> > > On Mon, Jun 12, 2017 at 08:06:01PM -0400, Lance Richardson wrote:
> > > > Eliminate a number of instances of undefined behavior related to
> > > > passing NULL in parameters having "nonnull" annotations.
> > > > 
> > > > Found with gcc's undefined behavior sanitizer.
> > > > 
> > > > Signed-off-by: Lance Richardson 
> > > > ---
> > > > 
> > > > Posting this as RFC because there is no apparent risk of
> > > > unwanted compiler optimizations related to undefined behavior
> > > > in existing code. The main value in fixing these issues is
> > > > in reducing noise to make it easier to find problematic
> > > > cases in the future.
> > > > 
> > > > Here is a small example of the type of unwanted optimization
> > > > to be concerned about:
> > > > 
> > > > test1a.c:
> > > > 
> > > > #include 
> > > > 
> > > > extern void foo(char*, size_t);
> > > > 
> > > > int main(int argc, char **argv)
> > > > {
> > > > char x[128];
> > > > 
> > > > foo(x, sizeof x);
> > > > foo(NULL, 0);
> > > > 
> > > > return 0;
> > > > }
> > > > 
> > > > test1b.c:
> > > > 
> > > > #include 
> > > > #include 
> > > > 
> > > > void foo(char *bar, size_t len)
> > > > {
> > > > memset(bar, 0, len);
> > > > 
> > > > if (bar)
> > > > printf("bar is non-null: %p\n", bar);
> > > > }
> > > > 
> > > > Compile and run:
> > > > gcc -o test -O2 test1a.c test1b.c
> > > > ./test
> > > > 
> > > > Output (second line might be a bit of a surprise):
> > > > bar is non-null: 0x7fff80f90d50
> > > > bar is non-null: (nil)
> > > 
> > > Hmm.  That is surprising.
> > > 
> > > > diff --git a/lib/netlink.c b/lib/netlink.c
> > > > index 3da22a1..fcad884 100644
> > > > --- a/lib/netlink.c
> > > > +++ b/lib/netlink.c
> > > > @@ -241,7 +241,12 @@ void
> > > >  nl_msg_put_unspec(struct ofpbuf *msg, uint16_t type,
> > > >const void *data, size_t size)
> > > >  {
> > > > -memcpy(nl_msg_put_unspec_uninit(msg, type, size), data, size);
> > > > +void *ptr;
> > > > +
> > > > +ptr = nl_msg_put_unspec_uninit(msg, type, size);
> > > > +if (size) {
> > > > +memcpy(ptr, data, size);
> > > > +}
> > > >  }
> > > 
> > > I guess the above is above null 'data', since 'ptr' should always be
> > > nonnull.  In that case, it seems reasonable.
> > > 
> > > >  /* Appends a Netlink attribute of the given 'type' and no payload to
> > > >  'msg'.
> > > > diff --git a/lib/ofpbuf.c b/lib/ofpbuf.c
> > > > index 3019c4a..2e71fed 100644
> > > > --- a/lib/ofpbuf.c
> > > > +++ b/lib/ofpbuf.c
> > > > @@ -375,7 +375,9 @@ void *
> > > >  ofpbuf_put_zeros(struct ofpbuf *b, size_t size)
> > > >  {
> > > >  void *dst = ofpbuf_put_uninit(b, size);
> > > > -memset(dst, 0, size);
> > > > +if (size) {
> > > > +memset(dst, 0, size);
> > > > +}
> > > >  return dst;
> > > >  }
> > > 
> > > In the above, when is dst null?  It looks to me like ofpbuf_put_uninit()
> > > always returns nonnull.
> > > 
> > 
> > Looks like it could return NULL if called with b->data = NULL, b->size = 0,
> > and
> > size = 0. Seems odd to want to append no zero bytes to an empty buffer, but
> > it
> > apparently happens while running the unit tests.
> 
> OK.  Somewhat weird.
> 
> > > > diff --git a/lib/svec.c b/lib/svec.c
> > > > index aad04e3..297a60c 100644
> > > > --- a/lib/svec.c
> > > > +++ b/lib/svec.c
> > > > @@ -127,7 +127,9 @@ compare_strings(const void *a_, const void *b_)
> > > >  void
> > > >  svec_sort(struct svec *svec)
> > > >  {
> > > > -qsort(svec->names, svec->n, sizeof *svec->names, compare_strings);
> > > > +if (svec->n) {
> > > > +qsort(svec->names, svec->n, sizeof *svec->names,
> > > > compare_strings);
> > > > +}
> > > >  }
> > > 
> > > This one in svec_sort() looks good to me.
> > > 
> > > >  void
> > > > diff --git a/lib/util.c b/lib/util.c
> > > > index b2a1f8a..ddf8546 100644
> > > > --- a/lib/util.c
> > > > +++ b/lib/util.c
> > > > @@ -132,7 +132,9 @@ void *
> > > >  xmemdup(const void *p_, size_t size)
> > > >  {
> > > >  void *p = xmalloc(size);
> > > > -memcpy(p, p_, size);
> > > > +if (size) {
> > > > +memcpy(p, p_, size);
> > > > +}
> > > >  return p;
> > > >  }
> > > 
> > > I guess that the above must be about a null 'p_' parameter?  xmalloc()
> > > 

Re: [ovs-dev] [PATCH] byte-order: Fix undefined behavior of BYTES_TO_BE32.

2017-06-13 Thread Lance Richardson
> From: "Ben Pfaff" 
> To: "Lance Richardson" 
> Cc: d...@openvswitch.org
> Sent: Tuesday, 13 June, 2017 11:17:26 AM
> Subject: Re: [PATCH] byte-order: Fix undefined behavior of BYTES_TO_BE32.
> 
> On Tue, Jun 13, 2017 at 09:09:44AM -0400, Lance Richardson wrote:
> > > From: "Ben Pfaff" 
> > > To: d...@openvswitch.org
> > > Cc: "Ben Pfaff" , "Lance Richardson" 
> > > Sent: Tuesday, 13 June, 2017 12:51:14 AM
> > > Subject: [PATCH] byte-order: Fix undefined behavior of BYTES_TO_BE32.
> > > 
> > > A left shift that would produce a result that is not representable
> > > by the type of the expression's result has "undefined behavior"
> > > according to the C language standard. Avoid this by casting values
> > > that could set the upper bit to unsigned types.
> > > 
> > > Also document and convert a macro to a function.
> > > 
> > > While we're at it, delete the unused macro BE16S_TO_BE32.
> > > 
> > > Found via gcc's undefined behavior sanitizer.
> > > 
> > > Reported-by: Lance Richardson 
> > > Signed-off-by: Ben Pfaff 
> > > ---
> > >  lib/byte-order.h | 21 +
> > >  lib/flow.c   |  2 +-
> > >  2 files changed, 14 insertions(+), 9 deletions(-)
> > > 
> > 
> > Looks good.
> > 
> > Acked-by: Lance Richardson 
> 
> Thanks.  I applied this to master.
> 
> Do you think that it is worthwhile to apply this to older branches?
> 

I would guess not... the only danger here would be if the compiler
incorrectly optimized something based on inferring that the high-order
bit of the value being shifted 24 bits has to be zero because there
would be "undefined behavior" if it were set. From looking through
the code, there doesn't seem to be any real exposure.

Thanks,

   Lance
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [patch_v2 3/3] conntrack: Add hash_finish() to conn_key_hash().

2017-06-13 Thread Ben Pfaff
Oops, that's my fault.  Can you confirm that this patch solves the
problem?
https://patchwork.ozlabs.org/patch/775320/

On Tue, Jun 13, 2017 at 01:15:24PM +, Fischetti, Antonio wrote:
> Hi Darrell,
> it seems in lib/hash.h there's already a hash_finish() function for the 
> Intrinsic mode where the 1st parm is a uint64_t:
> static inline uint32_t hash_finish(uint64_t hash, uint64_t final)
> 
> so I'm getting some errors when building with CFLAGS="-O2 -march=native -g" 
> 
> lib/hash.h:180:24: error: conflicting types for 'hash_finish'
>  static inline uint32_t hash_finish(uint64_t hash, uint64_t final)
> 
> lib/hash.h:95:24: note: previous declaration of 'hash_finish' was here
>  static inline uint32_t hash_finish(uint32_t hash, uint32_t final);
> 
> 
> Antonio
> 
> > -Original Message-
> > From: ovs-dev-boun...@openvswitch.org [mailto:ovs-dev-
> > boun...@openvswitch.org] On Behalf Of Darrell Ball
> > Sent: Friday, June 9, 2017 11:31 PM
> > To: d...@openvswitch.org
> > Subject: [ovs-dev] [patch_v2 3/3] conntrack: Add hash_finish() to
> > conn_key_hash().
> > 
> > The function conn_key_hash() is updated to include
> > a call to hash_finish() and also to make use of a
> > new hash abstraction - ct_endpoint_hash_add().
> > 
> > Fixes: a489b16854b5 ("conntrack: New userspace connection tracker.")
> > Signed-off-by: Darrell Ball 
> > ---
> >  lib/conntrack.c | 10 +++---
> >  1 file changed, 3 insertions(+), 7 deletions(-)
> > 
> > diff --git a/lib/conntrack.c b/lib/conntrack.c
> > index 9584a0a..146edd7 100644
> > --- a/lib/conntrack.c
> > +++ b/lib/conntrack.c
> > @@ -1529,14 +1529,10 @@ static uint32_t
> >  conn_key_hash(const struct conn_key *key, uint32_t basis)
> >  {
> >  uint32_t hsrc, hdst, hash;
> > -int i;
> > 
> >  hsrc = hdst = basis;
> > -
> > -for (i = 0; i < sizeof(key->src) / sizeof(uint32_t); i++) {
> > -hsrc = hash_add(hsrc, ((uint32_t *) >src)[i]);
> > -hdst = hash_add(hdst, ((uint32_t *) >dst)[i]);
> > -}
> > +hsrc = ct_endpoint_hash_add(hsrc, >src);
> > +hdst = ct_endpoint_hash_add(hdst, >dst);
> > 
> >  /* Even if source and destination are swapped the hash will be the
> > same. */
> >  hash = hsrc ^ hdst;
> > @@ -1546,7 +1542,7 @@ conn_key_hash(const struct conn_key *key, uint32_t
> > basis)
> >(uint32_t *) (key + 1) - (uint32_t *) (>dst +
> > 1),
> >hash);
> > 
> > -return hash;
> > +return hash_finish(hash, 0);
> >  }
> > 
> >  static void
> > --
> > 1.9.1
> > 
> > ___
> > dev mailing list
> > d...@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] hash: Remove conflicting hash_finish() prototype.

2017-06-13 Thread Ben Pfaff
Normally, hash_finish() is declared as:
static inline uint32_t hash_finish(uint32_t hash, uint32_t final)

When __SSE4_2__ && __x86_64__, it is declared as:
static inline uint32_t hash_finish(uint64_t hash, uint64_t final)

A recent commit added an unneeded prototype in the first form, which caused
an error due to the redeclaration of a different type when the second form
was actually used.  This removes the prototype, fixing the problem.

It may not be a great idea to have two different forms for this function,
but it's long standing and so I don't want to change it immediately without
proper consideration.

Reported-by: "Fischetti, Antonio" 
Fixes: 67702b79d845 ("hash: New helper functions for adding words in a buffer 
to a hash.")
Signed-off-by: Ben Pfaff 
---
 lib/hash.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/hash.h b/lib/hash.h
index 7dffeaa9cacc..a642a1e97954 100644
--- a/lib/hash.h
+++ b/lib/hash.h
@@ -92,7 +92,6 @@ static inline uint32_t mhash_finish(uint32_t hash)
 
 static inline uint32_t hash_add(uint32_t hash, uint32_t data);
 static inline uint32_t hash_add64(uint32_t hash, uint64_t data);
-static inline uint32_t hash_finish(uint32_t hash, uint32_t final);
 
 static inline uint32_t hash_add_words(uint32_t, const uint32_t *, size_t);
 static inline uint32_t hash_add_words64(uint32_t, const uint64_t *, size_t);
-- 
2.10.2

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [RFC] treewide: undefined behavior, passing null in nonnull parameters

2017-06-13 Thread Ben Pfaff
On Tue, Jun 13, 2017 at 08:21:23AM -0400, Lance Richardson wrote:
> > From: "Ben Pfaff" 
> > To: "Lance Richardson" 
> > Cc: d...@openvswitch.org
> > Sent: Tuesday, 13 June, 2017 1:06:12 AM
> > Subject: Re: [ovs-dev] [RFC] treewide: undefined behavior, passing null in 
> > nonnull parameters
> > 
> > On Mon, Jun 12, 2017 at 08:06:01PM -0400, Lance Richardson wrote:
> > > Eliminate a number of instances of undefined behavior related to
> > > passing NULL in parameters having "nonnull" annotations.
> > > 
> > > Found with gcc's undefined behavior sanitizer.
> > > 
> > > Signed-off-by: Lance Richardson 
> > > ---
> > > 
> > > Posting this as RFC because there is no apparent risk of
> > > unwanted compiler optimizations related to undefined behavior
> > > in existing code. The main value in fixing these issues is
> > > in reducing noise to make it easier to find problematic
> > > cases in the future.
> > > 
> > > Here is a small example of the type of unwanted optimization
> > > to be concerned about:
> > > 
> > > test1a.c:
> > > 
> > > #include 
> > > 
> > > extern void foo(char*, size_t);
> > > 
> > > int main(int argc, char **argv)
> > > {
> > > char x[128];
> > > 
> > > foo(x, sizeof x);
> > > foo(NULL, 0);
> > > 
> > > return 0;
> > > }
> > > 
> > > test1b.c:
> > > 
> > > #include 
> > > #include 
> > > 
> > > void foo(char *bar, size_t len)
> > > {
> > > memset(bar, 0, len);
> > > 
> > > if (bar)
> > > printf("bar is non-null: %p\n", bar);
> > > }
> > > 
> > > Compile and run:
> > > gcc -o test -O2 test1a.c test1b.c
> > > ./test
> > > 
> > > Output (second line might be a bit of a surprise):
> > > bar is non-null: 0x7fff80f90d50
> > > bar is non-null: (nil)
> > 
> > Hmm.  That is surprising.
> > 
> > > diff --git a/lib/netlink.c b/lib/netlink.c
> > > index 3da22a1..fcad884 100644
> > > --- a/lib/netlink.c
> > > +++ b/lib/netlink.c
> > > @@ -241,7 +241,12 @@ void
> > >  nl_msg_put_unspec(struct ofpbuf *msg, uint16_t type,
> > >const void *data, size_t size)
> > >  {
> > > -memcpy(nl_msg_put_unspec_uninit(msg, type, size), data, size);
> > > +void *ptr;
> > > +
> > > +ptr = nl_msg_put_unspec_uninit(msg, type, size);
> > > +if (size) {
> > > +memcpy(ptr, data, size);
> > > +}
> > >  }
> > 
> > I guess the above is above null 'data', since 'ptr' should always be
> > nonnull.  In that case, it seems reasonable.
> > 
> > >  /* Appends a Netlink attribute of the given 'type' and no payload to
> > >  'msg'.
> > > diff --git a/lib/ofpbuf.c b/lib/ofpbuf.c
> > > index 3019c4a..2e71fed 100644
> > > --- a/lib/ofpbuf.c
> > > +++ b/lib/ofpbuf.c
> > > @@ -375,7 +375,9 @@ void *
> > >  ofpbuf_put_zeros(struct ofpbuf *b, size_t size)
> > >  {
> > >  void *dst = ofpbuf_put_uninit(b, size);
> > > -memset(dst, 0, size);
> > > +if (size) {
> > > +memset(dst, 0, size);
> > > +}
> > >  return dst;
> > >  }
> > 
> > In the above, when is dst null?  It looks to me like ofpbuf_put_uninit()
> > always returns nonnull.
> > 
> 
> Looks like it could return NULL if called with b->data = NULL, b->size = 0, 
> and
> size = 0. Seems odd to want to append no zero bytes to an empty buffer, but it
> apparently happens while running the unit tests.

OK.  Somewhat weird.

> > > diff --git a/lib/svec.c b/lib/svec.c
> > > index aad04e3..297a60c 100644
> > > --- a/lib/svec.c
> > > +++ b/lib/svec.c
> > > @@ -127,7 +127,9 @@ compare_strings(const void *a_, const void *b_)
> > >  void
> > >  svec_sort(struct svec *svec)
> > >  {
> > > -qsort(svec->names, svec->n, sizeof *svec->names, compare_strings);
> > > +if (svec->n) {
> > > +qsort(svec->names, svec->n, sizeof *svec->names, 
> > > compare_strings);
> > > +}
> > >  }
> > 
> > This one in svec_sort() looks good to me.
> > 
> > >  void
> > > diff --git a/lib/util.c b/lib/util.c
> > > index b2a1f8a..ddf8546 100644
> > > --- a/lib/util.c
> > > +++ b/lib/util.c
> > > @@ -132,7 +132,9 @@ void *
> > >  xmemdup(const void *p_, size_t size)
> > >  {
> > >  void *p = xmalloc(size);
> > > -memcpy(p, p_, size);
> > > +if (size) {
> > > +memcpy(p, p_, size);
> > > +}
> > >  return p;
> > >  }
> > 
> > I guess that the above must be about a null 'p_' parameter?  xmalloc()
> > never returns null.
> > 
> > Maybe we should invent a nullable_memcpy() helper:
> > 
> > /* The C standards say that neither the 'dst' nor 'src' argument to
> >  * memcpy() may be null, even if 'n' is zero.  This wrapper tolerates
> >  * the null case. */
> > static inline void
> > nullable_memcpy(void *dst, const void *src, size_t n)
> > {
> > if (n) {
> > memcpy(dst, src, n);
> > }
> > }
> > 
> 
> Makes sense, ditto for a nullable_memset().

OK, maybe that's the approach we should take.

[ovs-dev] [PATCH V11 28/33] dpctl: Indicate if flow is offloaded when dumping flows of all types

2017-06-13 Thread Roi Dayan
From: Paul Blakey 

When verbosity is requested on dump-flows (-m) indicate which flows
are offloaded.

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
Acked-by: Flavio Leitner 
---
 lib/dpctl.c| 11 ---
 lib/dpif-netlink.c |  4 
 lib/dpif.h |  1 +
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/lib/dpctl.c b/lib/dpctl.c
index a2ee8a2..7f44d02 100644
--- a/lib/dpctl.c
+++ b/lib/dpctl.c
@@ -739,7 +739,7 @@ dpctl_dump_dps(int argc OVS_UNUSED, const char *argv[] 
OVS_UNUSED,
 
 static void
 format_dpif_flow(struct ds *ds, const struct dpif_flow *f, struct hmap *ports,
- struct dpctl_params *dpctl_p)
+ char *type, struct dpctl_params *dpctl_p)
 {
 if (dpctl_p->verbosity && f->ufid_present) {
 odp_format_ufid(>ufid, ds);
@@ -750,6 +750,9 @@ format_dpif_flow(struct ds *ds, const struct dpif_flow *f, 
struct hmap *ports,
 ds_put_cstr(ds, ", ");
 
 dpif_flow_stats_format(>stats, ds);
+if (dpctl_p->verbosity && !type && f->offloaded) {
+ds_put_cstr(ds, ", offloaded:yes");
+}
 ds_put_cstr(ds, ", actions:");
 format_odp_actions(ds, f->actions, f->actions_len);
 }
@@ -850,6 +853,7 @@ dpctl_dump_flows(int argc, const char *argv[], struct 
dpctl_params *dpctl_p)
 BUILD_ASSERT(PMD_ID_NULL != NON_PMD_CORE_ID);
 
 ds_init();
+memset(, 0, sizeof f);
 flow_dump = dpif_flow_dump_create(dpif, false, (type ? type : "dpctl"));
 flow_dump_thread = dpif_flow_dump_thread_create(flow_dump);
 while (dpif_flow_dump_next(flow_dump_thread, , 1)) {
@@ -886,7 +890,8 @@ dpctl_dump_flows(int argc, const char *argv[], struct 
dpctl_params *dpctl_p)
 }
 pmd_id = f.pmd_id;
 }
-format_dpif_flow(, , _names, dpctl_p);
+format_dpif_flow(, , _names, type, dpctl_p);
+
 dpctl_print(dpctl_p, "%s\n", ds_cstr());
 }
 dpif_flow_dump_thread_destroy(flow_dump_thread);
@@ -1069,7 +1074,7 @@ dpctl_get_flow(int argc, const char *argv[], struct 
dpctl_params *dpctl_p)
 }
 
 ds_init();
-format_dpif_flow(, , _names, dpctl_p);
+format_dpif_flow(, , _names, NULL, dpctl_p);
 dpctl_print(dpctl_p, "%s\n", ds_cstr());
 ds_destroy();
 
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index e030bb0..5e648e7 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -1639,6 +1639,7 @@ dpif_netlink_flow_to_dpif_flow(struct dpif *dpif, struct 
dpif_flow *dpif_flow,
_flow->ufid);
 }
 dpif_netlink_flow_get_stats(datapath_flow, _flow->stats);
+dpif_flow->offloaded = false;
 }
 
 /* The design is such that all threads are working together on the first dump
@@ -1721,6 +1722,9 @@ dpif_netlink_netdev_match_to_dpif_flow(struct match 
*match,
 flow->ufid = *ufid;
 
 flow->pmd_id = PMD_ID_NULL;
+
+flow->offloaded = true;
+
 return 0;
 }
 
diff --git a/lib/dpif.h b/lib/dpif.h
index b1f516e..38efd29 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -591,6 +591,7 @@ struct dpif_flow {
 bool ufid_present;/* True if 'ufid' was provided by datapath.*/
 unsigned pmd_id;  /* Datapath poll mode driver id. */
 struct dpif_flow_stats stats; /* Flow statistics. */
+bool offloaded;   /* True if flow is offloaded */
 };
 int dpif_flow_dump_next(struct dpif_flow_dump_thread *,
 struct dpif_flow *flows, int max_flows);
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] byte-order: Fix undefined behavior of BYTES_TO_BE32.

2017-06-13 Thread Ben Pfaff
On Tue, Jun 13, 2017 at 09:09:44AM -0400, Lance Richardson wrote:
> > From: "Ben Pfaff" 
> > To: d...@openvswitch.org
> > Cc: "Ben Pfaff" , "Lance Richardson" 
> > Sent: Tuesday, 13 June, 2017 12:51:14 AM
> > Subject: [PATCH] byte-order: Fix undefined behavior of BYTES_TO_BE32.
> > 
> > A left shift that would produce a result that is not representable
> > by the type of the expression's result has "undefined behavior"
> > according to the C language standard. Avoid this by casting values
> > that could set the upper bit to unsigned types.
> > 
> > Also document and convert a macro to a function.
> > 
> > While we're at it, delete the unused macro BE16S_TO_BE32.
> > 
> > Found via gcc's undefined behavior sanitizer.
> > 
> > Reported-by: Lance Richardson 
> > Signed-off-by: Ben Pfaff 
> > ---
> >  lib/byte-order.h | 21 +
> >  lib/flow.c   |  2 +-
> >  2 files changed, 14 insertions(+), 9 deletions(-)
> > 
> 
> Looks good.
> 
> Acked-by: Lance Richardson 

Thanks.  I applied this to master.

Do you think that it is worthwhile to apply this to older branches?
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V11 29/33] tests: Add system-offloads-testsuite

2017-06-13 Thread Roi Dayan
From: Paul Blakey 

The new system-offloads-testsuite, which can be launched via
`make check-offloads`, tests offloading capabilities
to makes sure that certian flows are actually offloaded.

The tests run on virtual netdevices (VETH).

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
Reviewed-by: Simon Horman 
Acked-by: Flavio Leitner 
---
 tests/.gitignore   |  1 +
 tests/automake.mk  | 16 +
 tests/ofproto-macros.at|  6 ++--
 tests/system-offloads-testsuite.at | 25 ++
 tests/system-offloads-traffic.at   | 67 ++
 5 files changed, 113 insertions(+), 2 deletions(-)
 create mode 100644 tests/system-offloads-testsuite.at
 create mode 100644 tests/system-offloads-traffic.at

diff --git a/tests/.gitignore b/tests/.gitignore
index f4540a3..77e5a95 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -12,6 +12,7 @@
 /pki/
 /system-kmod-testsuite
 /system-userspace-testsuite
+/system-offloads-testsuite
 /test-aes128
 /test-atomic
 /test-bundle
diff --git a/tests/automake.mk b/tests/automake.mk
index c6bd120..e88c622 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -4,9 +4,11 @@ EXTRA_DIST += \
$(SYSTEM_TESTSUITE_AT) \
$(SYSTEM_KMOD_TESTSUITE_AT) \
$(SYSTEM_USERSPACE_TESTSUITE_AT) \
+   $(SYSTEM_OFFLOADS_TESTSUITE_AT) \
$(TESTSUITE) \
$(SYSTEM_KMOD_TESTSUITE) \
$(SYSTEM_USERSPACE_TESTSUITE) \
+   $(SYSTEM_OFFLOADS_TESTSUITE) \
tests/atlocal.in \
$(srcdir)/package.m4 \
$(srcdir)/tests/testsuite \
@@ -112,12 +114,18 @@ SYSTEM_TESTSUITE_AT = \
tests/system-ovn.at \
tests/system-traffic.at
 
+SYSTEM_OFFLOADS_TESTSUITE_AT = \
+   tests/system-common-macros.at \
+   tests/system-offloads-traffic.at \
+   tests/system-offloads-testsuite.at
+
 check_SCRIPTS += tests/atlocal
 
 TESTSUITE = $(srcdir)/tests/testsuite
 TESTSUITE_PATCH = $(srcdir)/tests/testsuite.patch
 SYSTEM_KMOD_TESTSUITE = $(srcdir)/tests/system-kmod-testsuite
 SYSTEM_USERSPACE_TESTSUITE = $(srcdir)/tests/system-userspace-testsuite
+SYSTEM_OFFLOADS_TESTSUITE = $(srcdir)/tests/system-offloads-testsuite
 DISTCLEANFILES += tests/atconfig tests/atlocal
 
 AUTOTEST_PATH = 
utilities:vswitchd:ovsdb:vtep:tests:$(PTHREAD_WIN32_DIR_DLL):ovn/controller-vtep:ovn/northd:ovn/utilities:ovn/controller
@@ -229,6 +237,10 @@ check-system-userspace: all
set $(SHELL) '$(SYSTEM_USERSPACE_TESTSUITE)' -C tests  
AUTOTEST_PATH='$(AUTOTEST_PATH)' $(TESTSUITEFLAGS) -j1; \
"$$@" || (test X'$(RECHECK)' = Xyes && "$$@" --recheck)
 
+check-offloads: all
+   set $(SHELL) '$(SYSTEM_OFFLOADS_TESTSUITE)' -C tests  
AUTOTEST_PATH='$(AUTOTEST_PATH)' $(TESTSUITEFLAGS) -j1; \
+   "$$@" || (test X'$(RECHECK)' = Xyes && "$$@" --recheck)
+
 clean-local:
test ! -f '$(TESTSUITE)' || $(SHELL) '$(TESTSUITE)' -C tests --clean
 
@@ -253,6 +265,10 @@ $(SYSTEM_USERSPACE_TESTSUITE): package.m4 
$(SYSTEM_TESTSUITE_AT) $(SYSTEM_USERSP
$(AM_V_GEN)$(AUTOTEST) -I '$(srcdir)' -o $@.tmp $@.at
$(AM_V_at)mv $@.tmp $@
 
+$(SYSTEM_OFFLOADS_TESTSUITE): package.m4 $(SYSTEM_TESTSUITE_AT) 
$(SYSTEM_OFFLOADS_TESTSUITE_AT) $(COMMON_MACROS_AT)
+   $(AM_V_GEN)$(AUTOTEST) -I '$(srcdir)' -o $@.tmp $@.at
+   $(AM_V_at)mv $@.tmp $@
+
 # The `:;' works around a Bash 3.2 bug when the output is not writeable.
 $(srcdir)/package.m4: $(top_srcdir)/configure.ac
$(AM_V_GEN):;{ \
diff --git a/tests/ofproto-macros.at b/tests/ofproto-macros.at
index faff5b0..0adf555 100644
--- a/tests/ofproto-macros.at
+++ b/tests/ofproto-macros.at
@@ -317,7 +317,7 @@ m4_define([_OVS_VSWITCHD_START],
AT_CAPTURE_FILE([ovsdb-server.log])
 
dnl Initialize database.
-   AT_CHECK([ovs-vsctl --no-wait init])
+   AT_CHECK([ovs-vsctl --no-wait init $2])
 
dnl Start ovs-vswitchd.
AT_CHECK([ovs-vswitchd $1 --detach --no-chdir --pidfile --log-file -vvconn 
-vofproto_dpif -vunixctl], [0], [], [stderr])
@@ -331,7 +331,9 @@ m4_define([_OVS_VSWITCHD_START],
 /ofproto|INFO|using datapath ID/d
 /netdev_linux|INFO|.*device has unknown hardware address family/d
 /ofproto|INFO|datapath ID changed to fedcba9876543210/d
-/dpdk|INFO|DPDK Disabled - Use other_config:dpdk-init to enable/d']])
+/dpdk|INFO|DPDK Disabled - Use other_config:dpdk-init to enable/d
+/netdev: Flow API/d
+/tc: Using policy/d']])
 ])
 
 # OVS_VSWITCHD_START([vsctl-args], [vsctl-output], [=override],
diff --git a/tests/system-offloads-testsuite.at 
b/tests/system-offloads-testsuite.at
new file mode 100644
index 000..eb5d2d4
--- /dev/null
+++ b/tests/system-offloads-testsuite.at
@@ -0,0 +1,25 @@
+AT_INIT
+
+AT_COPYRIGHT([Copyright (c) 2016 Mellanox Technologies, Ltd.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You 

[ovs-dev] [PATCH V11 27/33] dpctl: Add an option to dump only certain kinds of flows

2017-06-13 Thread Roi Dayan
From: Paul Blakey 

Usage:
# to dump all datapath flows (default):
ovs-dpctl dump-flows

# to dump only flows that in kernel datapath:
ovs-dpctl dump-flows type=ovs

# to dump only flows that are offloaded:
ovs-dpctl dump-flows type=offloaded

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
Reviewed-by: Simon Horman 
Acked-by: Flavio Leitner 
---
 lib/dpctl.c   | 44 --
 lib/dpctl.man |  7 -
 lib/dpif-netdev.c |  3 ++-
 lib/dpif-netlink.c| 63 ++-
 lib/dpif-provider.h   |  6 +++--
 lib/dpif.c|  4 +--
 lib/dpif.h|  3 ++-
 ofproto/ofproto-dpif-upcall.c |  3 ++-
 ofproto/ofproto-dpif.c|  2 +-
 9 files changed, 106 insertions(+), 29 deletions(-)

diff --git a/lib/dpctl.c b/lib/dpctl.c
index 2dfaeeb..a2ee8a2 100644
--- a/lib/dpctl.c
+++ b/lib/dpctl.c
@@ -754,6 +754,11 @@ format_dpif_flow(struct ds *ds, const struct dpif_flow *f, 
struct hmap *ports,
 format_odp_actions(ds, f->actions, f->actions_len);
 }
 
+static char *supported_dump_types[] = {
+"offloaded",
+"ovs",
+};
+
 static int
 dpctl_dump_flows(int argc, const char *argv[], struct dpctl_params *dpctl_p)
 {
@@ -762,6 +767,7 @@ dpctl_dump_flows(int argc, const char *argv[], struct 
dpctl_params *dpctl_p)
 char *name;
 
 char *filter = NULL;
+char *type = NULL;
 struct flow flow_filter;
 struct flow_wildcards wc_filter;
 
@@ -773,22 +779,29 @@ dpctl_dump_flows(int argc, const char *argv[], struct 
dpctl_params *dpctl_p)
 struct dpif_flow_dump *flow_dump;
 struct dpif_flow f;
 int pmd_id = PMD_ID_NULL;
+int lastargc = 0;
 int error;
 
-if (argc > 1 && !strncmp(argv[argc - 1], "filter=", 7)) {
-filter = xstrdup(argv[--argc] + 7);
+while (argc > 1 && lastargc != argc) {
+lastargc = argc;
+if (!strncmp(argv[argc - 1], "filter=", 7) && !filter) {
+filter = xstrdup(argv[--argc] + 7);
+} else if (!strncmp(argv[argc - 1], "type=", 5) && !type) {
+type = xstrdup(argv[--argc] + 5);
+}
 }
-name = (argc == 2) ? xstrdup(argv[1]) : get_one_dp(dpctl_p);
+
+name = (argc > 1) ? xstrdup(argv[1]) : get_one_dp(dpctl_p);
 if (!name) {
 error = EINVAL;
-goto out_freefilter;
+goto out_free;
 }
 
 error = parsed_dpif_open(name, false, );
 free(name);
 if (error) {
 dpctl_error(dpctl_p, error, "opening datapath");
-goto out_freefilter;
+goto out_free;
 }
 
 
@@ -816,6 +829,20 @@ dpctl_dump_flows(int argc, const char *argv[], struct 
dpctl_params *dpctl_p)
 }
 }
 
+if (type) {
+error = EINVAL;
+for (int i = 0; i < ARRAY_SIZE(supported_dump_types); i++) {
+if (!strcmp(supported_dump_types[i], type)) {
+error = 0;
+break;
+}
+}
+if (error) {
+dpctl_error(dpctl_p, error, "Failed to parse type (%s)", type);
+goto out_free;
+}
+}
+
 /* Make sure that these values are different. PMD_ID_NULL means that the
  * pmd is unspecified (e.g. because the datapath doesn't have different
  * pmd threads), while NON_PMD_CORE_ID refers to every non pmd threads
@@ -823,7 +850,7 @@ dpctl_dump_flows(int argc, const char *argv[], struct 
dpctl_params *dpctl_p)
 BUILD_ASSERT(PMD_ID_NULL != NON_PMD_CORE_ID);
 
 ds_init();
-flow_dump = dpif_flow_dump_create(dpif, false);
+flow_dump = dpif_flow_dump_create(dpif, false, (type ? type : "dpctl"));
 flow_dump_thread = dpif_flow_dump_thread_create(flow_dump);
 while (dpif_flow_dump_next(flow_dump_thread, , 1)) {
 if (filter) {
@@ -874,8 +901,9 @@ out_dpifclose:
 odp_portno_names_destroy(_names);
 hmap_destroy(_names);
 dpif_close(dpif);
-out_freefilter:
+out_free:
 free(filter);
+free(type);
 return error;
 }
 
@@ -1558,7 +1586,7 @@ static const struct dpctl_command all_commands[] = {
 { "set-if", "dp iface...", 2, INT_MAX, dpctl_set_if, DP_RW },
 { "dump-dps", "", 0, 0, dpctl_dump_dps, DP_RO },
 { "show", "[dp...]", 0, INT_MAX, dpctl_show, DP_RO },
-{ "dump-flows", "[dp] [filter=..]", 0, 2, dpctl_dump_flows, DP_RO },
+{ "dump-flows", "[dp] [filter=..] [type=..]", 0, 3, dpctl_dump_flows, 
DP_RO },
 { "add-flow", "[dp] flow actions", 2, 3, dpctl_add_flow, DP_RW },
 { "mod-flow", "[dp] flow actions", 2, 3, dpctl_mod_flow, DP_RW },
 { "get-flow", "[dp] ufid", 1, 2, dpctl_get_flow, DP_RO },
diff --git a/lib/dpctl.man b/lib/dpctl.man
index f7ae311..f6e4a7a 100644
--- a/lib/dpctl.man
+++ b/lib/dpctl.man
@@ -99,7 +99,7 @@ default.  When multiple datapaths exist, then a datapath name 
is
 required.
 .
 .TP
-.DO "[\fB\-m 

[ovs-dev] [PATCH V11 31/33] dpif: Refactor flow logging functions to be used by other modules

2017-06-13 Thread Roi Dayan
To be reused by other modules.

Signed-off-by: Roi Dayan 
Reviewed-by: Paul Blakey 
Acked-by: Flavio Leitner 
---
 lib/dpif.c | 87 +++---
 lib/dpif.h | 28 
 2 files changed, 72 insertions(+), 43 deletions(-)

diff --git a/lib/dpif.c b/lib/dpif.c
index 7dc0d64..10bdd70 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -92,24 +92,10 @@ static struct vlog_rate_limit dpmsg_rl = 
VLOG_RATE_LIMIT_INIT(600, 600);
 /* Not really much point in logging many dpif errors. */
 static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);
 
-static void log_flow_message(const struct dpif *dpif, int error,
- const char *operation,
- const struct nlattr *key, size_t key_len,
- const struct nlattr *mask, size_t mask_len,
- const ovs_u128 *ufid,
- const struct dpif_flow_stats *stats,
- const struct nlattr *actions, size_t actions_len);
 static void log_operation(const struct dpif *, const char *operation,
   int error);
-static bool should_log_flow_message(int error);
-static void log_flow_put_message(struct dpif *, const struct dpif_flow_put *,
- int error);
-static void log_flow_del_message(struct dpif *, const struct dpif_flow_del *,
- int error);
-static void log_execute_message(struct dpif *, const struct dpif_execute *,
-bool subexecute, int error);
-static void log_flow_get_message(const struct dpif *,
- const struct dpif_flow_get *, int error);
+static bool should_log_flow_message(const struct vlog_module *module,
+int error);
 
 /* Incremented whenever tnl route, arp, etc changes. */
 struct seq *tnl_conf_seq;
@@ -1125,8 +,9 @@ dpif_flow_dump_next(struct dpif_flow_dump_thread *thread,
 if (n > 0) {
 struct dpif_flow *f;
 
-for (f = flows; f < [n] && should_log_flow_message(0); f++) {
-log_flow_message(dpif, 0, "flow_dump",
+for (f = flows; f < [n]
+ && should_log_flow_message(_module, 0); f++) {
+log_flow_message(dpif, 0, _module, "flow_dump",
  f->key, f->key_len, f->mask, f->mask_len,
  >ufid, >stats, f->actions, f->actions_len);
 }
@@ -1231,7 +1218,8 @@ dpif_execute_helper_cb(void *aux_, struct dp_packet_batch 
*packets_,
 execute.probe = false;
 execute.mtu = 0;
 aux->error = dpif_execute(aux->dpif, );
-log_execute_message(aux->dpif, , true, aux->error);
+log_execute_message(aux->dpif, _module, ,
+true, aux->error);
 
 dp_packet_delete(clone);
 
@@ -1346,7 +1334,7 @@ dpif_operate(struct dpif *dpif, struct dpif_op **ops, 
size_t n_ops)
 struct dpif_flow_put *put = >u.flow_put;
 
 COVERAGE_INC(dpif_flow_put);
-log_flow_put_message(dpif, put, error);
+log_flow_put_message(dpif, _module, put, error);
 if (error && put->stats) {
 memset(put->stats, 0, sizeof *put->stats);
 }
@@ -1360,7 +1348,7 @@ dpif_operate(struct dpif *dpif, struct dpif_op **ops, 
size_t n_ops)
 if (error) {
 memset(get->flow, 0, sizeof *get->flow);
 }
-log_flow_get_message(dpif, get, error);
+log_flow_get_message(dpif, _module, get, error);
 
 break;
 }
@@ -1369,7 +1357,7 @@ dpif_operate(struct dpif *dpif, struct dpif_op **ops, 
size_t n_ops)
 struct dpif_flow_del *del = >u.flow_del;
 
 COVERAGE_INC(dpif_flow_del);
-log_flow_del_message(dpif, del, error);
+log_flow_del_message(dpif, _module, del, error);
 if (error && del->stats) {
 memset(del->stats, 0, sizeof *del->stats);
 }
@@ -1378,7 +1366,8 @@ dpif_operate(struct dpif *dpif, struct dpif_op **ops, 
size_t n_ops)
 
 case DPIF_OP_EXECUTE:
 COVERAGE_INC(dpif_execute);
-log_execute_message(dpif, >u.execute, false, error);
+log_execute_message(dpif, _module, >u.execute,
+false, error);
 break;
 }
 }
@@ -1690,14 +1679,16 @@ flow_message_log_level(int error)
 }
 
 static bool
-should_log_flow_message(int error)
+should_log_flow_message(const struct vlog_module *module, int error)
 {
-return !vlog_should_drop(_module, 

[ovs-dev] [PATCH V11 33/33] NEWS: add a note about hw offloading

2017-06-13 Thread Roi Dayan
Signed-off-by: Roi Dayan 
Acked-by: Flavio Leitner 
---
 NEWS | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/NEWS b/NEWS
index 82004c8..fd2c98e 100644
--- a/NEWS
+++ b/NEWS
@@ -58,6 +58,9 @@ Post-v2.7.0
  * Transparently pop and push Ethernet headers at transmit/reception
of packets to/from L3 tunnels.
- The BFD detection multiplier is now user-configurable.
+   - New support for HW offloading
+ * HW offloading is disabled by default.
+ * HW offloading is done through the TC interface.
 
 v2.7.0 - 21 Feb 2017
 -
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V11 32/33] dpif-netlink: Use dpif logging functions

2017-06-13 Thread Roi Dayan
Remove redundant logging functions and reuse
the exposed dpif logging functions.

Signed-off-by: Roi Dayan 
Reviewed-by: Paul Blakey 
Acked-by: Flavio Leitner 
---
 lib/dpif-netlink.c | 39 ++-
 1 file changed, 6 insertions(+), 33 deletions(-)

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 5e648e7..4852a09 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -2186,32 +2186,6 @@ out:
 return err;
 }
 
-static void
-dbg_print_flow(const struct nlattr *key, size_t key_len,
-   const struct nlattr *mask, size_t mask_len,
-   const struct nlattr *actions, size_t actions_len,
-   const ovs_u128 *ufid,
-   const char *op)
-{
-struct ds s;
-
-ds_init();
-ds_put_cstr(, op);
-ds_put_cstr(, " (");
-odp_format_ufid(ufid, );
-ds_put_cstr(, ")");
-if (key_len) {
-ds_put_cstr(, "\nflow (verbose): ");
-odp_flow_format(key, key_len, mask, mask_len, NULL, , true);
-ds_put_cstr(, "\nflow: ");
-odp_flow_format(key, key_len, mask, mask_len, NULL, , false);
-}
-ds_put_cstr(, "\nactions: ");
-format_odp_actions(, actions, actions_len);
-VLOG_DBG("\n%s", ds_cstr());
-ds_destroy();
-}
-
 static int
 try_send_to_netdev(struct dpif_netlink *dpif, struct dpif_op *op)
 {
@@ -2224,9 +2198,8 @@ try_send_to_netdev(struct dpif_netlink *dpif, struct 
dpif_op *op)
 if (!put->ufid) {
 break;
 }
-dbg_print_flow(put->key, put->key_len, put->mask, put->mask_len,
-   put->actions, put->actions_len, put->ufid,
-   (put->flags & DPIF_FP_MODIFY ? "PUT(MODIFY)" : "PUT"));
+
+log_flow_put_message(>dpif, _module, put, 0);
 err = parse_flow_put(dpif, put);
 break;
 }
@@ -2236,8 +2209,8 @@ try_send_to_netdev(struct dpif_netlink *dpif, struct 
dpif_op *op)
 if (!del->ufid) {
 break;
 }
-dbg_print_flow(del->key, del->key_len, NULL, 0, NULL, 0,
-   del->ufid, "DEL");
+
+log_flow_del_message(>dpif, _module, del, 0);
 err = netdev_ports_flow_del(DPIF_HMAP_KEY(>dpif), del->ufid,
 del->stats);
 break;
@@ -2248,8 +2221,8 @@ try_send_to_netdev(struct dpif_netlink *dpif, struct 
dpif_op *op)
 if (!op->u.flow_get.ufid) {
 break;
 }
-dbg_print_flow(get->key, get->key_len, NULL, 0, NULL, 0,
-   get->ufid, "GET");
+
+log_flow_get_message(>dpif, _module, get, 0);
 err = parse_flow_get(dpif, get);
 break;
 }
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V11 30/33] netdev: Init flow api on already added ports on offload enable

2017-06-13 Thread Roi Dayan
From: Paul Blakey 

Ports already added to a switch are not being initialized for offloading
so when enabling offload we need to go over those ports.

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
Reviewed-by: Simon Horman 
Acked-by: Flavio Leitner 
---
 lib/netdev.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/lib/netdev.c b/lib/netdev.c
index 0aae83a..001b7b3 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -2346,6 +2346,18 @@ netdev_ports_flow_get(const void *obj, struct match 
*match,
 }
 
 #ifdef __linux__
+static void
+netdev_ports_flow_init(void)
+{
+struct port_to_netdev_data *data;
+
+ovs_mutex_lock(_hmap_mutex);
+HMAP_FOR_EACH(data, node, _to_netdev) {
+   netdev_init_flow_api(data->netdev);
+}
+ovs_mutex_unlock(_hmap_mutex);
+}
+
 void
 netdev_set_flow_api_enabled(const struct smap *ovs_other_config)
 {
@@ -2360,6 +2372,8 @@ netdev_set_flow_api_enabled(const struct smap 
*ovs_other_config)
 tc_set_policy(smap_get_def(ovs_other_config, "tc-policy",
TC_POLICY_DEFAULT));
 
+netdev_ports_flow_init();
+
 ovsthread_once_done();
 }
 }
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V11 26/33] dpctl: Add filter arg to dump-flows command info

2017-06-13 Thread Roi Dayan
This is for it to appear in bash completion.

Signed-off-by: Roi Dayan 
Acked-by: Flavio Leitner 
---
 lib/dpctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/dpctl.c b/lib/dpctl.c
index ae23913..2dfaeeb 100644
--- a/lib/dpctl.c
+++ b/lib/dpctl.c
@@ -1558,7 +1558,7 @@ static const struct dpctl_command all_commands[] = {
 { "set-if", "dp iface...", 2, INT_MAX, dpctl_set_if, DP_RW },
 { "dump-dps", "", 0, 0, dpctl_dump_dps, DP_RO },
 { "show", "[dp...]", 0, INT_MAX, dpctl_show, DP_RO },
-{ "dump-flows", "[dp]", 0, 2, dpctl_dump_flows, DP_RO },
+{ "dump-flows", "[dp] [filter=..]", 0, 2, dpctl_dump_flows, DP_RO },
 { "add-flow", "[dp] flow actions", 2, 3, dpctl_add_flow, DP_RW },
 { "mod-flow", "[dp] flow actions", 2, 3, dpctl_mod_flow, DP_RW },
 { "get-flow", "[dp] ufid", 1, 2, dpctl_get_flow, DP_RO },
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V11 25/33] netdev-tc-offloads: Add ingress on netdev flow api init

2017-06-13 Thread Roi Dayan
From: Paul Blakey 

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
Reviewed-by: Simon Horman 
Acked-by: Flavio Leitner 
---
 lib/netdev-tc-offloads.c | 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/lib/netdev-tc-offloads.c b/lib/netdev-tc-offloads.c
index 19c4082..72f4546 100644
--- a/lib/netdev-tc-offloads.c
+++ b/lib/netdev-tc-offloads.c
@@ -33,6 +33,7 @@
 #include "hash.h"
 #include "dpif.h"
 #include "tc.h"
+#include "netdev-linux.h"
 
 VLOG_DEFINE_THIS_MODULE(netdev_tc_offloads);
 
@@ -921,7 +922,27 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED,
 }
 
 int
-netdev_tc_init_flow_api(struct netdev *netdev OVS_UNUSED)
+netdev_tc_init_flow_api(struct netdev *netdev)
 {
+int ifindex;
+int error;
+
+ifindex = netdev_get_ifindex(netdev);
+if (ifindex < 0) {
+VLOG_ERR_RL(_rl, "failed to get ifindex for %s: %s",
+netdev_get_name(netdev), ovs_strerror(-ifindex));
+return -ifindex;
+}
+
+error = tc_add_del_ingress_qdisc(ifindex, true);
+
+if (error && error != EEXIST) {
+VLOG_ERR("failed adding ingress qdisc required for offloading: %s",
+ ovs_strerror(error));
+return error;
+}
+
+VLOG_INFO("added ingress qdisc to %s", netdev_get_name(netdev));
+
 return 0;
 }
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V11 23/33] netdev-linux: Disallow setting policing when configured with hw offload

2017-06-13 Thread Roi Dayan
From: Paul Blakey 

Notify as not supported. Otherwise the ingress qdisc is being removed and
offload rules will be removed.

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
Reviewed-by: Simon Horman 
Acked-by: Flavio Leitner 
---
 lib/netdev-linux.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 44dfac5..ce0a153 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -2087,6 +2087,14 @@ netdev_linux_set_policing(struct netdev *netdev_,
 int ifindex;
 int error;
 
+if (netdev_is_flow_api_enabled()) {
+if (kbits_rate) {
+VLOG_WARN_RL(, "%s: policing with offload isn't supported",
+ netdev_name);
+}
+return EOPNOTSUPP;
+}
+
 kbits_burst = (!kbits_rate ? 0   /* Force to 0 if no rate specified. */
: !kbits_burst ? 8000 /* Default to 8000 kbits if 0. */
: kbits_burst);   /* Stick with user-specified value. */
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V11 21/33] dpif-netlink: Use netdev flow get api to query a flow

2017-06-13 Thread Roi Dayan
From: Paul Blakey 

Search all datapath added netdevs for a given flow
using netdev flow api and parse it back to dpif flow.

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
Reviewed-by: Simon Horman 
Acked-by: Flavio Leitner 
---
 lib/dpif-netlink.c | 51 ++-
 lib/netdev.c   | 21 +
 lib/netdev.h   |  5 +
 3 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 6afa83d..0e6ed43 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -1989,6 +1989,45 @@ dpif_netlink_operate__(struct dpif_netlink *dpif,
 }
 
 static int
+parse_flow_get(struct dpif_netlink *dpif, struct dpif_flow_get *get)
+{
+struct dpif_flow *dpif_flow = get->flow;
+struct match match;
+struct nlattr *actions;
+struct dpif_flow_stats stats;
+struct ofpbuf buf;
+uint64_t act_buf[1024 / 8];
+struct odputil_keybuf maskbuf;
+struct odputil_keybuf keybuf;
+struct odputil_keybuf actbuf;
+struct ofpbuf key, mask, act;
+int err;
+
+ofpbuf_use_stack(, _buf, sizeof act_buf);
+err = netdev_ports_flow_get(DPIF_HMAP_KEY(>dpif), ,
+, get->ufid, , );
+if (err) {
+return err;
+}
+
+VLOG_DBG("found flow from netdev, translating to dpif flow");
+
+ofpbuf_use_stack(, , sizeof keybuf);
+ofpbuf_use_stack(, , sizeof actbuf);
+ofpbuf_use_stack(, , sizeof maskbuf);
+dpif_netlink_netdev_match_to_dpif_flow(, , , actions,
+   ,
+   (ovs_u128 *) get->ufid,
+   dpif_flow,
+   false);
+ofpbuf_put(get->buffer, nl_attr_get(actions), nl_attr_get_size(actions));
+dpif_flow->actions = ofpbuf_at(get->buffer, 0, 0);
+dpif_flow->actions_len = nl_attr_get_size(actions);
+
+return 0;
+}
+
+static int
 parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
 {
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
@@ -2160,7 +2199,17 @@ try_send_to_netdev(struct dpif_netlink *dpif, struct 
dpif_op *op)
 del->stats);
 break;
 }
-case DPIF_OP_FLOW_GET:
+case DPIF_OP_FLOW_GET: {
+struct dpif_flow_get *get = >u.flow_get;
+
+if (!op->u.flow_get.ufid) {
+break;
+}
+dbg_print_flow(get->key, get->key_len, NULL, 0, NULL, 0,
+   get->ufid, "GET");
+err = parse_flow_get(dpif, get);
+break;
+}
 case DPIF_OP_EXECUTE:
 default:
 break;
diff --git a/lib/netdev.c b/lib/netdev.c
index 4311c21..0aae83a 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -2324,6 +2324,27 @@ netdev_ports_flow_del(const void *obj, const ovs_u128 
*ufid,
 return ENOENT;
 }
 
+int
+netdev_ports_flow_get(const void *obj, struct match *match,
+  struct nlattr **actions,
+  const ovs_u128 *ufid,
+  struct dpif_flow_stats *stats,
+  struct ofpbuf *buf)
+{
+struct port_to_netdev_data *data;
+
+ovs_mutex_lock(_hmap_mutex);
+HMAP_FOR_EACH(data, node, _to_netdev) {
+if (data->obj == obj && !netdev_flow_get(data->netdev, match, actions,
+ ufid, stats, buf)) {
+ovs_mutex_unlock(_hmap_mutex);
+return 0;
+}
+}
+ovs_mutex_unlock(_hmap_mutex);
+return ENOENT;
+}
+
 #ifdef __linux__
 void
 netdev_set_flow_api_enabled(const struct smap *ovs_other_config)
diff --git a/lib/netdev.h b/lib/netdev.h
index 2ddc595..31846fa 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -192,6 +192,11 @@ struct netdev_flow_dump 
**netdev_ports_flow_dump_create(const void *obj,
 void netdev_ports_flow_flush(const void *obj);
 int netdev_ports_flow_del(const void *obj, const ovs_u128 *ufid,
   struct dpif_flow_stats *stats);
+int netdev_ports_flow_get(const void *obj, struct match *match,
+  struct nlattr **actions,
+  const ovs_u128 *ufid,
+  struct dpif_flow_stats *stats,
+  struct ofpbuf *buf);
 
 /* native tunnel APIs */
 /* Structure to pass parameters required to build a tunnel header. */
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V11 18/33] netdev-tc-offloads: Implement netdev flow put using tc interface

2017-06-13 Thread Roi Dayan
From: Paul Blakey 

Currently only tunnel offload is supported.

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
Reviewed-by: Simon Horman 
Acked-by: Flavio Leitner 
---
 lib/netdev-tc-offloads.c | 403 +--
 1 file changed, 393 insertions(+), 10 deletions(-)

diff --git a/lib/netdev-tc-offloads.c b/lib/netdev-tc-offloads.c
index a9e35e0..8dbccb1 100644
--- a/lib/netdev-tc-offloads.c
+++ b/lib/netdev-tc-offloads.c
@@ -89,7 +89,7 @@ del_ufid_tc_mapping(const ovs_u128 *ufid)
 
 /* Add ufid entry to ufid_tc hashmap.
  * If entry exists already it will be replaced. */
-static void OVS_UNUSED
+static void
 add_ufid_tc_mapping(const ovs_u128 *ufid, int prio, int handle,
 struct netdev *netdev, int ifindex)
 {
@@ -120,7 +120,7 @@ add_ufid_tc_mapping(const ovs_u128 *ufid, int prio, int 
handle,
  * Returns handle if successful and fill prio and netdev for that ufid.
  * Otherwise returns 0.
  */
-static int OVS_UNUSED
+static int
 get_ufid_tc_mapping(const ovs_u128 *ufid, int *prio, struct netdev **netdev)
 {
 size_t ufid_hash = hash_bytes(ufid, sizeof *ufid, 0);
@@ -183,7 +183,7 @@ struct prio_map_data {
  *
  * Return prio on success or 0 if we are out of prios.
  */
-static uint16_t OVS_UNUSED
+static uint16_t
 get_prio_for_tc_flower(struct tc_flower *flower)
 {
 static struct hmap prios = HMAP_INITIALIZER();
@@ -441,16 +441,399 @@ netdev_tc_flow_dump_next(struct netdev_flow_dump *dump,
 return false;
 }
 
+static int
+parse_put_flow_set_action(struct tc_flower *flower, const struct nlattr *set,
+  size_t set_len)
+{
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
+const struct nlattr *set_attr;
+size_t set_left;
+
+NL_ATTR_FOR_EACH_UNSAFE(set_attr, set_left, set, set_len) {
+if (nl_attr_type(set_attr) == OVS_KEY_ATTR_TUNNEL) {
+const struct nlattr *tunnel = nl_attr_get(set_attr);
+const size_t tunnel_len = nl_attr_get_size(set_attr);
+const struct nlattr *tun_attr;
+size_t tun_left;
+
+flower->set.set = true;
+NL_ATTR_FOR_EACH_UNSAFE(tun_attr, tun_left, tunnel, tunnel_len) {
+switch (nl_attr_type(tun_attr)) {
+case OVS_TUNNEL_KEY_ATTR_ID: {
+flower->set.id = nl_attr_get_be64(tun_attr);
+}
+break;
+case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: {
+flower->set.ipv4.ipv4_src = nl_attr_get_be32(tun_attr);
+}
+break;
+case OVS_TUNNEL_KEY_ATTR_IPV4_DST: {
+flower->set.ipv4.ipv4_dst = nl_attr_get_be32(tun_attr);
+}
+break;
+case OVS_TUNNEL_KEY_ATTR_IPV6_SRC: {
+flower->set.ipv6.ipv6_src =
+nl_attr_get_in6_addr(tun_attr);
+}
+break;
+case OVS_TUNNEL_KEY_ATTR_IPV6_DST: {
+flower->set.ipv6.ipv6_dst =
+nl_attr_get_in6_addr(tun_attr);
+}
+break;
+case OVS_TUNNEL_KEY_ATTR_TP_SRC: {
+flower->set.tp_src = nl_attr_get_be16(tun_attr);
+}
+break;
+case OVS_TUNNEL_KEY_ATTR_TP_DST: {
+flower->set.tp_dst = nl_attr_get_be16(tun_attr);
+}
+break;
+}
+}
+} else {
+VLOG_DBG_RL(, "unsupported set action type: %d",
+nl_attr_type(set_attr));
+return EOPNOTSUPP;
+}
+}
+return 0;
+}
+
+static int
+test_key_and_mask(struct match *match) {
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
+const struct flow *key = >flow;
+struct flow *mask = >wc.masks;
+
+if (mask->pkt_mark) {
+VLOG_DBG_RL(, "offloading attribute pkt_mark isn't supported");
+return EOPNOTSUPP;
+}
+
+if (mask->recirc_id && key->recirc_id) {
+VLOG_DBG_RL(, "offloading attribute recirc_id isn't supported");
+return EOPNOTSUPP;
+}
+mask->recirc_id = 0;
+
+if (mask->dp_hash) {
+VLOG_DBG_RL(, "offloading attribute dp_hash isn't supported");
+return EOPNOTSUPP;
+}
+
+if (mask->conj_id) {
+VLOG_DBG_RL(, "offloading attribute conj_id isn't supported");
+return EOPNOTSUPP;
+}
+
+if (mask->skb_priority) {
+VLOG_DBG_RL(, "offloading attribute skb_priority isn't supported");
+return EOPNOTSUPP;
+}
+
+if (mask->actset_output) {
+VLOG_DBG_RL(,
+"offloading attribute actset_output isn't supported");
+return EOPNOTSUPP;
+}
+
+if (mask->ct_state) {
+

[ovs-dev] [PATCH V11 22/33] netdev-tc-offloads: Implement flow get using tc interface

2017-06-13 Thread Roi Dayan
From: Paul Blakey 

Search the requested ufid for a offloaded flow, and if found,
dump and parse it back to required format.

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
Reviewed-by: Simon Horman 
Acked-by: Flavio Leitner 
---
 lib/netdev-tc-offloads.c | 50 ++--
 1 file changed, 44 insertions(+), 6 deletions(-)

diff --git a/lib/netdev-tc-offloads.c b/lib/netdev-tc-offloads.c
index 0092d48..19c4082 100644
--- a/lib/netdev-tc-offloads.c
+++ b/lib/netdev-tc-offloads.c
@@ -838,13 +838,51 @@ netdev_tc_flow_put(struct netdev *netdev, struct match 
*match,
 
 int
 netdev_tc_flow_get(struct netdev *netdev OVS_UNUSED,
-   struct match *match OVS_UNUSED,
-   struct nlattr **actions OVS_UNUSED,
-   const ovs_u128 *ufid OVS_UNUSED,
-   struct dpif_flow_stats *stats OVS_UNUSED,
-   struct ofpbuf *buf OVS_UNUSED)
+   struct match *match,
+   struct nlattr **actions,
+   const ovs_u128 *ufid,
+   struct dpif_flow_stats *stats,
+   struct ofpbuf *buf)
 {
-return EOPNOTSUPP;
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
+struct netdev *dev;
+struct tc_flower flower;
+odp_port_t in_port;
+int prio = 0;
+int ifindex;
+int handle;
+int err;
+
+handle = get_ufid_tc_mapping(ufid, , );
+if (!handle) {
+return ENOENT;
+}
+
+ifindex = netdev_get_ifindex(dev);
+if (ifindex < 0) {
+VLOG_ERR_RL(_rl, "failed to get ifindex for %s: %s",
+netdev_get_name(dev), ovs_strerror(-ifindex));
+netdev_close(dev);
+return -ifindex;
+}
+
+VLOG_DBG_RL(, "flow get (dev %s prio %d handle %d)",
+netdev_get_name(dev), prio, handle);
+err = tc_get_flower(ifindex, prio, handle, );
+netdev_close(dev);
+if (err) {
+VLOG_ERR_RL(_rl, "flow get failed (dev %s prio %d handle %d): 
%s",
+netdev_get_name(dev), prio, handle, ovs_strerror(err));
+return err;
+}
+
+in_port = netdev_ifindex_to_odp_port(ifindex);
+parse_tc_flower_to_match(, match, actions, stats, buf);
+
+match->wc.masks.in_port.odp_port = u32_to_odp(UINT32_MAX);
+match->flow.in_port.odp_port = in_port;
+
+return 0;
 }
 
 int
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V11 19/33] dpif-netlink: Use netdev flow del api to delete a flow

2017-06-13 Thread Roi Dayan
From: Paul Blakey 

If a flow was offloaded to a netdev we delete it using netdev
flow api.

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
Reviewed-by: Simon Horman 
Acked-by: Flavio Leitner 
---
 lib/dpif-netlink.c | 13 -
 lib/netdev.c   | 18 ++
 lib/netdev.h   |  2 ++
 3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 2b955a8..6afa83d 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -2148,7 +2148,18 @@ try_send_to_netdev(struct dpif_netlink *dpif, struct 
dpif_op *op)
 err = parse_flow_put(dpif, put);
 break;
 }
-case DPIF_OP_FLOW_DEL:
+case DPIF_OP_FLOW_DEL: {
+struct dpif_flow_del *del = >u.flow_del;
+
+if (!del->ufid) {
+break;
+}
+dbg_print_flow(del->key, del->key_len, NULL, 0, NULL, 0,
+   del->ufid, "DEL");
+err = netdev_ports_flow_del(DPIF_HMAP_KEY(>dpif), del->ufid,
+del->stats);
+break;
+}
 case DPIF_OP_FLOW_GET:
 case DPIF_OP_EXECUTE:
 default:
diff --git a/lib/netdev.c b/lib/netdev.c
index 41960b6..4311c21 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -2306,6 +2306,24 @@ netdev_ports_flow_dump_create(const void *obj, int 
*ports)
 return dumps;
 }
 
+int
+netdev_ports_flow_del(const void *obj, const ovs_u128 *ufid,
+  struct dpif_flow_stats *stats)
+{
+struct port_to_netdev_data *data;
+
+ovs_mutex_lock(_hmap_mutex);
+HMAP_FOR_EACH(data, node, _to_netdev) {
+if (data->obj == obj && !netdev_flow_del(data->netdev, ufid, stats)) {
+ovs_mutex_unlock(_hmap_mutex);
+return 0;
+}
+}
+ovs_mutex_unlock(_hmap_mutex);
+
+return ENOENT;
+}
+
 #ifdef __linux__
 void
 netdev_set_flow_api_enabled(const struct smap *ovs_other_config)
diff --git a/lib/netdev.h b/lib/netdev.h
index 0b2e674..2ddc595 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -190,6 +190,8 @@ odp_port_t netdev_ifindex_to_odp_port(int ifindex);
 struct netdev_flow_dump **netdev_ports_flow_dump_create(const void *obj,
 int *ports);
 void netdev_ports_flow_flush(const void *obj);
+int netdev_ports_flow_del(const void *obj, const ovs_u128 *ufid,
+  struct dpif_flow_stats *stats);
 
 /* native tunnel APIs */
 /* Structure to pass parameters required to build a tunnel header. */
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V11 15/33] netdev-tc-offloads: Implement netdev flow dump api using tc interface

2017-06-13 Thread Roi Dayan
From: Paul Blakey 

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
Reviewed-by: Simon Horman 
Acked-by: Flavio Leitner 
---
 lib/netdev-tc-offloads.c | 187 ---
 1 file changed, 178 insertions(+), 9 deletions(-)

diff --git a/lib/netdev-tc-offloads.c b/lib/netdev-tc-offloads.c
index 0786048..0eb62e0 100644
--- a/lib/netdev-tc-offloads.c
+++ b/lib/netdev-tc-offloads.c
@@ -150,7 +150,7 @@ get_ufid_tc_mapping(const ovs_u128 *ufid, int *prio, struct 
netdev **netdev)
  *
  * Returns true on success.
  */
-static bool OVS_UNUSED
+static bool
 find_ufid(int prio, int handle, struct netdev *netdev, ovs_u128 *ufid)
 {
 int ifindex = netdev_get_ifindex(netdev);
@@ -188,9 +188,20 @@ int
 netdev_tc_flow_dump_create(struct netdev *netdev,
struct netdev_flow_dump **dump_out)
 {
-struct netdev_flow_dump *dump = xzalloc(sizeof *dump);
+struct netdev_flow_dump *dump;
+int ifindex;
+
+ifindex = netdev_get_ifindex(netdev);
+if (ifindex < 0) {
+VLOG_ERR_RL(_rl, "failed to get ifindex for %s: %s",
+netdev_get_name(netdev), ovs_strerror(-ifindex));
+return -ifindex;
+}
 
+dump = xzalloc(sizeof *dump);
+dump->nl_dump = xzalloc(sizeof *dump->nl_dump);
 dump->netdev = netdev_ref(netdev);
+tc_dump_flower_start(ifindex, dump->nl_dump);
 
 *dump_out = dump;
 
@@ -200,21 +211,179 @@ netdev_tc_flow_dump_create(struct netdev *netdev,
 int
 netdev_tc_flow_dump_destroy(struct netdev_flow_dump *dump)
 {
+nl_dump_done(dump->nl_dump);
 netdev_close(dump->netdev);
+free(dump->nl_dump);
 free(dump);
+return 0;
+}
+
+static int
+parse_tc_flower_to_match(struct tc_flower *flower,
+ struct match *match,
+ struct nlattr **actions,
+ struct dpif_flow_stats *stats,
+ struct ofpbuf *buf) {
+size_t act_off;
+struct tc_flower_key *key = >key;
+struct tc_flower_key *mask = >mask;
+odp_port_t outport = 0;
+
+if (flower->ifindex_out) {
+outport = netdev_ifindex_to_odp_port(flower->ifindex_out);
+if (!outport) {
+return ENOENT;
+}
+}
+
+ofpbuf_clear(buf);
+
+match_init_catchall(match);
+match_set_dl_src_masked(match, key->src_mac, mask->src_mac);
+match_set_dl_dst_masked(match, key->dst_mac, mask->dst_mac);
+
+if (key->eth_type == htons(ETH_TYPE_VLAN)) {
+match_set_dl_vlan(match, htons(key->vlan_id));
+match_set_dl_vlan_pcp(match, key->vlan_prio);
+match_set_dl_type(match, key->encap_eth_type);
+flow_fix_vlan_tpid(>flow);
+} else {
+match_set_dl_type(match, key->eth_type);
+}
+
+if (key->ip_proto && is_ip_any(>flow)) {
+match_set_nw_proto(match, key->ip_proto);
+}
+
+match_set_nw_src_masked(match, key->ipv4.ipv4_src, mask->ipv4.ipv4_src);
+match_set_nw_dst_masked(match, key->ipv4.ipv4_dst, mask->ipv4.ipv4_dst);
+
+match_set_ipv6_src_masked(match,
+  >ipv6.ipv6_src, >ipv6.ipv6_src);
+match_set_ipv6_dst_masked(match,
+  >ipv6.ipv6_dst, >ipv6.ipv6_dst);
+
+match_set_tp_dst_masked(match, key->dst_port, mask->dst_port);
+match_set_tp_src_masked(match, key->src_port, mask->src_port);
+
+if (flower->tunnel.tunnel) {
+match_set_tun_id(match, flower->tunnel.id);
+if (flower->tunnel.ipv4.ipv4_dst) {
+match_set_tun_src(match, flower->tunnel.ipv4.ipv4_src);
+match_set_tun_dst(match, flower->tunnel.ipv4.ipv4_dst);
+} else if (!is_all_zeros(>tunnel.ipv6.ipv6_dst,
+   sizeof flower->tunnel.ipv6.ipv6_dst)) {
+match_set_tun_ipv6_src(match, >tunnel.ipv6.ipv6_src);
+match_set_tun_ipv6_dst(match, >tunnel.ipv6.ipv6_dst);
+}
+if (flower->tunnel.tp_dst) {
+match_set_tun_tp_dst(match, flower->tunnel.tp_dst);
+}
+}
+
+act_off = nl_msg_start_nested(buf, OVS_FLOW_ATTR_ACTIONS);
+{
+if (flower->vlan_pop) {
+nl_msg_put_flag(buf, OVS_ACTION_ATTR_POP_VLAN);
+}
+
+if (flower->vlan_push_id || flower->vlan_push_prio) {
+struct ovs_action_push_vlan *push;
+push = nl_msg_put_unspec_zero(buf, OVS_ACTION_ATTR_PUSH_VLAN,
+  sizeof *push);
+
+push->vlan_tpid = htons(ETH_TYPE_VLAN);
+push->vlan_tci = htons(flower->vlan_push_id
+   | (flower->vlan_push_prio << 13)
+   | VLAN_CFI);
+}
+
+if (flower->set.set) {
+size_t set_offset = nl_msg_start_nested(buf, OVS_ACTION_ATTR_SET);
+size_t tunnel_offset =
+nl_msg_start_nested(buf, 

[ovs-dev] [PATCH V11 24/33] netdev-vport: Use common offloads interface

2017-06-13 Thread Roi Dayan
From: Paul Blakey 

netdev vports are backed by actualy netdev at the kernel
level, so they can use the common netdev-tc offloads interface
for flow offloading (if enabled).

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
Reviewed-by: Simon Horman 
Acked-by: Flavio Leitner 
---
 lib/netdev-linux.c |  7 +++---
 lib/netdev-linux.h |  2 ++
 lib/netdev-vport.c | 65 +++---
 3 files changed, 52 insertions(+), 22 deletions(-)

diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index ce0a153..f5dc30f 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -530,7 +530,6 @@ static int set_flags(const char *, unsigned int flags);
 static int update_flags(struct netdev_linux *netdev, enum netdev_flags off,
 enum netdev_flags on, enum netdev_flags *old_flagsp)
 OVS_REQUIRES(netdev->mutex);
-static int do_get_ifindex(const char *netdev_name);
 static int get_ifindex(const struct netdev *, int *ifindexp);
 static int do_set_addr(struct netdev *netdev,
int ioctl_nr, const char *ioctl_name,
@@ -5414,8 +5413,8 @@ set_flags(const char *name, unsigned int flags)
 return af_inet_ifreq_ioctl(name, , SIOCSIFFLAGS, "SIOCSIFFLAGS");
 }
 
-static int
-do_get_ifindex(const char *netdev_name)
+int
+linux_get_ifindex(const char *netdev_name)
 {
 struct ifreq ifr;
 int error;
@@ -5438,7 +5437,7 @@ get_ifindex(const struct netdev *netdev_, int *ifindexp)
 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
 
 if (!(netdev->cache_valid & VALID_IFINDEX)) {
-int ifindex = do_get_ifindex(netdev_get_name(netdev_));
+int ifindex = linux_get_ifindex(netdev_get_name(netdev_));
 
 if (ifindex < 0) {
 netdev->get_ifindex_error = -ifindex;
diff --git a/lib/netdev-linux.h b/lib/netdev-linux.h
index d944691..880f864 100644
--- a/lib/netdev-linux.h
+++ b/lib/netdev-linux.h
@@ -27,6 +27,7 @@ struct netdev;
 
 int netdev_linux_ethtool_set_flag(struct netdev *netdev, uint32_t flag,
   const char *flag_name, bool enable);
+int linux_get_ifindex(const char *netdev_name);
 
 #define LINUX_FLOW_OFFLOAD_API  \
 netdev_tc_flow_flush,   \
@@ -37,4 +38,5 @@ int netdev_linux_ethtool_set_flag(struct netdev *netdev, 
uint32_t flag,
 netdev_tc_flow_get, \
 netdev_tc_flow_del, \
 netdev_tc_init_flow_api
+
 #endif /* netdev-linux.h */
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index fc02438..640cdbe 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -45,6 +45,10 @@
 #include "unaligned.h"
 #include "unixctl.h"
 #include "openvswitch/vlog.h"
+#include "netdev-tc-offloads.h"
+#ifdef __linux__
+#include "netdev-linux.h"
+#endif
 
 VLOG_DEFINE_THIS_MODULE(netdev_vport);
 
@@ -806,10 +810,37 @@ get_stats(const struct netdev *netdev, struct 
netdev_stats *stats)
 }
 
 
+#ifdef __linux__
+static int
+netdev_vport_get_ifindex__(const struct netdev *netdev_)
+{
+char buf[NETDEV_VPORT_NAME_BUFSIZE];
+const char *name = netdev_vport_get_dpif_port(netdev_, buf, sizeof(buf));
+
+return linux_get_ifindex(name);
+}
+
+static int
+netdev_vport_get_ifindex(const struct netdev *netdev_)
+{
+if (netdev_is_flow_api_enabled())
+return netdev_vport_get_ifindex__(netdev_);
+else
+return -EOPNOTSUPP;
+}
+
+#define NETDEV_VPORT_GET_IFINDEX netdev_vport_get_ifindex
+#define NETDEV_FLOW_OFFLOAD_API LINUX_FLOW_OFFLOAD_API
+#else /* !__linux__ */
+#define NETDEV_VPORT_GET_IFINDEX NULL
+#define NETDEV_FLOW_OFFLOAD_API NO_OFFLOAD_API
+#endif /* __linux__ */
+
 #define VPORT_FUNCTIONS(GET_CONFIG, SET_CONFIG, \
 GET_TUNNEL_CONFIG, GET_STATUS,  \
 BUILD_HEADER,   \
-PUSH_HEADER, POP_HEADER)\
+PUSH_HEADER, POP_HEADER,\
+GET_IFINDEX)\
 NULL,   \
 netdev_vport_run,   \
 netdev_vport_wait,  \
@@ -834,7 +865,7 @@ get_stats(const struct netdev *netdev, struct netdev_stats 
*stats)
 netdev_vport_get_etheraddr, \
 NULL,   /* get_mtu */   \
 NULL,   /* set_mtu */   \
-NULL,   /* get_ifindex */   \
+GET_IFINDEX,\
 NULL,   /* get_carrier */   \
 NULL,   /* get_carrier_resets */\
 NULL,   

[ovs-dev] [PATCH V11 17/33] netdev-tc-offloads: Add flower mask to priority map

2017-06-13 Thread Roi Dayan
From: Paul Blakey 

Flower classifer requires a different priority per mask,
so we hash the mask and generate a new priority for
each new mask used.

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
Reviewed-by: Simon Horman 
Acked-by: Flavio Leitner 
---
 lib/netdev-tc-offloads.c | 54 
 1 file changed, 54 insertions(+)

diff --git a/lib/netdev-tc-offloads.c b/lib/netdev-tc-offloads.c
index 0eb62e0..a9e35e0 100644
--- a/lib/netdev-tc-offloads.c
+++ b/lib/netdev-tc-offloads.c
@@ -170,6 +170,60 @@ find_ufid(int prio, int handle, struct netdev *netdev, 
ovs_u128 *ufid)
 return (data != NULL);
 }
 
+struct prio_map_data {
+struct hmap_node node;
+struct tc_flower_key mask;
+ovs_be16 protocol;
+uint16_t prio;
+};
+
+/* Get free prio for tc flower
+ * If prio is already allocated for mask/eth_type combination then return it.
+ * If not assign new prio.
+ *
+ * Return prio on success or 0 if we are out of prios.
+ */
+static uint16_t OVS_UNUSED
+get_prio_for_tc_flower(struct tc_flower *flower)
+{
+static struct hmap prios = HMAP_INITIALIZER();
+static struct ovs_mutex prios_lock = OVS_MUTEX_INITIALIZER;
+static uint16_t last_prio = 0;
+size_t key_len = sizeof(struct tc_flower_key);
+size_t hash = hash_bytes(>mask, key_len,
+ (OVS_FORCE uint32_t) flower->key.eth_type);
+struct prio_map_data *data;
+struct prio_map_data *new_data;
+
+/* We can use the same prio for same mask/eth combination but must have
+ * different prio if not. Flower classifier will reject same prio for
+ * different mask/eth combination. */
+ovs_mutex_lock(_lock);
+HMAP_FOR_EACH_WITH_HASH(data, node, hash, ) {
+if (!memcmp(>mask, >mask, key_len)
+&& data->protocol == flower->key.eth_type) {
+ovs_mutex_unlock(_lock);
+return data->prio;
+}
+}
+
+if (last_prio == UINT16_MAX) {
+/* last_prio can overflow if there will be many different kinds of
+ * flows which shouldn't happen organically. */
+ovs_mutex_unlock(_lock);
+return 0;
+}
+
+new_data = xzalloc(sizeof *new_data);
+memcpy(_data->mask, >mask, key_len);
+new_data->prio = ++last_prio;
+new_data->protocol = flower->key.eth_type;
+hmap_insert(, _data->node, hash);
+ovs_mutex_unlock(_lock);
+
+return new_data->prio;
+}
+
 int
 netdev_tc_flow_flush(struct netdev *netdev)
 {
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V11 16/33] dpif-netlink: Use netdev flow put api to insert a flow

2017-06-13 Thread Roi Dayan
From: Paul Blakey 

Using the new netdev flow api operate will now try and
offload flows to the relevant netdev of the input port.
Other operate methods flows will come in later patches.

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
Reviewed-by: Simon Horman 
Acked-by: Flavio Leitner 
---
 lib/dpif-netlink.c | 216 +++--
 lib/odp-util.c |  56 ++
 lib/odp-util.h |   3 +
 3 files changed, 268 insertions(+), 7 deletions(-)

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 4d39a5c..2b955a8 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -73,6 +73,7 @@ enum { MAX_PORTS = USHRT_MAX };
 #define ETH_FLAG_LRO  (1 << 15)/* LRO is enabled */
 
 #define FLOW_DUMP_MAX_BATCH 50
+#define OPERATE_MAX_OPS 50
 
 struct dpif_netlink_dp {
 /* Generic Netlink header. */
@@ -1831,8 +1832,6 @@ static size_t
 dpif_netlink_operate__(struct dpif_netlink *dpif,
struct dpif_op **ops, size_t n_ops)
 {
-enum { MAX_OPS = 50 };
-
 struct op_auxdata {
 struct nl_transaction txn;
 
@@ -1841,12 +1840,12 @@ dpif_netlink_operate__(struct dpif_netlink *dpif,
 
 struct ofpbuf reply;
 uint64_t reply_stub[1024 / 8];
-} auxes[MAX_OPS];
+} auxes[OPERATE_MAX_OPS];
 
-struct nl_transaction *txnsp[MAX_OPS];
+struct nl_transaction *txnsp[OPERATE_MAX_OPS];
 size_t i;
 
-n_ops = MIN(n_ops, MAX_OPS);
+n_ops = MIN(n_ops, OPERATE_MAX_OPS);
 for (i = 0; i < n_ops; i++) {
 struct op_auxdata *aux = [i];
 struct dpif_op *op = ops[i];
@@ -1989,18 +1988,221 @@ dpif_netlink_operate__(struct dpif_netlink *dpif,
 return n_ops;
 }
 
+static int
+parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
+{
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
+struct match match;
+odp_port_t in_port;
+const struct nlattr *nla;
+size_t left;
+int outputs = 0;
+struct netdev *dev;
+struct offload_info info;
+ovs_be16 dst_port = 0;
+int err;
+
+if (put->flags & DPIF_FP_PROBE) {
+return EOPNOTSUPP;
+}
+
+err = parse_key_and_mask_to_match(put->key, put->key_len, put->mask,
+  put->mask_len, );
+if (err) {
+return err;
+}
+
+/* When we try to install a dummy flow from a probed feature. */
+if (match.flow.dl_type == htons(0x1234)) {
+return EOPNOTSUPP;
+}
+
+in_port = match.flow.in_port.odp_port;
+dev = netdev_ports_get(in_port, DPIF_HMAP_KEY(>dpif));
+if (!dev) {
+return EOPNOTSUPP;
+}
+
+/* Get tunnel dst port and count outputs */
+NL_ATTR_FOR_EACH(nla, left, put->actions, put->actions_len) {
+if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
+const struct netdev_tunnel_config *tnl_cfg;
+struct netdev *outdev;
+odp_port_t out_port;
+
+outputs++;
+if (outputs > 1) {
+VLOG_DBG_RL(, "offloading multiple ports isn't supported");
+err = EOPNOTSUPP;
+goto out;
+}
+
+out_port = nl_attr_get_odp_port(nla);
+outdev = netdev_ports_get(out_port, DPIF_HMAP_KEY(>dpif));
+if (!outdev) {
+err = EOPNOTSUPP;
+goto out;
+}
+tnl_cfg = netdev_get_tunnel_config(outdev);
+if (tnl_cfg && tnl_cfg->dst_port != 0) {
+dst_port = tnl_cfg->dst_port;
+}
+netdev_close(outdev);
+}
+}
+
+info.port_hmap_obj = DPIF_HMAP_KEY(>dpif);
+info.tp_dst_port = dst_port;
+err = netdev_flow_put(dev, ,
+  CONST_CAST(struct nlattr *, put->actions),
+  put->actions_len,
+  CONST_CAST(ovs_u128 *, put->ufid),
+  , put->stats);
+
+if (!err) {
+if (put->flags & DPIF_FP_MODIFY) {
+struct dpif_op *opp;
+struct dpif_op op;
+
+op.type = DPIF_OP_FLOW_DEL;
+op.u.flow_del.key = put->key;
+op.u.flow_del.key_len = put->key_len;
+op.u.flow_del.ufid = put->ufid;
+op.u.flow_del.pmd_id = put->pmd_id;
+op.u.flow_del.stats = NULL;
+op.u.flow_del.terse = false;
+
+opp = 
+dpif_netlink_operate__(dpif, , 1);
+}
+
+VLOG_DBG("added flow");
+} else if (err != EEXIST) {
+VLOG_ERR_RL(, "failed to offload flow: %s", ovs_strerror(err));
+}
+
+out:
+if (err && err != EEXIST && (put->flags & DPIF_FP_MODIFY)) {
+/* Modified rule can't be offloaded, try and delete from HW */
+int del_err = netdev_flow_del(dev, put->ufid, put->stats);
+
+if (!del_err) {
+/* 

[ovs-dev] [PATCH V11 11/33] netdev-tc-offloads: Implement netdev flow flush using tc interface

2017-06-13 Thread Roi Dayan
From: Paul Blakey 

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
Reviewed-by: Simon Horman 
Acked-by: Flavio Leitner 
---
 lib/netdev-tc-offloads.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/lib/netdev-tc-offloads.c b/lib/netdev-tc-offloads.c
index d050adb..918c4c9 100644
--- a/lib/netdev-tc-offloads.c
+++ b/lib/netdev-tc-offloads.c
@@ -36,10 +36,20 @@
 
 VLOG_DEFINE_THIS_MODULE(netdev_tc_offloads);
 
+static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);
+
 int
-netdev_tc_flow_flush(struct netdev *netdev OVS_UNUSED)
+netdev_tc_flow_flush(struct netdev *netdev)
 {
-return EOPNOTSUPP;
+int ifindex = netdev_get_ifindex(netdev);
+
+if (ifindex < 0) {
+VLOG_ERR_RL(_rl, "failed to get ifindex for %s: %s",
+netdev_get_name(netdev), ovs_strerror(-ifindex));
+return -ifindex;
+}
+
+return tc_flush(ifindex);
 }
 
 int
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V11 13/33] netdev-tc-offloads: Add ufid to tc/netdev map

2017-06-13 Thread Roi Dayan
From: Paul Blakey 

Flows offloaded to tc are identified by priority
and handle pair while OVS flows are identified by ufid.
Added a hash map to convert between the two for later
retrieval and deleting of offloaded flows.

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
Reviewed-by: Simon Horman 
Acked-by: Flavio Leitner 
---
 lib/netdev-tc-offloads.c | 132 +++
 1 file changed, 132 insertions(+)

diff --git a/lib/netdev-tc-offloads.c b/lib/netdev-tc-offloads.c
index 918c4c9..0786048 100644
--- a/lib/netdev-tc-offloads.c
+++ b/lib/netdev-tc-offloads.c
@@ -38,6 +38,138 @@ VLOG_DEFINE_THIS_MODULE(netdev_tc_offloads);
 
 static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);
 
+static struct hmap ufid_tc = HMAP_INITIALIZER(_tc);
+static struct ovs_mutex ufid_lock = OVS_MUTEX_INITIALIZER;
+
+/**
+ * struct ufid_tc_data - data entry for ufid_tc hmap.
+ * @ufid_node: Element in @ufid_tc hash table by ufid key.
+ * @tc_node: Element in @ufid_tc hash table by prio/handle/ifindex key.
+ * @ufid: ufid assigned to the flow
+ * @prio: tc priority
+ * @handle: tc handle
+ * @ifindex: netdev ifindex.
+ * @netdev: netdev associated with the tc rule
+ */
+struct ufid_tc_data {
+struct hmap_node ufid_node;
+struct hmap_node tc_node;
+ovs_u128 ufid;
+uint16_t prio;
+uint32_t handle;
+int ifindex;
+struct netdev *netdev;
+};
+
+/* Remove matching ufid entry from ufid_tc hashmap. */
+static void
+del_ufid_tc_mapping(const ovs_u128 *ufid)
+{
+size_t ufid_hash = hash_bytes(ufid, sizeof *ufid, 0);
+struct ufid_tc_data *data;
+
+ovs_mutex_lock(_lock);
+HMAP_FOR_EACH_WITH_HASH(data, ufid_node, ufid_hash, _tc) {
+if (ovs_u128_equals(*ufid, data->ufid)) {
+break;
+}
+}
+
+if (!data) {
+ovs_mutex_unlock(_lock);
+return;
+}
+
+hmap_remove(_tc, >ufid_node);
+hmap_remove(_tc, >tc_node);
+netdev_close(data->netdev);
+free(data);
+ovs_mutex_unlock(_lock);
+}
+
+/* Add ufid entry to ufid_tc hashmap.
+ * If entry exists already it will be replaced. */
+static void OVS_UNUSED
+add_ufid_tc_mapping(const ovs_u128 *ufid, int prio, int handle,
+struct netdev *netdev, int ifindex)
+{
+size_t ufid_hash = hash_bytes(ufid, sizeof *ufid, 0);
+size_t tc_hash = hash_int(hash_int(prio, handle), ifindex);
+struct ufid_tc_data *new_data = xzalloc(sizeof *new_data);
+
+del_ufid_tc_mapping(ufid);
+
+new_data->ufid = *ufid;
+new_data->prio = prio;
+new_data->handle = handle;
+new_data->netdev = netdev_ref(netdev);
+new_data->ifindex = ifindex;
+
+ovs_mutex_lock(_lock);
+hmap_insert(_tc, _data->ufid_node, ufid_hash);
+hmap_insert(_tc, _data->tc_node, tc_hash);
+ovs_mutex_unlock(_lock);
+}
+
+/* Get ufid from ufid_tc hashmap.
+ *
+ * If netdev output param is not NULL then the function will return
+ * associated netdev on success and a refcount is taken on that netdev.
+ * The caller is then responsible to close the netdev.
+ *
+ * Returns handle if successful and fill prio and netdev for that ufid.
+ * Otherwise returns 0.
+ */
+static int OVS_UNUSED
+get_ufid_tc_mapping(const ovs_u128 *ufid, int *prio, struct netdev **netdev)
+{
+size_t ufid_hash = hash_bytes(ufid, sizeof *ufid, 0);
+struct ufid_tc_data *data;
+int handle = 0;
+
+ovs_mutex_lock(_lock);
+HMAP_FOR_EACH_WITH_HASH(data, ufid_node, ufid_hash, _tc) {
+if (ovs_u128_equals(*ufid, data->ufid)) {
+if (prio) {
+*prio = data->prio;
+}
+if (netdev) {
+*netdev = netdev_ref(data->netdev);
+}
+handle = data->handle;
+break;
+}
+}
+ovs_mutex_unlock(_lock);
+
+return handle;
+}
+
+/* Find ufid entry in ufid_tc hashmap using prio, handle and netdev.
+ * The result is saved in ufid.
+ *
+ * Returns true on success.
+ */
+static bool OVS_UNUSED
+find_ufid(int prio, int handle, struct netdev *netdev, ovs_u128 *ufid)
+{
+int ifindex = netdev_get_ifindex(netdev);
+struct ufid_tc_data *data;
+size_t tc_hash = hash_int(hash_int(prio, handle), ifindex);
+
+ovs_mutex_lock(_lock);
+HMAP_FOR_EACH_WITH_HASH(data, tc_node, tc_hash,  _tc) {
+if (data->prio == prio && data->handle == handle
+&& data->ifindex == ifindex) {
+*ufid = data->ufid;
+break;
+}
+}
+ovs_mutex_unlock(_lock);
+
+return (data != NULL);
+}
+
 int
 netdev_tc_flow_flush(struct netdev *netdev)
 {
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V11 12/33] dpif-netlink: Dump netdevs flows on flow dump

2017-06-13 Thread Roi Dayan
From: Paul Blakey 

While dumping flows, dump flows that were offloaded to
netdev and parse them back to dpif flow.

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
Reviewed-by: Simon Horman 
Acked-by: Flavio Leitner 
---
 lib/dpif-netlink.c | 170 +
 lib/netdev.c   |  33 +++
 lib/netdev.h   |   2 +
 3 files changed, 205 insertions(+)

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index f67b3c4..4d39a5c 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -39,6 +39,7 @@
 #include "flow.h"
 #include "fat-rwlock.h"
 #include "netdev.h"
+#include "netdev-provider.h"
 #include "netdev-linux.h"
 #include "netdev-vport.h"
 #include "netlink-conntrack.h"
@@ -71,6 +72,8 @@ enum { MAX_PORTS = USHRT_MAX };
  * missing if we have old headers. */
 #define ETH_FLAG_LRO  (1 << 15)/* LRO is enabled */
 
+#define FLOW_DUMP_MAX_BATCH 50
+
 struct dpif_netlink_dp {
 /* Generic Netlink header. */
 uint8_t cmd;
@@ -1445,6 +1448,10 @@ struct dpif_netlink_flow_dump {
 struct dpif_flow_dump up;
 struct nl_dump nl_dump;
 atomic_int status;
+struct netdev_flow_dump **netdev_dumps;
+int netdev_dumps_num;/* Number of netdev_flow_dumps */
+struct ovs_mutex netdev_lock;/* Guards the following. */
+int netdev_current_dump OVS_GUARDED; /* Shared current dump */
 };
 
 static struct dpif_netlink_flow_dump *
@@ -1453,6 +1460,26 @@ dpif_netlink_flow_dump_cast(struct dpif_flow_dump *dump)
 return CONTAINER_OF(dump, struct dpif_netlink_flow_dump, up);
 }
 
+static void
+start_netdev_dump(const struct dpif *dpif_,
+  struct dpif_netlink_flow_dump *dump)
+{
+ovs_mutex_init(>netdev_lock);
+
+if (!netdev_is_flow_api_enabled()) {
+dump->netdev_dumps_num = 0;
+dump->netdev_dumps = NULL;
+return;
+}
+
+ovs_mutex_lock(>netdev_lock);
+dump->netdev_current_dump = 0;
+dump->netdev_dumps
+= netdev_ports_flow_dump_create(DPIF_HMAP_KEY(dpif_),
+>netdev_dumps_num);
+ovs_mutex_unlock(>netdev_lock);
+}
+
 static struct dpif_flow_dump *
 dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse)
 {
@@ -1477,6 +1504,8 @@ dpif_netlink_flow_dump_create(const struct dpif *dpif_, 
bool terse)
 atomic_init(>status, 0);
 dump->up.terse = terse;
 
+start_netdev_dump(dpif_, dump);
+
 return >up;
 }
 
@@ -1487,6 +1516,17 @@ dpif_netlink_flow_dump_destroy(struct dpif_flow_dump 
*dump_)
 unsigned int nl_status = nl_dump_done(>nl_dump);
 int dump_status;
 
+for (int i = 0; i < dump->netdev_dumps_num; i++) {
+int err = netdev_flow_dump_destroy(dump->netdev_dumps[i]);
+
+if (err != 0 && err != EOPNOTSUPP) {
+VLOG_ERR("failed dumping netdev: %s", ovs_strerror(err));
+}
+}
+
+free(dump->netdev_dumps);
+ovs_mutex_destroy(>netdev_lock);
+
 /* No other thread has access to 'dump' at this point. */
 atomic_read_relaxed(>status, _status);
 free(dump);
@@ -1500,6 +1540,13 @@ struct dpif_netlink_flow_dump_thread {
 struct dpif_flow_stats stats;
 struct ofpbuf nl_flows; /* Always used to store flows. */
 struct ofpbuf *nl_actions;  /* Used if kernel does not supply actions. */
+int netdev_dump_idx;/* This thread current netdev dump index */
+bool netdev_done;   /* If we are finished dumping netdevs */
+
+/* (Key/Mask/Actions) Buffers for netdev dumping */
+struct odputil_keybuf keybuf[FLOW_DUMP_MAX_BATCH];
+struct odputil_keybuf maskbuf[FLOW_DUMP_MAX_BATCH];
+struct odputil_keybuf actbuf[FLOW_DUMP_MAX_BATCH];
 };
 
 static struct dpif_netlink_flow_dump_thread *
@@ -1519,6 +1566,8 @@ dpif_netlink_flow_dump_thread_create(struct 
dpif_flow_dump *dump_)
 thread->dump = dump;
 ofpbuf_init(>nl_flows, NL_DUMP_BUFSIZE);
 thread->nl_actions = NULL;
+thread->netdev_dump_idx = 0;
+thread->netdev_done = !(thread->netdev_dump_idx < dump->netdev_dumps_num);
 
 return >up;
 }
@@ -1556,6 +1605,89 @@ dpif_netlink_flow_to_dpif_flow(struct dpif *dpif, struct 
dpif_flow *dpif_flow,
 dpif_netlink_flow_get_stats(datapath_flow, _flow->stats);
 }
 
+/* The design is such that all threads are working together on the first dump
+ * to the last, in order (at first they all on dump 0).
+ * When the first thread finds that the given dump is finished,
+ * they all move to the next. If two or more threads find the same dump
+ * is finished at the same time, the first one will advance the shared
+ * netdev_current_dump and the others will catch up. */
+static void
+dpif_netlink_advance_netdev_dump(struct dpif_netlink_flow_dump_thread *thread)
+{
+struct dpif_netlink_flow_dump *dump = thread->dump;
+
+ovs_mutex_lock(>netdev_lock);
+/* if we 

[ovs-dev] [PATCH V11 14/33] match: Add helper function to set tunnel tp_dst

2017-06-13 Thread Roi Dayan
Add help function match_set_tun_tp_dst().
Will be used in the nxt commit.
This patch doesn't change any functionality.

Signed-off-by: Roi Dayan 
Acked-by: Flavio Leitner 
---
 include/openvswitch/match.h |  2 ++
 lib/match.c | 13 +
 2 files changed, 15 insertions(+)

diff --git a/include/openvswitch/match.h b/include/openvswitch/match.h
index 9e15da2..70da928 100644
--- a/include/openvswitch/match.h
+++ b/include/openvswitch/match.h
@@ -86,6 +86,8 @@ void match_set_tun_tos(struct match *match, uint8_t tos);
 void match_set_tun_tos_masked(struct match *match, uint8_t tos, uint8_t mask);
 void match_set_tun_flags(struct match *match, uint16_t flags);
 void match_set_tun_flags_masked(struct match *match, uint16_t flags, uint16_t 
mask);
+void match_set_tun_tp_dst(struct match *match, ovs_be16 tp_dst);
+void match_set_tun_tp_dst_masked(struct match *match, ovs_be16 port, ovs_be16 
mask);
 void match_set_tun_gbp_id_masked(struct match *match, ovs_be16 gbp_id, 
ovs_be16 mask);
 void match_set_tun_gbp_id(struct match *match, ovs_be16 gbp_id);
 void match_set_tun_gbp_flags_masked(struct match *match, uint8_t flags, 
uint8_t mask);
diff --git a/lib/match.c b/lib/match.c
index ebcdb29..9aa0d88 100644
--- a/lib/match.c
+++ b/lib/match.c
@@ -279,6 +279,19 @@ match_set_tun_flags_masked(struct match *match, uint16_t 
flags, uint16_t mask)
 }
 
 void
+match_set_tun_tp_dst(struct match *match, ovs_be16 tp_dst)
+{
+match_set_tun_tp_dst_masked(match, tp_dst, OVS_BE16_MAX);
+}
+
+void
+match_set_tun_tp_dst_masked(struct match *match, ovs_be16 port, ovs_be16 mask)
+{
+match->wc.masks.tunnel.tp_dst = mask;
+match->flow.tunnel.tp_dst = port & mask;
+}
+
+void
 match_set_tun_gbp_id_masked(struct match *match, ovs_be16 gbp_id, ovs_be16 
mask)
 {
 match->wc.masks.tunnel.gbp_id = mask;
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V11 07/33] other-config: Add hw-offload switch to control netdev flow offloading

2017-06-13 Thread Roi Dayan
From: Paul Blakey 

Add a new configuration option - hw-offload that enables netdev
flow api. Enabling this option will allow offloading flows
using netdev implementation instead of the kernel datapath.
This configuration option defaults to false - disabled.

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
Reviewed-by: Simon Horman 
Acked-by: Flavio Leitner 
---
 lib/netdev.c | 35 +++
 lib/netdev.h |  2 ++
 vswitchd/bridge.c|  1 +
 vswitchd/vswitch.xml | 15 +++
 4 files changed, 53 insertions(+)

diff --git a/lib/netdev.c b/lib/netdev.c
index 21d2b68..5008a43 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -92,6 +92,8 @@ struct netdev_registered_class {
 struct ovs_refcount refcnt;
 };
 
+static bool netdev_flow_api_enabled = false;
+
 /* This is set pretty low because we probably won't learn anything from the
  * additional log messages. */
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
@@ -2107,7 +2109,40 @@ netdev_init_flow_api(struct netdev *netdev)
 {
 const struct netdev_class *class = netdev->netdev_class;
 
+if (!netdev_is_flow_api_enabled()) {
+return EOPNOTSUPP;
+}
+
 return (class->init_flow_api
 ? class->init_flow_api(netdev)
 : EOPNOTSUPP);
 }
+
+bool
+netdev_is_flow_api_enabled(void)
+{
+return netdev_flow_api_enabled;
+}
+
+#ifdef __linux__
+void
+netdev_set_flow_api_enabled(const struct smap *ovs_other_config)
+{
+if (smap_get_bool(ovs_other_config, "hw-offload", false)) {
+static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
+
+if (ovsthread_once_start()) {
+netdev_flow_api_enabled = true;
+
+VLOG_INFO("netdev: Flow API Enabled");
+
+ovsthread_once_done();
+}
+}
+}
+#else
+void
+netdev_set_flow_api_enabled(const struct smap *ovs_other_config OVS_UNUSED)
+{
+}
+#endif
diff --git a/lib/netdev.h b/lib/netdev.h
index 87fa32a..5572366 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -179,6 +179,8 @@ int netdev_flow_get(struct netdev *, struct match *, struct 
nlattr **actions,
 int netdev_flow_del(struct netdev *, const ovs_u128 *,
 struct dpif_flow_stats *);
 int netdev_init_flow_api(struct netdev *);
+bool netdev_is_flow_api_enabled(void);
+void netdev_set_flow_api_enabled(const struct smap *ovs_other_config);
 
 /* native tunnel APIs */
 /* Structure to pass parameters required to build a tunnel header. */
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index cc7a43b..8336d70 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -2955,6 +2955,7 @@ bridge_run(void)
 cfg = ovsrec_open_vswitch_first(idl);
 
 if (cfg) {
+netdev_set_flow_api_enabled(>other_config);
 dpdk_init(>other_config);
 }
 
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 892f839..7e5062f 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -178,6 +178,21 @@
 
   The default is 1.
 
+
+
+  
+
+  Set this value to true to enable netdev flow offload.
+
+
+  The default value is false. Changing this value requires
+  restarting the daemon
+
+
+  Currently Open vSwitch supports hardware offloading on
+  Linux systems.  On other systems, this value is ignored.
+
   
 
   https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V11 09/33] dpif: Save added ports in a port map for netdev flow api use

2017-06-13 Thread Roi Dayan
From: Paul Blakey 

To use netdev flow offloading api, dpifs needs to iterate over
added ports. This addition inserts the added dpif ports in a hash map,
The map will also be used to translate dpif ports to netdevs.

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
Reviewed-by: Simon Horman 
Acked-by: Flavio Leitner 
---
 lib/dpif.c   |  32 +++
 lib/dpif.h   |   2 +
 lib/netdev.c | 132 +++
 lib/netdev.h |   6 +++
 4 files changed, 172 insertions(+)

diff --git a/lib/dpif.c b/lib/dpif.c
index fe6a986..7756315 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -355,7 +355,28 @@ do_open(const char *name, const char *type, bool create, 
struct dpif **dpifp)
 error = registered_class->dpif_class->open(registered_class->dpif_class,
name, create, );
 if (!error) {
+struct dpif_port_dump port_dump;
+struct dpif_port dpif_port;
+
 ovs_assert(dpif->dpif_class == registered_class->dpif_class);
+
+DPIF_PORT_FOR_EACH(_port, _dump, dpif) {
+struct netdev *netdev;
+int err;
+
+if (!strcmp(dpif_port.type, "internal")) {
+continue;
+}
+
+err = netdev_open(dpif_port.name, dpif_port.type, );
+
+if (!err) {
+netdev_ports_insert(netdev, DPIF_HMAP_KEY(dpif), _port);
+netdev_close(netdev);
+} else {
+VLOG_WARN("could not open netdev %s type %s", name, type);
+}
+}
 } else {
 dp_class_unref(registered_class);
 }
@@ -548,6 +569,15 @@ dpif_port_add(struct dpif *dpif, struct netdev *netdev, 
odp_port_t *port_nop)
 if (!error) {
 VLOG_DBG_RL(_rl, "%s: added %s as port %"PRIu32,
 dpif_name(dpif), netdev_name, port_no);
+
+if (strcmp(netdev_get_type(netdev), "internal")) {
+struct dpif_port dpif_port;
+
+dpif_port.type = CONST_CAST(char *, netdev_get_type(netdev));
+dpif_port.name = CONST_CAST(char *, netdev_name);
+dpif_port.port_no = port_no;
+netdev_ports_insert(netdev, DPIF_HMAP_KEY(dpif), _port);
+}
 } else {
 VLOG_WARN_RL(_rl, "%s: failed to add %s as port: %s",
  dpif_name(dpif), netdev_name, ovs_strerror(error));
@@ -572,6 +602,8 @@ dpif_port_del(struct dpif *dpif, odp_port_t port_no)
 if (!error) {
 VLOG_DBG_RL(_rl, "%s: port_del(%"PRIu32")",
 dpif_name(dpif), port_no);
+
+netdev_ports_remove(port_no, DPIF_HMAP_KEY(dpif));
 } else {
 log_operation(dpif, "port_del", error);
 }
diff --git a/lib/dpif.h b/lib/dpif.h
index 5d49b11..81376c0 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -401,6 +401,8 @@
 extern "C" {
 #endif
 
+#define DPIF_HMAP_KEY(x) ((x)->dpif_class)
+
 struct dpif;
 struct dpif_class;
 struct dpif_flow;
diff --git a/lib/netdev.c b/lib/netdev.c
index 3204689..b0e1abd 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -2127,6 +2127,138 @@ netdev_is_flow_api_enabled(void)
 return netdev_flow_api_enabled;
 }
 
+/* Protects below port hashmaps. */
+static struct ovs_mutex netdev_hmap_mutex = OVS_MUTEX_INITIALIZER;
+
+static struct hmap port_to_netdev OVS_GUARDED_BY(netdev_hmap_mutex)
+= HMAP_INITIALIZER(_to_netdev);
+static struct hmap ifindex_to_port OVS_GUARDED_BY(netdev_hmap_mutex)
+= HMAP_INITIALIZER(_to_port);
+
+struct port_to_netdev_data {
+struct hmap_node node;
+struct netdev *netdev;
+struct dpif_port dpif_port;
+const void *obj;
+};
+
+struct ifindex_to_port_data {
+struct hmap_node node;
+int ifindex;
+odp_port_t port;
+};
+
+static struct port_to_netdev_data *
+netdev_ports_lookup(odp_port_t port_no, const void *obj)
+OVS_REQUIRES(netdev_hmap_mutex)
+{
+size_t hash = hash_int(odp_to_u32(port_no), hash_pointer(obj, 0));
+struct port_to_netdev_data *data;
+
+HMAP_FOR_EACH_WITH_HASH(data, node, hash, _to_netdev) {
+if (data->obj == obj && data->dpif_port.port_no == port_no) {
+return data;
+}
+}
+return NULL;
+}
+
+int
+netdev_ports_insert(struct netdev *netdev, const void *obj,
+struct dpif_port *dpif_port)
+{
+size_t hash = hash_int(odp_to_u32(dpif_port->port_no),
+   hash_pointer(obj, 0));
+struct port_to_netdev_data *data;
+struct ifindex_to_port_data *ifidx;
+int ifindex = netdev_get_ifindex(netdev);
+
+if (ifindex < 0) {
+return ENODEV;
+}
+
+data = xzalloc(sizeof *data);
+ifidx = xzalloc(sizeof *ifidx);
+
+ovs_mutex_lock(_hmap_mutex);
+if (netdev_ports_lookup(dpif_port->port_no, obj)) {
+ovs_mutex_unlock(_hmap_mutex);
+return EEXIST;
+}
+
+data->netdev = 

[ovs-dev] [PATCH V11 10/33] dpif-netlink: Flush added ports using netdev flow api

2017-06-13 Thread Roi Dayan
From: Paul Blakey 

If netdev flow offloading is enabled, flush all
added ports using netdev flow api.

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
Reviewed-by: Simon Horman 
Acked-by: Flavio Leitner 
---
 lib/dpif-netlink.c |  5 +
 lib/netdev.c   | 14 ++
 lib/netdev.h   |  1 +
 3 files changed, 20 insertions(+)

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 71b1d5d..f67b3c4 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -1154,6 +1154,11 @@ dpif_netlink_flow_flush(struct dpif *dpif_)
 dpif_netlink_flow_init();
 flow.cmd = OVS_FLOW_CMD_DEL;
 flow.dp_ifindex = dpif->dp_ifindex;
+
+if (netdev_is_flow_api_enabled()) {
+netdev_ports_flow_flush(DPIF_HMAP_KEY(dpif_));
+}
+
 return dpif_netlink_flow_transact(, NULL, NULL);
 }
 
diff --git a/lib/netdev.c b/lib/netdev.c
index b0e1abd..7707f40 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -2259,6 +2259,20 @@ netdev_ifindex_to_odp_port(int ifindex)
 return ret;
 }
 
+void
+netdev_ports_flow_flush(const void *obj)
+{
+struct port_to_netdev_data *data;
+
+ovs_mutex_lock(_hmap_mutex);
+HMAP_FOR_EACH(data, node, _to_netdev) {
+if (data->obj == obj) {
+netdev_flow_flush(data->netdev);
+}
+}
+ovs_mutex_unlock(_hmap_mutex);
+}
+
 #ifdef __linux__
 void
 netdev_set_flow_api_enabled(const struct smap *ovs_other_config)
diff --git a/lib/netdev.h b/lib/netdev.h
index e59d018..99dfde5 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -187,6 +187,7 @@ int netdev_ports_insert(struct netdev *, const void *obj, 
struct dpif_port *);
 struct netdev *netdev_ports_get(odp_port_t port, const void *obj);
 int netdev_ports_remove(odp_port_t port, const void *obj);
 odp_port_t netdev_ifindex_to_odp_port(int ifindex);
+void netdev_ports_flow_flush(const void *obj);
 
 /* native tunnel APIs */
 /* Structure to pass parameters required to build a tunnel header. */
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V11 08/33] other-config: Add tc-policy switch to control tc flower flag

2017-06-13 Thread Roi Dayan
From: Paul Blakey 

Add a new configuration tc-policy option that controls tc
flower flag. Possible options are none, skip_sw, skip_hw.
The default is none which is to insert the rule both to sw and hw.
This option is only relevant if hw-offload is enabled.

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
Reviewed-by: Simon Horman 
---
 lib/netdev.c |  6 ++
 lib/tc.c | 43 ++-
 lib/tc.h |  3 +++
 vswitchd/vswitch.xml | 17 +
 4 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/lib/netdev.c b/lib/netdev.c
index 5008a43..3204689 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -54,6 +54,9 @@
 #include "openvswitch/vlog.h"
 #include "flow.h"
 #include "util.h"
+#ifdef __linux__
+#include "tc.h"
+#endif
 
 VLOG_DEFINE_THIS_MODULE(netdev);
 
@@ -2136,6 +2139,9 @@ netdev_set_flow_api_enabled(const struct smap 
*ovs_other_config)
 
 VLOG_INFO("netdev: Flow API Enabled");
 
+tc_set_policy(smap_get_def(ovs_other_config, "tc-policy",
+   TC_POLICY_DEFAULT));
+
 ovsthread_once_done();
 }
 }
diff --git a/lib/tc.c b/lib/tc.c
index d3eaf98..9ca7b76 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -39,6 +39,14 @@ VLOG_DEFINE_THIS_MODULE(tc);
 
 static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);
 
+enum tc_offload_policy {
+TC_POLICY_NONE,
+TC_POLICY_SKIP_SW,
+TC_POLICY_SKIP_HW
+};
+
+static enum tc_offload_policy tc_policy = TC_POLICY_NONE;
+
 struct tcmsg *
 tc_make_request(int ifindex, int type, unsigned int flags,
 struct ofpbuf *request)
@@ -739,6 +747,18 @@ tc_get_flower(int ifindex, int prio, int handle, struct 
tc_flower *flower)
 return error;
 }
 
+static int
+tc_get_tc_cls_policy(enum tc_offload_policy policy)
+{
+if (policy == TC_POLICY_SKIP_HW) {
+return TCA_CLS_FLAGS_SKIP_HW;
+} else if (policy == TC_POLICY_SKIP_SW) {
+return TCA_CLS_FLAGS_SKIP_SW;
+}
+
+return 0;
+}
+
 static void
 nl_msg_put_act_push_vlan(struct ofpbuf *request, uint16_t vid, uint8_t prio)
 {
@@ -1044,7 +1064,7 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct 
tc_flower *flower)
 }
 }
 
-nl_msg_put_u32(request, TCA_FLOWER_FLAGS, 0);
+nl_msg_put_u32(request, TCA_FLOWER_FLAGS, tc_get_tc_cls_policy(tc_policy));
 
 if (flower->tunnel.tunnel) {
 nl_msg_put_flower_tunnel(request, flower);
@@ -1089,3 +1109,24 @@ tc_replace_flower(int ifindex, uint16_t prio, uint32_t 
handle,
 
 return error;
 }
+
+void
+tc_set_policy(const char *policy)
+{
+if (!policy) {
+return;
+}
+
+if (!strcmp(policy, "skip_sw")) {
+tc_policy = TC_POLICY_SKIP_SW;
+} else if (!strcmp(policy, "skip_hw")) {
+tc_policy = TC_POLICY_SKIP_HW;
+} else if (!strcmp(policy, "none")) {
+tc_policy = TC_POLICY_NONE;
+} else {
+VLOG_WARN("tc: Invalid policy '%s'", policy);
+return;
+}
+
+VLOG_INFO("tc: Using policy '%s'", policy);
+}
diff --git a/lib/tc.h b/lib/tc.h
index 78470a7..a472b99 100644
--- a/lib/tc.h
+++ b/lib/tc.h
@@ -36,6 +36,8 @@
 
 #define TC_INGRESS_PARENT TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS)
 
+#define TC_POLICY_DEFAULT "none"
+
 /* Returns tc handle 'major':'minor'. */
 static inline unsigned int
 tc_make_handle(unsigned int major, unsigned int minor)
@@ -152,5 +154,6 @@ int tc_flush(int ifindex);
 int tc_dump_flower_start(int ifindex, struct nl_dump *dump);
 int parse_netlink_to_tc_flower(struct ofpbuf *reply,
struct tc_flower *flower);
+void tc_set_policy(const char *policy);
 
 #endif /* tc.h */
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 7e5062f..96bf84c 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -195,6 +195,23 @@
 
   
 
+  
+
+Specified the policy used with HW offloading.
+Options:
+none- Add software rule and offload rule to 
HW.
+skip_sw - Offload rule to HW only.
+skip_hw - Add software rule without offloading 
rule to HW.
+
+
+This is only relevant if HW offloading is enabled (hw-offload).
+
+
+  The default value is none.
+
+  
+
   
 
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V11 06/33] netdev: Adding a new netdev API to be used for offloading flows

2017-06-13 Thread Roi Dayan
From: Paul Blakey 

Add a new API interface for offloading dpif flows to netdev.
The API consist on the following:
  flow_put - offload a new flow
  flow_get - query an offloaded flow
  flow_del - delete an offloaded flow
  flow_flush - flush all offloaded flows
  flow_dump_* - dump all offloaded flows

In upcoming commits we will introduce an implementation of this
API for netdev-linux.

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
Reviewed-by: Simon Horman 
Acked-by: Flavio Leitner 
---
 lib/automake.mk  |   2 +
 lib/netdev-bsd.c |   2 +
 lib/netdev-dpdk.c|   1 +
 lib/netdev-dummy.c   |   2 +
 lib/netdev-linux.c   |  15 +--
 lib/netdev-linux.h   |   9 
 lib/netdev-provider.h|  72 ++
 lib/netdev-tc-offloads.c | 114 +++
 lib/netdev-tc-offloads.h |  42 +
 lib/netdev-vport.c   |  11 -
 lib/netdev.c |  91 +
 lib/netdev.h |  23 ++
 12 files changed, 379 insertions(+), 5 deletions(-)
 create mode 100644 lib/netdev-tc-offloads.c
 create mode 100644 lib/netdev-tc-offloads.h

diff --git a/lib/automake.mk b/lib/automake.mk
index 0ac4708..54a1032 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -358,6 +358,8 @@ lib_libopenvswitch_la_SOURCES += \
lib/if-notifier.h \
lib/netdev-linux.c \
lib/netdev-linux.h \
+   lib/netdev-tc-offloads.c \
+   lib/netdev-tc-offloads.h \
lib/netlink-conntrack.c \
lib/netlink-conntrack.h \
lib/netlink-notifier.c \
diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
index c51646a..f863a18 100644
--- a/lib/netdev-bsd.c
+++ b/lib/netdev-bsd.c
@@ -1548,6 +1548,8 @@ netdev_bsd_update_flags(struct netdev *netdev_, enum 
netdev_flags off,
 netdev_bsd_rxq_recv, \
 netdev_bsd_rxq_wait, \
 netdev_bsd_rxq_drain,\
+ \
+NO_OFFLOAD_API   \
 }
 
 const struct netdev_class netdev_bsd_class =
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index b770b70..e8de47a 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -3303,6 +3303,7 @@ unlock:
 RXQ_RECV, \
 NULL,   /* rx_wait */ \
 NULL,   /* rxq_drain */   \
+NO_OFFLOAD_API\
 }
 
 static const struct netdev_class dpdk_class =
diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index d973d7e..d189a86 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -1414,6 +1414,8 @@ netdev_dummy_update_flags(struct netdev *netdev_,
 netdev_dummy_rxq_recv,  \
 netdev_dummy_rxq_wait,  \
 netdev_dummy_rxq_drain, \
+\
+NO_OFFLOAD_API  \
 }
 
 static const struct netdev_class dummy_class =
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index c8145c6..44dfac5 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -54,6 +54,7 @@
 #include "hash.h"
 #include "openvswitch/hmap.h"
 #include "netdev-provider.h"
+#include "netdev-tc-offloads.h"
 #include "netdev-vport.h"
 #include "netlink-notifier.h"
 #include "netlink-socket.h"
@@ -2796,7 +2797,8 @@ netdev_linux_update_flags(struct netdev *netdev_, enum 
netdev_flags off,
 }
 
 #define NETDEV_LINUX_CLASS(NAME, CONSTRUCT, GET_STATS,  \
-   GET_FEATURES, GET_STATUS)\
+   GET_FEATURES, GET_STATUS,\
+   FLOW_OFFLOAD_API)\
 {   \
 NAME,   \
 false,  /* is_pmd */\
@@ -2865,6 +2867,8 @@ netdev_linux_update_flags(struct netdev *netdev_, enum 
netdev_flags off,
 netdev_linux_rxq_recv,  \
 netdev_linux_rxq_wait,  \
 netdev_linux_rxq_drain, \
+\
+FLOW_OFFLOAD_API\
 }
 
 const struct netdev_class netdev_linux_class =
@@ -2873,7 +2877,8 @@ const struct netdev_class netdev_linux_class =
 netdev_linux_construct,
 netdev_linux_get_stats,
 netdev_linux_get_features,
-netdev_linux_get_status);
+

[ovs-dev] [PATCH V11 04/33] tc: Move functions the create/parse handle to be static inline

2017-06-13 Thread Roi Dayan
Those functions are just wrappers to available macros for readability.
Move them to tc.h to avoid function-call overhead.

Signed-off-by: Roi Dayan 
Acked-by: Flavio Leitner 
---
 lib/tc.c | 21 -
 lib/tc.h | 24 +---
 2 files changed, 21 insertions(+), 24 deletions(-)

diff --git a/lib/tc.c b/lib/tc.c
index a71a9e0..1f12e4a 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -24,27 +24,6 @@
 
 VLOG_DEFINE_THIS_MODULE(tc);
 
-/* Returns tc handle 'major':'minor'. */
-unsigned int
-tc_make_handle(unsigned int major, unsigned int minor)
-{
-return TC_H_MAKE(major << 16, minor);
-}
-
-/* Returns the major number from 'handle'. */
-unsigned int
-tc_get_major(unsigned int handle)
-{
-return TC_H_MAJ(handle) >> 16;
-}
-
-/* Returns the minor number from 'handle'. */
-unsigned int
-tc_get_minor(unsigned int handle)
-{
-return TC_H_MIN(handle);
-}
-
 struct tcmsg *
 tc_make_request(int ifindex, int type, unsigned int flags,
 struct ofpbuf *request)
diff --git a/lib/tc.h b/lib/tc.h
index 420cdf8..ad8a458 100644
--- a/lib/tc.h
+++ b/lib/tc.h
@@ -22,9 +22,27 @@
 #include 
 #include "openvswitch/ofpbuf.h"
 
-unsigned int tc_make_handle(unsigned int major, unsigned int minor);
-unsigned int tc_get_major(unsigned int handle);
-unsigned int tc_get_minor(unsigned int handle);
+/* Returns tc handle 'major':'minor'. */
+static inline unsigned int
+tc_make_handle(unsigned int major, unsigned int minor)
+{
+return TC_H_MAKE(major << 16, minor);
+}
+
+/* Returns the major number from 'handle'. */
+static inline unsigned int
+tc_get_major(unsigned int handle)
+{
+return TC_H_MAJ(handle) >> 16;
+}
+
+/* Returns the minor number from 'handle'. */
+static inline unsigned int
+tc_get_minor(unsigned int handle)
+{
+return TC_H_MIN(handle);
+}
+
 struct tcmsg *tc_make_request(int ifindex, int type,
   unsigned int flags, struct ofpbuf *);
 int tc_transact(struct ofpbuf *request, struct ofpbuf **replyp);
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V11 01/33] netdev-linux: Refactor two tc functions

2017-06-13 Thread Roi Dayan
Refactor tc_make_request and tc_add_del_ingress_qdisc to accept
ifindex instead of netdev struct.
We later want to move those outside netdev-linux module to be
used by other modules.
This patch doesn't change any functionality.

Signed-off-by: Roi Dayan 
Co-authored-by: Paul Blakey 
Signed-off-by: Paul Blakey 
Acked-by: Joe Stringer 
Acked-by: Flavio Leitner 
---
 lib/netdev-linux.c | 91 +-
 1 file changed, 55 insertions(+), 36 deletions(-)

diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 1b88775..d794453 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -442,10 +442,14 @@ static unsigned int tc_ticks_to_bytes(unsigned int rate, 
unsigned int ticks);
 static unsigned int tc_bytes_to_ticks(unsigned int rate, unsigned int size);
 static unsigned int tc_buffer_per_jiffy(unsigned int rate);
 
-static struct tcmsg *tc_make_request(const struct netdev *, int type,
+static struct tcmsg *tc_make_request(int ifindex, int type,
  unsigned int flags, struct ofpbuf *);
+static struct tcmsg *netdev_linux_tc_make_request(const struct netdev *,
+  int type,
+  unsigned int flags,
+  struct ofpbuf *);
 static int tc_transact(struct ofpbuf *request, struct ofpbuf **replyp);
-static int tc_add_del_ingress_qdisc(struct netdev *netdev, bool add);
+static int tc_add_del_ingress_qdisc(int ifindex, bool add);
 static int tc_add_policer(struct netdev *,
   uint32_t kbits_rate, uint32_t kbits_burst);
 
@@ -2089,6 +2093,7 @@ netdev_linux_set_policing(struct netdev *netdev_,
 {
 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
 const char *netdev_name = netdev_get_name(netdev_);
+int ifindex;
 int error;
 
 kbits_burst = (!kbits_rate ? 0   /* Force to 0 if no rate specified. */
@@ -2106,9 +2111,14 @@ netdev_linux_set_policing(struct netdev *netdev_,
 netdev->cache_valid &= ~VALID_POLICING;
 }
 
+error = get_ifindex(netdev_, );
+if (error) {
+goto out;
+}
+
 COVERAGE_INC(netdev_set_policing);
 /* Remove any existing ingress qdisc. */
-error = tc_add_del_ingress_qdisc(netdev_, false);
+error = tc_add_del_ingress_qdisc(ifindex, false);
 if (error) {
 VLOG_WARN_RL(, "%s: removing policing failed: %s",
  netdev_name, ovs_strerror(error));
@@ -2116,7 +2126,7 @@ netdev_linux_set_policing(struct netdev *netdev_,
 }
 
 if (kbits_rate) {
-error = tc_add_del_ingress_qdisc(netdev_, true);
+error = tc_add_del_ingress_qdisc(ifindex, true);
 if (error) {
 VLOG_WARN_RL(, "%s: adding policing qdisc failed: %s",
  netdev_name, ovs_strerror(error));
@@ -2385,7 +2395,7 @@ start_queue_dump(const struct netdev *netdev, struct 
queue_dump_state *state)
 struct ofpbuf request;
 struct tcmsg *tcmsg;
 
-tcmsg = tc_make_request(netdev, RTM_GETTCLASS, 0, );
+tcmsg = netdev_linux_tc_make_request(netdev, RTM_GETTCLASS, 0, );
 if (!tcmsg) {
 return false;
 }
@@ -2944,8 +2954,8 @@ codel_setup_qdisc__(struct netdev *netdev, uint32_t 
target, uint32_t limit,
 
 tc_del_qdisc(netdev);
 
-tcmsg = tc_make_request(netdev, RTM_NEWQDISC,
-NLM_F_EXCL | NLM_F_CREATE, );
+tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWQDISC,
+ NLM_F_EXCL | NLM_F_CREATE, );
 if (!tcmsg) {
 return ENODEV;
 }
@@ -3162,8 +3172,8 @@ fqcodel_setup_qdisc__(struct netdev *netdev, uint32_t 
target, uint32_t limit,
 
 tc_del_qdisc(netdev);
 
-tcmsg = tc_make_request(netdev, RTM_NEWQDISC,
-NLM_F_EXCL | NLM_F_CREATE, );
+tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWQDISC,
+ NLM_F_EXCL | NLM_F_CREATE, );
 if (!tcmsg) {
 return ENODEV;
 }
@@ -3386,8 +3396,8 @@ sfq_setup_qdisc__(struct netdev *netdev, uint32_t 
quantum, uint32_t perturb)
 
 tc_del_qdisc(netdev);
 
-tcmsg = tc_make_request(netdev, RTM_NEWQDISC,
-NLM_F_EXCL | NLM_F_CREATE, );
+tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWQDISC,
+ NLM_F_EXCL | NLM_F_CREATE, );
 if (!tcmsg) {
 return ENODEV;
 }
@@ -3573,8 +3583,8 @@ htb_setup_qdisc__(struct netdev *netdev)
 
 tc_del_qdisc(netdev);
 
-tcmsg = tc_make_request(netdev, RTM_NEWQDISC,
-NLM_F_EXCL | NLM_F_CREATE, );
+tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWQDISC,
+ NLM_F_EXCL | NLM_F_CREATE, );
 if (!tcmsg) {
 return ENODEV;
  

[ovs-dev] [PATCH V11 05/33] tc: Add tc flower functions

2017-06-13 Thread Roi Dayan
Add tc helper functions to query and manipulate the flower classifier.

Signed-off-by: Paul Blakey 
Co-authored-by: Roi Dayan 
Signed-off-by: Roi Dayan 
Acked-by: Flavio Leitner 
---
 lib/tc.c | 998 +++
 lib/tc.h | 105 +++
 2 files changed, 1103 insertions(+)

diff --git a/lib/tc.c b/lib/tc.c
index 1f12e4a..d3eaf98 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 Nicira, 
Inc.
+ * Copyright (c) 2016 Mellanox Technologies, Ltd.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,13 +18,27 @@
 #include 
 #include "tc.h"
 #include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "byte-order.h"
 #include "netlink-socket.h"
 #include "netlink.h"
 #include "openvswitch/ofpbuf.h"
 #include "openvswitch/vlog.h"
+#include "packets.h"
+#include "timeval.h"
+#include 
 
 VLOG_DEFINE_THIS_MODULE(tc);
 
+static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);
+
 struct tcmsg *
 tc_make_request(int ifindex, int type, unsigned int flags,
 struct ofpbuf *request)
@@ -91,3 +106,986 @@ tc_add_del_ingress_qdisc(int ifindex, bool add)
 
 return 0;
 }
+
+static const struct nl_policy tca_policy[] = {
+[TCA_KIND] = { .type = NL_A_STRING, .optional = false, },
+[TCA_OPTIONS] = { .type = NL_A_NESTED, .optional = false, },
+[TCA_STATS] = { .type = NL_A_UNSPEC,
+.min_len = sizeof(struct tc_stats), .optional = true, },
+[TCA_STATS2] = { .type = NL_A_NESTED, .optional = true, },
+};
+
+static const struct nl_policy tca_flower_policy[] = {
+[TCA_FLOWER_CLASSID] = { .type = NL_A_U32, .optional = true, },
+[TCA_FLOWER_INDEV] = { .type = NL_A_STRING, .max_len = IFNAMSIZ,
+   .optional = true, },
+[TCA_FLOWER_KEY_ETH_SRC] = { .type = NL_A_UNSPEC,
+ .min_len = ETH_ALEN, .optional = true, },
+[TCA_FLOWER_KEY_ETH_DST] = { .type = NL_A_UNSPEC,
+ .min_len = ETH_ALEN, .optional = true, },
+[TCA_FLOWER_KEY_ETH_SRC_MASK] = { .type = NL_A_UNSPEC,
+  .min_len = ETH_ALEN,
+  .optional = true, },
+[TCA_FLOWER_KEY_ETH_DST_MASK] = { .type = NL_A_UNSPEC,
+  .min_len = ETH_ALEN,
+  .optional = true, },
+[TCA_FLOWER_KEY_ETH_TYPE] = { .type = NL_A_U16, .optional = false, },
+[TCA_FLOWER_FLAGS] = { .type = NL_A_U32, .optional = false, },
+[TCA_FLOWER_ACT] = { .type = NL_A_NESTED, .optional = false, },
+[TCA_FLOWER_KEY_IP_PROTO] = { .type = NL_A_U8, .optional = true, },
+[TCA_FLOWER_KEY_IPV4_SRC] = { .type = NL_A_U32, .optional = true, },
+[TCA_FLOWER_KEY_IPV4_DST] = {.type = NL_A_U32, .optional = true, },
+[TCA_FLOWER_KEY_IPV4_SRC_MASK] = { .type = NL_A_U32, .optional = true, },
+[TCA_FLOWER_KEY_IPV4_DST_MASK] = { .type = NL_A_U32, .optional = true, },
+[TCA_FLOWER_KEY_IPV6_SRC] = { .type = NL_A_UNSPEC,
+  .min_len = sizeof(struct in6_addr),
+  .optional = true, },
+[TCA_FLOWER_KEY_IPV6_DST] = { .type = NL_A_UNSPEC,
+  .min_len = sizeof(struct in6_addr),
+  .optional = true, },
+[TCA_FLOWER_KEY_IPV6_SRC_MASK] = { .type = NL_A_UNSPEC,
+   .min_len = sizeof(struct in6_addr),
+   .optional = true, },
+[TCA_FLOWER_KEY_IPV6_DST_MASK] = { .type = NL_A_UNSPEC,
+   .min_len = sizeof(struct in6_addr),
+   .optional = true, },
+[TCA_FLOWER_KEY_TCP_SRC] = { .type = NL_A_U16, .optional = true, },
+[TCA_FLOWER_KEY_TCP_DST] = { .type = NL_A_U16, .optional = true, },
+[TCA_FLOWER_KEY_TCP_SRC_MASK] = { .type = NL_A_U16, .optional = true, },
+[TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NL_A_U16, .optional = true, },
+[TCA_FLOWER_KEY_UDP_SRC] = { .type = NL_A_U16, .optional = true, },
+[TCA_FLOWER_KEY_UDP_DST] = { .type = NL_A_U16, .optional = true, },
+[TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NL_A_U16, .optional = true, },
+[TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NL_A_U16, .optional = true, },
+[TCA_FLOWER_KEY_VLAN_ID] = { .type = NL_A_U16, .optional = true, },
+[TCA_FLOWER_KEY_VLAN_PRIO] = { .type = NL_A_U8, .optional = true, },
+[TCA_FLOWER_KEY_VLAN_ETH_TYPE] = { .type = NL_A_U16, .optional = true, },
+[TCA_FLOWER_KEY_ENC_KEY_ID] = { .type = NL_A_U32, .optional = true, },
+[TCA_FLOWER_KEY_ENC_IPV4_SRC] = { .type = NL_A_U32, 

[ovs-dev] [PATCH V11 03/33] tc: Refactor tcm handle assignment when creating filter qdisc

2017-06-13 Thread Roi Dayan
Use the available TC macros instead of 0x.

Signed-off-by: Roi Dayan 
Acked-by: Flavio Leitner 
---
 lib/tc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/tc.c b/lib/tc.c
index d3263a2..a71a9e0 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -95,7 +95,7 @@ tc_add_del_ingress_qdisc(int ifindex, bool add)
 int flags = add ? NLM_F_EXCL | NLM_F_CREATE : 0;
 
 tcmsg = tc_make_request(ifindex, type, flags, );
-tcmsg->tcm_handle = tc_make_handle(0x, 0);
+tcmsg->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
 tcmsg->tcm_parent = TC_H_INGRESS;
 nl_msg_put_string(, TCA_KIND, "ingress");
 nl_msg_put_unspec(, TCA_OPTIONS, NULL, 0);
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V11 02/33] tc: Introduce tc module

2017-06-13 Thread Roi Dayan
From: Paul Blakey 

Add tc module to expose tc operations to be used by other modules.
Move some tc related functions from netdev-linux.c to tc.c
This patch doesn't change any functionality.

Signed-off-by: Paul Blakey 
Co-authored-by: Roi Dayan 
Signed-off-by: Roi Dayan 
Acked-by: Joe Stringer 
Acked-by: Flavio Leitner 
---
 lib/automake.mk|   4 +-
 lib/netdev-linux.c | 100 +-
 lib/tc.c   | 114 +
 lib/tc.h   |  33 
 4 files changed, 151 insertions(+), 100 deletions(-)
 create mode 100644 lib/tc.c
 create mode 100644 lib/tc.h

diff --git a/lib/automake.mk b/lib/automake.mk
index f5baba2..0ac4708 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -368,7 +368,9 @@ lib_libopenvswitch_la_SOURCES += \
lib/rtnetlink.c \
lib/rtnetlink.h \
lib/route-table.c \
-   lib/route-table.h
+   lib/route-table.h \
+   lib/tc.c \
+   lib/tc.h
 endif
 
 if DPDK_NETDEV
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index d794453..c8145c6 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -29,9 +29,6 @@
 #include 
 #include 
 #include 
-#include 
-#include 
-#include 
 #include 
 #include 
 #include 
@@ -70,6 +67,7 @@
 #include "openvswitch/shash.h"
 #include "socket-util.h"
 #include "sset.h"
+#include "tc.h"
 #include "timer.h"
 #include "unaligned.h"
 #include "openvswitch/vlog.h"
@@ -434,22 +432,14 @@ static const struct tc_ops *const tcs[] = {
 NULL
 };
 
-static unsigned int tc_make_handle(unsigned int major, unsigned int minor);
-static unsigned int tc_get_major(unsigned int handle);
-static unsigned int tc_get_minor(unsigned int handle);
-
 static unsigned int tc_ticks_to_bytes(unsigned int rate, unsigned int ticks);
 static unsigned int tc_bytes_to_ticks(unsigned int rate, unsigned int size);
 static unsigned int tc_buffer_per_jiffy(unsigned int rate);
 
-static struct tcmsg *tc_make_request(int ifindex, int type,
- unsigned int flags, struct ofpbuf *);
 static struct tcmsg *netdev_linux_tc_make_request(const struct netdev *,
   int type,
   unsigned int flags,
   struct ofpbuf *);
-static int tc_transact(struct ofpbuf *request, struct ofpbuf **replyp);
-static int tc_add_del_ingress_qdisc(int ifindex, bool add);
 static int tc_add_policer(struct netdev *,
   uint32_t kbits_rate, uint32_t kbits_burst);
 
@@ -4657,44 +4647,6 @@ static double ticks_per_s;
  */
 static unsigned int buffer_hz;
 
-/* Returns tc handle 'major':'minor'. */
-static unsigned int
-tc_make_handle(unsigned int major, unsigned int minor)
-{
-return TC_H_MAKE(major << 16, minor);
-}
-
-/* Returns the major number from 'handle'. */
-static unsigned int
-tc_get_major(unsigned int handle)
-{
-return TC_H_MAJ(handle) >> 16;
-}
-
-/* Returns the minor number from 'handle'. */
-static unsigned int
-tc_get_minor(unsigned int handle)
-{
-return TC_H_MIN(handle);
-}
-
-static struct tcmsg *
-tc_make_request(int ifindex, int type, unsigned int flags,
-struct ofpbuf *request)
-{
-struct tcmsg *tcmsg;
-
-ofpbuf_init(request, 512);
-nl_msg_put_nlmsghdr(request, sizeof *tcmsg, type, NLM_F_REQUEST | flags);
-tcmsg = ofpbuf_put_zeros(request, sizeof *tcmsg);
-tcmsg->tcm_family = AF_UNSPEC;
-tcmsg->tcm_ifindex = ifindex;
-/* Caller should fill in tcmsg->tcm_handle. */
-/* Caller should fill in tcmsg->tcm_parent. */
-
-return tcmsg;
-}
-
 static struct tcmsg *
 netdev_linux_tc_make_request(const struct netdev *netdev, int type,
  unsigned int flags, struct ofpbuf *request)
@@ -4710,56 +4662,6 @@ netdev_linux_tc_make_request(const struct netdev 
*netdev, int type,
 return tc_make_request(ifindex, type, flags, request);
 }
 
-static int
-tc_transact(struct ofpbuf *request, struct ofpbuf **replyp)
-{
-int error = nl_transact(NETLINK_ROUTE, request, replyp);
-ofpbuf_uninit(request);
-return error;
-}
-
-/* Adds or deletes a root ingress qdisc on 'netdev'.  We use this for
- * policing configuration.
- *
- * This function is equivalent to running the following when 'add' is true:
- * /sbin/tc qdisc add dev  handle : ingress
- *
- * This function is equivalent to running the following when 'add' is false:
- * /sbin/tc qdisc del dev  handle : ingress
- *
- * The configuration and stats may be seen with the following command:
- * /sbin/tc -s qdisc show dev 
- *
- * Returns 0 if successful, otherwise a positive errno value.
- */
-static int
-tc_add_del_ingress_qdisc(int ifindex, bool add)
-{
-struct ofpbuf request;
-struct tcmsg *tcmsg;
-

[ovs-dev] [patch_v1] conntrack: Reset nat_info in un_nat conns.

2017-06-13 Thread Darrell Ball
Un-nat conns have no nat_info as do default conns.
However, un-nat conns are originally templated from the
corresponding default conns and therefore need to
have their nat_info explicitly nulled.  This
otherwise exposes a double free if conntrack_destroy()
were to be used to destroy the connection tracker.  This
would apply to cleaning the datapath after testing.

Fixes: 286de2729955 ("dpdk: Userspace Datapath: Introduce NAT Support.")
Signed-off-by: Darrell Ball 
---
 lib/conntrack.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/conntrack.c b/lib/conntrack.c
index 146edd7..90b154a 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -573,6 +573,7 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
 nc->conn_type == CT_CONN_TYPE_DEFAULT) {
 *nc = *conn_for_un_nat_copy;
 conn_for_un_nat_copy->conn_type = CT_CONN_TYPE_UN_NAT;
+conn_for_un_nat_copy->nat_info = NULL;
 }
 ct_rwlock_unlock(>nat_resources_lock);
 
-- 
1.9.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH V10 05/33] tc: Add tc flower functions

2017-06-13 Thread Flavio Leitner
On Mon, Jun 12, 2017 at 05:57:37PM +0300, Roi Dayan wrote:
> 
> 
> On 09/06/2017 21:37, Flavio Leitner wrote:
> > On Thu, Jun 08, 2017 at 02:46:22PM +0300, Roi Dayan wrote:
> > > Add tc helper functions to query and manipulate the flower classifier.
> > > 
> > > Signed-off-by: Paul Blakey 
> > > Co-authored-by: Roi Dayan 
> > > Signed-off-by: Roi Dayan 
> > 
> > Acked-by: Flavio Leitner 
> > 
> > Not sure why SCTP wasn't implemented, but not a blocker either.
> 
> 
> Hi Flavio,
> I didn't add it because later needed to spread changes across more
> commits to support it to the end. planned to do it in a later
> commit after the series.
> All the other changes took me long enough and I wanted to minimize the
> wait.

OK, it seems that at this point we could merge and let others start to
contribute as well.

fbl


> 
> > 
> > > --- a/lib/tc.c
> > > +++ b/lib/tc.c
> > [...]
> > > +
> > > +#define JIFFIES_TO_MS(x) (x * 10)
> > > +};
> > 
> > Thanks for fixing this
> > 
> > > +static void
> > > +nl_parse_tcf(const struct tcf_t *tm, struct tc_flower *flower)
> > > +{
> > > +flower->lastused = time_msec() - JIFFIES_TO_MS(tm->lastuse);
> > > +}
> > > +
> > 
> > And this, much better.
> > > +bs = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC], sizeof *bs);
> > > +put_32aligned_u64(>n_packets, bs->packets);
> > > +put_32aligned_u64(>n_bytes, bs->bytes);
> > 
> > 
> > > +int
> > > +tc_dump_flower_start(int ifindex, struct nl_dump *dump)
> > > +{
> > > +struct ofpbuf request;
> > > +struct tcmsg *tcmsg;
> > > +
> > > +tcmsg = tc_make_request(ifindex, RTM_GETTFILTER, NLM_F_DUMP, 
> > > );
> > > +tcmsg->tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
> > 
> > But that went in an opposite direction of the previous patch because
> > it is used like 5 times and it could be a define in tc.h leaving the TC
> > details hidden in there.
> > 
> 
> right. planned to do it but somehow skipped it. I'll be happy
> to update this if needed for this series or in a later commit.
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev

-- 
Flavio

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [patch_v2 3/3] conntrack: Add hash_finish() to conn_key_hash().

2017-06-13 Thread Fischetti, Antonio
Hi Darrell,
it seems in lib/hash.h there's already a hash_finish() function for the 
Intrinsic mode where the 1st parm is a uint64_t:
static inline uint32_t hash_finish(uint64_t hash, uint64_t final)

so I'm getting some errors when building with CFLAGS="-O2 -march=native -g" 

lib/hash.h:180:24: error: conflicting types for 'hash_finish'
 static inline uint32_t hash_finish(uint64_t hash, uint64_t final)

lib/hash.h:95:24: note: previous declaration of 'hash_finish' was here
 static inline uint32_t hash_finish(uint32_t hash, uint32_t final);


Antonio

> -Original Message-
> From: ovs-dev-boun...@openvswitch.org [mailto:ovs-dev-
> boun...@openvswitch.org] On Behalf Of Darrell Ball
> Sent: Friday, June 9, 2017 11:31 PM
> To: d...@openvswitch.org
> Subject: [ovs-dev] [patch_v2 3/3] conntrack: Add hash_finish() to
> conn_key_hash().
> 
> The function conn_key_hash() is updated to include
> a call to hash_finish() and also to make use of a
> new hash abstraction - ct_endpoint_hash_add().
> 
> Fixes: a489b16854b5 ("conntrack: New userspace connection tracker.")
> Signed-off-by: Darrell Ball 
> ---
>  lib/conntrack.c | 10 +++---
>  1 file changed, 3 insertions(+), 7 deletions(-)
> 
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index 9584a0a..146edd7 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -1529,14 +1529,10 @@ static uint32_t
>  conn_key_hash(const struct conn_key *key, uint32_t basis)
>  {
>  uint32_t hsrc, hdst, hash;
> -int i;
> 
>  hsrc = hdst = basis;
> -
> -for (i = 0; i < sizeof(key->src) / sizeof(uint32_t); i++) {
> -hsrc = hash_add(hsrc, ((uint32_t *) >src)[i]);
> -hdst = hash_add(hdst, ((uint32_t *) >dst)[i]);
> -}
> +hsrc = ct_endpoint_hash_add(hsrc, >src);
> +hdst = ct_endpoint_hash_add(hdst, >dst);
> 
>  /* Even if source and destination are swapped the hash will be the
> same. */
>  hash = hsrc ^ hdst;
> @@ -1546,7 +1542,7 @@ conn_key_hash(const struct conn_key *key, uint32_t
> basis)
>(uint32_t *) (key + 1) - (uint32_t *) (>dst +
> 1),
>hash);
> 
> -return hash;
> +return hash_finish(hash, 0);
>  }
> 
>  static void
> --
> 1.9.1
> 
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] byte-order: Fix undefined behavior of BYTES_TO_BE32.

2017-06-13 Thread Lance Richardson
> From: "Ben Pfaff" 
> To: d...@openvswitch.org
> Cc: "Ben Pfaff" , "Lance Richardson" 
> Sent: Tuesday, 13 June, 2017 12:51:14 AM
> Subject: [PATCH] byte-order: Fix undefined behavior of BYTES_TO_BE32.
> 
> A left shift that would produce a result that is not representable
> by the type of the expression's result has "undefined behavior"
> according to the C language standard. Avoid this by casting values
> that could set the upper bit to unsigned types.
> 
> Also document and convert a macro to a function.
> 
> While we're at it, delete the unused macro BE16S_TO_BE32.
> 
> Found via gcc's undefined behavior sanitizer.
> 
> Reported-by: Lance Richardson 
> Signed-off-by: Ben Pfaff 
> ---
>  lib/byte-order.h | 21 +
>  lib/flow.c   |  2 +-
>  2 files changed, 14 insertions(+), 9 deletions(-)
> 

Looks good.

Acked-by: Lance Richardson 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 7/8] netdev-dpdk: Configurable retries while enqueuing to vHost User ports.

2017-06-13 Thread Kevin Traynor
On 06/07/2017 10:21 AM, Bhanuprakash Bodireddy wrote:
> This commit adds "vhost-enque-retry" where in the number of retries
> performed while enqueuing packets to vHostUser ports can be configured
> in ovsdb.
> 
> Currently number of retries are set to '8' and a retry is performed
> when atleast some packets have been successfully sent on previous attempt.
> While this approach works well, it causes throughput drop when multiple
> vHost User ports are servied by same PMD thread.

Hi Bhanu,

You are saying the approach works well but you are changing the default
behaviour. It would be good to explain a bit more about the negative
effects of changing the default and compare that against the positive
effects, so everyone gets a balanced view. If you have measurements that
would be even better.

Kevin.

> 
> This commit by default disables retry mechanism and if retry logic needed
> the number of retries can be set in ovsdb. For example if a maximum of
> 3 retries has to be performed with atleast some pkts successfully
> enqueued in previous attempt, set below:
> 
>   $ ovs-vsctl set Open_vSwitch . other_config:vhost-enque-retry=3
> 
> CC: Kevin Traynor 
> Signed-off-by: Bhanuprakash Bodireddy 
> Signed-off-by: Antonio Fischetti 
> Co-authored-by: Antonio Fischetti 
> ---
>  lib/dpdk.c   | 10 ++
>  lib/dpdk.h   |  1 +
>  lib/netdev-dpdk.c|  4 ++--
>  vswitchd/vswitch.xml | 12 
>  4 files changed, 25 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/dpdk.c b/lib/dpdk.c
> index 8da6c32..77c8274 100644
> --- a/lib/dpdk.c
> +++ b/lib/dpdk.c
> @@ -41,6 +41,7 @@ VLOG_DEFINE_THIS_MODULE(dpdk);
>  static FILE *log_stream = NULL;   /* Stream for DPDK log redirection */
>  
>  static char *vhost_sock_dir = NULL;   /* Location of vhost-user sockets */
> +static int vhost_enq_retries_num = 0;
>  
>  static int
>  process_vhost_flags(char *flag, const char *default_val, int size,
> @@ -345,6 +346,9 @@ dpdk_init__(const struct smap *ovs_other_config)
>  vhost_sock_dir = sock_dir_subcomponent;
>  }
>  
> +vhost_enq_retries_num = smap_get_int(ovs_other_config,
> +  "vhost-enque-retry", 0);
> +
>  argv = grow_argv(, 0, 1);
>  argc = 1;
>  argv[0] = xstrdup(ovs_get_program_name());
> @@ -489,3 +493,9 @@ dpdk_set_lcore_id(unsigned cpu)
>  ovs_assert(cpu != NON_PMD_CORE_ID);
>  RTE_PER_LCORE(_lcore_id) = cpu;
>  }
> +
> +int
> +dpdk_get_vhost_retries(void)
> +{
> +return vhost_enq_retries_num;
> +}
> diff --git a/lib/dpdk.h b/lib/dpdk.h
> index 673a1f1..9bbd49c 100644
> --- a/lib/dpdk.h
> +++ b/lib/dpdk.h
> @@ -35,5 +35,6 @@ struct smap;
>  void dpdk_init(const struct smap *ovs_other_config);
>  void dpdk_set_lcore_id(unsigned cpu);
>  const char *dpdk_get_vhost_sock_dir(void);
> +int dpdk_get_vhost_retries(void);
>  
>  #endif /* dpdk.h */
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 765718e..a092412 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -146,7 +146,6 @@ BUILD_ASSERT_DECL((MAX_NB_MBUF / 
> ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF))
>  /* DPDK library uses uint8_t for port_id. */
>  typedef uint8_t dpdk_port_t;
>  
> -#define VHOST_ENQ_RETRY_NUM 8
>  #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
>  
>  static const struct rte_eth_conf port_conf = {
> @@ -1727,6 +1726,7 @@ netdev_dpdk_vhost_tx_burst(struct netdev_dpdk *dev, int 
> qid)
>  
>  int tx_vid = netdev_dpdk_get_vid(dev);
>  int tx_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
> +int vhost_retries = dpdk_get_vhost_retries();
>  uint32_t sent = 0;
>  uint32_t retries = 0;
>  uint32_t sum, total_pkts;
> @@ -1745,7 +1745,7 @@ netdev_dpdk_vhost_tx_burst(struct netdev_dpdk *dev, int 
> qid)
>  /* 'sum; packet have to be retransmitted */
>  sum -= ret;
>  }
> -} while (sum && (retries++ < VHOST_ENQ_RETRY_NUM));
> +} while (sum && (retries++ < vhost_retries));
>  
>  for (int i=0; i < total_pkts; i++) {
>  dp_packet_delete(txq->pkts[i]);
> diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
> index 892f839..f19fa03 100644
> --- a/vswitchd/vswitch.xml
> +++ b/vswitchd/vswitch.xml
> @@ -307,6 +307,18 @@
>  
>
>  
> +   +  type='{"type": "integer", "minInteger": 0, "maxInteger": 8}'>
> +
> +  Specifies the number of retries performed while enqueuing packets
> +  on to the vhost user ports. If this value is unset, no retries by
> +  default is performed on the enqueue side.
> +
> +
> +  Changing this value requires restarting the daemon.
> +
> +  
> +
>type='{"type": "integer", "minInteger": 1}'>
>  
> 

___
dev mailing list
d...@openvswitch.org

Re: [ovs-dev] [PATCH 0/8] netdev-dpdk: Use intermediate queue during packet transmission.

2017-06-13 Thread Eelco Chaudron

Hi Bhanu,

Went over the full patch set, and the changes look good to me.
All my previous concerns are addressed, and therefore I'm acking this 
series.


I do have one small remark regarding the dpdk_tx_queue struct, see 
individual

patch email.

Here are some numbers with this patch on a none tuned system, single run.
This just to make sure we still benefit with both patches applied.

Throughput for PV scenario, with 64 byte packets

Number
flows   MASTER With PATCH
=====
  10  4,531,4247,884,607
  32  3,137,3006,367,643
  50  2,552,7256,649,985
 100  2,473,8355,876,677
 500  2,308,8405,265,986
1000  2,380,7555,001,081


Throughput for PVP scenario, with 64 byte packets

Number
flows   MASTER With PATCH
=====
  10  2,309,2543,800,747
  32  1,626,3803,324,561
  50  1,538,8793,092,792
 100  1,429,0282,887,488
 500  1,271,7732,537,624
1000  1,268,4302,442,405

Latency test

 MASTER
 ===
 Pkt size  min(ns)  avg(ns)  max(ns)
  512  9,94712,381   264,131
 1024  7,662 9,445   194,463
 1280  7,790 9,115   196,059
 1518  8,103 9,599   197,646

 PATCH
 =
 Pkt size  min(ns)  avg(ns)  max(ns)
  512  10,195   12,551   199,699
 1024  7,838 9,612   206,378
 1280  8,151 9,575   187,848
 1518  8,095 9,643   198,552


Throughput for PP scenario, with 64 byte packets:

Number
flows   MASTER With PATCH
=====
  10  7,430,6168,853,037
  32  4,770,1906,774,006
  50  4,736,2597,336,776
 100  4,699,2376,146,151
 500  3,870,0195,242,781
1000  3,853,8835,121,911


Latency test

 MASTER
 ===
 Pkt size  min(ns)  avg(ns)  max(ns)
  512  4,8875,596165,246
 1024  5,8016,447170,842
 1280  6,3557,056159,056
 1518  6,8607,634160,860

 PATCH
 =
 Pkt size  min(ns)  avg(ns)  max(ns)
  512  4,7835,521158,134
 1024  5,8016,359170,859
 1280  6,3156,878150,301
 1518  6,5797,398143,068


Acked-by: Eelco Chaudron 



On 07/06/17 11:20, Bhanuprakash Bodireddy wrote:

After packet classification, packets are queued in to batches depending
on the matching netdev flow. Thereafter each batch is processed to
execute the related actions. This becomes particularly inefficient if
there are few packets in each batch as rte_eth_tx_burst() incurs expensive
MMIO writes.

This patch series implements intermediate queue for DPDK and vHost User ports.
Packets are queued and burst when the packet count exceeds threshold. Also
drain logic is implemented to handle cases where packets can get stuck in
the tx queues at low rate traffic conditions. Care has been taken to see
that latency is well with in the acceptable limits. Testing shows significant
performance gains with this implementation.

This path series combines the earlier 2 patches posted below.
   DPDK patch: 
https://mail.openvswitch.org/pipermail/ovs-dev/2017-April/331039.html
   vHost User patch: 
https://mail.openvswitch.org/pipermail/ovs-dev/2017-May/332271.html

Also this series proposes to disable the retries on vHost User ports and make it
configurable via ovsdb.(controversial?)

Performance Numbers with intermediate queue:

   DPDK ports
  ===

   Throughput for P2P scenario, for two 82599ES 10G port with 64 byte packets

   Number
   flows   MASTER With PATCH
   ====
 10   1072728313393844
 32704225311228799
 507515491 9607791
1005838699 9430730
5005285066 7845807
   10005226477 7135601

Latency test

MASTER
===
Pkt size  min(ns)  avg(ns)  max(ns)
 512  4,631  5,022309,914
1024  5,545  5,749104,294
1280  5,978  6,159 45,306
1518  6,419  6,774946,850

PATCH
=
Pkt size  min(ns)  avg(ns)  max(ns)
 512  4,711  5,064182,477
1024  5,601  5,888701,654
1280  6,018  6,491533,037
1518  6,467  6,734312,471

vHost User ports
   ==

   Throughput for PV scenario, with 64 byte packets

Number
flows   MASTERWith PATCH
     =   =
 105945899 7833914
 323872211 6530133
 503283713 6618711
1003132540 5857226
5002964499 5273006
   10002931952 5178038

   Latency test.

   MASTER
   ===
   Pkt size  min(ns)  avg(ns)  max(ns)
512  10,011   12,100   281,915
   1024   7,8709,313   193,116
   1280   7,8629,036   194,439
   1518   8,2159,417   

Re: [ovs-dev] [PATCH 5/8] netdev-dpdk: Add netdev_dpdk_vhost_txq_drain function.

2017-06-13 Thread Eelco Chaudron

On 07/06/17 11:21, Bhanuprakash Bodireddy wrote:

Add netdev_dpdk_vhost_txq_drain(), that flushes packets on vHost User
port queues. Also add netdev_dpdk_vhost_tx_burst() function that
uses rte_vhost_enqueue_burst() to enqueue burst of packets on vHost User
ports.

Signed-off-by: Bhanuprakash Bodireddy 
Signed-off-by: Antonio Fischetti 
Co-authored-by: Antonio Fischetti 
---
  lib/netdev-dpdk.c | 67 +--
  1 file changed, 65 insertions(+), 2 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 4a9d9aa..dfaa3cd 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -308,10 +308,15 @@ struct dpdk_tx_queue {
  * to enabled by guest. */
  int count; /* Number of buffered packets waiting to
be sent. */
+int vhost_pkt_cnt; /* Number of buffered packets waiting to
+  be sent on vhost port */
  struct rte_mbuf *burst_pkts[INTERIM_QUEUE_BURST_THRESHOLD];
 /* Intermediate queue where packets can
  * be buffered to amortize the cost of MMIO
  * writes. */
+struct dp_packet *pkts[INTERIM_QUEUE_BURST_THRESHOLD];
+   /* Intermediate queue where packets can
+
Does it make sense to create a union for buffers and count, as the type 
is not shared on the given port?
We also need some consistent naming, i.e count vs vhost_pkt_cnt, and 
burst_pkts vs pkts.



   * be buffered for vhost ports */
  };
  
  /* dpdk has no way to remove dpdk ring ethernet devices

@@ -1714,6 +1719,63 @@ netdev_dpdk_vhost_update_tx_counters(struct netdev_stats 
*stats,
  }
  }
  
+static int

+netdev_dpdk_vhost_tx_burst(struct netdev_dpdk *dev, int qid)
+{
+struct dpdk_tx_queue *txq = >tx_q[qid];
+struct rte_mbuf **cur_pkts = (struct rte_mbuf **)txq->pkts;
+
+int tx_vid = netdev_dpdk_get_vid(dev);
+int tx_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
+uint32_t sent = 0;
+uint32_t retries = 0;
+uint32_t sum, total_pkts;
+
+total_pkts = sum = txq->vhost_pkt_cnt;
+do {
+uint32_t ret;
+ret = rte_vhost_enqueue_burst(tx_vid, tx_qid, _pkts[sent], sum);
+if (OVS_UNLIKELY(!ret)) {
+/* No packets enqueued - do not retry. */
+break;
+} else {
+/* Packet have been sent */
+sent += ret;
+
+/* 'sum; packet have to be retransmitted */
+sum -= ret;
+}
+} while (sum && (retries++ < VHOST_ENQ_RETRY_NUM));
+
+for (int i=0; i < total_pkts; i++) {
+dp_packet_delete(txq->pkts[i]);
+}
+
+/* Reset pkt count */
+txq->vhost_pkt_cnt = 0;
+
+/* 'sum' refers to packets dropped */
+return sum;
+}
+
+/* Drain the txq if there are any packets available.
+ * dynamic_txqs/concurrent_txq is disabled for vHost User ports as
+ * 'OVS_VHOST_MAX_QUEUE_NUM[1024]' txqs are preallocated.
+ */
+static int
+netdev_dpdk_vhost_txq_drain(struct netdev *netdev, int qid,
+bool concurrent_txq OVS_UNUSED)
+{
+struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+struct dpdk_tx_queue *txq = >tx_q[qid];
+
+if (OVS_LIKELY(txq->vhost_pkt_cnt)) {
+netdev_dpdk_vhost_tx_burst(dev, qid);
+}
+
+return 0;
+}
+
  static void
  __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
   struct dp_packet **pkts, int cnt)
@@ -3425,7 +3487,8 @@ static const struct netdev_class dpdk_vhost_class =
  NULL,
  netdev_dpdk_vhost_reconfigure,
  netdev_dpdk_vhost_rxq_recv,
-NULL);
+netdev_dpdk_vhost_txq_drain);
+
  static const struct netdev_class dpdk_vhost_client_class =
  NETDEV_DPDK_CLASS(
  "dpdkvhostuserclient",
@@ -3441,7 +3504,7 @@ static const struct netdev_class dpdk_vhost_client_class =
  NULL,
  netdev_dpdk_vhost_client_reconfigure,
  netdev_dpdk_vhost_rxq_recv,
-NULL);
+netdev_dpdk_vhost_txq_drain);
  
  void

  netdev_dpdk_register(void)



___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [RFC] treewide: undefined behavior, passing null in nonnull parameters

2017-06-13 Thread Lance Richardson
> From: "Ben Pfaff" 
> To: "Lance Richardson" 
> Cc: d...@openvswitch.org
> Sent: Tuesday, 13 June, 2017 1:06:12 AM
> Subject: Re: [ovs-dev] [RFC] treewide: undefined behavior, passing null in 
> nonnull parameters
> 
> On Mon, Jun 12, 2017 at 08:06:01PM -0400, Lance Richardson wrote:
> > Eliminate a number of instances of undefined behavior related to
> > passing NULL in parameters having "nonnull" annotations.
> > 
> > Found with gcc's undefined behavior sanitizer.
> > 
> > Signed-off-by: Lance Richardson 
> > ---
> > 
> > Posting this as RFC because there is no apparent risk of
> > unwanted compiler optimizations related to undefined behavior
> > in existing code. The main value in fixing these issues is
> > in reducing noise to make it easier to find problematic
> > cases in the future.
> > 
> > Here is a small example of the type of unwanted optimization
> > to be concerned about:
> > 
> > test1a.c:
> > 
> > #include 
> > 
> > extern void foo(char*, size_t);
> > 
> > int main(int argc, char **argv)
> > {
> > char x[128];
> > 
> > foo(x, sizeof x);
> > foo(NULL, 0);
> > 
> > return 0;
> > }
> > 
> > test1b.c:
> > 
> > #include 
> > #include 
> > 
> > void foo(char *bar, size_t len)
> > {
> > memset(bar, 0, len);
> > 
> > if (bar)
> > printf("bar is non-null: %p\n", bar);
> > }
> > 
> > Compile and run:
> > gcc -o test -O2 test1a.c test1b.c
> > ./test
> > 
> > Output (second line might be a bit of a surprise):
> > bar is non-null: 0x7fff80f90d50
> > bar is non-null: (nil)
> 
> Hmm.  That is surprising.
> 
> > diff --git a/lib/netlink.c b/lib/netlink.c
> > index 3da22a1..fcad884 100644
> > --- a/lib/netlink.c
> > +++ b/lib/netlink.c
> > @@ -241,7 +241,12 @@ void
> >  nl_msg_put_unspec(struct ofpbuf *msg, uint16_t type,
> >const void *data, size_t size)
> >  {
> > -memcpy(nl_msg_put_unspec_uninit(msg, type, size), data, size);
> > +void *ptr;
> > +
> > +ptr = nl_msg_put_unspec_uninit(msg, type, size);
> > +if (size) {
> > +memcpy(ptr, data, size);
> > +}
> >  }
> 
> I guess the above is above null 'data', since 'ptr' should always be
> nonnull.  In that case, it seems reasonable.
> 
> >  /* Appends a Netlink attribute of the given 'type' and no payload to
> >  'msg'.
> > diff --git a/lib/ofpbuf.c b/lib/ofpbuf.c
> > index 3019c4a..2e71fed 100644
> > --- a/lib/ofpbuf.c
> > +++ b/lib/ofpbuf.c
> > @@ -375,7 +375,9 @@ void *
> >  ofpbuf_put_zeros(struct ofpbuf *b, size_t size)
> >  {
> >  void *dst = ofpbuf_put_uninit(b, size);
> > -memset(dst, 0, size);
> > +if (size) {
> > +memset(dst, 0, size);
> > +}
> >  return dst;
> >  }
> 
> In the above, when is dst null?  It looks to me like ofpbuf_put_uninit()
> always returns nonnull.
> 

Looks like it could return NULL if called with b->data = NULL, b->size = 0, and
size = 0. Seems odd to want to append no zero bytes to an empty buffer, but it
apparently happens while running the unit tests.

> > diff --git a/lib/svec.c b/lib/svec.c
> > index aad04e3..297a60c 100644
> > --- a/lib/svec.c
> > +++ b/lib/svec.c
> > @@ -127,7 +127,9 @@ compare_strings(const void *a_, const void *b_)
> >  void
> >  svec_sort(struct svec *svec)
> >  {
> > -qsort(svec->names, svec->n, sizeof *svec->names, compare_strings);
> > +if (svec->n) {
> > +qsort(svec->names, svec->n, sizeof *svec->names, compare_strings);
> > +}
> >  }
> 
> This one in svec_sort() looks good to me.
> 
> >  void
> > diff --git a/lib/util.c b/lib/util.c
> > index b2a1f8a..ddf8546 100644
> > --- a/lib/util.c
> > +++ b/lib/util.c
> > @@ -132,7 +132,9 @@ void *
> >  xmemdup(const void *p_, size_t size)
> >  {
> >  void *p = xmalloc(size);
> > -memcpy(p, p_, size);
> > +if (size) {
> > +memcpy(p, p_, size);
> > +}
> >  return p;
> >  }
> 
> I guess that the above must be about a null 'p_' parameter?  xmalloc()
> never returns null.
> 
> Maybe we should invent a nullable_memcpy() helper:
> 
> /* The C standards say that neither the 'dst' nor 'src' argument to
>  * memcpy() may be null, even if 'n' is zero.  This wrapper tolerates
>  * the null case. */
> static inline void
> nullable_memcpy(void *dst, const void *src, size_t n)
> {
> if (n) {
> memcpy(dst, src, n);
> }
> }
> 

Makes sense, ditto for a nullable_memset().

Thanks,

   Lance
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH V10 05/33] tc: Add tc flower functions

2017-06-13 Thread Roi Dayan



On 08/06/2017 14:46, Roi Dayan wrote:

Add tc helper functions to query and manipulate the flower classifier.

Signed-off-by: Paul Blakey 
Co-authored-by: Roi Dayan 
Signed-off-by: Roi Dayan 
---
 lib/tc.c | 989 +++
 lib/tc.h | 103 +++
 2 files changed, 1092 insertions(+)

diff --git a/lib/tc.c b/lib/tc.c
index 1f12e4a..30ece84 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 Nicira, 
Inc.
+ * Copyright (c) 2016 Mellanox Technologies, Ltd.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,13 +18,26 @@
 #include 
 #include "tc.h"
 #include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "byte-order.h"
 #include "netlink-socket.h"
 #include "netlink.h"
 #include "openvswitch/ofpbuf.h"
 #include "openvswitch/vlog.h"
+#include "timeval.h"
+#include 

 VLOG_DEFINE_THIS_MODULE(tc);

+static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);
+
 struct tcmsg *
 tc_make_request(int ifindex, int type, unsigned int flags,
 struct ofpbuf *request)
@@ -91,3 +105,978 @@ tc_add_del_ingress_qdisc(int ifindex, bool add)

 return 0;
 }
+
+static const struct nl_policy tca_policy[] = {
+[TCA_KIND] = { .type = NL_A_STRING, .optional = false, },
+[TCA_OPTIONS] = { .type = NL_A_NESTED, .optional = false, },
+[TCA_STATS] = { .type = NL_A_UNSPEC,
+.min_len = sizeof(struct tc_stats), .optional = true, },
+[TCA_STATS2] = { .type = NL_A_NESTED, .optional = true, },
+};
+
+static const struct nl_policy tca_flower_policy[] = {
+[TCA_FLOWER_CLASSID] = { .type = NL_A_U32, .optional = true, },
+[TCA_FLOWER_INDEV] = { .type = NL_A_STRING, .max_len = IFNAMSIZ,
+   .optional = true, },
+[TCA_FLOWER_KEY_ETH_SRC] = { .type = NL_A_UNSPEC,
+ .min_len = ETH_ALEN, .optional = true, },
+[TCA_FLOWER_KEY_ETH_DST] = { .type = NL_A_UNSPEC,
+ .min_len = ETH_ALEN, .optional = true, },
+[TCA_FLOWER_KEY_ETH_SRC_MASK] = { .type = NL_A_UNSPEC,
+  .min_len = ETH_ALEN,
+  .optional = true, },
+[TCA_FLOWER_KEY_ETH_DST_MASK] = { .type = NL_A_UNSPEC,
+  .min_len = ETH_ALEN,
+  .optional = true, },
+[TCA_FLOWER_KEY_ETH_TYPE] = { .type = NL_A_U16, .optional = false, },
+[TCA_FLOWER_FLAGS] = { .type = NL_A_U32, .optional = false, },
+[TCA_FLOWER_ACT] = { .type = NL_A_NESTED, .optional = false, },
+[TCA_FLOWER_KEY_IP_PROTO] = { .type = NL_A_U8, .optional = true, },
+[TCA_FLOWER_KEY_IPV4_SRC] = { .type = NL_A_U32, .optional = true, },
+[TCA_FLOWER_KEY_IPV4_DST] = {.type = NL_A_U32, .optional = true, },
+[TCA_FLOWER_KEY_IPV4_SRC_MASK] = { .type = NL_A_U32, .optional = true, },
+[TCA_FLOWER_KEY_IPV4_DST_MASK] = { .type = NL_A_U32, .optional = true, },
+[TCA_FLOWER_KEY_IPV6_SRC] = { .type = NL_A_UNSPEC,
+  .min_len = sizeof(struct in6_addr),
+  .optional = true, },
+[TCA_FLOWER_KEY_IPV6_DST] = { .type = NL_A_UNSPEC,
+  .min_len = sizeof(struct in6_addr),
+  .optional = true, },
+[TCA_FLOWER_KEY_IPV6_SRC_MASK] = { .type = NL_A_UNSPEC,
+   .min_len = sizeof(struct in6_addr),
+   .optional = true, },
+[TCA_FLOWER_KEY_IPV6_DST_MASK] = { .type = NL_A_UNSPEC,
+   .min_len = sizeof(struct in6_addr),
+   .optional = true, },
+[TCA_FLOWER_KEY_TCP_SRC] = { .type = NL_A_U16, .optional = true, },
+[TCA_FLOWER_KEY_TCP_DST] = { .type = NL_A_U16, .optional = true, },
+[TCA_FLOWER_KEY_TCP_SRC_MASK] = { .type = NL_A_U16, .optional = true, },
+[TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NL_A_U16, .optional = true, },
+[TCA_FLOWER_KEY_UDP_SRC] = { .type = NL_A_U16, .optional = true, },
+[TCA_FLOWER_KEY_UDP_DST] = { .type = NL_A_U16, .optional = true, },
+[TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NL_A_U16, .optional = true, },
+[TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NL_A_U16, .optional = true, },
+[TCA_FLOWER_KEY_VLAN_ID] = { .type = NL_A_U16, .optional = true, },
+[TCA_FLOWER_KEY_VLAN_PRIO] = { .type = NL_A_U8, .optional = true, },
+[TCA_FLOWER_KEY_VLAN_ETH_TYPE] = { .type = NL_A_U16, .optional = true, },
+[TCA_FLOWER_KEY_ENC_KEY_ID] = { .type = NL_A_U32, .optional = true, },
+[TCA_FLOWER_KEY_ENC_IPV4_SRC] = { .type = NL_A_U32, .optional = true, },
+

Re: [ovs-dev] [PATCH V10 15/33] netdev-tc-offloads: Implement netdev flow dump api using tc interface

2017-06-13 Thread Roi Dayan



On 08/06/2017 14:46, Roi Dayan wrote:

From: Paul Blakey 

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
Reviewed-by: Simon Horman 
---
 lib/netdev-tc-offloads.c | 186 ---
 1 file changed, 177 insertions(+), 9 deletions(-)

diff --git a/lib/netdev-tc-offloads.c b/lib/netdev-tc-offloads.c
index 0786048..4b14c5c 100644
--- a/lib/netdev-tc-offloads.c
+++ b/lib/netdev-tc-offloads.c
@@ -150,7 +150,7 @@ get_ufid_tc_mapping(const ovs_u128 *ufid, int *prio, struct 
netdev **netdev)
  *
  * Returns true on success.
  */
-static bool OVS_UNUSED
+static bool
 find_ufid(int prio, int handle, struct netdev *netdev, ovs_u128 *ufid)
 {
 int ifindex = netdev_get_ifindex(netdev);
@@ -188,9 +188,20 @@ int
 netdev_tc_flow_dump_create(struct netdev *netdev,
struct netdev_flow_dump **dump_out)
 {
-struct netdev_flow_dump *dump = xzalloc(sizeof *dump);
+struct netdev_flow_dump *dump;
+int ifindex;
+
+ifindex = netdev_get_ifindex(netdev);
+if (ifindex < 0) {
+VLOG_ERR_RL(_rl, "failed to get ifindex for %s: %s",
+netdev_get_name(netdev), ovs_strerror(-ifindex));
+return -ifindex;
+}

+dump = xzalloc(sizeof *dump);
+dump->nl_dump = xzalloc(sizeof *dump->nl_dump);
 dump->netdev = netdev_ref(netdev);
+tc_dump_flower_start(ifindex, dump->nl_dump);

 *dump_out = dump;

@@ -200,21 +211,178 @@ netdev_tc_flow_dump_create(struct netdev *netdev,
 int
 netdev_tc_flow_dump_destroy(struct netdev_flow_dump *dump)
 {
+nl_dump_done(dump->nl_dump);
 netdev_close(dump->netdev);
+free(dump->nl_dump);
 free(dump);
+return 0;
+}
+
+static int
+parse_tc_flower_to_match(struct tc_flower *flower,
+ struct match *match,
+ struct nlattr **actions,
+ struct dpif_flow_stats *stats,
+ struct ofpbuf *buf) {
+size_t act_off;
+struct tc_flower_key *key = >key;
+struct tc_flower_key *mask = >mask;
+odp_port_t outport = 0;
+
+if (flower->ifindex_out) {
+outport = netdev_ifindex_to_odp_port(flower->ifindex_out);
+if (!outport) {
+return ENOENT;
+}
+}
+
+ofpbuf_clear(buf);
+
+match_init_catchall(match);
+match_set_dl_type(match, key->eth_type);
+match_set_dl_src_masked(match, key->src_mac, mask->src_mac);
+match_set_dl_dst_masked(match, key->dst_mac, mask->dst_mac);
+if (key->vlan_id || key->vlan_prio) {


we should probably check if key->eth_type is vlan eth type.


+match_set_dl_vlan(match, htons(key->vlan_id));
+match_set_dl_vlan_pcp(match, key->vlan_prio);
+match_set_dl_type(match, key->encap_eth_type);
+}
+
+if (key->ip_proto &&
+(key->eth_type == htons(ETH_P_IP)
+ || key->eth_type == htons(ETH_P_IPV6))) {


we missed here. key is flower key and for vlan this will be vlan eth
type so we need to compare to match->dl_type which is little up
being set to the encap eth type if in vlan.


+match_set_nw_proto(match, key->ip_proto);
+}
+
+match_set_nw_src_masked(match, key->ipv4.ipv4_src, mask->ipv4.ipv4_src);
+match_set_nw_dst_masked(match, key->ipv4.ipv4_dst, mask->ipv4.ipv4_dst);
+
+match_set_ipv6_src_masked(match,
+  >ipv6.ipv6_src, >ipv6.ipv6_src);
+match_set_ipv6_dst_masked(match,
+  >ipv6.ipv6_dst, >ipv6.ipv6_dst);
+
+match_set_tp_dst_masked(match, key->dst_port, mask->dst_port);
+match_set_tp_src_masked(match, key->src_port, mask->src_port);
+
+if (flower->tunnel.tunnel) {
+match_set_tun_id(match, flower->tunnel.id);
+if (flower->tunnel.ipv4.ipv4_dst) {
+match_set_tun_src(match, flower->tunnel.ipv4.ipv4_src);
+match_set_tun_dst(match, flower->tunnel.ipv4.ipv4_dst);
+} else if (!is_all_zeros(>tunnel.ipv6.ipv6_dst,
+   sizeof flower->tunnel.ipv6.ipv6_dst)) {
+match_set_tun_ipv6_src(match, >tunnel.ipv6.ipv6_src);
+match_set_tun_ipv6_dst(match, >tunnel.ipv6.ipv6_dst);
+}
+if (flower->tunnel.tp_dst) {
+match_set_tun_tp_dst(match, flower->tunnel.tp_dst);
+}
+}
+
+act_off = nl_msg_start_nested(buf, OVS_FLOW_ATTR_ACTIONS);
+{
+if (flower->vlan_pop) {
+nl_msg_put_flag(buf, OVS_ACTION_ATTR_POP_VLAN);
+}
+
+if (flower->vlan_push_id || flower->vlan_push_prio) {
+struct ovs_action_push_vlan *push;
+push = nl_msg_put_unspec_zero(buf, OVS_ACTION_ATTR_PUSH_VLAN,
+  sizeof *push);
+
+push->vlan_tpid = htons(ETH_TYPE_VLAN);
+push->vlan_tci = htons(flower->vlan_push_id
+   | (flower->vlan_push_prio << 13)

Re: [ovs-dev] [RFC PATCH v2 03/19] Keepalive: Add initial keepalive support.

2017-06-13 Thread Bodireddy, Bhanuprakash
>Hi Bhanu,
>
>Bhanuprakash Bodireddy  writes:
>
>> This commit introduces the initial keepalive support by adding
>> 'keepalive' module and also helper and initialization functions that
>> will be invoked by later commits.
>>
>> This commit adds new ovsdb column "keepalive". It shows the overall
>> datapath status and the health of the cores running datapath threads.
>>
>> For eg:
>>   To enable keepalive feature.
>>   'ovs-vsctl --no-wait set Open_vSwitch . other_config:enable-
>keepalive=true'
>>
>>   To set timer interval of 5000ms for monitoring packet processing cores;
>>   'ovs-vsctl --no-wait set Open_vSwitch . \
>>  other_config:keepalive-interval="5000"
>>
>>   To set shared memory block name where the events shall be updated
>>   'ovs-vsctl --no-wait set Open_vSwitch .
>>  other_config:keepalive-shm-name="/ovs_keepalive_shm_name"'
>>
>> Signed-off-by: Bhanuprakash Bodireddy
>> 
>> ---
>
>Please drop the shm from this in a future spin.  I could break the internal 
>state
>quite easily (like the very first torturous thing I
>did) by dumping /dev/urandom to the shared memory file.  I could very easily
>craft something that will keep this shared memory in a bad state.
>
>In fact, I haven't even tried to do things like overwrite existing shared 
>memory
>objects (should be possible, and will break other projects, ecryptfs is the 
>first
>that comes to mind).  It would be better to just drop it.

Hi Aaron,

I agree with you.  I have already started working on the code to get rid of SHM.
I will wait for few more days to see if there is any more feedback on the 
remaining patches and will send out new series(v3 - without SHM implementation).

Bhanuprakash.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v6] dpif-netdev: Assign ports to pmds on non-local numa node.

2017-06-13 Thread O Mahony, Billy
Hi All,

Does anyone else have any comments on this patch?

I'm adding Ilya and Jan to the CC as I believe you both had comments on this 
previously. Apologies if I've forgotten anyone else that commented from the CC!

Regards,
/Billy

> -Original Message-
> From: Stokes, Ian
> Sent: Thursday, May 11, 2017 12:09 PM
> To: O Mahony, Billy ; d...@openvswitch.org
> Subject: RE: [ovs-dev] [PATCH v6] dpif-netdev: Assign ports to pmds on non-
> local numa node.
> 
> > Previously if there is no available (non-isolated) pmd on the numa
> > node for a port then the port is not polled at all. This can result in
> > a non- operational system until such time as nics are physically
> > repositioned. It is preferable to operate with a pmd on the 'wrong'
> > numa node albeit with lower performance. Local pmds are still chosen
> when available.
> >
> > Signed-off-by: Billy O'Mahony 
> > ---
> > v6: Change 'port' to 'queue' in a warning msg
> > v5: Fix warning msg; Update same in docs
> > v4: Fix a checkpatch error
> > v3: Fix warning messages not appearing when using multiqueue
> > v2: Add details of warning messages into docs
> >
> >  Documentation/intro/install/dpdk.rst | 10 +
> >  lib/dpif-netdev.c| 43
> > +++-
> >  2 files changed, 48 insertions(+), 5 deletions(-)
> >
> > diff --git a/Documentation/intro/install/dpdk.rst
> > b/Documentation/intro/install/dpdk.rst
> > index d1c0e65..7a66bff 100644
> > --- a/Documentation/intro/install/dpdk.rst
> > +++ b/Documentation/intro/install/dpdk.rst
> > @@ -460,6 +460,16 @@ affinitized accordingly.
> >  pmd thread on a NUMA node is only created if there is at least
> > one DPDK
> >  interface from that NUMA node added to OVS.
> >
> > +  .. note::
> > +   On NUMA systems PCI devices are also local to a NUMA node.  Rx
> > + queues
> > for
> > +   PCI device will assigned to a pmd on it's local NUMA node if
> > + pmd-cpu-
> > mask
> > +   has created a pmd thread on that NUMA node.  If not the queue will be
> > +   assigned to a pmd on a remote NUMA node.  This will result in reduced
> > +   maximum throughput on that device.  In the case such a queue
> > assignment
> > +   is made a warning message will be logged: "There's no available (non-
> > +   isolated) pmd thread on numa node N. Queue Q on port P will be
> > assigned to
> > +   the pmd on core C (numa node N'). Expect reduced performance."
> > +
> >  - QEMU vCPU thread Affinity
> >
> >A VM performing simple packet forwarding or running complex packet
> > pipelines diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index
> > b3a0806..34f1963 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -3149,10 +3149,13 @@ rr_numa_list_lookup(struct rr_numa_list *rr,
> > int
> > numa_id)  }
> >
> >  static void
> > -rr_numa_list_populate(struct dp_netdev *dp, struct rr_numa_list *rr)
> > +rr_numa_list_populate(struct dp_netdev *dp, struct rr_numa_list *rr,
> > +  int *all_numa_ids, unsigned all_numa_ids_sz,
> > +  int *num_ids_written)
> >  {
> >  struct dp_netdev_pmd_thread *pmd;
> >  struct rr_numa *numa;
> > +unsigned idx = 0;
> >
> >  hmap_init(>numas);
> >
> > @@ -3170,7 +3173,11 @@ rr_numa_list_populate(struct dp_netdev *dp,
> > struct rr_numa_list *rr)
> >  numa->n_pmds++;
> >  numa->pmds = xrealloc(numa->pmds, numa->n_pmds * sizeof
> > *numa-
> > >pmds);
> >  numa->pmds[numa->n_pmds - 1] = pmd;
> > +
> > +all_numa_ids[idx % all_numa_ids_sz] = pmd->numa_id;
> > +idx++;
> >  }
> > +*num_ids_written = idx;
> >  }
> >
> >  static struct dp_netdev_pmd_thread *
> > @@ -3202,8 +3209,15 @@ rxq_scheduling(struct dp_netdev *dp, bool
> > pinned)
> > OVS_REQUIRES(dp->port_mutex)  {
> >  struct dp_netdev_port *port;
> >  struct rr_numa_list rr;
> > +int all_numa_ids [64];
> > +int all_numa_ids_sz = sizeof all_numa_ids / sizeof all_numa_ids[0];
> > +unsigned all_numa_ids_idx = 0;
> > +int all_numa_ids_max_idx = 0;
> > +int num_numa_ids = 0;
> >
> > -rr_numa_list_populate(dp, );
> > +rr_numa_list_populate(dp, , all_numa_ids, all_numa_ids_sz,
> > +  _numa_ids);
> > +all_numa_ids_max_idx = MIN(num_numa_ids - 1, all_numa_ids_sz -
> > + 1);
> >
> >  HMAP_FOR_EACH (port, node, >ports) {
> >  struct rr_numa *numa;
> > @@ -3234,10 +3248,29 @@ rxq_scheduling(struct dp_netdev *dp, bool
> > pinned)
> > OVS_REQUIRES(dp->port_mutex)
> >  }
> >  } else if (!pinned && q->core_id == OVS_CORE_UNSPEC) {
> >  if (!numa) {
> > -VLOG_WARN("There's no available (non isolated) pmd
> > thread "
> > +if (all_numa_ids_max_idx < 0) {
> > +VLOG_ERR("There is no available
> > + (non-isolated)
> > pmd "
> > +