Re: [ovs-dev] [PATCH] socket-util: Rate limit logs for bind attempts.

2018-08-20 Thread Ben Pfaff
Thanks, applied to master.

On Mon, Aug 20, 2018 at 04:34:37PM -0700, Yifeng Sun wrote:
> Looks good to me, thanks.
> 
> Reviewed-by: Yifeng Sun 
> 
> On Mon, Aug 20, 2018 at 4:10 PM, Ben Pfaff  wrote:
> 
> > This reduces the amount of logging when higher-level code retries binding
> > ports that are in use.
> >
> > Signed-off-by: Ben Pfaff 
> > ---
> >  lib/socket-util.c | 3 ++-
> >  1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/lib/socket-util.c b/lib/socket-util.c
> > index 504f4cd59554..df9b01a9e848 100644
> > --- a/lib/socket-util.c
> > +++ b/lib/socket-util.c
> > @@ -725,7 +725,8 @@ inet_open_passive(int style, const char *target, int
> > default_port,
> >  /* Bind. */
> >  if (bind(fd, (struct sockaddr *) , ss_length()) < 0) {
> >  error = sock_errno();
> > -VLOG_ERR("%s: bind: %s", target, sock_strerror(error));
> > +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
> > +VLOG_ERR_RL(, "%s: bind: %s", target, sock_strerror(error));
> >  goto error;
> >  }
> >
> > --
> > 2.16.1
> >
> > ___
> > dev mailing list
> > d...@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] ofproto-dpif-xlate: Avoid deadlock on multicast snooping recursion.

2018-08-20 Thread Ben Pfaff
Until now, OVS did multicast snooping outputs holding the read-lock on
the mcast_snooping object.  This could recurse via a patch port to try to
take the write-lock on the same object, which deadlocked.  This patch fixes
the problem, by releasing the read-lock before doing any outputs.

It would probably be better to use RCU for mcast_snooping.  That would be
a bigger patch and less suitable for backporting.

Reported-by: Sameh Elsharkawy
Reported-at: https://github.com/openvswitch/ovs-issues/issues/153
Signed-off-by: Ben Pfaff 
---
 ofproto/ofproto-dpif-xlate.c | 102 +++
 1 file changed, 84 insertions(+), 18 deletions(-)

diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index e26f6c8f554a..7701b64a2451 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -534,6 +534,8 @@ static void do_xlate_actions(const struct ofpact *, size_t 
ofpacts_len,
 static void clone_xlate_actions(const struct ofpact *, size_t ofpacts_len,
 struct xlate_ctx *, bool, bool);
 static void xlate_normal(struct xlate_ctx *);
+static void xlate_normal_flood(struct xlate_ctx *ct,
+   struct xbundle *in_xbundle, struct xvlan *);
 static void xlate_table_action(struct xlate_ctx *, ofp_port_t in_port,
uint8_t table_id, bool may_packet_in,
bool honor_table_miss, bool with_ct_orig,
@@ -2690,6 +2692,53 @@ update_mcast_snooping_table(const struct xlate_ctx *ctx,
 }
 ovs_rwlock_unlock(>rwlock);
 }
+
+/* A list of multicast output ports.
+ *
+ * We accumulate output ports and then do all the outputs afterward.  It would
+ * be more natural to do the outputs one at a time as we discover the need for
+ * each one, but this can cause a deadlock because we need to take the
+ * mcast_snooping's rwlock for reading to iterate through the port lists and
+ * doing an output, if it goes to a patch port, can eventually come back to the
+ * same mcast_snooping and attempt to take the write lock (see
+ * https://github.com/openvswitch/ovs-issues/issues/153). */
+struct mcast_output {
+/* Discrete ports. */
+struct xbundle **xbundles;
+size_t n, allocated;
+
+/* If set, flood to all ports. */
+bool flood;
+};
+#define MCAST_OUTPUT_INIT { NULL, 0, 0, false }
+
+/* Add 'mcast_bundle' to 'out'. */
+static void
+mcast_output_add(struct mcast_output *out, struct xbundle *mcast_xbundle)
+{
+if (out->n >= out->allocated) {
+out->xbundles = x2nrealloc(out->xbundles, >allocated,
+   sizeof *out->xbundles);
+}
+out->xbundles[out->n++] = mcast_xbundle;
+}
+
+/* Outputs the packet in 'ctx' to all of the output ports in 'out', given input
+ * bundle 'in_xbundle' and the current 'xvlan'. */
+static void
+mcast_output_finish(struct xlate_ctx *ctx, struct mcast_output *out,
+struct xbundle *in_xbundle, struct xvlan *xvlan)
+{
+if (out->flood) {
+xlate_normal_flood(ctx, in_xbundle, xvlan);
+} else {
+for (size_t i = 0; i < out->n; i++) {
+output_normal(ctx, out->xbundles[i], xvlan);
+}
+}
+
+free(out->xbundles);
+}
 
 /* send the packet to ports having the multicast group learned */
 static void
@@ -2697,7 +2746,7 @@ xlate_normal_mcast_send_group(struct xlate_ctx *ctx,
   struct mcast_snooping *ms OVS_UNUSED,
   struct mcast_group *grp,
   struct xbundle *in_xbundle,
-  const struct xvlan *xvlan)
+  struct mcast_output *out)
 OVS_REQ_RDLOCK(ms->rwlock)
 {
 struct mcast_group_bundle *b;
@@ -2707,7 +2756,7 @@ xlate_normal_mcast_send_group(struct xlate_ctx *ctx,
 mcast_xbundle = xbundle_lookup(ctx->xcfg, b->port);
 if (mcast_xbundle && mcast_xbundle != in_xbundle) {
 xlate_report(ctx, OFT_DETAIL, "forwarding to mcast group port");
-output_normal(ctx, mcast_xbundle, xvlan);
+mcast_output_add(out, mcast_xbundle);
 } else if (!mcast_xbundle) {
 xlate_report(ctx, OFT_WARN,
  "mcast group port is unknown, dropping");
@@ -2723,7 +2772,8 @@ static void
 xlate_normal_mcast_send_mrouters(struct xlate_ctx *ctx,
  struct mcast_snooping *ms,
  struct xbundle *in_xbundle,
- const struct xvlan *xvlan)
+ const struct xvlan *xvlan,
+ struct mcast_output *out)
 OVS_REQ_RDLOCK(ms->rwlock)
 {
 struct mcast_mrouter_bundle *mrouter;
@@ -2734,7 +2784,7 @@ xlate_normal_mcast_send_mrouters(struct xlate_ctx *ctx,
 if (mcast_xbundle && mcast_xbundle != in_xbundle
 && mrouter->vlan == xvlan->v[0].vid) {
 

Re: [ovs-dev] [ovs-discuss] fix the mod_vlan_vid actions with OpenFlow13.

2018-08-20 Thread wangyunjian



> -Original Message-
> From: Eric Garver [mailto:e...@erig.me]
> Sent: Monday, August 20, 2018 9:15 PM
> To: wangyunjian 
> Cc: d...@openvswitch.org; ovs-disc...@openvswitch.org; Zhoulei (stone,
> Cloud Networking) ; thomasfherb...@gmail.com
> Subject: Re: [ovs-discuss] [ovs-dev] fix the mod_vlan_vid actions with
> OpenFlow13.
> 
> On Mon, Aug 20, 2018 at 02:17:34AM +, wangyunjian wrote:
> >
> >
> [..]
> > > > On Fri, Aug 17, 2018 at 12:15:30PM +, wangyunjian wrote:
> > > > > The datapath flow which pushs double vlan was found using
> > > > > ovs-appctl dpctl/dump-flows, but the flow was set mod_vlan_vid
> actions.
> > > > > This problem is discovered from "Add support for 802.1ad (QinQ
> > > tunneling)".
> > > >
> > > > Thanks for reporting. Can you say what version of OVS you're using?
> > > > Including any extra patches you may have applied.
> > >
> > > The version of OVS is master branch(git log "
> > > be5e6d6822e60b5b84ac65dcd1b249145356a809
> > > ofp-ed-props: Fix hang for crafted OpenFlow encap/decap properties".).
> > >
> > > >
> > > > > My test steps:
> > > > >
> > > > > 1) ovs-vsctl add-br ovsbr0 -- set bridge ovsbr0 datapath_type=netdev
> > > > >ovs-vsctl add-port ovsbr0 eth2 -- set Interface eth2
> > > > > type=dpdk
> > > > options:dpdk-devargs=:03:00.0
> > > > >ovs-ofctl  -O OpenFlow13 add-flow ovsbr0 "
> > > > > table=0,priority=2,in_port=1
> > > > actions=mod_vlan_vid:3,NORMAL"
> 
> What happens if you add a wildcard VLAN match here?
> e.g. vlan_tci=0x1000/0x1000

The packet is set vlan_vid ok with adding the wildcard VLAN match.

linux-jrWzwZ:/data/wyj/git/ovs # ovs-ofctl -O OpenFlow13 add-flow ovsbr0 
"cookie=0xb043f0d196265635,table=0,priority=2,in_port=1,vlan_tci=0x1000/0x1000 
actions=mod_vlan_vid:2,NORMAL"
linux-jrWzwZ:/data/wyj/git/ovs # tcpdump -i ovsbr0 -ne
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on ovsbr0, link-type EN10MB (Ethernet), capture size 65535 bytes
10:06:53.284556 90:17:ac:b0:0a:ff > Broadcast, ethertype 802.1Q (0x8100), 
length 64: vlan 2, p 0, ethertype ARP, Request who-has 3.3.3.18 tell 3.3.3.17, 
length 46
10:06:54.286542 90:17:ac:b0:0a:ff > Broadcast, ethertype 802.1Q (0x8100), 
length 64: vlan 2, p 0, ethertype ARP, Request who-has 3.3.3.18 tell 3.3.3.17, 
length 46
10:06:56.283594 90:17:ac:b0:0a:ff > Broadcast, ethertype 802.1Q (0x8100), 
length 64: vlan 2, p 0, ethertype ARP, Request who-has 3.3.3.18 tell 3.3.3.17, 
length 46
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] socket-util: Rate limit logs for bind attempts.

2018-08-20 Thread Yifeng Sun
Looks good to me, thanks.

Reviewed-by: Yifeng Sun 

On Mon, Aug 20, 2018 at 4:10 PM, Ben Pfaff  wrote:

> This reduces the amount of logging when higher-level code retries binding
> ports that are in use.
>
> Signed-off-by: Ben Pfaff 
> ---
>  lib/socket-util.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/lib/socket-util.c b/lib/socket-util.c
> index 504f4cd59554..df9b01a9e848 100644
> --- a/lib/socket-util.c
> +++ b/lib/socket-util.c
> @@ -725,7 +725,8 @@ inet_open_passive(int style, const char *target, int
> default_port,
>  /* Bind. */
>  if (bind(fd, (struct sockaddr *) , ss_length()) < 0) {
>  error = sock_errno();
> -VLOG_ERR("%s: bind: %s", target, sock_strerror(error));
> +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
> +VLOG_ERR_RL(, "%s: bind: %s", target, sock_strerror(error));
>  goto error;
>  }
>
> --
> 2.16.1
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] socket-util: Rate limit logs for bind attempts.

2018-08-20 Thread Ben Pfaff
This reduces the amount of logging when higher-level code retries binding
ports that are in use.

Signed-off-by: Ben Pfaff 
---
 lib/socket-util.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/socket-util.c b/lib/socket-util.c
index 504f4cd59554..df9b01a9e848 100644
--- a/lib/socket-util.c
+++ b/lib/socket-util.c
@@ -725,7 +725,8 @@ inet_open_passive(int style, const char *target, int 
default_port,
 /* Bind. */
 if (bind(fd, (struct sockaddr *) , ss_length()) < 0) {
 error = sock_errno();
-VLOG_ERR("%s: bind: %s", target, sock_strerror(error));
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+VLOG_ERR_RL(, "%s: bind: %s", target, sock_strerror(error));
 goto error;
 }
 
-- 
2.16.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v1] rhel: support kmod build against mulitple kernel versions, fedora

2018-08-20 Thread Martin Xu
Thanks for the suggestions. I removed the "obseletes" from the patch, so we
can go either way for the kmod RPM naming. A new patch is sent out.

Martin

On Mon, Jul 30, 2018 at 12:18 PM Flavio Leitner  wrote:

> On Fri, Jul 27, 2018 at 11:19:58AM -0700, Martin Xu wrote:
> > Hi Flavio,
> >
> > Thanks for the review. I was thinking of using this to prevent a user to
> > directly install openvswitch-kmod rpm built with the fedora spec file
> when
> > the system already has the kmod-openvswitch built from rhel6 spec file.
> > Packages are named differently. In that case, it's not a matter of
> > versions. They could be built off the identical source tree, but
> shouldn't
> > coexist. Perhaps there's other ways to implement what I intended to do
> > here, if you have any suggestions?
>
> If the files are the same, rpm will complain about the file conflicts
> when it tries to install. In that case we only need Conflicts and perhaps
> it's ok to not add a version.
>
> But for obsoletes, it essentially says that your package should
> replace the other one. If that is what you want, that's okay too.
> However, we will be unable to roll back that in the future if we
> need to revive kmod-openvswitch as this package once built cannot
> be changed and it will always replace the other one.
>
> When you add a 'provides', you will create a "virtual package" named
> after the provides, with the version and release you want, so RPM can
> compare versions and releases as usual. For example:
>
> Provides: kmod-openvswitch = %{version}-%{release}
>
> That means you package is also known to rpm as
> kmod-openvswitch-2.10.0-10.fc29
>
> fbl
>
>
> >
> > Best,
> > Martin
> >
> > On Wed, Jul 25, 2018 at 9:10 AM, Flavio Leitner  wrote:
> >
> > > On Fri, Jul 20, 2018 at 03:24:53PM -0700, Martin Xu wrote:
> > > > This patch ports changes from kmod rhel6 spec file to fedora spec
> file,
> > > > to support packaging kernel modules built against multiple versions
> of
> > > > kernel sources.
> > > >
> > > > RHEL 7.4 introduced backward incompatible changes in the kernel. As
> > > > a result, prebuilt PRM packages against kernels newer than 693.17.1
> > > > will cannot be used on systems with older kernels, vice versa.
> > > >
> > > > Intended to work only on RHEL 7.4 (kernel version 3.10.0-693.yy.zz).
> > > > This patch allows multiple kernel version numbers delimited by
> > > > whitespace to be passed as variable "kversion". The result RPM
> packages
> > > > the kernel module .ko files from all specified kernel versions. For
> > > > example,
> > > >
> > > > make rpm-fedora-kmod \
> > > > RPMBUILD_OPT='-D "kversion 3.10.0-693.1.1.el7.x86_64 \
> > > > 3.10.0-693.17.1.el7.x86_64"'
> > > >
> > > > By default, make tries to build against the current running kernel.
> > > >
> > > > This patch also includes a script to update the weak-update symlinks
> > > > if the system kernel version is upgraded or downgraded after
> > > > openvswitch-kmod is installed.
> > > >
> > > > Signed-off-by: Martin Xu 
> > > > CC: Greg Rose 
> > > > CC: Flavio Leitner 
> > > > ---
> > > >  rhel/openvswitch-kmod-fedora.spec.in | 86
> > > +++-
> > > >  1 file changed, 55 insertions(+), 31 deletions(-)
> > > >
> > > > diff --git a/rhel/openvswitch-kmod-fedora.spec.in b/rhel/
> > > openvswitch-kmod-fedora.spec.in
> > > > index c0cd298..24f8290 100644
> > > > --- a/rhel/openvswitch-kmod-fedora.spec.in
> > > > +++ b/rhel/openvswitch-kmod-fedora.spec.in
> > > > @@ -1,6 +1,6 @@
> > > >  # Spec file for Open vSwitch.
> > > >
> > > > -# Copyright (C) 2009, 2010, 2015 Nicira Networks, Inc.
> > > > +# Copyright (C) 2009, 2010, 2015, 2018 Nicira Networks, Inc.
> > > >  #
> > > >  # Copying and distribution of this file, with or without
> modification,
> > > >  # are permitted in any medium without royalty provided the copyright
> > > > @@ -26,6 +26,9 @@ Release: 1%{?dist}
> > > >  Source: openvswitch-%{version}.tar.gz
> > > >  #Source1: openvswitch-init
> > > >  Buildroot: /tmp/openvswitch-xen-rpm
> > > > +Provides: kmod-openvswitch
> > > > +Conflicts: kmod-openvswitch
> > > > +Obsoletes: kmod-openvswitch
> > >
> > > Usually the above is versioned to avoid future issues.
> > > e.g.: Conflicts: kmod-openvswitch < %{version}-%{release}
> > >
> > > I didn't spot anything else other than the above, thanks!
> > > fbl
> > >
> > > >
> > > >  %description
> > > >  Open vSwitch provides standard network bridging functions augmented
> with
> > > > @@ -36,55 +39,76 @@ traffic. This package contains the kernel
> modules.
> > > >  %setup -q -n openvswitch-%{version}
> > > >
> > > >  %build
> > > > -%configure --with-linux=/lib/modules/%{kernel}/build --enable-ssl
> > > > -make %{_smp_mflags} -C datapath/linux
> > > > +for kv in %{kversion}; do
> > > > +mkdir -p _$kv
> > > > +(cd _$kv && /bin/cp -f ../configure . && %configure --srcdir=..
> \
> > > > +--with-linux=/usr/src/kernels/${kv}/ --enable-ssl)
> > > > +make 

[ovs-dev] [PATCH v3] rhel: support kmod build against mulitple kernel versions, fedora

2018-08-20 Thread Martin Xu
This patch ports changes from kmod rhel6 spec file to fedora spec file,
to support packaging kernel modules built against multiple versions of
kernel sources.

RHEL 7.4 introduced backward incompatible changes in the kernel. As
a result, prebuilt PRM packages against kernels newer than 693.17.1
will cannot be used on systems with older kernels, vice versa.

Intended to work only on RHEL 7.4 (kernel version 3.10.0-693.yy.zz).
This patch allows multiple kernel version numbers delimited by
whitespace to be passed as variable "kversion". The result RPM packages
the kernel module .ko files from all specified kernel versions. For
example,

make rpm-fedora-kmod \
RPMBUILD_OPT='-D "kversion 3.10.0-693.1.1.el7.x86_64 \
3.10.0-693.17.1.el7.x86_64"'

By default, make tries to build against the current running kernel.

This patch also includes a script to update the weak-update symlinks
if the system kernel version is upgraded or downgraded after
openvswitch-kmod is installed.

Signed-off-by: Martin Xu 
CC: Greg Rose 
CC: Flavio Leitner 
---
v1->v2: fix a bug in v1. v1 removed the if condition (erase but not upgrade)
in postun section by mistake. Without the if condition, the upgrade would also
remove the symlinks created in weak-updates created by the newer version.
v2->v3: added some comments for the use of kversion macro in fedora spec file;
removed "Obsoletes: kmod-openvswitch"; also incorporated change for
rhel7.2 change in rhel6 spec file in the %post section

 rhel/openvswitch-kmod-fedora.spec.in | 93 
 1 file changed, 62 insertions(+), 31 deletions(-)

diff --git a/rhel/openvswitch-kmod-fedora.spec.in 
b/rhel/openvswitch-kmod-fedora.spec.in
index c0cd298..dc5ea7a 100644
--- a/rhel/openvswitch-kmod-fedora.spec.in
+++ b/rhel/openvswitch-kmod-fedora.spec.in
@@ -1,6 +1,6 @@
 # Spec file for Open vSwitch.
 
-# Copyright (C) 2009, 2010, 2015 Nicira Networks, Inc.
+# Copyright (C) 2009, 2010, 2015, 2018 Nicira Networks, Inc.
 #
 # Copying and distribution of this file, with or without modification,
 # are permitted in any medium without royalty provided the copyright
@@ -9,6 +9,12 @@
 
 %global debug_package %{nil}
 
+# Use the kversion macro such as
+# RPMBUILD_OPT='-D "kversion 3.10.0-693.1.1.el7.x86_64 
3.10.0-693.17.1.el7.x86_64"'
+# to build package for mulitple kernel versions in the same package
+# This only works for kernel 3.10.0 major revision 693 (RHEL 7.4)
+# and major revision 327 (RHEL 7.2)
+# By default, build against the current running kernel version
 #%define kernel 3.1.5-1.fc16.x86_64
 #define kernel %{kernel_source}
 %{?kversion:%define kernel %kversion}
@@ -26,6 +32,8 @@ Release: 1%{?dist}
 Source: openvswitch-%{version}.tar.gz
 #Source1: openvswitch-init
 Buildroot: /tmp/openvswitch-xen-rpm
+Provides: kmod-openvswitch
+Conflicts: kmod-openvswitch
 
 %description
 Open vSwitch provides standard network bridging functions augmented with
@@ -36,55 +44,78 @@ traffic. This package contains the kernel modules.
 %setup -q -n openvswitch-%{version}
 
 %build
-%configure --with-linux=/lib/modules/%{kernel}/build --enable-ssl
-make %{_smp_mflags} -C datapath/linux
+for kv in %{kversion}; do
+mkdir -p _$kv
+(cd _$kv && /bin/cp -f ../configure . && %configure --srcdir=.. \
+--with-linux=/usr/src/kernels/${kv}/ --enable-ssl)
+make %{_smp_mflags} -C _$kv/datapath/linux
+done
 
 %install
+export INSTALL_MOD_DIR=extra/openvswitch
 rm -rf $RPM_BUILD_ROOT
-make INSTALL_MOD_PATH=$RPM_BUILD_ROOT -C datapath/linux modules_install
+for kv in %{kversion}; do
+make INSTALL_MOD_PATH=$RPM_BUILD_ROOT -C _$kv/datapath/linux 
modules_install
+done
 mkdir -p $RPM_BUILD_ROOT/etc/depmod.d
-for module in $RPM_BUILD_ROOT/lib/modules/%{kernel}/extra/*.ko
-do
-modname="$(basename ${module})"
-echo "override ${modname%.ko} * extra" >> \
-$RPM_BUILD_ROOT/etc/depmod.d/kmod-openvswitch.conf
-echo "override ${modname%.ko} * weak-updates" >> \
-$RPM_BUILD_ROOT/etc/depmod.d/kmod-openvswitch.conf
+for kv in %{kversion}; do
+for module in $RPM_BUILD_ROOT/lib/modules/${kv}/extra/openvswitch/*.ko
+do
+modname="$(basename ${module})"
+grep -qsPo "^\s*override ${modname%.ko} \* extra\/openvwitch" \
+$RPM_BUILD_ROOT/etc/depmod.d/kmod-openvswitch.conf || \
+echo "override ${modname%.ko} * extra/openvswitch" >> \
+$RPM_BUILD_ROOT/etc/depmod.d/kmod-openvswitch.conf
+grep -qsPo "^\s*override ${modname%.ko} \* weak-updates\/openvwitch" \
+$RPM_BUILD_ROOT/etc/depmod.d/kmod-openvswitch.conf || \
+echo "override ${modname%.ko} * weak-updates/openvswitch" >> \
+$RPM_BUILD_ROOT/etc/depmod.d/kmod-openvswitch.conf
+done
 done
+install -d -m 0755 $RPM_BUILD_ROOT/usr/share/openvswitch/scripts
+install -p -m 0755 rhel/usr_share_openvswitch_scripts_ovs-kmod-manage.sh \
+$RPM_BUILD_ROOT/usr/share/openvswitch/scripts/ovs-kmod-manage.sh
 
 

Re: [ovs-dev] [ovs-dev,branch-2.10,2 of 2] Prepare for 2.10.1.

2018-08-20 Thread 0-day Robot
Bleep bloop.  Greetings Justin Pettit, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
ERROR: Too many signoffs; are you missing Co-authored-by lines?
Lines checked: 56, Warnings: 0, Errors: 1


Please check this out.  If you feel there has been an error, please email 
acon...@bytheb.org

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [ovs-dev, branch-2.10, 1 of 2] Revert "Prepare for 2.10.1."

2018-08-20 Thread 0-day Robot
Bleep bloop.  Greetings Justin Pettit, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
ERROR: Too many signoffs; are you missing Co-authored-by lines?
Lines checked: 62, Warnings: 0, Errors: 1


Please check this out.  If you feel there has been an error, please email 
acon...@bytheb.org

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH RFC 0/3] AF_XDP support for OVS

2018-08-20 Thread Ben Pfaff
[dropping iovisor-dev as I'm not on that list]

On Fri, Aug 17, 2018 at 05:29:33PM -0700, William Tu wrote:
> The patch series introduces AF_XDP support for OVS netdev.
> AF_XDP is a new address family working together with eBPF.
> In short, a socket with AF_XDP family can receive and send
> packets from an eBPF/XDP program attached to the netdev.
> For more details about AF_XDP, please see linux kernel's
> Documentation/networking/af_xdp.rst
> 
> OVS has a couple of netdev types, i.e., system, tap, or
> internal.  The patch first adds a new netdev types called
> "afxdp", and implement its configuration, packet reception,
> and transmit functions.  Since the AF_XDP socket, xsk,
> operates in userspace, once ovs-vswitchd receives packets
> from xsk, the proposed architecture re-uses the existing
> userspace dpif-netdev datapath.  As a result, most of
> the packet processing happens at the userspace instead of
> linux kernel.

This is really cool.  BPF and XDP and AF_XDP are all fascinating
technology and I'd like to see OVS able to take advantage of them.

What do you see as the next step for this patch series?  I see that it's
an RFC and that it depends on a fairly long patch series.  Is the next
step to rebase the series and start to get parts of it into the OVS
tree?  Or is it to refine this AF_XDP patch series first?  Or do you
have some other approach in mind?

Thanks,

Ben.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] ovn: Add DHCP support for option 252.

2018-08-20 Thread Mark Michelson
This adds DHCP support for web proxy auto detection.

Signed-off-by: Mark Michelson 
---
 ovn/lib/ovn-l7.h| 2 ++
 ovn/northd/ovn-northd.c | 3 ++-
 ovn/ovn-nb.xml  | 8 
 tests/ovn.at| 6 +++---
 tests/test-ovn.c| 1 +
 5 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/ovn/lib/ovn-l7.h b/ovn/lib/ovn-l7.h
index b8d6683bc..817e9f002 100644
--- a/ovn/lib/ovn-l7.h
+++ b/ovn/lib/ovn-l7.h
@@ -70,6 +70,8 @@ struct gen_opts_map {
 #define DHCP_OPT_T1 DHCP_OPTION("T1", 58, "uint32")
 #define DHCP_OPT_T2 DHCP_OPTION("T2", 59, "uint32")
 
+#define DHCP_OPT_WPAD DHCP_OPTION("wpad", 252, "str")
+
 static inline uint32_t
 gen_opt_hash(char *opt_name)
 {
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index ba86bf559..a1cfd4bdf 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -7186,7 +7186,8 @@ static struct gen_opts_map supported_dhcp_opts[] = {
 DHCP_OPT_MTU,
 DHCP_OPT_LEASE_TIME,
 DHCP_OPT_T1,
-DHCP_OPT_T2
+DHCP_OPT_T2,
+DHCP_OPT_WPAD,
 };
 
 static struct gen_opts_map supported_dhcpv6_opts[] = {
diff --git a/ovn/ovn-nb.xml b/ovn/ovn-nb.xml
index bc60a25dd..441a2deae 100644
--- a/ovn/ovn-nb.xml
+++ b/ovn/ovn-nb.xml
@@ -1991,6 +1991,14 @@
 Microsoft Windows DHCPv4 clients.
   
 
+
+
+  
+The DHCPv4 option code for this option is 252. This option is used
+as part of web proxy auto discovery to provide a URL for a web
+proxy.
+  
+
   
 
   
diff --git a/tests/ovn.at b/tests/ovn.at
index 70c6c50b3..6165d69c0 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -1100,9 +1100,9 @@ put_arp(inport, arp.spa, arp.sha);
 # put_dhcp_opts
 reg1[0] = put_dhcp_opts(offerip = 1.2.3.4, router = 10.0.0.1);
 encodes as 
controller(userdata=00.00.00.02.00.00.00.00.00.01.de.10.00.00.00.40.01.02.03.04.03.04.0a.00.00.01,pause)
-reg2[5] = 
put_dhcp_opts(offerip=10.0.0.4,router=10.0.0.1,netmask=255.255.254.0,mtu=1400,domain="ovn.org");
-formats as reg2[5] = put_dhcp_opts(offerip = 10.0.0.4, router = 10.0.0.1, 
netmask = 255.255.254.0, mtu = 1400, domain = "ovn.org");
-encodes as 
controller(userdata=00.00.00.02.00.00.00.00.00.01.de.10.00.00.00.25.0a.00.00.04.03.04.0a.00.00.01.01.04.ff.ff.fe.00.1a.02.05.78.0f.07.6f.76.6e.2e.6f.72.67,pause)
+reg2[5] = 
put_dhcp_opts(offerip=10.0.0.4,router=10.0.0.1,netmask=255.255.254.0,mtu=1400,domain="ovn.org",wpad="https://example.org;);
+formats as reg2[5] = put_dhcp_opts(offerip = 10.0.0.4, router = 10.0.0.1, 
netmask = 255.255.254.0, mtu = 1400, domain = "ovn.org", wpad = 
"https://example.org;);
+encodes as 
controller(userdata=00.00.00.02.00.00.00.00.00.01.de.10.00.00.00.25.0a.00.00.04.03.04.0a.00.00.01.01.04.ff.ff.fe.00.1a.02.05.78.0f.07.6f.76.6e.2e.6f.72.67.fc.13.68.74.74.70.73.3a.2f.2f.65.78.61.6d.70.6c.65.2e.6f.72.67,pause)
 reg0[15] = 
put_dhcp_opts(offerip=10.0.0.4,router=10.0.0.1,netmask=255.255.255.0,mtu=1400,ip_forward_enable=1,default_ttl=121,dns_server={8.8.8.8,7.7.7.7},classless_static_route={30.0.0.0/24,10.0.0.4,40.0.0.0/16,10.0.0.6,0.0.0.0/0,10.0.0.1},ethernet_encap=1,router_discovery=0);
 formats as reg0[15] = put_dhcp_opts(offerip = 10.0.0.4, router = 10.0.0.1, 
netmask = 255.255.255.0, mtu = 1400, ip_forward_enable = 1, default_ttl = 121, 
dns_server = {8.8.8.8, 7.7.7.7}, classless_static_route = {30.0.0.0/24, 
10.0.0.4, 40.0.0.0/16, 10.0.0.6, 0.0.0.0/0, 10.0.0.1}, ethernet_encap = 1, 
router_discovery = 0);
 encodes as 
controller(userdata=00.00.00.02.00.00.00.00.00.01.de.10.00.00.00.6f.0a.00.00.04.03.04.0a.00.00.01.01.04.ff.ff.ff.00.1a.02.05.78.13.01.01.17.01.79.06.08.08.08.08.08.07.07.07.07.79.14.18.1e.00.00.0a.00.00.04.10.28.00.0a.00.00.06.00.0a.00.00.01.24.01.01.1f.01.00,pause)
diff --git a/tests/test-ovn.c b/tests/test-ovn.c
index d4a5d599e..5e6d1c3b4 100644
--- a/tests/test-ovn.c
+++ b/tests/test-ovn.c
@@ -182,6 +182,7 @@ create_gen_opts(struct hmap *dhcp_opts, struct hmap 
*dhcpv6_opts,
 dhcp_opt_add(dhcp_opts, "tcp_ttl", 37, "uint8");
 dhcp_opt_add(dhcp_opts, "mtu", 26, "uint16");
 dhcp_opt_add(dhcp_opts, "lease_time",  51, "uint32");
+dhcp_opt_add(dhcp_opts, "wpad", 252, "str");
 
 /* DHCPv6 options. */
 hmap_init(dhcpv6_opts);
-- 
2.14.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [branch-2.10 1/2] Revert "Prepare for 2.10.1."

2018-08-20 Thread Ben Pfaff
On Mon, Aug 20, 2018 at 01:01:59PM -0700, Justin Pettit wrote:
> There are a couple more patches that we'd like to apply as part of
> 2.10.0.
> 
> This reverts commit c491d2d095756bd3499b1061adce0deeba55ffdd.
> 
> Signed-off-by: Justin Pettit 

Fair enough.

Acked-by: Ben Pfaff 
for the series.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [branch-2.10 1/2] Revert "Prepare for 2.10.1."

2018-08-20 Thread Justin Pettit
There are a couple more patches that we'd like to apply as part of
2.10.0.

This reverts commit c491d2d095756bd3499b1061adce0deeba55ffdd.

Signed-off-by: Justin Pettit 
---
 NEWS | 4 
 configure.ac | 2 +-
 debian/changelog | 6 --
 3 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/NEWS b/NEWS
index 04de80781dc4..8e21180537db 100644
--- a/NEWS
+++ b/NEWS
@@ -1,7 +1,3 @@
-v2.10.1 - xx xxx 
--
-
-
 v2.10.0 - 18 Aug 2018
 -
- ovs-vswitchd and utilities now support DNS names in OpenFlow and
diff --git a/configure.ac b/configure.ac
index 5dbc0311afdf..e3daf335e264 100644
--- a/configure.ac
+++ b/configure.ac
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 AC_PREREQ(2.63)
-AC_INIT(openvswitch, 2.10.1, b...@openvswitch.org)
+AC_INIT(openvswitch, 2.10.0, b...@openvswitch.org)
 AC_CONFIG_SRCDIR([datapath/datapath.c])
 AC_CONFIG_MACRO_DIR([m4])
 AC_CONFIG_AUX_DIR([build-aux])
diff --git a/debian/changelog b/debian/changelog
index 0dce5c547729..ce7ac516a52d 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,9 +1,3 @@
-openvswitch (2.10.1-1) unstable; urgency=low
-
-   * New upstream version
-
- -- Open vSwitch team   Sat, 18 Aug 2018 10:38:23 -0700
-
 openvswitch (2.10.0-1) unstable; urgency=low
 
* New upstream version
-- 
2.17.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [branch-2.10 2/2] Prepare for 2.10.1.

2018-08-20 Thread Justin Pettit
Signed-off-by: Justin Pettit 
---
 NEWS | 4 
 configure.ac | 2 +-
 debian/changelog | 6 ++
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/NEWS b/NEWS
index 8e21180537db..04de80781dc4 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,7 @@
+v2.10.1 - xx xxx 
+-
+
+
 v2.10.0 - 18 Aug 2018
 -
- ovs-vswitchd and utilities now support DNS names in OpenFlow and
diff --git a/configure.ac b/configure.ac
index e3daf335e264..5dbc0311afdf 100644
--- a/configure.ac
+++ b/configure.ac
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 AC_PREREQ(2.63)
-AC_INIT(openvswitch, 2.10.0, b...@openvswitch.org)
+AC_INIT(openvswitch, 2.10.1, b...@openvswitch.org)
 AC_CONFIG_SRCDIR([datapath/datapath.c])
 AC_CONFIG_MACRO_DIR([m4])
 AC_CONFIG_AUX_DIR([build-aux])
diff --git a/debian/changelog b/debian/changelog
index ce7ac516a52d..0dce5c547729 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+openvswitch (2.10.1-1) unstable; urgency=low
+
+   * New upstream version
+
+ -- Open vSwitch team   Sat, 18 Aug 2018 10:38:23 -0700
+
 openvswitch (2.10.0-1) unstable; urgency=low
 
* New upstream version
-- 
2.17.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] Question about presentations at OVScon 2018

2018-08-20 Thread Allen Djal
Hi,

Is there any chance I can help with your creative needs at Open vSwitch?
Fullstack Digital provides branding and design services to Open Source
companies like the {code} team
.

Here are some examples of event page design 
 and presentation design
 we've
done. I’d love to earn your trust with one free presentation re-design so
Fullstack is first in line when you need a creative partner.

Let me know if you are interested!


-- 

[image: Fullstack Digital] 

Allen Djal  /   Brand & Marketing
al...@fullstackdigital.com  /   (818) 674 - 8970 <818-674-8970>

Fullstack Digital
Go further, faster  /  Full-stack digital agency
www.fullstackdigital.com   /
@fullstackdigitl 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v8 12/14] dpdk-tests: Add unit-tests for multi-seg mbufs.

2018-08-20 Thread Tiago Lam
In order to create a minimal environment that allows the tests to get
mbufs from an existing mempool, the following approach is taken:
- EAL is initialised (by using the main dpdk_init()) and a (very) small
  mempool is instantiated (mimicking the logic in dpdk_mp_create()).
  This mempool instance is global and used by all the tests;
- Packets are then allocated from the instantiated mempool, and tested
  on, by running some operations on them and manipulating data.

The tests introduced focus on testing DPDK dp_packets (where
source=DPBUF_DPDK), linked with a single or multiple mbufs, across
several operations, such as:
- dp_packet_put();
- dp_packet_shift();
- dp_packet_reserve();
- dp_packet_push_uninit();
- dp_packet_clear();
- dp_packet_equal();
- dp_packet_linear_data();
- And as a consequence of some of these, dp_packet_put_uninit() and
  dp_packet_resize__().

Finally, this has also been integrated with the new DPDK testsuite.
Thus, when running `$sudo make check-dpdk` one will also be running
these tests.

Signed-off-by: Tiago Lam 
Acked-by: Eelco Chaudron 
---
 tests/automake.mk  |  10 +-
 tests/dpdk-packet-mbufs.at |   7 +
 tests/system-dpdk-testsuite.at |   1 +
 tests/test-dpdk-mbufs.c| 619 +
 4 files changed, 636 insertions(+), 1 deletion(-)
 create mode 100644 tests/dpdk-packet-mbufs.at
 create mode 100644 tests/test-dpdk-mbufs.c

diff --git a/tests/automake.mk b/tests/automake.mk
index 49ceb41..f484f69 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -135,7 +135,8 @@ SYSTEM_DPDK_TESTSUITE_AT = \
tests/system-common-macros.at \
tests/system-dpdk-macros.at \
tests/system-dpdk-testsuite.at \
-   tests/system-dpdk.at
+   tests/system-dpdk.at \
+   tests/dpdk-packet-mbufs.at
 
 check_SCRIPTS += tests/atlocal
 
@@ -392,6 +393,10 @@ tests_ovstest_SOURCES = \
tests/test-vconn.c \
tests/test-aa.c \
tests/test-stopwatch.c
+if DPDK_NETDEV
+tests_ovstest_SOURCES += \
+   tests/test-dpdk-mbufs.c
+endif
 
 if !WIN32
 tests_ovstest_SOURCES += \
@@ -404,6 +409,9 @@ tests_ovstest_SOURCES += \
 endif
 
 tests_ovstest_LDADD = lib/libopenvswitch.la ovn/lib/libovn.la
+if DPDK_NETDEV
+tests_ovstest_LDFLAGS = $(AM_LDFLAGS) $(DPDK_vswitchd_LDFLAGS)
+endif
 
 noinst_PROGRAMS += tests/test-strtok_r
 tests_test_strtok_r_SOURCES = tests/test-strtok_r.c
diff --git a/tests/dpdk-packet-mbufs.at b/tests/dpdk-packet-mbufs.at
new file mode 100644
index 000..f28e4fc
--- /dev/null
+++ b/tests/dpdk-packet-mbufs.at
@@ -0,0 +1,7 @@
+AT_BANNER([OVS-DPDK dp_packet unit tests])
+
+AT_SETUP([OVS-DPDK dp_packet - mbufs allocation])
+AT_KEYWORDS([dp_packet, multi-seg, mbufs])
+AT_CHECK(ovstest test-dpdk-packet, [], [ignore], [ignore])
+
+AT_CLEANUP
diff --git a/tests/system-dpdk-testsuite.at b/tests/system-dpdk-testsuite.at
index 382f09e..f5edf58 100644
--- a/tests/system-dpdk-testsuite.at
+++ b/tests/system-dpdk-testsuite.at
@@ -23,3 +23,4 @@ m4_include([tests/system-common-macros.at])
 m4_include([tests/system-dpdk-macros.at])
 
 m4_include([tests/system-dpdk.at])
+m4_include([tests/dpdk-packet-mbufs.at])
diff --git a/tests/test-dpdk-mbufs.c b/tests/test-dpdk-mbufs.c
new file mode 100644
index 000..19081a3
--- /dev/null
+++ b/tests/test-dpdk-mbufs.c
@@ -0,0 +1,619 @@
+/*
+ * Copyright (c) 2018 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "dp-packet.h"
+#include "ovstest.h"
+#include "dpdk.h"
+#include "smap.h"
+
+#define N_MBUFS 1024
+#define MBUF_DATA_LEN 2048
+
+static int num_tests = 0;
+
+/* Global var to hold a mempool instance, "test-mp", used in all of the tests
+ * below. This instance is instantiated in dpdk_setup_eal_with_mp(). */
+static struct rte_mempool *mp;
+
+/* Test data used to fill the packets with data. Note that this isn't a string
+ * that repsents a valid packet, by any means. The pattern is generated in set_
+ * testing_pattern_str() and the sole purpose is to verify the data remains the
+ * same after inserting and operating on multi-segment mbufs. */
+static char *test_str;
+
+/* Asserts a dp_packet that holds a single mbuf, where:
+ * - nb_segs must be 1;
+ * - pkt_len must be equal to data_len which in turn must equal the provided
+ *   'pkt_len';
+ * - data_off must start at the provided 'data_ofs';
+ * - next must be NULL. 

[ovs-dev] [PATCH v8 09/14] dp-packet: Add support for data "linearization".

2018-08-20 Thread Tiago Lam
Previous commits have added support to the dp_packet API to handle
multi-segmented packets, where data is not stored contiguously in
memory. However, in some cases, it is inevitable and data must be
provided contiguously. Examples of such cases are when performing csums
over the entire packet data, or when write()'ing to a file descriptor
(for a tap interface, for example). For such cases, the dp_packet API
has been extended to provide a way to transform a multi-segmented
DPBUF_DPDK packet into a DPBUF_MALLOC system packet (at the expense of a
copy of memory). If the packet's data is already stored in memory
contigously then there's no need to convert the packet.

Additionally, the main use cases that were assuming that a dp_packet's
data is always held contiguously in memory were changed to make use of
the new "linear functions" in the dp_packet API when there's a need to
traverse the entire's packet data. Per the example above, when the
packet's data needs to be write() to the tap's file descriptor, or when
the conntrack module needs to verify a packet's checksum, the data is
now linearized.

Two new functions have also been added to the packets module to perform
the checksum over a dp_packet's data (using the alredy used csum API).
Initially, this is just a way to abstract the data's linearization, but
in the future this could be optimized to perform the checksum over the
multi-segmented packets, without the need to copy.

Signed-off-by: Tiago Lam 
---
 lib/bfd.c |  3 +-
 lib/conntrack.c   | 17 +
 lib/dp-packet.c   | 18 +
 lib/dp-packet.h   | 89 +++
 lib/dpif-netlink.c|  2 +-
 lib/dpif.c|  2 +-
 lib/netdev-bsd.c  |  2 +-
 lib/netdev-dummy.c|  5 ++-
 lib/netdev-linux.c|  5 ++-
 lib/netdev-native-tnl.c   | 24 ++--
 lib/odp-execute.c |  2 +-
 lib/ofp-print.c   |  2 +-
 lib/ovs-lldp.c|  3 +-
 lib/packets.c | 20 +-
 lib/packets.h |  3 ++
 ofproto/ofproto-dpif-sflow.c  |  2 +-
 ofproto/ofproto-dpif-upcall.c |  2 +-
 ofproto/ofproto-dpif-xlate.c  | 12 --
 18 files changed, 168 insertions(+), 45 deletions(-)

diff --git a/lib/bfd.c b/lib/bfd.c
index 5308262..d50d2da 100644
--- a/lib/bfd.c
+++ b/lib/bfd.c
@@ -722,7 +722,8 @@ bfd_process_packet(struct bfd *bfd, const struct flow *flow,
 if (!msg) {
 VLOG_INFO_RL(, "%s: Received too-short BFD control message (only "
  "%"PRIdPTR" bytes long, at least %d required).",
- bfd->name, (uint8_t *) dp_packet_tail(p) - l7,
+ bfd->name, dp_packet_size(p) -
+ (l7 - (uint8_t *) dp_packet_data(p)),
  BFD_PACKET_LEN);
 goto out;
 }
diff --git a/lib/conntrack.c b/lib/conntrack.c
index 974f985..15d1ed2 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -636,6 +636,8 @@ reverse_pat_packet(struct dp_packet *pkt, const struct conn 
*conn)
 static void
 reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
 {
+void *l3 = dp_packet_linear_ofs(pkt, pkt->l3_ofs);
+void *l4 = dp_packet_linear_ofs(pkt, pkt->l4_ofs);
 char *tail = dp_packet_tail(pkt);
 char pad = dp_packet_l2_pad_size(pkt);
 struct conn_key inner_key;
@@ -644,8 +646,8 @@ reverse_nat_packet(struct dp_packet *pkt, const struct conn 
*conn)
 uint16_t orig_l4_ofs = pkt->l4_ofs;
 
 if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
-struct ip_header *nh = dp_packet_l3(pkt);
-struct icmp_header *icmp = dp_packet_l4(pkt);
+struct ip_header *nh = l3;
+struct icmp_header *icmp = l4;
 struct ip_header *inner_l3 = (struct ip_header *) (icmp + 1);
 extract_l3_ipv4(_key, inner_l3, tail - ((char *)inner_l3) - pad,
 _l4, false);
@@ -664,8 +666,8 @@ reverse_nat_packet(struct dp_packet *pkt, const struct conn 
*conn)
 icmp->icmp_csum = 0;
 icmp->icmp_csum = csum(icmp, tail - (char *) icmp - pad);
 } else {
-struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
-struct icmp6_error_header *icmp6 = dp_packet_l4(pkt);
+struct ovs_16aligned_ip6_hdr *nh6 = l3;
+struct icmp6_error_header *icmp6 = l4;
 struct ovs_16aligned_ip6_hdr *inner_l3_6 =
 (struct ovs_16aligned_ip6_hdr *) (icmp6 + 1);
 extract_l3_ipv6(_key, inner_l3_6,
@@ -1320,6 +1322,7 @@ conntrack_execute(struct conntrack *ct, struct 
dp_packet_batch *pkt_batch,
 write_ct_md(packet, zone, NULL, NULL, NULL);
 continue;
 }
+
 process_one(ct, packet, , zone, force, commit, now, setmark,
 setlabel, nat_action_info, tp_src, tp_dst, helper);
 }
@@ -1902,8 +1905,8 @@ conn_key_extract(struct conntrack *ct, struct dp_packet 
*pkt, ovs_be16 dl_type,

[ovs-dev] [PATCH v8 11/14] netdev-dpdk: support multi-segment jumbo frames

2018-08-20 Thread Tiago Lam
From: Mark Kavanagh 

Currently, jumbo frame support for OvS-DPDK is implemented by
increasing the size of mbufs within a mempool, such that each mbuf
within the pool is large enough to contain an entire jumbo frame of
a user-defined size. Typically, for each user-defined MTU,
'requested_mtu', a new mempool is created, containing mbufs of size
~requested_mtu.

With the multi-segment approach, a port uses a single mempool,
(containing standard/default-sized mbufs of ~2k bytes), irrespective
of the user-requested MTU value. To accommodate jumbo frames, mbufs
are chained together, where each mbuf in the chain stores a portion of
the jumbo frame. Each mbuf in the chain is termed a segment, hence the
name.

== Enabling multi-segment mbufs ==
Multi-segment and single-segment mbufs are mutually exclusive, and the
user must decide on which approach to adopt on init. The introduction
of a new OVSDB field, 'dpdk-multi-seg-mbufs', facilitates this. This
is a global boolean value, which determines how jumbo frames are
represented across all DPDK ports. In the absence of a user-supplied
value, 'dpdk-multi-seg-mbufs' defaults to false, i.e. multi-segment
mbufs must be explicitly enabled / single-segment mbufs remain the
default.

Setting the field is identical to setting existing DPDK-specific OVSDB
fields:

ovs-vsctl set Open_vSwitch . other_config:dpdk-init=true
ovs-vsctl set Open_vSwitch . other_config:dpdk-lcore-mask=0x10
ovs-vsctl set Open_vSwitch . other_config:dpdk-socket-mem=4096,0
==> ovs-vsctl set Open_vSwitch . other_config:dpdk-multi-seg-mbufs=true

Co-authored-by: Tiago Lam 

Signed-off-by: Mark Kavanagh 
Signed-off-by: Tiago Lam 
Acked-by: Eelco Chaudron 
---
 Documentation/topics/dpdk/jumbo-frames.rst | 67 ++
 Documentation/topics/dpdk/memory.rst   | 36 
 NEWS   |  1 +
 lib/dpdk.c |  8 
 lib/netdev-dpdk.c  | 66 +
 lib/netdev-dpdk.h  |  2 +
 vswitchd/vswitch.xml   | 22 ++
 7 files changed, 194 insertions(+), 8 deletions(-)

diff --git a/Documentation/topics/dpdk/jumbo-frames.rst 
b/Documentation/topics/dpdk/jumbo-frames.rst
index 00360b4..07bf3ca 100644
--- a/Documentation/topics/dpdk/jumbo-frames.rst
+++ b/Documentation/topics/dpdk/jumbo-frames.rst
@@ -71,3 +71,70 @@ Jumbo frame support has been validated against 9728B frames, 
which is the
 largest frame size supported by Fortville NIC using the DPDK i40e driver, but
 larger frames and other DPDK NIC drivers may be supported. These cases are
 common for use cases involving East-West traffic only.
+
+---
+Multi-segment mbufs
+---
+
+Instead of increasing the size of mbufs within a mempool, such that each mbuf
+within the pool is large enough to contain an entire jumbo frame of a
+user-defined size, mbufs can be chained together instead. In this approach each
+mbuf in the chain stores a portion of the jumbo frame, by default ~2K bytes,
+irrespective of the user-requested MTU value. Since each mbuf in the chain is
+termed a segment, this approach is named "multi-segment mbufs".
+
+This approach may bring more flexibility in use cases where the maximum packet
+length may be hard to guess. For example, in cases where packets originate from
+sources marked for oflload (such as TSO), each packet may be larger than the
+MTU, and as such, when forwarding it to a DPDK port a single mbuf may not be
+enough to hold all of the packet's data.
+
+Multi-segment and single-segment mbufs are mutually exclusive, and the user
+must decide on which approach to adopt on initialisation. If multi-segment
+mbufs is to be enabled, it can be done so with the following command::
+
+$ ovs-vsctl set Open_vSwitch . other_config:dpdk-multi-seg-mbufs=true
+
+Single-segment mbufs still remain the default when using OvS-DPDK, and the
+above option `dpdk-multi-seg-mbufs` must be explicitly set to `true` if
+multi-segment mbufs are to be used.
+
+~
+Performance notes
+~
+
+When using multi-segment mbufs some PMDs may not support vectorized Tx
+functions, due to its non-contiguous nature. As a result this can hit
+performance for smaller packet sizes. For example, on a setup sending 64B
+packets at line rate, a decrease of ~20% has been observed. The performance
+impact stops being noticeable for larger packet sizes, although the exact size
+will between PMDs, and depending on the architecture one's using.
+
+Tests performed with the i40e PMD driver only showed this limitation for 64B
+packets, and the same rate was observed when comparing multi-segment mbufs and
+single-segment mbuf for 128B packets. In other words, the 20% drop in
+performance was not observed for packets >= 128B during this test case.
+
+Because of this, multi-segment mbufs is not advised to be used with smaller
+packet 

[ovs-dev] [PATCH v8 14/14] dpdk-tests: End-to-end tests for multi-seg mbufs.

2018-08-20 Thread Tiago Lam
The following tests are added to the DPDK testsuite to add some
coverage for the multi-segment mbufs:
- Check that multi-segment mbufs are disabled by default;
- Check that providing `other_config:dpdk-multi-seg-mbufs=true` indeed
  enables mbufs;
- Using a DPDK port, send a random packet out and check that `ofctl
  dump-flows` shows the correct amount of packets and bytes sent.

Signed-off-by: Tiago Lam 
Acked-by: Eelco Chaudron 
---
 tests/system-dpdk.at | 65 
 1 file changed, 65 insertions(+)

diff --git a/tests/system-dpdk.at b/tests/system-dpdk.at
index 3d21b01..af8de8c 100644
--- a/tests/system-dpdk.at
+++ b/tests/system-dpdk.at
@@ -71,3 +71,68 @@ OVS_VSWITCHD_STOP("/does not exist. The Open vSwitch kernel 
module is probably n
 ")
 AT_CLEANUP
 dnl --
+
+AT_SETUP([Jumbo frames - Multi-segment disabled by default])
+OVS_DPDK_START()
+
+AT_CHECK([grep "multi-segment mbufs enabled" ovs-vswitchd.log], [1], [])
+OVS_VSWITCHD_STOP("/Global register is changed during/d
+/EAL: No free hugepages reported in hugepages-1048576kB/d
+")
+AT_CLEANUP
+
+AT_SETUP([Jumbo frames - Multi-segment enabled])
+OVS_DPDK_START([dpdk-multi-seg-mbufs=true])
+AT_CHECK([grep "multi-segment mbufs enabled" ovs-vswitchd.log], [], [stdout])
+OVS_VSWITCHD_STOP("/Global register is changed during/d
+/EAL: No free hugepages reported in hugepages-1048576kB/d
+")
+AT_CLEANUP
+
+AT_SETUP([Jumbo frames - Multi-segment mbufs Tx])
+OVS_DPDK_PRE_CHECK()
+OVS_DPDK_START([per-port-memory=true dpdk-multi-seg-mbufs=true])
+
+dnl Add userspace bridge and attach it to OVS
+AT_CHECK([ovs-vsctl add-br br10 -- set bridge br10 datapath_type=netdev])
+AT_CHECK([ovs-vsctl add-port br10 dpdk0 \
+-- set Interface dpdk0 type=dpdk options:dpdk-devargs=$(cat PCI_ADDR) \
+-- set Interface dpdk0 mtu_request=9000], [], [stdout], [stderr])
+
+AT_CHECK([ovs-vsctl show], [], [stdout])
+
+dnl Add flows to send packets out from the 'dpdk0' port
+AT_CHECK([
+ovs-ofctl del-flows br10
+ovs-ofctl add-flow br10 in_port=LOCAL,actions=output:dpdk0
+], [], [stdout])
+
+AT_CHECK([ovs-ofctl dump-flows br10], [], [stdout])
+
+dnl Send packet out, of the 'dpdk0' port
+AT_CHECK([
+ARP_HEADER="09000B0009000A000806000108000604000100010A\
+0100020A02"
+dnl Build a random hex string to append to the ARP_HEADER
+RANDOM_BODY=$(printf '0102030405%.0s' {1..1750})
+dnl 8792B ARP packet
+RANDOM_ARP="$ARP_HEADER$RANDOM_BODY"
+
+ovs-ofctl packet-out br10 "packet=$RANDOM_ARP,action=resubmit:LOCAL"
+], [], [stdout])
+
+AT_CHECK([ovs-ofctl dump-flows br10], [0], [stdout])
+
+dnl Confirm the single packet as been sent with correct size
+AT_CHECK([ovs-ofctl dump-flows br10 | ofctl_strip | grep in_port], [0], [dnl
+ n_packets=1, n_bytes=8792, in_port=LOCAL actions=output:1
+])
+
+dnl Clean up
+OVS_VSWITCHD_STOP("/does not exist. The Open vSwitch kernel module is probably 
not loaded./d
+/Failed to enable flow control/d
+/failed to connect to \/tmp\/dpdkvhostclient0: No such file or directory/d
+/Global register is changed during/d
+/EAL: No free hugepages reported in hugepages-1048576kB/d
+")
+AT_CLEANUP
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v8 08/14] dp-packet: copy data from multi-seg. DPDK mbuf

2018-08-20 Thread Tiago Lam
From: Michael Qiu 

When doing packet clone, if packet source is from DPDK driver,
multi-segment must be considered, and copy the segment's data one by
one.

Also, lots of DPDK mbuf's info is missed during a copy, like packet
type, ol_flags, etc.  That information is very important for DPDK to do
packets processing.

Co-authored-by: Mark Kavanagh 
Co-authored-by: Tiago Lam 

Signed-off-by: Michael Qiu 
Signed-off-by: Mark Kavanagh 
Signed-off-by: Tiago Lam 
Acked-by: Eelco Chaudron 
---
 lib/dp-packet.c   | 69 ++-
 lib/dp-packet.h   |  3 +++
 lib/netdev-dpdk.c |  1 +
 3 files changed, 62 insertions(+), 11 deletions(-)

diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index 167bf43..806640b 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -48,6 +48,22 @@ dp_packet_use__(struct dp_packet *b, void *base, size_t 
allocated,
 dp_packet_set_size(b, 0);
 }
 
+#ifdef DPDK_NETDEV
+void
+dp_packet_copy_mbuf_flags(struct dp_packet *dst, const struct dp_packet *src)
+{
+ovs_assert(dst != NULL && src != NULL);
+struct rte_mbuf *buf_dst = &(dst->mbuf);
+struct rte_mbuf buf_src = src->mbuf;
+
+buf_dst->ol_flags = buf_src.ol_flags;
+buf_dst->packet_type = buf_src.packet_type;
+buf_dst->tx_offload = buf_src.tx_offload;
+}
+#else
+#define dp_packet_copy_mbuf_flags(arg1, arg2)
+#endif
+
 /* Initializes 'b' as an empty dp_packet that contains the 'allocated' bytes of
  * memory starting at 'base'.  'base' should be the first byte of a region
  * obtained from malloc().  It will be freed (with free()) if 'b' is resized or
@@ -158,6 +174,44 @@ dp_packet_clone(const struct dp_packet *buffer)
 return dp_packet_clone_with_headroom(buffer, 0);
 }
 
+#ifdef DPDK_NETDEV
+struct dp_packet *
+dp_packet_clone_with_headroom(const struct dp_packet *b, size_t headroom) {
+struct dp_packet *new_buffer;
+uint32_t pkt_len = dp_packet_size(b);
+
+/* copy multi-seg data */
+if (b->source == DPBUF_DPDK && !rte_pktmbuf_is_contiguous(>mbuf)) {
+void *dst = NULL;
+struct rte_mbuf *mbuf = CONST_CAST(struct rte_mbuf *, >mbuf);
+
+new_buffer = dp_packet_new_with_headroom(pkt_len, headroom);
+dst = dp_packet_data(new_buffer);
+dp_packet_set_size(new_buffer, pkt_len);
+
+if (!rte_pktmbuf_read(mbuf, 0, pkt_len, dst)) {
+return NULL;
+}
+} else {
+new_buffer = dp_packet_clone_data_with_headroom(dp_packet_data(b),
+dp_packet_size(b),
+headroom);
+}
+
+/* Copy the following fields into the returned buffer: l2_pad_size,
+ * l2_5_ofs, l3_ofs, l4_ofs, cutlen, packet_type and md. */
+memcpy(_buffer->l2_pad_size, >l2_pad_size,
+   sizeof(struct dp_packet) -
+   offsetof(struct dp_packet, l2_pad_size));
+
+dp_packet_copy_mbuf_flags(new_buffer, b);
+if (dp_packet_rss_valid(new_buffer)) {
+new_buffer->mbuf.hash.rss = b->mbuf.hash.rss;
+}
+
+return new_buffer;
+}
+#else
 /* Creates and returns a new dp_packet whose data are copied from 'buffer'.
  * The returned dp_packet will additionally have 'headroom' bytes of
  * headroom. */
@@ -165,32 +219,25 @@ struct dp_packet *
 dp_packet_clone_with_headroom(const struct dp_packet *buffer, size_t headroom)
 {
 struct dp_packet *new_buffer;
+uint32_t pkt_len = dp_packet_size(buffer);
 
 new_buffer = dp_packet_clone_data_with_headroom(dp_packet_data(buffer),
- dp_packet_size(buffer),
- headroom);
+ pkt_len, headroom);
+
 /* Copy the following fields into the returned buffer: l2_pad_size,
  * l2_5_ofs, l3_ofs, l4_ofs, cutlen, packet_type and md. */
 memcpy(_buffer->l2_pad_size, >l2_pad_size,
 sizeof(struct dp_packet) -
 offsetof(struct dp_packet, l2_pad_size));
 
-#ifdef DPDK_NETDEV
-new_buffer->mbuf.ol_flags = buffer->mbuf.ol_flags;
-#else
 new_buffer->rss_hash_valid = buffer->rss_hash_valid;
-#endif
-
 if (dp_packet_rss_valid(new_buffer)) {
-#ifdef DPDK_NETDEV
-new_buffer->mbuf.hash.rss = buffer->mbuf.hash.rss;
-#else
 new_buffer->rss_hash = buffer->rss_hash;
-#endif
 }
 
 return new_buffer;
 }
+#endif
 
 /* Creates and returns a new dp_packet that initially contains a copy of the
  * 'size' bytes of data starting at 'data' with no headroom or tailroom. */
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 3a99044..022e420 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -124,6 +124,9 @@ void dp_packet_init_dpdk(struct dp_packet *);
 void dp_packet_init(struct dp_packet *, size_t);
 void dp_packet_uninit(struct dp_packet *);
 
+void dp_packet_copy_mbuf_flags(struct dp_packet *dst,
+   const struct dp_packet *src);
+
 

[ovs-dev] [PATCH v8 05/14] dp-packet: Fix data_len handling multi-seg mbufs.

2018-08-20 Thread Tiago Lam
When a dp_packet is from a DPDK source, and it contains multi-segment
mbufs, the data_len is not equal to the packet size, pkt_len. Instead,
the data_len of each mbuf in the chain should be considered while
distributing the new (provided) size.

To account for the above dp_packet_set_size() has been changed so that,
in the multi-segment mbufs case, only the data_len on the last mbuf of
the chain and the total size of the packet, pkt_len, are changed. The
data_len on the intermediate mbufs preceeding the last mbuf is not
changed by dp_packet_set_size(). Furthermore, in some cases
dp_packet_set_size() may be used to set a smaller size than the current
packet size, thus effectively trimming the end of the packet. In the
multi-segment mbufs case this may lead to lingering mbufs that may need
freeing.

__dp_packet_set_data() now also updates an mbufs' data_len after setting
the data offset. This is so that both fields are always in sync for each
mbuf in a chain.

Co-authored-by: Michael Qiu 
Co-authored-by: Mark Kavanagh 
Co-authored-by: Przemyslaw Lal 
Co-authored-by: Marcin Ksiadz 
Co-authored-by: Yuanhan Liu 

Signed-off-by: Michael Qiu 
Signed-off-by: Mark Kavanagh 
Signed-off-by: Przemyslaw Lal 
Signed-off-by: Marcin Ksiadz 
Signed-off-by: Yuanhan Liu 
Signed-off-by: Tiago Lam 
Acked-by: Eelco Chaudron 
---
 lib/dp-packet.h | 76 -
 1 file changed, 64 insertions(+), 12 deletions(-)

diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 6376039..d2803af 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -429,17 +429,49 @@ dp_packet_size(const struct dp_packet *b)
 static inline void
 dp_packet_set_size(struct dp_packet *b, uint32_t v)
 {
-/* netdev-dpdk does not currently support segmentation; consequently, for
- * all intents and purposes, 'data_len' (16 bit) and 'pkt_len' (32 bit) may
- * be used interchangably.
- *
- * On the datapath, it is expected that the size of packets
- * (and thus 'v') will always be <= UINT16_MAX; this means that there is no
- * loss of accuracy in assigning 'v' to 'data_len'.
- */
-b->mbuf.data_len = (uint16_t)v;  /* Current seg length. */
-b->mbuf.pkt_len = v; /* Total length of all segments linked to
-  * this segment. */
+if (b->source == DPBUF_DPDK) {
+struct rte_mbuf *mbuf = >mbuf;
+uint16_t new_len = v;
+uint16_t data_len;
+uint16_t nb_segs = 0;
+uint16_t pkt_len = 0;
+
+/* Trim 'v' length bytes from the end of the chained buffers, freeing
+   any buffers that may be left floating */
+while (mbuf) {
+data_len = MIN(new_len, mbuf->data_len);
+mbuf->data_len = data_len;
+
+if (new_len - data_len <= 0) {
+/* Free the rest of chained mbufs */
+free_dpdk_buf(CONTAINER_OF(mbuf->next, struct dp_packet,
+   mbuf));
+mbuf->next = NULL;
+} else if (!mbuf->next) {
+/* Don't assign more than what we have available */
+mbuf->data_len = MIN(new_len,
+ mbuf->buf_len - mbuf->data_off);
+}
+
+new_len -= data_len;
+nb_segs += 1;
+pkt_len += mbuf->data_len;
+mbuf = mbuf->next;
+}
+
+/* pkt_len != v would effectively mean that pkt_len < than 'v' (as
+ * being bigger is logically impossible). Being < than 'v' would mean
+ * the 'v' provided was bigger than the available room, which is the
+ * responsibility of the caller to make sure there is enough room */
+ovs_assert(pkt_len == v);
+
+b->mbuf.nb_segs = nb_segs;
+b->mbuf.pkt_len = pkt_len;
+} else {
+b->mbuf.data_len = v;
+/* Total length of all segments linked to this segment. */
+b->mbuf.pkt_len = v;
+}
 }
 
 static inline uint16_t
@@ -451,7 +483,27 @@ __packet_data(const struct dp_packet *b)
 static inline void
 __packet_set_data(struct dp_packet *b, uint16_t v)
 {
-b->mbuf.data_off = v;
+if (b->source == DPBUF_DPDK) {
+/* Moving data_off away from the first mbuf in the chain is not a
+ * possibility using DPBUF_DPDK dp_packets */
+ovs_assert(v == UINT16_MAX || v <= b->mbuf.buf_len);
+
+uint16_t prev_ofs = b->mbuf.data_off;
+b->mbuf.data_off = v;
+int16_t ofs_diff = prev_ofs - b->mbuf.data_off;
+
+/* When dealing with DPDK mbufs, keep data_off and data_len in sync.
+ * Thus, update data_len if the length changes with the move of
+ * data_off. However, if data_len is 0, there's no data to move and
+ * data_len should remain 0. */
+
+if (b->mbuf.data_len != 0) {
+b->mbuf.data_len = MIN(b->mbuf.data_len + ofs_diff,
+   b->mbuf.buf_len - 

[ovs-dev] [PATCH v8 07/14] dp-packet: Handle multi-seg mubfs in shift() func.

2018-08-20 Thread Tiago Lam
In its current implementation dp_packet_shift() is also unaware of
multi-seg mbufs (that holds data in memory non-contiguously) and assumes
that data exists contiguously in memory, memmove'ing data to perform the
shift.

To add support for multi-seg mbuds a new set of functions was
introduced, dp_packet_mbuf_shift() and dp_packet_mbuf_write(). These
functions are used by dp_packet_shift(), when handling multi-seg mbufs,
to shift and write data within a chain of mbufs.

Signed-off-by: Tiago Lam 
Acked-by: Eelco Chaudron 
---
 lib/dp-packet.c | 100 
 lib/dp-packet.h |  10 ++
 2 files changed, 110 insertions(+)

diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index 2aaeaae..167bf43 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -294,6 +294,100 @@ dp_packet_prealloc_headroom(struct dp_packet *b, size_t 
size)
 }
 }
 
+#ifdef DPDK_NETDEV
+/* Write len data bytes in a mbuf at specified offset.
+ *
+ * 'mbuf', pointer to the destination mbuf where 'ofs' is, and the mbuf where
+ * the data will first be written.
+ * 'ofs', the offset within the provided 'mbuf' where 'data' is to be written.
+ * 'len', the size of the to be written 'data'.
+ * 'data', pointer to the to be written bytes.
+ *
+ * XXX: This function is the counterpart of the `rte_pktmbuf_read()` function
+ * available with DPDK, in the rte_mbuf.h */
+void
+dp_packet_mbuf_write(struct rte_mbuf *mbuf, int16_t ofs, uint32_t len,
+ const void *data)
+{
+char *dst_addr;
+uint16_t data_len;
+int len_copy;
+while (mbuf) {
+if (len == 0) {
+break;
+}
+
+dst_addr = rte_pktmbuf_mtod_offset(mbuf, char *, ofs);
+data_len = MBUF_BUF_END(mbuf->buf_addr, mbuf->buf_len) - dst_addr;
+
+len_copy = MIN(len, data_len);
+/* We don't know if 'data' is the result of a rte_pktmbuf_read() call,
+ * in which case we may end up writing to the same region of memory we
+ * are reading from and overlapping. Hence the use of memmove() here */
+memmove(dst_addr, data, len_copy);
+
+data = ((char *) data) + len_copy;
+len -= len_copy;
+ofs = 0;
+
+mbuf->data_len = len_copy;
+mbuf = mbuf->next;
+}
+}
+
+static void
+dp_packet_mbuf_shift_(struct rte_mbuf *dbuf, int16_t dst_ofs,
+  const struct rte_mbuf *sbuf, uint16_t src_ofs, int len)
+{
+char *rd = xmalloc(sizeof(*rd) * len);
+const char *wd = rte_pktmbuf_read(sbuf, src_ofs, len, rd);
+
+ovs_assert(wd);
+
+dp_packet_mbuf_write(dbuf, dst_ofs, len, wd);
+
+free(rd);
+}
+
+/* Similarly to dp_packet_shift(), shifts the data within the mbufs of a
+ * dp_packet of DPBUF_DPDK source by 'delta' bytes.
+ * Caller must make sure of the following conditions:
+ * - When shifting left, delta can't be bigger than the data_len available in
+ *   the last mbuf;
+ * - When shifting right, delta can't be bigger than the space available in the
+ *   first mbuf (buf_len - data_off).
+ * Both these conditions guarantee that a shift operation doesn't fall outside
+ * the bounds of the existing mbufs, so that the first and last mbufs (when
+ * using multi-segment mbufs), remain the same. */
+static void
+dp_packet_mbuf_shift(struct dp_packet *b, int delta)
+{
+uint16_t src_ofs;
+int16_t dst_ofs;
+
+struct rte_mbuf *mbuf = CONST_CAST(struct rte_mbuf *, >mbuf);
+struct rte_mbuf *tmbuf = rte_pktmbuf_lastseg(mbuf);
+
+if (delta < 0) {
+ovs_assert(-delta <= tmbuf->data_len);
+} else {
+ovs_assert(delta < (mbuf->buf_len - mbuf->data_off));
+}
+
+/* Set the destination and source offsets to copy to */
+dst_ofs = delta;
+src_ofs = 0;
+
+/* Shift data from src mbuf and offset to dst mbuf and offset */
+dp_packet_mbuf_shift_(mbuf, dst_ofs, mbuf, src_ofs,
+  rte_pktmbuf_pkt_len(mbuf));
+
+/* Update mbufs' properties, and if using multi-segment mbufs, first and
+ * last mbuf's data_len also needs to be adjusted */
+mbuf->data_off = mbuf->data_off + dst_ofs;
+}
+#endif
+
 /* Shifts all of the data within the allocated space in 'b' by 'delta' bytes.
  * For example, a 'delta' of 1 would cause each byte of data to move one byte
  * forward (from address 'p' to 'p+1'), and a 'delta' of -1 would cause each
@@ -306,6 +400,12 @@ dp_packet_shift(struct dp_packet *b, int delta)
: true);
 
 if (delta != 0) {
+#ifdef DPDK_NETDEV
+if (b->source == DPBUF_DPDK) {
+dp_packet_mbuf_shift(b, delta);
+return;
+}
+#endif
 char *dst = (char *) dp_packet_data(b) + delta;
 memmove(dst, dp_packet_data(b), dp_packet_size(b));
 dp_packet_set_data(b, dst);
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 48be19b..3a99044 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -80,6 +80,11 @@ struct dp_packet {
 };
 };
 
+#ifdef 

[ovs-dev] [PATCH v8 13/14] dpdk-tests: Accept other configs in OVS_DPDK_START

2018-08-20 Thread Tiago Lam
As it stands, OVS_DPDK_START() won't allow other configs to be set
before starting the ovs-vswitchd daemon. This is a problem since some
configs, such as the "dpdk-multi-seg-mbufs=true" for enabling the
multi-segment mbufs, need to be set prior to start OvS.

To support other options, OVS_DPDK_START() has been modified to accept
extra configs in the form "$config_name=$config_value". It then uses
ovs-vsctl to set the configs.

Signed-off-by: Tiago Lam 
Acked-by: Eelco Chaudron 
---
 tests/system-dpdk-macros.at | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/system-dpdk-macros.at b/tests/system-dpdk-macros.at
index 0762ee0..7c65834 100644
--- a/tests/system-dpdk-macros.at
+++ b/tests/system-dpdk-macros.at
@@ -21,7 +21,7 @@ m4_define([OVS_DPDK_PRE_CHECK],
 ])
 
 
-# OVS_DPDK_START()
+# OVS_DPDK_START([other-conf-args])
 #
 # Create an empty database and start ovsdb-server. Add special configuration
 # dpdk-init to enable DPDK functionality. Start ovs-vswitchd connected to that
@@ -48,6 +48,10 @@ m4_define([OVS_DPDK_START],
AT_CHECK([lscpu], [], [stdout])
AT_CHECK([cat stdout | grep "NUMA node(s)" | awk '{c=1; while (c++<$(3)) 
{printf "1024,"}; print "1024"}' > SOCKET_MEM])
AT_CHECK([ovs-vsctl --no-wait set Open_vSwitch . 
other_config:dpdk-socket-mem="$(cat SOCKET_MEM)"])
+   dnl Iterate through $other-conf-args list and include them
+   m4_foreach_w(opt, $1, [
+   AT_CHECK([ovs-vsctl --no-wait set Open_vSwitch . other_config:opt])
+   ])
 
dnl Start ovs-vswitchd.
AT_CHECK([ovs-vswitchd --detach --no-chdir --pidfile --log-file -vvconn 
-vofproto_dpif -vunixctl], [0], [stdout], [stderr])
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v8 10/14] netdev-dpdk: copy large packet to multi-seg. mbufs

2018-08-20 Thread Tiago Lam
From: Mark Kavanagh 

Currently, packets are only copied to a single segment in the function
dpdk_do_tx_copy(). This could be an issue in the case of jumbo frames,
particularly when multi-segment mbufs are involved.

This patch calculates the number of segments needed by a packet and
copies the data to each segment.

A new function, dpdk_buf_alloc(), has also been introduced as a wrapper
around the nonpmd_mp_mutex to serialise allocations from a non-pmd
context.

Co-authored-by: Michael Qiu 
Co-authored-by: Tiago Lam 

Signed-off-by: Mark Kavanagh 
Signed-off-by: Michael Qiu 
Signed-off-by: Tiago Lam 
Acked-by: Eelco Chaudron 
---
 lib/netdev-dpdk.c | 84 +--
 1 file changed, 75 insertions(+), 9 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index e005d00..e2df825 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -552,6 +552,27 @@ dpdk_rte_mzalloc(size_t sz)
 return rte_zmalloc(OVS_VPORT_DPDK, sz, OVS_CACHE_LINE_SIZE);
 }
 
+static struct rte_mbuf *
+dpdk_buf_alloc(struct rte_mempool *mp)
+{
+struct rte_mbuf *mbuf = NULL;
+
+/* If non-pmd we need to lock on nonpmd_mp_mutex mutex */
+if (!dpdk_thread_is_pmd()) {
+ovs_mutex_lock(_mp_mutex);
+
+mbuf = rte_pktmbuf_alloc(mp);
+
+ovs_mutex_unlock(_mp_mutex);
+
+return mbuf;
+}
+
+mbuf = rte_pktmbuf_alloc(mp);
+
+return mbuf;
+}
+
 void
 free_dpdk_buf(struct dp_packet *packet)
 {
@@ -2316,6 +2337,49 @@ out:
 }
 }
 
+static int
+dpdk_copy_dp_packet_to_mbuf(struct dp_packet *packet, struct rte_mbuf **head,
+struct rte_mempool *mp)
+{
+struct rte_mbuf *mbuf, *fmbuf;
+uint32_t size = dp_packet_size(packet);
+uint16_t max_data_len;
+uint32_t nb_segs = 0;
+
+/* Allocate first mbuf to know the size of data available */
+fmbuf = mbuf = *head = dpdk_buf_alloc(mp);
+if (OVS_UNLIKELY(!mbuf)) {
+return ENOMEM;
+}
+
+/* All new allocated mbuf's max data len is the same */
+max_data_len = mbuf->buf_len - mbuf->data_off;
+
+/* Calculate # of output mbufs. */
+nb_segs = size / max_data_len;
+if (size % max_data_len) {
+nb_segs = nb_segs + 1;
+}
+
+/* Allocate additional mbufs, less the one alredy allocated above */
+for (int i = 1; i < nb_segs; i++) {
+mbuf->next = dpdk_buf_alloc(mp);
+if (!mbuf->next) {
+free_dpdk_buf(CONTAINER_OF(fmbuf, struct dp_packet, mbuf));
+fmbuf = NULL;
+return ENOMEM;
+}
+mbuf = mbuf->next;
+}
+
+fmbuf->nb_segs = nb_segs;
+fmbuf->pkt_len = size;
+
+dp_packet_mbuf_write(fmbuf, 0, size, dp_packet_data(packet));
+
+return 0;
+}
+
 /* Tx function. Transmit packets indefinitely */
 static void
 dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
@@ -2332,6 +2396,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct 
dp_packet_batch *batch)
 struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
 uint32_t cnt = batch_cnt;
 uint32_t dropped = 0;
+uint32_t i;
 
 if (dev->type != DPDK_DEV_VHOST) {
 /* Check if QoS has been configured for this netdev. */
@@ -2342,28 +2407,29 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct 
dp_packet_batch *batch)
 
 uint32_t txcnt = 0;
 
-for (uint32_t i = 0; i < cnt; i++) {
+for (i = 0; i < cnt; i++) {
 struct dp_packet *packet = batch->packets[i];
 uint32_t size = dp_packet_size(packet);
+int err = 0;
 
 if (OVS_UNLIKELY(size > dev->max_packet_len)) {
 VLOG_WARN_RL(, "Too big size %u max_packet_len %d",
  size, dev->max_packet_len);
-
 dropped++;
 continue;
 }
 
-pkts[txcnt] = rte_pktmbuf_alloc(dev->dpdk_mp->mp);
-if (OVS_UNLIKELY(!pkts[txcnt])) {
+err = dpdk_copy_dp_packet_to_mbuf(packet, [txcnt],
+  dev->dpdk_mp->mp);
+if (err != 0) {
+if (err == ENOMEM) {
+VLOG_ERR_RL(, "Failed to alloc mbufs! %u packets dropped",
+cnt - i);
+}
+
 dropped += cnt - i;
 break;
 }
-
-/* We have to do a copy for now */
-memcpy(rte_pktmbuf_mtod(pkts[txcnt], void *),
-   dp_packet_data(packet), size);
-dp_packet_set_size((struct dp_packet *)pkts[txcnt], size);
 dp_packet_copy_mbuf_flags((struct dp_packet *)pkts[txcnt], packet);
 
 txcnt++;
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v8 06/14] dp-packet: Handle multi-seg mbufs in helper funcs.

2018-08-20 Thread Tiago Lam
Most helper functions in dp-packet assume that the data held by a
dp_packet is contiguous, and perform operations such as pointer
arithmetic under that assumption. However, with the introduction of
multi-segment mbufs, where data is non-contiguous, such assumptions are
no longer possible. Some examples of Such helper functions are
dp_packet_tail(), dp_packet_tailroom(), dp_packet_end(),
dp_packet_get_allocated() and dp_packet_at().

Thus, instead of assuming contiguous data in dp_packet, they  now
iterate over the (non-contiguous) data in mbufs to perform their
calculations.

Finally, dp_packet_use__() has also been modified to perform the
initialisation of the packet (and setting the source) before continuing
to set its size and data length, which now depends on the type of
packet.

Co-authored-by: Mark Kavanagh 

Signed-off-by: Mark Kavanagh 
Signed-off-by: Tiago Lam 
Acked-by: Eelco Chaudron 
---
 lib/dp-packet.c |   4 +-
 lib/dp-packet.h | 150 +++-
 2 files changed, 140 insertions(+), 14 deletions(-)

diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index 782e7c2..2aaeaae 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -41,11 +41,11 @@ static void
 dp_packet_use__(struct dp_packet *b, void *base, size_t allocated,
  enum dp_packet_source source)
 {
+dp_packet_init__(b, allocated, source);
+
 dp_packet_set_base(b, base);
 dp_packet_set_data(b, base);
 dp_packet_set_size(b, 0);
-
-dp_packet_init__(b, allocated, source);
 }
 
 /* Initializes 'b' as an empty dp_packet that contains the 'allocated' bytes of
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index d2803af..48be19b 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -185,9 +185,25 @@ dp_packet_delete(struct dp_packet *b)
 static inline void *
 dp_packet_at(const struct dp_packet *b, size_t offset, size_t size)
 {
-return offset + size <= dp_packet_size(b)
-   ? (char *) dp_packet_data(b) + offset
-   : NULL;
+if (offset + size > dp_packet_size(b)) {
+return NULL;
+}
+
+#ifdef DPDK_NETDEV
+if (b->source == DPBUF_DPDK) {
+struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, >mbuf);
+
+while (buf && offset > buf->data_len) {
+offset -= buf->data_len;
+
+buf = buf->next;
+}
+
+return buf ? rte_pktmbuf_mtod_offset(buf, char *, offset) : NULL;
+}
+#endif
+
+return (char *) dp_packet_data(b) + offset;
 }
 
 /* Returns a pointer to byte 'offset' in 'b', which must contain at least
@@ -196,13 +212,23 @@ static inline void *
 dp_packet_at_assert(const struct dp_packet *b, size_t offset, size_t size)
 {
 ovs_assert(offset + size <= dp_packet_size(b));
-return ((char *) dp_packet_data(b)) + offset;
+return dp_packet_at(b, offset, size);
 }
 
 /* Returns a pointer to byte following the last byte of data in use in 'b'. */
 static inline void *
 dp_packet_tail(const struct dp_packet *b)
 {
+#ifdef DPDK_NETDEV
+if (b->source == DPBUF_DPDK) {
+struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, >mbuf);
+/* Find last segment where data ends, meaning the tail of the chained
+ *  mbufs must be there */
+buf = rte_pktmbuf_lastseg(buf);
+
+return rte_pktmbuf_mtod_offset(buf, void *, buf->data_len);
+}
+#endif
 return (char *) dp_packet_data(b) + dp_packet_size(b);
 }
 
@@ -211,6 +237,15 @@ dp_packet_tail(const struct dp_packet *b)
 static inline void *
 dp_packet_end(const struct dp_packet *b)
 {
+#ifdef DPDK_NETDEV
+if (b->source == DPBUF_DPDK) {
+struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, &(b->mbuf));
+
+buf = rte_pktmbuf_lastseg(buf);
+
+return (char *) buf->buf_addr + buf->buf_len;
+}
+#endif
 return (char *) dp_packet_base(b) + dp_packet_get_allocated(b);
 }
 
@@ -236,6 +271,15 @@ dp_packet_tailroom(const struct dp_packet *b)
 static inline void
 dp_packet_clear(struct dp_packet *b)
 {
+#ifdef DPDK_NETDEV
+if (b->source == DPBUF_DPDK) {
+/* sets pkt_len and data_len to zero and frees unused mbufs */
+dp_packet_set_size(b, 0);
+rte_pktmbuf_reset(>mbuf);
+
+return;
+}
+#endif
 dp_packet_set_data(b, dp_packet_base(b));
 dp_packet_set_size(b, 0);
 }
@@ -252,24 +296,38 @@ dp_packet_pull(struct dp_packet *b, size_t size)
 return data;
 }
 
+#ifdef DPDK_NETDEV
+/* Similar to dp_packet_try_pull() but doesn't actually pull any data, only
+ * checks if it could and returns true or false accordingly.
+ *
+ * Valid for dp_packets carrying mbufs only. */
+static inline bool
+dp_packet_mbuf_may_pull(const struct dp_packet *b, size_t size) {
+if (size > b->mbuf.data_len) {
+return false;
+}
+
+return true;
+}
+#endif
+
 /* If 'b' has at least 'size' bytes of data, removes that many bytes from the
  * head end of 'b' and returns the first byte removed.  Otherwise, returns a
  * null pointer without modifying 

[ovs-dev] [PATCH v8 04/14] netdev-dpdk: Serialise non-pmds mbufs' alloc/free.

2018-08-20 Thread Tiago Lam
A new mutex, 'nonpmd_mp_mutex', has been introduced to serialise
allocation and free operations by non-pmd threads on a given mempool.

free_dpdk_buf() has been modified to make use of the introduced mutex.

Signed-off-by: Tiago Lam 
Acked-by: Eelco Chaudron 
---
 lib/netdev-dpdk.c | 33 ++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index ebd55e9..aee8e20 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -322,6 +322,16 @@ static struct ovs_mutex dpdk_mp_mutex 
OVS_ACQ_AFTER(dpdk_mutex)
 static struct ovs_list dpdk_mp_list OVS_GUARDED_BY(dpdk_mp_mutex)
 = OVS_LIST_INITIALIZER(_mp_list);
 
+/* This mutex must be used by non pmd threads when allocating or freeing
+ * mbufs through mempools, when outside of the `non_pmd_mutex` mutex, in struct
+ * dp_netdev.
+ * The reason, as pointed out in the "Known Issues" section in DPDK's EAL docs,
+ * is that the implementation on which mempool is based off is non-preemptable.
+ * Since non-pmds may end up not being pinned this could lead to the preemption
+ * between non-pmds performing operations on the same mempool, which could lead
+ * to memory corruption. */
+static struct ovs_mutex nonpmd_mp_mutex = OVS_MUTEX_INITIALIZER;
+
 struct dpdk_mp {
  struct rte_mempool *mp;
  int mtu;
@@ -492,6 +502,8 @@ struct netdev_rxq_dpdk {
 dpdk_port_t port_id;
 };
 
+static bool dpdk_thread_is_pmd(void);
+
 static void netdev_dpdk_destruct(struct netdev *netdev);
 static void netdev_dpdk_vhost_destruct(struct netdev *netdev);
 
@@ -525,6 +537,12 @@ dpdk_buf_size(int mtu)
  NETDEV_DPDK_MBUF_ALIGN);
 }
 
+static bool
+dpdk_thread_is_pmd(void)
+{
+ return rte_lcore_id() != NON_PMD_CORE_ID;
+}
+
 /* Allocates an area of 'sz' bytes from DPDK.  The memory is zero'ed.
  *
  * Unlike xmalloc(), this function can return NULL on failure. */
@@ -535,11 +553,20 @@ dpdk_rte_mzalloc(size_t sz)
 }
 
 void
-free_dpdk_buf(struct dp_packet *p)
+free_dpdk_buf(struct dp_packet *packet)
 {
-struct rte_mbuf *pkt = (struct rte_mbuf *) p;
+/* If non-pmd we need to lock on nonpmd_mp_mutex mutex */
+if (!dpdk_thread_is_pmd()) {
+ovs_mutex_lock(_mp_mutex);
+
+rte_pktmbuf_free(>mbuf);
+
+ovs_mutex_unlock(_mp_mutex);
+
+return;
+}
 
-rte_pktmbuf_free(pkt);
+rte_pktmbuf_free(>mbuf);
 }
 
 static void
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v8 00/14] Support multi-segment mbufs

2018-08-20 Thread Tiago Lam
Overview

This patchset introduces support for multi-segment mbufs to OvS-DPDK.
Multi-segment mbufs are typically used when the size of an mbuf is
insufficient to contain the entirety of a packet's data. Instead, the
data is split across numerous mbufs, each carrying a portion, or
'segment', of the packet data. Mbufs are chained via their 'next'
attribute (an mbuf pointer).

The main motivation behind the support for multi-segment mbufs is to
later introduce TSO (use case i. below) / GRO in OvS-DPDK, which is
planned to be introduced after this series.

Use Cases
=
i.  Handling oversized (guest-originated) frames, which are marked
for hardware accelration/offload (TSO, for example).

Packets which originate from a non-DPDK source may be marked for
offload; as such, they may be larger than the permitted ingress
interface's MTU, and may be stored in an oversized dp-packet. In
order to transmit such packets over a DPDK port, their contents
must be copied to a DPDK mbuf (via dpdk_do_tx_copy). However, in
its current implementation, that function only copies data into
a single mbuf; if the space available in the mbuf is exhausted,
but not all packet data has been copied, then it is lost.
Similarly, when cloning a DPDK mbuf, it must be considered
whether that mbuf contains multiple segments. Both issues are
resolved within this patchset.

ii. Handling jumbo frames.

While OvS already supports jumbo frames, it does so by increasing
mbuf size, such that the entirety of a jumbo frame may be handled
in a single mbuf. This is certainly the preferred, and most
performant approach (and remains the default).

Enabling multi-segment mbufs

Multi-segment and single-segment mbufs are mutually exclusive, and the
user must decide on which approach to adopt on init. The introduction
of a new OVSDB field, 'dpdk-multi-seg-mbufs', facilitates this.

This is a global boolean value, which determines how jumbo frames are
represented across all DPDK ports. In the absence of a user-supplied
value, 'dpdk-multi-seg-mbufs' defaults to false, i.e. multi-segment
mbufs must be explicitly enabled / single-segment mbufs remain the
default.

Setting the field is identical to setting existing DPDK-specific OVSDB
fields:

ovs-vsctl set Open_vSwitch . other_config:dpdk-init=true
ovs-vsctl set Open_vSwitch . other_config:dpdk-lcore-mask=0x10
ovs-vsctl set Open_vSwitch . other_config:dpdk-socket-mem=4096,0
==> ovs-vsctl set Open_vSwitch . other_config:dpdk-multi-seg-mbufs=true

Performance notes (based on v8, 1st non-RFC)
=
In order to test for regressions in performance, tests were run on top
of master 88125d6 and v8 of this patchset, both with the multi-segment
mbufs option enabled and disabled.

VSperf was used to run the phy2phy_cont and pvp_cont tests with varying
packet sizes of 64B, 1500B and 7000B, on a 10Gbps interface.

Test | Size | Master | Multi-seg disabled | Multi-seg enabled
-
p2p  |  64  | ~22.7  |  ~22.65|   ~18.3
p2p  | 1500 |  ~1.6  |~1.6|~1.6
p2p  | 7000 | ~0.36  |   ~0.36|   ~0.36
pvp  |  64  |  ~6.7  |~6.7|~6.3
pvp  | 1500 |  ~1.6  |~1.6|~1.6
pvp  | 7000 | ~0.36  |   ~0.36|   ~0.36

Packet size is in bytes, while all packet rates are reported in mpps
(aggregated).

No noticeable regression has been observed (certainly everything is
within the ± 5% margin of existing performance), aside from the 64B
packet size case when multi-segment mbuf is enabled. This is
expected, however, because of how Tx vectoriszed functions are
incompatible with multi-segment mbufs on some PMDs. The PMD under
use during these tests was the i40e (on a Intel X710 NIC), which
indeed doesn't support vectorized Tx functions with multi-segment
mbufs.

---
v8: - Rebase on master 1dd218a ("ovsdb-idl: Fix recently introduced Python 3
  tests.");
- Address Ian's comment:
  - Fix sparse warnings on patch 07/14 and 12/14 by allocating memory
dynamically.
- Address Ilya's comments:
  - netdev_linux_tap_batch_send() and udp_extract_tnl_md() now linearize
the data before hand, beforing write()'ing or performing the checksums
in the data;
  - Some other cases have been found and adapted; The new patch 09/14
introduced in the series is where the "linearization" logic is
introduced and, as a consequence, some users of the dp_packet API,
which were assuming the data is held contiguously in memory, are
changed to use the new APIs.
- Add support for multi-segment mbufs to dp_packet_equal() (patch 06/14);
- Fix a bug in patch 08/14 where the call to dp_packet_copy_mbuf_flags() in
  dp_packet_clone_with_headroom() was setting incorrectly the nb_segs field
  on the 

[ovs-dev] [PATCH v8 03/14] dp-packet: Fix allocated size on DPDK init.

2018-08-20 Thread Tiago Lam
When enabled with DPDK OvS deals with two types of packets, the ones
coming from the mempool and the ones locally created by OvS - which are
copied to mempool mbufs before output. In the latter, the space is
allocated from the system, while in the former the mbufs are allocated
from a mempool, which takes care of initialising them appropriately.

In the current implementation, during mempool's initialisation of mbufs,
dp_packet_set_allocated() is called from dp_packet_init_dpdk() without
considering that the allocated space, in the case of multi-segment
mbufs, might be greater than a single mbuf.  Furthermore, given that
dp_packet_init_dpdk() is on the code path that's called upon mempool's
initialisation, a call to dp_packet_set_allocated() is redundant, since
mempool takes care of initialising it.

To fix this, dp_packet_set_allocated() is no longer called after
initialisation of a mempool, only in dp_packet_init__(), which is still
called by OvS when initialising locally created packets.

Signed-off-by: Tiago Lam 
Acked-by: Eelco Chaudron 
---
 lib/dp-packet.c   | 3 +--
 lib/dp-packet.h   | 2 +-
 lib/netdev-dpdk.c | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index 443c225..782e7c2 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -99,9 +99,8 @@ dp_packet_use_const(struct dp_packet *b, const void *data, 
size_t size)
  * buffer.  Here, non-transient ovs dp-packet fields are initialized for
  * packets that are part of a DPDK memory pool. */
 void
-dp_packet_init_dpdk(struct dp_packet *b, size_t allocated)
+dp_packet_init_dpdk(struct dp_packet *b)
 {
-dp_packet_set_allocated(b, allocated);
 b->source = DPBUF_DPDK;
 }
 
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index b948fe1..6376039 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -114,7 +114,7 @@ void dp_packet_use(struct dp_packet *, void *, size_t);
 void dp_packet_use_stub(struct dp_packet *, void *, size_t);
 void dp_packet_use_const(struct dp_packet *, const void *, size_t);
 
-void dp_packet_init_dpdk(struct dp_packet *, size_t allocated);
+void dp_packet_init_dpdk(struct dp_packet *);
 
 void dp_packet_init(struct dp_packet *, size_t);
 void dp_packet_uninit(struct dp_packet *);
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 0cd9ff6..ebd55e9 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -550,7 +550,7 @@ ovs_rte_pktmbuf_init(struct rte_mempool *mp OVS_UNUSED,
 {
 struct rte_mbuf *pkt = _p;
 
-dp_packet_init_dpdk((struct dp_packet *) pkt, pkt->buf_len);
+dp_packet_init_dpdk((struct dp_packet *) pkt);
 }
 
 static int
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v8 01/14] netdev-dpdk: fix mbuf sizing

2018-08-20 Thread Tiago Lam
From: Mark Kavanagh 

There are numerous factors that must be considered when calculating
the size of an mbuf:
- the data portion of the mbuf must be sized in accordance With Rx
  buffer alignment (typically 1024B). So, for example, in order to
  successfully receive and capture a 1500B packet, mbufs with a
  data portion of size 2048B must be used.
- in OvS, the elements that comprise an mbuf are:
  * the dp packet, which includes a struct rte mbuf (704B)
  * RTE_PKTMBUF_HEADROOM (128B)
  * packet data (aligned to 1k, as previously described)
  * RTE_PKTMBUF_TAILROOM (typically 0)

Some PMDs require that the total mbuf size (i.e. the total sum of all
of the above-listed components' lengths) is cache-aligned. To satisfy
this requirement, it may be necessary to round up the total mbuf size
with respect to cacheline size. In doing so, it's possible that the
dp_packet's data portion is inadvertently increased in size, such that
it no longer adheres to Rx buffer alignment. Consequently, the
following property of the mbuf no longer holds true:

mbuf.data_len == mbuf.buf_len - mbuf.data_off

This creates a problem in the case of multi-segment mbufs, where that
assumption is assumed to be true for all but the final segment in an
mbuf chain. Resolve this issue by adjusting the size of the mbuf's
private data portion, as opposed to the packet data portion when
aligning mbuf size to cachelines.

Fixes: 4be4d22 ("netdev-dpdk: clean up mbuf initialization")
Fixes: 31b88c9 ("netdev-dpdk: round up mbuf_size to cache_line_size")
CC: Santosh Shukla 
Signed-off-by: Mark Kavanagh 
Acked-by: Santosh Shukla 
Acked-by: Eelco Chaudron 
---
 lib/netdev-dpdk.c | 56 +--
 1 file changed, 38 insertions(+), 18 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index ac02a09..0cd9ff6 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -88,10 +88,6 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 
20);
 #define MTU_TO_MAX_FRAME_LEN(mtu)   ((mtu) + ETHER_HDR_MAX_LEN)
 #define FRAME_LEN_TO_MTU(frame_len) ((frame_len)\
  - ETHER_HDR_LEN - ETHER_CRC_LEN)
-#define MBUF_SIZE(mtu)  ROUND_UP((MTU_TO_MAX_FRAME_LEN(mtu) \
- + sizeof(struct dp_packet) \
- + RTE_PKTMBUF_HEADROOM),   \
- RTE_CACHE_LINE_SIZE)
 #define NETDEV_DPDK_MBUF_ALIGN  1024
 #define NETDEV_DPDK_MAX_PKT_LEN 9728
 
@@ -637,7 +633,11 @@ dpdk_mp_create(struct netdev_dpdk *dev, int mtu, bool 
per_port_mp)
 char mp_name[RTE_MEMPOOL_NAMESIZE];
 const char *netdev_name = netdev_get_name(>up);
 int socket_id = dev->requested_socket_id;
-uint32_t n_mbufs;
+uint32_t n_mbufs = 0;
+uint32_t mbuf_size = 0;
+uint32_t aligned_mbuf_size = 0;
+uint32_t mbuf_priv_data_len = 0;
+uint32_t pkt_size = 0;
 uint32_t hash = hash_string(netdev_name, 0);
 struct dpdk_mp *dmp = NULL;
 int ret;
@@ -650,6 +650,9 @@ dpdk_mp_create(struct netdev_dpdk *dev, int mtu, bool 
per_port_mp)
 dmp->mtu = mtu;
 dmp->refcount = 1;
 
+/* Get the size of each mbuf, based on the MTU */
+mbuf_size = dpdk_buf_size(dev->requested_mtu);
+
 n_mbufs = dpdk_calculate_mbufs(dev, mtu, per_port_mp);
 
 do {
@@ -661,8 +664,8 @@ dpdk_mp_create(struct netdev_dpdk *dev, int mtu, bool 
per_port_mp)
  * so this is not an issue for tasks such as debugging.
  */
 ret = snprintf(mp_name, RTE_MEMPOOL_NAMESIZE,
-   "ovs%08x%02d%05d%07u",
-   hash, socket_id, mtu, n_mbufs);
+   "ovs%08x%02d%05d%07u",
+hash, socket_id, mtu, n_mbufs);
 if (ret < 0 || ret >= RTE_MEMPOOL_NAMESIZE) {
 VLOG_DBG("snprintf returned %d. "
  "Failed to generate a mempool name for \"%s\". "
@@ -671,17 +674,34 @@ dpdk_mp_create(struct netdev_dpdk *dev, int mtu, bool 
per_port_mp)
 break;
 }
 
-VLOG_DBG("Port %s: Requesting a mempool of %u mbufs "
-  "on socket %d for %d Rx and %d Tx queues.",
-  netdev_name, n_mbufs, socket_id,
-  dev->requested_n_rxq, dev->requested_n_txq);
-
-dmp->mp = rte_pktmbuf_pool_create(mp_name, n_mbufs,
-  MP_CACHE_SZ,
-  sizeof (struct dp_packet)
-  - sizeof (struct rte_mbuf),
-  MBUF_SIZE(mtu)
-  - sizeof(struct dp_packet),
+VLOG_DBG("Port %s: Requesting a mempool of %u mbufs of size %u "
+  "on socket %d for %d Rx and %d Tx queues, "
+  "cache line size of %u",
+  netdev_name, n_mbufs, 

[ovs-dev] [PATCH v8 02/14] dp-packet: Init specific mbuf fields.

2018-08-20 Thread Tiago Lam
From: Mark Kavanagh 

dp_packets are created using xmalloc(); in the case of OvS-DPDK, it's
possible the the resultant mbuf portion of the dp_packet contains
random data. For some mbuf fields, specifically those related to
multi-segment mbufs and/or offload features, random values may cause
unexpected behaviour, should the dp_packet's contents be later copied
to a DPDK mbuf. It is critical therefore, that these fields should be
initialized to 0.

This patch ensures that the following mbuf fields are initialized to
appropriate values on creation of a new dp_packet:
   - ol_flags=0
   - nb_segs=1
   - tx_offload=0
   - packet_type=0
   - next=NULL

Adapted from an idea by Michael Qiu :
https://patchwork.ozlabs.org/patch/777570/

Co-authored-by: Tiago Lam 

Signed-off-by: Mark Kavanagh 
Signed-off-by: Tiago Lam 
Acked-by: Eelco Chaudron 
---
 lib/dp-packet.h | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index ba91e58..b948fe1 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -625,14 +625,15 @@ dp_packet_mbuf_rss_flag_reset(struct dp_packet *p 
OVS_UNUSED)
 }
 
 /* This initialization is needed for packets that do not come
- * from DPDK interfaces, when vswitchd is built with --with-dpdk.
- * The DPDK rte library will still otherwise manage the mbuf.
- * We only need to initialize the mbuf ol_flags. */
+ * from DPDK interfaces, when vswitchd is built with --with-dpdk. */
 static inline void
 dp_packet_mbuf_init(struct dp_packet *p OVS_UNUSED)
 {
 #ifdef DPDK_NETDEV
-p->mbuf.ol_flags = 0;
+struct rte_mbuf *mbuf = &(p->mbuf);
+mbuf->ol_flags = mbuf->tx_offload = mbuf->packet_type = 0;
+mbuf->nb_segs = 1;
+mbuf->next = NULL;
 #endif
 }
 
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] OVS DPDK Latest & HWOL Branches

2018-08-20 Thread Ben Pfaff
On Thu, Aug 16, 2018 at 09:00:04AM -0700, Ben Pfaff wrote:
> On Wed, Aug 15, 2018 at 01:14:46PM -0700, Ben Pfaff wrote:
> > On Wed, Aug 15, 2018 at 11:17:21AM -0700, Ben Pfaff wrote:
> > > On Wed, Aug 15, 2018 at 01:49:45PM +0100, Ian Stokes wrote:
> > > > On 8/14/2018 10:19 PM, Ben Pfaff wrote:
> > > > >On Tue, Aug 14, 2018 at 04:42:06PM +, Stokes, Ian wrote:
> > > > >>Recently at the OVS DPDK community meeting the case for 2 new 
> > > > >>branches was raised.
> > > > >>
> > > > >>https://mail.openvswitch.org/pipermail/ovs-dev/2018-August/350898.html
> > > > >>
> > > > >>These branches would be:
> > > > >>
> > > > >>(i) OVS DPDK Latest: This branch would essentially be OVS master 
> > > > >>using the latest DPDK release (Including non LTS releases).
> > > > >>
> > > > >>The purpose of this branch would be to allow OVS DPDK developers to 
> > > > >>assess the latest DPDK releases with OVS and provide feedback to the 
> > > > >>DPDK community if changes are required. Currently OVS transitions 
> > > > >>between supported DPDK releases using DPDK LTS releases only. DPDK 
> > > > >>LTS releases happen annually. The next DPDK LTS release would be 
> > > > >>18.11. However the other non-lts DPDK releases (x.02, x.05, x.08) can 
> > > > >>introduce/change APIs that impact OVS DPDK (Such as the HWOL). This 
> > > > >>feedback would be in place for the next LTS release before OVS 
> > > > >>transitions to the next x.11 LTS.
> > > > >>
> > > > >>(ii) OVS DPDK HWOL: This branch would be forked from OVS DPDK Latest 
> > > > >>but would encompass the HWOL development work that is ongoing.
> > > > >>
> > > > >>The feeling as regards the need for a OVS DPDK HWOL branch is that it 
> > > > >>requires new features only available in the latest DPDK releases and 
> > > > >>that there will be a lot of code rework required as its validated 
> > > > >>with various HW devices over time before an acceptable solution will 
> > > > >>be in place.
> > > > >>
> > > > >>There was a question as regards the logistics of where the branches 
> > > > >>should reside. It was suggested that they could be part of the OVS 
> > > > >>Repo to centralize the development work but that is obviously 
> > > > >>something that would have to be raised with yourself and the other 
> > > > >>project maintainers.
> > > > >>
> > > > >>An alternative would be that it would be hosted on a developers 
> > > > >>GitHub repo similar to how the dpdk_merge branches currently work.
> > > > >>
> > > > >>Neither of the branches would be subject to releases as the end goal 
> > > > >>of the development work carried out on them would make its way into 
> > > > >>OVS Master eventually.
> > > > >
> > > > >This seems reasonable, as long as it doesn't overburden developers with
> > > > >branches.
> > > > 
> > > > I agree, and I think the messaging as regards its purpose of usage 
> > > > should be
> > > > clear, it's intended as a tool to aid development of features for 
> > > > intended
> > > > for OVS master in the future such as DPDK releases or . Developers could
> > > > ignore these branches if the features they develop are already 
> > > > available in
> > > > the existing supported DPDK.
> > > > 
> > > > >
> > > > >How do you prefer to host it?  Any of the following would be fine with
> > > > >me, in descending order of preference:
> > > > >
> > > > >* Host in openvswitch github repo forked from main openvswitch/ovs 
> > > > >repo.
> > > > 
> > > > I think the option above would be good, better than being provisioned 
> > > > on a
> > > > developers own github repo. At least when documenting its purpose it 
> > > > would
> > > > look a little more official rather than pointing to a personal github 
> > > > repo.
> > > > 
> > > > In terms of commit access I take it the default ovs committers would 
> > > > have
> > > > access here as well (I take it they would be the people to create the 
> > > > repo).
> > > > In terms of merging would this follow the pull_request process to 
> > > > yourself?
> > > 
> > > I'd think it would make sense to give you the ability to push directly
> > > to this new repo.  It's only merging into release branches that the OVS
> > > contribution policy is supposed to gate.
> > 
> > I talked to Justin about this.  He pointed out that if we create a repo
> > with "dpdk" in the name, it's likely to confuse people who are casually
> > looking for OVS with DPDK support.  It still makes sense to do this in
> > some kind of "official" place, though, so now I'm willing to support the
> > idea of just putting the branches into the main OVS repo.  Let me check
> > with the committers list, first.
> 
> So far I've received two positive responses and no negatives to this
> idea from the committers.  I'll give it a while longer but I think we'll
> probably just have you do it in the main repo.

I didn't receive any more feedback, so I created a "dpdk" team, invited
you to it, and added read/write permission for the main openvswitch
repository.  

Re: [ovs-dev] [PATCH] netdev-linux: Avoid division by 0 if kernel reports bad scheduler data.

2018-08-20 Thread Ben Pfaff
Thanks, applied to master and backported.

On Mon, Aug 20, 2018 at 09:15:54AM -0700, Yifeng Sun wrote:
> Looks good to me, thanks.
> 
> Reviewed-by: Yifeng Sun 
> 
> On Sat, Aug 18, 2018 at 10:17 AM, Ben Pfaff  wrote:
> 
> > If the kernel reported a value of 0 for the second value in
> > /proc/net/psched, it would cause a division-by-zero fault in
> > read_psched().  I don't know of a kernel that would actually do that, but
> > it's still better to be safe.
> >
> > Found by clang static analyzer
> >
> > Reported-by: Bhargava Shastry 
> > Signed-off-by: Ben Pfaff 
> > ---
> >  lib/netdev-linux.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> > index 0c42268d9d6c..e16ea58a085e 100644
> > --- a/lib/netdev-linux.c
> > +++ b/lib/netdev-linux.c
> > @@ -5166,7 +5166,7 @@ read_psched(void)
> >  VLOG_DBG("%s: psched parameters are: %u %u %u %u", fn, a, b, c, d);
> >  fclose(stream);
> >
> > -if (!a || !c) {
> > +if (!a || !b || !c) {
> >  VLOG_WARN("%s: invalid scheduler parameters", fn);
> >  goto exit;
> >  }
> > --
> > 2.16.1
> >
> > ___
> > dev mailing list
> > d...@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] Fix socket permissions on Linux

2018-08-20 Thread Ben Pfaff
On Mon, Aug 20, 2018 at 11:08:36AM -0400, Aaron Conole wrote:
> Terry Wilson  writes:
> 
> >> Gather 'round folks, and let me tell you the tale of a series long
> >> ago posted:
> >>
> >> https://mail.openvswitch.org/pipermail/ovs-dev/2016-August/321866.html
> >>
> >> Something... something ... black magic...
> >> I think the fchmod needs to happen after the bind for the permissions
> >> to actually be changed.  That's how the unit tests in that series are
> >> coded.
> >
> > If you move fchmod after the bind, you get 0755 and not 0750 on my
> > system. It seems weird fchmod interacting with umask since it seems
> > like the whole purpose of chmod/fchmod are to change permissions
> > ignoring umask. You certainly aren't going to get the same behavior
> > with fchmod on a regular file anyway. It seemed like the patch that
> > specifically changed 0700 -> 0770 intended group permissions to always
> > be there. I can see arguments both ways, though. The problem with
> > modifying umask before calling is that you are changing the
> > permissions for every created file (log files, etc.). Maybe that isn't
> > a problem, I just know that it was something that came up in our
> > internal discussions hacking around things with umask.
> 
> Strange.  I've tested on regular files, and find that umask is *correctly*
> ignored.  The test program I'm using:
> 
> --- 8< ---
> #include 
> #include 
> #include 
> #include 
> 
> int main(int argc, char *argv[])
> {
> /* Change the mode */
> if (argc < 2) {
> printf("pass a filename\n");
> return -1;
> }
> 
> int fd = open(argv[1], O_RDWR);
> if (fd < 0) {
> perror("open");
> return -1;
> }
> 
> fchmod(fd, 0770);
> close(fd);
> return 0;
> }
> --- >8 ---
> 
> output
> ---
> 
> 10:42:20 aconole /tmp$ umask
> 0022
> 10:42:22 aconole /tmp$ touch tmp.foo
> 10:42:25 aconole /tmp$ ls -lah | grep tmp.foo
> -rw-r--r--.  1 aconole aconole0 Aug 20 10:42 tmp.foo
> 10:42:29 aconole /tmp$ ./test tmp.foo
> 10:42:36 aconole /tmp$ ls -lah | grep tmp.foo
> -rwxrwx---.  1 aconole aconole0 Aug 20 10:42 tmp.foo
> 
> ---
> 
> of course, /tmp has some other sticky bits set, but I tried in my own
> home directory, with the same results.
> 
> Then, I played a bit with unix sockets.  What I found was interesting.
> 
> If I use fchmod on the actual socket, it seems not possible to adjust
> the permissions after creation time.  That means post bind, the
> permissions are locked for fd, at least as far as fchmod goes.
> 
> HOWEVER, if I independently call 'chmod(path, mode)' the mode bits are
> changed without factoring the umask (as I expect).  There might need to
> be some additional work to get this to do the right thing under linux,
> but I think it's possible without going through the clone path.  Looking
> at the old patches I had, I did a traversal and eventually fchmodat(),
> which probably does the right thing (but I haven't dusted those patches
> off to actually try them out).
> 
> Ben, Terry, how would you feel if I did some minor necromancy on that
> code?  Any objections?

If you can do something that avoids fork and path-traversal issues, that
would be great.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] netdev-linux: Avoid division by 0 if kernel reports bad scheduler data.

2018-08-20 Thread Yifeng Sun
Looks good to me, thanks.

Reviewed-by: Yifeng Sun 

On Sat, Aug 18, 2018 at 10:17 AM, Ben Pfaff  wrote:

> If the kernel reported a value of 0 for the second value in
> /proc/net/psched, it would cause a division-by-zero fault in
> read_psched().  I don't know of a kernel that would actually do that, but
> it's still better to be safe.
>
> Found by clang static analyzer
>
> Reported-by: Bhargava Shastry 
> Signed-off-by: Ben Pfaff 
> ---
>  lib/netdev-linux.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index 0c42268d9d6c..e16ea58a085e 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -5166,7 +5166,7 @@ read_psched(void)
>  VLOG_DBG("%s: psched parameters are: %u %u %u %u", fn, a, b, c, d);
>  fclose(stream);
>
> -if (!a || !c) {
> +if (!a || !b || !c) {
>  VLOG_WARN("%s: invalid scheduler parameters", fn);
>  goto exit;
>  }
> --
> 2.16.1
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v4 1/2] ovs python: ovs.stream.open_block() returns success even if the remote is unreachable

2018-08-20 Thread Numan Siddique
On Mon, Aug 20, 2018 at 7:43 PM Simon Horman 
wrote:

>
>
> On 19 August 2018 at 00:12, Ben Pfaff  wrote:
>
>> On Sat, Aug 18, 2018 at 09:40:05PM +0200, Simon Horman wrote:
>> > On Tue, Aug 07, 2018 at 05:07:58PM +0530, nusid...@redhat.com wrote:
>> > > From: Numan Siddique 
>> > >
>> > > The python function ovs.socket_util.check_connection_completion()
>> uses select()
>> > > (provided by python) to monitor the socket file descriptor. The
>> select()
>> > > returns 1 when the file descriptor becomes ready. For error cases
>> like -
>> > > 111 (Connection refused) and 113 (No route to host) (POLLERR),
>> ovs.poller._SelectSelect.poll()
>> > > expects the exceptfds list to be set by select(). But that is not the
>> case.
>> > > As per the select() man page, writefds list will be set for POLLERR.
>> > > Please see "Correspondence between select() and poll() notifications"
>> section of select(2)
>> > > man page.
>> > >
>> > > Because of this behavior,
>> ovs.socket_util.check_connection_completion() returns success
>> > > even if the remote is unreachable or not listening on the port.
>> > >
>> > > This patch fixes this issue by using poll() to check the connection
>> status similar to
>> > > the C implementation of check_connection_completion().
>> > >
>> > > A new function 'get_system_poll() is added in ovs/poller.py which
>> returns the
>> > > select.poll() object. If select.poll is monkey patched by
>> eventlet/gevent, it
>> > > gets the original select.poll() and returns it.
>> > >
>> > > The test cases added in this patch fails without the fix.
>> > >
>> > > Suggested-by: Ben Pfaff 
>> > > Signed-off-by: Numan Siddique 
>> >
>> > I believe that this patch is the cause of the testsuite failure
>> currently
>> > exhibited in the master branch when tested using travis-ci.
>> >
>> > https://travis-ci.org/openvswitch/ovs/jobs/417506303
>> >
>> > ...
>> >
>> > > diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at
>> > > index 014382850..e8a26e971 100644
>> > > --- a/tests/ovsdb-idl.at
>> > > +++ b/tests/ovsdb-idl.at
>> > > @@ -1720,3 +1720,19 @@ OVSDB_CHECK_IDL_COMPOUND_INDEX_WITH_REF([set,
>> simple3 idl-compound-index-with-re
>> > >  007: check simple4: empty
>> > >  008: End test
>> > >  ]])
>> > > +
>> > > +m4_define([CHECK_STREAM_OPEN_BLOCK_PY],
>> > > +  [AT_SETUP([$1])
>> > > +   AT_SKIP_IF([test $2 = no])
>> > > +   AT_KEYWORDS([Check PY Stream open block - $3])
>> > > +   AT_CHECK([ovsdb_start_idltest "ptcp:0:127.0.0.1"])
>> > > +   PARSE_LISTENING_PORT([ovsdb-server.log], [TCP_PORT])
>> > > +   WRONG_PORT=$(($TCP_PORT+1))
>> > > +   AT_CHECK([$3 $srcdir/test-stream.py tcp:127.0.0.1:$TCP_PORT],
>> [0], [ignore])
>> > > +   AT_CHECK([$3 $srcdir/test-stream.py tcp:127.0.0.1:$WRONG_PORT],
>> [1], [ignore])
>> > > +   OVSDB_SERVER_SHUTDOWN
>> > > +   AT_CHECK([$3 $srcdir/test-stream.py tcp:127.0.0.1:$TCP_PORT],
>> [1], [ignore])
>> > > +   AT_CLEANUP])
>> > > +
>> > > +CHECK_STREAM_OPEN_BLOCK_PY([Check PY2 Stream open block],
>> [$HAVE_PYTHON], [$PYTHON])
>> > > +CHECK_STREAM_OPEN_BLOCK_PY([Check PY3 Stream open block],
>> [$HAVE_PYTHON], [$PYTHON3])
>> >
>> > I wonder if the problem exhibited by travis-ci would be resolved
>> > by changing $HAVE_PYTHON to $HAVE_PYTHON3 on the "PY3 Stream" line.
>> >
>> > Also, I wonder if s/PYTHON/PYTHON2/ is appropriate on the "PY2 Stream"
>> line.
>>
>> You're right, thanks.  I sent a fix:
>> https://patchwork.ozlabs.org/patch/959273/
>
>
>
Oops. Thanks Ben for the fix.



> Thanks Ben, much appreciated.
>
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] Fix socket permissions on Linux

2018-08-20 Thread Aaron Conole
Terry Wilson  writes:

>> Gather 'round folks, and let me tell you the tale of a series long
>> ago posted:
>>
>> https://mail.openvswitch.org/pipermail/ovs-dev/2016-August/321866.html
>>
>> Something... something ... black magic...
>> I think the fchmod needs to happen after the bind for the permissions
>> to actually be changed.  That's how the unit tests in that series are
>> coded.
>
> If you move fchmod after the bind, you get 0755 and not 0750 on my
> system. It seems weird fchmod interacting with umask since it seems
> like the whole purpose of chmod/fchmod are to change permissions
> ignoring umask. You certainly aren't going to get the same behavior
> with fchmod on a regular file anyway. It seemed like the patch that
> specifically changed 0700 -> 0770 intended group permissions to always
> be there. I can see arguments both ways, though. The problem with
> modifying umask before calling is that you are changing the
> permissions for every created file (log files, etc.). Maybe that isn't
> a problem, I just know that it was something that came up in our
> internal discussions hacking around things with umask.

Strange.  I've tested on regular files, and find that umask is *correctly*
ignored.  The test program I'm using:

--- 8< ---
#include 
#include 
#include 
#include 

int main(int argc, char *argv[])
{
/* Change the mode */
if (argc < 2) {
printf("pass a filename\n");
return -1;
}

int fd = open(argv[1], O_RDWR);
if (fd < 0) {
perror("open");
return -1;
}

fchmod(fd, 0770);
close(fd);
return 0;
}
--- >8 ---

output
---

10:42:20 aconole /tmp$ umask
0022
10:42:22 aconole /tmp$ touch tmp.foo
10:42:25 aconole /tmp$ ls -lah | grep tmp.foo
-rw-r--r--.  1 aconole aconole0 Aug 20 10:42 tmp.foo
10:42:29 aconole /tmp$ ./test tmp.foo
10:42:36 aconole /tmp$ ls -lah | grep tmp.foo
-rwxrwx---.  1 aconole aconole0 Aug 20 10:42 tmp.foo

---

of course, /tmp has some other sticky bits set, but I tried in my own
home directory, with the same results.

Then, I played a bit with unix sockets.  What I found was interesting.

If I use fchmod on the actual socket, it seems not possible to adjust
the permissions after creation time.  That means post bind, the
permissions are locked for fd, at least as far as fchmod goes.

HOWEVER, if I independently call 'chmod(path, mode)' the mode bits are
changed without factoring the umask (as I expect).  There might need to
be some additional work to get this to do the right thing under linux,
but I think it's possible without going through the clone path.  Looking
at the old patches I had, I did a traversal and eventually fchmodat(),
which probably does the right thing (but I haven't dusted those patches
off to actually try them out).

Ben, Terry, how would you feel if I did some minor necromancy on that
code?  Any objections?
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v4 1/2] ovs python: ovs.stream.open_block() returns success even if the remote is unreachable

2018-08-20 Thread Simon Horman
On 19 August 2018 at 00:12, Ben Pfaff  wrote:

> On Sat, Aug 18, 2018 at 09:40:05PM +0200, Simon Horman wrote:
> > On Tue, Aug 07, 2018 at 05:07:58PM +0530, nusid...@redhat.com wrote:
> > > From: Numan Siddique 
> > >
> > > The python function ovs.socket_util.check_connection_completion()
> uses select()
> > > (provided by python) to monitor the socket file descriptor. The
> select()
> > > returns 1 when the file descriptor becomes ready. For error cases like
> -
> > > 111 (Connection refused) and 113 (No route to host) (POLLERR),
> ovs.poller._SelectSelect.poll()
> > > expects the exceptfds list to be set by select(). But that is not the
> case.
> > > As per the select() man page, writefds list will be set for POLLERR.
> > > Please see "Correspondence between select() and poll() notifications"
> section of select(2)
> > > man page.
> > >
> > > Because of this behavior, ovs.socket_util.check_connection_completion()
> returns success
> > > even if the remote is unreachable or not listening on the port.
> > >
> > > This patch fixes this issue by using poll() to check the connection
> status similar to
> > > the C implementation of check_connection_completion().
> > >
> > > A new function 'get_system_poll() is added in ovs/poller.py which
> returns the
> > > select.poll() object. If select.poll is monkey patched by
> eventlet/gevent, it
> > > gets the original select.poll() and returns it.
> > >
> > > The test cases added in this patch fails without the fix.
> > >
> > > Suggested-by: Ben Pfaff 
> > > Signed-off-by: Numan Siddique 
> >
> > I believe that this patch is the cause of the testsuite failure currently
> > exhibited in the master branch when tested using travis-ci.
> >
> > https://travis-ci.org/openvswitch/ovs/jobs/417506303
> >
> > ...
> >
> > > diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at
> > > index 014382850..e8a26e971 100644
> > > --- a/tests/ovsdb-idl.at
> > > +++ b/tests/ovsdb-idl.at
> > > @@ -1720,3 +1720,19 @@ OVSDB_CHECK_IDL_COMPOUND_INDEX_WITH_REF([set,
> simple3 idl-compound-index-with-re
> > >  007: check simple4: empty
> > >  008: End test
> > >  ]])
> > > +
> > > +m4_define([CHECK_STREAM_OPEN_BLOCK_PY],
> > > +  [AT_SETUP([$1])
> > > +   AT_SKIP_IF([test $2 = no])
> > > +   AT_KEYWORDS([Check PY Stream open block - $3])
> > > +   AT_CHECK([ovsdb_start_idltest "ptcp:0:127.0.0.1"])
> > > +   PARSE_LISTENING_PORT([ovsdb-server.log], [TCP_PORT])
> > > +   WRONG_PORT=$(($TCP_PORT+1))
> > > +   AT_CHECK([$3 $srcdir/test-stream.py tcp:127.0.0.1:$TCP_PORT],
> [0], [ignore])
> > > +   AT_CHECK([$3 $srcdir/test-stream.py tcp:127.0.0.1:$WRONG_PORT],
> [1], [ignore])
> > > +   OVSDB_SERVER_SHUTDOWN
> > > +   AT_CHECK([$3 $srcdir/test-stream.py tcp:127.0.0.1:$TCP_PORT],
> [1], [ignore])
> > > +   AT_CLEANUP])
> > > +
> > > +CHECK_STREAM_OPEN_BLOCK_PY([Check PY2 Stream open block],
> [$HAVE_PYTHON], [$PYTHON])
> > > +CHECK_STREAM_OPEN_BLOCK_PY([Check PY3 Stream open block],
> [$HAVE_PYTHON], [$PYTHON3])
> >
> > I wonder if the problem exhibited by travis-ci would be resolved
> > by changing $HAVE_PYTHON to $HAVE_PYTHON3 on the "PY3 Stream" line.
> >
> > Also, I wonder if s/PYTHON/PYTHON2/ is appropriate on the "PY2 Stream"
> line.
>
> You're right, thanks.  I sent a fix:
> https://patchwork.ozlabs.org/patch/959273/


Thanks Ben, much appreciated.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [ovs-discuss] fix the mod_vlan_vid actions with OpenFlow13.

2018-08-20 Thread Eric Garver
On Mon, Aug 20, 2018 at 02:17:34AM +, wangyunjian wrote:
> 
> 
[..]
> > > On Fri, Aug 17, 2018 at 12:15:30PM +, wangyunjian wrote:
> > > > The datapath flow which pushs double vlan was found using ovs-appctl
> > > > dpctl/dump-flows, but the flow was set mod_vlan_vid actions.
> > > > This problem is discovered from "Add support for 802.1ad (QinQ
> > tunneling)".
> > >
> > > Thanks for reporting. Can you say what version of OVS you're using?
> > > Including any extra patches you may have applied.
> > 
> > The version of OVS is master branch(git log "
> > be5e6d6822e60b5b84ac65dcd1b249145356a809
> > ofp-ed-props: Fix hang for crafted OpenFlow encap/decap properties".).
> > 
> > >
> > > > My test steps:
> > > >
> > > > 1) ovs-vsctl add-br ovsbr0 -- set bridge ovsbr0 datapath_type=netdev
> > > >ovs-vsctl add-port ovsbr0 eth2 -- set Interface eth2 type=dpdk
> > > options:dpdk-devargs=:03:00.0
> > > >ovs-ofctl  -O OpenFlow13 add-flow ovsbr0 "
> > > > table=0,priority=2,in_port=1
> > > actions=mod_vlan_vid:3,NORMAL"

What happens if you add a wildcard VLAN match here?
e.g. vlan_tci=0x1000/0x1000
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] Organización y registro de información

2018-08-20 Thread Tecnologías de la información




  






 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

---
Este correo electrónico ha sido comprobado en busca de virus por AVG.
http://www.avg.com
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] Gold Bar and Diamonds Offer

2018-08-20 Thread Morgan Thomas
Dear ,

We have 200 kg X 22 karat gold bar and Uncut rough diamond 2,200 Cts
of none bankable condition for sale to any interested buyer.

Confirm your interest We will offer you 3% commission if you are not a
buyer and connecting us to buyers.Before reply we only want to make
sure that you really want to buy not just sending emails without any
results.Confirm your interest.

Let me have your secured number/mobile number in your reply to open up
verbal communication with you.

Hoping to hear from you soon.
Regards,
Morgan Thomas
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev