Re: [ovs-dev] [PATCH 1/2] ovn-controller: support MAC_Binding aging

2018-03-20 Thread Mark Michelson
I think if we're adding this, it needs to be able to work with IPv6 as 
well. The main thing that you would need to do here is to send IPv6 
neighbor solicitations instead of ARP requests for IPv6 addresses.


On 03/15/2018 04:20 AM, Guoshuai Li wrote:

Add the MAC_Binding aging. The default aging time is 20 minutes.
Send the ARP request at 10(1*20/2), 13(2*20/3), 15(3*20/4) minutes.
If no ARP reply is received within 20 minutes, the MAC_Binding column will be
deleted.

Sync a MAC_Binding cache in the ovn-controller where lport redirect chassis,
to records timestamps and ARP send times. Time traversal cache to send ARP
requests or aging.

Signed-off-by: Guoshuai Li 
---
  ovn/controller/pinctrl.c | 363 +++
  1 file changed, 363 insertions(+)

diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c
index b4dbd8c29..b258a7f29 100644
--- a/ovn/controller/pinctrl.c
+++ b/ovn/controller/pinctrl.c
@@ -70,6 +70,18 @@ static void run_put_mac_bindings(struct controller_ctx *);
  static void wait_put_mac_bindings(struct controller_ctx *);
  static void flush_put_mac_bindings(void);
  
+static void init_aging_mac_bindings(void);

+static void destroy_aging_mac_bindings(void);
+static void aging_mac_bindings_wait(void);
+static void aging_mac_bindings_run(const struct controller_ctx *,
+   const struct ovsrec_bridge *,
+   const struct sbrec_chassis *,
+   const struct hmap *);
+static void init_aging_mac_binding(uint32_t,
+   uint32_t,
+   const char *,
+   uint32_t);
+
  static void init_send_garps(void);
  static void destroy_send_garps(void);
  static void send_garp_wait(void);
@@ -105,6 +117,7 @@ pinctrl_init(void)
  swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP13_VERSION);
  conn_seq_no = 0;
  init_put_mac_bindings();
+init_aging_mac_bindings();
  init_send_garps();
  init_ipv6_ras();
  }
@@ -1166,6 +1179,7 @@ pinctrl_run(struct controller_ctx *ctx,
  send_garp_run(ctx, br_int, chassis, chassis_index, local_datapaths,
active_tunnels);
  send_ipv6_ras(ctx, local_datapaths);
+aging_mac_bindings_run(ctx, br_int, chassis, local_datapaths);
  }
  
  /* Table of ipv6_ra_state structures, keyed on logical port name */

@@ -1467,6 +1481,7 @@ pinctrl_wait(struct controller_ctx *ctx)
  rconn_recv_wait(swconn);
  send_garp_wait();
  ipv6_ra_wait();
+aging_mac_bindings_wait();
  }
  
  void

@@ -1474,6 +1489,7 @@ pinctrl_destroy(void)
  {
  rconn_destroy(swconn);
  destroy_put_mac_bindings();
+destroy_aging_mac_bindings();
  destroy_send_garps();
  destroy_ipv6_ras();
  }
@@ -1567,6 +1583,9 @@ pinctrl_handle_put_mac_binding(const struct flow *md,
  }
  pmb->timestamp = time_msec();
  pmb->mac = headers->dl_src;
+
+/* init aging mac_binding timestamp and arp_send_count */
+init_aging_mac_binding(dp_key, port_key, ip_s, hash);
  }
  
  static void

@@ -1647,6 +1666,350 @@ flush_put_mac_bindings(void)
  }
  }
  
+/* Buffered "aging_mac_binding" operation. */
+struct aging_mac_binding {
+struct hmap_node hmap_node; /* In 'aging_mac_binding'. */
+
+long long int timestamp;/* In milliseconds. */
+
+/* Key. */
+uint32_t dp_key;
+uint32_t port_key;
+char ip_s[INET6_ADDRSTRLEN + 1];


The +1 is unnecessary. INET6_ADDRSTRLEN already has the requisite space 
for the null byte built in.



+
+int arp_send_count;
+};
+
+/* Contains "struct aging_mac_binding"s.
+   The cache for mac_bindings */
+static struct hmap aging_mac_bindings;
+
+/* Next aging mac binding time announcement in ms. */
+static long long int next_wait_time;
+
+static long long int base_reachable_time = 20 * 60 * 1000;
+
+static void
+init_aging_mac_bindings(void)
+{
+hmap_init(_mac_bindings);
+next_wait_time = LLONG_MAX;
+}
+
+static void
+flush_aging_mac_bindings(void)
+{
+struct aging_mac_binding *amb;
+HMAP_FOR_EACH_POP (amb, hmap_node, _mac_bindings) {
+free(amb);
+}
+}
+
+static struct aging_mac_binding *
+find_aging_mac_binding(uint32_t dp_key,
+   uint32_t port_key,
+   const char *ip_s,
+   uint32_t hash)
+{
+struct aging_mac_binding *amb;
+HMAP_FOR_EACH_WITH_HASH (amb, hmap_node, hash, _mac_bindings) {
+if (amb->dp_key == dp_key && amb->port_key == port_key
+&& !strcmp(amb->ip_s, ip_s)) {
+return amb;
+}
+}
+return NULL;
+}
+
+static void
+insert_aging_mac_bindings(int64_t dp_key, int64_t port_key, const char *ip_s)
+{
+uint32_t hash = hash_string(ip_s, hash_2words(dp_key, port_key));
+struct aging_mac_binding *amb
+= find_aging_mac_binding(dp_key, port_key, ip_s, hash);
+if (!amb) {
+

[ovs-dev] [PATCH 2/4] selinux: create a transition type for module loading

2018-03-20 Thread Aaron Conole
Defines a type 'openvswitch_load_module_t' used exclusively for loading
modules.  This means that the 'openvswitch_t' domain won't require
access to the module loading facility - such access can only happen
after transitioning through the 'openvswitch_load_module_exec_t'
transition context.

A future commit will label the appropriate script with extended attributes
to make use of this new domain.

Signed-off-by: Aaron Conole 
---
 selinux/openvswitch-custom.te.in | 79 +---
 1 file changed, 74 insertions(+), 5 deletions(-)

diff --git a/selinux/openvswitch-custom.te.in b/selinux/openvswitch-custom.te.in
index db3cf6d8d..31e8fab15 100644
--- a/selinux/openvswitch-custom.te.in
+++ b/selinux/openvswitch-custom.te.in
@@ -1,13 +1,31 @@
 module openvswitch-custom 1.0.1;
 
 require {
+role system_r;
+role object_r;
+
 type openvswitch_t;
 type openvswitch_rw_t;
 type openvswitch_tmp_t;
 type openvswitch_var_run_t;
 
+type bin_t;
 type ifconfig_exec_t;
+type init_t;
+type init_var_run_t;
+type insmod_exec_t;
 type hostname_exec_t;
+type modules_conf_t;
+type modules_object_t;
+type passwd_file_t;
+type plymouth_exec_t;
+type proc_t;
+type shell_exec_t;
+type sssd_t;
+type sssd_public_t;
+type sssd_var_lib_t;
+type sysfs_t;
+type systemd_unit_file_t;
 type tun_tap_device_t;
 
 @begin_dpdk@
@@ -21,18 +39,36 @@ require {
 
 class capability { dac_override audit_write };
 class chr_file { write getattr read open ioctl };
-class dir { write remove_name add_name lock read };
-class file { write getattr read open execute execute_no_trans create 
unlink };
+class dir { write remove_name add_name lock read getattr search open };
+class fd { use };
+class file { write getattr read open execute execute_no_trans create 
unlink map entrypoint lock ioctl };
+class fifo_file { getattr read write append ioctl lock open };
+class filesystem getattr;
+class lnk_file { read open };
 class netlink_audit_socket { create nlmsg_relay audit_write read write 
};
 class netlink_socket { setopt getopt create connect getattr write read 
};
-class unix_stream_socket { write getattr read connectto connect setopt 
getopt sendto accept bind recvfrom acceptfrom };
+class sock_file { write };
+class system module_load;
+class process { sigchld signull transition noatsecure siginh rlimitinh 
};
+class unix_stream_socket { write getattr read connectto connect setopt 
getopt sendto accept bind recvfrom acceptfrom ioctl };
 
 @begin_dpdk@
-class sock_file { read write append getattr open };
+class sock_file { read append getattr open };
 class tun_socket { relabelfrom relabelto create };
 @end_dpdk@
 }
 
+#= Set up the transition domain =
+type openvswitch_load_module_exec_t;
+type openvswitch_load_module_t;
+
+domain_type(openvswitch_load_module_exec_t);
+domain_type(openvswitch_load_module_t);
+role object_r types openvswitch_load_module_exec_t;
+role system_r types openvswitch_load_module_t;
+domain_entry_file(openvswitch_load_module_t, openvswitch_load_module_exec_t);
+domtrans_pattern(openvswitch_t, openvswitch_load_module_exec_t, 
openvswitch_load_module_t);
+
 #= openvswitch_t ==
 allow openvswitch_t self:capability { dac_override audit_write };
 allow openvswitch_t self:netlink_audit_socket { create nlmsg_relay audit_write 
read write };
@@ -41,10 +77,11 @@ allow openvswitch_t self:netlink_socket { setopt getopt 
create connect getattr w
 allow openvswitch_t hostname_exec_t:file { read getattr open execute 
execute_no_trans };
 allow openvswitch_t ifconfig_exec_t:file { read getattr open execute 
execute_no_trans };
 
-allow openvswitch_t openvswitch_rw_t:dir { write remove_name add_name lock 
read };
+allow openvswitch_t openvswitch_rw_t:dir { write remove_name add_name lock 
read getattr open search };
 allow openvswitch_t openvswitch_rw_t:file { write getattr read open execute 
execute_no_trans create unlink };
 allow openvswitch_t openvswitch_tmp_t:file { execute execute_no_trans };
 allow openvswitch_t openvswitch_tmp_t:unix_stream_socket { write getattr read 
connectto connect setopt getopt sendto accept bind recvfrom acceptfrom };
+allow openvswitch_t openvswitch_var_run_t:dir { getattr read open search };
 allow openvswitch_t tun_tap_device_t:chr_file { read write getattr open ioctl 
};
 
 @begin_dpdk@
@@ -58,3 +95,35 @@ allow openvswitch_t svirt_tmpfs_t:sock_file { read write 
append getattr open };
 allow openvswitch_t svirt_t:unix_stream_socket { connectto read write getattr 
sendto recvfrom setopt };
 allow openvswitch_t vfio_device_t:chr_file { read write open ioctl getattr };
 

[ovs-dev] [PATCH 1/4] ovs-kmod-ctl: introduce a kernel module load script

2018-03-20 Thread Aaron Conole
Currently, Open vSwitch on linux embeds the logic of loading and unloading
kernel modules into the ovs-ctl and ovs-lib script files.  This works, but
it means that there is no way to leverage extended filesystem attributes
to grant fine grain permissions relating to module loading.

The split out utility 'ovs-kmod-ctl' will be used in an upcoming commit
for RHEL-based distributions to have a separate transition domain that
will allow module loading to be given to a separate selinux domain from
the openvswitch_t domain.

Signed-off-by: Aaron Conole 
---
 debian/openvswitch-switch.install  |   1 +
 debian/openvswitch-switch.manpages |   1 +
 rhel/openvswitch-fedora.spec.in|   2 +
 rhel/openvswitch.spec.in   |   2 +
 utilities/.gitignore   |   1 +
 utilities/automake.mk  |   5 +
 utilities/ovs-ctl.in   |  32 +-
 utilities/ovs-kmod-ctl.8   | 103 +
 utilities/ovs-kmod-ctl.in  | 228 +
 utilities/ovs-lib.in   |  12 +-
 10 files changed, 350 insertions(+), 37 deletions(-)
 create mode 100644 utilities/ovs-kmod-ctl.8
 create mode 100644 utilities/ovs-kmod-ctl.in

diff --git a/debian/openvswitch-switch.install 
b/debian/openvswitch-switch.install
index bfb391fe8..6a6e9a543 100644
--- a/debian/openvswitch-switch.install
+++ b/debian/openvswitch-switch.install
@@ -12,5 +12,6 @@ usr/sbin/ovs-vswitchd
 usr/sbin/ovsdb-server
 usr/share/openvswitch/scripts/ovs-check-dead-ifs
 usr/share/openvswitch/scripts/ovs-ctl
+usr/share/openvswitch/scripts/ovs-kmod-ctl
 usr/share/openvswitch/scripts/ovs-save
 usr/share/openvswitch/vswitch.ovsschema
diff --git a/debian/openvswitch-switch.manpages 
b/debian/openvswitch-switch.manpages
index a2f661a3e..47a1ba174 100644
--- a/debian/openvswitch-switch.manpages
+++ b/debian/openvswitch-switch.manpages
@@ -2,6 +2,7 @@ ovsdb/ovsdb-server.1
 utilities/ovs-ctl.8
 utilities/ovs-dpctl-top.8
 utilities/ovs-dpctl.8
+utilities/ovs-kmod-ctl.8
 utilities/ovs-pcap.1
 utilities/ovs-tcpdump.8
 utilities/ovs-tcpundump.1
diff --git a/rhel/openvswitch-fedora.spec.in b/rhel/openvswitch-fedora.spec.in
index 8a804942b..8fbc985ce 100644
--- a/rhel/openvswitch-fedora.spec.in
+++ b/rhel/openvswitch-fedora.spec.in
@@ -542,6 +542,7 @@ fi
 %{_datadir}/openvswitch/scripts/ovs-save
 %{_datadir}/openvswitch/scripts/ovs-vtep
 %{_datadir}/openvswitch/scripts/ovs-ctl
+%{_datadir}/openvswitch/scripts/ovs-kmod-ctl
 %{_datadir}/openvswitch/scripts/ovs-systemd-reload
 %config %{_datadir}/openvswitch/vswitch.ovsschema
 %config %{_datadir}/openvswitch/vtep.ovsschema
@@ -574,6 +575,7 @@ fi
 %{_mandir}/man8/ovs-ctl.8*
 %{_mandir}/man8/ovs-dpctl.8*
 %{_mandir}/man8/ovs-dpctl-top.8*
+%{_mandir}/man8/ovs-kmod-ctl.8*
 %{_mandir}/man8/ovs-ofctl.8*
 %{_mandir}/man8/ovs-pki.8*
 %{_mandir}/man8/ovs-vsctl.8*
diff --git a/rhel/openvswitch.spec.in b/rhel/openvswitch.spec.in
index 876990698..71d5afbdb 100644
--- a/rhel/openvswitch.spec.in
+++ b/rhel/openvswitch.spec.in
@@ -236,6 +236,7 @@ exit 0
 /usr/share/man/man8/ovs-ctl.8.gz
 /usr/share/man/man8/ovs-dpctl.8.gz
 /usr/share/man/man8/ovs-dpctl-top.8.gz
+/usr/share/man/man8/ovs-kmod-ctl.8.gz
 /usr/share/man/man8/ovs-ofctl.8.gz
 /usr/share/man/man8/ovs-parse-backtrace.8.gz
 /usr/share/man/man8/ovs-pki.8.gz
@@ -249,6 +250,7 @@ exit 0
 /usr/share/openvswitch/scripts/ovs-bugtool-*
 /usr/share/openvswitch/scripts/ovs-check-dead-ifs
 /usr/share/openvswitch/scripts/ovs-ctl
+/usr/share/openvswitch/scripts/ovs-kmod-ctl
 /usr/share/openvswitch/scripts/ovs-lib
 /usr/share/openvswitch/scripts/ovs-save
 /usr/share/openvswitch/scripts/ovs-vtep
diff --git a/utilities/.gitignore b/utilities/.gitignore
index 34c58f20f..eb2a69bf3 100644
--- a/utilities/.gitignore
+++ b/utilities/.gitignore
@@ -13,6 +13,7 @@
 /ovs-dpctl.8
 /ovs-dpctl-top
 /ovs-dpctl-top.8
+/ovs-kmod-ctl
 /ovs-l3ping
 /ovs-l3ping.8
 /ovs-lib
diff --git a/utilities/automake.mk b/utilities/automake.mk
index 60cf1c5ed..d8f2374a3 100644
--- a/utilities/automake.mk
+++ b/utilities/automake.mk
@@ -20,6 +20,7 @@ endif
 scripts_SCRIPTS += \
utilities/ovs-check-dead-ifs \
utilities/ovs-ctl \
+   utilities/ovs-kmod-ctl \
utilities/ovs-save
 scripts_DATA += utilities/ovs-lib
 
@@ -44,6 +45,7 @@ EXTRA_DIST += \
utilities/ovs-dev.py \
utilities/ovs-docker \
utilities/ovs-dpctl-top.in \
+   utilities/ovs-kmod-ctl.in \
utilities/ovs-l3ping.in \
utilities/ovs-lib.in \
utilities/ovs-parse-backtrace.in \
@@ -63,6 +65,7 @@ MAN_ROOTS += \
utilities/ovs-ctl.8 \
utilities/ovs-dpctl.8.in \
utilities/ovs-dpctl-top.8.in \
+   utilities/ovs-kmod-ctl.8 \
utilities/ovs-l3ping.8.in \
utilities/ovs-ofctl.8.in \
utilities/ovs-parse-backtrace.8 \
@@ -81,6 +84,7 @@ CLEANFILES += \
utilities/ovs-dpctl.8 \
utilities/ovs-dpctl-top \
utilities/ovs-dpctl-top.8 \
+   

[ovs-dev] [PATCH 4/4] rhel: selinux-policy to invoke proper label macros

2018-03-20 Thread Aaron Conole
The rpm doesn't invoke all of the required selinux helpers to enact labeling
or relabeling on all versions of Fedora/RHEL.  According to:
  https://fedoraproject.org/wiki/SELinux/IndependentPolicy

This commit switches to use the selinux rpm macros which will ensure that
all of the labels defined in the .fc.in file are applied properly.

Signed-off-by: Aaron Conole 
---
 rhel/openvswitch-fedora.spec.in | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/rhel/openvswitch-fedora.spec.in b/rhel/openvswitch-fedora.spec.in
index 8fbc985ce..b606cb7e0 100644
--- a/rhel/openvswitch-fedora.spec.in
+++ b/rhel/openvswitch-fedora.spec.in
@@ -340,6 +340,9 @@ rm -f $RPM_BUILD_ROOT%{_bindir}/ovs-parse-backtrace \
 %clean
 rm -rf $RPM_BUILD_ROOT
 
+%pre selinux-policy
+%selinux_relabel_pre -s targeted
+
 %preun
 %if 0%{?systemd_preun:1}
 %systemd_preun %{name}.service
@@ -444,7 +447,7 @@ fi
 %endif
 
 %post selinux-policy
-/usr/sbin/semodule -i 
%{_datadir}/selinux/packages/%{name}/openvswitch-custom.pp &> /dev/null || :
+%selinux_modules_install -s targeted 
%{_datadir}/selinux/packages/%{name}/openvswitch-custom.pp
 
 %postun
 %if 0%{?systemd_postun:1}
@@ -476,9 +479,12 @@ fi
 
 %postun selinux-policy
 if [ $1 -eq 0 ] ; then
-  /usr/sbin/semodule -r openvswitch-custom &> /dev/null || :
+  %selinux_modules_uninstall -s targeted openvswitch-custom
 fi
 
+%posttrans selinux-policy
+%selinux_relabel_post -s targeted
+
 %files selinux-policy
 %defattr(-,root,root)
 %{_datadir}/selinux/packages/%{name}/openvswitch-custom.pp
-- 
2.14.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 3/4] selinux: introduce domain transitioned kmod helper

2018-03-20 Thread Aaron Conole
This commit uses the previously defined selinux label to transition
from the openvswitch_t to openvswitch_load_module_t domain, by way of
a specially labelled ovs-kmod-ctl helper.

Signed-off-by: Aaron Conole 
---
 selinux/.gitignore   | 4 
 selinux/automake.mk  | 3 ++-
 selinux/openvswitch-custom.fc.in | 1 +
 3 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 selinux/openvswitch-custom.fc.in

diff --git a/selinux/.gitignore b/selinux/.gitignore
index 83a0afb51..64e834cd1 100644
--- a/selinux/.gitignore
+++ b/selinux/.gitignore
@@ -1 +1,5 @@
 openvswitch-custom.te
+openvswitch-custom.fc
+openvswitch-custom.pp
+openvswitch-custom.if
+tmp/
diff --git a/selinux/automake.mk b/selinux/automake.mk
index b37e8f337..c7dfe6ed5 100644
--- a/selinux/automake.mk
+++ b/selinux/automake.mk
@@ -6,11 +6,12 @@
 # without warranty of any kind.
 
 EXTRA_DIST += \
+selinux/openvswitch-custom.fc.in \
 selinux/openvswitch-custom.te.in
 
 PHONY: selinux-policy
 
-selinux-policy: selinux/openvswitch-custom.te
+selinux-policy: selinux/openvswitch-custom.te selinux/openvswitch-custom.fc
$(MAKE) -C selinux/ -f /usr/share/selinux/devel/Makefile
 
 CLEANFILES += \
diff --git a/selinux/openvswitch-custom.fc.in b/selinux/openvswitch-custom.fc.in
new file mode 100644
index 0..c2756d04b
--- /dev/null
+++ b/selinux/openvswitch-custom.fc.in
@@ -0,0 +1 @@
+@pkgdatadir@/scripts/ovs-kmod-ctl -- 
gen_context(system_u:object_r:openvswitch_load_module_exec_t,s0)
-- 
2.14.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 0/4] selinux: introduce a transition domain for loading kmods

2018-03-20 Thread Aaron Conole
On linux systems, the initial start of openvswitch attempts to load
the openvswitch.ko kernel module.  This module allows openvswitch to
utilize the kernel datapath.

Some of these linux systems, notably Fedora and RHEL, use selinux to
enforce additional restrictions on various processes by way of allowing
or disallowing access from a specific selinux domain to a particular
operation on an selinux type.  On these systems, the openvswitch
initialization will be run from the 'openvswitch_t' selinux domain.
Attempts by a process in the 'openvswitch_t' selinux domain to load a
kernel module will be denied.

One solution would be to simply allow 'openvswitch_t' to load a kernel
directly.  This essentially means that 'openvswitch_t' would really be
'unconfined_t' - since an attacker that can control the code can issue
a kernel load.

The solution implemented here uses a labeled file in the openvswitch
scripts directory, which is writable only by root.  That file will force
a domain transition to the 'openvswitch_load_module_t' domain.  The
'openvswitch_load_module_t' domain will then be granted permissions to
load a kernel module.

Please vet the new permissions in 2/4 *carefully*.  I've tried to keep
it as restricted as possible.  If there are any useful selinux interfaces
that would simplify the permission grants needed for the new domain, I'm
happy to spin a v2.

Aaron Conole (4):
  ovs-kmod-ctl: introduce a kernel module load script
  selinux: create a transition type for module loading
  selinux: introduce domain transitioned kmod helper
  rhel: selinux-policy to invoke proper label macros

 debian/openvswitch-switch.install  |   1 +
 debian/openvswitch-switch.manpages |   1 +
 rhel/openvswitch-fedora.spec.in|  12 +-
 rhel/openvswitch.spec.in   |   2 +
 selinux/.gitignore |   4 +
 selinux/automake.mk|   3 +-
 selinux/openvswitch-custom.fc.in   |   1 +
 selinux/openvswitch-custom.te.in   |  79 -
 utilities/.gitignore   |   1 +
 utilities/automake.mk  |   5 +
 utilities/ovs-ctl.in   |  32 +-
 utilities/ovs-kmod-ctl.8   | 103 +
 utilities/ovs-kmod-ctl.in  | 228 +
 utilities/ovs-lib.in   |  12 +-
 14 files changed, 439 insertions(+), 45 deletions(-)
 create mode 100644 selinux/openvswitch-custom.fc.in
 create mode 100644 utilities/ovs-kmod-ctl.8
 create mode 100644 utilities/ovs-kmod-ctl.in

-- 
2.14.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 4/4] ofp-flow: Reduce memory consumption for ofputil_flow_mod, using minimatch.

2018-03-20 Thread Ben Pfaff
Until now, struct ofputil_flow_mod, which represents an OpenFlow flow table
modification request, has incorporated a struct match, which made the
overall ofputil_flow_mod about 2.5 kB.  This is OK for a small number of
flows, but absurdly inflates memory requirements when there are hundreds of
thousands of flows.  This commit fixes the problem by changing struct match
to struct minimatch inside ofputil_flow_mod, which reduces its size to
about 100 bytes plus the actual size of the flow match (usually a few dozen
bytes).

This affects memory usage of ovs-ofctl (when it adds a large number of
flows) more than ovs-vswitchd.

Reported-by: Michael Ben-Ami 
Signed-off-by: Ben Pfaff 
---
 AUTHORS.rst|  1 +
 include/openvswitch/ofp-flow.h |  2 +-
 lib/learn.c| 12 --
 lib/learning-switch.c  | 14 +--
 lib/ofp-bundle.c   |  1 +
 lib/ofp-flow.c | 89 ++
 ofproto/ofproto-dpif-xlate.c   |  6 ++-
 ofproto/ofproto-dpif.c | 17 
 ofproto/ofproto-dpif.h |  2 +-
 ofproto/ofproto-provider.h |  2 +-
 ofproto/ofproto.c  | 49 ++-
 ovn/controller/ofctrl.c| 22 ++-
 utilities/ovs-ofctl.c  | 81 +++---
 13 files changed, 185 insertions(+), 113 deletions(-)

diff --git a/AUTHORS.rst b/AUTHORS.rst
index dc69ba3f3c1f..81f9b6f28b88 100644
--- a/AUTHORS.rst
+++ b/AUTHORS.rst
@@ -506,6 +506,7 @@ Marvin Pascual  mar...@pascual.com.ph
 Maxime Brun m.b...@alphalink.fr
 Madhu Venugopal maven...@gmail.com
 Michael A. Collins  mike.a.coll...@ark-net.org
+Michael Ben-Ami mben...@digitalocean.com
 Michael Hu  m...@nicira.com
 Michael J. Smalley  michaeljsmal...@gmail.com
 Michael Mao m...@nicira.com
diff --git a/include/openvswitch/ofp-flow.h b/include/openvswitch/ofp-flow.h
index 17d48f12e060..2ff2e45b66c4 100644
--- a/include/openvswitch/ofp-flow.h
+++ b/include/openvswitch/ofp-flow.h
@@ -73,7 +73,7 @@ void ofputil_flow_mod_flags_format(struct ds *, enum 
ofputil_flow_mod_flags);
 struct ofputil_flow_mod {
 struct ovs_list list_node; /* For queuing flow_mods. */
 
-struct match match;
+struct minimatch match;
 int priority;
 
 /* Cookie matching.  The flow_mod affects only flows that have cookies that
diff --git a/lib/learn.c b/lib/learn.c
index f5d15503fe79..c4d5b3e0c449 100644
--- a/lib/learn.c
+++ b/lib/learn.c
@@ -91,14 +91,17 @@ learn_check(const struct ofpact_learn *learn, const struct 
match *src_match)
  * 'ofpacts' and retains ownership of it.  'fm->ofpacts' will point into the
  * 'ofpacts' buffer.
  *
+ * The caller must eventually destroy fm->match.
+ *
  * The caller has to actually execute 'fm'. */
 void
 learn_execute(const struct ofpact_learn *learn, const struct flow *flow,
   struct ofputil_flow_mod *fm, struct ofpbuf *ofpacts)
 {
 const struct ofpact_learn_spec *spec;
+struct match match;
 
-match_init_catchall(>match);
+match_init_catchall();
 fm->priority = learn->priority;
 fm->cookie = htonll(0);
 fm->cookie_mask = htonll(0);
@@ -140,10 +143,10 @@ learn_execute(const struct ofpact_learn *learn, const 
struct flow *flow,
 
 switch (spec->dst_type) {
 case NX_LEARN_DST_MATCH:
-mf_write_subfield(>dst, , >match);
-match_add_ethernet_prereq(>match, spec->dst.field);
+mf_write_subfield(>dst, , );
+match_add_ethernet_prereq(, spec->dst.field);
 mf_vl_mff_set_tlv_bitmap(
-spec->dst.field, >match.flow.tunnel.metadata.present.map);
+spec->dst.field, );
 break;
 
 case NX_LEARN_DST_LOAD:
@@ -173,6 +176,7 @@ learn_execute(const struct ofpact_learn *learn, const 
struct flow *flow,
 }
 }
 
+minimatch_init(>match, );
 fm->ofpacts = ofpacts->data;
 fm->ofpacts_len = ofpacts->size;
 }
diff --git a/lib/learning-switch.c b/lib/learning-switch.c
index 3a9e015bbe9c..04eaa6e8e73f 100644
--- a/lib/learning-switch.c
+++ b/lib/learning-switch.c
@@ -206,7 +206,6 @@ lswitch_handshake(struct lswitch *sw)
 output.max_len = OFP_DEFAULT_MISS_SEND_LEN;
 
 struct ofputil_flow_mod fm = {
-.match = MATCH_CATCHALL_INITIALIZER,
 .priority = 0,
 .table_id = 0,
 .command = OFPFC_ADD,
@@ -216,8 +215,10 @@ lswitch_handshake(struct lswitch *sw)
 .ofpacts = ,
 .ofpacts_len = sizeof output,
 };
-
+minimatch_init_catchall();
 msg = ofputil_encode_flow_mod(, protocol);
+minimatch_destroy();
+
 error = rconn_send(sw->rconn, msg, NULL);
 if (error) {
 VLOG_INFO_RL(, "%s: failed to add default flow (%s)",
@@ 

[ovs-dev] [PATCH 3/4] flow, match, classifier: Add new functions for miniflow and minimatch.

2018-03-20 Thread Ben Pfaff
The miniflow and minimatch APIs lack several of the features of the flow
and match APIs.  This commit adds a few of the missing functions.

These functions will be used for the first time in an upcoming commit.

Signed-off-by: Ben Pfaff 
---
 include/openvswitch/match.h |  4 
 lib/classifier.c| 19 +++
 lib/classifier.h|  4 
 lib/flow.h  | 27 +++
 lib/match.c | 44 
 5 files changed, 98 insertions(+)

diff --git a/include/openvswitch/match.h b/include/openvswitch/match.h
index e5cf1a0d7c14..49333ae8f7a1 100644
--- a/include/openvswitch/match.h
+++ b/include/openvswitch/match.h
@@ -246,6 +246,7 @@ struct minimatch {
 };
 
 void minimatch_init(struct minimatch *, const struct match *);
+void minimatch_init_catchall(struct minimatch *);
 void minimatch_clone(struct minimatch *, const struct minimatch *);
 void minimatch_move(struct minimatch *dst, struct minimatch *src);
 void minimatch_destroy(struct minimatch *);
@@ -253,6 +254,7 @@ void minimatch_destroy(struct minimatch *);
 void minimatch_expand(const struct minimatch *, struct match *);
 
 bool minimatch_equal(const struct minimatch *a, const struct minimatch *b);
+uint32_t minimatch_hash(const struct minimatch *, uint32_t basis);
 
 bool minimatch_matches_flow(const struct minimatch *, const struct flow *);
 
@@ -262,6 +264,8 @@ void minimatch_format(const struct minimatch *, const 
struct tun_table *,
 char *minimatch_to_string(const struct minimatch *,
   const struct ofputil_port_map *, int priority);
 
+bool minimatch_has_default_hidden_fields(const struct minimatch *);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/classifier.c b/lib/classifier.c
index cea9053b6f67..edb40c89c608 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -1223,6 +1223,25 @@ classifier_find_match_exactly(const struct classifier 
*cls,
 return retval;
 }
 
+/* Finds and returns a rule in 'cls' with priority 'priority' and exactly the
+ * same matching criteria as 'target', and that is visible in 'version'.
+ * Returns a null pointer if 'cls' doesn't contain an exact match visible in
+ * 'version'. */
+const struct cls_rule *
+classifier_find_minimatch_exactly(const struct classifier *cls,
+  const struct minimatch *target, int priority,
+  ovs_version_t version)
+{
+const struct cls_rule *retval;
+struct cls_rule cr;
+
+cls_rule_init_from_minimatch(, target, priority);
+retval = classifier_find_rule_exactly(cls, , version);
+cls_rule_destroy();
+
+return retval;
+}
+
 /* Checks if 'target' would overlap any other rule in 'cls' in 'version'.  Two
  * rules are considered to overlap if both rules have the same priority and a
  * packet could match both, and if both rules are visible in the same version.
diff --git a/lib/classifier.h b/lib/classifier.h
index 1263afdfd1a9..d1bd4aa12a78 100644
--- a/lib/classifier.h
+++ b/lib/classifier.h
@@ -406,6 +406,10 @@ const struct cls_rule *classifier_find_match_exactly(const 
struct classifier *,
  const struct match *,
  int priority,
  ovs_version_t);
+const struct cls_rule *classifier_find_minimatch_exactly(
+const struct classifier *, const struct minimatch *,
+int priority, ovs_version_t);
+
 bool classifier_is_empty(const struct classifier *);
 int classifier_count(const struct classifier *);
 
diff --git a/lib/flow.h b/lib/flow.h
index 3331e2068ed8..5e41567b2668 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -728,6 +728,11 @@ static inline ovs_be32 miniflow_get_be32(const struct 
miniflow *,
 static inline uint16_t miniflow_get_vid(const struct miniflow *, size_t);
 static inline uint16_t miniflow_get_tcp_flags(const struct miniflow *);
 static inline ovs_be64 miniflow_get_metadata(const struct miniflow *);
+static inline uint64_t miniflow_get_tun_metadata_present_map(
+const struct miniflow *);
+static inline uint32_t miniflow_get_recirc_id(const struct miniflow *);
+static inline uint32_t miniflow_get_dp_hash(const struct miniflow *);
+static inline ovs_be32 miniflow_get_ports(const struct miniflow *);
 
 bool miniflow_equal(const struct miniflow *a, const struct miniflow *b);
 bool miniflow_equal_in_minimask(const struct miniflow *a,
@@ -857,6 +862,27 @@ miniflow_get_metadata(const struct miniflow *flow)
 return MINIFLOW_GET_BE64(flow, metadata);
 }
 
+/* Returns the bitmap that indicates which tunnel metadata fields are present
+ * in 'flow'. */
+static inline uint64_t
+miniflow_get_tun_metadata_present_map(const struct miniflow *flow)
+{
+return MINIFLOW_GET_U64(flow, tunnel.metadata.present.map);
+}
+
+/* Returns the recirc_id in 'flow.' */
+static inline uint32_t

[ovs-dev] [PATCH 2/4] flow: Improve type-safety of MINIFLOW_GET_TYPE.

2018-03-20 Thread Ben Pfaff
Until mow, this macro has blindly read the passed-in type's size, but
that's unnecessarily risky.  This commit changes it to verify that the
passed-in type is the same size as the field and, on GCC and Clang, that
the types are compatible.  It also adds a version that does not check,
for the one case where (currently) we deliberately read the wrong size,
and updates a few uses to use more precise field names.

Signed-off-by: Ben Pfaff 
---
 lib/classifier.c  |  8 
 lib/dpif-netdev.c |  3 ++-
 lib/flow.c|  2 +-
 lib/flow.h| 17 -
 4 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/lib/classifier.c b/lib/classifier.c
index ef7e3de0728c..cea9053b6f67 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -495,8 +495,8 @@ classifier_count(const struct classifier *cls)
 static inline ovs_be32 minimatch_get_ports(const struct minimatch *match)
 {
 /* Could optimize to use the same map if needed for fast path. */
-return MINIFLOW_GET_BE32(match->flow, tp_src)
-& MINIFLOW_GET_BE32(>mask->masks, tp_src);
+return (miniflow_get_ports(match->flow)
+& miniflow_get_ports(>mask->masks));
 }
 
 /* Inserts 'rule' into 'cls' in 'version'.  Until 'rule' is removed from 'cls',
@@ -1501,7 +1501,7 @@ insert_subtable(struct classifier *cls, const struct 
minimask *mask)
 /* Ports trie. */
 ovsrcu_set_hidden(>ports_trie, NULL);
 *CONST_CAST(int *, >ports_mask_len)
-= 32 - ctz32(ntohl(MINIFLOW_GET_BE32(>masks, tp_src)));
+= 32 - ctz32(ntohl(miniflow_get_ports(>masks)));
 
 /* List of rules. */
 rculist_init(>rules_list);
@@ -1696,7 +1696,7 @@ find_match_wc(const struct cls_subtable *subtable, 
ovs_version_t version,
 unsigned int mbits;
 ovs_be32 value, plens, mask;
 
-mask = MINIFLOW_GET_BE32(>mask.masks, tp_src);
+mask = miniflow_get_ports(>mask.masks);
 value = ((OVS_FORCE ovs_be32 *)flow)[TP_PORTS_OFS32] & mask;
 mbits = trie_lookup_value(>ports_trie, , , 32);
 
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index b07fc6b8b327..be31fd0922b0 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2200,7 +2200,8 @@ dp_netdev_pmd_lookup_flow(struct dp_netdev_pmd_thread 
*pmd,
 {
 struct dpcls *cls;
 struct dpcls_rule *rule;
-odp_port_t in_port = u32_to_odp(MINIFLOW_GET_U32(>mf, in_port));
+odp_port_t in_port = u32_to_odp(MINIFLOW_GET_U32(>mf,
+ in_port.odp_port));
 struct dp_netdev_flow *netdev_flow = NULL;
 
 cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port);
diff --git a/lib/flow.c b/lib/flow.c
index 38ff29c8cd14..09b66b81858f 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -2007,7 +2007,7 @@ miniflow_hash_5tuple(const struct miniflow *flow, 
uint32_t basis)
 }
 
 /* Add both ports at once. */
-hash = hash_add(hash, MINIFLOW_GET_U32(flow, tp_src));
+hash = hash_add(hash, (OVS_FORCE uint32_t) miniflow_get_ports(flow));
 }
 out:
 return hash_finish(hash, 42);
diff --git a/lib/flow.h b/lib/flow.h
index 770a07a62778..3331e2068ed8 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -684,6 +684,14 @@ miniflow_get__(const struct miniflow *mf, size_t idx)
 /* Get the value of the struct flow 'FIELD' as up to 8 byte wide integer type
  * 'TYPE' from miniflow 'MF'. */
 #define MINIFLOW_GET_TYPE(MF, TYPE, FIELD)  \
+(BUILD_ASSERT(sizeof(TYPE) == sizeof(((struct flow *)0)->FIELD)),   \
+ BUILD_ASSERT_GCCONLY(__builtin_types_compatible_p(TYPE, typeof(((struct 
flow *)0)->FIELD))), \
+ MINIFLOW_GET_TYPE__(MF, TYPE, FIELD))
+
+/* Like MINIFLOW_GET_TYPE, but without checking that TYPE is the correct width
+ * for FIELD.  (This is useful for deliberately reading adjacent fields in one
+ * go.)  */
+#define MINIFLOW_GET_TYPE__(MF, TYPE, FIELD)\
 (MINIFLOW_IN_MAP(MF, FLOW_U64_OFFSET(FIELD))\
  ? ((OVS_FORCE const TYPE *)miniflow_get__(MF, FLOW_U64_OFFSET(FIELD))) \
  [FLOW_U64_OFFREM(FIELD) / sizeof(TYPE)]\
@@ -806,7 +814,7 @@ miniflow_get_vid(const struct miniflow *flow, size_t n)
 {
 if (n < FLOW_MAX_VLAN_HEADERS) {
 union flow_vlan_hdr hdr = {
-.qtag = MINIFLOW_GET_BE32(flow, vlans[n])
+.qtag = MINIFLOW_GET_BE32(flow, vlans[n].qtag)
 };
 return vlan_tci_to_vid(hdr.tci);
 }
@@ -849,6 +857,13 @@ miniflow_get_metadata(const struct miniflow *flow)
 return MINIFLOW_GET_BE64(flow, metadata);
 }
 
+
+/* Returns the 'tp_src' and 'tp_dst' fields together as one piece of data. */
+static inline ovs_be32
+miniflow_get_ports(const struct miniflow *flow)
+{
+return MINIFLOW_GET_TYPE__(flow, ovs_be32, tp_src);
+}
 /* Returns the mask for the OpenFlow 1.1+ "metadata" field in 'mask'.
  *
  * The return value is all-1-bits if 'mask' matches on the whole value of the
-- 
2.16.1


[ovs-dev] [PATCH 1/4] match: Add 'tun_md' member to struct minimatch.

2018-03-20 Thread Ben Pfaff
struct match has had a 'tun_md' member for a long time, but struct
minimatch has never had one.  This doesn't matter for the purposes for
which minimatch is currently used, but it means that a minimatch is not
completely substitutable for a match and therefore blocks some new uses.
This patch adds the member.

Signed-off-by: Ben Pfaff 
---
 include/openvswitch/match.h|  1 +
 include/openvswitch/tun-metadata.h |  5 +
 lib/match.c|  7 ++-
 lib/tun-metadata.c | 17 +
 4 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/include/openvswitch/match.h b/include/openvswitch/match.h
index 61a67de2c413..e5cf1a0d7c14 100644
--- a/include/openvswitch/match.h
+++ b/include/openvswitch/match.h
@@ -242,6 +242,7 @@ struct minimatch {
 };
 struct miniflow *flows[2];
 };
+struct tun_metadata_allocation *tun_md;
 };
 
 void minimatch_init(struct minimatch *, const struct match *);
diff --git a/include/openvswitch/tun-metadata.h 
b/include/openvswitch/tun-metadata.h
index 935c5c495027..dc0312ecb724 100644
--- a/include/openvswitch/tun-metadata.h
+++ b/include/openvswitch/tun-metadata.h
@@ -101,6 +101,11 @@ struct tun_metadata_allocation {
 bool valid; /* Set to true after any allocation occurs. */
 };
 
+struct tun_metadata_allocation *tun_metadata_allocation_clone(
+const struct tun_metadata_allocation *);
+void tun_metadata_allocation_copy(struct tun_metadata_allocation *,
+  const struct tun_metadata_allocation *);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/match.c b/lib/match.c
index 97a5282997b0..3eab0fd5dec9 100644
--- a/lib/match.c
+++ b/lib/match.c
@@ -1663,6 +1663,8 @@ minimatch_init(struct minimatch *dst, const struct match 
*src)
 miniflow_alloc(dst->flows, 2, );
 miniflow_init(dst->flow, >flow);
 minimask_init(dst->mask, >wc);
+
+dst->tun_md = tun_metadata_allocation_clone(>tun_md);
 }
 
 /* Initializes 'dst' as a copy of 'src'.  The caller must eventually free 'dst'
@@ -1677,6 +1679,7 @@ minimatch_clone(struct minimatch *dst, const struct 
minimatch *src)
miniflow_get_values(src->flow), data_size);
 memcpy(miniflow_values(>mask->masks),
miniflow_get_values(>mask->masks), data_size);
+dst->tun_md = tun_metadata_allocation_clone(src->tun_md);
 }
 
 /* Initializes 'dst' with the data in 'src', destroying 'src'.  The caller must
@@ -1686,6 +1689,7 @@ minimatch_move(struct minimatch *dst, struct minimatch 
*src)
 {
 dst->flow = src->flow;
 dst->mask = src->mask;
+dst->tun_md = src->tun_md;
 }
 
 /* Frees any memory owned by 'match'.  Does not free the storage in which
@@ -1694,6 +1698,7 @@ void
 minimatch_destroy(struct minimatch *match)
 {
 free(match->flow);
+free(match->tun_md);
 }
 
 /* Initializes 'dst' as a copy of 'src'. */
@@ -1702,7 +1707,7 @@ minimatch_expand(const struct minimatch *src, struct 
match *dst)
 {
 miniflow_expand(src->flow, >flow);
 minimask_expand(src->mask, >wc);
-memset(>tun_md, 0, sizeof dst->tun_md);
+tun_metadata_allocation_copy(>tun_md, src->tun_md);
 }
 
 /* Returns true if 'a' and 'b' match the same packets, false otherwise.  */
diff --git a/lib/tun-metadata.c b/lib/tun-metadata.c
index c0d9180e020e..f8a0e19524e9 100644
--- a/lib/tun-metadata.c
+++ b/lib/tun-metadata.c
@@ -924,3 +924,20 @@ tun_metadata_match_format(struct ds *s, const struct match 
*match)
 ds_put_char(s, ',');
 }
 }
+
+struct tun_metadata_allocation *
+tun_metadata_allocation_clone(const struct tun_metadata_allocation *src)
+{
+return src && src->valid ? xmemdup(src, sizeof *src) : NULL;
+}
+
+void
+tun_metadata_allocation_copy(struct tun_metadata_allocation *dst,
+ const struct tun_metadata_allocation *src)
+{
+if (src && src->valid) {
+*dst = *src;
+} else {
+memset(dst, 0, sizeof *dst);
+}
+}
-- 
2.16.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] cómo desarrollar mejor la inteligencia de los niños

2018-03-20 Thread Preparando excelentes lectores
 

 
Cómo preparar excelentes lectores desde la primera infancia 
Marzo 23 - webinar Interactivo

Objetivo:

En esta capacitación veremos los detalles de una metodología que está 
revolucionando la manera de enseñar a leer y que está soportada 
científicamente. Le daremos los detalles para que empiece a preparar lectores 
de alto rendimiento. 

8 razones para asistir a este webinar :

- Porque el mundo está cambiando y la educación no se puede quedar atrás.
- Para ser protagonista de los cambios en la educación.
- Facilitar la labor educativa como padre o docente.
- Ayudar a mejorar a nuestra sociedad.
- Poder ayudar a una generación de niños que necesitan hacer de la lectura su 
principal herramienta de formación.
- Afrontar de mejor manera la educación de sus hijos. 
- Saber cómo desarrollar mejor la inteligencia de los niños.
 - Una buena educación es la mejor manera de construir la paz. 
 
 
Temario e Inscripciones:

Respondiendo por este medio "Leer"+TELÉFONO + NOMBRE o marcando al:

045 + 5515546630  



___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] B2B Marketing Professional contacts list

2018-03-20 Thread marissa . jordan
Hi,Would you be interested in  
B2B Marketing Professional contacts which can help you to grow up your  
business campaigns?Titles: C-level, VP-level,  
Directors, Managers Etc.Please let me know if  
you’re interested, and I will get back to your with more information on the  
same.Feel free to share your  
thoughts.Regards,Marissa
powered by GSM. Free mail merge and  
email marketing software for Gmail.

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] can not well distributed when use dp_hash for ovs group

2018-03-20 Thread Jan Scheurich
I just submitted v1 of the patch series to the mailing list.
Please have a look and comment.

BR, Jan

> -Original Message-
> From: Jan Scheurich
> Sent: Tuesday, 20 March, 2018 15:16
> To: 'ychen' ; d...@openvswitch.org; ja...@ovn.org
> Subject: RE: [ovs-dev] can not well distributed when use dp_hash for ovs group
> 
> Hi ychen,
> 
> We have been working on this problem and have a fully working patch that we 
> will submit to the ML shortly.
> We would be glad if you could give it a test and review.
> 
> Thanks, Jan
> 
> > -Original Message-
> > From: ovs-dev-boun...@openvswitch.org 
> > [mailto:ovs-dev-boun...@openvswitch.org] On Behalf Of ychen
> > Sent: Tuesday, 20 March, 2018 14:20
> > To: d...@openvswitch.org; ja...@ovn.org
> > Subject: [ovs-dev] can not well distributed when use dp_hash for ovs group
> >
> > hi,
> >   I tested dp_hash for ovs group, and found that dp_hash can not well 
> > distributed, some buckets even can not be selected.
> >   In my testing environment, I have 11 buckets:
> > group_id=131841,type=select,selection_method=dp_hash,
> > bucket=bucket_id:51162,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]-
> > >NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.29:80))),
> > bucket=bucket_id:42099,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]-
> > >NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.25:80))),
> > bucket=bucket_id:53526,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]-
> > >NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.27:80))),
> > bucket=bucket_id:12221,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]-
> > >NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.40:80))),
> > bucket=bucket_id:2787,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]-
> > >NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.26:80))),
> > bucket=bucket_id:18951,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]-
> > >NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.24:80))),
> > bucket=bucket_id:32559,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]-
> > >NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.62:80))),
> > bucket=bucket_id:35550,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]-
> > >NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.43:80))),
> > bucket=bucket_id:9026,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]-
> > >NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.57:80))),
> > bucket=bucket_id:26811,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]-
> > >NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.34:80)))
> >
> >
> >  But about 3~5 buckets can not be selected always.
> >
> >
> >   In the function xlate_dp_hash_select_group(), I found the code:
> >   uint32_t mask = (1 << log_2_ceil(n_buckets)) - 1;
> >   uint32_t basis = 0xc2b73583 * (ctx->xin->flow.dp_hash & mask);
> > uint32_t score =  (hash_int(bucket->bucket_id, basis) & 0x) * 
> > bucket->weight;
> >
> >
> >  for the above formula, if n_buckets is 11, then there are totally 16 
> > probabilities for basis.
> > so how can we make sure the best score can well distributed in the 11 
> > buckets?
> >
> > ___
> > dev mailing list
> > d...@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 1/3] userspace datapath: Add OVS_HASH_L4_SYMMETRIC dp_hash algorithm

2018-03-20 Thread Jan Scheurich
This commit implements a new dp_hash algorithm OVS_HASH_L4_SYMMETRIC in
the netdev datapath. It will be used as default hash algorithm for the
dp_hash-based select groups in a subsequent commit to maintain
compatibility with the symmetry property of the current default hash
selection method.

Signed-off-by: Jan Scheurich 
Signed-off-by: Nitin Katiyar 
Co-authored-by: Nitin Katiyar 
---
 datapath/linux/compat/include/linux/openvswitch.h |  4 +++
 lib/flow.c| 42 +--
 lib/flow.h|  1 +
 lib/odp-execute.c | 23 +++--
 4 files changed, 65 insertions(+), 5 deletions(-)

diff --git a/datapath/linux/compat/include/linux/openvswitch.h 
b/datapath/linux/compat/include/linux/openvswitch.h
index 84ebcaf..2bb3cb2 100644
--- a/datapath/linux/compat/include/linux/openvswitch.h
+++ b/datapath/linux/compat/include/linux/openvswitch.h
@@ -720,6 +720,10 @@ struct ovs_action_push_vlan {
  */
 enum ovs_hash_alg {
OVS_HASH_ALG_L4,
+#ifndef __KERNEL__
+   OVS_HASH_ALG_SYM_L4,
+#endif
+   __OVS_HASH_MAX
 };
 
 /*
diff --git a/lib/flow.c b/lib/flow.c
index 38ff29c..a57a5da 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -2108,6 +2108,44 @@ flow_hash_symmetric_l4(const struct flow *flow, uint32_t 
basis)
 return jhash_bytes(, sizeof fields, basis);
 }
 
+/* Symmetrically Hashes non-IP 'flow' based on its L2 headers. */
+uint32_t
+flow_hash_symmetric_l2(const struct flow *flow, uint32_t basis)
+{
+union {
+struct {
+ovs_be16 eth_type;
+ovs_be16 vlan_tci;
+struct eth_addr eth_addr;
+ovs_be16 pad;
+};
+uint32_t word[3];
+} fields;
+
+uint32_t hash = basis;
+int i;
+
+if (flow->packet_type != htons(PT_ETH)) {
+/* Cannot hash non-Ethernet flows */
+return 0;
+}
+
+for (i = 0; i < ARRAY_SIZE(fields.eth_addr.be16); i++) {
+fields.eth_addr.be16[i] =
+flow->dl_src.be16[i] ^ flow->dl_dst.be16[i];
+}
+for (i = 0; i < FLOW_MAX_VLAN_HEADERS; i++) {
+fields.vlan_tci ^= flow->vlans[i].tci & htons(VLAN_VID_MASK);
+}
+fields.eth_type = flow->dl_type;
+fields.pad = 0;
+
+hash = hash_add(hash, fields.word[0]);
+hash = hash_add(hash, fields.word[1]);
+hash = hash_add(hash, fields.word[2]);
+return hash_finish(hash, basis);
+}
+
 /* Hashes 'flow' based on its L3 through L4 protocol information */
 uint32_t
 flow_hash_symmetric_l3l4(const struct flow *flow, uint32_t basis,
@@ -2128,8 +2166,8 @@ flow_hash_symmetric_l3l4(const struct flow *flow, 
uint32_t basis,
 hash = hash_add64(hash, a[i] ^ b[i]);
 }
 } else {
-/* Cannot hash non-IP flows */
-return 0;
+/* Revert to hashing L2 headers */
+return flow_hash_symmetric_l2(flow, basis);
 }
 
 hash = hash_add(hash, flow->nw_proto);
diff --git a/lib/flow.h b/lib/flow.h
index 770a07a..e6033ad 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -236,6 +236,7 @@ hash_odp_port(odp_port_t odp_port)
 
 uint32_t flow_hash_5tuple(const struct flow *flow, uint32_t basis);
 uint32_t flow_hash_symmetric_l4(const struct flow *flow, uint32_t basis);
+uint32_t flow_hash_symmetric_l2(const struct flow *flow, uint32_t basis);
 uint32_t flow_hash_symmetric_l3l4(const struct flow *flow, uint32_t basis,
  bool inc_udp_ports );
 
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index 1969f02..c716c41 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -726,14 +726,16 @@ odp_execute_actions(void *dp, struct dp_packet_batch 
*batch, bool steal,
 }
 
 switch ((enum ovs_action_attr) type) {
+
 case OVS_ACTION_ATTR_HASH: {
 const struct ovs_action_hash *hash_act = nl_attr_get(a);
 
-/* Calculate a hash value directly.  This might not match the
+/* Calculate a hash value directly. This might not match the
  * value computed by the datapath, but it is much less expensive,
  * and the current use case (bonding) does not require a strict
  * match to work properly. */
-if (hash_act->hash_alg == OVS_HASH_ALG_L4) {
+switch (hash_act->hash_alg) {
+case OVS_HASH_ALG_L4: {
 struct flow flow;
 uint32_t hash;
 
@@ -749,7 +751,22 @@ odp_execute_actions(void *dp, struct dp_packet_batch 
*batch, bool steal,
 }
 packet->md.dp_hash = hash;
 }
-} else {
+break;
+}
+case OVS_HASH_ALG_SYM_L4: {
+struct flow flow;
+uint32_t hash;
+
+DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+flow_extract(packet, );
+   

[ovs-dev] [PATCH 0/3] Use improved dp_hash select group by default

2018-03-20 Thread Jan Scheurich
The current default OpenFlow select group implementation sends every new L4 flow
to the slow path for the balancing decision and installs a 5-tuple "miniflow"
in the datapath to forward subsequent packets of the connection accordingly. 
Clearly this has major scalability issues with many parallel L4 flows and high
connection setup rates.

The dp_hash selection method for the OpenFlow select group was added to OVS
as an alternative. It avoids the scalability issues for the price of an 
additional recirculation in the datapath. The dp_hash method is only available
to OF1.5 SDN controllers speaking the Netronome Group Mod extension to 
configure the selection mechanism. This severely limited the applicability of
the dp_hash select group in the past.

Furthermore, testing revealed that the implemented dp_hash selection often
generated a very uneven distribution of flows over group buckets and didn't 
consider bucket weights at all.

The present patch set in a first step improves the dp_hash selection method to
much more accurately distribute flows over weighted group buckets. In a second
step it makes the improved dp_hash method the default in OVS for select groups
that can be accurately handled by dp_hash. That should be the vast majority of
cases. Otherwise we fall back to the legacy slow-path selection method.

The Netronome extension can still be used to override the default decision and
require the legacy slow-path or the dp_hash selection method.

Jan Scheurich (3):
  userspace datapath: Add OVS_HASH_L4_SYMMETRIC dp_hash algorithm
  ofproto-dpif: Improve dp_hash selection method for select groups
  ofproto-dpif: Use dp_hash as default selection method

 datapath/linux/compat/include/linux/openvswitch.h |   4 +
 include/openvswitch/ofp-group.h   |   1 +
 lib/flow.c|  42 +-
 lib/flow.h|   1 +
 lib/odp-execute.c |  23 +++-
 lib/ofp-group.c   |  15 ++-
 ofproto/ofproto-dpif-xlate.c  |  70 ++
 ofproto/ofproto-dpif.c| 156 ++
 ofproto/ofproto-dpif.h|  14 ++
 ofproto/ofproto-provider.h|   2 +-
 tests/mpls-xlate.at   |  26 ++--
 tests/ofproto-dpif.at |  66 ++---
 12 files changed, 351 insertions(+), 69 deletions(-)

-- 
1.9.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 3/3] ofproto-dpif: Use dp_hash as default selection method

2018-03-20 Thread Jan Scheurich
The dp_hash selection method for select groups overcomes the scalability
problems of the current default selection method which, due to L2-L4
hashing during xlation and un-wildcarding of the hashed fields,
basically requires an upcall to the slow path to load-balance every
L4 connection. The consequence are an explosion of datapath flows
(megaflows degenerate to miniflows) and a limitation of connection
setup rate OVS can handle.

This commit changes the default selection method to dp_hash, provided the
bucket configuration is such that the dp_hash method can accurately
represent the bucket weights with up to 64 hash values. Otherwise we
stick to original default hash method.

We use the new dp_hash algorithm OVS_HASH_L4_SYMMETRIC to maintain the
symmetry property of the old default hash method.

A controller can explicitly request the old default hash selection method
by specifying selection method "hash" with an empty list of fields in the
Group properties of the OpenFlow 1.5 Group Mod message.

Signed-off-by: Jan Scheurich 
Signed-off-by: Nitin Katiyar 
Co-authored-by: Nitin Katiyar 
---
 lib/ofp-group.c| 15 +++
 ofproto/ofproto-dpif.c | 18 +++--
 ofproto/ofproto-dpif.h |  1 +
 ofproto/ofproto-provider.h |  2 +-
 tests/mpls-xlate.at| 26 +++---
 tests/ofproto-dpif.at  | 66 ++
 6 files changed, 88 insertions(+), 40 deletions(-)

diff --git a/lib/ofp-group.c b/lib/ofp-group.c
index 31b0437..c5ddc65 100644
--- a/lib/ofp-group.c
+++ b/lib/ofp-group.c
@@ -1518,12 +1518,17 @@ parse_group_prop_ntr_selection_method(struct ofpbuf 
*payload,
 return OFPERR_OFPBPC_BAD_VALUE;
 }
 
-error = oxm_pull_field_array(payload->data, fields_len,
- >fields);
-if (error) {
-OFPPROP_LOG(, false,
+if (fields_len > 0) {
+error = oxm_pull_field_array(payload->data, fields_len,
+>fields);
+if (error) {
+OFPPROP_LOG(, false,
 "ntr selection method fields are invalid");
-return error;
+return error;
+}
+} else {
+/* Selection_method "hash: w/o fields means default hash method. */
+gp->fields.values_size = 0;
 }
 
 return 0;
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 383d04e..1801670 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -4812,8 +4812,21 @@ group_set_selection_method(struct group_dpif *group)
 
 if (selection_method[0] == '\0') {
 VLOG_INFO("No selection method specified.");
-group->selection_method = SEL_METHOD_DEFAULT;
-
+/* If the controller has not specified a selection method, check if
+ * the dp_hash selection method with max 64 hash values is appropriate
+ * for the given bucket configuration. */
+if (group_setup_dp_hash_table(group, 64)) {
+/* Use dp_hash selection method with symmetric L4 hash. */
+VLOG_INFO("  Use dp_hash with %d hash values.",
+  group->hash_mask + 1);
+group->selection_method = SEL_METHOD_DP_HASH;
+group->hash_alg = OVS_HASH_ALG_SYM_L4;
+group->hash_basis = 0xdeadbeef;
+} else {
+/* Fall back to original default hashing in slow path. */
+VLOG_INFO("  Falling back to default hash method.");
+group->selection_method = SEL_METHOD_DEFAULT;
+}
 } else if (!strcmp(selection_method, "dp_hash")) {
 VLOG_INFO("Selection method specified: dp_hash.");
 /* Try to use dp_hash if possible at all. */
@@ -4831,6 +4844,7 @@ group_set_selection_method(struct group_dpif *group)
 VLOG_INFO("  Falling back to default hash method.");
 group->selection_method = SEL_METHOD_DEFAULT;
 }
+
 } else if (!strcmp(selection_method, "hash")) {
 VLOG_INFO("Selection method specified: hash.");
 if (props->fields.values_size > 0) {
diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
index 6a58b48..b1972aa 100644
--- a/ofproto/ofproto-dpif.h
+++ b/ofproto/ofproto-dpif.h
@@ -61,6 +61,7 @@ struct ofproto_async_msg;
 struct ofproto_dpif;
 struct uuid;
 struct xlate_cache;
+struct xlate_ctx;
 
 /* Number of implemented OpenFlow tables. */
 enum { N_TABLES = 255 };
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index 92d4622..fd5b249 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -572,7 +572,7 @@ struct ofgroup {
 const struct ovs_list buckets;/* Contains "struct ofputil_bucket"s. */
 const uint32_t n_buckets;
 
-const struct ofputil_group_props props;
+struct ofputil_group_props props;
 
 struct rule_collection rules OVS_GUARDED;   /* Referring rules. */
 };
diff --git a/tests/mpls-xlate.at 

[ovs-dev] [PATCH 2/3] ofproto-dpif: Improve dp_hash selection method for select groups

2018-03-20 Thread Jan Scheurich
The current implementation of the "dp_hash" selection method suffers
from two deficiences: 1. The hash mask and hence the number of dp_hash
values is just large enough to cover the number of group buckets, but
does not consider the case that buckets have different weights. 2. The
xlate-time selection of best bucket from the masked dp_hash value often
results in bucket load distributions that are quite different from the
bucket weights because the number of available masked dp_hash values
is too small (2-6 bits compared to 32 bits of a full hash in the default
hash selection method).

This commit provides a more accurate implementation of the dp_hash
select group by applying the well known Webster method for distributing
a small number of "seats" fairly over the weighted "parties"
(see https://en.wikipedia.org/wiki/Webster/Sainte-Lagu%C3%AB_method).
The dp_hash mask is autmatically chosen large enough to provide good
enough accuracy even with widely differing weights.

This distribution happens at group modification time and the resulting
table is stored with the group-dpif struct. At xlation time, we use the
masked dp_hash values as index to look up the assigned bucket.

If the bucket should not be live, we do a circular search over the
mapping table until we find the first live bucket. As the buckets in
the table are by construction in pseudo-random order with a frequency
according to their weight, this method maintains correct distribution
even if one or more buckets are non-live.

Xlation is further simplified by storing some derived select group state
at group construction in struct group-dpif in a form better suited for
xlation purposes.

Signed-off-by: Jan Scheurich 
Signed-off-by: Nitin Katiyar 
Co-authored-by: Nitin Katiyar 
Signed-off-by: Jan Scheurich 
---
 include/openvswitch/ofp-group.h |   1 +
 ofproto/ofproto-dpif-xlate.c|  70 
 ofproto/ofproto-dpif.c  | 142 
 ofproto/ofproto-dpif.h  |  13 
 4 files changed, 200 insertions(+), 26 deletions(-)

diff --git a/include/openvswitch/ofp-group.h b/include/openvswitch/ofp-group.h
index 8d893a5..af4033d 100644
--- a/include/openvswitch/ofp-group.h
+++ b/include/openvswitch/ofp-group.h
@@ -47,6 +47,7 @@ struct bucket_counter {
 /* Bucket for use in groups. */
 struct ofputil_bucket {
 struct ovs_list list_node;
+uint16_t aux;   /* Padding. Also used for temporary data. */
 uint16_t weight;/* Relative weight, for "select" groups. */
 ofp_port_t watch_port;  /* Port whose state affects whether this bucket
  * is live. Only required for fast failover
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index bc6429c..021a4d8 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -4232,35 +4232,51 @@ xlate_hash_fields_select_group(struct xlate_ctx *ctx, 
struct group_dpif *group,
 }
 }
 
+
+static struct ofputil_bucket *
+group_dp_hash_best_bucket(struct xlate_ctx *ctx,
+  const struct group_dpif *group,
+  uint32_t dp_hash)
+{
+struct ofputil_bucket *bucket, *best_bucket = NULL;
+uint32_t n_hash = group->hash_mask + 1;
+
+uint32_t hash = dp_hash &= group->hash_mask;
+ctx->wc->masks.dp_hash |= group->hash_mask;
+
+/* Starting from the original masked dp_hash value iterate over the
+ * hash mapping table to find the first live bucket. As the buckets
+ * are quasi-randomly spread over the hash values, this maintains
+ * a distribution according to bucket weights even when some buckets
+ * are non-live. */
+for (int i = 0; i < n_hash; i++) {
+bucket = group->hash_map[(hash + i) % n_hash];
+if (bucket_is_alive(ctx, bucket, 0)) {
+best_bucket = bucket;
+break;
+}
+}
+
+return best_bucket;
+}
+
 static void
 xlate_dp_hash_select_group(struct xlate_ctx *ctx, struct group_dpif *group,
bool is_last_action)
 {
-struct ofputil_bucket *bucket;
-
 /* dp_hash value 0 is special since it means that the dp_hash has not been
  * computed, as all computed dp_hash values are non-zero.  Therefore
  * compare to zero can be used to decide if the dp_hash value is valid
  * without masking the dp_hash field. */
 if (!ctx->xin->flow.dp_hash) {
-uint64_t param = group->up.props.selection_method_param;
-
-ctx_trigger_recirculate_with_hash(ctx, param >> 32, (uint32_t)param);
+ctx_trigger_recirculate_with_hash(ctx, group->hash_alg,
+  group->hash_basis);
 } else {
-uint32_t n_buckets = group->up.n_buckets;
-if (n_buckets) {
-/* Minimal mask to cover the number of buckets. */
-   

Re: [ovs-dev] [PATCH V1 0/2] TC offload dump fix and add frag

2018-03-20 Thread Simon Horman
2018/03/20 17:21 "Roi Dayan" :



On 12/03/2018 14:58, Roi Dayan wrote:

> Hi,
>
> The first patch fixing error handling when parsing tc rules for dump flows.
> The second patch adds support for IP fragmentation to TC parsing.
>
> V1:
> - also support frag first/later
>
> Thanks,
> Roi
>
> Roi Dayan (2):
>lib/tc: Handle error parsing action in nl_parse_single_action
>netdev-tc-offloads: Add support for IP fragmentation
>
>   acinclude.m4 |  6 +++---
>   include/linux/pkt_cls.h  |  5 +++--
>   lib/netdev-tc-offloads.c | 38 --
>   lib/tc.c | 31 +--
>   lib/tc.h |  1 +
>   5 files changed, 64 insertions(+), 17 deletions(-)
>
>

Hi Ben,

any comments about the changes?


Hi Roi,

Sorry for the delay. I will review these.


Thanks,
Roi
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH V1 0/2] TC offload dump fix and add frag

2018-03-20 Thread Roi Dayan



On 12/03/2018 14:58, Roi Dayan wrote:

Hi,

The first patch fixing error handling when parsing tc rules for dump flows.
The second patch adds support for IP fragmentation to TC parsing.

V1:
- also support frag first/later

Thanks,
Roi

Roi Dayan (2):
   lib/tc: Handle error parsing action in nl_parse_single_action
   netdev-tc-offloads: Add support for IP fragmentation

  acinclude.m4 |  6 +++---
  include/linux/pkt_cls.h  |  5 +++--
  lib/netdev-tc-offloads.c | 38 --
  lib/tc.c | 31 +--
  lib/tc.h |  1 +
  5 files changed, 64 insertions(+), 17 deletions(-)




Hi Ben,

any comments about the changes?

Thanks,
Roi
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] rhel: don't drop capabilities when running as root

2018-03-20 Thread Timothy Redaelli
On Tue, 13 Feb 2018 16:42:16 -0500
Aaron Conole  wrote:

> Currently, regardless of which user is being set as the running user,
> Open vSwitch daemons on RHEL systems drop capabilities.  This means
> the very powerful CAP_SYS_ADMIN is dropped, even when the user is
> 'root'.
> 
> For the majority of use cases this behavior works, as the user can
> enable or disable various configurations, regardless of which datapath
> functions are desired.  However, when using certain DPDK PMDs, the
> enablement and configuration calls require CAP_SYS_ADMIN.
> 
> Instead of retaining CAP_SYS_ADMIN in all cases, which would
> practically nullify the uid/gid and privilege drop, we don't pass the
> --ovs-user option to the daemons.  This shunts the capability and
> privilege dropping code.
> 
> Reported-by: Marcos Felipe Schwarz 
> Reported-at:
> https://mail.openvswitch.org/pipermail/ovs-discuss/2018-January/045955.html
> Fixes: e3e738a3d058 ("redhat: allow dpdk to also run as non-root
> user") Signed-off-by: Aaron Conole  ---
> NOTE: I did test this a little bit on my system, passing packets, etc.
>   But more eyes can't be bad.
> 
>  rhel/usr_lib_systemd_system_ovs-vswitchd.service.in | 7 ---
>  rhel/usr_lib_systemd_system_ovsdb-server.service| 6 --
>  2 files changed, 8 insertions(+), 5 deletions(-)
> 

Acked-By: Timothy Redaelli 

-- 
Timothy Redaelli
Software Engineer
Red Hat Italia
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] ovn-controller: Handle Port_Binding's "requested-chassis" option in physical.c

2018-03-20 Thread Han Zhou
On Tue, Mar 20, 2018 at 12:51 AM, Numan Siddique 
wrote:
>
>
>
> On Tue, Mar 20, 2018 at 4:33 AM, Han Zhou  wrote:
>>
>>
>>
>> On Mon, Mar 19, 2018 at 12:52 PM,  wrote:
>> >
>> > From: Numan Siddique 
>> >
>> > ovn-controller is not considering Port_Binding option
"requested-chassis"
>> > when adding flows in table 0 and table 65. This patch adds this check.
>>
>> Hi Numan,
>>
>> The check looks good, but sorry that I didn't understand the problem.
E.g. there are 2 host A and B, and lsp1 is set as iface-id on both hosts.
When requested-chassis for lsp1 is A, then in SB DB the port-binding should
be on A. Then in physical.c when processing the port binding for lsp1, it
should always point to host A. So how could the VIF on B be reachable?
Could you help explain a little bit?
>>
>
> Sorry. I should have updated the commit message with more details. So I
sent out a v2 with the updated commit message -
https://patchwork.ozlabs.org/patch/888106/
> In this case what is happening is that ovn-controller on A is updating
the lsp1's Port_Binding.chassis to A which is as expected and it also
considers lsp1 is residing on it. But ovn-controller running on B is also
adding the OF flows in table 0 and table 65 for lsp1's vif interface
instead of considering lsp1 as a remote port. This is because the simap
"localvif_to_ofport" will have an entry for lsp1 and the code here never
gets hit -
https://github.com/openvswitch/ovs/blob/master/ovn/controller/physical.c#L480
and instead this gets hit -
https://github.com/openvswitch/ovs/blob/master/ovn/controller/physical.c#L507
.
>
> You could run the updated test in this patch without the fix and can see
the failures.
>
>
I see. So the wrong VIF is reachable ONLY from the local HV when this
happens, but from other HVs the correct VIF will be reached. I thought it
as some security problem if requested-chassis didn't work, but it turns out
to be more of a correctness problem rather than security :) Thanks for the
explain!

Acked-by: Han Zhou 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] netdev-dpdk: Remove 'error' from non error log.

2018-03-20 Thread Kevin Traynor
Presently, if OVS tries to setup more queues than
are allowed by a specific NIC, OVS will handle
this case by retrying with a lower amount of queues.

Rather than reporting initial failed queue setups
in the logs as ERROR, they are reported as INFO but
contain the word 'error'. Unless a user has detailed
knowledge of OVS-DPDK workings, this is confusing.

Let's remove 'error' and the DPDK error code from
the INFO log.

Signed-off-by: Kevin Traynor 
---
 lib/netdev-dpdk.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index af9843a..2032712 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -729,6 +729,6 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int 
n_rxq, int n_txq)
   dev->socket_id, NULL);
 if (diag) {
-VLOG_INFO("Interface %s txq(%d) setup error: %s",
-  dev->up.name, i, rte_strerror(-diag));
+VLOG_INFO("Interface %s unable to setup txq(%d)",
+  dev->up.name, i);
 break;
 }
@@ -745,6 +745,6 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int 
n_rxq, int n_txq)
   dev->socket_id, NULL, dev->mp);
 if (diag) {
-VLOG_INFO("Interface %s rxq(%d) setup error: %s",
-  dev->up.name, i, rte_strerror(-diag));
+VLOG_INFO("Interface %s unable to setup rxq(%d)",
+  dev->up.name, i);
 break;
 }
-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] can not well distributed when use dp_hash for ovs group

2018-03-20 Thread Jan Scheurich
Hi ychen,

We have been working on this problem and have a fully working patch that we 
will submit to the ML shortly.
We would be glad if you could give it a test and review.

Thanks, Jan

> -Original Message-
> From: ovs-dev-boun...@openvswitch.org 
> [mailto:ovs-dev-boun...@openvswitch.org] On Behalf Of ychen
> Sent: Tuesday, 20 March, 2018 14:20
> To: d...@openvswitch.org; ja...@ovn.org
> Subject: [ovs-dev] can not well distributed when use dp_hash for ovs group
> 
> hi,
>   I tested dp_hash for ovs group, and found that dp_hash can not well 
> distributed, some buckets even can not be selected.
>   In my testing environment, I have 11 buckets:
> group_id=131841,type=select,selection_method=dp_hash,
> bucket=bucket_id:51162,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]-
> >NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.29:80))),
> bucket=bucket_id:42099,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]-
> >NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.25:80))),
> bucket=bucket_id:53526,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]-
> >NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.27:80))),
> bucket=bucket_id:12221,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]-
> >NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.40:80))),
> bucket=bucket_id:2787,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]-
> >NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.26:80))),
> bucket=bucket_id:18951,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]-
> >NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.24:80))),
> bucket=bucket_id:32559,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]-
> >NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.62:80))),
> bucket=bucket_id:35550,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]-
> >NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.43:80))),
> bucket=bucket_id:9026,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]-
> >NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.57:80))),
> bucket=bucket_id:26811,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]-
> >NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.34:80)))
> 
> 
>  But about 3~5 buckets can not be selected always.
> 
> 
>   In the function xlate_dp_hash_select_group(), I found the code:
>   uint32_t mask = (1 << log_2_ceil(n_buckets)) - 1;
>   uint32_t basis = 0xc2b73583 * (ctx->xin->flow.dp_hash & mask);
> uint32_t score =  (hash_int(bucket->bucket_id, basis) & 0x) * 
> bucket->weight;
> 
> 
>  for the above formula, if n_buckets is 11, then there are totally 16 
> probabilities for basis.
> so how can we make sure the best score can well distributed in the 11 buckets?
> 
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] question about dp_packet lifetime

2018-03-20 Thread Alessandro Rosetti
Hi Darrell,

I'm developing netmap support for my thesis and I hope it will make it for
OVS 2.10.
In the next days I'm going to post the first prototype patch that is almost
ready

Thanks to you,
Alessandro

On 19 Mar 2018 9:26 pm, "Darrell Ball"  wrote:

> Hi Alessandro
>
> I also think this would be interesting.
> Is netmap integration being actively being worked on for OVS 2.10 ?
>
> Thanks Darrell
>
> On Wed, Feb 7, 2018 at 9:19 AM, Ilya Maximets 
> wrote:
>
>> > Hi,
>>
>> Hi, Alessandro.
>>
>> >
>> >   My name is Alessandro Rosetti, and I'm currently adding netmap
>> support to
>> > ovs, following an approach similar to DPDK.
>>
>> Good to know that someone started to work on this. IMHO, it's a good idea.
>> I also wanted to try to implement this someday, but had no much time.
>>
>> >
>> > I've created a new netdev: netdev_netmap that uses the pmd
>> infrastructure.
>> > The prototype I have seems to work fine (I still need to tune
>> performance,
>> > test optional features, and test more complex topologies.)
>>
>> Cool. Looking forward for your RFC patch-set.
>>
>> >
>> > I have a question about the lifetime of dp_packets.
>> > Is there any guarantee that the dp_packets allocated in a receive
>> callback
>> > (e.g. netdev_netmap_rxq_recv) are consumed by OVS (e.g. dropped,
>> cloned, or
>> > sent to other ports) **before** a subsequent call to the receive
>> callback
>> > (on the same port)?
>> > Or is it possible for dp_packets to be stored somewhere (e.g. in an OVS
>> > internal queue) and live across subsequent invocations of the receive
>> > callback that allocated them?
>>
>> I think that there was never such a guarantee, but recent changes in
>> userspace
>> datapath completely ruined this assumption. I mean output packet batching
>> support.
>>
>> Please refer the following commits for details:
>> 009e003 2017-12-14 | dpif-netdev: Output packet batching.
>> c71ea3c 2018-01-15 | dpif-netdev: Time based output batching.
>> 00adb8d 2018-01-15 | docs: Describe output packet batching in DPDK guide.
>>
>> >
>> > I need to know if this is the case to check that my current prototype is
>> > safe.
>> > I use per-port pre-allocation of dp_packets, for maximum performance.
>> I've
>> > seen that DPDK uses its internal allocator to allocate and deallocate
>> > dp_packets, but netmap does not expose one.
>> > Each packet received with netmap is created as a new type dp_packet:
>> > DPBUF_NETMAP. The data points to a netmap buffer (preallocated by the
>> > kernel).
>> > When I receive data (netdev_netmap_rxq_recv) I reuse the dp_packets,
>> > updating the internal pointer and a couple of additional informations
>> > stored inside the dp_packet.
>> > When I have to send data I use zero copy if dp_packet is DPBUF_NETMAP
>> and
>> > copy if it's not.
>> >
>> > Thanks for the help!
>> > Alessandro.
>>
>>
>> ___
>> dev mailing list
>> d...@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>
>
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] can not well distributed when use dp_hash for ovs group

2018-03-20 Thread ychen
hi, 
  I tested dp_hash for ovs group, and found that dp_hash can not well 
distributed, some buckets even can not be selected.
  In my testing environment, I have 11 buckets:
group_id=131841,type=select,selection_method=dp_hash,
bucket=bucket_id:51162,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]->NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.29:80))),
bucket=bucket_id:42099,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]->NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.25:80))),
bucket=bucket_id:53526,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]->NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.27:80))),
bucket=bucket_id:12221,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]->NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.40:80))),
bucket=bucket_id:2787,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]->NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.26:80))),
bucket=bucket_id:18951,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]->NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.24:80))),
bucket=bucket_id:32559,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]->NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.62:80))),
bucket=bucket_id:35550,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]->NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.43:80))),
bucket=bucket_id:9026,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]->NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.57:80))),
bucket=bucket_id:26811,weight:100,actions=ct(commit,table=70,zone=2,exec(move:NXM_OF_ETH_SRC[]->NXM_NX_CT_LABEL[0..47],nat(dst=10.204.8.34:80)))


 But about 3~5 buckets can not be selected always.


  In the function xlate_dp_hash_select_group(), I found the code:
  uint32_t mask = (1 << log_2_ceil(n_buckets)) - 1;
  uint32_t basis = 0xc2b73583 * (ctx->xin->flow.dp_hash & mask);
uint32_t score =  (hash_int(bucket->bucket_id, basis) & 0x) * 
bucket->weight;


 for the above formula, if n_buckets is 11, then there are totally 16 
probabilities for basis.
so how can we make sure the best score can well distributed in the 11 buckets?
 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] try_ukey_replace() try lock strategy

2018-03-20 Thread Eelco Chaudron

Hi Joe,

I'm investigating an issue where I've seen "handler_duplicate_upcall" 
incrementing once, and where "upcall_ukey_replace" is happening quite 
often.


This all relates to the try_ukey_replace() function:

+static bool
+try_ukey_replace(struct umap *umap, struct udpif_key *old_ukey,
+ struct udpif_key *new_ukey)
+    OVS_REQUIRES(umap->mutex)
+    OVS_TRY_LOCK(true, new_ukey->mutex)
+{
+    bool replaced = false;
+
+    if (!ovs_mutex_trylock(_ukey->mutex)) {
+    if (old_ukey->state == UKEY_EVICTED) {
+    /* The flow was deleted during the current revalidator dump,
+ * but its ukey won't be fully cleaned up until the sweep 
phase.

+ * In the mean time, we are receiving upcalls for this traffic.
+ * Expedite the (new) flow install by replacing the ukey. */
+    ovs_mutex_lock(_ukey->mutex);
+    cmap_replace(>cmap, _ukey->cmap_node,
+ _ukey->cmap_node, new_ukey->hash);
+    ovsrcu_postpone(ukey_delete__, old_ukey);
+    transition_ukey(old_ukey, UKEY_DELETED);
+    transition_ukey(new_ukey, UKEY_VISIBLE);
+    replaced = true;
+    }
+    ovs_mutex_unlock(_ukey->mutex);
+    }
+
+    if (replaced) {
+    COVERAGE_INC(upcall_ukey_replace);
+    } else {
+    COVERAGE_INC(handler_duplicate_upcall);
+    }
+    return replaced;
+}
+



Currently, there is no debugging in place to get an idea if the lock 
failed or the wrong state it was in. I'll instrument the code if we can 
get it to fail again/reliably.


In the meantime, I was wondering why we do an ovs_mutex_trylock(), and 
not just a lock()?


Unfortunately, I've only seen it once, and replication efforts have not 
been successful.


Thanks,

Eelco
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v3] ovn-controller: Handle Port_Binding's "requested-chassis" option in physical.c

2018-03-20 Thread Jakub Sitnicki
On Tue, 20 Mar 2018 16:59:42 +0530
nusid...@redhat.com wrote:

> From: Numan Siddique 
> 
> When a Logical_Switch_Port P's options is set with 'requested-chassis=hv1'
> and if the user has bound this logical port to two OVS interfaces each in
> different host (eg. hv1 and hv2), then ovn-controller in hv1 sets the
> P's Port_Binding.chassis to hv1 which is as expected. But on hv2, 
> ovn-controller
> is adding OF flows in table 0 and table 65 for the OVS interface instead of
> considering 'P' as a remote port. When another logical port bound on hv2,
> pings to the logical port 'P', the packet gets delivered to hv2 OVS interface
> instead of hv1 OVS interface, which is wrong.
> 
> This scenario is most likely to happen when requested-chassis option is used
> by CMS during migration of a VM from one chassis to another.
> 
> This patch fixes this issue by checking the Port_Binding's "requested-chassis"
> option in physical.c before adding the flows in table 0 an 65.
> 
> Reported-by: Marcin Mirecki 
> Reported-at: 
> https://mail.openvswitch.org/pipermail/ovs-dev/2018-March/345266.html
> Signed-off-by: Numan Siddique 
> Tested-by: Marcin Mirecki 
> ---
> 
> v2 -> v3: Addressed the review comment from Jakub Sitnicki
> v1 -> v2: Updated the commit message
> 
>  ovn/controller/physical.c | 11 +++
>  tests/ovn.at  | 32 +++-
>  2 files changed, 38 insertions(+), 5 deletions(-)

Reviewed-by: Jakub Sitnicki 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3] ovn-controller: Handle Port_Binding's "requested-chassis" option in physical.c

2018-03-20 Thread nusiddiq
From: Numan Siddique 

When a Logical_Switch_Port P's options is set with 'requested-chassis=hv1'
and if the user has bound this logical port to two OVS interfaces each in
different host (eg. hv1 and hv2), then ovn-controller in hv1 sets the
P's Port_Binding.chassis to hv1 which is as expected. But on hv2, ovn-controller
is adding OF flows in table 0 and table 65 for the OVS interface instead of
considering 'P' as a remote port. When another logical port bound on hv2,
pings to the logical port 'P', the packet gets delivered to hv2 OVS interface
instead of hv1 OVS interface, which is wrong.

This scenario is most likely to happen when requested-chassis option is used
by CMS during migration of a VM from one chassis to another.

This patch fixes this issue by checking the Port_Binding's "requested-chassis"
option in physical.c before adding the flows in table 0 an 65.

Reported-by: Marcin Mirecki 
Reported-at: 
https://mail.openvswitch.org/pipermail/ovs-dev/2018-March/345266.html
Signed-off-by: Numan Siddique 
Tested-by: Marcin Mirecki 
---

v2 -> v3: Addressed the review comment from Jakub Sitnicki
v1 -> v2: Updated the commit message

 ovn/controller/physical.c | 11 +++
 tests/ovn.at  | 32 +++-
 2 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c
index 5a80e2cda..8c92c1d9b 100644
--- a/ovn/controller/physical.c
+++ b/ovn/controller/physical.c
@@ -466,6 +466,17 @@ consider_port_binding(struct controller_ctx *ctx,
 } else {
 ofport = u16_to_ofp(simap_get(_to_ofport,
   binding->logical_port));
+const char *requested_chassis = smap_get(>options,
+ "requested-chassis");
+if (ofport && requested_chassis && requested_chassis[0] &&
+strcmp(requested_chassis, chassis->name) &&
+strcmp(requested_chassis, chassis->hostname)) {
+/* Even though there is an ofport for this port_binding, it is
+ * requested on a different chassis. So ignore this ofport.
+ */
+ofport = 0;
+}
+
 if ((!strcmp(binding->type, "localnet")
 || !strcmp(binding->type, "l2gateway"))
 && ofport && binding->tag) {
diff --git a/tests/ovn.at b/tests/ovn.at
index 5f985f345..75b32ef1a 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -9112,13 +9112,15 @@ sim_add hv1
 as hv1
 ovs-vsctl add-br br-phys
 ovn_attach n1 br-phys 192.168.0.11
-ovs-vsctl -- add-port br-int hv1-vif0
+ovs-vsctl -- add-port br-int hv1-vif0 -- \
+set Interface hv1-vif0 ofport-request=1
 
 sim_add hv2
 as hv2
 ovs-vsctl add-br br-phys
 ovn_attach n1 br-phys 192.168.0.12
-ovs-vsctl -- add-port br-int hv2-vif0
+ovs-vsctl -- add-port br-int hv2-vif0 -- \
+set Interface hv2-vif0 ofport-request=1
 
 # Allow only chassis hv1 to bind logical port lsp0.
 ovn-nbctl lsp-set-options lsp0 requested-chassis=hv1
@@ -9138,7 +9140,11 @@ ovs-vsctl set interface hv2-vif0 
external-ids:iface-id=lsp0
 OVS_WAIT_UNTIL([test 1 = $(grep -c "Not claiming lport lsp0" 
hv2/ovn-controller.log)])
 AT_CHECK([test x$(ovn-sbctl --bare --columns chassis find port_binding 
logical_port=lsp0) = x], [0], [])
 
-# (2) Chassis hv1 should bind lsp0 when physical to logical mapping exists on 
hv1.
+# (2) Chassis hv2 should not add flows in OFTABLE_PHY_TO_LOG and 
OFTABLE_LOG_TO_PHY tables.
+AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=0 | grep in_port=1], [1], 
[])
+AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=65 | grep output], [1], [])
+
+# (3) Chassis hv1 should bind lsp0 when physical to logical mapping exists on 
hv1.
 echo "verifying that hv1 binds lsp0 when hv1 physical/logical mapping is added"
 as hv1
 ovs-vsctl set interface hv1-vif0 external-ids:iface-id=lsp0
@@ -9146,7 +9152,12 @@ ovs-vsctl set interface hv1-vif0 
external-ids:iface-id=lsp0
 OVS_WAIT_UNTIL([test 1 = $(grep -c "Claiming lport lsp0" 
hv1/ovn-controller.log)])
 AT_CHECK([test $(ovn-sbctl --bare --columns chassis find port_binding 
logical_port=lsp0) = "$hv1_uuid"], [0], [])
 
-# (3) Chassis hv1 should release lsp0 binding and chassis hv2 should bind lsp0 
when
+# (4) Chassis hv1 should add flows in OFTABLE_PHY_TO_LOG and 
OFTABLE_LOG_TO_PHY tables.
+as hv1 ovs-ofctl dump-flows br-int
+AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=0 | grep in_port=1], [0], 
[ignore])
+AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=65 | grep 
actions=output:1], [0], [ignore])
+
+# (5) Chassis hv1 should release lsp0 binding and chassis hv2 should bind lsp0 
when
 # the requested chassis for lsp0 is changed from hv1 to hv2.
 echo "verifying that lsp0 binding moves when requested-chassis is changed"
 
@@ -9154,6 +9165,13 @@ ovn-nbctl lsp-set-options lsp0 requested-chassis=hv2
 OVS_WAIT_UNTIL([test 1 = $(grep -c "Releasing lport lsp0 

Re: [ovs-dev] [PATCH v7 1/6] dpif-netdev: associate flow with a mark id

2018-03-20 Thread Shahaf Shuler
Hi Ian,

Thursday, March 15, 2018 1:55 PM, Stokes, Ian:
> Adding Shahaf Shuler
> > > +static void
> > > +try_netdev_flow_put(struct dp_netdev_pmd_thread *pmd,
> odp_port_t
> > in_port,
> > > +struct dp_netdev_flow *flow, struct match *match,
> > > +const struct nlattr *actions, size_t actions_len) {
> > > +struct offload_info info;
> > > +struct dp_netdev_port *port;
> > > +bool modification = flow->mark != INVALID_FLOW_MARK;
> > > +const char *op = modification ? "modify" : "add";
> > > +uint32_t mark;
> > > +int ret;
> > > +
> > > +ovs_mutex_lock(_mark.mutex);
> > > +
> > > +if (modification) {
> > > +mark = flow->mark;
> > > +} else {
> > > +if (!netdev_is_flow_api_enabled()) {
> > > +goto out;
> > > +}
> > > +
> > > +/*
> > > + * If a mega flow has already been offloaded (from other PMD
> > > + * instances), do not offload it again.
> > > + */
> > > +mark = megaflow_to_mark_find(>mega_ufid);
> > > +if (mark != INVALID_FLOW_MARK) {
> > > +VLOG_DBG("flow has already been offloaded with mark
> > > + %u\n",
> > > mark);
> > > +mark_to_flow_associate(mark, flow);
> > > +goto out;
> > > +}
> > > +
> > > +mark = flow_mark_alloc();
> > > +if (mark == INVALID_FLOW_MARK) {
> > > +VLOG_ERR("failed to allocate flow mark!\n");
> > > +goto out;
> > > +}
> > > +}
> > > +info.flow_mark = mark;
> > > +
> > > +ovs_mutex_lock(>dp->port_mutex);
> > > +port = dp_netdev_lookup_port(pmd->dp, in_port);
> > > +if (!port) {
> > > +ovs_mutex_unlock(>dp->port_mutex);
> > > +goto out;
> > > +}
> > > +ret = netdev_flow_put(port->netdev, match,
> > > +  CONST_CAST(struct nlattr *, actions),
> > > +  actions_len, >mega_ufid, , NULL);
> > > +ovs_mutex_unlock(>dp->port_mutex);
> > > +
> > > +if (ret) {
> > > +VLOG_ERR("failed to %s netdev flow with mark %u\n", op, mark);
> > > +if (!modification) {
> > > +flow_mark_free(mark);
> > > +} else {
> > > +mark_to_flow_disassociate(pmd, flow);
> >
> >In the case here would it not be a call to flush? If
> > the flow could not be modified wouldn't you have to remove all
> > references for it not just on this PMD?

I discussed it with Finn,  

Let's first agree on the case were flow modification succeeds. Please correct 
me if I am wrong about the flow here. For flow modification each PMD will 
attempt to modify the flow on its private flow table. 
This will result in multiple flow destroy and creation on the device 
(netdev_flow_put). After the last PMD finishes each PMD has the modified flow 
on its private table and the device has one flow with a single mark which was 
created by the last PMD.

If such behavior is OK, then the error case should also be OK. In case of flow 
modification error, each PMD will disassociate the flow from its private table. 
The last PMD which has the last reference for the flow mark will remove it also 
from the device. Hence there is no need to flush the flow from each PMD upon 
the first failure. 





___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2] ovn-controller: Handle Port_Binding's "requested-chassis" option in physical.c

2018-03-20 Thread Numan Siddique
On Tue, Mar 20, 2018 at 3:27 PM, Jakub Sitnicki  wrote:

> Hi Numan,
>
> On Tue, 20 Mar 2018 13:06:33 +0530
> nusid...@redhat.com wrote:
>
> > From: Numan Siddique 
> >
> > When a Logical_Switch_Port P's options is set with
> 'requested-chassis=hv1'
> > and if the user has bound this logical port to two OVS interfaces each in
> > different host (eg. hv1 and hv2), then ovn-controller in hv1 sets the
> > P's Port_Binding.chassis to hv1 which is as expected. But on hv2,
> ovn-controller
> > is adding OF flows in table 0 and table 65 for the OVS interface instead
> of
> > considering 'P' as a remote port. When another logical port bound on hv2,
> > pings to the logical port 'P', the packet gets delivered to hv2 OVS
> interface
> > instead of hv1 OVS interface, which is wrong.
> >
> > This scenario is most likely to happen when requested-chassis option is
> used
> > by CMS during migration of a VM from one chassis to another.
> >
> > This patch fixes this issue by checking the Port_Binding's
> "requested-chassis"
> > option in physical.c before adding the flows in table 0 an 65.
> >
> > Reported-by: Marcin Mirecki 
> > Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2018-March/
> 345266.html
> >
> > Signed-off-by: Numan Siddique 
> > ---
> >
> > v1 -> v2 : Updated the commit message with more details
> >
> >  ovn/controller/physical.c | 11 +++
> >  tests/ovn.at  | 32 +++-
> >  2 files changed, 38 insertions(+), 5 deletions(-)
> >
> > diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c
> > index 5a80e2cda..84113ebd2 100644
> > --- a/ovn/controller/physical.c
> > +++ b/ovn/controller/physical.c
> > @@ -466,6 +466,17 @@ consider_port_binding(struct controller_ctx *ctx,
> >  } else {
> >  ofport = u16_to_ofp(simap_get(_to_ofport,
> >binding->logical_port));
> > +const char *requested_chassis = smap_get(>options,
> > + "requested-chassis");
> > +if (ofport && requested_chassis &&
> > +strcmp(requested_chassis, chassis->name) &&
> > +strcmp(requested_chassis, chassis->hostname)) {
>
> Is it possible to get an unintended match here on an empty string?
>
> We never should, right? But it seems ->hostname could be empty if
> things go wrong with gethostname() in chassis_run().
>
> It caught my attention because in the only other place where we match on
> "requested-chassis", in consider_local_datapath(), there is a check to
> rule out the empty string:
>
> const char *vif_chassis = smap_get(_rec->options,
>"requested-chassis");
> bool can_bind = !vif_chassis || !vif_chassis[0]
> || !strcmp(vif_chassis, chassis_rec->name)
> || !strcmp(vif_chassis, chassis_rec->hostname);
>
> WDYT?
>

Thanks for pointing it out. I agree. I will add the additional check.



> -Jakub
>
> > +/* Even though there is an ofport for this port_binding, it
> is
> > + * requested on a different chassis. So ignore this ofport.
> > + */
> > +ofport = 0;
> > +}
> > +
> >  if ((!strcmp(binding->type, "localnet")
> >  || !strcmp(binding->type, "l2gateway"))
> >  && ofport && binding->tag) {
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2] ovn-controller: Handle Port_Binding's "requested-chassis" option in physical.c

2018-03-20 Thread Jakub Sitnicki
Hi Numan,

On Tue, 20 Mar 2018 13:06:33 +0530
nusid...@redhat.com wrote:

> From: Numan Siddique 
> 
> When a Logical_Switch_Port P's options is set with 'requested-chassis=hv1'
> and if the user has bound this logical port to two OVS interfaces each in
> different host (eg. hv1 and hv2), then ovn-controller in hv1 sets the
> P's Port_Binding.chassis to hv1 which is as expected. But on hv2, 
> ovn-controller
> is adding OF flows in table 0 and table 65 for the OVS interface instead of
> considering 'P' as a remote port. When another logical port bound on hv2,
> pings to the logical port 'P', the packet gets delivered to hv2 OVS interface
> instead of hv1 OVS interface, which is wrong.
> 
> This scenario is most likely to happen when requested-chassis option is used
> by CMS during migration of a VM from one chassis to another.
> 
> This patch fixes this issue by checking the Port_Binding's "requested-chassis"
> option in physical.c before adding the flows in table 0 an 65.
> 
> Reported-by: Marcin Mirecki 
> Reported-at: 
> https://mail.openvswitch.org/pipermail/ovs-dev/2018-March/345266.html
> 
> Signed-off-by: Numan Siddique 
> ---
> 
> v1 -> v2 : Updated the commit message with more details
> 
>  ovn/controller/physical.c | 11 +++
>  tests/ovn.at  | 32 +++-
>  2 files changed, 38 insertions(+), 5 deletions(-)
> 
> diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c
> index 5a80e2cda..84113ebd2 100644
> --- a/ovn/controller/physical.c
> +++ b/ovn/controller/physical.c
> @@ -466,6 +466,17 @@ consider_port_binding(struct controller_ctx *ctx,
>  } else {
>  ofport = u16_to_ofp(simap_get(_to_ofport,
>binding->logical_port));
> +const char *requested_chassis = smap_get(>options,
> + "requested-chassis");
> +if (ofport && requested_chassis &&
> +strcmp(requested_chassis, chassis->name) &&
> +strcmp(requested_chassis, chassis->hostname)) {

Is it possible to get an unintended match here on an empty string?

We never should, right? But it seems ->hostname could be empty if
things go wrong with gethostname() in chassis_run().

It caught my attention because in the only other place where we match on
"requested-chassis", in consider_local_datapath(), there is a check to
rule out the empty string:

const char *vif_chassis = smap_get(_rec->options,
   "requested-chassis");
bool can_bind = !vif_chassis || !vif_chassis[0]
|| !strcmp(vif_chassis, chassis_rec->name)
|| !strcmp(vif_chassis, chassis_rec->hostname);

WDYT?

-Jakub

> +/* Even though there is an ofport for this port_binding, it is
> + * requested on a different chassis. So ignore this ofport.
> + */
> +ofport = 0;
> +}
> +
>  if ((!strcmp(binding->type, "localnet")
>  || !strcmp(binding->type, "l2gateway"))
>  && ofport && binding->tag) {
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2] ovn-controller: Handle Port_Binding's "requested-chassis" option in physical.c

2018-03-20 Thread Marcin Mirecki
I tested the patch and it worked as expected

Thanks!

On Tue, Mar 20, 2018 at 10:13 AM, Numan Siddique 
wrote:

-- Forwarded message --
From: 
Date: Tue, Mar 20, 2018 at 1:06 PM
Subject: [PATCH v2] ovn-controller: Handle Port_Binding's
"requested-chassis" option in physical.c
To: d...@openvswitch.org
Cc: Numan Siddique 


From: Numan Siddique 

When a Logical_Switch_Port P's options is set with 'requested-chassis=hv1'
and if the user has bound this logical port to two OVS interfaces each in
different host (eg. hv1 and hv2), then ovn-controller in hv1 sets the
P's Port_Binding.chassis to hv1 which is as expected. But on hv2,
ovn-controller
is adding OF flows in table 0 and table 65 for the OVS interface instead of
considering 'P' as a remote port. When another logical port bound on hv2,
pings to the logical port 'P', the packet gets delivered to hv2 OVS
interface
instead of hv1 OVS interface, which is wrong.

This scenario is most likely to happen when requested-chassis option is used
by CMS during migration of a VM from one chassis to another.

This patch fixes this issue by checking the Port_Binding's
"requested-chassis"
option in physical.c before adding the flows in table 0 an 65.

Reported-by: Marcin Mirecki 
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2018-March/34
5266.html

Signed-off-by: Numan Siddique 

Tested-by: Marcin Mirecki 
---

v1 -> v2 : Updated the commit message with more details

 ovn/controller/physical.c | 11 +++
 tests/ovn.at  | 32 +++-
 2 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c
index 5a80e2cda..84113ebd2 100644
--- a/ovn/controller/physical.c
+++ b/ovn/controller/physical.c
@@ -466,6 +466,17 @@ consider_port_binding(struct controller_ctx *ctx,
 } else {
 ofport = u16_to_ofp(simap_get(_to_ofport,
   binding->logical_port));
+const char *requested_chassis = smap_get(>options,
+ "requested-chassis");
+if (ofport && requested_chassis &&
+strcmp(requested_chassis, chassis->name) &&
+strcmp(requested_chassis, chassis->hostname)) {
+/* Even though there is an ofport for this port_binding, it is
+ * requested on a different chassis. So ignore this ofport.
+ */
+ofport = 0;
+}
+
 if ((!strcmp(binding->type, "localnet")
 || !strcmp(binding->type, "l2gateway"))
 && ofport && binding->tag) {
diff --git a/tests/ovn.at b/tests/ovn.at
index 5f985f345..75b32ef1a 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -9112,13 +9112,15 @@ sim_add hv1
 as hv1
 ovs-vsctl add-br br-phys
 ovn_attach n1 br-phys 192.168.0.11
-ovs-vsctl -- add-port br-int hv1-vif0
+ovs-vsctl -- add-port br-int hv1-vif0 -- \
+set Interface hv1-vif0 ofport-request=1

 sim_add hv2
 as hv2
 ovs-vsctl add-br br-phys
 ovn_attach n1 br-phys 192.168.0.12
-ovs-vsctl -- add-port br-int hv2-vif0
+ovs-vsctl -- add-port br-int hv2-vif0 -- \
+set Interface hv2-vif0 ofport-request=1

 # Allow only chassis hv1 to bind logical port lsp0.
 ovn-nbctl lsp-set-options lsp0 requested-chassis=hv1
@@ -9138,7 +9140,11 @@ ovs-vsctl set interface hv2-vif0
external-ids:iface-id=lsp0
 OVS_WAIT_UNTIL([test 1 = $(grep -c "Not claiming lport lsp0"
hv2/ovn-controller.log)])
 AT_CHECK([test x$(ovn-sbctl --bare --columns chassis find port_binding
logical_port=lsp0) = x], [0], [])

-# (2) Chassis hv1 should bind lsp0 when physical to logical mapping exists
on hv1.
+# (2) Chassis hv2 should not add flows in OFTABLE_PHY_TO_LOG and
OFTABLE_LOG_TO_PHY tables.
+AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=0 | grep in_port=1],
[1], [])
+AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=65 | grep output], [1],
[])
+
+# (3) Chassis hv1 should bind lsp0 when physical to logical mapping exists
on hv1.
 echo "verifying that hv1 binds lsp0 when hv1 physical/logical mapping is
added"
 as hv1
 ovs-vsctl set interface hv1-vif0 external-ids:iface-id=lsp0
@@ -9146,7 +9152,12 @@ ovs-vsctl set interface hv1-vif0
external-ids:iface-id=lsp0
 OVS_WAIT_UNTIL([test 1 = $(grep -c "Claiming lport lsp0"
hv1/ovn-controller.log)])
 AT_CHECK([test $(ovn-sbctl --bare --columns chassis find port_binding
logical_port=lsp0) = "$hv1_uuid"], [0], [])

-# (3) Chassis hv1 should release lsp0 binding and chassis hv2 should bind
lsp0 when
+# (4) Chassis hv1 should add flows in OFTABLE_PHY_TO_LOG and
OFTABLE_LOG_TO_PHY tables.
+as hv1 ovs-ofctl dump-flows br-int
+AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=0 | grep in_port=1],
[0], [ignore])
+AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=65 | grep
actions=output:1], [0], [ignore])
+
+# (5) Chassis hv1 should release 

Re: [ovs-dev] [PATCH] netdev-dpdk: Refactor custom stats.

2018-03-20 Thread Ilya Maximets
On 19.03.2018 13:22, Weglicki, MichalX wrote:
> Hello, 

Hello.

> 
> I've went through the patch quite carefully. 

Thanks for reviewing this.

> Main change refactors creation cached IDs and Names from IF-like code block 
> to "Goto" code block. 

Current code is over-nested. It has nesting level of 6 (7 including function 
definition).
It's like:
netdev_dpdk_configure_xstats
{
if () {
if () {
...
} else {
...
if () {
...
if () {
} else if {
}
...
if () {
for () {
if () {   <-- level #7 !
}
}
} else {
}
}
}
} else {
}
if () {
}
}


IMHO, This is not readable. Especially, combining with constant line wraps 
because
of limited line lengths. I wanted to make plain execution sequence to simplify
understanding of the code. Most of the 'if' statements are just error checking.
And the single exit point from such conditions usually implemented by 'goto's.
It's a common practice for system programming. It doesn't worth to keep so deep
nesting level for error checking conditions. This also matches the style of all
the other code in netdev-dpdk and not only here.

> 
> There are also some initializations removal, which I don't personally agree 
> with - even if those seems to be not needed, code may always evolve in the 
> future.

Could you, please, specify?

> There is one xcalloc pointless check, true. 
> 
> The last important thing is that counters configuration can change during 
> runtime, and I was facing a problem, where amount of counters was different 
> during some configuration stages. That is why my initial implementation was 
> looking for counter name based on given ID, not returned index in array => 
> That was a reason to keep whole list of names, but only limited IDs(filtered 
> out). Also I do think that returned array should be checked against IDs, as 
> implementation can always change in the future as well.

Hmm. OK. But I think, in this case we could even more simplify the code.
As I understand, the 'netdev_dpdk_reconfigure' is the only function that
could make influence on the stats counters in HW. Correct me, if I'm wrong.
In this case we could call 'netdev_dpdk_configure_xstats()' only once at the
end of reconfiguration. After that all the inconsistency with returned
from DPDK values should be treated as HW or DPDK error and we should not
handle this case in OVS and just do not return any stats.
This will simplify 'netdev_dpdk_get_custom_stats()' too.

We also could refactor 'netdev_dpdk_get_stats' in a same way and reuse
already configured xstats.

What do you think?

Best regards, Ilya Maximets.

> 
> Br, 
> Michal. 
> 
>> -Original Message-
>> From: Ilya Maximets [mailto:i.maxim...@samsung.com]
>> Sent: Tuesday, January 23, 2018 2:14 PM
>> To: ovs-dev@openvswitch.org
>> Cc: Heetae Ahn ; Ben Pfaff ; Stokes, 
>> Ian ; Weglicki, MichalX
>> ; Ilya Maximets 
>> Subject: [PATCH] netdev-dpdk: Refactor custom stats.
>>
>> This code is overcomplicated and completely unreadable. And a
>> bunch of recently fixed memory leaks confirms that statement.
>>
>> Main concerns that were fixed:
>> * Too big nesting level.
>> * Useless checks like pointer checking after xmalloc.
>> * Misleading comments.
>> * Bad names of the variables.
>>
>> As a bonus, size of the code significantly reduced.
>>
>> Signed-off-by: Ilya Maximets 
>> ---
>>
>> This patch made on top of memory leaks' fixes:
>> * https://mail.openvswitch.org/pipermail/ovs-dev/2018-January/343537.html
>> * https://mail.openvswitch.org/pipermail/ovs-dev/2018-January/343538.html
>>
>>  lib/netdev-dpdk.c | 215 
>> --
>>  1 file changed, 80 insertions(+), 135 deletions(-)
>>
>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>> index 50a94d1..e834c7e 100644
>> --- a/lib/netdev-dpdk.c
>> +++ b/lib/netdev-dpdk.c
>> @@ -437,10 +437,9 @@ struct netdev_dpdk {
>>
>>  PADDED_MEMBERS(CACHE_LINE_SIZE,
>>  /* Names of all XSTATS counters */
>> -struct rte_eth_xstat_name *rte_xstats_names;
>> -int rte_xstats_names_size;
>> -int rte_xstats_ids_size;
>> -uint64_t *rte_xstats_ids;
>> +struct rte_eth_xstat_name *xstats_names;
>> +uint64_t *xstats_ids;
>> +int xstats_count;
>>  );
>>  };
>>
>> @@ -901,6 +900,8 @@ common_construct(struct netdev *netdev, dpdk_port_t 
>> port_no,
>>  dev->vhost_reconfigured = false;
>>  dev->attached = false;
>>
>> +dev->xstats_count = 0;
>> +
>>  ovsrcu_init(>qos_conf, NULL);
>>
>>  ovsrcu_init(>ingress_policer, NULL);
>> @@ -925,13 +926,6 @@ 

Re: [ovs-dev] [PATCH] ovn-controller: Handle Port_Binding's "requested-chassis" option in physical.c

2018-03-20 Thread Numan Siddique
On Tue, Mar 20, 2018 at 4:33 AM, Han Zhou  wrote:

>
>
> On Mon, Mar 19, 2018 at 12:52 PM,  wrote:
> >
> > From: Numan Siddique 
> >
> > ovn-controller is not considering Port_Binding option "requested-chassis"
> > when adding flows in table 0 and table 65. This patch adds this check.
>
> Hi Numan,
>
> The check looks good, but sorry that I didn't understand the problem. E.g.
> there are 2 host A and B, and lsp1 is set as iface-id on both hosts. When
> requested-chassis for lsp1 is A, then in SB DB the port-binding should be
> on A. Then in physical.c when processing the port binding for lsp1, it
> should always point to host A. So how could the VIF on B be reachable?
> Could you help explain a little bit?
>
>
Sorry. I should have updated the commit message with more details. So I
sent out a v2 with the updated commit message -
https://patchwork.ozlabs.org/patch/888106/
In this case what is happening is that ovn-controller on A is updating the
lsp1's Port_Binding.chassis to A which is as expected and it also considers
lsp1 is residing on it. But ovn-controller running on B is also adding the
OF flows in table 0 and table 65 for lsp1's vif interface instead of
considering lsp1 as a remote port. This is because the simap
"localvif_to_ofport" will have an entry for lsp1 and the code here never
gets hit -
https://github.com/openvswitch/ovs/blob/master/ovn/controller/physical.c#L480
and instead this gets hit -
https://github.com/openvswitch/ovs/blob/master/ovn/controller/physical.c#L507
.

You could run the updated test in this patch without the fix and can see
the failures.

Thanks
Numan





> Thanks,
> Han
> >
> > Reported-by: Marcin Mirecki 
> > Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2018-March/
> 345266.html
> >
> > Signed-off-by: Numan Siddique 
> > ---
> >  ovn/controller/physical.c | 11 +++
> >  tests/ovn.at  | 32 +++-
> >  2 files changed, 38 insertions(+), 5 deletions(-)
> >
> > diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c
> > index 5a80e2cda..84113ebd2 100644
> > --- a/ovn/controller/physical.c
> > +++ b/ovn/controller/physical.c
> > @@ -466,6 +466,17 @@ consider_port_binding(struct controller_ctx *ctx,
> >  } else {
> >  ofport = u16_to_ofp(simap_get(_to_ofport,
> >binding->logical_port));
> > +const char *requested_chassis = smap_get(>options,
> > + "requested-chassis");
> > +if (ofport && requested_chassis &&
> > +strcmp(requested_chassis, chassis->name) &&
> > +strcmp(requested_chassis, chassis->hostname)) {
> > +/* Even though there is an ofport for this port_binding, it
> is
> > + * requested on a different chassis. So ignore this ofport.
> > + */
> > +ofport = 0;
> > +}
> > +
> >  if ((!strcmp(binding->type, "localnet")
> >  || !strcmp(binding->type, "l2gateway"))
> >  && ofport && binding->tag) {
> > diff --git a/tests/ovn.at b/tests/ovn.at
> > index 5f985f345..75b32ef1a 100644
> > --- a/tests/ovn.at
> > +++ b/tests/ovn.at
> > @@ -9112,13 +9112,15 @@ sim_add hv1
> >  as hv1
> >  ovs-vsctl add-br br-phys
> >  ovn_attach n1 br-phys 192.168.0.11
> > -ovs-vsctl -- add-port br-int hv1-vif0
> > +ovs-vsctl -- add-port br-int hv1-vif0 -- \
> > +set Interface hv1-vif0 ofport-request=1
> >
> >  sim_add hv2
> >  as hv2
> >  ovs-vsctl add-br br-phys
> >  ovn_attach n1 br-phys 192.168.0.12
> > -ovs-vsctl -- add-port br-int hv2-vif0
> > +ovs-vsctl -- add-port br-int hv2-vif0 -- \
> > +set Interface hv2-vif0 ofport-request=1
> >
> >  # Allow only chassis hv1 to bind logical port lsp0.
> >  ovn-nbctl lsp-set-options lsp0 requested-chassis=hv1
> > @@ -9138,7 +9140,11 @@ ovs-vsctl set interface hv2-vif0
> external-ids:iface-id=lsp0
> >  OVS_WAIT_UNTIL([test 1 = $(grep -c "Not claiming lport lsp0"
> hv2/ovn-controller.log)])
> >  AT_CHECK([test x$(ovn-sbctl --bare --columns chassis find port_binding
> logical_port=lsp0) = x], [0], [])
> >
> > -# (2) Chassis hv1 should bind lsp0 when physical to logical mapping
> exists on hv1.
> > +# (2) Chassis hv2 should not add flows in OFTABLE_PHY_TO_LOG and
> OFTABLE_LOG_TO_PHY tables.
> > +AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=0 | grep in_port=1],
> [1], [])
> > +AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=65 | grep output],
> [1], [])
> > +
> > +# (3) Chassis hv1 should bind lsp0 when physical to logical mapping
> exists on hv1.
> >  echo "verifying that hv1 binds lsp0 when hv1 physical/logical mapping
> is added"
> >  as hv1
> >  ovs-vsctl set interface hv1-vif0 external-ids:iface-id=lsp0
> > @@ -9146,7 +9152,12 @@ ovs-vsctl set interface hv1-vif0
> external-ids:iface-id=lsp0
> >  OVS_WAIT_UNTIL([test 1 = $(grep -c "Claiming lport 

[ovs-dev] [PATCH v2] ovn-controller: Handle Port_Binding's "requested-chassis" option in physical.c

2018-03-20 Thread nusiddiq
From: Numan Siddique 

When a Logical_Switch_Port P's options is set with 'requested-chassis=hv1'
and if the user has bound this logical port to two OVS interfaces each in
different host (eg. hv1 and hv2), then ovn-controller in hv1 sets the
P's Port_Binding.chassis to hv1 which is as expected. But on hv2, ovn-controller
is adding OF flows in table 0 and table 65 for the OVS interface instead of
considering 'P' as a remote port. When another logical port bound on hv2,
pings to the logical port 'P', the packet gets delivered to hv2 OVS interface
instead of hv1 OVS interface, which is wrong.

This scenario is most likely to happen when requested-chassis option is used
by CMS during migration of a VM from one chassis to another.

This patch fixes this issue by checking the Port_Binding's "requested-chassis"
option in physical.c before adding the flows in table 0 an 65.

Reported-by: Marcin Mirecki 
Reported-at: 
https://mail.openvswitch.org/pipermail/ovs-dev/2018-March/345266.html

Signed-off-by: Numan Siddique 
---

v1 -> v2 : Updated the commit message with more details

 ovn/controller/physical.c | 11 +++
 tests/ovn.at  | 32 +++-
 2 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c
index 5a80e2cda..84113ebd2 100644
--- a/ovn/controller/physical.c
+++ b/ovn/controller/physical.c
@@ -466,6 +466,17 @@ consider_port_binding(struct controller_ctx *ctx,
 } else {
 ofport = u16_to_ofp(simap_get(_to_ofport,
   binding->logical_port));
+const char *requested_chassis = smap_get(>options,
+ "requested-chassis");
+if (ofport && requested_chassis &&
+strcmp(requested_chassis, chassis->name) &&
+strcmp(requested_chassis, chassis->hostname)) {
+/* Even though there is an ofport for this port_binding, it is
+ * requested on a different chassis. So ignore this ofport.
+ */
+ofport = 0;
+}
+
 if ((!strcmp(binding->type, "localnet")
 || !strcmp(binding->type, "l2gateway"))
 && ofport && binding->tag) {
diff --git a/tests/ovn.at b/tests/ovn.at
index 5f985f345..75b32ef1a 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -9112,13 +9112,15 @@ sim_add hv1
 as hv1
 ovs-vsctl add-br br-phys
 ovn_attach n1 br-phys 192.168.0.11
-ovs-vsctl -- add-port br-int hv1-vif0
+ovs-vsctl -- add-port br-int hv1-vif0 -- \
+set Interface hv1-vif0 ofport-request=1
 
 sim_add hv2
 as hv2
 ovs-vsctl add-br br-phys
 ovn_attach n1 br-phys 192.168.0.12
-ovs-vsctl -- add-port br-int hv2-vif0
+ovs-vsctl -- add-port br-int hv2-vif0 -- \
+set Interface hv2-vif0 ofport-request=1
 
 # Allow only chassis hv1 to bind logical port lsp0.
 ovn-nbctl lsp-set-options lsp0 requested-chassis=hv1
@@ -9138,7 +9140,11 @@ ovs-vsctl set interface hv2-vif0 
external-ids:iface-id=lsp0
 OVS_WAIT_UNTIL([test 1 = $(grep -c "Not claiming lport lsp0" 
hv2/ovn-controller.log)])
 AT_CHECK([test x$(ovn-sbctl --bare --columns chassis find port_binding 
logical_port=lsp0) = x], [0], [])
 
-# (2) Chassis hv1 should bind lsp0 when physical to logical mapping exists on 
hv1.
+# (2) Chassis hv2 should not add flows in OFTABLE_PHY_TO_LOG and 
OFTABLE_LOG_TO_PHY tables.
+AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=0 | grep in_port=1], [1], 
[])
+AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=65 | grep output], [1], [])
+
+# (3) Chassis hv1 should bind lsp0 when physical to logical mapping exists on 
hv1.
 echo "verifying that hv1 binds lsp0 when hv1 physical/logical mapping is added"
 as hv1
 ovs-vsctl set interface hv1-vif0 external-ids:iface-id=lsp0
@@ -9146,7 +9152,12 @@ ovs-vsctl set interface hv1-vif0 
external-ids:iface-id=lsp0
 OVS_WAIT_UNTIL([test 1 = $(grep -c "Claiming lport lsp0" 
hv1/ovn-controller.log)])
 AT_CHECK([test $(ovn-sbctl --bare --columns chassis find port_binding 
logical_port=lsp0) = "$hv1_uuid"], [0], [])
 
-# (3) Chassis hv1 should release lsp0 binding and chassis hv2 should bind lsp0 
when
+# (4) Chassis hv1 should add flows in OFTABLE_PHY_TO_LOG and 
OFTABLE_LOG_TO_PHY tables.
+as hv1 ovs-ofctl dump-flows br-int
+AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=0 | grep in_port=1], [0], 
[ignore])
+AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=65 | grep 
actions=output:1], [0], [ignore])
+
+# (5) Chassis hv1 should release lsp0 binding and chassis hv2 should bind lsp0 
when
 # the requested chassis for lsp0 is changed from hv1 to hv2.
 echo "verifying that lsp0 binding moves when requested-chassis is changed"
 
@@ -9154,6 +9165,13 @@ ovn-nbctl lsp-set-options lsp0 requested-chassis=hv2
 OVS_WAIT_UNTIL([test 1 = $(grep -c "Releasing lport lsp0 from this chassis" 
hv1/ovn-controller.log)])
 OVS_WAIT_UNTIL([test $(ovn-sbctl --bare --columns chassis find