[ovs-dev] deleting chassis doesn't delete the lport and lflows from northd

2018-01-29 Thread Ali Gin
Hi team/Ben:

Want to get inputs about the compute de-commissioning use case. When the
chassis is deleted from the southbound DB after gracefully killing the
compute node, the ports bound to the northd lswitch are not deleted.


Below is the detail of a sandbox from recent scale test env.
root@fake-host:~/sandbox-192.168.83.8# ls
br0.mgmt  db.sock   ovn-uuid
ovs-vswitchd.13976.ctl
br0.snoop ovn-controller.14063.ctl  ovsdb-server.13959.ctl
ovs-vswitchd.log
br-int.mgmt   ovn-controller.logovsdb-server.log
ovs-vswitchd.pid
br-int.snoop  ovn-controller.pidovsdb-server.pid
ovs-vswitchd.sh
conf.db   ovn-controller.sh ovsdb-server.sh sandbox.rc
root@fake-host:~/sandbox-192.168.83.8# ps aux | grep 14063
root 14063  1.9  0.0 114904 88456 ?Ss   00:23  21:53
ovn-controller --detach --no-chdir --pidfile -vconsole:off -vsyslog:off
-vfile:info --log-file
root 1  0.0  0.0  14224   972 pts/0S+   19:30   0:00 grep
--color=auto 14063
root@fake-host:~/sandbox-192.168.83.8# kill 14063

root@fake-host~/sandbox-192.168.83.8# ps aux | grep 13976
root 13976  1.2  0.8 2480864 2325768 ? Ssl  00:23  13:49
ovs-vswitchd --detach --no-chdir --pidfile -vconsole:off -vsyslog:off
-vfile:info --log-file --enable-dummy=override
root 44688  0.0  0.0  14224  1008 pts/0S+   19:30   0:00 grep
--color=auto 13976

root@fake-host:~/sandbox-192.168.83.8# kill 13959
root@fake-host:~/sandbox-192.168.83.8# ovs-vsctl show
ovs-vsctl: unix:/root/sandbox-192.168.83.8/db.sock: database connection
failed (No such file or directory)

*southdb details:*
Chassis "7aa3f562-4b8c-4b63-b22a-d7da7fcdedcb"
hostname: "fake234"
Encap geneve
ip: "192.168.83.8/16"
options: {csum="true"}
Port_Binding "lport_5ead7e_UCUe2A"
Port_Binding "lport_5ead7e_LaD1jw"
Port_Binding "lport_5ead7e_XcVUdA"
Port_Binding "lport_5ead7e_wPkica"
Port_Binding "lport_5ead7e_Ms539W"
Port_Binding "lport_5ead7e_UGdRuk"
Port_Binding "lport_5ead7e_8jHLXB"
Port_Binding "lport_5ead7e_IKRwFf"
Port_Binding "lport_5ead7e_U2eXES"
Port_Binding "lport_5ead7e_UIVysQ"

ovn-sbctl chassis-del 7aa3f562-4b8c-4b63-b22a-d7da7fcdedcb

ovn-nbctl show 5cf94839-d4b6-424c-9081-20739e48eb53
port lport_5ead7e_UCUe2A
addresses: ["5e:7f:47:62:6b:d0 172.145.74.24"]

ovn-sbctl lflow-list | grep 5e:7f:47:62:6b:d0
  table=0 (ls_in_port_sec_l2  ), priority=50   , match=(inport ==
"lport_5ead7e_UCUe2A" && eth.src == {5e:7f:47:62:6b:d0}), action=(next;)
  table=2 (ls_in_port_sec_nd  ), priority=90   , match=(inport ==
"lport_5ead7e_UCUe2A" && eth.src == 5e:7f:47:62:6b:d0 && arp.sha ==
5e:7f:47:62:6b:d0), action=(next;)
  table=2 (ls_in_port_sec_nd  ), priority=90   , match=(inport ==
"lport_5ead7e_UCUe2A" && eth.src == 5e:7f:47:62:6b:d0 && ip6 && nd &&
((nd.sll == 00:00:00:00:00:00 || nd.sll == 5e:7f:47:62:6b:d0) || ((nd.tll
== 00:00:00:00:00:00 || nd.tll == 5e:7f:47:62:6b:d0, action=(next;)
  table=15(ls_in_l2_lkup  ), priority=50   , match=(eth.dst ==
5e:7f:47:62:6b:d0), action=(outport = "lport_5ead7e_UCUe2A"; output;)
  table=8 (ls_out_port_sec_l2 ), priority=50   , match=(outport ==
"lport_5ead7e_UCUe2A" && eth.dst == {5e:7f:47:62:6b:d0}), action=(output;)

So is this kept on purpose, or should we make code changes to delete the
same from northd too, since the lports are left orphaned?


Looking forward to your further inputs.



Regards,
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] kernel crash bug caused by ixgbevf kernel module of centos-3.10.0-229.20.1.el7

2018-01-29 Thread Sam
I found a bug about ixgbevf kernel module in centos-3.10.0-229.20.1.el7.
And this bug is also in 3.10.0-514.10.2.el7.

How to reproduce this bug: use SR-IOV first, then add lots of network traffic
on the vf port, and then ifdown/ifup the vf port; after many iterations, this bug happens.

BUG:

[308026.586026] ixgbevf :01:10.0: NIC Link is Down
[308026.586037] ixgbevf :01:10.1: NIC Link is Down
[308026.683724] bonding: bond1: link status definitely down for
interface enp1s16, disabling it
[308026.683728] bonding: bond1: now running without any active interface !
[308026.683729] bonding: bond1: link status definitely down for
interface enp1s16f1, disabling it
[308028.266060] bonding: bond1: Removing slave enp1s16.
[308028.266135] bonding: bond1: Warning: the permanent HWaddr of
enp1s16 - 4e:cd:a6:59:26:2c - is still in use by bond1. Set the HWaddr
of enp1s16 to a different address to avoid conflicts.
[308028.266139] bonding: bond1: releasing active interface enp1s16
[308028.359872] BUG: unable to handle kernel NULL pointer dereference
at 0008
[308028.361319] IP: []
ixgbevf_alloc_rx_buffers+0x60/0x160 [ixgbevf]
[308028.362049] PGD 0
[308028.362777] Oops:  [#1] SMP
[308028.363481] Modules linked in: ixgbevf(OF) igb_uio(OF)
iptable_mangle iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4
nf_nat_ipv4 nf_nat nf_conntrack iptable_filter nbd(OF) vhost_net
macvtap macvlan udp_diag unix_diag af_packet_diag netlink_diag tun
tcp_diag inet_diag uio bonding ext4 mbcache jbd2 intel_powerclamp
coretemp kvm_intel kvm crct10dif_pclmul crc32_pclmul crc32c_intel
ghash_clmulni_intel mgag200 aesni_intel iTCO_wdt lrw dcdbas gf128mul
syscopyarea sysfillrect iTCO_vendor_support glue_helper sysimgblt
ablk_helper ttm cryptd ipmi_devintf igb ixgbe drm_kms_helper drm
i2c_algo_bit ptp i2c_core ipmi_si pps_core sg mdio ipmi_msghandler dca
sb_edac mei_me mei shpchp lpc_ich pcspkr mfd_core edac_core wmi
acpi_power_meter acpi_pad ip_tables xfs libcrc32c sd_mod crc_t10dif
crct10dif_common ahci libahci
[308028.368487]  libata megaraid_sas [last unloaded: ixgbevf]
[308028.369345] CPU: 0 PID: 21971 Comm: kworker/0:1 Tainted: GF
W  O--   3.10.0-229.el7.x86_64 #1
[308028.370226] Hardware name: Dell Inc. PowerEdge R720/068CDY, BIOS
2.5.2 01/28/2015
[308028.371132] Workqueue: events ixgbevf_service_task [ixgbevf]
[308028.372038] task: 88022b0dad80 ti: 88010905c000 task.ti:
88010905c000
[308028.372965] RIP: 0010:[]  []
ixgbevf_alloc_rx_buffers+0x60/0x160 [ixgbevf]
[308028.373949] RSP: 0018:88010905fd10  EFLAGS: 00010287
[308028.374900] RAX: 0200 RBX:  RCX:

[308028.375895] RDX:  RSI: 01ff RDI:
8800b82061c0
[308028.376841] RBP: 88010905fd48 R08: 0282 R09:
0001
[308028.377780] R10: 0004 R11: 0005 R12:

[308028.378702] R13: fe00 R14: 01ff R15:
8800b82061c0
[308028.379628] FS:  () GS:882f7fa0()
knlGS:
[308028.380540] CS:  0010 DS:  ES:  CR0: 80050033
[308028.381471] CR2: 0008 CR3: 0190a000 CR4:
001427f0
[308028.382376] DR0:  DR1:  DR2:

[308028.383291] DR3:  DR6: 0ff0 DR7:
0400
[308028.384180] Stack:
[308028.385051]  8832d1b58bc0 88010905fd28 8832d1b588c0
0009
[308028.385933]  8832d1b58bc0 8800b82061c0 1028
88010905fdb8
[308028.386804]  a0496ba3 8832d1b58e58 00022b1e2000
819e2108
[308028.387693] Call Trace:
[308028.388520]  [] ixgbevf_configure+0x5d3/0x7d0 [ixgbevf]
[308028.389363]  [] ixgbevf_reinit_locked+0x65/0x90 [ixgbevf]
[308028.390213]  [] ixgbevf_service_task+0x324/0x420 [ixgbevf]
[308028.391043]  [] process_one_work+0x17b/0x470
[308028.391888]  [] worker_thread+0x11b/0x400
[308028.392728]  [] ? rescuer_thread+0x400/0x400
[308028.393576]  [] kthread+0xcf/0xe0
[308028.394434]  [] ? kthread_create_on_node+0x140/0x140
[308028.395339]  [] ret_from_fork+0x7c/0xb0
[308028.396205]  [] ? kthread_create_on_node+0x140/0x140
[308028.397068] Code: c5 41 89 f6 49 89 c4 48 8d 14 40 48 8b 47 28 49
c1 e4 04 4c 03 67 20 48 8d 1c d0 0f b7 47 4c 41 29 c5 66 0f 1f 84 00
00 00 00 00 <48> 83 7b 08 00 74 73 8b 53 10 48 8b 03 48 01 d0 49 83 c4
10 48
[308028.398959] RIP  []
ixgbevf_alloc_rx_buffers+0x60/0x160 [ixgbevf]
[308028.399910]  RSP 
[308028.400846] CR2: 0008
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v1 0/5] datapath: enable NSH support in kernel compat mode

2018-01-29 Thread Yang, Yi Y
Greg, thank you so much for your reviewing, which Linux distribution version 
has linux-3.10.107 kernel?  I’ll add a test case for kernel datapath, fix 
compiling issue on linux-3.10.107 and rebase it to master.

From: Gregory Rose [mailto:gvrose8...@gmail.com]
Sent: Tuesday, January 30, 2018 1:51 AM
To: Yang, Yi Y ; d...@openvswitch.org
Cc: b...@ovn.org; jan.scheur...@ericsson.com
Subject: Re: [PATCH v1 0/5] datapath: enable NSH support in kernel compat mode

On 1/10/2018 11:53 PM, Yi Yang wrote:


This patch series is to backport NSH support patches in Linux net-next tree

to OVS in order that it can support NSH in kernel compat mode.



Yi Yang (5):

  datapath: ether: add NSH ethertype

  datapath: vxlan: factor out VXLAN-GPE next protocol

  datapath: net: add NSH header structures and helpers

  datapath: nsh: add GSO support

  datapath: enable NSH support



 NEWS  |   1 +

 datapath/Modules.mk   |   4 +-

 datapath/actions.c| 116 

 datapath/datapath.c   |   4 +

 datapath/flow.c   |  51 

 datapath/flow.h   |   7 +

 datapath/flow_netlink.c   | 343 +-

 datapath/flow_netlink.h   |   5 +

 datapath/linux/Modules.mk |   2 +

 datapath/linux/compat/include/linux/if_ether.h|   4 +

 datapath/linux/compat/include/linux/openvswitch.h |   6 +-

 datapath/linux/compat/include/net/nsh.h   | 313 

 datapath/linux/compat/include/net/tun_proto.h |  49 

 datapath/linux/compat/include/net/vxlan.h |   6 -

 datapath/linux/compat/vxlan.c |  32 +-

 datapath/nsh.c| 142 +

 16 files changed, 1048 insertions(+), 37 deletions(-)

 create mode 100644 datapath/linux/compat/include/net/nsh.h

 create mode 100644 datapath/linux/compat/include/net/tun_proto.h

 create mode 100644 datapath/nsh.c



Hi Yi,

My apologies for the delay in reviewing this series.

I've finished up my review and I think it mostly looks pretty good but I did 
find an issue compiling on a 3.10.107 kernel build:
CC [M] 
/home/travis/build/gvrose8192/ovs-experimental/datapath/linux/vport-netdev.o
/home/travis/build/gvrose8192/ovs-experimental/datapath/linux/nsh.c:108:17: 
error: undefined identifier 'skb_gso_error_unwind'
CC [M] /home/travis/build/gvrose8192/ovs-experimental/datapath/linux/nsh.o
/home/travis/build/gvrose8192/ovs-experimental/datapath/linux/nsh.c: In 
function ‘nsh_gso_segment’:
/home/travis/build/gvrose8192/ovs-experimental/datapath/linux/nsh.c:108:3: 
error: implicit declaration of function ‘skb_gso_error_unwind’ 
[-Werror=implicit-function-declaration]
skb_gso_error_unwind(skb, htons(ETH_P_NSH), nsh_len,
^
cc1: some warnings being treated as errors
make[3]: *** 
[/home/travis/build/gvrose8192/ovs-experimental/datapath/linux/nsh.o] Error 1
make[3]: *** Waiting for unfinished jobs
make[2]: *** 
[_module_/home/travis/build/gvrose8192/ovs-experimental/datapath/linux] Error 2
make[2]: Leaving directory 
`/home/travis/build/gvrose8192/ovs-experimental/linux-3.10.107'
make[1]: *** [default] Error 2
make[1]: Leaving directory 
`/home/travis/build/gvrose8192/ovs-experimental/datapath/linux'
make: *** [all-recursive] Error 1

So we'll need to fix that up and I also think the patches will need to be 
rebased to current master.  That second part is my fault... so sorry again 
about that.

One other thing, I ran this through our standard 'make check and make 
check-kmod' tests and everything was fine, so the patches don't seem to break 
anything.  I'm still concerned though that the test coverage probably didn't 
hit any parts of your code.  I'm wondering if there is some way I can test the 
code path and get some test coverage there.  Could you write up a self test for 
the tests/system-traffic.at kernel test?  Or if that's not practical, is there 
some other way I could test this code?

Thanks,

- Greg

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] Técnicas de Supervisión Efectiva

2018-01-29 Thread Habilidades necesarias para alcanzar sus objetivos
Adecuada gestión de personal para resultados excelentes 

Técnicas de Supervisión Efectiva, Liderazgo, Productividad y Manejo de 
Conflictos
14 de Febrero- Psi. Dolores Romero Mora - 9am- 8pm

Las habilidades de supervisión son imprescindibles para aquellos jefes o 
supervisores que tienen equipos de trabajo a su cargo y tienen el reto de 
alcanzar objetivos dentro de las empresas. Es posible ocupar un puesto de este 
tipo por dar buenos resultados en nuestras funciones, por antigüedad o porque 
hemos demostrado capacidad y responsabilidad en el cumplimiento de las tareas 
asignadas. Sin embargo, esto no garantiza el éxito, ya que las competencias que 
se requieren para ser un buen jefe o supervisor no son las mismas que se 
requieren para ser un buen empleado o colaborador. 

BENEFICIOS DE ASISTIR: 

- Conocerá cuáles son las competencias de su puesto de trabajo.
- Aprenderá sobre las habilidades de comunicación y asertividad para con sus 
colaboradores.
- Identificará los tipos de liderazgo y técnicas motivacionales a implementar 
con su equipo de trabajo.
- Comprenderá los estilos y técnicas para manejo de conflictos y negociación.
- ¡Más información aplicable a sus actividades! 

¿Requiere la información a la Brevedad? responda este email con la palabra: 
Supervisión + nombre - teléfono - correo.


centro telefónico:018002120744 


 


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 7/7] compat: Fix compiler headers

2018-01-29 Thread Greg Rose
Since Linux kernel upstream commit d15155824c50
("linux/compiler.h: Split into compiler.h and compiler_types.h") this
error check for the gcc compiler header is no longer valid.  Remove
so that openvswitch builds for linux kernels 4.14.8 and since.

Signed-off-by: Greg Rose 
---
 acinclude.m4   | 3 +++
 datapath/linux/compat/include/linux/compiler-gcc.h | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/acinclude.m4 b/acinclude.m4
index d0f9d82..a838a46 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -798,6 +798,9 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
   OVS_FIND_PARAM_IFELSE([$KSRC/include/linux/netdevice.h],
 [netdev_master_upper_dev_link], [extack],
 [OVS_DEFINE([HAVE_UPPER_DEV_LINK_EXTACK])])
+  OVS_GREP_IFELSE([$KSRC/include/linux/compiler_types.h],
+  [__LINUX_COMPILER_TYPES_H],
+  [OVS_DEFINE([HAVE_LINUX_COMPILER_TYPES_H])])
 
   if cmp -s datapath/linux/kcompat.h.new \
 datapath/linux/kcompat.h >/dev/null 2>&1; then
diff --git a/datapath/linux/compat/include/linux/compiler-gcc.h 
b/datapath/linux/compat/include/linux/compiler-gcc.h
index bf057f7..bfcd531 100644
--- a/datapath/linux/compat/include/linux/compiler-gcc.h
+++ b/datapath/linux/compat/include/linux/compiler-gcc.h
@@ -1,6 +1,8 @@
 #ifndef __LINUX_COMPILER_H
+#ifndef HAVE_LINUX_COMPILER_TYPES_H
 #error "Please don't include  directly, include 
 instead."
 #endif
+#endif
 
 #include_next 
 
-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 6/7] travis: Update kernel test list from kernel.org

2018-01-29 Thread Greg Rose
Signed-off-by: Greg Rose 
---
 .travis.yml | 17 -
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 48acc8e..25458a3 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -26,17 +26,16 @@ sudo: false
 
 env:
   - OPTS="--disable-ssl"
-  - TESTSUITE=1 KERNEL=3.16.47
+  - TESTSUITE=1 KERNEL=3.16.51
   - TESTSUITE=1 OPTS="--enable-shared"
   - BUILD_ENV="-m32" OPTS="--disable-ssl"
-  - KERNEL=3.16.47 DPDK=1
-  - KERNEL=3.16.47 DPDK=1 OPTS="--enable-shared"
-  - KERNEL=4.13
-  - KERNEL=4.12.11
-  - KERNEL=4.9.48
-  - KERNEL=4.4.87
-  - KERNEL=4.1.43
-  - KERNEL=3.10.107
+  - KERNEL=3.16.51 DPDK=1
+  - KERNEL=3.16.51 DPDK=1 OPTS="--enable-shared"
+  - KERNEL=4.14.3
+  - KERNEL=4.9.66
+  - KERNEL=4.4.103
+  - KERNEL=4.1.46
+  - KERNEL=3.10.108
   - TESTSUITE=1 LIBS=-ljemalloc
 
 matrix:
-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 5/7] acinclude.m4: Enable Linux 4.14

2018-01-29 Thread Greg Rose
Signed-off-by: Greg Rose 
---
 acinclude.m4 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/acinclude.m4 b/acinclude.m4
index 768c20c..d0f9d82 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -151,10 +151,10 @@ AC_DEFUN([OVS_CHECK_LINUX], [
 AC_MSG_RESULT([$kversion])
 
 if test "$version" -ge 4; then
-   if test "$version" = 4 && test "$patchlevel" -le 13; then
+   if test "$version" = 4 && test "$patchlevel" -le 14; then
   : # Linux 4.x
else
-  AC_ERROR([Linux kernel in $KBUILD is version $kversion, but version 
newer than 4.13.x is not supported (please refer to the FAQ for advice)])
+  AC_ERROR([Linux kernel in $KBUILD is version $kversion, but version 
newer than 4.14.x is not supported (please refer to the FAQ for advice)])
fi
 elif test "$version" = 3 && test "$patchlevel" -ge 10; then
: # Linux 3.x
-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 4/7] datapath: Fix SKB_GSO_UDP usage

2018-01-29 Thread Greg Rose
Using SKB_GSO_UDP breaks the compilation on Linux 4.14. Check for
the HAVE_SKB_GSO_UDP compiler #define.

Signed-off-by: Greg Rose 
---
 datapath/datapath.c |  9 ++---
 datapath/linux/compat/stt.c | 11 ++-
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 1780819..a3fdd8f 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -339,8 +339,10 @@ static int queue_gso_packets(struct datapath *dp, struct 
sk_buff *skb,
 const struct dp_upcall_info *upcall_info,
 uint32_t cutlen)
 {
+#ifdef HAVE_SKB_GSO_UDP
unsigned short gso_type = skb_shinfo(skb)->gso_type;
struct sw_flow_key later_key;
+#endif
struct sk_buff *segs, *nskb;
struct ovs_skb_cb ovs_cb;
int err;
@@ -352,7 +354,7 @@ static int queue_gso_packets(struct datapath *dp, struct 
sk_buff *skb,
return PTR_ERR(segs);
if (segs == NULL)
return -EINVAL;
-
+#ifdef HAVE_SKB_GSO_UDP
if (gso_type & SKB_GSO_UDP) {
/* The initial flow key extracted by ovs_flow_key_extract()
 * in this case is for a first fragment, so we need to
@@ -361,14 +363,15 @@ static int queue_gso_packets(struct datapath *dp, struct 
sk_buff *skb,
later_key = *key;
later_key.ip.frag = OVS_FRAG_TYPE_LATER;
}
-
+#endif
/* Queue all of the segments. */
skb = segs;
do {
*OVS_CB(skb) = ovs_cb;
+#ifdef HAVE_SKB_GSO_UDP
if (gso_type & SKB_GSO_UDP && skb != segs)
key = _key;
-
+#endif
err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
if (err)
break;
diff --git a/datapath/linux/compat/stt.c b/datapath/linux/compat/stt.c
index 37d5f4b..66a97f2 100644
--- a/datapath/linux/compat/stt.c
+++ b/datapath/linux/compat/stt.c
@@ -81,8 +81,13 @@ struct stt_dev {
 #define STT_PROTO_TCP  BIT(3)
 #define STT_PROTO_TYPES(STT_PROTO_IPV4 | STT_PROTO_TCP)
 
+#ifdef HAVE_SKB_GSO_UDP
 #define SUPPORTED_GSO_TYPES (SKB_GSO_TCPV4 | SKB_GSO_UDP | SKB_GSO_DODGY | \
 SKB_GSO_TCPV6)
+#else
+#define SUPPORTED_GSO_TYPES (SKB_GSO_TCPV4 | SKB_GSO_DODGY | \
+SKB_GSO_TCPV6)
+#endif
 
 /* The length and offset of a fragment are encoded in the sequence number.
  * STT_SEQ_LEN_SHIFT is the left shift needed to store the length.
@@ -1310,7 +1315,7 @@ static bool validate_checksum(struct sk_buff *skb)
 static bool set_offloads(struct sk_buff *skb)
 {
struct stthdr *stth = stt_hdr(skb);
-   unsigned short gso_type;
+   unsigned short gso_type = 0;
int l3_header_size;
int l4_header_size;
u16 csum_offset;
@@ -1351,7 +1356,9 @@ static bool set_offloads(struct sk_buff *skb)
case STT_PROTO_IPV4:
/* UDP/IPv4 */
csum_offset = offsetof(struct udphdr, check);
+#ifdef HAVE_SKB_GSO_UDP
gso_type = SKB_GSO_UDP;
+#endif
l3_header_size = sizeof(struct iphdr);
l4_header_size = sizeof(struct udphdr);
skb->protocol = htons(ETH_P_IP);
@@ -1359,7 +1366,9 @@ static bool set_offloads(struct sk_buff *skb)
default:
/* UDP/IPv6 */
csum_offset = offsetof(struct udphdr, check);
+#ifdef HAVE_SKB_GSO_UDP
gso_type = SKB_GSO_UDP;
+#endif
l3_header_size = sizeof(struct ipv6hdr);
l4_header_size = sizeof(struct udphdr);
skb->protocol = htons(ETH_P_IPV6);
-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 3/7] datapath: conntrack: make protocol tracker pointers const

2018-01-29 Thread Greg Rose
From: Florian Westphal 

Upstream commit:
commit b3480fe059ac9121b5714205b4ddae14b59ef4be
Author: Florian Westphal 
Date:   Sat Aug 12 00:57:08 2017 +0200

netfilter: conntrack: make protocol tracker pointers const

Doesn't change generated code, but will make it easier to eventually
make the actual trackers themselvers const.

Signed-off-by: Florian Westphal 
Signed-off-by: Pablo Neira Ayuso 

Signed-off-by: Greg Rose 
---
 datapath/conntrack.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/datapath/conntrack.c b/datapath/conntrack.c
index 3f79433..a75ae3c 100644
--- a/datapath/conntrack.c
+++ b/datapath/conntrack.c
@@ -613,8 +613,8 @@ static struct nf_conn *
 ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
 u8 l3num, struct sk_buff *skb, bool natted)
 {
-   struct nf_conntrack_l3proto *l3proto;
-   struct nf_conntrack_l4proto *l4proto;
+   const struct nf_conntrack_l3proto *l3proto;
+   const struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 2/7] compat: Do not include headers when not compiling

2018-01-29 Thread Greg Rose
If the entire file is not going to be compiled because OVS is using
upstream tunnel support then also don't bother pulling in the headers.

Signed-off-by: Greg Rose 
---
 datapath/linux/compat/ip_gre.c| 2 +-
 datapath/linux/compat/ip_output.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/datapath/linux/compat/ip_gre.c b/datapath/linux/compat/ip_gre.c
index 94fdaa9..4e32591 100644
--- a/datapath/linux/compat/ip_gre.c
+++ b/datapath/linux/compat/ip_gre.c
@@ -12,6 +12,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#ifndef USE_UPSTREAM_TUNNEL
 #include 
 #include 
 #include 
@@ -52,7 +53,6 @@
 #include 
 #include 
 
-#ifndef USE_UPSTREAM_TUNNEL
 #if IS_ENABLED(CONFIG_IPV6)
 #include 
 #include 
diff --git a/datapath/linux/compat/ip_output.c 
b/datapath/linux/compat/ip_output.c
index edca340..e2f869f 100644
--- a/datapath/linux/compat/ip_output.c
+++ b/datapath/linux/compat/ip_output.c
@@ -45,6 +45,7 @@
  * Hirokazu Takahashi: sendfile() on UDP works now.
  */
 
+#ifndef HAVE_CORRECT_MRU_HANDLING
 #include 
 #include 
 #include 
@@ -82,7 +83,6 @@
 #include 
 #include 
 
-#ifndef HAVE_CORRECT_MRU_HANDLING
 static inline void rpl_ip_options_fragment(struct sk_buff *skb)
 {
unsigned char *optptr = skb_network_header(skb) + sizeof(struct iphdr);
-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2 1/7] datapath: Fix netdev_master_upper_dev_link for 4.14

2018-01-29 Thread Greg Rose
An extended netlink ack has been added for 4.14 - add compat layer
changes so that it compiles for all kernels up to and including
4.14.

Signed-off-by: Greg Rose 
---
 acinclude.m4|  3 +++
 datapath/linux/compat/include/linux/netdevice.h | 15 ++-
 datapath/vport-netdev.c |  9 -
 3 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/acinclude.m4 b/acinclude.m4
index c04c2c6..768c20c 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -795,6 +795,9 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
 [OVS_DEFINE([HAVE_LIST_IN_NF_HOOK_OPS])])
   OVS_GREP_IFELSE([$KSRC/include/uapi/linux/netfilter/nf_conntrack_common.h],
   [IP_CT_UNTRACKED])
+  OVS_FIND_PARAM_IFELSE([$KSRC/include/linux/netdevice.h],
+[netdev_master_upper_dev_link], [extack],
+[OVS_DEFINE([HAVE_UPPER_DEV_LINK_EXTACK])])
 
   if cmp -s datapath/linux/kcompat.h.new \
 datapath/linux/kcompat.h >/dev/null 2>&1; then
diff --git a/datapath/linux/compat/include/linux/netdevice.h 
b/datapath/linux/compat/include/linux/netdevice.h
index 3c3cf42..c460332 100644
--- a/datapath/linux/compat/include/linux/netdevice.h
+++ b/datapath/linux/compat/include/linux/netdevice.h
@@ -101,13 +101,26 @@ static inline bool netif_needs_gso(struct sk_buff *skb,
 #ifndef HAVE_NETDEV_MASTER_UPPER_DEV_LINK_RH
 static inline int rpl_netdev_master_upper_dev_link(struct net_device *dev,
   struct net_device *upper_dev,
-  void *upper_priv, void 
*upper_info)
+  void *upper_priv,
+  void *upper_info, void *extack)
 {
return netdev_master_upper_dev_link(dev, upper_dev);
 }
 #define netdev_master_upper_dev_link rpl_netdev_master_upper_dev_link
 
 #endif
+#else
+#ifndef HAVE_UPPER_DEV_LINK_EXTACK
+static inline int rpl_netdev_master_upper_dev_link(struct net_device *dev,
+  struct net_device *upper_dev,
+  void *upper_priv,
+  void *upper_info, void *extack)
+{
+   return netdev_master_upper_dev_link(dev, upper_dev, upper_priv,
+   upper_info);
+}
+#define netdev_master_upper_dev_link rpl_netdev_master_upper_dev_link
+#endif
 #endif
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0)
diff --git a/datapath/vport-netdev.c b/datapath/vport-netdev.c
index 697c442..e2d8eaf 100644
--- a/datapath/vport-netdev.c
+++ b/datapath/vport-netdev.c
@@ -112,8 +112,15 @@ struct vport *ovs_netdev_link(struct vport *vport, const 
char *name)
}
 
rtnl_lock();
+#ifdef HAVE_NETDEV_MASTER_UPPER_DEV_LINK_RH
err = netdev_master_upper_dev_link(vport->dev,
-  get_dpdev(vport->dp), NULL, NULL);
+  get_dpdev(vport->dp),
+  NULL, NULL);
+#else
+   err = netdev_master_upper_dev_link(vport->dev,
+  get_dpdev(vport->dp),
+  NULL, NULL, NULL);
+#endif
if (err)
goto error_unlock;
 
-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 4/5] ovs-vswitchd: Fire RCU callbacks before exit to reduce memory leak warnings.

2018-01-29 Thread William Tu
On Mon, Jan 29, 2018 at 2:44 PM, Ben Pfaff  wrote:
> On Mon, Jan 29, 2018 at 02:35:35PM -0800, William Tu wrote:
>> On Mon, Jan 29, 2018 at 2:27 PM, Ben Pfaff  wrote:
>> > On Mon, Jan 29, 2018 at 02:16:24PM -0800, William Tu wrote:
>> >> On Thu, Jan 25, 2018 at 3:39 PM, Ben Pfaff  wrote:
>> >> > ovs-vswitchd makes extensive use of RCU to defer freeing memory past the
>> >> > latest time that it could be in use by a thread.  Until now, 
>> >> > ovs-vswitchd
>> >> > has not waited for RCU callbacks to fire before exiting.  This meant 
>> >> > that
>> >> > in many cases, when ovs-vswitchd exits, many blocks of memory are stuck 
>> >> > in
>> >> > RCU callback queues, which valgrind often reports as "possible" memory
>> >> > leaks.
>> >> >
>> >> > This commit adds a new function ovsrcu_exit() that waits and fires as 
>> >> > many
>> >> > RCU callbacks as it reasonably can.  It can only do so for the thread 
>> >> > that
>> >> > calls it and the thread that calls the callbacks, but generally speaking
>> >> > ovs-vswitchd shuts down other threads before it exits anyway, so this is
>> >> > pretty good.
>> >> >
>> >> > In my testing this eliminates most valgrind warnings for tests that run
>> >> > ovs-vswitchd.  This ought to make it easier to distinguish new leaks 
>> >> > that
>> >> > are real from existing non-leaks.
>> >> >
>> >> > Signed-off-by: Ben Pfaff 
>> >> > ---
>> >>
>> >> Looks good to me.
>> >> One limitation is that since this patch init the ovs barrier for size=2,
>> >> the ovsrcu_exit() can only be used in ovs-vswitchd.  Otherwise users
>> >> have to remember to bump up this barrier number.
>> >
>> > I don't understand that comment.  Can you explain?  Why would other
>> > daemons need a larger barrier number?
>>
>> We init the postpone_barrier to 2
>> + ovs_barrier_init(_barrier, 2);
>>
>> and every daemon calls ovsrcu_exit will call
>> +ovs_barrier_block(_barrier);
>> which increments the counter
>>
>> and the ovsrcu_postpone_thread also calls
>> +ovs_barrier_block(_barrier);
>>
>> So if one more daemon calls ovsrcu_exit, then we have to bump the number to 
>> 3?
>
> How would daemons share a barrier?  Our daemons don't use shared memory.

Oh, I see now. The postpone_barrier is only shared between the two.
Then there is no such issue. Thanks for clarifying.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 4/5] ovs-vswitchd: Fire RCU callbacks before exit to reduce memory leak warnings.

2018-01-29 Thread Ben Pfaff
On Mon, Jan 29, 2018 at 02:35:35PM -0800, William Tu wrote:
> On Mon, Jan 29, 2018 at 2:27 PM, Ben Pfaff  wrote:
> > On Mon, Jan 29, 2018 at 02:16:24PM -0800, William Tu wrote:
> >> On Thu, Jan 25, 2018 at 3:39 PM, Ben Pfaff  wrote:
> >> > ovs-vswitchd makes extensive use of RCU to defer freeing memory past the
> >> > latest time that it could be in use by a thread.  Until now, ovs-vswitchd
> >> > has not waited for RCU callbacks to fire before exiting.  This meant that
> >> > in many cases, when ovs-vswitchd exits, many blocks of memory are stuck 
> >> > in
> >> > RCU callback queues, which valgrind often reports as "possible" memory
> >> > leaks.
> >> >
> >> > This commit adds a new function ovsrcu_exit() that waits and fires as 
> >> > many
> >> > RCU callbacks as it reasonably can.  It can only do so for the thread 
> >> > that
> >> > calls it and the thread that calls the callbacks, but generally speaking
> >> > ovs-vswitchd shuts down other threads before it exits anyway, so this is
> >> > pretty good.
> >> >
> >> > In my testing this eliminates most valgrind warnings for tests that run
> >> > ovs-vswitchd.  This ought to make it easier to distinguish new leaks that
> >> > are real from existing non-leaks.
> >> >
> >> > Signed-off-by: Ben Pfaff 
> >> > ---
> >>
> >> Looks good to me.
> >> One limitation is that since this patch init the ovs barrier for size=2,
> >> the ovsrcu_exit() can only be used in ovs-vswitchd.  Otherwise users
> >> have to remember to bump up this barrier number.
> >
> > I don't understand that comment.  Can you explain?  Why would other
> > daemons need a larger barrier number?
> 
> We init the postpone_barrier to 2
> + ovs_barrier_init(_barrier, 2);
> 
> and every daemon calls ovsrcu_exit will call
> +ovs_barrier_block(_barrier);
> which increments the counter
> 
> and the ovsrcu_postpone_thread also calls
> +ovs_barrier_block(_barrier);
> 
> So if one more daemon calls ovsrcu_exit, then we have to bump the number to 3?

How would daemons share a barrier?  Our daemons don't use shared memory.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 4/5] ovs-vswitchd: Fire RCU callbacks before exit to reduce memory leak warnings.

2018-01-29 Thread William Tu
On Mon, Jan 29, 2018 at 2:27 PM, Ben Pfaff  wrote:
> On Mon, Jan 29, 2018 at 02:16:24PM -0800, William Tu wrote:
>> On Thu, Jan 25, 2018 at 3:39 PM, Ben Pfaff  wrote:
>> > ovs-vswitchd makes extensive use of RCU to defer freeing memory past the
>> > latest time that it could be in use by a thread.  Until now, ovs-vswitchd
>> > has not waited for RCU callbacks to fire before exiting.  This meant that
>> > in many cases, when ovs-vswitchd exits, many blocks of memory are stuck in
>> > RCU callback queues, which valgrind often reports as "possible" memory
>> > leaks.
>> >
>> > This commit adds a new function ovsrcu_exit() that waits and fires as many
>> > RCU callbacks as it reasonably can.  It can only do so for the thread that
>> > calls it and the thread that calls the callbacks, but generally speaking
>> > ovs-vswitchd shuts down other threads before it exits anyway, so this is
>> > pretty good.
>> >
>> > In my testing this eliminates most valgrind warnings for tests that run
>> > ovs-vswitchd.  This ought to make it easier to distinguish new leaks that
>> > are real from existing non-leaks.
>> >
>> > Signed-off-by: Ben Pfaff 
>> > ---
>>
>> Looks good to me.
>> One limitation is that since this patch init the ovs barrier for size=2,
>> the ovsrcu_exit() can only be used in ovs-vswitchd.  Otherwise users
>> have to remember to bump up this barrier number.
>
> I don't understand that comment.  Can you explain?  Why would other
> daemons need a larger barrier number?

We init the postpone_barrier to 2
+ ovs_barrier_init(_barrier, 2);

and every daemon calls ovsrcu_exit will call
+ovs_barrier_block(_barrier);
which increments the counter

and the ovsrcu_postpone_thread also calls
+ovs_barrier_block(_barrier);

So if one more daemon calls ovsrcu_exit, then we have to bump the number to 3?
William
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 5/5] ovs-vswitchd: Avoid or suppress memory leak warning for glibc aio.

2018-01-29 Thread William Tu
On Thu, Jan 25, 2018 at 3:39 PM, Ben Pfaff  wrote:
> The asynchronous IO library in glibc starts threads that show up as memory
> leaks in valgrind.  This commit attempts to avoid the warnings by flushing
> all the asynchronous I/O to the log file before exiting.  This only does
> part of the job for glibc since it keeps the threads around for some
> undefined idle time before killing them, so in addition this commit adds a
> valgrind suppression to stop displaying these warnings in any case.
>
> Signed-off-by: Ben Pfaff 
> ---

Looks good to me.
Acked-by: William Tu 

>  include/openvswitch/vlog.h |  1 +
>  lib/vlog.c | 10 ++
>  tests/glibc.supp   |  9 +
>  vswitchd/ovs-vswitchd.c|  1 +
>  4 files changed, 21 insertions(+)
>
> diff --git a/include/openvswitch/vlog.h b/include/openvswitch/vlog.h
> index 3a4042113a36..98d477911acc 100644
> --- a/include/openvswitch/vlog.h
> +++ b/include/openvswitch/vlog.h
> @@ -146,6 +146,7 @@ void vlog_set_syslog_target(const char *target);
>  /* Initialization. */
>  void vlog_init(void);
>  void vlog_enable_async(void);
> +void vlog_disable_async(void);
>
>  /* Functions for actual logging. */
>  void vlog(const struct vlog_module *, enum vlog_level, const char *format, 
> ...)
> diff --git a/lib/vlog.c b/lib/vlog.c
> index 6e87665fcd11..f286950431ff 100644
> --- a/lib/vlog.c
> +++ b/lib/vlog.c
> @@ -836,6 +836,16 @@ vlog_enable_async(void)
>  ovs_mutex_unlock(_file_mutex);
>  }
>
> +void
> +vlog_disable_async(void)
> +{
> +ovs_mutex_lock(_file_mutex);
> +log_async = false;
> +async_append_destroy(log_writer);
> +log_writer = NULL;
> +ovs_mutex_unlock(_file_mutex);
> +}
> +
>  /* Print the current logging level for each module. */
>  char *
>  vlog_get_levels(void)
> diff --git a/tests/glibc.supp b/tests/glibc.supp
> index 948ee013f458..031f8bde0f77 100644
> --- a/tests/glibc.supp
> +++ b/tests/glibc.supp
> @@ -15,3 +15,12 @@
> fun:set_up_timer
>  }
>
> +{
> +   aio
> +   Memcheck:Leak
> +   fun:calloc
> +   ...
> +   fun:allocate_stack
> +   ...
> +   fun:__aio_create_helper_thread
> +}
> diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c
> index 53e511999594..12cb5d494d41 100644
> --- a/vswitchd/ovs-vswitchd.c
> +++ b/vswitchd/ovs-vswitchd.c
> @@ -136,6 +136,7 @@ main(int argc, char *argv[])
>  bridge_exit(cleanup);
>  unixctl_server_destroy(unixctl);
>  service_stop();
> +vlog_disable_async();
>  ovsrcu_exit();
>
>  return 0;
> --
> 2.10.2
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 4/5] ovs-vswitchd: Fire RCU callbacks before exit to reduce memory leak warnings.

2018-01-29 Thread Ben Pfaff
On Mon, Jan 29, 2018 at 02:16:24PM -0800, William Tu wrote:
> On Thu, Jan 25, 2018 at 3:39 PM, Ben Pfaff  wrote:
> > ovs-vswitchd makes extensive use of RCU to defer freeing memory past the
> > latest time that it could be in use by a thread.  Until now, ovs-vswitchd
> > has not waited for RCU callbacks to fire before exiting.  This meant that
> > in many cases, when ovs-vswitchd exits, many blocks of memory are stuck in
> > RCU callback queues, which valgrind often reports as "possible" memory
> > leaks.
> >
> > This commit adds a new function ovsrcu_exit() that waits and fires as many
> > RCU callbacks as it reasonably can.  It can only do so for the thread that
> > calls it and the thread that calls the callbacks, but generally speaking
> > ovs-vswitchd shuts down other threads before it exits anyway, so this is
> > pretty good.
> >
> > In my testing this eliminates most valgrind warnings for tests that run
> > ovs-vswitchd.  This ought to make it easier to distinguish new leaks that
> > are real from existing non-leaks.
> >
> > Signed-off-by: Ben Pfaff 
> > ---
> 
> Looks good to me.
> One limitation is that since this patch init the ovs barrier for size=2,
> the ovsrcu_exit() can only be used in ovs-vswitchd.  Otherwise users
> have to remember to bump up this barrier number.

I don't understand that comment.  Can you explain?  Why would other
daemons need a larger barrier number?
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 4/5] ovs-vswitchd: Fire RCU callbacks before exit to reduce memory leak warnings.

2018-01-29 Thread William Tu
On Thu, Jan 25, 2018 at 3:39 PM, Ben Pfaff  wrote:
> ovs-vswitchd makes extensive use of RCU to defer freeing memory past the
> latest time that it could be in use by a thread.  Until now, ovs-vswitchd
> has not waited for RCU callbacks to fire before exiting.  This meant that
> in many cases, when ovs-vswitchd exits, many blocks of memory are stuck in
> RCU callback queues, which valgrind often reports as "possible" memory
> leaks.
>
> This commit adds a new function ovsrcu_exit() that waits and fires as many
> RCU callbacks as it reasonably can.  It can only do so for the thread that
> calls it and the thread that calls the callbacks, but generally speaking
> ovs-vswitchd shuts down other threads before it exits anyway, so this is
> pretty good.
>
> In my testing this eliminates most valgrind warnings for tests that run
> ovs-vswitchd.  This ought to make it easier to distinguish new leaks that
> are real from existing non-leaks.
>
> Signed-off-by: Ben Pfaff 
> ---

Looks good to me.
One limitation is that since this patch init the ovs barrier for size=2,
the ovsrcu_exit() can only be used in ovs-vswitchd.  Otherwise users
have to remember to bump up this barrier number.

Acked-by: William Tu 

>  lib/ovs-rcu.c   | 55 
> +++--
>  lib/ovs-rcu.h   |  2 ++
>  vswitchd/ovs-vswitchd.c |  2 ++
>  3 files changed, 57 insertions(+), 2 deletions(-)
>
> diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c
> index 05a46d4524e3..ebc8120f0fd3 100644
> --- a/lib/ovs-rcu.c
> +++ b/lib/ovs-rcu.c
> @@ -19,6 +19,7 @@
>  #include "ovs-rcu.h"
>  #include "fatal-signal.h"
>  #include "guarded-list.h"
> +#include "latch.h"
>  #include "openvswitch/list.h"
>  #include "ovs-thread.h"
>  #include "openvswitch/poll-loop.h"
> @@ -58,6 +59,9 @@ static struct ovs_mutex ovsrcu_threads_mutex;
>  static struct guarded_list flushed_cbsets;
>  static struct seq *flushed_cbsets_seq;
>
> +static struct latch postpone_exit;
> +static struct ovs_barrier postpone_barrier;
> +
>  static void ovsrcu_init_module(void);
>  static void ovsrcu_flush_cbset__(struct ovsrcu_perthread *, bool);
>  static void ovsrcu_flush_cbset(struct ovsrcu_perthread *);
> @@ -111,6 +115,8 @@ ovsrcu_quiesced(void)
>  } else {
>  static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
>  if (ovsthread_once_start()) {
> +latch_init(_exit);
> +ovs_barrier_init(_barrier, 2);
>  ovs_thread_create("urcu", ovsrcu_postpone_thread, NULL);
>  ovsthread_once_done();
>  }
> @@ -232,6 +238,49 @@ ovsrcu_synchronize(void)
>  ovsrcu_quiesce_end();
>  }
>
> +/* Waits until as many postponed callbacks as possible have executed.
> + *
> + * As a side effect, stops the background thread that calls the callbacks and
> + * prevents it from being restarted.  This means that this function should 
> only
> + * be called soon before a process exits, as a mechanism for releasing memory
> + * to make memory leaks easier to detect, since any further postponed 
> callbacks
> + * won't actually get called.
> + *
> + * This function can only wait for callbacks registered by the current thread
> + * and the background thread that calls the callbacks.  Thus, it will be most
> + * effective if other threads have already exited. */
> +void
> +ovsrcu_exit(void)
> +{
> +/* Stop the postpone thread and wait for it to exit.  Otherwise, there's 
> no
> + * way to wait for that thread to finish calling callbacks itself. */
> +if (!single_threaded()) {
> +ovsrcu_quiesced();  /* Ensure that the postpone thread exists. */
> +latch_set(_exit);
> +ovs_barrier_block(_barrier);
> +}
> +
> +/* Repeatedly:
> + *
> + *- Wait for a grace period.  One important side effect is to push 
> the
> + *  running thread's cbset into 'flushed_cbsets' so that the next 
> call
> + *  has something to call.
> + *
> + *- Call all the callbacks in 'flushed_cbsets'.  If there aren't any,
> + *  we're done, otherwise the callbacks themselves might have 
> requested
> + *  more deferred callbacks so we go around again.
> + *
> + * We limit the number of iterations just in case some bug causes an
> + * infinite loop.  This function is just for making memory leaks easier 
> to
> + * spot so there's no point in breaking things on that basis. */
> +for (int i = 0; i < 8; i++) {
> +ovsrcu_synchronize();
> +if (!ovsrcu_call_postponed()) {
> +break;
> +}
> +}
> +}
> +
>  /* Registers 'function' to be called, passing 'aux' as argument, after the
>   * next grace period.
>   *
> @@ -303,15 +352,17 @@ ovsrcu_postpone_thread(void *arg OVS_UNUSED)
>  {
>  pthread_detach(pthread_self());
>
> -for (;;) {
> +while (!latch_is_set(_exit)) {
>  uint64_t seqno = seq_read(flushed_cbsets_seq);
> 

[ovs-dev] [PATCH] ovn-nbctl: update manpage for lsp-set-type.

2018-01-29 Thread Han Zhou
Signed-off-by: Han Zhou 
---
 ovn/utilities/ovn-nbctl.8.xml | 44 ++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/ovn/utilities/ovn-nbctl.8.xml b/ovn/utilities/ovn-nbctl.8.xml
index 3688d35..f5ad360 100644
--- a/ovn/utilities/ovn-nbctl.8.xml
+++ b/ovn/utilities/ovn-nbctl.8.xml
@@ -280,7 +280,49 @@
 
   lsp-set-type port type
   
-Set the type for the logical port.  No special types have been 
implemented yet.
+
+  Set the type for the logical port.  The type must be one of the 
following:
+
+
+
+  (empty string)
+  
+A VM (or VIF) interface.
+  
+
+  router
+  
+A connection to a logical router.
+  
+
+  localnet
+  
+A connection to a locally accessible network from each 
ovn-controller
+instance. A logical switch can only have a single localnet port
+attached. This is used to model direct connectivity to an existing
+network.
+  
+
+  localport
+  
+A connection to a local VIF. Traffic that arrives on a localport is
+never forwarded over a tunnel to another chassis. These ports are
+present on every chassis and have the same address in all of them.
+This is used to model connectivity to local services that run on
+every hypervisor.
+  
+
+  l2gateway
+  
+A connection to a physical network.
+  
+
+  vtep
+  
+A port to a logical switch on a VTEP gateway.
+  
+
+
   
 
   lsp-get-type port
-- 
2.1.0

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] Outsourcing y sus Consecuencias Fiscales

2018-01-29 Thread Infracciones y sanciones
 
OUTSOURCING Y SUS CONSECUENCIAS FISCALES
Febrero 07 - webinar Interactivo

Este webinar está diseñado para las empresas que prestan servicios de 
outsourcing de personal o las interesadas en contratar personal bajo la figura 
del outsourcing.

TEMARIO:

El Outsourcing en términos de la Ley Federal del Trabajo.
Tratamiento fiscal del outsourcing en impuesto sobre la renta.
Tratamiento fiscal del Outsourcing en la Ley del Seguro Social .
Tratamiento fiscal del Outsourcing en la Ley de INFONAVIT.
Tratamiento fiscal del Outsourcing en Impuesto sobre nóminas 
 
Temario e Inscripciones:

Respondiendo por este medio "Outsourcing"+TELÉFONO + NOMBRE o marcando al:

045 + 5515546630  



___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v1] doc: Added OVS Conntrack tutorial

2018-01-29 Thread Ashish Varma
OVS supports connection tracker related match fields and actions.
Added a tutorial to demonstrate the basic use cases for some of these
match fields and actions.

Signed-off-by: Ashish Varma 
---
 Documentation/automake.mk |   1 +
 Documentation/tutorials/index.rst |   1 +
 Documentation/tutorials/ovs-conntrack.rst | 572 ++
 3 files changed, 574 insertions(+)
 create mode 100644 Documentation/tutorials/ovs-conntrack.rst

diff --git a/Documentation/automake.mk b/Documentation/automake.mk
index 2b202cb..93cf3a1 100644
--- a/Documentation/automake.mk
+++ b/Documentation/automake.mk
@@ -27,6 +27,7 @@ DOC_SOURCE = \
Documentation/tutorials/ovs-advanced.rst \
Documentation/tutorials/ovn-openstack.rst \
Documentation/tutorials/ovn-sandbox.rst \
+   Documentation/tutorials/ovs-conntrack.rst \
Documentation/topics/index.rst \
Documentation/topics/bonding.rst \
Documentation/topics/idl-compound-indexes.rst \
diff --git a/Documentation/tutorials/index.rst 
b/Documentation/tutorials/index.rst
index c2d343b..ab90b7c 100644
--- a/Documentation/tutorials/index.rst
+++ b/Documentation/tutorials/index.rst
@@ -43,3 +43,4 @@ vSwitch.
ovs-advanced
ovn-sandbox
ovn-openstack
+   ovs-conntrack
diff --git a/Documentation/tutorials/ovs-conntrack.rst 
b/Documentation/tutorials/ovs-conntrack.rst
new file mode 100644
index 000..31cd1a8
--- /dev/null
+++ b/Documentation/tutorials/ovs-conntrack.rst
@@ -0,0 +1,572 @@
+..
+  Licensed under the Apache License, Version 2.0 (the "License"); you may
+  not use this file except in compliance with the License. You may obtain
+  a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+  License for the specific language governing permissions and limitations
+  under the License.
+
+  Convention for heading levels in Open vSwitch documentation:
+
+  ===  Heading 0 (reserved for the title in a document)
+  ---  Heading 1
+  ~~~  Heading 2
+  +++  Heading 3
+  '''  Heading 4
+
+  Avoid deeper levels because they do not render well.
+
+==
+OVS Conntrack Tutorial
+==
+
+OVS can be used with the Connection tracking system
+where OpenFlow flow can be used to match on the state of a TCP, UDP, ICMP,
+etc., connections. (Connection tracking system supports tracking of both
+stateful and stateless protocols)
+
+This tutorial demonstrates how OVS can use the connection tracking system
+to match on the TCP segments from connection setup to connection teardown.
+It will use OVS with the linux kernel module as the datapath for this
+tutorial. (datapath which utilizes the openvswitch kernel module to do
+the packet processing in the linux kernel)
+It was tested with the “master” branch of Open vSwitch.
+
+Definitions
+---
+
+**conntrack**: is a connection tracking module for stateful packet
+inspection.
+
+**pipeline**: is the packet processing pipeline which is the path taken by
+the packet when traversing through the tables where the packet matches the
+match fields of a flow in the table and performs the actions present in
+the matched flow.
+
+**network namespace**: is a way to create virtual routing domains within
+a single instance of linux kernel.  Each network namespace has its own
+instance of network tables (arp, routing) and certain interfaces attached
+to it.
+
+**flow**: used in this tutorial refers to the OpenFlow flow which can be
+programmed using an OpenFlow controller or OVS command line tools like
+ovs-ofctl which is used here.  A flow will have match fields and actions.
+
+Conntrack Related Fields
+
+
+Match Fields
+
+OVS supports following match fields related to conntrack:
+
+1. **ct_state**:
+The state of a connection matching the packet.
+Possible values:
+
+- *new*
+- *est*
+- *rel*
+- *rpl*
+- *inv*
+- *trk*
+- *snat*
+- *dnat*
+
+Each of these flags is preceded by either a "+" for a flag that
+must be set, or a "-" for a flag that must be unset.
+Multiple flags can also be specified e.g. ct_state=+trk+new
+We will see the usage of some these flags below. For a detailed
+description, please see the OVS fields documentation at:
+http://openvswitch.org/support/dist-docs/ovs-fields.7.txt
+
+2. **ct_zone**: A zone is an independent connection tracking context which can
+be set by a ct action.
+A 16-bit ct_zone set by the most recent ct action (by an OpenFlow
+flow on a conntrack entry) can be used as a match field in
+another flow entry.
+
+3. **ct_mark**:
+The 32-bit metadata committed, by an action 

[ovs-dev] [PATCH] ofctrl: Remove unused declaration.

2018-01-29 Thread Han Zhou
Signed-off-by: Han Zhou 
---
 ovn/controller/ofctrl.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/ovn/controller/ofctrl.h b/ovn/controller/ofctrl.h
index 125f9a4..d53bc68 100644
--- a/ovn/controller/ofctrl.h
+++ b/ovn/controller/ofctrl.h
@@ -54,6 +54,4 @@ void ofctrl_add_flow(struct hmap *desired_flows, uint8_t 
table_id,
  uint16_t priority, uint64_t cookie,
  const struct match *, const struct ofpbuf *ofpacts);
 
-void ofctrl_flow_table_clear(void);
-
 #endif /* ovn/ofctrl.h */
-- 
2.1.0

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] packet-type-aware.at: Fix check failure

2018-01-29 Thread Yifeng Sun
The test (ptap - recirculate after packet_type change) failed because
function format_odp_key_attr__ outputs src, dst and proto in the case of
OVS_KEY_ATTR_IPV4.

Signed-off-by: Yifeng Sun 
---
 tests/packet-type-aware.at | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/packet-type-aware.at b/tests/packet-type-aware.at
index f43095c60a45..634fa5f6603f 100644
--- a/tests/packet-type-aware.at
+++ b/tests/packet-type-aware.at
@@ -1021,7 +1021,7 @@ AT_CHECK([
 ], [0], [flow-dump from non-dpdk interfaces:
 
recirc_id(0),in_port(p0),packet_type(ns=0,id=0),eth(src=aa:bb:cc:00:00:02,dst=aa:bb:cc:00:00:01),eth_type(0x0800),ipv4(dst=20.0.0.1,proto=47,frag=no),
 packets:3, bytes:378, used:0.0s, actions:tnl_pop(gre_sys)
 
tunnel(src=20.0.0.2,dst=20.0.0.1,flags(-df-csum)),recirc_id(0),in_port(gre_sys),packet_type(ns=1,id=0x8847),mpls(label=999/0x0,tc=0/0,ttl=64/0x0,bos=1/1),
 packets:3, bytes:264, used:0.0s, 
actions:push_eth(src=00:00:00:00:00:00,dst=00:00:00:00:00:00),pop_mpls(eth_type=0x800),recirc(0x1)
-tunnel(src=20.0.0.2,dst=20.0.0.1,flags(-df-csum)),recirc_id(0x1),in_port(gre_sys),packet_type(ns=0,id=0),eth(dst=00:00:00:00:00:00),eth_type(0x0800),ipv4(ttl=64,frag=no),
 packets:3, bytes:294, used:0.0s, actions:set(ipv4(ttl=63)),int-br
+tunnel(src=20.0.0.2,dst=20.0.0.1,flags(-df-csum)),recirc_id(0x1),in_port(gre_sys),packet_type(ns=0,id=0),eth(dst=00:00:00:00:00:00),eth_type(0x0800),ipv4(dst=192.168.10.10,proto=1,ttl=64,frag=no),
 packets:3, bytes:294, used:0.0s, actions:set(ipv4(ttl=63)),int-br
 ])
 
 ovs-appctl time/warp 1000
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] poc: Introduce Proof of Concepts (Package building)

2018-01-29 Thread Ansis Atteka
On 26 January 2018 at 09:13, Gregory Rose  wrote:
> On 1/19/2018 7:55 PM, Ansis Atteka wrote:
>>
>> From: Ansis Atteka 
>>
>> This patch sets up foundations for Proof of Concepts that
>> simply materialize documentation into Ansible instructions
>> executed in virtualized Vagrant environment.
>>
>> This Proof of Concept allows to easily build:
>> 1. *.deb packages on Ubuntu 16.04; AND
>> 2. *.rpm packages on CentOS 7.4.
>> It also sets up DEB and RPM repository over HTTP that can
>> be used to pull these openvswitch packages with apt-get
>> or yum from another host.
>>
>> This particular Proof of Concept is intended to address
>> following use-cases:
>> 1. for new OVS users to see how debian and rpm packages are
>> built;
>> 2. for developers to easily check for packaging build
>> regressions;
>> 3. for developers to easily share their sandbox builds
>> into QE setups (opposed to manually copying binaries);
>> 4. for developers to add other Proof of Concepts
>> that possibly may require full end-to-end integration
>> with other thirdparty projects (e.g. DPI, libvirt, IPsec)
>> and need Open vSwitch packages.
>>
>> Signed-off-by: Ansis Atteka 
>> ---
>>   .gitignore   |   2 +
>>   Documentation/topics/testing.rst |  46 ++
>>   Makefile.am  |   3 ++
>>   poc/builders/Vagrantfile |  35 ++
>>   poc/playbook-centos-builder.yml  | 100
>> +++
>>   poc/playbook-ubuntu-builder.yml  |  66 ++
>>   6 files changed, 252 insertions(+)
>>   create mode 100644 poc/builders/Vagrantfile
>>   create mode 100644 poc/playbook-centos-builder.yml
>>   create mode 100644 poc/playbook-ubuntu-builder.yml
>>
>> diff --git a/.gitignore b/.gitignore
>> index 8019bee41..81faf270d 100644
>> --- a/.gitignore
>> +++ b/.gitignore
>> @@ -16,6 +16,7 @@
>>   *.lib
>>   *.pdb
>>   *.pyc
>> +*.retry
>>   *.so
>>   *.suo
>>   **/*.sym
>> @@ -29,6 +30,7 @@
>>   .dirstamp
>>   .libs
>>   .tmp_versions
>> +.vagrant
>>   .gitattributes
>>   /Makefile
>>   /Makefile.in
>> diff --git a/Documentation/topics/testing.rst
>> b/Documentation/topics/testing.rst
>> index a49336b79..4d93944c7 100644
>> --- a/Documentation/topics/testing.rst
>> +++ b/Documentation/topics/testing.rst
>> @@ -389,3 +389,49 @@ validate the suitability of different vSwitch
>> implementations in a telco
>>   deployment environment. More information can be found on the `OPNFV
>> wiki`_.
>> .. _OPNFV wiki: https://wiki.opnfv.org/display/vsperf/VSperf+Home
>> +
>> +Proof of Concepts
>> +~
>> +
>> +Proof of Concepts are documentation materialized into Ansible recipes
>> +executed in Virtualbox environment orchestrated by Vagrant.  Proof of
>> +Concepts allow developers to create small virtualized setups that
>> +demonstrate how certain Open vSwitch features are intended to work,
>> +especially when integration with thirdparty software is involved.
>
> s/thirdparty/third party
>
>> +
>> +The host where Vagrant runs does not need to have any special software
>> +installed besides vagrant, virtualbox and ansible.
>
>
> Seem it also requires Apache?
Since the Ansible recipes are executed only on the guest (and not
host) then only guests need Apache/httpd installed. Let me know if I
am missing something and you still had to install Apache on host?

Apache is required only to have the packages distributed via repository.

>
>> +
>> +The following Proof of Concepts are supported:
>> +
>> +Builders
>> +
>> +
>> +This Proof of Concept demonstrates integration with Debian and RPM
>> +packaging tools:
>> +
>> +$ cd ./poc/builders
>> +
>> +# When setting up Proof of Concept for the first time run:
>> +$ vagrant up
>> +
>> +Once that command finished you can get packages from /var/www/html
>> +directory.  Since those hosts are also configured as repositories then
>> +you can add them to /etc/apt/sources.list.d or /etc/yum.repos.d
>> +configuration files.
>> +
>> +# When you have made changes to OVS and want to rebuild packages run:
>> +$ git commit -a
>> +$ vagrant rsync && vagrant provision
>> +
>> +Each packages are rebuilt the Open vSwitch release number increases
>
>
> Maybe "When packages are rebuilt" ?

You are right. Will send V2.

>
> There are some checkpatch warnings about lines too long but I'm not worried
> about those.  There
> are a few other checkpatch warnings about lines with trailing whitespace.
> Those should probably
> be cleaned up.

Agree about whitespace errors, not sure how I missed them...

Will look into truncating lines to 80 characters. There are some lines that
require file-system paths so it may require a little more creativity to truncate
them without sacrificing readability.

>
> The rest of the patch seems fine.  I applied it and ran the simple test case
> you outline
> and it worked as 

[ovs-dev] Open vSwitch selected as ACM SOSR Software Systems Award Winner for 2018

2018-01-29 Thread Ben Pfaff
Yesterday, the ACM SOSR awards committee announced that it was granting
Open vSwitch its second annual Software Systems Award, which is given to
recognize the development of a software system that has had a
significant impact on SDN research, implementations, and tools.  The
award comes along with a small cash prize (which will be directed toward
OVS community benefit) and an invited talk at SOSR in Los Angeles on
March 28-29.

There is a one-line announcement of the award selection at
https://conferences.sigcomm.org/sosr/2018/index.html and more
information about the award itself at
https://conferences.sigcomm.org/sosr/2018/award.html.

Thanks to everyone for your support and contributions to Open vSwitch
over the years!  OVS has come to be a major force in industry and
academia and we could have not done it without all the great people in
our community.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] Mempool issue for OVS 2.9

2018-01-29 Thread Jan Scheurich
> -Original Message-
> From: Ilya Maximets [mailto:i.maxim...@samsung.com]
> Sent: Monday, 29 January, 2018 09:35
> To: Jan Scheurich ; Venkatesan Pradeep 
> ; Stokes, Ian
> ; d...@openvswitch.org
> Cc: Kevin Traynor ; Flavio Leitner ; 
> Loftus, Ciara ; Kavanagh, Mark B
> ; Ben Pfaff (b...@ovn.org) ; 
> acon...@redhat.com; disc...@openvswitch.org
> Subject: Re: Mempool issue for OVS 2.9
> 
> On 29.01.2018 11:19, Jan Scheurich wrote:
> > Hi,
> >
> > I'd like to take one step back and look at how many mbufs we actually 
> > need.
> >
> > Today mbufs are consumed in the following places:
> >
> >  1. Rx queues of **physical** dpdk ports: dev->requested_n_rxq * 
> > dev->requested_rxq_size
> > Note 1: These mbufs are hogged up at all times.
> > Note 2: There is little point in configuring more rx queues per phy 
> > port than there are PMDs to poll them.
> > Note 3: The rx queues of vhostuser ports exist as virtqueues in the 
> > guest and do not hog mbufs.
> >  2. One batch per PMD during processing: #PMD * NETDEV_MAX_BURST
> >  3. One batch per tx queue with time-based tx batching: 
> > dev->requested_n_txq * NETDEV_MAX_BURST
> >  4. Tx queues of **physical** ports: dev->requested_n_txq * expected peak 
> > tx queue fill level
> > Note 1:  The maximum of 2K mbufs per tx queue can only be reached if 
> > the OVS transmit rate exceeds the line rate for a long time.
> This can only happen for large packets and when the traffic originates from 
> VMs on the compute node. This would be a case of under-
> dimensioning and packets would be dropped in any case. Excluding that 
> scenario, a typical peak tx queue fill level would be when all
> PMDs transmit a full batch at the same time: #PMDs * NETDEV_MAX_BURST.
> 
> Above assumption is wrong. Just look at ixgbe driver:
> drivers/net/ixgbe/ixgbe_rxtx.c: tx_xmit_pkts():
> 
>/*
> * Begin scanning the H/W ring for done descriptors when the
> * number of available descriptors drops below tx_free_thresh.  For
> * each done descriptor, free the associated buffer.
> */
>if (txq->nb_tx_free < txq->tx_free_thresh)
>┊   ixgbe_tx_free_bufs(txq);
> 
> The default value for 'tx_free_thresh' is 32. So, if I'll configure number
> of TX descriptors to 4096, driver will start to free mbufs only when it will
> have more than 4063 mbufs inside its TX queue. No matter how frequent calls
> to send() function.

OK, but that doesn't change my general argument. The mbufs hogged in the tx 
side of the phy port driver are coming from all ports (least likely the port 
itself). Considering them in dimensioning the port's private mempool is 
conceptually wrong. In my simplified dimensioning formula below I have already 
assumed full occupancy of the tx queue for phy ports. The second key 
observation is that vhostuser ports do not hog mbufs at all. And vhost zero 
copy doesn't change that.

BTW, is there any reason why phy drivers should free tx mbufs only when the tx 
ring is close to becoming full? I'd understand the need to free them in 
batches for performance reasons, but is there no cheap possibility to do this 
earlier?

> 
> > Note 2: Vhostuser ports do not store mbufs in tx queues due to copying 
> > to virtio descriptors
> >
> >
> > For concreteness let us use an example of a typical, rather large OVS 
> > deployment in an NFVI cloud:
> >
> >   * Two cores with 4 PMDs per NUMA socket using HT.
> >   * Two physical ports using RSS over 4 rx queues to enable load-sharing 
> > over the 4 local PMDs and 9 tx queues (8 PMDs plus non PMD)
> >   * 100 vhostuser ports with a varying number of rx and tx queue pairs (128 
> > in total).
> >
> >
> > In the above example deployments this translates into
> >
> >  1. 4 * 2K = 8K mbufs per physical port (16K in total)
> >  2. 8 * 32 = 256 mbufs total
> >  3. (128 +  23*9) * 32 = 4672 mbufs in total
> >  4. 9 * 32 = 288 mbufs per physical port (Adding some safety margin, a 
> > total of 2K mbufs)
> >
> > ---
> > Total : 23K mbufs
> >
> > This is way lower than the size of the earlier shared mempool (256K mbufs), 
> > which explains why we have never observed out of mbuf
> drops in our NFVI deployments. The vswitchd crash that triggered the change 
> to per-port mempools only happened because they tried to
> configure 64 rx and tx queues per physical port for multiple ports. I can’t 
> see any reason for configuring more rx and tx queues than
> polling PMDs, though.
> >
> > The actual consumption of mbufs scales primarily with the number of 
> > physical ports (a, c and d) and only to a much lower degree with
> the number of vhost ports/queues (c).
> >
> > Except for the phy rx queues, all other cases buffer a statistical mix of 
> > mbufs received on all ports. 

Re: [ovs-dev] [PATCH v3 3/3] datapath-windows: Optimize conntrack lock implementation.

2018-01-29 Thread Anand Kumar
Hi Alin,

Thanks for the review. I’ll address the warning and send out the patch.

Thanks,
Anand Kumar

On 1/29/18, 5:20 AM, "Alin Serdean"  wrote:

Trimming the patch a bit.

Just one small nit from the static analyzer inlined.

Rest looks good.

Acked-by: Alin Gabriel Serdean 

<--8<-->
 /*
@@ -124,12 +135,9 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context)  VOID
 OvsCleanupConntrack(VOID)
 {
-LOCK_STATE_EX lockState, lockStateNat;
-NdisAcquireRWLockWrite(ovsConntrackLockObj, , 0);
+LOCK_STATE_EX lockStateNat;
 ctThreadCtx.exit = 1;
 KeSetEvent(, 0, FALSE);
-NdisReleaseRWLock(ovsConntrackLockObj, );
-
 KeWaitForSingleObject(ctThreadCtx.threadObject, Executive,
   KernelMode, FALSE, NULL);
 ObDereferenceObject(ctThreadCtx.threadObject);
@@ -142,8 +150,14 @@ OvsCleanupConntrack(VOID)
 ovsConntrackTable = NULL;
 }
 
-NdisFreeRWLock(ovsConntrackLockObj);
-ovsConntrackLockObj = NULL;
+for (UINT32 i = 0; i < CT_HASH_TABLE_SIZE; i++) {
+if (ovsCtBucketLock[i] != NULL) {
[Alin Serdean] datapath-windows\ovsext\conntrack.c(154): warning C6001: 
Using uninitialized memory '*ovsCtBucketLock'.
+NdisFreeRWLock(ovsCtBucketLock[i]);
+}
+}
+OvsFreeMemoryWithTag(ovsCtBucketLock, OVS_CT_POOL_TAG);
+ovsCtBucketLock = NULL;
+
 NdisAcquireRWLockWrite(ovsCtNatLockObj, , 0);
 OvsNatCleanup();
 NdisReleaseRWLock(ovsCtNatLockObj, ); @@ -179,11 +193,20 
@@ OvsCtUpdateFlowKey(struct OvsFlowKey *key,
 }
 }
 
<--8<-->




___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v1 0/5] datapath: enable NSH support in kernel compat mode

2018-01-29 Thread Gregory Rose

On 1/10/2018 11:53 PM, Yi Yang wrote:

This patch series is to backport NSH support patches in Linux net-next tree
to OVS in order that it can support NSH in kernel compat mode.

Yi Yang (5):
   datapath: ether: add NSH ethertype
   datapath: vxlan: factor out VXLAN-GPE next protocol
   datapath: net: add NSH header structures and helpers
   datapath: nsh: add GSO support
   datapath: enable NSH support

  NEWS  |   1 +
  datapath/Modules.mk   |   4 +-
  datapath/actions.c| 116 
  datapath/datapath.c   |   4 +
  datapath/flow.c   |  51 
  datapath/flow.h   |   7 +
  datapath/flow_netlink.c   | 343 +-
  datapath/flow_netlink.h   |   5 +
  datapath/linux/Modules.mk |   2 +
  datapath/linux/compat/include/linux/if_ether.h|   4 +
  datapath/linux/compat/include/linux/openvswitch.h |   6 +-
  datapath/linux/compat/include/net/nsh.h   | 313 
  datapath/linux/compat/include/net/tun_proto.h |  49 
  datapath/linux/compat/include/net/vxlan.h |   6 -
  datapath/linux/compat/vxlan.c |  32 +-
  datapath/nsh.c| 142 +
  16 files changed, 1048 insertions(+), 37 deletions(-)
  create mode 100644 datapath/linux/compat/include/net/nsh.h
  create mode 100644 datapath/linux/compat/include/net/tun_proto.h
  create mode 100644 datapath/nsh.c



Hi Yi,

My apologies for the delay in reviewing this series.

I've finished up my review and I think it mostly looks pretty good but I 
did find an issue compiling on a 3.10.107 kernel build:


CC [M] 
/home/travis/build/gvrose8192/ovs-experimental/datapath/linux/vport-netdev.o
/home/travis/build/gvrose8192/ovs-experimental/datapath/linux/nsh.c:108:17: 
error: undefined identifier 'skb_gso_error_unwind'

CC [M] /home/travis/build/gvrose8192/ovs-experimental/datapath/linux/nsh.o
/home/travis/build/gvrose8192/ovs-experimental/datapath/linux/nsh.c: In 
function ‘nsh_gso_segment’:
/home/travis/build/gvrose8192/ovs-experimental/datapath/linux/nsh.c:108:3: 
error: implicit declaration of function ‘skb_gso_error_unwind’ 
[-Werror=implicit-function-declaration]

skb_gso_error_unwind(skb, htons(ETH_P_NSH), nsh_len,
^
cc1: some warnings being treated as errors
make[3]: *** 
[/home/travis/build/gvrose8192/ovs-experimental/datapath/linux/nsh.o] 
Error 1

make[3]: *** Waiting for unfinished jobs
make[2]: *** 
[_module_/home/travis/build/gvrose8192/ovs-experimental/datapath/linux] 
Error 2
make[2]: Leaving directory 
`/home/travis/build/gvrose8192/ovs-experimental/linux-3.10.107'

make[1]: *** [default] Error 2
make[1]: Leaving directory 
`/home/travis/build/gvrose8192/ovs-experimental/datapath/linux'

make: *** [all-recursive] Error 1

So we'll need to fix that up and I also think the patches will need to 
be rebased to current master.  That second part is my fault... so sorry 
again about that.


One other thing, I ran this through our standard 'make check and make 
check-kmod' tests and everything was fine so the patches don't seem 
break anything.  I'm still concerned though that the test coverage 
probably didn't hit any parts of your code.  I'm wondering if there is 
some way I can test the code path and get some test coverage there.  
Could you write up a self test for the tests/system-traffic.at kernel 
test?  Of if that's not practical is there some other way I could test 
this code?


Thanks,

- Greg
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] OVS DPDK: dpdk_merge pull request for master

2018-01-29 Thread Stokes, Ian
> -Original Message-
> From: Ben Pfaff [mailto:b...@ovn.org]
> Sent: Saturday, January 27, 2018 5:35 PM
> To: Stokes, Ian 
> Cc: ovs-dev@openvswitch.org
> Subject: Re: OVS DPDK: dpdk_merge pull request for master
> 
> Thanks for all the pull requests.  I merged all of these into their
> respective branches.

Thanks Ben.

Regards
Ian
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v4 3/3] datapath-windows: Optimize conntrack lock implementation.

2018-01-29 Thread Anand Kumar
Currently, there is one global lock for conntrack module, which protects
conntrack entries and conntrack table. All the NAT operations are
performed holding this lock.

This becomes inefficient, as the number of conntrack entries grows.
With new implementation, we will have two PNDIS_RW_LOCK_EX locks in
conntrack.

1. ovsCtBucketLock - one rw lock per bucket of the conntrack table,
which is shared by all the ct entries that belong to the same bucket.
2. lock -  a rw lock in OVS_CT_ENTRY structure that protects the members
of conntrack entry.

Also, OVS_CT_ENTRY structure will have a lock reference(bucketLockRef)
to the corresponding OvsCtBucketLock of conntrack table.
We need this reference to retrieve ovsCtBucketLock from ct entry
for delete operation.

Signed-off-by: Anand Kumar 
---
v1->v2: Address potential memory leak in conntrack initialization.
v2->v3: Fix invalid memory access after deleting ct entry.
v3->v4: Address warning "uninitialized memory"
---
 datapath-windows/ovsext/Conntrack-nat.c |   6 +
 datapath-windows/ovsext/Conntrack.c | 233 
 datapath-windows/ovsext/Conntrack.h |   3 +
 3 files changed, 157 insertions(+), 85 deletions(-)

diff --git a/datapath-windows/ovsext/Conntrack-nat.c 
b/datapath-windows/ovsext/Conntrack-nat.c
index 7975770..316c946 100644
--- a/datapath-windows/ovsext/Conntrack-nat.c
+++ b/datapath-windows/ovsext/Conntrack-nat.c
@@ -167,12 +167,16 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
 {
 UINT32 natFlag;
 const struct ct_endpoint* endpoint;
+LOCK_STATE_EX lockState;
+/* XXX: Move conntrack locks out of NAT after implementing lock in NAT. */
+NdisAcquireRWLockRead(entry->lock, , 0);
 /* When it is NAT, only entry->rev_key contains NATTED address;
When it is unNAT, only entry->key contains the UNNATTED address;*/
 const OVS_CT_KEY *ctKey = reverse ? >key : >rev_key;
 BOOLEAN isSrcNat;
 
 if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) {
+NdisReleaseRWLock(entry->lock, );
 return;
 }
 isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) ||
@@ -202,6 +206,7 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
 }
 } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){
 // XXX: IPv6 packet not supported yet.
+NdisReleaseRWLock(entry->lock, );
 return;
 }
 if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) {
@@ -215,6 +220,7 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
 }
 }
 }
+NdisReleaseRWLock(entry->lock, );
 }
 
 
diff --git a/datapath-windows/ovsext/Conntrack.c 
b/datapath-windows/ovsext/Conntrack.c
index 7d56a50..c90c000 100644
--- a/datapath-windows/ovsext/Conntrack.c
+++ b/datapath-windows/ovsext/Conntrack.c
@@ -31,7 +31,7 @@
 KSTART_ROUTINE OvsConntrackEntryCleaner;
 static PLIST_ENTRY ovsConntrackTable;
 static OVS_CT_THREAD_CTX ctThreadCtx;
-static PNDIS_RW_LOCK_EX ovsConntrackLockObj;
+static PNDIS_RW_LOCK_EX *ovsCtBucketLock = NULL;
 static PNDIS_RW_LOCK_EX ovsCtNatLockObj;
 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
 static LONG ctTotalEntries;
@@ -49,20 +49,14 @@ MapNlToCtTuple(POVS_MESSAGE msgIn, PNL_ATTR attr,
 NTSTATUS
 OvsInitConntrack(POVS_SWITCH_CONTEXT context)
 {
-NTSTATUS status;
+NTSTATUS status = STATUS_SUCCESS;
 HANDLE threadHandle = NULL;
 ctTotalEntries = 0;
+UINT32 numBucketLocks = CT_HASH_TABLE_SIZE;
 
 /* Init the sync-lock */
-ovsConntrackLockObj = NdisAllocateRWLock(context->NdisFilterHandle);
-if (ovsConntrackLockObj == NULL) {
-return STATUS_INSUFFICIENT_RESOURCES;
-}
-
 ovsCtNatLockObj = NdisAllocateRWLock(context->NdisFilterHandle);
 if (ovsCtNatLockObj == NULL) {
-NdisFreeRWLock(ovsConntrackLockObj);
-ovsConntrackLockObj = NULL;
 return STATUS_INSUFFICIENT_RESOURCES;
 }
 
@@ -71,15 +65,27 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context)
  * CT_HASH_TABLE_SIZE,
  OVS_CT_POOL_TAG);
 if (ovsConntrackTable == NULL) {
-NdisFreeRWLock(ovsConntrackLockObj);
-ovsConntrackLockObj = NULL;
 NdisFreeRWLock(ovsCtNatLockObj);
 ovsCtNatLockObj = NULL;
 return STATUS_INSUFFICIENT_RESOURCES;
 }
 
-for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
+ovsCtBucketLock = OvsAllocateMemoryWithTag(sizeof(PNDIS_RW_LOCK_EX)
+   * CT_HASH_TABLE_SIZE,
+   OVS_CT_POOL_TAG);
+if (ovsCtBucketLock == NULL) {
+status = STATUS_INSUFFICIENT_RESOURCES;
+goto freeTable;
+}
+
+for (UINT32 i = 0; i < CT_HASH_TABLE_SIZE; i++) {
 InitializeListHead([i]);
+ovsCtBucketLock[i] = NdisAllocateRWLock(context->NdisFilterHandle);
+if (ovsCtBucketLock[i] == NULL) {
+status = 

[ovs-dev] FW: OVS-DPDK full offload RFC proposal discussion

2018-01-29 Thread Chandran, Sugesh
Initial discussion on the OVS-DPDK RFC approach.
We will be discussing about the following points in the meeting today.


Regards
_Sugesh

From: Finn Christensen [mailto:f...@napatech.com]
Sent: Friday, January 26, 2018 1:41 PM
To: Chandran, Sugesh ; Loftus, Ciara 
; Doherty, Declan 
Subject: RE: OVS-DPDK full offload RFC proposal discussion

Thanks Sugesh,

See my comments below.

I'll be on the conf call on Monday.

Regards,
Finn


From: Chandran, Sugesh [mailto:sugesh.chand...@intel.com]
Sent: 25. januar 2018 21:33
To: Finn Christensen >; Loftus, 
Ciara >; Doherty, Declan 
>
Subject: RE: OVS-DPDK full offload RFC proposal discussion

Hi Finn,

Once again thank you for putting these up.
Please find my comments inline below.


Regards
_Sugesh

From: Finn Christensen [mailto:f...@napatech.com]
Sent: Tuesday, January 23, 2018 11:42 AM
To: Chandran, Sugesh 
>; Loftus, Ciara 
>; Doherty, Declan 
>
Subject: OVS-DPDK full offload RFC proposal discussion

Hi Sugesh,

My apology for not sending this earlier.
As discussed in meeting, I here send you a semi-detailed description of how we 
see the next step towards OVS-DPDK hw full offload.
Please add all the Intel people who wants to participate in this email thread.


Proposal: OVS changes for full offload, as an addition to the partial offload 
currently proposed.

Generally let the hw-offloaded flow match+action functionality be a slave of 
the megaflow cache. Let it be seamlessly offloaded when applicable (when all 
flow actions are in the range of supported actions implemented). Otherwise 
failover to partial offload, and if no success, normal SW switching will be 
used.

1)  Handle OUTPUT action:
Map odp_port_no to DPDK port_id, so that an OVS_ACTION_ATTR_OUTPUT may be 
converted into a netdev_dpdk device known port_id. If the port is not found in 
dpdk_list, or the specific dpdk device does not handle hw-offloading, do 
partial-offload (don't use actions besides the partial-offload added MARK and 
RSS).
Multiple OUTPUT actions may be specified (in case of flooding), then don't full 
offload.
a.  Register ODP port number in netdev_dpdk on DPDK_DEV_ETH instances (put 
odp_port_no in netdev_dpdk structure).
b.  In netdev_dpdk_add_rte_flow_offload() function, catch 
OVS_ACTION_ATTR_OUTPUT and find the dpdk_dev from dpdk_list of which matches 
its odp_port_no. Then setup a RTE_FLOW_ACTION_TYPE_ETHDEV_PORT containing DPDK 
port_id for target port.
[Sugesh] Yes, that make sense. Do you think the representor port can also be 
defined as normal DPDK ports? We are experiencing some difficulties when trying 
to overload the same DPDK port for representor ports/accelerated ports. More 
comments below.
[Finn] Yes. But you are right, if you need special OVSDB settings to configure 
a vport, then you will need a new DPDK type. However, initially, we do not 
necessarily need this. I do not see this as a huge issue, and if we need it I 
think we can add that also to the patchset.

2)  Handle statistics:
Separate registration/mapping of partial offloaded flows and full offloaded 
flows and query statistics from full offloaded flows with a specific interval, 
updating the userspace datapath megaflow cache with these statistics. Done 
using rte_flow_query. This includes packet count (hits), bytes and seen 
tcp_flags.
a.  When a full offloaded flow has been successfully added, then add that 
rte_flow to a separate hw-offload map, containing only full-offloaded-flows.
[Sugesh] Yes. We also following the same method
b.  Add the RTE_FLOW_ACTION_TYPE_COUNT to the full-offloaded flows, so that 
statistics may be retrieved later, for that rte_flow.
[Sugesh] Make sense.
c.  Add a timed task to the hw-offload-thread, so that all full-offloaded 
flows can be stat-queried using rte_flow_query() function. Retreived with an 
interval of maybe 1 or 2 seconds. Call dp_netdev_flow_used with result.
[Sugesh]Ok, so we might need to use the stats in revalidator to expire the 
flows?
Just a note, some hardware may able to evict the flows by itself after the 
idle-timeout. The rte_flow_query logic should account that as well when polling 
the stats.
[Finn] Yes, good point. Let the flow_query also indicate if a flow has been 
canceled, and remove it accordingly in flow map.

d.  tcp_flags should be retrieved by rte_flow_query() also. This will need 
an extension to the current rte_flow_query_count structure.
[Sugesh] Ok
e.  Use the flow_get function in the DPDK_FLOW_OFFLOAD_API to implement the 
rte_flow_query call and convert format to dpif_flow_stats.
[Sugesh] 

[ovs-dev] test

2018-01-29 Thread Ben Pfaff
I've heard that there are problems with the mailing list this morning,
so here's a test email.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v4 1/3] datapath-windows: Refactor conntrack code.

2018-01-29 Thread Anand Kumar
Some of the functions and code are refactored
so that the new conntrack lock can be implemented.

Signed-off-by: Anand Kumar 
Acked-by: Alin Gabriel Serdean 
---
 datapath-windows/ovsext/Conntrack-nat.c |  11 +-
 datapath-windows/ovsext/Conntrack.c | 174 ++--
 datapath-windows/ovsext/Conntrack.h |   4 -
 3 files changed, 103 insertions(+), 86 deletions(-)

diff --git a/datapath-windows/ovsext/Conntrack-nat.c 
b/datapath-windows/ovsext/Conntrack-nat.c
index c778f12..7975770 100644
--- a/datapath-windows/ovsext/Conntrack-nat.c
+++ b/datapath-windows/ovsext/Conntrack-nat.c
@@ -93,26 +93,23 @@ NTSTATUS OvsNatInit()
 sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE,
 OVS_CT_POOL_TAG);
 if (ovsNatTable == NULL) {
-goto failNoMem;
+return STATUS_INSUFFICIENT_RESOURCES;
 }
 
 ovsUnNatTable = OvsAllocateMemoryWithTag(
 sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE,
 OVS_CT_POOL_TAG);
 if (ovsUnNatTable == NULL) {
-goto freeNatTable;
+OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG);
+return STATUS_INSUFFICIENT_RESOURCES;
 }
 
 for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) {
 InitializeListHead([i]);
 InitializeListHead([i]);
 }
-return STATUS_SUCCESS;
 
-freeNatTable:
-OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG);
-failNoMem:
-return STATUS_INSUFFICIENT_RESOURCES;
+return STATUS_SUCCESS;
 }
 
 /*
diff --git a/datapath-windows/ovsext/Conntrack.c 
b/datapath-windows/ovsext/Conntrack.c
index 169ec4f..3cde836 100644
--- a/datapath-windows/ovsext/Conntrack.c
+++ b/datapath-windows/ovsext/Conntrack.c
@@ -33,7 +33,7 @@ static PLIST_ENTRY ovsConntrackTable;
 static OVS_CT_THREAD_CTX ctThreadCtx;
 static PNDIS_RW_LOCK_EX ovsConntrackLockObj;
 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
-static UINT64 ctTotalEntries;
+static LONG ctTotalEntries;
 
 static __inline OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple);
 static __inline NDIS_STATUS
@@ -212,7 +212,7 @@ OvsCtAddEntry(POVS_CT_ENTRY entry, OvsConntrackKeyLookupCtx 
*ctx,
 InsertHeadList([ctx->hash & CT_HASH_TABLE_MASK],
>link);
 
-ctTotalEntries++;
+InterlockedIncrement((LONG volatile *));
 return TRUE;
 }
 
@@ -235,11 +235,6 @@ OvsCtEntryCreate(OvsForwardingContext *fwdCtx,
 *entryCreated = FALSE;
 state |= OVS_CS_F_NEW;
 
-parentEntry = OvsCtRelatedLookup(ctx->key, currentTime);
-if (parentEntry != NULL) {
-state |= OVS_CS_F_RELATED;
-}
-
 switch (ipProto) {
 case IPPROTO_TCP:
 {
@@ -283,6 +278,11 @@ OvsCtEntryCreate(OvsForwardingContext *fwdCtx,
 break;
 }
 
+parentEntry = OvsCtRelatedLookup(ctx->key, currentTime);
+if (parentEntry != NULL && state != OVS_CS_F_INVALID) {
+state |= OVS_CS_F_RELATED;
+}
+
 if (state != OVS_CS_F_INVALID && commit) {
 if (entry) {
 entry->parent = parentEntry;
@@ -315,6 +315,7 @@ OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
  BOOLEAN reply,
  UINT64 now)
 {
+CT_UPDATE_RES status;
 switch (ipProto) {
 case IPPROTO_TCP:
 {
@@ -322,32 +323,23 @@ OvsCtUpdateEntry(OVS_CT_ENTRY* entry,
 const TCPHdr *tcp;
 tcp = OvsGetTcp(nbl, l4Offset, );
 if (!tcp) {
-return CT_UPDATE_INVALID;
+status =  CT_UPDATE_INVALID;
+break;
 }
-return OvsConntrackUpdateTcpEntry(entry, tcp, nbl, reply, now);
+status =  OvsConntrackUpdateTcpEntry(entry, tcp, nbl, reply, now);
+break;
 }
 case IPPROTO_ICMP:
-return OvsConntrackUpdateIcmpEntry(entry, reply, now);
+status =  OvsConntrackUpdateIcmpEntry(entry, reply, now);
+break;
 case IPPROTO_UDP:
-return OvsConntrackUpdateOtherEntry(entry, reply, now);
+status =  OvsConntrackUpdateOtherEntry(entry, reply, now);
+break;
 default:
-return CT_UPDATE_INVALID;
-}
-}
-
-static __inline VOID
-OvsCtEntryDelete(POVS_CT_ENTRY entry)
-{
-if (entry == NULL) {
-return;
-}
-if (entry->natInfo.natAction) {
-OvsNatDeleteKey(>key);
+status =  CT_UPDATE_INVALID;
+break;
 }
-OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE);
-RemoveEntryList(>link);
-OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
-ctTotalEntries--;
+return status;
 }
 
 static __inline BOOLEAN
@@ -358,6 +350,24 @@ OvsCtEntryExpired(POVS_CT_ENTRY entry)
 return entry->expiration < currentTime;
 }
 
+static __inline VOID
+OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete)
+{
+if (entry == NULL) {
+return;
+}
+if (forceDelete || OvsCtEntryExpired(entry)) {
+if (entry->natInfo.natAction) {
+OvsNatDeleteKey(>key);
+}
+OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE);
+RemoveEntryList(>link);
+  

Re: [ovs-dev] [PATCH v2 0/4] Check size of packets before sending

2018-01-29 Thread David Miller
From: Daniel Axtens 
Date: Mon, 29 Jan 2018 14:20:58 +1100

> OK, so how about:
> 
>  - first, a series that introduces skb_gso_validate_mac_len and uses it
>in bnx2x. This should be backportable without difficulty.
> 
>  - then, a series that wires skb_gso_validate_mac_len into the core -
>validate_xmit_skb for instance, and reverts the bnx2x fix. This would
>not need to be backported.
> 
> If that's an approach we can agree on, I am ready to send it when
> net-next opens again.

Please send the bnx2x specific fix now, thank you.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v3 3/3] datapath-windows: Optimize conntrack lock implementation.

2018-01-29 Thread Alin Serdean
Trimming the patch a bit.

Just one small nit from the static analyzer inlined.

Rest looks good.

Acked-by: Alin Gabriel Serdean 
<--8<-->
 /*
@@ -124,12 +135,9 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context)  VOID
 OvsCleanupConntrack(VOID)
 {
-LOCK_STATE_EX lockState, lockStateNat;
-NdisAcquireRWLockWrite(ovsConntrackLockObj, , 0);
+LOCK_STATE_EX lockStateNat;
 ctThreadCtx.exit = 1;
 KeSetEvent(, 0, FALSE);
-NdisReleaseRWLock(ovsConntrackLockObj, );
-
 KeWaitForSingleObject(ctThreadCtx.threadObject, Executive,
   KernelMode, FALSE, NULL);
 ObDereferenceObject(ctThreadCtx.threadObject);
@@ -142,8 +150,14 @@ OvsCleanupConntrack(VOID)
 ovsConntrackTable = NULL;
 }
 
-NdisFreeRWLock(ovsConntrackLockObj);
-ovsConntrackLockObj = NULL;
+for (UINT32 i = 0; i < CT_HASH_TABLE_SIZE; i++) {
+if (ovsCtBucketLock[i] != NULL) {
[Alin Serdean] datapath-windows\ovsext\conntrack.c(154): warning C6001: Using 
uninitialized memory '*ovsCtBucketLock'.
+NdisFreeRWLock(ovsCtBucketLock[i]);
+}
+}
+OvsFreeMemoryWithTag(ovsCtBucketLock, OVS_CT_POOL_TAG);
+ovsCtBucketLock = NULL;
+
 NdisAcquireRWLockWrite(ovsCtNatLockObj, , 0);
 OvsNatCleanup();
 NdisReleaseRWLock(ovsCtNatLockObj, ); @@ -179,11 +193,20 @@ 
OvsCtUpdateFlowKey(struct OvsFlowKey *key,
 }
 }
 
<--8<-->
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v4 0/3] datapath-windows: New lock implementation in conntrack

2018-01-29 Thread Anand Kumar
This patch series replaces existing one RW lock implemenation in
conntrack with two RW locks in conntrack and one RW lock in NAT.
---
v1->v2:
 - Patch 3, address review comments
v2->v3:
 - Patch 3, fix invalid memory access after deleting ct entry
v3->v4:
 - Patch 3, address static analyzer warning message
---
Anand Kumar (3):
  datapath-windows: Refactor conntrack code.
  datapath-windows: Add a global level RW lock for NAT
  datapath-windows: Optimize conntrack lock implementation.

 datapath-windows/ovsext/Conntrack-nat.c |  17 +-
 datapath-windows/ovsext/Conntrack.c | 413 
 datapath-windows/ovsext/Conntrack.h |   7 +-
 3 files changed, 279 insertions(+), 158 deletions(-)

-- 
2.9.3.windows.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] Hardware Acceleration in OVS-DPDK

2018-01-29 Thread Chandran, Sugesh
Thank you all for attending today call.
I have updated the MOM in the following document.

https://docs.google.com/document/d/1KeQB5NIUph721uuk1f1wMy4QXwSWxlzDaumd_bWX6YI/edit?usp=sharing


Regards
_Sugesh


  -Original Appointment-
  From:
  Sent: None
  To: Chandran, Sugesh; d...@openvswitch.org; Darrell Ball; Simon Horman; 
Stokes, Ian; Yuanhan Liu; 'Finn Christensen'; 'jiaquan song'; 
'pieter.jansenvanvuu...@netronome.com'; Doherty, Declan; 
'frikkie.scho...@netronome.com'; Bodireddy, Bhanuprakash; Keane, Lorna; Giller, 
Robin; Loftus, Ciara; Awal, Mohammad Abdul; Eelco Chaudron 
; NPG SW Data Plane Virtual Switching and FPGA
  Subject: Hardware Acceleration in OVS-DPDK
  When: Monday, January 29, 2018 11:00 AM-12:00 PM (UTC+00:00) Dublin, 
Edinburgh, Lisbon, London.
  Where: Skype Meeting


  Hi All,

  As discussed in the last hardware acceleration meeting, I am setting up 
the follow up call to discuss about submitting a RFC patch series on OVS-DPDK 
full hardware acceleration solution.
  This time I am scheduling the call  at PRC time zone friendly.

  Agenda for the Call
1)  DPDK changes that Intel is working on to support Full offload.(RTE_FLOW 
changes, port-rep)
2)  Proposed OVS changes for the full acceleration. How it can leverage the 
proposed DPDK APIs. Also look at how these changes will work with hardware from 
different vendors
3)  How the proposal is going to interfere the existing partial offload 
solution.


  MOM of last call can be found at following link. Minutes will be captures 
in the same doc.

  
https://docs.google.com/document/d/1KeQB5NIUph721uuk1f1wMy4QXwSWxlzDaumd_bWX6YI/edit?usp=sharing


  
.
  --> Join Skype Meeting
Trouble Joining? Try Skype Web 
App
  Join by phone
  +1(916)356-2663 (or your local bridge access #) Choose bridge 
5.
 (Global) English (United States)
  Find a local number

  Conference ID: 241032418
   Forgot your dial-in PIN? 
|Help

  [!OC([1033])!]
  
.

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v5 1/5] dpif-netdev: associate flow with a mark id

2018-01-29 Thread Finn Christensen
Hi Yuanhan,

This will not break our PMD. I think PMDs should be able to handle rss_conf == 
NULL and then failover to default or initially set rss_conf.

Finn

>-Original Message-
>From: Yuanhan Liu [mailto:y...@fridaylinux.org]
>Sent: 29. januar 2018 07:59
>To: Stokes, Ian 
>Cc: d...@openvswitch.org; Finn Christensen ; Darrell Ball
>; Chandran, Sugesh ;
>Simon Horman 
>Subject: Re: [PATCH v5 1/5] dpif-netdev: associate flow with a mark id
>
>On Fri, Jan 26, 2018 at 04:19:30PM +0800, Yuanhan Liu wrote:
>> > > +static bool
>> > > +flow_mark_has_no_ref(uint32_t mark) {
>> > > +struct dp_netdev_flow *flow;
>> > > +
>> >
>> > Maybe I'm missing something below, but I expected a hash to be
>computed for mark before being called with CMAP_FOR_EACH_WITH_HASH?
>>
>> I treat "mark" as the hash, as it's with the same type of "hash" and
>> it's uniq. But you are probably right, it might be better to get the
>> hash by "hash_int". Will fix it.
>
>Oops, I forgot to do the coressponding change for mark_to_flow find. Thus,
>the partial offload is completely skiped, as retrieving the flow from mark
>would always fail (returing NULL).
>
>The reason I missed it, while I was testing v6, is I found the flow creation is
>failed. I don't really change anything between v5 and v6 and when I was back
>to v5, I also met same issue. I then thought it might be introduced by the
>OFED or firmware change I have done (for switching to other projects).
>I then thought it's something I could figure out laterly. Thus, v6 was sent out
>basically just with a build test.
>
>I then figured out today that the flow creation failure is introduced by a MLX5
>PMD driver change in DPDK v17.11. It looks like a bug to me. And there are 2
>solutions for that:
>
>- fix it in MLX5 PMD (if it's a bug); I was talking to the author made
>  such change.
>- set rss_conf to NULL, which will let DPDK to follow the one OVS-DPDK
>  has set in the beginning.
>
>I chose 2, sicne option 1 won't change the fact that it won't work with DPDK
>v17.11.
>
>And Finn, I probably need your help to verify that option 2 won't break Napa
>PMD drivers.
>
>I will send v7 soon. Please help review.
>
>Thanks.
>
>   --yliu
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] OVS 2.9 Mempool Issue

2018-01-29 Thread Stokes, Ian
Hi Jan/Pradeep,

Thanks for flagging this issue, I hadn't considered it before. I agree that 
this needs to be investigated further.

If it's of use I have reverted the per port mempool patches manually for 
testing with the head of master so that both approaches can be checked against 
these type of bugs.

https://github.com/istokes/ovs/tree/mempool_revert

I can start updating documentation also record these issues if they are not 
fixed in time for the release.

Thanks
Ian

From: Jan Scheurich [mailto:jan.scheur...@ericsson.com]
Sent: Monday, January 29, 2018 1:58 PM
To: Venkatesan Pradeep ; Stokes, Ian 

Cc: Ilya Maximets ; d...@openvswitch.org
Subject: RE: OVS 2.9 Mempool Issue

Hi Pradeep,

Yes, this is for sure a bug in the current OVS mempool handling (and possibly 
in many other DPDK applications). In the past it was perhaps not so critical 
because shared mempools rarely got released, but with per port mempools it 
becomes a real threat. And as you say, we do not have any test experience with 
long-running OVS-DPDK and frequent creation/deletion of vhostuser ports.

@Ian: This is also something that should be noted as a known bug in OVS 2.9.0 
release documentation if we can't fix it in time for the release.

Regards, Jan


From: Venkatesan Pradeep
Sent: Monday, 29 January, 2018 14:42
To: Jan Scheurich 
>; Stokes, Ian 
>
Subject: RE: OVS 2.9 Mempool Issue

Hi Ian,

There is one more mempool issue that needs to be evaluated. I had brought it up 
in my post yesterday and Ilya also confirmed it today. When a mempool is 
deleted (as a consequence of port deletion or configuration change) there is no 
attempt made to free the mbufs sitting on other port's tx queues. When they get 
freed up eventually by the driver (in a lazy fashion depending on driver 
specific threshold) there is potential for corruption/crash. This problem 
exists even with the shared mempool model but this situation would occur only 
when the last reference to the mempool goes away. For instance, if all ports 
share the same MTU the situation will never occur.

In some very limited tests that I did, mempool deletion did not cause any 
obvious problems but it was not an exhaustive test. To verify the problem 
actually exists, I setup a magic number in the mempool, cleared it when the 
mempool is deleted and validated the magic number when an mbuf is freed. Sure 
enough, the validation check failed when the mempool was deleted.

Given that vhostuser ports keep getting added and deleted as part of VM 
provisioning it is quite likely that this situation will be hit often. Perhaps 
there are some safeguards (inbuilt or incidental) that prevented the 
corruption/crash but I think it is important to understand if we can live 
without addressing the issue.

Thanks,

Pradeep

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] FW: Hallo

2018-01-29 Thread Wang, Hongbin


From: Wang, Hongbin
Sent: 29 January 2018 10:48
To: Wang, Hongbin
Subject: Hallo

  Brauchen Sie einen Kredit?
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 0/4] Check size of packets before sending

2018-01-29 Thread Daniel Axtens
Eric Dumazet  writes:

> On Fri, 2018-01-26 at 00:44 +1100, Daniel Axtens wrote:
>> Hi Eric,
>> 
>> > May I ask which tree are you targeting ?
>> > 
>> > ( Documentation/networking/netdev-FAQ.txt )
>> 
>> I have been targeting net-next, but I haven't pulled for about two
>> weeks. I will rebase and if there are conflicts I will resend early next
>> week.
>> 
>> > Anything touching GSO is very risky and should target net-next,
>> > especially considering 4.15 is released this week end.
>> > 
>> > Are we really willing to backport this intrusive series in stable
>> > trees, or do we have a smaller fix for bnx2x ?
>> 
>> I do actually have a smaller fix for bnx2x, although it would need more work:
>> https://patchwork.ozlabs.org/patch/859410/
>> 
>> It leaves open the possibility of too-large packets causing issues on
>> other drivers. DaveM wasn't a fan: 
>> https://patchwork.ozlabs.org/patch/859410/#1839429
>
> Yes, I know he prefers a generic solution, but I am pragmatic here.
> Old kernels are very far from current GSO stack in net-next.
>
> Backporting all the dependencies is going to be very boring/risky.

OK, so how about:

 - first, a series that introduces skb_gso_validate_mac_len and uses it
   in bnx2x. This should be backportable without difficulty.

 - then, a series that wires skb_gso_validate_mac_len into the core -
   validate_xmit_skb for instance, and reverts the bnx2x fix. This would
   not need to be backported.

If that's an approach we can agree on, I am ready to send it when
net-next opens again.

Regards,
Daniel
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] OVS 2.9 Mempool Issue

2018-01-29 Thread Jan Scheurich
Hi Pradeep,

Yes, this is for sure a bug in the current OVS mempool handling (and possibly 
in many other DPDK applications). In the past it was perhaps not so critical 
because shared mempools rarely got released, but with per port mempools it 
becomes a real threat. And as you say, we do not have any test experience with 
long-running OVS-DPDK and frequent creation/deletion of vhostuser ports.

@Ian: This is also something that should be noted as a known bug in OVS 2.9.0 
release documentation if we can't fix it in time for the release.

Regards, Jan


From: Venkatesan Pradeep
Sent: Monday, 29 January, 2018 14:42
To: Jan Scheurich ; Stokes, Ian 

Subject: RE: OVS 2.9 Mempool Issue

Hi Ian,

There is one more mempool issue that needs to be evaluated. I had brought it up 
in my post yesterday and Ilya also confirmed it today. When a mempool is 
deleted (as a consequence of port deletion or configuration change) there is no 
attempt made to free the mbufs sitting on other port's tx queues. When they get 
freed up eventually by the driver (in a lazy fashion depending on driver 
specific threshold) there is potential for corruption/crash. This problem 
exists even with the shared mempool model but this situation would occur only 
when the last reference to the mempool goes away. For instance, if all ports 
share the same MTU the situation will never occur.

In some very limited tests that I did, mempool deletion did not cause any 
obvious problems but it was not an exhaustive test. To verify the problem 
actually exists, I setup a magic number in the mempool, cleared it when the 
mempool is deleted and validated the magic number when an mbuf is freed. Sure 
enough, the validation check failed when the mempool was deleted.

Given that vhostuser ports keep getting added and deleted as part of VM 
provisioning it is quite likely that this situation will be hit often. Perhaps 
there are some safeguards (inbuilt or incidental) that prevented the 
corruption/crash but I think it is important to understand if we can live 
without addressing the issue.

Thanks,

Pradeep

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] Mempool issue for OVS 2.9

2018-01-29 Thread Jan Scheurich
Hi,

I'd like to take one step back and look at how many mbufs we actually need.

Today mbufs are consumed in the following places:
a)  Rx queues of *physical* dpdk ports: dev->requested_n_rxq * 
dev->requested_rxq_size
   Note 1: These mbufs are hogged up at all times.
   Note 2: There is little point in configuring more rx queues per phy port 
than there are PMDs to poll them.
   Note 3: The rx queues of vhostuser ports exist as virtqueues in the guest 
and do not hog mbufs.
b)  One batch per PMD during processing: #PMD * NETDEV_MAX_BURST
c)  One batch per tx queue with time-based tx batching: 
dev->requested_n_txq * NETDEV_MAX_BURST
d)  Tx queues of *physical* ports: dev->requested_n_txq * expected peak tx 
queue fill level
Note 1:  The maximum of 2K mbufs per tx queue can only be reached if the OVS 
transmit rate exceeds the line rate for a long time. This can only happen for 
large packets and when the traffic originates from VMs on the compute node. 
This would be a case of under-dimensioning and packets would be dropped in any 
case. Excluding that scenario, a typical peak tx queue fill level would be when 
all PMDs transmit a full batch at the same time: #PMDs * NETDEV_MAX_BURST.
Note 2: Vhostuser ports do not store mbufs in tx queues due to copying to 
virtio descriptors

For concreteness let us use an example of a typical, rather large OVS 
deployment in an NFVI cloud:
*   Two cores with 4 PMDs per NUMA socket using HT.
*   Two physical ports using RSS over 4 rx queues to enable load-sharing 
over the 4 local PMDs and 9 tx queues (8 PMDs plus non PMD)
*   100 vhostuser ports with a varying number of rx and tx queue pairs (128 
in total).

In the above example deployments this translates into
a)  4 * 2K = 8K mbufs per physical port (16K in total)
b)  8 * 32 = 256 mbufs total
c)  (128 +  23*9) * 32 = 4672 mbufs in total
d)  9 * 32 = 288 mbufs per physical port (Adding some safety margin, a 
total of 2K mbufs)
---
Total : 23K mbufs

This is way lower than the size of the earlier shared mempool (256K mbufs), 
which explains why we have never observed out of mbuf drops in our NFVI 
deployments. The vswitchd crash that triggered the change to per-port mempools 
only happened because they tried to configure 64 rx and tx queues per physical 
port for multiple ports. I can't see any reason for configuring more rx and tx 
queues than polling PMDs, though.

The actual consumption of mbufs scales primarily with the number of physical 
ports (a, c and d) and only to a much lower degree with the number of vhost 
ports/queues (c).

Except for the phy rx queues, all other cases buffer a statistical mix of mbufs 
received on all ports. There seems little point in assigning per-port mempools 
for these.

I think we should revert to a shared mempool (per MTU size) with a simple 
dimensioning formula that only depends on the number of physical ports and the 
number of PMDs, both of which are zero day configuration parameters that are 
set by OVS users.

For example:
 #mbuf = SUM/physical ports [n_rxq * rxq_size + (#PMDs + 1) * txq_size] + 16K

The fixed 16K would cover for b) and c) for up to 512 vhostuser tx queues, 
which should be ample.
In the above example this result in 2 * [ 4 * 2K + 9 * 2K ] + 8K  = 60K mbufs.

BR, Jan

> -Original Message-
> From: Venkatesan Pradeep
> Sent: Friday, 26 January, 2018 18:34
> To: Jan Scheurich ; Stokes, Ian 
> ; ovs-disc...@openvswitch.org
> Cc: Kevin Traynor ; Flavio Leitner ; 
> Ilya Maximets (i.maxim...@samsung.com)
> ; Loftus, Ciara ; Kavanagh, 
> Mark B ; Ben Pfaff
> (b...@ovn.org) ; acon...@redhat.com
> Subject: RE: Mempool issue for OVS 2.9
>
> Response marked [Pradeep]
>
> Thanks,
>
> Pradeep
>
> -Original Message-
> From: Jan Scheurich
> Sent: Friday, January 26, 2018 10:26 PM
> To: Stokes, Ian >; 
> ovs-disc...@openvswitch.org
> Cc: Kevin Traynor >; Flavio 
> Leitner >; Ilya Maximets 
> (i.maxim...@samsung.com)
> >; Loftus, Ciara 
> >; Kavanagh, Mark B 
> >; Ben Pfaff
> (b...@ovn.org) >; 
> acon...@redhat.com; Venkatesan Pradeep 
> >
> Subject: RE: Mempool issue for OVS 2.9
>
> > -Original Message-
> > From: Stokes, Ian [mailto:ian.sto...@intel.com]
> > Sent: Friday, 26 January, 2018 13:01
> > To: 

Re: [ovs-dev] [PATCH 00/12] Backport upstream Linux OVS patches

2018-01-29 Thread Gregory Rose

On 1/23/2018 11:35 AM, Justin Pettit wrote:

Enough time has passed that this doesn't apply cleanly to master either.  Would 
you mind rebasing these, too?

Thanks,

--Justin


Yep, I can do that.

thanks,

- Greg





On Dec 11, 2017, at 1:50 PM, Greg Rose  wrote:

The following patches are available in the current Linux upstream
git repository:

  183dea5 openvswitch: do not propagate headroom updates to internal port
  311af51 openvswitch: use ktime_get_ts64() instead of ktime_get_ts()
  67c8d22 openvswitch: fix the incorrect flow action alloc size
  2734166 net: openvswitch: datapath: fix data type in queue_gso_packets
  0c19f846 net: accept UFO datagrams from tuntap and packet
  b74912a openvswitch: meter: fix NULL pointer dereference in 
ovs_meter_cmd_reply_start
  6dc14dc openvswitch: Using kfree_rcu() to simplify the code
  06c2351 openvswitch: Make local function ovs_nsh_key_attr_size() static
  8a860c2 openvswitch: Fix return value check in ovs_meter_cmd_features()
  cd8a6c3 openvswitch: Add meter action support
  96fbc13 openvswitch: Add meter infrastructure
  9602c01 openvswitch: export get_dp() API.
  b2d0f5d openvswitch: enable NSH support
  9354d45 openvswitch: reliable interface indentification in port dumps
  2a17178 Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
  b244131 License cleanup: add SPDX GPL-2.0 license identifier to files with no 
license
  279badc openvswitch: conntrack: mark expected switch fall-through
  b822696 openvswitch: add ct_clear action
  ceaa001 openvswitch: Add erspan tunnel support.
  42ab19e net: Add extack to upper device linking
  5829e62 openvswitch: Fix an error handling path in 
'ovs_nla_init_match_and_action()'

This patch series backports all of those patches except these four:
  279badc openvswitch: conntrack: mark expected switch fall-through
  b822696 openvswitch: add ct_clear action
  ceaa001 openvswitch: Add erspan tunnel support.
  b2d0f5d openvswitch: enable NSH support

Upstream patch 279badc isn't necessary since a patch for it was recently
independently added.

Upstream patches b2d0f5d, b822696 and ceaa001 require user space
changes to allow OVS to build.  I will work with the authors of
those patches to get backports and required user space changes
posted separately.

Andy Zhou has sent me additional patches for the user space side of
the meter patches.  In this case the kernel datapath meter patches
do not require the user space code to compile correctly so we can
separate the application of the kernel datapath patches and the
user space patches.  I will update and post Andy's user space side
meter patches in the near future.

The remaining patches are addressed in this patch series as indicated
below.

Andy Zhou (3):
  datapath: export get_dp() API
  datapath: Add meter netlink definitions
  datapath: Add meter infrastructure

Arnd Bergmann (1):
  datapath: use ktime_get_ts64() instead of ktime_get_ts()

Christophe JAILLET (1):
  datapath:  Fix an error handling path in
'ovs_nla_init_match_and_action()

Gustavo A. R. Silva (2):
  datapath: meter: fix NULL pointer dereference in
ovs_meter_cmd_reply_start
  datapath: fix data type in queue_gso_packets

Jiri Benc (1):
  datapath: reliable interface indentification in port dumps

Paolo Abeni (1):
  datapath: do not propagate headroom updates to internal port

Wei Yongjun (2):
  datapath: Fix return value check in ovs_meter_cmd_features()
  datapath: Using kfree_rcu() to simplify the code

zhangliping (1):
  datapath: fix the incorrect flow action alloc size

acinclude.m4  |   4 +-
datapath/Modules.mk   |   6 +-
datapath/datapath.c   |  97 ++--
datapath/datapath.h   |  38 +-
datapath/dp_notify.c  |   3 +-
datapath/flow.c   |   6 +-
datapath/flow_netlink.c   |  16 +-
datapath/linux/compat/include/linux/netdevice.h   |  19 -
datapath/linux/compat/include/linux/openvswitch.h |  53 ++
datapath/linux/compat/include/net/netlink.h   |   9 +
datapath/meter.c  | 597 ++
datapath/meter.h  |  54 ++
datapath/vport-internal_dev.c |  19 +-
13 files changed, 821 insertions(+), 100 deletions(-)
create mode 100644 datapath/meter.c
create mode 100644 datapath/meter.h

--
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH V3 0/6] Enable OVS on Linux 4.14 kernel

2018-01-29 Thread Gregory Rose



On 1/23/2018 12:13 AM, Justin Pettit wrote:

Hi, Greg.  I haven't looked at these in detail, but it doesn't look like NEWS 
or the FAQ supported kernel list was updated.  Would you mind adding those and 
rebasing against the current master?  If you can do that, I'll take a closer 
look and get them merged into master and branch-2.9.

Thanks,

--Justin


Sure, I'll do that.

Thanks Justin!

- Greg





On Dec 8, 2017, at 1:58 PM, Greg Rose  wrote:

Various fixes and compat layer changes required to enable building
OVS for the upstream Linux 4.14 kernel.

The constant changing of the netdev_master_upper_dev_link parameters
is a real headache.  I couldn't think of any cleaner way to do it
than the approach I used but I welcome suggestions on how to make
that code less ugly - because it's ten miles of bad road ugly.
But it does compile and pass basic checks on all of the currently
supported kernels.

There's more fixes for SKB_GSO_UDP - I'm not sure why these don't
show up for the 4.13 or previous kernels but I think it has to do
with the recent change to make sure the SKB_GSO_UDP was searched
as a whole word and thus exposed more fractures.

I updated .travis.yml to use all the most recent supported LTS
and stable kernels from kernel.org.

V2 - Pull in upstream patch for conntrack protocol pointers
V3 - Fix authors

Greg Rose (6):
  datapath: Fix netdev_master_upper_dev_link for 4.14
  compat: Do not include headers when not compiling
  datapath: conntrack: make protocol tracker pointers const
  datapath: Fix SKB_GSO_UDP usage
  acinclude.m4: Enable Linux 4.14
  travis: Update kernel test list from kernel.org

.travis.yml | 17 -
acinclude.m4|  7 +--
datapath/conntrack.c|  2 +-
datapath/datapath.c |  9 ++---
datapath/linux/compat/include/linux/netdevice.h | 15 ++-
datapath/linux/compat/ip_gre.c  |  2 +-
datapath/linux/compat/ip_output.c   |  2 +-
datapath/linux/compat/stt.c | 11 ++-
datapath/vport-netdev.c |  9 -
9 files changed, 54 insertions(+), 20 deletions(-)

--
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] Mempool issue for OVS 2.9

2018-01-29 Thread Ilya Maximets
On 29.01.2018 11:50, Jan Scheurich wrote:
>> -Original Message-
>> From: Ilya Maximets [mailto:i.maxim...@samsung.com]
>> Sent: Monday, 29 January, 2018 09:35
>> To: Jan Scheurich ; Venkatesan Pradeep 
>> ; Stokes, Ian
>> ; d...@openvswitch.org
>> Cc: Kevin Traynor ; Flavio Leitner ; 
>> Loftus, Ciara ; Kavanagh, Mark B
>> ; Ben Pfaff (b...@ovn.org) ; 
>> acon...@redhat.com; disc...@openvswitch.org
>> Subject: Re: Mempool issue for OVS 2.9
>>
>> On 29.01.2018 11:19, Jan Scheurich wrote:
>>> Hi,
>>>
>>> I'd like to take one step back and look at how many mbufs we actually 
>>> need.
>>>
>>> Today mbufs are consumed in the following places:
>>>
>>>  1. Rx queues of **physical** dpdk ports: dev->requested_n_rxq * 
>>> dev->requested_rxq_size
>>> Note 1: These mbufs are hogged up at all times.
>>> Note 2: There is little point in configuring more rx queues per phy 
>>> port than there are PMDs to poll them.
>>> Note 3: The rx queues of vhostuser ports exist as virtqueues in the 
>>> guest and do not hog mbufs.
>>>  2. One batch per PMD during processing: #PMD * NETDEV_MAX_BURST
>>>  3. One batch per tx queue with time-based tx batching: 
>>> dev->requested_n_txq * NETDEV_MAX_BURST
>>>  4. Tx queues of **physical** ports: dev->requested_n_txq * expected peak 
>>> tx queue fill level
>>> Note 1:  The maximum of 2K mbufs per tx queue can only be reached if 
>>> the OVS transmit rate exceeds the line rate for a long time.
>> This can only happen for large packets and when the traffic originates from 
>> VMs on the compute node. This would be a case of under-
>> dimensioning and packets would be dropped in any case. Excluding that 
>> scenario, a typical peak tx queue fill level would be when all
>> PMDs transmit a full batch at the same time: #PMDs * NETDEV_MAX_BURST.
>>
>> Above assumption is wrong. Just look at ixgbe driver:
>> drivers/net/ixgbe/ixgbe_rxtx.c: tx_xmit_pkts():
>>
>>/*
>> * Begin scanning the H/W ring for done descriptors when the
>> * number of available descriptors drops below tx_free_thresh.  For
>> * each done descriptor, free the associated buffer.
>> */
>>if (txq->nb_tx_free < txq->tx_free_thresh)
>>┊   ixgbe_tx_free_bufs(txq);
>>
>> The default value for 'tx_free_thresh' is 32. So, if I'll configure number
>> of TX descriptors to 4096, driver will start to free mbufs only when it will
>> have more than 4063 mbufs inside its TX queue. No matter how frequent calls
>> to send() function.
> 
> OK, but that doesn't change my general argument. The mbufs hogged in the tx 
> side of the phy port driver are coming from all ports (least likely the port 
> itself). Considering them in dimensioning the port's private mempool is 
> conceptually wrong. In my simplified dimensioning formula below I have 
> already assumed full occupancy of the tx queue for phy ports. The second key 
> observation is that vhostuser ports do not hog mbufs at all. And vhost zero 
> copy doesn't change that.

The formula below may be good for a static environment. I want to change the
number of PMD threads dynamically in my deployments, and this works in the
current per-port model and with an oversized shared pool. If we try to reduce
the memory consumption
of the shared pool we'll have to reconfigure all the devices each time we
change the number of PMD threads. This would be really bad.
So, size of the memory pool should not depend on dynamic characteristics of the
datapath or other ports to avoid unexpected interrupts in traffic flows in case
of random changes in configuration. Of course, it could depend on 
characteristics
of the port itself in case of per-port model. In case of shared mempool model 
the
size should only depend on static datapath configuration.

> 
> BTW:, is there any reason why phy drivers should free tx mbufs only when the 
> tx ring is close to becoming full? I'd understand that want need to free them 
> in batches for performance reasons, but is there no cheap possibility to do 
> this earlier?
> 
>>
>>> Note 2: Vhostuser ports do not store mbufs in tx queues due to copying 
>>> to virtio descriptors
>>>
>>>
>>> For concreteness let us use an example of a typical, rather large OVS 
>>> deployment in an NFVI cloud:
>>>
>>>   * Two cores with 4 PMDs per NUMA socket using HT.
>>>   * Two physical ports using RSS over 4 rx queues to enable load-sharing 
>>> over the 4 local PMDs and 9 tx queues (8 PMDs plus non PMD)
>>>   * 100 vhostuser ports with a varying number of rx and tx queue pairs (128 
>>> in total).
>>>
>>>
>>> In the above example deployments this translates into
>>>
>>>  1. 4 * 2K = 8K mbufs per physical port (16K in total)
>>>  2. 8 * 32 = 256 mbufs total
>>>  3. (128 +  23*9) * 32 = 4672 mbufs in total
>>>  4. 9 * 32 = 288 mbufs per 

Re: [ovs-dev] Mempool issue for OVS 2.9

2018-01-29 Thread Ilya Maximets
On 29.01.2018 11:19, Jan Scheurich wrote:
> Hi,
>  
> I'd like to take one step back and look at how many mbufs we actually 
> need.
>  
> Today mbufs are consumed in the following places:
> 
>  1. Rx queues of **physical** dpdk ports: dev->requested_n_rxq * 
> dev->requested_rxq_size
> Note 1: These mbufs are hogged up at all times.
> Note 2: There is little point in configuring more rx queues per phy port 
> than there are PMDs to poll them.
> Note 3: The rx queues of vhostuser ports exist as virtqueues in the guest 
> and do not hog mbufs.
>  2. One batch per PMD during processing: #PMD * NETDEV_MAX_BURST
>  3. One batch per tx queue with time-based tx batching: dev->requested_n_txq 
> * NETDEV_MAX_BURST
>  4. Tx queues of **physical** ports: dev->requested_n_txq * expected peak tx 
> queue fill level
> Note 1:  The maximum of 2K mbufs per tx queue can only be reached if the 
> OVS transmit rate exceeds the line rate for a long time. This can only happen 
> for large packets and when the traffic originates from VMs on the compute 
> node. This would be a case of under-dimensioning and packets would be dropped 
> in any case. Excluding that scenario, a typical peak tx queue fill level 
> would be when all PMDs transmit a full batch at the same time: #PMDs * 
> NETDEV_MAX_BURST.

Above assumption is wrong. Just look at ixgbe driver:
drivers/net/ixgbe/ixgbe_rxtx.c: tx_xmit_pkts():

   /*
* Begin scanning the H/W ring for done descriptors when the
* number of available descriptors drops below tx_free_thresh.  For
* each done descriptor, free the associated buffer.
*/
   if (txq->nb_tx_free < txq->tx_free_thresh)
   ┊   ixgbe_tx_free_bufs(txq);

The default value for 'tx_free_thresh' is 32. So, if I'll configure number
of TX descriptors to 4096, driver will start to free mbufs only when it will
have more than 4063 mbufs inside its TX queue. No matter how frequent calls
to send() function.

> Note 2: Vhostuser ports do not store mbufs in tx queues due to copying to 
> virtio descriptors
> 
>  
> For concreteness let us use an example of a typical, rather large OVS 
> deployment in an NFVI cloud:
> 
>   * Two cores with 4 PMDs per NUMA socket using HT.
>   * Two physical ports using RSS over 4 rx queues to enable load-sharing over 
> the 4 local PMDs and 9 tx queues (8 PMDs plus non PMD)
>   * 100 vhostuser ports with a varying number of rx and tx queue pairs (128 
> in total).
> 
>  
> In the above example deployments this translates into
> 
>  1. 4 * 2K = 8K mbufs per physical port (16K in total)
>  2. 8 * 32 = 256 mbufs total
>  3. (128 +  23*9) * 32 = 4672 mbufs in total
>  4. 9 * 32 = 288 mbufs per physical port (Adding some safety margin, a total 
> of 2K mbufs)
> 
> ---
> Total : 23K mbufs
>  
> This is way lower than the size of the earlier shared mempool (256K mbufs), 
> which explains why we have never observed out of mbuf drops in our NFVI 
> deployments. The vswitchd crash that triggered the change to per-port 
> mempools only happened because they tried to configure 64 rx and tx queues 
> per physical port for multiple ports. I can’t see any reason for configuring 
> more rx and tx queues than polling PMDs, though.
>  
> The actual consumption of mbufs scales primarily with the number of physical 
> ports (a, c and d) and only to a much lower degree with the number of vhost 
> ports/queues (c).
>  
> Except for the phy rx queues, all other cases buffer a statistical mix of 
> mbufs received on all ports. There seems little point in assigning per-port 
> mempools for these.
>  
> I think we should revert to a shared mempool (per MTU size) with a simple 
> dimensioning formula that only depends on the number of physical ports and 
> the number of PMDs, both of which are zero day configuration parameters that 
> are set by OVS users.
>  
> For example:
> #mbuf = SUM/physical ports [n_rxq * rxq_size + (#PMDs + 1) * txq_size] + 16K
>  
> The fixed 16K would cover for b) and c) for up to 512 vhostuser tx queues, 
> which should be ample.
> In the above example this result in 2 * [ 4 * 2K + 9 * 2K ] + 8K  = 60K mbufs.
>  
> BR, Jan
>  
>> -Original Message-
>> From: Venkatesan Pradeep
>> Sent: Friday, 26 January, 2018 18:34
>> To: Jan Scheurich ; Stokes, Ian 
>> ; ovs-disc...@openvswitch.org
>> Cc: Kevin Traynor ; Flavio Leitner ; 
>> Ilya Maximets (i.maxim...@samsung.com)
>> ; Loftus, Ciara ; Kavanagh, 
>> Mark B ; Ben Pfaff
>> (b...@ovn.org) ; acon...@redhat.com
>> Subject: RE: Mempool issue for OVS 2.9
>> 
>> Response marked [Pradeep]
>> 
>> Thanks,
>> 
>> Pradeep
>> 
>> -Original Message-
>> From: Jan Scheurich
>> Sent: Friday, January 26, 2018 10:26 PM
>> To: Stokes, Ian >; 
>> 

[ovs-dev] [PATCH v4 2/3] datapath-windows: Add a global level RW lock for NAT

2018-01-29 Thread Anand Kumar
Currently NAT module relies on the existing conntrack lock.
This patch provides a basic lock implementation for NAT module
in conntrack.

Signed-off-by: Anand Kumar 
Acked-by: Alin Gabriel Serdean 
---
 datapath-windows/ovsext/Conntrack.c | 36 ++--
 1 file changed, 34 insertions(+), 2 deletions(-)

diff --git a/datapath-windows/ovsext/Conntrack.c 
b/datapath-windows/ovsext/Conntrack.c
index 3cde836..7d56a50 100644
--- a/datapath-windows/ovsext/Conntrack.c
+++ b/datapath-windows/ovsext/Conntrack.c
@@ -32,6 +32,7 @@ KSTART_ROUTINE OvsConntrackEntryCleaner;
 static PLIST_ENTRY ovsConntrackTable;
 static OVS_CT_THREAD_CTX ctThreadCtx;
 static PNDIS_RW_LOCK_EX ovsConntrackLockObj;
+static PNDIS_RW_LOCK_EX ovsCtNatLockObj;
 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
 static LONG ctTotalEntries;
 
@@ -58,6 +59,13 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context)
 return STATUS_INSUFFICIENT_RESOURCES;
 }
 
+ovsCtNatLockObj = NdisAllocateRWLock(context->NdisFilterHandle);
+if (ovsCtNatLockObj == NULL) {
+NdisFreeRWLock(ovsConntrackLockObj);
+ovsConntrackLockObj = NULL;
+return STATUS_INSUFFICIENT_RESOURCES;
+}
+
 /* Init the Hash Buffer */
 ovsConntrackTable = OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY)
  * CT_HASH_TABLE_SIZE,
@@ -65,6 +73,8 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context)
 if (ovsConntrackTable == NULL) {
 NdisFreeRWLock(ovsConntrackLockObj);
 ovsConntrackLockObj = NULL;
+NdisFreeRWLock(ovsCtNatLockObj);
+ovsCtNatLockObj = NULL;
 return STATUS_INSUFFICIENT_RESOURCES;
 }
 
@@ -82,6 +92,9 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context)
 NdisFreeRWLock(ovsConntrackLockObj);
 ovsConntrackLockObj = NULL;
 
+NdisFreeRWLock(ovsCtNatLockObj);
+ovsCtNatLockObj = NULL;
+
 OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG);
 ovsConntrackTable = NULL;
 
@@ -111,7 +124,7 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context)
 VOID
 OvsCleanupConntrack(VOID)
 {
-LOCK_STATE_EX lockState;
+LOCK_STATE_EX lockState, lockStateNat;
 NdisAcquireRWLockWrite(ovsConntrackLockObj, &lockState, 0);
 ctThreadCtx.exit = 1;
 KeSetEvent(&ctThreadCtx.event, 0, FALSE);
@@ -131,7 +144,11 @@ OvsCleanupConntrack(VOID)
 
 NdisFreeRWLock(ovsConntrackLockObj);
 ovsConntrackLockObj = NULL;
+NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
 OvsNatCleanup();
+NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
+NdisFreeRWLock(ovsCtNatLockObj);
+ovsCtNatLockObj = NULL;
 }
 
 static __inline VOID
@@ -197,15 +214,19 @@ OvsCtAddEntry(POVS_CT_ENTRY entry, 
OvsConntrackKeyLookupCtx *ctx,
 if (natInfo == NULL) {
 entry->natInfo.natAction = NAT_ACTION_NONE;
 } else {
+LOCK_STATE_EX lockStateNat;
+NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
 if (OvsIsForwardNat(natInfo->natAction)) {
 entry->natInfo = *natInfo;
 if (!OvsNatTranslateCtEntry(entry)) {
+NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
 return FALSE;
 }
 ctx->hash = OvsHashCtKey(&entry->key);
 } else {
 entry->natInfo.natAction = natInfo->natAction;
 }
+NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
 }
 
 entry->timestampStart = now;
@@ -358,7 +379,10 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete)
 }
 if (forceDelete || OvsCtEntryExpired(entry)) {
 if (entry->natInfo.natAction) {
+LOCK_STATE_EX lockStateNat;
+NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
+OvsNatDeleteKey(&entry->key);
+NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
 }
 OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE);
 RemoveEntryList(>link);
@@ -560,7 +584,10 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
 return NDIS_STATUS_INVALID_PACKET;
 }
 
+LOCK_STATE_EX lockStateNat;
+NdisAcquireRWLockRead(ovsCtNatLockObj, &lockStateNat, 0);
+natEntry = OvsNatLookup(&ctx->key, TRUE);
+NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
 if (natEntry) {
 /* Translate address first for reverse NAT */
 ctx->key = natEntry->ctEntry->key;
@@ -813,8 +840,11 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,
  */
 if (natInfo->natAction != NAT_ACTION_NONE)
 {
+LOCK_STATE_EX lockStateNat;
+NdisAcquireRWLockWrite(ovsCtNatLockObj, &lockStateNat, 0);
 OvsNatPacket(fwdCtx, entry, entry->natInfo.natAction,
  key, ctx.reply);
+NdisReleaseRWLock(ovsCtNatLockObj, &lockStateNat);
 }
 
 OvsCtSetMarkLabel(key, entry, mark, labels, );
@@ -1052,7 +1082,7 @@ OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 
*tuple)
 PLIST_ENTRY link, next;
 POVS_CT_ENTRY entry;
 
-LOCK_STATE_EX lockState;
+LOCK_STATE_EX lockState, lockStateNat;
 NdisAcquireRWLockWrite(ovsConntrackLockObj, ,