Re: [ovs-dev] [PATCH] ovn-northd: Apply pre ACLs when using Port Groups

2018-06-19 Thread Daniel Alvarez Sanchez
Thanks a lot Han for the review. Just sent the v2 with the test fixed.
I'll leave the hash index for a follow up as I'm short in time but if
you want to edit my patch feel free to do it or send another one.

Thanks again for the Port Groups implementation! :)
Cheers,
Daniel

On Wed, Jun 20, 2018 at 4:15 AM, Han Zhou  wrote:

>
>
> On Tue, Jun 19, 2018 at 5:49 PM, Ben Pfaff  wrote:
> >
> > On Tue, Jun 19, 2018 at 05:27:20PM -0700, Han Zhou wrote:
> > > All looks good to me except that the test case "ovn -- ACLs on Port
> Groups"
> > > is broken with this change. I think it is because conntrack is not
> > > supported in the dummy datapath and so the stateful ACL would not work
> in
> > > the test suites, and it was passing just because of this bug. So, to
> fix
> > > the test case, you need below change:
> >
> > I would have guessed that conntrack works OK in the dummy datapath
> > because dpif-netdev supports conntrack.
>
> Ah, I admit that I am ignorant on this. I need to study more on it to
> understand why this test case doesn't work. Is there any
> tool/documentation/example on how to debug the dummy datapath conntrack,
> such as dumping the conntrack table entries?
>
> Thanks,
> Han
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2] ovn-northd: Apply pre ACLs when using Port Groups

2018-06-19 Thread Daniel Alvarez
When using Port Groups, the pre ACLs were not applied so the
conntrack action was not performed. This patch takes Port Groups
into account when processing the pre ACLs.

As a follow up, we could enhance this patch by creating an index
from lswitch to port groups.

Signed-off-by: Daniel Alvarez 
---
 ovn/northd/ovn-northd.c | 100 +++-
 tests/ovn.at|   2 +-
 2 files changed, 57 insertions(+), 45 deletions(-)

diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index 72fe4e795..818ac59fa 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -2835,8 +2835,47 @@ build_dhcpv6_action(struct ovn_port *op, struct in6_addr 
*offer_ip,
 return true;
 }
 
+struct ovn_port_group_ls {
+struct hmap_node key_node;  /* Index on 'key'. */
+struct uuid key;/* nb_ls->header_.uuid. */
+const struct nbrec_logical_switch *nb_ls;
+};
+
+struct ovn_port_group {
+struct hmap_node key_node;  /* Index on 'key'. */
+struct uuid key;/* nb_pg->header_.uuid. */
+const struct nbrec_port_group *nb_pg;
+struct hmap nb_lswitches;   /* NB lswitches related to the port group */
+size_t n_acls;  /* Number of ACLs applied to the port group */
+struct nbrec_acl **acls;/* ACLs applied to the port group */
+};
+
+static void
+ovn_port_group_ls_add(struct ovn_port_group *pg,
+  const struct nbrec_logical_switch *nb_ls)
+{
+struct ovn_port_group_ls *pg_ls = xzalloc(sizeof *pg_ls);
+pg_ls->key = nb_ls->header_.uuid;
+pg_ls->nb_ls = nb_ls;
+hmap_insert(&pg->nb_lswitches, &pg_ls->key_node, uuid_hash(&pg_ls->key));
+}
+
+static struct ovn_port_group_ls *
+ovn_port_group_ls_find(struct ovn_port_group *pg, const struct uuid *ls_uuid)
+{
+struct ovn_port_group_ls *pg_ls;
+
+HMAP_FOR_EACH_WITH_HASH (pg_ls, key_node, uuid_hash(ls_uuid),
+ &pg->nb_lswitches) {
+if (uuid_equals(ls_uuid, &pg_ls->key)) {
+return pg_ls;
+}
+}
+return NULL;
+}
+
 static bool
-has_stateful_acl(struct ovn_datapath *od)
+has_stateful_acl(struct ovn_datapath *od, struct hmap *port_groups)
 {
 for (size_t i = 0; i < od->nbs->n_acls; i++) {
 struct nbrec_acl *acl = od->nbs->acls[i];
@@ -2845,13 +2884,25 @@ has_stateful_acl(struct ovn_datapath *od)
 }
 }
 
+struct ovn_port_group *pg;
+HMAP_FOR_EACH (pg, key_node, port_groups) {
+if (ovn_port_group_ls_find(pg, &od->nbs->header_.uuid)) {
+for (size_t i = 0; i < pg->n_acls; i++) {
+struct nbrec_acl *acl = pg->acls[i];
+if (!strcmp(acl->action, "allow-related")) {
+return true;
+}
+}
+}
+}
 return false;
 }
 
 static void
-build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
+build_pre_acls(struct ovn_datapath *od, struct hmap *lflows,
+   struct hmap *port_groups)
 {
-bool has_stateful = has_stateful_acl(od);
+bool has_stateful = has_stateful_acl(od, port_groups);
 
 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
  * allowed by default. */
@@ -3309,21 +3360,6 @@ consider_acl(struct hmap *lflows, struct ovn_datapath 
*od,
 free(stage_hint);
 }
 
-struct ovn_port_group_ls {
-struct hmap_node key_node;  /* Index on 'key'. */
-struct uuid key;/* nb_ls->header_.uuid. */
-const struct nbrec_logical_switch *nb_ls;
-};
-
-struct ovn_port_group {
-struct hmap_node key_node;  /* Index on 'key'. */
-struct uuid key;/* nb_pg->header_.uuid. */
-const struct nbrec_port_group *nb_pg;
-struct hmap nb_lswitches;   /* NB lswitches related to the port group */
-size_t n_acls;  /* Number of ACLs applied to the port group */
-struct nbrec_acl **acls;/* ACLs applied to the port group */
-};
-
 static struct ovn_port_group *
 ovn_port_group_create(struct hmap *pgs,
   const struct nbrec_port_group *nb_pg)
@@ -3338,30 +3374,6 @@ ovn_port_group_create(struct hmap *pgs,
 return pg;
 }
 
-static void
-ovn_port_group_ls_add(struct ovn_port_group *pg,
-  const struct nbrec_logical_switch *nb_ls)
-{
-struct ovn_port_group_ls *pg_ls = xzalloc(sizeof *pg_ls);
-pg_ls->key = nb_ls->header_.uuid;
-pg_ls->nb_ls = nb_ls;
-hmap_insert(&pg->nb_lswitches, &pg_ls->key_node, uuid_hash(&pg_ls->key));
-}
-
-static struct ovn_port_group_ls *
-ovn_port_group_ls_find(struct ovn_port_group *pg, const struct uuid *ls_uuid)
-{
-struct ovn_port_group_ls *pg_ls;
-
-HMAP_FOR_EACH_WITH_HASH (pg_ls, key_node, uuid_hash(ls_uuid),
- &pg->nb_lswitches) {
-if (uuid_equals(ls_uuid, &pg_ls->key)) {
-return pg_ls;
-}
-}
-return NULL;
-}
-
 static void
 ovn_port_group_destroy(struct hmap *pgs, struct ovn_port_group *pg)
 {
@@ -3416,7 +3428,7 @@ static void
 

Re: [ovs-dev] [PATCH] ovn-northd: Apply pre ACLs when using Port Groups

2018-06-19 Thread Han Zhou
On Tue, Jun 19, 2018 at 5:49 PM, Ben Pfaff  wrote:
>
> On Tue, Jun 19, 2018 at 05:27:20PM -0700, Han Zhou wrote:
> > All looks good to me except that the test case "ovn -- ACLs on Port
Groups"
> > is broken with this change. I think it is because conntrack is not
> > supported in the dummy datapath and so the stateful ACL would not work
in
> > the test suites, and it was passing just because of this bug. So, to fix
> > the test case, you need below change:
>
> I would have guessed that conntrack works OK in the dummy datapath
> because dpif-netdev supports conntrack.

Ah, I admit that I am ignorant on this. I need to study more on it to
understand why this test case doesn't work. Is there any
tool/documentation/example on how to debug the dummy datapath conntrack,
such as dumping the conntrack table entries?

Thanks,
Han
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] ovn-northd: Apply pre ACLs when using Port Groups

2018-06-19 Thread Ben Pfaff
On Tue, Jun 19, 2018 at 05:27:20PM -0700, Han Zhou wrote:
> All looks good to me except that the test case "ovn -- ACLs on Port Groups"
> is broken with this change. I think it is because conntrack is not
> supported in the dummy datapath and so the stateful ACL would not work in
> the test suites, and it was passing just because of this bug. So, to fix
> the test case, you need below change:

I would have guessed that conntrack works OK in the dummy datapath
because dpif-netdev supports conntrack.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] ovn-northd: Apply pre ACLs when using Port Groups

2018-06-19 Thread Han Zhou
On Tue, Jun 19, 2018 at 5:27 PM, Han Zhou  wrote:
>
>
>
> On Tue, Jun 19, 2018 at 3:46 PM, Daniel Alvarez 
wrote:
> >
> > When using Port Groups, the pre ACLs were not applied so the
> > conntrack action was not performed. This patch takes Port Groups
> > into account when processing the pre ACLs.
> >
> > Signed-off-by: Daniel Alvarez 
> > ---
> >  ovn/northd/ovn-northd.c | 100
+++-
> >  1 file changed, 56 insertions(+), 44 deletions(-)
> >
> > diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
> > index 72fe4e795..818ac59fa 100644
> > --- a/ovn/northd/ovn-northd.c
> > +++ b/ovn/northd/ovn-northd.c
> > @@ -2835,8 +2835,47 @@ build_dhcpv6_action(struct ovn_port *op, struct
in6_addr *offer_ip,
> >  return true;
> >  }
> >
> > +struct ovn_port_group_ls {
> > +struct hmap_node key_node;  /* Index on 'key'. */
> > +struct uuid key;/* nb_ls->header_.uuid. */
> > +const struct nbrec_logical_switch *nb_ls;
> > +};
> > +
> > +struct ovn_port_group {
> > +struct hmap_node key_node;  /* Index on 'key'. */
> > +struct uuid key;/* nb_pg->header_.uuid. */
> > +const struct nbrec_port_group *nb_pg;
> > +struct hmap nb_lswitches;   /* NB lswitches related to the port
group */
> > +size_t n_acls;  /* Number of ACLs applied to the port
group */
> > +struct nbrec_acl **acls;/* ACLs applied to the port group */
> > +};
> > +
> > +static void
> > +ovn_port_group_ls_add(struct ovn_port_group *pg,
> > +  const struct nbrec_logical_switch *nb_ls)
> > +{
> > +struct ovn_port_group_ls *pg_ls = xzalloc(sizeof *pg_ls);
> > +pg_ls->key = nb_ls->header_.uuid;
> > +pg_ls->nb_ls = nb_ls;
> > +hmap_insert(>nb_lswitches, _ls->key_node,
uuid_hash(_ls->key));
> > +}
> > +
> > +static struct ovn_port_group_ls *
> > +ovn_port_group_ls_find(struct ovn_port_group *pg, const struct uuid
*ls_uuid)
> > +{
> > +struct ovn_port_group_ls *pg_ls;
> > +
> > +HMAP_FOR_EACH_WITH_HASH (pg_ls, key_node, uuid_hash(ls_uuid),
> > + >nb_lswitches) {
> > +if (uuid_equals(ls_uuid, _ls->key)) {
> > +return pg_ls;
> > +}
> > +}
> > +return NULL;
> > +}
> > +
> >  static bool
> > -has_stateful_acl(struct ovn_datapath *od)
> > +has_stateful_acl(struct ovn_datapath *od, struct hmap *port_groups)
> >  {
> >  for (size_t i = 0; i < od->nbs->n_acls; i++) {
> >  struct nbrec_acl *acl = od->nbs->acls[i];
> > @@ -2845,13 +2884,25 @@ has_stateful_acl(struct ovn_datapath *od)
> >  }
> >  }
> >
> > +struct ovn_port_group *pg;
> > +HMAP_FOR_EACH (pg, key_node, port_groups) {
> > +if (ovn_port_group_ls_find(pg, >nbs->header_.uuid)) {
> > +for (size_t i = 0; i < pg->n_acls; i++) {
> > +struct nbrec_acl *acl = pg->acls[i];
> > +if (!strcmp(acl->action, "allow-related")) {
> > +return true;
> > +}
> > +}
> > +}
> > +}
> >  return false;
> >  }
> >
> >  static void
> > -build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
> > +build_pre_acls(struct ovn_datapath *od, struct hmap *lflows,
> > +   struct hmap *port_groups)
> >  {
> > -bool has_stateful = has_stateful_acl(od);
> > +bool has_stateful = has_stateful_acl(od, port_groups);
> >
> >  /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
> >   * allowed by default. */
> > @@ -3309,21 +3360,6 @@ consider_acl(struct hmap *lflows, struct
ovn_datapath *od,
> >  free(stage_hint);
> >  }
> >
> > -struct ovn_port_group_ls {
> > -struct hmap_node key_node;  /* Index on 'key'. */
> > -struct uuid key;/* nb_ls->header_.uuid. */
> > -const struct nbrec_logical_switch *nb_ls;
> > -};
> > -
> > -struct ovn_port_group {
> > -struct hmap_node key_node;  /* Index on 'key'. */
> > -struct uuid key;/* nb_pg->header_.uuid. */
> > -const struct nbrec_port_group *nb_pg;
> > -struct hmap nb_lswitches;   /* NB lswitches related to the port
group */
> > -size_t n_acls;  /* Number of ACLs applied to the port
group */
> > -struct nbrec_acl **acls;/* ACLs applied to the port group */
> > -};
> > -
> >  static struct ovn_port_group *
> >  ovn_port_group_create(struct hmap *pgs,
> >const struct nbrec_port_group *nb_pg)
> > @@ -3338,30 +3374,6 @@ ovn_port_group_create(struct hmap *pgs,
> >  return pg;
> >  }
> >
> > -static void
> > -ovn_port_group_ls_add(struct ovn_port_group *pg,
> > -  const struct nbrec_logical_switch *nb_ls)
> > -{
> > -struct ovn_port_group_ls *pg_ls = xzalloc(sizeof *pg_ls);
> > -pg_ls->key = nb_ls->header_.uuid;
> > -pg_ls->nb_ls = nb_ls;
> > -hmap_insert(>nb_lswitches, _ls->key_node,
uuid_hash(_ls->key));
> > -}
> > -
> > -static struct ovn_port_group_ls *
> > 

Re: [ovs-dev] [PATCH] ovn-northd: Apply pre ACLs when using Port Groups

2018-06-19 Thread Han Zhou
On Tue, Jun 19, 2018 at 3:46 PM, Daniel Alvarez  wrote:
>
> When using Port Groups, the pre ACLs were not applied so the
> conntrack action was not performed. This patch takes Port Groups
> into account when processing the pre ACLs.
>
> Signed-off-by: Daniel Alvarez 
> ---
>  ovn/northd/ovn-northd.c | 100
+++-
>  1 file changed, 56 insertions(+), 44 deletions(-)
>
> diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
> index 72fe4e795..818ac59fa 100644
> --- a/ovn/northd/ovn-northd.c
> +++ b/ovn/northd/ovn-northd.c
> @@ -2835,8 +2835,47 @@ build_dhcpv6_action(struct ovn_port *op, struct
in6_addr *offer_ip,
>  return true;
>  }
>
> +struct ovn_port_group_ls {
> +struct hmap_node key_node;  /* Index on 'key'. */
> +struct uuid key;/* nb_ls->header_.uuid. */
> +const struct nbrec_logical_switch *nb_ls;
> +};
> +
> +struct ovn_port_group {
> +struct hmap_node key_node;  /* Index on 'key'. */
> +struct uuid key;/* nb_pg->header_.uuid. */
> +const struct nbrec_port_group *nb_pg;
> +struct hmap nb_lswitches;   /* NB lswitches related to the port
group */
> +size_t n_acls;  /* Number of ACLs applied to the port
group */
> +struct nbrec_acl **acls;/* ACLs applied to the port group */
> +};
> +
> +static void
> +ovn_port_group_ls_add(struct ovn_port_group *pg,
> +  const struct nbrec_logical_switch *nb_ls)
> +{
> +struct ovn_port_group_ls *pg_ls = xzalloc(sizeof *pg_ls);
> +pg_ls->key = nb_ls->header_.uuid;
> +pg_ls->nb_ls = nb_ls;
> +hmap_insert(>nb_lswitches, _ls->key_node,
uuid_hash(_ls->key));
> +}
> +
> +static struct ovn_port_group_ls *
> +ovn_port_group_ls_find(struct ovn_port_group *pg, const struct uuid
*ls_uuid)
> +{
> +struct ovn_port_group_ls *pg_ls;
> +
> +HMAP_FOR_EACH_WITH_HASH (pg_ls, key_node, uuid_hash(ls_uuid),
> + >nb_lswitches) {
> +if (uuid_equals(ls_uuid, _ls->key)) {
> +return pg_ls;
> +}
> +}
> +return NULL;
> +}
> +
>  static bool
> -has_stateful_acl(struct ovn_datapath *od)
> +has_stateful_acl(struct ovn_datapath *od, struct hmap *port_groups)
>  {
>  for (size_t i = 0; i < od->nbs->n_acls; i++) {
>  struct nbrec_acl *acl = od->nbs->acls[i];
> @@ -2845,13 +2884,25 @@ has_stateful_acl(struct ovn_datapath *od)
>  }
>  }
>
> +struct ovn_port_group *pg;
> +HMAP_FOR_EACH (pg, key_node, port_groups) {
> +if (ovn_port_group_ls_find(pg, >nbs->header_.uuid)) {
> +for (size_t i = 0; i < pg->n_acls; i++) {
> +struct nbrec_acl *acl = pg->acls[i];
> +if (!strcmp(acl->action, "allow-related")) {
> +return true;
> +}
> +}
> +}
> +}
>  return false;
>  }
>
>  static void
> -build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
> +build_pre_acls(struct ovn_datapath *od, struct hmap *lflows,
> +   struct hmap *port_groups)
>  {
> -bool has_stateful = has_stateful_acl(od);
> +bool has_stateful = has_stateful_acl(od, port_groups);
>
>  /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
>   * allowed by default. */
> @@ -3309,21 +3360,6 @@ consider_acl(struct hmap *lflows, struct
ovn_datapath *od,
>  free(stage_hint);
>  }
>
> -struct ovn_port_group_ls {
> -struct hmap_node key_node;  /* Index on 'key'. */
> -struct uuid key;/* nb_ls->header_.uuid. */
> -const struct nbrec_logical_switch *nb_ls;
> -};
> -
> -struct ovn_port_group {
> -struct hmap_node key_node;  /* Index on 'key'. */
> -struct uuid key;/* nb_pg->header_.uuid. */
> -const struct nbrec_port_group *nb_pg;
> -struct hmap nb_lswitches;   /* NB lswitches related to the port
group */
> -size_t n_acls;  /* Number of ACLs applied to the port
group */
> -struct nbrec_acl **acls;/* ACLs applied to the port group */
> -};
> -
>  static struct ovn_port_group *
>  ovn_port_group_create(struct hmap *pgs,
>const struct nbrec_port_group *nb_pg)
> @@ -3338,30 +3374,6 @@ ovn_port_group_create(struct hmap *pgs,
>  return pg;
>  }
>
> -static void
> -ovn_port_group_ls_add(struct ovn_port_group *pg,
> -  const struct nbrec_logical_switch *nb_ls)
> -{
> -struct ovn_port_group_ls *pg_ls = xzalloc(sizeof *pg_ls);
> -pg_ls->key = nb_ls->header_.uuid;
> -pg_ls->nb_ls = nb_ls;
> -hmap_insert(>nb_lswitches, _ls->key_node,
uuid_hash(_ls->key));
> -}
> -
> -static struct ovn_port_group_ls *
> -ovn_port_group_ls_find(struct ovn_port_group *pg, const struct uuid
*ls_uuid)
> -{
> -struct ovn_port_group_ls *pg_ls;
> -
> -HMAP_FOR_EACH_WITH_HASH (pg_ls, key_node, uuid_hash(ls_uuid),
> - >nb_lswitches) {
> -if (uuid_equals(ls_uuid, _ls->key)) {
> -return 

[ovs-dev] [PATCH] datapath: Add meter action support.

2018-06-19 Thread Justin Pettit
From: Andy Zhou 

Upstream commit:
commit cd8a6c33693c1b89d2737ffdbf9611564e9ac907
Author: Andy Zhou 
Date:   Fri Nov 10 12:09:43 2017 -0800

openvswitch: Add meter action support

Implements OVS kernel meter action support.

Signed-off-by: Andy Zhou 
Signed-off-by: David S. Miller 

Signed-off-by: Justin Pettit 
---
 NEWS  | 5 +++--
 datapath/actions.c| 6 ++
 datapath/datapath.h   | 1 +
 datapath/flow_netlink.c   | 6 ++
 datapath/linux/compat/include/linux/openvswitch.h | 2 +-
 5 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/NEWS b/NEWS
index 7f6589a46878..cd15a332c47e 100644
--- a/NEWS
+++ b/NEWS
@@ -19,8 +19,9 @@ Post-v2.9.0
  * New OpenFlow 1.0 extensions for group support.
  * Default selection method for select groups is now dp_hash with improved
accuracy.
-   - Linux kernel 4.14
- * Add support for compiling OVS with the latest Linux 4.14 kernel
+   - Linux datapath
+ * Add support for compiling OVS with the latest Linux 4.14 kernel.
+ * Added support for meters.
- ovn:
  * implemented icmp4/icmp6/tcp_reset actions in order to drop the packet
and reply with a RST for TCP or ICMPv4/ICMPv6 unreachable message for
diff --git a/datapath/actions.c b/datapath/actions.c
index eab147617c8b..56b013601393 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -1341,6 +1341,12 @@ static int do_execute_actions(struct datapath *dp, 
struct sk_buff *skb,
case OVS_ACTION_ATTR_POP_NSH:
err = pop_nsh(skb, key);
break;
+
+   case OVS_ACTION_ATTR_METER:
+   if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) {
+   consume_skb(skb);
+   return 0;
+   }
}
 
if (unlikely(err)) {
diff --git a/datapath/datapath.h b/datapath/datapath.h
index 93c9ed505448..c38286df75c7 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -31,6 +31,7 @@
 #include "compat.h"
 #include "flow.h"
 #include "flow_table.h"
+#include "meter.h"
 #include "vport-internal_dev.h"
 
 #define DP_MAX_PORTS   USHRT_MAX
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index 1b7bad8fe2ab..bea525a5dfcb 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -92,6 +92,7 @@ static bool actions_may_change_flow(const struct nlattr 
*actions)
case OVS_ACTION_ATTR_SAMPLE:
case OVS_ACTION_ATTR_SET:
case OVS_ACTION_ATTR_SET_MASKED:
+   case OVS_ACTION_ATTR_METER:
default:
return true;
}
@@ -2853,6 +2854,7 @@ static int __ovs_nla_copy_actions(struct net *net, const 
struct nlattr *attr,
[OVS_ACTION_ATTR_POP_ETH] = 0,
[OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1,
[OVS_ACTION_ATTR_POP_NSH] = 0,
+   [OVS_ACTION_ATTR_METER] = sizeof(u32),
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
@@ -3038,6 +3040,10 @@ static int __ovs_nla_copy_actions(struct net *net, const 
struct nlattr *attr,
break;
}
 
+   case OVS_ACTION_ATTR_METER:
+   /* Non-existent meters are simply ignored.  */
+   break;
+
default:
OVS_NLERR(log, "Unknown Action type %d", type);
return -EINVAL;
diff --git a/datapath/linux/compat/include/linux/openvswitch.h 
b/datapath/linux/compat/include/linux/openvswitch.h
index 5c1e2383f4f9..8e5f3b6fbfb1 100644
--- a/datapath/linux/compat/include/linux/openvswitch.h
+++ b/datapath/linux/compat/include/linux/openvswitch.h
@@ -934,12 +934,12 @@ enum ovs_action_attr {
OVS_ACTION_ATTR_CT_CLEAR, /* No argument. */
OVS_ACTION_ATTR_PUSH_NSH, /* Nested OVS_NSH_KEY_ATTR_*. */
OVS_ACTION_ATTR_POP_NSH,  /* No argument. */
+   OVS_ACTION_ATTR_METER, /* u32 meter number. */
 
 #ifndef __KERNEL__
OVS_ACTION_ATTR_TUNNEL_PUSH,   /* struct ovs_action_push_tnl*/
OVS_ACTION_ATTR_TUNNEL_POP,/* u32 port number. */
OVS_ACTION_ATTR_CLONE, /* Nested OVS_CLONE_ATTR_*.  */
-   OVS_ACTION_ATTR_METER, /* u32 meter number. */
 #endif
__OVS_ACTION_ATTR_MAX,/* Nothing past this will be accepted
   * from userspace. */
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v9 0/7] OVS-DPDK flow offload with rte_flow

2018-06-19 Thread Koujalagi, MalleshX
Hi Shahaf,

Thanks for setting up VXLAN env. and trying to reproduce, appreciate!!

As you suggested, I moved to DPDK17.11.3 stable and OvS 2.9.0, still observed 
same issue.

An attached Vxlan_diagram.jpg, to get clear idea, we are seeing issue while 
traffic injected from Traffic Generator-->HOST B[eth1-->Vxlan-->eth0]-->Vxlan 
Tunnel Packet-->HOST A[eth0-->Vxlan-->eth1]-->Traffic Generator direction,  at 
Host A (eth0-->Vxlan-->eth1), we see Vxlan DECAP issue in case of HW-offload 
enabled. Vxlan Tunnel packets are not DECAP from eth0 to eth1, not observed any 
packet receiving @ eth1.

We injected random destination ip address(196.0.0.(0/1)),

1.Find tcpdump @ Host A (eth0).
07:24:12.013848 68:05:ca:33:ff:99 > 68:05:ca:33:fe:f9, ethertype IPv4 (0x0800), 
length 110: 172.168.1.2.46816 > 172.168.1.1.4789: VXLAN, flags [I] (0x08), vni 0
00:10:94:00:00:12 > 00:00:01:00:00:01, ethertype IPv4 (0x0800), length 60: 
197.0.0.0.1024 > 196.0.0.1.1024: UDP, length 18
07:24:12.013860 68:05:ca:33:ff:99 > 68:05:ca:33:fe:f9, ethertype IPv4 (0x0800), 
length 110: 172.168.1.2.50892 > 172.168.1.1.4789: VXLAN, flags [I] (0x08), vni 0
00:10:94:00:00:12 > 00:00:01:00:00:01, ethertype IPv4 (0x0800), length 60: 
197.0.0.0.1024 > 196.0.0.0.1024: UDP, length 18


2.   Ovs logs.

3.   Sure, further we will debug this issue using Skype/webex session. 
Please let me know good time.


Best regards
-/Mallesh


From: Shahaf Shuler [mailto:shah...@mellanox.com]
Sent: Monday, June 18, 2018 4:05 AM
To: Koujalagi, MalleshX ; y...@fridaylinux.org; 
f...@napatech.com
Cc: d...@openvswitch.org; Ergin, Mesut A ; Tsai, James 

Subject: RE: [ovs-dev] [PATCH v9 0/7] OVS-DPDK flow offload with rte_flow

Mallesh,

I was finally able to setup the vxlan testing w/ OVS. Instead of using OVS on 
both sides I used vxlan i/f to inject the traffic on one host and run ovs with 
vxlan tunnel configuration you specified on the other.

I was not able to reproduce your case. Actually I see the rules are created 
successfully (see attached ovs log "vxlan_tep")

Few observations:

1.   The rule created for the VXLAN is for the outer pattern up to the UDP, 
hence supported by the device

a.   2018-06-18T08:11:17.466Z|00057|dpif_netdev(pmd53)|DBG|flow match: 
recirc_id=0,eth,udp,vlan_tci=0x,dl_src=e4:1d:2d:

ca:ca:6a,dl_dst=e4:1d:2d:a3:aa:74,nw_dst=2.2.2.1,nw_frag=no,tp_dst=4789

2.   I do see segfault on mlx5 PMD on 17.11.0 and this was resolved on 
17.11.3 stable.

3.   I used MLNX_OFED_LINUX-4.3-2.0.1.0, however I don't expect it to cause 
any diff

The packets being sent to the ovs w/ vxlan tunnel logic are[1]. tcpdump after 
the ovs logic is [2], the packet outer headers were removed as expected.

On top of all, I tried to force the validate function to fail. Indeed the flow 
was not created but the decap functionality still happens (see attached ovs log 
"vxlan_tep_force_err")

Can you please:

1.   Provide me the type of packets you inject and don't see being decap.

2.   Try w/ 17.11.3 stable to avoid any issues from the DPDK side

3.   If you still see the issue can we set webex/skype meeting for joint 
debug, currently I don't see the error you reported on.


[1]
###[ Ethernet ]###
  dst= e4:1d:2d:a3:aa:74
  src= e4:1d:2d:ca:ca:6a
  type= IPv4
###[ IP ]###
 version= 4
 ihl= None
 tos= 0x0
 len= None
 id= 1
 flags=
 frag= 0
 ttl= 64
 proto= udp
 chksum= None
 src= 2.2.2.2
 dst= 2.2.2.1
 \options\
###[ UDP ]###
sport= 34550
dport= 4789
len= None
chksum= None
###[ VXLAN ]###
   flags= I
   reserved1= 0
   vni= 1000
   reserved2= 0x0
###[ Ethernet ]###
  dst= 00:00:5e:00:01:01
  src= 18:66:da:f5:37:64
  type= IPv4
###[ IP ]###
 version= 4
 ihl= None
 tos= 0x0
 len= None
 id= 1
 flags=
 frag= 0
 ttl= 64
 proto= tcp
 chksum= None
 src= 10.7.12.62
 dst= 1.1.1.1
 \options\
###[ TCP ]###
sport= ftp_data
dport= http
seq= 0
ack= 0
dataofs= None
reserved= 0
flags= S
window= 8192
chksum= None
urgptr= 0
options= {}

[2]
11:13:59.477667 18:66:da:f5:37:64 > 00:00:5e:00:01:01, ethertype IPv4 (0x0800), 
length 60: (tos 0x0, ttl 64, id 1, off
set 0, flags [none], proto TCP (6), length 40)
10.7.12.62.20 > 1.1.1.1.80: Flags [S], cksum 0x7738 (correct), seq 0, win 
8192, length 0

--Shahaf

From: Koujalagi, MalleshX 
mailto:malleshx.koujal...@intel.com>>
Sent: Friday, June 15, 2018 9:29 PM
To: Shahaf Shuler mailto:shah...@mellanox.com>>; 

[ovs-dev] [PATCH] ovn-northd: Apply pre ACLs when using Port Groups

2018-06-19 Thread Daniel Alvarez
When using Port Groups, the pre ACLs were not applied so the
conntrack action was not performed. This patch takes Port Groups
into account when processing the pre ACLs.

Signed-off-by: Daniel Alvarez 
---
 ovn/northd/ovn-northd.c | 100 +++-
 1 file changed, 56 insertions(+), 44 deletions(-)

diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index 72fe4e795..818ac59fa 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -2835,8 +2835,47 @@ build_dhcpv6_action(struct ovn_port *op, struct in6_addr 
*offer_ip,
 return true;
 }
 
+struct ovn_port_group_ls {
+struct hmap_node key_node;  /* Index on 'key'. */
+struct uuid key;/* nb_ls->header_.uuid. */
+const struct nbrec_logical_switch *nb_ls;
+};
+
+struct ovn_port_group {
+struct hmap_node key_node;  /* Index on 'key'. */
+struct uuid key;/* nb_pg->header_.uuid. */
+const struct nbrec_port_group *nb_pg;
+struct hmap nb_lswitches;   /* NB lswitches related to the port group */
+size_t n_acls;  /* Number of ACLs applied to the port group */
+struct nbrec_acl **acls;/* ACLs applied to the port group */
+};
+
+static void
+ovn_port_group_ls_add(struct ovn_port_group *pg,
+  const struct nbrec_logical_switch *nb_ls)
+{
+struct ovn_port_group_ls *pg_ls = xzalloc(sizeof *pg_ls);
+pg_ls->key = nb_ls->header_.uuid;
+pg_ls->nb_ls = nb_ls;
+hmap_insert(&pg->nb_lswitches, &pg_ls->key_node, uuid_hash(&pg_ls->key));
+}
+
+static struct ovn_port_group_ls *
+ovn_port_group_ls_find(struct ovn_port_group *pg, const struct uuid *ls_uuid)
+{
+struct ovn_port_group_ls *pg_ls;
+
+HMAP_FOR_EACH_WITH_HASH (pg_ls, key_node, uuid_hash(ls_uuid),
+ &pg->nb_lswitches) {
+if (uuid_equals(ls_uuid, &pg_ls->key)) {
+return pg_ls;
+}
+}
+return NULL;
+}
+
 static bool
-has_stateful_acl(struct ovn_datapath *od)
+has_stateful_acl(struct ovn_datapath *od, struct hmap *port_groups)
 {
 for (size_t i = 0; i < od->nbs->n_acls; i++) {
 struct nbrec_acl *acl = od->nbs->acls[i];
@@ -2845,13 +2884,25 @@ has_stateful_acl(struct ovn_datapath *od)
 }
 }
 
+struct ovn_port_group *pg;
+HMAP_FOR_EACH (pg, key_node, port_groups) {
+if (ovn_port_group_ls_find(pg, &od->nbs->header_.uuid)) {
+for (size_t i = 0; i < pg->n_acls; i++) {
+struct nbrec_acl *acl = pg->acls[i];
+if (!strcmp(acl->action, "allow-related")) {
+return true;
+}
+}
+}
+}
 return false;
 }
 
 static void
-build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
+build_pre_acls(struct ovn_datapath *od, struct hmap *lflows,
+   struct hmap *port_groups)
 {
-bool has_stateful = has_stateful_acl(od);
+bool has_stateful = has_stateful_acl(od, port_groups);
 
 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
  * allowed by default. */
@@ -3309,21 +3360,6 @@ consider_acl(struct hmap *lflows, struct ovn_datapath 
*od,
 free(stage_hint);
 }
 
-struct ovn_port_group_ls {
-struct hmap_node key_node;  /* Index on 'key'. */
-struct uuid key;/* nb_ls->header_.uuid. */
-const struct nbrec_logical_switch *nb_ls;
-};
-
-struct ovn_port_group {
-struct hmap_node key_node;  /* Index on 'key'. */
-struct uuid key;/* nb_pg->header_.uuid. */
-const struct nbrec_port_group *nb_pg;
-struct hmap nb_lswitches;   /* NB lswitches related to the port group */
-size_t n_acls;  /* Number of ACLs applied to the port group */
-struct nbrec_acl **acls;/* ACLs applied to the port group */
-};
-
 static struct ovn_port_group *
 ovn_port_group_create(struct hmap *pgs,
   const struct nbrec_port_group *nb_pg)
@@ -3338,30 +3374,6 @@ ovn_port_group_create(struct hmap *pgs,
 return pg;
 }
 
-static void
-ovn_port_group_ls_add(struct ovn_port_group *pg,
-  const struct nbrec_logical_switch *nb_ls)
-{
-struct ovn_port_group_ls *pg_ls = xzalloc(sizeof *pg_ls);
-pg_ls->key = nb_ls->header_.uuid;
-pg_ls->nb_ls = nb_ls;
-hmap_insert(&pg->nb_lswitches, &pg_ls->key_node, uuid_hash(&pg_ls->key));
-}
-
-static struct ovn_port_group_ls *
-ovn_port_group_ls_find(struct ovn_port_group *pg, const struct uuid *ls_uuid)
-{
-struct ovn_port_group_ls *pg_ls;
-
-HMAP_FOR_EACH_WITH_HASH (pg_ls, key_node, uuid_hash(ls_uuid),
- &pg->nb_lswitches) {
-if (uuid_equals(ls_uuid, &pg_ls->key)) {
-return pg_ls;
-}
-}
-return NULL;
-}
-
 static void
 ovn_port_group_destroy(struct hmap *pgs, struct ovn_port_group *pg)
 {
@@ -3416,7 +3428,7 @@ static void
 build_acls(struct ovn_datapath *od, struct hmap *lflows,
struct hmap *port_groups)
 {
-bool has_stateful = 

Re: [ovs-dev] 64Byte packet performance regression on 2.9 from 2.7

2018-06-19 Thread Shahaji Bhosle via dev
Hi Ilya,
This issue is a release blocker for us, just wanted to check if you
need more details from us? If there is anything we can do to help expedite
or root-cause the problem, we are happy to help.
Please let us know.
Thanks Shahaji

On Mon, Jun 18, 2018 at 10:20 AM Shahaji Bhosle 
wrote:

> Thanks Ilya, I will look at the commit, but not sure now how to tell how
> much real work is being done, I would have liked polling cycles to be
> treated as before and not towards packet processing. That does explain, as
> long as there are packets on the wire we are always 100%, basically cannot
> tell how efficiently the CPUs are being used.
> Thanks, Shahaji
>
> On Mon, Jun 18, 2018 at 10:07 AM, Ilya Maximets 
> wrote:
>
>> Thanks for the data.
>>
>> I have to note additionally that the meaning of "processing cycles"
>> significantly changed since the following commit:
>>
>> commit a2ac666d5265c01661e189caac321d962f54649f
>> Author: Ciara Loftus 
>> Date:   Mon Feb 20 12:53:00 2017 +
>>
>> dpif-netdev: Change definitions of 'idle' & 'processing' cycles
>>
>> Instead of counting all polling cycles as processing cycles, only
>> count
>> the cycles where packets were received from the polling.
>>
>> This could explain the difference in "PMD Processing Cycles" column,
>> because successful "POLLING" cycles are now included into "PROCESSING".
>>
>> Best regards, Ilya Maximets.
>>
>> On 18.06.2018 16:31, Shahaji Bhosle wrote:
>> > Hi Ilya,
>> > Thanks for the quick reply,
>> > Please find the numbers for our PHY-PHY test, please note that with OVS
>> 2.9.1 + DPDK 17.11 even a 10% of the below numbers will make the OVS
>> 2.9+DPDK17.11 processing cycles to hit 100%, but 2.7 will on our setup
>> never goes above 75% for processing cycles. I am also attaching the perf
>> report between the two code bases and I think the
>> "11.26%--dp_netdev_pmd_flush_output_packets" is causing us to take the
>> performance hit. Out testing is also SRIOV and CPUs are ARM A72 cores. We
>> are happy to run more tests, it is not easy for use to move back to OVS
>> 2.8, but could happy to try more experiments if it helps us narrow down
>> further. Please note we have also tried increasing the tx-flush-interval
>> and it helps a little but still not significant enough. Let us know.
>> >
>> > Thanks, Shahaji
>> >
>> >
>> > *Setup:*
>> > IXIAPort 0 {(PF0)==[OVS+DPDK]==(PF1)} Port
>> 1<-SFP28>IXIA
>> >
>> > release/version   config  Testdirection   MPPSIxia Line
>> rate (%)  PMD Processing Cycles (%)
>> > OVS 2.9 + DPDK 17.11  OVS on Maia (PF0--PF1)  No drop port 1 to 2
>>  31.385  99.9
>> >   port 2 to 1
>>  31.385  99.9
>> >   bi  15.5 +
>> 15.5 42  99.9
>> >
>> >
>> > OVS 2.7 + DPDK 16.11  OVS on Maia (PF0--PF1)  No drop port 1 to 2
>>  33.890  71
>> >   port 2 to 1
>>  32.788  70
>> >   bi
>> 17.4+17.4   47  74
>> >
>> >
>> >
>> >
>> >
>> >
>> >
>> > On Mon, Jun 18, 2018 at 4:25 AM, Nitin Katiyar <
>> nitin.kati...@ericsson.com > wrote:
>> >
>> > Hi,
>> > We also experienced degradation from OVS2.6/2.7 to OVS2.8.2(with
>> DPDK17.05.02). The drop is more for 64 bytes packet size (~8-10%) even with
>> higher number of flows. I tried OVS 2.8 with DPDK17.11 and it improved for
>> higher packet sizes but 64 bytes size is still the concern.
>> >
>> > Regards,
>> > Nitin
>> >
>> > -Original Message-
>> > From: Ilya Maximets [mailto:i.maxim...@samsung.com > i.maxim...@samsung.com>]
>> > Sent: Monday, June 18, 2018 1:32 PM
>> > To: ovs-dev@openvswitch.org ;
>> shahaji.bho...@broadcom.com 
>> > Subject: Re: [ovs-dev] 64Byte packet performance regression on 2.9
>> from 2.7
>> >
>> > CC: Shahaji Bhosle
>> >
>> > Sorry, missed you in CC list.
>> >
>> > Best regards, Ilya Maximets.
>> >
>> > On 15.06.2018 10:44, Ilya Maximets wrote:
>> > >> Hi,
>> > >> I just upgraded from OvS 2.7 + DPDK 16.11 to OvS2.9 + DPDK 17.11
>> and
>> > >> running into performance issue with 64 Byte packet rate. One
>> > >> interesting thing that I notice that even at very light load from
>> > >> IXIA the processing cycles on all the PMD threads run close to
>> 100%
>> > >> of the cpu cycle on 2.9 OvS, but on OvS 2.7 even under full load
>> the
>> > >> processing cycles remain at 75% of the cpu cycles.
>> > >>
>> > >> Attaching the FlameGraphs of both the versions, the only thing
>> that
>> > >> pops out to me is the new way invoking netdev_send() is on 2.9 is
>> > >> being invoked via  dp_netdev_pmd_flush_output_packets() which
>> seems
>> > >> to be adding another 

[ovs-dev] [ANNOUNCE] SkyNet (aka the 0-day Robot)

2018-06-19 Thread Aaron Conole
Greetings list,

I've set up a robot that pulls series information from patchwork, and
runs a jenkins task that does a few basic sanity checks on the patch,
and emails the mailing list if it detects things are incorrect.

The robot will send from ro...@bytheb.org for now (unless someone wants
to donate a better alias).  That means you can setup a spam filter if
you really want to ignore it.

On the other hand, all of the code used to run the robot is available
at:
  https://github.com/orgcandman/pw-ci/

That includes a basic patchwork monitoring framework, and ci backend
submission tool.  Additionally, the example jenkins job I'm using.

Patches / flames welcome :)

-Aaron
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] Automated robotic reply. Re: ...

2018-06-19 Thread Ben Pfaff
On Tue, Jun 19, 2018 at 05:20:08PM -0400, 0-day Robot wrote:
> Bleep bloop.  Greetings Vishal Deep Ajmera, I am a robot and I have tried out 
> your patch
> with message ID  
> <1529202111-20855-1-git-send-email-vishal.deep.ajm...@ericsson.com>
> Thanks for your contribution.

Watch out.  It's a short step from "I have tried out your patch" to
"Kill all humans!"
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] Automated robotic reply. Re: ...

2018-06-19 Thread Aaron Conole
0-day Robot  writes:

> Bleep bloop.  Greetings Vishal Deep Ajmera, I am a robot and I have tried out 
> your patch
> with message ID  
> <1529202111-20855-1-git-send-email-vishal.deep.ajm...@ericsson.com>
> Thanks for your contribution.
>
> I encountered some error that I wasn't expecting.  See the details below.
>
>
> checkpatch:
> == Checking ".patch" ==
> WARNING: Line is 84 characters long (recommended limit is 79)
> #159 FILE: lib/dpif-netdev.c:5222:
> /* Flow batching should be performed only after fast-path 
> processing
>
> WARNING: Line is 93 characters long (recommended limit is 79)
> #253 FILE: lib/dpif-netdev.c:5485:
> dp_netdev_queue_batches(map->packet, map->flow, map->tcp_flags, 
> batches, _batches);
>
> Lines checked: 265, Warnings: 2, Errors: 0
>
>
> Please check this out.  If you feel there has been an error, please email me 
> back.

Oops.  I need to update the template.  Please don't reply to the bot - all
messages for it go to the bit bucket.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] Automated robotic reply. Re: ...

2018-06-19 Thread 0-day Robot
Bleep bloop.  Greetings Vishal Deep Ajmera, I am a robot and I have tried out 
your patch
with message ID  
<1529202111-20855-1-git-send-email-vishal.deep.ajm...@ericsson.com>
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
== Checking ".patch" ==
WARNING: Line is 84 characters long (recommended limit is 79)
#159 FILE: lib/dpif-netdev.c:5222:
/* Flow batching should be performed only after fast-path 
processing

WARNING: Line is 93 characters long (recommended limit is 79)
#253 FILE: lib/dpif-netdev.c:5485:
dp_netdev_queue_batches(map->packet, map->flow, map->tcp_flags, 
batches, _batches);

Lines checked: 265, Warnings: 2, Errors: 0


Please check this out.  If you feel there has been an error, please email me 
back.

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] datapath: Fix compiler warning for HAVE_RHEL7_MAX_MTU.

2018-06-19 Thread Justin Pettit
Signed-off-by: Justin Pettit 
---
 datapath/vport-internal_dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datapath/vport-internal_dev.c b/datapath/vport-internal_dev.c
index 3fa86815c7fa..629965eab19f 100644
--- a/datapath/vport-internal_dev.c
+++ b/datapath/vport-internal_dev.c
@@ -169,7 +169,7 @@ static void do_setup(struct net_device *netdev)
 
 #ifdef HAVE_NET_DEVICE_WITH_MAX_MTU
netdev->max_mtu = ETH_MAX_MTU;
-#elif HAVE_RHEL7_MAX_MTU
+#elif defined(HAVE_RHEL7_MAX_MTU)
netdev->extended->max_mtu = ETH_MAX_MTU;
 #endif
netdev->netdev_ops = _dev_netdev_ops;
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] python: add IDL examples

2018-06-19 Thread Ben Pfaff
On Tue, Jun 19, 2018 at 11:51:00AM -0700, Cpp Code wrote:
> Do we need licensing information in these sample files?
> 
> On Mon, Jun 18, 2018 at 8:53 PM, Ben Pfaff  wrote:
> 
> > On Sun, Jun 17, 2018 at 04:24:34AM -0700, Toms Atteka wrote:
> > > created sample python scripts, which helps to learn how to use OVSDB
> > library
> > >
> > > Signed-off-by: Toms Atteka 
> >
> > Thanks for working on making it easier to understand how to use Open
> > vSwitch.
> >
> > This doesn't build, because:
> >
> > The following files are in git but not the distribution:
> > python/howto/IDL/delete_bridges.py
> > python/howto/IDL/insert_bridge.py
> > python/howto/IDL/ovs_monitor.py
> > Makefile:6214: recipe for target 'dist-hook-git' failed
> >
> > I think it would be good to ensure that flake8 runs on the new files
> > too.
> >
> > Do you have an idea how these new files can be discoverable to users?
> 
> 
> So far I am not really sure which would be the best way to do that.
> 
> 
> > I guess you decided that they should not be in the Documentation
> > directory.
> 
> 
> From current structure documentation folder feels more like textual
> information.
> 
> 
> > Maybe there should at least be a pointer to them from the
> > documentation.
> 
> 
> I could add reference inside integration.rst below: "More information on
> the python bindings is available at ``python/ovs/db/idl.py``."

That could be helpful.

Maybe it would be worth writing a brief document about the Python IDL
for the Documentation directory?  It could point to the examples and
explain a little bit about them.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] OVS (master) + DPDK(17.11) + multi-queue

2018-06-19 Thread Ravi Kerur
Hi,


On Tue, Jun 19, 2018 at 12:27 AM Ilya Maximets 
wrote:

> Hi,
> According to your log, your NIC has limited size of tx queues:
>
>   2018-06-19T04:34:46.106Z|00089|dpdk|ERR|PMD: Unsupported size of TX queue
>(max size: 1024)
>
> This means that you have to configure 'n_txq_desc' <= 1024 in order to
> configure your NIC properly. OVS uses 2048 by default and this is causes
> device configuration failure.
>
> Try this:
>
> ovs-vsctl set interface eth1 options:n_txq_desc=1024
>
> It also likely that you will have to configure the same value for
> 'n_rxq_desc'.
>
>
Thank you. It was indeed no. queue descriptors issue. Modifying the config
to 1024 fixed it.

I am using 'pdump/pcap' features in dpdk for packet capture with OVS-DPDK
and currently it doesn't seem to work. I used following link

https://software.intel.com/en-us/articles/dpdk-pdump-in-open-vswitch-with-dpdk

OVS is linked to DPDK 17.11.2 which has pdump library built. OVS has pdump
server socket file created

ls -l /usr/local/var/run/openvswitch/
total 8
...
srwxr-x--- 1 root root 0 Jun 19 19:26 pdump_server_socket
srwxr-x--- 1 root root 0 Jun 19 19:26 vhost-user-1
srwxr-x--- 1 root root 0 Jun 19 19:26 vhost-user-2

./x86_64-native-linuxapp-gcc/build/app/pdump/dpdk-pdump -- --pdump
port=1,queue=*,rx-dev=/tmp/pkts.pcap
--server-socket-path=/usr/local/var/run/openvswitch
EAL: Detected 8 lcore(s)
PANIC in rte_eal_config_attach():
*Cannot open '/var/run/.rte_config' for rte_mem_config*
6: [./x86_64-native-linuxapp-gcc/build/app/pdump/dpdk-pdump(_start+0x29)
[0x4472e9]]
5: [/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)
[0x7ff0ae7ba830]]
4: [./x86_64-native-linuxapp-gcc/build/app/pdump/dpdk-pdump(main+0x155)
[0x44aa76]]
3:
[./x86_64-native-linuxapp-gcc/build/app/pdump/dpdk-pdump(rte_eal_init+0xc7d)
[0x49ef0d]]
2:
[./x86_64-native-linuxapp-gcc/build/app/pdump/dpdk-pdump(__rte_panic+0xc3)
[0x43ebb3]]
1:
[./x86_64-native-linuxapp-gcc/build/app/pdump/dpdk-pdump(rte_dump_stack+0x2b)
[0x4a573b]]
Aborted

Anything I am missing?

Thanks.


> Note that OVS manages TX queues itself and it will try to allocate
> separate TX queue for each PMD thread + 1 for non-PMD threads. So,
> it's kind of impossible to force it to configure only one TX queue
> in case HW supports multiqueue.
>
> > Hi,
> >
> > I am using above configuration on my testbed and when I try to add a port
> > which is bound to igb_uio, I see following errors due to Tx queue
> > configuration. I just want to use single Tx and Rx queue for my testing.
> I
> > looked at Documentation/intro/install/dpdk.rst, i see only "DPDK Physical
> > Port Rx Queues" and nothing for "Tx Queues". Kindly let me know how can I
> > use single tx/rx queues and if I have to use multiple Tx queues what
> > configuration changes I need to do?
> >
> > ovs logs==
> > 2018-06-19T04:33:23.720Z|00075|bridge|INFO|ovs-vswitchd (Open vSwitch)
> > 2.9.90
> > 2018-06-19T04:33:32.735Z|00076|memory|INFO|127688 kB peak resident set
> size
> > after 10.1 seconds
> > 2018-06-19T04:33:32.735Z|00077|memory|INFO|handlers:5 ports:1
> > revalidators:3 rules:5
> > 2018-06-19T04:33:40.857Z|00078|rconn|INFO|br0<->unix#0: connected
> > 2018-06-19T04:33:40.858Z|00079|rconn|INFO|br0<->unix#1: connected
> > 2018-06-19T04:33:40.859Z|00080|rconn|INFO|br0<->unix#2: connected
> > 2018-06-19T04:34:46.094Z|00081|dpdk|INFO|EAL: PCI device :00:06.0 on
> > NUMA socket 0
> > 2018-06-19T04:34:46.094Z|00082|dpdk|INFO|EAL:   probe driver: 1d0f:ec20
> > net_ena
> > 2018-06-19T04:34:46.095Z|00083|dpdk|INFO|PMD: eth_ena_dev_init():
> > Initializing 0:0:6.0
> > 2018-06-19T04:34:46.095Z|00084|netdev_dpdk|INFO|Device ':00:06.0'
> > attached to DPDK
> > 2018-06-19T04:34:46.099Z|00085|dpif_netdev|INFO|PMD thread on numa_id: 0,
> > core id:  0 created.
> > 2018-06-19T04:34:46.099Z|00086|dpif_netdev|INFO|There are 1 pmd threads
> on
> > numa node 0
> > 2018-06-19T04:34:46.105Z|00087|netdev_dpdk|WARN|Rx checksum offload is
> not
> > supported on port 0
> > 2018-06-19T04:34:46.105Z|00088|dpdk|INFO|PMD: Set MTU: 1500
> > 2018-06-19T04:34:46.106Z|00089|dpdk|ERR|PMD: Unsupported size of TX queue
> > (max size: 1024)
> > 2018-06-19T04:34:46.106Z|00090|netdev_dpdk|INFO|Interface eth1 unable to
> > setup txq(0): Invalid argument
> > 2018-06-19T04:34:46.106Z|00091|netdev_dpdk|ERR|Interface eth1(rxq:1 txq:2
> > lsc interrupt mode:false) configure error: Invalid argument
> > 2018-06-19T04:34:46.106Z|00092|dpif_netdev|ERR|Failed to set interface
> eth1
> > new configuration
> > 2018-06-19T04:34:46.106Z|00093|bridge|WARN|could not add network device
> > eth1 to ofproto (No such device)
> >
> >
> > ethtool -l eth1
> > Channel parameters for eth1:
> > Pre-set maximums:
> > RX:128
> > TX:128
> > Other:0
> > Combined:0
> > Current hardware settings:
> > RX:8
> > TX:8
> > Other:0
> > Combined:0
> >
> > ovs-vsctl get  Open_vSwitch . 

Re: [ovs-dev] [PATCH] python: add IDL examples

2018-06-19 Thread Cpp Code
Do we need licensing information in these sample files?

On Mon, Jun 18, 2018 at 8:53 PM, Ben Pfaff  wrote:

> On Sun, Jun 17, 2018 at 04:24:34AM -0700, Toms Atteka wrote:
> > created sample python scripts, which helps to learn how to use OVSDB
> library
> >
> > Signed-off-by: Toms Atteka 
>
> Thanks for working on making it easier to understand how to use Open
> vSwitch.
>
> This doesn't build, because:
>
> The following files are in git but not the distribution:
> python/howto/IDL/delete_bridges.py
> python/howto/IDL/insert_bridge.py
> python/howto/IDL/ovs_monitor.py
> Makefile:6214: recipe for target 'dist-hook-git' failed
>
> I think it would be good to ensure that flake8 runs on the new files
> too.
>
> Do you have an idea how these new files can be discoverable to users?


So far I am not really sure which would be the best way to do that.


> I guess you decided that they should not be in the Documentation
> directory.


>From current structure documentation folder feels more like textual
information.


> Maybe there should at least be a pointer to them from the
> documentation.


I could add reference inside integration.rst below: "More information on
the python bindings is available at ``python/ovs/db/idl.py``."


> Maybe there should be a pointer to them from the IDL
> sources.
>
> Thanks,
>
> Ben.
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] Work With Me

2018-06-19 Thread info
Hello There,
Those who say it can not be done, should not interrupt those doing it. Am sorry 
for encroaching your privacy, I have a mammoth business proposal which you 
might be interested. Kindly reply to sarah.orengo...@gmail.com for more info...
Sincerely,
Sarah Orengo
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 0/3] Optimize conntrack performance

2018-06-19 Thread Anand Kumar
Hi Alin,

Thanks for running the code analysis on the patch series.
As discussed in the Hyper-V meeting, I have addressed them and send out a v4 of 
my patch series.

Thanks,
Anand Kumar

On 6/19/18, 9:14 AM, "Alin Serdean"  wrote:

Thanks a lot for the series and the benchmarks .

This is not an actual review. I applied the patches and ran the code 
analysis
and I get the following:
ovs\datapath-windows\ovsext\conntrack-nat.c(151): warning C28167: The 
function
'OvsNatCleanup' changes the IRQL and does not restore the IRQL before it 
exits.
It should be annotated to reflect the change or the IRQL should be restored.
IRQL was last set at line 162.

ovs\datapath-windows\ovsext\conntrack-related.c(147): warning C28167:
The function 'OvsCtRelatedFlush' changes the IRQL and does not restore the 
IRQL
before it exits. It should be annotated to reflect the
change or the IRQL should be restored. IRQL was last set at line 163. 

ovs\datapath-windows\ovsext\conntrack-related.c(163): warning C6001:
Using uninitialized memory 'lockState'. 

ovs\datapath-windows\ovsext\conntrack-related.c(163): warning C26110: Caller
failing to hold lock 'ovsCtRelatedLockObj' before calling function
'NdisReleaseRWLock'.

ovs\datapath-windows\ovsext\conntrack-related.c(210): warning C28122: The
function 'NdisReleaseRWLock' is not permitted to be called at a low IRQ 
level.
Prior function calls are inconsistent with this constraint: 
It may be that the error is actually in some prior call that limited the 
range.
Maximum legal IRQL was last set to 1 at line 211. 

ovs\datapath-windows\ovsext\conntrack-related.c(176): warning C28166: The
function 'OvsCtRelatedEntryCleaner' does not restore the IRQL to the value 
that
was current at function entry and is required to do so.
IRQL was last set at line 210. 

ovs\datapath-windows\ovsext\conntrack-related.c(210): warning C6001: Using
uninitialized memory 'lockState'. 

ovs\datapath-windows\ovsext\conntrack-related.c(210): warning C26110: Caller
failing to hold lock 'ovsCtRelatedLockObj' before calling function
'NdisReleaseRWLock'.

Can you please add code annotations where needed?

Thanks,
Alin.

> -Mesaj original-
> De la: ovs-dev-boun...@openvswitch.org  boun...@openvswitch.org> În numele Anand Kumar
> Trimis: Tuesday, June 19, 2018 3:56 AM
> Către: d...@openvswitch.org
> Subiect: [ovs-dev] [PATCH v2 0/3] Optimize conntrack performance
> 
> This patch series is primarily to refactor conntrack code for better 
throughput
> with conntrack.
> 
> With this patch series TCP throughput with conntrack increased by ~50%.
> 
> Anand Kumar (3):
>   datapath-windows: Use spinlock instead of RW lock for ct entry
>   datapath-windows: Implement locking in conntrack NAT.
>   datapath-windows: Compute ct hash based on 5-tuple and zone
> 
>  datapath-windows/ovsext/Conntrack-ftp.c |   4 +-
>  datapath-windows/ovsext/Conntrack-nat.c |  34 +-
>  datapath-windows/ovsext/Conntrack-related.c |  17 +-
>  datapath-windows/ovsext/Conntrack-tcp.c |  15 +-
>  datapath-windows/ovsext/Conntrack.c | 469 
+
> ---
>  datapath-windows/ovsext/Conntrack.h |  40 ++-
>  datapath-windows/ovsext/Util.h  |  18 ++
>  7 files changed, 308 insertions(+), 289 deletions(-)
> 
> --
> 2.9.3.windows.1
> 
> ___
> dev mailing list
> d...@openvswitch.org
> 
https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev=02%7C01%7Ckumaranand%40vmware.com%7C640738ce7d194db0ec1f08d5d5ffce4a%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C1%7C0%7C636650217048180105=afwy4KiFTylRjqBu1K7Qg8cbQdmSfIiXB3%2FrNdkviF0%3D=0


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v4 3/3] datapath-windows: Compute ct hash based on 5-tuple and zone

2018-06-19 Thread Anand Kumar
Conntrack 5-tuple consists of src address, dst address, src port,
dst port and protocol which will be unique to a ct session.
Use this information along with zone to compute hash.

Also re-factor conntrack code related to parsing netlink attributes.

Testing:
Verified loading/unloading the driver with driver verifier enabled.
Ran TCP/UDP and ICMP traffic.

Signed-off-by: Anand Kumar 
---
v1->v2: Updated commit message to include testing done.
v2->v3: No change
v3->v4: No change
---
 datapath-windows/ovsext/Conntrack.c | 228 ++--
 datapath-windows/ovsext/Conntrack.h |   2 -
 2 files changed, 116 insertions(+), 114 deletions(-)

diff --git a/datapath-windows/ovsext/Conntrack.c 
b/datapath-windows/ovsext/Conntrack.c
index 8fa1e07..dd16602 100644
--- a/datapath-windows/ovsext/Conntrack.c
+++ b/datapath-windows/ovsext/Conntrack.c
@@ -151,6 +151,24 @@ OvsCleanupConntrack(VOID)
 OvsNatCleanup();
 }
 
+/*
+ *
+ * OvsCtHashKey
+ * Compute hash using 5-tuple and zone.
+ *
+ */
+UINT32
+OvsCtHashKey(const OVS_CT_KEY *key)
+{
+UINT32 hsrc, hdst, hash;
+hsrc = key->src.addr.ipv4 | ntohl(key->src.port);
+hdst = key->dst.addr.ipv4 | ntohl(key->dst.port);
+hash = hsrc ^ hdst; /* TO identify reverse traffic */
+hash = hash | (key->zone + key->nw_proto);
+hash = OvsJhashWords((uint32_t*) , 1, hash);
+return hash;
+}
+
 static __inline VOID
 OvsCtKeyReverse(OVS_CT_KEY *key)
 {
@@ -231,7 +249,7 @@ OvsCtAddEntry(POVS_CT_ENTRY entry,
 if (!OvsNatTranslateCtEntry(entry)) {
 return FALSE;
 }
-ctx->hash = OvsHashCtKey(>key);
+ctx->hash = OvsCtHashKey(>key);
 } else {
 entry->natInfo.natAction = natInfo->natAction;
 }
@@ -531,20 +549,6 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
 return found;
 }
 
-UINT32
-OvsHashCtKey(const OVS_CT_KEY *key)
-{
-UINT32 hsrc, hdst, hash;
-hsrc = OvsJhashBytes((UINT32*) >src, sizeof(key->src), 0);
-hdst = OvsJhashBytes((UINT32*) >dst, sizeof(key->dst), 0);
-hash = hsrc ^ hdst; /* TO identify reverse traffic */
-hash = OvsJhashBytes((uint32_t *) >dst + 1,
- ((uint32_t *) (key + 1) -
- (uint32_t *) (>dst + 1)),
- hash);
-return hash;
-}
-
 static UINT8
 OvsReverseIcmpType(UINT8 type)
 {
@@ -642,7 +646,7 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
 OvsCtKeyReverse(>key);
 }
 
-ctx->hash = OvsHashCtKey(>key);
+ctx->hash = OvsCtHashKey(>key);
 return NDIS_STATUS_SUCCESS;
 }
 
@@ -953,7 +957,6 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx,
   OvsFlowKey *key,
   const PNL_ATTR a)
 {
-PNL_ATTR ctAttr;
 BOOLEAN commit = FALSE;
 BOOLEAN force = FALSE;
 BOOLEAN postUpdateEvent = FALSE;
@@ -973,109 +976,110 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx,
 return status;
 }
 
-/* XXX Convert this to NL_ATTR_FOR_EACH */
-ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_ZONE);
-if (ctAttr) {
-zone = NlAttrGetU16(ctAttr);
-}
-ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_COMMIT);
-if (ctAttr) {
-commit = TRUE;
-}
-ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_MARK);
-if (ctAttr) {
-mark = NlAttrGet(ctAttr);
-}
-ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_LABELS);
-if (ctAttr) {
-labels = NlAttrGet(ctAttr);
-}
-natActionInfo.natAction = NAT_ACTION_NONE;
-ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_NAT);
-if (ctAttr) {
-/* Pares Nested NAT attributes. */
-PNL_ATTR natAttr;
-unsigned int left;
-BOOLEAN hasMinIp = FALSE;
-BOOLEAN hasMinPort = FALSE;
-BOOLEAN hasMaxIp = FALSE;
-BOOLEAN hasMaxPort = FALSE;
-NL_NESTED_FOR_EACH_UNSAFE (natAttr, left, ctAttr) {
-enum ovs_nat_attr subtype = NlAttrType(natAttr);
-switch(subtype) {
-case OVS_NAT_ATTR_SRC:
-case OVS_NAT_ATTR_DST:
-natActionInfo.natAction |=
-((subtype == OVS_NAT_ATTR_SRC)
-? NAT_ACTION_SRC : NAT_ACTION_DST);
+PNL_ATTR ctAttr = NULL;
+INT left;
+
+NL_NESTED_FOR_EACH (ctAttr, left, a) {
+switch(NlAttrType(ctAttr)) {
+case OVS_CT_ATTR_ZONE:
+zone = NlAttrGetU16(ctAttr);
+break;
+case OVS_CT_ATTR_COMMIT:
+commit = TRUE;
+break;
+case OVS_CT_ATTR_MARK:
+mark = NlAttrGet(ctAttr);
 break;
-case OVS_NAT_ATTR_IP_MIN:
-memcpy(,
-   NlAttrData(natAttr), NlAttrGetSize(natAttr));
-hasMinIp = 

[ovs-dev] [PATCH v4 2/3] datapath-windows: Implement locking in conntrack NAT.

2018-06-19 Thread Anand Kumar
This patch primarily replaces the existing NDIS RW-lock based implementation
for NAT in conntrack with a spinlock based implementation inside the NAT
module, along with some conntrack optimizations.

- The 'ovsNatTable' and 'ovsUnNatTable' tables are shared
  between cleanup threads and packet processing thread.
  In order to protect these two tables use a spinlock.
  Also introduce counters to track number of nat entries.
- Introduce a new function OvsGetTcpHeader() to retrieve TCP header
  and payload length, to optimize for TCP traffic.
- Optimize conntrack look up.
- Remove 'bucketlockRef' member from conntrack entry structure.

Testing:
Verified loading/unloading the driver with driver verifier enabled.
Ran TCP/UDP and ICMP traffic.

Signed-off-by: Anand Kumar 
---
v1->v2: Merge patch 2 and 3 so that NAT locks related changes are in a
single patch.
v2->v3: No change
v3->v4: No change
---
 datapath-windows/ovsext/Conntrack-ftp.c |   4 +-
 datapath-windows/ovsext/Conntrack-nat.c |  27 +++-
 datapath-windows/ovsext/Conntrack-tcp.c |  15 ++---
 datapath-windows/ovsext/Conntrack.c | 110 +---
 datapath-windows/ovsext/Conntrack.h |  36 +++
 5 files changed, 100 insertions(+), 92 deletions(-)

diff --git a/datapath-windows/ovsext/Conntrack-ftp.c 
b/datapath-windows/ovsext/Conntrack-ftp.c
index 6830dfa..ce09a65 100644
--- a/datapath-windows/ovsext/Conntrack-ftp.c
+++ b/datapath-windows/ovsext/Conntrack-ftp.c
@@ -129,14 +129,14 @@ OvsCtHandleFtp(PNET_BUFFER_LIST curNbl,
 char temp[256] = { 0 };
 char ftpMsg[256] = { 0 };
 
+UINT32 len;
 TCPHdr tcpStorage;
 const TCPHdr *tcp;
-tcp = OvsGetTcp(curNbl, layers->l4Offset, );
+tcp = OvsGetTcpHeader(curNbl, layers, , );
 if (!tcp) {
 return NDIS_STATUS_INVALID_PACKET;
 }
 
-UINT32 len = OvsGetTcpPayloadLength(curNbl);
 if (len > sizeof(temp)) {
 /* We only care up to 256 */
 len = sizeof(temp);
diff --git a/datapath-windows/ovsext/Conntrack-nat.c 
b/datapath-windows/ovsext/Conntrack-nat.c
index da1814f..11057e6 100644
--- a/datapath-windows/ovsext/Conntrack-nat.c
+++ b/datapath-windows/ovsext/Conntrack-nat.c
@@ -3,7 +3,8 @@
 
 PLIST_ENTRY ovsNatTable = NULL;
 PLIST_ENTRY ovsUnNatTable = NULL;
-
+static NDIS_SPIN_LOCK ovsCtNatLock;
+static ULONG ovsNatEntries;
 /*
  *---
  * OvsHashNatKey
@@ -109,6 +110,8 @@ NTSTATUS OvsNatInit()
 InitializeListHead([i]);
 }
 
+NdisAllocateSpinLock();
+ovsNatEntries = 0;
 return STATUS_SUCCESS;
 }
 
@@ -121,6 +124,11 @@ NTSTATUS OvsNatInit()
 VOID OvsNatFlush(UINT16 zone)
 {
 PLIST_ENTRY link, next;
+if (!ovsNatEntries) {
+return;
+}
+
+NdisAcquireSpinLock();
 for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) {
 LIST_FORALL_SAFE([i], link, next) {
 POVS_NAT_ENTRY entry =
@@ -131,6 +139,7 @@ VOID OvsNatFlush(UINT16 zone)
 }
 }
 }
+NdisReleaseSpinLock();
 }
 
 /*
@@ -144,10 +153,14 @@ VOID OvsNatCleanup()
 if (ovsNatTable == NULL) {
return;
 }
+
+NdisAcquireSpinLock();
 OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG);
 OvsFreeMemoryWithTag(ovsUnNatTable, OVS_CT_POOL_TAG);
 ovsNatTable = NULL;
 ovsUnNatTable = NULL;
+NdisReleaseSpinLock();
+NdisFreeSpinLock();
 }
 
 /*
@@ -250,10 +263,13 @@ static UINT32 OvsNatHashRange(const OVS_CT_ENTRY *entry, 
UINT32 basis)
 VOID
 OvsNatAddEntry(OVS_NAT_ENTRY* entry)
 {
+NdisAcquireSpinLock();
 InsertHeadList(OvsNatGetBucket(>key, FALSE),
>link);
 InsertHeadList(OvsNatGetBucket(>value, TRUE),
>reverseLink);
+NdisReleaseSpinLock();
+NdisInterlockedIncrement((PLONG));
 }
 
 /*
@@ -399,21 +415,29 @@ OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse)
 PLIST_ENTRY link;
 POVS_NAT_ENTRY entry;
 
+if (!ovsNatEntries) {
+return NULL;
+}
+
+NdisAcquireSpinLock();
 LIST_FORALL(OvsNatGetBucket(ctKey, reverse), link) {
 if (reverse) {
 entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, reverseLink);
 
 if (OvsNatKeyAreSame(ctKey, >value)) {
+NdisReleaseSpinLock();
 return entry;
 }
 } else {
 entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, link);
 
 if (OvsNatKeyAreSame(ctKey, >key)) {
+NdisReleaseSpinLock();
 return entry;
 }
 }
 }
+NdisReleaseSpinLock();
 return NULL;
 }
 
@@ -432,6 +456,7 @@ OvsNatDeleteEntry(POVS_NAT_ENTRY entry)
 RemoveEntryList(>link);
 RemoveEntryList(>reverseLink);
 OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
+NdisInterlockedDecrement((PLONG));
 }
 
 /*
diff --git a/datapath-windows/ovsext/Conntrack-tcp.c 
b/datapath-windows/ovsext/Conntrack-tcp.c
index 8cbab24..eda42ac 100644
--- 

[ovs-dev] [PATCH v4 1/3] datapath-windows: Use spinlock instead of RW lock for ct entry

2018-06-19 Thread Anand Kumar
This patch mainly changes an NDIS RW lock for a conntrack entry to a
spinlock, along with some minor refactoring in conntrack. We use a
spinlock instead of an RW lock because RW locks cause performance hits
when acquired/released multiple times.

- Use NdisInterlockedXX wrapper api's instead of InterlockedXX.
- Update 'ctTotalRelatedEntries' using interlocked functions.
- Move conntrack lock out of NAT module.

Testing:
Verified loading/unloading the driver with driver verifier enabled.
Ran TCP/UDP and ICMP traffic.

Signed-off-by: Anand Kumar 
---
v1->v2: Calculate the dispatch level only in cases where the locks are being 
acquired
multiple times within a given context and minor style change.
v2->v3: Fix kernel crash while executing cleanup thread in conntrack-related
v3->v4: Fix a bug found through code analysis
---
 datapath-windows/ovsext/Conntrack-nat.c |   7 +-
 datapath-windows/ovsext/Conntrack-related.c |  21 ++---
 datapath-windows/ovsext/Conntrack.c | 135 ++--
 datapath-windows/ovsext/Conntrack.h |   2 +-
 datapath-windows/ovsext/Util.h  |  18 
 5 files changed, 96 insertions(+), 87 deletions(-)

diff --git a/datapath-windows/ovsext/Conntrack-nat.c 
b/datapath-windows/ovsext/Conntrack-nat.c
index 316c946..da1814f 100644
--- a/datapath-windows/ovsext/Conntrack-nat.c
+++ b/datapath-windows/ovsext/Conntrack-nat.c
@@ -167,16 +167,13 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
 {
 UINT32 natFlag;
 const struct ct_endpoint* endpoint;
-LOCK_STATE_EX lockState;
-/* XXX: Move conntrack locks out of NAT after implementing lock in NAT. */
-NdisAcquireRWLockRead(entry->lock, , 0);
+
 /* When it is NAT, only entry->rev_key contains NATTED address;
When it is unNAT, only entry->key contains the UNNATTED address;*/
 const OVS_CT_KEY *ctKey = reverse ? >key : >rev_key;
 BOOLEAN isSrcNat;
 
 if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) {
-NdisReleaseRWLock(entry->lock, );
 return;
 }
 isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) ||
@@ -206,7 +203,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
 }
 } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){
 // XXX: IPv6 packet not supported yet.
-NdisReleaseRWLock(entry->lock, );
 return;
 }
 if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) {
@@ -220,7 +216,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
 }
 }
 }
-NdisReleaseRWLock(entry->lock, );
 }
 
 
diff --git a/datapath-windows/ovsext/Conntrack-related.c 
b/datapath-windows/ovsext/Conntrack-related.c
index ec4b536..950be98 100644
--- a/datapath-windows/ovsext/Conntrack-related.c
+++ b/datapath-windows/ovsext/Conntrack-related.c
@@ -18,7 +18,7 @@
 #include "Jhash.h"
 
 static PLIST_ENTRY ovsCtRelatedTable; /* Holds related entries */
-static UINT64 ctTotalRelatedEntries;
+static ULONG ctTotalRelatedEntries;
 static OVS_CT_THREAD_CTX ctRelThreadCtx;
 static PNDIS_RW_LOCK_EX ovsCtRelatedLockObj;
 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
@@ -75,13 +75,11 @@ OvsCtRelatedLookup(OVS_CT_KEY key, UINT64 currentTime)
 POVS_CT_REL_ENTRY entry;
 LOCK_STATE_EX lockState;
 
-NdisAcquireRWLockRead(ovsCtRelatedLockObj, , 0);
-
 if (!ctTotalRelatedEntries) {
-NdisReleaseRWLock(ovsCtRelatedLockObj, );
 return NULL;
 }
 
+NdisAcquireRWLockRead(ovsCtRelatedLockObj, , 0);
 for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
 /* XXX - Scan the table based on the hash instead */
 LIST_FORALL_SAFE([i], link, next) {
@@ -103,7 +101,7 @@ OvsCtRelatedEntryDelete(POVS_CT_REL_ENTRY entry)
 {
 RemoveEntryList(>link);
 OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
-ctTotalRelatedEntries--;
+NdisInterlockedDecrement((PLONG));
 }
 
 NDIS_STATUS
@@ -139,7 +137,7 @@ OvsCtRelatedEntryCreate(UINT8 ipProto,
 NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0);
 InsertHeadList([hash & CT_HASH_TABLE_MASK],
>link);
-ctTotalRelatedEntries++;
+NdisInterlockedIncrement((PLONG));
 NdisReleaseRWLock(ovsCtRelatedLockObj, );
 
 return NDIS_STATUS_SUCCESS;
@@ -150,20 +148,19 @@ OvsCtRelatedFlush()
 {
 PLIST_ENTRY link, next;
 POVS_CT_REL_ENTRY entry;
-
 LOCK_STATE_EX lockState;
-NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0);
 
 if (ctTotalRelatedEntries) {
+NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0);
 for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
 LIST_FORALL_SAFE([i], link, next) {
 entry = CONTAINING_RECORD(link, OVS_CT_REL_ENTRY, link);
 OvsCtRelatedEntryDelete(entry);
 }
 }
+NdisReleaseRWLock(ovsCtRelatedLockObj, );
 }
 
-NdisReleaseRWLock(ovsCtRelatedLockObj, );
 return NDIS_STATUS_SUCCESS;
 }
 
@@ -189,9 +186,8 @@ OvsCtRelatedEntryCleaner(PVOID data)
 /* Lock has been 

[ovs-dev] [PATCH v4 0/3] Optimize conntrack performance

2018-06-19 Thread Anand Kumar
This patch series is primarily to refactor conntrack code for
better throughput with conntrack.

With this patch series TCP throughput with conntrack increased
by ~50%.

Anand Kumar (3):
  datapath-windows: Use spinlock instead of RW lock for ct entry
  datapath-windows: Implement locking in conntrack NAT.
  datapath-windows: Compute ct hash based on 5-tuple and zone

 datapath-windows/ovsext/Conntrack-ftp.c |   4 +-
 datapath-windows/ovsext/Conntrack-nat.c |  34 +-
 datapath-windows/ovsext/Conntrack-related.c |  21 +-
 datapath-windows/ovsext/Conntrack-tcp.c |  15 +-
 datapath-windows/ovsext/Conntrack.c | 469 +---
 datapath-windows/ovsext/Conntrack.h |  40 ++-
 datapath-windows/ovsext/Util.h  |  18 ++
 7 files changed, 310 insertions(+), 291 deletions(-)

-- 
2.9.3.windows.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] Open vSwitch Debian package maintainership

2018-06-19 Thread Ben Pfaff
On Mon, Jun 18, 2018 at 11:31:03PM +0200, Thomas Goirand wrote:
> On 06/18/2018 09:56 PM, Ben Pfaff wrote:
> > Hi Thomas.  Thanks for the latest upload of the Open vSwitch packages to
> > Debian.
> > 
> > I'm no longer interested in being a maintainer for the downstream Debian
> > packages of Open vSwitch.  I just don't have sufficient time to do it
> > properly (and honestly have not had that time for at least a few years).
> > The next time you make an upload, would you mind removing me from the
> > Maintainers field?
> > 
> > Thanks,
> > 
> > Ben.
> 
> Ben,
> 
> Reading the above is very frustrating. Yes, I know how to do packages,
> but I really need help from upstream. In many ways, I do need help from
> upstream, and especially from someone knowing packaging. I very much
> would prefer if you could reconsider your above decision. Even if you
> don't do the bulk of the work, I will need help when there's FTBFS and such.
> 
> For example, currently, OpenVSwitch can build because I have patched the
> unit test suite to remove lots of tests. I would very much prefer if I
> had real fixes, rather than disabling them.
> 
> There's also areas that I don't really understand (yet?). For example, I
> have yet understood what DPDK is, and how I should package it. The only
> way I could do it would be cheating by reading what has been done in
> Ubuntu. That's not the way I want to continue doing the work. I don't
> really get what is the difference between OVS and OVN as well.
> 
> So, definitively, please continue to help!

I didn't intend to increase your frustration, only to acknowledge that I
haven't been helping in the primary way that a Debian developer should
for packages that he/she/they maintains (by updating packaging and
building and uploading it) and that this probably won't change.

Everything you mention is the sort of thing that I (and others on the
ovs-dev mailing list) can actually help with, though.  Maybe you have
brought up some of these before, although I don't recall it.  Sometimes
the list is busy, or the people who read the list are busy, and
questions don't get answered, and in that case it's best to ask again a
while later.

I can help to address some of these issues now.

Debian-wise, the unit tests are a bit of an exercise in frustration
themselves.  Some of the OVS unit tests are timing sensitive.  We have
spent quite a bit of work on that, but it's difficult to be perfect (and
we have a lot of tests!)  There are two things that distinguish the
tests running on Debian buildds.  First, Debian has far more
architectures than any other distro, which means that really odd
failures show up.  This is usually an advantage because we find a wider
range of bugs, but occasionally something comes up that is very
difficult to debug.  Second, the buildds really seem to bring out the
worst in the tests somehow.  I've sometimes tried to reproduce the
timing-related failures anywhere else (on my own machine, on Debian
machines, ...) and they just won't.  That makes it really hard to deal
with them.

About DPDK, well, DPDK is a library for userspace networking.  It takes
control of network interfaces away from the kernel, using its own
network drivers, and processes packets in userspace.  It's generally
much faster than kernel networking because it's more specialized (less
general-purpose) and because of a number of clever tricks it does.  OVS
can take advantage of DPDK if it is configured to do so.  In my opinion,
the ideal way to add DPDK support to OVS would be to just link it into
the main binary, since OVS is designed to use it if it is configured to
do so or not use it otherwise and therefore OVS with DPDK is strictly
more useful than OVS without it.  However, last I checked the DPDK
library itself has some nastiness like doing CPU checks at startup time
and aborting if the CPU isn't suitable in some way (it's pickier than
Debian is about CPU features), so I guess that having it in a separate
package is probably the realistic approach.  (The separate package
wouldn't need to be anything other than an alternate ovs-vswitchd
binary, perhaps substituted for the non-DPDK version using dpkg-divert
etc.)

OVS versus OVN: Probably you know what OVS is already.  Where OVS is a
programmable switch, OVN is an application that runs on top of it.  OVN
is an OpenFlow controller plus a centralized database that allows a
cloud management system like OpenStack or Kubernetes (with appropriate
plugins) to control the networking in a collection of hypervisors.
We're talking about breaking OVN apart from OVS as a separate project;
OVS has been a great place to incubate the OVN project, but the two are
logically distinct.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 0/3] Optimize conntrack performance

2018-06-19 Thread Alin Serdean
Thanks a lot for the series and the benchmarks .

This is not an actual review. I applied the patches and ran the code analysis
and I get the following:
ovs\datapath-windows\ovsext\conntrack-nat.c(151): warning C28167: The function
'OvsNatCleanup' changes the IRQL and does not restore the IRQL before it exits.
It should be annotated to reflect the change or the IRQL should be restored.
IRQL was last set at line 162.

ovs\datapath-windows\ovsext\conntrack-related.c(147): warning C28167:
The function 'OvsCtRelatedFlush' changes the IRQL and does not restore the IRQL
before it exits. It should be annotated to reflect the
change or the IRQL should be restored. IRQL was last set at line 163. 

ovs\datapath-windows\ovsext\conntrack-related.c(163): warning C6001:
Using uninitialized memory 'lockState'. 

ovs\datapath-windows\ovsext\conntrack-related.c(163): warning C26110: Caller
failing to hold lock 'ovsCtRelatedLockObj' before calling function
'NdisReleaseRWLock'.

ovs\datapath-windows\ovsext\conntrack-related.c(210): warning C28122: The
function 'NdisReleaseRWLock' is not permitted to be called at a low IRQ level.
Prior function calls are inconsistent with this constraint: 
It may be that the error is actually in some prior call that limited the range.
Maximum legal IRQL was last set to 1 at line 211. 

ovs\datapath-windows\ovsext\conntrack-related.c(176): warning C28166: The
function 'OvsCtRelatedEntryCleaner' does not restore the IRQL to the value that
was current at function entry and is required to do so.
IRQL was last set at line 210. 

ovs\datapath-windows\ovsext\conntrack-related.c(210): warning C6001: Using
uninitialized memory 'lockState'. 

ovs\datapath-windows\ovsext\conntrack-related.c(210): warning C26110: Caller
failing to hold lock 'ovsCtRelatedLockObj' before calling function
'NdisReleaseRWLock'.

Can you please add code annotations where needed?

Thanks,
Alin.

> -Mesaj original-
> De la: ovs-dev-boun...@openvswitch.org  boun...@openvswitch.org> În numele Anand Kumar
> Trimis: Tuesday, June 19, 2018 3:56 AM
> Către: d...@openvswitch.org
> Subiect: [ovs-dev] [PATCH v2 0/3] Optimize conntrack performance
> 
> This patch series is primarily to refactor conntrack code for better 
> throughput
> with conntrack.
> 
> With this patch series TCP throughput with conntrack increased by ~50%.
> 
> Anand Kumar (3):
>   datapath-windows: Use spinlock instead of RW lock for ct entry
>   datapath-windows: Implement locking in conntrack NAT.
>   datapath-windows: Compute ct hash based on 5-tuple and zone
> 
>  datapath-windows/ovsext/Conntrack-ftp.c |   4 +-
>  datapath-windows/ovsext/Conntrack-nat.c |  34 +-
>  datapath-windows/ovsext/Conntrack-related.c |  17 +-
>  datapath-windows/ovsext/Conntrack-tcp.c |  15 +-
>  datapath-windows/ovsext/Conntrack.c | 469 +
> ---
>  datapath-windows/ovsext/Conntrack.h |  40 ++-
>  datapath-windows/ovsext/Util.h  |  18 ++
>  7 files changed, 308 insertions(+), 289 deletions(-)
> 
> --
> 2.9.3.windows.1
> 
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] rconn: Introduce new invariant to fix assertion failure in corner case.

2018-06-19 Thread Ben Pfaff
On Tue, Jun 19, 2018 at 10:58:36AM +0300, Ilya Maximets wrote:
> On 18.06.2018 22:24, Ben Pfaff wrote:
> > On Mon, Jun 18, 2018 at 06:34:13PM +0300, Ilya Maximets wrote:
> >> On 18.06.2018 18:07, Ben Pfaff wrote:
> >>> On Mon, Jun 18, 2018 at 05:18:49PM +0300, Ilya Maximets wrote:
> > On Wed, May 23, 2018 at 09:28:59PM -0700, Ben Pfaff wrote:
> >> On Wed, May 23, 2018 at 06:06:44PM -0700, Han Zhou wrote:
> >>> On Wed, May 23, 2018 at 5:14 PM, Ben Pfaff  wrote:
> 
>  Until now, rconn_get_version() has only reported the OpenFlow 
>  version in
>  use when the rconn is actually connected.  This makes sense, but it 
>  has a
>  harsh consequence.  Consider code like this:
> 
>  if (rconn_is_connected(rconn) && rconn_get_version(rconn) >= 0) {
>  for (int i = 0; i < 2; i++) {
>  struct ofpbuf *b = ofputil_encode_echo_request(
>  rconn_get_version(rconn));
>  rconn_send(rconn, b, NULL);
>  }
>  }
> 
>  Maybe not the smartest code in the world, and probably no one would 
>  write
>  this exact code in any case, but it doesn't look too risky or crazy.
> 
>  But it is.  The second trip through the loop can assert-fail inside
>  ofputil_encode_echo_request() because rconn_get_version(rconn) 
>  returns -1
>  instead of a valid OpenFlow version.  That happens if the first call 
>  to
>  rconn_send() encounters an error while sending the message and 
>  therefore
>  destroys the underlying vconn and disconnects so that 
>  rconn_get_version()
>  doesn't have a vconn to query for its version.
> 
>  In a case like this where all the code to send the messages is close 
>  by,
> >>> we
>  could just check rconn_get_version() in each loop iteration.  We 
>  could
> >>> even
>  go through the tree and convince ourselves that individual bits of 
>  code
> >>> are
>  safe, or be conservative and check rconn_get_version() >= 0 in the 
>  iffy
>  cases.  But this seems to me like an ongoing source of risk and a 
>  way to
>  get things wrong in corner cases.
> 
>  This commit takes a different approach.  It introduces a new 
>  invariant: if
>  an rconn has ever been connected, then it returns a valid OpenFlow 
>  version
>  from rconn_get_version().  In addition, if an rconn is currently
> >>> connected,
>  then the OpenFlow version it returns is the correct one (that may be
>  obvious, but there were corner cases before where it returned -1 even
>  though rconn_is_connected() returned true).
> 
>  With this commit, the code above would work OK.  If the first call to
>  rconn_send() encounters an error sending the message, then
>  rconn_get_version() in the second iteration will return the same 
>  value as
>  in the first iteration.  The message passed to rconn_send() will end 
>  up
>  being discarded, but that's much better than either an assertion 
>  failure
> >>> or
>  having to carefully analyze a lot of our code to deal with one 
>  unusual
>  corner case.
> 
>  Reported-by: Han Zhou 
>  Signed-off-by: Ben Pfaff 
>  ---
>   lib/learning-switch.c |  2 +-
>   lib/rconn.c   | 41 -
>   lib/vconn.c   |  1 +
>   3 files changed, 18 insertions(+), 26 deletions(-)
> 
> >>> Acked-by: Han Zhou 
> >>
> >> Thanks.  I applied this to master.  I'll backport it to older versions
> >> if no one notices trouble soon.
> >
> > I backported as far as branch-2.5.
> 
>  Sorry for being late, but I just tried to use branch-2.9 on my test 
>  environment
>  and found that this patch produces a lot of "connected" logs that are a 
>  bit annoying:
> 
>  2018-06-18T12:55:06.610Z|03051|rconn|INFO|br-int<->unix#2873: connected
>  2018-06-18T12:55:08.609Z|03052|rconn|INFO|br-int<->unix#2874: connected
>  2018-06-18T12:55:08.610Z|03053|rconn|INFO|br-int<->unix#2875: connected
>  2018-06-18T12:55:10.608Z|03054|rconn|INFO|br-int<->unix#2876: connected
>  2018-06-18T12:55:10.609Z|03055|rconn|INFO|br-int<->unix#2877: connected
>  2018-06-18T12:55:12.609Z|03056|rconn|INFO|br-int<->unix#2878: connected
>  2018-06-18T12:55:12.609Z|03057|rconn|INFO|br-int<->unix#2879: connected
>  2018-06-18T12:55:14.612Z|03058|rconn|INFO|br-int<->unix#2880: connected
>  2018-06-18T12:55:14.613Z|03059|rconn|INFO|br-int<->unix#2881: connected
>  

Re: [ovs-dev] [PATCH 2/2] ovndb-servers.ocf: Set connections in NB and SB DB tables,

2018-06-19 Thread aginwala
Thanks guys for the review! Have sent revised one @
https://patchwork.ozlabs.org/patch/931665/ . Please review and let me know.


Regards,


On Tue, Jun 19, 2018 at 3:35 AM, Numan Siddique  wrote:

> On Tue, Jun 19, 2018 at 6:14 AM, Han Zhou  wrote:
>
> >
> >
> > On Fri, Jun 8, 2018 at 12:32 PM, aginwala  wrote:
> > >
> > > so that we can adjust inactivity_probe for master node, while still not
> > > listening on TCP on slave node using use_remote_in_db in ovn-ctl.
> > >
> >
> > Minor comment: the commit message is better to have a short summary as
> > subject, and then the detailed descriptions.
> >
> > > Signed-off-by: aginwala 
> > > ---
> > >  ovn/utilities/ovndb-servers.ocf | 39 +++---
> > -
> > >  1 file changed, 23 insertions(+), 16 deletions(-)
> > >
> > > diff --git a/ovn/utilities/ovndb-servers.ocf
> > b/ovn/utilities/ovndb-servers.ocf
> > > index 9391b89..52141c7 100755
> > > --- a/ovn/utilities/ovndb-servers.ocf
> > > +++ b/ovn/utilities/ovndb-servers.ocf
> > > @@ -172,25 +172,29 @@ ovsdb_server_notify() {
> > >  ${OVN_CTL} --ovn-manage-ovsdb=no start_northd
> > >  fi
> > >
> > > -# Not needed while listening on 0.0.0.0 as we do not want to
> > allow
> > > -# local binds. However, it is needed if vip ip is binded to
> > nodes.
> > > -if [ "x${LISTEN_ON_MASTER_IP_ONLY}" = xyes ]; then
> > > -conn=`ovn-nbctl get NB_global . connections`
> > > -if [ "$conn" == "[]" ]
> > > -then
> > > -ovn-nbctl -- --id=@conn_uuid create Connection \
> > > -target="p${NB_MASTER_PROTO}\:${NB_MASTER_PORT}\:${MASTER_IP}" \
> > > +# In order to over-ride inactivity_probe for LB use case, we
> > need to
> > > +# create connection entry to listen on 0.0.0.0 for master
> node.
> > > +if [ "x${LISTEN_ON_MASTER_IP_ONLY}" = xno ]; then
> > > +   LISTON_ON_IP="0.0.0.0"
> > > +else
> > > +   LISTON_ON_IP=${MASTER_IP}
> > > +fi
> > > +conn=`ovn-nbctl get NB_global . connections`
> > > +if [ "$conn" == "[]" ]
> > > +then
> > > +ovn-nbctl -- --id=@conn_uuid create Connection \
> > > +target="p${NB_MASTER_PROTO}\:${NB_MASTER_PORT}\:${LISTON_ON_IP}" \
> > >  inactivity_probe=$INACTIVE_PROBE -- set NB_Global .
> > connections=@conn_uuid
> > > -fi
> > > +fi
> > >
> > > -conn=`ovn-sbctl get SB_global . connections`
> > > -if [ "$conn" == "[]" ]
> > > -then
> > > -ovn-sbctl -- --id=@conn_uuid create Connection \
> > > -target="p${SB_MASTER_PROTO}\:${SB_MASTER_PORT}\:${MASTER_IP}" \
> > > +conn=`ovn-sbctl get SB_global . connections`
> > > +if [ "$conn" == "[]" ]
> > > +then
> > > +ovn-sbctl -- --id=@conn_uuid create Connection \
> > > +target="p${SB_MASTER_PROTO}\:${SB_MASTER_PORT}\:${LISTON_ON_IP}" \
> > >  inactivity_probe=$INACTIVE_PROBE -- set SB_Global .
> > connections=@conn_uuid
> > > -fi
> > >  fi
> > > +
> > >  else
> > >  if [ "$MANAGE_NORTHD" = "yes" ]; then
> > >  # Stop ovn-northd service. Set --ovn-manage-ovsdb=no so
> that
> > > @@ -355,7 +359,10 @@ ovsdb_server_start() {
> > >  set $@ --db-nb-sync-from-proto=${NB_MASTER_PROTO}
> > >  set $@ --db-sb-sync-from-port=${SB_MASTER_PORT}
> > >  set $@ --db-sb-sync-from-proto=${SB_MASTER_PROTO}
> > > -
> > > +if [ "x${LISTEN_ON_MASTER_IP_ONLY}" = xno ]; then
> > > +set $@ --db-sb-use-remote-in-db="no"
> > > +set $@ --db-nb-use-remote-in-db="no"
> > > +fi
> > >  fi
> > >
> > >  $@ start_ovsdb
> > > --
> > > 1.9.1
> > >
> > > ___
> > > dev mailing list
> > > d...@openvswitch.org
> > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >
> > Acked-by: Han Zhou 
> >
>
>
>
> Acked-by: Numan Siddique 
>
> Agree with Han. The commit message may be a bit clearer.
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 2/2] ovndb-servers: Set connection table when using

2018-06-19 Thread aginwala
load balancer to manage ovndb clusters via pacemaker.

This will allow setting the inactivity probe on the master node.
For pacemaker to manage ovndb resources via LB, we skipped creating the
connection table, and hence the inactivity probe was getting set to 5000 by
default. In order to override it we need this table. However, we need to skip
slaves listening on the local SB and NB connection tables so that the LB
feature stays intact and only the master is listening on 0.0.0.0.

e.g --remote=db:OVN_Southbound,SB_Global,connections and
--remote=db:OVN_Northbound,NB_Global,connections

will be skipped for slave SB and NB dbs respectively by unsetting
--db-sb-use-remote-in-db  and --db-nb-use-remote-in-db in ovn-ctl.

Signed-off-by: aginwala 
---
 ovn/utilities/ovndb-servers.ocf | 39 +++
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/ovn/utilities/ovndb-servers.ocf b/ovn/utilities/ovndb-servers.ocf
index 9391b89..52141c7 100755
--- a/ovn/utilities/ovndb-servers.ocf
+++ b/ovn/utilities/ovndb-servers.ocf
@@ -172,25 +172,29 @@ ovsdb_server_notify() {
 ${OVN_CTL} --ovn-manage-ovsdb=no start_northd
 fi
 
-# Not needed while listening on 0.0.0.0 as we do not want to allow
-# local binds. However, it is needed if vip ip is binded to nodes.
-if [ "x${LISTEN_ON_MASTER_IP_ONLY}" = xyes ]; then
-conn=`ovn-nbctl get NB_global . connections`
-if [ "$conn" == "[]" ]
-then
-ovn-nbctl -- --id=@conn_uuid create Connection \
-target="p${NB_MASTER_PROTO}\:${NB_MASTER_PORT}\:${MASTER_IP}" \
+# In order to over-ride inactivity_probe for LB use case, we need to
+# create connection entry to listen on 0.0.0.0 for master node.
+if [ "x${LISTEN_ON_MASTER_IP_ONLY}" = xno ]; then
+   LISTON_ON_IP="0.0.0.0"
+else
+   LISTON_ON_IP=${MASTER_IP}
+fi
+conn=`ovn-nbctl get NB_global . connections`
+if [ "$conn" == "[]" ]
+then
+ovn-nbctl -- --id=@conn_uuid create Connection \
+target="p${NB_MASTER_PROTO}\:${NB_MASTER_PORT}\:${LISTON_ON_IP}" \
 inactivity_probe=$INACTIVE_PROBE -- set NB_Global . connections=@conn_uuid
-fi
+fi
 
-conn=`ovn-sbctl get SB_global . connections`
-if [ "$conn" == "[]" ]
-then
-ovn-sbctl -- --id=@conn_uuid create Connection \
-target="p${SB_MASTER_PROTO}\:${SB_MASTER_PORT}\:${MASTER_IP}" \
+conn=`ovn-sbctl get SB_global . connections`
+if [ "$conn" == "[]" ]
+then
+ovn-sbctl -- --id=@conn_uuid create Connection \
+target="p${SB_MASTER_PROTO}\:${SB_MASTER_PORT}\:${LISTON_ON_IP}" \
 inactivity_probe=$INACTIVE_PROBE -- set SB_Global . connections=@conn_uuid
-fi
 fi
+
 else
 if [ "$MANAGE_NORTHD" = "yes" ]; then
 # Stop ovn-northd service. Set --ovn-manage-ovsdb=no so that
@@ -355,7 +359,10 @@ ovsdb_server_start() {
 set $@ --db-nb-sync-from-proto=${NB_MASTER_PROTO}
 set $@ --db-sb-sync-from-port=${SB_MASTER_PORT}
 set $@ --db-sb-sync-from-proto=${SB_MASTER_PROTO}
-
+if [ "x${LISTEN_ON_MASTER_IP_ONLY}" = xno ]; then
+set $@ --db-sb-use-remote-in-db="no"
+set $@ --db-nb-use-remote-in-db="no"
+fi
 fi
 
 $@ start_ovsdb
-- 
1.9.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] (no subject)

2018-06-19 Thread marketing
Brauchen Sie einen Investor? 
 Benötigen Sie geschäftliche oder private Kredite? 
 Wir geben Darlehen an jeden einzelnen und Unternehmen mit 3% Zinssatz 
jährlich. Für weitere Informationen, kontaktieren Sie uns per e-Mail Dank ich 
hoffe, 
 von Ihnen zu hören 
  
 Do you need an investor? 
 Do you need business or personal loans? 
 We give out loan to any individual and company at 3% interest rate yearly. For 
more information, Contact us via Email Thanks 
 I hope to hear from you 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [RFC PATCH v2 1/1] dpdk: Support both shared and per port mempools.

2018-06-19 Thread Kevin Traynor
On 06/19/2018 12:16 PM, Kevin Traynor wrote:
> On 06/19/2018 12:11 PM, Kevin Traynor wrote:
>>> +if (per_port_mp && rte_errno == EEXIST) {
>>> +LIST_FOR_EACH (next, list_node, _mp_list) {
>>> +if (dmp->mp == next->mp) {
>>> +rte_free(dmp);
>>> +dmp = next;
>>> +dmp->refcount = 1;
>>> +}
>>> +}
>>> +}
>>> +else {
>>> +ovs_list_push_back(_mp_list, >list_node);
>>> +}
>> I think you need to increment refcount and use the safe list option. How
>> about
>>
> 
> Actually no, you don't need the safe list option, as it's the dmp that
> is being freed

I obviously misread this (or wasn't concentrating enough), you do still
need the dmp = next as well.

> 
>> if (rte_errno == EEXIST) {
>> LIST_FOR_EACH_SAFE (next, list_node, _mp_list) {
>> if (dmp->mp == next->mp) {
>> next->refcount++;
>> rte_free(dmp);
>> break;
>> }
>> }
>> } else {
>> dmp->refcount++;
>> ovs_list_push_back(_mp_list, >list_node);
>> }
>>
> 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [RFC PATCH v2 1/1] dpdk: Support both shared and per port mempools.

2018-06-19 Thread Kevin Traynor
On 06/19/2018 12:11 PM, Kevin Traynor wrote:
>> +if (per_port_mp && rte_errno == EEXIST) {
>> +LIST_FOR_EACH (next, list_node, _mp_list) {
>> +if (dmp->mp == next->mp) {
>> +rte_free(dmp);
>> +dmp = next;
>> +dmp->refcount = 1;
>> +}
>> +}
>> +}
>> +else {
>> +ovs_list_push_back(_mp_list, >list_node);
>> +}
> I think you need to increment refcount and use the safe list option. How
> about
> 

Actually no, you don't need the safe list option, as it's the dmp that
is being freed

> if (rte_errno == EEXIST) {
> LIST_FOR_EACH_SAFE (next, list_node, _mp_list) {
> if (dmp->mp == next->mp) {
> next->refcount++;
> rte_free(dmp);
> break;
> }
> }
> } else {
> dmp->refcount++;
> ovs_list_push_back(_mp_list, >list_node);
> }
> 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [RFC PATCH v2 1/1] dpdk: Support both shared and per port mempools.

2018-06-19 Thread Kevin Traynor
On 06/11/2018 05:37 PM, Ian Stokes wrote:
> This commit re-introduces the concept of shared mempools as the default
> memory model for DPDK devices. Per port mempools are still available but
> must be enabled explicitly by a user.
> 

Hi Ian, thanks for v2, comments below

> OVS previously used a shared mempool model for ports with the same MTU
> and socket configuration. This was replaced by a per port mempool model
> to address issues flagged by users such as:
> 
> https://mail.openvswitch.org/pipermail/ovs-discuss/2016-September/042560.html
> 
> However the per port model potentially requires an increase in memory
> resource requirements to support the same number of ports and configuration
> as the shared port model.
> 
> This is considered a blocking factor for current deployments of OVS
> when upgrading to future OVS releases as a user may have to redimension
> memory for the same deployment configuration. This may not be possible for
> users.
> 
> This commit resolves the issue by re-introducing shared mempools as
> the default memory behaviour in OVS DPDK but also refactors the memory
> configuration code to allow for per port mempools.
> 
> This patch adds a new global config option, per-port-mp, that

s/per-port-mp/per-port-memory/

> controls the enablement of per port mempools for DPDK devices.
> 
> ovs-vsctl set Open_vSwitch . other_config:per-port-memory=true
> 
> This value defaults to false; to enable per port memory support,
> this field should be set to true when setting other global parameters
> on init (such as "dpdk-socket-mem", for example). Changing the value at
> runtime is not supported, and requires restarting the vswitch
> daemon.
> 
> The mempool sweep functionality is also replaced with the
> sweep functionality from OVS 2.9 found in commits
> 
> c77f692 (netdev-dpdk: Free mempool only when no in-use mbufs.)
> a7fb0a4 (netdev-dpdk: Add mempool reuse/free debug.)
> 
> As this patch is RFC there are a number of TO-DOs including adding a
> memory calculation section to the documentation for both models. This is
> expected to be completed in the v1 after RFC.
> 
> Signed-off-by: Ian Stokes 
> 
> ---
> v1 -> v2
> * Rebase to head of master.
> * Change global config option 'per-port-mp-enabled' to 'per-port-memory'.
>   in commit message & documentation and code.
> * Specify in documentation that restart of daemon is only required if per
>   port-port-memory is set after dpdk-init=true.
> * Return comment 'Contains all 'struct dpdk_mp's.' which was lost in
>   previous refactor.
> * Improve comments regarding unique mempool names in the shared mempool
>   usecase.
> * Check per_port_mp condition first when verifying if new mempool is
>   required in netdev_dpdk_mempool_configure() for the shared mempool
>   usecase.
> * Correctly return ret variable instead of 0 for function
>   netdev_dpdk_mempool_configure().
> * Move VLOG_ERR regarding failure to create mempool for a device prior to
>   dpdk_mp_create() returns.
> * Add VLOG_DBG message flagging that the number of mbufs could not be
>   allocated and that it will be retried with half that amount.
> * Fix indentation of VLOG_ERR in netdev_dpdk_mempool_configure().
> * Handle EEXIST case for per port memory in function dpdk_mp_get() to
>   avoid duplication of dpdk_mps, correctly set refcount and return
>   correct dpdk_mp for the device to use.
> ---
>  Documentation/automake.mk|   1 +
>  Documentation/topics/dpdk/index.rst  |   1 +
>  Documentation/topics/dpdk/memory.rst |  67 
>  NEWS |   1 +
>  lib/dpdk-stub.c  |   6 +
>  lib/dpdk.c   |  12 ++
>  lib/dpdk.h   |   1 +
>  lib/netdev-dpdk.c| 298 
> +++
>  vswitchd/vswitch.xml |  17 ++
>  9 files changed, 304 insertions(+), 100 deletions(-)
>  create mode 100644 Documentation/topics/dpdk/memory.rst
> 
> diff --git a/Documentation/automake.mk b/Documentation/automake.mk
> index 2202df4..141b33d 100644
> --- a/Documentation/automake.mk
> +++ b/Documentation/automake.mk
> @@ -36,6 +36,7 @@ DOC_SOURCE = \
>   Documentation/topics/dpdk/index.rst \
>   Documentation/topics/dpdk/bridge.rst \
>   Documentation/topics/dpdk/jumbo-frames.rst \
> + Documentation/topics/dpdk/memory.rst \
>   Documentation/topics/dpdk/pdump.rst \
>   Documentation/topics/dpdk/phy.rst \
>   Documentation/topics/dpdk/pmd.rst \
> diff --git a/Documentation/topics/dpdk/index.rst 
> b/Documentation/topics/dpdk/index.rst
> index 181f61a..cf24a7b 100644
> --- a/Documentation/topics/dpdk/index.rst
> +++ b/Documentation/topics/dpdk/index.rst
> @@ -40,3 +40,4 @@ The DPDK Datapath
> /topics/dpdk/qos
> /topics/dpdk/pdump
> /topics/dpdk/jumbo-frames
> +   /topics/dpdk/memory
> diff --git a/Documentation/topics/dpdk/memory.rst 
> b/Documentation/topics/dpdk/memory.rst
> new file mode 100644
> index 

[ovs-dev] OVS Queue Mechanism in Kernel Space !

2018-06-19 Thread rakesh kumar
Dear Team,

1 . How  ovs has implemented the Queue mechanism in Kernel mode ?
2 . Which part of the code is or module is dealing withe Ingress and
Outgress Ports  ?
3. How does Open Flow table differs with Filtering database used in IEEE
802.1Q ?

List, please give your views on the above queries.




Regards
Rakesh Kumar
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 2/2] ovndb-servers.ocf: Set connections in NB and SB DB tables,

2018-06-19 Thread Numan Siddique
On Tue, Jun 19, 2018 at 6:14 AM, Han Zhou  wrote:

>
>
> On Fri, Jun 8, 2018 at 12:32 PM, aginwala  wrote:
> >
> > so that we can adjust inactivity_probe for master node, while still not
> > listening on TCP on slave node using use_remote_in_db in ovn-ctl.
> >
>
> Minor comment: the commit message is better to have a short summary as
> subject, and then the detailed descriptions.
>
> > Signed-off-by: aginwala 
> > ---
> >  ovn/utilities/ovndb-servers.ocf | 39 +++---
> -
> >  1 file changed, 23 insertions(+), 16 deletions(-)
> >
> > diff --git a/ovn/utilities/ovndb-servers.ocf
> b/ovn/utilities/ovndb-servers.ocf
> > index 9391b89..52141c7 100755
> > --- a/ovn/utilities/ovndb-servers.ocf
> > +++ b/ovn/utilities/ovndb-servers.ocf
> > @@ -172,25 +172,29 @@ ovsdb_server_notify() {
> >  ${OVN_CTL} --ovn-manage-ovsdb=no start_northd
> >  fi
> >
> > -# Not needed while listening on 0.0.0.0 as we do not want to
> allow
> > -# local binds. However, it is needed if vip ip is binded to
> nodes.
> > -if [ "x${LISTEN_ON_MASTER_IP_ONLY}" = xyes ]; then
> > -conn=`ovn-nbctl get NB_global . connections`
> > -if [ "$conn" == "[]" ]
> > -then
> > -ovn-nbctl -- --id=@conn_uuid create Connection \
> > -target="p${NB_MASTER_PROTO}\:${NB_MASTER_PORT}\:${MASTER_IP}" \
> > +# In order to over-ride inactivity_probe for LB use case, we
> need to
> > +# create connection entry to listen on 0.0.0.0 for master node.
> > +if [ "x${LISTEN_ON_MASTER_IP_ONLY}" = xno ]; then
> > +   LISTON_ON_IP="0.0.0.0"
> > +else
> > +   LISTON_ON_IP=${MASTER_IP}
> > +fi
> > +conn=`ovn-nbctl get NB_global . connections`
> > +if [ "$conn" == "[]" ]
> > +then
> > +ovn-nbctl -- --id=@conn_uuid create Connection \
> > +target="p${NB_MASTER_PROTO}\:${NB_MASTER_PORT}\:${LISTON_ON_IP}" \
> >  inactivity_probe=$INACTIVE_PROBE -- set NB_Global .
> connections=@conn_uuid
> > -fi
> > +fi
> >
> > -conn=`ovn-sbctl get SB_global . connections`
> > -if [ "$conn" == "[]" ]
> > -then
> > -ovn-sbctl -- --id=@conn_uuid create Connection \
> > -target="p${SB_MASTER_PROTO}\:${SB_MASTER_PORT}\:${MASTER_IP}" \
> > +conn=`ovn-sbctl get SB_global . connections`
> > +if [ "$conn" == "[]" ]
> > +then
> > +ovn-sbctl -- --id=@conn_uuid create Connection \
> > +target="p${SB_MASTER_PROTO}\:${SB_MASTER_PORT}\:${LISTON_ON_IP}" \
> >  inactivity_probe=$INACTIVE_PROBE -- set SB_Global .
> connections=@conn_uuid
> > -fi
> >  fi
> > +
> >  else
> >  if [ "$MANAGE_NORTHD" = "yes" ]; then
> >  # Stop ovn-northd service. Set --ovn-manage-ovsdb=no so that
> > @@ -355,7 +359,10 @@ ovsdb_server_start() {
> >  set $@ --db-nb-sync-from-proto=${NB_MASTER_PROTO}
> >  set $@ --db-sb-sync-from-port=${SB_MASTER_PORT}
> >  set $@ --db-sb-sync-from-proto=${SB_MASTER_PROTO}
> > -
> > +if [ "x${LISTEN_ON_MASTER_IP_ONLY}" = xno ]; then
> > +set $@ --db-sb-use-remote-in-db="no"
> > +set $@ --db-nb-use-remote-in-db="no"
> > +fi
> >  fi
> >
> >  $@ start_ovsdb
> > --
> > 1.9.1
> >
> > ___
> > dev mailing list
> > d...@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
> Acked-by: Han Zhou 
>



Acked-by: Numan Siddique 

Agree with Han. The commit message may be a bit clearer.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] dpif-netdev: Avoid reordering of packets in a batch with same megaflow

2018-06-19 Thread Vishal Deep Ajmera
> I looked into the code and the logic seems good to me.

> But reordering in dataplane also has performance implications especially 
> considering non-TCP traffic. Also, the packets came to OvS may already been 
> out-of-ordered.  Could you provide some performance data points showing  when 
> all EMC hit and mix of EMC-hit and EMC-miss cases, and how is the throughput 
> affected? Also maybe considering only do it for TCP traffic?

Thanks Yipeng for reviewing the patch.

The patch is for *preventing* OVS from doing any reordering of packets received in a 
batch which belong to the same megaflow. OVS should send all packets for a given 
megaflow in the same order as it receives them in the batch.

However if a packet is already received out-of-order from the network itself, 
OVS cannot put it back in order.

Regarding performance it also depends on application how it treats any 
out-of-order packet. For e.g. if a TCP application does not use SACK then end 
point may request retransmissions of packets which are received out-of-order 
thereby reducing effective throughput. Similarly any delayed (out-of-order) ACK 
can also cause fast-retransmits from sender side.

As per my understanding in principle if OVS is acting as a L2 switch (NORMAL 
flow) then it should not change the order of received packets for a given flow 
since we don't know how an application (Virtual Machine) will react to 
out-of-order packets.

Warm Regards,
Vishal Ajmera
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] rconn: Introduce new invariant to fix assertion failure in corner case.

2018-06-19 Thread Ilya Maximets
On 18.06.2018 22:24, Ben Pfaff wrote:
> On Mon, Jun 18, 2018 at 06:34:13PM +0300, Ilya Maximets wrote:
>> On 18.06.2018 18:07, Ben Pfaff wrote:
>>> On Mon, Jun 18, 2018 at 05:18:49PM +0300, Ilya Maximets wrote:
> On Wed, May 23, 2018 at 09:28:59PM -0700, Ben Pfaff wrote:
>> On Wed, May 23, 2018 at 06:06:44PM -0700, Han Zhou wrote:
>>> On Wed, May 23, 2018 at 5:14 PM, Ben Pfaff  wrote:

 Until now, rconn_get_version() has only reported the OpenFlow version 
 in
 use when the rconn is actually connected.  This makes sense, but it 
 has a
 harsh consequence.  Consider code like this:

 if (rconn_is_connected(rconn) && rconn_get_version(rconn) >= 0) {
 for (int i = 0; i < 2; i++) {
 struct ofpbuf *b = ofputil_encode_echo_request(
 rconn_get_version(rconn));
 rconn_send(rconn, b, NULL);
 }
 }

 Maybe not the smartest code in the world, and probably no one would 
 write
 this exact code in any case, but it doesn't look too risky or crazy.

 But it is.  The second trip through the loop can assert-fail inside
 ofputil_encode_echo_request() because rconn_get_version(rconn) returns 
 -1
 instead of a valid OpenFlow version.  That happens if the first call to
 rconn_send() encounters an error while sending the message and 
 therefore
 destroys the underlying vconn and disconnects so that 
 rconn_get_version()
 doesn't have a vconn to query for its version.

 In a case like this where all the code to send the messages is close 
 by,
>>> we
 could just check rconn_get_version() in each loop iteration.  We could
>>> even
 go through the tree and convince ourselves that individual bits of code
>>> are
 safe, or be conservative and check rconn_get_version() >= 0 in the iffy
 cases.  But this seems to me like an ongoing source of risk and a way 
 to
 get things wrong in corner cases.

 This commit takes a different approach.  It introduces a new 
 invariant: if
 an rconn has ever been connected, then it returns a valid OpenFlow 
 version
 from rconn_get_version().  In addition, if an rconn is currently
>>> connected,
 then the OpenFlow version it returns is the correct one (that may be
 obvious, but there were corner cases before where it returned -1 even
 though rconn_is_connected() returned true).

 With this commit, the code above would work OK.  If the first call to
 rconn_send() encounters an error sending the message, then
 rconn_get_version() in the second iteration will return the same value 
 as
 in the first iteration.  The message passed to rconn_send() will end up
 being discarded, but that's much better than either an assertion 
 failure
>>> or
 having to carefully analyze a lot of our code to deal with one unusual
 corner case.

 Reported-by: Han Zhou 
 Signed-off-by: Ben Pfaff 
 ---
  lib/learning-switch.c |  2 +-
  lib/rconn.c   | 41 -
  lib/vconn.c   |  1 +
  3 files changed, 18 insertions(+), 26 deletions(-)

>>> Acked-by: Han Zhou 
>>
>> Thanks.  I applied this to master.  I'll backport it to older versions
>> if no one notices trouble soon.
>
> I backported as far as branch-2.5.

 Sorry for being late, but I just tried to use branch-2.9 on my test 
 environment
 and found that this patch produces a lot of "connected" logs that are a 
 bit annoying:

 2018-06-18T12:55:06.610Z|03051|rconn|INFO|br-int<->unix#2873: connected
 2018-06-18T12:55:08.609Z|03052|rconn|INFO|br-int<->unix#2874: connected
 2018-06-18T12:55:08.610Z|03053|rconn|INFO|br-int<->unix#2875: connected
 2018-06-18T12:55:10.608Z|03054|rconn|INFO|br-int<->unix#2876: connected
 2018-06-18T12:55:10.609Z|03055|rconn|INFO|br-int<->unix#2877: connected
 2018-06-18T12:55:12.609Z|03056|rconn|INFO|br-int<->unix#2878: connected
 2018-06-18T12:55:12.609Z|03057|rconn|INFO|br-int<->unix#2879: connected
 2018-06-18T12:55:14.612Z|03058|rconn|INFO|br-int<->unix#2880: connected
 2018-06-18T12:55:14.613Z|03059|rconn|INFO|br-int<->unix#2881: connected
 2018-06-18T12:55:16.613Z|03060|rconn|INFO|br-int<->unix#2882: connected
 2018-06-18T12:55:16.614Z|03061|rconn|INFO|br-int<->unix#2883: connected
 2018-06-18T12:55:18.609Z|03062|rconn|INFO|br-int<->unix#2884: connected
 2018-06-18T12:55:18.610Z|03063|rconn|INFO|br-int<->unix#2885: connected
 2018-06-18T12:55:20.610Z|03064|rconn|INFO|br-int<->unix#2886: 

Re: [ovs-dev] [PATCH] rconn: Introduce new invariant to fix assertion failure in corner case.

2018-06-19 Thread Ilya Maximets
On 18.06.2018 22:25, Ben Pfaff wrote:
> On Mon, Jun 18, 2018 at 09:31:43AM -0700, Han Zhou wrote:
>> On Mon, Jun 18, 2018 at 8:34 AM, Ilya Maximets 
>> wrote:
>>>
>>> On 18.06.2018 18:07, Ben Pfaff wrote:
 On Mon, Jun 18, 2018 at 05:18:49PM +0300, Ilya Maximets wrote:
>> On Wed, May 23, 2018 at 09:28:59PM -0700, Ben Pfaff wrote:
>>> On Wed, May 23, 2018 at 06:06:44PM -0700, Han Zhou wrote:
 On Wed, May 23, 2018 at 5:14 PM, Ben Pfaff  wrote:
>
> Until now, rconn_get_version() has only reported the OpenFlow
>> version in
> use when the rconn is actually connected.  This makes sense, but
>> it has a
> harsh consequence.  Consider code like this:
>
> if (rconn_is_connected(rconn) && rconn_get_version(rconn) >=
>> 0) {
> for (int i = 0; i < 2; i++) {
> struct ofpbuf *b = ofputil_encode_echo_request(
> rconn_get_version(rconn));
> rconn_send(rconn, b, NULL);
> }
> }
>
> Maybe not the smartest code in the world, and probably no one
>> would write
> this exact code in any case, but it doesn't look too risky or
>> crazy.
>
> But it is.  The second trip through the loop can assert-fail inside
> ofputil_encode_echo_request() because rconn_get_version(rconn)
>> returns -1
> instead of a valid OpenFlow version.  That happens if the first
>> call to
> rconn_send() encounters an error while sending the message and
>> therefore
> destroys the underlying vconn and disconnects so that
>> rconn_get_version()
> doesn't have a vconn to query for its version.
>
> In a case like this where all the code to send the messages is
>> close by,
 we
> could just check rconn_get_version() in each loop iteration.  We
>> could
 even
> go through the tree and convince ourselves that individual bits of
>> code
 are
> safe, or be conservative and check rconn_get_version() >= 0 in the
>> iffy
> cases.  But this seems to me like an ongoing source of risk and a
>> way to
> get things wrong in corner cases.
>
> This commit takes a different approach.  It introduces a new
>> invariant: if
> an rconn has ever been connected, then it returns a valid OpenFlow
>> version
> from rconn_get_version().  In addition, if an rconn is currently
 connected,
> then the OpenFlow version it returns is the correct one (that may
>> be
> obvious, but there were corner cases before where it returned -1
>> even
> though rconn_is_connected() returned true).
>
> With this commit, the code above would work OK.  If the first call
>> to
> rconn_send() encounters an error sending the message, then
> rconn_get_version() in the second iteration will return the same
>> value as
> in the first iteration.  The message passed to rconn_send() will
>> end up
> being discarded, but that's much better than either an assertion
>> failure
 or
> having to carefully analyze a lot of our code to deal with one
>> unusual
> corner case.
>
> Reported-by: Han Zhou 
> Signed-off-by: Ben Pfaff 
> ---
>  lib/learning-switch.c |  2 +-
>  lib/rconn.c   | 41
>> -
>  lib/vconn.c   |  1 +
>  3 files changed, 18 insertions(+), 26 deletions(-)
>
 Acked-by: Han Zhou 
>>>
>>> Thanks.  I applied this to master.  I'll backport it to older
>> versions
>>> if no one notices trouble soon.
>>
>> I backported as far as branch-2.5.
>
> Sorry for being late, but I just tried to use branch-2.9 on my test
>> environment
> and found that this patch produces a lot of "connected" logs that are
>> a bit annoying:
>
> 2018-06-18T12:55:06.610Z|03051|rconn|INFO|br-int<->unix#2873: connected
> 2018-06-18T12:55:08.609Z|03052|rconn|INFO|br-int<->unix#2874: connected
> 2018-06-18T12:55:08.610Z|03053|rconn|INFO|br-int<->unix#2875: connected
> 2018-06-18T12:55:10.608Z|03054|rconn|INFO|br-int<->unix#2876: connected
> 2018-06-18T12:55:10.609Z|03055|rconn|INFO|br-int<->unix#2877: connected
> 2018-06-18T12:55:12.609Z|03056|rconn|INFO|br-int<->unix#2878: connected
> 2018-06-18T12:55:12.609Z|03057|rconn|INFO|br-int<->unix#2879: connected
> 2018-06-18T12:55:14.612Z|03058|rconn|INFO|br-int<->unix#2880: connected
> 2018-06-18T12:55:14.613Z|03059|rconn|INFO|br-int<->unix#2881: connected
> 2018-06-18T12:55:16.613Z|03060|rconn|INFO|br-int<->unix#2882: connected
> 2018-06-18T12:55:16.614Z|03061|rconn|INFO|br-int<->unix#2883: connected
> 2018-06-18T12:55:18.609Z|03062|rconn|INFO|br-int<->unix#2884: connected
> 

Re: [ovs-dev] OVS (master) + DPDK(17.11) + multi-queue

2018-06-19 Thread Ilya Maximets
Hi,
According to your log, your NIC has limited size of tx queues:

  2018-06-19T04:34:46.106Z|00089|dpdk|ERR|PMD: Unsupported size of TX queue
   (max size: 1024)

This means that you have to configure 'n_txq_desc' <= 1024 in order to
configure your NIC properly. OVS uses 2048 by default and this causes
device configuration failure.

Try this:

ovs-vsctl set interface eth1 options:n_txq_desc=1024

It also likely that you will have to configure the same value for 'n_rxq_desc'.


Note that OVS manages TX queues itself and it will try to allocate
separate TX queue for each PMD thread + 1 for non-PMD threads. So,
it's kind of impossible to force it to configure only one TX queue
in case HW supports multiqueue.

> Hi,
> 
> I am using above configuration on my testbed and when I try to add a port
> which is bound to igb_uio, I see following errors due to Tx queue
> configuration. I just want to use single Tx and Rx queue for my testing. I
> looked at Documentation/intro/install/dpdk.rst, i see only "DPDK Physical
> Port Rx Queues" and nothing for "Tx Queues". Kindly let me know how can I
> use single tx/rx queues and if I have to use multiple Tx queues what
> configuration changes I need to do?
> 
> ovs logs==
> 2018-06-19T04:33:23.720Z|00075|bridge|INFO|ovs-vswitchd (Open vSwitch)
> 2.9.90
> 2018-06-19T04:33:32.735Z|00076|memory|INFO|127688 kB peak resident set size
> after 10.1 seconds
> 2018-06-19T04:33:32.735Z|00077|memory|INFO|handlers:5 ports:1
> revalidators:3 rules:5
> 2018-06-19T04:33:40.857Z|00078|rconn|INFO|br0<->unix#0: connected
> 2018-06-19T04:33:40.858Z|00079|rconn|INFO|br0<->unix#1: connected
> 2018-06-19T04:33:40.859Z|00080|rconn|INFO|br0<->unix#2: connected
> 2018-06-19T04:34:46.094Z|00081|dpdk|INFO|EAL: PCI device :00:06.0 on
> NUMA socket 0
> 2018-06-19T04:34:46.094Z|00082|dpdk|INFO|EAL:   probe driver: 1d0f:ec20
> net_ena
> 2018-06-19T04:34:46.095Z|00083|dpdk|INFO|PMD: eth_ena_dev_init():
> Initializing 0:0:6.0
> 2018-06-19T04:34:46.095Z|00084|netdev_dpdk|INFO|Device ':00:06.0'
> attached to DPDK
> 2018-06-19T04:34:46.099Z|00085|dpif_netdev|INFO|PMD thread on numa_id: 0,
> core id:  0 created.
> 2018-06-19T04:34:46.099Z|00086|dpif_netdev|INFO|There are 1 pmd threads on
> numa node 0
> 2018-06-19T04:34:46.105Z|00087|netdev_dpdk|WARN|Rx checksum offload is not
> supported on port 0
> 2018-06-19T04:34:46.105Z|00088|dpdk|INFO|PMD: Set MTU: 1500
> 2018-06-19T04:34:46.106Z|00089|dpdk|ERR|PMD: Unsupported size of TX queue
> (max size: 1024)
> 2018-06-19T04:34:46.106Z|00090|netdev_dpdk|INFO|Interface eth1 unable to
> setup txq(0): Invalid argument
> 2018-06-19T04:34:46.106Z|00091|netdev_dpdk|ERR|Interface eth1(rxq:1 txq:2
> lsc interrupt mode:false) configure error: Invalid argument
> 2018-06-19T04:34:46.106Z|00092|dpif_netdev|ERR|Failed to set interface eth1
> new configuration
> 2018-06-19T04:34:46.106Z|00093|bridge|WARN|could not add network device
> eth1 to ofproto (No such device)
> 
> 
> ethtool -l eth1
> Channel parameters for eth1:
> Pre-set maximums:
> RX:128
> TX:128
> Other:0
> Combined:0
> Current hardware settings:
> RX:8
> TX:8
> Other:0
> Combined:0
> 
> ovs-vsctl get  Open_vSwitch . other_config
> {dpdk-extra="--file-prefix=host", dpdk-hugepage-dir="/dev/hugepages_1G",
> dpdk-init="true"}
> 
> ovs-vswitchd --version
> ovs-vswitchd (Open vSwitch) 2.9.90
> DPDK 17.11.2
> 
> ovs-vsctl get Open_vSwitch . dpdk_version
> "DPDK 17.11.2"
> 
> ovs-vsctl get Open_vSwitch . dpdk_initialized
> true
> 
> Thanks.

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 3/3] datapath-windows: Compute ct hash based on 5-tuple and zone

2018-06-19 Thread Anand Kumar
Conntrack 5-tuple consists of src address, dst address, src port,
dst port and protocol which will be unique to a ct session.
Use this information along with zone to compute hash.

Also re-factor conntrack code related to parsing netlink attributes.

Testing:
Verified loading/unloading the driver with Driver Verifier enabled.
Ran TCP/UDP and ICMP traffic.

Signed-off-by: Anand Kumar 
---
v1->v2: Updated commit message to include testing done.
v2->v3: No change.
---
 datapath-windows/ovsext/Conntrack.c | 228 ++--
 datapath-windows/ovsext/Conntrack.h |   2 -
 2 files changed, 116 insertions(+), 114 deletions(-)

diff --git a/datapath-windows/ovsext/Conntrack.c 
b/datapath-windows/ovsext/Conntrack.c
index 8fa1e07..dd16602 100644
--- a/datapath-windows/ovsext/Conntrack.c
+++ b/datapath-windows/ovsext/Conntrack.c
@@ -151,6 +151,24 @@ OvsCleanupConntrack(VOID)
 OvsNatCleanup();
 }
 
+/*
+ *
+ * OvsCtHashKey
+ * Compute hash using 5-tuple and zone.
+ *
+ */
+UINT32
+OvsCtHashKey(const OVS_CT_KEY *key)
+{
+UINT32 hsrc, hdst, hash;
+hsrc = key->src.addr.ipv4 | ntohl(key->src.port);
+hdst = key->dst.addr.ipv4 | ntohl(key->dst.port);
+hash = hsrc ^ hdst; /* TO identify reverse traffic */
+hash = hash | (key->zone + key->nw_proto);
+hash = OvsJhashWords((uint32_t*) , 1, hash);
+return hash;
+}
+
 static __inline VOID
 OvsCtKeyReverse(OVS_CT_KEY *key)
 {
@@ -231,7 +249,7 @@ OvsCtAddEntry(POVS_CT_ENTRY entry,
 if (!OvsNatTranslateCtEntry(entry)) {
 return FALSE;
 }
-ctx->hash = OvsHashCtKey(>key);
+ctx->hash = OvsCtHashKey(>key);
 } else {
 entry->natInfo.natAction = natInfo->natAction;
 }
@@ -531,20 +549,6 @@ OvsCtLookup(OvsConntrackKeyLookupCtx *ctx)
 return found;
 }
 
-UINT32
-OvsHashCtKey(const OVS_CT_KEY *key)
-{
-UINT32 hsrc, hdst, hash;
-hsrc = OvsJhashBytes((UINT32*) >src, sizeof(key->src), 0);
-hdst = OvsJhashBytes((UINT32*) >dst, sizeof(key->dst), 0);
-hash = hsrc ^ hdst; /* TO identify reverse traffic */
-hash = OvsJhashBytes((uint32_t *) >dst + 1,
- ((uint32_t *) (key + 1) -
- (uint32_t *) (>dst + 1)),
- hash);
-return hash;
-}
-
 static UINT8
 OvsReverseIcmpType(UINT8 type)
 {
@@ -642,7 +646,7 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
 OvsCtKeyReverse(>key);
 }
 
-ctx->hash = OvsHashCtKey(>key);
+ctx->hash = OvsCtHashKey(>key);
 return NDIS_STATUS_SUCCESS;
 }
 
@@ -953,7 +957,6 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx,
   OvsFlowKey *key,
   const PNL_ATTR a)
 {
-PNL_ATTR ctAttr;
 BOOLEAN commit = FALSE;
 BOOLEAN force = FALSE;
 BOOLEAN postUpdateEvent = FALSE;
@@ -973,109 +976,110 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx,
 return status;
 }
 
-/* XXX Convert this to NL_ATTR_FOR_EACH */
-ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_ZONE);
-if (ctAttr) {
-zone = NlAttrGetU16(ctAttr);
-}
-ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_COMMIT);
-if (ctAttr) {
-commit = TRUE;
-}
-ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_MARK);
-if (ctAttr) {
-mark = NlAttrGet(ctAttr);
-}
-ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_LABELS);
-if (ctAttr) {
-labels = NlAttrGet(ctAttr);
-}
-natActionInfo.natAction = NAT_ACTION_NONE;
-ctAttr = NlAttrFindNested(a, OVS_CT_ATTR_NAT);
-if (ctAttr) {
-/* Pares Nested NAT attributes. */
-PNL_ATTR natAttr;
-unsigned int left;
-BOOLEAN hasMinIp = FALSE;
-BOOLEAN hasMinPort = FALSE;
-BOOLEAN hasMaxIp = FALSE;
-BOOLEAN hasMaxPort = FALSE;
-NL_NESTED_FOR_EACH_UNSAFE (natAttr, left, ctAttr) {
-enum ovs_nat_attr subtype = NlAttrType(natAttr);
-switch(subtype) {
-case OVS_NAT_ATTR_SRC:
-case OVS_NAT_ATTR_DST:
-natActionInfo.natAction |=
-((subtype == OVS_NAT_ATTR_SRC)
-? NAT_ACTION_SRC : NAT_ACTION_DST);
+PNL_ATTR ctAttr = NULL;
+INT left;
+
+NL_NESTED_FOR_EACH (ctAttr, left, a) {
+switch(NlAttrType(ctAttr)) {
+case OVS_CT_ATTR_ZONE:
+zone = NlAttrGetU16(ctAttr);
+break;
+case OVS_CT_ATTR_COMMIT:
+commit = TRUE;
+break;
+case OVS_CT_ATTR_MARK:
+mark = NlAttrGet(ctAttr);
 break;
-case OVS_NAT_ATTR_IP_MIN:
-memcpy(,
-   NlAttrData(natAttr), NlAttrGetSize(natAttr));
-hasMinIp = TRUE;
+

[ovs-dev] [PATCH v3 2/3] datapath-windows: Implement locking in conntrack NAT.

2018-06-19 Thread Anand Kumar
This patch primarily replaces the existing NDIS RW-lock based implementation
for NAT in conntrack with a spinlock based implementation inside the NAT
module, along with some conntrack optimizations.

- The 'ovsNatTable' and 'ovsUnNatTable' tables are shared
  between cleanup threads and packet processing thread.
  In order to protect these two tables use a spinlock.
  Also introduce counters to track number of nat entries.
- Introduce a new function OvsGetTcpHeader() to retrieve TCP header
  and payload length, to optimize for TCP traffic.
- Optimize conntrack look up.
- Remove 'bucketlockRef' member from conntrack entry structure.

Testing:
Verified loading/unloading the driver with Driver Verifier enabled.
Ran TCP/UDP and ICMP traffic.

Signed-off-by: Anand Kumar 
---
v1->v2: Merge patch 2 and 3 so that NAT locks related changes are in a
single patch.
v2->v3: No change.
---
 datapath-windows/ovsext/Conntrack-ftp.c |   4 +-
 datapath-windows/ovsext/Conntrack-nat.c |  27 +++-
 datapath-windows/ovsext/Conntrack-tcp.c |  15 ++---
 datapath-windows/ovsext/Conntrack.c | 110 +---
 datapath-windows/ovsext/Conntrack.h |  36 +++
 5 files changed, 100 insertions(+), 92 deletions(-)

diff --git a/datapath-windows/ovsext/Conntrack-ftp.c 
b/datapath-windows/ovsext/Conntrack-ftp.c
index 6830dfa..ce09a65 100644
--- a/datapath-windows/ovsext/Conntrack-ftp.c
+++ b/datapath-windows/ovsext/Conntrack-ftp.c
@@ -129,14 +129,14 @@ OvsCtHandleFtp(PNET_BUFFER_LIST curNbl,
 char temp[256] = { 0 };
 char ftpMsg[256] = { 0 };
 
+UINT32 len;
 TCPHdr tcpStorage;
 const TCPHdr *tcp;
-tcp = OvsGetTcp(curNbl, layers->l4Offset, );
+tcp = OvsGetTcpHeader(curNbl, layers, , );
 if (!tcp) {
 return NDIS_STATUS_INVALID_PACKET;
 }
 
-UINT32 len = OvsGetTcpPayloadLength(curNbl);
 if (len > sizeof(temp)) {
 /* We only care up to 256 */
 len = sizeof(temp);
diff --git a/datapath-windows/ovsext/Conntrack-nat.c 
b/datapath-windows/ovsext/Conntrack-nat.c
index da1814f..11057e6 100644
--- a/datapath-windows/ovsext/Conntrack-nat.c
+++ b/datapath-windows/ovsext/Conntrack-nat.c
@@ -3,7 +3,8 @@
 
 PLIST_ENTRY ovsNatTable = NULL;
 PLIST_ENTRY ovsUnNatTable = NULL;
-
+static NDIS_SPIN_LOCK ovsCtNatLock;
+static ULONG ovsNatEntries;
 /*
  *---
  * OvsHashNatKey
@@ -109,6 +110,8 @@ NTSTATUS OvsNatInit()
 InitializeListHead([i]);
 }
 
+NdisAllocateSpinLock();
+ovsNatEntries = 0;
 return STATUS_SUCCESS;
 }
 
@@ -121,6 +124,11 @@ NTSTATUS OvsNatInit()
 VOID OvsNatFlush(UINT16 zone)
 {
 PLIST_ENTRY link, next;
+if (!ovsNatEntries) {
+return;
+}
+
+NdisAcquireSpinLock();
 for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) {
 LIST_FORALL_SAFE([i], link, next) {
 POVS_NAT_ENTRY entry =
@@ -131,6 +139,7 @@ VOID OvsNatFlush(UINT16 zone)
 }
 }
 }
+NdisReleaseSpinLock();
 }
 
 /*
@@ -144,10 +153,14 @@ VOID OvsNatCleanup()
 if (ovsNatTable == NULL) {
return;
 }
+
+NdisAcquireSpinLock();
 OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG);
 OvsFreeMemoryWithTag(ovsUnNatTable, OVS_CT_POOL_TAG);
 ovsNatTable = NULL;
 ovsUnNatTable = NULL;
+NdisReleaseSpinLock();
+NdisFreeSpinLock();
 }
 
 /*
@@ -250,10 +263,13 @@ static UINT32 OvsNatHashRange(const OVS_CT_ENTRY *entry, 
UINT32 basis)
 VOID
 OvsNatAddEntry(OVS_NAT_ENTRY* entry)
 {
+NdisAcquireSpinLock();
 InsertHeadList(OvsNatGetBucket(>key, FALSE),
>link);
 InsertHeadList(OvsNatGetBucket(>value, TRUE),
>reverseLink);
+NdisReleaseSpinLock();
+NdisInterlockedIncrement((PLONG));
 }
 
 /*
@@ -399,21 +415,29 @@ OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse)
 PLIST_ENTRY link;
 POVS_NAT_ENTRY entry;
 
+if (!ovsNatEntries) {
+return NULL;
+}
+
+NdisAcquireSpinLock();
 LIST_FORALL(OvsNatGetBucket(ctKey, reverse), link) {
 if (reverse) {
 entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, reverseLink);
 
 if (OvsNatKeyAreSame(ctKey, >value)) {
+NdisReleaseSpinLock();
 return entry;
 }
 } else {
 entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, link);
 
 if (OvsNatKeyAreSame(ctKey, >key)) {
+NdisReleaseSpinLock();
 return entry;
 }
 }
 }
+NdisReleaseSpinLock();
 return NULL;
 }
 
@@ -432,6 +456,7 @@ OvsNatDeleteEntry(POVS_NAT_ENTRY entry)
 RemoveEntryList(>link);
 RemoveEntryList(>reverseLink);
 OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
+NdisInterlockedDecrement((PLONG));
 }
 
 /*
diff --git a/datapath-windows/ovsext/Conntrack-tcp.c 
b/datapath-windows/ovsext/Conntrack-tcp.c
index 8cbab24..eda42ac 100644
--- 

[ovs-dev] [PATCH v3 0/3] Optimize conntrack performance

2018-06-19 Thread Anand Kumar
This patch series is primarily to refactor conntrack code for
better throughput with conntrack.

With this patch series TCP throughput with conntrack increased
by ~50%.

Anand Kumar (3):
  datapath-windows: Use spinlock instead of RW lock for ct entry
  datapath-windows: Implement locking in conntrack NAT.
  datapath-windows: Compute ct hash based on 5-tuple and zone

 datapath-windows/ovsext/Conntrack-ftp.c |   4 +-
 datapath-windows/ovsext/Conntrack-nat.c |  34 +-
 datapath-windows/ovsext/Conntrack-related.c |  19 +-
 datapath-windows/ovsext/Conntrack-tcp.c |  15 +-
 datapath-windows/ovsext/Conntrack.c | 469 +---
 datapath-windows/ovsext/Conntrack.h |  40 ++-
 datapath-windows/ovsext/Util.h  |  18 ++
 7 files changed, 309 insertions(+), 290 deletions(-)

-- 
2.9.3.windows.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 1/3] datapath-windows: Use spinlock instead of RW lock for ct entry

2018-06-19 Thread Anand Kumar
This patch mainly changes an NDIS RW lock for a conntrack entry to a
spinlock, along with some minor refactoring in conntrack. We use a
spinlock instead of an RW lock because RW locks cause performance hits
when acquired/released multiple times.

- Use NdisInterlockedXX wrapper api's instead of InterlockedXX.
- Update 'ctTotalRelatedEntries' using interlocked functions.
- Move conntrack lock out of NAT module.

Testing:
Verified loading/unloading the driver with Driver Verifier enabled.
Ran TCP/UDP and ICMP traffic.

Signed-off-by: Anand Kumar 
---
v1->v2: Calculate the dispatch level only in cases where the locks are being 
acquired
multiple times within a given context and minor style change.
v2->v3: Fix kernel crash while executing cleanup thread in conntrack-related
---
 datapath-windows/ovsext/Conntrack-nat.c |   7 +-
 datapath-windows/ovsext/Conntrack-related.c |  19 ++--
 datapath-windows/ovsext/Conntrack.c | 135 ++--
 datapath-windows/ovsext/Conntrack.h |   2 +-
 datapath-windows/ovsext/Util.h  |  18 
 5 files changed, 95 insertions(+), 86 deletions(-)

diff --git a/datapath-windows/ovsext/Conntrack-nat.c 
b/datapath-windows/ovsext/Conntrack-nat.c
index 316c946..da1814f 100644
--- a/datapath-windows/ovsext/Conntrack-nat.c
+++ b/datapath-windows/ovsext/Conntrack-nat.c
@@ -167,16 +167,13 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
 {
 UINT32 natFlag;
 const struct ct_endpoint* endpoint;
-LOCK_STATE_EX lockState;
-/* XXX: Move conntrack locks out of NAT after implementing lock in NAT. */
-NdisAcquireRWLockRead(entry->lock, , 0);
+
 /* When it is NAT, only entry->rev_key contains NATTED address;
When it is unNAT, only entry->key contains the UNNATTED address;*/
 const OVS_CT_KEY *ctKey = reverse ? >key : >rev_key;
 BOOLEAN isSrcNat;
 
 if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) {
-NdisReleaseRWLock(entry->lock, );
 return;
 }
 isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) ||
@@ -206,7 +203,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
 }
 } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){
 // XXX: IPv6 packet not supported yet.
-NdisReleaseRWLock(entry->lock, );
 return;
 }
 if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) {
@@ -220,7 +216,6 @@ OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
 }
 }
 }
-NdisReleaseRWLock(entry->lock, );
 }
 
 
diff --git a/datapath-windows/ovsext/Conntrack-related.c 
b/datapath-windows/ovsext/Conntrack-related.c
index ec4b536..257dd37 100644
--- a/datapath-windows/ovsext/Conntrack-related.c
+++ b/datapath-windows/ovsext/Conntrack-related.c
@@ -18,7 +18,7 @@
 #include "Jhash.h"
 
 static PLIST_ENTRY ovsCtRelatedTable; /* Holds related entries */
-static UINT64 ctTotalRelatedEntries;
+static ULONG ctTotalRelatedEntries;
 static OVS_CT_THREAD_CTX ctRelThreadCtx;
 static PNDIS_RW_LOCK_EX ovsCtRelatedLockObj;
 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
@@ -75,13 +75,11 @@ OvsCtRelatedLookup(OVS_CT_KEY key, UINT64 currentTime)
 POVS_CT_REL_ENTRY entry;
 LOCK_STATE_EX lockState;
 
-NdisAcquireRWLockRead(ovsCtRelatedLockObj, , 0);
-
 if (!ctTotalRelatedEntries) {
-NdisReleaseRWLock(ovsCtRelatedLockObj, );
 return NULL;
 }
 
+NdisAcquireRWLockRead(ovsCtRelatedLockObj, , 0);
 for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
 /* XXX - Scan the table based on the hash instead */
 LIST_FORALL_SAFE([i], link, next) {
@@ -103,7 +101,7 @@ OvsCtRelatedEntryDelete(POVS_CT_REL_ENTRY entry)
 {
 RemoveEntryList(>link);
 OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
-ctTotalRelatedEntries--;
+NdisInterlockedDecrement((PLONG));
 }
 
 NDIS_STATUS
@@ -139,7 +137,7 @@ OvsCtRelatedEntryCreate(UINT8 ipProto,
 NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0);
 InsertHeadList([hash & CT_HASH_TABLE_MASK],
>link);
-ctTotalRelatedEntries++;
+NdisInterlockedIncrement((PLONG));
 NdisReleaseRWLock(ovsCtRelatedLockObj, );
 
 return NDIS_STATUS_SUCCESS;
@@ -150,11 +148,10 @@ OvsCtRelatedFlush()
 {
 PLIST_ENTRY link, next;
 POVS_CT_REL_ENTRY entry;
-
 LOCK_STATE_EX lockState;
-NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0);
 
 if (ctTotalRelatedEntries) {
+NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0);
 for (int i = 0; i < CT_HASH_TABLE_SIZE; i++) {
 LIST_FORALL_SAFE([i], link, next) {
 entry = CONTAINING_RECORD(link, OVS_CT_REL_ENTRY, link);
@@ -189,9 +186,8 @@ OvsCtRelatedEntryCleaner(PVOID data)
 /* Lock has been freed by 'OvsCleanupCtRelated()' */
 break;
 }
-NdisAcquireRWLockWrite(ovsCtRelatedLockObj, , 0);
+
 if (context->exit) {
-NdisReleaseRWLock(ovsCtRelatedLockObj, );
 break;
 }
 
@@ -201,6 +197,7 @@ 

Re: [ovs-dev] [PATCH v1] Avoid dp_hash recirculation for balance-tcp bond selection mode

2018-06-19 Thread Manohar Krishnappa Chidambaraswamy
Hi Ilya,

Thanx for taking a look. Please see inline.

Thanx
Manu

On 18/06/18, 4:04 PM, "Ilya Maximets"  wrote:

> Hi

Hi,
I just wanted to clarify few things about RSS hash. See inline.

One more thing:
Despite of usual OVS bonding, this implementation doesn't support
shifting the load between ports. Am I right?
This could be an issue, because few heavy flows could be mapped to
a single port, while other ports will be underloaded. This will
be a bad case for tunnelling where we have only few heavy flows.
As I understood, this version of bonding doesn't support any load
statistics.
[manu] Yes that’s correct. This implementation does not yet support
accumulation of per slave stats (in "struct bond_entry"). Since load
balancing is done without using the dp_hashed flows, rule level stats
can't be used and bond_rebalance() won't take effect. I was planning
to add per-slave stats collection/accumulation in OVS_ACTION_ATTR_LB_OUTPUT
handling. This will be done in another patch set.

Best regards, Ilya Maximets.

> Problem:
> 
> In OVS-DPDK, flows with output over a bond interface of type “balance-tcp”
> (using a hash on TCP/UDP 5-tuple) get translated by the ofproto layer into
> "HASH" and "RECIRC" datapath actions. After recirculation, the packet is
> forwarded to the bond member port based on 8-bits of the datapath hash
> value computed through dp_hash. This causes performance degradation in the
> following ways:
> 
> 1. L4-Hash computation in software is CPU intensive, it consumes
> considerable CPU cycles of the PMD.

RSS is in use in most cases in current master and 2.9. Details below.
[manu] OK Thanx. I was working on an earlier version of OVS and didn’t notice it
while porting to master.

> 
> 2. The recirculation of the packet implies another lookup of the packet’s
> flow key in the exact match cache (EMC) and potentially Megaflow 
classifier
> (DPCLS). This is the biggest cost factor.
> 
> 3. The recirculated packets have a new “RSS” hash and compete with the
> original packets for the scarce number of EMC slots. This implies more
> EMC misses and potentially EMC thrashing causing costly DPCLS lookups.
> 
> 4. The 256 extra megaflow entries per bond for dp_hash bond selection put
> additional load on the revalidation threads.
>  
> Owing to this performance degradation, deployments stick to “balance-slb”
> bond mode even though it does not do active-active load balancing for 
> VXLAN- and GRE-tunnelled traffic because all tunnel packet have the same
> source MAC address.
>  
> Proposed optimization:
> -- 
> This proposal has 2 main optimizations in balance-tcp handling at egress.
>  
> 1. When feasible, re-use the existing L4 RSS-hash of the packet for bond
> selection instead of computing another L4-hash in software.

This is already done. See commit
95a6cb3497c3 ("odp-execute: Reuse rss hash in OVS_ACTION_ATTR_HASH.")

It was done a year ago and, currently, if RSS is available it's used for
OVS_ACTION_ATTR_HASH while balanced bonding handling.

So, at least, you should reword a lot of RSS related comments around the 
code.
[manu] With this I think OVS_ACTION_ATTR_HASH can be reused and only
OVS_ACTION_ATTR_RECIRC action can be replaced with OVS_ACTION_ATTR_LB_OUTPUT.
So it will be "HASH + LB-OUTPUT" instead of existing "HASH + RECIRC".
Will evaluate this and then send v2 diffs.

>  
> 2. Introduce a new load-balancing output action instead of recirculation:
>
> Maintain one table per-bond (could just be an array of uint16's) and
> Program it the same way internal flows are created today for each possible
> hash value(256 entries) from ofproto layer. Use this table to load-balance
> flows as part of output action processing.
>  
> Currently xlate_normal() -> output_normal() -> 
bond_update_post_recirc_rules()
> -> bond_may_recirc() and compose_output_action__() generate
> “dp_hash(hash_l4(0))” and “recirc()” actions. In this case the
> RecircID identifies the bond. For the recirculated packets the ofproto 
layer
> installs megaflow entries that match on RecircID and masked dp_hash and 
send
> them to the corresponding output port.
>  
> Instead, we will now generate a new action "lb_output(bond,<hash>)" 
which
> combines hash computation (only if needed, else re-use RSS hash) and 
inline
> load-balancing over the bond. This action is used *only* for balance-tcp 
bonds
> in OVS-DPDK datapath (the OVS kernel datapath remains unchanged).



___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev