Re: [ovs-dev] [PATCH v10 0/2] ovn: Support native DHCP using 'continuations'

2016-06-08 Thread Ben Pfaff
I liked this series but I wanted to make some changes, so I sent out my
own proposal based on it, with patch 0/4 starting here:
http://openvswitch.org/pipermail/dev/2016-June/072517.html

Will you look it over and see what you think?

I might still have comments on the final patch; most notably, I haven't
studied the Subnet table yet.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH 1/4] expr: Shorten declarations of expr_context.

2016-06-08 Thread Ben Pfaff
Seems to me that this makes the code slightly easier to follow.

Signed-off-by: Ben Pfaff 
---
 ovn/lib/expr.c | 28 
 1 file changed, 4 insertions(+), 24 deletions(-)

diff --git a/ovn/lib/expr.c b/ovn/lib/expr.c
index f274ab4..7ff9538 100644
--- a/ovn/lib/expr.c
+++ b/ovn/lib/expr.c
@@ -1025,13 +1025,7 @@ expr_parse__(struct expr_context *ctx)
 struct expr *
 expr_parse(struct lexer *lexer, const struct shash *symtab, char **errorp)
 {
-struct expr_context ctx;
-
-ctx.lexer = lexer;
-ctx.symtab = symtab;
-ctx.error = NULL;
-ctx.not = false;
-
+struct expr_context ctx = { .lexer = lexer, .symtab = symtab };
 struct expr *e = expr_parse__();
 *errorp = ctx.error;
 ovs_assert((ctx.error != NULL) != (e != NULL));
@@ -1108,12 +1102,7 @@ parse_field_from_string(const char *s, const struct 
shash *symtab,
 lexer_init(, s);
 lexer_get();
 
-struct expr_context ctx;
-ctx.lexer = 
-ctx.symtab = symtab;
-ctx.error = NULL;
-ctx.not = false;
-
+struct expr_context ctx = { .lexer = , .symtab = symtab };
 bool ok = parse_field(, field);
 if (!ok) {
 *errorp = ctx.error;
@@ -2861,12 +2850,7 @@ expr_parse_assignment(struct lexer *lexer, const struct 
shash *symtab,
   const void *aux,
   struct ofpbuf *ofpacts, struct expr **prereqsp)
 {
-struct expr_context ctx;
-ctx.lexer = lexer;
-ctx.symtab = symtab;
-ctx.error = NULL;
-ctx.not = false;
-
+struct expr_context ctx = { .lexer = lexer, .symtab = symtab };
 struct expr *prereqs = parse_assignment(, lookup_port, aux, ofpacts);
 if (ctx.error) {
 expr_destroy(prereqs);
@@ -2881,12 +2865,8 @@ expr_parse_field(struct lexer *lexer, int n_bits, bool 
rw,
  const struct shash *symtab,
  struct mf_subfield *sf, struct expr **prereqsp)
 {
+struct expr_context ctx = { .lexer = lexer, .symtab = symtab };
 struct expr *prereqs = NULL;
-struct expr_context ctx;
-ctx.lexer = lexer;
-ctx.symtab = symtab;
-ctx.error = NULL;
-ctx.not = false;
 
 struct expr_field field;
 if (!parse_field(, )) {
-- 
2.1.3

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH 2/4] expr: Refactor parsing of assignments and exchanges.

2016-06-08 Thread Ben Pfaff
As written, it was difficult for the OVN logical action code to add support
for new actions of the form "dst = ...", because the code to parse the left
side of the assignment was a monolithic part of the expr library.  This
commit refactors the code division so that an upcoming patch can support a
new "dst = func(args);" kind of action.

Signed-off-by: Ben Pfaff 
---
 ovn/lib/actions.c |  52 +++-
 ovn/lib/expr.c| 173 +++---
 ovn/lib/expr.h|  36 ++--
 tests/ovn.at  |   2 +-
 4 files changed, 165 insertions(+), 98 deletions(-)

diff --git a/ovn/lib/actions.c b/ovn/lib/actions.c
index 5f0bf19..f9309da 100644
--- a/ovn/lib/actions.c
+++ b/ovn/lib/actions.c
@@ -106,19 +106,35 @@ action_syntax_error(struct action_context *ctx, const 
char *message, ...)
 static void
 parse_set_action(struct action_context *ctx)
 {
-struct expr *prereqs;
+struct expr *prereqs = NULL;
+struct expr_field dst;
 char *error;
 
-error = expr_parse_assignment(ctx->lexer, ctx->ap->symtab,
-  ctx->ap->lookup_port, ctx->ap->aux,
-  ctx->ofpacts, );
+error = expr_parse_field(ctx->lexer, ctx->ap->symtab, );
 if (error) {
+goto exit;
+}
+
+if (lexer_match(ctx->lexer, LEX_T_EXCHANGE)) {
+error = expr_parse_exchange(ctx->lexer, , ctx->ap->symtab,
+ctx->ap->lookup_port, ctx->ap->aux,
+ctx->ofpacts, );
+} else if (lexer_match(ctx->lexer, LEX_T_EQUALS)) {
+error = expr_parse_assignment(
+ctx->lexer, , ctx->ap->symtab, ctx->ap->lookup_port,
+ctx->ap->aux, ctx->ofpacts, );
+} else {
+action_syntax_error(ctx, "expecting `=' or `<->'");
+}
+
+exit:
+if (!error) {
+ctx->prereqs = expr_combine(EXPR_T_AND, ctx->prereqs, prereqs);
+} else {
+expr_destroy(prereqs);
 action_error(ctx, "%s", error);
 free(error);
-return;
 }
-
-ctx->prereqs = expr_combine(EXPR_T_AND, ctx->prereqs, prereqs);
 }
 
 static void
@@ -282,19 +298,23 @@ static bool
 action_parse_field(struct action_context *ctx,
int n_bits, struct mf_subfield *sf)
 {
-struct expr *prereqs;
+struct expr_field field;
 char *error;
 
-error = expr_parse_field(ctx->lexer, n_bits, false, ctx->ap->symtab, sf,
- );
-if (error) {
-action_error(ctx, "%s", error);
-free(error);
-return false;
+error = expr_parse_field(ctx->lexer, ctx->ap->symtab, );
+if (!error) {
+struct expr *prereqs;
+error = expr_expand_field(ctx->lexer, ctx->ap->symtab,
+  , n_bits, false, sf, );
+if (!error) {
+ctx->prereqs = expr_combine(EXPR_T_AND, ctx->prereqs, prereqs);
+return true;
+}
 }
 
-ctx->prereqs = expr_combine(EXPR_T_AND, ctx->prereqs, prereqs);
-return true;
+action_error(ctx, "%s", error);
+free(error);
+return false;
 }
 
 static void
diff --git a/ovn/lib/expr.c b/ovn/lib/expr.c
index 7ff9538..31b94d8 100644
--- a/ovn/lib/expr.c
+++ b/ovn/lib/expr.c
@@ -438,15 +438,6 @@ struct expr_constant_set {
 bool in_curlies;  /* Whether the constants were in {}. */
 };
 
-/* A reference to a symbol or a subfield of a symbol.
- *
- * For string fields, ofs and n_bits are 0. */
-struct expr_field {
-const struct expr_symbol *symbol; /* The symbol. */
-int ofs;  /* Starting bit offset. */
-int n_bits;   /* Number of bits. */
-};
-
 /* Context maintained during expr_parse(). */
 struct expr_context {
 struct lexer *lexer;/* Lexer for pulling more tokens. */
@@ -2698,49 +2689,44 @@ init_stack_action(const struct expr_field *f, struct 
ofpact_stack *stack)
 mf_subfield_from_expr_field(f, >subfield);
 }
 
-static struct expr *
-parse_assignment(struct expr_context *ctx,
+static char * OVS_WARN_UNUSED_RESULT
+parse_assignment(struct lexer *lexer, struct expr_field *dst,
+ const struct shash *symtab, bool exchange,
  bool (*lookup_port)(const void *aux, const char *port_name,
  unsigned int *portp),
- const void *aux, struct ofpbuf *ofpacts)
+ const void *aux, struct ofpbuf *ofpacts,
+ struct expr **prereqsp)
 {
+struct expr_context ctx = { .lexer = lexer, .symtab = symtab };
 struct expr *prereqs = NULL;
 
 /* Parse destination and do basic checking. */
-struct expr_field dst;
-if (!parse_field(ctx, )) {
-goto exit;
-}
-bool exchange = lexer_match(ctx->lexer, LEX_T_EXCHANGE);
-if (!exchange && !lexer_match(ctx->lexer, LEX_T_EQUALS)) {
-expr_syntax_error(ctx, "expecting `='.");
-goto exit;
- 

[ovs-dev] [PATCH 3/4] ovn-controller: Add 'put_dhcp_opts' action in ovn-controller

2016-06-08 Thread Ben Pfaff
From: Numan Siddique 

This patch adds a new OVN action 'put_dhcp_opts' to support native
DHCP in OVN.

ovn-controller parses this action and adds a NXT_PACKET_IN2
OF flow with 'pause' flag set and the DHCP options stored in
'userdata' field.

When the valid DHCP packet is received by ovn-controller, it frames a
new DHCP reply packet with the DHCP options present in the
'userdata' field and resumes the packet and stores 1 in the OVS
register provded in the 'put_dhcp_opts' parameter. If the packet is invalid,
it resumes the packet without any modifying and stores 0 in the OVS register.

Eg. put_dhcp_opts(reg0, efferip = 10.0.0.4, router = 10.0.0.1,
  netmask = 255.255.255.0, lease_time = 3600,)

A new 'DHCP_Options' table is added in SB DB which stores
the supported DHCP options with DHCP code and type. ovn-northd is
expected to popule this table.

The next patch will add logical flows with this action.

Signed-off-by: Numan Siddique 
Co-authored-by: Ben Pfaff 
Signed-off-by: Ben Pfaff 
---
 include/openvswitch/meta-flow.h |  12 +++
 lib/dhcp.h  |  13 +++
 ovn/controller/lflow.c  |  11 +++
 ovn/controller/pinctrl.c| 188 +++-
 ovn/lib/actions.c   | 206 ++--
 ovn/lib/actions.h   |  13 +++
 ovn/lib/automake.mk |   1 +
 ovn/lib/expr.c  |  45 ++---
 ovn/lib/expr.h  |  38 
 ovn/lib/ovn-dhcp.h  | 111 ++
 ovn/ovn-sb.ovsschema|  16 +++-
 ovn/ovn-sb.xml  | 198 ++
 tests/automake.mk   |   1 +
 tests/ovn.at|  76 +++
 tests/test-ovn-dhcp.c   | 149 +
 tests/test-ovn.c|  32 +++
 16 files changed, 1065 insertions(+), 45 deletions(-)
 create mode 100644 ovn/lib/ovn-dhcp.h
 create mode 100644 tests/test-ovn-dhcp.c

diff --git a/include/openvswitch/meta-flow.h b/include/openvswitch/meta-flow.h
index e5f9962..84a0946 100644
--- a/include/openvswitch/meta-flow.h
+++ b/include/openvswitch/meta-flow.h
@@ -1930,6 +1930,18 @@ union mf_subvalue {
 
 /* Convenient access to just least-significant bits in various forms. */
 struct {
+uint8_t dummy_u8[127];
+uint8_t u8_val;
+};
+struct {
+ovs_be16 dummy_be16[63];
+ovs_be16 be16_int;
+};
+struct {
+ovs_be32 dummy_be32[31];
+ovs_be32 be32_int;
+};
+struct {
 ovs_be64 dummy_integer[15];
 ovs_be64 integer;
 };
diff --git a/lib/dhcp.h b/lib/dhcp.h
index 6f97298..271e0a5 100644
--- a/lib/dhcp.h
+++ b/lib/dhcp.h
@@ -25,6 +25,8 @@
 #define DHCP_SERVER_PORT67   /* Port used by DHCP server. */
 #define DHCP_CLIENT_PORT68   /* Port used by DHCP client. */
 
+#define DHCP_MAGIC_COOKIE 0x63825363
+
 #define DHCP_HEADER_LEN 236
 struct dhcp_header {
 uint8_t op; /* DHCP_BOOTREQUEST or DHCP_BOOTREPLY. */
@@ -45,4 +47,15 @@ struct dhcp_header {
 };
 BUILD_ASSERT_DECL(DHCP_HEADER_LEN == sizeof(struct dhcp_header));
 
+#define DHCP_OP_REQUEST1
+#define DHCP_OP_REPLY  2
+
+#define DHCP_MSG_DISCOVER  1
+#define DHCP_MSG_OFFER 2
+#define DHCP_MSG_REQUEST   3
+#define DHCP_MSG_ACK   5
+
+#define DHCP_OPT_MSG_TYPE  53
+#define DHCP_OPT_END   255
+
 #endif /* dhcp.h */
diff --git a/ovn/controller/lflow.c b/ovn/controller/lflow.c
index efc427d..52e6131 100644
--- a/ovn/controller/lflow.c
+++ b/ovn/controller/lflow.c
@@ -24,6 +24,7 @@
 #include "ovn-controller.h"
 #include "ovn/lib/actions.h"
 #include "ovn/lib/expr.h"
+#include "ovn/lib/ovn-dhcp.h"
 #include "ovn/lib/ovn-sb-idl.h"
 #include "packets.h"
 #include "simap.h"
@@ -203,6 +204,13 @@ add_logical_flows(struct controller_ctx *ctx, const struct 
lport_index *lports,
 {
 uint32_t conj_id_ofs = 1;
 
+struct hmap dhcp_opts = HMAP_INITIALIZER(_opts);
+const struct sbrec_dhcp_options *dhcp_opt_row;
+SBREC_DHCP_OPTIONS_FOR_EACH(dhcp_opt_row, ctx->ovnsb_idl) {
+dhcp_opt_add(_opts, dhcp_opt_row->name, dhcp_opt_row->code,
+ dhcp_opt_row->type);
+}
+
 const struct sbrec_logical_flow *lflow;
 SBREC_LOGICAL_FLOW_FOR_EACH (lflow, ctx->ovnsb_idl) {
 /* Determine translation of logical table IDs to physical table IDs. */
@@ -274,6 +282,7 @@ add_logical_flows(struct controller_ctx *ctx, const struct 
lport_index *lports,
 };
 struct action_params ap = {
 .symtab = ,
+.dhcp_opts = _opts,
 .lookup_port = lookup_port_cb,
 .aux = ,
 .ct_zones = ct_zones,
@@ -357,6 +366,8 @@ add_logical_flows(struct controller_ctx *ctx, const struct 
lport_index *lports,
 ofpbuf_uninit();
 conj_id_ofs += n_conjs;
   

[ovs-dev] [PATCH 4/4] ovn-northd: Add logical flows to support native DHCP

2016-06-08 Thread Ben Pfaff
From: Numan Siddique 

OVN implements a native DHCP support which caters to the common
use case of providing an IP address to a booting instance by
providing stateless replies to DHCP requests based on statically
configured address mappings. To do this it allows a short list of
DHCP options to be configured and applied at each compute host
running ovn-controller.

A new table 'Subnet' is added in OVN NB DB to store the DHCP options.

For each logical port following flows are added if the CMS has defined
DHCP options in the 'Subnet' column

 - A logical flow which copies the DHCP options to the DHCP
   request packets using the 'put_dhcp_opts' action and advances the
   packet to the next stage.

 - A logical flow which implements the DHCP reponder by sending
   the DHCP reply back to the inport once the 'put_dhcp_opts' action
   is applied.

Signed-off-by: Numan Siddique 
Signed-off-by: Ben Pfaff 
---
 ovn/northd/ovn-northd.8.xml   |  91 +++-
 ovn/northd/ovn-northd.c   | 267 ++-
 ovn/ovn-nb.ovsschema  |  19 ++-
 ovn/ovn-nb.xml| 314 +-
 ovn/utilities/ovn-nbctl.8.xml |  29 
 ovn/utilities/ovn-nbctl.c | 196 ++
 tests/ovn.at  | 250 +
 tests/test-ovn-dhcp.c | 135 ++
 8 files changed, 1291 insertions(+), 10 deletions(-)

diff --git a/ovn/northd/ovn-northd.8.xml b/ovn/northd/ovn-northd.8.xml
index 1983812..c8b8403 100644
--- a/ovn/northd/ovn-northd.8.xml
+++ b/ovn/northd/ovn-northd.8.xml
@@ -343,7 +343,90 @@ output;
   
 
 
-Ingress Table 6: Destination Lookup
+Ingress Table 6: DHCP option processing
+
+
+  This table adds the DHCP options to a DHCP packet from the
+  logical ports configured with IPv4 address(es) and DHCP options.
+
+
+
+  
+
+  A priority-100 logical flow is added for these logical ports
+  which matches the packet with udp.src = 68 and
+  udp.dst = 67 and applies the action
+  put_dhcp_opts and advances the packet to the table 7.
+
+
+
+reg0[0] = put_dhcp_opts(offer_ip = O, options...);
+next;
+
+
+
+  For DHCPDISCOVER and DHCPREQUEST, this transforms the packet into a
+  DHCP reply, adds the DHCP offer IP O and options to the
+  packet, and stores 1 into reg0[0].  For other kinds of packets, it
+  just stores 0 into reg0[0].  Either way, it continues to the next
+  table.
+
+
+  
+
+  
+A priority-0 flow that matches all packets to advances to table 7.
+  
+
+
+Ingress Table 7: DHCP responses
+
+
+  This table implements DHCP responder for the DHCP replies generated by
+  by the previous table.
+
+
+
+  
+
+  A priority 100 logical flow is added for the logical ports configured
+  with DHCP options which matches IPv4 packets with udp.src == 68
+   udp.dst == 67  reg0[0] == 1 and
+  responds back to the inport after applying these
+  actions.  If reg0[0] is set to 1, it means that the
+  action put_dhcp_opts was successful.
+
+
+
+eth.dst = eth.src;
+eth.src = E;
+ip4.dst = O;
+ip4.src = S;
+udp.src = 67;
+udp.dst = 68;
+outport = P;
+inport = ""; /* Allow sending out inport. */
+output;
+
+
+
+  where E is the server MAC address and S is the
+  server IPv4 address defined in the DHCP options and O is
+  the IPv4 address defined in the logical port's addresses column.
+
+
+
+  (This terminates packet processing; the packet does not go on the
+  next ingress table.)
+
+  
+
+  
+A priority-0 flow that matches all packets to advances to table 8.
+  
+
+
+Ingress Table 8: Destination Lookup
 
 
   This table implements switching behavior.  It contains these logical
@@ -387,6 +470,12 @@ output;
   This is similar to ingress table 4 except for to-lport ACLs.
 
 
+
+  Also a priority 34000 logical flow is added for each subnet of the 
logical
+  switch which has DHCP options defined to allow the DHCP reply packet
+  from the Ingress Table 7: DHCP resume.
+
+
 Egress Table 2: Egress Port Security - IP
 
 
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index cac0148..e613970 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -26,6 +26,7 @@
 #include "hash.h"
 #include "hmap.h"
 #include "json.h"
+#include "ovn/lib/ovn-dhcp.h"
 #include "ovn/lib/lex.h"
 #include "ovn/lib/ovn-nb-idl.h"
 #include "ovn/lib/ovn-sb-idl.h"
@@ -33,6 +34,7 @@
 #include "packets.h"
 #include "poll-loop.h"
 #include "smap.h"
+#include "sset.h"
 #include "stream.h"
 #include "stream-ssl.h"
 #include 

[ovs-dev] [PATCH 0/4] OVN DHCP support proposal

2016-06-08 Thread Ben Pfaff
Patches 1 and 2 are new.

Patch 3 is based on https://patchwork.ozlabs.org/patch/631320/, with:
  - Minor style fixes.
  - Change syntax of put_dhcp_opts(), from:
  put_dhcp_opts(reg0, offerip=1.2.3.4, ...)
to:
  reg0[0] = put_dhcp_opts(offerip=1.2.3.4, ...)
That is, the result is now expressed as a return value, which is
more natural for people coming from C, and the result is now a
single bit instead of an entire register, which makes more sense
for a Boolean anyway and doesn't use up a whole register.
  - Arguments to put_dhcp_opts are now architecture independent instead
of host-endian, which makes it possible to test them using the
test-ovn "parse-actions" instead of a separate program.
  - Added negative tests for put_dhcp_opts parsing.
  - Revised documentation to avoid talking about "pausing" and "resuming"
the pipeline.  The trip to ovn-controller should be transparent to
the writer of the OVN logical flows.

Patch 4 is based on https://patchwork.ozlabs.org/patch/631321/, with:
  - Minor style fixes.
  - Adapt actions to changed put_dhcp_opts() syntax.
  - Revised ovn-northd and documentation to avoid talking about
"pausing" and "resuming" the pipeline.  The trip to ovn-controller
should be transparent to the writer of the OVN logical flows.

Ben Pfaff (2):
  expr: Shorten declarations of expr_context.
  expr: Refactor parsing of assignments and exchanges.

Numan Siddique (2):
  ovn-controller: Add 'put_dhcp_opts' action in ovn-controller
  ovn-northd: Add logical flows to support native DHCP

 include/openvswitch/meta-flow.h |  12 ++
 lib/dhcp.h  |  13 ++
 ovn/controller/lflow.c  |  11 ++
 ovn/controller/pinctrl.c| 188 ++-
 ovn/lib/actions.c   | 252 +++---
 ovn/lib/actions.h   |  13 ++
 ovn/lib/automake.mk |   1 +
 ovn/lib/expr.c  | 246 ++
 ovn/lib/expr.h  |  74 -
 ovn/lib/ovn-dhcp.h  | 111 ++
 ovn/northd/ovn-northd.8.xml |  91 ++-
 ovn/northd/ovn-northd.c | 267 +++-
 ovn/ovn-nb.ovsschema|  19 ++-
 ovn/ovn-nb.xml  | 314 +-
 ovn/ovn-sb.ovsschema|  16 +-
 ovn/ovn-sb.xml  | 198 
 ovn/utilities/ovn-nbctl.8.xml   |  29 
 ovn/utilities/ovn-nbctl.c   | 196 
 tests/automake.mk   |   1 +
 tests/ovn.at| 328 +++-
 tests/test-ovn-dhcp.c   | 284 ++
 tests/test-ovn.c|  32 
 22 files changed, 2522 insertions(+), 174 deletions(-)
 create mode 100644 ovn/lib/ovn-dhcp.h
 create mode 100644 tests/test-ovn-dhcp.c

-- 
2.1.3

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] ovs-numa: Fix a compilation error

2016-06-08 Thread Takashi YAMAMOTO
On Thu, Jun 9, 2016 at 3:31 AM, Ben Pfaff  wrote:

> On Wed, Jun 08, 2016 at 03:19:46PM +0900, YAMAMOTO Takashi wrote:
> > Fix the following error on NetBSD 7.0.
> >
> > ../lib/ovs-numa.c: In function 'ovs_numa_set_cpu_mask':
> > ../lib/ovs-numa.c:555:9: error: array subscript has type 'char'
> [-Werror=char-subscripts]
> >
> > Signed-off-by: YAMAMOTO Takashi 
>
> Thanks.
>
> Acked-by: Ben Pfaff 
>

applied, thank you
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH v3] datapath-windows: Add GRE checksum

2016-06-08 Thread Alin Serdean
Thanks for the review answer inlined.

Thanks,
Alin.

> -Mesaj original-
> De la: Nithin Raju [mailto:nit...@vmware.com]
> Trimis: Wednesday, June 8, 2016 10:10 PM
> Către: Alin Serdean ;
> dev@openvswitch.org
> Subiect: Re: [ovs-dev] [PATCH v3] datapath-windows: Add GRE checksum
> 
> 
> >+/* Get a contignuous buffer for the maxmimum length of a GRE
> >+ header
> >*/
> >+bufferStart = NdisGetDataBuffer(curNb, OVS_MAX_GRE_LGTH, NULL, 1,
> >+ 0);
> 
> Sorry we have to go back and forth on this. Like I mentioned in the previous
> email, the idea is to get a contiguous chunk of memory so we can walk all the
> header until the GRE header. It is a good idea to use
> NdisGetDataBuffer() instead of copying the NBL. But, we won¹t avoid the
> copied NBL anyway since decap has to happen on the copied NBL.
[Alin Gabriel Serdean: ] Agreed. I only wanted a check to make sure the whole 
ETH, IP, GRE was contiguous. 
> 
> In any case, NdisGetDataBuffer() has some pitfalls:
> "If the requested data in the buffer is contiguous, this return value is a
> pointer to a location that NDIS provides. If the data is not contiguous, NDIS
> uses the Storage parameter as follows:
> 
> * If the Storage parameter is non-NULL, NDIS copies the data to the buffer at
> Storage. This return value is the pointer passed to the Storage parameter.
> * If the Storage parameter is NULL, this return value is NULL.²
> 
> So, if the first MDL does not fit the headers until the GRE headers, we need
> to pass explicit memory to NdisGetDataBuffer() in argument #3 in order for
> NDIS to copy it over to a contiguous chunk of memory.
[Alin Gabriel Serdean: ] We have to take into consideration that NDIS 6.3 
supports NVGRE and in NDIS 6.4 GRE 
(https://support.microsoft.com/en-us/kb/3022776 ).
Packets can be split in multiple Mdl's, if the miniport supports it, as early 
as at the beginning of the upper-layer-protocol 
(https://msdn.microsoft.com/en-us/library/windows/hardware/ff571065(v=vs.85).aspx)
 and if the split header is not greater than the maximum header size which is 
configurable. In the case of VXLAN / STT we can be in the case above, because 
of the UDP/TCP like packet, but for GRE things differ since there are cards 
which support offloading:
https://msdn.microsoft.com/en-us/library/windows/hardware/dn605832(v=vs.85).aspx
"The protocol driver will guarantee that all the prepended encapsulation 
headers (ETH, IP, GRE) will be physically contiguous and will be in the first 
MDL of the packet."
I do not think we will be in a situation in which a GRE header will not be 
continuous.
If you consider we should still have a fragmented header problem, I will 
allocate a buffer and execute another NdisGetDataBuffer call if the first one 
failed.
> 
> Thanks,
> -- Nithin

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] doc: Fix an error in FAQ.

2016-06-08 Thread Ryan Moats
"dev"  wrote on 06/07/2016 12:56:50 AM:

> From: Han Zhou 
> To: dev@openvswitch.org
> Date: 06/07/2016 12:57 AM
> Subject: [ovs-dev] [PATCH] doc: Fix an error in FAQ.
> Sent by: "dev" 
>
> Signed-off-by: Han Zhou 
> ---
>  FAQ.md | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/FAQ.md b/FAQ.md
> index df6f225..cc4fdf6 100644
> --- a/FAQ.md
> +++ b/FAQ.md
> @@ -1056,7 +1056,7 @@ A: Yes.  For traffic that egresses from a
> switch, OVS supports traffic
>
> Keep in mind that ingress and egress are from the perspective of the
> switch.  That means that egress shaping limits the rate at which
> -   traffic is allowed to transmit from a physical interface, but the
> +   traffic is allowed to transmit from a physical interface, but not the
> rate at which traffic will be received on a virtual machine's VIF.
> For ingress policing, the behavior is the opposite.
>
> --

Acked-by: Ryan Moats 
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 8/8] automake: Add ovs-bugtool.in to flake8-check.

2016-06-08 Thread Ryan Moats
"dev"  wrote on 06/06/2016 02:25:59 AM:

> From: Gurucharan Shetty 
> To: dev@openvswitch.org
> Date: 06/06/2016 12:23 PM
> Subject: [ovs-dev] [PATCH 8/8] automake: Add ovs-bugtool.in to
flake8-check.
> Sent by: "dev" 
>
> Signed-off-by: Gurucharan Shetty 
> ---
>  utilities/bugtool/automake.mk | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/utilities/bugtool/automake.mk
b/utilities/bugtool/automake.mk
> index b795fb3..3d9a54e 100644
> --- a/utilities/bugtool/automake.mk
> +++ b/utilities/bugtool/automake.mk
> @@ -33,6 +33,8 @@ bugtool_scripts = \
>
>  scripts_SCRIPTS += $(bugtool_scripts)
>
> +FLAKE8_PYFILES += utilities/bugtool/ovs-bugtool.in
> +
>  bugtoolpluginsdir = $(pkgdatadir)/bugtool-plugins
>  INSTALL_DATA_LOCAL += bugtool-install-data-local
>  bugtool-install-data-local:
> --

All of these didn't appear to make the patchworks queue,
so the following is an ack of the whole series - I've
looked at all of them and they look good...

Acked-by: Ryan Moats 
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH v6 1/2] doc: Refactor DPDK install documentation

2016-06-08 Thread Flavio Leitner
On Tue, Jun 07, 2016 at 11:42:11PM +0200, Mauricio Vásquez wrote:
> On Tue, Jun 7, 2016 at 10:52 PM, Bodireddy, Bhanuprakash <
> bhanuprakash.bodire...@intel.com> wrote:
> 
> > Thanks Mauricio for your comments, My comments inline.
> >
> > >+  it has to be configured with DPDK support and is done by './configure
> > --
> > >with-dpdk'.
> > >+  This section focus on generic recipe that suits most cases and for
> > >distribution
> > >+  specific instructions, refer [INSTALL.Fedora.md], [INSTALL.RHEL.md]
> > and
> > >+  [INSTALL.Debian.md].
> > >
> > >-9. Rx Mergeable buffers
> > >+  OVS can be downloaded in compressed format from the OVS release page
> > >(or)
> > >+  cloned from git repository if user intends to develop and contribute
> > >+  patches upstream.
> > >
> > > I think it is better just to have one download method, it keeps things
> > simple.
> >
> > [BHANU] This is done for a reason.  Most of the users would like to work
> > on a stable release and few would be working on the master branch.
> >
> 
> The point here is that this file is in the master branch, if a user wants
> to use a stable release it should download that stable release and read the
> installation guide for that specific release.
> 
> 
> > One section covers downloading the stable release as compressed file. This
> > section would be updated once 2.6 is released.
> > Other section comes handy for users working on master.
> >
> 
> I see that both methods are actually downloading the master version.
> 
> Additionally, I think that keeping instructions in the master about how to
> compile previous versions can be difficult, mainly due to the DPDK required
> version. As an example, if you want to include here instructions for ovs
> 2.5 you have to use DPDK 2.2.

I sort of disagree here for two reasons.  One is that I think we
should assume the user is reading the document from the sources,
so no downloading instructions are needed.  I think that is the
approach used by INSTALL.md and it seems to be working.

The second reason is that the sources in the development branch
is going to be in the release tarball at some point.  Let's say
next 2.6 will point only to master snapshots.  So, it seems to
me that if we want to keep the downloading instructions, we could
get both ways explained.  For example:

The OVS sources can be downloaded in different ways and you can skip
this section if you already have the correct sources.  Otherwise
download the correct version using one of the suggested methods below
and follow the documentation provided for that specific version.

* OVS stable releases can be downloaded in compressed format from 
http://openvswitch.org/releases/

 wget -O ovs.tar http://openvswitch.org/releases/openvswitch-.tar.gz
 mkdir -p /usr/src/ovs
 tar -xvf ovs.tar -C /usr/src/ovs --strip-components=1
 export OVS_DIR=/usr/src/ovs

* OVS current development can be downloaded in compressed format from
https://github.com/openvswitch/ovs/tarball/master
wget -O ovs.tar https://github.com/openvswitch/ovs/tarball/master
mkdir -p /usr/src/ovs
tar -xvf ovs.tar -C /usr/src/ovs --strip-components=1
export OVS_DIR=/usr/src/ovs

* OVS current development can be clone using 'git' tool:
cd /usr/src/
 git clone https://github.com/openvswitch/ovs.git
 export OVS_DIR=/usr/src/ovs

fbl

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] Bug#826780: Bug#826780: Please package OVS >= 2.6.0.dev1

2016-06-08 Thread Joe Stringer
On 8 June 2016 at 16:40, Ben Pfaff  wrote:
> On Thu, Jun 09, 2016 at 01:23:12AM +0200, Thomas Goirand wrote:
>> Source: openvswitch
>> Version: 2.3.0+git20140819-4
>> Severity: normal
>>
>> Hi,
>>
>> OpenStack Neutron currently has:
>>
>> ovs>=2.5.0;python_version=='2.7' # Apache-2.0
>> ovs>=2.6.0.dev1;python_version>='3.4' # Apache-2.0
>>
>> in its requirements.txt. Please package this at least to Debian Experimental,
>> so that I can package Neutron Newton Beta 1.
>
> I can package 2.5.x, though there are reports that there's one testsuite
> failure on big-endian architectures.
>
> OVS 2.6.0 doesn't exist yet, so it can't be packaged.  When Neutron asks
> for 2.6.0 what does it mean?

It would appear that someone has uploaded an OVS 2.5.90 development
version of the OVS python libraries to pypi as 2.6.0.dev1:
https://pypi.python.org/pypi/ovs/2.6.0.dev1

This probably has the latest python3 fixes that were applied since OVS
2.5, so is required if someone wants to run neutron under python3.

Russell, do you know much about this? I'd CC otherwiseguy, but I don't
know who that is off-hand.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] VxLAN-gpe implementation

2016-06-08 Thread Alexander Duyck
On Wed, Jun 8, 2016 at 3:20 PM, Hannes Frederic Sowa  wrote:
> On 08.06.2016 23:21, Alexander Duyck wrote:
>> On Wed, Jun 8, 2016 at 12:46 PM, Hannes Frederic Sowa  
>> wrote:
>>> On 08.06.2016 17:38, Alexander Duyck wrote:
 On Wed, Jun 8, 2016 at 7:48 AM, Hannes Frederic Sowa  
 wrote:
> On 08.06.2016 14:51, Jiri Benc wrote:
>> On Mon, 6 Jun 2016 14:22:58 -0700, Jesse Gross wrote:
>>> On Sat, Jun 4, 2016 at 6:39 AM, Yi Yang  wrote:
>>> [...]
  datapath/vport-netdev.c   |   3 +-
  datapath/vport-vxlan.c|  17 ++-
>>>
>>> These changes aren't upstream yet. Please do that before backporting 
>>> them here.
>>>
>>> However, the changes to vport-vxlan.c are modifying compatibility code
>>> that shouldn't be extended further. Instead, just use the existing
>>> VXLAN netlink interfaces that have already been created to enable
>>> these features.
>>>
>>> There is also a number of other patches to the OVS kernel module/VXLAN
>>> that have not been backported. Pravin started doing this work but it
>>> hasn't been applied yet. In general, I think it makes sense to
>>> backport patches in order so that the diffs of the patches match those
>>> of upstream.
>>>
>>> Finally, I have a question about receive side offloading with
>>> VXLAN-gpe. This is primarily an upstream issue but is present in the
>>> code being backported here as well. The VXLAN code sets up receive
>>> offloads for all ports regardless of whether they are classic VXLAN or
>>> L2/L3 GPE and expects NICs to parse the packets. I don't think this is
>>> safe because there are a number of NICs out there that predate the
>>> existence of GPE and therefore won't do this parsing correctly. I
>>> think that it is necessary to disable receive offloading for
>>> non-Ethernet VXLAN-GPE unless the offloading interface is extended.
>>
>> Coincidentally, I was talking about this with Hannes a few days ago.
>> I'm adding him to CC.
>>
>> I guess you're referring to ndo_add_vxlan_port, right? I agree that
>> this interface needs changes, especially considering that we know
>> whether the UDP port belongs to VXLAN or VXLAN-GPE. But from my
>> understanding of how drivers use this callback, the worst thing that
>> could happen is suboptimal generation of rx hashes and thus steering
>> the packets to a different receive queue than in the optimal case.
>> Surely something to fix but it seems it won't cause much functional
>> troubles with the current code?
>
> I am not sure if we must upgrade the interface. Can't drivers always
> configure vxlan-gpe and are always backwards compatible?
>
> Non vxlan-gpe capable hardware would have to abort checksum offloading
> as soon as they can't interpret the vxlan header anyway, so the packets
> end up on the slow path and nothing bad should happen.
>
> Possibly some hardware will verify inner checksums despite it could not
> understand the vxlan header completely. In this case we probably will
> drop the packet in the driver. Anyway, I would be in favor to simply
> present one knob, namely vxlan-offloading, to the user, instead a knob
> for each version of vxlan.
>
> Unfortunately I couldn't get a definitive answer from the specs
> regarding the checksuming details.
>
> Bye,
> Hannes

 This is starting to sound like the same conversation we had on netdev
 when the ndo_add_geneve_port was added.  One easy fix for guaranteeing
 that we can perform the checksum offload is to just enable the outer
 UDP checksum.  Then we can still perform GRO and use the outer UDP
 source port for generating a hash.  If possible we should make this
 the default for all new UDP based tunnels going forward simply because
 it allows for backwards-compatibility with existing offloads.
>>>
>>> Yes, but this time we are only in vxlan-versioning-world only. :)
>>
>> Right.  It leads me back into the original thought I had which was we
>> should be providing the UDP tunnel type via something like an
>> enumerated type so drivers could just opt in if they see a tunnel type
>> they recognize.  Having to add a function for each new tunnel type is
>> just silly.  Then we could have support for VXLAN be separate from
>> VXLAN-GPE without having to add a whole new set of functions.
>
> Partially agreed. I hope we can soon provide a patch that allows us to
> query the state of offloading of networking cards, a la ethtool. My idea
> was to make this depending on if the ndo_op for vxlan or geneve isn't NULL.
>
> Right now we still have the problem, that the check if the card actually
> supports udp offloading for vxlan or geneve is not depending on if the

Re: [ovs-dev] [PATCH] ovn-controller: Fix memory leak reported byvalgrind.

2016-06-08 Thread Ryan Moats
"dev"  wrote on 06/05/2016 09:37:35 AM:

> From: William Tu 
> To: dev@openvswitch.org
> Date: 06/05/2016 09:37 AM
> Subject: [ovs-dev] [PATCH] ovn-controller: Fix memory leak reported
> by valgrind.
> Sent by: "dev" 
>
> Calling ovsdb_idl_set_remote() might overwrite the 'idl->session'.  The
patch
> fixes them by freeing 'idl->session' before it is overwritten.
>
> Testcast ovn-controller - ovn-bridge-mappings reports two definitelylosts
in:
> xmalloc (util.c:112)
> jsonrpc_session_open (jsonrpc.c:784)
> ovsdb_idl_create (ovsdb-idl.c:246)
> main (ovn-controller.c:384)
> and,
> xmalloc (util.c:112)
> jsonrpc_session_open (jsonrpc.c:784)
> ovsdb_idl_set_remote (ovsdb-idl.c:289)
> main (ovn-controller.c:409)
>
> Signed-off-by: William Tu 

Looked at this by inspection and it makes sense.  I also checked
ovsdb/jsonrpc-server.c but I don't think that usage leaks - William, did
you
also check that?

Acked-by: Ryan Moats 
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] tests: Remove "test" from test names.

2016-06-08 Thread Ryan Moats
"dev"  wrote on 06/08/2016 09:01:16 PM:

> From: Ben Pfaff 
> To: dev@openvswitch.org
> Cc: Ben Pfaff 
> Date: 06/08/2016 09:01 PM
> Subject: [ovs-dev] [PATCH] tests: Remove "test" from test names.
> Sent by: "dev" 
>
> Every test is a test, so each test doesn't need to attest to being a
test.
>
> Signed-off-by: Ben Pfaff 

Simple enough...

Acked-by: Ryan Moats 
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH] tests: Remove "test" from test names.

2016-06-08 Thread Ben Pfaff
Every test is a test, so each test doesn't need to attest to being a test.

Signed-off-by: Ben Pfaff 
---
 tests/auto-attach.at |  2 +-
 tests/library.at | 44 ++--
 tests/ovn-controller-vtep.at | 12 ++--
 tests/ovn-sbctl.at   |  2 +-
 4 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/tests/auto-attach.at b/tests/auto-attach.at
index ad18a8a..ded1126 100644
--- a/tests/auto-attach.at
+++ b/tests/auto-attach.at
@@ -1,6 +1,6 @@
 AT_BANNER([auto-attach unit tests])
 
-AT_SETUP([auto-attach - packet tests])
+AT_SETUP([auto-attach - packets])
 AT_KEYWORDS([auto-attach])
 AT_CHECK(ovstest test-aa, [], [ignore], [ignore])
 
diff --git a/tests/library.at b/tests/library.at
index 8e6d602..e5095a0 100644
--- a/tests/library.at
+++ b/tests/library.at
@@ -1,77 +1,77 @@
 AT_BANNER([library unit tests])
 
-AT_SETUP([test flow extractor])
+AT_SETUP([flow extractor])
 AT_CHECK([$PERL `which flowgen.pl` >/dev/null 3>flows 4>pcap])
 AT_CHECK([ovstest test-flows flows pcap], [0], [checked 247 packets, 0 errors
 ])
 AT_CLEANUP
 
-AT_SETUP([test TCP/IP checksumming])
+AT_SETUP([TCP/IP checksumming])
 AT_CHECK([ovstest test-csum], [0], 
[####
 ])
 AT_CLEANUP
 
-AT_SETUP([test hash functions])
+AT_SETUP([hash functions])
 AT_CHECK([ovstest test-hash])
 AT_CLEANUP
 
-AT_SETUP([test hash map])
+AT_SETUP([hash map])
 AT_KEYWORDS([hmap])
 AT_CHECK([ovstest test-hmap], [0], [
 ])
 AT_CLEANUP
 
-AT_SETUP([test hash index])
+AT_SETUP([hash index])
 AT_KEYWORDS([hindex])
 AT_CHECK([ovstest test-hindex], [0], [.
 ])
 AT_CLEANUP
 
-AT_SETUP([test cuckoo hash])
+AT_SETUP([cuckoo hash])
 AT_KEYWORDS([cmap])
 AT_CHECK([ovstest test-cmap check 1], [0], [...
 ])
 AT_CLEANUP
 
-AT_SETUP([test counting cockoo hash])
+AT_SETUP([counting cockoo hash])
 AT_KEYWORDS([cmap])
 AT_CHECK([ovstest test-ccmap check 1], [0], [...
 ])
 AT_CLEANUP
 
-AT_SETUP([test atomic operations])
+AT_SETUP([atomic operations])
 AT_CHECK([ovstest test-atomic])
 AT_CLEANUP
 
-AT_SETUP([test linked lists])
+AT_SETUP([linked lists])
 AT_CHECK([ovstest test-list], [0], [...
 ])
 AT_CLEANUP
 
-AT_SETUP([test packet library])
+AT_SETUP([packet library])
 AT_CHECK([ovstest test-packets])
 AT_CLEANUP
 
-AT_SETUP([test SHA-1])
+AT_SETUP([SHA-1])
 AT_CHECK([ovstest test-sha1], [0], [.
 ])
 AT_CLEANUP
 
-AT_SETUP([test type properties])
+AT_SETUP([type properties])
 AT_CHECK([test-type-props])
 AT_CLEANUP
 
-AT_SETUP([test strtok_r bug fix])
+AT_SETUP([strtok_r bug fix])
 AT_CHECK([test-strtok_r], [0], [NULL NULL
 ])
 AT_CLEANUP
 
-AT_SETUP([test byte order conversion])
+AT_SETUP([byte order conversion])
 AT_KEYWORDS([byte order])
 AT_CHECK([ovstest test-byte-order])
 AT_CLEANUP
 
-AT_SETUP([test random number generator])
+AT_SETUP([random number generator])
 AT_CHECK([ovstest test-random], [0], [dnl
 average=7fa2014f
 
@@ -142,7 +142,7 @@ m4_foreach(
AT_CHECK([ovstest test-util testname], [0], [], [])
AT_CLEANUP])
 
-AT_SETUP([test unix socket, short pathname - C])
+AT_SETUP([unix socket, short pathname - C])
 AT_SKIP_IF([test "$IS_WIN32" = "yes"])
 AT_CHECK([ovstest test-unix-socket x])
 AT_CLEANUP
@@ -152,7 +152,7 @@ dnl go in a fixed-length field in struct sockaddr_un.  
Generally the limit
 dnl is about 100 bytes.  On Linux, we work around this by indirecting through
 dnl a directory fd using /proc/self/fd/.  We do not have a workaround
 dnl for other platforms, so we skip the test there.
-AT_SETUP([test unix socket, long pathname - C])
+AT_SETUP([unix socket, long pathname - C])
 AT_SKIP_IF([test "$IS_WIN32" = "yes"])
 dnl Linux sockaddr_un has a 108-byte limit, so this needs to be longer.
 dnl Linux "ecryptfs" has a 143-byte limit, so we use that many bytes.
@@ -166,7 +166,7 @@ AT_CHECK([cd $longname && ovstest test-unix-socket 
../$longname/socket socket])
 AT_CLEANUP
 
 m4_define([UNIX_SOCKET_SHORT_PATHNAME_PYN],
-  [AT_SETUP([test unix socket, short pathname - $1])
+  [AT_SETUP([unix socket, short pathname - $1])
AT_SKIP_IF([test $2 = no || test "$IS_WIN32" = "yes"])
AT_KEYWORDS([python unixsocket])
AT_CHECK([$3 $srcdir/test-unix-socket.py x])
@@ -181,7 +181,7 @@ dnl is about 100 bytes.  On Linux, we work around this by 
indirecting through
 dnl a directory fd using /proc/self/fd/.  We do not have a workaround
 dnl for other platforms, so we skip the test there.
 m4_define([UNIX_SOCKET_LONG_PATHNAME_PYN],
-  [AT_SETUP([test unix socket, long pathname - $1])
+  [AT_SETUP([unix socket, long pathname - $1])
AT_SKIP_IF([test $2 = no || test "$IS_WIN32" = "yes"])
AT_KEYWORDS([python unixsocket])
dnl Linux sockaddr_un has a 108-byte limit, so this needs to be longer.
@@ -224,7 +224,7 @@ AT_SETUP([snprintf])
 AT_CHECK([ovstest test-util snprintf])
 AT_CLEANUP
 
-AT_SETUP([test bitmap functions])
+AT_SETUP([bitmap functions])
 

Re: [ovs-dev] [PATCH] ovn-northd: fix logical router icmp response for directed broadcasts

2016-06-08 Thread Flaviof
On Wed, Jun 8, 2016 at 5:51 PM, Flavio Fernandes  wrote:

> Responding to icmp queries where the L3 destination is a directed broadcast
> was not being properly handled, causing the reply to be sent to all logical
> ports except for the one port that should receive it.
>
> Reference to the mailing list thread:
> http://openvswitch.org/pipermail/discuss/2016-June/021619.html
>
> This is a proposal for using choice C in the mail discussion; where
> handling
> of icmp queries to broadcast is performed by a separate logical rule.
> Unit test has been augmented to exercise this scenario.
>
> Note that since broadcast is contained to node where ovn-controller is
> running,
> there may be no real concern for a potential DOS attack scenario.
>
> Signed-off-by: Flavio Fernandes 
> ---
>


Update:
While testing this change, I noticed that the action eth_dst
is not affecting dl_dst. So, assuming option 'c' is the way
to go, there is still some more teaking to do here!

https://gist.github.com/4e2a080248bbde35ebbc2de956c4a194
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH v3 5/5] ovn: DNAT and SNAT on a gateway router.

2016-06-08 Thread Mickey Spiegel
Guru,

>The  names "inside_ip" and "outside_ip" keeps tripping me up. The  
>nomenclature feels similar to tunnel outside_ip and tunnel inside_ip.  What do 
>you think about "logical_ip" and "physical_ip" instead?

A quick search revealed that "inside" and "outside" are not always used the way 
that I thought, so my proposed terminology does not really work.

Thinking about "logical_ip" and "physical_ip", I am concerned that 
"physical_ip" will confuse people. When I hear "physical_ip", my mind wanders 
to pieces of hardware, which is not what this is about.

How about "internal_ip"/"external_ip"?

Mickey

-Guru Shetty  wrote: -
To: Mickey Spiegel/San Jose/IBM@IBMUS
From: Guru Shetty 
Date: 06/07/2016 08:14AM
Cc: ovs dev 
Subject: Re: [ovs-dev] [PATCH v3 5/5] ovn: DNAT and SNAT on a gateway router.



I am always a little nervous about putting things in schema that I know will 
change later,
especially when it comes to the structure of the config.

I am thinking of ovn-nb.ovsschema changes along the following lines:

 "Logical_Router": {
 "columns": {
 "name": {"type": "string"},
 "ports": {"type": {"key": {"type": "uuid",
"refTable": "Logical_Router_Port",
"refType": "strong"},
"min": 0,
"max": "unlimited"}},
 "static_routes": {"type": {"key": {"type": "uuid",
 "refTable": 
"Logical_Router_Static_Route",
 "refType": "strong"},
"min": 0,
"max": "unlimited"}},
 "default_gw": {"type": {"key": "string", "min": 0, "max": 1}},
 "enabled": {"type": {"key": "boolean", "min": 0, "max": 1}},
+"nat": {"type": {"key": {"type": "uuid",
+ "refTable": "NAT”,
+ "refType": "strong"},
+   "min": 0,
+   "max": "unlimited"}},
 "options": {
  "type": {"key": "string",
   "value": "string",
   "min": 0,
   "max": "unlimited"}},
 "external_ids": {
 "type": {"key": "string", "value": "string",
  "min": 0, "max": "unlimited"}}},
 "isRoot": true},


+“NAT”: {
+"columns": {
+"outside_ip": {"type": {"key": "string", "min": 0, "max": 1}},
+"inside_ip": {"type": {"key": "string", "min": 0, "max": 1}},
+"direction": {"type": {"key": {"type": "string",
+   "enum": ["set", ["dnat", “snat", 
"dnat_and_snat"]]}}},
+"isRoot": false},

DNAT maps from destination outside_ip to inside_ip.
SNAT maps from source inside_ip to outside_ip.
If direction == dnat or direction == dnat_and_snat, then check for inside_ip 
mask == 32
If direction == snat or direction == dnat_and_snat, then check for outside_ip 
== 32

The names "inside_ip" and "outside_ip" keeps tripping me up. The nomenclature 
feels similar to tunnel outside_ip and tunnel inside_ip. What do you think 
about "logical_ip" and "physical_ip" instead? 
 

>As an addendum, the current schema does dnat:30.0.0.3=192.168.1.2. I
>would like to enhance it to also be able to provide ports. Something
>like dnat:30.0.0.3:4443=192.168.1.2:80
>
>So we will need to include the above with the new table that you are
>thinking too.

You can add outside_protocol, outside_l4_port, inside_protocol, and 
inside_l4_port.

>> Note also that for distributed handling of floating IPs, we will need MAC
>> addresses to go along with the floating IPs. This makes me think it might
>> be worth having an indirection to a separate nat table.

We will add outside_mac in the patch for distributed handling of floating IPs.

Mickey



 

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v18 9/9] Add incremental processing to lflow_run and physical_run

2016-06-08 Thread Ryan Moats
From: "RYAN D. MOATS" 

This code changes to allow incremental processing of the
logical flow and physical binding tables whenver possible.

Note: flows created by physical_run for multicast_groups are
*NOT* handled incrementally due to to be solved issues
with GWs and local routers.

Signed-off-by: Ryan Moats 
---
 ovn/controller/binding.c|  6 
 ovn/controller/encaps.c |  5 +++
 ovn/controller/lflow.c  | 68 +
 ovn/controller/lflow.h  |  1 +
 ovn/controller/lport.c  | 14 ++--
 ovn/controller/lport.h  |  4 ++-
 ovn/controller/ovn-controller.c |  1 -
 ovn/controller/patch.c  |  8 +
 ovn/controller/physical.c   | 75 +
 ovn/controller/physical.h   |  1 +
 10 files changed, 165 insertions(+), 18 deletions(-)

diff --git a/ovn/controller/binding.c b/ovn/controller/binding.c
index 80f38b9..4a2bd0d 100644
--- a/ovn/controller/binding.c
+++ b/ovn/controller/binding.c
@@ -15,6 +15,8 @@
 
 #include 
 #include "binding.h"
+#include "lflow.h"
+#include "lport.h"
 
 #include "lib/bitmap.h"
 #include "lib/hmap.h"
@@ -118,6 +120,7 @@ remove_local_datapath(struct hmap *local_datapaths, struct 
local_datapath *ld)
 hmap_remove(local_datapaths, >hmap_node);
 hmap_remove(_datapaths_by_uuid, >uuid_hmap_node);
 free(ld);
+lflow_reset_processing();
 }
 
 static void
@@ -156,6 +159,9 @@ add_local_datapath(struct hmap *local_datapaths,
 binding_rec->datapath->tunnel_key);
 hmap_insert(_datapaths_by_uuid, >uuid_hmap_node,
 uuid_hash(uuid));
+lport_index_reset();
+mcgroup_index_reset();
+lflow_reset_processing();
 }
 
 static void
diff --git a/ovn/controller/encaps.c b/ovn/controller/encaps.c
index b5f874f..736da8e 100644
--- a/ovn/controller/encaps.c
+++ b/ovn/controller/encaps.c
@@ -17,6 +17,7 @@
 #include "encaps.h"
 #include "binding.h"
 #include "lflow.h"
+#include "lport.h"
 
 #include "lib/hash.h"
 #include "lib/sset.h"
@@ -234,6 +235,9 @@ tunnel_add(const struct sbrec_chassis *chassis_rec,
 free(port_name);
 free(ports);
 binding_reset_processing();
+lport_index_reset();
+mcgroup_index_reset();
+lflow_reset_processing();
 process_full_encaps = true;
 }
 
@@ -417,6 +421,7 @@ encaps_run(struct controller_ctx *ctx, const struct 
ovsrec_bridge *br_int,
 _hash->uuid_node);
 free(port_hash);
 binding_reset_processing();
+lflow_reset_processing();
 }
 continue;
 }
diff --git a/ovn/controller/lflow.c b/ovn/controller/lflow.c
index 464f9c8..702624d 100644
--- a/ovn/controller/lflow.c
+++ b/ovn/controller/lflow.c
@@ -26,6 +26,7 @@
 #include "ovn/lib/expr.h"
 #include "ovn/lib/ovn-sb-idl.h"
 #include "packets.h"
+#include "physical.h"
 #include "simap.h"
 
 VLOG_DEFINE_THIS_MODULE(lflow);
@@ -35,6 +36,17 @@ VLOG_DEFINE_THIS_MODULE(lflow);
 /* Contains "struct expr_symbol"s for fields supported by OVN lflows. */
 static struct shash symtab;
 
+static bool full_flow_processing = false;
+static bool full_logical_flow_processing = false;
+static bool full_neighbor_flow_processing = false;
+
+void
+lflow_reset_processing(void)
+{
+full_flow_processing = true;
+physical_reset_processing();
+}
+
 static void
 add_logical_register(struct shash *symtab, enum mf_field_id id)
 {
@@ -365,12 +377,30 @@ add_logical_flows(struct controller_ctx *ctx, const 
struct lport_index *lports,
   const struct simap *ct_zones)
 {
 uint32_t conj_id_ofs = 1;
-
 const struct sbrec_logical_flow *lflow;
-SBREC_LOGICAL_FLOW_FOR_EACH (lflow, ctx->ovnsb_idl) {
-consider_logical_flow(lports, mcgroups, lflow, local_datapaths,
-  patched_datapaths, ct_zones, _id_ofs,
-  true);
+
+if (full_logical_flow_processing) {
+SBREC_LOGICAL_FLOW_FOR_EACH (lflow, ctx->ovnsb_idl) {
+consider_logical_flow(lports, mcgroups, lflow, local_datapaths,
+  patched_datapaths, ct_zones, _id_ofs,
+  true);
+}
+full_logical_flow_processing = false;
+} else {
+SBREC_LOGICAL_FLOW_FOR_EACH_TRACKED (lflow, ctx->ovnsb_idl) {
+bool is_deleted = sbrec_logical_flow_row_get_seqno(lflow,
+OVSDB_IDL_CHANGE_DELETE) > 0;
+bool is_new = sbrec_logical_flow_row_get_seqno(lflow,
+OVSDB_IDL_CHANGE_MODIFY) == 0;
+
+if (is_deleted) {
+ofctrl_remove_flows(>header_.uuid);
+continue;
+}
+consider_logical_flow(lports, mcgroups, lflow, local_datapaths,
+  patched_datapaths, ct_zones, _id_ofs,
+  is_new);
+

[ovs-dev] [PATCH v18 7/9] Refactor multicast group processing in physical.c

2016-06-08 Thread Ryan Moats
From: "RYAN D. MOATS" 

Extract block from SBREC_MULTICAST_GROUP_FOR_EACH block
in physical_run to helper method so that it can be reused when
doing incremental processing.

Signed-off-by: RYAN D. MOATS 
---
 ovn/controller/physical.c | 233 --
 1 file changed, 123 insertions(+), 110 deletions(-)

diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c
index 45b6953..3aa0cad 100644
--- a/ovn/controller/physical.c
+++ b/ovn/controller/physical.c
@@ -456,6 +456,126 @@ consider_port_binding(enum mf_field_id mff_ovn_geneve,
 }
 }
 
+static void
+consider_mc_group(enum mf_field_id mff_ovn_geneve,
+  const struct simap *ct_zones,
+  struct hmap *local_datapaths,
+  const struct sbrec_multicast_group *mc,
+  struct ofpbuf *ofpacts_p,
+  struct ofpbuf *remote_ofpacts_p, bool is_new)
+{
+struct sset remote_chassis = SSET_INITIALIZER(_chassis);
+struct match match;
+
+match_init_catchall();
+match_set_metadata(, htonll(mc->datapath->tunnel_key));
+match_set_reg(, MFF_LOG_OUTPORT - MFF_REG0, mc->tunnel_key);
+
+/* Go through all of the ports in the multicast group:
+ *
+ *- For remote ports, add the chassis to 'remote_chassis'.
+ *
+ *- For local ports (other than logical patch ports), add actions
+ *  to 'ofpacts_p' to set the output port and resubmit.
+ *
+ *- For logical patch ports, add actions to 'remote_ofpacts_p'
+ *  instead.  (If we put them in 'ofpacts', then the output
+ *  would happen on every hypervisor in the multicast group,
+ *  effectively duplicating the packet.)
+ */
+ofpbuf_clear(ofpacts_p);
+ofpbuf_clear(remote_ofpacts_p);
+for (size_t i = 0; i < mc->n_ports; i++) {
+struct sbrec_port_binding *port = mc->ports[i];
+
+if (port->datapath != mc->datapath) {
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+VLOG_WARN_RL(, UUID_FMT": multicast group contains ports "
+ "in wrong datapath",
+ UUID_ARGS(>header_.uuid));
+continue;
+}
+
+int zone_id = simap_get(ct_zones, port->logical_port);
+if (zone_id) {
+put_load(zone_id, MFF_LOG_CT_ZONE, 0, 32, ofpacts_p);
+}
+
+if (!strcmp(port->type, "patch")) {
+put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
+ remote_ofpacts_p);
+put_resubmit(OFTABLE_DROP_LOOPBACK, remote_ofpacts_p);
+} else if (simap_contains(_to_ofport,
+   (port->parent_port && *port->parent_port)
+   ? port->parent_port : port->logical_port)) {
+put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32, ofpacts_p);
+put_resubmit(OFTABLE_DROP_LOOPBACK, ofpacts_p);
+} else if (port->chassis && !get_localnet_port(local_datapaths,
+ mc->datapath->tunnel_key)) {
+/* Add remote chassis only when localnet port not exist,
+ * otherwise multicast will reach remote ports through localnet
+ * port. */
+sset_add(_chassis, port->chassis->name);
+}
+}
+
+/* Table 33, priority 100.
+ * ===
+ *
+ * Handle output to the local logical ports in the multicast group, if
+ * any. */
+bool local_ports = ofpacts_p->size > 0;
+if (local_ports) {
+/* Following delivery to local logical ports, restore the multicast
+ * group as the logical output port. */
+put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, ofpacts_p);
+
+ofctrl_add_flow(OFTABLE_LOCAL_OUTPUT, 100, , ofpacts_p,
+>header_.uuid, is_new);
+}
+
+/* Table 32, priority 100.
+ * ===
+ *
+ * Handle output to the remote chassis in the multicast group, if
+ * any. */
+if (!sset_is_empty(_chassis) || remote_ofpacts_p->size > 0) {
+if (remote_ofpacts_p->size > 0) {
+/* Following delivery to logical patch ports, restore the
+ * multicast group as the logical output port. */
+put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
+ remote_ofpacts_p);
+}
+
+const char *chassis;
+const struct chassis_tunnel *prev = NULL;
+SSET_FOR_EACH (chassis, _chassis) {
+const struct chassis_tunnel *tun
+= chassis_tunnel_find(chassis);
+if (!tun) {
+continue;
+}
+
+if (!prev || tun->type != prev->type) {
+put_encapsulation(mff_ovn_geneve, tun, mc->datapath,
+  mc->tunnel_key, remote_ofpacts_p);
+prev = tun;
+}
+

[ovs-dev] [PATCH v18 6/9] Refactor port binding processing in physical.c

2016-06-08 Thread Ryan Moats
From: "RYAN D. MOATS" 

Extract block from SBREC_PORT_BINDING_FOR_EACH block in
physical_run to helper method so that it can be reused when
doing incremental processing.

Side effects:
  - localvif_to_oport and tunnels are now static file scoped

Signed-off-by: RYAN D. MOATS 
---
 ovn/controller/physical.c | 614 --
 1 file changed, 314 insertions(+), 300 deletions(-)

diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c
index a4ef873..45b6953 100644
--- a/ovn/controller/physical.c
+++ b/ovn/controller/physical.c
@@ -54,6 +54,10 @@ physical_register_ovs_idl(struct ovsdb_idl *ovs_idl)
 static struct uuid *hc_uuid = NULL; // uuid to identify OF flows not associated
 // with ovsdb rows.
 
+static struct simap localvif_to_ofport =
+SIMAP_INITIALIZER(_to_ofport);
+static struct hmap tunnels = HMAP_INITIALIZER();
+
 /* Maps from a chassis to the OpenFlow port number of the tunnel that can be
  * used to reach that chassis. */
 struct chassis_tunnel {
@@ -64,11 +68,11 @@ struct chassis_tunnel {
 };
 
 static struct chassis_tunnel *
-chassis_tunnel_find(struct hmap *tunnels, const char *chassis_id)
+chassis_tunnel_find(const char *chassis_id)
 {
 struct chassis_tunnel *tun;
 HMAP_FOR_EACH_WITH_HASH (tun, hmap_node, hash_string(chassis_id, 0),
- tunnels) {
+ ) {
 if (!strcmp(tun->chassis_id, chassis_id)) {
 return tun;
 }
@@ -149,14 +153,315 @@ get_localnet_port(struct hmap *local_datapaths, int64_t 
tunnel_key)
 return ld ? ld->localnet_port : NULL;
 }
 
+static void
+consider_port_binding(enum mf_field_id mff_ovn_geneve,
+  const struct simap *ct_zones,
+  struct hmap *local_datapaths,
+  struct hmap *patched_datapaths,
+  const struct sbrec_port_binding *binding,
+  struct ofpbuf *ofpacts_p, bool is_new)
+{
+/* Skip the port binding if the port is on a datapath that is neither
+ * local nor with any logical patch port connected, because local ports
+ * would never need to talk to those ports.
+ *
+ * Even with this approach there could still be unnecessary port
+ * bindings processed. A better approach would be a kind of "flood
+ * fill" algorithm:
+ *
+ *   1. Initialize set S to the logical datapaths that have a port
+ *  located on the hypervisor.
+ *
+ *   2. For each patch port P in a logical datapath in S, add the
+ *  logical datapath of the remote end of P to S.  Iterate
+ *  until S reaches a fixed point.
+ *
+ * This can be implemented in northd, which can generate the sets and
+ * save it on each port-binding record in SB, and ovn-controller can
+ * use the information directly. However, there can be update storms
+ * when a pair of patch ports are added/removed to connect/disconnect
+ * large lrouters and lswitches. This need to be studied further.
+ */
+uint32_t dp_key = binding->datapath->tunnel_key;
+uint32_t port_key = binding->tunnel_key;
+if (!get_local_datapath(local_datapaths, dp_key)
+&& !get_patched_datapath(patched_datapaths, dp_key)) {
+return;
+}
+
+/* Find the OpenFlow port for the logical port, as 'ofport'.  This is
+ * one of:
+ *
+ * - If the port is a VIF on the chassis we're managing, the
+ *   OpenFlow port for the VIF.  'tun' will be NULL.
+ *
+ *   The same logic handles logical patch ports, as well as
+ *   localnet patch ports.
+ *
+ *   For a container nested inside a VM and accessible via a VLAN,
+ *   'tag' is the VLAN ID; otherwise 'tag' is 0.
+ *
+ *   For a localnet patch port, if a VLAN ID was configured, 'tag'
+ *   is set to that VLAN ID; otherwise 'tag' is 0.
+ *
+ * - If the port is on a remote chassis, the OpenFlow port for a
+ *   tunnel to the VIF's remote chassis.  'tun' identifies that
+ *   tunnel.
+ */
+
+int tag = 0;
+ofp_port_t ofport;
+bool is_remote = false;
+if (binding->parent_port && *binding->parent_port) {
+if (!binding->tag) {
+return;
+}
+ofport = u16_to_ofp(simap_get(_to_ofport,
+  binding->parent_port));
+if (ofport) {
+tag = *binding->tag;
+}
+} else {
+ofport = u16_to_ofp(simap_get(_to_ofport,
+  binding->logical_port));
+if (!strcmp(binding->type, "localnet") && ofport && binding->tag) {
+tag = *binding->tag;
+}
+}
+
+const struct chassis_tunnel *tun = NULL;
+const struct sbrec_port_binding *localnet_port =
+get_localnet_port(local_datapaths, dp_key);
+if 

[ovs-dev] [PATCH v18 5/9] Refactor lflow.c

2016-06-08 Thread Ryan Moats
From: "RYAN D. MOATS" 

Refactor code block inside of SBREC_LOGICAL_FLOW_FOR_EACH
loop in add_logical_flow so that this can be reused when
incremental processing is added.

Signed-off-by: RYAN D. MOATS 
---
 ovn/controller/lflow.c | 315 ++---
 1 file changed, 165 insertions(+), 150 deletions(-)

diff --git a/ovn/controller/lflow.c b/ovn/controller/lflow.c
index d3d95cd..259ac1f 100644
--- a/ovn/controller/lflow.c
+++ b/ovn/controller/lflow.c
@@ -193,169 +193,184 @@ is_switch(const struct sbrec_datapath_binding *ldp)
 
 }
 
-/* Adds the logical flows from the Logical_Flow table to flow tables. */
 static void
-add_logical_flows(struct controller_ctx *ctx, const struct lport_index *lports,
-  const struct mcgroup_index *mcgroups,
-  const struct hmap *local_datapaths,
-  const struct hmap *patched_datapaths,
-  const struct simap *ct_zones)
+consider_logical_flow(const struct lport_index *lports,
+  const struct mcgroup_index *mcgroups,
+  const struct sbrec_logical_flow *lflow,
+  const struct hmap *local_datapaths,
+  const struct hmap *patched_datapaths,
+  const struct simap *ct_zones,
+  uint32_t *conj_id_ofs_p,
+  bool is_new)
 {
-uint32_t conj_id_ofs = 1;
+/* Determine translation of logical table IDs to physical table IDs. */
+bool ingress = !strcmp(lflow->pipeline, "ingress");
 
-const struct sbrec_logical_flow *lflow;
-SBREC_LOGICAL_FLOW_FOR_EACH (lflow, ctx->ovnsb_idl) {
-/* Determine translation of logical table IDs to physical table IDs. */
-bool ingress = !strcmp(lflow->pipeline, "ingress");
-
-const struct sbrec_datapath_binding *ldp = lflow->logical_datapath;
-if (!ldp) {
-continue;
-}
-if (is_switch(ldp)) {
-/* For a logical switch datapath, local_datapaths tells us if there
- * are any local ports for this datapath.  If not, we can skip
- * processing logical flows if that logical switch datapath is not
- * patched to any logical router.
- *
- * Otherwise, we still need both ingress and egress pipeline
- * because even if there are no local ports, we still may need to
- * execute the ingress pipeline after a packet leaves a logical
- * router and we need to do egress pipeline for a switch that
- * is connected to only routers.  Further optimization is possible,
- * but not based on what we know with local_datapaths right now.
- *
- * A better approach would be a kind of "flood fill" algorithm:
- *
- *   1. Initialize set S to the logical datapaths that have a port
- *  located on the hypervisor.
- *
- *   2. For each patch port P in a logical datapath in S, add the
- *  logical datapath of the remote end of P to S.  Iterate
- *  until S reaches a fixed point.
- *
- * This can be implemented in northd, which can generate the sets 
and
- * save it on each port-binding record in SB, and ovn-controller 
can
- * use the information directly. However, there can be update 
storms
- * when a pair of patch ports are added/removed to 
connect/disconnect
- * large lrouters and lswitches. This need to be studied further.
- */
-
-if (!get_local_datapath(local_datapaths, ldp->tunnel_key)) {
-if (!get_patched_datapath(patched_datapaths,
-  ldp->tunnel_key)) {
-continue;
-}
+const struct sbrec_datapath_binding *ldp = lflow->logical_datapath;
+if (!ldp) {
+return;
+}
+if (is_switch(ldp)) {
+/* For a logical switch datapath, local_datapaths tells us if there
+ * are any local ports for this datapath.  If not, we can skip
+ * processing logical flows if that logical switch datapath is not
+ * patched to any logical router.
+ *
+ * Otherwise, we still need both ingress and egress pipeline
+ * because even if there are no local ports, we still may need to
+ * execute the ingress pipeline after a packet leaves a logical
+ * router and we need to do egress pipeline for a switch that
+ * is connected to only routers.  Further optimization is possible,
+ * but not based on what we know with local_datapaths right now.
+ *
+ * A better approach would be a kind of "flood fill" algorithm:
+ *
+ *   1. Initialize set S to the logical datapaths that have a port
+ *  located on 

[ovs-dev] [PATCH v18 4/9] Persist ovn flow tables.

2016-06-08 Thread Ryan Moats
From: "RYAN D. MOATS" 

Ensure that ovn flow tables are persisted so that changes to
them chan be applied incrementally - this is a prereq for
making lflow_run and physical_run incremental.

Signed-off-by: Ryan Moats 
---
 ovn/controller/lflow.c  |  26 ++--
 ovn/controller/lflow.h  |   3 +-
 ovn/controller/ofctrl.c | 265 
 ovn/controller/ofctrl.h |  18 ++-
 ovn/controller/ovn-controller.c |   9 +-
 ovn/controller/physical.c   |  59 +
 ovn/controller/physical.h   |   2 +-
 7 files changed, 258 insertions(+), 124 deletions(-)

diff --git a/ovn/controller/lflow.c b/ovn/controller/lflow.c
index efc427d..d3d95cd 100644
--- a/ovn/controller/lflow.c
+++ b/ovn/controller/lflow.c
@@ -193,13 +193,13 @@ is_switch(const struct sbrec_datapath_binding *ldp)
 
 }
 
-/* Adds the logical flows from the Logical_Flow table to 'flow_table'. */
+/* Adds the logical flows from the Logical_Flow table to flow tables. */
 static void
 add_logical_flows(struct controller_ctx *ctx, const struct lport_index *lports,
   const struct mcgroup_index *mcgroups,
   const struct hmap *local_datapaths,
   const struct hmap *patched_datapaths,
-  const struct simap *ct_zones, struct hmap *flow_table)
+  const struct simap *ct_zones)
 {
 uint32_t conj_id_ofs = 1;
 
@@ -330,8 +330,8 @@ add_logical_flows(struct controller_ctx *ctx, const struct 
lport_index *lports,
 m->match.flow.conj_id += conj_id_ofs;
 }
 if (!m->n) {
-ofctrl_add_flow(flow_table, ptable, lflow->priority,
->match, );
+ofctrl_add_flow(ptable, lflow->priority, >match, ,
+>header_.uuid, true);
 } else {
 uint64_t conj_stubs[64 / 8];
 struct ofpbuf conj;
@@ -346,8 +346,8 @@ add_logical_flows(struct controller_ctx *ctx, const struct 
lport_index *lports,
 dst->clause = src->clause;
 dst->n_clauses = src->n_clauses;
 }
-ofctrl_add_flow(flow_table, ptable, lflow->priority,
->match, );
+ofctrl_add_flow(ptable, lflow->priority, >match, ,
+>header_.uuid, true);
 ofpbuf_uninit();
 }
 }
@@ -372,12 +372,12 @@ put_load(const uint8_t *data, size_t len,
 bitwise_one(>mask, sf->field->n_bytes, ofs, n_bits);
 }
 
-/* Adds an OpenFlow flow to 'flow_table' for each MAC binding in the OVN
+/* Adds an OpenFlow flow to flow tables for each MAC binding in the OVN
  * southbound database, using 'lports' to resolve logical port names to
  * numbers. */
 static void
 add_neighbor_flows(struct controller_ctx *ctx,
-   const struct lport_index *lports, struct hmap *flow_table)
+   const struct lport_index *lports)
 {
 struct ofpbuf ofpacts;
 struct match match;
@@ -413,8 +413,8 @@ add_neighbor_flows(struct controller_ctx *ctx,
 ofpbuf_clear();
 put_load(mac.ea, sizeof mac.ea, MFF_ETH_DST, 0, 48, );
 
-ofctrl_add_flow(flow_table, OFTABLE_MAC_BINDING, 100,
-, );
+ofctrl_add_flow(OFTABLE_MAC_BINDING, 100, , ,
+>header_.uuid, true);
 }
 ofpbuf_uninit();
 }
@@ -426,11 +426,11 @@ lflow_run(struct controller_ctx *ctx, const struct 
lport_index *lports,
   const struct mcgroup_index *mcgroups,
   const struct hmap *local_datapaths,
   const struct hmap *patched_datapaths,
-  const struct simap *ct_zones, struct hmap *flow_table)
+  const struct simap *ct_zones)
 {
 add_logical_flows(ctx, lports, mcgroups, local_datapaths,
-  patched_datapaths, ct_zones, flow_table);
-add_neighbor_flows(ctx, lports, flow_table);
+  patched_datapaths, ct_zones);
+add_neighbor_flows(ctx, lports);
 }
 
 void
diff --git a/ovn/controller/lflow.h b/ovn/controller/lflow.h
index a3fc50c..8f8f81a 100644
--- a/ovn/controller/lflow.h
+++ b/ovn/controller/lflow.h
@@ -63,8 +63,7 @@ void lflow_run(struct controller_ctx *, const struct 
lport_index *,
const struct mcgroup_index *,
const struct hmap *local_datapaths,
const struct hmap *patched_datapaths,
-   const struct simap *ct_zones,
-   struct hmap *flow_table);
+   const struct simap *ct_zones);
 void lflow_destroy(void);
 
 #endif /* ovn/lflow.h */
diff --git a/ovn/controller/ofctrl.c b/ovn/controller/ofctrl.c
index f537bc0..060c6a2 100644
--- a/ovn/controller/ofctrl.c
+++ b/ovn/controller/ofctrl.c
@@ -16,7 +16,9 @@
 #include 
 #include "byte-order.h"
 #include "dirs.h"
+#include "flow.h"
 #include "hash.h"
+#include 

[ovs-dev] [PATCH v18 3/9] Persist lport_index and mcgroup_index structures

2016-06-08 Thread Ryan Moats
From: "RYAN D. MOATS" 

This is preparatory to making physical_run and lflow_run process
incrementally as changes to the data in these structures control
that processing.

Signed-off-by: RYAN D. MOATS 
---
 ovn/controller/lport.c  | 220 +---
 ovn/controller/lport.h  |  22 +++-
 ovn/controller/ovn-controller.c |  16 +--
 3 files changed, 212 insertions(+), 46 deletions(-)

diff --git a/ovn/controller/lport.c b/ovn/controller/lport.c
index a7ae320..461cabd 100644
--- a/ovn/controller/lport.c
+++ b/ovn/controller/lport.c
@@ -17,6 +17,7 @@
 
 #include "lport.h"
 #include "hash.h"
+#include "lflow.h"
 #include "openvswitch/vlog.h"
 #include "ovn/lib/ovn-sb-idl.h"
 
@@ -24,48 +25,108 @@ VLOG_DEFINE_THIS_MODULE(lport);
 
 /* A logical port. */
 struct lport {
-struct hmap_node name_node; /* Index by name. */
-struct hmap_node key_node;  /* Index by (dp_key, port_key). */
+struct hmap_node name_node;  /* Index by name. */
+struct hmap_node key_node;   /* Index by (dp_key, port_key). */
+struct hmap_node uuid_node;  /* Index by row uuid. */
+const struct uuid *uuid;
 const struct sbrec_port_binding *pb;
 };
 
+static bool full_lport_rebuild = false;
+static bool full_mc_rebuild = false;
+
+void
+lport_index_reset(void)
+{
+full_lport_rebuild = true;
+}
+
 void
-lport_index_init(struct lport_index *lports, struct ovsdb_idl *ovnsb_idl)
+lport_index_init(struct lport_index *lports)
 {
 hmap_init(>by_name);
 hmap_init(>by_key);
+hmap_init(>by_uuid);
+}
 
-const struct sbrec_port_binding *pb;
-SBREC_PORT_BINDING_FOR_EACH (pb, ovnsb_idl) {
-if (lport_lookup_by_name(lports, pb->logical_port)) {
-static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
-VLOG_WARN_RL(, "duplicate logical port name '%s'",
- pb->logical_port);
-continue;
-}
-
-struct lport *p = xmalloc(sizeof *p);
-hmap_insert(>by_name, >name_node,
-hash_string(pb->logical_port, 0));
-hmap_insert(>by_key, >key_node,
-hash_int(pb->tunnel_key, pb->datapath->tunnel_key));
-p->pb = pb;
+void
+lport_index_remove(struct lport_index *lports, const struct uuid *uuid)
+{
+const struct lport *port = lport_lookup_by_uuid(lports, uuid);
+if (port) {
+hmap_remove(>by_name, (struct hmap_node *) >name_node);
+hmap_remove(>by_key, (struct hmap_node *) >key_node);
+hmap_remove(>by_uuid, (struct hmap_node *) >uuid_node);
+free((void *) port);
 }
 }
 
 void
-lport_index_destroy(struct lport_index *lports)
+lport_index_clear(struct lport_index *lports)
 {
 /* Destroy all of the "struct lport"s.
  *
- * We don't have to remove the node from both indexes. */
-struct lport *port;
-HMAP_FOR_EACH_POP (port, name_node, >by_name) {
+ * We have to remove the node from all indexes. */
+struct lport *port, *next;
+HMAP_FOR_EACH_SAFE (port, next, name_node, >by_name) {
+hmap_remove(>by_name, >name_node);
+hmap_remove(>by_key, >key_node);
+hmap_remove(>by_uuid, >uuid_node);
 free(port);
 }
+}
+
+static void
+consider_lport_index(struct lport_index *lports,
+ const struct sbrec_port_binding *pb)
+{
+if (lport_lookup_by_name(lports, pb->logical_port)) {
+return;
+}
+
+struct lport *p = xmalloc(sizeof *p);
+hmap_insert(>by_name, >name_node,
+hash_string(pb->logical_port, 0));
+hmap_insert(>by_key, >key_node,
+hash_int(pb->tunnel_key, pb->datapath->tunnel_key));
+hmap_insert(>by_uuid, >uuid_node,
+uuid_hash(>header_.uuid));
+p->uuid = >header_.uuid;
+p->pb = pb;
+}
+
+void
+lport_index_fill(struct lport_index *lports, struct ovsdb_idl *ovnsb_idl)
+{
+const struct sbrec_port_binding *pb;
+if (full_lport_rebuild) {
+lport_index_clear(lports);
+SBREC_PORT_BINDING_FOR_EACH (pb, ovnsb_idl) {
+consider_lport_index(lports, pb);
+}
+full_lport_rebuild = false;
+} else {
+SBREC_PORT_BINDING_FOR_EACH_TRACKED (pb, ovnsb_idl) {
+bool is_delete = sbrec_port_binding_row_get_seqno(pb,
+OVSDB_IDL_CHANGE_DELETE) > 0;
+
+if (is_delete) {
+lport_index_remove(lports, >header_.uuid);
+continue;
+}
+consider_lport_index(lports, pb);
+}
+}
+}
+
+void
+lport_index_destroy(struct lport_index *lports)
+{
+lport_index_clear(lports);
 
 hmap_destroy(>by_name);
 hmap_destroy(>by_key);
+hmap_destroy(>by_uuid);
 }
 
 /* Finds and returns the lport with the given 'name', or NULL if no such lport
@@ -83,6 +144,20 @@ lport_lookup_by_name(const struct lport_index *lports, 
const char *name)
 return NULL;
 }
 
+const struct lport *

[ovs-dev] You're Only Going to See This ONCE

2016-06-08 Thread dev
This email is only going out to a select few people.

Actually, you're one out of only a handful of people who will get to see this 
system.
People like Warren Buffet have been using similar trading systems for years.
People such as Warren make money while they sleep...

And so can YOU. 
If you aren't already earning 1K or more each day, then you HAVE to 
see this
You're only going to be given ONE opportunity.___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] bridge: allow OVS to connect to Unix Domain Sockets outside its run directory

2016-06-08 Thread Ansis Atteka
On 8 June 2016 at 16:45, Ansis Atteka  wrote:

>
>
> On 8 June 2016 at 14:02, Ben Pfaff  wrote:
>
>> On Thu, Jun 02, 2016 at 07:47:33PM -0700, Ansis Atteka wrote:
>> > Before this patch OVS refused to connect to a local controller that
>> > had its Unix Domain Socket outside Open vSwitch run directory (e.g.
>> > outside '/var/run/openvswitch/').
>> >
>> > After this patch this restriction imposed by Open vSwitch itself is
>> > abandoned and OVS should be able to connect to controller's Unix Domain
>> > Sockets anywhere under filesystem.
>>
>> When I run "netstat -lnx" on my laptop, I see a bunch of listening Unix
>> domain sockets.
>>
>
>> Some of these listening sockets are security sensitive, such as SSH
>> agents, so it wouldn't be good to have a remote manager be able to point
>> OVS to them: what if a clever person could figure out how to send
>> arbitrary data to them (maybe in a packet-in message somehow?) via
>> OpenFlow.  Other examples are dbus and udev sockets.
>>
>> That's my main worry here.
>>
>
> Ok, I see your concern now. At least with SELinux enabled such attacks
> would fail on default Fedora, RHEL and CentOS:
>
> [root@localhost ovs]# tail /var/log/openvswitch/ovs-vswitchd.log
> 2016-06-08T22:51:48.215Z|00033|connmgr|INFO|brz: added service controller
> "punix://var/run/openvswitch/brz.mgmt"
> 2016-06-08T22:51:48.215Z|00034|bridge|INFO|bridge brs: using datapath ID
> e6d0bb7b3047
> 2016-06-08T22:51:48.215Z|00035|connmgr|INFO|brs: added service controller
> "punix://var/run/openvswitch/brs.mgmt"
> 2016-06-08T22:51:48.220Z|00036|bridge|INFO|ovs-vswitchd (Open vSwitch)
> 2.5.90
> 2016-06-08T22:51:48.665Z|00037|rconn|INFO|brX<->unix:/run/udev/control:
> connecting...
> 2016-06-08T22:51:48.665Z|00038|rconn|WARN|brX<->unix:/run/udev/control:
> connection failed (Permission denied)
> 2016-06-08T22:51:48.665Z|00039|rconn|INFO|brX<->unix:/run/udev/control:
> waiting 2 seconds before reconnect
> 2016-06-08T22:51:50.666Z|00040|rconn|INFO|brX<->unix:/run/udev/control:
> connecting...
> 2016-06-08T22:51:50.666Z|00041|rconn|WARN|brX<->unix:/run/udev/control:
> connection failed (Permission denied)
>
> because Open vSwitch runs under openvswitch_t domain:
>
> system_u:system_r:openvswitch_t:s0 root  16567 16566  0 66914 36460   0
> 15:51 ?00:00:00 ovs-vswitchd unix:/var/run/openvswitch/db.sock
> -vconsole:emer -vsyslog:err -vfile:info --mlockall --no-chdir
> --log-file=/var/log/openvswitch/ovs-vswitchd.log
> --pidfile=/var/run/openvswitch/ovs-vswitchd.pid --detach --monitor
>
> and OVS is unable to connect sockets other than those that have type
> openvswitch*_t type:
>
> srwxr-x---. 1 root root system_u:object_r:openvswitch_var_run_t:s0 0 Jun
>  8 15:51 /var/run/openvswitch/brT.mgmt
>
> For example, udev control socket and others use udev_var_run_t type:
>
> srw---. 1 root root system_u:object_r:udev_var_run_t:s0 0 Jun  2 15:43
> /run/udev/control
>
>
> So the problem my patch is trying to solve is that there could be over
> confined processes trying
>
Just noticed that my sentence was abruptly cut:

"So the problem my patch is trying to solve is that there could be
over-confined processes trying to connect to OVS but they can't do that
because /var/run/openvswitch and its contents are owned by root:root."


>
> Here are the options I see:
> 1. abandon my patch and tell everyone to figure out on their own on how to
> connect to Open vSwitch sockets under /var/run/openvswitch/ directory (e.g.
> by changing group of /var/run/openvswitch or by opening socket while their
> processes are running under root user and only after that they change the
> user).
> 2. abandon my patch, but implement something similar to Aaron's DPDK
> socket chown()/chmod() patch and tell others to use it.
> 3. abandon my patch, but implement some kind of access control inside
> OVSDB. For example, the roles could be "local admin" and "remote
> controller". "remote controller" should have limited access to certain
> database files. This is non-
>
s/database files/database tables or columns/

> trivial, but may be something to consider long-term because OVSDB
> currently contains a lot of things to which access is granted on
> all-or-nothing basis.
> 4. move forward with my patch, but allow to dynamically specify whitelist.
> Obviously this whitelist must not be configurable through OVSDB because
> then it loses its purpose.
> 5. move forward with my patch, but use Mandatory Access Control to specify
> whitelist of paths to which OVS can connect to (this makes assumption that
> MAC - SELinux or AppArmor - is up and running).
>
> Unfortunately there is no silver bullet for this problem.
>
> Actually, there may be a 6th solution:
6. move forward with my patch and hope that OpenFlow handshake would make
the other side to close the socket early. This is still not that good as
proper access control.
___
dev mailing list
dev@openvswitch.org

Re: [ovs-dev] [PATCH v10 2/2] ovn-northd: Add logical flows to support native DHCP

2016-06-08 Thread Numan Siddique
Sorry for the typo Ramu
On Jun 9, 2016 5:40 AM, "Numan Siddique"  wrote:

> Thanks Dani for testing the patches.
> Regards
> Numan
> On Jun 9, 2016 12:25 AM, "Ramu Ramamurthy" 
> wrote:
>
>> I tested and verified this patchset using the corresponding WIP
>> openstack dhcp patch.
>> native-dhcp works end-to-end as advertized - I tested with the default
>> dhcp-options
>> that are used when VMs boot.
>>
>> On Mon, Jun 6, 2016 at 10:49 PM, Numan Siddique 
>> wrote:
>> > OVN implements a native DHCP support which caters to the common
>> > use case of providing an IP address to a booting instance by
>> > providing stateless replies to DHCP requests based on statically
>> > configured address mappings. To do this it allows a short list of
>> > DHCP options to be configured and applied at each compute host
>> > running ovn-controller.
>> >
>> > A new table 'Subnet' is added in OVN NB DB to store the DHCP options.
>> >
>> > For each logical port following flows are added if the CMS has defined
>> > DHCP options in the 'Subnet' column
>> >
>> >  - A logical flow which copies the DHCP options to the DHCP
>> >request packets using the 'put_dhcp_opts' action and advances the
>> >packet to the next stage.
>> >
>> >  - A logical flow which implements the DHCP reponder by sending
>> >the DHCP reply back to the inport once the 'put_dhcp_opts' action
>> >is applied.
>> >
>> > Signed-Off-by: Numan Siddique 
>>
>
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH v10 2/2] ovn-northd: Add logical flows to support native DHCP

2016-06-08 Thread Numan Siddique
Thanks Dani for testing the patches.
Regards
Numan
On Jun 9, 2016 12:25 AM, "Ramu Ramamurthy" 
wrote:

> I tested and verified this patchset using the corresponding WIP
> openstack dhcp patch.
> native-dhcp works end-to-end as advertized - I tested with the default
> dhcp-options
> that are used when VMs boot.
>
> On Mon, Jun 6, 2016 at 10:49 PM, Numan Siddique 
> wrote:
> > OVN implements a native DHCP support which caters to the common
> > use case of providing an IP address to a booting instance by
> > providing stateless replies to DHCP requests based on statically
> > configured address mappings. To do this it allows a short list of
> > DHCP options to be configured and applied at each compute host
> > running ovn-controller.
> >
> > A new table 'Subnet' is added in OVN NB DB to store the DHCP options.
> >
> > For each logical port following flows are added if the CMS has defined
> > DHCP options in the 'Subnet' column
> >
> >  - A logical flow which copies the DHCP options to the DHCP
> >request packets using the 'put_dhcp_opts' action and advances the
> >packet to the next stage.
> >
> >  - A logical flow which implements the DHCP reponder by sending
> >the DHCP reply back to the inport once the 'put_dhcp_opts' action
> >is applied.
> >
> > Signed-Off-by: Numan Siddique 
>
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] bridge: allow OVS to connect to Unix Domain Sockets outside its run directory

2016-06-08 Thread Ansis Atteka
On 8 June 2016 at 14:02, Ben Pfaff  wrote:

> On Thu, Jun 02, 2016 at 07:47:33PM -0700, Ansis Atteka wrote:
> > Before this patch OVS refused to connect to a local controller that
> > had its Unix Domain Socket outside Open vSwitch run directory (e.g.
> > outside '/var/run/openvswitch/').
> >
> > After this patch this restriction imposed by Open vSwitch itself is
> > abandoned and OVS should be able to connect to controller's Unix Domain
> > Sockets anywhere under filesystem.
>
> When I run "netstat -lnx" on my laptop, I see a bunch of listening Unix
> domain sockets.
>

> Some of these listening sockets are security sensitive, such as SSH
> agents, so it wouldn't be good to have a remote manager be able to point
> OVS to them: what if a clever person could figure out how to send
> arbitrary data to them (maybe in a packet-in message somehow?) via
> OpenFlow.  Other examples are dbus and udev sockets.
>
> That's my main worry here.
>

Ok, I see your concern now. At least with SELinux enabled such attacks
would fail on default Fedora, RHEL and CentOS:

[root@localhost ovs]# tail /var/log/openvswitch/ovs-vswitchd.log
2016-06-08T22:51:48.215Z|00033|connmgr|INFO|brz: added service controller
"punix://var/run/openvswitch/brz.mgmt"
2016-06-08T22:51:48.215Z|00034|bridge|INFO|bridge brs: using datapath ID
e6d0bb7b3047
2016-06-08T22:51:48.215Z|00035|connmgr|INFO|brs: added service controller
"punix://var/run/openvswitch/brs.mgmt"
2016-06-08T22:51:48.220Z|00036|bridge|INFO|ovs-vswitchd (Open vSwitch)
2.5.90
2016-06-08T22:51:48.665Z|00037|rconn|INFO|brX<->unix:/run/udev/control:
connecting...
2016-06-08T22:51:48.665Z|00038|rconn|WARN|brX<->unix:/run/udev/control:
connection failed (Permission denied)
2016-06-08T22:51:48.665Z|00039|rconn|INFO|brX<->unix:/run/udev/control:
waiting 2 seconds before reconnect
2016-06-08T22:51:50.666Z|00040|rconn|INFO|brX<->unix:/run/udev/control:
connecting...
2016-06-08T22:51:50.666Z|00041|rconn|WARN|brX<->unix:/run/udev/control:
connection failed (Permission denied)

because Open vSwitch runs under openvswitch_t domain:

system_u:system_r:openvswitch_t:s0 root  16567 16566  0 66914 36460   0
15:51 ?00:00:00 ovs-vswitchd unix:/var/run/openvswitch/db.sock
-vconsole:emer -vsyslog:err -vfile:info --mlockall --no-chdir
--log-file=/var/log/openvswitch/ovs-vswitchd.log
--pidfile=/var/run/openvswitch/ovs-vswitchd.pid --detach --monitor

and OVS is unable to connect sockets other than those that have type
openvswitch*_t type:

srwxr-x---. 1 root root system_u:object_r:openvswitch_var_run_t:s0 0 Jun  8
15:51 /var/run/openvswitch/brT.mgmt

For example, udev control socket and others use udev_var_run_t type:

srw---. 1 root root system_u:object_r:udev_var_run_t:s0 0 Jun  2 15:43
/run/udev/control


So the problem my patch is trying to solve is that there could be over
confined processes trying

Here are the options I see:
1. abandon my patch and tell everyone to figure out on their own on how to
connect to Open vSwitch sockets under /var/run/openvswitch/ directory (e.g.
by changing group of /var/run/openvswitch or by opening socket while their
processes are running under root user and only after that they change the
user).
2. abandon my patch, but implement something similar to Aaron's DPDK socket
chown()/chmod() patch and tell others to use it.
3. abandon my patch, but implement some kind of access control inside
OVSDB. For example, the roles could be "local admin" and "remote
controller". "remote controller" should have limited access to certain
database files. This is non-trivial, but may be something to consider
long-term because OVSDB currently contains a lot of things to which access
is granted on all-or-nothing basis.
4. move forward with my patch, but allow to dynamically specify whitelist.
Obviously this whitelist must not be configurable through OVSDB because
then it loses its purpose.
5. move forward with my patch, but use Mandatory Access Control to specify
whitelist of paths to which OVS can connect to (this makes assumption that
MAC - SELinux or AppArmor - is up and running).

Unfortunately there is no silver bullet for this problem.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] Bug#826780: Please package OVS >= 2.6.0.dev1

2016-06-08 Thread Ben Pfaff
On Thu, Jun 09, 2016 at 01:23:12AM +0200, Thomas Goirand wrote:
> Source: openvswitch
> Version: 2.3.0+git20140819-4
> Severity: normal
> 
> Hi,
> 
> OpenStack Neutron currently has:
> 
> ovs>=2.5.0;python_version=='2.7' # Apache-2.0
> ovs>=2.6.0.dev1;python_version>='3.4' # Apache-2.0
> 
> in its requirements.txt. Please package this at least to Debian Experimental,
> so that I can package Neutron Newton Beta 1.

I can package 2.5.x, though there are reports that there's one testsuite
failure on big-endian architectures.

OVS 2.6.0 doesn't exist yet, so it can't be packaged.  When Neutron asks
for 2.6.0 what does it mean?
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] Bug#826780: Please package OVS >= 2.6.0.dev1

2016-06-08 Thread Thomas Goirand
Source: openvswitch
Version: 2.3.0+git20140819-4
Severity: normal

Hi,

OpenStack Neutron currently has:

ovs>=2.5.0;python_version=='2.7' # Apache-2.0
ovs>=2.6.0.dev1;python_version>='3.4' # Apache-2.0

in its requirements.txt. Please package this at least to Debian Experimental,
so that I can package Neutron Newton Beta 1.

Cheers,

Thomas Goirand (zigo)
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH v2 2/3] fedora: Add pcap, tcpdump and tcpundump utilities to test

2016-06-08 Thread Flavio Leitner
On Wed, Jun 08, 2016 at 05:49:56PM -0400, Aaron Conole wrote:
> The openvswitch-test package is setup for enabling / performing tests
> for openvswitch setups.  Adding these utilities would enable a richer
> set of debugging utilities for performing diagnostics.
> 
> Signed-off-by: Aaron Conole 
> ---

Acked-by: Flavio Leitner 


___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH v2 1/3] ovs-tcpdump: Add a tcpdump wrapper utility

2016-06-08 Thread Flavio Leitner
On Wed, Jun 08, 2016 at 05:49:55PM -0400, Aaron Conole wrote:
> Currently, there is some documentation which describes setting up and
> using port mirrors for bridges. This documentation is helpful to setup
> a packet capture for specific ports.
> 
> However, a utility to do such packet capture would be valuable, both
> as an exercise in documenting the steps an additional time, and as a way
> of providing an out-of-the-box experience for running a capture.
> 
> This commit adds a tcpdump-wrapper utility for such purpose. It uses the
> Open vSwitch python library to add/remove ports and mirrors to/from the
> Open vSwitch database. It will create a tcpdump instance listening on
> the mirror port (allowing the user to specify additional arguments), and
> dump data to the screen (or otherwise).
> 
> Signed-off-by: Aaron Conole 
> ---

Acked-by: Flavio Leitner 


___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] VxLAN-gpe implementation

2016-06-08 Thread Hannes Frederic Sowa
On 08.06.2016 23:21, Alexander Duyck wrote:
> On Wed, Jun 8, 2016 at 12:46 PM, Hannes Frederic Sowa  
> wrote:
>> On 08.06.2016 17:38, Alexander Duyck wrote:
>>> On Wed, Jun 8, 2016 at 7:48 AM, Hannes Frederic Sowa  
>>> wrote:
 On 08.06.2016 14:51, Jiri Benc wrote:
> On Mon, 6 Jun 2016 14:22:58 -0700, Jesse Gross wrote:
>> On Sat, Jun 4, 2016 at 6:39 AM, Yi Yang  wrote:
>> [...]
>>>  datapath/vport-netdev.c   |   3 +-
>>>  datapath/vport-vxlan.c|  17 ++-
>>
>> These changes aren't upstream yet. Please do that before backporting 
>> them here.
>>
>> However, the changes to vport-vxlan.c are modifying compatibility code
>> that shouldn't be extended further. Instead, just use the existing
>> VXLAN netlink interfaces that have already been created to enable
>> these features.
>>
>> There is also a number of other patches to the OVS kernel module/VXLAN
>> that have not been backported. Pravin started doing this work but it
>> hasn't been applied yet. In general, I think it makes sense to
>> backport patches in order so that the diffs of the patches match those
>> of upstream.
>>
>> Finally, I have a question about receive side offloading with
>> VXLAN-gpe. This is primarily an upstream issue but is present in the
>> code being backported here as well. The VXLAN code sets up receive
>> offloads for all ports regardless of whether they are classic VXLAN or
>> L2/L3 GPE and expects NICs to parse the packets. I don't think this is
>> safe because there are a number of NICs out there that predate the
>> existence of GPE and therefore won't do this parsing correctly. I
>> think that it is necessary to disable receive offloading for
>> non-Ethernet VXLAN-GPE unless the offloading interface is extended.
>
> Coincidentally, I was talking about this with Hannes a few days ago.
> I'm adding him to CC.
>
> I guess you're referring to ndo_add_vxlan_port, right? I agree that
> this interface needs changes, especially considering that we know
> whether the UDP port belongs to VXLAN or VXLAN-GPE. But from my
> understanding of how drivers use this callback, the worst thing that
> could happen is suboptimal generation of rx hashes and thus steering
> the packets to a different receive queue than in the optimal case.
> Surely something to fix but it seems it won't cause much functional
> troubles with the current code?

 I am not sure if we must upgrade the interface. Can't drivers always
 configure vxlan-gpe and are always backwards compatible?

 Non vxlan-gpe capable hardware would have to abort checksum offloading
 as soon as they can't interpret the vxlan header anyway, so the packets
 end up on the slow path and nothing bad should happen.

 Possibly some hardware will verify inner checksums despite it could not
 understand the vxlan header completely. In this case we probably will
 drop the packet in the driver. Anyway, I would be in favor to simply
 present one knob, namely vxlan-offloading, to the user, instead a knob
 for each version of vxlan.

 Unfortunately I couldn't get a definitive answer from the specs
 regarding the checksuming details.

 Bye,
 Hannes
>>>
>>> This is starting to sound like the same conversation we had on netdev
>>> when the ndo_add_geneve_port was added.  One easy fix for guaranteeing
>>> that we can perform the checksum offload is to just enable the outer
>>> UDP checksum.  Then we can still perform GRO and use the outer UDP
>>> source port for generating a hash.  If possible we should make this
>>> the default for all new UDP based tunnels going forward simply because
>>> it allows for backwards-compatibility with existing offloads.
>>
>> Yes, but this time we are only in vxlan-versioning-world only. :)
> 
> Right.  It leads me back into the original thought I had which was we
> should be providing the UDP tunnel type via something like an
> enumerated type so drivers could just opt in if they see a tunnel type
> they recognize.  Having to add a function for each new tunnel type is
> just silly.  Then we could have support for VXLAN be separate from
> VXLAN-GPE without having to add a whole new set of functions.

Partially agreed. I hope we can soon provide a patch that allows us to
query the state of offloading of networking cards, a la ethtool. My idea
was to make this depending on if the ndo_op for vxlan or geneve isn't NULL.

Right now we still have the problem, that the check if the card actually
supports udp offloading for vxlan or geneve is not depending on if the
ndo op is available but can be checked in the specific handler. This
should be factored out and I don't see a reason why we shouldn't convert
to enum style.

>> Hmm, it is a 

[ovs-dev] [PATCH] ovn-northd: fix logical router icmp response for directed broadcasts

2016-06-08 Thread Flavio Fernandes
Responding to icmp queries where the L3 destination is a directed broadcast
was not being properly handled, causing the reply to be sent to all logical
ports except for the one port that should receive it.

Reference to the mailing list thread:
http://openvswitch.org/pipermail/discuss/2016-June/021619.html

This is a proposal for using choice C in the mail discussion; where handling
of icmp queries to broadcast is performed by a separate logical rule.
Unit test has been augmented to exercise this scenario.

Note that since broadcast is contained to node where ovn-controller is running,
there may be no real concern for a potential DOS attack scenario.

Signed-off-by: Flavio Fernandes 
---
 ovn/northd/ovn-northd.c | 28 +---
 tests/ovn.at| 39 +++
 2 files changed, 52 insertions(+), 15 deletions(-)

diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index cac0148..fe86d05 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -1957,9 +1957,8 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap 
*ports,
  * (i.e. the incoming locally attached net) does not matter.
  * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
 match = xasprintf(
-"(ip4.dst == "IP_FMT" || ip4.dst == "IP_FMT") && "
-"icmp4.type == 8 && icmp4.code == 0",
-IP_ARGS(op->ip), IP_ARGS(op->bcast));
+"ip4.dst == "IP_FMT" && icmp4.type == 8 && icmp4.code == 0",
+IP_ARGS(op->ip));
 char *actions = xasprintf(
 "ip4.dst = ip4.src; "
 "ip4.src = "IP_FMT"; "
@@ -1973,6 +1972,29 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap 
*ports,
 free(match);
 free(actions);
 
+   /* ICMP echo reply for L3 directed broadcast packets. Note that
+* 1) this is only applicable to a specific inport and 2) the
+ * destination mac is overriden to create a unicast response
+ * that is sent out of inport. */
+match = xasprintf(
+"inport == %s && ip4.dst == "IP_FMT" && "
+"icmp4.type == 8 && icmp4.code == 0",
+op->json_key, IP_ARGS(op->bcast));
+actions = xasprintf(
+"eth.dst = "ETH_ADDR_FMT"; "
+"ip4.dst = ip4.src; "
+"ip4.src = "IP_FMT"; "
+"ip.ttl = 255; "
+"icmp4.type = 0; "
+"inport = \"\"; /* Allow sending out inport. */ "
+"next; ",
+ETH_ADDR_ARGS(op->mac),
+   IP_ARGS(op->ip));
+ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
+  match, actions);
+free(match);
+free(actions);
+
 /* ARP reply.  These flows reply to ARP requests for the router's own
  * IP address. */
 match = xasprintf(
diff --git a/tests/ovn.at b/tests/ovn.at
index 633cf35..1ccf7c6 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -3127,13 +3127,14 @@ trim_zeros() {
 for i in 1 2; do
 : > vif$i.expected
 done
-# test_ipv4_icmp_request INPORT ETH_SRC ETH_DST IPV4_SRC IPV4_DST IP_CHKSUM 
ICMP_CHKSUM [EXP_IP_CHKSUM EXP_ICMP_CHKSUM]
+# test_ipv4_icmp_request INPORT ETH_SRC ETH_DST IPV4_SRC IPV4_DST IP_CHKSUM 
ICMP_CHKSUM \
+#[EXP_ETH_SRC EXP_IPV4_SRC EXP_IP_CHKSUM 
EXP_ICMP_CHKSUM]
 #
 # Causes a packet to be received on INPORT.  The packet is an ICMPv4
 # request with ETH_SRC, ETH_DST, IPV4_SRC, IPV4_DST, IP_CHSUM and
-# ICMP_CHKSUM as specified.  If EXP_IP_CHKSUM and EXP_ICMP_CHKSUM are
-# provided, then it should be the ip and icmp checksums of the packet
-# responded; otherwise, no reply is expected.
+# ICMP_CHKSUM as specified.  If EXP_* params are provided, then it
+# should be the source ethernet, source ip, ip and icmp checksums
+# of the packet responded; otherwise, no reply is expected.
 # In the absence of an ip checksum calculation helpers, this relies
 # on the caller to provide the checksums for the ip and icmp headers.
 # XXX This should be more systematic.
@@ -3142,12 +3143,14 @@ done
 # ETH_SRC and ETH_DST are each 12 hex digits.
 # IPV4_SRC and IPV4_DST are each 8 hex digits.
 # IP_CHSUM and ICMP_CHKSUM are each 4 hex digits.
+# EXP_ETH_SRC is 12 hex digits.
+# EXP_IPV4_SRC is 8 hex digits.
 # EXP_IP_CHSUM and EXP_ICMP_CHKSUM are each 4 hex digits.
 test_ipv4_icmp_request() {
 local inport=$1 eth_src=$2 eth_dst=$3 ipv4_src=$4 ipv4_dst=$5 ip_chksum=$6 
icmp_chksum=$7
-local exp_ip_chksum=$8 exp_icmp_chksum=$9
 shift; shift; shift; shift; shift; shift; shift
-shift; shift
+local exp_eth_src=$1 exp_ipv4_src=$2 exp_ip_chksum=$3 exp_icmp_chksum=$4
+shift; shift; shift; shift
 
 # Use ttl to exercise section 4.2.2.9 of RFC1812
 local ip_ttl=01
@@ -3159,30 +3162,42 @@ test_ipv4_icmp_request() {
 local 

[ovs-dev] [PATCH v2 1/3] ovs-tcpdump: Add a tcpdump wrapper utility

2016-06-08 Thread Aaron Conole
Currently, there is some documentation which describes setting up and
using port mirrors for bridges. This documentation is helpful to setup
a packet capture for specific ports.

However, a utility to do such packet capture would be valuable, both
as an exercise in documenting the steps an additional time, and as a way
of providing an out-of-the-box experience for running a capture.

This commit adds a tcpdump-wrapper utility for such purpose. It uses the
Open vSwitch python library to add/remove ports and mirrors to/from the
Open vSwitch database. It will create a tcpdump instance listening on
the mirror port (allowing the user to specify additional arguments), and
dump data to the screen (or otherwise).

Signed-off-by: Aaron Conole 
---
v1->v2:
* Added .gitignore entries
* Fixed a manpages distribution error

 NEWS   |   2 +
 manpages.mk|   6 +
 utilities/.gitignore   |   2 +
 utilities/automake.mk  |   9 +-
 utilities/ovs-tcpdump.8.in |  51 +
 utilities/ovs-tcpdump.in   | 453 +
 6 files changed, 522 insertions(+), 1 deletion(-)
 create mode 100644 utilities/ovs-tcpdump.8.in
 create mode 100755 utilities/ovs-tcpdump.in

diff --git a/NEWS b/NEWS
index ba201cf..042b361 100644
--- a/NEWS
+++ b/NEWS
@@ -55,6 +55,8 @@ Post-v2.5.0
  * Flow based tunnel match and action can be used for IPv6 address using
tun_ipv6_src, tun_ipv6_dst fields.
  * Added support for IPv6 tunnels to native tunneling.
+   - A wrapper script, 'ovs-tcpdump', to easily port-mirror an OVS port and
+ watch with tcpdump
 
 v2.5.0 - 26 Feb 2016
 -
diff --git a/manpages.mk b/manpages.mk
index d4d4f21..cf83e44 100644
--- a/manpages.mk
+++ b/manpages.mk
@@ -164,6 +164,12 @@ utilities/ovs-pki.8: \
utilities/ovs-pki.8.in
 utilities/ovs-pki.8.in:
 
+utilities/ovs-tcpdump.8: \
+   utilities/ovs-tcpdump.8.in \
+   lib/common.man
+utilities/ovs-tcpdump.8.in:
+lib/common.man:
+
 utilities/ovs-tcpundump.1: \
utilities/ovs-tcpundump.1.in \
lib/common-syn.man \
diff --git a/utilities/.gitignore b/utilities/.gitignore
index 10cb4af..34c58f2 100644
--- a/utilities/.gitignore
+++ b/utilities/.gitignore
@@ -26,6 +26,8 @@
 /ovs-pki.8
 /ovs-test
 /ovs-test.8
+/ovs-tcpdump
+/ovs-tcpdump.8
 /ovs-tcpundump
 /ovs-tcpundump.1
 /ovs-vlan-bug-workaround
diff --git a/utilities/automake.mk b/utilities/automake.mk
index 1cc66b6..9d5b425 100644
--- a/utilities/automake.mk
+++ b/utilities/automake.mk
@@ -12,6 +12,7 @@ bin_SCRIPTS += \
utilities/ovs-l3ping \
utilities/ovs-parse-backtrace \
utilities/ovs-pcap \
+   utilities/ovs-tcpdump \
utilities/ovs-tcpundump \
utilities/ovs-test \
utilities/ovs-vlan-test
@@ -52,6 +53,7 @@ EXTRA_DIST += \
utilities/ovs-pipegen.py \
utilities/ovs-pki.in \
utilities/ovs-save \
+   utilities/ovs-tcpdump.in \
utilities/ovs-tcpundump.in \
utilities/ovs-test.in \
utilities/ovs-vlan-test.in \
@@ -69,6 +71,7 @@ MAN_ROOTS += \
utilities/ovs-parse-backtrace.8 \
utilities/ovs-pcap.1.in \
utilities/ovs-pki.8.in \
+   utilities/ovs-tcpdump.8.in \
utilities/ovs-tcpundump.1.in \
utilities/ovs-vlan-bug-workaround.8.in \
utilities/ovs-test.8.in \
@@ -94,6 +97,8 @@ DISTCLEANFILES += \
utilities/ovs-pki.8 \
utilities/ovs-sim \
utilities/ovs-sim.1 \
+   utilities/ovs-tcpdump \
+   utilities/ovs-tcpdump.8 \
utilities/ovs-tcpundump \
utilities/ovs-tcpundump.1 \
utilities/ovs-test \
@@ -114,6 +119,7 @@ man_MANS += \
utilities/ovs-parse-backtrace.8 \
utilities/ovs-pcap.1 \
utilities/ovs-pki.8 \
+   utilities/ovs-tcpdump.8 \
utilities/ovs-tcpundump.1 \
utilities/ovs-vlan-bug-workaround.8 \
utilities/ovs-test.8 \
@@ -148,6 +154,7 @@ utilities_nlmon_LDADD = lib/libopenvswitch.la
 endif
 
 FLAKE8_PYFILES += utilities/ovs-pcap.in \
-   utilities/checkpatch.py utilities/ovs-dev.py
+   utilities/checkpatch.py utilities/ovs-dev.py \
+   utilities/ovs-tcpdump.in
 
 include utilities/bugtool/automake.mk
diff --git a/utilities/ovs-tcpdump.8.in b/utilities/ovs-tcpdump.8.in
new file mode 100644
index 000..ecd0937
--- /dev/null
+++ b/utilities/ovs-tcpdump.8.in
@@ -0,0 +1,51 @@
+.TH ovs\-tcpdump 8 "@VERSION@" "Open vSwitch" "Open vSwitch Manual"
+.
+.SH NAME
+ovs\-tcpdump \- Dump traffic from an Open vSwitch port using \fBtcpdump\fR.
+.
+.SH SYNOPSIS
+\fBovs\-tcpdump\fR \fB\-i\fR \fIport\fR \fBtcpdump options...\fR
+.
+.SH DESCRIPTION
+\fBovs\-tcpdump\fR creates switch mirror ports in the \fBovs\-vswitchd\fR
+daemon and executes \fBtcpdump\fR to listen against those ports. When the
+\fBtcpdump\fR instance exits, it then cleans up the mirror port it created.
+.PP
+\fBovs\-tcpdump\fR will not allow multiple mirrors for the 

[ovs-dev] [PATCH v2 2/3] fedora: Add pcap, tcpdump and tcpundump utilities to test

2016-06-08 Thread Aaron Conole
The openvswitch-test package is setup for enabling / performing tests
for openvswitch setups.  Adding these utilities would enable a richer
set of debugging utilities for performing diagnostics.

Signed-off-by: Aaron Conole 
---
v1->v2:
* Introduced

 rhel/openvswitch-fedora.spec.in | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/rhel/openvswitch-fedora.spec.in b/rhel/openvswitch-fedora.spec.in
index 0759096..6c59b08 100644
--- a/rhel/openvswitch-fedora.spec.in
+++ b/rhel/openvswitch-fedora.spec.in
@@ -223,11 +223,7 @@ install -p -m 644 -D selinux/openvswitch-custom.pp \
 
 # remove unpackaged files
 rm -f $RPM_BUILD_ROOT%{_bindir}/ovs-parse-backtrace \
-$RPM_BUILD_ROOT%{_bindir}/ovs-pcap \
-$RPM_BUILD_ROOT%{_bindir}/ovs-tcpundump \
 $RPM_BUILD_ROOT%{_sbindir}/ovs-vlan-bug-workaround \
-$RPM_BUILD_ROOT%{_mandir}/man1/ovs-pcap.1 \
-$RPM_BUILD_ROOT%{_mandir}/man1/ovs-tcpundump.1 \
 $RPM_BUILD_ROOT%{_mandir}/man8/ovs-vlan-bug-workaround.8
 
 %check
@@ -390,9 +386,15 @@ fi
 %{_bindir}/ovs-test
 %{_bindir}/ovs-vlan-test
 %{_bindir}/ovs-l3ping
+%{_bindir}/ovs-pcap
+%{_bindir}/ovs-tcpdump
+%{_bindir}/ovs-tcpundump
 %{_mandir}/man8/ovs-test.8*
 %{_mandir}/man8/ovs-vlan-test.8*
 %{_mandir}/man8/ovs-l3ping.8*
+%{_mandir}/man1/ovs-pcap.1*
+%{_mandir}/man8/ovs-tcpdump.8*
+%{_mandir}/man1/ovs-tcpundump.1*
 %{python_sitelib}/ovstest
 
 %files devel
-- 
2.5.5

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v2 3/3] debian: Add the tcpdump utility to the debian package

2016-06-08 Thread Aaron Conole
Add ovs-tcpdump to the debian build.

Signed-off-by: Aaron Conole 
---
* Introduced

 debian/openvswitch-switch.install  | 1 +
 debian/openvswitch-switch.manpages | 1 +
 2 files changed, 2 insertions(+)

diff --git a/debian/openvswitch-switch.install 
b/debian/openvswitch-switch.install
index 1a5459e..bfb391f 100644
--- a/debian/openvswitch-switch.install
+++ b/debian/openvswitch-switch.install
@@ -3,6 +3,7 @@ etc/bash_completion.d/ovs-vsctl-bashcomp.bash
 usr/bin/ovs-dpctl
 usr/bin/ovs-dpctl-top
 usr/bin/ovs-pcap
+usr/bin/ovs-tcpdump
 usr/bin/ovs-tcpundump
 usr/bin/ovs-vlan-test
 usr/bin/ovs-vsctl
diff --git a/debian/openvswitch-switch.manpages 
b/debian/openvswitch-switch.manpages
index 1b28c94..a3aae64 100644
--- a/debian/openvswitch-switch.manpages
+++ b/debian/openvswitch-switch.manpages
@@ -3,6 +3,7 @@ utilities/ovs-ctl.8
 utilities/ovs-dpctl-top.8
 utilities/ovs-dpctl.8
 utilities/ovs-pcap.1
+utilities/ovs-tcpdump.8
 utilities/ovs-tcpundump.1
 utilities/ovs-vlan-test.8
 utilities/ovs-vsctl.8
-- 
2.5.5

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v2 0/3] ovs-tcpdump: A utility for convenient port mirroring

2016-06-08 Thread Aaron Conole
Open vSwitch ships with a mirroring facility.  However, this facility can
sometimes be cumbersome to setup.  To make this easier for end users, this
patch series introduces a utility `ovs-tcpdump` which wraps the various
ovs database configurations to enable a mirror, and then invokes tcpdump
or a similar utility to monitor the mirror.

The `ovs-tcpdump` utility is also added to the fedora and debian distribution
packages.

Aaron Conole (3):
  ovs-tcpdump: Add a tcpdump wrapper utility
  fedora: Add pcap, tcpdump and tcpundump utilities to test
  debian: Add the tcpdump utility to the debian package

 NEWS   |   2 +
 debian/openvswitch-switch.install  |   1 +
 debian/openvswitch-switch.manpages |   1 +
 manpages.mk|   6 +
 rhel/openvswitch-fedora.spec.in|  10 +-
 utilities/.gitignore   |   2 +
 utilities/automake.mk  |   9 +-
 utilities/ovs-tcpdump.8.in |  51 +
 utilities/ovs-tcpdump.in   | 453 +
 9 files changed, 530 insertions(+), 5 deletions(-)
 create mode 100644 utilities/ovs-tcpdump.8.in
 create mode 100755 utilities/ovs-tcpdump.in

-- 
2.5.5

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] [PATCH v3 2/2] ovn: add lflows for 'na' action for ND

2016-06-08 Thread Ben Pfaff
On Wed, Jun 08, 2016 at 03:28:58PM +0800, Zong Kai LI wrote:
> This patch adds some lflows for 'na' action to support ND versus ARP.
> 
> For ovn-northd, it will generate lflows per each IPv6 address on
> echo lport, with lport mac and IPv6 addresss, with 'na' action.
> e.g. match=(ip6 && nd && icmp6.type == 135 &&
> nd.target == fde3:f657:aac1:0:f816:3eff:fe13:8198),
>  action=(na{fa:16:3e:13:81:98; reg0 = 0x1; outport = inport;
> inport = ""; output;};)
> And new lflows will be set in tabel ls_in_arp_nd_rsp, which is renamed
> from previous ls_in_arp_rsp.
> 
> Setting reg0 = 0x1 to mention that such kind of NA packets are replied
> by ovn-controller, and for these packets, dont do conntrack on them.
> Also modfiy current table 32 and table 48, to make these packets
> output directly.
> 
> Signed-off-by: Zong Kai LI 

I don't understand why it is necessary to have special-case code in
ovn-controller physical_run() for neighbor advertisements.  Nothing
similar is needed for ARP.  It would be much better to avoid special
cases.  Can you explain?  At any rate, ovn-controller should definitely
not have any knowledge of what purpose the logical flows use registers
for.

This adds a Linux-specific header file to physical.c, but that should
not be necessary.

None of these casts should be necessary:
> +match_set_nw_proto(, (uint8_t)IPPROTO_ICMPV6);
> +match_set_icmp_type(, (uint8_t)ND_NEIGHBOR_ADVERT);
> +match_set_reg(, 0, (uint32_t)1);

In ovn-northd, it seems like a really bad idea to use substrings
searches on ACLs as a basis for making decisions.

Thanks,

Ben.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [patch_v2] ovn: Fix receive from vxlan in ovn-controller.

2016-06-08 Thread Darrell Ball
OVN only supports source_node replication and previously vtep interaction,
which used service node replication by default for multicast/broadcast/unknown
unicast traffic worked by happenstance.  Because of limited vxlan
encapsulation metadata, received packets were resubmitted to find the egress
port(s). This is not correct for multicast, broadcast and unknown unicast 
traffic
as traffic will get resent on the tunnel mesh. ovn-controller is changed not to
send traffic received from vxlan tunnels out the tunnel mesh again.  Traffic
received from vxlan tunnels is now only sent locally as intended.

To support keeping state for receipt from a vxlan tunnel a MFF logical
register is allocated for general scratchpad purposes and one bit is used for
receipt from vxlan.  The new register usage is documented in a new
OVN-DESIGN.md document and a table is added to track MFF logical metadata
and register usage.

Some micro-details (e.g.) register assignments) that may change over time
were moved from the ovn-architecture.7.xml document to the OVN-DESIGN.md
document.  The OVN-DESIGN.md file was tested using the following markdown
parsers:

https://jbt.github.io/markdown-editor/
http://dillinger.io/

As part of this change ovn-controller-vtep is hard-coded to set the replication
mode of each logical switch to source node as OVN will only support source
node replication.

Signed-off-by: Darrell Ball 
---

 v1->v2:  Rebased after recent conflicting commit.  Converted some xml
 comments ported from the ovn-architecture document. Removed redundant
 register initialization and unnecessary bit declaration.

 ovn/OVN-DESIGN.md  | 180 +++
 ovn/automake.mk|   1 +
 ovn/controller-vtep/vtep.c |   4 +
 ovn/controller/physical.c  |  25 --
 ovn/lib/logical-fields.h   |  13 +++-
 ovn/ovn-architecture.7.xml | 188 -
 tests/ovn.at   |   3 +
 7 files changed, 219 insertions(+), 195 deletions(-)
 create mode 100644 ovn/OVN-DESIGN.md

diff --git a/ovn/OVN-DESIGN.md b/ovn/OVN-DESIGN.md
new file mode 100644
index 000..6676de5
--- /dev/null
+++ b/ovn/OVN-DESIGN.md
@@ -0,0 +1,180 @@
+OVN register and metadata usage:
+---
+
+logical datapath field:
+
+A field that denotes the logical datapath through which a packet is being
+processed.
+_Keep the following in sync with MFF_LOG_DATAPATH in_
+_ovn/lib/logical-fields.h._
+OVN uses the field that OpenFlow 1.1+ simply (and confusingly) calls
+'metadata' to store the logical datapath.  (This field is passed across
+tunnels as part of the tunnel key.)
+
+
+logical input port field:
+
+A field that denotes the logical port from which the packet entered the
+logical datapath.
+_Keep the following in sync with MFF_LOG_INPORT in_
+_ovn/lib/logical-fields.h._
+OVN stores this in Nicira extension register number 6.
+
+Geneve and STT tunnels pass this field as part of the tunnel key.
+Although VXLAN tunnels do not explicitly carry a logical input port,
+OVN only uses VXLAN to communicate with gateways that from OVN's
+perspective consist of only a single logical port, so that OVN can set
+the logical input port field to this one on ingress to the OVN logical
+pipeline.
+
+
+logical output port field:
+
+A field that denotes the logical port from which the packet will leave
+the logical datapath.  This is initialized to 0 at the beginning of the
+logical ingress pipeline.
+_Keep the following in sync with MFF_LOG_OUTPORT in_
+_ovn/lib/logical-fields.h._
+OVN stores this in Nicira extension register number 7.
+
+Geneve and STT tunnels pass this field as part of the tunnel key.  VXLAN
+tunnels do not transmit the logical output port field.
+
+
+conntrack zone field for logical ports:
+
+A field that denotes the connection tracking zone for logical ports.
+The value only has local significance and is not meaningful between
+chassis.  This is initialized to 0 at the beginning of the logical
+ingress pipeline.  OVN stores this in Nicira extension register number 5.
+
+
+conntrack zone fields for Gateway router:
+
+Fields that denote the connection tracking zones for Gateway routers.
+These values only have local significance (only on chassis that have
+Gateway routers instantiated) and is not meaningful between
+chassis.  OVN stores the zone information for DNATting in Nicira
+extension register number 3 and zone information for SNATing in Nicira
+extension register number 4.
+
+
+flags field:
+
+Scratchpad flags that denote the pipeline state between tables.  The
+values only have local significance and are not meaningful between
+chassis.  This is initialized to 0 at the beginning of the logical
+ingress pipeline. 
+_Keep the following in sync with MFF_LOG_FLAGS in_
+_ovn/lib/logical-fields.h._
+OVN stores this in Nicira extension register number 2.
+
+
+VLAN ID:
+
+The VLAN ID is used as an interface between OVN and containers nested
+inside a 

[ovs-dev] [patch_v2] ovn: Fix receive from vxlan in ovn-controller.

2016-06-08 Thread Darrell Ball
OVN only supports source_node replication and previously vtep interaction,
which used service node replication by default for multicast/broadcast/unknown
unicast traffic worked by happenstance.  Because of limited vxlan
encapsulation metadata, received packets were resubmitted to find the egress
port(s). This is not correct for multicast, broadcast and unknown unicast 
traffic
as traffic will get resent on the tunnel mesh. ovn-controller is changed not to
send traffic received from vxlan tunnels out the tunnel mesh again.  Traffic
received from vxlan tunnels is now only sent locally as intended.

To support keeping state for receipt from a vxlan tunnel a MFF logical
register is allocated for general scratchpad purposes and one bit is used for
receipt from vxlan.  The new register usage is documented in a new
OVN-DESIGN.md document and a table is added to track MFF logical metadata
and register usage.

Some micro-details (e.g.) register assignments) that may change over time
were moved from the ovn-architecture.7.xml document to the OVN-DESIGN.md
document.  The OVN-DESIGN.md file was tested using the following markdown
parsers:

https://jbt.github.io/markdown-editor/
http://dillinger.io/

As part of this change ovn-controller-vtep is hard-coded to set the replication
mode of each logical switch to source node as OVN will only support source
node replication.

Signed-off-by: Darrell Ball 
---

 v1->v2:  Rebased after recent conflicting commit.  Converted some xml
 comments ported from the ovn-architecture document. Removed redundant
 register initialization and unnecessary bit declaration.

 ovn/OVN-DESIGN.md  | 180 +++
 ovn/automake.mk|   1 +
 ovn/controller-vtep/vtep.c |   4 +
 ovn/controller/physical.c  |  25 --
 ovn/lib/logical-fields.h   |  13 +++-
 ovn/ovn-architecture.7.xml | 188 -
 tests/ovn.at   |   3 +
 7 files changed, 219 insertions(+), 195 deletions(-)
 create mode 100644 ovn/OVN-DESIGN.md

diff --git a/ovn/OVN-DESIGN.md b/ovn/OVN-DESIGN.md
new file mode 100644
index 000..6676de5
--- /dev/null
+++ b/ovn/OVN-DESIGN.md
@@ -0,0 +1,180 @@
+OVN register and metadata usage:
+---
+
+logical datapath field:
+
+A field that denotes the logical datapath through which a packet is being
+processed.
+_Keep the following in sync with MFF_LOG_DATAPATH in_
+_ovn/lib/logical-fields.h._
+OVN uses the field that OpenFlow 1.1+ simply (and confusingly) calls
+'metadata' to store the logical datapath.  (This field is passed across
+tunnels as part of the tunnel key.)
+
+
+logical input port field:
+
+A field that denotes the logical port from which the packet entered the
+logical datapath.
+_Keep the following in sync with MFF_LOG_INPORT in_
+_ovn/lib/logical-fields.h._
+OVN stores this in Nicira extension register number 6.
+
+Geneve and STT tunnels pass this field as part of the tunnel key.
+Although VXLAN tunnels do not explicitly carry a logical input port,
+OVN only uses VXLAN to communicate with gateways that from OVN's
+perspective consist of only a single logical port, so that OVN can set
+the logical input port field to this one on ingress to the OVN logical
+pipeline.
+
+
+logical output port field:
+
+A field that denotes the logical port from which the packet will leave
+the logical datapath.  This is initialized to 0 at the beginning of the
+logical ingress pipeline.
+_Keep the following in sync with MFF_LOG_OUTPORT in_
+_ovn/lib/logical-fields.h._
+OVN stores this in Nicira extension register number 7.
+
+Geneve and STT tunnels pass this field as part of the tunnel key.  VXLAN
+tunnels do not transmit the logical output port field.
+
+
+conntrack zone field for logical ports:
+
+A field that denotes the connection tracking zone for logical ports.
+The value only has local significance and is not meaningful between
+chassis.  This is initialized to 0 at the beginning of the logical
+ingress pipeline.  OVN stores this in Nicira extension register number 5.
+
+
+conntrack zone fields for Gateway router:
+
+Fields that denote the connection tracking zones for Gateway routers.
+These values only have local significance (only on chassis that have
+Gateway routers instantiated) and is not meaningful between
+chassis.  OVN stores the zone information for DNATting in Nicira
+extension register number 3 and zone information for SNATing in Nicira
+extension register number 4.
+
+
+flags field:
+
+Scratchpad flags that denote the pipeline state between tables.  The
+values only have local significance and are not meaningful between
+chassis.  This is initialized to 0 at the beginning of the logical
+ingress pipeline. 
+_Keep the following in sync with MFF_LOG_FLAGS in_
+_ovn/lib/logical-fields.h._
+OVN stores this in Nicira extension register number 2.
+
+
+VLAN ID:
+
+The VLAN ID is used as an interface between OVN and containers nested
+inside a 

Re: [ovs-dev] [PATCH] [PATCH v3 1/2] ovn-controller: Add 'na' action for ND

2016-06-08 Thread Ben Pfaff
On Wed, Jun 08, 2016 at 03:26:30PM +0800, Zong Kai LI wrote:
> This patch adds a new OVN action 'na' to support ND versus ARP.
> 
> When ovn-controller received a ND packet, it frames a NA packet for
> reply, with mac address parsed from userdata as eth.dst. Then it
> reloads metadata info from previous packet to framed packet, and
> finally send the framed packet back with left actions.
> 
> Eg. na{12:34:56:78:9a:bc; reg0 = 0x1; outport = inport; inport = ""; output;};
> 
> Since patch port for IPv6 router interface is not ready yet, this
> patch will only try to deal with ND from VM. This patch will set
> RSO flags to 011 for NA packets.
> 
> The next patch will do logical flows works for this action.
> 
> Signed-off-by: Zong Kai LI 

Thanks for the patch!

I think it would be better to retain more of the flavor of the arp
action here.  For that action, it is the responsibility of whoever
writes the flow to initialize anything that cannot be inferred by the
action.  For example, "arp" does not take a parameter that is assigned
to eth.src; instead, whoever writes the flow simply puts an assignment
to eth.src inside the nested set of actions.  That's more flexible, too,
because it allows eth.src to come from someplace other than an immediate
constant.

The intent with OVN logical actions, by the way, is that only actions
should be enclosed in {}; data parameters should be enclosed in ().  It
looks funny to me to mix both of them inside {}.  But if we drop the
eth.src parameter then that solves the problem.

The documentation for reg0, outport, and inport seems strange, because
it implies that the action actually changes these.  It doesn't, as far
as I can tell.

I still don't see any attempt to deal with alignment issues for RISC
machines.  Using ALIGNED_CAST does not solve a problem, it only
suppresses a warning.

I'm appending the changes that I recommend for the documentation.  My
changes do not actually implement these changes, only document them.  My
changes also include some stylistic fixes to better match
CodingStyle.md.

Thanks,

Ben.

--8<--cut here-->8--

diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c
index 1af9e89..79c4070 100644
--- a/ovn/controller/pinctrl.c
+++ b/ovn/controller/pinctrl.c
@@ -765,7 +765,7 @@ pinctrl_handle_na(const struct flow *ip_flow,
 goto exit;
 }
 
-// Frame the NA packet with RSO=011.
+/* Frame the NA packet with RSO=011. */
 uint64_t packet_stub[128 / 8];
 struct dp_packet packet;
 dp_packet_use_stub(, packet_stub, sizeof packet_stub);
@@ -774,7 +774,7 @@ pinctrl_handle_na(const struct flow *ip_flow,
&(ip_flow->nd_target), &(ip_flow->ipv6_src),
htons(0x6000));
 
-// Reload previous packet metadata.
+/* Reload previous packet metadata. */
 uint64_t ofpacts_stub[4096 / 8];
 struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
 reload_metadata(, md);
diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml
index 5189401..3fd626f 100644
--- a/ovn/ovn-sb.xml
+++ b/ovn/ovn-sb.xml
@@ -986,7 +986,7 @@
 
 
 
-  na{A; action; ... };
+  na { action; ... };
 
 
 
@@ -999,38 +999,29 @@
 
   
 The NA packet that this action operates on is initialized based on
-the IPv6 packet being processed(with userdata), as follows:
+the IPv6 packet being processed, as follows.  These are default
+values that the nested actions will probably want to change:
   
 
   
-eth.dst copied from eth.src
-eth.src copied from userdata
+eth.dst copied from eth.src.
+eth.src unchanged
 eth.type = 0x86dd
 ip6.dst copied from ip6.src
 ip6.src copied from nd.target
 icmp6.type = 136 (Neighbor Advertisement)
 nd.target unchanged
 nd.sll = 00:00:00:00:00:00
-nd.sll copied from userdata
+nd.sll unchanged
   
 
   
-These are default values that the nested actions will probably want
-to change:
-  
-
-  
-reg0 = 0x1(Mark as replied by ovn-controller)
-outport copied from inport
-inport = ""
-  
-
 The ND packet has the same VLAN header, if any, as the IP packet
 it replaces.
   
 
   
-Prerequisite: ndicmp6.type == 135
+Prerequisite: nd  icmp6.type == 135
   
 
 
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCHv6 1/3] ofp-actions: Add truncate action.

2016-06-08 Thread pravin shelar
On Tue, Jun 7, 2016 at 10:53 PM, William Tu  wrote:
> The patch adds a new action to support packet truncation.  The new action
> is formatted as 'output(port=n,max_len=m)', as output to port n, with
> packet size being MIN(original_size, m).
>
> One use case is to enable port mirroring to send smaller packets to the
> destination port so that only useful packet information is mirrored/copied,
> saving some performance overhead of copying entire packet payload.  Example
> use case is below as well as shown in the testcases:
>
> - Output to port 1 with max_len 100 bytes.
> - The output packet size on port 1 will be MIN(original_packet_size, 100).
> # ovs-ofctl add-flow br0 'actions=output(port=1,max_len=100)'
>
> - The scope of max_len is limited to output action itself.  The following
>   packet size of output:1 and output:2 will be intact.
> # ovs-ofctl add-flow br0 \
> 'actions=output(port=1,max_len=100),output:1,output:2'
> - The Datapath actions shows:
> # Datapath actions: trunc(100),1,1,2
>
> Signed-off-by: William Tu 
> ---
Can you send next version against net-next tree? All feature patches
needs to be pushed upstream OVS first.

...

> diff --git a/datapath/actions.c b/datapath/actions.c
> index dcf8591..92ee3f9 100644
> --- a/datapath/actions.c
> +++ b/datapath/actions.c
> @@ -778,6 +778,7 @@ static int output_userspace(struct datapath *dp, struct 
> sk_buff *skb,
> memset(, 0, sizeof(upcall));
> upcall.cmd = OVS_PACKET_CMD_ACTION;
> upcall.mru = OVS_CB(skb)->mru;
> +   upcall.cutlen = OVS_CB(skb)->cutlen;
>
> for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
>  a = nla_next(a, )) {
> @@ -854,10 +855,17 @@ static int sample(struct datapath *dp, struct sk_buff 
> *skb,
> return 0;
>
> /* The only known usage of sample action is having a single user-space
> +* action, or having a truncate action followed by a single user-space
>  * action. Treat this usage as a special case.
>  * The output_userspace() should clone the skb to be sent to the
> -* user space. This skb will be consumed by its caller.
> -*/
> +* user space. This skb will be consumed by its caller. */
> +   if (unlikely(nla_type(a) == OVS_ACTION_ATTR_TRUNC)) {
> +   struct ovs_action_trunc *trunc = nla_data(a);
> +   OVS_CB(skb)->cutlen = skb->len > trunc->max_len ?
> +   skb->len - trunc->max_len : 0;
> +   a = nla_next(a, );
> +   }
> +
> if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE &&
>nla_is_last(a, rem)))
> return output_userspace(dp, skb, key, a, actions, 
> actions_len);
> @@ -1040,10 +1048,15 @@ static int do_execute_actions(struct datapath *dp, 
> struct sk_buff *skb,
> for (a = attr, rem = len; rem > 0;
>  a = nla_next(a, )) {
> int err = 0;
> +   int cutlen = OVS_CB(skb)->cutlen;
>
> if (unlikely(prev_port != -1)) {
> struct sk_buff *out_skb = skb_clone(skb, GFP_ATOMIC);
>
> +   if (cutlen > 0) {
> +   pskb_trim(out_skb, out_skb->len - cutlen);
> +   OVS_CB(skb)->cutlen = 0;
> +   }
> if (out_skb)
> do_output(dp, out_skb, prev_port, key);
>
Lets  move the pskb_trim() call inside do_output().  So that NULL skb
is not passed to the function.


> @@ -1055,6 +1068,16 @@ static int do_execute_actions(struct datapath *dp, 
> struct sk_buff *skb,
> prev_port = nla_get_u32(a);
> break;
>
> +   case OVS_ACTION_ATTR_TRUNC: {
> +   struct ovs_action_trunc *trunc = nla_data(a);
> +
> +   if (trunc->max_len < ETH_MIN_FRAME_LEN)
> +   return -EINVAL;
Does max_len still needs to be check after checking the at flow install.

> +   OVS_CB(skb)->cutlen = skb->len > trunc->max_len ?
> +   skb->len - trunc->max_len : 0;
> +   break;
> +   }
> +
> case OVS_ACTION_ATTR_USERSPACE:
> output_userspace(dp, skb, key, a, attr, len);
> break;
> @@ -1125,8 +1148,15 @@ static int do_execute_actions(struct datapath *dp, 
> struct sk_buff *skb,
> }
> }
>
> -   if (prev_port != -1)
> +   if (prev_port != -1) {
> +   uint32_t cutlen = OVS_CB(skb)->cutlen;
> +
> +   if (cutlen > 0) {
> +   pskb_trim(skb, skb->len - cutlen);
> +   OVS_CB(skb)->cutlen = 0;
> +   }
> 

Re: [ovs-dev] [PATCH] VxLAN-gpe implementation

2016-06-08 Thread Alexander Duyck
On Wed, Jun 8, 2016 at 12:46 PM, Hannes Frederic Sowa  wrote:
> On 08.06.2016 17:38, Alexander Duyck wrote:
>> On Wed, Jun 8, 2016 at 7:48 AM, Hannes Frederic Sowa  
>> wrote:
>>> On 08.06.2016 14:51, Jiri Benc wrote:
 On Mon, 6 Jun 2016 14:22:58 -0700, Jesse Gross wrote:
> On Sat, Jun 4, 2016 at 6:39 AM, Yi Yang  wrote:
> [...]
>>  datapath/vport-netdev.c   |   3 +-
>>  datapath/vport-vxlan.c|  17 ++-
>
> These changes aren't upstream yet. Please do that before backporting them 
> here.
>
> However, the changes to vport-vxlan.c are modifying compatibility code
> that shouldn't be extended further. Instead, just use the existing
> VXLAN netlink interfaces that have already been created to enable
> these features.
>
> There is also a number of other patches to the OVS kernel module/VXLAN
> that have not been backported. Pravin started doing this work but it
> hasn't been applied yet. In general, I think it makes sense to
> backport patches in order so that the diffs of the patches match those
> of upstream.
>
> Finally, I have a question about receive side offloading with
> VXLAN-gpe. This is primarily an upstream issue but is present in the
> code being backported here as well. The VXLAN code sets up receive
> offloads for all ports regardless of whether they are classic VXLAN or
> L2/L3 GPE and expects NICs to parse the packets. I don't think this is
> safe because there are a number of NICs out there that predate the
> existence of GPE and therefore won't do this parsing correctly. I
> think that it is necessary to disable receive offloading for
> non-Ethernet VXLAN-GPE unless the offloading interface is extended.

 Coincidentally, I was talking about this with Hannes a few days ago.
 I'm adding him to CC.

 I guess you're referring to ndo_add_vxlan_port, right? I agree that
 this interface needs changes, especially considering that we know
 whether the UDP port belongs to VXLAN or VXLAN-GPE. But from my
 understanding of how drivers use this callback, the worst thing that
 could happen is suboptimal generation of rx hashes and thus steering
 the packets to a different receive queue than in the optimal case.
 Surely something to fix but it seems it won't cause much functional
 troubles with the current code?
>>>
>>> I am not sure if we must upgrade the interface. Can't drivers always
>>> configure vxlan-gpe and are always backwards compatible?
>>>
>>> Non vxlan-gpe capable hardware would have to abort checksum offloading
>>> as soon as they can't interpret the vxlan header anyway, so the packets
>>> end up on the slow path and nothing bad should happen.
>>>
>>> Possibly some hardware will verify inner checksums despite it could not
>>> understand the vxlan header completely. In this case we probably will
>>> drop the packet in the driver. Anyway, I would be in favor to simply
>>> present one knob, namely vxlan-offloading, to the user, instead a knob
>>> for each version of vxlan.
>>>
>>> Unfortunately I couldn't get a definitive answer from the specs
>>> regarding the checksuming details.
>>>
>>> Bye,
>>> Hannes
>>
>> This is starting to sound like the same conversation we had on netdev
>> when the ndo_add_geneve_port was added.  One easy fix for guaranteeing
>> that we can perform the checksum offload is to just enable the outer
>> UDP checksum.  Then we can still perform GRO and use the outer UDP
>> source port for generating a hash.  If possible we should make this
>> the default for all new UDP based tunnels going forward simply because
>> it allows for backwards-compatibility with existing offloads.
>
> Yes, but this time we are only in vxlan-versioning-world only. :)

Right.  It leads me back into the original thought I had which was we
should be providing the UDP tunnel type via something like an
enumerated type so drivers could just opt in if they see a tunnel type
they recognize.  Having to add a function for each new tunnel type is
just silly.  Then we could have support for VXLAN be separate from
VXLAN-GPE without having to add a whole new set of functions.

> Hmm, it is a good question why we didn't do that already. Do you
> remember the reasons?

Nope.  Although back when we last had this discussion LCO and GSO
partial didn't exist.  Now that they do we should probably just take
advantage of what we have since then we can get offloads automatically
for almost every new tunnel protocol assuming the NICs aren't relying
on parsing the frames in hardware/firmware for such an offload.

Also I know there are concerns about regressions on the older tunnel
protocols such as VXLAN and GENEVE since there is already support out
there in hardware and it could hurt performance to enable the outer
checksum if you are talking with an 

Re: [ovs-dev] [PATCH] bridge: allow OVS to connect to Unix Domain Sockets outside its run directory

2016-06-08 Thread Ben Pfaff
On Thu, Jun 02, 2016 at 07:47:33PM -0700, Ansis Atteka wrote:
> Before this patch OVS refused to connect to a local controller that
> had its Unix Domain Socket outside Open vSwitch run directory (e.g.
> outside '/var/run/openvswitch/').
> 
> After this patch this restriction imposed by Open vSwitch itself is
> abandoned and OVS should be able to connect to controller's Unix Domain
> Sockets anywhere under filesystem.

When I run "netstat -lnx" on my laptop, I see a bunch of listening Unix
domain sockets.

Some of these listening sockets are security sensitive, such as SSH
agents, so it wouldn't be good to have a remote manager be able to point
OVS to them: what if a clever person could figure out how to send
arbitrary data to them (maybe in a packet-in message somehow?) via
OpenFlow.  Other examples are dbus and udev sockets.

That's my main worry here.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] sparse: Fix conflict between netinet/in.h and linux/in.h

2016-06-08 Thread Daniele Di Proietto





On 08/06/2016 13:30, "Ben Pfaff"  wrote:

>On Wed, Jun 08, 2016 at 01:07:32PM -0700, Ben Pfaff wrote:
>> On Wed, Jun 01, 2016 at 07:23:29PM -0700, Daniele Di Proietto wrote:
>> > linux/in.h (from linux uapi headers) carries many of the same
>> > definitions as netinet/in.h (from glibc).
>> > 
>> > If linux/in.h is included after netinet/in.h, conflicts are avoided in
>> > two ways:
>> > 
>> > 1) linux/libc-compat.h (included by linux/in.h) detects the include
>> >guard of netinet/in.h and defines some macros (e.g.
>> >__UAPI_DEF_IN_IPPROTO) to 0.  linux/in.h avoids exporting the same
>> >enums if those macros are 0.
>> > 
>> > 2) The two files are allowed to redefine the same macros as long as the
>> >values are the same.
>> > 
>> > Our include/sparse/netinet/in.h creates problems, because:
>> > 
>> > 1) It uses a custom include guard
>> > 2) It uses dummy values for some macros.
>> > 
>> > This commit changes include/sparse/netinet/in.h to use the same include
>> > guard as glibc netinet/in.h, and to use the same values for some macros.
>> > 
>> > I think this problem is present with linux headers after
>> > a263653ed798("netfilter: don't pull include/linux/netfilter.h from netns
>> > headers") which cause our lib/netlink-conntrack.c to include linux/in.h
>> > after netinet/in.h.
>> 
>> The change to the macros seems fine.
>> 
>> The change to the include guard concerns me though.  If other
>> definitions of e.g. htons() are used, then how will those have the
>> correct types for sparse checking?
>
>After talking to Daniele I understand better.  The reason to use glibc's
>header guard is that the Linux headers check for glibc's header guard
>and behave differently if it is present, and we want that same behavior
>when our "sparse"-compatible header is used.  Now that I understand, I
>support the change.

Yeah, it's quite intricated

I'm not sure our sparse headers cover all corner cases of glibc+linux headers,
but for now the build appears to work.

>
>Acked-by: Ben Pfaff 

Thanks for the review and testing, I pushed this to master
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH v3] lib: Use a more accurate value for CPU count (sched_getaffinity).

2016-06-08 Thread Ben Pfaff
On Thu, Jun 02, 2016 at 04:34:35PM +0200, Quentin Monnet wrote:
> From: David Marchand 
> 
> Relying on /proc/cpuinfo to count the number of available cores is not
> the best option:
> 
> - The code is x86-specific.
> - If the process is started with a different CPU affinity, then it will
>   wrongly try to start too many threads (for an example, imagine an OVS
>   daemon restricted to 4 CPU threads on a 128 threads system).
> 
> This commit removes /proc/cpuinfo parsing, and introduces instead a call
> to sched_getaffinity(), which is architecture-independant, in order to
> retrieve the list of CPU threads available to the current process and to
> count them.

This appears to be Linux-specific.  On non-Linux systems (e.g. FreeBSD,
Mac OS) I suggest using just _SC_NPROCESSORS_ONLN.

Thanks,

Ben.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH v2] ovsdb: Expose vhost-user socket directory in ovsdb

2016-06-08 Thread Ben Pfaff
On Thu, Jun 02, 2016 at 11:25:56AM +0100, Robert Wojciechowicz wrote:
> In order to correctly interoperate with Openstack and ODL,
> the vhost-user socket directory must be exposed from OVS via OVSDB.
> Different distros may package OVS in different ways,
> so the locations of these sockets may vary depending on how
> ovs-vswitchd has been started. Some clients need information where
> the sockets are located when instantiating Qemu virtual machines.
> The full vhost-user socket directory is constructed from current
> OVS working directory and optionally from specified subdirectory.
> This patch exposes vhost-user socket directory in Open_vSwitch
> table in other_config column in two following keys:
> 1. ovs-run-dir- OVS working directory
> 2. vhost-sock-dir - subdirectory of ovs-run-dir (might be empty)
> 
> Signed-off-by: Robert Wojciechowicz 
> 
> v1->v2
> - moving vswitch-idl.h dependency inside #ifdef block
> - sock_dir_subcomponent initialization with ""

Same comment as v1: architecturally, ovs-vswitchd only reads
other-config columns, it never writes to them.  Please fix.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] sparse: Fix conflict between netinet/in.h and linux/in.h

2016-06-08 Thread Ben Pfaff
On Wed, Jun 08, 2016 at 01:07:32PM -0700, Ben Pfaff wrote:
> On Wed, Jun 01, 2016 at 07:23:29PM -0700, Daniele Di Proietto wrote:
> > linux/in.h (from linux uapi headers) carries many of the same
> > definitions as netinet/in.h (from glibc).
> > 
> > If linux/in.h is included after netinet/in.h, conflicts are avoided in
> > two ways:
> > 
> > 1) linux/libc-compat.h (included by linux/in.h) detects the include
> >guard of netinet/in.h and defines some macros (e.g.
> >__UAPI_DEF_IN_IPPROTO) to 0.  linux/in.h avoids exporting the same
> >enums if those macros are 0.
> > 
> > 2) The two files are allowed to redefine the same macros as long as the
> >values are the same.
> > 
> > Our include/sparse/netinet/in.h creates problems, because:
> > 
> > 1) It uses a custom include guard
> > 2) It uses dummy values for some macros.
> > 
> > This commit changes include/sparse/netinet/in.h to use the same include
> > guard as glibc netinet/in.h, and to use the same values for some macros.
> > 
> > I think this problem is present with linux headers after
> > a263653ed798("netfilter: don't pull include/linux/netfilter.h from netns
> > headers") which cause our lib/netlink-conntrack.c to include linux/in.h
> > after netinet/in.h.
> 
> The change to the macros seems fine.
> 
> The change to the include guard concerns me though.  If other
> definitions of e.g. htons() are used, then how will those have the
> correct types for sparse checking?

After talking to Daniele I understand better.  The reason to use glibc's
header guard is that the Linux headers check for glibc's header guard
and behave differently if it is present, and we want that same behavior
when our "sparse"-compatible header is used.  Now that I understand, I
support the change.

Acked-by: Ben Pfaff 
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] ovs-tcpdump: Add a tcpdump wrapper utility

2016-06-08 Thread Ben Pfaff
On Wed, Jun 08, 2016 at 04:54:31PM -0300, Flavio Leitner wrote:
> On Tue, Jun 07, 2016 at 08:41:13AM -0700, Ben Pfaff wrote:
> > On Wed, Jun 01, 2016 at 01:30:40PM -0400, Aaron Conole wrote:
> > > Currently, there is some documentation which describes setting up and
> > > using port mirrors for bridges. This documentation is helpful to setup
> > > a packet capture for specific ports.
> > > 
> > > However, a utility to do such packet capture would be valuable, both
> > > as an exercise in documenting the steps an additional time, and as a way
> > > of providing an out-of-the-box experience for running a capture.
> > > 
> > > This commit adds a tcpdump-wrapper utility for such purpose. It uses the
> > > Open vSwitch python library to add/remove ports and mirrors to/from the
> > > Open vSwitch database. It will create a tcpdump instance listening on
> > > the mirror port (allowing the user to specify additional arguments), and
> > > dump data to the screen (or otherwise).
> > > 
> > > Signed-off-by: Aaron Conole 
> > 
> > This is pretty cool but I don't feel up to reviewing this much Python.
> > Anyone else want to take a shot at it?  Alternatively, if no one does,
> > then I'm willing to just take it as-is since it's so independent of
> > everything else and therefore low risk.
> 
> I am in the same situation.
> 
> The tool works for me with tcpdump (default) or tshark, on a dpdk
> port, vhost-user port or internal port (ovs bridge).  I also tested
> with custom installation and noticed no issues.
> 
> The only issue I found is that it breaks rpm build because the tool
> is neither packaged nor excluded.  I spoke with Aaron offline and he
> will follow up with a fix.  I will wait for that before ACK this one.

Great, let's get that fixed in v2 and then applied.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH v10 5/5] userspace: add non-tap (l3) support to GRE vports

2016-06-08 Thread Ben Pfaff
On Wed, Jun 01, 2016 at 02:04:57PM +0900, Simon Horman wrote:
> On Tue, May 31, 2016 at 08:20:24PM -0700, Ben Pfaff wrote:
> > There's a change to tnl_port_show() to "Skip ports with duplicate 'port'
> > field".  I don't understand this change.  Can you explain it?  (It's
> > O(n**2) in the number of ports, too.)
> 
> The problem that this tries to address is that it is now possible
> to have an l3 and non-l3 tunnel which share the same port and when
> the ports are dumped this shows up as a duplicate.
> 
> For example in the tunnel-push-pop.at test updated by this patch the
> following duplicate gre port appears without this portion of the change:

OK, so I understand the solution now, but I don't yet understand the
problem: why are these represented as two separate tunnels, and why do
those separate tunnels have the same port number?

Thanks,

Ben.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] sparse: Fix conflict between netinet/in.h and linux/in.h

2016-06-08 Thread Ben Pfaff
On Wed, Jun 01, 2016 at 07:23:29PM -0700, Daniele Di Proietto wrote:
> linux/in.h (from linux uapi headers) carries many of the same
> definitions as netinet/in.h (from glibc).
> 
> If linux/in.h is included after netinet/in.h, conflicts are avoided in
> two ways:
> 
> 1) linux/libc-compat.h (included by linux/in.h) detects the include
>guard of netinet/in.h and defines some macros (e.g.
>__UAPI_DEF_IN_IPPROTO) to 0.  linux/in.h avoids exporting the same
>enums if those macros are 0.
> 
> 2) The two files are allowed to redefine the same macros as long as the
>values are the same.
> 
> Our include/sparse/netinet/in.h creates problems, because:
> 
> 1) It uses a custom include guard
> 2) It uses dummy values for some macros.
> 
> This commit changes include/sparse/netinet/in.h to use the same include
> guard as glibc netinet/in.h, and to use the same values for some macros.
> 
> I think this problem is present with linux headers after
> a263653ed798("netfilter: don't pull include/linux/netfilter.h from netns
> headers") which cause our lib/netlink-conntrack.c to include linux/in.h
> after netinet/in.h.

The change to the macros seems fine.

The change to the include guard concerns me though.  If other
definitions of e.g. htons() are used, then how will those have the
correct types for sparse checking?
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] ovs-tcpdump: Add a tcpdump wrapper utility

2016-06-08 Thread Flavio Leitner
On Tue, Jun 07, 2016 at 08:41:13AM -0700, Ben Pfaff wrote:
> On Wed, Jun 01, 2016 at 01:30:40PM -0400, Aaron Conole wrote:
> > Currently, there is some documentation which describes setting up and
> > using port mirrors for bridges. This documentation is helpful to setup
> > a packet capture for specific ports.
> > 
> > However, a utility to do such packet capture would be valuable, both
> > as an exercise in documenting the steps an additional time, and as a way
> > of providing an out-of-the-box experience for running a capture.
> > 
> > This commit adds a tcpdump-wrapper utility for such purpose. It uses the
> > Open vSwitch python library to add/remove ports and mirrors to/from the
> > Open vSwitch database. It will create a tcpdump instance listening on
> > the mirror port (allowing the user to specify additional arguments), and
> > dump data to the screen (or otherwise).
> > 
> > Signed-off-by: Aaron Conole 
> 
> This is pretty cool but I don't feel up to reviewing this much Python.
> Anyone else want to take a shot at it?  Alternatively, if no one does,
> then I'm willing to just take it as-is since it's so independent of
> everything else and therefore low risk.

I am in the same situation.

The tool works for me with tcpdump (default) or tshark, on a dpdk
port, vhost-user port or internal port (ovs bridge).  I also tested
with custom installation and noticed no issues.

The only issue I found is that it breaks rpm build because the tool
is neither packaged nor excluded.  I spoke with Aaron offline and he
will follow up with a fix.  I will wait for that before ACK this one.

-- 
fbl

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH] ovn-northd: no logical router icmp response for directed broadcasts

2016-06-08 Thread Flavio Fernandes
Responding to icmp queries where the L3 destination is a directed broadcast
was not being properly handled, causing the reply to be sent to all logical
ports except for the one port that should receive it.

Reference to the mailing list thread:
http://openvswitch.org/pipermail/discuss/2016-June/021619.html

This is a proposal for using choice B in the mail discussion; where icmp
queries to broadcast are simply not responded by the logical router.

Signed-off-by: Flavio Fernandes 
---
 ovn/northd/ovn-northd.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index cac0148..c850954 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -1957,9 +1957,8 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap 
*ports,
  * (i.e. the incoming locally attached net) does not matter.
  * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
 match = xasprintf(
-"(ip4.dst == "IP_FMT" || ip4.dst == "IP_FMT") && "
-"icmp4.type == 8 && icmp4.code == 0",
-IP_ARGS(op->ip), IP_ARGS(op->bcast));
+"ip4.dst == "IP_FMT" && icmp4.type == 8 && icmp4.code == 0",
+IP_ARGS(op->ip));
 char *actions = xasprintf(
 "ip4.dst = ip4.src; "
 "ip4.src = "IP_FMT"; "
-- 
1.9.1

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] VxLAN-gpe implementation

2016-06-08 Thread Hannes Frederic Sowa
On 08.06.2016 17:38, Alexander Duyck wrote:
> On Wed, Jun 8, 2016 at 7:48 AM, Hannes Frederic Sowa  
> wrote:
>> On 08.06.2016 14:51, Jiri Benc wrote:
>>> On Mon, 6 Jun 2016 14:22:58 -0700, Jesse Gross wrote:
 On Sat, Jun 4, 2016 at 6:39 AM, Yi Yang  wrote:
 [...]
>  datapath/vport-netdev.c   |   3 +-
>  datapath/vport-vxlan.c|  17 ++-

 These changes aren't upstream yet. Please do that before backporting them 
 here.

 However, the changes to vport-vxlan.c are modifying compatibility code
 that shouldn't be extended further. Instead, just use the existing
 VXLAN netlink interfaces that have already been created to enable
 these features.

 There is also a number of other patches to the OVS kernel module/VXLAN
 that have not been backported. Pravin started doing this work but it
 hasn't been applied yet. In general, I think it makes sense to
 backport patches in order so that the diffs of the patches match those
 of upstream.

 Finally, I have a question about receive side offloading with
 VXLAN-gpe. This is primarily an upstream issue but is present in the
 code being backported here as well. The VXLAN code sets up receive
 offloads for all ports regardless of whether they are classic VXLAN or
 L2/L3 GPE and expects NICs to parse the packets. I don't think this is
 safe because there are a number of NICs out there that predate the
 existence of GPE and therefore won't do this parsing correctly. I
 think that it is necessary to disable receive offloading for
 non-Ethernet VXLAN-GPE unless the offloading interface is extended.
>>>
>>> Coincidentally, I was talking about this with Hannes a few days ago.
>>> I'm adding him to CC.
>>>
>>> I guess you're referring to ndo_add_vxlan_port, right? I agree that
>>> this interface needs changes, especially considering that we know
>>> whether the UDP port belongs to VXLAN or VXLAN-GPE. But from my
>>> understanding of how drivers use this callback, the worst thing that
>>> could happen is suboptimal generation of rx hashes and thus steering
>>> the packets to a different receive queue than in the optimal case.
>>> Surely something to fix but it seems it won't cause much functional
>>> troubles with the current code?
>>
>> I am not sure if we must upgrade the interface. Can't drivers always
>> configure vxlan-gpe and are always backwards compatible?
>>
>> Non vxlan-gpe capable hardware would have to abort checksum offloading
>> as soon as they can't interpret the vxlan header anyway, so the packets
>> end up on the slow path and nothing bad should happen.
>>
>> Possibly some hardware will verify inner checksums despite it could not
>> understand the vxlan header completely. In this case we probably will
>> drop the packet in the driver. Anyway, I would be in favor to simply
>> present one knob, namely vxlan-offloading, to the user, instead a knob
>> for each version of vxlan.
>>
>> Unfortunately I couldn't get a definitive answer from the specs
>> regarding the checksuming details.
>>
>> Bye,
>> Hannes
> 
> This is starting to sound like the same conversation we had on netdev
> when the ndo_add_geneve_port was added.  One easy fix for guaranteeing
> that we can perform the checksum offload is to just enable the outer
> UDP checksum.  Then we can still perform GRO and use the outer UDP
> source port for generating a hash.  If possible we should make this
> the default for all new UDP based tunnels going forward simply because
> it allows for backwards-compatibility with existing offloads.

Yes, but this time we are only in vxlan-versioning-world only. :)

Hmm, it is a good question why we didn't do that already. Do you
remember the reasons?

> Really I wonder if we shouldn't just start pulling out all the support
> for the ndo_add_vxlan/geneve_port code anyway, or at least start
> forcing it to include more data such as an endpoint IP/IPv6 address

As far as I know Intel cards don't support adding UDP tuples but only
simple ports to the list of udp protocol offloads.

On the other side, I think the current way how we do it is quite
okayish. At the same time where we establish the udp offload we
unconditionally add the socket to the wildcard socket table (local_ip ==
0). At least no other socket can bind to such a socket (maybe
IP_FREEBIND etc. are an exception here).

> and tunnel type.  There end up being a number of cases where the
> offload could end up asserting itself where it isn't actually wanted
> such as a mixed environment where the PF is setting up the offloads
> via ndo_add_vxlan_port and then trying to apply that to VFs or other
> PFs on the same device which aren't operating in the same network
> namespace.  The worst case ends up being that you have UDP checksum
> offloads stripped for flows that would have otherwise been reporting
> valid checksum 

Re: [ovs-dev] [PATCH v3] datapath-windows: Add GRE checksum

2016-06-08 Thread Nithin Raju

> #endif
>@@ -299,34 +312,21 @@ OvsDecapGre(POVS_SWITCH_CONTEXT switchContext,
> EthHdr *ethHdr;
> IPHdr *ipHdr;
> GREHdr *greHdr;
>-UINT32 tunnelSize = 0, packetLength = 0;
>+UINT32 tunnelSize, packetLength;
> UINT32 headRoom = 0;
> PUINT8 bufferStart;
> NDIS_STATUS status;
> 
> curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
> packetLength = NET_BUFFER_DATA_LENGTH(curNb);
>-tunnelSize = GreTunHdrSize(tunKey->flags);
>+curMdl = NET_BUFFER_CURRENT_MDL(curNb);
>+tunnelSize = GreTunHdrSize(0);
> if (packetLength <= tunnelSize) {
> return NDIS_STATUS_INVALID_LENGTH;
> }
> 
>-/*
>- * Create a copy of the NBL so that we have all the headers in one
>MDL.
>- */
>-*newNbl = OvsPartialCopyNBL(switchContext, curNbl,
>-tunnelSize + OVS_DEFAULT_COPY_SIZE, 0,
>-TRUE /*copy NBL info */);
>-
>-if (*newNbl == NULL) {
>-return NDIS_STATUS_RESOURCES;
>-}
>-
>-curNbl = *newNbl;
>-curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
>-curMdl = NET_BUFFER_CURRENT_MDL(curNb);
>-bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl,
>LowPagePriority) +
>-  NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
>+/* Get a contignuous buffer for the maxmimum length of a GRE header
>*/
>+bufferStart = NdisGetDataBuffer(curNb, OVS_MAX_GRE_LGTH, NULL, 1, 0);

Sorry we have to go back and forth on this. Like I mentioned in the
previous email, the idea is to get a contiguous chunk of memory so we can
walk all the header until the GRE header. It is a good idea to use
NdisGetDataBuffer() instead of copying the NBL. But, we won¹t avoid the
copied NBL anyway since decap has to happen on the copied NBL.

In any case, NdisGetDataBuffer() has some pitfalls:
"If the requested data in the buffer is contiguous, this return value is a
pointer to a location that NDIS provides. If the data is not contiguous,
NDIS uses the Storage parameter as follows:

* If the Storage parameter is non-NULL, NDIS copies the data to the buffer
at Storage. This return value is the pointer passed to the Storage
parameter.
* If the Storage parameter is NULL, this return value is NULL.²

So, if the first MDL does not fit the headers until the GRE headers, we
need to pass explicit memory to NdisGetDataBuffer() in argument #3 in
order for NDIS to copy it over to a contiguous chunk of memory.

Thanks,
-- Nithin

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH v10 2/2] ovn-northd: Add logical flows to support native DHCP

2016-06-08 Thread Ramu Ramamurthy
I tested and verified this patchset using the corresponding WIP
openstack dhcp patch.
native-dhcp works end-to-end as advertized - I tested with the default
dhcp-options
that are used when VMs boot.

On Mon, Jun 6, 2016 at 10:49 PM, Numan Siddique  wrote:
> OVN implements a native DHCP support which caters to the common
> use case of providing an IP address to a booting instance by
> providing stateless replies to DHCP requests based on statically
> configured address mappings. To do this it allows a short list of
> DHCP options to be configured and applied at each compute host
> running ovn-controller.
>
> A new table 'Subnet' is added in OVN NB DB to store the DHCP options.
>
> For each logical port following flows are added if the CMS has defined
> DHCP options in the 'Subnet' column
>
>  - A logical flow which copies the DHCP options to the DHCP
>request packets using the 'put_dhcp_opts' action and advances the
>packet to the next stage.
>
>  - A logical flow which implements the DHCP reponder by sending
>the DHCP reply back to the inport once the 'put_dhcp_opts' action
>is applied.
>
> Signed-Off-by: Numan Siddique 
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 3/3] JSON serialization via Python's json lib

2016-06-08 Thread Ben Pfaff
On Wed, Jun 08, 2016 at 08:55:15AM -0500, Terry Wilson wrote:
> There is no particularly good reason to use our own Python JSON
> serialization implementation when serialization can be done faster
> with Python's built-in JSON library.
> 
> A few tests were changed due to Python's default JSON library
> returning slightly more precise floating point numbers.
> 
> Signed-off-by: Terry Wilson 

I get a build failure which is probably because I run "configure" in a
directory different from the source directory:

(cd python/ && /usr/bin/python3 setup.py build_ext --inplace)
/bin/bash: line 0: cd: python/: No such file or directory
Makefile:6149: recipe for target 
'../python/ovs/_json.cpython-35m-i386-linux-gnu.so' failed

Also it's not necessary to put Makefile commands in parentheses since
they always run in a subshell anyway.

Thanks,

Ben.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 2/3] Add optional C extension wrapper for Python JSON parsing

2016-06-08 Thread Ben Pfaff
On Wed, Jun 08, 2016 at 08:55:14AM -0500, Terry Wilson wrote:
> The pure Python in-tree JSON parser is *much* slower than the
> in-tree C JSON parser. A local test parsing a 100Mb JSON file
> showed the Python version taking 270 seconds. With the C wrapper,
> it took under 4 seconds.
> 
> The C extension will be used automatically if it can be built. If
> the extension fails to build, a warning is displayed and the build
> is restarted without the extension.
> 
> The Serializer class is replaced with Python's built-in
> JSON library since the ability to process chunked data is not
> needed in that case.
> 
> The extension should work with both Python 2.7 and Python 3.3+.
> 
> Signed-off-by: Terry Wilson 

Applied, thanks!

The automatic behavior here is an asset for a developer, but it is
probably undesirable for use in packaging systems, because one wants to
make sure that the packaging always has or does not have the extension,
without depending on what the system has on it.  So it might be good to
support something like --enable-openssl=[yes|no|check], where "yes"
means that OpenSSL must be available, "no" means that OpenSSL will not
be used, and "check" (the default) means to use OpenSSL if it's
available.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH] Fix release date for 2.4.1.

2016-06-08 Thread Thadeu Lima de Souza Cascardo
Signed-off-by: Thadeu Lima de Souza Cascardo 
---
 NEWS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NEWS b/NEWS
index 2eba9a1..8b1044e 100644
--- a/NEWS
+++ b/NEWS
@@ -2,7 +2,7 @@ v2.4.2 - xx xxx 
 -
 
 
-v2.4.1 - 20 Aug 2015
+v2.4.1 - 28 Mar 2016
 -
- Fixes buffer overflow for crafted MPLS packets (CVE-2016-2074).
- Bug fixes
-- 
2.5.5

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 1/3] Ensure significand remains an integer in Python3 json parser

2016-06-08 Thread Ben Pfaff
On Wed, Jun 08, 2016 at 08:55:13AM -0500, Terry Wilson wrote:
> The / operation in Python 2 is "floor division" for int/long types
> while in Python 3 is "true division". This means that the
> significand can become a float with the existing code in Python 3.
> This, in turn, can result in a parse of something like [1.10e1]
> returning 11 in Python 2 and 11.0 in Python 3. Switching to the
> // operator resolves this difference.
> 
> The JSON tests do not catch this difference because the built-in
> serializer prints floats with the %.15g format which will convert
> floats with no fractional part to an integer representation.
> 
> Signed-off-by: Terry Wilson 

Applied, thanks!
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] ovs-numa: Fix a compilation error

2016-06-08 Thread Ben Pfaff
On Wed, Jun 08, 2016 at 03:19:46PM +0900, YAMAMOTO Takashi wrote:
> Fix the following error on NetBSD 7.0.
> 
> ../lib/ovs-numa.c: In function 'ovs_numa_set_cpu_mask':
> ../lib/ovs-numa.c:555:9: error: array subscript has type 'char' 
> [-Werror=char-subscripts]
> 
> Signed-off-by: YAMAMOTO Takashi 

Thanks.

Acked-by: Ben Pfaff 
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] Question about SHA1 usage in OVSDB Server

2016-06-08 Thread Ben Pfaff
On Wed, Jun 08, 2016 at 05:13:09PM +, Rodriguez Betancourt, Esteban wrote:
> I was performing some profiling on OVSDB, when inserting a lot of
> rows. The callgrind results shows that the SHA1 calculation takes near
> 10% of the time within our test (the whole file writing, including
> SHA1, takes like 20%).
> 
> We want to know further about why that checksum is need, in order to
> decide how it could be optimized (we are considering: using processor
> specific instructions, changing the algorithm or removing it at all).

The checksum is there to make writing the log atomic.  Without the
checksum, it would be hard to tell whether data read from the file after
a header line was actually the data that was meant to be read or just
junk that happened to be on disk when there was a power failure or data
corruption.

A weaker checksum could serve the same purpose.

> By the way, I would want to know if there are any updates on the
> review of the IDL Compound Indexes (pull request #127).

I guess I lost track.  I tend to forget about pull requests because they
don't show up in patchwork.  Sorry about that, I'll try to get to it
soon.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v2 4/5] dpif-netlink: add GRE creation support

2016-06-08 Thread Thadeu Lima de Souza Cascardo
Creates GRE devices using rtnetlink and tunnel metadata. If the kernel does
not support tunnel metadata, it will return EEXIST because of the fallback
tunnel. However, on kernels between v3.10 and v3.12, it will not. So, we need to
verify the created tunnel has the tunnel metadata attribute.

This was tested on kernels 4.2.3, 4.3.6, 4.4.9, 4.5.5 and RHEL 3.10 based. All
of them worked with the system traffic test "datapath - ping over gre tunnel".
Also tested on a 3.10 based kernel without the fix for the existing fallback
tunnel.  That is, the kernel would not return EEXIST. Yet, the test works fine.

Signed-off-by: Thadeu Lima de Souza Cascardo 
---
 lib/dpif-netlink.c | 134 +
 1 file changed, 134 insertions(+)

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 2845268..725d751 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -963,6 +964,12 @@ netdev_vxlan_destroy(const char *name)
 return netdev_linux_destroy(name);
 }
 
+static int
+netdev_gre_destroy(const char *name)
+{
+return netdev_linux_destroy(name);
+}
+
 /*
  * On some older systems, these enums are not defined.
  */
@@ -977,6 +984,10 @@ netdev_vxlan_destroy(const char *name)
 #define IFLA_VXLAN_COLLECT_METADATA 25
 #endif
 
+#if IFLA_GRE_MAX < 18
+#define IFLA_GRE_COLLECT_METADATA 18
+#endif
+
 static int
 netdev_vxlan_create(struct netdev *netdev)
 {
@@ -1030,6 +1041,115 @@ netdev_vxlan_create(struct netdev *netdev)
 return err;
 }
 
+/*
+ * On some Linux versions, creating the device with IFLA_GRE_COLLECT_METADATA
+ * will succeed, even though that attribute is not supported. We need to verify
+ * the device has been created with that attribute. In case it has not, we
+ * destroy it and use the compat code.
+ */
+static int
+netdev_gre_verify(const char *name)
+{
+int err;
+struct ofpbuf request, *reply;
+struct ifinfomsg *ifmsg;
+
+static const struct nl_policy rtlink_policy[] = {
+[IFLA_LINKINFO] = { .type = NL_A_NESTED },
+};
+static const struct nl_policy linkinfo_policy[] = {
+[IFLA_INFO_KIND] = { .type = NL_A_STRING },
+[IFLA_INFO_DATA] = { .type = NL_A_NESTED },
+};
+static const struct nl_policy gre_policy[] = {
+[IFLA_GRE_COLLECT_METADATA] = { .type = NL_A_FLAG },
+};
+
+ofpbuf_init(, 0);
+nl_msg_put_nlmsghdr(, 0, RTM_GETLINK,
+NLM_F_REQUEST);
+ofpbuf_put_zeros(, sizeof(struct ifinfomsg));
+nl_msg_put_string(, IFLA_IFNAME, name);
+
+err = nl_transact(NETLINK_ROUTE, , );
+if (!err) {
+struct nlattr *rtlink[ARRAY_SIZE(rtlink_policy)];
+struct nlattr *linkinfo[ARRAY_SIZE(linkinfo_policy)];
+struct nlattr *gre[ARRAY_SIZE(gre_policy)];
+
+err = EINVAL;
+ifmsg = ofpbuf_at(reply, NLMSG_HDRLEN, sizeof *ifmsg);
+if (nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *ifmsg,
+rtlink_policy, rtlink, ARRAY_SIZE(rtlink_policy))) {
+if (nl_parse_nested(rtlink[IFLA_LINKINFO], linkinfo_policy,
+linkinfo, ARRAY_SIZE(linkinfo_policy)) &&
+!strcmp(nl_attr_get_string(linkinfo[IFLA_INFO_KIND]),
+"gretap")) {
+if (nl_parse_nested(linkinfo[IFLA_INFO_DATA], gre_policy, gre,
+ARRAY_SIZE(gre_policy)) &&
+nl_attr_get_flag(gre[IFLA_GRE_COLLECT_METADATA])) {
+err = 0;
+}
+}
+}
+ofpbuf_uninit(reply);
+}
+ofpbuf_uninit();
+return err;
+}
+
+static int
+netdev_gre_create(struct netdev *netdev)
+{
+int err;
+struct ofpbuf request, *reply;
+size_t linkinfo_off, infodata_off;
+char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
+const char *name = netdev_vport_get_dpif_port(netdev,
+  namebuf, sizeof namebuf);
+struct ifinfomsg *ifinfo;
+const struct netdev_tunnel_config *tnl_cfg;
+tnl_cfg = netdev_get_tunnel_config(netdev);
+if (!tnl_cfg) { /* or assert? */
+return EINVAL;
+}
+
+ofpbuf_init(, 0);
+nl_msg_put_nlmsghdr(, 0, RTM_NEWLINK,
+NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE);
+ifinfo = ofpbuf_put_zeros(, sizeof(struct ifinfomsg));
+ifinfo->ifi_change = ifinfo->ifi_flags = IFF_UP;
+nl_msg_put_string(, IFLA_IFNAME, name);
+nl_msg_put_u32(, IFLA_MTU, UINT16_MAX);
+linkinfo_off = nl_msg_start_nested(, IFLA_LINKINFO);
+nl_msg_put_string(, IFLA_INFO_KIND, "gretap");
+infodata_off = nl_msg_start_nested(, IFLA_INFO_DATA);
+nl_msg_put_flag(, IFLA_GRE_COLLECT_METADATA);
+nl_msg_end_nested(, infodata_off);
+nl_msg_end_nested(, linkinfo_off);
+
+err = nl_transact(NETLINK_ROUTE, , );
+
+if (!err) {
+ofpbuf_uninit(reply);
+ 

[ovs-dev] [PATCH v2 5/5] dpif-netlink: add GENEVE creation support

2016-06-08 Thread Thadeu Lima de Souza Cascardo
Creates GENEVE devices using rtnetlink and tunnel metadata. If the kernel does
not support tunnel metadata, it will return EINVAL because of the missing ID and
REMOTE attributes.

This was tested on kernels 4.2.3, 4.3.6, 4.4.9 and 4.5.5. All of them worked
with the system traffic test "datapath - ping over geneve tunnel".

Signed-off-by: Thadeu Lima de Souza Cascardo 
---
 lib/dpif-netlink.c | 82 ++
 1 file changed, 82 insertions(+)

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 725d751..9b186b2 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -970,6 +970,12 @@ netdev_gre_destroy(const char *name)
 return netdev_linux_destroy(name);
 }
 
+static int
+netdev_geneve_destroy(const char *name)
+{
+return netdev_linux_destroy(name);
+}
+
 /*
  * On some older systems, these enums are not defined.
  */
@@ -988,6 +994,18 @@ netdev_gre_destroy(const char *name)
 #define IFLA_GRE_COLLECT_METADATA 18
 #endif
 
+#ifndef IFLA_GENEVE_MAX
+#define IFLA_GENEVE_MAX 0
+#define IFLA_GENEVE_PORT 5
+#endif
+
+#if IFLA_GENEVE_MAX < 6
+#define IFLA_GENEVE_COLLECT_METADATA 6
+#endif
+#if IFLA_GENEVE_MAX < 10
+#define IFLA_GENEVE_UDP_ZERO_CSUM6_RX 10
+#endif
+
 static int
 netdev_vxlan_create(struct netdev *netdev)
 {
@@ -1150,6 +1168,56 @@ netdev_gre_create(struct netdev *netdev)
 return err;
 }
 
+static int
+netdev_geneve_create(struct netdev *netdev)
+{
+int err;
+struct ofpbuf request, *reply;
+size_t linkinfo_off, infodata_off;
+char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
+const char *name = netdev_vport_get_dpif_port(netdev,
+  namebuf, sizeof namebuf);
+struct ifinfomsg *ifinfo;
+const struct netdev_tunnel_config *tnl_cfg;
+tnl_cfg = netdev_get_tunnel_config(netdev);
+if (!tnl_cfg) { /* or assert? */
+return EINVAL;
+}
+
+ofpbuf_init(, 0);
+nl_msg_put_nlmsghdr(, 0, RTM_NEWLINK,
+NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE);
+ifinfo = ofpbuf_put_zeros(, sizeof(struct ifinfomsg));
+ifinfo->ifi_change = ifinfo->ifi_flags = IFF_UP;
+nl_msg_put_string(, IFLA_IFNAME, name);
+nl_msg_put_u32(, IFLA_MTU, UINT16_MAX);
+linkinfo_off = nl_msg_start_nested(, IFLA_LINKINFO);
+nl_msg_put_string(, IFLA_INFO_KIND, "geneve");
+infodata_off = nl_msg_start_nested(, IFLA_INFO_DATA);
+nl_msg_put_flag(, IFLA_GENEVE_COLLECT_METADATA);
+nl_msg_put_u8(, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, 1);
+nl_msg_put_be16(, IFLA_GENEVE_PORT, tnl_cfg->dst_port);
+nl_msg_end_nested(, infodata_off);
+nl_msg_end_nested(, linkinfo_off);
+
+err = nl_transact(NETLINK_ROUTE, , );
+
+if (!err) {
+ofpbuf_uninit(reply);
+}
+
+/*
+ * Linux versions older than 4.3 will return EINVAL in case the GENEVE_ID 
is
+ * not set, which is sufficient to verify COLLECT_METADATA is supported.
+ */
+if (err == EINVAL) {
+err = EOPNOTSUPP;
+}
+
+ofpbuf_uninit();
+return err;
+}
+
 #else
 
 static int
@@ -1165,6 +1233,12 @@ netdev_gre_create(struct netdev *netdev OVS_UNUSED)
 }
 
 static int
+netdev_geneve_create(struct netdev *netdev OVS_UNUSED)
+{
+return EOPNOTSUPP;
+}
+
+static int
 netdev_vxlan_destroy(const char *name OVS_UNUSED)
 {
 return EOPNOTSUPP;
@@ -1176,6 +1250,12 @@ netdev_gre_destroy(const char *name OVS_UNUSED)
 return EOPNOTSUPP;
 }
 
+static int
+netdev_geneve_destroy(const char *name OVS_UNUSED)
+{
+return EOPNOTSUPP;
+}
+
 #endif
 
 static int
@@ -1191,6 +1271,7 @@ dpif_netlink_port_create(struct netdev *netdev)
 case OVS_VPORT_TYPE_GRE:
 return netdev_gre_create(netdev);
 case OVS_VPORT_TYPE_GENEVE:
+return netdev_geneve_create(netdev);
 case OVS_VPORT_TYPE_NETDEV:
 case OVS_VPORT_TYPE_INTERNAL:
 case OVS_VPORT_TYPE_LISP:
@@ -1212,6 +1293,7 @@ dpif_netlink_port_destroy(const char *name, const char 
*type)
 case OVS_VPORT_TYPE_GRE:
 return netdev_gre_destroy(name);
 case OVS_VPORT_TYPE_GENEVE:
+return netdev_geneve_destroy(name);
 case OVS_VPORT_TYPE_NETDEV:
 case OVS_VPORT_TYPE_INTERNAL:
 case OVS_VPORT_TYPE_LISP:
-- 
2.5.5

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v2 3/5] dpif-netlink: add VXLAN creation support

2016-06-08 Thread Thadeu Lima de Souza Cascardo
Creates VXLAN devices using rtnetlink and tunnel metadata. If the kernel does
not support tunnel metadata, it will return EINVAL because of the missing VNI
attribute.

This was tested on kernels 4.2.3, 4.3.6, 4.4.9, 4.5.5 and RHEL-based 3.10. All
of them worked with the system traffic test "datapath - ping over vxlan tunnel".

Signed-off-by: Thadeu Lima de Souza Cascardo 
---
 lib/dpif-netlink.c | 119 -
 1 file changed, 118 insertions(+), 1 deletion(-)

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index b624118..2845268 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -24,7 +24,9 @@
 #include 
 #include 
 #include 
+#include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -931,6 +933,119 @@ dpif_netlink_port_add_compat(struct dpif_netlink *dpif, 
struct netdev *netdev,
 
 }
 
+#ifdef __linux__
+
+static int
+netdev_linux_destroy(const char *name)
+{
+int err;
+struct ofpbuf request, *reply;
+
+ofpbuf_init(, 0);
+nl_msg_put_nlmsghdr(, 0, RTM_DELLINK,
+NLM_F_REQUEST | NLM_F_ACK);
+ofpbuf_put_zeros(, sizeof(struct ifinfomsg));
+nl_msg_put_string(, IFLA_IFNAME, name);
+
+err = nl_transact(NETLINK_ROUTE, , );
+
+if (!err) {
+ofpbuf_uninit(reply);
+}
+
+ofpbuf_uninit();
+return err;
+}
+
+static int
+netdev_vxlan_destroy(const char *name)
+{
+return netdev_linux_destroy(name);
+}
+
+/*
+ * On some older systems, these enums are not defined.
+ */
+
+#ifndef IFLA_VXLAN_MAX
+#define IFLA_VXLAN_MAX 0
+#define IFLA_VXLAN_PORT 15
+#endif
+#if IFLA_VXLAN_MAX < 20
+#define IFLA_VXLAN_UDP_ZERO_CSUM6_RX 20
+#define IFLA_VXLAN_GBP 23
+#define IFLA_VXLAN_COLLECT_METADATA 25
+#endif
+
+static int
+netdev_vxlan_create(struct netdev *netdev)
+{
+int err;
+struct ofpbuf request, *reply;
+size_t linkinfo_off, infodata_off;
+char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
+const char *name = netdev_vport_get_dpif_port(netdev,
+  namebuf, sizeof namebuf);
+struct ifinfomsg *ifinfo;
+const struct netdev_tunnel_config *tnl_cfg;
+tnl_cfg = netdev_get_tunnel_config(netdev);
+if (!tnl_cfg) { /* or assert? */
+return EINVAL;
+}
+
+ofpbuf_init(, 0);
+nl_msg_put_nlmsghdr(, 0, RTM_NEWLINK,
+NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE);
+ifinfo = ofpbuf_put_zeros(, sizeof(struct ifinfomsg));
+ifinfo->ifi_change = ifinfo->ifi_flags = IFF_UP;
+nl_msg_put_string(, IFLA_IFNAME, name);
+nl_msg_put_u32(, IFLA_MTU, UINT16_MAX);
+linkinfo_off = nl_msg_start_nested(, IFLA_LINKINFO);
+nl_msg_put_string(, IFLA_INFO_KIND, "vxlan");
+infodata_off = nl_msg_start_nested(, IFLA_INFO_DATA);
+nl_msg_put_u8(, IFLA_VXLAN_COLLECT_METADATA, 1);
+nl_msg_put_u8(, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, 1);
+if (tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GBP)) {
+nl_msg_put_flag(, IFLA_VXLAN_GBP);
+}
+nl_msg_put_be16(, IFLA_VXLAN_PORT, tnl_cfg->dst_port);
+nl_msg_end_nested(, infodata_off);
+nl_msg_end_nested(, linkinfo_off);
+
+err = nl_transact(NETLINK_ROUTE, , );
+
+if (!err) {
+ofpbuf_uninit(reply);
+}
+
+/*
+ * Linux versions older than 4.3 will return EINVAL in case the VID is not
+ * set, which is sufficient to verify COLLECT_METADATA is supported.
+ */
+if (err == EINVAL) {
+err = EOPNOTSUPP;
+}
+
+ofpbuf_uninit();
+return err;
+}
+
+#else
+
+static int
+netdev_vxlan_create(struct netdev *netdev OVS_UNUSED)
+{
+return EOPNOTSUPP;
+}
+
+static int
+netdev_vxlan_destroy(const char *name OVS_UNUSED)
+{
+return EOPNOTSUPP;
+}
+
+#endif
+
 static int
 dpif_netlink_port_query__(const struct dpif_netlink *dpif, odp_port_t port_no,
   const char *port_name, struct dpif_port *dpif_port);
@@ -940,6 +1055,7 @@ dpif_netlink_port_create(struct netdev *netdev)
 {
 switch (netdev_to_ovs_vport_type(netdev_get_type(netdev))) {
 case OVS_VPORT_TYPE_VXLAN:
+return netdev_vxlan_create(netdev);
 case OVS_VPORT_TYPE_GRE:
 case OVS_VPORT_TYPE_GENEVE:
 case OVS_VPORT_TYPE_NETDEV:
@@ -955,10 +1071,11 @@ dpif_netlink_port_create(struct netdev *netdev)
 }
 
 static int
-dpif_netlink_port_destroy(const char *name OVS_UNUSED, const char *type)
+dpif_netlink_port_destroy(const char *name, const char *type)
 {
 switch (netdev_to_ovs_vport_type(type)) {
 case OVS_VPORT_TYPE_VXLAN:
+return netdev_vxlan_destroy(name);
 case OVS_VPORT_TYPE_GRE:
 case OVS_VPORT_TYPE_GENEVE:
 case OVS_VPORT_TYPE_NETDEV:
-- 
2.5.5

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v2 2/5] dpif-netlink: break out code to add compat and non-compat vports

2016-06-08 Thread Thadeu Lima de Souza Cascardo
The vport type for adding tunnels is now compatibility code and any new features
from tunnels must configure the tunnel as an interface using the tunnel metadata
support.

In order to be able to add those tunnels, we need to add code to create the
tunnels and add them as NETDEV vports. And when there is no support to create
them, we need to use the compatibility code and add them as tunnel vports.

When removing those tunnels, we need to remove the interfaces as well, and
detecting the right type might be important, at least to distinguish the tunnel
vports that we should remove and the interfaces that we shouldn't.

Signed-off-by: Thadeu Lima de Souza Cascardo 
---
 lib/dpif-netlink.c | 195 +++--
 1 file changed, 144 insertions(+), 51 deletions(-)

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 9bff3a8..b624118 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -780,10 +780,8 @@ get_vport_type(const struct dpif_netlink_vport *vport)
 }
 
 static enum ovs_vport_type
-netdev_to_ovs_vport_type(const struct netdev *netdev)
+netdev_to_ovs_vport_type(const char *type)
 {
-const char *type = netdev_get_type(netdev);
-
 if (!strcmp(type, "tap") || !strcmp(type, "system")) {
 return OVS_VPORT_TYPE_NETDEV;
 } else if (!strcmp(type, "internal")) {
@@ -804,19 +802,14 @@ netdev_to_ovs_vport_type(const struct netdev *netdev)
 }
 
 static int
-dpif_netlink_port_add__(struct dpif_netlink *dpif, struct netdev *netdev,
+dpif_netlink_port_add__(struct dpif_netlink *dpif, const char *name,
+enum ovs_vport_type type,
+struct ofpbuf *options,
 odp_port_t *port_nop)
 OVS_REQ_WRLOCK(dpif->upcall_lock)
 {
-const struct netdev_tunnel_config *tnl_cfg;
-char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
-const char *name = netdev_vport_get_dpif_port(netdev,
-  namebuf, sizeof namebuf);
-const char *type = netdev_get_type(netdev);
 struct dpif_netlink_vport request, reply;
 struct ofpbuf *buf;
-uint64_t options_stub[64 / 8];
-struct ofpbuf options;
 struct nl_sock **socksp = NULL;
 uint32_t *upcall_pids;
 int error = 0;
@@ -831,52 +824,19 @@ dpif_netlink_port_add__(struct dpif_netlink *dpif, struct 
netdev *netdev,
 dpif_netlink_vport_init();
 request.cmd = OVS_VPORT_CMD_NEW;
 request.dp_ifindex = dpif->dp_ifindex;
-request.type = netdev_to_ovs_vport_type(netdev);
-if (request.type == OVS_VPORT_TYPE_UNSPEC) {
-VLOG_WARN_RL(_rl, "%s: cannot create port `%s' because it has "
- "unsupported type `%s'",
- dpif_name(>dpif), name, type);
-vport_del_socksp(dpif, socksp);
-return EINVAL;
-}
+request.type = type;
 request.name = name;
 
-if (request.type == OVS_VPORT_TYPE_NETDEV) {
-#ifdef _WIN32
-/* XXX : Map appropiate Windows handle */
-#else
-netdev_linux_ethtool_set_flag(netdev, ETH_FLAG_LRO, "LRO", false);
-#endif
-}
-
-tnl_cfg = netdev_get_tunnel_config(netdev);
-if (tnl_cfg && (tnl_cfg->dst_port != 0 || tnl_cfg->exts)) {
-ofpbuf_use_stack(, options_stub, sizeof options_stub);
-if (tnl_cfg->dst_port) {
-nl_msg_put_u16(, OVS_TUNNEL_ATTR_DST_PORT,
-   ntohs(tnl_cfg->dst_port));
-}
-if (tnl_cfg->exts) {
-size_t ext_ofs;
-int i;
-
-ext_ofs = nl_msg_start_nested(, OVS_TUNNEL_ATTR_EXTENSION);
-for (i = 0; i < 32; i++) {
-if (tnl_cfg->exts & (1 << i)) {
-nl_msg_put_flag(, i);
-}
-}
-nl_msg_end_nested(, ext_ofs);
-}
-request.options = options.data;
-request.options_len = options.size;
-}
-
 request.port_no = *port_nop;
 upcall_pids = vport_socksp_to_pids(socksp, dpif->n_handlers);
 request.n_upcall_pids = socksp ? dpif->n_handlers : 1;
 request.upcall_pids = upcall_pids;
 
+if (options) {
+request.options = options->data;
+request.options_len = options->size;
+}
+
 error = dpif_netlink_vport_transact(, , );
 if (!error) {
 *port_nop = reply.port_no;
@@ -916,6 +876,127 @@ exit:
 }
 
 static int
+dpif_netlink_port_add_compat(struct dpif_netlink *dpif, struct netdev *netdev,
+ odp_port_t *port_nop)
+OVS_REQ_WRLOCK(dpif->upcall_lock)
+{
+const struct netdev_tunnel_config *tnl_cfg;
+char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
+const char *name = netdev_vport_get_dpif_port(netdev,
+  namebuf, sizeof namebuf);
+const char *type = netdev_get_type(netdev);
+uint64_t options_stub[64 / 8];
+struct ofpbuf options;
+enum ovs_vport_type ovs_type;
+
+ovs_type = 

[ovs-dev] [PATCH v2 1/5] netdev: get device type from vport prefix if not found

2016-06-08 Thread Thadeu Lima de Souza Cascardo
If we cannot find the device type because it's not opened yet, check if it uses
a reserved prefix for a vport type and return that type.

Since these names are reserved, we can assume this is the right type.

This is important when we are querying the datapath right after vswitch has
started and using the right type will be even more important when we add support
to creating tunnel ports with rtnetlink.

Signed-off-by: Thadeu Lima de Souza Cascardo 
---
 lib/netdev.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/lib/netdev.c b/lib/netdev.c
index 4be806d..2109818 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -281,6 +281,21 @@ netdev_enumerate_types(struct sset *types)
 }
 }
 
+static const char *
+netdev_vport_type_from_name(const char *name)
+{
+struct netdev_registered_class *rc;
+const char *type;
+CMAP_FOR_EACH (rc, cmap_node, _classes) {
+const char *dpif_port = netdev_vport_class_get_dpif_port(rc->class);
+if (dpif_port && !strncmp(name, dpif_port, strlen(dpif_port))) {
+type = rc->class->type;
+return type;
+}
+}
+return NULL;
+}
+
 /* Check that the network device name is not the same as any of the registered
  * vport providers' dpif_port name (dpif_port is NULL if the vport provider
  * does not define it) or the datapath internal port name (e.g. ovs-system).
@@ -1762,6 +1777,9 @@ netdev_get_type_from_name(const char *name)
 struct netdev *dev = netdev_from_name(name);
 const char *type = dev ? netdev_get_type(dev) : NULL;
 netdev_close(dev);
+if (type == NULL) {
+return netdev_vport_type_from_name(name);
+}
 return type;
 }
 
-- 
2.5.5

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v2 0/5] create tunnel devices using rtnetlink interface

2016-06-08 Thread Thadeu Lima de Souza Cascardo
This series adds support for the creation of tunnels using the rtnetlink
interface. This will open the possibility for new features and flags on those
vports without the need to change vport compatibility code.

Support for STT and LISP have not been added because these are not upstream yet,
so we don't know how the interface will be like upstream. And there are no
features in the current drivers right now we could make use of.

We are able to set the MTU to UINT16_MAX since it is not restricted by the
driver during newlink.

Thadeu Lima de Souza Cascardo (5):
  netdev: get device type from vport prefix if not found
  dpif-netlink: break out code to add compat and non-compat vports
  dpif-netlink: add VXLAN creation support
  dpif-netlink: add GRE creation support
  dpif-netlink: add GENEVE creation support

 lib/dpif-netlink.c | 526 -
 lib/netdev.c   |  18 ++
 2 files changed, 494 insertions(+), 50 deletions(-)

-- 
2.5.5

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] sparse: Fix conflict between netinet/in.h and linux/in.h

2016-06-08 Thread Joe Stringer
On 1 June 2016 at 19:23, Daniele Di Proietto  wrote:
> linux/in.h (from linux uapi headers) carries many of the same
> definitions as netinet/in.h (from glibc).
>
> If linux/in.h is included after netinet/in.h, conflicts are avoided in
> two ways:
>
> 1) linux/libc-compat.h (included by linux/in.h) detects the include
>guard of netinet/in.h and defines some macros (e.g.
>__UAPI_DEF_IN_IPPROTO) to 0.  linux/in.h avoids exporting the same
>enums if those macros are 0.
>
> 2) The two files are allowed to redefine the same macros as long as the
>values are the same.
>
> Our include/sparse/netinet/in.h creates problems, because:
>
> 1) It uses a custom include guard
> 2) It uses dummy values for some macros.
>
> This commit changes include/sparse/netinet/in.h to use the same include
> guard as glibc netinet/in.h, and to use the same values for some macros.
>
> I think this problem is present with linux headers after
> a263653ed798("netfilter: don't pull include/linux/netfilter.h from netns
> headers") which cause our lib/netlink-conntrack.c to include linux/in.h
> after netinet/in.h.
>
> sample output from sparse:
>
> /usr/include/linux/in.h:29:9: warning: preprocessor token IPPROTO_IP
> redefined
> ../include/sparse/netinet/in.h:60:9: this was the original definition
> /usr/include/linux/in.h:31:9: warning: preprocessor token IPPROTO_ICMP
> redefined
> ../include/sparse/netinet/in.h:63:9: this was the original definition
> [...]
> /usr/include/linux/in.h:28:3: error: bad enum definition
> /usr/include/linux/in.h:28:3: error: Expected } at end of specifier
> /usr/include/linux/in.h:28:3: error: got 0
> /usr/include/linux/in.h:84:16: error: redefinition of struct in_addr
>
> Signed-off-by: Daniele Di Proietto 

I was noticing these issues too, thanks for fixing them.

Tested-by: Joe Stringer 
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH 3/3] JSON serialization via Python's json lib

2016-06-08 Thread Terry Wilson
There is no particularly good reason to use our own Python JSON
serialization implementation when serialization can be done faster
with Python's built-in JSON library.

A few tests were changed due to Python's default JSON library
returning slightly more precise floating point numbers.

Signed-off-by: Terry Wilson 
---
 configure.ac   |   2 +
 m4/openvswitch.m4  |  47 
 python/automake.mk |  22 +++
 python/ovs/json.py | 106 +
 tests/json.at  |  26 ++---
 5 files changed, 101 insertions(+), 102 deletions(-)

diff --git a/configure.ac b/configure.ac
index 05d80d5..5472a52 100644
--- a/configure.ac
+++ b/configure.ac
@@ -96,6 +96,8 @@ OVS_CHECK_LIBCAPNG
 OVS_CHECK_LOGDIR
 OVS_CHECK_PYTHON
 OVS_CHECK_PYTHON3
+OVS_CHECK_PYTHON_HEADERS
+OVS_CHECK_PYTHON3_HEADERS
 OVS_CHECK_FLAKE8
 OVS_CHECK_DOT
 OVS_CHECK_IF_PACKET
diff --git a/m4/openvswitch.m4 b/m4/openvswitch.m4
index a448223..5b81154 100644
--- a/m4/openvswitch.m4
+++ b/m4/openvswitch.m4
@@ -589,3 +589,50 @@ AC_DEFUN([OVS_CHECK_PRAGMA_MESSAGE],
  [AC_DEFINE(HAVE_PRAGMA_MESSAGE,1,[Define if compiler supports #pragma
  message directive])])
   ])
+
+dnl OVS_CHECK_PYTHON_HEADERS
+AC_DEFUN([OVS_CHECK_PYTHON_HEADERS],
+  [AC_REQUIRE([OVS_CHECK_PYTHON])
+AC_PATH_PROG([PYTHON_CONFIG], python-config, no)
+if test "$PYTHON_CONFIG" != no; then
+  PYTHON_INCLUDES=`$PYTHON_CONFIG --includes`
+  PYTHON_LIBS=`$PYTHON_CONFIG --libs`
+  PYTHON_LDFLAGS=`$PYTHON_CONFIG --ldflags`
+  save_LIBS="$LIBS"
+  save_CPPFLAGS="$CPPFLAGS"
+  save_LDFLAGS="$LDFLAGS"
+  LIBS="$PYTHON_LIBS"
+  LDFLAGS="$PYTHON_LDFLAGS"
+  CPPFLAGS="$PYTHON_INCLUDES"
+  AC_LINK_IFELSE(
+[AC_LANG_PROGRAM([#include ],[])],
+ [have_py_headers=true])
+  LIBS="$save_LIBS"
+  CPPFLAGS="$save_CPPFLAGS"
+  LDFLAGS="$save_LDFLAGS"
+fi
+   AM_CONDITIONAL([HAVE_PYTHON_HEADERS], [test "$have_py_headers" = "true"])])
+  ])
+
+AC_DEFUN([OVS_CHECK_PYTHON3_HEADERS],
+  [AC_REQUIRE([OVS_CHECK_PYTHON3])
+AC_PATH_PROG([PYTHON3_CONFIG], python3-config, no)
+if test "$PYTHON3_CONFIG" != no; then
+  PYTHON3_INCLUDES=`$PYTHON3_CONFIG --includes`
+  PYTHON3_LIBS=`$PYTHON3_CONFIG --libs`
+  PYTHON3_LDFLAGS=`$PYTHON3_CONFIG --ldflags`
+  save_LIBS="$LIBS"
+  save_CPPFLAGS="$CPPFLAGS"
+  save_LDFLAGS="$LDFLAGS"
+  LIBS="$PYTHON3_LIBS"
+  LDFLAGS="$PYTHON3_LDFLAGS"
+  CPPFLAGS="$PYTHON3_INCLUDES"
+  AC_LINK_IFELSE(
+[AC_LANG_PROGRAM([#include ],[])],
+ [have_py_headers=true])
+  LIBS="$save_LIBS"
+  CPPFLAGS="$save_CPPFLAGS"
+  LDFLAGS="$save_LDFLAGS"
+fi
+   AM_CONDITIONAL([HAVE_PYTHON3_HEADERS], [test "$have_py_headers" = "true"])])
+  ])
diff --git a/python/automake.mk b/python/automake.mk
index ecad39d..9ae2487 100644
--- a/python/automake.mk
+++ b/python/automake.mk
@@ -112,3 +112,25 @@ $(srcdir)/python/ovs/dirs.py: python/ovs/dirs.py.template
< $? > $@.tmp && \
mv $@.tmp $@
 EXTRA_DIST += python/ovs/dirs.py.template
+
+.PHONY : clean_python_extensions
+clean_python_extensions:
+   (cd python/ && rm -f ovs/*.so)
+
+if HAVE_PYTHON_HEADERS
+$(srcdir)/python/ovs/_json.so:
+   (cd python/ && $(PYTHON) setup.py build_ext --inplace)
+
+ALL_LOCAL += $(srcdir)/python/ovs/_json.so
+endif
+
+if HAVE_PYTHON3_HEADERS
+PY3_EXT_NAME=$(srcdir)/python/ovs/_json$(shell $(PYTHON3) -c \
+"from distutils import 
sysconfig;print(sysconfig.get_config_var('EXT_SUFFIX'))")
+$(PY3_EXT_NAME):
+   (cd python/ && $(PYTHON3) setup.py build_ext --inplace)
+
+ALL_LOCAL += $(PY3_EXT_NAME)
+endif
+
+CLEAN_LOCAL += clean_python_extensions
diff --git a/python/ovs/json.py b/python/ovs/json.py
index ea0400a..ddf5dd2 100644
--- a/python/ovs/json.py
+++ b/python/ovs/json.py
@@ -12,11 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import absolute_import
+import functools
+import json
 import re
 import sys
 
 import six
-from six.moves import range
 
 try:
 import ovs._json
@@ -25,112 +27,24 @@ except ImportError:
 
 __pychecker__ = 'no-stringiter'
 
-escapes = {ord('"'): u"\\\"",
-   ord("\\"): u"",
-   ord("\b"): u"\\b",
-   ord("\f"): u"\\f",
-   ord("\n"): u"\\n",
-   ord("\r"): u"\\r",
-   ord("\t"): u"\\t"}
-for esc in range(32):
-if esc not in escapes:
-escapes[esc] = u"\\u%04x" % esc
-
 SPACES_PER_LEVEL = 2
-
-
-class _Serializer(object):
-def __init__(self, stream, pretty, sort_keys):
-self.stream = stream
-self.pretty = pretty
-self.sort_keys = sort_keys
-self.depth = 0
-
-def __serialize_string(self, s):
-self.stream.write(u'"%s"' % ''.join(escapes.get(ord(c), c) for c in s))
-
-

[ovs-dev] [PATCH 2/3] Add optional C extension wrapper for Python JSON parsing

2016-06-08 Thread Terry Wilson
The pure Python in-tree JSON parser is *much* slower than the
in-tree C JSON parser. A local test parsing a 100Mb JSON file
showed the Python version taking 270 seconds. With the C wrapper,
it took under 4 seconds.

The C extension will be used automatically if it can be built. If
the extension fails to build, a warning is displayed and the build
is restarted without the extension.

The Serializer class is replaced with Python's built-in
JSON library since the ability to process chunked data is not
needed in that case.

The extension should work with both Python 2.7 and Python 3.3+.

Signed-off-by: Terry Wilson 
---
 Makefile.am|   2 +-
 python/automake.mk |   3 +
 python/ovs/_json.c | 268 +
 python/ovs/json.py |  11 +++
 python/setup.py|  51 +-
 5 files changed, 332 insertions(+), 3 deletions(-)
 create mode 100644 python/ovs/_json.c

diff --git a/Makefile.am b/Makefile.am
index 69dbe3d..8cb8523 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -251,7 +251,7 @@ config-h-check:
@cd $(srcdir); \
if test -e .git && (git --version) >/dev/null 2>&1 && \
   git --no-pager grep -L '#include ' `git ls-files | grep 
'\.c$$' | \
-   grep -vE 
'^datapath|^lib/sflow|^third-party|^datapath-windows'`; \
+   grep -vE 
'^datapath|^lib/sflow|^third-party|^datapath-windows|^python'`; \
then \
echo "See above for list of violations of the rule that"; \
echo "every C source file must #include ."; \
diff --git a/python/automake.mk b/python/automake.mk
index dc62422..ecad39d 100644
--- a/python/automake.mk
+++ b/python/automake.mk
@@ -46,6 +46,9 @@ EXTRA_DIST += \
python/README.rst \
python/setup.py
 
+# C extension support.
+EXTRA_DIST += python/ovs/_json.c
+
 PYFILES = $(ovs_pyfiles) python/ovs/dirs.py $(ovstest_pyfiles)
 EXTRA_DIST += $(PYFILES)
 PYCOV_CLEAN_FILES += $(PYFILES:.py=.py,cover)
diff --git a/python/ovs/_json.c b/python/ovs/_json.c
new file mode 100644
index 000..c4e2af3
--- /dev/null
+++ b/python/ovs/_json.c
@@ -0,0 +1,268 @@
+#include "Python.h"
+#include 
+#include "structmember.h"
+
+#if PY_MAJOR_VERSION >= 3
+#define IS_PY3K
+#endif
+
+typedef struct {
+PyObject_HEAD
+struct json_parser *_parser;
+} json_ParserObject;
+
+static void
+Parser_dealloc(json_ParserObject * p)
+{
+json_parser_abort(p->_parser);
+Py_TYPE(p)->tp_free(p);
+}
+
+static PyObject *
+Parser_new(PyTypeObject * type, PyObject * args, PyObject * kwargs)
+{
+json_ParserObject *self;
+static char *kwlist[] = { "check_trailer", NULL };
+PyObject *check_trailer = NULL;
+int ct_int = 0;
+
+if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O", kwlist,
+ _trailer)) {
+return NULL;
+}
+
+if (check_trailer != NULL) {
+ct_int = PyObject_IsTrue(check_trailer);
+if (ct_int < 0) {
+return NULL;
+} else if (ct_int) {
+ct_int = JSPF_TRAILER;
+}
+}
+
+self = (json_ParserObject *) type->tp_alloc(type, 0);
+if (self != NULL) {
+self->_parser = json_parser_create(ct_int);
+}
+
+return (PyObject *) self;
+}
+
+static PyObject *
+Parser_feed(json_ParserObject * self, PyObject * args)
+{
+Py_ssize_t input_sz;
+PyObject *input;
+size_t rd;
+char *input_str;
+
+if (self->_parser == NULL) {
+return NULL;
+}
+
+if (!PyArg_UnpackTuple(args, "input", 1, 1, )) {
+return NULL;
+}
+#ifdef IS_PY3K
+if ((input_str = PyUnicode_AsUTF8AndSize(input, _sz)) == NULL) {
+#else
+if (PyString_AsStringAndSize(input, _str, _sz) < 0) {
+#endif
+return NULL;
+}
+
+rd = json_parser_feed(self->_parser, input_str, (size_t) input_sz);
+
+#ifdef IS_PY3K
+return PyLong_FromSize_t(rd);
+#else
+return PyInt_FromSize_t(rd);
+#endif
+}
+
+static PyObject *
+Parser_is_done(json_ParserObject * self)
+{
+if (self->_parser == NULL) {
+return NULL;
+}
+return PyBool_FromLong(json_parser_is_done(self->_parser));
+}
+
+static PyObject *
+json_to_python(struct json *json)
+{
+switch (json->type) {
+case JSON_NULL:
+Py_RETURN_NONE;
+case JSON_FALSE:
+Py_RETURN_FALSE;
+case JSON_TRUE:
+Py_RETURN_TRUE;
+case JSON_OBJECT:{
+struct shash_node *node;
+PyObject *dict = PyDict_New();
+
+if (dict == NULL) {
+return PyErr_NoMemory();
+}
+SHASH_FOR_EACH(node, json->u.object) {
+PyObject *key = PyUnicode_FromString(node->name);
+PyObject *val = json_to_python(node->data);
+
+if (!(key && val) || PyDict_SetItem(dict, key, val)) {
+Py_XDECREF(key);
+Py_XDECREF(val);
+Py_XDECREF(dict);
+return NULL;
+

[ovs-dev] [PATCH 1/3] Ensure significand remains an integer in Python3 json parser

2016-06-08 Thread Terry Wilson
The / operation in Python 2 is "floor division" for int/long types
while in Python 3 is "true division". This means that the
significand can become a float with the existing code in Python 3.
This, in turn, can result in a parse of something like [1.10e1]
returning 11 in Python 2 and 11.0 in Python 3. Switching to the
// operator resolves this difference.

The JSON tests do not catch this difference because the built-in
serializer prints floats with the %.15g format which will convert
floats with no fractional part to an integer representation.

Signed-off-by: Terry Wilson 
---
 python/ovs/json.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/ovs/json.py b/python/ovs/json.py
index 42e697d..ff986ea 100644
--- a/python/ovs/json.py
+++ b/python/ovs/json.py
@@ -280,7 +280,7 @@ class Parser(object):
 significand *= 10
 pow10 -= 1
 while pow10 < 0 and significand % 10 == 0:
-significand /= 10
+significand //= 10
 pow10 += 1
 if (pow10 == 0 and
 ((not sign and significand < 2 ** 63) or
-- 
1.8.3.1

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v4 0/3] Python JSON improvements

2016-06-08 Thread Terry Wilson
This patch set adds an optional C extension wrapper for the
Python JSON parser. In local tests, it sped up parsing a 100Mb
JSON file by over 70x. It also switches to using the built-in
Python JSON serializer and fixes a small difference between
Python 2 and 3 when parsing numbers.

Hopefully, the build system changes are resolved in this
version.

Terry Wilson (3):
  Ensure significand remains an integer in Python3 json parser
  Add optional C extension wrapper for Python JSON parsing
  JSON serialization via Python's json lib

 Makefile.am|   2 +-
 configure.ac   |   2 +
 m4/openvswitch.m4  |  47 ++
 python/automake.mk |  25 +
 python/ovs/_json.c | 268 +
 python/ovs/json.py | 119 +---
 python/setup.py|  51 +-
 tests/json.at  |  26 --
 8 files changed, 434 insertions(+), 106 deletions(-)
 create mode 100644 python/ovs/_json.c

-- 
1.8.3.1

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] ofproto-dpif-upcall: prevent memory leak on log message

2016-06-08 Thread Thadeu Lima de Souza Cascardo
On Wed, Jun 08, 2016 at 09:58:51AM -0700, Joe Stringer wrote:
> On 8 June 2016 at 09:04, Thadeu Lima de Souza Cascardo
>  wrote:
> > When DPIF does not support UFID (like old kernels), it may print this 
> > message
> > quite frequently, if using an OVS version that does not include the 
> > upstream fix
> > af50de8 ("ofproto-dpif-upcall: Pass key to dpif_flow_get().").
> >
> > Signed-off-by: Thadeu Lima de Souza Cascardo 
> > Fixes: af50de8 ("ofproto-dpif-upcall: Pass key to dpif_flow_get().")
> 
> Thanks for the fix.
> 
> It seems to me that af50de8 ("ofproto-dpif-upcall: Pass key to
> dpif_flow_get().") makes this problem less likely, while 64bb477f0568
> ("dpif: Minimize memory copy for revalidation.") is the commit which
> actually introduced the bug. I updated the "Fixes:" tag and used this
> commit to determine which branches the fix should apply to.
> 
> Applied to master, branch-2.5 and branch-2.4.

Yes, I meant 64bb477f0568. My bad while copy and pasting the commit reference.

Thanks.
Cascardo.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] Get Back On Track With Our Breakthrough Solution

2016-06-08 Thread dev
Are you tired of the excess weight and would like to get back on track without 
any painful efforts?

Brand-new formula and advanced structure of our exclusive product will 
compliment your results
and will help you to lose weight with no side-effects.


Click Here!

Stock is limited, make sure to get yours in order to have a beautiful and 
healthy body!
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] Question about SHA1 usage in OVSDB Server

2016-06-08 Thread Rodriguez Betancourt, Esteban
Hello,
I was performing some profiling on OVSDB, when inserting a lot of rows. The 
callgrind
results shows that the SHA1 calculation takes near 10% of the time within our 
test
(the whole file writing, including SHA1, takes like 20%).

We want to know further about why that checksum is need, in order to decide how
it could be optimized (we are considering: using processor specific 
instructions, changing
the algorithm or removing it at all).

By the way, I would want to know if there are any updates on the review of the 
IDL Compound
Indexes (pull request #127).

Thanks,
Esteban
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] ovs-numa: Fix a compilation error

2016-06-08 Thread Daniele Di Proietto
Acked-by: Daniele Di Proietto 

2016-06-07 23:19 GMT-07:00 YAMAMOTO Takashi :

> Fix the following error on NetBSD 7.0.
>
> ../lib/ovs-numa.c: In function 'ovs_numa_set_cpu_mask':
> ../lib/ovs-numa.c:555:9: error: array subscript has type 'char'
> [-Werror=char-subscripts]
>
> Signed-off-by: YAMAMOTO Takashi 
> ---
>  lib/ovs-numa.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c
> index 6750a14..7652636 100644
> --- a/lib/ovs-numa.c
> +++ b/lib/ovs-numa.c
> @@ -552,7 +552,7 @@ ovs_numa_set_cpu_mask(const char *cmask)
>  }
>
>  for (i = strlen(cmask) - 1; i >= 0; i--) {
> -char hex = toupper(cmask[i]);
> +char hex = toupper((unsigned char)cmask[i]);
>  int bin, j;
>
>  if (hex >= '0' && hex <= '9') {
> --
> 2.5.4 (Apple Git-61)
>
> ___
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
>
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] ofproto-dpif-upcall: prevent memory leak on log message

2016-06-08 Thread Joe Stringer
On 8 June 2016 at 09:04, Thadeu Lima de Souza Cascardo
 wrote:
> When DPIF does not support UFID (like old kernels), it may print this message
> quite frequently, if using an OVS version that does not include the upstream 
> fix
> af50de8 ("ofproto-dpif-upcall: Pass key to dpif_flow_get().").
>
> Signed-off-by: Thadeu Lima de Souza Cascardo 
> Fixes: af50de8 ("ofproto-dpif-upcall: Pass key to dpif_flow_get().")

Thanks for the fix.

It seems to me that af50de8 ("ofproto-dpif-upcall: Pass key to
dpif_flow_get().") makes this problem less likely, while 64bb477f0568
("dpif: Minimize memory copy for revalidation.") is the commit which
actually introduced the bug. I updated the "Fixes:" tag and used this
commit to determine which branches the fix should apply to.

Applied to master, branch-2.5 and branch-2.4.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] WIP OVN ND for Logical_Port

2016-06-08 Thread Justin Pettit

> On Jun 8, 2016, at 1:12 AM, Zong Kai LI  wrote:
> 
> I know Justin is working on implement router patch port for IPv6 switch, but 
> I'm not sure whether will Justin also work on implement ND or not.

Yes, I'm planning to add support for neighbor discovery and router 
advertisements as part of IPv6 support.

--Justin


___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] OpenStack Proposal: Integration with OVN L3 Gateway

2016-06-08 Thread Guru Shetty
On 8 June 2016 at 09:16, Amitabha Biswas  wrote:

> Hi Brian,
>
> The each gateway router only has a single transit network attached to it,
> and each gateway router to connected to the common provider/external
> network as well. We can probably do away with the transit network if we use
> router pairing (at a later point).
>

The transit network would be needed with router peering too. (Router
peering will only reduce a logical switch creation in OVN NB database apart
from internal implementation details around logical datapaths).

>
>
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] OpenStack Proposal: Integration with OVN L3 Gateway

2016-06-08 Thread Amitabha Biswas
Here is the proposal in etherpad, I hope it makes it more readable -

https://etherpad.openstack.org/p/Integration_with_OVN_L3_Gateway 


Thanks
Amitabha

> On Jun 8, 2016, at 5:59 AM, Kyle Mestery  wrote:
> 
> On Tue, Jun 7, 2016 at 3:50 PM, Amitabha Biswas  wrote:
>> This proposal outlines the modifications needed in networking-ovn (addresses 
>> https://bugs.launchpad.net/networking-ovn/+bug/1551717 
>> ) to provide 
>> Floating IP (FIP) and SNAT using the L3 gateway router patches listed below:
>> 
>> http://patchwork.ozlabs.org/patch/624312/ 
>> 
>> http://patchwork.ozlabs.org/patch/624313/ 
>> 
>> http://patchwork.ozlabs.org/patch/624314/ 
>> 
>> http://patchwork.ozlabs.org/patch/624315/ 
>> 
>> http://patchwork.ozlabs.org/patch/629607/ 
>> 
>> 
>> Diagram:
>> 
>> +---+   +---+
>> | NET 1 |   | NET 2 |
>> +---+   +---+
>>   |   |
>>   |   *   |
>>   | ** ** |
>>   |   ***  * **   |
>>   +---RP1 *  DR   * RP2 --+
>>   ***  * **
>> ** **
>>   *
>>  DTRP (168.254.128.2)
>>   |
>>   |
>>   |
>>   +--+
>>   | Transit Network  |
>>   | 169.254.128.0/30 |
>>   +--+
>>   |
>>   |
>>   |
>>   |
>>  GTRP (169.254.128.1)
>>***
>>  **   **
>>**   *   *   ** +--+
>>* GW  *-| Provider Network |
>>**   *   *   ** +--+
>>  **   **
>>***
>> 
>> New Entities:
>> 
>> OVN Join/Transit Networks
>> One per Neutron Router - /30 address space with only 2 ports for e.g. 
>> 169.254.128.0/30
>> Created when an external gateway is added to a router.
>> One extra datapath per router with an External Gateway.
>> (Alternate option - One Transit Network in a deployment, IPAM becomes a 
>> headache - Not discussed here).
>> Prevent Neutron from using that /30 address space. Specify in networking-ovn 
>> con file.
>> Create 1 new “Join” neutron network (to represent all Join OVN Networks) in 
>> the networking-ovn.
>> Note that it may be possible to replace the Join/Transit network using 
>> Router Peering in later versions  (not discussed here).
>> Allocate 2 ports in the Join network in the networking-ovn plugin.
>> Logical Gateway Transit Router Port (gtrp), 169.254.128.1
>> Logical Distributed Transit Router Port (dtrp), 169.254.128.2
>> Note that Neutron only sees 1 Join network with 2 ports; OVN sees a replica 
>> of this Join network as a new Logical Switch for each Gateway Router. The 
>> mapping of OVN Logical Switch(es) Join(s) to Gateway Router is discussed in 
>> OVN (Default) Gateway Routers below.
>> Note that the MAC addresses of lgrp and lip will be the same on each OVN 
>> Join Network, but because they are in different branches of the network 
>> topology it doesn’t matter.
>> OVN (Default) Gateway Routers:
>> One per Neutron Router.
>> 2 ports
>> Logical Gateway Transit Router Port (gtrp), 169.254.128.1 (same for each OVN 
>> Join network).
>> External/Provider Router Port (legwrp), this is allocated by neutron.
>> Scheduling - The current OVN gateway proposal relies on the CMS/nbctl to 
>> decide on which hypervisor (HV) to schedule a particular gateway router.
>> A setting on the chassis (new external_id key or a new column) that allows 
>> the hypervisor admin to specify that a chassis can or cannot be used to host 
>> a gateway router (similar to a network node in OpenStack). Default - Allow 
>> (for compatibility purposes).
>> The networking-ovn plugin picks up the list of “candidate” chassis from the 
>> Southbound DB and uses an existing scheduling algorithm
>> Use a simple random.choice i.e. ChanceScheduler (Version 1)
>> Tap into the neutron’s LeastRouterScheduler - but that requires the 
>> networking-ovn (or some a hacked up version of the L3 agent) to imitate the 
>> L3 agent running on various network nodes.
>> Populate the SNAT and DNAT columns in the logical router table. This is 
>> under review in OVS - 
>> http://openvswitch.org/pipermail/dev/2016-June/072169.html 
>> 
>> Create static routing entry in the gateway router to route tenant bound 

Re: [ovs-dev] OpenStack Proposal: Integration with OVN L3 Gateway

2016-06-08 Thread Amitabha Biswas
Hi Brian,

The each gateway router only has a single transit network attached to it, and 
each gateway router to connected to the common provider/external network as 
well. We can probably do away with the transit network if we use router pairing 
(at a later point).

I will change the transit network from /30 to /31 and not allow broadcast.

My email client mangled up the earlier email, I have the proposal also written 
up in etherpad:

https://etherpad.openstack.org/p/Integration_with_OVN_L3_Gateway 
.

Amitabha

> On Jun 8, 2016, at 7:18 AM, Brian Haley  wrote:
> 
> On 06/07/2016 04:50 PM, Amitabha Biswas wrote:
>> This proposal outlines the modifications needed in networking-ovn (addresses 
>> https://bugs.launchpad.net/networking-ovn/+bug/1551717 
>> ) to provide 
>> Floating IP (FIP) and SNAT using the L3 gateway router patches listed below:
>> 
>> http://patchwork.ozlabs.org/patch/624312/ 
>> 
>> http://patchwork.ozlabs.org/patch/624313/ 
>> 
>> http://patchwork.ozlabs.org/patch/624314/ 
>> 
>> http://patchwork.ozlabs.org/patch/624315/ 
>> 
>> http://patchwork.ozlabs.org/patch/629607/ 
>> 
>> 
>> Diagram:
>> 
>> +---+   +---+
>> | NET 1 |   | NET 2 |
>> +---+   +---+
>>|   |
>>|   *   |
>>| ** ** |
>>|   ***  * **   |
>>+---RP1 *  DR   * RP2 --+
>>***  * **
>>  ** **
>>*
>>   DTRP (168.254.128.2)
>>|
>>|
>>|
>>+--+
>>| Transit Network  |
>>| 169.254.128.0/30 |
>>+--+
>>|
>>|
>>|
>>|
>>   GTRP (169.254.128.1)
>> ***
>>   **   **
>> **   *   *   ** +--+
>> * GW  *-| Provider Network |
>> **   *   *   ** +--+
>>   **   **
>> ***
>> 
>> New Entities:
>> 
>> OVN Join/Transit Networks
>> One per Neutron Router - /30 address space with only 2 ports for e.g. 
>> 169.254.128.0/30
>> Created when an external gateway is added to a router.
> 
> Hi Amitabha,
> 
> If this is just a point-to-point link you can use /31, the neutron DVR code 
> does this for inter-namespace links between elements.  You just need to be 
> careful and not specify a broadcast address when adding the IP.  I'm assuming 
> the GW in this case has multiple transit networks attached?
> 
> -Brian
> 
> 
>> One extra datapath per router with an External Gateway.
>> (Alternate option - One Transit Network in a deployment, IPAM becomes a 
>> headache - Not discussed here).
>> Prevent Neutron from using that /30 address space. Specify in networking-ovn 
>> con file.
>> Create 1 new “Join” neutron network (to represent all Join OVN Networks) in 
>> the networking-ovn.
>> Note that it may be possible to replace the Join/Transit network using 
>> Router Peering in later versions  (not discussed here).
>> Allocate 2 ports in the Join network in the networking-ovn plugin.
>> Logical Gateway Transit Router Port (gtrp), 169.254.128.1
>> Logical Distributed Transit Router Port (dtrp), 169.254.128.2
>> Note that Neutron only sees 1 Join network with 2 ports; OVN sees a replica 
>> of this Join network as a new Logical Switch for each Gateway Router. The 
>> mapping of OVN Logical Switch(es) Join(s) to Gateway Router is discussed in 
>> OVN (Default) Gateway Routers below.
>> Note that the MAC addresses of lgrp and lip will be the same on each OVN 
>> Join Network, but because they are in different branches of the network 
>> topology it doesn’t matter.
>> OVN (Default) Gateway Routers:
>> One per Neutron Router.
>> 2 ports
>> Logical Gateway Transit Router Port (gtrp), 169.254.128.1 (same for each OVN 
>> Join network).
>> External/Provider Router Port (legwrp), this is allocated by neutron.
>> Scheduling - The current OVN gateway proposal relies on the CMS/nbctl to 
>> decide on which hypervisor (HV) to schedule a particular gateway router.
>> A setting on the chassis (new external_id key or a new column) that allows 
>> the hypervisor admin to specify that a chassis can or cannot be used to host 
>> a gateway router (similar to a network node in OpenStack). Default - Allow 
>> (for compatibility purposes).

[ovs-dev] [PATCH] ofproto-dpif-upcall: prevent memory leak on log message

2016-06-08 Thread Thadeu Lima de Souza Cascardo
When DPIF does not support UFID (like old kernels), it may print this message
quite frequently, if using an OVS version that does not include the upstream fix
af50de8 ("ofproto-dpif-upcall: Pass key to dpif_flow_get().").

Signed-off-by: Thadeu Lima de Souza Cascardo 
Fixes: af50de8 ("ofproto-dpif-upcall: Pass key to dpif_flow_get().")
---
 ofproto/ofproto-dpif-upcall.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
index 1374950..a18fc5a 100644
--- a/ofproto/ofproto-dpif-upcall.c
+++ b/ofproto/ofproto-dpif-upcall.c
@@ -2056,6 +2056,7 @@ log_unexpected_flow(const struct dpif_flow *flow, int 
error)
   "unexpected flow (%s): ", ovs_strerror(error));
 odp_format_ufid(>ufid, );
 VLOG_WARN_RL(, "%s", ds_cstr());
+ds_destroy();
 }
 
 static void
-- 
2.5.5

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] VxLAN-gpe implementation

2016-06-08 Thread Alexander Duyck
On Wed, Jun 8, 2016 at 7:48 AM, Hannes Frederic Sowa  wrote:
> On 08.06.2016 14:51, Jiri Benc wrote:
>> On Mon, 6 Jun 2016 14:22:58 -0700, Jesse Gross wrote:
>>> On Sat, Jun 4, 2016 at 6:39 AM, Yi Yang  wrote:
>>> [...]
  datapath/vport-netdev.c   |   3 +-
  datapath/vport-vxlan.c|  17 ++-
>>>
>>> These changes aren't upstream yet. Please do that before backporting them 
>>> here.
>>>
>>> However, the changes to vport-vxlan.c are modifying compatibility code
>>> that shouldn't be extended further. Instead, just use the existing
>>> VXLAN netlink interfaces that have already been created to enable
>>> these features.
>>>
>>> There is also a number of other patches to the OVS kernel module/VXLAN
>>> that have not been backported. Pravin started doing this work but it
>>> hasn't been applied yet. In general, I think it makes sense to
>>> backport patches in order so that the diffs of the patches match those
>>> of upstream.
>>>
>>> Finally, I have a question about receive side offloading with
>>> VXLAN-gpe. This is primarily an upstream issue but is present in the
>>> code being backported here as well. The VXLAN code sets up receive
>>> offloads for all ports regardless of whether they are classic VXLAN or
>>> L2/L3 GPE and expects NICs to parse the packets. I don't think this is
>>> safe because there are a number of NICs out there that predate the
>>> existence of GPE and therefore won't do this parsing correctly. I
>>> think that it is necessary to disable receive offloading for
>>> non-Ethernet VXLAN-GPE unless the offloading interface is extended.
>>
>> Coincidentally, I was talking about this with Hannes a few days ago.
>> I'm adding him to CC.
>>
>> I guess you're referring to ndo_add_vxlan_port, right? I agree that
>> this interface needs changes, especially considering that we know
>> whether the UDP port belongs to VXLAN or VXLAN-GPE. But from my
>> understanding of how drivers use this callback, the worst thing that
>> could happen is suboptimal generation of rx hashes and thus steering
>> the packets to a different receive queue than in the optimal case.
>> Surely something to fix but it seems it won't cause much functional
>> troubles with the current code?
>
> I am not sure if we must upgrade the interface. Can't drivers always
> configure vxlan-gpe and are always backwards compatible?
>
> Non vxlan-gpe capable hardware would have to abort checksum offloading
> as soon as they can't interpret the vxlan header anyway, so the packets
> end up on the slow path and nothing bad should happen.
>
> Possibly some hardware will verify inner checksums despite it could not
> understand the vxlan header completely. In this case we probably will
> drop the packet in the driver. Anyway, I would be in favor to simply
> present one knob, namely vxlan-offloading, to the user, instead a knob
> for each version of vxlan.
>
> Unfortunately I couldn't get a definitive answer from the specs
> regarding the checksuming details.
>
> Bye,
> Hannes

This is starting to sound like the same conversation we had on netdev
when the ndo_add_geneve_port was added.  One easy fix for guaranteeing
that we can perform the checksum offload is to just enable the outer
UDP checksum.  Then we can still perform GRO and use the outer UDP
source port for generating a hash.  If possible we should make this
the default for all new UDP based tunnels going forward simply because
it allows for backwards-compatibility with existing offloads.

Really I wonder if we shouldn't just start pulling out all the support
for the ndo_add_vxlan/geneve_port code anyway, or at least start
forcing it to include more data such as an endpoint IP/IPv6 address
and tunnel type.  There end up being a number of cases where the
offload could end up asserting itself where it isn't actually wanted
such as a mixed environment where the PF is setting up the offloads
via ndo_add_vxlan_port and then trying to apply that to VFs or other
PFs on the same device which aren't operating in the same network
namespace.  The worst case ends up being that you have UDP checksum
offloads stripped for flows that would have otherwise been reporting
valid checksum because the interfaces think they have tunnels with an
invalid inner checksum, or not L4 checksum at all.

- Alex
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] at test vlog: Switch from stderr to log

2016-06-08 Thread Paul Boca

Acked-by: Paul-Daniel Boca 
Tested-by: Paul-Daniel Boca 

> -Original Message-
> From: dev [mailto:dev-boun...@openvswitch.org] On Behalf Of Alin Serdean
> Sent: Wednesday, June 8, 2016 5:02 PM
> To: dev@openvswitch.org
> Subject: [ovs-dev] [PATCH] at test vlog: Switch from stderr to log
> 
> Using the --detach parameter the child does not propagate the first
> message to the parent.
> 
> Proposed change use the log file instead of the stderr.
> 
> Signed-off-by: Alin Gabriel Serdean 
> ---
>  tests/vlog.at | 10 ++
>  1 file changed, 6 insertions(+), 4 deletions(-)
> 
> diff --git a/tests/vlog.at b/tests/vlog.at
> index b793611..b96f394 100644
> --- a/tests/vlog.at
> +++ b/tests/vlog.at
> @@ -278,8 +278,9 @@ on_exit 'kill `cat test-unixctl.pid`'
>  AT_CAPTURE_FILE([log])
>  AT_CAPTURE_FILE([log.old])
>  AT_CHECK([ovstest test-unixctl --log-file=`pwd`/log --pidfile --detach],
> -  [0], [], [stderr])
> -AT_CHECK([vlog_filt stderr], [0], [opened log file
> +  [0], [], [ignore])
> +AT_CHECK([vlog_filt log], [0], [opened log file
> +Entering run loop.
>  ])
> 
>  AT_CHECK([APPCTL -t test-unixctl log message])
> @@ -355,8 +356,9 @@ on_exit 'kill `cat test-unixctl.pid`'
> 
>  AT_CAPTURE_FILE([log])
>  AT_CHECK([ovstest test-unixctl --log-file=`pwd`/log --pidfile --detach],
> -  [0], [], [stderr])
> -AT_CHECK([vlog_filt stderr], [0], [opened log file
> +  [0], [], [ignore])
> +AT_CHECK([vlog_filt log], [0], [opened log file
> +Entering run loop.
>  ])
> 
>  AT_CHECK([APPCTL -t test-unixctl vlog/list | sed -n '1,2p
> --
> 1.9.5.msysgit.0
> ___
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] netdev-dpdk: NUMA Aware vHost User

2016-06-08 Thread Loftus, Ciara
> Thanks for the patch!
> I'm not sure how to best handle the libnuma dependency. Question:
> Is it still useful to move the device to a PMD thread on the appropriate
> numa socket, even if DPDK is compiled without
> CONFIG_RTE_LIBRTE_VHOST_NUMA? If it's useful, I'm fine with the
> approach followed by this patch.  Otherwise I think we should handle
> the -lnuma inclusion like -lfuse for CUSE and introduce two ifdefs (one
> on #include  and one on new_device()).
> Small comments inline, otherwise this looks good to me.
> Thanks,
> Daniele

Hi Daniele,

Thanks for the feedback. I'll address your comments in the next revision.
Regarding your question above - as it is now, the PMD will only be relocated if 
the config option is enabled.
If the config option is not enabled it behaves as before, so the case you 
mention above will not occur.

Thanks,
Ciara

> 
> 2016-05-24 6:15 GMT-07:00 Ciara Loftus :
> This commit allows for vHost User memory from QEMU, DPDK and OVS, as
> well as the servicing PMD, to all come from the same socket.
> 
> The socket id of a vhost-user port used to be set to that of the master
> lcore. Now it is possible to update the socket id if it is detected
> (during VM boot) that the vhost device memory is not on this node. If
> this is the case, a new mempool is created from the new node, and the
> PMD thread currently servicing the port will no longer, in favour of a
> thread from the new node (if enabled in the pmd-cpu-mask).
> 
> To avail of this functionality, one must enable the
> CONFIG_RTE_LIBRTE_VHOST_NUMA DPDK configuration option.
> 
> Signed-off-by: Ciara Loftus 
> ---
>  .travis.yml                     |  3 +++
>  INSTALL.DPDK.md                 |  8 ++--
>  NEWS                            |  3 +++
>  acinclude.m4                    |  2 +-
>  lib/netdev-dpdk.c               | 37 ++--
> -
>  rhel/openvswitch-fedora.spec.in |  1 +
>  6 files changed, 48 insertions(+), 6 deletions(-)
> 
> diff --git a/.travis.yml b/.travis.yml
> index ee2cf21..faba325 100644
> --- a/.travis.yml
> +++ b/.travis.yml
> @@ -11,10 +11,13 @@ addons:
>      packages:
>        - bc
>        - gcc-multilib
> +      - libnuma1
> 
> I think libnuma-dev depends on libnuma1, so the above line might not be
> necessary.
> 
> +      - libnuma-dev
>        - libssl-dev
>        - llvm-dev
>        - libjemalloc1
>        - libjemalloc-dev
> +      - numactl
> 
> Do we need the numactl package?
> 
> 
>  before_install: ./.travis/${TRAVIS_OS_NAME}-prepare.sh
> 
> diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
> index 93f92e4..bbe0234 100644
> --- a/INSTALL.DPDK.md
> +++ b/INSTALL.DPDK.md
> @@ -16,7 +16,7 @@ OVS needs a system with 1GB hugepages support.
>  Building and Installing:
>  
> 
> -Required: DPDK 16.04
> +Required: DPDK 16.04, libnuma
>  Optional (if building with vhost-cuse): `fuse`, `fuse-devel` (`libfuse-dev`
>  on Debian/Ubuntu)
> 
> @@ -443,7 +443,11 @@ Performance Tuning:
> 
>         It is good practice to ensure that threads that are in the datapath 
> are
>         pinned to cores in the same NUMA area. e.g. pmd threads and QEMU
> vCPUs
> -       responsible for forwarding.
> +       responsible for forwarding. If DPDK is built with
> +       CONFIG_RTE_LIBRTE_VHOST_NUMA=y, vHost User ports automatically
> +       detect the NUMA socket of the QEMU vCPUs and will be serviced by a
> PMD
> +       from the same node provided a core on this node is enabled in the
> +       pmd-cpu-mask.
> 
>    9. Rx Mergeable buffers
> 
> diff --git a/NEWS b/NEWS
> index 4e81cad..24ca39f 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -32,6 +32,9 @@ Post-v2.5.0
>       * DB entries have been added for many of the DPDK EAL command line
>         arguments. Additional arguments can be passed via the dpdk-extra
>         entry.
> +     * PMD threads servicing vHost User ports can now come from the NUMA
> +       node that device memory is located on if
> CONFIG_RTE_LIBRTE_VHOST_NUMA
> +       is enabled in DPDK.
>     - ovs-benchmark: This utility has been removed due to lack of use and
>       bitrot.
>     - ovs-appctl:
> diff --git a/acinclude.m4 b/acinclude.m4
> index f3de855..99ddf04 100644
> --- a/acinclude.m4
> +++ b/acinclude.m4
> @@ -218,7 +218,7 @@ AC_DEFUN([OVS_CHECK_DPDK], [
>      DPDKLIB_FOUND=false
>      save_LIBS=$LIBS
>      for extras in "" "-ldl"; do
> -        LIBS="$DPDK_LIB $extras $save_LIBS $DPDK_EXTRA_LIB"
> +        LIBS="$DPDK_LIB $extras $save_LIBS $DPDK_EXTRA_LIB -lnuma"
>          AC_LINK_IFELSE(
>             [AC_LANG_PROGRAM([#include 
>                               #include ],
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 0d1b8c9..ad6c4bb 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -30,6 +30,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
> 
>  #include "dirs.h"
>  #include "dp-packet.h"
> @@ -378,6 +379,9 @@ struct netdev_dpdk {
>       * 

Re: [ovs-dev] [PATCH] at tests: Allow Python tests to be run on Windows

2016-06-08 Thread Ben Pfaff
Our usual practice would be to fix the test failures before enabling the
tests; this could for example be the final patch in a series that fixes
them.

On Wed, Jun 08, 2016 at 03:31:06PM +, Alin Serdean wrote:
> ERROR: 225 tests were run,
> 59 failed (1 expected failure).
> 226 tests were skipped.
> 
> Result given by running only the python tests.
> 
> Paul will add more patches on top of this one to fix the failures.
> 
> I can send this patch later on, after all his patches get integrated on top 
> of master.
> 
> Alin.
> 
> > -Mesaj original-
> > De la: Ben Pfaff [mailto:b...@ovn.org]
> > Trimis: Wednesday, June 8, 2016 6:07 PM
> > Către: Alin Serdean 
> > Cc: dev@openvswitch.org
> > Subiect: Re: [ovs-dev] [PATCH] at tests: Allow Python tests to be run on
> > Windows
> > 
> > On Wed, Jun 08, 2016 at 02:16:28PM +, Alin Serdean wrote:
> > > This patch removes the code which disables Python tests to be run on
> > > Windows.
> > >
> > > Signed-off-by: Alin Gabriel Serdean 
> > 
> > Do the Python tests pass on Windows now, then?
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] at tests: Allow Python tests to be run on Windows

2016-06-08 Thread Alin Serdean
ERROR: 225 tests were run,
59 failed (1 expected failure).
226 tests were skipped.

Result given by running only the python tests.

Paul will add more patches on top of this one to fix the failures.

I can send this patch later on, after all his patches get integrated on top of 
master.

Alin.

> -Mesaj original-
> De la: Ben Pfaff [mailto:b...@ovn.org]
> Trimis: Wednesday, June 8, 2016 6:07 PM
> Către: Alin Serdean 
> Cc: dev@openvswitch.org
> Subiect: Re: [ovs-dev] [PATCH] at tests: Allow Python tests to be run on
> Windows
> 
> On Wed, Jun 08, 2016 at 02:16:28PM +, Alin Serdean wrote:
> > This patch removes the code which disables Python tests to be run on
> > Windows.
> >
> > Signed-off-by: Alin Gabriel Serdean 
> 
> Do the Python tests pass on Windows now, then?
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] at tests: Allow Python tests to be run on Windows

2016-06-08 Thread Ben Pfaff
On Wed, Jun 08, 2016 at 02:16:28PM +, Alin Serdean wrote:
> This patch removes the code which disables Python tests to be run on
> Windows.
> 
> Signed-off-by: Alin Gabriel Serdean 

Do the Python tests pass on Windows now, then?
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] VxLAN-gpe implementation

2016-06-08 Thread Hannes Frederic Sowa
On 08.06.2016 14:51, Jiri Benc wrote:
> On Mon, 6 Jun 2016 14:22:58 -0700, Jesse Gross wrote:
>> On Sat, Jun 4, 2016 at 6:39 AM, Yi Yang  wrote:
>> [...]
>>>  datapath/vport-netdev.c   |   3 +-
>>>  datapath/vport-vxlan.c|  17 ++-
>>
>> These changes aren't upstream yet. Please do that before backporting them 
>> here.
>>
>> However, the changes to vport-vxlan.c are modifying compatibility code
>> that shouldn't be extended further. Instead, just use the existing
>> VXLAN netlink interfaces that have already been created to enable
>> these features.
>>
>> There is also a number of other patches to the OVS kernel module/VXLAN
>> that have not been backported. Pravin started doing this work but it
>> hasn't been applied yet. In general, I think it makes sense to
>> backport patches in order so that the diffs of the patches match those
>> of upstream.
>>
>> Finally, I have a question about receive side offloading with
>> VXLAN-gpe. This is primarily an upstream issue but is present in the
>> code being backported here as well. The VXLAN code sets up receive
>> offloads for all ports regardless of whether they are classic VXLAN or
>> L2/L3 GPE and expects NICs to parse the packets. I don't think this is
>> safe because there are a number of NICs out there that predate the
>> existence of GPE and therefore won't do this parsing correctly. I
>> think that it is necessary to disable receive offloading for
>> non-Ethernet VXLAN-GPE unless the offloading interface is extended.
> 
> Coincidentally, I was talking about this with Hannes a few days ago.
> I'm adding him to CC.
> 
> I guess you're referring to ndo_add_vxlan_port, right? I agree that
> this interface needs changes, especially considering that we know
> whether the UDP port belongs to VXLAN or VXLAN-GPE. But from my
> understanding of how drivers use this callback, the worst thing that
> could happen is suboptimal generation of rx hashes and thus steering
> the packets to a different receive queue than in the optimal case.
> Surely something to fix but it seems it won't cause much functional
> troubles with the current code?

I am not sure if we must upgrade the interface. Can't drivers always
configure vxlan-gpe and are always backwards compatible?

Non vxlan-gpe capable hardware would have to abort checksum offloading
as soon as they can't interpret the vxlan header anyway, so the packets
end up on the slow path and nothing bad should happen.

Possibly some hardware will verify inner checksums despite it could not
understand the vxlan header completely. In this case we probably will
drop the packet in the driver. Anyway, I would be in favor to simply
present one knob, namely vxlan-offloading, to the user, instead a knob
for each version of vxlan.

Unfortunately I couldn't get a definitive answer from the specs
regarding the checksuming details.

Bye,
Hannes

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH v6 2/2] doc: Refactor DPDK install guide, add ADVANCED doc

2016-06-08 Thread Bodireddy, Bhanuprakash
Thanks Mauricio for your comments. Comments inline.

>+### 3.7 Compiler Optimizations
>+
>+  The default compiler optimization level is '-O2'. Changing this to
>+  more aggressive compiler optimization such as '-O3 -march=native'
>+  with gcc(verified on 5.3.1) can produce performance gains though not
>+  siginificant. '-march=native' will produce optimized code on local machine
>+  and should be used when SW compilation is done on Testbed.
>+
>+##  4. Performance Tuning
>+
>
>Reading section 3 I feel all the setting are oriented to the performance, then
>what is the difference between section 3 and 4?

Section 3  talks about BIOS settings, PCIe slot selection, core isolation, 
NUMA. I have recommended best known configuration but this can vary between 
server platforms and can be ignored in few cases. Section 4 talks about pmd, 
qemu threads affinity, MQ, EMC, mrg_rxbuf which are quite generic ones and have 
to be tuned by users looking for extra performance. 
As you pointed both sections3, 4 are all about achieving optimum performance 
with OVS DPDK.

>+### 4.1 Affinity
>+
>+For superior performance, DPDK pmd threads and Qemu vCPU threads
>+needs to be affinitized accordingly.
>+
>+  * PMD thread Affinity
>+
>+    A poll mode driver (pmd) thread handles the I/O of all DPDK
>+    interfaces assigned to it. A pmd thread shall poll the ports
>+    for incoming packets, switch the packets and send to tx port.
>+    pmd thread is CPU bound, and needs to be affinitized to isolated
>+    cores for optimum performance.
>+
>+    By setting a bit in the mask, a pmd thread is created and pinned
>+    to the corresponding CPU core. e.g. to run a pmd thread on core 2
>+
>+    `ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=4`
>+
>+    Note: pmd thread on a NUMA node is only created if there is
>+    at least one DPDK interface from that NUMA node added to OVS.
>+
>+  * Qemu vCPU thread Affinity
>+
>+    A VM performing simple packet forwarding or running complex packet
>+    pipelines has to ensure that the vCPU threads performing the work has
>+    as much CPU occupancy as possible.
>+
>+    Example: On a multicore VM, multiple QEMU vCPU threads shall be
>spawned.
>+    when the DPDK 'testpmd' application that does packet forwarding
>+    is invoked, 'taskset' cmd should be used to affinitize the vCPU threads
>+    to the dedicated isolated cores on the host system.
>+
>+### 4.2 Multiple poll mode driver threads
>+

Regards,
Bhanu Prakash.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] OpenStack Proposal: Integration with OVN L3 Gateway

2016-06-08 Thread Brian Haley

On 06/07/2016 04:50 PM, Amitabha Biswas wrote:

This proposal outlines the modifications needed in networking-ovn (addresses 
https://bugs.launchpad.net/networking-ovn/+bug/1551717 
) to provide Floating 
IP (FIP) and SNAT using the L3 gateway router patches listed below:

http://patchwork.ozlabs.org/patch/624312/ 

http://patchwork.ozlabs.org/patch/624313/ 

http://patchwork.ozlabs.org/patch/624314/ 

http://patchwork.ozlabs.org/patch/624315/ 

http://patchwork.ozlabs.org/patch/629607/ 


Diagram:

+---+   +---+
| NET 1 |   | NET 2 |
+---+   +---+
|   |
|   *   |
| ** ** |
|   ***  * **   |
+---RP1 *  DR   * RP2 --+
***  * **
  ** **
*
   DTRP (168.254.128.2)
|
|
|
+--+
| Transit Network  |
| 169.254.128.0/30 |
+--+
|
|
|
|
   GTRP (169.254.128.1)
 ***
   **   **
 **   *   *   ** +--+
 * GW  *-| Provider Network |
 **   *   *   ** +--+
   **   **
 ***

New Entities:

OVN Join/Transit Networks
One per Neutron Router - /30 address space with only 2 ports for e.g. 
169.254.128.0/30
Created when an external gateway is added to a router.


Hi Amitabha,

If this is just a point-to-point link you can use /31, the neutron DVR code does 
this for inter-namespace links between elements.  You just need to be careful 
and not specify a broadcast address when adding the IP.  I'm assuming the GW in 
this case has multiple transit networks attached?


-Brian



One extra datapath per router with an External Gateway.
(Alternate option - One Transit Network in a deployment, IPAM becomes a 
headache - Not discussed here).
Prevent Neutron from using that /30 address space. Specify in networking-ovn 
con file.
Create 1 new “Join” neutron network (to represent all Join OVN Networks) in the 
networking-ovn.
Note that it may be possible to replace the Join/Transit network using Router 
Peering in later versions  (not discussed here).
Allocate 2 ports in the Join network in the networking-ovn plugin.
Logical Gateway Transit Router Port (gtrp), 169.254.128.1
Logical Distributed Transit Router Port (dtrp), 169.254.128.2
Note that Neutron only sees 1 Join network with 2 ports; OVN sees a replica of 
this Join network as a new Logical Switch for each Gateway Router. The mapping 
of OVN Logical Switch(es) Join(s) to Gateway Router is discussed in OVN 
(Default) Gateway Routers below.
Note that the MAC addresses of lgrp and lip will be the same on each OVN Join 
Network, but because they are in different branches of the network topology it 
doesn’t matter.
OVN (Default) Gateway Routers:
One per Neutron Router.
2 ports
Logical Gateway Transit Router Port (gtrp), 169.254.128.1 (same for each OVN 
Join network).
External/Provider Router Port (legwrp), this is allocated by neutron.
Scheduling - The current OVN gateway proposal relies on the CMS/nbctl to decide 
on which hypervisor (HV) to schedule a particular gateway router.
A setting on the chassis (new external_id key or a new column) that allows the 
hypervisor admin to specify that a chassis can or cannot be used to host a 
gateway router (similar to a network node in OpenStack). Default - Allow (for 
compatibility purposes).
The networking-ovn plugin picks up the list of “candidate” chassis from the 
Southbound DB and uses an existing scheduling algorithm
Use a simple random.choice i.e. ChanceScheduler (Version 1)
Tap into the neutron’s LeastRouterScheduler - but that requires the 
networking-ovn (or some a hacked up version of the L3 agent) to imitate the L3 
agent running on various network nodes.
Populate the SNAT and DNAT columns in the logical router table. This is under review 
in OVS - http://openvswitch.org/pipermail/dev/2016-June/072169.html 

Create static routing entry in the gateway router to route tenant bound traffic 
to the distributed logical router.ar gate

Existing Entities:

Distributed Logical Routers:
Set the default gateway of the distributed logical router to the IP Address of 
the corresponding Logical Gateway Transit Router Port 

[ovs-dev] [PATCH] at tests: Allow Python tests to be run on Windows

2016-06-08 Thread Alin Serdean
This patch removes the code which disables Python tests to be run on
Windows.

Signed-off-by: Alin Gabriel Serdean 
---
 tests/atlocal.in | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/tests/atlocal.in b/tests/atlocal.in
index f174061..410199f 100644
--- a/tests/atlocal.in
+++ b/tests/atlocal.in
@@ -109,13 +109,6 @@ else
 HAVE_IPV6=no
 fi
 
-# XXX: Disable Python related tests on Windows because Open vSwitch code
-# written in Python has not been ported to the Windows platform. We will
-# need to remove the next block after porting is complete.
-if test "$IS_WIN32" = "yes"; then
-HAVE_PYTHON="no"
-fi
-
 if test "$HAVE_PYTHON" = "yes" \
&& test "x`$PYTHON $abs_top_srcdir/tests/test-l7.py --help | grep 'ftp'`" 
!= x; then
 HAVE_PYFTPDLIB="yes"
-- 
1.9.5.msysgit.0
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH] at test vlog: Switch from stderr to log

2016-06-08 Thread Alin Serdean
Using the --detach parameter the child does not propagate the first
message to the parent.

Proposed change use the log file instead of the stderr.

Signed-off-by: Alin Gabriel Serdean 
---
 tests/vlog.at | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tests/vlog.at b/tests/vlog.at
index b793611..b96f394 100644
--- a/tests/vlog.at
+++ b/tests/vlog.at
@@ -278,8 +278,9 @@ on_exit 'kill `cat test-unixctl.pid`'
 AT_CAPTURE_FILE([log])
 AT_CAPTURE_FILE([log.old])
 AT_CHECK([ovstest test-unixctl --log-file=`pwd`/log --pidfile --detach],
-  [0], [], [stderr])
-AT_CHECK([vlog_filt stderr], [0], [opened log file
+  [0], [], [ignore])
+AT_CHECK([vlog_filt log], [0], [opened log file
+Entering run loop.
 ])
 
 AT_CHECK([APPCTL -t test-unixctl log message])
@@ -355,8 +356,9 @@ on_exit 'kill `cat test-unixctl.pid`'
 
 AT_CAPTURE_FILE([log])
 AT_CHECK([ovstest test-unixctl --log-file=`pwd`/log --pidfile --detach],
-  [0], [], [stderr])
-AT_CHECK([vlog_filt stderr], [0], [opened log file
+  [0], [], [ignore])
+AT_CHECK([vlog_filt log], [0], [opened log file
+Entering run loop.
 ])
 
 AT_CHECK([APPCTL -t test-unixctl vlog/list | sed -n '1,2p
-- 
1.9.5.msysgit.0
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v3] datapath-windows: Add GRE checksum

2016-06-08 Thread Alin Serdean
This patch introduces GRE checksum computation if the userspace requires
it on Tx. On Rx we verify the GRE checksum if the checksum bit was
specified and also inform the userspace about it.

Also fix the GRE header length as specified by the GRE flags not the
tunnel flags.

Signed-off-by: Alin Gabriel Serdean 
---
v3: Fix access length and address comments
v2: Initial commit
---
 datapath-windows/ovsext/Gre.c | 79 ++-
 datapath-windows/ovsext/Gre.h | 16 +
 2 files changed, 65 insertions(+), 30 deletions(-)

diff --git a/datapath-windows/ovsext/Gre.c b/datapath-windows/ovsext/Gre.c
index cb41593..bee3877 100644
--- a/datapath-windows/ovsext/Gre.c
+++ b/datapath-windows/ovsext/Gre.c
@@ -135,7 +135,8 @@ OvsDoEncapGre(POVS_VPORT_ENTRY vport,
 IPHdr *ipHdr;
 PGREHdr greHdr;
 POVS_GRE_VPORT vportGre;
-UINT32 headRoom = GreTunHdrSize(tunKey->flags);
+PCHAR pChk = NULL;
+UINT32 headRoom = GreTunHdrSize(OvsTunnelFlagsToGreFlags(tunKey->flags));
 #if DBG
 UINT32 counterHeadRoom;
 #endif
@@ -259,6 +260,7 @@ OvsDoEncapGre(POVS_VPORT_ENTRY vport,
 
 if (tunKey->flags & OVS_TNL_F_CSUM) {
 RtlZeroMemory(currentOffset, 4);
+pChk = currentOffset;
 currentOffset += 4;
 #if DBG
 counterHeadRoom -= 4;
@@ -275,6 +277,17 @@ OvsDoEncapGre(POVS_VPORT_ENTRY vport,
 #endif
 }
 
+/* Checksum needs to be done after the GRE header has been set */
+if (tunKey->flags & OVS_TNL_F_CSUM) {
+ASSERT(pChk);
+UINT16 chksum =
+CalculateChecksumNB(curNb,
+(UINT16)(NET_BUFFER_DATA_LENGTH(curNb) -
+ sizeof *ipHdr - sizeof *ethHdr),
+sizeof *ipHdr + sizeof *ethHdr);
+RtlCopyMemory(pChk, , 2);
+}
+
 #if DBG
 ASSERT(counterHeadRoom == 0);
 #endif
@@ -299,34 +312,21 @@ OvsDecapGre(POVS_SWITCH_CONTEXT switchContext,
 EthHdr *ethHdr;
 IPHdr *ipHdr;
 GREHdr *greHdr;
-UINT32 tunnelSize = 0, packetLength = 0;
+UINT32 tunnelSize, packetLength;
 UINT32 headRoom = 0;
 PUINT8 bufferStart;
 NDIS_STATUS status;
 
 curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
 packetLength = NET_BUFFER_DATA_LENGTH(curNb);
-tunnelSize = GreTunHdrSize(tunKey->flags);
+curMdl = NET_BUFFER_CURRENT_MDL(curNb);
+tunnelSize = GreTunHdrSize(0);
 if (packetLength <= tunnelSize) {
 return NDIS_STATUS_INVALID_LENGTH;
 }
 
-/*
- * Create a copy of the NBL so that we have all the headers in one MDL.
- */
-*newNbl = OvsPartialCopyNBL(switchContext, curNbl,
-tunnelSize + OVS_DEFAULT_COPY_SIZE, 0,
-TRUE /*copy NBL info */);
-
-if (*newNbl == NULL) {
-return NDIS_STATUS_RESOURCES;
-}
-
-curNbl = *newNbl;
-curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
-curMdl = NET_BUFFER_CURRENT_MDL(curNb);
-bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, 
LowPagePriority) +
-  NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
+/* Get a contignuous buffer for the maxmimum length of a GRE header */
+bufferStart = NdisGetDataBuffer(curNb, OVS_MAX_GRE_LGTH, NULL, 1, 0);
 if (!bufferStart) {
 status = NDIS_STATUS_RESOURCES;
 goto dropNbl;
@@ -346,16 +346,35 @@ OvsDecapGre(POVS_SWITCH_CONTEXT switchContext,
 greHdr = (GREHdr *)((PCHAR)ipHdr + sizeof *ipHdr);
 headRoom += sizeof *greHdr;
 
+tunnelSize = GreTunHdrSize(greHdr->flags);
+
+/* Verify the packet length after looking at the GRE flags*/
+if (packetLength <= tunnelSize) {
+return NDIS_STATUS_INVALID_LENGTH;
+}
+
 /* Validate if GRE header protocol type. */
 if (greHdr->protocolType != GRE_NET_TEB) {
-status = STATUS_NDIS_INVALID_PACKET;
-goto dropNbl;
+*newNbl = NULL;
+return STATUS_NDIS_INVALID_PACKET;
 }
 
 PCHAR currentOffset = (PCHAR)greHdr + sizeof *greHdr;
 
 if (greHdr->flags & GRE_CSUM) {
 tunKey->flags |= OVS_TNL_F_CSUM;
+UINT16 prevChksum = *((UINT16 *)currentOffset);
+RtlZeroMemory(currentOffset, 2);
+UINT16 chksum =
+CalculateChecksumNB(curNb,
+(UINT16)(NET_BUFFER_DATA_LENGTH(curNb) -
+(ipHdr->ihl * 4 + sizeof *ethHdr)),
+ipHdr->ihl * 4 + sizeof *ethHdr);
+if (prevChksum != chksum) {
+*newNbl = NULL;
+return STATUS_NDIS_INVALID_PACKET;
+}
+RtlCopyMemory(currentOffset, , 2);
 currentOffset += 4;
 headRoom += 4;
 }
@@ -369,11 +388,25 @@ OvsDecapGre(POVS_SWITCH_CONTEXT switchContext,
 headRoom += 4;
 }
 
+/*
+ * Create a copy of the NBL so that we have all the headers 

Re: [ovs-dev] [PATCH] [PATCH v3 1/2] ovn-controller: Add 'na' action for ND

2016-06-08 Thread Zong Kai LI
On Wed, Jun 8, 2016 at 6:19 PM, Numan Siddique  wrote:

>
>> --- a/ovn/ovn-sb.xml
>> +++ b/ovn/ovn-sb.xml
>> @@ -985,6 +985,55 @@
>>Prerequisite: ip4
>>  
>>
>> +
>> +  na{A; action; ...
>> };
>> +
>> +
>> +
>> +  
>> +Temporarily replaces the IPv6 packet being processed by an NA
>> +packet and executes each nested action on the NA
>> +packet.  Actions following the na action, if any,
>> apply
>> +to the original, unmodified packet.
>> +  
>> +
>> +  
>> +The NA packet that this action operates on is initialized
>> based on
>> +the IPv6 packet being processed(with userdata), as follows:
>> +  
>> +
>> +  
>> +eth.dst copied from eth.src
>> +eth.src copied from userdata
>> +eth.type = 0x86dd
>> +ip6.dst copied from
>> ip6.src
>> +ip6.src copied from
>> nd.target
>> +icmp6.type = 136 (Neighbor
>> Advertisement)
>> +nd.target unchanged
>> +nd.sll = 00:00:00:00:00:00
>> +nd.sll copied from userdata
>> +  
>> +
>> +  
>> +These are default values that the nested actions will
>> probably want
>> +to change:
>> +  
>> +
>> +  
>> +reg0 = 0x1(Mark as replied by
>> ovn-controller)
>> +outport copied from inport
>> +inport = ""
>> +  
>> +
>> +The ND packet has the same VLAN header, if any, as the IP
>> packet
>> +it replaces.
>> +  
>> +
>> +  
>> +Prerequisite: ndicmp6.type ==
>> 135
>> +  
>> +
>> +
>>
>>
> ​Hi Zong Kai LI,
> I am seeing compilation errors when I apply this patch. Probably some xml
> tag above has a mismatch.
>
> ---
>   File "/usr/lib64/python2.7/xml/dom/expatbuilder.py", line 207, in
> parseFile
> parser.Parse(buffer, 0)
> xml.parsers.expat.ExpatError: mismatched tag: line 1035, column 10
> Makefile:6208: recipe for target 'ovn/ovn-sb.5' failed
>
> ​​
>
>>
>> ___
>> dev mailing list
>> dev@openvswitch.org
>> http://openvswitch.org/mailman/listinfo/dev
>>
>
>
Hi, Numan.
Sorry for that mistake, the correct things should be like:
diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml
index 5189401..857105d 100644
--- a/ovn/ovn-sb.xml
+++ b/ovn/ovn-sb.xml
@@ -1017,7 +1017,7 @@
   
 These are default values that the nested actions will probably
want
 to change:
-  
+  

   
 reg0 = 0x1(Mark as replied by
ovn-controller)
@@ -1025,6 +1025,7 @@
 inport = ""
   

+  
 The ND packet has the same VLAN header, if any, as the IP
packet
 it replaces.
   

And I noticed you add a comment in the other patch: "This patch fails to
apply on the latest master.".
I'm not sure what does that mean? Once things get clear, I will try to
submit a new patch later.

Thanks for your time and have a nice day! :)

Best regards,
Zong Kai, LI
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] OpenStack Proposal: Integration with OVN L3 Gateway

2016-06-08 Thread Kyle Mestery
On Tue, Jun 7, 2016 at 3:50 PM, Amitabha Biswas  wrote:
> This proposal outlines the modifications needed in networking-ovn (addresses 
> https://bugs.launchpad.net/networking-ovn/+bug/1551717 
> ) to provide Floating 
> IP (FIP) and SNAT using the L3 gateway router patches listed below:
>
> http://patchwork.ozlabs.org/patch/624312/ 
> 
> http://patchwork.ozlabs.org/patch/624313/ 
> 
> http://patchwork.ozlabs.org/patch/624314/ 
> 
> http://patchwork.ozlabs.org/patch/624315/ 
> 
> http://patchwork.ozlabs.org/patch/629607/ 
> 
>
> Diagram:
>
> +---+   +---+
> | NET 1 |   | NET 2 |
> +---+   +---+
>|   |
>|   *   |
>| ** ** |
>|   ***  * **   |
>+---RP1 *  DR   * RP2 --+
>***  * **
>  ** **
>*
>   DTRP (168.254.128.2)
>|
>|
>|
>+--+
>| Transit Network  |
>| 169.254.128.0/30 |
>+--+
>|
>|
>|
>|
>   GTRP (169.254.128.1)
> ***
>   **   **
> **   *   *   ** +--+
> * GW  *-| Provider Network |
> **   *   *   ** +--+
>   **   **
> ***
>
> New Entities:
>
> OVN Join/Transit Networks
> One per Neutron Router - /30 address space with only 2 ports for e.g. 
> 169.254.128.0/30
> Created when an external gateway is added to a router.
> One extra datapath per router with an External Gateway.
> (Alternate option - One Transit Network in a deployment, IPAM becomes a 
> headache - Not discussed here).
> Prevent Neutron from using that /30 address space. Specify in networking-ovn 
> con file.
> Create 1 new “Join” neutron network (to represent all Join OVN Networks) in 
> the networking-ovn.
> Note that it may be possible to replace the Join/Transit network using Router 
> Peering in later versions  (not discussed here).
> Allocate 2 ports in the Join network in the networking-ovn plugin.
> Logical Gateway Transit Router Port (gtrp), 169.254.128.1
> Logical Distributed Transit Router Port (dtrp), 169.254.128.2
> Note that Neutron only sees 1 Join network with 2 ports; OVN sees a replica 
> of this Join network as a new Logical Switch for each Gateway Router. The 
> mapping of OVN Logical Switch(es) Join(s) to Gateway Router is discussed in 
> OVN (Default) Gateway Routers below.
> Note that the MAC addresses of lgrp and lip will be the same on each OVN Join 
> Network, but because they are in different branches of the network topology 
> it doesn’t matter.
> OVN (Default) Gateway Routers:
> One per Neutron Router.
> 2 ports
> Logical Gateway Transit Router Port (gtrp), 169.254.128.1 (same for each OVN 
> Join network).
> External/Provider Router Port (legwrp), this is allocated by neutron.
> Scheduling - The current OVN gateway proposal relies on the CMS/nbctl to 
> decide on which hypervisor (HV) to schedule a particular gateway router.
> A setting on the chassis (new external_id key or a new column) that allows 
> the hypervisor admin to specify that a chassis can or cannot be used to host 
> a gateway router (similar to a network node in OpenStack). Default - Allow 
> (for compatibility purposes).
> The networking-ovn plugin picks up the list of “candidate” chassis from the 
> Southbound DB and uses an existing scheduling algorithm
> Use a simple random.choice i.e. ChanceScheduler (Version 1)
> Tap into the neutron’s LeastRouterScheduler - but that requires the 
> networking-ovn (or some a hacked up version of the L3 agent) to imitate the 
> L3 agent running on various network nodes.
> Populate the SNAT and DNAT columns in the logical router table. This is under 
> review in OVS - http://openvswitch.org/pipermail/dev/2016-June/072169.html 
> 
> Create static routing entry in the gateway router to route tenant bound 
> traffic to the distributed logical router.ar gate
>
> Existing Entities:
>
> Distributed Logical Routers:
> Set the default gateway of the distributed logical router to the IP Address 
> of the corresponding Logical Gateway Transit Router Port (169.254.128.1).
>
> It would be good to get some feedback on this strategy. Guru mentioned that 
> he saw a need for ARP 

Re: [ovs-dev] [PATCH] VxLAN-gpe implementation

2016-06-08 Thread Jiri Benc
On Mon, 6 Jun 2016 14:22:58 -0700, Jesse Gross wrote:
> On Sat, Jun 4, 2016 at 6:39 AM, Yi Yang  wrote:
> [...]
> >  datapath/vport-netdev.c   |   3 +-
> >  datapath/vport-vxlan.c|  17 ++-
> 
> These changes aren't upstream yet. Please do that before backporting them 
> here.
> 
> However, the changes to vport-vxlan.c are modifying compatibility code
> that shouldn't be extended further. Instead, just use the existing
> VXLAN netlink interfaces that have already been created to enable
> these features.
> 
> There is also a number of other patches to the OVS kernel module/VXLAN
> that have not been backported. Pravin started doing this work but it
> hasn't been applied yet. In general, I think it makes sense to
> backport patches in order so that the diffs of the patches match those
> of upstream.
> 
> Finally, I have a question about receive side offloading with
> VXLAN-gpe. This is primarily an upstream issue but is present in the
> code being backported here as well. The VXLAN code sets up receive
> offloads for all ports regardless of whether they are classic VXLAN or
> L2/L3 GPE and expects NICs to parse the packets. I don't think this is
> safe because there are a number of NICs out there that predate the
> existence of GPE and therefore won't do this parsing correctly. I
> think that it is necessary to disable receive offloading for
> non-Ethernet VXLAN-GPE unless the offloading interface is extended.

Coincidentally, I was talking about this with Hannes a few days ago.
I'm adding him to CC.

I guess you're referring to ndo_add_vxlan_port, right? I agree that
this interface needs changes, especially considering that we know
whether the UDP port belongs to VXLAN or VXLAN-GPE. But from my
understanding of how drivers use this callback, the worst thing that
could happen is suboptimal generation of rx hashes and thus steering
the packets to a different receive queue than in the optimal case.
Surely something to fix but it seems it won't cause much functional
troubles with the current code?

 Jiri
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


  1   2   >