[ovs-dev] [PATCH v3] openvswitch: enable NSH support

2017-08-15 Thread Yi Yang
v2->v3
 - Change OVS_KEY_ATTR_NSH to nested key to handle
   length-fixed attributes and length-variable
   attriubte more flexibly.
 - Remove struct ovs_action_push_nsh completely
 - Add code to handle nested attribute for SET_MASKED
 - Change PUSH_NSH to use the nested OVS_KEY_ATTR_NSH
   to transfer NSH header data.
 - Fix comments and coding style issues by Jiri and Eric

v1->v2
 - Change encap_nsh and decap_nsh to push_nsh and pop_nsh
 - Dynamically allocate struct ovs_action_push_nsh for
   length-variable metadata.

OVS master and 2.8 branch has merged NSH userspace
patch series, this patch is to enable NSH support
in kernel data path in order that OVS can support
NSH in 2.8 release in compat mode by porting this.

Signed-off-by: Yi Yang 
---
 drivers/net/vxlan.c  |   7 +
 include/net/nsh.h| 150 +++
 include/uapi/linux/openvswitch.h |  30 
 net/openvswitch/actions.c| 175 ++
 net/openvswitch/flow.c   |  39 +
 net/openvswitch/flow.h   |  11 ++
 net/openvswitch/flow_netlink.c   | 304 ++-
 net/openvswitch/flow_netlink.h   |   4 +
 8 files changed, 719 insertions(+), 1 deletion(-)
 create mode 100644 include/net/nsh.h

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index dbca067..843714c 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #if IS_ENABLED(CONFIG_IPV6)
 #include 
@@ -1267,6 +1268,9 @@ static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed,
case VXLAN_GPE_NP_IPV6:
*protocol = htons(ETH_P_IPV6);
break;
+   case VXLAN_GPE_NP_NSH:
+   *protocol = htons(ETH_P_NSH);
+   break;
case VXLAN_GPE_NP_ETHERNET:
*protocol = htons(ETH_P_TEB);
break;
@@ -1806,6 +1810,9 @@ static int vxlan_build_gpe_hdr(struct vxlanhdr *vxh, u32 
vxflags,
case htons(ETH_P_IPV6):
gpe->next_protocol = VXLAN_GPE_NP_IPV6;
return 0;
+   case htons(ETH_P_NSH):
+   gpe->next_protocol = VXLAN_GPE_NP_NSH;
+   return 0;
case htons(ETH_P_TEB):
gpe->next_protocol = VXLAN_GPE_NP_ETHERNET;
return 0;
diff --git a/include/net/nsh.h b/include/net/nsh.h
new file mode 100644
index 000..54f44f6
--- /dev/null
+++ b/include/net/nsh.h
@@ -0,0 +1,150 @@
+#ifndef __NET_NSH_H
+#define __NET_NSH_H 1
+
+
+/*
+ * Network Service Header:
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |Ver|O|C|R|R|R|R|R|R|Length   |   MD Type   |  Next Proto   |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |Service Path ID| Service Index |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |   |
+ * ~   Mandatory/Optional Context Header   ~
+ * |   |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * Ver = The version field is used to ensure backward compatibility
+ *   going forward with future NSH updates.  It MUST be set to 0x0
+ *   by the sender, in this first revision of NSH.
+ *
+ * O = OAM. when set to 0x1 indicates that this packet is an operations
+ * and management (OAM) packet.  The receiving SFF and SFs nodes
+ * MUST examine the payload and take appropriate action.
+ *
+ * C = context. Indicates that a critical metadata TLV is present.
+ *
+ * Length : total length, in 4-byte words, of NSH including the Base
+ *  Header, the Service Path Header and the optional variable
+ *  TLVs.
+ * MD Type: indicates the format of NSH beyond the mandatory Base Header
+ *  and the Service Path Header.
+ *
+ * Next Protocol: indicates the protocol type of the original packet. A
+ *  new IANA registry will be created for protocol type.
+ *
+ * Service Path Identifier (SPI): identifies a service path.
+ *  Participating nodes MUST use this identifier for Service
+ *  Function Path selection.
+ *
+ * Service Index (SI): provides location within the SFP.
+ *
+ * [0] https://tools.ietf.org/html/draft-ietf-sfc-nsh-13
+ */
+
+/**
+ * struct nsh_md1_ctx - Keeps track of NSH context data
+ * @nshc<1-4>: NSH Contexts.
+ */
+struct nsh_md1_ctx {
+   __be32 context[4];
+};
+
+struct nsh_md2_tlv {
+   __be16 md_class;
+   u8 type;
+   u8 length;
+   u8 md_value[];
+};
+
+struct nsh_hdr {
+   __be16 ver_flags_len;
+   u8 md_type;
+   u8 next_proto;
+   __be32 path_hdr;
+   union {
+   struct nsh_md1_ctx md1;
+   struct nsh_md2_tlv md2[0];
+   };
+};
+
+#define NSH_M_TYPE2_MAX_LEN 256
+
+struct push_nsh_para {
+   __be16 ver_flags_len;
+ 

Re: [ovs-dev] [PATCHv2 2/2] tests: Comment which netcat version the opts are for.

2017-08-15 Thread Joe Stringer
On 15 August 2017 at 16:45, Flavio Leitner  wrote:
> On Tue, 15 Aug 2017 16:15:55 -0700
> Joe Stringer  wrote:
>
>> Signed-off-by: Joe Stringer 
>> ---
>>  tests/atlocal.in | 2 ++
>>  1 file changed, 2 insertions(+)
>>
>> diff --git a/tests/atlocal.in b/tests/atlocal.in
>> index ba143d30f1d7..7c5e9e3357e5 100644
>> --- a/tests/atlocal.in
>> +++ b/tests/atlocal.in
>> @@ -152,8 +152,10 @@ find_command nc
>>
>>  # Determine correct netcat option to quit on stdin EOF
>>  if nc --version 2>&1 | grep -q nmap.org; then
>> +# Nmap netcat
>>  NC_EOF_OPT="--send-only -w 5"
>>  else
>> +# BSD netcat
>>  NC_EOF_OPT="-q 1 -w 5"
>>  fi
>>
>
> Acked-by: Flavio Leitner 

Thanks, applied to master.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCHv2 1/2] tests: Put maximum timeout on netcat calls.

2017-08-15 Thread Joe Stringer
On 15 August 2017 at 16:45, Flavio Leitner  wrote:
> On Tue, 15 Aug 2017 16:15:54 -0700
> Joe Stringer  wrote:
>
>> This was causing test script execution to hang forever on Ubuntu Zesty.
>> Make sure it times out within 5 seconds, so at least it will fail out
>> properly.
>>
>> Signed-off-by: Joe Stringer 
>> ---
>>  tests/atlocal.in | 4 ++--
>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/tests/atlocal.in b/tests/atlocal.in
>> index 6a339f8fc312..ba143d30f1d7 100644
>> --- a/tests/atlocal.in
>> +++ b/tests/atlocal.in
>> @@ -152,9 +152,9 @@ find_command nc
>>
>>  # Determine correct netcat option to quit on stdin EOF
>>  if nc --version 2>&1 | grep -q nmap.org; then
>> -NC_EOF_OPT="--send-only"
>> +NC_EOF_OPT="--send-only -w 5"
>>  else
>> -NC_EOF_OPT="-q 1"
>> +NC_EOF_OPT="-q 1 -w 5"
>>  fi
>>
>>  # Set HAVE_TCPDUMP
>
>
> make check-system-userspace TESTSUITEFLAGS="35"
> ## -- ##
> ## openvswitch 2.8.90 test suite. ##
> ## -- ##
>  35: conntrack - ICMP relatedok
>
> ## - ##
> ## Test results. ##
> ## - ##
>
> 1 test was successful.
>
> in parallel:
> ps auwx | grep nc | grep 'send-'
> root  2379  0.0  0.0   9680  2508 pts/25   S+   20:42   0:00 bash -c echo 
> a | nc --send-only -w 5 -u 10.1.1.2 1
>
> Acked-by: Flavio Leitner 

Thanks, applied to master.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCHv2 2/2] tests: Comment which netcat version the opts are for.

2017-08-15 Thread Flavio Leitner
On Tue, 15 Aug 2017 16:15:55 -0700
Joe Stringer  wrote:

> Signed-off-by: Joe Stringer 
> ---
>  tests/atlocal.in | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/tests/atlocal.in b/tests/atlocal.in
> index ba143d30f1d7..7c5e9e3357e5 100644
> --- a/tests/atlocal.in
> +++ b/tests/atlocal.in
> @@ -152,8 +152,10 @@ find_command nc
>  
>  # Determine correct netcat option to quit on stdin EOF
>  if nc --version 2>&1 | grep -q nmap.org; then
> +# Nmap netcat
>  NC_EOF_OPT="--send-only -w 5"
>  else
> +# BSD netcat
>  NC_EOF_OPT="-q 1 -w 5"
>  fi
>  

Acked-by: Flavio Leitner 

-- 
Flavio

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCHv2 1/2] tests: Put maximum timeout on netcat calls.

2017-08-15 Thread Flavio Leitner
On Tue, 15 Aug 2017 16:15:54 -0700
Joe Stringer  wrote:

> This was causing test script execution to hang forever on Ubuntu Zesty.
> Make sure it times out within 5 seconds, so at least it will fail out
> properly.
> 
> Signed-off-by: Joe Stringer 
> ---
>  tests/atlocal.in | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/tests/atlocal.in b/tests/atlocal.in
> index 6a339f8fc312..ba143d30f1d7 100644
> --- a/tests/atlocal.in
> +++ b/tests/atlocal.in
> @@ -152,9 +152,9 @@ find_command nc
>  
>  # Determine correct netcat option to quit on stdin EOF
>  if nc --version 2>&1 | grep -q nmap.org; then
> -NC_EOF_OPT="--send-only"
> +NC_EOF_OPT="--send-only -w 5"
>  else
> -NC_EOF_OPT="-q 1"
> +NC_EOF_OPT="-q 1 -w 5"
>  fi
>  
>  # Set HAVE_TCPDUMP


make check-system-userspace TESTSUITEFLAGS="35" 
## -- ##
## openvswitch 2.8.90 test suite. ##
## -- ##
 35: conntrack - ICMP relatedok

## - ##
## Test results. ##
## - ##

1 test was successful.

in parallel:
ps auwx | grep nc | grep 'send-'
root  2379  0.0  0.0   9680  2508 pts/25   S+   20:42   0:00 bash -c echo a 
| nc --send-only -w 5 -u 10.1.1.2 1

Acked-by: Flavio Leitner 

Thanks Joe
-- 
Flavio

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCHv2 2/2] tests: Comment which netcat version the opts are for.

2017-08-15 Thread Joe Stringer
Signed-off-by: Joe Stringer 
---
 tests/atlocal.in | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/atlocal.in b/tests/atlocal.in
index ba143d30f1d7..7c5e9e3357e5 100644
--- a/tests/atlocal.in
+++ b/tests/atlocal.in
@@ -152,8 +152,10 @@ find_command nc
 
 # Determine correct netcat option to quit on stdin EOF
 if nc --version 2>&1 | grep -q nmap.org; then
+# Nmap netcat
 NC_EOF_OPT="--send-only -w 5"
 else
+# BSD netcat
 NC_EOF_OPT="-q 1 -w 5"
 fi
 
-- 
2.13.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] tests: Put maximum timeout on netcat calls.

2017-08-15 Thread Joe Stringer
On 15 August 2017 at 15:23, Flavio Leitner  wrote:
> On Tue, 15 Aug 2017 15:03:25 -0700
> Joe Stringer  wrote:
>
>> This was causing test script execution to hang forever on Ubuntu Zesty.
>> Make sure it times out within 5 seconds, so at least it will fail out
>> properly.
>>
>> Signed-off-by: Joe Stringer 
>> ---
>>  tests/atlocal.in | 3 ++-
>>  1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/tests/atlocal.in b/tests/atlocal.in
>> index 6a339f8fc312..6ac32fbb3d73 100644
>> --- a/tests/atlocal.in
>> +++ b/tests/atlocal.in
>> @@ -154,7 +154,8 @@ find_command nc
>>  if nc --version 2>&1 | grep -q nmap.org; then
>>  NC_EOF_OPT="--send-only"
>>  else
>> -NC_EOF_OPT="-q 1"
>> +# BSD netcat
>> +NC_EOF_OPT="-q 1 -w 5"
>>  fi
>>
>>  # Set HAVE_TCPDUMP
>
> Why not fixing on both?
>
> $ nc --version
> Ncat: Version 7.40 ( https://nmap.org/ncat )
>
> $ nc --help  | grep -- -w
>   -w, --wait   Connect timeout

Mostly because I didn't have the nmap version on hand so didn't know
if it worked ;)

Thanks, I'll update this.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2] datapath-windows: Do not modify port field for ICMP during SNAT/DNAT

2017-08-15 Thread Anand Kumar
During SNAT/DNAT, we should not be updating the port field of ct_endpoint
struct, as ICMP packets do not have port information. Since port and
icmp_id are overlapped in ct_endpoint struct, icmp_id gets changed.
As a result, NAT look up fails to find a matching entry.

This patch addresses this issue by not modifying icmp_id field during
SNAT/DNAT only for ICMP traffic

The current NAT module doesn't take the ICMP type/code into account
during the lookups. Fix this to make it similar with the other conntrack
module.

Signed-off-by: Anand Kumar 
---
 datapath-windows/ovsext/Conntrack-nat.c | 22 +++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/datapath-windows/ovsext/Conntrack-nat.c 
b/datapath-windows/ovsext/Conntrack-nat.c
index ae6b92c..c778f12 100644
--- a/datapath-windows/ovsext/Conntrack-nat.c
+++ b/datapath-windows/ovsext/Conntrack-nat.c
@@ -22,6 +22,12 @@ OvsHashNatKey(const OVS_CT_KEY *key)
 HASH_ADD(src.port);
 HASH_ADD(dst.port);
 HASH_ADD(zone);
+/* icmp_id and port overlap in the union */
+HASH_ADD(src.icmp_type);
+HASH_ADD(dst.icmp_type);
+HASH_ADD(src.icmp_code);
+HASH_ADD(dst.icmp_code);
+
 #undef HASH_ADD
 return hash;
 }
@@ -44,6 +50,11 @@ OvsNatKeyAreSame(const OVS_CT_KEY *key1, const OVS_CT_KEY 
*key2)
 FIELD_COMPARE(src.port);
 FIELD_COMPARE(dst.port);
 FIELD_COMPARE(zone);
+/* icmp_id and port overlap in the union */
+FIELD_COMPARE(src.icmp_type);
+FIELD_COMPARE(dst.icmp_type);
+FIELD_COMPARE(src.icmp_code);
+FIELD_COMPARE(dst.icmp_code);
 return TRUE;
 #undef FIELD_COMPARE
 }
@@ -253,6 +264,7 @@ OvsNatAddEntry(OVS_NAT_ENTRY* entry)
  * Update an Conntrack entry with NAT information. Translated address and
  * port will be generated and write back to the conntrack entry as a
  * result.
+ * Note: For ICMP, only address is translated.
  *
  */
 BOOLEAN
@@ -271,7 +283,7 @@ OvsNatTranslateCtEntry(OVS_CT_ENTRY *entry)
 BOOLEAN allPortsTried;
 BOOLEAN originalPortsTried;
 struct ct_addr firstAddr;
-
+
 uint32_t hash = OvsNatHashRange(entry, 0);
 
 if ((entry->natInfo.natAction & NAT_ACTION_SRC) &&
@@ -310,10 +322,14 @@ OvsNatTranslateCtEntry(OVS_CT_ENTRY *entry)
 for (;;) {
 if (entry->natInfo.natAction & NAT_ACTION_SRC) {
 entry->rev_key.dst.addr = ctAddr;
-entry->rev_key.dst.port = htons(port);
+if (entry->rev_key.nw_proto != IPPROTO_ICMP) {
+entry->rev_key.dst.port = htons(port);
+}
 } else {
 entry->rev_key.src.addr = ctAddr;
-entry->rev_key.src.port = htons(port);
+if (entry->rev_key.nw_proto != IPPROTO_ICMP) {
+entry->rev_key.src.port = htons(port);
+}
 }
 
 OVS_NAT_ENTRY *natEntry = OvsNatLookup(>rev_key, TRUE);
-- 
2.9.3.windows.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2] datapath-windows: Update Orig Tuple to use ICMP Type and Code

2017-08-15 Thread Anand Kumar
- Also add in some padding for the ct_endpoint's union.

Co-authored-by: Sairam Venugopal 
Signed-off-by: Anand Kumar 
---
 datapath-windows/ovsext/Conntrack.c | 11 +--
 datapath-windows/ovsext/Conntrack.h |  6 --
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/datapath-windows/ovsext/Conntrack.c 
b/datapath-windows/ovsext/Conntrack.c
index 917ebee..ce8c1c8 100644
--- a/datapath-windows/ovsext/Conntrack.c
+++ b/datapath-windows/ovsext/Conntrack.c
@@ -782,9 +782,16 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,
 
 key->ct.tuple_ipv4.ipv4_src = ctKey->src.addr.ipv4_aligned;
 key->ct.tuple_ipv4.ipv4_dst = ctKey->dst.addr.ipv4_aligned;
-key->ct.tuple_ipv4.src_port = ctKey->src.port;
-key->ct.tuple_ipv4.dst_port = ctKey->dst.port;
 key->ct.tuple_ipv4.ipv4_proto = ctKey->nw_proto;
+
+/* Orig tuple Port is overloaded to take in ICMP-Type & Code */
+/* This mimics the behavior in lib/conntrack.c*/
+key->ct.tuple_ipv4.src_port = ctKey->nw_proto != IPPROTO_ICMP ?
+  ctKey->src.port :
+  htons(ctKey->src.icmp_type);
+key->ct.tuple_ipv4.dst_port = ctKey->nw_proto != IPPROTO_ICMP ?
+  ctKey->dst.port :
+  htons(ctKey->src.icmp_code);
 }
 
 if (entryCreated && entry) {
diff --git a/datapath-windows/ovsext/Conntrack.h 
b/datapath-windows/ovsext/Conntrack.h
index 04ca299..bca7d90 100644
--- a/datapath-windows/ovsext/Conntrack.h
+++ b/datapath-windows/ovsext/Conntrack.h
@@ -41,14 +41,16 @@ struct ct_addr {
 struct ct_endpoint {
 struct ct_addr addr;
 union {
-ovs_be16 port;
+struct {
+ovs_be16 port;
+uint16 pad_port;
+};
 struct {
 ovs_be16 icmp_id;
 uint8_t icmp_type;
 uint8_t icmp_code;
 };
 };
-UINT16 pad;
 };
 
 typedef enum CT_UPDATE_RES {
-- 
2.9.3.windows.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] tests: Put maximum timeout on netcat calls.

2017-08-15 Thread Flavio Leitner
On Tue, 15 Aug 2017 15:03:25 -0700
Joe Stringer  wrote:

> This was causing test script execution to hang forever on Ubuntu Zesty.
> Make sure it times out within 5 seconds, so at least it will fail out
> properly.
> 
> Signed-off-by: Joe Stringer 
> ---
>  tests/atlocal.in | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/tests/atlocal.in b/tests/atlocal.in
> index 6a339f8fc312..6ac32fbb3d73 100644
> --- a/tests/atlocal.in
> +++ b/tests/atlocal.in
> @@ -154,7 +154,8 @@ find_command nc
>  if nc --version 2>&1 | grep -q nmap.org; then
>  NC_EOF_OPT="--send-only"
>  else
> -NC_EOF_OPT="-q 1"
> +# BSD netcat
> +NC_EOF_OPT="-q 1 -w 5"
>  fi
>  
>  # Set HAVE_TCPDUMP

Why not fixing on both?

$ nc --version 
Ncat: Version 7.40 ( https://nmap.org/ncat )

$ nc --help  | grep -- -w
  -w, --wait   Connect timeout

-- 
Flavio

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] datapath-windows: Update ICMP-Type and Code comparison in CT lookup

2017-08-15 Thread Anand Kumar
Hi Alin,

Thanks for the review. Please find my responses inline.
Will address the review comments and send out v2 version of the patch.

Thanks,
Anand Kumar

On 8/14/17, 8:04 PM, "Alin Serdean"  wrote:

Hi Sai and Anand,

Thanks a lot for the patch. I have a few questions regarding the approach. 
Please see inline.

> -Original Message-
> From: ovs-dev-boun...@openvswitch.org [mailto:ovs-dev-
> boun...@openvswitch.org] On Behalf Of Anand Kumar
> Sent: Friday, August 11, 2017 11:42 PM
> To: d...@openvswitch.org
> Subject: [ovs-dev] [PATCH] datapath-windows: Update ICMP-Type and Code
> comparison in CT lookup
> 
>  - Update the CT comparison function to compare individual fields instead 
of
> NdisEqualMemory.
[Alin Serdean] I don't like this change, especially mixing both members of 
union, i.e:
> +(ctxKey.dst.port == entryKey.dst.port) &&
> +(ctxKey.dst.icmp_id == entryKey.dst.icmp_id) &&
Why are you trying to change via member by member comparison?
   [Anand Kumar]: Done. Previously, we ran into issues while verifying NAT with 
ICMP, which now is fixed with my other patch.
> - Add in some padding for the ct_endpoint's union.
[Alin Serdean] Why is this needed? 
[Anand Kumar] This is needed because size of the union is 32 bits. 
   Another question do we still need the 'pad' member inside ct_endpoint 
(https://urldefense.proofpoint.com/v2/url?u=https-3A__github.com_openvswitch_ovs_blob_master_datapath-2Dwindows_ovsext_Conntrack.h-23L51=DwIFAg=uilaK90D4TOVoH58JNXRgQ=Q5z9tBe-nAOpE7LIHSPV8uy5-437agMXvkeHHMkR8Us=1Rj8RlVaAovz2iLrWHvT6N469qFN036rzjLkBJweZDw=hky8AVI9m8GjZOD8r21WYvEHBPcDcVVZiwMKqk07IwU=
 ) ?
[Anand Kumar] Yes, the padding at the end of structure is no longer needed. 
I will address in next version.
> - Update the Orig Tuple to use ICMP Type and Code instead of Port for ICMP
[Alin Serdean] Agreed
> 
> Co-authored-by: Sairam Venugopal 
> Signed-off-by: Anand Kumar 






___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] datapath-windows: Do not modify port field for ICMP during SNAT/DNAT

2017-08-15 Thread Anand Kumar
Hi Alin,

Thanks for reviewing the patch.  Please find my responses inline.
Will send out a v2 patch addressing the review comments.

Thanks,
Anand Kumar

On 8/14/17, 8:14 PM, "Alin Serdean"  wrote:

We should revisit how we do hashes and compares over the 'OVS_CT_KEY' at 
some point.

As you pointed "/* icmp_id and port overlap in the union */"
You can drop the lines:
>  HASH_ADD(src.port);
>  HASH_ADD(dst.port);
   [Anand Kumar] : We would still need it here since ICMP id is not included 
for computing hash.
And
>  FIELD_COMPARE(src.port);
>  FIELD_COMPARE(dst.port);
the outcome should be the same.
  [Anand Kumar] : Not required, will remove icmp_id field instead of port.
   FIELD_COMPARE(src.icmp_id);
   FIELD_COMPARE(dst.icmp_id);


Everything else looks good.
Thanks,
Alin.

> -Original Message-
> From: ovs-dev-boun...@openvswitch.org [mailto:ovs-dev-
> boun...@openvswitch.org] On Behalf Of Anand Kumar
> Sent: Friday, August 11, 2017 6:59 AM
> To: d...@openvswitch.org
> Subject: [ovs-dev] [PATCH] datapath-windows: Do not modify port field for
> ICMP during SNAT/DNAT
> 
> During SNAT/DNAT, we should not be updating the port field of ct_endpoint
> struct, as ICMP packets do not have port information. Since port and 
icmp_id
> are overlapped in ct_endpoint struct, icmp_id gets changed.
> As a result, NAT look up fails to find a matching entry.
> 
> This patch addresses this issue by not modifying icmp_id field during
> SNAT/DNAT only for ICMP traffic
> 
> The current NAT module doesn't take the ICMP type/id/code into account
> during the lookups. Fix this to make it similar with the other conntrack
> module.
> 
> Signed-off-by: Anand Kumar 
> ---
>  datapath-windows/ovsext/Conntrack-nat.c | 23 --
> -
>  1 file changed, 20 insertions(+), 3 deletions(-)
> 
> diff --git a/datapath-windows/ovsext/Conntrack-nat.c b/datapath-
> windows/ovsext/Conntrack-nat.c
> index ae6b92c..eb6f9db 100644
> --- a/datapath-windows/ovsext/Conntrack-nat.c
> +++ b/datapath-windows/ovsext/Conntrack-nat.c
> @@ -22,6 +22,12 @@ OvsHashNatKey(const OVS_CT_KEY *key)
>  HASH_ADD(src.port);
>  HASH_ADD(dst.port);
>  HASH_ADD(zone);
> +/* icmp_id and port overlap in the union */
> +HASH_ADD(src.icmp_type);
> +HASH_ADD(dst.icmp_type);
> +HASH_ADD(src.icmp_code);
> +HASH_ADD(dst.icmp_code);
> +
>  #undef HASH_ADD
>  return hash;
>  }
> @@ -44,6 +50,12 @@ OvsNatKeyAreSame(const OVS_CT_KEY *key1, const
> OVS_CT_KEY *key2)
>  FIELD_COMPARE(src.port);
>  FIELD_COMPARE(dst.port);
>  FIELD_COMPARE(zone);
> +FIELD_COMPARE(src.icmp_id);
> +FIELD_COMPARE(dst.icmp_id);
> +FIELD_COMPARE(src.icmp_type);
> +FIELD_COMPARE(dst.icmp_type);
> +FIELD_COMPARE(src.icmp_code);
> +FIELD_COMPARE(dst.icmp_code);
>  return TRUE;
>  #undef FIELD_COMPARE
>  }
> @@ -253,6 +265,7 @@ OvsNatAddEntry(OVS_NAT_ENTRY* entry)
>   * Update an Conntrack entry with NAT information. Translated address
> and
>   * port will be generated and write back to the conntrack entry as a
>   * result.
> + * Note: For ICMP, only address is translated.
>   
*
>   */
>  BOOLEAN
> @@ -271,7 +284,7 @@ OvsNatTranslateCtEntry(OVS_CT_ENTRY *entry)
>  BOOLEAN allPortsTried;
>  BOOLEAN originalPortsTried;
>  struct ct_addr firstAddr;
> -
> +
>  uint32_t hash = OvsNatHashRange(entry, 0);
> 
>  if ((entry->natInfo.natAction & NAT_ACTION_SRC) && @@ -310,10
> +323,14 @@ OvsNatTranslateCtEntry(OVS_CT_ENTRY *entry)
>  for (;;) {
>  if (entry->natInfo.natAction & NAT_ACTION_SRC) {
>  entry->rev_key.dst.addr = ctAddr;
> -entry->rev_key.dst.port = htons(port);
> +if (entry->rev_key.nw_proto != IPPROTO_ICMP) {
> +entry->rev_key.dst.port = htons(port);
> +}
>  } else {
>  entry->rev_key.src.addr = ctAddr;
> -entry->rev_key.src.port = htons(port);
> +if (entry->rev_key.nw_proto != IPPROTO_ICMP) {
> +entry->rev_key.src.port = htons(port);
> +}
>  }
> 
>  OVS_NAT_ENTRY *natEntry = OvsNatLookup(>rev_key, TRUE);
> --
> 2.9.3.windows.1
> 
> ___
> dev mailing list
> d...@openvswitch.org
> 

[ovs-dev] [PATCH] tests: Put maximum timeout on netcat calls.

2017-08-15 Thread Joe Stringer
This was causing test script execution to hang forever on Ubuntu Zesty.
Make sure it times out within 5 seconds, so at least it will fail out
properly.

Signed-off-by: Joe Stringer 
---
 tests/atlocal.in | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/atlocal.in b/tests/atlocal.in
index 6a339f8fc312..6ac32fbb3d73 100644
--- a/tests/atlocal.in
+++ b/tests/atlocal.in
@@ -154,7 +154,8 @@ find_command nc
 if nc --version 2>&1 | grep -q nmap.org; then
 NC_EOF_OPT="--send-only"
 else
-NC_EOF_OPT="-q 1"
+# BSD netcat
+NC_EOF_OPT="-q 1 -w 5"
 fi
 
 # Set HAVE_TCPDUMP
-- 
2.13.3

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCHv3 0/4] Improve C++ support for OVSDB IDL.

2017-08-15 Thread Joe Stringer
On 11 August 2017 at 11:06, Joe Stringer  wrote:
> In the OVSDB IDL, we use C++ keywords such as "new", "mutable", "class"
> for variable and field names. This series updates these names so that
> they don't conflict with the C++ keywords, which should improve the
> ability to use the IDL from C++ code. This series focuses primarily on
> code that exists in the tree; To address such problems for generated
> field names that come from an OVSDB schema, a subsequent patch will
> still be required. For instance, the ovs-vswitchd schema has a column
> named "protected". This ends up as a field name in the generated
> lib/vswitch-idl.[ch] code, which causes similar problems to those
> addressed by this series.

Thanks folks, I pushed this to master.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH V2] tc: Add SCTP support

2017-08-15 Thread Flavio Leitner
On Thu, 27 Jul 2017 10:17:23 +0200
Simon Horman  wrote:

> On Wed, Jul 26, 2017 at 02:35:51PM -0700, Joe Stringer wrote:
> > On 25 July 2017 at 04:39, Roi Dayan  wrote:  
> > > From: Vlad Buslov 
> > >
> > > Implement SCTP source and destination ports support for flower.
> > >
> > > Signed-off-by: Vlad Buslov 
> > > Reviewed-by: Paul Blakey 
> > > Acked-by: Roi Dayan 
> > > ---  
> > 
> > Acked-by: Joe Stringer   
> 
> Thanks, applied to master.

I know it's applied, but still wanted to review.
ACK and thanks for following up with this.

-- 
Flavio

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [patch_v4 2/2] dpif-netdev: Refactor some pmd stats.

2017-08-15 Thread Darrell Ball
The per packets stats are presently overlapping in that
miss stats include lost stats; make these stats non-overlapping
for clarity and make this clear in the dp_stat_type enum.  This
also eliminates the need to increment two 'miss' stats for a
single packet.

The subtable lookup stats is renamed to make it
clear that it relates to masked lookups.
The stats that total to the number of packets seen are defined
in dp_stat_type and an api is created to total the stats in case
these stats are further divided in the future.

The pmd stats test is enhanced to include megaflow stats
counting and checking.
Also, miss and lost stats are annotated to make it clear
what they mean.

Signed-off-by: Darrell Ball 
---
 lib/dpif-netdev.c | 78 ++-
 tests/pmd.at  | 31 +-
 2 files changed, 74 insertions(+), 35 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 17e1666..dfc6684 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -323,12 +323,19 @@ static struct dp_netdev_port *dp_netdev_lookup_port(const 
struct dp_netdev *dp,
 OVS_REQUIRES(dp->port_mutex);
 
 enum dp_stat_type {
-DP_STAT_EXACT_HIT,  /* Packets that had an exact match (emc). */
-DP_STAT_MASKED_HIT, /* Packets that matched in the flow table. */
-DP_STAT_MISS,   /* Packets that did not match. */
-DP_STAT_LOST,   /* Packets not passed up to the client. */
-DP_STAT_LOOKUP_HIT, /* Number of subtable lookups for flow table
-   hits */
+DP_STAT_EXACT_HIT,  /* Packets that had an exact match (emc). */
+DP_STAT_MASKED_HIT, /* Packets that matched in the flow table. */
+DP_STAT_MISS,   /* Packets that did not match and upcall was
+   done. */
+DP_STAT_LOST,   /* Packets that did not match and upcall was
+   not done. */
+DP_N_PER_PKT_CNT,   /* The above statistics account for the total
+   number of packets seen and should not be
+   overlapping with each other. */
+DP_STAT_MASKED_LOOKUP = DP_N_PER_PKT_CNT,  /* Number of subtable lookups
+  for flow table hits. Each
+  MASKED_HIT hit will have
+  >= 1 MASKED_LOOKUP(s). */
 DP_N_STATS
 };
 
@@ -749,13 +756,22 @@ enum pmd_info_type {
 PMD_INFO_SHOW_RXQ /* Show poll-lists of pmd threads. */
 };
 
+static unsigned long long
+dp_netdev_calcul_total_packets(unsigned long long *stats)
+{
+unsigned long long total_packets = 0;
+for (uint8_t i = 0; i < DP_N_PER_PKT_CNT; i++) {
+total_packets += stats[i];
+}
+return total_packets;
+}
+
 static void
 pmd_info_show_stats(struct ds *reply,
 struct dp_netdev_pmd_thread *pmd,
 unsigned long long stats[DP_N_STATS],
 uint64_t cycles[PMD_N_CYCLES])
 {
-unsigned long long total_packets;
 uint64_t total_cycles = 0;
 int i;
 
@@ -773,10 +789,6 @@ pmd_info_show_stats(struct ds *reply,
 }
 }
 
-/* Sum of all the matched and not matched packets gives the total.  */
-total_packets = stats[DP_STAT_EXACT_HIT] + stats[DP_STAT_MASKED_HIT]
-+ stats[DP_STAT_MISS];
-
 for (i = 0; i < PMD_N_CYCLES; i++) {
 if (cycles[i] > pmd->cycles_zero[i]) {
cycles[i] -= pmd->cycles_zero[i];
@@ -800,11 +812,12 @@ pmd_info_show_stats(struct ds *reply,
 
 ds_put_format(reply,
   "\temc hits:%llu\n\tmegaflow hits:%llu\n"
-  "\tavg. subtable lookups per hit:%.2f\n"
-  "\tmiss:%llu\n\tlost:%llu\n",
+  "\tavg. subtable lookups per megaflow hit:%.2f\n"
+  "\tmiss(upcall done):%llu\n\tlost(upcall not done):%llu\n",
   stats[DP_STAT_EXACT_HIT], stats[DP_STAT_MASKED_HIT],
   stats[DP_STAT_MASKED_HIT] > 0
-  ? (1.0*stats[DP_STAT_LOOKUP_HIT])/stats[DP_STAT_MASKED_HIT]
+  ? (1.0 * stats[DP_STAT_MASKED_LOOKUP])
+ / stats[DP_STAT_MASKED_HIT]
   : 0,
   stats[DP_STAT_MISS], stats[DP_STAT_LOST]);
 
@@ -820,6 +833,9 @@ pmd_info_show_stats(struct ds *reply,
   cycles[PMD_CYCLES_PROCESSING],
   cycles[PMD_CYCLES_PROCESSING] / (double)total_cycles * 100);
 
+/* Sum of all the matched and not matched packets gives the total.  */
+unsigned long long total_packets =
+ dp_netdev_calcul_total_packets(stats);
 if (total_packets == 0) {
 return;
 }
@@ -4724,18 +4740,18 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
 return dp_packet_batch_size(packets_);
 }
 
-static inline void
+static inline int
 

[ovs-dev] [patch_v4 1/2] dpif-netdev: Fix per packet cycles statistics.

2017-08-15 Thread Darrell Ball
From: Ilya Maximets 

DP_STAT_LOOKUP_HIT statistics used mistakenly for calculation
of total number of packets. This leads to completely wrong
per packet cycles statistics.

For example:

emc hits:0
megaflow hits:253702308
avg. subtable lookups per hit:1.50
miss:0
lost:0
avg cycles per packet: 248.32 (157498766585/634255770)

In this case 634255770 total_packets value used for avg
per packet calculation:

  total_packets = 'megaflow hits' + 'megaflow hits' * 1.5

The real value should be 524.38 (157498766585/253702308)

Fix that by summing only stats that reflect match/not match.
It's decided to make direct summing of required values instead of
disabling some stats in a loop to make calculations more clear and
avoid similar issues in the future.

CC: Jan Scheurich 
Fixes: 3453b4d62a98 ("dpif-netdev: dpcls per in_port with sorted subtables")
Signed-off-by: Ilya Maximets 
Acked-by: Jan Scheurich 
---
 lib/dpif-netdev.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index e2cd931..17e1666 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -755,7 +755,7 @@ pmd_info_show_stats(struct ds *reply,
 unsigned long long stats[DP_N_STATS],
 uint64_t cycles[PMD_N_CYCLES])
 {
-unsigned long long total_packets = 0;
+unsigned long long total_packets;
 uint64_t total_cycles = 0;
 int i;
 
@@ -771,13 +771,12 @@ pmd_info_show_stats(struct ds *reply,
 } else {
 stats[i] = 0;
 }
-
-if (i != DP_STAT_LOST) {
-/* Lost packets are already included in DP_STAT_MISS */
-total_packets += stats[i];
-}
 }
 
+/* Sum of all the matched and not matched packets gives the total.  */
+total_packets = stats[DP_STAT_EXACT_HIT] + stats[DP_STAT_MASKED_HIT]
++ stats[DP_STAT_MISS];
+
 for (i = 0; i < PMD_N_CYCLES; i++) {
 if (cycles[i] > pmd->cycles_zero[i]) {
cycles[i] -= pmd->cycles_zero[i];
-- 
1.9.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [patch_v4 0/2] dpif-netdev: Fix and refactor pmd stats.

2017-08-15 Thread Darrell Ball
Fix total pmd stats calculation, refactor pmd stats, enhance
the pmd stats test and clarify pmd-stats-show output.

v3->v4: Combine patches 2 and 3 because v3 introduced a
dependency.
Also add a OVS_LIKELY in one location.

v2->v3: Fix a bug in patch 2 and fold in review comments.

v1->v2: Add additional patches.

Darrell Ball (1):
  dpif-netdev: Refactor some pmd stats.

Ilya Maximets (1):
  dpif-netdev: Fix per packet cycles statistics.

 lib/dpif-netdev.c | 79 ++-
 tests/pmd.at  | 31 +-
 2 files changed, 74 insertions(+), 36 deletions(-)

-- 
1.9.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [patch_v3 0/3] dpif-netdev: Fix and refactor pmd stats.

2017-08-15 Thread Darrell Ball
I will resend this, combining patches 2 and 3

Darrell

-Original Message-
From:  on behalf of Darrell Ball 

Date: Monday, August 14, 2017 at 10:57 PM
To: "dlu...@gmail.com" , "d...@openvswitch.org" 

Subject: [ovs-dev] [patch_v3 0/3] dpif-netdev: Fix and refactor pmd stats.

Fix total pmd stats calculation, refactor pmd stats, enhance
the pmd stats test and clarify pmd-stats-show output.

Darrell Ball (2):
  dpif-netdev: Refactor some pmd stats.
  tests: Enhance the pmd stats test.

Ilya Maximets (1):
  dpif-netdev: Fix per packet cycles statistics.

 lib/dpif-netdev.c | 79 
++-
 tests/pmd.at  | 31 +-
 2 files changed, 74 insertions(+), 36 deletions(-)

-- 
1.9.1

___
dev mailing list
d...@openvswitch.org

https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev=DwICAg=uilaK90D4TOVoH58JNXRgQ=BVhFA09CGX7JQ5Ih-uZnsw=5md6gD1Pro77WLMcFol8kyoX-Ahox7RkA0mShRAX_RU=M8rj19kzq86kE7Ltx6BtCorKWBgAjJPLtGNMU8h-EwE=
 


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] Am Emma Malik, I need your assistance/partnership please Email Me: ema...@qq.com

2017-08-15 Thread fagrswto
  

  
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 4/4] netdev-tc-offloads: Add support for action set

2017-08-15 Thread Joe Stringer
On 8 August 2017 at 07:21, Roi Dayan  wrote:
> From: Paul Blakey 
>
> Implement support for offloading ovs action set using
> tc header rewrite action.
>
> Signed-off-by: Paul Blakey 
> Reviewed-by: Roi Dayan 
> ---



Another couple of bits I noticed while responding..

> @@ -454,10 +572,56 @@ netdev_tc_flow_dump_next(struct netdev_flow_dump *dump,
>  }
>
>  static int
> +parse_put_flow_set_masked_action(struct tc_flower *flower,
> + const struct nlattr *set,
> + size_t set_len,
> + bool hasmask)
> +{
> +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
> +const struct nlattr *set_attr;
> +char *key = (char *) >rewrite.key;
> +char *mask = (char *) >rewrite.mask;
> +size_t set_left;
> +int i, j;
> +
> +NL_ATTR_FOR_EACH_UNSAFE(set_attr, set_left, set, set_len) {
> +int type = nl_attr_type(set_attr);
> +size_t size = nl_attr_get_size(set_attr) / 2;
> +char *set_data = CONST_CAST(char *, nl_attr_get(set_attr));
> +char *set_mask = set_data + size;
> +
> +if (type >= ARRAY_SIZE(set_flower_map)) {
> +VLOG_DBG_RL(, "unsupported set action type: %d", type);
> +return EOPNOTSUPP;

I think that this assumes that 'set_flower_map' has every entry
populated up until the maximum supported key field - but are some
missing - for example OVS_KEY_ATTR_VLAN. Could we also check by
indexing into set_flower_map and checking if it's a valid entry?

> +}
> +
> +for (i = 0; i < ARRAY_SIZE(set_flower_map[type]); i++) {
> +struct netlink_field *f = _flower_map[type][i];

Separate thought - you're iterating nlattrs above then iterating all
the key attributes in the flower_map here. Couldn't you just directly
index into the packet field that this action will modify?

> +
> +if (!f->size) {
> +break;
> +}

I think that headers that are not in the set_flower_map will hit this,
which would be unsupported (similar to above comment).

> +
> +for (j = 0; j < f->size; j++) {
> +char maskval = hasmask ? set_mask[f->offset + j] : 0xFF;
> +
> +key[f->flower_offset + j] = maskval & set_data[f->offset + 
> j];
> +mask[f->flower_offset + j] = maskval;
> +}
> +}
> +}
> +
> +if (!is_all_zeros(>rewrite, sizeof flower->rewrite)) {
> +flower->rewrite.rewrite = true;
> +}
> +
> +return 0;
> +}
> +
> +static int
>  parse_put_flow_set_action(struct tc_flower *flower, const struct nlattr *set,
>size_t set_len)
>  {
> -static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>  const struct nlattr *set_attr;
>  size_t set_left;
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 4/4] netdev-tc-offloads: Add support for action set

2017-08-15 Thread Joe Stringer
On 15 August 2017 at 01:19, Paul Blakey  wrote:
>
>
> On 15/08/2017 10:28, Paul Blakey wrote:
>>
>>
>>
>> On 15/08/2017 04:04, Joe Stringer wrote:
>>>
>>> On 8 August 2017 at 07:21, Roi Dayan  wrote:

 From: Paul Blakey 

 Implement support for offloading ovs action set using
 tc header rewrite action.

 Signed-off-by: Paul Blakey 
 Reviewed-by: Roi Dayan 
 ---
>>>
>>>
>>> 
>>>
 @@ -274,6 +346,48 @@ netdev_tc_flow_dump_destroy(struct netdev_flow_dump
 *dump)
   return 0;
   }

 +static void
 +parse_flower_rewrite_to_netlink_action(struct ofpbuf *buf,
 +   struct tc_flower *flower)
 +{
 +char *mask = (char *) >rewrite.mask;
 +char *data = (char *) >rewrite.key;
 +
 +for (int type = 0; type < ARRAY_SIZE(set_flower_map); type++) {
 +char *put = NULL;
 +size_t nested = 0;
 +int len = ovs_flow_key_attr_lens[type].len;
 +
 +if (len <= 0) {
 +continue;
 +}
 +
 +for (int j = 0; j < ARRAY_SIZE(set_flower_map[type]); j++) {
 +struct netlink_field *f = _flower_map[type][j];
 +
 +if (!f->size) {
 +break;
 +}
>>>
>>>
>>> Seems like maybe there's similar feedback here to the previous patch
>>> wrt sparsely populated array set_flower_map[].
>>>
>>
>> I'll answer there.
>
>
> Scratch that, I actually meant to write it here.
>
> With this map we have fast (direct lookup) access on the put path
> (parse_put_flow_set_masked_action) , and slow access on the reverse dump
> path (parse_flower_rewrite_to_netlink_action) which iterates over all
> indices, although it should skips them  fast if they are empty.
>
> Changing this to sparse map will slow both of the paths, but dumping would
> be faster. We didn't write this maps for performance but for convince of
> adding new supported attributes.

OK, thanks for the explanation. I figured there must be a reason it
was indexed that way, I just didn't see it in my first review pass
through the series. Obviously fast path should be fast, and typically
we don't worry quite as much about dumping fast (although I have in
the past tested this to determine how many flows we will attempt to
populate in adverse conditions).

> What we can do to both maps is:
>
> 1) Using ovs thread once, we can create a fast (direct lookup) access map
> for both paths - generating a reverse map at init time and using that
> instead for dumping.

Until there's a clear, known advantage I'm not sure if we should do this.

> 2) Rewrite it in a non sparse way, use it for both.

I think that we generally are more interested in 'put' performance so
if this sacrifices it for 'dump' performance, then it's probably not a
good idea (though it may still be too early to really optimize these).

> 3) Rewrite it using a switch case, the logic for now is pretty much the same
> for all attributes.
>
> What do you think?

Do you think that this third option would improve the readability,
allow more compile-time checking on the code, or more code reuse?
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 3/4] tc: Add header rewrite using tc pedit action

2017-08-15 Thread Joe Stringer
On 15 August 2017 at 01:19, Paul Blakey  wrote:
>
>
> On 15/08/2017 00:56, Joe Stringer wrote:
>>
>> On 8 August 2017 at 07:21, Roi Dayan  wrote:
>>>
>>> From: Paul Blakey 
>>>
>>> @@ -346,6 +425,96 @@ nl_parse_flower_ip(struct nlattr **attrs, struct
>>> tc_flower *flower) {
>>>   }
>>>   }
>>>
>>> +static const struct nl_policy pedit_policy[] = {
>>> +[TCA_PEDIT_PARMS_EX] = { .type = NL_A_UNSPEC,
>>> + .min_len = sizeof(struct tc_pedit),
>>> + .optional = false, },
>>> +[TCA_PEDIT_KEYS_EX]   = { .type = NL_A_NESTED,
>>> +  .optional = false, },
>>> +};
>>> +
>>> +static int
>>> +nl_parse_act_pedit(struct nlattr *options, struct tc_flower *flower)
>>> +{
>>> +struct nlattr *pe_attrs[ARRAY_SIZE(pedit_policy)];
>>> +const struct tc_pedit *pe;
>>> +const struct tc_pedit_key *keys;
>>> +const struct nlattr *nla, *keys_ex, *ex_type;
>>> +const void *keys_attr;
>>> +char *rewrite_key = (void *) >rewrite.key;
>>> +char *rewrite_mask = (void *) >rewrite.mask;
>>
>>
>> These are actually 'struct tc_flower_key', shouldn't we use the actual
>> types? (I realise there is pointer arithmetic below, but usually we
>> restrict the usage of void casting and pointer arithmetic as much as
>> possible).
>>
>
> Yes, It's for the pointer arithmetic. (void *) cast was for the
> clangs warning "cast increases required alignment of target type"
> which we couldn't find another way to suppress.
> I don't think alignments an issue here.

Ah. I don't have particularly much experience with pointer alignment.

>
>>> +size_t keys_ex_size, left;
>>> +int type, i = 0;
>>> +
>>> +if (!nl_parse_nested(options, pedit_policy, pe_attrs,
>>> + ARRAY_SIZE(pedit_policy))) {
>>> +VLOG_ERR_RL(_rl, "failed to parse pedit action options");
>>> +return EPROTO;
>>> +}
>>> +
>>> +pe = nl_attr_get_unspec(pe_attrs[TCA_PEDIT_PARMS_EX], sizeof *pe);
>>> +keys = pe->keys;
>>> +keys_attr = pe_attrs[TCA_PEDIT_KEYS_EX];
>>> +keys_ex = nl_attr_get(keys_attr);
>>> +keys_ex_size = nl_attr_get_size(keys_attr);
>>> +
>>> +NL_ATTR_FOR_EACH(nla, left, keys_ex, keys_ex_size) {
>>> +if (i >= pe->nkeys) {
>>> +break;
>>> +}
>>> +
>>> +if (nl_attr_type(nla) == TCA_PEDIT_KEY_EX) {
>>> +ex_type = nl_attr_find_nested(nla, TCA_PEDIT_KEY_EX_HTYPE);
>>> +type = nl_attr_get_u16(ex_type);
>>> +
>>> +for (int j = 0; j < ARRAY_SIZE(flower_pedit_map); j++) {
>>> +struct flower_key_to_pedit *m = _pedit_map[j];
>>> +int flower_off = j;
>>> +int sz = m->size;
>>> +int mf = m->offset;
>>> +
>>> +if (!sz || m->htype != type) {
>>> +   continue;
>>> +}
>>
>>
>> If flower_pedit_map was just a regular array and the offset was
>> included in 'struct flower_key_to_pedit', then I don't think we need
>> the above check?
>>
>>> +
>>> +/* check overlap between current pedit key, which is
>>> always
>>> + * 4 bytes (range [off, off + 3]), and a map entry in
>>> + * flower_pedit_map (range [mf, mf + sz - 1]) */
>>> +if ((keys->off >= mf && keys->off < mf + sz)
>>> +|| (keys->off + 3 >= mf && keys->off + 3 < mf + sz))
>>> {
>>> +int diff = flower_off + (keys->off - mf);
>>> +uint32_t *dst = (void *) (rewrite_key + diff);
>>> +uint32_t *dst_m = (void *) (rewrite_mask + diff);
>>> +uint32_t mask = ~(keys->mask);
>>> +uint32_t zero_bits;
>>> +
>>> +if (keys->off < mf) {
>>> +zero_bits = 8 * (mf - keys->off);
>>> +mask &= UINT32_MAX << zero_bits;
>>> +} else if (keys->off + 4 > mf + m->size) {
>>> +zero_bits = 8 * (keys->off + 4 - mf - m->size);
>>> +mask &= UINT32_MAX >> zero_bits;
>>> +}
>>
>>
>> I think this is all trying to avoid having a giant switch case here
>> which would handle all of the possible flower key attributes, similar
>> to somewhere else where this kind of iteration occurs.
>
> Right that was the objective.
>
>  Is there any
>>
>> overlap between this code and calc_offsets()calc_offsets was to calculate
>> the word offsets and masks for the first
>
> and last word to write, here its the reverse so its already in word size
> with correct masks, and easier to write back.

OK.

>>
>> Could we somehow trigger an assert or a strong warning message if the
>> offsets of our flower_pedit_map and the flower key don't align, given
>> that there's a bunch of pointer arithmetic 

Re: [ovs-dev] [patch_v5] dp-packet: Reset DPDK HWOL checksum flags on init.

2017-08-15 Thread Darrell Ball


-Original Message-
From: "Chandran, Sugesh" 
Date: Sunday, August 13, 2017 at 8:06 AM
To: Darrell Ball , Ben Pfaff 
Cc: "d...@openvswitch.org" 
Subject: RE: [ovs-dev] [patch_v5] dp-packet: Reset DPDK HWOL checksum flags on 
init.



Regards
_Sugesh


> -Original Message-
> From: Darrell Ball [mailto:db...@vmware.com]
> Sent: Thursday, August 10, 2017 5:15 PM
> To: Chandran, Sugesh ; Ben Pfaff
> 
> Cc: d...@openvswitch.org
> Subject: Re: [ovs-dev] [patch_v5] dp-packet: Reset DPDK HWOL checksum
> flags on init.
> 
> 
> 
> -Original Message-
> From: Darrell Ball 
> Date: Wednesday, August 9, 2017 at 1:38 PM
> To: "Chandran, Sugesh" , Ben Pfaff
> 
> Cc: "d...@openvswitch.org" 
> Subject: Re: [ovs-dev] [patch_v5] dp-packet: Reset DPDK HWOL checksum
> flags on init.
> 
> 
> 
> -Original Message-
> From: "Chandran, Sugesh" 
> Date: Wednesday, August 9, 2017 at 12:55 PM
> To: Darrell Ball , Ben Pfaff 
> Cc: "d...@openvswitch.org" 
> Subject: RE: [ovs-dev] [patch_v5] dp-packet: Reset DPDK HWOL checksum
> flags on init.
> 
> 
> 
> Regards
> _Sugesh
> 
> 
> > > >
> > > > Correct, I reused 
reset_dp_packet_checksum_ol_flags() to do
> the
> > > initialization as well
> > > > I could also have created a separate function.
> > > >
> > > > In case a DPDK dev is used, those flags will be 
managed by
> DPDK.
> > > >
> > > >  That sounds like a
> > > > bug in itself--is there a missing call to 
initialize the mbuf
> > somewhere?
> > > >
> > > > Are you suggesting to initialize the whole mbuf for 
each packet
> ?
> > >
> > > The issue that I'm raising is that it's unusual to 
take an
> > > uninitialized, indeterminate field, and then 
initialize it by clearing
> a
> > > few bits.  It's much more conventional to initialize 
it by setting it
> to
> > > zero, like this:
> > >
> > > p->mbuf.ol_flags = 0;
> > >
> > >
> > > That is better; I will create a separate function then.
> > > I will resend
> > > Thanks
> > [Sugesh] I also agree with Ben here.
> > Currently OVS uses only checksum offload flags from mbuf(As 
I am
> aware
> > of).
> > But there are other flag bits that may get used in future 
like TSO.
> > So its better to initialize the mbuf properly before using.
> >
> > Here is the mbuf reset function in DPDK that get called 
when an
> existing
> > memory is mapped to
> > Mbuf.
> > I believe only the ol_flags are relevant for now in OVS.
> >
> > There is no higher cost associated with initializing all the 
ol_flags vs
> some
> > flags, so that is fine.
> > It will be done twice in the case of a packet received from a 
dpdk
> device, but
> > it is a small cost.
> > I was more concerned about the dual responsibility conflict when
> packets are
> > received from a DPDK device and this is why I wanted to limit 
the scope
> of
> > the flag management in OVS; it should be ok, though.
> > Hence, I mentioned that I will initialize all the ol_flags.
> >
> > JTBC, are you suggesting to initialize all the fields below ?
> > This would mean when packets are received from a DPDK dev, both
> the rte
> > library and OVS would separately initialize all the fields 
below –
> meaning, it
> > would be done twice for each packet.
> >
> [Sugesh] No, we don’t have to initialize all the fields. But we 
can have a
> generic function
> to init all mbuf fields that are relevant in OVS.
> For now its only ol_flags. In the future this function must be 
updated
> when we use more
> fields from rte_mbuf.
> 
> We are on the same page then.
> I plan for the function to have a generic name, so this fine.
> 
> 
> Sorry, I 

[ovs-dev] [RFC patch v1] dp-packet: Refactor DPDK packet initialization.

2017-08-15 Thread Darrell Ball
DPDK uses dp-packet pools and manages the mbuf portion of
each packet. When a pool is created, partial initialization is
also done on the OVS portion (i.e. non-mbuf).  Since packet
memory is reused, this is not very useful for transient
fields and is also misleading.  Furthermore, some of these
transient fields are properly initialized for DPDK packets
entering OVS anyways, which is the only reasonable way to do this.
Another field, cutlen, is initialized in this manner in the pool
and intended to be reset when cutlen is applied on sending the
packet out. However, if cutlen context is set but the packet is
not sent out for some reason, then the packet header would be
corrupted in the memory pool.  It is better to just reset the
cutlen in the packets when received.  I did not detect a
degradation in performance, however, I would be willing to
have some degradation, since this is a proper way to handle
this.  In addition to initializing cutlen in received packets,
the other OVS transient fields are removed from the DPDK pool
initialization.

Signed-off-by: Darrell Ball 
---
 lib/dp-packet.c   | 16 +---
 lib/dp-packet.h   | 10 ++
 lib/netdev-dpdk.c |  2 ++
 3 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index c1f43f3..312f63a 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -92,16 +92,18 @@ dp_packet_use_const(struct dp_packet *b, const void *data, 
size_t size)
 dp_packet_set_size(b, size);
 }
 
-/* Initializes 'b' as an empty dp_packet that contains the 'allocated' bytes of
- * memory starting at 'base'.  DPDK allocated dp_packet and *data is allocated
- * from one continous memory region, so in memory data start right after
- * dp_packet.  Therefore there is special method to free this type of
- * buffer.  dp_packet base, data and size are initialized by dpdk rcv() so no
- * need to initialize those fields. */
+/* Initializes 'b' as an empty dp_packet that contains the 'allocated' bytes.
+ * DPDK allocated dp_packet and *data is allocated from one continous memory
+ * region as part of memory pool, so in memory data start right after
+ * dp_packet.  Therefore, there is a special method to free this type of
+ * buffer.  Here, non-transient ovs dp-packet fields are initialized for
+ * packets that are part of a DPDK memory pool. */
 void
 dp_packet_init_dpdk(struct dp_packet *b, size_t allocated)
 {
-dp_packet_init__(b, allocated, DPBUF_DPDK);
+dp_packet_set_allocated(b, allocated);
+b->source = DPBUF_DPDK;
+b->packet_type = htonl(PT_ETH);
 }
 
 /* Initializes 'b' as an empty dp_packet with an initial capacity of 'size'
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index bb3f9db..cbeb831 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -801,6 +801,16 @@ dp_packet_delete_batch(struct dp_packet_batch *batch, bool 
may_steal)
 }
 
 static inline void
+dp_packet_batch_init_cutlen(struct dp_packet_batch *batch)
+{
+struct dp_packet *packet;
+
+DP_PACKET_BATCH_FOR_EACH (packet, batch) {
+dp_packet_reset_cutlen(packet);
+}
+}
+
+static inline void
 dp_packet_batch_apply_cutlen(struct dp_packet_batch *batch)
 {
 if (batch->trunc) {
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 1aaf6f7..92453b2 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1644,6 +1644,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
  nb_rx, dropped);
 rte_spinlock_unlock(>stats_lock);
 
+dp_packet_batch_init_cutlen(batch);
 batch->count = (int) nb_rx;
 return 0;
 }
@@ -1683,6 +1684,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct 
dp_packet_batch *batch)
 rte_spinlock_unlock(>stats_lock);
 }
 
+dp_packet_batch_init_cutlen(batch);
 batch->count = nb_rx;
 
 return 0;
-- 
1.9.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH/RFC] bond: add enable-recirc configuration for bond.

2017-08-15 Thread Simon Horman
From: Pieter Jansen van Vuuren 

Adds a config parameter that determines if a bond will use recirculation
in the kernel datapath when implementing a bond in balance-tcp mode.

The default for enable-recirc is "true", resulting in the traditional
implementation of a bond in balance-tcp mode. Setting enable-recirc to
false results in datapath rules that do not rely on the recirculation
action.

example usage:
ovs-vsctl set port bond0 other_config:enable-recirc=false

Advantages:
- Allows TC offloading of OVS bonds on hardware which
  does not support recirculation
- Appears to result in lower latency (in systems using few flows).

Quick ping test results in:

other_config:enable-recirc=false
rtt min/avg/max/mdev = 0.039/0.193/7.612/1.059 ms

other_config:enable-recirc=true
rtt min/avg/max/mdev = 0.038/0.321/14.091/1.967 ms

More comprehensive testing is in progress.

Signed-off-by: Pieter Jansen van Vuuren 
Signed-off-by: Simon Horman 
---
 ofproto/bond.c   | 11 ++-
 ofproto/bond.h   |  1 +
 vswitchd/bridge.c|  3 +++
 vswitchd/vswitch.xml |  8 
 4 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/ofproto/bond.c b/ofproto/bond.c
index 365a3ca7ffad..46f8a9afcb3b 100644
--- a/ofproto/bond.c
+++ b/ofproto/bond.c
@@ -147,6 +147,7 @@ struct bond {
/* The MAC address of the active interface. */
 /* Legacy compatibility. */
 bool lacp_fallback_ab; /* Fallback to active-backup on LACP failure. */
+bool recirc_enabled;
 
 struct ovs_refcount ref_cnt;
 };
@@ -437,6 +438,11 @@ bond_reconfigure(struct bond *bond, const struct 
bond_settings *s)
 revalidate = true;
 }
 
+if (bond->recirc_enabled != s->recirc_enabled) {
+bond->recirc_enabled = s->recirc_enabled;
+revalidate = true;
+}
+
 if (bond->rebalance_interval != s->rebalance_interval) {
 bond->rebalance_interval = s->rebalance_interval;
 revalidate = true;
@@ -458,7 +464,10 @@ bond_reconfigure(struct bond *bond, const struct 
bond_settings *s)
 }
 
 if (bond->balance != BM_AB) {
-if (!bond->recirc_id) {
+   if (!bond->recirc_enabled) {
+recirc_free_id(bond->recirc_id);
+bond->recirc_id = 0;
+} else if (!bond->recirc_id) {
 bond->recirc_id = recirc_alloc_id(bond->ofproto);
 }
 } else if (bond->recirc_id) {
diff --git a/ofproto/bond.h b/ofproto/bond.h
index e7c3d9bc35dd..beb937b9910e 100644
--- a/ofproto/bond.h
+++ b/ofproto/bond.h
@@ -53,6 +53,7 @@ struct bond_settings {
 int down_delay; /* ms before disabling a down slave. */
 
 bool lacp_fallback_ab_cfg;  /* Fallback to active-backup on LACP failure. 
*/
+bool recirc_enabled;
 
 struct eth_addr active_slave_mac;
 /* The MAC address of the interface
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index a8cbae78cb23..b4b5c89ca6a0 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -4259,6 +4259,9 @@ port_configure_bond(struct port *port, struct 
bond_settings *s)
 s->lacp_fallback_ab_cfg = smap_get_bool(>cfg->other_config,
"lacp-fallback-ab", false);
 
+s->recirc_enabled = smap_get_bool(>cfg->other_config,
+  "enable-recirc", true);
+
 LIST_FOR_EACH (iface, port_elem, >ifaces) {
 netdev_set_miimon_interval(iface->netdev, miimon_interval);
 }
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 074535b588ef..7b97d720d276 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -1693,6 +1693,14 @@
 is configured to LACP mode, the bond will use LACP.
   
 
+
+
+  
+Determines if a bond will use recirculation in the kernel datapath
+when implementing a bond in balance-tcp mode.
+  
+
   
 
   
-- 
2.1.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2] ovn: Support for taas(tap-as-a-service) function

2017-08-15 Thread Russell Bryant
Thanks for updating this!  This looks closer to what I was hoping for.
It will most likely be next week before I can complete a detailed
review, though.

On Tue, Aug 15, 2017 at 4:28 AM,   wrote:
> Taas was designed to provide tenants and service providers a means of
> monitoring the traffic flowing in their Neutron provisioned virtual
> networks. It is useful for network trouble-shooting, security and
> analytics. The taas presentations could be found from
> https://github.com/openstack/tap-as-a-service/blob/master/doc/source/presentations.rst
> , and the api reference could be found from
> https://github.com/openstack/tap-as-a-service/blob/master/API_REFERENCE.rst
>
> To support taas function, this patch add two type of logica_switch_port,
> "mirror" and "taas". port with type "mirror" is used as inport for monitor
> flow in logica_switch, and port with type "taas" is used as outport for
> monitor flow in logica_switch.
>
> The ovn-controller will make the relations of the ports in tap_service and
> tap_flow to mirror port and taas port.
>
> Signed-off-by: wang qianyu 
> ---
>  ovn/controller/binding.c|  12 ++
>  ovn/controller/ovn-controller.c |   2 +
>  ovn/controller/physical.c   | 185 +-
>  ovn/lib/logical-fields.c|   4 +
>  ovn/lib/logical-fields.h|   4 +
>  ovn/northd/ovn-northd.c | 329
> 
>  ovn/ovn-nb.xml  |  69 +
>  7 files changed, 603 insertions(+), 2 deletions(-)
>
> diff --git a/ovn/controller/binding.c b/ovn/controller/binding.c
> index 32309e9..fc74ea0 100644
> --- a/ovn/controller/binding.c
> +++ b/ovn/controller/binding.c
> @@ -437,6 +437,18 @@ consider_local_datapath(struct controller_ctx *ctx,
>   * for them. */
>  sset_add(local_lports, binding_rec->logical_port);
>  our_chassis = false;
> +} else if (!strcmp(binding_rec->type, "mirror")) {
> +add_local_datapath(ctx, binding_rec->datapath,
> +   false, local_datapaths);
> +} else if (!strcmp(binding_rec->type, "taas")) {
> +const char *target_port_name = smap_get(_rec->options,
> +  "target-port");
> +if (target_port_name &&
> +sset_contains(local_lports, target_port_name)) {
> +our_chassis = true;
> +}
> +add_local_datapath(ctx, binding_rec->datapath,
> +   false, local_datapaths);
>  }
>
>  if (ctx->ovnsb_idl_txn) {
> diff --git a/ovn/controller/ovn-controller.c
> b/ovn/controller/ovn-controller.c
> index e2c9652..0a148e4 100644
> --- a/ovn/controller/ovn-controller.c
> +++ b/ovn/controller/ovn-controller.c
> @@ -150,6 +150,8 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
>  struct ovsdb_idl_condition mg = OVSDB_IDL_CONDITION_INIT();
>  struct ovsdb_idl_condition dns = OVSDB_IDL_CONDITION_INIT();
>  sbrec_port_binding_add_clause_type(, OVSDB_F_EQ, "patch");
> +sbrec_port_binding_add_clause_type(, OVSDB_F_EQ, "mirror");
> +sbrec_port_binding_add_clause_type(, OVSDB_F_EQ, "taas");
>  /* XXX: We can optimize this, if we find a way to only monitor
>   * ports that have a Gateway_Chassis that point's to our own
>   * chassis */
> diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c
> index df71979..7b55b04 100644
> --- a/ovn/controller/physical.c
> +++ b/ovn/controller/physical.c
> @@ -291,9 +291,100 @@ load_logical_ingress_metadata(const struct
> sbrec_port_binding *binding,
>  }
>
>  static void
> +taas_port_handle(struct controller_ctx *ctx,
> + const struct sbrec_port_binding *binding,
> + struct ofpbuf *ofpacts_p,
> + struct hmap *flow_table,
> + uint32_t dp_key,
> + uint32_t port_key)
> +{
> +const char *target_port_name = smap_get(>options,
> +  "target-port");
> +if (!target_port_name) {
> +VLOG_INFO("taas port %s not configure target-port",
> + binding->logical_port);
> +return;
> +}
> +const struct sbrec_port_binding *target_port = lport_lookup_by_name(
> +ctx->ovnsb_idl, target_port_name);
> +if (!target_port) {
> +VLOG_INFO("can not find target port %s in this switch",
> +target_port_name);
> +return;
> +}
> +
> +ofp_port_t ofport = u16_to_ofp(simap_get(_to_ofport,
> +  target_port_name));
> +if (!ofport) {
> +VLOG_INFO("can not find ofport of %s in this switch",
> +target_port_name);
> +return;
> +}
> +struct match match;
> +
> +/* Table 33, priority 100.
> + * ===
> + *
> + * Implements output to local hypervisor.  Each flow matches a
> + * logical 

Re: [ovs-dev] [PATCHv2] ofproto-dpif: Mark packets as "untracked" after call to ct().

2017-08-15 Thread Russell Bryant
I've reached out to 3 other users of OVS connection tracking for
feedback and will report back to this thread once I hear anything.

On Fri, Aug 11, 2017 at 2:58 AM, Justin Pettit  wrote:
> I just wanted to specially call out this patch for testing.  My hope is that 
> we can merge this into 2.8, but wanted to give people a heads up that they 
> may want to try it with their applications.  The most visible change is that 
> a call to ct() will clear the ct_state for any actions that follow it.  As 
> before, the ct_state will be accessible to flows rooted in the table 
> specified by the "table" argument to ct(), but now another ct() action will 
> clear the state for the current processing path (but the new ct_state will be 
> available if the "table" argument is given).
>
> My hope is that this won't affect people's pipelines, but it's worth checking 
> if it does.  I would recommend that anyone that uses the conntrack action in 
> their applications give this patch a try (on top of branch-2.8).  If there 
> are any problems, I can try to help you reconstruct your pipeline so it's not 
> affected.  (The changes should be minimal, and there are a couple of examples 
> in the patch itself.)  I've done some light testing with OVN, and it didn't 
> require any changes, and I hope it's the same for everyone else.
>
> The reason I'd like to get this change into 2.8 is that it's a requirement 
> for a performance optimization I'd like to get into 2.9.  Currently, if a 
> controller action is specified, all traffic is slow-pathed to userspace.  As 
> we start making heavier use of local controllers, this will have a serious 
> negative effect on forwarding performance and increase in CPU usage.  I've 
> made a few other (more minor, corner-casey) changes to the OpenFlow 
> processing in 2.8, and I'd like to get all potentially breaking issues into a 
> single release.
>
> If you have an application that uses conntrack, please give it a try and let 
> me know if you run into any issues.
>
> Thanks!
>
> --Justin
>
>
>> On Aug 10, 2017, at 11:38 PM, Justin Pettit  wrote:
>>
>> Packet and Connection state is only available to the processing path
>> that follows the "recirc_table" argument of the ct() action.  The
>> previous behavior made these states available until the end of the
>> pipeline.  This commit changes the behavior so that the Packet and
>> Connection state are cleared for the current processing path whenever
>> ct() is called (in addition to reaching the end of the pipeline.)
>>
>> A future commit will remove the behavior that a "send to controller"
>> action causes all packets for that flow to be handled via the slow-path.
>> The current behavior of connection tracking state makes that difficult
>> due to datapath actions containing multiple OpenFlow rules that may
>> contain different connection tracking states.  This change will make
>> that future commit possible.
>>
>> Signed-off-by: Justin Pettit 
>> ---
>> v1->v2: Fix system tests.
>> ---
>> lib/ofp-actions.c| 27 +--
>> ofproto/ofproto-dpif-xlate.c | 21 +++--
>> tests/ofproto-dpif.at| 10 +-
>> tests/system-traffic.at  |  4 ++--
>> utilities/ovs-ofctl.8.in | 10 ++
>> 5 files changed, 33 insertions(+), 39 deletions(-)
>>
>> diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c
>> index bfc8a805ffd5..71eb70c3c239 100644
>> --- a/lib/ofp-actions.c
>> +++ b/lib/ofp-actions.c
>> @@ -5858,20 +5858,19 @@ format_DEBUG_RECIRC(const struct ofpact_null *a 
>> OVS_UNUSED,
>>  *
>>  *   - Packet State:
>>  *
>> - *  Untracked packets have not yet passed through the connection 
>> tracker,
>> - *  and the connection state for such packets is unknown. In most cases,
>> - *  packets entering the OpenFlow pipeline will initially be in the
>> - *  untracked state. Untracked packets may become tracked by executing
>> - *  NXAST_CT with a "recirc_table" specified. This makes various aspects
>> - *  about the connection available, in particular the connection state.
>> - *
>> - *  Tracked packets have previously passed through the connection 
>> tracker.
>> - *  These packets will remain tracked through until the end of the 
>> OpenFlow
>> - *  pipeline. Tracked packets which have NXAST_CT executed with a
>> - *  "recirc_table" specified will return to the tracked state.
>> - *
>> - *  The packet state is only significant for the duration of packet
>> - *  processing within the OpenFlow pipeline.
>> + *  Untracked packets have an unknown connection state.  In most
>> + *  cases, packets entering the OpenFlow pipeline will initially be
>> + *  in the untracked state. Untracked packets may become tracked by
>> + *  executing NXAST_CT with a "recirc_table" specified. This makes
>> + *  various aspects about the connection available, in particular
>> + *  the connection 

[ovs-dev] ATTN: Sir /Madam,

2017-08-15 Thread FRANK AMADI
From: Frank Amadi
Director Computer Telex Dept
Central bank of Nigeria
Email:frankamad...@yahoo.com

ATTN: Sir /Madam,

I
 am .Frank Amadi Director Computer Telex Dept of central bank of 
Nigeria. I decided to contact you because of the prevailing security 
report reaching my office and the intense nature of polity in 
Nigeria.This is to inform you about the recent plan of federal 
government of Nigeria to send your fund to you via diplomatic immunity 
cash delivery payment.

This system will be easier for you and for
 us. We are going to pay you US$10.Million part contract payment of your
 total sum via the above approved mode of payment.Every necessary 
arrangement has put in place for the secured of all required security 
documents to cover the money. The Money is coming on 2 security proof 
boxes, the boxes are sealed with Synthetic nylon seal and padded with 
machine.I will use my position as the foreign operation director of this
 apex bank to make sure this consignments (fund ) are been safely and 
legally release to You.

The boxes are coming with assigned diplomatic agents who will accompany the 
Boxes to your approved destination address.

For
 a hitch free and safe delivery, you are required to furnish me 
immediately upon receipt of this message with the below sated 
information

1. Your full house address
2. Your identity such as, international passport, driver's license or national 
identification card.
3. Your contact phone numbers,
4.Occupation

The
 diplomatic Agents attached will travel with these required 
information's For the delivery of your fund. They will call you 
immediately they arrives Your country's airport. I hope you understand 
me.I will let you know when these consignments will be lifted. IMPORTANT
 NOTE: The diplomatic Agents does not know the original contents of the 
boxes.

For security reasons what we declare d to them as the 
contents of the consignments are Sensitive photographic Film Material.I 
did not declare money to them please. So if they call you and ask you 
the contents please tell them the same thing ok.Contact me immediately 
and I will let you know how far I have gone with The arrangement.

I
 will secure the clearance Certificate that will be tagged on the boxes 
which I will dispatch along with the security inner Keys of the 
consignments to enable you access these consignments as soon as it 
delivered to you.This clearance instruments will make it pass every 
custom checkpoint all over the world without hitch.

Confirm the 
receipt of this message and send the requirements to me Immediately you 
receive this message.Please I need your urgent reply because the boxes 
are schedule to airlift as soon as we hear from you. Contact me 
immediately you receive this message.

Best Regards,

From:Frank Amadi
Director Computer Telex Dept
Central bank of Nigeria
Email:frankamad...@yahoo.com

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] pwclient XML errors and form feeds

2017-08-15 Thread Aaron Conole
Hi Mark,

Mark Michelson  writes:

> Earlier today on IRC I brought up an error I had while attempting to use
> pwclient to get a patch from patchwork. Here [1] is the pastebin I linked
> to. At the time, I thought it was due to HTTP chunked encoding being used
> and the xmlrpclib or HTTP client library not handling that properly.
>
> I debugged the problem further and I now have figured out that the problem
> has to do with a form feed character being present in the patch. The
> Patchwork server places the patch text inside an XML 1.0 document; however,
> form-feeds are illegal in XML 1.0 [2]. This makes the use of form feeds and
> the use of pwclient incompatible.
>
> Aaron Conole pointed out that once Patchwork 2.0 is available, the git-pw
> command will render pwclient obsolete. I checked, and git-pw will use
> Patchwork 2.0's REST API (which is JSON-based) instead of XMLRPC, so this
> is indeed only a temporary problem.
>
> In the meantime, is there some other command line tool OVS devs use instead
> of pwclient for getting patches?

I use curl.  I guess pwclient could probably be patched to fix this
issue.  However, I think the xmlrpc interface isn't as nice as the
RESTful api being implemented by patchwork 2.0.

> Also also, can someone inform me why the coding guidelines encourage using
> form feeds? Do they aid in documentation generation? Do people use editors
> that render them in some meaningful way? Searching git logs and the mailing
> list archives didn't give me anything to go on.

Time to get all old-timey and preachy ;-).

Form feed, as a control mechanism, is a bit outdated.  It was really
useful in the 80s and 90s, because:

  1. Printers understood it
  2. Editors at the time understood that backward-page and forward-page
 should use the form feed as a pagination delimiter.
  3. Having lots of smaller files was more problematic than a few large,
 sectioned files because dependency tracking wasn't quite there.
  4. People knew why the convention of form feed was used.

Nowadays, most people don't know why projects (especially older projects
such as emacs) use the convention, especially since modern editors are
really good at contextual help, direct jump to any kinds of symbols, and
autocompletion (features that didn't exist in a really usable fashion
until the early- to mid-2000s).

For those folks who do use editors which understand (such as emacs, and
vi/vim), you can easily jump between logical sections of a file.  For
instance, it might be customary on some projects to have the file laid
out:

- >8 -
// This is the header
// it includes crazy things that cvs will insert
// such as $author: $
// and $log: $



#include 
#include 
...
#include 



static const int foo = 0xdeadbeef;
static const int bar = 0x1baddad0;
static const int baz = 0xdbdbdbdb;



static long int ffoo() {
...
}

static long int fbar() {
...
}

...



MODULE_INIT(__init_fn);
MODULE_NAME("SomeModule");
- >8 -

And (for instance) with emacs, pressing 'C-x ]' allows skipping between
the various 'logical' sections, while 'C-x [' skips backwards
similarly.  For vi / vim, I'm not sure.  I thought ']]' and '[[' were
the delimiter search keys, but the delimiters they use are opening
brace at the beginning of a line.  And other editors... well.. sorry.

It's a convention that doesn't bother me, but I don't think is really
well known anymore (and will cause problems when we try to use some
tools which don't understand the convention - as you see).

Hope this helps answer the question :-)

> Thanks,
> Mark Michelson
>
> [1] https://paste.fedoraproject.org/paste/gE6neR6-Yf19YnO7hM576g
> [2] https://www.w3.org/TR/REC-xml/#charsets
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2] ovn: Support for taas(tap-as-a-service) function

2017-08-15 Thread wang . qianyu
Taas was designed to provide tenants and service providers a means of 
monitoring the traffic flowing in their Neutron provisioned virtual 
networks. It is useful for network trouble-shooting, security and 
analytics. The taas presentations could be found from 
https://github.com/openstack/tap-as-a-service/blob/master/doc/source/presentations.rst
, and the api reference could be found from 
https://github.com/openstack/tap-as-a-service/blob/master/API_REFERENCE.rst

To support taas function, this patch add two type of logica_switch_port, 
"mirror" and "taas". port with type "mirror" is used as inport for monitor 
flow in logica_switch, and port with type "taas" is used as outport for 
monitor flow in logica_switch.

The ovn-controller will make the relations of the ports in tap_service and 
tap_flow to mirror port and taas port.

Signed-off-by: wang qianyu 
---
 ovn/controller/binding.c|  12 ++
 ovn/controller/ovn-controller.c |   2 +
 ovn/controller/physical.c   | 185 +-
 ovn/lib/logical-fields.c|   4 +
 ovn/lib/logical-fields.h|   4 +
 ovn/northd/ovn-northd.c | 329 

 ovn/ovn-nb.xml  |  69 +
 7 files changed, 603 insertions(+), 2 deletions(-)

diff --git a/ovn/controller/binding.c b/ovn/controller/binding.c
index 32309e9..fc74ea0 100644
--- a/ovn/controller/binding.c
+++ b/ovn/controller/binding.c
@@ -437,6 +437,18 @@ consider_local_datapath(struct controller_ctx *ctx,
  * for them. */
 sset_add(local_lports, binding_rec->logical_port);
 our_chassis = false;
+} else if (!strcmp(binding_rec->type, "mirror")) {
+add_local_datapath(ctx, binding_rec->datapath,
+   false, local_datapaths);
+} else if (!strcmp(binding_rec->type, "taas")) {
+const char *target_port_name = smap_get(_rec->options,
+  "target-port");
+if (target_port_name &&
+sset_contains(local_lports, target_port_name)) {
+our_chassis = true;
+}
+add_local_datapath(ctx, binding_rec->datapath,
+   false, local_datapaths);
 }
 
 if (ctx->ovnsb_idl_txn) {
diff --git a/ovn/controller/ovn-controller.c 
b/ovn/controller/ovn-controller.c
index e2c9652..0a148e4 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -150,6 +150,8 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
 struct ovsdb_idl_condition mg = OVSDB_IDL_CONDITION_INIT();
 struct ovsdb_idl_condition dns = OVSDB_IDL_CONDITION_INIT();
 sbrec_port_binding_add_clause_type(, OVSDB_F_EQ, "patch");
+sbrec_port_binding_add_clause_type(, OVSDB_F_EQ, "mirror");
+sbrec_port_binding_add_clause_type(, OVSDB_F_EQ, "taas");
 /* XXX: We can optimize this, if we find a way to only monitor
  * ports that have a Gateway_Chassis that point's to our own
  * chassis */
diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c
index df71979..7b55b04 100644
--- a/ovn/controller/physical.c
+++ b/ovn/controller/physical.c
@@ -291,9 +291,100 @@ load_logical_ingress_metadata(const struct 
sbrec_port_binding *binding,
 }
 
 static void
+taas_port_handle(struct controller_ctx *ctx,
+ const struct sbrec_port_binding *binding,
+ struct ofpbuf *ofpacts_p,
+ struct hmap *flow_table,
+ uint32_t dp_key,
+ uint32_t port_key)
+{
+const char *target_port_name = smap_get(>options,
+  "target-port");
+if (!target_port_name) {
+VLOG_INFO("taas port %s not configure target-port",
+ binding->logical_port);
+return;
+}
+const struct sbrec_port_binding *target_port = lport_lookup_by_name(
+ctx->ovnsb_idl, target_port_name);
+if (!target_port) {
+VLOG_INFO("can not find target port %s in this switch",
+target_port_name);
+return;
+}
+
+ofp_port_t ofport = u16_to_ofp(simap_get(_to_ofport,
+  target_port_name));
+if (!ofport) {
+VLOG_INFO("can not find ofport of %s in this switch",
+target_port_name);
+return;
+}
+struct match match;
+
+/* Table 33, priority 100.
+ * ===
+ *
+ * Implements output to local hypervisor.  Each flow matches a
+ * logical output port on the local hypervisor, and resubmits to
+ * table 34.
+ */
+match_init_catchall();
+ofpbuf_clear(ofpacts_p);
+match_set_metadata(, htonll(dp_key));
+match_set_reg(, MFF_LOG_OUTPORT - MFF_REG0, port_key);
+
+put_load(1, MFF_LOG_FLAGS, MLF_CLONED_FLOW_BIT, 1, ofpacts_p);
+/* Resubmit to table 34. */
+put_resubmit(OFTABLE_CHECK_LOOPBACK, ofpacts_p);
+

Re: [ovs-dev] [PATCH 3/4] tc: Add header rewrite using tc pedit action

2017-08-15 Thread Paul Blakey



On 15/08/2017 00:56, Joe Stringer wrote:

On 8 August 2017 at 07:21, Roi Dayan  wrote:

From: Paul Blakey 

To be later used to implement ovs action set offloading.

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
---


Hi Paul and folks, some questions inline.

utilities/checkpatch.py says:

$ utilities/checkpatch.py -4
== Checking 00ecb482f5d1 ("compat: Add act_pedit compatibility for old
kernels") ==
Lines checked: 127, no obvious problems found

== Checking 7503746718f6 ("odp-util: Expose ovs flow key attr len
table for reuse") ==
Lines checked: 195, no obvious problems found

== Checking 1e8ab68aefe1 ("tc: Add header rewrite using tc pedit action") ==
ERROR: Improper whitespace around control block
#359 FILE: lib/tc.c:480:
NL_ATTR_FOR_EACH(nla, left, keys_ex, keys_ex_size) {

Lines checked: 721, Warnings: 0, Errors: 1

== Checking f6b52ce504c2 ("netdev-tc-offloads: Add support for action set") ==
ERROR: Improper whitespace around control block
#908 FILE: lib/netdev-tc-offloads.c:590:
NL_ATTR_FOR_EACH_UNSAFE(set_attr, set_left, set, set_len) {

Lines checked: 981, Warnings: 0, Errors: 1




+static struct flower_key_to_pedit flower_pedit_map[] = {
+[offsetof(struct tc_flower_key, ipv4.ipv4_src)] = {
+TCA_PEDIT_KEY_EX_HDR_TYPE_IP4,
+12,
+MEMBER_SIZEOF(struct tc_flower_key, ipv4.ipv4_src)
+},


Why are these items indexed by the offset, and not just an array we
iterate? It seems like the only places where we iterate this, we use
"for (i=0; ... i++)", which means that we iterate several times across
empty items, and this array is sparsely populated.
Right we can make this one not sparse as there is no direct access to 
some index. Or some of the suggestions on the other patch's map (like 
creating a fast map for both put and dump paths).





+[offsetof(struct tc_flower_key, ipv4.ipv4_dst)] = {
+TCA_PEDIT_KEY_EX_HDR_TYPE_IP4,
+16,
+MEMBER_SIZEOF(struct tc_flower_key, ipv4.ipv4_dst)
+},
+[offsetof(struct tc_flower_key, ipv4.rewrite_ttl)] = {
+TCA_PEDIT_KEY_EX_HDR_TYPE_IP4,
+8,
+MEMBER_SIZEOF(struct tc_flower_key, ipv4.rewrite_ttl)
+},





@@ -346,6 +425,96 @@ nl_parse_flower_ip(struct nlattr **attrs, struct tc_flower 
*flower) {
  }
  }

+static const struct nl_policy pedit_policy[] = {
+[TCA_PEDIT_PARMS_EX] = { .type = NL_A_UNSPEC,
+ .min_len = sizeof(struct tc_pedit),
+ .optional = false, },
+[TCA_PEDIT_KEYS_EX]   = { .type = NL_A_NESTED,
+  .optional = false, },
+};
+
+static int
+nl_parse_act_pedit(struct nlattr *options, struct tc_flower *flower)
+{
+struct nlattr *pe_attrs[ARRAY_SIZE(pedit_policy)];
+const struct tc_pedit *pe;
+const struct tc_pedit_key *keys;
+const struct nlattr *nla, *keys_ex, *ex_type;
+const void *keys_attr;
+char *rewrite_key = (void *) >rewrite.key;
+char *rewrite_mask = (void *) >rewrite.mask;


These are actually 'struct tc_flower_key', shouldn't we use the actual
types? (I realise there is pointer arithmetic below, but usually we
restrict the usage of void casting and pointer arithmetic as much as
possible).



Yes, It's for the pointer arithmetic. (void *) cast was for the
clangs warning "cast increases required alignment of target type"
which we couldn't find another way to suppress.
I don't think alignments an issue here.



+size_t keys_ex_size, left;
+int type, i = 0;
+
+if (!nl_parse_nested(options, pedit_policy, pe_attrs,
+ ARRAY_SIZE(pedit_policy))) {
+VLOG_ERR_RL(_rl, "failed to parse pedit action options");
+return EPROTO;
+}
+
+pe = nl_attr_get_unspec(pe_attrs[TCA_PEDIT_PARMS_EX], sizeof *pe);
+keys = pe->keys;
+keys_attr = pe_attrs[TCA_PEDIT_KEYS_EX];
+keys_ex = nl_attr_get(keys_attr);
+keys_ex_size = nl_attr_get_size(keys_attr);
+
+NL_ATTR_FOR_EACH(nla, left, keys_ex, keys_ex_size) {
+if (i >= pe->nkeys) {
+break;
+}
+
+if (nl_attr_type(nla) == TCA_PEDIT_KEY_EX) {
+ex_type = nl_attr_find_nested(nla, TCA_PEDIT_KEY_EX_HTYPE);
+type = nl_attr_get_u16(ex_type);
+
+for (int j = 0; j < ARRAY_SIZE(flower_pedit_map); j++) {
+struct flower_key_to_pedit *m = _pedit_map[j];
+int flower_off = j;
+int sz = m->size;
+int mf = m->offset;
+
+if (!sz || m->htype != type) {
+   continue;
+}


If flower_pedit_map was just a regular array and the offset was
included in 'struct flower_key_to_pedit', then I don't think we need
the above check?


+
+/* check overlap between current pedit key, which is always
+ * 4 bytes (range [off, off + 3]), 

Re: [ovs-dev] [PATCH 4/4] netdev-tc-offloads: Add support for action set

2017-08-15 Thread Paul Blakey



On 15/08/2017 10:28, Paul Blakey wrote:



On 15/08/2017 04:04, Joe Stringer wrote:

On 8 August 2017 at 07:21, Roi Dayan  wrote:

From: Paul Blakey 

Implement support for offloading ovs action set using
tc header rewrite action.

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
---




@@ -274,6 +346,48 @@ netdev_tc_flow_dump_destroy(struct 
netdev_flow_dump *dump)

  return 0;
  }

+static void
+parse_flower_rewrite_to_netlink_action(struct ofpbuf *buf,
+   struct tc_flower *flower)
+{
+char *mask = (char *) >rewrite.mask;
+char *data = (char *) >rewrite.key;
+
+for (int type = 0; type < ARRAY_SIZE(set_flower_map); type++) {
+char *put = NULL;
+size_t nested = 0;
+int len = ovs_flow_key_attr_lens[type].len;
+
+if (len <= 0) {
+continue;
+}
+
+for (int j = 0; j < ARRAY_SIZE(set_flower_map[type]); j++) {
+struct netlink_field *f = _flower_map[type][j];
+
+if (!f->size) {
+break;
+}


Seems like maybe there's similar feedback here to the previous patch
wrt sparsely populated array set_flower_map[].



I'll answer there.


Scratch that, I actually meant to write it here.

With this map we have fast (direct lookup) access on the put path 
(parse_put_flow_set_masked_action) , and slow access on the reverse dump 
path (parse_flower_rewrite_to_netlink_action) which iterates over all 
indices, although it should skips them  fast if they are empty.


Changing this to sparse map will slow both of the paths, but dumping 
would be faster. We didn't write this maps for performance but for 
convince of adding new supported attributes.


What we can do to both maps is:

1) Using ovs thread once, we can create a fast (direct lookup) access 
map for both paths - generating a reverse map at init time and using 
that instead for dumping.

2) Rewrite it in a non sparse way, use it for both.
3) Rewrite it using a switch case, the logic for now is pretty much the 
same for all attributes.


What do you think?






+
+if (!is_all_zeros(mask + f->flower_offset, f->size)) {
+if (!put) {
+nested = nl_msg_start_nested(buf,
+ 
OVS_ACTION_ATTR_SET_MASKED);

+put = nl_msg_put_unspec_zero(buf, type, len * 2);


Do we ever have multiple of these attributes in a single nested
OVS_ACTION_ATTR_SET_MASKED? If so, won't the following memcpys avoid
putting the netlink header for the subsequent sets?



  OVS_ACTION_ATTR_SET_MASKED always has a single nested OVS_KEY_ATTR_* 
attribute so we put it (all zeros) the first time we find a sub 
attribute (e.g ipv4_dst of ipv4) that isn't zero and use memcpy below to 
overwrite the non zero sub attributes.



+}
+
+memcpy(put + f->offset, data + f->flower_offset, 
f->size);

+memcpy(put + len + f->offset,
+   mask + f->flower_offset, f->size);


Could we these combine with the nl_msg_put_unspec() above?



We'd have to accumulate the data as we go over the inner loop and then 
write it.
We could, for example, create a stack buffer like char 
buff[MAX_ATTR_LEN] (kinda ugly), write it there, and if we wrote 
something to buff, flush it to a nested attribute in a single write.
Would that be better? It might be less readable and it is less flexible 
(defining of MAX_ATTR_LEN).









___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 4/4] netdev-tc-offloads: Add support for action set

2017-08-15 Thread Paul Blakey



On 15/08/2017 04:04, Joe Stringer wrote:

On 8 August 2017 at 07:21, Roi Dayan  wrote:

From: Paul Blakey 

Implement support for offloading ovs action set using
tc header rewrite action.

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
---





@@ -274,6 +346,48 @@ netdev_tc_flow_dump_destroy(struct netdev_flow_dump *dump)
  return 0;
  }

+static void
+parse_flower_rewrite_to_netlink_action(struct ofpbuf *buf,
+   struct tc_flower *flower)
+{
+char *mask = (char *) >rewrite.mask;
+char *data = (char *) >rewrite.key;
+
+for (int type = 0; type < ARRAY_SIZE(set_flower_map); type++) {
+char *put = NULL;
+size_t nested = 0;
+int len = ovs_flow_key_attr_lens[type].len;
+
+if (len <= 0) {
+continue;
+}
+
+for (int j = 0; j < ARRAY_SIZE(set_flower_map[type]); j++) {
+struct netlink_field *f = _flower_map[type][j];
+
+if (!f->size) {
+break;
+}


Seems like maybe there's similar feedback here to the previous patch
wrt sparsely populated array set_flower_map[].



I'll answer there.


+
+if (!is_all_zeros(mask + f->flower_offset, f->size)) {
+if (!put) {
+nested = nl_msg_start_nested(buf,
+ OVS_ACTION_ATTR_SET_MASKED);
+put = nl_msg_put_unspec_zero(buf, type, len * 2);


Do we ever have multiple of these attributes in a single nested
OVS_ACTION_ATTR_SET_MASKED? If so, won't the following memcpys avoid
putting the netlink header for the subsequent sets?



 OVS_ACTION_ATTR_SET_MASKED always has a single nested OVS_KEY_ATTR_* 
attribute so we put it (all zeros) the first time we find a sub 
attribute (e.g ipv4_dst of ipv4) that isn't zero and use memcpy below to 
overwrite the non zero sub attributes.



+}
+
+memcpy(put + f->offset, data + f->flower_offset, f->size);
+memcpy(put + len + f->offset,
+   mask + f->flower_offset, f->size);


Could we these combine with the nl_msg_put_unspec() above?



We'd have to accumulate the data as we go over the inner loop and then 
write it.
We could, for example, create a stack buffer like char 
buff[MAX_ATTR_LEN] (kinda ugly), write it there, and if we wrote 
something to buff, flush it to a nested attribute in a single write.
Would that be better? It might be less readable and it is less flexible 
(defining of MAX_ATTR_LEN).








___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [patch_v3 3/3] tests: Enhance the pmd stats test.

2017-08-15 Thread Darrell Ball
The pmd stats test is enhanced to include megaflow stats
counting and checking.

Also, miss and lost stats are annotated to make it clear
what they mean.

Signed-off-by: Darrell Ball 
---
 tests/pmd.at | 31 +--
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/tests/pmd.at b/tests/pmd.at
index b6732ea..1faa977 100644
--- a/tests/pmd.at
+++ b/tests/pmd.at
@@ -174,9 +174,9 @@ AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed 
SED_NUMA_CORE_PATTERN | se
 pmd thread numa_id  core_id :
emc hits:0
megaflow hits:0
-   avg. subtable lookups per hit:0.00
-   miss:0
-   lost:0
+   avg. subtable lookups per megaflow hit:0.00
+   miss(upcall done):0
+   lost(upcall not done):0
 ])
 
 ovs-appctl time/stop
@@ -201,9 +201,28 @@ AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed 
SED_NUMA_CORE_PATTERN | se
 pmd thread numa_id  core_id :
emc hits:19
megaflow hits:0
-   avg. subtable lookups per hit:0.00
-   miss:1
-   lost:0
+   avg. subtable lookups per megaflow hit:0.00
+   miss(upcall done):1
+   lost(upcall not done):0
+])
+
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:emc-insert-inv-prob=0])
+(
+for i in `seq 0 5`;
+do
+
pkt="in_port(7),eth(src=50:54:00:00:00:77,dst=50:54:00:00:01:78),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.1.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)"
+AT_CHECK([ovs-appctl netdev-dummy/receive p0 $pkt])
+done
+)
+ovs-appctl time/warp 100
+
+AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed SED_NUMA_CORE_PATTERN | 
sed '/cycles/d' | grep pmd -A 5], [0], [dnl
+pmd thread numa_id  core_id :
+   emc hits:19
+   megaflow hits:6
+   avg. subtable lookups per megaflow hit:1.00
+   miss(upcall done):1
+   lost(upcall not done):0
 ])
 
 OVS_VSWITCHD_STOP
-- 
1.9.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [patch_v3 2/3] dpif-netdev: Refactor some pmd stats.

2017-08-15 Thread Darrell Ball
The per packets stats are presently overlapping in that
miss stats include lost stats; make these stats non-overlapping
for clarity and make this clear in the dp_stat_type enum.  This
also eliminates the need to increment two 'miss' stats for a
single packet.

The subtable lookup stats is renamed to make it
clear that it relates to masked lookups.

The stats that total to the number of packets seen are defined
in dp_stat_type and an api is created to total the stats in case
these stats are further divided in the future.

Signed-off-by: Darrell Ball 
---
 lib/dpif-netdev.c | 78 ++-
 1 file changed, 49 insertions(+), 29 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 17e1666..4aa967a 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -323,12 +323,19 @@ static struct dp_netdev_port *dp_netdev_lookup_port(const 
struct dp_netdev *dp,
 OVS_REQUIRES(dp->port_mutex);
 
 enum dp_stat_type {
-DP_STAT_EXACT_HIT,  /* Packets that had an exact match (emc). */
-DP_STAT_MASKED_HIT, /* Packets that matched in the flow table. */
-DP_STAT_MISS,   /* Packets that did not match. */
-DP_STAT_LOST,   /* Packets not passed up to the client. */
-DP_STAT_LOOKUP_HIT, /* Number of subtable lookups for flow table
-   hits */
+DP_STAT_EXACT_HIT,  /* Packets that had an exact match (emc). */
+DP_STAT_MASKED_HIT, /* Packets that matched in the flow table. */
+DP_STAT_MISS,   /* Packets that did not match and upcall was
+   done. */
+DP_STAT_LOST,   /* Packets that did not match and upcall was
+   not done. */
+DP_N_PER_PKT_CNT,   /* The above statistics account for the total
+   number of packets seen and should not be
+   overlapping with each other. */
+DP_STAT_MASKED_LOOKUP = DP_N_PER_PKT_CNT,  /* Number of subtable lookups
+  for flow table hits. Each
+  MASKED_HIT hit will have
+  >= 1 MASKED_LOOKUP(s). */
 DP_N_STATS
 };
 
@@ -749,13 +756,22 @@ enum pmd_info_type {
 PMD_INFO_SHOW_RXQ /* Show poll-lists of pmd threads. */
 };
 
+static unsigned long long
+dp_netdev_calcul_total_packets(unsigned long long *stats)
+{
+unsigned long long total_packets = 0;
+for (uint8_t i = 0; i < DP_N_PER_PKT_CNT; i++) {
+total_packets += stats[i];
+}
+return total_packets;
+}
+
 static void
 pmd_info_show_stats(struct ds *reply,
 struct dp_netdev_pmd_thread *pmd,
 unsigned long long stats[DP_N_STATS],
 uint64_t cycles[PMD_N_CYCLES])
 {
-unsigned long long total_packets;
 uint64_t total_cycles = 0;
 int i;
 
@@ -773,10 +789,6 @@ pmd_info_show_stats(struct ds *reply,
 }
 }
 
-/* Sum of all the matched and not matched packets gives the total.  */
-total_packets = stats[DP_STAT_EXACT_HIT] + stats[DP_STAT_MASKED_HIT]
-+ stats[DP_STAT_MISS];
-
 for (i = 0; i < PMD_N_CYCLES; i++) {
 if (cycles[i] > pmd->cycles_zero[i]) {
cycles[i] -= pmd->cycles_zero[i];
@@ -800,11 +812,12 @@ pmd_info_show_stats(struct ds *reply,
 
 ds_put_format(reply,
   "\temc hits:%llu\n\tmegaflow hits:%llu\n"
-  "\tavg. subtable lookups per hit:%.2f\n"
-  "\tmiss:%llu\n\tlost:%llu\n",
+  "\tavg. subtable lookups per megaflow hit:%.2f\n"
+  "\tmiss(upcall done):%llu\n\tlost(upcall not done):%llu\n",
   stats[DP_STAT_EXACT_HIT], stats[DP_STAT_MASKED_HIT],
   stats[DP_STAT_MASKED_HIT] > 0
-  ? (1.0*stats[DP_STAT_LOOKUP_HIT])/stats[DP_STAT_MASKED_HIT]
+  ? (1.0 * stats[DP_STAT_MASKED_LOOKUP])
+ / stats[DP_STAT_MASKED_HIT]
   : 0,
   stats[DP_STAT_MISS], stats[DP_STAT_LOST]);
 
@@ -820,6 +833,9 @@ pmd_info_show_stats(struct ds *reply,
   cycles[PMD_CYCLES_PROCESSING],
   cycles[PMD_CYCLES_PROCESSING] / (double)total_cycles * 100);
 
+/* Sum of all the matched and not matched packets gives the total.  */
+unsigned long long total_packets =
+ dp_netdev_calcul_total_packets(stats);
 if (total_packets == 0) {
 return;
 }
@@ -4724,18 +4740,18 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
 return dp_packet_batch_size(packets_);
 }
 
-static inline void
+static inline int
 handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
  struct dp_packet *packet,
  const struct netdev_flow_key *key,
  struct ofpbuf *actions, struct ofpbuf