[ovs-dev] [PATCH 5/5] lib/netdev-dpdk: copy large packet to multi-segment mbufs

2017-06-18 Thread Michael Qiu
From: Michael Qiu 

Currently, dpdk_do_tx_copy() copies each packet into only one
segment. This could be an issue when a jumbo frame arrives,
especially one that needs multiple segments.

This patch calculates the number of segments needed by the packet and
copies the data into the different segments.

Signed-off-by: Michael Qiu 
---
 lib/netdev-dpdk.c | 53 -
 1 file changed, 48 insertions(+), 5 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 0485872..38ec2ed 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1776,14 +1776,16 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct 
dp_packet_batch *batch)
 #endif
 struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
 struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
+struct rte_mbuf *temp, *head = NULL;
 int dropped = 0;
 int newcnt = 0;
-int i;
+int i, j, nb_segs;
 
 dp_packet_batch_apply_cutlen(batch);
 
 for (i = 0; i < batch->count; i++) {
 int size = dp_packet_size(batch->packets[i]);
+int max_data_len, tmp_len;
 
 if (OVS_UNLIKELY(size > dev->max_packet_len)) {
 VLOG_WARN_RL(, "Too big size %d max_packet_len %d",
@@ -1793,7 +1795,24 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct 
dp_packet_batch *batch)
 continue;
 }
 
-pkts[newcnt] = rte_pktmbuf_alloc(dev->dpdk_mp->mp);
+temp = pkts[newcnt] = rte_pktmbuf_alloc(dev->dpdk_mp->mp);
+
+/* all new allocated mbuf's max data len is the same */
+max_data_len = temp->buf_len - temp->data_off;
+
+nb_segs = size/max_data_len;
+if (size % max_data_len)
+nb_segs = nb_segs + 1;
+
+for (j = 1; j < nb_segs; j++) {
+temp->next = rte_pktmbuf_alloc(dev->dpdk_mp->mp);
+if (!temp->next) {
+rte_pktmbuf_free(pkts[newcnt]);
+pkts[newcnt] = NULL;
+break;
+}
+temp = temp->next;
+}
 
 if (!pkts[newcnt]) {
 dropped += batch->count - i;
@@ -1801,10 +1820,34 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct 
dp_packet_batch *batch)
 }
 
 /* We have to do a copy for now */
-memcpy(rte_pktmbuf_mtod(pkts[newcnt], void *),
-   dp_packet_data(batch->packets[i]), size);
+rte_pktmbuf_pkt_len(pkts[newcnt]) = size;
+temp = pkts[newcnt];
+tmp_len = size < max_data_len ? size: max_data_len;
+if (batch->packets[i]->source == DPBUF_DPDK) {
+head = &(batch->packets[i]->mbuf);
+while (temp && head && size > 0) {
+rte_memcpy(rte_pktmbuf_mtod(temp, void*), 
dp_packet_data((struct dp_packet *)head),tmp_len);
+rte_pktmbuf_data_len(temp) = tmp_len;
+head = head->next;
+size = size - tmp_len;
+tmp_len =  size < max_data_len ? size: max_data_len;
+temp = temp->next;
+}
+} else {
+int offset = 0;
+while (temp && size > 0) {
+memcpy(rte_pktmbuf_mtod(temp, void *),
+dp_packet_at(batch->packets[i], offset,tmp_len), tmp_len);
+rte_pktmbuf_data_len(temp) = tmp_len;
+temp = temp->next;
+size = size - tmp_len;
+offset +=tmp_len;
+tmp_len =  size < max_data_len ? size: max_data_len;
+}
+}
+
 
-pkts[newcnt]->nb_segs = batch->packets[i]->mbuf.nb_segs;
+pkts[newcnt]->nb_segs = nb_segs;
 pkts[newcnt]->ol_flags = batch->packets[i]->mbuf.ol_flags;
 pkts[newcnt]->packet_type = batch->packets[i]->mbuf.packet_type;
 pkts[newcnt]->tx_offload = batch->packets[i]->mbuf.tx_offload;
-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 4/5] lib/dp-packet: copy multi-segments data from DPDK mbuf

2017-06-18 Thread Michael Qiu
From: Michael Qiu 

When cloning a packet whose source is the DPDK driver,
multiple segments must be considered, and each segment's
data must be copied one by one.

Signed-off-by: Michael Qiu 
---
 lib/dp-packet.c | 27 ---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index 9f872a1..278706e 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -167,9 +167,30 @@ dp_packet_clone_with_headroom(const struct dp_packet 
*buffer, size_t headroom)
 {
 struct dp_packet *new_buffer;
 
-new_buffer = dp_packet_clone_data_with_headroom(dp_packet_data(buffer),
- dp_packet_size(buffer),
- headroom);
+uint32_t size = dp_packet_size(buffer);
+
+/* copy multi-seg data */
+#ifdef DPDK_NETDEV
+if (buffer->source == DPBUF_DPDK && buffer->mbuf.nb_segs > 1) {
+uint32_t off_set = 0;
+void *dst = NULL;
+struct rte_mbuf *tmbuf = CONST_CAST(struct rte_mbuf *, 
&(buffer->mbuf));
+
+new_buffer = dp_packet_new_with_headroom(size, headroom);
+dst = dp_packet_put_uninit(new_buffer, size);
+
+while (tmbuf) {
+rte_memcpy((char *)dst + off_set,
+   rte_pktmbuf_mtod(tmbuf, void *), tmbuf->data_len);
+off_set += tmbuf->data_len;
+tmbuf = tmbuf->next;
+}
+}
+else
+#endif
+new_buffer = dp_packet_clone_data_with_headroom(dp_packet_data(buffer),
+size, headroom);
+
 new_buffer->l2_pad_size = buffer->l2_pad_size;
 new_buffer->l2_5_ofs = buffer->l2_5_ofs;
 new_buffer->l3_ofs = buffer->l3_ofs;
-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 2/5] lib/dp-packet: copy additional packet info when do packet copy

2017-06-18 Thread Michael Qiu
From: Michael Qiu 

Currently, when copying a packet, much of the DPDK mbuf's info
is lost, such as packet type, ol_flags, etc.
This information is very important for DPDK packet
processing.

Signed-off-by: Michael Qiu 
---
 lib/dp-packet.c   | 3 +++
 lib/netdev-dpdk.c | 4 
 2 files changed, 7 insertions(+)

diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index ee2c449..9f872a1 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -179,6 +179,9 @@ dp_packet_clone_with_headroom(const struct dp_packet 
*buffer, size_t headroom)
 new_buffer->packet_type = buffer->packet_type;
 #ifdef DPDK_NETDEV
 new_buffer->mbuf.ol_flags = buffer->mbuf.ol_flags;
+new_buffer->mbuf.tx_offload = buffer->mbuf.tx_offload;
+new_buffer->mbuf.packet_type = buffer->mbuf.packet_type;
+new_buffer->mbuf.nb_segs = buffer->mbuf.nb_segs;
 #else
 new_buffer->rss_hash_valid = buffer->rss_hash_valid;
 #endif
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index bba4de3..0485872 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1804,6 +1804,10 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct 
dp_packet_batch *batch)
 memcpy(rte_pktmbuf_mtod(pkts[newcnt], void *),
dp_packet_data(batch->packets[i]), size);
 
+pkts[newcnt]->nb_segs = batch->packets[i]->mbuf.nb_segs;
+pkts[newcnt]->ol_flags = batch->packets[i]->mbuf.ol_flags;
+pkts[newcnt]->packet_type = batch->packets[i]->mbuf.packet_type;
+pkts[newcnt]->tx_offload = batch->packets[i]->mbuf.tx_offload;
 rte_pktmbuf_data_len(pkts[newcnt]) = size;
 rte_pktmbuf_pkt_len(pkts[newcnt]) = size;
 
-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 0/5 v3] DPDK multi-segment mbuf support

2017-06-18 Thread Michael Qiu
From: Michael Qiu 


Currently, OVS only supports single-segment DPDK mbufs,
which can lead to problems, such as when a large non-DPDK source
packet is transmitted to a DPDK port.

Also, OVS doesn't copy enough of the mbuf's info when copying
a packet.

At the same time, DPDK offloads for VLAN and tunnelling packets,
for example TSO, need multi-segment mbuf
support.

This patchset solves all of the above issues.

--
v3 --> v2
 rebase code to newest upstream.
 using MIN() to calculate the data_len instead of if/else

v2 --> v1
 rebase code to newest upstream.
 fix some typo in commit log.

Michael Qiu (4):
  lib/dp-packet: init the mbuf to zero when build with DPDK
  lib/dp-packet: Fix data_len issue with multi-segments
  lib/dp-packet: copy multi-segments data from DPDK mbuf
  lib/netdev-dpdk: copy large packet to multi-segment mbufs

suzhengwei (1):
  lib/dp-packet: copy additional packet info when do packet copy

 lib/dp-packet.c   | 33 ++---
 lib/dp-packet.h   | 18 --
 lib/netdev-dpdk.c | 55 +++
 3 files changed, 89 insertions(+), 17 deletions(-)

-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 02/12] openvswitch.h: Use odp_port_t for port numbers in userspace-only structs.

2017-06-18 Thread nickcooper-zhangtonghao
Reviewed-by: nickcooper-zhangtonghao 

> On Jun 19, 2017, at 7:29 AM, Ben Pfaff  wrote:
> 
> Using the correct type reduces the need for type conversions.
> 
> Signed-off-by: Ben Pfaff >
> ---
> datapath/linux/compat/include/linux/openvswitch.h | 4 ++--
> lib/dpif-netdev.c | 2 +-
> lib/netdev.c  | 2 +-
> ofproto/ofproto-dpif-sflow.c  | 2 +-
> ofproto/ofproto-dpif-xlate.c  | 4 ++--
> 5 files changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/datapath/linux/compat/include/linux/openvswitch.h 
> b/datapath/linux/compat/include/linux/openvswitch.h
> index 4c88de1d610d..24e51cb311d2 100644
> --- a/datapath/linux/compat/include/linux/openvswitch.h
> +++ b/datapath/linux/compat/include/linux/openvswitch.h
> @@ -714,8 +714,8 @@ struct ovs_action_hash {
>  * this header to build final header according to actual packet parameters.
>  */
> struct ovs_action_push_tnl {
> - uint32_t tnl_port;
> - uint32_t out_port;
> + odp_port_t tnl_port;
> + odp_port_t out_port;
>   uint32_t header_len;
>   uint32_t tnl_type; /* For logging. */
>   uint32_t header[TNL_PUSH_HEADER_SIZE / 4];
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 2b65dc74a269..f97e97ab2931 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -4956,7 +4956,7 @@ push_tnl_action(const struct dp_netdev_pmd_thread *pmd,
> 
> data = nl_attr_get(attr);
> 
> -tun_port = pmd_tnl_port_cache_lookup(pmd, u32_to_odp(data->tnl_port));
> +tun_port = pmd_tnl_port_cache_lookup(pmd, data->tnl_port);
> if (!tun_port) {
> err = -EINVAL;
> goto error;
> diff --git a/lib/netdev.c b/lib/netdev.c
> index 001b7b37bb57..765bf4b9ccad 100644
> --- a/lib/netdev.c
> +++ b/lib/netdev.c
> @@ -831,7 +831,7 @@ netdev_push_header(const struct netdev *netdev,
> struct dp_packet *packet;
> DP_PACKET_BATCH_FOR_EACH (packet, batch) {
> netdev->netdev_class->push_header(packet, data);
> -pkt_metadata_init(>md, u32_to_odp(data->out_port));
> +pkt_metadata_init(>md, data->out_port);
> }
> 
> return 0;
> diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
> index d9fddb1564b5..fc665a636853 100644
> --- a/ofproto/ofproto-dpif-sflow.c
> +++ b/ofproto/ofproto-dpif-sflow.c
> @@ -901,7 +901,7 @@ sflow_read_tnl_push_action(const struct nlattr *attr,
> const struct ip_header *ip
> = ALIGNED_CAST(const struct ip_header *, eth + 1);
> 
> -sflow_actions->out_port = u32_to_odp(data->out_port);
> +sflow_actions->out_port = data->out_port;
> 
> /* Ethernet. */
> /* TODO: SFlow does not currently define a MAC-in-MAC
> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> index e15e3dec3f1c..48c4bad4ac0b 100644
> --- a/ofproto/ofproto-dpif-xlate.c
> +++ b/ofproto/ofproto-dpif-xlate.c
> @@ -3211,8 +3211,8 @@ build_tunnel_send(struct xlate_ctx *ctx, const struct 
> xport *xport,
> if (err) {
> return err;
> }
> -tnl_push_data.tnl_port = odp_to_u32(tunnel_odp_port);
> -tnl_push_data.out_port = odp_to_u32(out_dev->odp_port);
> +tnl_push_data.tnl_port = tunnel_odp_port;
> +tnl_push_data.out_port = out_dev->odp_port;
> 
> /* After tunnel header has been added, packet_type of flow and base_flow
>  * need to be set to PT_ETH. */
> -- 
> 2.10.2

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 01/12] ofp-util: Remove prototype for unimplemented function.

2017-06-18 Thread nickcooper-zhangtonghao

Reviewed-by: nickcooper-zhangtonghao 

> On Jun 19, 2017, at 7:29 AM, Ben Pfaff  wrote:
> 
> Signed-off-by: Ben Pfaff >
> ---
> include/openvswitch/ofp-util.h | 2 --
> 1 file changed, 2 deletions(-)
> 
> diff --git a/include/openvswitch/ofp-util.h b/include/openvswitch/ofp-util.h
> index bbf6ffec5dd3..07723b427ce8 100644
> --- a/include/openvswitch/ofp-util.h
> +++ b/include/openvswitch/ofp-util.h
> @@ -247,8 +247,6 @@ void ofputil_match_to_ofp10_match(const struct match *, 
> struct ofp10_match *);
> enum ofperr ofputil_pull_ofp11_match(struct ofpbuf *, const struct tun_table 
> *,
>  const struct vl_mff_map *, struct match 
> *,
>  uint16_t *padded_match_len);
> -enum ofperr ofputil_pull_ofp11_mask(struct ofpbuf *, struct match *,
> -struct mf_bitmap *bm);
> enum ofperr ofputil_match_from_ofp11_match(const struct ofp11_match *,
>struct match *);
> int ofputil_put_ofp11_match(struct ofpbuf *, const struct match *,
> -- 
> 2.10.2

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 00/12] Packet type aware pipeline

2017-06-18 Thread Ben Pfaff
On Mon, Jun 19, 2017 at 07:29:29AM +0800, Ben Pfaff wrote:
> This series is based on Zoltan Balogh's series here:
> https://patchwork.ozlabs.org/patch/770490/
> https://patchwork.ozlabs.org/patch/770487/
> https://patchwork.ozlabs.org/patch/770495/
> https://patchwork.ozlabs.org/patch/770498/
> https://patchwork.ozlabs.org/patch/770488/
> https://patchwork.ozlabs.org/patch/770489/
> 
> v1->v2:
>   - Squash fixup patches.
>   - Apply changes agreed with Jan.
>   - Not yet done: Figure out whether to really show packet_type in (some)
> match_format() output.
>   - New patch at the end unsuccessfully tries to re-enable packet-aware
> test.  Either I don't have enough insight yet, or it just reveals a
> bug or two.
>   - 4 new patches at beginning.  First one is trivial.  Next 3 are intended
> to make it easier to debug the packet aware test that is still failing.
> Jan, you don't have to feel obligated to review these if you feel they
> are off-topic; I will get separate reviews.

A new concern came up while thinking about this series.  The
OVS_ATTR_PACKET_TYPE does not appear to be implemented in the kernel
module, and what's more, because of #ifdefs, OVS_ATTR_PACKET_TYPE will
actually have a different value in the kernel module than in userspace.
What's the plan here?
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 11/12] userspace: Introduce packet_type in OF 1.5 packet-out

2017-06-18 Thread Ben Pfaff
From: Zoltán Balogh 

Introducing packet_type in OF 1.5 packet-out.
Partly based on Jean Tourrilhes's work.

Add test cases for OF1.5 packet-out
Add negative test case for OF1.5 packet-out
Modify wildcarding and packet-out test printout.

Signed-off-by: Jean Tourrilhes 
Signed-off-by: Zoltan Balogh 
Co-authored-by: Jan Scheurich 
Signed-off-by: Ben Pfaff 
---
 lib/flow.c  | 36 +++-
 lib/ofp-parse.c | 13 +
 lib/ofp-print.c |  4 +-
 lib/ofp-util.c  |  2 +
 ofproto/ofproto.c   |  3 +
 tests/ofproto.at| 85 +
 tests/system-userspace-packet-type-aware.at |  2 +-
 utilities/ovs-ofctl.c   |  1 +
 8 files changed, 128 insertions(+), 18 deletions(-)

diff --git a/lib/flow.c b/lib/flow.c
index dbca4d03da3d..75a91cc6a2f3 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -1441,6 +1441,8 @@ void
 flow_wildcards_init_for_packet(struct flow_wildcards *wc,
const struct flow *flow)
 {
+ovs_be16 dl_type = OVS_BE16_MAX;
+
 memset(>masks, 0x0, sizeof wc->masks);
 
 /* Update this function whenever struct flow changes. */
@@ -1493,25 +1495,29 @@ flow_wildcards_init_for_packet(struct flow_wildcards 
*wc,
 /* actset_output wildcarded. */
 
 WC_MASK_FIELD(wc, packet_type);
-WC_MASK_FIELD(wc, dl_dst);
-WC_MASK_FIELD(wc, dl_src);
-WC_MASK_FIELD(wc, dl_type);
-
-/* No need to set mask of inner VLANs that don't exist. */
-for (int i = 0; i < FLOW_MAX_VLAN_HEADERS; i++) {
-/* Always show the first zero VLAN. */
-WC_MASK_FIELD(wc, vlans[i]);
-if (flow->vlans[i].tci == htons(0)) {
-break;
+if (flow->packet_type == htonl(PT_ETH)) {
+WC_MASK_FIELD(wc, dl_dst);
+WC_MASK_FIELD(wc, dl_src);
+WC_MASK_FIELD(wc, dl_type);
+/* No need to set mask of inner VLANs that don't exist. */
+for (int i = 0; i < FLOW_MAX_VLAN_HEADERS; i++) {
+/* Always show the first zero VLAN. */
+WC_MASK_FIELD(wc, vlans[i]);
+if (flow->vlans[i].tci == htons(0)) {
+break;
+}
 }
+dl_type = flow->dl_type;
+} else {
+dl_type = pt_ns_type_be(flow->packet_type);
 }
 
-if (flow->dl_type == htons(ETH_TYPE_IP)) {
+if (dl_type == htons(ETH_TYPE_IP)) {
 WC_MASK_FIELD(wc, nw_src);
 WC_MASK_FIELD(wc, nw_dst);
 WC_MASK_FIELD(wc, ct_nw_src);
 WC_MASK_FIELD(wc, ct_nw_dst);
-} else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
+} else if (dl_type == htons(ETH_TYPE_IPV6)) {
 WC_MASK_FIELD(wc, ipv6_src);
 WC_MASK_FIELD(wc, ipv6_dst);
 WC_MASK_FIELD(wc, ipv6_label);
@@ -1523,15 +1529,15 @@ flow_wildcards_init_for_packet(struct flow_wildcards 
*wc,
 WC_MASK_FIELD(wc, ct_ipv6_src);
 WC_MASK_FIELD(wc, ct_ipv6_dst);
 }
-} else if (flow->dl_type == htons(ETH_TYPE_ARP) ||
-   flow->dl_type == htons(ETH_TYPE_RARP)) {
+} else if (dl_type == htons(ETH_TYPE_ARP) ||
+   dl_type == htons(ETH_TYPE_RARP)) {
 WC_MASK_FIELD(wc, nw_src);
 WC_MASK_FIELD(wc, nw_dst);
 WC_MASK_FIELD(wc, nw_proto);
 WC_MASK_FIELD(wc, arp_sha);
 WC_MASK_FIELD(wc, arp_tha);
 return;
-} else if (eth_type_mpls(flow->dl_type)) {
+} else if (eth_type_mpls(dl_type)) {
 for (int i = 0; i < FLOW_MAX_MPLS_LABELS; i++) {
 WC_MASK_FIELD(wc, mpls_lse[i]);
 if (flow->mpls_lse[i] & htonl(MPLS_BOS_MASK)) {
diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c
index 8e2448b20dbd..528b75b4f4e1 100644
--- a/lib/ofp-parse.c
+++ b/lib/ofp-parse.c
@@ -667,6 +667,19 @@ parse_ofp_packet_out_str__(struct ofputil_packet_out *po, 
char *string,
 goto out;
 }
 match_set_in_port(>flow_metadata, in_port);
+} else if (!strcmp(name, "packet_type")) {
+char *ns = value;
+char *ns_type = strstr(value, ",");
+if (ns_type) {
+ovs_be32 packet_type;
+*ns_type = '\0';
+packet_type = PACKET_TYPE_BE(strtoul(ns, NULL, 0),
+ strtoul(++ns_type, NULL, 0));
+match_set_packet_type(>flow_metadata, packet_type);
+} else {
+error = xasprintf("%s(%s) can't be interpreted", name, value);
+goto out;
+}
 } else if (!strcmp(name, "packet")) {
 const char *error_msg = eth_from_hex(value, );
 if (error_msg) {
diff --git a/lib/ofp-print.c b/lib/ofp-print.c
index 8a6c54e1da0f..4370cb5221fc 100644
--- a/lib/ofp-print.c
+++ 

[ovs-dev] [PATCH v2 07/12] userspace: Add OXM field MFF_PACKET_TYPE

2017-06-18 Thread Ben Pfaff
From: Jan Scheurich 

Allow packet type namespace OFPHTN_ETHERTYPE as alternative pre-requisite
for matching L3 protocols (MPLS, IP, IPv6, ARP etc).

Change the meta-flow definition of packet_type field to use the new
custom format MFS_PACKET_TYPE representing "(NS,NS_TYPE)".

Parsing routine for MFS_PACKET_TYPE added to meta-flow.c. Formatting
routine for field packet_type extracted from match_format() and moved to
flow.c to be used from meta-flow.c for formatting MFS_PACKET_TYPE.

Updated the ovs-fields man page source meta-flow.xml with documentation
for packet-type-aware bridges and added documentation for field packet_type.

Added packet_type to the matching properties in tests/ofproto.at. Should be
removed later, when packet_type_aware bridge attribute will be introduced.

Signed-off-by: Jan Scheurich 
Signed-off-by: Ben Pfaff 
---
 build-aux/extract-ofp-fields|   3 +-
 include/openvswitch/match.h |   5 +
 include/openvswitch/meta-flow.h |  20 
 lib/flow.c  |  34 +-
 lib/flow.h  |  27 +++--
 lib/learn.c |   1 +
 lib/match.c |  98 +++--
 lib/meta-flow.c |  86 +--
 lib/meta-flow.xml   | 156 +++
 lib/nx-match.c  |  34 +-
 lib/odp-util.c  |  38 +++
 lib/ofp-parse.c |  12 +++
 lib/ofp-util.c  |  67 +---
 ofproto/ofproto-dpif-xlate.c|   1 +
 ofproto/tunnel.c|   2 -
 tests/dpif-netdev.at|  89 
 tests/odp.at|   1 +
 tests/ofproto-dpif.at   | 230 
 tests/ofproto.at|   1 +
 tests/ovs-ofctl.at  |   2 +-
 tests/pmd.at|   8 +-
 tests/tunnel-push-pop-ipv6.at   |   2 +-
 tests/tunnel-push-pop.at|   2 +-
 tests/tunnel.at |  18 ++--
 24 files changed, 653 insertions(+), 284 deletions(-)

diff --git a/build-aux/extract-ofp-fields b/build-aux/extract-ofp-fields
index d5b8a820251e..24dd756ad7d5 100755
--- a/build-aux/extract-ofp-fields
+++ b/build-aux/extract-ofp-fields
@@ -36,7 +36,8 @@ FORMATTING = {"decimal":("MFS_DECIMAL",  1,   
8),
   "OpenFlow 1.1+ port": ("MFS_OFP_PORT_OXM", 4,   4),
   "frag":   ("MFS_FRAG", 1,   1),
   "tunnel flags":   ("MFS_TNL_FLAGS",2,   2),
-  "TCP flags":  ("MFS_TCP_FLAGS",2,   2)}
+  "TCP flags":  ("MFS_TCP_FLAGS",2,   2),
+  "packet type":("MFS_PACKET_TYPE",  4,   4)}
 
 PREREQS = {"none": "MFP_NONE",
"Ethernet": "MFP_ETHERNET",
diff --git a/include/openvswitch/match.h b/include/openvswitch/match.h
index 70da928fe47d..aca725265c79 100644
--- a/include/openvswitch/match.h
+++ b/include/openvswitch/match.h
@@ -23,6 +23,7 @@
 
 struct ds;
 struct ofputil_port_map;
+struct mf_field;
 
 /* A flow classification match.
  *
@@ -119,6 +120,10 @@ void match_set_ct_ipv6_dst_masked(struct match *, const 
struct in6_addr *,
   const struct in6_addr *);
 
 void match_set_packet_type(struct match *, ovs_be32 packet_type);
+void match_set_default_packet_type(struct match *);
+bool match_has_default_packet_type(const struct match *);
+void match_add_ethernet_prereq(struct match *, const struct mf_field *);
+
 void match_set_skb_priority(struct match *, uint32_t skb_priority);
 void match_set_dl_type(struct match *, ovs_be16);
 void match_set_dl_src(struct match *, const struct eth_addr );
diff --git a/include/openvswitch/meta-flow.h b/include/openvswitch/meta-flow.h
index cbfd3ba65b73..fc109501d869 100644
--- a/include/openvswitch/meta-flow.h
+++ b/include/openvswitch/meta-flow.h
@@ -133,6 +133,11 @@ struct ofputil_tlv_table_mod;
  *
  *   TCP flags: See the description of tcp_flags in ovs-ofctl(8).
  *
+ *   packet type: A pair of packet type namespace NS and NS_TYPE within
+ *   that namespace "(NS,NS_TYPE)". NS and NS_TYPE are formatted in
+ *   decimal or hexadecimal as and accept decimal and hexadecimal (with
+ *   0x prefix) at parsing.
+ *
  *   Prerequisites:
  *
  * The field's prerequisites.  The values should be straightfoward.
@@ -248,6 +253,20 @@ enum OVS_PACKED_ENUM mf_field_id {
  */
 MFF_RECIRC_ID,
 
+/* "packet_type".
+ *
+ * Define the packet type in OpenFlow 1.5+.
+ *
+ * Type: be32.
+ * Maskable: no.
+ * Formatting: packet type.
+ * Prerequisites: none.
+ * Access: read-only.
+ * NXM: none.
+ * OXM: OXM_OF_PACKET_TYPE(44) since OF1.5 and v2.8.
+ */
+MFF_PACKET_TYPE,
+
 /* "conj_id".
  *
  * ID for "conjunction" actions.  Please refer to ovs-ofctl(8)
@@ -1860,6 +1879,7 @@ enum OVS_PACKED_ENUM 

[ovs-dev] [PATCH v2 06/12] nx-match: Add context argument to nxm_put__().

2017-06-18 Thread Ben Pfaff
An upcoming commit will need to pass an extra piece of data from
nx_put_raw() into all of its direct and indirect calls to nxm_put__().
This commit prepares for that by switching from a "struct ofpbuf *"
parameter to a context structure that, currently, contains just a
struct ofpbuf *.  The upcoming commit will add another member to the
context struct.

This commit has no visible effect on behavior.

Signed-off-by: Ben Pfaff 
---
 lib/nx-match.c | 232 +
 lib/nx-match.h |   6 +-
 lib/tun-metadata.c |   4 +-
 3 files changed, 131 insertions(+), 111 deletions(-)

diff --git a/lib/nx-match.c b/lib/nx-match.c
index 334ecd4a3f1a..6278b7758783 100644
--- a/lib/nx-match.c
+++ b/lib/nx-match.c
@@ -772,203 +772,222 @@ oxm_pull_field_array(const void *fields_data, size_t 
fields_len,
  * 'put' functions whose names end in 'm' add a field that might be wildcarded.
  * Other 'put' functions add exact-match fields.
  */
+
+struct nxm_put_ctx {
+struct ofpbuf *output;
+};
+
 void
-nxm_put__(struct ofpbuf *b, enum mf_field_id field, enum ofp_version version,
-  const void *value, const void *mask, size_t n_bytes)
+nxm_put_entry_raw(struct ofpbuf *b,
+  enum mf_field_id field, enum ofp_version version,
+  const void *value, const void *mask, size_t n_bytes)
 {
 nx_put_header_len(b, field, version, !!mask, n_bytes);
 ofpbuf_put(b, value, n_bytes);
 if (mask) {
 ofpbuf_put(b, mask, n_bytes);
 }
+}
 
+static void
+nxm_put__(struct nxm_put_ctx *ctx,
+  enum mf_field_id field, enum ofp_version version,
+  const void *value, const void *mask, size_t n_bytes)
+{
+nxm_put_entry_raw(ctx->output, field, version, value, mask, n_bytes);
 }
 
 static void
-nxm_put(struct ofpbuf *b, enum mf_field_id field, enum ofp_version version,
+nxm_put(struct nxm_put_ctx *ctx,
+enum mf_field_id field, enum ofp_version version,
 const void *value, const void *mask, size_t n_bytes)
 {
 if (!is_all_zeros(mask, n_bytes)) {
 bool masked = !is_all_ones(mask, n_bytes);
-nxm_put__(b, field, version, value, masked ? mask : NULL, n_bytes);
+nxm_put__(ctx, field, version, value, masked ? mask : NULL, n_bytes);
 }
 }
 
 static void
-nxm_put_8m(struct ofpbuf *b, enum mf_field_id field, enum ofp_version version,
+nxm_put_8m(struct nxm_put_ctx *ctx,
+   enum mf_field_id field, enum ofp_version version,
uint8_t value, uint8_t mask)
 {
-nxm_put(b, field, version, , , sizeof value);
+nxm_put(ctx, field, version, , , sizeof value);
 }
 
 static void
-nxm_put_8(struct ofpbuf *b, enum mf_field_id field, enum ofp_version version,
-  uint8_t value)
+nxm_put_8(struct nxm_put_ctx *ctx,
+  enum mf_field_id field, enum ofp_version version, uint8_t value)
 {
-nxm_put__(b, field, version, , NULL, sizeof value);
+nxm_put__(ctx, field, version, , NULL, sizeof value);
 }
 
 static void
-nxm_put_16m(struct ofpbuf *b, enum mf_field_id field, enum ofp_version version,
+nxm_put_16m(struct nxm_put_ctx *ctx,
+enum mf_field_id field, enum ofp_version version,
 ovs_be16 value, ovs_be16 mask)
 {
-nxm_put(b, field, version, , , sizeof value);
+nxm_put(ctx, field, version, , , sizeof value);
 }
 
 static void
-nxm_put_16(struct ofpbuf *b, enum mf_field_id field, enum ofp_version version,
-   ovs_be16 value)
+nxm_put_16(struct nxm_put_ctx *ctx,
+   enum mf_field_id field, enum ofp_version version, ovs_be16 value)
 {
-nxm_put__(b, field, version, , NULL, sizeof value);
+nxm_put__(ctx, field, version, , NULL, sizeof value);
 }
 
 static void
-nxm_put_32m(struct ofpbuf *b, enum mf_field_id field, enum ofp_version version,
+nxm_put_32m(struct nxm_put_ctx *ctx,
+enum mf_field_id field, enum ofp_version version,
 ovs_be32 value, ovs_be32 mask)
 {
-nxm_put(b, field, version, , , sizeof value);
+nxm_put(ctx, field, version, , , sizeof value);
 }
 
 static void
-nxm_put_32(struct ofpbuf *b, enum mf_field_id field, enum ofp_version version,
-   ovs_be32 value)
+nxm_put_32(struct nxm_put_ctx *ctx,
+   enum mf_field_id field, enum ofp_version version, ovs_be32 value)
 {
-nxm_put__(b, field, version, , NULL, sizeof value);
+nxm_put__(ctx, field, version, , NULL, sizeof value);
 }
 
 static void
-nxm_put_64m(struct ofpbuf *b, enum mf_field_id field, enum ofp_version version,
+nxm_put_64m(struct nxm_put_ctx *ctx,
+enum mf_field_id field, enum ofp_version version,
 ovs_be64 value, ovs_be64 mask)
 {
-nxm_put(b, field, version, , , sizeof value);
+nxm_put(ctx, field, version, , , sizeof value);
 }
 
 static void
-nxm_put_128m(struct ofpbuf *b,
+nxm_put_128m(struct nxm_put_ctx *ctx,
  enum mf_field_id field, enum ofp_version version,
  const ovs_be128 value, const ovs_be128 mask)
 {
-

[ovs-dev] [PATCH v2 08/12] userspace: Handling of versatile tunnel ports

2017-06-18 Thread Ben Pfaff
In netdev_gre_build_header(), the GRE protocol and VXLAN next_protocol are set
based on the packet_type of the flow. If it is an Ethernet packet, it is set to
ETH_TYPE_TEB. Otherwise, if the name space is OFPHTN_ETHERTYPE, it is set
according to the name space type.

Signed-off-by: Jan Scheurich 
Signed-off-by: Ben Pfaff 
---
 NEWS  |   6 +--
 lib/meta-flow.xml |  28 ++-
 lib/netdev-bsd.c  |   1 +
 lib/netdev-dpdk.c |   1 +
 lib/netdev-dummy.c|   1 +
 lib/netdev-linux.c|   1 +
 lib/netdev-native-tnl.c   |  23 ++---
 lib/netdev-provider.h |   6 +++
 lib/netdev-vport.c| 106 ++
 lib/netdev-vport.h|   1 -
 lib/netdev.c  |   8 
 lib/netdev.h  |  29 +++-
 ofproto/ofproto-dpif-xlate.c  |  35 --
 ofproto/ofproto-dpif.c|   4 +-
 ofproto/tunnel.c  |  27 ---
 tests/tunnel-push-pop-ipv6.at |   4 +-
 tests/tunnel-push-pop.at  |   4 +-
 vswitchd/vswitch.xml  |  94 ++---
 18 files changed, 277 insertions(+), 102 deletions(-)

diff --git a/NEWS b/NEWS
index a2f5a6dc8e54..8b0ad6191325 100644
--- a/NEWS
+++ b/NEWS
@@ -59,11 +59,9 @@ Post-v2.7.0
  * OVN services are no longer restarted automatically after upgrade.
- Add --cleanup option to command 'ovs-appctl exit' (see ovs-vswitchd(8)).
- L3 tunneling:
- * Add "layer3" options for tunnel ports that support non-Ethernet (L3)
-   payload (GRE, VXLAN-GPE).
+ * Use new tunnel port option "packet_type" to configure L2 vs. L3.
  * New vxlan tunnel extension "gpe" to support VXLAN-GPE tunnels.
- * Transparently pop and push Ethernet headers at transmit/reception
-   of packets to/from L3 tunnels.
+ * New support for non-Ethernet (L3) payloads in GRE and VXLAN-GPE.
- The BFD detection multiplier is now user-configurable.
- New support for HW offloading
  * HW offloading is disabled by default.
diff --git a/lib/meta-flow.xml b/lib/meta-flow.xml
index 856e1ba8cf7b..dc2731e2a260 100644
--- a/lib/meta-flow.xml
+++ b/lib/meta-flow.xml
@@ -26,19 +26,25 @@
 networking technology in use are called called root fields.
 Open vSwitch 2.7 and earlier considered Ethernet fields to be root fields,
 and this remains the default mode of operation for Open vSwitch bridges.
-In this mode, when a packet is received from a non-Ethernet interfaces,
-such as a layer-3 LISP or GRE tunnel, Open vSwitch force-fits it to this
+When a packet is received from a non-Ethernet interface, such as a layer-3
+LISP tunnel, Open vSwitch 2.7 and earlier force-fit the packet to this
 Ethernet-centric point of view by pretending that an Ethernet header is
 present whose Ethernet type that indicates the packet's actual type (and
 whose source and destination addresses are all-zero).
   
 
   
-Open vSwitch 2.8 and later supports the ``packet type-aware pipeline''
-concept introduced in OpenFlow 1.5.  A bridge configured to be packet
-type-aware can handle packets of multiple networking technologies, such as
-Ethernet, IP, ARP, MPLS, or NSH in parallel.  Such a bridge does not have
-any root fields.
+Open vSwitch 2.8 and later implement the ``packet type-aware pipeline''
+concept introduced in OpenFlow 1.5.  Such a pipeline does not have any root
+fields.  Instead, a new metadata field, ,
+indicates the basic type of the packet, which can be Ethernet, IPv4, IPv6,
+or another type.  For backward compatibility, by default Open vSwitch 2.8
+imitates the behavior of Open vSwitch 2.7 and earlier.  Later versions of
+Open vSwitch may change the default, and in the meantime controllers can
+turn off this legacy behavior by setting
+other-config:packet-type to ptap in the
+Bridge table.  (See ovs-vswitchd.conf.db(5) for
+more information.)
   
 
   
@@ -332,14 +338,6 @@ tcp,tp_src=0x07c0/0xfff0
 mplsm  eth_type=0x8848
   
 
-  
-These shorthand notations continue to work in packet type-aware bridges.
-The absence of a packet_type match implies
-packet_type=ethernet, so that shorthands match on Ethernet
-packets with the implied eth_type. Please note that the shorthand
-ip does not match packets of packet_type (1,0x800) for IPv4.
-  
-
 
   Evolution of OpenFlow Fields
 
diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
index f863a189cd5e..6cc83d347795 100644
--- a/lib/netdev-bsd.c
+++ b/lib/netdev-bsd.c
@@ -1517,6 +1517,7 @@ netdev_bsd_update_flags(struct netdev *netdev_, enum 
netdev_flags off,
  \
 GET_FEATURES,\
 NULL, /* set_advertisement */\
+NULL, /* get_pt_mode */  \
 NULL, 

[ovs-dev] [PATCH v2 05/12] ofpbuf: New function ofpbuf_insert().

2017-06-18 Thread Ben Pfaff
This will receive its first users in an upcoming commit.

Signed-off-by: Ben Pfaff 
---
 include/openvswitch/ofpbuf.h |  1 +
 lib/ofpbuf.c | 18 ++
 2 files changed, 19 insertions(+)

diff --git a/include/openvswitch/ofpbuf.h b/include/openvswitch/ofpbuf.h
index bc25bb8a1780..6142f4a588e1 100644
--- a/include/openvswitch/ofpbuf.h
+++ b/include/openvswitch/ofpbuf.h
@@ -141,6 +141,7 @@ void ofpbuf_reserve(struct ofpbuf *, size_t);
 void *ofpbuf_push_uninit(struct ofpbuf *b, size_t);
 void *ofpbuf_push_zeros(struct ofpbuf *, size_t);
 void *ofpbuf_push(struct ofpbuf *b, const void *, size_t);
+void ofpbuf_insert(struct ofpbuf *b, size_t offset, const void *data, size_t);
 
 static inline size_t ofpbuf_headroom(const struct ofpbuf *);
 static inline size_t ofpbuf_tailroom(const struct ofpbuf *);
diff --git a/lib/ofpbuf.c b/lib/ofpbuf.c
index f4a9040646ef..9c0623688f16 100644
--- a/lib/ofpbuf.c
+++ b/lib/ofpbuf.c
@@ -461,6 +461,24 @@ ofpbuf_push(struct ofpbuf *b, const void *p, size_t size)
 return dst;
 }
 
+/* Inserts the 'n' bytes of 'data' into 'b' starting at the given 'offset',
+ * moving data forward as necessary to make room.
+ *
+ * 'data' must not point inside 'b'. */
+void
+ofpbuf_insert(struct ofpbuf *b, size_t offset, const void *data, size_t n)
+{
+if (offset < b->size) {
+ofpbuf_put_uninit(b, n);
+memmove((char *) b->data + offset + n, (char *) b->data + offset,
+b->size - offset);
+memcpy((char *) b->data + offset, data, n);
+} else {
+ovs_assert(offset == b->size);
+ofpbuf_put(b, data, n);
+}
+}
+
 /* Returns the data in 'b' as a block of malloc()'d memory and frees the buffer
  * within 'b'.  (If 'b' itself was dynamically allocated, e.g. with
  * ofpbuf_new(), then it should still be freed with, e.g., ofpbuf_delete().) */
-- 
2.10.2

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 02/12] openvswitch.h: Use odp_port_t for port numbers in userspace-only structs.

2017-06-18 Thread Ben Pfaff
Using the correct type reduces the need for type conversions.

Signed-off-by: Ben Pfaff 
---
 datapath/linux/compat/include/linux/openvswitch.h | 4 ++--
 lib/dpif-netdev.c | 2 +-
 lib/netdev.c  | 2 +-
 ofproto/ofproto-dpif-sflow.c  | 2 +-
 ofproto/ofproto-dpif-xlate.c  | 4 ++--
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/datapath/linux/compat/include/linux/openvswitch.h 
b/datapath/linux/compat/include/linux/openvswitch.h
index 4c88de1d610d..24e51cb311d2 100644
--- a/datapath/linux/compat/include/linux/openvswitch.h
+++ b/datapath/linux/compat/include/linux/openvswitch.h
@@ -714,8 +714,8 @@ struct ovs_action_hash {
  * this header to build final header according to actual packet parameters.
  */
 struct ovs_action_push_tnl {
-   uint32_t tnl_port;
-   uint32_t out_port;
+   odp_port_t tnl_port;
+   odp_port_t out_port;
uint32_t header_len;
uint32_t tnl_type; /* For logging. */
uint32_t header[TNL_PUSH_HEADER_SIZE / 4];
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 2b65dc74a269..f97e97ab2931 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -4956,7 +4956,7 @@ push_tnl_action(const struct dp_netdev_pmd_thread *pmd,
 
 data = nl_attr_get(attr);
 
-tun_port = pmd_tnl_port_cache_lookup(pmd, u32_to_odp(data->tnl_port));
+tun_port = pmd_tnl_port_cache_lookup(pmd, data->tnl_port);
 if (!tun_port) {
 err = -EINVAL;
 goto error;
diff --git a/lib/netdev.c b/lib/netdev.c
index 001b7b37bb57..765bf4b9ccad 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -831,7 +831,7 @@ netdev_push_header(const struct netdev *netdev,
 struct dp_packet *packet;
 DP_PACKET_BATCH_FOR_EACH (packet, batch) {
 netdev->netdev_class->push_header(packet, data);
-pkt_metadata_init(>md, u32_to_odp(data->out_port));
+pkt_metadata_init(>md, data->out_port);
 }
 
 return 0;
diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
index d9fddb1564b5..fc665a636853 100644
--- a/ofproto/ofproto-dpif-sflow.c
+++ b/ofproto/ofproto-dpif-sflow.c
@@ -901,7 +901,7 @@ sflow_read_tnl_push_action(const struct nlattr *attr,
 const struct ip_header *ip
 = ALIGNED_CAST(const struct ip_header *, eth + 1);
 
-sflow_actions->out_port = u32_to_odp(data->out_port);
+sflow_actions->out_port = data->out_port;
 
 /* Ethernet. */
 /* TODO: SFlow does not currently define a MAC-in-MAC
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index e15e3dec3f1c..48c4bad4ac0b 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -3211,8 +3211,8 @@ build_tunnel_send(struct xlate_ctx *ctx, const struct 
xport *xport,
 if (err) {
 return err;
 }
-tnl_push_data.tnl_port = odp_to_u32(tunnel_odp_port);
-tnl_push_data.out_port = odp_to_u32(out_dev->odp_port);
+tnl_push_data.tnl_port = tunnel_odp_port;
+tnl_push_data.out_port = out_dev->odp_port;
 
 /* After tunnel header has been added, packet_type of flow and base_flow
  * need to be set to PT_ETH. */
-- 
2.10.2

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 03/12] ovs-dpctl: New --names option to use port names in flow dumps.

2017-06-18 Thread Ben Pfaff
Until now, printing names in "ovs-dpctl dump-flows" was tied to the overall
output verbosity, which in practice meant that to see port names a user had
to see a distracting amount of verbosity.  This decouples names from
verbosity.

I'd like to make showing names the default for interactive usage, but so
far names aren't accepted in input so that would frustrate cut-and-paste,
which is an important use of "ovs-dpctl dump-flows" output.

Signed-off-by: Ben Pfaff 
---
 lib/dpctl.c  | 76 ++--
 lib/dpctl.h  |  3 ++
 lib/dpctl.man|  7 +++--
 lib/odp-util.c   |  4 +--
 ofproto/ofproto-dpif.c   | 48 +-
 utilities/ovs-dpctl.8.in |  9 +-
 utilities/ovs-dpctl.c| 20 +
 7 files changed, 125 insertions(+), 42 deletions(-)

diff --git a/lib/dpctl.c b/lib/dpctl.c
index 7f44d025dcf1..1e3bf0a517db 100644
--- a/lib/dpctl.c
+++ b/lib/dpctl.c
@@ -49,6 +49,8 @@
 #include "unixctl.h"
 #include "util.h"
 #include "openvswitch/ofp-parse.h"
+#include "openvswitch/vlog.h"
+VLOG_DEFINE_THIS_MODULE(dpctl);
 
 typedef int dpctl_command_handler(int argc, const char *argv[],
   struct dpctl_params *);
@@ -762,6 +764,36 @@ static char *supported_dump_types[] = {
 "ovs",
 };
 
+static struct hmap *
+dpctl_get_portno_names(struct dpif *dpif, const struct dpctl_params *dpctl_p)
+{
+if (dpctl_p->names) {
+struct hmap *portno_names = xmalloc(sizeof *portno_names);
+hmap_init(portno_names);
+
+struct dpif_port_dump port_dump;
+struct dpif_port dpif_port;
+DPIF_PORT_FOR_EACH (_port, _dump, dpif) {
+odp_portno_names_set(portno_names, dpif_port.port_no,
+ dpif_port.name);
+}
+
+return portno_names;
+} else {
+return NULL;
+}
+}
+
+static void
+dpctl_free_portno_names(struct hmap *portno_names)
+{
+if (portno_names) {
+odp_portno_names_destroy(portno_names);
+hmap_destroy(portno_names);
+free(portno_names);
+}
+}
+
 static int
 dpctl_dump_flows(int argc, const char *argv[], struct dpctl_params *dpctl_p)
 {
@@ -774,10 +806,6 @@ dpctl_dump_flows(int argc, const char *argv[], struct 
dpctl_params *dpctl_p)
 struct flow flow_filter;
 struct flow_wildcards wc_filter;
 
-struct dpif_port_dump port_dump;
-struct dpif_port dpif_port;
-struct hmap portno_names;
-
 struct dpif_flow_dump_thread *flow_dump_thread;
 struct dpif_flow_dump *flow_dump;
 struct dpif_flow f;
@@ -807,15 +835,14 @@ dpctl_dump_flows(int argc, const char *argv[], struct 
dpctl_params *dpctl_p)
 goto out_free;
 }
 
-
-hmap_init(_names);
-DPIF_PORT_FOR_EACH (_port, _dump, dpif) {
-odp_portno_names_set(_names, dpif_port.port_no, dpif_port.name);
-}
+struct hmap *portno_names = dpctl_get_portno_names(dpif, dpctl_p);
 
 if (filter) {
 struct ofputil_port_map port_map;
 ofputil_port_map_init(_map);
+
+struct dpif_port_dump port_dump;
+struct dpif_port dpif_port;
 DPIF_PORT_FOR_EACH (_port, _dump, dpif) {
 ofputil_port_map_put(_map,
  u16_to_ofp(odp_to_u32(dpif_port.port_no)),
@@ -890,7 +917,7 @@ dpctl_dump_flows(int argc, const char *argv[], struct 
dpctl_params *dpctl_p)
 }
 pmd_id = f.pmd_id;
 }
-format_dpif_flow(, , _names, type, dpctl_p);
+format_dpif_flow(, , portno_names, type, dpctl_p);
 
 dpctl_print(dpctl_p, "%s\n", ds_cstr());
 }
@@ -903,8 +930,7 @@ dpctl_dump_flows(int argc, const char *argv[], struct 
dpctl_params *dpctl_p)
 ds_destroy();
 
 out_dpifclose:
-odp_portno_names_destroy(_names);
-hmap_destroy(_names);
+dpctl_free_portno_names(portno_names);
 dpif_close(dpif);
 out_free:
 free(filter);
@@ -1032,11 +1058,8 @@ dpctl_get_flow(int argc, const char *argv[], struct 
dpctl_params *dpctl_p)
 {
 const char *key_s = argv[argc - 1];
 struct dpif_flow flow;
-struct dpif_port dpif_port;
-struct dpif_port_dump port_dump;
 struct dpif *dpif;
 char *dp_name;
-struct hmap portno_names;
 ovs_u128 ufid;
 struct ofpbuf buf;
 uint64_t stub[DPIF_FLOW_BUFSIZE / 8];
@@ -1055,10 +1078,8 @@ dpctl_get_flow(int argc, const char *argv[], struct 
dpctl_params *dpctl_p)
 }
 
 ofpbuf_use_stub(, , sizeof stub);
-hmap_init(_names);
-DPIF_PORT_FOR_EACH (_port, _dump, dpif) {
-odp_portno_names_set(_names, dpif_port.port_no, dpif_port.name);
-}
+
+struct hmap *portno_names = dpctl_get_portno_names(dpif, dpctl_p);
 
 n = odp_ufid_from_string(key_s, );
 if (n <= 0) {
@@ -1074,13 +1095,12 @@ dpctl_get_flow(int argc, const char *argv[], struct 
dpctl_params *dpctl_p)
 }
 
 ds_init();
-format_dpif_flow(, , _names, NULL, dpctl_p);
+

[ovs-dev] [PATCH v2 01/12] ofp-util: Remove prototype for unimplemented function.

2017-06-18 Thread Ben Pfaff
Signed-off-by: Ben Pfaff 
---
 include/openvswitch/ofp-util.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/openvswitch/ofp-util.h b/include/openvswitch/ofp-util.h
index bbf6ffec5dd3..07723b427ce8 100644
--- a/include/openvswitch/ofp-util.h
+++ b/include/openvswitch/ofp-util.h
@@ -247,8 +247,6 @@ void ofputil_match_to_ofp10_match(const struct match *, 
struct ofp10_match *);
 enum ofperr ofputil_pull_ofp11_match(struct ofpbuf *, const struct tun_table *,
  const struct vl_mff_map *, struct match *,
  uint16_t *padded_match_len);
-enum ofperr ofputil_pull_ofp11_mask(struct ofpbuf *, struct match *,
-struct mf_bitmap *bm);
 enum ofperr ofputil_match_from_ofp11_match(const struct ofp11_match *,
struct match *);
 int ofputil_put_ofp11_match(struct ofpbuf *, const struct match *,
-- 
2.10.2

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 00/12] Packet type aware pipeline

2017-06-18 Thread Ben Pfaff
This series is based on Zoltan Balogh's series here:
https://patchwork.ozlabs.org/patch/770490/
https://patchwork.ozlabs.org/patch/770487/
https://patchwork.ozlabs.org/patch/770495/
https://patchwork.ozlabs.org/patch/770498/
https://patchwork.ozlabs.org/patch/770488/
https://patchwork.ozlabs.org/patch/770489/

v1->v2:
  - Squash fixup patches.
  - Apply changes agreed with Jan.
  - Not yet done: Figure out whether to really show packet_type in (some)
match_format() output.
  - New patch at the end unsuccessfully tries to re-enable packet-aware
test.  Either I don't have enough insight yet, or it just reveals a
bug or two.
  - 4 new patches at beginning.  First one is trivial.  Next 3 are intended
to make it easier to debug the packet aware test that is still failing.
Jan, you don't have to feel obligated to review these if you feel they
are off-topic; I will get separate reviews.

Ben Pfaff (8):
  ofp-util: Remove prototype for unimplemented function.
  openvswitch.h: Use odp_port_t for port numbers in userspace-only
structs.
  ovs-dpctl: New --names option to use port names in flow dumps.
  odp-util: Use port names in output in more places.
  ofpbuf: New function ofpbuf_insert().
  nx-match: Add context argument to nxm_put__().
  userspace: Handling of versatile tunnel ports
  work on packet aware test

Jan Scheurich (3):
  userspace: Add OXM field MFF_PACKET_TYPE
  tests: Added unit tests in packet-type-aware.at
  userspace: Complete Packet In handling

Zoltán Balogh (1):
  userspace: Introduce packet_type in OF 1.5 packet-out

 NEWS  |   6 +-
 build-aux/extract-ofp-fields  |   3 +-
 datapath/linux/compat/include/linux/openvswitch.h |   4 +-
 include/openvswitch/match.h   |   5 +
 include/openvswitch/meta-flow.h   |  20 +
 include/openvswitch/ofp-util.h|   2 -
 include/openvswitch/ofpbuf.h  |   1 +
 lib/dpctl.c   |  84 ++--
 lib/dpctl.h   |   3 +
 lib/dpctl.man |   7 +-
 lib/dpif-netdev.c |   4 +-
 lib/dpif.c|   4 +-
 lib/flow.c|  74 +++-
 lib/flow.h|  27 +-
 lib/learn.c   |   1 +
 lib/match.c   |  98 +++--
 lib/meta-flow.c   |  86 +++-
 lib/meta-flow.xml | 154 ++-
 lib/netdev-bsd.c  |   1 +
 lib/netdev-dpdk.c |   1 +
 lib/netdev-dummy.c|   1 +
 lib/netdev-linux.c|   1 +
 lib/netdev-native-tnl.c   |  23 +-
 lib/netdev-provider.h |   6 +
 lib/netdev-vport.c| 106 +++--
 lib/netdev-vport.h|   1 -
 lib/netdev.c  |  10 +-
 lib/netdev.h  |  29 +-
 lib/nx-match.c| 264 +++-
 lib/nx-match.h|   6 +-
 lib/odp-util.c| 138 +++---
 lib/odp-util.h|   5 +-
 lib/ofp-parse.c   |  25 ++
 lib/ofp-print.c   |  11 +-
 lib/ofp-util.c|  69 ++-
 lib/ofpbuf.c  |  18 +
 lib/tun-metadata.c|   4 +-
 ofproto/ofproto-dpif-sflow.c  |   2 +-
 ofproto/ofproto-dpif-trace.c  |   2 +-
 ofproto/ofproto-dpif-xlate.c  |  45 +-
 ofproto/ofproto-dpif.c|  54 ++-
 ofproto/ofproto.c |   3 +
 ofproto/tunnel.c  |  29 +-
 tests/automake.mk |   6 +-
 tests/dpif-netdev.at  |  89 ++--
 tests/odp.at  |   1 +
 tests/ofproto-dpif.at | 230 +-
 tests/ofproto.at  |  86 
 tests/ovs-ofctl.at|   2 +-
 tests/packet-type-aware.at| 484 ++
 tests/pmd.at  |   8 +-
 tests/system-userspace-packet-type-aware.at   | 422 +++
 tests/system-userspace-testsuite.at   |   1 +
 tests/test-odp.c  |   2 +-
 tests/testsuite.at|   1 +
 tests/tunnel-push-pop-ipv6.at

[ovs-dev] Darlehen Angebot bei 2% pro Jahr

2017-06-18 Thread William Investment LLC
Wir bieten Darlehensfonds zu einem Jahreszins von 2% an. Es gibt keine 
Bonitätsprüfung Geschichte. Vertrauen Sie uns mit allen Arten von Darlehen 
umzugehen. Interessierte Kandidaten sollten uns heute für ihre schnelle 
Online-und leichte Darlehen und keine Sicherheiten Einlagen kontaktieren. 
E-mail: williaminvestmentllc...@gmail.com Lender: Roush W. James Handy: +1 
(516)341-6554

---
This email has been checked for viruses by Avast antivirus software.
https://www.avast.com/antivirus
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 1/2] checkpatch: Suggest ovs_assert() to author.

2017-06-18 Thread Bhanuprakash Bodireddy
Suggest that the author use the OVS wrapper of the assert function.

Signed-off-by: Bhanuprakash Bodireddy 
---
 utilities/checkpatch.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utilities/checkpatch.py b/utilities/checkpatch.py
index b45a255..304d2fd 100755
--- a/utilities/checkpatch.py
+++ b/utilities/checkpatch.py
@@ -256,6 +256,7 @@ std_functions = [
 ('strerror', 'Use ovs_strerror() in place of strerror()'),
 ('sleep', 'Use xsleep() in place of sleep()'),
 ('abort', 'Use ovs_abort() in place of abort()'),
+('assert', 'Use ovs_assert() in place of assert()'),
 ('error', 'Use ovs_error() in place of error()'),
 ]
 checks += [
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 5/6] netdev-dpdk: Add netdev_dpdk_vhost_txq_flush function.

2017-06-18 Thread Bhanuprakash Bodireddy
Add netdev_dpdk_vhost_txq_flush(), which flushes packets on vHost User
port queues. Also add the netdev_dpdk_vhost_tx_burst() function, which
uses rte_vhost_enqueue_burst() to enqueue a burst of packets on vHost User
ports.

Signed-off-by: Bhanuprakash Bodireddy 
Signed-off-by: Antonio Fischetti 
Co-authored-by: Antonio Fischetti 
Acked-by: Eelco Chaudron 
---
 lib/netdev-dpdk.c | 76 ---
 1 file changed, 72 insertions(+), 4 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 50a9a2c..47343e8 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -307,12 +307,22 @@ struct dpdk_tx_queue {
 * pmd threads (see 'concurrent_txq'). */
 int map;   /* Mapping of configured vhost-user queues
 * to enabled by guest. */
-int dpdk_pkt_cnt;  /* Number of buffered packets waiting to
+union {
+int dpdk_pkt_cnt;  /* Number of buffered packets waiting to
   be sent on DPDK tx queue. */
-struct rte_mbuf *dpdk_burst_pkts[INTERIM_QUEUE_BURST_THRESHOLD];
+int vhost_pkt_cnt; /* Number of buffered packets waiting to
+  be sent on vhost port. */
+};
+
+union {
+struct rte_mbuf *dpdk_burst_pkts[INTERIM_QUEUE_BURST_THRESHOLD];
/* Intermediate queue where packets can
 * be buffered to amortize the cost of MMIO
 * writes. */
+struct dp_packet *vhost_burst_pkts[INTERIM_QUEUE_BURST_THRESHOLD];
+   /* Intermediate queue where packets can
+* be buffered for vhost ports. */
+};
 };
 
 /* dpdk has no way to remove dpdk ring ethernet devices
@@ -1719,6 +1729,63 @@ netdev_dpdk_vhost_update_tx_counters(struct netdev_stats 
*stats,
 }
 }
 
+static int
+netdev_dpdk_vhost_tx_burst(struct netdev_dpdk *dev, int qid)
+{
+struct dpdk_tx_queue *txq = >tx_q[qid];
+struct rte_mbuf **cur_pkts = (struct rte_mbuf **)txq->vhost_burst_pkts;
+
+int tx_vid = netdev_dpdk_get_vid(dev);
+int tx_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
+uint32_t sent = 0;
+uint32_t retries = 0;
+uint32_t sum, total_pkts;
+
+total_pkts = sum = txq->vhost_pkt_cnt;
+do {
+uint32_t ret;
+ret = rte_vhost_enqueue_burst(tx_vid, tx_qid, _pkts[sent], sum);
+if (OVS_UNLIKELY(!ret)) {
+/* No packets enqueued - do not retry. */
+break;
+} else {
+/* Packet have been sent */
+sent += ret;
+
+/* 'sum' packet have to be retransmitted */
+sum -= ret;
+}
+} while (sum && (retries++ < VHOST_ENQ_RETRY_NUM));
+
+for (int i = 0; i < total_pkts; i++) {
+dp_packet_delete(txq->vhost_burst_pkts[i]);
+}
+
+/* Reset pkt count */
+txq->vhost_pkt_cnt = 0;
+
+/* 'sum' refers to packets dropped */
+return sum;
+}
+
+/* Flush the txq if there are any packets available.
+ * dynamic_txqs/concurrent_txq is disabled for vHost User ports as
+ * 'OVS_VHOST_MAX_QUEUE_NUM' txqs are preallocated.
+ */
+static int
+netdev_dpdk_vhost_txq_flush(struct netdev *netdev, int qid,
+bool concurrent_txq OVS_UNUSED)
+{
+struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+struct dpdk_tx_queue *txq = >tx_q[qid];
+
+if (OVS_LIKELY(txq->vhost_pkt_cnt)) {
+netdev_dpdk_vhost_tx_burst(dev, qid);
+}
+
+return 0;
+}
+
 static void
 __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
  struct dp_packet **pkts, int cnt)
@@ -3432,7 +3499,8 @@ static const struct netdev_class dpdk_vhost_class =
 NULL,
 netdev_dpdk_vhost_reconfigure,
 netdev_dpdk_vhost_rxq_recv,
-NULL);
+netdev_dpdk_vhost_txq_flush);
+
 static const struct netdev_class dpdk_vhost_client_class =
 NETDEV_DPDK_CLASS(
 "dpdkvhostuserclient",
@@ -3448,7 +3516,7 @@ static const struct netdev_class dpdk_vhost_client_class =
 NULL,
 netdev_dpdk_vhost_client_reconfigure,
 netdev_dpdk_vhost_rxq_recv,
-NULL);
+netdev_dpdk_vhost_txq_flush);
 
 void
 netdev_dpdk_register(void)
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 6/6] netdev-dpdk: Enable intermediate queue for vHost User port.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit refactors __netdev_dpdk_vhost_send() and enables an
intermediate queue in which packets are buffered until the threshold
'INTERIM_QUEUE_BURST_THRESHOLD' (32) is hit, at which point they are
transmitted.

This commit improves throughput, as reported below, in a simple
physical-to-virtual test case with a higher number of flows at 10G line rate.

  Num Flow   Master    Commit
  ========   =======   =======
  10         5945899   7833914
  32         3872211   6530133
  50         3283713   6618711
  100        3132540   5857226
  500        2964499   5273006
  1000       2931952   5178038

Latency stats:

  MASTER
  ------
  Pkt size  min(ns)  avg(ns)  max(ns)
  512       10,011   12,100   281,915
  1024       7,870    9,313   193,116
  1280       7,862    9,036   194,439
  1518       8,215    9,417   204,782

  MASTER + COMMIT
  ---------------
  Pkt size  min(ns)  avg(ns)  max(ns)
  512       10,492   13,655   281,538
  1024       8,407    9,784   205,095
  1280       8,399    9,750   194,888
  1518       8,367    9,722   196,973

Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2017-May/332271.html
 [By Eelco Chaudron ]
Signed-off-by: Bhanuprakash Bodireddy 
Signed-off-by: Antonio Fischetti 
Co-authored-by: Antonio Fischetti 
Acked-by: Eelco Chaudron 
---
 lib/netdev-dpdk.c | 38 +++---
 1 file changed, 15 insertions(+), 23 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 47343e8..69cc5ff 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1794,16 +1794,21 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
 struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts;
 unsigned int total_pkts = cnt;
 unsigned int dropped = 0;
-int i, retries = 0;
+int i;
 
 qid = dev->tx_q[qid % netdev->n_txq].map;
+struct dpdk_tx_queue *txq = >tx_q[qid];
 
 if (OVS_UNLIKELY(!is_vhost_running(dev) || qid < 0
  || !(dev->flags & NETDEV_UP))) {
 rte_spinlock_lock(>stats_lock);
 dev->stats.tx_dropped+= cnt;
 rte_spinlock_unlock(>stats_lock);
-goto out;
+
+for (i = 0; i < total_pkts; i++) {
+dp_packet_delete(pkts[i]);
+}
+return;
 }
 
 rte_spinlock_lock(>tx_q[qid].tx_lock);
@@ -1813,34 +1818,21 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
 cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt);
 dropped = total_pkts - cnt;
 
-do {
-int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
-unsigned int tx_pkts;
-
-tx_pkts = rte_vhost_enqueue_burst(netdev_dpdk_get_vid(dev),
-  vhost_qid, cur_pkts, cnt);
-if (OVS_LIKELY(tx_pkts)) {
-/* Packets have been sent.*/
-cnt -= tx_pkts;
-/* Prepare for possible retry.*/
-cur_pkts = _pkts[tx_pkts];
-} else {
-/* No packets sent - do not retry.*/
-break;
+int idx = 0;
+while (idx < cnt) {
+txq->vhost_burst_pkts[txq->vhost_pkt_cnt++] = pkts[idx++];
+
+if (txq->vhost_pkt_cnt >= INTERIM_QUEUE_BURST_THRESHOLD) {
+dropped += netdev_dpdk_vhost_tx_burst(dev, qid);
 }
-} while (cnt && (retries++ <= VHOST_ENQ_RETRY_NUM));
+}
 
 rte_spinlock_unlock(>tx_q[qid].tx_lock);
 
 rte_spinlock_lock(>stats_lock);
 netdev_dpdk_vhost_update_tx_counters(>stats, pkts, total_pkts,
- cnt + dropped);
+ dropped);
 rte_spinlock_unlock(>stats_lock);
-
-out:
-for (i = 0; i < total_pkts - dropped; i++) {
-dp_packet_delete(pkts[i]);
-}
 }
 
 /* Tx function. Transmit packets indefinitely */
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 4/6] dpif-netdev: Flush the packets in intermediate queue.

2017-06-18 Thread Bhanuprakash Bodireddy
Under low rate traffic conditions, there can be 2 issues.
  (1) Packets potentially can get stuck in the intermediate queue.
  (2) Latency of the packets can increase significantly due to
   buffering in intermediate queue.

This commit handles issue (1) by flushing the tx port queues from the
PMD processing loop. It also addresses issue (2) by flushing the tx
queues after every rxq port is processed. This reduces the latency
without impacting the forwarding throughput.

  MASTER
  ------
  Pkt size  min(ns)  avg(ns)  max(ns)
  512       4,631    5,022    309,914
  1024      5,545    5,749    104,294
  1280      5,978    6,159     45,306
  1518      6,419    6,774    946,850

  MASTER + COMMIT
  ---------------
  Pkt size  min(ns)  avg(ns)  max(ns)
  512       4,711    5,064    182,477
  1024      5,601    5,888    701,654
  1280      6,018    6,491    533,037
  1518      6,467    6,734    312,471

PMDs can be torn down and spawned at runtime, so the rxq and txq
mapping of the PMD threads can change. In a few cases packets can get
stuck in the queue due to reconfiguration, and this commit helps flush
the queues.

Suggested-by: Eelco Chaudron 
Reported-at: 
https://mail.openvswitch.org/pipermail/ovs-dev/2017-April/331039.html
Signed-off-by: Bhanuprakash Bodireddy 
Signed-off-by: Antonio Fischetti 
Co-authored-by: Antonio Fischetti 
Signed-off-by: Markus Magnusson 
Co-authored-by: Markus Magnusson 
Acked-by: Eelco Chaudron 
---
 lib/dpif-netdev.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index d59208e..dfd88aa 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -3761,6 +3761,8 @@ reload:
 for (i = 0; i < poll_cnt; i++) {
 dp_netdev_process_rxq_port(pmd, poll_list[i].rx,
poll_list[i].port_no);
+
+dp_netdev_flush_txq_ports(pmd);
 }
 
 if (lc++ > 1024) {
@@ -3781,6 +3783,9 @@ reload:
 }
 }
 
+/* Flush the queues as part of reconfiguration logic. */
+dp_netdev_flush_txq_ports(pmd);
+
 poll_cnt = pmd_load_queues_and_ports(pmd, _list);
 exiting = latch_is_set(>exit_latch);
 /* Signal here to make sure the pmd finishes
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 3/6] netdev-dpdk: Add intermediate queue support.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit introduces the netdev_dpdk_eth_tx_queue() function, which
implements an intermediate queue and packet buffering. Packets are
buffered until the threshold 'INTERIM_QUEUE_BURST_THRESHOLD' (32) is
reached, at which point they are transmitted.

To handle the case (e.g. ping) where packets are sent at a low rate and
can potentially get stuck in the queue, flush logic is implemented
that gets invoked from dp_netdev_flush_txq_ports() as part of the PMD
packet processing loop.

Signed-off-by: Bhanuprakash Bodireddy 
Signed-off-by: Antonio Fischetti 
Co-authored-by: Antonio Fischetti 
Signed-off-by: Markus Magnusson 
Co-authored-by: Markus Magnusson 
Acked-by: Eelco Chaudron 
---
 lib/dpif-netdev.c | 44 +++-
 lib/netdev-dpdk.c | 35 ++-
 2 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 2b65dc7..d59208e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -332,6 +332,7 @@ enum pmd_cycles_counter_type {
 };
 
 #define XPS_TIMEOUT_MS 500LL
+#define LAST_USED_QID_NONE -1
 
 /* Contained by struct dp_netdev_port's 'rxqs' member.  */
 struct dp_netdev_rxq {
@@ -492,7 +493,13 @@ struct rxq_poll {
 struct tx_port {
 struct dp_netdev_port *port;
 int qid;
-long long last_used;
+int last_used_qid;/* Last queue id where packets got
+ enqueued. */
+long long last_used;  /* In case XPS is enabled, it contains the
+   * timestamp of the last time the port was
+   * used by the thread to send data.  After
+   * XPS_TIMEOUT_MS elapses the qid will be
+   * marked as -1. */
 struct hmap_node node;
 };
 
@@ -3081,6 +3088,25 @@ cycles_count_end(struct dp_netdev_pmd_thread *pmd,
 }
 
 static void
+dp_netdev_flush_txq_ports(struct dp_netdev_pmd_thread *pmd)
+{
+struct tx_port *cached_tx_port;
+int tx_qid;
+
+HMAP_FOR_EACH (cached_tx_port, node, >send_port_cache) {
+tx_qid = cached_tx_port->last_used_qid;
+
+if (tx_qid != LAST_USED_QID_NONE) {
+netdev_txq_flush(cached_tx_port->port->netdev, tx_qid,
+ cached_tx_port->port->dynamic_txqs);
+
+/* Queue flushed and mark it empty. */
+cached_tx_port->last_used_qid = LAST_USED_QID_NONE;
+}
+}
+}
+
+static void
 dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
struct netdev_rxq *rx,
odp_port_t port_no)
@@ -4356,6 +4382,7 @@ dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread 
*pmd,
 
 tx->port = port;
 tx->qid = -1;
+tx->last_used_qid = LAST_USED_QID_NONE;
 
 hmap_insert(>tx_ports, >node, hash_port_no(tx->port->port_no));
 pmd->need_reload = true;
@@ -4926,6 +4953,14 @@ dpif_netdev_xps_get_tx_qid(const struct 
dp_netdev_pmd_thread *pmd,
 
 dpif_netdev_xps_revalidate_pmd(pmd, now, false);
 
+/* The tx queue can change in XPS case, make sure packets in previous
+ * queue is flushed properly. */
+if (tx->last_used_qid != LAST_USED_QID_NONE &&
+   tx->qid != tx->last_used_qid) {
+netdev_txq_flush(port->netdev, tx->last_used_qid, port->dynamic_txqs);
+tx->last_used_qid = LAST_USED_QID_NONE;
+}
+
 VLOG_DBG("Core %d: New TX queue ID %d for port \'%s\'.",
  pmd->core_id, tx->qid, netdev_get_name(tx->port->netdev));
 return min_qid;
@@ -5021,6 +5056,13 @@ dp_execute_cb(void *aux_, struct dp_packet_batch 
*packets_,
 tx_qid = pmd->static_tx_qid;
 }
 
+/* In case these packets gets buffered into an intermediate
+ * queue and XPS is enabled the flush function could find a
+ * different tx qid assigned to its thread.  We keep track
+ * of the qid we're now using, that will trigger the flush
+ * function and will select the right queue to flush. */
+p->last_used_qid = tx_qid;
+
 netdev_send(p->port->netdev, tx_qid, packets_, may_steal,
 dynamic_txqs);
 return;
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 1e83116..50a9a2c 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1434,6 +1434,7 @@ static inline int
 netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
  struct rte_mbuf **pkts, int cnt)
 {
+struct dpdk_tx_queue *txq = >tx_q[qid];
 uint32_t nb_tx = 0;
 
 while (nb_tx != cnt) {
@@ -1457,6 +1458,7 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
 }
 }
 
+txq->dpdk_pkt_cnt = 0;
 return cnt - nb_tx;
 }
 
@@ -1841,6 +1843,37 @@ 

[ovs-dev] [PATCH 2/6] netdev-dpdk: Add netdev_dpdk_txq_flush function.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit adds the netdev_dpdk_txq_flush() function. If there are
any packets waiting in the queue, they are transmitted instantly
using the rte_eth_tx_burst function. In the XPS-enabled case, a lock is
taken on the tx queue before flushing the queue.

Signed-off-by: Bhanuprakash Bodireddy 
Signed-off-by: Antonio Fischetti 
Co-authored-by: Antonio Fischetti 
Signed-off-by: Markus Magnusson 
Co-authored-by: Markus Magnusson 
Acked-by: Eelco Chaudron 
---
 lib/netdev-dpdk.c | 31 +--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index cc84539..1e83116 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -293,6 +293,11 @@ struct dpdk_mp {
 struct ovs_list list_node OVS_GUARDED_BY(dpdk_mp_mutex);
 };
 
+/* Queue 'INTERIM_QUEUE_BURST_THRESHOLD' packets before transmitting.
+ * Defaults to 'NETDEV_MAX_BURST'(32) packets.
+ */
+#define INTERIM_QUEUE_BURST_THRESHOLD NETDEV_MAX_BURST
+
 /* There should be one 'struct dpdk_tx_queue' created for
  * each cpu core. */
 struct dpdk_tx_queue {
@@ -302,6 +307,12 @@ struct dpdk_tx_queue {
 * pmd threads (see 'concurrent_txq'). */
 int map;   /* Mapping of configured vhost-user queues
 * to enabled by guest. */
+int dpdk_pkt_cnt;  /* Number of buffered packets waiting to
+  be sent on DPDK tx queue. */
+struct rte_mbuf *dpdk_burst_pkts[INTERIM_QUEUE_BURST_THRESHOLD];
+   /* Intermediate queue where packets can
+* be buffered to amortize the cost of MMIO
+* writes. */
 };
 
 /* dpdk has no way to remove dpdk ring ethernet devices
@@ -1897,9 +1908,25 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
  * few packets (< INTERIM_QUEUE_BURST_THRESHOLD) buffered in the queue.
  */
 static int
-netdev_dpdk_txq_flush(struct netdev *netdev OVS_UNUSED,
-  int qid OVS_UNUSED, bool concurrent_txq OVS_UNUSED)
+netdev_dpdk_txq_flush(struct netdev *netdev,
+  int qid, bool concurrent_txq)
 {
+struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+struct dpdk_tx_queue *txq = >tx_q[qid];
+
+if (OVS_LIKELY(txq->dpdk_pkt_cnt)) {
+if (OVS_UNLIKELY(concurrent_txq)) {
+qid = qid % dev->up.n_txq;
+rte_spinlock_lock(>tx_q[qid].tx_lock);
+}
+
+netdev_dpdk_eth_tx_burst(dev, qid, txq->dpdk_burst_pkts,
+ txq->dpdk_pkt_cnt);
+
+if (OVS_UNLIKELY(concurrent_txq)) {
+rte_spinlock_unlock(>tx_q[qid].tx_lock);
+}
+}
 return 0;
 }
 
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 1/6] netdev: Add netdev_txq_flush function.

2017-06-18 Thread Bhanuprakash Bodireddy
Add netdev_txq_flush(), which flushes packets on a queue. This is needed
to transmit packets held in the intermediate queue.

Signed-off-by: Bhanuprakash Bodireddy 
Signed-off-by: Antonio Fischetti 
Co-authored-by: Antonio Fischetti 
Signed-off-by: Markus Magnusson 
Co-authored-by: Markus Magnusson 
Acked-by: Eelco Chaudron 
---
 lib/netdev-bsd.c  |  1 +
 lib/netdev-dpdk.c | 26 +-
 lib/netdev-dummy.c|  1 +
 lib/netdev-linux.c|  1 +
 lib/netdev-provider.h |  8 
 lib/netdev-vport.c|  2 +-
 lib/netdev.c  |  9 +
 lib/netdev.h  |  1 +
 8 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
index f863a18..cb0edd6 100644
--- a/lib/netdev-bsd.c
+++ b/lib/netdev-bsd.c
@@ -1548,6 +1548,7 @@ netdev_bsd_update_flags(struct netdev *netdev_, enum 
netdev_flags off,
 netdev_bsd_rxq_recv, \
 netdev_bsd_rxq_wait, \
 netdev_bsd_rxq_drain,\
+NULL,\
  \
 NO_OFFLOAD_API   \
 }
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index bba4de3..cc84539 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1892,6 +1892,17 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
 }
 }
 
+/* Flush tx queues
+ * This is done periodically to empty the intermediate queue in case of
+ * few packets (< INTERIM_QUEUE_BURST_THRESHOLD) buffered in the queue.
+ */
+static int
+netdev_dpdk_txq_flush(struct netdev *netdev OVS_UNUSED,
+  int qid OVS_UNUSED, bool concurrent_txq OVS_UNUSED)
+{
+return 0;
+}
+
 static int
 netdev_dpdk_eth_send(struct netdev *netdev, int qid,
  struct dp_packet_batch *batch, bool may_steal,
@@ -3241,7 +3252,7 @@ unlock:
   SET_CONFIG, SET_TX_MULTIQ, SEND,\
   GET_CARRIER, GET_STATS, \
   GET_FEATURES, GET_STATUS,   \
-  RECONFIGURE, RXQ_RECV)  \
+  RECONFIGURE, RXQ_RECV, TXQ_FLUSH)   \
 { \
 NAME, \
 true,   /* is_pmd */  \
@@ -3308,6 +3319,7 @@ unlock:
 RXQ_RECV, \
 NULL,   /* rx_wait */ \
 NULL,   /* rxq_drain */   \
+TXQ_FLUSH,  /* txq_flush */   \
 NO_OFFLOAD_API\
 }
 
@@ -3325,7 +3337,8 @@ static const struct netdev_class dpdk_class =
 netdev_dpdk_get_features,
 netdev_dpdk_get_status,
 netdev_dpdk_reconfigure,
-netdev_dpdk_rxq_recv);
+netdev_dpdk_rxq_recv,
+netdev_dpdk_txq_flush);
 
 static const struct netdev_class dpdk_ring_class =
 NETDEV_DPDK_CLASS(
@@ -3341,7 +3354,8 @@ static const struct netdev_class dpdk_ring_class =
 netdev_dpdk_get_features,
 netdev_dpdk_get_status,
 netdev_dpdk_reconfigure,
-netdev_dpdk_rxq_recv);
+netdev_dpdk_rxq_recv,
+NULL);
 
 static const struct netdev_class dpdk_vhost_class =
 NETDEV_DPDK_CLASS(
@@ -3357,7 +3371,8 @@ static const struct netdev_class dpdk_vhost_class =
 NULL,
 NULL,
 netdev_dpdk_vhost_reconfigure,
-netdev_dpdk_vhost_rxq_recv);
+netdev_dpdk_vhost_rxq_recv,
+NULL);
 static const struct netdev_class dpdk_vhost_client_class =
 NETDEV_DPDK_CLASS(
 "dpdkvhostuserclient",
@@ -3372,7 +3387,8 @@ static const struct netdev_class dpdk_vhost_client_class =
 NULL,
 NULL,
 netdev_dpdk_vhost_client_reconfigure,
-netdev_dpdk_vhost_rxq_recv);
+netdev_dpdk_vhost_rxq_recv,
+NULL);
 
 void
 netdev_dpdk_register(void)
diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index d189a86..216c98e 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -1414,6 +1414,7 @@ netdev_dummy_update_flags(struct netdev *netdev_,
 netdev_dummy_rxq_recv,  \
 netdev_dummy_rxq_wait,  \
 netdev_dummy_rxq_drain, \
+NULL,   \
 \
 NO_OFFLOAD_API  \
 }
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index f5dc30f..a2775da 100644
--- 

[ovs-dev] [PATCH 0/6 V2] netdev-dpdk: Use intermediate queue during packet transmission.

2017-06-18 Thread Bhanuprakash Bodireddy
After packet classification, packets are queued into batches depending
on the matching netdev flow. Thereafter each batch is processed to
execute the related actions. This becomes particularly inefficient if
there are few packets in each batch, as rte_eth_tx_burst() incurs expensive
MMIO writes.

This patch series implements an intermediate queue for DPDK and vHost User
ports. Packets are queued and burst when the packet count exceeds a threshold.
Drain logic is also implemented to handle cases where packets can get stuck in
the tx queues at low-rate traffic conditions. Care has been taken to ensure
that latency stays well within acceptable limits. Testing shows significant
performance gains with this implementation.

This patch series combines the two earlier patches posted below.
  DPDK patch: 
https://mail.openvswitch.org/pipermail/ovs-dev/2017-April/331039.html
  vHost User patch: 
https://mail.openvswitch.org/pipermail/ovs-dev/2017-May/332271.html

This series also proposes to disable the retries on vHost User ports and make
them configurable via ovsdb (controversial?).

Performance Numbers with intermediate queue:

  DPDK ports
 ===

  Throughput for P2P scenario, for two 82599ES 10G port with 64 byte packets

  Number
  flows       MASTER    With PATCH
  ======    ========    ==========
      10    10727283      13393844
      32     7042253      11228799
      50     7515491       9607791
     100     5838699       9430730
     500     5285066       7845807
    1000     5226477       7135601

   Latency test

   MASTER
   ======
   Pkt size  min(ns)  avg(ns)  max(ns)
    512      4,631    5,022    309,914
   1024      5,545    5,749    104,294
   1280      5,978    6,159     45,306
   1518      6,419    6,774    946,850

   PATCH
   =====
   Pkt size  min(ns)  avg(ns)  max(ns)
    512      4,711    5,064    182,477
   1024      5,601    5,888    701,654
   1280      6,018    6,491    533,037
   1518      6,467    6,734    312,471

   vHost User ports
  ==

  Throughput for PV scenario, with 64 byte packets

   Number
   flows       MASTER    With PATCH
   ======    ========    ==========
       10     5945899       7833914
       32     3872211       6530133
       50     3283713       6618711
      100     3132540       5857226
      500     2964499       5273006
     1000     2931952       5178038

  Latency test

  MASTER
  ======
  Pkt size  min(ns)  avg(ns)  max(ns)
   512      10,011   12,100   281,915
  1024       7,870    9,313   193,116
  1280       7,862    9,036   194,439
  1518       8,215    9,417   204,782

  PATCH
  =====
  Pkt size  min(ns)  avg(ns)  max(ns)
   512      10,492   13,655   281,538
  1024       8,407    9,784   205,095
  1280       8,399    9,750   194,888
  1518       8,367    9,722   196,973

Performance numbers reported by Eelco Chaudron at
  https://mail.openvswitch.org/pipermail/ovs-dev/2017-June/333949.html
  https://mail.openvswitch.org/pipermail/ovs-dev/2017-May/332271.html
  https://mail.openvswitch.org/pipermail/ovs-dev/2017-April/331039.html

---
v1->v2
  * Rebased on master due to HW offload changes.
  * Introduced union for packet count and buffers and changed the variable
names appropriately.
  * No functional changes.

Bhanuprakash Bodireddy (6):
  netdev: Add netdev_txq_flush function.
  netdev-dpdk: Add netdev_dpdk_txq_flush function.
  netdev-dpdk: Add intermediate queue support.
  dpif-netdev: Flush the packets in intermediate queue.
  netdev-dpdk: Add netdev_dpdk_vhost_txq_flush function.
  netdev-dpdk: Enable intermediate queue for vHost User port.

 lib/dpif-netdev.c |  49 -
 lib/netdev-bsd.c  |   1 +
 lib/netdev-dpdk.c | 194 ++
 lib/netdev-dummy.c|   1 +
 lib/netdev-linux.c|   1 +
 lib/netdev-provider.h |   8 +++
 lib/netdev-vport.c|   2 +-
 lib/netdev.c  |   9 +++
 lib/netdev.h  |   1 +
 9 files changed, 235 insertions(+), 31 deletions(-)

-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH v3 17/18] keepalive: Display extended Keepalive status.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit adds support to display the extended keepalive status.
The status can be displayed as follows.

  $ ovs-appctl keepalive/pmd-xstats-show

  keepAlive Status  : Enabled
  keepAlive Interval: 1000 ms

  pmd64
  PMD core_id : 0
  PMD thread id   : 1269 [ACTIVE]
  PMD heartbeats  : enabled
  PMD state   : ALIVE
  Last seen timestamp : 9123706507798853
  PMD failure count   : 0

  pmd65
  PMD core_id : 1
  PMD thread id   : 1270 [ACTIVE]
  PMD heartbeats  : enabled
  PMD state   : ALIVE
  Last seen timestamp : 9123706507801627
  PMD failure count   : 0

  pmd64
  PMD core_id : 2
  PMD thread id   : 1271 [ACTIVE]
  PMD heartbeats  : enabled
  PMD state   : ALIVE
  Last seen timestamp : 9125112827794550
  PMD failure count   : 0
  PMD health check: enabled
  Packet Stats
  Port dpdk0, Queue: 1, Link status: up
  rx_packets : 1801284454
  tx_packets : 0
  Cycle Stats
  Polling cycles : 35426111637
  Processing cycles : 10123697085

For the PMD on core 2, health checks are enabled after a heartbeat failure
and additional stats (packet stats, CPU cycles) are displayed as above.

Signed-off-by: Bhanuprakash Bodireddy 
---
 lib/keepalive.c | 112 
 1 file changed, 112 insertions(+)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index d475ace..38bff91 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -730,6 +730,116 @@ ka_unixctl_status(struct unixctl_conn *conn, int argc 
OVS_UNUSED,
 ds_destroy();
 }
 
+static void
+ka_unixctl_pmd_xstats_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
+   const char *argv[] OVS_UNUSED, void *ka_info_)
+{
+struct ds ds = DS_EMPTY_INITIALIZER;
+ds_put_format(,
+  "\n\t\tKeepalive xstats\n\n");
+
+ds_put_format(, "keepalive status  : %s\n",
+  ka_is_enabled() ? "Enabled" : "Disabled");
+
+if (!ka_is_enabled()) {
+goto out;
+}
+
+ds_put_format(, "keepalive interval: %"PRIu32" ms\n",
+  get_ka_interval());
+
+struct keepalive_info *ka_info = (struct keepalive_info *)ka_info_;
+if (!ka_info) {
+goto out;
+}
+
+ds_put_format(, "PMD threads   : %"PRIu32" \n", ka_info->pmd_cnt);
+
+struct ka_process_info *pinfo, *pinfo_next;
+ovs_mutex_lock(_info->proclist_mutex);
+HMAP_FOR_EACH_SAFE (pinfo, pinfo_next, node, _info->process_list) {
+char *state = NULL;
+if (pinfo->core_state == KA_STATE_UNUSED ||
+ pinfo->core_state == KA_STATE_SLEEP)
+continue;
+
+switch (pinfo->core_state) {
+case KA_STATE_ALIVE:
+state = "ALIVE";
+break;
+case KA_STATE_MISSING:
+state = "MISSING";
+break;
+case KA_STATE_DEAD:
+state = "DEAD";
+break;
+case KA_STATE_GONE:
+state = "GONE";
+break;
+case KA_STATE_DOZING:
+state = "DOZING";
+break;
+case KA_STATE_SLEEP:
+state = "SLEEP";
+break;
+case KA_STATE_CHECK:
+state = "HEALTH_CHECK_RUNNING";
+break;
+case KA_STATE_UNUSED:
+break;
+}
+
+ds_put_format(, "\n");
+ds_put_format(, "  %s\n", pinfo->name);
+ds_put_format(, "\tPMD core_id : %d\n", pinfo->core_id);
+ds_put_format(, "\tPMD thread-id   : %d [%s]\n",
+ pinfo->tid, process_is_active(pinfo->tid) ?
+ "ACTIVE" : "INACTIVE");
+ds_put_format(, "\tPMD heartbeats  : %s\n",
+  ka_is_enabled() ? "enabled" : "disabled");
+ds_put_format(, "\tPMD state   : %s\n", state);
+ds_put_format(, "\tLast seen timestamp : %"PRIu64"\n",
+  pinfo->core_last_seen_times);
+
+ds_put_format(, "\tPMD failure count   : %d\n",
+  pinfo->failures);
+
+int health_check = pinfo->healthcheck;
+if (health_check) {
+ds_put_format(, "\tPMD health check: %s\n",
+  health_check ? "enabled" : "disabled");
+ds_put_format(, "\tPacket Stats\n");
+
+int cid = pinfo->core_id;
+int n = ka_info->ext_stats[cid].num_poll_ports;
+for (int idx = 0; idx < n; idx++) {
+ds_put_format(, "\t\tPort %s, Link status: %s\n",
+ka_info->ext_stats[cid].port_stats[idx].port,
+ka_info->ext_stats[cid].port_stats[idx].link_state);
+ds_put_format(, "\t\trx_packets : %"PRIu64"\n",
+  ka_info->ext_stats[cid].  \
+  

[ovs-dev] [RFC PATCH v3 16/18] netdev-dpdk: Enable PMD health checks on heartbeat failure.

2017-06-18 Thread Bhanuprakash Bodireddy
The keepalive thread sends heartbeats to each PMD thread; when a PMD fails to
respond to successive heartbeats, it is potentially stalled. The PMD
state transition is as below:

ALIVE -> MISSING -> DEAD -> GONE

This commit enables PMD health checks when a PMD doesn't respond to
heartbeats. This is needed to handle false negatives. With this commit
the new state transition is as below:

ALIVE -> MISSING -> DEAD -> CHECK -> GONE

A PMD health-checking state is introduced and kicks in immediately when
the PMD gets into the DEAD state. As part of this, the following are considered:

  - Link status of the ports polled by PMD thread.
  - Statistics of the ports polled by PMD thread.
  - PMD polling and processing cycles.

Signed-off-by: Bhanuprakash Bodireddy 
---
 lib/keepalive.c   | 16 ++
 lib/keepalive.h   |  2 ++
 lib/netdev-dpdk.c | 62 +--
 3 files changed, 78 insertions(+), 2 deletions(-)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index 3690b70..d475ace 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -108,6 +108,7 @@ ka_register_thread(int tid, bool thread_is_pmd)
 pinfo->heartbeats = true;
 pinfo->core_id = core_id;
 pinfo->healthcheck = PMD_HC_DISABLE;
+pinfo->failures = 0;
 
 char *pname = get_process_name(tid);
 if (pname) {
@@ -269,6 +270,21 @@ ka_set_pmd_state_ts(unsigned core_id, enum keepalive_state 
state,
 }
 
 void
+ka_inc_pmd_failures(unsigned core_id)
+{
+struct ka_process_info *pinfo;
+int tid = ka_get_pmd_tid(core_id);
+ovs_mutex_lock(_info->proclist_mutex);
+HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0),
+ _info->process_list) {
+if (pinfo->core_id == core_id) {
+pinfo->failures++;
+}
+}
+ovs_mutex_unlock(_info->proclist_mutex);
+}
+
+void
 ka_load_process_list(struct hmap **process_list)
 {
 if (ka_is_enabled()) {
diff --git a/lib/keepalive.h b/lib/keepalive.h
index 1bd639b..4f30f36 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -64,6 +64,7 @@ struct ka_process_info {
 enum pmdhealth_check healthcheck;
 enum keepalive_state core_state;
 uint64_t core_last_seen_times;
+int failures;
 struct hmap_node node;
 };
 
@@ -127,6 +128,7 @@ void ka_disable_pmd_health_check(unsigned);
 bool ka_is_pmdhealth_check_enabled(unsigned);
 enum pmdhealth_check ka_get_pmd_health_check_state(unsigned);
 void ka_set_pmd_health_check_state(unsigned, enum pmdhealth_check);
+void ka_inc_pmd_failures(unsigned);
 
 void ka_store_pmd_id(unsigned core);
 uint32_t get_ka_interval(void);
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index f33eeff..f71b017 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -602,6 +602,52 @@ dpdk_failcore_cb(void *ptr_data OVS_UNUSED, const int 
core_id)
 }
 }
 
+static void
+dpdk_ka_handle_failure(enum keepalive_state fail_state, const int core_id,
+   const enum rte_keepalive_state core_state,
+   uint64_t last_alive)
+{
+if (fail_state == KA_STATE_DEAD) {
+/* If process is in DEFUNC/UNINTERRUPTIBLE/TRACED state it is inactive
+ * and no additional health checks are needed. */
+uint32_t tid = ka_get_pmd_tid(core_id);
+if (process_is_active(tid)) {
+   /* Enable PMD health check only when PMD is in 'RUNNING' state and
+* still doesn't respond to heartbeats. Health checks are needed to
+* analyze other stats as we are in penultimate state of declaring
+* PMD as failed. */
+ka_enable_pmd_health_check(core_id);
+}
+ka_set_pmd_state_ts(core_id, KA_STATE_DEAD, last_alive);
+}
+
+if (fail_state == KA_STATE_GONE) {
+int pmd_hc_state = ka_get_pmd_health_check_state(core_id);
+
+switch (pmd_hc_state) {
+case PMD_HC_ENABLE:
+break;
+case PMD_HC_DISABLE:
+VLOG_DBG_RL(, "PMD thread [%d] died, health check disabled",
+core_id);
+break;
+case PMD_HC_PROGRESS:
+ka_set_pmd_state_ts(core_id, KA_STATE_CHECK, last_alive);
+break;
+
+case PMD_HC_COMPLETE:
+ka_inc_pmd_failures(core_id);
+ka_set_pmd_state_ts(core_id, core_state, last_alive);
+ka_disable_pmd_health_check(core_id);
+break;
+
+default:
+VLOG_DBG_RL(, "Unknown health check state %d", pmd_hc_state);
+OVS_NOT_REACHED();
+}
+}
+}
+
 /*
  * This function shall be invoked periodically to write the core status and
  * last seen timestamp of the cores in to keepalive info structure.
@@ -614,11 +660,23 @@ dpdk_ka_update_core_state(void *ptr_data OVS_UNUSED, 
const int core_id,
 case RTE_KA_STATE_ALIVE:
 case RTE_KA_STATE_MISSING:
 ka_set_pmd_state_ts(core_id, KA_STATE_ALIVE, last_alive);
+
+/* 

[ovs-dev] [RFC PATCH v3 14/18] keepalive: Check the packet statistics as part of PMD health checks.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit adds support for checking the packet statistics on the ports
polled by a PMD thread. If packets aren't processed due to a PMD thread
stall/deadlock, the statistics won't update, and this can be used by a
monitoring framework to confirm PMD failure.

This mechanism has a limitation with MQ enabled. In some cases the queues of
a DPDK port can be polled by different PMD threads. Even if one PMD
thread stalls, the port statistics will still be incremented because another
queue is processed by a different PMD. In this case the function can return
the active state, based on the packets processed by the other PMD.

Signed-off-by: Bhanuprakash Bodireddy 
---
 lib/dpif-netdev.c | 25 +++---
 lib/keepalive.c   | 97 +++
 lib/keepalive.h   |  5 +++
 3 files changed, 122 insertions(+), 5 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 4d8d3e7..ad48ee5 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -975,8 +975,9 @@ sorted_poll_thread_list(struct dp_netdev *dp,
 static void
 pmd_health_check(struct dp_netdev_pmd_thread *pmd)
 {
-int port_link_status = 0;
 struct rxq_poll *poll;
+int port_link_status = 0;
+int port_stats = 0;
 
 struct svec pmd_poll_list;
 svec_init(_poll_list);
@@ -991,22 +992,36 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd)
 int i = 0;
 SVEC_FOR_EACH (i, port_name, _poll_list) {
 struct netdev *dev = netdev_from_name(port_name);
+VLOG_DBG("Keepalive: Checking port %s", port_name);
 if (dev) {
 char *link_state = netdev_get_carrier(dev) ? "up" : "down";
 ka_info_update_port_status(port_name, 0, link_state,
 pmd->core_id, i);
+if (!strcmp(link_state, "up")) {
+ka_info_update_port_statistics(dev, pmd->core_id, i);
+}
 netdev_close(dev);
 }
 }
 svec_destroy(_poll_list);
 
-port_link_status = ka_get_polled_ports_status(pmd->core_id);
-
 int pmd_hc_state = ka_get_pmd_health_check_state(pmd->core_id);
-if (PMD_HC_COMPLETE == pmd_hc_state) {
-if (port_link_status == ACTIVE_RUN_STATE) {
+switch (pmd_hc_state) {
+case PMD_HC_ENABLE:
+ka_set_pmd_health_check_state(pmd->core_id, PMD_HC_PROGRESS);
+break;
+case PMD_HC_PROGRESS:
+ka_set_pmd_health_check_state(pmd->core_id, PMD_HC_COMPLETE);
+break;
+case PMD_HC_COMPLETE:
+port_link_status = ka_get_polled_ports_status(pmd->core_id);
+port_stats = ka_get_polled_ports_stats(pmd->core_id);
+
+if (port_link_status == ACTIVE_RUN_STATE &&
+   port_stats == ACTIVE_RUN_STATE ) {
 ka_set_pmd_state_ts(pmd->core_id, KA_STATE_ALIVE, 0);
 }
+break;
 }
 }
 
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 9251849..4234912 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -24,6 +24,7 @@
 #include "dpdk.h"
 #include "keepalive.h"
 #include "lib/vswitch-idl.h"
+#include "netdev-dpdk.h"
 #include "openvswitch/dynamic-string.h"
 #include "openvswitch/vlog.h"
 #include "ovs-thread.h"
@@ -31,6 +32,7 @@
 #include "unixctl.h"
 
 VLOG_DEFINE_THIS_MODULE(keepalive);
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
 
 static bool keepalive_enable = false;/* Keepalive disabled by default */
 static bool ka_init_status = ka_init_failure; /* Keepalive initialization */
@@ -453,6 +455,31 @@ enum pmdhealth_status ka_get_polled_ports_status(unsigned 
core_id)
 }
 }
 
+enum pmdhealth_status ka_get_polled_ports_stats(unsigned core_id)
+{
+if (!ka_info) {
+return FAILURE_STATE;
+}
+
+int failed = 0;
+int n_ports = ka_info->ext_stats[core_id].num_poll_ports;
+for (int i = 0; i < n_ports; i++) {
+int state;
+state =
+  ka_info->ext_stats[core_id].port_stats[i].state[PORT_STATS_CHECK];
+if (state == FAILURE_STATE) {
+failed = 1;
+break;
+}
+}
+
+if (!failed) {
+return ACTIVE_RUN_STATE;
+} else {
+return FAILURE_STATE;
+}
+}
+
 void
 ka_info_update_port_status(const char *port, int qid OVS_UNUSED,
char *link_state, int core_id, int idx)
@@ -480,6 +507,76 @@ ka_info_update_port_status(const char *port, int qid 
OVS_UNUSED,
state;
 }
 
+void
+ka_info_update_port_statistics(const struct netdev *netdev,
+  int core_id, int idx)
+{
+int error;
+int state = FAILURE_STATE;
+
+if (!ka_info) {
+VLOG_ERR_RL(, "Keepalive disabled");
+return;
+}
+ka_info->ext_stats[core_id].num_poll_ports = idx;
+
+int pmd_hc_state = ka_get_pmd_health_check_state(core_id);
+if (PMD_HC_ENABLE == pmd_hc_state) {
+struct netdev_stats *stats;
+stats = 

[ovs-dev] [RFC PATCH v3 15/18] keepalive: Check the PMD cycle stats as part of PMD health checks.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit adds support for checking the PMD cycle stats. If the cycles
aren't changing over a period of time, this can be flagged as a possible
PMD stall.

Signed-off-by: Bhanuprakash Bodireddy 
---
 lib/dpif-netdev.c | 16 +---
 lib/dpif-netdev.h |  6 ++
 lib/keepalive.c   | 51 +++
 lib/keepalive.h   |  3 +++
 4 files changed, 69 insertions(+), 7 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index ad48ee5..b1a9fc4 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -328,12 +328,6 @@ enum dp_stat_type {
 DP_N_STATS
 };
 
-enum pmd_cycles_counter_type {
-PMD_CYCLES_POLLING, /* Cycles spent polling NICs. */
-PMD_CYCLES_PROCESSING,  /* Cycles spent processing packets */
-PMD_N_CYCLES
-};
-
 #define XPS_TIMEOUT_MS 500LL
 
 /* Contained by struct dp_netdev_port's 'rxqs' member.  */
@@ -978,6 +972,8 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd)
 struct rxq_poll *poll;
 int port_link_status = 0;
 int port_stats = 0;
+int pmd_polling = 0;
+uint64_t cycles[PMD_N_CYCLES];
 
 struct svec pmd_poll_list;
 svec_init(_poll_list);
@@ -1005,6 +1001,11 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd)
 }
 svec_destroy(_poll_list);
 
+for (int idx = 0; idx < ARRAY_SIZE(cycles); idx++) {
+atomic_read_relaxed(>cycles.n[idx], [idx]);
+}
+pmd_polling = ka_info_update_pmd_cycles(pmd->core_id, cycles);
+
 int pmd_hc_state = ka_get_pmd_health_check_state(pmd->core_id);
 switch (pmd_hc_state) {
 case PMD_HC_ENABLE:
@@ -1018,7 +1019,8 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd)
 port_stats = ka_get_polled_ports_stats(pmd->core_id);
 
 if (port_link_status == ACTIVE_RUN_STATE &&
-   port_stats == ACTIVE_RUN_STATE ) {
+  port_stats == ACTIVE_RUN_STATE &&
+pmd_polling == ACTIVE_RUN_STATE) {
 ka_set_pmd_state_ts(pmd->core_id, KA_STATE_ALIVE, 0);
 }
 break;
diff --git a/lib/dpif-netdev.h b/lib/dpif-netdev.h
index 6db6ed2..e7c2400 100644
--- a/lib/dpif-netdev.h
+++ b/lib/dpif-netdev.h
@@ -33,6 +33,12 @@ extern "C" {
  * headers to be aligned on a 4-byte boundary.  */
 enum { DP_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN };
 
+enum pmd_cycles_counter_type {
+PMD_CYCLES_POLLING, /* Cycles spent polling NICs. */
+PMD_CYCLES_PROCESSING,  /* Cycles spent processing packets */
+PMD_N_CYCLES
+};
+
 bool dpif_is_netdev(const struct dpif *);
 
 #define NR_QUEUE   1
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 4234912..3690b70 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -577,6 +577,57 @@ ka_info_update_port_statistics(const struct netdev *netdev,
 state;
 }
 
+int
+ka_info_update_pmd_cycles(int core_id, uint64_t cycles[PMD_N_CYCLES])
+{
+int pmd_state = ACTIVE_RUN_STATE;
+if (!ka_info) {
+return FAILURE_STATE;
+}
+
+uint64_t total_cycles = 0;
+for (int i = 0; i < PMD_N_CYCLES; i++) {
+if (cycles[i] > 0) {
+total_cycles += cycles[i];
+}
+}
+
+if (!total_cycles) {
+return -1;
+}
+
+int pmd_hc_state = ka_get_pmd_health_check_state(core_id);
+if (PMD_HC_ENABLE == pmd_hc_state) {
+ka_info->ext_stats[core_id].cycles[PMD_CYCLES_POLLING] =
+   cycles[PMD_CYCLES_POLLING];
+
+ka_info->ext_stats[core_id].cycles[PMD_CYCLES_PROCESSING] =
+   cycles[PMD_CYCLES_PROCESSING];
+}
+
+if (PMD_HC_PROGRESS == pmd_hc_state) {
+uint64_t polling_cycles_cnt = 0, proc_cycles_cnt = 0;
+uint64_t prev_poll_cycles =
+ka_info->ext_stats[core_id].cycles[PMD_CYCLES_POLLING];
+uint64_t prev_proc_cycles =
+ka_info->ext_stats[core_id].cycles[PMD_CYCLES_PROCESSING];
+
+VLOG_DBG_RL(, "Keepalive: Going to check the PMD thresholds now.");
+
+polling_cycles_cnt = cycles[PMD_CYCLES_POLLING] - prev_poll_cycles;
+
+proc_cycles_cnt = cycles[PMD_CYCLES_PROCESSING]
+   - prev_proc_cycles;
+
+if (!polling_cycles_cnt && !proc_cycles_cnt) {
+VLOG_DBG("PMD FAILURE!");
+pmd_state = FAILURE_STATE;
+}
+}
+
+return pmd_state;
+}
+
 static void
 ka_unixctl_pmd_health_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
const char *argv[] OVS_UNUSED, void *ka_info_)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index a132d74..1bd639b 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -27,6 +27,7 @@
 #define KA_DP_MAXCORES 128
 #endif /* DPDK_NETDEV */
 
+#include "dpif-netdev.h"
 #include "netdev.h"
 
 struct smap;
@@ -76,6 +77,7 @@ struct poll_port_stats {
 struct pmd_extended_stats {
 char *health_status;
 struct poll_port_stats *port_stats;
+uint64_t 

[ovs-dev] [RFC PATCH v3 13/18] keepalive: Check the link status as part of PMD health checks.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit adds initial support for performing PMD health checks.
The ports handled by the PMD threads are checked for their link status, and
the result is stored in the keepalive info structure.

Signed-off-by: Bhanuprakash Bodireddy 
---
 lib/dpif-netdev.c | 37 +++--
 lib/keepalive.c   | 52 
 lib/keepalive.h   | 18 +-
 3 files changed, 104 insertions(+), 3 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 06d2e23..4d8d3e7 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -51,6 +51,7 @@
 #include "keepalive.h"
 #include "latch.h"
 #include "netdev.h"
+#include "netdev-provider.h"
 #include "netdev-vport.h"
 #include "netlink.h"
 #include "odp-execute.h"
@@ -972,9 +973,41 @@ sorted_poll_thread_list(struct dp_netdev *dp,
 }
 
 static void
-pmd_health_check(struct dp_netdev_pmd_thread *pmd OVS_UNUSED)
+pmd_health_check(struct dp_netdev_pmd_thread *pmd)
 {
-/* Nothing */
+int port_link_status = 0;
+struct rxq_poll *poll;
+
+struct svec pmd_poll_list;
+svec_init(_poll_list);
+HMAP_FOR_EACH (poll, node, >poll_list) {
+svec_add(_poll_list, netdev_rxq_get_name(poll->rxq->rx));
+}
+
+/* With MQ enabled, remove the duplicates. */
+svec_sort_unique(_poll_list);
+
+const char *port_name;
+int i = 0;
+SVEC_FOR_EACH (i, port_name, _poll_list) {
+struct netdev *dev = netdev_from_name(port_name);
+if (dev) {
+char *link_state = netdev_get_carrier(dev) ? "up" : "down";
+ka_info_update_port_status(port_name, 0, link_state,
+pmd->core_id, i);
+netdev_close(dev);
+}
+}
+svec_destroy(_poll_list);
+
+port_link_status = ka_get_polled_ports_status(pmd->core_id);
+
+int pmd_hc_state = ka_get_pmd_health_check_state(pmd->core_id);
+if (PMD_HC_COMPLETE == pmd_hc_state) {
+if (port_link_status == ACTIVE_RUN_STATE) {
+ka_set_pmd_state_ts(pmd->core_id, KA_STATE_ALIVE, 0);
+}
+}
 }
 
 static void
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 997bebf..9251849 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -428,6 +428,58 @@ ka_stats_run(void)
 return ka_stats;
 }
 
+enum pmdhealth_status ka_get_polled_ports_status(unsigned core_id)
+{
+if (OVS_UNLIKELY(!ka_info)) {
+return FAILURE_STATE;
+}
+
+int failed = 0;
+int n_ports = ka_info->ext_stats[core_id].num_poll_ports;
+for (int i = 0; i < n_ports; i++) {
+int state;
+state =
+  ka_info->ext_stats[core_id].port_stats[i].state[PORT_LINK_CHECK];
+if (state == FAILURE_STATE) {
+failed = 1;
+break;
+}
+}
+
+if (!failed) {
+return ACTIVE_RUN_STATE;
+} else {
+return FAILURE_STATE;
+}
+}
+
+void
+ka_info_update_port_status(const char *port, int qid OVS_UNUSED,
+   char *link_state, int core_id, int idx)
+{
+if (OVS_UNLIKELY(!ka_info)) {
+return;
+}
+
+ka_info->ext_stats[core_id].num_poll_ports = idx;
+
+if (OVS_LIKELY(core_id != NON_PMD_CORE_ID)) {
+ka_info->ext_stats[core_id].port_stats[idx].port = port;
+ka_info->ext_stats[core_id].port_stats[idx].link_state =
+   link_state;
+}
+
+int state;
+if (!strcmp(link_state, "down")) {
+state = FAILURE_STATE;
+} else {
+state = ACTIVE_RUN_STATE;
+}
+
+ka_info->ext_stats[core_id].port_stats[idx].state[PORT_LINK_CHECK] =
+   state;
+}
+
 static void
 ka_unixctl_pmd_health_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
const char *argv[] OVS_UNUSED, void *ka_info_)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index 8877ca6..69697bd 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -47,6 +47,12 @@ enum pmdhealth_check {
 PMD_HC_COMPLETE
 };
 
+enum port_health_check {
+PORT_LINK_CHECK = 0,
+PORT_STATS_CHECK,
+PORT_NUM_CHECKS
+};
+
 struct ka_process_info {
 char name[16];
 int tid;
@@ -60,10 +66,12 @@ struct ka_process_info {
 
 struct poll_port_stats {
 const char *port;
-int qid;
+char *link_state;
+int state[PORT_NUM_CHECKS];
 };
 
 struct pmd_extended_stats {
+char *health_status;
 struct poll_port_stats *port_stats;
 int num_poll_ports;
 };
@@ -92,6 +100,11 @@ enum keepalive_status {
 ka_init_success
 };
 
+enum pmdhealth_status {
+FAILURE_STATE = 0,
+ACTIVE_RUN_STATE
+};
+
 void ka_init(const struct smap *);
 void ka_destroy(void);
 void ka_set_pmd_state_ts(unsigned, enum keepalive_state, uint64_t);
@@ -121,4 +134,7 @@ struct smap *ka_stats_run(void);
 void ka_load_process_list(struct hmap **);
 
 void 

[ovs-dev] [RFC PATCH v3 12/18] dpif-netdev: Add additional datapath health checks.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit enables additional datapath health checks. The checks
are enabled only on a PMD heartbeat failure. After three successive missed
heartbeats, additional health checks need to be performed on the respective
PMD thread to confirm the failure.

The datapath health is monitored periodically from the keepalive thread.
It should be noted that the PMD health checks are only performed on
the PMD threads whose health check is enabled.

Signed-off-by: Bhanuprakash Bodireddy 
---
 lib/dpif-netdev.c | 30 +
 lib/keepalive.c   | 81 +++
 lib/keepalive.h   | 16 +++
 3 files changed, 127 insertions(+)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 93bda20..06d2e23 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -971,6 +971,35 @@ sorted_poll_thread_list(struct dp_netdev *dp,
 *n = k;
 }
 
+static void
+pmd_health_check(struct dp_netdev_pmd_thread *pmd OVS_UNUSED)
+{
+/* Nothing */
+}
+
+static void
+get_datapath_health(struct dp_netdev *dp)
+{
+static struct hmap *process_list = NULL;
+if (!process_list) {
+ka_load_process_list(_list);
+}
+
+struct ka_process_info *pinfo;
+HMAP_FOR_EACH (pinfo, node, process_list) {
+int core_id = pinfo->core_id;
+struct dp_netdev_pmd_thread *pmd;
+
+/* Check only PMD threads whose health check is enabled. */
+if (OVS_LIKELY(pinfo->healthcheck == PMD_HC_DISABLE)) {
+continue;
+}
+
+pmd = dp_netdev_get_pmd(dp, core_id);
+pmd_health_check(pmd);
+}
+}
+
 static void *
 ovs_keepalive(void *f_)
 {
@@ -982,6 +1011,7 @@ ovs_keepalive(void *f_)
 int n_pmds = cmap_count(>poll_threads) - 1;
 if (n_pmds > 0) {
 dispatch_heartbeats();
+get_datapath_health(dp);
 get_ka_stats();
 }
 
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 6edb440..997bebf 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -105,6 +105,7 @@ ka_register_thread(int tid, bool thread_is_pmd)
 pinfo->tid = tid;
 pinfo->heartbeats = true;
 pinfo->core_id = core_id;
+pinfo->healthcheck = PMD_HC_DISABLE;
 
 char *pname = get_process_name(tid);
 if (pname) {
@@ -176,6 +177,78 @@ ka_mark_pmd_thread_sleep(void)
 }
 
 void
+ka_enable_pmd_health_check(unsigned core_id)
+{
+if (ka_is_enabled()) {
+struct ka_process_info *pinfo;
+int tid = ka_get_pmd_tid(core_id);
+ovs_mutex_lock(_info->proclist_mutex);
+HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0),
+ _info->process_list) {
+if ((pinfo->core_id == core_id) && (pinfo->tid == tid)) {
+pinfo->healthcheck = PMD_HC_ENABLE;
+}
+}
+ovs_mutex_unlock(_info->proclist_mutex);
+}
+}
+
+void
+ka_disable_pmd_health_check(unsigned core_id)
+{
+if (ka_is_enabled()) {
+struct ka_process_info *pinfo;
+int tid = ka_get_pmd_tid(core_id);
+ovs_mutex_lock(_info->proclist_mutex);
+HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0),
+ _info->process_list) {
+if ((pinfo->core_id == core_id) && (pinfo->tid == tid)) {
+pinfo->healthcheck = PMD_HC_DISABLE;
+}
+}
+ovs_mutex_unlock(_info->proclist_mutex);
+}
+}
+
+enum pmdhealth_check
+ka_get_pmd_health_check_state(unsigned core_id)
+OVS_REQUIRES(ka_info->proclist_mutex)
+{
+int hc = PMD_HC_DISABLE;
+if (ka_is_enabled()) {
+struct ka_process_info *pinfo;
+int tid = ka_get_pmd_tid(core_id);
+ovs_mutex_lock(_info->proclist_mutex);
+HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0),
+ _info->process_list) {
+if ((pinfo->core_id == core_id) && (pinfo->tid == tid)) {
+hc = pinfo->healthcheck;
+}
+}
+ovs_mutex_unlock(_info->proclist_mutex);
+}
+
+return hc;
+}
+
+void
+ka_set_pmd_health_check_state(unsigned core_id, enum pmdhealth_check state)
+{
+if (ka_is_enabled()) {
+struct ka_process_info *pinfo;
+int tid = ka_get_pmd_tid(core_id);
+ovs_mutex_lock(_info->proclist_mutex);
+HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0),
+ _info->process_list) {
+if ((pinfo->core_id == core_id) && (pinfo->tid == tid)) {
+pinfo->healthcheck = state;
+}
+}
+ovs_mutex_unlock(_info->proclist_mutex);
+}
+}
+
+void
 ka_set_pmd_state_ts(unsigned core_id, enum keepalive_state state,
 uint64_t last_alive)
 {
@@ -193,6 +266,14 @@ ka_set_pmd_state_ts(unsigned core_id, enum keepalive_state 
state,
 ovs_mutex_unlock(_info->proclist_mutex);
 }
 
+void
+ka_load_process_list(struct hmap 

[ovs-dev] [RFC PATCH v3 11/18] keepalive: Add support to query keepalive status.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit adds support for querying whether keepalive is
enabled or disabled.

  $ ovs-appctl keepalive/status
keepAlive Status: Enabled

Signed-off-by: Bhanuprakash Bodireddy 
---
 lib/keepalive.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index a4b8d01..6edb440 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -420,6 +420,19 @@ out:
 ds_destroy();
 }
 
+static void
+ka_unixctl_status(struct unixctl_conn *conn, int argc OVS_UNUSED,
+  const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
+{
+struct ds ds = DS_EMPTY_INITIALIZER;
+
+ds_put_format(, "keepAlive Status: %s",
+  ka_is_enabled() ? "Enabled" : "Disabled");
+
+unixctl_command_reply(conn, ds_cstr());
+ds_destroy();
+}
+
 static int
 ka_init__(void)
 {
@@ -466,6 +479,8 @@ ka_init(const struct smap *ovs_other_config)
 
 unixctl_command_register("keepalive/pmd-health-show", "", 0, 0,
   ka_unixctl_pmd_health_show, ka_info);
+unixctl_command_register("keepalive/status", "", 0, 0,
+  ka_unixctl_status, NULL);
 
 ovsthread_once_done(_enable);
 }
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH v3 10/18] keepalive: Add support to query keepalive statistics.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit adds support to query keepalive statistics. Datapath health
status can be retrieved as follows:

  $ ovs-appctl keepalive/pmd-health-show

  Keepalive status

keepalive status  : Enabled
keepalive interval: 1000 ms
PMD threads   : 8

 PMD     CORE    STATE   LAST SEEN TIMESTAMP
 pmd62    0      ALIVE   8632183482028293
 pmd63    1      ALIVE   8632183482028425
 pmd64    2      ALIVE   8632190191004294
 pmd65    3      ALIVE   8632183482028525
 pmd66    4      GONE    8612183482028117
 pmd67    5      ALIVE   8632190191004984
 pmd68    6      ALIVE   8632190191005713
 pmd69    7      ALIVE   8632190191006555

Signed-off-by: Bhanuprakash Bodireddy 
---
 lib/keepalive.c | 78 +
 1 file changed, 78 insertions(+)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index f0b75f0..a4b8d01 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -24,9 +24,11 @@
 #include "dpdk.h"
 #include "keepalive.h"
 #include "lib/vswitch-idl.h"
+#include "openvswitch/dynamic-string.h"
 #include "openvswitch/vlog.h"
 #include "ovs-thread.h"
 #include "process.h"
+#include "unixctl.h"
 
 VLOG_DEFINE_THIS_MODULE(keepalive);
 
@@ -345,6 +347,79 @@ ka_stats_run(void)
 return ka_stats;
 }
 
+static void
+ka_unixctl_pmd_health_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
+   const char *argv[] OVS_UNUSED, void *ka_info_)
+{
+struct ds ds = DS_EMPTY_INITIALIZER;
+ds_put_format(,
+  "\n\t\tKeepalive status\n\n");
+
+ds_put_format(, "keepalive status  : %s\n",
+  ka_is_enabled() ? "Enabled" : "Disabled");
+
+if (!ka_is_enabled()) {
+goto out;
+}
+
+ds_put_format(, "keepalive interval: %"PRIu32" ms\n",
+  get_ka_interval());
+
+struct keepalive_info *ka_info = (struct keepalive_info *)ka_info_;
+if (OVS_UNLIKELY(!ka_info)) {
+goto out;
+}
+
+ds_put_format(, "PMD threads   : %"PRIu32" \n", ka_info->pmd_cnt);
+ds_put_format(,
+  "\n PMD\tCORE\tSTATE\tLAST SEEN TIMESTAMP\n");
+
+struct ka_process_info *pinfo, *pinfo_next;
+
+ovs_mutex_lock(_info->proclist_mutex);
+HMAP_FOR_EACH_SAFE (pinfo, pinfo_next, node, _info->process_list) {
+char *state = NULL;
+if (pinfo->core_state == KA_STATE_UNUSED ||
+  pinfo->core_state == KA_STATE_SLEEP)
+continue;
+
+switch (pinfo->core_state) {
+case KA_STATE_ALIVE:
+state = "ALIVE";
+break;
+case KA_STATE_MISSING:
+state = "MISSING";
+break;
+case KA_STATE_DEAD:
+state = "DEAD";
+break;
+case KA_STATE_GONE:
+state = "GONE";
+break;
+case KA_STATE_DOZING:
+state = "DOZING";
+break;
+case KA_STATE_SLEEP:
+state = "SLEEP";
+break;
+case KA_STATE_CHECK:
+state = "HEALTH_CHECK_RUNNING";
+break;
+case KA_STATE_UNUSED:
+break;
+}
+ds_put_format(, "%s\t%2d\t%s\t%"PRIu64"\n",
+  pinfo->name, pinfo->core_id, state,
+  pinfo->core_last_seen_times);
+}
+ovs_mutex_unlock(_info->proclist_mutex);
+
+ds_put_format(, "\n");
+out:
+unixctl_command_reply(conn, ds_cstr());
+ds_destroy();
+}
+
 static int
 ka_init__(void)
 {
@@ -389,6 +464,9 @@ ka_init(const struct smap *ovs_other_config)
 VLOG_INFO("OvS Keepalive disabled.");
 }
 
+unixctl_command_register("keepalive/pmd-health-show", "", 0, 0,
+  ka_unixctl_pmd_health_show, ka_info);
+
 ovsthread_once_done(_enable);
 }
 }
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH v3 09/18] bridge: Update keepalive status in OVSDB

2017-06-18 Thread Bhanuprakash Bodireddy
This commit allows vswitchd thread to update the OVSDB with the
status of all registered PMD threads. The status can be monitored
using ovsdb-client and the sample output is below.

$ ovsdb-client monitor Open_vSwitch Open_vSwitch keepalive

row                                  action keepalive
7b746190-ee71-4dcc-becf-f8cb9c7cb909 old  { "PMD62"="ALIVE,0,9226457935188922"
"PMD63"="ALIVE,1,9226457935189628"
"PMD64"="ALIVE,2,9226457935189897"
"PMD65"="ALIVE,3,9226457935190127"}

 new  { "PMD62"="ALIVE,0,9226460230167364"
"PMD63"="ALIVE,1,9226460230168100"
"PMD64"="ALIVE,2,9226460230168905"
"PMD65"="ALIVE,3,9226460230169632"}

Signed-off-by: Bhanuprakash Bodireddy 
---
 lib/keepalive.c   | 15 +++
 lib/keepalive.h   |  1 +
 vswitchd/bridge.c | 26 ++
 3 files changed, 42 insertions(+)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index b437bef..f0b75f0 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -330,6 +330,21 @@ get_ka_stats(void)
 ovs_mutex_unlock();
 }
 
+struct smap *
+ka_stats_run(void)
+{
+struct smap *ka_stats = NULL;
+
+ovs_mutex_lock();
+if (keepalive_stats) {
+ka_stats = keepalive_stats;
+keepalive_stats = NULL;
+}
+ovs_mutex_unlock();
+
+return ka_stats;
+}
+
 static int
 ka_init__(void)
 {
diff --git a/lib/keepalive.h b/lib/keepalive.h
index bdec34f..356e761 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -102,6 +102,7 @@ int ka_get_pmd_tid(unsigned core);
 int ka_alloc_portstats(unsigned, int);
 void ka_destroy_portstats(void);
 void get_ka_stats(void);
+struct smap *ka_stats_run(void);
 
 void dispatch_heartbeats(void);
 #endif /* keepalive.h */
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 3927b9f..4b6b528 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -286,6 +286,7 @@ static bool port_is_synthetic(const struct port *);
 
 static void reconfigure_system_stats(const struct ovsrec_open_vswitch *);
 static void run_system_stats(void);
+static void run_keepalive_stats(void);
 
 static void bridge_configure_mirrors(struct bridge *);
 static struct mirror *mirror_create(struct bridge *,
@@ -403,6 +404,7 @@ bridge_init(const char *remote)
 
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_cur_cfg);
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_statistics);
+ovsdb_idl_omit_alert(idl, _open_vswitch_col_keepalive);
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_datapath_types);
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_iface_types);
 ovsdb_idl_omit(idl, _open_vswitch_col_external_ids);
@@ -2690,6 +2692,29 @@ run_system_stats(void)
 }
 }
 
+void
+run_keepalive_stats(void)
+{
+struct smap *ka_stats;
+const struct ovsrec_open_vswitch *cfg = ovsrec_open_vswitch_first(idl);
+
+ka_stats = ka_stats_run();
+if (ka_stats && cfg) {
+struct ovsdb_idl_txn *txn;
+struct ovsdb_datum datum;
+
+txn = ovsdb_idl_txn_create(idl);
+ovsdb_datum_from_smap(, ka_stats);
+smap_destroy(ka_stats);
+ovsdb_idl_txn_write(>header_, _open_vswitch_col_keepalive,
+);
+ovsdb_idl_txn_commit(txn);
+ovsdb_idl_txn_destroy(txn);
+
+free(ka_stats);
+}
+}
+
 static const char *
 ofp12_controller_role_to_str(enum ofp12_controller_role role)
 {
@@ -3039,6 +3064,7 @@ bridge_run(void)
 run_stats_update();
 run_status_update();
 run_system_stats();
+run_keepalive_stats();
 }
 
 void
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH v3 08/18] keepalive: Retrieve PMD status periodically.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit implements APIs to retrieve the PMD thread status and return
the status in the below format for each PMD thread.

  Format: PMDID="STATUS,core id,last_seen_timestamp"
  eg: PMD62="ALIVE,2,9220698256784207"
  PMD63="GONE,3,9220698256786231"

The status is periodically retrieved by the keepalive thread and stored in
the keepalive_stats struct, which is later retrieved by the vswitchd thread.
In the case of four PMD threads, the status is as below:

   "PMD62"="ALIVE,0,9220698256784207"
   "PMD63"="ALIVE,1,9220698256784913"
   "PMD64"="ALIVE,2,9220698256785902"
   "PMD65"="ALIVE,3,9220698256786231"

Signed-off-by: Bhanuprakash Bodireddy 
---
 lib/dpif-netdev.c |  1 +
 lib/keepalive.c   | 73 +++
 lib/keepalive.h   |  1 +
 3 files changed, 75 insertions(+)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index cf4d68c..93bda20 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -982,6 +982,7 @@ ovs_keepalive(void *f_)
 int n_pmds = cmap_count(>poll_threads) - 1;
 if (n_pmds > 0) {
 dispatch_heartbeats();
+get_ka_stats();
 }
 
 ovsrcu_quiesce_start();
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 353f1d1..b437bef 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -25,6 +25,7 @@
 #include "keepalive.h"
 #include "lib/vswitch-idl.h"
 #include "openvswitch/vlog.h"
+#include "ovs-thread.h"
 #include "process.h"
 
 VLOG_DEFINE_THIS_MODULE(keepalive);
@@ -34,6 +35,9 @@ static bool ka_init_status = ka_init_failure; /* Keepalive 
initialization */
 static uint32_t keepalive_timer_interval; /* keepalive timer interval */
 static struct keepalive_info *ka_info = NULL;
 
+static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
+static struct smap *keepalive_stats OVS_GUARDED_BY(mutex);
+
 inline bool
 ka_is_enabled(void)
 {
@@ -257,6 +261,75 @@ keepalive_info_create(void)
 return ka_info;
 }
 
+static void
+get_pmd_status(struct smap *ka_pmd_stats)
+OVS_REQUIRES(ka_info->proclist_mutex)
+{
+if (OVS_UNLIKELY(!ka_info)) {
+return;
+}
+
+struct ka_process_info *pinfo, *pinfo_next;
+HMAP_FOR_EACH_SAFE (pinfo, pinfo_next, node, _info->process_list) {
+int core_id = pinfo->core_id;
+char *state = NULL;
+if (pinfo->core_state == KA_STATE_UNUSED ||
+   pinfo->core_state == KA_STATE_SLEEP ) {
+continue;
+}
+
+switch (pinfo->core_state) {
+case KA_STATE_ALIVE:
+state = "ALIVE";
+break;
+case KA_STATE_MISSING:
+state = "MISSING";
+break;
+case KA_STATE_DEAD:
+state = "DEAD";
+break;
+case KA_STATE_GONE:
+state = "GONE";
+break;
+case KA_STATE_DOZING:
+state = "DOZING";
+break;
+case KA_STATE_SLEEP:
+state = "SLEEP";
+break;
+case KA_STATE_CHECK:
+state = "HEALTH_CHECK_RUNNING";
+break;
+case KA_STATE_UNUSED:
+break;
+}
+
+smap_add_format(ka_pmd_stats, pinfo->name, "%s,%d,%ld",
+state, core_id, pinfo->core_last_seen_times);
+}
+}
+
+void
+get_ka_stats(void)
+{
+struct smap *ka_pmd_stats;
+ka_pmd_stats = xmalloc(sizeof *ka_pmd_stats);
+smap_init(ka_pmd_stats);
+
+ovs_mutex_lock(_info->proclist_mutex);
+get_pmd_status(ka_pmd_stats);
+ovs_mutex_unlock(_info->proclist_mutex);
+
+ovs_mutex_lock();
+if (keepalive_stats) {
+smap_destroy(keepalive_stats);
+free(keepalive_stats);
+keepalive_stats = NULL;
+}
+keepalive_stats = ka_pmd_stats;
+ovs_mutex_unlock();
+}
+
 static int
 ka_init__(void)
 {
diff --git a/lib/keepalive.h b/lib/keepalive.h
index cfe02e5..bdec34f 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -101,6 +101,7 @@ int get_ka_init_status(void);
 int ka_get_pmd_tid(unsigned core);
 int ka_alloc_portstats(unsigned, int);
 void ka_destroy_portstats(void);
+void get_ka_stats(void);
 
 void dispatch_heartbeats(void);
 #endif /* keepalive.h */
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH v3 06/18] dpif-netdev: Register packet processing cores to KA framework.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit registers the packet processing PMD cores to keepalive
framework. Only PMDs that have rxqs mapped will be registered and
actively monitored by KA framework.

This commit spawns a keepalive thread that will dispatch heartbeats to
PMD cores. The pmd threads respond to heartbeats by marking themselves
alive. As long as PMD responds to heartbeats it is considered 'healthy'.

Signed-off-by: Bhanuprakash Bodireddy 
---
 lib/dpif-netdev.c | 100 +
 lib/keepalive.c   | 130 +++---
 lib/keepalive.h   |  25 ++-
 3 files changed, 236 insertions(+), 19 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index ce141e8..4b7c835 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -72,6 +72,7 @@
 #include "seq.h"
 #include "smap.h"
 #include "sset.h"
+#include "svec.h"
 #include "timeval.h"
 #include "tnl-neigh-cache.h"
 #include "tnl-ports.h"
@@ -970,6 +971,96 @@ sorted_poll_thread_list(struct dp_netdev *dp,
 *n = k;
 }
 
+static void *
+ovs_keepalive(void *f_ OVS_UNUSED)
+{
+pthread_detach(pthread_self());
+
+for (;;) {
+ovsrcu_quiesce_start();
+usleep(get_ka_interval() * 1000);
+ovsrcu_quiesce_end();
+}
+
+return NULL;
+}
+
+static void
+ka_thread_start(struct dp_netdev *dp)
+{
+static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
+
+if (ovsthread_once_start()) {
+ovs_thread_create("ovs_keepalive", ovs_keepalive, dp);
+
+ovsthread_once_done();
+}
+}
+
+static void
+pmd_num_poll_ports(struct dp_netdev_pmd_thread *pmd, int *num_poll_ports)
+{
+struct svec pmd_port_poll_list;
+svec_init(_port_poll_list);
+
+struct rxq_poll *poll;
+const char *port_name;
+int i = 0;
+
+HMAP_FOR_EACH (poll, node, >poll_list) {
+svec_add(_port_poll_list, netdev_rxq_get_name(poll->rxq->rx));
+}
+/* With MQ enabled, remove the duplicates. */
+svec_sort_unique(_port_poll_list);
+SVEC_FOR_EACH (i, port_name, _port_poll_list) {
+VLOG_DBG("%d Port:%s", i, port_name);
+}
+svec_destroy(_port_poll_list);
+
+*num_poll_ports = i;
+VLOG_DBG("PMD thread [%d] polling [%d] ports",
+ pmd->core_id, *num_poll_ports);
+}
+
+static void
+ka_register_datapath_threads(struct dp_netdev *dp)
+{
+int ka_init = get_ka_init_status();
+VLOG_DBG("Keepalive: Was initialization successful? [%s]",
+ka_init ? "Success" : "Failure");
+if (!ka_init) {
+return;
+}
+
+ka_thread_start(dp);
+
+struct dp_netdev_pmd_thread *pmd;
+CMAP_FOR_EACH (pmd, node, >poll_threads) {
+/* Skip PMD thread with no rxqs mapping. */
+if (OVS_UNLIKELY(!hmap_count(>poll_list))) {
+continue;
+}
+
+/*  Register only PMD threads. */
+if (pmd->core_id != NON_PMD_CORE_ID) {
+int err;
+int nports;
+pmd_num_poll_ports(pmd, );
+err = ka_alloc_portstats(pmd->core_id, nports);
+if (err) {
+VLOG_FATAL("Unable to allocate memory for PMD core %d",
+pmd->core_id);
+return;
+}
+
+int tid = ka_get_pmd_tid(pmd->core_id);
+ka_register_thread(tid, true);
+VLOG_DBG("Registered PMD thread [%d] on Core [%d] to KA framework",
+  tid, pmd->core_id);
+}
+}
+}
+
 static void
 dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
  void *aux)
@@ -3541,6 +3632,9 @@ reconfigure_datapath(struct dp_netdev *dp)
 
 /* Reload affected pmd threads. */
 reload_affected_pmds(dp);
+
+/* Register datapath threads to KA monitoring. */
+ka_register_datapath_threads(dp);
 }
 
 /* Returns true if one of the netdevs in 'dp' requires a reconfiguration */
@@ -3740,6 +3834,9 @@ reload:
poll_list[i].port_no);
 }
 
+/* Mark PMD thread alive. */
+ka_mark_pmd_thread_alive();
+
 if (lc++ > 1024) {
 bool reload;
 
@@ -3770,6 +3867,9 @@ reload:
 goto reload;
 }
 
+int tid = ka_get_pmd_tid(pmd->core_id);
+ka_unregister_thread(tid, true);
+
 free(poll_list);
 pmd_free_cached_ports(pmd);
 return NULL;
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 54faf49..64ab117 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -25,6 +25,7 @@
 #include "keepalive.h"
 #include "lib/vswitch-idl.h"
 #include "openvswitch/vlog.h"
+#include "process.h"
 
 VLOG_DEFINE_THIS_MODULE(keepalive);
 
@@ -76,21 +77,77 @@ ka_store_pmd_id(unsigned core_idx)
 }
 }
 
-/* Register packet processing PMD thread to KA framework. */
+/* Register thread to KA framework. */
 void
-ka_register_pmd_thread(int tid OVS_UNUSED, unsigned core_id)
+ka_register_thread(int tid, bool 

[ovs-dev] [RFC PATCH v3 07/18] dpif-netdev: Enable heartbeats for DPDK datapath.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit adds heartbeat mechanism support for DPDK datapath. Heartbeats
are sent to registered PMD threads at predefined intervals (as set in ovsdb
with 'keepalive-interval').

Heartbeats are only enabled when at least one port has been added to
the bridge and an active PMD thread is polling that port.

Signed-off-by: Bhanuprakash Bodireddy 
---
 lib/dpdk-stub.c   | 6 ++
 lib/dpdk.c| 7 +++
 lib/dpdk.h| 2 ++
 lib/dpif-netdev.c | 9 -
 lib/keepalive.c   | 9 +
 lib/keepalive.h   | 1 +
 6 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c
index d7fb19b..bf7b891 100644
--- a/lib/dpdk-stub.c
+++ b/lib/dpdk-stub.c
@@ -72,3 +72,9 @@ dpdk_mark_pmd_core_sleep(void)
 {
 /* Nothing */
 }
+
+void
+dpdk_dispatch_pmd_hb(void)
+{
+/* Nothing */
+}
diff --git a/lib/dpdk.c b/lib/dpdk.c
index 917ef58..231d045 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -537,3 +537,10 @@ dpdk_mark_pmd_core_sleep(void)
 {
 rte_keepalive_mark_sleep(rte_global_keepalive_info);
 }
+
+/* Dispatch pings */
+void
+dpdk_dispatch_pmd_hb(void)
+{
+rte_keepalive_dispatch_pings(NULL, rte_global_keepalive_info);
+}
diff --git a/lib/dpdk.h b/lib/dpdk.h
index 177624d..9fb438d 100644
--- a/lib/dpdk.h
+++ b/lib/dpdk.h
@@ -48,4 +48,6 @@ void dpdk_unregister_pmd_core(unsigned core_id);
 void dpdk_mark_pmd_core_alive(void);
 void dpdk_mark_pmd_core_sleep(void);
 
+void dpdk_dispatch_pmd_hb(void);
+
 #endif /* dpdk.h */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 4b7c835..cf4d68c 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -972,11 +972,18 @@ sorted_poll_thread_list(struct dp_netdev *dp,
 }
 
 static void *
-ovs_keepalive(void *f_ OVS_UNUSED)
+ovs_keepalive(void *f_)
 {
+struct dp_netdev *dp = f_;
+
 pthread_detach(pthread_self());
 
 for (;;) {
+int n_pmds = cmap_count(>poll_threads) - 1;
+if (n_pmds > 0) {
+dispatch_heartbeats();
+}
+
 ovsrcu_quiesce_start();
 usleep(get_ka_interval() * 1000);
 ovsrcu_quiesce_end();
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 64ab117..353f1d1 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -239,6 +239,15 @@ ka_destroy_portstats(void)
 }
 }
 
+/* Dispatch pings */
+void
+dispatch_heartbeats(void)
+{
+#ifdef DPDK_NETDEV
+dpdk_dispatch_pmd_hb();
+#endif
+}
+
 static struct keepalive_info *
 keepalive_info_create(void)
 {
diff --git a/lib/keepalive.h b/lib/keepalive.h
index f1e232d..cfe02e5 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -102,4 +102,5 @@ int ka_get_pmd_tid(unsigned core);
 int ka_alloc_portstats(unsigned, int);
 void ka_destroy_portstats(void);
 
+void dispatch_heartbeats(void);
 #endif /* keepalive.h */
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH v3 05/18] dpif-netdev: Add helper function to store datapath tids.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit adds an API to store the PMD thread ids in the KA info struct.
The thread ids shall be used to check for false positives and for status and
statistics reporting.

Signed-off-by: Bhanuprakash Bodireddy 
---
 lib/dpif-netdev.c |  3 +++
 lib/keepalive.c   | 13 +
 lib/keepalive.h   |  1 +
 3 files changed, 17 insertions(+)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 2b65dc7..ce141e8 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -48,6 +48,7 @@
 #include "fat-rwlock.h"
 #include "flow.h"
 #include "hmapx.h"
+#include "keepalive.h"
 #include "latch.h"
 #include "netdev.h"
 #include "netdev-vport.h"
@@ -3708,6 +3709,8 @@ pmd_thread_main(void *f_)
 
 poll_list = NULL;
 
+ka_store_pmd_id(pmd->core_id);
+
 /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */
 ovsthread_setspecific(pmd->dp->per_pmd_key, pmd);
 ovs_numa_thread_setaffinity_core(pmd->core_id);
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 747d947..54faf49 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -63,6 +63,19 @@ get_ka_init_status(void)
 return ka_init_status;
 }
 
+void
+ka_store_pmd_id(unsigned core_idx)
+{
+int tid = -1;
+#ifdef DPDK_NETDEV
+tid = rte_sys_gettid();
+#endif
+
+if (ka_is_enabled()) {
+ka_info->thread_id[core_idx] = tid;
+}
+}
+
 /* Register packet processing PMD thread to KA framework. */
 void
 ka_register_pmd_thread(int tid OVS_UNUSED, unsigned core_id)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index a35b309..67f89da 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -76,6 +76,7 @@ void ka_unregister_pmd_thread(int, unsigned);
 void ka_mark_pmd_thread_alive(void);
 void ka_mark_pmd_thread_sleep(void);
 
+void ka_store_pmd_id(unsigned core);
 uint32_t get_ka_interval(void);
 int get_ka_init_status(void);
 int ka_get_pmd_tid(unsigned core);
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH v3 04/18] keepalive: Add more helper functions to KA framework.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit introduces helper functions in 'keepalive' module that are
needed to register/unregister PMD threads to KA framework. Also
introduce APIs to mark the PMD core states.

Signed-off-by: Bhanuprakash Bodireddy 
---
 lib/keepalive.c | 49 +
 lib/keepalive.h |  9 +
 2 files changed, 58 insertions(+)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index 7d1c01c..747d947 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -50,6 +50,55 @@ ka_get_pmd_tid(unsigned core_idx)
 return tid;
 }
 
+/* Return the Keepalive timer interval. */
+inline uint32_t
+get_ka_interval(void)
+{
+return keepalive_timer_interval;
+}
+
+inline int
+get_ka_init_status(void)
+{
+return ka_init_status;
+}
+
+/* Register packet processing PMD thread to KA framework. */
+void
+ka_register_pmd_thread(int tid OVS_UNUSED, unsigned core_id)
+{
+if (ka_is_enabled()) {
+dpdk_register_pmd_core(core_id);
+}
+}
+
+/* Unregister packet processing PMD thread from KA framework. */
+void
+ka_unregister_pmd_thread(int tid OVS_UNUSED, unsigned core_id)
+{
+if (ka_is_enabled()) {
+dpdk_unregister_pmd_core(core_id);
+}
+}
+
+/* Mark packet processing core alive. */
+inline void
+ka_mark_pmd_thread_alive(void)
+{
+if (ka_is_enabled()) {
+dpdk_mark_pmd_core_alive();
+}
+}
+
+/* Mark packet processing core as idle. */
+inline void
+ka_mark_pmd_thread_sleep(void)
+{
+if (ka_is_enabled()) {
+dpdk_mark_pmd_core_sleep();
+}
+}
+
 void
 ka_set_pmd_state_ts(unsigned core_id, enum keepalive_state state,
 uint64_t last_alive)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index b87b66f..a35b309 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -71,4 +71,13 @@ void ka_set_pmd_state_ts(unsigned, enum keepalive_state, 
uint64_t);
 
 int ka_get_pmd_tid(unsigned core);
 bool ka_is_enabled(void);
+void ka_register_pmd_thread(int, unsigned);
+void ka_unregister_pmd_thread(int, unsigned);
+void ka_mark_pmd_thread_alive(void);
+void ka_mark_pmd_thread_sleep(void);
+
+uint32_t get_ka_interval(void);
+int get_ka_init_status(void);
+int ka_get_pmd_tid(unsigned core);
+
 #endif /* keepalive.h */
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH v3 03/18] Keepalive: Add initial keepalive support.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit introduces the initial keepalive support by adding
'keepalive' module and also helper and initialization functions
that will be invoked by later commits.

This commit adds new ovsdb column "keepalive" that shows the status
of the datapath threads. This is implemented for DPDK datapath and
only status of PMD threads is reported.

For example:
  To enable the keepalive feature:
  'ovs-vsctl --no-wait set Open_vSwitch . other_config:enable-keepalive=true'

  To set a timer interval of 5000ms for monitoring packet processing
cores:
  'ovs-vsctl --no-wait set Open_vSwitch . \
 other_config:keepalive-interval="5000"'

Signed-off-by: Bhanuprakash Bodireddy 
---
 lib/automake.mk|   2 +
 lib/dpdk.c |  17 +
 lib/dpdk.h |   2 +
 lib/keepalive.c| 160 +
 lib/keepalive.h|  74 +
 lib/netdev-dpdk.c  |  61 -
 lib/netdev-dpdk.h  |   5 ++
 vswitchd/bridge.c  |   5 ++
 vswitchd/vswitch.ovsschema |   8 ++-
 vswitchd/vswitch.xml   |  49 ++
 10 files changed, 380 insertions(+), 3 deletions(-)
 create mode 100644 lib/keepalive.c
 create mode 100644 lib/keepalive.h

diff --git a/lib/automake.mk b/lib/automake.mk
index 54a1032..8f6e146 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -110,6 +110,8 @@ lib_libopenvswitch_la_SOURCES = \
lib/json.c \
lib/jsonrpc.c \
lib/jsonrpc.h \
+   lib/keepalive.c \
+   lib/keepalive.h \
lib/lacp.c \
lib/lacp.h \
lib/latch.h \
diff --git a/lib/dpdk.c b/lib/dpdk.c
index 8db63bf..917ef58 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -32,6 +32,7 @@
 
 #include "dirs.h"
 #include "fatal-signal.h"
+#include "keepalive.h"
 #include "netdev-dpdk.h"
 #include "openvswitch/dynamic-string.h"
 #include "openvswitch/vlog.h"
@@ -477,6 +478,22 @@ dpdk_init(const struct smap *ovs_other_config)
 }
 }
 
+int
+dpdk_ka_init(struct keepalive_info *ka_info)
+{
+/* Initialize keepalive subsystem */
+if ((rte_global_keepalive_info =
+rte_keepalive_create(_failcore_cb, ka_info)) == NULL) {
+VLOG_ERR("Keepalive initialization failed.");
+return -1;
+} else {
+rte_keepalive_register_relay_callback(rte_global_keepalive_info,
+dpdk_ka_update_core_state, ka_info);
+}
+
+return 0;
+}
+
 const char *
 dpdk_get_vhost_sock_dir(void)
 {
diff --git a/lib/dpdk.h b/lib/dpdk.h
index bdbb51b..177624d 100644
--- a/lib/dpdk.h
+++ b/lib/dpdk.h
@@ -34,9 +34,11 @@
 #endif /* DPDK_NETDEV */
 
 struct smap;
+struct keepalive_info;
 
 struct rte_keepalive *rte_global_keepalive_info;
 void dpdk_init(const struct smap *ovs_other_config);
+int dpdk_ka_init(struct keepalive_info *ka_info);
 void dpdk_set_lcore_id(unsigned cpu);
 const char *dpdk_get_vhost_sock_dir(void);
 
diff --git a/lib/keepalive.c b/lib/keepalive.c
new file mode 100644
index 000..7d1c01c
--- /dev/null
+++ b/lib/keepalive.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "dpdk.h"
+#include "keepalive.h"
+#include "lib/vswitch-idl.h"
+#include "openvswitch/vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(keepalive);
+
+static bool keepalive_enable = false;/* Keepalive disabled by default */
+static bool ka_init_status = ka_init_failure; /* Keepalive initialization */
+static uint32_t keepalive_timer_interval; /* keepalive timer interval */
+static struct keepalive_info *ka_info = NULL;
+
+inline bool
+ka_is_enabled(void)
+{
+return keepalive_enable;
+}
+
+inline int
+ka_get_pmd_tid(unsigned core_idx)
+{
+int tid = -1;
+if (ka_is_enabled()) {
+tid = ka_info->thread_id[core_idx];
+}
+ovs_assert(tid > 0);
+return tid;
+}
+
+void
+ka_set_pmd_state_ts(unsigned core_id, enum keepalive_state state,
+uint64_t last_alive)
+{
+struct ka_process_info *pinfo;
+int tid = ka_get_pmd_tid(core_id);
+
+ovs_mutex_lock(_info->proclist_mutex);
+HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0),
+ _info->process_list) {
+if ((pinfo->core_id == core_id) && (pinfo->tid == tid)) {
+pinfo->core_state = state;
+pinfo->core_last_seen_times = 

[ovs-dev] [RFC PATCH v3 01/18] dpdk: Add helper functions for DPDK datapath keepalive.

2017-06-18 Thread Bhanuprakash Bodireddy
Introduce helper functions in 'dpdk' module that are needed for
DPDK keepalive functionality. Also add dummy functions in 'dpdk-stub' module
that are needed when DPDK datapath is not available.

Signed-off-by: Bhanuprakash Bodireddy 
---
 lib/dpdk-stub.c | 24 
 lib/dpdk.c  | 31 +++
 lib/dpdk.h  | 10 ++
 3 files changed, 65 insertions(+)

diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c
index daef729..d7fb19b 100644
--- a/lib/dpdk-stub.c
+++ b/lib/dpdk-stub.c
@@ -48,3 +48,27 @@ dpdk_get_vhost_sock_dir(void)
 {
 return NULL;
 }
+
+void
+dpdk_register_pmd_core(unsigned core_id OVS_UNUSED)
+{
+/* Nothing */
+}
+
+void
+dpdk_unregister_pmd_core(unsigned core_id OVS_UNUSED)
+{
+/* Nothing */
+}
+
+void
+dpdk_mark_pmd_core_alive(void)
+{
+/* Nothing */
+}
+
+void
+dpdk_mark_pmd_core_sleep(void)
+{
+/* Nothing */
+}
diff --git a/lib/dpdk.c b/lib/dpdk.c
index 8da6c32..8db63bf 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 #ifdef DPDK_PDUMP
@@ -489,3 +490,33 @@ dpdk_set_lcore_id(unsigned cpu)
 ovs_assert(cpu != NON_PMD_CORE_ID);
 RTE_PER_LCORE(_lcore_id) = cpu;
 }
+
+/* Register packet processing core 'core_id' for liveness checks. */
+void
+dpdk_register_pmd_core(unsigned core)
+{
+rte_keepalive_register_core(rte_global_keepalive_info, core);
+}
+
+void
+dpdk_unregister_pmd_core(unsigned core OVS_UNUSED)
+{
+/* XXX: DPDK unfortunately hasn't implemented unregister API
+ * This will be fixed later, instead use sleep API now.
+ */
+rte_keepalive_mark_sleep(rte_global_keepalive_info);
+}
+
+/* Mark packet processing core alive. */
+void
+dpdk_mark_pmd_core_alive(void)
+{
+rte_keepalive_mark_alive(rte_global_keepalive_info);
+}
+
+/* Mark packet processing core as idle. */
+void
+dpdk_mark_pmd_core_sleep(void)
+{
+rte_keepalive_mark_sleep(rte_global_keepalive_info);
+}
diff --git a/lib/dpdk.h b/lib/dpdk.h
index 673a1f1..bdbb51b 100644
--- a/lib/dpdk.h
+++ b/lib/dpdk.h
@@ -17,6 +17,7 @@
 #ifndef DPDK_H
 #define DPDK_H
 
+#include 
 #ifdef DPDK_NETDEV
 
 #include 
@@ -26,14 +27,23 @@
 
 #else
 
+#include 
+
 #define NON_PMD_CORE_ID UINT32_MAX
 
 #endif /* DPDK_NETDEV */
 
 struct smap;
 
+struct rte_keepalive *rte_global_keepalive_info;
 void dpdk_init(const struct smap *ovs_other_config);
 void dpdk_set_lcore_id(unsigned cpu);
 const char *dpdk_get_vhost_sock_dir(void);
 
+/* Keepalive APIs */
+void dpdk_register_pmd_core(unsigned core_id);
+void dpdk_unregister_pmd_core(unsigned core_id);
+void dpdk_mark_pmd_core_alive(void);
+void dpdk_mark_pmd_core_sleep(void);
+
 #endif /* dpdk.h */
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH v3 02/18] process: Add helper functions to retrieve process related info.

2017-06-18 Thread Bhanuprakash Bodireddy
Implement helper functions to retrieve a process's status, its name, and the
last core on which it was scheduled. The APIs will be used by the keepalive
monitoring framework in future commits.

Signed-off-by: Bhanuprakash Bodireddy 
---
 lib/process.c | 152 ++
 lib/process.h |  13 +
 2 files changed, 165 insertions(+)

diff --git a/lib/process.c b/lib/process.c
index e9d0ba9..4c029c1 100644
--- a/lib/process.c
+++ b/lib/process.c
@@ -50,6 +50,20 @@ struct process {
 int status;
 };
 
+struct pstate2Num {
+char *pidstate;
+int num;
+};
+
+const struct pstate2Num pstate_map[] = {
+{ "S", STOPPED_STATE },
+{ "R", ACTIVE_STATE },
+{ "t", TRACED_STATE },
+{ "Z", DEFUNC_STATE },
+{ "D", UNINTERRUPTIBLE_SLEEP_STATE },
+{ "NULL", UNUSED_STATE },
+};
+
 /* Pipe used to signal child termination. */
 static int fds[2];
 
@@ -390,6 +404,144 @@ process_run(void)
 #endif
 }
 
+/* Looks up the state of process 'pid' on the "State:" line of
+ * /proc/<pid>/status and stores the matching 'enum process_states' value in
+ * '*pstate'.  '*pstate' is set to UNUSED_STATE when the state letter is not
+ * in pstate_map[] or no "State:" line is found.
+ *
+ * Returns 0 if the status file could be opened, the errno value from fopen()
+ * if it could not, or ENOSYS on non-Linux builds (where '*pstate' is left
+ * untouched). */
+int
+get_process_status(int pid, int *pstate)
+{
+#ifdef __linux__
+    char path[64];              /* Fits "/proc/<any int>/status". */
+    char line[75];
+    char name[15], value[5], status[20];
+    FILE *stream;
+    size_t i;
+    int ln;
+
+    /* Never hand an indeterminate value back to the caller. */
+    *pstate = UNUSED_STATE;
+
+    snprintf(path, sizeof path, "/proc/%d/status", pid);
+    stream = fopen(path, "r");
+    if (stream == NULL) {
+        /* Save errno first: logging may overwrite it. */
+        int error = errno;
+
+        VLOG_WARN_ONCE("%s: open failed: %s", path, ovs_strerror(error));
+        return error;
+    }
+
+    for (ln = 0; fgets(line, sizeof line, stream); ln++) {
+        if (!ovs_scan(line, "%6s %2s %14s\n", name, value, status)) {
+            VLOG_WARN_ONCE("%s: could not parse line %d: %s",
+                           path, ln, line);
+            continue;
+        }
+        if (strcmp(name, "State:")) {
+            continue;
+        }
+
+        /* Bounded by the array size as well as by the null terminator, so
+         * an unknown state letter can never walk off the table. */
+        for (i = 0; i < sizeof pstate_map / sizeof pstate_map[0]; i++) {
+            const char *key = pstate_map[i].pidstate;
+
+            if (key == NULL) {
+                break;
+            }
+            if (!strcmp(key, value)) {
+                VLOG_WARN_ONCE("The state is %s, status is %d\n",
+                               key, pstate_map[i].num);
+                *pstate = pstate_map[i].num;
+                break;
+            }
+        }
+        break;                  /* Only the "State:" line is of interest. */
+    }
+
+    fclose(stream);
+    return 0;
+#else
+    return ENOSYS;
+#endif
+}
+
+/* Returns true if get_process_status() succeeds for 'pid' and reports
+ * ACTIVE_STATE, false in every other case.  Always false on non-Linux
+ * builds. */
+bool
+process_is_active(int pid)
+{
+#ifdef __linux__
+    /* Pre-set so that a lookup which matches no known state letter cannot
+     * leave an indeterminate value to compare against. */
+    int pstate = UNUSED_STATE;
+
+    return !get_process_status(pid, &pstate) && pstate == ACTIVE_STATE;
+#else
+    return false;
+#endif
+}
+
+/* Returns the first whitespace-delimited word of /proc/<pid>/task/<pid>/comm
+ * as a newly allocated string, or NULL if 'pid' is -1, the file cannot be
+ * opened or it yields no word.  Always NULL on non-Linux builds.
+ *
+ * The caller owns the returned string and must free() it. */
+char *
+get_process_name(int pid)
+{
+#ifdef __linux__
+    char proc_path[64];         /* Fits "/proc/<int>/task/<int>/comm". */
+    char line[20];
+    char *pname = NULL;
+    FILE *stream;
+
+    if (pid == -1) {
+        VLOG_ERR("Invalid process id : %d", pid);
+        return NULL;
+    }
+
+    snprintf(proc_path, sizeof proc_path,
+             "/proc/%d/task/%d/comm", pid, pid);
+    stream = fopen(proc_path, "r");
+    if (!stream) {
+        VLOG_WARN("%s: open failed: %s", proc_path, ovs_strerror(errno));
+        return NULL;
+    }
+
+    if (fgets(line, sizeof line, stream) != NULL) {
+        /* Allocate only once there is something to copy; 'line' bounds the
+         * length of any word scanned out of it. */
+        pname = xmalloc(sizeof line);
+        if (!ovs_scan(line, "%s", pname)) {
+            free(pname);
+            pname = NULL;
+        }
+    }
+
+    fclose(stream);
+    return pname;
+#else
+    return NULL;
+#endif
+}
+
+/* Retrieve the last core id that executed the process.
+ *
+ * Refer http://man7.org/linux/man-pages/man5/proc.5.html
+ * and the processor field (field 39) in /proc/[pid]/stat.
+ *
+ * Returns the core id on success.  If the stat file cannot be opened the
+ * errno value from fopen() is returned, and on non-Linux builds ENOSYS is
+ * returned.  NOTE(review): both are positive and so cannot be told apart from
+ * a core id by the caller; kept for compatibility.
+ *
+ * Asserts if the file was opened but no valid processor field was found. */
+int
+get_cpu_num(int pid)
+{
+#ifdef __linux__
+    enum { STAT_STATE_FIELD = 3, STAT_PROCESSOR_FIELD = 39 };
+    char proc_path[64];         /* Fits "/proc/<any int>/stat". */
+    char line[500];
+    FILE *stream;
+    int cpu_id = -1;
+
+    snprintf(proc_path, sizeof proc_path, "/proc/%d/stat", pid);
+    stream = fopen(proc_path, "r");
+    if (!stream) {
+        /* Save errno first: logging may overwrite it. */
+        int error = errno;
+
+        VLOG_WARN_ONCE("%s: open failed: %s", proc_path,
+                       ovs_strerror(error));
+        return error;
+    }
+
+    if (fgets(line, sizeof line, stream) != NULL) {
+        char *save_ptr = NULL;
+        char *fields, *tok;
+        int field;
+
+        /* Field 2 is "(comm)" and the name itself may contain spaces or
+         * parentheses, so resume counting after the last ')' where field 3
+         * (state) begins. */
+        fields = strrchr(line, ')');
+        if (fields) {
+            fields++;
+            field = STAT_STATE_FIELD;
+        } else {
+            fields = line;
+            field = 1;
+        }
+
+        for (tok = strtok_r(fields, " \n", &save_ptr); tok != NULL;
+             tok = strtok_r(NULL, " \n", &save_ptr), field++) {
+            if (field == STAT_PROCESSOR_FIELD) {
+                char *end;
+                long value;
+
+                errno = 0;
+                value = strtol(tok, &end, 10);
+                if (!errno && end != tok && *end == '\0'
+                    && value >= 0 && value <= INT_MAX) {
+                    cpu_id = (int) value;
+                }
+                break;
+            }
+        }
+    }
+
+    fclose(stream);
+
+    ovs_assert(cpu_id >= 0);
+    return cpu_id;
+#else
+    return ENOSYS;
+#endif
+}
 
 /* Causes the next call to poll_block() to wake up when process 'p' has
  * exited. */
diff --git a/lib/process.h b/lib/process.h
index 3feac7e..041767d 100644
--- a/lib/process.h
+++ b/lib/process.h
@@ -20,6 +20,15 @@
 #include 
 #include 
 
+/* Process states stored by get_process_status().  Each value is paired with
+ * a "State:" letter from /proc/<pid>/status by pstate_map[] in process.c. */
+enum process_states {
+UNUSED_STATE,                   /* Paired with the table's last entry. */
+STOPPED_STATE,                  /* State letter "S". */
+ACTIVE_STATE,                   /* State letter "R". */
+TRACED_STATE,                   /* State letter "t". */
+DEFUNC_STATE,                   /* State letter "Z". */
+UNINTERRUPTIBLE_SLEEP_STATE     /* State letter "D". */
+};
+
 struct process;
 
 /* Starting and monitoring subprocesses.
@@ -38,6 +47,10 @@ bool process_exited(struct process *);
 int process_status(const struct process *);
 void process_run(void);
 void process_wait(struct process *);
+int 

[ovs-dev] [RFC PATCH v3 00/18] Add OVS DPDK keep-alive functionality

2017-06-18 Thread Bhanuprakash Bodireddy
The keepalive feature is aimed at achieving Fastpath Service Assurance
in OVS-DPDK deployments. It adds support for monitoring the packet
processing cores (PMD thread cores) by dispatching heartbeats at regular
intervals. In case of missed heartbeats, additional health checks are
enabled on the PMD thread to detect the failure, which is then
reported to higher-level fault management systems/frameworks.

The implementation uses OVSDB for reporting the datapath status and the
health of the PMD threads. Any external monitoring application can read
the status from OVSDB at regular intervals (or) subscribe to the updates
in OVSDB so that they get notified when the changes happen on OVSDB.

keepalive info struct is created and initialized for storing the
status of the PMD threads. This is initialized by main thread(vswitchd)
as part of init process and will be periodically updated by 'keepalive'
thread. keepalive feature can be enabled through below OVSDB settings.

enable-keepalive=true
  - Keepalive feature is disabled by default.

keepalive-interval="5000"
  - Timer interval in milliseconds for monitoring the packet
processing cores.

When KA is enabled, an 'ovs-keepalive' thread is spawned that wakes
up at regular intervals to update the timestamp and status of the PMD cores
in the keepalive info struct. The vswitchd thread reads this information
and writes the status to the 'keepalive' column of the Open_vSwitch table in
OVSDB.

An external monitoring framework like collectd with ovs events support
can read (or) subscribe to the datapath status changes in ovsdb. When the state
is updated, the collectd shall be notified and will eventually relay the status
to ceilometer service running in the controller. Below is the high level
overview of deployment model.

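    Compute Node                 Controller                 Compute Node

    Collectd  <-------------->   Ceilometer   <------------>   Collectd

    OvS DPDK                                                   OvS DPDK

    +-----+
    | VM  |
    +--+--+
   \---+---/
       |
    +--+---+       +------------+----------+     +------+-------+
    | OVS  |-----> |   ovsevents plugin    | --> |   collectd   |
    +--+---+       +------------+----------+     +------+-------+

 +------+-----+     +---------------+------------+     |
 | Ceilometer | <-- | collectd ceilometer plugin |  <---
 +------+-----+     +---------------+------------+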

Performance impact:
  No noticeable performance or latency impact is observed with
  KA feature enabled.

-
v2-> v3
  * Remove POSIX shared memory block implementation (suggested by Aaron).
  * Rework the logic to register and track threads instead of cores. This way
in the future any thread can be registered to KA framework. For now only PMD
threads are tracked (suggested by Aaron).
  * Refactor few APIs and further clean up the code.
   
v1-> v2
  * Merged the xml and schema commits to later commit where the actual
implementation is done(suggested by Ben).
  * Fix ovs-appctl keepalive/* hang issue when KA disabled.
  * Fixed memory leaks with appctl commands for keepalive/pmd-health-show,
pmd-xstats-show.
  * Refactored code and fixed APIs dealing with PMD health monitoring.

Bhanuprakash Bodireddy (18):
[9] patches help update OVSDB with keepalive status

  dpdk: Add helper functions for DPDK datapath keepalive.
  process: Add helper functions to retrieve process related info.
  Keepalive: Add initial keepalive support.
  keepalive: Add more helper functions to KA framework.
  dpif-netdev: Add helper function to store datapath tids.
  dpif-netdev: Register packet processing cores to KA framework.
  dpif-netdev: Enable heartbeats for DPDK datapath.
  keepalive: Retrieve PMD status periodically.
  bridge: Update keepalive status in OVSDB

  keepalive: Add support to query keepalive statistics.
  keepalive: Add support to query keepalive status.

[5] Patches add additional health checks in case of heartbeat failure.

  dpif-netdev: Add additional datapath health checks.
  keepalive: Check the link status as part of PMD health checks.
  keepalive: Check the packet statistics as part of PMD health checks.
  keepalive: Check the PMD cycle stats as part of PMD health checks.
  netdev-dpdk: Enable PMD health checks on heartbeat failure.
  keepalive: Display extended Keepalive status.
  Documentation: Update DPDK doc with Keepalive feature.

 Documentation/howto/dpdk.rst |  90 +
 lib/automake.mk  |   2 +
 lib/dpdk-stub.c  |  30 ++
 lib/dpdk.c   |  55 +++
 lib/dpdk.h   |  14 +
 lib/dpif-netdev.c| 203 +-
 lib/dpif-netdev.h|   6 +
 lib/keepalive.c  | 917 +++
 lib/keepalive.h  | 150 +++
 lib/netdev-dpdk.c| 119 +-
 lib/netdev-dpdk.h|   5 +
 lib/process.c| 152 +++