DPDK v1.8.0 makes significant changes to struct rte_mbuf, including removal of the 'pkt' and 'data' fields. The latter, formerly a pointer, is now calculated via an offset from the start of the segment buffer. These fields are referenced by OVS when accessing the data section of an ofpbuf.
The following changes are required to add support for DPDK 1.8: - update affected functions to use the correct rte_mbuf fields - remove init function from netdev-dpdk (no longer required as rte_eal_pci_probe is now invoked from eal_init) - split large amounts of data across multiple ofpbufs; with the removal of the mbuf's 'data' pointer, and replacement with a 'data_off' field, it is necessary to limit the size of data contained in an ofpbuf to UINT16_MAX when mbufs are used (data_off and data_len are both of type uint16_t). Were data not split across multiple ofpbufs, values larger than UINT16_MAX for 'data_len' and 'data_off' would result in wrap-around, and consequently, data corruption. Changes introduced in this patch prevent this from occurring. Signed-off-by: Mark Kavanagh <mark.b.kavan...@intel.com> Signed-off-by: Mark Gray <mark.d.g...@intel.com> Signed-off-by: Rory Sexton <rory.sex...@intel.com> --- lib/jsonrpc.c | 27 +++++++++++++++++++-------- lib/netdev-dpdk.c | 31 +++++++++---------------------- lib/ofpbuf.c | 4 +++- lib/ofpbuf.h | 35 +++++++++++++++++++++++++++++------ lib/packet-dpif.h | 4 ++-- 5 files changed, 62 insertions(+), 39 deletions(-) diff --git a/lib/jsonrpc.c b/lib/jsonrpc.c index f15adca..7bbdc22 100644 --- a/lib/jsonrpc.c +++ b/lib/jsonrpc.c @@ -238,10 +238,10 @@ jsonrpc_log_msg(const struct jsonrpc *rpc, const char *title, int jsonrpc_send(struct jsonrpc *rpc, struct jsonrpc_msg *msg) { - struct ofpbuf *buf; struct json *json; size_t length; char *s; + size_t remaining; if (rpc->status) { jsonrpc_msg_destroy(msg); @@ -252,15 +252,26 @@ jsonrpc_send(struct jsonrpc *rpc, struct jsonrpc_msg *msg) json = jsonrpc_msg_to_json(msg); s = json_to_string(json, 0); - length = strlen(s); + remaining = length = strlen(s); json_destroy(json); - buf = xmalloc(sizeof *buf); - ofpbuf_use(buf, s, length); - ofpbuf_set_size(buf, length); - list_push_back(&rpc->output, &buf->list_node); - rpc->output_count++; - rpc->backlog += length; + /* Large (i.e. 
> OFPBUF_DATA_MAX) strings must be split across multiple + * ofpbufs to prevent data corruption. This is largely applicable when DPDK + * mbufs are used, since the 'data_off' and 'data_len' fields are of type + * uint16_t, and subject to wrap-around if the amount of data destined for + * the containing ofpbuf exceeds UINT16_MAX. + */ + while (remaining) { + size_t segment_size; + segment_size = remaining > OFPBUF_DATA_MAX ? OFPBUF_DATA_MAX : remaining; + + struct ofpbuf *new_buf = ofpbuf_clone_data((char *)s + length - remaining, segment_size); + list_push_back(&rpc->output, &new_buf->list_node); + + rpc->output_count++; + rpc->backlog += segment_size; + remaining -= segment_size; + } if (rpc->output_count >= 50) { VLOG_INFO_RL(&rl, "excessive sending backlog, jsonrpc: %s, num of" diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 0ede200..d4f859a 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -28,6 +28,9 @@ #include <unistd.h> #include <stdio.h> +#include <rte_config.h> +#include <rte_mbuf.h> + #include "dpif-netdev.h" #include "list.h" #include "netdev-dpdk.h" @@ -265,13 +268,12 @@ __rte_pktmbuf_init(struct rte_mempool *mp, m->buf_len = (uint16_t)buf_len; /* keep some headroom between start of buffer and data */ - m->pkt.data = (char*) m->buf_addr + RTE_MIN(RTE_PKTMBUF_HEADROOM, m->buf_len); + m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, m->buf_len); /* init some constant fields */ - m->type = RTE_MBUF_PKT; m->pool = mp; - m->pkt.nb_segs = 1; - m->pkt.in_port = 0xff; + m->nb_segs = 1; + m->port = 0xff; } static void @@ -825,7 +827,8 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dpif_packet ** pkts, } /* We have to do a copy for now */ - memcpy(mbufs[newcnt]->pkt.data, ofpbuf_data(&pkts[i]->ofpbuf), size); + memcpy(rte_pktmbuf_mtod(mbufs[newcnt], char *), + ofpbuf_data(&pkts[i]->ofpbuf), size); rte_pktmbuf_data_len(mbufs[newcnt]) = size; rte_pktmbuf_pkt_len(mbufs[newcnt]) = size; @@ -1270,22 +1273,6 @@ dpdk_common_init(void) 
ovs_thread_create("dpdk_watchdog", dpdk_watchdog, NULL); } -static int -dpdk_class_init(void) -{ - int result; - - result = rte_eal_pci_probe(); - if (result) { - VLOG_ERR("Cannot probe PCI"); - return -result; - } - - VLOG_INFO("Ethernet Device Count: %d", (int)rte_eth_dev_count()); - - return 0; -} - /* Client Rings */ static int @@ -1510,7 +1497,7 @@ dpdk_init(int argc, char **argv) const struct netdev_class dpdk_class = NETDEV_DPDK_CLASS( "dpdk", - dpdk_class_init, + NULL, netdev_dpdk_construct, netdev_dpdk_set_multiq, netdev_dpdk_eth_send); diff --git a/lib/ofpbuf.c b/lib/ofpbuf.c index 4946e6f..6e9e17f 100644 --- a/lib/ofpbuf.c +++ b/lib/ofpbuf.c @@ -280,7 +280,6 @@ ofpbuf_resize__(struct ofpbuf *b, size_t new_headroom, size_t new_tailroom) } b->allocated = new_allocated; - ofpbuf_set_base(b, new_base); new_data = (char *) new_base + new_headroom; if (ofpbuf_data(b) != new_data) { @@ -289,7 +288,10 @@ ofpbuf_resize__(struct ofpbuf *b, size_t new_headroom, size_t new_tailroom) b->frame = (char *) b->frame + data_delta; } + ofpbuf_set_base(b, new_base); ofpbuf_set_data(b, new_data); + } else { + ofpbuf_set_base(b, new_base); } } diff --git a/lib/ofpbuf.h b/lib/ofpbuf.h index 4e7038d0..ef0c319 100644 --- a/lib/ofpbuf.h +++ b/lib/ofpbuf.h @@ -19,6 +19,11 @@ #include <stddef.h> #include <stdint.h> + +#ifdef DPDK_NETDEV +#include <rte_common.h> +#endif + #include "list.h" #include "packets.h" #include "util.h" @@ -28,6 +33,12 @@ extern "C" { #endif +#ifdef DPDK_NETDEV + #define OFPBUF_DATA_MAX UINT16_MAX +#else + #define OFPBUF_DATA_MAX UINT32_MAX +#endif + enum OVS_PACKED_ENUM ofpbuf_source { OFPBUF_MALLOC, /* Obtained via malloc(). */ OFPBUF_STACK, /* Un-movable stack space or static buffer. 
*/ @@ -386,12 +397,23 @@ BUILD_ASSERT_DECL(offsetof(struct ofpbuf, mbuf) == 0); static inline void * ofpbuf_data(const struct ofpbuf *b) { - return b->mbuf.pkt.data; + return rte_pktmbuf_mtod(&(b->mbuf), void *); } static inline void ofpbuf_set_data(struct ofpbuf *b, void *d) { - b->mbuf.pkt.data = d; + uintptr_t data_delta; + + /* NULL 'd' value is valid */ + if (unlikely(d == NULL)) { + b->mbuf.data_off = 0; + } else { + ovs_assert(d >= b->mbuf.buf_addr); + /* Work out the offset between the start of segment buffer and 'd' */ + data_delta = RTE_PTR_DIFF(d, b->mbuf.buf_addr); + ovs_assert(data_delta <= OFPBUF_DATA_MAX); + b->mbuf.data_off = data_delta; + } } static inline void * ofpbuf_base(const struct ofpbuf *b) @@ -406,14 +428,15 @@ static inline void ofpbuf_set_base(struct ofpbuf *b, void *d) static inline uint32_t ofpbuf_size(const struct ofpbuf *b) { - return b->mbuf.pkt.pkt_len; + return b->mbuf.pkt_len; } static inline void ofpbuf_set_size(struct ofpbuf *b, uint32_t v) { - b->mbuf.pkt.data_len = v; /* Current seg length. */ - b->mbuf.pkt.pkt_len = v; /* Total length of all segments linked to - * this segment. */ + ovs_assert(v <= OFPBUF_DATA_MAX); + b->mbuf.data_len = v; /* Current seg length. */ + b->mbuf.pkt_len = v; /* Total length of all segments linked to + * this segment. 
*/ } #else diff --git a/lib/packet-dpif.h b/lib/packet-dpif.h index 1a5efb6..692a81a 100644 --- a/lib/packet-dpif.h +++ b/lib/packet-dpif.h @@ -50,7 +50,7 @@ static inline void dpif_packet_delete(struct dpif_packet *p) static inline uint32_t dpif_packet_get_dp_hash(struct dpif_packet *p) { #ifdef DPDK_NETDEV - return p->ofpbuf.mbuf.pkt.hash.rss; + return p->ofpbuf.mbuf.hash.rss; #else return p->dp_hash; #endif @@ -60,7 +60,7 @@ static inline void dpif_packet_set_dp_hash(struct dpif_packet *p, uint32_t hash) { #ifdef DPDK_NETDEV - p->ofpbuf.mbuf.pkt.hash.rss = hash; + p->ofpbuf.mbuf.hash.rss = hash; #else p->dp_hash = hash; #endif -- 1.7.4.1 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev