> On Jun 3, 2014, at 6:11 PM, Ryan Wilson <[email protected]> wrote:
>
> When a bridge of datatype type netdev receives a packet, it copies the
> packet from the NIC to a buffer in userspace. Currently, when making
> an upcall, the packet is again copied to the upcall's buffer. However,
> this extra copy is not necessary when the datapath exists in userspace
> as the upcall can directly access the packet data.
>
> This patch eliminates this extra copy of the packet data in most cases.
> In cases where the packet may still be used later by callers of
> dp_netdev_execute_actions, making a copy of the packet data is still
> necessary.
>
> This patch also adds a header_ field to 'struct ofpbuf' when using
> DPDK. This field holds a pointer to the allocated buffer in the
> rte_mempool. Thus, an upcall packet ofpbuf allocated on the stack can
> now share data and free memory of a rte_mempool allocated ofpbuf.
>
> Signed-off-by: Ryan Wilson <[email protected]>
> Acked-by: Jarno Rajahalme <[email protected]>
> ---
> v2: Addressed Jarno's comment to use direct pointer assignment for
> upcall->packet instead of ofpbuf_set().
> v3: Addressed issues with DPDK mentioned by Pravin.
> v4: Fixed various bugs found in perf test
> ---
> lib/dpif-netdev.c | 15 +++++----------
> lib/netdev-dpdk.c | 2 +-
> lib/ofpbuf.c | 6 +++++-
> lib/ofpbuf.h | 20 ++++++++++++++------
> 4 files changed, 25 insertions(+), 18 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 91c83d6..c89ae20 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -2024,7 +2024,6 @@ dp_netdev_input(struct dp_netdev *dp, struct ofpbuf
> *packet,
> miniflow_hash_5tuple(&key.flow, 0)
> % dp->n_handlers,
> DPIF_UC_MISS, &key.flow, NULL);
> - ofpbuf_delete(packet);
> }
> }
>
> @@ -2063,7 +2062,6 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct
> ofpbuf *packet,
> if (userdata) {
> buf_size += NLA_ALIGN(userdata->nla_len);
> }
> - buf_size += ofpbuf_size(packet);
> ofpbuf_init(buf, buf_size);
>
> /* Put ODP flow. */
> @@ -2078,15 +2076,14 @@ dp_netdev_output_userspace(struct dp_netdev *dp,
> struct ofpbuf *packet,
> NLA_ALIGN(userdata->nla_len));
> }
>
> - ofpbuf_set_data(&upcall->packet,
> - ofpbuf_put(buf, ofpbuf_data(packet),
> ofpbuf_size(packet)));
> - ofpbuf_set_size(&upcall->packet, ofpbuf_size(packet));
> + upcall->packet = *packet;
>
> seq_change(q->seq);
>
> error = 0;
> } else {
> dp_netdev_count_packet(dp, DP_STAT_LOST);
> + ofpbuf_delete(packet);
> error = ENOBUFS;
> }
> ovs_mutex_unlock(&q->mutex);
> @@ -2120,19 +2117,17 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet,
> break;
>
> case OVS_ACTION_ATTR_USERSPACE: {
> + struct ofpbuf *userspace_packet;
> const struct nlattr *userdata;
>
> userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);
> + userspace_packet = may_steal ? packet : ofpbuf_clone(packet);
>
> - dp_netdev_output_userspace(aux->dp, packet,
> + dp_netdev_output_userspace(aux->dp, userspace_packet,
> miniflow_hash_5tuple(aux->key, 0)
> % aux->dp->n_handlers,
> DPIF_UC_ACTION, aux->key,
> userdata);
> -
> - if (may_steal) {
> - ofpbuf_delete(packet);
> - }
> break;
> }
>
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index ba41d2e..e48bfde 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -205,7 +205,7 @@ dpdk_rte_mzalloc(size_t sz)
> void
> free_dpdk_buf(struct ofpbuf *b)
> {
> - struct rte_mbuf *pkt = (struct rte_mbuf *) b;
> + struct rte_mbuf *pkt = (struct rte_mbuf *) ofpbuf_header(b);
>
Could use the address of the 'mbuf' member instead of a cast here?
> rte_mempool_put(pkt->pool, pkt);
> }
> diff --git a/lib/ofpbuf.c b/lib/ofpbuf.c
> index 1f4b61d..9490a1e 100644
> --- a/lib/ofpbuf.c
> +++ b/lib/ofpbuf.c
> @@ -117,6 +117,9 @@ void
> ofpbuf_init_dpdk(struct ofpbuf *b, size_t allocated)
> {
> ofpbuf_init__(b, allocated, OFPBUF_DPDK);
> +#ifdef DPDK_NETDEV
> + ofpbuf_set_header(b, b);
> +#endif
> }
>
> /* Initializes 'b' as an empty ofpbuf with an initial capacity of 'size'
> @@ -134,8 +137,9 @@ ofpbuf_uninit(struct ofpbuf *b)
> if (b) {
> if (b->source == OFPBUF_MALLOC) {
> free(ofpbuf_base(b));
> + } else if (b->source == OFPBUF_DPDK) {
> + free_dpdk_buf(b);
> }
> - ovs_assert(b->source != OFPBUF_DPDK);
> }
> }
>
> diff --git a/lib/ofpbuf.h b/lib/ofpbuf.h
> index 85be899..54113d0 100644
> --- a/lib/ofpbuf.h
> +++ b/lib/ofpbuf.h
> @@ -59,6 +59,7 @@ enum OVS_PACKED_ENUM ofpbuf_source {
> struct ofpbuf {
> #ifdef DPDK_NETDEV
> struct rte_mbuf mbuf; /* DPDK mbuf */
> + struct ofpbuf *header_; /* First byte of allocated buffer header. */
Do we actually care that this is an ofpbuf pointer? Currently it is only used
to free the dpdk buffer, so this would be clearer:
void *dpdk_buf;
> #else
> void *base_; /* First byte of allocated space. */
> void *data_; /* First byte actually in use. */
> @@ -174,13 +175,10 @@ static inline void *ofpbuf_get_uninit_pointer(struct
> ofpbuf *b)
> static inline void ofpbuf_delete(struct ofpbuf *b)
> {
> if (b) {
> - if (b->source == OFPBUF_DPDK) {
> - free_dpdk_buf(b);
> - return;
> - }
> -
> ofpbuf_uninit(b);
> - free(b);
> + if (b->source != OFPBUF_DPDK) {
> + free(b);
> + }
> }
> }
>
> @@ -386,6 +384,16 @@ static inline void ofpbuf_set_size(struct ofpbuf *b,
> uint32_t v)
> * this segment. */
> }
>
> +static inline struct ofpbuf * ofpbuf_header(const struct ofpbuf *b)
> +{
> + return b->header_;
> +}
> +
> +static inline void ofpbuf_set_header(struct ofpbuf *b, struct ofpbuf *h)
> +{
> + b->header_ = h;
> +}
> +
This abstraction is not necessary, if the pointer only exists for DPDK. Just
use the pointer directly instead.
> #else
> static inline void * ofpbuf_data(const struct ofpbuf *b)
> {
> --
> 1.7.9.5
>
> _______________________________________________
> dev mailing list
> [email protected]
> http://openvswitch.org/mailman/listinfo/dev
_______________________________________________
dev mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/dev