Hi Mark, Thanks for splitting the clean up, this looks better. comment below.
On Tue, 16 Feb 2016 11:31:48 +0000 Mark Kavanagh <[email protected]> wrote: > Current mbuf initialization relies on magic numbers and does not > accomodate mbufs of different sizes. > > Resolve this issue by ensuring that mbufs are always aligned to a 1k > boundary (a typical DPDK NIC Rx buffer alignment). > > Signed-off-by: Mark Kavanagh <[email protected]> > --- > lib/netdev-dpdk.c | 51 ++++++++++++++++++++++++++++++++++++--------------- > 1 file changed, 36 insertions(+), 15 deletions(-) > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index e4f789b..5bcd36d 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -69,14 +69,14 @@ static struct vlog_rate_limit rl = > VLOG_RATE_LIMIT_INIT(5, 20); > * The minimum mbuf size is limited to avoid scatter behaviour and drop in > * performance for standard Ethernet MTU. > */ > -#define MTU_TO_MAX_LEN(mtu) ((mtu) + ETHER_HDR_LEN + ETHER_CRC_LEN) > -#define MBUF_SIZE_MTU(mtu) (MTU_TO_MAX_LEN(mtu) \ > - + sizeof(struct dp_packet) \ > - + RTE_PKTMBUF_HEADROOM) > -#define MBUF_SIZE_DRIVER (2048 \ > - + sizeof (struct rte_mbuf) \ > - + RTE_PKTMBUF_HEADROOM) > -#define MBUF_SIZE(mtu) MAX(MBUF_SIZE_MTU(mtu), MBUF_SIZE_DRIVER) > +#define ETHER_HDR_MAX_LEN (ETHER_HDR_LEN + ETHER_CRC_LEN + (2 * > VLAN_HEADER_LEN)) > +#define MTU_TO_FRAME_LEN(mtu) ((mtu) + ETHER_HDR_LEN + ETHER_CRC_LEN) > +#define MTU_TO_MAX_FRAME_LEN(mtu) ((mtu) + ETHER_HDR_MAX_LEN) > +#define FRAME_LEN_TO_MTU(frame_len) ((frame_len)- ETHER_HDR_LEN - > ETHER_CRC_LEN) > +#define MBUF_SIZE(mtu) ( MTU_TO_MAX_FRAME_LEN(mtu) \ > + + sizeof(struct dp_packet) \ > + + RTE_PKTMBUF_HEADROOM) > +#define NETDEV_DPDK_MBUF_ALIGN 1024 > > /* Max and min number of packets in the mempool. OVS tries to allocate a > * mempool with MAX_NB_MBUF: if this fails (because the system doesn't have > @@ -252,6 +252,22 @@ is_dpdk_class(const struct netdev_class *class) > return class->construct == netdev_dpdk_construct; > } > > +/* DPDK NIC drivers allocate RX buffers at a particular granularity, > typically > + * aligned at 1k or less. If a declared mbuf size is not a multiple of this > + * value, insufficient buffers are allocated to accomodate the packet in its > + * entirety. Furthermore, certain drivers need to ensure that there is also > + * sufficient space in the Rx buffer to accommodate two VLAN tags (for QinQ > + * frames). If the RX buffer is too small, then the driver enables scatter RX > + * behaviour, which reduces performance. To prevent this, use a buffer size > that > + * is closest to 'mtu', but which satisfies the aforementioned criteria. > + */ > +static uint32_t > +dpdk_buf_size(int mtu) > +{ > + return ROUND_UP((MTU_TO_MAX_FRAME_LEN(mtu) + RTE_PKTMBUF_HEADROOM), > + NETDEV_DPDK_MBUF_ALIGN); > +} > + > /* XXX: use dpdk malloc for entire OVS. in fact huge page should be used > * for all other segments data, bss and text. */ > > @@ -313,7 +329,7 @@ ovs_rte_pktmbuf_init(struct rte_mempool *mp, > { > struct rte_mbuf *m = _m; > > - __rte_pktmbuf_init(mp, opaque_arg, _m, i); > + rte_pktmbuf_init(mp, opaque_arg, _m, i); Nice, but at this point __rte_pktmbuf_init() is not used anymore. Although it is removed in the second patch, it makes sense to be part of this patch instead. > dp_packet_init_dpdk((struct dp_packet *) m, m->buf_len); > } > @@ -324,6 +340,7 @@ dpdk_mp_get(int socket_id, int mtu) > OVS_REQUIRES(dpdk_mutex) > struct dpdk_mp *dmp = NULL; > char mp_name[RTE_MEMPOOL_NAMESIZE]; > unsigned mp_size; > + struct rte_pktmbuf_pool_private mbp_priv; > > LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) { > if (dmp->socket_id == socket_id && dmp->mtu == mtu) { > @@ -336,6 +353,8 @@ dpdk_mp_get(int socket_id, int mtu) > OVS_REQUIRES(dpdk_mutex) > dmp->socket_id = socket_id; > dmp->mtu = mtu; > dmp->refcount = 1; > + mbp_priv.mbuf_data_room_size = MBUF_SIZE(mtu) - sizeof(struct dp_packet); > + mbp_priv.mbuf_priv_size = sizeof (struct dp_packet) - sizeof (struct > rte_mbuf); > > mp_size = MAX_NB_MBUF; > do { > @@ -347,7 +366,7 @@ dpdk_mp_get(int socket_id, int mtu) > OVS_REQUIRES(dpdk_mutex) > dmp->mp = rte_mempool_create(mp_name, mp_size, MBUF_SIZE(mtu), > MP_CACHE_SZ, > sizeof(struct rte_pktmbuf_pool_private), > - rte_pktmbuf_pool_init, NULL, > + rte_pktmbuf_pool_init, &mbp_priv, > ovs_rte_pktmbuf_init, NULL, > socket_id, 0); > } while (!dmp->mp && rte_errno == ENOMEM && (mp_size /= 2) >= > MIN_NB_MBUF); > @@ -584,6 +603,7 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int > port_no, > struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); > int sid; > int err = 0; > + uint32_t buf_size; > > ovs_mutex_init(&netdev->mutex); > ovs_mutex_lock(&netdev->mutex); > @@ -604,9 +624,10 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int > port_no, > netdev->type = type; > netdev->flags = 0; > netdev->mtu = ETHER_MTU; > - netdev->max_packet_len = MTU_TO_MAX_LEN(netdev->mtu); > + netdev->max_packet_len = ETHER_MAX_LEN; The value is correct but in other places we are using the macro MTU_TO_FRAME_LEN(dev->mtu). > - netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, netdev->mtu); > + buf_size = dpdk_buf_size(netdev->mtu); > + netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, > FRAME_LEN_TO_MTU(buf_size)); This is inconsistent with netdev_dpdk_set_mtu() which calls dpdk_mp_get() passing dev->mtu directly. Shouldn't that be fixed too? > if (!netdev->dpdk_mp) { > err = ENOMEM; > goto unlock; > @@ -1440,14 +1461,14 @@ netdev_dpdk_set_mtu(const struct netdev *netdev, int > mtu) > old_mp = dev->dpdk_mp; > dev->dpdk_mp = mp; > dev->mtu = mtu; > - dev->max_packet_len = MTU_TO_MAX_LEN(dev->mtu); > + dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu); > > err = dpdk_eth_dev_init(dev); > if (err) { > dpdk_mp_put(mp); > dev->mtu = old_mtu; > dev->dpdk_mp = old_mp; > - dev->max_packet_len = MTU_TO_MAX_LEN(dev->mtu); > + dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu); > dpdk_eth_dev_init(dev); > goto out; > } > @@ -1736,7 +1757,7 @@ netdev_dpdk_get_status(const struct netdev *netdev_, > struct smap *args) > smap_add_format(args, "numa_id", "%d", > rte_eth_dev_socket_id(dev->port_id)); > smap_add_format(args, "driver_name", "%s", dev_info.driver_name); > smap_add_format(args, "min_rx_bufsize", "%u", dev_info.min_rx_bufsize); > - smap_add_format(args, "max_rx_pktlen", "%u", dev_info.max_rx_pktlen); > + smap_add_format(args, "max_rx_pktlen", "%u", dev->max_packet_len); > smap_add_format(args, "max_rx_queues", "%u", dev_info.max_rx_queues); > smap_add_format(args, "max_tx_queues", "%u", dev_info.max_tx_queues); > smap_add_format(args, "max_mac_addrs", "%u", dev_info.max_mac_addrs); Thanks, -- fbl _______________________________________________ dev mailing list [email protected] http://openvswitch.org/mailman/listinfo/dev
