>
>Hi Mark,
>
>Thanks for splitting the clean up, this looks better.
>comment below.
>
>
>On Tue, 16 Feb 2016 11:31:48 +0000
>Mark Kavanagh <[email protected]> wrote:
>
>> Current mbuf initialization relies on magic numbers and does not
>> accomodate mbufs of different sizes.
>>
>> Resolve this issue by ensuring that mbufs are always aligned to a 1k
>> boundary (a typical DPDK NIC Rx buffer alignment).
>>
>> Signed-off-by: Mark Kavanagh <[email protected]>
>> ---
>> lib/netdev-dpdk.c | 51 ++++++++++++++++++++++++++++++++++++---------------
>> 1 file changed, 36 insertions(+), 15 deletions(-)
>>
>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>> index e4f789b..5bcd36d 100644
>> --- a/lib/netdev-dpdk.c
>> +++ b/lib/netdev-dpdk.c
>> @@ -69,14 +69,14 @@ static struct vlog_rate_limit rl =
>> VLOG_RATE_LIMIT_INIT(5, 20);
>> * The minimum mbuf size is limited to avoid scatter behaviour and drop in
>> * performance for standard Ethernet MTU.
>> */
>> -#define MTU_TO_MAX_LEN(mtu) ((mtu) + ETHER_HDR_LEN + ETHER_CRC_LEN)
>> -#define MBUF_SIZE_MTU(mtu) (MTU_TO_MAX_LEN(mtu) \
>> - + sizeof(struct dp_packet) \
>> - + RTE_PKTMBUF_HEADROOM)
>> -#define MBUF_SIZE_DRIVER (2048 \
>> - + sizeof (struct rte_mbuf) \
>> - + RTE_PKTMBUF_HEADROOM)
>> -#define MBUF_SIZE(mtu) MAX(MBUF_SIZE_MTU(mtu), MBUF_SIZE_DRIVER)
>> +#define ETHER_HDR_MAX_LEN (ETHER_HDR_LEN + ETHER_CRC_LEN + (2 *
>VLAN_HEADER_LEN))
>> +#define MTU_TO_FRAME_LEN(mtu) ((mtu) + ETHER_HDR_LEN + ETHER_CRC_LEN)
>> +#define MTU_TO_MAX_FRAME_LEN(mtu) ((mtu) + ETHER_HDR_MAX_LEN)
>> +#define FRAME_LEN_TO_MTU(frame_len) ((frame_len)- ETHER_HDR_LEN -
>> ETHER_CRC_LEN)
>> +#define MBUF_SIZE(mtu) ( MTU_TO_MAX_FRAME_LEN(mtu) \
>> + + sizeof(struct dp_packet) \
>> + + RTE_PKTMBUF_HEADROOM)
>> +#define NETDEV_DPDK_MBUF_ALIGN 1024
>>
>> /* Max and min number of packets in the mempool. OVS tries to allocate a
>> * mempool with MAX_NB_MBUF: if this fails (because the system doesn't have
>> @@ -252,6 +252,22 @@ is_dpdk_class(const struct netdev_class *class)
>> return class->construct == netdev_dpdk_construct;
>> }
>>
>> +/* DPDK NIC drivers allocate RX buffers at a particular granularity,
>> typically
>> + * aligned at 1k or less. If a declared mbuf size is not a multiple of this
>> + * value, insufficient buffers are allocated to accomodate the packet in its
>> + * entirety. Furthermore, certain drivers need to ensure that there is also
>> + * sufficient space in the Rx buffer to accommodate two VLAN tags (for QinQ
>> + * frames). If the RX buffer is too small, then the driver enables scatter
>> RX
>> + * behaviour, which reduces performance. To prevent this, use a buffer size
>> that
>> + * is closest to 'mtu', but which satisfies the aforementioned criteria.
>> + */
>> +static uint32_t
>> +dpdk_buf_size(int mtu)
>> +{
>> + return ROUND_UP((MTU_TO_MAX_FRAME_LEN(mtu) + RTE_PKTMBUF_HEADROOM),
>> + NETDEV_DPDK_MBUF_ALIGN);
>> +}
>> +
>> /* XXX: use dpdk malloc for entire OVS. in fact huge page should be used
>> * for all other segments data, bss and text. */
>>
>> @@ -313,7 +329,7 @@ ovs_rte_pktmbuf_init(struct rte_mempool *mp,
>> {
>> struct rte_mbuf *m = _m;
>>
>> - __rte_pktmbuf_init(mp, opaque_arg, _m, i);
>> + rte_pktmbuf_init(mp, opaque_arg, _m, i);
>
>
>Nice, but at this point __rte_pktmbuf_init() is not used anymore.
>Although it is removed in the second patch, it makes sense to be
>part of this patch instead.
Thanks Flavio - I missed during the split. I'll add it back into this patch, as
originally intended.
>
>
>
>> dp_packet_init_dpdk((struct dp_packet *) m, m->buf_len);
>> }
>> @@ -324,6 +340,7 @@ dpdk_mp_get(int socket_id, int mtu)
>> OVS_REQUIRES(dpdk_mutex)
>> struct dpdk_mp *dmp = NULL;
>> char mp_name[RTE_MEMPOOL_NAMESIZE];
>> unsigned mp_size;
>> + struct rte_pktmbuf_pool_private mbp_priv;
>>
>> LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) {
>> if (dmp->socket_id == socket_id && dmp->mtu == mtu) {
>> @@ -336,6 +353,8 @@ dpdk_mp_get(int socket_id, int mtu)
>> OVS_REQUIRES(dpdk_mutex)
>> dmp->socket_id = socket_id;
>> dmp->mtu = mtu;
>> dmp->refcount = 1;
>> + mbp_priv.mbuf_data_room_size = MBUF_SIZE(mtu) - sizeof(struct
>> dp_packet);
>> + mbp_priv.mbuf_priv_size = sizeof (struct dp_packet) - sizeof (struct
>> rte_mbuf);
>>
>> mp_size = MAX_NB_MBUF;
>> do {
>> @@ -347,7 +366,7 @@ dpdk_mp_get(int socket_id, int mtu)
>> OVS_REQUIRES(dpdk_mutex)
>> dmp->mp = rte_mempool_create(mp_name, mp_size, MBUF_SIZE(mtu),
>> MP_CACHE_SZ,
>> sizeof(struct
>> rte_pktmbuf_pool_private),
>> - rte_pktmbuf_pool_init, NULL,
>> + rte_pktmbuf_pool_init, &mbp_priv,
>> ovs_rte_pktmbuf_init, NULL,
>> socket_id, 0);
>> } while (!dmp->mp && rte_errno == ENOMEM && (mp_size /= 2) >=
>> MIN_NB_MBUF);
>> @@ -584,6 +603,7 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int
>> port_no,
>> struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
>> int sid;
>> int err = 0;
>> + uint32_t buf_size;
>>
>> ovs_mutex_init(&netdev->mutex);
>> ovs_mutex_lock(&netdev->mutex);
>> @@ -604,9 +624,10 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int
>> port_no,
>> netdev->type = type;
>> netdev->flags = 0;
>> netdev->mtu = ETHER_MTU;
>> - netdev->max_packet_len = MTU_TO_MAX_LEN(netdev->mtu);
>> + netdev->max_packet_len = ETHER_MAX_LEN;
>
>The value is correct but in other places we are using the macro
>MTU_TO_FRAME_LEN(dev->mtu).
>
>
>> - netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, netdev->mtu);
>> + buf_size = dpdk_buf_size(netdev->mtu);
>> + netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id,
>> FRAME_LEN_TO_MTU(buf_size));
>
>This is inconsistent with netdev_dpdk_set_mtu() which calls dpdk_mp_get()
>passing dev->mtu directly. Shouldn't that be fixed too?
>
>
>> if (!netdev->dpdk_mp) {
>> err = ENOMEM;
>> goto unlock;
>> @@ -1440,14 +1461,14 @@ netdev_dpdk_set_mtu(const struct netdev *netdev, int
>> mtu)
>> old_mp = dev->dpdk_mp;
>> dev->dpdk_mp = mp;
>> dev->mtu = mtu;
>> - dev->max_packet_len = MTU_TO_MAX_LEN(dev->mtu);
>> + dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
>>
>> err = dpdk_eth_dev_init(dev);
>> if (err) {
>> dpdk_mp_put(mp);
>> dev->mtu = old_mtu;
>> dev->dpdk_mp = old_mp;
>> - dev->max_packet_len = MTU_TO_MAX_LEN(dev->mtu);
>> + dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
>> dpdk_eth_dev_init(dev);
>> goto out;
>> }
>> @@ -1736,7 +1757,7 @@ netdev_dpdk_get_status(const struct netdev *netdev_,
>> struct smap
>*args)
>> smap_add_format(args, "numa_id", "%d",
>> rte_eth_dev_socket_id(dev->port_id));
>> smap_add_format(args, "driver_name", "%s", dev_info.driver_name);
>> smap_add_format(args, "min_rx_bufsize", "%u", dev_info.min_rx_bufsize);
>> - smap_add_format(args, "max_rx_pktlen", "%u", dev_info.max_rx_pktlen);
>> + smap_add_format(args, "max_rx_pktlen", "%u", dev->max_packet_len);
>> smap_add_format(args, "max_rx_queues", "%u", dev_info.max_rx_queues);
>> smap_add_format(args, "max_tx_queues", "%u", dev_info.max_tx_queues);
>> smap_add_format(args, "max_mac_addrs", "%u", dev_info.max_mac_addrs);
>
>Thanks,
>--
>fbl
_______________________________________________
dev mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/dev