Hi Mark,

Thanks for splitting the clean up, this looks better.
comment below.


On Tue, 16 Feb 2016 11:31:48 +0000
Mark Kavanagh <[email protected]> wrote:

> Current mbuf initialization relies on magic numbers and does not
> accomodate mbufs of different sizes.
> 
> Resolve this issue by ensuring that mbufs are always aligned to a 1k
> boundary (a typical DPDK NIC Rx buffer alignment).
> 
> Signed-off-by: Mark Kavanagh <[email protected]>
> ---
>  lib/netdev-dpdk.c | 51 ++++++++++++++++++++++++++++++++++++---------------
>  1 file changed, 36 insertions(+), 15 deletions(-)
> 
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index e4f789b..5bcd36d 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -69,14 +69,14 @@ static struct vlog_rate_limit rl = 
> VLOG_RATE_LIMIT_INIT(5, 20);
>   * The minimum mbuf size is limited to avoid scatter behaviour and drop in
>   * performance for standard Ethernet MTU.
>   */
> -#define MTU_TO_MAX_LEN(mtu)  ((mtu) + ETHER_HDR_LEN + ETHER_CRC_LEN)
> -#define MBUF_SIZE_MTU(mtu)   (MTU_TO_MAX_LEN(mtu)        \
> -                              + sizeof(struct dp_packet) \
> -                              + RTE_PKTMBUF_HEADROOM)
> -#define MBUF_SIZE_DRIVER     (2048                       \
> -                              + sizeof (struct rte_mbuf) \
> -                              + RTE_PKTMBUF_HEADROOM)
> -#define MBUF_SIZE(mtu)       MAX(MBUF_SIZE_MTU(mtu), MBUF_SIZE_DRIVER)
> +#define ETHER_HDR_MAX_LEN           (ETHER_HDR_LEN + ETHER_CRC_LEN + (2 * 
> VLAN_HEADER_LEN))
> +#define MTU_TO_FRAME_LEN(mtu)       ((mtu) + ETHER_HDR_LEN + ETHER_CRC_LEN)
> +#define MTU_TO_MAX_FRAME_LEN(mtu)   ((mtu) + ETHER_HDR_MAX_LEN)
> +#define FRAME_LEN_TO_MTU(frame_len) ((frame_len)- ETHER_HDR_LEN - 
> ETHER_CRC_LEN)
> +#define MBUF_SIZE(mtu)              ( MTU_TO_MAX_FRAME_LEN(mtu)   \
> +                                    + sizeof(struct dp_packet)    \
> +                                    + RTE_PKTMBUF_HEADROOM)
> +#define NETDEV_DPDK_MBUF_ALIGN      1024
>  
>  /* Max and min number of packets in the mempool.  OVS tries to allocate a
>   * mempool with MAX_NB_MBUF: if this fails (because the system doesn't have
> @@ -252,6 +252,22 @@ is_dpdk_class(const struct netdev_class *class)
>      return class->construct == netdev_dpdk_construct;
>  }
>  
> +/* DPDK NIC drivers allocate RX buffers at a particular granularity, 
> typically
> + * aligned at 1k or less. If a declared mbuf size is not a multiple of this
> + * value, insufficient buffers are allocated to accomodate the packet in its
> + * entirety. Furthermore, certain drivers need to ensure that there is also
> + * sufficient space in the Rx buffer to accommodate two VLAN tags (for QinQ
> + * frames). If the RX buffer is too small, then the driver enables scatter RX
> + * behaviour, which reduces performance. To prevent this, use a buffer size 
> that
> + * is closest to 'mtu', but which satisfies the aforementioned criteria.
> + */
> +static uint32_t
> +dpdk_buf_size(int mtu)
> +{
> +    return ROUND_UP((MTU_TO_MAX_FRAME_LEN(mtu) + RTE_PKTMBUF_HEADROOM),
> +                     NETDEV_DPDK_MBUF_ALIGN);
> +}
> +
>  /* XXX: use dpdk malloc for entire OVS. in fact huge page should be used
>   * for all other segments data, bss and text. */
>  
> @@ -313,7 +329,7 @@ ovs_rte_pktmbuf_init(struct rte_mempool *mp,
>  {
>      struct rte_mbuf *m = _m;
>  
> -    __rte_pktmbuf_init(mp, opaque_arg, _m, i);
> +    rte_pktmbuf_init(mp, opaque_arg, _m, i);


Nice, but at this point __rte_pktmbuf_init() is not used anymore.
Although it is removed in the second patch, it makes sense to be
part of this patch instead.



>      dp_packet_init_dpdk((struct dp_packet *) m, m->buf_len);
>  }
> @@ -324,6 +340,7 @@ dpdk_mp_get(int socket_id, int mtu) 
> OVS_REQUIRES(dpdk_mutex)
>      struct dpdk_mp *dmp = NULL;
>      char mp_name[RTE_MEMPOOL_NAMESIZE];
>      unsigned mp_size;
> +    struct rte_pktmbuf_pool_private mbp_priv;
>  
>      LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) {
>          if (dmp->socket_id == socket_id && dmp->mtu == mtu) {
> @@ -336,6 +353,8 @@ dpdk_mp_get(int socket_id, int mtu) 
> OVS_REQUIRES(dpdk_mutex)
>      dmp->socket_id = socket_id;
>      dmp->mtu = mtu;
>      dmp->refcount = 1;
> +    mbp_priv.mbuf_data_room_size = MBUF_SIZE(mtu) - sizeof(struct dp_packet);
> +    mbp_priv.mbuf_priv_size = sizeof (struct dp_packet) - sizeof (struct 
> rte_mbuf);
>  
>      mp_size = MAX_NB_MBUF;
>      do {
> @@ -347,7 +366,7 @@ dpdk_mp_get(int socket_id, int mtu) 
> OVS_REQUIRES(dpdk_mutex)
>          dmp->mp = rte_mempool_create(mp_name, mp_size, MBUF_SIZE(mtu),
>                                       MP_CACHE_SZ,
>                                       sizeof(struct rte_pktmbuf_pool_private),
> -                                     rte_pktmbuf_pool_init, NULL,
> +                                     rte_pktmbuf_pool_init, &mbp_priv,
>                                       ovs_rte_pktmbuf_init, NULL,
>                                       socket_id, 0);
>      } while (!dmp->mp && rte_errno == ENOMEM && (mp_size /= 2) >= 
> MIN_NB_MBUF);
> @@ -584,6 +603,7 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int 
> port_no,
>      struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
>      int sid;
>      int err = 0;
> +    uint32_t buf_size;
>  
>      ovs_mutex_init(&netdev->mutex);
>      ovs_mutex_lock(&netdev->mutex);
> @@ -604,9 +624,10 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int 
> port_no,
>      netdev->type = type;
>      netdev->flags = 0;
>      netdev->mtu = ETHER_MTU;
> -    netdev->max_packet_len = MTU_TO_MAX_LEN(netdev->mtu);
> +    netdev->max_packet_len = ETHER_MAX_LEN;

The value is correct but in other places we are using the macro
MTU_TO_FRAME_LEN(dev->mtu).

  
> -    netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, netdev->mtu);
> +    buf_size = dpdk_buf_size(netdev->mtu);
> +    netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, 
> FRAME_LEN_TO_MTU(buf_size));

This is inconsistent with netdev_dpdk_set_mtu() which calls dpdk_mp_get()
passing dev->mtu directly. Shouldn't that be fixed too?


>      if (!netdev->dpdk_mp) {
>          err = ENOMEM;
>          goto unlock;
> @@ -1440,14 +1461,14 @@ netdev_dpdk_set_mtu(const struct netdev *netdev, int 
> mtu)
>      old_mp = dev->dpdk_mp;
>      dev->dpdk_mp = mp;
>      dev->mtu = mtu;
> -    dev->max_packet_len = MTU_TO_MAX_LEN(dev->mtu);
> +    dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
>  
>      err = dpdk_eth_dev_init(dev);
>      if (err) {
>          dpdk_mp_put(mp);
>          dev->mtu = old_mtu;
>          dev->dpdk_mp = old_mp;
> -        dev->max_packet_len = MTU_TO_MAX_LEN(dev->mtu);
> +        dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
>          dpdk_eth_dev_init(dev);
>          goto out;
>      }
> @@ -1736,7 +1757,7 @@ netdev_dpdk_get_status(const struct netdev *netdev_, 
> struct smap *args)
>      smap_add_format(args, "numa_id", "%d", 
> rte_eth_dev_socket_id(dev->port_id));
>      smap_add_format(args, "driver_name", "%s", dev_info.driver_name);
>      smap_add_format(args, "min_rx_bufsize", "%u", dev_info.min_rx_bufsize);
> -    smap_add_format(args, "max_rx_pktlen", "%u", dev_info.max_rx_pktlen);
> +    smap_add_format(args, "max_rx_pktlen", "%u", dev->max_packet_len);
>      smap_add_format(args, "max_rx_queues", "%u", dev_info.max_rx_queues);
>      smap_add_format(args, "max_tx_queues", "%u", dev_info.max_tx_queues);
>      smap_add_format(args, "max_mac_addrs", "%u", dev_info.max_mac_addrs);

Thanks,
-- 
fbl

_______________________________________________
dev mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/dev

Reply via email to