>
>On Tue, 16 Feb 2016 11:31:49 +0000
>Mark Kavanagh <[email protected]> wrote:
>
>> Add support for Jumbo Frames to DPDK-enabled port types,
>> using single-segment-mbufs.
>>
>> Using this approach, the amount of memory allocated for each mbuf
>> to store frame data is increased to a value greater than 1518B
>> (typical Ethernet maximum frame length). The increased space
>> available in the mbuf means that an entire Jumbo Frame can be carried
>> in a single mbuf, as opposed to partitioning it across multiple mbuf
>> segments.
>>
>> The amount of space allocated to each mbuf to hold frame data is
>> defined dynamically by the user when adding a DPDK port to a bridge.
>> If an MTU value is not supplied, or the user-supplied value is invalid,
>> the MTU for the port defaults to standard Ethernet MTU (i.e. 1500B).
>>
>> Signed-off-by: Mark Kavanagh <[email protected]>
>> ---
>> INSTALL.DPDK.md | 1 -
>> lib/netdev-dpdk.c | 259
>> +++++++++++++++++++++++++++++++++---------------------
>> 2 files changed, 159 insertions(+), 101 deletions(-)
>>
>> diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
>> index d892788..722fb9e 100644
>> --- a/INSTALL.DPDK.md
>> +++ b/INSTALL.DPDK.md
>> @@ -881,7 +881,6 @@ determines how many queues can be used by the guest.
>> Restrictions:
>> -------------
>>
>> - - Work with 1500 MTU, needs few changes in DPDK lib to fix this issue.
>> - Currently DPDK port does not make use any offload functionality.
>> - DPDK-vHost support works with 1G huge pages.
>>
>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>> index 5bcd36d..93a0930 100644
>> --- a/lib/netdev-dpdk.c
>> +++ b/lib/netdev-dpdk.c
>> @@ -77,6 +77,7 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5,
>> 20);
>> + sizeof(struct dp_packet) \
>> + RTE_PKTMBUF_HEADROOM)
>> #define NETDEV_DPDK_MBUF_ALIGN 1024
>> +#define NETDEV_DPDK_MAX_FRAME_LEN 13312
>>
>> /* Max and min number of packets in the mempool. OVS tries to allocate a
>> * mempool with MAX_NB_MBUF: if this fails (because the system doesn't have
>> @@ -294,34 +295,6 @@ free_dpdk_buf(struct dp_packet *p)
>> }
>>
>> static void
>> -__rte_pktmbuf_init(struct rte_mempool *mp,
>> - void *opaque_arg OVS_UNUSED,
>> - void *_m,
>> - unsigned i OVS_UNUSED)
>> -{
>> - struct rte_mbuf *m = _m;
>> - uint32_t buf_len = mp->elt_size - sizeof(struct dp_packet);
>> -
>> - RTE_MBUF_ASSERT(mp->elt_size >= sizeof(struct dp_packet));
>> -
>> - memset(m, 0, mp->elt_size);
>> -
>> - /* start of buffer is just after mbuf structure */
>> - m->buf_addr = (char *)m + sizeof(struct dp_packet);
>> - m->buf_physaddr = rte_mempool_virt2phy(mp, m) +
>> - sizeof(struct dp_packet);
>> - m->buf_len = (uint16_t)buf_len;
>> -
>> - /* keep some headroom between start of buffer and data */
>> - m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, m->buf_len);
>> -
>> - /* init some constant fields */
>> - m->pool = mp;
>> - m->nb_segs = 1;
>> - m->port = 0xff;
>> -}
>> -
>> -static void
>> ovs_rte_pktmbuf_init(struct rte_mempool *mp,
>> void *opaque_arg OVS_UNUSED,
>> void *_m,
>> @@ -450,6 +423,7 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int
>> n_rxq, int n_txq)
>> {
>> int diag = 0;
>> int i;
>> + struct rte_eth_conf conf = port_conf;
>>
>> /* A device may report more queues than it makes available (this has
>> * been observed for Intel xl710, which reserves some of them for
>> @@ -461,7 +435,15 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int
>> n_rxq, int
>n_txq)
>> VLOG_INFO("Retrying setup with (rxq:%d txq:%d)", n_rxq, n_txq);
>> }
>>
>> - diag = rte_eth_dev_configure(dev->port_id, n_rxq, n_txq,
>> &port_conf);
>> + if (dev->mtu > ETHER_MTU) {
>> + conf.rxmode.jumbo_frame = 1;
>> + conf.rxmode.max_rx_pkt_len = dev->max_packet_len;
>> + } else {
>> + conf.rxmode.jumbo_frame = 0;
>> + conf.rxmode.max_rx_pkt_len = 0;
>> + }
>> +
>> + diag = rte_eth_dev_configure(dev->port_id, n_rxq, n_txq, &conf);
>> if (diag) {
>> break;
>> }
>> @@ -602,8 +584,6 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int
>> port_no,
>> {
>> struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
>> int sid;
>> - int err = 0;
>> - uint32_t buf_size;
>>
>> ovs_mutex_init(&netdev->mutex);
>> ovs_mutex_lock(&netdev->mutex);
>> @@ -623,15 +603,7 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int
>> port_no,
>> netdev->port_id = port_no;
>> netdev->type = type;
>> netdev->flags = 0;
>> - netdev->mtu = ETHER_MTU;
>> - netdev->max_packet_len = ETHER_MAX_LEN;
>> -
>> - buf_size = dpdk_buf_size(netdev->mtu);
>> - netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id,
>> FRAME_LEN_TO_MTU(buf_size));
>> - if (!netdev->dpdk_mp) {
>> - err = ENOMEM;
>> - goto unlock;
>> - }
>> + netdev->mtu = 0;
>
>This zero is special because it means we can set MTU otherwise
>the mtu_request is just ignored, so maybe we could define
>MTU_NOT_SET or something like that.
Sounds good.
>
>
>> netdev_->n_txq = NR_QUEUE;
>> netdev_->n_rxq = NR_QUEUE;
>> @@ -640,20 +612,12 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int
>> port_no,
>>
>> if (type == DPDK_DEV_ETH) {
>> netdev_dpdk_alloc_txq(netdev, NR_QUEUE);
>> - err = dpdk_eth_dev_init(netdev);
>> - if (err) {
>> - goto unlock;
>> - }
>> }
>>
>> list_push_back(&dpdk_list, &netdev->list_node);
>>
>> -unlock:
>> - if (err) {
>> - rte_free(netdev->tx_q);
>> - }
>> ovs_mutex_unlock(&netdev->mutex);
>> - return err;
>> + return 0;
>> }
>>
>> static int
>> @@ -671,6 +635,27 @@ dpdk_dev_parse_name(const char dev_name[], const char
>> prefix[],
>> return 0;
>> }
>>
>> +static void
>> +dpdk_dev_parse_mtu(const struct smap *args, int *mtu)
>> +{
>> + const char *mtu_str = smap_get(args, "mtu_request");
>> + char *end_ptr = NULL;
>> + int local_mtu;
>> +
>> + if (mtu_str) {
>> + local_mtu = strtoul(mtu_str, &end_ptr, 0);
>> + }
>> + if (!mtu_str || local_mtu < ETHER_MTU ||
>> + local_mtu > FRAME_LEN_TO_MTU(NETDEV_DPDK_MAX_FRAME_LEN) ||
>> + *end_ptr != '\0') {
>> + local_mtu = ETHER_MTU;
>> + VLOG_WARN("Invalid or missing mtu_request parameter - defaulting to
>> %d.\n",
>> + local_mtu);
>
>This prints a warning message by default. It should only tell if the MTU
>is invalid and perhaps if the mtu different than the default as an
>informative message.
Ah yes, that's true. I'll rework this in the next version.
>
>
>> + }
>> +
>> + *mtu = local_mtu;
>> +}
>> +
>> static int
>> vhost_construct_helper(struct netdev *netdev_) OVS_REQUIRES(dpdk_mutex)
>> {
>> @@ -801,15 +786,72 @@ netdev_dpdk_get_config(const struct netdev *netdev,
>> struct smap
>*args)
>> smap_add_format(args, "configured_rx_queues", "%d", netdev->n_rxq);
>> smap_add_format(args, "requested_tx_queues", "%d", netdev->n_txq);
>> smap_add_format(args, "configured_tx_queues", "%d", dev->real_n_txq);
>> + smap_add_format(args, "mtu", "%d", dev->mtu);
>> ovs_mutex_unlock(&dev->mutex);
>>
>> return 0;
>> }
>>
>> +/* Set the mtu of DPDK_DEV_ETH ports */
>> +static int
>> +netdev_dpdk_set_mtu(const struct netdev *netdev, int mtu)
>> +{
>> + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>> + int err, dpdk_mtu;
>> + uint32_t buf_size;
>> + struct dpdk_mp *mp;
>> +
>> + ovs_mutex_lock(&dpdk_mutex);
>> + ovs_mutex_lock(&dev->mutex);
>> + if (dev->mtu == mtu) {
>> + err = 0;
>> + goto out;
>> + }
>> +
>> + buf_size = dpdk_buf_size(mtu);
>> + dpdk_mtu = FRAME_LEN_TO_MTU(buf_size);
>> +
>> + mp = dpdk_mp_get(dev->socket_id, dpdk_mtu);
>> + if (!mp) {
>> + err = ENOMEM;
>> + goto out;
>> + }
>> +
>> + rte_eth_dev_stop(dev->port_id);
>> +
>> + dev->dpdk_mp = mp;
>> + dev->mtu = mtu;
>> + dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
>> +
>> + err = dpdk_eth_dev_init(dev);
>> + if (err) {
>> + VLOG_WARN("Unable to set MTU '%d' for '%s'; falling back to default
>> "
>> + "MTU '%d'\n", mtu, dev->up.name, ETHER_MTU);
>> + dpdk_mp_put(mp);
>> + dev->mtu = ETHER_MTU;
>> + mp = dpdk_mp_get(dev->socket_id, dev->mtu);
>> + if (!mp) {
>> + err = ENOMEM;
>> + goto out;
>> + }
>> + dev->dpdk_mp = mp;
>> + dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
>> + dpdk_eth_dev_init(dev);
>> + goto out;
>> + } else {
>> + netdev_change_seq_changed(netdev);
>> + }
>> +out:
>> + ovs_mutex_unlock(&dev->mutex);
>> + ovs_mutex_unlock(&dpdk_mutex);
>> + return err;
>> +}
>> +
>> static int
>> netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args)
>> {
>> struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>> + int mtu;
>>
>> ovs_mutex_lock(&dev->mutex);
>> netdev->requested_n_rxq = MAX(smap_get_int(args, "n_rxq",
>> @@ -817,6 +859,10 @@ netdev_dpdk_set_config(struct netdev *netdev, const
>> struct smap *args)
>> netdev_change_seq_changed(netdev);
>> ovs_mutex_unlock(&dev->mutex);
>>
>> + if (!dev->mtu) {
>> + dpdk_dev_parse_mtu(args, &mtu);
>> + return netdev_dpdk_set_mtu(netdev, mtu);
>> + }
>
>Can we actually check if dev->mtu is different from mtu_request and then
>print a warning if so? Because the requested config will be ignored.
Good idea - I'll add that in the next patch.
Thanks for your feedback again, Flavio!
>
>
>> return 0;
>> }
>>
>> @@ -1435,53 +1481,6 @@ netdev_dpdk_get_mtu(const struct netdev *netdev, int
>> *mtup)
>> }
>>
>> static int
>> -netdev_dpdk_set_mtu(const struct netdev *netdev, int mtu)
>> -{
>> - struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>> - int old_mtu, err;
>> - struct dpdk_mp *old_mp;
>> - struct dpdk_mp *mp;
>> -
>> - ovs_mutex_lock(&dpdk_mutex);
>> - ovs_mutex_lock(&dev->mutex);
>> - if (dev->mtu == mtu) {
>> - err = 0;
>> - goto out;
>> - }
>> -
>> - mp = dpdk_mp_get(dev->socket_id, dev->mtu);
>> - if (!mp) {
>> - err = ENOMEM;
>> - goto out;
>> - }
>> -
>> - rte_eth_dev_stop(dev->port_id);
>> -
>> - old_mtu = dev->mtu;
>> - old_mp = dev->dpdk_mp;
>> - dev->dpdk_mp = mp;
>> - dev->mtu = mtu;
>> - dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
>> -
>> - err = dpdk_eth_dev_init(dev);
>> - if (err) {
>> - dpdk_mp_put(mp);
>> - dev->mtu = old_mtu;
>> - dev->dpdk_mp = old_mp;
>> - dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
>> - dpdk_eth_dev_init(dev);
>> - goto out;
>> - }
>> -
>> - dpdk_mp_put(old_mp);
>> - netdev_change_seq_changed(netdev);
>> -out:
>> - ovs_mutex_unlock(&dev->mutex);
>> - ovs_mutex_unlock(&dpdk_mutex);
>> - return err;
>> -}
>> -
>> -static int
>> netdev_dpdk_get_carrier(const struct netdev *netdev_, bool *carrier);
>>
>> static int
>> @@ -2024,6 +2023,57 @@ dpdk_vhost_user_class_init(void)
>> return 0;
>> }
>>
>> +/* Set the mtu of DPDK_DEV_VHOST ports */
>> +static int
>> +netdev_dpdk_vhost_set_mtu(const struct netdev *netdev, int mtu)
>> +{
>> + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>> + int err = 0;
>> + struct dpdk_mp *mp;
>> +
>> + ovs_mutex_lock(&dpdk_mutex);
>> + ovs_mutex_lock(&dev->mutex);
>> + if (dev->mtu == mtu) {
>> + err = 0;
>> + goto out;
>> + }
>> +
>> + mp = dpdk_mp_get(dev->socket_id, mtu);
>> + if (!mp) {
>> + err = ENOMEM;
>> + goto out;
>> + }
>> +
>> + dev->dpdk_mp = mp;
>> + dev->mtu = mtu;
>> + dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
>> +
>> + netdev_change_seq_changed(netdev);
>> +out:
>> + ovs_mutex_unlock(&dev->mutex);
>> + ovs_mutex_unlock(&dpdk_mutex);
>> + return err;
>> +}
>> +
>> +static int
>> +netdev_dpdk_vhost_set_config(struct netdev *netdev, const struct smap *args)
>> +{
>> + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>> + int mtu;
>> +
>> + ovs_mutex_lock(&dev->mutex);
>> + netdev->requested_n_rxq = MAX(smap_get_int(args, "n_rxq",
>> + netdev->requested_n_rxq), 1);
>> + netdev_change_seq_changed(netdev);
>> + ovs_mutex_unlock(&dev->mutex);
>> +
>> + if (!dev->mtu) {
>> + dpdk_dev_parse_mtu(args, &mtu);
>> + return netdev_dpdk_vhost_set_mtu(netdev, mtu);
>> + }
>> + return 0;
>> +}
>> +
>> static void
>> dpdk_common_init(void)
>> {
>> @@ -2160,8 +2210,9 @@ unlock_dpdk:
>> return err;
>> }
>>
>> -#define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT, MULTIQ, SEND, \
>> - GET_CARRIER, GET_STATS, GET_FEATURES, GET_STATUS, RXQ_RECV) \
>> +#define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT, SET_CONFIG, \
>> + MULTIQ, SEND, SET_MTU, GET_CARRIER, GET_STATS, GET_FEATURES, \
>> + GET_STATUS, RXQ_RECV) \
>> { \
>> NAME, \
>> INIT, /* init */ \
>> @@ -2173,7 +2224,7 @@ unlock_dpdk:
>> DESTRUCT, \
>> netdev_dpdk_dealloc, \
>> netdev_dpdk_get_config, \
>> - netdev_dpdk_set_config, \
>> + SET_CONFIG , \
>> NULL, /* get_tunnel_config */ \
>> NULL, /* build header */ \
>> NULL, /* push header */ \
>> @@ -2187,7 +2238,7 @@ unlock_dpdk:
>> netdev_dpdk_set_etheraddr, \
>> netdev_dpdk_get_etheraddr, \
>> netdev_dpdk_get_mtu, \
>> - netdev_dpdk_set_mtu, \
>> + SET_MTU, \
>> netdev_dpdk_get_ifindex, \
>> GET_CARRIER, \
>> netdev_dpdk_get_carrier_resets, \
>> @@ -2333,8 +2384,10 @@ static const struct netdev_class dpdk_class =
>> NULL,
>> netdev_dpdk_construct,
>> netdev_dpdk_destruct,
>> + netdev_dpdk_set_config,
>> netdev_dpdk_set_multiq,
>> netdev_dpdk_eth_send,
>> + netdev_dpdk_set_mtu,
>> netdev_dpdk_get_carrier,
>> netdev_dpdk_get_stats,
>> netdev_dpdk_get_features,
>> @@ -2347,8 +2400,10 @@ static const struct netdev_class dpdk_ring_class =
>> NULL,
>> netdev_dpdk_ring_construct,
>> netdev_dpdk_destruct,
>> + netdev_dpdk_set_config,
>> netdev_dpdk_set_multiq,
>> netdev_dpdk_ring_send,
>> + netdev_dpdk_set_mtu,
>> netdev_dpdk_get_carrier,
>> netdev_dpdk_get_stats,
>> netdev_dpdk_get_features,
>> @@ -2361,8 +2416,10 @@ static const struct netdev_class OVS_UNUSED
>> dpdk_vhost_cuse_class =
>> dpdk_vhost_cuse_class_init,
>> netdev_dpdk_vhost_cuse_construct,
>> netdev_dpdk_vhost_destruct,
>> + netdev_dpdk_set_config,
>> netdev_dpdk_vhost_cuse_set_multiq,
>> netdev_dpdk_vhost_send,
>> + NULL,
>> netdev_dpdk_vhost_get_carrier,
>> netdev_dpdk_vhost_get_stats,
>> NULL,
>> @@ -2375,8 +2432,10 @@ static const struct netdev_class OVS_UNUSED
>> dpdk_vhost_user_class =
>> dpdk_vhost_user_class_init,
>> netdev_dpdk_vhost_user_construct,
>> netdev_dpdk_vhost_destruct,
>> + netdev_dpdk_vhost_set_config,
>> netdev_dpdk_vhost_set_multiq,
>> netdev_dpdk_vhost_send,
>> + netdev_dpdk_vhost_set_mtu,
>> netdev_dpdk_vhost_get_carrier,
>> netdev_dpdk_vhost_get_stats,
>> NULL,
>
>Thanks,
>
>--
>fbl
_______________________________________________
dev mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/dev