>
>On Fri, 19 Feb 2016 11:25:12 +0000
>Mark Kavanagh <mark.b.kavan...@intel.com> wrote:
>
>> Add support for Jumbo Frames to DPDK-enabled port types,
>> using single-segment-mbufs.
>>
>> Using this approach, the amount of memory allocated for each mbuf
>> to store frame data is increased to a value greater than 1518B
>> (typical Ethernet maximum frame length). The increased space
>> available in the mbuf means that an entire Jumbo Frame can be carried
>> in a single mbuf, as opposed to partitioning it across multiple mbuf
>> segments.
>>
>> The amount of space allocated to each mbuf to hold frame data is
>> defined dynamically by the user when adding a DPDK port to a bridge.
>> If an MTU value is not supplied, or the user-supplied value is invalid,
>> the MTU for the port defaults to standard Ethernet MTU (i.e. 1500B).
>>
>> Signed-off-by: Mark Kavanagh <mark.b.kavan...@intel.com>
>> ---
>>  INSTALL.DPDK.md   |  60 ++++++++++++-
>>  NEWS              |   3 +-
>>  lib/netdev-dpdk.c | 248 
>> +++++++++++++++++++++++++++++++++++++-----------------
>>  3 files changed, 233 insertions(+), 78 deletions(-)
>>
>> diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
>> index d892788..4ca98cb 100644
>> --- a/INSTALL.DPDK.md
>> +++ b/INSTALL.DPDK.md
>> @@ -878,10 +878,63 @@ by adding the following string:
>>  to <interface> sections of all network devices used by DPDK. Parameter 'N'
>>  determines how many queues can be used by the guest.
>>
>> +Jumbo Frames
>> +------------
>> +
>> +Support for Jumbo Frames may be enabled at run-time for DPDK-type ports.
>> +
>> +To avail of Jumbo Frame support, add the 'mtu_request' option to the 
>> ovs-vsctl
>> +'add-port' command-line, along with the required MTU for the port.
>> +e.g.
>> +
>> +     ```
>> +     ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk
>options:mtu_request=9000
>> +     ```
>> +
>> +When Jumbo Frames are enabled, the size of a DPDK port's mbuf segments are
>> +increased, such that a full Jumbo Frame may be accommodated inside a single
>> +mbuf segment. Once set, the MTU for a DPDK port is immutable.
>> +
>> +Note that from an OVSDB perspective, the `mtu_request` option for a specific
>> +port may be disregarded once initially set, as subsequent modifications to 
>> this
>> +field are disregarded by the DPDK port. As with non-DPDK ports, the MTU of 
>> DPDK
>> +ports is reported by the `Interface` table's 'mtu' field.
>> +
>> +Jumbo frame support has been validated against 13312B frames, using the
>> +DPDK `igb_uio` driver, but larger frames and other DPDK NIC drivers may
>> +theoretically be supported. Supported port types excludes vHost-Cuse ports, 
>> as
>> +that feature is pending deprecation.
>> +
>> +vHost Ports and Jumbo Frames
>> +----------------------------
>> +Jumbo frame support is available for DPDK vHost-User ports only. Some 
>> additional
>> +configuration is needed to take advantage of this feature:
>> +
>> +  1. `mergeable buffers` must be enabled for vHost ports, as demonstrated in
>> +      the QEMU command line snippet below:
>> +
>> +      ```
>> +      '-netdev type=vhost-user,id=mynet1,chardev=char0,vhostforce \'
>> +      '-device 
>> virtio-net-pci,mac=00:00:00:00:00:01,netdev=mynet1,mrg_rxbuf=on'
>> +      ```
>> +
>> +  2. Where virtio devices are bound to the Linux kernel driver in a guest
>> +     environment (i.e. interfaces are not bound to an in-guest DPDK 
>> driver), the
>> +     MTU of those logical network interfaces must also be increased. This
>> +     avoids segmentation of Jumbo Frames in the guest. Note that 'MTU' 
>> refers
>> +     to the length of the IP packet only, and not that of the entire frame.
>> +
>> +     e.g. To calculate the exact MTU of a standard IPv4 frame, subtract the 
>> L2
>> +     header and CRC lengths (i.e. 18B) from the max supported frame size.
>> +     So, to set the MTU for a 13312B Jumbo Frame:
>> +
>> +      ```
>> +      ifconfig eth1 mtu 13294
>> +      ```
>> +
>>  Restrictions:
>>  -------------
>>
>> -  - Work with 1500 MTU, needs few changes in DPDK lib to fix this issue.
>>    - Currently DPDK port does not make use any offload functionality.
>>    - DPDK-vHost support works with 1G huge pages.
>>
>> @@ -922,6 +975,11 @@ Restrictions:
>>      the next release of DPDK (which includes the above patch) is available 
>> and
>>      integrated into OVS.
>>
>> +  Jumbo Frames:
>> +  - `virtio-pmd`: DPDK apps in the guest do not exit gracefully. This is a 
>> DPDK
>> +     issue that is currently being investigated.
>> +  - vHost-Cuse: Jumbo Frame support is not available for vHost Cuse ports.
>> +
>>  Bug Reporting:
>>  --------------
>>
>> diff --git a/NEWS b/NEWS
>> index 3e33871..43127f9 100644
>> --- a/NEWS
>> +++ b/NEWS
>> @@ -10,10 +10,11 @@ Post-v2.5.0
>>     - DPDK:
>>       * New option "n_rxq" for PMD interfaces.
>>         Old 'other_config:n-dpdk-rxqs' is no longer supported.
>> +     * Support Jumbo Frames
>> +
>>     - ovs-benchmark: This utility has been removed due to lack of use and
>>       bitrot.
>>
>> -
>>  v2.5.0 - xx xxx xxxx
>>  ---------------------
>>     - Dropped support for Python older than version 2.7.  As a consequence,
>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>> index 2a06bb5..ac89ee6 100644
>> --- a/lib/netdev-dpdk.c
>> +++ b/lib/netdev-dpdk.c
>> @@ -77,6 +77,8 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 
>> 20);
>>                                      + sizeof(struct dp_packet)    \
>>                                      + RTE_PKTMBUF_HEADROOM)
>>  #define NETDEV_DPDK_MBUF_ALIGN      1024
>> +#define NETDEV_DPDK_MAX_FRAME_LEN   13312
>> +#define MTU_NOT_SET                 0
>>
>>  /* Max and min number of packets in the mempool.  OVS tries to allocate a
>>   * mempool with MAX_NB_MBUF: if this fails (because the system doesn't have
>> @@ -422,6 +424,7 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int 
>> n_rxq, int n_txq)
>>  {
>>      int diag = 0;
>>      int i;
>> +    struct rte_eth_conf conf = port_conf;
>>
>>      /* A device may report more queues than it makes available (this has
>>       * been observed for Intel xl710, which reserves some of them for
>> @@ -433,7 +436,15 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int 
>> n_rxq, int
>n_txq)
>>              VLOG_INFO("Retrying setup with (rxq:%d txq:%d)", n_rxq, n_txq);
>>          }
>>
>> -        diag = rte_eth_dev_configure(dev->port_id, n_rxq, n_txq, 
>> &port_conf);
>> +        if (dev->mtu > ETHER_MTU) {
>> +            conf.rxmode.jumbo_frame = 1;
>> +            conf.rxmode.max_rx_pkt_len = dev->max_packet_len;
>> +        } else {
>> +            conf.rxmode.jumbo_frame = 0;
>> +            conf.rxmode.max_rx_pkt_len = 0;
>> +        }
>> +
>> +        diag = rte_eth_dev_configure(dev->port_id, n_rxq, n_txq, &conf);
>>          if (diag) {
>>              break;
>>          }
>> @@ -574,8 +585,6 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int 
>> port_no,
>>  {
>>      struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
>>      int sid;
>> -    int err = 0;
>> -    uint32_t buf_size;
>>
>>      ovs_mutex_init(&netdev->mutex);
>>      ovs_mutex_lock(&netdev->mutex);
>> @@ -595,15 +604,7 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int 
>> port_no,
>>      netdev->port_id = port_no;
>>      netdev->type = type;
>>      netdev->flags = 0;
>> -    netdev->mtu = ETHER_MTU;
>> -    netdev->max_packet_len = MTU_TO_FRAME_LEN(netdev->mtu);
>> -
>> -    buf_size = dpdk_buf_size(netdev->mtu);
>> -    netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, 
>> FRAME_LEN_TO_MTU(buf_size));
>> -    if (!netdev->dpdk_mp) {
>> -        err = ENOMEM;
>> -        goto unlock;
>> -    }
>> +    netdev->mtu = MTU_NOT_SET;
>>
>>      netdev_->n_txq = NR_QUEUE;
>>      netdev_->n_rxq = NR_QUEUE;
>> @@ -612,20 +613,12 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int 
>> port_no,
>>
>>      if (type == DPDK_DEV_ETH) {
>>          netdev_dpdk_alloc_txq(netdev, NR_QUEUE);
>> -        err = dpdk_eth_dev_init(netdev);
>> -        if (err) {
>> -            goto unlock;
>> -        }
>>      }
>>
>>      list_push_back(&dpdk_list, &netdev->list_node);
>>
>> -unlock:
>> -    if (err) {
>> -        rte_free(netdev->tx_q);
>> -    }
>>      ovs_mutex_unlock(&netdev->mutex);
>> -    return err;
>> +    return 0;
>>  }
>>
>>  static int
>> @@ -643,6 +636,31 @@ dpdk_dev_parse_name(const char dev_name[], const char 
>> prefix[],
>>      return 0;
>>  }
>>
>> +static void
>> +dpdk_dev_parse_mtu(const struct smap *args, int *mtu)
>> +{
>> +    const char *mtu_str = smap_get(args, "mtu_request");
>> +    char *end_ptr = NULL;
>> +    int local_mtu;
>> +
>> +    if (!mtu_str) {
>> +        local_mtu = ETHER_MTU;
>> +    } else {
>> +        local_mtu = strtoul(mtu_str, &end_ptr, 0);
>> +        if (local_mtu < ETHER_MTU ||
>> +            local_mtu > FRAME_LEN_TO_MTU(NETDEV_DPDK_MAX_FRAME_LEN) ||
>> +            *end_ptr != '\0') {
>> +            local_mtu = ETHER_MTU;
>> +            VLOG_WARN("Invalid mtu_request parameter - defaulting to %d.\n",
>> +                    local_mtu);
>> +        } else {
>> +            VLOG_INFO("mtu_request parameter %d detected.\n", local_mtu);
>
>That message could be VLOG_DBG because it only tells about a parameter being
>detected and not if the mtu request succeed.  Other than that the patch looks
>good to me.

Great!

>
>If no one else has comments to address, maybe a commiter can fix the message
>level to avoid re-spinning the patchset.

Daniele - are you happy with this, or would you prefer that I spin a new 
version?

>
>Thanks for the patch!

Thanks for the feedback! :)

>
>Acked-by: Flavio Leitner <f...@sysclose.org>
>
>--
>fbl
>
>
>
>> +        }
>> +    }
>> +
>> +    *mtu = local_mtu;
>> +}
>> +
>>  static int
>>  vhost_construct_helper(struct netdev *netdev_) OVS_REQUIRES(dpdk_mutex)
>>  {
>> @@ -773,15 +791,72 @@ netdev_dpdk_get_config(const struct netdev *netdev, 
>> struct smap
>*args)
>>      smap_add_format(args, "configured_rx_queues", "%d", netdev->n_rxq);
>>      smap_add_format(args, "requested_tx_queues", "%d", netdev->n_txq);
>>      smap_add_format(args, "configured_tx_queues", "%d", dev->real_n_txq);
>> +    smap_add_format(args, "mtu", "%d", dev->mtu);
>>      ovs_mutex_unlock(&dev->mutex);
>>
>>      return 0;
>>  }
>>
>> +/* Set the mtu of DPDK_DEV_ETH ports */
>> +static int
>> +netdev_dpdk_set_mtu(const struct netdev *netdev, int mtu)
>> +{
>> +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>> +    int err, dpdk_mtu;
>> +    uint32_t buf_size;
>> +    struct dpdk_mp *mp;
>> +
>> +    ovs_mutex_lock(&dpdk_mutex);
>> +    ovs_mutex_lock(&dev->mutex);
>> +    if (dev->mtu == mtu) {
>> +        err = 0;
>> +        goto out;
>> +    }
>> +
>> +    buf_size = dpdk_buf_size(mtu);
>> +    dpdk_mtu = FRAME_LEN_TO_MTU(buf_size);
>> +
>> +    mp = dpdk_mp_get(dev->socket_id, dpdk_mtu);
>> +    if (!mp) {
>> +        err = ENOMEM;
>> +        goto out;
>> +    }
>> +
>> +    rte_eth_dev_stop(dev->port_id);
>> +
>> +    dev->dpdk_mp = mp;
>> +    dev->mtu = mtu;
>> +    dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
>> +
>> +    err = dpdk_eth_dev_init(dev);
>> +    if (err) {
>> +        VLOG_WARN("Unable to set MTU '%d' for '%s'; falling back to default 
>> "
>> +                  "MTU '%d'\n", mtu, dev->up.name, ETHER_MTU);
>> +        dpdk_mp_put(mp);
>> +        dev->mtu = ETHER_MTU;
>> +        mp = dpdk_mp_get(dev->socket_id, dev->mtu);
>> +        if (!mp) {
>> +            err = ENOMEM;
>> +            goto out;
>> +        }
>> +        dev->dpdk_mp = mp;
>> +        dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
>> +        dpdk_eth_dev_init(dev);
>> +        goto out;
>> +    } else {
>> +        netdev_change_seq_changed(netdev);
>> +    }
>> +out:
>> +    ovs_mutex_unlock(&dev->mutex);
>> +    ovs_mutex_unlock(&dpdk_mutex);
>> +    return err;
>> +}
>> +
>>  static int
>>  netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args)
>>  {
>>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>> +    int mtu;
>>
>>      ovs_mutex_lock(&dev->mutex);
>>      netdev->requested_n_rxq = MAX(smap_get_int(args, "n_rxq",
>> @@ -789,6 +864,14 @@ netdev_dpdk_set_config(struct netdev *netdev, const 
>> struct smap *args)
>>      netdev_change_seq_changed(netdev);
>>      ovs_mutex_unlock(&dev->mutex);
>>
>> +    dpdk_dev_parse_mtu(args, &mtu);
>> +
>> +    if (!dev->mtu) {
>> +        return netdev_dpdk_set_mtu(netdev, mtu);
>> +    } else if (mtu != dev->mtu) {
>> +        VLOG_WARN("Unable to set MTU %d for port %d; this port has 
>> immutable MTU "
>> +                  "%d\n", mtu, dev->port_id, dev->mtu);
>> +    }
>>      return 0;
>>  }
>>
>> @@ -1407,57 +1490,6 @@ netdev_dpdk_get_mtu(const struct netdev *netdev, int 
>> *mtup)
>>  }
>>
>>  static int
>> -netdev_dpdk_set_mtu(const struct netdev *netdev, int mtu)
>> -{
>> -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>> -    int old_mtu, err, dpdk_mtu;
>> -    struct dpdk_mp *old_mp;
>> -    struct dpdk_mp *mp;
>> -    uint32_t buf_size;
>> -
>> -    ovs_mutex_lock(&dpdk_mutex);
>> -    ovs_mutex_lock(&dev->mutex);
>> -    if (dev->mtu == mtu) {
>> -        err = 0;
>> -        goto out;
>> -    }
>> -
>> -    buf_size = dpdk_buf_size(mtu);
>> -    dpdk_mtu = FRAME_LEN_TO_MTU(buf_size);
>> -
>> -    mp = dpdk_mp_get(dev->socket_id, dpdk_mtu);
>> -    if (!mp) {
>> -        err = ENOMEM;
>> -        goto out;
>> -    }
>> -
>> -    rte_eth_dev_stop(dev->port_id);
>> -
>> -    old_mtu = dev->mtu;
>> -    old_mp = dev->dpdk_mp;
>> -    dev->dpdk_mp = mp;
>> -    dev->mtu = mtu;
>> -    dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
>> -
>> -    err = dpdk_eth_dev_init(dev);
>> -    if (err) {
>> -        dpdk_mp_put(mp);
>> -        dev->mtu = old_mtu;
>> -        dev->dpdk_mp = old_mp;
>> -        dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
>> -        dpdk_eth_dev_init(dev);
>> -        goto out;
>> -    }
>> -
>> -    dpdk_mp_put(old_mp);
>> -    netdev_change_seq_changed(netdev);
>> -out:
>> -    ovs_mutex_unlock(&dev->mutex);
>> -    ovs_mutex_unlock(&dpdk_mutex);
>> -    return err;
>> -}
>> -
>> -static int
>>  netdev_dpdk_get_carrier(const struct netdev *netdev_, bool *carrier);
>>
>>  static int
>> @@ -2000,6 +2032,61 @@ dpdk_vhost_user_class_init(void)
>>      return 0;
>>  }
>>
>> +/* Set the mtu of DPDK_DEV_VHOST ports */
>> +static int
>> +netdev_dpdk_vhost_set_mtu(const struct netdev *netdev, int mtu)
>> +{
>> +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>> +    int err = 0;
>> +    struct dpdk_mp *mp;
>> +
>> +    ovs_mutex_lock(&dpdk_mutex);
>> +    ovs_mutex_lock(&dev->mutex);
>> +    if (dev->mtu == mtu) {
>> +        err = 0;
>> +        goto out;
>> +    }
>> +
>> +    mp = dpdk_mp_get(dev->socket_id, mtu);
>> +    if (!mp) {
>> +        err = ENOMEM;
>> +        goto out;
>> +    }
>> +
>> +    dev->dpdk_mp = mp;
>> +    dev->mtu = mtu;
>> +    dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
>> +
>> +    netdev_change_seq_changed(netdev);
>> +out:
>> +    ovs_mutex_unlock(&dev->mutex);
>> +    ovs_mutex_unlock(&dpdk_mutex);
>> +    return err;
>> +}
>> +
>> +static int
>> +netdev_dpdk_vhost_set_config(struct netdev *netdev, const struct smap *args)
>> +{
>> +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>> +    int mtu;
>> +
>> +    ovs_mutex_lock(&dev->mutex);
>> +    netdev->requested_n_rxq = MAX(smap_get_int(args, "n_rxq",
>> +                                               netdev->requested_n_rxq), 1);
>> +    netdev_change_seq_changed(netdev);
>> +    ovs_mutex_unlock(&dev->mutex);
>> +
>> +    dpdk_dev_parse_mtu(args, &mtu);
>> +
>> +    if (!dev->mtu) {
>> +        return netdev_dpdk_vhost_set_mtu(netdev, mtu);
>> +    } else if (mtu != dev->mtu) {
>> +        VLOG_WARN("Unable to set MTU %d for vhost port; this port has 
>> immutable MTU "
>> +                  "%d\n", mtu, dev->mtu);
>> +    }
>> +    return 0;
>> +}
>> +
>>  static void
>>  dpdk_common_init(void)
>>  {
>> @@ -2136,8 +2223,9 @@ unlock_dpdk:
>>      return err;
>>  }
>>
>> -#define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT, MULTIQ, SEND, \
>> -    GET_CARRIER, GET_STATS, GET_FEATURES, GET_STATUS, RXQ_RECV)          \
>> +#define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT, SET_CONFIG, \
>> +        MULTIQ, SEND, SET_MTU, GET_CARRIER, GET_STATS, GET_FEATURES,   \
>> +        GET_STATUS, RXQ_RECV)                                          \
>>  {                                                             \
>>      NAME,                                                     \
>>      INIT,                       /* init */                    \
>> @@ -2149,7 +2237,7 @@ unlock_dpdk:
>>      DESTRUCT,                                                 \
>>      netdev_dpdk_dealloc,                                      \
>>      netdev_dpdk_get_config,                                   \
>> -    netdev_dpdk_set_config,                                   \
>> +    SET_CONFIG            ,                                   \
>>      NULL,                       /* get_tunnel_config */       \
>>      NULL,                       /* build header */            \
>>      NULL,                       /* push header */             \
>> @@ -2163,7 +2251,7 @@ unlock_dpdk:
>>      netdev_dpdk_set_etheraddr,                                \
>>      netdev_dpdk_get_etheraddr,                                \
>>      netdev_dpdk_get_mtu,                                      \
>> -    netdev_dpdk_set_mtu,                                      \
>> +    SET_MTU,                                                  \
>>      netdev_dpdk_get_ifindex,                                  \
>>      GET_CARRIER,                                              \
>>      netdev_dpdk_get_carrier_resets,                           \
>> @@ -2309,8 +2397,10 @@ static const struct netdev_class dpdk_class =
>>          NULL,
>>          netdev_dpdk_construct,
>>          netdev_dpdk_destruct,
>> +        netdev_dpdk_set_config,
>>          netdev_dpdk_set_multiq,
>>          netdev_dpdk_eth_send,
>> +        netdev_dpdk_set_mtu,
>>          netdev_dpdk_get_carrier,
>>          netdev_dpdk_get_stats,
>>          netdev_dpdk_get_features,
>> @@ -2323,8 +2413,10 @@ static const struct netdev_class dpdk_ring_class =
>>          NULL,
>>          netdev_dpdk_ring_construct,
>>          netdev_dpdk_destruct,
>> +        netdev_dpdk_set_config,
>>          netdev_dpdk_set_multiq,
>>          netdev_dpdk_ring_send,
>> +        netdev_dpdk_set_mtu,
>>          netdev_dpdk_get_carrier,
>>          netdev_dpdk_get_stats,
>>          netdev_dpdk_get_features,
>> @@ -2337,8 +2429,10 @@ static const struct netdev_class OVS_UNUSED 
>> dpdk_vhost_cuse_class =
>>          dpdk_vhost_cuse_class_init,
>>          netdev_dpdk_vhost_cuse_construct,
>>          netdev_dpdk_vhost_destruct,
>> +        netdev_dpdk_set_config,
>>          netdev_dpdk_vhost_cuse_set_multiq,
>>          netdev_dpdk_vhost_send,
>> +        NULL,
>>          netdev_dpdk_vhost_get_carrier,
>>          netdev_dpdk_vhost_get_stats,
>>          NULL,
>> @@ -2351,8 +2445,10 @@ static const struct netdev_class OVS_UNUSED 
>> dpdk_vhost_user_class =
>>          dpdk_vhost_user_class_init,
>>          netdev_dpdk_vhost_user_construct,
>>          netdev_dpdk_vhost_destruct,
>> +        netdev_dpdk_vhost_set_config,
>>          netdev_dpdk_vhost_set_multiq,
>>          netdev_dpdk_vhost_send,
>> +        netdev_dpdk_vhost_set_mtu,
>>          netdev_dpdk_vhost_get_carrier,
>>          netdev_dpdk_vhost_get_stats,
>>          NULL,

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to