On 08/22/2017 06:14 PM, Aaron Conole wrote:
> "Fischetti, Antonio" <[email protected]> writes:
>
>> Hi Kevin, pls see comments inline.
>> I'm going to rebase and rework this patch on behalf of Robert.
>>
>> Thanks,
>> Antonio
>>
>>> -----Original Message-----
>>> From: [email protected]
>>> [mailto:[email protected]]
>>> On Behalf Of Kevin Traynor
>>> Sent: Tuesday, April 11, 2017 9:37 AM
>>> To: Wojciechowicz, RobertX <[email protected]>;
>>> [email protected]
>>> Subject: Re: [ovs-dev] [PATCH v3] netdev-dpdk: Create separate memory pool
>>> for
>>> each port
>>>
>>> On 04/07/2017 09:20 AM, Robert Wojciechowicz wrote:
>>>> Since it's possible to delete memory pool in DPDK
>>>> we can try to estimate better required memory size
>>>> when port is reconfigured, e.g. with different number
>>>> of rx queues.
>>>>
>>>> Signed-off-by: Robert Wojciechowicz <[email protected]>
>>>> Acked-by: Ian Stokes <[email protected]>
>>>> ---
>>>> v2:
>>>> - removing mempool reference counter
>>>> - making sure mempool name isn't longer than RTE_MEMPOOL_NAMESIZE
>>>>
>>>> v3:
>>>> - adding memory for corner cases
>>>> ---
>>>> lib/netdev-dpdk.c | 118
>>>> ++++++++++++++++++++++++++--------------------------
>>> --
>>>> 1 file changed, 57 insertions(+), 61 deletions(-)
>>>>
>>>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>>>> index ddc651b..6b781ac 100644
>>>> --- a/lib/netdev-dpdk.c
>>>> +++ b/lib/netdev-dpdk.c
>>>> @@ -275,14 +275,12 @@ static struct ovs_list dpdk_list
>>> OVS_GUARDED_BY(dpdk_mutex)
>>>> static struct ovs_mutex dpdk_mp_mutex OVS_ACQ_AFTER(dpdk_mutex)
>>>> = OVS_MUTEX_INITIALIZER;
>>>>
>>>> -static struct ovs_list dpdk_mp_list OVS_GUARDED_BY(dpdk_mp_mutex)
>>>> - = OVS_LIST_INITIALIZER(&dpdk_mp_list);
>>>> -
>>>> struct dpdk_mp {
>>>> struct rte_mempool *mp;
>>>> int mtu;
>>>> int socket_id;
>>>> - int refcount;
>>>> + char if_name[IFNAMSIZ];
>>>> + unsigned mp_size;
>>>> struct ovs_list list_node OVS_GUARDED_BY(dpdk_mp_mutex);
>>>> };
>>>>
>>>> @@ -463,78 +461,82 @@ ovs_rte_pktmbuf_init(struct rte_mempool *mp,
>>>> dp_packet_init_dpdk((struct dp_packet *) pkt, pkt->buf_len);
>>>> }
>>>>
>>>> +/*
>>>> + * XXX Full DPDK memory pool name must be unique
>>>> + * and cannot be longer than RTE_MEMPOOL_NAMESIZE
>>>> + */
>>>> +static char *
>>>> +dpdk_mp_name(struct dpdk_mp *dmp)
>>>> +{
>>>> + uint32_t h = hash_string(dmp->if_name, 0);
>>>> + char *mp_name = xcalloc(RTE_MEMPOOL_NAMESIZE, sizeof(char));
>>>> + int ret = snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_%x_%d_%u",
>>>> + h, dmp->mtu, dmp->mp_size);
>>>> + if (ret < 0 || ret >= RTE_MEMPOOL_NAMESIZE) {
>>>> + return NULL;
>>>> + }
>>>> + return mp_name;
>>>> +}
>>>> +
>>>> static struct dpdk_mp *
>>>> -dpdk_mp_create(int socket_id, int mtu)
>>>> +dpdk_mp_create(struct netdev_dpdk *dev, int mtu)
>>>> {
>>>> struct rte_pktmbuf_pool_private mbp_priv;
>>>> struct dpdk_mp *dmp;
>>>> - unsigned mp_size;
>>>> char *mp_name;
>>>>
>>>> dmp = dpdk_rte_mzalloc(sizeof *dmp);
>>>> if (!dmp) {
>>>> return NULL;
>>>> }
>>>> - dmp->socket_id = socket_id;
>>>> + dmp->socket_id = dev->requested_socket_id;
>>>> dmp->mtu = mtu;
>>>> - dmp->refcount = 1;
>>>> + strncpy(dmp->if_name, dev->up.name, IFNAMSIZ);
>>>> mbp_priv.mbuf_data_room_size = MBUF_SIZE(mtu) - sizeof(struct
>>> dp_packet);
>>>> mbp_priv.mbuf_priv_size = sizeof(struct dp_packet)
>>>> - - sizeof(struct rte_mbuf);
>>>> - /* XXX: this is a really rough method of provisioning memory.
>>>> - * It's impossible to determine what the exact memory requirements are
>>>> - * when the number of ports and rxqs that utilize a particular mempool
>>> can
>>>> - * change dynamically at runtime. For now, use this rough heurisitic.
>>>> + - sizeof(struct rte_mbuf);
>>>> + /*
>>>> + * XXX: rough estimation of memory required for port:
>>>> + * <packets required to fill the device rxqs>
>>>> + * + <packets that could be stuck on other ports txqs>
>>>> + * + <packets in the pmd threads>
>>>> + * + <additional memory for corner cases>
>>>> */
>>>> - if (mtu >= ETHER_MTU) {
>>>> - mp_size = MAX_NB_MBUF;
>>>> - } else {
>>>> - mp_size = MIN_NB_MBUF;
>>>> - }
>>>>
>>>> - do {
>>>> - mp_name = xasprintf("ovs_mp_%d_%d_%u", dmp->mtu, dmp->socket_id,
>>>> - mp_size);
>>>> + dmp->mp_size = dev->requested_n_rxq * dev->requested_rxq_size
>>>> + + dev->requested_n_txq * dev->requested_txq_size
>>>> + + MIN(RTE_MAX_LCORE, dev->requested_n_rxq) * NETDEV_MAX_BURST
>>>> + + MIN_NB_MBUF;
>>>>
>>>> - dmp->mp = rte_mempool_create(mp_name, mp_size, MBUF_SIZE(mtu),
>>>> + do {
>>>> + mp_name = dpdk_mp_name(dmp);
>>>> + dmp->mp = rte_mempool_create(mp_name, dmp->mp_size,
>>>> MBUF_SIZE(mtu),
>>>> MP_CACHE_SZ,
>>>> sizeof(struct
>>> rte_pktmbuf_pool_private),
>>>> rte_pktmbuf_pool_init, &mbp_priv,
>>>> ovs_rte_pktmbuf_init, NULL,
>>>> - socket_id, 0);
>>>> + dmp->socket_id, 0);
>>>
>>> fyi - either you or Hemant will need to rebase if his patch [1] or yours
>>> gets gets merged as he has some tidy up in this area.
>>>
>>> [1] https://mail.openvswitch.org/pipermail/ovs-dev/2017-March/330186.html
>>>
>>
>> [Antonio]
>> Hemant's patch was upstreamed with commit id
>> 401b70d632729555b555040e4cee13a5ffc5ed3d
>> "netdev-dpdk: leverage the mempool offload support". So this patch needs
>> to be rebased.
>>
>>
>>>> if (dmp->mp) {
>>>> VLOG_DBG("Allocated \"%s\" mempool with %u mbufs",
>>>> - mp_name, mp_size);
>>>> + mp_name, dmp->mp_size);
>>>> }
>>>> free(mp_name);
>>>> if (dmp->mp) {
>>>> return dmp;
>>>> }
>>>> - } while (rte_errno == ENOMEM && (mp_size /= 2) >= MIN_NB_MBUF);
>>>> + } while (rte_errno == ENOMEM && (dmp->mp_size /= 2) >= MIN_NB_MBUF);
>>>>
>>>> rte_free(dmp);
>>>> return NULL;
>>>> }
>>>>
>>>> static struct dpdk_mp *
>>>> -dpdk_mp_get(int socket_id, int mtu)
>>>> +dpdk_mp_get(struct netdev_dpdk *dev, int mtu)
>>>> {
>>>> struct dpdk_mp *dmp;
>>>>
>>>> ovs_mutex_lock(&dpdk_mp_mutex);
>>>> - LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) {
>>>> - if (dmp->socket_id == socket_id && dmp->mtu == mtu) {
>>>
>>> The mtu is rounded from the requested_mtu, so it may be the same value
>>> for different requested_mtus. This leads to trying to create a mempool
>>> of the same name which will fail with EEXISTS that is not handled. You
>>> either need to put in a check to prevent the same mempool being created
>>> (similar to above) or handle the case where rte_mempool_create returns
>>> EEXISTS.
>>
>> [Antonio]
>> I think the only viable option is to restore the existing
>> dpdk_mp_list mechanism above.
>> Because when mempool create fails with EEXIST the returned
>> value is NULL and it seems to me there's no way to retrieve
>> - and then return - a pointer to the existing mempool.
>> What do you think?
>
> These two APIs can allow retrieving the mempool based on mempool
> characteristics, no?
>
> struct rte_mempool *rte_mempool_lookup(const char *name);
>
> void rte_mempool_walk(void (*func)(struct rte_mempool *, void *arg),
> void *arg);
>
> I think in the case of EEXIST, one of these would be sufficient to
> locate the appropriate mempool. Maybe I missed something.
>
Sounds right.
Antonio, I'd probably favour letting DPDK api do the work than trying to
catch it in OVS but you'd need to have a look at how clean either way is.
>>
>>>
>>>
>>>> - dmp->refcount++;
>>>> - goto out;
>>>> - }
>>>> - }
>>>> -
>>>> - dmp = dpdk_mp_create(socket_id, mtu);
>>>> - if (dmp) {
>>>> - ovs_list_push_back(&dpdk_mp_list, &dmp->list_node);
>>>> - }
>>>> -
>>>> -out:
>>>> + dmp = dpdk_mp_create(dev, mtu);
>>>> ovs_mutex_unlock(&dpdk_mp_mutex);
>>>>
>>>> return dmp;
>>>> @@ -543,18 +545,18 @@ out:
>>>> static void
>>>> dpdk_mp_put(struct dpdk_mp *dmp)
>>>> {
>>>> + char *mp_name;
>>>> +
>>>> if (!dmp) {
>>>> return;
>>>> }
>>>>
>>>> ovs_mutex_lock(&dpdk_mp_mutex);
>>>> - ovs_assert(dmp->refcount);
>>>> -
>>>> - if (!--dmp->refcount) {
>>>> - ovs_list_remove(&dmp->list_node);
>>>> - rte_mempool_free(dmp->mp);
>>>> - rte_free(dmp);
>>>> - }
>>>> + mp_name = dpdk_mp_name(dmp);
>>>> + VLOG_DBG("Releasing \"%s\" mempool", mp_name);
>>>> + free(mp_name);
>>>> + rte_mempool_free(dmp->mp);
>>>> + rte_free(dmp);
>>>> ovs_mutex_unlock(&dpdk_mp_mutex);
>>>> }
>>>>
>>>> @@ -569,7 +571,7 @@ netdev_dpdk_mempool_configure(struct netdev_dpdk *dev)
>>>> uint32_t buf_size = dpdk_buf_size(dev->requested_mtu);
>>>> struct dpdk_mp *mp;
>>>>
>>>> - mp = dpdk_mp_get(dev->requested_socket_id,
>>>> FRAME_LEN_TO_MTU(buf_size));
>>>> + mp = dpdk_mp_get(dev, FRAME_LEN_TO_MTU(buf_size));
>>>> if (!mp) {
>>>> VLOG_ERR("Insufficient memory to create memory pool for netdev "
>>>> "%s, with MTU %d on socket %d\n",
>>>> @@ -3131,12 +3133,9 @@ netdev_dpdk_reconfigure(struct netdev *netdev)
>>>>
>>>> rte_eth_dev_stop(dev->port_id);
>>>>
>>>> - if (dev->mtu != dev->requested_mtu
>>>> - || dev->socket_id != dev->requested_socket_id) {
>>>> - err = netdev_dpdk_mempool_configure(dev);
>>>> - if (err) {
>>>> - goto out;
>>>> - }
>>>> + err = netdev_dpdk_mempool_configure(dev);
>>>> + if (err) {
>>>> + goto out;
>>>> }
>>>>
>>>> netdev->n_txq = dev->requested_n_txq;
>>>> @@ -3174,14 +3173,11 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk
>>> *dev)
>>>>
>>>> netdev_dpdk_remap_txqs(dev);
>>>>
>>>> - if (dev->requested_socket_id != dev->socket_id
>>>> - || dev->requested_mtu != dev->mtu) {
>>>> - err = netdev_dpdk_mempool_configure(dev);
>>>> - if (err) {
>>>> - return err;
>>>> - } else {
>>>> - netdev_change_seq_changed(&dev->up);
>>>> - }
>>>> + err = netdev_dpdk_mempool_configure(dev);
>>>> + if (err) {
>>>> + return err;
>>>> + } else {
>>>> + netdev_change_seq_changed(&dev->up);
>>>> }
>>>>
>>>> if (netdev_dpdk_get_vid(dev) >= 0) {
>>>>
>>>
>>> _______________________________________________
>>> dev mailing list
>>> [email protected]
>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>> _______________________________________________
>> dev mailing list
>> [email protected]
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev