> -----Original Message-----
> From: Kevin Traynor [mailto:[email protected]]
> Sent: Tuesday, August 22, 2017 7:58 PM
> To: Aaron Conole <[email protected]>; Fischetti, Antonio
> <[email protected]>
> Cc: Wojciechowicz, RobertX <[email protected]>;
> [email protected]
> Subject: Re: [ovs-dev] [PATCH v3] netdev-dpdk: Create separate memory pool for
> each port
>
> On 08/22/2017 06:14 PM, Aaron Conole wrote:
> > "Fischetti, Antonio" <[email protected]> writes:
> >
> >> Hi Kevin, pls see comments inline.
> >> I'm going to rebase and rework this patch on behalf of Robert.
> >>
> >> Thanks,
> >> Antonio
> >>
> >>> -----Original Message-----
> >>> From: [email protected] [mailto:ovs-dev-
> [email protected]]
> >>> On Behalf Of Kevin Traynor
> >>> Sent: Tuesday, April 11, 2017 9:37 AM
> >>> To: Wojciechowicz, RobertX <[email protected]>;
> >>> [email protected]
> >>> Subject: Re: [ovs-dev] [PATCH v3] netdev-dpdk: Create separate memory pool
> for
> >>> each port
> >>>
> >>> On 04/07/2017 09:20 AM, Robert Wojciechowicz wrote:
> >>>> Since it's possible to delete memory pool in DPDK
> >>>> we can try to estimate better required memory size
> >>>> when port is reconfigured, e.g. with different number
> >>>> of rx queues.
> >>>>
> >>>> Signed-off-by: Robert Wojciechowicz <[email protected]>
> >>>> Acked-by: Ian Stokes <[email protected]>
> >>>> ---
> >>>> v2:
> >>>> - removing mempool reference counter
> >>>> - making sure mempool name isn't longer than RTE_MEMPOOL_NAMESIZE
> >>>>
> >>>> v3:
> >>>> - adding memory for corner cases
> >>>> ---
> >>>> lib/netdev-dpdk.c | 118
> >>>> ++++++++++++++++++++++++++-----------------------
> ---
> >>> --
> >>>> 1 file changed, 57 insertions(+), 61 deletions(-)
> >>>>
> >>>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> >>>> index ddc651b..6b781ac 100644
> >>>> --- a/lib/netdev-dpdk.c
> >>>> +++ b/lib/netdev-dpdk.c
> >>>> @@ -275,14 +275,12 @@ static struct ovs_list dpdk_list
> >>> OVS_GUARDED_BY(dpdk_mutex)
> >>>> static struct ovs_mutex dpdk_mp_mutex OVS_ACQ_AFTER(dpdk_mutex)
> >>>> = OVS_MUTEX_INITIALIZER;
> >>>>
> >>>> -static struct ovs_list dpdk_mp_list OVS_GUARDED_BY(dpdk_mp_mutex)
> >>>> - = OVS_LIST_INITIALIZER(&dpdk_mp_list);
> >>>> -
> >>>> struct dpdk_mp {
> >>>> struct rte_mempool *mp;
> >>>> int mtu;
> >>>> int socket_id;
> >>>> - int refcount;
> >>>> + char if_name[IFNAMSIZ];
> >>>> + unsigned mp_size;
> >>>> struct ovs_list list_node OVS_GUARDED_BY(dpdk_mp_mutex);
> >>>> };
> >>>>
> >>>> @@ -463,78 +461,82 @@ ovs_rte_pktmbuf_init(struct rte_mempool *mp,
> >>>> dp_packet_init_dpdk((struct dp_packet *) pkt, pkt->buf_len);
> >>>> }
> >>>>
> >>>> +/*
> >>>> + * XXX Full DPDK memory pool name must be unique
> >>>> + * and cannot be longer than RTE_MEMPOOL_NAMESIZE
> >>>> + */
> >>>> +static char *
> >>>> +dpdk_mp_name(struct dpdk_mp *dmp)
> >>>> +{
> >>>> + uint32_t h = hash_string(dmp->if_name, 0);
> >>>> + char *mp_name = xcalloc(RTE_MEMPOOL_NAMESIZE, sizeof(char));
> >>>> + int ret = snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_%x_%d_%u",
> >>>> + h, dmp->mtu, dmp->mp_size);
> >>>> + if (ret < 0 || ret >= RTE_MEMPOOL_NAMESIZE) {
> >>>> + return NULL;
> >>>> + }
> >>>> + return mp_name;
> >>>> +}
> >>>> +
> >>>> static struct dpdk_mp *
> >>>> -dpdk_mp_create(int socket_id, int mtu)
> >>>> +dpdk_mp_create(struct netdev_dpdk *dev, int mtu)
> >>>> {
> >>>> struct rte_pktmbuf_pool_private mbp_priv;
> >>>> struct dpdk_mp *dmp;
> >>>> - unsigned mp_size;
> >>>> char *mp_name;
> >>>>
> >>>> dmp = dpdk_rte_mzalloc(sizeof *dmp);
> >>>> if (!dmp) {
> >>>> return NULL;
> >>>> }
> >>>> - dmp->socket_id = socket_id;
> >>>> + dmp->socket_id = dev->requested_socket_id;
> >>>> dmp->mtu = mtu;
> >>>> - dmp->refcount = 1;
> >>>> + strncpy(dmp->if_name, dev->up.name, IFNAMSIZ);
> >>>> mbp_priv.mbuf_data_room_size = MBUF_SIZE(mtu) - sizeof(struct
> >>> dp_packet);
> >>>> mbp_priv.mbuf_priv_size = sizeof(struct dp_packet)
> >>>> - - sizeof(struct rte_mbuf);
> >>>> - /* XXX: this is a really rough method of provisioning memory.
> >>>> - * It's impossible to determine what the exact memory requirements
> are
> >>>> - * when the number of ports and rxqs that utilize a particular
> mempool
> >>> can
> >>>> - * change dynamically at runtime. For now, use this rough
> >>>> heurisitic.
> >>>> + - sizeof(struct rte_mbuf);
> >>>> + /*
> >>>> + * XXX: rough estimation of memory required for port:
> >>>> + * <packets required to fill the device rxqs>
> >>>> + * + <packets that could be stuck on other ports txqs>
> >>>> + * + <packets in the pmd threads>
> >>>> + * + <additional memory for corner cases>
> >>>> */
> >>>> - if (mtu >= ETHER_MTU) {
> >>>> - mp_size = MAX_NB_MBUF;
> >>>> - } else {
> >>>> - mp_size = MIN_NB_MBUF;
> >>>> - }
> >>>>
> >>>> - do {
> >>>> - mp_name = xasprintf("ovs_mp_%d_%d_%u", dmp->mtu, dmp->socket_id,
> >>>> - mp_size);
> >>>> + dmp->mp_size = dev->requested_n_rxq * dev->requested_rxq_size
> >>>> + + dev->requested_n_txq * dev->requested_txq_size
> >>>> + + MIN(RTE_MAX_LCORE, dev->requested_n_rxq) * NETDEV_MAX_BURST
> >>>> + + MIN_NB_MBUF;
> >>>>
> >>>> - dmp->mp = rte_mempool_create(mp_name, mp_size, MBUF_SIZE(mtu),
> >>>> + do {
> >>>> + mp_name = dpdk_mp_name(dmp);
> >>>> + dmp->mp = rte_mempool_create(mp_name, dmp->mp_size,
> MBUF_SIZE(mtu),
> >>>> MP_CACHE_SZ,
> >>>> sizeof(struct
> >>> rte_pktmbuf_pool_private),
> >>>> rte_pktmbuf_pool_init, &mbp_priv,
> >>>> ovs_rte_pktmbuf_init, NULL,
> >>>> - socket_id, 0);
> >>>> + dmp->socket_id, 0);
> >>>
> >>> fyi - either you or Hemant will need to rebase if his patch [1] or yours
> >>> gets gets merged as he has some tidy up in this area.
> >>>
> >>> [1] https://mail.openvswitch.org/pipermail/ovs-dev/2017-March/330186.html
> >>>
> >>
> >> [Antonio]
> >> Hemant's patch was upstreamed with commit id
> >> 401b70d632729555b555040e4cee13a5ffc5ed3d
> >> "netdev-dpdk: leverage the mempool offload support". So this patch needs
> >> to be rebased.
> >>
> >>
> >>>> if (dmp->mp) {
> >>>> VLOG_DBG("Allocated \"%s\" mempool with %u mbufs",
> >>>> - mp_name, mp_size);
> >>>> + mp_name, dmp->mp_size);
> >>>> }
> >>>> free(mp_name);
> >>>> if (dmp->mp) {
> >>>> return dmp;
> >>>> }
> >>>> - } while (rte_errno == ENOMEM && (mp_size /= 2) >= MIN_NB_MBUF);
> >>>> + } while (rte_errno == ENOMEM && (dmp->mp_size /= 2) >= MIN_NB_MBUF);
> >>>>
> >>>> rte_free(dmp);
> >>>> return NULL;
> >>>> }
> >>>>
> >>>> static struct dpdk_mp *
> >>>> -dpdk_mp_get(int socket_id, int mtu)
> >>>> +dpdk_mp_get(struct netdev_dpdk *dev, int mtu)
> >>>> {
> >>>> struct dpdk_mp *dmp;
> >>>>
> >>>> ovs_mutex_lock(&dpdk_mp_mutex);
> >>>> - LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) {
> >>>> - if (dmp->socket_id == socket_id && dmp->mtu == mtu) {
> >>>
> >>> The mtu is rounded from the requested_mtu, so it may be the same value
> >>> for different requested_mtus. This leads to trying to create a mempool
> >>> of the same name which will fail with EEXISTS that is not handled. You
> >>> either need to put in a check to prevent the same mempool being created
> >>> (similar to above) or handle the case where rte_mempool_create returns
> >>> EEXISTS.
> >>
> >> [Antonio]
> >> I think the only viable option is to restore the existing
> >> dpdk_mp_list mechanism above.
> >> Because when mempool create fails with EEXIST the returned
> >> value is NULL and it seems to me there's no way to retrieve
> >> - and then return - a pointer to the existing mempool.
> >> What do you think?
> >
> > These two APIs can allow retrieving the mempool based on mempool
> > characteristics, no?
> >
> > struct rte_mempool *rte_mempool_lookup(const char *name);
> >
> > void rte_mempool_walk(void (*func)(struct rte_mempool *, void *arg),
> > void *arg);
> >
> > I think in the case of EEXIST, one of these would be sufficient to
> > locate the appropriate mempool. Maybe I missed something.
> >
>
> Sounds right.
>
> Antonio, I'd probably favour letting DPDK api do the work than trying to
> catch it in OVS but you'd need to have a look at how clean either way is.
[Antonio]
Thanks guys for your suggestions, rte_mempool_lookup seems to be really useful.
I'll post a reworked version within tomorrow.
>
> >>
> >>>
> >>>
> >>>> - dmp->refcount++;
> >>>> - goto out;
> >>>> - }
> >>>> - }
> >>>> -
> >>>> - dmp = dpdk_mp_create(socket_id, mtu);
> >>>> - if (dmp) {
> >>>> - ovs_list_push_back(&dpdk_mp_list, &dmp->list_node);
> >>>> - }
> >>>> -
> >>>> -out:
> >>>> + dmp = dpdk_mp_create(dev, mtu);
> >>>> ovs_mutex_unlock(&dpdk_mp_mutex);
> >>>>
> >>>> return dmp;
> >>>> @@ -543,18 +545,18 @@ out:
> >>>> static void
> >>>> dpdk_mp_put(struct dpdk_mp *dmp)
> >>>> {
> >>>> + char *mp_name;
> >>>> +
> >>>> if (!dmp) {
> >>>> return;
> >>>> }
> >>>>
> >>>> ovs_mutex_lock(&dpdk_mp_mutex);
> >>>> - ovs_assert(dmp->refcount);
> >>>> -
> >>>> - if (!--dmp->refcount) {
> >>>> - ovs_list_remove(&dmp->list_node);
> >>>> - rte_mempool_free(dmp->mp);
> >>>> - rte_free(dmp);
> >>>> - }
> >>>> + mp_name = dpdk_mp_name(dmp);
> >>>> + VLOG_DBG("Releasing \"%s\" mempool", mp_name);
> >>>> + free(mp_name);
> >>>> + rte_mempool_free(dmp->mp);
> >>>> + rte_free(dmp);
> >>>> ovs_mutex_unlock(&dpdk_mp_mutex);
> >>>> }
> >>>>
> >>>> @@ -569,7 +571,7 @@ netdev_dpdk_mempool_configure(struct netdev_dpdk
> >>>> *dev)
> >>>> uint32_t buf_size = dpdk_buf_size(dev->requested_mtu);
> >>>> struct dpdk_mp *mp;
> >>>>
> >>>> - mp = dpdk_mp_get(dev->requested_socket_id,
> FRAME_LEN_TO_MTU(buf_size));
> >>>> + mp = dpdk_mp_get(dev, FRAME_LEN_TO_MTU(buf_size));
> >>>> if (!mp) {
> >>>> VLOG_ERR("Insufficient memory to create memory pool for netdev "
> >>>> "%s, with MTU %d on socket %d\n",
> >>>> @@ -3131,12 +3133,9 @@ netdev_dpdk_reconfigure(struct netdev *netdev)
> >>>>
> >>>> rte_eth_dev_stop(dev->port_id);
> >>>>
> >>>> - if (dev->mtu != dev->requested_mtu
> >>>> - || dev->socket_id != dev->requested_socket_id) {
> >>>> - err = netdev_dpdk_mempool_configure(dev);
> >>>> - if (err) {
> >>>> - goto out;
> >>>> - }
> >>>> + err = netdev_dpdk_mempool_configure(dev);
> >>>> + if (err) {
> >>>> + goto out;
> >>>> }
> >>>>
> >>>> netdev->n_txq = dev->requested_n_txq;
> >>>> @@ -3174,14 +3173,11 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk
> >>> *dev)
> >>>>
> >>>> netdev_dpdk_remap_txqs(dev);
> >>>>
> >>>> - if (dev->requested_socket_id != dev->socket_id
> >>>> - || dev->requested_mtu != dev->mtu) {
> >>>> - err = netdev_dpdk_mempool_configure(dev);
> >>>> - if (err) {
> >>>> - return err;
> >>>> - } else {
> >>>> - netdev_change_seq_changed(&dev->up);
> >>>> - }
> >>>> + err = netdev_dpdk_mempool_configure(dev);
> >>>> + if (err) {
> >>>> + return err;
> >>>> + } else {
> >>>> + netdev_change_seq_changed(&dev->up);
> >>>> }
> >>>>
> >>>> if (netdev_dpdk_get_vid(dev) >= 0) {
> >>>>
> >>>
> >>> _______________________________________________
> >>> dev mailing list
> >>> [email protected]
> >>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >> _______________________________________________
> >> dev mailing list
> >> [email protected]
> >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev