Hi Kevin, pls see comments inline. I'm going to rebase and rework this patch on behalf of Robert.
Thanks, Antonio > -----Original Message----- > From: ovs-dev-boun...@openvswitch.org [mailto:ovs-dev-boun...@openvswitch.org] > On Behalf Of Kevin Traynor > Sent: Tuesday, April 11, 2017 9:37 AM > To: Wojciechowicz, RobertX <robertx.wojciechow...@intel.com>; > d...@openvswitch.org > Subject: Re: [ovs-dev] [PATCH v3] netdev-dpdk: Create separate memory pool for > each port > > On 04/07/2017 09:20 AM, Robert Wojciechowicz wrote: > > Since it's possible to delete memory pool in DPDK > > we can try to estimate better required memory size > > when port is reconfigured, e.g. with different number > > of rx queues. > > > > Signed-off-by: Robert Wojciechowicz <robertx.wojciechow...@intel.com> > > Acked-by: Ian Stokes <ian.sto...@intel.com> > > --- > > v2: > > - removing mempool reference counter > > - making sure mempool name isn't longer than RTE_MEMPOOL_NAMESIZE > > > > v3: > > - adding memory for corner cases > > --- > > lib/netdev-dpdk.c | 118 > > ++++++++++++++++++++++++++-------------------------- > -- > > 1 file changed, 57 insertions(+), 61 deletions(-) > > > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > > index ddc651b..6b781ac 100644 > > --- a/lib/netdev-dpdk.c > > +++ b/lib/netdev-dpdk.c > > @@ -275,14 +275,12 @@ static struct ovs_list dpdk_list > OVS_GUARDED_BY(dpdk_mutex) > > static struct ovs_mutex dpdk_mp_mutex OVS_ACQ_AFTER(dpdk_mutex) > > = OVS_MUTEX_INITIALIZER; > > > > -static struct ovs_list dpdk_mp_list OVS_GUARDED_BY(dpdk_mp_mutex) > > - = OVS_LIST_INITIALIZER(&dpdk_mp_list); > > - > > struct dpdk_mp { > > struct rte_mempool *mp; > > int mtu; > > int socket_id; > > - int refcount; > > + char if_name[IFNAMSIZ]; > > + unsigned mp_size; > > struct ovs_list list_node OVS_GUARDED_BY(dpdk_mp_mutex); > > }; > > > > @@ -463,78 +461,82 @@ ovs_rte_pktmbuf_init(struct rte_mempool *mp, > > dp_packet_init_dpdk((struct dp_packet *) pkt, pkt->buf_len); > > } > > > > +/* > > + * XXX Full DPDK memory pool name must be unique > > + * and cannot be longer than RTE_MEMPOOL_NAMESIZE > > + */ > > +static char * > > +dpdk_mp_name(struct dpdk_mp *dmp) > > +{ > > + uint32_t h = hash_string(dmp->if_name, 0); > > + char *mp_name = xcalloc(RTE_MEMPOOL_NAMESIZE, sizeof(char)); > > + int ret = snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_%x_%d_%u", > > + h, dmp->mtu, dmp->mp_size); > > + if (ret < 0 || ret >= RTE_MEMPOOL_NAMESIZE) { > > + return NULL; > > + } > > + return mp_name; > > +} > > + > > static struct dpdk_mp * > > -dpdk_mp_create(int socket_id, int mtu) > > +dpdk_mp_create(struct netdev_dpdk *dev, int mtu) > > { > > struct rte_pktmbuf_pool_private mbp_priv; > > struct dpdk_mp *dmp; > > - unsigned mp_size; > > char *mp_name; > > > > dmp = dpdk_rte_mzalloc(sizeof *dmp); > > if (!dmp) { > > return NULL; > > } > > - dmp->socket_id = socket_id; > > + dmp->socket_id = dev->requested_socket_id; > > dmp->mtu = mtu; > > - dmp->refcount = 1; > > + strncpy(dmp->if_name, dev->up.name, IFNAMSIZ); > > mbp_priv.mbuf_data_room_size = MBUF_SIZE(mtu) - sizeof(struct > dp_packet); > > mbp_priv.mbuf_priv_size = sizeof(struct dp_packet) > > - - sizeof(struct rte_mbuf); > > - /* XXX: this is a really rough method of provisioning memory. > > - * It's impossible to determine what the exact memory requirements are > > - * when the number of ports and rxqs that utilize a particular mempool > can > > - * change dynamically at runtime. For now, use this rough heurisitic. > > + - sizeof(struct rte_mbuf); > > + /* > > + * XXX: rough estimation of memory required for port: > > + * <packets required to fill the device rxqs> > > + * + <packets that could be stuck on other ports txqs> > > + * + <packets in the pmd threads> > > + * + <additional memory for corner cases> > > */ > > - if (mtu >= ETHER_MTU) { > > - mp_size = MAX_NB_MBUF; > > - } else { > > - mp_size = MIN_NB_MBUF; > > - } > > > > - do { > > - mp_name = xasprintf("ovs_mp_%d_%d_%u", dmp->mtu, dmp->socket_id, > > - mp_size); > > + dmp->mp_size = dev->requested_n_rxq * dev->requested_rxq_size > > + + dev->requested_n_txq * dev->requested_txq_size > > + + MIN(RTE_MAX_LCORE, dev->requested_n_rxq) * NETDEV_MAX_BURST > > + + MIN_NB_MBUF; > > > > - dmp->mp = rte_mempool_create(mp_name, mp_size, MBUF_SIZE(mtu), > > + do { > > + mp_name = dpdk_mp_name(dmp); > > + dmp->mp = rte_mempool_create(mp_name, dmp->mp_size, MBUF_SIZE(mtu), > > MP_CACHE_SZ, > > sizeof(struct > rte_pktmbuf_pool_private), > > rte_pktmbuf_pool_init, &mbp_priv, > > ovs_rte_pktmbuf_init, NULL, > > - socket_id, 0); > > + dmp->socket_id, 0); > > fyi - either you or Hemant will need to rebase if his patch [1] or yours > gets gets merged as he has some tidy up in this area. > > [1] https://mail.openvswitch.org/pipermail/ovs-dev/2017-March/330186.html > [Antonio] Hemant's patch was upstreamed with commit id 401b70d632729555b555040e4cee13a5ffc5ed3d "netdev-dpdk: leverage the mempool offload support". So this patch needs to be rebased. > > if (dmp->mp) { > > VLOG_DBG("Allocated \"%s\" mempool with %u mbufs", > > - mp_name, mp_size); > > + mp_name, dmp->mp_size); > > } > > free(mp_name); > > if (dmp->mp) { > > return dmp; > > } > > - } while (rte_errno == ENOMEM && (mp_size /= 2) >= MIN_NB_MBUF); > > + } while (rte_errno == ENOMEM && (dmp->mp_size /= 2) >= MIN_NB_MBUF); > > > > rte_free(dmp); > > return NULL; > > } > > > > static struct dpdk_mp * > > -dpdk_mp_get(int socket_id, int mtu) > > +dpdk_mp_get(struct netdev_dpdk *dev, int mtu) > > { > > struct dpdk_mp *dmp; > > > > ovs_mutex_lock(&dpdk_mp_mutex); > > - LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) { > > - if (dmp->socket_id == socket_id && dmp->mtu == mtu) { > > The mtu is rounded from the requested_mtu, so it may be the same value > for different requested_mtus. This leads to trying to create a mempool > of the same name which will fail with EEXISTS that is not handled. You > either need to put in a check to prevent the same mempool being created > (similar to above) or handle the case where rte_mempool_create returns > EEXISTS. [Antonio] I think the only viable option is to restore the existing dpdk_mp_list mechanism above. Because when mempool create fails with EEXIST the returned value is NULL and it seems to me there's no way to retrieve - and then return - a pointer to the existing mempool. What do you think? > > > > - dmp->refcount++; > > - goto out; > > - } > > - } > > - > > - dmp = dpdk_mp_create(socket_id, mtu); > > - if (dmp) { > > - ovs_list_push_back(&dpdk_mp_list, &dmp->list_node); > > - } > > - > > -out: > > + dmp = dpdk_mp_create(dev, mtu); > > ovs_mutex_unlock(&dpdk_mp_mutex); > > > > return dmp; > > @@ -543,18 +545,18 @@ out: > > static void > > dpdk_mp_put(struct dpdk_mp *dmp) > > { > > + char *mp_name; > > + > > if (!dmp) { > > return; > > } > > > > ovs_mutex_lock(&dpdk_mp_mutex); > > - ovs_assert(dmp->refcount); > > - > > - if (!--dmp->refcount) { > > - ovs_list_remove(&dmp->list_node); > > - rte_mempool_free(dmp->mp); > > - rte_free(dmp); > > - } > > + mp_name = dpdk_mp_name(dmp); > > + VLOG_DBG("Releasing \"%s\" mempool", mp_name); > > + free(mp_name); > > + rte_mempool_free(dmp->mp); > > + rte_free(dmp); > > ovs_mutex_unlock(&dpdk_mp_mutex); > > } > > > > @@ -569,7 +571,7 @@ netdev_dpdk_mempool_configure(struct netdev_dpdk *dev) > > uint32_t buf_size = dpdk_buf_size(dev->requested_mtu); > > struct dpdk_mp *mp; > > > > - mp = dpdk_mp_get(dev->requested_socket_id, FRAME_LEN_TO_MTU(buf_size)); > > + mp = dpdk_mp_get(dev, FRAME_LEN_TO_MTU(buf_size)); > > if (!mp) { > > VLOG_ERR("Insufficient memory to create memory pool for netdev " > > "%s, with MTU %d on socket %d\n", > > @@ -3131,12 +3133,9 @@ netdev_dpdk_reconfigure(struct netdev *netdev) > > > > rte_eth_dev_stop(dev->port_id); > > > > - if (dev->mtu != dev->requested_mtu > > - || dev->socket_id != dev->requested_socket_id) { > > - err = netdev_dpdk_mempool_configure(dev); > > - if (err) { > > - goto out; > > - } > > + err = netdev_dpdk_mempool_configure(dev); > > + if (err) { > > + goto out; > > } > > > > netdev->n_txq = dev->requested_n_txq; > > @@ -3174,14 +3173,11 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk > *dev) > > > > netdev_dpdk_remap_txqs(dev); > > > > - if (dev->requested_socket_id != dev->socket_id > > - || dev->requested_mtu != dev->mtu) { > > - err = netdev_dpdk_mempool_configure(dev); > > - if (err) { > > - return err; > > - } else { > > - netdev_change_seq_changed(&dev->up); > > - } > > + err = netdev_dpdk_mempool_configure(dev); > > + if (err) { > > + return err; > > + } else { > > + netdev_change_seq_changed(&dev->up); > > } > > > > if (netdev_dpdk_get_vid(dev) >= 0) { > > > > _______________________________________________ > dev mailing list > d...@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev