"Fischetti, Antonio" <[email protected]> writes:

> Hi Kevin, pls see comments inline.
> I'm going to rebase and rework this patch on behalf of Robert.  
>
> Thanks,
> Antonio
>
>> -----Original Message-----
>> From: [email protected] 
>> [mailto:[email protected]]
>> On Behalf Of Kevin Traynor
>> Sent: Tuesday, April 11, 2017 9:37 AM
>> To: Wojciechowicz, RobertX <[email protected]>;
>> [email protected]
>> Subject: Re: [ovs-dev] [PATCH v3] netdev-dpdk: Create separate memory pool 
>> for
>> each port
>> 
>> On 04/07/2017 09:20 AM, Robert Wojciechowicz wrote:
>> > Since it's possible to delete memory pool in DPDK
>> > we can try to estimate better required memory size
>> > when port is reconfigured, e.g. with different number
>> > of rx queues.
>> >
>> > Signed-off-by: Robert Wojciechowicz <[email protected]>
>> > Acked-by: Ian Stokes <[email protected]>
>> > ---
>> > v2:
>> > - removing mempool reference counter
>> > - making sure mempool name isn't longer than RTE_MEMPOOL_NAMESIZE
>> >
>> > v3:
>> > - adding memory for corner cases
>> > ---
>> >  lib/netdev-dpdk.c | 118 
>> > ++++++++++++++++++++++++++--------------------------
>> --
>> >  1 file changed, 57 insertions(+), 61 deletions(-)
>> >
>> > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>> > index ddc651b..6b781ac 100644
>> > --- a/lib/netdev-dpdk.c
>> > +++ b/lib/netdev-dpdk.c
>> > @@ -275,14 +275,12 @@ static struct ovs_list dpdk_list
>> OVS_GUARDED_BY(dpdk_mutex)
>> >  static struct ovs_mutex dpdk_mp_mutex OVS_ACQ_AFTER(dpdk_mutex)
>> >      = OVS_MUTEX_INITIALIZER;
>> >
>> > -static struct ovs_list dpdk_mp_list OVS_GUARDED_BY(dpdk_mp_mutex)
>> > -    = OVS_LIST_INITIALIZER(&dpdk_mp_list);
>> > -
>> >  struct dpdk_mp {
>> >      struct rte_mempool *mp;
>> >      int mtu;
>> >      int socket_id;
>> > -    int refcount;
>> > +    char if_name[IFNAMSIZ];
>> > +    unsigned mp_size;
>> >      struct ovs_list list_node OVS_GUARDED_BY(dpdk_mp_mutex);
>> >  };
>> >
>> > @@ -463,78 +461,82 @@ ovs_rte_pktmbuf_init(struct rte_mempool *mp,
>> >      dp_packet_init_dpdk((struct dp_packet *) pkt, pkt->buf_len);
>> >  }
>> >
>> > +/*
>> > + * XXX Full DPDK memory pool name must be unique
>> > + * and cannot be longer than RTE_MEMPOOL_NAMESIZE
>> > + */
>> > +static char *
>> > +dpdk_mp_name(struct dpdk_mp *dmp)
>> > +{
>> > +    uint32_t h = hash_string(dmp->if_name, 0);
>> > +    char *mp_name = xcalloc(RTE_MEMPOOL_NAMESIZE, sizeof(char));
>> > +    int ret = snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_%x_%d_%u",
>> > +                       h, dmp->mtu, dmp->mp_size);
>> > +    if (ret < 0 || ret >= RTE_MEMPOOL_NAMESIZE) {
>> > +        return NULL;
>> > +    }
>> > +    return mp_name;
>> > +}
>> > +
>> >  static struct dpdk_mp *
>> > -dpdk_mp_create(int socket_id, int mtu)
>> > +dpdk_mp_create(struct netdev_dpdk *dev, int mtu)
>> >  {
>> >      struct rte_pktmbuf_pool_private mbp_priv;
>> >      struct dpdk_mp *dmp;
>> > -    unsigned mp_size;
>> >      char *mp_name;
>> >
>> >      dmp = dpdk_rte_mzalloc(sizeof *dmp);
>> >      if (!dmp) {
>> >          return NULL;
>> >      }
>> > -    dmp->socket_id = socket_id;
>> > +    dmp->socket_id = dev->requested_socket_id;
>> >      dmp->mtu = mtu;
>> > -    dmp->refcount = 1;
>> > +    strncpy(dmp->if_name, dev->up.name, IFNAMSIZ);
>> >      mbp_priv.mbuf_data_room_size = MBUF_SIZE(mtu) - sizeof(struct
>> dp_packet);
>> >      mbp_priv.mbuf_priv_size = sizeof(struct dp_packet)
>> > -                              - sizeof(struct rte_mbuf);
>> > -    /* XXX: this is a really rough method of provisioning memory.
>> > -     * It's impossible to determine what the exact memory requirements are
>> > -     * when the number of ports and rxqs that utilize a particular mempool
>> can
>> > -     * change dynamically at runtime. For now, use this rough heurisitic.
>> > +        - sizeof(struct rte_mbuf);
>> > +    /*
>> > +     * XXX: rough estimation of memory required for port:
>> > +     * <packets required to fill the device rxqs>
>> > +     * + <packets that could be stuck on other ports txqs>
>> > +     * + <packets in the pmd threads>
>> > +     * + <additional memory for corner cases>
>> >       */
>> > -    if (mtu >= ETHER_MTU) {
>> > -        mp_size = MAX_NB_MBUF;
>> > -    } else {
>> > -        mp_size = MIN_NB_MBUF;
>> > -    }
>> >
>> > -    do {
>> > -        mp_name = xasprintf("ovs_mp_%d_%d_%u", dmp->mtu, dmp->socket_id,
>> > -                            mp_size);
>> > +    dmp->mp_size = dev->requested_n_rxq * dev->requested_rxq_size
>> > +        + dev->requested_n_txq * dev->requested_txq_size
>> > +        + MIN(RTE_MAX_LCORE, dev->requested_n_rxq) * NETDEV_MAX_BURST
>> > +        + MIN_NB_MBUF;
>> >
>> > -        dmp->mp = rte_mempool_create(mp_name, mp_size, MBUF_SIZE(mtu),
>> > +    do {
>> > +        mp_name = dpdk_mp_name(dmp);
>> > +        dmp->mp = rte_mempool_create(mp_name, dmp->mp_size, 
>> > MBUF_SIZE(mtu),
>> >                                       MP_CACHE_SZ,
>> >                                       sizeof(struct
>> rte_pktmbuf_pool_private),
>> >                                       rte_pktmbuf_pool_init, &mbp_priv,
>> >                                       ovs_rte_pktmbuf_init, NULL,
>> > -                                     socket_id, 0);
>> > +                                     dmp->socket_id, 0);
>> 
>> fyi - either you or Hemant will need to rebase if his patch [1] or yours
>> gets gets merged as he has some tidy up in this area.
>> 
>> [1] https://mail.openvswitch.org/pipermail/ovs-dev/2017-March/330186.html
>> 
>
> [Antonio] 
> Hemant's patch was upstreamed with commit id
> 401b70d632729555b555040e4cee13a5ffc5ed3d
> "netdev-dpdk: leverage the mempool offload support". So this patch needs
> to be rebased.
>
>
>> >          if (dmp->mp) {
>> >              VLOG_DBG("Allocated \"%s\" mempool with %u mbufs",
>> > -                     mp_name, mp_size);
>> > +                     mp_name, dmp->mp_size);
>> >          }
>> >          free(mp_name);
>> >          if (dmp->mp) {
>> >              return dmp;
>> >          }
>> > -    } while (rte_errno == ENOMEM && (mp_size /= 2) >= MIN_NB_MBUF);
>> > +    } while (rte_errno == ENOMEM && (dmp->mp_size /= 2) >= MIN_NB_MBUF);
>> >
>> >      rte_free(dmp);
>> >      return NULL;
>> >  }
>> >
>> >  static struct dpdk_mp *
>> > -dpdk_mp_get(int socket_id, int mtu)
>> > +dpdk_mp_get(struct netdev_dpdk *dev, int mtu)
>> >  {
>> >      struct dpdk_mp *dmp;
>> >
>> >      ovs_mutex_lock(&dpdk_mp_mutex);
>> > -    LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) {
>> > -        if (dmp->socket_id == socket_id && dmp->mtu == mtu) {
>> 
>> The mtu is rounded from the requested_mtu, so it may be the same value
>> for different requested_mtus. This leads to trying to create a mempool
>> of the same name which will fail with EEXISTS that is not handled. You
>> either need to put in a check to prevent the same mempool being created
>> (similar to above) or handle the case where rte_mempool_create returns
>> EEXISTS.
>
> [Antonio]
> I think the only viable option is to restore the existing 
> dpdk_mp_list mechanism above.
> Because when mempool create fails with EEXIST the returned 
> value is NULL and it seems to me there's no way to retrieve
> - and then return - a pointer to the existing mempool.
> What do you think?

These two APIs can allow retrieving the mempool based on mempool
characteristics, no?

  struct rte_mempool *rte_mempool_lookup(const char *name);

  void rte_mempool_walk(void (*func)(struct rte_mempool *, void *arg),
                        void *arg);

I think in the case of EEXIST, one of these would be sufficient to
locate the appropriate mempool.  Maybe I missed something.

>
>> 
>> 
>> > -            dmp->refcount++;
>> > -            goto out;
>> > -        }
>> > -    }
>> > -
>> > -    dmp = dpdk_mp_create(socket_id, mtu);
>> > -    if (dmp) {
>> > -        ovs_list_push_back(&dpdk_mp_list, &dmp->list_node);
>> > -    }
>> > -
>> > -out:
>> > +    dmp = dpdk_mp_create(dev, mtu);
>> >      ovs_mutex_unlock(&dpdk_mp_mutex);
>> >
>> >      return dmp;
>> > @@ -543,18 +545,18 @@ out:
>> >  static void
>> >  dpdk_mp_put(struct dpdk_mp *dmp)
>> >  {
>> > +    char *mp_name;
>> > +
>> >      if (!dmp) {
>> >          return;
>> >      }
>> >
>> >      ovs_mutex_lock(&dpdk_mp_mutex);
>> > -    ovs_assert(dmp->refcount);
>> > -
>> > -    if (!--dmp->refcount) {
>> > -        ovs_list_remove(&dmp->list_node);
>> > -        rte_mempool_free(dmp->mp);
>> > -        rte_free(dmp);
>> > -    }
>> > +    mp_name = dpdk_mp_name(dmp);
>> > +    VLOG_DBG("Releasing \"%s\" mempool", mp_name);
>> > +    free(mp_name);
>> > +    rte_mempool_free(dmp->mp);
>> > +    rte_free(dmp);
>> >      ovs_mutex_unlock(&dpdk_mp_mutex);
>> >  }
>> >
>> > @@ -569,7 +571,7 @@ netdev_dpdk_mempool_configure(struct netdev_dpdk *dev)
>> >      uint32_t buf_size = dpdk_buf_size(dev->requested_mtu);
>> >      struct dpdk_mp *mp;
>> >
>> > -    mp = dpdk_mp_get(dev->requested_socket_id, 
>> > FRAME_LEN_TO_MTU(buf_size));
>> > +    mp = dpdk_mp_get(dev, FRAME_LEN_TO_MTU(buf_size));
>> >      if (!mp) {
>> >          VLOG_ERR("Insufficient memory to create memory pool for netdev "
>> >                   "%s, with MTU %d on socket %d\n",
>> > @@ -3131,12 +3133,9 @@ netdev_dpdk_reconfigure(struct netdev *netdev)
>> >
>> >      rte_eth_dev_stop(dev->port_id);
>> >
>> > -    if (dev->mtu != dev->requested_mtu
>> > -        || dev->socket_id != dev->requested_socket_id) {
>> > -        err = netdev_dpdk_mempool_configure(dev);
>> > -        if (err) {
>> > -            goto out;
>> > -        }
>> > +    err = netdev_dpdk_mempool_configure(dev);
>> > +    if (err) {
>> > +        goto out;
>> >      }
>> >
>> >      netdev->n_txq = dev->requested_n_txq;
>> > @@ -3174,14 +3173,11 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk
>> *dev)
>> >
>> >      netdev_dpdk_remap_txqs(dev);
>> >
>> > -    if (dev->requested_socket_id != dev->socket_id
>> > -        || dev->requested_mtu != dev->mtu) {
>> > -        err = netdev_dpdk_mempool_configure(dev);
>> > -        if (err) {
>> > -            return err;
>> > -        } else {
>> > -            netdev_change_seq_changed(&dev->up);
>> > -        }
>> > +    err = netdev_dpdk_mempool_configure(dev);
>> > +    if (err) {
>> > +        return err;
>> > +    } else {
>> > +        netdev_change_seq_changed(&dev->up);
>> >      }
>> >
>> >      if (netdev_dpdk_get_vid(dev) >= 0) {
>> >
>> 
>> _______________________________________________
>> dev mailing list
>> [email protected]
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to