Fix issues on reconfiguration of pre-existing mempools.
This patch avoids calling dpdk_mp_put() - and erroneously
releasing the mempool - when it already exists.

Also include the NUMA socket number in mempool names, so that a name
reflects the socket the mempool is allocated on. This change is needed
for the NUMA-awareness feature.
CC: Mark B Kavanagh <[email protected]>
CC: Kevin Traynor <[email protected]>
CC: Aaron Conole <[email protected]>
CC: Darrell Ball <[email protected]>
Reported-by: Ciara Loftus <[email protected]>
Tested-by: Ciara Loftus <[email protected]>
Reported-by: Róbert Mulik <[email protected]>
Fixes: d555d9bded5f ("netdev-dpdk: Create separate memory pool for each port.")
Signed-off-by: Antonio Fischetti <[email protected]>
---
Test on releasing pre-existing mempools
=======================================
I've tested this patch by
 - changing the number of Rx queues at run-time:
     ovs-vsctl set Interface dpdk0 type=dpdk options:n_rxq=4

 - reducing the MTU of the dpdk ports by 1 byte to force the
   reconfiguration of an existing mempool:
     ovs-vsctl set Interface dpdk0 mtu_request=1499

This issue was observed in a PVP test topology with dpdkvhostuserclient
ports. It can also happen with dpdk type ports, e.g. by reducing the MTU
by 1 byte.

To replicate the bug scenario in the PVP case it is sufficient to set up
1 dpdkvhostuserclient port and just boot the VM.

Below are some more details on my own test setup.

PVP test setup
--------------
CLIENT_SOCK_DIR=/tmp
SOCK0=dpdkvhostuser0
SOCK1=dpdkvhostuser1

1 PMD.
Add 2 dpdk ports, n_rxq=1.
Add 2 vhu ports, both of type dpdkvhostuserclient, and specify the
vhost-server-path:
 ovs-vsctl set Interface dpdkvhostuser0 options:vhost-server-path="$CLIENT_SOCK_DIR/$SOCK0"
 ovs-vsctl set Interface dpdkvhostuser1 options:vhost-server-path="$CLIENT_SOCK_DIR/$SOCK1"

Set port-based rules: dpdk0 <--> vhu0 and dpdk1 <--> vhu1
 add-flow br0 in_port=1,action=output:3
 add-flow br0 in_port=3,action=output:1
 add-flow br0 in_port=4,action=output:2
 add-flow br0 in_port=2,action=output:4

Launch QEMU
-----------
As the OvS vhu ports are acting as clients, we must specify 'server' in
the next command.
VM_IMAGE=<path/to/your/vm/image>

 sudo -E taskset 0x3F00 $QEMU_DIR/x86_64-softmmu/qemu-system-x86_64 \
  -name us-vhost-vm1 -cpu host -enable-kvm -m 4096M \
  -object memory-backend-file,id=mem,size=4096M,mem-path=/dev/hugepages,share=on \
  -numa node,memdev=mem -mem-prealloc -smp 4 -drive file=$VM_IMAGE \
  -chardev socket,id=char0,path=$CLIENT_SOCK_DIR/$SOCK0,server \
  -netdev type=vhost-user,id=mynet1,chardev=char0,vhostforce \
  -device virtio-net-pci,mac=00:00:00:00:00:01,netdev=mynet1,mrg_rxbuf=off \
  -chardev socket,id=char1,path=$CLIENT_SOCK_DIR/$SOCK1,server \
  -netdev type=vhost-user,id=mynet2,chardev=char1,vhostforce \
  -device virtio-net-pci,mac=00:00:00:00:00:02,netdev=mynet2,mrg_rxbuf=off \
  --nographic

Expected behavior
-----------------
With this fix OvS shouldn't crash.

Test NUMA-Awareness feature
===========================
Mempool names now contain the requested socket id and look like
"ovs_4adb057e_1_2030_20512".
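To make the layout explicit, the small standalone snippet below rebuilds the
example name above from its components. It is illustrative only and not OvS
code; the real logic is dpdk_mp_name() in the diff further down.

    #include <stdio.h>

    int
    main(void)
    {
        /* Components of the example name "ovs_4adb057e_1_2030_20512". */
        unsigned hash = 0x4adb057e;  /* hash of the netdev name */
        int socket_id = 1;           /* NUMA socket of the mempool */
        int mtu = 2030;              /* MTU the mempool was sized for */
        unsigned n_mbufs = 20512;    /* number of mbufs requested */
        char name[64];

        /* Same "ovs_%x_%d_%d_%u" layout used by dpdk_mp_name() after this
         * patch; the socket id field is the only addition. */
        snprintf(name, sizeof name, "ovs_%x_%d_%d_%u",
                 hash, socket_id, mtu, n_mbufs);
        printf("%s\n", name);        /* -> ovs_4adb057e_1_2030_20512 */

        return 0;
    }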
Tested with DPDK 17.05.2 (from the dpdk-stable branch).
NUMA-awareness feature enabled (DPDK/config/common_base).

Created a single port of type dpdkvhostuser.
OvS pmd-cpu-mask=FF00003     # enable cores on both numa nodes
QEMU core mask = 0xFC000     # cores for qemu on numa node 1 only

Before launching the VM:
------------------------
ovs-appctl dpif-netdev/pmd-rxq-show
shows that core #1 is serving the vhu port:

pmd thread numa_id 0 core_id 1:
        isolated : false
        port: dpdkvhostuser0    queue-id: 0

After launching the VM:
-----------------------
The vhu port is now managed by core #27:

pmd thread numa_id 1 core_id 27:
        isolated : false
        port: dpdkvhostuser0    queue-id: 0

The log shows that a new mempool is allocated on NUMA node 1, while the
previous one is deleted:

2017-10-06T14:04:55Z|00105|netdev_dpdk|DBG|Allocated "ovs_4adb057e_1_2030_20512" mempool with 20512 mbufs
2017-10-06T14:04:55Z|00106|netdev_dpdk|DBG|Releasing "ovs_4adb057e_0_2030_20512" mempool

---
 lib/netdev-dpdk.c | 42 +++++++++++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 17 deletions(-)
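A note on the new return-value contract before the diff:
netdev_dpdk_mempool_configure() now returns EEXIST when the requested
mempool already exists, and its callers keep the current mempool in that
case instead of releasing it. The toy program below only illustrates that
caller-side pattern (it mirrors the netdev_dpdk_reconfigure() and
dpdk_vhost_reconfigure_helper() hunks, but is not OvS code):

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Toy stand-in for netdev_dpdk_mempool_configure(): 0 means "a new
     * mempool was created", EEXIST means "a suitable mempool already
     * exists, keep it", anything else would be a real error. */
    static int
    toy_mempool_configure(bool already_exists)
    {
        return already_exists ? EEXIST : 0;
    }

    static int
    toy_reconfigure(bool already_exists)
    {
        int err = toy_mempool_configure(already_exists);

        if (!err) {
            /* A new mempool was created: note the configuration change,
             * as the patch does via netdev_change_seq_changed(). */
            printf("new mempool created\n");
        } else if (err != EEXIST) {
            /* Real failure: propagate it. */
            return err;
        } else {
            /* EEXIST: the pre-existing mempool stays in place and must
             * not be released (no dpdk_mp_put() on it). */
            printf("mempool already exists, keeping it\n");
        }
        return 0;
    }

    int
    main(void)
    {
        toy_reconfigure(false);
        toy_reconfigure(true);
        return 0;
    }

Returning EEXIST is what prevents the old behaviour of calling
dpdk_mp_put() on a mempool that is still in use.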
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index c60f46f..7f2d7ed 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -499,8 +499,8 @@ dpdk_mp_name(struct dpdk_mp *dmp)
 {
     uint32_t h = hash_string(dmp->if_name, 0);
     char *mp_name = xcalloc(RTE_MEMPOOL_NAMESIZE, sizeof *mp_name);
-    int ret = snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_%x_%d_%u",
-                       h, dmp->mtu, dmp->mp_size);
+    int ret = snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_%x_%d_%d_%u",
+                       h, dmp->socket_id, dmp->mtu, dmp->mp_size);
     if (ret < 0 || ret >= RTE_MEMPOOL_NAMESIZE) {
         return NULL;
     }
@@ -508,12 +508,13 @@ dpdk_mp_name(struct dpdk_mp *dmp)
 }
 
 static struct dpdk_mp *
-dpdk_mp_create(struct netdev_dpdk *dev, int mtu)
+dpdk_mp_create(struct netdev_dpdk *dev, int mtu, bool *mp_exists)
 {
     struct dpdk_mp *dmp = dpdk_rte_mzalloc(sizeof *dmp);
     if (!dmp) {
         return NULL;
     }
+    *mp_exists = false;
     dmp->socket_id = dev->requested_socket_id;
     dmp->mtu = mtu;
     ovs_strzcpy(dmp->if_name, dev->up.name, IFNAMSIZ);
@@ -530,15 +531,14 @@ dpdk_mp_create(struct netdev_dpdk *dev, int mtu)
                    + MIN(RTE_MAX_LCORE, dev->requested_n_rxq) * NETDEV_MAX_BURST
                    + MIN_NB_MBUF;
 
-    bool mp_exists = false;
-
     do {
         char *mp_name = dpdk_mp_name(dmp);
 
         VLOG_DBG("Requesting a mempool of %u mbufs for netdev %s "
-                 "with %d Rx and %d Tx queues.",
+                 "with %d Rx and %d Tx queues, socket id:%d.",
                  dmp->mp_size, dev->up.name,
-                 dev->requested_n_rxq, dev->requested_n_txq);
+                 dev->requested_n_rxq, dev->requested_n_txq,
+                 dev->requested_socket_id);
 
         dmp->mp = rte_pktmbuf_pool_create(mp_name, dmp->mp_size,
                                           MP_CACHE_SZ,
@@ -559,7 +559,7 @@ dpdk_mp_create(struct netdev_dpdk *dev, int mtu)
             /* As the mempool create returned EEXIST we can expect the
              * lookup has returned a valid pointer. If for some reason
              * that's not the case we keep track of it. */
-            mp_exists = true;
+            *mp_exists = true;
         } else {
            VLOG_ERR("Failed mempool \"%s\" create request of %u mbufs",
                     mp_name, dmp->mp_size);
@@ -573,7 +573,7 @@ dpdk_mp_create(struct netdev_dpdk *dev, int mtu)
             rte_mempool_obj_iter(dmp->mp, ovs_rte_pktmbuf_init, NULL);
             return dmp;
         }
-    } while (!mp_exists &&
+    } while (!(*mp_exists) &&
             (rte_errno == ENOMEM && (dmp->mp_size /= 2) >= MIN_NB_MBUF));
 
     rte_free(dmp);
@@ -581,12 +581,12 @@ dpdk_mp_create(struct netdev_dpdk *dev, int mtu)
 }
 
 static struct dpdk_mp *
-dpdk_mp_get(struct netdev_dpdk *dev, int mtu)
+dpdk_mp_get(struct netdev_dpdk *dev, int mtu, bool *mp_exists)
 {
     struct dpdk_mp *dmp;
 
     ovs_mutex_lock(&dpdk_mp_mutex);
-    dmp = dpdk_mp_create(dev, mtu);
+    dmp = dpdk_mp_create(dev, mtu, mp_exists);
     ovs_mutex_unlock(&dpdk_mp_mutex);
 
     return dmp;
@@ -620,14 +620,22 @@ netdev_dpdk_mempool_configure(struct netdev_dpdk *dev)
 {
     uint32_t buf_size = dpdk_buf_size(dev->requested_mtu);
     struct dpdk_mp *mp;
+    bool mp_exists;
 
-    mp = dpdk_mp_get(dev, FRAME_LEN_TO_MTU(buf_size));
+    mp = dpdk_mp_get(dev, FRAME_LEN_TO_MTU(buf_size), &mp_exists);
     if (!mp) {
         VLOG_ERR("Failed to create memory pool for netdev "
                  "%s, with MTU %d on socket %d: %s\n",
                  dev->up.name, dev->requested_mtu,
                  dev->requested_socket_id, rte_strerror(rte_errno));
         return rte_errno;
+    } else if (mp_exists) {
+        /* If a new MTU was requested and its rounded value equals the one
+         * that is currently used, then the existing mempool is returned.
+         * Update dev with the new values. */
+        dev->mtu = dev->requested_mtu;
+        dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
+        return EEXIST;
     } else {
         dpdk_mp_put(dev->dpdk_mp);
         dev->dpdk_mp = mp;
@@ -3207,7 +3215,7 @@ netdev_dpdk_reconfigure(struct netdev *netdev)
     rte_eth_dev_stop(dev->port_id);
 
     err = netdev_dpdk_mempool_configure(dev);
-    if (err) {
+    if (err && err != EEXIST) {
         goto out;
     }
 
@@ -3247,12 +3255,12 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev)
     netdev_dpdk_remap_txqs(dev);
 
     err = netdev_dpdk_mempool_configure(dev);
-    if (err) {
-        return err;
-    } else {
+    if (!err) {
+        /* A new mempool was created. */
         netdev_change_seq_changed(&dev->up);
+    } else if (err != EEXIST) {
+        return err;
     }
-
     if (netdev_dpdk_get_vid(dev) >= 0) {
         if (dev->vhost_reconfigured == false) {
             dev->vhost_reconfigured = true;
-- 
2.4.11
