On 01/03/2016 09:12, "Kavanagh, Mark B" <mark.b.kavan...@intel.com> wrote:
>Hi Daniele, > >Some comments inline - thanks again for the patchset! > >Cheers, >Mark > >> >>This introduces in dpif-netdev and netdev-dpdk the first use for the >>newly introduce reconfigure netdev call. >> >>When a request to change the number of queues comes, netdev-dpdk will >>remember this and notify the upper layer via >>netdev_request_reconfigure(). >> >>The datapath, instead of periodically calling netdev_set_multiq(), can >>detect this and call reconfigure(). >> >>This mechanism can also be used to: >>* Automatically match the number of rxq with the one provided by qemu >> via the new_device callback. >>* Provide a way to change the MTU of dpdk devices at runtime. >> >>Signed-off-by: Daniele Di Proietto <diproiet...@vmware.com> >>--- >> lib/dpif-netdev.c | 69 ++++++++++----------- >> lib/netdev-dpdk.c | 167 >>+++++++++++++++++++++++++++++--------------------- >> lib/netdev-provider.h | 19 ++---- >> lib/netdev.c | 30 ++------- >> lib/netdev.h | 3 +- >> 5 files changed, 139 insertions(+), 149 deletions(-) >> >>diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c >>index 2adba89..3421df7 100644 >>--- a/lib/dpif-netdev.c >>+++ b/lib/dpif-netdev.c >>@@ -256,8 +256,6 @@ struct dp_netdev_port { >> unsigned n_rxq; /* Number of elements in 'rxq' */ >> struct netdev_rxq **rxq; >> char *type; /* Port type as requested by user. */ >>- int latest_requested_n_rxq; /* Latest requested from netdev number >>- of rx queues. */ >> }; >> >> /* Contained by struct dp_netdev_flow's 'stats' member. */ >>@@ -1161,8 +1159,7 @@ do_add_port(struct dp_netdev *dp, const char >>*devname, const char >>*type, >> /* There can only be ovs_numa_get_n_cores() pmd threads, >> * so creates a txq for each, and one extra for the non >> * pmd threads. */ >>- error = netdev_set_multiq(netdev, n_cores + 1, >>- netdev_requested_n_rxq(netdev)); >>+ error = netdev_set_multiq(netdev, n_cores + 1); >> if (error && (error != EOPNOTSUPP)) { >> VLOG_ERR("%s, cannot set multiq", devname); >> goto out_close; >>@@ -1174,7 +1171,14 @@ do_add_port(struct dp_netdev *dp, const char >>*devname, const char >>*type, >> port->n_rxq = netdev_n_rxq(netdev); >> port->rxq = xmalloc(sizeof *port->rxq * port->n_rxq); >> port->type = xstrdup(type); >>- port->latest_requested_n_rxq = netdev_requested_n_rxq(netdev); >>+ >>+ if (netdev_is_reconf_required(netdev)) { >>+ error = netdev_reconfigure(netdev); >>+ if (error) { >>+ goto out_close; > >This causes a memory leak, as port->rxq has already been allocated. > >Perhaps if you initialize n_open_rxqs=0 before this block, you can then >use 'goto out_rx_close' instead. Oops, you're right. Thanks for noticing I've decided to move the reconfigure call before the rxq allocation in case netdev_reconfigure() changes the number of rxqs > >>+ } >>+ } >>+ >> n_open_rxqs = 0; >> for (i = 0; i < port->n_rxq; i++) { >> error = netdev_rxq_open(netdev, &port->rxq[i], i); >>@@ -2450,25 +2454,6 @@ dpif_netdev_operate(struct dpif *dpif, struct >>dpif_op **ops, size_t >>n_ops) >> } >> } >> >>-/* Returns true if the configuration for rx queues or cpu mask >>- * is changed. */ >>-static bool >>-pmd_n_rxq_changed(const struct dp_netdev *dp) >>-{ >>- struct dp_netdev_port *port; >>- >>- CMAP_FOR_EACH (port, node, &dp->ports) { >>- int requested_n_rxq = netdev_requested_n_rxq(port->netdev); >>- >>- if (netdev_is_pmd(port->netdev) >>- && port->latest_requested_n_rxq != requested_n_rxq) { >>- return true; >>- } >>- } >>- >>- return false; >>-} >>- >> static bool >> cmask_equals(const char *a, const char *b) >> { >>@@ -2601,14 +2586,12 @@ reconfigure_pmd_threads(struct dp_netdev *dp) >> dp_netdev_destroy_all_pmds(dp); >> >> CMAP_FOR_EACH (port, node, &dp->ports) { >>- struct netdev *netdev = port->netdev; >>- int requested_n_rxq = netdev_requested_n_rxq(netdev); >>- if (netdev_is_pmd(port->netdev) >>- && port->latest_requested_n_rxq != requested_n_rxq) { >>+ if (netdev_is_reconf_required(port->netdev)) { >> cmap_remove(&dp->ports, &port->node, >>hash_odp_port(port->port_no)); >> hmapx_add(&to_reconfigure, port); >> } >> } >>+ >> ovs_mutex_unlock(&dp->port_mutex); >> >> /* Waits for the other threads to see the ports removed from the >>cmap, >>@@ -2617,11 +2600,9 @@ reconfigure_pmd_threads(struct dp_netdev *dp) >> >> ovs_mutex_lock(&dp->port_mutex); >> HMAPX_FOR_EACH (node, &to_reconfigure) { >>- int requested_n_rxq = netdev_requested_n_rxq(port->netdev); >> int i, err; >> >> port = node->data; >>- requested_n_rxq = netdev_requested_n_rxq(port->netdev); >> /* Closes the existing 'rxq's. */ >> for (i = 0; i < port->n_rxq; i++) { >> netdev_rxq_close(port->rxq[i]); >>@@ -2630,17 +2611,14 @@ reconfigure_pmd_threads(struct dp_netdev *dp) >> port->n_rxq = 0; >> >> /* Sets the new rx queue config. */ > >Given that netdev_reconfigure can be used to set the config of more than >just the Rx queues, this comment should probably be made more general. You're right, I changed it to /* Allows the netdev to apply the pending configuration changes. */ > >>- err = netdev_set_multiq(port->netdev, ovs_numa_get_n_cores() + >>1, >>- requested_n_rxq); >>+ err = netdev_reconfigure(port->netdev); >> if (err && (err != EOPNOTSUPP)) { >>- VLOG_ERR("Failed to set dpdk interface %s rx_queue to: %u", >>- netdev_get_name(port->netdev), >>- requested_n_rxq); >>+ VLOG_ERR("Failed to set interface %s new configuration", >>+ netdev_get_name(port->netdev)); >> do_destroy_port(port); >> failed_config = true; >> continue; >> } >>- port->latest_requested_n_rxq = requested_n_rxq; >> /* If the netdev_reconfigure() above succeeds, reopens the >>'rxq's and >> * inserts the port back in the cmap, to allow transmitting >>packets. */ >> port->n_rxq = netdev_n_rxq(port->netdev); >>@@ -2671,6 +2649,21 @@ reconfigure_pmd_threads(struct dp_netdev *dp) >> dp_netdev_reset_pmd_threads(dp); >> } >> >>+/* Returns true if one of the netdevs in 'dp' requires a >>reconfiguration */ >>+static bool >>+ports_require_restart(const struct dp_netdev *dp) >>+{ >>+ struct dp_netdev_port *port; >>+ >>+ CMAP_FOR_EACH (port, node, &dp->ports) { >>+ if (netdev_is_reconf_required(port->netdev)) { >>+ return true; >>+ } >>+ } >>+ >>+ return false; >>+} >>+ >> /* Return true if needs to revalidate datapath flows. */ >> static bool >> dpif_netdev_run(struct dpif *dpif) >>@@ -2696,7 +2689,7 @@ dpif_netdev_run(struct dpif *dpif) >> ovs_mutex_unlock(&dp->non_pmd_mutex); >> >> if (!cmask_equals(dp->pmd_cmask, dp->requested_pmd_cmask) >>- || pmd_n_rxq_changed(dp)) { >>+ || ports_require_restart(dp)) { >> reconfigure_pmd_threads(dp); >> } >> >>@@ -2719,6 +2712,8 @@ dpif_netdev_wait(struct dpif *dpif) >> >> ovs_mutex_lock(&dp_netdev_mutex); >> CMAP_FOR_EACH (port, node, &dp->ports) { >>+ netdev_wait_reconf_required(port->netdev); >>+ >> if (!netdev_is_pmd(port->netdev)) { >> int i; >> >>diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c >>index a48ca71..17b8d51 100644 >>--- a/lib/netdev-dpdk.c >>+++ b/lib/netdev-dpdk.c >>@@ -237,6 +237,12 @@ struct netdev_dpdk { >> >> /* In dpdk_list. */ >> struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex); >>+ >>+ /* The following properties cannot be changed when a device is >>running, >>+ * so we remember the request and update them next time >>+ * netdev_dpdk*_reconfigure() is called */ >>+ int requested_n_txq; >>+ int requested_n_rxq; >> }; >> >> struct netdev_rxq_dpdk { >>@@ -614,7 +620,8 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned >>int port_no, >> >> netdev_->n_txq = NR_QUEUE; >> netdev_->n_rxq = NR_QUEUE; >>- netdev_->requested_n_rxq = NR_QUEUE; >>+ netdev->requested_n_rxq = NR_QUEUE; >>+ netdev->requested_n_txq = NR_QUEUE; >> netdev->real_n_txq = NR_QUEUE; >> >> if (type == DPDK_DEV_ETH) { >>@@ -796,7 +803,7 @@ netdev_dpdk_get_config(const struct netdev *netdev, >>struct smap *args) >> >> ovs_mutex_lock(&dev->mutex); >> >>- smap_add_format(args, "requested_rx_queues", "%d", >>netdev->requested_n_rxq); >>+ smap_add_format(args, "requested_rx_queues", "%d", >>dev->requested_n_rxq); >> smap_add_format(args, "configured_rx_queues", "%d", netdev->n_rxq); >> smap_add_format(args, "requested_tx_queues", "%d", netdev->n_txq); >> smap_add_format(args, "configured_tx_queues", "%d", >>dev->real_n_txq); >>@@ -809,11 +816,13 @@ static int >> netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args) >> { >> struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); >>+ int new_n_rxq = MAX(smap_get_int(args, "n_rxq", >>dev->requested_n_rxq), 1); > >Should we provide some parameter checking here? i.e. what happens if the >user provides a negative value? >Alternatively, the string may be terminated with junk; smap_get_int >relies on atoi to convert the user argument, so it may be better to use >strtoul to sanitize the input instead. Negative values shouldn't be a problem because we use MAX(..., 1) That's true the string may be terminated with junk, maybe we should fix smap_get_int? I'm not sure we should do more here. I've decided to leave it as it is for now > >> >> ovs_mutex_lock(&dev->mutex); >>- netdev->requested_n_rxq = MAX(smap_get_int(args, "n_rxq", >>- >>netdev->requested_n_rxq), 1); >>- netdev_change_seq_changed(netdev); >>+ if (new_n_rxq != dev->requested_n_rxq) { >>+ dev->requested_n_rxq = new_n_rxq; >>+ netdev_request_reconfigure(netdev); >>+ } >> ovs_mutex_unlock(&dev->mutex); >> >> return 0; >>@@ -827,93 +836,44 @@ netdev_dpdk_get_numa_id(const struct netdev >>*netdev_) >> return netdev->socket_id; >> } >> >>-/* Sets the number of tx queues and rx queues for the dpdk interface. >>- * If the configuration fails, do not try restoring its old >>configuration >>- * and just returns the error. */ >>+/* Sets the number of tx queues for the dpdk interface. */ >> static int >>-netdev_dpdk_set_multiq(struct netdev *netdev_, unsigned int n_txq, >>- unsigned int n_rxq) >>+netdev_dpdk_set_multiq(struct netdev *netdev_, unsigned int n_txq) >> { >> struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); >> int err = 0; >>- int old_rxq, old_txq; >> >>- if (netdev->up.n_txq == n_txq && netdev->up.n_rxq == n_rxq) { >>- return err; >>- } >>- >>- ovs_mutex_lock(&dpdk_mutex); >> ovs_mutex_lock(&netdev->mutex); >> >>- rte_eth_dev_stop(netdev->port_id); >>- >>- old_txq = netdev->up.n_txq; >>- old_rxq = netdev->up.n_rxq; >>- netdev->up.n_txq = n_txq; >>- netdev->up.n_rxq = n_rxq; >>- >>- rte_free(netdev->tx_q); >>- err = dpdk_eth_dev_init(netdev); >>- netdev_dpdk_alloc_txq(netdev, netdev->real_n_txq); >>- if (err) { >>- /* If there has been an error, it means that the requested >>queues >>- * have not been created. Restore the old numbers. */ >>- netdev->up.n_txq = old_txq; >>- netdev->up.n_rxq = old_rxq; >>+ if (netdev->up.n_txq == n_txq) { >>+ goto out; >> } >> >>- netdev->txq_needs_locking = netdev->real_n_txq != netdev->up.n_txq; >>+ netdev->requested_n_txq = n_txq; >>+ netdev_request_reconfigure(netdev_); >> >>+out: >> ovs_mutex_unlock(&netdev->mutex); >>- ovs_mutex_unlock(&dpdk_mutex); >>- >> return err; >>+ >> } >> >> static int >>-netdev_dpdk_vhost_cuse_set_multiq(struct netdev *netdev_, unsigned int >>n_txq, >>- unsigned int n_rxq) >>+netdev_dpdk_vhost_set_multiq(struct netdev *netdev_, unsigned int n_txq) >> { >> struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); >> int err = 0; >> >>- if (netdev->up.n_txq == n_txq && netdev->up.n_rxq == n_rxq) { >>- return err; >>- } >>- >>- ovs_mutex_lock(&dpdk_mutex); >> ovs_mutex_lock(&netdev->mutex); >> >>- netdev->up.n_txq = n_txq; >>- netdev->real_n_txq = 1; >>- netdev->up.n_rxq = 1; >>- netdev->txq_needs_locking = netdev->real_n_txq != netdev->up.n_txq; >>- >>- ovs_mutex_unlock(&netdev->mutex); >>- ovs_mutex_unlock(&dpdk_mutex); >>- >>- return err; >>-} >>- >>-static int >>-netdev_dpdk_vhost_set_multiq(struct netdev *netdev_, unsigned int n_txq, >>- unsigned int n_rxq) >>-{ >>- struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); >>- int err = 0; >>- >>- if (netdev->up.n_txq == n_txq && netdev->up.n_rxq == n_rxq) { >>- return err; >>+ if (netdev->up.n_txq == n_txq) { >>+ goto out; >> } >> >>- ovs_mutex_lock(&dpdk_mutex); >>- ovs_mutex_lock(&netdev->mutex); >>- >>- netdev->up.n_txq = n_txq; >>- netdev->up.n_rxq = n_rxq; >>+ netdev->requested_n_txq = n_txq; > >Do we need to request a reconfigure here? You're right, thanks for noticing! That change makes it equal to netdev_dpdk_set_multiq(), so I've merged the two functions > >> >>+out: >> ovs_mutex_unlock(&netdev->mutex); >>- ovs_mutex_unlock(&dpdk_mutex); >> >> return err; >> } >>@@ -2239,8 +2199,69 @@ unlock_dpdk: >> return err; >> } >> >>-#define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT, MULTIQ, >>SEND, \ >>- GET_CARRIER, GET_STATS, GET_FEATURES, GET_STATUS, RXQ_RECV) >> \ >>+static int >>+netdev_dpdk_reconfigure(struct netdev *netdev_) >>+{ >>+ struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); >>+ int err = 0; >>+ >>+ ovs_mutex_lock(&dpdk_mutex); >>+ ovs_mutex_lock(&netdev->mutex); >>+ rte_eth_dev_stop(netdev->port_id); >>+ >>+ netdev_->n_txq = netdev->requested_n_txq; >>+ netdev_->n_rxq = netdev->requested_n_rxq; >>+ >>+ rte_free(netdev->tx_q); >>+ err = dpdk_eth_dev_init(netdev); >>+ netdev_dpdk_alloc_txq(netdev, netdev->real_n_txq); >>+ >>+ netdev->txq_needs_locking = netdev->real_n_txq != netdev->up.n_txq; >>+ >>+ ovs_mutex_unlock(&netdev->mutex); >>+ ovs_mutex_unlock(&dpdk_mutex); >>+ >>+ return err; >>+} >>+ >>+static int >>+netdev_dpdk_vhost_user_reconfigure(struct netdev *netdev_) >>+{ >>+ struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); >>+ >>+ ovs_mutex_lock(&dpdk_mutex); >>+ ovs_mutex_lock(&netdev->mutex); >>+ >>+ netdev->up.n_txq = netdev->requested_n_txq; >>+ netdev->up.n_rxq = netdev->requested_n_rxq; >>+ >>+ ovs_mutex_unlock(&netdev->mutex); >>+ ovs_mutex_unlock(&dpdk_mutex); >>+ >>+ return 0; >>+} >>+ >>+static int >>+netdev_dpdk_vhost_cuse_reconfigure(struct netdev *netdev_) >>+{ >>+ struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); >>+ >>+ ovs_mutex_lock(&dpdk_mutex); >>+ ovs_mutex_lock(&netdev->mutex); >>+ >>+ netdev->up.n_txq = netdev->requested_n_txq; >>+ netdev->real_n_txq = 1; >>+ netdev->up.n_rxq = 1; >>+ netdev->txq_needs_locking = netdev->real_n_txq != netdev->up.n_txq; >>+ >>+ ovs_mutex_unlock(&netdev->mutex); >>+ ovs_mutex_unlock(&dpdk_mutex); >>+ >>+ return 0; >>+} >>+ >>+#define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT, MULTIQ, >>SEND, \ >>+ GET_CARRIER, GET_STATS, GET_FEATURES, GET_STATUS, RECONFIGURE, >>RXQ_RECV) \ >> { \ >> NAME, \ >> INIT, /* init */ \ >>@@ -2298,7 +2319,7 @@ unlock_dpdk: >> NULL, /* arp_lookup */ \ >> \ >> netdev_dpdk_update_flags, \ >>- NULL, /* reconfigure */ \ >>+ RECONFIGURE, \ >> \ >> netdev_dpdk_rxq_alloc, \ >> netdev_dpdk_rxq_construct, \ >>@@ -2419,6 +2440,7 @@ static const struct netdev_class dpdk_class = >> netdev_dpdk_get_stats, >> netdev_dpdk_get_features, >> netdev_dpdk_get_status, >>+ netdev_dpdk_reconfigure, >> netdev_dpdk_rxq_recv); >> >> static const struct netdev_class dpdk_ring_class = >>@@ -2433,6 +2455,7 @@ static const struct netdev_class dpdk_ring_class = >> netdev_dpdk_get_stats, >> netdev_dpdk_get_features, >> netdev_dpdk_get_status, >>+ netdev_dpdk_reconfigure, >> netdev_dpdk_rxq_recv); >> >> static const struct netdev_class OVS_UNUSED dpdk_vhost_cuse_class = >>@@ -2441,12 +2464,13 @@ static const struct netdev_class OVS_UNUSED >>dpdk_vhost_cuse_class = >> dpdk_vhost_cuse_class_init, >> netdev_dpdk_vhost_cuse_construct, >> netdev_dpdk_vhost_destruct, >>- netdev_dpdk_vhost_cuse_set_multiq, >>+ netdev_dpdk_vhost_set_multiq, >> netdev_dpdk_vhost_send, >> netdev_dpdk_vhost_get_carrier, >> netdev_dpdk_vhost_get_stats, >> NULL, >> NULL, >>+ netdev_dpdk_vhost_cuse_reconfigure, >> netdev_dpdk_vhost_rxq_recv); >> >> static const struct netdev_class OVS_UNUSED dpdk_vhost_user_class = >>@@ -2461,6 +2485,7 @@ static const struct netdev_class OVS_UNUSED >>dpdk_vhost_user_class = >> netdev_dpdk_vhost_get_stats, >> NULL, >> NULL, >>+ netdev_dpdk_vhost_user_reconfigure, >> netdev_dpdk_vhost_rxq_recv); >> >> void >>diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h >>index 9646cca..29b3949 100644 >>--- a/lib/netdev-provider.h >>+++ b/lib/netdev-provider.h >>@@ -67,8 +67,6 @@ struct netdev { >> * modify them. */ >> int n_txq; >> int n_rxq; >>- /* Number of rx queues requested by user. */ >>- int requested_n_rxq; >> int ref_cnt; /* Times this devices was >>opened. */ >> struct shash_node *node; /* Pointer to element in global >>map. */ >> struct ovs_list saved_flags_list; /* Contains "struct >>netdev_saved_flags". */ >>@@ -295,13 +293,8 @@ struct netdev_class { >> * such info, returns NETDEV_NUMA_UNSPEC. */ >> int (*get_numa_id)(const struct netdev *netdev); >> >>- /* Configures the number of tx queues and rx queues of 'netdev'. >>- * Return 0 if successful, otherwise a positive errno value. >>- * >>- * 'n_rxq' specifies the maximum number of receive queues to create. >>- * The netdev provider might choose to create less (e.g. if the >>hardware >>- * supports only a smaller number). The actual number of queues >>created >>- * is stored in the 'netdev->n_rxq' field. >>+ /* Configures the number of tx queues of 'netdev'. Returns 0 if >>successful, >>+ * otherwise a positive errno value. >> * >> * 'n_txq' specifies the exact number of transmission queues to >>create. >> * The caller will call netdev_send() concurrently from 'n_txq' >>different >>@@ -309,12 +302,8 @@ struct netdev_class { >> * making sure that these concurrent calls do not create a race >>condition >> * by using multiple hw queues or locking. >> * >>- * On error, the tx queue and rx queue configuration is >>indeterminant. >>- * Caller should make decision on whether to restore the previous or >>- * the default configuration. Also, caller must make sure there is >>no >>- * other thread accessing the queues at the same time. */ >>- int (*set_multiq)(struct netdev *netdev, unsigned int n_txq, >>- unsigned int n_rxq); >>+ * On error, the tx queue and rx queue configuration is unchanged. >>*/ > >It's implicit that the Rx queue config is unchanged, since the set_multiq >function now only affects Tx queues, so there's probably no need to state >it explicitly. You're right, I removed the reference to the rx queues. Thank you for your review, Mark. I've incorporated all your comments. I hope to get more reviews, otherwise I'll post a v2 soon _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev