> -----Original Message-----
> From: Darrell Ball [mailto:[email protected]]
> Sent: Tuesday, July 11, 2017 6:49 PM
> To: O Mahony, Billy <[email protected]>; [email protected]
> Subject: Re: [ovs-dev] [RFC v2 2/4] netdev-dpdk: Apply ingress_sched config
> to dpdk phy ports
>
>
>
> On 7/11/17, 9:58 AM, "[email protected] on behalf of Billy
> O'Mahony" <[email protected] on behalf of
> [email protected]> wrote:
>
> Ingress scheduling configuration is given effect by way of Flow Director
> filters. A small subset of the possible ingress scheduling possible is
> implemented in this patch.
>
> Signed-off-by: Billy O'Mahony <[email protected]>
> ---
> include/openvswitch/ofp-parse.h | 3 ++
> lib/dpif-netdev.c | 1 +
> lib/netdev-dpdk.c | 117
> ++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 121 insertions(+)
>
> diff --git a/include/openvswitch/ofp-parse.h b/include/openvswitch/ofp-
> parse.h
> index fc5784e..08d6086 100644
> --- a/include/openvswitch/ofp-parse.h
> +++ b/include/openvswitch/ofp-parse.h
> @@ -37,6 +37,9 @@ struct ofputil_table_mod;
> struct ofputil_bundle_msg;
> struct ofputil_tlv_table_mod;
> struct simap;
> +struct tun_table;
> +struct flow_wildcards;
> +struct ofputil_port_map;
> enum ofputil_protocol;
>
> char *parse_ofp_str(struct ofputil_flow_mod *, int command, const char
> *str_,
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 2f224db..66712c7 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -44,6 +44,7 @@
> #include "dp-packet.h"
> #include "dpif.h"
> #include "dpif-provider.h"
> +#include "netdev-provider.h"
> #include "dummy.h"
> #include "fat-rwlock.h"
> #include "flow.h"
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index d14c381..93556e7 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -33,6 +33,8 @@
> #include <rte_meter.h>
> #include <rte_virtio_net.h>
>
> +#include <openvswitch/ofp-parse.h>
> +#include <openvswitch/ofp-util.h>
> #include "dirs.h"
> #include "dp-packet.h"
> #include "dpdk.h"
> @@ -168,6 +170,10 @@ static const struct rte_eth_conf port_conf = {
> .txmode = {
> .mq_mode = ETH_MQ_TX_NONE,
> },
> + .fdir_conf = {
> + .mode = RTE_FDIR_MODE_PERFECT,
> + },
> +
> };
>
> enum { DPDK_RING_SIZE = 256 };
> @@ -652,6 +658,15 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk
> *dev, int n_rxq, int n_txq)
> int i;
> struct rte_eth_conf conf = port_conf;
>
> + /* Ingress scheduling requires ETH_MQ_RX_NONE so limit it to when
> exactly
> + * two rxqs are defined. Otherwise MQ will not work as expected. */
> + if (dev->ingress_sched_str && n_rxq == 2) {
> + conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
> + }
> + else {
> + conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
> + }
> +
> if (dev->mtu > ETHER_MTU) {
> conf.rxmode.jumbo_frame = 1;
> conf.rxmode.max_rx_pkt_len = dev->max_packet_len;
> @@ -752,6 +767,106 @@ dpdk_eth_flow_ctrl_setup(struct netdev_dpdk
> *dev) OVS_REQUIRES(dev->mutex)
> }
> }
>
> +static void
> +dpdk_apply_ingress_scheduling(struct netdev_dpdk *dev, int n_rxq)
> +{
> + if (!dev->ingress_sched_str) {
> + return;
> + }
> +
> + if (n_rxq != 2) {
> + VLOG_ERR("Interface %s: Ingress scheduling config ignored; " \
> + "Requires n_rxq==2.", dev->up.name);
> + }
> +
> + int priority_q_id = n_rxq-1;
> + char *key, *val, *str, *iter;
> +
> + ovs_be32 ip_src, ip_dst;
> + ip_src = ip_dst = 0;
> +
> + uint16_t eth_type, port_src, port_dst;
> + eth_type = port_src = port_dst = 0;
> + uint8_t ip_proto = 0;
> +
> + char *mallocd_str; /* str_to_x returns malloc'd str we'll need to
> free */
> + /* Parse the configuration into local vars */
> + iter = str = xstrdup(dev->ingress_sched_str);
> + while (ofputil_parse_key_value (&iter, &key, &val)) {
> + if (strcmp(key, "nw_src") == 0 || strcmp(key, "ip_src") == 0) {
> + mallocd_str = str_to_ip(val, &ip_src);
> + } else if (strcmp(key, "nw_dst") == 0 || strcmp(key, "ip_dst")
> == 0) {
> + mallocd_str = str_to_ip(val, &ip_dst);
> + } else if (strcmp(key, "dl_type") == 0 ||
> + strcmp(key, "eth_type") == 0) {
> + mallocd_str = str_to_u16(val, "eth_type/dl_type", ð_type);
> + } else if (strcmp(key, "tcp_src") == 0 ||
> + strcmp(key, "tp_src") == 0 ||
> + strcmp(key, "udp_src") == 0) {
> + mallocd_str = str_to_u16(val, "tcp/udp_src", &port_src);
> + } else if (strcmp(key, "tcp_dst") == 0 ||
> + strcmp(key, "tp_dst") == 0 ||
> + strcmp(key, "udp_dst") == 0) {
> + mallocd_str = str_to_u16(val, "tcp/udp_dst", &port_dst);
> + } else if (strcmp(key, "ip") == 0) {
> + eth_type = 0x0800;
> + } else if (strcmp(key, "udp") == 0) {
> + eth_type = 0x0800;
> + ip_proto = 17;
> + } else if (strcmp(key, "tcp") == 0) {
> + eth_type = 0x0800;
> + ip_proto = 6;
> + } else {
> + VLOG_WARN("Ignoring unsupported ingress scheduling field
> '%s'",
> \
> + key);
> + }
> + if (mallocd_str) {
> + VLOG_ERR ("%s", mallocd_str);
> + free(mallocd_str);
> + mallocd_str = NULL;
> + }
> + }
> + free (str);
> +
> + /* Set the filters */
> + int diag = 0;
> + if (eth_type && ip_src && ip_dst && port_src && port_dst &&
> ip_proto) {
> + struct rte_eth_fdir_filter entry = { 0 };
> + entry.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP;
> + entry.input.flow.udp4_flow.ip.src_ip = ip_src;
> + entry.input.flow.udp4_flow.ip.dst_ip = ip_dst;
> + entry.input.flow.udp4_flow.src_port = htons(port_src);
> + entry.input.flow.udp4_flow.dst_port = htons(port_dst);
> + entry.action.rx_queue = priority_q_id;
> + entry.action.behavior = RTE_ETH_FDIR_ACCEPT;
> + entry.action.report_status = RTE_ETH_FDIR_REPORT_ID;
> + diag = rte_eth_dev_filter_ctrl(dev->port_id,
> + RTE_ETH_FILTER_FDIR, RTE_ETH_FILTER_ADD, &entry);
> + }
> + else if (eth_type && !ip_src && !ip_dst && !port_src
> + && !port_dst && !ip_proto) {
> + struct rte_eth_ethertype_filter entry = {0};
> + memset (&entry, 0, sizeof entry);
> + entry.ether_type = eth_type;
> + entry.flags = 0;
> + entry.queue = priority_q_id;
> + diag = rte_eth_dev_filter_ctrl(dev->port_id,
> + RTE_ETH_FILTER_ETHERTYPE, RTE_ETH_FILTER_ADD, &entry);
> + }
> + else {
> + VLOG_ERR("Unsupported ingress scheduling match-field
> combination.");
>
> The above ‘if’ and ‘else if’ cases seem quite limiting; can we have just
> ‘port_dst’ or just ‘ip_src’ for examples ?
[[BO'M]] Hopefully yes all these kinds of permutations and combinations would
be supported. It depends ultimately on what the underlying functionality (in
this case the NIC plus dpdk drivers) supports.
>
>
> + return;
> + }
> +
> + if (diag) {
> + VLOG_ERR("Failed to add ingress scheduling filter.");
>
> I am sure you thought about this already, but this may be hard for the user to
> know what might be allowed; it would a be trial and error exercise.
>
> Would an interface ‘capability’ help here ?
[[BO'M]] My initial reaction is that I'm not sure it would.
If an attempt is made to prioritize certain traffic and it fails then nothing
is lost. Everything should work the same as if no prioritization specified.
Maybe there could be a case where the 'user' want to prioritize traffic of
type=x or y or z (say three different eth_types) but failing that will
prioritize traffic just of type x. But the code to use to discover the
capability (or lack of it in this case) would be more complicated than just
trying the ideal case and then falling back to the simpler case.
Would the user of the capability discovery api be code (ie a higher up layer of
logic) or a human on the cli. (or a program using the cli).
Though there is no checking parse of the config in
netdev_dpdk_set_ingress_sched() which could be used to detect some unsupported
configurations before the reconfig is applied (by which time it's too late to
pass ENOTSUPP) back to the netdev client. I'll see if I can do something about
that in a later version.
>
>
> + }
> + else {
> + /* Mark the appropriate q as prioritized */
> + dev->up.priority_rxq = priority_q_id;
> + }
> +}
> +
> static int
> dpdk_eth_dev_init(struct netdev_dpdk *dev)
> OVS_REQUIRES(dev->mutex)
> @@ -774,6 +889,8 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
> return -diag;
> }
>
> + dpdk_apply_ingress_scheduling(dev, n_rxq);
> +
> diag = rte_eth_dev_start(dev->port_id);
> if (diag) {
> VLOG_ERR("Interface %s start error: %s", dev->up.name,
> --
> 2.7.4
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://urldefense.proofpoint.com/v2/url?u=https-
> 3A__mail.openvswitch.org_mailman_listinfo_ovs-
> 2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-
> uZnsw&m=mD-1gqbGI1_5EN7JASS5cT8B1gcLrBXG-
> Bb1yRgrihU&s=mCo9sG_4f7mU8sW-qBwmEEs-fRC77rOa_SO5G0wtOhc&e=
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev