flow cache (EMC/SMC) lookup and with a large number of flows it also
results in dpcls lookup due to cache miss. Avoiding cache lookup in
such configurations results in better performance (pps and cpp).
This patch introduces a new Interface configuration parameter,
"port-forward", which avoids the datapath cache lookup. When it is
enabled, the datapath flow
is saved in the port when there is a cache hit for the initial packet.
For subsequent packets, the flow is readily found in the port structure,
thus avoiding cache and dpcls lookup.
Example:
$ ovs-vsctl add-port br0 dpdk0 \
-- set Interface dpdk0 other_config:port-forward=true
A coverage counter has also been added to track packets processed in
port-forward mode.
$ ovs-appctl coverage/show | grep datapath_port_forward_packet
Signed-off-by: Sriharsha Basavapatna <[email protected]>
---
lib/dpif-netdev.c | 79 ++++++++++++++++++++++++++++++++++----------
vswitchd/vswitch.xml | 11 ++++++
2 files changed, 72 insertions(+), 18 deletions(-)
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 251788b04..133ed7c1e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -114,6 +114,7 @@ COVERAGE_DEFINE(datapath_drop_invalid_port);
COVERAGE_DEFINE(datapath_drop_invalid_bond);
COVERAGE_DEFINE(datapath_drop_invalid_tnl_port);
COVERAGE_DEFINE(datapath_drop_rx_invalid_packet);
+COVERAGE_DEFINE(datapath_port_forward_packet);
/* Protects against changes to 'dp_netdevs'. */
static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;
@@ -483,6 +484,7 @@ struct dp_netdev_port {
unsigned *txq_used; /* Number of threads that use each tx queue. */
struct ovs_mutex txq_used_mutex;
bool emc_enabled; /* If true EMC will be used. */
+ bool port_forward;
char *type; /* Port type as requested by user. */
char *rxq_affinity_list; /* Requested affinity of rx queues. */
};
@@ -557,6 +559,7 @@ struct dp_netdev_flow {
bool dead;
uint32_t mark; /* Unique flow mark assigned to a flow */
+ void *port_forward_txp;
/* Statistics. */
struct dp_netdev_flow_stats stats;
@@ -610,6 +613,7 @@ struct polled_queue {
struct dp_netdev_rxq *rxq;
odp_port_t port_no;
bool emc_enabled;
+ bool port_forward;
bool rxq_enabled;
uint64_t change_seq;
};
@@ -628,6 +632,7 @@ struct tx_port {
long long last_used;
struct hmap_node node;
long long flush_time;
+ struct dp_netdev_flow *port_forward_flow;
struct dp_packet_batch output_pkts;
struct dp_netdev_rxq *output_pkts_rxqs[NETDEV_MAX_BURST];
};
@@ -840,7 +845,8 @@ static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
const struct nlattr *actions,
size_t actions_len);
static void dp_netdev_input(struct dp_netdev_pmd_thread *,
- struct dp_packet_batch *, odp_port_t port_no);
+ struct dp_packet_batch *, odp_port_t port_no,
+ bool port_forward);
static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
struct dp_packet_batch *);
@@ -2083,6 +2089,7 @@ port_create(const char *devname, const char *type,
port->type = xstrdup(type);
port->sf = NULL;
port->emc_enabled = true;
+ port->port_forward = false;
port->need_reconfigure = true;
ovs_mutex_init(&port->txq_used_mutex);
@@ -2845,6 +2852,15 @@ dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd,
if (flow->mark != INVALID_FLOW_MARK) {
queue_netdev_flow_del(pmd, flow);
}
+ if (flow->port_forward_txp) {
+ struct tx_port *p = flow->port_forward_txp;
+ if (p->port_forward_flow == flow) {
+ VLOG_DBG("Deleting port_forward_flow: port: %p flow: %p",
+ p, flow);
+ p->port_forward_flow = NULL;
+ flow->port_forward_txp = NULL;
+ }
+ }
flow->dead = true;
dp_netdev_flow_unref(flow);
@@ -3664,6 +3680,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
flow->dead = false;
flow->batch = NULL;
flow->mark = INVALID_FLOW_MARK;
+ flow->port_forward_txp = NULL;
*CONST_CAST(unsigned *, &flow->pmd_id) = pmd->core_id;
*CONST_CAST(struct flow *, &flow->flow) = match->flow;
*CONST_CAST(ovs_u128 *, &flow->ufid) = *ufid;
@@ -4498,6 +4515,7 @@ dpif_netdev_port_set_config(struct dpif *dpif, odp_port_t port_no,
int error = 0;
const char *affinity_list = smap_get(cfg, "pmd-rxq-affinity");
bool emc_enabled = smap_get_bool(cfg, "emc-enable", true);
+ bool port_forward = smap_get_bool(cfg, "port-forward", false);
ovs_mutex_lock(&dp->port_mutex);
error = get_port_by_number(dp, port_no, &port);
@@ -4505,6 +4523,7 @@ dpif_netdev_port_set_config(struct dpif *dpif, odp_port_t port_no,
goto unlock;
}
+ port->port_forward = port_forward;
if (emc_enabled != port->emc_enabled) {
struct dp_netdev_pmd_thread *pmd;
struct ds ds = DS_EMPTY_INITIALIZER;
@@ -4725,7 +4744,7 @@ dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd,
static int
dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
struct dp_netdev_rxq *rxq,
- odp_port_t port_no)
+ odp_port_t port_no, bool port_forward)
{
struct pmd_perf_stats *s = &pmd->perf_stats;
struct dp_packet_batch batch;
@@ -4765,7 +4784,7 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
}
}
/* Process packet batch. */
- dp_netdev_input(pmd, &batch, port_no);
+ dp_netdev_input(pmd, &batch, port_no, port_forward);
/* Assign processing cycles to rx queue. */
cycles = cycle_timer_stop(&pmd->perf_stats, &timer);
@@ -5791,7 +5810,8 @@ dpif_netdev_run(struct dpif *dpif)
if (dp_netdev_process_rxq_port(non_pmd,
&port->rxqs[i],
- port->port_no)) {
+ port->port_no,
+ port->port_forward)) {
need_to_flush = false;
}
}
@@ -5962,6 +5982,7 @@ pmd_load_queues_and_ports(struct dp_netdev_pmd_thread *pmd,
poll_list[i].rxq = poll->rxq;
poll_list[i].port_no = poll->rxq->port->port_no;
poll_list[i].emc_enabled = poll->rxq->port->emc_enabled;
+ poll_list[i].port_forward = poll->rxq->port->port_forward;
poll_list[i].rxq_enabled = netdev_rxq_enabled(poll->rxq->rx);
poll_list[i].change_seq =
netdev_get_change_seq(poll->rxq->port->netdev);
@@ -6056,7 +6077,8 @@ reload:
process_packets =
dp_netdev_process_rxq_port(pmd, poll_list[i].rxq,
- poll_list[i].port_no);
+ poll_list[i].port_no,
+ poll_list[i].port_forward);
rx_packets += process_packets;
}
@@ -7064,6 +7086,13 @@ smc_lookup_batch(struct dp_netdev_pmd_thread *pmd,
pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SMC_HIT, n_smc_hit);
}
+static struct tx_port *
+pmd_send_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
+ odp_port_t port_no)
+{
+ return tx_port_lookup(&pmd->send_port_cache, port_no);
+}
+
/* Try to process all ('cnt') the 'packets' using only the datapath flow cache
* 'pmd->flow_cache'. If a flow is not found for a packet 'packets[i]', the
* miniflow is copied into 'keys' and the packet pointer is moved at the
@@ -7087,7 +7116,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
struct packet_batch_per_flow batches[], size_t *n_batches,
struct dp_packet_flow_map *flow_map,
size_t *n_flows, uint8_t *index_map,
- bool md_is_valid, odp_port_t port_no)
+ bool md_is_valid, odp_port_t port_no, bool port_forward)
{
struct netdev_flow_key *key = &keys[0];
size_t n_missed = 0, n_emc_hit = 0;
@@ -7127,6 +7156,19 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
pkt_metadata_init(&packet->md, port_no);
}
+ if (OVS_UNLIKELY(port_forward)) {
+ struct tx_port *p;
+
+ tcp_flags = parse_tcp_flags(packet);
+ p = pmd_send_port_cache_lookup(pmd, port_no);
+ if (p->port_forward_flow) {
+ dp_netdev_queue_batches(packet, p->port_forward_flow,
+ tcp_flags, batches, n_batches);
+ COVERAGE_INC(datapath_port_forward_packet);
+ continue;
+ }
+ }
+
if ((*recirc_depth_get() == 0) &&
dp_packet_has_flow_mark(packet, &mark)) {
flow = mark_to_flow_find(pmd, mark);
@@ -7170,6 +7212,13 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
packet_enqueue_to_flow_map(packet, flow, tcp_flags,
flow_map, map_cnt++);
}
+ if (OVS_UNLIKELY(port_forward)) {
+ struct tx_port *p = pmd_send_port_cache_lookup(pmd, port_no);
+ VLOG_DBG("Setting port_forward_flow: port: %p flow: %p",
+ p, flow);
+ p->port_forward_flow = flow;
+ flow->port_forward_txp = p;
+ }
} else {
/* Exact match cache missed. Group missed packets together at
* the beginning of the 'packets' array. */
@@ -7410,7 +7459,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
static void
dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
struct dp_packet_batch *packets,
- bool md_is_valid, odp_port_t port_no)
+ bool md_is_valid, odp_port_t port_no, bool port_forward)
{
#if !defined(__CHECKER__) && !defined(_WIN32)
const size_t PKT_ARRAY_SIZE = dp_packet_batch_size(packets);
@@ -7431,7 +7480,8 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
n_batches = 0;
dfc_processing(pmd, packets, keys, missed_keys, batches, &n_batches,
- flow_map, &n_flows, index_map, md_is_valid, port_no);
+ flow_map, &n_flows, index_map, md_is_valid, port_no,
+ port_forward);
if (!dp_packet_batch_is_empty(packets)) {
/* Get ingress port from first packet's metadata. */
@@ -7472,16 +7522,16 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
static void
dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
struct dp_packet_batch *packets,
- odp_port_t port_no)
+ odp_port_t port_no, bool port_forward)
{
- dp_netdev_input__(pmd, packets, false, port_no);
+ dp_netdev_input__(pmd, packets, false, port_no, port_forward);
}
static void
dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd,
struct dp_packet_batch *packets)
{
- dp_netdev_input__(pmd, packets, true, 0);
+ dp_netdev_input__(pmd, packets, true, 0, false);
}
struct dp_netdev_execute_aux {
@@ -7581,13 +7631,6 @@ pmd_tnl_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
return tx_port_lookup(&pmd->tnl_port_cache, port_no);
}
-static struct tx_port *
-pmd_send_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
- odp_port_t port_no)
-{
- return tx_port_lookup(&pmd->send_port_cache, port_no);
-}
-
static int
push_tnl_action(const struct dp_netdev_pmd_thread *pmd,
const struct nlattr *attr,
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 4597a215d..11ce80aff 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -3369,6 +3369,17 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
Defaults to true.
</p>
</column>
+
+ <column name="other_config" key="port-forward"
+ type='{"type": "boolean"}'>
+ <p>
+ Specifies whether port-forward mode should be used when processing
+ packets received on this interface.
+ </p>
+ <p>
+ Defaults to false.
+ </p>
+ </column>
</group>
<group title="MTU">