Hi Harsha,

On 4/20/2021 11:07 AM, Sriharsha Basavapatna wrote:
Sometimes a port might be configured with a single flow that just
forwards packets to another port. This would be useful in configs
where the bridge is just forwarding packets between two ports (for
example, between a vhost-user port and a physical port). A flow
that matches only on the in_port and with an action that forwards
to another port would be configured, to avoid learning or matching
on packet headers.

Example:
$ ovs-ofctl add-flow br0 in_port=1,actions=output:2
$ ovs-ofctl add-flow br0 in_port=2,actions=output:1

This translates to a datapath flow with the match fields wildcarded
for the packet headers. However, the datapath processing still involves

There are still several matches (not wildcards):

  - recirc_id
  - in_port
  - packet_type
  - dl_type
  - vlan_tci
  - nw_frag (for ip packets)

So there might be multiple flows for each such OpenFlow rule.

In the past, I have tried to optimize such a scenario, see:

https://mail.openvswitch.org/pipermail/ovs-dev/2019-April/357882.html

That was wrong, as commented afterwards.

Another related patch-set was this (also not accepted):

https://mail.openvswitch.org/pipermail/ovs-dev/2019-October/363948.html

Ilya wrote an alternative patch:

https://patchwork.ozlabs.org/patch/1105880/

AFAIR, it didn't improve performance either.

Besides, I have tried this patch. Maybe I did something wrong (I configured port-forward=true on those ports, added those OpenFlow rules, and pinged between the ports), but I didn't see it working (judging by the coverage counter, and also by prints I added myself).

With this proposed patch, what will be the behavior in case there are multiple DP flows for that single OpenFlow rule?

Thanks,
Eli

flow cache (EMC/SMC) lookup and with a large number of flows it also
results in dpcls lookup due to cache miss. Avoiding cache lookup in
such configurations results in better performance (pps and cpp).

This patch provides a new interface config parameter - "port-forward",
to avoid datapath cache lookup. When this is enabled, the datapath flow
is saved in the port when there is a cache hit for the initial packet.
For subsequent packets, the flow is readily found in the port structure,
thus avoiding cache and dpcls lookup.

Example:
$ ovs-vsctl add-port br0 dpdk0 \
-- set Interface dpdk0 other_config:port-forward=true

A coverage counter has also been added to track packets processed in
port-forward mode.

$ ovs-appctl coverage/show   | grep datapath_port_forward_packet

Signed-off-by: Sriharsha Basavapatna <[email protected]>
---
  lib/dpif-netdev.c    | 79 ++++++++++++++++++++++++++++++++++----------
  vswitchd/vswitch.xml | 11 ++++++
  2 files changed, 72 insertions(+), 18 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 251788b04..133ed7c1e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -114,6 +114,7 @@ COVERAGE_DEFINE(datapath_drop_invalid_port);
  COVERAGE_DEFINE(datapath_drop_invalid_bond);
  COVERAGE_DEFINE(datapath_drop_invalid_tnl_port);
  COVERAGE_DEFINE(datapath_drop_rx_invalid_packet);
+COVERAGE_DEFINE(datapath_port_forward_packet);
/* Protects against changes to 'dp_netdevs'. */
  static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;
@@ -483,6 +484,7 @@ struct dp_netdev_port {
      unsigned *txq_used;         /* Number of threads that use each tx queue. 
*/
      struct ovs_mutex txq_used_mutex;
      bool emc_enabled;           /* If true EMC will be used. */
+    bool port_forward;
      char *type;                 /* Port type as requested by user. */
      char *rxq_affinity_list;    /* Requested affinity of rx queues. */
  };
@@ -557,6 +559,7 @@ struct dp_netdev_flow {
bool dead;
      uint32_t mark;               /* Unique flow mark assigned to a flow */
+    void *port_forward_txp;
/* Statistics. */
      struct dp_netdev_flow_stats stats;
@@ -610,6 +613,7 @@ struct polled_queue {
      struct dp_netdev_rxq *rxq;
      odp_port_t port_no;
      bool emc_enabled;
+    bool port_forward;
      bool rxq_enabled;
      uint64_t change_seq;
  };
@@ -628,6 +632,7 @@ struct tx_port {
      long long last_used;
      struct hmap_node node;
      long long flush_time;
+    struct dp_netdev_flow *port_forward_flow;
      struct dp_packet_batch output_pkts;
      struct dp_netdev_rxq *output_pkts_rxqs[NETDEV_MAX_BURST];
  };
@@ -840,7 +845,8 @@ static void dp_netdev_execute_actions(struct 
dp_netdev_pmd_thread *pmd,
                                        const struct nlattr *actions,
                                        size_t actions_len);
  static void dp_netdev_input(struct dp_netdev_pmd_thread *,
-                            struct dp_packet_batch *, odp_port_t port_no);
+                            struct dp_packet_batch *, odp_port_t port_no,
+                            bool port_forward);
  static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
                                    struct dp_packet_batch *);
@@ -2083,6 +2089,7 @@ port_create(const char *devname, const char *type,
      port->type = xstrdup(type);
      port->sf = NULL;
      port->emc_enabled = true;
+    port->port_forward = false;
      port->need_reconfigure = true;
      ovs_mutex_init(&port->txq_used_mutex);
@@ -2845,6 +2852,15 @@ dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd,
      if (flow->mark != INVALID_FLOW_MARK) {
          queue_netdev_flow_del(pmd, flow);
      }
+    if (flow->port_forward_txp) {
+        struct tx_port *p = flow->port_forward_txp;
+        if (p->port_forward_flow == flow) {
+            VLOG_DBG("Deleting port_forward_flow: port: %p flow: %p",
+                      p, flow);
+            p->port_forward_flow = NULL;
+            flow->port_forward_txp = NULL;
+        }
+    }
      flow->dead = true;
dp_netdev_flow_unref(flow);
@@ -3664,6 +3680,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
      flow->dead = false;
      flow->batch = NULL;
      flow->mark = INVALID_FLOW_MARK;
+    flow->port_forward_txp = NULL;
      *CONST_CAST(unsigned *, &flow->pmd_id) = pmd->core_id;
      *CONST_CAST(struct flow *, &flow->flow) = match->flow;
      *CONST_CAST(ovs_u128 *, &flow->ufid) = *ufid;
@@ -4498,6 +4515,7 @@ dpif_netdev_port_set_config(struct dpif *dpif, odp_port_t 
port_no,
      int error = 0;
      const char *affinity_list = smap_get(cfg, "pmd-rxq-affinity");
      bool emc_enabled = smap_get_bool(cfg, "emc-enable", true);
+    bool port_forward = smap_get_bool(cfg, "port-forward", false);
ovs_mutex_lock(&dp->port_mutex);
      error = get_port_by_number(dp, port_no, &port);
@@ -4505,6 +4523,7 @@ dpif_netdev_port_set_config(struct dpif *dpif, odp_port_t 
port_no,
          goto unlock;
      }
+ port->port_forward = port_forward;
      if (emc_enabled != port->emc_enabled) {
          struct dp_netdev_pmd_thread *pmd;
          struct ds ds = DS_EMPTY_INITIALIZER;
@@ -4725,7 +4744,7 @@ dp_netdev_pmd_flush_output_packets(struct 
dp_netdev_pmd_thread *pmd,
  static int
  dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
                             struct dp_netdev_rxq *rxq,
-                           odp_port_t port_no)
+                           odp_port_t port_no, bool port_forward)
  {
      struct pmd_perf_stats *s = &pmd->perf_stats;
      struct dp_packet_batch batch;
@@ -4765,7 +4784,7 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread 
*pmd,
              }
          }
          /* Process packet batch. */
-        dp_netdev_input(pmd, &batch, port_no);
+        dp_netdev_input(pmd, &batch, port_no, port_forward);
/* Assign processing cycles to rx queue. */
          cycles = cycle_timer_stop(&pmd->perf_stats, &timer);
@@ -5791,7 +5810,8 @@ dpif_netdev_run(struct dpif *dpif)
if (dp_netdev_process_rxq_port(non_pmd,
                                                     &port->rxqs[i],
-                                                   port->port_no)) {
+                                                   port->port_no,
+                                                   port->port_forward)) {
                          need_to_flush = false;
                      }
                  }
@@ -5962,6 +5982,7 @@ pmd_load_queues_and_ports(struct dp_netdev_pmd_thread 
*pmd,
          poll_list[i].rxq = poll->rxq;
          poll_list[i].port_no = poll->rxq->port->port_no;
          poll_list[i].emc_enabled = poll->rxq->port->emc_enabled;
+        poll_list[i].port_forward = poll->rxq->port->port_forward;
          poll_list[i].rxq_enabled = netdev_rxq_enabled(poll->rxq->rx);
          poll_list[i].change_seq =
                       netdev_get_change_seq(poll->rxq->port->netdev);
@@ -6056,7 +6077,8 @@ reload:
process_packets =
                  dp_netdev_process_rxq_port(pmd, poll_list[i].rxq,
-                                           poll_list[i].port_no);
+                                           poll_list[i].port_no,
+                                           poll_list[i].port_forward);
              rx_packets += process_packets;
          }
@@ -7064,6 +7086,13 @@ smc_lookup_batch(struct dp_netdev_pmd_thread *pmd,
      pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SMC_HIT, n_smc_hit);
  }
+static struct tx_port *
+pmd_send_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
+                           odp_port_t port_no)
+{
+    return tx_port_lookup(&pmd->send_port_cache, port_no);
+}
+
  /* Try to process all ('cnt') the 'packets' using only the datapath flow cache
   * 'pmd->flow_cache'. If a flow is not found for a packet 'packets[i]', the
   * miniflow is copied into 'keys' and the packet pointer is moved at the
@@ -7087,7 +7116,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
                 struct packet_batch_per_flow batches[], size_t *n_batches,
                 struct dp_packet_flow_map *flow_map,
                 size_t *n_flows, uint8_t *index_map,
-               bool md_is_valid, odp_port_t port_no)
+               bool md_is_valid, odp_port_t port_no, bool port_forward)
  {
      struct netdev_flow_key *key = &keys[0];
      size_t n_missed = 0, n_emc_hit = 0;
@@ -7127,6 +7156,19 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
              pkt_metadata_init(&packet->md, port_no);
          }
+ if (OVS_UNLIKELY(port_forward)) {
+            struct tx_port *p;
+
+            tcp_flags = parse_tcp_flags(packet);
+            p = pmd_send_port_cache_lookup(pmd, port_no);
+            if (p->port_forward_flow) {
+                dp_netdev_queue_batches(packet, p->port_forward_flow,
+                                        tcp_flags, batches, n_batches);
+                COVERAGE_INC(datapath_port_forward_packet);
+                continue;
+            }
+        }
+
          if ((*recirc_depth_get() == 0) &&
              dp_packet_has_flow_mark(packet, &mark)) {
              flow = mark_to_flow_find(pmd, mark);
@@ -7170,6 +7212,13 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd,
                  packet_enqueue_to_flow_map(packet, flow, tcp_flags,
                                             flow_map, map_cnt++);
              }
+            if (OVS_UNLIKELY(port_forward)) {
+                struct tx_port *p = pmd_send_port_cache_lookup(pmd, port_no);
+                VLOG_DBG("Setting port_forward_flow: port: %p flow: %p",
+                         p, flow);
+                p->port_forward_flow = flow;
+                flow->port_forward_txp = p;
+            }
          } else {
              /* Exact match cache missed. Group missed packets together at
               * the beginning of the 'packets' array. */
@@ -7410,7 +7459,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
  static void
  dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
                    struct dp_packet_batch *packets,
-                  bool md_is_valid, odp_port_t port_no)
+                  bool md_is_valid, odp_port_t port_no, bool port_forward)
  {
  #if !defined(__CHECKER__) && !defined(_WIN32)
      const size_t PKT_ARRAY_SIZE = dp_packet_batch_size(packets);
@@ -7431,7 +7480,8 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
n_batches = 0;
      dfc_processing(pmd, packets, keys, missed_keys, batches, &n_batches,
-                   flow_map, &n_flows, index_map, md_is_valid, port_no);
+                   flow_map, &n_flows, index_map, md_is_valid, port_no,
+                   port_forward);
if (!dp_packet_batch_is_empty(packets)) {
          /* Get ingress port from first packet's metadata. */
@@ -7472,16 +7522,16 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
  static void
  dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
                  struct dp_packet_batch *packets,
-                odp_port_t port_no)
+                odp_port_t port_no, bool port_forward)
  {
-    dp_netdev_input__(pmd, packets, false, port_no);
+    dp_netdev_input__(pmd, packets, false, port_no, port_forward);
  }
static void
  dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd,
                        struct dp_packet_batch *packets)
  {
-    dp_netdev_input__(pmd, packets, true, 0);
+    dp_netdev_input__(pmd, packets, true, 0, false);
  }
struct dp_netdev_execute_aux {
@@ -7581,13 +7631,6 @@ pmd_tnl_port_cache_lookup(const struct 
dp_netdev_pmd_thread *pmd,
      return tx_port_lookup(&pmd->tnl_port_cache, port_no);
  }
-static struct tx_port *
-pmd_send_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
-                           odp_port_t port_no)
-{
-    return tx_port_lookup(&pmd->send_port_cache, port_no);
-}
-
  static int
  push_tnl_action(const struct dp_netdev_pmd_thread *pmd,
                  const struct nlattr *attr,
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 4597a215d..11ce80aff 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -3369,6 +3369,17 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch 
options:peer=p1 \
            Defaults to true.
          </p>
        </column>
+
+      <column name="other_config" key="port-forward"
+              type='{"type": "boolean"}'>
+        <p>
+          Specifies if port forwarding should be used while processing
+          packets received from this interface.
+        </p>
+        <p>
+          Defaults to false.
+        </p>
+      </column>
      </group>
<group title="MTU">
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to