This patch doesn't seem to be in Patchwork.

> -----Original Message-----
> From: [email protected] [mailto:ovs-dev-
> [email protected]] On Behalf Of [email protected]
> Sent: Thursday, May 25, 2017 12:01 PM
> To: [email protected]
> Subject: [ovs-dev] [PATCH RFC] Conntrack: Avoid recirculation for
> established connections.
> 
> From: Antonio Fischetti <[email protected]>
> 
> With conntrack enabled, packets get recirculated and this impacts
> the performance with thousands of active concurrent connections.
> 
> This patch is aimed at avoiding recirculation for packets belonging to
> established connections in steady state. This is achieved by
> manipulating the megaflows and actions. In this case the actions of the
> megaflow of recirculated packet is initially updated with new 'CT_SKIP'
> action and later updated with the actions of the megaflow of the
> original packet(to handle zones). There after the EMC entry
> belonging to the original packet is updated to point to the 'megaflow' of
> the recirculated packet there by avoiding recirculation.
> 
> Signed-off-by: Antonio Fischetti <[email protected]>
> Signed-off-by: Bhanuprakash Bodireddy <[email protected]>
> Co-authored-by: Bhanuprakash Bodireddy <[email protected]>
> ---
> - ~10% performance improvement is observed in our testing with UDP
> streams.
> - Limited testing is done with RFC patch and the tests include hundreds of
>   concurrent active TCP connections.
> - This is very early implementation and we are posting here to get initial
>   feedback.
> - This RFC patch is implemented leveraging EMC, but optimization could be
>   extended to dpcls as well to handle higher flows.
> - No VXLAN testing is done with this patch.
> 
>  datapath/linux/compat/include/linux/openvswitch.h |   1 +
>  lib/dpif-netdev.c                                 | 148
> ++++++++++++++++++++--
>  lib/packets.h                                     |   2 +
>  3 files changed, 143 insertions(+), 8 deletions(-)
> 
> diff --git a/datapath/linux/compat/include/linux/openvswitch.h
> b/datapath/linux/compat/include/linux/openvswitch.h
> index d22102e..6dc5674 100644
> --- a/datapath/linux/compat/include/linux/openvswitch.h
> +++ b/datapath/linux/compat/include/linux/openvswitch.h
> @@ -762,6 +762,7 @@ enum ovs_ct_attr {
>       OVS_CT_ATTR_NAT,        /* Nested OVS_NAT_ATTR_* */
>       OVS_CT_ATTR_FORCE_COMMIT,  /* No argument */
>       OVS_CT_ATTR_EVENTMASK,  /* u32 mask of IPCT_* events. */
> +     OVS_CT_ATTR_SKIP,       /* No argument, does not invoke CT. */
>       __OVS_CT_ATTR_MAX
>  };
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index b50164b..fbbb42e 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -2101,7 +2101,8 @@ emc_probabilistic_insert(struct dp_netdev_pmd_thread
> *pmd,
>  }
> 
>  static inline struct dp_netdev_flow *
> -emc_lookup(struct emc_cache *cache, const struct netdev_flow_key *key)
> +emc_lookup(struct emc_cache *cache, const struct netdev_flow_key *key,
> +        void **pfound_entry)
>  {
>      struct emc_entry *current_entry;
> 
> @@ -2110,11 +2111,13 @@ emc_lookup(struct emc_cache *cache, const struct
> netdev_flow_key *key)
>              && emc_entry_alive(current_entry)
>              && netdev_flow_key_equal_mf(&current_entry->key, &key->mf)) {
> 
> +            *pfound_entry = current_entry;
>              /* We found the entry with the 'key->mf' miniflow */
>              return current_entry->flow;
>          }
>      }
> 
> +    *pfound_entry = NULL;
>      return NULL;
>  }
> 
> @@ -4583,10 +4586,12 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
>          key->hash = dpif_netdev_packet_get_rss_hash(packet, &key->mf);
> 
>          /* If EMC is disabled skip emc_lookup */
> -        flow = (cur_min == 0) ? NULL: emc_lookup(flow_cache, key);
> +        flow = (cur_min == 0) ? NULL:
> +                emc_lookup(flow_cache, key, &packet->md.prev_emc_entry);
>          if (OVS_LIKELY(flow)) {
>              dp_netdev_queue_batches(packet, flow, &key->mf, batches,
>                                      n_batches);
> +            packet->md.prev_flow = flow;
>          } else {
>              /* Exact match cache missed. Group missed packets together at
>               * the beginning of the 'packets' array. */
> @@ -4595,6 +4600,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
>               * must be returned to the caller. The next key should be
> extracted
>               * to 'keys[n_missed + 1]'. */
>              key = &keys[++n_missed];
> +            packet->md.prev_flow = NULL;
>          }
>      }
> 
> @@ -4604,6 +4610,9 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
>      return dp_packet_batch_size(packets_);
>  }
> 
> +#define CT_PREPEND_ACTION_LEN 0x0C
> +#define CT_PREPEND_ACTION_SPARE_LEN 32
> +#define CT_TOT_ACTIONS_LEN (CT_PREPEND_ACTION_LEN +
> CT_PREPEND_ACTION_SPARE_LEN)
>  static inline void
>  handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
>                       struct dp_packet *packet,
> @@ -4663,12 +4672,44 @@ handle_packet_upcall(struct dp_netdev_pmd_thread
> *pmd,
>          ovs_mutex_lock(&pmd->flow_mutex);
>          netdev_flow = dp_netdev_pmd_lookup_flow(pmd, key, NULL);
>          if (OVS_LIKELY(!netdev_flow)) {
> -            netdev_flow = dp_netdev_flow_add(pmd, &match, &ufid,
> -                                             add_actions->data,
> -                                             add_actions->size);
> +            /* Recirculated packets that belong to established
> connections
> +             * are treated differently.  A place-holder is prepended to
> the
> +             * list of actions. */
> +            if (!(packet->md.ct_state & CS_INVALID) &&
> +                 (packet->md.ct_state & CS_TRACKED) &&
> +                 (packet->md.ct_state & CS_ESTABLISHED) &&
> +                 (packet->md.recirc_id)) {
> +
> +                /* Prepend an OVS_CT_ATTR_SKIP to the list of actions
> inside
> +                 * add_actions.  It does not really invoke the ConnTrack
> module,
> +                 * it's a Ct action that works as a place-holder.  It
> will be
> +                 * overwritten inside emc_patch_established_conn() with
> +                 * the proper ct(zone=X) action. */
> +                struct dp_netdev_actions *new_actions;
> +                uint8_t tmp_action[CT_TOT_ACTIONS_LEN] = {
> +                        0x0C, 0x00, 0x0C, 0x00,
> +                        0x06, 0x00, 0x09, 0x00,
> +                        0x00, 0x00, 0x00, 0x00,
> +                        /* will append here add_actions. */
> +                };
> +                memcpy(&tmp_action[CT_PREPEND_ACTION_LEN],
> +                        add_actions->data,
> +                        add_actions->size);
> +                new_actions = dp_netdev_actions_create(
> +                        (const struct nlattr *)tmp_action,
> +                        CT_PREPEND_ACTION_LEN + add_actions->size);
> +                netdev_flow = dp_netdev_flow_add(pmd, &match, &ufid,
> +                                                 new_actions->actions,
> +                                                 new_actions->size);
> +            } else {
> +                netdev_flow = dp_netdev_flow_add(pmd, &match, &ufid,
> +                                                 add_actions->data,
> +                                                 add_actions->size);
> +            }
>          }
>          ovs_mutex_unlock(&pmd->flow_mutex);
> -        emc_probabilistic_insert(pmd, key, netdev_flow);
> +        /* For this RFC, probabilistic emc insertion is disabled. */
> +        emc_insert(&pmd->flow_cache, key, netdev_flow);
>      }
>  }
> 
> @@ -4761,7 +4802,8 @@ fast_path_processing(struct dp_netdev_pmd_thread
> *pmd,
> 
>          flow = dp_netdev_flow_cast(rules[i]);
> 
> -        emc_probabilistic_insert(pmd, &keys[i], flow);
> +        /* For testing purposes the emc_insert is restored here. */
> +        emc_insert(&pmd->flow_cache, &keys[i], flow);
>          dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches,
> n_batches);
>      }
> 
> @@ -4998,6 +5040,78 @@ dp_execute_userspace_action(struct
> dp_netdev_pmd_thread *pmd,
>      }
>  }
> 
> +/* Search into EMC to retrieve the entry related to the original packet
> + * and the entry related to the recirculated packet.
> + * If both EMC entries are alive, then:
> + *  - The flow actions of the recirculated packet are updated with
> + *    'ct(zone=N)' as retrieved from the actions of the original flow.
> + *  - The EMC orig entry flow is updated with the flow pointer of recirc
> entry.
> + */
> +static inline void
> +emc_patch_established_conn(struct dp_netdev_pmd_thread *pmd,
> +        struct dp_packet *packet, long long now OVS_UNUSED)
> +{
> +    struct emc_cache *cache = &pmd->flow_cache;
> +    struct netdev_flow_key key;
> +    struct emc_entry *orig_entry; /* EMC entry hit by the original
> packet. */
> +    struct emc_entry *recirc_entry; /* EMC entry for recirculated packet.
> */
> +    uint32_t old_hash;
> +
> +    if (!packet->md.prev_emc_entry) {
> +        return;
> +    }
> +
> +    orig_entry = packet->md.prev_emc_entry;
> +    if (!emc_entry_alive(orig_entry)) {
> +        return;
> +    }
> +
> +    /* Check that the original EMC entry was not overwritten. */
> +    struct dp_netdev_flow *orig_flow = orig_entry->flow;
> +    if (packet->md.prev_flow && (packet->md.prev_flow != orig_flow)) {
> +       return;
> +    }
> +
> +    /* TODO as we are calling miniflow_extract now, can we avoid to
> invoke
> +     * it again when we'll classify this recirculated packet? */
> +    miniflow_extract(packet, &key.mf);
> +    key.len = 0; /* Not computed yet. */
> +    old_hash = dp_packet_get_rss_hash(packet);
> +    key.hash = dpif_netdev_packet_get_rss_hash(packet, NULL);
> +    dp_packet_set_rss_hash(packet, old_hash);
> +
> +    EMC_FOR_EACH_POS_WITH_HASH(cache, recirc_entry, key.hash) {
> +        if (recirc_entry->key.hash == key.hash
> +            && emc_entry_alive(recirc_entry)
> +            && netdev_flow_key_equal_mf(&recirc_entry->key, &key.mf)) {
> +
> +            if (orig_entry->flow == recirc_entry->flow) {
> +                /* Nothing to do, already patched by a packet of this
> +                 * same batch. */
> +                return;
> +            }
> +            /* We found the entry related to the recirculated packet.
> +             * Retrieve the actions for orig and recirc entries. * */
> +            struct dp_netdev_actions * orig_actions =
> +                    dp_netdev_flow_get_actions(orig_entry->flow);
> +            struct dp_netdev_actions * recirc_actions =
> +                    dp_netdev_flow_get_actions(recirc_entry->flow);
> +
> +            /* The orig entry action contains a CT action with the zone
> info. */
> +            struct nlattr *p = &orig_actions->actions[0];
> +
> +            /* Overwrite the CT Skip action of recirc entry with
> ct(zone=N). */
> +            memcpy(recirc_actions->actions, p, p->nla_len);
> +
> +            /* Update orig EMC entry. */
> +            orig_entry->flow = recirc_entry->flow;
> +            dp_netdev_flow_ref(orig_entry->flow);
> +
> +            return;
> +        }
> +    }
> +}
> +
>  static void
>  dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
>                const struct nlattr *a, bool may_steal)
> @@ -5024,7 +5138,6 @@ dp_execute_cb(void *aux_, struct dp_packet_batch
> *packets_,
>              } else {
>                  tx_qid = pmd->static_tx_qid;
>              }
> -
>              netdev_send(p->port->netdev, tx_qid, packets_, may_steal,
>                          dynamic_txqs);
>              return;
> @@ -5145,10 +5258,21 @@ dp_execute_cb(void *aux_, struct dp_packet_batch
> *packets_,
>              struct dp_packet *packet;
>              DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
>                  packet->md.recirc_id = nl_attr_get_u32(a);
> +
> +                if (!(packet->md.ct_state & CS_INVALID) &&
> +                     (packet->md.ct_state & CS_TRACKED) &&
> +                     (packet->md.ct_state & CS_ESTABLISHED)) {
> +                    (*depth)++;
> +                    emc_patch_established_conn(pmd, packet, now);
> +                    (*depth)--;
> +                }
>              }
> 
>              (*depth)++;
>              dp_netdev_recirculate(pmd, packets_);
> +            DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
> +                packet->md.recirc_id = 0;
> +            }
>              (*depth)--;
> 
>              return;
> @@ -5166,6 +5290,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch
> *packets_,
>          const char *helper = NULL;
>          const uint32_t *setmark = NULL;
>          const struct ovs_key_ct_labels *setlabel = NULL;
> +        bool skip_ct = false;
> 
>          NL_ATTR_FOR_EACH_UNSAFE (b, left, nl_attr_get(a),
>                                   nl_attr_get_size(a)) {
> @@ -5194,6 +5319,9 @@ dp_execute_cb(void *aux_, struct dp_packet_batch
> *packets_,
>                  /* Silently ignored, as userspace datapath does not
> generate
>                   * netlink events. */
>                  break;
> +            case OVS_CT_ATTR_SKIP:
> +                skip_ct = true;
> +                break;
>              case OVS_CT_ATTR_NAT:
>              case OVS_CT_ATTR_UNSPEC:
>              case __OVS_CT_ATTR_MAX:
> @@ -5201,6 +5329,10 @@ dp_execute_cb(void *aux_, struct dp_packet_batch
> *packets_,
>              }
>          }
> 
> +        if (OVS_UNLIKELY(skip_ct)) {
> +            break;
> +        }
> +
>          conntrack_execute(&dp->conntrack, packets_, aux->flow->dl_type,
> force,
>                            commit, zone, setmark, setlabel, helper);
>          break;
> diff --git a/lib/packets.h b/lib/packets.h
> index 7dbf6dd..2c46a15 100644
> --- a/lib/packets.h
> +++ b/lib/packets.h
> @@ -112,6 +112,8 @@ struct pkt_metadata {
>      struct flow_tnl tunnel;     /* Encapsulating tunnel parameters. Note
> that
>                                   * if 'ip_dst' == 0, the rest of the
> fields may
>                                   * be uninitialized. */
> +    void *prev_emc_entry;       /* EMC entry that this packet matched. */
> +    void *prev_flow;            /* Flow pointed by the matching EMC
> entry. */
>  };
> 
>  static inline void
> --
> 2.4.11
> 
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to