This patch doesn't seem to be in Patchwork.
> -----Original Message----- > From: [email protected] [mailto:ovs-dev- > [email protected]] On Behalf Of [email protected] > Sent: Thursday, May 25, 2017 12:01 PM > To: [email protected] > Subject: [ovs-dev] [PATCH RFC] Conntrack: Avoid recirculation for > established connections. > > From: Antonio Fischetti <[email protected]> > > With conntrack enabled, packets get recirculated and this impacts > the performance with thousands of active concurrent connections. > > This patch is aimed at avoiding recirculation for packets belonging to > established connections in steady state. This is achieved by > manipulating the megaflows and actions. In this case the actions of the > megaflow of recirculated packet is initially updated with new 'CT_SKIP' > action and later updated with the actions of the megaflow of the > original packet(to handle zones). There after the EMC entry > belonging to the original packet is updated to point to the 'megaflow' of > the recirculated packet there by avoiding recirculation. > > Signed-off-by: Antonio Fischetti <[email protected]> > Signed-off-by: Bhanuprakash Bodireddy <[email protected]> > Co-authored-by: Bhanuprakash Bodireddy <[email protected]> > --- > - ~10% performance improvement is observed in our testing with UDP > streams. > - Limited testing is done with RFC patch and the tests include hundreds of > concurrent active TCP connections. > - This is very early implementation and we are posting here to get initial > feedback. > - This RFC patch is implemented leveraging EMC, but optimization could be > extended to dpcls as well to handle higher flows. > - No VXLAN testing is done with this patch. > > datapath/linux/compat/include/linux/openvswitch.h | 1 + > lib/dpif-netdev.c | 148 > ++++++++++++++++++++-- > lib/packets.h | 2 + > 3 files changed, 143 insertions(+), 8 deletions(-) > > diff --git a/datapath/linux/compat/include/linux/openvswitch.h > b/datapath/linux/compat/include/linux/openvswitch.h > index d22102e..6dc5674 100644 > --- a/datapath/linux/compat/include/linux/openvswitch.h > +++ b/datapath/linux/compat/include/linux/openvswitch.h > @@ -762,6 +762,7 @@ enum ovs_ct_attr { > OVS_CT_ATTR_NAT, /* Nested OVS_NAT_ATTR_* */ > OVS_CT_ATTR_FORCE_COMMIT, /* No argument */ > OVS_CT_ATTR_EVENTMASK, /* u32 mask of IPCT_* events. */ > + OVS_CT_ATTR_SKIP, /* No argument, does not invoke CT. */ > __OVS_CT_ATTR_MAX > }; > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index b50164b..fbbb42e 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -2101,7 +2101,8 @@ emc_probabilistic_insert(struct dp_netdev_pmd_thread > *pmd, > } > > static inline struct dp_netdev_flow * > -emc_lookup(struct emc_cache *cache, const struct netdev_flow_key *key) > +emc_lookup(struct emc_cache *cache, const struct netdev_flow_key *key, > + void **pfound_entry) > { > struct emc_entry *current_entry; > > @@ -2110,11 +2111,13 @@ emc_lookup(struct emc_cache *cache, const struct > netdev_flow_key *key) > && emc_entry_alive(current_entry) > && netdev_flow_key_equal_mf(¤t_entry->key, &key->mf)) { > > + *pfound_entry = current_entry; > /* We found the entry with the 'key->mf' miniflow */ > return current_entry->flow; > } > } > > + *pfound_entry = NULL; > return NULL; > } > > @@ -4583,10 +4586,12 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, > key->hash = dpif_netdev_packet_get_rss_hash(packet, &key->mf); > > /* If EMC is disabled skip emc_lookup */ > - flow = (cur_min == 0) ? NULL: emc_lookup(flow_cache, key); > + flow = (cur_min == 0) ? NULL: > + emc_lookup(flow_cache, key, &packet->md.prev_emc_entry); > if (OVS_LIKELY(flow)) { > dp_netdev_queue_batches(packet, flow, &key->mf, batches, > n_batches); > + packet->md.prev_flow = flow; > } else { > /* Exact match cache missed. Group missed packets together at > * the beginning of the 'packets' array. */ > @@ -4595,6 +4600,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, > * must be returned to the caller. The next key should be > extracted > * to 'keys[n_missed + 1]'. */ > key = &keys[++n_missed]; > + packet->md.prev_flow = NULL; > } > } > > @@ -4604,6 +4610,9 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, > return dp_packet_batch_size(packets_); > } > > +#define CT_PREPEND_ACTION_LEN 0x0C > +#define CT_PREPEND_ACTION_SPARE_LEN 32 > +#define CT_TOT_ACTIONS_LEN (CT_PREPEND_ACTION_LEN + > CT_PREPEND_ACTION_SPARE_LEN) > static inline void > handle_packet_upcall(struct dp_netdev_pmd_thread *pmd, > struct dp_packet *packet, > @@ -4663,12 +4672,44 @@ handle_packet_upcall(struct dp_netdev_pmd_thread > *pmd, > ovs_mutex_lock(&pmd->flow_mutex); > netdev_flow = dp_netdev_pmd_lookup_flow(pmd, key, NULL); > if (OVS_LIKELY(!netdev_flow)) { > - netdev_flow = dp_netdev_flow_add(pmd, &match, &ufid, > - add_actions->data, > - add_actions->size); > + /* Recirculated packets that belong to established > connections > + * are treated differently. A place-holder is prepended to > the > + * list of actions. */ > + if (!(packet->md.ct_state & CS_INVALID) && > + (packet->md.ct_state & CS_TRACKED) && > + (packet->md.ct_state & CS_ESTABLISHED) && > + (packet->md.recirc_id)) { > + > + /* Prepend an OVS_CT_ATTR_SKIP to the list of actions > inside > + * add_actions. It does not really invoke the ConnTrack > module, > + * it's a Ct action that works as a place-holder. It > will be > + * overwritten inside emc_patch_established_conn() with > + * the proper ct(zone=X) action. */ > + struct dp_netdev_actions *new_actions; > + uint8_t tmp_action[CT_TOT_ACTIONS_LEN] = { > + 0x0C, 0x00, 0x0C, 0x00, > + 0x06, 0x00, 0x09, 0x00, > + 0x00, 0x00, 0x00, 0x00, > + /* will append here add_actions. */ > + }; > + memcpy(&tmp_action[CT_PREPEND_ACTION_LEN], > + add_actions->data, > + add_actions->size); > + new_actions = dp_netdev_actions_create( > + (const struct nlattr *)tmp_action, > + CT_PREPEND_ACTION_LEN + add_actions->size); > + netdev_flow = dp_netdev_flow_add(pmd, &match, &ufid, > + new_actions->actions, > + new_actions->size); > + } else { > + netdev_flow = dp_netdev_flow_add(pmd, &match, &ufid, > + add_actions->data, > + add_actions->size); > + } > } > ovs_mutex_unlock(&pmd->flow_mutex); > - emc_probabilistic_insert(pmd, key, netdev_flow); > + /* For this RFC, probabilistic emc insertion is disabled. */ > + emc_insert(&pmd->flow_cache, key, netdev_flow); > } > } > > @@ -4761,7 +4802,8 @@ fast_path_processing(struct dp_netdev_pmd_thread > *pmd, > > flow = dp_netdev_flow_cast(rules[i]); > > - emc_probabilistic_insert(pmd, &keys[i], flow); > + /* For testing purposes the emc_insert is restored here. */ > + emc_insert(&pmd->flow_cache, &keys[i], flow); > dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, > n_batches); > } > > @@ -4998,6 +5040,78 @@ dp_execute_userspace_action(struct > dp_netdev_pmd_thread *pmd, > } > } > > +/* Search into EMC to retrieve the entry related to the original packet > + * and the entry related to the recirculated packet. > + * If both EMC entries are alive, then: > + * - The flow actions of the recirculated packet are updated with > + * 'ct(zone=N)' as retrieved from the actions of the original flow. > + * - The EMC orig entry flow is updated with the flow pointer of recirc > entry. > + */ > +static inline void > +emc_patch_established_conn(struct dp_netdev_pmd_thread *pmd, > + struct dp_packet *packet, long long now OVS_UNUSED) > +{ > + struct emc_cache *cache = &pmd->flow_cache; > + struct netdev_flow_key key; > + struct emc_entry *orig_entry; /* EMC entry hit by the original > packet. */ > + struct emc_entry *recirc_entry; /* EMC entry for recirculated packet. > */ > + uint32_t old_hash; > + > + if (!packet->md.prev_emc_entry) { > + return; > + } > + > + orig_entry = packet->md.prev_emc_entry; > + if (!emc_entry_alive(orig_entry)) { > + return; > + } > + > + /* Check that the original EMC entry was not overwritten. */ > + struct dp_netdev_flow *orig_flow = orig_entry->flow; > + if (packet->md.prev_flow && (packet->md.prev_flow != orig_flow)) { > + return; > + } > + > + /* TODO as we are calling miniflow_extract now, can we avoid to > invoke > + * it again when we'll classify this recirculated packet? */ > + miniflow_extract(packet, &key.mf); > + key.len = 0; /* Not computed yet. */ > + old_hash = dp_packet_get_rss_hash(packet); > + key.hash = dpif_netdev_packet_get_rss_hash(packet, NULL); > + dp_packet_set_rss_hash(packet, old_hash); > + > + EMC_FOR_EACH_POS_WITH_HASH(cache, recirc_entry, key.hash) { > + if (recirc_entry->key.hash == key.hash > + && emc_entry_alive(recirc_entry) > + && netdev_flow_key_equal_mf(&recirc_entry->key, &key.mf)) { > + > + if (orig_entry->flow == recirc_entry->flow) { > + /* Nothing to do, already patched by a packet of this > + * same batch. */ > + return; > + } > + /* We found the entry related to the recirculated packet. > + * Retrieve the actions for orig and recirc entries. * */ > + struct dp_netdev_actions * orig_actions = > + dp_netdev_flow_get_actions(orig_entry->flow); > + struct dp_netdev_actions * recirc_actions = > + dp_netdev_flow_get_actions(recirc_entry->flow); > + > + /* The orig entry action contains a CT action with the zone > info. */ > + struct nlattr *p = &orig_actions->actions[0]; > + > + /* Overwrite the CT Skip action of recirc entry with > ct(zone=N). */ > + memcpy(recirc_actions->actions, p, p->nla_len); > + > + /* Update orig EMC entry. */ > + orig_entry->flow = recirc_entry->flow; > + dp_netdev_flow_ref(orig_entry->flow); > + > + return; > + } > + } > +} > + > static void > dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, > const struct nlattr *a, bool may_steal) > @@ -5024,7 +5138,6 @@ dp_execute_cb(void *aux_, struct dp_packet_batch > *packets_, > } else { > tx_qid = pmd->static_tx_qid; > } > - > netdev_send(p->port->netdev, tx_qid, packets_, may_steal, > dynamic_txqs); > return; > @@ -5145,10 +5258,21 @@ dp_execute_cb(void *aux_, struct dp_packet_batch > *packets_, > struct dp_packet *packet; > DP_PACKET_BATCH_FOR_EACH (packet, packets_) { > packet->md.recirc_id = nl_attr_get_u32(a); > + > + if (!(packet->md.ct_state & CS_INVALID) && > + (packet->md.ct_state & CS_TRACKED) && > + (packet->md.ct_state & CS_ESTABLISHED)) { > + (*depth)++; > + emc_patch_established_conn(pmd, packet, now); > + (*depth)--; > + } > } > > (*depth)++; > dp_netdev_recirculate(pmd, packets_); > + DP_PACKET_BATCH_FOR_EACH (packet, packets_) { > + packet->md.recirc_id = 0; > + } > (*depth)--; > > return; > @@ -5166,6 +5290,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch > *packets_, > const char *helper = NULL; > const uint32_t *setmark = NULL; > const struct ovs_key_ct_labels *setlabel = NULL; > + bool skip_ct = false; > > NL_ATTR_FOR_EACH_UNSAFE (b, left, nl_attr_get(a), > nl_attr_get_size(a)) { > @@ -5194,6 +5319,9 @@ dp_execute_cb(void *aux_, struct dp_packet_batch > *packets_, > /* Silently ignored, as userspace datapath does not > generate > * netlink events. */ > break; > + case OVS_CT_ATTR_SKIP: > + skip_ct = true; > + break; > case OVS_CT_ATTR_NAT: > case OVS_CT_ATTR_UNSPEC: > case __OVS_CT_ATTR_MAX: > @@ -5201,6 +5329,10 @@ dp_execute_cb(void *aux_, struct dp_packet_batch > *packets_, > } > } > > + if (OVS_UNLIKELY(skip_ct)) { > + break; > + } > + > conntrack_execute(&dp->conntrack, packets_, aux->flow->dl_type, > force, > commit, zone, setmark, setlabel, helper); > break; > diff --git a/lib/packets.h b/lib/packets.h > index 7dbf6dd..2c46a15 100644 > --- a/lib/packets.h > +++ b/lib/packets.h > @@ -112,6 +112,8 @@ struct pkt_metadata { > struct flow_tnl tunnel; /* Encapsulating tunnel parameters. Note > that > * if 'ip_dst' == 0, the rest of the > fields may > * be uninitialized. */ > + void *prev_emc_entry; /* EMC entry that this packet matched. */ > + void *prev_flow; /* Flow pointed by the matching EMC > entry. */ > }; > > static inline void > -- > 2.4.11 > > _______________________________________________ > dev mailing list > [email protected] > https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
