Hi Ian, I've replied on the checkpatch issues inline below.
Thanks, Cian > -----Original Message----- > From: Stokes, Ian <[email protected]> > Sent: Tuesday 15 June 2021 18:26 > To: Ferriter, Cian <[email protected]>; [email protected]; Van > Haaren, Harry <[email protected]> > Cc: [email protected]; Gaetan Rivet <[email protected]>; Sriharsha > Basavapatna <[email protected]> > Subject: RE: [ovs-dev] [v12 01/16] dpif-netdev: Refactor to multiple header > files. > > > Hi Ian, > > > > Thanks for the review. My responses are inline. > > > > > -----Original Message----- > > > From: Stokes, Ian <[email protected]> > > > Sent: Wednesday 19 May 2021 16:54 > > > To: Ferriter, Cian <[email protected]>; [email protected] > > > Cc: [email protected] > > > Subject: RE: [ovs-dev] [v12 01/16] dpif-netdev: Refactor to multiple > > > header > > > files. > > > > > > > Split the very large file dpif-netdev.c and the datastructures > > > > it contains into multiple header files. Each header file is > > > > responsible for the datastructures of that component. > > > > > > > > This logical split allows better reuse and modularity of the code, > > > > and reduces the very large file dpif-netdev.c to be more managable. > > > > > > > > Due to dependencies between components, it is not possible to > > > > move component in smaller granularities than this patch. > > > > > > > > To explain the dependencies better, eg: > > > > > > > > DPCLS has no deps (from dpif-netdev.c file) > > > > FLOW depends on DPCLS (struct dpcls_rule) > > > > DFC depends on DPCLS (netdev_flow_key) and FLOW (netdev_flow_key) > > > > THREAD depends on DFC (struct dfc_cache) > > > > > > > > DFC_PROC depends on THREAD (struct pmd_thread) > > > > > > > > DPCLS lookup.h/c require only DPCLS > > > > DPCLS implementations require only dpif-netdev-lookup.h. > > > > - This change was made in 2.12 release with function pointers > > > > - This commit only refactors the name to "private-dpcls.h" > > > > > > > > Signed-off-by: Harry van Haaren <[email protected]> > > > > Co-authored-by: Cian Ferriter <[email protected]> > > > > Signed-off-by: Cian Ferriter <[email protected]> > > > > > > Hi Cian/Harry, > > > > > > Thanks for the patch. > > > > > > One risk I think we need to flag/discuss further with the community is > > > the > > > impact the refactor will have on other patches in progress and that is > > > best > > > dealt with. > > > > > > In an effort to help with this I may Cc folks who have patches that would > > > be > > > affected. > > > > > > Best to come with a plan WRT integration order because although > > > functionally little changes here there would be a wider impact to ongoing > > > patches. > > > > > > I have a few queries below inline below. > > > > > > > If you want me to help here by CC'ing some specific folks in the next > > version of > > the patch set, let me know. > > Sure, I've CCd some folks from Nvidia and Broadcom who may be interested. It > may be worth including them for this aspect of the > patch series going forward. > > > > > > > --- > > > > lib/automake.mk | 4 + > > > > lib/dpif-netdev-lookup-autovalidator.c | 1 - > > > > lib/dpif-netdev-lookup-avx512-gather.c | 1 - > > > > lib/dpif-netdev-lookup-generic.c | 1 - > > > > lib/dpif-netdev-lookup.h | 2 +- > > > > lib/dpif-netdev-private-dfc.h | 244 ++++++++++++ > > > > lib/dpif-netdev-private-dpcls.h | 129 ++++++ > > > > lib/dpif-netdev-private-flow.h | 162 ++++++++ > > > > lib/dpif-netdev-private-thread.h | 206 ++++++++++ > > > > lib/dpif-netdev-private.h | 100 +---- > > > > lib/dpif-netdev.c | 519 +------------------------ > > > > 11 files changed, 760 insertions(+), 609 deletions(-) > > > > create mode 100644 lib/dpif-netdev-private-dfc.h > > > > create mode 100644 lib/dpif-netdev-private-dpcls.h > > > > create mode 100644 lib/dpif-netdev-private-flow.h > > > > create mode 100644 lib/dpif-netdev-private-thread.h > > > > > > > > diff --git a/lib/automake.mk b/lib/automake.mk > > > > index 39901bd6d..9fa8712c3 100644 > > > > --- a/lib/automake.mk > > > > +++ b/lib/automake.mk > > > > @@ -111,6 +111,10 @@ lib_libopenvswitch_la_SOURCES = \ > > > > lib/dpif-netdev-lookup-generic.c \ > > > > lib/dpif-netdev.c \ > > > > lib/dpif-netdev.h \ > > > > +lib/dpif-netdev-private-dfc.h \ > > > > +lib/dpif-netdev-private-dpcls.h \ > > > > +lib/dpif-netdev-private-flow.h \ > > > > +lib/dpif-netdev-private-thread.h \ > > > > lib/dpif-netdev-private.h \ > > > > lib/dpif-netdev-perf.c \ > > > > lib/dpif-netdev-perf.h \ > > > > diff --git a/lib/dpif-netdev-lookup-autovalidator.c b/lib/dpif-netdev- > > > lookup- > > > > autovalidator.c > > > > index 97b59fdd0..475e1ab1e 100644 > > > > --- a/lib/dpif-netdev-lookup-autovalidator.c > > > > +++ b/lib/dpif-netdev-lookup-autovalidator.c > > > > @@ -17,7 +17,6 @@ > > > > #include <config.h> > > > > #include "dpif-netdev.h" > > > > #include "dpif-netdev-lookup.h" > > > > -#include "dpif-netdev-private.h" > > > > #include "openvswitch/vlog.h" > > > > > > > > VLOG_DEFINE_THIS_MODULE(dpif_lookup_autovalidator); > > > > diff --git a/lib/dpif-netdev-lookup-avx512-gather.c b/lib/dpif-netdev- > > > lookup- > > > > avx512-gather.c > > > > index 5e3634249..8fc1cdfa5 100644 > > > > --- a/lib/dpif-netdev-lookup-avx512-gather.c > > > > +++ b/lib/dpif-netdev-lookup-avx512-gather.c > > > > @@ -21,7 +21,6 @@ > > > > > > > > #include "dpif-netdev.h" > > > > #include "dpif-netdev-lookup.h" > > > > -#include "dpif-netdev-private.h" > > > > #include "cmap.h" > > > > #include "flow.h" > > > > #include "pvector.h" > > > > diff --git a/lib/dpif-netdev-lookup-generic.c b/lib/dpif-netdev-lookup- > > > > generic.c > > > > index b1a0cfc36..e3b6be4b6 100644 > > > > --- a/lib/dpif-netdev-lookup-generic.c > > > > +++ b/lib/dpif-netdev-lookup-generic.c > > > > @@ -17,7 +17,6 @@ > > > > > > > > #include <config.h> > > > > #include "dpif-netdev.h" > > > > -#include "dpif-netdev-private.h" > > > > #include "dpif-netdev-lookup.h" > > > > > > > > #include "bitmap.h" > > > > diff --git a/lib/dpif-netdev-lookup.h b/lib/dpif-netdev-lookup.h > > > > index bd72aa29b..59f51faa0 100644 > > > > --- a/lib/dpif-netdev-lookup.h > > > > +++ b/lib/dpif-netdev-lookup.h > > > > @@ -19,7 +19,7 @@ > > > > > > > > #include <config.h> > > > > #include "dpif-netdev.h" > > > > -#include "dpif-netdev-private.h" > > > > +#include "dpif-netdev-private-dpcls.h" > > > > > > > > /* Function to perform a probe for the subtable bit fingerprint. > > > > * Returns NULL if not valid, or a valid function pointer to call for > > > > this > > > > diff --git a/lib/dpif-netdev-private-dfc.h > > > > b/lib/dpif-netdev-private-dfc.h > > > > new file mode 100644 > > > > index 000000000..52349a3fc > > > > --- /dev/null > > > > +++ b/lib/dpif-netdev-private-dfc.h > > > > @@ -0,0 +1,244 @@ > > > > +/* > > > > + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2015 Nicira, Inc. > > > > + * Copyright (c) 2019, 2020, 2021 Intel Corporation. > > > > + * > > > > + * Licensed under the Apache License, Version 2.0 (the "License"); > > > > + * you may not use this file except in compliance with the License. > > > > + * You may obtain a copy of the License at: > > > > + * > > > > + * http://www.apache.org/licenses/LICENSE-2.0 > > > > + * > > > > + * Unless required by applicable law or agreed to in writing, software > > > > + * distributed under the License is distributed on an "AS IS" BASIS, > > > > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express > > > or > > > > implied. > > > > + * See the License for the specific language governing permissions and > > > > + * limitations under the License. > > > > + */ > > > > + > > > > +#ifndef DPIF_NETDEV_PRIVATE_DFC_H > > > > +#define DPIF_NETDEV_PRIVATE_DFC_H 1 > > > > + > > > > +#include <stdbool.h> > > > > +#include <stdint.h> > > > > + > > > > +#include "dpif.h" > > > > +#include "dpif-netdev-private-dpcls.h" > > > > +#include "dpif-netdev-private-flow.h" > > > > + > > > > +#ifdef __cplusplus > > > > +extern "C" { > > > > +#endif > > > > + > > > > +/* EMC cache and SMC cache compose the datapath flow cache (DFC) > > > > + * > > > > + * Exact match cache for frequently used flows > > > > + * > > > > + * The cache uses a 32-bit hash of the packet (which can be the RSS > > > > hash) > > > to > > > > + * search its entries for a miniflow that matches exactly the miniflow > > > > of > > > the > > > > + * packet. It stores the 'dpcls_rule' (rule) that matches the miniflow. > > > > + * > > > > + * A cache entry holds a reference to its 'dp_netdev_flow'. > > > > + * > > > > + * A miniflow with a given hash can be in one of EM_FLOW_HASH_SEGS > > > > different > > > > + * entries. The 32-bit hash is split into EM_FLOW_HASH_SEGS values > > > > (each > > > > of > > > > + * them is EM_FLOW_HASH_SHIFT bits wide and the remainder is thrown > > > > away). Each > > > > + * value is the index of a cache entry where the miniflow could be. > > > > + * > > > > + * > > > > + * Signature match cache (SMC) > > > > + * > > > > + * This cache stores a 16-bit signature for each flow without storing > > > > keys, > > > > and > > > > + * stores the corresponding 16-bit flow_table index to the > > > > 'dp_netdev_flow'. > > > > + * Each flow thus occupies 32bit which is much more memory efficient > > > than > > > > EMC. > > > > + * SMC uses a set-associative design that each bucket contains > > > > + * SMC_ENTRY_PER_BUCKET number of entries. > > > > + * Since 16-bit flow_table index is used, if there are more than 2^16 > > > > + * dp_netdev_flow, SMC will miss them that cannot be indexed by a 16- > > > bit > > > > value. > > > > + * > > > > + * > > > > + * Thread-safety > > > > + * ============= > > > > + * > > > > + * Each pmd_thread has its own private exact match cache. > > > > + * If dp_netdev_input is not called from a pmd thread, a mutex is used. > > > > + */ > > > > + > > > > +#define EM_FLOW_HASH_SHIFT 13 > > > > +#define EM_FLOW_HASH_ENTRIES (1u << EM_FLOW_HASH_SHIFT) > > > > +#define EM_FLOW_HASH_MASK (EM_FLOW_HASH_ENTRIES - 1) > > > > +#define EM_FLOW_HASH_SEGS 2 > > > > + > > > > +/* SMC uses a set-associative design. A bucket contains a set of > > > > entries > > > that > > > > + * a flow item can occupy. For now, it uses one hash function rather > > > > than > > > > two > > > > + * as for the EMC design. */ > > > > +#define SMC_ENTRY_PER_BUCKET 4 > > > > +#define SMC_ENTRIES (1u << 20) > > > > +#define SMC_BUCKET_CNT (SMC_ENTRIES / SMC_ENTRY_PER_BUCKET) > > > > +#define SMC_MASK (SMC_BUCKET_CNT - 1) > > > > + > > > > +/* Default EMC insert probability is 1 / > > > > DEFAULT_EM_FLOW_INSERT_INV_PROB */ > > > > +#define DEFAULT_EM_FLOW_INSERT_INV_PROB 100 > > > > +#define DEFAULT_EM_FLOW_INSERT_MIN (UINT32_MAX / \ > > > > + DEFAULT_EM_FLOW_INSERT_INV_PROB) > > > > + > > > > +struct emc_entry { > > > > + struct dp_netdev_flow *flow; > > > > + struct netdev_flow_key key; /* key.hash used for emc hash value. > > > > */ > > > > +}; > > > > + > > > > +struct emc_cache { > > > > + struct emc_entry entries[EM_FLOW_HASH_ENTRIES]; > > > > + int sweep_idx; /* For emc_cache_slow_sweep(). */ > > > > +}; > > > > + > > > > +struct smc_bucket { > > > > + uint16_t sig[SMC_ENTRY_PER_BUCKET]; > > > > + uint16_t flow_idx[SMC_ENTRY_PER_BUCKET]; > > > > +}; > > > > + > > > > +/* Signature match cache, differentiate from EMC cache */ > > > > +struct smc_cache { > > > > + struct smc_bucket buckets[SMC_BUCKET_CNT]; > > > > +}; > > > > + > > > > +struct dfc_cache { > > > > + struct emc_cache emc_cache; > > > > + struct smc_cache smc_cache; > > > > +}; > > > > + > > > > +/* Iterate in the exact match cache through every entry that might > > > contain a > > > > + * miniflow with hash 'HASH'. */ > > > > +#define EMC_FOR_EACH_POS_WITH_HASH(EMC, CURRENT_ENTRY, > > > HASH) > > > > > > I spotted a few checkpatch errors regarding control blocks for this > > > define as > > > well as a few others > > > > > > ERROR: Improper whitespace around control block > > > #231 FILE: lib/dpif-netdev-private-dfc.h:111: > > > #define EMC_FOR_EACH_POS_WITH_HASH(EMC, CURRENT_ENTRY, HASH) > > > I couldn't fix the above error. > > > ERROR: Improper whitespace around control block > > > #347 FILE: lib/dpif-netdev-private-dfc.h:227: > > > EMC_FOR_EACH_POS_WITH_HASH(cache, current_entry, key->hash) { > > > I could fix this above error. > > > ERROR: Improper whitespace around control block > > > #465 FILE: lib/dpif-netdev-private-dpcls.h:95: > > > #define NETDEV_FLOW_KEY_FOR_EACH_IN_FLOWMAP(VALUE, KEY, > > > FLOWMAP) \ > > > I couldn't fix the above error. > > > ERROR: Inappropriate bracing around statement > > > #466 FILE: lib/dpif-netdev-private-dpcls.h:96: > > > MINIFLOW_FOR_EACH_IN_FLOWMAP (VALUE, &(KEY)->mf, FLOWMAP) > > > I couldn't fix the above error. > > > > > > My thinking here though is that it doesn't seem that you have changed the > > > code, so these were pre-existing? > > > > > > Is it an easy lift to fix so as to have a clear check patch? > > > > > > > Correct, these were pre-existing checkpatch errors, we have just moved the > > code without fixing. > > > > It is an easy list to fix these errors. I'll fix this for the next version. > > > > Thanks. > After trying to fix the errors, I think only one of them is valid. I could fix one of the 4 checkpatch errors. The others are false positives. There are 2 types: 1st can be fixed by adding a space between the macro name and the '('. This isn't what we want though, because it changes the macro from a function-like macro to an object-like macro and the code doesn't compile. I think checkpatch is looking for uses with "*FOR_EACH*" macros, not the definitions. ``` ERROR: Improper whitespace around control block #230 FILE: lib/dpif-netdev-private-dfc.h:111: #define EMC_FOR_EACH_POS_WITH_HASH(EMC, CURRENT_ENTRY, HASH) \ ``` 2nd expects a '{' because it things it's calling the *FOR_EACH* type macro, but we are defining it here. ``` ERROR: Inappropriate bracing around statement #465 FILE: lib/dpif-netdev-private-dpcls.h:96: MINIFLOW_FOR_EACH_IN_FLOWMAP (VALUE, &(KEY)->mf, FLOWMAP) ``` How do you suggest we proceed on these 3 errors. I'm not sure what checkpatch wants here. > > > > \ > > > > + for (uint32_t i__ = 0, srch_hash__ = (HASH); > > > > \ > > > > + (CURRENT_ENTRY) = &(EMC)->entries[srch_hash__ & > > > > EM_FLOW_HASH_MASK], \ > > > > + i__ < EM_FLOW_HASH_SEGS; > > > > \ > > > > + i__++, srch_hash__ >>= EM_FLOW_HASH_SHIFT) > > > > + > > > > +static inline bool > > > > +emc_entry_alive(struct emc_entry *ce) > > > > +{ > > > > + return ce->flow && !ce->flow->dead; > > > > +} > > > > + > > > > +static inline void > > > > +emc_clear_entry(struct emc_entry *ce) > > > > +{ > > > > + if (ce->flow) { > > > > + dp_netdev_flow_unref(ce->flow); > > > > + ce->flow = NULL; > > > > + } > > > > +} > > > > + > > > > +static inline void > > > > +smc_clear_entry(struct smc_bucket *b, int idx) > > > > +{ > > > > + b->flow_idx[idx] = UINT16_MAX; > > > > +} > > > > > > So for both SMC and EMC clear entry above, I seen you've changed the > > > functions to be inline. > > > > > > Was the motivation to change to inline performance based and if so did you > > > see any gains? > > > > > > The same question applies to any other functions that were changed to > > > inline > > > also. > > > > The reason inline was added to some of the functions when they were moved > > was to get rid of "unused-function" type GCC errors like the below: > > > > In file included from lib/dpif-netdev-private.h:31:0, > > from lib/dpif-netdev-avx512.c:27: > > lib/dpif-netdev-private-dfc.h:168:1: error: 'dfc_cache_init' defined but > > not used > > [-Werror=unused-function] > > dfc_cache_init(struct dfc_cache *flow_cache) > > ^~~~~~~~~~~~~~ > > > > After going back and checking, adding the "inline" keyword to get rid of > > these > > warnings isn't necessary in all cases. After looking at this again, a > > cleaner > > approach is to add these DFC functions to a "lib/dpif-netdev-private-dfc.c" > > file > > and clean up the "inline" keywords. > > > > I'll address this in the next version. > > > > Sounds good, thanks. > > > > > + > > > > +static inline void > > > > +emc_cache_init(struct emc_cache *flow_cache) > > > > +{ > > > > + int i; > > > > + > > > > + flow_cache->sweep_idx = 0; > > > > + for (i = 0; i < ARRAY_SIZE(flow_cache->entries); i++) { > > > > + flow_cache->entries[i].flow = NULL; > > > > + flow_cache->entries[i].key.hash = 0; > > > > + flow_cache->entries[i].key.len = sizeof(struct miniflow); > > > > + flowmap_init(&flow_cache->entries[i].key.mf.map); > > > > + } > > > > +} > > > > + > > > > +static inline void > > > > +smc_cache_init(struct smc_cache *smc_cache) > > > > +{ > > > > + int i, j; > > > > + for (i = 0; i < SMC_BUCKET_CNT; i++) { > > > > + for (j = 0; j < SMC_ENTRY_PER_BUCKET; j++) { > > > > + smc_cache->buckets[i].flow_idx[j] = UINT16_MAX; > > > > + } > > > > + } > > > > +} > > > > + > > > > +static inline void > > > > +dfc_cache_init(struct dfc_cache *flow_cache) > > > > +{ > > > > + emc_cache_init(&flow_cache->emc_cache); > > > > + smc_cache_init(&flow_cache->smc_cache); > > > > +} > > > > + > > > > +static inline void > > > > +emc_cache_uninit(struct emc_cache *flow_cache) > > > > +{ > > > > + int i; > > > > + > > > > + for (i = 0; i < ARRAY_SIZE(flow_cache->entries); i++) { > > > > + emc_clear_entry(&flow_cache->entries[i]); > > > > + } > > > > +} > > > > + > > > > +static inline void > > > > +smc_cache_uninit(struct smc_cache *smc) > > > > +{ > > > > + int i, j; > > > > + > > > > + for (i = 0; i < SMC_BUCKET_CNT; i++) { > > > > + for (j = 0; j < SMC_ENTRY_PER_BUCKET; j++) { > > > > + smc_clear_entry(&(smc->buckets[i]), j); > > > > + } > > > > + } > > > > +} > > > > + > > > > +static inline void > > > > +dfc_cache_uninit(struct dfc_cache *flow_cache) > > > > +{ > > > > + smc_cache_uninit(&flow_cache->smc_cache); > > > > + emc_cache_uninit(&flow_cache->emc_cache); > > > > +} > > > > + > > > > +/* Check and clear dead flow references slowly (one entry at each > > > > + * invocation). */ > > > > +static inline void > > > > +emc_cache_slow_sweep(struct emc_cache *flow_cache) > > > > +{ > > > > + struct emc_entry *entry = &flow_cache->entries[flow_cache- > > > > >sweep_idx]; > > > > + > > > > + if (!emc_entry_alive(entry)) { > > > > + emc_clear_entry(entry); > > > > + } > > > > + flow_cache->sweep_idx = (flow_cache->sweep_idx + 1) & > > > > EM_FLOW_HASH_MASK; > > > > +} > > > > + > > > > +/* Used to compare 'netdev_flow_key' in the exact match cache to a > > > > miniflow. > > > > + * The maps are compared bitwise, so both 'key->mf' and 'mf' must have > > > > been > > > > + * generated by miniflow_extract. */ > > > > +static inline bool > > > > +emc_flow_key_equal_mf(const struct netdev_flow_key *key, > > > > + const struct miniflow *mf) > > > > +{ > > > > + return !memcmp(&key->mf, mf, key->len); > > > > +} > > > > + > > > > +static inline struct dp_netdev_flow * > > > > +emc_lookup(struct emc_cache *cache, const struct netdev_flow_key > > > *key) > > > > +{ > > > > + struct emc_entry *current_entry; > > > > + > > > > + EMC_FOR_EACH_POS_WITH_HASH(cache, current_entry, key->hash) { > > > > + if (current_entry->key.hash == key->hash > > > > + && emc_entry_alive(current_entry) > > > > + && emc_flow_key_equal_mf(¤t_entry->key, &key->mf)) { > > > > + > > > > + /* We found the entry with the 'key->mf' miniflow */ > > > > + return current_entry->flow; > > > > + } > > > > + } > > > > + > > > > + return NULL; > > > > +} > > > > + > > > > +#ifdef __cplusplus > > > > +} > > > > +#endif > > > > + > > > > +#endif /* dpif-netdev-private-dfc.h */ > > > > diff --git a/lib/dpif-netdev-private-dpcls.h b/lib/dpif-netdev-private- > > > dpcls.h > > > > new file mode 100644 > > > > index 000000000..f223a93e4 > > > > --- /dev/null > > > > +++ b/lib/dpif-netdev-private-dpcls.h > > > > @@ -0,0 +1,129 @@ > > > > +/* > > > > + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2015 Nicira, Inc. > > > > + * Copyright (c) 2019, 2020, 2021 Intel Corporation. > > > > + * > > > > + * Licensed under the Apache License, Version 2.0 (the "License"); > > > > + * you may not use this file except in compliance with the License. > > > > + * You may obtain a copy of the License at: > > > > + * > > > > + * http://www.apache.org/licenses/LICENSE-2.0 > > > > + * > > > > + * Unless required by applicable law or agreed to in writing, software > > > > + * distributed under the License is distributed on an "AS IS" BASIS, > > > > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express > > > or > > > > implied. > > > > + * See the License for the specific language governing permissions and > > > > + * limitations under the License. > > > > + */ > > > > + > > > > +#ifndef DPIF_NETDEV_PRIVATE_DPCLS_H > > > > +#define DPIF_NETDEV_PRIVATE_DPCLS_H 1 > > > > + > > > > +#include <stdbool.h> > > > > +#include <stdint.h> > > > > + > > > > +#include "dpif.h" > > > > +#include "cmap.h" > > > > +#include "openvswitch/thread.h" > > > > + > > > > +#ifdef __cplusplus > > > > +extern "C" { > > > > +#endif > > > > + > > > > +/* Forward declaration for lookup_func typedef. */ > > > > +struct dpcls_subtable; > > > > +struct dpcls_rule; > > > > + > > > > +/* Must be public as it is instantiated in subtable struct below. */ > > > > +struct netdev_flow_key { > > > > + uint32_t hash; /* Hash function differs for different users. > > > > */ > > > > + uint32_t len; /* Length of the following miniflow (incl. > > > > map). */ > > > > + struct miniflow mf; > > > > + uint64_t buf[FLOW_MAX_PACKET_U64S]; > > > > +}; > > > > + > > > > +/* A rule to be inserted to the classifier. */ > > > > +struct dpcls_rule { > > > > + struct cmap_node cmap_node; /* Within struct dpcls_subtable > > > > 'rules'. > > > */ > > > > + struct netdev_flow_key *mask; /* Subtable's mask. */ > > > > + struct netdev_flow_key flow; /* Matching key. */ > > > > + /* 'flow' must be the last field, additional space is allocated > > > > here. */ > > > > +}; > > > > + > > > > +/* Lookup function for a subtable in the dpcls. This function is called > > > > + * by each subtable with an array of packets, and a bitmask of packets > > > > to > > > > + * perform the lookup on. Using a function pointer gives flexibility to > > > > + * optimize the lookup function based on subtable properties and the > > > > + * CPU instruction set available at runtime. > > > > + */ > > > > +typedef > > > > +uint32_t (*dpcls_subtable_lookup_func)(struct dpcls_subtable > > > *subtable, > > > > + uint32_t keys_map, > > > > + const struct netdev_flow_key > > > > *keys[], > > > > + struct dpcls_rule **rules); > > > > + > > > > +/* A set of rules that all have the same fields wildcarded. */ > > > > +struct dpcls_subtable { > > > > + /* The fields are only used by writers. */ > > > > + struct cmap_node cmap_node OVS_GUARDED; /* Within dpcls > > > > 'subtables_map'. */ > > > > + > > > > + /* These fields are accessed by readers. */ > > > > + struct cmap rules; /* Contains "struct dpcls_rule"s. */ > > > > + uint32_t hit_cnt; /* Number of match hits in subtable > > > > in current > > > > + optimization interval. */ > > > > + > > > > + /* Miniflow fingerprint that the subtable matches on. The miniflow > > > "bits" > > > > + * are used to select the actual dpcls lookup implementation at > > > > subtable > > > > + * creation time. > > > > + */ > > > > + uint8_t mf_bits_set_unit0; > > > > + uint8_t mf_bits_set_unit1; > > > > + > > > > + /* The lookup function to use for this subtable. If there is a > > > > known > > > > + * property of the subtable (eg: only 3 bits of miniflow metadata > > > > is > > > > + * used for the lookup) then this can point at an optimized > > > > version of > > > > + * the lookup function for this particular subtable. */ > > > > + dpcls_subtable_lookup_func lookup_func; > > > > + > > > > + /* Caches the masks to match a packet to, reducing runtime > > > calculations. > > > > */ > > > > + uint64_t *mf_masks; > > > > + > > > > + struct netdev_flow_key mask; /* Wildcards for fields (const). */ > > > > + /* 'mask' must be the last field, additional space is allocated > > > > here. */ > > > > +}; > > > > + > > > > +/* Iterate through netdev_flow_key TNL u64 values specified by > > > > 'FLOWMAP'. */ > > > > +#define NETDEV_FLOW_KEY_FOR_EACH_IN_FLOWMAP(VALUE, KEY, > > > > FLOWMAP) \ > > > > + MINIFLOW_FOR_EACH_IN_FLOWMAP (VALUE, &(KEY)->mf, > > > FLOWMAP) > > > > + > > > > +/* Generates a mask for each bit set in the subtable's miniflow. */ > > > > +void > > > > +netdev_flow_key_gen_masks(const struct netdev_flow_key *tbl, > > > > + uint64_t *mf_masks, > > > > + const uint32_t mf_bits_u0, > > > > + const uint32_t mf_bits_u1); > > > > + > > > > +/* Matches a dpcls rule against the incoming packet in 'target' */ > > > > +bool dpcls_rule_matches_key(const struct dpcls_rule *rule, > > > > + const struct netdev_flow_key *target); > > > > + > > > > +static inline uint32_t > > > > +dpif_netdev_packet_get_rss_hash_orig_pkt(struct dp_packet *packet, > > > > + const struct miniflow *mf) > > > > +{ > > > > + uint32_t hash; > > > > + > > > > + if (OVS_LIKELY(dp_packet_rss_valid(packet))) { > > > > + hash = dp_packet_get_rss_hash(packet); > > > > + } else { > > > > + hash = miniflow_hash_5tuple(mf, 0); > > > > + dp_packet_set_rss_hash(packet, hash); > > > > + } > > > > + > > > > + return hash; > > > > +} > > > > + > > > > +#ifdef __cplusplus > > > > +} > > > > +#endif > > > > + > > > > +#endif /* dpif-netdev-private-dpcls.h */ > > > > diff --git a/lib/dpif-netdev-private-flow.h > > > > b/lib/dpif-netdev-private-flow.h > > > > new file mode 100644 > > > > index 000000000..027d68e0b > > > > --- /dev/null > > > > +++ b/lib/dpif-netdev-private-flow.h > > > > @@ -0,0 +1,162 @@ > > > > +/* > > > > + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2015 Nicira, Inc. > > > > + * Copyright (c) 2019, 2020, 2021 Intel Corporation. > > > > + * > > > > + * Licensed under the Apache License, Version 2.0 (the "License"); > > > > + * you may not use this file except in compliance with the License. > > > > + * You may obtain a copy of the License at: > > > > + * > > > > + * http://www.apache.org/licenses/LICENSE-2.0 > > > > + * > > > > + * Unless required by applicable law or agreed to in writing, software > > > > + * distributed under the License is distributed on an "AS IS" BASIS, > > > > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express > > > or > > > > implied. > > > > + * See the License for the specific language governing permissions and > > > > + * limitations under the License. > > > > + */ > > > > + > > > > +#ifndef DPIF_NETDEV_PRIVATE_FLOW_H > > > > +#define DPIF_NETDEV_PRIVATE_FLOW_H 1 > > > > + > > > > +#include <stdbool.h> > > > > +#include <stdint.h> > > > > + > > > > +#include "dpif.h" > > > > +#include "dpif-netdev-private-dpcls.h" > > > > +#include "cmap.h" > > > > +#include "openvswitch/thread.h" > > > > + > > > > +#ifdef __cplusplus > > > > +extern "C" { > > > > +#endif > > > > + > > > > +/* Contained by struct dp_netdev_flow's 'stats' member. */ > > > > +struct dp_netdev_flow_stats { > > > > + atomic_llong used; /* Last used time, in monotonic > > > > msecs. */ > > > > + atomic_ullong packet_count; /* Number of packets matched. */ > > > > + atomic_ullong byte_count; /* Number of bytes matched. */ > > > > + atomic_uint16_t tcp_flags; /* Bitwise-OR of seen tcp_flags > > > > values. */ > > > > +}; > > > > + > > > > +/* Contained by struct dp_netdev_flow's 'last_attrs' member. */ > > > > +struct dp_netdev_flow_attrs { > > > > + atomic_bool offloaded; /* True if flow is offloaded to HW. > > > > */ > > > > + ATOMIC(const char *) dp_layer; /* DP layer the flow is handled in. > > > > */ > > > > +}; > > > > + > > > > +/* A flow in 'dp_netdev_pmd_thread's 'flow_table'. > > > > + * > > > > + * > > > > + * Thread-safety > > > > + * ============= > > > > + * > > > > + * Except near the beginning or ending of its lifespan, rule 'rule' > > > > belongs to > > > > + * its pmd thread's classifier. The text below calls this classifier > > > > 'cls'. > > > > + * > > > > + * Motivation > > > > + * ---------- > > > > + * > > > > + * The thread safety rules described here for "struct dp_netdev_flow" > > > > are > > > > + * motivated by two goals: > > > > + * > > > > + * - Prevent threads that read members of "struct dp_netdev_flow" > > > from > > > > + * reading bad data due to changes by some thread concurrently > > > > modifying > > > > + * those members. > > > > + * > > > > + * - Prevent two threads making changes to members of a given > > > > "struct > > > > + * dp_netdev_flow" from interfering with each other. > > > > + * > > > > + * > > > > + * Rules > > > > + * ----- > > > > + * > > > > + * A flow 'flow' may be accessed without a risk of being freed during > > > > an > > > RCU > > > > + * grace period. Code that needs to hold onto a flow for a while > > > > + * should try incrementing 'flow->ref_cnt' with dp_netdev_flow_ref(). > > > > + * > > > > + * 'flow->ref_cnt' protects 'flow' from being freed. It doesn't > > > > protect the > > > > + * flow from being deleted from 'cls' and it doesn't protect members of > > > > 'flow' > > > > + * from modification. > > > > + * > > > > + * Some members, marked 'const', are immutable. Accessing other > > > > members > > > > + * requires synchronization, as noted in more detail below. > > > > + */ > > > > +struct dp_netdev_flow { > > > > + const struct flow flow; /* Unmasked flow that created this > > > > entry. */ > > > > + /* Hash table index by unmasked flow. */ > > > > + const struct cmap_node node; /* In owning dp_netdev_pmd_thread's > > > */ > > > > + /* 'flow_table'. */ > > > > + const struct cmap_node mark_node; /* In owning flow_mark's > > > > mark_to_flow */ > > > > + const ovs_u128 ufid; /* Unique flow identifier. */ > > > > + const ovs_u128 mega_ufid; /* Unique mega flow identifier. */ > > > > + const unsigned pmd_id; /* The 'core_id' of pmd thread owning > > > > this > > > */ > > > > + /* flow. */ > > > > + > > > > + /* Number of references. > > > > + * The classifier owns one reference. > > > > + * Any thread trying to keep a rule from being freed should hold > > > > its own > > > > + * reference. */ > > > > + struct ovs_refcount ref_cnt; > > > > + > > > > + bool dead; > > > > + uint32_t mark; /* Unique flow mark assigned to a > > > > flow */ > > > > + > > > > + /* Statistics. */ > > > > + struct dp_netdev_flow_stats stats; > > > > + > > > > + /* Statistics and attributes received from the netdev offload > > > > provider. > > > */ > > > > + atomic_int netdev_flow_get_result; > > > > + struct dp_netdev_flow_stats last_stats; > > > > + struct dp_netdev_flow_attrs last_attrs; > > > > + > > > > + /* Actions. */ > > > > + OVSRCU_TYPE(struct dp_netdev_actions *) actions; > > > > + > > > > + /* While processing a group of input packets, the datapath uses the > > > next > > > > + * member to store a pointer to the output batch for the flow. It > > > > is > > > > + * reset after the batch has been sent out (See > > > > dp_netdev_queue_batches(), > > > > + * packet_batch_per_flow_init() and > > > > packet_batch_per_flow_execute()). */ > > > > + struct packet_batch_per_flow *batch; > > > > + > > > > + /* Packet classification. */ > > > > + char *dp_extra_info; /* String to return in a flow > > > > dump/get. */ > > > > + struct dpcls_rule cr; /* In owning dp_netdev's 'cls'. */ > > > > + /* 'cr' must be the last member. */ > > > > +}; > > > > + > > > > +static inline uint32_t > > > > +dp_netdev_flow_hash(const ovs_u128 *ufid) > > > > +{ > > > > + return ufid->u32[0]; > > > > +} > > > > + > > > > +/* Given the number of bits set in miniflow's maps, returns the size > > > > of the > > > > + * 'netdev_flow_key.mf' */ > > > > +static inline size_t > > > > +netdev_flow_key_size(size_t flow_u64s) > > > > +{ > > > > + return sizeof(struct miniflow) + MINIFLOW_VALUES_SIZE(flow_u64s); > > > > +} > > > > + > > > > +/* forward declaration required for EMC to unref flows */ > > > > +void dp_netdev_flow_unref(struct dp_netdev_flow *); > > > > + > > > > +/* A set of datapath actions within a "struct dp_netdev_flow". > > > > + * > > > > + * > > > > + * Thread-safety > > > > + * ============= > > > > + * > > > > + * A struct dp_netdev_actions 'actions' is protected with RCU. */ > > > > +struct dp_netdev_actions { > > > > + /* These members are immutable: they do not change during the > > > struct's > > > > + * lifetime. */ > > > > + unsigned int size; /* Size of 'actions', in bytes. */ > > > > + struct nlattr actions[]; /* Sequence of OVS_ACTION_ATTR_* > > > attributes. > > > > */ > > > > +}; > > > > + > > > > +#ifdef __cplusplus > > > > +} > > > > +#endif > > > > + > > > > +#endif /* dpif-netdev-private-flow.h */ > > > > diff --git a/lib/dpif-netdev-private-thread.h b/lib/dpif-netdev-private- > > > > thread.h > > > > new file mode 100644 > > > > index 000000000..5e5308b96 > > > > --- /dev/null > > > > +++ b/lib/dpif-netdev-private-thread.h > > > > @@ -0,0 +1,206 @@ > > > > +/* > > > > + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2015 Nicira, Inc. > > > > + * Copyright (c) 2019, 2020, 2021 Intel Corporation. > > > > + * > > > > + * Licensed under the Apache License, Version 2.0 (the "License"); > > > > + * you may not use this file except in compliance with the License. > > > > + * You may obtain a copy of the License at: > > > > + * > > > > + * http://www.apache.org/licenses/LICENSE-2.0 > > > > + * > > > > + * Unless required by applicable law or agreed to in writing, software > > > > + * distributed under the License is distributed on an "AS IS" BASIS, > > > > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express > > > or > > > > implied. > > > > + * See the License for the specific language governing permissions and > > > > + * limitations under the License. > > > > + */ > > > > + > > > > +#ifndef DPIF_NETDEV_PRIVATE_THREAD_H > > > > +#define DPIF_NETDEV_PRIVATE_THREAD_H 1 > > > > + > > > > +#include <stdbool.h> > > > > +#include <stdint.h> > > > > + > > > > +#include "dpif.h" > > > > +#include "cmap.h" > > > > + > > > > +#include "dpif-netdev-private-dfc.h" > > > > +#include "dpif-netdev-perf.h" > > > > +#include "openvswitch/thread.h" > > > > + > > > > +#ifdef __cplusplus > > > > +extern "C" { > > > > +#endif > > > > + > > > > +/* PMD Thread Structures */ > > > > + > > > > +/* A set of properties for the current processing loop that is not > > > > directly > > > > + * associated with the pmd thread itself, but with the packets being > > > > + * processed or the short-term system configuration (for example, > > > > time). > > > > + * Contained by struct dp_netdev_pmd_thread's 'ctx' member. */ > > > > +struct dp_netdev_pmd_thread_ctx { > > > > + /* Latest measured time. See 'pmd_thread_ctx_time_update()'. */ > > > > + long long now; > > > > + /* RX queue from which last packet was received. */ > > > > + struct dp_netdev_rxq *last_rxq; > > > > + /* EMC insertion probability context for the current processing > > > > cycle. */ > > > > + uint32_t emc_insert_min; > > > > +}; > > > > + > > > > +/* PMD: Poll modes drivers. PMD accesses devices via polling to > > > > eliminate > > > > + * the performance overhead of interrupt processing. Therefore netdev > > > > can > > > > + * not implement rx-wait for these devices. dpif-netdev needs to poll > > > > + * these device to check for recv buffer. pmd-thread does polling for > > > > + * devices assigned to itself. > > > > + * > > > > + * DPDK used PMD for accessing NIC. > > > > + * > > > > + * Note, instance with cpu core id NON_PMD_CORE_ID will be reserved > > > for > > > > + * I/O of all non-pmd threads. There will be no actual thread created > > > > + * for the instance. > > > > + * > > > > + * Each struct has its own flow cache and classifier per managed > > > > ingress > > > > port. > > > > + * For packets received on ingress port, a look up is done on > > > corresponding > > > > PMD > > > > + * thread's flow cache and in case of a miss, lookup is performed in > > > > the > > > > + * corresponding classifier of port. Packets are executed with the > > > > found > > > > + * actions in either case. > > > > + * */ > > > > +struct dp_netdev_pmd_thread { > > > > + struct dp_netdev *dp; > > > > + struct ovs_refcount ref_cnt; /* Every reference must be > > > > refcount'ed. > > > */ > > > > + struct cmap_node node; /* In 'dp->poll_threads'. */ > > > > + > > > > + /* Per thread exact-match cache. Note, the instance for cpu core > > > > + * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly > > > > + * need to be protected by 'non_pmd_mutex'. Every other instance > > > > + * will only be accessed by its own pmd thread. */ > > > > + OVS_ALIGNED_VAR(CACHE_LINE_SIZE) struct dfc_cache flow_cache; > > > > + > > > > + /* Flow-Table and classifiers > > > > + * > > > > + * Writers of 'flow_table' must take the 'flow_mutex'. > > > > Corresponding > > > > + * changes to 'classifiers' must be made while still holding the > > > > + * 'flow_mutex'. > > > > + */ > > > > + struct ovs_mutex flow_mutex; > > > > + struct cmap flow_table OVS_GUARDED; /* Flow table. */ > > > > + > > > > + /* One classifier per in_port polled by the pmd */ > > > > + struct cmap classifiers; > > > > + /* Periodically sort subtable vectors according to hit frequencies > > > > */ > > > > + long long int next_optimization; > > > > + /* End of the next time interval for which processing cycles > > > > + are stored for each polled rxq. */ > > > > + long long int rxq_next_cycle_store; > > > > + > > > > + /* Last interval timestamp. */ > > > > + uint64_t intrvl_tsc_prev; > > > > + /* Last interval cycles. */ > > > > + atomic_ullong intrvl_cycles; > > > > + > > > > + /* Current context of the PMD thread. */ > > > > + struct dp_netdev_pmd_thread_ctx ctx; > > > > + > > > > + struct seq *reload_seq; > > > > + uint64_t last_reload_seq; > > > > + > > > > + /* These are atomic variables used as a synchronization and > > > configuration > > > > + * points for thread reload/exit. > > > > + * > > > > + * 'reload' atomic is the main one and it's used as a memory > > > > + * synchronization point for all other knobs and data. > > > > + * > > > > + * For a thread that requests PMD reload: > > > > + * > > > > + * * All changes that should be visible to the PMD thread must > > > > be made > > > > + * before setting the 'reload'. These changes could use any > > > > memory > > > > + * ordering model including 'relaxed'. > > > > + * * Setting the 'reload' atomic should occur in the same thread > > > > where > > > > + * all other PMD configuration options updated. > > > > + * * Setting the 'reload' atomic should be done with 'release' > > > > memory > > > > + * ordering model or stricter. This will guarantee that all > > > > previous > > > > + * changes (including non-atomic and 'relaxed') will be > > > > visible to > > > > + * the PMD thread. > > > > + * * To check that reload is done, thread should poll the > > > > 'reload' atomic > > > > + * to become 'false'. Polling should be done with 'acquire' > > > > memory > > > > + * ordering model or stricter. This ensures that PMD thread > > > completed > > > > + * the reload process. > > > > + * > > > > + * For the PMD thread: > > > > + * > > > > + * * PMD thread should read 'reload' atomic with 'acquire' memory > > > > + * ordering model or stricter. This will guarantee that all > > > > changes > > > > + * made before setting the 'reload' in the requesting thread > > > > will be > > > > + * visible to the PMD thread. > > > > + * * All other configuration data could be read with any memory > > > > + * ordering model (including non-atomic and 'relaxed') but > > > > *only > > > after* > > > > + * reading the 'reload' atomic set to 'true'. > > > > + * * When the PMD reload done, PMD should (optionally) set all > > > > the > > > > below > > > > + * knobs except the 'reload' to their default ('false') values > > > > and > > > > + * (mandatory), as the last step, set the 'reload' to 'false' > > > > using > > > > + * 'release' memory ordering model or stricter. This will > > > > inform the > > > > + * requesting thread that PMD has completed a reload cycle. > > > > + */ > > > > + atomic_bool reload; /* Do we need to reload ports? */ > > > > + atomic_bool wait_for_reload; /* Can we busy wait for the next > > > reload? > > > > */ > > > > + atomic_bool reload_tx_qid; /* Do we need to reload > > > > static_tx_qid? > > > */ > > > > + atomic_bool exit; /* For terminating the pmd thread. > > > > */ > > > > + > > > > + pthread_t thread; > > > > + unsigned core_id; /* CPU core id of this pmd thread. > > > > */ > > > > + int numa_id; /* numa node id of this pmd > > > > thread. */ > > > > + bool isolated; > > > > + > > > > + /* Queue id used by this pmd thread to send packets on all netdevs > > > > if > > > > + * XPS disabled for this netdev. All static_tx_qid's are unique > > > > and less > > > > + * than 'cmap_count(dp->poll_threads)'. */ > > > > + uint32_t static_tx_qid; > > > > + > > > > + /* Number of filled output batches. */ > > > > + int n_output_batches; > > > > + > > > > + struct ovs_mutex port_mutex; /* Mutex for 'poll_list' and > > > > 'tx_ports'. > > > */ > > > > + /* List of rx queues to poll. */ > > > > + struct hmap poll_list OVS_GUARDED; > > > > + /* Map of 'tx_port's used for transmission. Written by the main > > > > thread, > > > > + * read by the pmd thread. */ > > > > + struct hmap tx_ports OVS_GUARDED; > > > > + > > > > + struct ovs_mutex bond_mutex; /* Protects updates of 'tx_bonds'. > > > > */ > > > > + /* Map of 'tx_bond's used for transmission. Written by the main > > > > thread > > > > + * and read by the pmd thread. */ > > > > + struct cmap tx_bonds; > > > > + > > > > + /* These are thread-local copies of 'tx_ports'. One contains only > > > > tunnel > > > > + * ports (that support push_tunnel/pop_tunnel), the other contains > > > ports > > > > + * with at least one txq (that support send). A port can be in > > > > both. > > > > + * > > > > + * There are two separate maps to make sure that we don't try to > > > execute > > > > + * OUTPUT on a device which has 0 txqs or PUSH/POP on a non-tunnel > > > > device. > > > > + * > > > > + * The instances for cpu core NON_PMD_CORE_ID can be accessed by > > > > multiple > > > > + * threads, and thusly need to be protected by 'non_pmd_mutex'. > > > Every > > > > + * other instance will only be accessed by its own pmd thread. */ > > > > + struct hmap tnl_port_cache; > > > > + struct hmap send_port_cache; > > > > + > > > > + /* Keep track of detailed PMD performance statistics. */ > > > > + struct pmd_perf_stats perf_stats; > > > > + > > > > + /* Stats from previous iteration used by automatic pmd > > > > + * load balance logic. */ > > > > + uint64_t prev_stats[PMD_N_STATS]; > > > > + atomic_count pmd_overloaded; > > > > + > > > > + /* Set to true if the pmd thread needs to be reloaded. */ > > > > + bool need_reload; > > > > + > > > > + /* Next time when PMD should try RCU quiescing. */ > > > > + long long next_rcu_quiesce; > > > > +}; > > > > + > > > > +#ifdef __cplusplus > > > > +} > > > > +#endif > > > > + > > > > +#endif /* dpif-netdev-private-thread.h */ > > > > diff --git a/lib/dpif-netdev-private.h b/lib/dpif-netdev-private.h > > > > index 4fda1220b..d7b6fd7ec 100644 > > > > --- a/lib/dpif-netdev-private.h > > > > +++ b/lib/dpif-netdev-private.h > > > > @@ -18,95 +18,17 @@ > > > > #ifndef DPIF_NETDEV_PRIVATE_H > > > > #define DPIF_NETDEV_PRIVATE_H 1 > > > > > > > > -#include <stdbool.h> > > > > -#include <stdint.h> > > > > - > > > > -#include "dpif.h" > > > > -#include "cmap.h" > > > > - > > > > -#ifdef __cplusplus > > > > -extern "C" { > > > > -#endif > > > > - > > > > -/* Forward declaration for lookup_func typedef. */ > > > > -struct dpcls_subtable; > > > > -struct dpcls_rule; > > > > - > > > > -/* Must be public as it is instantiated in subtable struct below. */ > > > > -struct netdev_flow_key { > > > > - uint32_t hash; /* Hash function differs for different users. > > > > */ > > > > - uint32_t len; /* Length of the following miniflow (incl. > > > > map). */ > > > > - struct miniflow mf; > > > > - uint64_t buf[FLOW_MAX_PACKET_U64S]; > > > > -}; > > > > - > > > > -/* A rule to be inserted to the classifier. */ > > > > -struct dpcls_rule { > > > > - struct cmap_node cmap_node; /* Within struct dpcls_subtable > > > > 'rules'. > > > */ > > > > - struct netdev_flow_key *mask; /* Subtable's mask. */ > > > > - struct netdev_flow_key flow; /* Matching key. */ > > > > - /* 'flow' must be the last field, additional space is allocated > > > > here. */ > > > > -}; > > > > - > > > > -/* Lookup function for a subtable in the dpcls. This function is called > > > > - * by each subtable with an array of packets, and a bitmask of packets > > > > to > > > > - * perform the lookup on. Using a function pointer gives flexibility to > > > > - * optimize the lookup function based on subtable properties and the > > > > - * CPU instruction set available at runtime. > > > > +/* This header includes the various dpif-netdev components' header > > > > + * files in the appropriate order. Unfortunately there is a strict > > > > + * requirement in the include order due to dependences between > > > > components. > > > > + * E.g: > > > > + * DFC/EMC/SMC requires the netdev_flow_key struct > > > > + * PMD thread requires DFC_flow struct > > > > + * > > > > */ > > > > -typedef > > > > -uint32_t (*dpcls_subtable_lookup_func)(struct dpcls_subtable *subtable, > > > > - uint32_t keys_map, > > > > - const struct netdev_flow_key > > > > *keys[], > > > > - struct dpcls_rule **rules); > > > > - > > > > -/* A set of rules that all have the same fields wildcarded. */ > > > > -struct dpcls_subtable { > > > > - /* The fields are only used by writers. */ > > > > - struct cmap_node cmap_node OVS_GUARDED; /* Within dpcls > > > > 'subtables_map'. */ > > > > - > > > > - /* These fields are accessed by readers. */ > > > > - struct cmap rules; /* Contains "struct dpcls_rule"s. */ > > > > - uint32_t hit_cnt; /* Number of match hits in subtable > > > > in current > > > > - optimization interval. */ > > > > - > > > > - /* Miniflow fingerprint that the subtable matches on. The miniflow > > > "bits" > > > > - * are used to select the actual dpcls lookup implementation at > > > > subtable > > > > - * creation time. > > > > - */ > > > > - uint8_t mf_bits_set_unit0; > > > > - uint8_t mf_bits_set_unit1; > > > > - > > > > - /* The lookup function to use for this subtable. If there is a > > > > known > > > > - * property of the subtable (eg: only 3 bits of miniflow metadata > > > > is > > > > - * used for the lookup) then this can point at an optimized > > > > version of > > > > - * the lookup function for this particular subtable. */ > > > > - dpcls_subtable_lookup_func lookup_func; > > > > - > > > > - /* Caches the masks to match a packet to, reducing runtime > > > > calculations. > > > > */ > > > > - uint64_t *mf_masks; > > > > - > > > > - struct netdev_flow_key mask; /* Wildcards for fields (const). */ > > > > - /* 'mask' must be the last field, additional space is allocated > > > > here. */ > > > > -}; > > > > - > > > > -/* Iterate through netdev_flow_key TNL u64 values specified by > > > > 'FLOWMAP'. */ > > > > -#define NETDEV_FLOW_KEY_FOR_EACH_IN_FLOWMAP(VALUE, KEY, > > > > FLOWMAP) \ > > > > - MINIFLOW_FOR_EACH_IN_FLOWMAP (VALUE, &(KEY)->mf, > > > FLOWMAP) > > > > - > > > > -/* Generates a mask for each bit set in the subtable's miniflow. */ > > > > -void > > > > -netdev_flow_key_gen_masks(const struct netdev_flow_key *tbl, > > > > - uint64_t *mf_masks, > > > > - const uint32_t mf_bits_u0, > > > > - const uint32_t mf_bits_u1); > > > > - > > > > -/* Matches a dpcls rule against the incoming packet in 'target' */ > > > > -bool dpcls_rule_matches_key(const struct dpcls_rule *rule, > > > > - const struct netdev_flow_key *target); > > > > - > > > > -#ifdef __cplusplus > > > > -} > > > > -#endif > > > > +#include "dpif-netdev-private-flow.h" > > > > +#include "dpif-netdev-private-dpcls.h" > > > > +#include "dpif-netdev-private-dfc.h" > > > > +#include "dpif-netdev-private-thread.h" > > > > > > > > #endif /* netdev-private.h */ > > > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > > > > index 251788b04..298bfe444 100644 > > > > --- a/lib/dpif-netdev.c > > > > +++ b/lib/dpif-netdev.c > > > > @@ -17,6 +17,7 @@ > > > > #include <config.h> > > > > #include "dpif-netdev.h" > > > > #include "dpif-netdev-private.h" > > > > +#include "dpif-netdev-private-dfc.h" > > > > > > > > #include <ctype.h> > > > > #include <errno.h> > > > > @@ -44,6 +45,7 @@ > > > > #include "dpif.h" > > > > #include "dpif-netdev-lookup.h" > > > > #include "dpif-netdev-perf.h" > > > > +#include "dpif-netdev-private-dfc.h" > > > > > > Is the include above a duplicate? Looks like it was added above at line > > > 17 also > > > in the same file? > > > > > > > Yes, good spot, this is a duplicate. I'll remove this in the next version. > > Thanks. > > > > > > > #include "dpif-provider.h" > > > > #include "dummy.h" > > > > #include "fat-rwlock.h" > > > > @@ -142,90 +144,6 @@ static struct odp_support dp_netdev_support = { > > > > .ct_orig_tuple6 = true, > > > > }; > > > > > > > > -/* EMC cache and SMC cache compose the datapath flow cache (DFC) > > > > - * > > > > - * Exact match cache for frequently used flows > > > > - * > > > > - * The cache uses a 32-bit hash of the packet (which can be the RSS > > > > hash) > > > to > > > > - * search its entries for a miniflow that matches exactly the miniflow > > > > of the > > > > - * packet. It stores the 'dpcls_rule' (rule) that matches the miniflow. > > > > - * > > > > - * A cache entry holds a reference to its 'dp_netdev_flow'. > > > > - * > > > > - * A miniflow with a given hash can be in one of EM_FLOW_HASH_SEGS > > > > different > > > > - * entries. The 32-bit hash is split into EM_FLOW_HASH_SEGS values > > > > (each > > > > of > > > > - * them is EM_FLOW_HASH_SHIFT bits wide and the remainder is thrown > > > > away). Each > > > > - * value is the index of a cache entry where the miniflow could be. > > > > - * > > > > - * > > > > - * Signature match cache (SMC) > > > > - * > > > > - * This cache stores a 16-bit signature for each flow without storing > > > > keys, > > > and > > > > - * stores the corresponding 16-bit flow_table index to the > > > > 'dp_netdev_flow'. > > > > - * Each flow thus occupies 32bit which is much more memory efficient > > > than > > > > EMC. > > > > - * SMC uses a set-associative design that each bucket contains > > > > - * SMC_ENTRY_PER_BUCKET number of entries. > > > > - * Since 16-bit flow_table index is used, if there are more than 2^16 > > > > - * dp_netdev_flow, SMC will miss them that cannot be indexed by a > > > > 16-bit > > > > value. > > > > - * > > > > - * > > > > - * Thread-safety > > > > - * ============= > > > > - * > > > > - * Each pmd_thread has its own private exact match cache. > > > > - * If dp_netdev_input is not called from a pmd thread, a mutex is used. > > > > - */ > > > > - > > > > -#define EM_FLOW_HASH_SHIFT 13 > > > > -#define EM_FLOW_HASH_ENTRIES (1u << EM_FLOW_HASH_SHIFT) > > > > -#define EM_FLOW_HASH_MASK (EM_FLOW_HASH_ENTRIES - 1) > > > > -#define EM_FLOW_HASH_SEGS 2 > > > > - > > > > -/* SMC uses a set-associative design. A bucket contains a set of > > > > entries > > > that > > > > - * a flow item can occupy. For now, it uses one hash function rather > > > > than > > > > two > > > > - * as for the EMC design. */ > > > > -#define SMC_ENTRY_PER_BUCKET 4 > > > > -#define SMC_ENTRIES (1u << 20) > > > > -#define SMC_BUCKET_CNT (SMC_ENTRIES / SMC_ENTRY_PER_BUCKET) > > > > -#define SMC_MASK (SMC_BUCKET_CNT - 1) > > > > - > > > > -/* Default EMC insert probability is 1 / > > > > DEFAULT_EM_FLOW_INSERT_INV_PROB */ > > > > -#define DEFAULT_EM_FLOW_INSERT_INV_PROB 100 > > > > -#define DEFAULT_EM_FLOW_INSERT_MIN (UINT32_MAX / \ > > > > - DEFAULT_EM_FLOW_INSERT_INV_PROB) > > > > - > > > > -struct emc_entry { > > > > - struct dp_netdev_flow *flow; > > > > - struct netdev_flow_key key; /* key.hash used for emc hash value. > > > > */ > > > > -}; > > > > - > > > > -struct emc_cache { > > > > - struct emc_entry entries[EM_FLOW_HASH_ENTRIES]; > > > > - int sweep_idx; /* For emc_cache_slow_sweep(). */ > > > > -}; > > > > - > > > > -struct smc_bucket { > > > > - uint16_t sig[SMC_ENTRY_PER_BUCKET]; > > > > - uint16_t flow_idx[SMC_ENTRY_PER_BUCKET]; > > > > -}; > > > > - > > > > -/* Signature match cache, differentiate from EMC cache */ > > > > -struct smc_cache { > > > > - struct smc_bucket buckets[SMC_BUCKET_CNT]; > > > > -}; > > > > - > > > > -struct dfc_cache { > > > > - struct emc_cache emc_cache; > > > > - struct smc_cache smc_cache; > > > > -}; > > > > - > > > > -/* Iterate in the exact match cache through every entry that might > > > > contain > > > a > > > > - * miniflow with hash 'HASH'. */ > > > > -#define EMC_FOR_EACH_POS_WITH_HASH(EMC, CURRENT_ENTRY, > > > HASH) > > > > \ > > > > - for (uint32_t i__ = 0, srch_hash__ = (HASH); > > > > \ > > > > - (CURRENT_ENTRY) = &(EMC)->entries[srch_hash__ & > > > > EM_FLOW_HASH_MASK], \ > > > > - i__ < EM_FLOW_HASH_SEGS; > > > > \ > > > > - i__++, srch_hash__ >>= EM_FLOW_HASH_SHIFT) > > > > > > > > > > > > > > > > /* Simple non-wildcarding single-priority classifier. */ > > > > > > > > @@ -487,119 +405,10 @@ struct dp_netdev_port { > > > > char *rxq_affinity_list; /* Requested affinity of rx queues. */ > > > > }; > > > > > > > > -/* Contained by struct dp_netdev_flow's 'stats' member. */ > > > > -struct dp_netdev_flow_stats { > > > > - atomic_llong used; /* Last used time, in monotonic > > > > msecs. */ > > > > - atomic_ullong packet_count; /* Number of packets matched. */ > > > > - atomic_ullong byte_count; /* Number of bytes matched. */ > > > > - atomic_uint16_t tcp_flags; /* Bitwise-OR of seen tcp_flags > > > > values. */ > > > > -}; > > > > - > > > > -/* Contained by struct dp_netdev_flow's 'last_attrs' member. */ > > > > -struct dp_netdev_flow_attrs { > > > > - atomic_bool offloaded; /* True if flow is offloaded to HW. > > > > */ > > > > - ATOMIC(const char *) dp_layer; /* DP layer the flow is handled in. > > > > */ > > > > -}; > > > > - > > > > -/* A flow in 'dp_netdev_pmd_thread's 'flow_table'. > > > > - * > > > > - * > > > > - * Thread-safety > > > > - * ============= > > > > - * > > > > - * Except near the beginning or ending of its lifespan, rule 'rule' > > > > belongs to > > > > - * its pmd thread's classifier. The text below calls this classifier > > > > 'cls'. > > > > - * > > > > - * Motivation > > > > - * ---------- > > > > - * > > > > - * The thread safety rules described here for "struct dp_netdev_flow" > > > > are > > > > - * motivated by two goals: > > > > - * > > > > - * - Prevent threads that read members of "struct dp_netdev_flow" > > > from > > > > - * reading bad data due to changes by some thread concurrently > > > > modifying > > > > - * those members. > > > > - * > > > > - * - Prevent two threads making changes to members of a given > > > > "struct > > > > - * dp_netdev_flow" from interfering with each other. > > > > - * > > > > - * > > > > - * Rules > > > > - * ----- > > > > - * > > > > - * A flow 'flow' may be accessed without a risk of being freed during > > > > an > > > RCU > > > > - * grace period. Code that needs to hold onto a flow for a while > > > > - * should try incrementing 'flow->ref_cnt' with dp_netdev_flow_ref(). > > > > - * > > > > - * 'flow->ref_cnt' protects 'flow' from being freed. It doesn't > > > > protect the > > > > - * flow from being deleted from 'cls' and it doesn't protect members of > > > > 'flow' > > > > - * from modification. > > > > - * > > > > - * Some members, marked 'const', are immutable. Accessing other > > > > members > > > > - * requires synchronization, as noted in more detail below. > > > > - */ > > > > -struct dp_netdev_flow { > > > > - const struct flow flow; /* Unmasked flow that created this > > > > entry. */ > > > > - /* Hash table index by unmasked flow. */ > > > > - const struct cmap_node node; /* In owning dp_netdev_pmd_thread's > > > */ > > > > - /* 'flow_table'. */ > > > > - const struct cmap_node mark_node; /* In owning flow_mark's > > > > mark_to_flow */ > > > > - const ovs_u128 ufid; /* Unique flow identifier. */ > > > > - const ovs_u128 mega_ufid; /* Unique mega flow identifier. */ > > > > - const unsigned pmd_id; /* The 'core_id' of pmd thread owning > > > > this > > > */ > > > > - /* flow. */ > > > > - > > > > - /* Number of references. > > > > - * The classifier owns one reference. > > > > - * Any thread trying to keep a rule from being freed should hold > > > > its own > > > > - * reference. */ > > > > - struct ovs_refcount ref_cnt; > > > > - > > > > - bool dead; > > > > - uint32_t mark; /* Unique flow mark assigned to a > > > > flow */ > > > > - > > > > - /* Statistics. */ > > > > - struct dp_netdev_flow_stats stats; > > > > - > > > > - /* Statistics and attributes received from the netdev offload > > > > provider. */ > > > > - atomic_int netdev_flow_get_result; > > > > - struct dp_netdev_flow_stats last_stats; > > > > - struct dp_netdev_flow_attrs last_attrs; > > > > - > > > > - /* Actions. */ > > > > - OVSRCU_TYPE(struct dp_netdev_actions *) actions; > > > > - > > > > - /* While processing a group of input packets, the datapath uses > > > > the next > > > > - * member to store a pointer to the output batch for the flow. It > > > > is > > > > - * reset after the batch has been sent out (See > > > > dp_netdev_queue_batches(), > > > > - * packet_batch_per_flow_init() and > > > packet_batch_per_flow_execute()). > > > > */ > > > > - struct packet_batch_per_flow *batch; > > > > - > > > > - /* Packet classification. */ > > > > - char *dp_extra_info; /* String to return in a flow > > > > dump/get. */ > > > > - struct dpcls_rule cr; /* In owning dp_netdev's 'cls'. */ > > > > - /* 'cr' must be the last member. */ > > > > -}; > > > > - > > > > -static void dp_netdev_flow_unref(struct dp_netdev_flow *); > > > > static bool dp_netdev_flow_ref(struct dp_netdev_flow *); > > > > static int dpif_netdev_flow_from_nlattrs(const struct nlattr *, > > > > uint32_t, > > > > struct flow *, bool); > > > > > > > > -/* A set of datapath actions within a "struct dp_netdev_flow". > > > > - * > > > > - * > > > > - * Thread-safety > > > > - * ============= > > > > - * > > > > - * A struct dp_netdev_actions 'actions' is protected with RCU. */ > > > > -struct dp_netdev_actions { > > > > - /* These members are immutable: they do not change during the > > > struct's > > > > - * lifetime. */ > > > > - unsigned int size; /* Size of 'actions', in bytes. */ > > > > - struct nlattr actions[]; /* Sequence of OVS_ACTION_ATTR_* > > > attributes. > > > > */ > > > > -}; > > > > - > > > > struct dp_netdev_actions *dp_netdev_actions_create(const struct nlattr > > > *, > > > > size_t); > > > > struct dp_netdev_actions *dp_netdev_flow_get_actions( > > > > @@ -646,171 +455,6 @@ struct tx_bond { > > > > struct member_entry member_buckets[BOND_BUCKETS]; > > > > }; > > > > > > > > -/* A set of properties for the current processing loop that is not > > > > directly > > > > - * associated with the pmd thread itself, but with the packets being > > > > - * processed or the short-term system configuration (for example, > > > > time). > > > > - * Contained by struct dp_netdev_pmd_thread's 'ctx' member. */ > > > > -struct dp_netdev_pmd_thread_ctx { > > > > - /* Latest measured time. See 'pmd_thread_ctx_time_update()'. */ > > > > - long long now; > > > > - /* RX queue from which last packet was received. */ > > > > - struct dp_netdev_rxq *last_rxq; > > > > - /* EMC insertion probability context for the current processing > > > > cycle. */ > > > > - uint32_t emc_insert_min; > > > > -}; > > > > - > > > > -/* PMD: Poll modes drivers. PMD accesses devices via polling to > > > > eliminate > > > > - * the performance overhead of interrupt processing. Therefore netdev > > > can > > > > - * not implement rx-wait for these devices. dpif-netdev needs to poll > > > > - * these device to check for recv buffer. pmd-thread does polling for > > > > - * devices assigned to itself. > > > > - * > > > > - * DPDK used PMD for accessing NIC. > > > > - * > > > > - * Note, instance with cpu core id NON_PMD_CORE_ID will be reserved > > > for > > > > - * I/O of all non-pmd threads. There will be no actual thread created > > > > - * for the instance. > > > > - * > > > > - * Each struct has its own flow cache and classifier per managed > > > > ingress > > > port. > > > > - * For packets received on ingress port, a look up is done on > > > > corresponding > > > > PMD > > > > - * thread's flow cache and in case of a miss, lookup is performed in > > > > the > > > > - * corresponding classifier of port. Packets are executed with the > > > > found > > > > - * actions in either case. > > > > - * */ > > > > -struct dp_netdev_pmd_thread { > > > > - struct dp_netdev *dp; > > > > - struct ovs_refcount ref_cnt; /* Every reference must be > > > > refcount'ed. > > > */ > > > > - struct cmap_node node; /* In 'dp->poll_threads'. */ > > > > - > > > > - /* Per thread exact-match cache. Note, the instance for cpu core > > > > - * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly > > > > - * need to be protected by 'non_pmd_mutex'. Every other instance > > > > - * will only be accessed by its own pmd thread. */ > > > > - OVS_ALIGNED_VAR(CACHE_LINE_SIZE) struct dfc_cache flow_cache; > > > > - > > > > - /* Flow-Table and classifiers > > > > - * > > > > - * Writers of 'flow_table' must take the 'flow_mutex'. > > > > Corresponding > > > > - * changes to 'classifiers' must be made while still holding the > > > > - * 'flow_mutex'. > > > > - */ > > > > - struct ovs_mutex flow_mutex; > > > > - struct cmap flow_table OVS_GUARDED; /* Flow table. */ > > > > - > > > > - /* One classifier per in_port polled by the pmd */ > > > > - struct cmap classifiers; > > > > - /* Periodically sort subtable vectors according to hit frequencies > > > > */ > > > > - long long int next_optimization; > > > > - /* End of the next time interval for which processing cycles > > > > - are stored for each polled rxq. */ > > > > - long long int rxq_next_cycle_store; > > > > - > > > > - /* Last interval timestamp. */ > > > > - uint64_t intrvl_tsc_prev; > > > > - /* Last interval cycles. */ > > > > - atomic_ullong intrvl_cycles; > > > > - > > > > - /* Current context of the PMD thread. */ > > > > - struct dp_netdev_pmd_thread_ctx ctx; > > > > - > > > > - struct seq *reload_seq; > > > > - uint64_t last_reload_seq; > > > > - > > > > - /* These are atomic variables used as a synchronization and > > > configuration > > > > - * points for thread reload/exit. > > > > - * > > > > - * 'reload' atomic is the main one and it's used as a memory > > > > - * synchronization point for all other knobs and data. > > > > - * > > > > - * For a thread that requests PMD reload: > > > > - * > > > > - * * All changes that should be visible to the PMD thread must > > > > be made > > > > - * before setting the 'reload'. These changes could use any > > > > memory > > > > - * ordering model including 'relaxed'. > > > > - * * Setting the 'reload' atomic should occur in the same thread > > > > where > > > > - * all other PMD configuration options updated. > > > > - * * Setting the 'reload' atomic should be done with 'release' > > > > memory > > > > - * ordering model or stricter. This will guarantee that all > > > > previous > > > > - * changes (including non-atomic and 'relaxed') will be > > > > visible to > > > > - * the PMD thread. > > > > - * * To check that reload is done, thread should poll the > > > > 'reload' atomic > > > > - * to become 'false'. Polling should be done with 'acquire' > > > > memory > > > > - * ordering model or stricter. This ensures that PMD thread > > > > completed > > > > - * the reload process. > > > > - * > > > > - * For the PMD thread: > > > > - * > > > > - * * PMD thread should read 'reload' atomic with 'acquire' memory > > > > - * ordering model or stricter. This will guarantee that all > > > > changes > > > > - * made before setting the 'reload' in the requesting thread > > > > will be > > > > - * visible to the PMD thread. > > > > - * * All other configuration data could be read with any memory > > > > - * ordering model (including non-atomic and 'relaxed') but > > > > *only > > > after* > > > > - * reading the 'reload' atomic set to 'true'. > > > > - * * When the PMD reload done, PMD should (optionally) set all > > > > the > > > > below > > > > - * knobs except the 'reload' to their default ('false') values > > > > and > > > > - * (mandatory), as the last step, set the 'reload' to 'false' > > > > using > > > > - * 'release' memory ordering model or stricter. This will > > > > inform the > > > > - * requesting thread that PMD has completed a reload cycle. > > > > - */ > > > > - atomic_bool reload; /* Do we need to reload ports? */ > > > > - atomic_bool wait_for_reload; /* Can we busy wait for the next > > > > reload? > > > > */ > > > > - atomic_bool reload_tx_qid; /* Do we need to reload > > > > static_tx_qid? */ > > > > - atomic_bool exit; /* For terminating the pmd thread. > > > > */ > > > > - > > > > - pthread_t thread; > > > > - unsigned core_id; /* CPU core id of this pmd thread. > > > > */ > > > > - int numa_id; /* numa node id of this pmd > > > > thread. */ > > > > - bool isolated; > > > > - > > > > - /* Queue id used by this pmd thread to send packets on all netdevs > > > > if > > > > - * XPS disabled for this netdev. All static_tx_qid's are unique > > > > and less > > > > - * than 'cmap_count(dp->poll_threads)'. */ > > > > - uint32_t static_tx_qid; > > > > - > > > > - /* Number of filled output batches. */ > > > > - int n_output_batches; > > > > - > > > > - struct ovs_mutex port_mutex; /* Mutex for 'poll_list' and > > > > 'tx_ports'. > > > */ > > > > - /* List of rx queues to poll. */ > > > > - struct hmap poll_list OVS_GUARDED; > > > > - /* Map of 'tx_port's used for transmission. Written by the main > > > > thread, > > > > - * read by the pmd thread. */ > > > > - struct hmap tx_ports OVS_GUARDED; > > > > - > > > > - struct ovs_mutex bond_mutex; /* Protects updates of 'tx_bonds'. > > > > */ > > > > - /* Map of 'tx_bond's used for transmission. Written by the main > > > > thread > > > > - * and read by the pmd thread. */ > > > > - struct cmap tx_bonds; > > > > - > > > > - /* These are thread-local copies of 'tx_ports'. One contains only > > > > tunnel > > > > - * ports (that support push_tunnel/pop_tunnel), the other contains > > > ports > > > > - * with at least one txq (that support send). A port can be in > > > > both. > > > > - * > > > > - * There are two separate maps to make sure that we don't try to > > > execute > > > > - * OUTPUT on a device which has 0 txqs or PUSH/POP on a non-tunnel > > > > device. > > > > - * > > > > - * The instances for cpu core NON_PMD_CORE_ID can be accessed by > > > > multiple > > > > - * threads, and thusly need to be protected by 'non_pmd_mutex'. > > > Every > > > > - * other instance will only be accessed by its own pmd thread. */ > > > > - struct hmap tnl_port_cache; > > > > - struct hmap send_port_cache; > > > > - > > > > - /* Keep track of detailed PMD performance statistics. */ > > > > - struct pmd_perf_stats perf_stats; > > > > - > > > > - /* Stats from previous iteration used by automatic pmd > > > > - * load balance logic. */ > > > > - uint64_t prev_stats[PMD_N_STATS]; > > > > - atomic_count pmd_overloaded; > > > > - > > > > - /* Set to true if the pmd thread needs to be reloaded. */ > > > > - bool need_reload; > > > > - > > > > - /* Next time when PMD should try RCU quiescing. */ > > > > - long long next_rcu_quiesce; > > > > -}; > > > > - > > > > /* Interface to netdev-based datapath. */ > > > > struct dpif_netdev { > > > > struct dpif dpif; > > > > @@ -915,90 +559,12 @@ static inline struct dpcls * > > > > dp_netdev_pmd_lookup_dpcls(struct dp_netdev_pmd_thread *pmd, > > > > odp_port_t in_port); > > > > > > > > -static inline bool emc_entry_alive(struct emc_entry *ce); > > > > -static void emc_clear_entry(struct emc_entry *ce); > > > > -static void smc_clear_entry(struct smc_bucket *b, int idx); > > > > - > > > > static void dp_netdev_request_reconfigure(struct dp_netdev *dp); > > > > static inline bool > > > > pmd_perf_metrics_enabled(const struct dp_netdev_pmd_thread *pmd); > > > > static void queue_netdev_flow_del(struct dp_netdev_pmd_thread > > > *pmd, > > > > struct dp_netdev_flow *flow); > > > > > > > > -static void > > > > -emc_cache_init(struct emc_cache *flow_cache) > > > > -{ > > > > - int i; > > > > - > > > > - flow_cache->sweep_idx = 0; > > > > - for (i = 0; i < ARRAY_SIZE(flow_cache->entries); i++) { > > > > - flow_cache->entries[i].flow = NULL; > > > > - flow_cache->entries[i].key.hash = 0; > > > > - flow_cache->entries[i].key.len = sizeof(struct miniflow); > > > > - flowmap_init(&flow_cache->entries[i].key.mf.map); > > > > - } > > > > -} > > > > - > > > > -static void > > > > -smc_cache_init(struct smc_cache *smc_cache) > > > > -{ > > > > - int i, j; > > > > - for (i = 0; i < SMC_BUCKET_CNT; i++) { > > > > - for (j = 0; j < SMC_ENTRY_PER_BUCKET; j++) { > > > > - smc_cache->buckets[i].flow_idx[j] = UINT16_MAX; > > > > - } > > > > - } > > > > -} > > > > - > > > > -static void > > > > -dfc_cache_init(struct dfc_cache *flow_cache) > > > > -{ > > > > - emc_cache_init(&flow_cache->emc_cache); > > > > - smc_cache_init(&flow_cache->smc_cache); > > > > -} > > > > - > > > > -static void > > > > -emc_cache_uninit(struct emc_cache *flow_cache) > > > > -{ > > > > - int i; > > > > - > > > > - for (i = 0; i < ARRAY_SIZE(flow_cache->entries); i++) { > > > > - emc_clear_entry(&flow_cache->entries[i]); > > > > - } > > > > -} > > > > - > > > > -static void > > > > -smc_cache_uninit(struct smc_cache *smc) > > > > -{ > > > > - int i, j; > > > > - > > > > - for (i = 0; i < SMC_BUCKET_CNT; i++) { > > > > - for (j = 0; j < SMC_ENTRY_PER_BUCKET; j++) { > > > > - smc_clear_entry(&(smc->buckets[i]), j); > > > > - } > > > > - } > > > > -} > > > > - > > > > -static void > > > > -dfc_cache_uninit(struct dfc_cache *flow_cache) > > > > -{ > > > > - smc_cache_uninit(&flow_cache->smc_cache); > > > > - emc_cache_uninit(&flow_cache->emc_cache); > > > > -} > > > > - > > > > -/* Check and clear dead flow references slowly (one entry at each > > > > - * invocation). */ > > > > -static void > > > > -emc_cache_slow_sweep(struct emc_cache *flow_cache) > > > > -{ > > > > - struct emc_entry *entry = &flow_cache->entries[flow_cache- > > > > >sweep_idx]; > > > > - > > > > - if (!emc_entry_alive(entry)) { > > > > - emc_clear_entry(entry); > > > > - } > > > > - flow_cache->sweep_idx = (flow_cache->sweep_idx + 1) & > > > > EM_FLOW_HASH_MASK; > > > > -} > > > > - > > > > /* Updates the time in PMD threads context and should be called in > > > > three > > > > cases: > > > > * > > > > * 1. PMD structure initialization: > > > > @@ -2347,19 +1913,13 @@ dp_netdev_flow_free(struct dp_netdev_flow > > > > *flow) > > > > free(flow); > > > > } > > > > > > > > -static void dp_netdev_flow_unref(struct dp_netdev_flow *flow) > > > > +void dp_netdev_flow_unref(struct dp_netdev_flow *flow) > > > > { > > > > if (ovs_refcount_unref_relaxed(&flow->ref_cnt) == 1) { > > > > ovsrcu_postpone(dp_netdev_flow_free, flow); > > > > } > > > > } > > > > > > > > -static uint32_t > > > > -dp_netdev_flow_hash(const ovs_u128 *ufid) > > > > -{ > > > > - return ufid->u32[0]; > > > > -} > > > > - > > > > static inline struct dpcls * > > > > dp_netdev_pmd_lookup_dpcls(struct dp_netdev_pmd_thread *pmd, > > > > odp_port_t in_port) > > > > @@ -2976,14 +2536,6 @@ static bool dp_netdev_flow_ref(struct > > > > dp_netdev_flow *flow) > > > > * single memcmp(). > > > > * - These functions can be inlined by the compiler. */ > > > > > > > > -/* Given the number of bits set in miniflow's maps, returns the size > > > > of the > > > > - * 'netdev_flow_key.mf' */ > > > > -static inline size_t > > > > -netdev_flow_key_size(size_t flow_u64s) > > > > -{ > > > > - return sizeof(struct miniflow) + MINIFLOW_VALUES_SIZE(flow_u64s); > > > > -} > > > > - > > > > static inline bool > > > > netdev_flow_key_equal(const struct netdev_flow_key *a, > > > > const struct netdev_flow_key *b) > > > > @@ -2992,16 +2544,6 @@ netdev_flow_key_equal(const struct > > > > netdev_flow_key *a, > > > > return a->hash == b->hash && !memcmp(&a->mf, &b->mf, a->len); > > > > } > > > > > > > > -/* Used to compare 'netdev_flow_key' in the exact match cache to a > > > > miniflow. > > > > - * The maps are compared bitwise, so both 'key->mf' and 'mf' must have > > > > been > > > > - * generated by miniflow_extract. */ > > > > -static inline bool > > > > -netdev_flow_key_equal_mf(const struct netdev_flow_key *key, > > > > - const struct miniflow *mf) > > > > -{ > > > > - return !memcmp(&key->mf, mf, key->len); > > > > -} > > > > > > Am I correct in assuming the function above is moved to the DFC header > > > file > > > but renamed "emc_flow_key_equal_mf"? > > > > > > If so then it might be worth making a list of any specific function name > > > changes that have occurred for the commit message (there shouldn't be too > > > many right?). > > > > > > It's just one thing to move a function to a header file but another to > > > change > > > the name without flagging outright. Probably seems minor but would > > > imagine someone searing for the previous function would not be sure what > > > it > > > was replaced with. > > > > Good point, this change should be called out in the commit message. I'll do > > that > > in the next version. > > > > > > - > > > > static inline void > > > > netdev_flow_key_clone(struct netdev_flow_key *dst, > > > > const struct netdev_flow_key *src) > > > > @@ -3068,21 +2610,6 @@ netdev_flow_key_init_masked(struct > > > > netdev_flow_key *dst, > > > > (dst_u64 - miniflow_get_values(&dst->mf)) > > > > * 8); > > > > } > > > > > > > > -static inline bool > > > > -emc_entry_alive(struct emc_entry *ce) > > > > -{ > > > > - return ce->flow && !ce->flow->dead; > > > > -} > > > > - > > > > -static void > > > > -emc_clear_entry(struct emc_entry *ce) > > > > -{ > > > > - if (ce->flow) { > > > > - dp_netdev_flow_unref(ce->flow); > > > > - ce->flow = NULL; > > > > - } > > > > -} > > > > - > > > > static inline void > > > > emc_change_entry(struct emc_entry *ce, struct dp_netdev_flow *flow, > > > > const struct netdev_flow_key *key) > > > > @@ -3148,24 +2675,6 @@ emc_probabilistic_insert(struct > > > > dp_netdev_pmd_thread *pmd, > > > > } > > > > } > > > > > > > > -static inline struct dp_netdev_flow * > > > > -emc_lookup(struct emc_cache *cache, const struct netdev_flow_key > > > *key) > > > > -{ > > > > - struct emc_entry *current_entry; > > > > - > > > > - EMC_FOR_EACH_POS_WITH_HASH(cache, current_entry, key->hash) { > > > > - if (current_entry->key.hash == key->hash > > > > - && emc_entry_alive(current_entry) > > > > - && netdev_flow_key_equal_mf(¤t_entry->key, &key->mf)) > > > { > > > > - > > > > - /* We found the entry with the 'key->mf' miniflow */ > > > > - return current_entry->flow; > > > > - } > > > > - } > > > > - > > > > - return NULL; > > > > -} > > > > - > > > > static inline const struct cmap_node * > > > > smc_entry_get(struct dp_netdev_pmd_thread *pmd, const uint32_t > > > hash) > > > > { > > > > @@ -3186,12 +2695,6 @@ smc_entry_get(struct dp_netdev_pmd_thread > > > > *pmd, const uint32_t hash) > > > > return NULL; > > > > } > > > > > > > > -static void > > > > -smc_clear_entry(struct smc_bucket *b, int idx) > > > > -{ > > > > - b->flow_idx[idx] = UINT16_MAX; > > > > -} > > > > - > > > > /* Insert the flow_table index into SMC. Insertion may fail when 1) > > > > SMC is > > > > * turned off, 2) the flow_table index is larger than uint16_t can > > > > handle. > > > > * If there is already an SMC entry having same signature, the index > > > > will be > > > > @@ -6875,22 +6378,6 @@ dp_netdev_upcall(struct > > > dp_netdev_pmd_thread > > > > *pmd, struct dp_packet *packet_, > > > > actions, wc, put_actions, dp->upcall_aux); > > > > } > > > > > > > > -static inline uint32_t > > > > -dpif_netdev_packet_get_rss_hash_orig_pkt(struct dp_packet *packet, > > > > - const struct miniflow *mf) > > > > -{ > > > > - uint32_t hash; > > > > - > > > > - if (OVS_LIKELY(dp_packet_rss_valid(packet))) { > > > > - hash = dp_packet_get_rss_hash(packet); > > > > - } else { > > > > - hash = miniflow_hash_5tuple(mf, 0); > > > > - dp_packet_set_rss_hash(packet, hash); > > > > - } > > > > - > > > > - return hash; > > > > -} > > > > - > > > > static inline uint32_t > > > > dpif_netdev_packet_get_rss_hash(struct dp_packet *packet, > > > > const struct miniflow *mf) > > > > -- > > > > 2.31.1 > > > > > > Overall besides the minor issues I've queries I think this looks OK. But > > > maybe > > > it will be worth flagging these changes on a wider level to the community > > > 9Somone who may not be interested in the specific AVX512 work may have > > > input on this refactor). > > > > > > I'll raise this at the weekly patch review update. > > > > > > Regards > > > Ian > > > > > > > Thanks again for the review Ian. > > > > > > > > > > _______________________________________________ > > > > dev mailing list > > > > [email protected] > > > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > > > _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
