Thanks for the comments, reply inlined: >-----Original Message----- >From: Bodireddy, Bhanuprakash >Sent: Friday, February 23, 2018 1:06 PM >To: Wang, Yipeng1 <yipeng1.w...@intel.com>; d...@openvswitch.org; >jan.scheur...@ericsson.com >Cc: Tai, Charlie <charlie....@intel.com> >Subject: RE: [ovs-dev] [RFC 4/4] dpif-netdev.c: Add indirect table > >Hi Yipeng, > >>If we store pointers in DFC, then the memory requirement is large. When >>there are VMs or multiple PMDs running on the same platform, they will >>compete for the shared cache. So we want DFC to be as memory efficient as >>possible. > >> >>Indirect table is a simple hash table that maps the DFC's result to the >>dp_netdev_flow's pointer. This is to reduce the memory size of the DFC >>cache, assuming that the megaflow count is much smaller than the exact >>match flow count. With this commit, we could reduce the 8-byte pointer to a >>2-byte index in DFC cache so that the memory/cache requirement is almost >>halved. Another approach we plan to try is to use the flow_table as the >>indirect table. > >I assume this patch is only aimed at reducing the DFC cache memory footprint >and doesn't introduce any new functionality ? > [Wang, Yipeng] Yes, by introducing an indirect table. We have another solution that uses the "flow_table", which we will post later. But there will be pros/cons.
>With this I see the dfc_bucket size is at 32 bytes from earlier 80 bytes in 3/4 >and the buckets now will be aligned to cache lines. >Also the dfc_cache size reduced to ~8MB from ~12MB in 1/4 and ~14MB in 3/4 >patches. > [Wang, Yipeng] With 1M entries, the size is as below: 1/4: 8MB 3/4: 10MB 4/4: 4MB + indirect table size, which is 512k to contain 64k rules. >I am guessing there might be some performance improvement with this patch due >to buckets aligning to cache lines apart from the reduced >memory footprint. Do you see any such advantage here in your benchmarks? > [Wang, Yipeng] Actually due to the indirect table (1 more memory jump), we saw a 1-2% throughput drop compared to 3/4 in simple tests. But as you said this does not consider the cache efficiency effect. There will be a performance improvement with cache contention. So we believe this is more scalable. >Regards, >Bhanuprakash. > >> >>The indirect table size is a fixed constant for now. >> >>Signed-off-by: Yipeng Wang <yipeng1.w...@intel.com> >>--- >> lib/dpif-netdev.c | 69 +++++++++++++++++++++++++++++++++++------------ >>-------- >> 1 file changed, 44 insertions(+), 25 deletions(-) >> >>diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 50a1d25..35197d3 >>100644 >>--- a/lib/dpif-netdev.c >>+++ b/lib/dpif-netdev.c >>@@ -151,6 +151,12 @@ struct netdev_flow_key { >> >> #define DFC_MASK_LEN 20 >> #define DFC_ENTRY_PER_BUCKET 8 >>+ >>+/* For now we fix the Indirect table size, ideally it should be sized >>+according >>+ * to max megaflow count but less than 2^16 */ #define >>+INDIRECT_TABLE_SIZE (1u << 12) #define INDIRECT_TABLE_MASK >>+(INDIRECT_TABLE_SIZE - 1) >> #define DFC_ENTRIES (1u << DFC_MASK_LEN) #define DFC_BUCKET_CNT >>(DFC_ENTRIES / DFC_ENTRY_PER_BUCKET) #define DFC_MASK >>(DFC_BUCKET_CNT - 1) @@ -175,13 +181,14 @@ struct emc_cache { >> >> struct dfc_bucket { >> uint16_t sig[DFC_ENTRY_PER_BUCKET]; >>- struct dp_netdev_flow *flow[DFC_ENTRY_PER_BUCKET]; >>+ uint16_t index[DFC_ENTRY_PER_BUCKET]; >> }; 
>> >> struct dfc_cache { >> struct emc_cache emc_cache; >> struct dfc_bucket buckets[DFC_BUCKET_CNT]; >> int sweep_idx; >>+ struct dp_netdev_flow *indirect_table[INDIRECT_TABLE_SIZE]; >> }; >> >> > >>@@ -754,7 +761,7 @@ static int dpif_netdev_xps_get_tx_qid(const struct >>dp_netdev_pmd_thread *pmd, >> >> static inline bool dfc_entry_alive(struct dp_netdev_flow *flow); static void >>emc_clear_entry(struct emc_entry *ce); -static void dfc_clear_entry(struct >>dfc_bucket *b, int idx); >>+static void dfc_clear_entry(struct dp_netdev_flow **flow, struct >>+dfc_bucket *b, int idx); >> >> static void dp_netdev_request_reconfigure(struct dp_netdev *dp); >> >>@@ -782,9 +789,12 @@ dfc_cache_init(struct dfc_cache *flow_cache) >> emc_cache_init(&flow_cache->emc_cache); >> for (i = 0; i < DFC_BUCKET_CNT; i++) { >> for (j = 0; j < DFC_ENTRY_PER_BUCKET; j++) { >>- flow_cache->buckets[i].flow[j] = NULL; >>+ flow_cache->buckets[i].sig[j] = 0; >> } >> } >>+ for (i = 0; i < INDIRECT_TABLE_SIZE; i++) { >>+ flow_cache->indirect_table[i] = NULL; >>+ } >> flow_cache->sweep_idx = 0; >> } >> >>@@ -805,7 +815,7 @@ dfc_cache_uninit(struct dfc_cache *flow_cache) >> >> for (i = 0; i < DFC_BUCKET_CNT; i++) { >> for (j = 0; j < DFC_ENTRY_PER_BUCKET; j++) { >>- dfc_clear_entry(&(flow_cache->buckets[i]), j); >>+ dfc_clear_entry(flow_cache->indirect_table, >>+ &(flow_cache->buckets[i]), j); >> } >> } >> emc_cache_uninit(&flow_cache->emc_cache); >>@@ -2259,7 +2269,7 @@ dfc_entry_get(struct dfc_cache *cache, const >>uint32_t hash) >> uint16_t sig = hash >> 16; >> for (int i = 0; i < DFC_ENTRY_PER_BUCKET; i++) { >> if(bucket->sig[i] == sig) { >>- return bucket->flow[i]; >>+ return cache->indirect_table[bucket->index[i]]; >> } >> } >> return NULL; >>@@ -2272,28 +2282,33 @@ dfc_entry_alive(struct dp_netdev_flow *flow) } >> >> static void >>-dfc_clear_entry(struct dfc_bucket *b, int idx) >>+dfc_clear_entry(struct dp_netdev_flow **ind_table, struct dfc_bucket >>+*b, int idx) >> { >>- if (b->flow[idx]) { 
>>- dp_netdev_flow_unref(b->flow[idx]); >>- b->flow[idx] = NULL; >>+ if (ind_table[b->index[idx]]) { >>+ dp_netdev_flow_unref(ind_table[b->index[idx]]); >>+ ind_table[b->index[idx]] = NULL; >> } >> } >> >>-static inline void >>-dfc_change_entry(struct dfc_bucket *b, int idx, struct dp_netdev_flow >>*flow) >>+ >>+static inline uint16_t >>+indirect_table_insert(struct dp_netdev_flow **indirect_table, >>+ struct dp_netdev_flow *flow) >> { >>- if (b->flow[idx] != flow) { >>- if (b->flow[idx]) { >>- dp_netdev_flow_unref(b->flow[idx]); >>+ uint32_t hash = dp_netdev_flow_hash(&flow->ufid); >>+ uint16_t index = hash & INDIRECT_TABLE_MASK; >>+ if (indirect_table[index] != flow){ >>+ if(indirect_table[index]) { >>+ dp_netdev_flow_unref(indirect_table[index]); >> } >>- >> if (dp_netdev_flow_ref(flow)) { >>- b->flow[idx] = flow; >>- } else { >>- b->flow[idx] = NULL; >>+ indirect_table[index] = flow; >>+ } >>+ else { >>+ indirect_table[index] = NULL; >> } >> } >>+ return index; >> } >> >> static inline void >>@@ -2302,25 +2317,28 @@ dfc_insert(struct dp_netdev_pmd_thread *pmd, >> struct dp_netdev_flow *flow) { >> struct dfc_cache *cache = &pmd->flow_cache; >>- >>+ struct dp_netdev_flow **ind_table = cache->indirect_table; >> struct dfc_bucket *bucket = &cache->buckets[key->hash & DFC_MASK]; >>+ >>+ uint16_t index = indirect_table_insert(ind_table, flow); >>+ >> uint16_t sig = key->hash >> 16; >> for (int i = 0; i < DFC_ENTRY_PER_BUCKET; i++) { >> if(bucket->sig[i] == sig) { >>- dfc_change_entry(bucket, i, flow); >>+ bucket->index[i] = index; >> return; >> } >> } >> for (int i = 0; i < DFC_ENTRY_PER_BUCKET; i++) { >>- if(bucket->flow[i] == NULL) { >>+ if(bucket->sig[i] == 0) { >> bucket->sig[i] = sig; >>- dfc_change_entry(bucket, i, flow); >>+ bucket->index[i] = index; >> return; >> } >> } >> int idx = random_uint32() & (DFC_ENTRY_PER_BUCKET - 1); >> bucket->sig[idx] = sig; >>- dfc_change_entry(bucket, idx, flow); >>+ bucket->index[idx] = index; >> } >> >> static inline struct 
dp_netdev_flow * >>@@ -2381,8 +2399,9 @@ dfc_slow_sweep(struct dfc_cache *cache) >> uint32_t bkt = cache->sweep_idx / DFC_ENTRY_PER_BUCKET; >> struct dfc_bucket *bucket = &cache->buckets[bkt]; >> for (int i = 0; i < DFC_ENTRY_PER_BUCKET; i++){ >>- if (!dfc_entry_alive(bucket->flow[i])) { >>- dfc_clear_entry(bucket, i); >>+ if (!dfc_entry_alive(cache->indirect_table[bucket->index[i]])) { >>+ dfc_clear_entry(cache->indirect_table, bucket, i); >>+ bucket->sig[i] = 0; >> } >> } >> } >>-- >>2.7.4 >> >>_______________________________________________ >>dev mailing list >>d...@openvswitch.org >>https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev