Re: [ovs-dev] [RFC 4/4] dpif-netdev.c: Add indirect table

2018-02-26 Thread Wang, Yipeng1
Thanks for the comments, reply inlined:

>-----Original Message-----
>From: Bodireddy, Bhanuprakash
>Sent: Friday, February 23, 2018 1:06 PM
>To: Wang, Yipeng1 <yipeng1.w...@intel.com>; d...@openvswitch.org; 
>jan.scheur...@ericsson.com
>Cc: Tai, Charlie <charlie@intel.com>
>Subject: RE: [ovs-dev] [RFC 4/4] dpif-netdev.c: Add indirect table
>
>Hi Yipeng,
>
>>If we store pointers in DFC, then the memory requirement is large. When
>>there are VMs or multiple PMDs running on the same platform, they will
>>compete for the shared cache. So we want DFC to be as memory efficient as
>>possible.
>
>>
>>The indirect table is a simple hash table that maps the DFC's result to the
>>dp_netdev_flow pointer. This is to reduce the memory size of the DFC
>>cache, assuming that the megaflow count is much smaller than the exact
>>match flow count. With this commit, we reduce the 8-byte pointer to a
>>2-byte index in the DFC cache, so the memory/cache requirement is almost
>>halved. Another approach we plan to try is to use the flow_table as the
>>indirect table.
>
>I assume this patch is only aimed at reducing the DFC cache memory footprint
>and doesn't introduce any new functionality?
>
[Wang, Yipeng] Yes, the reduction comes from introducing an indirect table. We have
another solution that reuses the "flow_table", which we will post later, but there
are pros and cons to each.

>With this I see the dfc_bucket size is 32 bytes, down from 80 bytes in 3/4,
>and the buckets now align to cache lines.
>Also the dfc_cache size is reduced to ~8 MB from ~12 MB in the 1/4 and ~14 MB in
>the 3/4 patches.
>
[Wang, Yipeng] With 1M entries, the sizes are as follows:
1/4: 8 MB
3/4: 10 MB
4/4: 4 MB + the indirect table, which is 512 kB when sized to hold 64k rules.
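
For reference, the 4/4 figure follows directly from the structures in this patch;
a small sketch of the arithmetic (assuming 64-bit pointers):

    /* DFC with 2-byte indices (4/4):
     *   struct dfc_bucket = 8 * 2 B (sig) + 8 * 2 B (index) = 32 B
     *   DFC_BUCKET_CNT    = (1 << 20) / 8 = 131072 buckets
     *   DFC total         = 131072 * 32 B = 4 MB
     * Same bucket layout with 8-byte pointers (3/4):
     *   struct dfc_bucket = 8 * 2 B + 8 * 8 B = 80 B  ->  ~10 MB
     * Indirect table sized for 64k rules:
     *   (1 << 16) * sizeof(struct dp_netdev_flow *) = 65536 * 8 B = 512 kB
     */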


>I am guessing there might be some performance improvement with this patch due
>to the buckets aligning to cache lines, apart from the reduced memory footprint.
>Do you see any such advantage here in your benchmarks?
>
[Wang, Yipeng] Actually, due to the indirect table (one more memory dereference), we
saw a 1-2% throughput drop compared to 3/4 in simple tests.
But as you said, this does not account for the cache-efficiency effect. There will
be a performance improvement under cache contention, so we believe
this approach is more scalable.
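
To make the "one more memory dereference" concrete, here is a simplified comparison
of the two hit paths (sketch only, condensed from dfc_entry_get() in 3/4 vs. this
patch):

    /* 3/4: the bucket stores the pointer, so a hit is one load. */
    struct dp_netdev_flow *flow = bucket->flow[i];

    /* 4/4: the bucket stores a 2-byte index, so a hit needs an extra
     * dependent load through the indirect table. */
    struct dp_netdev_flow *flow = cache->indirect_table[bucket->index[i]];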

>Regards,
>Bhanuprakash.
>
>>
>>The indirect table size is a fixed constant for now.
>>
>>Signed-off-by: Yipeng Wang <yipeng1.w...@intel.com>
>>---
>> lib/dpif-netdev.c | 69 +++
>>
>> 1 file changed, 44 insertions(+), 25 deletions(-)
>>
>>diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 50a1d25..35197d3
>>100644
>>--- a/lib/dpif-netdev.c
>>+++ b/lib/dpif-netdev.c
>>@@ -151,6 +151,12 @@ struct netdev_flow_key {
>>
>> #define DFC_MASK_LEN 20
>> #define DFC_ENTRY_PER_BUCKET 8
>>+
>>+/* For now we fix the Indirect table size, ideally it should be sized according
>>+ * to max megaflow count but less than 2^16
>>+ */
>>+#define INDIRECT_TABLE_SIZE (1u << 12)
>>+#define INDIRECT_TABLE_MASK (INDIRECT_TABLE_SIZE - 1)
>> #define DFC_ENTRIES (1u << DFC_MASK_LEN)
>> #define DFC_BUCKET_CNT (DFC_ENTRIES / DFC_ENTRY_PER_BUCKET)
>> #define DFC_MASK (DFC_BUCKET_CNT - 1)
>>@@ -175,13 +181,14 @@ struct emc_cache {
>>
>> struct dfc_bucket {
>> uint16_t sig[DFC_ENTRY_PER_BUCKET];
>>-struct dp_netdev_flow *flow[DFC_ENTRY_PER_BUCKET];
>>+uint16_t index[DFC_ENTRY_PER_BUCKET];
>> };
>>
>> struct dfc_cache {
>> struct emc_cache emc_cache;
>> struct dfc_bucket buckets[DFC_BUCKET_CNT];
>> int sweep_idx;
>>+struct dp_netdev_flow *indirect_table[INDIRECT_TABLE_SIZE];
>> };
>>
>>
>
>>@@ -754,7 +761,7 @@ static int dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
>>
>> static inline bool dfc_entry_alive(struct dp_netdev_flow *flow);
>> static void emc_clear_entry(struct emc_entry *ce);
>>-static void dfc_clear_entry(struct dfc_bucket *b, int idx);
>>+static void dfc_clear_entry(struct dp_netdev_flow **flow, struct dfc_bucket *b, int idx);
>>
>> static void dp_netdev_request_reconfigure(struct dp_netdev *dp);
>>
>>@@ -782,9 +789,12 @@ dfc_cache_init(struct dfc_cache *flow_cache)
>> emc_cache_init(&flow_cache->emc_cache);
>> for (i = 0; i < DFC_BUCKET_CNT; i++) {
>> for (j = 0; j < DFC_ENTRY_PER_BUCKET; j++) {
>>-flow_cache->buckets[i].flow[j] = NULL;
>>+flow_cache->buckets[i].sig[j] = 0;
>> }
>> }
>>+for (i = 0; i < INDIRECT_TAB

Re: [ovs-dev] [RFC 4/4] dpif-netdev.c: Add indirect table

2018-02-23 Thread Bodireddy, Bhanuprakash
Hi Yipeng,

>If we store pointers in DFC, then the memory requirement is large. When
>there are VMs or multiple PMDs running on the same platform, they will
>compete for the shared cache. So we want DFC to be as memory efficient as
>possible.

>
>The indirect table is a simple hash table that maps the DFC's result to the
>dp_netdev_flow pointer. This is to reduce the memory size of the DFC
>cache, assuming that the megaflow count is much smaller than the exact
>match flow count. With this commit, we reduce the 8-byte pointer to a
>2-byte index in the DFC cache, so the memory/cache requirement is almost
>halved. Another approach we plan to try is to use the flow_table as the
>indirect table.

I assume this patch is only aimed at reducing the DFC cache memory footprint
and doesn't introduce any new functionality?

With this I see the dfc_bucket size is 32 bytes, down from 80 bytes in 3/4,
and the buckets now align to cache lines.
Also the dfc_cache size is reduced to ~8 MB from ~12 MB in the 1/4 and ~14 MB in
the 3/4 patches.

I am guessing there might be some performance improvement with this patch due
to the buckets aligning to cache lines, apart from the reduced memory footprint.
Do you see any such advantage here in your benchmarks?

Regards,
Bhanuprakash.

>
>The indirect table size is a fixed constant for now.
>
>Signed-off-by: Yipeng Wang 
>---
> lib/dpif-netdev.c | 69 +++
>
> 1 file changed, 44 insertions(+), 25 deletions(-)
>
>diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 50a1d25..35197d3
>100644
>--- a/lib/dpif-netdev.c
>+++ b/lib/dpif-netdev.c
>@@ -151,6 +151,12 @@ struct netdev_flow_key {
>
> #define DFC_MASK_LEN 20
> #define DFC_ENTRY_PER_BUCKET 8
>+
>+/* For now we fix the Indirect table size, ideally it should be sized according
>+ * to max megaflow count but less than 2^16
>+ */
>+#define INDIRECT_TABLE_SIZE (1u << 12)
>+#define INDIRECT_TABLE_MASK (INDIRECT_TABLE_SIZE - 1)
> #define DFC_ENTRIES (1u << DFC_MASK_LEN)
> #define DFC_BUCKET_CNT (DFC_ENTRIES / DFC_ENTRY_PER_BUCKET)
> #define DFC_MASK (DFC_BUCKET_CNT - 1)
>@@ -175,13 +181,14 @@ struct emc_cache {
>
> struct dfc_bucket {
> uint16_t sig[DFC_ENTRY_PER_BUCKET];
>-struct dp_netdev_flow *flow[DFC_ENTRY_PER_BUCKET];
>+uint16_t index[DFC_ENTRY_PER_BUCKET];
> };
>
> struct dfc_cache {
> struct emc_cache emc_cache;
> struct dfc_bucket buckets[DFC_BUCKET_CNT];
> int sweep_idx;
>+struct dp_netdev_flow *indirect_table[INDIRECT_TABLE_SIZE];
> };
>
> 

>@@ -754,7 +761,7 @@ static int dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
>
> static inline bool dfc_entry_alive(struct dp_netdev_flow *flow);
> static void emc_clear_entry(struct emc_entry *ce);
>-static void dfc_clear_entry(struct dfc_bucket *b, int idx);
>+static void dfc_clear_entry(struct dp_netdev_flow **flow, struct dfc_bucket *b, int idx);
>
> static void dp_netdev_request_reconfigure(struct dp_netdev *dp);
>
>@@ -782,9 +789,12 @@ dfc_cache_init(struct dfc_cache *flow_cache)
> emc_cache_init(&flow_cache->emc_cache);
> for (i = 0; i < DFC_BUCKET_CNT; i++) {
> for (j = 0; j < DFC_ENTRY_PER_BUCKET; j++) {
>-flow_cache->buckets[i].flow[j] = NULL;
>+flow_cache->buckets[i].sig[j] = 0;
> }
> }
>+for (i = 0; i < INDIRECT_TABLE_SIZE; i++) {
>+flow_cache->indirect_table[i] = NULL;
>+}
> flow_cache->sweep_idx = 0;
> }
>
>@@ -805,7 +815,7 @@ dfc_cache_uninit(struct dfc_cache *flow_cache)
>
> for (i = 0; i < DFC_BUCKET_CNT; i++) {
> for (j = 0; j < DFC_ENTRY_PER_BUCKET; j++) {
>-dfc_clear_entry(&(flow_cache->buckets[i]), j);
>+dfc_clear_entry(flow_cache->indirect_table, &(flow_cache->buckets[i]), j);
> }
> }
> emc_cache_uninit(&flow_cache->emc_cache);
>@@ -2259,7 +2269,7 @@ dfc_entry_get(struct dfc_cache *cache, const uint32_t hash)
> uint16_t sig = hash >> 16;
> for (int i = 0; i < DFC_ENTRY_PER_BUCKET; i++) {
> if(bucket->sig[i] == sig) {
>-return bucket->flow[i];
>+return cache->indirect_table[bucket->index[i]];
> }
> }
> return NULL;
>@@ -2272,28 +2282,33 @@ dfc_entry_alive(struct dp_netdev_flow *flow)
> }
>
> static void
>-dfc_clear_entry(struct dfc_bucket *b, int idx)
>+dfc_clear_entry(struct dp_netdev_flow **ind_table, struct dfc_bucket *b, int idx)
> {
>-if (b->flow[idx]) {
>-dp_netdev_flow_unref(b->flow[idx]);
>-b->flow[idx] = NULL;
>+if (ind_table[b->index[idx]]) {
>+dp_netdev_flow_unref(ind_table[b->index[idx]]);
>+ind_table[b->index[idx]] = NULL;
> }
> }
>
>-static inline void
>-dfc_change_entry(struct dfc_bucket *b, int idx, struct dp_netdev_flow *flow)
>+
>+static inline uint16_t
>+indirect_table_insert(struct dp_netdev_flow **indirect_table,
>+struct dp_netdev_flow *flow)
> {
>-if (b->flow[idx] != flow) {
>-

[ovs-dev] [RFC 4/4] dpif-netdev.c: Add indirect table

2018-01-18 Thread Yipeng Wang
If we store pointers in DFC, then the memory requirement is large. When
there are VMs or multiple PMDs running on the same platform, they will
compete for the shared cache. So we want DFC to be as memory efficient as possible.

The indirect table is a simple hash table that maps the DFC's result
to the dp_netdev_flow pointer. This is to reduce the memory size of the
DFC cache, assuming that the megaflow count is much smaller than
the exact match flow count. With this commit, we reduce the 8-byte
pointer to a 2-byte index in the DFC cache, so the memory/cache requirement is
almost halved. Another approach we plan to try is to use the flow_table
as the indirect table.

The indirect table size is a fixed constant for now.
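
As a rough standalone illustration of the scheme (simplified, hypothetical helper
names, not the patch code itself): the megaflow pointer is stored once in a small
hash-indexed array keyed by the flow's ufid hash, and the DFC bucket only keeps
the 2-byte slot index.

    #include <stdint.h>

    struct dp_netdev_flow;              /* opaque here; defined in dpif-netdev.c */

    #define IND_SIZE (1u << 16)         /* upper bound discussed above */
    #define IND_MASK (IND_SIZE - 1)

    static struct dp_netdev_flow *ind_table[IND_SIZE];

    /* Store the flow pointer at a slot derived from its ufid hash and return
     * the 2-byte slot index; this index is what a DFC bucket records.
     * (The patch additionally ref-counts the flow; omitted here.) */
    static inline uint16_t
    ind_table_insert(struct dp_netdev_flow *flow, uint32_t ufid_hash)
    {
        uint16_t index = ufid_hash & IND_MASK;
        ind_table[index] = flow;        /* a colliding slot is simply overwritten */
        return index;
    }

    /* DFC hit path: translate the stored index back into the flow pointer. */
    static inline struct dp_netdev_flow *
    ind_table_lookup(uint16_t index)
    {
        return ind_table[index];
    }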

Signed-off-by: Yipeng Wang 
---
 lib/dpif-netdev.c | 69 +++
 1 file changed, 44 insertions(+), 25 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 50a1d25..35197d3 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -151,6 +151,12 @@ struct netdev_flow_key {
 
 #define DFC_MASK_LEN 20
 #define DFC_ENTRY_PER_BUCKET 8
+
+/* For now we fix the Indirect table size, ideally it should be sized according
+ * to max megaflow count but less than 2^16
+ */
+#define INDIRECT_TABLE_SIZE (1u << 12)
+#define INDIRECT_TABLE_MASK (INDIRECT_TABLE_SIZE - 1)
 #define DFC_ENTRIES (1u << DFC_MASK_LEN)
 #define DFC_BUCKET_CNT (DFC_ENTRIES / DFC_ENTRY_PER_BUCKET)
 #define DFC_MASK (DFC_BUCKET_CNT - 1)
@@ -175,13 +181,14 @@ struct emc_cache {
 
 struct dfc_bucket {
 uint16_t sig[DFC_ENTRY_PER_BUCKET];
-struct dp_netdev_flow *flow[DFC_ENTRY_PER_BUCKET];
+uint16_t index[DFC_ENTRY_PER_BUCKET];
 };
 
 struct dfc_cache {
 struct emc_cache emc_cache;
 struct dfc_bucket buckets[DFC_BUCKET_CNT];
 int sweep_idx;
+struct dp_netdev_flow *indirect_table[INDIRECT_TABLE_SIZE];
 };
 
 
@@ -754,7 +761,7 @@ static int dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
 
 static inline bool dfc_entry_alive(struct dp_netdev_flow *flow);
 static void emc_clear_entry(struct emc_entry *ce);
-static void dfc_clear_entry(struct dfc_bucket *b, int idx);
+static void dfc_clear_entry(struct dp_netdev_flow **flow, struct dfc_bucket *b, int idx);
 
 static void dp_netdev_request_reconfigure(struct dp_netdev *dp);
 
@@ -782,9 +789,12 @@ dfc_cache_init(struct dfc_cache *flow_cache)
 emc_cache_init(&flow_cache->emc_cache);
 for (i = 0; i < DFC_BUCKET_CNT; i++) {
 for (j = 0; j < DFC_ENTRY_PER_BUCKET; j++) {
-flow_cache->buckets[i].flow[j] = NULL;
+flow_cache->buckets[i].sig[j] = 0;
 }
 }
+for (i = 0; i < INDIRECT_TABLE_SIZE; i++) {
+flow_cache->indirect_table[i] = NULL;
+}
 flow_cache->sweep_idx = 0;
 }
 
@@ -805,7 +815,7 @@ dfc_cache_uninit(struct dfc_cache *flow_cache)
 
 for (i = 0; i < DFC_BUCKET_CNT; i++) {
 for (j = 0; j < DFC_ENTRY_PER_BUCKET; j++) {
-dfc_clear_entry(&(flow_cache->buckets[i]), j);
+dfc_clear_entry(flow_cache->indirect_table, &(flow_cache->buckets[i]), j);
 }
 }
 emc_cache_uninit(&flow_cache->emc_cache);
@@ -2259,7 +2269,7 @@ dfc_entry_get(struct dfc_cache *cache, const uint32_t hash)
 uint16_t sig = hash >> 16;
 for (int i = 0; i < DFC_ENTRY_PER_BUCKET; i++) {
 if(bucket->sig[i] == sig) {
-return bucket->flow[i];
+return cache->indirect_table[bucket->index[i]];
 }
 }
 return NULL;
@@ -2272,28 +2282,33 @@ dfc_entry_alive(struct dp_netdev_flow *flow)
 }
 
 static void
-dfc_clear_entry(struct dfc_bucket *b, int idx)
+dfc_clear_entry(struct dp_netdev_flow **ind_table, struct dfc_bucket *b, int idx)
 {
-if (b->flow[idx]) {
-dp_netdev_flow_unref(b->flow[idx]);
-b->flow[idx] = NULL;
+if (ind_table[b->index[idx]]) {
+dp_netdev_flow_unref(ind_table[b->index[idx]]);
+ind_table[b->index[idx]] = NULL;
 }
 }
 
-static inline void
-dfc_change_entry(struct dfc_bucket *b, int idx, struct dp_netdev_flow *flow)
+
+static inline uint16_t
+indirect_table_insert(struct dp_netdev_flow **indirect_table,
+struct dp_netdev_flow *flow)
 {
-if (b->flow[idx] != flow) {
-if (b->flow[idx]) {
-dp_netdev_flow_unref(b->flow[idx]);
+uint32_t hash = dp_netdev_flow_hash(&flow->ufid);
+uint16_t index = hash & INDIRECT_TABLE_MASK;
+if (indirect_table[index] != flow){
+if(indirect_table[index]) {
+dp_netdev_flow_unref(indirect_table[index]);
 }
-
 if (dp_netdev_flow_ref(flow)) {
-b->flow[idx] = flow;
-} else {
-b->flow[idx] = NULL;
+indirect_table[index] = flow;
+}
+else {
+indirect_table[index] = NULL;
 }
 }
+return index;
 }
 
 static inline void
@@ -2302,25 +2317,28 @@ dfc_insert(struct dp_netdev_pmd_thread *pmd,