Re: [ovs-dev] [RFC 4/4] dpif-netdev.c: Add indirect table

2018-02-26 Thread Wang, Yipeng1
Thanks for the comments, reply inlined:

>-Original Message-
>From: Bodireddy, Bhanuprakash
>Sent: Friday, February 23, 2018 1:06 PM
>To: Wang, Yipeng1 ; d...@openvswitch.org; 
>jan.scheur...@ericsson.com
>Cc: Tai, Charlie 
>Subject: RE: [ovs-dev] [RFC 4/4] dpif-netdev.c: Add indirect table
>
>Hi Yipeng,
>
>>If we store pointers in DFC, then the memory requirement is large. When
>>there are VMs or multiple PMDs running on the same platform, they will
>>compete the shared cache. So we want DFC to be as memory efficient as
>>possible.
>
>>
>>Indirect table is a simple hash table that maps the DFC's result to the
>>dp_netdev_flow's pointer. This is to reduce the memory size of the DFC
>>cache, assuming that the megaflow count is much smaller than the exact
>>match flow count. With this commit, we could reduce the 8-byte pointer to a
>>2-byte index in DFC cache so that the memory/cache requirement is almost
>>halved. Another approach we plan to try is to use the flow_table as the
>>indirect table.
>
>I assume this patch is only aimed at reducing the DFC cache memory footprint 
>and doesn't introduce any new functionality?
>
[Wang, Yipeng] Yes, by introducing an indirect table. We have another solution 
that uses the "flow_table", which we will post later. But there will be pros/cons.

>With this I see the dfc_bucket size is at 32 bytes from earlier 80bytes in 3/4 
>and the buckets now will be aligned to cache lines.
>Also the dfc_cache size reduced to ~8MB from ~12MB in 1/4 and ~14MB in 3/4 
>patches.
>
[Wang, Yipeng]  With 1M entry, the size is as below:
1/4: 8MB
3/4: 10MB
4/4: 4MB + indirect table size, which is 512k to contain 64k rules.


>I am guessing there might be some performance improvement with this patch due 
>to buckets aligning to cache lines, apart from the reduced
>memory footprint. Do you see any such advantage here in your benchmarks?
>
[Wang, Yipeng] Actually, due to the indirect table (one more memory jump), we saw 
a 1-2% throughput drop compared to 3/4 in simple tests. 
But, as you said, this does not account for the cache-efficiency effect. There should 
be a performance improvement when cache contention is present, so we believe
this is more scalable.

>Regards,
>Bhanuprakash.
>
>>
>>The indirect table size is a fixed constant for now.
>>
>>Signed-off-by: Yipeng Wang 
>>---
>> lib/dpif-netdev.c | 69 +++
>>
>> 1 file changed, 44 insertions(+), 25 deletions(-)
>>
>>diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 50a1d25..35197d3
>>100644
>>--- a/lib/dpif-netdev.c
>>+++ b/lib/dpif-netdev.c
>>@@ -151,6 +151,12 @@ struct netdev_flow_key {
>>
>> #define DFC_MASK_LEN 20
>> #define DFC_ENTRY_PER_BUCKET 8
>>+
>>+/* For now we fix the Indirect table size, ideally it should be sized
>>+according
>>+ * to max megaflow count but less than 2^16  */ #define
>>+INDIRECT_TABLE_SIZE (1u << 12) #define INDIRECT_TABLE_MASK
>>+(INDIRECT_TABLE_SIZE - 1)
>> #define DFC_ENTRIES (1u << DFC_MASK_LEN)  #define DFC_BUCKET_CNT
>>(DFC_ENTRIES / DFC_ENTRY_PER_BUCKET)  #define DFC_MASK
>>(DFC_BUCKET_CNT - 1) @@ -175,13 +181,14 @@ struct emc_cache {
>>
>> struct dfc_bucket {
>> uint16_t sig[DFC_ENTRY_PER_BUCKET];
>>-struct dp_netdev_flow *flow[DFC_ENTRY_PER_BUCKET];
>>+uint16_t index[DFC_ENTRY_PER_BUCKET];
>> };
>>
>> struct dfc_cache {
>> struct emc_cache emc_cache;
>> struct dfc_bucket buckets[DFC_BUCKET_CNT];
>> int sweep_idx;
>>+struct dp_netdev_flow *indirect_table[INDIRECT_TABLE_SIZE];
>> };
>>
>>
>
>>@@ -754,7 +761,7 @@ static int dpif_netdev_xps_get_tx_qid(const struct
>>dp_netdev_pmd_thread *pmd,
>>
>> static inline bool dfc_entry_alive(struct dp_netdev_flow *flow);  static void
>>emc_clear_entry(struct emc_entry *ce); -static void dfc_clear_entry(struct
>>dfc_bucket *b, int idx);
>>+static void dfc_clear_entry(struct dp_netdev_flow **flow, struct
>>+dfc_bucket *b, int idx);
>>
>> static void dp_netdev_request_reconfigure(struct dp_netdev *dp);
>>
>>@@ -782,9 +789,12 @@ dfc_cache_init(struct dfc_cache *flow_cache)
>> emc_cache_init(&flow_cache->emc_cache);
>> for (i = 0; i < DFC_BUCKET_CNT; i++) {
>> for (j = 0; j < DFC_ENTRY_PER_BUCKET; j++) {
>>-flow_cache->buckets[i].flow[j] = NULL;
>>+flow_cache->buckets[i].sig[j] = 0;
>> }
>> }
>>+for (i = 0; i < INDIRECT_TABLE_SIZE; i++) {
>>+flow_cache->indirect_table[i] 

Re: [ovs-dev] [RFC 4/4] dpif-netdev.c: Add indirect table

2018-02-23 Thread Bodireddy, Bhanuprakash
Hi Yipeng,

>If we store pointers in DFC, then the memory requirement is large. When
>there are VMs or multiple PMDs running on the same platform, they will
>compete the shared cache. So we want DFC to be as memory efficient as
>possible.

>
>Indirect table is a simple hash table that maps the DFC's result to the
>dp_netdev_flow's pointer. This is to reduce the memory size of the DFC
>cache, assuming that the megaflow count is much smaller than the exact
>match flow count. With this commit, we could reduce the 8-byte pointer to a
>2-byte index in DFC cache so that the memory/cache requirement is almost
>halved. Another approach we plan to try is to use the flow_table as the
>indirect table.

I assume this patch is only aimed at reducing the DFC cache memory footprint 
and doesn't introduce any new functionality?

With this I see the dfc_bucket size is at 32 bytes from earlier 80bytes in 3/4 
and the buckets now will be aligned to cache lines.
Also the dfc_cache size reduced to ~8MB from ~12MB in 1/4 and ~14MB in 3/4 
patches.

I am guessing there might be some performance improvement with this patch due 
to buckets aligning to cache lines, apart from the reduced memory footprint. Do you 
see any such advantage here in your benchmarks?

Regards,
Bhanuprakash.

>
>The indirect table size is a fixed constant for now.
>
>Signed-off-by: Yipeng Wang 
>---
> lib/dpif-netdev.c | 69 +++
>
> 1 file changed, 44 insertions(+), 25 deletions(-)
>
>diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 50a1d25..35197d3
>100644
>--- a/lib/dpif-netdev.c
>+++ b/lib/dpif-netdev.c
>@@ -151,6 +151,12 @@ struct netdev_flow_key {
>
> #define DFC_MASK_LEN 20
> #define DFC_ENTRY_PER_BUCKET 8
>+
>+/* For now we fix the Indirect table size, ideally it should be sized
>+according
>+ * to max megaflow count but less than 2^16  */ #define
>+INDIRECT_TABLE_SIZE (1u << 12) #define INDIRECT_TABLE_MASK
>+(INDIRECT_TABLE_SIZE - 1)
> #define DFC_ENTRIES (1u << DFC_MASK_LEN)  #define DFC_BUCKET_CNT
>(DFC_ENTRIES / DFC_ENTRY_PER_BUCKET)  #define DFC_MASK
>(DFC_BUCKET_CNT - 1) @@ -175,13 +181,14 @@ struct emc_cache {
>
> struct dfc_bucket {
> uint16_t sig[DFC_ENTRY_PER_BUCKET];
>-struct dp_netdev_flow *flow[DFC_ENTRY_PER_BUCKET];
>+uint16_t index[DFC_ENTRY_PER_BUCKET];
> };
>
> struct dfc_cache {
> struct emc_cache emc_cache;
> struct dfc_bucket buckets[DFC_BUCKET_CNT];
> int sweep_idx;
>+struct dp_netdev_flow *indirect_table[INDIRECT_TABLE_SIZE];
> };
>
> 

>@@ -754,7 +761,7 @@ static int dpif_netdev_xps_get_tx_qid(const struct
>dp_netdev_pmd_thread *pmd,
>
> static inline bool dfc_entry_alive(struct dp_netdev_flow *flow);  static void
>emc_clear_entry(struct emc_entry *ce); -static void dfc_clear_entry(struct
>dfc_bucket *b, int idx);
>+static void dfc_clear_entry(struct dp_netdev_flow **flow, struct
>+dfc_bucket *b, int idx);
>
> static void dp_netdev_request_reconfigure(struct dp_netdev *dp);
>
>@@ -782,9 +789,12 @@ dfc_cache_init(struct dfc_cache *flow_cache)
> emc_cache_init(&flow_cache->emc_cache);
> for (i = 0; i < DFC_BUCKET_CNT; i++) {
> for (j = 0; j < DFC_ENTRY_PER_BUCKET; j++) {
>-flow_cache->buckets[i].flow[j] = NULL;
>+flow_cache->buckets[i].sig[j] = 0;
> }
> }
>+for (i = 0; i < INDIRECT_TABLE_SIZE; i++) {
>+flow_cache->indirect_table[i] = NULL;
>+}
> flow_cache->sweep_idx = 0;
> }
>
>@@ -805,7 +815,7 @@ dfc_cache_uninit(struct dfc_cache *flow_cache)
>
> for (i = 0; i < DFC_BUCKET_CNT; i++) {
> for (j = 0; j < DFC_ENTRY_PER_BUCKET; j++) {
>-dfc_clear_entry(&(flow_cache->buckets[i]), j);
>+dfc_clear_entry(flow_cache->indirect_table,
>+ &(flow_cache->buckets[i]), j);
> }
> }
> emc_cache_uninit(&flow_cache->emc_cache);
>@@ -2259,7 +2269,7 @@ dfc_entry_get(struct dfc_cache *cache, const
>uint32_t hash)
> uint16_t sig = hash >> 16;
> for (int i = 0; i < DFC_ENTRY_PER_BUCKET; i++) {
> if(bucket->sig[i] == sig) {
>-return bucket->flow[i];
>+return cache->indirect_table[bucket->index[i]];
> }
> }
> return NULL;
>@@ -2272,28 +2282,33 @@ dfc_entry_alive(struct dp_netdev_flow *flow)  }
>
> static void
>-dfc_clear_entry(struct dfc_bucket *b, int idx)
>+dfc_clear_entry(struct dp_netdev_flow **ind_table, struct dfc_bucket
>+*b, int idx)
> {
>-if (b->flow[idx]) {
>-dp_netdev_flow_unref(b->flow[idx]);
>-b->flow[idx] = NULL;
>+if (ind_table[b->index[idx]]) {
>+dp_netdev_flow_unref(ind_table[b->index[idx]]);
>+ind_table[b->index[idx]] = NULL;
> }
> }
>
>-static inline void
>-dfc_change_entry(struct dfc_bucket *b, int idx, struct dp_netdev_flow
>*flow)
>+
>+static inline uint16_t
>+indirect_table_insert(struct dp_netdev_flow **indirect_table,
>+struct dp_netdev_flow *flow)
> {
>-if (b->flow[idx] != flow) {
>-if (b->flo