Re: [Mesa-dev] [PATCH v4] i965 : optimized bucket index calculation

2017-11-20 Thread Ian Romanick
On 11/08/2017 09:45 PM, aravindan.muthuku...@intel.com wrote:
> From: Aravindan Muthukumar 
> 
> Reducing Bucket index calculation to O(1).
> 
> This algorithm calculates the index using matrix method.
> Matrix arrangement is as below:
> Assuming PAGE_SIZE is 4096.
> 
>   1*4096   2*40963*40964*4096
>   5*4096   6*40967*40968*4096
>   10*4096  12*4096   14*4096   16*4096
>   20*4096  24*4096   28*4096   32*4096
>...  ...   ...   ...
>...  ...   ...   ...
>...  ...   ...   max_cache_size
> 
> From this matrix its clearly seen that every row
> follows the below way:
>   ...   ...   ...n
> n+(1/4)n  n+(1/2)n  n+(3/4)n2n
> 
> Row is calculated as log2(size/PAGE_SIZE)
> Column is calculated as converting the difference
> between the elements to fit into power size of two
> and indexing it.
> 
> Final Index is (row*4)+(col-1)
> 
> Tested with Intel Mesa CI.
> 
> Improves performance of 3DMark on BXT by 0.705966% +/- 0.229767% (n=20)
> 
> v4: Review comments on style and code comments implemented (Ian).
> v3: Review comments implemented (Ian).
> v2: Review comments implemented (Jason).
> 
> Signed-off-by: Aravindan Muthukumar 
> Signed-off-by: Kedar Karanje 

Since a fair amount of the code is now authored by me, I guess it's more
appropriate to add

Signed-off-by: Ian Romanick 

I think 3 S-o-b and a R-b should be sufficient, so I have pushed it.

> Reviewed-by: Yogesh Marathe 
> ---
>  src/mesa/drivers/dri/i965/brw_bufmgr.c | 47 
> --
>  1 file changed, 39 insertions(+), 8 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
> b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> index 17036b5..f21df5a 100644
> --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> @@ -86,6 +86,8 @@
>  
>  #define memclear(s) memset(, 0, sizeof(s))
>  
> +#define PAGE_SIZE 4096
> +
>  #define FILE_DEBUG_FLAG DEBUG_BUFMGR
>  
>  static inline int
> @@ -180,19 +182,44 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t 
> pitch, uint32_t tiling)
> return ALIGN(pitch, tile_width);
>  }
>  
> +/**
> + * This function finds the correct bucket fit for the input size.
> + * The function works with O(1) complexity when the requested size
> + * was queried instead of iterating the size through all the buckets.
> + */
>  static struct bo_cache_bucket *
>  bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
>  {
> -   int i;
> +   /* Calculating the pages and rounding up to the page size. */
> +   const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
> +
> +   /* Row  Bucket sizesclz((x-1) | 3)   RowColumn
> +*in pages  stride   size
> +*   0:   1  2  3  4 -> 30 30 30 304   1
> +*   1:   5  6  7  8 -> 29 29 29 294   1
> +*   2:  10 12 14 16 -> 28 28 28 288   2
> +*   3:  20 24 28 32 -> 27 27 27 27   16   4
> +*/
> +   const unsigned row = 30 - __builtin_clz((pages - 1) | 3);
> +   const unsigned row_max_pages = 4 << row;
> +   
> +   /* The '& ~2' is the special case for row 1. In row 1, max pages /
> +* 2 is 2, but the previous row maximum is zero (because there is
> +* no previous row). All row maximum sizes are power of 2, so that
> +* is the only case where that bit will be set.
> +*/
> +   const unsigned prev_row_max_pages = (row_max_pages / 2) & ~2;
> +   int col_size_log2 = row - 1;
> +   col_size_log2 += (col_size_log2 < 0);
>  
> -   for (i = 0; i < bufmgr->num_buckets; i++) {
> -  struct bo_cache_bucket *bucket = >cache_bucket[i];
> -  if (bucket->size >= size) {
> - return bucket;
> -  }
> -   }
> +   const unsigned col = (pages - prev_row_max_pages +
> +((1 << col_size_log2) - 1)) >> col_size_log2;
>  
> -   return NULL;
> +   /* Calculating the index based on the row and column. */
> +   const unsigned index = (row * 4) + (col - 1);
> +
> +   return (index < bufmgr->num_buckets) ?
> +  >cache_bucket[index] : NULL;
>  }
>  
>  int
> @@ -1254,6 +1281,10 @@ add_bucket(struct brw_bufmgr *bufmgr, int size)
> list_inithead(>cache_bucket[i].head);
> bufmgr->cache_bucket[i].size = size;
> bufmgr->num_buckets++;
> +
> +   assert(bucket_for_size(bufmgr, size) == >cache_bucket[i]);
> +   assert(bucket_for_size(bufmgr, size - 2048) == >cache_bucket[i]);
> +   assert(bucket_for_size(bufmgr, size + 1) != >cache_bucket[i]);
>  }
>  
>  static void
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4] i965 : optimized bucket index calculation

2017-11-19 Thread Muthukumar, Aravindan
Hi Ian,
Could you please review the below version of the patch and provide the 
comments? 
All the comments which were given in the previous versions are incorporated.

Thanks,
Aravindan

> -Original Message-
> From: Muthukumar, Aravindan
> Sent: Thursday, November 9, 2017 11:15 AM
> To: mesa-dev@lists.freedesktop.org
> Cc: Muthukumar, Aravindan ; J Karanje,
> Kedar 
> Subject: [PATCH v4] i965 : optimized bucket index calculation
> 
> From: Aravindan Muthukumar 
> 
> Reducing Bucket index calculation to O(1).
> 
> This algorithm calculates the index using matrix method.
> Matrix arrangement is as below:
> Assuming PAGE_SIZE is 4096.
> 
>   1*4096   2*40963*40964*4096
>   5*4096   6*40967*40968*4096
>   10*4096  12*4096   14*4096   16*4096
>   20*4096  24*4096   28*4096   32*4096
>...  ...   ...   ...
>...  ...   ...   ...
>...  ...   ...   max_cache_size
> 
> From this matrix its clearly seen that every row follows the below way:
>   ...   ...   ...n
> n+(1/4)n  n+(1/2)n  n+(3/4)n2n
> 
> Row is calculated as log2(size/PAGE_SIZE) Column is calculated as converting
> the difference between the elements to fit into power size of two and indexing
> it.
> 
> Final Index is (row*4)+(col-1)
> 
> Tested with Intel Mesa CI.
> 
> Improves performance of 3DMark on BXT by 0.705966% +/- 0.229767% (n=20)
> 
> v4: Review comments on style and code comments implemented (Ian).
> v3: Review comments implemented (Ian).
> v2: Review comments implemented (Jason).
> 
> Signed-off-by: Aravindan Muthukumar 
> Signed-off-by: Kedar Karanje 
> Reviewed-by: Yogesh Marathe 
> ---
>  src/mesa/drivers/dri/i965/brw_bufmgr.c | 47
> --
>  1 file changed, 39 insertions(+), 8 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> index 17036b5..f21df5a 100644
> --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> @@ -86,6 +86,8 @@
> 
>  #define memclear(s) memset(, 0, sizeof(s))
> 
> +#define PAGE_SIZE 4096
> +
>  #define FILE_DEBUG_FLAG DEBUG_BUFMGR
> 
>  static inline int
> @@ -180,19 +182,44 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t
> pitch, uint32_t tiling)
> return ALIGN(pitch, tile_width);
>  }
> 
> +/**
> + * This function finds the correct bucket fit for the input size.
> + * The function works with O(1) complexity when the requested size
> + * was queried instead of iterating the size through all the buckets.
> + */
>  static struct bo_cache_bucket *
>  bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)  {
> -   int i;
> +   /* Calculating the pages and rounding up to the page size. */
> +   const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
> +
> +   /* Row  Bucket sizesclz((x-1) | 3)   RowColumn
> +*in pages  stride   size
> +*   0:   1  2  3  4 -> 30 30 30 304   1
> +*   1:   5  6  7  8 -> 29 29 29 294   1
> +*   2:  10 12 14 16 -> 28 28 28 288   2
> +*   3:  20 24 28 32 -> 27 27 27 27   16   4
> +*/
> +   const unsigned row = 30 - __builtin_clz((pages - 1) | 3);
> +   const unsigned row_max_pages = 4 << row;
> +
> +   /* The '& ~2' is the special case for row 1. In row 1, max pages /
> +* 2 is 2, but the previous row maximum is zero (because there is
> +* no previous row). All row maximum sizes are power of 2, so that
> +* is the only case where that bit will be set.
> +*/
> +   const unsigned prev_row_max_pages = (row_max_pages / 2) & ~2;
> +   int col_size_log2 = row - 1;
> +   col_size_log2 += (col_size_log2 < 0);
> 
> -   for (i = 0; i < bufmgr->num_buckets; i++) {
> -  struct bo_cache_bucket *bucket = >cache_bucket[i];
> -  if (bucket->size >= size) {
> - return bucket;
> -  }
> -   }
> +   const unsigned col = (pages - prev_row_max_pages +
> +((1 << col_size_log2) - 1)) >> col_size_log2;
> 
> -   return NULL;
> +   /* Calculating the index based on the row and column. */
> +   const unsigned index = (row * 4) + (col - 1);
> +
> +   return (index < bufmgr->num_buckets) ?
> +  >cache_bucket[index] : NULL;
>  }
> 
>  int
> @@ -1254,6 +1281,10 @@ add_bucket(struct brw_bufmgr *bufmgr, int size)
> list_inithead(>cache_bucket[i].head);
> bufmgr->cache_bucket[i].size = size;
> bufmgr->num_buckets++;
> +
> +   assert(bucket_for_size(bufmgr, size) == >cache_bucket[i]);
> +   assert(bucket_for_size(bufmgr, size - 2048) == >cache_bucket[i]);
> +   assert(bucket_for_size(bufmgr, size + 1) !=
> + >cache_bucket[i]);
>  }
> 
>  static void
> --
> 2.7.4


[Mesa-dev] [PATCH v4] i965 : optimized bucket index calculation

2017-11-08 Thread aravindan . muthukumar
From: Aravindan Muthukumar 

Reducing Bucket index calculation to O(1).

This algorithm calculates the index using matrix method.
Matrix arrangement is as below:
Assuming PAGE_SIZE is 4096.

  1*4096   2*40963*40964*4096
  5*4096   6*40967*40968*4096
  10*4096  12*4096   14*4096   16*4096
  20*4096  24*4096   28*4096   32*4096
   ...  ...   ...   ...
   ...  ...   ...   ...
   ...  ...   ...   max_cache_size

From this matrix its clearly seen that every row
follows the below way:
  ...   ...   ...n
n+(1/4)n  n+(1/2)n  n+(3/4)n2n

Row is calculated as log2(size/PAGE_SIZE)
Column is calculated as converting the difference
between the elements to fit into power size of two
and indexing it.

Final Index is (row*4)+(col-1)

Tested with Intel Mesa CI.

Improves performance of 3DMark on BXT by 0.705966% +/- 0.229767% (n=20)

v4: Review comments on style and code comments implemented (Ian).
v3: Review comments implemented (Ian).
v2: Review comments implemented (Jason).

Signed-off-by: Aravindan Muthukumar 
Signed-off-by: Kedar Karanje 
Reviewed-by: Yogesh Marathe 
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 47 --
 1 file changed, 39 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 17036b5..f21df5a 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -86,6 +86,8 @@
 
 #define memclear(s) memset(, 0, sizeof(s))
 
+#define PAGE_SIZE 4096
+
 #define FILE_DEBUG_FLAG DEBUG_BUFMGR
 
 static inline int
@@ -180,19 +182,44 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t pitch, 
uint32_t tiling)
return ALIGN(pitch, tile_width);
 }
 
+/**
+ * This function finds the correct bucket fit for the input size.
+ * The function works with O(1) complexity when the requested size
+ * was queried instead of iterating the size through all the buckets.
+ */
 static struct bo_cache_bucket *
 bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
 {
-   int i;
+   /* Calculating the pages and rounding up to the page size. */
+   const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+
+   /* Row  Bucket sizesclz((x-1) | 3)   RowColumn
+*in pages  stride   size
+*   0:   1  2  3  4 -> 30 30 30 304   1
+*   1:   5  6  7  8 -> 29 29 29 294   1
+*   2:  10 12 14 16 -> 28 28 28 288   2
+*   3:  20 24 28 32 -> 27 27 27 27   16   4
+*/
+   const unsigned row = 30 - __builtin_clz((pages - 1) | 3);
+   const unsigned row_max_pages = 4 << row;
+   
+   /* The '& ~2' is the special case for row 1. In row 1, max pages /
+* 2 is 2, but the previous row maximum is zero (because there is
+* no previous row). All row maximum sizes are power of 2, so that
+* is the only case where that bit will be set.
+*/
+   const unsigned prev_row_max_pages = (row_max_pages / 2) & ~2;
+   int col_size_log2 = row - 1;
+   col_size_log2 += (col_size_log2 < 0);
 
-   for (i = 0; i < bufmgr->num_buckets; i++) {
-  struct bo_cache_bucket *bucket = >cache_bucket[i];
-  if (bucket->size >= size) {
- return bucket;
-  }
-   }
+   const unsigned col = (pages - prev_row_max_pages +
+((1 << col_size_log2) - 1)) >> col_size_log2;
 
-   return NULL;
+   /* Calculating the index based on the row and column. */
+   const unsigned index = (row * 4) + (col - 1);
+
+   return (index < bufmgr->num_buckets) ?
+  >cache_bucket[index] : NULL;
 }
 
 int
@@ -1254,6 +1281,10 @@ add_bucket(struct brw_bufmgr *bufmgr, int size)
list_inithead(>cache_bucket[i].head);
bufmgr->cache_bucket[i].size = size;
bufmgr->num_buckets++;
+
+   assert(bucket_for_size(bufmgr, size) == >cache_bucket[i]);
+   assert(bucket_for_size(bufmgr, size - 2048) == >cache_bucket[i]);
+   assert(bucket_for_size(bufmgr, size + 1) != >cache_bucket[i]);
 }
 
 static void
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev