Re: [Mesa-dev] [PATCH v3] i965 : optimized bucket index calculation

2017-11-08 Thread Muthukumar, Aravindan
> On 11/06/2017 08:30 PM, aravindan.muthuku...@intel.com wrote:
> > From: Aravindan Muthukumar 
> >
> > Now the complexity has been reduced to O(1)
> >
> > Algorithm calculates the index using matrix method.
> > Matrix arrangement is as below:
> > Assuming PAGE_SIZE is 4096.
> >
> >   1*4096   2*40963*40964*4096
> >   5*4096   6*40967*40968*4096
> >   10*4096  12*4096   14*4096   16*4096
> >   20*4096  24*4096   28*4096   32*4096
> >...  ...   ...   ...
> >...  ...   ...   ...
> >...  ...   ...   max_cache_size
> >
> > From this matrix its clearly seen that every row follows the below
> > way:
> >   ...   ...   ...n
> > n+(1/4)n  n+(1/2)n  n+(3/4)n2n
> >
> > Row is calculated as log2(size/PAGE_SIZE) Column is calculated as
> > converting the difference between the elements to fit into power size
> > of two and indexing it.
> >
> > Final Index is (row*4)+(col-1)
> >
> > Tested with Intel Mesa CI.
> >
> > Improves performance of 3DMark on BXT by 0.705966% +/- 0.229767%
> > (n=20)
> >
> > v3: review comments implemented (Ian).
> > v2: review comments implemented (Jason).
> >
> > Signed-off-by: Aravindan Muthukumar 
> > Signed-off-by: Kedar Karanje 
> > Reviewed-by: Yogesh Marathe 
> > ---
> >  src/mesa/drivers/dri/i965/brw_bufmgr.c | 38
> > +++---
> >  1 file changed, 30 insertions(+), 8 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> > b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> > index 17036b5..9a423da 100644
> > --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> > +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> > @@ -86,6 +86,8 @@
> >
> >  #define memclear(s) memset(, 0, sizeof(s))
> >
> > +#define PAGE_SIZE 4096
> > +
> >  #define FILE_DEBUG_FLAG DEBUG_BUFMGR
> >
> >  static inline int
> > @@ -180,19 +182,35 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t
> pitch, uint32_t tiling)
> > return ALIGN(pitch, tile_width);
> >  }
> >
> > +/*
> > + * This function finds the correct bucket fit for the input size.
> > + * The function works with O(1) complexity when the requested size
> > + * was queried instead of iterating the size through all the buckets.
> > + */
> >  static struct bo_cache_bucket *
> >  bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)  {
> > -   int i;
> > +   /* Calculating the pages and rounding up to the page size. */
> > +   const unsigned int pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
> >
> > -   for (i = 0; i < bufmgr->num_buckets; i++) {
> > -  struct bo_cache_bucket *bucket = >cache_bucket[i];
> > -  if (bucket->size >= size) {
> > - return bucket;
> > -  }
> > -   }
> > +   /* Finding the row number based on the calculated pages. */
> > +   const unsigned int rows = 30 - __builtin_clz((pages - 1) | 3);
> >
> > -   return NULL;
> 
> Why did you make random (and incorrect) style changes and delete
> (useful) comments from the code I sent?
>

> > > Thanks Ian. I added comments based on my understanding and I get the 
> > > point I'll push v4 with your comments.
 
> > +   const unsigned int row_max_pages = 4 << rows;
> > +   const unsigned int prev_row_max_pages = (row_max_pages / 2) & ~2;
> > +
> > +   /* Finding the column number using column interval. */
> > +   int col_size_log2 = rows - 1;
> > +   col_size_log2 += (col_size_log2 < 0);
> > +
> > +   const unsigned int col = ( (pages - prev_row_max_pages +
> > +( (1 << col_size_log2) - 1) ) >>
> > + col_size_log2 );
> > +
> > +   /* Calculating the index based on the row and column. */
> > +   const unsigned int index = (rows * 4) + (col - 1);
> > +
> > +   return (index < bufmgr->num_buckets) ?
> > +  >cache_bucket[index] : NULL;
> >  }
> >
> >  int
> > @@ -1254,6 +1272,10 @@ add_bucket(struct brw_bufmgr *bufmgr, int size)
> > list_inithead(>cache_bucket[i].head);
> > bufmgr->cache_bucket[i].size = size;
> > bufmgr->num_buckets++;
> > +
> > +   assert(bucket_for_size(bufmgr, size) == >cache_bucket[i]);
> > +   assert(bucket_for_size(bufmgr, size - 2048) == 
> > >cache_bucket[i]);
> > +   assert(bucket_for_size(bufmgr, size + 1) !=
> > + >cache_bucket[i]);
> >  }
> >
> >  static void
> >

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3] i965 : optimized bucket index calculation

2017-11-07 Thread Ian Romanick
On 11/06/2017 08:30 PM, aravindan.muthuku...@intel.com wrote:
> From: Aravindan Muthukumar 
> 
> Now the complexity has been reduced to O(1)
> 
> Algorithm calculates the index using matrix method.
> Matrix arrangement is as below:
> Assuming PAGE_SIZE is 4096.
> 
>   1*4096   2*40963*40964*4096
>   5*4096   6*40967*40968*4096
>   10*4096  12*4096   14*4096   16*4096
>   20*4096  24*4096   28*4096   32*4096
>...  ...   ...   ...
>...  ...   ...   ...
>...  ...   ...   max_cache_size
> 
> From this matrix its clearly seen that every row
> follows the below way:
>   ...   ...   ...n
> n+(1/4)n  n+(1/2)n  n+(3/4)n2n
> 
> Row is calculated as log2(size/PAGE_SIZE)
> Column is calculated as converting the difference
> between the elements to fit into power size of two
> and indexing it.
> 
> Final Index is (row*4)+(col-1)
> 
> Tested with Intel Mesa CI.
> 
> Improves performance of 3DMark on BXT by 0.705966% +/- 0.229767% (n=20)
> 
> v3: review comments implemented (Ian).
> v2: review comments implemented (Jason).
>  
> Signed-off-by: Aravindan Muthukumar 
> Signed-off-by: Kedar Karanje 
> Reviewed-by: Yogesh Marathe 
> ---
>  src/mesa/drivers/dri/i965/brw_bufmgr.c | 38 
> +++---
>  1 file changed, 30 insertions(+), 8 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
> b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> index 17036b5..9a423da 100644
> --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> @@ -86,6 +86,8 @@
>  
>  #define memclear(s) memset(, 0, sizeof(s))
>  
> +#define PAGE_SIZE 4096
> +
>  #define FILE_DEBUG_FLAG DEBUG_BUFMGR
>  
>  static inline int
> @@ -180,19 +182,35 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t 
> pitch, uint32_t tiling)
> return ALIGN(pitch, tile_width);
>  }
>  
> +/*
> + * This function finds the correct bucket fit for the input size.
> + * The function works with O(1) complexity when the requested size
> + * was queried instead of iterating the size through all the buckets.
> + */
>  static struct bo_cache_bucket *
>  bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
>  {
> -   int i;
> +   /* Calculating the pages and rounding up to the page size. */
> +   const unsigned int pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
>  
> -   for (i = 0; i < bufmgr->num_buckets; i++) {
> -  struct bo_cache_bucket *bucket = >cache_bucket[i];
> -  if (bucket->size >= size) {
> - return bucket;
> -  }
> -   }
> +   /* Finding the row number based on the calculated pages. */
> +   const unsigned int rows = 30 - __builtin_clz((pages - 1) | 3);
>  
> -   return NULL;

Why did you make random (and incorrect) style changes and delete
(useful) comments from the code I sent?

> +   const unsigned int row_max_pages = 4 << rows;
> +   const unsigned int prev_row_max_pages = (row_max_pages / 2) & ~2;
> +
> +   /* Finding the column number using column interval. */
> +   int col_size_log2 = rows - 1;
> +   col_size_log2 += (col_size_log2 < 0);
> +
> +   const unsigned int col = ( (pages - prev_row_max_pages +
> +( (1 << col_size_log2) - 1) ) >> col_size_log2 );
> +
> +   /* Calculating the index based on the row and column. */
> +   const unsigned int index = (rows * 4) + (col - 1);
> +
> +   return (index < bufmgr->num_buckets) ?
> +  >cache_bucket[index] : NULL;
>  }
>  
>  int
> @@ -1254,6 +1272,10 @@ add_bucket(struct brw_bufmgr *bufmgr, int size)
> list_inithead(>cache_bucket[i].head);
> bufmgr->cache_bucket[i].size = size;
> bufmgr->num_buckets++;
> +
> +   assert(bucket_for_size(bufmgr, size) == >cache_bucket[i]);
> +   assert(bucket_for_size(bufmgr, size - 2048) == >cache_bucket[i]);
> +   assert(bucket_for_size(bufmgr, size + 1) != >cache_bucket[i]);
>  }
>  
>  static void
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3] i965 : optimized bucket index calculation

2017-11-06 Thread aravindan . muthukumar
From: Aravindan Muthukumar 

Now the complexity has been reduced to O(1)

Algorithm calculates the index using matrix method.
Matrix arrangement is as below:
Assuming PAGE_SIZE is 4096.

  1*4096   2*40963*40964*4096
  5*4096   6*40967*40968*4096
  10*4096  12*4096   14*4096   16*4096
  20*4096  24*4096   28*4096   32*4096
   ...  ...   ...   ...
   ...  ...   ...   ...
   ...  ...   ...   max_cache_size

From this matrix its clearly seen that every row
follows the below way:
  ...   ...   ...n
n+(1/4)n  n+(1/2)n  n+(3/4)n2n

Row is calculated as log2(size/PAGE_SIZE)
Column is calculated as converting the difference
between the elements to fit into power size of two
and indexing it.

Final Index is (row*4)+(col-1)

Tested with Intel Mesa CI.

Improves performance of 3DMark on BXT by 0.705966% +/- 0.229767% (n=20)

v3: review comments implemented (Ian).
v2: review comments implemented (Jason).
 
Signed-off-by: Aravindan Muthukumar 
Signed-off-by: Kedar Karanje 
Reviewed-by: Yogesh Marathe 
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 38 +++---
 1 file changed, 30 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 17036b5..9a423da 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -86,6 +86,8 @@
 
 #define memclear(s) memset(, 0, sizeof(s))
 
+#define PAGE_SIZE 4096
+
 #define FILE_DEBUG_FLAG DEBUG_BUFMGR
 
 static inline int
@@ -180,19 +182,35 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t pitch, 
uint32_t tiling)
return ALIGN(pitch, tile_width);
 }
 
+/*
+ * This function finds the correct bucket fit for the input size.
+ * The function works with O(1) complexity when the requested size
+ * was queried instead of iterating the size through all the buckets.
+ */
 static struct bo_cache_bucket *
 bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
 {
-   int i;
+   /* Calculating the pages and rounding up to the page size. */
+   const unsigned int pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
 
-   for (i = 0; i < bufmgr->num_buckets; i++) {
-  struct bo_cache_bucket *bucket = >cache_bucket[i];
-  if (bucket->size >= size) {
- return bucket;
-  }
-   }
+   /* Finding the row number based on the calculated pages. */
+   const unsigned int rows = 30 - __builtin_clz((pages - 1) | 3);
 
-   return NULL;
+   const unsigned int row_max_pages = 4 << rows;
+   const unsigned int prev_row_max_pages = (row_max_pages / 2) & ~2;
+
+   /* Finding the column number using column interval. */
+   int col_size_log2 = rows - 1;
+   col_size_log2 += (col_size_log2 < 0);
+
+   const unsigned int col = ( (pages - prev_row_max_pages +
+( (1 << col_size_log2) - 1) ) >> col_size_log2 );
+
+   /* Calculating the index based on the row and column. */
+   const unsigned int index = (rows * 4) + (col - 1);
+
+   return (index < bufmgr->num_buckets) ?
+  >cache_bucket[index] : NULL;
 }
 
 int
@@ -1254,6 +1272,10 @@ add_bucket(struct brw_bufmgr *bufmgr, int size)
list_inithead(>cache_bucket[i].head);
bufmgr->cache_bucket[i].size = size;
bufmgr->num_buckets++;
+
+   assert(bucket_for_size(bufmgr, size) == >cache_bucket[i]);
+   assert(bucket_for_size(bufmgr, size - 2048) == >cache_bucket[i]);
+   assert(bucket_for_size(bufmgr, size + 1) != >cache_bucket[i]);
 }
 
 static void
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3] i965 : optimized bucket index calculation.

2017-10-26 Thread aravindan . muthukumar
From: Aravindan Muthukumar 

 Avoiding the loop which was running with O(n) complexity.
 Now the complexity has been reduced to O(1)

 Algorithm calculates the index using matrix method.
 Matrix arrangement is as below:
 Assuming PAGE_SIZE is 4096.

  1*4096   2*40963*40964*4096
  5*4096   6*40967*40968*4096
  10*4096  12*4096   14*4096   16*4096
  20*4096  24*4096   28*4096   32*4096
   ...  ...   ...   ...
   ...  ...   ...   ...
   ...  ...   ...   max_cache_size

 From this matrix its clearly seen that every row
 follows the below way:
  ...   ...   ...n
n+(1/4)n  n+(1/2)n  n+(3/4)n2n

 Row is calulated as log2(size/PAGE_SIZE)
 Column is calculated as converting the difference
 between the elements to fit into power size of two
 and indexing it.

 Final Index is (row*4)+(col-1)

 Tested with Intel Mesa CI.

 Improves performance of 3d Mark on Broxton.
 Analyzed using Compare Perf Analyser:
 Average : 201.2 +/- 65.4836 (n=20)
 Percentage : 0.705966% +/- 0.229767% (n=20)

 v3: Review comments implemented

 Signed-off-by: Aravindan Muthukumar 
 Signed-off-by: Kedar Karanje 
 Reviewed-by: Yogesh Marathe 
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 42 --
 1 file changed, 35 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 17036b5..49514a4 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -86,6 +86,8 @@
 
 #define memclear(s) memset(, 0, sizeof(s))
 
+#define PAGE_SIZE 4096
+
 #define FILE_DEBUG_FLAG DEBUG_BUFMGR
 
 static inline int
@@ -180,19 +182,41 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t pitch, 
uint32_t tiling)
return ALIGN(pitch, tile_width);
 }
 
+static inline int
+ilog2_round_up(int value)
+{
+   assert(value != 0);
+   return 32 - __builtin_clz(value - 1);
+}
+
+/*
+ * This function finds the correct bucket fit for the input size.
+ * The function works with O(1) complexity when the requested size
+ * was queried instead of iterating the size through all the buckets.
+ */
 static struct bo_cache_bucket *
 bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
 {
-   int i;
+   int index;
 
-   for (i = 0; i < bufmgr->num_buckets; i++) {
-  struct bo_cache_bucket *bucket = >cache_bucket[i];
-  if (bucket->size >= size) {
- return bucket;
-  }
+   /* Condition for size less than 4*4096 (16KB) page size. */
+   if (size <= 4 * PAGE_SIZE) {
+  index = DIV_ROUND_UP(size, PAGE_SIZE) - 1;
+   } else {
+  /* Number of pages of page size */
+  const int pages = DIV_ROUND_UP(size, PAGE_SIZE);
+  const int pages_log2 = ilog2_round_up(pages) - 1;
+
+  /* Finding the row and column of the matrix */
+  const int row = pages_log2 - 1;
+  const int col = DIV_ROUND_UP((pages - (1 << pages_log2)),
+   (1 << (pages_log2 - 2)));
+  /* Using the calculated row and column to index into the matrix */
+  index = (row << 2) + (col - 1);
}
 
-   return NULL;
+   return (index >= 0 && index < bufmgr->num_buckets) ?
+  >cache_bucket[index] : NULL;
 }
 
 int
@@ -1254,6 +1278,10 @@ add_bucket(struct brw_bufmgr *bufmgr, int size)
list_inithead(>cache_bucket[i].head);
bufmgr->cache_bucket[i].size = size;
bufmgr->num_buckets++;
+
+   assert(bucket_for_size(bufmgr, size) == >cache_bucket[i]);
+   assert(bucket_for_size(bufmgr, size - 2048) == >cache_bucket[i]);
+   assert(bucket_for_size(bufmgr, size + 1) != >cache_bucket[i]);
 }
 
 static void
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev