Re: [Mesa-dev] [PATCH v3] i965 : optimized bucket index calculation
> On 11/06/2017 08:30 PM, aravindan.muthuku...@intel.com wrote: > > From: Aravindan Muthukumar> > > > Now the complexity has been reduced to O(1) > > > > Algorithm calculates the index using matrix method. > > Matrix arrangement is as below: > > Assuming PAGE_SIZE is 4096. > > > > 1*4096 2*40963*40964*4096 > > 5*4096 6*40967*40968*4096 > > 10*4096 12*4096 14*4096 16*4096 > > 20*4096 24*4096 28*4096 32*4096 > >... ... ... ... > >... ... ... ... > >... ... ... max_cache_size > > > > From this matrix its clearly seen that every row follows the below > > way: > > ... ... ...n > > n+(1/4)n n+(1/2)n n+(3/4)n2n > > > > Row is calculated as log2(size/PAGE_SIZE) Column is calculated as > > converting the difference between the elements to fit into power size > > of two and indexing it. > > > > Final Index is (row*4)+(col-1) > > > > Tested with Intel Mesa CI. > > > > Improves performance of 3DMark on BXT by 0.705966% +/- 0.229767% > > (n=20) > > > > v3: review comments implemented (Ian). > > v2: review comments implemented (Jason). > > > > Signed-off-by: Aravindan Muthukumar > > Signed-off-by: Kedar Karanje > > Reviewed-by: Yogesh Marathe > > --- > > src/mesa/drivers/dri/i965/brw_bufmgr.c | 38 > > +++--- > > 1 file changed, 30 insertions(+), 8 deletions(-) > > > > diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c > > b/src/mesa/drivers/dri/i965/brw_bufmgr.c > > index 17036b5..9a423da 100644 > > --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c > > +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c > > @@ -86,6 +86,8 @@ > > > > #define memclear(s) memset(, 0, sizeof(s)) > > > > +#define PAGE_SIZE 4096 > > + > > #define FILE_DEBUG_FLAG DEBUG_BUFMGR > > > > static inline int > > @@ -180,19 +182,35 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t > pitch, uint32_t tiling) > > return ALIGN(pitch, tile_width); > > } > > > > +/* > > + * This function finds the correct bucket fit for the input size. > > + * The function works with O(1) complexity when the requested size > > + * was queried instead of iterating the size through all the buckets. > > + */ > > static struct bo_cache_bucket * > > bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size) { > > - int i; > > + /* Calculating the pages and rounding up to the page size. */ > > + const unsigned int pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; > > > > - for (i = 0; i < bufmgr->num_buckets; i++) { > > - struct bo_cache_bucket *bucket = >cache_bucket[i]; > > - if (bucket->size >= size) { > > - return bucket; > > - } > > - } > > + /* Finding the row number based on the calculated pages. */ > > + const unsigned int rows = 30 - __builtin_clz((pages - 1) | 3); > > > > - return NULL; > > Why did you make random (and incorrect) style changes and delete > (useful) comments from the code I sent? > > > > Thanks Ian. I added comments based on my understanding and I get the > > > point I'll push v4 with your comments. > > + const unsigned int row_max_pages = 4 << rows; > > + const unsigned int prev_row_max_pages = (row_max_pages / 2) & ~2; > > + > > + /* Finding the column number using column interval. */ > > + int col_size_log2 = rows - 1; > > + col_size_log2 += (col_size_log2 < 0); > > + > > + const unsigned int col = ( (pages - prev_row_max_pages + > > +( (1 << col_size_log2) - 1) ) >> > > + col_size_log2 ); > > + > > + /* Calculating the index based on the row and column. */ > > + const unsigned int index = (rows * 4) + (col - 1); > > + > > + return (index < bufmgr->num_buckets) ? > > + >cache_bucket[index] : NULL; > > } > > > > int > > @@ -1254,6 +1272,10 @@ add_bucket(struct brw_bufmgr *bufmgr, int size) > > list_inithead(>cache_bucket[i].head); > > bufmgr->cache_bucket[i].size = size; > > bufmgr->num_buckets++; > > + > > + assert(bucket_for_size(bufmgr, size) == >cache_bucket[i]); > > + assert(bucket_for_size(bufmgr, size - 2048) == > > >cache_bucket[i]); > > + assert(bucket_for_size(bufmgr, size + 1) != > > + >cache_bucket[i]); > > } > > > > static void > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3] i965 : optimized bucket index calculation
On 11/06/2017 08:30 PM, aravindan.muthuku...@intel.com wrote: > From: Aravindan Muthukumar> > Now the complexity has been reduced to O(1) > > Algorithm calculates the index using matrix method. > Matrix arrangement is as below: > Assuming PAGE_SIZE is 4096. > > 1*4096 2*40963*40964*4096 > 5*4096 6*40967*40968*4096 > 10*4096 12*4096 14*4096 16*4096 > 20*4096 24*4096 28*4096 32*4096 >... ... ... ... >... ... ... ... >... ... ... max_cache_size > > From this matrix its clearly seen that every row > follows the below way: > ... ... ...n > n+(1/4)n n+(1/2)n n+(3/4)n2n > > Row is calculated as log2(size/PAGE_SIZE) > Column is calculated as converting the difference > between the elements to fit into power size of two > and indexing it. > > Final Index is (row*4)+(col-1) > > Tested with Intel Mesa CI. > > Improves performance of 3DMark on BXT by 0.705966% +/- 0.229767% (n=20) > > v3: review comments implemented (Ian). > v2: review comments implemented (Jason). > > Signed-off-by: Aravindan Muthukumar > Signed-off-by: Kedar Karanje > Reviewed-by: Yogesh Marathe > --- > src/mesa/drivers/dri/i965/brw_bufmgr.c | 38 > +++--- > 1 file changed, 30 insertions(+), 8 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c > b/src/mesa/drivers/dri/i965/brw_bufmgr.c > index 17036b5..9a423da 100644 > --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c > +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c > @@ -86,6 +86,8 @@ > > #define memclear(s) memset(, 0, sizeof(s)) > > +#define PAGE_SIZE 4096 > + > #define FILE_DEBUG_FLAG DEBUG_BUFMGR > > static inline int > @@ -180,19 +182,35 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t > pitch, uint32_t tiling) > return ALIGN(pitch, tile_width); > } > > +/* > + * This function finds the correct bucket fit for the input size. > + * The function works with O(1) complexity when the requested size > + * was queried instead of iterating the size through all the buckets. > + */ > static struct bo_cache_bucket * > bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size) > { > - int i; > + /* Calculating the pages and rounding up to the page size. */ > + const unsigned int pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; > > - for (i = 0; i < bufmgr->num_buckets; i++) { > - struct bo_cache_bucket *bucket = >cache_bucket[i]; > - if (bucket->size >= size) { > - return bucket; > - } > - } > + /* Finding the row number based on the calculated pages. */ > + const unsigned int rows = 30 - __builtin_clz((pages - 1) | 3); > > - return NULL; Why did you make random (and incorrect) style changes and delete (useful) comments from the code I sent? > + const unsigned int row_max_pages = 4 << rows; > + const unsigned int prev_row_max_pages = (row_max_pages / 2) & ~2; > + > + /* Finding the column number using column interval. */ > + int col_size_log2 = rows - 1; > + col_size_log2 += (col_size_log2 < 0); > + > + const unsigned int col = ( (pages - prev_row_max_pages + > +( (1 << col_size_log2) - 1) ) >> col_size_log2 ); > + > + /* Calculating the index based on the row and column. */ > + const unsigned int index = (rows * 4) + (col - 1); > + > + return (index < bufmgr->num_buckets) ? > + >cache_bucket[index] : NULL; > } > > int > @@ -1254,6 +1272,10 @@ add_bucket(struct brw_bufmgr *bufmgr, int size) > list_inithead(>cache_bucket[i].head); > bufmgr->cache_bucket[i].size = size; > bufmgr->num_buckets++; > + > + assert(bucket_for_size(bufmgr, size) == >cache_bucket[i]); > + assert(bucket_for_size(bufmgr, size - 2048) == >cache_bucket[i]); > + assert(bucket_for_size(bufmgr, size + 1) != >cache_bucket[i]); > } > > static void > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3] i965 : optimized bucket index calculation
From: Aravindan MuthukumarNow the complexity has been reduced to O(1) Algorithm calculates the index using matrix method. Matrix arrangement is as below: Assuming PAGE_SIZE is 4096. 1*4096 2*40963*40964*4096 5*4096 6*40967*40968*4096 10*4096 12*4096 14*4096 16*4096 20*4096 24*4096 28*4096 32*4096 ... ... ... ... ... ... ... ... ... ... ... max_cache_size From this matrix its clearly seen that every row follows the below way: ... ... ...n n+(1/4)n n+(1/2)n n+(3/4)n2n Row is calculated as log2(size/PAGE_SIZE) Column is calculated as converting the difference between the elements to fit into power size of two and indexing it. Final Index is (row*4)+(col-1) Tested with Intel Mesa CI. Improves performance of 3DMark on BXT by 0.705966% +/- 0.229767% (n=20) v3: review comments implemented (Ian). v2: review comments implemented (Jason). Signed-off-by: Aravindan Muthukumar Signed-off-by: Kedar Karanje Reviewed-by: Yogesh Marathe --- src/mesa/drivers/dri/i965/brw_bufmgr.c | 38 +++--- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c index 17036b5..9a423da 100644 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c @@ -86,6 +86,8 @@ #define memclear(s) memset(, 0, sizeof(s)) +#define PAGE_SIZE 4096 + #define FILE_DEBUG_FLAG DEBUG_BUFMGR static inline int @@ -180,19 +182,35 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t pitch, uint32_t tiling) return ALIGN(pitch, tile_width); } +/* + * This function finds the correct bucket fit for the input size. + * The function works with O(1) complexity when the requested size + * was queried instead of iterating the size through all the buckets. + */ static struct bo_cache_bucket * bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size) { - int i; + /* Calculating the pages and rounding up to the page size. */ + const unsigned int pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; - for (i = 0; i < bufmgr->num_buckets; i++) { - struct bo_cache_bucket *bucket = >cache_bucket[i]; - if (bucket->size >= size) { - return bucket; - } - } + /* Finding the row number based on the calculated pages. */ + const unsigned int rows = 30 - __builtin_clz((pages - 1) | 3); - return NULL; + const unsigned int row_max_pages = 4 << rows; + const unsigned int prev_row_max_pages = (row_max_pages / 2) & ~2; + + /* Finding the column number using column interval. */ + int col_size_log2 = rows - 1; + col_size_log2 += (col_size_log2 < 0); + + const unsigned int col = ( (pages - prev_row_max_pages + +( (1 << col_size_log2) - 1) ) >> col_size_log2 ); + + /* Calculating the index based on the row and column. */ + const unsigned int index = (rows * 4) + (col - 1); + + return (index < bufmgr->num_buckets) ? + >cache_bucket[index] : NULL; } int @@ -1254,6 +1272,10 @@ add_bucket(struct brw_bufmgr *bufmgr, int size) list_inithead(>cache_bucket[i].head); bufmgr->cache_bucket[i].size = size; bufmgr->num_buckets++; + + assert(bucket_for_size(bufmgr, size) == >cache_bucket[i]); + assert(bucket_for_size(bufmgr, size - 2048) == >cache_bucket[i]); + assert(bucket_for_size(bufmgr, size + 1) != >cache_bucket[i]); } static void -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v3] i965 : optimized bucket index calculation.
From: Aravindan MuthukumarAvoiding the loop which was running with O(n) complexity. Now the complexity has been reduced to O(1) Algorithm calculates the index using matrix method. Matrix arrangement is as below: Assuming PAGE_SIZE is 4096. 1*4096 2*40963*40964*4096 5*4096 6*40967*40968*4096 10*4096 12*4096 14*4096 16*4096 20*4096 24*4096 28*4096 32*4096 ... ... ... ... ... ... ... ... ... ... ... max_cache_size From this matrix its clearly seen that every row follows the below way: ... ... ...n n+(1/4)n n+(1/2)n n+(3/4)n2n Row is calulated as log2(size/PAGE_SIZE) Column is calculated as converting the difference between the elements to fit into power size of two and indexing it. Final Index is (row*4)+(col-1) Tested with Intel Mesa CI. Improves performance of 3d Mark on Broxton. Analyzed using Compare Perf Analyser: Average : 201.2 +/- 65.4836 (n=20) Percentage : 0.705966% +/- 0.229767% (n=20) v3: Review comments implemented Signed-off-by: Aravindan Muthukumar Signed-off-by: Kedar Karanje Reviewed-by: Yogesh Marathe --- src/mesa/drivers/dri/i965/brw_bufmgr.c | 42 -- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c index 17036b5..49514a4 100644 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c @@ -86,6 +86,8 @@ #define memclear(s) memset(, 0, sizeof(s)) +#define PAGE_SIZE 4096 + #define FILE_DEBUG_FLAG DEBUG_BUFMGR static inline int @@ -180,19 +182,41 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t pitch, uint32_t tiling) return ALIGN(pitch, tile_width); } +static inline int +ilog2_round_up(int value) +{ + assert(value != 0); + return 32 - __builtin_clz(value - 1); +} + +/* + * This function finds the correct bucket fit for the input size. + * The function works with O(1) complexity when the requested size + * was queried instead of iterating the size through all the buckets. + */ static struct bo_cache_bucket * bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size) { - int i; + int index; - for (i = 0; i < bufmgr->num_buckets; i++) { - struct bo_cache_bucket *bucket = >cache_bucket[i]; - if (bucket->size >= size) { - return bucket; - } + /* Condition for size less than 4*4096 (16KB) page size. */ + if (size <= 4 * PAGE_SIZE) { + index = DIV_ROUND_UP(size, PAGE_SIZE) - 1; + } else { + /* Number of pages of page size */ + const int pages = DIV_ROUND_UP(size, PAGE_SIZE); + const int pages_log2 = ilog2_round_up(pages) - 1; + + /* Finding the row and column of the matrix */ + const int row = pages_log2 - 1; + const int col = DIV_ROUND_UP((pages - (1 << pages_log2)), + (1 << (pages_log2 - 2))); + /* Using the calculated row and column to index into the matrix */ + index = (row << 2) + (col - 1); } - return NULL; + return (index >= 0 && index < bufmgr->num_buckets) ? + >cache_bucket[index] : NULL; } int @@ -1254,6 +1278,10 @@ add_bucket(struct brw_bufmgr *bufmgr, int size) list_inithead(>cache_bucket[i].head); bufmgr->cache_bucket[i].size = size; bufmgr->num_buckets++; + + assert(bucket_for_size(bufmgr, size) == >cache_bucket[i]); + assert(bucket_for_size(bufmgr, size - 2048) == >cache_bucket[i]); + assert(bucket_for_size(bufmgr, size + 1) != >cache_bucket[i]); } static void -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev