Re: [Mesa-dev] [PATCH] radv/winsys: replace bo list searches with a hash table.
2018-01-31 3:38 GMT+01:00 Dave Airlie: > On 31 January 2018 at 12:27, Dieter Nützel wrote: >> Ping! > > I'm not sure this was much of a win, and if it makes things worse in some > cases, > then it needs a lot more investigation, so probably consider it dead for now. > > Dave. > I have a new hash table implementation lying around on my local machine that is specifically designed for storing pointers. It lowers memory usage, and has much better cache locality and general performance is nice. It has reduced the impact of hash tables quite noticeably with the testing that I've done as of yet. I'll see if I can get it out the door soon, as it might come in handy for usecases like this. >> >> Am 11.01.2018 04:53, schrieb Dave Airlie: >>> >>> From: Dave Airlie >>> >>> This should make the merging of cmd buffers less CPU intensive, >>> note I said *should* :) >>> --- >>> src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 47 >>> --- >>> 1 file changed, 20 insertions(+), 27 deletions(-) >>> >>> diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c >>> b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c >>> index 0ee56f91447..9a39d237ae8 100644 >>> --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c >>> +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c >>> @@ -33,6 +33,7 @@ >>> #include "radv_amdgpu_bo.h" >>> #include "sid.h" >>> >>> +#include "util/hash_table.h" >>> >>> enum { >>> VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024 >>> @@ -584,6 +585,9 @@ static int radv_amdgpu_create_bo_list(struct >>> radv_amdgpu_winsys *ws, >>> priorities[0] = 8; >>> } >>> >>> + struct hash_table *ht = _mesa_hash_table_create(NULL, >>> _mesa_hash_pointer, >>> + >>> _mesa_key_pointer_equal); >>> + >>> for (unsigned i = 0; i < count + !!extra_cs; ++i) { >>> struct radv_amdgpu_cs *cs; >>> >>> @@ -595,50 +599,39 @@ static int radv_amdgpu_create_bo_list(struct >>> radv_amdgpu_winsys *ws, >>> if (!cs->num_buffers) >>> continue; >>> >>> - if (unique_bo_count == 0) { >>> - memcpy(handles, cs->handles, >>> 
cs->num_buffers * sizeof(amdgpu_bo_handle)); >>> - memcpy(priorities, cs->priorities, >>> cs->num_buffers * sizeof(uint8_t)); >>> - unique_bo_count = cs->num_buffers; >>> - continue; >>> - } >>> - int unique_bo_so_far = unique_bo_count; >>> for (unsigned j = 0; j < cs->num_buffers; ++j) { >>> - bool found = false; >>> - for (unsigned k = 0; k < unique_bo_so_far; >>> ++k) { >>> - if (handles[k] == cs->handles[j]) >>> { >>> - found = true; >>> - priorities[k] = >>> MAX2(priorities[k], >>> - >>> cs->priorities[j]); >>> - break; >>> - } >>> - } >>> - if (!found) { >>> + struct hash_entry *entry = >>> _mesa_hash_table_search(ht, (void >>> *)cs->handles[j]); >>> + if (!entry) { >>> + _mesa_hash_table_insert(ht, (void >>> *)cs->handles[j], (void >>> *)(uintptr_t)unique_bo_count); >>> handles[unique_bo_count] = >>> cs->handles[j]; >>> priorities[unique_bo_count] = >>> cs->priorities[j]; >>> ++unique_bo_count; >>> + } else { >>> + int bo_idx = (uint32_t)(unsigned >>> long)entry->data; >>> + priorities[bo_idx] = >>> MAX2(priorities[bo_idx], >>> + >>> cs->priorities[j]); >>> } >>> } >>> for (unsigned j = 0; j < cs->num_virtual_buffers; >>> ++j) { >>> struct radv_amdgpu_winsys_bo *virtual_bo = >>> radv_amdgpu_winsys_bo(cs->virtual_buffers[j]); >>> for(unsigned k = 0; k < >>> virtual_bo->bo_count; ++k) { >>> struct radv_amdgpu_winsys_bo *bo = >>> virtual_bo->bos[k]; >>> - bool found = false; >>> - for (unsigned m = 0; m < >>> unique_bo_count; ++m) { >>> - if (handles[m] == bo->bo) >>> { >>> - found = true;
Re: [Mesa-dev] [PATCH] radv/winsys: replace bo list searches with a hash table.
Am 12.01.2018 01:31, schrieb Dieter Nützel: Tested-by: Dieter Nützel. This offers an additional 1-1.5% (~20 fps) boost with 'smoketest'. 6290 presents in 5.00045 seconds (FPS: 1257.89) 6291 presents in 5.00041 seconds (FPS: 1258.1) 6293 presents in 5.00065 seconds (FPS: 1258.44) 6289 presents in 5.0006 seconds (FPS: 1257.65) But _only_ with 'ondemand' governor!!! cpupower frequency-set -g ondemand => GOOD cpupower frequency-set -g performance => BAD cpupower frequency-set -g schedutil => BAD performance: 100-110 fps _lower_ schedutil: 110-140 fps _lower_ Hello Dave, if you refer to this part, "I'm not sure this was much of a win, and if it makes things worse in some cases, then it needs a lot more investigation, so probably consider it dead for now. Dave." it was _only_ an additional finding on my side. Boost is there, I'm running daily with it. 'New' finding for me was that 'performance' and 'schedutil' are slower than 'ondemand' for Vulkan (only radv tested), as opposed to OpenGL, where 'performance' is the clear winner (in most games much more min and avg plus max fps). With 'cpupower frequency-set -g performance' I get _more_ noise (bus?) from my system during 'smoketest'. (Apart from this patch.) And it is somewhat slower. Your call. 
Greetings, Dieter Am 11.01.2018 04:53, schrieb Dave Airlie: From: Dave Airlie This should make the merging of cmd buffers less CPU intensive, note I said *should* :) --- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 47 --- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index 0ee56f91447..9a39d237ae8 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -33,6 +33,7 @@ #include "radv_amdgpu_bo.h" #include "sid.h" +#include "util/hash_table.h" enum { VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024 @@ -584,6 +585,9 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, priorities[0] = 8; } + struct hash_table *ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + for (unsigned i = 0; i < count + !!extra_cs; ++i) { struct radv_amdgpu_cs *cs; @@ -595,50 +599,39 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, if (!cs->num_buffers) continue; - if (unique_bo_count == 0) { -memcpy(handles, cs->handles, cs->num_buffers * sizeof(amdgpu_bo_handle)); -memcpy(priorities, cs->priorities, cs->num_buffers * sizeof(uint8_t)); - unique_bo_count = cs->num_buffers; - continue; - } - int unique_bo_so_far = unique_bo_count; for (unsigned j = 0; j < cs->num_buffers; ++j) { - bool found = false; - for (unsigned k = 0; k < unique_bo_so_far; ++k) { - if (handles[k] == cs->handles[j]) { - found = true; - priorities[k] = MAX2(priorities[k], - cs->priorities[j]); - break; - } - } - if (!found) { + struct hash_entry *entry = _mesa_hash_table_search(ht, (void *)cs->handles[j]); + if (!entry) { + _mesa_hash_table_insert(ht, (void *)cs->handles[j], (void *)(uintptr_t)unique_bo_count); handles[unique_bo_count] = cs->handles[j]; priorities[unique_bo_count] = cs->priorities[j]; ++unique_bo_count; + } else { + int bo_idx = (uint32_t)(unsigned long)entry->data; + 
priorities[bo_idx] = MAX2(priorities[bo_idx], + cs->priorities[j]); } } for (unsigned j = 0; j < cs->num_virtual_buffers; ++j) { struct radv_amdgpu_winsys_bo *virtual_bo = radv_amdgpu_winsys_bo(cs->virtual_buffers[j]); for(unsigned k = 0; k < virtual_bo->bo_count; ++k) { struct
Re: [Mesa-dev] [PATCH] radv/winsys: replace bo list searches with a hash table.
On 31 January 2018 at 12:27, Dieter Nützelwrote: > Ping! I'm not sure this was much of a win, and if it makes things worse in some cases, then it needs a lot more investigation, so probably consider it dead for now. Dave. > > Am 11.01.2018 04:53, schrieb Dave Airlie: >> >> From: Dave Airlie >> >> This should make the merging of cmd buffers less CPU intensive, >> note I said *should* :) >> --- >> src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 47 >> --- >> 1 file changed, 20 insertions(+), 27 deletions(-) >> >> diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c >> b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c >> index 0ee56f91447..9a39d237ae8 100644 >> --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c >> +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c >> @@ -33,6 +33,7 @@ >> #include "radv_amdgpu_bo.h" >> #include "sid.h" >> >> +#include "util/hash_table.h" >> >> enum { >> VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024 >> @@ -584,6 +585,9 @@ static int radv_amdgpu_create_bo_list(struct >> radv_amdgpu_winsys *ws, >> priorities[0] = 8; >> } >> >> + struct hash_table *ht = _mesa_hash_table_create(NULL, >> _mesa_hash_pointer, >> + >> _mesa_key_pointer_equal); >> + >> for (unsigned i = 0; i < count + !!extra_cs; ++i) { >> struct radv_amdgpu_cs *cs; >> >> @@ -595,50 +599,39 @@ static int radv_amdgpu_create_bo_list(struct >> radv_amdgpu_winsys *ws, >> if (!cs->num_buffers) >> continue; >> >> - if (unique_bo_count == 0) { >> - memcpy(handles, cs->handles, >> cs->num_buffers * sizeof(amdgpu_bo_handle)); >> - memcpy(priorities, cs->priorities, >> cs->num_buffers * sizeof(uint8_t)); >> - unique_bo_count = cs->num_buffers; >> - continue; >> - } >> - int unique_bo_so_far = unique_bo_count; >> for (unsigned j = 0; j < cs->num_buffers; ++j) { >> - bool found = false; >> - for (unsigned k = 0; k < unique_bo_so_far; >> ++k) { >> - if (handles[k] == cs->handles[j]) >> { >> - found = true; >> - priorities[k] = >> MAX2(priorities[k], >> - >> cs->priorities[j]); >> - break; >> - 
} >> - } >> - if (!found) { >> + struct hash_entry *entry = >> _mesa_hash_table_search(ht, (void >> *)cs->handles[j]); >> + if (!entry) { >> + _mesa_hash_table_insert(ht, (void >> *)cs->handles[j], (void >> *)(uintptr_t)unique_bo_count); >> handles[unique_bo_count] = >> cs->handles[j]; >> priorities[unique_bo_count] = >> cs->priorities[j]; >> ++unique_bo_count; >> + } else { >> + int bo_idx = (uint32_t)(unsigned >> long)entry->data; >> + priorities[bo_idx] = >> MAX2(priorities[bo_idx], >> + >> cs->priorities[j]); >> } >> } >> for (unsigned j = 0; j < cs->num_virtual_buffers; >> ++j) { >> struct radv_amdgpu_winsys_bo *virtual_bo = >> radv_amdgpu_winsys_bo(cs->virtual_buffers[j]); >> for(unsigned k = 0; k < >> virtual_bo->bo_count; ++k) { >> struct radv_amdgpu_winsys_bo *bo = >> virtual_bo->bos[k]; >> - bool found = false; >> - for (unsigned m = 0; m < >> unique_bo_count; ++m) { >> - if (handles[m] == bo->bo) >> { >> - found = true; >> - priorities[m] = >> MAX2(priorities[m], >> - >> cs->virtual_buffer_priorities[j]); >> - break; >> - } >> - } >> - if (!found) { >> + >> + struct hash_entry *entry = >> _mesa_hash_table_search(ht, (void *)bo->bo); >> + if (!entry) { >> + >> _mesa_hash_table_insert(ht, (void
Re: [Mesa-dev] [PATCH] radv/winsys: replace bo list searches with a hash table.
Ping! Am 11.01.2018 04:53, schrieb Dave Airlie: From: Dave AirlieThis should make the merging of cmd buffers less CPU intensive, note I said *should* :) --- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 47 --- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index 0ee56f91447..9a39d237ae8 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -33,6 +33,7 @@ #include "radv_amdgpu_bo.h" #include "sid.h" +#include "util/hash_table.h" enum { VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024 @@ -584,6 +585,9 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, priorities[0] = 8; } + struct hash_table *ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + for (unsigned i = 0; i < count + !!extra_cs; ++i) { struct radv_amdgpu_cs *cs; @@ -595,50 +599,39 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, if (!cs->num_buffers) continue; - if (unique_bo_count == 0) { -memcpy(handles, cs->handles, cs->num_buffers * sizeof(amdgpu_bo_handle)); -memcpy(priorities, cs->priorities, cs->num_buffers * sizeof(uint8_t)); - unique_bo_count = cs->num_buffers; - continue; - } - int unique_bo_so_far = unique_bo_count; for (unsigned j = 0; j < cs->num_buffers; ++j) { - bool found = false; - for (unsigned k = 0; k < unique_bo_so_far; ++k) { - if (handles[k] == cs->handles[j]) { - found = true; - priorities[k] = MAX2(priorities[k], - cs->priorities[j]); - break; - } - } - if (!found) { + struct hash_entry *entry = _mesa_hash_table_search(ht, (void *)cs->handles[j]); + if (!entry) { + _mesa_hash_table_insert(ht, (void *)cs->handles[j], (void *)(uintptr_t)unique_bo_count); handles[unique_bo_count] = cs->handles[j]; priorities[unique_bo_count] = cs->priorities[j]; ++unique_bo_count; + } else { + int bo_idx = (uint32_t)(unsigned long)entry->data; + priorities[bo_idx] = 
MAX2(priorities[bo_idx], + cs->priorities[j]); } } for (unsigned j = 0; j < cs->num_virtual_buffers; ++j) { struct radv_amdgpu_winsys_bo *virtual_bo = radv_amdgpu_winsys_bo(cs->virtual_buffers[j]); for(unsigned k = 0; k < virtual_bo->bo_count; ++k) { struct radv_amdgpu_winsys_bo *bo = virtual_bo->bos[k]; - bool found = false; - for (unsigned m = 0; m < unique_bo_count; ++m) { - if (handles[m] == bo->bo) { - found = true; - priorities[m] = MAX2(priorities[m], - cs->virtual_buffer_priorities[j]); - break; - } - } - if (!found) { + + struct hash_entry *entry = _mesa_hash_table_search(ht, (void *)bo->bo); + if (!entry) { + _mesa_hash_table_insert(ht, (void *)bo->bo, (void *)(uintptr_t)unique_bo_count); handles[unique_bo_count] = bo->bo; priorities[unique_bo_count] = cs->virtual_buffer_priorities[j]; ++unique_bo_count; + }
Re: [Mesa-dev] [PATCH] radv/winsys: replace bo list searches with a hash table.
Ping! Any progress? Am 11.01.2018 04:53, schrieb Dave Airlie: From: Dave AirlieThis should make the merging of cmd buffers less CPU intensive, note I said *should* :) --- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 47 --- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index 0ee56f91447..9a39d237ae8 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -33,6 +33,7 @@ #include "radv_amdgpu_bo.h" #include "sid.h" +#include "util/hash_table.h" enum { VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024 @@ -584,6 +585,9 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, priorities[0] = 8; } + struct hash_table *ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + for (unsigned i = 0; i < count + !!extra_cs; ++i) { struct radv_amdgpu_cs *cs; @@ -595,50 +599,39 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, if (!cs->num_buffers) continue; - if (unique_bo_count == 0) { -memcpy(handles, cs->handles, cs->num_buffers * sizeof(amdgpu_bo_handle)); -memcpy(priorities, cs->priorities, cs->num_buffers * sizeof(uint8_t)); - unique_bo_count = cs->num_buffers; - continue; - } - int unique_bo_so_far = unique_bo_count; for (unsigned j = 0; j < cs->num_buffers; ++j) { - bool found = false; - for (unsigned k = 0; k < unique_bo_so_far; ++k) { - if (handles[k] == cs->handles[j]) { - found = true; - priorities[k] = MAX2(priorities[k], - cs->priorities[j]); - break; - } - } - if (!found) { + struct hash_entry *entry = _mesa_hash_table_search(ht, (void *)cs->handles[j]); + if (!entry) { + _mesa_hash_table_insert(ht, (void *)cs->handles[j], (void *)(uintptr_t)unique_bo_count); handles[unique_bo_count] = cs->handles[j]; priorities[unique_bo_count] = cs->priorities[j]; ++unique_bo_count; + } else { + int bo_idx = (uint32_t)(unsigned long)entry->data; + 
priorities[bo_idx] = MAX2(priorities[bo_idx], + cs->priorities[j]); } } for (unsigned j = 0; j < cs->num_virtual_buffers; ++j) { struct radv_amdgpu_winsys_bo *virtual_bo = radv_amdgpu_winsys_bo(cs->virtual_buffers[j]); for(unsigned k = 0; k < virtual_bo->bo_count; ++k) { struct radv_amdgpu_winsys_bo *bo = virtual_bo->bos[k]; - bool found = false; - for (unsigned m = 0; m < unique_bo_count; ++m) { - if (handles[m] == bo->bo) { - found = true; - priorities[m] = MAX2(priorities[m], - cs->virtual_buffer_priorities[j]); - break; - } - } - if (!found) { + + struct hash_entry *entry = _mesa_hash_table_search(ht, (void *)bo->bo); + if (!entry) { + _mesa_hash_table_insert(ht, (void *)bo->bo, (void *)(uintptr_t)unique_bo_count); handles[unique_bo_count] = bo->bo; priorities[unique_bo_count] = cs->virtual_buffer_priorities[j]; ++unique_bo_count; +
Re: [Mesa-dev] [PATCH] radv/winsys: replace bo list searches with a hash table.
Tested-by: Dieter NützelThis offer an additional 1-1,5% (~20 fps) boost with 'smoketest'. 6290 presents in 5.00045 seconds (FPS: 1257.89) 6291 presents in 5.00041 seconds (FPS: 1258.1) 6293 presents in 5.00065 seconds (FPS: 1258.44) 6289 presents in 5.0006 seconds (FPS: 1257.65) But _only_ with 'ondemand' governor!!! cpupower frequency-set -g ondemand => GOOD cpupower frequency-set -g performance => BAD cpupower frequency-set -g schedutil => BAD performance: 100-110 fps _lower_ schedutil: 110-140 fps _lower_ Cheers, Dieter Am 11.01.2018 04:53, schrieb Dave Airlie: From: Dave Airlie This should make the merging of cmd buffers less CPU intensive, note I said *should* :) --- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 47 --- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index 0ee56f91447..9a39d237ae8 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -33,6 +33,7 @@ #include "radv_amdgpu_bo.h" #include "sid.h" +#include "util/hash_table.h" enum { VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024 @@ -584,6 +585,9 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, priorities[0] = 8; } + struct hash_table *ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + for (unsigned i = 0; i < count + !!extra_cs; ++i) { struct radv_amdgpu_cs *cs; @@ -595,50 +599,39 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, if (!cs->num_buffers) continue; - if (unique_bo_count == 0) { -memcpy(handles, cs->handles, cs->num_buffers * sizeof(amdgpu_bo_handle)); -memcpy(priorities, cs->priorities, cs->num_buffers * sizeof(uint8_t)); - unique_bo_count = cs->num_buffers; - continue; - } - int unique_bo_so_far = unique_bo_count; for (unsigned j = 0; j < cs->num_buffers; ++j) { - bool found = false; - for (unsigned k = 0; k < unique_bo_so_far; ++k) { - if 
(handles[k] == cs->handles[j]) { - found = true; - priorities[k] = MAX2(priorities[k], - cs->priorities[j]); - break; - } - } - if (!found) { + struct hash_entry *entry = _mesa_hash_table_search(ht, (void *)cs->handles[j]); + if (!entry) { + _mesa_hash_table_insert(ht, (void *)cs->handles[j], (void *)(uintptr_t)unique_bo_count); handles[unique_bo_count] = cs->handles[j]; priorities[unique_bo_count] = cs->priorities[j]; ++unique_bo_count; + } else { + int bo_idx = (uint32_t)(unsigned long)entry->data; + priorities[bo_idx] = MAX2(priorities[bo_idx], + cs->priorities[j]); } } for (unsigned j = 0; j < cs->num_virtual_buffers; ++j) { struct radv_amdgpu_winsys_bo *virtual_bo = radv_amdgpu_winsys_bo(cs->virtual_buffers[j]); for(unsigned k = 0; k < virtual_bo->bo_count; ++k) { struct radv_amdgpu_winsys_bo *bo = virtual_bo->bos[k]; - bool found = false; - for (unsigned m = 0; m < unique_bo_count; ++m) { - if (handles[m] == bo->bo) { - found = true; - priorities[m] = MAX2(priorities[m], - cs->virtual_buffer_priorities[j]); - break; - } - } - if (!found) { + + struct
Re: [Mesa-dev] [PATCH] radv/winsys: replace bo list searches with a hash table.
mmh, last time I tried to use a hash table for the amdgpu winsys of RadeonSI it was *worse*, so not sure if that really helps. Though, I don't have any numbers to show you because it was a while ago. On 01/11/2018 04:53 AM, Dave Airlie wrote: From: Dave AirlieThis should make the merging of cmd buffers less CPU intensive, note I said *should* :) --- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 47 --- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index 0ee56f91447..9a39d237ae8 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -33,6 +33,7 @@ #include "radv_amdgpu_bo.h" #include "sid.h" +#include "util/hash_table.h" enum { VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024 @@ -584,6 +585,9 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, priorities[0] = 8; } + struct hash_table *ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + for (unsigned i = 0; i < count + !!extra_cs; ++i) { struct radv_amdgpu_cs *cs; @@ -595,50 +599,39 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, if (!cs->num_buffers) continue; - if (unique_bo_count == 0) { - memcpy(handles, cs->handles, cs->num_buffers * sizeof(amdgpu_bo_handle)); - memcpy(priorities, cs->priorities, cs->num_buffers * sizeof(uint8_t)); - unique_bo_count = cs->num_buffers; - continue; - } - int unique_bo_so_far = unique_bo_count; for (unsigned j = 0; j < cs->num_buffers; ++j) { - bool found = false; - for (unsigned k = 0; k < unique_bo_so_far; ++k) { - if (handles[k] == cs->handles[j]) { - found = true; - priorities[k] = MAX2(priorities[k], - cs->priorities[j]); - break; - } - } - if (!found) { + struct hash_entry *entry = _mesa_hash_table_search(ht, (void *)cs->handles[j]); + if (!entry) { + _mesa_hash_table_insert(ht, (void *)cs->handles[j], (void *)(uintptr_t)unique_bo_count); 
handles[unique_bo_count] = cs->handles[j]; priorities[unique_bo_count] = cs->priorities[j]; ++unique_bo_count; + } else { + int bo_idx = (uint32_t)(unsigned long)entry->data; + priorities[bo_idx] = MAX2(priorities[bo_idx], + cs->priorities[j]); } } for (unsigned j = 0; j < cs->num_virtual_buffers; ++j) { struct radv_amdgpu_winsys_bo *virtual_bo = radv_amdgpu_winsys_bo(cs->virtual_buffers[j]); for(unsigned k = 0; k < virtual_bo->bo_count; ++k) { struct radv_amdgpu_winsys_bo *bo = virtual_bo->bos[k]; - bool found = false; - for (unsigned m = 0; m < unique_bo_count; ++m) { - if (handles[m] == bo->bo) { - found = true; - priorities[m] = MAX2(priorities[m], - cs->virtual_buffer_priorities[j]); - break; - } - } - if (!found) { + + struct hash_entry *entry = _mesa_hash_table_search(ht, (void *)bo->bo); + if (!entry) { + _mesa_hash_table_insert(ht, (void *)bo->bo, (void *)(uintptr_t)unique_bo_count);
[Mesa-dev] [PATCH] radv/winsys: replace bo list searches with a hash table.
From: Dave AirlieThis should make the merging of cmd buffers less CPU intensive, note I said *should* :) --- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 47 --- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index 0ee56f91447..9a39d237ae8 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -33,6 +33,7 @@ #include "radv_amdgpu_bo.h" #include "sid.h" +#include "util/hash_table.h" enum { VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024 @@ -584,6 +585,9 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, priorities[0] = 8; } + struct hash_table *ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + for (unsigned i = 0; i < count + !!extra_cs; ++i) { struct radv_amdgpu_cs *cs; @@ -595,50 +599,39 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, if (!cs->num_buffers) continue; - if (unique_bo_count == 0) { - memcpy(handles, cs->handles, cs->num_buffers * sizeof(amdgpu_bo_handle)); - memcpy(priorities, cs->priorities, cs->num_buffers * sizeof(uint8_t)); - unique_bo_count = cs->num_buffers; - continue; - } - int unique_bo_so_far = unique_bo_count; for (unsigned j = 0; j < cs->num_buffers; ++j) { - bool found = false; - for (unsigned k = 0; k < unique_bo_so_far; ++k) { - if (handles[k] == cs->handles[j]) { - found = true; - priorities[k] = MAX2(priorities[k], - cs->priorities[j]); - break; - } - } - if (!found) { + struct hash_entry *entry = _mesa_hash_table_search(ht, (void *)cs->handles[j]); + if (!entry) { + _mesa_hash_table_insert(ht, (void *)cs->handles[j], (void *)(uintptr_t)unique_bo_count); handles[unique_bo_count] = cs->handles[j]; priorities[unique_bo_count] = cs->priorities[j]; ++unique_bo_count; + } else { + int bo_idx = (uint32_t)(unsigned long)entry->data; + priorities[bo_idx] = MAX2(priorities[bo_idx], + cs->priorities[j]); } 
} for (unsigned j = 0; j < cs->num_virtual_buffers; ++j) { struct radv_amdgpu_winsys_bo *virtual_bo = radv_amdgpu_winsys_bo(cs->virtual_buffers[j]); for(unsigned k = 0; k < virtual_bo->bo_count; ++k) { struct radv_amdgpu_winsys_bo *bo = virtual_bo->bos[k]; - bool found = false; - for (unsigned m = 0; m < unique_bo_count; ++m) { - if (handles[m] == bo->bo) { - found = true; - priorities[m] = MAX2(priorities[m], - cs->virtual_buffer_priorities[j]); - break; - } - } - if (!found) { + + struct hash_entry *entry = _mesa_hash_table_search(ht, (void *)bo->bo); + if (!entry) { + _mesa_hash_table_insert(ht, (void *)bo->bo, (void *)(uintptr_t)unique_bo_count); handles[unique_bo_count] = bo->bo; priorities[unique_bo_count] = cs->virtual_buffer_priorities[j];