There is a potential race between the __sync_fetch_and_add and the futex_wake where another thread could come in and start waiting. If we hit this case, the other thread will never get woken back up because the futex_wake doesn't get called. We can fix this by calling futex_wake unconditionally.
There was another potential bug because __sync_fetch_and_add does not guarantee that previous writes are globally visible. In particular, the updates to the pool caused by growing it may not be visible. If memory writes from the growing thread happen out-of-order, this could cause a waiting thread to come in and try to pull a block before the grow has completed. Now that we are no longer predicating the futex_wake, we no longer need the result of the __sync_fetch_and_add and it can be replaced with a __sync_synchronize and a regular 64-bit write. --- src/intel/vulkan/anv_allocator.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index fe14d6c..a64ebd0 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -546,7 +546,7 @@ anv_block_pool_alloc_new(struct anv_block_pool *pool, struct anv_block_state *pool_state, uint32_t block_size) { - struct anv_block_state state, old, new; + struct anv_block_state state, new; while (1) { state.u64 = __sync_fetch_and_add(&pool_state->u64, block_size); @@ -564,9 +564,9 @@ anv_block_pool_alloc_new(struct anv_block_pool *pool, new.end = anv_block_pool_grow(pool, pool_state); } while (new.end < new.next); - old.u64 = __sync_lock_test_and_set(&pool_state->u64, new.u64); - if (old.next != state.next) - futex_wake(&pool_state->end, INT_MAX); + __sync_synchronize(); + pool_state->u64 = new.u64; + futex_wake(&pool_state->end, INT_MAX); return state.next; } else { futex_wait(&pool_state->end, state.end, NULL); @@ -645,7 +645,7 @@ anv_fixed_size_state_pool_alloc_new(struct anv_fixed_size_state_pool *pool, uint32_t state_size, uint32_t block_size) { - struct anv_block_state block, old, new; + struct anv_block_state block, new; uint32_t offset; /* If our state is large, we don't need any sub-allocation from a block. @@ -663,9 +663,10 @@ anv_fixed_size_state_pool_alloc_new(struct anv_fixed_size_state_pool *pool, offset = anv_block_pool_alloc(block_pool, block_size); new.next = offset + state_size; new.end = offset + block_size; - old.u64 = __sync_lock_test_and_set(&pool->block.u64, new.u64); - if (old.next != block.next) - futex_wake(&pool->block.end, INT_MAX); + + __sync_synchronize(); + pool->block.u64 = new.u64; + futex_wake(&pool->block.end, INT_MAX); return offset; } else { futex_wait(&pool->block.end, block.end, NULL); -- 2.5.0.400.gff86faf _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev