Module: Mesa
Branch: main
Commit: 83a5fb9fafd0f740dbee1a204720719f150a2427
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=83a5fb9fafd0f740dbee1a204720719f150a2427
Author: Yonggang Luo <luoyongg...@gmail.com>
Date:   Wed Aug 9 14:02:42 2023 +0800

    util: Fixes note: the alignment of ‘_Atomic long long int’ fields changed in GCC 11

    This is an improvement of https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22121

    Signed-off-by: Yonggang Luo <luoyongg...@gmail.com>
    Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl>
    Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23961>

---
 src/amd/vulkan/radv_query.c                       | 34 +++++++++++------------
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h |  6 ++--
 src/util/disk_cache.c                             |  2 +-
 src/util/disk_cache_os.c                          | 10 +++----
 src/util/u_atomic.h                               | 24 ++++++++++----
 5 files changed, 45 insertions(+), 31 deletions(-)

diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index 26aaaec5a5d..5f3a7ffdf67 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -1359,7 +1359,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
          uint64_t value;
 
          do {
-            value = p_atomic_read(src64);
+            value = p_atomic_read(&src64->value);
          } while (value == TIMESTAMP_NOT_READY && (flags & VK_QUERY_RESULT_WAIT_BIT));
 
          available = value != TIMESTAMP_NOT_READY;
@@ -1392,8 +1392,8 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
               continue;
 
            do {
-              start = p_atomic_read(src64 + 2 * i);
-              end = p_atomic_read(src64 + 2 * i + 1);
+              start = p_atomic_read(&src64[2 * i].value);
+              end = p_atomic_read(&src64[2 * i + 1].value);
            } while ((!(start & (1ull << 63)) || !(end & (1ull << 63))) && (flags & VK_QUERY_RESULT_WAIT_BIT));
 
            if (!(start & (1ull << 63)) || !(end & (1ull << 63)))
@@ -1481,7 +1481,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
         do {
            available = 1;
            for (int j = 0; j < 4; j++) {
-              if (!(p_atomic_read(src64 + j) & 0x8000000000000000UL))
+              if (!(p_atomic_read(&src64[j].value) & 0x8000000000000000UL))
                  available = 0;
            }
         } while (!available && (flags & VK_QUERY_RESULT_WAIT_BIT));
@@ -1489,8 +1489,8 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
         if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
            result = VK_NOT_READY;
 
-        num_primitives_written = src64[3] - src64[1];
-        primitive_storage_needed = src64[2] - src64[0];
+        num_primitives_written = p_atomic_read_relaxed(&src64[3].value) - p_atomic_read_relaxed(&src64[1].value);
+        primitive_storage_needed = p_atomic_read_relaxed(&src64[2].value) - p_atomic_read_relaxed(&src64[0].value);
 
         if (flags & VK_QUERY_RESULT_64_BIT) {
            if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
@@ -1522,12 +1522,12 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
          */
         do {
            available = 1;
-           if (!(p_atomic_read(src64 + 0) & 0x8000000000000000UL) ||
-               !(p_atomic_read(src64 + 2) & 0x8000000000000000UL)) {
+           if (!(p_atomic_read(&src64[0].value) & 0x8000000000000000UL) ||
+               !(p_atomic_read(&src64[2].value) & 0x8000000000000000UL)) {
              available = 0;
            }
-           if (uses_gds_query && (!(p_atomic_read(src64 + 4) & 0x8000000000000000UL) ||
-                                  !(p_atomic_read(src64 + 5) & 0x8000000000000000UL))) {
+           if (uses_gds_query && (!(p_atomic_read(&src64[4].value) & 0x8000000000000000UL) ||
+                                  !(p_atomic_read(&src64[5].value) & 0x8000000000000000UL))) {
              available = 0;
            }
         } while (!available && (flags & VK_QUERY_RESULT_WAIT_BIT));
@@ -1535,11 +1535,11 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
         if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
            result = VK_NOT_READY;
 
-        primitive_storage_needed = src64[2] - src64[0];
+        primitive_storage_needed = p_atomic_read_relaxed(&src64[2].value) - p_atomic_read_relaxed(&src64[0].value);
 
         if (uses_gds_query) {
            /* Accumulate the result that was copied from GDS in case NGG shader has been used. */
-           primitive_storage_needed += src64[5] - src64[4];
+           primitive_storage_needed += p_atomic_read_relaxed(&src64[5].value) - p_atomic_read_relaxed(&src64[4].value);
         }
 
         if (flags & VK_QUERY_RESULT_64_BIT) {
@@ -1560,13 +1560,13 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
         do {
            avail = true;
            for (unsigned i = 0; i < pc_pool->num_passes; ++i)
-              if (!p_atomic_read(src64 + pool->stride / 8 - i - 1))
+              if (!p_atomic_read(&src64[pool->stride / 8 - i - 1].value))
                  avail = false;
         } while (!avail && (flags & VK_QUERY_RESULT_WAIT_BIT));
 
         available = avail;
 
-        radv_pc_get_results(pc_pool, src64, dest);
+        radv_pc_get_results(pc_pool, &src64->value, dest);
         dest += pc_pool->num_counters * sizeof(union VkPerformanceCounterResultKHR);
         break;
      }
@@ -1576,8 +1576,8 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
 
         do {
            available = 1;
-           if (!(p_atomic_read(src64 + 0) & 0x8000000000000000UL) ||
-               !(p_atomic_read(src64 + 1) & 0x8000000000000000UL)) {
+           if (!(p_atomic_read(&src64[0].value) & 0x8000000000000000UL) ||
+               !(p_atomic_read(&src64[1].value) & 0x8000000000000000UL)) {
              available = 0;
            }
         } while (!available && (flags & VK_QUERY_RESULT_WAIT_BIT));
@@ -1585,7 +1585,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
         if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
            result = VK_NOT_READY;
 
-        ms_prim_gen = src64[1] - src64[0];
+        ms_prim_gen = p_atomic_read_relaxed(&src64[1].value) - p_atomic_read_relaxed(&src64[0].value);
 
         if (flags & VK_QUERY_RESULT_64_BIT) {
            if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h
index 5ebe34129ba..01afc129812 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h
@@ -52,9 +52,9 @@ struct radv_amdgpu_winsys {
    bool reserve_vmid;
    uint64_t perftest;
 
-   p_atomic_uint64_t allocated_vram;
-   p_atomic_uint64_t allocated_vram_vis;
-   p_atomic_uint64_t allocated_gtt;
+   alignas(8) uint64_t allocated_vram;
+   alignas(8) uint64_t allocated_vram_vis;
+   alignas(8) uint64_t allocated_gtt;
 
    /* Global BO list */
    struct {
diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
index 1d23b92af7e..aac052bf9eb 100644
--- a/src/util/disk_cache.c
+++ b/src/util/disk_cache.c
@@ -442,7 +442,7 @@ cache_put(void *job, void *gdata, int thread_index)
       goto done;
 
    /* If the cache is too large, evict something else first.
    */
-   while (*dc_job->cache->size + dc_job->size > dc_job->cache->max_size &&
+   while (p_atomic_read_relaxed(&dc_job->cache->size->value) + dc_job->size > dc_job->cache->max_size &&
          i < 8) {
       disk_cache_evict_lru_item(dc_job->cache);
       i++;
diff --git a/src/util/disk_cache_os.c b/src/util/disk_cache_os.c
index 20467785091..3ec1acda56e 100644
--- a/src/util/disk_cache_os.c
+++ b/src/util/disk_cache_os.c
@@ -456,7 +456,7 @@ disk_cache_evict_lru_item(struct disk_cache *cache)
       free(dir_path);
 
       if (size) {
-         p_atomic_add(cache->size, - (uint64_t)size);
+         p_atomic_add(&cache->size->value, - (uint64_t)size);
          return;
       }
 
@@ -483,7 +483,7 @@ disk_cache_evict_lru_item(struct disk_cache *cache)
    free_lru_file_list(lru_file_list);
 
    if (size)
-      p_atomic_add(cache->size, - (uint64_t)size);
+      p_atomic_add(&cache->size->value, - (uint64_t)size);
 }
 
 void
@@ -499,7 +499,7 @@ disk_cache_evict_item(struct disk_cache *cache, char *filename)
    free(filename);
 
    if (sb.st_blocks)
-      p_atomic_add(cache->size, - (uint64_t)sb.st_blocks * 512);
+      p_atomic_add(&cache->size->value, - (uint64_t)sb.st_blocks * 512);
 }
 
 static void *
@@ -819,7 +819,7 @@ disk_cache_write_item_to_disk(struct disk_cache_put_job *dc_job,
       goto done;
    }
 
-   p_atomic_add(dc_job->cache->size, sb.st_blocks * 512);
+   p_atomic_add(&dc_job->cache->size->value, sb.st_blocks * 512);
 
  done:
    if (fd_final != -1)
@@ -1062,7 +1062,7 @@ disk_cache_mmap_cache_index(void *mem_ctx, struct disk_cache *cache,
       goto path_fail;
 
    cache->index_mmap_size = size;
-   cache->size = (uint64_t *) cache->index_mmap;
+   cache->size = (p_atomic_uint64_t *) cache->index_mmap;
    cache->stored_keys = cache->index_mmap + sizeof(uint64_t);
    mapped = true;
diff --git a/src/util/u_atomic.h b/src/util/u_atomic.h
index ec77bb4d39c..6d604a6de04 100644
--- a/src/util/u_atomic.h
+++ b/src/util/u_atomic.h
@@ -343,13 +343,27 @@ static inline uint64_t p_atomic_xchg_64(uint64_t *v, uint64_t i)
 /* On x86 we can have sizeof(uint64_t) = 8 and _Alignof(uint64_t) = 4. causing split locks. The
  * implementation does handle that correctly, but with an internal mutex. Extend the alignment to
  * avoid this.
+ * `p_atomic_int64_t` and `p_atomic_uint64_t` are used for casting any pointer to
+ * `p_atomic_int64_t *` and `p_atomic_uint64_t *`. That's for telling the compiler it is accessing
+ * the 64-bit atomic in an 8-byte-aligned way, to avoid clang's `misaligned atomic operation`
+ * warning. To define a 64-bit atomic member in a struct type, using
+ * `alignas(8) int64_t $member` or `alignas(8) uint64_t $member` is enough.
  */
-#if __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_ATOMICS__) && defined(USE_GCC_ATOMIC_BUILTINS)
-typedef int64_t __attribute__((aligned(_Alignof(_Atomic(int64_t))))) p_atomic_int64_t;
-typedef uint64_t __attribute__((aligned(_Alignof(_Atomic(uint64_t))))) p_atomic_uint64_t;
+typedef struct {
+#ifndef __cplusplus
+   _Alignas(8)
 #else
-typedef int64_t p_atomic_int64_t;
-typedef uint64_t p_atomic_uint64_t;
+   alignas(8)
 #endif
+   int64_t value;
+} p_atomic_int64_t;
+typedef struct {
+#ifndef __cplusplus
+   _Alignas(8)
+#else
+   alignas(8)
+#endif
+   uint64_t value;
+} p_atomic_uint64_t;
 
 #endif /* U_ATOMIC_H */
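
The idea behind the new typedefs is easy to demonstrate outside of Mesa. Below is a minimal,
self-contained sketch of the same technique: a 64-bit counter is wrapped in a struct whose only
member carries alignas(8), so atomic accesses are guaranteed to be 8-byte aligned even on 32-bit
x86, where a plain uint64_t may be only 4-byte aligned. The wrapper type, the helper functions,
and the use of GCC/clang __atomic builtins (a stand-in for Mesa's p_atomic_read()/p_atomic_add()
macros) are all illustrative, not part of the commit:

    /* Sketch of the commit's technique (illustrative, not Mesa code): wrap a
     * 64-bit counter in a struct whose member is forced to 8-byte alignment,
     * so atomic operations never straddle alignment or fall back to a mutex.
     * GCC/clang __atomic builtins stand in for Mesa's p_atomic_* macros. */
    #include <inttypes.h>
    #include <stdalign.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct {
       alignas(8) uint64_t value; /* plain uint64_t may be 4-byte aligned on i386 */
    } my_atomic_uint64_t;

    static uint64_t my_read_relaxed(const my_atomic_uint64_t *p)
    {
       return __atomic_load_n(&p->value, __ATOMIC_RELAXED);
    }

    static void my_add(my_atomic_uint64_t *p, uint64_t v)
    {
       (void)__atomic_add_fetch(&p->value, v, __ATOMIC_SEQ_CST);
    }

    int main(void)
    {
       my_atomic_uint64_t size = { .value = 0 };

       my_add(&size, 512);            /* account a newly written item */
       my_add(&size, -(uint64_t)256); /* subtract by adding the negation,
                                         as the disk_cache code does */

       printf("size = %" PRIu64 "\n", my_read_relaxed(&size));
       return 0;
    }

Compiled with gcc -std=c11 this prints size = 256. On a 32-bit x86 build, dropping the alignas(8)
could let value land on a 4-byte boundary, which is exactly the case the GCC 11 note in the commit
title warns about.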