Module: Mesa
Branch: main
Commit: 83a5fb9fafd0f740dbee1a204720719f150a2427
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=83a5fb9fafd0f740dbee1a204720719f150a2427

Author: Yonggang Luo <luoyongg...@gmail.com>
Date:   Wed Aug  9 14:02:42 2023 +0800

util: Fix the GCC 11 note: the alignment of ‘_Atomic long long int’ fields changed

This is an improvement of
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22121

Signed-off-by: Yonggang Luo <luoyongg...@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23961>
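
For context: starting with GCC 11, _Atomic 64-bit fields on x86-32 are
8-byte aligned, while plain uint64_t fields may still be only 4-byte
aligned, which is what the note quoted in the subject warns about. A
minimal sketch of the discrepancy (hypothetical structs, not from the
patch; build with gcc -m32, since on x86-64 both alignments are already 8):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct plain  { char c; uint64_t v; };         /* v: 4-byte aligned on -m32 */
struct atomic { char c; _Atomic uint64_t v; }; /* v: 8-byte aligned since GCC 11 */

int main(void)
{
   printf("_Alignof(uint64_t)         = %zu\n", _Alignof(uint64_t));
   printf("_Alignof(_Atomic uint64_t) = %zu\n", _Alignof(_Atomic uint64_t));
   printf("offsetof(struct plain, v)  = %zu\n", offsetof(struct plain, v));
   printf("offsetof(struct atomic, v) = %zu\n", offsetof(struct atomic, v));
   return 0;
}

An under-aligned 64-bit atomic can straddle a cache line, and the CPU (or
libatomic's mutex fallback) then pays for a split lock; keeping every
64-bit atomic 8-byte aligned is what the changes below are about.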

---

 src/amd/vulkan/radv_query.c                       | 34 +++++++++++------------
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h |  6 ++--
 src/util/disk_cache.c                             |  2 +-
 src/util/disk_cache_os.c                          | 10 +++----
 src/util/u_atomic.h                               | 24 ++++++++++++----
 5 files changed, 45 insertions(+), 31 deletions(-)

diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index 26aaaec5a5d..5f3a7ffdf67 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -1359,7 +1359,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
          uint64_t value;
 
          do {
-            value = p_atomic_read(src64);
+            value = p_atomic_read(&src64->value);
         } while (value == TIMESTAMP_NOT_READY && (flags & VK_QUERY_RESULT_WAIT_BIT));
 
          available = value != TIMESTAMP_NOT_READY;
@@ -1392,8 +1392,8 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
                continue;
 
             do {
-               start = p_atomic_read(src64 + 2 * i);
-               end = p_atomic_read(src64 + 2 * i + 1);
+               start = p_atomic_read(&src64[2 * i].value);
+               end = p_atomic_read(&src64[2 * i + 1].value);
            } while ((!(start & (1ull << 63)) || !(end & (1ull << 63))) && (flags & VK_QUERY_RESULT_WAIT_BIT));
 
             if (!(start & (1ull << 63)) || !(end & (1ull << 63)))
@@ -1481,7 +1481,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
          do {
             available = 1;
             for (int j = 0; j < 4; j++) {
-               if (!(p_atomic_read(src64 + j) & 0x8000000000000000UL))
+               if (!(p_atomic_read(&src64[j].value) & 0x8000000000000000UL))
                   available = 0;
             }
          } while (!available && (flags & VK_QUERY_RESULT_WAIT_BIT));
@@ -1489,8 +1489,8 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
          if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
             result = VK_NOT_READY;
 
-         num_primitives_written = src64[3] - src64[1];
-         primitive_storage_needed = src64[2] - src64[0];
+         num_primitives_written = p_atomic_read_relaxed(&src64[3].value) - p_atomic_read_relaxed(&src64[1].value);
+         primitive_storage_needed = p_atomic_read_relaxed(&src64[2].value) - p_atomic_read_relaxed(&src64[0].value);
 
          if (flags & VK_QUERY_RESULT_64_BIT) {
             if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
@@ -1522,12 +1522,12 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
           */
          do {
             available = 1;
-            if (!(p_atomic_read(src64 + 0) & 0x8000000000000000UL) ||
-                !(p_atomic_read(src64 + 2) & 0x8000000000000000UL)) {
+            if (!(p_atomic_read(&src64[0].value) & 0x8000000000000000UL) ||
+                !(p_atomic_read(&src64[2].value) & 0x8000000000000000UL)) {
                available = 0;
             }
-            if (uses_gds_query && (!(p_atomic_read(src64 + 4) & 0x8000000000000000UL) ||
-                                   !(p_atomic_read(src64 + 5) & 0x8000000000000000UL))) {
+            if (uses_gds_query && (!(p_atomic_read(&src64[4].value) & 0x8000000000000000UL) ||
+                                   !(p_atomic_read(&src64[5].value) & 0x8000000000000000UL))) {
                available = 0;
             }
          } while (!available && (flags & VK_QUERY_RESULT_WAIT_BIT));
@@ -1535,11 +1535,11 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
          if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
             result = VK_NOT_READY;
 
-         primitive_storage_needed = src64[2] - src64[0];
+         primitive_storage_needed = p_atomic_read_relaxed(&src64[2].value) - p_atomic_read_relaxed(&src64[0].value);
 
          if (uses_gds_query) {
            /* Accumulate the result that was copied from GDS in case NGG shader has been used. */
-            primitive_storage_needed += src64[5] - src64[4];
+            primitive_storage_needed += p_atomic_read_relaxed(&src64[5].value) - p_atomic_read_relaxed(&src64[4].value);
          }
 
          if (flags & VK_QUERY_RESULT_64_BIT) {
@@ -1560,13 +1560,13 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
          do {
             avail = true;
             for (unsigned i = 0; i < pc_pool->num_passes; ++i)
-               if (!p_atomic_read(src64 + pool->stride / 8 - i - 1))
+               if (!p_atomic_read(&src64[pool->stride / 8 - i - 1].value))
                   avail = false;
          } while (!avail && (flags & VK_QUERY_RESULT_WAIT_BIT));
 
          available = avail;
 
-         radv_pc_get_results(pc_pool, src64, dest);
+         radv_pc_get_results(pc_pool, &src64->value, dest);
         dest += pc_pool->num_counters * sizeof(union VkPerformanceCounterResultKHR);
          break;
       }
@@ -1576,8 +1576,8 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
 
          do {
             available = 1;
-            if (!(p_atomic_read(src64 + 0) & 0x8000000000000000UL) ||
-                !(p_atomic_read(src64 + 1) & 0x8000000000000000UL)) {
+            if (!(p_atomic_read(&src64[0].value) & 0x8000000000000000UL) ||
+                !(p_atomic_read(&src64[1].value) & 0x8000000000000000UL)) {
                available = 0;
             }
          } while (!available && (flags & VK_QUERY_RESULT_WAIT_BIT));
@@ -1585,7 +1585,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
          if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
             result = VK_NOT_READY;
 
-         ms_prim_gen = src64[1] - src64[0];
+         ms_prim_gen = p_atomic_read_relaxed(&src64[1].value) - p_atomic_read_relaxed(&src64[0].value);
 
          if (flags & VK_QUERY_RESULT_64_BIT) {
             if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
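
Every radv_query.c hunk above follows the same shape: src64 now has type
p_atomic_uint64_t * and each load goes through the .value member, so the
compiler knows the 64-bit access is 8-byte aligned. A hedged sketch of
that polling pattern as a standalone helper (wait_for_timestamp and the
sentinel value are hypothetical; radv_query.c defines its own
TIMESTAMP_NOT_READY):

#include <stdbool.h>
#include <stdint.h>

#include "util/u_atomic.h"

#define TIMESTAMP_NOT_READY UINT64_MAX /* hypothetical sentinel */

/* Poll one GPU-written 64-bit slot until it leaves the "not ready" state
 * (or read it once, when waiting is not requested). */
static bool
wait_for_timestamp(p_atomic_uint64_t *src64, bool wait, uint64_t *out)
{
   uint64_t value;
   do {
      value = p_atomic_read(&src64->value);
   } while (value == TIMESTAMP_NOT_READY && wait);

   *out = value;
   return value != TIMESTAMP_NOT_READY;
}
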
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h
index 5ebe34129ba..01afc129812 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h
@@ -52,9 +52,9 @@ struct radv_amdgpu_winsys {
    bool reserve_vmid;
    uint64_t perftest;
 
-   p_atomic_uint64_t allocated_vram;
-   p_atomic_uint64_t allocated_vram_vis;
-   p_atomic_uint64_t allocated_gtt;
+   alignas(8) uint64_t allocated_vram;
+   alignas(8) uint64_t allocated_vram_vis;
+   alignas(8) uint64_t allocated_gtt;
 
    /* Global BO list */
    struct {
diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
index 1d23b92af7e..aac052bf9eb 100644
--- a/src/util/disk_cache.c
+++ b/src/util/disk_cache.c
@@ -442,7 +442,7 @@ cache_put(void *job, void *gdata, int thread_index)
          goto done;
 
       /* If the cache is too large, evict something else first. */
-      while (*dc_job->cache->size + dc_job->size > dc_job->cache->max_size &&
+      while (p_atomic_read_relaxed(&dc_job->cache->size->value) + dc_job->size > dc_job->cache->max_size &&
              i < 8) {
          disk_cache_evict_lru_item(dc_job->cache);
          i++;
diff --git a/src/util/disk_cache_os.c b/src/util/disk_cache_os.c
index 20467785091..3ec1acda56e 100644
--- a/src/util/disk_cache_os.c
+++ b/src/util/disk_cache_os.c
@@ -456,7 +456,7 @@ disk_cache_evict_lru_item(struct disk_cache *cache)
    free(dir_path);
 
    if (size) {
-      p_atomic_add(cache->size, - (uint64_t)size);
+      p_atomic_add(&cache->size->value, - (uint64_t)size);
       return;
    }
 
@@ -483,7 +483,7 @@ disk_cache_evict_lru_item(struct disk_cache *cache)
    free_lru_file_list(lru_file_list);
 
    if (size)
-      p_atomic_add(cache->size, - (uint64_t)size);
+      p_atomic_add(&cache->size->value, - (uint64_t)size);
 }
 
 void
@@ -499,7 +499,7 @@ disk_cache_evict_item(struct disk_cache *cache, char *filename)
    free(filename);
 
    if (sb.st_blocks)
-      p_atomic_add(cache->size, - (uint64_t)sb.st_blocks * 512);
+      p_atomic_add(&cache->size->value, - (uint64_t)sb.st_blocks * 512);
 }
 
 static void *
@@ -819,7 +819,7 @@ disk_cache_write_item_to_disk(struct disk_cache_put_job *dc_job,
       goto done;
    }
 
-   p_atomic_add(dc_job->cache->size, sb.st_blocks * 512);
+   p_atomic_add(&dc_job->cache->size->value, sb.st_blocks * 512);
 
  done:
    if (fd_final != -1)
@@ -1062,7 +1062,7 @@ disk_cache_mmap_cache_index(void *mem_ctx, struct disk_cache *cache,
       goto path_fail;
    cache->index_mmap_size = size;
 
-   cache->size = (uint64_t *) cache->index_mmap;
+   cache->size = (p_atomic_uint64_t *) cache->index_mmap;
    cache->stored_keys = cache->index_mmap + sizeof(uint64_t);
    mapped = true;
 
diff --git a/src/util/u_atomic.h b/src/util/u_atomic.h
index ec77bb4d39c..6d604a6de04 100644
--- a/src/util/u_atomic.h
+++ b/src/util/u_atomic.h
@@ -343,13 +343,27 @@ static inline uint64_t p_atomic_xchg_64(uint64_t *v, uint64_t i)
 /* On x86 we can have sizeof(uint64_t) = 8 and _Alignof(uint64_t) = 4, causing split locks. The
  * implementation does handle that correctly, but with an internal mutex. Extend the alignment to
  * avoid this.
+ * `p_atomic_int64_t` and `p_atomic_uint64_t` exist so that any pointer can be cast to
+ * `p_atomic_int64_t *` or `p_atomic_uint64_t *`, telling the compiler that the 64-bit atomic
+ * is accessed in an 8-byte-aligned way, which avoids clang's `misaligned atomic operation` warning.
+ * To define a 64-bit atomic member in a struct type,
+ * `alignas(8) int64_t $member` or `alignas(8) uint64_t $member` is enough.
  */
-#if  __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_ATOMICS__) && defined(USE_GCC_ATOMIC_BUILTINS)
-typedef int64_t __attribute__((aligned(_Alignof(_Atomic(int64_t))))) p_atomic_int64_t;
-typedef uint64_t __attribute__((aligned(_Alignof(_Atomic(uint64_t))))) p_atomic_uint64_t;
+typedef struct {
+#ifndef __cplusplus
+   _Alignas(8)
 #else
-typedef int64_t p_atomic_int64_t;
-typedef uint64_t p_atomic_uint64_t;
+   alignas(8)
 #endif
+   int64_t value;
+} p_atomic_int64_t;
+typedef struct {
+#ifndef __cplusplus
+   _Alignas(8)
+#else
+   alignas(8)
+#endif
+   uint64_t value;
+} p_atomic_uint64_t;
 
 #endif /* U_ATOMIC_H */
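
Taken together, the two idioms the patch establishes are: annotate plain
64-bit members with alignas(8) (as in radv_amdgpu_winsys.h above), and
cast memory that is only known to be 8-byte aligned to the wrapper type
(as disk_cache does with its mmapped index). A hedged sketch with
hypothetical names:

#include <stdalign.h>
#include <stdint.h>

#include "util/u_atomic.h"

struct ws_counters {
   /* A plain struct member only needs the extended alignment: */
   alignas(8) uint64_t allocated_vram;
};

static void
account(struct ws_counters *ws, void *index_mmap, uint64_t bytes)
{
   /* Aligned struct member: update it directly. */
   p_atomic_add(&ws->allocated_vram, bytes);

   /* Externally provided 8-byte-aligned storage: cast to the wrapper so
    * the compiler sees an aligned atomic access through ->value. */
   p_atomic_uint64_t *size = (p_atomic_uint64_t *) index_mmap;
   p_atomic_add(&size->value, bytes);
}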
