Module: Mesa Branch: main Commit: 811f332d81e943b09b055cb6f55eb560d8318bdd URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=811f332d81e943b09b055cb6f55eb560d8318bdd
Author: Rob Clark <[email protected]> Date: Wed Jun 21 13:39:56 2023 -0700 tu/drm: Factor out shared helpers Factor out a few things that we can re-use between virtio and msm backends. Signed-off-by: Rob Clark <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23533> --- src/freedreno/vulkan/meson.build | 2 +- src/freedreno/vulkan/tu_knl_drm.cc | 387 ++++++++++++++++++++++++++++++ src/freedreno/vulkan/tu_knl_drm.h | 64 +++++ src/freedreno/vulkan/tu_knl_drm_msm.cc | 414 +-------------------------------- 4 files changed, 456 insertions(+), 411 deletions(-) diff --git a/src/freedreno/vulkan/meson.build b/src/freedreno/vulkan/meson.build index aeb78455a56..298d81f5a4b 100644 --- a/src/freedreno/vulkan/meson.build +++ b/src/freedreno/vulkan/meson.build @@ -78,7 +78,7 @@ endif if freedreno_kmds.contains('msm') tu_flags += '-DTU_HAS_MSM' - libtu_files += files('tu_knl_drm_msm.cc') + libtu_files += files('tu_knl_drm_msm.cc', 'tu_knl_drm.cc') tu_deps += dep_libdrm endif diff --git a/src/freedreno/vulkan/tu_knl_drm.cc b/src/freedreno/vulkan/tu_knl_drm.cc new file mode 100644 index 00000000000..1450feadf7b --- /dev/null +++ b/src/freedreno/vulkan/tu_knl_drm.cc @@ -0,0 +1,387 @@ +/* + * Copyright © 2018 Google, Inc. + * Copyright © 2015 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#include <fcntl.h> +#include <sys/mman.h> +#include <xf86drm.h> + +#include "tu_knl_drm.h" +#include "tu_device.h" + +static inline void +tu_sync_cacheline_to_gpu(void const *p __attribute__((unused))) +{ +#if DETECT_ARCH_AARCH64 + /* Clean data cache. */ + __asm volatile("dc cvac, %0" : : "r" (p) : "memory"); +#elif (DETECT_ARCH_X86 || DETECT_ARCH_X86_64) + __builtin_ia32_clflush(p); +#elif DETECT_ARCH_ARM + /* DCCMVAC - same as DC CVAC on aarch64. + * Seems to be illegal to call from userspace. + */ + //__asm volatile("mcr p15, 0, %0, c7, c10, 1" : : "r" (p) : "memory"); + unreachable("Cache line clean is unsupported on ARMv7"); +#endif +} + +static inline void +tu_sync_cacheline_from_gpu(void const *p __attribute__((unused))) +{ +#if DETECT_ARCH_AARCH64 + /* Clean and Invalidate data cache, there is no separate Invalidate. */ + __asm volatile("dc civac, %0" : : "r" (p) : "memory"); +#elif (DETECT_ARCH_X86 || DETECT_ARCH_X86_64) + __builtin_ia32_clflush(p); +#elif DETECT_ARCH_ARM + /* DCCIMVAC - same as DC CIVAC on aarch64. + * Seems to be illegal to call from userspace. + */ + //__asm volatile("mcr p15, 0, %0, c7, c14, 1" : : "r" (p) : "memory"); + unreachable("Cache line invalidate is unsupported on ARMv7"); +#endif +} + +void +tu_sync_cache_bo(struct tu_device *dev, + struct tu_bo *bo, + VkDeviceSize offset, + VkDeviceSize size, + enum tu_mem_sync_op op) +{ + uintptr_t level1_dcache_size = dev->physical_device->level1_dcache_size; + char *start = (char *) bo->map + offset; + char *end = start + (size == VK_WHOLE_SIZE ? (bo->size - offset) : size); + + start = (char *) ((uintptr_t) start & ~(level1_dcache_size - 1)); + + for (; start < end; start += level1_dcache_size) { + if (op == TU_MEM_SYNC_CACHE_TO_GPU) { + tu_sync_cacheline_to_gpu(start); + } else { + tu_sync_cacheline_from_gpu(start); + } + } +} + +static VkResult +sync_cache(VkDevice _device, + enum tu_mem_sync_op op, + uint32_t count, + const VkMappedMemoryRange *ranges) +{ + TU_FROM_HANDLE(tu_device, device, _device); + + if (!device->physical_device->has_cached_non_coherent_memory) { + tu_finishme( + "data cache clean and invalidation are unsupported on this arch!"); + return VK_SUCCESS; + } + + for (uint32_t i = 0; i < count; i++) { + TU_FROM_HANDLE(tu_device_memory, mem, ranges[i].memory); + tu_sync_cache_bo(device, mem->bo, ranges[i].offset, ranges[i].size, op); + } + + return VK_SUCCESS; +} + +VkResult +tu_FlushMappedMemoryRanges(VkDevice _device, + uint32_t memoryRangeCount, + const VkMappedMemoryRange *pMemoryRanges) +{ + return sync_cache(_device, TU_MEM_SYNC_CACHE_TO_GPU, memoryRangeCount, + pMemoryRanges); +} + +VkResult +tu_InvalidateMappedMemoryRanges(VkDevice _device, + uint32_t memoryRangeCount, + const VkMappedMemoryRange *pMemoryRanges) +{ + return sync_cache(_device, TU_MEM_SYNC_CACHE_FROM_GPU, memoryRangeCount, + pMemoryRanges); +} + +int +tu_drm_export_dmabuf(struct tu_device *dev, struct tu_bo *bo) +{ + int prime_fd; + int ret = drmPrimeHandleToFD(dev->fd, bo->gem_handle, + DRM_CLOEXEC | DRM_RDWR, &prime_fd); + + return ret == 0 ? prime_fd : -1; +} + +void +tu_drm_bo_finish(struct tu_device *dev, struct tu_bo *bo) +{ + assert(bo->gem_handle); + + u_rwlock_rdlock(&dev->dma_bo_lock); + + if (!p_atomic_dec_zero(&bo->refcnt)) { + u_rwlock_rdunlock(&dev->dma_bo_lock); + return; + } + + if (bo->map) + munmap(bo->map, bo->size); + + tu_debug_bos_del(dev, bo); + + mtx_lock(&dev->bo_mutex); + dev->bo_count--; + dev->bo_list[bo->bo_list_idx] = dev->bo_list[dev->bo_count]; + + struct tu_bo* exchanging_bo = tu_device_lookup_bo(dev, dev->bo_list[bo->bo_list_idx].handle); + exchanging_bo->bo_list_idx = bo->bo_list_idx; + + if (bo->implicit_sync) + dev->implicit_sync_bo_count--; + + mtx_unlock(&dev->bo_mutex); + + if (dev->physical_device->has_set_iova) { + mtx_lock(&dev->vma_mutex); + struct tu_zombie_vma *vma = (struct tu_zombie_vma *) + u_vector_add(&dev->zombie_vmas); + vma->gem_handle = bo->gem_handle; +#ifdef TU_HAS_VIRTIO + vma->res_id = bo->res_id; +#endif + vma->iova = bo->iova; + vma->size = bo->size; + vma->fence = p_atomic_read(&dev->queues[0]->fence); + + /* Must be cleared under the VMA mutex, or another thread could race to + * reap the VMA, closing the BO and letting a new GEM allocation produce + * this handle again. + */ + memset(bo, 0, sizeof(*bo)); + mtx_unlock(&dev->vma_mutex); + } else { + /* Our BO structs are stored in a sparse array in the physical device, + * so we don't want to free the BO pointer, instead we want to reset it + * to 0, to signal that array entry as being free. + */ + uint32_t gem_handle = bo->gem_handle; + memset(bo, 0, sizeof(*bo)); + + /* Note that virtgpu GEM_CLOSE path is a bit different, but it does + * not use the !has_set_iova path so we can ignore that + */ + struct drm_gem_close req = { + .handle = gem_handle, + }; + + drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req); + } + + u_rwlock_rdunlock(&dev->dma_bo_lock); +} + +uint32_t +tu_syncobj_from_vk_sync(struct vk_sync *sync) +{ + uint32_t syncobj = -1; + if (vk_sync_is_tu_timeline_sync(sync)) { + syncobj = to_tu_timeline_sync(sync)->syncobj; + } else if (vk_sync_type_is_drm_syncobj(sync->type)) { + syncobj = vk_sync_as_drm_syncobj(sync)->syncobj; + } + + assert(syncobj != -1); + + return syncobj; +} + +static VkResult +tu_timeline_sync_init(struct vk_device *vk_device, + struct vk_sync *vk_sync, + uint64_t initial_value) +{ + struct tu_device *device = container_of(vk_device, struct tu_device, vk); + struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync); + uint32_t flags = 0; + + assert(device->fd >= 0); + + int err = drmSyncobjCreate(device->fd, flags, &sync->syncobj); + + if (err < 0) { + return vk_error(device, VK_ERROR_DEVICE_LOST); + } + + sync->state = initial_value ? TU_TIMELINE_SYNC_STATE_SIGNALED : + TU_TIMELINE_SYNC_STATE_RESET; + + return VK_SUCCESS; +} + +static void +tu_timeline_sync_finish(struct vk_device *vk_device, + struct vk_sync *vk_sync) +{ + struct tu_device *dev = container_of(vk_device, struct tu_device, vk); + struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync); + + assert(dev->fd >= 0); + ASSERTED int err = drmSyncobjDestroy(dev->fd, sync->syncobj); + assert(err == 0); +} + +static VkResult +tu_timeline_sync_reset(struct vk_device *vk_device, + struct vk_sync *vk_sync) +{ + struct tu_device *dev = container_of(vk_device, struct tu_device, vk); + struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync); + + int err = drmSyncobjReset(dev->fd, &sync->syncobj, 1); + if (err) { + return vk_errorf(dev, VK_ERROR_UNKNOWN, + "DRM_IOCTL_SYNCOBJ_RESET failed: %m"); + } else { + sync->state = TU_TIMELINE_SYNC_STATE_RESET; + } + + return VK_SUCCESS; +} + +static VkResult +drm_syncobj_wait(struct tu_device *device, + uint32_t *handles, uint32_t count_handles, + uint64_t timeout_nsec, bool wait_all) +{ + uint32_t syncobj_wait_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT; + if (wait_all) syncobj_wait_flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL; + + /* syncobj absolute timeouts are signed. clamp OS_TIMEOUT_INFINITE down. */ + timeout_nsec = MIN2(timeout_nsec, (uint64_t)INT64_MAX); + + int err = drmSyncobjWait(device->fd, handles, + count_handles, timeout_nsec, + syncobj_wait_flags, + NULL /* first_signaled */); + if (err && errno == ETIME) { + return VK_TIMEOUT; + } else if (err) { + return vk_errorf(device, VK_ERROR_UNKNOWN, + "DRM_IOCTL_SYNCOBJ_WAIT failed: %m"); + } + + return VK_SUCCESS; +} + +/* Based on anv_bo_sync_wait */ +static VkResult +tu_timeline_sync_wait(struct vk_device *vk_device, + uint32_t wait_count, + const struct vk_sync_wait *waits, + enum vk_sync_wait_flags wait_flags, + uint64_t abs_timeout_ns) +{ + struct tu_device *dev = container_of(vk_device, struct tu_device, vk); + bool wait_all = !(wait_flags & VK_SYNC_WAIT_ANY); + + uint32_t handles[wait_count]; + uint32_t submit_count; + VkResult ret = VK_SUCCESS; + uint32_t pending = wait_count; + struct tu_timeline_sync *submitted_syncs[wait_count]; + + while (pending) { + pending = 0; + submit_count = 0; + + for (unsigned i = 0; i < wait_count; ++i) { + struct tu_timeline_sync *sync = to_tu_timeline_sync(waits[i].sync); + + if (sync->state == TU_TIMELINE_SYNC_STATE_RESET) { + assert(!(wait_flags & VK_SYNC_WAIT_PENDING)); + pending++; + } else if (sync->state == TU_TIMELINE_SYNC_STATE_SIGNALED) { + if (wait_flags & VK_SYNC_WAIT_ANY) + return VK_SUCCESS; + } else if (sync->state == TU_TIMELINE_SYNC_STATE_SUBMITTED) { + if (!(wait_flags & VK_SYNC_WAIT_PENDING)) { + handles[submit_count] = sync->syncobj; + submitted_syncs[submit_count++] = sync; + } + } + } + + if (submit_count > 0) { + do { + ret = drm_syncobj_wait(dev, handles, submit_count, abs_timeout_ns, wait_all); + } while (ret == VK_TIMEOUT && os_time_get_nano() < abs_timeout_ns); + + if (ret == VK_SUCCESS) { + for (unsigned i = 0; i < submit_count; ++i) { + struct tu_timeline_sync *sync = submitted_syncs[i]; + sync->state = TU_TIMELINE_SYNC_STATE_SIGNALED; + } + } else { + /* return error covering timeout */ + return ret; + } + } else if (pending > 0) { + /* If we've hit this then someone decided to vkWaitForFences before + * they've actually submitted any of them to a queue. This is a + * fairly pessimal case, so it's ok to lock here and use a standard + * pthreads condition variable. + */ + pthread_mutex_lock(&dev->submit_mutex); + + /* It's possible that some of the fences have changed state since the + * last time we checked. Now that we have the lock, check for + * pending fences again and don't wait if it's changed. + */ + uint32_t now_pending = 0; + for (uint32_t i = 0; i < wait_count; i++) { + struct tu_timeline_sync *sync = to_tu_timeline_sync(waits[i].sync); + if (sync->state == TU_TIMELINE_SYNC_STATE_RESET) + now_pending++; + } + assert(now_pending <= pending); + + if (now_pending == pending) { + struct timespec abstime = { + .tv_sec = abs_timeout_ns / NSEC_PER_SEC, + .tv_nsec = abs_timeout_ns % NSEC_PER_SEC, + }; + + ASSERTED int ret; + ret = pthread_cond_timedwait(&dev->timeline_cond, + &dev->submit_mutex, &abstime); + assert(ret != EINVAL); + if (os_time_get_nano() >= abs_timeout_ns) { + pthread_mutex_unlock(&dev->submit_mutex); + return VK_TIMEOUT; + } + } + + pthread_mutex_unlock(&dev->submit_mutex); + } + } + + return ret; +} + +const struct vk_sync_type tu_timeline_sync_type = { + .size = sizeof(struct tu_timeline_sync), + .features = (enum vk_sync_features)( + VK_SYNC_FEATURE_BINARY | VK_SYNC_FEATURE_GPU_WAIT | + VK_SYNC_FEATURE_GPU_MULTI_WAIT | VK_SYNC_FEATURE_CPU_WAIT | + VK_SYNC_FEATURE_CPU_RESET | VK_SYNC_FEATURE_WAIT_ANY | + VK_SYNC_FEATURE_WAIT_PENDING), + .init = tu_timeline_sync_init, + .finish = tu_timeline_sync_finish, + .reset = tu_timeline_sync_reset, + .wait_many = tu_timeline_sync_wait, +}; diff --git a/src/freedreno/vulkan/tu_knl_drm.h b/src/freedreno/vulkan/tu_knl_drm.h new file mode 100644 index 00000000000..dffd6a1c4a5 --- /dev/null +++ b/src/freedreno/vulkan/tu_knl_drm.h @@ -0,0 +1,64 @@ +/* + * Copyright © 2018 Google, Inc. + * Copyright © 2015 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#ifndef TU_KNL_DRM_H +#define TU_KNL_DRM_H + +#include "tu_knl.h" +#include "drm-uapi/msm_drm.h" + +#include "vk_util.h" + +#include "util/timespec.h" + +enum tu_mem_sync_op { + TU_MEM_SYNC_CACHE_TO_GPU, + TU_MEM_SYNC_CACHE_FROM_GPU, +}; + +void +tu_sync_cache_bo(struct tu_device *dev, + struct tu_bo *bo, + VkDeviceSize offset, + VkDeviceSize size, + enum tu_mem_sync_op op); + +int tu_drm_export_dmabuf(struct tu_device *dev, struct tu_bo *bo); +void tu_drm_bo_finish(struct tu_device *dev, struct tu_bo *bo); + +static inline void +get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns) +{ + struct timespec t; + clock_gettime(CLOCK_MONOTONIC, &t); + tv->tv_sec = t.tv_sec + ns / 1000000000; + tv->tv_nsec = t.tv_nsec + ns % 1000000000; +} + +static inline bool +fence_before(uint32_t a, uint32_t b) +{ + return (int32_t)(a - b) < 0; +} + +extern const struct vk_sync_type tu_timeline_sync_type; + +static inline bool +vk_sync_is_tu_timeline_sync(const struct vk_sync *sync) +{ + return sync->type == &tu_timeline_sync_type; +} + +static inline struct tu_timeline_sync * +to_tu_timeline_sync(struct vk_sync *sync) +{ + assert(sync->type == &tu_timeline_sync_type); + return container_of(sync, struct tu_timeline_sync, base); +} + +uint32_t tu_syncobj_from_vk_sync(struct vk_sync *sync); + +#endif \ No newline at end of file diff --git a/src/freedreno/vulkan/tu_knl_drm_msm.cc b/src/freedreno/vulkan/tu_knl_drm_msm.cc index da4c04ff1c4..cbb15bc16fb 100644 --- a/src/freedreno/vulkan/tu_knl_drm_msm.cc +++ b/src/freedreno/vulkan/tu_knl_drm_msm.cc @@ -17,13 +17,12 @@ #include "drm-uapi/msm_drm.h" #include "util/u_debug.h" #include "util/hash_table.h" -#include "util/timespec.h" -#include "util/os_time.h" #include "tu_cmd_buffer.h" #include "tu_cs.h" #include "tu_device.h" #include "tu_dynamic_rendering.h" +#include "tu_knl_drm.h" struct tu_queue_submit { @@ -260,28 +259,6 @@ tu_gem_info(const struct tu_device *dev, uint32_t gem_handle, uint32_t info) return req.value; } -enum tu_mem_sync_op -{ - TU_MEM_SYNC_CACHE_TO_GPU, - TU_MEM_SYNC_CACHE_FROM_GPU, -}; - -static void -sync_cache_bo(struct tu_device *dev, - struct tu_bo *bo, - VkDeviceSize offset, - VkDeviceSize size, - enum tu_mem_sync_op op); - -static inline void -get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns) -{ - struct timespec t; - clock_gettime(CLOCK_MONOTONIC, &t); - tv->tv_sec = t.tv_sec + ns / 1000000000; - tv->tv_nsec = t.tv_nsec + ns % 1000000000; -} - static VkResult tu_wait_fence(struct tu_device *dev, uint32_t queue_id, @@ -601,7 +578,7 @@ msm_bo_init(struct tu_device *dev, * * MSM already does this automatically for uncached (MSM_BO_WC) memory. */ - sync_cache_bo(dev, bo, 0, VK_WHOLE_SIZE, TU_MEM_SYNC_CACHE_TO_GPU); + tu_sync_cache_bo(dev, bo, 0, VK_WHOLE_SIZE, TU_MEM_SYNC_CACHE_TO_GPU); } return result; @@ -658,16 +635,6 @@ msm_bo_init_dmabuf(struct tu_device *dev, return result; } -static int -msm_bo_export_dmabuf(struct tu_device *dev, struct tu_bo *bo) -{ - int prime_fd; - int ret = drmPrimeHandleToFD(dev->fd, bo->gem_handle, - DRM_CLOEXEC | DRM_RDWR, &prime_fd); - - return ret == 0 ? prime_fd : -1; -} - static VkResult msm_bo_map(struct tu_device *dev, struct tu_bo *bo) { @@ -696,353 +663,6 @@ msm_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo) mtx_unlock(&dev->bo_mutex); } -static void -msm_bo_finish(struct tu_device *dev, struct tu_bo *bo) -{ - assert(bo->gem_handle); - - u_rwlock_rdlock(&dev->dma_bo_lock); - - if (!p_atomic_dec_zero(&bo->refcnt)) { - u_rwlock_rdunlock(&dev->dma_bo_lock); - return; - } - - if (bo->map) - munmap(bo->map, bo->size); - - tu_debug_bos_del(dev, bo); - - mtx_lock(&dev->bo_mutex); - dev->bo_count--; - dev->bo_list[bo->bo_list_idx] = dev->bo_list[dev->bo_count]; - - struct tu_bo* exchanging_bo = tu_device_lookup_bo(dev, dev->bo_list[bo->bo_list_idx].handle); - exchanging_bo->bo_list_idx = bo->bo_list_idx; - - if (bo->implicit_sync) - dev->implicit_sync_bo_count--; - - mtx_unlock(&dev->bo_mutex); - - if (dev->physical_device->has_set_iova) { - mtx_lock(&dev->vma_mutex); - struct tu_zombie_vma *vma = (struct tu_zombie_vma *) - u_vector_add(&dev->zombie_vmas); - vma->gem_handle = bo->gem_handle; - vma->iova = bo->iova; - vma->size = bo->size; - vma->fence = p_atomic_read(&dev->queues[0]->fence); - - /* Must be cleared under the VMA mutex, or another thread could race to - * reap the VMA, closing the BO and letting a new GEM allocation produce - * this handle again. - */ - memset(bo, 0, sizeof(*bo)); - mtx_unlock(&dev->vma_mutex); - } else { - /* Our BO structs are stored in a sparse array in the physical device, - * so we don't want to free the BO pointer, instead we want to reset it - * to 0, to signal that array entry as being free. - */ - uint32_t gem_handle = bo->gem_handle; - memset(bo, 0, sizeof(*bo)); - - tu_gem_close(dev, gem_handle); - } - - u_rwlock_rdunlock(&dev->dma_bo_lock); -} - -static inline void -tu_sync_cacheline_to_gpu(void const *p __attribute__((unused))) -{ -#if DETECT_ARCH_AARCH64 - /* Clean data cache. */ - __asm volatile("dc cvac, %0" : : "r" (p) : "memory"); -#elif (DETECT_ARCH_X86 || DETECT_ARCH_X86_64) - __builtin_ia32_clflush(p); -#elif DETECT_ARCH_ARM - /* DCCMVAC - same as DC CVAC on aarch64. - * Seems to be illegal to call from userspace. - */ - //__asm volatile("mcr p15, 0, %0, c7, c10, 1" : : "r" (p) : "memory"); - unreachable("Cache line clean is unsupported on ARMv7"); -#endif -} - -static inline void -tu_sync_cacheline_from_gpu(void const *p __attribute__((unused))) -{ -#if DETECT_ARCH_AARCH64 - /* Clean and Invalidate data cache, there is no separate Invalidate. */ - __asm volatile("dc civac, %0" : : "r" (p) : "memory"); -#elif (DETECT_ARCH_X86 || DETECT_ARCH_X86_64) - __builtin_ia32_clflush(p); -#elif DETECT_ARCH_ARM - /* DCCIMVAC - same as DC CIVAC on aarch64. - * Seems to be illegal to call from userspace. - */ - //__asm volatile("mcr p15, 0, %0, c7, c14, 1" : : "r" (p) : "memory"); - unreachable("Cache line invalidate is unsupported on ARMv7"); -#endif -} - -static void -sync_cache_bo(struct tu_device *dev, - struct tu_bo *bo, - VkDeviceSize offset, - VkDeviceSize size, - enum tu_mem_sync_op op) -{ - uintptr_t level1_dcache_size = dev->physical_device->level1_dcache_size; - char *start = (char *) bo->map + offset; - char *end = start + (size == VK_WHOLE_SIZE ? (bo->size - offset) : size); - - start = (char *) ((uintptr_t) start & ~(level1_dcache_size - 1)); - - for (; start < end; start += level1_dcache_size) { - if (op == TU_MEM_SYNC_CACHE_TO_GPU) { - tu_sync_cacheline_to_gpu(start); - } else { - tu_sync_cacheline_from_gpu(start); - } - } -} - -static VkResult -sync_cache(VkDevice _device, - enum tu_mem_sync_op op, - uint32_t count, - const VkMappedMemoryRange *ranges) -{ - TU_FROM_HANDLE(tu_device, device, _device); - - if (!device->physical_device->has_cached_non_coherent_memory) { - tu_finishme( - "data cache clean and invalidation are unsupported on this arch!"); - return VK_SUCCESS; - } - - for (uint32_t i = 0; i < count; i++) { - TU_FROM_HANDLE(tu_device_memory, mem, ranges[i].memory); - sync_cache_bo(device, mem->bo, ranges[i].offset, ranges[i].size, op); - } - - return VK_SUCCESS; -} - -VkResult -tu_FlushMappedMemoryRanges(VkDevice _device, - uint32_t memoryRangeCount, - const VkMappedMemoryRange *pMemoryRanges) -{ - return sync_cache(_device, TU_MEM_SYNC_CACHE_TO_GPU, memoryRangeCount, - pMemoryRanges); -} - -VkResult -tu_InvalidateMappedMemoryRanges(VkDevice _device, - uint32_t memoryRangeCount, - const VkMappedMemoryRange *pMemoryRanges) -{ - return sync_cache(_device, TU_MEM_SYNC_CACHE_FROM_GPU, memoryRangeCount, - pMemoryRanges); -} - -static inline bool -vk_sync_is_tu_timeline_sync(const struct vk_sync *sync); -static struct tu_timeline_sync * -to_tu_timeline_sync(struct vk_sync *sync); - -static uint32_t -tu_syncobj_from_vk_sync(struct vk_sync *sync) -{ - uint32_t syncobj = -1; - if (vk_sync_is_tu_timeline_sync(sync)) { - syncobj = to_tu_timeline_sync(sync)->syncobj; - } else if (vk_sync_type_is_drm_syncobj(sync->type)) { - syncobj = vk_sync_as_drm_syncobj(sync)->syncobj; - } - - assert(syncobj != -1); - - return syncobj; -} - -static VkResult -tu_timeline_sync_init(struct vk_device *vk_device, - struct vk_sync *vk_sync, - uint64_t initial_value) -{ - struct tu_device *device = container_of(vk_device, struct tu_device, vk); - struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync); - uint32_t flags = 0; - - assert(device->fd >= 0); - - int err = drmSyncobjCreate(device->fd, flags, &sync->syncobj); - - if (err < 0) { - return vk_error(device, VK_ERROR_DEVICE_LOST); - } - - sync->state = initial_value ? TU_TIMELINE_SYNC_STATE_SIGNALED : - TU_TIMELINE_SYNC_STATE_RESET; - - return VK_SUCCESS; -} - -static void -tu_timeline_sync_finish(struct vk_device *vk_device, - struct vk_sync *vk_sync) -{ - struct tu_device *dev = container_of(vk_device, struct tu_device, vk); - struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync); - - assert(dev->fd >= 0); - ASSERTED int err = drmSyncobjDestroy(dev->fd, sync->syncobj); - assert(err == 0); -} - -static VkResult -tu_timeline_sync_reset(struct vk_device *vk_device, - struct vk_sync *vk_sync) -{ - struct tu_device *dev = container_of(vk_device, struct tu_device, vk); - struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync); - - int err = drmSyncobjReset(dev->fd, &sync->syncobj, 1); - if (err) { - return vk_errorf(dev, VK_ERROR_UNKNOWN, - "DRM_IOCTL_SYNCOBJ_RESET failed: %m"); - } else { - sync->state = TU_TIMELINE_SYNC_STATE_RESET; - } - - return VK_SUCCESS; -} - -static VkResult -drm_syncobj_wait(struct tu_device *device, - uint32_t *handles, uint32_t count_handles, - uint64_t timeout_nsec, bool wait_all) -{ - uint32_t syncobj_wait_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT; - if (wait_all) syncobj_wait_flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL; - - /* syncobj absolute timeouts are signed. clamp OS_TIMEOUT_INFINITE down. */ - timeout_nsec = MIN2(timeout_nsec, (uint64_t)INT64_MAX); - - int err = drmSyncobjWait(device->fd, handles, - count_handles, timeout_nsec, - syncobj_wait_flags, - NULL /* first_signaled */); - if (err && errno == ETIME) { - return VK_TIMEOUT; - } else if (err) { - return vk_errorf(device, VK_ERROR_UNKNOWN, - "DRM_IOCTL_SYNCOBJ_WAIT failed: %m"); - } - - return VK_SUCCESS; -} - -/* Based on anv_bo_sync_wait */ -static VkResult -tu_timeline_sync_wait(struct vk_device *vk_device, - uint32_t wait_count, - const struct vk_sync_wait *waits, - enum vk_sync_wait_flags wait_flags, - uint64_t abs_timeout_ns) -{ - struct tu_device *dev = container_of(vk_device, struct tu_device, vk); - bool wait_all = !(wait_flags & VK_SYNC_WAIT_ANY); - - uint32_t handles[wait_count]; - uint32_t submit_count; - VkResult ret = VK_SUCCESS; - uint32_t pending = wait_count; - struct tu_timeline_sync *submitted_syncs[wait_count]; - - while (pending) { - pending = 0; - submit_count = 0; - - for (unsigned i = 0; i < wait_count; ++i) { - struct tu_timeline_sync *sync = to_tu_timeline_sync(waits[i].sync); - - if (sync->state == TU_TIMELINE_SYNC_STATE_RESET) { - assert(!(wait_flags & VK_SYNC_WAIT_PENDING)); - pending++; - } else if (sync->state == TU_TIMELINE_SYNC_STATE_SIGNALED) { - if (wait_flags & VK_SYNC_WAIT_ANY) - return VK_SUCCESS; - } else if (sync->state == TU_TIMELINE_SYNC_STATE_SUBMITTED) { - if (!(wait_flags & VK_SYNC_WAIT_PENDING)) { - handles[submit_count] = sync->syncobj; - submitted_syncs[submit_count++] = sync; - } - } - } - - if (submit_count > 0) { - do { - ret = drm_syncobj_wait(dev, handles, submit_count, abs_timeout_ns, wait_all); - } while (ret == VK_TIMEOUT && os_time_get_nano() < abs_timeout_ns); - - if (ret == VK_SUCCESS) { - for (unsigned i = 0; i < submit_count; ++i) { - struct tu_timeline_sync *sync = submitted_syncs[i]; - sync->state = TU_TIMELINE_SYNC_STATE_SIGNALED; - } - } else { - /* return error covering timeout */ - return ret; - } - } else if (pending > 0) { - /* If we've hit this then someone decided to vkWaitForFences before - * they've actually submitted any of them to a queue. This is a - * fairly pessimal case, so it's ok to lock here and use a standard - * pthreads condition variable. - */ - pthread_mutex_lock(&dev->submit_mutex); - - /* It's possible that some of the fences have changed state since the - * last time we checked. Now that we have the lock, check for - * pending fences again and don't wait if it's changed. - */ - uint32_t now_pending = 0; - for (uint32_t i = 0; i < wait_count; i++) { - struct tu_timeline_sync *sync = to_tu_timeline_sync(waits[i].sync); - if (sync->state == TU_TIMELINE_SYNC_STATE_RESET) - now_pending++; - } - assert(now_pending <= pending); - - if (now_pending == pending) { - struct timespec abstime = { - .tv_sec = abs_timeout_ns / NSEC_PER_SEC, - .tv_nsec = abs_timeout_ns % NSEC_PER_SEC, - }; - - ASSERTED int ret; - ret = pthread_cond_timedwait(&dev->timeline_cond, - &dev->submit_mutex, &abstime); - assert(ret != EINVAL); - if (os_time_get_nano() >= abs_timeout_ns) { - pthread_mutex_unlock(&dev->submit_mutex); - return VK_TIMEOUT; - } - } - - pthread_mutex_unlock(&dev->submit_mutex); - } - } - - return ret; -} - static VkResult tu_queue_submit_create_locked(struct tu_queue *queue, struct vk_queue_submit *vk_submit, @@ -1418,40 +1038,14 @@ static const struct tu_knl msm_knl_funcs = { .submitqueue_close = msm_submitqueue_close, .bo_init = msm_bo_init, .bo_init_dmabuf = msm_bo_init_dmabuf, - .bo_export_dmabuf = msm_bo_export_dmabuf, + .bo_export_dmabuf = tu_drm_export_dmabuf, .bo_map = msm_bo_map, .bo_allow_dump = msm_bo_allow_dump, - .bo_finish = msm_bo_finish, + .bo_finish = tu_drm_bo_finish, .device_wait_u_trace = msm_device_wait_u_trace, .queue_submit = msm_queue_submit, }; -static const struct vk_sync_type tu_timeline_sync_type = { - .size = sizeof(struct tu_timeline_sync), - .features = (enum vk_sync_features)( - VK_SYNC_FEATURE_BINARY | VK_SYNC_FEATURE_GPU_WAIT | - VK_SYNC_FEATURE_GPU_MULTI_WAIT | VK_SYNC_FEATURE_CPU_WAIT | - VK_SYNC_FEATURE_CPU_RESET | VK_SYNC_FEATURE_WAIT_ANY | - VK_SYNC_FEATURE_WAIT_PENDING), - .init = tu_timeline_sync_init, - .finish = tu_timeline_sync_finish, - .reset = tu_timeline_sync_reset, - .wait_many = tu_timeline_sync_wait, -}; - -static inline bool -vk_sync_is_tu_timeline_sync(const struct vk_sync *sync) -{ - return sync->type == &tu_timeline_sync_type; -} - -static struct tu_timeline_sync * -to_tu_timeline_sync(struct vk_sync *sync) -{ - assert(sync->type == &tu_timeline_sync_type); - return container_of(sync, struct tu_timeline_sync, base); -} - VkResult tu_knl_drm_msm_load(struct tu_instance *instance, int fd, struct _drmVersion *version,
