From: Dave Airlie <[email protected]>

This uses the new kernel interfaces for reduced CS overhead.
We only set the local flag for memory allocations that don't have
a dedicated allocation and that aren't imports.

v2: add the flag to all the internal buffer creation paths.

Signed-off-by: Dave Airlie <[email protected]>
---
 src/amd/vulkan/radv_cmd_buffer.c              |  3 ++-
 src/amd/vulkan/radv_debug.c                   |  3 ++-
 src/amd/vulkan/radv_descriptor_set.c          |  2 +-
 src/amd/vulkan/radv_device.c                  | 20 ++++++++++++--------
 src/amd/vulkan/radv_query.c                   |  2 +-
 src/amd/vulkan/radv_radeon_winsys.h           |  1 +
 src/amd/vulkan/radv_shader.c                  |  2 +-
 src/amd/vulkan/si_cmd_buffer.c                |  3 ++-
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c |  4 ++++
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h |  1 +
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c |  3 +++
 11 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 807283be1d8..2e7c3e61bb7 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -263,7 +263,8 @@ radv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer 
*cmd_buffer,
        bo = device->ws->buffer_create(device->ws,
                                       new_size, 4096,
                                       RADEON_DOMAIN_GTT,
-                                      RADEON_FLAG_CPU_ACCESS);
+                                      RADEON_FLAG_CPU_ACCESS|
+                                      RADEON_FLAG_NO_INTERPROCESS_SHARING);
 
        if (!bo) {
                cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
diff --git a/src/amd/vulkan/radv_debug.c b/src/amd/vulkan/radv_debug.c
index b69c05b64f3..cdf8e7a1145 100644
--- a/src/amd/vulkan/radv_debug.c
+++ b/src/amd/vulkan/radv_debug.c
@@ -61,7 +61,8 @@ radv_init_trace(struct radv_device *device)
 
        device->trace_bo = ws->buffer_create(ws, TRACE_BO_SIZE, 8,
                                             RADEON_DOMAIN_VRAM,
-                                            RADEON_FLAG_CPU_ACCESS);
+                                            RADEON_FLAG_CPU_ACCESS|
+                                            
RADEON_FLAG_NO_INTERPROCESS_SHARING);
        if (!device->trace_bo)
                return false;
 
diff --git a/src/amd/vulkan/radv_descriptor_set.c 
b/src/amd/vulkan/radv_descriptor_set.c
index c6b736bb689..167944f4e2f 100644
--- a/src/amd/vulkan/radv_descriptor_set.c
+++ b/src/amd/vulkan/radv_descriptor_set.c
@@ -431,7 +431,7 @@ VkResult radv_CreateDescriptorPool(
 
        if (bo_size) {
                pool->bo = device->ws->buffer_create(device->ws, bo_size,
-                                                       32, RADEON_DOMAIN_VRAM, 
0);
+                                                       32, RADEON_DOMAIN_VRAM, 
RADEON_FLAG_NO_INTERPROCESS_SHARING);
                pool->mapped_ptr = (uint8_t*)device->ws->buffer_map(pool->bo);
        }
        pool->size = bo_size;
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index ebc74fbadef..c4f6e2b2dc4 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1394,6 +1394,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
        unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
        unsigned max_offchip_buffers;
        unsigned hs_offchip_param = 0;
+       uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | 
RADEON_FLAG_NO_INTERPROCESS_SHARING;
        if (!queue->has_tess_rings) {
                if (needs_tess_rings)
                        add_tess_rings = true;
@@ -1427,7 +1428,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
                                                              scratch_size,
                                                              4096,
                                                              
RADEON_DOMAIN_VRAM,
-                                                             
RADEON_FLAG_NO_CPU_ACCESS);
+                                                             ring_bo_flags);
                if (!scratch_bo)
                        goto fail;
        } else
@@ -1438,7 +1439,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
                                                                      
compute_scratch_size,
                                                                      4096,
                                                                      
RADEON_DOMAIN_VRAM,
-                                                                     
RADEON_FLAG_NO_CPU_ACCESS);
+                                                                     
ring_bo_flags);
                if (!compute_scratch_bo)
                        goto fail;
 
@@ -1450,7 +1451,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
                                                                esgs_ring_size,
                                                                4096,
                                                                
RADEON_DOMAIN_VRAM,
-                                                               
RADEON_FLAG_NO_CPU_ACCESS);
+                                                               ring_bo_flags);
                if (!esgs_ring_bo)
                        goto fail;
        } else {
@@ -1463,7 +1464,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
                                                                gsvs_ring_size,
                                                                4096,
                                                                
RADEON_DOMAIN_VRAM,
-                                                               
RADEON_FLAG_NO_CPU_ACCESS);
+                                                               ring_bo_flags);
                if (!gsvs_ring_bo)
                        goto fail;
        } else {
@@ -1476,14 +1477,14 @@ radv_get_preamble_cs(struct radv_queue *queue,
                                                                       
tess_factor_ring_size,
                                                                       256,
                                                                       
RADEON_DOMAIN_VRAM,
-                                                                      
RADEON_FLAG_NO_CPU_ACCESS);
+                                                                      
ring_bo_flags);
                if (!tess_factor_ring_bo)
                        goto fail;
                tess_offchip_ring_bo = 
queue->device->ws->buffer_create(queue->device->ws,
                                                                       
tess_offchip_ring_size,
                                                                       256,
                                                                       
RADEON_DOMAIN_VRAM,
-                                                                      
RADEON_FLAG_NO_CPU_ACCESS);
+                                                                       
ring_bo_flags);
                if (!tess_offchip_ring_bo)
                        goto fail;
        } else {
@@ -1510,7 +1511,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
                                                                 size,
                                                                 4096,
                                                                 
RADEON_DOMAIN_VRAM,
-                                                                
RADEON_FLAG_CPU_ACCESS);
+                                                                
RADEON_FLAG_CPU_ACCESS|RADEON_FLAG_NO_INTERPROCESS_SHARING);
                if (!descriptor_bo)
                        goto fail;
        } else
@@ -2119,6 +2120,9 @@ VkResult radv_alloc_memory(VkDevice                       
 _device,
        if (mem_flags & RADV_MEM_IMPLICIT_SYNC)
                flags |= RADEON_FLAG_IMPLICIT_SYNC;
 
+       if (!dedicate_info && !import_info)
+               flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
+
        mem->bo = device->ws->buffer_create(device->ws, alloc_size, 
device->physical_device->rad_info.max_alignment,
                                               domain, flags);
 
@@ -2682,7 +2686,7 @@ VkResult radv_CreateEvent(
 
        event->bo = device->ws->buffer_create(device->ws, 8, 8,
                                              RADEON_DOMAIN_GTT,
-                                             RADEON_FLAG_VA_UNCACHED | 
RADEON_FLAG_CPU_ACCESS);
+                                             RADEON_FLAG_VA_UNCACHED | 
RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING);
        if (!event->bo) {
                vk_free2(&device->alloc, pAllocator, event);
                return VK_ERROR_OUT_OF_DEVICE_MEMORY;
diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index 20d4ec060fc..64fc5c5cd2d 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -780,7 +780,7 @@ VkResult radv_CreateQueryPool(
                size += 4 * pCreateInfo->queryCount;
 
        pool->bo = device->ws->buffer_create(device->ws, size,
-                                            64, RADEON_DOMAIN_GTT, 0);
+                                            64, RADEON_DOMAIN_GTT, 
RADEON_FLAG_NO_INTERPROCESS_SHARING);
 
        if (!pool->bo) {
                vk_free2(&device->alloc, pAllocator, pool);
diff --git a/src/amd/vulkan/radv_radeon_winsys.h 
b/src/amd/vulkan/radv_radeon_winsys.h
index cf5a9e8f069..395c8499b3d 100644
--- a/src/amd/vulkan/radv_radeon_winsys.h
+++ b/src/amd/vulkan/radv_radeon_winsys.h
@@ -54,6 +54,7 @@ enum radeon_bo_flag { /* bitfield */
        RADEON_FLAG_VIRTUAL =       (1 << 3),
        RADEON_FLAG_VA_UNCACHED =   (1 << 4),
        RADEON_FLAG_IMPLICIT_SYNC = (1 << 5),
+       RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 6),
 };
 
 enum radeon_bo_usage { /* bitfield */
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 59039170687..7f10798fdf4 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -325,7 +325,7 @@ radv_alloc_shader_memory(struct radv_device *device,
 
        slab->size = 256 * 1024;
        slab->bo = device->ws->buffer_create(device->ws, slab->size, 256,
-                                            RADEON_DOMAIN_VRAM, 0);
+                                            RADEON_DOMAIN_VRAM, 
RADEON_FLAG_NO_INTERPROCESS_SHARING);
        slab->ptr = (char*)device->ws->buffer_map(slab->bo);
        list_inithead(&slab->shaders);
 
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index 20144d39ea3..89ee399817d 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -571,7 +571,8 @@ cik_create_gfx_config(struct radv_device *device)
        device->gfx_init = device->ws->buffer_create(device->ws,
                                                     cs->cdw * 4, 4096,
                                                     RADEON_DOMAIN_GTT,
-                                                    RADEON_FLAG_CPU_ACCESS);
+                                                    RADEON_FLAG_CPU_ACCESS|
+                                                    
RADEON_FLAG_NO_INTERPROCESS_SHARING);
        if (!device->gfx_init)
                goto fail;
 
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c 
b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
index 15099b318e7..dac549a20ad 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
@@ -332,6 +332,10 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
                request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
        if (!(flags & RADEON_FLAG_IMPLICIT_SYNC) && ws->info.drm_minor >= 22)
                request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
+       if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING && ws->info.drm_minor 
>= 20) {
+               bo->is_local = true;
+               request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
+       }
 
        /* this won't do anything on pre 4.9 kernels */
        if (ws->zero_all_vram_allocs && (initial_domain & RADEON_DOMAIN_VRAM))
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h 
b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
index f32e4308386..f9aac9451c0 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
@@ -45,6 +45,7 @@ struct radv_amdgpu_winsys_bo {
        uint64_t size;
        struct radv_amdgpu_winsys *ws;
        bool is_virtual;
+       bool is_local;
        int ref_count;
 
        union {
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c 
b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index 46e5b767033..a4c63f92261 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -471,6 +471,9 @@ static void radv_amdgpu_cs_add_buffer(struct 
radeon_winsys_cs *_cs,
                return;
        }
 
+       if (bo->is_local)
+               return;
+
        radv_amdgpu_cs_add_buffer_internal(cs, bo->bo, priority);
 }
 
-- 
2.14.2

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to