On all other platforms, it's safe to use the usual PTE settings for both
internal and external BOs.  On Broadwell, however, we can't get the
right caching behavior for scanout without disabling eLLC and we really
don't want to do this on everything.

In order to do this, we add an anv-specific BO flag for "external" and
use that to distinguish between buffers which may be shared with other
processes and/or display and those which are entirely internal.  That,
together with an anv_mocs_for_bo helper lets us choose the right MOCS
settings for each BO use.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99507
Cc: mesa-sta...@lists.freedesktop.org
---
 src/intel/vulkan/anv_allocator.c   | 12 ++++++++++--
 src/intel/vulkan/anv_batch_chain.c |  2 +-
 src/intel/vulkan/anv_blorp.c       | 15 ++++++++-------
 src/intel/vulkan/anv_device.c      |  9 +++++++--
 src/intel/vulkan/anv_image.c       |  5 +++--
 src/intel/vulkan/anv_intel.c       |  2 +-
 src/intel/vulkan/anv_private.h     | 20 ++++++++++++++++++++
 src/intel/vulkan/gen7_cmd_buffer.c |  3 ++-
 src/intel/vulkan/gen8_cmd_buffer.c |  3 ++-
 src/intel/vulkan/genX_cmd_buffer.c | 18 +++++++++---------
 src/intel/vulkan/genX_gpu_memcpy.c |  5 ++---
 src/intel/vulkan/genX_state.c      |  6 ++++++
 12 files changed, 71 insertions(+), 29 deletions(-)

diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
index ab01d46cbeb..f62d48ae3fe 100644
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -1253,7 +1253,8 @@ anv_bo_cache_lookup(struct anv_bo_cache *cache, uint32_t 
gem_handle)
    (EXEC_OBJECT_WRITE | \
     EXEC_OBJECT_ASYNC | \
     EXEC_OBJECT_SUPPORTS_48B_ADDRESS | \
-    EXEC_OBJECT_PINNED)
+    EXEC_OBJECT_PINNED | \
+    ANV_BO_EXTERNAL)
 
 VkResult
 anv_bo_cache_alloc(struct anv_device *device,
@@ -1311,6 +1312,7 @@ anv_bo_cache_import(struct anv_device *device,
                     struct anv_bo **bo_out)
 {
    assert(bo_flags == (bo_flags & ANV_BO_CACHE_SUPPORTED_FLAGS));
+   assert(bo_flags & ANV_BO_EXTERNAL);
 
    pthread_mutex_lock(&cache->mutex);
 
@@ -1327,7 +1329,7 @@ anv_bo_cache_import(struct anv_device *device,
        * client has imported a BO twice in different ways and they get what
        * they have coming.
        */
-      uint64_t new_flags = 0;
+      uint64_t new_flags = ANV_BO_EXTERNAL;
       new_flags |= (bo->bo.flags | bo_flags) & EXEC_OBJECT_WRITE;
       new_flags |= (bo->bo.flags & bo_flags) & EXEC_OBJECT_ASYNC;
       new_flags |= (bo->bo.flags & bo_flags) & 
EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
@@ -1411,6 +1413,12 @@ anv_bo_cache_export(struct anv_device *device,
    assert(anv_bo_cache_lookup(cache, bo_in->gem_handle) == bo_in);
    struct anv_cached_bo *bo = (struct anv_cached_bo *)bo_in;
 
+   /* This BO must have been flagged external in order for us to be able
+    * to export it.  This is done based on external options passed into
+    * anv_AllocateMemory.
+    */
+   assert(bo->bo.flags & ANV_BO_EXTERNAL);
+
    int fd = anv_gem_handle_to_fd(device, bo->bo.gem_handle);
    if (fd < 0)
       return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
diff --git a/src/intel/vulkan/anv_batch_chain.c 
b/src/intel/vulkan/anv_batch_chain.c
index 0f7c8325ea4..3e13553ac18 100644
--- a/src/intel/vulkan/anv_batch_chain.c
+++ b/src/intel/vulkan/anv_batch_chain.c
@@ -1088,7 +1088,7 @@ anv_execbuf_add_bo(struct anv_execbuf *exec,
       obj->relocs_ptr = 0;
       obj->alignment = 0;
       obj->offset = bo->offset;
-      obj->flags = bo->flags | extra_flags;
+      obj->flags = (bo->flags & ~ANV_BO_FLAG_MASK) | extra_flags;
       obj->rsvd1 = 0;
       obj->rsvd2 = 0;
    }
diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index cd67cc636b2..9eb7aead8b0 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -155,7 +155,7 @@ get_blorp_surf_for_anv_buffer(struct anv_device *device,
       .addr = {
          .buffer = buffer->address.bo,
          .offset = buffer->address.offset + offset,
-         .mocs = device->default_mocs,
+         .mocs = anv_mocs_for_bo(device, buffer->address.bo),
       },
    };
 
@@ -208,7 +208,7 @@ get_blorp_surf_for_anv_image(const struct anv_device 
*device,
       .addr = {
          .buffer = image->planes[plane].address.bo,
          .offset = image->planes[plane].address.offset + surface->offset,
-         .mocs = device->default_mocs,
+         .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo),
       },
    };
 
@@ -218,7 +218,7 @@ get_blorp_surf_for_anv_image(const struct anv_device 
*device,
       blorp_surf->aux_addr = (struct blorp_address) {
          .buffer = image->planes[plane].address.bo,
          .offset = image->planes[plane].address.offset + aux_surface->offset,
-         .mocs = device->default_mocs,
+         .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo),
       };
       blorp_surf->aux_usage = aux_usage;
 
@@ -663,12 +663,12 @@ void anv_CmdCopyBuffer(
       struct blorp_address src = {
          .buffer = src_buffer->address.bo,
          .offset = src_buffer->address.offset + pRegions[r].srcOffset,
-         .mocs = cmd_buffer->device->default_mocs,
+         .mocs = anv_mocs_for_bo(cmd_buffer->device, src_buffer->address.bo),
       };
       struct blorp_address dst = {
          .buffer = dst_buffer->address.bo,
          .offset = dst_buffer->address.offset + pRegions[r].dstOffset,
-         .mocs = cmd_buffer->device->default_mocs,
+         .mocs = anv_mocs_for_bo(cmd_buffer->device, dst_buffer->address.bo),
       };
 
       blorp_buffer_copy(&batch, src, dst, pRegions[r].size);
@@ -721,7 +721,7 @@ void anv_CmdUpdateBuffer(
       struct blorp_address dst = {
          .buffer = dst_buffer->address.bo,
          .offset = dst_buffer->address.offset + dstOffset,
-         .mocs = cmd_buffer->device->default_mocs,
+         .mocs = anv_mocs_for_bo(cmd_buffer->device, dst_buffer->address.bo),
       };
 
       blorp_buffer_copy(&batch, src, dst, copy_size);
@@ -1431,7 +1431,8 @@ anv_image_copy_to_shadow(struct anv_cmd_buffer 
*cmd_buffer,
          .buffer = image->planes[0].address.bo,
          .offset = image->planes[0].address.offset +
                    image->planes[0].shadow_surface.offset,
-         .mocs = cmd_buffer->device->default_mocs,
+         .mocs = anv_mocs_for_bo(cmd_buffer->device,
+                                 image->planes[0].address.bo),
       },
    };
 
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 07de5f7543e..aaf6bceb302 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -2212,8 +2212,8 @@ VkResult anv_AllocateMemory(
              fd_info->handleType ==
                VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
 
-      result = anv_bo_cache_import(device, &device->bo_cache,
-                                   fd_info->fd, bo_flags, &mem->bo);
+      result = anv_bo_cache_import(device, &device->bo_cache, fd_info->fd,
+                                   bo_flags | ANV_BO_EXTERNAL, &mem->bo);
       if (result != VK_SUCCESS)
          goto fail;
 
@@ -2250,6 +2250,11 @@ VkResult anv_AllocateMemory(
        */
       close(fd_info->fd);
    } else {
+      const VkExportMemoryAllocateInfoKHR *fd_info =
+         vk_find_struct_const(pAllocateInfo->pNext, 
EXPORT_MEMORY_ALLOCATE_INFO_KHR);
+      if (fd_info && fd_info->handleTypes)
+         bo_flags |= ANV_BO_EXTERNAL;
+
       result = anv_bo_cache_alloc(device, &device->bo_cache,
                                   pAllocateInfo->allocationSize, bo_flags,
                                   &mem->bo);
diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
index 36d4ac13c75..28d70967b05 100644
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -1095,7 +1095,7 @@ anv_image_fill_surface_state(struct anv_device *device,
                             .size = surface->isl.size,
                             .format = ISL_FORMAT_RAW,
                             .stride = 1,
-                            .mocs = device->default_mocs);
+                            .mocs = anv_mocs_for_bo(device, address.bo));
       state_inout->address = address,
       state_inout->aux_address = ANV_NULL_ADDRESS;
       state_inout->clear_address = ANV_NULL_ADDRESS;
@@ -1196,7 +1196,8 @@ anv_image_fill_surface_state(struct anv_device *device,
                           .aux_address = anv_address_physical(aux_address),
                           .clear_address = anv_address_physical(clear_address),
                           .use_clear_address = 
!anv_address_is_null(clear_address),
-                          .mocs = device->default_mocs,
+                          .mocs = anv_mocs_for_bo(device,
+                                                  state_inout->address.bo),
                           .x_offset_sa = tile_x_sa,
                           .y_offset_sa = tile_y_sa);
 
diff --git a/src/intel/vulkan/anv_intel.c b/src/intel/vulkan/anv_intel.c
index 06db5787a9c..ed1bc096c66 100644
--- a/src/intel/vulkan/anv_intel.c
+++ b/src/intel/vulkan/anv_intel.c
@@ -73,7 +73,7 @@ VkResult anv_CreateDmaBufImageINTEL(
 
    image = anv_image_from_handle(image_h);
 
-   uint64_t bo_flags = 0;
+   uint64_t bo_flags = ANV_BO_EXTERNAL;
    if (device->instance->physicalDevice.supports_48bit_addresses)
       bo_flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
    if (device->instance->physicalDevice.use_softpin)
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 5537c8cab57..5598d2bf5be 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -555,6 +555,10 @@ anv_multialloc_alloc2(struct anv_multialloc *ma,
    return anv_multialloc_alloc(ma, alloc ? alloc : parent_alloc, scope);
 }
 
+/* Extra ANV-defined BO flags which won't be passed to the kernel */
+#define ANV_BO_EXTERNAL    (1ull << 31)
+#define ANV_BO_FLAG_MASK   (1ull << 31)
+
 struct anv_bo {
    uint32_t gem_handle;
 
@@ -1012,6 +1016,7 @@ struct anv_device {
     struct anv_scratch_pool                     scratch_pool;
 
     uint32_t                                    default_mocs;
+    uint32_t                                    external_mocs;
 
     pthread_mutex_t                             mutex;
     pthread_cond_t                              queue_submit;
@@ -1041,6 +1046,15 @@ anv_binding_table_pool_free(struct anv_device *device, 
struct anv_state state) {
    anv_state_pool_free(anv_binding_table_pool(device), state);
 }
 
+static inline uint32_t
+anv_mocs_for_bo(const struct anv_device *device, const struct anv_bo *bo)
+{
+   if (bo->flags & ANV_BO_EXTERNAL)
+      return device->external_mocs;
+   else
+      return device->default_mocs;
+}
+
 static void inline
 anv_state_flush(struct anv_device *device, struct anv_state state)
 {
@@ -1322,6 +1336,12 @@ _anv_combine_address(struct anv_batch *batch, void 
*location,
       .AgeforQUADLRU = 0                                       \
    }
 
+#define GEN8_EXTERNAL_MOCS (struct GEN8_MEMORY_OBJECT_CONTROL_STATE) {     \
+      .MemoryTypeLLCeLLCCacheabilityControl = UCwithFenceifcoherentcycle,  \
+      .TargetCache = L3DefertoPATforLLCeLLCselection,                      \
+      .AgeforQUADLRU = 0                                                   \
+   }
+
 /* Skylake: MOCS is now an index into an array of 62 different caching
  * configurations programmed by the kernel.
  */
diff --git a/src/intel/vulkan/gen7_cmd_buffer.c 
b/src/intel/vulkan/gen7_cmd_buffer.c
index 3acfbb710c0..fc51c067439 100644
--- a/src/intel/vulkan/gen7_cmd_buffer.c
+++ b/src/intel/vulkan/gen7_cmd_buffer.c
@@ -245,7 +245,8 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer 
*cmd_buffer)
          ib.CutIndexEnable             = pipeline->primitive_restart;
 #endif
          ib.IndexFormat                = cmd_buffer->state.gfx.gen7.index_type;
-         ib.MemoryObjectControlState   = GENX(MOCS);
+         ib.IndexBufferMOCS            = anv_mocs_for_bo(cmd_buffer->device,
+                                                         buffer->address.bo);
 
          ib.BufferStartingAddress      = anv_address_add(buffer->address,
                                                          offset);
diff --git a/src/intel/vulkan/gen8_cmd_buffer.c 
b/src/intel/vulkan/gen8_cmd_buffer.c
index ca2baf84a19..752d04f3013 100644
--- a/src/intel/vulkan/gen8_cmd_buffer.c
+++ b/src/intel/vulkan/gen8_cmd_buffer.c
@@ -565,7 +565,8 @@ void genX(CmdBindIndexBuffer)(
 
    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
       ib.IndexFormat                = vk_to_gen_index_type[indexType];
-      ib.MemoryObjectControlState   = GENX(MOCS);
+      ib.IndexBufferMOCS            = anv_mocs_for_bo(cmd_buffer->device,
+                                                      buffer->address.bo);
       ib.BufferStartingAddress      = anv_address_add(buffer->address, offset);
       ib.BufferSize                 = buffer->size - offset;
    }
diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index b7ed817d3a0..b55acca4521 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -2514,12 +2514,11 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer 
*cmd_buffer)
          struct GENX(VERTEX_BUFFER_STATE) state = {
             .VertexBufferIndex = vb,
 
-#if GEN_GEN >= 8
-            .MemoryObjectControlState = GENX(MOCS),
-#else
+            .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device,
+                                                buffer->address.bo),
+#if GEN_GEN <= 7
             .BufferAccessType = pipeline->vb[vb].instanced ? INSTANCEDATA : 
VERTEXDATA,
             .InstanceDataStepRate = pipeline->vb[vb].instance_divisor,
-            .VertexBufferMemoryObjectControlState = GENX(MOCS),
 #endif
 
             .AddressModifyEnable = true,
@@ -2633,12 +2632,11 @@ emit_vertex_bo(struct anv_cmd_buffer *cmd_buffer,
          .VertexBufferIndex = index,
          .AddressModifyEnable = true,
          .BufferPitch = 0,
+         .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, addr.bo),
 #if (GEN_GEN >= 8)
-         .MemoryObjectControlState = GENX(MOCS),
          .BufferStartingAddress = addr,
          .BufferSize = size
 #else
-         .VertexBufferMemoryObjectControlState = GENX(MOCS),
          .BufferStartingAddress = addr,
          .EndAddress = anv_address_add(addr, size),
 #endif
@@ -3390,9 +3388,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer 
*cmd_buffer)
    if (dw == NULL)
       return;
 
-   struct isl_depth_stencil_hiz_emit_info info = {
-      .mocs = device->default_mocs,
-   };
+   struct isl_depth_stencil_hiz_emit_info info = { };
 
    if (iview)
       info.view = &iview->planes[0].isl;
@@ -3410,6 +3406,8 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer 
*cmd_buffer)
                               image->planes[depth_plane].address.bo,
                               image->planes[depth_plane].address.offset +
                               surface->offset);
+      info.mocs =
+         anv_mocs_for_bo(device, image->planes[depth_plane].address.bo);
 
       const uint32_t ds =
          cmd_buffer->state.subpass->depth_stencil_attachment->attachment;
@@ -3441,6 +3439,8 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer 
*cmd_buffer)
                               image->planes[stencil_plane].address.bo,
                               image->planes[stencil_plane].address.offset +
                               surface->offset);
+      info.mocs =
+         anv_mocs_for_bo(device, image->planes[stencil_plane].address.bo);
    }
 
    isl_emit_depth_stencil_hiz_s(&device->isl_dev, dw, &info);
diff --git a/src/intel/vulkan/genX_gpu_memcpy.c 
b/src/intel/vulkan/genX_gpu_memcpy.c
index eaafcfa3b22..b51c1804659 100644
--- a/src/intel/vulkan/genX_gpu_memcpy.c
+++ b/src/intel/vulkan/genX_gpu_memcpy.c
@@ -158,11 +158,10 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer 
*cmd_buffer,
          .AddressModifyEnable = true,
          .BufferStartingAddress = { src, src_offset },
          .BufferPitch = bs,
+         .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, src),
 #if (GEN_GEN >= 8)
-         .MemoryObjectControlState = GENX(MOCS),
          .BufferSize = size,
 #else
-         .VertexBufferMemoryObjectControlState = GENX(MOCS),
          .EndAddress = { src, src_offset + size - 1 },
 #endif
       });
@@ -219,7 +218,7 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer 
*cmd_buffer,
 
    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SO_BUFFER), sob) {
       sob.SOBufferIndex = 0;
-      sob.SOBufferObjectControlState = GENX(MOCS);
+      sob.SOBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, dst),
       sob.SurfaceBaseAddress = (struct anv_address) { dst, dst_offset };
 
 #if GEN_GEN >= 8
diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
index 4a175b9234d..37b99f865e9 100644
--- a/src/intel/vulkan/genX_state.c
+++ b/src/intel/vulkan/genX_state.c
@@ -93,6 +93,12 @@ genX(init_device_state)(struct anv_device *device)
 {
    GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->default_mocs,
                                           &GENX(MOCS));
+#if GEN_GEN == 8
+   GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->external_mocs,
+                                          &GENX(EXTERNAL_MOCS));
+#else
+   device->external_mocs = device->default_mocs;
+#endif
 
    struct anv_batch batch;
 
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to