If EXEC_OBJECT_PINNED is set, we don't want to emit any relocations.
We simply want to add the BO to the validation list, and possibly mark
it as writeable.  The new brw_use_pinned_bo() interface does just that.

To avoid having to make every caller consider both the relocation and
softpin cases, we make emit_reloc() call brw_use_pinned_bo() when given
a softpinned buffer.

We also can't grow buffers that are softpinned - the mechanism places a
larger BO at the same offset as the original, which requires moving BOs
around in the VMA.  With softpin, we only allocate enough VMA for the
original size of the BO.
---
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 40 +++++++++++++++++--
 src/mesa/drivers/dri/i965/intel_batchbuffer.h |  4 ++
 2 files changed, 40 insertions(+), 4 deletions(-)

Overallocating is gross, but I have to make incremental progress somehow.
For batch buffers, the ultimate plan is to switch from growing to batch
chaining (just create another batch and MI_BATCH_BUFFER_START to GOTO
the new batch and carry on).  We can do that on Gen8+.  It's easier to
do that in the softpin world - otherwise, we'd need a third set of
relocation lists, which gets messier.

For state buffers, the plan is to set Dynamic State Base Address to a
fixed 4GB region of the VMA, then just use intel_upload.c to make
however many buffers we need, allocated out of that memzone.  Being able
to fix BO addresses within 4GB of the base address eliminates the need
to force all state to be in a single BO, and gives us a lot more
flexibility with less magic required.

But again...can't convert everything overnight, especially when having
to care about older hardware and ancient kernels.

diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index fe1ea02ca41..96f1a238c0d 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -232,6 +232,10 @@ recreate_growing_buffer(struct brw_context *brw,
    struct intel_batchbuffer *batch = &brw->batch;
    struct brw_bufmgr *bufmgr = screen->bufmgr;
 
+   /* We can't grow buffers when using softpin, so just overallocate them. */
+   if (brw_using_softpin(bufmgr))
+      size *= 2;
+
    grow->bo = brw_bo_alloc(bufmgr, name, size, memzone);
    grow->bo->kflags |= can_do_exec_capture(screen) ? EXEC_OBJECT_CAPTURE : 0;
    grow->partial_bo = NULL;
@@ -389,6 +393,13 @@ grow_buffer(struct brw_context *brw,
    struct brw_bufmgr *bufmgr = brw->bufmgr;
    struct brw_bo *bo = grow->bo;
 
+   /* We can't grow buffers that are softpinned, as the growing mechanism
+    * involves putting a larger buffer at the same gtt_offset...and we've
+    * only allocated the smaller amount of VMA.  Without relocations, this
+    * simply won't work.  This should never happen, however.
+    */
+   assert(!(bo->kflags & EXEC_OBJECT_PINNED));
+
    perf_debug("Growing %s - ran out of space\n", bo->name);
 
    if (grow->partial_bo) {
@@ -730,7 +741,8 @@ execbuffer(int fd,
       bo->index = -1;
 
       /* Update brw_bo::gtt_offset */
-      if (batch->validation_list[i].offset != bo->gtt_offset) {
+      if (!(bo->kflags & EXEC_OBJECT_PINNED) &&
+          batch->validation_list[i].offset != bo->gtt_offset) {
          DBG("BO %d migrated: 0x%" PRIx64 " -> 0x%llx\n",
              bo->gem_handle, bo->gtt_offset,
              batch->validation_list[i].offset);
@@ -933,6 +945,14 @@ emit_reloc(struct intel_batchbuffer *batch,
 {
    assert(target != NULL);
 
+   if (target->kflags & EXEC_OBJECT_PINNED) {
+      brw_use_pinned_bo(batch, target, reloc_flags & RELOC_WRITE);
+      return target->gtt_offset + target_offset;
+   }
+
+   unsigned int index = add_exec_bo(batch, target);
+   struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[index];
+
    if (rlist->reloc_count == rlist->reloc_array_size) {
       rlist->reloc_array_size *= 2;
       rlist->relocs = realloc(rlist->relocs,
@@ -940,9 +960,6 @@ emit_reloc(struct intel_batchbuffer *batch,
                               sizeof(struct drm_i915_gem_relocation_entry));
    }
 
-   unsigned int index = add_exec_bo(batch, target);
-   struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[index];
-
    if (reloc_flags & RELOC_32BIT) {
       /* Restrict this buffer to the low 32 bits of the address space.
        *
@@ -976,6 +993,21 @@ emit_reloc(struct intel_batchbuffer *batch,
    return entry->offset + target_offset;
 }
 
+void
+brw_use_pinned_bo(struct intel_batchbuffer *batch, struct brw_bo *bo,
+                  unsigned writable_flag)
+{
+   assert(bo->kflags & EXEC_OBJECT_PINNED);
+   assert((writable_flag & ~EXEC_OBJECT_WRITE) == 0);
+
+   unsigned int index = add_exec_bo(batch, bo);
+   struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[index];
+   assert(entry->offset == bo->gtt_offset);
+
+   if (writable_flag)
+      entry->flags |= EXEC_OBJECT_WRITE;
+}
+
 uint64_t
 brw_batch_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset,
                 struct brw_bo *target, uint32_t target_offset,
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
index 7be5b10f3ab..9c3f1e93a87 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
@@ -55,6 +55,10 @@ bool brw_batch_references(struct intel_batchbuffer *batch, 
struct brw_bo *bo);
 #define RELOC_NEEDS_GGTT EXEC_OBJECT_NEEDS_GTT
 /* Inverted meaning, but using the same bit...emit_reloc will flip it. */
 #define RELOC_32BIT EXEC_OBJECT_SUPPORTS_48B_ADDRESS
+
+void brw_use_pinned_bo(struct intel_batchbuffer *batch, struct brw_bo *bo,
+                       unsigned writeable_flag);
+
 uint64_t brw_batch_reloc(struct intel_batchbuffer *batch,
                          uint32_t batch_offset,
                          struct brw_bo *target,
-- 
2.17.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to