For the common path where we want to execute the batch, pushing the
no_hw check down into execbuffer() lets us eliminate one loop over all
the execobjects: the unreference loop in brw_new_batch() is folded into
the loop execbuffer() already runs after the ioctl. For the less common
path where we don't want to execute the batch, no_hw was leaving
out_fence uninitialised.

Cc: Kenneth Graunke <kenn...@whitecape.org>
Cc: Matt Turner <matts...@gmail.com>
---
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 116 +++++++++++++-------------
 1 file changed, 56 insertions(+), 60 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 7f5f240597..59b142cc75 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -438,13 +438,6 @@ static void do_batch_dump(struct brw_context *brw) { }
 static void
 brw_new_batch(struct brw_context *brw)
 {
-   /* Unreference any BOs held by the previous batch, and reset counts. */
-   for (int i = 0; i < brw->batch.exec_count; i++) {
-      if (brw->batch.exec_bos[i] != brw->batch.bo) {
-         brw_bo_unreference(brw->batch.exec_bos[i]);
-      }
-      brw->batch.exec_bos[i] = NULL;
-   }
    brw->batch.reloc_count = 0;
    brw->batch.exec_count = 0;
    brw->batch.aperture_space = BATCH_SZ;
@@ -571,21 +564,21 @@ throttle(struct brw_context *brw)
 }
 
 static int
-execbuffer(int fd,
-           struct intel_batchbuffer *batch,
-           uint32_t ctx_id,
-           int used,
+execbuffer(struct brw_context *brw,
            int in_fence,
            int *out_fence,
            int flags)
 {
+   struct intel_batchbuffer *batch = &brw->batch;
    struct drm_i915_gem_execbuffer2 execbuf = {
       .buffers_ptr = (uintptr_t) batch->exec_objects,
       .buffer_count = batch->exec_count,
       .batch_start_offset = 0,
-      .batch_len = used,
+      .batch_len = 4 * USED_BATCH(*batch),
       .flags = flags,
-      .rsvd1 = ctx_id, /* rsvd1 is actually the context ID */
+
+      /* rsvd1 is actually the context ID */
+      .rsvd1 = batch->ring == RENDER_RING ? brw->hw_ctx : 0,
    };
 
    unsigned long cmd = DRM_IOCTL_I915_GEM_EXECBUFFER2;
@@ -601,9 +594,14 @@ execbuffer(int fd,
       execbuf.flags |= I915_EXEC_FENCE_OUT;
    }
 
-   int ret = drmIoctl(fd, cmd, &execbuf);
-   if (ret != 0)
-      ret = -errno;
+   int ret = 0;
+   if (likely(!brw->screen->no_hw)) {
+      __DRIscreen *dri_screen = brw->screen->driScrnPriv;
+      if (unlikely(drmIoctl(dri_screen->fd, cmd, &execbuf)))
+         ret = -errno;
+   } else {
+      out_fence = NULL;
+   }
 
    for (int i = 0; i < batch->exec_count; i++) {
       struct brw_bo *bo = batch->exec_bos[i];
@@ -617,6 +615,11 @@ execbuffer(int fd,
              bo->gem_handle, bo->offset64, batch->exec_objects[i].offset);
          bo->offset64 = batch->exec_objects[i].offset;
       }
+
+      if (batch->exec_bos[i] != batch->bo) {
+         brw_bo_unreference(batch->exec_bos[i]);
+      }
+      batch->exec_bos[i] = NULL;
    }
 
    if (ret == 0 && out_fence != NULL)
@@ -628,7 +631,6 @@ execbuffer(int fd,
 static int
 do_flush_locked(struct brw_context *brw, int in_fence_fd, int *out_fence_fd)
 {
-   __DRIscreen *dri_screen = brw->screen->driScrnPriv;
    struct intel_batchbuffer *batch = &brw->batch;
    int ret = 0;
 
@@ -644,55 +646,49 @@ do_flush_locked(struct brw_context *brw, int in_fence_fd, int *out_fence_fd)
       }
    }
 
-   if (!brw->screen->no_hw) {
-      unsigned int flags;
-
-      /* The requirement for using I915_EXEC_NO_RELOC are:
-       *
-       *   The addresses written in the objects must match the corresponding
-       *   reloc.presumed_offset which in turn must match the corresponding
-       *   execobject.offset.
-       *
-       *   Any render targets written to in the batch must be flagged with
-       *   EXEC_OBJECT_WRITE.
-       *
-       *   To avoid stalling, execobject.offset should match the current
-       *   address of that object within the active context.
-       */
-      flags = I915_EXEC_NO_RELOC;
-      if (brw->gen >= 6 && batch->ring == BLT_RING) {
-         flags |= I915_EXEC_BLT;
-      } else {
-         flags |= I915_EXEC_RENDER;
-      }
+   unsigned int flags;
 
-      if (batch->needs_sol_reset)
-        flags |= I915_EXEC_GEN7_SOL_RESET;
-
-      struct drm_i915_gem_exec_object2 *exec = &batch->exec_objects[0];
-      assert(exec->handle == batch->bo->gem_handle);
-      exec->relocation_count = batch->reloc_count;
-      exec->relocs_ptr = (uintptr_t) batch->relocs;
-      if (batch->use_exec_lut) {
-         flags |= I915_EXEC_BATCH_FIRST | I915_EXEC_HANDLE_LUT;
-      } else {
-         struct drm_i915_gem_exec_object2 tmp = *exec;
-         unsigned int index = batch->exec_count - 1;
-         *exec = batch->exec_objects[index];
-         batch->exec_objects[index] = tmp;
-      }
+   /* The requirement for using I915_EXEC_NO_RELOC are:
+    *
+    *   The addresses written in the objects must match the corresponding
+    *   reloc.presumed_offset which in turn must match the corresponding
+    *   execobject.offset.
+    *
+    *   Any render targets written to in the batch must be flagged with
+    *   EXEC_OBJECT_WRITE.
+    *
+    *   To avoid stalling, execobject.offset should match the current
+    *   address of that object within the active context.
+    */
+   flags = I915_EXEC_NO_RELOC;
+   if (brw->gen >= 6 && batch->ring == BLT_RING) {
+      flags |= I915_EXEC_BLT;
+   } else {
+      flags |= I915_EXEC_RENDER;
+   }
 
-      if (ret == 0) {
-         uint32_t hw_ctx = batch->ring == RENDER_RING ? brw->hw_ctx : 0;
+   if (batch->needs_sol_reset)
+      flags |= I915_EXEC_GEN7_SOL_RESET;
 
-         ret = execbuffer(dri_screen->fd, batch, hw_ctx,
-                          4 * USED_BATCH(*batch),
-                          in_fence_fd, out_fence_fd, flags);
-      }
+   struct drm_i915_gem_exec_object2 *exec = &batch->exec_objects[0];
+   assert(exec->handle == batch->bo->gem_handle);
+   exec->relocation_count = batch->reloc_count;
+   exec->relocs_ptr = (uintptr_t) batch->relocs;
+   if (batch->use_exec_lut) {
+      flags |= I915_EXEC_BATCH_FIRST | I915_EXEC_HANDLE_LUT;
+   } else {
+      struct drm_i915_gem_exec_object2 tmp = *exec;
+      unsigned int index = batch->exec_count - 1;
+      *exec = batch->exec_objects[index];
+      batch->exec_objects[index] = tmp;
+   }
 
-      throttle(brw);
+   if (ret == 0) {
+      ret = execbuffer(brw, in_fence_fd, out_fence_fd, flags);
    }
 
+   throttle(brw);
+
    if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
       do_batch_dump(brw);
 
-- 
2.13.3

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to