Re: [Mesa-dev] [PATCH 08/13] i965: Convert reloc.target_handle into an index for I915_EXEC_HANDLE_LUT

2017-07-20 Thread Chris Wilson
Quoting Kenneth Graunke (2017-07-20 17:57:22)
> On Thursday, July 20, 2017 8:05:19 AM PDT Chris Wilson wrote:
> > Quoting Kenneth Graunke (2017-07-19 23:36:58)
> > > On Wednesday, July 19, 2017 3:09:16 AM PDT Chris Wilson wrote:
> > > >  #define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
> > > > @@ -117,21 +125,12 @@ add_exec_bo(struct intel_batchbuffer *batch, 
> > > > struct brw_bo *bo)
> > > >   batch->exec_array_size * 
> > > > sizeof(batch->exec_objects[0]));
> > > > }
> > > >  
> > > > -   struct drm_i915_gem_exec_object2 *validation_entry =
> > > > -  >exec_objects[batch->exec_count];
> > > > -   validation_entry->handle = bo->gem_handle;
> > > > -   if (bo == batch->bo) {
> > > > -  validation_entry->relocation_count = batch->reloc_count;
> > > > -  validation_entry->relocs_ptr = (uintptr_t) batch->relocs;
> > > > -   } else {
> > > > -  validation_entry->relocation_count = 0;
> > > > -  validation_entry->relocs_ptr = 0;
> > > > -   }
> > > > -   validation_entry->alignment = bo->align;
> > > > -   validation_entry->offset = bo->offset64;
> > > > -   validation_entry->flags = bo->kflags;
> > > > -   validation_entry->rsvd1 = 0;
> > > > -   validation_entry->rsvd2 = 0;
> > > > +   struct drm_i915_gem_exec_object2 *exec =
> > > > +  memset(>exec_objects[batch->exec_count], 0, 
> > > > sizeof(*exec));
> > > > +   exec->handle = bo->gem_handle;
> > > > +   exec->alignment = bo->align;
> > > > +   exec->offset = bo->offset64;
> > > > +   exec->flags = bo->kflags;
> > > 
> > > I liked the name "validation_entry" given that we call this the 
> > > "validation
> > > list"...exec matches the struct name better, but I think validation_entry
> > > helps distinguish the two lists...
> > 
> > Hmm, how about
> > 
> > -   struct drm_i915_gem_exec_object2 *exec =
> > -  memset(>exec_objects[batch->exec_count], 0, sizeof(*exec));
> > -   exec->handle = bo->gem_handle;
> > -   exec->alignment = bo->align;
> > -   exec->offset = bo->offset64;
> > -   exec->flags = bo->kflags;
> > +   batch->exec_objects[batch->exec_count] = (struct 
> > drm_i915_gem_exec_object2){
> > +  .handle = bo->gem_handle,
> > +  .alignment = bo->align,
> > +  .offset = bo->offset64,
> > +  .flags = bo->kflags,
> > +   };
> > 
> > and skip the impossible problem of naming?
> > 
> > But we still end up with a couple of 
> >   struct drm_i915_gem_exec_object2 *
> >   validation_entry = >exec_objects[index];
> > Could I just call those exec_object?
> > -Chris
> 
> I'm not objecting too strongly, call it exec or exec_object if you like. 
> The initializer use is pretty nice.
> 
> "validation list" is a bit of a weird name anyway...

As you've seen, I think there's some merit to a distinct name so we
don't get confused with exec_bos, I've settled for

  struct drm_i915_gem_exec_object2 *entry = >validation_list[index];

as that fits into 80cols :)
-Chris
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/13] i965: Convert reloc.target_handle into an index for I915_EXEC_HANDLE_LUT

2017-07-20 Thread Kenneth Graunke
On Thursday, July 20, 2017 8:05:19 AM PDT Chris Wilson wrote:
> Quoting Kenneth Graunke (2017-07-19 23:36:58)
> > On Wednesday, July 19, 2017 3:09:16 AM PDT Chris Wilson wrote:
> > >  #define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
> > > @@ -117,21 +125,12 @@ add_exec_bo(struct intel_batchbuffer *batch, struct 
> > > brw_bo *bo)
> > >   batch->exec_array_size * 
> > > sizeof(batch->exec_objects[0]));
> > > }
> > >  
> > > -   struct drm_i915_gem_exec_object2 *validation_entry =
> > > -  >exec_objects[batch->exec_count];
> > > -   validation_entry->handle = bo->gem_handle;
> > > -   if (bo == batch->bo) {
> > > -  validation_entry->relocation_count = batch->reloc_count;
> > > -  validation_entry->relocs_ptr = (uintptr_t) batch->relocs;
> > > -   } else {
> > > -  validation_entry->relocation_count = 0;
> > > -  validation_entry->relocs_ptr = 0;
> > > -   }
> > > -   validation_entry->alignment = bo->align;
> > > -   validation_entry->offset = bo->offset64;
> > > -   validation_entry->flags = bo->kflags;
> > > -   validation_entry->rsvd1 = 0;
> > > -   validation_entry->rsvd2 = 0;
> > > +   struct drm_i915_gem_exec_object2 *exec =
> > > +  memset(>exec_objects[batch->exec_count], 0, sizeof(*exec));
> > > +   exec->handle = bo->gem_handle;
> > > +   exec->alignment = bo->align;
> > > +   exec->offset = bo->offset64;
> > > +   exec->flags = bo->kflags;
> > 
> > I liked the name "validation_entry" given that we call this the "validation
> > list"...exec matches the struct name better, but I think validation_entry
> > helps distinguish the two lists...
> 
> Hmm, how about
> 
> -   struct drm_i915_gem_exec_object2 *exec =
> -  memset(>exec_objects[batch->exec_count], 0, sizeof(*exec));
> -   exec->handle = bo->gem_handle;
> -   exec->alignment = bo->align;
> -   exec->offset = bo->offset64;
> -   exec->flags = bo->kflags;
> +   batch->exec_objects[batch->exec_count] = (struct 
> drm_i915_gem_exec_object2){
> +  .handle = bo->gem_handle,
> +  .alignment = bo->align,
> +  .offset = bo->offset64,
> +  .flags = bo->kflags,
> +   };
> 
> and skip the impossible problem of naming?
> 
> But we still end up with a couple of 
>   struct drm_i915_gem_exec_object2 *
>   validation_entry = >exec_objects[index];
> Could I just call those exec_object?
> -Chris

I'm not objecting too strongly, call it exec or exec_object if you like. 
The initializer use is pretty nice.

"validation list" is a bit of a weird name anyway...

--Ken

signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/13] i965: Convert reloc.target_handle into an index for I915_EXEC_HANDLE_LUT

2017-07-20 Thread Chris Wilson
Quoting Kenneth Graunke (2017-07-19 23:36:58)
> On Wednesday, July 19, 2017 3:09:16 AM PDT Chris Wilson wrote:
> >  #define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
> > @@ -117,21 +125,12 @@ add_exec_bo(struct intel_batchbuffer *batch, struct 
> > brw_bo *bo)
> >   batch->exec_array_size * sizeof(batch->exec_objects[0]));
> > }
> >  
> > -   struct drm_i915_gem_exec_object2 *validation_entry =
> > -  >exec_objects[batch->exec_count];
> > -   validation_entry->handle = bo->gem_handle;
> > -   if (bo == batch->bo) {
> > -  validation_entry->relocation_count = batch->reloc_count;
> > -  validation_entry->relocs_ptr = (uintptr_t) batch->relocs;
> > -   } else {
> > -  validation_entry->relocation_count = 0;
> > -  validation_entry->relocs_ptr = 0;
> > -   }
> > -   validation_entry->alignment = bo->align;
> > -   validation_entry->offset = bo->offset64;
> > -   validation_entry->flags = bo->kflags;
> > -   validation_entry->rsvd1 = 0;
> > -   validation_entry->rsvd2 = 0;
> > +   struct drm_i915_gem_exec_object2 *exec =
> > +  memset(>exec_objects[batch->exec_count], 0, sizeof(*exec));
> > +   exec->handle = bo->gem_handle;
> > +   exec->alignment = bo->align;
> > +   exec->offset = bo->offset64;
> > +   exec->flags = bo->kflags;
> 
> I liked the name "validation_entry" given that we call this the "validation
> list"...exec matches the struct name better, but I think validation_entry
> helps distinguish the two lists...

Hmm, how about

-   struct drm_i915_gem_exec_object2 *exec =
-  memset(>exec_objects[batch->exec_count], 0, sizeof(*exec));
-   exec->handle = bo->gem_handle;
-   exec->alignment = bo->align;
-   exec->offset = bo->offset64;
-   exec->flags = bo->kflags;
+   batch->exec_objects[batch->exec_count] = (struct drm_i915_gem_exec_object2){
+  .handle = bo->gem_handle,
+  .alignment = bo->align,
+  .offset = bo->offset64,
+  .flags = bo->kflags,
+   };

and skip the impossible problem of naming?

But we still end up with a couple of 
struct drm_i915_gem_exec_object2 *
validation_entry = >exec_objects[index];
Could I just call those exec_object?
-Chris
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/13] i965: Convert reloc.target_handle into an index for I915_EXEC_HANDLE_LUT

2017-07-19 Thread Kenneth Graunke
On Wednesday, July 19, 2017 3:09:16 AM PDT Chris Wilson wrote:
> Passing the index of the target buffer via the reloc.target_handle is
> marginally more efficient for the kernel (it can avoid some allocations,
> and can use a direct lookup rather than a hash or search). It is also
> useful for ourselves as we can use the index into our exec_bos for other
> tasks.
> 
> v2: Only enable HANDLE_LUT if we can use BATCH_FIRST and thereby avoid
> a post-processing loop to fixup the relocations.
> v3: Move kernel probing from context creation to screen init.
> Use batch->use_exec_lut as it more descriptive of what's going on (Daniel)
> 
> Signed-off-by: Chris Wilson 
> Cc: Kenneth Graunke 
> Cc: Matt Turner 
> Cc: Jason Ekstrand 
> Cc: Daniel Vetter 
> ---
>  src/mesa/drivers/dri/i965/brw_context.h   |  1 +
>  src/mesa/drivers/dri/i965/intel_batchbuffer.c | 60 
> +--
>  src/mesa/drivers/dri/i965/intel_screen.c  | 20 +
>  src/mesa/drivers/dri/i965/intel_screen.h  |  5 +++
>  4 files changed, 64 insertions(+), 22 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index ffe4792b73..62ce5e472c 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -452,6 +452,7 @@ struct intel_batchbuffer {
>  
> uint32_t state_batch_offset;
> enum brw_gpu_ring ring;
> +   bool use_exec_lut;
> bool needs_sol_reset;
> bool state_base_address_emitted;
>  
> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
> b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> index 065a9c1c0c..5f9639cd4d 100644
> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> @@ -62,8 +62,6 @@ intel_batchbuffer_init(struct intel_batchbuffer *batch,
> struct brw_bufmgr *bufmgr,
> bool has_llc)
>  {
> -   intel_batchbuffer_reset(batch, bufmgr, has_llc);
> -
> if (!has_llc) {
>batch->cpu_map = malloc(BATCH_SZ);
>batch->map = batch->cpu_map;
> @@ -85,6 +83,16 @@ intel_batchbuffer_init(struct intel_batchbuffer *batch,
>batch->state_batch_sizes =
>   _mesa_hash_table_create(NULL, uint_key_hash, uint_key_compare);
> }
> +
> +   struct brw_context *brw = container_of(batch, brw, batch);
> +   /* To use the LUT method for execbuf, we also require placing the batch
> +* first (to simplify our implementation). We require a kernel recent
> +* enough to always support EXEC_LUT_HANDLE, but we must check that
> +* the kernel supports EXEC_BATCH_FIRST.
> +*/
> +   batch->use_exec_lut = brw->screen->kerninfo.has_exec_batch_first;
> +
> +   intel_batchbuffer_reset(batch, bufmgr, has_llc);
>  }
>  
>  #define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
> @@ -117,21 +125,12 @@ add_exec_bo(struct intel_batchbuffer *batch, struct 
> brw_bo *bo)
>   batch->exec_array_size * sizeof(batch->exec_objects[0]));
> }
>  
> -   struct drm_i915_gem_exec_object2 *validation_entry =
> -  >exec_objects[batch->exec_count];
> -   validation_entry->handle = bo->gem_handle;
> -   if (bo == batch->bo) {
> -  validation_entry->relocation_count = batch->reloc_count;
> -  validation_entry->relocs_ptr = (uintptr_t) batch->relocs;
> -   } else {
> -  validation_entry->relocation_count = 0;
> -  validation_entry->relocs_ptr = 0;
> -   }
> -   validation_entry->alignment = bo->align;
> -   validation_entry->offset = bo->offset64;
> -   validation_entry->flags = bo->kflags;
> -   validation_entry->rsvd1 = 0;
> -   validation_entry->rsvd2 = 0;
> +   struct drm_i915_gem_exec_object2 *exec =
> +  memset(>exec_objects[batch->exec_count], 0, sizeof(*exec));
> +   exec->handle = bo->gem_handle;
> +   exec->alignment = bo->align;
> +   exec->offset = bo->offset64;
> +   exec->flags = bo->kflags;

I liked the name "validation_entry" given that we call this the "validation
list"...exec matches the struct name better, but I think validation_entry
helps distinguish the two lists...

Moving the relocation count rubbish out to do_flush_locked is a good idea.

>  
> bo->index = batch->exec_count;
> batch->exec_bos[batch->exec_count] = bo;
> @@ -157,6 +156,11 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch,
> }
> batch->map_next = batch->map;
>  
> +   if (batch->use_exec_lut) {
> +  add_exec_bo(batch, batch->bo);
> +  assert(batch->bo->index == 0);
> +   }
> +
> batch->reserved_space = BATCH_RESERVED;
> batch->state_batch_offset = batch->bo->size;
> batch->needs_sol_reset = false;
> @@ -663,15 +667,25 @@ do_flush_locked(struct brw_context *brw, int 
> in_fence_fd, int *out_fence_fd)
>} else {
>   flags |= I915_EXEC_RENDER;
>}
> +
>if 

[Mesa-dev] [PATCH 08/13] i965: Convert reloc.target_handle into an index for I915_EXEC_HANDLE_LUT

2017-07-19 Thread Chris Wilson
Passing the index of the target buffer via the reloc.target_handle is
marginally more efficient for the kernel (it can avoid some allocations,
and can use a direct lookup rather than a hash or search). It is also
useful for ourselves as we can use the index into our exec_bos for other
tasks.

v2: Only enable HANDLE_LUT if we can use BATCH_FIRST and thereby avoid
a post-processing loop to fixup the relocations.
v3: Move kernel probing from context creation to screen init.
Use batch->use_exec_lut as it more descriptive of what's going on (Daniel)

Signed-off-by: Chris Wilson 
Cc: Kenneth Graunke 
Cc: Matt Turner 
Cc: Jason Ekstrand 
Cc: Daniel Vetter 
---
 src/mesa/drivers/dri/i965/brw_context.h   |  1 +
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 60 +--
 src/mesa/drivers/dri/i965/intel_screen.c  | 20 +
 src/mesa/drivers/dri/i965/intel_screen.h  |  5 +++
 4 files changed, 64 insertions(+), 22 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index ffe4792b73..62ce5e472c 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -452,6 +452,7 @@ struct intel_batchbuffer {
 
uint32_t state_batch_offset;
enum brw_gpu_ring ring;
+   bool use_exec_lut;
bool needs_sol_reset;
bool state_base_address_emitted;
 
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 065a9c1c0c..5f9639cd4d 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -62,8 +62,6 @@ intel_batchbuffer_init(struct intel_batchbuffer *batch,
struct brw_bufmgr *bufmgr,
bool has_llc)
 {
-   intel_batchbuffer_reset(batch, bufmgr, has_llc);
-
if (!has_llc) {
   batch->cpu_map = malloc(BATCH_SZ);
   batch->map = batch->cpu_map;
@@ -85,6 +83,16 @@ intel_batchbuffer_init(struct intel_batchbuffer *batch,
   batch->state_batch_sizes =
  _mesa_hash_table_create(NULL, uint_key_hash, uint_key_compare);
}
+
+   struct brw_context *brw = container_of(batch, brw, batch);
+   /* To use the LUT method for execbuf, we also require placing the batch
+* first (to simplify our implementation). We require a kernel recent
+* enough to always support EXEC_LUT_HANDLE, but we must check that
+* the kernel supports EXEC_BATCH_FIRST.
+*/
+   batch->use_exec_lut = brw->screen->kerninfo.has_exec_batch_first;
+
+   intel_batchbuffer_reset(batch, bufmgr, has_llc);
 }
 
 #define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
@@ -117,21 +125,12 @@ add_exec_bo(struct intel_batchbuffer *batch, struct 
brw_bo *bo)
  batch->exec_array_size * sizeof(batch->exec_objects[0]));
}
 
-   struct drm_i915_gem_exec_object2 *validation_entry =
-  >exec_objects[batch->exec_count];
-   validation_entry->handle = bo->gem_handle;
-   if (bo == batch->bo) {
-  validation_entry->relocation_count = batch->reloc_count;
-  validation_entry->relocs_ptr = (uintptr_t) batch->relocs;
-   } else {
-  validation_entry->relocation_count = 0;
-  validation_entry->relocs_ptr = 0;
-   }
-   validation_entry->alignment = bo->align;
-   validation_entry->offset = bo->offset64;
-   validation_entry->flags = bo->kflags;
-   validation_entry->rsvd1 = 0;
-   validation_entry->rsvd2 = 0;
+   struct drm_i915_gem_exec_object2 *exec =
+  memset(>exec_objects[batch->exec_count], 0, sizeof(*exec));
+   exec->handle = bo->gem_handle;
+   exec->alignment = bo->align;
+   exec->offset = bo->offset64;
+   exec->flags = bo->kflags;
 
bo->index = batch->exec_count;
batch->exec_bos[batch->exec_count] = bo;
@@ -157,6 +156,11 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch,
}
batch->map_next = batch->map;
 
+   if (batch->use_exec_lut) {
+  add_exec_bo(batch, batch->bo);
+  assert(batch->bo->index == 0);
+   }
+
batch->reserved_space = BATCH_RESERVED;
batch->state_batch_offset = batch->bo->size;
batch->needs_sol_reset = false;
@@ -663,15 +667,25 @@ do_flush_locked(struct brw_context *brw, int in_fence_fd, 
int *out_fence_fd)
   } else {
  flags |= I915_EXEC_RENDER;
   }
+
   if (batch->needs_sol_reset)
 flags |= I915_EXEC_GEN7_SOL_RESET;
 
+  unsigned int index;
+  if (batch->use_exec_lut) {
+ flags |= I915_EXEC_BATCH_FIRST | I915_EXEC_HANDLE_LUT;
+ index = 0;
+  } else {
+ index = add_exec_bo(batch, batch->bo);
+  }
+
+  struct drm_i915_gem_exec_object2 *exec = >exec_objects[index];
+  exec->relocation_count = batch->reloc_count;
+  exec->relocs_ptr = (uintptr_t) batch->relocs;
+
   if (ret == 0) {
  uint32_t hw_ctx = batch->ring == RENDER_RING ? brw->hw_ctx : 0;