Re: [Mesa-dev] [PATCH 1/2] nouveau: Add basic memory object support

2018-06-21 Thread Ilia Mirkin
Hi Miguel,

Preface: I know little about this ext, so feel free to educate me on
the wrongness of my thinking.

On Thu, Jun 21, 2018 at 10:01 PM, Miguel A. Vico  wrote:
> Add memory object support for nvc0 and nv50
>
> Signed-off-by: Miguel A Vico Moya 
> ---
>  .../drivers/nouveau/nv50/nv50_miptree.c   | 49 +
>  .../drivers/nouveau/nv50/nv50_resource.c  | 52 +++
>  .../drivers/nouveau/nv50/nv50_resource.h  | 33 
>  .../drivers/nouveau/nvc0/nvc0_resource.c  | 22 
>  4 files changed, 146 insertions(+), 10 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c 
> b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
> index f2e304fde6..91007d3dac 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
> @@ -397,13 +397,13 @@ nv50_miptree_create(struct pipe_screen *pscreen,
> return pt;
>  }
>
> -struct pipe_resource *
> -nv50_miptree_from_handle(struct pipe_screen *pscreen,
> - const struct pipe_resource *templ,
> - struct winsys_handle *whandle)
> +static struct pipe_resource *
> +nv50_miptree_from_bo(struct pipe_screen *pscreen,
> + const struct pipe_resource *templ,
> + struct nouveau_bo *bo,
> + uint32_t stride)
>  {
> struct nv50_miptree *mt;
> -   unsigned stride;
>
> /* only supports 2D, non-mipmapped textures for the moment */

Won't this be a drag, since you're supposed to be able to "place" 3d
textures, as well as mip-mapped ones?

The reason I haven't looked at doing VK for nouveau yet is that the
nouveau kernel API does not allow explicit userspace-side VA
management, which would be required to allow something like this. I
believe it would also be required to implement this GL extension. Feel
free to correct my thinking.

Cheers,

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] nouveau: Add basic memory object support

2018-06-21 Thread Miguel A. Vico
Add memory object support for nvc0 and nv50

Signed-off-by: Miguel A Vico Moya 
---
 .../drivers/nouveau/nv50/nv50_miptree.c   | 49 +
 .../drivers/nouveau/nv50/nv50_resource.c  | 52 +++
 .../drivers/nouveau/nv50/nv50_resource.h  | 33 
 .../drivers/nouveau/nvc0/nvc0_resource.c  | 22 
 4 files changed, 146 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c 
b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
index f2e304fde6..91007d3dac 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
@@ -397,13 +397,13 @@ nv50_miptree_create(struct pipe_screen *pscreen,
return pt;
 }
 
-struct pipe_resource *
-nv50_miptree_from_handle(struct pipe_screen *pscreen,
- const struct pipe_resource *templ,
- struct winsys_handle *whandle)
+static struct pipe_resource *
+nv50_miptree_from_bo(struct pipe_screen *pscreen,
+ const struct pipe_resource *templ,
+ struct nouveau_bo *bo,
+ uint32_t stride)
 {
struct nv50_miptree *mt;
-   unsigned stride;
 
/* only supports 2D, non-mipmapped textures for the moment */
if ((templ->target != PIPE_TEXTURE_2D &&
@@ -417,11 +417,8 @@ nv50_miptree_from_handle(struct pipe_screen *pscreen,
if (!mt)
   return NULL;
 
-   mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride);
-   if (mt->base.bo == NULL) {
-  FREE(mt);
-  return NULL;
-   }
+   nouveau_bo_ref(bo, &mt->base.bo);
+
mt->base.domain = mt->base.bo->flags & NOUVEAU_BO_APER;
mt->base.address = mt->base.bo->offset;
 
@@ -439,6 +436,38 @@ nv50_miptree_from_handle(struct pipe_screen *pscreen,
return &mt->base.base;
 }
 
+struct pipe_resource *
+nv50_miptree_from_handle(struct pipe_screen *pscreen,
+ const struct pipe_resource *templ,
+ struct winsys_handle *whandle)
+{
+   struct pipe_resource *resource;
+   struct nouveau_bo *bo;
+   uint32_t stride;
+
+   bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride);
+   if (bo == NULL) {
+  return NULL;
+   }
+
+   resource = nv50_miptree_from_bo(pscreen, templ, bo, stride);
+
+   /* nv50_miptree_from_bo will increment bo's refcount if succeeded */
+   nouveau_bo_ref(NULL, &bo);
+
+   return resource;
+}
+
+struct pipe_resource *
+nv50_miptree_from_memobj(struct pipe_screen *pscreen,
+ const struct pipe_resource *templ,
+ struct pipe_memory_object *memobj)
+{
+   struct nv50_memory_object *mo = nv50_memory_object(memobj);
+
+   return nv50_miptree_from_bo(pscreen, templ, mo->bo, mo->stride);
+}
+
 
 /* Offset of zslice @z from start of level @l. */
 inline unsigned
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.c 
b/src/gallium/drivers/nouveau/nv50/nv50_resource.c
index aed8c6241d..2a93c8820e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_resource.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.c
@@ -91,6 +91,55 @@ nv50_invalidate_resource(struct pipe_context *pipe, struct 
pipe_resource *res)
   nouveau_buffer_invalidate(pipe, res);
 }
 
+struct pipe_resource *
+nv50_resource_from_memobj(struct pipe_screen *screen,
+  const struct pipe_resource *templ,
+  struct pipe_memory_object *memobj,
+  uint64_t offset)
+{
+   if (offset != 0) {
+  debug_printf("%s: attempt to import unsupported winsys offset %lu\n",
+   __FUNCTION__, offset);
+  return NULL;
+   }
+
+   if (templ->target == PIPE_BUFFER)
+  return NULL;
+   else
+  return nv50_miptree_from_memobj(screen, templ, memobj);
+}
+
+struct pipe_memory_object *
+nv50_memobj_from_handle(struct pipe_screen *screen,
+struct winsys_handle *whandle,
+bool dedicated)
+{
+   struct nv50_memory_object *mo;
+
+   mo = CALLOC_STRUCT(nv50_memory_object);
+   if (!mo)
+  return NULL;
+
+   mo->bo = nouveau_screen_bo_from_handle(screen, whandle, &mo->stride);
+   if (mo->bo == NULL) {
+  FREE(mo);
+  return NULL;
+   }
+   mo->base.dedicated = dedicated;
+
+   return &mo->base;
+}
+
+void
+nv50_memobj_destroy(struct pipe_screen *screen,
+struct pipe_memory_object *memobj)
+{
+   struct nv50_memory_object *mo = nv50_memory_object(memobj);
+
+   nouveau_bo_ref(NULL, &mo->bo);
+   FREE(mo);
+}
+
 void
 nv50_init_resource_functions(struct pipe_context *pcontext)
 {
@@ -111,4 +160,7 @@ nv50_screen_init_resource_functions(struct pipe_screen 
*pscreen)
pscreen->resource_from_handle = nv50_resource_from_handle;
pscreen->resource_get_handle = u_resource_get_handle_vtbl;
pscreen->resource_destroy = u_resource_destroy_vtbl;
+   pscreen->resource_from_memobj = nv50_resource_from_memobj;
+   pscreen->memobj_create_from_handle = 

[Mesa-dev] [PATCH 2/2] nouveau: Enable support for EXT_external_objects

2018-06-21 Thread Miguel A. Vico
Enable EXT_external_objects for nvc0 and nv50

Signed-off-by: Miguel A Vico Moya 
---
 src/gallium/drivers/nouveau/nv50/nv50_screen.c | 2 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 3a3c43b774..e5babd5580 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -201,6 +201,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_TGSI_CLOCK:
case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
+   case PIPE_CAP_MEMOBJ:
   return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP:
   return 1; /* class_3d >= NVA0_3D_CLASS; */
@@ -273,7 +274,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_BINDLESS_TEXTURE:
case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
case PIPE_CAP_QUERY_SO_OVERFLOW:
-   case PIPE_CAP_MEMOBJ:
case PIPE_CAP_LOAD_CONSTBUF:
case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
case PIPE_CAP_TILE_RASTER_ORDER:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 02890c7165..ce344e33c5 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -259,6 +259,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
case PIPE_CAP_QUERY_SO_OVERFLOW:
+   case PIPE_CAP_MEMOBJ:
   return 1;
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
   return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0;
@@ -309,7 +310,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_INT64_DIVMOD:
case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE:
case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
-   case PIPE_CAP_MEMOBJ:
case PIPE_CAP_LOAD_CONSTBUF:
case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
case PIPE_CAP_TILE_RASTER_ORDER:
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/2] nouveau: Add support for EXT_external_objects

2018-06-21 Thread Miguel A. Vico
Hi,

These patches have been sitting in my local tree for some time now. They
are kind of related to the Generic Allocator work, but not specific to it.

James Jones's original kmscube port
(https://gitlab.freedesktop.org/allocator/kmscube) relies on the
EXT_external_objects extension to import allocator allocations to OpenGL as a
texture object. However, the Nouveau implementation of these mechanisms is
missing in Mesa.

These two patches will implement and enable the extension on both nv50 and nvc0.

Hoping to get them merged as I believe they might be useful regardless.

You can also check these changes on Gitlab here:

  
https://gitlab.freedesktop.org/mvicomoya/mesa/tree/wip/EXT_external_objects-nouveau

Thanks,
Miguel.


Miguel A. Vico (2):
  nouveau: Add basic memory object support
  nouveau: Enable support for EXT_external_objects

 src/gallium/drivers/nouveau/nv50/nv50_miptree.c  | 49 
+++--
 src/gallium/drivers/nouveau/nv50/nv50_resource.c | 52 

 src/gallium/drivers/nouveau/nv50/nv50_resource.h | 33 
+
 src/gallium/drivers/nouveau/nv50/nv50_screen.c   |  2 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_resource.c | 22 ++
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   |  2 +-
 6 files changed, 148 insertions(+), 12 deletions(-)


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/nir: Disable varying packing when doing transform feedback.

2018-06-21 Thread Eric Anholt
Timothy Arceri  writes:

> nir_compact_varyings() is meant to skip over varyings used by xfb:
>
>   /* We can't repack xfb varyings. */
>   if (var->data.always_active_io)
>  continue;
>
> Any idea why that isn't working in this case?

Looks like GLSL IR has that flag wrong.  points.7 has v_var6,7,8,9
transform feedback output, but the IR says:

decl_var shader_out INTERP_MODE_NONE SOMEACT vec4 gl_Position 
(VARYING_SLOT_POS, 0, 0)
decl_var shader_out INTERP_MODE_NONE SOMEACT float gl_PointSize 
(VARYING_SLOT_PSIZ.x, 0, 0)
decl_var shader_out INTERP_MODE_FLAT ALWAYSACT  vec4[1] v_var6 
(VARYING_SLOT_VAR0, 0, 0)
decl_var shader_out INTERP_MODE_FLAT ALWAYSACT  ivec3 packed:v_var8 
(VARYING_SLOT_VAR7.xyz, 0, 0)
decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var0 
(VARYING_SLOT_VAR3.x, 0, 0)
decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var0@0 
(VARYING_SLOT_VAR3.y, 0, 0)
decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var0@1 
(VARYING_SLOT_VAR3.z, 0, 0)
decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var1 
(VARYING_SLOT_VAR4.x, 0, 0)
decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var1@2 
(VARYING_SLOT_VAR4.y, 0, 0)
decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var1@3 
(VARYING_SLOT_VAR4.z, 0, 0)
decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var10,v_var9 
(VARYING_SLOT_VAR1.x, 0, 0)
decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var10,v_var9@4 
(VARYING_SLOT_VAR1.y, 0, 0)
decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var10,v_var9@5 
(VARYING_SLOT_VAR1.z, 0, 0)
decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var10,v_var9@6 
(VARYING_SLOT_VAR1.w, 0, 0)
decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var11[0] 
(VARYING_SLOT_VAR5.x, 0, 0)
decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var11[0]@7 
(VARYING_SLOT_VAR5.y, 0, 0)
decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var11[0]@8 
(VARYING_SLOT_VAR5.z, 0, 0)
decl_var shader_out INTERP_MODE_FLAT SOMEACT int 
packed:v_var2,v_var5,v_var7[0],v_var7[1] (VARYING_SLOT_VAR2.x, 0, 0)
decl_var shader_out INTERP_MODE_FLAT SOMEACT int 
packed:v_var2,v_var5,v_var7[0],v_var7[1]@9 (VARYING_SLOT_VAR2.y, 0, 0)
decl_var shader_out INTERP_MODE_FLAT SOMEACT int 
packed:v_var2,v_var5,v_var7[0],v_var7[1]@10 (VARYING_SLOT_VAR2.z, 0, 0)
decl_var shader_out INTERP_MODE_FLAT SOMEACT int 
packed:v_var2,v_var5,v_var7[0],v_var7[1]@11 (VARYING_SLOT_VAR2.w, 0, 0)
decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var3 
(VARYING_SLOT_VAR6.x, 0, 0)
decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var3@12 
(VARYING_SLOT_VAR6.y, 0, 0)
decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var3@13 
(VARYING_SLOT_VAR6.z, 0, 0)
decl_var centroid shader_out INTERP_MODE_NONE SOMEACT float v_var4 
(VARYING_SLOT_VAR8.x, 0, 0)
decl_var centroid shader_out INTERP_MODE_NONE SOMEACT float v_var4@14 
(VARYING_SLOT_VAR8.y, 0, 0)
decl_var centroid shader_out INTERP_MODE_NONE SOMEACT float v_var4@15 
(VARYING_SLOT_VAR8.z, 0, 0)
decl_var centroid shader_out INTERP_MODE_NONE SOMEACT float v_var4@16 
(VARYING_SLOT_VAR8.w, 0, 0)

However, even if I make lower_packed_varyings.cpp flag those as always
active, I still get my varyings reordered if I revert my change to not
call nir_compact_varyings:

 decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var0 
(VARYING_SLOT_VAR12.x, 5, 0)
 decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var0@0 
(VARYING_SLOT_VAR12.y, 5, 0)
 decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var0@1 
(VARYING_SLOT_VAR12.z, 5, 0)
-decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var1 
(VARYING_SLOT_VAR13.x, 6, 0)
-decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var1@2 
(VARYING_SLOT_VAR13.y, 6, 0)
+decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var1 
(VARYING_SLOT_VAR12.w, 5, 0)
+decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var1@2 
(VARYING_SLOT_VAR13.x, 6, 0)
 decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var1@3 
(VARYING_SLOT_VAR13.z, 6, 0)
-decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var11[0] 
(VARYING_SLOT_VAR14.x, 7, 0)
-decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var11[0]@4 
(VARYING_SLOT_VAR14.y, 7, 0)
-decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var11[0]@5 
(VARYING_SLOT_VAR14.z, 7, 0)
-decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var3 
(VARYING_SLOT_VAR15.x, 8, 0)
-decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var3@6 
(VARYING_SLOT_VAR15.y, 8, 0)
-decl_var shader_out INTERP_MODE_FLAT SOMEACT int packed:v_var3@7 
(VARYING_SLOT_VAR15.z, 8, 0)
-decl_var shader_out INTERP_MODE_FLAT ALWAYSACT  ivec3 packed:v_var8 
(VARYING_SLOT_VAR16.xyz, 9, 0)
-decl_var centroid shader_out INTERP_MODE_NONE SOMEACT float v_var4 
(VARYING_SLOT_VAR17.x, 10, 0)
-decl_var centroid shader_out INTERP_MODE_NONE SOMEACT float v_var4@8 
(VARYING_SLOT_VAR17.y, 10, 0)
-decl_var 

Re: [Mesa-dev] [PATCH 03/23] intel/eu: Use brw_set_desc() along with a helper to set common descriptor controls.

2018-06-21 Thread Kenneth Graunke
On Thursday, June 21, 2018 2:59:30 PM PDT Francisco Jerez wrote:
> Kenneth Graunke  writes:
> 
> > On Monday, June 11, 2018 7:25:55 PM PDT Francisco Jerez wrote:
> >> This replaces brw_set_message_descriptor() with the composition of
> >> brw_set_desc() and a new inline helper function that packs the common
> >> message descriptor controls into an integer.  The goal is to represent
> >> all message descriptors as a 32-bit integer which is written at once
> >> into the instruction, which is more flexible (SENDS anyone?), robust
> >> (see d2eecf0b0b24d203d0f171807681dffd830d54de fixing an issue
> >> ultimately caused by some bits of the extended message descriptor
> >> being left undefined) and future-proof than the current approach of
> >> specifying the individual descriptor fields directly into the
> >> instruction.
> >> 
> >> This approach also seems more self-documenting, since it will allow
> >> removing calls to functions with way too many arguments like
> >> brw_set_*_message() and brw_send_indirect_message(), and instead
> >> provide a single descriptor argument constructed from an appropriate
> >> combination of brw_*_desc() helpers.
> >> 
> >> Note that because brw_set_message_descriptor() was (conditionally?)
> >> overriding fields of the instruction which strictly speaking weren't
> >> part of the message descriptor, this involves calling
> >> brw_inst_set_sfid() and brw_inst_set_eot() in some cases in addition
> >> to brw_set_desc().
> >> ---
> >>  src/intel/compiler/brw_eu.h   |  29 +---
> >>  src/intel/compiler/brw_eu_emit.c  | 108 
> >> +++---
> >>  src/intel/compiler/brw_vec4_generator.cpp |  17 +++--
> >>  3 files changed, 68 insertions(+), 86 deletions(-)
> >> 
> >> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
> >> index 5a396339fde..b2b20713e45 100644
> >> --- a/src/intel/compiler/brw_eu.h
> >> +++ b/src/intel/compiler/brw_eu.h
> >> @@ -256,14 +256,6 @@ void brw_set_sampler_message(struct brw_codegen *p,
> >>   unsigned simd_mode,
> >>   unsigned return_format);
> >>  
> >> -void brw_set_message_descriptor(struct brw_codegen *p,
> >> -brw_inst *inst,
> >> -enum brw_message_target sfid,
> >> -unsigned msg_length,
> >> -unsigned response_length,
> >> -bool header_present,
> >> -bool end_of_thread);
> >> -
> >>  void brw_set_dp_read_message(struct brw_codegen *p,
> >> brw_inst *insn,
> >> unsigned binding_table_index,
> >> @@ -287,6 +279,27 @@ void brw_set_dp_write_message(struct brw_codegen *p,
> >>  unsigned end_of_thread,
> >>  unsigned send_commit_msg);
> >>  
> >> +/**
> >> + * Construct a message descriptor immediate with the specified common
> >> + * descriptor controls.
> >> + */
> >> +static inline uint32_t
> >> +brw_message_desc(const struct gen_device_info *devinfo,
> >> + unsigned msg_length,
> >> + unsigned response_length,
> >> + bool header_present)
> >> +{
> >
> > Perhaps it would be good to add
> >
> >   assert(msg_length >= 1 && msg_length <= 15);
> >
> >> +   if (devinfo->gen >= 5) {
> >
> >   assert(response_length <= 16);
> >
> >
> >> +  return (msg_length << 25 |
> >> +  response_length << 20 |
> >> +  header_present << 19);
> >> +   } else {
> >
> >   assert(response_length <= 8);
> >
> > I'm not so concerned with validating the values here, just thinking it
> > might make sense to verify that mlen fits in a U4, for example, so we
> > don't accidentally bleed over into other fields when encoding it.
> >
> 
> It's kind of a PITA to assert that each field is in range manually for
> each one of these helpers (the following patches introduce a pile of
> functions very much like this one), and verifying that the assertions
> are complete and match the definition of the hardware fields is not
> straightforward to review.  I would have used the SET_FIELD() macro if
> it wasn't because it relies on macros being defined with specific
> suffixes for each field.  If you believe it's going to be valuable I
> think I'm going to introduce a new helper more easily reusable than
> SET_FIELD() checking for overflow based on a bitfield specification, and
> use it instead of the plain left-shift operators.

I'm fine with landing things as is, but I do think it would be valuable
to assert that the values are in range.  At the very least, brw_inst has
historically done that, and genxml's similar asserts have caught all
kinds of problems.  I definitely agree that doing this ad-hoc gets
messy, and adding a new macro would help a lot.

You could combine it with the shift, or else just do:


[Mesa-dev] [PATCH 1/3] nv50/ir: optimize slct(b, c, set(a, 0)) to slct(b, c, a)

2018-06-21 Thread Karol Herbst
From: Karol Herbst 

helps mainly feral ported games

shader-db changes:
total instructions in shared programs : 5751418 -> 5730139 (-0.37%)
total gprs used in shared programs: 664007 -> 663206 (-0.12%)
total shared used in shared programs  : 548832 -> 548832 (0.00%)
total local used in shared programs   : 20956 -> 20956 (0.00%)
total bytes used in shared programs   : 61439848 -> 61212472 (-0.37%)

        local shared    gpr   inst  bytes
helped      0      0    673   3746   3746
  hurt      0      0     49      8      8

Signed-off-by: Karol Herbst 
---
 .../nouveau/codegen/nv50_ir_peephole.cpp  | 47 +--
 1 file changed, 44 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 39177bd044b..32c6f9ee7af 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1664,7 +1664,8 @@ private:
bool tryADDToMADOrSAD(Instruction *, operation toOp);
void handleMINMAX(Instruction *);
void handleRCP(Instruction *);
-   void handleSLCT(Instruction *);
+   void handleSLCT(CmpInstruction *);
+   bool tryMergeSLCTSET(CmpInstruction *slct, CmpInstruction *set);
void handleLOGOP(Instruction *);
void handleCVT_NEG(Instruction *);
void handleCVT_CVT(Instruction *);
@@ -1847,8 +1848,12 @@ AlgebraicOpt::handleRCP(Instruction *rcp)
 }
 
 void
-AlgebraicOpt::handleSLCT(Instruction *slct)
+AlgebraicOpt::handleSLCT(CmpInstruction *slct)
 {
+   Instruction *insn = slct->getSrc(2)->getInsn();
+   while(insn && insn->op == OP_SET && tryMergeSLCTSET(slct, insn->asCmp())) {
+  insn = slct->getSrc(2)->getInsn();
+   }
if (slct->getSrc(2)->reg.file == FILE_IMMEDIATE) {
   if (slct->getSrc(2)->asImm()->compare(slct->asCmp()->setCond, 0.0f))
  slct->setSrc(0, slct->getSrc(1));
@@ -1861,6 +1866,42 @@ AlgebraicOpt::handleSLCT(Instruction *slct)
slct->setSrc(2, NULL);
 }
 
+bool
+AlgebraicOpt::tryMergeSLCTSET(CmpInstruction *slct, CmpInstruction *set)
+{
+   assert(slct->op == OP_SLCT && set->op == OP_SET);
+
+   if (typeSizeof(set->sType) != 4)
+  return false;
+
+   CondCode setCC = set->getCondition();
+   CondCode slctCC = slct->getCondition();
+   CondCode newCC = setCC;
+
+   if (slctCC != CC_NE && slctCC != CC_EQ)
+  return false;
+
+   ImmediateValue imm0;
+   int s;
+
+   if (set->src(0).getImmediate(imm0) && imm0.isInteger(0))
+  s = 1;
+   else if (set->src(1).getImmediate(imm0) && imm0.isInteger(0))
+  s = 0;
+   else
+  return false;
+
+   slct->setSrc(2, set->getSrc(s));
+   if (s)
+  newCC = reverseCondCode(newCC);
+   if (slctCC == CC_EQ)
+  newCC = inverseCondCode(newCC);
+
+   slct->sType = set->sType;
+   slct->setCondition(newCC);
+   return true;
+}
+
 void
 AlgebraicOpt::handleLOGOP(Instruction *logop)
 {
@@ -2196,7 +2237,7 @@ AlgebraicOpt::visit(BasicBlock *bb)
  handleMINMAX(i);
  break;
   case OP_SLCT:
- handleSLCT(i);
+ handleSLCT(i->asCmp());
  break;
   case OP_AND:
   case OP_OR:
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] nv50/ir: convert slct with boolean result to set

2018-06-21 Thread Karol Herbst
From: Karol Herbst 

helps mainly feral ported games

changes in shader-db:
total instructions in shared programs : 5730139 -> 5726007 (-0.07%)
total gprs used in shared programs: 663206 -> 663147 (-0.01%)
total shared used in shared programs  : 548832 -> 548832 (0.00%)
total local used in shared programs   : 20956 -> 20956 (0.00%)
total bytes used in shared programs   : 61212472 -> 61168408 (-0.07%)

        local shared    gpr   inst  bytes
helped      0      0     61   1087   1087
  hurt      0      0      4      0      0

Signed-off-by: Karol Herbst 
---
 .../nouveau/codegen/nv50_ir_peephole.cpp  | 50 +++
 1 file changed, 50 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 3a4d7e27ef7..3c1ee7f92f5 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -345,6 +345,8 @@ private:
void expr(Instruction *, ImmediateValue&, ImmediateValue&);
void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&);
void opnd(Instruction *, ImmediateValue&, int s);
+   // 3 srcs where 1st and 2nd are immediates
+   void opnd(Instruction *, ImmediateValue&, ImmediateValue&);
void opnd3(Instruction *, ImmediateValue&);
 
void unary(Instruction *, const ImmediateValue&);
@@ -400,6 +402,10 @@ ConstantFolding::visit(BasicBlock *bb)
  opnd(i, src1, 1);
   if (i->srcExists(2) && i->src(2).getImmediate(src2))
  opnd3(i, src2);
+  else
+  if (i->srcExists(2) &&
+  i->src(0).getImmediate(src0) && i->src(1).getImmediate(src1))
+ opnd(i, src0, src1);
}
return true;
 }
@@ -902,6 +908,50 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2,
}
 }
 
+void
+ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, ImmediateValue 
&imm1)
+{
+   const Storage &a = imm0.reg;
+   const Storage &b = imm1.reg;
+
+   switch (i->op) {
+   case OP_SLCT: {
+  CmpInstruction *slct = i->asCmp();
+  if (a.data.u32 == 0xffffffff && b.data.u32 == 0x0) {
+ slct->setSrc(0, slct->getSrc(2));
+ slct->setSrc(2, NULL);
+ slct->dType = TYPE_U32;
+ slct->op = OP_SET;
+  }
+  else if (a.data.u32 == 0x3f800000 && b.data.u32 == 0x0) {
+ slct->setSrc(0, slct->getSrc(2));
+ slct->setSrc(2, NULL);
+ slct->dType = TYPE_F32;
+ slct->op = OP_SET;
+  }
+  else if (a.data.u32 == 0x0 && b.data.u32 == 0xffffffff) {
+ slct->swapSources(0, 1);
+ slct->setSrc(0, slct->getSrc(2));
+ slct->setSrc(2, NULL);
+ slct->dType = TYPE_U32;
+ slct->setCondition(inverseCondCode(slct->getCondition()));
+ slct->op = OP_SET;
+  }
+  else if (a.data.u32 == 0x0 && b.data.u32 == 0x3f800000) {
+ slct->swapSources(0, 1);
+ slct->setSrc(0, slct->getSrc(2));
+ slct->setSrc(2, NULL);
+ slct->dType = TYPE_F32;
+ slct->setCondition(inverseCondCode(slct->getCondition()));
+ slct->op = OP_SET;
+  }
+  break;
+   }
+   default:
+  break;
+   }
+}
+
 void
 ConstantFolding::opnd3(Instruction *i, ImmediateValue &imm2)
 {
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] nv50/ir: clean up src2 in ConstantFolding

2018-06-21 Thread Karol Herbst
Fixes a crash triggered by the next patch.

no changes in shader-db

Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 32c6f9ee7af..3a4d7e27ef7 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -730,6 +730,7 @@ ConstantFolding::expr(Instruction *i,
   i->op = i->saturate ? OP_SAT : OP_MOV;
   if (i->saturate)
  unary(i, *i->getSrc(0)->asImm());
+  i->setSrc(2, NULL);
   break;
}
i->subOp = 0;
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/3] nv50/ir: Some set+slct based optimization

2018-06-21 Thread Karol Herbst
Doesn't break on games I tested with and piglit was okay with that as well.

Plan to test this on my Kepler card later.

total changes in shader-db:

total instructions in shared programs : 5751418 -> 5726007 (-0.44%)
total gprs used in shared programs: 664007 -> 663147 (-0.13%)
total shared used in shared programs  : 548832 -> 548832 (0.00%)
total local used in shared programs   : 20956 -> 20956 (0.00%)
total bytes used in shared programs   : 61439848 -> 61168408 (-0.44%)

        local shared    gpr   inst  bytes
helped      0      0    719   3766   3766
  hurt      0      0     39      8      8

Karol Herbst (3):
  nv50/ir: optimize slct(b, c, set(a, 0)) to slct(b, c, a)
  nv50/ir: clean up src2 in ConstantFolding
  nv50/ir: convert slct with boolean result to set

 .../nouveau/codegen/nv50_ir_peephole.cpp  | 98 ++-
 1 file changed, 95 insertions(+), 3 deletions(-)

-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv: Disable __gen_validate_value if NDEBUG is set.

2018-06-21 Thread Eric Anholt
Kenneth Graunke  writes:

> We were enabling undefined memory checking for genxml values based on
> Valgrind being installed at build time, even for release builds.  This
> generates piles and piles of assembly whenever you touch genxml.
>
> With gcc 7.3.1 and -O3 and -march=native on a Kabylake with Valgrind
> installed at build time:
>
>    text    data     bss     dec     hex filename
>5978385  262884  13488 6254757 5f70a5 libvulkan_intel.so
>3799377  262884  13488 4075749 3e30e5 libvulkan_intel.so
>
> That's a 36% reduction in text size.
> ---
>  src/intel/vulkan/anv_private.h | 2 ++
>  1 file changed, 2 insertions(+)
>
>  Eric,
>
>  You probably will want to make the equivalent change in
>  src/broadcom/cle/v3d_packet_helpers.h

Thanks for the cc on this one -- it paid off for v3d, too.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/23] intel: Assorted code generation improvements.

2018-06-21 Thread Kenneth Graunke
On Monday, June 11, 2018 7:25:52 PM PDT Francisco Jerez wrote:
> This series contains a few code generation refactors and clean-ups
> that are starting to pile up in an internal branch.  By itself this
> series shouldn't cause any functional changes, but there's no reason
> why it couldn't be reviewed already which would lessen our pain
> keeping the branch up-to-date.
> 
> Patches 1-16 rework the mechanism used in most of the back-end code
> generator to construct message descriptors in-place as part of the
> immediate of either a SEND(C) instruction or some other ALU
> instruction, which won't work as soon as we introduce support for the
> SENDS instruction and has proven to be rather fragile (see
> d2eecf0b0b24d203d0f171807681dffd830d54de), among other disadvantages.
> 
> Patches 18-22 drop the hard-coded correspondence between i965 IR
> opcodes and native hardware opcodes, which can fail horribly if the
> back-end compiler ever needs to use more than one of the hardware
> instructions with the same opcode (simultaneously or not), or if the
> opcode of any instruction is reassigned.  Fixing this involves
> reworking the opcode description tables to allow efficient two-way
> look-up by either IR or hardware opcode.

Patches 1-19 are:
Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 19/23] intel/eu: Fix up various type conversions in brw_eu.c that are illegal C++.

2018-06-21 Thread Kenneth Graunke
On Monday, June 11, 2018 7:26:11 PM PDT Francisco Jerez wrote:
> diff --git a/src/intel/compiler/brw_eu.c b/src/intel/compiler/brw_eu.c
> index 6ef0a6a577c..80f05240b42 100644
> --- a/src/intel/compiler/brw_eu.c
> +++ b/src/intel/compiler/brw_eu.c
> @@ -356,14 +356,14 @@ brw_disassemble(const struct gen_device_info *devinfo,
> bool dump_hex = (INTEL_DEBUG & DEBUG_HEX) != 0;
>  
> for (int offset = start; offset < end;) {
> -  const brw_inst *insn = assembly + offset;
> +  const brw_inst *insn = (const brw_inst *)((char *)assembly + offset);

This isn't standard C either, but void * math is a pretty common
compiler extension that's been around forever.  Personally, I'd
prefer that we keep using it, as it's much more readable.


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/23] intel/eu: Use brw_set_desc() along with a helper to set common descriptor controls.

2018-06-21 Thread Francisco Jerez
Kenneth Graunke  writes:

> On Monday, June 11, 2018 7:25:55 PM PDT Francisco Jerez wrote:
>> This replaces brw_set_message_descriptor() with the composition of
>> brw_set_desc() and a new inline helper function that packs the common
>> message descriptor controls into an integer.  The goal is to represent
>> all message descriptors as a 32-bit integer which is written at once
>> into the instruction, which is more flexible (SENDS anyone?), robust
>> (see d2eecf0b0b24d203d0f171807681dffd830d54de fixing an issue
>> ultimately caused by some bits of the extended message descriptor
>> being left undefined) and future-proof than the current approach of
>> specifying the individual descriptor fields directly into the
>> instruction.
>> 
>> This approach also seems more self-documenting, since it will allow
>> removing calls to functions with way too many arguments like
>> brw_set_*_message() and brw_send_indirect_message(), and instead
>> provide a single descriptor argument constructed from an appropriate
>> combination of brw_*_desc() helpers.
>> 
>> Note that because brw_set_message_descriptor() was (conditionally?)
>> overriding fields of the instruction which strictly speaking weren't
>> part of the message descriptor, this involves calling
>> brw_inst_set_sfid() and brw_inst_set_eot() in some cases in addition
>> to brw_set_desc().
>> ---
>>  src/intel/compiler/brw_eu.h   |  29 +---
>>  src/intel/compiler/brw_eu_emit.c  | 108 
>> +++---
>>  src/intel/compiler/brw_vec4_generator.cpp |  17 +++--
>>  3 files changed, 68 insertions(+), 86 deletions(-)
>> 
>> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
>> index 5a396339fde..b2b20713e45 100644
>> --- a/src/intel/compiler/brw_eu.h
>> +++ b/src/intel/compiler/brw_eu.h
>> @@ -256,14 +256,6 @@ void brw_set_sampler_message(struct brw_codegen *p,
>>   unsigned simd_mode,
>>   unsigned return_format);
>>  
>> -void brw_set_message_descriptor(struct brw_codegen *p,
>> -brw_inst *inst,
>> -enum brw_message_target sfid,
>> -unsigned msg_length,
>> -unsigned response_length,
>> -bool header_present,
>> -bool end_of_thread);
>> -
>>  void brw_set_dp_read_message(struct brw_codegen *p,
>>   brw_inst *insn,
>>   unsigned binding_table_index,
>> @@ -287,6 +279,27 @@ void brw_set_dp_write_message(struct brw_codegen *p,
>>unsigned end_of_thread,
>>unsigned send_commit_msg);
>>  
>> +/**
>> + * Construct a message descriptor immediate with the specified common
>> + * descriptor controls.
>> + */
>> +static inline uint32_t
>> +brw_message_desc(const struct gen_device_info *devinfo,
>> + unsigned msg_length,
>> + unsigned response_length,
>> + bool header_present)
>> +{
>
> Perhaps it would be good to add
>
>   assert(msg_length >= 1 && msg_length <= 15);
>
>> +   if (devinfo->gen >= 5) {
>
>   assert(response_length <= 16);
>
>
>> +  return (msg_length << 25 |
>> +  response_length << 20 |
>> +  header_present << 19);
>> +   } else {
>
>   assert(response_length <= 8);
>
> I'm not so concerned with validating the values here, just thinking it
> might make sense to verify that mlen fits in a U4, for example, so we
> don't accidentally bleed over into other fields when encoding it.
>

It's kind of a PITA to assert that each field is in range manually for
each one of these helpers (the following patches introduce a pile of
functions very much like this one), and verifying that the assertions
are complete and match the definition of the hardware fields is not
straightforward to review.  I would have used the SET_FIELD() macro if
it weren't for the fact that it relies on macros being defined with
specific suffixes for each field.  If you believe it's going to be
valuable, I think I'm going to introduce a new helper, more easily
reusable than SET_FIELD(), that checks for overflow based on a bitfield
specification, and use it instead of the plain left-shift operators.

> I suppose the validator already catches this, though...
>

I don't think the validator will be able to catch such cases in general.

>> +  return (msg_length << 20 |
>> +  response_length << 16);
>> +   }
>> +}


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Requests for Proposal for hosting XDC 2019

2018-06-21 Thread Daniel Vetter
On Thu, Jun 21, 2018 at 11:16 PM, Daniel Vetter  wrote:
> Hi all,
>
> The X.org board is soliciting proposals to host XDC in 2019. By the usual
> rotation a location in (North) America is preferred, but the board will also
> consider other locations, especially if there's an interesting co-location
> with another conference.
>
> If you consider hosting XDC, we have assembled a wiki page with what's
> generally expected and needed:
>
> https://www.x.org/wiki/Events/RFP/
>
> If possible the board would like to decide on the next location at XDC
> 2017 in Mountain View, please submit your proposal with at least the key

^^ should be XDC 2018 in La Coruna ofc.

So much for not properly updating the template again this year :-)
-Daniel

> information about location, possible dates and estimated costs to
> bo...@foundation.x.org latest by 31st August. An early quick heads-up
> to the board if you consider hosting would also be good, in case we
> need to adjust the schedule a bit. Also earlier is better since in
> general there
> will be a bit of Q&A with organizers.
>
> And if you just have some questions about what organizing XDC entails,
> please feel free to chat with previous organizers, or with someone from
> the board.
>
> Thanks, Daniel
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch



-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Requests for Proposal for hosting XDC 2019

2018-06-21 Thread Daniel Vetter
Hi all,

The X.org board is soliciting proposals to host XDC in 2019. By the usual
rotation a location in (North) America is preferred, but the board will also
consider other locations, especially if there's an interesting co-location
with another conference.

If you consider hosting XDC, we have assembled a wiki page with what's
generally expected and needed:

https://www.x.org/wiki/Events/RFP/

If possible the board would like to decide on the next location at XDC
2017 in Mountain View, please submit your proposal with at least the key
information about location, possible dates and estimated costs to
bo...@foundation.x.org latest by 31st August. An early quick heads-up
to the board if you consider hosting would also be good, in case we
need to adjust the schedule a bit. Also earlier is better since in
general there
will be a bit of Q&A with organizers.

And if you just have some questions about what organizing XDC entails,
please feel free to chat with previous organizers, or with someone from
the board.

Thanks, Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/5] freedreno: a2xx: fix clear color

2018-06-21 Thread Jonathan Marek
The format of the CLEAR_COLOR register doesn't depend on the target format.
This fixes clear color when rendering to 32-bit RGBA and 16-bit targets.

Signed-off-by: Jonathan Marek 
---
 src/gallium/drivers/freedreno/a2xx/fd2_draw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_draw.c 
b/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
index ca634d794a..6f0535fa2b 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
@@ -135,7 +135,7 @@ fd2_clear(struct fd_context *ctx, unsigned buffers,
uint32_t reg, colr = 0;
 
if ((buffers & PIPE_CLEAR_COLOR) && fb->nr_cbufs)
-   colr  = pack_rgba(fb->cbufs[0]->format, color->f);
+   colr = pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, color->f);
 
/* emit generic state now: */
fd2_emit_state(ctx, ctx->dirty &
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/5] freedreno: a2xx: fix crash when freeing context

2018-06-21 Thread Jonathan Marek
Signed-off-by: Jonathan Marek 
---
 src/gallium/drivers/freedreno/a2xx/fd2_program.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_program.c 
b/src/gallium/drivers/freedreno/a2xx/fd2_program.c
index 9a77457251..834a7c7fcd 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_program.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_program.c
@@ -54,6 +54,8 @@ create_shader(enum shader_t type)
 static void
 delete_shader(struct fd2_shader_stateobj *so)
 {
+   if (!so)
+   return;
ir2_shader_destroy(so->ir);
free(so->tokens);
free(so->bin);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/5] freedreno: a2xx: increase size of the offset field in instr_fetch_vtx_t

2018-06-21 Thread Jonathan Marek
The offset field is 22 bits wide.
At least 11 bits are necessary because MaxVertexAttribRelativeOffset = 2047.

Signed-off-by: Jonathan Marek 
---
 src/gallium/drivers/freedreno/a2xx/instr-a2xx.h | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a2xx/instr-a2xx.h 
b/src/gallium/drivers/freedreno/a2xx/instr-a2xx.h
index 0d6e138daf..ac972ed35a 100644
--- a/src/gallium/drivers/freedreno/a2xx/instr-a2xx.h
+++ b/src/gallium/drivers/freedreno/a2xx/instr-a2xx.h
@@ -366,10 +366,8 @@ typedef struct PACKED {
uint8_t pred_select  : 1;
/* dword2: */
uint8_t stride   : 8;
-   /* possibly offset and reserved4 are swapped on a200? */
-   uint8_t offset   : 8;
-   uint8_t reserved4: 8;
-   uint8_t reserved5: 7;
+   uint32_toffset   : 22;
+   uint8_t reserved4: 1;
uint8_t pred_condition   : 1;
 } instr_fetch_vtx_t;
 
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/5] freedreno: a2xx: fix crash on first clear

2018-06-21 Thread Jonathan Marek
blend can be NULL, so check for that

Signed-off-by: Jonathan Marek 
---
 src/gallium/drivers/freedreno/a2xx/fd2_emit.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c 
b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
index 4bf41b2c67..dcf7ed10b5 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
@@ -295,7 +295,7 @@ fd2_emit_state(struct fd_context *ctx, const enum 
fd_dirty_3d_state dirty)
if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_ZSA)) {
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
-   OUT_RING(ring, zsa->rb_colorcontrol | blend->rb_colorcontrol);
+   OUT_RING(ring, blend ? zsa->rb_colorcontrol | 
blend->rb_colorcontrol : 0);
}
 
if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) {
@@ -305,13 +305,13 @@ fd2_emit_state(struct fd_context *ctx, const enum 
fd_dirty_3d_state dirty)
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
-   OUT_RING(ring, blend->rb_blendcontrol_alpha |
+   OUT_RING(ring, blend ? blend->rb_blendcontrol_alpha |
COND(has_alpha, blend->rb_blendcontrol_rgb) |
-   COND(!has_alpha, blend->rb_blendcontrol_no_alpha_rgb));
+   COND(!has_alpha, blend->rb_blendcontrol_no_alpha_rgb) : 
0);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
-   OUT_RING(ring, blend->rb_colormask);
+   OUT_RING(ring, blend ? blend->rb_colormask : 0xf);
}
 
if (dirty & FD_DIRTY_BLEND_COLOR) {
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/5] freedreno: add a20x

2018-06-21 Thread Jonathan Marek
This patch adds support for a20x, which differs from a220 in the following ways:
-no VGT_MAX_VTX_INDX register
-no CLEAR_COLOR register
-set RB_BC_CONTROL in restore (hangs without)
-different CP_DRAW_INDX format

tested with kmscube and glmark2 scenes, on par with a220

Signed-off-by: Jonathan Marek 
---
 src/gallium/drivers/freedreno/a2xx/fd2_draw.c | 37 +--
 src/gallium/drivers/freedreno/a2xx/fd2_emit.c | 10 +
 src/gallium/drivers/freedreno/a2xx/fd2_gmem.c | 22 ++-
 .../drivers/freedreno/freedreno_draw.h| 27 +-
 .../drivers/freedreno/freedreno_screen.c  |  1 +
 .../drivers/freedreno/freedreno_screen.h  |  6 +++
 .../drivers/freedreno/freedreno_util.h| 13 +++
 7 files changed, 85 insertions(+), 31 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_draw.c 
b/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
index 8df1793a35..ca634d794a 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
@@ -101,12 +101,14 @@ fd2_draw_vbo(struct fd_context *ctx, const struct 
pipe_draw_info *info,
OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
 
-   OUT_WFI (ring);
+   if (!is_a20x(ctx->screen)) {
+   OUT_WFI (ring);
 
-   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
-   OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
-   OUT_RING(ring, info->max_index);/* VGT_MAX_VTX_INDX */
-   OUT_RING(ring, info->min_index);/* VGT_MIN_VTX_INDX */
+   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+   OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
+   OUT_RING(ring, info->max_index);/* VGT_MAX_VTX_INDX */
+   OUT_RING(ring, info->min_index);/* VGT_MIN_VTX_INDX */
+   }
 
fd_draw_emit(ctx->batch, ring, ctx->primtypes[info->mode],
 IGNORE_VISIBILITY, info, index_offset);
@@ -157,9 +159,18 @@ fd2_clear(struct fd_context *ctx, unsigned buffers,
OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
 
-   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
-   OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
-   OUT_RING(ring, colr);
+   if (is_a20x(ctx->screen)) {
+   OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+   OUT_RING(ring, 0x0480);
+   OUT_RING(ring, color->ui[0]);
+   OUT_RING(ring, color->ui[1]);
+   OUT_RING(ring, color->ui[2]);
+   OUT_RING(ring, color->ui[3]);
+   } else {
+   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+   OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
+   OUT_RING(ring, colr);
+   }
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
@@ -264,10 +275,12 @@ fd2_clear(struct fd_context *ctx, unsigned buffers,
OUT_RING(ring, 0x0);
}
 
-   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
-   OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
-   OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
-   OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
+   if (!is_a20x(ctx->screen)) {
+   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+   OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
+   OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
+   OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
+   }
 
fd_draw(ctx->batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, 
NULL);
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c 
b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
index d749eb0324..4bf41b2c67 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
@@ -332,6 +332,16 @@ fd2_emit_state(struct fd_context *ctx, const enum 
fd_dirty_3d_state dirty)
 void
 fd2_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring)
 {
+   if (is_a20x(ctx->screen)) {
+   OUT_PKT0(ring, REG_A2XX_RB_BC_CONTROL, 1);
+   OUT_RING(ring,
+   A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT(3) |
+   A2XX_RB_BC_CONTROL_DISABLE_LZ_NULL_ZCMD_DROP |
+   A2XX_RB_BC_CONTROL_ENABLE_CRC_UPDATE |
+   A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT(8) |
+   A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT(3));
+   }
+
OUT_PKT0(ring, REG_A2XX_TP0_CHICKEN, 1);
OUT_RING(ring, 0x0002);
 
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c 
b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
index 46a7d18ef0..62382995c0 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c

Re: [Mesa-dev] [PATCH mesa 2/4] vulkan: add VK_EXT_display_control [v8]

2018-06-21 Thread Keith Packard
Jason Ekstrand  writes:

>> Looks good.  With that, patches 1-3 are
>
> Reviewed-by: Jason Ekstrand 

Thanks.

> I'll let Dave or Bas review your fence hackery in radv.

Sounds fine. I'll prod them if I don't get any response in the next day
or so.

-- 
-keith


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/23] intel/eu: Use brw_set_desc() along with a helper to set common descriptor controls.

2018-06-21 Thread Kenneth Graunke
On Monday, June 11, 2018 7:25:55 PM PDT Francisco Jerez wrote:
> This replaces brw_set_message_descriptor() with the composition of
> brw_set_desc() and a new inline helper function that packs the common
> message descriptor controls into an integer.  The goal is to represent
> all message descriptors as a 32-bit integer which is written at once
> into the instruction, which is more flexible (SENDS anyone?), robust
> (see d2eecf0b0b24d203d0f171807681dffd830d54de fixing an issue
> ultimately caused by some bits of the extended message descriptor
> being left undefined) and future-proof than the current approach of
> specifying the individual descriptor fields directly into the
> instruction.
> 
> This approach also seems more self-documenting, since it will allow
> removing calls to functions with way too many arguments like
> brw_set_*_message() and brw_send_indirect_message(), and instead
> provide a single descriptor argument constructed from an appropriate
> combination of brw_*_desc() helpers.
> 
> Note that because brw_set_message_descriptor() was (conditionally?)
> overriding fields of the instruction which strictly speaking weren't
> part of the message descriptor, this involves calling
> brw_inst_set_sfid() and brw_inst_set_eot() in some cases in addition
> to brw_set_desc().
> ---
>  src/intel/compiler/brw_eu.h   |  29 +---
>  src/intel/compiler/brw_eu_emit.c  | 108 
> +++---
>  src/intel/compiler/brw_vec4_generator.cpp |  17 +++--
>  3 files changed, 68 insertions(+), 86 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
> index 5a396339fde..b2b20713e45 100644
> --- a/src/intel/compiler/brw_eu.h
> +++ b/src/intel/compiler/brw_eu.h
> @@ -256,14 +256,6 @@ void brw_set_sampler_message(struct brw_codegen *p,
>   unsigned simd_mode,
>   unsigned return_format);
>  
> -void brw_set_message_descriptor(struct brw_codegen *p,
> -brw_inst *inst,
> -enum brw_message_target sfid,
> -unsigned msg_length,
> -unsigned response_length,
> -bool header_present,
> -bool end_of_thread);
> -
>  void brw_set_dp_read_message(struct brw_codegen *p,
>brw_inst *insn,
>unsigned binding_table_index,
> @@ -287,6 +279,27 @@ void brw_set_dp_write_message(struct brw_codegen *p,
> unsigned end_of_thread,
> unsigned send_commit_msg);
>  
> +/**
> + * Construct a message descriptor immediate with the specified common
> + * descriptor controls.
> + */
> +static inline uint32_t
> +brw_message_desc(const struct gen_device_info *devinfo,
> + unsigned msg_length,
> + unsigned response_length,
> + bool header_present)
> +{

Perhaps it would be good to add

  assert(msg_length >= 1 && msg_length <= 15);

> +   if (devinfo->gen >= 5) {

  assert(response_length <= 16);


> +  return (msg_length << 25 |
> +  response_length << 20 |
> +  header_present << 19);
> +   } else {

  assert(response_length <= 8);

I'm not so concerned with validating the values here, just thinking it
might make sense to verify that mlen fits in a U4, for example, so we
don't accidentally bleed over into other fields when encoding it.

I suppose the validator already catches this, though...

> +  return (msg_length << 20 |
> +  response_length << 16);
> +   }
> +}


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106283] Shader replacements works only for limited use cases

2018-06-21 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106283

--- Comment #12 from Mark Janes  ---
Do you have an apitrace file of your workload, so I can test this functionality
in FrameRetrace?

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH mesa 2/4] vulkan: add VK_EXT_display_control [v8]

2018-06-21 Thread Jason Ekstrand
On Thu, Jun 21, 2018 at 7:37 AM, Keith Packard  wrote:

> Jason Ekstrand  writes:
>
> >> +  if (!ret)
> >> + return VK_SUCCESS;
> >> +
> >> +  if (errno != ENOMEM) {
> >
> > This strikes me as a bit odd. What does ENOMEM mean if not "out of
> > memory"?
>
> ENOMEM means that the queue is full and that we should drain it and try
> again; that's what the wait_for_event call is below.
>
> The other-than-ENOMEM case is for some other failure, such as VT switch
> or lease revoke. For RegisterDisplayEvent, there aren't any return
> values other than VK_SUCCESS defined, and we're already assuming we can
> use VK_OUT_OF_HOST_MEMORY for any function which allocates memory.
>
> I think the correct value might be VK_ERROR_DEVICE_LOST or
> VK_ERROR_OUT_OF_DATE_KHR as something "bad" has clearly happened? The
> other place this is called is from QueuePresent, where either of those
> error codes are allowed. I could convert that message to
> VK_OUT_OF_HOST_MEMORY for RegisterDisplayEvent if you think that's a
> good idea.
>
> The sleep prevents an application from spinning at this failure,
> allowing the user to gracefully terminate the application.
>
> >
> >> + wsi_display_debug("queue vblank event %lu failed\n",
> >> fence->sequence);
> >> + struct timespec delay = {
> >> +.tv_sec = 0,
> >> +.tv_nsec = 1ull,
> >> + };
> >> + nanosleep(, NULL);
> >> + return VK_ERROR_OUT_OF_HOST_MEMORY;
> >
> > Given your previous explanation, I think this is ok but I think it
> deserves
> > a comment.
>
> Wilco.
>
> I've added comments to this section to try and explain what's going on:
>
>   if (!ret)
>  return VK_SUCCESS;
>
>   if (errno != ENOMEM) {
>
>  /* Something unexpected happened. Pause for a moment so the
>   * application doesn't just spin and then return a failure
> indication
>   */
>
>  wsi_display_debug("queue vblank event %lu failed\n",
> fence->sequence);
>  struct timespec delay = {
> .tv_sec = 0,
> .tv_nsec = 1ull,
>  };
>  nanosleep(, NULL);
>  return VK_ERROR_OUT_OF_HOST_MEMORY;
>

I don't really like VK_ERROR_OUT_OF_HOST_MEMORY here but I don't know what
else to do at the moment.  The error codes for this extension are not
well-defined...  I think I'm fine with it for now.


>   }
>
>   /* The kernel event queue is full. Wait for some events to be
>* processed and try again
>*/
>
>   pthread_mutex_lock(>wait_mutex);
>   ret = wsi_display_wait_for_event(wsi, wsi_rel_to_abs_time(1u
> ll));
>   pthread_mutex_unlock(>wait_mutex);
>
>   if (ret) {
>  wsi_display_debug("vblank queue full, event wait failed\n");
>  return VK_ERROR_OUT_OF_HOST_MEMORY;
>   }


Looks good.  With that, patches 1-3 are

Reviewed-by: Jason Ekstrand 

I'll let Dave or Bas review your fence hackery in radv.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 06/16] intel: aubinator: handle GGTT mappings

2018-06-21 Thread Rafael Antognolli
This patch is

Reviewed-by: Rafael Antognolli 

On Thu, Jun 21, 2018 at 05:29:05PM +0100, Lionel Landwerlin wrote:
> We use memfd to store physical pages as they get read/written to and
> the GGTT entries translating virtual address to physical pages.
> 
> Based on a commit by Scott Phillips.
> 
> Signed-off-by: Lionel Landwerlin 
> ---
>  src/intel/tools/aubinator.c | 257 ++--
>  1 file changed, 244 insertions(+), 13 deletions(-)
> 
> diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c
> index 3b04ba3f431..05083dbcda0 100644
> --- a/src/intel/tools/aubinator.c
> +++ b/src/intel/tools/aubinator.c
> @@ -39,12 +39,23 @@
>  
>  #include "util/list.h"
>  #include "util/macros.h"
> +#include "util/rb_tree.h"
>  
>  #include "common/gen_decoder.h"
>  #include "common/gen_disasm.h"
>  #include "common/gen_gem.h"
>  #include "intel_aub.h"
>  
> +#ifndef HAVE_MEMFD_CREATE
> +#include 
> +
> +static inline int
> +memfd_create(const char *name, unsigned int flags)
> +{
> +   return syscall(SYS_memfd_create, name, flags);
> +}
> +#endif
> +
>  /* Below is the only command missing from intel_aub.h in libdrm
>   * So, reuse intel_aub.h from libdrm and #define the
>   * AUB_MI_BATCH_BUFFER_END as below
> @@ -73,20 +84,39 @@ struct gen_batch_decode_ctx batch_ctx;
>  struct bo_map {
> struct list_head link;
> struct gen_batch_decode_bo bo;
> +   bool unmap_after_use;
> +};
> +
> +struct ggtt_entry {
> +   struct rb_node node;
> +   uint64_t virt_addr;
> +   uint64_t phys_addr;
> +};
> +
> +struct phys_mem {
> +   struct rb_node node;
> +   uint64_t fd_offset;
> +   uint64_t phys_addr;
> +   uint8_t *data;
>  };
>  
>  static struct list_head maps;
> +static struct rb_tree ggtt = {NULL};
> +static struct rb_tree mem = {NULL};
> +int mem_fd = -1;
> +off_t mem_fd_len = 0;
>  
>  FILE *outfile;
>  
>  struct brw_instruction;
>  
>  static void
> -add_gtt_bo_map(struct gen_batch_decode_bo bo)
> +add_gtt_bo_map(struct gen_batch_decode_bo bo, bool unmap_after_use)
>  {
> struct bo_map *m = calloc(1, sizeof(*m));
>  
> m->bo = bo;
> +   m->unmap_after_use = unmap_after_use;
> list_add(>link, );
>  }
>  
> @@ -94,21 +124,209 @@ static void
>  clear_bo_maps(void)
>  {
> list_for_each_entry_safe(struct bo_map, i, , link) {
> +  if (i->unmap_after_use)
> + munmap((void *)i->bo.map, i->bo.size);
>list_del(>link);
>free(i);
> }
>  }
>  
> +static inline struct ggtt_entry *
> +ggtt_entry_next(struct ggtt_entry *entry)
> +{
> +   if (!entry)
> +  return NULL;
> +   struct rb_node *node = rb_node_next(>node);
> +   if (!node)
> +  return NULL;
> +   return rb_node_data(struct ggtt_entry, node, node);
> +}
> +
> +static inline int
> +cmp_uint64(uint64_t a, uint64_t b)
> +{
> +   if (a < b)
> +  return -1;
> +   if (a > b)
> +  return 1;
> +   return 0;
> +}
> +
> +static inline int
> +cmp_ggtt_entry(const struct rb_node *node, const void *addr)
> +{
> +   struct ggtt_entry *entry = rb_node_data(struct ggtt_entry, node, node);
> +   return cmp_uint64(entry->virt_addr, *(const uint64_t *)addr);
> +}
> +
> +static struct ggtt_entry *
> +ensure_ggtt_entry(struct rb_tree *tree, uint64_t virt_addr)
> +{
> +   struct rb_node *node = rb_tree_search_sloppy(, _addr,
> +cmp_ggtt_entry);
> +   int cmp = 0;
> +   if (!node || (cmp = cmp_ggtt_entry(node, _addr))) {
> +  struct ggtt_entry *new_entry = calloc(1, sizeof(*new_entry));
> +  new_entry->virt_addr = virt_addr;
> +  rb_tree_insert_at(, node, _entry->node, cmp > 0);
> +  node = _entry->node;
> +   }
> +
> +   return rb_node_data(struct ggtt_entry, node, node);
> +}
> +
> +static struct ggtt_entry *
> +search_ggtt_entry(uint64_t virt_addr)
> +{
> +   virt_addr &= ~0xfff;
> +
> +   struct rb_node *node = rb_tree_search(, _addr, cmp_ggtt_entry);
> +
> +   if (!node)
> +  return NULL;
> +
> +   return rb_node_data(struct ggtt_entry, node, node);
> +}
> +
> +static inline int
> +cmp_phys_mem(const struct rb_node *node, const void *addr)
> +{
> +   struct phys_mem *mem = rb_node_data(struct phys_mem, node, node);
> +   return cmp_uint64(mem->phys_addr, *(uint64_t *)addr);
> +}
> +
> +static struct phys_mem *
> +ensure_phys_mem(uint64_t phys_addr)
> +{
> +   struct rb_node *node = rb_tree_search_sloppy(, _addr, 
> cmp_phys_mem);
> +   int cmp = 0;
> +   if (!node || (cmp = cmp_phys_mem(node, _addr))) {
> +  struct phys_mem *new_mem = calloc(1, sizeof(*new_mem));
> +  new_mem->phys_addr = phys_addr;
> +  new_mem->fd_offset = mem_fd_len;
> +
> +  int ftruncate_res = ftruncate(mem_fd, mem_fd_len += 4096);
> +  assert(ftruncate_res == 0);
> +
> +  new_mem->data = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
> +   mem_fd, new_mem->fd_offset);
> +  assert(new_mem->data != MAP_FAILED);
> +
> +  rb_tree_insert_at(, node, _mem->node, cmp > 0);
> +  node = 

Re: [Mesa-dev] [PATCH] swr/rastr: Don't assume non-WIN is always 64 bit, instead use the x86_64 define

2018-06-21 Thread Gert Wollny
Ping 
Am Samstag, den 26.05.2018, 23:20 +0200 schrieb Gert Wollny:
> One must not assume that compiling swr on non-Windows platforms is
> always
> done for 64 bit archs. For instance in a Gentoo multiarch
> installation if
> swr is enabled, it will be built for all archs.
> 
> Fixes: fa4ab7910e3492b09b40e00c0b82a7bb1bae03d0
>swr/rast: Add some SIMD_T utility functors
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106401
> Signed-off-by: Gert Wollny 
> ---
>  src/gallium/drivers/swr/rasterizer/common/simdlib.hpp | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp
> b/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp
> index 24cf27d4db..a418c2cd6a 100644
> --- a/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp
> +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp
> @@ -606,7 +606,7 @@ struct SIMDVecHash
>  {
>  INLINE uint32_t operator ()(Integer val) const
>  {
> -#if defined(_WIN64) || !defined(_WIN32) // assume non-Windows is
> always 64-bit
> +#if defined(_WIN64) || defined(__x86_64__)
>  static_assert(sizeof(void*) == 8, "This path only meant for
> 64-bit code");
>  
>  uint64_t crc32 = 0;
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 06/16] intel: aubinator: handle GGTT mappings

2018-06-21 Thread Lionel Landwerlin
We use memfd to store physical pages as they get read/written to and
the GGTT entries translating virtual address to physical pages.

Based on a commit by Scott Phillips.

Signed-off-by: Lionel Landwerlin 
---
 src/intel/tools/aubinator.c | 257 ++--
 1 file changed, 244 insertions(+), 13 deletions(-)

diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c
index 3b04ba3f431..05083dbcda0 100644
--- a/src/intel/tools/aubinator.c
+++ b/src/intel/tools/aubinator.c
@@ -39,12 +39,23 @@
 
 #include "util/list.h"
 #include "util/macros.h"
+#include "util/rb_tree.h"
 
 #include "common/gen_decoder.h"
 #include "common/gen_disasm.h"
 #include "common/gen_gem.h"
 #include "intel_aub.h"
 
+#ifndef HAVE_MEMFD_CREATE
+#include 
+
+static inline int
+memfd_create(const char *name, unsigned int flags)
+{
+   return syscall(SYS_memfd_create, name, flags);
+}
+#endif
+
 /* Below is the only command missing from intel_aub.h in libdrm
  * So, reuse intel_aub.h from libdrm and #define the
  * AUB_MI_BATCH_BUFFER_END as below
@@ -73,20 +84,39 @@ struct gen_batch_decode_ctx batch_ctx;
 struct bo_map {
struct list_head link;
struct gen_batch_decode_bo bo;
+   bool unmap_after_use;
+};
+
+struct ggtt_entry {
+   struct rb_node node;
+   uint64_t virt_addr;
+   uint64_t phys_addr;
+};
+
+struct phys_mem {
+   struct rb_node node;
+   uint64_t fd_offset;
+   uint64_t phys_addr;
+   uint8_t *data;
 };
 
 static struct list_head maps;
+static struct rb_tree ggtt = {NULL};
+static struct rb_tree mem = {NULL};
+int mem_fd = -1;
+off_t mem_fd_len = 0;
 
 FILE *outfile;
 
 struct brw_instruction;
 
 static void
-add_gtt_bo_map(struct gen_batch_decode_bo bo)
+add_gtt_bo_map(struct gen_batch_decode_bo bo, bool unmap_after_use)
 {
struct bo_map *m = calloc(1, sizeof(*m));
 
m->bo = bo;
+   m->unmap_after_use = unmap_after_use;
list_add(&m->link, &maps);
 }
 
@@ -94,21 +124,209 @@ static void
 clear_bo_maps(void)
 {
list_for_each_entry_safe(struct bo_map, i, &maps, link) {
+  if (i->unmap_after_use)
+ munmap((void *)i->bo.map, i->bo.size);
   list_del(&i->link);
   free(i);
}
 }
 
+static inline struct ggtt_entry *
+ggtt_entry_next(struct ggtt_entry *entry)
+{
+   if (!entry)
+  return NULL;
+   struct rb_node *node = rb_node_next(&entry->node);
+   if (!node)
+  return NULL;
+   return rb_node_data(struct ggtt_entry, node, node);
+}
+
+static inline int
+cmp_uint64(uint64_t a, uint64_t b)
+{
+   if (a < b)
+  return -1;
+   if (a > b)
+  return 1;
+   return 0;
+}
+
+static inline int
+cmp_ggtt_entry(const struct rb_node *node, const void *addr)
+{
+   struct ggtt_entry *entry = rb_node_data(struct ggtt_entry, node, node);
+   return cmp_uint64(entry->virt_addr, *(const uint64_t *)addr);
+}
+
+static struct ggtt_entry *
+ensure_ggtt_entry(struct rb_tree *tree, uint64_t virt_addr)
+{
+   struct rb_node *node = rb_tree_search_sloppy(&ggtt, &virt_addr,
+cmp_ggtt_entry);
+   int cmp = 0;
+   if (!node || (cmp = cmp_ggtt_entry(node, &virt_addr))) {
+  struct ggtt_entry *new_entry = calloc(1, sizeof(*new_entry));
+  new_entry->virt_addr = virt_addr;
+  rb_tree_insert_at(&ggtt, node, &new_entry->node, cmp > 0);
+  node = &new_entry->node;
+   }
+
+   return rb_node_data(struct ggtt_entry, node, node);
+}
+
+static struct ggtt_entry *
+search_ggtt_entry(uint64_t virt_addr)
+{
+   virt_addr &= ~0xfff;
+
+   struct rb_node *node = rb_tree_search(&ggtt, &virt_addr, cmp_ggtt_entry);
+
+   if (!node)
+  return NULL;
+
+   return rb_node_data(struct ggtt_entry, node, node);
+}
+
+static inline int
+cmp_phys_mem(const struct rb_node *node, const void *addr)
+{
+   struct phys_mem *mem = rb_node_data(struct phys_mem, node, node);
+   return cmp_uint64(mem->phys_addr, *(uint64_t *)addr);
+}
+
+static struct phys_mem *
+ensure_phys_mem(uint64_t phys_addr)
+{
+   struct rb_node *node = rb_tree_search_sloppy(&mem, &phys_addr, 
cmp_phys_mem);
+   int cmp = 0;
+   if (!node || (cmp = cmp_phys_mem(node, &phys_addr))) {
+  struct phys_mem *new_mem = calloc(1, sizeof(*new_mem));
+  new_mem->phys_addr = phys_addr;
+  new_mem->fd_offset = mem_fd_len;
+
+  int ftruncate_res = ftruncate(mem_fd, mem_fd_len += 4096);
+  assert(ftruncate_res == 0);
+
+  new_mem->data = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
+   mem_fd, new_mem->fd_offset);
+  assert(new_mem->data != MAP_FAILED);
+
+  rb_tree_insert_at(&mem, node, &new_mem->node, cmp > 0);
+  node = &new_mem->node;
+   }
+
+   return rb_node_data(struct phys_mem, node, node);
+}
+
+static struct phys_mem *
+search_phys_mem(uint64_t phys_addr)
+{
+   phys_addr &= ~0xfff;
+
+   struct rb_node *node = rb_tree_search(&mem, &phys_addr, cmp_phys_mem);
+
+   if (!node)
+  return NULL;
+
+   return rb_node_data(struct phys_mem, node, node);
+}
+
+static void
+handle_ggtt_entry_write(uint64_t address, const void *_data, uint32_t _size)
+{
+   uint64_t virt_addr = (address / 

[Mesa-dev] [PATCH v3 13/16] intel: tools: dump-gpu: dump 48-bit addresses

2018-06-21 Thread Lionel Landwerlin
From: Scott D Phillips 

For gen8+, write out PPGTT tables in aub files so that full 48-bit
addresses can be serialized.

v2: Fix handling of `end` index in map_ppgtt

v3: Correctly mark GGTT entry as present (Rafael)

Signed-off-by: Scott D Phillips 
Signed-off-by: Lionel Landwerlin 
Cc: Jordan Justen 
---
 src/intel/tools/intel_aub.h  |   3 +-
 src/intel/tools/intel_dump_gpu.c | 315 +++
 2 files changed, 151 insertions(+), 167 deletions(-)

diff --git a/src/intel/tools/intel_aub.h b/src/intel/tools/intel_aub.h
index 9ca548edaf3..2888515048f 100644
--- a/src/intel/tools/intel_aub.h
+++ b/src/intel/tools/intel_aub.h
@@ -117,7 +117,8 @@
 /* DW3 */
 
 #define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_MASK        0xf0000000
-#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_LOCAL       (1 << 28)
+#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT        (0 << 28)
+#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_PHYSICAL    (2 << 28)
 #define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY  (4 << 28)
 
 /**
diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
index 86c133da433..a9ce109b2b6 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -51,6 +51,8 @@
 #define MI_LOAD_REGISTER_IMM_n(n) ((0x22 << 23) | (2 * (n) - 1))
 #define MI_LRI_FORCE_POSTED   (1<<12)
 
+#define MI_BATCH_NON_SECURE_I965 (1 << 8)
+
 #define MI_BATCH_BUFFER_END (0xA << 23)
 
 #define min(a, b) ({\
@@ -59,6 +61,12 @@
  _a < _b ? _a : _b; \
   })
 
+#define max(a, b) ({\
+ __typeof(a) _a = (a);  \
+ __typeof(b) _b = (b);  \
+ _a > _b ? _a : _b; \
+  })
+
 #define HWS_PGA_RCSUNIT  0x02080
 #define HWS_PGA_VCSUNIT0   0x12080
 #define HWS_PGA_BCSUNIT  0x22080
@@ -93,8 +101,12 @@
 
 #define RING_SIZE (1 * 4096)
 #define PPHWSP_SIZE (1 * 4096)
-#define GEN10_LR_CONTEXT_RENDER_SIZE   (19 * 4096)
-#define GEN8_LR_CONTEXT_OTHER_SIZE   (2 * 4096)
+#define GEN11_LR_CONTEXT_RENDER_SIZE    (14 * 4096)
+#define GEN10_LR_CONTEXT_RENDER_SIZE    (19 * 4096)
+#define GEN9_LR_CONTEXT_RENDER_SIZE     (22 * 4096)
+#define GEN8_LR_CONTEXT_RENDER_SIZE     (20 * 4096)
+#define GEN8_LR_CONTEXT_OTHER_SIZE  (2 * 4096)
+
 
 #define STATIC_GGTT_MAP_START 0
 
@@ -110,14 +122,19 @@
 #define STATIC_GGTT_MAP_END (VIDEO_CONTEXT_ADDR + PPHWSP_SIZE + 
GEN8_LR_CONTEXT_OTHER_SIZE)
 #define STATIC_GGTT_MAP_SIZE (STATIC_GGTT_MAP_END - STATIC_GGTT_MAP_START)
 
-#define CONTEXT_FLAGS (0x229)   /* Normal Priority | L3-LLC Coherency |
-   Legacy Context with no 64 bit VA support | 
Valid */
+#define PML4_PHYS_ADDR ((uint64_t)(STATIC_GGTT_MAP_END))
+
+#define CONTEXT_FLAGS (0x339)   /* Normal Priority | L3-LLC Coherency |
+ * PPGTT Enabled |
+ * Legacy Context with 64 bit VA support |
+ * Valid
+ */
 
-#define RENDER_CONTEXT_DESCRIPTOR  ((uint64_t)1 << 32 | RENDER_CONTEXT_ADDR  | 
CONTEXT_FLAGS)
-#define BLITTER_CONTEXT_DESCRIPTOR ((uint64_t)2 << 32 | BLITTER_CONTEXT_ADDR | 
CONTEXT_FLAGS)
-#define VIDEO_CONTEXT_DESCRIPTOR   ((uint64_t)3 << 32 | VIDEO_CONTEXT_ADDR   | 
CONTEXT_FLAGS)
+#define RENDER_CONTEXT_DESCRIPTOR  ((uint64_t)1 << 62 | RENDER_CONTEXT_ADDR  | 
CONTEXT_FLAGS)
+#define BLITTER_CONTEXT_DESCRIPTOR ((uint64_t)2 << 62 | BLITTER_CONTEXT_ADDR | 
CONTEXT_FLAGS)
+#define VIDEO_CONTEXT_DESCRIPTOR   ((uint64_t)3 << 62 | VIDEO_CONTEXT_ADDR   | 
CONTEXT_FLAGS)
 
-static const uint32_t render_context_init[GEN10_LR_CONTEXT_RENDER_SIZE /
+static const uint32_t render_context_init[GEN9_LR_CONTEXT_RENDER_SIZE / /* 
Choose the largest */
   sizeof(uint32_t)] = {
0 /* MI_NOOP */,
MI_LOAD_REGISTER_IMM_n(14) | MI_LRI_FORCE_POSTED,
@@ -147,8 +164,8 @@ static const uint32_t 
render_context_init[GEN10_LR_CONTEXT_RENDER_SIZE /
0x2280 /* PDP2_LDW */,  0,
0x227C /* PDP1_UDW */,  0,
0x2278 /* PDP1_LDW */,  0,
-   0x2274 /* PDP0_UDW */,  0,
-   0x2270 /* PDP0_LDW */,  0,
+   0x2274 /* PDP0_UDW */,  PML4_PHYS_ADDR >> 32,
+   0x2270 /* PDP0_LDW */,  PML4_PHYS_ADDR,
/* MI_NOOP */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 
@@ -185,8 +202,8 @@ static const uint32_t 
blitter_context_init[GEN8_LR_CONTEXT_OTHER_SIZE /
0x22280 /* PDP2_LDW */,  0,
0x2227C /* PDP1_UDW */,  0,
0x22278 /* PDP1_LDW */,  0,
-   0x22274 /* PDP0_UDW */,  0,
-   0x22270 /* PDP0_LDW */,  0,
+   0x22274 /* PDP0_UDW */,  PML4_PHYS_ADDR >> 32,
+   0x22270 /* PDP0_LDW */,  PML4_PHYS_ADDR,
/* MI_NOOP */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 
@@ -220,8 +237,8 @@ static const uint32_t 
video_context_init[GEN8_LR_CONTEXT_OTHER_SIZE /
0x1C280 /* PDP2_LDW */,  

[Mesa-dev] [PATCH v3 15/16] intel: intel_dump_gpu: use simulator id in captures

2018-06-21 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
---
 src/intel/tools/intel_aub.h  | 2 +-
 src/intel/tools/intel_dump_gpu.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/intel/tools/intel_aub.h b/src/intel/tools/intel_aub.h
index 2888515048f..74ca26ab9bb 100644
--- a/src/intel/tools/intel_aub.h
+++ b/src/intel/tools/intel_aub.h
@@ -103,7 +103,7 @@
 /* DW2 */
 
 #define AUB_MEM_TRACE_VERSION_DEVICE_MASK  0xff00
-#define AUB_MEM_TRACE_VERSION_DEVICE_CNL   (15 << 8)
+#define AUB_MEM_TRACE_VERSION_DEVICE_SHIFT  8
 
 #define AUB_MEM_TRACE_VERSION_METHOD_MASK  0x000c
 #define AUB_MEM_TRACE_VERSION_METHOD_PHY   (1 << 18)
diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
index a9ce109b2b6..6107035d5bc 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -512,7 +512,7 @@ write_execlists_header(void)
dwords = 5 + app_name_len / sizeof(uint32_t);
dword_out(CMD_MEM_TRACE_VERSION | (dwords - 1));
dword_out(AUB_MEM_TRACE_VERSION_FILE_VERSION);
-   dword_out(AUB_MEM_TRACE_VERSION_DEVICE_CNL);
+   dword_out(devinfo.simulator_id << AUB_MEM_TRACE_VERSION_DEVICE_SHIFT);
dword_out(0);  /* version */
dword_out(0);  /* version */
data_out(app_name, app_name_len);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 16/16] intel: tools: remove drm-uapi defines

2018-06-21 Thread Lionel Landwerlin
We already embed the headers, no need to redefine defines/structs.

Signed-off-by: Lionel Landwerlin 
---
 src/intel/tools/intel_dump_gpu.c | 30 +-
 1 file changed, 1 insertion(+), 29 deletions(-)

diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
index 6107035d5bc..8a7dd52e746 100644
--- a/src/intel/tools/intel_dump_gpu.c
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -272,27 +272,6 @@ static struct bo *bos;
 
 #define DRM_MAJOR 226
 
-#ifndef DRM_I915_GEM_USERPTR
-
-#define DRM_I915_GEM_USERPTR  0x33
-#define DRM_IOCTL_I915_GEM_USERPTR   DRM_IOWR (DRM_COMMAND_BASE + 
DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
-
-struct drm_i915_gem_userptr {
-   __u64 user_ptr;
-   __u64 user_size;
-   __u32 flags;
-#define I915_USERPTR_READ_ONLY 0x1
-#define I915_USERPTR_UNSYNCHRONIZED 0x8000
-   /**
-* Returned handle for the object.
-*
-* Object handles are nonzero.
-*/
-   __u32 handle;
-};
-
-#endif
-
 /* We set bit 0 in the map pointer for userptr BOs so we know not to
  * munmap them on DRM_IOCTL_GEM_CLOSE.
  */
@@ -300,10 +279,6 @@ struct drm_i915_gem_userptr {
 #define IS_USERPTR(p) ((uintptr_t) (p) & USERPTR_FLAG)
 #define GET_PTR(p) ( (void *) ((uintptr_t) p & ~(uintptr_t) 1) )
 
-#ifndef I915_EXEC_BATCH_FIRST
-#define I915_EXEC_BATCH_FIRST (1 << 18)
-#endif
-
 static inline bool use_execlists(void)
 {
return devinfo.gen >= 8;
@@ -1127,9 +1102,6 @@ maybe_init(void)
fail_if(bos == NULL, "intel_aubdump: out of memory\n");
 }
 
-#define LOCAL_IOCTL_I915_GEM_EXECBUFFER2_WR \
-   DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct 
drm_i915_gem_execbuffer2)
-
 __attribute__ ((visibility ("default"))) int
 ioctl(int fd, unsigned long request, ...)
 {
@@ -1185,7 +1157,7 @@ ioctl(int fd, unsigned long request, ...)
   }
 
   case DRM_IOCTL_I915_GEM_EXECBUFFER2:
-  case LOCAL_IOCTL_I915_GEM_EXECBUFFER2_WR: {
+  case DRM_IOCTL_I915_GEM_EXECBUFFER2_WR: {
  dump_execbuffer2(fd, argp);
  if (device_override)
 return 0;
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 08/16] intel/batch-decoder: handle non-contiguous binding table / surface state

2018-06-21 Thread Lionel Landwerlin
From: Scott D Phillips 

Reviewed-by: Lionel Landwerlin 
---
 src/intel/common/gen_batch_decoder.c | 18 ++
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/intel/common/gen_batch_decoder.c 
b/src/intel/common/gen_batch_decoder.c
index 3852f32de36..2b6978da92d 100644
--- a/src/intel/common/gen_batch_decoder.c
+++ b/src/intel/common/gen_batch_decoder.c
@@ -236,20 +236,30 @@ dump_binding_table(struct gen_batch_decode_ctx *ctx, 
uint32_t offset, int count)
   return;
}
 
+   struct gen_batch_decode_bo bo = ctx->surface_base;
const uint32_t *pointers = ctx->surface_base.map + offset;
for (int i = 0; i < count; i++) {
   if (pointers[i] == 0)
  continue;
 
-  if (pointers[i] % 32 != 0 ||
-  (pointers[i] + strct->dw_length * 4) >= ctx->surface_base.size) {
+  if (pointers[i] % 32 != 0) {
+ fprintf(ctx->fp, "pointer %u: %08x \n", i, pointers[i]);
+ continue;
+  }
+
+  uint64_t addr = ctx->surface_base.addr + pointers[i];
+  uint32_t size = strct->dw_length * 4;
+
+  if (addr < bo.addr || addr + size >= bo.addr + bo.size)
+ bo = ctx->get_bo(ctx->user_data, addr);
+
+  if (addr < bo.addr || addr + size >= bo.addr + bo.size) {
  fprintf(ctx->fp, "pointer %u: %08x \n", i, pointers[i]);
  continue;
   }
 
   fprintf(ctx->fp, "pointer %u: %08x\n", i, pointers[i]);
-  ctx_print_group(ctx, strct, ctx->surface_base.addr + pointers[i],
-  ctx->surface_base.map + pointers[i]);
+  ctx_print_group(ctx, strct, addr, bo.map + (addr - bo.addr));
}
 }
 
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 14/16] intel: devinfo: add simulator id

2018-06-21 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
---
 src/intel/dev/gen_device_info.c | 47 ++---
 src/intel/dev/gen_device_info.h |  5 
 2 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/src/intel/dev/gen_device_info.c b/src/intel/dev/gen_device_info.c
index 8e971329892..b0ae4d18034 100644
--- a/src/intel/dev/gen_device_info.c
+++ b/src/intel/dev/gen_device_info.c
@@ -105,6 +105,7 @@ static const struct gen_device_info gen_device_info_i965 = {
   .size = 256,
},
.timestamp_frequency = 1250,
+   .simulator_id = -1,
 };
 
 static const struct gen_device_info gen_device_info_g4x = {
@@ -124,6 +125,7 @@ static const struct gen_device_info gen_device_info_g4x = {
   .size = 384,
},
.timestamp_frequency = 1250,
+   .simulator_id = -1,
 };
 
 static const struct gen_device_info gen_device_info_ilk = {
@@ -142,6 +144,7 @@ static const struct gen_device_info gen_device_info_ilk = {
   .size = 1024,
},
.timestamp_frequency = 1250,
+   .simulator_id = -1,
 };
 
 static const struct gen_device_info gen_device_info_snb_gt1 = {
@@ -170,6 +173,7 @@ static const struct gen_device_info gen_device_info_snb_gt1 
= {
   },
},
.timestamp_frequency = 1250,
+   .simulator_id = -1,
 };
 
 static const struct gen_device_info gen_device_info_snb_gt2 = {
@@ -198,6 +202,7 @@ static const struct gen_device_info gen_device_info_snb_gt2 
= {
   },
},
.timestamp_frequency = 1250,
+   .simulator_id = -1,
 };
 
 #define GEN7_FEATURES   \
@@ -236,6 +241,7 @@ static const struct gen_device_info gen_device_info_ivb_gt1 
= {
  [MESA_SHADER_GEOMETRY]  = 192,
   },
},
+   .simulator_id = 7,
 };
 
 static const struct gen_device_info gen_device_info_ivb_gt2 = {
@@ -265,6 +271,7 @@ static const struct gen_device_info gen_device_info_ivb_gt2 
= {
  [MESA_SHADER_GEOMETRY]  = 320,
   },
},
+   .simulator_id = 7,
 };
 
 static const struct gen_device_info gen_device_info_byt = {
@@ -294,6 +301,7 @@ static const struct gen_device_info gen_device_info_byt = {
  [MESA_SHADER_GEOMETRY]  = 192,
   },
},
+   .simulator_id = 10,
 };
 
 #define HSW_FEATURES \
@@ -328,6 +336,7 @@ static const struct gen_device_info gen_device_info_hsw_gt1 
= {
  [MESA_SHADER_GEOMETRY]  = 256,
   },
},
+   .simulator_id = 9,
 };
 
 static const struct gen_device_info gen_device_info_hsw_gt2 = {
@@ -356,6 +365,7 @@ static const struct gen_device_info gen_device_info_hsw_gt2 
= {
  [MESA_SHADER_GEOMETRY]  = 640,
   },
},
+   .simulator_id = 9,
 };
 
 static const struct gen_device_info gen_device_info_hsw_gt3 = {
@@ -384,6 +394,7 @@ static const struct gen_device_info gen_device_info_hsw_gt3 
= {
  [MESA_SHADER_GEOMETRY]  = 640,
   },
},
+   .simulator_id = 9,
 };
 
 /* It's unclear how well supported sampling from the hiz buffer is on GEN8,
@@ -429,7 +440,8 @@ static const struct gen_device_info gen_device_info_bdw_gt1 
= {
  [MESA_SHADER_TESS_EVAL] = 1536,
  [MESA_SHADER_GEOMETRY]  = 960,
   },
-   }
+   },
+   .simulator_id = 11,
 };
 
 static const struct gen_device_info gen_device_info_bdw_gt2 = {
@@ -453,7 +465,8 @@ static const struct gen_device_info gen_device_info_bdw_gt2 
= {
  [MESA_SHADER_TESS_EVAL] = 1536,
  [MESA_SHADER_GEOMETRY]  = 960,
   },
-   }
+   },
+   .simulator_id = 11,
 };
 
 static const struct gen_device_info gen_device_info_bdw_gt3 = {
@@ -477,7 +490,8 @@ static const struct gen_device_info gen_device_info_bdw_gt3 
= {
  [MESA_SHADER_TESS_EVAL] = 1536,
  [MESA_SHADER_GEOMETRY]  = 960,
   },
-   }
+   },
+   .simulator_id = 11,
 };
 
 static const struct gen_device_info gen_device_info_chv = {
@@ -507,7 +521,8 @@ static const struct gen_device_info gen_device_info_chv = {
  [MESA_SHADER_TESS_EVAL] = 384,
  [MESA_SHADER_GEOMETRY]  = 256,
   },
-   }
+   },
+   .simulator_id = 13,
 };
 
 #define GEN9_HW_INFO\
@@ -603,6 +618,7 @@ static const struct gen_device_info gen_device_info_skl_gt1 
= {
.num_eu_per_subslice = 6,
.l3_banks = 2,
.urb.size = 192,
+   .simulator_id = 12,
 };
 
 static const struct gen_device_info gen_device_info_skl_gt2 = {
@@ -612,6 +628,7 @@ static const struct gen_device_info gen_device_info_skl_gt2 
= {
.num_subslices = { 3, },
.num_eu_per_subslice = 8,
.l3_banks = 4,
+   .simulator_id = 12,
 };
 
 static const struct gen_device_info gen_device_info_skl_gt3 = {
@@ -621,6 +638,7 @@ static const struct gen_device_info gen_device_info_skl_gt3 
= {
.num_subslices = { 3, 3, },
.num_eu_per_subslice = 8,
.l3_banks = 8,
+   .simulator_id = 12,
 };
 
 static const struct gen_device_info gen_device_info_skl_gt4 = {
@@ -639,18 +657,21 @@ static const struct gen_device_info 
gen_device_info_skl_gt4 = {
 * only 1008KB of this will be used."
 */

[Mesa-dev] [PATCH v3 07/16] intel/tools/aubinator: aubinate ppgtt aubs

2018-06-21 Thread Lionel Landwerlin
From: Scott D Phillips 

v2: by Lionel
Fix memfd_create compilation issue
Fix pml4 address stored on 32 instead of 64bits
Return no buffer if first ppgtt page is not mapped

v3: Drop additional memfd_create() (Rafael)

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Rafael Antognolli 
---
 src/intel/tools/aubinator.c | 73 -
 1 file changed, 72 insertions(+), 1 deletion(-)

diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c
index 05083dbcda0..8989d558b66 100644
--- a/src/intel/tools/aubinator.c
+++ b/src/intel/tools/aubinator.c
@@ -327,6 +327,68 @@ get_ggtt_batch_bo(void *user_data, uint64_t address)
 
return bo;
 }
+
+static struct phys_mem *
+ppgtt_walk(uint64_t pml4, uint64_t address)
+{
+   uint64_t shift = 39;
+   uint64_t addr = pml4;
+   for (int level = 4; level > 0; level--) {
+  struct phys_mem *table = search_phys_mem(addr);
+  if (!table)
+ return NULL;
+  int index = (address >> shift) & 0x1ff;
+  uint64_t entry = ((uint64_t *)table->data)[index];
+  if (!(entry & 1))
+ return NULL;
+  addr = entry & ~0xfff;
+  shift -= 9;
+   }
+   return search_phys_mem(addr);
+}
+
+static bool
+ppgtt_mapped(uint64_t pml4, uint64_t address)
+{
+   return ppgtt_walk(pml4, address) != NULL;
+}
+
+static struct gen_batch_decode_bo
+get_ppgtt_batch_bo(void *user_data, uint64_t address)
+{
+   struct gen_batch_decode_bo bo = {0};
+   uint64_t pml4 = *(uint64_t *)user_data;
+
+   address &= ~0xfff;
+
+   if (!ppgtt_mapped(pml4, address))
+  return bo;
+
+   /* Map everything until the first gap since we don't know how much the
+* decoder actually needs.
+*/
+   uint64_t end = address;
+   while (ppgtt_mapped(pml4, end))
+  end += 4096;
+
+   bo.addr = address;
+   bo.size = end - address;
+   bo.map = mmap(NULL, bo.size, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+   assert(bo.map != MAP_FAILED);
+
+   for (uint64_t page = address; page < end; page += 4096) {
+  struct phys_mem *phys_mem = ppgtt_walk(pml4, page);
+
+  void *res = mmap((uint8_t *)bo.map + (page - bo.addr), 4096, PROT_READ,
+   MAP_SHARED | MAP_FIXED, mem_fd, phys_mem->fd_offset);
+  assert(res != MAP_FAILED);
+   }
+
+   add_gtt_bo_map(bo, true);
+
+   return bo;
+}
+
 #define GEN_ENGINE_RENDER 1
 #define GEN_ENGINE_BLITTER 2
 
@@ -368,6 +430,7 @@ handle_trace_block(uint32_t *p)
   }
 
   (void)engine; /* TODO */
+  batch_ctx.get_bo = get_ggtt_batch_bo;
   gen_print_batch(&batch_ctx, bo.map, bo.size, 0);
 
   clear_bo_maps();
@@ -393,7 +456,7 @@ aubinator_init(uint16_t aub_pci_id, const char *app_name)
batch_flags |= GEN_BATCH_DECODE_FLOATS;
 
gen_batch_decode_ctx_init(&batch_ctx, &devinfo, outfile, batch_flags,
- xml_path, get_ggtt_batch_bo, NULL, NULL);
+ xml_path, NULL, NULL, NULL);
batch_ctx.max_vbo_decoded_lines = max_vbo_lines;
 
char *color = GREEN_HEADER, *reset_color = NORMAL;
@@ -533,12 +596,20 @@ handle_memtrace_reg_write(uint32_t *p)
uint32_t ring_buffer_head = context[5];
uint32_t ring_buffer_tail = context[7];
uint32_t ring_buffer_start = context[9];
+   uint64_t pml4 = (uint64_t)context[49] << 32 | context[51];
 
struct gen_batch_decode_bo ring_bo = get_ggtt_batch_bo(NULL,
   ring_buffer_start);
assert(ring_bo.size > 0);
void *commands = (uint8_t *)ring_bo.map + (ring_bo.addr - 
ring_buffer_start);
 
+   if (context_descriptor & 0x100 /* ppgtt */) {
+  batch_ctx.get_bo = get_ppgtt_batch_bo;
+  batch_ctx.user_data = &pml4;
+   } else {
+  batch_ctx.get_bo = get_ggtt_batch_bo;
+   }
+
(void)engine; /* TODO */
gen_print_batch(&batch_ctx, commands, ring_buffer_tail - ring_buffer_head,
0);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 05/16] util: rb-tree: A simple, invasive, red-black tree

2018-06-21 Thread Lionel Landwerlin
From: Jason Ekstrand 

This is a simple, invasive, liberally licensed red-black tree
implementation. It's an invasive data structure similar to the
Linux kernel linked-list where the intention is that you embed a
rb_node struct in the data structure you intend to put into the
tree.

The implementation is mostly based on the one in "Introduction to
Algorithms", third edition, by Cormen, Leiserson, Rivest, and
Stein. There were a few other key design points:

 * It's an invasive data structure similar to the [Linux kernel
   linked list].

 * It uses NULL for leaves instead of a sentinel. This means a few
   algorithms differ a small bit from the ones in "Introduction to
   Algorithms".

 * All search operations are inlined so that the compiler can
   optimize away the function pointer call.
---
 src/util/Makefile.sources |   2 +
 src/util/meson.build  |   2 +
 src/util/rb_tree.c| 421 ++
 src/util/rb_tree.h| 269 
 4 files changed, 694 insertions(+)
 create mode 100644 src/util/rb_tree.c
 create mode 100644 src/util/rb_tree.h

diff --git a/src/util/Makefile.sources b/src/util/Makefile.sources
index 534520ce763..37eb0880e35 100644
--- a/src/util/Makefile.sources
+++ b/src/util/Makefile.sources
@@ -30,6 +30,8 @@ MESA_UTIL_FILES := \
ralloc.h \
rand_xor.c \
rand_xor.h \
+   rb_tree.c \
+   rb_tree.h \
register_allocate.c \
register_allocate.h \
rgtc.c \
diff --git a/src/util/meson.build b/src/util/meson.build
index c777984e28d..62425bb237b 100644
--- a/src/util/meson.build
+++ b/src/util/meson.build
@@ -54,6 +54,8 @@ files_mesa_util = files(
   'ralloc.h',
   'rand_xor.c',
   'rand_xor.h',
+  'rb_tree.c',
+  'rb_tree.h',
   'register_allocate.c',
   'register_allocate.h',
   'rgtc.c',
diff --git a/src/util/rb_tree.c b/src/util/rb_tree.c
new file mode 100644
index 000..a86fa31a809
--- /dev/null
+++ b/src/util/rb_tree.c
@@ -0,0 +1,421 @@
+/*
+ * Copyright © 2017 Jason Ekstrand
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "rb_tree.h"
+
+/** \file rb_tree.c
+ *
+ * An implementation of a red-black tree
+ *
+ * This file implements the guts of a red-black tree.  The implementation
+ * is mostly based on the one in "Introduction to Algorithms", third
+ * edition, by Cormen, Leiserson, Rivest, and Stein.  The primary
+ * divergence in our algorithms from those presented in CLRS is that we use
+ * NULL for the leaves instead of a sentinel.  This means we have to do a
+ * tiny bit more tracking in our implementation of delete but it makes the
+ * algorithms far more explicit than stashing stuff in the sentinel.
+ */
+
+#include 
+#include 
+#include 
+
+static bool
+rb_node_is_black(struct rb_node *n)
+{
+/* NULL nodes are leaves and therefore black */
+return (n == NULL) || (n->parent & 1);
+}
+
+static bool
+rb_node_is_red(struct rb_node *n)
+{
+return !rb_node_is_black(n);
+}
+
+static void
+rb_node_set_black(struct rb_node *n)
+{
+n->parent |= 1;
+}
+
+static void
+rb_node_set_red(struct rb_node *n)
+{
+n->parent &= ~1ull;
+}
+
+static void
+rb_node_copy_color(struct rb_node *dst, struct rb_node *src)
+{
+dst->parent = (dst->parent & ~1ull) | (src->parent & 1);
+}
+
+static void
+rb_node_set_parent(struct rb_node *n, struct rb_node *p)
+{
+n->parent = (n->parent & 1) | (uintptr_t)p;
+}
+
+static struct rb_node *
+rb_node_minimum(struct rb_node *node)
+{
+while (node->left)
+node = node->left;
+return node;
+}
+
+static struct rb_node *
+rb_node_maximum(struct rb_node *node)
+{
+while (node->right)
+node = node->right;
+return node;
+}
+
+void
+rb_tree_init(struct rb_tree *T)
+{
+T->root = NULL;
+}
+
+/**
+ * Replace the subtree of T rooted at u with the subtree rooted at v
+ *
+ * This is called RB-transplant in CLRS.
+ *
+ * The node to be replaced is assumed to be a 

[Mesa-dev] [PATCH v3 00/16] intel: aubinator: handle ppgtt & softpin

2018-06-21 Thread Lionel Landwerlin
Hi all,

Some fixes spotted by Rafael.

Thanks,

Jason Ekstrand (1):
  util: rb-tree: A simple, invasive, red-black tree

Lionel Landwerlin (12):
  intel: aubinator: remove unused variables
  intel: aubinator: remove standard input processing option
  intel: aubinator: rework register writes handling
  intel: aubinator: drop the 1Tb GTT mapping
  intel: aubinator: handle GGTT mappings
  intel: batch-decoder: don't asks for constant BO until decoding
  intel: batch-decoder: add missing return line
  intel: tools: update intel_aub.h
  intel: tools: import intel_aubdump
  intel: devinfo: add simulator id
  intel: intel_dump_gpu: use simulator id in captures
  intel: tools: remove drm-uapi defines

Scott D Phillips (3):
  intel/tools/aubinator: aubinate ppgtt aubs
  intel/batch-decoder: handle non-contiguous binding table / surface
state
  intel: tools: dump-gpu: dump 48-bit addresses

 src/intel/Makefile.am|2 +
 src/intel/common/gen_batch_decoder.c |   37 +-
 src/intel/dev/gen_device_info.c  |   47 +-
 src/intel/dev/gen_device_info.h  |5 +
 src/intel/tools/aubinator.c  |  625 +
 src/intel/tools/intel_aub.h  |   27 +
 src/intel/tools/intel_dump_gpu.c | 1268 ++
 src/intel/tools/intel_dump_gpu.in|  107 +++
 src/intel/tools/meson.build  |   18 +
 src/util/Makefile.sources|2 +
 src/util/meson.build |2 +
 src/util/rb_tree.c   |  421 +
 src/util/rb_tree.h   |  269 ++
 13 files changed, 2635 insertions(+), 195 deletions(-)
 create mode 100644 src/intel/tools/intel_dump_gpu.c
 create mode 100755 src/intel/tools/intel_dump_gpu.in
 create mode 100644 src/util/rb_tree.c
 create mode 100644 src/util/rb_tree.h

--
2.17.1
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 12/16] intel: tools: import intel_aubdump

2018-06-21 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
Acked-by: Rafael Antognolli 
---
 src/intel/Makefile.am |2 +
 src/intel/tools/intel_dump_gpu.c  | 1313 +
 src/intel/tools/intel_dump_gpu.in |  107 +++
 src/intel/tools/meson.build   |   18 +
 4 files changed, 1440 insertions(+)
 create mode 100644 src/intel/tools/intel_dump_gpu.c
 create mode 100755 src/intel/tools/intel_dump_gpu.in

diff --git a/src/intel/Makefile.am b/src/intel/Makefile.am
index 3e098a7ac9b..8448640983f 100644
--- a/src/intel/Makefile.am
+++ b/src/intel/Makefile.am
@@ -71,6 +71,8 @@ EXTRA_DIST = \
isl/meson.build \
tools/intel_sanitize_gpu.c \
tools/intel_sanitize_gpu.in \
+   tools/intel_dump_gpu.c \
+   tools/intel_dump_gpu.in \
tools/meson.build \
vulkan/meson.build \
meson.build
diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c
new file mode 100644
index 000..86c133da433
--- /dev/null
+++ b/src/intel/tools/intel_dump_gpu.c
@@ -0,0 +1,1313 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "intel_aub.h"
+
+#include "dev/gen_device_info.h"
+#include "util/macros.h"
+
+#ifndef ALIGN
+#define ALIGN(x, y) (((x) + (y)-1) & ~((y)-1))
+#endif
+
+#define MI_LOAD_REGISTER_IMM_n(n) ((0x22 << 23) | (2 * (n) - 1))
+#define MI_LRI_FORCE_POSTED   (1<<12)
+
+#define MI_BATCH_BUFFER_END (0xA << 23)
+
+#define min(a, b) ({\
+ __typeof(a) _a = (a);  \
+ __typeof(b) _b = (b);  \
+ _a < _b ? _a : _b; \
+  })
+
+#define HWS_PGA_RCSUNIT  0x02080
+#define HWS_PGA_VCSUNIT0   0x12080
+#define HWS_PGA_BCSUNIT  0x22080
+
+#define GFX_MODE_RCSUNIT   0x0229c
+#define GFX_MODE_VCSUNIT0   0x1229c
+#define GFX_MODE_BCSUNIT   0x2229c
+
+#define EXECLIST_SUBMITPORT_RCSUNIT   0x02230
+#define EXECLIST_SUBMITPORT_VCSUNIT0   0x12230
+#define EXECLIST_SUBMITPORT_BCSUNIT   0x22230
+
+#define EXECLIST_STATUS_RCSUNIT  0x02234
+#define EXECLIST_STATUS_VCSUNIT0   0x12234
+#define EXECLIST_STATUS_BCSUNIT  0x22234
+
+#define EXECLIST_SQ_CONTENTS0_RCSUNIT   0x02510
+#define EXECLIST_SQ_CONTENTS0_VCSUNIT0   0x12510
+#define EXECLIST_SQ_CONTENTS0_BCSUNIT   0x22510
+
+#define EXECLIST_CONTROL_RCSUNIT   0x02550
+#define EXECLIST_CONTROL_VCSUNIT0   0x12550
+#define EXECLIST_CONTROL_BCSUNIT   0x22550
+
+#define MEMORY_MAP_SIZE (64 /* MiB */ * 1024 * 1024)
+
+#define PTE_SIZE 4
+#define GEN8_PTE_SIZE 8
+
+#define NUM_PT_ENTRIES (ALIGN(MEMORY_MAP_SIZE, 4096) / 4096)
+#define PT_SIZE ALIGN(NUM_PT_ENTRIES * GEN8_PTE_SIZE, 4096)
+
+#define RING_SIZE (1 * 4096)
+#define PPHWSP_SIZE (1 * 4096)
+#define GEN10_LR_CONTEXT_RENDER_SIZE   (19 * 4096)
+#define GEN8_LR_CONTEXT_OTHER_SIZE   (2 * 4096)
+
+#define STATIC_GGTT_MAP_START 0
+
+#define RENDER_RING_ADDR STATIC_GGTT_MAP_START
+#define RENDER_CONTEXT_ADDR (RENDER_RING_ADDR + RING_SIZE)
+
+#define BLITTER_RING_ADDR (RENDER_CONTEXT_ADDR + PPHWSP_SIZE + 
GEN10_LR_CONTEXT_RENDER_SIZE)
+#define BLITTER_CONTEXT_ADDR (BLITTER_RING_ADDR + RING_SIZE)
+
+#define VIDEO_RING_ADDR (BLITTER_CONTEXT_ADDR + PPHWSP_SIZE + 
GEN8_LR_CONTEXT_OTHER_SIZE)
+#define VIDEO_CONTEXT_ADDR (VIDEO_RING_ADDR + RING_SIZE)
+
+#define STATIC_GGTT_MAP_END (VIDEO_CONTEXT_ADDR + PPHWSP_SIZE + 
GEN8_LR_CONTEXT_OTHER_SIZE)
+#define STATIC_GGTT_MAP_SIZE (STATIC_GGTT_MAP_END - STATIC_GGTT_MAP_START)
+
+#define CONTEXT_FLAGS (0x229)   /* Normal Priority | L3-LLC Coherency |
+   Legacy Context with no 64 bit VA support | 
Valid */
+
+#define RENDER_CONTEXT_DESCRIPTOR  ((uint64_t)1 << 

[Mesa-dev] [PATCH v3 11/16] intel: tools: update intel_aub.h

2018-06-21 Thread Lionel Landwerlin
Scott added new stuff in IGT.

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Rafael Antognolli 
---
 src/intel/tools/intel_aub.h | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/src/intel/tools/intel_aub.h b/src/intel/tools/intel_aub.h
index 5f0aba8e68e..9ca548edaf3 100644
--- a/src/intel/tools/intel_aub.h
+++ b/src/intel/tools/intel_aub.h
@@ -49,6 +49,12 @@
 #define CMD_AUB(7 << 29)
 
 #define CMD_AUB_HEADER (CMD_AUB | (1 << 23) | (0x05 << 16))
+
+#define CMD_MEM_TRACE_REGISTER_POLL(CMD_AUB | (0x2e << 23) | (0x02 << 16))
+#define CMD_MEM_TRACE_REGISTER_WRITE   (CMD_AUB | (0x2e << 23) | (0x03 << 16))
+#define CMD_MEM_TRACE_MEMORY_WRITE (CMD_AUB | (0x2e << 23) | (0x06 << 16))
+#define CMD_MEM_TRACE_VERSION  (CMD_AUB | (0x2e << 23) | (0x0e << 16))
+
 /* DW1 */
 # define AUB_HEADER_MAJOR_SHIFT24
 # define AUB_HEADER_MINOR_SHIFT16
@@ -92,8 +98,28 @@
 #define AUB_TRACE_MEMTYPE_PCI  (3 << 16)
 #define AUB_TRACE_MEMTYPE_GTT_ENTRY (4 << 16)
 
+#define AUB_MEM_TRACE_VERSION_FILE_VERSION 1
+
 /* DW2 */
 
+#define AUB_MEM_TRACE_VERSION_DEVICE_MASK  0xff00
+#define AUB_MEM_TRACE_VERSION_DEVICE_CNL   (15 << 8)
+
+#define AUB_MEM_TRACE_VERSION_METHOD_MASK  0x000c
+#define AUB_MEM_TRACE_VERSION_METHOD_PHY   (1 << 18)
+
+#define AUB_MEM_TRACE_REGISTER_SIZE_MASK   0x000f
+#define AUB_MEM_TRACE_REGISTER_SIZE_DWORD  (2 << 16)
+
+#define AUB_MEM_TRACE_REGISTER_SPACE_MASK  0xf000
+#define AUB_MEM_TRACE_REGISTER_SPACE_MMIO  (0 << 28)
+
+/* DW3 */
+
+#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_MASK0xf000
+#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_LOCAL   (1 << 28)
+#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY  (4 << 28)
+
 /**
  * aub_state_struct_type enum values are encoded with the top 16 bits
  * representing the type to be delivered to the .aub file, and the bottom 16
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 09/16] intel: batch-decoder: don't ask for constant BO until decoding

2018-06-21 Thread Lionel Landwerlin
With PPGTT mappings, our aubinator implementation can be quite slow if
we request a buffer that doesn't exist. Instead of doing a PPGTT walk
for invalid addresses (0 lengths), wait until we're sure we want to
decode the data.

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Rafael Antognolli 
---
 src/intel/common/gen_batch_decoder.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/intel/common/gen_batch_decoder.c 
b/src/intel/common/gen_batch_decoder.c
index 2b6978da92d..81d8298c28b 100644
--- a/src/intel/common/gen_batch_decoder.c
+++ b/src/intel/common/gen_batch_decoder.c
@@ -562,9 +562,8 @@ decode_3dstate_constant(struct gen_batch_decode_ctx *ctx, 
const uint32_t *p)
struct gen_group *body =
   gen_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_BODY");
 
-   uint32_t read_length[4];
-   struct gen_batch_decode_bo buffer[4];
-   memset(buffer, 0, sizeof(buffer));
+   uint32_t read_length[4] = {0};
+   uint64_t read_addr[4];
 
struct gen_field_iterator outer;
gen_field_iterator_init(, inst, p, 0, false);
@@ -581,18 +580,24 @@ decode_3dstate_constant(struct gen_batch_decode_ctx *ctx, 
const uint32_t *p)
  if (sscanf(iter.name, "Read Length[%d]", ) == 1) {
 read_length[idx] = iter.raw_value;
  } else if (sscanf(iter.name, "Buffer[%d]", ) == 1) {
-buffer[idx] = ctx_get_bo(ctx, iter.raw_value);
+read_addr[idx] = iter.raw_value;
  }
   }
 
   for (int i = 0; i < 4; i++) {
- if (read_length[i] == 0 || buffer[i].map == NULL)
+ if (read_length[i] == 0)
 continue;
 
+ struct gen_batch_decode_bo buffer = ctx_get_bo(ctx, read_addr[i]);
+ if (!buffer.map) {
+fprintf(ctx->fp, "constant buffer %d unavailable\n", i);
+continue;
+ }
+
  unsigned size = read_length[i] * 32;
  fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size);
 
- ctx_print_buffer(ctx, buffer[i], size, 0, -1);
+ ctx_print_buffer(ctx, buffer, size, 0, -1);
   }
}
 }
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 10/16] intel: batch-decoder: add missing return line

2018-06-21 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
Reviewed-by: Rafael Antognolli 
---
 src/intel/common/gen_batch_decoder.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/common/gen_batch_decoder.c 
b/src/intel/common/gen_batch_decoder.c
index 81d8298c28b..fc0ff95a476 100644
--- a/src/intel/common/gen_batch_decoder.c
+++ b/src/intel/common/gen_batch_decoder.c
@@ -854,7 +854,7 @@ gen_print_batch(struct gen_batch_decode_ctx *ctx,
  }
 
  if (next_batch.map == NULL) {
-fprintf(ctx->fp, "Secondary batch at 0x%08"PRIx64" unavailable",
+fprintf(ctx->fp, "Secondary batch at 0x%08"PRIx64" unavailable\n",
 next_batch.addr);
  }
 
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 04/16] intel: aubinator: drop the 1Tb GTT mapping

2018-06-21 Thread Lionel Landwerlin
Now that we're softpinning the address of our BOs in anv & i965, the
addresses selected start at the top of the addressing space. This is a
problem for the current implementation of aubinator which uses only a
40bit mmapped address space.

This change keeps track of all the memory writes from the aub file and
fetches them on request by the batch decoder. As a result we can get rid
of the 1<<40 mmapped address space and only rely on the mmapped aub file
\o/

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Rafael Antognolli 
---
 src/intel/tools/aubinator.c | 130 +---
 1 file changed, 75 insertions(+), 55 deletions(-)

diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c
index d0b26219eb4..3b04ba3f431 100644
--- a/src/intel/tools/aubinator.c
+++ b/src/intel/tools/aubinator.c
@@ -37,10 +37,12 @@
 #include 
 #include 
 
+#include "util/list.h"
 #include "util/macros.h"
 
 #include "common/gen_decoder.h"
 #include "common/gen_disasm.h"
+#include "common/gen_gem.h"
 #include "intel_aub.h"
 
 /* Below is the only command missing from intel_aub.h in libdrm
@@ -68,13 +70,45 @@ char *input_file = NULL, *xml_path = NULL;
 struct gen_device_info devinfo;
 struct gen_batch_decode_ctx batch_ctx;
 
-uint64_t gtt_size, gtt_end;
-void *gtt;
+struct bo_map {
+   struct list_head link;
+   struct gen_batch_decode_bo bo;
+};
+
+static struct list_head maps;
 
 FILE *outfile;
 
 struct brw_instruction;
 
+static void
+add_gtt_bo_map(struct gen_batch_decode_bo bo)
+{
+   struct bo_map *m = calloc(1, sizeof(*m));
+
+   m->bo = bo;
+   list_add(>link, );
+}
+
+static void
+clear_bo_maps(void)
+{
+   list_for_each_entry_safe(struct bo_map, i, , link) {
+  list_del(>link);
+  free(i);
+   }
+}
+
+static struct gen_batch_decode_bo
+get_gen_batch_bo(void *user_data, uint64_t address)
+{
+   list_for_each_entry(struct bo_map, i, , link)
+  if (i->bo.addr <= address && i->bo.addr + i->bo.size > address)
+ return i->bo;
+
+   return (struct gen_batch_decode_bo) { .map = NULL };
+}
+
 #define GEN_ENGINE_RENDER 1
 #define GEN_ENGINE_BLITTER 2
 
@@ -84,26 +118,23 @@ handle_trace_block(uint32_t *p)
int operation = p[1] & AUB_TRACE_OPERATION_MASK;
int type = p[1] & AUB_TRACE_TYPE_MASK;
int address_space = p[1] & AUB_TRACE_ADDRESS_SPACE_MASK;
-   uint64_t offset = p[3];
-   uint32_t size = p[4];
int header_length = p[0] & 0x;
-   uint32_t *data = p + header_length + 2;
int engine = GEN_ENGINE_RENDER;
-
-   if (devinfo.gen >= 8)
-  offset += (uint64_t) p[5] << 32;
+   struct gen_batch_decode_bo bo = {
+  .map = p + header_length + 2,
+  /* Addresses written by aubdump here are in canonical form but the batch
+   * decoder always gives us addresses with the top 16bits zeroed, so do
+   * the same here.
+   */
+  .addr = gen_48b_address((devinfo.gen >= 8 ? ((uint64_t) p[5] << 32) : 0) 
|
+  ((uint64_t) p[3])),
+  .size = p[4],
+   };
 
switch (operation) {
case AUB_TRACE_OP_DATA_WRITE:
-  if (address_space != AUB_TRACE_MEMTYPE_GTT)
- break;
-  if (gtt_size < offset + size) {
- fprintf(stderr, "overflow gtt space: %s\n", strerror(errno));
- exit(EXIT_FAILURE);
-  }
-  memcpy((char *) gtt + offset, data, size);
-  if (gtt_end < offset + size)
- gtt_end = offset + size;
+  if (address_space == AUB_TRACE_MEMTYPE_GTT)
+ add_gtt_bo_map(bo);
   break;
case AUB_TRACE_OP_COMMAND_WRITE:
   switch (type) {
@@ -119,27 +150,13 @@ handle_trace_block(uint32_t *p)
   }
 
   (void)engine; /* TODO */
-  gen_print_batch(_ctx, data, size, 0);
+  gen_print_batch(_ctx, bo.map, bo.size, 0);
 
-  gtt_end = 0;
+  clear_bo_maps();
   break;
}
 }
 
-static struct gen_batch_decode_bo
-get_gen_batch_bo(void *user_data, uint64_t address)
-{
-   if (address > gtt_end)
-  return (struct gen_batch_decode_bo) { .map = NULL };
-
-   /* We really only have one giant address range */
-   return (struct gen_batch_decode_bo) {
-  .addr = 0,
-  .map = gtt,
-  .size = gtt_size
-   };
-}
-
 static void
 aubinator_init(uint16_t aub_pci_id, const char *app_name)
 {
@@ -289,34 +306,44 @@ handle_memtrace_reg_write(uint32_t *p)
}
 
const uint32_t pphwsp_size = 4096;
-   uint32_t *context = (uint32_t*)(gtt + (context_descriptor & 0xf000) + 
pphwsp_size);
+   uint32_t pphwsp_addr = context_descriptor & 0xf000;
+   struct gen_batch_decode_bo pphwsp_bo = get_gen_batch_bo(NULL, pphwsp_addr);
+   uint32_t *context = (uint32_t *)((uint8_t *)pphwsp_bo.map +
+(pphwsp_bo.addr - pphwsp_addr) +
+pphwsp_size);
+
uint32_t ring_buffer_head = context[5];
uint32_t ring_buffer_tail = context[7];
uint32_t ring_buffer_start = context[9];
-   uint32_t *commands = (uint32_t*)((uint8_t*)gtt + ring_buffer_start + 
ring_buffer_head);
+
+   

[Mesa-dev] [PATCH v3 02/16] intel: aubinator: remove standard input processing option

2018-06-21 Thread Lionel Landwerlin
In a follow-up commit in this series, we stop copying the data from
the mmap'ed file into our big gtt mmap, and start referencing data in
it directly. So reallocating the read buffer and adding more data from
stdin wouldn't work. For that reason, let's stop supporting stdin
processing.

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Rafael Antognolli 
---
 src/intel/tools/aubinator.c | 102 +---
 1 file changed, 12 insertions(+), 90 deletions(-)

diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c
index 0e2fd5fb34a..92db1ed0b31 100644
--- a/src/intel/tools/aubinator.c
+++ b/src/intel/tools/aubinator.c
@@ -334,17 +334,6 @@ aub_file_open(const char *filename)
return file;
 }
 
-static struct aub_file *
-aub_file_stdin(void)
-{
-   struct aub_file *file;
-
-   file = calloc(1, sizeof *file);
-   file->stream = stdin;
-
-   return file;
-}
-
 #define TYPE(dw)   (((dw) >> 29) & 7)
 #define OPCODE(dw) (((dw) >> 23) & 0x3f)
 #define SUBOPCODE(dw)  (((dw) >> 16) & 0x7f)
@@ -382,8 +371,7 @@ aub_file_decode_batch(struct aub_file *file)
uint32_t *p, h, *new_cursor;
int header_length, bias;
 
-   if (file->end - file->cursor < 1)
-  return AUB_ITEM_DECODE_NEED_MORE_DATA;
+   assert(file->cursor < file->end);
 
p = file->cursor;
h = *p;
@@ -405,13 +393,11 @@ aub_file_decode_batch(struct aub_file *file)
 
new_cursor = p + header_length + bias;
if ((h & 0x) == MAKE_HEADER(TYPE_AUB, OPCODE_AUB, SUBOPCODE_BLOCK)) 
{
-  if (file->end - file->cursor < 4)
- return AUB_ITEM_DECODE_NEED_MORE_DATA;
+  assert(file->end - file->cursor >= 4);
   new_cursor += p[4] / 4;
}
 
-   if (new_cursor > file->end)
-  return AUB_ITEM_DECODE_NEED_MORE_DATA;
+   assert(new_cursor <= file->end);
 
switch (h & 0x) {
case MAKE_HEADER(TYPE_AUB, OPCODE_AUB, SUBOPCODE_HEADER):
@@ -452,48 +438,6 @@ aub_file_more_stuff(struct aub_file *file)
return file->cursor < file->end || (file->stream && !feof(file->stream));
 }
 
-#define AUB_READ_BUFFER_SIZE (4096)
-#define MAX(a, b) ((a) < (b) ? (b) : (a))
-
-static void
-aub_file_data_grow(struct aub_file *file)
-{
-   size_t old_size = (file->mem_end - file->map) * 4;
-   size_t new_size = MAX(old_size * 2, AUB_READ_BUFFER_SIZE);
-   uint32_t *new_start = realloc(file->map, new_size);
-
-   file->cursor = new_start + (file->cursor - file->map);
-   file->end = new_start + (file->end - file->map);
-   file->map = new_start;
-   file->mem_end = file->map + (new_size / 4);
-}
-
-static bool
-aub_file_data_load(struct aub_file *file)
-{
-   size_t r;
-
-   if (file->stream == NULL)
-  return false;
-
-   /* First remove any consumed data */
-   if (file->cursor > file->map) {
-  memmove(file->map, file->cursor,
-  (file->end - file->cursor) * 4);
-  file->end -= file->cursor - file->map;
-  file->cursor = file->map;
-   }
-
-   /* Then load some new data in */
-   if ((file->mem_end - file->end) < (AUB_READ_BUFFER_SIZE / 4))
-  aub_file_data_grow(file);
-
-   r = fread(file->end, 1, (file->mem_end - file->end) * 4, file->stream);
-   file->end += r / 4;
-
-   return r != 0;
-}
-
 static void
 setup_pager(void)
 {
@@ -525,9 +469,8 @@ static void
 print_help(const char *progname, FILE *file)
 {
fprintf(file,
-   "Usage: %s [OPTION]... [FILE]\n"
-   "Decode aub file contents from either FILE or the standard 
input.\n\n"
-   "A valid --gen option must be provided.\n\n"
+   "Usage: %s [OPTION]... FILE\n"
+   "Decode aub file contents from FILE.\n\n"
"  --help display this help and exit\n"
"  --gen=platform decode for given platform (3 letter 
platform name)\n"
"  --headers  decode only command headers\n"
@@ -596,14 +539,14 @@ int main(int argc, char *argv[])
   }
}
 
-   if (help || argc == 1) {
+   if (optind < argc)
+  input_file = argv[optind];
+
+   if (help || !input_file) {
   print_help(argv[0], stderr);
   exit(0);
}
 
-   if (optind < argc)
-  input_file = argv[optind];
-
/* Do this before we redirect stdout to pager. */
if (option_color == COLOR_AUTO)
   option_color = isatty(1) ? COLOR_ALWAYS : COLOR_NEVER;
@@ -611,11 +554,6 @@ int main(int argc, char *argv[])
if (isatty(1) && pager)
   setup_pager();
 
-   if (input_file == NULL)
-  file = aub_file_stdin();
-   else
-  file = aub_file_open(input_file);
-
/* mmap a terabyte for our gtt space. */
gtt_size = 1ull << 40;
gtt = mmap(NULL, gtt_size, PROT_READ | PROT_WRITE,
@@ -625,26 +563,10 @@ int main(int argc, char *argv[])
   exit(EXIT_FAILURE);
}
 
-   while (aub_file_more_stuff(file)) {
-  switch (aub_file_decode_batch(file)) {
-  case AUB_ITEM_DECODE_OK:
- break;
-  case AUB_ITEM_DECODE_NEED_MORE_DATA:
- if (!file->stream) {
-file->cursor = file->end;
-   

[Mesa-dev] [PATCH v3 03/16] intel: aubinator: rework register writes handling

2018-06-21 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
Reviewed-by: Rafael Antognolli 
---
 src/intel/tools/aubinator.c | 82 -
 1 file changed, 54 insertions(+), 28 deletions(-)

diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c
index 92db1ed0b31..d0b26219eb4 100644
--- a/src/intel/tools/aubinator.c
+++ b/src/intel/tools/aubinator.c
@@ -224,46 +224,72 @@ handle_memtrace_version(uint32_t *p)
 static void
 handle_memtrace_reg_write(uint32_t *p)
 {
+   static struct execlist_regs {
+  uint32_t render_elsp[4];
+  int render_elsp_index;
+  uint32_t blitter_elsp[4];
+  int blitter_elsp_index;
+   } state = {};
+
uint32_t offset = p[1];
uint32_t value = p[5];
+
int engine;
-   static int render_elsp_writes = 0;
-   static int blitter_elsp_writes = 0;
-   static int render_elsq0 = 0;
-   static int blitter_elsq0 = 0;
-   uint8_t *pphwsp;
-
-   if (offset == 0x2230) {
-  render_elsp_writes++;
+   uint64_t context_descriptor;
+
+   switch (offset) {
+   case 0x2230: /* render elsp */
+  state.render_elsp[state.render_elsp_index++] = value;
+  if (state.render_elsp_index < 4)
+ return;
+
+  state.render_elsp_index = 0;
   engine = GEN_ENGINE_RENDER;
-   } else if (offset == 0x22230) {
-  blitter_elsp_writes++;
+  context_descriptor = (uint64_t)state.render_elsp[2] << 32 |
+ state.render_elsp[3];
+  break;
+   case 0x22230: /* blitter elsp */
+  state.blitter_elsp[state.blitter_elsp_index++] = value;
+  if (state.blitter_elsp_index < 4)
+ return;
+
+  state.blitter_elsp_index = 0;
   engine = GEN_ENGINE_BLITTER;
-   } else if (offset == 0x2510) {
-  render_elsq0 = value;
-   } else if (offset == 0x22510) {
-  blitter_elsq0 = value;
-   } else if (offset == 0x2550 || offset == 0x22550) {
-  /* nothing */;
-   } else {
+  context_descriptor = (uint64_t)state.blitter_elsp[2] << 32 |
+ state.blitter_elsp[3];
+  break;
+   case 0x2510: /* render elsq0 lo */
+  state.render_elsp[3] = value;
   return;
-   }
-
-   if (render_elsp_writes > 3 || blitter_elsp_writes > 3) {
-  render_elsp_writes = blitter_elsp_writes = 0;
-  pphwsp = (uint8_t*)gtt + (value & 0xf000);
-   } else if (offset == 0x2550) {
+  break;
+   case 0x2514: /* render elsq0 hi */
+  state.render_elsp[2] = value;
+  return;
+  break;
+   case 0x22510: /* blitter elsq0 lo */
+  state.blitter_elsp[3] = value;
+  return;
+  break;
+   case 0x22514: /* blitter elsq0 hi */
+  state.blitter_elsp[2] = value;
+  return;
+  break;
+   case 0x2550: /* render elsc */
   engine = GEN_ENGINE_RENDER;
-  pphwsp = (uint8_t*)gtt + (render_elsq0 & 0xf000);
-   } else if (offset == 0x22550) {
+  context_descriptor = (uint64_t)state.render_elsp[2] << 32 |
+ state.render_elsp[3];
+  break;
+   case 0x22550: /* blitter elsc */
   engine = GEN_ENGINE_BLITTER;
-  pphwsp = (uint8_t*)gtt + (blitter_elsq0 & 0xf000);
-   } else {
+  context_descriptor = (uint64_t)state.blitter_elsp[2] << 32 |
+ state.blitter_elsp[3];
+  break;
+   default:
   return;
}
 
const uint32_t pphwsp_size = 4096;
-   uint32_t *context = (uint32_t*)(pphwsp + pphwsp_size);
+   uint32_t *context = (uint32_t*)(gtt + (context_descriptor & 0xf000) + 
pphwsp_size);
uint32_t ring_buffer_head = context[5];
uint32_t ring_buffer_tail = context[7];
uint32_t ring_buffer_start = context[9];
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 01/16] intel: aubinator: remove unused variables

2018-06-21 Thread Lionel Landwerlin
These memory offsets are stored in the gen_batch_decode_ctx.

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Rafael Antognolli 
---
 src/intel/tools/aubinator.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c
index 77676e9c670..0e2fd5fb34a 100644
--- a/src/intel/tools/aubinator.c
+++ b/src/intel/tools/aubinator.c
@@ -70,11 +70,6 @@ struct gen_batch_decode_ctx batch_ctx;
 
 uint64_t gtt_size, gtt_end;
 void *gtt;
-uint64_t general_state_base;
-uint64_t surface_state_base;
-uint64_t dynamic_state_base;
-uint64_t instruction_base;
-uint64_t instruction_bound;
 
 FILE *outfile;
 
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] meson: Correct behavior of vdpau=auto

2018-06-21 Thread Dylan Baker
Quoting Eric Engestrom (2018-06-21 08:43:56)
> On Thursday, 2018-06-21 08:38:05 -0700, Dylan Baker wrote:
> > Currently if vdpau is set to auto, it will be disabled only in cases
> > where gallium is disabled or the host OS is not supported (mac, haiku,
> > windows). However on (for example) Linux if libvdpau is not installed
> > then the build will error because of the unmet dependency. This corrects
> > auto to do the right thing, and not error if libvdpau is not installed.
> > 
> > Fixes: 992af0a4b8224bdb4809e01c2f00d2f32546aee5
> >("meson: dedup gallium-vdpau logic")
> > Signed-off-by: Dylan Baker 
> 
> Oops, that was me :]

I reviewed it and didn't spot it either. I only noticed when I re-installed my
system :)

> 
> Series is
> Reviewed-by: Eric Engestrom 
> 

Thanks!


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] meson: Correct behavior of vdpau=auto

2018-06-21 Thread Eric Engestrom
On Thursday, 2018-06-21 08:38:05 -0700, Dylan Baker wrote:
> Currently if vdpau is set to auto, it will be disabled only in cases
> where gallium is disabled or the host OS is not supported (mac, haiku,
> windows). However on (for example) Linux if libvdpau is not installed
> then the build will error because of the unmet dependency. This corrects
> auto to do the right thing, and not error if libvdpau is not installed.
> 
> Fixes: 992af0a4b8224bdb4809e01c2f00d2f32546aee5
>("meson: dedup gallium-vdpau logic")
> Signed-off-by: Dylan Baker 

Oops, that was me :]

Series is
Reviewed-by: Eric Engestrom 

> ---
>  meson.build | 17 +
>  1 file changed, 9 insertions(+), 8 deletions(-)
> 
> diff --git a/meson.build b/meson.build
> index a2d59776c05..429865c9442 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -398,16 +398,17 @@ elif not (with_gallium_r300 or with_gallium_r600 or 
> with_gallium_radeonsi or
>else
>  _vdpau = 'false'
>endif
> -elif _vdpau == 'auto'
> -  _vdpau = 'true'
>  endif
> -with_gallium_vdpau = _vdpau == 'true'
>  dep_vdpau = null_dep
> -if with_gallium_vdpau
> -  dep_vdpau = dependency('vdpau', version : '>= 1.1')
> -  dep_vdpau = declare_dependency(
> -compile_args : run_command(prog_pkgconfig, ['vdpau', 
> '--cflags']).stdout().split()
> -  )
> +with_gallium_vdpau = false
> +if _vdpau != 'false'
> +  dep_vdpau = dependency('vdpau', version : '>= 1.1', required : _vdpau == 
> 'true')
> +  if dep_vdpau.found()
> +dep_vdpau = declare_dependency(
> +  compile_args : run_command(prog_pkgconfig, ['vdpau', 
> '--cflags']).stdout().split()
> +)
> +with_gallium_vdpau = true
> +  endif
>  endif
>  
>  if with_gallium_vdpau
> -- 
> 2.17.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] meson: Fix auto option for va

2018-06-21 Thread Dylan Baker
The same as the previous two patches, but for the libva state tracker.

Fixes: 724916c8a84b5bba8f880f17da936585d52c97b6
   ("meson: dedup gallium-xvmc logic")
Signed-off-by: Dylan Baker 
---
 meson.build | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/meson.build b/meson.build
index 82e2b535254..e88616c9e82 100644
--- a/meson.build
+++ b/meson.build
@@ -559,13 +559,16 @@ elif not (with_gallium_r600 or with_gallium_radeonsi or 
with_gallium_nouveau)
 elif _va == 'auto'
   _va = 'true'
 endif
-with_gallium_va = _va == 'true'
+with_gallium_va = false
 dep_va = null_dep
-if with_gallium_va
-  dep_va = dependency('libva', version : '>= 0.39.0')
-  dep_va_headers = declare_dependency(
-compile_args : run_command(prog_pkgconfig, ['libva', 
'--cflags']).stdout().split()
-  )
+if _va != 'false'
+  dep_va = dependency('libva', version : '>= 0.38.0', required : _va == 'true')
+  if dep_va.found()
+dep_va_headers = declare_dependency(
+  compile_args : run_command(prog_pkgconfig, ['libva', 
'--cflags']).stdout().split()
+)
+with_gallium_va = true
+  endif
 endif
 
 va_drivers_path = get_option('va-libs-path')
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] meson: Correct behavior of vdpau=auto

2018-06-21 Thread Dylan Baker
Currently if vdpau is set to auto, it will be disabled only in cases
where gallium is disabled or the host OS is not supported (mac, haiku,
windows). However on (for example) Linux if libvdpau is not installed
then the build will error because of the unmet dependency. This corrects
auto to do the right thing, and not error if libvdpau is not installed.

Fixes: 992af0a4b8224bdb4809e01c2f00d2f32546aee5
   ("meson: dedup gallium-vdpau logic")
Signed-off-by: Dylan Baker 
---
 meson.build | 17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/meson.build b/meson.build
index a2d59776c05..429865c9442 100644
--- a/meson.build
+++ b/meson.build
@@ -398,16 +398,17 @@ elif not (with_gallium_r300 or with_gallium_r600 or 
with_gallium_radeonsi or
   else
 _vdpau = 'false'
   endif
-elif _vdpau == 'auto'
-  _vdpau = 'true'
 endif
-with_gallium_vdpau = _vdpau == 'true'
 dep_vdpau = null_dep
-if with_gallium_vdpau
-  dep_vdpau = dependency('vdpau', version : '>= 1.1')
-  dep_vdpau = declare_dependency(
-compile_args : run_command(prog_pkgconfig, ['vdpau', 
'--cflags']).stdout().split()
-  )
+with_gallium_vdpau = false
+if _vdpau != 'false'
+  dep_vdpau = dependency('vdpau', version : '>= 1.1', required : _vdpau == 
'true')
+  if dep_vdpau.found()
+dep_vdpau = declare_dependency(
+  compile_args : run_command(prog_pkgconfig, ['vdpau', 
'--cflags']).stdout().split()
+)
+with_gallium_vdpau = true
+  endif
 endif
 
 if with_gallium_vdpau
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] meson: Fix auto option for xvmc

2018-06-21 Thread Dylan Baker
This fixes the same problem as the previous patch did for vdpau, but for
xvmc.

Fixes: 724916c8a84b5bba8f880f17da936585d52c97b6
   ("meson: dedup gallium-xvmc logic")
Signed-off-by: Dylan Baker 
---
 meson.build | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/meson.build b/meson.build
index 429865c9442..82e2b535254 100644
--- a/meson.build
+++ b/meson.build
@@ -438,13 +438,12 @@ elif not (with_gallium_r600 or with_gallium_nouveau)
   else
 _xvmc = 'false'
   endif
-elif _xvmc == 'auto'
-  _xvmc = 'true'
 endif
-with_gallium_xvmc = _xvmc == 'true'
 dep_xvmc = null_dep
-if with_gallium_xvmc
-  dep_xvmc = dependency('xvmc', version : '>= 1.0.6')
+with_gallium_xvmc = false
+if _xmvc != 'false'
+  dep_xvmc = dependency('xvmc', version : '>= 1.0.6', required : _xvmc == 
'true')
+  with_gallium_xvmc = dep_xvmc.found()
 endif
 
 xvmc_drivers_path = get_option('xvmc-libs-path')
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] glsl/glcpp/tests: reinstate srcdir/abs_builddir blurb

2018-06-21 Thread Dylan Baker
Quoting Emil Velikov (2018-04-24 10:49:21)
> From: Emil Velikov 
> 
> Bring back the "detection" of the said variables, to allow
> standalone execution.
> 
> Fixes: db8cd8e36771 ("glcpp/tests: Convert shell scripts to a python
> script")
> Cc: Dylan Baker 
> Signed-off-by: Emil Velikov 
> ---
>  src/compiler/glsl/glcpp/tests/glcpp-test.sh | 13 +
>  1 file changed, 13 insertions(+)
> 
> diff --git a/src/compiler/glsl/glcpp/tests/glcpp-test.sh 
> b/src/compiler/glsl/glcpp/tests/glcpp-test.sh
> index 7ca8aa26a8..b8397ec890 100755
> --- a/src/compiler/glsl/glcpp/tests/glcpp-test.sh
> +++ b/src/compiler/glsl/glcpp/tests/glcpp-test.sh
> @@ -1,3 +1,16 @@
>  #!/bin/sh
>  
> +if [ -z "$srcdir" -o -z "$abs_builddir" ]; then
> +echo ""
> +echo "Warning: you're invoking the script manually and things may fail."
> +echo "Attempting to determine/set srcdir and abs_builddir variables."
> +echo ""
> +
> +# Should point to `dirname Makefile.glsl.am`
> +srcdir=./../../../
> +cd `dirname "$0"`
> +# Should point to `dirname Makefile` equivalent to the above.
> +abs_builddir=`pwd`/../../../
> +fi
> +
>  $PYTHON2 $srcdir/glsl/glcpp/tests/glcpp_test.py 
> $abs_builddir/glsl/glcpp/glcpp $srcdir/glsl/glcpp/tests --unix --windows 
> --oldmac --bizarro
> -- 
> 2.16.0
> 

Hi Emil,

This doesn't apply cleanly to 18.1 because it needs the previous patch. Would
you like me to pull that as well, or drop this?

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 1/2] wayland/egl: initialize window surface size to window size

2018-06-21 Thread Juan A. Suarez Romero
Gentle ping to get a review O:)


J.A.

On Wed, 2018-06-06 at 12:40 +0200, Juan A. Suarez Romero wrote:
> When creating a window surface with eglCreateWindowSurface(), the
> width and height returned by eglQuerySurface(EGL_{WIDTH,HEIGHT}) are
> invalid until buffers are updated (like calling glClear()).
> 
> But according to EGL 1.5 spec, section 3.5.6 ("Surface Attributes"):
> 
>   "Querying EGL_WIDTH and EGL_HEIGHT returns respectively the width and
>height, in pixels, of the surface. For a window or pixmap surface,
>these values are initially equal to the width and height of the
>native window or pixmap with respect to which the surface was
>created"
> 
> This fixes dEQP-EGL.functional.color_clears.* CTS tests
> 
> v2:
> - Do not modify attached_{width,height} (Daniel)
> - Do not update size on resizing window (Brendan)
> 
> CC: Daniel Stone 
> CC: Brendan King 
> ---
>  src/egl/drivers/dri2/platform_wayland.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/src/egl/drivers/dri2/platform_wayland.c 
> b/src/egl/drivers/dri2/platform_wayland.c
> index 63da21cdf55..f62cbbc5c02 100644
> --- a/src/egl/drivers/dri2/platform_wayland.c
> +++ b/src/egl/drivers/dri2/platform_wayland.c
> @@ -255,6 +255,9 @@ dri2_wl_create_window_surface(_EGLDriver *drv, 
> _EGLDisplay *disp,
>goto cleanup_surf;
> }
>  
> +   dri2_surf->base.Width = window->width;
> +   dri2_surf->base.Height = window->height;
> +
> visual_idx = dri2_wl_visual_idx_from_config(dri2_dpy, config);
> assert(visual_idx != -1);
>  
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] i965/gen6/gs: Handle case where a GS doesn't allocate VUE

2018-06-21 Thread andrii.simiklit

Hello,

Thanks for your feedback.
I made changes which you requested.
I hope that I understood you right.
The solution which you suggested "VUE allocation unconditionally" fixes this bug
and we have avoided the endif in the program end.


Ok, then we are in agreement.

 Section 1.6.5.4 VUE Allocation:
  " The following description is applicable only to the GS stage.
The threads are not passed an initial handle.
Instead, they request a first handle (if any) via the URB
shared function’s FF_SYNC message (see Shared Functions).
If additional handles are required,
the URB_WRITE allocate mechanism (mentioned above) is used."

If GS doesn't allocate/request VUEs then GS shouldn't use the
Dereference (COMPLETE + UNUSED) message. So when GS produces no
output GS doesn't allocate VUEs at all and GS shouldn't use
Dereference message.
Agreed as well. But do notice that none of this is pre-ILK as far as 
the documentation goes, it is the same across all supported platforms 
up to SNB.


But do notice that according to

   Section "1.6.5.2 VUE Allocation (GS, CLIP) [Pre-DevIL] " (vol2, part1)
" The following description is applicable only to the GS, CLIP stages.
  The GS and CLIP threads are passed a single, initial destination VUE 
handle.
  These threads may be required to output more than one destination VUE, and
  therefore they are provided with a mechanism to
  request additional handles.
  .."

So if we want to support Pre-ILK we have to implement the different solutions 
for ILK, SNB (1) and for Pre-ILK (2):
   1. The "make the FF_SYNC happen unconditionally" solution will work correctly
for ILK, SNB according to "1.6.5.3 VUE Allocation (GS, CLIP) [DevIL]"
and "1.6.5.4 VUE Allocation (GS) [DevSNB+]" (vol2, part1).
   2. The current GS implementation will work correctly
for Pre-ILK according to "1.6.5.2 VUE Allocation (GS, CLIP) [Pre-DevIL]"

Must I implement something additional for Pre-ILK?

Regards,
Andrii.

On 21.06.18 16:40, Andrii Simiklit wrote:


We cannot use the VUE Dereference flags combination for the EOT
message on ILK and SNB because, unlike on Pre-ILK, the threads there
are not initialized with an initial VUE handle.
So to avoid GPU hangs on SNB and ILK we need
to avoid using the VUE Dereference flags combination.
(This was tested only on SNB, but according to the specification,
SNB Volume 2 Part 1: 1.6.5.3, 1.6.5.6,
ILK must behave in a similar way.)

v2: Approach to fix this issue was changed.
Instead of different EOT flags in the program end
we will create VUE every time even if GS produces no output.

Signed-off-by: Andrii Simiklit 
---
  src/intel/compiler/gen6_gs_visitor.cpp | 88 +-
  1 file changed, 23 insertions(+), 65 deletions(-)

diff --git a/src/intel/compiler/gen6_gs_visitor.cpp 
b/src/intel/compiler/gen6_gs_visitor.cpp
index ac3ba55..b831d33 100644
--- a/src/intel/compiler/gen6_gs_visitor.cpp
+++ b/src/intel/compiler/gen6_gs_visitor.cpp
@@ -300,11 +300,10 @@ gen6_gs_visitor::emit_urb_write_opcode(bool complete, int 
base_mrf,
/* Otherwise we always request to allocate a new VUE handle. If this is
 * the last write before the EOT message and the new handle never gets
 * used it will be dereferenced when we send the EOT message. This is
-   * necessary to avoid different setups (under Pre-IL only) for the EOT 
message (one for the
+   * necessary to avoid different setups for the EOT message (one for the
 * case when there is no output and another for the case when there is)
 * which would require to end the program with an IF/ELSE/ENDIF block,
-   * something we do not want.
-   * But for ILK and SNB we can not avoid the end the program with an 
IF/ELSE/ENDIF block.
+   * something we do not want.
 */
inst = emit(GS_OPCODE_URB_WRITE_ALLOCATE);
inst->urb_write_flags = BRW_URB_WRITE_COMPLETE;
@@ -351,27 +350,27 @@ gen6_gs_visitor::emit_thread_end()
 int max_usable_mrf = FIRST_SPILL_MRF(devinfo->gen);
  
 /* Issue the FF_SYNC message and obtain the initial VUE handle. */

-   emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u), 
BRW_CONDITIONAL_G));
-   emit(IF(BRW_PREDICATE_NORMAL));
-   {
-  this->current_annotation = "gen6 thread end: ff_sync";
+   this->current_annotation = "gen6 thread end: ff_sync";
  
-  vec4_instruction *inst;

-  if (prog->info.has_transform_feedback_varyings) {
+   vec4_instruction *inst = NULL;
+   if (prog->info.has_transform_feedback_varyings) {
   src_reg sol_temp(this, glsl_type::uvec4_type);
   emit(GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
-  dst_reg(this->svbi),
-  this->vertex_count,
-  this->prim_count,
-  sol_temp);
+   dst_reg(this->svbi),
+   this->vertex_count,
+   this->prim_count,
+   sol_temp);
   inst = 

[Mesa-dev] [Bug 106283] Shader replacements works only for limited use cases

2018-06-21 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106283

--- Comment #11 from i...@yahoo.com ---
(In reply to iive from comment #10)
> Just a reminder about the shader replacement feature request.
> 
> I hope that you haven't forgotten about it.

When I got my first reply I was so hopeful that this
simple request could be fulfilled in a few days.

Yet, here we are, one month after my last reminder
and I don't even get replies anymore.

Please, don't postpone it to infinity.

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH mesa 2/4] vulkan: add VK_EXT_display_control [v8]

2018-06-21 Thread Keith Packard
Jason Ekstrand  writes:

>> +  if (!ret)
>> + return VK_SUCCESS;
>> +
>> +  if (errno != ENOMEM) {
>
> This strikes me as a bit odd. What does ENOMEM mean if not "out of
> memory"?

ENOMEM means that the queue is full and that we should drain it and try
again; that's what the wait_for_event call is below.

The other-than-ENOMEM case is for some other failure, such as VT switch
or lease revoke. For RegisterDisplayEvent, there aren't any return
values other than VK_SUCCESS defined, and we're already assuming we can
use VK_ERROR_OUT_OF_HOST_MEMORY for any function which allocates memory.

I think the correct value might be VK_ERROR_DEVICE_LOST or
VK_ERROR_OUT_OF_DATE_KHR, as something "bad" has clearly happened. The
other place this is called is from QueuePresent, where either of those
error codes is allowed. I could convert that error to
VK_ERROR_OUT_OF_HOST_MEMORY for RegisterDisplayEvent if you think that's a
good idea.

The sleep prevents an application from spinning at this failure,
allowing the user to gracefully terminate the application.

>
>> + wsi_display_debug("queue vblank event %lu failed\n", 
>> fence->sequence);
>> + struct timespec delay = {
>> +.tv_sec = 0,
>> +.tv_nsec = 1ull,
>> + };
>> + nanosleep(, NULL);
>> + return VK_ERROR_OUT_OF_HOST_MEMORY;
>
> Given your previous explanation, I think this is ok but I think it deserves 
> a comment.

Wilco.

I've added comments to this section to try and explain what's going on:

  if (!ret)
 return VK_SUCCESS;

  if (errno != ENOMEM) {

 /* Something unexpected happened. Pause for a moment so the
  * application doesn't just spin and then return a failure indication
  */

 wsi_display_debug("queue vblank event %lu failed\n", fence->sequence);
 struct timespec delay = {
.tv_sec = 0,
.tv_nsec = 1ull,
 };
 nanosleep(&delay, NULL);
 return VK_ERROR_OUT_OF_HOST_MEMORY;
  }

  /* The kernel event queue is full. Wait for some events to be
   * processed and try again
   */

  pthread_mutex_lock(>wait_mutex);
  ret = wsi_display_wait_for_event(wsi, wsi_rel_to_abs_time(1ull));
  pthread_mutex_unlock(>wait_mutex);

  if (ret) {
 wsi_display_debug("vblank queue full, event wait failed\n");
 return VK_ERROR_OUT_OF_HOST_MEMORY;
  }

-- 
-keith


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4 3/3] egl/android: Add DRM node probing and filtering

2018-06-21 Thread Tomasz Figa
Hi Rob,

On Wed, Jun 20, 2018 at 10:26 PM Robert Foss  wrote:
>
> This patch both adds support for probing & filtering DRM nodes
> and switches away from using the GRALLOC_MODULE_PERFORM_GET_DRM_FD
> gralloc call.
>
> Currently the filtering is based just on the driver name,
> and the desired name is supplied using the "drm.gpu.vendor_name"
> Android property.
>
> Signed-off-by: Robert Foss 
> ---
>
> Changes since v3:
>  - Reduced number of probing return codes
>  - Simplified driver vendor check in droid_probe_device()
>  - Fixed typo with ';' prepended to an if-statement
>  - Removed a strlen call
>  - Switched a sprintf to snprintf
>  - Replaced fd == -1 check with < 0
>  - Simplified switch+goto statements

Only 2 nits. Please feel free to add my R-b after fixing them (or
maybe a willing committer could fix them up when applying ;)). Thanks!

[snip]
> +static probe_ret_t
> +droid_probe_device(_EGLDisplay *disp, int fd, char *vendor)

const char *vendor

[snip]
> +static int
> +droid_open_device(_EGLDisplay *disp)
> +{
[snip]
> +  int ret = droid_probe_device(disp, fd, vendor_name);
> +  switch (ret) {
> +  case probe_success:
> + goto success;
> +  case probe_filtered_out:
> + /* Set as fallback */
> + if (fallback_fd == -1)
> +fallback_fd = fd;
> + break;
> +  case probe_fail:
> + break;
> +  }
> +
> +  if (fallback_fd != fd)
> + close(fd);
> +  fd = -1;
> +  continue;

This continue doesn't do anything.

Best regards,
Tomasz
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3] mesa: handle a bunch of formats in IMPLEMENTATION_COLOR_READ_*

2018-06-21 Thread Tomeu Vizoso
On 31 May 2018 at 00:25, Eric Anholt  wrote:
> Tomeu Vizoso  writes:
>
>> Virgl could save a lot of work converting buffers in the host side
>> between formats if Mesa supported a bunch of other formats when reading
>> pixels.
>>
>> This commit adds cases to handle specific formats so that the values
>> reported by the two calls match more closely the underlying native
>> formats.
>>
>> In GLES it is important that IMPLEMENTATION_COLOR_READ_* return the native
>> format and data type because the spec only allows reading with those,
>> besides GL_RGBA or GL_RGBA_INTEGER.
>>
>> Additionally, because virgl currently doesn't implement such
>> conversions, this commit fixes several tests in
>> dEQP-GLES3.functional.fbo.color.clear.*, when using virgl in the guest
>> side.
>>
>> The logic is based on knowledge that is shared with
>> _mesa_format_matches_format_and_type() but we cannot assert that the
>> results match as we don't have all the starting information at both
>> points. So leave the assert out and hope CI comes soon to save us all.
>>
>> v2: * Let R10G10B10A2_UINT fall back to GL_RGBA_INTEGER (Eric Anholt)
>> * Assert with _mesa_format_matches_format_and_type (Eric Anholt)
>>
>> v3: * Remove the assert, as it won't be reliable (Eric Anholt)
>>
>> Signed-off-by: Tomeu Vizoso 
>> ---
>>  src/mesa/main/framebuffer.c | 68 +++--
>>  1 file changed, 43 insertions(+), 25 deletions(-)
>>
>> diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c
>> index 8e751b453b75..b1a544b5646d 100644
>> --- a/src/mesa/main/framebuffer.c
>> +++ b/src/mesa/main/framebuffer.c
>> @@ -834,18 +834,52 @@ _mesa_get_color_read_format(struct gl_context *ctx,
>> }
>> else {
>>const mesa_format format = fb->_ColorReadBuffer->Format;
>> -  const GLenum data_type = _mesa_get_format_datatype(format);
>> +  GLenum data_type;
>> +  GLuint comps;
>>
>> -  if (format == MESA_FORMAT_B8G8R8A8_UNORM)
>> +  _mesa_uncompressed_format_to_type_and_comps(format, _type, 
>> );
>> +
>> +  switch (format) {
>> +  case MESA_FORMAT_RGBA_UINT8:
>> + return GL_RGBA_INTEGER;
>> +  case MESA_FORMAT_B8G8R8A8_UNORM:
>>   return GL_BGRA;
>> -  else if (format == MESA_FORMAT_B5G6R5_UNORM)
>> +  case MESA_FORMAT_B5G6R5_UNORM:
>> +  case MESA_FORMAT_R11G11B10_FLOAT:
>>   return GL_RGB;
>> -  else if (format == MESA_FORMAT_R_UNORM8)
>> +  case MESA_FORMAT_RG_FLOAT32:
>> +  case MESA_FORMAT_RG_FLOAT16:
>> +  case MESA_FORMAT_R8G8_UNORM:
>> + return GL_RG;
>> +  case MESA_FORMAT_RG_SINT32:
>> +  case MESA_FORMAT_RG_UINT32:
>> +  case MESA_FORMAT_RG_SINT16:
>> +  case MESA_FORMAT_RG_UINT16:
>> +  case MESA_FORMAT_RG_SINT8:
>> +  case MESA_FORMAT_RG_UINT8:
>> + return GL_RG_INTEGER;
>> +  case MESA_FORMAT_R_FLOAT32:
>> +  case MESA_FORMAT_R_FLOAT16:
>> +  case MESA_FORMAT_R_UNORM8:
>>   return GL_RED;
>> +  case MESA_FORMAT_R_SINT32:
>> +  case MESA_FORMAT_R_UINT32:
>> +  case MESA_FORMAT_R_SINT16:
>> +  case MESA_FORMAT_R_UINT16:
>> +  case MESA_FORMAT_R_SINT8:
>> +  case MESA_FORMAT_R_UINT8:
>> + return GL_RED_INTEGER;
>> +  default:
>> + break;
>> +  }
>>
>>switch (data_type) {
>>case GL_UNSIGNED_INT:
>> +  case GL_UNSIGNED_INT_2_10_10_10_REV:
>> +  case GL_UNSIGNED_SHORT:
>
> Formats like MESA_FORMAT_R_UNORM16 return GL_UNSIGNED_SHORT, but they
> should be returning GL_RGBA, right?  Similar for SNORM, and similar for BYTE
> cases.  Maybe _mesa_format_is_integer() could help you here?

You are right that it shouldn't be GL_RGBA_INTEGER as per the current
code, but I think it should be GL_RED and not GL_RGBA.

I will add those Mesa formats to the switch case, as
_mesa_format_is_integer() isn't enough to tell.

Thanks,

Tomeu

>
>>case GL_INT:
>> +  case GL_SHORT:
>> +  case GL_BYTE:
>>   return GL_RGBA_INTEGER;
>>default:
>>   return GL_RGBA;
>
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] i965/gen6/gs: Handle case where a GS doesn't allocate VUE

2018-06-21 Thread Andrii Simiklit
We cannot use the VUE Dereference flags combination for the EOT
message on ILK and SNB because, unlike on Pre-IL, the threads there
are not initialized with an initial VUE handle.
So to avoid GPU hangs on SNB and ILK we need
to avoid using the VUE Dereference flags combination.
(This was tested only on SNB, but according to the specification,
SNB Volume 2 Part 1: 1.6.5.3, 1.6.5.6,
ILK should behave in a similar way.)

v2: The approach to fixing this issue was changed.
Instead of using different EOT flags at the end of the program,
we now create a VUE every time, even if the GS produces no output.

Signed-off-by: Andrii Simiklit 
---
 src/intel/compiler/gen6_gs_visitor.cpp | 88 +-
 1 file changed, 23 insertions(+), 65 deletions(-)

diff --git a/src/intel/compiler/gen6_gs_visitor.cpp 
b/src/intel/compiler/gen6_gs_visitor.cpp
index ac3ba55..b831d33 100644
--- a/src/intel/compiler/gen6_gs_visitor.cpp
+++ b/src/intel/compiler/gen6_gs_visitor.cpp
@@ -300,11 +300,10 @@ gen6_gs_visitor::emit_urb_write_opcode(bool complete, int 
base_mrf,
   /* Otherwise we always request to allocate a new VUE handle. If this is
* the last write before the EOT message and the new handle never gets
* used it will be dereferenced when we send the EOT message. This is
-   * necessary to avoid different setups (under Pre-IL only) for the EOT 
message (one for the
+   * necessary to avoid different setups for the EOT message (one for the
* case when there is no output and another for the case when there is)
* which would require to end the program with an IF/ELSE/ENDIF block,
-   * something we do not want. 
-   * But for ILK and SNB we can not avoid the end the program with an 
IF/ELSE/ENDIF block.
+   * something we do not want.
*/
   inst = emit(GS_OPCODE_URB_WRITE_ALLOCATE);
   inst->urb_write_flags = BRW_URB_WRITE_COMPLETE;
@@ -351,27 +350,27 @@ gen6_gs_visitor::emit_thread_end()
int max_usable_mrf = FIRST_SPILL_MRF(devinfo->gen);
 
/* Issue the FF_SYNC message and obtain the initial VUE handle. */
-   emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u), 
BRW_CONDITIONAL_G));
-   emit(IF(BRW_PREDICATE_NORMAL));
-   {
-  this->current_annotation = "gen6 thread end: ff_sync";
+   this->current_annotation = "gen6 thread end: ff_sync";
 
-  vec4_instruction *inst;
-  if (prog->info.has_transform_feedback_varyings) {
+   vec4_instruction *inst = NULL;
+   if (prog->info.has_transform_feedback_varyings) {
  src_reg sol_temp(this, glsl_type::uvec4_type);
  emit(GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
-  dst_reg(this->svbi),
-  this->vertex_count,
-  this->prim_count,
-  sol_temp);
+   dst_reg(this->svbi),
+   this->vertex_count,
+   this->prim_count,
+   sol_temp);
  inst = emit(GS_OPCODE_FF_SYNC,
  dst_reg(this->temp), this->prim_count, this->svbi);
-  } else {
+   } else {
  inst = emit(GS_OPCODE_FF_SYNC,
  dst_reg(this->temp), this->prim_count, brw_imm_ud(0u));
-  }
-  inst->base_mrf = base_mrf;
+   }
+   inst->base_mrf = base_mrf;
 
+   emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u), 
BRW_CONDITIONAL_G));
+   emit(IF(BRW_PREDICATE_NORMAL));
+   {
   /* Loop over all buffered vertices and emit URB write messages */
   this->current_annotation = "gen6 thread end: urb writes init";
   src_reg vertex(this, glsl_type::uint_type);
@@ -415,7 +414,7 @@ gen6_gs_visitor::emit_thread_end()
dst_reg reg = dst_reg(MRF, mrf);
reg.type = output_reg[varying][0].type;
data.type = reg.type;
-   vec4_instruction *inst = emit(MOV(reg, data));
+   inst = emit(MOV(reg, data));
inst->force_writemask_all = true;
 
mrf++;
@@ -450,11 +449,8 @@ gen6_gs_visitor::emit_thread_end()
   if (prog->info.has_transform_feedback_varyings)
  xfb_write();
}
-   const bool common_eot_approach_can_be_used = (devinfo->gen < 5);
-   if(common_eot_approach_can_be_used)
-   {
-  emit(BRW_OPCODE_ENDIF);  
-   }
+   emit(BRW_OPCODE_ENDIF);
+
/* Finally, emit EOT message.
 *
 * In gen6 we need to end the thread differently depending on whether we 
have
@@ -464,35 +460,11 @@ gen6_gs_visitor::emit_thread_end()
 *
 * However, this would lead us to end the program with an ENDIF opcode,
 * which we want to avoid, so what we do is that we always request a new
-* VUE handle every time we do a URB WRITE, even for the last vertex we 
emit.
+* VUE handle every time, even if GS produces no output.
 * With this we make sure that whether we have emitted at least one vertex
 * or none at all, we have to finish the thread without writing to the URB,
-* which works for both cases (but only under Pre-IL) by setting 
-* the COMPLETE and 

Re: [Mesa-dev] [PATCH 2/2] i965/gen6/gs: Handle case where a GS doesn't allocate VUE

2018-06-21 Thread andrey simiklit
Hi all,

Sorry, please ignore this patch; it was sent with an incorrect subject.
I will send a corrected one shortly.

Regards,
Andrii.

On Thu, Jun 21, 2018 at 4:12 PM, Andrii Simiklit 
wrote:

> We can not use the VUE Dereference flags combination for EOT
> message under ILK and SNB because the threads are not initialized
> there with initial VUE handle unlike Pre-IL.
> So to avoid GPU hangs on SNB and ILK we need
> to avoid usage of the VUE Dereference flags combination.
> (Was tested only on SNB but according to the specification
> SNB Volume 2 Part 1: 1.6.5.3, 1.6.5.6
> the ILK must behave itself in the similar way)
>
> v2: Approach to fix this issue was changed.
> Instead of different EOT flags in the program end
> we will create VUE every time even if GS produces no output.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105399
>
> Signed-off-by: Andrii Simiklit 
> ---
>  src/intel/compiler/gen6_gs_visitor.cpp | 88
> +-
>  1 file changed, 23 insertions(+), 65 deletions(-)
>
> diff --git a/src/intel/compiler/gen6_gs_visitor.cpp
> b/src/intel/compiler/gen6_gs_visitor.cpp
> index ac3ba55..b831d33 100644
> --- a/src/intel/compiler/gen6_gs_visitor.cpp
> +++ b/src/intel/compiler/gen6_gs_visitor.cpp
> @@ -300,11 +300,10 @@ gen6_gs_visitor::emit_urb_write_opcode(bool
> complete, int base_mrf,
>/* Otherwise we always request to allocate a new VUE handle. If
> this is
> * the last write before the EOT message and the new handle never
> gets
> * used it will be dereferenced when we send the EOT message. This
> is
> -   * necessary to avoid different setups (under Pre-IL only) for the
> EOT message (one for the
> +   * necessary to avoid different setups for the EOT message (one for
> the
> * case when there is no output and another for the case when there
> is)
> * which would require to end the program with an IF/ELSE/ENDIF
> block,
> -   * something we do not want.
> -   * But for ILK and SNB we can not avoid the end the program with an
> IF/ELSE/ENDIF block.
> +   * something we do not want.
> */
>inst = emit(GS_OPCODE_URB_WRITE_ALLOCATE);
>inst->urb_write_flags = BRW_URB_WRITE_COMPLETE;
> @@ -351,27 +350,27 @@ gen6_gs_visitor::emit_thread_end()
> int max_usable_mrf = FIRST_SPILL_MRF(devinfo->gen);
>
> /* Issue the FF_SYNC message and obtain the initial VUE handle. */
> -   emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u),
> BRW_CONDITIONAL_G));
> -   emit(IF(BRW_PREDICATE_NORMAL));
> -   {
> -  this->current_annotation = "gen6 thread end: ff_sync";
> +   this->current_annotation = "gen6 thread end: ff_sync";
>
> -  vec4_instruction *inst;
> -  if (prog->info.has_transform_feedback_varyings) {
> +   vec4_instruction *inst = NULL;
> +   if (prog->info.has_transform_feedback_varyings) {
>   src_reg sol_temp(this, glsl_type::uvec4_type);
>   emit(GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
> -  dst_reg(this->svbi),
> -  this->vertex_count,
> -  this->prim_count,
> -  sol_temp);
> +   dst_reg(this->svbi),
> +   this->vertex_count,
> +   this->prim_count,
> +   sol_temp);
>   inst = emit(GS_OPCODE_FF_SYNC,
>   dst_reg(this->temp), this->prim_count, this->svbi);
> -  } else {
> +   } else {
>   inst = emit(GS_OPCODE_FF_SYNC,
>   dst_reg(this->temp), this->prim_count,
> brw_imm_ud(0u));
> -  }
> -  inst->base_mrf = base_mrf;
> +   }
> +   inst->base_mrf = base_mrf;
>
> +   emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u),
> BRW_CONDITIONAL_G));
> +   emit(IF(BRW_PREDICATE_NORMAL));
> +   {
>/* Loop over all buffered vertices and emit URB write messages */
>this->current_annotation = "gen6 thread end: urb writes init";
>src_reg vertex(this, glsl_type::uint_type);
> @@ -415,7 +414,7 @@ gen6_gs_visitor::emit_thread_end()
> dst_reg reg = dst_reg(MRF, mrf);
> reg.type = output_reg[varying][0].type;
> data.type = reg.type;
> -   vec4_instruction *inst = emit(MOV(reg, data));
> +   inst = emit(MOV(reg, data));
> inst->force_writemask_all = true;
>
> mrf++;
> @@ -450,11 +449,8 @@ gen6_gs_visitor::emit_thread_end()
>if (prog->info.has_transform_feedback_varyings)
>   xfb_write();
> }
> -   const bool common_eot_approach_can_be_used = (devinfo->gen < 5);
> -   if(common_eot_approach_can_be_used)
> -   {
> -  emit(BRW_OPCODE_ENDIF);
> -   }
> +   emit(BRW_OPCODE_ENDIF);
> +
> /* Finally, emit EOT message.
>  *
>  * In gen6 we need to end the thread differently depending on whether
> we have
> @@ -464,35 +460,11 @@ gen6_gs_visitor::emit_thread_end()
>  *
>  * However, this would lead us to end the program with an ENDIF opcode,
>  * which we 

[Mesa-dev] [PATCH 2/2] i965/gen6/gs: Handle case where a GS doesn't allocate VUE

2018-06-21 Thread Andrii Simiklit
We cannot use the VUE Dereference flags combination for the EOT
message on ILK and SNB because, unlike on Pre-IL, the threads there
are not initialized with an initial VUE handle.
So to avoid GPU hangs on SNB and ILK we need
to avoid using the VUE Dereference flags combination.
(This was tested only on SNB, but according to the specification,
SNB Volume 2 Part 1: 1.6.5.3, 1.6.5.6,
ILK should behave in a similar way.)

v2: Approach to fix this issue was changed.
Instead of different EOT flags in the program end
we will create VUE every time even if GS produces no output.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105399

Signed-off-by: Andrii Simiklit 
---
 src/intel/compiler/gen6_gs_visitor.cpp | 88 +-
 1 file changed, 23 insertions(+), 65 deletions(-)

diff --git a/src/intel/compiler/gen6_gs_visitor.cpp 
b/src/intel/compiler/gen6_gs_visitor.cpp
index ac3ba55..b831d33 100644
--- a/src/intel/compiler/gen6_gs_visitor.cpp
+++ b/src/intel/compiler/gen6_gs_visitor.cpp
@@ -300,11 +300,10 @@ gen6_gs_visitor::emit_urb_write_opcode(bool complete, int 
base_mrf,
   /* Otherwise we always request to allocate a new VUE handle. If this is
* the last write before the EOT message and the new handle never gets
* used it will be dereferenced when we send the EOT message. This is
-   * necessary to avoid different setups (under Pre-IL only) for the EOT 
message (one for the
+   * necessary to avoid different setups for the EOT message (one for the
* case when there is no output and another for the case when there is)
* which would require to end the program with an IF/ELSE/ENDIF block,
-   * something we do not want. 
-   * But for ILK and SNB we can not avoid the end the program with an 
IF/ELSE/ENDIF block.
+   * something we do not want.
*/
   inst = emit(GS_OPCODE_URB_WRITE_ALLOCATE);
   inst->urb_write_flags = BRW_URB_WRITE_COMPLETE;
@@ -351,27 +350,27 @@ gen6_gs_visitor::emit_thread_end()
int max_usable_mrf = FIRST_SPILL_MRF(devinfo->gen);
 
/* Issue the FF_SYNC message and obtain the initial VUE handle. */
-   emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u), 
BRW_CONDITIONAL_G));
-   emit(IF(BRW_PREDICATE_NORMAL));
-   {
-  this->current_annotation = "gen6 thread end: ff_sync";
+   this->current_annotation = "gen6 thread end: ff_sync";
 
-  vec4_instruction *inst;
-  if (prog->info.has_transform_feedback_varyings) {
+   vec4_instruction *inst = NULL;
+   if (prog->info.has_transform_feedback_varyings) {
  src_reg sol_temp(this, glsl_type::uvec4_type);
  emit(GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
-  dst_reg(this->svbi),
-  this->vertex_count,
-  this->prim_count,
-  sol_temp);
+   dst_reg(this->svbi),
+   this->vertex_count,
+   this->prim_count,
+   sol_temp);
  inst = emit(GS_OPCODE_FF_SYNC,
  dst_reg(this->temp), this->prim_count, this->svbi);
-  } else {
+   } else {
  inst = emit(GS_OPCODE_FF_SYNC,
  dst_reg(this->temp), this->prim_count, brw_imm_ud(0u));
-  }
-  inst->base_mrf = base_mrf;
+   }
+   inst->base_mrf = base_mrf;
 
+   emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u), 
BRW_CONDITIONAL_G));
+   emit(IF(BRW_PREDICATE_NORMAL));
+   {
   /* Loop over all buffered vertices and emit URB write messages */
   this->current_annotation = "gen6 thread end: urb writes init";
   src_reg vertex(this, glsl_type::uint_type);
@@ -415,7 +414,7 @@ gen6_gs_visitor::emit_thread_end()
dst_reg reg = dst_reg(MRF, mrf);
reg.type = output_reg[varying][0].type;
data.type = reg.type;
-   vec4_instruction *inst = emit(MOV(reg, data));
+   inst = emit(MOV(reg, data));
inst->force_writemask_all = true;
 
mrf++;
@@ -450,11 +449,8 @@ gen6_gs_visitor::emit_thread_end()
   if (prog->info.has_transform_feedback_varyings)
  xfb_write();
}
-   const bool common_eot_approach_can_be_used = (devinfo->gen < 5);
-   if(common_eot_approach_can_be_used)
-   {
-  emit(BRW_OPCODE_ENDIF);  
-   }
+   emit(BRW_OPCODE_ENDIF);
+
/* Finally, emit EOT message.
 *
 * In gen6 we need to end the thread differently depending on whether we 
have
@@ -464,35 +460,11 @@ gen6_gs_visitor::emit_thread_end()
 *
 * However, this would lead us to end the program with an ENDIF opcode,
 * which we want to avoid, so what we do is that we always request a new
-* VUE handle every time we do a URB WRITE, even for the last vertex we 
emit.
+* VUE handle every time, even if GS produces no output.
 * With this we make sure that whether we have emitted at least one vertex
 * or none at all, we have to finish the thread without writing to the URB,
-* which works for both 

Re: [Mesa-dev] [PATCH] docs/release-calendar: restore the missing 18.1 column

2018-06-21 Thread Emil Velikov
On 20 June 2018 at 16:19, Dylan Baker  wrote:
> Quoting Emil Velikov (2018-06-20 05:33:48)
>> From: Emil Velikov 
>>
>> Earlier commit removed the column, instead of adjusting its height.
>>
>> Cc: Dylan Baker 
>> Fixes: 0d4f338a116 ("docs: Update release-notes and calendar")
>> Signed-off-by: Emil Velikov 
>> ---
>>  docs/release-calendar.html | 1 +
>>  1 file changed, 1 insertion(+)
>>
>> diff --git a/docs/release-calendar.html b/docs/release-calendar.html
>> index af574c6b29b..fbaec2dd0c2 100644
>> --- a/docs/release-calendar.html
>> +++ b/docs/release-calendar.html
>> @@ -39,6 +39,7 @@ if you'd like to nominate a patch in the next stable 
>> release.
>>  Notes
>>  
>>  
>> +18.1
>>  2018-06-29
>>  18.1.3
>>  Dylan Baker
>> --
>> 2.17.1
>>
>
> Reviewed-by: Dylan Baker 
>
> I'll be glad for the sphinx stuff to land and not have to hand edit html :/
>
Indeed, sphinx will make it far better. Then again, we all make
mistakes; we're human after all.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] radv: change the returned error in radv_signal_fence()

2018-06-21 Thread Samuel Pitoiset
From my point of view, when we aren't able to submit a CS,
something has gone terribly wrong and we are most likely
going to lose the device.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_device.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index ffeb6450b3..3c63d8c86d 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -2470,9 +2470,8 @@ static VkResult radv_signal_fence(struct radv_queue 
*queue,
   false, fence->fence);
radv_free_sem_info(_info);
 
-   /* TODO: find a better error */
if (ret)
-   return vk_error(queue->device->instance, 
VK_ERROR_OUT_OF_DEVICE_MEMORY);
+   return vk_error(queue->device->instance, VK_ERROR_DEVICE_LOST);
 
return VK_SUCCESS;
 }
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] radv: check the return values of radv_signal_fence()

2018-06-21 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_device.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 3c63d8c86d..681d8a12c7 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -2615,7 +2615,9 @@ VkResult radv_QueueSubmit(
 
if (fence) {
if (!fence_emitted) {
-   radv_signal_fence(queue, fence);
+   result = radv_signal_fence(queue, fence);
+   if (result != VK_SUCCESS)
+   return result;
}
fence->submitted = true;
}
@@ -3172,6 +3174,7 @@ radv_sparse_image_opaque_bind_memory(struct radv_device 
*device,
RADV_FROM_HANDLE(radv_queue, queue, _queue);
struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
bool fence_emitted = false;
+   VkResult result;
 
for (uint32_t i = 0; i < bindInfoCount; ++i) {
struct radv_winsys_sem_info sem_info;
@@ -3213,7 +3216,9 @@ radv_sparse_image_opaque_bind_memory(struct radv_device 
*device,
 
if (fence) {
if (!fence_emitted) {
-   radv_signal_fence(queue, fence);
+   result = radv_signal_fence(queue, fence);
+   if (result != VK_SUCCESS)
+   return result;
}
fence->submitted = true;
}
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] radv: always check the return error when submitting a CS

2018-06-21 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_device.c | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 681d8a12c7..b72b5d969d 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -3175,6 +3175,7 @@ radv_sparse_image_opaque_bind_memory(struct radv_device 
*device,
struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
bool fence_emitted = false;
VkResult result;
+   int ret;
 
for (uint32_t i = 0; i < bindInfoCount; ++i) {
struct radv_winsys_sem_info sem_info;
@@ -3200,11 +3201,16 @@ radv_sparse_image_opaque_bind_memory(struct radv_device 
*device,
return result;
 
if (pBindInfo[i].waitSemaphoreCount || 
pBindInfo[i].signalSemaphoreCount) {
-   queue->device->ws->cs_submit(queue->hw_ctx, 
queue->queue_idx,
-
>device->empty_cs[queue->queue_family_index],
-1, NULL, NULL,
-_info, NULL,
-false, base_fence);
+   ret = queue->device->ws->cs_submit(queue->hw_ctx, 
queue->queue_idx,
+ 
>device->empty_cs[queue->queue_family_index],
+ 1, NULL, NULL,
+ _info, NULL,
+ false, base_fence);
+   if (ret) {
+   radv_loge("failed to submit CS %d\n", i);
+   abort();
+   }
+
fence_emitted = true;
if (fence)
fence->submitted = true;
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] intel/compiler: emit actual barriers for working-group level barriers

2018-06-21 Thread Iago Toral Quiroga
Until now we have assumed that we could skip emitting these barriers
in the general case, based on empirical testing and a few assumptions
detailed in a comment in the driver code. However, recent CTS tests
have shown that we actually need them to produce correct behavior.
---
 src/intel/compiler/brw_fs_nir.cpp | 25 ++---
 1 file changed, 2 insertions(+), 23 deletions(-)

diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index 0abb4798e70..d0648c89865 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -3884,6 +3884,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , 
nir_intrinsic_instr *instr
   break;
}
 
+   case nir_intrinsic_group_memory_barrier:
+   case nir_intrinsic_memory_barrier_shared:
case nir_intrinsic_memory_barrier_atomic_counter:
case nir_intrinsic_memory_barrier_buffer:
case nir_intrinsic_memory_barrier_image:
@@ -3895,29 +3897,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
nir_intrinsic_instr *instr
   break;
}
 
-   case nir_intrinsic_group_memory_barrier:
-   case nir_intrinsic_memory_barrier_shared:
-  /* We treat these workgroup-level barriers as no-ops.  This should be
-   * safe at present and as long as:
-   *
-   *  - Memory access instructions are not subsequently reordered by the
-   *compiler back-end.
-   *
-   *  - All threads from a given compute shader workgroup fit within a
-   *single subslice and therefore talk to the same HDC shared unit
-   *what supposedly guarantees ordering and coherency between threads
-   *from the same workgroup.  This may change in the future when we
-   *start splitting workgroups across multiple subslices.
-   *
-   *  - The context is not in fault-and-stream mode, which could cause
-   *memory transactions (including to SLM) prior to the barrier to be
-   *replayed after the barrier if a pagefault occurs.  This shouldn't
-   *be a problem up to and including SKL because fault-and-stream is
-   *not usable due to hardware issues, but that's likely to change in
-   *the future.
-   */
-  break;
-
case nir_intrinsic_shader_clock: {
   /* We cannot do anything if there is an event, so ignore it for now */
   const fs_reg shader_clock = get_timestamp(bld);
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: add missing display list support for ARB_compute_shader

2018-06-21 Thread Timothy Arceri
The extension is enabled for compat profile but there is currently
no display list support.
---
 src/mesa/main/dlist.c | 87 +++
 1 file changed, 87 insertions(+)

diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index 8b1ddb05038..e5c8f22ac80 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -510,6 +510,10 @@ typedef enum
OPCODE_SAMPLER_PARAMETERIIV,
OPCODE_SAMPLER_PARAMETERUIV,
 
+   /* ARB_compute_shader */
+   OPCODE_DISPATCH_COMPUTE,
+   OPCODE_DISPATCH_COMPUTE_INDIRECT,
+
/* GL_ARB_sync */
OPCODE_WAIT_SYNC,
 
@@ -604,6 +608,7 @@ typedef union gl_dlist_node Node;
 union pointer
 {
void *ptr;
+   GLintptr intptr;
GLuint dwords[POINTER_DWORDS];
 };
 
@@ -643,6 +648,41 @@ get_pointer(const Node *node)
 }
 
 
+/**
+ * Save a 4 or 8-byte pointer at dest (and dest+1).
+ */
+static inline void
+save_intpointer(Node *dest, GLintptr src)
+{
+   union pointer p;
+   unsigned i;
+
+   STATIC_ASSERT(POINTER_DWORDS == 1 || POINTER_DWORDS == 2);
+   STATIC_ASSERT(sizeof(Node) == 4);
+
+   p.intptr = src;
+
+   for (i = 0; i < POINTER_DWORDS; i++)
+  dest[i].ui = p.dwords[i];
+}
+
+
+/**
+ * Retrieve a 4 or 8-byte pointer from node (node+1).
+ */
+static inline GLintptr
+get_intpointer(const Node *node)
+{
+   union pointer p;
+   unsigned i;
+
+   for (i = 0; i < POINTER_DWORDS; i++)
+  p.dwords[i] = node[i].ui;
+
+   return p.intptr;
+}
+
+
 /**
  * Used to store a 64-bit uint in a pair of "Nodes" for the sake of 32-bit
  * environment.
@@ -6570,6 +6610,41 @@ save_DrawTransformFeedbackStreamInstanced(GLenum mode, 
GLuint name,
}
 }
 
+static void GLAPIENTRY
+save_DispatchCompute(GLuint num_groups_x, GLuint num_groups_y,
+ GLuint num_groups_z)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   Node *n;
+   ASSERT_OUTSIDE_SAVE_BEGIN_END_AND_FLUSH(ctx);
+   n = alloc_instruction(ctx, OPCODE_DISPATCH_COMPUTE, 3);
+   if (n) {
+  n[1].ui = num_groups_x;
+  n[2].ui = num_groups_y;
+  n[3].ui = num_groups_z;
+   }
+   if (ctx->ExecuteFlag) {
+  CALL_DispatchCompute(ctx->Exec, (num_groups_x, num_groups_y,
+   num_groups_z));
+   }
+}
+
+static void GLAPIENTRY
+save_DispatchComputeIndirect(GLintptr indirect)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   Node *n;
+   ASSERT_OUTSIDE_SAVE_BEGIN_END_AND_FLUSH(ctx);
+   n = alloc_instruction(ctx, OPCODE_DISPATCH_COMPUTE_INDIRECT,
+ POINTER_DWORDS);
+   if (n) {
+  save_intpointer(&n[1], indirect);
+   }
+   if (ctx->ExecuteFlag) {
+  CALL_DispatchComputeIndirect(ctx->Exec, (indirect));
+   }
+}
+
 static void GLAPIENTRY
 save_UseProgram(GLuint program)
 {
@@ -10429,6 +10504,14 @@ execute_list(struct gl_context *ctx, GLuint list)
 }
 break;
 
+ /* ARB_compute_shader */
+ case OPCODE_DISPATCH_COMPUTE:
+CALL_DispatchCompute(ctx->Exec, (n[1].ui, n[2].ui, n[3].ui));
+break;
+ case OPCODE_DISPATCH_COMPUTE_INDIRECT:
+CALL_DispatchComputeIndirect(ctx->Exec, (get_intpointer(&n[1])));
+break;
+
  /* GL_ARB_sync */
  case OPCODE_WAIT_SYNC:
 {
@@ -11138,6 +11221,10 @@ _mesa_initialize_save_table(const struct gl_context 
*ctx)
SET_DepthRangeArrayv(table, save_DepthRangeArrayv);
SET_DepthRangeIndexed(table, save_DepthRangeIndexed);
 
+   /* 122. ARB_compute_shader */
+   SET_DispatchCompute(table, save_DispatchCompute);
+   SET_DispatchComputeIndirect(table, save_DispatchComputeIndirect);
+
/* 173. GL_EXT_blend_func_separate */
SET_BlendFuncSeparate(table, save_BlendFuncSeparateEXT);
 
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] swr: bump minimum supported LLVM version to 5.0

2018-06-21 Thread Eric Engestrom
On Thursday, 2018-06-21 11:42:29 +0200, Juan A. Suarez Romero wrote:
> On Tue, 2018-06-19 at 11:53 +0200, Juan A. Suarez Romero wrote:
> > On Mon, 2018-06-18 at 16:29 +0100, Eric Engestrom wrote:
> > > On Monday, 2018-06-18 16:23:41 +0200, Juan A. Suarez Romero wrote:
> > > > RADV now requires LLVM 5.0 or greater, and thus we can't build dist
> > > > tarball because swr requires LLVM 4.0.
> > > > 
> > > > Let's bump required LLVM to 5.0 in swr too.
> > > > 
> > > > Fixes: f9eb1ef870 ("amd: remove support for LLVM 4.0")
> > > > Cc: George Kyriazis 
> > > > Cc: Tim Rowley 
> > > > Cc: Emil Velikov 
> > > > Cc: Dylan Baker 
> > > > Cc: Eric Engestrom 
> > > 
> > > s/imgtec/intel/ :)
> > > (I moved)
> > > 
> > 
> > Fixed :)
> > 
> > > > ---
> > > >  .travis.yml | 12 ++--
> > > >  configure.ac|  7 ---
> > > >  meson.build |  4 +---
> > > >  src/gallium/drivers/swr/Makefile.am |  6 +++---
> > > >  src/gallium/drivers/swr/SConscript  |  4 ++--
> > > >  5 files changed, 16 insertions(+), 17 deletions(-)
> > > > 
> > > > diff --git a/.travis.yml b/.travis.yml
> > > > index b1fc7de9587..c9a30fa0ef5 100644
> > > > --- a/.travis.yml
> > > > +++ b/.travis.yml
> > > > @@ -92,7 +92,7 @@ matrix:
> > > >  - BUILD=make
> > > >  - MAKEFLAGS="-j4"
> > > >  - MAKE_CHECK_COMMAND="true"
> > > > -- LLVM_VERSION=4.0
> > > > +- LLVM_VERSION=5.0
> > > >  - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
> > > >  - OVERRIDE_CC="gcc-4.8"
> > > >  - OVERRIDE_CXX="g++-4.8"
> > > > @@ -105,12 +105,12 @@ matrix:
> > > >addons:
> > > >  apt:
> > > >sources:
> > > > -- llvm-toolchain-trusty-4.0
> > > > +- llvm-toolchain-trusty-5.0
> > > >packages:
> > > >  # LLVM packaging is broken and misses these dependencies
> > > >  - libedit-dev
> > > >  # From sources above
> > > > -- llvm-4.0-dev
> > > > +- llvm-5.0-dev
> > > >  # Common
> > > >  - xz-utils
> > > >  - x11proto-xf86vidmode-dev
> > > > @@ -432,7 +432,7 @@ matrix:
> > > >  - BUILD=scons
> > > >  - SCONSFLAGS="-j4"
> > > >  - SCONS_TARGET="swr=1"
> > > > -- LLVM_VERSION=4.0
> > > > +- LLVM_VERSION=5.0
> > > >  - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
> > > >  # Keep it symmetrical to the make build. There's no actual 
> > > > SWR, yet.
> > > >  - SCONS_CHECK_COMMAND="true"
> > > > @@ -441,13 +441,13 @@ matrix:
> > > >addons:
> > > >  apt:
> > > >sources:
> > > > -- llvm-toolchain-trusty-4.0
> > > > +- llvm-toolchain-trusty-5.0
> > > >packages:
> > > >  - scons
> > > >  # LLVM packaging is broken and misses these dependencies
> > > >  - libedit-dev
> > > >  # From sources above
> > > > -- llvm-4.0-dev
> > > > +- llvm-5.0-dev
> > > >  # Common
> > > >  - xz-utils
> > > >  - x11proto-xf86vidmode-dev
> > > > diff --git a/configure.ac b/configure.ac
> > > > index 7a0e4754208..543b6fe061b 100644
> > > > --- a/configure.ac
> > > > +++ b/configure.ac
> > > > @@ -110,7 +110,7 @@ LLVM_REQUIRED_OPENCL=3.9.0
> > > >  LLVM_REQUIRED_R600=3.9.0
> > > >  LLVM_REQUIRED_RADEONSI=5.0.0
> > > >  LLVM_REQUIRED_RADV=5.0.0
> > > > -LLVM_REQUIRED_SWR=4.0.0
> > > > +LLVM_REQUIRED_SWR=5.0.0
> > > >  
> > > >  dnl Check for progs
> > > >  AC_PROG_CPP
> > > > @@ -2755,8 +2755,9 @@ if test -n "$with_gallium_drivers"; then
> > > >  fi
> > > >  
> > > >  # XXX: Keep in sync with LLVM_REQUIRED_SWR
> > > > -AM_CONDITIONAL(SWR_INVALID_LLVM_VERSION, test "x$LLVM_VERSION" != 
> > > > x4.0.0 -a \
> > > > -  "x$LLVM_VERSION" != 
> > > > x4.0.1)
> > > > +AM_CONDITIONAL(SWR_INVALID_LLVM_VERSION, test "x$LLVM_VERSION" != 
> > > > x5.0.0 -a \
> > > > +  "x$LLVM_VERSION" != 
> > > > x5.0.1 -a \
> > > > +  "x$LLVM_VERSION" != 
> > > > x5.0.2)
> > > 
> > > That check seems designed to break every time something in mesa changes
> > > supported llvm version. Is there a reason for it not to be a simple
> > > `>= 4.0` check?
> > > 
> > 
> > Because gen_builder.hpp is a generated file and it contains information 
> > that is
> > specific to the LLVM version it originates from. Apparently, this file is
> > forward compatible, but not backward, and it is included in dist tarball.
> > 
> > I guess the problem is that if you end up building the distball with, let's 
> > say
> > LLVM 6.0, then the tarball couldn't be built with LLVM 5.0, due to this file's
> > compatibility. Hence why it "forces" to use the minimum common LLVM version 
> > for
> > all the drivers.
> > 
> > This was 

Re: [Mesa-dev] [PATCH] swr: bump minimum supported LLVM version to 5.0

2018-06-21 Thread Juan A. Suarez Romero
On Wed, 2018-06-20 at 20:11 +, Cherniak, Bruce wrote:
> > On Jun 18, 2018, at 9:23 AM, Juan A. Suarez Romero  
> > wrote:
> > 
> > RADV now requires LLVM 5.0 or greater, and thus we can't build dist
> > tarball because swr requires LLVM 4.0.
> > 
> > Let's bump required LLVM to 5.0 in swr too.
> 
> Sorry, I didn't see this sooner.  We are fine with bumping the swr minimum 
> requirement
> to LLVM 5.0.  It generates better code anyway.
> 


Thanks! I'll add an "Acked-by: Bruce Cherniak".


J.A.

> > Fixes: f9eb1ef870 ("amd: remove support for LLVM 4.0")
> > Cc: George Kyriazis 
> > Cc: Tim Rowley 
> > Cc: Emil Velikov 
> > Cc: Dylan Baker 
> > Cc: Eric Engestrom 
> > ---
> > .travis.yml | 12 ++--
> > configure.ac|  7 ---
> > meson.build |  4 +---
> > src/gallium/drivers/swr/Makefile.am |  6 +++---
> > src/gallium/drivers/swr/SConscript  |  4 ++--
> > 5 files changed, 16 insertions(+), 17 deletions(-)
> > 
> > diff --git a/.travis.yml b/.travis.yml
> > index b1fc7de9587..c9a30fa0ef5 100644
> > --- a/.travis.yml
> > +++ b/.travis.yml
> > @@ -92,7 +92,7 @@ matrix:
> > - BUILD=make
> > - MAKEFLAGS="-j4"
> > - MAKE_CHECK_COMMAND="true"
> > -- LLVM_VERSION=4.0
> > +- LLVM_VERSION=5.0
> > - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
> > - OVERRIDE_CC="gcc-4.8"
> > - OVERRIDE_CXX="g++-4.8"
> > @@ -105,12 +105,12 @@ matrix:
> >   addons:
> > apt:
> >   sources:
> > -- llvm-toolchain-trusty-4.0
> > +- llvm-toolchain-trusty-5.0
> >   packages:
> > # LLVM packaging is broken and misses these dependencies
> > - libedit-dev
> > # From sources above
> > -- llvm-4.0-dev
> > +- llvm-5.0-dev
> > # Common
> > - xz-utils
> > - x11proto-xf86vidmode-dev
> > @@ -432,7 +432,7 @@ matrix:
> > - BUILD=scons
> > - SCONSFLAGS="-j4"
> > - SCONS_TARGET="swr=1"
> > -- LLVM_VERSION=4.0
> > +- LLVM_VERSION=5.0
> > - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
> > # Keep it symmetrical to the make build. There's no actual SWR, yet.
> > - SCONS_CHECK_COMMAND="true"
> > @@ -441,13 +441,13 @@ matrix:
> >   addons:
> > apt:
> >   sources:
> > -- llvm-toolchain-trusty-4.0
> > +- llvm-toolchain-trusty-5.0
> >   packages:
> > - scons
> > # LLVM packaging is broken and misses these dependencies
> > - libedit-dev
> > # From sources above
> > -- llvm-4.0-dev
> > +- llvm-5.0-dev
> > # Common
> > - xz-utils
> > - x11proto-xf86vidmode-dev
> > diff --git a/configure.ac b/configure.ac
> > index 7a0e4754208..543b6fe061b 100644
> > --- a/configure.ac
> > +++ b/configure.ac
> > @@ -110,7 +110,7 @@ LLVM_REQUIRED_OPENCL=3.9.0
> > LLVM_REQUIRED_R600=3.9.0
> > LLVM_REQUIRED_RADEONSI=5.0.0
> > LLVM_REQUIRED_RADV=5.0.0
> > -LLVM_REQUIRED_SWR=4.0.0
> > +LLVM_REQUIRED_SWR=5.0.0
> > 
> > dnl Check for progs
> > AC_PROG_CPP
> > @@ -2755,8 +2755,9 @@ if test -n "$with_gallium_drivers"; then
> > fi
> > 
> > # XXX: Keep in sync with LLVM_REQUIRED_SWR
> > -AM_CONDITIONAL(SWR_INVALID_LLVM_VERSION, test "x$LLVM_VERSION" != x4.0.0 
> > -a \
> > -  "x$LLVM_VERSION" != x4.0.1)
> > +AM_CONDITIONAL(SWR_INVALID_LLVM_VERSION, test "x$LLVM_VERSION" != x5.0.0 
> > -a \
> > +  "x$LLVM_VERSION" != x5.0.1 
> > -a \
> > +  "x$LLVM_VERSION" != x5.0.2)
> > 
> > if test "x$enable_llvm" = "xyes" -a "$with_gallium_drivers"; then
> > llvm_require_version $LLVM_REQUIRED_GALLIUM "gallium"
> > diff --git a/meson.build b/meson.build
> > index 65ae32172d2..a5662160d66 100644
> > --- a/meson.build
> > +++ b/meson.build
> > @@ -1130,10 +1130,8 @@ if with_gallium_opencl
> >   llvm_optional_modules += ['coroutines', 'opencl']
> > endif
> > 
> > -if with_amd_vk or with_gallium_radeonsi
> > +if with_amd_vk or with_gallium_radeonsi or with_gallium_swr
> >   _llvm_version = '>= 5.0.0'
> > -elif with_gallium_swr
> > -  _llvm_version = '>= 4.0.0'
> > elif with_gallium_opencl or with_gallium_r600
> >   _llvm_version = '>= 3.9.0'
> > else
> > diff --git a/src/gallium/drivers/swr/Makefile.am 
> > b/src/gallium/drivers/swr/Makefile.am
> > index 8b3150288e6..5cc3f77478a 100644
> > --- a/src/gallium/drivers/swr/Makefile.am
> > +++ b/src/gallium/drivers/swr/Makefile.am
> > @@ -374,9 +374,9 @@ include $(top_srcdir)/install-gallium-links.mk
> > # created with the oldest supported version of LLVM.
> > dist-hook:
> > if SWR_INVALID_LLVM_VERSION
> > -   @echo "***"
> > -   @echo "LLVM 4.0.0 or LLVM 4.0.1 required to 

Re: [Mesa-dev] [PATCH] swr: bump minimum supported LLVM version to 5.0

2018-06-21 Thread Juan A. Suarez Romero
On Tue, 2018-06-19 at 11:53 +0200, Juan A. Suarez Romero wrote:
> On Mon, 2018-06-18 at 16:29 +0100, Eric Engestrom wrote:
> > On Monday, 2018-06-18 16:23:41 +0200, Juan A. Suarez Romero wrote:
> > > RADV now requires LLVM 5.0 or greater, and thus we can't build dist
> > > tarball because swr requires LLVM 4.0.
> > > 
> > > Let's bump required LLVM to 5.0 in swr too.
> > > 
> > > Fixes: f9eb1ef870 ("amd: remove support for LLVM 4.0")
> > > Cc: George Kyriazis 
> > > Cc: Tim Rowley 
> > > Cc: Emil Velikov 
> > > Cc: Dylan Baker 
> > > Cc: Eric Engestrom 
> > 
> > s/imgtec/intel/ :)
> > (I moved)
> > 
> 
> Fixed :)
> 
> > > ---
> > >  .travis.yml | 12 ++--
> > >  configure.ac|  7 ---
> > >  meson.build |  4 +---
> > >  src/gallium/drivers/swr/Makefile.am |  6 +++---
> > >  src/gallium/drivers/swr/SConscript  |  4 ++--
> > >  5 files changed, 16 insertions(+), 17 deletions(-)
> > > 
> > > diff --git a/.travis.yml b/.travis.yml
> > > index b1fc7de9587..c9a30fa0ef5 100644
> > > --- a/.travis.yml
> > > +++ b/.travis.yml
> > > @@ -92,7 +92,7 @@ matrix:
> > >  - BUILD=make
> > >  - MAKEFLAGS="-j4"
> > >  - MAKE_CHECK_COMMAND="true"
> > > -- LLVM_VERSION=4.0
> > > +- LLVM_VERSION=5.0
> > >  - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
> > >  - OVERRIDE_CC="gcc-4.8"
> > >  - OVERRIDE_CXX="g++-4.8"
> > > @@ -105,12 +105,12 @@ matrix:
> > >addons:
> > >  apt:
> > >sources:
> > > -- llvm-toolchain-trusty-4.0
> > > +- llvm-toolchain-trusty-5.0
> > >packages:
> > >  # LLVM packaging is broken and misses these dependencies
> > >  - libedit-dev
> > >  # From sources above
> > > -- llvm-4.0-dev
> > > +- llvm-5.0-dev
> > >  # Common
> > >  - xz-utils
> > >  - x11proto-xf86vidmode-dev
> > > @@ -432,7 +432,7 @@ matrix:
> > >  - BUILD=scons
> > >  - SCONSFLAGS="-j4"
> > >  - SCONS_TARGET="swr=1"
> > > -- LLVM_VERSION=4.0
> > > +- LLVM_VERSION=5.0
> > >  - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
> > >  # Keep it symmetrical to the make build. There's no actual SWR, 
> > > yet.
> > >  - SCONS_CHECK_COMMAND="true"
> > > @@ -441,13 +441,13 @@ matrix:
> > >addons:
> > >  apt:
> > >sources:
> > > -- llvm-toolchain-trusty-4.0
> > > +- llvm-toolchain-trusty-5.0
> > >packages:
> > >  - scons
> > >  # LLVM packaging is broken and misses these dependencies
> > >  - libedit-dev
> > >  # From sources above
> > > -- llvm-4.0-dev
> > > +- llvm-5.0-dev
> > >  # Common
> > >  - xz-utils
> > >  - x11proto-xf86vidmode-dev
> > > diff --git a/configure.ac b/configure.ac
> > > index 7a0e4754208..543b6fe061b 100644
> > > --- a/configure.ac
> > > +++ b/configure.ac
> > > @@ -110,7 +110,7 @@ LLVM_REQUIRED_OPENCL=3.9.0
> > >  LLVM_REQUIRED_R600=3.9.0
> > >  LLVM_REQUIRED_RADEONSI=5.0.0
> > >  LLVM_REQUIRED_RADV=5.0.0
> > > -LLVM_REQUIRED_SWR=4.0.0
> > > +LLVM_REQUIRED_SWR=5.0.0
> > >  
> > >  dnl Check for progs
> > >  AC_PROG_CPP
> > > @@ -2755,8 +2755,9 @@ if test -n "$with_gallium_drivers"; then
> > >  fi
> > >  
> > >  # XXX: Keep in sync with LLVM_REQUIRED_SWR
> > > -AM_CONDITIONAL(SWR_INVALID_LLVM_VERSION, test "x$LLVM_VERSION" != x4.0.0 
> > > -a \
> > > -  "x$LLVM_VERSION" != x4.0.1)
> > > +AM_CONDITIONAL(SWR_INVALID_LLVM_VERSION, test "x$LLVM_VERSION" != x5.0.0 
> > > -a \
> > > +  "x$LLVM_VERSION" != x5.0.1 
> > > -a \
> > > +  "x$LLVM_VERSION" != x5.0.2)
> > 
> > That check seems designed to break every time something in mesa changes
> > supported llvm version. Is there a reason for it not to be a simple
> > `>= 4.0` check?
> > 
> 
> Because gen_builder.hpp is a generated file and it contains information that 
> is
> specific to the LLVM version it originates from. Apparently, this file is
> forward compatible, but not backward, and it is included in dist tarball.
> 
> I guess the problem is that if you end up building the distball with, let's 
> say
> LLVM 6.0, then the tarball couldn't be built with LLVM 5.0, due to this file's
> compatibility. Hence why it "forces" to use the minimum common LLVM version 
> for
> all the drivers.
> 
> This was added in commit 5233eaf9ee8 ("automake: add SWR LLVM gen_builder.hpp
> workaround") and modified in commit b39f6d5fc7c ("travis: radeonsi and radv 
> need
> LLVM 4.0").
> 
> 
>   J.A.
> 
> > That's the hunk that was causing issues, wasn't it? Everything else in
> > this patch is just to keep the same version number everywhere?
> > 
> > 

[Mesa-dev] [Bug 106986] glGetQueryiv error when querying number of result bits for GL_ANY_SAMPLES_PASSED_CONSERVATIVE

2018-06-21 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106986

Danylo  changed:

   What|Removed |Added

 CC||danylo.pilia...@gmail.com

--- Comment #1 from Danylo  ---
Sent a patch https://patchwork.freedesktop.org/patch/230710/

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: Return number of result bits for GL_ANY_SAMPLES_PASSED_CONSERVATIVE

2018-06-21 Thread Danylo Piliaiev
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106986

Signed-off-by: Danylo Piliaiev 
---
 src/mesa/main/queryobj.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/main/queryobj.c b/src/mesa/main/queryobj.c
index 7ee820a..7547fa1 100644
--- a/src/mesa/main/queryobj.c
+++ b/src/mesa/main/queryobj.c
@@ -698,6 +698,7 @@ _mesa_GetQueryIndexediv(GLenum target, GLuint index, GLenum 
pname,
 *params = ctx->Const.QueryCounterBits.SamplesPassed;
 break;
  case GL_ANY_SAMPLES_PASSED:
+ case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
 /* The minimum value of this is 1 if it's nonzero, and the value
  * is only ever GL_TRUE or GL_FALSE, so no sense in reporting more
  * bits.
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] swr: bump minimum supported LLVM version to 5.0

2018-06-21 Thread Emil Velikov
On 19 June 2018 at 10:53, Juan A. Suarez Romero  wrote:
> On Mon, 2018-06-18 at 16:29 +0100, Eric Engestrom wrote:
>> On Monday, 2018-06-18 16:23:41 +0200, Juan A. Suarez Romero wrote:
>> > RADV now requires LLVM 5.0 or greater, and thus we can't build dist
>> > tarball because swr requires LLVM 4.0.
>> >
>> > Let's bump required LLVM to 5.0 in swr too.
>> >
>> > Fixes: f9eb1ef870 ("amd: remove support for LLVM 4.0")
>> > Cc: George Kyriazis 
>> > Cc: Tim Rowley 
>> > Cc: Emil Velikov 
>> > Cc: Dylan Baker 
>> > Cc: Eric Engestrom 
>>
>> s/imgtec/intel/ :)
>> (I moved)
>>
>
> Fixed :)
>
>> > ---
>> >  .travis.yml | 12 ++--
>> >  configure.ac|  7 ---
>> >  meson.build |  4 +---
>> >  src/gallium/drivers/swr/Makefile.am |  6 +++---
>> >  src/gallium/drivers/swr/SConscript  |  4 ++--
>> >  5 files changed, 16 insertions(+), 17 deletions(-)
>> >
>> > diff --git a/.travis.yml b/.travis.yml
>> > index b1fc7de9587..c9a30fa0ef5 100644
>> > --- a/.travis.yml
>> > +++ b/.travis.yml
>> > @@ -92,7 +92,7 @@ matrix:
>> >  - BUILD=make
>> >  - MAKEFLAGS="-j4"
>> >  - MAKE_CHECK_COMMAND="true"
>> > -- LLVM_VERSION=4.0
>> > +- LLVM_VERSION=5.0
>> >  - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
>> >  - OVERRIDE_CC="gcc-4.8"
>> >  - OVERRIDE_CXX="g++-4.8"
>> > @@ -105,12 +105,12 @@ matrix:
>> >addons:
>> >  apt:
>> >sources:
>> > -- llvm-toolchain-trusty-4.0
>> > +- llvm-toolchain-trusty-5.0
>> >packages:
>> >  # LLVM packaging is broken and misses these dependencies
>> >  - libedit-dev
>> >  # From sources above
>> > -- llvm-4.0-dev
>> > +- llvm-5.0-dev
>> >  # Common
>> >  - xz-utils
>> >  - x11proto-xf86vidmode-dev
>> > @@ -432,7 +432,7 @@ matrix:
>> >  - BUILD=scons
>> >  - SCONSFLAGS="-j4"
>> >  - SCONS_TARGET="swr=1"
>> > -- LLVM_VERSION=4.0
>> > +- LLVM_VERSION=5.0
>> >  - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
>> >  # Keep it symmetrical to the make build. There's no actual SWR, 
>> > yet.
>> >  - SCONS_CHECK_COMMAND="true"
>> > @@ -441,13 +441,13 @@ matrix:
>> >addons:
>> >  apt:
>> >sources:
>> > -- llvm-toolchain-trusty-4.0
>> > +- llvm-toolchain-trusty-5.0
>> >packages:
>> >  - scons
>> >  # LLVM packaging is broken and misses these dependencies
>> >  - libedit-dev
>> >  # From sources above
>> > -- llvm-4.0-dev
>> > +- llvm-5.0-dev
>> >  # Common
>> >  - xz-utils
>> >  - x11proto-xf86vidmode-dev
>> > diff --git a/configure.ac b/configure.ac
>> > index 7a0e4754208..543b6fe061b 100644
>> > --- a/configure.ac
>> > +++ b/configure.ac
>> > @@ -110,7 +110,7 @@ LLVM_REQUIRED_OPENCL=3.9.0
>> >  LLVM_REQUIRED_R600=3.9.0
>> >  LLVM_REQUIRED_RADEONSI=5.0.0
>> >  LLVM_REQUIRED_RADV=5.0.0
>> > -LLVM_REQUIRED_SWR=4.0.0
>> > +LLVM_REQUIRED_SWR=5.0.0
>> >
>> >  dnl Check for progs
>> >  AC_PROG_CPP
>> > @@ -2755,8 +2755,9 @@ if test -n "$with_gallium_drivers"; then
>> >  fi
>> >
>> >  # XXX: Keep in sync with LLVM_REQUIRED_SWR
>> > -AM_CONDITIONAL(SWR_INVALID_LLVM_VERSION, test "x$LLVM_VERSION" != x4.0.0 
>> > -a \
>> > -  "x$LLVM_VERSION" != x4.0.1)
>> > +AM_CONDITIONAL(SWR_INVALID_LLVM_VERSION, test "x$LLVM_VERSION" != x5.0.0 
>> > -a \
>> > +  "x$LLVM_VERSION" != x5.0.1 
>> > -a \
>> > +  "x$LLVM_VERSION" != x5.0.2)
>>
>> That check seems designed to break every time something in mesa changes
>> supported llvm version. Is there a reason for it not to be a simple
>> `>= 4.0` check?
>>
>
> Because gen_builder.hpp is a generated file and it contains information that 
> is
> specific to the LLVM version it originates from. Apparently, this file is
> forward compatible, but not backward, and it is included in dist tarball.
>
> I guess the problem is that if you end up building the distball with, let's 
> say
> LLVM 6.0, then the tarball couldn't be built with LLVM 5.0, due to this file's
> compatibility. Hence why it "forces" to use the minimum common LLVM version 
> for
> all the drivers.
>
> This was added in commit 5233eaf9ee8 ("automake: add SWR LLVM gen_builder.hpp
> workaround") and modified in commit b39f6d5fc7c ("travis: radeonsi and radv 
> need
> LLVM 4.0").
>
Precisely. Currently the python generators produce some _unused_ code.
So when you try to use LLVM 5.0 based sources with LLVM 4.0, you get
compilation failures.

Ideally, the generator will be fixed to remove the unused code, but
that's for another day.

With a confirmation from the SWR devs, the patch 

[Mesa-dev] [Bug 106986] glGetQueryiv error when querying number of result bits for GL_ANY_SAMPLES_PASSED_CONSERVATIVE

2018-06-21 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106986

Bug ID: 106986
   Summary: glGetQueryiv error when querying number of result bits
for GL_ANY_SAMPLES_PASSED_CONSERVATIVE
   Product: Mesa
   Version: git
  Hardware: All
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: Mesa core
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: danylo.pilia...@gmail.com
QA Contact: mesa-dev@lists.freedesktop.org

glGetQueryiv has GL_ANY_SAMPLES_PASSED_CONSERVATIVE as a valid target and spec
says:

> If pname is GL_QUERY_COUNTER_BITS, the implementation-dependent number 
> of bits used to hold the result of queries for target is returned in params. 

Now glGetQueryiv(GL_ANY_SAMPLES_PASSED_CONSERVATIVE, GL_QUERY_COUNTER_BITS,
) produces an error.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4 000/129] nir: Move to using instructions for derefs

2018-06-21 Thread Kenneth Graunke
On Tuesday, June 12, 2018 12:44:44 PM PDT Kenneth Graunke wrote:
> On Thursday, May 31, 2018 10:01:43 PM PDT Jason Ekstrand wrote:
> > Ideally, I'd like the series to get some amount of real review before it
> > lands.  Honestly, it's been baking long enough and tested by enough people
> > on enough drivers that we can probably throw a bunch of Acked-by and
> > Tested-bys on it and call it a day but I'd rather not.  I plan to review
> > all of the patches I didn't write but that will have to wait until
> > tomorrow.
> > 
> > At the very least, I'd like some sort of an ACK from a variety of the
> > people that use NIR on the core concept and the sort of general shape of
> > things at the end of the series.  A lot of work has gone into this but it's
> > also a big change and the more positive feedback it gets, the more
> > comfortable I'll be pulling the trigger.
> 
> Patches 1-28 are
> Reviewed-by: Kenneth Graunke 

Patches 1-105 and 123-129 are:
Reviewed-by: Kenneth Graunke 

Patch 106 (opt_copy_prop_vars) is also R-b assuming that you fix the
bug that I pointed out in my reply there.

I ran out of energy for reviewing the ac/radv/radeonsi patches...not
sure anyone really needs me to anyway.  I think you're good to go on
this series, at least from my point of view.


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH O/4] RadeonSI fixes for Stoney

2018-06-21 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

for the series.

On Thu, Jun 21, 2018 at 3:01 AM, Marek Olšák  wrote:
> These were all discovered on Stoney except for the last patch, which is a 
> precaution.
>
> Please review.
>
> Thanks,
> Marek
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radv: make sure CP DMA is idle at the end of IBs

2018-06-21 Thread Samuel Pitoiset
Ported from RadeonSI.
This might fix some synchronization issues.

I don't know if that will affect performance.

CC: 
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_cmd_buffer.c |  6 ++
 src/amd/vulkan/radv_private.h|  2 ++
 src/amd/vulkan/si_cmd_buffer.c   | 12 +++-
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 8bd41bc41ac..a8ab4d3b977 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -2585,6 +2585,12 @@ VkResult radv_EndCommandBuffer(
si_emit_cache_flush(cmd_buffer);
}
 
+   /* Make sure CP DMA is idle at the end of IBs after L2 prefetches
+* because the kernel doesn't wait for it.
+*/
+   if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK)
+   si_cp_dma_wait_for_idle(cmd_buffer);
+
vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
 
if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs))
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index c77a8b297f8..b0ecb626d4c 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1088,6 +1088,8 @@ void si_cp_dma_prefetch(struct radv_cmd_buffer 
*cmd_buffer, uint64_t va,
 unsigned size);
 void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
uint64_t size, unsigned value);
+void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer);
+
 void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer);
 bool
 radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer,
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index e350bccae33..a5ee79a9e72 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -1034,7 +1034,6 @@ static void si_emit_cp_dma(struct radv_cmd_buffer 
*cmd_buffer,
struct radeon_cmdbuf *cs = cmd_buffer->cs;
uint32_t header = 0, command = 0;
 
-   assert(size);
assert(size <= cp_dma_max_byte_count(cmd_buffer));
 
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
@@ -1237,6 +1236,17 @@ void si_cp_dma_clear_buffer(struct radv_cmd_buffer 
*cmd_buffer, uint64_t va,
}
 }
 
+void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer)
+{
+   /* Issue a dummy DMA that copies zero bytes.
+*
+* The DMA engine will see that there's no work to do and skip this
+* DMA request, however, the CP will see the sync flag and still wait
+* for all DMAs to complete.
+*/
+   si_emit_cp_dma(cmd_buffer, 0, 0, 0, CP_DMA_SYNC);
+}
+
 /* For MSAA sample positions. */
 #define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y)  \
(((s0x) & 0xf) | (((unsigned)(s0y) & 0xf) << 4) |  \
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4 106/129] nir: Rework opt_copy_prop_vars to use deref instructions

2018-06-21 Thread Kenneth Graunke
On Thursday, May 31, 2018 10:06:47 PM PDT Jason Ekstrand wrote:
[snip]
> @@ -529,57 +509,55 @@ static bool
>  load_from_deref_entry_value(struct copy_prop_var_state *state,
>  struct copy_entry *entry,
>  nir_builder *b, nir_intrinsic_instr *intrin,
> -nir_deref_var *src, struct value *value)
> +nir_deref_instr *src, struct value *value)
>  {
> *value = entry->src;
>  
> -   /* Walk the deref to get the two tails and also figure out if we need to
> -* specialize any wildcards.
> -*/
> -   bool need_to_specialize_wildcards = false;
> -   nir_deref *entry_tail = &entry->dst->deref;
> -   nir_deref *src_tail = &src->deref;
> -   while (entry_tail->child && src_tail->child) {
> -  assert(src_tail->child->deref_type == entry_tail->child->deref_type);
> -  if (src_tail->child->deref_type == nir_deref_type_array) {
> - nir_deref_array *entry_arr = nir_deref_as_array(entry_tail->child);
> - nir_deref_array *src_arr = nir_deref_as_array(src_tail->child);
> -

I think there might be a bug here...note this condition...

> - if (src_arr->deref_array_type != nir_deref_array_type_wildcard &&
> - entry_arr->deref_array_type == nir_deref_array_type_wildcard)

Old: Source NOT wildcard, dest is wildcard.

> -need_to_specialize_wildcards = true;
> -  }
> +   b->cursor = nir_instr_remove(&intrin->instr);
>  
> -  entry_tail = entry_tail->child;
> -  src_tail = src_tail->child;
> +   nir_deref_path entry_dst_path, src_path;
> +   nir_deref_path_init(&entry_dst_path, entry->dst, state->mem_ctx);
> +   nir_deref_path_init(&src_path, src, state->mem_ctx);
> +
> +   bool need_to_specialize_wildcards = false;
> +   nir_deref_instr **entry_p = &entry_dst_path.path[1];
> +   nir_deref_instr **src_p = &src_path.path[1];
> +   while (*entry_p && *src_p) {
> +  nir_deref_instr *entry_tail = *entry_p++;
> +  nir_deref_instr *src_tail = *src_p++;
> +
> +  if (src_tail->deref_type == nir_deref_type_array &&
> +  entry_tail->deref_type == nir_deref_type_array_wildcard)

New: Source IS wildcard, dest is wildcard.  I think you want != on the
source condition to match the old behavior.


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/gen6/gs: Handle case where a GS doesn't allocate VUE

2018-06-21 Thread Iago Toral
On Wed, 2018-06-20 at 18:23 +0300, andrii.simiklit wrote:
> Hello,
> 
> Thanks for your feedback.
> 
> 
> 
> > We don't implement GS before gen6, and I don't think there are
> > plans
> > for it at this point, so I think we can just simplify the patch by
> > assuming that devinfo->gen is always going to be 6 here (later gens
> > use
> > a different implementation of GS).
> 
> Got it. I will fix it as soon as we validate this idea)
> 
> > Section 1.6.5.5 VUE Dereference (GS) (vol2, part1) says:
> > 
> > "It is possible and legal for a thread to produce no output
> >  or subsequently allocate a destination VUE that 
> >  was not required (e.g., the thread allocated ahead). 
> >  Therefore, there is a mechanism by which a thread can “give
> > back”  
> >  (dereference) an allocated VUE.  This mechanism must  be used
> > if   
> >  the  VUE is not written before the thread terminates.  A  kernel
> > can 
> >  explicitly dereference a VUE by issuing a URB_WRITE message 
> >  (specifying the to-be-dereference handle) with the Complete 
> >  bit set and the Used bit clear."
> > 
> > This is explicitly saying that COMPLETE + UNUSED is a valid
> > combination, and one that is in fact created for this very purpose.
> > Nothing in that text states that this is Pre-ILK or that this is
> > only
> > for thread pre-allocated VUEs alone.
> 
> Yes I agree that it is valid combination but this is valid only for 
> an allocated VUE (e.g., the thread allocated ahead). 
> As far as I understand, this line explicitly saying that this
> combination 
> only for an allocated VUEs:
>" Therefore, there is a mechanism by which a thread can 'give
> back' 
>  (dereference) an allocated VUE. "
> So according to that and to following section:

Ok, then we are in agreement.
>  Section 1.6.5.4 VUE Allocation:
>  " The following description is applicable only to the GS stage. 
>The threads are not passed an initial handle.  
>In stead, they request a first handle (if any) via the URB 
>shared function’s FF_SYNC message (see Shared Functions).  
>If additional handles are required, 
>the URB_WRITE allocate mechanism (mentioned above) is used."
> 
> If GS doesn't allocate/request VUEs then GS shouldn't use 
> the Dereference (COMPLETE + UNUSED) message. 
> So when GS produces no output GS doesn't allocate VUEs at all
> and GS shouldn't use Dereference message.

Agreed as well. But do notice that none of this is pre-ILK as far as
the documentation goes, it is the same across all supported platforms
up to SNB.
Now, we agree that the issue here is that we should not be
dereferencing handles that have not been allocated, and that the only
situation where this can happen with the existing implementation is
when the GS emits no output at all, right?
The problem with your patch is the following: if my memory serves me
right,  the intended behavior for this was that we always allocate a
VUE, *even* when the GS has no output. This is because otherwise we
need to end the GS program with an IF/ELSE/ENDIF block (like you do in
this patch), and that was undesirable at the time we wrote this
according to feedback from Intel. This is clearly explained in this
comment:
   /* Finally, emit EOT message.
    *
    * In gen6 we need to end the thread differently depending on whether we have
    * emitted at least one vertex or not. In case we did, the EOT message must
    * always include the COMPLETE flag or else the GPU hangs. If we have not
    * produced any output we can't use the COMPLETE flag.
    *
    * However, this would lead us to end the program with an ENDIF opcode,
    * which we want to avoid, so what we do is that we always request a new
    * VUE handle every time we do a URB WRITE, even for the last vertex we emit.
    * With this we make sure that whether we have emitted at least one vertex
    * or none at all, we have to finish the thread without writing to the URB,
    * which works for both cases by setting the COMPLETE and UNUSED flags in
    * the EOT message.
    */
So, unless Kenneth believes that ending the program with an IF/ELSE/ENDIF
block is alright nowadays, this patch poses a problem.
With that being said, it is true that in the case of the GS emitting no
output, we are not allocating a VUE so we cannot dereference it and we
need to fix that. I think we can try the following and see if that
solves the problem:  Instead of terminating the program with an
IF/ELSE/ENDIF, we can make the FF_SYNC happen unconditionally (right
now it only happens if there is at least one vertex to emit). That
should ensure that at least one VUE is allocated when there is no
output and then we can end the program in the same fashion we have been
doing until now. According to Vol4, part2, section 2.4.4.1
FF_SYNC  Message  Header,  it is valid to specify 0 vertices/primitives
for the FF_SYNC message, so that should in theory work.
Could you see if that fixes the problem?
Iago
> > Then in 2.4.2 Message Descriptor (vol4, part2), it