Re: [Mesa-dev] [PATCH 23/25] radeonsi: factor si_query_buffer logic out of si_query_hw

2019-02-01 Thread Timothy Arceri

On 2/2/19 2:56 pm, Marek Olšák wrote:

Feel free to revert the commit if it's not difficult.


Unfortunately it no longer seems to be a simple revert.



Marek

On Thu, Jan 31, 2019, 11:25 PM Timothy Arceri  wrote:


On 26/1/19 11:56 am, Marek Olšák wrote:
 > Timothy, can you please test the attached fix?

I'm having trouble compiling 32-bit mesa on my machine at the moment so
haven't been able to test Batman. But this commit also causes No Man's
Sky to lock up my machine and the attached patch does not fix it.

 >
 > Thanks,
 > Marek
 >
 > On Wed, Jan 2, 2019 at 10:58 PM Timothy Arceri
 > <tarc...@itsqueeze.com> wrote:
 >
 >     This commit seems to cause bad stuttering in the Batman
Arkham City
 >     benchmark.
 >
 >     On 7/12/18 1:00 am, Nicolai Hähnle wrote:
 >      > From: Nicolai Hähnle <nicolai.haeh...@amd.com>
 >      >
 >      > This is a move towards using composition instead of
inheritance for
 >      > different query types.
 >      >
 >      > This change weakens out-of-memory error reporting somewhat,
 >     though this
 >      > should be acceptable since we didn't consistently report such
 >     errors in
 >      > the first place.
 >      > ---
 >      >   src/gallium/drivers/radeonsi/si_perfcounter.c |   8 +-
 >      >   src/gallium/drivers/radeonsi/si_query.c       | 177
 >     +-
 >      >   src/gallium/drivers/radeonsi/si_query.h       |  17 +-
 >      >   src/gallium/drivers/radeonsi/si_texture.c     |   7 +-
 >      >   4 files changed, 99 insertions(+), 110 deletions(-)
 >      >
 >      > diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c
 >     b/src/gallium/drivers/radeonsi/si_perfcounter.c
 >      > index 0b3d8f89273..f0d10c054c4 100644
 >      > --- a/src/gallium/drivers/radeonsi/si_perfcounter.c
 >      > +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
 >      > @@ -761,23 +761,22 @@ static void si_pc_query_destroy(struct
 >     si_screen *sscreen,
 >      >               struct si_query_group *group = query->groups;
 >      >               query->groups = group->next;
 >      >               FREE(group);
 >      >       }
 >      >
 >      >       FREE(query->counters);
 >      >
 >      >       si_query_hw_destroy(sscreen, rquery);
 >      >   }
 >      >
 >      > -static bool si_pc_query_prepare_buffer(struct si_screen
*screen,
 >      > -                                    struct si_query_hw
*hwquery,
 >      > -                                    struct r600_resource
*buffer)
 >      > +static bool si_pc_query_prepare_buffer(struct si_context
*ctx,
 >      > +                                    struct
si_query_buffer *qbuf)
 >      >   {
 >      >       /* no-op */
 >      >       return true;
 >      >   }
 >      >
 >      >   static void si_pc_query_emit_start(struct si_context *sctx,
 >      >                                  struct si_query_hw *hwquery,
 >      >                                  struct r600_resource *buffer,
 >     uint64_t va)
 >      >   {
 >      >       struct si_query_pc *query = (struct si_query_pc
*)hwquery;
 >      > @@ -1055,23 +1054,20 @@ struct pipe_query
 >     *si_create_batch_query(struct pipe_context *ctx,
 >      >               counter->base = group->result_base + j;
 >      >               counter->stride = group->num_counters;
 >      >
 >      >               counter->qwords = 1;
 >      >               if ((block->b->b->flags & SI_PC_BLOCK_SE) &&
 >     group->se < 0)
 >      >                       counter->qwords = screen->info.max_se;
 >      >               if (group->instance < 0)
 >      >                       counter->qwords *= block->num_instances;
 >      >       }
 >      >
 >      > -     if (!si_query_hw_init(screen, >b))
 >      > -             goto error;
 >      > -
 >      >       return (struct pipe_query *)query;
 >      >
 >      >   error:
 >      >       si_pc_query_destroy(screen, >b.b);
 >      >       return NULL;
 >      >   }
 >      >
 >      >   static bool si_init_block_names(struct si_screen *screen,
 >      >                               struct si_pc_block *block)
 >      >   {
 >      > diff --git a/src/gallium/drivers/radeonsi/si_query.c
 >     b/src/gallium/drivers/radeonsi/si_query.c
 >      > index 479a1bbf2c4..5b0fba0ed92 100644
 >      > --- a/src/gallium/drivers/radeonsi/si_query.c
 >      > +++ b/src/gallium/drivers/radeonsi/si_query.c
 >      > @@ 

Re: [Mesa-dev] [PATCH 23/25] radeonsi: factor si_query_buffer logic out of si_query_hw

2019-02-01 Thread Marek Olšák
Feel free to revert the commit if it's not difficult.

Marek

On Thu, Jan 31, 2019, 11:25 PM Timothy Arceri wrote:

> On 26/1/19 11:56 am, Marek Olšák wrote:
> > Timothy, can you please test the attached fix?
>
> I'm having trouble compiling 32-bit mesa on my machine at the moment so
> haven't been able to test Batman. But this commit also causes No Man's
> Sky to lock up my machine and the attached patch does not fix it.
>
> >
> > Thanks,
> > Marek
> >
> > On Wed, Jan 2, 2019 at 10:58 PM Timothy Arceri wrote:
> >
> > This commit seems to cause bad stuttering in the Batman Arkham City
> > benchmark.
> >
> > On 7/12/18 1:00 am, Nicolai Hähnle wrote:
> >  > From: Nicolai Hähnle
> >  >
> >  > This is a move towards using composition instead of inheritance
> for
> >  > different query types.
> >  >
> >  > This change weakens out-of-memory error reporting somewhat,
> > though this
> >  > should be acceptable since we didn't consistently report such
> > errors in
> >  > the first place.
> >  > ---
> >  >   src/gallium/drivers/radeonsi/si_perfcounter.c |   8 +-
> >  >   src/gallium/drivers/radeonsi/si_query.c   | 177
> > +-
> >  >   src/gallium/drivers/radeonsi/si_query.h   |  17 +-
> >  >   src/gallium/drivers/radeonsi/si_texture.c |   7 +-
> >  >   4 files changed, 99 insertions(+), 110 deletions(-)
> >  >
> >  > diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c
> > b/src/gallium/drivers/radeonsi/si_perfcounter.c
> >  > index 0b3d8f89273..f0d10c054c4 100644
> >  > --- a/src/gallium/drivers/radeonsi/si_perfcounter.c
> >  > +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
> >  > @@ -761,23 +761,22 @@ static void si_pc_query_destroy(struct
> > si_screen *sscreen,
> >  >   struct si_query_group *group = query->groups;
> >  >   query->groups = group->next;
> >  >   FREE(group);
> >  >   }
> >  >
> >  >   FREE(query->counters);
> >  >
> >  >   si_query_hw_destroy(sscreen, rquery);
> >  >   }
> >  >
> >  > -static bool si_pc_query_prepare_buffer(struct si_screen *screen,
> >  > -struct si_query_hw *hwquery,
> >  > -struct r600_resource *buffer)
> >  > +static bool si_pc_query_prepare_buffer(struct si_context *ctx,
> >  > +struct si_query_buffer *qbuf)
> >  >   {
> >  >   /* no-op */
> >  >   return true;
> >  >   }
> >  >
> >  >   static void si_pc_query_emit_start(struct si_context *sctx,
> >  >  struct si_query_hw *hwquery,
> >  >  struct r600_resource *buffer,
> > uint64_t va)
> >  >   {
> >  >   struct si_query_pc *query = (struct si_query_pc *)hwquery;
> >  > @@ -1055,23 +1054,20 @@ struct pipe_query
> > *si_create_batch_query(struct pipe_context *ctx,
> >  >   counter->base = group->result_base + j;
> >  >   counter->stride = group->num_counters;
> >  >
> >  >   counter->qwords = 1;
> >  >   if ((block->b->b->flags & SI_PC_BLOCK_SE) &&
> > group->se < 0)
> >  >   counter->qwords = screen->info.max_se;
> >  >   if (group->instance < 0)
> >  >   counter->qwords *= block->num_instances;
> >  >   }
> >  >
> >  > - if (!si_query_hw_init(screen, >b))
> >  > - goto error;
> >  > -
> >  >   return (struct pipe_query *)query;
> >  >
> >  >   error:
> >  >   si_pc_query_destroy(screen, >b.b);
> >  >   return NULL;
> >  >   }
> >  >
> >  >   static bool si_init_block_names(struct si_screen *screen,
> >  >   struct si_pc_block *block)
> >  >   {
> >  > diff --git a/src/gallium/drivers/radeonsi/si_query.c
> > b/src/gallium/drivers/radeonsi/si_query.c
> >  > index 479a1bbf2c4..5b0fba0ed92 100644
> >  > --- a/src/gallium/drivers/radeonsi/si_query.c
> >  > +++ b/src/gallium/drivers/radeonsi/si_query.c
> >  > @@ -514,86 +514,129 @@ static struct pipe_query
> > *si_query_sw_create(unsigned query_type)
> >  >   query = CALLOC_STRUCT(si_query_sw);
> >  >   if (!query)
> >  >   return NULL;
> >  >
> >  >   query->b.type = query_type;
> >  >   query->b.ops = _query_ops;
> >  >
> >  >   return (struct pipe_query *)query;
> >  >   }
> >  >
> >  > -void si_query_hw_destroy(struct si_screen *sscreen,
> >  > -  struct si_query *rquery)
> >  > +void si_query_buffer_destroy(struct si_screen *sscreen, struct
> 

Re: [Mesa-dev] [ANNOUNCE] mesa 19.0.0-rc1

2019-02-01 Thread Mark Janes
Eero Tamminen  writes:

> Hi,
>
> On 31.1.2019 1.37, Dylan Baker wrote:
>> This email announces the mesa 19.0 release candidate 1. I'll keep this email
>> fairly brief since I'm already running a little late on getting this done :)
>> I've just had to resolve quite a few autotools issues to get the dist built.
>> 
>> Notable in the 19.0-rc1 branch is SWR is set to require LLVM 7 instead of 
>> LLVM
>> 6. It is impossible to bootstrap SWR with LLVM 6 and compile with  LLVM 7 
>> due to
>> LLVM API changes. Since RadeonSI and Radv both require LLVM 7 I've taken the
>> liberty of bumping SWR so that we could get a tarball built.
>> 
>> We've had an exciting release cycle, plenty of GL and Vulkan extensions, 
>> ~1600
>> commits since the 18.3 branchpoint with substantial work across all areas of
>> mesa.
>
> Are all the recent (i965) perf regressions included in it:
> * https://bugs.freedesktop.org/show_bug.cgi?id=109517 (spilling)
> * https://bugs.freedesktop.org/show_bug.cgi?id=109505 (Unigine)
> * https://bugs.freedesktop.org/show_bug.cgi?id=109216 (Vulkan)

These regressions all need to be added to the release tracker.  Thank
you for reporting them.

>   - Eero
>
> PS. There's also much older:
> * https://bugs.freedesktop.org/show_bug.cgi?id=107510
>
> Which was already fixed, but then regressed again, and this time the
> regressing commit wasn't reverted.  I'm mentioning it because Timothy had
> a patch series in October that fixed the tess/geom shader regressions
> (which were the largest), but for some reason it's not yet upstream.
>
>> Expect rc2 about this time next week, see you then.
>> 
>> Dylan
>> 
>> git tag: mesa-19.0.0-rc1
>> 
>> https://mesa.freedesktop.org/archive/mesa-19.0.0-rc1.tar.gz
>> MD5:  b3a610b204d0cb3431823353a8cbe8e6  mesa-19.0.0-rc1.tar.gz
>> SHA1: d1f0d0bc49ec7e02d0cd7d141127fd2fefc72e35  mesa-19.0.0-rc1.tar.gz
>> SHA256: 0a14bb059f6cead4e50923df9c24d3c5025d9310803ca5189e019f07e539639e  
>> mesa-19.0.0-rc1.tar.gz
>> SHA512: 
>> 5bedc917afecef6a0dd11c56688a3e3fdbbaeaceca33062d6825b5525c6e78663e873bdecc96b98b0448d988ad81a7a8617c523e2d312384369c6a333b790b86
>>   mesa-19.0.0-rc1.tar.gz
>> PGP:  https://mesa.freedesktop.org/archive/mesa-19.0.0-rc1.tar.gz.sig
>> 
>> https://mesa.freedesktop.org/archive/mesa-19.0.0-rc1.tar.xz
>> MD5:  727abb6469e518ff1a2e1bde33543503  mesa-19.0.0-rc1.tar.xz
>> SHA1: 577642259cd269c883007df7c2772c8c636fabfb  mesa-19.0.0-rc1.tar.xz
>> SHA256: 8efb32956c428d23f78364f9eace5491bda9feaafd767128133672a5f79659e8  
>> mesa-19.0.0-rc1.tar.xz
>> SHA512: 
>> 23d21d6c4f03a1d9073ecb1f43dc251d581cdeb6b7cc24a19c299571070b4184ad4f22b0ca170ca42e58c62bb46eca0dadc334a952bbb7e0379961a30a6ca856
>>   mesa-19.0.0-rc1.tar.xz
>> PGP:  https://mesa.freedesktop.org/archive/mesa-19.0.0-rc1.tar.xz.sig
>> 
>> 
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>> 
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/6] glsl/linker: don't fail non static used inputs without matching outputs

2019-02-01 Thread Timothy Arceri

On 2/2/19 10:28 am, Timothy Arceri wrote:



On 2/2/19 5:05 am, Andres Gomez wrote:

If there is no Static Use of an input variable, the linker shouldn't
fail whenever there is no defined matching output variable in the
previous stage.

 From page 47 (page 51 of the PDF) of the GLSL 4.60 v.5 spec:

   " Only the input variables that are statically read need to be
 written by the previous stage; it is allowed to have superfluous
 declarations of input variables."

Now, we complete this exception whenever the input variable has an
explicit location. Previously, 18004c338f6 ("glsl: fail when a
shader's input var has not an equivalent out var in previous") took
care of the cases in which the input variable didn't have an explicit
location.

Additionally, likewise 1aa5738e666 ("glsl: relax input->output
validation for SSO programs"), avoid failing also for programs that
utilize GL_ARB_separate_shader_objects.

Cc: Timothy Arceri 
Cc: Iago Toral Quiroga 
Cc: Samuel Iglesias Gonsálvez 
Cc: Tapani Pälli 
Cc: Ian Romanick 
Signed-off-by: Andres Gomez 
---
  src/compiler/glsl/link_varyings.cpp | 16 ++--
  1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/link_varyings.cpp 
b/src/compiler/glsl/link_varyings.cpp

index e5f7d3e322a..6cebc5b3c5a 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -808,8 +808,20 @@ cross_validate_outputs_to_inputs(struct 
gl_context *ctx,
 output = 
output_explicit_locations[idx][input->data.location_frac].var;

-   if (output == NULL ||
-   input->data.location != output->data.location) {
+   if (output == NULL) {
+  /* A linker failure should only happen when, for 
programs
+   * not using sso, there is no output declaration 
and there

+   * is Static Use of the declared input.
+   */
+  if (input->data.used && !prog->SeparateShader) {


This is not really what `used` was designed for, so it's always a bit 
unsettling to see this type of thing.


However, it's better than what we do now and is consistent with 
18004c338f6, so this patch is:


Reviewed-by: Timothy Arceri 


Actually I take this back after seeing Ilia's comment. I think we need 
some more piglit tests for the SSO cases where an SSO contains two or 
more stages.






+ linker_error(prog,
+  "%s shader input `%s' with explicit 
location "

+  "has no matching output\n",
+  
_mesa_shader_stage_to_string(consumer->Stage),

+  input->name);
+ break;
+  }
+   } else if (input->data.location != 
output->data.location) {

    linker_error(prog,
 "%s shader input `%s' with explicit 
location "

 "has no matching output\n",


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/6] glsl: correctly validate component layout qualifier for dvec{3, 4}

2019-02-01 Thread Timothy Arceri

On 2/2/19 5:05 am, Andres Gomez wrote:

 From page 62 (page 68 of the PDF) of the GLSL 4.50 v.7 spec:

   " A dvec3 or dvec4 can only be declared without specifying a
 component."

Therefore, using the "component" qualifier with a dvec3 or dvec4
should result in a compilation error.

Fixes: 94438578d21 ("glsl: validate and store component layout qualifier in GLSL 
IR")
Cc: Timothy Arceri 
Cc: Kenneth Graunke 
Signed-off-by: Andres Gomez 
---
  src/compiler/glsl/ast_to_hir.cpp | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index 620153e6a34..6d060fd94cd 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -3698,6 +3698,9 @@ apply_layout_qualifier_to_variable(const struct 
ast_type_qualifier *qual,
  "cannot be applied to a matrix, a structure, "
  "a block, or an array containing any of "
  "these.");
+} else if (components > 4 && type->is_64bit()) {
+   _mesa_glsl_error(loc, state, "component layout qualifier "
+"cannot be applied to dvec3 nor dvec4.");


maybe:

"cannot be applied to dvec%u", components / 2)

Otherwise:

Reviewed-by: Timothy Arceri 


  } else if (qual_component != 0 &&
  (qual_component + components - 1) > 3) {
 _mesa_glsl_error(loc, state, "component overflow (%u > 3)",


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/6] glsl/linker: don't fail non static used inputs without matching outputs

2019-02-01 Thread Ilia Mirkin
On Fri, Feb 1, 2019 at 1:08 PM Andres Gomez  wrote:
>
> If there is no Static Use of an input variable, the linker shouldn't
> fail whenever there is no defined matching output variable in the
> previous stage.
>
> From page 47 (page 51 of the PDF) of the GLSL 4.60 v.5 spec:
>
>   " Only the input variables that are statically read need to be
> written by the previous stage; it is allowed to have superfluous
> declarations of input variables."
>
> Now, we complete this exception whenever the input variable has an
> explicit location. Previously, 18004c338f6 ("glsl: fail when a
> shader's input var has not an equivalent out var in previous") took
> care of the cases in which the input variable didn't have an explicit
> location.
>
> Additionally, likewise 1aa5738e666 ("glsl: relax input->output
> validation for SSO programs"), avoid failing also for programs that
> utilize GL_ARB_separate_shader_objects.
>
> Cc: Timothy Arceri 
> Cc: Iago Toral Quiroga 
> Cc: Samuel Iglesias Gonsálvez 
> Cc: Tapani Pälli 
> Cc: Ian Romanick 
> Signed-off-by: Andres Gomez 
> ---
>  src/compiler/glsl/link_varyings.cpp | 16 ++--
>  1 file changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/src/compiler/glsl/link_varyings.cpp 
> b/src/compiler/glsl/link_varyings.cpp
> index e5f7d3e322a..6cebc5b3c5a 100644
> --- a/src/compiler/glsl/link_varyings.cpp
> +++ b/src/compiler/glsl/link_varyings.cpp
> @@ -808,8 +808,20 @@ cross_validate_outputs_to_inputs(struct gl_context *ctx,
>
> output = 
> output_explicit_locations[idx][input->data.location_frac].var;
>
> -   if (output == NULL ||
> -   input->data.location != output->data.location) {
> +   if (output == NULL) {
> +  /* A linker failure should only happen when, for programs
> +   * not using sso, there is no output declaration and there
> +   * is Static Use of the declared input.
> +   */
> +  if (input->data.used && !prog->SeparateShader) {

Should this differentiate whether this is the first stage of a
separable program vs a later one? Presumably the exception only
applies at the separate program boundary, not to each shader within
the program?

> + linker_error(prog,
> +  "%s shader input `%s' with explicit 
> location "
> +  "has no matching output\n",
> +  
> _mesa_shader_stage_to_string(consumer->Stage),
> +  input->name);
> + break;
> +  }
> +   } else if (input->data.location != output->data.location) {
>linker_error(prog,
> "%s shader input `%s' with explicit location "
> "has no matching output\n",
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/6] glsl/linker: always validate explicit location among inputs

2019-02-01 Thread Timothy Arceri

Nice catch!

Reviewed-by: Timothy Arceri 

On 2/2/19 5:05 am, Andres Gomez wrote:

Outputs are always validated when having explicit locations and we
were trusting its outcome to catch similar problems with the inputs
since, in case of having undefined outputs for existing inputs, we
would be already reporting a linker error.

However, consider this case:

   " Shader stage n:
 ---

 ...

 layout(location = 0) out float a;

 ...

 Shader stage n+1:
 -

 ...

 layout(location = 0) in float b;
 layout(location = 0) in float c;

 ...
   "

Currently, this won't report a linker error even though location
aliasing is happening for the inputs.

Therefore, we also need to validate the inputs independently from the
outcome of the outputs validation.

Cc: Timothy Arceri 
Cc: Iago Toral Quiroga 
Cc: Ilia Mirkin 
Signed-off-by: Andres Gomez 
---
  src/compiler/glsl/link_varyings.cpp | 13 ++---
  1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/compiler/glsl/link_varyings.cpp 
b/src/compiler/glsl/link_varyings.cpp
index 3f41832ac93..e5f7d3e322a 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -715,7 +715,8 @@ cross_validate_outputs_to_inputs(struct gl_context *ctx,
   gl_linked_shader *consumer)
  {
 glsl_symbol_table parameters;
-   struct explicit_location_info explicit_locations[MAX_VARYING][4] = { 0 };
+   struct explicit_location_info output_explicit_locations[MAX_VARYING][4] = { 
0 };
+   struct explicit_location_info input_explicit_locations[MAX_VARYING][4] = { 
0 };
  
 /* Find all shader outputs in the "producer" stage.

  */
@@ -733,7 +734,7 @@ cross_validate_outputs_to_inputs(struct gl_context *ctx,
* differently because they do not need to have matching names.
*/
   if (!validate_explicit_variable_location(ctx,
-  explicit_locations,
+  output_explicit_locations,
var, prog, producer)) {
  return;
   }
@@ -791,6 +792,12 @@ cross_validate_outputs_to_inputs(struct gl_context *ctx,
 compute_variable_location_slot(input, consumer->Stage);
  unsigned slot_limit = idx + num_elements;
  
+if (!validate_explicit_variable_location(ctx,

+ input_explicit_locations,
+ input, prog, consumer)) {
+   return;
+}
+
  while (idx < slot_limit) {
 if (idx >= MAX_VARYING) {
linker_error(prog,
@@ -799,7 +806,7 @@ cross_validate_outputs_to_inputs(struct gl_context *ctx,
return;
 }
  
-   output = explicit_locations[idx][input->data.location_frac].var;

+   output = 
output_explicit_locations[idx][input->data.location_frac].var;
  
 if (output == NULL ||

 input->data.location != output->data.location) {


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/6] glsl/linker: don't fail non static used inputs without matching outputs

2019-02-01 Thread Timothy Arceri



On 2/2/19 5:05 am, Andres Gomez wrote:

If there is no Static Use of an input variable, the linker shouldn't
fail whenever there is no defined matching output variable in the
previous stage.

 From page 47 (page 51 of the PDF) of the GLSL 4.60 v.5 spec:

   " Only the input variables that are statically read need to be
 written by the previous stage; it is allowed to have superfluous
 declarations of input variables."

Now, we complete this exception whenever the input variable has an
explicit location. Previously, 18004c338f6 ("glsl: fail when a
shader's input var has not an equivalent out var in previous") took
care of the cases in which the input variable didn't have an explicit
location.

Additionally, likewise 1aa5738e666 ("glsl: relax input->output
validation for SSO programs"), avoid failing also for programs that
utilize GL_ARB_separate_shader_objects.

Cc: Timothy Arceri 
Cc: Iago Toral Quiroga 
Cc: Samuel Iglesias Gonsálvez 
Cc: Tapani Pälli 
Cc: Ian Romanick 
Signed-off-by: Andres Gomez 
---
  src/compiler/glsl/link_varyings.cpp | 16 ++--
  1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/link_varyings.cpp 
b/src/compiler/glsl/link_varyings.cpp
index e5f7d3e322a..6cebc5b3c5a 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -808,8 +808,20 @@ cross_validate_outputs_to_inputs(struct gl_context *ctx,
  
 output = output_explicit_locations[idx][input->data.location_frac].var;
  
-   if (output == NULL ||

-   input->data.location != output->data.location) {
+   if (output == NULL) {
+  /* A linker failure should only happen when, for programs
+   * not using sso, there is no output declaration and there
+   * is Static Use of the declared input.
+   */
+  if (input->data.used && !prog->SeparateShader) {


This is not really what `used` was designed for, so it's always a bit 
unsettling to see this type of thing.


However, it's better than what we do now and is consistent with 
18004c338f6, so this patch is:


Reviewed-by: Timothy Arceri 



+ linker_error(prog,
+  "%s shader input `%s' with explicit location 
"
+  "has no matching output\n",
+  
_mesa_shader_stage_to_string(consumer->Stage),
+  input->name);
+ break;
+  }
+   } else if (input->data.location != output->data.location) {
linker_error(prog,
 "%s shader input `%s' with explicit location "
 "has no matching output\n",


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/6] glsl/linker: simplify xfb_offset vs xfb_stride overflow check

2019-02-01 Thread Timothy Arceri

On 2/2/19 5:05 am, Andres Gomez wrote:

The current implementation uses a complicated calculation which relies on
an implicit conversion to check the integral part of 2 division
results.

However, the calculation actually checks that the xfb_offset is
smaller than or a multiple of the xfb_stride. For example, while this is
expected to fail, it actually succeeds:

   "

 ...

 layout(xfb_buffer = 2, xfb_stride = 12) out block3 {
   layout(xfb_offset = 0) vec3 c;
   layout(xfb_offset = 12) vec3 d; // ERROR, requires stride of 24


Why does this require a stride of 24?

vec3 c uses bytes 0-11. So there is no issue with vec3 d having an 
offset of 12. It's been a long time since I worked on this, but I think 
this change is wrong. I see no reason this should fail compilation.




 };

 ...

   "

Fixes: 2fab85aaea5 ("glsl: add xfb_stride link time validation")
Cc: Timothy Arceri 
Signed-off-by: Andres Gomez 
---
  src/compiler/glsl/link_varyings.cpp | 3 +--
  1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/compiler/glsl/link_varyings.cpp 
b/src/compiler/glsl/link_varyings.cpp
index 6cebc5b3c5a..ab66ceb0d00 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -1213,8 +1213,7 @@ tfeedback_decl::store(struct gl_context *ctx, struct 
gl_shader_program *prog,
   return false;
}
  
-  if ((this->offset / 4) / info->Buffers[buffer].Stride !=

-  (xfb_offset - 1) / info->Buffers[buffer].Stride) {
+  if (xfb_offset > info->Buffers[buffer].Stride) {
   linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for "
"buffer (%d)", xfb_offset * 4,
info->Buffers[buffer].Stride * 4, buffer);


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 07/32] intel/isl: Rename ISL_TILING_Yf/s to ISL_TILING_GEN9_Yf/s

2019-02-01 Thread Jason Ekstrand
TBH, I don't for sure remember why this was needed.  I think it was because
gen10 has a slightly different miptail layout.  That said, miptails are
still broken on gen10 so maybe there's really no point?  I don't remember
for sure.

--Jason

On Fri, Oct 12, 2018 at 1:47 PM Jason Ekstrand  wrote:

> The Yf and Ys tilings change a bit between gen9 and gen10 so we have to
> be able to distinguish between them.
> ---
>  src/intel/isl/isl.c   | 12 ++--
>  src/intel/isl/isl.h   | 16 
>  src/intel/isl/isl_drm.c   |  4 ++--
>  src/intel/isl/isl_gen7.c  |  8 
>  src/intel/isl/isl_gen9.c  |  2 +-
>  src/intel/isl/isl_surface_state.c |  4 ++--
>  6 files changed, 23 insertions(+), 23 deletions(-)
>
> diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
> index d6beee987b5..392c15ca3fb 100644
> --- a/src/intel/isl/isl.c
> +++ b/src/intel/isl/isl.c
> @@ -217,9 +217,9 @@ isl_tiling_get_info(enum isl_tiling tiling,
>phys_B = isl_extent2d(128, 32);
>break;
>
> -   case ISL_TILING_Yf:
> -   case ISL_TILING_Ys: {
> -  bool is_Ys = tiling == ISL_TILING_Ys;
> +   case ISL_TILING_GEN9_Yf:
> +   case ISL_TILING_GEN9_Ys: {
> +  bool is_Ys = tiling == ISL_TILING_GEN9_Ys;
>
>assert(bs > 0);
>unsigned width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys));
> @@ -375,8 +375,8 @@ isl_surf_choose_tiling(const struct isl_device *dev,
>CHOOSE(ISL_TILING_LINEAR);
> }
>
> -   CHOOSE(ISL_TILING_Ys);
> -   CHOOSE(ISL_TILING_Yf);
> +   CHOOSE(ISL_TILING_GEN9_Ys);
> +   CHOOSE(ISL_TILING_GEN9_Yf);
> CHOOSE(ISL_TILING_Y0);
> CHOOSE(ISL_TILING_X);
> CHOOSE(ISL_TILING_W);
> @@ -715,7 +715,7 @@ isl_calc_phys_level0_extent_sa(const struct isl_device
> *dev,
>   assert(dim_layout == ISL_DIM_LAYOUT_GEN4_2D ||
>  dim_layout == ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ);
>
> -  if (tiling == ISL_TILING_Ys && info->samples > 1)
> +  if (tiling == ISL_TILING_GEN9_Ys && info->samples > 1)
>   isl_finishme("%s:%s: multisample TileYs layout", __FILE__,
> __func__);
>
>switch (msaa_layout) {
> diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h
> index 4f8d38e22fb..1c7990f2dc7 100644
> --- a/src/intel/isl/isl.h
> +++ b/src/intel/isl/isl.h
> @@ -460,8 +460,8 @@ enum isl_tiling {
> ISL_TILING_W,
> ISL_TILING_X,
> ISL_TILING_Y0, /**< Legacy Y tiling */
> -   ISL_TILING_Yf, /**< Standard 4K tiling. The 'f' means "four". */
> -   ISL_TILING_Ys, /**< Standard 64K tiling. The 's' means "sixty-four". */
> +   ISL_TILING_GEN9_Yf, /**< Standard 4K tiling. The 'f' means "four". */
> +   ISL_TILING_GEN9_Ys, /**< Standard 64K tiling. The 's' means
> "sixty-four". */
> ISL_TILING_HIZ, /**< Tiling format for HiZ surfaces */
> ISL_TILING_CCS, /**< Tiling format for CCS surfaces */
>  };
> @@ -475,8 +475,8 @@ typedef uint32_t isl_tiling_flags_t;
>  #define ISL_TILING_W_BIT  (1u << ISL_TILING_W)
>  #define ISL_TILING_X_BIT  (1u << ISL_TILING_X)
>  #define ISL_TILING_Y0_BIT (1u << ISL_TILING_Y0)
> -#define ISL_TILING_Yf_BIT (1u << ISL_TILING_Yf)
> -#define ISL_TILING_Ys_BIT (1u << ISL_TILING_Ys)
> +#define ISL_TILING_GEN9_Yf_BIT(1u << ISL_TILING_GEN9_Yf)
> +#define ISL_TILING_GEN9_Ys_BIT(1u << ISL_TILING_GEN9_Ys)
>  #define ISL_TILING_HIZ_BIT(1u << ISL_TILING_HIZ)
>  #define ISL_TILING_CCS_BIT(1u << ISL_TILING_CCS)
>  #define ISL_TILING_ANY_MASK   (~0u)
> @@ -484,12 +484,12 @@ typedef uint32_t isl_tiling_flags_t;
>
>  /** Any Y tiling, including legacy Y tiling. */
>  #define ISL_TILING_ANY_Y_MASK (ISL_TILING_Y0_BIT | \
> -   ISL_TILING_Yf_BIT | \
> -   ISL_TILING_Ys_BIT)
> +   ISL_TILING_GEN9_Yf_BIT | \
> +   ISL_TILING_GEN9_Ys_BIT)
>
>  /** The Skylake BSpec refers to Yf and Ys as "standard tiling formats". */
> -#define ISL_TILING_STD_Y_MASK (ISL_TILING_Yf_BIT | \
> -   ISL_TILING_Ys_BIT)
> +#define ISL_TILING_STD_Y_MASK (ISL_TILING_GEN9_Yf_BIT | \
> +   ISL_TILING_GEN9_Ys_BIT)
>  /** @} */
>
>  /**
> diff --git a/src/intel/isl/isl_drm.c b/src/intel/isl/isl_drm.c
> index e16d7b63917..62fdd22d10d 100644
> --- a/src/intel/isl/isl_drm.c
> +++ b/src/intel/isl/isl_drm.c
> @@ -44,8 +44,8 @@ isl_tiling_to_i915_tiling(enum isl_tiling tiling)
>return I915_TILING_Y;
>
> case ISL_TILING_W:
> -   case ISL_TILING_Yf:
> -   case ISL_TILING_Ys:
> +   case ISL_TILING_GEN9_Yf:
> +   case ISL_TILING_GEN9_Ys:
> case ISL_TILING_HIZ:
> case ISL_TILING_CCS:
>return I915_TILING_NONE;
> diff --git a/src/intel/isl/isl_gen7.c b/src/intel/isl/isl_gen7.c
> 

Re: [Mesa-dev] [PATCH v2 08/32] intel/isl: Add gen10 variants of Yf and Ys tiling

2019-02-01 Thread Nanley Chery
On Fri, Oct 12, 2018 at 01:46:38PM -0500, Jason Ekstrand wrote:
> ---
>  src/intel/isl/isl.c   |  9 +++--
>  src/intel/isl/isl.h   | 12 ++--
>  src/intel/isl/isl_drm.c   |  2 ++
>  src/intel/isl/isl_gen7.c  |  8 +++-
>  src/intel/isl/isl_surface_state.c |  2 ++
>  5 files changed, 28 insertions(+), 5 deletions(-)
> 

This patch is
Reviewed-by: Nanley Chery 

> diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
> index 392c15ca3fb..3ffc6f627b2 100644
> --- a/src/intel/isl/isl.c
> +++ b/src/intel/isl/isl.c
> @@ -218,8 +218,11 @@ isl_tiling_get_info(enum isl_tiling tiling,
>break;
>  
> case ISL_TILING_GEN9_Yf:
> -   case ISL_TILING_GEN9_Ys: {
> -  bool is_Ys = tiling == ISL_TILING_GEN9_Ys;
> +   case ISL_TILING_GEN9_Ys:
> +   case ISL_TILING_GEN10_Yf:
> +   case ISL_TILING_GEN10_Ys: {
> +  bool is_Ys = tiling == ISL_TILING_GEN9_Ys ||
> +   tiling == ISL_TILING_GEN10_Ys;
>  
>assert(bs > 0);
>unsigned width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys));
> @@ -375,7 +378,9 @@ isl_surf_choose_tiling(const struct isl_device *dev,
>CHOOSE(ISL_TILING_LINEAR);
> }
>  
> +   CHOOSE(ISL_TILING_GEN10_Ys);
> CHOOSE(ISL_TILING_GEN9_Ys);
> +   CHOOSE(ISL_TILING_GEN10_Yf);
> CHOOSE(ISL_TILING_GEN9_Yf);
> CHOOSE(ISL_TILING_Y0);
> CHOOSE(ISL_TILING_X);
> diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h
> index 1c7990f2dc7..200bfbfa85b 100644
> --- a/src/intel/isl/isl.h
> +++ b/src/intel/isl/isl.h
> @@ -462,6 +462,8 @@ enum isl_tiling {
> ISL_TILING_Y0, /**< Legacy Y tiling */
> ISL_TILING_GEN9_Yf, /**< Standard 4K tiling. The 'f' means "four". */
> ISL_TILING_GEN9_Ys, /**< Standard 64K tiling. The 's' means "sixty-four". 
> */
> +   ISL_TILING_GEN10_Yf, /**< Standard 4K tiling. The 'f' means "four". */
> +   ISL_TILING_GEN10_Ys, /**< Standard 64K tiling. The 's' means 
> "sixty-four". */
> ISL_TILING_HIZ, /**< Tiling format for HiZ surfaces */
> ISL_TILING_CCS, /**< Tiling format for CCS surfaces */
>  };
> @@ -477,6 +479,8 @@ typedef uint32_t isl_tiling_flags_t;
>  #define ISL_TILING_Y0_BIT (1u << ISL_TILING_Y0)
>  #define ISL_TILING_GEN9_Yf_BIT(1u << ISL_TILING_GEN9_Yf)
>  #define ISL_TILING_GEN9_Ys_BIT(1u << ISL_TILING_GEN9_Ys)
> +#define ISL_TILING_GEN10_Yf_BIT   (1u << ISL_TILING_GEN10_Yf)
> +#define ISL_TILING_GEN10_Ys_BIT   (1u << ISL_TILING_GEN10_Ys)
>  #define ISL_TILING_HIZ_BIT(1u << ISL_TILING_HIZ)
>  #define ISL_TILING_CCS_BIT(1u << ISL_TILING_CCS)
>  #define ISL_TILING_ANY_MASK   (~0u)
> @@ -485,11 +489,15 @@ typedef uint32_t isl_tiling_flags_t;
>  /** Any Y tiling, including legacy Y tiling. */
>  #define ISL_TILING_ANY_Y_MASK (ISL_TILING_Y0_BIT | \
> ISL_TILING_GEN9_Yf_BIT | \
> -   ISL_TILING_GEN9_Ys_BIT)
> +   ISL_TILING_GEN9_Ys_BIT | \
> +   ISL_TILING_GEN10_Yf_BIT | \
> +   ISL_TILING_GEN10_Ys_BIT)
>  
>  /** The Skylake BSpec refers to Yf and Ys as "standard tiling formats". */
>  #define ISL_TILING_STD_Y_MASK (ISL_TILING_GEN9_Yf_BIT | \
> -   ISL_TILING_GEN9_Ys_BIT)
> +   ISL_TILING_GEN9_Ys_BIT | \
> +   ISL_TILING_GEN10_Yf_BIT | \
> +   ISL_TILING_GEN10_Ys_BIT)
>  /** @} */
>  
>  /**
> diff --git a/src/intel/isl/isl_drm.c b/src/intel/isl/isl_drm.c
> index 62fdd22d10d..03f433a1058 100644
> --- a/src/intel/isl/isl_drm.c
> +++ b/src/intel/isl/isl_drm.c
> @@ -46,6 +46,8 @@ isl_tiling_to_i915_tiling(enum isl_tiling tiling)
> case ISL_TILING_W:
> case ISL_TILING_GEN9_Yf:
> case ISL_TILING_GEN9_Ys:
> +   case ISL_TILING_GEN10_Yf:
> +   case ISL_TILING_GEN10_Ys:
> case ISL_TILING_HIZ:
> case ISL_TILING_CCS:
>return I915_TILING_NONE;
> diff --git a/src/intel/isl/isl_gen7.c b/src/intel/isl/isl_gen7.c
> index 91cea299abc..f6f7e1ba7dc 100644
> --- a/src/intel/isl/isl_gen7.c
> +++ b/src/intel/isl/isl_gen7.c
> @@ -197,16 +197,22 @@ isl_gen6_filter_tiling(const struct isl_device *dev,
> assert(ISL_DEV_USE_SEPARATE_STENCIL(dev));
>  
> /* Clear flags unsupported on this hardware */
> -   if (ISL_DEV_GEN(dev) < 9) {
> +   if (ISL_DEV_GEN(dev) != 9) {
>*flags &= ~ISL_TILING_GEN9_Yf_BIT;
>*flags &= ~ISL_TILING_GEN9_Ys_BIT;
> }
> +   if (ISL_DEV_GEN(dev) < 10) {
> +  *flags &= ~ISL_TILING_GEN10_Yf_BIT;
> +  *flags &= ~ISL_TILING_GEN10_Ys_BIT;
> +   }
>  
> /* And... clear the Yf and Ys bits anyway because Anvil doesn't support
>  * them yet.
>  */
> *flags &= ~ISL_TILING_GEN9_Yf_BIT; /* FINISHME[SKL]: 

Re: [Mesa-dev] [PATCH v2 07/32] intel/isl: Rename ISL_TILING_Yf/s to ISL_TILING_GEN9_Yf/s

2019-02-01 Thread Nanley Chery
On Wed, Jan 23, 2019 at 02:25:14PM -0800, Nanley Chery wrote:
> On Fri, Oct 12, 2018 at 01:46:37PM -0500, Jason Ekstrand wrote:
> > The Yf and Ys tilings change a bit between gen9 and gen10 so we have to
> > be able to distinguish between them.
> > ---
> >  src/intel/isl/isl.c   | 12 ++--
> >  src/intel/isl/isl.h   | 16 
> >  src/intel/isl/isl_drm.c   |  4 ++--
> >  src/intel/isl/isl_gen7.c  |  8 
> >  src/intel/isl/isl_gen9.c  |  2 +-
> >  src/intel/isl/isl_surface_state.c |  4 ++--
> >  6 files changed, 23 insertions(+), 23 deletions(-)
> > 
> > diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
> > index d6beee987b5..392c15ca3fb 100644
> > --- a/src/intel/isl/isl.c
> > +++ b/src/intel/isl/isl.c
> > @@ -217,9 +217,9 @@ isl_tiling_get_info(enum isl_tiling tiling,
> >phys_B = isl_extent2d(128, 32);
> >break;
> >  
> > -   case ISL_TILING_Yf:
> > -   case ISL_TILING_Ys: {
> > -  bool is_Ys = tiling == ISL_TILING_Ys;
> > +   case ISL_TILING_GEN9_Yf:
> > +   case ISL_TILING_GEN9_Ys: {
> > +  bool is_Ys = tiling == ISL_TILING_GEN9_Ys;
> >  
> >assert(bs > 0);
> >unsigned width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys));
> > @@ -375,8 +375,8 @@ isl_surf_choose_tiling(const struct isl_device *dev,
> >CHOOSE(ISL_TILING_LINEAR);
> > }
> >  
> > -   CHOOSE(ISL_TILING_Ys);
> > -   CHOOSE(ISL_TILING_Yf);
> > +   CHOOSE(ISL_TILING_GEN9_Ys);
> > +   CHOOSE(ISL_TILING_GEN9_Yf);
> > CHOOSE(ISL_TILING_Y0);
> > CHOOSE(ISL_TILING_X);
> > CHOOSE(ISL_TILING_W);
> > @@ -715,7 +715,7 @@ isl_calc_phys_level0_extent_sa(const struct isl_device 
> > *dev,
> >   assert(dim_layout == ISL_DIM_LAYOUT_GEN4_2D ||
> >  dim_layout == ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ);
> >  
> > -  if (tiling == ISL_TILING_Ys && info->samples > 1)
> > +  if (tiling == ISL_TILING_GEN9_Ys && info->samples > 1)
> >   isl_finishme("%s:%s: multisample TileYs layout", __FILE__, 
> > __func__);
> >  
> 
> Shouldn't the next patch be updated with a similar change?
> 

This block is never deleted in this series.

> >switch (msaa_layout) {
> > diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h
> > index 4f8d38e22fb..1c7990f2dc7 100644
> > --- a/src/intel/isl/isl.h
> > +++ b/src/intel/isl/isl.h
> > @@ -460,8 +460,8 @@ enum isl_tiling {
> > ISL_TILING_W,
> > ISL_TILING_X,
> > ISL_TILING_Y0, /**< Legacy Y tiling */
> > -   ISL_TILING_Yf, /**< Standard 4K tiling. The 'f' means "four". */
> > -   ISL_TILING_Ys, /**< Standard 64K tiling. The 's' means "sixty-four". */
> > +   ISL_TILING_GEN9_Yf, /**< Standard 4K tiling. The 'f' means "four". */
> > +   ISL_TILING_GEN9_Ys, /**< Standard 64K tiling. The 's' means 
> > "sixty-four". */
> > ISL_TILING_HIZ, /**< Tiling format for HiZ surfaces */
> > ISL_TILING_CCS, /**< Tiling format for CCS surfaces */
> >  };
> > @@ -475,8 +475,8 @@ typedef uint32_t isl_tiling_flags_t;
> >  #define ISL_TILING_W_BIT  (1u << ISL_TILING_W)
> >  #define ISL_TILING_X_BIT  (1u << ISL_TILING_X)
> >  #define ISL_TILING_Y0_BIT (1u << ISL_TILING_Y0)
> > -#define ISL_TILING_Yf_BIT (1u << ISL_TILING_Yf)
> > -#define ISL_TILING_Ys_BIT (1u << ISL_TILING_Ys)
> > +#define ISL_TILING_GEN9_Yf_BIT(1u << ISL_TILING_GEN9_Yf)
> > +#define ISL_TILING_GEN9_Ys_BIT(1u << ISL_TILING_GEN9_Ys)
> >  #define ISL_TILING_HIZ_BIT(1u << ISL_TILING_HIZ)
> >  #define ISL_TILING_CCS_BIT(1u << ISL_TILING_CCS)
> >  #define ISL_TILING_ANY_MASK   (~0u)
> > @@ -484,12 +484,12 @@ typedef uint32_t isl_tiling_flags_t;
> >  
> >  /** Any Y tiling, including legacy Y tiling. */
> >  #define ISL_TILING_ANY_Y_MASK (ISL_TILING_Y0_BIT | \
> > -   ISL_TILING_Yf_BIT | \
> > -   ISL_TILING_Ys_BIT)
> > +   ISL_TILING_GEN9_Yf_BIT | \
> > +   ISL_TILING_GEN9_Ys_BIT)
> >  
> >  /** The Skylake BSpec refers to Yf and Ys as "standard tiling formats". */
> > -#define ISL_TILING_STD_Y_MASK (ISL_TILING_Yf_BIT | \
> > -   ISL_TILING_Ys_BIT)
> > +#define ISL_TILING_STD_Y_MASK (ISL_TILING_GEN9_Yf_BIT | \
> > +   ISL_TILING_GEN9_Ys_BIT)
> >  /** @} */
> >  
> >  /**
> > diff --git a/src/intel/isl/isl_drm.c b/src/intel/isl/isl_drm.c
> > index e16d7b63917..62fdd22d10d 100644
> > --- a/src/intel/isl/isl_drm.c
> > +++ b/src/intel/isl/isl_drm.c
> > @@ -44,8 +44,8 @@ isl_tiling_to_i915_tiling(enum isl_tiling tiling)
> >return I915_TILING_Y;
> >  
> > case ISL_TILING_W:
> > -   case ISL_TILING_Yf:
> > -   case ISL_TILING_Ys:
> > +   case ISL_TILING_GEN9_Yf:
> > +   case 

Re: [Mesa-dev] [PATCH 19/19] radv: don't flush src stages when dstStageMask == BOTTOM_OF_PIPE

2019-02-01 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

for the series except 09 where I had comments.

On Tue, Jan 29, 2019 at 10:17 PM Samuel Pitoiset
 wrote:
>
> Original patch by Fredrik Höglund.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 16 +++-
>  src/amd/vulkan/radv_pass.c   |  6 --
>  2 files changed, 19 insertions(+), 3 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index 93982c1232e..3b215b4b103 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -4646,6 +4646,7 @@ struct radv_barrier_info {
> uint32_t eventCount;
> const VkEvent *pEvents;
> VkPipelineStageFlags srcStageMask;
> +   VkPipelineStageFlags dstStageMask;
>  };
>
>  static void
> @@ -4697,7 +4698,19 @@ radv_barrier(struct radv_cmd_buffer *cmd_buffer,
> image);
> }
>
> -   radv_stage_flush(cmd_buffer, info->srcStageMask);
> +   /* The Vulkan spec 1.1.98 says:
> +*
> +* "An execution dependency with only
> +*  VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT in the destination stage mask
> +*  will only prevent that stage from executing in subsequently
> +*  submitted commands. As this stage does not perform any actual
> +*  execution, this is not observable - in effect, it does not delay
> +*  processing of subsequent commands. Similarly an execution 
> dependency
> +*  with only VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT in the source stage 
> mask
> +*  will effectively not wait for any prior commands to complete."
> +*/
> +   if (info->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
> +   radv_stage_flush(cmd_buffer, info->srcStageMask);
> cmd_buffer->state.flush_bits |= src_flush_bits;
>
> for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
> @@ -4738,6 +4751,7 @@ void radv_CmdPipelineBarrier(
> info.eventCount = 0;
> info.pEvents = NULL;
> info.srcStageMask = srcStageMask;
> +   info.dstStageMask = destStageMask;
>
> radv_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers,
>  bufferMemoryBarrierCount, pBufferMemoryBarriers,
> diff --git a/src/amd/vulkan/radv_pass.c b/src/amd/vulkan/radv_pass.c
> index 08ea2454750..7a724dc2da5 100644
> --- a/src/amd/vulkan/radv_pass.c
> +++ b/src/amd/vulkan/radv_pass.c
> @@ -47,11 +47,13 @@ radv_render_pass_add_subpass_dep(struct radv_render_pass 
> *pass,
> dst = 0;
>
> if (dst == VK_SUBPASS_EXTERNAL) {
> -   pass->end_barrier.src_stage_mask |= dep->srcStageMask;
> +   if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
> +   pass->end_barrier.src_stage_mask |= dep->srcStageMask;
> pass->end_barrier.src_access_mask |= dep->srcAccessMask;
> pass->end_barrier.dst_access_mask |= dep->dstAccessMask;
> } else {
> -   pass->subpasses[dst].start_barrier.src_stage_mask |= 
> dep->srcStageMask;
> +   if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
> +   pass->subpasses[dst].start_barrier.src_stage_mask |= 
> dep->srcStageMask;
> pass->subpasses[dst].start_barrier.src_access_mask |= 
> dep->srcAccessMask;
> pass->subpasses[dst].start_barrier.dst_access_mask |= 
> dep->dstAccessMask;
> }
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv/cmd_buffer: check for NULL framebuffer

2019-02-01 Thread Jason Ekstrand
On Fri, Feb 1, 2019 at 10:14 AM Juan A. Suarez Romero 
wrote:

> This can happen when we record a VkCmdDraw in a secondary buffer that
> was created inheriting from the primary buffer, but with the framebuffer
> set to NULL in the VkCommandBufferInheritanceInfo.
>
> CC: Jason Ekstrand 
> ---
>  src/intel/vulkan/gen7_cmd_buffer.c | 13 +++--
>  1 file changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/src/intel/vulkan/gen7_cmd_buffer.c
> b/src/intel/vulkan/gen7_cmd_buffer.c
> index 352892aee33..fe1a47f6ce6 100644
> --- a/src/intel/vulkan/gen7_cmd_buffer.c
> +++ b/src/intel/vulkan/gen7_cmd_buffer.c
> @@ -70,12 +70,21 @@ gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer
> *cmd_buffer)
>};
>
>const int max = 0x;
> +
> +  uint32_t height = 0;
> +  uint32_t width = 0;
> +
> +  if (fb) {
> +height = fb->height;
> +width = fb->width;
> +  }
> +
>struct GEN7_SCISSOR_RECT scissor = {
>   /* Do this math using int64_t so overflow gets clamped
> correctly. */
>   .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max),
>   .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max),
> - .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y +
> s->extent.height - 1, 0, fb->height - 1),
> - .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x +
> s->extent.width - 1, 0, fb->width - 1)
> + .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y +
> s->extent.height - 1, 0, height - 1),
> + .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x +
> s->extent.width - 1, 0, width - 1)
>

If fb == NULL, won't width/height be 0 and this be -1 and we end up
clamping to -1?  I think we want to rather make the clamp conditional on
having the framebuffer.

--Jason


>};
>
>if (s->extent.width <= 0 || s->extent.height <= 0) {
> --
> 2.20.1
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109107] gallium/st/va: change va max_profiles when using Radeon VCN Hardware

2019-02-01 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109107

--- Comment #2 from Michael Eagle  ---
Hi,

I am not sure whether this is related to what is causing hardware
accelerated decoding on my Raven laptop not to work.

Fedora repos:
che-mesa, rpmfusion, fedora-rawhide-kernel-nodebug
Mesa-git installed is build from: sha 9279a28

chromium-vaapi package from rpmfusion repo, started with the following
command:
chromium-vaapi --enable-plugins --enable-extensions --enable-user-scripts
--enable-printing --enable-accelerated-video
--enable-native-gpu-memory-buffers --enable-accelerated-mjpeg-decode
-nable-gpu-rasterization --disable-gpu-driver-bug-workarounds --enable-sync
--flag-switches-begin --enable-accelerated-mjpeg-decode
--enable-accelerated-video --enable-zero-copy --ignore-gpu-blacklist
--enable-features=VizDisplayCompositor --flag-switches-end


chrome://gpu/

Native GpuMemoryBuffers: Hardware accelerated
Hardware Protected Video Decode: Hardware accelerated
Video Decode: Hardware accelerated
WebGL: Hardware accelerated
WebGL2: Hardware accelerated
-

chrome://media-internals/

video_codec_name vp9
video_dds false
video_decoder VpxVideoDecoder

So, since the video decoder in use is VpxVideoDecoder, it means the GPU hardware
acceleration is not working.

Log:
[18642:18642:0201/223152.683850:ERROR:vaapi_wrapper.cc(568)] :
vaQueryConfigProfiles returned: 14
[18642:18642:0201/223152.683943:ERROR:vaapi_wrapper.cc(568)] :
vaQueryConfigProfiles returned: 14
[18642:18642:0201/223152.771320:ERROR:sandbox_linux.cc(364)] :
InitializeSandbox() called with multiple threads in process gpu-process.
[18642:18642:0201/224233.934476:ERROR:buffer_manager.cc(491)] :
[GroupMarkerNotSet(crbug.com/242999)!:C0B874BB3939]GL ERROR
:GL_INVALID_OPERATION : glBufferData: <- error from previous GL command

Specs:
I am using HP 15-cp0001na laptop with Ryzen 2700u:

inxi -G
Graphics:  Device-1: AMD Raven Ridge [Radeon Vega Series / Radeon Vega
Mobile Series] driver: amdgpu v: kernel
Display: x11 server: Fedora Project X.org 1.20.3 driver: amdgpu tty: N/A
OpenGL: renderer: AMD RAVEN (DRM 3.27.0 5.0.0-0.rc4.git0.1.fc30.x86_64 LLVM
9.0.0) v: 4.5 Mesa 19.1.0-devel
--
dmesg
[3.119158] amdgpu :04:00.0: ring vcn_dec uses VM inv eng 1 on hub 1
[3.119166] amdgpu :04:00.0: ring vcn_enc0 uses VM inv eng 4 on hub 1
[3.119170] amdgpu :04:00.0: ring vcn_enc1 uses VM inv eng 5 on hub 1
[3.119173] amdgpu :04:00.0: ring vcn_jpeg uses VM inv eng 6 on hub 1

vainfo

libva info: VA-API version 1.3.0
libva info: va_getDriverName() returns 0
libva info: Trying to open /usr/lib64/dri/radeonsi_drv_video.so
libva info: Found init function __vaDriverInit_1_3
libva info: va_openDriver() returns 0
vainfo: VA-API version: 1.3 (libva 2.3.0)
vainfo: Driver version: Mesa Gallium driver 19.1.0-devel for AMD RAVEN (DRM
3.27.0, 5.0.0-0.rc4.git0.1.fc30.x86_64, LLVM 9.0.0)
vainfo: Supported profile and entrypoints
  VAProfileMPEG2Simple: VAEntrypointVLD
  VAProfileMPEG2Main  : VAEntrypointVLD
  VAProfileVC1Simple  : VAEntrypointVLD
  VAProfileVC1Main: VAEntrypointVLD
  VAProfileVC1Advanced: VAEntrypointVLD
  VAProfileH264ConstrainedBaseline: VAEntrypointVLD
  VAProfileH264ConstrainedBaseline: VAEntrypointEncSlice
  VAProfileH264Main   : VAEntrypointVLD
  VAProfileH264Main   : VAEntrypointEncSlice
  VAProfileH264High   : VAEntrypointVLD
  VAProfileH264High   : VAEntrypointEncSlice
  VAProfileHEVCMain   : VAEntrypointVLD
  VAProfileHEVCMain   : VAEntrypointEncSlice
  VAProfileHEVCMain10 : VAEntrypointVLD
  VAProfileJPEGBaseline   : VAEntrypointVLD
  VAProfileVP9Profile0: VAEntrypointVLD
  VAProfileVP9Profile2: VAEntrypointVLD
  VAProfileNone   : VAEntrypointVideoProc

Possibly related:
https://sea-region.github.com/saiarcot895/chromium-ubuntu-build/issues/39

Thanks in advance!



On Fri, Dec 21, 2018 at 3:19 AM  wrote:

> *Comment # 1  on
> bug 109107  from
> zhoulei  *
>
> More details:
>
> In function 
> VASupportedProfiles::GetSupportedVAProfiles,https://github.com/chromium/chromium/blob/master/media/gpu/vaapi/vaapi_wrapper.cc#L571
>
> const int max_profiles = vaMaxNumProfiles(va_display_);
> vaMaxNumProfiles will return (PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH -
> PIPE_VIDEO_PROFILE_UNKNOWN) in mesa.
>
>  int num_supported_profiles;
>   VAStatus va_res = vaQueryConfigProfiles(va_display_, _profiles[0],
>   _supported_profiles);
>   VA_SUCCESS_OR_RETURN(va_res, "vaQueryConfigProfiles failed", false);
>   if (num_supported_profiles < 0 || num_supported_profiles > max_profiles) {
> LOG(ERROR) 

Re: [Mesa-dev] [PATCH] meson: drop the xcb-xrandr version requirement

2019-02-01 Thread Marek Olšák
If there is no feedback soon, I'll push this.

Marek

On Wed, Jan 30, 2019 at 12:44 PM Erik Faye-Lund <
erik.faye-l...@collabora.com> wrote:

> On Wed, 2019-01-30 at 12:32 -0500, Marek Olšák wrote:
> > ping
> >
>
> Probably worth including Keith, who added this line...
>
> But yeah, I tend to think that this makes sense. The autotools-build
> doesn't seem to tie this to a specific version, and seems to have been
> used without problems for almost a year. But let's hear what Keith
> thinks first...
>
> > On Fri, Jan 25, 2019 at 5:48 PM Marek Olšák  wrote:
> > > From: Marek Olšák 
> > >
> > > autotools doesn't have any requirement. This fixes meson on Ubuntu
> > > 16.04.
> > > ---
> > >  meson.build | 2 +-
> > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > >
> > > diff --git a/meson.build b/meson.build
> > > index 34e2a032548..7f16c3070fe 100644
> > > --- a/meson.build
> > > +++ b/meson.build
> > > @@ -1389,21 +1389,21 @@ if with_platform_x11
> > >dep_xxf86vm = dependency('xxf86vm')
> > >  endif
> > >  dep_glproto = dependency('glproto', version : '>= 1.4.14')
> > >endif
> > >if (with_egl or (
> > >with_gallium_vdpau or with_gallium_xvmc or with_gallium_xa
> > > or
> > >with_gallium_omx != 'disabled'))
> > >  dep_xcb_xfixes = dependency('xcb-xfixes')
> > >endif
> > >if with_xlib_lease
> > > -dep_xcb_xrandr = dependency('xcb-randr', version : '>= 1.12')
> > > +dep_xcb_xrandr = dependency('xcb-randr')
> > >  dep_xlib_xrandr = dependency('xrandr', version : '>= 1.3')
> > >endif
> > >  endif
> > >
> > >  if get_option('gallium-extra-hud')
> > >pre_args += '-DHAVE_GALLIUM_EXTRA_HUD=1'
> > >  endif
> > >
> > >  _sensors = get_option('lmsensors')
> > >  if _sensors != 'false'
> > > ___
> > > mesa-dev mailing list
> > > mesa-dev@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] gallium\auxiliary\vl: Move dirty define to header file

2019-02-01 Thread James Zhu
Thanks for pointing it out!

James

On 2019-02-01 2:33 p.m., Matt Turner wrote:
> My OCD is really bothered by the backslashes in the commit title. Can
> we use forward slashes like all the other commits?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] gallium\auxiliary\vl: Move dirty define to header file

2019-02-01 Thread Matt Turner
My OCD is really bothered by the backslashes in the commit title. Can
we use forward slashes like all the other commits?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/compiler: update validator to account for half-float exec type promotion

2019-02-01 Thread Francisco Jerez
Iago Toral  writes:

> On Fri, 2019-01-25 at 12:54 -0800, Francisco Jerez wrote:
>> Iago Toral  writes:
>> 
>> > On Thu, 2019-01-24 at 11:45 -0800, Francisco Jerez wrote:
>> > > Iago Toral  writes:
>> > > 
>> > > > On Wed, 2019-01-23 at 06:03 -0800, Francisco Jerez wrote:
>> > > > > Iago Toral Quiroga  writes:
>> > > > > 
>> > > > > > Commit c84ec70b3a72 implemented execution type promotion to
>> > > > > > 32-
>> > > > > > bit
>> > > > > > for
>> > > > > > conversions involving half-float registers, which empirical
>> > > > > > testing
>> > > > > > suggested
>> > > > > > was required, but it did not incorporate this change into
>> > > > > > the
>> > > > > > assembly validator
>> > > > > > logic. This commit adds that, preventing validation errors
>> > > > > > like
>> > > > > > this:
>> > > > > > 
>> > > > > 
>> > > > > I don't think we should be validating empirical assumptions
>> > > > > in
>> > > > > the EU
>> > > > > validator.
>> > > > 
>> > > > I am not sure I get your point, isn't c84ec70b3a72 also based
>> > > > on
>> > > > empirical testing after all?
>> > > > 
>> > > 
>> > > To some extent, but it doesn't attempt to enforce ISA
>> > > restrictions
>> > > based
>> > > on information obtained empirically.
>> > > 
>> > > > 
>> > > > > > mov(16)  g9<4>B   g3<16,8,2>HF { align1 1H };
>> > > > > > ERROR: Destination stride must be equal to the ratio of the
>> > > > > > sizes
>> > > > > > of the
>> > > > > >execution data type to the destination type
>> > > > > > 
>> > > > > > Fixes: c84ec70b3a72 "intel/fs: Promote execution type to
>> > > > > > 32-bit
>> > > > > > when any half-float conversion is needed."
>> > > > > 
>> > > > > I don't think this "fixes" anything that ever worked.
>> > > > 
>> > > > It is true that the code in that trace above is not something
>> > > > we
>> > > > can
>> > > > produce right now, because it is a conversion from HF to B and
>> > > > that
>> > > > should only happen within the context of
>> > > > VK_KHR_shader_float16_int8,
>> > > > however, this is a consequence of the fact that since
>> > > > c84ec70b3a72
>> > > > there is an inconsistency between what we do at the IR level
>> > > > regarding
>> > > > execution size of HF conversions and what the EU validator is
>> > > > doing,
>> > > > and from that perspective this is really fixing an
>> > > > inconsistency
>> > > > that
>> > > > didn't exist before, and I thought we would want to address
>> > > > that
>> > > > sooner
>> > > > rather than later and track it down to the original change that
>> > > > introduced that inconsistency so we know where this is coming
>> > > > from.
>> > > > 
>> > > 
>> > > The "inconsistency" between the IR's get_exec_type() and the EU
>> > > validator's execution_type() has existed ever since
>> > > a05b6f25bf4bfad7
>> > > removed the HF assert from get_exec_type() without actually
>> > > implementing
>> > > the code required to handle HF operands (which is what my commit
>> > > c84ec70b3a72 did).
>> > 
>> > I agree with the fact that since a05b6f25bf4bfad7 the validator
>> > could
>> > reject valid code and that had nothing to do with your patch,
>> 
>> The validator rejected the same valid HF code since it was written,
>> that
>> had nothing to do with either a05b6f25bf4bfad7 or my patch,
>> and
>> it is the real problem this patch was working around.
>> 
>> > but the inconsistency I am talking about here, that this patch
>> > fixes,
>> > is the one about get_exec_type() in the IR and execution_type() in
>> > the
>> > validator doing different things for HF instructions, which only
>> > exists since your patch and which you discuss below.
>> > 
>> 
>> The "inconsistency" exists ever since get_exec_type() was introduced
>> without correct handling of HF types (even though execution_type()
>> already attempted to handle it).  And I disagree that it's a real
>> inconsistency except due to the fact that the validator is
>> incorrectly
>> attempting to validate the alignment of the destination region
>> according
>> to a rule that doesn't apply to HF types.
>> 
>> > > > Anyway, that was my rationale for the Fixes tag, but if you
>> > > > think
>> > > > this
>> > > > is not useful I am happy to drop this patch and just include it
>> > > > as
>> > > > part
>> > > > of my series without the tag.
>> > > > 
>> > > 
>> > > I'd like to see the actual regioning restrictions for HF types
>> > > implemented in the EU validator as part of your series.
>> > 
>> > Ok, let's see if we can agree on what restrictions should we
>> > implement
>> > then. I can implement this restriction as documented:
>> > 
>> > "Conversion between Integer and HF (Half Float) must be DWord-
>> > aligned
>> > and strided by a DWord on the destination"
>> > 
>> > Instead of trying to apply the general one that is currently
>> > breaking.
>> > That will fix the assertion issue. I guess my issue with it was
>> > that
>> > going by your analysis this restriction is not telling the full
>> > picture, this is what 

Re: [Mesa-dev] [PATCH 6/6] glsl/linker: check for xfb_offset aliasing

2019-02-01 Thread Ilia Mirkin
This causes O(N^2) behavior in the number of feedback'd variables,
which I suspect is undesirable. Elsewhere we use arrays and/or maps --
since the size of the xfb offset has a max value, an array-based
solution may work well.

On Fri, Feb 1, 2019 at 1:08 PM Andres Gomez  wrote:
>
> From page 76 (page 80 of the PDF) of the GLSL 4.60 v.5 spec:
>
>   " No aliasing in output buffers is allowed: It is a compile-time or
> link-time error to specify variables with overlapping transform
> feedback offsets."
>
> Currently, this is expected to fail, but it succeeds:
>
>   "
>
> ...
>
> layout (xfb_offset = 0) out vec2 a;
> layout (xfb_offset = 0) out vec4 b;
>
> ...
>
>   "
>
> Cc: Timothy Arceri 
> Signed-off-by: Andres Gomez 
> ---
>  src/compiler/glsl/link_varyings.cpp | 23 +++
>  1 file changed, 23 insertions(+)
>
> diff --git a/src/compiler/glsl/link_varyings.cpp 
> b/src/compiler/glsl/link_varyings.cpp
> index ab66ceb0d00..28e47f15733 100644
> --- a/src/compiler/glsl/link_varyings.cpp
> +++ b/src/compiler/glsl/link_varyings.cpp
> @@ -1173,6 +1173,29 @@ tfeedback_decl::store(struct gl_context *ctx, struct 
> gl_shader_program *prog,
>unsigned location = this->location;
>unsigned location_frac = this->location_frac;
>unsigned num_components = this->num_components();
> +
> +  /* From the OpenGL 4.60.5 spec, section 4.4.2. Output Layout 
> Qualifiers,
> +   * Page 76, (Transform Feedback Layout Qualifiers):
> +   *
> +   * "No aliasing in output buffers is allowed: It is a compile-time or
> +   *  link-time error to specify variables with overlapping transform
> +   *  feedback offsets."
> +   */
> +  for (unsigned i = 0; i < info->NumOutputs; i++) {
> + const struct gl_transform_feedback_output &output = 
> info->Outputs[i];
> +
> + if (output.OutputBuffer != buffer)
> +continue;
> +
> + if ((output.DstOffset < xfb_offset + num_components) &&
> + (output.DstOffset + output.NumComponents > xfb_offset)) {
> +linker_error(prog,
> + "variable '%s', xfb_offset (%d) is causing 
> aliasing.",
> + this->orig_name, xfb_offset * 4);
> +return false;
> + }
> +  }
> +
>while (num_components > 0) {
>   unsigned output_size = MIN2(num_components, 4 - location_frac);
>   assert((info->NumOutputs == 0 && max_outputs == 0) ||
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/6] glsl/linker: simplify xfb_offset vs xfb_stride overflow check

2019-02-01 Thread Ilia Mirkin
On Fri, Feb 1, 2019 at 1:08 PM Andres Gomez  wrote:
>
> Current implementation uses a complicated calculation which relies on
> an implicit conversion to check the integral part of 2 division
> results.
>
> However, the calculation actually checks that the xfb_offset is
> smaller than or a multiple of the xfb_stride. For example, while this is
> expected to fail, it actually succeeds:
>
>   "
>
> ...
>
> layout(xfb_buffer = 2, xfb_stride = 12) out block3 {
>   layout(xfb_offset = 0) vec3 c;
>   layout(xfb_offset = 12) vec3 d; // ERROR, requires stride of 24
> };
>
> ...
>
>   "
>
> Fixes: 2fab85aaea5 ("glsl: add xfb_stride link time validation")
> Cc: Timothy Arceri 
> Signed-off-by: Andres Gomez 
> ---
>  src/compiler/glsl/link_varyings.cpp | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/src/compiler/glsl/link_varyings.cpp 
> b/src/compiler/glsl/link_varyings.cpp
> index 6cebc5b3c5a..ab66ceb0d00 100644
> --- a/src/compiler/glsl/link_varyings.cpp
> +++ b/src/compiler/glsl/link_varyings.cpp
> @@ -1213,8 +1213,7 @@ tfeedback_decl::store(struct gl_context *ctx, struct 
> gl_shader_program *prog,
>   return false;
>}
>
> -  if ((this->offset / 4) / info->Buffers[buffer].Stride !=
> -  (xfb_offset - 1) / info->Buffers[buffer].Stride) {
> +  if (xfb_offset > info->Buffers[buffer].Stride) {

>= ?

>   linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for "
>"buffer (%d)", xfb_offset * 4,
>info->Buffers[buffer].Stride * 4, buffer);
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 1/6] glsl/linker: location aliasing requires types to have the same width

2019-02-01 Thread Andres Gomez
From: Iago Toral Quiroga 

Regarding location aliasing requirements, the OpenGL spec says:

  "Further, when location aliasing, the aliases sharing the location
   must have the same underlying numerical type  (floating-point or
   integer)."

Khronos has further clarified that this also requires the underlying
types to have the same width, so we can't put a float and a double
in the same location slot for example. Future versions of the spec will
be corrected to make this clear.

This patch amends our implementation to account for this restriction.

In the process of doing this, I also noticed that we would attempt
to check aliasing requirements for record variables (including the test
for the numerical type) which is not allowed, instead, we should be
producing a linker error as soon as we see any attempt to do location
aliasing on non-numerical variables. For the particular case of structs,
we were producing a linker error in this case, but only because we
assumed that struct fields use all components in each location, so
any attempt to alias locations consumed by struct fields would produce
a link error due to component aliasing, which does not accurately describe the
actual problem. This patch would make it produce an error for attempting
to alias a non-numerical variable instead, which is always accurate.

v2:
  - Do not assert if we see invalid numerical types. These come
straight from shader code, so we should produce linker errors if
shaders attempt to do location aliasing on variables that are not
numerical such as records.
  - While we are at it, improve error reporting for the case of
numerical type mismatch to include the shader stage.

v3:
  - Allow location aliasing of images and samplers. If we get these
it means bindless support is active and they should be handled
as 64-bit integers (Ilia)
  - Make sure we produce link errors for any non-numerical type
for which we attempt location aliasing, not just structs.

v4:
  - Rebased with minor fixes (Andres).
  - Added fixing tag to the commit log (Andres).

Fixes: 13652e7516a ("glsl/linker: Fix type checks for location aliasing")
Cc: Ilia Mirkin 
Signed-off-by: Andres Gomez 
---
 src/compiler/glsl/link_varyings.cpp | 64 +
 1 file changed, 46 insertions(+), 18 deletions(-)

diff --git a/src/compiler/glsl/link_varyings.cpp 
b/src/compiler/glsl/link_varyings.cpp
index 3969c0120b3..3f41832ac93 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -424,15 +424,15 @@ compute_variable_location_slot(ir_variable *var, 
gl_shader_stage stage)
 
 struct explicit_location_info {
ir_variable *var;
-   unsigned numerical_type;
+   int numerical_type;
unsigned interpolation;
bool centroid;
bool sample;
bool patch;
 };
 
-static inline unsigned
-get_numerical_type(const glsl_type *type)
+static inline int
+get_numerical_sized_type(const glsl_type *type)
 {
/* From the OpenGL 4.6 spec, section 4.4.1 Input Layout Qualifiers, Page 68,
 * (Location aliasing):
@@ -440,10 +440,25 @@ get_numerical_type(const glsl_type *type)
 *"Further, when location aliasing, the aliases sharing the location
 * must have the same underlying numerical type  (floating-point or
 * integer)
+*
+* Khronos has further clarified that this also requires the underlying
+* types to have the same width, so we can't put a float and a double
+* in the same location slot for example. Future versions of the spec will
+* be corrected to make this clear.
+*
+* Notice that we allow location aliasing for bindless image/samplers too
+* since these are defined as 64-bit integers.
 */
-   if (type->is_float() || type->is_double())
+   if (type->is_float())
   return GLSL_TYPE_FLOAT;
-   return GLSL_TYPE_INT;
+   else if (type->is_integer())
+  return GLSL_TYPE_INT;
+   else if (type->is_double())
+  return GLSL_TYPE_DOUBLE;
+   else if (type->is_integer_64() || type->is_sampler() || type->is_image())
+  return GLSL_TYPE_INT64;
+
+   return -1; /* Not a numerical type */
 }
 
 static bool
@@ -461,14 +476,17 @@ check_location_aliasing(struct explicit_location_info 
explicit_locations[][4],
 gl_shader_stage stage)
 {
unsigned last_comp;
-   if (type->without_array()->is_record()) {
-  /* The component qualifier can't be used on structs so just treat
-   * all component slots as used.
+   const glsl_type *type_without_array = type->without_array();
+   int numerical_type = get_numerical_sized_type(type_without_array);
+   if (-1 == numerical_type) {
+  /* The component qualifier can't be used on non-numerical types so just
+   * treat all component slots as used. This will also make it so that
+   * any location aliasing attempt on non-numerical types is detected.
*/
   last_comp = 4;
} else {
-  unsigned dmul = type->without_array()->is_64bit() ? 2 : 1;

[Mesa-dev] [PATCH 4/6] glsl/linker: don't fail non static used inputs without matching outputs

2019-02-01 Thread Andres Gomez
If there is no Static Use of an input variable, the linker shouldn't
fail whenever there is no defined matching output variable in the
previous stage.

From page 47 (page 51 of the PDF) of the GLSL 4.60 v.5 spec:

  " Only the input variables that are statically read need to be
written by the previous stage; it is allowed to have superfluous
declarations of input variables."

Now, we complete this exception whenever the input variable has an
explicit location. Previously, 18004c338f6 ("glsl: fail when a
shader's input var has not an equivalent out var in previous") took
care of the cases in which the input variable didn't have an explicit
location.

Additionally, like 1aa5738e666 ("glsl: relax input->output
validation for SSO programs"), also avoid failing for programs that
utilize GL_ARB_separate_shader_objects.

Cc: Timothy Arceri 
Cc: Iago Toral Quiroga 
Cc: Samuel Iglesias Gonsálvez 
Cc: Tapani Pälli 
Cc: Ian Romanick 
Signed-off-by: Andres Gomez 
---
 src/compiler/glsl/link_varyings.cpp | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/link_varyings.cpp 
b/src/compiler/glsl/link_varyings.cpp
index e5f7d3e322a..6cebc5b3c5a 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -808,8 +808,20 @@ cross_validate_outputs_to_inputs(struct gl_context *ctx,
 
output = 
output_explicit_locations[idx][input->data.location_frac].var;
 
-   if (output == NULL ||
-   input->data.location != output->data.location) {
+   if (output == NULL) {
+  /* A linker failure should only happen when, for programs
+   * not using sso, there is no output declaration and there
+   * is Static Use of the declared input.
+   */
+  if (input->data.used && !prog->SeparateShader) {
+ linker_error(prog,
+  "%s shader input `%s' with explicit location 
"
+  "has no matching output\n",
+  
_mesa_shader_stage_to_string(consumer->Stage),
+  input->name);
+ break;
+  }
+   } else if (input->data.location != output->data.location) {
   linker_error(prog,
"%s shader input `%s' with explicit location "
"has no matching output\n",
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/6] glsl/linker: several arb_enhanced_layouts related fixes

2019-02-01 Thread Andres Gomez
This series addresses the problems tested in the piglit series sent
for review at:
https://patchwork.freedesktop.org/series/56099/

Andres Gomez (5):
  glsl: correctly validate component layout qualifier for dvec{3,4}
  glsl/linker: always validate explicit location among inputs
  glsl/linker: don't fail non static used inputs without matching
outputs
  glsl/linker: simplify xfb_offset vs xfb_stride overflow check
  glsl/linker: check for xfb_offset aliasing

Iago Toral Quiroga (1):
  glsl/linker: location aliasing requires types to have the same width

 src/compiler/glsl/ast_to_hir.cpp|   3 +
 src/compiler/glsl/link_varyings.cpp | 121 ++--
 2 files changed, 98 insertions(+), 26 deletions(-)

-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/6] glsl: correctly validate component layout qualifier for dvec{3, 4}

2019-02-01 Thread Andres Gomez
From page 62 (page 68 of the PDF) of the GLSL 4.50 v.7 spec:

  " A dvec3 or dvec4 can only be declared without specifying a
component."

Therefore, using the "component" qualifier with a dvec3 or dvec4
should result in a compile-time error.

Fixes: 94438578d21 ("glsl: validate and store component layout qualifier in 
GLSL IR")
Cc: Timothy Arceri 
Cc: Kenneth Graunke 
Signed-off-by: Andres Gomez 
---
 src/compiler/glsl/ast_to_hir.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index 620153e6a34..6d060fd94cd 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -3698,6 +3698,9 @@ apply_layout_qualifier_to_variable(const struct 
ast_type_qualifier *qual,
 "cannot be applied to a matrix, a structure, "
 "a block, or an array containing any of "
 "these.");
+} else if (components > 4 && type->is_64bit()) {
+   _mesa_glsl_error(loc, state, "component layout qualifier "
+"cannot be applied to dvec3 nor dvec4.");
 } else if (qual_component != 0 &&
 (qual_component + components - 1) > 3) {
_mesa_glsl_error(loc, state, "component overflow (%u > 3)",
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/6] glsl/linker: check for xfb_offset aliasing

2019-02-01 Thread Andres Gomez
From page 76 (page 80 of the PDF) of the GLSL 4.60 v.5 spec:

  " No aliasing in output buffers is allowed: It is a compile-time or
link-time error to specify variables with overlapping transform
feedback offsets."

Currently, this is expected to fail, but it succeeds:

  "

...

layout (xfb_offset = 0) out vec2 a;
layout (xfb_offset = 0) out vec4 b;

...

  "

Cc: Timothy Arceri 
Signed-off-by: Andres Gomez 
---
 src/compiler/glsl/link_varyings.cpp | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/src/compiler/glsl/link_varyings.cpp 
b/src/compiler/glsl/link_varyings.cpp
index ab66ceb0d00..28e47f15733 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -1173,6 +1173,29 @@ tfeedback_decl::store(struct gl_context *ctx, struct 
gl_shader_program *prog,
   unsigned location = this->location;
   unsigned location_frac = this->location_frac;
   unsigned num_components = this->num_components();
+
+  /* From the OpenGL 4.60.5 spec, section 4.4.2. Output Layout Qualifiers,
+   * Page 76, (Transform Feedback Layout Qualifiers):
+   *
+   * "No aliasing in output buffers is allowed: It is a compile-time or
+   *  link-time error to specify variables with overlapping transform
+   *  feedback offsets."
+   */
+  for (unsigned i = 0; i < info->NumOutputs; i++) {
+ const struct gl_transform_feedback_output &output = info->Outputs[i];
+
+ if (output.OutputBuffer != buffer)
+continue;
+
+ if ((output.DstOffset < xfb_offset + num_components) &&
+ (output.DstOffset + output.NumComponents > xfb_offset)) {
+linker_error(prog,
+ "variable '%s', xfb_offset (%d) is causing aliasing.",
+ this->orig_name, xfb_offset * 4);
+return false;
+ }
+  }
+
   while (num_components > 0) {
  unsigned output_size = MIN2(num_components, 4 - location_frac);
  assert((info->NumOutputs == 0 && max_outputs == 0) ||
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/6] glsl/linker: simplify xfb_offset vs xfb_stride overflow check

2019-02-01 Thread Andres Gomez
Current implementation uses a complicated calculation which relies on
an implicit conversion to check the integral part of 2 division
results.

However, the calculation actually checks that the xfb_offset is
smaller than or a multiple of the xfb_stride. For example, while this is
expected to fail, it actually succeeds:

  "

...

layout(xfb_buffer = 2, xfb_stride = 12) out block3 {
  layout(xfb_offset = 0) vec3 c;
  layout(xfb_offset = 12) vec3 d; // ERROR, requires stride of 24
};

...

  "

Fixes: 2fab85aaea5 ("glsl: add xfb_stride link time validation")
Cc: Timothy Arceri 
Signed-off-by: Andres Gomez 
---
 src/compiler/glsl/link_varyings.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/compiler/glsl/link_varyings.cpp 
b/src/compiler/glsl/link_varyings.cpp
index 6cebc5b3c5a..ab66ceb0d00 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -1213,8 +1213,7 @@ tfeedback_decl::store(struct gl_context *ctx, struct 
gl_shader_program *prog,
  return false;
   }
 
-  if ((this->offset / 4) / info->Buffers[buffer].Stride !=
-  (xfb_offset - 1) / info->Buffers[buffer].Stride) {
+  if (xfb_offset > info->Buffers[buffer].Stride) {
  linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for "
   "buffer (%d)", xfb_offset * 4,
   info->Buffers[buffer].Stride * 4, buffer);
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/6] glsl/linker: always validate explicit location among inputs

2019-02-01 Thread Andres Gomez
Outputs are always validated when having explicit locations and we
were trusting its outcome to catch similar problems with the inputs
since, in case of having undefined outputs for existing inputs, we
would be already reporting a linker error.

However, consider this case:

  " Shader stage n:
---

...

layout(location = 0) out float a;

...

Shader stage n+1:
-

...

layout(location = 0) in float b;
layout(location = 0) in float c;

...
  "

Currently, this won't report a linker error even though location
aliasing is happening for the inputs.

Therefore, we also need to validate the inputs independently from the
outcome of the outputs validation.

Cc: Timothy Arceri 
Cc: Iago Toral Quiroga 
Cc: Ilia Mirkin 
Signed-off-by: Andres Gomez 
---
 src/compiler/glsl/link_varyings.cpp | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/compiler/glsl/link_varyings.cpp 
b/src/compiler/glsl/link_varyings.cpp
index 3f41832ac93..e5f7d3e322a 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -715,7 +715,8 @@ cross_validate_outputs_to_inputs(struct gl_context *ctx,
  gl_linked_shader *consumer)
 {
glsl_symbol_table parameters;
-   struct explicit_location_info explicit_locations[MAX_VARYING][4] = { 0 };
+   struct explicit_location_info output_explicit_locations[MAX_VARYING][4] = { 
0 };
+   struct explicit_location_info input_explicit_locations[MAX_VARYING][4] = { 
0 };
 
/* Find all shader outputs in the "producer" stage.
 */
@@ -733,7 +734,7 @@ cross_validate_outputs_to_inputs(struct gl_context *ctx,
   * differently because they do not need to have matching names.
   */
  if (!validate_explicit_variable_location(ctx,
-  explicit_locations,
+  output_explicit_locations,
   var, prog, producer)) {
 return;
  }
@@ -791,6 +792,12 @@ cross_validate_outputs_to_inputs(struct gl_context *ctx,
compute_variable_location_slot(input, consumer->Stage);
 unsigned slot_limit = idx + num_elements;
 
+if (!validate_explicit_variable_location(ctx,
+ input_explicit_locations,
+ input, prog, consumer)) {
+   return;
+}
+
 while (idx < slot_limit) {
if (idx >= MAX_VARYING) {
   linker_error(prog,
@@ -799,7 +806,7 @@ cross_validate_outputs_to_inputs(struct gl_context *ctx,
   return;
}
 
-   output = explicit_locations[idx][input->data.location_frac].var;
+   output = 
output_explicit_locations[idx][input->data.location_frac].var;
 
if (output == NULL ||
input->data.location != output->data.location) {
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/fs: Fix memory corruption when compiling a CS

2019-02-01 Thread Kenneth Graunke
On Saturday, January 26, 2019 7:47:42 AM PST Oscar Blumberg wrote:
> Missing check for shader stage in the fs_visitor would corrupt the
> cs_prog_data.push information and trigger crashes / corruption later
> when uploading the CS state.
> ---
>  src/intel/compiler/brw_fs_nir.cpp | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_fs_nir.cpp 
> b/src/intel/compiler/brw_fs_nir.cpp
> index bdc883e53..21b03a089 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -3779,8 +3779,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
> nir_intrinsic_instr *instr
>   BRW_REGISTER_TYPE_UD);
>const fs_reg data = retype(get_nir_src(instr->src[2]),
>   BRW_REGISTER_TYPE_UD);
> -
> -  brw_wm_prog_data(prog_data)->has_side_effects = true;
> +  
> +  if (stage == MESA_SHADER_FRAGMENT)
> + brw_wm_prog_data(prog_data)->has_side_effects = true;
>  
>emit_untyped_write(bld, image, addr, data, 1,
>   instr->num_components);
> 

Good catch, thanks so much for the fix!  I'm running it through a quick
set of testing (though it looks obviously correct), assuming that all
comes back green I'll push this shortly.

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/6] gallium\auxiliary\vl: Add video compute shader render

2019-02-01 Thread James Zhu

On 2019-02-01 11:38 a.m., Christian König wrote:
> Am 01.02.19 um 17:28 schrieb Zhu, James:
>> Add video compute shader render. export CS_COMPOSITOR_RENDER=true
>> to enable video compute shader render.
>
> Ok that actually makes more sense, but I would either put everything 
> into one file or cleanly separate between gfx and compute implementation.

I don't want to jump too far at first; I plan to do that in the future.

James

>
> Christian.
>
>>
>> Signed-off-by: James Zhu 
>> ---
>>   src/gallium/auxiliary/vl/vl_compositor.c | 19 +--
>>   1 file changed, 17 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/gallium/auxiliary/vl/vl_compositor.c 
>> b/src/gallium/auxiliary/vl/vl_compositor.c
>> index 7ee8402..66a8fc9 100644
>> --- a/src/gallium/auxiliary/vl/vl_compositor.c
>> +++ b/src/gallium/auxiliary/vl/vl_compositor.c
>> @@ -1376,8 +1376,8 @@ vl_compositor_convert_rgb_to_yuv(struct 
>> vl_compositor_state *s,
>>  s->pipe->flush(s->pipe, NULL, 0);
>>   }
>>   -void
>> -vl_compositor_render(struct vl_compositor_state *s,
>> +static void
>> +vl_compositor_gfx_render(struct vl_compositor_state *s,
>>    struct vl_compositor   *c,
>>    struct pipe_surface    *dst_surface,
>>    struct u_rect  *dirty_area,
>> @@ -1419,6 +1419,21 @@ vl_compositor_render(struct 
>> vl_compositor_state *s,
>>  draw_layers(c, s, dirty_area);
>>   }
>>   +void
>> +vl_compositor_render(struct vl_compositor_state *s,
>> + struct vl_compositor   *c,
>> + struct pipe_surface    *dst_surface,
>> + struct u_rect  *dirty_area,
>> + bool    clear_dirty)
>> +{
>> +   assert(s);
>> +
>> +   if (cs_compositor_render_enable && s->layers->cs)
>> +  vl_compositor_cs_render(s, c, dst_surface, dirty_area, 
>> clear_dirty);
>> +   else
>> +  vl_compositor_gfx_render(s, c, dst_surface, dirty_area, 
>> clear_dirty);
>> +}
>> +
>>   bool
>>   vl_compositor_init(struct vl_compositor *c, struct pipe_context *pipe)
>>   {
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] gallium\auxiliary\vl: Move dirty define to header file

2019-02-01 Thread James Zhu

On 2019-02-01 11:34 a.m., Christian König wrote:
> Am 01.02.19 um 17:28 schrieb Zhu, James:
>> Move dirty define to header file to share with compute shader.
>>
>> Signed-off-by: James Zhu 
>> ---
>>   src/gallium/auxiliary/vl/vl_compositor.c | 3 ---
>>   src/gallium/auxiliary/vl/vl_compositor.h | 2 ++
>>   2 files changed, 2 insertions(+), 3 deletions(-)
>>
>> diff --git a/src/gallium/auxiliary/vl/vl_compositor.c 
>> b/src/gallium/auxiliary/vl/vl_compositor.c
>> index 159a295..2c6d585 100644
>> --- a/src/gallium/auxiliary/vl/vl_compositor.c
>> +++ b/src/gallium/auxiliary/vl/vl_compositor.c
>> @@ -42,9 +42,6 @@
>>   #include "vl_types.h"
>>   #include "vl_compositor.h"
>>   -#define MIN_DIRTY (0)
>> -#define MAX_DIRTY (1 << 15)
>> -
>>   enum VS_OUTPUT
>>   {
>>  VS_O_VPOS = 0,
>> diff --git a/src/gallium/auxiliary/vl/vl_compositor.h 
>> b/src/gallium/auxiliary/vl/vl_compositor.h
>> index 8819176..d51b5f5 100644
>> --- a/src/gallium/auxiliary/vl/vl_compositor.h
>> +++ b/src/gallium/auxiliary/vl/vl_compositor.h
>> @@ -44,6 +44,8 @@ struct pipe_context;
>>    */
>>     #define VL_COMPOSITOR_MAX_LAYERS 16
>> +#define MIN_DIRTY (0)
>> +#define MAX_DIRTY (1 << 15)
>
> That needs a proper prefix.
>
> E.g. put VL_COMPOSITOR_ in front of the name and rename all usages.
Sure. James
>
> Christian.
>
>>     /* deinterlace allgorithem */
>>   enum vl_compositor_deinterlace
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/6] gallium\auxiliary\vl: Add video compute shader render

2019-02-01 Thread Christian König

Am 01.02.19 um 17:28 schrieb Zhu, James:

Add video compute shader render. export CS_COMPOSITOR_RENDER=true
to enable video compute shader render.


Ok that actually makes more sense, but I would either put everything 
into one file or cleanly separate between gfx and compute implementation.


Christian.



Signed-off-by: James Zhu 
---
  src/gallium/auxiliary/vl/vl_compositor.c | 19 +--
  1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c 
b/src/gallium/auxiliary/vl/vl_compositor.c
index 7ee8402..66a8fc9 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -1376,8 +1376,8 @@ vl_compositor_convert_rgb_to_yuv(struct 
vl_compositor_state *s,
 s->pipe->flush(s->pipe, NULL, 0);
  }
  
-void

-vl_compositor_render(struct vl_compositor_state *s,
+static void
+vl_compositor_gfx_render(struct vl_compositor_state *s,
   struct vl_compositor   *c,
   struct pipe_surface*dst_surface,
   struct u_rect  *dirty_area,
@@ -1419,6 +1419,21 @@ vl_compositor_render(struct vl_compositor_state *s,
 draw_layers(c, s, dirty_area);
  }
  
+void

+vl_compositor_render(struct vl_compositor_state *s,
+ struct vl_compositor   *c,
+ struct pipe_surface*dst_surface,
+ struct u_rect  *dirty_area,
+ boolclear_dirty)
+{
+   assert(s);
+
+   if (cs_compositor_render_enable && s->layers->cs)
+  vl_compositor_cs_render(s, c, dst_surface, dirty_area, clear_dirty);
+   else
+  vl_compositor_gfx_render(s, c, dst_surface, dirty_area, clear_dirty);
+}
+
  bool
  vl_compositor_init(struct vl_compositor *c, struct pipe_context *pipe)
  {


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/6] gallium\auxiliary\vl: Add compute shader to support video compositor render

2019-02-01 Thread Christian König

Am 01.02.19 um 17:28 schrieb Zhu, James:

Add compute shader to support video compositor render.


I don't think that this is actually a good approach.

It adds a second implementation of the compositor instead of adapting 
the original one to use compute shaders when available.


Christian.



Signed-off-by: James Zhu 
---
  src/gallium/auxiliary/Makefile.sources  |   2 +
  src/gallium/auxiliary/meson.build   |   2 +
  src/gallium/auxiliary/vl/vl_compositor_cs.c | 414 
  src/gallium/auxiliary/vl/vl_compositor_cs.h |  56 
  4 files changed, 474 insertions(+)
  create mode 100644 src/gallium/auxiliary/vl/vl_compositor_cs.c
  create mode 100644 src/gallium/auxiliary/vl/vl_compositor_cs.h

diff --git a/src/gallium/auxiliary/Makefile.sources 
b/src/gallium/auxiliary/Makefile.sources
index 50e8808..df000f6 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -348,6 +348,8 @@ VL_SOURCES := \
vl/vl_bicubic_filter.h \
vl/vl_compositor.c \
vl/vl_compositor.h \
+   vl/vl_compositor_cs.c \
+   vl/vl_compositor_cs.h \
vl/vl_csc.c \
vl/vl_csc.h \
vl/vl_decoder.c \
diff --git a/src/gallium/auxiliary/meson.build 
b/src/gallium/auxiliary/meson.build
index 57f7e69..74e4b48 100644
--- a/src/gallium/auxiliary/meson.build
+++ b/src/gallium/auxiliary/meson.build
@@ -445,6 +445,8 @@ files_libgalliumvl = files(
'vl/vl_bicubic_filter.h',
'vl/vl_compositor.c',
'vl/vl_compositor.h',
+  'vl/vl_compositor_cs.c',
+  'vl/vl_compositor_cs.h',
'vl/vl_csc.c',
'vl/vl_csc.h',
'vl/vl_decoder.c',
diff --git a/src/gallium/auxiliary/vl/vl_compositor_cs.c 
b/src/gallium/auxiliary/vl/vl_compositor_cs.c
new file mode 100644
index 000..3cd1a76
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_compositor_cs.c
@@ -0,0 +1,414 @@
+/**
+ *
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: James Zhu 
+ *
+ **/
+
+#include 
+
+#include "tgsi/tgsi_text.h"
+#include "vl_compositor_cs.h"
+
+struct cs_viewport {
+   float scale_x;
+   float scale_y;
+   int translate_x;
+   int translate_y;
+   struct u_rect area;
+};
+
+char *compute_shader_video_buffer =
+  "COMP\n"
+  "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
+  "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
+  "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
+
+  "DCL SV[0], THREAD_ID\n"
+  "DCL SV[1], BLOCK_ID\n"
+
+  "DCL CONST[0..5]\n"
+  "DCL SVIEW[0..2], RECT, FLOAT\n"
+  "DCL SAMP[0..2]\n"
+
+  "DCL IMAGE[0], 2D, WR\n"
+  "DCL TEMP[0..7]\n"
+
+  "IMM[0] UINT32 { 8, 8, 1, 0}\n"
+  "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"
+
+  "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"
+
+  /* Drawn area check */
+  "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
+  "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
+  "AND TEMP[1].x, TEMP[1]., TEMP[1].\n"
+  "AND TEMP[1].x, TEMP[1]., TEMP[1].\n"
+  "AND TEMP[1].x, TEMP[1]., TEMP[1].\n"
+
+  "UIF TEMP[1]\n"
+ /* Translate */
+ "UADD TEMP[2].xy, TEMP[0], -CONST[5].xyxy\n"
+ "U2F TEMP[2], TEMP[2]\n"
+ "DIV TEMP[3], TEMP[2], IMM[1].\n"
+
+ /* Scale */
+ "DIV TEMP[2], TEMP[2], CONST[3].zwzw\n"
+ "DIV TEMP[3], TEMP[3], CONST[3].zwzw\n"
+
+ /* Fetch texels */
+ "TEX_LZ TEMP[4].x, TEMP[2], SAMP[0], RECT\n"
+ "TEX_LZ TEMP[4].y, TEMP[3], SAMP[1], RECT\n"
+ "TEX_LZ TEMP[4].z, TEMP[3], SAMP[2], RECT\n"
+
+ "MOV TEMP[4].w, IMM[1].\n"
+
+ /* Color Space Conversion */
+ "DP4 TEMP[7].x, CONST[0], TEMP[4]\n"
+ "DP4 TEMP[7].y, 

Re: [Mesa-dev] [PATCH 1/6] gallium\auxiliary\vl: Move dirty define to header file

2019-02-01 Thread Christian König

Am 01.02.19 um 17:28 schrieb Zhu, James:

Move dirty define to header file to share with compute shader.

Signed-off-by: James Zhu 
---
  src/gallium/auxiliary/vl/vl_compositor.c | 3 ---
  src/gallium/auxiliary/vl/vl_compositor.h | 2 ++
  2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c 
b/src/gallium/auxiliary/vl/vl_compositor.c
index 159a295..2c6d585 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -42,9 +42,6 @@
  #include "vl_types.h"
  #include "vl_compositor.h"
  
-#define MIN_DIRTY (0)

-#define MAX_DIRTY (1 << 15)
-
  enum VS_OUTPUT
  {
 VS_O_VPOS = 0,
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h 
b/src/gallium/auxiliary/vl/vl_compositor.h
index 8819176..d51b5f5 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -44,6 +44,8 @@ struct pipe_context;
   */
  
  #define VL_COMPOSITOR_MAX_LAYERS 16

+#define MIN_DIRTY (0)
+#define MAX_DIRTY (1 << 15)


That needs a proper prefix.

E.g. put VL_COMPOSITOR_ in front of the name and rename all usages.

Christian.

  
  /* deinterlace allgorithem */

  enum vl_compositor_deinterlace


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/6] gallium\auxiliary\vl: Move dirty define to header file

2019-02-01 Thread Zhu, James
Move dirty define to header file to share with compute shader.

Signed-off-by: James Zhu 
---
 src/gallium/auxiliary/vl/vl_compositor.c | 3 ---
 src/gallium/auxiliary/vl/vl_compositor.h | 2 ++
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c 
b/src/gallium/auxiliary/vl/vl_compositor.c
index 159a295..2c6d585 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -42,9 +42,6 @@
 #include "vl_types.h"
 #include "vl_compositor.h"
 
-#define MIN_DIRTY (0)
-#define MAX_DIRTY (1 << 15)
-
 enum VS_OUTPUT
 {
VS_O_VPOS = 0,
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h 
b/src/gallium/auxiliary/vl/vl_compositor.h
index 8819176..d51b5f5 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -44,6 +44,8 @@ struct pipe_context;
  */
 
 #define VL_COMPOSITOR_MAX_LAYERS 16
+#define MIN_DIRTY (0)
+#define MAX_DIRTY (1 << 15)
 
 /* deinterlace allgorithem */
 enum vl_compositor_deinterlace
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/6] Add compute shader support on video compositor render

2019-02-01 Thread Zhu, James
Initially add compute shader support on video compositor render process.
These patches include only normal video buffer, weave and video sub-pictures
compute shader for video compositor render. Export CS_COMPOSITOR_RENDER=true
to enable compute shader video compositor render.

James Zhu (6):
  gallium\auxiliary\vl: Move dirty define to header file
  gallium\auxiliary\vl: Increase csc_matrix size
  gallium\auxiliary\vl: Add compute shader to support video compositor
render
  gallium\auxiliary\vl: Add compute shader initialization, assign and
cleanup
  gallium\auxiliary\vl: Add debug option CS_COMPOSITOR_RENDER
  gallium\auxiliary\vl: Add video compute shader render

 src/gallium/auxiliary/Makefile.sources  |   2 +
 src/gallium/auxiliary/meson.build   |   2 +
 src/gallium/auxiliary/vl/vl_compositor.c|  59 +++-
 src/gallium/auxiliary/vl/vl_compositor.h|   6 +
 src/gallium/auxiliary/vl/vl_compositor_cs.c | 414 
 src/gallium/auxiliary/vl/vl_compositor_cs.h |  56 
 6 files changed, 532 insertions(+), 7 deletions(-)
 create mode 100644 src/gallium/auxiliary/vl/vl_compositor_cs.c
 create mode 100644 src/gallium/auxiliary/vl/vl_compositor_cs.h

-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/6] gallium\auxiliary\vl: Increase csc_matrix size

2019-02-01 Thread Zhu, James
Increase csc_matrix size to store more constants for compute shader.

Signed-off-by: James Zhu 
---
 src/gallium/auxiliary/vl/vl_compositor.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c 
b/src/gallium/auxiliary/vl/vl_compositor.c
index 2c6d585..6ab4a60 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -1440,7 +1440,7 @@ vl_compositor_init_state(struct vl_compositor_state *s, 
struct pipe_context *pip
   pipe->screen,
   PIPE_BIND_CONSTANT_BUFFER,
   PIPE_USAGE_DEFAULT,
-  sizeof(csc_matrix) + 2*sizeof(float)
+  sizeof(csc_matrix) + 4*sizeof(float) + 6*sizeof(int)
);
 
if (!s->csc_matrix)
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/6] gallium\auxiliary\vl: Add compute shader initialization, assign and cleanup

2019-02-01 Thread Zhu, James
Add compute shader initialization, assign and cleanup in vl_compositor API.

Signed-off-by: James Zhu 
---
 src/gallium/auxiliary/vl/vl_compositor.c | 30 +-
 src/gallium/auxiliary/vl/vl_compositor.h |  4 
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c 
b/src/gallium/auxiliary/vl/vl_compositor.c
index 6ab4a60..85fbab4 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -41,6 +41,7 @@
 #include "vl_csc.h"
 #include "vl_types.h"
 #include "vl_compositor.h"
+#include "vl_compositor_cs.h"
 
 enum VS_OUTPUT
 {
@@ -472,6 +473,24 @@ init_shaders(struct vl_compositor *c)
 {
assert(c);
 
+   c->cs_video_buffer = vl_compositor_cs_create_shader(c, 
compute_shader_video_buffer);
+   if (!c->cs_video_buffer) {
+  debug_printf("Unable to create video_buffer compute shader.\n");
+  return false;
+   }
+
+   c->cs_weave_rgb = vl_compositor_cs_create_shader(c, compute_shader_weave);
+   if (!c->cs_weave_rgb) {
+  debug_printf("Unable to create weave_rgb compute shader.\n");
+  return false;
+   }
+
+   c->cs_sub_pic = vl_compositor_cs_create_shader(c, compute_shader_sub_pic);
+   if (!c->cs_sub_pic) {
+  debug_printf("Unable to create sub-pictures compute shader.\n");
+  return false;
+   }
+
c->vs = create_vert_shader(c);
if (!c->vs) {
   debug_printf("Unable to create vertex shader.\n");
@@ -544,6 +563,9 @@ static void cleanup_shaders(struct vl_compositor *c)
c->pipe->delete_fs_state(c->pipe, c->fs_rgba);
c->pipe->delete_fs_state(c->pipe, c->fs_rgb_yuv.y);
c->pipe->delete_fs_state(c->pipe, c->fs_rgb_yuv.uv);
+   c->pipe->delete_compute_state(c->pipe, c->cs_video_buffer);
+   c->pipe->delete_compute_state(c->pipe, c->cs_weave_rgb);
+   c->pipe->delete_compute_state(c->pipe, c->cs_sub_pic);
 }
 
 static bool
@@ -1062,6 +1084,7 @@ vl_compositor_clear_layers(struct vl_compositor_state *s)
   s->layers[i].clearing = i ? false : true;
   s->layers[i].blend = NULL;
   s->layers[i].fs = NULL;
+  s->layers[i].cs = NULL;
   s->layers[i].viewport.scale[2] = 1;
   s->layers[i].viewport.translate[2] = 0;
   s->layers[i].rotate = VL_COMPOSITOR_ROTATE_0;
@@ -1186,6 +1209,7 @@ vl_compositor_set_buffer_layer(struct vl_compositor_state 
*s,
   switch(deinterlace) {
   case VL_COMPOSITOR_WEAVE:
  s->layers[layer].fs = c->fs_weave_rgb;
+ s->layers[layer].cs = c->cs_weave_rgb;
  break;
 
   case VL_COMPOSITOR_BOB_TOP:
@@ -1193,6 +1217,7 @@ vl_compositor_set_buffer_layer(struct vl_compositor_state 
*s,
  s->layers[layer].src.tl.y += half_a_line;
  s->layers[layer].src.br.y += half_a_line;
  s->layers[layer].fs = c->fs_video_buffer;
+ s->layers[layer].cs = c->cs_video_buffer;
  break;
 
   case VL_COMPOSITOR_BOB_BOTTOM:
@@ -1200,11 +1225,14 @@ vl_compositor_set_buffer_layer(struct 
vl_compositor_state *s,
  s->layers[layer].src.tl.y -= half_a_line;
  s->layers[layer].src.br.y -= half_a_line;
  s->layers[layer].fs = c->fs_video_buffer;
+ s->layers[layer].cs = c->cs_video_buffer;
  break;
   }
 
-   } else
+   } else {
   s->layers[layer].fs = c->fs_video_buffer;
+  s->layers[layer].cs = c->cs_video_buffer;
+   }
 }
 
 void
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h 
b/src/gallium/auxiliary/vl/vl_compositor.h
index d51b5f5..2fe70f3 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -72,6 +72,7 @@ struct vl_compositor_layer
struct pipe_viewport_state viewport;
 
void *fs;
+   void *cs;
void *samplers[3];
void *blend;
 
@@ -116,6 +117,9 @@ struct vl_compositor
void *fs_video_buffer;
void *fs_weave_rgb;
void *fs_rgba;
+   void *cs_video_buffer;
+   void *cs_weave_rgb;
+   void *cs_sub_pic;
 
struct {
   struct {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/6] gallium\auxiliary\vl: Add compute shader to support video compositor render

2019-02-01 Thread Zhu, James
Add compute shader to support video compositor render.

Signed-off-by: James Zhu 
---
 src/gallium/auxiliary/Makefile.sources  |   2 +
 src/gallium/auxiliary/meson.build   |   2 +
 src/gallium/auxiliary/vl/vl_compositor_cs.c | 414 
 src/gallium/auxiliary/vl/vl_compositor_cs.h |  56 
 4 files changed, 474 insertions(+)
 create mode 100644 src/gallium/auxiliary/vl/vl_compositor_cs.c
 create mode 100644 src/gallium/auxiliary/vl/vl_compositor_cs.h

diff --git a/src/gallium/auxiliary/Makefile.sources 
b/src/gallium/auxiliary/Makefile.sources
index 50e8808..df000f6 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -348,6 +348,8 @@ VL_SOURCES := \
vl/vl_bicubic_filter.h \
vl/vl_compositor.c \
vl/vl_compositor.h \
+   vl/vl_compositor_cs.c \
+   vl/vl_compositor_cs.h \
vl/vl_csc.c \
vl/vl_csc.h \
vl/vl_decoder.c \
diff --git a/src/gallium/auxiliary/meson.build 
b/src/gallium/auxiliary/meson.build
index 57f7e69..74e4b48 100644
--- a/src/gallium/auxiliary/meson.build
+++ b/src/gallium/auxiliary/meson.build
@@ -445,6 +445,8 @@ files_libgalliumvl = files(
   'vl/vl_bicubic_filter.h',
   'vl/vl_compositor.c',
   'vl/vl_compositor.h',
+  'vl/vl_compositor_cs.c',
+  'vl/vl_compositor_cs.h',
   'vl/vl_csc.c',
   'vl/vl_csc.h',
   'vl/vl_decoder.c',
diff --git a/src/gallium/auxiliary/vl/vl_compositor_cs.c 
b/src/gallium/auxiliary/vl/vl_compositor_cs.c
new file mode 100644
index 0000000..3cd1a76
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_compositor_cs.c
@@ -0,0 +1,414 @@
+/**
+ *
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: James Zhu 
+ *
+ **/
+
+#include 
+
+#include "tgsi/tgsi_text.h"
+#include "vl_compositor_cs.h"
+
+struct cs_viewport {
+   float scale_x;
+   float scale_y;
+   int translate_x;
+   int translate_y;
+   struct u_rect area;
+};
+
+char *compute_shader_video_buffer =
+  "COMP\n"
+  "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
+  "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
+  "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
+
+  "DCL SV[0], THREAD_ID\n"
+  "DCL SV[1], BLOCK_ID\n"
+
+  "DCL CONST[0..5]\n"
+  "DCL SVIEW[0..2], RECT, FLOAT\n"
+  "DCL SAMP[0..2]\n"
+
+  "DCL IMAGE[0], 2D, WR\n"
+  "DCL TEMP[0..7]\n"
+
+  "IMM[0] UINT32 { 8, 8, 1, 0}\n"
+  "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"
+
+  "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"
+
+  /* Drawn area check */
+  "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
+  "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
+  "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
+  "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
+  "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"
+
+  "UIF TEMP[1]\n"
+ /* Translate */
+ "UADD TEMP[2].xy, TEMP[0], -CONST[5].xyxy\n"
+ "U2F TEMP[2], TEMP[2]\n"
+ "DIV TEMP[3], TEMP[2], IMM[1].\n"
+
+ /* Scale */
+ "DIV TEMP[2], TEMP[2], CONST[3].zwzw\n"
+ "DIV TEMP[3], TEMP[3], CONST[3].zwzw\n"
+
+ /* Fetch texels */
+ "TEX_LZ TEMP[4].x, TEMP[2], SAMP[0], RECT\n"
+ "TEX_LZ TEMP[4].y, TEMP[3], SAMP[1], RECT\n"
+ "TEX_LZ TEMP[4].z, TEMP[3], SAMP[2], RECT\n"
+
+ "MOV TEMP[4].w, IMM[1].\n"
+
+ /* Color Space Conversion */
+ "DP4 TEMP[7].x, CONST[0], TEMP[4]\n"
+ "DP4 TEMP[7].y, CONST[1], TEMP[4]\n"
+ "DP4 TEMP[7].z, CONST[2], TEMP[4]\n"
+
+ "MOV TEMP[5].w, TEMP[4].\n"
+ "SLE TEMP[6].w, TEMP[5], CONST[3].\n"
+ "SGT TEMP[5].w, TEMP[5], CONST[3].\n"
+
+ "MAX TEMP[7].w, TEMP[5], 

[Mesa-dev] [PATCH 6/6] gallium\auxiliary\vl: Add video compute shader render

2019-02-01 Thread Zhu, James
Add video compute shader render. export CS_COMPOSITOR_RENDER=true
to enable video compute shader render.

Signed-off-by: James Zhu 
---
 src/gallium/auxiliary/vl/vl_compositor.c | 19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c 
b/src/gallium/auxiliary/vl/vl_compositor.c
index 7ee8402..66a8fc9 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -1376,8 +1376,8 @@ vl_compositor_convert_rgb_to_yuv(struct 
vl_compositor_state *s,
s->pipe->flush(s->pipe, NULL, 0);
 }
 
-void
-vl_compositor_render(struct vl_compositor_state *s,
+static void
+vl_compositor_gfx_render(struct vl_compositor_state *s,
  struct vl_compositor   *c,
  struct pipe_surface*dst_surface,
  struct u_rect  *dirty_area,
@@ -1419,6 +1419,21 @@ vl_compositor_render(struct vl_compositor_state *s,
draw_layers(c, s, dirty_area);
 }
 
+void
+vl_compositor_render(struct vl_compositor_state *s,
+ struct vl_compositor   *c,
+ struct pipe_surface*dst_surface,
+ struct u_rect  *dirty_area,
+ boolclear_dirty)
+{
+   assert(s);
+
+   if (cs_compositor_render_enable && s->layers->cs)
+  vl_compositor_cs_render(s, c, dst_surface, dirty_area, clear_dirty);
+   else
+  vl_compositor_gfx_render(s, c, dst_surface, dirty_area, clear_dirty);
+}
+
 bool
 vl_compositor_init(struct vl_compositor *c, struct pipe_context *pipe)
 {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/6] gallium\auxiliary\vl: Add debug option CS_COMPOSITOR_RENDER

2019-02-01 Thread Zhu, James
Add debug option CS_COMPOSITOR_RENDER to enable/disable video
compositor compute shader render through system environment
variable.

Signed-off-by: James Zhu 
---
 src/gallium/auxiliary/vl/vl_compositor.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c 
b/src/gallium/auxiliary/vl/vl_compositor.c
index 85fbab4..7ee8402 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -52,6 +52,8 @@ enum VS_OUTPUT
VS_O_VBOTTOM,
 };
 
+static bool cs_compositor_render_enable = FALSE;
+
 static void *
 create_vert_shader(struct vl_compositor *c)
 {
@@ -1422,6 +1424,9 @@ vl_compositor_init(struct vl_compositor *c, struct 
pipe_context *pipe)
 {
assert(c);
 
+   if (debug_get_bool_option("CS_COMPOSITOR_RENDER", false))
+  cs_compositor_render_enable = true;
+
memset(c, 0, sizeof(*c));
 
c->pipe = pipe;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] anv/cmd_buffer: check for NULL framebuffer

2019-02-01 Thread Juan A. Suarez Romero
This can happen when we record a VkCmdDraw in a secondary buffer that
was created inheriting from the primary buffer, but with the framebuffer
set to NULL in the VkCommandBufferInheritanceInfo.

CC: Jason Ekstrand 
---
 src/intel/vulkan/gen7_cmd_buffer.c | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/intel/vulkan/gen7_cmd_buffer.c 
b/src/intel/vulkan/gen7_cmd_buffer.c
index 352892aee33..fe1a47f6ce6 100644
--- a/src/intel/vulkan/gen7_cmd_buffer.c
+++ b/src/intel/vulkan/gen7_cmd_buffer.c
@@ -70,12 +70,21 @@ gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer 
*cmd_buffer)
   };
 
   const int max = 0xffff;
+
+  uint32_t height = 0;
+  uint32_t width = 0;
+
+  if (fb) {
+height = fb->height;
+width = fb->width;
+  }
+
   struct GEN7_SCISSOR_RECT scissor = {
  /* Do this math using int64_t so overflow gets clamped correctly. */
  .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max),
  .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max),
- .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + 
s->extent.height - 1, 0, fb->height - 1),
- .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + 
s->extent.width - 1, 0, fb->width - 1)
+ .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + 
s->extent.height - 1, 0, height - 1),
+ .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + 
s->extent.width - 1, 0, width - 1)
   };
 
   if (s->extent.width <= 0 || s->extent.height <= 0) {
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 105371] r600_shader_from_tgsi - GPR limit exceeded - shader requires 360 registers

2019-02-01 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105371

--- Comment #22 from amonpaike  ---
good and bad news...

I've just tested the latest blender build with the latest mesa-devel (February
1 2019) driver..

the standard settings situation remained predominantly the same.

the good news is that if you launch blender with these parameters:
"DRI_PRIME=1 R600_DEBUG=nosb ./blender" 
with the shader backend acceleration off, blender with eevee works perfectly
again..

the bad news is that the performance goes fuckoff ..

so the problem is of the gallium drivers in particular of the shader compiler
with the lights and shadows of the realtime blender eevee rendering engine... 
:((

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 102565] u_debug_stack.c:114: undefined reference to `_Ux86_64_getcontext'

2019-02-01 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=102565

--- Comment #5 from Gert Wollny  ---
The change that triggered the problem was that I added a test program that
linked against libgallium.a but not against libunwind.so and the fix was to add
these link flags directly to the test program link flag (A better solution
would have been to add the libunwind linker flags to the common libgallium
depends).

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 102565] u_debug_stack.c:114: undefined reference to `_Ux86_64_getcontext'

2019-02-01 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=102565

--- Comment #4 from taiyang...@126.com ---
(In reply to Nicolai Hähnle from comment #3)
> Patch is pushed to master, commit c4741bbb6fb98f78551f9e42ae570dcc924e0031.

How to see the detail changes???

i come across the same problem.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109531] implicit declaration of function ‘ac_nir_get_max_workgroup_size

2019-02-01 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109531

Mike Lothian  changed:

   What|Removed |Added

 CC||m...@fireburn.co.uk

--- Comment #1 from Mike Lothian  ---
I've created a MR to fix this
https://gitlab.freedesktop.org/mesa/mesa/merge_requests/193 it gets things
building for me locally

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109531] implicit declaration of function ‘ac_nir_get_max_workgroup_size

2019-02-01 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109531

Samuel Pitoiset  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #2 from Samuel Pitoiset  ---
Fixed.
https://cgit.freedesktop.org/mesa/mesa/commit/?id=5e7f800f32a3d7299b157bd3028fc46455f77e83

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv: take LDS into account for compute shader occupancy stats

2019-02-01 Thread Samuel Pitoiset

Fixed.

On 2/1/19 2:57 PM, Mike Lothian wrote:

Hi

I think you've left a few references to the old ac_nir_get_max_workgroup_size

FAILED: src/amd/vulkan/9198681@@vulkan_radeon@sha/radv_nir_to_llvm.c.o
x86_64-pc-linux-gnu-gcc -m32
-Isrc/amd/vulkan/9198681@@vulkan_radeon@sha -Isrc/amd/vulkan
-I../mesa-/src/amd/vulkan -Isrc/../include
-I../mesa-/src/../include -Isrc -I../mesa-/src -Isrc/mapi
rc/mesa -I../mesa-/src/gallium/include -Isrc/gallium/auxiliary
-I../mesa-/src/gallium/auxiliary -Isrc/amd -I../mesa-/src/amd
-Isrc/amd/common -I../mesa-/src/amd/common -Isrc/compiler -I.
-/src/vulkan/util -Isrc/vulkan/wsi -I../mesa-/src/vulkan/wsi
-Isrc/compiler/nir -I../mesa-/src/compiler/nir
-I/usr/lib/llvm/9/include -I/usr/include/libdrm
-fdiagnostics-color=always -DNDEBU
'-DPACKAGE_BUGREPORT="https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa;'
-DGLX_USE_TLS -DHAVE_ST_VDPAU -DENABLE_ST_OMX_BELLAGIO=0
-DENABLE_ST_OMX_TIZONIA=0 -DHAVE_X11_PLATFORM -DGLX_INDIRECT_REND
M_PLATFORM -DHAVE_SURFACELESS_PLATFORM -DENABLE_SHADER_CACHE
-DHAVE___BUILTIN_BSWAP32 -DHAVE___BUILTIN_BSWAP64 -DHAVE___BUILTIN_CLZ
-DHAVE___BUILTIN_CLZLL -DHAVE___BUILTIN_CTZ -DHAVE___BUILTIN_EXPECT -D
UILTIN_POPCOUNT -DHAVE___BUILTIN_POPCOUNTLL
-DHAVE___BUILTIN_UNREACHABLE -DHAVE_FUNC_ATTRIBUTE_CONST
-DHAVE_FUNC_ATTRIBUTE_FLATTEN -DHAVE_FUNC_ATTRIBUTE_MALLOC
-DHAVE_FUNC_ATTRIBUTE_PURE -DHAVE_FUNC_ATT
LT -DHAVE_FUNC_ATTRIBUTE_WEAK -DHAVE_FUNC_ATTRIBUTE_FORMAT
-DHAVE_FUNC_ATTRIBUTE_PACKED -DHAVE_FUNC_ATTRIBUTE_RETURNS_NONNULL
-DHAVE_FUNC_ATTRIBUTE_VISIBILITY -DHAVE_FUNC_ATTRIBUTE_ALIAS
-DHAVE_FUNC_ATT
S -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM
-DMAJOR_IN_SYSMACROS -DHAVE_SYS_SYSCTL_H -DHAVE_LINUX_FUTEX_H
-DHAVE_ENDIAN_H -DHAVE_DLFCN_H -DHAVE_STRTOF -DHAVE_MKOSTEMP
-DHAVE_POSIX_MEMALI
RTOD_L -DHAVE_DLADDR -DHAVE_DL_ITERATE_PHDR -DHAVE_ZLIB -DHAVE_PTHREAD
-DHAVE_PTHREAD_SETAFFINITY -DHAVE_LIBDRM -DHAVE_LLVM=0x0900
-DMESA_LLVM_VERSION_PATCH=0 -DHAVE_WAYLAND_PLATFORM
-DWL_HIDE_DEPRECATE
S=1 -Werror=implicit-function-declaration -Werror=missing-prototypes
-Werror=return-type -fno-math-errno -fno-trapping-math
-Wno-missing-field-initializers -Wno-format-truncation -fPIC -pthread
-D_FILE_
-D__STDC_LIMIT_MACROS -D_LARGEFILE_SOURCE -D__STDC_CONSTANT_MACROS
-fvisibility=hidden -Wno-override-init -DVK_USE_PLATFORM_XCB_KHR
-DVK_USE_PLATFORM_XLIB_KHR -DVK_USE_PLATFORM_WAYLAND_KHR -DVK_USE_PLAT
O3 -march=native -pipe -MD -MQ
'src/amd/vulkan/9198681@@vulkan_radeon@sha/radv_nir_to_llvm.c.o' -MF
'src/amd/vulkan/9198681@@vulkan_radeon@sha/radv_nir_to_llvm.c.o.d' -o
'src/amd/vulkan/9198681@@vulkan_
/amd/vulkan/radv_nir_to_llvm.c
In file included from ../mesa-/src/mesa/main/macros.h:35,
  from ../mesa-/src/amd/vulkan/radv_private.h:51,
  from ../mesa-/src/amd/vulkan/radv_nir_to_llvm.c:28:
../mesa-/src/amd/vulkan/radv_nir_to_llvm.c: In function
‘ac_translate_nir_to_llvm’:
../mesa-/src/amd/vulkan/radv_nir_to_llvm.c:3453:33: error:
implicit declaration of function ‘ac_nir_get_max_workgroup_size’; did
you mean ‘radv_nir_get_max_workgroup_size’? [-Werror=implicit-functio

ac_nir_get_max_workgroup_size(ctx.options->chip_class,
  ^
../mesa-/src/util/u_math.h:659:31: note: in definition of macro ‘MAX2’
  #define MAX2( A, B )   ( (A)>(B) ? (A) : (B) )
^
cc1: some warnings being treated as errors

Cheers

Mike

On Fri, 1 Feb 2019 at 11:07, Timothy Arceri  wrote:

Ported from d205faeb6c96.
---
  src/amd/vulkan/radv_nir_to_llvm.c |  6 +++---
  src/amd/vulkan/radv_private.h |  3 +++
  src/amd/vulkan/radv_shader.c  | 10 --
  3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
b/src/amd/vulkan/radv_nir_to_llvm.c
index e80938527e5..d90a4c0de1e 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -3372,9 +3372,9 @@ ac_setup_rings(struct radv_shader_context *ctx)
 }
  }

-static unsigned
-ac_nir_get_max_workgroup_size(enum chip_class chip_class,
- const struct nir_shader *nir)
+unsigned
+radv_nir_get_max_workgroup_size(enum chip_class chip_class,
+   const struct nir_shader *nir)
  {
 switch (nir->info.stage) {
 case MESA_SHADER_TESS_CTRL:
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 85c18906f84..e5b8286ea62 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1934,6 +1934,9 @@ void radv_compile_nir_shader(struct ac_llvm_compiler 
*ac_llvm,
  int nir_count,
  const struct radv_nir_compiler_options *options);

+unsigned radv_nir_get_max_workgroup_size(enum chip_class chip_class,
+const struct nir_shader *nir);
+
  /* radv_shader_info.h */
  struct 

Re: [Mesa-dev] [PATCH] radv: take LDS into account for compute shader occupancy stats

2019-02-01 Thread Mike Lothian
Hi

I think you've left a few references to the old ac_nir_get_max_workgroup_size

FAILED: src/amd/vulkan/9198681@@vulkan_radeon@sha/radv_nir_to_llvm.c.o
x86_64-pc-linux-gnu-gcc -m32
-Isrc/amd/vulkan/9198681@@vulkan_radeon@sha -Isrc/amd/vulkan
-I../mesa-/src/amd/vulkan -Isrc/../include
-I../mesa-/src/../include -Isrc -I../mesa-/src -Isrc/mapi
rc/mesa -I../mesa-/src/gallium/include -Isrc/gallium/auxiliary
-I../mesa-/src/gallium/auxiliary -Isrc/amd -I../mesa-/src/amd
-Isrc/amd/common -I../mesa-/src/amd/common -Isrc/compiler -I.
-/src/vulkan/util -Isrc/vulkan/wsi -I../mesa-/src/vulkan/wsi
-Isrc/compiler/nir -I../mesa-/src/compiler/nir
-I/usr/lib/llvm/9/include -I/usr/include/libdrm
-fdiagnostics-color=always -DNDEBU
'-DPACKAGE_BUGREPORT="https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa;'
-DGLX_USE_TLS -DHAVE_ST_VDPAU -DENABLE_ST_OMX_BELLAGIO=0
-DENABLE_ST_OMX_TIZONIA=0 -DHAVE_X11_PLATFORM -DGLX_INDIRECT_REND
M_PLATFORM -DHAVE_SURFACELESS_PLATFORM -DENABLE_SHADER_CACHE
-DHAVE___BUILTIN_BSWAP32 -DHAVE___BUILTIN_BSWAP64 -DHAVE___BUILTIN_CLZ
-DHAVE___BUILTIN_CLZLL -DHAVE___BUILTIN_CTZ -DHAVE___BUILTIN_EXPECT -D
UILTIN_POPCOUNT -DHAVE___BUILTIN_POPCOUNTLL
-DHAVE___BUILTIN_UNREACHABLE -DHAVE_FUNC_ATTRIBUTE_CONST
-DHAVE_FUNC_ATTRIBUTE_FLATTEN -DHAVE_FUNC_ATTRIBUTE_MALLOC
-DHAVE_FUNC_ATTRIBUTE_PURE -DHAVE_FUNC_ATT
LT -DHAVE_FUNC_ATTRIBUTE_WEAK -DHAVE_FUNC_ATTRIBUTE_FORMAT
-DHAVE_FUNC_ATTRIBUTE_PACKED -DHAVE_FUNC_ATTRIBUTE_RETURNS_NONNULL
-DHAVE_FUNC_ATTRIBUTE_VISIBILITY -DHAVE_FUNC_ATTRIBUTE_ALIAS
-DHAVE_FUNC_ATT
S -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM
-DMAJOR_IN_SYSMACROS -DHAVE_SYS_SYSCTL_H -DHAVE_LINUX_FUTEX_H
-DHAVE_ENDIAN_H -DHAVE_DLFCN_H -DHAVE_STRTOF -DHAVE_MKOSTEMP
-DHAVE_POSIX_MEMALI
RTOD_L -DHAVE_DLADDR -DHAVE_DL_ITERATE_PHDR -DHAVE_ZLIB -DHAVE_PTHREAD
-DHAVE_PTHREAD_SETAFFINITY -DHAVE_LIBDRM -DHAVE_LLVM=0x0900
-DMESA_LLVM_VERSION_PATCH=0 -DHAVE_WAYLAND_PLATFORM
-DWL_HIDE_DEPRECATE
S=1 -Werror=implicit-function-declaration -Werror=missing-prototypes
-Werror=return-type -fno-math-errno -fno-trapping-math
-Wno-missing-field-initializers -Wno-format-truncation -fPIC -pthread
-D_FILE_
-D__STDC_LIMIT_MACROS -D_LARGEFILE_SOURCE -D__STDC_CONSTANT_MACROS
-fvisibility=hidden -Wno-override-init -DVK_USE_PLATFORM_XCB_KHR
-DVK_USE_PLATFORM_XLIB_KHR -DVK_USE_PLATFORM_WAYLAND_KHR -DVK_USE_PLAT
O3 -march=native -pipe -MD -MQ
'src/amd/vulkan/9198681@@vulkan_radeon@sha/radv_nir_to_llvm.c.o' -MF
'src/amd/vulkan/9198681@@vulkan_radeon@sha/radv_nir_to_llvm.c.o.d' -o
'src/amd/vulkan/9198681@@vulkan_
/amd/vulkan/radv_nir_to_llvm.c
In file included from ../mesa-/src/mesa/main/macros.h:35,
 from ../mesa-/src/amd/vulkan/radv_private.h:51,
 from ../mesa-/src/amd/vulkan/radv_nir_to_llvm.c:28:
../mesa-/src/amd/vulkan/radv_nir_to_llvm.c: In function
‘ac_translate_nir_to_llvm’:
../mesa-/src/amd/vulkan/radv_nir_to_llvm.c:3453:33: error:
implicit declaration of function ‘ac_nir_get_max_workgroup_size’; did
you mean ‘radv_nir_get_max_workgroup_size’? [-Werror=implicit-functio

ac_nir_get_max_workgroup_size(ctx.options->chip_class,
 ^
../mesa-/src/util/u_math.h:659:31: note: in definition of macro ‘MAX2’
 #define MAX2( A, B )   ( (A)>(B) ? (A) : (B) )
   ^
cc1: some warnings being treated as errors

Cheers

Mike

On Fri, 1 Feb 2019 at 11:07, Timothy Arceri  wrote:
>
> Ported from d205faeb6c96.
> ---
>  src/amd/vulkan/radv_nir_to_llvm.c |  6 +++---
>  src/amd/vulkan/radv_private.h |  3 +++
>  src/amd/vulkan/radv_shader.c  | 10 --
>  3 files changed, 14 insertions(+), 5 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
> b/src/amd/vulkan/radv_nir_to_llvm.c
> index e80938527e5..d90a4c0de1e 100644
> --- a/src/amd/vulkan/radv_nir_to_llvm.c
> +++ b/src/amd/vulkan/radv_nir_to_llvm.c
> @@ -3372,9 +3372,9 @@ ac_setup_rings(struct radv_shader_context *ctx)
> }
>  }
>
> -static unsigned
> -ac_nir_get_max_workgroup_size(enum chip_class chip_class,
> - const struct nir_shader *nir)
> +unsigned
> +radv_nir_get_max_workgroup_size(enum chip_class chip_class,
> +   const struct nir_shader *nir)
>  {
> switch (nir->info.stage) {
> case MESA_SHADER_TESS_CTRL:
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index 85c18906f84..e5b8286ea62 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -1934,6 +1934,9 @@ void radv_compile_nir_shader(struct ac_llvm_compiler 
> *ac_llvm,
>  int nir_count,
>  const struct radv_nir_compiler_options *options);
>
> +unsigned radv_nir_get_max_workgroup_size(enum chip_class chip_class,
> +const struct nir_shader *nir);
> +
>  /* radv_shader_info.h */
>  

[Mesa-dev] [PATCH v2] egl/dri2: try to bind old context if bindContext failed

2019-02-01 Thread Luigi Santivetti
Before this change, if bindContext() failed then dri2_make_current() would
rebind the old EGL context and surfaces and return EGL_BAD_MATCH. However,
it wouldn't rebind the DRI context and surfaces, thus leaving it in an
inconsistent and unrecoverable state.

After this change, dri2_make_current() tries to bind the old DRI context
and surfaces when bindContext() failed. If unable to do so, it leaves EGL
and the DRI driver in a consistent state, it reports an error and returns
EGL_BAD_MATCH.

Fixes: 4e8f95f64d004aa1 ("egl_dri2: Always unbind old contexts")

Signed-off-by: Luigi Santivetti 
Reviewed-by: Frank Binns 
---

v2 fixes:
warning: assignment from incompatible pointer type 
[-Wincompatible-pointer-types]

gentle ping on this one.
Thanks,

Luigi

 src/egl/drivers/dri2/egl_dri2.c | 55 ++---
 1 file changed, 44 insertions(+), 11 deletions(-)

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index c98b9a5d18a..3c5237eb103 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -1446,8 +1446,9 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, 
_EGLSurface *dsurf,
_EGLSurface *old_dsurf, *old_rsurf;
_EGLSurface *tmp_dsurf, *tmp_rsurf;
__DRIdrawable *ddraw, *rdraw;
-   __DRIcontext *cctx;
+   __DRIcontext *cctx, *old_cctx;
EGLBoolean unbind;
+   EGLint egl_error;
 
if (!dri2_dpy)
   return _eglError(EGL_NOT_INITIALIZED, "eglMakeCurrent");
@@ -1472,7 +1473,7 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, 
_EGLSurface *dsurf,
cctx = (dri2_ctx) ? dri2_ctx->dri_context : NULL;
 
if (old_ctx) {
-  __DRIcontext *old_cctx = dri2_egl_context(old_ctx)->dri_context;
+  old_cctx = dri2_egl_context(old_ctx)->dri_context;
 
   if (old_dsurf)
  dri2_surf_update_fence_fd(old_ctx, disp, old_dsurf);
@@ -1489,17 +1490,24 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, 
_EGLSurface *dsurf,
unbind = (cctx == NULL && ddraw == NULL && rdraw == NULL);
 
if (!unbind && !dri2_dpy->core->bindContext(cctx, ddraw, rdraw)) {
+  __DRIdrawable *old_ddraw, *old_rdraw;
+
+  /* dri2_dpy->core->bindContext failed. We cannot tell for sure why, but
+   * setting the error to EGL_BAD_MATCH is surely better than leaving it
+   * as EGL_SUCCESS.
+   */
+  egl_error = EGL_BAD_MATCH;
+
+  old_ddraw = (old_dsurf) ? dri2_dpy->vtbl->get_dri_drawable(old_dsurf) : 
NULL;
+  old_rdraw = (old_rsurf) ? dri2_dpy->vtbl->get_dri_drawable(old_rsurf) : 
NULL;
+  old_cctx = (old_ctx) ? dri2_egl_context(old_ctx)->dri_context : NULL;
+
   /* undo the previous _eglBindContext */
   _eglBindContext(old_ctx, old_dsurf, old_rsurf, &ctx, &tmp_dsurf, 
&tmp_rsurf);
   assert(&dri2_ctx->base == ctx &&
  tmp_dsurf == dsurf &&
  tmp_rsurf == rsurf);
 
-  if (old_dsurf && _eglSurfaceInSharedBufferMode(old_dsurf) &&
-  old_dri2_dpy->vtbl->set_shared_buffer_mode) {
- old_dri2_dpy->vtbl->set_shared_buffer_mode(old_disp, old_dsurf, true);
-  }
-
   _eglPutSurface(dsurf);
   _eglPutSurface(rsurf);
   _eglPutContext(ctx);
@@ -1508,11 +1516,33 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, 
_EGLSurface *dsurf,
   _eglPutSurface(old_rsurf);
   _eglPutContext(old_ctx);
 
-  /* dri2_dpy->core->bindContext failed. We cannot tell for sure why, but
-   * setting the error to EGL_BAD_MATCH is surely better than leaving it
-   * as EGL_SUCCESS.
+  /* undo the previous dri2_dpy->core->unbindContext */
+  if (dri2_dpy->core->bindContext(old_cctx, old_ddraw, old_rdraw)) {
+ if (old_dsurf && _eglSurfaceInSharedBufferMode(old_dsurf) &&
+ old_dri2_dpy->vtbl->set_shared_buffer_mode) {
+old_dri2_dpy->vtbl->set_shared_buffer_mode(old_disp, old_dsurf, 
true);
+ }
+
+ return _eglError(egl_error, "eglMakeCurrent");
+  }
+
+  /* We cannot restore the same state as it was before calling
+   * eglMakeCurrent(), but we can keep EGL in a consistent state with
+   * the DRI driver by unbinding the old EGL context and surfaces.
*/
-  return _eglError(EGL_BAD_MATCH, "eglMakeCurrent");
+  ctx = NULL;
+  dsurf = rsurf = NULL;
+  unbind = true;
+
+  _eglBindContext(ctx, dsurf, rsurf, _ctx, _dsurf, _rsurf);
+  assert(_ctx->base == old_ctx &&
+ tmp_dsurf == old_dsurf &&
+ tmp_rsurf == old_rsurf);
+
+  _eglLog(_EGL_FATAL, "DRI2: failed to rebind the previous context");
+   } else {
+  /* We can no longer fail at this point. */
+  egl_error = EGL_SUCCESS;
}
 
dri2_destroy_surface(drv, disp, old_dsurf);
@@ -1538,6 +1568,9 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, 
_EGLSurface *dsurf,
   dri2_dpy->vtbl->set_shared_buffer_mode(disp, dsurf, mode);
}
 
+   if (egl_error != EGL_SUCCESS)
+  return _eglError(egl_error, "eglMakeCurrent");
+
return 

[Mesa-dev] [Bug 109531] implicit declaration of function ‘ac_nir_get_max_workgroup_size

2019-02-01 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109531

Bug ID: 109531
   Summary: implicit declaration of function
‘ac_nir_get_max_workgroup_size
   Product: Mesa
   Version: git
  Hardware: x86-64 (AMD64)
OS: Linux (All)
Status: NEW
  Severity: normal
  Priority: medium
 Component: Drivers/Vulkan/radeon
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: lonew...@xs4all.nl
QA Contact: mesa-dev@lists.freedesktop.org

Looks like
https://cgit.freedesktop.org/mesa/mesa/commit/?id=9b9ccee4d64b5e64f6638bca7a87b3f159e3be9c
didn't change ac_nir_get_max_workgroup_size to radv_nir_get_max_workgroup_size
in all necessary places.



./mesa/src/amd/vulkan/radv_shader.c: In function ‘generate_shader_stats’:
../mesa/src/amd/vulkan/radv_shader.c:752:5: error: implicit declaration of
function ‘ac_nir_get_max_workgroup_size’; did you mean
‘radv_nir_get_max_workgroup_size’? [-Werror=implicit-function-declaration]
 ac_nir_get_max_workgroup_size(chip_class, variant->nir);
 ^
 radv_nir_get_max_workgroup_size

../mesa/src/amd/vulkan/radv_nir_to_llvm.c: In function
‘ac_translate_nir_to_llvm’:
../mesa/src/amd/vulkan/radv_nir_to_llvm.c:3453:33: error: implicit declaration
of function ‘ac_nir_get_max_workgroup_size’; did you mean
‘radv_nir_get_max_workgroup_size’? [-Werror=implicit-function-declaration]

ac_nir_get_max_workgroup_size(ctx.options->chip_class,

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/compiler: update validator to account for half-float exec type promotion

2019-02-01 Thread Iago Toral
On Fri, 2019-02-01 at 12:34 +0100, Iago Toral wrote:
> On Fri, 2019-01-25 at 12:54 -0800, Francisco Jerez wrote:
> > Iago Toral  writes:
> > 
> > > On Thu, 2019-01-24 at 11:45 -0800, Francisco Jerez wrote:
> > > > Iago Toral  writes:
> > > > 
> > > > > On Wed, 2019-01-23 at 06:03 -0800, Francisco Jerez wrote:
> > > > > > Iago Toral Quiroga  writes:
> > > > > > 
> > > > > > > Commit c84ec70b3a72 implemented execution type promotion
> > > > > > > to
> > > > > > > 32-
> > > > > > > bit
> > > > > > > for
> > > > > > > conversions involving half-float registers, which
> > > > > > > empirical
> > > > > > > testing
> > > > > > > suggested
> > > > > > > was required, but it did not incorporate this change into
> > > > > > > the
> > > > > > > assembly validator
> > > > > > > logic. This commit adds that, preventing validation
> > > > > > > errors
> > > > > > > like
> > > > > > > this:
> > > > > > > 
> > > > > > 
> > > > > > I don't think we should be validating empirical assumptions
> > > > > > in
> > > > > > the EU
> > > > > > validator.
> > > > > 
> > > > > I am not sure I get your point, isn't c84ec70b3a72 also based
> > > > > on
> > > > > empirical testing after all?
> > > > > 
> > > > 
> > > > To some extent, but it doesn't attempt to enforce ISA
> > > > restrictions
> > > > based
> > > > on information obtained empirically.
> > > > 
> > > > > 
> > > > > > > mov(16)  g9<4>B   g3<16,8,2>HF { align1 1H };
> > > > > > > ERROR: Destination stride must be equal to the ratio of
> > > > > > > the
> > > > > > > sizes
> > > > > > > of the
> > > > > > >execution data type to the destination type
> > > > > > > 
> > > > > > > Fixes: c84ec70b3a72 "intel/fs: Promote execution type to
> > > > > > > 32-bit
> > > > > > > when any half-float conversion is needed."
> > > > > > 
> > > > > > I don't think this "fixes" anything that ever worked.
> > > > > 
> > > > > It is true that the code in that trace above is not something
> > > > > we
> > > > > can
> > > > > produce right now, because it is a conversion from HF to B
> > > > > and
> > > > > that
> > > > > should only happen within the context of
> > > > > VK_KHR_shader_float16_int8,
> > > > > however, this is a consequence of the fact that since
> > > > > c84ec70b3a72
> > > > > there is an inconsistency between what we do at the IR level
> > > > > regarding
> > > > > execution size of HF conversions and what the EU validator is
> > > > > doing,
> > > > > and from that perspective this is really fixing an
> > > > > inconsistency
> > > > > that
> > > > > didn't exist before, and I thought we would want to address
> > > > > that
> > > > > sooner
> > > > > rather than later and track it down to the original change
> > > > > that
> > > > > introduced that inconsistency so we know where this is coming
> > > > > from.
> > > > > 
> > > > 
> > > > The "inconsistency" between the IR's get_exec_type() and the EU
> > > > validator's execution_type() has existed ever since
> > > > a05b6f25bf4bfad7
> > > > removed the HF assert from get_exec_type() without actually
> > > > implementing
> > > > the code required to handle HF operands (which is what my
> > > > commit
> > > > c84ec70b3a72 did).
> > > 
> > > I agree with the fact that since a05b6f25bf4bfad7 the validator
> > > could
> > > reject valid code and that had nothing to do with your patch,
> > 
> > The validator rejected the same valid HF code since it was written,
> > that
> > had nothing to do with neither a05b6f25bf4bfad7 nor with my patch,
> > and
> > it is the real problem this patch was working around.
> > 
> > > but the inconsistency I am talking about here, that this patch
> > > fixes,
> > > is the one about get_exec_type() in the IR and execution_type()
> > > in
> > > the
> > > validator doing different things for HF instructions, which only
> > > exists since your patch and which you discuss below.
> > > 
> > 
> > The "inconsistency" exists ever since get_exec_type() was
> > introduced
> > without correct handling of HF types (even though execution_type()
> > already attempted to handle it).  And I disagree that it's a real
> > inconsistency except due to the fact that the validator is
> > incorrectly
> > attempting to validate the alignment of the destination region
> > according
> > to a rule that doesn't apply to HF types.
> > 
> > > > > Anyway, that was my rationale for the Fixes tag, but if you
> > > > > think
> > > > > this
> > > > > is not useful I am happy to drop this patch and just include
> > > > > it
> > > > > as
> > > > > part
> > > > > of my series without the tag.
> > > > > 
> > > > 
> > > > I'd like to see the actual regioning restrictions for HF types
> > > > implemented in the EU validator as part of your series.
> > > 
> > > Ok, let's see if we can agree on what restrictions should we
> > > implement
> > > then. I can implement this restriction as documented:
> > > 
> > > "Conversion between Integer and HF (Half Float) must be DWord-
> > > aligned
> > > and strided by a DWord on the 

Re: [Mesa-dev] [PATCH] intel/compiler: update validator to account for half-float exec type promotion

2019-02-01 Thread Iago Toral
On Fri, 2019-01-25 at 12:54 -0800, Francisco Jerez wrote:
> Iago Toral  writes:
> 
> > On Thu, 2019-01-24 at 11:45 -0800, Francisco Jerez wrote:
> > > Iago Toral  writes:
> > > 
> > > > On Wed, 2019-01-23 at 06:03 -0800, Francisco Jerez wrote:
> > > > > Iago Toral Quiroga  writes:
> > > > > 
> > > > > > Commit c84ec70b3a72 implemented execution type promotion to
> > > > > > 32-
> > > > > > bit
> > > > > > for
> > > > > > conversions involving half-float registers, which empirical
> > > > > > testing
> > > > > > suggested
> > > > > > was required, but it did not incorporate this change into
> > > > > > the
> > > > > > assembly validator
> > > > > > logic. This commit adds that, preventing validation errors
> > > > > > like
> > > > > > this:
> > > > > > 
> > > > > 
> > > > > I don't think we should be validating empirical assumptions
> > > > > in
> > > > > the EU
> > > > > validator.
> > > > 
> > > > I am not sure I get your point, isn't c84ec70b3a72 also based
> > > > on
> > > > empirical testing after all?
> > > > 
> > > 
> > > To some extent, but it doesn't attempt to enforce ISA
> > > restrictions
> > > based
> > > on information obtained empirically.
> > > 
> > > > 
> > > > > > mov(16)  g9<4>B   g3<16,8,2>HF { align1 1H };
> > > > > > ERROR: Destination stride must be equal to the ratio of the
> > > > > > sizes
> > > > > > of the
> > > > > >execution data type to the destination type
> > > > > > 
> > > > > > Fixes: c84ec70b3a72 "intel/fs: Promote execution type to
> > > > > > 32-bit
> > > > > > when any half-float conversion is needed."
> > > > > 
> > > > > I don't think this "fixes" anything that ever worked.
> > > > 
> > > > It is true that the code in that trace above is not something
> > > > we
> > > > can
> > > > produce right now, because it is a conversion from HF to B and
> > > > that
> > > > should only happen within the context of
> > > > VK_KHR_shader_float16_int8,
> > > > however, this is a consequence of the fact that since
> > > > c84ec70b3a72
> > > > there is an inconsistency between what we do at the IR level
> > > > regarding
> > > > execution size of HF conversions and what the EU validator is
> > > > doing,
> > > > and from that perspective this is really fixing an
> > > > inconsistency
> > > > that
> > > > didn't exist before, and I thought we would want to address
> > > > that
> > > > sooner
> > > > rather than later and track it down to the original change that
> > > > introduced that inconsistency so we know where this is coming
> > > > from.
> > > > 
> > > 
> > > The "inconsistency" between the IR's get_exec_type() and the EU
> > > validator's execution_type() has existed ever since
> > > a05b6f25bf4bfad7
> > > removed the HF assert from get_exec_type() without actually
> > > implementing
> > > the code required to handle HF operands (which is what my commit
> > > c84ec70b3a72 did).
> > 
> > I agree with the fact that since a05b6f25bf4bfad7 the validator
> > could
> > reject valid code and that had nothing to do with your patch,
> 
> The validator rejected the same valid HF code since it was written,
> that
> had nothing to do with neither a05b6f25bf4bfad7 nor with my patch,
> and
> it is the real problem this patch was working around.
> 
> > but the inconsistency I am talking about here, that this patch
> > fixes,
> > is the one about get_exec_type() in the IR and execution_type() in
> > the
> > validator doing different things for HF instructions, which only
> > exists since your patch and which you discuss below.
> > 
> 
> The "inconsistency" exists ever since get_exec_type() was introduced
> without correct handling of HF types (even though execution_type()
> already attempted to handle it).  And I disagree that it's a real
> inconsistency except due to the fact that the validator is
> incorrectly
> attempting to validate the alignment of the destination region
> according
> to a rule that doesn't apply to HF types.
> 
> > > > Anyway, that was my rationale for the Fixes tag, but if you
> > > > think
> > > > this
> > > > is not useful I am happy to drop this patch and just include it
> > > > as
> > > > part
> > > > of my series without the tag.
> > > > 
> > > 
> > > I'd like to see the actual regioning restrictions for HF types
> > > implemented in the EU validator as part of your series.
> > 
> > Ok, let's see if we can agree on what restrictions should we
> > implement
> > then. I can implement this restriction as documented:
> > 
> > "Conversion between Integer and HF (Half Float) must be DWord-
> > aligned
> > and strided by a DWord on the destination"
> > 
> > Instead of trying to apply the general one that is currently
> > breaking.
> > That will fix the assertion issue. I guess my issue with it was
> > that
> > going by your analysis this restriction is not telling the full
> > picture, this is what you had to say about this restriction:
> > 
> > "I have a feeling that the reason for this may be that the 16-bit
> > pipeline lacks the ability to handle 

Re: [Mesa-dev] [ANNOUNCE] mesa 19.0.0-rc1

2019-02-01 Thread Timothy Arceri



On 1/2/19 8:34 pm, Eero Tamminen wrote:

Hi,

On 31.1.2019 1.37, Dylan Baker wrote:
This email announces the mesa 19.0 release candidate 1. I'll keep this 
email
fairly brief since I'm already running a little late on getting this 
done :)
I've just had to resolve quite a few autotools issues to get the dist 
built.


Notable in the 19.0-rc1 branch is SWR is set to require LLVM 7 instead 
of LLVM
6. It is impossible to bootstrap SWR with LLVM 6 and compile with  
LLVM 7 due to
LLVM API changes. Since RadeonSI and Radv both require LLVM 7 I've 
taken the

liberty of bumping SWR so that we could get a tarball built.

We've had an exciting release cycle, plenty of GL and Vulkan 
extensions, ~1600
commits since the 18.3 branchpoint with substantial work across all 
areas of

mesa.


Are all the recent (i965) perf regressions included to it:
* https://bugs.freedesktop.org/show_bug.cgi?id=109517 (spilling)
* https://bugs.freedesktop.org/show_bug.cgi?id=109505 (Unigine)
* https://bugs.freedesktop.org/show_bug.cgi?id=109216 (Vulkan)
?

 - Eero

PS. There's also much older:
* https://bugs.freedesktop.org/show_bug.cgi?id=107510

Which was already fixed, but then regressed again, and this time the regressing 
commit wasn't reverted.  I'm mentioning it because Timothy had a patch 
series in October that fixed the tess/geom shader regressions (which 
were largest), but for some reason it's not yet in upstream.


My series didn't land because after trying a few times I couldn't get 
anybody to review it. CCing Jason. He can either review my patches [1] 
or fix up the code that he accidentally re-added.


https://gitlab.freedesktop.org/tarceri/mesa/commits/vectorize_io_v2





Expect rc2 about this time next week, see you then.

Dylan

git tag: mesa-19.0.0-rc1

https://mesa.freedesktop.org/archive/mesa-19.0.0-rc1.tar.gz
MD5:  b3a610b204d0cb3431823353a8cbe8e6  mesa-19.0.0-rc1.tar.gz
SHA1: d1f0d0bc49ec7e02d0cd7d141127fd2fefc72e35  mesa-19.0.0-rc1.tar.gz
SHA256: 
0a14bb059f6cead4e50923df9c24d3c5025d9310803ca5189e019f07e539639e  
mesa-19.0.0-rc1.tar.gz
SHA512: 
5bedc917afecef6a0dd11c56688a3e3fdbbaeaceca33062d6825b5525c6e78663e873bdecc96b98b0448d988ad81a7a8617c523e2d312384369c6a333b790b86  
mesa-19.0.0-rc1.tar.gz

PGP:  https://mesa.freedesktop.org/archive/mesa-19.0.0-rc1.tar.gz.sig

https://mesa.freedesktop.org/archive/mesa-19.0.0-rc1.tar.xz
MD5:  727abb6469e518ff1a2e1bde33543503  mesa-19.0.0-rc1.tar.xz
SHA1: 577642259cd269c883007df7c2772c8c636fabfb  mesa-19.0.0-rc1.tar.xz
SHA256: 
8efb32956c428d23f78364f9eace5491bda9feaafd767128133672a5f79659e8  
mesa-19.0.0-rc1.tar.xz
SHA512: 
23d21d6c4f03a1d9073ecb1f43dc251d581cdeb6b7cc24a19c299571070b4184ad4f22b0ca170ca42e58c62bb46eca0dadc334a952bbb7e0379961a30a6ca856  
mesa-19.0.0-rc1.tar.xz

PGP:  https://mesa.freedesktop.org/archive/mesa-19.0.0-rc1.tar.xz.sig


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv: take LDS into account for compute shader occupancy stats

2019-02-01 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Fri, Feb 1, 2019 at 12:07 PM Timothy Arceri  wrote:
>
> Ported from d205faeb6c96.
> ---
>  src/amd/vulkan/radv_nir_to_llvm.c |  6 +++---
>  src/amd/vulkan/radv_private.h |  3 +++
>  src/amd/vulkan/radv_shader.c  | 10 --
>  3 files changed, 14 insertions(+), 5 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
> b/src/amd/vulkan/radv_nir_to_llvm.c
> index e80938527e5..d90a4c0de1e 100644
> --- a/src/amd/vulkan/radv_nir_to_llvm.c
> +++ b/src/amd/vulkan/radv_nir_to_llvm.c
> @@ -3372,9 +3372,9 @@ ac_setup_rings(struct radv_shader_context *ctx)
> }
>  }
>
> -static unsigned
> -ac_nir_get_max_workgroup_size(enum chip_class chip_class,
> - const struct nir_shader *nir)
> +unsigned
> +radv_nir_get_max_workgroup_size(enum chip_class chip_class,
> +   const struct nir_shader *nir)
>  {
> switch (nir->info.stage) {
> case MESA_SHADER_TESS_CTRL:
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index 85c18906f84..e5b8286ea62 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -1934,6 +1934,9 @@ void radv_compile_nir_shader(struct ac_llvm_compiler 
> *ac_llvm,
>  int nir_count,
>  const struct radv_nir_compiler_options *options);
>
> +unsigned radv_nir_get_max_workgroup_size(enum chip_class chip_class,
> +const struct nir_shader *nir);
> +
>  /* radv_shader_info.h */
>  struct radv_shader_info;
>
> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> index 07450ff236b..a7fce02ee83 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -744,7 +744,8 @@ generate_shader_stats(struct radv_device *device,
>   gl_shader_stage stage,
>   struct _mesa_string_buffer *buf)
>  {
> -   unsigned lds_increment = device->physical_device->rad_info.chip_class 
> >= CIK ? 512 : 256;
> +   enum chip_class chip_class = 
> device->physical_device->rad_info.chip_class;
> +   unsigned lds_increment = chip_class >= CIK ? 512 : 256;
> struct ac_shader_config *conf;
> unsigned max_simd_waves;
> unsigned lds_per_wave = 0;
> @@ -757,12 +758,17 @@ generate_shader_stats(struct radv_device *device,
> lds_per_wave = conf->lds_size * lds_increment +
>align(variant->info.fs.num_interp * 48,
>  lds_increment);
> +   } else if (stage == MESA_SHADER_COMPUTE) {
> +   unsigned max_workgroup_size =
> +   ac_nir_get_max_workgroup_size(chip_class, 
> variant->nir);
> +   lds_per_wave = (conf->lds_size * lds_increment) /
> +  DIV_ROUND_UP(max_workgroup_size, 64);
> }
>
> if (conf->num_sgprs)
> max_simd_waves =
> MIN2(max_simd_waves,
> -
> ac_get_num_physical_sgprs(device->physical_device->rad_info.chip_class) / 
> conf->num_sgprs);
> +ac_get_num_physical_sgprs(chip_class) / 
> conf->num_sgprs);
>
> if (conf->num_vgprs)
> max_simd_waves =
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [ANNOUNCE] Mesa 18.3.3 release candidate

2019-02-01 Thread Emil Velikov
Hi Carsten,

On 2019/01/31, Carsten Haitzler wrote:
> On Wed, 30 Jan 2019 18:33:35 + Emil Velikov  
> said:
> 
> You might want to hold off on this. My bugfix was actually patched out by 
> partly
> removing some of it. The void ptr math should never have been there and wasn't
> in the final patch.
> 
> I'm talking about:
> 
> +void *cpu2 = cpu + 8;
> 
> In 300d3ae8b1445b5060f92c77c0f577f4b7b2c7d6
> 
> At least with gcc8 mesa is a dud on Raspberry Pi (can't upload/download
> textures without crashing) without the fixes. I moved the secondary ptr math
> into the ASM chunk because the C compiler seemed to just mess up cpu2 ptr
> content/value for me on gcc8 (it also kept the parameter inputs/outputs 
> cleaner
> and consistent with other ASM chunks). Keeping this as void ptr math alone is
> just wrong and asking for trouble and as it unfixed a fix I already had in
> submitted patches.
> 
> Being at FOSDEM I now no longer have access to my OS image with all of this 
> set
> up to test and won't until next week. I can't dig in and verify. Without my
> fixes at all it's a dead man walking with gcc8, and thus Arch Linux is broken
> entirely on Rpi without it (and has been for a while now).
> 
If I understand this correctly, during the rework (by Eric I assume) some of
your fixes got invalidated. Yet the current code and binaries produced are
not worse off than before the patches.

Thus from stable POV, we're safe, since nothing has regressed per se. We will
apply the extra patches for the next release.

Thanks
Emil

P.S. How did you submit the patches? I cannot see them on either the mesa-dev
mailing list or as a GitLab MR.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radv: take LDS into account for compute shader occupancy stats

2019-02-01 Thread Timothy Arceri
Ported from d205faeb6c96.
---
 src/amd/vulkan/radv_nir_to_llvm.c |  6 +++---
 src/amd/vulkan/radv_private.h |  3 +++
 src/amd/vulkan/radv_shader.c  | 10 --
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
b/src/amd/vulkan/radv_nir_to_llvm.c
index e80938527e5..d90a4c0de1e 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -3372,9 +3372,9 @@ ac_setup_rings(struct radv_shader_context *ctx)
}
 }
 
-static unsigned
-ac_nir_get_max_workgroup_size(enum chip_class chip_class,
- const struct nir_shader *nir)
+unsigned
+radv_nir_get_max_workgroup_size(enum chip_class chip_class,
+   const struct nir_shader *nir)
 {
switch (nir->info.stage) {
case MESA_SHADER_TESS_CTRL:
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 85c18906f84..e5b8286ea62 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1934,6 +1934,9 @@ void radv_compile_nir_shader(struct ac_llvm_compiler 
*ac_llvm,
 int nir_count,
 const struct radv_nir_compiler_options *options);
 
+unsigned radv_nir_get_max_workgroup_size(enum chip_class chip_class,
+const struct nir_shader *nir);
+
 /* radv_shader_info.h */
 struct radv_shader_info;
 
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 07450ff236b..a7fce02ee83 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -744,7 +744,8 @@ generate_shader_stats(struct radv_device *device,
  gl_shader_stage stage,
  struct _mesa_string_buffer *buf)
 {
-   unsigned lds_increment = device->physical_device->rad_info.chip_class 
>= CIK ? 512 : 256;
+   enum chip_class chip_class = 
device->physical_device->rad_info.chip_class;
+   unsigned lds_increment = chip_class >= CIK ? 512 : 256;
struct ac_shader_config *conf;
unsigned max_simd_waves;
unsigned lds_per_wave = 0;
@@ -757,12 +758,17 @@ generate_shader_stats(struct radv_device *device,
lds_per_wave = conf->lds_size * lds_increment +
   align(variant->info.fs.num_interp * 48,
 lds_increment);
+   } else if (stage == MESA_SHADER_COMPUTE) {
+   unsigned max_workgroup_size =
+   ac_nir_get_max_workgroup_size(chip_class, 
variant->nir);
+   lds_per_wave = (conf->lds_size * lds_increment) /
+  DIV_ROUND_UP(max_workgroup_size, 64);
}
 
if (conf->num_sgprs)
max_simd_waves =
MIN2(max_simd_waves,
-
ac_get_num_physical_sgprs(device->physical_device->rad_info.chip_class) / 
conf->num_sgprs);
+ac_get_num_physical_sgprs(chip_class) / 
conf->num_sgprs);
 
if (conf->num_vgprs)
max_simd_waves =
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [ANNOUNCE] mesa 18.3.3

2019-02-01 Thread Emil Velikov
Mesa 18.3.3 is now available.

In this release we have:

A memory leak fix in the etnaviv driver, better NEON assembly code in vc4 and
a couple of stability improvements to the radeonsi driver. Another memory leak
affecting all gallium drivers has also been addressed.

The time required to compile GLSL shaders with a large number of uniforms, such
as those used by Godot, has been reduced.

GLX and swrast have also seen some improvements. 

On the Vulkan side, the ANV driver adjusted the number of images supported
for gen8 and earlier to 8, while for newer hardware it remains the same 64.
The RADV driver has seen a collection of stability improvements and a fix for
the Vulkan version advertised in some corner cases.

To top it all off, the meson build system has seen a steady amount of fixes:
the DSO version for the VDPAU drivers is now included, ICC compatibility
patches, opencl handling and relaxed handling when building osmesa.



Andres Gomez (2):
  bin/get-pick-list.sh: fix the oneline printing
  bin/get-pick-list.sh: fix redirection in sh

Axel Davy (1):
  st/nine: Immediately upload user provided textures

Bas Nieuwenhuizen (3):
  radv: Only use 32 KiB per threadgroup on Stoney.
  radv: Set partial_vs_wave for pipelines with just GS, not tess.
  nir: Account for atomics in copy propagation.

Bruce Cherniak (1):
  gallium/swr: Fix multi-context sync fence deadlock.

Carsten Haitzler (Rasterman) (2):
  vc4: Use named parameters for the NEON inline asm.
  vc4: Declare the cpu pointers as being modified in NEON asm.

Danylo Piliaiev (1):
  glsl: Fix copying function's out to temp if dereferenced by array

Dave Airlie (3):
  dri_interface: add put shm image2 (v2)
  glx: add support for putimageshm2 path (v2)
  gallium: use put image shm2 path (v2)

Dylan Baker (4):
  meson: allow building dri driver without window system if osmesa is 
classic
  meson: fix swr KNL build
  meson: Fix compiler checks for SWR with ICC
  meson: Add warnings and errors when using ICC

Emil Velikov (5):
  docs: add sha256 checksums for 18.3.2
  cherry-ignore: radv: Fix multiview depth clears
  cherry-ignore: spirv: Handle arbitrary bit sizes for deref array indices
  cherry-ignore: WARNING: Commit XXX lists invalid sha
  docs: add release notes for 18.3.3

Eric Anholt (2):
  vc4: Don't leak the GPU fd for renderonly usage.
  vc4: Enable NEON asm on meson cross-builds.

Eric Engestrom (2):
  configure: EGL requirements only apply if EGL is built
  meson/vdpau: add missing soversion

Iago Toral Quiroga (1):
  anv/device: fix maximum number of images supported

Jason Ekstrand (3):
  anv/nir: Rework arguments to apply_pipeline_layout
  anv: Only parse pImmutableSamplers if the descriptor has samplers
  nir/xfb: Fix offset accounting for dvec3/4

Karol Herbst (2):
  nv50/ir: disable tryCollapseChainedMULs in ConstantFolding for precise 
instructions
  glsl/lower_output_reads: set invariant and precise flags on temporaries

Lionel Landwerlin (1):
  anv: fix invalid binding table index computation

Marek Olšák (4):
  radeonsi: also apply the GS hang workaround to draws without tessellation
  radeonsi: fix a u_blitter crash after a shader with FBFETCH
  radeonsi: fix rendering to tiny viewports where the viewport center is > 
8K
  st/mesa: purge framebuffers when unbinding a context

Niklas Haas (1):
  radv: correctly use vulkan 1.0 by default

Pierre Moreau (1):
  meson: Fix with_gallium_icd to with_opencl_icd

Rob Clark (1):
  loader: fix the no-modifiers case

Samuel Pitoiset (1):
  radv: clean up setting partial_es_wave for distributed tess on VI

Timothy Arceri (5):
  ac/nir_to_llvm: fix interpolateAt* for arrays
  ac/nir_to_llvm: fix clamp shadow reference for more hardware
  radv/ac: fix some fp16 handling
  glsl: use remap location when serialising uniform program resource data
  glsl: Copy function out to temp if we don't directly ref a variable

Tomeu Vizoso (1):
  etnaviv: Consolidate buffer references from framebuffers

Vinson Lee (1):
  meson: Fix typo.

git tag: mesa-18.3.3

https://mesa.freedesktop.org/archive/mesa-18.3.3.tar.gz
MD5:  f2ab9d2f89aa9781fe813a9a7f73cd3d  mesa-18.3.3.tar.gz
SHA1: a70e232afed96cf79f064dfafb0f97c00072d871  mesa-18.3.3.tar.gz
SHA256: 6b9893942fe8011c7736d51448deb6ef80ece2257e0fac27b02e997a6605d5e4  
mesa-18.3.3.tar.gz
SHA512: 
af0d1d9166d49ad9e60e9bece51409b81a5f57069addd3c6744426bdfb3000f9448ab6b3f151f8f0774273064eb79232c64760ce5fb43ac3b56692435fdb91d3
  mesa-18.3.3.tar.gz
PGP:  https://mesa.freedesktop.org/archive/mesa-18.3.3.tar.gz.sig

https://mesa.freedesktop.org/archive/mesa-18.3.3.tar.xz
MD5:  e9d1a24dbd5ca20efa75b9a29eb4566f  mesa-18.3.3.tar.xz
SHA1: abb067204ae31493dba7710c378b2b90245108ca  mesa-18.3.3.tar.xz
SHA256: 2ab6886a6966c532ccbcc3b240925e681464b658244f0cbed752615af3936299  
mesa-18.3.3.tar.xz
SHA512: 

Re: [Mesa-dev] [PATCH] ac/radv/radeonsi: add ac_get_num_physical_sgprs() helper

2019-02-01 Thread Samuel Pitoiset

Reviewed-by: Samuel Pitoiset 

On 2/1/19 11:28 AM, Timothy Arceri wrote:

---
  src/amd/common/ac_gpu_info.h | 6 ++
  src/amd/vulkan/radv_device.c | 2 +-
  src/amd/vulkan/radv_shader.c | 4 ++--
  src/amd/vulkan/radv_shader.h | 6 --
  src/gallium/drivers/radeonsi/si_shader.c | 7 +++
  5 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index a7dc1094c05..b1ef9c53734 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -172,6 +172,12 @@ static inline unsigned ac_get_max_simd_waves(enum 
radeon_family family)
}
  }
  
+static inline uint32_t

+ac_get_num_physical_sgprs(enum chip_class chip_class)
+{
+   return chip_class >= VI ? 800 : 512;
+}
+
  #ifdef __cplusplus
  }
  #endif
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 34d93b262f8..b8ef93d0cd2 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1129,7 +1129,7 @@ void radv_GetPhysicalDeviceProperties2(
  
  			/* SGPR. */

properties->sgprsPerSimd =
-   radv_get_num_physical_sgprs(pdevice);
+   
ac_get_num_physical_sgprs(pdevice->rad_info.chip_class);
properties->minSgprAllocation =
pdevice->rad_info.chip_class >= VI ? 16 : 8;
properties->maxSgprAllocation =
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 42efaf9c581..07450ff236b 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -762,7 +762,7 @@ generate_shader_stats(struct radv_device *device,
if (conf->num_sgprs)
max_simd_waves =
MIN2(max_simd_waves,
-radv_get_num_physical_sgprs(device->physical_device) 
/ conf->num_sgprs);
+
ac_get_num_physical_sgprs(device->physical_device->rad_info.chip_class) / 
conf->num_sgprs);
  
  	if (conf->num_vgprs)

max_simd_waves =
@@ -847,7 +847,7 @@ radv_GetShaderInfoAMD(VkDevice _device,
VkShaderStatisticsInfoAMD statistics = {};
statistics.shaderStageMask = shaderStage;
statistics.numPhysicalVgprs = RADV_NUM_PHYSICAL_VGPRS;
-   statistics.numPhysicalSgprs = 
radv_get_num_physical_sgprs(device->physical_device);
+   statistics.numPhysicalSgprs = 
ac_get_num_physical_sgprs(device->physical_device->rad_info.chip_class);
statistics.numAvailableSgprs = 
statistics.numPhysicalSgprs;
  
  			if (stage == MESA_SHADER_COMPUTE) {

diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 3652a811e80..b67cd2b4f15 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -407,10 +407,4 @@ static inline unsigned 
shader_io_get_unique_index(gl_varying_slot slot)
unreachable("illegal slot in get unique index\n");
  }
  
-static inline uint32_t

-radv_get_num_physical_sgprs(struct radv_physical_device *physical_device)
-{
-   return physical_device->rad_info.chip_class >= VI ? 800 : 512;
-}
-
  #endif
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index a2ed899b58f..efae02ee91c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5380,10 +5380,9 @@ static void si_calculate_max_simd_waves(struct si_shader 
*shader)
  
  	/* Compute the per-SIMD wave counts. */

if (conf->num_sgprs) {
-   if (sscreen->info.chip_class >= VI)
-   max_simd_waves = MIN2(max_simd_waves, 800 / 
conf->num_sgprs);
-   else
-   max_simd_waves = MIN2(max_simd_waves, 512 / 
conf->num_sgprs);
+   max_simd_waves =
+   MIN2(max_simd_waves,
+ac_get_num_physical_sgprs(sscreen->info.chip_class) / 
conf->num_sgprs);
}
  
  	if (conf->num_vgprs)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] ac/radv/radeonsi: add ac_get_num_physical_sgprs() helper

2019-02-01 Thread Timothy Arceri
---
 src/amd/common/ac_gpu_info.h | 6 ++
 src/amd/vulkan/radv_device.c | 2 +-
 src/amd/vulkan/radv_shader.c | 4 ++--
 src/amd/vulkan/radv_shader.h | 6 --
 src/gallium/drivers/radeonsi/si_shader.c | 7 +++
 5 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index a7dc1094c05..b1ef9c53734 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -172,6 +172,12 @@ static inline unsigned ac_get_max_simd_waves(enum 
radeon_family family)
}
 }
 
+static inline uint32_t
+ac_get_num_physical_sgprs(enum chip_class chip_class)
+{
+   return chip_class >= VI ? 800 : 512;
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 34d93b262f8..b8ef93d0cd2 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1129,7 +1129,7 @@ void radv_GetPhysicalDeviceProperties2(
 
/* SGPR. */
properties->sgprsPerSimd =
-   radv_get_num_physical_sgprs(pdevice);
+   
ac_get_num_physical_sgprs(pdevice->rad_info.chip_class);
properties->minSgprAllocation =
pdevice->rad_info.chip_class >= VI ? 16 : 8;
properties->maxSgprAllocation =
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 42efaf9c581..07450ff236b 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -762,7 +762,7 @@ generate_shader_stats(struct radv_device *device,
if (conf->num_sgprs)
max_simd_waves =
MIN2(max_simd_waves,
-
radv_get_num_physical_sgprs(device->physical_device) / conf->num_sgprs);
+
ac_get_num_physical_sgprs(device->physical_device->rad_info.chip_class) / 
conf->num_sgprs);
 
if (conf->num_vgprs)
max_simd_waves =
@@ -847,7 +847,7 @@ radv_GetShaderInfoAMD(VkDevice _device,
VkShaderStatisticsInfoAMD statistics = {};
statistics.shaderStageMask = shaderStage;
statistics.numPhysicalVgprs = RADV_NUM_PHYSICAL_VGPRS;
-   statistics.numPhysicalSgprs = 
radv_get_num_physical_sgprs(device->physical_device);
+   statistics.numPhysicalSgprs = 
ac_get_num_physical_sgprs(device->physical_device->rad_info.chip_class);
statistics.numAvailableSgprs = 
statistics.numPhysicalSgprs;
 
if (stage == MESA_SHADER_COMPUTE) {
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 3652a811e80..b67cd2b4f15 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -407,10 +407,4 @@ static inline unsigned 
shader_io_get_unique_index(gl_varying_slot slot)
unreachable("illegal slot in get unique index\n");
 }
 
-static inline uint32_t
-radv_get_num_physical_sgprs(struct radv_physical_device *physical_device)
-{
-   return physical_device->rad_info.chip_class >= VI ? 800 : 512;
-}
-
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index a2ed899b58f..efae02ee91c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5380,10 +5380,9 @@ static void si_calculate_max_simd_waves(struct si_shader 
*shader)
 
/* Compute the per-SIMD wave counts. */
if (conf->num_sgprs) {
-   if (sscreen->info.chip_class >= VI)
-   max_simd_waves = MIN2(max_simd_waves, 800 / 
conf->num_sgprs);
-   else
-   max_simd_waves = MIN2(max_simd_waves, 512 / 
conf->num_sgprs);
+   max_simd_waves =
+   MIN2(max_simd_waves,
+
ac_get_num_physical_sgprs(sscreen->info.chip_class) / conf->num_sgprs);
}
 
if (conf->num_vgprs)
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109242] [RADV] The Witcher 3 system freeze

2019-02-01 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109242

--- Comment #35 from Bas Nieuwenhuizen  ---
Not sure the fix from the bug would affect a rx 570, but this has been
fixed in radeonsi as well:

commit 5183e794affbbbf7dc959332619b0852c21536d6
Author: Marek Olšák 
Date:   Mon Jan 14 18:24:08 2019 -0500

radeonsi: also apply the GS hang workaround to draws without tessellation

ported from AMDVLK.

Cc: 18.3 
Reviewed-by: Bas Nieuwenhuizen 

On Fri, Feb 1, 2019 at 11:10 AM  wrote:
>
> Comment # 34 on bug 109242 from madc...@atlas.cz
>
> I hate to make noise about a closed issue but I believe I have been seeing a
> GPU hang very similar to what is described here in Witcher 3 fights. I can
> reproduce it only when the game is run through classic WINE D3D->OpenGL
> translation, DXVK runs flawlessly for me.
>
> Since this fix is for RADV only, is it possible that a similar fix may be
> applicable for OpenGL path as well?
>
> FTR, I have an ASUS RX570.
>
> 
> You are receiving this mail because:
>
> You are the assignee for the bug.
> You are the QA Contact for the bug.
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v5] etnaviv: fix resource usage tracking across different pipe_context's

2019-02-01 Thread Guido Günther
Hi,
On Wed, Jan 30, 2019 at 05:28:14AM +0100, Marek Vasut wrote:
> From: Christian Gmeiner 
> 
> A pipe_resource can be shared by all the pipe_context's hanging off the
> same pipe_screen.
> 
> Signed-off-by: Christian Gmeiner 
> Signed-off-by: Marek Vasut 
> To: mesa-dev@lists.freedesktop.org
> Cc: etna...@lists.freedesktop.org

Tested-By: Guido Günther 

I'm not bold enough to add a
Reviewed-By: Guido Günther 

Cheers,
 -- Guido

> ---
> Changes from v1 -> v2:
>  - to remove the resource from the used_resources set when it is destroyed
> Changes from v2 -> v3:
>  - add locking with mtx_*() to resource and screen (Marek)
> Changes from v3 -> v4:
>  - drop rsc->lock, just use screen->lock for the entire serialization (Marek)
>  - simplify etna_resource_used() flush condition, which also prevents
>potentially flushing resources twice (Marek)
>  - don't remove resources from screen->used_resources in
>etna_cmd_stream_reset_notify(), they may still be used in other
>contexts and may need flushing there later on (Marek)
> Changes from v4 -> v5:
>  - Fix coding style issues reported by Guido
> ---
>  src/gallium/drivers/etnaviv/etnaviv_context.c | 26 +-
>  src/gallium/drivers/etnaviv/etnaviv_context.h |  3 --
>  .../drivers/etnaviv/etnaviv_resource.c| 52 +++
>  .../drivers/etnaviv/etnaviv_resource.h|  8 +--
>  src/gallium/drivers/etnaviv/etnaviv_screen.c  | 12 +
>  src/gallium/drivers/etnaviv/etnaviv_screen.h  |  6 +++
>  6 files changed, 78 insertions(+), 29 deletions(-)
> 
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_context.c 
> b/src/gallium/drivers/etnaviv/etnaviv_context.c
> index 3038d21..2f8cae8 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_context.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_context.c
> @@ -36,6 +36,7 @@
>  #include "etnaviv_query.h"
>  #include "etnaviv_query_hw.h"
>  #include "etnaviv_rasterizer.h"
> +#include "etnaviv_resource.h"
>  #include "etnaviv_screen.h"
>  #include "etnaviv_shader.h"
>  #include "etnaviv_state.h"
> @@ -329,7 +330,8 @@ static void
>  etna_cmd_stream_reset_notify(struct etna_cmd_stream *stream, void *priv)
>  {
> struct etna_context *ctx = priv;
> -   struct etna_resource *rsc, *rsc_tmp;
> +   struct etna_screen *screen = ctx->screen;
> +   struct set_entry *entry;
>  
> etna_set_state(stream, VIVS_GL_API_MODE, VIVS_GL_API_MODE_OPENGL);
> etna_set_state(stream, VIVS_GL_VERTEX_ELEMENT_CONFIG, 0x0001);
> @@ -384,16 +386,18 @@ etna_cmd_stream_reset_notify(struct etna_cmd_stream 
> *stream, void *priv)
> ctx->dirty = ~0L;
> ctx->dirty_sampler_views = ~0L;
>  
> -   /* go through all the used resources and clear their status flag */
> -   LIST_FOR_EACH_ENTRY_SAFE(rsc, rsc_tmp, >used_resources, list)
> -   {
> -  debug_assert(rsc->status != 0);
> -  rsc->status = 0;
> -  rsc->pending_ctx = NULL;
> -  list_delinit(>list);
> -   }
> +   /*
> +* Go through all _resources_ associated with this _screen_, pending
> +* in this _context_ and mark them as not pending in this _context_
> +* anymore, since they were just flushed.
> +*/
> +   mtx_lock(>lock);
> +   set_foreach(screen->used_resources, entry) {
> +  struct etna_resource *rsc = (struct etna_resource *)entry->key;
>  
> -   assert(LIST_IS_EMPTY(>used_resources));
> +  _mesa_set_remove_key(rsc->pending_ctx, ctx);
> +   }
> +   mtx_unlock(>lock);
>  }
>  
>  static void
> @@ -437,8 +441,6 @@ etna_context_create(struct pipe_screen *pscreen, void 
> *priv, unsigned flags)
> /* need some sane default in case state tracker doesn't set some state: */
> ctx->sample_mask = 0x;
>  
> -   list_inithead(>used_resources);
> -
> /*  Set sensible defaults for state */
> etna_cmd_stream_reset_notify(ctx->stream, ctx);
>  
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_context.h 
> b/src/gallium/drivers/etnaviv/etnaviv_context.h
> index 584caa7..eff0a23 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_context.h
> +++ b/src/gallium/drivers/etnaviv/etnaviv_context.h
> @@ -136,9 +136,6 @@ struct etna_context {
> uint32_t prim_hwsupport;
> struct primconvert_context *primconvert;
>  
> -   /* list of resources used by currently-unsubmitted renders */
> -   struct list_head used_resources;
> -
> struct slab_child_pool transfer_pool;
> struct blitter_context *blitter;
>  
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_resource.c 
> b/src/gallium/drivers/etnaviv/etnaviv_resource.c
> index 3808c29..46ab849 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_resource.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_resource.c
> @@ -33,6 +33,7 @@
>  #include "etnaviv_screen.h"
>  #include "etnaviv_translate.h"
>  
> +#include "util/hash_table.h"
>  #include "util/u_inlines.h"
>  #include "util/u_memory.h"
>  
> @@ -275,7 +276,6 @@ etna_resource_alloc(struct pipe_screen *pscreen, unsigned 
> layout,
> rsc->halign = halign;
>  
> 

Re: [Mesa-dev] [PATCH 09/19] radv: use the new attachments array in CmdEndRenderPass()

2019-02-01 Thread Bas Nieuwenhuizen
On Fri, Feb 1, 2019 at 8:52 AM Samuel Pitoiset
 wrote:
>
>
> On 1/31/19 11:16 AM, Bas Nieuwenhuizen wrote:
> > On Tue, Jan 29, 2019 at 10:16 PM Samuel Pitoiset
> >  wrote:
> >> That shouldn't change anything as we check if the last
> >> subpass id is the final subpass.
> >>
> >> Signed-off-by: Samuel Pitoiset 
> >> ---
> >>   src/amd/vulkan/radv_cmd_buffer.c | 16 +---
> >>   1 file changed, 13 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> >> b/src/amd/vulkan/radv_cmd_buffer.c
> >> index 914ff0055d9..b0b453d76e9 100644
> >> --- a/src/amd/vulkan/radv_cmd_buffer.c
> >> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> >> @@ -4333,15 +4333,25 @@ void radv_CmdEndRenderPass(
> >>  VkCommandBuffer commandBuffer)
> >>   {
> >>  RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
> >> +   struct radv_cmd_state *state = _buffer->state;
> >> +   const struct radv_subpass *subpass = state->subpass;
> >> +   uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);
> >>
> >>  radv_subpass_barrier(cmd_buffer, 
> >> _buffer->state.pass->end_barrier);
> >>
> >>  radv_cmd_buffer_resolve_subpass(cmd_buffer);
> >>
> >> -   for (unsigned i = 0; i < 
> >> cmd_buffer->state.framebuffer->attachment_count; ++i) {
> >> -   VkImageLayout layout = 
> >> cmd_buffer->state.pass->attachments[i].final_layout;
> >> +   for (uint32_t i = 0; i < subpass->attachment_count; ++i) {
> >> +   const uint32_t a = subpass->attachments[i].attachment;
> >> +   if (a == VK_ATTACHMENT_UNUSED)
> >> +   continue;
> >> +
> >> +   if (state->pass->attachments[a].last_subpass_idx != 
> >> subpass_id)
> >> +   continue;
> > I don't think we can add this without adding logic that if the
> > attachment is not in the final subpass, we transition it to the final
> > layout earlier?
>
> I don't see the problem. If the attachment isn't in the final subpass,
> the transition should happen in the previous subpass?

It should if we did the right thing, but we did not implement doing
the final transition before the final subpass yet right?

>
> >
> >> +
> >> +   VkImageLayout layout = 
> >> state->pass->attachments[a].final_layout;
> >>  radv_handle_subpass_image_transition(cmd_buffer,
> >> - (struct radv_subpass_attachment){i, 
> >> layout});
> >> + (struct radv_subpass_attachment){a, 
> >> layout});
> >>  }
> >>
> >>  vk_free(_buffer->pool->alloc, cmd_buffer->state.attachments);
> >> --
> >> 2.20.1
> >>
> >> ___
> >> mesa-dev mailing list
> >> mesa-dev@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 109242] [RADV] The Witcher 3 system freeze

2019-02-01 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=109242

--- Comment #34 from madc...@atlas.cz ---
I hate to make noise about a closed issue but I believe I have been seeing a
GPU hang very similar to what is described here in Witcher 3 fights. I can
reproduce it only when the game is run through classic WINE D3D->OpenGL
translation, DXVK runs flawlessly for me.

Since this fix is for RADV only, is it possible that a similar fix may be
applicable for OpenGL path as well?

FTR, I have an ASUS RX570.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [ANNOUNCE] mesa 19.0.0-rc1

2019-02-01 Thread Eero Tamminen

Hi,

On 31.1.2019 1.37, Dylan Baker wrote:

This email announces the mesa 19.0 release candidate 1. I'll keep this email
fairly brief since I'm already running a little late on getting this done :)
I've just had to resolve quite a few autotools issues to get the dist built.

Notable in the 19.0-rc1 branch is SWR is set to require LLVM 7 instead of LLVM
6. It is impossible to bootstrap SWR with LLVM 6 and compile with  LLVM 7 due to
LLVM API changes. Since RadeonSI and Radv both require LLVM 7 I've taken the
liberty of bumping SWR so that we could get a tarball built.

We've had an exciting release cycle, plenty of GL and Vulkan extensions, ~1600
commits since the 18.3 branchpoint with substantial work across all areas of
mesa.


Are all the recent (i965) perf regressions included in it:
* https://bugs.freedesktop.org/show_bug.cgi?id=109517 (spilling)
* https://bugs.freedesktop.org/show_bug.cgi?id=109505 (Unigine)
* https://bugs.freedesktop.org/show_bug.cgi?id=109216 (Vulkan)
?

- Eero

PS. There's also much older:
* https://bugs.freedesktop.org/show_bug.cgi?id=107510

Which was already fixed, but then regressed again, and this time the regressing 
commit was not reverted.  I'm mentioning it because Timothy had a patch 
series in October that fixed the tess/geom shader regressions (which 
were largest), but for some reason it's not yet in upstream.



Expect rc2 about this time next week, see you then.

Dylan

git tag: mesa-19.0.0-rc1

https://mesa.freedesktop.org/archive/mesa-19.0.0-rc1.tar.gz
MD5:  b3a610b204d0cb3431823353a8cbe8e6  mesa-19.0.0-rc1.tar.gz
SHA1: d1f0d0bc49ec7e02d0cd7d141127fd2fefc72e35  mesa-19.0.0-rc1.tar.gz
SHA256: 0a14bb059f6cead4e50923df9c24d3c5025d9310803ca5189e019f07e539639e  
mesa-19.0.0-rc1.tar.gz
SHA512: 
5bedc917afecef6a0dd11c56688a3e3fdbbaeaceca33062d6825b5525c6e78663e873bdecc96b98b0448d988ad81a7a8617c523e2d312384369c6a333b790b86
  mesa-19.0.0-rc1.tar.gz
PGP:  https://mesa.freedesktop.org/archive/mesa-19.0.0-rc1.tar.gz.sig

https://mesa.freedesktop.org/archive/mesa-19.0.0-rc1.tar.xz
MD5:  727abb6469e518ff1a2e1bde33543503  mesa-19.0.0-rc1.tar.xz
SHA1: 577642259cd269c883007df7c2772c8c636fabfb  mesa-19.0.0-rc1.tar.xz
SHA256: 8efb32956c428d23f78364f9eace5491bda9feaafd767128133672a5f79659e8  
mesa-19.0.0-rc1.tar.xz
SHA512: 
23d21d6c4f03a1d9073ecb1f43dc251d581cdeb6b7cc24a19c299571070b4184ad4f22b0ca170ca42e58c62bb46eca0dadc334a952bbb7e0379961a30a6ca856
  mesa-19.0.0-rc1.tar.xz
PGP:  https://mesa.freedesktop.org/archive/mesa-19.0.0-rc1.tar.xz.sig


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 1/4] radv: gather more info about push constants

2019-02-01 Thread Samuel Pitoiset
This is needed in order to inline some push constants when possible.
This also adds a new helper for initializing the pass.

v2: - use MIN2/MAX2
- use UINT8_MAX

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_nir_to_llvm.c |  2 ++
 src/amd/vulkan/radv_private.h |  2 ++
 src/amd/vulkan/radv_shader.h  |  4 
 src/amd/vulkan/radv_shader_info.c | 30 +-
 4 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
b/src/amd/vulkan/radv_nir_to_llvm.c
index e80938527e5..44426c84232 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -3439,6 +3439,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct 
ac_llvm_compiler *ac_llvm,
 
memset(shader_info, 0, sizeof(*shader_info));
 
+   radv_nir_shader_info_init(_info->info);
+
for(int i = 0; i < shader_count; ++i)
radv_nir_shader_info_pass(shaders[i], options, 
_info->info);
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 85c18906f84..4c76521a045 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1941,6 +1941,8 @@ void radv_nir_shader_info_pass(const struct nir_shader 
*nir,
   const struct radv_nir_compiler_options *options,
   struct radv_shader_info *info);
 
+void radv_nir_shader_info_init(struct radv_shader_info *info);
+
 struct radeon_winsys_sem;
 
 #define RADV_DEFINE_HANDLE_CASTS(__radv_type, __VkType)\
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 3652a811e80..0f049f9a528 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -162,6 +162,10 @@ struct radv_streamout_info {
 
 struct radv_shader_info {
bool loads_push_constants;
+   uint8_t min_push_constant_used;
+   uint8_t max_push_constant_used;
+   bool has_32bit_push_constants;
+   bool has_indirect_push_constants;
uint32_t desc_set_used_mask;
bool needs_multiview_view_index;
bool uses_invocation_id;
diff --git a/src/amd/vulkan/radv_shader_info.c 
b/src/amd/vulkan/radv_shader_info.c
index 7e5a3789af2..018ca703285 100644
--- a/src/amd/vulkan/radv_shader_info.c
+++ b/src/amd/vulkan/radv_shader_info.c
@@ -190,6 +190,28 @@ gather_intrinsic_store_deref_info(const nir_shader *nir,
}
 }
 
+static void
+gather_push_constant_info(const nir_shader *nir,
+ const nir_intrinsic_instr *instr,
+ struct radv_shader_info *info)
+{
+   nir_const_value *cval = nir_src_as_const_value(instr->src[0]);
+
+   if (!cval)
+   info->has_indirect_push_constants = true;
+
+   if (instr->dest.ssa.bit_size == 32)
+   info->has_32bit_push_constants = true;
+
+   int base = nir_intrinsic_base(instr);
+   int range = nir_intrinsic_range(instr);
+
+   info->max_push_constant_used = MAX2(base + range, 
info->max_push_constant_used);
+   info->min_push_constant_used = MIN2(base, info->min_push_constant_used);
+
+   info->loads_push_constants = true;
+}
+
 static void
 gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
  struct radv_shader_info *info)
@@ -243,7 +265,7 @@ gather_intrinsic_info(const nir_shader *nir, const 
nir_intrinsic_instr *instr,
info->uses_prim_id = true;
break;
case nir_intrinsic_load_push_constant:
-   info->loads_push_constants = true;
+   gather_push_constant_info(nir, instr, info);
break;
case nir_intrinsic_vulkan_resource_index:
info->desc_set_used_mask |= (1 << 
nir_intrinsic_desc_set(instr));
@@ -504,6 +526,12 @@ gather_xfb_info(const nir_shader *nir, struct 
radv_shader_info *info)
ralloc_free(xfb);
 }
 
+void
+radv_nir_shader_info_init(struct radv_shader_info *info)
+{
+   info->min_push_constant_used = UINT8_MAX;
+}
+
 void
 radv_nir_shader_info_pass(const struct nir_shader *nir,
  const struct radv_nir_compiler_options *options,
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [ANNOUNCE] Mesa 18.3.3 release candidate

2019-02-01 Thread Carsten Haitzler
On Wed, 30 Jan 2019 18:33:35 + Emil Velikov  said:

You might want to hold off on this. My bugfix was actually patched out by partly
removing some of it. The void ptr math should never have been there and wasn't
in the final patch.

I'm talking about:

+void *cpu2 = cpu + 8;

In 300d3ae8b1445b5060f92c77c0f577f4b7b2c7d6

At least with gcc8 mesa is a dud on Raspberry Pi (can't upload/download
textures without crashing) without the fixes. I moved the secondary ptr math
into the ASM chunk because the C compiler seemed to just mess up cpu2 ptr
content/value for me on gcc8 (it also kept the parameter inputs/outputs cleaner
and consistent with other ASM chunks). Keeping this as void ptr math alone is
just wrong and asking for trouble, and it undid a fix I already had in
my submitted patches.

Being at FOSDEM I now no longer have access to my OS image with all of this set
up to test and won't until next week. I can't dig in and verify. Without my
fixes at all it's a dead man walking with gcc8, and thus Arch Linux is broken
entirely on Rpi without it (and has been for a while now).

> Hello list,
> 
> The candidate for the Mesa 18.3.3 is now available. Currently we have:
>  - 45 queued
>  - 4 nominated (outstanding)
>  - and 3 rejected patches
> 
> 
> In the current release candidate we have fix for a memory leak in the etnaviv
> driver, better NEON assembly code in vc4 and couple of stability improvements
> to the radeonsi driver. A memory leak affecting all gallium drivers has been
> addressed.
> 
> The time required to compile GLSL shaders with a large number of uniforms, such
> as those used by Godot, has been improved.
> 
> GLX and swrast have also been improved.
> 
> On the Vulkan side, the ANV driver adjusted the number of images supported
> for gen8 and earlier to 8, while for newer hardware it remains the same 64.
> The RADV driver has seen a collection of stability improvements and a fix for
> the Vulkan version advertised in some corner cases.
> 
> To top it all up, the meson build system has seen a steady amount of fixes:
> the DSO version for the VDPAU drivers is now included, ICC compatibility
> patches, opencl handling and relaxed handling when building osmesa.
> 
> 
> Take a look at section "Mesa stable queue" for more information.
> 
> 
> Testing reports/general approval
> 
> Any testing reports (or general approval of the state of the branch) will be
> greatly appreciated.
> 
> The plan is to have 18.3.3 this thursday (31st January 2019), around or
> shortly after 18:00 GMT.
> 
> If you have any questions or suggestions - be that about the current patch
> queue or otherwise, please go ahead.
> 
> 
> Trivial merge conflicts
> ---
> 
> commit b280cdb59e38af5d10e148fb5f2ff5e29503bf10
> Author: Carsten Haitzler (Rasterman) 
> 
> vc4: Use named parameters for the NEON inline asm.
> 
> (cherry picked from commit 522f68847152e9111def094f7fb35b44f3d0fc80)
> 
> 
> commit 813f0a82960ed637e862ee596cef23c6574a7888
> Author: Carsten Haitzler (Rasterman) 
> 
> vc4: Declare the cpu pointers as being modified in NEON asm.
> 
> (cherry picked from commit 300d3ae8b1445b5060f92c77c0f577f4b7b2c7d6)
> (cherry picked from commit 385843ac3ce1b868d9e24fcb2dbc0c8d5f5a7c99)
> 
> 
> commit 5e8af9e609632f7062bf9dc81b44c6877a7d8ee4
> Author: Tomeu Vizoso 
> 
> etnaviv: Consolidate buffer references from framebuffers
> 
> (cherry picked from commit bf1dfcc3e8120400c9a78d03dd914a786728b5f7)
> 
> 
> 
> Cheers,
> Emil
> 
> 
> Mesa stable queue
> -
> 
> Nominated (4)
> ==
> 
> Ernestas Kulik (2):
>   f6e49d5ad0f vc4: Fix leak in HW queries error path
>   90458bef544 v3d: Fix leak in resource setup error path
> 
> Jason Ekstrand (1):
>   cf42b0f9e2d intel/fs: Handle IMAGE_SIZE in size_read() and
> is_send_from_grf()
> 
> Rob Clark (1):
>   c3baa077bf6 freedreno: stop frob'ing pipe_resource::nr_samples
> 
> 
> Queued (45)
> ===
> 
> Andres Gomez (2):
>   bin/get-pick-list.sh: fix the oneline printing
>   bin/get-pick-list.sh: fix redirection in sh
> 
> Axel Davy (1):
>   st/nine: Immediately upload user provided textures
> 
> Bas Nieuwenhuizen (3):
>   radv: Only use 32 KiB per threadgroup on Stoney.
>   radv: Set partial_vs_wave for pipelines with just GS, not tess.
>   nir: Account for atomics in copy propagation.
> 
> Bruce Cherniak (1):
>   gallium/swr: Fix multi-context sync fence deadlock.
> 
> Carsten Haitzler (Rasterman) (2):
>   vc4: Use named parameters for the NEON inline asm.
>   vc4: Declare the cpu pointers as being modified in NEON asm.
> Squashed with:
>   vc4: Declare the last cpu pointer as being modified in NEON asm.
> 
> Dave Airlie (3):
>   dri_interface: add put shm image2 (v2)
>   glx: add support for putimageshm2 path (v2)
>   gallium: use put image shm2 path (v2)
> 
> Dylan Baker (4):
>   meson: allow