[Mesa-dev] [PATCH v2] glsl: move uniform calculation to link_uniforms

2016-01-19 Thread Tapani Pälli
Patch moves uniform calculation to happen during link_uniforms, this
is possible with help of UniformRemapTable that has all the reserved
locations.

Location assignment for implicit locations is changed so that we
utilize also the 'holes' that explicit uniform location assignment
might have left in UniformRemapTable, this makes it possible to fit
more uniforms as previously we were lazy here and wasting space.

Fixes following CTS tests:
   ES31-CTS.explicit_uniform_location.uniform-loc-mix-with-implicit-max
   ES31-CTS.explicit_uniform_location.uniform-loc-mix-with-implicit-max-array

v2: code cleanups (Matt), increment NumUniformRemapTable correctly
(Timothy), fix find_empty_block to work like intended (sigh) and
add some more comments.

Signed-off-by: Tapani Pälli 
---
 src/glsl/link_uniforms.cpp | 87 --
 src/glsl/linker.cpp| 19 --
 src/glsl/linker.h  |  3 +-
 3 files changed, 85 insertions(+), 24 deletions(-)

diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
index 33b2d4c..76ee70d 100644
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -1057,9 +1057,40 @@ assign_hidden_uniform_slot_id(const char *name, unsigned 
hidden_id,
uniform_size->map->put(hidden_uniform_start + hidden_id, name);
 }
 
+/**
+ * Search UniformRemapTable for empty block big enough to hold given uniform.
+ * TODO Optimize this algorithm later if it turns out to be a major bottleneck.
+ */
+static int
+find_empty_block(struct gl_shader_program *prog,
+ struct gl_uniform_storage *uniform)
+{
+   const unsigned entries = MAX2(1, uniform->array_elements);
+   for (unsigned i = 0, j; i < prog->NumUniformRemapTable; i++) {
+  /* We found empty space in UniformRemapTable. */
+  if (prog->UniformRemapTable[i] == NULL) {
+ for (j = i; j < entries && j < prog->NumUniformRemapTable; j++) {
+if (prog->UniformRemapTable[j] != NULL) {
+   /* Entries do not fit in this space, continue searching
+* after this location.
+*/
+   i = j + 1;
+   break;
+}
+ }
+ /* Entries fit, we can return this location. */
+ if (i != j + 1) {
+return i;
+ }
+  }
+   }
+   return -1;
+}
+
 void
 link_assign_uniform_locations(struct gl_shader_program *prog,
-  unsigned int boolean_true)
+  unsigned int boolean_true,
+  unsigned int max_locations)
 {
ralloc_free(prog->UniformStorage);
prog->UniformStorage = NULL;
@@ -1150,6 +1181,20 @@ link_assign_uniform_locations(struct gl_shader_program 
*prog,
 
parcel_out_uniform_storage parcel(prog->UniformHash, uniforms, data);
 
+   unsigned total_entries = 0;
+
+   /* Calculate amount of 'holes' left after explicit locations were
+* reserved from UniformRemapTable.
+*/
+   unsigned empty_locs = 0;
+   for (unsigned i = 0; i < prog->NumUniformRemapTable; i++)
+  if (prog->UniformRemapTable[i] == NULL)
+ empty_locs++;
+
+   /* Add all the reserved explicit locations - empty locations in remap 
table. */
+   if (prog->NumUniformRemapTable)
+  total_entries = (prog->NumUniformRemapTable - 1) - empty_locs;
+
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
   if (prog->_LinkedShaders[i] == NULL)
 continue;
@@ -1213,21 +1258,43 @@ link_assign_uniform_locations(struct gl_shader_program 
*prog,
   /* how many new entries for this uniform? */
   const unsigned entries = MAX2(1, uniforms[i].array_elements);
 
-  /* resize remap table to fit new entries */
-  prog->UniformRemapTable =
- reralloc(prog,
-  prog->UniformRemapTable,
-  gl_uniform_storage *,
-  prog->NumUniformRemapTable + entries);
+  /* Find UniformRemapTable for empty blocks where we can fit this 
uniform. */
+  int chosen_location = -1;
+
+  if (empty_locs)
+ chosen_location = find_empty_block(prog, &uniforms[i]);
+
+  if (chosen_location != -1) {
+ empty_locs -= entries;
+  } else {
+ chosen_location = prog->NumUniformRemapTable;
+
+ /* Add new entries to the total amount of entries. */
+ total_entries += entries;
+
+ /* resize remap table to fit new entries */
+ prog->UniformRemapTable =
+reralloc(prog,
+ prog->UniformRemapTable,
+ gl_uniform_storage *,
+ prog->NumUniformRemapTable + entries);
+ prog->NumUniformRemapTable += entries;
+  }
 
   /* set pointers for this uniform */
   for (unsigned j = 0; j < entries; j++)
- prog->UniformRemapTable[prog->NumUniformRemapTable+j] = &uniforms[i];
+ prog->UniformRemapTable[chosen_location + j] = &uniforms[i];
 
   /* set the base location in remap 

Re: [Mesa-dev] [PATCH 01/10] tgsi: add MEMBAR opcode to handle memoryBarrier* GLSL intrinsics

2016-01-19 Thread Marek Olšák
On Tue, Jan 19, 2016 at 3:25 AM, Ilia Mirkin  wrote:
> On Mon, Jan 18, 2016 at 6:06 AM, Marek Olšák  wrote:
>> For 1-4,
>>
>> Reviewed-by: Marek Olšák 
>>
>> I'm not very familiar with the code in 2, but the changes seem reasonable.
>>
>> Also, and I know this is not your mistake, but still, mtypes.h has:
>>
>> struct gl_atomic_buffer_binding
>>   AtomicBufferBindings[MAX_COMBINED_ATOMIC_BUFFERS];
>>
>> But it should be:
>>
>> struct gl_atomic_buffer_binding
>>   AtomicBufferBindings[16 /* or use a proper definition here */];
>>
>> It's not possible to use more than MaxAtomicBufferBindings, because
>> the slots are shared among all shader stages.
>
> Besides the suboptimal name, I don't see what's so terribly wrong
> about this. They're saying there are 15*6 binding points (the value of
> MAX_COMBINED_ATOMIC_BUFFERS), and that's also the
> MaxCombinedAtomicBuffers thing. I guess MaxCombinedAtomicBuffers
> should be N * the max # of bindings? So this is a bit more restrictive
> than it could be, but... meh.

I don't understand. AtomicBufferBindings are binding points. There is
a fixed number of binding points shared by all shader stages, for
example 15. It's not possible to bind a buffer above that. From the
spec:

"BindBufferBase and BindBufferRange will generate an INVALID_VALUE
error if <index> is greater than or equal to the value of
MAX_ATOMIC_COUNTER_BUFFER_BINDINGS".

This is why AtomicBufferBindings[MAX_COMBINED_ATOMIC_BUFFERS] wastes a
lot of memory.

MAX_COMBINED_ATOMIC_BUFFERS is about shader references, not bindings.
You can have 15 global bindings (slots), but 5 shaders can reference
them, which leads to 15*5 references. This is a thing that only the
linker should care about. It's only possible to get the maximum if all
5 stages use the same slots (because there are only 15). The spec is
pretty clear about it:

"If an atomic counter buffer is used by multiple shaders, each such
use counts separately against this combined limit. The combined atomic
counter buffer use limit can be obtained by calling GetIntegerv with a
<pname> of MAX_COMBINED_ATOMIC_COUNTER_BUFFERS"

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 07/10] st/mesa: add support for memory barrier intrinsics

2016-01-19 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Tue, Jan 19, 2016 at 3:30 AM, Ilia Mirkin  wrote:
> Signed-off-by: Ilia Mirkin 
>
> v1 -> v2: use TGSI_MEMBAR defines
> ---
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 45 
> ++
>  1 file changed, 45 insertions(+)
>
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 666b3d8..870991d 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -451,6 +451,7 @@ public:
>
> void visit_atomic_counter_intrinsic(ir_call *);
> void visit_ssbo_intrinsic(ir_call *);
> +   void visit_membar_intrinsic(ir_call *);
>
> st_src_reg result;
>
> @@ -3299,6 +3300,40 @@ glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir)
>  }
>
>  void
> +glsl_to_tgsi_visitor::visit_membar_intrinsic(ir_call *ir)
> +{
> +   const char *callee = ir->callee->function_name();
> +
> +   if (!strcmp("__intrinsic_memory_barrier", callee))
> +  emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
> +   st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER |
> +  TGSI_MEMBAR_ATOMIC_BUFFER |
> +  TGSI_MEMBAR_SHADER_IMAGE |
> +  TGSI_MEMBAR_SHARED));
> +   else if (!strcmp("__intrinsic_memory_barrier_atomic_counter", callee))
> +  emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
> +   st_src_reg_for_int(TGSI_MEMBAR_ATOMIC_BUFFER));
> +   else if (!strcmp("__intrinsic_memory_barrier_buffer", callee))
> +  emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
> +   st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER));
> +   else if (!strcmp("__intrinsic_memory_barrier_image", callee))
> +  emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
> +   st_src_reg_for_int(TGSI_MEMBAR_SHADER_IMAGE));
> +   else if (!strcmp("__intrinsic_memory_barrier_shared", callee))
> +  emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
> +   st_src_reg_for_int(TGSI_MEMBAR_SHARED));
> +   else if (!strcmp("__intrinsic_group_memory_barrier", callee))
> +  emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
> +   st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER |
> +  TGSI_MEMBAR_ATOMIC_BUFFER |
> +  TGSI_MEMBAR_SHADER_IMAGE |
> +  TGSI_MEMBAR_SHARED |
> +  TGSI_MEMBAR_THREAD_GROUP));
> +   else
> +  assert(!"Unexpected memory barrier intrinsic");
> +}
> +
> +void
>  glsl_to_tgsi_visitor::visit(ir_call *ir)
>  {
> glsl_to_tgsi_instruction *call_inst;
> @@ -3329,6 +3364,16 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
>return;
> }
>
> +   if (!strcmp("__intrinsic_memory_barrier", callee) ||
> +   !strcmp("__intrinsic_memory_barrier_atomic_counter", callee) ||
> +   !strcmp("__intrinsic_memory_barrier_buffer", callee) ||
> +   !strcmp("__intrinsic_memory_barrier_image", callee) ||
> +   !strcmp("__intrinsic_memory_barrier_shared", callee) ||
> +   !strcmp("__intrinsic_group_memory_barrier", callee)) {
> +  visit_membar_intrinsic(ir);
> +  return;
> +   }
> +
> entry = get_function_signature(sig);
> /* Process in parameters. */
> foreach_two_lists(formal_node, &sig->parameters,
> --
> 2.4.10
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] glsl: move uniform calculation to link_uniforms

2016-01-19 Thread Lofstedt, Marta
This seems a bit suboptimal, since the same space is potentially searched 
multiple times. However, I believe that a better solution would be to use some 
other data structure which would probably require quite a big effort, so for 
now, this is:

Reviewed-by: Marta Lofstedt 


> -Original Message-
> From: mesa-dev [mailto:mesa-dev-boun...@lists.freedesktop.org] On
> Behalf Of Tapani Pälli
> Sent: Tuesday, January 19, 2016 11:17 AM
> To: mesa-dev@lists.freedesktop.org
> Subject: [Mesa-dev] [PATCH v2] glsl: move uniform calculation to
> link_uniforms
> 
> Patch moves uniform calculation to happen during link_uniforms, this is
> possible with help of UniformRemapTable that has all the reserved locations.
> 
> Location assignment for implicit locations is changed so that we utilize also
> the 'holes' that explicit uniform location assignment might have left in
> UniformRemapTable, this makes it possible to fit more uniforms as
> previously we were lazy here and wasting space.
> 
> Fixes following CTS tests:
>ES31-CTS.explicit_uniform_location.uniform-loc-mix-with-implicit-max
>ES31-CTS.explicit_uniform_location.uniform-loc-mix-with-implicit-max-
> array
> 
> v2: code cleanups (Matt), increment NumUniformRemapTable correctly
> (Timothy), fix find_empty_block to work like intended (sigh) and
> add some more comments.
> 
> Signed-off-by: Tapani Pälli 
> ---
>  src/glsl/link_uniforms.cpp | 87
> --
>  src/glsl/linker.cpp| 19 --
>  src/glsl/linker.h  |  3 +-
>  3 files changed, 85 insertions(+), 24 deletions(-)
> 
> diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp index
> 33b2d4c..76ee70d 100644
> --- a/src/glsl/link_uniforms.cpp
> +++ b/src/glsl/link_uniforms.cpp
> @@ -1057,9 +1057,40 @@ assign_hidden_uniform_slot_id(const char
> *name, unsigned hidden_id,
> uniform_size->map->put(hidden_uniform_start + hidden_id, name);  }
> 
> +/**
> + * Search UniformRemapTable for empty block big enough to hold given
> uniform.
> + * TODO Optimize this algorithm later if it turns out to be a major
> bottleneck.
> + */
> +static int
> +find_empty_block(struct gl_shader_program *prog,
> + struct gl_uniform_storage *uniform) {
> +   const unsigned entries = MAX2(1, uniform->array_elements);
> +   for (unsigned i = 0, j; i < prog->NumUniformRemapTable; i++) {
> +  /* We found empty space in UniformRemapTable. */
> +  if (prog->UniformRemapTable[i] == NULL) {
> + for (j = i; j < entries && j < prog->NumUniformRemapTable; j++) {
> +if (prog->UniformRemapTable[j] != NULL) {
> +   /* Entries do not fit in this space, continue searching
> +* after this location.
> +*/
> +   i = j + 1;
> +   break;
> +}
> + }
> + /* Entries fit, we can return this location. */
> + if (i != j + 1) {
> +return i;
> + }
> +  }
> +   }
> +   return -1;
> +}
> +
>  void
>  link_assign_uniform_locations(struct gl_shader_program *prog,
> -  unsigned int boolean_true)
> +  unsigned int boolean_true,
> +  unsigned int max_locations)
>  {
> ralloc_free(prog->UniformStorage);
> prog->UniformStorage = NULL;
> @@ -1150,6 +1181,20 @@ link_assign_uniform_locations(struct
> gl_shader_program *prog,
> 
> parcel_out_uniform_storage parcel(prog->UniformHash, uniforms, data);
> 
> +   unsigned total_entries = 0;
> +
> +   /* Calculate amount of 'holes' left after explicit locations were
> +* reserved from UniformRemapTable.
> +*/
> +   unsigned empty_locs = 0;
> +   for (unsigned i = 0; i < prog->NumUniformRemapTable; i++)
> +  if (prog->UniformRemapTable[i] == NULL)
> + empty_locs++;
> +
> +   /* Add all the reserved explicit locations - empty locations in remap 
> table.
> */
> +   if (prog->NumUniformRemapTable)
> +  total_entries = (prog->NumUniformRemapTable - 1) - empty_locs;
> +
> for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
>if (prog->_LinkedShaders[i] == NULL)
>continue;
> @@ -1213,21 +1258,43 @@ link_assign_uniform_locations(struct
> gl_shader_program *prog,
>/* how many new entries for this uniform? */
>const unsigned entries = MAX2(1, uniforms[i].array_elements);
> 
> -  /* resize remap table to fit new entries */
> -  prog->UniformRemapTable =
> - reralloc(prog,
> -  prog->UniformRemapTable,
> -  gl_uniform_storage *,
> -  prog->NumUniformRemapTable + entries);
> +  /* Find UniformRemapTable for empty blocks where we can fit this
> uniform. */
> +  int chosen_location = -1;
> +
> +  if (empty_locs)
> + chosen_location = find_empty_block(prog, &uniforms[i]);
> +
> +  if (chosen_location != 

Re: [Mesa-dev] [RFC PATCH] mesa: Add MESA_SHADER_CAPTURE_PATH for writing .shader_test files.

2016-01-19 Thread Marek Olšák
On Mon, Jan 18, 2016 at 6:12 PM, Eero Tamminen
 wrote:
> Hi,
>
> On 18.01.2016 17:05, Kenneth Graunke wrote:
>>
>> This writes linked shader programs to .shader_test files to
>> $MESA_SHADER_CAPTURE_PATH in the format used by shader-db
>> (http://cgit.freedesktop.org/mesa/shader-db).
>>
>> It supports both GLSL shaders and ARB programs.  All stages that
>> are linked together are written in a single .shader_test file.
>>
>> This eliminates the need for shader-db's split-to-files.py, as Mesa
>> produces the desired format directly.  It's much more reliable than
>> parsing stdout/stderr, as those may contain extraneous messages, or
>> simply be closed by the application and unavailable.
>
>
> It also has the advantage that if IDs get recycled, it will just overwrite
> earlier file with the newer one, whereas split-to-files.py ignored later
> shader using same ID. Shaders used e.g. for game menus and startup are less
> interesting than ones used during gameplay.
>
> If one wants startup shaders not to be overwritten, one can just kill the
> program at that point.  One could also somewhat differentiate shaders to
> different game stages (startup, menus, level 1, level 2) by their file
> time stamps.

Wait, are you saying that this capturing method will overwrite shaders
sharing the same ID, leading to incomplete shader dumps? That's quite
a showstopper in my opinion.

The method we use is:
ST_DUMP_SHADERS=file
split-to-files.py file ..

There is no editing required.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/10] tgsi: add MEMBAR opcode to handle memoryBarrier* GLSL intrinsics

2016-01-19 Thread Ilia Mirkin
On Tue, Jan 19, 2016 at 5:49 AM, Marek Olšák  wrote:
> On Tue, Jan 19, 2016 at 3:25 AM, Ilia Mirkin  wrote:
>> On Mon, Jan 18, 2016 at 6:06 AM, Marek Olšák  wrote:
>>> For 1-4,
>>>
>>> Reviewed-by: Marek Olšák 
>>>
>>> I'm not very familiar with the code in 2, but the changes seem reasonable.
>>>
>>> Also, and I know this is not your mistake, but still, mtypes.h has:
>>>
>>> struct gl_atomic_buffer_binding
>>>   AtomicBufferBindings[MAX_COMBINED_ATOMIC_BUFFERS];
>>>
>>> But it should be:
>>>
>>> struct gl_atomic_buffer_binding
>>>   AtomicBufferBindings[16 /* or use a proper definition here */];
>>>
>>> It's not possible to use more than MaxAtomicBufferBindings, because
>>> the slots are shared among all shader stages.
>>
>> Besides the suboptimal name, I don't see what's so terribly wrong
>> about this. They're saying there are 15*6 binding points (the value of
>> MAX_COMBINED_ATOMIC_BUFFERS), and that's also the
>> MaxCombinedAtomicBuffers thing. I guess MaxCombinedAtomicBuffers
>> should be N * the max # of bindings? So this is a bit more restrictive
>> than it could be, but... meh.
>
> I don't understand. AtomicBufferBindings are binding points. There is
> a fixed number of binding points shared by all shader stages, for
> example 15. It's not possible to bind a buffer above that. From the
> spec:
>
> "BindBufferBase and BindBufferRange will generate an INVALID_VALUE
> error if <index> is greater than or equal to the value of
> MAX_ATOMIC_COUNTER_BUFFER_BINDINGS".
>
> This is why AtomicBufferBindings[MAX_COMBINED_ATOMIC_BUFFERS] wastes a
> lot of memory.
>
> MAX_COMBINED_ATOMIC_BUFFERS is about shader references, not bindings.
> You can have 15 global bindings (slots), but 5 shaders can reference
> them, which leads to 15*5 references. This is a thing that only the
> linker should care about. It's only possible to get the maximum if all
> 5 stages use the same slots (because there are only 15). The spec is
> pretty clear about it:
>
> "If an atomic counter buffer is used by multiple shaders, each such
> use counts separately against this combined limit. The combined atomic
> counter buffer use limit can be obtained by calling GetIntegerv with a
> <pname> of MAX_COMBINED_ATOMIC_COUNTER_BUFFERS"
>
> Marek

Everything you say is correct, however it's perfectly legal to have N
maximum binding points as well as N maximum combined buffers. Yes,
they're counting, in essence, different things, but those numbers are
not inconsistent. You could have 1000 binding points, and 10
maximum combined buffers. The one thing that would never make sense
would be having more than 6 * binding points as the max combined
buffers (assuming compute gets counted).

Anyways, like you say, some optimization should be possible here.
Perhaps Francisco can elaborate as to why he picked these numbers.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] glsl: move uniform calculation to link_uniforms

2016-01-19 Thread Ilia Mirkin
The data structure is a (memory) heap... there appears to be one in
mesa/main/mm.h. There's also one in nouveau_heap.h which is quite
simple and totally unreliant on nouveau, just happens to be there. How
hard would it be to integrate something like that?

The trouble with adding slow things is that you forget about them, and
they're not _that_ slow, but this stuff adds up.

  -ilia

On Tue, Jan 19, 2016 at 6:05 AM, Lofstedt, Marta
 wrote:
> This seems a bit suboptimal, since the same space is potentially searched 
> multiple times. However, I believe that a better solution would be to use 
> some other data structure which would probably require quite a big effort, so 
> for now, this is:
>
> Reviewed-by: Marta Lofstedt 
>
>
>> -Original Message-
>> From: mesa-dev [mailto:mesa-dev-boun...@lists.freedesktop.org] On
>> Behalf Of Tapani Pälli
>> Sent: Tuesday, January 19, 2016 11:17 AM
>> To: mesa-dev@lists.freedesktop.org
>> Subject: [Mesa-dev] [PATCH v2] glsl: move uniform calculation to
>> link_uniforms
>>
>> Patch moves uniform calculation to happen during link_uniforms, this is
>> possible with help of UniformRemapTable that has all the reserved locations.
>>
>> Location assignment for implicit locations is changed so that we utilize also
>> the 'holes' that explicit uniform location assignment might have left in
>> UniformRemapTable, this makes it possible to fit more uniforms as
>> previously we were lazy here and wasting space.
>>
>> Fixes following CTS tests:
>>ES31-CTS.explicit_uniform_location.uniform-loc-mix-with-implicit-max
>>ES31-CTS.explicit_uniform_location.uniform-loc-mix-with-implicit-max-
>> array
>>
>> v2: code cleanups (Matt), increment NumUniformRemapTable correctly
>> (Timothy), fix find_empty_block to work like intended (sigh) and
>> add some more comments.
>>
>> Signed-off-by: Tapani Pälli 
>> ---
>>  src/glsl/link_uniforms.cpp | 87
>> --
>>  src/glsl/linker.cpp| 19 --
>>  src/glsl/linker.h  |  3 +-
>>  3 files changed, 85 insertions(+), 24 deletions(-)
>>
>> diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp index
>> 33b2d4c..76ee70d 100644
>> --- a/src/glsl/link_uniforms.cpp
>> +++ b/src/glsl/link_uniforms.cpp
>> @@ -1057,9 +1057,40 @@ assign_hidden_uniform_slot_id(const char
>> *name, unsigned hidden_id,
>> uniform_size->map->put(hidden_uniform_start + hidden_id, name);  }
>>
>> +/**
>> + * Search UniformRemapTable for empty block big enough to hold given
>> uniform.
>> + * TODO Optimize this algorithm later if it turns out to be a major
>> bottleneck.
>> + */
>> +static int
>> +find_empty_block(struct gl_shader_program *prog,
>> + struct gl_uniform_storage *uniform) {
>> +   const unsigned entries = MAX2(1, uniform->array_elements);
>> +   for (unsigned i = 0, j; i < prog->NumUniformRemapTable; i++) {
>> +  /* We found empty space in UniformRemapTable. */
>> +  if (prog->UniformRemapTable[i] == NULL) {
>> + for (j = i; j < entries && j < prog->NumUniformRemapTable; j++) {
>> +if (prog->UniformRemapTable[j] != NULL) {
>> +   /* Entries do not fit in this space, continue searching
>> +* after this location.
>> +*/
>> +   i = j + 1;
>> +   break;
>> +}
>> + }
>> + /* Entries fit, we can return this location. */
>> + if (i != j + 1) {
>> +return i;
>> + }
>> +  }
>> +   }
>> +   return -1;
>> +}
>> +
>>  void
>>  link_assign_uniform_locations(struct gl_shader_program *prog,
>> -  unsigned int boolean_true)
>> +  unsigned int boolean_true,
>> +  unsigned int max_locations)
>>  {
>> ralloc_free(prog->UniformStorage);
>> prog->UniformStorage = NULL;
>> @@ -1150,6 +1181,20 @@ link_assign_uniform_locations(struct
>> gl_shader_program *prog,
>>
>> parcel_out_uniform_storage parcel(prog->UniformHash, uniforms, data);
>>
>> +   unsigned total_entries = 0;
>> +
>> +   /* Calculate amount of 'holes' left after explicit locations were
>> +* reserved from UniformRemapTable.
>> +*/
>> +   unsigned empty_locs = 0;
>> +   for (unsigned i = 0; i < prog->NumUniformRemapTable; i++)
>> +  if (prog->UniformRemapTable[i] == NULL)
>> + empty_locs++;
>> +
>> +   /* Add all the reserved explicit locations - empty locations in remap 
>> table.
>> */
>> +   if (prog->NumUniformRemapTable)
>> +  total_entries = (prog->NumUniformRemapTable - 1) - empty_locs;
>> +
>> for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
>>if (prog->_LinkedShaders[i] == NULL)
>>continue;
>> @@ -1213,21 +1258,43 @@ link_assign_uniform_locations(struct
>> gl_shader_program *prog,
>>/* how many new entries for this uniform? */
>> 

Re: [Mesa-dev] [PATCH v2] glsl: move uniform calculation to link_uniforms

2016-01-19 Thread Tapani Pälli



On 01/19/2016 01:14 PM, Ilia Mirkin wrote:

The data structure is a (memory) heap... there appears to be one in
mesa/main/mm.h. There's also one in nouveau_heap.h which is quite
simple and totally unreliant on nouveau, just happens to be there. How
hard would it be to integrate something like that?

The trouble with adding slow things is that you forget about them, and
they're not _that_ slow, but this stuff adds up.


The solution I had in mind is to build a list of empty slots when 
allocating the remap table or while finding slots (keep pushing unused empty 
slots to the list) ... but if possible I would prefer to optimize later. 
First of all, this is quite an exotic path to hit with a real program (last 
words ... yes yes). Secondly, and more importantly, we can apply for 
certification sooner, as there are very few failures left.




   -ilia

On Tue, Jan 19, 2016 at 6:05 AM, Lofstedt, Marta
 wrote:

This seems a bit suboptimal, since the same space is potentially searched 
multiple times. However, I believe that a better solution would be to use some 
other data structure which would probably require quite a big effort, so for 
now, this is:

Reviewed-by: Marta Lofstedt 



-Original Message-
From: mesa-dev [mailto:mesa-dev-boun...@lists.freedesktop.org] On
Behalf Of Tapani Pälli
Sent: Tuesday, January 19, 2016 11:17 AM
To: mesa-dev@lists.freedesktop.org
Subject: [Mesa-dev] [PATCH v2] glsl: move uniform calculation to
link_uniforms

Patch moves uniform calculation to happen during link_uniforms, this is
possible with help of UniformRemapTable that has all the reserved locations.

Location assignment for implicit locations is changed so that we utilize also
the 'holes' that explicit uniform location assignment might have left in
UniformRemapTable, this makes it possible to fit more uniforms as
previously we were lazy here and wasting space.

Fixes following CTS tests:
ES31-CTS.explicit_uniform_location.uniform-loc-mix-with-implicit-max
ES31-CTS.explicit_uniform_location.uniform-loc-mix-with-implicit-max-
array

v2: code cleanups (Matt), increment NumUniformRemapTable correctly
 (Timothy), fix find_empty_block to work like intended (sigh) and
 add some more comments.

Signed-off-by: Tapani Pälli 
---
  src/glsl/link_uniforms.cpp | 87
--
  src/glsl/linker.cpp| 19 --
  src/glsl/linker.h  |  3 +-
  3 files changed, 85 insertions(+), 24 deletions(-)

diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp index
33b2d4c..76ee70d 100644
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -1057,9 +1057,40 @@ assign_hidden_uniform_slot_id(const char
*name, unsigned hidden_id,
 uniform_size->map->put(hidden_uniform_start + hidden_id, name);  }

+/**
+ * Search UniformRemapTable for empty block big enough to hold given
uniform.
+ * TODO Optimize this algorithm later if it turns out to be a major
bottleneck.
+ */
+static int
+find_empty_block(struct gl_shader_program *prog,
+ struct gl_uniform_storage *uniform) {
+   const unsigned entries = MAX2(1, uniform->array_elements);
+   for (unsigned i = 0, j; i < prog->NumUniformRemapTable; i++) {
+  /* We found empty space in UniformRemapTable. */
+  if (prog->UniformRemapTable[i] == NULL) {
+ for (j = i; j < entries && j < prog->NumUniformRemapTable; j++) {
+if (prog->UniformRemapTable[j] != NULL) {
+   /* Entries do not fit in this space, continue searching
+* after this location.
+*/
+   i = j + 1;
+   break;
+}
+ }
+ /* Entries fit, we can return this location. */
+ if (i != j + 1) {
+return i;
+ }
+  }
+   }
+   return -1;
+}
+
  void
  link_assign_uniform_locations(struct gl_shader_program *prog,
-  unsigned int boolean_true)
+  unsigned int boolean_true,
+  unsigned int max_locations)
  {
 ralloc_free(prog->UniformStorage);
 prog->UniformStorage = NULL;
@@ -1150,6 +1181,20 @@ link_assign_uniform_locations(struct
gl_shader_program *prog,

 parcel_out_uniform_storage parcel(prog->UniformHash, uniforms, data);

+   unsigned total_entries = 0;
+
+   /* Calculate amount of 'holes' left after explicit locations were
+* reserved from UniformRemapTable.
+*/
+   unsigned empty_locs = 0;
+   for (unsigned i = 0; i < prog->NumUniformRemapTable; i++)
+  if (prog->UniformRemapTable[i] == NULL)
+ empty_locs++;
+
+   /* Add all the reserved explicit locations - empty locations in remap table.
*/
+   if (prog->NumUniformRemapTable)
+  total_entries = (prog->NumUniformRemapTable - 1) - empty_locs;
+
 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
if (prog->_LinkedShaders[i] == NULL)
 

Re: [Mesa-dev] [RFC PATCH] mesa: Add MESA_SHADER_CAPTURE_PATH for writing .shader_test files.

2016-01-19 Thread Eero Tamminen

Hi,

On 19.01.2016 13:07, Marek Olšák wrote:

On Mon, Jan 18, 2016 at 6:12 PM, Eero Tamminen

On 18.01.2016 17:05, Kenneth Graunke wrote:

[...]

If one wants startup shaders not to be overwritten, one can just kill the
program at that point.  One could also somewhat differentiate shaders to
different game stages (startup, menus, level 1, level 2) by their file
time stamps.


Wait, are you saying that this capturing method will overwrite shaders
sharing the same ID, leading to incomplete shader dumps? That's quite
a showstopper in my opinion.


The old split-to-files.py overwrote the shaders too:
-
if shadertuple in shaders[prognum]:
  print("Warning: duplicate", shadertype, " shader ", shadernum,
"in program", prognum, "...tossing old shader.")
shaders[prognum][shadertuple] = ''
-

...except for ARB shaders where it just threw towel in on first shader
that re-used same ID:
-
elif arbmatch:
  shadertype = arbmatch.group(1)
  prognum = arbmatch.group(2)
  if prognum in programs:
print("dupe!")
exit(1)
-



The method we use is:
ST_DUMP_SHADERS=file
split-to-files.py file ..

There is no editing required.


At least with the Intel backend there were a lot of extra lines that
split-to-files.py often left at the end of shaders, which required
manual cleanup.  Some time ago I posted a rewrite of split-to-files
which fixed that (among many other of its shortcomings):
http://lists.freedesktop.org/archives/mesa-dev/2015-December/102761.html


- Eero

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965: Trigger CS state reemission when new sampler state is uploaded.

2016-01-19 Thread Lofstedt, Marta
Thanks Curro,

Reviewed-by: Marta Lofstedt 


> -Original Message-
> From: Francisco Jerez [mailto:curroje...@riseup.net]
> Sent: Monday, January 18, 2016 9:37 PM
> To: mesa-dev@lists.freedesktop.org
> Cc: Lofstedt, Marta; Justen, Jordan L
> Subject: [PATCH 1/2] i965: Trigger CS state reemission when new sampler
> state is uploaded.
> 
> This reuses the NEW_SAMPLER_STATE_TABLE state bit (currently only used
> on pre-Gen7 hardware) to signal that the sampler state tables have changed
> in order to make sure that the GPGPU interface descriptor is updated.
> ---
>  src/mesa/drivers/dri/i965/brw_sampler_state.c | 2 +-
>  src/mesa/drivers/dri/i965/gen7_cs_state.c | 1 +
>  2 files changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_sampler_state.c
> b/src/mesa/drivers/dri/i965/brw_sampler_state.c
> index d181468..24798a5 100644
> --- a/src/mesa/drivers/dri/i965/brw_sampler_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_sampler_state.c
> @@ -582,7 +582,7 @@ brw_upload_sampler_state_table(struct
> brw_context *brw,
>batch_offset_for_sampler_state += size_in_bytes;
> }
> 
> -   if (brw->gen >= 7) {
> +   if (brw->gen >= 7 && stage_state->stage != MESA_SHADER_COMPUTE) {
>/* Emit a 3DSTATE_SAMPLER_STATE_POINTERS_XS packet. */
>gen7_emit_sampler_state_pointers_xs(brw, stage_state);
> } else {
> diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c
> b/src/mesa/drivers/dri/i965/gen7_cs_state.c
> index a025bb9..6d6988c 100644
> --- a/src/mesa/drivers/dri/i965/gen7_cs_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c
> @@ -196,6 +196,7 @@ const struct brw_tracked_state brw_cs_state = {
>.brw = BRW_NEW_BATCH |
>   BRW_NEW_CS_PROG_DATA |
>   BRW_NEW_PUSH_CONSTANT_ALLOCATION |
> + BRW_NEW_SAMPLER_STATE_TABLE |
>   BRW_NEW_SURFACES,
> },
> .emit = brw_upload_cs_state
> --
> 2.7.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965: Implement compute sampler state atom.

2016-01-19 Thread Lofstedt, Marta
Reviewed-by: Marta Lofstedt 

> -Original Message-
> From: Francisco Jerez [mailto:curroje...@riseup.net]
> Sent: Monday, January 18, 2016 9:37 PM
> To: mesa-dev@lists.freedesktop.org
> Cc: Lofstedt, Marta; Justen, Jordan L
> Subject: [PATCH 2/2] i965: Implement compute sampler state atom.
> 
> Fixes a number of GLES31 CTS failures and hangs on various hardware:
> 
>  ES31-CTS.texture_gather.plain-gather-depth-2d
>  ES31-CTS.texture_gather.plain-gather-depth-2darray
>  ES31-CTS.texture_gather.plain-gather-depth-cube
>  ES31-CTS.texture_gather.offset-gather-depth-2d
>  ES31-CTS.texture_gather.offset-gather-depth-2darray
>  ES31-
> CTS.layout_binding.sampler2D_layout_binding_texture_ComputeShader
>  ES31-
> CTS.layout_binding.sampler2DArray_layout_binding_texture_ComputeShad
> er
>  ES31-CTS.explicit_uniform_location.uniform-loc-types-samplers
>  ES31-CTS.compute_shader.resources-texture
> 
> Some of them were actually passing by luck on some generations even
> though we weren't uploading sampler state tables explicitly for the compute
> stage, most likely because they relied on the cached sampler state left from
> previous rendering to be close enough.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92589
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93312
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93325
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93407
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93725
> Reported-by: Marta Lofstedt 
> ---
>  src/mesa/drivers/dri/i965/brw_context.h   |  2 +-
>  src/mesa/drivers/dri/i965/brw_sampler_state.c | 20
> 
>  src/mesa/drivers/dri/i965/brw_state.h |  1 +
>  src/mesa/drivers/dri/i965/brw_state_upload.c  |  2 ++
>  4 files changed, 24 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h
> b/src/mesa/drivers/dri/i965/brw_context.h
> index b80db00..2a29dfe 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -1264,7 +1264,7 @@ struct brw_context
> 
> int num_atoms[BRW_NUM_PIPELINES];
> const struct brw_tracked_state render_atoms[76];
> -   const struct brw_tracked_state compute_atoms[10];
> +   const struct brw_tracked_state compute_atoms[11];
> 
> /* If (INTEL_DEBUG & DEBUG_BATCH) */
> struct {
> diff --git a/src/mesa/drivers/dri/i965/brw_sampler_state.c
> b/src/mesa/drivers/dri/i965/brw_sampler_state.c
> index 24798a5..c20a028 100644
> --- a/src/mesa/drivers/dri/i965/brw_sampler_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_sampler_state.c
> @@ -693,3 +693,23 @@ const struct brw_tracked_state brw_tes_samplers =
> {
> },
> .emit = brw_upload_tes_samplers,
>  };
> +
> +static void
> +brw_upload_cs_samplers(struct brw_context *brw) {
> +   /* BRW_NEW_COMPUTE_PROGRAM */
> +   struct gl_program *cs = (struct gl_program *) brw->compute_program;
> +   if (!cs)
> +  return;
> +
> +   brw_upload_sampler_state_table(brw, cs, &brw->cs.base); }
> +
> +const struct brw_tracked_state brw_cs_samplers = {
> +   .dirty = {
> +  .mesa = _NEW_TEXTURE,
> +  .brw = BRW_NEW_BATCH |
> + BRW_NEW_COMPUTE_PROGRAM,
> +   },
> +   .emit = brw_upload_cs_samplers,
> +};
> diff --git a/src/mesa/drivers/dri/i965/brw_state.h
> b/src/mesa/drivers/dri/i965/brw_state.h
> index 7d61b7c..f44ccd6 100644
> --- a/src/mesa/drivers/dri/i965/brw_state.h
> +++ b/src/mesa/drivers/dri/i965/brw_state.h
> @@ -75,6 +75,7 @@ extern const struct brw_tracked_state
> brw_vs_samplers;  extern const struct brw_tracked_state
> brw_tcs_samplers;  extern const struct brw_tracked_state
> brw_tes_samplers;  extern const struct brw_tracked_state
> brw_gs_samplers;
> +extern const struct brw_tracked_state brw_cs_samplers;
>  extern const struct brw_tracked_state brw_vs_ubo_surfaces;  extern const
> struct brw_tracked_state brw_vs_abo_surfaces;  extern const struct
> brw_tracked_state brw_vs_image_surfaces; diff --git
> a/src/mesa/drivers/dri/i965/brw_state_upload.c
> b/src/mesa/drivers/dri/i965/brw_state_upload.c
> index 876e130..ee75ca8 100644
> --- a/src/mesa/drivers/dri/i965/brw_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
> @@ -282,6 +282,7 @@ static const struct brw_tracked_state
> *gen7_compute_atoms[] =
>     &brw_cs_abo_surfaces,
>     &brw_texture_surfaces,
>     &brw_cs_work_groups_surface,
> +   &brw_cs_samplers,
>     &brw_cs_state,
>  };
> 
> @@ -396,6 +397,7 @@ static const struct brw_tracked_state
> *gen8_compute_atoms[] =
>     &brw_cs_abo_surfaces,
>     &brw_texture_surfaces,
>     &brw_cs_work_groups_surface,
> +   &brw_cs_samplers,
>     &brw_cs_state,
>  };
> 
> --
> 2.7.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: Restore Mesa-style to shader_enums.c/h.

2016-01-19 Thread Brian Paul

On 01/15/2016 02:44 PM, Matt Turner wrote:

On Fri, Jan 15, 2016 at 1:39 PM, Ilia Mirkin  wrote:



? (Along with "extern" thrown in for no apparent reason.)


Heh :)


IIRC, many years ago there was some odd compiler that warned if function 
prototypes didn't have 'extern' (AIX? Ultrix?).


-Brian

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] i965: Provide sse2 version for rgba8 <-> bgra8 swizzle

2016-01-19 Thread Roland Scheidegger
Ping?

Am 17.01.2016 um 22:49 schrieb srol...@vmware.com:
> From: Roland Scheidegger 
> 
> The existing code used ssse3, and because it isn't compiled in a separate
> file compiled with that, it is usually not used (that, of course, could
> be fixed...), whereas sse2 is always present at least with 64bit builds.
> It is actually trivial to do with sse2 without pshufb, on some cpus (I'm
> looking at you, atom!) it might not even be slower.
> This is compile-tested only, it doesn't actually do what I really want
> (which is glReadPixels without doing byte access from an uncached region,
> which is what you'll get on intel chips not having llc, if your cpu doesn't
> support sse41 (in which case the rb would be copied over with movntdqa instead
> of mapped, so mesa format_utils byte swapping conversion will then access
> the cached region instead of the uncached one) - really need sse2-optimized
> convert_ubyte functions for a proper fix, otherwise google maps in firefox is
> reduced to fps below 1 fps), but hey why not. I don't have a gpu which could
> possibly hit this, albeit I successfully used the exact same code elsewhere.
> 
> v2: fix andnot argument order, add comments
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c  | 18 +++
>  src/mesa/drivers/dri/i965/intel_tiled_memcpy.c | 73 
> +-
>  2 files changed, 79 insertions(+), 12 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 108dd87..5fc4212 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -2773,6 +2773,24 @@ intel_miptree_map(struct brw_context *brw,
> } else if (!(mode & GL_MAP_WRITE_BIT) &&
>!mt->compressed && cpu_has_sse4_1 &&
>(mt->pitch % 16 == 0)) {
> +  /*
> +   * XXX: without sse4_1, in some situations still really want to copy
> +   * regardless. Presumably this is not done for performance reasons - 
> +   * with movntdqa thanks to the 64byte streaming load buffer the
> +   * uncached->cached copy followed by cached->cached later is always
> +   * faster than doing "ordinary" uncached->cached copy.
> +   * Without movntdqa, of course an additional copy doesn't help, albeit
> +   * it has to be said the uncached->cached one is an order of magnitude
> +   * slower than the later cached->cached one in any case.
> +   * But mesa may not do a simple memcpy on that memory later - some
> +   * glReadPixels paths for instance will well hit per-byte access which
> +   * is a complete performance killer on uncached memory. So in these
> +   * cases really want to copy regardless, unless the map below could
> +   * play some tricks making the memory cached.
> +   * (Or otherwise ensure mesa can't hit these paths often, for instance
> +   * glReadPixels requiring conversions could be handled by meta, so in
> +   * end it really would be just memcpy.)
> +   */
>intel_miptree_map_movntdqa(brw, mt, map, level, slice);
>  #endif
> } else {
> diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c 
> b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
> index 2383401..42fdde1 100644
> --- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
> +++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
> @@ -36,10 +36,13 @@
>  #include "brw_context.h"
>  #include "intel_tiled_memcpy.h"
>  
> -#ifdef __SSSE3__
> +#if defined(__SSSE3__)
>  #include <tmmintrin.h>
> +#elif defined(__SSE2__)
> +#include <emmintrin.h>
>  #endif
>  
> +
>  #define FILE_DEBUG_FLAG DEBUG_TEXTURE
>  
>  #define ALIGN_DOWN(a, b) ROUND_DOWN_TO(a, b)
> @@ -56,23 +59,69 @@ static const uint32_t ytile_width = 128;
>  static const uint32_t ytile_height = 32;
>  static const uint32_t ytile_span = 16;
>  
> -#ifdef __SSSE3__
> +#if defined(__SSSE3__)
>  static const uint8_t rgba8_permutation[16] =
> { 2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15 };
>  
>  /* NOTE: dst must be 16-byte aligned. src may be unaligned. */
> -#define rgba8_copy_16_aligned_dst(dst, src)\
> -   _mm_store_si128((__m128i *)(dst),   \
> -   _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)(src)), \
> -*(__m128i *) rgba8_permutation))
> +static inline void
> +rgba8_copy_16_aligned_dst(void *dst, const void *src)
> +{
> +   __m128i reg;
> +   reg = _mm_loadu_si128((__m128i *)src);
> +   reg = _mm_shuffle_epi8(reg, *(__m128i *)rgba8_permutation);
> +   _mm_store_si128((__m128i *)dst, reg);
> +}
>  
>  /* NOTE: src must be 16-byte aligned. dst may be unaligned. */
> -#define rgba8_copy_16_aligned_src(dst, src)\
> -   _mm_storeu_si128((__m128i *)(dst),  \
> -_mm_shuffle_epi8(_mm_load_si128((__m128i *)(src)), \
> - 

Re: [Mesa-dev] [RFC 11/63] mesa/main: Add extension tracking bit for ARB_internalformat_query2

2016-01-19 Thread Nanley Chery
On Tue, Jan 19, 2016 at 05:41:51PM +0100, Eduardo Lima Mitev wrote:
> From: Antia Puentes 
> 
> ---
>  src/mesa/main/extensions_table.h | 1 +
>  src/mesa/main/mtypes.h   | 1 +
>  2 files changed, 2 insertions(+)
> 
> diff --git a/src/mesa/main/extensions_table.h 
> b/src/mesa/main/extensions_table.h
> index aeccb01..0d3e635 100644
> --- a/src/mesa/main/extensions_table.h
> +++ b/src/mesa/main/extensions_table.h
> @@ -73,6 +73,7 @@ EXT(ARB_half_float_vertex   , 
> ARB_half_float_vertex
>  EXT(ARB_indirect_parameters , ARB_indirect_parameters
> ,  x , GLC,  x ,  x , 2013)
>  EXT(ARB_instanced_arrays, ARB_instanced_arrays   
> , GLL, GLC,  x ,  x , 2008)
>  EXT(ARB_internalformat_query, ARB_internalformat_query   
> , GLL, GLC,  x ,  x , 2011)
> +EXT(ARB_internalformat_query2, ARB_internalformat_query2 
>   , GLL, GLC,  x ,  x , 2013)
^  ^
Looks like you have an extra space after the first and second entries.

- Nanley

>  EXT(ARB_invalidate_subdata  , dummy_true 
> , GLL, GLC,  x ,  x , 2012)
>  EXT(ARB_map_buffer_alignment, dummy_true 
> , GLL, GLC,  x ,  x , 2011)
>  EXT(ARB_map_buffer_range, ARB_map_buffer_range   
> , GLL, GLC,  x ,  x , 2008)
> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
> index 0992d4d..e0e0f4d 100644
> --- a/src/mesa/main/mtypes.h
> +++ b/src/mesa/main/mtypes.h
> @@ -3775,6 +3775,7 @@ struct gl_extensions
> GLboolean ARB_indirect_parameters;
> GLboolean ARB_instanced_arrays;
> GLboolean ARB_internalformat_query;
> +   GLboolean ARB_internalformat_query2;
> GLboolean ARB_map_buffer_range;
> GLboolean ARB_occlusion_query;
> GLboolean ARB_occlusion_query2;
> -- 
> 2.5.3
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/6] Move nir/glsl to src/compiler, take 2

2016-01-19 Thread Matt Turner
On Mon, Jan 18, 2016 at 4:02 AM, Emil Velikov  wrote:
> Hi all,
>
> Here is another attempt at splitting up nir, glsl and the rest.
>
> The gist is that we have three independent static libraries - compiler,
> nir and glsl. Thus we don't need to pull nir if working with glsl and
> vice versa.
>
> For compilation purposes the three (not to mention mesa ir) _are_
> entwined, yet we can only link against the ones we want.
>
> Changes since last time - restored the parallel build for glsl/nir
>
> The lot can be found in branch nir-glsl-move-v2 at
> https://github.com/evelikov/Mesa/commits/nir-glsl-move
>
> All builds have been tested, although do give it a go on your
> config/setup.

I checked out your branch and tested it. Everything looks fine here.
Thanks Emil!

Acked-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/10] tgsi: add MEMBAR opcode to handle memoryBarrier* GLSL intrinsics

2016-01-19 Thread Francisco Jerez
Marek Olšák  writes:

> On Tue, Jan 19, 2016 at 9:09 PM, Francisco Jerez  
> wrote:
>> Marek Olšák  writes:
>>
>>> On Tue, Jan 19, 2016 at 3:25 AM, Ilia Mirkin  wrote:
 On Mon, Jan 18, 2016 at 6:06 AM, Marek Olšák  wrote:
> For 1-4,
>
> Reviewed-by: Marek Olšák 
>
> I'm not very familiar with the code in 2, but the changes seem reasonable.
>
> Also, and I know this is not your mistake, but still, mtypes.h has:
>
> struct gl_atomic_buffer_binding
>   AtomicBufferBindings[MAX_COMBINED_ATOMIC_BUFFERS];
>
> But it should be:
>
> struct gl_atomic_buffer_binding
>   AtomicBufferBindings[16 /* or use a proper definition here */];
>
> It's not possible to use more than MaxAtomicBufferBindings, because
> the slots are shared among all shader stages.

 Besides the suboptimal name, I don't see what's so terribly wrong
 about this. They're saying there are 15*6 binding points (the value of
 MAX_COMBINED_ATOMIC_BUFFERS), and that's also the
 MaxCombinedAtomicBuffers thing. I guess MaxCombinedAtomicBuffers
 should be N * the max # of bindings? So this is a bit more restrictive
 than it could be, but... meh.
>>>
>>> I don't understand. AtomicBufferBindings are binding points. There is
>>> a fixed number of binding points shared by all shader stages, for
>>> example 15. It's not possible to bind a buffer above that. From the
>>> spec:
>>>
>>> "BindBufferBase and BindBufferRange will generate an INVALID_VALUE
>>> error if <index> is greater than or equal to the value of
>>> MAX_ATOMIC_COUNTER_BUFFER_BINDINGS".
>>>
>>> This is why AtomicBufferBindings[MAX_COMBINED_ATOMIC_BUFFERS] wastes a
>>> lot of memory.
>>>
>> It only wastes any memory if your driver exposes a
>> GL_MAX_ATOMIC_COUNTER_BUFFER_BINDINGS less than the
>> MAX_COMBINED_ATOMIC_BUFFERS macro, which might be necessary due to
>> hardware restrictions, and even in that case the "waste" will be a
>> constant amount of memory per context so I'm not particularly concerned
>> about it, you could avoid it by allocating the array manually based on
>> the MaxAtomicBufferBindings constant provided by the driver, but it
>> would be kind of painful.
>>
>> I didn't choose the binding point array size to be
>> MAX_COMBINED_ATOMIC_BUFFERS by accident, it's the maximum amount of
>> atomic counter binding points that can potentially be useful, because
>> anything greater than that is functionally equivalent to a context with
>> MAX_COMBINED_ATOMIC_BUFFERS binding points since you won't be able to
>> set up a pipeline using more than that many atomic buffers.  The i965
>> driver exposes MAX_COMBINED_ATOMIC_BUFFERS binding points for that
>> reason.
>
> It looks like i965 is wrong even. It sets:
> GL_MAX_ATOMIC_COUNTER_BINDINGS = 90
>
> Therefore, a shader is allowed to do:
> layout(binding = 89, offset = 0) uniform atomic_uint a;
>
> Does that work on i965? It won't report any error as far as I can see.
>
Yes, I'd expect that to work on i965.

> Marek


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] mesa: remove link validation that should be done elsewhere

2016-01-19 Thread Ian Romanick
On 01/05/2016 05:40 PM, Timothy Arceri wrote:
> Even if re-linking fails rendering shouldn't fail as the previous
> successfully linked program will still be available. It also shouldn't
> be possible to have an unlinked program as part of the current rendering
> state.
> 
> This fixes a subtest in:
> ES31-CTS.sepshaderobjs.StateInteraction
> 
> This change should improve performance on CPU limited benchmarks as noted
> in commit d6c6b186cf308f.
> 
> From Section 7.3 (Program Objects) of the OpenGL 4.5 spec:
> 
>"If a program object that is active for any shader stage is re-linked
> unsuccessfully, the link status will be set to FALSE, but any existing
> executables and associated state will remain part of the current rendering
> state until a subsequent call to UseProgram, UseProgramStages, or
> BindProgramPipeline removes them from use. If such a program is attached 
> to
> any program pipeline object, the existing executables and associated state
> will remain part of the program pipeline object until a subsequent call to
> UseProgramStages removes them from use. An unsuccessfully linked program 
> may
> not be made part of the current rendering state by UseProgram or added to
> program pipeline objects by UseProgramStages until it is successfully
> re-linked."
> 
>"void UseProgram(uint program);
> 
>...
> 
>An INVALID_OPERATION error is generated if program has not been linked, or
>was last linked unsuccessfully.  The current rendering state is not 
> modified."

Right so if there's a problem with the program to begin with, that
should be caught by glUseProgram.  Once a program is in use, calling
glLinkProgram can't break it.  That sounds reasonable... and I'm always
in favor of removing things from draw-time validation.

Hmm... there is another potential problem.  At what point does
glUseProgram exit if it detects that the specified program is already
in use?  I would expect the following sequence to generate an error at
the second glUseProgram:

glUseProgram(p);
glDrawArrays(...);

glShaderSource(s, 1, _with_a_linking_program, NULL);
glAttachShader(p, s);
glLinkProgram(p);  // linking should fail
glUseProgram(p);   // this should generate an error, but...
   // leave the old executable in place?
glDrawArrays(...); // should generate same result as before

That's worth investigating.  It's probably also worth a couple piglit tests.

Either way, this patch is

Reviewed-by: Ian Romanick 

> V2: apply the rule to both core and compat.
> 
> Cc: Tapani Pälli 
> Cc: Ian Romanick 
> Cc: Brian Paul 
> ---
>  src/mesa/main/context.c | 63 
> +++--
>  1 file changed, 3 insertions(+), 60 deletions(-)
> 
> diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
> index be983d4..f3fd01f 100644
> --- a/src/mesa/main/context.c
> +++ b/src/mesa/main/context.c
> @@ -1930,31 +1930,6 @@ _mesa_check_blend_func_error(struct gl_context *ctx)
> return GL_TRUE;
>  }
>  
> -static bool
> -shader_linked_or_absent(struct gl_context *ctx,
> -const struct gl_shader_program *shProg,
> -bool *shader_present, const char *where)
> -{
> -   if (shProg) {
> -  *shader_present = true;
> -
> -  if (!shProg->LinkStatus) {
> - _mesa_error(ctx, GL_INVALID_OPERATION, "%s(shader not linked)", 
> where);
> - return false;
> -  }
> -#if 0 /* not normally enabled */
> -  {
> - char errMsg[100];
> - if (!_mesa_validate_shader_program(ctx, shProg, errMsg)) {
> -_mesa_warning(ctx, "Shader program %u is invalid: %s",
> -  shProg->Name, errMsg);
> - }
> -  }
> -#endif
> -   }
> -
> -   return true;
> -}
>  
>  /**
>   * Prior to drawing anything with glBegin, glDrawArrays, etc. this function
> @@ -1967,54 +1942,22 @@ shader_linked_or_absent(struct gl_context *ctx,
>  GLboolean
>  _mesa_valid_to_render(struct gl_context *ctx, const char *where)
>  {
> -   unsigned i;
> -
> /* This depends on having up to date derived state (shaders) */
> if (ctx->NewState)
>_mesa_update_state(ctx);
>  
> -   if (ctx->API == API_OPENGL_CORE || ctx->API == API_OPENGLES2) {
> -  bool from_glsl_shader[MESA_SHADER_COMPUTE] = { false };
> -
> -  for (i = 0; i < MESA_SHADER_COMPUTE; i++) {
> - if (!shader_linked_or_absent(ctx, ctx->_Shader->CurrentProgram[i],
> -  &from_glsl_shader[i], where))
> -return GL_FALSE;
> -  }
> -
> -  /* In OpenGL Core Profile and OpenGL ES 2.0 / 3.0, there are no 
> assembly
> -   * shaders.  Don't check state related to those.
> -   */
> -   } else {
> -  bool has_vertex_shader = false;
> -  bool has_fragment_shader = false;
> -
> -  /* In 

Re: [Mesa-dev] [PATCH v2] mesa: remove link validation that should be done elsewhere

2016-01-19 Thread Timothy Arceri
On Fri, 2016-01-15 at 08:50 +0200, Tapani Pälli wrote:
> 
> On 01/06/2016 03:40 AM, Timothy Arceri wrote:
> > Even if re-linking fails rendering shouldn't fail as the previous
> > successfully linked program will still be available. It also
> > shouldn't
> > be possible to have an unlinked program as part of the current
> > rendering
> > state.
> > 
> > This fixes a subtest in:
> > ES31-CTS.sepshaderobjs.StateInteraction
> 
> Which is the last one, after this change this horrible test from
> depths 
> of hell starts to finally pass!

hehe

> 
> It would be cool if Ian and Brian could comment a bit here why these 
> checks were originally done and if they can be safely removed, was it
> just to play 'extra safe'?

I looked at the history a while back so to save everyone some time here
it is.

This was added way back in 2009 by Brian:


commit 56c4226fcc54158eb7fe54eeb13539a979ec155c
Author: Brian Paul 
Date:   Fri Aug 14 10:45:17 2009 -0600

mesa: new _mesa_valid_to_render() function

Tests if the current shader/program is valid and that the
framebuffer is
complete.  To be called by glBegin, glDrawArrays, etc.


There seems to have been only two major changes since then:


commit 84eba3ef71dfa822e5ff0463032cdd2e3515b888
Author: Ian Romanick 
Date:   Wed Oct 13 13:58:44 2010 -0700

Track separate programs for each stage

The assumption is that all stages are the same program or that
varyings are passed between stages using built-in varyings.

commit 79146065f9261a9004359338f1a7b8b5a534ebc3
Author: Ian Romanick 
Date:   Fri Feb 7 21:13:02 2014 -0800

mesa: Refactor per-stage link check to its own function

Signed-off-by: Ian Romanick 
Reviewed-by: Jordan Justen 


To me it looks like this was just added to play safe when adding other
valid to render checks.

> 
> 
> > This change should improve performance on CPU limited benchmarks as
> > noted
> > in commit d6c6b186cf308f.
> > 
> >  From Section 7.3 (Program Objects) of the OpenGL 4.5 spec:
> > 
> > "If a program object that is active for any shader stage is re
> > -linked
> >  unsuccessfully, the link status will be set to FALSE, but any
> > existing
> >  executables and associated state will remain part of the
> > current rendering
> >  state until a subsequent call to UseProgram, UseProgramStages,
> > or
> >  BindProgramPipeline removes them from use. If such a program
> > is attached to
> >  any program pipeline object, the existing executables and
> > associated state
> >  will remain part of the program pipeline object until a
> > subsequent call to
> >  UseProgramStages removes them from use. An unsuccessfully
> > linked program may
> >  not be made part of the current rendering state by UseProgram
> > or added to
> >  program pipeline objects by UseProgramStages until it is
> > successfully
> >  re-linked."
> > 
> > "void UseProgram(uint program);
> > 
> > ...
> > 
> > An INVALID_OPERATION error is generated if program has not been
> > linked, or
> > was last linked unsuccessfully.  The current rendering state is
> > not modified."
> > 
> > V2: apply the rule to both core and compat.
> > 
> > Cc: Tapani Pälli 
> > Cc: Ian Romanick 
> > Cc: Brian Paul 
> > ---
> >   src/mesa/main/context.c | 63 +++-
> > -
> >   1 file changed, 3 insertions(+), 60 deletions(-)
> > 
> > diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
> > index be983d4..f3fd01f 100644
> > --- a/src/mesa/main/context.c
> > +++ b/src/mesa/main/context.c
> > @@ -1930,31 +1930,6 @@ _mesa_check_blend_func_error(struct
> > gl_context *ctx)
> >  return GL_TRUE;
> >   }
> > 
> > -static bool
> > -shader_linked_or_absent(struct gl_context *ctx,
> > -const struct gl_shader_program *shProg,
> > -bool *shader_present, const char *where)
> > -{
> > -   if (shProg) {
> > -  *shader_present = true;
> > -
> > -  if (!shProg->LinkStatus) {
> > - _mesa_error(ctx, GL_INVALID_OPERATION, "%s(shader not
> > linked)", where);
> > - return false;
> > -  }
> > -#if 0 /* not normally enabled */
> > -  {
> > - char errMsg[100];
> > - if (!_mesa_validate_shader_program(ctx, shProg, errMsg))
> > {
> > -_mesa_warning(ctx, "Shader program %u is invalid: %s",
> > -  shProg->Name, errMsg);
> > - }
> > -  }
> > -#endif
> > -   }
> > -
> > -   return true;
> > -}
> > 
> >   /**
> >* Prior to drawing anything with glBegin, glDrawArrays, etc.
> > this function
> > @@ -1967,54 +1942,22 @@ shader_linked_or_absent(struct gl_context
> > *ctx,
> >   GLboolean
> >   _mesa_valid_to_render(struct gl_context *ctx, const char 

Re: [Mesa-dev] Allow duplicate layout qualifiers V2

2016-01-19 Thread Matt Turner
On Mon, Jan 18, 2016 at 12:29 AM, Timothy Arceri
 wrote:
> After sending out V1 where I didn't fix the global default
> merging (just issued an error), I got motivated and fixed
> it properly.
>
> There are two changes here.
>
> 1. ARB_enhanced_layouts
>
> Allows duplicates within a single layout qualifier e.g.
>
> layout(location = 0, location = 1) out vec4 a;
>
> 2. ARB_shading_language_420pack
>
> Allows multiple layout qualifiers e.g.
>
> layout(location = 0) layout(location = 2) out vec4 b;
>
>
> I've Cc'ed Matt as he gave feedback on my original change but
> I would be grateful to anyone who takes a look at these.

Patches 1 and 2 are

Reviewed-by: Matt Turner 

Patches 3-5 are

Acked-by: Matt Turner 

I really don't like bald true/false function arguments, but I don't
know what else to suggest.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/7] radeonsi: use SPI_SHADER_COL_FORMAT fields instead of export_16bpc

2016-01-19 Thread Nicolai Hähnle

On 19.01.2016 11:11, Marek Olšák wrote:

From: Marek Olšák 

This does change the behavior slightly:
   If a shader writes COLOR[i] and that color buffer isn't bound,
   the shader will export MRT_NULL instead and discard the IR tree that
   calculates the output. The only exception is alpha-to-coverage, which
   requires an alpha export.
---
  src/gallium/drivers/radeon/r600_pipe_common.h   |  1 +
  src/gallium/drivers/radeonsi/si_pipe.h  |  2 +-
  src/gallium/drivers/radeonsi/si_shader.c| 35 --
  src/gallium/drivers/radeonsi/si_shader.h|  2 +-
  src/gallium/drivers/radeonsi/si_state.c | 39 +++-
  src/gallium/drivers/radeonsi/si_state.h |  1 +
  src/gallium/drivers/radeonsi/si_state_shaders.c | 47 -
  7 files changed, 90 insertions(+), 37 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 27f6e98..f3271e2 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -252,6 +252,7 @@ struct r600_surface {
unsigned cb_color_fmask_slice;  /* EG and later */
unsigned cb_color_cmask;/* CB_COLORn_TILE (r600 only) */
unsigned cb_color_mask; /* R600 only */
+   unsigned spi_shader_col_format; /* SI+ */
unsigned sx_ps_downconvert; /* Stoney only */
unsigned sx_blend_opt_epsilon;  /* Stoney only */
struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. 
R600 only */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index f83cb02..e2009de 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -125,7 +125,7 @@ struct si_framebuffer {
unsignedlog_samples;
unsignedcb0_is_integer;
unsignedcompressed_cb_mask;
-   unsignedexport_16bpc;
+   unsignedspi_shader_col_format;
unsigneddirty_cbufs;
booldirty_zsbuf;
  };
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 2de7def..266ef6d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1265,7 +1265,7 @@ static void si_llvm_init_export_args(struct 
lp_build_tgsi_context *bld_base,
struct lp_build_context *uint =

&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
struct lp_build_context *base = &bld_base->base;
-   unsigned compressed = 0;
+   unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
unsigned chan;

/* XXX: This controls which components of the output
@@ -1286,17 +1286,29 @@ static void si_llvm_init_export_args(struct 
lp_build_tgsi_context *bld_base,
args[3] = lp_build_const_int32(base->gallivm, target);

if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+   unsigned col_formats =
+   si_shader_ctx->shader->key.ps.spi_shader_col_format;
int cbuf = target - V_008DFC_SQ_EXP_MRT;

-   if (cbuf >= 0 && cbuf < 8)
-   compressed = (si_shader_ctx->shader->key.ps.export_16bpc >> 
cbuf) & 0x1;
+   assert(cbuf >= 0 && cbuf < 8);
+   spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf;
}

-   /* Set COMPR flag */
-   args[4] = compressed ? uint->one : uint->zero;
+   args[4] = uint->zero; /* COMPR flag */
+   args[5] = base->undef;
+   args[6] = base->undef;
+   args[7] = base->undef;
+   args[8] = base->undef;
+
+   switch (spi_shader_col_format) {
+   case V_028714_SPI_SHADER_ZERO:
+   args[0] = uint->zero; /* writemask */
+   args[3] = lp_build_const_int32(base->gallivm, 
V_008DFC_SQ_EXP_NULL);
+   break;
+
+   case V_028714_SPI_SHADER_FP16_ABGR:
+   args[4] = uint->one; /* COMPR flag */

-   if (compressed) {
-   /* Pixel shader needs to pack output values before export */
for (chan = 0; chan < 2; chan++) {
LLVMValueRef pack_args[2] = {
values[2 * chan],
@@ -1314,10 +1326,13 @@ static void si_llvm_init_export_args(struct 
lp_build_tgsi_context *bld_base,
 packed,
 
LLVMFloatTypeInContext(base->gallivm->context),
 "");
-   args[chan + 7] = base->undef;
}
-   } else
+   break;
+
+   case V_028714_SPI_SHADER_32_ABGR:
memcpy([5], values, 

Re: [Mesa-dev] [PATCH] i965/vec4: Use UW type for multiply into accumulator on GEN8+

2016-01-19 Thread Matt Turner
On Sat, Jan 16, 2016 at 7:31 PM, Jason Ekstrand  wrote:
>
> On Jan 16, 2016 5:56 PM, "Matt Turner"  wrote:
>>
>> On Thu, Jan 14, 2016 at 12:27 PM, Matt Turner  wrote:
>> > On Thu, Jan 14, 2016 at 12:08 PM, Jason Ekstrand 
>> > wrote:
>> >> BDW adds the following restriction: "When multiplying DW x DW, the dst
>> >> cannot be accumulator."
>> >> ---
>> >>  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 6 +-
>> >>  1 file changed, 5 insertions(+), 1 deletion(-)
>> >>
>> >> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>> >> b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>> >> index c228743..b2335bd 100644
>> >> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>> >> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
>> >> @@ -1069,7 +1069,11 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
>> >> case nir_op_umul_high: {
>> >>struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
>> >>
>> >> -  emit(MUL(acc, op[0], op[1]));
>> >> +  if (devinfo->gen >=8)
>> >
>> > Space after >=
>> >
>> >> + emit(MUL(acc, op[0], retype(op[1], BRW_REGISTER_TYPE_UW)));
>> >> +  else
>> >> + emit(MUL(acc, op[0], op[1]));
>> >> +
>> >
>> > Do the
>> > tests/spec/arb_gpu_shader5/execution/built-in-functions/vs-{i,u}mulExtended*.shader_test
>> > tests currently fail on BDW with INTEL_DEBUG=vec4? If so, presumably
>> > this fixes it?
>
> It didn't fix anything

That's an aggravatingly ambiguous answer. Did it not fix anything
because all the tests already passed, or did it not fix anything
because the tests still fail?

I pulled out my Broadwell to test for myself:

First column is 0a68112~, second is 0a68112~ using INTEL_DEBUG=vec4,
third is 0a68112 using INTEL_DEBUG=vec4.

vs-imulextended: pass fail pass
vs-imulextended-nonuniform:  pass fail pass
vs-imulextended-only-msb:pass fail pass
vs-imulextended-only-msb-nonuniform: pass fail pass
vs-umulextended: pass fail pass
vs-umulextended-nonuniform:  pass fail pass
vs-umulextended-only-msb:pass fail pass
vs-umulextended-only-msb-nonuniform: pass fail pass

So the tests I asked you to run did fail without this patch and were
fixed by this patch.

For completeness, after those tests were fixed by this patch, the only
remaining regressions from INTEL_DEBUG=vec4 on BDW are
spec/arb_shader_draw_parameters/drawid-{indirect-baseinstance,basevertex,vertexid}.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/7] radeonsi: disable SPI color outputs the shader doesn't write

2016-01-19 Thread Nicolai Hähnle

Apart from two comments on patch #2, the series is

Reviewed-by: Nicolai Hähnle 

On 19.01.2016 11:11, Marek Olšák wrote:

From: Marek Olšák 

---
  src/gallium/drivers/radeonsi/si_shader.h|  4 
  src/gallium/drivers/radeonsi/si_state_shaders.c | 12 
  2 files changed, 16 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index f49290a..50375e2 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -213,6 +213,10 @@ struct si_shader_selector {

/* PS parameters. */
unsigneddb_shader_control;
+   /* Set 0xf or 0x0 (4 bits) per each written output.
+* ANDed with spi_shader_col_format.
+*/
+   unsignedcolors_written_4bit;

/* masks of "get_unique_index" bits */
uint64_toutputs_written;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 80126f2..9daa924 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -646,6 +646,12 @@ static inline void si_shader_selector_key(struct 
pipe_context *ctx,
if (sctx->b.chip_class <= CIK && sctx->b.family != CHIP_HAWAII)
key->ps.color_is_int8 = sctx->framebuffer.color_is_int8;

+   /* Disable unwritten outputs (if WRITE_ALL_CBUFS isn't 
enabled). */
+   if (!key->ps.last_cbuf) {
+   key->ps.spi_shader_col_format &= 
sel->colors_written_4bit;
+   key->ps.color_is_int8 &= sel->info.colors_written;
+   }
+
if (rs) {
bool is_poly = (sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES 
&&
sctx->current_rast_prim <= 
PIPE_PRIM_POLYGON) ||
@@ -830,6 +836,12 @@ static void *si_create_shader_selector(struct pipe_context 
*ctx,
}
sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
break;
+
+   case PIPE_SHADER_FRAGMENT:
+   for (i = 0; i < 8; i++)
+   if (sel->info.colors_written & (1 << i))
+   sel->colors_written_4bit |= 0xf << (4 * i);
+   break;
}

/* DB_SHADER_CONTROL */


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] mesa: remove link validation that should be done elsewhere

2016-01-19 Thread Timothy Arceri
On Tue, 2016-01-19 at 14:09 -0800, Ian Romanick wrote:
> On 01/05/2016 05:40 PM, Timothy Arceri wrote:
> > Even if re-linking fails rendering shouldn't fail as the previous
> > successfully linked program will still be available. It also
> > shouldn't
> > be possible to have an unlinked program as part of the current
> > rendering
> > state.
> > 
> > This fixes a subtest in:
> > ES31-CTS.sepshaderobjs.StateInteraction
> > 
> > This change should improve performance on CPU limited benchmarks as
> > noted
> > in commit d6c6b186cf308f.
> > 
> > From Section 7.3 (Program Objects) of the OpenGL 4.5 spec:
> > 
> >"If a program object that is active for any shader stage is re
> > -linked
> > unsuccessfully, the link status will be set to FALSE, but any
> > existing
> > executables and associated state will remain part of the
> > current rendering
> > state until a subsequent call to UseProgram, UseProgramStages,
> > or
> > BindProgramPipeline removes them from use. If such a program is
> > attached to
> > any program pipeline object, the existing executables and
> > associated state
> > will remain part of the program pipeline object until a
> > subsequent call to
> > UseProgramStages removes them from use. An unsuccessfully
> > linked program may
> > not be made part of the current rendering state by UseProgram
> > or added to
> > program pipeline objects by UseProgramStages until it is
> > successfully
> > re-linked."
> > 
> >"void UseProgram(uint program);
> > 
> >...
> > 
> >An INVALID_OPERATION error is generated if program has not been
> > linked, or
> >was last linked unsuccessfully.  The current rendering state is
> > not modified."
> 
> Right so if there's a problem with the program to begin with,
> that
> should be caught by glUseProgram.  Once a program is in use, calling
> glLinkProgram can't break it.  That sounds reasonable... and I'm
> always
> in favor of removing things from draw-time validation.
> 
> Hmm... there is another potential for problem.  At what point does
> glUseProgram exit if it detects that the specified program is
> already
> in use?  I would expect the following sequence to generate an error
> at
> the second glUseProgram:
> 
> glUseProgram(p);
> glDrawArrays(...);
> 
> glShaderSource(s, 1, _with_a_linking_program, NULL);
> glAttachShader(p, s);
> glLinkProgram(p);  // linking should fail
> glUseProgram(p);   // this should generate an error, but...
>// leave the old executable in place?
> glDrawArrays(...); // should generate same result as before
> 
> That's worth investigating.  It's probably also worth a couple piglit
> tests.

Right, there are ES 3.1 CTS tests for this scenario using
UseProgramStage. As per the spec quote "If a program object that is
active for any shader stage is re-linked unsuccessfully, the link
status will be set to FALSE"

So UseProgram will see the flag, set an error message, and return just as
UseProgramStage does. I'll write the piglit test anyway so things don't
break in future.

Thanks for taking a look.


> 
> Either way, this patch is
> 
> Reviewed-by: Ian Romanick 
> 
> > V2: apply the rule to both core and compat.
> > 
> > Cc: Tapani Pälli 
> > Cc: Ian Romanick 
> > Cc: Brian Paul 
> > ---
> >  src/mesa/main/context.c | 63 +++--
> > 
> >  1 file changed, 3 insertions(+), 60 deletions(-)
> > 
> > diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
> > index be983d4..f3fd01f 100644
> > --- a/src/mesa/main/context.c
> > +++ b/src/mesa/main/context.c
> > @@ -1930,31 +1930,6 @@ _mesa_check_blend_func_error(struct
> > gl_context *ctx)
> > return GL_TRUE;
> >  }
> >  
> > -static bool
> > -shader_linked_or_absent(struct gl_context *ctx,
> > -const struct gl_shader_program *shProg,
> > -bool *shader_present, const char *where)
> > -{
> > -   if (shProg) {
> > -  *shader_present = true;
> > -
> > -  if (!shProg->LinkStatus) {
> > - _mesa_error(ctx, GL_INVALID_OPERATION, "%s(shader not
> > linked)", where);
> > - return false;
> > -  }
> > -#if 0 /* not normally enabled */
> > -  {
> > - char errMsg[100];
> > - if (!_mesa_validate_shader_program(ctx, shProg, errMsg))
> > {
> > -_mesa_warning(ctx, "Shader program %u is invalid: %s",
> > -  shProg->Name, errMsg);
> > - }
> > -  }
> > -#endif
> > -   }
> > -
> > -   return true;
> > -}
> >  
> >  /**
> >   * Prior to drawing anything with glBegin, glDrawArrays, etc. this
> > function
> > @@ -1967,54 +1942,22 @@ shader_linked_or_absent(struct gl_context
> > *ctx,
> >  GLboolean
> >  _mesa_valid_to_render(struct gl_context *ctx, const char *where)
> >  {
> > -   unsigned i;
> > -
> > /* This 

Re: [Mesa-dev] [PATCH 2/2] i965: Implement compute sampler state atom.

2016-01-19 Thread Francisco Jerez
Ben Widawsky  writes:

> On Mon, Jan 18, 2016 at 12:37:18PM -0800, Francisco Jerez wrote:
>> Fixes a number of GLES31 CTS failures and hangs on various hardware:
>> 
>>  ES31-CTS.texture_gather.plain-gather-depth-2d
>>  ES31-CTS.texture_gather.plain-gather-depth-2darray
>>  ES31-CTS.texture_gather.plain-gather-depth-cube
>>  ES31-CTS.texture_gather.offset-gather-depth-2d
>>  ES31-CTS.texture_gather.offset-gather-depth-2darray
>>  ES31-CTS.layout_binding.sampler2D_layout_binding_texture_ComputeShader
>>  ES31-CTS.layout_binding.sampler2DArray_layout_binding_texture_ComputeShader
>>  ES31-CTS.explicit_uniform_location.uniform-loc-types-samplers
>>  ES31-CTS.compute_shader.resources-texture
>> 
>> Some of them were actually passing by luck on some generations even
>> though we weren't uploading sampler state tables explicitly for the
>> compute stage, most likely because they relied on the cached sampler
>> state left from previous rendering to be close enough.
>> 
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92589
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93312
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93325
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93407
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93725
>> Reported-by: Marta Lofstedt 
>
> Thanks a lot for tracking this down. I was going with Marta's insight of the
> DEPTH_32F format thing and getting nowhere. It's really incredible that 
> anything
> worked at all.
>
> [snip]
>
>> +
>> +static void
>> +brw_upload_cs_samplers(struct brw_context *brw)
>> +{
>> +   /* BRW_NEW_COMPUTE_PROGRAM */
>> +   struct gl_program *cs = (struct gl_program *) brw->compute_program;
>> +   if (!cs)
>> +  return;
>> +
>> +   brw_upload_sampler_state_table(brw, cs, &brw->cs.base);
>> +}
>> +
>
> Could you explain how brw->compute_program can be NULL here? I'm just trying 
> to
> understand this a bit better...
>
It might well be non-NULL at present, but I'm not aware of any strong
reason why it would be guaranteed to be non-NULL, and some of the other
brw_context::*_program fields are definitely allowed to be NULL, so it
seemed sensible to check ::compute_program for NULL too.  Shouldn't hurt
anyway.

>> +const struct brw_tracked_state brw_cs_samplers = {
>> +   .dirty = {
>> +  .mesa = _NEW_TEXTURE,
>> +  .brw = BRW_NEW_BATCH |
>> + BRW_NEW_COMPUTE_PROGRAM,
>> +   },
>> +   .emit = brw_upload_cs_samplers,
>
> [snip]


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/10] tgsi: add MEMBAR opcode to handle memoryBarrier* GLSL intrinsics

2016-01-19 Thread Marek Olšák
On Tue, Jan 19, 2016 at 9:09 PM, Francisco Jerez  wrote:
> Marek Olšák  writes:
>
>> On Tue, Jan 19, 2016 at 3:25 AM, Ilia Mirkin  wrote:
>>> On Mon, Jan 18, 2016 at 6:06 AM, Marek Olšák  wrote:
 For 1-4,

 Reviewed-by: Marek Olšák 

 I'm not very familiar with the code in 2, but the changes seem reasonable.

 Also, and I know this is not your mistake, but still, mtypes.h has:

 struct gl_atomic_buffer_binding
   AtomicBufferBindings[MAX_COMBINED_ATOMIC_BUFFERS];

 But it should be:

 struct gl_atomic_buffer_binding
   AtomicBufferBindings[16 /* or use a proper definition here */];

 It's not possible to use more than MaxAtomicBufferBindings, because
 the slots are shared among all shader stages.
>>>
>>> Besides the suboptimal name, I don't see what's so terribly wrong
>>> about this. They're saying there are 15*6 binding points (the value of
>>> MAX_COMBINED_ATOMIC_BUFFERS), and that's also the
>>> MaxCombinedAtomicBuffers thing. I guess MaxCombinedAtomicBuffers
>>> should be N * the max # of bindings? So this is a bit more restrictive
>>> than it could be, but... meh.
>>
>> I don't understand. AtomicBufferBindings are binding points. There is
>> a fixed number of binding points shared by all shader stages, for
>> example 15. It's not possible to bind a buffer above that. From the
>> spec:
>>
>> "BindBufferBase and BindBufferRange will generate an INVALID_VALUE
>> error if <index> is greater than or equal to the value of
>> MAX_ATOMIC_COUNTER_BUFFER_BINDINGS".
>>
>> This is why AtomicBufferBindings[MAX_COMBINED_ATOMIC_BUFFERS] wastes a
>> lot of memory.
>>
> It only wastes any memory if your driver exposes a
> GL_MAX_ATOMIC_COUNTER_BUFFER_BINDINGS less than the
> MAX_COMBINED_ATOMIC_BUFFERS macro, which might be necessary due to
> hardware restrictions, and even in that case the "waste" will be a
> constant amount of memory per context so I'm not particularly concerned
> about it, you could avoid it by allocating the array manually based on
> the MaxAtomicBufferBindings constant provided by the driver, but it
> would be kind of painful.
>
> I didn't choose the binding point array size to be
> MAX_COMBINED_ATOMIC_BUFFERS by accident, it's the maximum amount of
> atomic counter binding points that can potentially be useful, because
> anything greater than that is functionally equivalent to a context with
> MAX_COMBINED_ATOMIC_BUFFERS binding points since you won't be able to
> set up a pipeline using more than that many atomic buffers.  The i965
> driver exposes MAX_COMBINED_ATOMIC_BUFFERS binding points for that
> reason.

It looks like i965 is wrong even. It sets:
GL_MAX_ATOMIC_COUNTER_BINDINGS = 90

Therefore, a shader is allowed to do:
layout(binding = 89, offset = 0) uniform atomic_uint a;

Does that work on i965? It won't report any error as far as I can see.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi: fix discard-only fragment shaders

2016-01-19 Thread Marek Olšák
On Tue, Jan 19, 2016 at 9:13 PM, Nicolai Hähnle  wrote:
> From: Nicolai Hähnle 
>
> When a fragment shader is used that has no outputs but does conditional
> discard (KILL_IF), all fragments are killed without this patch.
>
> By comparing various register settings, my conclusion is that the exec mask
> is either not properly forwarded to the DB by NULL exports or ends up being
> unused, at least when there is _only_ a NULL export (the ISA documentation
> claims that NULL exports can be used to override a previously exported exec
> mask).
>
> Of the various approaches I have tried to work around the problem, this one
> seems to be the least invasive one.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93761
> ---
> I've condensed the bug reporter's test case to a Piglit test that I will
> send out soon.
>
> This should probably go to stable as well, but won't apply as is because of
> the many recent changes to fragment shader export handling. I will follow
> this mail up with a version that applies to 11.1 and that I've confirmed
> fixes the problem there as well.
>
>  src/gallium/drivers/radeonsi/si_state_shaders.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
> b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 8ff70b4..0f0cbb2 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -456,6 +456,7 @@ static void si_shader_ps(struct si_shader *shader)
>info->writes_samplemask ? V_028710_SPI_SHADER_32_ABGR :
>info->writes_stencil ? V_028710_SPI_SHADER_32_GR :
>info->writes_z ? V_028710_SPI_SHADER_32_R :
> +  (info->uses_kill && !info->num_outputs) ? 
> V_028710_SPI_SHADER_32_R :

I guess you know this already: It should also take alpha-test and
SPI_SHADER_Z_FORMAT into account.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] New stable-branch 11.0 candidate pushed

2016-01-19 Thread Oded Gabbay
On Tue, Jan 19, 2016 at 2:48 PM, Emil Velikov  wrote:
>
> Hello list,
>
> The candidate for the Mesa 11.0.9 is now available. Currently we have:
>  - 22 queued
>  - 20 nominated (outstanding)
>  - and 1 rejected/obsolete patches
>
> Current queue addressed a few important driver issues - nouveau
> (sluggish 4K decoding, pushbuf/bufctx management), i965 (EGL crash),
> r600 (severe glitches in Soma) and some memory leaks.
>
>
> Take a look at section "Mesa stable queue" for more information.
>
>
> Dave, Marek,
>
> A few commits in the nominated list require backports. Please forward
> them to mesa-stable mailing list.
>
>
> Testing
> ---
> The following results are against piglit 4b6848c131c.
>
>
> Changes - classic i965(snb)
> ---
> None.
>
>
> Changes - swrast classic
> 
> None.
>
>
> Changes - gallium softpipe
> --
> None.
>
>
> Changes - gallium llvmpipe (LLVM 3.7)
> -
> None.
>
>
> Testing reports/general approval
> 
> Any testing reports (or general approval of the state of the branch)
> will be greatly appreciated.
>
>
> Trivial merge conflicts
> ---
> commit 7562abc8d54045544bfbbd1bd60f2a82bc820eb9
> Author: Grazvydas Ignotas 
>
> r600: fix constant buffer size programming
>
> (cherry picked from commit da0e216e069bd064199ed04b52de6fb23d810806)
>
>
> commit 29360107288b041f23875ed3133cc041c3c33906
> Author: Ilia Mirkin 
>
> nv50,nvc0: make sure there's pushbuf space and that we ref the bo early
>
> (cherry picked from commit c1d14c6817e3fa9a1c04f9b6c51b4ca601637843)
>
>
> commit 0c7b4c20138e4c84eb2d8f9e6368d81c0d6843c6
> Author: Nicolai Hähnle 
>
> st/mesa: use _mesa_delete_buffer_object
>
> (cherry picked from commit 1c2187b1c225b2f7e1891544d184bde60390977e)
>
>
>
> The plan is to have the final 11.0 release - 11.0.9 this Thursday
> (21st of Jan), some time after 13:00 GMT.
>
> If you have any questions or suggestions - be that about the current
> patch queue or otherwise, please go ahead.
>
>
> Cheers,
> Emil
>
>
> Mesa stable queue
> -
>
> Nominated (22)
> ==
>
> Boyan Ding (1):
>   i915: Add XRGB format to intel_screen_make_configs
>
> Brian Paul (1):
>   configure: don't try to build gallium DRI drivers if --disable-dri is 
> set
>
> Dave Airlie (3):
>   r600g: fix outputing to non-0 buffers for stream 0.
>   radeonsi: handle loading doubles as geometry shader inputs.
>
> * Dave - final call, can we get backports for these please ?
>
>   glsl: fix subroutine lowering reusing actual parmaters
>
> Dawid Gan (1):
>   i965: handle stencil_bits parameter for MESA_FORMAT_B8G8R8X8_UNORM 
> format.
>
> Emil Velikov (3):
>   i965: store reference to the context within struct brw_fence
>   egl/dri2: expose srgb configs when KHR_gl_colorspace is available
>   i915: correctly parse/set the context flags
>
> Ilia Mirkin (1):
>   st/mesa: use surface format to generate mipmaps when available
>
> Jason Ekstrand (1):
>   i965/vec4: Use UW type for multiply into accumulator on GEN8+
>
> Jean-Sébastien Pédron (1):
>   ralloc: Use __attribute__((destructor)) instead of atexit(3)
>
> Kenneth Graunke (1):
>   glsl: Fix varying struct locations when varying packing is disabled.
>
> * Ken, this patch requires the following which imho are too much of a
> sidestep considering that this is the final 11.0 release. Would we
> still want this in ?
>
> 9fbcd8e8475e249c7f76b6d63b3a48b8684cb1ffglsl: pass stage into mark 
> function
> d97b060e6f305ce4ad050881944404b920c86edfglsl/fp64: add helper for
> dual slot double detection.
> 5dc22cadb5ed4a7cf8c7d1cbaf7296c27e567e0fglsl: fix
> count_attribute_slots to allow for different 64-bit handling
> 1fc39dae22843d6faf3ec43eab90c7d06f9f6f7bglsl: only update doubles
> inputs for vertex inputs.
>
>
> Marek Olšák (3):
>   program: add _mesa_reserve_parameter_storage
>   st/mesa: fix GLSL uniform updates for glBitmap & glDrawPixels (v2)
>
> * Marek can we please get a backport for the latter ? Alternatively
> can we drop then both, esp. since we don't have cases in the wild
> that trigger the issue ?
>
>   radeonsi: don't miss changes to SPI_TMPRING_SIZE
>
> Timothy Arceri (2):
>   glsl: fix segfault linking subroutine uniform with explicit location
>   mesa: fix segfault in glUniformSubroutinesuiv()
>
> Tom Stellard (4):
>   clover: Call clBuildProgram() notification function when build
> completes v2
>   gallium/drivers: Add threadsafe wrappers for pipe_context v2
>   clover: Use threadsafe wrappers for pipe_context v2
>   clover: Properly initialize LLVM targets when linking with component 
> libs
>
>
> Queued (20)
> ===
>
> Emil Velikov (3):
>   docs: add 

Re: [Mesa-dev] [PATCH] mesa: Move sanity check of BindVertexBuffer for OpenGL ES 3.1

2016-01-19 Thread Ian Romanick
On 01/08/2016 05:55 AM, Marta Lofstedt wrote:
> From: Marta Lofstedt 
> 
> Sanity check of BindVertexBuffer for OpenGL ES in
> _mesa_handle_bind_buffer_gen breaks OpenGL ES 2 conformance.

What was previously broken, and why does this fix it?  I think it's also
worth mentioning that this is a revert of
6c3de8996fbe9447e092cc75ccdd6f720fabaf4d.

> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93426
> Signed-off-by: Marta Lofstedt 
> ---
>  src/mesa/main/bufferobj.c | 2 +-
>  src/mesa/main/varray.c| 8 +++-
>  2 files changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
> index d7c5680..3ca1b3a 100644
> --- a/src/mesa/main/bufferobj.c
> +++ b/src/mesa/main/bufferobj.c
> @@ -953,7 +953,7 @@ _mesa_handle_bind_buffer_gen(struct gl_context *ctx,
>  {
> struct gl_buffer_object *buf = *buf_handle;
>  
> -   if (!buf && (ctx->API == API_OPENGL_CORE || _mesa_is_gles31(ctx))) {
> +   if (!buf && (ctx->API == API_OPENGL_CORE)) {
>_mesa_error(ctx, GL_INVALID_OPERATION, "%s(non-gen name)", caller);
>return false;
> }
> diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c
> index c71e16a..a8d757b 100644
> --- a/src/mesa/main/varray.c
> +++ b/src/mesa/main/varray.c
> @@ -1754,8 +1754,14 @@ vertex_array_vertex_buffer(struct gl_context *ctx,
> * Otherwise, we fall back to the same compat profile behavior as other
> * object references (automatically gen it).
> */
> -  if (!_mesa_handle_bind_buffer_gen(ctx, buffer, &vbo, func))
> +  if (!_mesa_handle_bind_buffer_gen(ctx, buffer, &vbo, func)) {
> +  return;
> +  }
> +
> +  if (!vbo && _mesa_is_gles31(ctx)) {
> + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(non-gen name)", func);
>   return;
> +  }
> } else {
>/* The ARB_vertex_attrib_binding spec says:
> *
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/10] tgsi: add MEMBAR opcode to handle memoryBarrier* GLSL intrinsics

2016-01-19 Thread Francisco Jerez
Marek Olšák  writes:

> On Tue, Jan 19, 2016 at 3:25 AM, Ilia Mirkin  wrote:
>> On Mon, Jan 18, 2016 at 6:06 AM, Marek Olšák  wrote:
>>> For 1-4,
>>>
>>> Reviewed-by: Marek Olšák 
>>>
>>> I'm not very familiar with the code in 2, but the changes seem reasonable.
>>>
>>> Also, and I know this is not your mistake, but still, mtypes.h has:
>>>
>>> struct gl_atomic_buffer_binding
>>>   AtomicBufferBindings[MAX_COMBINED_ATOMIC_BUFFERS];
>>>
>>> But it should be:
>>>
>>> struct gl_atomic_buffer_binding
>>>   AtomicBufferBindings[16 /* or use a proper definition here */];
>>>
>>> It's not possible to use more than MaxAtomicBufferBindings, because
>>> the slots are shared among all shader stages.
>>
>> Besides the suboptimal name, I don't see what's so terribly wrong
>> about this. They're saying there are 15*6 binding points (the value of
>> MAX_COMBINED_ATOMIC_BUFFERS), and that's also the
>> MaxCombinedAtomicBuffers thing. I guess MaxCombinedAtomicBuffers
>> should be N * the max # of bindings? So this is a bit more restrictive
>> than it could be, but... meh.
>
> I don't understand. AtomicBufferBindings are binding points. There is
> a fixed number of binding points shared by all shader stages, for
> example 15. It's not possible to bind a buffer above that. From the
> spec:
>
> "BindBufferBase and BindBufferRange will generate an INVALID_VALUE
> error if <index> is greater than or equal to the value of
> MAX_ATOMIC_COUNTER_BUFFER_BINDINGS".
>
> This is why AtomicBufferBindings[MAX_COMBINED_ATOMIC_BUFFERS] wastes a
> lot of memory.
>
It only wastes any memory if your driver exposes a
GL_MAX_ATOMIC_COUNTER_BUFFER_BINDINGS less than the
MAX_COMBINED_ATOMIC_BUFFERS macro, which might be necessary due to
hardware restrictions, and even in that case the "waste" will be a
constant amount of memory per context so I'm not particularly concerned
about it, you could avoid it by allocating the array manually based on
the MaxAtomicBufferBindings constant provided by the driver, but it
would be kind of painful.

I didn't choose the binding point array size to be
MAX_COMBINED_ATOMIC_BUFFERS by accident, it's the maximum amount of
atomic counter binding points that can potentially be useful, because
anything greater than that is functionally equivalent to a context with
MAX_COMBINED_ATOMIC_BUFFERS binding points since you won't be able to
set up a pipeline using more than that many atomic buffers.  The i965
driver exposes MAX_COMBINED_ATOMIC_BUFFERS binding points for that
reason.

> MAX_COMBINED_ATOMIC_BUFFERS is about shader references, not bindings.
> You can have 15 global bindings (slots), but 5 shaders can reference
> them, which leads to 15*5 references. This is a thing that only the
> linker should care about. It's only possible to get the maximum if all
> 5 stages use the same slots (because there are only 15). The spec is
> pretty clear about it:
>
> "If an atomic counter buffer is used by multiple shaders, each such
> use counts separately against this combined limit. The combined atomic
> counter buffer use limit can be obtained by calling GetIntegerv with a
> <pname> of MAX_COMBINED_ATOMIC_COUNTER_BUFFERS"
>
> Marek
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glx: don't force version == 2.0 for ES2 GLX context creation

2016-01-19 Thread Matt Turner
On Tue, Jan 19, 2016 at 7:08 AM, Ilia Mirkin  wrote:
> dEQP tests request a specific version. The EXT spec has been updated to
> allow other versions, so allow anything >= 2.0 to be requested.
>
> Signed-off-by: Ilia Mirkin 
> ---
>
> The X server was also blocking this... a patch has been sent (pending
> moderator approval).


Nice. Thanks.

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium: make image views non-persistent objects

2016-01-19 Thread Ilia Mirkin
On Tue, Jan 19, 2016 at 10:42 AM, Nicolai Hähnle  wrote:
> On 18.01.2016 22:08, Ilia Mirkin wrote:
>>
>> Make them akin to shader buffers, with no refcounting/etc. Just used to
>> pass data about the bound image in ->set_shader_images.
>>
>> Signed-off-by: Ilia Mirkin 
>> ---
>>
>> I don't really see a reason why these were refcounted objects. It seems
>> like
>> it would be convenient to make these line up with shader buffers, so
>> that's
>> what I've done here.
>>
>> Please let me know if I'm missing something.
>
>
> I haven't thought about this much, but at least Radeon does quite a bit of
> work in create_sampler_view.
>
> Since everything boils down to the same hardware resource descriptors in the
> end, I'd expect the same to happen for a create_image_view. I believe we'll
> want a create_image_view which ends up calling code that is shared with
> create_sampler_view.
>
> So make that a vote against this change from me.
>
> Come to think of it, from a Radeon perspective I'm not sure why there is a
> separate pipe_image_view structure in the first place (other than perhaps
> reducing confusion about which combination of fields make sense).

A key difference between images and sampler views is that images don't
have any funky parameters -- no filtering, wrapping, etc. You can only
do the equivalent of texelFetch (imageLoad) and imageStore, to exact
integer coordinates. I think it's a much simpler object than a sampler
view.

Samplers/texture views are longer lived objects on NVIDIA hardware --
there are separate binding tables with them, and remapping tables,
etc. It would be inconvenient to reupload those every time, esp as it
involves various flushes. I don't think there's any such thing with
images.

Anyways, if the consensus is to make these live longer, that's fine
too. It just seemed unnecessary.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi: fix discard-only fragment shaders (11.1 version)

2016-01-19 Thread Nicolai Hähnle
From: Nicolai Hähnle 

When a fragment shader is used that has no outputs but does conditional
discard (KILL_IF), all fragments are killed without this patch.

By comparing various register settings, my conclusion is that the exec mask
is either not properly forwarded to the DB by NULL exports or ends up being
unused, at least when there is _only_ a NULL export (the ISA documentation
claims that NULL exports can be used to override a previously exported exec
mask).

Of the various approaches I have tried to work around the problem, this one
seems to be the least invasive one.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93761
Cc: "11.0 11.1" 
---
 src/gallium/drivers/radeonsi/si_shader.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 1baa2eb..1c1aaa0 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2291,6 +2291,9 @@ static void si_llvm_emit_fs_epilogue(struct 
lp_build_tgsi_context * bld_base)
last_args[6]= uint->zero;
last_args[7]= uint->zero;
last_args[8]= uint->zero;
+
+   if (info->uses_kill)
+   si_shader_ctx->shader->spi_shader_z_format = 
V_028710_SPI_SHADER_32_R;
}
 
/* Specify whether the EXEC mask represents the valid mask */
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: Restore Mesa-style to shader_enums.c/h.

2016-01-19 Thread Ian Romanick
On 01/15/2016 01:44 PM, Matt Turner wrote:
> On Fri, Jan 15, 2016 at 1:39 PM, Ilia Mirkin  wrote:
>> On Fri, Jan 15, 2016 at 4:34 PM, Matt Turner  wrote:
>>> ---
>>>  src/glsl/nir/shader_enums.c | 24 
>>>  src/glsl/nir/shader_enums.h | 16 
>>>  2 files changed, 24 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/src/glsl/nir/shader_enums.c b/src/glsl/nir/shader_enums.c
>>> index 10f546a..16b20db 100644
>>> --- a/src/glsl/nir/shader_enums.c
>>> +++ b/src/glsl/nir/shader_enums.c
>>> @@ -33,7 +33,8 @@
>>>  #define ENUM(x) [x] = #x
>>>  #define NAME(val) ((((val) < ARRAY_SIZE(names)) && names[(val)]) ? 
>>> names[(val)] : "UNKNOWN")
>>>
>>> -const char * gl_shader_stage_name(gl_shader_stage stage)
>>> +const char *
>>> +gl_shader_stage_name(gl_shader_stage stage)
>>>  {
>>> static const char *names[] = {
>>>ENUM(MESA_SHADER_VERTEX),
>>> @@ -51,7 +52,8 @@ const char * gl_shader_stage_name(gl_shader_stage stage)
>>>   * Translate a gl_shader_stage to a short shader stage name for debug
>>>   * printouts and error messages.
>>>   */
>>> -const char * _mesa_shader_stage_to_string(unsigned stage)
>>> +const char *
>>> +_mesa_shader_stage_to_string(unsigned stage)
>>>  {
>>> switch (stage) {
>>> case MESA_SHADER_VERTEX:   return "vertex";
>>> @@ -69,7 +71,8 @@ const char * _mesa_shader_stage_to_string(unsigned stage)
>>>   * Translate a gl_shader_stage to a shader stage abbreviation (VS, GS, FS)
>>>   * for debug printouts and error messages.
>>>   */
>>> -const char * _mesa_shader_stage_to_abbrev(unsigned stage)
>>> +const char *
>>> +_mesa_shader_stage_to_abbrev(unsigned stage)
>>>  {
>>> switch (stage) {
>>> case MESA_SHADER_VERTEX:   return "VS";
>>> @@ -83,7 +86,8 @@ const char * _mesa_shader_stage_to_abbrev(unsigned stage)
>>> unreachable("Unknown shader stage.");
>>>  }
>>>
>>> -const char * gl_vert_attrib_name(gl_vert_attrib attrib)
>>> +const char *
>>> +gl_vert_attrib_name(gl_vert_attrib attrib)
>>>  {
>>> static const char *names[] = {
>>>ENUM(VERT_ATTRIB_POS),
>>> @@ -124,7 +128,8 @@ const char * gl_vert_attrib_name(gl_vert_attrib attrib)
>>> return NAME(attrib);
>>>  }
>>>
>>> -const char * gl_varying_slot_name(gl_varying_slot slot)
>>> +const char *
>>> +gl_varying_slot_name(gl_varying_slot slot)
>>>  {
>>> static const char *names[] = {
>>>ENUM(VARYING_SLOT_POS),
>>> @@ -190,7 +195,8 @@ const char * gl_varying_slot_name(gl_varying_slot slot)
>>> return NAME(slot);
>>>  }
>>>
>>> -const char * gl_system_value_name(gl_system_value sysval)
>>> +const char *
>>> +gl_system_value_name(gl_system_value sysval)
>>>  {
>>> static const char *names[] = {
>>>   ENUM(SYSTEM_VALUE_VERTEX_ID),
>>> @@ -216,7 +222,8 @@ const char * gl_system_value_name(gl_system_value 
>>> sysval)
>>> return NAME(sysval);
>>>  }
>>>
>>> -const char * glsl_interp_qualifier_name(enum glsl_interp_qualifier qual)
>>> +const char *
>>> +glsl_interp_qualifier_name(enum glsl_interp_qualifier qual)
>>>  {
>>> static const char *names[] = {
>>>ENUM(INTERP_QUALIFIER_NONE),
>>> @@ -228,7 +235,8 @@ const char * glsl_interp_qualifier_name(enum 
>>> glsl_interp_qualifier qual)
>>> return NAME(qual);
>>>  }
>>>
>>> -const char * gl_frag_result_name(gl_frag_result result)
>>> +const char *
>>> +gl_frag_result_name(gl_frag_result result)
>>>  {
>>> static const char *names[] = {
>>>ENUM(FRAG_RESULT_DEPTH),
>>> diff --git a/src/glsl/nir/shader_enums.h b/src/glsl/nir/shader_enums.h
>>> index c747464..efc0b0d 100644
>>> --- a/src/glsl/nir/shader_enums.h
>>> +++ b/src/glsl/nir/shader_enums.h
>>> @@ -47,19 +47,19 @@ typedef enum
>>> MESA_SHADER_COMPUTE = 5,
>>>  } gl_shader_stage;
>>>
>>> -const char * gl_shader_stage_name(gl_shader_stage stage);
>>> +const char *gl_shader_stage_name(gl_shader_stage stage);
>>
>> Wouldn't the mesa style be to do
>>
>> const char *
>> gl_shader_stage_name(gl_shader_stage stage);
> 
> It seems we do that in headers sometimes, but the real value in
> separating the return type from the function name is to be able to
> grep '^func' to find the definition in the *.c file. I'm fine with
> prototypes in headers having the return type on the same line.

It also helps you find where the prototype is.  If I want to add a
parameter to a function, I need to find and update both places.  It
doesn't matter as much when the .c and .h have the same name, but for
cases like meta.h or brw_context.h (which doesn't completely follow this
style), it matters more.

>> ? (Along with "extern" thrown in for no apparent reason.)
> 
> Heh :)
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org

Re: [Mesa-dev] [PATCH] arb_shader_subroutine: fix lowering reusing actual parameters

2016-01-19 Thread Ian Romanick
On 01/16/2016 08:23 PM, Dave Airlie wrote:
> From: Dave Airlie 
> 
> One of the oglconform tests was crashing here, and it was
> due to not cloning the actual parameters before creating the
> new call. This makes a call clone function that does the right
> things to make sure we clone all the needed info, and points
> the callee at it. (It differs from ->clone due to this).
> 
> this may fix https://bugs.freedesktop.org/show_bug.cgi?id=93722, I had this
> patch in my cts fixes tree, but hadn't had time to make sure I liked it.

I also sent this as a reply on the piglit list... but then realized you
had probably already sent the patch out. :)

For the sake of avoiding code duplication, I think it's better to make a
new overload of ir_call::clone that takes the callee parameter.  Then
implement the existing clone() as this->clone(mem_ctx, ht, this->callee).

> Signed-off-by: Dave Airlie 
> ---
>  src/glsl/lower_subroutine.cpp | 24 +++-
>  1 file changed, 19 insertions(+), 5 deletions(-)
> 
> diff --git a/src/glsl/lower_subroutine.cpp b/src/glsl/lower_subroutine.cpp
> index a0df5e1..ac8ade1 100644
> --- a/src/glsl/lower_subroutine.cpp
> +++ b/src/glsl/lower_subroutine.cpp
> @@ -44,6 +44,7 @@ public:
> }
>  
> ir_visitor_status visit_leave(ir_call *);
> +   ir_call *call_clone(ir_call *call, ir_function_signature *callee);
> bool progress;
> struct _mesa_glsl_parse_state *state;
>  };
> @@ -58,6 +59,23 @@ lower_subroutine(exec_list *instructions, struct 
> _mesa_glsl_parse_state *state)
> return v.progress;
>  }
>  
> +ir_call *
> +lower_subroutine_visitor::call_clone(ir_call *call, ir_function_signature 
> *callee)
> +{
> +   void *mem_ctx = ralloc_parent(call);
> +   ir_dereference_variable *new_return_ref = NULL;
> +   if (call->return_deref != NULL)
> +  new_return_ref = call->return_deref->clone(mem_ctx, NULL);
> +
> +   exec_list new_parameters;
> +
> +   foreach_in_list(ir_instruction, ir, &call->actual_parameters) {
> +  new_parameters.push_tail(ir->clone(mem_ctx, NULL));
> +   }
> +
> +   return new(mem_ctx) ir_call(callee, new_return_ref, &new_parameters);
> +}
> +
>  ir_visitor_status
>  lower_subroutine_visitor::visit_leave(ir_call *ir)
>  {
> @@ -66,7 +84,6 @@ lower_subroutine_visitor::visit_leave(ir_call *ir)
>  
> void *mem_ctx = ralloc_parent(ir);
> ir_if *last_branch = NULL;
> -   ir_dereference_variable *return_deref = ir->return_deref;
>  
> for (int s = this->state->num_subroutines - 1; s >= 0; s--) {
>ir_rvalue *var;
> @@ -92,14 +109,11 @@ lower_subroutine_visitor::visit_leave(ir_call *ir)
>   fn->exact_matching_signature(this->state,
>   &ir->actual_parameters);
>  
> -  ir_call *new_call = new(mem_ctx) ir_call(sub_sig, return_deref, 
> &ir->actual_parameters);
> +  ir_call *new_call = call_clone(ir, sub_sig);
>if (!last_branch)
>   last_branch = if_tree(equal(subr_to_int(var), lc), new_call);
>else
>   last_branch = if_tree(equal(subr_to_int(var), lc), new_call, 
> last_branch);
> -
> -  if (return_deref && s > 0)
> -return_deref = return_deref->clone(mem_ctx, NULL);
> }
> if (last_branch)
>ir->insert_before(last_branch);
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 01/10] tgsi: add MEMBAR opcode to handle memoryBarrier* GLSL intrinsics

2016-01-19 Thread Roland Scheidegger
Am 19.01.2016 um 18:26 schrieb Ilia Mirkin:
> This is designed to map the GLSL intrinsics. Should one have a desire
> to also support d3d11, one could figure out what the overlap is and
> rejigger the arguments so that both sets of desires are expressible. I
> glanced at SM5 sync before doing this, and TBH I couldn't really make
> sense of it:
> 
> https://urldefense.proofpoint.com/v2/url?u=https-3A__msdn.microsoft.com_en-2Dus_library_windows_desktop_hh447241-28v-3Dvs.85-29.aspx=BQIFaQ=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs=Vjtt0vs_iqoI31UfJxBl7yv9I2FeiaeAYgMTLKRBc_I=SMet2U2OIJ5oy5jQStnU-uILuZtC-MlPv4zHcWrYWVQ=xCCXURzgP9o6ovwd20TIT_Xmi1Bvv56cA4H6B1ItAIg=
>  
> 
> If you can interpret it and make concrete recommendations for change,
> I'm happy to accommodate.
I can't make all that much sense out of it either (I don't even see how
the mapping is from the HLSL functions).
Albeit the thread group and shared memory sync seem close. I suppose for
shader storage buffers, atomic buffers, and images d3d11 would use all
the same sync, as it is all UAV. Albeit it can distinguish there from
per-group or global sync (the former more looks like TGSI_OPCODE_BARRIER?).
Anyway, I don't have any objections to this.

Roland



> 
> On Tue, Jan 19, 2016 at 12:04 PM, Roland Scheidegger  
> wrote:
>> I am actually wondering how well that would work for d3d11.
>> d3d11 just has AllMemoryBarrier, DeviceMemoryBarrier plus
>> GroupMemoryBarrier - and for each of them also a "WithGroupSync"
>> version. Hmm.
>>
>> Roland
>>
>> Am 19.01.2016 um 03:30 schrieb Ilia Mirkin:
>>> Signed-off-by: Ilia Mirkin 
>>> Reviewed-by: Marek Olšák  (v1)
>>>
>>> v1 -> v2: add defines for the various bits
>>> ---
>>>  src/gallium/auxiliary/tgsi/tgsi_info.c |  2 +-
>>>  src/gallium/docs/source/tgsi.rst   | 17 +
>>>  src/gallium/include/pipe/p_shader_tokens.h |  7 ++-
>>>  3 files changed, 24 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c 
>>> b/src/gallium/auxiliary/tgsi/tgsi_info.c
>>> index b270dd7..46b296f 100644
>>> --- a/src/gallium/auxiliary/tgsi/tgsi_info.c
>>> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
>>> @@ -149,7 +149,7 @@ static const struct tgsi_opcode_info 
>>> opcode_info[TGSI_OPCODE_LAST] =
>>> { 1, 2, 0, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE },
>>> { 1, 2, 0, 0, 0, 0, 0, COMP, "FSLT", TGSI_OPCODE_FSLT },
>>> { 1, 2, 0, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE },
>>> -   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 112 },  /* removed */
>>> +   { 0, 1, 0, 0, 0, 0, 0, OTHR, "MEMBAR", TGSI_OPCODE_MEMBAR },
>>> { 0, 1, 0, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ },
>>> { 0, 1, 0, 0, 0, 0, 0, NONE, "", 114 }, /* removed */
>>> { 0, 1, 0, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC },
>>> diff --git a/src/gallium/docs/source/tgsi.rst 
>>> b/src/gallium/docs/source/tgsi.rst
>>> index 7810a3e..489cbb0 100644
>>> --- a/src/gallium/docs/source/tgsi.rst
>>> +++ b/src/gallium/docs/source/tgsi.rst
>>> @@ -2372,6 +2372,23 @@ programs.
>>>the program.  Results are unspecified if any of the remaining
>>>threads terminates or never reaches an executed BARRIER instruction.
>>>
>>> +.. opcode:: MEMBAR - Memory barrier
>>> +
>>> +  ``MEMBAR type``
>>> +
>>> +  This opcode waits for the completion of all memory accesses based on
>>> +  the type passed in. The type is an immediate bitfield with the following
>>> +  meaning:
>>> +
>>> +  Bit 0: Shader storage buffers
>>> +  Bit 1: Atomic buffers
>>> +  Bit 2: Images
>>> +  Bit 3: Shared memory
>>> +  Bit 4: Thread group
>>> +
>>> +  These may be passed in in any combination. An implementation is free to 
>>> not
>>> +  distinguish between these as it sees fit. However these map to all the
>>> +  possibilities made available by GLSL.
>>>
>>>  .. _atomopcodes:
>>>
>>> diff --git a/src/gallium/include/pipe/p_shader_tokens.h 
>>> b/src/gallium/include/pipe/p_shader_tokens.h
>>> index f300207..6539017 100644
>>> --- a/src/gallium/include/pipe/p_shader_tokens.h
>>> +++ b/src/gallium/include/pipe/p_shader_tokens.h
>>> @@ -420,7 +420,7 @@ struct tgsi_property_data {
>>>  #define TGSI_OPCODE_FSLT110
>>>  #define TGSI_OPCODE_FSNE111
>>>
>>> -/* gap */
>>> +#define TGSI_OPCODE_MEMBAR  112
>>>  #define TGSI_OPCODE_CALLNZ  113
>>>  /* gap */
>>>  #define TGSI_OPCODE_BREAKC  115
>>> @@ -744,6 +744,11 @@ struct tgsi_instruction_memory
>>> unsigned Padding   : 29;
>>>  };
>>>
>>> +#define TGSI_MEMBAR_SHADER_BUFFER (1 << 0)
>>> +#define TGSI_MEMBAR_ATOMIC_BUFFER (1 << 1)
>>> +#define TGSI_MEMBAR_SHADER_IMAGE  (1 << 2)
>>> +#define TGSI_MEMBAR_SHARED(1 << 3)
>>> +#define TGSI_MEMBAR_THREAD_GROUP  (1 << 4)
>>>
>>>  #ifdef __cplusplus
>>>  }
>>>
>>

___

Re: [Mesa-dev] [PATCH] gallium: make image views non-persistent objects

2016-01-19 Thread Marek Olšák
On Tue, Jan 19, 2016 at 4:42 PM, Nicolai Hähnle  wrote:
> On 18.01.2016 22:08, Ilia Mirkin wrote:
>>
>> Make them akin to shader buffers, with no refcounting/etc. Just used to
>> pass data about the bound image in ->set_shader_images.
>>
>> Signed-off-by: Ilia Mirkin 
>> ---
>>
>> I don't really see a reason why these were refcounted objects. It seems
>> like
>> it would be convenient to make these line up with shader buffers, so
>> that's
>> what I've done here.
>>
>> Please let me know if I'm missing something.
>
>
> I haven't thought about this much, but at least Radeon does quite a bit of
> work in create_sampler_view.
>
> Since everything boils down to the same hardware resource descriptors in the
> end, I'd expect the same to happen for a create_image_view. I believe we'll
> want a create_image_view which ends up calling code that is shared with
> create_sampler_view.
>
> So make that a vote against this change from me.
>
> Come to think of it, from a Radeon perspective I'm not sure why there is a
> separate pipe_image_view structure in the first place (other than perhaps
> reducing confusion about which combination of fields make sense).

Despite what radeon does, I had told Ilia that I would be okay with this.

It depends on st/mesa - can it cache existing views for future use? If
not, views aren't so useful.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi: fix discard-only fragment shaders

2016-01-19 Thread Nicolai Hähnle
From: Nicolai Hähnle 

When a fragment shader is used that has no outputs but does conditional
discard (KILL_IF), all fragments are killed without this patch.

By comparing various register settings, my conclusion is that the exec mask
is either not properly forwarded to the DB by NULL exports or ends up being
unused, at least when there is _only_ a NULL export (the ISA documentation
claims that NULL exports can be used to override a previously exported exec
mask).

Of the various approaches I have tried to work around the problem, this one
seems to be the least invasive one.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93761
---
I've condensed the bug reporter's test case to a Piglit test that I will
send out soon.

This should probably go to stable as well, but won't apply as is because of
the many recent changes to fragment shader export handling. I will follow
this mail up with a version that applies to 11.1 and that I've confirmed
fixes the problem there as well.

 src/gallium/drivers/radeonsi/si_state_shaders.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 8ff70b4..0f0cbb2 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -456,6 +456,7 @@ static void si_shader_ps(struct si_shader *shader)
   info->writes_samplemask ? V_028710_SPI_SHADER_32_ABGR :
   info->writes_stencil ? V_028710_SPI_SHADER_32_GR :
   info->writes_z ? V_028710_SPI_SHADER_32_R :
+  (info->uses_kill && !info->num_outputs) ? 
V_028710_SPI_SHADER_32_R :
   V_028710_SPI_SHADER_ZERO);
 
si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT, 
spi_shader_col_format);
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] i965: Provide sse2 version for rgba8 <-> bgra8 swizzle

2016-01-19 Thread Matt Turner
On Tue, Jan 19, 2016 at 3:05 PM, Roland Scheidegger  wrote:
> Ping?

Yesterday was a holiday in the US.

I'm planning to review this.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/vec4: Use UW type for multiply into accumulator on GEN8+

2016-01-19 Thread Jason Ekstrand
On Tue, Jan 19, 2016 at 12:57 PM, Matt Turner  wrote:

> On Sat, Jan 16, 2016 at 7:31 PM, Jason Ekstrand 
> wrote:
> >
> > On Jan 16, 2016 5:56 PM, "Matt Turner"  wrote:
> >>
> >> On Thu, Jan 14, 2016 at 12:27 PM, Matt Turner 
> wrote:
> >> > On Thu, Jan 14, 2016 at 12:08 PM, Jason Ekstrand <
> ja...@jlekstrand.net>
> >> > wrote:
> >> >> BDW adds the following restriction: "When multiplying DW x DW, the
> dst
> >> >> cannot be accumulator."
> >> >> ---
> >> >>  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 6 +-
> >> >>  1 file changed, 5 insertions(+), 1 deletion(-)
> >> >>
> >> >> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> >> >> b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> >> >> index c228743..b2335bd 100644
> >> >> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> >> >> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
> >> >> @@ -1069,7 +1069,11 @@ vec4_visitor::nir_emit_alu(nir_alu_instr
> *instr)
> >> >> case nir_op_umul_high: {
> >> >>struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
> >> >>
> >> >> -  emit(MUL(acc, op[0], op[1]));
> >> >> +  if (devinfo->gen >=8)
> >> >
> >> > Space after >=
> >> >
> >> >> + emit(MUL(acc, op[0], retype(op[1], BRW_REGISTER_TYPE_UW)));
> >> >> +  else
> >> >> + emit(MUL(acc, op[0], op[1]));
> >> >> +
> >> >
> >> > Do the
> >> >
> tests/spec/arb_gpu_shader5/execution/built-in-functions/vs-{i,u}mulExtended*.shader_test
> >> > tests currently fail on BDW with INTEL_DEBUG=vec4? If so, presumably
> >> > this fixes it?
> >
> > It didn't fix anything
>
> That's an aggravatingly ambiguous answer. Did it not fix anything
> because all the tests already passed, or did it not fix anything
> because the tests still fail?
>
> I pulled out my Broadwell to test for myself:
>
> First column is 0a68112~, second is 0a68112~ using INTEL_DEBUG=vec4,
> third is 0a68112 using INTEL_DEBUG=vec4.
>
> vs-imulextended: pass fail pass
> vs-imulextended-nonuniform:  pass fail pass
> vs-imulextended-only-msb:pass fail pass
> vs-imulextended-only-msb-nonuniform: pass fail pass
> vs-umulextended: pass fail pass
> vs-umulextended-nonuniform:  pass fail pass
> vs-umulextended-only-msb:pass fail pass
> vs-umulextended-only-msb-nonuniform: pass fail pass
>
> So the tests I asked you to run did fail without this patch and were
> fixed by this patch.
>

Ok, I see what's going on.  We have no GS variants of those tests on BDW so
of course Jenkins didn't give me a difference thanks to SIMD8 VS.   Sorry
for the confusion; I wasn't paying enough attention.
--Jason


>
> For completeness, after those tests were fixed by this patch, the only
> remaining regressions from INTEL_DEBUG=vec4 on BDW are
>
> spec/arb_shader_draw_parameters/drawid-{indirect-baseinstance,basevertex,vertexid}.
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 00/63] ARB_internalformat_query2 on Mesa and i965 backend

2016-01-19 Thread Nanley Chery
On Tue, Jan 19, 2016 at 05:41:40PM +0100, Eduardo Lima Mitev wrote:
> Hello,
> 
> This is an RFC series adding support for the ARB_internalformat_query2 
> extension:
> 
> https://www.opengl.org/registry/specs/ARB/internalformat_query2.txt
> 
> The corresponding bug is being tracked at:
> 
> https://bugs.freedesktop.org/show_bug.cgi?id=92687
> 
> Why is this an RFC series instead of a formal merge-able patch-set?
> 
> Two reasons. Firstly, we are still polishing rough edges in some patches. 
> However, the support is complete to the best of our knowledge. Most of the 
> final changes we are making are improvements to particular query answers, 
> thus contained inside specific blocks, so they don't affect the structure of 
> the code.
> Secondly, we have been trying to get general feedback, and answers to some 
> doubts we posted on bugzilla, without much success. So maybe an explicit RFC 
> in the mailing list would bring more eyes to it.
> This is a rather large extension, with a long spec wording, certainly 
> difficult to review. So we totally appreciate the effort and brain cycles of 
> whoever takes on this.
> 
> 
> The patch-set is structured as follows:
> 
> * Patches 01 to 10 sets up the stage to query2. It will add a new, generic 
> driver hook that obsoletes QuerySamplesForFormat, which is removed. But it 
> doesn't introduce anything related with query2 yet.
> 
> * Patches 11 to 61 implement the different individual queries from query2 
> extension in the frontend (mesa/main), adding validation and helper functions 
> as needed.

There are a number of patches in this block that access the
ctx->Extensions member directly. The new and safer way to check for
extension support is to use the auto-generated helper functions defined
in main/extensions.h. Given the extension you're interested in with
Name String, GL_<NAME>, you can see if it is enabled in the given
context by using the function, _mesa_has_<NAME>(ctx)
(e.g. GL_ARB_shader_image_load_store ->
_mesa_has_ARB_shader_image_load_store(ctx)). This does all the context
checking required to determine extensions support (desktop GL vs GLES,
driver support, and API version requirement).

Regards,
Nanley

> 
> * Patches 62 to 63 activates the extension on i965. Only the queries where 
> the driver has something to return other than default value returned by the 
> frontend, are explicitly added.
> 
> 
> Some implementation notes:
> 
> * All the extension's frontend code is in main/formatquery.c, as it was 
> before for query1. Only that it also handles query2 now.
> 
> * As commented above, a new driver hook 'QueryInternalFormat' was added, 
> replacing the previous one 'QuerySamplesForFormat'.
> 
> * A fallback, generic function _mesa_query_internal_format_default() provides 
> generic implementation and sensible defaults for all queries, for drivers not 
> implementing query2. Backends that only care about answering some queries, 
> can call back this function for the other queries where a generic answer is 
> ok.
> 
> * For all pnames, the frontend code will do generic validation as per the 
> spec: check GL profile, version, extensions.
>   - If the frontend fails basic validation, it will give the corresponding 
> negative answer, depending on the pname, without going to the driver.
>   - If the frontend is fully qualified to provide an answer, it will (i.e, 
> MAX_WIDTH, COLOR_COMPONENTS, etc). Otherwise it will call the driver hook 
> (i.e, INTERNALFORMAT_PREFERRED).
>   - For the cases where the query must return full support, caveat support, 
> or no support; Mesa/main will always call the driver to decide between full 
> or caveat support (and only answer directly in the case of no-support).
> 
> * The last patches in the branch enable support for this extension in i965 
> backend (drivers/dri/i965/brw_formatquery.c). The backend code only handle 
> queries where the answer is affected by driver-specific stuff. But by 
> default, it calls back the frontend function with the default implementations.
> 
> * The 64 bits version of the query introduced by this extension 
> (GetInternalformati64v), was implemented as a wrapper around the 32 bits 
> version. Since only one query really requires the 64 bits API 
> (MAX_COMBINED_DIMENSIONS), we handle that pname as a special case. For the 
> rest of queries, we just forward the call to the default, 32 bits version.
> 
> 
> A git tree of the series can be found at:
> 
> https://github.com/Igalia/mesa/tree/internalformat-query2-rfc
> 
> 
> There is also a branch containing piglit tests for the extension, which my 
> colleague Alejandro will send to the piglit mailing list for feedback/review.
> 
> 
> cheers,
> Eduardo (on behalf of the team that worked on this)
> 
> 
> Alejandro Piñeiro (9):
>   mesa: Add dispatch and extension XML for GL_ARB_internalformat_query2
>   mesa/main: not fill mesa_error on
> _mesa_legal_texture_base_format_for_target
>   mesa/formatquery: initial implementation for 

[Mesa-dev] [PATCH] llvmpipe: warn about illegal use of objects in different contexts

2016-01-19 Thread sroland
From: Roland Scheidegger 

Doing that is clearly a bug. We can't quite assert as st/mesa may hit this,
but increase at least visibility of it a bit.
(For the non-refcounted objects it would be illegal too, but we can't detect
that unless we'd store the context ourselves. Plus, those don't tend to cause
random crashes at context or object destruction time... So just sampler views,
surfaces and so targets for now.)
---
 src/gallium/drivers/llvmpipe/lp_state_sampler.c |  9 +
 src/gallium/drivers/llvmpipe/lp_state_so.c  |  9 +
 src/gallium/drivers/llvmpipe/lp_state_surface.c | 15 ++-
 3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c 
b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index 69af38e..32bf9fd 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -129,6 +129,15 @@ llvmpipe_set_sampler_views(struct pipe_context *pipe,
*/
   pipe_sampler_view_release(pipe,
 &llvmpipe->sampler_views[shader][start + i]);
+  /*
+   * Warn if someone tries to set a view created in a different context
+   * (which is why we need the hack above in the first place).
+   * An assert would be better but st/mesa relies on it...
+   */
+  if (views[i] && views[i]->context != pipe) {
+ debug_printf("Illegal setting of sampler_view %d created in another "
+  "context\n", i);
+  }
   pipe_sampler_view_reference(&llvmpipe->sampler_views[shader][start + i],
   views[i]);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_so.c 
b/src/gallium/drivers/llvmpipe/lp_state_so.c
index 2af04cd..b2afd6f 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_so.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_so.c
@@ -70,6 +70,15 @@ llvmpipe_set_so_targets(struct pipe_context *pipe,
int i;
for (i = 0; i < num_targets; i++) {
   const boolean append = (offsets[i] == (unsigned)-1);
+  /*
+   * Warn if the so target was created in another context.
+   * XXX Not entirely sure if mesa/st may rely on this?
+   * Otherwise should just assert.
+   */
+  if (targets[i] && targets[i]->context != pipe) {
+ debug_printf("Illegal setting of so target with target %d created in "
+   "another context\n", i);
+  }
   pipe_so_target_reference((struct pipe_stream_output_target 
**)&llvmpipe->so_targets[i], targets[i]);
   /* If we're not appending then lets set the internal
  offset to what was requested */
diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c 
b/src/gallium/drivers/llvmpipe/lp_state_surface.c
index c879ba9..b20b9c5 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_surface.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c
@@ -52,6 +52,7 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe,
struct llvmpipe_context *lp = llvmpipe_context(pipe);
 
boolean changed = !util_framebuffer_state_equal(&lp->framebuffer, fb);
+   unsigned i;
 
assert(fb->width <= LP_MAX_WIDTH);
assert(fb->height <= LP_MAX_HEIGHT);
@@ -66,10 +67,22 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe,
   const struct util_format_description *depth_desc =
  util_format_description(depth_format);
 
+  if (lp->framebuffer.zsbuf && lp->framebuffer.zsbuf->context != pipe) {
+ debug_printf("Illegal setting of fb state with zsbuf created in "
+   "another context\n");
+  }
+  for (i = 0; i < fb->nr_cbufs; i++) {
+ if (lp->framebuffer.cbufs[i] &&
+ lp->framebuffer.cbufs[i]->context != pipe) {
+debug_printf("Illegal setting of fb state with cbuf %d created in "
+  "another context\n", i);
+ }
+  }
+
   util_copy_framebuffer_state(&lp->framebuffer, fb);
 
   if (LP_PERF & PERF_NO_DEPTH) {
-pipe_surface_reference(&lp->framebuffer.zsbuf, NULL);
+ pipe_surface_reference(&lp->framebuffer.zsbuf, NULL);
   }
 
   /*
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 85586] Draw module crashes in LLVM generated code since commit 60ec95fa1e0c42bd42358185970b20c9b81591fa

2016-01-19 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=85586

Michel Dänzer  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #2 from Michel Dänzer  ---
I think this was fixed long ago.

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl: add missing explicit_image_format flag to has_layout()

2016-01-19 Thread Timothy Arceri
Fixes piglit regression after fixes to duplicate layout rules.

Previously catching multiple layouts was relying on the code
meant to catch duplicates within a single layout(...), this
change triggers the rules for multiple layouts.

Cc: Mark Janes 
---
 src/glsl/ast_type.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp
index cf494d9..e0e3311 100644
--- a/src/glsl/ast_type.cpp
+++ b/src/glsl/ast_type.cpp
@@ -74,6 +74,7 @@ ast_type_qualifier::has_layout() const
   || this->flags.q.row_major
   || this->flags.q.packed
   || this->flags.q.explicit_location
+  || this->flags.q.explicit_image_format
   || this->flags.q.explicit_index
   || this->flags.q.explicit_binding
   || this->flags.q.explicit_offset
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] radeonsi: enable late VS allocation (v2)

2016-01-19 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_state.c | 21 +++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index f005461..ca32f5c 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3742,8 +3742,25 @@ static void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, 
S_00B31C_CU_EN(0xffff));
si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 
S_00B21C_CU_EN(0xffff));
-   si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 
S_00B118_CU_EN(0xffff));
-   si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, 
S_00B11C_LIMIT(0));
+
+   if (sscreen->b.info.num_good_compute_units /
+   (sscreen->b.info.max_se * sscreen->b.info.max_sh_per_se) <= 
4) {
+   /* Too few available compute units per SH. Disallowing
+* VS to run on CU0 could hurt us more than late VS
+* allocation would help.
+*
+* LATE_ALLOC_VS = 2 is the highest safe number.
+*/
+   si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 
S_00B118_CU_EN(0xffff));
+   si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, 
S_00B11C_LIMIT(2));
+   } else {
+   /* Set LATE_ALLOC_VS == 31. It should be less than
+* the number of scratch waves. VS can't run on CU0.
+*/
+   si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 
S_00B118_CU_EN(0xfffe));
+   si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, 
S_00B11C_LIMIT(31));
+   }
+
si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, 
S_00B01C_CU_EN(0xffff));
}
 
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] radeonsi: allow tessellation on CU1 and ES on CU0

2016-01-19 Thread Marek Olšák
From: Marek Olšák 

We don't use on-chip GS, so it's not required to reserve CU1 for ES.
---
 src/gallium/drivers/radeonsi/si_state.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 4b674ed..f005461 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3738,9 +3738,9 @@ static void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
 
if (sctx->b.chip_class >= CIK) {
-   si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, 
S_00B51C_CU_EN(0xfffc));
+   si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, 
S_00B51C_CU_EN(0xfffe));
si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
-   si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, 
S_00B31C_CU_EN(0xfffe));
+   si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, 
S_00B31C_CU_EN(0xffff));
si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 
S_00B21C_CU_EN(0xffff));
si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 
S_00B118_CU_EN(0xffff));
si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, 
S_00B11C_LIMIT(0));
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] winsys/amdgpu: compute num_good_compute_units correctly

2016-01-19 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 15 +--
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index 9835024..69df363 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -266,17 +266,12 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
ws->info.r600_virtual_address = TRUE;
ws->info.r600_has_dma = dma.available_rings != 0;
 
-   /* Guess what the maximum compute unit number is by looking at the mask
-* of enabled CUs.
-*/
+   /* Get the number of good compute units. */
+   ws->info.num_good_compute_units = 0;
for (i = 0; i < ws->info.max_se; i++)
-  for (j = 0; j < ws->info.max_sh_per_se; j++) {
- unsigned max = util_last_bit(ws->amdinfo.cu_bitmap[i][j]);
-
- if (ws->info.num_good_compute_units < max)
-ws->info.num_good_compute_units = max;
-  }
-   ws->info.num_good_compute_units *= ws->info.max_se * ws->info.max_sh_per_se;
+  for (j = 0; j < ws->info.max_sh_per_se; j++)
+ ws->info.num_good_compute_units +=
+util_bitcount(ws->amdinfo.cu_bitmap[i][j]);
 
memcpy(ws->info.si_tile_mode_array, ws->amdinfo.gb_tile_mode,
   sizeof(ws->amdinfo.gb_tile_mode));
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] gallium/radeon: rename max_compute_units -> num_good_compute_units

2016-01-19 Thread Marek Olšák
From: Marek Olšák 

radeon sets this correctly, but not amdgpu
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 4 ++--
 src/gallium/drivers/radeon/radeon_winsys.h| 2 +-
 src/gallium/drivers/radeonsi/si_compute.c | 4 ++--
 src/gallium/drivers/radeonsi/si_pipe.c| 2 +-
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 6 +++---
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 4 ++--
 6 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index e926f56..4c066c1 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -705,7 +705,7 @@ static int r600_get_compute_param(struct pipe_screen 
*screen,
case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
if (ret) {
uint32_t *max_compute_units = ret;
-   *max_compute_units = rscreen->info.max_compute_units;
+   *max_compute_units = 
rscreen->info.num_good_compute_units;
}
return sizeof(uint32_t);
 
@@ -973,7 +973,7 @@ bool r600_common_screen_init(struct r600_common_screen 
*rscreen,
printf("gart_size = %i MB\n", (int)(rscreen->info.gart_size >> 
20));
printf("vram_size = %i MB\n", (int)(rscreen->info.vram_size >> 
20));
printf("max_sclk = %i\n", rscreen->info.max_sclk);
-   printf("max_compute_units = %i\n", 
rscreen->info.max_compute_units);
+   printf("num_good_compute_units = %i\n", 
rscreen->info.num_good_compute_units);
printf("max_se = %i\n", rscreen->info.max_se);
printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se);
printf("drm = %i.%i.%i\n", rscreen->info.drm_major,
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h 
b/src/gallium/drivers/radeon/radeon_winsys.h
index ad30474..2e5caa6 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -251,7 +251,7 @@ struct radeon_info {
 uint64_tgart_size;
 uint64_tvram_size;
 uint32_tmax_sclk;
-uint32_tmax_compute_units;
+uint32_tnum_good_compute_units;
 uint32_tmax_se;
 uint32_tmax_sh_per_se;
 
diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 5a08cbf..6ef6eee 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -61,7 +61,7 @@ static void init_scratch_buffer(struct si_context *sctx, 
struct si_compute *prog
 
/* Compute the scratch buffer size using the maximum number of waves.
 * This way we don't need to recompute it for each kernel launch. */
-   unsigned scratch_waves = 32 * sctx->screen->b.info.max_compute_units;
+   unsigned scratch_waves = 32 * 
sctx->screen->b.info.num_good_compute_units;
for (i = 0; i < program->shader.binary.global_symbol_count; i++) {
unsigned offset =
program->shader.binary.global_symbol_offsets[i];
@@ -402,7 +402,7 @@ static void si_launch_grid(
 
num_waves_for_scratch =
MIN2(num_waves_for_scratch,
-32 * sctx->screen->b.info.max_compute_units);
+32 * sctx->screen->b.info.num_good_compute_units);
si_pm4_set_reg(pm4, R_00B860_COMPUTE_TMPRING_SIZE,
/* The maximum value for WAVES is 32 * num CU.
 * If you program this value incorrectly, the GPU will hang if
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index f6ff4a8..d3130bc 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -208,7 +208,7 @@ static struct pipe_context *si_create_context(struct 
pipe_screen *screen,
 * this for non-cs shaders.  Using the wrong value here can result in
 * GPU lockups, but the maximum value seems to always work.
 */
-   sctx->scratch_waves = 32 * sscreen->b.info.max_compute_units;
+   sctx->scratch_waves = 32 * sscreen->b.info.num_good_compute_units;
 
 #if HAVE_LLVM >= 0x0306
/* Initialize LLVM TargetMachine */
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index 39d3aa4..9835024 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -273,10 +273,10 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
   for (j = 0; j < ws->info.max_sh_per_se; j++) {
  unsigned max = util_last_bit(ws->amdinfo.cu_bitmap[i][j]);
 
- if (ws->info.max_compute_units < max)
-

[Mesa-dev] [PATCH 2/2] radeonsi: implement SAMPLEPOS system value without a constant buffer load

2016-01-19 Thread Marek Olšák
From: Marek Olšák 

We always get per-sample input position.
---
 src/gallium/drivers/radeonsi/si_shader.c | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 5c536f8..d788c41 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1119,9 +1119,20 @@ static void declare_system_value(
value = get_sample_id(radeon_bld);
break;
 
-   case TGSI_SEMANTIC_SAMPLEPOS:
-   value = load_sample_position(radeon_bld, 
get_sample_id(radeon_bld));
+   case TGSI_SEMANTIC_SAMPLEPOS: {
+   LLVMValueRef pos[4] = {
+   LLVMGetParam(radeon_bld->main_fn, SI_PARAM_POS_X_FLOAT),
+   LLVMGetParam(radeon_bld->main_fn, SI_PARAM_POS_Y_FLOAT),
+   lp_build_const_float(gallivm, 0),
+   lp_build_const_float(gallivm, 0)
+   };
+   pos[0] = lp_build_emit_llvm_unary(&radeon_bld->soa.bld_base,
+ TGSI_OPCODE_FRC, pos[0]);
+   pos[1] = lp_build_emit_llvm_unary(&radeon_bld->soa.bld_base,
+ TGSI_OPCODE_FRC, pos[1]);
+   value = lp_build_gather_values(gallivm, pos, 4);
break;
+   }
 
case TGSI_SEMANTIC_SAMPLEMASK:
/* Smoothing isn't MSAA in GL, but it's MSAA in hardware.
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: Conditionalize atan2 math.

2016-01-19 Thread Matt Turner
I wonder how you came across that.

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: add missing explicit_image_format flag to has_layout()

2016-01-19 Thread Mark Janes
This patch fixes
arb_shader_image_load_store.compiler.declaration-format-qualifier-duplicate
tests.  However, it regresses 17 other arb_shader_image_load_store
tests with:

Failed to compile fragment shader: 0:20(1): error: storage qualifiers must come 
after precise, invariant, interpolation, layout and auxiliary storage qualifiers

-Mark

Timothy Arceri  writes:

> Fixes piglit regression after fixes to duplicate layout rules.
>
> Previously catching multiple layouts was relying on the code
> meant to catch duplicates within a single layout(...), this
> change triggers the rules for multiple layouts.
>
> Cc: Mark Janes 
> ---
>  src/glsl/ast_type.cpp | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp
> index cf494d9..e0e3311 100644
> --- a/src/glsl/ast_type.cpp
> +++ b/src/glsl/ast_type.cpp
> @@ -74,6 +74,7 @@ ast_type_qualifier::has_layout() const
>|| this->flags.q.row_major
>|| this->flags.q.packed
>|| this->flags.q.explicit_location
> +  || this->flags.q.explicit_image_format
>|| this->flags.q.explicit_index
>|| this->flags.q.explicit_binding
>|| this->flags.q.explicit_offset
> -- 
> 2.4.3
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: add missing explicit_image_format flag to has_layout()

2016-01-19 Thread Timothy Arceri
On Tue, 2016-01-19 at 16:28 -0800, Mark Janes wrote:
> This patch fixes
> arb_shader_image_load_store.compiler.declaration-format-qualifier
> -duplicate
> tests.  However, it regresses 17 other arb_shader_image_load_store
> other
> tests with:
> 
> Failed to compile fragment shader: 0:20(1): error: storage qualifiers
> must come after precise, invariant, interpolation, layout and
> auxiliary storage qualifiers

Yes it does :P Seems I only ran the compile tests. However this time it
looks like the piglit tests are doing the wrong thing and the change
has highlighted it.

> 
> -Mark
> 
> Timothy Arceri  writes:
> 
> > Fixes piglit regression after fixes to duplicate layout rules.
> > 
> > Previously catching multiple layouts was relying on the code
> > meant to catch duplicates within a single layout(...), this
> > change triggers the rules for multiple layouts.
> > 
> > Cc: Mark Janes 
> > ---
> >  src/glsl/ast_type.cpp | 1 +
> >  1 file changed, 1 insertion(+)
> > 
> > diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp
> > index cf494d9..e0e3311 100644
> > --- a/src/glsl/ast_type.cpp
> > +++ b/src/glsl/ast_type.cpp
> > @@ -74,6 +74,7 @@ ast_type_qualifier::has_layout() const
> >|| this->flags.q.row_major
> >|| this->flags.q.packed
> >|| this->flags.q.explicit_location
> > +  || this->flags.q.explicit_image_format
> >|| this->flags.q.explicit_index
> >|| this->flags.q.explicit_binding
> >|| this->flags.q.explicit_offset
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] nir: Lower ldexp to arithmetic.

2016-01-19 Thread Kenneth Graunke
This is a port of Matt's GLSL IR lowering pass to NIR.  It's required
because we translate SPIR-V directly to NIR, bypassing GLSL IR.

I haven't introduced a lower_ldexp flag, as I believe all current NIR
consumers would set the flag.  i965 wants this, vc4 doesn't implement
this feature, and st_glsl_to_tgsi currently lowers ldexp
unconditionally anyway.

Signed-off-by: Kenneth Graunke 
---
 src/glsl/nir/nir_opt_algebraic.py | 63 +++
 1 file changed, 63 insertions(+)

diff --git a/src/glsl/nir/nir_opt_algebraic.py 
b/src/glsl/nir/nir_opt_algebraic.py
index 7745b76..0976ed6 100644
--- a/src/glsl/nir/nir_opt_algebraic.py
+++ b/src/glsl/nir/nir_opt_algebraic.py
@@ -1,4 +1,5 @@
 #! /usr/bin/env python
+# -*- encoding: utf-8 -*-
 #
 # Copyright (C) 2014 Intel Corporation
 #
@@ -265,6 +266,68 @@ for op in ['flt', 'fge', 'feq', 'fne',
('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))),
]
 
+def ldexp_to_arith(x, exp):
+   """
+   Translates
+  ldexp x exp
+   into
+
+  extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
+  resulting_biased_exp = extracted_biased_exp + exp;
+
+  if (resulting_biased_exp < 1) {
+ return copysign(0.0, x);
+  }
+
+  return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
+ lshift(i2u(resulting_biased_exp), exp_shift));
+
+   which we can't actually implement as such, since NIR doesn't have
+   vectorized if-statements. We actually implement it without branches
+   using conditional-select:
+
+  extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
+  resulting_biased_exp = extracted_biased_exp + exp;
+
+  is_not_zero_or_underflow = gequal(resulting_biased_exp, 1);
+  x = csel(is_not_zero_or_underflow, x, copysign(0.0f, x));
+  resulting_biased_exp = csel(is_not_zero_or_underflow,
+  resulting_biased_exp, 0);
+
+  return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
+ lshift(i2u(resulting_biased_exp), exp_shift));
+   """
+
+   sign_mask = 0x80000000
+   exp_shift = 23
+   exp_width = 8
+
+   # Extract the biased exponent from <x>.
+   extracted_biased_exp = ('ushr', ('iabs', x), exp_shift)
+   resulting_biased_exp = ('iadd', extracted_biased_exp, exp)
+
+   # Test if result is ±0.0, subnormal, or underflow by checking if the
+   # resulting biased exponent would be less than 0x1. If so, the result is
+   # 0.0 with the sign of x. (Actually, invert the conditions so that
+   # immediate values are the second arguments, which is better for i965)
+   zero_sign_x = ('iand', x, sign_mask)
+
+   is_not_zero_or_underflow = ('ige', resulting_biased_exp, 0x1)
+
+   # We could test for overflows by checking if the resulting biased exponent
+   # would be greater than 0xFE. Turns out we don't need to because the GLSL
+   # spec says:
+   #
+   #"If this product is too large to be represented in the
+   # floating-point type, the result is undefined."
+
+   return ('bitfield_insert',
+   ('bcsel', is_not_zero_or_underflow, x, zero_sign_x),
+   ('bcsel', is_not_zero_or_underflow, resulting_biased_exp, 0),
+   exp_shift, exp_width)
+
+optimizations += [(('ldexp', 'x', 'exp'), ldexp_to_arith('x', 'exp'))]
+
 # This section contains "late" optimizations that should be run after the
 # regular optimizations have finished.  Optimizations should go here if
 # they help code generation but do not necessarily produce code that is
-- 
2.7.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] nir: Let nir_opt_algebraic rules contain unsigned constants > INT_MAX.

2016-01-19 Thread Kenneth Graunke
struct.pack('i', val) interprets `val` as a signed integer, and dies
if `val` > INT_MAX.  For larger constants, we need to use 'I' which
interprets it as an unsigned value.

This patch makes us use 'I' for all values >= 0, and 'i' for negative
values.  This should work in all cases.

Signed-off-by: Kenneth Graunke 
---
 src/glsl/nir/nir_algebraic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glsl/nir/nir_algebraic.py b/src/glsl/nir/nir_algebraic.py
index a30652f..14c0e82 100644
--- a/src/glsl/nir/nir_algebraic.py
+++ b/src/glsl/nir/nir_algebraic.py
@@ -108,7 +108,7 @@ class Constant(Value):
   if isinstance(self.value, (bool)):
  return 'NIR_TRUE' if self.value else 'NIR_FALSE'
   if isinstance(self.value, (int, long)):
- return hex(struct.unpack('I', struct.pack('i', self.value))[0])
+ return hex(struct.unpack('I', struct.pack('i' if self.value < 0 else 
'I', self.value))[0])
   elif isinstance(self.value, float):
  return hex(struct.unpack('I', struct.pack('f', self.value))[0])
   else:
-- 
2.7.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: add missing explicit_image_format flag to has_layout()

2016-01-19 Thread Francisco Jerez
Timothy Arceri  writes:

> Fixes piglit regression after fixes to duplicate layout rules.
>
> Previously catching multiple layouts was relying on the code
> meant to catch duplicates within a single layout(...), this
> change triggers the rules for multiple layouts.
>
> Cc: Mark Janes 
> ---
>  src/glsl/ast_type.cpp | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp
> index cf494d9..e0e3311 100644
> --- a/src/glsl/ast_type.cpp
> +++ b/src/glsl/ast_type.cpp
> @@ -74,6 +74,7 @@ ast_type_qualifier::has_layout() const
>|| this->flags.q.row_major
>|| this->flags.q.packed
>|| this->flags.q.explicit_location
> +  || this->flags.q.explicit_image_format
>|| this->flags.q.explicit_index
>|| this->flags.q.explicit_binding
>|| this->flags.q.explicit_offset

Looks reasonable to me,
Reviewed-by: Francisco Jerez 

> -- 
> 2.4.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] radeonsi: add max waves / CU to shader stats

2016-01-19 Thread eocallaghan

This series is,

Reviewed-by: Edward O'Callaghan 

On 2016-01-20 12:39, Marek Olšák wrote:

From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_shader.c | 33 
+---

 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c
b/src/gallium/drivers/radeonsi/si_shader.c
index 0c5fd32..5c536f8 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3994,12 +3994,39 @@ static void si_shader_dump_stats(struct
si_screen *sscreen,
 struct pipe_debug_callback *debug,
 unsigned processor)
 {
+   /* Compute the maximum number of waves.
+* The pixel shader additionally allocates 1 - 48 blocks of LDS
+* depending on non-compile times parameters.
+*/
+   unsigned ps_lds_size = processor == TGSI_PROCESSOR_FRAGMENT ? 1 : 0;
+   unsigned lds_size = ps_lds_size + conf->lds_size;
+   unsigned max_waves = 10;
+
+   if (conf->num_sgprs) {
+   if (sscreen->b.chip_class >= VI)
+   max_waves = MIN2(max_waves, 800 / conf->num_sgprs);
+   else
+   max_waves = MIN2(max_waves, 512 / conf->num_sgprs);
+   }
+
+   if (conf->num_vgprs)
+   max_waves = MIN2(max_waves, 256 / conf->num_vgprs);
+
+   if (lds_size)
+   max_waves = MIN2(max_waves, 128 / lds_size);
+
if (r600_can_dump_shader(&sscreen->b, processor)) {
fprintf(stderr, "*** SHADER STATS ***\n"
-   "SGPRS: %d\nVGPRS: %d\nCode Size: %d bytes\nLDS: %d 
blocks\n"
-   "Scratch: %d bytes per wave\n\n",
+   "SGPRS: %d\n"
+   "VGPRS: %d\n"
+   "Code Size: %d bytes\n"
+   "LDS: %d blocks\n"
+   "Scratch: %d bytes per wave\n"
+   "Max waves / CU: %d\n"
+   "\n",
conf->num_sgprs, conf->num_vgprs, code_size,
-   conf->lds_size, conf->scratch_bytes_per_wave);
+   conf->lds_size, conf->scratch_bytes_per_wave,
+   max_waves);
}

pipe_debug_message(debug, SHADER_INFO,


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3] glapi: Build glapi_gentable.c only on Darwin

2016-01-19 Thread Andreas Boll
Thanks for reviewing!

Oh, I was motivated to do this patch to remove an unneeded symbol on
linux as we started tracking libGL's exported symbols in Debian.
As a side effect it accelerates the build a bit and reduces the size
of libGL significantly.

From looking at these threads [1][2] I found out that Arlie's patch
was reviewed but not pushed due to missing testing on OS X.
His patch would still make sense for OS X. I'll ping Jeremy on that thread.

Anyway I'll add the reported-by line.

Thanks,
Andreas

[1] https://patchwork.freedesktop.org/patch/59617/
[2] https://patchwork.freedesktop.org/patch/59812/

2016-01-13 20:50 GMT+01:00 Matt Turner :
> glxgears still works for me, and libGL goes from 4.2M to 3.3M.
>
> Reviewed-by: Matt Turner 
>
> We should also include some mention of Arlie's contribution, since he
> identified this and sent the initial patch:
>
> Reported-by: Arlie Davis 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] mesa: Reduce libGL.so binary size by about 15%

2016-01-19 Thread Andreas Boll
Jeremy, did you have a chance to test this patch?
This patch would still be useful for OS X. For non-OS X this patch [1]
reduces the size of libGL.so even further.

Thanks,
Andreas

[1] https://patchwork.freedesktop.org/patch/70372/

2015-09-28 19:46 GMT+02:00 Jeremy Huddleston Sequoia :
> I'll give it a go.
>
> It is still needed on OS X (and I think Windows).  It's just not used by the 
> X server any more.
>
> --Jeremy
>
>> On Sep 28, 2015, at 10:26, Arlie Davis  wrote:
>>
>> I tried building Mesa on OS X, but I'm not nearly as familiar with 
>> development on OS X, so I wasn't able to get it to build.  If someone could 
>> build / test that on OS X, it would certainly give more confidence in its 
>> correctness.  I *think* the generated code is correct, but you know how that 
>> is.
>>
>> If there is no need for this on OS X any longer, then the best thing might 
>> be to remove it entirely.
>>
>> On Sat, Sep 26, 2015 at 5:56 PM, Jeremy Huddleston Sequoia 
>>  wrote:
>> Reviewing diffs of code that generates code is always ick. =(
>>
>> This *looks* right to me, but has it been given a beating for correctness?  
>> If not, let me know, and I'll give it a whirl when I have some cycles.
>>
>> Reviewed-by: Jeremy Huddleston Sequoia 
>>
>> ---
>>
>> You're right that this used to be used in xserver as well, but that was 
>> removed in:
>>
>> commit e61e19959d9138d5b81b1f25b7aa3e257918170d
>> Author: Adam Jackson 
>> Date:   Tue Dec 3 13:45:43 2013 -0500
>>
>> xquartz/glx: Convert to non-glapi dispatch
>>
>> CGL doesn't have anything like glXGetProcAddress, and the old code just
>> called down to dlsym in any case.  It's a little mind-warping since
>> dlopening a framework actually loads multiple dylibs, but that's just
>> how OSX rolls.
>>
>> Signed-off-by: Adam Jackson 
>> Reviewed-by: Jeremy Huddleston Sequoia 
>>
>>
>> > On Sep 22, 2015, at 15:55, Ian Romanick  wrote:
>> >
>> > On 09/17/2015 03:19 PM, Arlie Davis wrote:
>> >> Ok, here's v2 of the change, with the suggested edits.
>> >
>> > So... I think this code is fine, and I admire the effort.  I have a
>> > couple concerns.
>> >
>> > 1. We have no way to test this, so it's quite possible something was 
>> > broken.
>> >
>> > 2. This function is only used in the OSX builds.  Jeremy is the
>> > maintainer for those builds, so I've added him to the CC list.
>> >
>> > For every non-OSX build, we should just stop linking
>> > src/mapi/glapi/glapi_gentable.c.  I thought maybe the X sever used it,
>> > but I couldn't find any evidence of that.
>> >
>> > If this is still a viable route, I have a few suggestions of follow-on
>> > patches...
>> >
>> > I guess this patch is
>> >
>> > Reviewed-by: Ian Romanick 
>> >
>> > but I really think we need to get Jeremy's approval before pushing it.
>> >
>> >> From 5f393faa058f453408dfc640eecae3fe6335dfed Mon Sep 17 00:00:00 2001
>> >> From: Arlie Davis 
>> >> Date: Tue, 15 Sep 2015 09:58:34 -0700
>> >> Subject: [PATCH] This patch significantly reduces the size of the libGL.so
>> >> binary. It does not change the (externally visible) behavior of libGL.so 
>> >> at
>> >> all.
>> >>
>> >> gl_gentable.py generates a function, _glapi_create_table_from_handle.
>> >> This function allocates a large dispatch table, consisting of 1300 or so
>> >> function pointers, and fills this dispatch table by doing symbol lookups
>> >> on a given shared library.  Previously, gl_gentable.py would generate a
>> >> single, very large _glapi_create_table_from_handle function, with a short
>> >> cluster of lines for each entry point (function).  The idiom it generates
>> >> was a NULL check, a call to snprintf, a call to dlsym / GetProcAddress,
>> >> and then a store into the dispatch table.  Since this function processes
>> >> a large number of entry points, this code is duplicated many times over.
>> >>
>> >> We can encode the same information much more compactly, by using a lookup
>> >> table.  The previous total size of _glapi_create_table_from_handle on x64
>> >> was 125848 bytes.  By using a lookup table, the size of
>> >> _glapi_create_table_from_handle (and the related lookup tables) is reduced
>> >> to 10840 bytes.  In other words, this enormous function is reduced by 91%.
>> >> The size of the entire libGL.so binary (measured when stripped) itself 
>> >> drops
>> >> by 15%.
>> >>
>> >> So the purpose of this change is to reduce the binary size, which frees up
>> >> disk space, memory, etc.
>> >> ---
>> >> src/mapi/glapi/gen/gl_gentable.py | 57 
>> >> ---
>> >> 1 file changed, 41 insertions(+), 16 deletions(-)
>> >>
>> >> diff --git a/src/mapi/glapi/gen/gl_gentable.py 
>> >> b/src/mapi/glapi/gen/gl_gentable.py
>> >> index 1b3eb72..7cd475a 100644
>> >> --- 

[Mesa-dev] Mesa 11.2.0 release plan

2016-01-19 Thread Emil Velikov
Hi all,

It's time to get an idea of where we are wrt the next Mesa release
- 11.2.0. As usual here is the normal plan, although everyone
interested is more than welcome to chime in

February 19th 2016 - Feature freeze/Release candidate 1
February 26th 2016 - Release candidate 2
March 04th 2016 - Release candidate 3
March 11th 2016 - Release candidate 4/Mesa 11.2.0

Thus we have a month to get any more new features in.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 03/10] st/mesa: add PROGRAM_BUFFER, stop using gl_register_file

2016-01-19 Thread Ilia Mirkin
Because I got a ton of errors about how PROGRAM_BUFFER was out of
bounds for the enum. I could add them to the enum, of course, but I
think that enum was about ARB_vp/fp things?

  -ilia

On Tue, Jan 19, 2016 at 9:23 AM, Brian Paul  wrote:
> Can you say why you're removing gl_register_file?  Enums are so much nicer
> when debugging.
>
> -Brian
>
>
> On 01/18/2016 07:30 PM, Ilia Mirkin wrote:
>>
>> Signed-off-by: Ilia Mirkin 
>> Reviewed-by: Marek Olšák 
>> ---
>>   src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 21 +++--
>>   1 file changed, 11 insertions(+), 10 deletions(-)
>>
>> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> index d424e3b..055d187 100644
>> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> @@ -53,6 +53,7 @@
>>
>>
>>   #define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX
>> +#define PROGRAM_BUFFER(PROGRAM_FILE_MAX + 1)
>>   #define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) |\
>>  (1 << PROGRAM_CONSTANT) | \
>>  (1 << PROGRAM_UNIFORM))
>> @@ -69,7 +70,7 @@ static int swizzle_for_size(int size);
>>*/
>>   class st_src_reg {
>>   public:
>> -   st_src_reg(gl_register_file file, int index, const glsl_type *type)
>> +   st_src_reg(int file, int index, const glsl_type *type)
>>  {
>> this->file = file;
>> this->index = index;
>> @@ -88,7 +89,7 @@ public:
>> this->is_double_vertex_input = false;
>>  }
>>
>> -   st_src_reg(gl_register_file file, int index, int type)
>> +   st_src_reg(int file, int index, int type)
>>  {
>> this->type = type;
>> this->file = file;
>> @@ -104,7 +105,7 @@ public:
>> this->is_double_vertex_input = false;
>>  }
>>
>> -   st_src_reg(gl_register_file file, int index, int type, int index2D)
>> +   st_src_reg(int file, int index, int type, int index2D)
>>  {
>> this->type = type;
>> this->file = file;
>> @@ -138,7 +139,7 @@ public:
>>
>>  explicit st_src_reg(st_dst_reg reg);
>>
>> -   gl_register_file file; /**< PROGRAM_* from Mesa */
>> +   int file; /**< PROGRAM_* from Mesa */
>>  int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc.
>> */
>>  int index2D;
>>  GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
>> @@ -203,7 +204,7 @@ public:
>>
>>  explicit st_dst_reg(st_src_reg reg);
>>
>> -   gl_register_file file; /**< PROGRAM_* from Mesa */
>> +   int file; /**< PROGRAM_* from Mesa */
>>  int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc.
>> */
>>  int index2D;
>>  int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
>> @@ -273,14 +274,14 @@ public:
>>
>>   class variable_storage : public exec_node {
>>   public:
>> -   variable_storage(ir_variable *var, gl_register_file file, int index,
>> +   variable_storage(ir_variable *var, int file, int index,
>>   unsigned array_id = 0)
>> : file(file), index(index), var(var), array_id(array_id)
>>  {
>> /* empty */
>>  }
>>
>> -   gl_register_file file;
>> +   int file;
>>  int index;
>>  ir_variable *var; /* variable that maps to this, if any */
>>  unsigned array_id;
>> @@ -401,7 +402,7 @@ public:
>>
>>  variable_storage *find_variable_storage(ir_variable *var);
>>
>> -   int add_constant(gl_register_file file, gl_constant_value values[8],
>> +   int add_constant(int file, gl_constant_value values[8],
>>   int size, int datatype, GLuint *swizzle_out);
>>
>>  function_entry *get_function_signature(ir_function_signature *sig);
>> @@ -990,7 +991,7 @@ glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
>>   }
>>
>>   int
>> -glsl_to_tgsi_visitor::add_constant(gl_register_file file,
>> +glsl_to_tgsi_visitor::add_constant(int file,
>>  gl_constant_value values[8], int
>> size, int datatype,
>>  GLuint *swizzle_out)
>>   {
>> @@ -4550,7 +4551,7 @@ emit_immediate(struct st_translate *t,
>>* Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
>>*/
>>   static struct ureg_dst
>> -dst_register(struct st_translate *t, gl_register_file file, unsigned
>> index,
>> +dst_register(struct st_translate *t, int file, unsigned index,
>>unsigned array_id)
>>   {
>>  unsigned array;
>>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] New stable-branch 11.0 candidate pushed

2016-01-19 Thread Emil Velikov
Hello list,

The candidate for the Mesa 11.0.9 is now available. Currently we have:
 - 20 queued
 - 22 nominated (outstanding)
 - and 1 rejected/obsolete patches

Current queue addressed a few important driver issues - nouveau
(sluggish 4K decoding, pushbuf/bufctx management), i965 (EGL crash),
r600 (severe glitches in Soma) and some memory leaks.


Take a look at section "Mesa stable queue" for more information.


Dave, Marek,

A few commits in the nominated list require backports. Please forward
them to mesa-stable mailing list.


Testing
---
The following results are against piglit 4b6848c131c.


Changes - classic i965(snb)
---
None.


Changes - swrast classic

None.


Changes - gallium softpipe
--
None.


Changes - gallium llvmpipe (LLVM 3.7)
-
None.


Testing reports/general approval

Any testing reports (or general approval of the state of the branch)
will be greatly appreciated.


Trivial merge conflicts
---
commit 7562abc8d54045544bfbbd1bd60f2a82bc820eb9
Author: Grazvydas Ignotas 

r600: fix constant buffer size programming

(cherry picked from commit da0e216e069bd064199ed04b52de6fb23d810806)


commit 29360107288b041f23875ed3133cc041c3c33906
Author: Ilia Mirkin 

nv50,nvc0: make sure there's pushbuf space and that we ref the bo early

(cherry picked from commit c1d14c6817e3fa9a1c04f9b6c51b4ca601637843)


commit 0c7b4c20138e4c84eb2d8f9e6368d81c0d6843c6
Author: Nicolai Hähnle 

st/mesa: use _mesa_delete_buffer_object

(cherry picked from commit 1c2187b1c225b2f7e1891544d184bde60390977e)



The plan is to have the final 11.0 release - 11.0.9 this Thursday
(21st of Jan), some time after 13:00 GMT.

If you have any questions or suggestions - be that about the current
patch queue or otherwise, please go ahead.


Cheers,
Emil


Mesa stable queue
-

Nominated (22)
==

Boyan Ding (1):
  i915: Add XRGB format to intel_screen_make_configs

Brian Paul (1):
  configure: don't try to build gallium DRI drivers if --disable-dri is set

Dave Airlie (3):
  r600g: fix outputing to non-0 buffers for stream 0.
  radeonsi: handle loading doubles as geometry shader inputs.

* Dave - final call, can we get backports for these please?

  glsl: fix subroutine lowering reusing actual parmaters

Dawid Gan (1):
  i965: handle stencil_bits parameter for MESA_FORMAT_B8G8R8X8_UNORM format.

Emil Velikov (3):
  i965: store reference to the context within struct brw_fence
  egl/dri2: expose srgb configs when KHR_gl_colorspace is available
  i915: correctly parse/set the context flags

Ilia Mirkin (1):
  st/mesa: use surface format to generate mipmaps when available

Jason Ekstrand (1):
  i965/vec4: Use UW type for multiply into accumulator on GEN8+

Jean-Sébastien Pédron (1):
  ralloc: Use __attribute__((destructor)) instead of atexit(3)

Kenneth Graunke (1):
  glsl: Fix varying struct locations when varying packing is disabled.

* Ken, this patch requires the following which imho are too much of a
sidestep considering that this is the final 11.0 release. Would we
still want this in ?

9fbcd8e8475e249c7f76b6d63b3a48b8684cb1ff glsl: pass stage into mark function
d97b060e6f305ce4ad050881944404b920c86edf glsl/fp64: add helper for
dual slot double detection.
5dc22cadb5ed4a7cf8c7d1cbaf7296c27e567e0f glsl: fix
count_attribute_slots to allow for different 64-bit handling
1fc39dae22843d6faf3ec43eab90c7d06f9f6f7b glsl: only update doubles
inputs for vertex inputs.


Marek Olšák (3):
  program: add _mesa_reserve_parameter_storage
  st/mesa: fix GLSL uniform updates for glBitmap & glDrawPixels (v2)

* Marek, can we please get a backport for the latter? Alternatively
can we drop them both, esp. since we don't have cases in the wild
that trigger the issue?

  radeonsi: don't miss changes to SPI_TMPRING_SIZE

Timothy Arceri (2):
  glsl: fix segfault linking subroutine uniform with explicit location
  mesa: fix segfault in glUniformSubroutinesuiv()

Tom Stellard (4):
  clover: Call clBuildProgram() notification function when build
completes v2
  gallium/drivers: Add threadsafe wrappers for pipe_context v2
  clover: Use threadsafe wrappers for pipe_context v2
  clover: Properly initialize LLVM targets when linking with component libs


Queued (20)
===

Emil Velikov (3):
  docs: add sha256 checksums for 11.0.8
  cherry-ignore: add patch already in branch
  cherry-ignore: add the dri3 glx null check patch

Grazvydas Ignotas (1):
  r600: fix constant buffer size programming

Ilia Mirkin (5):
  nvc0: don't forget to reset VTX_TMP bufctx slot after blit completion
  nv50/ir: float(s32 & 0xff) = float(u8), not s8
  nv50,nvc0: make 

Re: [Mesa-dev] [PATCH v2 03/10] st/mesa: add PROGRAM_BUFFER, stop using gl_register_file

2016-01-19 Thread Brian Paul
Can you say why you're removing gl_register_file?  Enums are so much 
nicer when debugging.


-Brian

On 01/18/2016 07:30 PM, Ilia Mirkin wrote:

Signed-off-by: Ilia Mirkin 
Reviewed-by: Marek Olšák 
---
  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 21 +++--
  1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index d424e3b..055d187 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -53,6 +53,7 @@


  #define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX
+#define PROGRAM_BUFFER(PROGRAM_FILE_MAX + 1)
  #define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) |\
 (1 << PROGRAM_CONSTANT) | \
 (1 << PROGRAM_UNIFORM))
@@ -69,7 +70,7 @@ static int swizzle_for_size(int size);
   */
  class st_src_reg {
  public:
-   st_src_reg(gl_register_file file, int index, const glsl_type *type)
+   st_src_reg(int file, int index, const glsl_type *type)
 {
this->file = file;
this->index = index;
@@ -88,7 +89,7 @@ public:
this->is_double_vertex_input = false;
 }

-   st_src_reg(gl_register_file file, int index, int type)
+   st_src_reg(int file, int index, int type)
 {
this->type = type;
this->file = file;
@@ -104,7 +105,7 @@ public:
this->is_double_vertex_input = false;
 }

-   st_src_reg(gl_register_file file, int index, int type, int index2D)
+   st_src_reg(int file, int index, int type, int index2D)
 {
this->type = type;
this->file = file;
@@ -138,7 +139,7 @@ public:

 explicit st_src_reg(st_dst_reg reg);

-   gl_register_file file; /**< PROGRAM_* from Mesa */
+   int file; /**< PROGRAM_* from Mesa */
 int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
 int index2D;
 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
@@ -203,7 +204,7 @@ public:

 explicit st_dst_reg(st_src_reg reg);

-   gl_register_file file; /**< PROGRAM_* from Mesa */
+   int file; /**< PROGRAM_* from Mesa */
 int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
 int index2D;
 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
@@ -273,14 +274,14 @@ public:

  class variable_storage : public exec_node {
  public:
-   variable_storage(ir_variable *var, gl_register_file file, int index,
+   variable_storage(ir_variable *var, int file, int index,
  unsigned array_id = 0)
: file(file), index(index), var(var), array_id(array_id)
 {
/* empty */
 }

-   gl_register_file file;
+   int file;
 int index;
 ir_variable *var; /* variable that maps to this, if any */
 unsigned array_id;
@@ -401,7 +402,7 @@ public:

 variable_storage *find_variable_storage(ir_variable *var);

-   int add_constant(gl_register_file file, gl_constant_value values[8],
+   int add_constant(int file, gl_constant_value values[8],
  int size, int datatype, GLuint *swizzle_out);

 function_entry *get_function_signature(ir_function_signature *sig);
@@ -990,7 +991,7 @@ glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
  }

  int
-glsl_to_tgsi_visitor::add_constant(gl_register_file file,
+glsl_to_tgsi_visitor::add_constant(int file,
 gl_constant_value values[8], int size, int 
datatype,
 GLuint *swizzle_out)
  {
@@ -4550,7 +4551,7 @@ emit_immediate(struct st_translate *t,
   * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
   */
  static struct ureg_dst
-dst_register(struct st_translate *t, gl_register_file file, unsigned index,
+dst_register(struct st_translate *t, int file, unsigned index,
   unsigned array_id)
  {
 unsigned array;



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Mesa for OpenVMS

2016-01-19 Thread Daniel Stone
Hi,

On 19 January 2016 at 02:14, Timothy Arceri
 wrote:
> On Mon, 2016-01-18 at 16:47 +0100, Jouk Jansen wrote:
>> Can someone insert these patches in the git-repository.
>> I cannot do it myself, because the git-client on my OpenVMS is very
>> -very
>> limited and does not allow this.
>
> Why not make the changes on another system and send the patches for
> review from that system. I doubt anyone here is going to do the work
> for you.

Not only that, but the FTP link won't open in Chrome ('command not
supported'), and using a command line client to get /openvms fails,
because you need 'cd OPENVMS.DIR' instead. Once you've got over that,
and figured out how to put a semicolon in FTP command names, you then
get a zip archive which expands just containing a collection of files
which form a partial source tree.

The normal patch contribution guidelines are:
  - make only one change (or set of related changes, e.g. 'remove
unsupported #pragma once in this file', 'add #ifdef __VMS__ to these
files', etc) per patch
  - build up a set of patches (using 'patch')
  - send these patches individually to the mailing list for review

A ZIP file on an OpenVMS FTP server you can't access with regular
clients, containing 70 files (some of which are automatically
generated from other source, e.g. glsl_lexer.cpp) and no indication of
the actual differences from git, is really very far outside these
guidelines. I'm sorry to hear OpenVMS doesn't have a functional git
client, but perhaps you could consider using an operating system which
allows you to generate patches in the way that any normal open source
project would expect.

(As an aside, I found 'VERSION=Mesa V3.4' worth a chuckle.)

Cheers,
Daniel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 03/10] st/mesa: add PROGRAM_BUFFER, stop using gl_register_file

2016-01-19 Thread Ilia Mirkin
On Tue, Jan 19, 2016 at 9:34 AM, Brian Paul  wrote:
> I don't have any issue with simply adding PROGRAM_BUFFER to the enum type.
> prog_src_register::File is a 4-bit field and we currently have 13 enum
> values so we're safe there (not sure if that even matters).

That's cutting it close... 13 = immed, 14 = buffer, 15 = image, and
we're out of items... if you're OK with that, I'll revert this change
and throw PROGRAM_IMMEDIATE and PROGRAM_BUFFER into the
gl_register_file list.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 03/10] st/mesa: add PROGRAM_BUFFER, stop using gl_register_file

2016-01-19 Thread Brian Paul
I don't have any issue with simply adding PROGRAM_BUFFER to the enum 
type.  prog_src_register::File is a 4-bit field and we currently have 13 
enum values so we're safe there (not sure if that even matters).


I know I've used gdb to debug/trace something in st_glsl_to_tgsi.cpp at 
least twice last year and seeing enum values in gdb makes life a lot easier.


-Brian


On 01/19/2016 07:24 AM, Ilia Mirkin wrote:

Because I got a ton of errors about how PROGRAM_BUFFER was out of
bounds for the enum. I could add them to the enum, of course, but I
think that enum was about ARB_vp/fp things?

   -ilia

On Tue, Jan 19, 2016 at 9:23 AM, Brian Paul  wrote:

Can you say why you're removing gl_register_file?  Enums are so much nicer
when debugging.

-Brian


On 01/18/2016 07:30 PM, Ilia Mirkin wrote:


Signed-off-by: Ilia Mirkin 
Reviewed-by: Marek Olšák 
---
   src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 21 +++--
   1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index d424e3b..055d187 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -53,6 +53,7 @@


   #define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX
+#define PROGRAM_BUFFER(PROGRAM_FILE_MAX + 1)
   #define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) |\
  (1 << PROGRAM_CONSTANT) | \
  (1 << PROGRAM_UNIFORM))
@@ -69,7 +70,7 @@ static int swizzle_for_size(int size);
*/
   class st_src_reg {
   public:
-   st_src_reg(gl_register_file file, int index, const glsl_type *type)
+   st_src_reg(int file, int index, const glsl_type *type)
  {
 this->file = file;
 this->index = index;
@@ -88,7 +89,7 @@ public:
 this->is_double_vertex_input = false;
  }

-   st_src_reg(gl_register_file file, int index, int type)
+   st_src_reg(int file, int index, int type)
  {
 this->type = type;
 this->file = file;
@@ -104,7 +105,7 @@ public:
 this->is_double_vertex_input = false;
  }

-   st_src_reg(gl_register_file file, int index, int type, int index2D)
+   st_src_reg(int file, int index, int type, int index2D)
  {
 this->type = type;
 this->file = file;
@@ -138,7 +139,7 @@ public:

  explicit st_src_reg(st_dst_reg reg);

-   gl_register_file file; /**< PROGRAM_* from Mesa */
+   int file; /**< PROGRAM_* from Mesa */
  int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc.
*/
  int index2D;
  GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
@@ -203,7 +204,7 @@ public:

  explicit st_dst_reg(st_src_reg reg);

-   gl_register_file file; /**< PROGRAM_* from Mesa */
+   int file; /**< PROGRAM_* from Mesa */
  int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc.
*/
  int index2D;
  int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
@@ -273,14 +274,14 @@ public:

   class variable_storage : public exec_node {
   public:
-   variable_storage(ir_variable *var, gl_register_file file, int index,
+   variable_storage(ir_variable *var, int file, int index,
   unsigned array_id = 0)
 : file(file), index(index), var(var), array_id(array_id)
  {
 /* empty */
  }

-   gl_register_file file;
+   int file;
  int index;
  ir_variable *var; /* variable that maps to this, if any */
  unsigned array_id;
@@ -401,7 +402,7 @@ public:

  variable_storage *find_variable_storage(ir_variable *var);

-   int add_constant(gl_register_file file, gl_constant_value values[8],
+   int add_constant(int file, gl_constant_value values[8],
   int size, int datatype, GLuint *swizzle_out);

  function_entry *get_function_signature(ir_function_signature *sig);
@@ -990,7 +991,7 @@ glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
   }

   int
-glsl_to_tgsi_visitor::add_constant(gl_register_file file,
+glsl_to_tgsi_visitor::add_constant(int file,
  gl_constant_value values[8], int
size, int datatype,
  GLuint *swizzle_out)
   {
@@ -4550,7 +4551,7 @@ emit_immediate(struct st_translate *t,
* Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
*/
   static struct ureg_dst
-dst_register(struct st_translate *t, gl_register_file file, unsigned
index,
+dst_register(struct st_translate *t, int file, unsigned index,
unsigned array_id)
   {
  unsigned array;





___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Mesa for OpenVMS

2016-01-19 Thread Brian Paul

On 01/19/2016 06:24 AM, Daniel Stone wrote:

Hi,

On 19 January 2016 at 02:14, Timothy Arceri
 wrote:

On Mon, 2016-01-18 at 16:47 +0100, Jouk Jansen wrote:

Can someone insert these patches in the git-repository.
I cannot do it myself, because the git-client on my OpenVMS is very
-very
limited and does not allow this.


Why not make the changes on another system and send the patches for
review from that system. I doubt anyone here is going to do the work
for you.


Not only that, but the FTP link won't open in Chrome ('command not
supported'), and using a command line client to get /openvms fails,
because you need 'cd OPENVMS.DIR' instead. Once you've got over that,
and figured out how to put a semicolon in FTP command names, you then
get a zip archive which expands just containing a collection of files
which form a partial source tree.

The normal patch contribution guidelines are:
   - make only one change (or set of related changes, e.g. 'remove
unsupported #pragma once in this file', 'add #ifdef __VMS__ to these
files', etc) per patch
   - build up a set of patches (using 'patch')
   - send these patches individually to the mailing list for review

A ZIP file on an OpenVMS FTP server you can't access with regular
clients, containing 70 files (some of which are automatically
generated from other source, e.g. glsl_lexer.cpp) and no indication of
the actual differences from git, is really very far outside these
guidelines. I'm sorry to hear OpenVMS doesn't have a functional git
client, but perhaps you could consider using an operating system which
allows you to generate patches in the way that any normal open source
project would expect.


I think the last time Jouk submitted "patches" a few years ago they 
weren't real patches and I gave up trying to integrate the changes.
Jouk, can you prepare normal patches instead and post them for review to 
mesa-dev?


And, sorry to ask, but is anyone other than you interested in OpenVMS 
support?  I know you've been contributing OpenVMS support since the 
early days of Mesa, but I don't recall ever seeing any indication of any 
other users.


-Brian

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 03/10] st/mesa: add PROGRAM_BUFFER, stop using gl_register_file

2016-01-19 Thread Brian Paul

On 01/19/2016 07:37 AM, Ilia Mirkin wrote:

On Tue, Jan 19, 2016 at 9:34 AM, Brian Paul  wrote:

I don't have any issue with simply adding PROGRAM_BUFFER to the enum type.
prog_src_register::File is a 4-bit field and we currently have 13 enum
values so we're safe there (not sure if that even matters).


That's cutting it close... 13 = immed, 14 = buffer, 15 = image, and
we're out of items... if you're OK with that, I'll revert this change
and throw PROGRAM_IMMEDIATE and PROGRAM_BUFFER into the
gl_register_file list.


Sure.  We can always bump up the bitfield size if needed.

-Brian


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glx: don't force version == 2.0 for ES2 GLX context creation

2016-01-19 Thread Ilia Mirkin
dEQP tests request a specific version. The EXT spec has been updated to
allow other versions, so allow anything >= 2.0 to be requested.

Signed-off-by: Ilia Mirkin 
---

The X server was also blocking this... a patch has been sent (pending
moderator approval).

 src/glx/dri_common.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/glx/dri_common.c b/src/glx/dri_common.c
index 8a56385..8d1e6f5 100644
--- a/src/glx/dri_common.c
+++ b/src/glx/dri_common.c
@@ -582,13 +582,12 @@ dri2_convert_glx_attribs(unsigned num_attribs, const 
uint32_t *attribs,
 
/* The GLX_EXT_create_context_es2_profile spec says:
 *
-* "... If the version requested is 2.0, and the
-* GLX_CONTEXT_ES2_PROFILE_BIT_EXT bit is set in the
+* "... If the version requested is a valid and supported OpenGL-ES
+* version, and the GLX_CONTEXT_ES_PROFILE_BIT_EXT bit is set in the
 * GLX_CONTEXT_PROFILE_MASK_ARB attribute (see below), then the context
-* returned will implement OpenGL ES 2.0. This is the only way in which
-* an implementation may request an OpenGL ES 2.0 context."
+* returned will implement the OpenGL ES version requested."
 */
-   if (*api == __DRI_API_GLES2 && (*major_ver != 2 || *minor_ver != 0)) {
+   if (*api == __DRI_API_GLES2 && *major_ver < 2) {
   *error = __DRI_CTX_ERROR_BAD_API;
   return false;
}
-- 
2.4.10

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium: make image views non-persistent objects

2016-01-19 Thread Nicolai Hähnle

On 18.01.2016 22:08, Ilia Mirkin wrote:

Make them akin to shader buffers, with no refcounting/etc. Just used to
pass data about the bound image in ->set_shader_images.

Signed-off-by: Ilia Mirkin 
---

I don't really see a reason why these were refcounted objects. It seems like
it would be convenient to make these line up with shader buffers, so that's
what I've done here.

Please let me know if I'm missing something.


I haven't thought about this much, but at least Radeon does quite a bit 
of work in create_sampler_view.


Since everything boils down to the same hardware resource descriptors in 
the end, I'd expect the same to happen for a create_image_view. I 
believe we'll want a create_image_view which ends up calling code that 
is shared with create_sampler_view.


So make that a vote against this change from me.

Come to think of it, from a Radeon perspective I'm not sure why there is 
a separate pipe_image_view structure in the first place (other than 
perhaps reducing confusion about which combination of fields make sense).


Cheers,
Nicolai



  src/gallium/auxiliary/util/u_inlines.h| 11 ---
  src/gallium/drivers/ddebug/dd_context.c   | 28 +--
  src/gallium/drivers/ddebug/dd_pipe.h  |  2 +-
  src/gallium/drivers/ilo/ilo_state.c   |  2 +-
  src/gallium/drivers/nouveau/nvc0/nvc0_state.c |  2 +-
  src/gallium/include/pipe/p_context.h  | 14 ++
  src/gallium/include/pipe/p_state.h|  4 +---
  7 files changed, 7 insertions(+), 56 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_inlines.h 
b/src/gallium/auxiliary/util/u_inlines.h
index 57a3b0b..d081203 100644
--- a/src/gallium/auxiliary/util/u_inlines.h
+++ b/src/gallium/auxiliary/util/u_inlines.h
@@ -174,17 +174,6 @@ pipe_sampler_view_release(struct pipe_context *ctx,
  }

  static inline void
-pipe_image_view_reference(struct pipe_image_view **ptr, struct pipe_image_view 
*view)
-{
-   struct pipe_image_view *old_view = *ptr;
-
-   if (pipe_reference_described(&(*ptr)->reference, &view->reference,
-
(debug_reference_descriptor)debug_describe_image_view))
-  old_view->context->image_view_destroy(old_view->context, old_view);
-   *ptr = view;
-}
-
-static inline void
  pipe_so_target_reference(struct pipe_stream_output_target **ptr,
   struct pipe_stream_output_target *target)
  {
diff --git a/src/gallium/drivers/ddebug/dd_context.c 
b/src/gallium/drivers/ddebug/dd_context.c
index 3ae7764..9dfaa0a 100644
--- a/src/gallium/drivers/ddebug/dd_context.c
+++ b/src/gallium/drivers/ddebug/dd_context.c
@@ -415,30 +415,6 @@ dd_context_sampler_view_destroy(struct pipe_context *_pipe,
 pipe->sampler_view_destroy(pipe, view);
  }

-static struct pipe_image_view *
-dd_context_create_image_view(struct pipe_context *_pipe,
- struct pipe_resource *resource,
- const struct pipe_image_view *templ)
-{
-   struct pipe_context *pipe = dd_context(_pipe)->pipe;
-   struct pipe_image_view *view =
-  pipe->create_image_view(pipe, resource, templ);
-
-   if (!view)
-  return NULL;
-   view->context = _pipe;
-   return view;
-}
-
-static void
-dd_context_image_view_destroy(struct pipe_context *_pipe,
-  struct pipe_image_view *view)
-{
-   struct pipe_context *pipe = dd_context(_pipe)->pipe;
-
-   pipe->image_view_destroy(pipe, view);
-}
-
  static struct pipe_stream_output_target *
  dd_context_create_stream_output_target(struct pipe_context *_pipe,
 struct pipe_resource *res,
@@ -486,7 +462,7 @@ dd_context_set_sampler_views(struct pipe_context *_pipe, 
unsigned shader,
  static void
  dd_context_set_shader_images(struct pipe_context *_pipe, unsigned shader,
   unsigned start, unsigned num,
- struct pipe_image_view **views)
+ struct pipe_image_view *views)
  {
 struct dd_context *dctx = dd_context(_pipe);
 struct pipe_context *pipe = dctx->pipe;
@@ -744,8 +720,6 @@ dd_context_create(struct dd_screen *dscreen, struct 
pipe_context *pipe)
 CTX_INIT(sampler_view_destroy);
 CTX_INIT(create_surface);
 CTX_INIT(surface_destroy);
-   CTX_INIT(create_image_view);
-   CTX_INIT(image_view_destroy);
 CTX_INIT(transfer_map);
 CTX_INIT(transfer_flush_region);
 CTX_INIT(transfer_unmap);
diff --git a/src/gallium/drivers/ddebug/dd_pipe.h 
b/src/gallium/drivers/ddebug/dd_pipe.h
index a045518..6505cea 100644
--- a/src/gallium/drivers/ddebug/dd_pipe.h
+++ b/src/gallium/drivers/ddebug/dd_pipe.h
@@ -93,7 +93,7 @@ struct dd_context
 struct pipe_constant_buffer 
constant_buffers[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS];
 struct pipe_sampler_view 
*sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
 struct dd_state 

[Mesa-dev] [PATCH 6/7] radeonsi: use all SPI color formats

2016-01-19 Thread Marek Olšák
From: Marek Olšák 

because not using SPI_SHADER_32_ABGR doubles fill rate.

We should also get optimal performance if alpha isn't needed or blending
isn't enabled.
---
 src/gallium/drivers/radeon/r600_pipe_common.h   |   6 +-
 src/gallium/drivers/radeonsi/si_blit.c  |   8 +
 src/gallium/drivers/radeonsi/si_pipe.h  |   4 +
 src/gallium/drivers/radeonsi/si_state.c | 207 +---
 src/gallium/drivers/radeonsi/si_state.h |   5 +
 src/gallium/drivers/radeonsi/si_state_shaders.c |  23 ++-
 6 files changed, 195 insertions(+), 58 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index f3271e2..d66e74f 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -236,6 +236,7 @@ struct r600_surface {
/* Misc. color flags. */
bool alphatest_bypass;
bool export_16bpc;
+   bool color_is_int8;
 
/* Color registers. */
unsigned cb_color_info;
@@ -252,7 +253,10 @@ struct r600_surface {
unsigned cb_color_fmask_slice;  /* EG and later */
unsigned cb_color_cmask;/* CB_COLORn_TILE (r600 only) */
unsigned cb_color_mask; /* R600 only */
-   unsigned spi_shader_col_format; /* SI+ */
+   unsigned spi_shader_col_format; /* SI+, no blending, no 
alpha-to-coverage. */
+   unsigned spi_shader_col_format_alpha;   /* SI+, alpha-to-coverage */
+   unsigned spi_shader_col_format_blend;   /* SI+, blending without alpha. 
*/
+   unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with 
alpha. */
unsigned sx_ps_downconvert; /* Stoney only */
unsigned sx_blend_opt_epsilon;  /* Stoney only */
struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. 
R600 only */
diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index 75a9d56..a93887e 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -680,6 +680,14 @@ static bool do_hardware_msaa_resolve(struct pipe_context 
*ctx,
enum pipe_format format = int_to_norm_format(info->dst.format);
unsigned sample_mask = ~0;
 
+   /* Hardware MSAA resolve doesn't work if SPI format = NORM16_ABGR and
+* the format is R16G16. Use R16A16, which does work.
+*/
+   if (format == PIPE_FORMAT_R16G16_UNORM)
+   format = PIPE_FORMAT_R16A16_UNORM;
+   if (format == PIPE_FORMAT_R16G16_SNORM)
+   format = PIPE_FORMAT_R16A16_SNORM;
+
if (info->src.resource->nr_samples > 1 &&
info->dst.resource->nr_samples <= 1 &&
util_max_layer(info->src.resource, 0) == 0 &&
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index e2009de..e2725fe 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -126,6 +126,10 @@ struct si_framebuffer {
unsignedcb0_is_integer;
unsignedcompressed_cb_mask;
unsignedspi_shader_col_format;
+   unsignedspi_shader_col_format_alpha;
+   unsignedspi_shader_col_format_blend;
+   unsignedspi_shader_col_format_blend_alpha;
+   unsignedcolor_is_int8; /* bitmask */
unsigneddirty_cbufs;
booldirty_zsbuf;
 };
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 492d3f9..42f5291 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -420,6 +420,9 @@ static void *si_create_blend_state_mode(struct pipe_context 
*ctx,
   S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
   S_028B70_ALPHA_TO_MASK_OFFSET3(2));
 
+   if (state->alpha_to_coverage)
+   blend->need_src_alpha_4bit |= 0xf;
+
blend->cb_target_mask = 0;
for (int i = 0; i < 8; i++) {
/* state->rt entries > 0 only written if independent blending */
@@ -457,6 +460,17 @@ static void *si_create_blend_state_mode(struct 
pipe_context *ctx,
blend_cntl |= 
S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
}
si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, 
blend_cntl);
+
+   blend->blend_enable_4bit |= 0xf << (i * 4);
+
+   /* This is only important for formats without alpha. */
+   if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+   dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+   srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+   dstRGB == 

[Mesa-dev] [PATCH 4/7] radeonsi: add shader conversion code for all SPI color formats

2016-01-19 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_shader.c | 153 ---
 src/gallium/drivers/radeonsi/si_shader.h |   1 +
 2 files changed, 140 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 266ef6d..ef4dbb1 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1255,6 +1255,28 @@ static LLVMValueRef fetch_constant(
return result;
 }
 
+/* Upper 16 bits must be zero. */
+static LLVMValueRef si_llvm_pack_two_int16(struct gallivm_state *gallivm,
+  LLVMValueRef val[2])
+{
+   return LLVMBuildOr(gallivm->builder, val[0],
+  LLVMBuildShl(gallivm->builder, val[1],
+   lp_build_const_int32(gallivm, 16),
+   ""), "");
+}
+
+/* Upper 16 bits are ignored and will be dropped. */
+static LLVMValueRef si_llvm_pack_two_int32_as_int16(struct gallivm_state 
*gallivm,
+   LLVMValueRef val[2])
+{
+   LLVMValueRef v[2] = {
+   LLVMBuildAnd(gallivm->builder, val[0],
+lp_build_const_int32(gallivm, 0xffff), ""),
+   val[1],
+   };
+   return si_llvm_pack_two_int16(gallivm, v);
+}
+
 /* Initialize arguments for the shader export intrinsic */
 static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
 LLVMValueRef *values,
@@ -1265,16 +1287,15 @@ static void si_llvm_init_export_args(struct 
lp_build_tgsi_context *bld_base,
struct lp_build_context *uint =

&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
struct lp_build_context *base = &bld_base->base;
+   struct gallivm_state *gallivm = base->gallivm;
+   LLVMBuilderRef builder = base->gallivm->builder;
+   LLVMValueRef val[4];
unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
unsigned chan;
+   bool is_int8;
 
-   /* XXX: This controls which components of the output
-* registers actually get exported. (e.g bit 0 means export
-* X component, bit 1 means export Y component, etc.)  I'm
-* hard coding this to 0xf for now.  In the future, we might
-* want to do something else.
-*/
-   args[0] = lp_build_const_int32(base->gallivm, 0xf);
+   /* Default is 0xf. Adjusted below depending on the format. */
+   args[0] = lp_build_const_int32(base->gallivm, 0xf); /* writemask */
 
/* Specify whether the EXEC mask represents the valid mask */
args[1] = uint->zero;
@@ -1286,12 +1307,13 @@ static void si_llvm_init_export_args(struct 
lp_build_tgsi_context *bld_base,
args[3] = lp_build_const_int32(base->gallivm, target);
 
if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
-   unsigned col_formats =
-   si_shader_ctx->shader->key.ps.spi_shader_col_format;
+   const union si_shader_key *key = &si_shader_ctx->shader->key;
+   unsigned col_formats = key->ps.spi_shader_col_format;
int cbuf = target - V_008DFC_SQ_EXP_MRT;
 
assert(cbuf >= 0 && cbuf < 8);
spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf;
+   is_int8 = (key->ps.color_is_int8 >> cbuf) & 0x1;
}
 
args[4] = uint->zero; /* COMPR flag */
@@ -1306,6 +1328,23 @@ static void si_llvm_init_export_args(struct 
lp_build_tgsi_context *bld_base,
args[3] = lp_build_const_int32(base->gallivm, 
V_008DFC_SQ_EXP_NULL);
break;
 
+   case V_028714_SPI_SHADER_32_R:
+   args[0] = uint->one; /* writemask */
+   args[5] = values[0];
+   break;
+
+   case V_028714_SPI_SHADER_32_GR:
+   args[0] = lp_build_const_int32(base->gallivm, 0x3); /* 
writemask */
+   args[5] = values[0];
+   args[6] = values[1];
+   break;
+
+   case V_028714_SPI_SHADER_32_AR:
+   args[0] = lp_build_const_int32(base->gallivm, 0x9); /* 
writemask */
+   args[5] = values[0];
+   args[8] = values[3];
+   break;
+
case V_028714_SPI_SHADER_FP16_ABGR:
args[4] = uint->one; /* COMPR flag */
 
@@ -1318,17 +1357,103 @@ static void si_llvm_init_export_args(struct 
lp_build_tgsi_context *bld_base,
 
packed = lp_build_intrinsic(base->gallivm->builder,
"llvm.SI.packf16",
-   
LLVMInt32TypeInContext(base->gallivm->context),
-   pack_args, 2,
+   uint->elem_type, pack_args, 
2,
  

Re: [Mesa-dev] [PATCH 1/9] gallium: Add PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY

2016-01-19 Thread Nicolai Hähnle

On 19.01.2016 10:45, Fredrik Höglund wrote:

On Tuesday 19 January 2016, Nicolai Hähnle wrote:


On 18.01.2016 18:50, Ilia Mirkin wrote:

On Mon, Jan 18, 2016 at 5:22 PM, Nicolai Hähnle  wrote:

diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c 
b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 90f..1ae557d 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -75,6 +75,8 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
 return 1;
  case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
 return 2048;
+   case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+  return 0;
  /* supported capabilities */
  case PIPE_CAP_TWO_SIDED_STENCIL:
  case PIPE_CAP_ANISOTROPIC_FILTER:
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 712835c..99d7ae6 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -121,6 +121,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
 return 256;
  case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
 return 1; /* 256 for binding as RT, but that's not possible in GL */
+   case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+  return 0;
  case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
 return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
  case PIPE_CAP_MAX_VIEWPORTS:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 7211df9..612f1c0 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -111,6 +111,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
 return 256;
  case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
 return 1; /* 256 for binding as RT, but that's not possible in GL */
+   case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+  return 0;
  case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
 return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
  case PIPE_CAP_MAX_VIEWPORTS:


I would greatly appreciate it if you could stick these into the
"unsupported" list of caps -- each of nv30/nv50/nvc0 should have one,
of varying length. (Same applies to the other cap you're adding.)


Do you really want that? I actually put them aside on purpose, because
the somewhat unusual sense of the cap means that the return value of 1
is something unsupported. Of course, one might argue that I should have
changed the boolean sense and renamed it to something like
PIPE_CAP_BUFFER_SAMPLER_VIEW_SWIZZLES.


That is actually the name I was going to use at first (minus the s).
But then it occurred to me that it would be misleading, since this
is not about pipe_sampler_view::swizzle_r/g/b/a.


It's about both the swizzles and the formats, isn't it? If it were only 
the formats, the deficiency could be fixed by using the swizzles.


Naming is hard. I'm happy with keeping it the way it is.

Cheers,
Nicolai
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/7] radeonsi: don't enable blending if colormask == 0

2016-01-19 Thread Marek Olšák
From: Marek Olšák 

most likely useless, but doesn't hurt
---
 src/gallium/drivers/radeonsi/si_state.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 2a6d2c6..ea441ac 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -433,6 +433,9 @@ static void *si_create_blend_state_mode(struct pipe_context 
*ctx,
 
unsigned blend_cntl = 0;
 
+   if (!state->rt[j].colormask)
+   continue;
+
/* we pretend 8 buffer are used, CB_SHADER_MASK will disable 
unused one */
blend->cb_target_mask |= state->rt[j].colormask << (4 * i);
 
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/9] gallium: Add PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY

2016-01-19 Thread Fredrik Höglund
On Tuesday 19 January 2016, Nicolai Hähnle wrote:
> 
> On 18.01.2016 18:50, Ilia Mirkin wrote:
> > On Mon, Jan 18, 2016 at 5:22 PM, Nicolai Hähnle  wrote:
> >> diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c 
> >> b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
> >> index 90f..1ae557d 100644
> >> --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
> >> +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
> >> @@ -75,6 +75,8 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum 
> >> pipe_cap param)
> >> return 1;
> >>  case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
> >> return 2048;
> >> +   case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
> >> +  return 0;
> >>  /* supported capabilities */
> >>  case PIPE_CAP_TWO_SIDED_STENCIL:
> >>  case PIPE_CAP_ANISOTROPIC_FILTER:
> >> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
> >> b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
> >> index 712835c..99d7ae6 100644
> >> --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
> >> +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
> >> @@ -121,6 +121,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, 
> >> enum pipe_cap param)
> >> return 256;
> >>  case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
> >> return 1; /* 256 for binding as RT, but that's not possible in GL 
> >> */
> >> +   case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
> >> +  return 0;
> >>  case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
> >> return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
> >>  case PIPE_CAP_MAX_VIEWPORTS:
> >> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
> >> b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> >> index 7211df9..612f1c0 100644
> >> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> >> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> >> @@ -111,6 +111,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, 
> >> enum pipe_cap param)
> >> return 256;
> >>  case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
> >> return 1; /* 256 for binding as RT, but that's not possible in GL 
> >> */
> >> +   case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
> >> +  return 0;
> >>  case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
> >> return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
> >>  case PIPE_CAP_MAX_VIEWPORTS:
> >
> > I would greatly appreciate it if you could stick these into the
> > "unsupported" list of caps -- each of nv30/nv50/nvc0 should have one,
> > of varying length. (Same applies to the other cap you're adding.)
> 
> Do you really want that? I actually put them aside on purpose, because 
> the somewhat unusual sense of the cap means that the return value of 1 
> is something unsupported. Of course, one might argue that I should have 
> changed the boolean sense and renamed it to something like 
> PIPE_CAP_BUFFER_SAMPLER_VIEW_SWIZZLES.

That is actually the name I was going to use at first (minus the s).
But then it occurred to me that it would be misleading, since this
is not about pipe_sampler_view::swizzle_r/g/b/a.

Fredrik

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/7] radeonsi: use all SPI color formats

2016-01-19 Thread Axel Davy

On 19/01/2016 17:11, Marek Olšák wrote:

From: Marek Olšák 

because not using SPI_SHADER_32_ABGR doubles fill rate.

We should also get optimal performance if alpha isn't needed or blending
isn't enabled.
---
  src/gallium/drivers/radeon/r600_pipe_common.h   |   6 +-
  src/gallium/drivers/radeonsi/si_blit.c  |   8 +
  src/gallium/drivers/radeonsi/si_pipe.h  |   4 +
  src/gallium/drivers/radeonsi/si_state.c | 207 +---
  src/gallium/drivers/radeonsi/si_state.h |   5 +
  src/gallium/drivers/radeonsi/si_state_shaders.c |  23 ++-
  6 files changed, 195 insertions(+), 58 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index f3271e2..d66e74f 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -236,6 +236,7 @@ struct r600_surface {
/* Misc. color flags. */
bool alphatest_bypass;
bool export_16bpc;
+   bool color_is_int8;
  
  	/* Color registers. */

unsigned cb_color_info;
@@ -252,7 +253,10 @@ struct r600_surface {
unsigned cb_color_fmask_slice;  /* EG and later */
unsigned cb_color_cmask;/* CB_COLORn_TILE (r600 only) */
unsigned cb_color_mask; /* R600 only */
-   unsigned spi_shader_col_format; /* SI+ */
+   unsigned spi_shader_col_format; /* SI+, no blending, no 
alpha-to-coverage. */
+   unsigned spi_shader_col_format_alpha;   /* SI+, alpha-to-coverage */
+   unsigned spi_shader_col_format_blend;   /* SI+, blending without alpha. 
*/
+   unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with 
alpha. */
unsigned sx_ps_downconvert; /* Stoney only */
unsigned sx_blend_opt_epsilon;  /* Stoney only */
struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. 
R600 only */
diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index 75a9d56..a93887e 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -680,6 +680,14 @@ static bool do_hardware_msaa_resolve(struct pipe_context 
*ctx,
enum pipe_format format = int_to_norm_format(info->dst.format);
unsigned sample_mask = ~0;
  
+	/* Hardware MSAA resolve doesn't work if SPI format = NORM16_ABGR and

+* the format is R16G16. Use R16A16, which does work.
+*/
+   if (format == PIPE_FORMAT_R16G16_UNORM)
+   format = PIPE_FORMAT_R16A16_UNORM;
+   if (format == PIPE_FORMAT_R16G16_SNORM)
+   format = PIPE_FORMAT_R16A16_SNORM;
+
if (info->src.resource->nr_samples > 1 &&
info->dst.resource->nr_samples <= 1 &&
util_max_layer(info->src.resource, 0) == 0 &&
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index e2009de..e2725fe 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -126,6 +126,10 @@ struct si_framebuffer {
unsignedcb0_is_integer;
unsignedcompressed_cb_mask;
unsignedspi_shader_col_format;
+   unsignedspi_shader_col_format_alpha;
+   unsignedspi_shader_col_format_blend;
+   unsignedspi_shader_col_format_blend_alpha;
+   unsignedcolor_is_int8; /* bitmask */
unsigneddirty_cbufs;
booldirty_zsbuf;
  };
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 492d3f9..42f5291 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -420,6 +420,9 @@ static void *si_create_blend_state_mode(struct pipe_context 
*ctx,
   S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
   S_028B70_ALPHA_TO_MASK_OFFSET3(2));
  
+	if (state->alpha_to_coverage)

+   blend->need_src_alpha_4bit |= 0xf;
+
blend->cb_target_mask = 0;
for (int i = 0; i < 8; i++) {
/* state->rt entries > 0 only written if independent blending */
@@ -457,6 +460,17 @@ static void *si_create_blend_state_mode(struct 
pipe_context *ctx,
blend_cntl |= 
S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
}
si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, 
blend_cntl);
+
+   blend->blend_enable_4bit |= 0xf << (i * 4);
+
+   /* This is only important for formats without alpha. */
+   if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+   dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+   srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+   

[Mesa-dev] [PATCH 2/7] radeonsi: use SPI_SHADER_COL_FORMAT fields instead of export_16bpc

2016-01-19 Thread Marek Olšák
From: Marek Olšák 

This does change the behavior slightly:
  If a shader writes COLOR[i] and that color buffer isn't bound,
  the shader will export MRT_NULL instead and discard the IR tree that
  calculates the output. The only exception is alpha-to-coverage, which
  requires an alpha export.
---
 src/gallium/drivers/radeon/r600_pipe_common.h   |  1 +
 src/gallium/drivers/radeonsi/si_pipe.h  |  2 +-
 src/gallium/drivers/radeonsi/si_shader.c| 35 --
 src/gallium/drivers/radeonsi/si_shader.h|  2 +-
 src/gallium/drivers/radeonsi/si_state.c | 39 +++-
 src/gallium/drivers/radeonsi/si_state.h |  1 +
 src/gallium/drivers/radeonsi/si_state_shaders.c | 47 -
 7 files changed, 90 insertions(+), 37 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 27f6e98..f3271e2 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -252,6 +252,7 @@ struct r600_surface {
unsigned cb_color_fmask_slice;  /* EG and later */
unsigned cb_color_cmask;/* CB_COLORn_TILE (r600 only) */
unsigned cb_color_mask; /* R600 only */
+   unsigned spi_shader_col_format; /* SI+ */
unsigned sx_ps_downconvert; /* Stoney only */
unsigned sx_blend_opt_epsilon;  /* Stoney only */
struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. 
R600 only */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index f83cb02..e2009de 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -125,7 +125,7 @@ struct si_framebuffer {
unsignedlog_samples;
unsignedcb0_is_integer;
unsignedcompressed_cb_mask;
-   unsignedexport_16bpc;
+   unsignedspi_shader_col_format;
unsigneddirty_cbufs;
booldirty_zsbuf;
 };
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 2de7def..266ef6d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1265,7 +1265,7 @@ static void si_llvm_init_export_args(struct 
lp_build_tgsi_context *bld_base,
struct lp_build_context *uint =

&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
struct lp_build_context *base = &bld_base->base;
-   unsigned compressed = 0;
+   unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
unsigned chan;
 
/* XXX: This controls which components of the output
@@ -1286,17 +1286,29 @@ static void si_llvm_init_export_args(struct 
lp_build_tgsi_context *bld_base,
args[3] = lp_build_const_int32(base->gallivm, target);
 
if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+   unsigned col_formats =
+   si_shader_ctx->shader->key.ps.spi_shader_col_format;
int cbuf = target - V_008DFC_SQ_EXP_MRT;
 
-   if (cbuf >= 0 && cbuf < 8)
-   compressed = 
(si_shader_ctx->shader->key.ps.export_16bpc >> cbuf) & 0x1;
+   assert(cbuf >= 0 && cbuf < 8);
+   spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf;
}
 
-   /* Set COMPR flag */
-   args[4] = compressed ? uint->one : uint->zero;
+   args[4] = uint->zero; /* COMPR flag */
+   args[5] = base->undef;
+   args[6] = base->undef;
+   args[7] = base->undef;
+   args[8] = base->undef;
+
+   switch (spi_shader_col_format) {
+   case V_028714_SPI_SHADER_ZERO:
+   args[0] = uint->zero; /* writemask */
+   args[3] = lp_build_const_int32(base->gallivm, 
V_008DFC_SQ_EXP_NULL);
+   break;
+
+   case V_028714_SPI_SHADER_FP16_ABGR:
+   args[4] = uint->one; /* COMPR flag */
 
-   if (compressed) {
-   /* Pixel shader needs to pack output values before export */
for (chan = 0; chan < 2; chan++) {
LLVMValueRef pack_args[2] = {
values[2 * chan],
@@ -1314,10 +1326,13 @@ static void si_llvm_init_export_args(struct 
lp_build_tgsi_context *bld_base,
 packed,
 
LLVMFloatTypeInContext(base->gallivm->context),
 "");
-   args[chan + 7] = base->undef;
}
-   } else
+   break;
+
+   case V_028714_SPI_SHADER_32_ABGR:
memcpy(&args[5], values, sizeof(values[0]) * 4);
+   break;
+   }
 }
 
 

[Mesa-dev] [PATCH 7/7] radeonsi: disable SPI color outputs the shader doesn't write

2016-01-19 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_shader.h|  4 
 src/gallium/drivers/radeonsi/si_state_shaders.c | 12 
 2 files changed, 16 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index f49290a..50375e2 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -213,6 +213,10 @@ struct si_shader_selector {
 
/* PS parameters. */
unsigneddb_shader_control;
+   /* Set 0xf or 0x0 (4 bits) per each written output.
+* ANDed with spi_shader_col_format.
+*/
+   unsignedcolors_written_4bit;
 
/* masks of "get_unique_index" bits */
uint64_toutputs_written;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 80126f2..9daa924 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -646,6 +646,12 @@ static inline void si_shader_selector_key(struct 
pipe_context *ctx,
if (sctx->b.chip_class <= CIK && sctx->b.family != CHIP_HAWAII)
key->ps.color_is_int8 = sctx->framebuffer.color_is_int8;
 
+   /* Disable unwritten outputs (if WRITE_ALL_CBUFS isn't 
enabled). */
+   if (!key->ps.last_cbuf) {
+   key->ps.spi_shader_col_format &= 
sel->colors_written_4bit;
+   key->ps.color_is_int8 &= sel->info.colors_written;
+   }
+
if (rs) {
bool is_poly = (sctx->current_rast_prim >= 
PIPE_PRIM_TRIANGLES &&
sctx->current_rast_prim <= 
PIPE_PRIM_POLYGON) ||
@@ -830,6 +836,12 @@ static void *si_create_shader_selector(struct pipe_context 
*ctx,
}
sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
break;
+
+   case PIPE_SHADER_FRAGMENT:
+   for (i = 0; i < 8; i++)
+   if (sel->info.colors_written & (1 << i))
+   sel->colors_written_4bit |= 0xf << (4 * i);
+   break;
}
 
/* DB_SHADER_CONTROL */
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/7] radeonsi: set CB_SHADER_MASK according to SPI color formats

2016-01-19 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 52 +
 1 file changed, 35 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index fa78179..73a0ccc 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -397,13 +397,43 @@ static unsigned si_get_spi_shader_col_format(struct 
si_shader *shader)
return value;
 }
 
+static unsigned si_get_cb_shader_mask(unsigned spi_shader_col_format)
+{
+   unsigned i, cb_shader_mask = 0;
+
+   for (i = 0; i < 8; i++) {
+   switch ((spi_shader_col_format >> (i * 4)) & 0xf) {
+   case V_028714_SPI_SHADER_ZERO:
+   break;
+   case V_028714_SPI_SHADER_32_R:
+   cb_shader_mask |= 0x1 << (i * 4);
+   break;
+   case V_028714_SPI_SHADER_32_GR:
+   cb_shader_mask |= 0x3 << (i * 4);
+   break;
+   case V_028714_SPI_SHADER_32_AR:
+   cb_shader_mask |= 0x9 << (i * 4);
+   break;
+   case V_028714_SPI_SHADER_FP16_ABGR:
+   case V_028714_SPI_SHADER_UNORM16_ABGR:
+   case V_028714_SPI_SHADER_SNORM16_ABGR:
+   case V_028714_SPI_SHADER_UINT16_ABGR:
+   case V_028714_SPI_SHADER_SINT16_ABGR:
+   case V_028714_SPI_SHADER_32_ABGR:
+   cb_shader_mask |= 0xf << (i * 4);
+   break;
+   default:
+   assert(0);
+   }
+   }
+   return cb_shader_mask;
+}
+
 static void si_shader_ps(struct si_shader *shader)
 {
struct tgsi_shader_info *info = &shader->selector->info;
struct si_pm4_state *pm4;
-   unsigned i, spi_ps_in_control, spi_shader_col_format;
-   unsigned cb_shader_mask = 0;
-   unsigned colors_written;
+   unsigned spi_ps_in_control, spi_shader_col_format, cb_shader_mask;
unsigned num_sgprs, num_user_sgprs;
unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
uint64_t va;
@@ -438,24 +468,12 @@ static void si_shader_ps(struct si_shader *shader)
TGSI_FS_COORD_PIXEL_CENTER_INTEGER)
spi_baryc_cntl |= S_0286E0_POS_FLOAT_ULC(1);
 
-   /* Find out what SPI_SHADER_COL_FORMAT and CB_SHADER_MASK should be. */
-   colors_written = info->colors_written;
-
-   if (info->colors_written == 0x1 &&
-   info->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) {
-   colors_written |= (1 << (shader->key.ps.last_cbuf + 1)) - 1;
-   }
-
-   while (colors_written) {
-   i = u_bit_scan(&colors_written);
-   cb_shader_mask |= 0xf << (4 * i);
-   }
-
spi_shader_col_format = si_get_spi_shader_col_format(shader);
+   cb_shader_mask = si_get_cb_shader_mask(spi_shader_col_format);
 
/* This must be non-zero for alpha-test/kill to work.
 * The hardware probably disables the pixel shader if no export
-* memory is allocated.
+* memory is allocated. Don't add this to CB_SHADER_MASK.
 */
if (!spi_shader_col_format &&
(shader->selector->info.uses_kill ||
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Using all PS export formats in RadeonSI

2016-01-19 Thread Marek Olšák
Hi,

This series adds support for all pixel shader export formats. It should double 
the fill rate for these formats:
- 1-4 channels of type S/UINT8, S/UINT16, S/UNORM16
- 1-2 channels of type S/UINT32, FLOAT32

Some of them have to fall back to the original slow mode depending on whether 
blending or alpha-to-coverage is enabled. ABGR_32 is the only slow format.

I haven't seen much improvement from this in real benchmarks.

This is a prerequisite for making RB+ work on Stoney, because RB+ is strict 
about which export formats can be used.

Please review.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/7] radeonsi: use 32_AR for alpha-to-coverage without a color buffer

2016-01-19 Thread Marek Olšák
From: Marek Olšák 

This avoids the fp16 packing instructions.
---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 73a0ccc..158f1ce 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -623,7 +623,7 @@ static inline void si_shader_selector_key(struct 
pipe_context *ctx,
 */
if (!(key->ps.spi_shader_col_format & 0xf) &&
blend && blend->alpha_to_coverage)
-   key->ps.spi_shader_col_format |= 
V_028710_SPI_SHADER_FP16_ABGR;
+   key->ps.spi_shader_col_format |= 
V_028710_SPI_SHADER_32_AR;
 
if (rs) {
bool is_poly = (sctx->current_rast_prim >= 
PIPE_PRIM_TRIANGLES &&
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl: Conditionalize atan2 math.

2016-01-19 Thread Kenneth Graunke
In the old hand-written implementation of atan2, the calculation of
atan(y/x) was performed conditionally in the "then" block of the
outermost if statement.  I believe I accidentally lifted this out
into unconditional code when converting to IR builder.

For reference, the original hand-written IR is visible in commit
722eff674b832e2321f791c68358ef52d2a1ff25.

Signed-off-by: Kenneth Graunke 
Cc: Erik Faye-Lund 
---
 src/glsl/builtin_functions.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index f2e2165..95e86df 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -3267,7 +3267,7 @@ builtin_builder::_atan2(const glsl_type *type)
   ir_factory outer_then(&outer_if->then_instructions, mem_ctx);
 
   /* Then...call atan(y/x) */
-  do_atan(body, glsl_type::float_type, r, div(y, x));
+  do_atan(outer_then, glsl_type::float_type, r, div(y, x));
 
   /* ...and fix it up: */
   ir_if *inner_if = new(mem_ctx) ir_if(less(x, imm(0.0f)));
-- 
2.7.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] radeonsi: add max waves / CU to shader stats

2016-01-19 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_shader.c | 33 +---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 0c5fd32..5c536f8 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3994,12 +3994,39 @@ static void si_shader_dump_stats(struct si_screen 
*sscreen,
 struct pipe_debug_callback *debug,
 unsigned processor)
 {
+   /* Compute the maximum number of waves.
+* The pixel shader additionally allocates 1 - 48 blocks of LDS
+* depending on non-compile times parameters.
+*/
+   unsigned ps_lds_size = processor == TGSI_PROCESSOR_FRAGMENT ? 1 : 0;
+   unsigned lds_size = ps_lds_size + conf->lds_size;
+   unsigned max_waves = 10;
+
+   if (conf->num_sgprs) {
+   if (sscreen->b.chip_class >= VI)
+   max_waves = MIN2(max_waves, 800 / conf->num_sgprs);
+   else
+   max_waves = MIN2(max_waves, 512 / conf->num_sgprs);
+   }
+
+   if (conf->num_vgprs)
+   max_waves = MIN2(max_waves, 256 / conf->num_vgprs);
+
+   if (lds_size)
+   max_waves = MIN2(max_waves, 128 / lds_size);
+
if (r600_can_dump_shader(&sscreen->b, processor)) {
fprintf(stderr, "*** SHADER STATS ***\n"
-   "SGPRS: %d\nVGPRS: %d\nCode Size: %d bytes\nLDS: %d 
blocks\n"
-   "Scratch: %d bytes per wave\n\n",
+   "SGPRS: %d\n"
+   "VGPRS: %d\n"
+   "Code Size: %d bytes\n"
+   "LDS: %d blocks\n"
+   "Scratch: %d bytes per wave\n"
+   "Max waves / CU: %d\n"
+   "\n",
conf->num_sgprs, conf->num_vgprs, code_size,
-   conf->lds_size, conf->scratch_bytes_per_wave);
+   conf->lds_size, conf->scratch_bytes_per_wave,
+   max_waves);
}
 
pipe_debug_message(debug, SHADER_INFO,
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium: make image views non-persistent objects

2016-01-19 Thread Roland Scheidegger
FWIW why exactly are pipe_sampler_view and pipe_surface ref counted?
These are all the same too from a high level perspective.
I guess though there'd be significant effort to change that...

Roland


Am 19.01.2016 um 04:08 schrieb Ilia Mirkin:
> Make them akin to shader buffers, with no refcounting/etc. Just used to
> pass data about the bound image in ->set_shader_images.
> 
> Signed-off-by: Ilia Mirkin 
> ---
> 
> I don't really see a reason why these were refcounted objects. It seems like
> it would be convenient to make these line up with shader buffers, so that's
> what I've done here.
> 
> Please let me know if I'm missing something.
> 
>  src/gallium/auxiliary/util/u_inlines.h| 11 ---
>  src/gallium/drivers/ddebug/dd_context.c   | 28 
> +--
>  src/gallium/drivers/ddebug/dd_pipe.h  |  2 +-
>  src/gallium/drivers/ilo/ilo_state.c   |  2 +-
>  src/gallium/drivers/nouveau/nvc0/nvc0_state.c |  2 +-
>  src/gallium/include/pipe/p_context.h  | 14 ++
>  src/gallium/include/pipe/p_state.h|  4 +---
>  7 files changed, 7 insertions(+), 56 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/util/u_inlines.h 
> b/src/gallium/auxiliary/util/u_inlines.h
> index 57a3b0b..d081203 100644
> --- a/src/gallium/auxiliary/util/u_inlines.h
> +++ b/src/gallium/auxiliary/util/u_inlines.h
> @@ -174,17 +174,6 @@ pipe_sampler_view_release(struct pipe_context *ctx,
>  }
>  
>  static inline void
> -pipe_image_view_reference(struct pipe_image_view **ptr, struct 
> pipe_image_view *view)
> -{
> -   struct pipe_image_view *old_view = *ptr;
> -
> -   if (pipe_reference_described(&(*ptr)->reference, &view->reference,
> -
> (debug_reference_descriptor)debug_describe_image_view))
> -  old_view->context->image_view_destroy(old_view->context, old_view);
> -   *ptr = view;
> -}
> -
> -static inline void
>  pipe_so_target_reference(struct pipe_stream_output_target **ptr,
>   struct pipe_stream_output_target *target)
>  {
> diff --git a/src/gallium/drivers/ddebug/dd_context.c 
> b/src/gallium/drivers/ddebug/dd_context.c
> index 3ae7764..9dfaa0a 100644
> --- a/src/gallium/drivers/ddebug/dd_context.c
> +++ b/src/gallium/drivers/ddebug/dd_context.c
> @@ -415,30 +415,6 @@ dd_context_sampler_view_destroy(struct pipe_context 
> *_pipe,
> pipe->sampler_view_destroy(pipe, view);
>  }
>  
> -static struct pipe_image_view *
> -dd_context_create_image_view(struct pipe_context *_pipe,
> - struct pipe_resource *resource,
> - const struct pipe_image_view *templ)
> -{
> -   struct pipe_context *pipe = dd_context(_pipe)->pipe;
> -   struct pipe_image_view *view =
> -  pipe->create_image_view(pipe, resource, templ);
> -
> -   if (!view)
> -  return NULL;
> -   view->context = _pipe;
> -   return view;
> -}
> -
> -static void
> -dd_context_image_view_destroy(struct pipe_context *_pipe,
> -  struct pipe_image_view *view)
> -{
> -   struct pipe_context *pipe = dd_context(_pipe)->pipe;
> -
> -   pipe->image_view_destroy(pipe, view);
> -}
> -
>  static struct pipe_stream_output_target *
>  dd_context_create_stream_output_target(struct pipe_context *_pipe,
> struct pipe_resource *res,
> @@ -486,7 +462,7 @@ dd_context_set_sampler_views(struct pipe_context *_pipe, 
> unsigned shader,
>  static void
>  dd_context_set_shader_images(struct pipe_context *_pipe, unsigned shader,
>   unsigned start, unsigned num,
> - struct pipe_image_view **views)
> + struct pipe_image_view *views)
>  {
> struct dd_context *dctx = dd_context(_pipe);
> struct pipe_context *pipe = dctx->pipe;
> @@ -744,8 +720,6 @@ dd_context_create(struct dd_screen *dscreen, struct 
> pipe_context *pipe)
> CTX_INIT(sampler_view_destroy);
> CTX_INIT(create_surface);
> CTX_INIT(surface_destroy);
> -   CTX_INIT(create_image_view);
> -   CTX_INIT(image_view_destroy);
> CTX_INIT(transfer_map);
> CTX_INIT(transfer_flush_region);
> CTX_INIT(transfer_unmap);
> diff --git a/src/gallium/drivers/ddebug/dd_pipe.h 
> b/src/gallium/drivers/ddebug/dd_pipe.h
> index a045518..6505cea 100644
> --- a/src/gallium/drivers/ddebug/dd_pipe.h
> +++ b/src/gallium/drivers/ddebug/dd_pipe.h
> @@ -93,7 +93,7 @@ struct dd_context
> struct pipe_constant_buffer 
> constant_buffers[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS];
> struct pipe_sampler_view 
> *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
> struct dd_state *sampler_states[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
> -   struct pipe_image_view 
> *shader_images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
> +   struct pipe_image_view 
> shader_images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
> struct pipe_shader_buffer 
> 

Re: [Mesa-dev] [PATCH 6/7] radeonsi: use all SPI color formats

2016-01-19 Thread Marek Olšák
On Tue, Jan 19, 2016 at 5:20 PM, Axel Davy  wrote:
> On 19/01/2016 17:11, Marek Olšák wrote:
>>
>> From: Marek Olšák 
>>
>> because not using SPI_SHADER_32_ABGR doubles fill rate.
>>
>> We should also get optimal performance if alpha isn't needed or blending
>> isn't enabled.
>> ---
>>   src/gallium/drivers/radeon/r600_pipe_common.h   |   6 +-
>>   src/gallium/drivers/radeonsi/si_blit.c  |   8 +
>>   src/gallium/drivers/radeonsi/si_pipe.h  |   4 +
>>   src/gallium/drivers/radeonsi/si_state.c | 207
>> +---
>>   src/gallium/drivers/radeonsi/si_state.h |   5 +
>>   src/gallium/drivers/radeonsi/si_state_shaders.c |  23 ++-
>>   6 files changed, 195 insertions(+), 58 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h
>> b/src/gallium/drivers/radeon/r600_pipe_common.h
>> index f3271e2..d66e74f 100644
>> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
>> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
>> @@ -236,6 +236,7 @@ struct r600_surface {
>> /* Misc. color flags. */
>> bool alphatest_bypass;
>> bool export_16bpc;
>> +   bool color_is_int8;
>> /* Color registers. */
>> unsigned cb_color_info;
>> @@ -252,7 +253,10 @@ struct r600_surface {
>> unsigned cb_color_fmask_slice;  /* EG and later */
>> unsigned cb_color_cmask;/* CB_COLORn_TILE (r600 only) */
>> unsigned cb_color_mask; /* R600 only */
>> -   unsigned spi_shader_col_format; /* SI+ */
>> +   unsigned spi_shader_col_format; /* SI+, no blending, no
>> alpha-to-coverage. */
>> +   unsigned spi_shader_col_format_alpha;   /* SI+, alpha-to-coverage
>> */
>> +   unsigned spi_shader_col_format_blend;   /* SI+, blending without
>> alpha. */
>> +   unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with
>> alpha. */
>> unsigned sx_ps_downconvert; /* Stoney only */
>> unsigned sx_blend_opt_epsilon;  /* Stoney only */
>> struct r600_resource *cb_buffer_fmask; /* Used for FMASK
>> relocations. R600 only */
>> diff --git a/src/gallium/drivers/radeonsi/si_blit.c
>> b/src/gallium/drivers/radeonsi/si_blit.c
>> index 75a9d56..a93887e 100644
>> --- a/src/gallium/drivers/radeonsi/si_blit.c
>> +++ b/src/gallium/drivers/radeonsi/si_blit.c
>> @@ -680,6 +680,14 @@ static bool do_hardware_msaa_resolve(struct
>> pipe_context *ctx,
>> enum pipe_format format = int_to_norm_format(info->dst.format);
>> unsigned sample_mask = ~0;
>>   + /* Hardware MSAA resolve doesn't work if SPI format = NORM16_ABGR
>> and
>> +* the format is R16G16. Use R16A16, which does work.
>> +*/
>> +   if (format == PIPE_FORMAT_R16G16_UNORM)
>> +   format = PIPE_FORMAT_R16A16_UNORM;
>> +   if (format == PIPE_FORMAT_R16G16_SNORM)
>> +   format = PIPE_FORMAT_R16A16_SNORM;
>> +
>> if (info->src.resource->nr_samples > 1 &&
>> info->dst.resource->nr_samples <= 1 &&
>> util_max_layer(info->src.resource, 0) == 0 &&
>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
>> b/src/gallium/drivers/radeonsi/si_pipe.h
>> index e2009de..e2725fe 100644
>> --- a/src/gallium/drivers/radeonsi/si_pipe.h
>> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
>> @@ -126,6 +126,10 @@ struct si_framebuffer {
>> unsignedcb0_is_integer;
>> unsignedcompressed_cb_mask;
>> unsignedspi_shader_col_format;
>> +   unsignedspi_shader_col_format_alpha;
>> +   unsignedspi_shader_col_format_blend;
>> +   unsignedspi_shader_col_format_blend_alpha;
>> +   unsignedcolor_is_int8; /* bitmask */
>> unsigneddirty_cbufs;
>> booldirty_zsbuf;
>>   };
>> diff --git a/src/gallium/drivers/radeonsi/si_state.c
>> b/src/gallium/drivers/radeonsi/si_state.c
>> index 492d3f9..42f5291 100644
>> --- a/src/gallium/drivers/radeonsi/si_state.c
>> +++ b/src/gallium/drivers/radeonsi/si_state.c
>> @@ -420,6 +420,9 @@ static void *si_create_blend_state_mode(struct
>> pipe_context *ctx,
>>S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
>>S_028B70_ALPHA_TO_MASK_OFFSET3(2));
>>   + if (state->alpha_to_coverage)
>> +   blend->need_src_alpha_4bit |= 0xf;
>> +
>> blend->cb_target_mask = 0;
>> for (int i = 0; i < 8; i++) {
>> /* state->rt entries > 0 only written if independent
>> blending */
>> @@ -457,6 +460,17 @@ static void *si_create_blend_state_mode(struct
>> pipe_context *ctx,
>> blend_cntl |=
>> S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
>> }
>> si_pm4_set_reg(pm4, 

[Mesa-dev] [RFC 00/63] ARB_internalformat_query2 on Mesa and i965 backend

2016-01-19 Thread Eduardo Lima Mitev
Hello,

This is an RFC series adding support for the ARB_internalformat_query2 
extension:

https://www.opengl.org/registry/specs/ARB/internalformat_query2.txt

The corresponding bug is being tracked at:

https://bugs.freedesktop.org/show_bug.cgi?id=92687

Why is this an RFC series instead of a formal merge-able patch-set?

Two reasons. Firstly, we are still polishing rough edges in some patches. 
However, the support is complete to the best of our knowledge. Most of the 
final changes we are making are improvements to particular query answers, thus 
contained inside specific blocks, so they don't affect the structure of the 
code.
Secondly, we have been trying to get general feedback, and answers to some 
doubts we posted on bugzilla, without much success. So maybe an explicit RFC in 
the mailing list would bring more eyes to it.
This is a rather large extension, with a long spec wording, certainly difficult 
to review. So we totally appreciate the effort and brain cycles of whoever 
takes on this.


The patch-set is structured as follows:

* Patches 01 to 10 set the stage for query2. They add a new, generic 
driver hook that obsoletes QuerySamplesForFormat, which is removed. But they 
don't introduce anything related to query2 yet.

* Patches 11 to 61 implement the different individual queries from query2 
extension in the frontend (mesa/main), adding validation and helper functions 
as needed.

* Patches 62 to 63 activate the extension on i965. Only the queries where the 
driver has something to return other than the default value returned by the 
frontend are explicitly added.


Some implementation notes:

* All the extension's frontend code is in main/formatquery.c, as it was before 
for query1. Only that it also handles query2 now.

* As commented above, a new driver hook 'QueryInternalFormat' was added, 
replacing the previous one 'QuerySamplesForFormat'.

* A fallback, generic function _mesa_query_internal_format_default() provides 
generic implementation and sensible defaults for all queries, for drivers not 
implementing query2. Backends that only care about answering some queries, can 
call back this function for the other queries where a generic answer is ok.

* For all pnames, the frontend code will do generic validation as per the spec: 
check GL profile, version, extensions.
  - If the frontend fails basic validation, it will give the corresponding 
negative answer, depending on the pname, without going to the driver.
  - If the frontend is fully qualified to provide an answer, it will (e.g., 
MAX_WIDTH, COLOR_COMPONENTS, etc). Otherwise it will call the driver hook (e.g., 
INTERNALFORMAT_PREFERRED).
  - For the cases where the query must return full support, caveat support, or 
no support; Mesa/main will always call the driver to decide between full or 
caveat support (and only answer directly in the case of no-support).

* The last patches in the branch enable support for this extension in the i965 
backend (drivers/dri/i965/brw_formatquery.c). The backend code only handles 
queries where the answer is affected by driver-specific stuff. By default, 
it calls back the frontend function with the default implementations.

* The 64-bit version of the query introduced by this extension 
(GetInternalformati64v) was implemented as a wrapper around the 32-bit 
version. Since only one query really requires the 64-bit API 
(MAX_COMBINED_DIMENSIONS), we handle that pname as a special case. For the rest 
of the queries, we just forward the call to the default, 32-bit version.


A git tree of the series can be found at:

https://github.com/Igalia/mesa/tree/internalformat-query2-rfc


There is also a branch containing piglit tests for the extension, which my 
colleague Alejandro will send to the piglit mailing list for feedback/review.


cheers,
Eduardo (on behalf of the team that worked on this)


Alejandro Piñeiro (9):
  mesa: Add dispatch and extension XML for GL_ARB_internalformat_query2
  mesa/main: not fill mesa_error on
_mesa_legal_texture_base_format_for_target
  mesa/formatquery: initial implementation for GetInternalformati64v
  mesa/formatquery: handle unmodified buffer for SAMPLES on the 64-bit
query
  mesa/formatquery: support for IMAGE_FORMAT_COMPATIBILITY_TYPE
  main/formatquery: support for MAX_{WIDTH/HEIGHT/DEPTH/LAYERS}
  mesa/formatquery: support for MAX_COMBINED_DIMENSIONS
  mesa/texparam: make public target_allows_setting_sampler_parameters
  mesa/formatquery: added FILTER pname support

Antia Puentes (36):
  mesa/main: Add extension tracking bit for ARB_internalformat_query2
  mesa/formatquery: Added function to validate parameters
  mesa/formatquery: Added function to set 'unsupported' responses
  mesa/formatquery: Added a func to check if the <target> is supported
  mesa/formatquery: Added boilerplate code to extend GetInternalformativ
  mesa/main: Added empty skeleton of glGetInternalformati64v
  mesa/teximage: make public is_renderable_texture_format
  mesa/teximage: Make 

[Mesa-dev] [RFC 01/63] mesa: Add QueryInternalFormat to device driver virtual table

2016-01-19 Thread Eduardo Lima Mitev
This new function queries different driver parameters for a particular target
and texture format. It is basically a driver hook to support
ARB_internalformat_query2.

Since ARB_internalformat_query2 introduced several new query parameters
over ARB_internalformat_query, having one driver hook for each parameter
is no longer feasible. So this is the generic entry-point for calls
to glGetInternalFormativ and glGetInternalFormati64v.
---
 src/mesa/main/dd.h | 20 
 1 file changed, 20 insertions(+)

diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 70ed563..c3845ca 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -192,6 +192,26 @@ struct dd_function_table {
int samples[16]);
 
/**
+* Queries different driver parameters for a particular target and format.
+* Since ARB_internalformat_query2 introduced several new query parameters
+* over ARB_internalformat_query, having one driver hook for each parameter
+* is no longer feasible. So this is the generic entry-point for calls
+* to glGetInternalFormativ and glGetInternalFormati64v, after Mesa has
+* checked errors and default values.
+*
+* \param ctxGL context
+* \param target GL target enum
+* \param internalFormat GL format enum
+* \param pname  GL enum that specifies the info to query.
+* \param params Buffer to hold the result of the query.
+*/
+   void (*QueryInternalFormat)(struct gl_context *ctx,
+   GLenum target,
+   GLenum internalFormat,
+   GLenum pname,
+   GLint *params);
+
+   /**
 * Called by glTexImage[123]D() and glCopyTexImage[12]D()
 * Allocate texture memory and copy the user's image to the buffer.
 * The gl_texture_image fields, etc. will be fully initialized.
-- 
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 02/63] mesa: Add a default QueryInternalFormat() function for drivers

2016-01-19 Thread Eduardo Lima Mitev
This is a fallback function for drivers not implementing
ARB_internalformat_query2.
---
 src/mesa/drivers/common/driverfuncs.c |  1 +
 src/mesa/main/formatquery.c   | 16 
 src/mesa/main/formatquery.h   |  5 +
 3 files changed, 22 insertions(+)

diff --git a/src/mesa/drivers/common/driverfuncs.c 
b/src/mesa/drivers/common/driverfuncs.c
index 752aaf6..e6334d8 100644
--- a/src/mesa/drivers/common/driverfuncs.c
+++ b/src/mesa/drivers/common/driverfuncs.c
@@ -91,6 +91,7 @@ _mesa_init_driver_functions(struct dd_function_table *driver)
/* Texture functions */
driver->ChooseTextureFormat = _mesa_choose_tex_format;
driver->QuerySamplesForFormat = _mesa_query_samples_for_format;
+   driver->QueryInternalFormat = _mesa_query_internal_format_default;
driver->TexImage = _mesa_store_teximage;
driver->TexSubImage = _mesa_store_texsubimage;
driver->GetTexSubImage = _mesa_meta_GetTexSubImage;
diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c
index 85f7b6b..4f18b00 100644
--- a/src/mesa/main/formatquery.c
+++ b/src/mesa/main/formatquery.c
@@ -43,6 +43,22 @@ _mesa_query_samples_for_format(struct gl_context *ctx, 
GLenum target,
return 1;
 }
 
+/* default implementation of QueryInternalFormat driverfunc, for
+ * drivers not implementing ARB_internalformat_query2.
+ */
+void
+_mesa_query_internal_format_default(struct gl_context *ctx, GLenum target,
+GLenum internalFormat, GLenum pname,
+GLint *params)
+{
+   (void) ctx;
+   (void) target;
+   (void) internalFormat;
+   (void) pname;
+   (void) params;
+
+   /* @TODO */
+}
 
 void GLAPIENTRY
 _mesa_GetInternalformativ(GLenum target, GLenum internalformat, GLenum pname,
diff --git a/src/mesa/main/formatquery.h b/src/mesa/main/formatquery.h
index 6034000..b42299e 100644
--- a/src/mesa/main/formatquery.h
+++ b/src/mesa/main/formatquery.h
@@ -32,6 +32,11 @@ size_t
 _mesa_query_samples_for_format(struct gl_context *ctx, GLenum target,
GLenum internalFormat, int samples[16]);
 
+void
+_mesa_query_internal_format_default(struct gl_context *ctx, GLenum target,
+GLenum internalFormat, GLenum pname,
+GLint *params);
+
 extern void GLAPIENTRY
 _mesa_GetInternalformativ(GLenum target, GLenum internalformat,
   GLenum pname, GLsizei bufSize, GLint *params);
-- 
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 18/63] mesa/teximage: make public is_renderable_texture_format

2016-01-19 Thread Eduardo Lima Mitev
From: Antia Puentes 

It will be used by the ARB_internalformat_query2 implementation
to check if the 'internalformat' passed is supported by texture
MULTISAMPLE 'targets'.
---
 src/mesa/main/teximage.c | 6 +++---
 src/mesa/main/teximage.h | 3 +++
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 50141be..12d7d0e 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -5160,8 +5160,8 @@ _mesa_TextureBufferRange(GLuint texture, GLenum 
internalFormat, GLuint buffer,
   bufObj, offset, size, "glTextureBufferRange");
 }
 
-static GLboolean
-is_renderable_texture_format(struct gl_context *ctx, GLenum internalformat)
+GLboolean
+_mesa_is_renderable_texture_format(struct gl_context *ctx, GLenum 
internalformat)
 {
/* Everything that is allowed for renderbuffers,
 * except for a base format of GL_STENCIL_INDEX, unless supported.
@@ -5241,7 +5241,7 @@ texture_image_multisample(struct gl_context *ctx, GLuint 
dims,
   return;
}
 
-   if (!is_renderable_texture_format(ctx, internalformat)) {
+   if (!_mesa_is_renderable_texture_format(ctx, internalformat)) {
   /* Page 172 of OpenGL ES 3.1 spec says:
*   "An INVALID_ENUM error is generated if sizedinternalformat is not
*   color-renderable, depth-renderable, or stencil-renderable (as
diff --git a/src/mesa/main/teximage.h b/src/mesa/main/teximage.h
index 5df36c5..218506c 100644
--- a/src/mesa/main/teximage.h
+++ b/src/mesa/main/teximage.h
@@ -164,6 +164,9 @@ _mesa_legal_texture_base_format_for_target(struct 
gl_context *ctx,
unsigned dimensions,
const char *caller);
 
+GLboolean
+_mesa_is_renderable_texture_format(struct gl_context *ctx, GLenum 
internalformat);
+
 extern void
 _mesa_texture_sub_image(struct gl_context *ctx, GLuint dims,
 struct gl_texture_object *texObj,
-- 
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 20/63] mesa/main: not fill mesa_error on _mesa_legal_texture_base_format_for_target

2016-01-19 Thread Eduardo Lima Mitev
From: Alejandro Piñeiro 

This allows the function to be used when you are just querying whether the
combination is allowed, as for arb_internalformat_query2.
---
 src/mesa/main/teximage.c   | 18 +-
 src/mesa/main/teximage.h   |  4 +---
 src/mesa/main/texstorage.c |  8 
 3 files changed, 10 insertions(+), 20 deletions(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index bf94636..250d758 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -1566,19 +1566,12 @@ compressed_tex_size(GLsizei width, GLsizei height, 
GLsizei depth,
  * \param ctx GL context
  * \param target  Texture target
  * \param internalFormat  Internal format of the texture image
- * \param dimensions  Dimensionality at the caller.  This is \b not used
- *in the validation.  It is only used when logging
- *error messages.
- * \param caller  Base name of the calling function (e.g.,
- *"glTexImage" or "glTexStorage").
  *
  * \returns true if the combination is legal, false otherwise.
  */
 bool
 _mesa_legal_texture_base_format_for_target(struct gl_context *ctx,
-   GLenum target, GLenum 
internalFormat,
-   unsigned dimensions,
-   const char *caller)
+   GLenum target, GLenum 
internalFormat)
 {
if (_mesa_base_tex_format(ctx, internalFormat) == GL_DEPTH_COMPONENT
|| _mesa_base_tex_format(ctx, internalFormat) == GL_DEPTH_STENCIL
@@ -1617,9 +1610,6 @@ _mesa_legal_texture_base_format_for_target(struct 
gl_context *ctx,
   !((target == GL_TEXTURE_CUBE_MAP_ARRAY ||
  target == GL_PROXY_TEXTURE_CUBE_MAP_ARRAY) &&
 ctx->Extensions.ARB_texture_cube_map_array)) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "%s%dD(bad target for depth texture)",
- caller, dimensions);
  return false;
   }
}
@@ -1863,9 +1853,11 @@ texture_error_check( struct gl_context *ctx,
}
 
/* additional checks for depth textures */
-   if (!_mesa_legal_texture_base_format_for_target(ctx, target, internalFormat,
-   dimensions, "glTexImage"))
+   if (!_mesa_legal_texture_base_format_for_target(ctx, target, 
internalFormat)) {
+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "glTexImage%dD(bad target for texture)", dimensions);
   return GL_TRUE;
+   }
 
/* additional checks for compressed textures */
if (_mesa_is_compressed_format(ctx, internalFormat)) {
diff --git a/src/mesa/main/teximage.h b/src/mesa/main/teximage.h
index f51bdd9..b693ad7 100644
--- a/src/mesa/main/teximage.h
+++ b/src/mesa/main/teximage.h
@@ -160,9 +160,7 @@ _mesa_validate_texbuffer_format(const struct gl_context 
*ctx,
 bool
 _mesa_legal_texture_base_format_for_target(struct gl_context *ctx,
GLenum target,
-   GLenum internalFormat,
-   unsigned dimensions,
-   const char *caller);
+   GLenum internalFormat);
 
 bool
 _mesa_format_no_online_compression(const struct gl_context *ctx, GLenum 
format);
diff --git a/src/mesa/main/texstorage.c b/src/mesa/main/texstorage.c
index 9fd969f..f4a0760 100644
--- a/src/mesa/main/texstorage.c
+++ b/src/mesa/main/texstorage.c
@@ -358,11 +358,11 @@ tex_storage_error_check(struct gl_context *ctx,
}
 
/* additional checks for depth textures */
-   if (!_mesa_legal_texture_base_format_for_target(ctx, target, internalformat,
-   dims, dsa ?
-   "glTextureStorage" :
-   "glTexStorage"))
+   if (!_mesa_legal_texture_base_format_for_target(ctx, target, 
internalformat)) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, "glTex%sStorage%uD(bad target for 
texture)",
+  suffix, dims);
   return GL_TRUE;
+   }
 
return GL_FALSE;
 }
-- 
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 03/63] i965: Add boilerplate function for QueryInternalFormat driver hook

2016-01-19 Thread Eduardo Lima Mitev
By default, we call back the driver hook's fallback function, which has generic
implementations for all the queries.
---
 src/mesa/drivers/dri/i965/Makefile.sources  |  1 +
 src/mesa/drivers/dri/i965/brw_context.c |  1 +
 src/mesa/drivers/dri/i965/brw_context.h |  5 
 src/mesa/drivers/dri/i965/brw_formatquery.c | 45 +
 4 files changed, 52 insertions(+)
 create mode 100644 src/mesa/drivers/dri/i965/brw_formatquery.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index cea1e87..b76cac6 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -121,6 +121,7 @@ i965_FILES = \
brw_ff_gs.h \
brw_fs_channel_expressions.cpp \
brw_fs_vector_splitting.cpp \
+   brw_formatquery.c \
brw_gs.c \
brw_gs.h \
brw_gs_state.c \
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 9ba3339..9152416 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -347,6 +347,7 @@ brw_init_driver_functions(struct brw_context *brw,
   brw_init_conditional_render_functions(functions);
 
functions->QuerySamplesForFormat = brw_query_samples_for_format;
+   functions->QueryInternalFormat = brw_query_internal_format;
 
functions->NewTransformFeedback = brw_new_transform_feedback;
functions->DeleteTransformFeedback = brw_delete_transform_feedback;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index b80db00..0ba642e 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1873,6 +1873,11 @@ void brw_emit_depth_stall_flushes(struct brw_context 
*brw);
 void gen7_emit_vs_workaround_flush(struct brw_context *brw);
 void gen7_emit_cs_stall_flush(struct brw_context *brw);
 
+/* brw_queryformat.c */
+void brw_query_internal_format(struct gl_context *ctx, GLenum target,
+   GLenum internalFormat, GLenum pname,
+   GLint *params);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_formatquery.c 
b/src/mesa/drivers/dri/i965/brw_formatquery.c
new file mode 100644
index 000..ad59f03
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_formatquery.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_context.h"
+#include "main/formatquery.h"
+
+void
+brw_query_internal_format(struct gl_context *ctx, GLenum target,
+  GLenum internalFormat, GLenum pname, GLint *params)
+{
+   /* The Mesa layer gives us a temporary params buffer that is guaranteed
+* to be non-NULL, and have at least 16 elements.
+*/
+   assert(params != NULL);
+
+   switch (pname) {
+   default:
+  /* By default, we call the driver hook's fallback function from the 
frontend,
+   * which has generic implementation for all pnames.
+   */
+  _mesa_query_internal_format_default(ctx, target, internalFormat, pname,
+  params);
+  break;
+   }
+}
-- 
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 23/63] mesa/formatquery: Added the INTERNALFORMAT_SUPPORTED query

2016-01-19 Thread Eduardo Lima Mitev
From: Antia Puentes 

---
 src/mesa/main/formatquery.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c
index 11bf133..da65624 100644
--- a/src/mesa/main/formatquery.c
+++ b/src/mesa/main/formatquery.c
@@ -575,6 +575,11 @@ _mesa_query_internal_format_default(struct gl_context 
*ctx, GLenum target,
case GL_NUM_SAMPLE_COUNTS:
   params[0] = 1;
   break;
+
+   case GL_INTERNALFORMAT_SUPPORTED:
+  params[0] = GL_TRUE;
+  break;
+
default:
   _set_default_response(pname, params);
   break;
@@ -647,7 +652,11 @@ _mesa_GetInternalformativ(GLenum target, GLenum 
internalformat, GLenum pname,
   buffer);
   break;
case GL_INTERNALFORMAT_SUPPORTED:
-  /* @TODO */
+  /* Having a supported <internalformat> is implemented as a prerequisite
+   * for all the <pnames>. Thus,  if we reach this point, the 
internalformat is
+   * supported.
+   */
+  buffer[0] = GL_TRUE;
   break;
 
case GL_INTERNALFORMAT_PREFERRED:
-- 
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 05/63] i965/formatquery: Respond queries SAMPLES and NUM_SAMPLE_COUNTS

2016-01-19 Thread Eduardo Lima Mitev
This effectively disables the old QuerySamplesForFormat driver hook, since it is
never called by Mesa anymore.
---
 src/mesa/drivers/dri/i965/brw_context.c |  1 -
 src/mesa/drivers/dri/i965/brw_context.h |  2 --
 src/mesa/drivers/dri/i965/brw_formatquery.c | 14 +-
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 8186b7d..3a3fcd7 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -308,7 +308,6 @@ brw_init_driver_functions(struct brw_context *brw,
if (brw->gen >= 7)
   brw_init_conditional_render_functions(functions);
 
-   functions->QuerySamplesForFormat = brw_query_samples_for_format;
functions->QueryInternalFormat = brw_query_internal_format;
 
functions->NewTransformFeedback = brw_new_transform_feedback;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index e17f99c..0ba642e 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1874,8 +1874,6 @@ void gen7_emit_vs_workaround_flush(struct brw_context 
*brw);
 void gen7_emit_cs_stall_flush(struct brw_context *brw);
 
 /* brw_queryformat.c */
-size_t brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
-GLenum internalFormat, int samples[16]);
 void brw_query_internal_format(struct gl_context *ctx, GLenum target,
GLenum internalFormat, GLenum pname,
GLint *params);
diff --git a/src/mesa/drivers/dri/i965/brw_formatquery.c 
b/src/mesa/drivers/dri/i965/brw_formatquery.c
index 5ee1d69..1bd212c 100644
--- a/src/mesa/drivers/dri/i965/brw_formatquery.c
+++ b/src/mesa/drivers/dri/i965/brw_formatquery.c
@@ -24,7 +24,7 @@
 #include "brw_context.h"
 #include "main/formatquery.h"
 
-size_t
+static size_t
 brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
  GLenum internalFormat, int samples[16])
 {
@@ -73,6 +73,18 @@ brw_query_internal_format(struct gl_context *ctx, GLenum 
target,
assert(params != NULL);
 
switch (pname) {
+   case GL_SAMPLES:
+  brw_query_samples_for_format(ctx, target, internalFormat, params);
+  break;
+
+   case GL_NUM_SAMPLE_COUNTS: {
+  size_t num_samples;
+  num_samples = brw_query_samples_for_format(ctx, target, internalFormat,
+ params);
+  params[0] = (GLint) num_samples;
+  break;
+   }
+
default:
   /* By default, we call the driver hook's fallback function from the 
frontend,
* which has generic implementation for all pnames.
-- 
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 16/63] mesa: Add dispatch and extension XML for GL_ARB_internalformat_query2

2016-01-19 Thread Eduardo Lima Mitev
From: Alejandro Piñeiro 

Equivalent to commit bda540 (that added GL_ARB_internalformat_query)
---
 src/mapi/glapi/gen/ARB_internalformat_query2.xml | 119 +++
 src/mapi/glapi/gen/gl_API.xml|   2 +-
 src/mesa/main/tests/dispatch_sanity.cpp  |   4 +
 3 files changed, 124 insertions(+), 1 deletion(-)
 create mode 100644 src/mapi/glapi/gen/ARB_internalformat_query2.xml

diff --git a/src/mapi/glapi/gen/ARB_internalformat_query2.xml 
b/src/mapi/glapi/gen/ARB_internalformat_query2.xml
new file mode 100644
index 000..9b0f320
--- /dev/null
+++ b/src/mapi/glapi/gen/ARB_internalformat_query2.xml
@@ -0,0 +1,119 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index 593ace4..9024f16 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -8195,7 +8195,7 @@
 
 http://www.w3.org/2001/XInclude"/>
 
-
+http://www.w3.org/2001/XInclude"/>
 
 
 
diff --git a/src/mesa/main/tests/dispatch_sanity.cpp 
b/src/mesa/main/tests/dispatch_sanity.cpp
index 7610bcb..2f638c8 100644
--- a/src/mesa/main/tests/dispatch_sanity.cpp
+++ b/src/mesa/main/tests/dispatch_sanity.cpp
@@ -857,6 +857,9 @@ const struct function common_desktop_functions_possible[] = 
{
/* GL_ARB_internalformat_query */
{ "glGetInternalformativ", 30, -1 },
 
+   /* GL_ARB_internalformat_query2 */
+   { "glGetInternalformati64v", 30, -1 },
+
/* GL_ARB_multi_bind */
{ "glBindBuffersBase", 44, -1 },
{ "glBindBuffersRange", 44, -1 },
@@ -2352,6 +2355,7 @@ const struct function gles3_functions_possible[] = {
{ "glGetInteger64v", 30, -1 },
{ "glGetIntegeri_v", 30, -1 },
{ "glGetInternalformativ", 30, -1 },
+   { "glGetInternalformati64v", 30, -1 },
// glGetProgramBinary aliases glGetProgramBinaryOES in GLES 2
{ "glGetQueryiv", 30, -1 },
{ "glGetQueryObjectuiv", 30, -1 },
-- 
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 15/63] mesa/formatquery: Added boilerplate code to extend GetInternalformativ

2016-01-19 Thread Eduardo Lima Mitev
From: Antia Puentes 

The goal is to extend the GetInternalformativ query to implement the
ARB_internalformat_query2 specification, keeping the behaviour defined
by the ARB_internalformat_query if ARB_internalformat_query2 is not
supported.
---
 src/mesa/main/formatquery.c | 440 ++--
 1 file changed, 345 insertions(+), 95 deletions(-)

diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c
index 961af6d..8d0cded 100644
--- a/src/mesa/main/formatquery.c
+++ b/src/mesa/main/formatquery.c
@@ -30,6 +30,24 @@
 #include "formatquery.h"
 #include "teximage.h"
 
+static bool
+_is_renderable(struct gl_context *ctx, GLenum internalformat)
+{
+   /*  Section 4.4.4 on page 212 of the  GLES 3.0.4 spec says:
+*
+* "An internal format is color-renderable if it is one of the
+* formats from table 3.13 noted as color-renderable or if it
+* is unsized format RGBA or RGB."
+*
+* Therefore, we must accept GL_RGB and GL_RGBA here.
+*/
+   if (internalformat != GL_RGB && internalformat != GL_RGBA &&
+   _mesa_base_fbo_format(ctx, internalformat) == 0)
+  return false;
+
+   return true;
+}
+
 /* Handles the cases where either ARB_internalformat_query or
  * ARB_internalformat_query2 have to return an error.
  */
@@ -232,23 +250,8 @@ _legal_parameters(struct gl_context *ctx, GLenum target, 
GLenum internalformat,
 * "If the  parameter to GetInternalformativ is not
 * color-, depth- or stencil-renderable, then an INVALID_ENUM error is
 * generated."
-*
-* Page 243 of the GLES 3.0.4 spec says this for GetInternalformativ:
-*
-* "internalformat must be color-renderable, depth-renderable or
-* stencilrenderable (as defined in section 4.4.4)."
-*
-* Section 4.4.4 on page 212 of the same spec says:
-*
-* "An internal format is color-renderable if it is one of the
-* formats from table 3.13 noted as color-renderable or if it
-* is unsized format RGBA or RGB."
-*
-* Therefore, we must accept GL_RGB and GL_RGBA here.
 */
-   if (!query2 &&
-   internalformat != GL_RGB && internalformat != GL_RGBA &&
-   _mesa_base_fbo_format(ctx, internalformat) == 0) {
+   if (!query2 && !_is_renderable(ctx, internalformat)) {
   _mesa_error(ctx, GL_INVALID_ENUM,
   "glGetInternalformativ(internalformat=%s)",
   _mesa_enum_to_string(internalformat));
@@ -452,7 +455,7 @@ _mesa_query_internal_format_default(struct gl_context *ctx, 
GLenum target,
   params[0] = 1;
   break;
default:
-  /* @TODO: handle default values for all the different pnames. */
+  _set_default_response(pname, params);
   break;
}
 }
@@ -466,6 +469,7 @@ _mesa_GetInternalformativ(GLenum target, GLenum 
internalformat, GLenum pname,
 
ASSERT_OUTSIDE_BEGIN_END(ctx);
 
+   /* ARB_internalformat_query is also mandatory for ARB_internalformat_query2 
*/
if (!ctx->Extensions.ARB_internalformat_query) {
   _mesa_error(ctx, GL_INVALID_OPERATION, "glGetInternalformativ");
   return;
@@ -473,99 +477,345 @@ _mesa_GetInternalformativ(GLenum target, GLenum 
internalformat, GLenum pname,
 
assert(ctx->Driver.QueryInternalFormat != NULL);
 
-   /* The ARB_internalformat_query spec says:
-*
-* "If the  parameter to GetInternalformativ is not one of
-* TEXTURE_2D_MULTISAMPLE, TEXTURE_2D_MULTISAMPLE_ARRAY or RENDERBUFFER
-* then an INVALID_ENUM error is generated."
-*/
-   switch (target) {
-   case GL_RENDERBUFFER:
-  break;
-
-   case GL_TEXTURE_2D_MULTISAMPLE:
-   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
-  /* These enums are only valid if ARB_texture_multisample is supported */
-  if ((_mesa_is_desktop_gl(ctx) &&
-   ctx->Extensions.ARB_texture_multisample) ||
-  _mesa_is_gles31(ctx))
- break;
-
-   default:
-  _mesa_error(ctx, GL_INVALID_ENUM,
-  "glGetInternalformativ(target=%s)",
-  _mesa_enum_to_string(target));
+   if (!_legal_parameters(ctx, target, internalformat, pname, bufSize, params))
   return;
-   }
 
-   /* The ARB_internalformat_query spec says:
-*
-* "If the  parameter to GetInternalformativ is not
-* color-, depth- or stencil-renderable, then an INVALID_ENUM error is
-* generated."
-*
-* Page 243 of the GLES 3.0.4 spec says this for GetInternalformativ:
-*
-* "internalformat must be color-renderable, depth-renderable or
-* stencilrenderable (as defined in section 4.4.4)."
-*
-* Section 4.4.4 on page 212 of the same spec says:
-*
-* "An internal format is color-renderable if it is one of the
-* formats from table 3.13 noted as color-renderable or if it
-* is unsized format RGBA or RGB."
-*
-* Therefore, we must accept GL_RGB and GL_RGBA here.
-*/
-   if 

[Mesa-dev] [RFC 13/63] mesa/formatquery: Added function to set 'unsupported' responses

2016-01-19 Thread Eduardo Lima Mitev
From: Antia Puentes 

The ARB_internalformat_query2 specification defines the response that
best represents "not supported" or "not applicable" for
each <pname>.

Queries for unsupported features, targets, internalformats, combinations
of: target and internalformat, target and pname, pname and internalformat,
do not return an error but the corresponding 'unsupported' response.
We will use that response as the default answer.

For SAMPLES the 'unsupported' response is to not modify the 'params' buffer.
---
 src/mesa/main/formatquery.c | 106 
 1 file changed, 106 insertions(+)

diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c
index 47ef5cb..a0dc350 100644
--- a/src/mesa/main/formatquery.c
+++ b/src/mesa/main/formatquery.c
@@ -257,6 +257,112 @@ _legal_parameters(struct gl_context *ctx, GLenum target, 
GLenum internalformat,
return true;
 }
 
+/* Sets the appropriate "unsupported" response as defined by the
+ * ARB_internalformat_query2 spec for each <pname>.
+ */
+static void
+_set_default_response(GLenum pname, GLint buffer[16])
+{
+   /* The ARB_internalformat_query2 spec defines which is the response best
+    * representing "not supported" or "not applicable" for each <pname>.
+*
+* " In general:
+*  - size- or count-based queries will return zero,
+*  - support-, format- or type-based queries will return NONE,
+*  - boolean-based queries will return FALSE, and
+*  - list-based queries return no entries."
+*/
+   switch(pname) {
+   case GL_SAMPLES:
+  break;
+
+   case GL_MAX_COMBINED_DIMENSIONS:
+   case GL_NUM_SAMPLE_COUNTS:
+   case GL_INTERNALFORMAT_RED_SIZE:
+   case GL_INTERNALFORMAT_GREEN_SIZE:
+   case GL_INTERNALFORMAT_BLUE_SIZE:
+   case GL_INTERNALFORMAT_ALPHA_SIZE:
+   case GL_INTERNALFORMAT_DEPTH_SIZE:
+   case GL_INTERNALFORMAT_STENCIL_SIZE:
+   case GL_INTERNALFORMAT_SHARED_SIZE:
+   case GL_MAX_WIDTH:
+   case GL_MAX_HEIGHT:
+   case GL_MAX_DEPTH:
+   case GL_MAX_LAYERS:
+   case GL_IMAGE_TEXEL_SIZE:
+   case GL_TEXTURE_COMPRESSED_BLOCK_WIDTH:
+   case GL_TEXTURE_COMPRESSED_BLOCK_HEIGHT:
+   case GL_TEXTURE_COMPRESSED_BLOCK_SIZE:
+  buffer[0] = 0;
+  break;
+
+   case GL_INTERNALFORMAT_PREFERRED:
+   case GL_INTERNALFORMAT_RED_TYPE:
+   case GL_INTERNALFORMAT_GREEN_TYPE:
+   case GL_INTERNALFORMAT_BLUE_TYPE:
+   case GL_INTERNALFORMAT_ALPHA_TYPE:
+   case GL_INTERNALFORMAT_DEPTH_TYPE:
+   case GL_INTERNALFORMAT_STENCIL_TYPE:
+   case GL_FRAMEBUFFER_RENDERABLE:
+   case GL_FRAMEBUFFER_RENDERABLE_LAYERED:
+   case GL_FRAMEBUFFER_BLEND:
+   case GL_READ_PIXELS:
+   case GL_READ_PIXELS_FORMAT:
+   case GL_READ_PIXELS_TYPE:
+   case GL_TEXTURE_IMAGE_FORMAT:
+   case GL_TEXTURE_IMAGE_TYPE:
+   case GL_GET_TEXTURE_IMAGE_FORMAT:
+   case GL_GET_TEXTURE_IMAGE_TYPE:
+   case GL_MANUAL_GENERATE_MIPMAP:
+   case GL_AUTO_GENERATE_MIPMAP:
+   case GL_COLOR_ENCODING:
+   case GL_SRGB_READ:
+   case GL_SRGB_WRITE:
+   case GL_SRGB_DECODE_ARB:
+   case GL_FILTER:
+   case GL_VERTEX_TEXTURE:
+   case GL_TESS_CONTROL_TEXTURE:
+   case GL_TESS_EVALUATION_TEXTURE:
+   case GL_GEOMETRY_TEXTURE:
+   case GL_FRAGMENT_TEXTURE:
+   case GL_COMPUTE_TEXTURE:
+   case GL_TEXTURE_SHADOW:
+   case GL_TEXTURE_GATHER:
+   case GL_TEXTURE_GATHER_SHADOW:
+   case GL_SHADER_IMAGE_LOAD:
+   case GL_SHADER_IMAGE_STORE:
+   case GL_SHADER_IMAGE_ATOMIC:
+   case GL_IMAGE_COMPATIBILITY_CLASS:
+   case GL_IMAGE_PIXEL_FORMAT:
+   case GL_IMAGE_PIXEL_TYPE:
+   case GL_IMAGE_FORMAT_COMPATIBILITY_TYPE:
+   case GL_SIMULTANEOUS_TEXTURE_AND_DEPTH_TEST:
+   case GL_SIMULTANEOUS_TEXTURE_AND_STENCIL_TEST:
+   case GL_SIMULTANEOUS_TEXTURE_AND_DEPTH_WRITE:
+   case GL_SIMULTANEOUS_TEXTURE_AND_STENCIL_WRITE:
+   case GL_CLEAR_BUFFER:
+   case GL_TEXTURE_VIEW:
+   case GL_VIEW_COMPATIBILITY_CLASS:
+  buffer[0] = GL_NONE;
+  break;
+
+   case GL_INTERNALFORMAT_SUPPORTED:
+   case GL_COLOR_COMPONENTS:
+   case GL_DEPTH_COMPONENTS:
+   case GL_STENCIL_COMPONENTS:
+   case GL_COLOR_RENDERABLE:
+   case GL_DEPTH_RENDERABLE:
+   case GL_STENCIL_RENDERABLE:
+   case GL_MIPMAP:
+   case GL_TEXTURE_COMPRESSED:
+  buffer[0] = GL_FALSE;
+  break;
+
+   default:
+  unreachable("invalid 'pname'");
+   }
+}
+
 /* default implementation of QueryInternalFormat driverfunc, for
  * drivers not implementing ARB_internalformat_query2.
  */
-- 
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 22/63] mesa/formatquery: Added a func to check supported

2016-01-19 Thread Eduardo Lima Mitev
From: Antia Puentes 

From the ARB_internalformat_query2 specification:

  "The INTERNALFORMAT_SUPPORTED <pname> can be used to determine if
   the internal format is supported, and the other <pnames> are defined
   in terms of whether or not the format is supported."
---
 src/mesa/main/formatquery.c | 34 ++
 1 file changed, 34 insertions(+)

diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c
index c1734f5..11bf133 100644
--- a/src/mesa/main/formatquery.c
+++ b/src/mesa/main/formatquery.c
@@ -525,6 +525,39 @@ _is_resource_supported(struct gl_context *ctx, GLenum 
target,
return true;
 }
 
+static bool
+_is_internalformat_supported(struct gl_context *ctx, GLenum target,
+ GLenum internalformat)
+{
+   /* From the ARB_internalformat_query2 specification:
+*
+* "- INTERNALFORMAT_SUPPORTED: If  is an internal 
format
+* that is supported by the implementation in at least some subset of
+* possible operations, TRUE is written to .  If 

+* if not a valid token for any internal format usage, FALSE is 
returned.
+*
+*  that must be supported (in GL 4.2 or later) include
+*  the following:
+* - "sized internal formats" from Table 3.12, 3.13, and 3.15,
+* - any specific "compressed internal format" from Table 3.14,
+* - any "image unit format" from Table 3.21.
+* - any generic "compressed internal format" from Table 3.14, if 
the
+* implementation accepts it for any texture specification 
commands, and
+* - unsized or base internal format, if the implementation accepts
+* it for texture or image specification.
+*/
+   GLint buffer[1];
+
+   if (_mesa_base_tex_format(ctx, internalformat) < 0)
+  return false;
+
+   /* Let the driver have the final word */
+   ctx->Driver.QueryInternalFormat(ctx, target, internalformat,
+   GL_INTERNALFORMAT_SUPPORTED, buffer);
+
+   return (buffer[0] == GL_TRUE);
+}
+
 /* default implementation of QueryInternalFormat driverfunc, for
  * drivers not implementing ARB_internalformat_query2.
  */
@@ -577,6 +610,7 @@ _mesa_GetInternalformativ(GLenum target, GLenum 
internalformat, GLenum pname,
_set_default_response(pname, buffer);
 
if (!_is_target_supported(ctx, target) ||
+   !_is_internalformat_supported(ctx, target, internalformat) ||
!_is_resource_supported(ctx, target, internalformat, pname))
   goto end;
 
-- 
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 09/63] mesa/formatquery: Use new driver hook QueryInternalFormat

2016-01-19 Thread Eduardo Lima Mitev
Implements SAMPLES and NUM_SAMPLE_COUNTS queries using the new generic
driver call QueryInternalFormat, which is being introduced as a replacement
for QuerySamplesForFormat to support ARB_internalformat_query2.
---
 src/mesa/main/formatquery.c | 42 ++
 1 file changed, 14 insertions(+), 28 deletions(-)

diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c
index b55abcd..79f7549 100644
--- a/src/mesa/main/formatquery.c
+++ b/src/mesa/main/formatquery.c
@@ -54,10 +54,16 @@ _mesa_query_internal_format_default(struct gl_context *ctx, 
GLenum target,
(void) ctx;
(void) target;
(void) internalFormat;
-   (void) pname;
-   (void) params;
 
-   /* @TODO */
+   switch (pname) {
+   case GL_SAMPLES:
+   case GL_NUM_SAMPLE_COUNTS:
+  params[0] = 1;
+  break;
+   default:
+  /* @TODO: handle default values for all the different pnames. */
+  break;
+   }
 }
 
 void GLAPIENTRY
@@ -74,7 +80,7 @@ _mesa_GetInternalformativ(GLenum target, GLenum 
internalformat, GLenum pname,
   return;
}
 
-   assert(ctx->Driver.QuerySamplesForFormat != NULL);
+   assert(ctx->Driver.QueryInternalFormat != NULL);
 
/* The ARB_internalformat_query spec says:
 *
@@ -145,7 +151,8 @@ _mesa_GetInternalformativ(GLenum target, GLenum 
internalformat, GLenum pname,
 
switch (pname) {
case GL_SAMPLES:
-  ctx->Driver.QuerySamplesForFormat(ctx, target, internalformat, buffer);
+  ctx->Driver.QueryInternalFormat(ctx, target, internalformat, pname,
+  buffer);
   break;
case GL_NUM_SAMPLE_COUNTS: {
   if (_mesa_is_gles3(ctx) && _mesa_is_enum_format_integer(internalformat)) 
{
@@ -156,29 +163,8 @@ _mesa_GetInternalformativ(GLenum target, GLenum 
internalformat, GLenum pname,
   */
  buffer[0] = 0;
   } else {
- size_t num_samples;
-
- /* The driver can return 0, and we should pass that along to the
-  * application.  The ARB decided that ARB_internalformat_query should
-  * behave as ARB_internalformat_query2 in this situation.
-  *
-  * The ARB_internalformat_query2 spec says:
-  *
-  * "- NUM_SAMPLE_COUNTS: The number of sample counts that would be
-  *returned by querying SAMPLES is returned in <params>.
-  ** If <internalformat> is not color-renderable,
-  *  depth-renderable, or stencil-renderable (as defined in
-  *  section 4.4.4), or if <target> does not support multiple
-  *  samples (ie other than TEXTURE_2D_MULTISAMPLE,
-  *  TEXTURE_2D_MULTISAMPLE_ARRAY, or RENDERBUFFER), 0 is
-  *  returned."
-  */
- num_samples =  ctx->Driver.QuerySamplesForFormat(ctx, target, 
internalformat, buffer);
-
- /* QuerySamplesForFormat writes some stuff to buffer, so we have to
-  * separately over-write it with the requested value.
-  */
- buffer[0] = (GLint) num_samples;
+ ctx->Driver.QueryInternalFormat(ctx, target, internalformat, pname,
+ buffer);
   }
   break;
}
-- 
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 17/63] mesa/main: Added empty skeleton of glGetInternalformati64v

2016-01-19 Thread Eduardo Lima Mitev
From: Antia Puentes 

---
 src/mesa/main/formatquery.c | 16 
 src/mesa/main/formatquery.h |  4 
 2 files changed, 20 insertions(+)

diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c
index 8d0cded..f9795ae 100644
--- a/src/mesa/main/formatquery.c
+++ b/src/mesa/main/formatquery.c
@@ -833,3 +833,19 @@ _mesa_GetInternalformativ(GLenum target, GLenum 
internalformat, GLenum pname,
 
return;
 }
+
+void GLAPIENTRY
+_mesa_GetInternalformati64v(GLenum target, GLenum internalformat,
+GLenum pname, GLsizei bufSize, GLint64 *params)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   ASSERT_OUTSIDE_BEGIN_END(ctx);
+
+   if (!ctx->Extensions.ARB_internalformat_query2) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, "glGetInternalformati64v");
+  return;
+   }
+
+   _mesa_debug(ctx, "glGetInternalformati64v() not implemented");
+}
diff --git a/src/mesa/main/formatquery.h b/src/mesa/main/formatquery.h
index b42299e..1061fd2 100644
--- a/src/mesa/main/formatquery.h
+++ b/src/mesa/main/formatquery.h
@@ -41,4 +41,8 @@ extern void GLAPIENTRY
 _mesa_GetInternalformativ(GLenum target, GLenum internalformat,
   GLenum pname, GLsizei bufSize, GLint *params);
 
+extern void GLAPIENTRY
+_mesa_GetInternalformati64v(GLenum target, GLenum internalformat,
+GLenum pname, GLsizei bufSize, GLint64 *params);
+
 #endif /* FORMATQUERY_H */
-- 
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 12/63] mesa/formatquery: Added function to validate parameters

2016-01-19 Thread Eduardo Lima Mitev
From: Antia Puentes 

Handles the cases where an error should be returned according
to the ARB_internalformat_query and ARB_internalformat_query2
specifications.
---
 src/mesa/main/formatquery.c | 228 
 1 file changed, 228 insertions(+)

diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c
index 13de970..47ef5cb 100644
--- a/src/mesa/main/formatquery.c
+++ b/src/mesa/main/formatquery.c
@@ -29,6 +29,234 @@
 #include "fbobject.h"
 #include "formatquery.h"
 
+/* Handles the cases where either ARB_internalformat_query or
+ * ARB_internalformat_query2 have to return an error.
+ */
+static bool
+_legal_parameters(struct gl_context *ctx, GLenum target, GLenum internalformat,
+  GLenum pname, GLsizei bufSize, GLint *params)
+
+{
+   bool query2 = ctx->Extensions.ARB_internalformat_query2;
+
+   /* The ARB_internalformat_query2 spec says:
+*
+*"The INVALID_ENUM error is generated if the  parameter to
+*GetInternalformati*v is not one of the targets listed in Table 6.xx.
+*/
+   switch(target){
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_1D_ARRAY:
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_2D_ARRAY:
+   case GL_TEXTURE_3D:
+   case GL_TEXTURE_CUBE_MAP:
+   case GL_TEXTURE_CUBE_MAP_ARRAY:
+   case GL_TEXTURE_RECTANGLE:
+   case GL_TEXTURE_BUFFER:
+  if (!query2) {
+ /* The ARB_internalformat_query spec says:
+  *
+  * "If the <target> parameter to GetInternalformativ is not one of
+  *  TEXTURE_2D_MULTISAMPLE, TEXTURE_2D_MULTISAMPLE_ARRAY
+  *  or RENDERBUFFER then an INVALID_ENUM error is generated.
+  */
+ _mesa_error(ctx, GL_INVALID_ENUM,
+ "glGetInternalformativ(target=%s)",
+ _mesa_enum_to_string(target));
+
+ return false;
+  }
+  break;
+
+   case GL_RENDERBUFFER:
+  break;
+
+   case GL_TEXTURE_2D_MULTISAMPLE:
+   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
+  /* The non-existence of ARB_texture_multisample is treated in
+   * ARB_internalformat_query implementation like an error.
+   */
+  if (!query2 &&
+  !(ctx->Extensions.ARB_texture_multisample && 
_mesa_is_desktop_gl(ctx)) &&
+  !_mesa_is_gles31(ctx)) {
+ _mesa_error(ctx, GL_INVALID_ENUM,
+ "glGetInternalformativ(target=%s)",
+ _mesa_enum_to_string(target));
+
+ return false;
+  }
+  break;
+
+   default:
+  _mesa_error(ctx, GL_INVALID_ENUM,
+  "glGetInternalformativ(target=%s)",
+  _mesa_enum_to_string(target));
+  return false;
+   }
+
+
+   /* The ARB_internalformat_query2 spec says:
+*
+* "The INVALID_ENUM error is generated if the  parameter is
+* not one of the listed possibilities.
+*/
+   switch(pname){
+   case GL_SAMPLES:
+   case GL_NUM_SAMPLE_COUNTS:
+  break;
+
+   case GL_SRGB_DECODE_ARB:
+  /* The ARB_internalformat_query2 spec says:
+   *
+   * "If ARB_texture_sRGB_decode or EXT_texture_sRGB_decode or
+   * equivalent functionality is not supported, queries for the
+   * SRGB_DECODE_ARB <pname> set the INVALID_ENUM error.
+   */
+  if (!ctx->Extensions.EXT_texture_sRGB_decode) {
+ _mesa_error(ctx, GL_INVALID_ENUM,
+ "glGetInternalformativ(pname=%s)",
+ _mesa_enum_to_string(pname));
+ return false;
+  }
+  /* fallthrough */
+   case GL_INTERNALFORMAT_SUPPORTED:
+   case GL_INTERNALFORMAT_PREFERRED:
+   case GL_INTERNALFORMAT_RED_SIZE:
+   case GL_INTERNALFORMAT_GREEN_SIZE:
+   case GL_INTERNALFORMAT_BLUE_SIZE:
+   case GL_INTERNALFORMAT_ALPHA_SIZE:
+   case GL_INTERNALFORMAT_DEPTH_SIZE:
+   case GL_INTERNALFORMAT_STENCIL_SIZE:
+   case GL_INTERNALFORMAT_SHARED_SIZE:
+   case GL_INTERNALFORMAT_RED_TYPE:
+   case GL_INTERNALFORMAT_GREEN_TYPE:
+   case GL_INTERNALFORMAT_BLUE_TYPE:
+   case GL_INTERNALFORMAT_ALPHA_TYPE:
+   case GL_INTERNALFORMAT_DEPTH_TYPE:
+   case GL_INTERNALFORMAT_STENCIL_TYPE:
+   case GL_MAX_WIDTH:
+   case GL_MAX_HEIGHT:
+   case GL_MAX_DEPTH:
+   case GL_MAX_LAYERS:
+   case GL_MAX_COMBINED_DIMENSIONS:
+   case GL_COLOR_COMPONENTS:
+   case GL_DEPTH_COMPONENTS:
+   case GL_STENCIL_COMPONENTS:
+   case GL_COLOR_RENDERABLE:
+   case GL_DEPTH_RENDERABLE:
+   case GL_STENCIL_RENDERABLE:
+   case GL_FRAMEBUFFER_RENDERABLE:
+   case GL_FRAMEBUFFER_RENDERABLE_LAYERED:
+   case GL_FRAMEBUFFER_BLEND:
+   case GL_READ_PIXELS:
+   case GL_READ_PIXELS_FORMAT:
+   case GL_READ_PIXELS_TYPE:
+   case GL_TEXTURE_IMAGE_FORMAT:
+   case GL_TEXTURE_IMAGE_TYPE:
+   case GL_GET_TEXTURE_IMAGE_FORMAT:
+   case GL_GET_TEXTURE_IMAGE_TYPE:
+   case GL_MIPMAP:
+   case GL_MANUAL_GENERATE_MIPMAP:
+   case GL_AUTO_GENERATE_MIPMAP:
+   case GL_COLOR_ENCODING:
+   case GL_SRGB_READ:
+   case GL_SRGB_WRITE:
+   case GL_FILTER:
+   case GL_VERTEX_TEXTURE:

[Mesa-dev] [RFC 19/63] mesa/teximage: Make _mesa_format_no_online_compression public

2016-01-19 Thread Eduardo Lima Mitev
From: Antia Puentes 

It will be used by the ARB_internalformat_query2 implementation
to check if a certain compressed 'internalformat' is supported
by texture 'targets'.
---
 src/mesa/main/teximage.c | 2 +-
 src/mesa/main/teximage.h | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 12d7d0e..bf94636 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -1280,7 +1280,7 @@ compressedteximage_only_format(const struct gl_context 
*ctx, GLenum format)
 /**
  * Return true if the format doesn't support online compression.
  */
-static bool
+bool
 _mesa_format_no_online_compression(const struct gl_context *ctx, GLenum format)
 {
return _mesa_is_astc_format(format) ||
diff --git a/src/mesa/main/teximage.h b/src/mesa/main/teximage.h
index 218506c..f51bdd9 100644
--- a/src/mesa/main/teximage.h
+++ b/src/mesa/main/teximage.h
@@ -164,6 +164,9 @@ _mesa_legal_texture_base_format_for_target(struct 
gl_context *ctx,
unsigned dimensions,
const char *caller);
 
+bool
+_mesa_format_no_online_compression(const struct gl_context *ctx, GLenum 
format);
+
 GLboolean
 _mesa_is_renderable_texture_format(struct gl_context *ctx, GLenum 
internalformat);
 
-- 
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 04/63] i965: Move brw_query_samples_for_format() to brw_queryformat.c

2016-01-19 Thread Eduardo Lima Mitev
Now that there is a dedicated source file for internal format queries, this
function belongs there.
---
 src/mesa/drivers/dri/i965/brw_context.c | 38 
 src/mesa/drivers/dri/i965/brw_context.h |  2 ++
 src/mesa/drivers/dri/i965/brw_formatquery.c | 39 +
 3 files changed, 41 insertions(+), 38 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 9152416..8186b7d 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -75,44 +75,6 @@
  * Mesa's Driver Functions
  ***/
 
-static size_t
-brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
- GLenum internalFormat, int samples[16])
-{
-   struct brw_context *brw = brw_context(ctx);
-
-   (void) target;
-
-   switch (brw->gen) {
-   case 9:
-  samples[0] = 16;
-  samples[1] = 8;
-  samples[2] = 4;
-  samples[3] = 2;
-  return 4;
-
-   case 8:
-  samples[0] = 8;
-  samples[1] = 4;
-  samples[2] = 2;
-  return 3;
-
-   case 7:
-  samples[0] = 8;
-  samples[1] = 4;
-  return 2;
-
-   case 6:
-  samples[0] = 4;
-  return 1;
-
-   default:
-  assert(brw->gen < 6);
-  samples[0] = 1;
-  return 1;
-   }
-}
-
 const char *const brw_vendor_string = "Intel Open Source Technology Center";
 
 const char *
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 0ba642e..e17f99c 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1874,6 +1874,8 @@ void gen7_emit_vs_workaround_flush(struct brw_context 
*brw);
 void gen7_emit_cs_stall_flush(struct brw_context *brw);
 
 /* brw_queryformat.c */
+size_t brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
+GLenum internalFormat, int samples[16]);
 void brw_query_internal_format(struct gl_context *ctx, GLenum target,
GLenum internalFormat, GLenum pname,
GLint *params);
diff --git a/src/mesa/drivers/dri/i965/brw_formatquery.c 
b/src/mesa/drivers/dri/i965/brw_formatquery.c
index ad59f03..5ee1d69 100644
--- a/src/mesa/drivers/dri/i965/brw_formatquery.c
+++ b/src/mesa/drivers/dri/i965/brw_formatquery.c
@@ -24,6 +24,45 @@
 #include "brw_context.h"
 #include "main/formatquery.h"
 
+size_t
+brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
+ GLenum internalFormat, int samples[16])
+{
+   struct brw_context *brw = brw_context(ctx);
+
+   (void) target;
+   (void) internalFormat;
+
+   switch (brw->gen) {
+   case 9:
+  samples[0] = 16;
+  samples[1] = 8;
+  samples[2] = 4;
+  samples[3] = 2;
+  return 4;
+
+   case 8:
+  samples[0] = 8;
+  samples[1] = 4;
+  samples[2] = 2;
+  return 3;
+
+   case 7:
+  samples[0] = 8;
+  samples[1] = 4;
+  return 2;
+
+   case 6:
+  samples[0] = 4;
+  return 1;
+
+   default:
+  assert(brw->gen < 6);
+  samples[0] = 1;
+  return 1;
+   }
+}
+
 void
 brw_query_internal_format(struct gl_context *ctx, GLenum target,
   GLenum internalFormat, GLenum pname, GLint *params)
-- 
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >