[Mesa-dev] [PATCH] i965: Don't write beyond allocated memory.

2015-11-13 Thread Juha-Pekka Heikkila
Signed-off-by: Juha-Pekka Heikkila 
---
 src/mesa/drivers/dri/i965/brw_eu_validate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_validate.c 
b/src/mesa/drivers/dri/i965/brw_eu_validate.c
index eb57962..2de2ea1 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_validate.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_validate.c
@@ -39,7 +39,7 @@ cat(struct string *dest, const struct string src)
 {
dest->str = realloc(dest->str, dest->len + src.len + 1);
memcpy(dest->str + dest->len, src.str, src.len);
-   dest->str[dest->len + src.len + 1] = '\0';
+   dest->str[dest->len + src.len] = '\0';
dest->len = dest->len + src.len;
 }
 #define CAT(dest, src) cat(, (struct string){src, strlen(src)})
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/skl: Disable fast clear for formats without alpha

2015-11-13 Thread Neil Roberts
Ben Widawsky  writes:

> Here is one proposal to fix the issue. I noticed that only formats
> without alpha were failing. This sucks for RGBX formats (which
> technically aren't fast clearable based on the surface format). The
> hunk for moving the format should happen regardless of this patch.

If we do this we could limit it to just multisample buffers. I think the
problem only happens when sampling the cleared texels but we effectively
never do that for single-sampled surfaces because in that case we always
do a resolve before using them as textures. Being able to fast clear a
single-sampled RGBX window system buffer is probably one of the more
useful use cases so it's probably worth allowing that.

We could also remove the explicit check for those two FLOAT16 formats in
that case because neither of them has alpha bits.

> Neil has another patch which attempts to not disable formats, and
> workaround the hardware issue that we cannot explain. I just wanted to
> put this on the list.

If anyone's interested, the patch is here:

https://github.com/bpeel/mesa/commit/2c7b2ddc8dba388665c258a1182

Obviously I'm not suggesting we merge that patch because it's a massive
kludge.

The test case which we're using to find these issues is here:

http://patchwork.freedesktop.org/patch/64578/

- Neil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Don't write beyond allocated memory.

2015-11-13 Thread Eduardo Lima Mitev
On 11/13/2015 12:36 PM, Juha-Pekka Heikkila wrote:
> Signed-off-by: Juha-Pekka Heikkila 
> ---
>  src/mesa/drivers/dri/i965/brw_eu_validate.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_eu_validate.c 
> b/src/mesa/drivers/dri/i965/brw_eu_validate.c
> index eb57962..2de2ea1 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu_validate.c
> +++ b/src/mesa/drivers/dri/i965/brw_eu_validate.c
> @@ -39,7 +39,7 @@ cat(struct string *dest, const struct string src)
>  {
> dest->str = realloc(dest->str, dest->len + src.len + 1);
> memcpy(dest->str + dest->len, src.str, src.len);
> -   dest->str[dest->len + src.len + 1] = '\0';
> +   dest->str[dest->len + src.len] = '\0';
> dest->len = dest->len + src.len;
>  }
>  #define CAT(dest, src) cat(, (struct string){src, strlen(src)})
> 

Good catch!

Reviewed-by: Eduardo Lima Mitev 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965: Clean up context constant initialization code.

2015-11-13 Thread Pohjolainen, Topi
On Thu, Nov 12, 2015 at 03:38:52PM -0800, Kenneth Graunke wrote:
> This was getting pretty out of hand, and with compute partially in place
> and tessellation on the way, it was only going to get worse.
> 
> This patch makes a "stage exists?" predicate and a "number of stages"
> count and uses them to clean up a lot of calculations.  We can just
> loop over shader stages and set things for the ones that exist.  For
> combined counts, we can just multiply by the number of stages.
> 
> It also tries to organize a little bit.
> 
> We should probably use _mesa_has_geometry_shaders/tessellation/compute
> here, but we can't because ctx->Version isn't initialized yet.  Perhaps
> that could be fixed in the future.
> 
> No change in "glxinfo -l" on Broadwell.
> 
> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/brw_context.c | 138 
> ++--
>  1 file changed, 58 insertions(+), 80 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
> b/src/mesa/drivers/dri/i965/brw_context.c
> index 2db99c7..89533ae 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.c
> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> @@ -322,64 +322,85 @@ static void
>  brw_initialize_context_constants(struct brw_context *brw)
>  {
> struct gl_context *ctx = >ctx;
> +   const struct brw_compiler *compiler = brw->intelScreen->compiler;
> +
> +   bool stage_exists[MESA_SHADER_STAGES] = {

This could be const.

> +  [MESA_SHADER_VERTEX] = true,
> +  [MESA_SHADER_TESS_CTRL] = false,
> +  [MESA_SHADER_TESS_EVAL] = false,
> +  [MESA_SHADER_GEOMETRY] = brw->gen >= 6,
> +  [MESA_SHADER_FRAGMENT] = true,
> +  [MESA_SHADER_COMPUTE] = 
> _mesa_extension_override_enables.ARB_compute_shader,
> +   };
> +
> +   unsigned num_stages = 0;
> +   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
> +  if (stage_exists[i])
> + num_stages++;
> +   }
>  
> unsigned max_samplers =
>brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;
>  
> +   ctx->Const.MaxDualSourceDrawBuffers = 1;
> +   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
> +   ctx->Const.MaxCombinedShaderOutputResources =
> +  MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
> +
> ctx->Const.QueryCounterBits.Timestamp = 36;
>  
> +   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
> +   ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
> +   ctx->Const.MaxRenderbufferSize = 8192;
> +   ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
> +   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
> +   ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
> +   ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
> +   ctx->Const.MaxTextureMbytes = 1536;
> +   ctx->Const.MaxTextureRectSize = 1 << 12;
> +   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
> ctx->Const.StripTextureBorder = true;
> +   if (brw->gen >= 7)
> +  ctx->Const.MaxProgramTextureGatherComponents = 4;
> +   else if (brw->gen == 6)
> +  ctx->Const.MaxProgramTextureGatherComponents = 1;
>  
> ctx->Const.MaxUniformBlockSize = 65536;
> +
> for (int i = 0; i < MESA_SHADER_STAGES; i++) {
>struct gl_program_constants *prog = >Const.Program[i];
> +
> +  if (!stage_exists[i])
> + continue;
> +
> +  prog->MaxTextureImageUnits = max_samplers;
> +
>prog->MaxUniformBlocks = BRW_MAX_UBO;
>prog->MaxCombinedUniformComponents =
>   prog->MaxUniformComponents +
>   ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
> +
> +  prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
> +  prog->MaxAtomicBuffers = BRW_MAX_ABO;
> +  prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 
> 0;
> +  prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
> }
>  
> -   ctx->Const.MaxDualSourceDrawBuffers = 1;
> -   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
> -   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 
> max_samplers;
> -   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
> +   if (ctx->Extensions.ARB_compute_shader)
> +  ctx->Const.MaxShaderStorageBufferBindings += BRW_MAX_SSBO;
> +
> +
> ctx->Const.MaxTextureUnits =
>MIN2(ctx->Const.MaxTextureCoordUnits,
> ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
> -   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 
> max_samplers;
> -   if (brw->gen >= 6)
> -  ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 
> max_samplers;
> -   else
> -  ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0;
> -   if (_mesa_extension_override_enables.ARB_compute_shader) {
> -  ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 
> BRW_MAX_TEX_UNIT;
> -  ctx->Const.MaxUniformBufferBindings += BRW_MAX_UBO;
> -   } else {
> -  ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 0;
> -   }
> -   

[Mesa-dev] [PATCH v3] nir/copy_propagate: do not copy-propagate MOV srcs with source modifiers

2015-11-13 Thread Iago Toral Quiroga
If a source operand in a MOV has source modifiers, then we cannot
copy-propagate it from the parent instruction and remove the MOV.

v2: remove the check for source modifiers from is_move() (Jason)

v3: Put the check for source modifiers back into is_move() since
this function is called from copy_prop_alu_src(). Add source
modifiers checks to is_vec() instead.
---

Jason, I had to revert v2 after noticing this. When you suggested removing the
check from is_move(), I did not realize that it was actually called from another
place, so I did not think that it could possibly break anything and did not
pass v2 through piglit again. Obviously I was wrong, sorry about that :-(

This version does not produce any regressions in piglit on my IVB laptop.

 src/glsl/nir/nir_opt_copy_propagate.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/glsl/nir/nir_opt_copy_propagate.c 
b/src/glsl/nir/nir_opt_copy_propagate.c
index 7d8bdd7..cfc8e331 100644
--- a/src/glsl/nir/nir_opt_copy_propagate.c
+++ b/src/glsl/nir/nir_opt_copy_propagate.c
@@ -55,10 +55,15 @@ static bool is_move(nir_alu_instr *instr)
 
 static bool is_vec(nir_alu_instr *instr)
 {
-   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
+   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
   if (!instr->src[i].src.is_ssa)
  return false;
 
+  /* we handle modifiers in a separate pass */
+  if (instr->src[i].abs || instr->src[i].negate)
+ return false;
+   }
+
return instr->op == nir_op_vec2 ||
   instr->op == nir_op_vec3 ||
   instr->op == nir_op_vec4;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] glsl: enable 'shared' keyword also for layout qualifiers

2015-11-13 Thread Kai Wasserbäch
Hi Emil,
Emil Velikov wrote on 12.11.2015 18:45:
> On 12 November 2015 at 15:36, Samuel Iglesias Gonsálvez
>  wrote:
>> On 12/11/15 15:28, Timothy Arceri wrote:
>>> On 13 November 2015 12:22:39 am AEDT, "Samuel Iglesias Gonsálvez" 
>>>  wrote:
>>>> 'shared' was added in ARB_uniform_buffer_object and also used
>>>> in ARB_shader_storage_buffer_object.
>>>
>>> Hi Samuel,
>>>
>>> Shared for UBOs and SSBOs is not a keyword; it's just an identifier for a 
>>> layout qualifier. Are you sure you need to make it available for those 
>>> extensions?
>>>
>>
>> Right. Please ignore this patch.
>>
> In this case, may I suggest that you tag the patch as Rejected (or
> similar) in patchwork [1]. Afaict there are quite a few patches in
> there from yourself and fellow colleagues. Any chance someone can go
> through them and change their status appropriately ?

Since I'm reading this from time to time, I was wondering whether Mesa wouldn't
be better served by a Phabricator instance? Maybe Matt and Tom, who send in most
of AMD's patches for the AMDGPU backend in LLVM, can weigh in here?

I'm using Phabricator myself for a big project and I must say it's really neat.
Most status/meta updates can happen automatically as you commit your changes,
the review state is tracked properly and if a patch was rejected/abandoned that
is usually also clear from the state. I.e., in most cases there is no need to have
multiple people walk through the same list of patches/bugs etc.

(Bonus: for switching over from a Bugzilla to Phabricator, there's a pretty big
precedent with complete porting tools: Wikimedia did that)

Cheers,
Kai



signature.asc
Description: OpenPGP digital signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965: Convert scalar_* flags to a scalar_stage array.

2015-11-13 Thread Pohjolainen, Topi
On Thu, Nov 12, 2015 at 03:38:51PM -0800, Kenneth Graunke wrote:
> I was going to add scalar_tcs and scalar_tes flags, and then thought
> better of it and decided to convert this to an array.  Simpler.
> 
> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/brw_compiler.h  |  3 +--
>  src/mesa/drivers/dri/i965/brw_context.c   |  2 +-
>  src/mesa/drivers/dri/i965/brw_gs.c|  3 ++-
>  src/mesa/drivers/dri/i965/brw_link.cpp| 11 +---
>  src/mesa/drivers/dri/i965/brw_program.c   |  3 ++-
>  src/mesa/drivers/dri/i965/brw_shader.cpp  | 31 
> ++-
>  src/mesa/drivers/dri/i965/brw_shader.h|  2 --
>  src/mesa/drivers/dri/i965/brw_vec4.cpp|  4 +--
>  src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp |  2 +-
>  src/mesa/drivers/dri/i965/brw_vs.c|  7 ++---
>  10 files changed, 28 insertions(+), 40 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h 
> b/src/mesa/drivers/dri/i965/brw_compiler.h
> index e3a26d6..3f54616 100644
> --- a/src/mesa/drivers/dri/i965/brw_compiler.h
> +++ b/src/mesa/drivers/dri/i965/brw_compiler.h
> @@ -89,8 +89,7 @@ struct brw_compiler {
> void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
> void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
>  
> -   bool scalar_vs;
> -   bool scalar_gs;
> +   bool scalar_stage[MESA_SHADER_STAGES];
> struct gl_shader_compiler_options 
> glsl_compiler_options[MESA_SHADER_STAGES];
>  };
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
> b/src/mesa/drivers/dri/i965/brw_context.c
> index ac6045d..2db99c7 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.c
> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> @@ -525,7 +525,7 @@ brw_initialize_context_constants(struct brw_context *brw)
>ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms =
>   BRW_MAX_IMAGES;
>ctx->Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms =
> - (brw->intelScreen->compiler->scalar_vs ? BRW_MAX_IMAGES : 0);
> + (brw->intelScreen->compiler->scalar_stage[MESA_SHADER_VERTEX] ? 
> BRW_MAX_IMAGES : 0);
>ctx->Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms =
>   BRW_MAX_IMAGES;
>ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
> diff --git a/src/mesa/drivers/dri/i965/brw_gs.c 
> b/src/mesa/drivers/dri/i965/brw_gs.c
> index ed0890f..ad5b242 100644
> --- a/src/mesa/drivers/dri/i965/brw_gs.c
> +++ b/src/mesa/drivers/dri/i965/brw_gs.c
> @@ -87,7 +87,8 @@ brw_codegen_gs_prog(struct brw_context *brw,
> prog_data.base.base.nr_image_params = gs->NumImages;
>  
> brw_nir_setup_glsl_uniforms(gp->program.Base.nir, prog, >program.Base,
> -   _data.base.base, compiler->scalar_gs);
> +   _data.base.base,
> +   compiler->scalar_stage[MESA_SHADER_GEOMETRY]);
>  
> GLbitfield64 outputs_written = gp->program.Base.OutputsWritten;
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp 
> b/src/mesa/drivers/dri/i965/brw_link.cpp
> index 2991173..14421d4 100644
> --- a/src/mesa/drivers/dri/i965/brw_link.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_link.cpp
> @@ -66,12 +66,14 @@ brw_lower_packing_builtins(struct brw_context *brw,
> gl_shader_stage shader_type,
> exec_list *ir)
>  {
> +   const struct brw_compiler *compiler = brw->intelScreen->compiler;
> +
> int ops = LOWER_PACK_SNORM_2x16
> | LOWER_UNPACK_SNORM_2x16
> | LOWER_PACK_UNORM_2x16
> | LOWER_UNPACK_UNORM_2x16;
>  
> -   if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) {
> +   if (compiler->scalar_stage[shader_type]) {
>ops |= LOWER_UNPACK_UNORM_4x8
> | LOWER_UNPACK_SNORM_4x8
> | LOWER_PACK_UNORM_4x8
> @@ -84,7 +86,7 @@ brw_lower_packing_builtins(struct brw_context *brw,
> * lowering is needed. For SOA code, the Half2x16 ops must be
> * scalarized.
> */
> -  if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) {
> +  if (compiler->scalar_stage[shader_type]) {
>   ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
>   |  LOWER_UNPACK_HALF_2x16_TO_SPLIT;
>}
> @@ -103,6 +105,7 @@ process_glsl_ir(gl_shader_stage stage,
>  struct gl_shader *shader)
>  {
> struct gl_context *ctx = >ctx;
> +   const struct brw_compiler *compiler = brw->intelScreen->compiler;
> const struct gl_shader_compiler_options *options =
>>Const.ShaderCompilerOptions[shader->Stage];
>  
> @@ -161,7 +164,7 @@ process_glsl_ir(gl_shader_stage stage,
> do {
>progress = false;
>  
> -  if (is_scalar_shader_stage(brw->intelScreen->compiler, shader->Stage)) 
> {
> +  if (compiler->scalar_stage[shader->Stage]) {
>   

Re: [Mesa-dev] [PATCH] nir/glsl_to_nir: use _mesa_fls() to compute num_textures

2015-11-13 Thread Jason Ekstrand
On Nov 6, 2015 12:21 PM, "Matt Turner"  wrote:
>
> On Fri, Nov 6, 2015 at 8:27 AM, Juan A. Suarez Romero
>  wrote:
> > Replace the current loop by a direct call to _mesa_fls() function.
> >
> > It also fixes an implicit bug in the current code where num_textures
> > seems to be one value less than it should be when
sh->Program->SamplersUsed > 0.
> >
> > For instance, num_textures is 0 instead of 1 when
> > sh->Program->SamplersUsed is 1.
>
> Looks good to me, and we use _mesa_fls elsewhere to do this same
calculation.
>
> Reviewed-by: Matt Turner 
>
> Jason, was there some reason we weren't doing this? I'm confused why
> we would have had a one-line comment and a 4-line loop when all we
> needed to do was call one function?

I didn't want to pull a non-inline mesa function into NIR and add a link
dependency and I was too lazy to move it into util.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 9/9] st/mesa: add support for batch driver queries to perfmon

2015-11-13 Thread Nicolai Hähnle
---
 src/mesa/state_tracker/st_cb_perfmon.c | 75 ++
 src/mesa/state_tracker/st_cb_perfmon.h |  6 +++
 2 files changed, 74 insertions(+), 7 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 6c71a13..078d2c4 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -42,7 +42,10 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
struct st_context *st = st_context(ctx);
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st->pipe;
+   unsigned *batch = NULL;
unsigned num_active_counters = 0;
+   unsigned max_batch_counters = 0;
+   unsigned num_batch_counters = 0;
int gid, cid;
 
st_flush_bitmap_cache(st);
@@ -50,6 +53,7 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
/* Determine the number of active counters. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = >PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = >perfmon[gid];
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -61,6 +65,8 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
   }
 
   num_active_counters += m->ActiveGroups[gid];
+  if (stg->has_batch)
+ max_batch_counters += m->ActiveGroups[gid];
}
 
stm->active_counters = CALLOC(num_active_counters,
@@ -68,6 +74,9 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
if (!stm->active_counters)
   return false;
 
+   if (max_batch_counters)
+  batch = CALLOC(max_batch_counters, sizeof(*batch));
+
/* Create a query for each active counter. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = >PerfMonitor.Groups[gid];
@@ -79,13 +88,35 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
  struct st_perf_counter_object *cntr =
 >active_counters[stm->num_active_counters];
 
- cntr->query= pipe->create_query(pipe, stc->query_type, 0);
  cntr->id   = cid;
  cntr->group_id = gid;
+ if (stc->flags & PIPE_DRIVER_QUERY_FLAG_BATCH) {
+cntr->batch_index = num_batch_counters;
+batch[num_batch_counters++] = stc->query_type;
+ } else {
+cntr->query = pipe->create_query(pipe, stc->query_type, 0);
+if (!cntr->query)
+   goto fail;
+ }
  ++stm->num_active_counters;
   }
}
+
+   /* Create the batch query. */
+   if (num_batch_counters) {
+  stm->batch_query = pipe->create_batch_query(pipe, num_batch_counters,
+  batch);
+  stm->batch_result = CALLOC(num_batch_counters, 
sizeof(stm->batch_result->batch[0]));
+  if (!stm->batch_query || !stm->batch_result)
+ goto fail;
+   }
+
+   FREE(batch);
return true;
+
+fail:
+   FREE(batch);
+   return false;
 }
 
 static void
@@ -102,6 +133,13 @@ reset_perf_monitor(struct st_perf_monitor_object *stm,
FREE(stm->active_counters);
stm->active_counters = NULL;
stm->num_active_counters = 0;
+
+   if (stm->batch_query) {
+  pipe->destroy_query(pipe, stm->batch_query);
+  stm->batch_query = NULL;
+   }
+   FREE(stm->batch_result);
+   stm->batch_result = NULL;
 }
 
 static struct gl_perf_monitor_object *
@@ -140,9 +178,13 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
/* Start the query for each active counter. */
for (i = 0; i < stm->num_active_counters; ++i) {
   struct pipe_query *query = stm->active_counters[i].query;
-  if (!pipe->begin_query(pipe, query))
+  if (query && !pipe->begin_query(pipe, query))
   goto fail;
}
+
+   if (stm->batch_query && !pipe->begin_query(pipe, stm->batch_query))
+  goto fail;
+
return true;
 
 fail:
@@ -161,8 +203,12 @@ st_EndPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
/* Stop the query for each active counter. */
for (i = 0; i < stm->num_active_counters; ++i) {
   struct pipe_query *query = stm->active_counters[i].query;
-  pipe->end_query(pipe, query);
+  if (query)
+ pipe->end_query(pipe, query);
}
+
+   if (stm->batch_query)
+  pipe->end_query(pipe, stm->batch_query);
 }
 
 static void
@@ -196,11 +242,16 @@ st_IsPerfMonitorResultAvailable(struct gl_context *ctx,
for (i = 0; i < stm->num_active_counters; ++i) {
   struct pipe_query *query = stm->active_counters[i].query;
   union pipe_query_result result;
-  if (!pipe->get_query_result(pipe, query, FALSE, )) {
+  if (query && !pipe->get_query_result(pipe, query, FALSE, )) {
  /* 

[Mesa-dev] [PATCH 2/9] gallium/hud: remove unused field in query_info

2015-11-13 Thread Nicolai Hähnle
---
 src/gallium/auxiliary/hud/hud_driver_query.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c 
b/src/gallium/auxiliary/hud/hud_driver_query.c
index f14305e..3198ab3 100644
--- a/src/gallium/auxiliary/hud/hud_driver_query.c
+++ b/src/gallium/auxiliary/hud/hud_driver_query.c
@@ -48,7 +48,6 @@ struct query_info {
/* Ring of queries. If a query is busy, we use another slot. */
struct pipe_query *query[NUM_QUERIES];
unsigned head, tail;
-   unsigned num_queries;
 
uint64_t last_time;
uint64_t results_cumulative;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/9] gallium: remove pipe_driver_query_group_info field type

2015-11-13 Thread Nicolai Hähnle
This was only used to implement an unnecessarily restrictive interpretation
of the spec of AMD_performance_monitor. The spec says

  A performance monitor consists of a number of hardware and software
  counters that can be sampled by the GPU and reported back to the
  application.

I guess one could take this as a requirement that counters _must_ be sampled
by the GPU, but then why are they called _software_ counters? Besides,
there's not much reason _not_ to expose all counters that are available,
and this simplifies the code.
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query.c |  3 ---
 src/gallium/include/pipe/p_defines.h  |  7 ---
 src/mesa/state_tracker/st_cb_perfmon.c| 30 ---
 3 files changed, 40 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index f539210..a1d6162 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -200,7 +200,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
if (id == NVC0_HW_SM_QUERY_GROUP) {
   if (screen->compute) {
  info->name = "MP counters";
- info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
 
  /* Because we can't expose the number of hardware counters needed for
   * each different query, we don't want to allow more than one active
@@ -224,7 +223,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
   if (screen->compute) {
  if (screen->base.class_3d < NVE4_3D_CLASS) {
 info->name = "Performance metrics";
-info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
 info->max_active_queries = 1;
 info->num_queries = NVC0_HW_METRIC_QUERY_COUNT;
 return 1;
@@ -234,7 +232,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) {
   info->name = "Driver statistics";
-  info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_CPU;
   info->max_active_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
   info->num_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
   return 1;
diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 7240154..7f241c8 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -829,12 +829,6 @@ enum pipe_driver_query_type
PIPE_DRIVER_QUERY_TYPE_HZ   = 6,
 };
 
-enum pipe_driver_query_group_type
-{
-   PIPE_DRIVER_QUERY_GROUP_TYPE_CPU = 0,
-   PIPE_DRIVER_QUERY_GROUP_TYPE_GPU = 1,
-};
-
 /* Whether an average value per frame or a cumulative value should be
  * displayed.
  */
@@ -864,7 +858,6 @@ struct pipe_driver_query_info
 struct pipe_driver_query_group_info
 {
const char *name;
-   enum pipe_driver_query_group_type type;
unsigned max_active_queries;
unsigned num_queries;
 };
diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 1bb5be3..4ec6d86 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -65,27 +65,6 @@ find_query_type(struct pipe_screen *screen, const char *name)
return type;
 }
 
-/**
- * Return TRUE if the underlying driver expose GPU counters.
- */
-static bool
-has_gpu_counters(struct pipe_screen *screen)
-{
-   int num_groups, gid;
-
-   num_groups = screen->get_driver_query_group_info(screen, 0, NULL);
-   for (gid = 0; gid < num_groups; gid++) {
-  struct pipe_driver_query_group_info group_info;
-
-  if (!screen->get_driver_query_group_info(screen, gid, _info))
- continue;
-
-  if (group_info.type == PIPE_DRIVER_QUERY_GROUP_TYPE_GPU)
- return true;
-   }
-   return false;
-}
-
 static bool
 init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
 {
@@ -313,12 +292,6 @@ st_init_perfmon(struct st_context *st)
if (!screen->get_driver_query_info || !screen->get_driver_query_group_info)
   return false;
 
-   if (!has_gpu_counters(screen)) {
-  /* According to the spec, GL_AMD_performance_monitor must only
-   * expose GPU counters. */
-  return false;
-   }
-
/* Get the number of available queries. */
num_counters = screen->get_driver_query_info(screen, 0, NULL);
if (!num_counters)
@@ -339,9 +312,6 @@ st_init_perfmon(struct st_context *st)
   if (!screen->get_driver_query_group_info(screen, gid, _info))
  continue;
 
-  if (group_info.type != PIPE_DRIVER_QUERY_GROUP_TYPE_GPU)
- continue;
-
   g->Name = group_info.name;
   g->MaxActiveCounters = group_info.max_active_queries;
   g->NumCounters = 0;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/10] radeon: count cs dwords separately for query begin and end

2015-11-13 Thread Nicolai Hähnle
This will be important for perfcounter queries.
---
 src/gallium/drivers/radeon/r600_query.c | 33 +++--
 src/gallium/drivers/radeon/r600_query.h |  3 ++-
 2 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 4f89634..f8a30a2 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -342,16 +342,18 @@ static struct pipe_query *r600_query_hw_create(struct 
r600_common_context *rctx,
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
query->result_size = 16 * rctx->max_db;
-   query->num_cs_dw = 6;
+   query->num_cs_dw_begin = 6;
+   query->num_cs_dw_end = 6;
break;
case PIPE_QUERY_TIME_ELAPSED:
query->result_size = 16;
-   query->num_cs_dw = 8;
+   query->num_cs_dw_begin = 8;
+   query->num_cs_dw_end = 8;
query->flags = R600_QUERY_HW_FLAG_TIMER;
break;
case PIPE_QUERY_TIMESTAMP:
query->result_size = 8;
-   query->num_cs_dw = 8;
+   query->num_cs_dw_end = 8;
query->flags = R600_QUERY_HW_FLAG_TIMER |
   R600_QUERY_HW_FLAG_NO_START;
break;
@@ -361,13 +363,15 @@ static struct pipe_query *r600_query_hw_create(struct 
r600_common_context *rctx,
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
query->result_size = 32;
-   query->num_cs_dw = 6;
+   query->num_cs_dw_begin = 6;
+   query->num_cs_dw_end = 6;
query->stream = index;
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
/* 11 values on EG, 8 on R600. */
query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 
16;
-   query->num_cs_dw = 6;
+   query->num_cs_dw_begin = 6;
+   query->num_cs_dw_end = 6;
break;
default:
assert(0);
@@ -465,7 +469,9 @@ static void r600_query_hw_emit_start(struct 
r600_common_context *ctx,
 
r600_update_occlusion_query_state(ctx, query->b.type, 1);
r600_update_prims_generated_query_state(ctx, query->b.type, 1);
-   ctx->need_gfx_cs_space(>b, query->num_cs_dw * 2, TRUE);
+
+   ctx->need_gfx_cs_space(>b, query->num_cs_dw_begin + 
query->num_cs_dw_end,
+  TRUE);
 
/* Get a new query buffer if needed. */
if (query->buffer.results_end + query->result_size > 
query->buffer.buf->b.b.width0) {
@@ -482,10 +488,9 @@ static void r600_query_hw_emit_start(struct 
r600_common_context *ctx,
query->ops->emit_start(ctx, query, query->buffer.buf, va);
 
if (query->flags & R600_QUERY_HW_FLAG_TIMER)
-   ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw;
+   ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw_end;
else
-   ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
-
+   ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw_end;
 }
 
 static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
@@ -546,7 +551,7 @@ static void r600_query_hw_emit_stop(struct 
r600_common_context *ctx,
 
/* The queries which need begin already called this in begin_query. */
if (query->flags & R600_QUERY_HW_FLAG_NO_START) {
-   ctx->need_gfx_cs_space(>b, query->num_cs_dw, FALSE);
+   ctx->need_gfx_cs_space(>b, query->num_cs_dw_end, FALSE);
}
 
/* emit end query */
@@ -558,9 +563,9 @@ static void r600_query_hw_emit_stop(struct 
r600_common_context *ctx,
 
if (!(query->flags & R600_QUERY_HW_FLAG_NO_START)) {
if (query->flags & R600_QUERY_HW_FLAG_TIMER)
-   ctx->num_cs_dw_timer_queries_suspend -= 
query->num_cs_dw;
+   ctx->num_cs_dw_timer_queries_suspend -= 
query->num_cs_dw_end;
else
-   ctx->num_cs_dw_nontimer_queries_suspend -= 
query->num_cs_dw;
+   ctx->num_cs_dw_nontimer_queries_suspend -= 
query->num_cs_dw_end;
}
 
r600_update_occlusion_query_state(ctx, query->b.type, -1);
@@ -980,14 +985,14 @@ static unsigned 
r600_queries_num_cs_dw_for_resuming(struct r600_common_context *
 
LIST_FOR_EACH_ENTRY(query, query_list, list) {
/* begin + end */
-   num_dw += query->num_cs_dw * 2;
+   num_dw += query->num_cs_dw_begin + query->num_cs_dw_end;
 
/* Workaround for the fact that
 * num_cs_dw_nontimer_queries_suspend is incremented for every
 * resumed query, which raises the bar in 

[Mesa-dev] [PATCH 08/10] radeon: implement r600_query_hw_get_result via function pointers

2015-11-13 Thread Nicolai Hähnle
We will need the clear_result override for the batch query implementation.
---
 src/gallium/drivers/radeon/r600_query.c | 189 +++-
 src/gallium/drivers/radeon/r600_query.h |   4 +
 2 files changed, 94 insertions(+), 99 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 4b201fd..59e2a58 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -307,11 +307,18 @@ static void r600_query_hw_do_emit_stop(struct 
r600_common_context *ctx,
   struct r600_query_hw *query,
   struct r600_resource *buffer,
   uint64_t va);
+static void r600_query_hw_add_result(struct r600_common_context *ctx,
+struct r600_query_hw *, void *buffer,
+union pipe_query_result *result);
+static void r600_query_hw_clear_result(struct r600_query_hw *,
+  union pipe_query_result *);
 
 static struct r600_query_hw_ops query_hw_default_hw_ops = {
.prepare_buffer = r600_query_hw_prepare_buffer,
.emit_start = r600_query_hw_do_emit_start,
.emit_stop = r600_query_hw_do_emit_stop,
+   .clear_result = r600_query_hw_clear_result,
+   .add_result = r600_query_hw_add_result,
 };
 
 static struct pipe_query *r600_query_hw_create(struct r600_common_context 
*rctx,
@@ -695,7 +702,7 @@ static void r600_query_hw_end(struct r600_common_context 
*rctx,
LIST_DELINIT(>list);
 }
 
-static unsigned r600_query_read_result(char *map, unsigned start_index, 
unsigned end_index,
+static unsigned r600_query_read_result(void *map, unsigned start_index, 
unsigned end_index,
   bool test_status_bit)
 {
uint32_t *current_result = (uint32_t*)map;
@@ -713,47 +720,36 @@ static unsigned r600_query_read_result(char *map, 
unsigned start_index, unsigned
return 0;
 }
 
-static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
-   struct r600_query_hw *query,
-   struct r600_query_buffer *qbuf,
-   boolean wait,
-   union pipe_query_result *result)
+static void r600_query_hw_add_result(struct r600_common_context *ctx,
+struct r600_query_hw *query,
+void *buffer,
+union pipe_query_result *result)
 {
-   unsigned results_base = 0;
-   char *map;
-
-   map = r600_buffer_map_sync_with_rings(ctx, qbuf->buf,
-   PIPE_TRANSFER_READ |
-   (wait ? 0 : 
PIPE_TRANSFER_DONTBLOCK));
-   if (!map)
-   return FALSE;
-
-   /* count all results across all data blocks */
switch (query->b.type) {
-   case PIPE_QUERY_OCCLUSION_COUNTER:
-   while (results_base != qbuf->results_end) {
+   case PIPE_QUERY_OCCLUSION_COUNTER: {
+   unsigned results_base = 0;
+   while (results_base != query->result_size) {
result->u64 +=
-   r600_query_read_result(map + results_base, 0, 
2, true);
+   r600_query_read_result(buffer + results_base, 
0, 2, true);
results_base += 16;
}
break;
-   case PIPE_QUERY_OCCLUSION_PREDICATE:
-   while (results_base != qbuf->results_end) {
+   }
+   case PIPE_QUERY_OCCLUSION_PREDICATE: {
+   unsigned results_base = 0;
+   while (results_base != query->result_size) {
result->b = result->b ||
-   r600_query_read_result(map + results_base, 0, 
2, true) != 0;
+   r600_query_read_result(buffer + results_base, 
0, 2, true) != 0;
results_base += 16;
}
break;
+   }
case PIPE_QUERY_TIME_ELAPSED:
-   while (results_base != qbuf->results_end) {
-   result->u64 +=
-   r600_query_read_result(map + results_base, 0, 
2, false);
-   results_base += query->result_size;
-   }
+   result->u64 += r600_query_read_result(buffer, 0, 2, false);
break;
case PIPE_QUERY_TIMESTAMP:
{
-   uint32_t *current_result = (uint32_t*)map;
+   uint32_t *current_result = (uint32_t*)buffer;
result->u64 = (uint64_t)current_result[0] |
  (uint64_t)current_result[1] << 32;
break;
@@ 

[Mesa-dev] [PATCH 00/10] radeon: cleanup and refactor the query implementation

2015-11-13 Thread Nicolai Hähnle
Hi,

in preparation for performance counters, this series makes the implementation
of queries pluggable, and separates query buffer handling from CS emit and
result collection for hardware queries.

Aside from two PIPE_QUERY_GPU_FINISHED-related fixes (using context flush,
picked up from Marek, and fixing a fence leak), this should not affect the
feature set in any way.

Please review!

Thanks,
Nicolai
---
 Makefile.sources   |1 
 r600_pipe_common.c |   46 --
 r600_pipe_common.h |   16 
 r600_query.c   | 1014 ++---
 r600_query.h   |  139 +++
 5 files changed, 734 insertions(+), 482 deletions(-)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/10] radeon: convert software queries to the new style

2015-11-13 Thread Nicolai Hähnle
Software queries are all queries that do not require suspend/resume
and explicit handling of result buffers.

Note that this fixes a fence leak with PIPE_QUERY_GPU_FINISHED, and it
contains Marek's fix to GPU_FINISHED's end_query() handling.
---
 src/gallium/drivers/radeon/r600_query.c | 366 +---
 1 file changed, 194 insertions(+), 172 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index fdab8e3..c7350f1 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -51,15 +51,195 @@ struct r600_query {
unsignednum_cs_dw;
/* linked list of queries */
struct list_headlist;
-   /* for custom non-GPU queries */
+   /* For transform feedback: which stream the query is for */
+   unsigned stream;
+};
+
+/* Queries without buffer handling or suspend/resume. */
+struct r600_query_sw {
+   struct r600_query b;
+
uint64_t begin_result;
uint64_t end_result;
/* Fence for GPU_FINISHED. */
struct pipe_fence_handle *fence;
-   /* For transform feedback: which stream the query is for */
-   unsigned stream;
 };
 
+static void r600_query_sw_destroy(struct r600_common_context *rctx,
+ struct r600_query *rquery)
+{
+   struct pipe_screen *screen = rctx->b.screen;
+   struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+   screen->fence_reference(screen, >fence, NULL);
+   FREE(query);
+}
+
+static enum radeon_value_id winsys_id_from_type(unsigned type)
+{
+   switch (type) {
+   case R600_QUERY_REQUESTED_VRAM: return RADEON_REQUESTED_VRAM_MEMORY;
+   case R600_QUERY_REQUESTED_GTT: return RADEON_REQUESTED_GTT_MEMORY;
+   case R600_QUERY_BUFFER_WAIT_TIME: return RADEON_BUFFER_WAIT_TIME_NS;
+   case R600_QUERY_NUM_CS_FLUSHES: return RADEON_NUM_CS_FLUSHES;
+   case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED;
+   case R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE;
+   case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE;
+   case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE;
+   case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK;
+   case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK;
+   default: unreachable("query type does not correspond to winsys id");
+   }
+}
+
+static boolean r600_query_sw_begin(struct r600_common_context *rctx,
+  struct r600_query *rquery)
+{
+   struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+   switch(query->b.type) {
+   case PIPE_QUERY_TIMESTAMP_DISJOINT:
+   case PIPE_QUERY_GPU_FINISHED:
+   break;
+   case R600_QUERY_DRAW_CALLS:
+   query->begin_result = rctx->num_draw_calls;
+   break;
+   case R600_QUERY_REQUESTED_VRAM:
+   case R600_QUERY_REQUESTED_GTT:
+   case R600_QUERY_VRAM_USAGE:
+   case R600_QUERY_GTT_USAGE:
+   case R600_QUERY_GPU_TEMPERATURE:
+   case R600_QUERY_CURRENT_GPU_SCLK:
+   case R600_QUERY_CURRENT_GPU_MCLK:
+   query->begin_result = 0;
+   break;
+   case R600_QUERY_BUFFER_WAIT_TIME:
+   case R600_QUERY_NUM_CS_FLUSHES:
+   case R600_QUERY_NUM_BYTES_MOVED: {
+   enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
+   query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
+   break;
+   }
+   case R600_QUERY_GPU_LOAD:
+   query->begin_result = r600_gpu_load_begin(rctx->screen);
+   break;
+   case R600_QUERY_NUM_COMPILATIONS:
+   query->begin_result = 
p_atomic_read(>screen->num_compilations);
+   break;
+   case R600_QUERY_NUM_SHADERS_CREATED:
+   query->begin_result = 
p_atomic_read(>screen->num_shaders_created);
+   break;
+   default:
+   unreachable("r600_query_sw_begin: bad query type");
+   }
+
+   return TRUE;
+}
+
+static void r600_query_sw_end(struct r600_common_context *rctx,
+ struct r600_query *rquery)
+{
+   struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+   switch(query->b.type) {
+   case PIPE_QUERY_TIMESTAMP_DISJOINT:
+   break;
+   case PIPE_QUERY_GPU_FINISHED:
+   rctx->b.flush(>b, >fence, 0);
+   break;
+   case R600_QUERY_DRAW_CALLS:
+   query->begin_result = rctx->num_draw_calls;
+   break;
+   case R600_QUERY_REQUESTED_VRAM:
+   case R600_QUERY_REQUESTED_GTT:
+   case R600_QUERY_VRAM_USAGE:
+   case R600_QUERY_GTT_USAGE:
+   case R600_QUERY_GPU_TEMPERATURE:
+   case R600_QUERY_CURRENT_GPU_SCLK:
+   case R600_QUERY_CURRENT_GPU_MCLK:
+   case 

Re: [Mesa-dev] [PATCH 07/11] i965: Move postprocess_nir to codegen time

2015-11-13 Thread Jason Ekstrand
On Nov 13, 2015 5:53 AM, "Iago Toral"  wrote:
>
> On Wed, 2015-11-11 at 17:26 -0800, Jason Ekstrand wrote:
> > ---
> >  src/mesa/drivers/dri/i965/brw_fs.cpp  | 11 +--
> >  src/mesa/drivers/dri/i965/brw_nir.c   |  1 -
> >  src/mesa/drivers/dri/i965/brw_vec4.cpp|  5 -
> >  src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp |  6 +-
> >  4 files changed, 18 insertions(+), 5 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
b/src/mesa/drivers/dri/i965/brw_fs.cpp
> > index ad94fa4..b8713ab 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> > @@ -43,6 +43,7 @@
> >  #include "brw_wm.h"
> >  #include "brw_fs.h"
> >  #include "brw_cs.h"
> > +#include "brw_nir.h"
> >  #include "brw_vec4_gs_visitor.h"
> >  #include "brw_cfg.h"
> >  #include "brw_dead_control_flow.h"
> > @@ -5459,13 +5460,16 @@ brw_compile_fs(const struct brw_compiler
*compiler, void *log_data,
> > void *mem_ctx,
> > const struct brw_wm_prog_key *key,
> > struct brw_wm_prog_data *prog_data,
> > -   const nir_shader *shader,
> > +   const nir_shader *src_shader,
> > struct gl_program *prog,
> > int shader_time_index8, int shader_time_index16,
> > bool use_rep_send,
> > unsigned *final_assembly_size,
> > char **error_str)
> >  {
> > +   nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
> > +   brw_postprocess_nir(shader, compiler->devinfo, true);
> > +
>
> Maybe it is a silly question, but why do we need to clone the shader to
> do this?

Because brw_compile_foo may be called multiple times on the same shader
source. Since brw_postprocess_nir alters the shader source, we need to make
a copy.

> > /* key->alpha_test_func means simulating alpha testing via discards,
> >  * so the shader definitely kills pixels.
> >  */
> > @@ -5618,11 +5622,14 @@ brw_compile_cs(const struct brw_compiler
*compiler, void *log_data,
> > void *mem_ctx,
> > const struct brw_cs_prog_key *key,
> > struct brw_cs_prog_data *prog_data,
> > -   const nir_shader *shader,
> > +   const nir_shader *src_shader,
> > int shader_time_index,
> > unsigned *final_assembly_size,
> > char **error_str)
> >  {
> > +   nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
> > +   brw_postprocess_nir(shader, compiler->devinfo, true);
> > +
> > prog_data->local_size[0] = shader->info.cs.local_size[0];
> > prog_data->local_size[1] = shader->info.cs.local_size[1];
> > prog_data->local_size[2] = shader->info.cs.local_size[2];
> > diff --git a/src/mesa/drivers/dri/i965/brw_nir.c
b/src/mesa/drivers/dri/i965/brw_nir.c
> > index 21c2648..693b9cd 100644
> > --- a/src/mesa/drivers/dri/i965/brw_nir.c
> > +++ b/src/mesa/drivers/dri/i965/brw_nir.c
> > @@ -391,7 +391,6 @@ brw_create_nir(struct brw_context *brw,
> >
> > brw_preprocess_nir(nir, is_scalar);
> > brw_lower_nir(nir, devinfo, shader_prog, is_scalar);
> > -   brw_postprocess_nir(nir, devinfo, is_scalar);
> >
> > return nir;
> >  }
> > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> > index 8350a02..9f75bb6 100644
> > --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> > @@ -2028,13 +2028,16 @@ brw_compile_vs(const struct brw_compiler
*compiler, void *log_data,
> > void *mem_ctx,
> > const struct brw_vs_prog_key *key,
> > struct brw_vs_prog_data *prog_data,
> > -   const nir_shader *shader,
> > +   const nir_shader *src_shader,
> > gl_clip_plane *clip_planes,
> > bool use_legacy_snorm_formula,
> > int shader_time_index,
> > unsigned *final_assembly_size,
> > char **error_str)
> >  {
> > +   nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
> > +   brw_postprocess_nir(shader, compiler->devinfo, compiler->scalar_vs);
> > +
> > const unsigned *assembly = NULL;
> >
> > unsigned nr_attributes = _mesa_bitcount_64(prog_data->inputs_read);
> > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
> > index 49c1083..92b15d9 100644
> > --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
> > @@ -30,6 +30,7 @@
> >  #include "brw_vec4_gs_visitor.h"
> >  #include "gen6_gs_visitor.h"
> >  #include "brw_fs.h"
> > +#include "brw_nir.h"
> >
> >  namespace brw {
> >
> > @@ -604,7 +605,7 @@ brw_compile_gs(const struct brw_compiler *compiler,
void *log_data,
> > void *mem_ctx,
> > const struct 

Re: [Mesa-dev] [PATCH 4/7] [v2] i965/meta/gen9: Individually fast clear color attachments

2015-11-13 Thread Neil Roberts
Hi,

You don't seem to have included any of the suggestions I made in my
review. Was this deliberate? If not, the main points were:

• You don't need to call brw_fast_clear_init or use_rectlist in the
  function because these are already called before entering it.

• I don't think it's worth creating a framebuffer. Instead you can just
  call _mesa_meta_drawbuffers_from_bitfield(1 << index) in the loop.
  Modifying the draw buffers state should be ok because it's saved in
  the meta state and it's already done for the Gen8 code path.

I went ahead and tried the changes in a patch here:

https://github.com/bpeel/mesa/commit/b2aa8f2d90572392030e5177952bf

It doesn't cause any Jenkins regressions. Feel free to squash it into
the patch if you want; of course, if you prefer to keep your patch as
it is, that's up to you.

Regards,
- Neil

Ben Widawsky  writes:

> The impetus for this patch comes from a seemingly benign statement within the
> spec (quoted within the patch). For me, this patch was at some point critical
> for getting stable piglit results (though this did not seem to be the case on a
> branch Chad was working on).
>
> It is very important for clearing multiple color buffer attachments and can be
> observed in the following piglit tests:
> spec/arb_framebuffer_object/fbo-drawbuffers-none glclear
> spec/ext_framebuffer_multisample/blit-multiple-render-targets 0
>
> v2: Doing the framebuffer binding only once (Chad)
> Directly use the renderbuffers from the mt (Chad)
>
> Cc: Chad Versace 
> Signed-off-by: Ben Widawsky 
> ---
>  src/mesa/drivers/dri/i965/brw_meta_fast_clear.c | 94 
> +
>  1 file changed, 81 insertions(+), 13 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c 
> b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
> index eac92d4..97444d7 100644
> --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
> +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
> @@ -428,6 +428,71 @@ use_rectlist(struct brw_context *brw, bool enable)
> brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
>  }
>  
> +/**
> + * Individually fast clear each color buffer attachment. On previous gens this
> + * isn't required. The motivation for this comes from one line (which seems to
> + * be specific to SKL+). The list item is in section titled _MCS Buffer for
> + * Render Target(s)_
> + *
> + *   "Since only one RT is bound with a clear pass, only one RT can be cleared
> + *   at a time. To clear multiple RTs, multiple clear passes are required."
> + *
> + * The code follows the same idea as the resolve code which creates a fake FBO
> + * to avoid interfering with too much of the GL state.
> + */
> +static void
> +fast_clear_attachments(struct brw_context *brw,
> +   struct gl_framebuffer *fb,
> +   uint32_t fast_clear_buffers,
> +   struct rect fast_clear_rect)
> +{
> +   assert(brw->gen >= 9);
> +   struct gl_context *ctx = >ctx;
> +   const GLuint old_fb = ctx->DrawBuffer->Name;
> +   GLuint fbo;
> +
> +   _mesa_GenFramebuffers(1, );
> +   _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo);
> +   _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0);
> +
> +   brw_fast_clear_init(brw);
> +   use_rectlist(brw, true);
> +   brw_bind_rep_write_shader(brw, (float *) fast_clear_color);
> +
> +   /* SKL+ also has a resolve mode for compressed render targets and thus more
> +* bits to let us select the type of resolve.  For fast clear resolves, it
> +* turns out we can use the same value as pre-SKL though.
> +*/
> +   set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE);
> +
> +   for (unsigned buf = 0; buf < fb->_NumColorDrawBuffers; buf++) {
> +  struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[buf];
> +  struct intel_renderbuffer *irb = intel_renderbuffer(rb);
> +  int index = fb->_ColorDrawBufferIndexes[buf];
> +
> +  if ((fast_clear_buffers & (1 << index)) == 0)
> + continue;
> +
> +
> +  _mesa_framebuffer_renderbuffer(ctx, ctx->DrawBuffer,
> + GL_COLOR_ATTACHMENT0, rb,
> + "meta fast clear (per-attachment)");
> +
> +  brw_draw_rectlist(ctx, _clear_rect, MAX2(1, fb->MaxNumLayers));
> +
> +  /* Now set the mcs we cleared to INTEL_FAST_CLEAR_STATE_CLEAR so we'll
> +   * resolve them eventually.
> +   */
> +  irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
> +   }
> +
> +   set_fast_clear_op(brw, 0);
> +   use_rectlist(brw, false);
> +
> +   _mesa_DeleteFramebuffers(1, );
> +   _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, old_fb);
> +}
> +
>  bool
>  brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
>  GLbitfield buffers, bool partial_clear)
> @@ -603,12 +668,27 @@ brw_meta_fast_clear(struct brw_context *brw, struct 
> gl_framebuffer 

[Mesa-dev] [PATCH 04/10] radeon: add query handler function pointers

2015-11-13 Thread Nicolai Hähnle
The goal here is to be able to move the implementation details of hardware-
specific queries (in particular, performance counters) out of the common code.
---
 src/gallium/drivers/radeon/r600_query.c | 73 +
 src/gallium/drivers/radeon/r600_query.h | 16 
 2 files changed, 80 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index b79d2d0..fdab8e3 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -26,7 +26,6 @@
 #include "r600_cs.h"
 #include "util/u_memory.h"
 
-
 struct r600_query_buffer {
/* The buffer where query results are stored. */
struct r600_resource*buf;
@@ -39,6 +38,8 @@ struct r600_query_buffer {
 };
 
 struct r600_query {
+   struct r600_query_ops *ops;
+
/* The query buffer and how many results are in it. */
struct r600_query_bufferbuffer;
/* The type of query */
@@ -59,6 +60,23 @@ struct r600_query {
unsigned stream;
 };
 
+static void r600_do_destroy_query(struct r600_common_context *, struct 
r600_query *);
+static boolean r600_do_begin_query(struct r600_common_context *, struct 
r600_query *);
+static void r600_do_end_query(struct r600_common_context *, struct r600_query 
*);
+static boolean r600_do_get_query_result(struct r600_common_context *,
+   struct r600_query *, boolean wait,
+   union pipe_query_result *result);
+static void r600_do_render_condition(struct r600_common_context *,
+struct r600_query *, boolean condition,
+uint mode);
+
+static struct r600_query_ops legacy_query_ops = {
+   .destroy = r600_do_destroy_query,
+   .begin = r600_do_begin_query,
+   .end = r600_do_end_query,
+   .get_result = r600_do_get_query_result,
+   .render_condition = r600_do_render_condition,
+};
 
 static bool r600_is_timer_query(unsigned type)
 {
@@ -366,6 +384,7 @@ static struct pipe_query *r600_create_query(struct 
pipe_context *ctx, unsigned q
return NULL;
 
query->type = query_type;
+   query->ops = _query_ops;
 
switch (query_type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
@@ -373,7 +392,6 @@ static struct pipe_query *r600_create_query(struct 
pipe_context *ctx, unsigned q
query->result_size = 16 * rctx->max_db;
query->num_cs_dw = 6;
break;
-   break;
case PIPE_QUERY_TIME_ELAPSED:
query->result_size = 16;
query->num_cs_dw = 8;
@@ -433,7 +451,15 @@ static struct pipe_query *r600_create_query(struct 
pipe_context *ctx, unsigned q
 
 static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query 
*query)
 {
-   struct r600_query *rquery = (struct r600_query*)query;
+   struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+   struct r600_query *rquery = (struct r600_query *)query;
+
+   rquery->ops->destroy(rctx, rquery);
+}
+
+static void r600_do_destroy_query(struct r600_common_context *rctx,
+ struct r600_query *rquery)
+{
struct r600_query_buffer *prev = rquery->buffer.previous;
 
/* Release all query buffers. */
@@ -445,7 +471,7 @@ static void r600_destroy_query(struct pipe_context *ctx, 
struct pipe_query *quer
}
 
pipe_resource_reference((struct pipe_resource**)>buffer.buf, 
NULL);
-   FREE(query);
+   FREE(rquery);
 }
 
 static boolean r600_begin_query(struct pipe_context *ctx,
@@ -453,6 +479,13 @@ static boolean r600_begin_query(struct pipe_context *ctx,
 {
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query *rquery = (struct r600_query *)query;
+
+   return rquery->ops->begin(rctx, rquery);
+}
+
+static boolean r600_do_begin_query(struct r600_common_context *rctx,
+  struct r600_query *rquery)
+{
struct r600_query_buffer *prev = rquery->buffer.previous;
 
if (!r600_query_needs_begin(rquery->type)) {
@@ -528,6 +561,12 @@ static void r600_end_query(struct pipe_context *ctx, 
struct pipe_query *query)
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query *rquery = (struct r600_query *)query;
 
+   rquery->ops->end(rctx, rquery);
+}
+
+static void r600_do_end_query(struct r600_common_context *rctx,
+ struct r600_query *rquery)
+{
/* Non-GPU queries. */
switch (rquery->type) {
case PIPE_QUERY_TIMESTAMP_DISJOINT:
@@ -792,11 +831,19 @@ static boolean r600_get_query_buffer_result(struct 
r600_common_context *ctx,
 }
 
 static boolean r600_get_query_result(struct pipe_context *ctx,
-   

[Mesa-dev] [PATCH 01/10] radeon: move get_driver_query_info to r600_query.c

2015-11-13 Thread Nicolai Hähnle
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 46 +
 src/gallium/drivers/radeon/r600_pipe_common.h |  1 +
 src/gallium/drivers/radeon/r600_query.c   | 49 +++
 3 files changed, 51 insertions(+), 45 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 79e624e..41acfbc 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -737,50 +737,6 @@ static uint64_t r600_get_timestamp(struct pipe_screen 
*screen)
rscreen->info.r600_clock_crystal_freq;
 }
 
-static int r600_get_driver_query_info(struct pipe_screen *screen,
- unsigned index,
- struct pipe_driver_query_info *info)
-{
-   struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
-   struct pipe_driver_query_info list[] = {
-   {"num-compilations", R600_QUERY_NUM_COMPILATIONS, {0}, 
PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-   {"num-shaders-created", R600_QUERY_NUM_SHADERS_CREATED, {0}, 
PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-   {"draw-calls", R600_QUERY_DRAW_CALLS, {0}},
-   {"requested-VRAM", R600_QUERY_REQUESTED_VRAM, 
{rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-   {"requested-GTT", R600_QUERY_REQUESTED_GTT, 
{rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-   {"buffer-wait-time", R600_QUERY_BUFFER_WAIT_TIME, {0}, 
PIPE_DRIVER_QUERY_TYPE_MICROSECONDS,
-PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-   {"num-cs-flushes", R600_QUERY_NUM_CS_FLUSHES, {0}},
-   {"num-bytes-moved", R600_QUERY_NUM_BYTES_MOVED, {0}, 
PIPE_DRIVER_QUERY_TYPE_BYTES,
-PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-   {"VRAM-usage", R600_QUERY_VRAM_USAGE, 
{rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-   {"GTT-usage", R600_QUERY_GTT_USAGE, {rscreen->info.gart_size}, 
PIPE_DRIVER_QUERY_TYPE_BYTES},
-   {"GPU-load", R600_QUERY_GPU_LOAD, {100}},
-   {"temperature", R600_QUERY_GPU_TEMPERATURE, {125}},
-   {"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, {0}, 
PIPE_DRIVER_QUERY_TYPE_HZ},
-   {"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, {0}, 
PIPE_DRIVER_QUERY_TYPE_HZ},
-   };
-   unsigned num_queries;
-
-   if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
-   num_queries = Elements(list);
-   else if (rscreen->info.drm_major == 3)
-   num_queries = Elements(list) - 3;
-   else
-   num_queries = Elements(list) - 4;
-
-   if (!info)
-   return num_queries;
-
-   if (index >= num_queries)
-   return 0;
-
-   *info = list[index];
-   return 1;
-}
-
 static void r600_fence_reference(struct pipe_screen *screen,
 struct pipe_fence_handle **dst,
 struct pipe_fence_handle *src)
@@ -968,7 +924,6 @@ bool r600_common_screen_init(struct r600_common_screen 
*rscreen,
rscreen->b.get_device_vendor = r600_get_device_vendor;
rscreen->b.get_compute_param = r600_get_compute_param;
rscreen->b.get_paramf = r600_get_paramf;
-   rscreen->b.get_driver_query_info = r600_get_driver_query_info;
rscreen->b.get_timestamp = r600_get_timestamp;
rscreen->b.fence_finish = r600_fence_finish;
rscreen->b.fence_reference = r600_fence_reference;
@@ -984,6 +939,7 @@ bool r600_common_screen_init(struct r600_common_screen 
*rscreen,
}
 
r600_init_screen_texture_functions(rscreen);
+   r600_init_screen_query_functions(rscreen);
 
rscreen->ws = ws;
rscreen->family = rscreen->info.family;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index b7f1a23..d2c54f3 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -534,6 +534,7 @@ uint64_t r600_gpu_load_begin(struct r600_common_screen 
*rscreen);
 unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin);
 
 /* r600_query.c */
+void r600_init_screen_query_functions(struct r600_common_screen *rscreen);
 void r600_query_init(struct r600_common_context *rctx);
 void r600_suspend_nontimer_queries(struct r600_common_context *ctx);
 void r600_resume_nontimer_queries(struct r600_common_context *ctx);
diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 9a54025..8aa8774 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -1017,6 +1017,50 @@ err:
return;
 }
 

[Mesa-dev] [PATCH 06/10] radeon: convert hardware queries to the new style

2015-11-13 Thread Nicolai Hähnle
Move r600_query and r600_query_hw into the header because we will want to
reuse the buffer handling and suspend/resume logic outside of the common
radeon code.
---
 src/gallium/drivers/radeon/r600_query.c | 281 +++-
 src/gallium/drivers/radeon/r600_query.h |  39 +
 2 files changed, 172 insertions(+), 148 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index c7350f1..eb2a563 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -26,35 +26,6 @@
 #include "r600_cs.h"
 #include "util/u_memory.h"
 
-struct r600_query_buffer {
-   /* The buffer where query results are stored. */
-   struct r600_resource*buf;
-   /* Offset of the next free result after current query data */
-   unsignedresults_end;
-   /* If a query buffer is full, a new buffer is created and the old one
-* is put in here. When we calculate the result, we sum up the samples
-* from all buffers. */
-   struct r600_query_buffer*previous;
-};
-
-struct r600_query {
-   struct r600_query_ops *ops;
-
-   /* The query buffer and how many results are in it. */
-   struct r600_query_bufferbuffer;
-   /* The type of query */
-   unsignedtype;
-   /* Size of the result in memory for both begin_query and end_query,
-* this can be one or two numbers, or it could even be a size of a 
structure. */
-   unsignedresult_size;
-   /* The number of dwords for begin_query or end_query. */
-   unsignednum_cs_dw;
-   /* linked list of queries */
-   struct list_headlist;
-   /* For transform feedback: which stream the query is for */
-   unsigned stream;
-};
-
 /* Queries without buffer handling or suspend/resume. */
 struct r600_query_sw {
struct r600_query b;
@@ -240,23 +211,23 @@ static struct pipe_query *r600_query_sw_create(struct 
pipe_context *ctx,
return (struct pipe_query *)query;
 }
 
-static void r600_do_destroy_query(struct r600_common_context *, struct 
r600_query *);
-static boolean r600_do_begin_query(struct r600_common_context *, struct 
r600_query *);
-static void r600_do_end_query(struct r600_common_context *, struct r600_query 
*);
-static boolean r600_do_get_query_result(struct r600_common_context *,
-   struct r600_query *, boolean wait,
-   union pipe_query_result *result);
-static void r600_do_render_condition(struct r600_common_context *,
-struct r600_query *, boolean condition,
-uint mode);
+void r600_query_hw_destroy(struct r600_common_context *rctx,
+  struct r600_query *rquery)
+{
+   struct r600_query_hw *query = (struct r600_query_hw *)rquery;
+   struct r600_query_buffer *prev = query->buffer.previous;
 
-static struct r600_query_ops legacy_query_ops = {
-   .destroy = r600_do_destroy_query,
-   .begin = r600_do_begin_query,
-   .end = r600_do_end_query,
-   .get_result = r600_do_get_query_result,
-   .render_condition = r600_do_render_condition,
-};
+   /* Release all query buffers. */
+   while (prev) {
+   struct r600_query_buffer *qbuf = prev;
+   prev = prev->previous;
+   pipe_resource_reference((struct pipe_resource**)>buf, 
NULL);
+   FREE(qbuf);
+   }
+
+   pipe_resource_reference((struct pipe_resource**)>buffer.buf, 
NULL);
+   FREE(rquery);
+}
 
 static bool r600_is_timer_query(unsigned type)
 {
@@ -317,6 +288,77 @@ static struct r600_resource *r600_new_query_buffer(struct 
r600_common_context *c
return buf;
 }
 
+static boolean r600_query_hw_begin(struct r600_common_context *, struct 
r600_query *);
+static void r600_query_hw_end(struct r600_common_context *, struct r600_query 
*);
+static boolean r600_query_hw_get_result(struct r600_common_context *,
+   struct r600_query *, boolean wait,
+   union pipe_query_result *result);
+static void r600_do_render_condition(struct r600_common_context *,
+struct r600_query *, boolean condition,
+uint mode);
+
+static struct r600_query_ops query_hw_ops = {
+   .destroy = r600_query_hw_destroy,
+   .begin = r600_query_hw_begin,
+   .end = r600_query_hw_end,
+   .get_result = r600_query_hw_get_result,
+   .render_condition = r600_do_render_condition,
+};
+
+static struct pipe_query *r600_query_hw_create(struct r600_common_context 
*rctx,
+  unsigned query_type,
+   

[Mesa-dev] [PATCH 07/10] radeon: split hw query buffer handling from cs emit

2015-11-13 Thread Nicolai Hähnle
The idea here is that driver queries implemented outside of common code
will use the same query buffer handling with different logic for starting
and stopping the corresponding counters.
---
 src/gallium/drivers/radeon/r600_query.c | 198 +++-
 src/gallium/drivers/radeon/r600_query.h |  20 
 2 files changed, 135 insertions(+), 83 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index eb2a563..4b201fd 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -229,21 +229,10 @@ void r600_query_hw_destroy(struct r600_common_context 
*rctx,
FREE(rquery);
 }
 
-static bool r600_is_timer_query(unsigned type)
+static struct r600_resource *r600_new_query_buffer(struct r600_common_context 
*ctx,
+  struct r600_query_hw *query)
 {
-   return type == PIPE_QUERY_TIME_ELAPSED ||
-  type == PIPE_QUERY_TIMESTAMP;
-}
-
-static bool r600_query_needs_begin(unsigned type)
-{
-   return type != PIPE_QUERY_TIMESTAMP;
-}
-
-static struct r600_resource *r600_new_query_buffer(struct r600_common_context 
*ctx, unsigned type)
-{
-   unsigned j, i, num_results, buf_size = 4096;
-   uint32_t *results;
+   unsigned buf_size = 4096;
 
/* Queries are normally read by the CPU after
 * being written by the gpu, hence staging is probably a good
@@ -253,14 +242,34 @@ static struct r600_resource *r600_new_query_buffer(struct 
r600_common_context *c
pipe_buffer_create(ctx->b.screen, PIPE_BIND_CUSTOM,
   PIPE_USAGE_STAGING, buf_size);
 
-   switch (type) {
-   case PIPE_QUERY_OCCLUSION_COUNTER:
-   case PIPE_QUERY_OCCLUSION_PREDICATE:
-   results = r600_buffer_map_sync_with_rings(ctx, buf, 
PIPE_TRANSFER_WRITE);
-   memset(results, 0, buf_size);
+   if (query->ops->prepare_buffer)
+   query->ops->prepare_buffer(ctx, query, buf);
+
+   return buf;
+}
+
+static void r600_query_hw_prepare_buffer(struct r600_common_context *ctx,
+struct r600_query_hw *query,
+struct r600_resource *buffer)
+ {
+   uint32_t *results;
+
+   if (query->b.type == PIPE_QUERY_TIME_ELAPSED ||
+   query->b.type == PIPE_QUERY_TIMESTAMP)
+   return;
+
+   results = r600_buffer_map_sync_with_rings(ctx, buffer,
+ PIPE_TRANSFER_WRITE);
+
+   memset(results, 0, buffer->b.b.width0);
+
+   if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER ||
+   query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE) {
+   unsigned num_results;
+   unsigned i, j;
 
/* Set top bits for unused backends. */
-   num_results = buf_size / (16 * ctx->max_db);
+   num_results = buffer->b.b.width0 / (16 * ctx->max_db);
for (j = 0; j < num_results; j++) {
for (i = 0; i < ctx->max_db; i++) {
if (!(ctx->backend_mask & (1<max_db;
}
-   break;
-   case PIPE_QUERY_TIME_ELAPSED:
-   case PIPE_QUERY_TIMESTAMP:
-   break;
-   case PIPE_QUERY_PRIMITIVES_EMITTED:
-   case PIPE_QUERY_PRIMITIVES_GENERATED:
-   case PIPE_QUERY_SO_STATISTICS:
-   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-   case PIPE_QUERY_PIPELINE_STATISTICS:
-   results = r600_buffer_map_sync_with_rings(ctx, buf, 
PIPE_TRANSFER_WRITE);
-   memset(results, 0, buf_size);
-   break;
-   default:
-   assert(0);
}
-   return buf;
 }
 
 static boolean r600_query_hw_begin(struct r600_common_context *, struct 
r600_query *);
@@ -305,6 +299,21 @@ static struct r600_query_ops query_hw_ops = {
.render_condition = r600_do_render_condition,
 };
 
+static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
+   struct r600_query_hw *query,
+   struct r600_resource *buffer,
+   uint64_t va);
+static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
+  struct r600_query_hw *query,
+  struct r600_resource *buffer,
+  uint64_t va);
+
+static struct r600_query_hw_ops query_hw_default_hw_ops = {
+   .prepare_buffer = r600_query_hw_prepare_buffer,
+   .emit_start = r600_query_hw_do_emit_start,
+   .emit_stop = r600_query_hw_do_emit_stop,
+};
+
 static struct pipe_query 

[Mesa-dev] [PATCH 09/10] radeon: expose r600_query_hw functions for reuse

2015-11-13 Thread Nicolai Hähnle
---
 src/gallium/drivers/radeon/r600_query.c | 30 +-
 src/gallium/drivers/radeon/r600_query.h | 10 ++
 2 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 59e2a58..4f89634 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -282,11 +282,6 @@ static void r600_query_hw_prepare_buffer(struct 
r600_common_context *ctx,
}
 }
 
-static boolean r600_query_hw_begin(struct r600_common_context *, struct 
r600_query *);
-static void r600_query_hw_end(struct r600_common_context *, struct r600_query 
*);
-static boolean r600_query_hw_get_result(struct r600_common_context *,
-   struct r600_query *, boolean wait,
-   union pipe_query_result *result);
 static void r600_do_render_condition(struct r600_common_context *,
 struct r600_query *, boolean condition,
 uint mode);
@@ -321,6 +316,16 @@ static struct r600_query_hw_ops query_hw_default_hw_ops = {
.add_result = r600_query_hw_add_result,
 };
 
+boolean r600_query_hw_init(struct r600_common_context *rctx,
+  struct r600_query_hw *query)
+{
+   query->buffer.buf = r600_new_query_buffer(rctx, query);
+   if (!query->buffer.buf)
+   return FALSE;
+
+   return TRUE;
+}
+
 static struct pipe_query *r600_query_hw_create(struct r600_common_context 
*rctx,
   unsigned query_type,
   unsigned index)
@@ -370,8 +375,7 @@ static struct pipe_query *r600_query_hw_create(struct 
r600_common_context *rctx,
return NULL;
}
 
-   query->buffer.buf = r600_new_query_buffer(rctx, query);
-   if (!query->buffer.buf) {
+   if (!r600_query_hw_init(rctx, query)) {
FREE(query);
return NULL;
}
@@ -645,8 +649,8 @@ static boolean r600_begin_query(struct pipe_context *ctx,
return rquery->ops->begin(rctx, rquery);
 }
 
-static boolean r600_query_hw_begin(struct r600_common_context *rctx,
-  struct r600_query *rquery)
+boolean r600_query_hw_begin(struct r600_common_context *rctx,
+   struct r600_query *rquery)
 {
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
struct r600_query_buffer *prev = query->buffer.previous;
@@ -691,7 +695,7 @@ static void r600_end_query(struct pipe_context *ctx, struct 
pipe_query *query)
rquery->ops->end(rctx, rquery);
 }
 
-static void r600_query_hw_end(struct r600_common_context *rctx,
+void r600_query_hw_end(struct r600_common_context *rctx,
  struct r600_query *rquery)
 {
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
@@ -858,9 +862,9 @@ static void r600_query_hw_clear_result(struct r600_query_hw 
*query,
util_query_clear_result(result, query->b.type);
 }
 
-static boolean r600_query_hw_get_result(struct r600_common_context *rctx,
-   struct r600_query *rquery,
-   boolean wait, union pipe_query_result 
*result)
+boolean r600_query_hw_get_result(struct r600_common_context *rctx,
+struct r600_query *rquery,
+boolean wait, union pipe_query_result *result)
 {
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
struct r600_query_buffer *qbuf;
diff --git a/src/gallium/drivers/radeon/r600_query.h 
b/src/gallium/drivers/radeon/r600_query.h
index 17a9da3..4e357f5 100644
--- a/src/gallium/drivers/radeon/r600_query.h
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -122,7 +122,17 @@ struct r600_query_hw {
unsigned stream;
 };
 
+boolean r600_query_hw_init(struct r600_common_context *rctx,
+  struct r600_query_hw *query);
 void r600_query_hw_destroy(struct r600_common_context *rctx,
   struct r600_query *rquery);
+boolean r600_query_hw_begin(struct r600_common_context *rctx,
+   struct r600_query *rquery);
+void r600_query_hw_end(struct r600_common_context *rctx,
+  struct r600_query *rquery);
+boolean r600_query_hw_get_result(struct r600_common_context *rctx,
+struct r600_query *rquery,
+boolean wait,
+union pipe_query_result *result);
 
 #endif /* R600_QUERY_H */
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/10] radeon: cleanup driver query list

2015-11-13 Thread Nicolai Hähnle
---
 src/gallium/drivers/radeon/r600_query.c | 84 +
 1 file changed, 55 insertions(+), 29 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 8aa8774..60381b2 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -1017,39 +1017,50 @@ err:
return;
 }
 
+#define X(name_, query_type_, type_, result_type_) \
+   { \
+   .name = name_, \
+   .query_type = R600_QUERY_##query_type_, \
+   .type = PIPE_DRIVER_QUERY_TYPE_##type_, \
+   .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \
+   .group_id = ~(unsigned)0 \
+   }
+
+static struct pipe_driver_query_info r600_driver_query_list[] = {
+   X("num-compilations",   NUM_COMPILATIONS,   UINT64, 
CUMULATIVE),
+   X("num-shaders-created",NUM_SHADERS_CREATED,UINT64, 
CUMULATIVE),
+   X("draw-calls", DRAW_CALLS, UINT64, 
CUMULATIVE),
+   X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE),
+   X("requested-GTT",  REQUESTED_GTT,  BYTES, AVERAGE),
+   X("buffer-wait-time",   BUFFER_WAIT_TIME,   MICROSECONDS, 
CUMULATIVE),
+   X("num-cs-flushes", NUM_CS_FLUSHES, UINT64, 
CUMULATIVE),
+   X("num-bytes-moved",NUM_BYTES_MOVED,BYTES, 
CUMULATIVE),
+   X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE),
+   X("GTT-usage",  GTT_USAGE,  BYTES, AVERAGE),
+   X("GPU-load",   GPU_LOAD,   UINT64, 
AVERAGE),
+   X("temperature",GPU_TEMPERATURE,UINT64, 
AVERAGE),
+   X("shader-clock",   CURRENT_GPU_SCLK,   HZ, AVERAGE),
+   X("memory-clock",   CURRENT_GPU_MCLK,   HZ, AVERAGE),
+};
+
+#undef X
+
+static unsigned r600_get_num_queries(struct r600_common_screen *rscreen)
+{
+   if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
+   return Elements(r600_driver_query_list);
+   else if (rscreen->info.drm_major == 3)
+   return Elements(r600_driver_query_list) - 3;
+   else
+   return Elements(r600_driver_query_list) - 4;
+}
+
 static int r600_get_driver_query_info(struct pipe_screen *screen,
  unsigned index,
  struct pipe_driver_query_info *info)
 {
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
-   struct pipe_driver_query_info list[] = {
-   {"num-compilations", R600_QUERY_NUM_COMPILATIONS, {0}, 
PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-   {"num-shaders-created", R600_QUERY_NUM_SHADERS_CREATED, {0}, 
PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-   {"draw-calls", R600_QUERY_DRAW_CALLS, {0}},
-   {"requested-VRAM", R600_QUERY_REQUESTED_VRAM, 
{rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-   {"requested-GTT", R600_QUERY_REQUESTED_GTT, 
{rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-   {"buffer-wait-time", R600_QUERY_BUFFER_WAIT_TIME, {0}, 
PIPE_DRIVER_QUERY_TYPE_MICROSECONDS,
-PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-   {"num-cs-flushes", R600_QUERY_NUM_CS_FLUSHES, {0}},
-   {"num-bytes-moved", R600_QUERY_NUM_BYTES_MOVED, {0}, 
PIPE_DRIVER_QUERY_TYPE_BYTES,
-PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-   {"VRAM-usage", R600_QUERY_VRAM_USAGE, 
{rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-   {"GTT-usage", R600_QUERY_GTT_USAGE, {rscreen->info.gart_size}, 
PIPE_DRIVER_QUERY_TYPE_BYTES},
-   {"GPU-load", R600_QUERY_GPU_LOAD, {100}},
-   {"temperature", R600_QUERY_GPU_TEMPERATURE, {125}},
-   {"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, {0}, 
PIPE_DRIVER_QUERY_TYPE_HZ},
-   {"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, {0}, 
PIPE_DRIVER_QUERY_TYPE_HZ},
-   };
-   unsigned num_queries;
-
-   if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
-   num_queries = Elements(list);
-   else if (rscreen->info.drm_major == 3)
-   num_queries = Elements(list) - 3;
-   else
-   num_queries = Elements(list) - 4;
+   unsigned num_queries = r600_get_num_queries(rscreen);
 
if (!info)
return num_queries;
@@ -1057,7 +1068,22 @@ static int r600_get_driver_query_info(struct pipe_screen 
*screen,
if (index >= num_queries)
return 0;
 
-   *info = list[index];
+   *info = r600_driver_query_list[index];
+
+   switch 

[Mesa-dev] [PATCH 03/10] radeon: move R600_QUERY_* constants into a new query header file

2015-11-13 Thread Nicolai Hähnle
More query-related structures will have to be moved into their own
header file to support hardware-specific performance counters.
---
 src/gallium/drivers/radeon/Makefile.sources   |  1 +
 src/gallium/drivers/radeon/r600_pipe_common.h | 15 
 src/gallium/drivers/radeon/r600_query.c   |  1 +
 src/gallium/drivers/radeon/r600_query.h   | 49 +++
 4 files changed, 51 insertions(+), 15 deletions(-)
 create mode 100644 src/gallium/drivers/radeon/r600_query.h

diff --git a/src/gallium/drivers/radeon/Makefile.sources 
b/src/gallium/drivers/radeon/Makefile.sources
index f63790c..d840ff8 100644
--- a/src/gallium/drivers/radeon/Makefile.sources
+++ b/src/gallium/drivers/radeon/Makefile.sources
@@ -7,6 +7,7 @@ C_SOURCES := \
r600_pipe_common.c \
r600_pipe_common.h \
r600_query.c \
+   r600_query.h \
r600_streamout.c \
r600_texture.c \
radeon_uvd.c \
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index d2c54f3..419f785 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -47,21 +47,6 @@
 #define R600_RESOURCE_FLAG_FLUSHED_DEPTH   (PIPE_RESOURCE_FLAG_DRV_PRIV << 
1)
 #define R600_RESOURCE_FLAG_FORCE_TILING
(PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
 
-#define R600_QUERY_DRAW_CALLS  (PIPE_QUERY_DRIVER_SPECIFIC + 0)
-#define R600_QUERY_REQUESTED_VRAM  (PIPE_QUERY_DRIVER_SPECIFIC + 1)
-#define R600_QUERY_REQUESTED_GTT   (PIPE_QUERY_DRIVER_SPECIFIC + 2)
-#define R600_QUERY_BUFFER_WAIT_TIME(PIPE_QUERY_DRIVER_SPECIFIC + 3)
-#define R600_QUERY_NUM_CS_FLUSHES  (PIPE_QUERY_DRIVER_SPECIFIC + 4)
-#define R600_QUERY_NUM_BYTES_MOVED (PIPE_QUERY_DRIVER_SPECIFIC + 5)
-#define R600_QUERY_VRAM_USAGE  (PIPE_QUERY_DRIVER_SPECIFIC + 6)
-#define R600_QUERY_GTT_USAGE   (PIPE_QUERY_DRIVER_SPECIFIC + 7)
-#define R600_QUERY_GPU_TEMPERATURE (PIPE_QUERY_DRIVER_SPECIFIC + 8)
-#define R600_QUERY_CURRENT_GPU_SCLK(PIPE_QUERY_DRIVER_SPECIFIC + 9)
-#define R600_QUERY_CURRENT_GPU_MCLK(PIPE_QUERY_DRIVER_SPECIFIC + 10)
-#define R600_QUERY_GPU_LOAD(PIPE_QUERY_DRIVER_SPECIFIC + 11)
-#define R600_QUERY_NUM_COMPILATIONS(PIPE_QUERY_DRIVER_SPECIFIC + 12)
-#define R600_QUERY_NUM_SHADERS_CREATED (PIPE_QUERY_DRIVER_SPECIFIC + 13)
-
 #define R600_CONTEXT_STREAMOUT_FLUSH   (1u << 0)
 #define R600_CONTEXT_PRIVATE_FLAG  (1u << 1)
 
diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 60381b2..b79d2d0 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -22,6 +22,7 @@
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#include "r600_query.h"
 #include "r600_cs.h"
 #include "util/u_memory.h"
 
diff --git a/src/gallium/drivers/radeon/r600_query.h 
b/src/gallium/drivers/radeon/r600_query.h
new file mode 100644
index 000..fc8b47b
--- /dev/null
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *  Nicolai Hähnle 
+ *
+ */
+
+#ifndef R600_QUERY_H
+#define R600_QUERY_H
+
+#include "pipe/p_defines.h"
+
+#define R600_QUERY_DRAW_CALLS  (PIPE_QUERY_DRIVER_SPECIFIC + 0)
+#define R600_QUERY_REQUESTED_VRAM  (PIPE_QUERY_DRIVER_SPECIFIC + 1)
+#define R600_QUERY_REQUESTED_GTT   (PIPE_QUERY_DRIVER_SPECIFIC + 2)
+#define R600_QUERY_BUFFER_WAIT_TIME(PIPE_QUERY_DRIVER_SPECIFIC + 3)
+#define R600_QUERY_NUM_CS_FLUSHES  (PIPE_QUERY_DRIVER_SPECIFIC + 4)
+#define R600_QUERY_NUM_BYTES_MOVED (PIPE_QUERY_DRIVER_SPECIFIC + 5)
+#define R600_QUERY_VRAM_USAGE  (PIPE_QUERY_DRIVER_SPECIFIC + 6)
+#define R600_QUERY_GTT_USAGE   

Re: [Mesa-dev] [Nouveau] llvm TGSI backend (WIP) questions

2015-11-13 Thread Connor Abbott
On Fri, Nov 13, 2015 at 9:38 AM, Ilia Mirkin  wrote:
> On Fri, Nov 13, 2015 at 9:25 AM, Emil Velikov  
> wrote:
>> Hello Hans,
>>
>> Not to muddy the waters or anything, have you thought about the NIR
>> integration that Rob was thinking about ?
>> I'm pretty sure he'll be happy to have extra people helping him out.
>
> How would that in any way plug into llvm or nouveau? There's no OpenCL
> C -> NIR, and there's no NIR -> nv50 IR...
>
>   -ilia

Not to mention that there's no support for unstructured control flow
in NIR right now, which is a requirement for OpenCL. There might be,
but don't count on it.

Personally, I would think that the best thing long-term would be to
add SPIR-V as a possible IR and convert OpenCL C, since TGSI is...
err... less than perfect, for a variety of reasons, and adding a
SPIR-V parser is going to be easier and more stable than integrating
into the LLVM interfaces. Unfortunately, the final version of the spec
isn't released yet, and the only tool for producing it is currently
based on an older version of LLVM, but people are working on both
problems and at least one of them isn't going to be a problem very
soon :)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: set matrix_stride for non matrices with atomic counter buffers

2015-11-13 Thread Tapani Pälli

On 11/13/2015 05:57 PM, Ilia Mirkin wrote:

On Fri, Nov 13, 2015 at 10:53 AM, Tapani Pälli  wrote:

On 11/12/2015 05:47 PM, Ilia Mirkin wrote:

On Mon, Nov 2, 2015 at 6:36 AM, Tapani Pälli 
wrote:

Patch sets matrix_stride to 0 for non-matrix uniforms that are in an
atomic counter buffer. Matrix stride calculation for actual matrix
uniforms is done during link_assign_uniform_locations.

  From ARB_program_interface_query specification:

GL_MATRIX_STRIDE:

 "For active variables not declared as a matrix or array of matrices,
 zero is written to <params>.  For active variables not backed by a
 buffer object, -1 is written to <params>, regardless of the variable
 type."

Signed-off-by: Tapani Pälli 
---
   src/glsl/link_atomics.cpp | 2 ++
   1 file changed, 2 insertions(+)

diff --git a/src/glsl/link_atomics.cpp b/src/glsl/link_atomics.cpp
index cdcc06d..3aa52db 100644
--- a/src/glsl/link_atomics.cpp
+++ b/src/glsl/link_atomics.cpp
@@ -240,6 +240,8 @@ link_assign_atomic_counter_resources(struct
gl_context *ctx,
storage->offset = var->data.atomic.offset;
storage->array_stride = (var->type->is_array() ?

var->type->without_array()->atomic_size() : 0);
+ if (!var->type->is_matrix())
+storage->matrix_stride = 0;

Can atomics ever be in matrices?


Nope, but one can query the matrix stride property of a uniform that is
'backed by' an atomic counter buffer.

Right, I get that... but why the if (!var->type->is_matrix())?


Yes, that's right, we should be able to just initialize matrix_stride to 
0 without the check. I'll test this and then the check can be removed.


// Tapani

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] Make generation of framebuffer and renderbuffer id's threadsafe

2015-11-13 Thread Samuel Maroy
This should fix the issue described in
https://bugs.freedesktop.org/show_bug.cgi?id=92633

---
 src/mesa/main/fbobject.c | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index fe6bdc2..6398ff6 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -1637,6 +1637,8 @@ create_render_buffers(struct gl_context *ctx, GLsizei n, 
GLuint *renderbuffers,
if (!renderbuffers)
   return;

+   mtx_lock(>Shared->Mutex);
+
first = _mesa_HashFindFreeKeyBlock(ctx->Shared->RenderBuffers, n);

for (i = 0; i < n; i++) {
@@ -1647,11 +1649,10 @@ create_render_buffers(struct gl_context *ctx, GLsizei 
n, GLuint *renderbuffers,
  allocate_renderbuffer(ctx, name, func);
   } else {
  /* insert a dummy renderbuffer into the hash table */
- mtx_lock(>Shared->Mutex);
  _mesa_HashInsert(ctx->Shared->RenderBuffers, name, 
);
- mtx_unlock(>Shared->Mutex);
   }
}
+   mtx_unlock(>Shared->Mutex);
 }


@@ -2650,6 +2651,7 @@ create_framebuffers(GLsizei n, GLuint *framebuffers, bool 
dsa)
if (!framebuffers)
   return;

+   mtx_lock(>Shared->Mutex);
first = _mesa_HashFindFreeKeyBlock(ctx->Shared->FrameBuffers, n);

for (i = 0; i < n; i++) {
@@ -2660,16 +2662,17 @@ create_framebuffers(GLsizei n, GLuint *framebuffers, 
bool dsa)
  fb = ctx->Driver.NewFramebuffer(ctx, framebuffers[i]);
  if (!fb) {
 _mesa_error(ctx, GL_OUT_OF_MEMORY, "%s", func);
-return;
+goto beach;
  }
   }
   else
  fb = 

-  mtx_lock(>Shared->Mutex);
   _mesa_HashInsert(ctx->Shared->FrameBuffers, name, fb);
-  mtx_unlock(>Shared->Mutex);
}
+
+beach:
+   mtx_unlock(>Shared->Mutex);
 }


--
2.1.4

This message is subject to the following terms and conditions: MAIL 
DISCLAIMER
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir/glsl_to_nir: use _mesa_fls() to compute num_textures

2015-11-13 Thread Juan A. Suarez Romero
On Fri, 2015-11-13 at 07:37 -0800, Jason Ekstrand wrote:
> I didn't want to pull a non-inline mesa function into NIR and add a
> link dependency and I was too lazy to move it into util.


But at this moment _mesa_fls() is an inline function. So I guess it is
safe to push it, isn't it?


J.A.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: error out in indirect draw when vertex bindings mismatch

2015-11-13 Thread Tapani Pälli

On 11/13/2015 03:40 PM, Samuel Iglesias Gonsálvez wrote:


On 13/11/15 11:32, Tapani Pälli wrote:

Patch adds additional mask for tracking which vertex buffer bindings
are set. This array can be directly compared to which vertex arrays
are enabled and should match when drawing.

Fixes following CTS tests:

ES31-CTS.draw_indirect.negative-noVBO-arrays
ES31-CTS.draw_indirect.negative-noVBO-elements

Signed-off-by: Tapani Pälli 
---
  src/mesa/main/api_validate.c | 13 +
  src/mesa/main/mtypes.h   |  3 +++
  src/mesa/main/varray.c   |  5 +
  3 files changed, 21 insertions(+)

diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
index a490189..e82e89a 100644
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -710,6 +710,19 @@ valid_draw_indirect(struct gl_context *ctx,
return GL_FALSE;
 }
  
+   /* From OpenGL ES 3.1 spec. section 10.5:

+* "An INVALID_OPERATION error is generated if zero is bound to
+* VERTEX_ARRAY_BINDING, DRAW_INDIRECT_BUFFER or to any enabled
+* vertex array."
+*
+* Here we check that vertex buffer bindings match with enabled
+* vertex arrays.
+*/
+   if (ctx->Array.VAO->_Enabled != ctx->Array.VAO->VertexBindingMask) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, "%s(No VBO bound)", name);
+  return GL_FALSE;
+   }
+
 if (!_mesa_valid_prim_mode(ctx, mode, name))
return GL_FALSE;
  
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h

index 4efdf1e..6c6187f 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1419,6 +1419,9 @@ struct gl_vertex_array_object
 /** Vertex buffer bindings */
 struct gl_vertex_buffer_binding VertexBinding[VERT_ATTRIB_MAX];
  
+   /** Mask indicating which binding points are set. */

+   GLbitfield64 VertexBindingMask;
+
 /** Mask of VERT_BIT_* values indicating which arrays are enabled */
 GLbitfield64 _Enabled;
  
diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c

index 887d0c0..0a94c5a 100644
--- a/src/mesa/main/varray.c
+++ b/src/mesa/main/varray.c
@@ -174,6 +174,11 @@ bind_vertex_buffer(struct gl_context *ctx,
binding->Offset = offset;
binding->Stride = stride;
  
+  if (vbo == ctx->Shared->NullBufferObj)

+ vao->VertexBindingMask &= ~VERT_BIT(index);
+  else
+ vao->VertexBindingMask |= VERT_BIT(index);
+

Shouldn't it be VERT_BIT_GENERIC()?


I used VERT_BIT because that is used when enabling vertex arrays and 
this mask should match that one.



Sam


vao->NewArrays |= binding->_BoundArrays;
 }
  }



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/7] [v2] i965/meta/gen9: Individually fast clear color attachments

2015-11-13 Thread Ben Widawsky
On Fri, Nov 13, 2015 at 04:55:33PM +0100, Neil Roberts wrote:
> Hi,
> 
> You don't seem to have included any of the suggestions I made in my
> review. Was this deliberate? If not, the main points were:
> 

It was not intentional, I apologize. It just got lost on top of Chad's feedback
which conflicted with yours in these two statements - he asked for something
similar and I didn't go back to check exactly what you said, assuming it was the
same thing.

> • You don't need to call brw_fast_clear_init or use_rectlist in the
>   function because these are already called before entering it.
> 
> • I don't think it's worth creating a framebuffer. Instead you can just
>   call _mesa_meta_drawbuffers_from_bitfield(1 << index) in the loop.
>   Modifying the draw buffers state should be ok because it's saved in
>   the meta state and it's already done for the Gen8 code path.
> 
> I went ahead and tried the changes in a patch here:
> 
> https://github.com/bpeel/mesa/commit/b2aa8f2d90572392030e5177952bf
> 
> It doesn't cause any Jenkins regressions. Feel free to squash it into
> the patch if you want, or of course if you prefer to keep your patch as
> it is it's up to you.

Thanks a lot, I will squash it in - and sorry again about ignoring your
feedback.

> 
> Regards,
> - Neil
> 
> Ben Widawsky  writes:

[snip]

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Can't get OpenGL 3.x inside VMware Workstation 12 (Ubuntu guest)

2015-11-13 Thread Brian Paul

Great!  Glad to hear it.  I'll check in the updated documentation today.

-Brian

On 11/13/2015 03:03 AM, Valera Rozuvan wrote:

Hi Brian,

The updated instructions worked for me. Thank you = )

Regards,
Valera Rozuvan | 
https://urldefense.proofpoint.com/v2/url?u=http-3A__valera.rozuvan.net_=BQIBaQ=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs=T0t4QG7chq2ZwJo6wilkFznRSFy-8uDKartPGbomVj8=23mynH2CtHPfImZY-vSqq61hI3-5ZjhRGipoz9K8OJc=gg_NyTw0i8suRhBXUgjcqfHlEiHVvyOHUoboATZm7LU=

Skype: valera.rozuvan
E-mail: valera.rozu...@gmail.com
Phone: +38 (050) 837-29-73



On Wed, Nov 11, 2015 at 11:18 PM, Emil Velikov  wrote:

On 11 November 2015 at 19:51, Brian Paul  wrote:

On 11/11/2015 11:38 AM, Emil Velikov wrote:


On 11 November 2015 at 18:25, Thomas Hellstrom 
wrote:


On 11/11/2015 07:07 PM, Brian Paul wrote:


On 11/11/2015 10:44 AM, Emil Velikov wrote:


On 11 November 2015 at 16:48, Brian Paul  wrote:


On 11/11/2015 08:44 AM, Emil Velikov wrote:





I have seen similar type of documents in the past, most of which
going
out of date very quickly due to distribution changes and/or others.
Wondering how you'll feel about "check your distro and add svga to
the
gallium-drivers array" style of instructions ?




I'm afraid I don't quite understand what you're saying there.  Can you
elaborate?



Rather than walking through the requirements, configure and make/make
install steps, just forward people to the distro specific wiki on "how
to build mesa/kernel" and explicitly mention the differences:
mesa:
- XA must be enabled: --enable-xa
- svga must be listed in the gallium drivers:
--with-gallium-drivers=svga...

kernel:
- Set DRM_VMWGFX

others...



I guess I've never seen those wikis.  I'd have to search for them, but
I really don't have the time now.

We actually have an in-house shell script that installs all the
pre-req packages, pulls the git trees, builds and installs for a
variety of guest OSes.  But it has some VMware-specific stuff that I'd
have to trim out before making public.




Related: does the upstream [1] vmwgfx module work well when combined
with upstream core drm across different versions ? Considering how
well Thomas is handling upstreaming shouldn't the module from the
kernel be recommended ?



Either should be fine at this point but the build instructions cover
the case of one having an older distro that may not have the
GL3-enabled kernel module already.



The upstream[1] vmwgfx module should work well with any linux kernel
dating back to 2.6.32 unless the distro has changed the kernel API from
the base version. It ships with builtin stripped drm and ttm to handle
compatibility issues, and is intended for people (mostly including
ourselves and our QA team) that want to try out new features without
installing a completely new kernel.


Ok seems that my point is too subtle, so I'll try from another angle.

The wiki instructions say "nuke the vmwgfx.ko module" and implicitly
"keep drm.ko". If we ignore the unlikely cases where either one and/or
both is built-in, we can have a case where the new vmwgfx is build
against core drm from the upstream, yet the downstream drm module
is/gets loaded. As core drm often goes through various changes, you
can see how bad things are likely to happen.



Well, the above-mentioned build script doesn't touch drm.ko and works on
about 14 different versions of Ubuntu, Mint, Fedora, RHEL, etc. so I don't
think we've ever seen that conflict.  But if someone's doing their own
kernel/graphics builds/installs, who knows.  If it comes up, we'll just have
to address it.


Ouch... I see what's happening here. You're not using any of the
kernel core drm/ttm/foo - you're just static linking the local ones
into vmwgfx.ko. This would explain the lack of issues.

Well played guys !

Cheers,
Emil


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Make generation of framebuffer and renderbuffer id's threadsafe

2015-11-13 Thread Brian Paul

Hi Samuel,

The subject line should probably be something like:

"mesa: add locking in create_render_buffers() and create_framebuffers()"

Then, the comment body:

"""
This makes generation of framebuffer and renderbuffer id's threadsafe

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92633
"""


On 11/13/2015 03:57 AM, Samuel Maroy wrote:

This should fix the issue described in
https://bugs.freedesktop.org/show_bug.cgi?id=92633

---
  src/mesa/main/fbobject.c | 13 -
  1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index fe6bdc2..6398ff6 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -1637,6 +1637,8 @@ create_render_buffers(struct gl_context *ctx, GLsizei n, 
GLuint *renderbuffers,
 if (!renderbuffers)
return;

+   mtx_lock(>Shared->Mutex);
+
 first = _mesa_HashFindFreeKeyBlock(ctx->Shared->RenderBuffers, n);

 for (i = 0; i < n; i++) {
@@ -1647,11 +1649,10 @@ create_render_buffers(struct gl_context *ctx, GLsizei 
n, GLuint *renderbuffers,
   allocate_renderbuffer(ctx, name, func);


This function also calls mtx_lock(ctx->Shared->Mutex) so it will be a 
recursive lock.


In shared.c we init the mutex with:

   mtx_init(&shared->Mutex, mtx_plain);

So we should probably use (mtx_plain | mtx_recursive) there.

I think someone else spotted this a while ago but it looks like nothing 
was changed.  We should probably do an audit of our locking to check for 
other places where this might happen.





} else {
   /* insert a dummy renderbuffer into the hash table */
- mtx_lock(>Shared->Mutex);
   _mesa_HashInsert(ctx->Shared->RenderBuffers, name, 
);
- mtx_unlock(>Shared->Mutex);
}
 }
+   mtx_unlock(>Shared->Mutex);


I'd insert one blank line before the new mtx_unlock().


  }


@@ -2650,6 +2651,7 @@ create_framebuffers(GLsizei n, GLuint *framebuffers, bool 
dsa)
 if (!framebuffers)
return;

+   mtx_lock(&ctx->Shared->Mutex);
 first = _mesa_HashFindFreeKeyBlock(ctx->Shared->FrameBuffers, n);

 for (i = 0; i < n; i++) {
@@ -2660,16 +2662,17 @@ create_framebuffers(GLsizei n, GLuint *framebuffers, 
bool dsa)
   fb = ctx->Driver.NewFramebuffer(ctx, framebuffers[i]);
   if (!fb) {
  _mesa_error(ctx, GL_OUT_OF_MEMORY, "%s", func);
-return;
+goto beach;


Cute, but I think some people would prefer "goto cleanup" or something 
like that.



   }
}
else
   fb = &DummyFramebuffer;

-  mtx_lock(&ctx->Shared->Mutex);
_mesa_HashInsert(ctx->Shared->FrameBuffers, name, fb);
-  mtx_unlock(&ctx->Shared->Mutex);
 }
+
+beach:
+   mtx_unlock(&ctx->Shared->Mutex);
  }



-Brian



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/11] nir/lower_tex: Report progress

2015-11-13 Thread Jason Ekstrand
On Nov 13, 2015 5:25 AM, "Iago Toral"  wrote:
>
> On Wed, 2015-11-11 at 17:26 -0800, Jason Ekstrand wrote:
> > ---
> >  src/glsl/nir/nir.h   |  2 +-
> >  src/glsl/nir/nir_lower_tex.c | 19 +++
> >  2 files changed, 16 insertions(+), 5 deletions(-)
> >
> > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
> > index 41125b1..2299ece 100644
> > --- a/src/glsl/nir/nir.h
> > +++ b/src/glsl/nir/nir.h
> > @@ -1981,7 +1981,7 @@ typedef struct nir_lower_tex_options {
> > unsigned saturate_r;
> >  } nir_lower_tex_options;
> >
> > -void nir_lower_tex(nir_shader *shader,
> > +bool nir_lower_tex(nir_shader *shader,
> > const nir_lower_tex_options *options);
> >
> >  void nir_lower_idiv(nir_shader *shader);
> > diff --git a/src/glsl/nir/nir_lower_tex.c b/src/glsl/nir/nir_lower_tex.c
> > index 8aaa48a..21ed103 100644
> > --- a/src/glsl/nir/nir_lower_tex.c
> > +++ b/src/glsl/nir/nir_lower_tex.c
> > @@ -41,6 +41,7 @@
> >  typedef struct {
> > nir_builder b;
> > const nir_lower_tex_options *options;
> > +   bool progress;
> >  } lower_tex_state;
> >
> >  static void
> > @@ -239,15 +240,21 @@ nir_lower_tex_block(nir_block *block, void
*void_state)
> >/* If we are clamping any coords, we must lower projector first
> > * as clamping happens *after* projection:
> > */
> > -  if (lower_txp || sat_mask)
> > +  if (lower_txp || sat_mask) {
> >   project_src(b, tex);
> > + state->progress = true;
> > +  }
> >
> >if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) &&
> > -  state->options->lower_rect)
> > +  state->options->lower_rect) {
> >   lower_rect(b, tex);
> > + state->progress = true;
> > +  }
> >
> > -  if (sat_mask)
> > +  if (sat_mask) {
> >   saturate_src(b, tex, sat_mask);
> > + state->progress = true;
> > +  }
> > }
> >
> > return true;
> > @@ -264,13 +271,17 @@ nir_lower_tex_impl(nir_function_impl *impl,
lower_tex_state *state)
> > nir_metadata_dominance);
> >  }
> >
> > -void
> > +bool
> >  nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
> >  {
> > lower_tex_state state;
> > state.options = options;
> > +   state.progress = false;
> > +
> > nir_foreach_overload(shader, overload) {
> >if (overload->impl)
> > >   nir_lower_tex_impl(overload->impl, &state);
> > }
> > +
> > +   return state.progress;
> >  }
>
> If we are making this change then we also want to make the call to this
> pass use OPT() instead of OPT_V() in brw_preprocess_nir(), so it is
> consistent with patch 3 in this series.

Good call. Will do.

> With that change,
> Reviewed-by: Iago Toral Quiroga 
>
> Iago
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 8/9] gallium/hud: add support for batch queries

2015-11-13 Thread Nicolai Hähnle
---
 src/gallium/auxiliary/hud/hud_context.c  |  24 ++-
 src/gallium/auxiliary/hud/hud_driver_query.c | 248 +++
 src/gallium/auxiliary/hud/hud_private.h  |  13 +-
 3 files changed, 240 insertions(+), 45 deletions(-)

diff --git a/src/gallium/auxiliary/hud/hud_context.c 
b/src/gallium/auxiliary/hud/hud_context.c
index ffe30b8..bcef701 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -57,6 +57,7 @@ struct hud_context {
struct cso_context *cso;
struct u_upload_mgr *uploader;
 
+   struct hud_batch_query_context *batch_query;
struct list_head pane_list;
 
/* states */
@@ -510,6 +511,8 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
hud_alloc_vertices(hud, &hud->text, 4 * 512, 4 * sizeof(float));
 
/* prepare all graphs */
+   hud_batch_query_update(hud->batch_query);
+
LIST_FOR_EACH_ENTRY(pane, &hud->pane_list, head) {
   LIST_FOR_EACH_ENTRY(gr, &pane->graph_list, head) {
  gr->query_new_value(gr);
@@ -903,17 +906,21 @@ hud_parse_env_var(struct hud_context *hud, const char 
*env)
   }
   else if (strcmp(name, "samples-passed") == 0 &&
has_occlusion_query(hud->pipe->screen)) {
- hud_pipe_query_install(pane, hud->pipe, "samples-passed",
+ hud_pipe_query_install(&hud->batch_query, pane, hud->pipe,
+"samples-passed",
 PIPE_QUERY_OCCLUSION_COUNTER, 0, 0,
 PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+0);
   }
   else if (strcmp(name, "primitives-generated") == 0 &&
has_streamout(hud->pipe->screen)) {
- hud_pipe_query_install(pane, hud->pipe, "primitives-generated",
+ hud_pipe_query_install(&hud->batch_query, pane, hud->pipe,
+"primitives-generated",
 PIPE_QUERY_PRIMITIVES_GENERATED, 0, 0,
 PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+0);
   }
   else {
  boolean processed = FALSE;
@@ -938,17 +945,19 @@ hud_parse_env_var(struct hud_context *hud, const char 
*env)
if (strcmp(name, pipeline_statistics_names[i]) == 0)
   break;
 if (i < Elements(pipeline_statistics_names)) {
-   hud_pipe_query_install(pane, hud->pipe, name,
+   hud_pipe_query_install(&hud->batch_query, pane, hud->pipe, name,
   PIPE_QUERY_PIPELINE_STATISTICS, i,
   0, PIPE_DRIVER_QUERY_TYPE_UINT64,
-  PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+  PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+  0);
processed = TRUE;
 }
  }
 
  /* driver queries */
  if (!processed) {
-if (!hud_driver_query_install(pane, hud->pipe, name)){
+if (!hud_driver_query_install(&hud->batch_query, pane, hud->pipe,
+  name)) {
fprintf(stderr, "gallium_hud: unknown driver query '%s'\n", 
name);
 }
  }
@@ -1287,6 +1296,7 @@ hud_destroy(struct hud_context *hud)
   FREE(pane);
}
 
+   hud_batch_query_cleanup(&hud->batch_query);
pipe->delete_fs_state(pipe, hud->fs_color);
pipe->delete_fs_state(pipe, hud->fs_text);
pipe->delete_vs_state(pipe, hud->vs);
diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c 
b/src/gallium/auxiliary/hud/hud_driver_query.c
index 3198ab3..abc9f54 100644
--- a/src/gallium/auxiliary/hud/hud_driver_query.c
+++ b/src/gallium/auxiliary/hud/hud_driver_query.c
@@ -34,13 +34,149 @@
 #include "hud/hud_private.h"
 #include "pipe/p_screen.h"
 #include "os/os_time.h"
+#include "util/u_math.h"
 #include "util/u_memory.h"
 #include <stdio.h>
 
+// Must be a power of two
 #define NUM_QUERIES 8
 
+struct hud_batch_query_context {
+   struct pipe_context *pipe;
+   unsigned num_query_types;
+   unsigned allocated_query_types;
+   unsigned *query_types;
+
+   boolean failed;
+   struct pipe_query *query[NUM_QUERIES];
+   union pipe_query_result *result[NUM_QUERIES];
+   unsigned head, pending, results;
+};
+
+void
+hud_batch_query_update(struct hud_batch_query_context *bq)
+{
+   struct pipe_context *pipe;
+
+   if (!bq || bq->failed)
+  return;
+
+   pipe = bq->pipe;
+
+   if (bq->query[bq->head])
+  pipe->end_query(pipe, bq->query[bq->head]);
+
+   bq->results = 0;
+
+   while (bq->pending) {
+  unsigned idx = (bq->head - bq->pending + 1) % NUM_QUERIES;
+  

[Mesa-dev] [PATCH 6/9] st/mesa: maintain active perfmon counters in an array

2015-11-13 Thread Nicolai Hähnle
It is easy enough to pre-determine the required size, and arrays are
generally better behaved especially when they get large.
---
 src/mesa/state_tracker/st_cb_perfmon.c | 78 --
 src/mesa/state_tracker/st_cb_perfmon.h | 18 
 2 files changed, 55 insertions(+), 41 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index ec12eb2..6c71a13 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -42,15 +42,14 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
struct st_context *st = st_context(ctx);
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st->pipe;
+   unsigned num_active_counters = 0;
int gid, cid;
 
st_flush_bitmap_cache(st);
 
-   /* Create a query for each active counter. */
+   /* Determine the number of active counters. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
-  const struct st_perf_monitor_group *stg = &st->perfmon[gid];
-  BITSET_WORD tmp;
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -61,19 +60,29 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
  return false;
   }
 
+  num_active_counters += m->ActiveGroups[gid];
+   }
+
+   stm->active_counters = CALLOC(num_active_counters,
+ sizeof(*stm->active_counters));
+   if (!stm->active_counters)
+  return false;
+
+   /* Create a query for each active counter. */
+   for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
+  const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = &st->perfmon[gid];
+  BITSET_WORD tmp;
+
   BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) {
  const struct st_perf_monitor_counter *stc = &stg->counters[cid];
- struct st_perf_counter_object *cntr;
-
- cntr = CALLOC_STRUCT(st_perf_counter_object);
- if (!cntr)
-return false;
+ struct st_perf_counter_object *cntr =
+&stm->active_counters[stm->num_active_counters];
 
  cntr->query= pipe->create_query(pipe, stc->query_type, 0);
  cntr->id   = cid;
  cntr->group_id = gid;
-
- list_addtail(&cntr->list, &stm->active_counters);
+ ++stm->num_active_counters;
   }
}
return true;
@@ -83,24 +92,24 @@ static void
 reset_perf_monitor(struct st_perf_monitor_object *stm,
struct pipe_context *pipe)
 {
-   struct st_perf_counter_object *cntr, *tmp;
+   unsigned i;
 
-   LIST_FOR_EACH_ENTRY_SAFE(cntr, tmp, &stm->active_counters, list) {
-  if (cntr->query)
- pipe->destroy_query(pipe, cntr->query);
-  list_del(&cntr->list);
-  free(cntr);
+   for (i = 0; i < stm->num_active_counters; ++i) {
+  struct pipe_query *query = stm->active_counters[i].query;
+  if (query)
+ pipe->destroy_query(pipe, query);
}
+   FREE(stm->active_counters);
+   stm->active_counters = NULL;
+   stm->num_active_counters = 0;
 }
 
 static struct gl_perf_monitor_object *
 st_NewPerfMonitor(struct gl_context *ctx)
 {
struct st_perf_monitor_object *stq = 
ST_CALLOC_STRUCT(st_perf_monitor_object);
-   if (stq) {
-  list_inithead(&stq->active_counters);
+   if (stq)
   return &stq->base;
-   }
return NULL;
 }
 
@@ -119,9 +128,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 {
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st_context(ctx)->pipe;
-   struct st_perf_counter_object *cntr;
+   unsigned i;
 
-   if (LIST_IS_EMPTY(&stm->active_counters)) {
+   if (!stm->num_active_counters) {
   /* Create a query for each active counter before starting
* a new monitoring session. */
   if (!init_perf_monitor(ctx, m))
@@ -129,8 +138,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
}
 
/* Start the query for each active counter. */
-   LIST_FOR_EACH_ENTRY(cntr, &stm->active_counters, list) {
-  if (!pipe->begin_query(pipe, cntr->query))
+   for (i = 0; i < stm->num_active_counters; ++i) {
+  struct pipe_query *query = stm->active_counters[i].query;
+  if (!pipe->begin_query(pipe, query))
   goto fail;
}
return true;
@@ -146,11 +156,13 @@ st_EndPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 {
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st_context(ctx)->pipe;
-   struct st_perf_counter_object *cntr;
+   unsigned i;
 
/* Stop the query for each active counter. */
-   LIST_FOR_EACH_ENTRY(cntr, &stm->active_counters, list)
-  

[Mesa-dev] [PATCH 3/9] st/mesa: map semantic driver query types to underlying type

2015-11-13 Thread Nicolai Hähnle
---
 src/gallium/include/pipe/p_defines.h   | 2 ++
 src/mesa/state_tracker/st_cb_perfmon.c | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 7f241c8..7ed9f6d 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -791,6 +791,8 @@ union pipe_query_result
/* PIPE_QUERY_PRIMITIVES_GENERATED */
/* PIPE_QUERY_PRIMITIVES_EMITTED */
/* PIPE_DRIVER_QUERY_TYPE_UINT64 */
+   /* PIPE_DRIVER_QUERY_TYPE_BYTES */
+   /* PIPE_DRIVER_QUERY_TYPE_MICROSECONDS */
/* PIPE_DRIVER_QUERY_TYPE_HZ */
uint64_t u64;
 
diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 4ec6d86..dedb8f5 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -334,6 +334,9 @@ st_init_perfmon(struct st_context *st)
  c->Name = info.name;
  switch (info.type) {
 case PIPE_DRIVER_QUERY_TYPE_UINT64:
+case PIPE_DRIVER_QUERY_TYPE_BYTES:
+case PIPE_DRIVER_QUERY_TYPE_MICROSECONDS:
+case PIPE_DRIVER_QUERY_TYPE_HZ:
c->Minimum.u64 = 0;
c->Maximum.u64 = info.max_value.u64 ? info.max_value.u64 : -1;
c->Type = GL_UNSIGNED_INT64_AMD;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/9] st/mesa: use BITSET_FOREACH_SET to loop through active perfmon counters

2015-11-13 Thread Nicolai Hähnle
---
 src/mesa/state_tracker/st_cb_perfmon.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 80ff170..ec12eb2 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -50,6 +50,7 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
   const struct st_perf_monitor_group *stg = &st->perfmon[gid];
+  BITSET_WORD tmp;
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -60,14 +61,10 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
  return false;
   }
 
-  for (cid = 0; cid < g->NumCounters; cid++) {
- const struct gl_perf_monitor_counter *c = &g->Counters[cid];
+  BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) {
  const struct st_perf_monitor_counter *stc = &stg->counters[cid];
  struct st_perf_counter_object *cntr;
 
- if (!BITSET_TEST(m->ActiveCounters[gid], cid))
-continue;
-
  cntr = CALLOC_STRUCT(st_perf_counter_object);
  if (!cntr)
 return false;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/9] gallium: batch query objects and related cleanups

2015-11-13 Thread Nicolai Hähnle
Hi,

the main point of this patch series is to introduce batch query objects.

For AMD_performance_monitor, hardware may not be able to start and stop
performance counters independently of each other. The current query interface
does not fit such hardware well.

With this series, drivers can mark driver-specific queries with the
PIPE_DRIVER_QUERY_FLAG_BATCH flag, which indicates that those queries require
the use of batch query objects. Batch query objects are created with an
immutable list of queries, which requires a new entry point in pipe_context,
but apart from that they use the same begin_query/end_query/etc. entry points.

The radeon-specific part that actually makes use of this feature is not quite
ready yet, but I already wanted to get this part out there for feedback.
Please review!

Thanks,
Nicolai
---
 gallium/auxiliary/hud/hud_context.c   |   24 ++
 gallium/auxiliary/hud/hud_driver_query.c  |  249 +-
 gallium/auxiliary/hud/hud_private.h   |   13 +
 gallium/drivers/nouveau/nvc0/nvc0_query.c |4 
 gallium/include/pipe/p_context.h  |3 
 gallium/include/pipe/p_defines.h  |   36 ++--
 mesa/state_tracker/st_cb_perfmon.c|  247 -
 mesa/state_tracker/st_cb_perfmon.h|   32 +++
 mesa/state_tracker/st_context.h   |3 
 9 files changed, 437 insertions(+), 174 deletions(-)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/9] st/mesa: store mapping from perfmon counter to query type

2015-11-13 Thread Nicolai Hähnle
Previously, when a performance monitor was initialized, an inner loop through
all driver queries with string comparisons for each enabled performance
monitor counter was used. This hurts when a driver exposes lots of queries.
---
 src/mesa/state_tracker/st_cb_perfmon.c | 74 +++---
 src/mesa/state_tracker/st_cb_perfmon.h | 14 +++
 src/mesa/state_tracker/st_context.h|  3 ++
 3 files changed, 49 insertions(+), 42 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index dedb8f5..80ff170 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -36,48 +36,20 @@
 #include "pipe/p_screen.h"
 #include "util/u_memory.h"
 
-/**
- * Return a PIPE_QUERY_x type >= PIPE_QUERY_DRIVER_SPECIFIC, or -1 if
- * the driver-specific query doesn't exist.
- */
-static int
-find_query_type(struct pipe_screen *screen, const char *name)
-{
-   int num_queries;
-   int type = -1;
-   int i;
-
-   num_queries = screen->get_driver_query_info(screen, 0, NULL);
-   if (!num_queries)
-  return type;
-
-   for (i = 0; i < num_queries; i++) {
-  struct pipe_driver_query_info info;
-
-  if (!screen->get_driver_query_info(screen, i, &info))
- continue;
-
-  if (!strncmp(info.name, name, strlen(name))) {
- type = info.query_type;
- break;
-  }
-   }
-   return type;
-}
-
 static bool
 init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
 {
+   struct st_context *st = st_context(ctx);
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
-   struct pipe_screen *screen = st_context(ctx)->pipe->screen;
-   struct pipe_context *pipe = st_context(ctx)->pipe;
+   struct pipe_context *pipe = st->pipe;
int gid, cid;
 
-   st_flush_bitmap_cache(st_context(ctx));
+   st_flush_bitmap_cache(st);
 
/* Create a query for each active counter. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = &st->perfmon[gid];
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -90,20 +62,17 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 
   for (cid = 0; cid < g->NumCounters; cid++) {
  const struct gl_perf_monitor_counter *c = &g->Counters[cid];
+ const struct st_perf_monitor_counter *stc = &stg->counters[cid];
  struct st_perf_counter_object *cntr;
- int query_type;
 
  if (!BITSET_TEST(m->ActiveCounters[gid], cid))
 continue;
 
- query_type = find_query_type(screen, c->Name);
- assert(query_type != -1);
-
  cntr = CALLOC_STRUCT(st_perf_counter_object);
  if (!cntr)
 return false;
 
- cntr->query= pipe->create_query(pipe, query_type, 0);
+ cntr->query= pipe->create_query(pipe, stc->query_type, 0);
  cntr->id   = cid;
  cntr->group_id = gid;
 
@@ -286,6 +255,7 @@ st_init_perfmon(struct st_context *st)
struct gl_perf_monitor_state *perfmon = &st->ctx->PerfMonitor;
struct pipe_screen *screen = st->pipe->screen;
struct gl_perf_monitor_group *groups = NULL;
+   struct st_perf_monitor_group *stgroups = NULL;
int num_counters, num_groups;
int gid, cid;
 
@@ -304,26 +274,36 @@ st_init_perfmon(struct st_context *st)
if (!groups)
   return false;
 
+   stgroups = CALLOC(num_groups, sizeof(*stgroups));
+   if (!stgroups)
+  goto fail_only_groups;
+
for (gid = 0; gid < num_groups; gid++) {
   struct gl_perf_monitor_group *g = &groups[perfmon->NumGroups];
   struct pipe_driver_query_group_info group_info;
   struct gl_perf_monitor_counter *counters = NULL;
+  struct st_perf_monitor_counter *stcounters = NULL;
 
   if (!screen->get_driver_query_group_info(screen, gid, &group_info))
  continue;
 
   g->Name = group_info.name;
   g->MaxActiveCounters = group_info.max_active_queries;
-  g->NumCounters = 0;
-  g->Counters = NULL;
 
   if (group_info.num_queries)
  counters = CALLOC(group_info.num_queries, sizeof(*counters));
   if (!counters)
  goto fail;
+  g->Counters = counters;
+
+  stcounters = CALLOC(group_info.num_queries, sizeof(*stcounters));
+  if (!stcounters)
+ goto fail;
+  stgroups[perfmon->NumGroups].counters = stcounters;
 
   for (cid = 0; cid < num_counters; cid++) {
  struct gl_perf_monitor_counter *c = &counters[g->NumCounters];
+ struct st_perf_monitor_counter *stc = &stcounters[g->NumCounters];
  struct pipe_driver_query_info info;
 
  if (!screen->get_driver_query_info(screen, cid, &info))
@@ -359,18 +339,25 @@ st_init_perfmon(struct st_context *st)
 default:
unreachable("Invalid driver query type!");
  }
+
+ 

[Mesa-dev] [PATCH 7/9] gallium: add the concept of batch queries

2015-11-13 Thread Nicolai Hähnle
Some drivers (in particular radeon[si], but also freedreno judging from
a quick grep) may want to expose performance counters that cannot be
individually enabled or disabled.

Allow such drivers to mark driver-specific queries as requiring a new
type of batch query object that is used to start and stop a list of queries
simultaneously.
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query.c |  1 +
 src/gallium/include/pipe/p_context.h  |  3 +++
 src/gallium/include/pipe/p_defines.h  | 27 +--
 3 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index a1d6162..0608337 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -162,6 +162,7 @@ nvc0_screen_get_driver_query_info(struct pipe_screen 
*pscreen,
info->max_value.u64 = 0;
info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
info->group_id = -1;
+   info->flags = 0;
 
 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
if (id < num_sw_queries)
diff --git a/src/gallium/include/pipe/p_context.h 
b/src/gallium/include/pipe/p_context.h
index 27f358f..f122c74 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -115,6 +115,9 @@ struct pipe_context {
struct pipe_query *(*create_query)( struct pipe_context *pipe,
unsigned query_type,
unsigned index );
+   struct pipe_query *(*create_batch_query)( struct pipe_context *pipe,
+ unsigned num_queries,
+ unsigned *query_types );
 
void (*destroy_query)(struct pipe_context *pipe,
  struct pipe_query *q);
diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 7ed9f6d..b3c8b9f 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -776,6 +776,16 @@ struct pipe_query_data_pipeline_statistics
 };
 
 /**
+ * For batch queries.
+ */
+union pipe_numeric_type_union
+{
+   uint64_t u64;
+   uint32_t u32;
+   float f;
+};
+
+/**
  * Query result (returned by pipe_context::get_query_result).
  */
 union pipe_query_result
@@ -811,6 +821,9 @@ union pipe_query_result
 
/* PIPE_QUERY_PIPELINE_STATISTICS */
struct pipe_query_data_pipeline_statistics pipeline_statistics;
+
+   /* batch queries */
+   union pipe_numeric_type_union batch[0];
 };
 
 union pipe_color_union
@@ -840,12 +853,13 @@ enum pipe_driver_query_result_type
PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE = 1,
 };
 
-union pipe_numeric_type_union
-{
-   uint64_t u64;
-   uint32_t u32;
-   float f;
-};
+/**
+ * Some hardware requires some hardware-specific queries to be submitted
+ * as batched queries. The corresponding query objects are created using
+ * create_batch_query, and at most one such query may be active at
+ * any time.
+ */
+#define PIPE_DRIVER_QUERY_FLAG_BATCH (1 << 0)
 
 struct pipe_driver_query_info
 {
@@ -855,6 +869,7 @@ struct pipe_driver_query_info
enum pipe_driver_query_type type;
enum pipe_driver_query_result_type result_type;
unsigned group_id;
+   unsigned flags;
 };
 
 struct pipe_driver_query_group_info
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: set matrix_stride for non matrices with atomic counter buffers

2015-11-13 Thread Ilia Mirkin
On Fri, Nov 13, 2015 at 10:53 AM, Tapani Pälli  wrote:
> On 11/12/2015 05:47 PM, Ilia Mirkin wrote:
>>
>> On Mon, Nov 2, 2015 at 6:36 AM, Tapani Pälli 
>> wrote:
>>>
>>> Patch sets matrix_stride as 0 for non matrix uniforms that are in a
>>> atomic counter buffer. Matrix stride calculation for actual matrix
>>> uniforms is done during link_assign_uniform_locations.
>>>
>>>  From ARB_program_interface_query specification:
>>>
>>> GL_MATRIX_STRIDE:
>>>
>>> "For active variables not declared as a matrix or array of matrices,
>>> zero is written to <params>.  For active variables not backed by a
>>> buffer object, -1 is written to <params>, regardless of the variable
>>> type."
>>>
>>> Signed-off-by: Tapani Pälli 
>>> ---
>>>   src/glsl/link_atomics.cpp | 2 ++
>>>   1 file changed, 2 insertions(+)
>>>
>>> diff --git a/src/glsl/link_atomics.cpp b/src/glsl/link_atomics.cpp
>>> index cdcc06d..3aa52db 100644
>>> --- a/src/glsl/link_atomics.cpp
>>> +++ b/src/glsl/link_atomics.cpp
>>> @@ -240,6 +240,8 @@ link_assign_atomic_counter_resources(struct
>>> gl_context *ctx,
>>>storage->offset = var->data.atomic.offset;
>>>storage->array_stride = (var->type->is_array() ?
>>>
>>> var->type->without_array()->atomic_size() : 0);
>>> + if (!var->type->is_matrix())
>>> +storage->matrix_stride = 0;
>>
>> Can atomics ever be in matrices?
>
>
> Nope, but one can query matrix stride property of a uniform that is 'backed
> by' atomic counter buffer.

Right, I get that... but why the if (!var->type->is_matrix())?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH 2/2] r200: fix bgrx8/xrgb8 blits

2015-11-13 Thread Emil Velikov
On 12 November 2015 at 22:53, Ian Romanick  wrote:
> I'll try to swap the RV200 for the R200 next week.  I'm not sure when
> Emil is planning the next stable release... I'll try to test before
> that... unless someone beats me to it. ;)
>
Next stable should be out a week from now.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/7] [v2] i965/meta/gen9: Individually fast clear color attachments

2015-11-13 Thread Neil Roberts
Ben Widawsky  writes:

> Thanks a lot, I will squash it in - and sorry again about ignoring your
> feedback.

Ok, no worries. Feel free to add

Reviewed-by: Neil Roberts 

if you squash the changes in.

Regards,
- Neil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Can't get OpenGL 3.x inside VMware Workstation 12 (Ubuntu guest)

2015-11-13 Thread Valera Rozuvan
Hi Brian,

The updated instructions worked for me. Thank you = )

Regards,
Valera Rozuvan | http://valera.rozuvan.net/

Skype: valera.rozuvan
E-mail: valera.rozu...@gmail.com
Phone: +38 (050) 837-29-73



On Wed, Nov 11, 2015 at 11:18 PM, Emil Velikov  wrote:
> On 11 November 2015 at 19:51, Brian Paul  wrote:
>> On 11/11/2015 11:38 AM, Emil Velikov wrote:
>>>
>>> On 11 November 2015 at 18:25, Thomas Hellstrom 
>>> wrote:

 On 11/11/2015 07:07 PM, Brian Paul wrote:
>
> On 11/11/2015 10:44 AM, Emil Velikov wrote:
>>
>> On 11 November 2015 at 16:48, Brian Paul  wrote:
>>>
>>> On 11/11/2015 08:44 AM, Emil Velikov wrote:
>>
>>

 I have seen similar type of documents in the past, most of which
 going
 out of date very quickly due to distribution changes and/or others.
 Wondering how you'll feel about "check your distro and add svga to
 the
 gallium-drivers array" style of instructions ?
>>>
>>>
>>>
>>> I'm afraid I don't quite understand what you're saying there.  Can you
>>> elaborate?
>>>
>>>
>> Rather than walking through the requirements, configure and make/make
>> install steps, just forward people to the distro specific wiki on "how
>> to build mesa/kernel" and explicitly mention the differences:
>> mesa:
>> - XA must be enabled: --enable-xa
>> - svga must be listed in the gallium drivers:
>> --with-gallium-drivers=svga...
>>
>> kernel:
>>- Set DRM_VMWGFX
>>
>> others...
>
>
> I guess I've never seen those wikis.  I'd have to search for them, but
> I really don't have the time now.
>
> We actually have an in-house shell script that installs all the
> pre-req packages, pulls the git trees, builds and installs for a
> variety of guest OSes.  But it has some VMware-specific stuff that I'd
> have to trim out before making public.
>
>
>>
>> Related: does the upstream [1] vmwgfx module work well when combined
>> with upstream core drm across different versions ? Considering how
>> well Thomas is handling upstreaming shouldn't the module from the
>> kernel be recommended ?
>
>
> Either should be fine at this point but the build instructions cover
> the case of one having an older distro that may not have the
> GL3-enabled kernel module already.
>

 The upstream[1] vmwgfx module should work well with any linux kernel
 dating back to 2.6.32 unless the distro has changed the kernel API from
 the base version. It ships with builtin stripped drm and ttm to handle
 compatibility issues, and is intended for people (mostly including
 ourselves and our QA team) that want to try out new features without
 installing a completely new kernel.

>>> Ok seems that my point is too subtle, so I'll try from another angle.
>>>
>>> The wiki instructions say "nuke the vmwgfx.ko module" and implicitly
>>> "keep drm.ko". If we ignore the unlikely cases where either one and/or
>>> both is built-in, we can have a case where the new vmwgfx is build
>>> against core drm from the upstream, yet the downstream drm module
>>> is/gets loaded. As core drm often goes through various changes, you
>>> can see how bad things are likely to happen.
>>
>>
>> Well, the above-mentioned build script doesn't touch drm.ko and works on
>> about 14 different versions of Ubuntu, Mint, Fedora, RHEL, etc. so I don't
>> think we've ever seen that conflict.  But if someone's doing their own
>> kernel/graphics builds/installs, who knows.  If it comes up, we'll just have
>> to address it.
>>
> Ouch... I see what's happening here. You're not using any of the
> kernel core drm/ttm/foo - you're just static linking the local ones
> into vmwgfx.ko. This explains the lack of issues.
>
> Well played guys !
>
> Cheers,
> Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] OT: Phabricator? (was: Re: [PATCH 1/2] glsl: enable 'shared' keyword also for layout qualifiers)

2015-11-13 Thread Kai Wasserbäch
Hi Emil,
Emil Velikov wrote on 13.11.2015 13:58:
> On 13 November 2015 at 09:14, Kai Wasserbäch  
> wrote:
>> Emil Velikov wrote on 12.11.2015 18:45:
>>> On 12 November 2015 at 15:36, Samuel Iglesias Gonsálvez
>>>  wrote:
 On 12/11/15 15:28, Timothy Arceri wrote:
> On 13 November 2015 12:22:39 am AEDT, "Samuel Iglesias Gonsálvez" 
>  wrote:
>> 'shared' was added in ARB_uniform_buffer_object and also used
>> in ARB_shader_storage_buffer_object.
>
> Hi Samuel,
>
> Shared for UBO and SSBOs is not a keyword; it's just an identifier for a 
> layout qualifier, are you sure you need to make it available for those 
> extensions?
>

 Right. Please ignore this patch.

>>> In this case, may I suggest that you tag the patch as Rejected (or
>>> similar) in patchwork [1]. Afaict there are quite a few patches in
>>> there from yourself and fellow colleagues. Any chance someone can go
>>> through them and change their status appropriately ?
>>
>> Since I'm reading this from time to time I was wondering whether Mesa
>> wouldn't be better served by a Phabricator instance? Maybe Matt and Tom,
>> who send in most of AMD's patches for the AMDGPU backend in LLVM, can
>> weigh in here?
>>
>> I'm using Phabricator myself for a big project and I must say it's really 
>> neat.
>> Most status/meta updates can happen automatically as you commit your changes,
>> the review state is tracked properly and if a patch was rejected/abandoned 
>> that
>> is usually also clear from the state. Ie. in most cases there is no need to 
>> have
>> multiple people walk through the same list of patches/bugs etc.
>>
>> (Bonus: for switching over from a Bugzilla to Phabricator, there's a pretty 
>> big
>> precedent with complete porting tools: Wikimedia did that)
>>
> Regardless of how clever the tool is there is always some user
> interaction needed. Damien has been working on improving patchwork
> and I believe it will be working pretty neatly in the not too distant
> future.

Sure, there'll always be some level of interaction. My point was that
Phabricator, in my experience, allows me to reduce the amount of direct
interaction with it.

Example: if I put up a new revision for review, I can do so with a command line
tool I use instead of a git push to some feature branch and a git send-email to
the list. If code owners are defined, these can get added automatically by the
system as subscribers/reviewers (Herald rules can do that too). If a change has
been reviewed I land it with my command line tool which automatically marks the
review correctly. If somebody has requested to do something differently I can
reply to the comments inline and/or update the change for review. It all feels
pretty natural. (See
 and
 for
some details on that workflow.)

> Personally I'm not too fussed what we use - although the general
> question on X vs Y is a po-tay-to po-tah-to like case. To each their
> own :) Although I'd suspect that we can/should have a discussion on
> next XDC on topics such as these ?

I'm most likely not going to be at any XDC in the foreseeable future, but a
discussion about tools would probably be best suited for a personal meeting.

Anyway, if there is more interest in Phabricator or this discussion I'm happy to
answer your questions off list or in a different thread. I'll stop posting in
this thread, since it is off-topic (sorry for that). Especially since I'm not a
major contributor to Mesa.

Cheers,
Kai



signature.asc
Description: OpenPGP digital signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/11] i965/fs: Don't allow SINT32 as a return type for resinfo

2015-11-13 Thread Jason Ekstrand
On Nov 13, 2015 5:49 AM, "Iago Toral"  wrote:
>
> On Wed, 2015-11-11 at 17:26 -0800, Jason Ekstrand wrote:
> > ---
> >  src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 10 +-
> >  1 file changed, 9 insertions(+), 1 deletion(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> > index 974219f..dad541b 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> > @@ -680,7 +680,15 @@ fs_generator::generate_tex(fs_inst *inst, struct
brw_reg dst, struct brw_reg src
> >
> > switch (dst.type) {
> > case BRW_REGISTER_TYPE_D:
> > -  return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
> > +  /* SINT32 isn't actually allowed for TXS.  This isn't explicitly
stated
> > +   * in the PRM, but the i965 PRM explicitly lists UINT32 and
FLOAT32 as
> > +   * being valid for resinfo but not SINT32 (Vol. 4 Section
4.8.1.1).
> > +   * Emperical testing has also verified this.
>
> Empirical
>
> > +   */
>
> Actually, I only see UINT32. For example, from the IVB PRM, volume 4,
> part 1, page 130:
>
> "The surface indicated in the surface state is not sampled. Instead, the
> width, height, depth, and MIP count of the surface are returned as
> indicated in the table below. The format of the returned data is UINT32"
>
> I see the same text for HSW and BDW. Either way:
>
> Reviewed-by: Iago Toral Quiroga 

It's only gen 4-5 where you get any choice.  At least on gen4, you can ask
for it in float and the hardware will, supposedly, do that. However, there's
no real point. I can change it to just always stomp resinfo to uint32.
--Jason

>
> > +  if (inst->opcode == SHADER_OPCODE_TXS)
> > + return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
> > +  else
> > + return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
> >break;
> > case BRW_REGISTER_TYPE_UD:
> >return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: set matrix_stride for non matrices with atomic counter buffers

2015-11-13 Thread Tapani Pälli

On 11/12/2015 05:47 PM, Ilia Mirkin wrote:

On Mon, Nov 2, 2015 at 6:36 AM, Tapani Pälli  wrote:

Patch sets matrix_stride as 0 for non matrix uniforms that are in an
atomic counter buffer. Matrix stride calculation for actual matrix
uniforms is done during link_assign_uniform_locations.

 From ARB_program_interface_query specification:

GL_MATRIX_STRIDE:

"For active variables not declared as a matrix or array of matrices,
zero is written to <params>.  For active variables not backed by a
buffer object, -1 is written to <params>, regardless of the variable
type."

Signed-off-by: Tapani Pälli 
---
  src/glsl/link_atomics.cpp | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/src/glsl/link_atomics.cpp b/src/glsl/link_atomics.cpp
index cdcc06d..3aa52db 100644
--- a/src/glsl/link_atomics.cpp
+++ b/src/glsl/link_atomics.cpp
@@ -240,6 +240,8 @@ link_assign_atomic_counter_resources(struct gl_context *ctx,
   storage->offset = var->data.atomic.offset;
   storage->array_stride = (var->type->is_array() ?
var->type->without_array()->atomic_size() : 
0);
+ if (!var->type->is_matrix())
+storage->matrix_stride = 0;

Can atomics ever be in matrices?


Nope, but one can query the matrix stride property of a uniform that is
'backed by' an atomic counter buffer.



   -ilia


// Tapani
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: error out in indirect draw when vertex bindings mismatch

2015-11-13 Thread Fredrik Höglund
On Friday 13 November 2015, Tapani Pälli wrote:
> Patch adds additional mask for tracking which vertex buffer bindings
> are set. This array can be directly compared to which vertex arrays
> are enabled and should match when drawing.
> 
> Fixes following CTS tests:
> 
>ES31-CTS.draw_indirect.negative-noVBO-arrays
>ES31-CTS.draw_indirect.negative-noVBO-elements
> 
> Signed-off-by: Tapani Pälli 
> ---
>  src/mesa/main/api_validate.c | 13 +
>  src/mesa/main/mtypes.h   |  3 +++
>  src/mesa/main/varray.c   |  5 +
>  3 files changed, 21 insertions(+)
> 
> diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
> index a490189..e82e89a 100644
> --- a/src/mesa/main/api_validate.c
> +++ b/src/mesa/main/api_validate.c
> @@ -710,6 +710,19 @@ valid_draw_indirect(struct gl_context *ctx,
>return GL_FALSE;
> }
>  
> +   /* From OpenGL ES 3.1 spec. section 10.5:
> +* "An INVALID_OPERATION error is generated if zero is bound to
> +* VERTEX_ARRAY_BINDING, DRAW_INDIRECT_BUFFER or to any enabled
> +* vertex array."
> +*
> +* Here we check that vertex buffer bindings match with enabled
> +* vertex arrays.
> +*/
> +   if (ctx->Array.VAO->_Enabled != ctx->Array.VAO->VertexBindingMask) {

This test only works when the enabled vertex arrays are associated with
their default vertex buffer binding points.

> +  _mesa_error(ctx, GL_INVALID_OPERATION, "%s(No VBO bound)", name);
> +  return GL_FALSE;
> +   }
> +
> if (!_mesa_valid_prim_mode(ctx, mode, name))
>return GL_FALSE;
>  
> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
> index 4efdf1e..6c6187f 100644
> --- a/src/mesa/main/mtypes.h
> +++ b/src/mesa/main/mtypes.h
> @@ -1419,6 +1419,9 @@ struct gl_vertex_array_object
> /** Vertex buffer bindings */
> struct gl_vertex_buffer_binding VertexBinding[VERT_ATTRIB_MAX];
>  
> +   /** Mask indicating which binding points are set. */
> +   GLbitfield64 VertexBindingMask;
> +
> /** Mask of VERT_BIT_* values indicating which arrays are enabled */
> GLbitfield64 _Enabled;
>  
> diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c
> index 887d0c0..0a94c5a 100644
> --- a/src/mesa/main/varray.c
> +++ b/src/mesa/main/varray.c
> @@ -174,6 +174,11 @@ bind_vertex_buffer(struct gl_context *ctx,
>binding->Offset = offset;
>binding->Stride = stride;
>  
> +  if (vbo == ctx->Shared->NullBufferObj)
> + vao->VertexBindingMask &= ~VERT_BIT(index);
> +  else
> + vao->VertexBindingMask |= VERT_BIT(index);
> +
>vao->NewArrays |= binding->_BoundArrays;
> }
>  }
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 10/11] i965/fs: Stomp the texture return type to UINT32

2015-11-13 Thread Jason Ekstrand
Cc: Kenneth Graunke 


---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 4877504..61c63d4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -690,6 +690,17 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg 
dst, struct brw_reg src
   break;
}
 
+   /* Stomp the resinfo output type to UINT32.  On gens 4-5, the output type
+* is set as part of the message descriptor.  On gen4, the PRM seems to
+* allow UINT32 and FLOAT32 (i965 PRM, Vol. 4 Section 4.8.1.1), but on
+* later gens UINT32 is required.  Once you hit Sandy Bridge, the bit is
+* gone from the message descriptor entirely and you just get UINT32 all
+* the time regardless.  Since we can really only do non-UINT32 on gen4,
+* just stomp it to UINT32 all the time.
+*/
+   if (inst->opcode == SHADER_OPCODE_TXS)
+  return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
+
switch (inst->exec_size) {
case 8:
   simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 4/9] st/mesa: store mapping from perfmon counter to query type

2015-11-13 Thread Nicolai Hähnle
Previously, when a performance monitor was initialized, an inner loop through
all driver queries with string comparisons for each enabled performance
monitor counter was used. This hurts when a driver exposes lots of queries.

Reviewed-by: Samuel Pitoiset 
---
 src/mesa/state_tracker/st_cb_perfmon.c | 74 +++---
 src/mesa/state_tracker/st_cb_perfmon.h | 14 +++
 src/mesa/state_tracker/st_context.h|  3 ++
 3 files changed, 49 insertions(+), 42 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index dedb8f5..80ff170 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -36,48 +36,20 @@
 #include "pipe/p_screen.h"
 #include "util/u_memory.h"
 
-/**
- * Return a PIPE_QUERY_x type >= PIPE_QUERY_DRIVER_SPECIFIC, or -1 if
- * the driver-specific query doesn't exist.
- */
-static int
-find_query_type(struct pipe_screen *screen, const char *name)
-{
-   int num_queries;
-   int type = -1;
-   int i;
-
-   num_queries = screen->get_driver_query_info(screen, 0, NULL);
-   if (!num_queries)
-  return type;
-
-   for (i = 0; i < num_queries; i++) {
-  struct pipe_driver_query_info info;
-
-  if (!screen->get_driver_query_info(screen, i, ))
- continue;
-
-  if (!strncmp(info.name, name, strlen(name))) {
- type = info.query_type;
- break;
-  }
-   }
-   return type;
-}
-
 static bool
 init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
 {
+   struct st_context *st = st_context(ctx);
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
-   struct pipe_screen *screen = st_context(ctx)->pipe->screen;
-   struct pipe_context *pipe = st_context(ctx)->pipe;
+   struct pipe_context *pipe = st->pipe;
int gid, cid;
 
-   st_flush_bitmap_cache(st_context(ctx));
+   st_flush_bitmap_cache(st);
 
/* Create a query for each active counter. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = >PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = >perfmon[gid];
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -90,20 +62,17 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 
   for (cid = 0; cid < g->NumCounters; cid++) {
  const struct gl_perf_monitor_counter *c = >Counters[cid];
+ const struct st_perf_monitor_counter *stc = >counters[cid];
  struct st_perf_counter_object *cntr;
- int query_type;
 
  if (!BITSET_TEST(m->ActiveCounters[gid], cid))
 continue;
 
- query_type = find_query_type(screen, c->Name);
- assert(query_type != -1);
-
  cntr = CALLOC_STRUCT(st_perf_counter_object);
  if (!cntr)
 return false;
 
- cntr->query= pipe->create_query(pipe, query_type, 0);
+ cntr->query= pipe->create_query(pipe, stc->query_type, 0);
  cntr->id   = cid;
  cntr->group_id = gid;
 
@@ -286,6 +255,7 @@ st_init_perfmon(struct st_context *st)
struct gl_perf_monitor_state *perfmon = >ctx->PerfMonitor;
struct pipe_screen *screen = st->pipe->screen;
struct gl_perf_monitor_group *groups = NULL;
+   struct st_perf_monitor_group *stgroups = NULL;
int num_counters, num_groups;
int gid, cid;
 
@@ -304,26 +274,36 @@ st_init_perfmon(struct st_context *st)
if (!groups)
   return false;
 
+   stgroups = CALLOC(num_groups, sizeof(*stgroups));
+   if (!stgroups)
+  goto fail_only_groups;
+
for (gid = 0; gid < num_groups; gid++) {
   struct gl_perf_monitor_group *g = [perfmon->NumGroups];
   struct pipe_driver_query_group_info group_info;
   struct gl_perf_monitor_counter *counters = NULL;
+  struct st_perf_monitor_counter *stcounters = NULL;
 
   if (!screen->get_driver_query_group_info(screen, gid, _info))
  continue;
 
   g->Name = group_info.name;
   g->MaxActiveCounters = group_info.max_active_queries;
-  g->NumCounters = 0;
-  g->Counters = NULL;
 
   if (group_info.num_queries)
  counters = CALLOC(group_info.num_queries, sizeof(*counters));
   if (!counters)
  goto fail;
+  g->Counters = counters;
+
+  stcounters = CALLOC(group_info.num_queries, sizeof(*stcounters));
+  if (!stcounters)
+ goto fail;
+  stgroups[perfmon->NumGroups].counters = stcounters;
 
   for (cid = 0; cid < num_counters; cid++) {
  struct gl_perf_monitor_counter *c = [g->NumCounters];
+ struct st_perf_monitor_counter *stc = [g->NumCounters];
  struct pipe_driver_query_info info;
 
  if (!screen->get_driver_query_info(screen, cid, ))
@@ -359,18 +339,25 @@ st_init_perfmon(struct st_context *st)
 default:

[Mesa-dev] [PATCH v2 7/9] gallium: add the concept of batch queries

2015-11-13 Thread Nicolai Hähnle
Some drivers (in particular radeon[si], but also freedreno judging from
a quick grep) may want to expose performance counters that cannot be
individually enabled or disabled.

Allow such drivers to mark driver-specific queries as requiring a new
type of batch query object that is used to start and stop a list of queries
simultaneously.

v2: documentation for create_batch_query
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query.c |  1 +
 src/gallium/include/pipe/p_context.h  | 19 +++
 src/gallium/include/pipe/p_defines.h  | 27 +--
 3 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index a1d6162..0608337 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -162,6 +162,7 @@ nvc0_screen_get_driver_query_info(struct pipe_screen 
*pscreen,
info->max_value.u64 = 0;
info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
info->group_id = -1;
+   info->flags = 0;
 
 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
if (id < num_sw_queries)
diff --git a/src/gallium/include/pipe/p_context.h 
b/src/gallium/include/pipe/p_context.h
index 27f358f..be7447d 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -116,6 +116,25 @@ struct pipe_context {
unsigned query_type,
unsigned index );
 
+   /**
+* Create a query object that queries all given query types simultaneously.
+*
+* This can only be used for those query types for which
+* get_driver_query_info indicates that it must be used. Only one batch
+* query object may be active at a time.
+*
+* There may be additional constraints on which query types can be used
+* together, in particular those that are implied by
+* get_driver_query_group_info.
+*
+* \param num_queries the number of query types
+* \param query_types array of \p num_queries query types
+* \return a query object, or NULL on error.
+*/
+   struct pipe_query *(*create_batch_query)( struct pipe_context *pipe,
+ unsigned num_queries,
+ unsigned *query_types );
+
void (*destroy_query)(struct pipe_context *pipe,
  struct pipe_query *q);
 
diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 7ed9f6d..b3c8b9f 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -776,6 +776,16 @@ struct pipe_query_data_pipeline_statistics
 };
 
 /**
+ * For batch queries.
+ */
+union pipe_numeric_type_union
+{
+   uint64_t u64;
+   uint32_t u32;
+   float f;
+};
+
+/**
  * Query result (returned by pipe_context::get_query_result).
  */
 union pipe_query_result
@@ -811,6 +821,9 @@ union pipe_query_result
 
/* PIPE_QUERY_PIPELINE_STATISTICS */
struct pipe_query_data_pipeline_statistics pipeline_statistics;
+
+   /* batch queries */
+   union pipe_numeric_type_union batch[0];
 };
 
 union pipe_color_union
@@ -840,12 +853,13 @@ enum pipe_driver_query_result_type
PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE = 1,
 };
 
-union pipe_numeric_type_union
-{
-   uint64_t u64;
-   uint32_t u32;
-   float f;
-};
+/**
+ * Some hardware requires some hardware-specific queries to be submitted
+ * as batched queries. The corresponding query objects are created using
+ * create_batch_query, and at most one such query may be active at
+ * any time.
+ */
+#define PIPE_DRIVER_QUERY_FLAG_BATCH (1 << 0)
 
 struct pipe_driver_query_info
 {
@@ -855,6 +869,7 @@ struct pipe_driver_query_info
enum pipe_driver_query_type type;
enum pipe_driver_query_result_type result_type;
unsigned group_id;
+   unsigned flags;
 };
 
 struct pipe_driver_query_group_info
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 0/9] gallium: batch query objects and related cleanups

2015-11-13 Thread Nicolai Hähnle
Hi,

I have updated patches 6 - 9. Samuel, thank you for your input and I hope
you find your points to be resolved satisfactorily ;)

Cheers,
Nicolai
---
nha@deadlights:~/amd/mesa$ git diff master | diffstat
 gallium/auxiliary/hud/hud_context.c   |   24 +-
 gallium/auxiliary/hud/hud_driver_query.c  |  266 +-
 gallium/auxiliary/hud/hud_private.h   |   13 +
 gallium/drivers/nouveau/nvc0/nvc0_query.c |4 
 gallium/include/pipe/p_context.h  |   19 ++
 gallium/include/pipe/p_defines.h  |   36 ++--
 mesa/state_tracker/st_cb_perfmon.c|  253 
 mesa/state_tracker/st_cb_perfmon.h|   32 ++-
 mesa/state_tracker/st_context.h   |3 
 9 files changed, 475 insertions(+), 175 deletions(-)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 5/9] st/mesa: use BITSET_FOREACH_SET to loop through active perfmon counters

2015-11-13 Thread Nicolai Hähnle
Reviewed-by: Samuel Pitoiset 
---
 src/mesa/state_tracker/st_cb_perfmon.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 80ff170..ec12eb2 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -50,6 +50,7 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = >PerfMonitor.Groups[gid];
   const struct st_perf_monitor_group *stg = >perfmon[gid];
+  BITSET_WORD tmp;
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -60,14 +61,10 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
  return false;
   }
 
-  for (cid = 0; cid < g->NumCounters; cid++) {
- const struct gl_perf_monitor_counter *c = >Counters[cid];
+  BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) {
  const struct st_perf_monitor_counter *stc = >counters[cid];
  struct st_perf_counter_object *cntr;
 
- if (!BITSET_TEST(m->ActiveCounters[gid], cid))
-continue;
-
  cntr = CALLOC_STRUCT(st_perf_counter_object);
  if (!cntr)
 return false;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/9] st/mesa: maintain active perfmon counters in an array

2015-11-13 Thread Samuel Pitoiset



On 11/13/2015 04:57 PM, Nicolai Hähnle wrote:

It is easy enough to pre-determine the required size, and arrays are
generally better behaved especially when they get large.
---
  src/mesa/state_tracker/st_cb_perfmon.c | 78 --
  src/mesa/state_tracker/st_cb_perfmon.h | 18 
  2 files changed, 55 insertions(+), 41 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index ec12eb2..6c71a13 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -42,15 +42,14 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 struct st_context *st = st_context(ctx);
 struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
 struct pipe_context *pipe = st->pipe;
+   unsigned num_active_counters = 0;
 int gid, cid;

 st_flush_bitmap_cache(st);

-   /* Create a query for each active counter. */
+   /* Determine the number of active counters. */
 for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
const struct gl_perf_monitor_group *g = >PerfMonitor.Groups[gid];
-  const struct st_perf_monitor_group *stg = >perfmon[gid];
-  BITSET_WORD tmp;

if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
   /* Maximum number of counters reached. Cannot start the session. */
@@ -61,19 +60,29 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
   return false;
}

+  num_active_counters += m->ActiveGroups[gid];
+   }
+
+   stm->active_counters = CALLOC(num_active_counters,
+ sizeof(*stm->active_counters));
+   if (!stm->active_counters)
+  return false;


Previously, this function returned true when there were no active
counters, and you changed the behaviour. Are you sure this is not going
to break what the spec says about BeginPerfMonitor? Did you make sure
by running the amd_performance_monitor piglit tests?



+
+   /* Create a query for each active counter. */
+   for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
+  const struct gl_perf_monitor_group *g = >PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = >perfmon[gid];
+  BITSET_WORD tmp;
+
BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) {
   const struct st_perf_monitor_counter *stc = >counters[cid];
- struct st_perf_counter_object *cntr;
-
- cntr = CALLOC_STRUCT(st_perf_counter_object);
- if (!cntr)
-return false;
+ struct st_perf_counter_object *cntr =
+>active_counters[stm->num_active_counters];

   cntr->query= pipe->create_query(pipe, stc->query_type, 0);
   cntr->id   = cid;
   cntr->group_id = gid;
-
- list_addtail(>list, >active_counters);
+ ++stm->num_active_counters;
}
 }
 return true;
@@ -83,24 +92,24 @@ static void
  reset_perf_monitor(struct st_perf_monitor_object *stm,
 struct pipe_context *pipe)
  {
-   struct st_perf_counter_object *cntr, *tmp;
+   unsigned i;

-   LIST_FOR_EACH_ENTRY_SAFE(cntr, tmp, >active_counters, list) {
-  if (cntr->query)
- pipe->destroy_query(pipe, cntr->query);
-  list_del(>list);
-  free(cntr);
+   for (i = 0; i < stm->num_active_counters; ++i) {
+  struct pipe_query *query = stm->active_counters[i].query;
+  if (query)
+ pipe->destroy_query(pipe, query);
 }
+   FREE(stm->active_counters);
+   stm->active_counters = NULL;
+   stm->num_active_counters = 0;
  }

  static struct gl_perf_monitor_object *
  st_NewPerfMonitor(struct gl_context *ctx)
  {
 struct st_perf_monitor_object *stq = 
ST_CALLOC_STRUCT(st_perf_monitor_object);
-   if (stq) {
-  list_inithead(>active_counters);
+   if (stq)
return >base;
-   }
 return NULL;
  }

@@ -119,9 +128,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
  {
 struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
 struct pipe_context *pipe = st_context(ctx)->pipe;
-   struct st_perf_counter_object *cntr;
+   unsigned i;

-   if (LIST_IS_EMPTY(>active_counters)) {
+   if (!stm->num_active_counters) {
/* Create a query for each active counter before starting
 * a new monitoring session. */
if (!init_perf_monitor(ctx, m))
@@ -129,8 +138,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 }

 /* Start the query for each active counter. */
-   LIST_FOR_EACH_ENTRY(cntr, >active_counters, list) {
-  if (!pipe->begin_query(pipe, cntr->query))
+   for (i = 0; i < stm->num_active_counters; ++i) {
+  struct pipe_query *query = stm->active_counters[i].query;
+  if (!pipe->begin_query(pipe, query))
goto fail;
 }
 return true;
@@ -146,11 +156,13 @@ st_EndPerfMonitor(struct gl_context *ctx, 

Re: [Mesa-dev] [PATCH 9/9] st/mesa: add support for batch driver queries to perfmon

2015-11-13 Thread Samuel Pitoiset



On 11/13/2015 04:57 PM, Nicolai Hähnle wrote:

---
  src/mesa/state_tracker/st_cb_perfmon.c | 75 ++
  src/mesa/state_tracker/st_cb_perfmon.h |  6 +++
  2 files changed, 74 insertions(+), 7 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 6c71a13..078d2c4 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -42,7 +42,10 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 struct st_context *st = st_context(ctx);
 struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
 struct pipe_context *pipe = st->pipe;
+   unsigned *batch = NULL;
 unsigned num_active_counters = 0;
+   unsigned max_batch_counters = 0;
+   unsigned num_batch_counters = 0;
 int gid, cid;

 st_flush_bitmap_cache(st);
@@ -50,6 +53,7 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 /* Determine the number of active counters. */
 for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
const struct gl_perf_monitor_group *g = >PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = >perfmon[gid];

if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
   /* Maximum number of counters reached. Cannot start the session. */
@@ -61,6 +65,8 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
}

num_active_counters += m->ActiveGroups[gid];
+  if (stg->has_batch)
+ max_batch_counters += m->ActiveGroups[gid];
 }

 stm->active_counters = CALLOC(num_active_counters,
@@ -68,6 +74,9 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 if (!stm->active_counters)
return false;

+   if (max_batch_counters)
+  batch = CALLOC(max_batch_counters, sizeof(*batch));


What about if batch is NULL?


+
 /* Create a query for each active counter. */
 for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
const struct gl_perf_monitor_group *g = >PerfMonitor.Groups[gid];
@@ -79,13 +88,35 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
   struct st_perf_counter_object *cntr =
  >active_counters[stm->num_active_counters];

- cntr->query= pipe->create_query(pipe, stc->query_type, 0);
   cntr->id   = cid;
   cntr->group_id = gid;
+ if (stc->flags & PIPE_DRIVER_QUERY_FLAG_BATCH) {
+cntr->batch_index = num_batch_counters;
+batch[num_batch_counters++] = stc->query_type;
+ } else {
+cntr->query = pipe->create_query(pipe, stc->query_type, 0);
+if (!cntr->query)
+   goto fail;
+ }
   ++stm->num_active_counters;
}
 }
+
+   /* Create the batch query. */
+   if (num_batch_counters) {
+  stm->batch_query = pipe->create_batch_query(pipe, num_batch_counters,
+  batch);
+  stm->batch_result = CALLOC(num_batch_counters, 
sizeof(stm->batch_result->batch[0]));
+  if (!stm->batch_query || !stm->batch_result)
+ goto fail;
+   }
+
+   FREE(batch);
 return true;
+
+fail:
+   FREE(batch);
+   return false;
  }

  static void
@@ -102,6 +133,13 @@ reset_perf_monitor(struct st_perf_monitor_object *stm,
 FREE(stm->active_counters);
 stm->active_counters = NULL;
 stm->num_active_counters = 0;
+
+   if (stm->batch_query) {
+  pipe->destroy_query(pipe, stm->batch_query);
+  stm->batch_query = NULL;
+   }
+   FREE(stm->batch_result);
+   stm->batch_result = NULL;
  }

  static struct gl_perf_monitor_object *
@@ -140,9 +178,13 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 /* Start the query for each active counter. */
 for (i = 0; i < stm->num_active_counters; ++i) {
struct pipe_query *query = stm->active_counters[i].query;
-  if (!pipe->begin_query(pipe, query))
+  if (query && !pipe->begin_query(pipe, query))
goto fail;
 }
+
+   if (stm->batch_query && !pipe->begin_query(pipe, stm->batch_query))
+  goto fail;
+
 return true;

  fail:
@@ -161,8 +203,12 @@ st_EndPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 /* Stop the query for each active counter. */
 for (i = 0; i < stm->num_active_counters; ++i) {
struct pipe_query *query = stm->active_counters[i].query;
-  pipe->end_query(pipe, query);
+  if (query)
+ pipe->end_query(pipe, query);
 }
+
+   if (stm->batch_query)
+  pipe->end_query(pipe, stm->batch_query);
  }

  static void
@@ -196,11 +242,16 @@ st_IsPerfMonitorResultAvailable(struct gl_context *ctx,
 for (i = 0; i < stm->num_active_counters; ++i) {
struct pipe_query *query = stm->active_counters[i].query;
union pipe_query_result result;
-  if 

Re: [Mesa-dev] [PATCH 4/9] st/mesa: store mapping from perfmon counter to query type

2015-11-13 Thread Samuel Pitoiset



On 11/13/2015 04:57 PM, Nicolai Hähnle wrote:

Previously, when a performance monitor was initialized, an inner loop through
all driver queries with string comparisons for each enabled performance
monitor counter was used. This hurts when a driver exposes lots of queries.


When I wrote this code one year ago for the nvc0 driver, this wasn't a 
real issue because it didn't expose lots of queries.


Anyway, this looks like a good improvement. Thanks!

Reviewed-by: Samuel Pitoiset 



---
  src/mesa/state_tracker/st_cb_perfmon.c | 74 +++---
  src/mesa/state_tracker/st_cb_perfmon.h | 14 +++
  src/mesa/state_tracker/st_context.h|  3 ++
  3 files changed, 49 insertions(+), 42 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index dedb8f5..80ff170 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -36,48 +36,20 @@
  #include "pipe/p_screen.h"
  #include "util/u_memory.h"

-/**
- * Return a PIPE_QUERY_x type >= PIPE_QUERY_DRIVER_SPECIFIC, or -1 if
- * the driver-specific query doesn't exist.
- */
-static int
-find_query_type(struct pipe_screen *screen, const char *name)
-{
-   int num_queries;
-   int type = -1;
-   int i;
-
-   num_queries = screen->get_driver_query_info(screen, 0, NULL);
-   if (!num_queries)
-  return type;
-
-   for (i = 0; i < num_queries; i++) {
-  struct pipe_driver_query_info info;
-
-  if (!screen->get_driver_query_info(screen, i, ))
- continue;
-
-  if (!strncmp(info.name, name, strlen(name))) {
- type = info.query_type;
- break;
-  }
-   }
-   return type;
-}
-
  static bool
  init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
  {
+   struct st_context *st = st_context(ctx);
 struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
-   struct pipe_screen *screen = st_context(ctx)->pipe->screen;
-   struct pipe_context *pipe = st_context(ctx)->pipe;
+   struct pipe_context *pipe = st->pipe;
 int gid, cid;

-   st_flush_bitmap_cache(st_context(ctx));
+   st_flush_bitmap_cache(st);

 /* Create a query for each active counter. */
 for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
const struct gl_perf_monitor_group *g = >PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = >perfmon[gid];

if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
   /* Maximum number of counters reached. Cannot start the session. */
@@ -90,20 +62,17 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)

for (cid = 0; cid < g->NumCounters; cid++) {
   const struct gl_perf_monitor_counter *c = >Counters[cid];
+ const struct st_perf_monitor_counter *stc = >counters[cid];
   struct st_perf_counter_object *cntr;
- int query_type;

   if (!BITSET_TEST(m->ActiveCounters[gid], cid))
  continue;

- query_type = find_query_type(screen, c->Name);
- assert(query_type != -1);
-
   cntr = CALLOC_STRUCT(st_perf_counter_object);
   if (!cntr)
  return false;

- cntr->query= pipe->create_query(pipe, query_type, 0);
+ cntr->query= pipe->create_query(pipe, stc->query_type, 0);
   cntr->id   = cid;
   cntr->group_id = gid;

@@ -286,6 +255,7 @@ st_init_perfmon(struct st_context *st)
 struct gl_perf_monitor_state *perfmon = >ctx->PerfMonitor;
 struct pipe_screen *screen = st->pipe->screen;
 struct gl_perf_monitor_group *groups = NULL;
+   struct st_perf_monitor_group *stgroups = NULL;
 int num_counters, num_groups;
 int gid, cid;

@@ -304,26 +274,36 @@ st_init_perfmon(struct st_context *st)
 if (!groups)
return false;

+   stgroups = CALLOC(num_groups, sizeof(*stgroups));
+   if (!stgroups)
+  goto fail_only_groups;
+
 for (gid = 0; gid < num_groups; gid++) {
struct gl_perf_monitor_group *g = [perfmon->NumGroups];
struct pipe_driver_query_group_info group_info;
struct gl_perf_monitor_counter *counters = NULL;
+  struct st_perf_monitor_counter *stcounters = NULL;

if (!screen->get_driver_query_group_info(screen, gid, _info))
   continue;

g->Name = group_info.name;
g->MaxActiveCounters = group_info.max_active_queries;
-  g->NumCounters = 0;
-  g->Counters = NULL;

if (group_info.num_queries)
   counters = CALLOC(group_info.num_queries, sizeof(*counters));
if (!counters)
   goto fail;
+  g->Counters = counters;
+
+  stcounters = CALLOC(group_info.num_queries, sizeof(*stcounters));
+  if (!stcounters)
+ goto fail;
+  stgroups[perfmon->NumGroups].counters = stcounters;

for (cid = 0; cid < num_counters; cid++) {
   struct gl_perf_monitor_counter *c = [g->NumCounters];
+ 

Re: [Mesa-dev] [PATCH v3] nir/copy_propagate: do not copy-propagate MOV srcs with source modifiers

2015-11-13 Thread Jason Ekstrand
On Fri, Nov 13, 2015 at 12:48 AM, Iago Toral Quiroga  wrote:
> If a source operand in a MOV has source modifiers, then we cannot
> copy-propagate it from the parent instruction and remove the MOV.
>
> v2: remove the check for source modifiers from is_move() (Jason)
>
> v3: Put the check for source modifiers back into is_move() since
> this function is called from copy_prop_alu_src(). Add source
> modifiers checks to is_vec() instead.
> ---
>
> Jason, I had to revert v2 after noticing this, I did not realize that 
> is_move()
> was actually called from another place when you suggested removing the check
> from there so I did not think that it could possibly break anything and did 
> not
> pass v2 through piglit again. Obviously I was wrong, sorry about that :-(

No worries.  Thanks for catching it.  I thought there was a reason it
was in is_move(), but I couldn't remember what...

> This version does not produce any regressions in piglit in my IVB laptop.

Sounds good.

Reviewed-by: Jason Ekstrand 

>  src/glsl/nir/nir_opt_copy_propagate.c | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/src/glsl/nir/nir_opt_copy_propagate.c 
> b/src/glsl/nir/nir_opt_copy_propagate.c
> index 7d8bdd7..cfc8e331 100644
> --- a/src/glsl/nir/nir_opt_copy_propagate.c
> +++ b/src/glsl/nir/nir_opt_copy_propagate.c
> @@ -55,10 +55,15 @@ static bool is_move(nir_alu_instr *instr)
>
>  static bool is_vec(nir_alu_instr *instr)
>  {
> -   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
> +   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
>if (!instr->src[i].src.is_ssa)
>   return false;
>
> +  /* we handle modifiers in a separate pass */
> +  if (instr->src[i].abs || instr->src[i].negate)
> + return false;
> +   }
> +
> return instr->op == nir_op_vec2 ||
>instr->op == nir_op_vec3 ||
>instr->op == nir_op_vec4;
> --
> 1.9.1
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 22/23] meta: Don't save or restore the VBO binding

2015-11-13 Thread Anuj Phogat
On Mon, Nov 9, 2015 at 4:56 PM, Ian Romanick  wrote:
> From: Ian Romanick 
>
> Nothing left in meta does anything with the VBO binding, so we don't
> need to save or restore it.  The VAO binding is still modified.
>
> Signed-off-by: Ian Romanick 
> ---
>  src/mesa/drivers/common/meta.c | 6 --
>  src/mesa/drivers/common/meta.h | 1 -
>  2 files changed, 7 deletions(-)
>
> diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
> index b06f683..b774d3c 100644
> --- a/src/mesa/drivers/common/meta.c
> +++ b/src/mesa/drivers/common/meta.c
> @@ -735,8 +735,6 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
>/* save vertex array object state */
>_mesa_reference_vao(ctx, >VAO,
> ctx->Array.VAO);
> -  _mesa_reference_buffer_object(ctx, >ArrayBufferObj,
> -ctx->Array.ArrayBufferObj);
>/* set some default state? */
> }
>
> @@ -1146,10 +1144,6 @@ _mesa_meta_end(struct gl_context *ctx)
> }
>
> if (state & MESA_META_VERTEX) {
> -  /* restore vertex buffer object */
> -  _mesa_BindBuffer(GL_ARRAY_BUFFER_ARB, save->ArrayBufferObj->Name);
> -  _mesa_reference_buffer_object(ctx, >ArrayBufferObj, NULL);
> -
>/* restore vertex array object */
>_mesa_BindVertexArray(save->VAO->Name);
>_mesa_reference_vao(ctx, >VAO, NULL);
> diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h
> index 503e743..8121ed4 100644
> --- a/src/mesa/drivers/common/meta.h
> +++ b/src/mesa/drivers/common/meta.h
> @@ -155,7 +155,6 @@ struct save_state
>
> /** MESA_META_VERTEX */
> struct gl_vertex_array_object *VAO;
> -   struct gl_buffer_object *ArrayBufferObj;
>
> /** MESA_META_VIEWPORT */
> GLfloat ViewportX, ViewportY, ViewportW, ViewportH;
> --
> 2.1.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reviewed-by: Anuj Phogat 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 21/23] meta/TexSubImage: Don't pollute the buffer object namespace

2015-11-13 Thread Anuj Phogat
On Mon, Nov 9, 2015 at 4:56 PM, Ian Romanick  wrote:
> From: Ian Romanick 
>
> tl;dr: For many types of GL object, we can *NEVER* use the Gen function.
>
> In OpenGL ES (all versions!) and OpenGL compatibility profile,
> applications don't have to call Gen functions.  The GL spec is very
> clear about how you can mix-and-match generated names and non-generated
> names: you can use any name you want for a particular object type until
> you call the Gen function for that object type.
>
> Here's the problem scenario:
>
>  - Application calls a meta function that generates a name.  The first
>Gen will probably return 1.
>
>  - Application decides to use the same name for an object of the same
>type without calling Gen.  Many demo programs use names 1, 2, 3,
>etc. without calling Gen.
>
>  - Application calls the meta function again, and the meta function
>replaces the data.  The application's data is lost, and the app
>fails.  Have fun debugging that.
>
> Signed-off-by: Ian Romanick 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92363
> ---
>  src/mesa/drivers/common/meta_tex_subimage.c | 42 
> -
>  1 file changed, 24 insertions(+), 18 deletions(-)
>
> diff --git a/src/mesa/drivers/common/meta_tex_subimage.c 
> b/src/mesa/drivers/common/meta_tex_subimage.c
> index b0ac677..4adaad7 100644
> --- a/src/mesa/drivers/common/meta_tex_subimage.c
> +++ b/src/mesa/drivers/common/meta_tex_subimage.c
> @@ -69,7 +69,7 @@ create_texture_for_pbo(struct gl_context *ctx,
> int dims, int width, int height, int depth,
> GLenum format, GLenum type, const void *pixels,
> const struct gl_pixelstore_attrib *packing,
> -   GLuint *tmp_pbo, GLuint *tmp_tex)
> +   struct gl_buffer_object **tmp_pbo, GLuint *tmp_tex)
>  {
> uint32_t pbo_format;
> GLenum internal_format;
> @@ -101,7 +101,7 @@ create_texture_for_pbo(struct gl_context *ctx,
> row_stride = _mesa_image_row_stride(packing, width, format, type);
>
> if (_mesa_is_bufferobj(packing->BufferObj)) {
> -  *tmp_pbo = 0;
> +  *tmp_pbo = NULL;
>buffer_obj = packing->BufferObj;
>first_pixel += (intptr_t)pixels;
> } else {
> @@ -109,23 +109,27 @@ create_texture_for_pbo(struct gl_context *ctx,
>
>assert(create_pbo);
>
> -  _mesa_CreateBuffers(1, tmp_pbo);
> +  *tmp_pbo = ctx->Driver.NewBufferObject(ctx, 0xDEADBEEF);
> +  if (*tmp_pbo == NULL)
> + return NULL;
>
>/* In case of GL_PIXEL_PACK_BUFFER, pass null pointer for the pixel
> -   * data to avoid unnecessary data copying in _mesa_NamedBufferData().
> +   * data to avoid unnecessary data copying in _mesa_buffer_data.
> */
>if (is_pixel_pack)
> - _mesa_NamedBufferData(*tmp_pbo,
> -   last_pixel - first_pixel,
> -   NULL,
> -   GL_STREAM_READ);
> + _mesa_buffer_data(ctx, *tmp_pbo, GL_NONE,
> +   last_pixel - first_pixel,
> +   NULL,
> +   GL_STREAM_READ,
> +   __func__);
>else
> - _mesa_NamedBufferData(*tmp_pbo,
> -   last_pixel - first_pixel,
> -   (char *)pixels + first_pixel,
> -   GL_STREAM_DRAW);
> + _mesa_buffer_data(ctx, *tmp_pbo, GL_NONE,
> +   last_pixel - first_pixel,
> +   (char *)pixels + first_pixel,
> +   GL_STREAM_DRAW,
> +   __func__);
>
> -  buffer_obj = _mesa_lookup_bufferobj(ctx, *tmp_pbo);
> +  buffer_obj = *tmp_pbo;
>first_pixel = 0;
> }
>
> @@ -157,7 +161,7 @@ create_texture_for_pbo(struct gl_context *ctx,
>   row_stride,
>   read_only)) {
>_mesa_DeleteTextures(1, tmp_tex);
> -  _mesa_DeleteBuffers(1, tmp_pbo);
> +  _mesa_reference_buffer_object(ctx, tmp_pbo, NULL);
>return NULL;
> }
>
> @@ -173,7 +177,8 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint 
> dims,
> bool allocate_storage, bool create_pbo,
> const struct gl_pixelstore_attrib *packing)
>  {
> -   GLuint pbo = 0, pbo_tex = 0, fbos[2] = { 0, 0 };
> +   struct gl_buffer_object *pbo = NULL;
> +   GLuint pbo_tex = 0, fbos[2] = { 0, 0 };
> int image_height;
> struct gl_texture_image *pbo_tex_image;
> GLenum status;
> @@ -276,7 +281,7 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint 
> dims,
>  fail:
> _mesa_DeleteFramebuffers(2, fbos);
> _mesa_DeleteTextures(1, _tex);
> -   

Re: [Mesa-dev] [PATCH 23/23] meta: Don't save or restore the active client texture

2015-11-13 Thread Anuj Phogat
On Mon, Nov 9, 2015 at 4:56 PM, Ian Romanick  wrote:
> From: Ian Romanick 
>
> This setting is only used by glTexCoordPointer and related glEnable
> calls.  Since the preceding commits removed all of those, it is not
> necessary to save, reset to default, or restore this state.
>
> Signed-off-by: Ian Romanick 
> ---
>  src/mesa/drivers/common/meta.c | 3 ---
>  src/mesa/drivers/common/meta.h | 1 -
>  2 files changed, 4 deletions(-)
>
> diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
> index b774d3c..5183648 100644
> --- a/src/mesa/drivers/common/meta.c
> +++ b/src/mesa/drivers/common/meta.c
> @@ -650,7 +650,6 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
>GLuint u, tgt;
>
>save->ActiveUnit = ctx->Texture.CurrentUnit;
> -  save->ClientActiveUnit = ctx->Array.ActiveTexture;
>save->EnvMode = ctx->Texture.Unit[0].EnvMode;
>
>/* Disable all texture units */
> @@ -683,7 +682,6 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
>
>/* set defaults for unit[0] */
>_mesa_ActiveTexture(GL_TEXTURE0);
> -  _mesa_ClientActiveTexture(GL_TEXTURE0);
>_mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
> }
>
> @@ -1110,7 +1108,6 @@ _mesa_meta_end(struct gl_context *ctx)
>
>/* restore current unit state */
>_mesa_ActiveTexture(GL_TEXTURE0 + save->ActiveUnit);
> -  _mesa_ClientActiveTexture(GL_TEXTURE0 + save->ClientActiveUnit);
> }
>
> if (state & MESA_META_TRANSFORM) {
> diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h
> index 8121ed4..d2c22f0 100644
> --- a/src/mesa/drivers/common/meta.h
> +++ b/src/mesa/drivers/common/meta.h
> @@ -145,7 +145,6 @@ struct save_state
>
> /** MESA_META_TEXTURE */
> GLuint ActiveUnit;
> -   GLuint ClientActiveUnit;
> /** for unit[0] only */
> struct gl_texture_object *CurrentTexture[NUM_TEXTURE_TARGETS];
> /** mask of TEXTURE_2D_BIT, etc */
> --
> 2.1.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reviewed-by: Anuj Phogat 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] llvm TGSI backend (WIP) questions

2015-11-13 Thread Tom Stellard
On Fri, Nov 13, 2015 at 02:46:52PM +0100, Hans de Goede wrote:
> Hi All,
> 
> So as discussed I've started working on a TGSI backend for
> llvm to use as a way to get compute going on nouveau (and other gpu-s).
> 
> I'm still learning all the ins and outs of llvm so I do not have
> much to show yet.
> 
> I've rebased Francisco's (curro's) latest version on top of llvm
> trunk, and added a commit on top to actually get it to build with the
> latest trunk. So currently I'm at the point where I've just
> taken Francisco's code, and made it compile, no more and no less.
> 
> I have a git repo with this work available here:
> 
> http://cgit.freedesktop.org/~jwrdegoede/llvm/
> 
> So the next step would be to test this and see if it actually
> does anything, questions:
> 
> 1) Does anyone have a simple test case / command where I can
> invoke just llvm and get TGSI asm output to check ?
> 

The easiest way to do this is with the llc tool which ships with llvm.
It compiles LLVM IR to target code, which in this case is tgsi.
I would recommend taking one of the simple examples from
test/CodeGen/AMDGPU (you may need to get these from llvm trunk, not sure
what llvm version you are using).

To use llc:

llc -march=tgsi input.ll -o -


This will output TGSI.


If you want to use clang to compile OpenCL C kernels to TGSI you will
need to teach clang about the TGSI target by implementing a
sub-class of TargetInfo in lib/Basic/Targets.cpp.  Look at the 
AMDGPU target for examples, but I recommend starting with llc.

> 2) Assuming I get the above to (somewhat) work, is there a
> way to make llvm show the output of the various intermediate
> passes in a human readable form ?
> 

You can pass -print-before-all or -print-after-all to dump the
intermediate forms.

> Regards,
> 
> Hans
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/24] i965: Refactor register classes

2015-11-13 Thread Matt Turner
On Wed, Nov 11, 2015 at 3:20 PM, Kenneth Graunke  wrote:
> On Monday, November 02, 2015 04:29:10 PM Matt Turner wrote:
>> backend_reg (from which fs_reg, src_reg, and dst_reg inherit) includes a
>> brw_reg that's used for "hardware regs" -- precolored registers or 
>> architecture
>> registers. This leads to properties like source modifiers, the register type,
>> swizzles, and writemasks being duplicated between the derived classes and the
>> brw_reg and of course often being out of sync.
>>
>> This series removes the "fixed_hw_reg" field from backend_reg by just making
>> backend_reg inherit from brw_reg, and then removes fields duplicated in the
>> derived classes. In the process, it gets rid of HW_REG.
>>
>> This in turn simplifies a lot of code -- no longer do you have to check a
>> number of subfields if file == HW_REG.
>>
>> The last few patches begin some clean ups -- since the base of our register
>> classes is now brw_reg we don't need to do as many conversions. I've only
>> handled immediates so far and more is planned, but the series is growing 
>> large
>> and is a lot of churn already.
>>
>> The sizes of the register classes all shrink by 8 bytes:
>>
>>backend_reg   20 -> 12
>>fs_reg40 -> 32
>>src_reg   32 -> 24?
>>dst_reg   32 -> 24?
>>
>> The remaining fields in the classes are
>>
>>backend_reg: reg_offset
>>fs_reg:  reladdr, subreg_offset, stride
>>src_reg  reladdr
>>dst_reg  reladdr
>
> Assuming you address my and Emil's feedback, the series is:
>
> Reviewed-by: Kenneth Graunke 
>
> This is an invasive enough refactor that I believe running the
> assembly diffing tool would be worthwhile.

This is valuable, but was quite a pain. If we're looking for ideas for
useful tools, maybe this should be considered. It could be as simple
as writing the before and after assembly output to filenames based on
the name of the test execution, removing files that were identical and
leaving those with differences for manual inspection.

Anyway, the differences are:

i965/vec4: Remove swizzle/writemask fields from src/dst_reg.
   We now use writemasks on null destinations (comparison instructions
with conditional mod)



Between that commit and "i965: Use BRW_MRF_COMPR4 macro in more
places.", presumably in "i965: Replace HW_REG with ARF/FIXED_GRF.", we
begin applying dependency hints on two instructions in
"bin/ext_transform_feedback-alignment" that are separated by a read of
the accumulator.



i965/fs: Replace fs_reg(imm) constructors with brw_imm_*().

Unexpected (but correct, and originally intended) type change from UD
to D in this hunk:

-   bld.AND(retype(result, BRW_REGISTER_TYPE_D), tmp, fs_reg(0xbf80));
+   bld.AND(retype(result, BRW_REGISTER_TYPE_D), tmp, brw_imm_d(0xbf80));



i965/vec4: Replace src_reg(imm) constructors with brw_imm_*().

Some intentional dst_null_d -> dst_null_ud changes where the sources
were UD (and one of them was being changed to brw_imm_ud).



I did notice throughout that

   bin/getteximage-formats -auto
   bin/getteximage-formats init-by-rendering -auto -fbo

have a MOV that would gain or lose a saturate modifier. I don't think
that's related to my series, but is worth investigating more (valgrind
didn't identify anything).
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/9] gallium: add the concept of batch queries

2015-11-13 Thread Samuel Pitoiset



On 11/13/2015 04:57 PM, Nicolai Hähnle wrote:

Some drivers (in particular radeon[si], but also freedreno judging from
a quick grep) may want to expose performance counters that cannot be
individually enabled or disabled.

Allow such drivers to mark driver-specific queries as requiring a new
type of batch query object that is used to start and stop a list of queries
simultaneously.
---
  src/gallium/drivers/nouveau/nvc0/nvc0_query.c |  1 +
  src/gallium/include/pipe/p_context.h  |  3 +++
  src/gallium/include/pipe/p_defines.h  | 27 +--
  3 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index a1d6162..0608337 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -162,6 +162,7 @@ nvc0_screen_get_driver_query_info(struct pipe_screen 
*pscreen,
 info->max_value.u64 = 0;
 info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
 info->group_id = -1;
+   info->flags = 0;

  #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
 if (id < num_sw_queries)
diff --git a/src/gallium/include/pipe/p_context.h 
b/src/gallium/include/pipe/p_context.h
index 27f358f..f122c74 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -115,6 +115,9 @@ struct pipe_context {
 struct pipe_query *(*create_query)( struct pipe_context *pipe,
 unsigned query_type,
 unsigned index );
+   struct pipe_query *(*create_batch_query)( struct pipe_context *pipe,
+ unsigned num_queries,
+ unsigned *query_types );


Could you please document that function like get_query_result()?



 void (*destroy_query)(struct pipe_context *pipe,
   struct pipe_query *q);
diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 7ed9f6d..b3c8b9f 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -776,6 +776,16 @@ struct pipe_query_data_pipeline_statistics
  };

  /**
+ * For batch queries.
+ */
+union pipe_numeric_type_union
+{
+   uint64_t u64;
+   uint32_t u32;
+   float f;
+};
+
+/**
   * Query result (returned by pipe_context::get_query_result).
   */
  union pipe_query_result
@@ -811,6 +821,9 @@ union pipe_query_result

 /* PIPE_QUERY_PIPELINE_STATISTICS */
 struct pipe_query_data_pipeline_statistics pipeline_statistics;
+
+   /* batch queries */
+   union pipe_numeric_type_union batch[0];
  };

  union pipe_color_union
@@ -840,12 +853,13 @@ enum pipe_driver_query_result_type
 PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE = 1,
  };

-union pipe_numeric_type_union
-{
-   uint64_t u64;
-   uint32_t u32;
-   float f;
-};
+/**
+ * Some hardware requires some hardware-specific queries to be submitted
+ * as batched queries. The corresponding query objects are created using
+ * create_batch_query, and at most one such query may be active at
+ * any time.
+ */
+#define PIPE_DRIVER_QUERY_FLAG_BATCH (1 << 0)

  struct pipe_driver_query_info
  {
@@ -855,6 +869,7 @@ struct pipe_driver_query_info
 enum pipe_driver_query_type type;
 enum pipe_driver_query_result_type result_type;
 unsigned group_id;
+   unsigned flags;
  };

  struct pipe_driver_query_group_info



--
-Samuel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/9] gallium/hud: remove unused field in query_info

2015-11-13 Thread Samuel Pitoiset

Reviewed-by: Samuel Pitoiset 


On 11/13/2015 04:57 PM, Nicolai Hähnle wrote:

---
  src/gallium/auxiliary/hud/hud_driver_query.c | 1 -
  1 file changed, 1 deletion(-)

diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c 
b/src/gallium/auxiliary/hud/hud_driver_query.c
index f14305e..3198ab3 100644
--- a/src/gallium/auxiliary/hud/hud_driver_query.c
+++ b/src/gallium/auxiliary/hud/hud_driver_query.c
@@ -48,7 +48,6 @@ struct query_info {
 /* Ring of queries. If a query is busy, we use another slot. */
 struct pipe_query *query[NUM_QUERIES];
 unsigned head, tail;
-   unsigned num_queries;

 uint64_t last_time;
 uint64_t results_cumulative;



--
-Samuel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/9] st/mesa: map semantic driver query types to underlying type

2015-11-13 Thread Samuel Pitoiset



On 11/13/2015 04:57 PM, Nicolai Hähnle wrote:

---
  src/gallium/include/pipe/p_defines.h   | 2 ++
  src/mesa/state_tracker/st_cb_perfmon.c | 3 +++
  2 files changed, 5 insertions(+)

diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 7f241c8..7ed9f6d 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -791,6 +791,8 @@ union pipe_query_result
 /* PIPE_QUERY_PRIMITIVES_GENERATED */
 /* PIPE_QUERY_PRIMITIVES_EMITTED */
 /* PIPE_DRIVER_QUERY_TYPE_UINT64 */
+   /* PIPE_DRIVER_QUERY_TYPE_BYTES */
+   /* PIPE_DRIVER_QUERY_TYPE_MICROSECONDS */


While you are at it, please also add /* 
PIPE_DRIVER_QUERY_TYPE_MICROSECONDS */ to pipe_query_result.


With this minor change, this patch is:

Reviewed-by: Samuel Pitoiset 



 /* PIPE_DRIVER_QUERY_TYPE_HZ */
 uint64_t u64;

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 4ec6d86..dedb8f5 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -334,6 +334,9 @@ st_init_perfmon(struct st_context *st)
   c->Name = info.name;
   switch (info.type) {
  case PIPE_DRIVER_QUERY_TYPE_UINT64:
+case PIPE_DRIVER_QUERY_TYPE_BYTES:
+case PIPE_DRIVER_QUERY_TYPE_MICROSECONDS:
+case PIPE_DRIVER_QUERY_TYPE_HZ:
 c->Minimum.u64 = 0;
 c->Maximum.u64 = info.max_value.u64 ? info.max_value.u64 : -1;
 c->Type = GL_UNSIGNED_INT64_AMD;



--
-Samuel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 8/9] gallium/hud: add support for batch queries

2015-11-13 Thread Samuel Pitoiset

Some comments below.

On 11/13/2015 04:57 PM, Nicolai Hähnle wrote:

---
  src/gallium/auxiliary/hud/hud_context.c  |  24 ++-
  src/gallium/auxiliary/hud/hud_driver_query.c | 248 +++
  src/gallium/auxiliary/hud/hud_private.h  |  13 +-
  3 files changed, 240 insertions(+), 45 deletions(-)

diff --git a/src/gallium/auxiliary/hud/hud_context.c 
b/src/gallium/auxiliary/hud/hud_context.c
index ffe30b8..bcef701 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -57,6 +57,7 @@ struct hud_context {
 struct cso_context *cso;
 struct u_upload_mgr *uploader;

+   struct hud_batch_query_context *batch_query;
 struct list_head pane_list;

 /* states */
@@ -510,6 +511,8 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
 hud_alloc_vertices(hud, >text, 4 * 512, 4 * sizeof(float));

 /* prepare all graphs */
+   hud_batch_query_update(hud->batch_query);
+
 LIST_FOR_EACH_ENTRY(pane, >pane_list, head) {
LIST_FOR_EACH_ENTRY(gr, >graph_list, head) {
   gr->query_new_value(gr);
@@ -903,17 +906,21 @@ hud_parse_env_var(struct hud_context *hud, const char 
*env)
}
else if (strcmp(name, "samples-passed") == 0 &&
 has_occlusion_query(hud->pipe->screen)) {
- hud_pipe_query_install(pane, hud->pipe, "samples-passed",
+ hud_pipe_query_install(>batch_query, pane, hud->pipe,
+"samples-passed",
  PIPE_QUERY_OCCLUSION_COUNTER, 0, 0,
  PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+0);
}
else if (strcmp(name, "primitives-generated") == 0 &&
 has_streamout(hud->pipe->screen)) {
- hud_pipe_query_install(pane, hud->pipe, "primitives-generated",
+ hud_pipe_query_install(>batch_query, pane, hud->pipe,
+"primitives-generated",
  PIPE_QUERY_PRIMITIVES_GENERATED, 0, 0,
  PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+0);
}
else {
   boolean processed = FALSE;
@@ -938,17 +945,19 @@ hud_parse_env_var(struct hud_context *hud, const char 
*env)
 if (strcmp(name, pipeline_statistics_names[i]) == 0)
break;
  if (i < Elements(pipeline_statistics_names)) {
-   hud_pipe_query_install(pane, hud->pipe, name,
+   hud_pipe_query_install(>batch_query, pane, hud->pipe, name,
PIPE_QUERY_PIPELINE_STATISTICS, i,
0, PIPE_DRIVER_QUERY_TYPE_UINT64,
-  PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+  PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+  0);
 processed = TRUE;
  }
   }

   /* driver queries */
   if (!processed) {
-if (!hud_driver_query_install(pane, hud->pipe, name)){
+if (!hud_driver_query_install(>batch_query, pane, hud->pipe,
+  name)) {
 fprintf(stderr, "gallium_hud: unknown driver query '%s'\n", 
name);
  }
   }
@@ -1287,6 +1296,7 @@ hud_destroy(struct hud_context *hud)
FREE(pane);
 }

+   hud_batch_query_cleanup(>batch_query);
 pipe->delete_fs_state(pipe, hud->fs_color);
 pipe->delete_fs_state(pipe, hud->fs_text);
 pipe->delete_vs_state(pipe, hud->vs);
diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c 
b/src/gallium/auxiliary/hud/hud_driver_query.c
index 3198ab3..abc9f54 100644
--- a/src/gallium/auxiliary/hud/hud_driver_query.c
+++ b/src/gallium/auxiliary/hud/hud_driver_query.c
@@ -34,13 +34,149 @@
  #include "hud/hud_private.h"
  #include "pipe/p_screen.h"
  #include "os/os_time.h"
+#include "util/u_math.h"
  #include "util/u_memory.h"
  #include 

+// Must be a power of two
  #define NUM_QUERIES 8

+struct hud_batch_query_context {
+   struct pipe_context *pipe;
+   unsigned num_query_types;
+   unsigned allocated_query_types;
+   unsigned *query_types;
+
+   boolean failed;
+   struct pipe_query *query[NUM_QUERIES];
+   union pipe_query_result *result[NUM_QUERIES];
+   unsigned head, pending, results;
+};
+
+void
+hud_batch_query_update(struct hud_batch_query_context *bq)
+{
+   struct pipe_context *pipe;
+
+   if (!bq || bq->failed)
+  return;
+
+   pipe = bq->pipe;
+
+   if (bq->query[bq->head])
+  pipe->end_query(pipe, bq->query[bq->head]);
+
+   

Re: [Mesa-dev] [PATCH 0/9] gallium: batch query objects and related cleanups

2015-11-13 Thread Samuel Pitoiset

Hi Nicolai,

Did you run the amd_performance_monitor piglit tests to make sure none of
your changes broke anything?


Did you test on the nvc0 driver, which is the only driver that currently
exposes GL_AMD_performance_monitor? In case you didn't, I'll test it
myself in the next few days. You might not have the hardware. :-)


Thanks.

On 11/13/2015 04:57 PM, Nicolai Hähnle wrote:

Hi,

the main point of this patch series is to introduce batch query objects.

For AMD_performance_monitor, hardware may not be able to start and stop
performance counters independently of each other. The current query interface
does not fit such hardware well.

With this series, drivers can mark driver-specific queries with the
PIPE_DRIVER_QUERY_FLAG_BATCH flag, which indicates that those queries require
the use of batch query objects. Batch query objects are created with an
immutable list of queries, which requires a new entry point in pipe_context,
but apart from that they use the same begin_query/end_query/etc. entry points.

The radeon-specific part that actually makes use of this feature is not quite
ready yet, but I already wanted to get this part out there for feedback.
Please review!

Thanks,
Nicolai
---
  gallium/auxiliary/hud/hud_context.c   |   24 ++
  gallium/auxiliary/hud/hud_driver_query.c  |  249 
+-
  gallium/auxiliary/hud/hud_private.h   |   13 +
  gallium/drivers/nouveau/nvc0/nvc0_query.c |4
  gallium/include/pipe/p_context.h  |3
  gallium/include/pipe/p_defines.h  |   36 ++--
  mesa/state_tracker/st_cb_perfmon.c|  247 -
  mesa/state_tracker/st_cb_perfmon.h|   32 +++
  mesa/state_tracker/st_context.h   |3
  9 files changed, 437 insertions(+), 174 deletions(-)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev



--
-Samuel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/9] gallium: remove pipe_driver_query_group_info field type

2015-11-13 Thread Samuel Pitoiset



On 11/13/2015 04:57 PM, Nicolai Hähnle wrote:

This was only used to implement an unnecessarily restrictive interpretation
of the spec of AMD_performance_monitor. The spec says

   A performance monitor consists of a number of hardware and software
   counters that can be sampled by the GPU and reported back to the
   application.

I guess one could take this as a requirement that counters _must_ be sampled
by the GPU, but then why are they called _software_ counters? Besides,
there's not much reason _not_ to expose all counters that are available,
and this simplifies the code.


The spec says:

"
While BeginPerfMonitorAMD does mark the beginning of performance counter
collection, the counters do not begin collecting immediately.  Rather, 
the counters begin collection when BeginPerfMonitorAMD is processed by

the hardware.  That is, the API is asynchronous, and performance counter
collection does not begin until the graphics hardware processes the
BeginPerfMonitorAMD command.
"

This is why I introduced the notion of a group of GPU counters in Gallium:
"processed by the hardware", "asynchronous" and "command" make it seem
like the spec is talking about the GPU only.


In what world are software counters sampled by the GPU? :-)
This spec is definitely not clear about that...

Anyway, I disagree with this patch because:
1) we need to agree on what amd_performance_monitor must or must not
expose. Maybe it's time to ask the guys who wrote it?

2) this doesn't really simplify code.


---
  src/gallium/drivers/nouveau/nvc0/nvc0_query.c |  3 ---
  src/gallium/include/pipe/p_defines.h  |  7 ---
  src/mesa/state_tracker/st_cb_perfmon.c| 30 ---
  3 files changed, 40 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index f539210..a1d6162 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -200,7 +200,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
 if (id == NVC0_HW_SM_QUERY_GROUP) {
if (screen->compute) {
   info->name = "MP counters";
- info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;

   /* Because we can't expose the number of hardware counters needed for
* each different query, we don't want to allow more than one active
@@ -224,7 +223,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
if (screen->compute) {
   if (screen->base.class_3d < NVE4_3D_CLASS) {
  info->name = "Performance metrics";
-info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
  info->max_active_queries = 1;
  info->num_queries = NVC0_HW_METRIC_QUERY_COUNT;
  return 1;
@@ -234,7 +232,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
  #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
 else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) {
info->name = "Driver statistics";
-  info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_CPU;
info->max_active_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
info->num_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
return 1;
diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 7240154..7f241c8 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -829,12 +829,6 @@ enum pipe_driver_query_type
 PIPE_DRIVER_QUERY_TYPE_HZ   = 6,
  };

-enum pipe_driver_query_group_type
-{
-   PIPE_DRIVER_QUERY_GROUP_TYPE_CPU = 0,
-   PIPE_DRIVER_QUERY_GROUP_TYPE_GPU = 1,
-};
-
  /* Whether an average value per frame or a cumulative value should be
   * displayed.
   */
@@ -864,7 +858,6 @@ struct pipe_driver_query_info
  struct pipe_driver_query_group_info
  {
 const char *name;
-   enum pipe_driver_query_group_type type;
 unsigned max_active_queries;
 unsigned num_queries;
  };
diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 1bb5be3..4ec6d86 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -65,27 +65,6 @@ find_query_type(struct pipe_screen *screen, const char *name)
 return type;
  }

-/**
- * Return TRUE if the underlying driver expose GPU counters.
- */
-static bool
-has_gpu_counters(struct pipe_screen *screen)
-{
-   int num_groups, gid;
-
-   num_groups = screen->get_driver_query_group_info(screen, 0, NULL);
-   for (gid = 0; gid < num_groups; gid++) {
-  struct pipe_driver_query_group_info group_info;
-
-  if (!screen->get_driver_query_group_info(screen, gid, &group_info))
- continue;
-
-  if (group_info.type == PIPE_DRIVER_QUERY_GROUP_TYPE_GPU)
- return true;
-   }
-   return false;
-}
-
  static bool
  init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
  {
@@ -313,12 +292,6 @@ 

Re: [Mesa-dev] [PATCH 5/9] st/mesa: use BITSET_FOREACH_SET to loop through active perfmon counters

2015-11-13 Thread Samuel Pitoiset

Reviewed-by: Samuel Pitoiset 


On 11/13/2015 04:57 PM, Nicolai Hähnle wrote:

---
  src/mesa/state_tracker/st_cb_perfmon.c | 7 ++-
  1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 80ff170..ec12eb2 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -50,6 +50,7 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
const struct st_perf_monitor_group *stg = >perfmon[gid];
+  BITSET_WORD tmp;

if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
   /* Maximum number of counters reached. Cannot start the session. */
@@ -60,14 +61,10 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
   return false;
}

-  for (cid = 0; cid < g->NumCounters; cid++) {
- const struct gl_perf_monitor_counter *c = >Counters[cid];
+  BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) {
   const struct st_perf_monitor_counter *stc = >counters[cid];
   struct st_perf_counter_object *cntr;

- if (!BITSET_TEST(m->ActiveCounters[gid], cid))
-continue;
-
   cntr = CALLOC_STRUCT(st_perf_counter_object);
   if (!cntr)
  return false;



--
-Samuel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965: Convert scalar_* flags to a scalar_stage array.

2015-11-13 Thread Kenneth Graunke
On Friday, November 13, 2015 10:06:23 AM Pohjolainen, Topi wrote:
> On Thu, Nov 12, 2015 at 03:38:51PM -0800, Kenneth Graunke wrote:
> > I was going to add scalar_tcs and scalar_tes flags, and then thought
> > better of it and decided to convert this to an array.  Simpler.
> > 
> > Signed-off-by: Kenneth Graunke 
> > ---
> >  src/mesa/drivers/dri/i965/brw_compiler.h  |  3 +--
> >  src/mesa/drivers/dri/i965/brw_context.c   |  2 +-
> >  src/mesa/drivers/dri/i965/brw_gs.c|  3 ++-
> >  src/mesa/drivers/dri/i965/brw_link.cpp| 11 +---
> >  src/mesa/drivers/dri/i965/brw_program.c   |  3 ++-
> >  src/mesa/drivers/dri/i965/brw_shader.cpp  | 31 
> > ++-
> >  src/mesa/drivers/dri/i965/brw_shader.h|  2 --
> >  src/mesa/drivers/dri/i965/brw_vec4.cpp|  4 +--
> >  src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp |  2 +-
> >  src/mesa/drivers/dri/i965/brw_vs.c|  7 ++---
> >  10 files changed, 28 insertions(+), 40 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h 
> > b/src/mesa/drivers/dri/i965/brw_compiler.h
> > index e3a26d6..3f54616 100644
> > --- a/src/mesa/drivers/dri/i965/brw_compiler.h
> > +++ b/src/mesa/drivers/dri/i965/brw_compiler.h
> > @@ -89,8 +89,7 @@ struct brw_compiler {
> > void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
> > void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
> >  
> > -   bool scalar_vs;
> > -   bool scalar_gs;
> > +   bool scalar_stage[MESA_SHADER_STAGES];
> > struct gl_shader_compiler_options 
> > glsl_compiler_options[MESA_SHADER_STAGES];
> >  };
> >  
> > diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
> > b/src/mesa/drivers/dri/i965/brw_context.c
> > index ac6045d..2db99c7 100644
> > --- a/src/mesa/drivers/dri/i965/brw_context.c
> > +++ b/src/mesa/drivers/dri/i965/brw_context.c
> > @@ -525,7 +525,7 @@ brw_initialize_context_constants(struct brw_context 
> > *brw)
> >ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms =
> >   BRW_MAX_IMAGES;
> >ctx->Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms =
> > - (brw->intelScreen->compiler->scalar_vs ? BRW_MAX_IMAGES : 0);
> > + (brw->intelScreen->compiler->scalar_stage[MESA_SHADER_VERTEX] ? 
> > BRW_MAX_IMAGES : 0);
> >ctx->Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms =
> >   BRW_MAX_IMAGES;
> >ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
> > diff --git a/src/mesa/drivers/dri/i965/brw_gs.c 
> > b/src/mesa/drivers/dri/i965/brw_gs.c
> > index ed0890f..ad5b242 100644
> > --- a/src/mesa/drivers/dri/i965/brw_gs.c
> > +++ b/src/mesa/drivers/dri/i965/brw_gs.c
> > @@ -87,7 +87,8 @@ brw_codegen_gs_prog(struct brw_context *brw,
> > prog_data.base.base.nr_image_params = gs->NumImages;
> >  
> > brw_nir_setup_glsl_uniforms(gp->program.Base.nir, prog, 
> > >program.Base,
> > -   _data.base.base, compiler->scalar_gs);
> > +   _data.base.base,
> > +   
> > compiler->scalar_stage[MESA_SHADER_GEOMETRY]);
> >  
> > GLbitfield64 outputs_written = gp->program.Base.OutputsWritten;
> >  
> > diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp 
> > b/src/mesa/drivers/dri/i965/brw_link.cpp
> > index 2991173..14421d4 100644
> > --- a/src/mesa/drivers/dri/i965/brw_link.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_link.cpp
> > @@ -66,12 +66,14 @@ brw_lower_packing_builtins(struct brw_context *brw,
> > gl_shader_stage shader_type,
> > exec_list *ir)
> >  {
> > +   const struct brw_compiler *compiler = brw->intelScreen->compiler;
> > +
> > int ops = LOWER_PACK_SNORM_2x16
> > | LOWER_UNPACK_SNORM_2x16
> > | LOWER_PACK_UNORM_2x16
> > | LOWER_UNPACK_UNORM_2x16;
> >  
> > -   if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) {
> > +   if (compiler->scalar_stage[shader_type]) {
> >ops |= LOWER_UNPACK_UNORM_4x8
> > | LOWER_UNPACK_SNORM_4x8
> > | LOWER_PACK_UNORM_4x8
> > @@ -84,7 +86,7 @@ brw_lower_packing_builtins(struct brw_context *brw,
> > * lowering is needed. For SOA code, the Half2x16 ops must be
> > * scalarized.
> > */
> > -  if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) 
> > {
> > +  if (compiler->scalar_stage[shader_type]) {
> >   ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
> >   |  LOWER_UNPACK_HALF_2x16_TO_SPLIT;
> >}
> > @@ -103,6 +105,7 @@ process_glsl_ir(gl_shader_stage stage,
> >  struct gl_shader *shader)
> >  {
> > struct gl_context *ctx = >ctx;
> > +   const struct brw_compiler *compiler = brw->intelScreen->compiler;
> > const struct gl_shader_compiler_options *options =
> >

Re: [Mesa-dev] [PATCH 0/9] gallium: batch query objects and related cleanups

2015-11-13 Thread Nicolai Hähnle

Hi Samuel,

thanks for taking a look!

On 13.11.2015 18:35, Samuel Pitoiset wrote:

Did you run amd_performance_monitor piglit tests to make sure all of
your changes didn't break anything?


Yes, everything passes here.



Did you test on nvc0 driver which is the only driver that currently
exposes GL_AMD_performance_monitor? In case you didn't, I'll test it
myself in the next few days. You might not have the hardware. :-)


Sorry, I don't have the hardware.

Thanks,
Nicolai



Thanks.

On 11/13/2015 04:57 PM, Nicolai Hähnle wrote:

Hi,

the main point of this patch series is to introduce batch query objects.

For AMD_performance_monitor, hardware may not be able to start and stop
performance counters independently of each other. The current query
interface
does not fit such hardware well.

With this series, drivers can mark driver-specific queries with the
PIPE_DRIVER_QUERY_FLAG_BATCH flag, which indicates that those queries
require
the use of batch query objects. Batch query objects are created with an
immutable list of queries, which requires a new entry point in
pipe_context,
but apart from that they use the same begin_query/end_query/etc. entry
points.

The radeon-specific part that actually makes use of this feature is
not quite
ready yet, but I already wanted to get this part out there for feedback.
Please review!

Thanks,
Nicolai
---
  gallium/auxiliary/hud/hud_context.c   |   24 ++
  gallium/auxiliary/hud/hud_driver_query.c  |  249
+-
  gallium/auxiliary/hud/hud_private.h   |   13 +
  gallium/drivers/nouveau/nvc0/nvc0_query.c |4
  gallium/include/pipe/p_context.h  |3
  gallium/include/pipe/p_defines.h  |   36 ++--
  mesa/state_tracker/st_cb_perfmon.c|  247
-
  mesa/state_tracker/st_cb_perfmon.h|   32 +++
  mesa/state_tracker/st_context.h   |3
  9 files changed, 437 insertions(+), 174 deletions(-)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev





___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 92706] glBlitFramebuffer refuses to blit RGBA to RGB with MSAA

2015-11-13 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=92706

--- Comment #5 from EoD  ---
Is there any reason this is not getting merged? Is there any way to help
get it merged?

It would be great to fix the aforementioned bugs before an 11.1 release.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/9] st/mesa: map semantic driver query types to underlying type

2015-11-13 Thread Samuel Pitoiset



On 11/13/2015 07:22 PM, Nicolai Hähnle wrote:

On 13.11.2015 18:34, Samuel Pitoiset wrote:



On 11/13/2015 04:57 PM, Nicolai Hähnle wrote:

---
  src/gallium/include/pipe/p_defines.h   | 2 ++
  src/mesa/state_tracker/st_cb_perfmon.c | 3 +++
  2 files changed, 5 insertions(+)

diff --git a/src/gallium/include/pipe/p_defines.h
b/src/gallium/include/pipe/p_defines.h
index 7f241c8..7ed9f6d 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -791,6 +791,8 @@ union pipe_query_result
 /* PIPE_QUERY_PRIMITIVES_GENERATED */
 /* PIPE_QUERY_PRIMITIVES_EMITTED */
 /* PIPE_DRIVER_QUERY_TYPE_UINT64 */
+   /* PIPE_DRIVER_QUERY_TYPE_BYTES */
+   /* PIPE_DRIVER_QUERY_TYPE_MICROSECONDS */


When you are at it, please also add /*
PIPE_DRIVER_QUERY_TYPE_MICROSECONDS */ to pipe_query_result.


Sorry, I don't understand. Isn't that what I'm doing here?


Hey sorry, my brain wasn't here when I wrote that comment. :)



Cheers,
Nicolai


With this minor change, this patch is:

Reviewed-by: Samuel Pitoiset 



 /* PIPE_DRIVER_QUERY_TYPE_HZ */
 uint64_t u64;

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c
b/src/mesa/state_tracker/st_cb_perfmon.c
index 4ec6d86..dedb8f5 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -334,6 +334,9 @@ st_init_perfmon(struct st_context *st)
   c->Name = info.name;
   switch (info.type) {
  case PIPE_DRIVER_QUERY_TYPE_UINT64:
+case PIPE_DRIVER_QUERY_TYPE_BYTES:
+case PIPE_DRIVER_QUERY_TYPE_MICROSECONDS:
+case PIPE_DRIVER_QUERY_TYPE_HZ:
 c->Minimum.u64 = 0;
 c->Maximum.u64 = info.max_value.u64 ?
info.max_value.u64 : -1;
 c->Type = GL_UNSIGNED_INT64_AMD;






___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/9] gallium: batch query objects and related cleanups

2015-11-13 Thread Samuel Pitoiset



On 11/13/2015 07:29 PM, Nicolai Hähnle wrote:

Hi Samuel,

thanks for taking a look!

On 13.11.2015 18:35, Samuel Pitoiset wrote:

Did you run amd_performance_monitor piglit tests to make sure all of
your changes didn't break anything?


Yes, everything passes here.


Looks good. Thanks.





Did you test on nvc0 driver which is the only driver that currently
exposes GL_AMD_performance_monitor? In case you didn't, I'll test it
myself in the next few days. You might not have the hardware. :-)


Sorry, I don't have the hardware.


That's not a problem, I'll test on NVIDIA GPUs.



Thanks,
Nicolai



Thanks.

On 11/13/2015 04:57 PM, Nicolai Hähnle wrote:

Hi,

the main point of this patch series is to introduce batch query objects.

For AMD_performance_monitor, hardware may not be able to start and stop
performance counters independently of each other. The current query
interface
does not fit such hardware well.

With this series, drivers can mark driver-specific queries with the
PIPE_DRIVER_QUERY_FLAG_BATCH flag, which indicates that those queries
require
the use of batch query objects. Batch query objects are created with an
immutable list of queries, which requires a new entry point in
pipe_context,
but apart from that they use the same begin_query/end_query/etc. entry
points.

The radeon-specific part that actually makes use of this feature is
not quite
ready yet, but I already wanted to get this part out there for feedback.
Please review!

Thanks,
Nicolai
---
  gallium/auxiliary/hud/hud_context.c   |   24 ++
  gallium/auxiliary/hud/hud_driver_query.c  |  249
+-
  gallium/auxiliary/hud/hud_private.h   |   13 +
  gallium/drivers/nouveau/nvc0/nvc0_query.c |4
  gallium/include/pipe/p_context.h  |3
  gallium/include/pipe/p_defines.h  |   36 ++--
  mesa/state_tracker/st_cb_perfmon.c|  247
-
  mesa/state_tracker/st_cb_perfmon.h|   32 +++
  mesa/state_tracker/st_context.h   |3
  9 files changed, 437 insertions(+), 174 deletions(-)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev






___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/9] gallium: remove pipe_driver_query_group_info field type

2015-11-13 Thread Samuel Pitoiset



On 11/13/2015 07:23 PM, Nicolai Hähnle wrote:

On 13.11.2015 18:35, Samuel Pitoiset wrote:

On 11/13/2015 04:57 PM, Nicolai Hähnle wrote:

This was only used to implement an unnecessarily restrictive
interpretation
of the spec of AMD_performance_monitor. The spec says

   A performance monitor consists of a number of hardware and software
   counters that can be sampled by the GPU and reported back to the
   application.

I guess one could take this as a requirement that counters _must_ be
sampled
by the GPU, but then why are they called _software_ counters? Besides,
there's not much reason _not_ to expose all counters that are available,
and this simplifies the code.


The spec says:

"
While BeginPerfMonitorAMD does mark the beginning of performance counter
collection, the counters do not begin collecting immediately.  Rather,
the counters begin collection when BeginPerfMonitorAMD is processed by
the hardware.  That is, the API is asynchronous, and performance counter
collection does not begin until the graphics hardware processes the
BeginPerfMonitorAMD command.
"


Right. I interpreted this as the authors' attempt to say that the
counting happens in what other parts of OpenGL traditionally call "the
server", i.e. the Begin/EndPerfMonitorAMD commands can be used to
bracket draw calls in the way you'd usually expect, in the same way that
e.g. changing the DepthFunc only affects rendering once the graphics
hardware "processes the DepthFunc command".



This is why I introduced the notion of group of GPU counters in Gallium,
because "processed by the hardware", "asynchronous" and "command" seem
like the spec is talking about GPU only.

In which world are software counters sampled by the GPU? :-)
This spec is definitely not clear about that...

Anyway, I disagree about this patch because :
1) we need to agree on what amd_performance_monitor must expose
or not. Maybe it's time to ask the guys who wrote it?


Well, Catalyst exposes only hardware counters in
AMD_performance_monitor. But that's beside the point.

The real point is that the driver_query_group stuff is *only* used for
AMD_performance_monitor. So it makes no sense that a driver would ever
expose a driver_query_group that was not intended to be exposed via that
extension.

I understand that the group_type was added with good intentions. I might
have done the same. But in over a year (judging by the commit dates), no
other use case for driver_query_groups has come up.

So really, this is a question for everybody who cares about nouveau,
because nouveau is the only driver that (if a #define is enabled)
advertises a CPU driver_query_group.

Do you want that group to be accessible via AMD_performance_monitor?
Then be happy with this patch. Do you not want that group to be so
accessible? Then just remove it, because it serves no purpose either way.



My intention was to respect what I understood about that spec, but I 
must admit that you convinced me. :-)


You're right that the only SW queries group is *only* enabled when mesa
is built in debug mode. So, this is really a minor issue.


I think I can live without those groups of queries, but I'll have a 
deeper look just to make sure.


Thanks!




2) this doesn't really simplify code.


The patch only removes LOCs, so I find that a weird argument ;)

Cheers,
Nicolai




---
  src/gallium/drivers/nouveau/nvc0/nvc0_query.c |  3 ---
  src/gallium/include/pipe/p_defines.h  |  7 ---
  src/mesa/state_tracker/st_cb_perfmon.c| 30
---
  3 files changed, 40 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index f539210..a1d6162 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -200,7 +200,6 @@ nvc0_screen_get_driver_query_group_info(struct
pipe_screen *pscreen,
 if (id == NVC0_HW_SM_QUERY_GROUP) {
if (screen->compute) {
   info->name = "MP counters";
- info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;

   /* Because we can't expose the number of hardware counters
needed for
* each different query, we don't want to allow more than
one active
@@ -224,7 +223,6 @@ nvc0_screen_get_driver_query_group_info(struct
pipe_screen *pscreen,
if (screen->compute) {
   if (screen->base.class_3d < NVE4_3D_CLASS) {
  info->name = "Performance metrics";
-info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
  info->max_active_queries = 1;
  info->num_queries = NVC0_HW_METRIC_QUERY_COUNT;
  return 1;
@@ -234,7 +232,6 @@ nvc0_screen_get_driver_query_group_info(struct
pipe_screen *pscreen,
  #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
 else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) {
info->name = "Driver statistics";
-  info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_CPU;
info->max_active_queries = 

Re: [Mesa-dev] [PATCH 1/9] gallium: remove pipe_driver_query_group_info field type

2015-11-13 Thread Samuel Pitoiset



On 11/13/2015 07:27 PM, Ilia Mirkin wrote:

On Fri, Nov 13, 2015 at 1:23 PM, Nicolai Hähnle  wrote:

So really, this is a question for everybody who cares about nouveau, because
nouveau is the only driver that (if a #define is enabled) advertises a CPU
driver_query_group.

Do you want that group to be accessible via AMD_performance_monitor? Then be
happy with this patch. Do you not want that group to be so accessible? Then
just remove it, because it serves no purpose either way.


There's also the HUD, and Samuel's WIP NVIDIA PerfKit-style library impl.


As Nicolai said, neither the HUD nor my NVIDIA PerfKit implementation
cares about those groups.




   -ilia


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 10/11] i965/fs: Stomp the texture return type to UINT32

2015-11-13 Thread Kenneth Graunke
On Friday, November 13, 2015 10:14:56 AM Jason Ekstrand wrote:
> Cc: Kenneth Graunke 
> 
> 
> ---
>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 11 +++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> index 4877504..61c63d4 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> @@ -690,6 +690,17 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg 
> dst, struct brw_reg src
>break;
> }
>  
> +   /* Stomp the resinfo output type to UINT32.  On gens 4-5, the output type
> +* is set as part of the message descriptor.  On gen4, the PRM seems to
> +* allow UINT32 and FLOAT32 (i965 PRM, Vol. 4 Section 4.8.1.1), but on
> +* later gens UINT32 is required.  Once you hit Sandy Bridge, the bit is
> +* gone from the message descriptor entirely and you just get UINT32 all
> +* the time regasrdless.  Since we can really only do non-UINT32 on gen4,
> +* just stomp it to UINT32 all the time.
> +*/
> +   if (inst->opcode == SHADER_OPCODE_TXS)
> +  return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
> +
> switch (inst->exec_size) {
> case 8:
>simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
> 

I like this one better.

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v5 5/7] glsl: Add precision information to ir_variable

2015-11-13 Thread Ilia Mirkin
Looks like valgrind hates this for some reason. I'm seeing lots of

==16821== Conditional jump or move depends on uninitialised value(s)
==16821==at 0xA074D09: glsl_type::record_compare(glsl_type const*)
const (glsl_types.cpp:783)

Where line 783 is:

  if (this->fields.structure[i].precision
  != b->fields.structure[i].precision)

This happens with the trace from
https://bugs.freedesktop.org/show_bug.cgi?id=92229 but I suspect it
happens with just about anything with structs.

  -ilia


On Wed, Nov 11, 2015 at 7:45 AM, Samuel Iglesias Gonsálvez
 wrote:
> Reviewed-by: Samuel Iglesias Gonsálvez 
>
> On 06/11/15 13:03, Tapani Pälli wrote:
>> From: Iago Toral Quiroga 
>>
>> We will need this later on when we implement proper support for
>> precision qualifiers in the drivers and also to do link time checks for
>> uniforms as indicated by the spec.
>>
>> This patch also adds compile-time checks for variables without precision
>> information (currently, Mesa only checks that a default precision is set
>> for floats in fragment shaders).
>>
>> As indicated by Ian, the addition of the precision information to
>> ir_variable has been done using a bitfield and pahole to identify an
>> available hole so that memory requirements for ir_variable stay the
>> same.
>>
>> v2 (Ian):
>>   - Avoid if-ladders by defining arrays of supported sampler names and
>> indexing
>> into them with type->sampler_array + 2 * type->sampler_shadow
>>   - Make the code that selects the precision qualifier to use an utility
>> function
>>   - Fix a typo
>>
>> v3 (Tapani):
>>   - rebased
>>   - squashed in "Precision qualifiers are not allowed on structs"
>>   - fixed select_gles_precision for sampler arrays
>>   - fixed precision_qualifier_allowed for arrays of structs
>>
>> v4 (Tapani):
>>   - add atomic_uint handling
>>   - do not allow precision qualifier on images
>>   (issues reported by Marta)
>>
>> v5 (Tapani):
>>   - support precision qualifier on image types
>> ---
>>  src/glsl/ast_to_hir.cpp | 296 
>> 
>>  src/glsl/ir.h   |  13 ++
>>  src/glsl/nir/glsl_types.cpp |   4 +
>>  src/glsl/nir/glsl_types.h   |  11 ++
>>  4 files changed, 301 insertions(+), 23 deletions(-)
>>
>> diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
>> index b6d662b..1240615 100644
>> --- a/src/glsl/ast_to_hir.cpp
>> +++ b/src/glsl/ast_to_hir.cpp
>> @@ -2189,10 +2189,10 @@ precision_qualifier_allowed(const glsl_type *type)
>>  * From this, we infer that GLSL 1.30 (and later) should allow precision
>>  * qualifiers on sampler types just like float and integer types.
>>  */
>> -   return type->is_float()
>> +   return (type->is_float()
>> || type->is_integer()
>> -   || type->is_record()
>> -   || type->contains_opaque();
>> +   || type->contains_opaque())
>> +   && !type->without_array()->is_record();
>>  }
>>
>>  const glsl_type *
>> @@ -2210,31 +2210,268 @@ ast_type_specifier::glsl_type(const char **name,
>> return type;
>>  }
>>
>> -const glsl_type *
>> -ast_fully_specified_type::glsl_type(const char **name,
>> -struct _mesa_glsl_parse_state *state) 
>> const
>> +/**
>> + * From the OpenGL ES 3.0 spec, 4.5.4 Default Precision Qualifiers:
>> + *
>> + * "The precision statement
>> + *
>> + *precision precision-qualifier type;
>> + *
>> + *  can be used to establish a default precision qualifier. The type field 
>> can
>> + *  be either int or float or any of the sampler types, (...) If type is 
>> float,
>> + *  the directive applies to non-precision-qualified floating point type
>> + *  (scalar, vector, and matrix) declarations. If type is int, the directive
>> + *  applies to all non-precision-qualified integer type (scalar, vector, 
>> signed,
>> + *  and unsigned) declarations."
>> + *
>> + * We use the symbol table to keep the values of the default precisions for
>> + * each 'type' in each scope and we use the 'type' string from the precision
>> + * statement as key in the symbol table. When we want to retrieve the 
>> default
>> + * precision associated with a given glsl_type we need to know the type 
>> string
>> + * associated with it. This is what this function returns.
>> + */
>> +static const char *
>> +get_type_name_for_precision_qualifier(const glsl_type *type)
>>  {
>> -   const struct glsl_type *type = this->specifier->glsl_type(name, state);
>> -
>> -   if (type == NULL)
>> -  return NULL;
>> +   switch (type->base_type) {
>> +   case GLSL_TYPE_FLOAT:
>> +  return "float";
>> +   case GLSL_TYPE_UINT:
>> +   case GLSL_TYPE_INT:
>> +  return "int";
>> +   case GLSL_TYPE_ATOMIC_UINT:
>> +  return "atomic_uint";
>> +   case GLSL_TYPE_IMAGE:
>> +   /* fallthrough */
>> +   case GLSL_TYPE_SAMPLER: {
>> +  const unsigned type_idx =
>> + type->sampler_array + 2 * type->sampler_shadow;

Re: [Mesa-dev] [PATCH] nir/glsl_to_nir: use _mesa_fls() to compute num_textures

2015-11-13 Thread Jason Ekstrand
On Fri, Nov 13, 2015 at 7:44 AM, Juan A. Suarez Romero
 wrote:
> On Fri, 2015-11-13 at 07:37 -0800, Jason Ekstrand wrote:
>> I didn't want to pull a non-inline mesa function into NIR and add a
>> link dependency and I was too lazy to move it into util.
>
>
> But at this moment _mesa_fls() is an inline function. So I guess it is
> safe to push it, isn't it?

Sure, go ahead.  I'd like to see it moved to util eventually, but it
doesn't hurt anything and it fixes a bug.
--Jason
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] llvm TGSI backend (WIP) questions

2015-11-13 Thread Samuel Pitoiset



On 11/13/2015 02:46 PM, Hans de Goede wrote:

Hi All,


Hey Hans,



So as discussed I've started working on a TGSI backend for
llvm to use as a way to get compute going on nouveau (and other gpu-s).

I'm still learning all the ins and outs of llvm so I do not have
much to show yet.

I've rebased Francisco's (curro's) latest version on top of llvm
trunk, and added a commit on top to actually get it to build with the
latest trunk. So currently I'm at the point where I've just
taken Francisco's code, and made it compile, no more and no less.

I have a git repo with this work available here:

http://cgit.freedesktop.org/~jwrdegoede/llvm/


Thanks for sharing your work. :-)



So the next step would be to test this and see if it actually
does anything, questions:

1) Does anyone have a simple test case / command where I can
invoke just llvm and get TGSI asm output to check ?

2) Assuming I get the above to (somewhat) work, is there a
way to make llvm show the output of the various intermediate
passes in a human readable form ?


Basically, you need to ask Clang to emit LLVM code for you, for example, 
this command will emit LLVM IR:


clang -cc1 -cl-std=CL1.2 -emit-llvm -triple spir64-unknown-unknown kernel.cl

Note that this command only works with an old LLVM version (I don't
remember exactly which).


But in your case, and for that TGSI backend, I don't think there is a 
-emit-tgsi option which can directly output TGSI from OpenCL.


The other way, and in my opinion the best, is to write a little C++ 
program based on Clang/LLVM API for generating TGSI code. To do that,
you can have a look at 
src/gallium/state_trackers/clover/llvm/invocation.cpp which contains an 
example (but it seems to be outdated).


Basically, you need to call that CompilerInvocation object with some 
parameters and all the stuff around. This should not take more than 
100LOC in my opinion. I think the first step should be to emit LLVM IR 
before trying to get TGSI working.


I could write that program for you if you want, but I don't think I'll have
time to do it this weekend.


Thanks.



Regards,

Hans


--
-Samuel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] i965: Handle lum, intensity and missing components in the fast clear

2015-11-13 Thread Ben Widawsky
On Thu, Nov 12, 2015 at 04:54:08PM +0100, Neil Roberts wrote:
> It looks like the sampler hardware doesn't take into account the
> surface format when sampling a cleared color after a fast clear has
> been done. So for example if you clear a GL_RED surface to 1,1,1,1
> then the sampling instructions will return 1,1,1,1 instead of 1,0,0,1.
> This patch makes it override the color that is programmed in the
> surface state in order to swizzle for luminance and intensity as well
> as overriding the missing components.
> 
> v2: Handle luminance and intensity formats
> ---
> 
> I made a more extensive test case which tests all of the formats in
> fbo_formats.h as well as using more than one test color here:
> 
> http://patchwork.freedesktop.org/patch/64578/
> 
> In the process I noticed that there is a similar problem with
> luminance and intensity textures so here is a v2 to cope with that.
> 
> I've made another version of this patch which is rebased on top of
> Ben's skl-fast-clear branch so we can take whichever version depending
> on which lands first:
> 
> https://github.com/bpeel/mesa/commit/da7edcb6dfd93c7dd86b2e148c44dff7
> 
>  src/mesa/drivers/dri/i965/brw_meta_fast_clear.c | 38 
> +++--
>  1 file changed, 35 insertions(+), 3 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c 
> b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
> index 69fe7b4..3071590 100644
> --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
> +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
> @@ -361,12 +361,43 @@ is_color_fast_clear_compatible(struct brw_context *brw,
>   * SURFACE_STATE.
>   */
>  static uint32_t
> -compute_fast_clear_color_bits(const union gl_color_union *color)
> +compute_fast_clear_color_bits(mesa_format format,
> +  const union gl_color_union *color)
>  {
> +   union gl_color_union override_color = *color;
> uint32_t bits = 0;
> +
> +   /* The sampler doesn't look at the format of the surface when the fast
> +* clear color is used so we need to implement luminance, intensity and
> +* missing components manually.
> +*/
> +   switch (_mesa_get_format_base_format(format)) {
> +   case GL_INTENSITY:
> +  override_color.ui[3] = override_color.ui[0];
> +  /* flow through */
> +   case GL_LUMINANCE:
> +   case GL_LUMINANCE_ALPHA:
> +  override_color.ui[1] = override_color.ui[0];
> +  override_color.ui[2] = override_color.ui[0];
> +  break;

The definition for GL_LUMINANCE afaict:
"Each element is a single luminance value.  The GL converts it to
floating point, then assembles it into an RGBA element by replicating the
luminance value three times for red, green, and blue and attaching 1 for alpha.
Each component is then multiplied by the signed scale factor GL_c_SCALE, added
to the signed bias GL_c_BIAS, and clamped to the range [0,1] (see
glPixelTransfer)."

doesn't that mean you need
override_color.f[3] = 1.0f;

> +   default:
> +  for (int i = 0; i < 3; i++) {
> + if (!_mesa_format_has_color_component(format, i))
> +override_color.ui[i] = 0;
> +  }

Is there an easy way to verify that all formats want 0 for GB channels? It looks
right to me, but with my knowledge of GL, that doesn't mean much (I am looking
here: https://www.opengl.org/sdk/docs/man/html/glTexImage2D.xhtml)

I also think that component 0 must always have a color, right? (I'm not
requesting a change as such, just making sure my understanding of what you're
trying to do is correct).

> +  break;
> +   }
> +
> +   if (!_mesa_format_has_color_component(format, 3)) {
> +  if (_mesa_is_format_integer_color(format))
> + override_color.ui[3] = 1;

We shouldn't ever be fast clearing integer formats. We can on GEN8+, but we're
not doing it today. So I think it should be safe to remove this check.

> +  else
> + override_color.f[3] = 1.0f;
> +   }
> +
> for (int i = 0; i < 4; i++) {
>/* Testing for non-0 works for integer and float colors */
> -  if (color->f[i] != 0.0f)
> +  if (override_color.f[i] != 0.0f)
>   bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
> }
> return bits;
> @@ -505,7 +536,8 @@ brw_meta_fast_clear(struct brw_context *brw, struct 
> gl_framebuffer *fb,
>switch (clear_type) {
>case FAST_CLEAR:
>   irb->mt->fast_clear_color_value =
> -compute_fast_clear_color_bits(>Color.ClearColor);
> +compute_fast_clear_color_bits(irb->mt->format,
> +  >Color.ClearColor);
>   irb->need_downsample = true;
>  
>   /* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the

Seems like a good patch to me. It would probably be nice to track down a good
spec reference if you manage to find one. I know I've seen such reference in SKL
docs (which aren't SKL specific) - but I am having trouble finding it in PRMs.
My VPN is broken, so I 

Re: [Mesa-dev] [PATCH 1/9] gallium: remove pipe_driver_query_group_info field type

2015-11-13 Thread Nicolai Hähnle

On 13.11.2015 19:27, Ilia Mirkin wrote:

On Fri, Nov 13, 2015 at 1:23 PM, Nicolai Hähnle  wrote:

So really, this is a question for everybody who cares about nouveau, because
nouveau is the only driver that (if a #define is enabled) advertises a CPU
driver_query_group.

Do you want that group to be accessible via AMD_performance_monitor? Then be
happy with this patch. Do you not want that group to be so accessible? Then
just remove it, because it serves no purpose either way.


There's also the HUD, and Samuel's WIP NVIDIA PerfKit-style library impl.


The HUD doesn't care about groups. If Samuel really cares about this for 
his library (which I haven't seen - where is it?), I can drop this patch.


Cheers,
Nicolai
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 17/23] meta: Partially convert _mesa_meta_DrawTex to DSA

2015-11-13 Thread Anuj Phogat
On Mon, Nov 9, 2015 at 4:56 PM, Ian Romanick  wrote:
> From: Ian Romanick 
>
> Signed-off-by: Ian Romanick 
> ---
>  src/mesa/drivers/common/meta.c | 12 ++--
>  1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
> index 8249eda..1bf3d52 100644
> --- a/src/mesa/drivers/common/meta.c
> +++ b/src/mesa/drivers/common/meta.c
> @@ -3305,14 +3305,15 @@ _mesa_meta_DrawTex(struct gl_context *ctx, GLfloat x, 
> GLfloat y, GLfloat z,
>_mesa_BindVertexArray(drawtex->VAO);
>
>/* create vertex array buffer */
> -  _mesa_GenBuffers(1, >VBO);
> -  _mesa_BindBuffer(GL_ARRAY_BUFFER_ARB, drawtex->VBO);
> -  _mesa_BufferData(GL_ARRAY_BUFFER_ARB, sizeof(verts),
> -  NULL, GL_DYNAMIC_DRAW_ARB);
> +  _mesa_CreateBuffers(1, >VBO);
> +  _mesa_NamedBufferData(drawtex->VBO, sizeof(verts),
> +NULL, GL_DYNAMIC_DRAW_ARB);
>
>/* client active texture is not part of the array object */
>active_texture = ctx->Array.ActiveTexture;
>
> +  _mesa_BindBuffer(GL_ARRAY_BUFFER_ARB, drawtex->VBO);
> +
>/* setup vertex arrays */
>_mesa_VertexPointer(3, GL_FLOAT, sizeof(struct vertex), OFFSET(x));
>_mesa_EnableClientState(GL_VERTEX_ARRAY);
> @@ -3327,7 +3328,6 @@ _mesa_meta_DrawTex(struct gl_context *ctx, GLfloat x, 
> GLfloat y, GLfloat z,
> }
> else {
>_mesa_BindVertexArray(drawtex->VAO);
> -  _mesa_BindBuffer(GL_ARRAY_BUFFER_ARB, drawtex->VBO);
> }
>
> /* vertex positions, texcoords */
> @@ -3392,7 +3392,7 @@ _mesa_meta_DrawTex(struct gl_context *ctx, GLfloat x, 
> GLfloat y, GLfloat z,
>   verts[3].st[i][1] = t1;
>}
>
> -  _mesa_BufferSubData(GL_ARRAY_BUFFER_ARB, 0, sizeof(verts), verts);
> +  _mesa_NamedBufferSubData(drawtex->VBO, 0, sizeof(verts), verts);
> }
>
> _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
> --
> 2.1.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reviewed-by: Anuj Phogat 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 18/23] meta: Track VBO using gl_buffer_object instead of GL API object handle in _mesa_meta_DrawTex

2015-11-13 Thread Anuj Phogat
On Mon, Nov 9, 2015 at 4:56 PM, Ian Romanick  wrote:
> From: Ian Romanick 
>
> Signed-off-by: Ian Romanick 
> ---
>  src/mesa/drivers/common/meta.c | 23 ++-
>  src/mesa/drivers/common/meta.h |  2 +-
>  2 files changed, 19 insertions(+), 6 deletions(-)
>
> diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
> index 1bf3d52..6927ae9 100644
> --- a/src/mesa/drivers/common/meta.c
> +++ b/src/mesa/drivers/common/meta.c
> @@ -3299,20 +3299,32 @@ _mesa_meta_DrawTex(struct gl_context *ctx, GLfloat x, 
> GLfloat y, GLfloat z,
> if (drawtex->VAO == 0) {
>/* one-time setup */
>GLint active_texture;
> +  GLuint VBO;
>
>/* create vertex array object */
>_mesa_GenVertexArrays(1, >VAO);
>_mesa_BindVertexArray(drawtex->VAO);
>
>/* create vertex array buffer */
> -  _mesa_CreateBuffers(1, >VBO);
> -  _mesa_NamedBufferData(drawtex->VBO, sizeof(verts),
> -NULL, GL_DYNAMIC_DRAW_ARB);
> +  _mesa_CreateBuffers(1, );
> +  drawtex->buf_obj = _mesa_lookup_bufferobj(ctx, VBO);
> +
> +  /* _mesa_lookup_bufferobj only returns NULL if name is 0.  If the 
> object
> +   * does not yet exist (i.e., hasn't been bound) it will return a dummy
> +   * object that you can't do anything with.
> +   */
> +  assert(drawtex->buf_obj != NULL && (drawtex->buf_obj)->Name == VBO);
> +  assert(drawtex->buf_obj == ctx->Array.ArrayBufferObj);
> +
> +  _mesa_buffer_data(ctx, drawtex->buf_obj, GL_NONE, sizeof(verts), verts,
> +GL_DYNAMIC_DRAW, __func__);
> +
> +  assert(drawtex->buf_obj->Size == sizeof(verts));
>
>/* client active texture is not part of the array object */
>active_texture = ctx->Array.ActiveTexture;
>
> -  _mesa_BindBuffer(GL_ARRAY_BUFFER_ARB, drawtex->VBO);
> +  _mesa_BindBuffer(GL_ARRAY_BUFFER_ARB, VBO);
>
>/* setup vertex arrays */
>_mesa_VertexPointer(3, GL_FLOAT, sizeof(struct vertex), OFFSET(x));
> @@ -3392,7 +3404,8 @@ _mesa_meta_DrawTex(struct gl_context *ctx, GLfloat x, 
> GLfloat y, GLfloat z,
>   verts[3].st[i][1] = t1;
>}
>
> -  _mesa_NamedBufferSubData(drawtex->VBO, 0, sizeof(verts), verts);
> +  _mesa_buffer_sub_data(ctx, drawtex->buf_obj, 0, sizeof(verts), verts,
> +__func__);
> }
>
> _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
> diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h
> index 9ce5d12..4d25957 100644
> --- a/src/mesa/drivers/common/meta.h
> +++ b/src/mesa/drivers/common/meta.h
> @@ -405,7 +405,7 @@ struct decompress_state
>  struct drawtex_state
>  {
> GLuint VAO;
> -   GLuint VBO;
> +   struct gl_buffer_object *buf_obj;
>  };
>
>  #define MAX_META_OPS_DEPTH  8
> --
> 2.1.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reviewed-by: Anuj Phogat 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 3/9] st/mesa: map semantic driver query types to underlying type

2015-11-13 Thread Nicolai Hähnle
Reviewed-by: Samuel Pitoiset 
---
 src/gallium/include/pipe/p_defines.h   | 2 ++
 src/mesa/state_tracker/st_cb_perfmon.c | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 7f241c8..7ed9f6d 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -791,6 +791,8 @@ union pipe_query_result
/* PIPE_QUERY_PRIMITIVES_GENERATED */
/* PIPE_QUERY_PRIMITIVES_EMITTED */
/* PIPE_DRIVER_QUERY_TYPE_UINT64 */
+   /* PIPE_DRIVER_QUERY_TYPE_BYTES */
+   /* PIPE_DRIVER_QUERY_TYPE_MICROSECONDS */
/* PIPE_DRIVER_QUERY_TYPE_HZ */
uint64_t u64;
 
diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 4ec6d86..dedb8f5 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -334,6 +334,9 @@ st_init_perfmon(struct st_context *st)
  c->Name = info.name;
  switch (info.type) {
 case PIPE_DRIVER_QUERY_TYPE_UINT64:
+case PIPE_DRIVER_QUERY_TYPE_BYTES:
+case PIPE_DRIVER_QUERY_TYPE_MICROSECONDS:
+case PIPE_DRIVER_QUERY_TYPE_HZ:
c->Minimum.u64 = 0;
c->Maximum.u64 = info.max_value.u64 ? info.max_value.u64 : -1;
c->Type = GL_UNSIGNED_INT64_AMD;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 1/9] gallium: remove pipe_driver_query_group_info field type

2015-11-13 Thread Nicolai Hähnle
This was only used to implement an unnecessarily restrictive interpretation
of the spec of AMD_performance_monitor. The spec says

  A performance monitor consists of a number of hardware and software
  counters that can be sampled by the GPU and reported back to the
  application.

I guess one could take this as a requirement that counters _must_ be sampled
by the GPU, but then why are they called _software_ counters? Besides,
there's not much reason _not_ to expose all counters that are available,
and this simplifies the code.
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query.c |  3 ---
 src/gallium/include/pipe/p_defines.h  |  7 ---
 src/mesa/state_tracker/st_cb_perfmon.c| 30 ---
 3 files changed, 40 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index f539210..a1d6162 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -200,7 +200,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
if (id == NVC0_HW_SM_QUERY_GROUP) {
   if (screen->compute) {
  info->name = "MP counters";
- info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
 
  /* Because we can't expose the number of hardware counters needed for
   * each different query, we don't want to allow more than one active
@@ -224,7 +223,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
   if (screen->compute) {
  if (screen->base.class_3d < NVE4_3D_CLASS) {
 info->name = "Performance metrics";
-info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
 info->max_active_queries = 1;
 info->num_queries = NVC0_HW_METRIC_QUERY_COUNT;
 return 1;
@@ -234,7 +232,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) {
   info->name = "Driver statistics";
-  info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_CPU;
   info->max_active_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
   info->num_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
   return 1;
diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 7240154..7f241c8 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -829,12 +829,6 @@ enum pipe_driver_query_type
PIPE_DRIVER_QUERY_TYPE_HZ   = 6,
 };
 
-enum pipe_driver_query_group_type
-{
-   PIPE_DRIVER_QUERY_GROUP_TYPE_CPU = 0,
-   PIPE_DRIVER_QUERY_GROUP_TYPE_GPU = 1,
-};
-
 /* Whether an average value per frame or a cumulative value should be
  * displayed.
  */
@@ -864,7 +858,6 @@ struct pipe_driver_query_info
 struct pipe_driver_query_group_info
 {
const char *name;
-   enum pipe_driver_query_group_type type;
unsigned max_active_queries;
unsigned num_queries;
 };
diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 1bb5be3..4ec6d86 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -65,27 +65,6 @@ find_query_type(struct pipe_screen *screen, const char *name)
return type;
 }
 
-/**
- * Return TRUE if the underlying driver expose GPU counters.
- */
-static bool
-has_gpu_counters(struct pipe_screen *screen)
-{
-   int num_groups, gid;
-
-   num_groups = screen->get_driver_query_group_info(screen, 0, NULL);
-   for (gid = 0; gid < num_groups; gid++) {
-  struct pipe_driver_query_group_info group_info;
-
-  if (!screen->get_driver_query_group_info(screen, gid, _info))
- continue;
-
-  if (group_info.type == PIPE_DRIVER_QUERY_GROUP_TYPE_GPU)
- return true;
-   }
-   return false;
-}
-
 static bool
 init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
 {
@@ -313,12 +292,6 @@ st_init_perfmon(struct st_context *st)
if (!screen->get_driver_query_info || !screen->get_driver_query_group_info)
   return false;
 
-   if (!has_gpu_counters(screen)) {
-  /* According to the spec, GL_AMD_performance_monitor must only
-   * expose GPU counters. */
-  return false;
-   }
-
/* Get the number of available queries. */
num_counters = screen->get_driver_query_info(screen, 0, NULL);
if (!num_counters)
@@ -339,9 +312,6 @@ st_init_perfmon(struct st_context *st)
   if (!screen->get_driver_query_group_info(screen, gid, _info))
  continue;
 
-  if (group_info.type != PIPE_DRIVER_QUERY_GROUP_TYPE_GPU)
- continue;
-
   g->Name = group_info.name;
   g->MaxActiveCounters = group_info.max_active_queries;
   g->NumCounters = 0;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 9/9] st/mesa: add support for batch driver queries to perfmon

2015-11-13 Thread Nicolai Hähnle
v2: forgot a null-pointer check (spotted by Samuel Pitoiset)
---
 src/mesa/state_tracker/st_cb_perfmon.c | 78 +++---
 src/mesa/state_tracker/st_cb_perfmon.h |  6 +++
 2 files changed, 77 insertions(+), 7 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 8628e23..39c3902 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -42,7 +42,10 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
struct st_context *st = st_context(ctx);
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st->pipe;
+   unsigned *batch = NULL;
unsigned num_active_counters = 0;
+   unsigned max_batch_counters = 0;
+   unsigned num_batch_counters = 0;
int gid, cid;
 
st_flush_bitmap_cache(st);
@@ -50,6 +53,7 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
/* Determine the number of active counters. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = >PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = >perfmon[gid];
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -61,6 +65,8 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
   }
 
   num_active_counters += m->ActiveGroups[gid];
+  if (stg->has_batch)
+ max_batch_counters += m->ActiveGroups[gid];
}
 
if (!num_active_counters)
@@ -71,6 +77,12 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
if (!stm->active_counters)
   return false;
 
+   if (max_batch_counters) {
+  batch = CALLOC(max_batch_counters, sizeof(*batch));
+  if (!batch)
+ return false;
+   }
+
/* Create a query for each active counter. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = >PerfMonitor.Groups[gid];
@@ -82,13 +94,35 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
  struct st_perf_counter_object *cntr =
 >active_counters[stm->num_active_counters];
 
- cntr->query= pipe->create_query(pipe, stc->query_type, 0);
  cntr->id   = cid;
  cntr->group_id = gid;
+ if (stc->flags & PIPE_DRIVER_QUERY_FLAG_BATCH) {
+cntr->batch_index = num_batch_counters;
+batch[num_batch_counters++] = stc->query_type;
+ } else {
+cntr->query = pipe->create_query(pipe, stc->query_type, 0);
+if (!cntr->query)
+   goto fail;
+ }
  ++stm->num_active_counters;
   }
}
+
+   /* Create the batch query. */
+   if (num_batch_counters) {
+  stm->batch_query = pipe->create_batch_query(pipe, num_batch_counters,
+  batch);
+  stm->batch_result = CALLOC(num_batch_counters, 
sizeof(stm->batch_result->batch[0]));
+  if (!stm->batch_query || !stm->batch_result)
+ goto fail;
+   }
+
+   FREE(batch);
return true;
+
+fail:
+   FREE(batch);
+   return false;
 }
 
 static void
@@ -105,6 +139,13 @@ reset_perf_monitor(struct st_perf_monitor_object *stm,
FREE(stm->active_counters);
stm->active_counters = NULL;
stm->num_active_counters = 0;
+
+   if (stm->batch_query) {
+  pipe->destroy_query(pipe, stm->batch_query);
+  stm->batch_query = NULL;
+   }
+   FREE(stm->batch_result);
+   stm->batch_result = NULL;
 }
 
 static struct gl_perf_monitor_object *
@@ -143,9 +184,13 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
/* Start the query for each active counter. */
for (i = 0; i < stm->num_active_counters; ++i) {
   struct pipe_query *query = stm->active_counters[i].query;
-  if (!pipe->begin_query(pipe, query))
+  if (query && !pipe->begin_query(pipe, query))
   goto fail;
}
+
+   if (stm->batch_query && !pipe->begin_query(pipe, stm->batch_query))
+  goto fail;
+
return true;
 
 fail:
@@ -164,8 +209,12 @@ st_EndPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
/* Stop the query for each active counter. */
for (i = 0; i < stm->num_active_counters; ++i) {
   struct pipe_query *query = stm->active_counters[i].query;
-  pipe->end_query(pipe, query);
+  if (query)
+ pipe->end_query(pipe, query);
}
+
+   if (stm->batch_query)
+  pipe->end_query(pipe, stm->batch_query);
 }
 
 static void
@@ -199,11 +248,16 @@ st_IsPerfMonitorResultAvailable(struct gl_context *ctx,
for (i = 0; i < stm->num_active_counters; ++i) {
   struct pipe_query *query = stm->active_counters[i].query;
   union pipe_query_result result;
-  if (!pipe->get_query_result(pipe, query, FALSE, 

[Mesa-dev] [PATCH v2 8/9] gallium/hud: add support for batch queries

2015-11-13 Thread Nicolai Hähnle
v2: be more defensive about allocations
---
 src/gallium/auxiliary/hud/hud_context.c  |  24 ++-
 src/gallium/auxiliary/hud/hud_driver_query.c | 265 +++
 src/gallium/auxiliary/hud/hud_private.h  |  13 +-
 3 files changed, 256 insertions(+), 46 deletions(-)

diff --git a/src/gallium/auxiliary/hud/hud_context.c 
b/src/gallium/auxiliary/hud/hud_context.c
index ffe30b8..bcef701 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -57,6 +57,7 @@ struct hud_context {
struct cso_context *cso;
struct u_upload_mgr *uploader;
 
+   struct hud_batch_query_context *batch_query;
struct list_head pane_list;
 
/* states */
@@ -510,6 +511,8 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
hud_alloc_vertices(hud, >text, 4 * 512, 4 * sizeof(float));
 
/* prepare all graphs */
+   hud_batch_query_update(hud->batch_query);
+
LIST_FOR_EACH_ENTRY(pane, >pane_list, head) {
   LIST_FOR_EACH_ENTRY(gr, >graph_list, head) {
  gr->query_new_value(gr);
@@ -903,17 +906,21 @@ hud_parse_env_var(struct hud_context *hud, const char 
*env)
   }
   else if (strcmp(name, "samples-passed") == 0 &&
has_occlusion_query(hud->pipe->screen)) {
- hud_pipe_query_install(pane, hud->pipe, "samples-passed",
+ hud_pipe_query_install(>batch_query, pane, hud->pipe,
+"samples-passed",
 PIPE_QUERY_OCCLUSION_COUNTER, 0, 0,
 PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+0);
   }
   else if (strcmp(name, "primitives-generated") == 0 &&
has_streamout(hud->pipe->screen)) {
- hud_pipe_query_install(pane, hud->pipe, "primitives-generated",
+ hud_pipe_query_install(>batch_query, pane, hud->pipe,
+"primitives-generated",
 PIPE_QUERY_PRIMITIVES_GENERATED, 0, 0,
 PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+0);
   }
   else {
  boolean processed = FALSE;
@@ -938,17 +945,19 @@ hud_parse_env_var(struct hud_context *hud, const char 
*env)
if (strcmp(name, pipeline_statistics_names[i]) == 0)
   break;
 if (i < Elements(pipeline_statistics_names)) {
-   hud_pipe_query_install(pane, hud->pipe, name,
+   hud_pipe_query_install(>batch_query, pane, hud->pipe, name,
   PIPE_QUERY_PIPELINE_STATISTICS, i,
   0, PIPE_DRIVER_QUERY_TYPE_UINT64,
-  PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+  PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+  0);
processed = TRUE;
 }
  }
 
  /* driver queries */
  if (!processed) {
-if (!hud_driver_query_install(pane, hud->pipe, name)){
+if (!hud_driver_query_install(>batch_query, pane, hud->pipe,
+  name)) {
fprintf(stderr, "gallium_hud: unknown driver query '%s'\n", 
name);
 }
  }
@@ -1287,6 +1296,7 @@ hud_destroy(struct hud_context *hud)
   FREE(pane);
}
 
+   hud_batch_query_cleanup(>batch_query);
pipe->delete_fs_state(pipe, hud->fs_color);
pipe->delete_fs_state(pipe, hud->fs_text);
pipe->delete_vs_state(pipe, hud->vs);
diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c 
b/src/gallium/auxiliary/hud/hud_driver_query.c
index 3198ab3..29f70fc 100644
--- a/src/gallium/auxiliary/hud/hud_driver_query.c
+++ b/src/gallium/auxiliary/hud/hud_driver_query.c
@@ -34,13 +34,159 @@
 #include "hud/hud_private.h"
 #include "pipe/p_screen.h"
 #include "os/os_time.h"
+#include "util/u_math.h"
 #include "util/u_memory.h"
 #include 
 
+// Must be a power of two
 #define NUM_QUERIES 8
 
+struct hud_batch_query_context {
+   struct pipe_context *pipe;
+   unsigned num_query_types;
+   unsigned allocated_query_types;
+   unsigned *query_types;
+
+   boolean failed;
+   struct pipe_query *query[NUM_QUERIES];
+   union pipe_query_result *result[NUM_QUERIES];
+   unsigned head, pending, results;
+};
+
+void
+hud_batch_query_update(struct hud_batch_query_context *bq)
+{
+   struct pipe_context *pipe;
+
+   if (!bq || bq->failed)
+  return;
+
+   pipe = bq->pipe;
+
+   if (bq->query[bq->head])
+  pipe->end_query(pipe, bq->query[bq->head]);
+
+   bq->results = 0;
+
+   while (bq->pending) {
+  unsigned idx = (bq->head - 

Re: [Mesa-dev] Patchwork admin rights

2015-11-13 Thread Matt Turner
On Fri, Nov 13, 2015 at 4:26 AM, Samuel Iglesias Gonsálvez
 wrote:
> -BEGIN PGP SIGNED MESSAGE-
> Hash: SHA256
>
> Hello,
>
> I would like to have admin permissions to Mesa and Piglit projects in
> patchwork [0] to change the status of patches that are mine but
> are not assigned to me.
>
> I saw in previous emails that just asking for it here is enough. If I
> need to create a bug report in bugzilla, just tell me to do so.
>
> Thanks,
>
> Sam

Done -- I updated your permissions, along with Antia and Iago.

I looked for Alejandro and Eduardo, but I couldn't find accounts for them.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/9] gallium/hud: remove unused field in query_info

2015-11-13 Thread Nicolai Hähnle
Reviewed-by: Samuel Pitoiset 
---
 src/gallium/auxiliary/hud/hud_driver_query.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c 
b/src/gallium/auxiliary/hud/hud_driver_query.c
index f14305e..3198ab3 100644
--- a/src/gallium/auxiliary/hud/hud_driver_query.c
+++ b/src/gallium/auxiliary/hud/hud_driver_query.c
@@ -48,7 +48,6 @@ struct query_info {
/* Ring of queries. If a query is busy, we use another slot. */
struct pipe_query *query[NUM_QUERIES];
unsigned head, tail;
-   unsigned num_queries;
 
uint64_t last_time;
uint64_t results_cumulative;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 6/9] st/mesa: maintain active perfmon counters in an array

2015-11-13 Thread Nicolai Hähnle
It is easy enough to pre-determine the required size, and arrays are
generally better behaved especially when they get large.

v2: make sure init_perf_monitor returns true when no counters are active
(spotted by Samuel Pitoiset)
---
 src/mesa/state_tracker/st_cb_perfmon.c | 81 --
 src/mesa/state_tracker/st_cb_perfmon.h | 18 
 2 files changed, 58 insertions(+), 41 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index ec12eb2..8628e23 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -42,15 +42,14 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
struct st_context *st = st_context(ctx);
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st->pipe;
+   unsigned num_active_counters = 0;
int gid, cid;
 
st_flush_bitmap_cache(st);
 
-   /* Create a query for each active counter. */
+   /* Determine the number of active counters. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = >PerfMonitor.Groups[gid];
-  const struct st_perf_monitor_group *stg = >perfmon[gid];
-  BITSET_WORD tmp;
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -61,19 +60,32 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
  return false;
   }
 
+  num_active_counters += m->ActiveGroups[gid];
+   }
+
+   if (!num_active_counters)
+  return true;
+
+   stm->active_counters = CALLOC(num_active_counters,
+ sizeof(*stm->active_counters));
+   if (!stm->active_counters)
+  return false;
+
+   /* Create a query for each active counter. */
+   for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
+  const struct gl_perf_monitor_group *g = >PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = >perfmon[gid];
+  BITSET_WORD tmp;
+
   BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) {
  const struct st_perf_monitor_counter *stc = >counters[cid];
- struct st_perf_counter_object *cntr;
-
- cntr = CALLOC_STRUCT(st_perf_counter_object);
- if (!cntr)
-return false;
+ struct st_perf_counter_object *cntr =
+>active_counters[stm->num_active_counters];
 
  cntr->query= pipe->create_query(pipe, stc->query_type, 0);
  cntr->id   = cid;
  cntr->group_id = gid;
-
- list_addtail(>list, >active_counters);
+ ++stm->num_active_counters;
   }
}
return true;
@@ -83,24 +95,24 @@ static void
 reset_perf_monitor(struct st_perf_monitor_object *stm,
struct pipe_context *pipe)
 {
-   struct st_perf_counter_object *cntr, *tmp;
+   unsigned i;
 
-   LIST_FOR_EACH_ENTRY_SAFE(cntr, tmp, >active_counters, list) {
-  if (cntr->query)
- pipe->destroy_query(pipe, cntr->query);
-  list_del(>list);
-  free(cntr);
+   for (i = 0; i < stm->num_active_counters; ++i) {
+  struct pipe_query *query = stm->active_counters[i].query;
+  if (query)
+ pipe->destroy_query(pipe, query);
}
+   FREE(stm->active_counters);
+   stm->active_counters = NULL;
+   stm->num_active_counters = 0;
 }
 
 static struct gl_perf_monitor_object *
 st_NewPerfMonitor(struct gl_context *ctx)
 {
struct st_perf_monitor_object *stq = 
ST_CALLOC_STRUCT(st_perf_monitor_object);
-   if (stq) {
-  list_inithead(>active_counters);
+   if (stq)
   return >base;
-   }
return NULL;
 }
 
@@ -119,9 +131,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 {
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st_context(ctx)->pipe;
-   struct st_perf_counter_object *cntr;
+   unsigned i;
 
-   if (LIST_IS_EMPTY(>active_counters)) {
+   if (!stm->num_active_counters) {
   /* Create a query for each active counter before starting
* a new monitoring session. */
   if (!init_perf_monitor(ctx, m))
@@ -129,8 +141,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
}
 
/* Start the query for each active counter. */
-   LIST_FOR_EACH_ENTRY(cntr, >active_counters, list) {
-  if (!pipe->begin_query(pipe, cntr->query))
+   for (i = 0; i < stm->num_active_counters; ++i) {
+  struct pipe_query *query = stm->active_counters[i].query;
+  if (!pipe->begin_query(pipe, query))
   goto fail;
}
return true;
@@ -146,11 +159,13 @@ st_EndPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 {
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st_context(ctx)->pipe;
-   struct st_perf_counter_object 

Re: [Mesa-dev] [RFC] Vendor-neutral dispatch library for OpenGL

2015-11-13 Thread Kyle Brenneman

On 10/08/2015 12:29 PM, Emil Velikov wrote:

On 06/10/15 20:58, Kyle Brenneman wrote:

On 10/06/2015 12:43 PM, Emil Velikov wrote:

On 6 October 2015 at 16:39, Kyle Brenneman  wrote:

On 10/06/2015 07:34 AM, Emil Velikov wrote:

Hello Kyle,

A few questions/points of discussion:

   * What is your take on having a libglvnd 'package', which provides
the headers (and maybe other materials), apart from the libraries ?
I'm basically thinking about OpenGL.h, GLX.h, etc for programs that
wish to use new ABI, and glvnd{Foo,Bar}.h which mesa and other GL
implementations.

I haven't looked much into packaging yet. I'm open to any suggestions that
might make that easier, though.

One thing that I'm planning to do but haven't gotten to yet is to move the
public headers into a separate directory. There's a couple of header files
(libglxabi.h and GLdispatchABI.h) that are intended to be used by vendor
library implementations, and the other headers are all internal.

The headers that you'd use for compiling an application (gl.h, glx.h, etc.)
could easily go into a separate package, too.


By 'packaging' I meant that the relevant files are available after
`make install'. Currently for GLdispatchABI.h and others that's not
the case. We can leave it up-to the distributions to manage the actual
packages (if in doubt a document to guide them), but we can make sure
that the files (including pkg-config and cmake ones, separate set for
user/developer) are there.

The 'make install' command currently doesn't do anything with the
regular GL headers. I would expect that if someone's building an OpenGL
application, then they've probably already installed some version of the
GL headers separately. Still, I could probably add a configure option or
something to include them.


Pretty much what I was wondering. Mostly as I've noticed that the
official Nvidia driver has started shipping some headers which clash
with mesa ones :'( Perhaps the AMD proprietary driver ships some as well ?


Including the public ABI headers in a "make install" does sound like a
good idea. Maybe put them next to the normal GL headers, or under a
GLVND subdirectory?

A separate location sounds better imho. Then again this information will
be made available via the .pc .cmake files. People that don't use them
can hardcode things as needed :)

[snip]
I agree that a separate directory from the other GL headers would be 
best. I'm leaning toward putting the libglvnd headers into a sibling 
directory to the GL headers, probably /usr/include/GLVND.


Putting them in a subdirectory under GL (/usr/include/GL/GLVND) would 
also be an option. But eventually, libglvnd will have an EGL header 
there as well, and putting that under /usr/include/GL seems less 
appropriate.


Does that sound like a good place for them?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi: enable optimal raster config setting for fiji

2015-11-13 Thread Alex Deucher
Requires proper kernel tiling configuration so check the tiling
config registers.

Signed-off-by: Alex Deucher 
Cc: mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/radeonsi/si_state.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 384c8e2..ff4d612 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3278,6 +3278,7 @@ si_write_harvested_raster_configs(struct si_context *sctx,
 
 static void si_init_config(struct si_context *sctx)
 {
+   struct si_screen *sscreen = sctx->screen;
unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16);
unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
unsigned raster_config, raster_config_1;
@@ -3348,9 +3349,14 @@ static void si_init_config(struct si_context *sctx)
raster_config_1 = 0x002e;
break;
case CHIP_FIJI:
-   /* Fiji should be same as Hawaii, but that causes corruption in 
some cases */
-   raster_config = 0x1612; /* 0x3a00161a */
-   raster_config_1 = 0x002a; /* 0x002e */
+   if (sscreen->b.info.cik_macrotile_mode_array[0] == 0x00e4) {
+   /* old kernels with old tiling config */
+   raster_config = 0x1612;
+   raster_config_1 = 0x002a;
+   } else {
+   raster_config = 0x3a00161a;
+   raster_config_1 = 0x002e;
+   }
break;
case CHIP_TONGA:
raster_config = 0x1612;
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 20/23] meta: Don't pollute the buffer object namespace in _mesa_meta_DrawTex

2015-11-13 Thread Anuj Phogat
On Mon, Nov 9, 2015 at 4:56 PM, Ian Romanick  wrote:
> From: Ian Romanick 
>
> tl;dr: For many types of GL object, we can *NEVER* use the Gen function.
>
> In OpenGL ES (all versions!) and OpenGL compatibility profile,
> applications don't have to call Gen functions.  The GL spec is very
> clear about how you can mix-and-match generated names and non-generated
> names: you can use any name you want for a particular object type until
> you call the Gen function for that object type.
>
> Here's the problem scenario:
>
>  - Application calls a meta function that generates a name.  The first
>Gen will probably return 1.
>
>  - Application decides to use the same name for an object of the same
>type without calling Gen.  Many demo programs use names 1, 2, 3,
>etc. without calling Gen.
>
>  - Application calls the meta function again, and the meta function
>replaces the data.  The application's data is lost, and the app
>fails.  Have fun debugging that.
>
> Signed-off-by: Ian Romanick 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92363
> ---
>  src/mesa/drivers/common/meta.c | 51 
> +++---
>  src/mesa/drivers/common/meta.h |  5 ++-
>  src/mesa/drivers/common/meta_blit.c|  5 +--
>  src/mesa/drivers/common/meta_generate_mipmap.c |  6 +--
>  4 files changed, 29 insertions(+), 38 deletions(-)
>
> diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
> index 57993cf..b06f683 100644
> --- a/src/mesa/drivers/common/meta.c
> +++ b/src/mesa/drivers/common/meta.c
> @@ -95,9 +95,12 @@ static struct blit_shader *
>  choose_blit_shader(GLenum target, struct blit_shader_table *table);
>
>  static void cleanup_temp_texture(struct temp_texture *tex);
> -static void meta_glsl_clear_cleanup(struct clear_state *clear);
> -static void meta_decompress_cleanup(struct decompress_state *decompress);
> -static void meta_drawpix_cleanup(struct drawpix_state *drawpix);
> +static void meta_glsl_clear_cleanup(struct gl_context *ctx,
> +struct clear_state *clear);
> +static void meta_decompress_cleanup(struct gl_context *ctx,
> +struct decompress_state *decompress);
> +static void meta_drawpix_cleanup(struct gl_context *ctx,
> + struct drawpix_state *drawpix);
>
>  void
>  _mesa_meta_bind_fbo_image(GLenum fboTarget, GLenum attachment,
> @@ -435,12 +438,12 @@ _mesa_meta_free(struct gl_context *ctx)
>  {
> GET_CURRENT_CONTEXT(old_context);
> _mesa_make_current(ctx, NULL, NULL);
> -   _mesa_meta_glsl_blit_cleanup(>Meta->Blit);
> -   meta_glsl_clear_cleanup(>Meta->Clear);
> -   _mesa_meta_glsl_generate_mipmap_cleanup(>Meta->Mipmap);
> +   _mesa_meta_glsl_blit_cleanup(ctx, >Meta->Blit);
> +   meta_glsl_clear_cleanup(ctx, >Meta->Clear);
> +   _mesa_meta_glsl_generate_mipmap_cleanup(ctx, >Meta->Mipmap);
> cleanup_temp_texture(>Meta->TempTex);
> -   meta_decompress_cleanup(>Meta->Decompress);
> -   meta_drawpix_cleanup(>Meta->DrawPix);
> +   meta_decompress_cleanup(ctx, >Meta->Decompress);
> +   meta_drawpix_cleanup(ctx, >Meta->DrawPix);
> if (old_context)
>_mesa_make_current(old_context, old_context->WinSysDrawBuffer, 
> old_context->WinSysReadBuffer);
> else
> @@ -1638,14 +1641,13 @@ meta_glsl_clear_init(struct gl_context *ctx, struct 
> clear_state *clear)
>  }
>
>  static void
> -meta_glsl_clear_cleanup(struct clear_state *clear)
> +meta_glsl_clear_cleanup(struct gl_context *ctx, struct clear_state *clear)
>  {
> if (clear->VAO == 0)
>return;
> _mesa_DeleteVertexArrays(1, >VAO);
> clear->VAO = 0;
> -   _mesa_DeleteBuffers(1, >buf_obj->Name);
> -   clear->buf_obj = NULL;
> +   _mesa_reference_buffer_object(ctx, >buf_obj, NULL);
> _mesa_DeleteProgram(clear->ShaderProg);
> clear->ShaderProg = 0;
>
> @@ -1939,14 +1941,13 @@ _mesa_meta_CopyPixels(struct gl_context *ctx, GLint 
> srcX, GLint srcY,
>  }
>
>  static void
> -meta_drawpix_cleanup(struct drawpix_state *drawpix)
> +meta_drawpix_cleanup(struct gl_context *ctx, struct drawpix_state *drawpix)
>  {
> if (drawpix->VAO != 0) {
>_mesa_DeleteVertexArrays(1, >VAO);
>drawpix->VAO = 0;
>
> -  _mesa_DeleteBuffers(1, >buf_obj->Name);
> -  drawpix->buf_obj = NULL;
> +  _mesa_reference_buffer_object(ctx, >buf_obj, NULL);
> }
>
> if (drawpix->StencilFP != 0) {
> @@ -2975,14 +2976,15 @@ meta_decompress_fbo_cleanup(struct 
> decompress_fbo_state *decompress_fbo)
>  }
>
>  static void
> -meta_decompress_cleanup(struct decompress_state *decompress)
> +meta_decompress_cleanup(struct gl_context *ctx,
> +struct decompress_state *decompress)
>  {
> meta_decompress_fbo_cleanup(>byteFBO);
> meta_decompress_fbo_cleanup(>floatFBO);
>
> if (decompress->VAO != 0) {
>

Re: [Mesa-dev] [PATCH 19/23] meta: Use internal functions for buffer object and VAO access in _mesa_meta_DrawTex

2015-11-13 Thread Anuj Phogat
On Mon, Nov 9, 2015 at 4:56 PM, Ian Romanick  wrote:
> From: Ian Romanick 
>
> Signed-off-by: Ian Romanick 
> ---
>  src/mesa/drivers/common/meta.c | 32 
>  1 file changed, 20 insertions(+), 12 deletions(-)
>
> diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
> index 6927ae9..57993cf 100644
> --- a/src/mesa/drivers/common/meta.c
> +++ b/src/mesa/drivers/common/meta.c
> @@ -3298,13 +3298,16 @@ _mesa_meta_DrawTex(struct gl_context *ctx, GLfloat x, 
> GLfloat y, GLfloat z,
>
> if (drawtex->VAO == 0) {
>/* one-time setup */
> -  GLint active_texture;
> +  struct gl_vertex_array_object *array_obj;
>GLuint VBO;
>
>/* create vertex array object */
>_mesa_GenVertexArrays(1, >VAO);
>_mesa_BindVertexArray(drawtex->VAO);
>
> +  array_obj = _mesa_lookup_vao(ctx, drawtex->VAO);
> +  assert(array_obj != NULL);
> +
>/* create vertex array buffer */
>_mesa_CreateBuffers(1, );
>drawtex->buf_obj = _mesa_lookup_bufferobj(ctx, VBO);
> @@ -3321,22 +3324,27 @@ _mesa_meta_DrawTex(struct gl_context *ctx, GLfloat x, 
> GLfloat y, GLfloat z,
>
>assert(drawtex->buf_obj->Size == sizeof(verts));
>
> -  /* client active texture is not part of the array object */
> -  active_texture = ctx->Array.ActiveTexture;
> -
>_mesa_BindBuffer(GL_ARRAY_BUFFER_ARB, VBO);
>
>/* setup vertex arrays */
> -  _mesa_VertexPointer(3, GL_FLOAT, sizeof(struct vertex), OFFSET(x));
> -  _mesa_EnableClientState(GL_VERTEX_ARRAY);
> +  _mesa_update_array_format(ctx, array_obj, VERT_ATTRIB_POS,
> +3, GL_FLOAT, GL_RGBA, GL_FALSE,
> +GL_FALSE, GL_FALSE,
> +offsetof(struct vertex, x), true);
> +  _mesa_bind_vertex_buffer(ctx, array_obj, VERT_ATTRIB_POS,
> +   drawtex->buf_obj, 0, sizeof(struct vertex));
> +  _mesa_enable_vertex_array_attrib(ctx, array_obj, VERT_ATTRIB_POS);
> +
> +
>for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
> - _mesa_ClientActiveTexture(GL_TEXTURE0 + i);
> - _mesa_TexCoordPointer(2, GL_FLOAT, sizeof(struct vertex), 
> OFFSET(st[i]));
> - _mesa_EnableClientState(GL_TEXTURE_COORD_ARRAY);
> + _mesa_update_array_format(ctx, array_obj, VERT_ATTRIB_TEX(i),
> +   2, GL_FLOAT, GL_RGBA, GL_FALSE,
> +   GL_FALSE, GL_FALSE,
> +   offsetof(struct vertex, st[i]), true);
> + _mesa_bind_vertex_buffer(ctx, array_obj, VERT_ATTRIB_TEX(i),
> +  drawtex->buf_obj, 0, sizeof(struct 
> vertex));
> + _mesa_enable_vertex_array_attrib(ctx, array_obj, 
> VERT_ATTRIB_TEX(i));
>}
> -
> -  /* restore client active texture */
> -  _mesa_ClientActiveTexture(GL_TEXTURE0 + active_texture);
> }
> else {
>_mesa_BindVertexArray(drawtex->VAO);
> --
> 2.1.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reviewed-by: Anuj Phogat 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600g: Support TGSI_SEMANTIC_HELPER_INVOCATION

2015-11-13 Thread Nicolai Hähnle

On 13.11.2015 00:14, Glenn Kennard wrote:

Signed-off-by: Glenn Kennard 
---
Maybe there is a better way to check if a thread is a helper invocation?


Is ctx->face_gpr guaranteed to be initialized when 
load_helper_invocation is called?


Aside, I'm not sure I understand correctly what this is supposed to do. 
The values you're querying are related to multi-sampling, but my 
understanding has always been that helper invocations can also happen 
without multi-sampling: you always want to process 2x2 quads of pixels 
at a time to be able to compute derivatives for texture sampling. When 
the boundary of primitive intersects such a quad, you get helper 
invocations outside the primitive.


Cheers,
Nicolai


  src/gallium/drivers/r600/r600_shader.c | 83 +-
  1 file changed, 72 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 560197c..a227d78 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -530,7 +530,8 @@ static int r600_spi_sid(struct r600_shader_io * io)
name == TGSI_SEMANTIC_PSIZE ||
name == TGSI_SEMANTIC_EDGEFLAG ||
name == TGSI_SEMANTIC_FACE ||
-   name == TGSI_SEMANTIC_SAMPLEMASK)
+   name == TGSI_SEMANTIC_SAMPLEMASK ||
+   name == TGSI_SEMANTIC_HELPER_INVOCATION)
index = 0;
else {
if (name == TGSI_SEMANTIC_GENERIC) {
@@ -734,7 +735,8 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
case TGSI_FILE_SYSTEM_VALUE:
if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK ||
d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID ||
-   d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) {
+   d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS ||
+   d->Semantic.Name == TGSI_SEMANTIC_HELPER_INVOCATION) {
break; /* Already handled from 
allocate_system_value_inputs */
} else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
if (!ctx->native_integers) {
@@ -776,13 +778,14 @@ static int allocate_system_value_inputs(struct 
r600_shader_ctx *ctx, int gpr_off
struct {
boolean enabled;
int *reg;
-   unsigned name, alternate_name;
+   unsigned associated_semantics[3];
} inputs[2] = {
-   { false, >face_gpr, TGSI_SEMANTIC_SAMPLEMASK, ~0u }, /* 
lives in Front Face GPR.z */
-
-   { false, >fixed_pt_position_gpr, TGSI_SEMANTIC_SAMPLEID, 
TGSI_SEMANTIC_SAMPLEPOS } /* SAMPLEID is in Fixed Point Position GPR.w */
+   { false, >face_gpr, { TGSI_SEMANTIC_SAMPLEMASK /* lives in 
Front Face GPR.z */,
+   TGSI_SEMANTIC_HELPER_INVOCATION, ~0u } },
+   { false, >fixed_pt_position_gpr, { TGSI_SEMANTIC_SAMPLEID  
/* in Fixed Point Position GPR.w */,
+   TGSI_SEMANTIC_SAMPLEPOS, 
TGSI_SEMANTIC_HELPER_INVOCATION } }
};
-   int i, k, num_regs = 0;
+   int i, k, l, num_regs = 0;

if (tgsi_parse_init(, ctx->tokens) != TGSI_PARSE_OK) {
return 0;
@@ -818,9 +821,11 @@ static int allocate_system_value_inputs(struct 
r600_shader_ctx *ctx, int gpr_off
struct tgsi_full_declaration *d = 

if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
for (k = 0; k < Elements(inputs); k++) {
-   if (d->Semantic.Name == inputs[k].name 
||
-   d->Semantic.Name == 
inputs[k].alternate_name) {
-   inputs[k].enabled = true;
+   for (l = 0; l < 3; l++) {
+   if (d->Semantic.Name == 
inputs[k].associated_semantics[l]) {
+   inputs[k].enabled = 
true;
+   break;
+   }
}
}
}
@@ -832,7 +837,7 @@ static int allocate_system_value_inputs(struct 
r600_shader_ctx *ctx, int gpr_off
for (i = 0; i < Elements(inputs); i++) {
boolean enabled = inputs[i].enabled;
int *reg = inputs[i].reg;
-   unsigned name = inputs[i].name;
+   unsigned name = inputs[i].associated_semantics[0];

if (enabled) {
int gpr = gpr_offset + num_regs++;
@@ -985,6 +990,56 @@ static int load_sample_position(struct r600_shader_ctx 
*ctx, struct r600_shader_
return t1;
  }

+static int load_helper_invocation(struct r600_shader_ctx *ctx,
+   

Re: [Mesa-dev] [PATCH v2] i965: Handle lum, intensity and missing components in the fast clear

2015-11-13 Thread Ilia Mirkin
On Fri, Nov 13, 2015 at 1:12 PM, Ben Widawsky  wrote:
> On Thu, Nov 12, 2015 at 04:54:08PM +0100, Neil Roberts wrote:
>> +   default:
>> +  for (int i = 0; i < 3; i++) {
>> + if (!_mesa_format_has_color_component(format, i))
>> +override_color.ui[i] = 0;
>> +  }
>
> Is there an easy way to verify that all formats want 0 for GB channels? It 
> looks

AFAIK that's true for GL, not DX.

> right to me, but with my knowledge of GL, that doesn't mean much (I am looking
> here: https://www.opengl.org/sdk/docs/man/html/glTexImage2D.xhtml)
>
> I also think that component 0 must always have a color, right? (I'm not
> requesting a change as such, just making sure my understanding of what you're
> trying to do is correct).

GL_ALPHA gets a 0 into rgb :)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/9] gallium: remove pipe_driver_query_group_info field type

2015-11-13 Thread Nicolai Hähnle

On 13.11.2015 18:35, Samuel Pitoiset wrote:

On 11/13/2015 04:57 PM, Nicolai Hähnle wrote:

This was only used to implement an unnecessarily restrictive
interpretation
of the spec of AMD_performance_monitor. The spec says

   A performance monitor consists of a number of hardware and software
   counters that can be sampled by the GPU and reported back to the
   application.

I guess one could take this as a requirement that counters _must_ be
sampled
by the GPU, but then why are they called _software_ counters? Besides,
there's not much reason _not_ to expose all counters that are available,
and this simplifies the code.


The spec says:

"
While BeginPerfMonitorAMD does mark the beginning of performance counter
collection, the counters do not begin collecting immediately.  Rather,
the counters begin collection when BeginPerfMonitorAMD is processed by
the hardware.  That is, the API is asynchronous, and performance counter
collection does not begin until the graphics hardware processes the
BeginPerfMonitorAMD command.
"


Right. I interpreted this as the authors' attempt to say that the 
counting happens in what other parts of OpenGL traditionally call "the 
server", i.e. the Begin/EndPerfMonitorAMD commands can be used to 
bracket draw calls in the way you'd usually expect, in the same way that 
e.g. changing the DepthFunc only affects rendering once the graphics 
hardware "processes the DepthFunc command".




This is why I introduced the notion of a group of GPU counters in Gallium,
because "processed by the hardware", "asynchronous" and "command" seem
like the spec is talking about GPU only.

In which world are software counters sampled by the GPU? :-)
This spec is definitely not clear about that...

Anyway, I disagree about this patch because :
1) we need to agree on what amd_performance_monitor must expose
or not. Maybe it's time to ask the guys who wrote it?


Well, Catalyst exposes only hardware counters in 
AMD_performance_monitor. But that's beside the point.


The real point is that the driver_query_group stuff is *only* used for 
AMD_performance_monitor. So it makes no sense that a driver would ever 
expose a driver_query_group that was not intended to be exposed via that 
extension.


I understand that the group_type was added with good intentions. I might 
have done the same. But in over a year (judging by the commit dates), no 
other use case for driver_query_groups has come up.


So really, this is a question for everybody who cares about nouveau, 
because nouveau is the only driver that (if a #define is enabled) 
advertises a CPU driver_query_group.


Do you want that group to be accessible via AMD_performance_monitor? 
Then be happy with this patch. Do you not want that group to be so 
accessible? Then just remove it, because it serves no purpose either way.




2) this doesn't really simplify code.


The patch only removes LOCs, so I find that a weird argument ;)

Cheers,
Nicolai




---
  src/gallium/drivers/nouveau/nvc0/nvc0_query.c |  3 ---
  src/gallium/include/pipe/p_defines.h  |  7 ---
  src/mesa/state_tracker/st_cb_perfmon.c| 30
---
  3 files changed, 40 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index f539210..a1d6162 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -200,7 +200,6 @@ nvc0_screen_get_driver_query_group_info(struct
pipe_screen *pscreen,
 if (id == NVC0_HW_SM_QUERY_GROUP) {
if (screen->compute) {
   info->name = "MP counters";
- info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;

   /* Because we can't expose the number of hardware counters
needed for
* each different query, we don't want to allow more than
one active
@@ -224,7 +223,6 @@ nvc0_screen_get_driver_query_group_info(struct
pipe_screen *pscreen,
if (screen->compute) {
   if (screen->base.class_3d < NVE4_3D_CLASS) {
  info->name = "Performance metrics";
-info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
  info->max_active_queries = 1;
  info->num_queries = NVC0_HW_METRIC_QUERY_COUNT;
  return 1;
@@ -234,7 +232,6 @@ nvc0_screen_get_driver_query_group_info(struct
pipe_screen *pscreen,
  #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
 else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) {
info->name = "Driver statistics";
-  info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_CPU;
info->max_active_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
info->num_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
return 1;
diff --git a/src/gallium/include/pipe/p_defines.h
b/src/gallium/include/pipe/p_defines.h
index 7240154..7f241c8 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -829,12 +829,6 @@ enum pipe_driver_query_type
 PIPE_DRIVER_QUERY_TYPE_HZ   = 6,

Re: [Mesa-dev] [PATCH 3/9] st/mesa: map semantic driver query types to underlying type

2015-11-13 Thread Nicolai Hähnle

On 13.11.2015 18:34, Samuel Pitoiset wrote:



On 11/13/2015 04:57 PM, Nicolai Hähnle wrote:

---
  src/gallium/include/pipe/p_defines.h   | 2 ++
  src/mesa/state_tracker/st_cb_perfmon.c | 3 +++
  2 files changed, 5 insertions(+)

diff --git a/src/gallium/include/pipe/p_defines.h
b/src/gallium/include/pipe/p_defines.h
index 7f241c8..7ed9f6d 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -791,6 +791,8 @@ union pipe_query_result
 /* PIPE_QUERY_PRIMITIVES_GENERATED */
 /* PIPE_QUERY_PRIMITIVES_EMITTED */
 /* PIPE_DRIVER_QUERY_TYPE_UINT64 */
+   /* PIPE_DRIVER_QUERY_TYPE_BYTES */
+   /* PIPE_DRIVER_QUERY_TYPE_MICROSECONDS */


While you are at it, please also add /*
PIPE_DRIVER_QUERY_TYPE_MICROSECONDS */ to pipe_query_result.


Sorry, I don't understand. Isn't that what I'm doing here?

Cheers,
Nicolai


With this minor change, this patch is:

Reviewed-by: Samuel Pitoiset 



 /* PIPE_DRIVER_QUERY_TYPE_HZ */
 uint64_t u64;

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c
b/src/mesa/state_tracker/st_cb_perfmon.c
index 4ec6d86..dedb8f5 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -334,6 +334,9 @@ st_init_perfmon(struct st_context *st)
   c->Name = info.name;
   switch (info.type) {
  case PIPE_DRIVER_QUERY_TYPE_UINT64:
+case PIPE_DRIVER_QUERY_TYPE_BYTES:
+case PIPE_DRIVER_QUERY_TYPE_MICROSECONDS:
+case PIPE_DRIVER_QUERY_TYPE_HZ:
 c->Minimum.u64 = 0;
 c->Maximum.u64 = info.max_value.u64 ?
info.max_value.u64 : -1;
 c->Type = GL_UNSIGNED_INT64_AMD;





___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] st/mesa: maintain active perfmon counters in an array (v2)

2015-11-13 Thread Nicolai Hähnle
It is easy enough to pre-determine the required size, and arrays are
generally better behaved especially when they get large.

v2: make sure init_perf_monitor returns true when no counters are active
(spotted by Samuel Pitoiset)
---
Thanks Samuel, good catch! I did test with piglit and the tests passed, so
probably CALLOC returned non-null with a zero size, but it's better not to
rely on that.

Cheers,
Nicolai
---
 src/mesa/state_tracker/st_cb_perfmon.c | 81 --
 src/mesa/state_tracker/st_cb_perfmon.h | 18 
 2 files changed, 58 insertions(+), 41 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index ec12eb2..8628e23 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -42,15 +42,14 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
struct st_context *st = st_context(ctx);
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st->pipe;
+   unsigned num_active_counters = 0;
int gid, cid;
 
st_flush_bitmap_cache(st);
 
-   /* Create a query for each active counter. */
+   /* Determine the number of active counters. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = >PerfMonitor.Groups[gid];
-  const struct st_perf_monitor_group *stg = >perfmon[gid];
-  BITSET_WORD tmp;
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -61,19 +60,32 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
  return false;
   }
 
+  num_active_counters += m->ActiveGroups[gid];
+   }
+
+   if (!num_active_counters)
+  return true;
+
+   stm->active_counters = CALLOC(num_active_counters,
+ sizeof(*stm->active_counters));
+   if (!stm->active_counters)
+  return false;
+
+   /* Create a query for each active counter. */
+   for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
+  const struct gl_perf_monitor_group *g = >PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = >perfmon[gid];
+  BITSET_WORD tmp;
+
   BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) {
  const struct st_perf_monitor_counter *stc = >counters[cid];
- struct st_perf_counter_object *cntr;
-
- cntr = CALLOC_STRUCT(st_perf_counter_object);
- if (!cntr)
-return false;
+ struct st_perf_counter_object *cntr =
+>active_counters[stm->num_active_counters];
 
  cntr->query= pipe->create_query(pipe, stc->query_type, 0);
  cntr->id   = cid;
  cntr->group_id = gid;
-
- list_addtail(>list, >active_counters);
+ ++stm->num_active_counters;
   }
}
return true;
@@ -83,24 +95,24 @@ static void
 reset_perf_monitor(struct st_perf_monitor_object *stm,
struct pipe_context *pipe)
 {
-   struct st_perf_counter_object *cntr, *tmp;
+   unsigned i;
 
-   LIST_FOR_EACH_ENTRY_SAFE(cntr, tmp, >active_counters, list) {
-  if (cntr->query)
- pipe->destroy_query(pipe, cntr->query);
-  list_del(>list);
-  free(cntr);
+   for (i = 0; i < stm->num_active_counters; ++i) {
+  struct pipe_query *query = stm->active_counters[i].query;
+  if (query)
+ pipe->destroy_query(pipe, query);
}
+   FREE(stm->active_counters);
+   stm->active_counters = NULL;
+   stm->num_active_counters = 0;
 }
 
 static struct gl_perf_monitor_object *
 st_NewPerfMonitor(struct gl_context *ctx)
 {
struct st_perf_monitor_object *stq = 
ST_CALLOC_STRUCT(st_perf_monitor_object);
-   if (stq) {
-  list_inithead(>active_counters);
+   if (stq)
   return >base;
-   }
return NULL;
 }
 
@@ -119,9 +131,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 {
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st_context(ctx)->pipe;
-   struct st_perf_counter_object *cntr;
+   unsigned i;
 
-   if (LIST_IS_EMPTY(>active_counters)) {
+   if (!stm->num_active_counters) {
   /* Create a query for each active counter before starting
* a new monitoring session. */
   if (!init_perf_monitor(ctx, m))
@@ -129,8 +141,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
}
 
/* Start the query for each active counter. */
-   LIST_FOR_EACH_ENTRY(cntr, >active_counters, list) {
-  if (!pipe->begin_query(pipe, cntr->query))
+   for (i = 0; i < stm->num_active_counters; ++i) {
+  struct pipe_query *query = stm->active_counters[i].query;
+  if (!pipe->begin_query(pipe, query))
   goto fail;
}
return true;
@@ -146,11 +159,13 @@ st_EndPerfMonitor(struct gl_context *ctx, struct 

  1   2   >