Re: [Mesa-dev] [PATCH] gallivm: disable f16c when not using AVX

2015-10-24 Thread Nicolai Hähnle

On 23.10.2015 23:26, srol...@vmware.com wrote:

From: Roland Scheidegger <srol...@vmware.com>

The f16c intrinsic can only be emitted when AVX is used, so when we disable
AVX due to forcing 128-bit vectors we must not use this intrinsic.
(Depending on the LLVM version this used to work anyway, because LLVM used
AVX even when we didn't tell it to; however, I've seen this fail with LLVM 3.3
since 718249843b915decf8fccec92e466ac1a6219934, which seems to have the side
effect of disabling AVX in LLVM even though it really only touches SSE flags.)
Possibly one day we should actually try to use AVX even with 128-bit vectors...
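
For context, a compilable sketch (editorial, not Mesa code) of why has_f16c
has to be cleared together with has_avx; the struct only mirrors the relevant
util_cpu_caps fields and everything else here is illustrative:

   #include <stdbool.h>

   struct cpu_caps {
      bool has_avx;
      bool has_avx2;
      bool has_f16c;   /* vcvtph2ps/vcvtps2ph are VEX-encoded => need AVX */
   };

   static void
   force_128bit_vectors(struct cpu_caps *caps)
   {
      /* Once we tell LLVM not to use AVX, any VEX-only feature must be
       * hidden as well, otherwise instruction selection can fail. */
      caps->has_avx  = false;
      caps->has_avx2 = false;
      caps->has_f16c = false;
   }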


Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>


---
  src/gallium/auxiliary/gallivm/lp_bld_init.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c 
b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 017d075..e6eede8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -427,6 +427,7 @@ lp_build_init(void)
 */
util_cpu_caps.has_avx = 0;
util_cpu_caps.has_avx2 = 0;
+  util_cpu_caps.has_f16c = 0;
 }

  #ifdef PIPE_ARCH_PPC_64





Re: [Mesa-dev] [PATCH 1/3] radeonsi: simplify DCC handling in si_initialize_color_surface

2015-10-24 Thread Nicolai Hähnle
With the remark on patch 2 (radeonsi: properly check if DCC is enabled 
and allocated), the series is


Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

On 24.10.2015 17:49, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

---
  src/gallium/drivers/radeonsi/si_state.c | 10 +++---
  1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 384c8e2..c87f661 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1926,8 +1926,9 @@ static void si_initialize_color_surface(struct si_context 
*sctx,
surf->cb_color_info = color_info;
surf->cb_color_attrib = color_attrib;

-   if (sctx->b.chip_class >= VI) {
+   if (sctx->b.chip_class >= VI && rtex->surface.dcc_enabled) {
unsigned max_uncompressed_block_size = 2;
+   uint64_t dcc_offset = rtex->surface.level[level].dcc_offset;

if (rtex->surface.nsamples > 1) {
if (rtex->surface.bpe == 1)
@@ -1938,12 +1939,7 @@ static void si_initialize_color_surface(struct 
si_context *sctx,

surf->cb_dcc_control = 
S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
   S_028C78_INDEPENDENT_64B_BLOCKS(1);
-
-   if (rtex->surface.dcc_enabled) {
-   uint64_t dcc_offset = 
rtex->surface.level[level].dcc_offset;
-
-   surf->cb_dcc_base = (rtex->dcc_buffer->gpu_address + 
dcc_offset) >> 8;
-   }
+   surf->cb_dcc_base = (rtex->dcc_buffer->gpu_address + dcc_offset) 
>> 8;
}

if (rtex->fmask.size) {





Re: [Mesa-dev] [PATCH 2/3] radeonsi: properly check if DCC is enabled and allocated

2015-10-24 Thread Nicolai Hähnle

On 24.10.2015 17:49, Marek Olšák wrote:

From: Marek Olšák 

---
  src/gallium/drivers/radeon/r600_texture.c | 2 +-
  src/gallium/drivers/radeonsi/cik_sdma.c   | 2 +-
  src/gallium/drivers/radeonsi/si_blit.c| 6 +++---
  src/gallium/drivers/radeonsi/si_dma.c | 2 +-
  src/gallium/drivers/radeonsi/si_state.c   | 4 ++--
  5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index f7a11a2..40075ae 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1367,7 +1367,7 @@ void evergreen_do_fast_color_clear(struct 
r600_common_context *rctx,
continue;
}

-   if (tex->surface.dcc_enabled) {
+   if (tex->dcc_buffer) {
uint32_t reset_value;
bool clear_words_needed;

diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c 
b/src/gallium/drivers/radeonsi/cik_sdma.c
index 25fd09a..e53af1d 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -243,7 +243,7 @@ void cik_sdma_copy(struct pipe_context *ctx,
if (src->format != dst->format ||
rdst->surface.nsamples > 1 || rsrc->surface.nsamples > 1 ||
(rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << 
dst_level) ||
-   rdst->surface.dcc_enabled || rsrc->surface.dcc_enabled) {
+   rdst->dcc_buffer || rsrc->dcc_buffer) {
goto fallback;
}

diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index a226436..302b75c 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -326,7 +326,7 @@ void si_decompress_color_textures(struct si_context *sctx,
assert(view);

tex = (struct r600_texture *)view->texture;
-   assert(tex->cmask.size || tex->fmask.size || 
tex->surface.dcc_enabled);
+   assert(tex->cmask.size || tex->fmask.size || tex->dcc_buffer);

		si_blit_decompress_color(&sctx->b.b, tex,
 view->u.tex.first_level, 
view->u.tex.last_level,
@@ -455,7 +455,7 @@ static void si_decompress_subresource(struct pipe_context 
*ctx,
si_blit_decompress_depth_in_place(sctx, rtex, true,
  level, level,
  first_layer, 
last_layer);
-   } else if (rtex->fmask.size || rtex->cmask.size || 
rtex->surface.dcc_enabled) {
+   } else if (rtex->fmask.size || rtex->cmask.size || rtex->dcc_buffer) {
si_blit_decompress_color(ctx, rtex, level, level,
 first_layer, last_layer);
}
@@ -676,7 +676,7 @@ static bool do_hardware_msaa_resolve(struct pipe_context 
*ctx,
dst->surface.level[info->dst.level].mode >= RADEON_SURF_MODE_1D &&
!(dst->surface.flags & RADEON_SURF_SCANOUT) &&
(!dst->cmask.size || !dst->dirty_level_mask) && /* dst cannot be 
fast-cleared */
-   !dst->surface.dcc_enabled) {
+   !dst->dcc_buffer) {
si_blitter_begin(ctx, SI_COLOR_RESOLVE |
 (info->render_condition_enable ? 0 : 
SI_DISABLE_RENDER_COND));
util_blitter_custom_resolve_color(sctx->blitter,
diff --git a/src/gallium/drivers/radeonsi/si_dma.c 
b/src/gallium/drivers/radeonsi/si_dma.c
index 73c026c..581e89f 100644
--- a/src/gallium/drivers/radeonsi/si_dma.c
+++ b/src/gallium/drivers/radeonsi/si_dma.c
@@ -249,7 +249,7 @@ void si_dma_copy(struct pipe_context *ctx,
(rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << 
dst_level) ||
rdst->cmask.size || rdst->fmask.size ||
rsrc->cmask.size || rsrc->fmask.size ||
-   rdst->surface.dcc_enabled || rsrc->surface.dcc_enabled) {
+   rdst->dcc_buffer || rsrc->dcc_buffer) {
goto fallback;
}

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index c87f661..18b6405 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1926,7 +1926,7 @@ static void si_initialize_color_surface(struct si_context 
*sctx,
surf->cb_color_info = color_info;
surf->cb_color_attrib = color_attrib;

-   if (sctx->b.chip_class >= VI && rtex->surface.dcc_enabled) {
+   if (sctx->b.chip_class >= VI && rtex->dcc_buffer) {
unsigned max_uncompressed_block_size = 2;
uint64_t dcc_offset = rtex->surface.level[level].dcc_offset;

@@ -2655,7 +2655,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) |

[Mesa-dev] [PATCH] mesa: clamp MaxLevel for immutable textures at initialization

2015-10-22 Thread Nicolai Hähnle
The same clamping already happens for glTexParameteri. This change
also fixes a bug in mipmap generation, see
https://bugs.freedesktop.org/show_bug.cgi?id=91993
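
For reference, a minimal sketch of the situation the bug report describes
(assuming a bound GL 4.2+ context; sizes and level counts are illustrative):

   GLuint tex;
   glGenTextures(1, &tex);
   glBindTexture(GL_TEXTURE_2D, tex);
   /* 256x256 would allow 9 levels, but only 4 are allocated, so the
    * mipmap pyramid of this immutable texture is incomplete. */
   glTexStorage2D(GL_TEXTURE_2D, 4, GL_RGBA8, 256, 256);
   /* ... upload level 0 ... */
   /* TEXTURE_MAX_LEVEL still has its default value of 1000 here; without
    * clamping, mipmap generation tries to go past the allocated levels. */
   glGenerateMipmap(GL_TEXTURE_2D);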

piglit test cases have been submitted for review (as additions to
arb_texture_storage-texture-storage and arb_texture_view-max-level).
---
 src/mesa/main/textureview.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/main/textureview.c b/src/mesa/main/textureview.c
index 04b7d73..b6eaa77 100644
--- a/src/mesa/main/textureview.c
+++ b/src/mesa/main/textureview.c
@@ -408,6 +408,8 @@ _mesa_set_texture_view_state(struct gl_context *ctx,
   texObj->NumLayers = 6;
   break;
}
+
+   texObj->MaxLevel = MIN2(texObj->MaxLevel, texObj->ImmutableLevels - 1);
 }
 
 /**
@@ -680,6 +682,7 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint 
origtexture,
texObj->NumLayers = newViewNumLayers;
texObj->Immutable = GL_TRUE;
texObj->ImmutableLevels = origTexObj->ImmutableLevels;
+   texObj->MaxLevel = MIN2(texObj->MaxLevel, texObj->ImmutableLevels - 1);
texObj->Target = target;
texObj->TargetIndex = _mesa_tex_target_to_index(ctx, target);
assert(texObj->TargetIndex < NUM_TEXTURE_TARGETS);
-- 
2.1.4



[Mesa-dev] [PATCH] st/mesa: fix mipmap generation for immutable textures with incomplete pyramids

2015-10-22 Thread Nicolai Hähnle
(This is an alternative to my previous patch, "mesa: clamp MaxLevel for
immutable textures at initialization"; this patch has no opinion about
how the spec should be interpreted.)

Without the clamping by NumLevels, the state tracker would reallocate the
texture storage (incorrect) and even fail to copy the base level image
after reallocation, leading to the graphical glitch of
https://bugs.freedesktop.org/show_bug.cgi?id=91993 .

A piglit test has been submitted for review as well (subtest of
arb_texture_storage-texture-storage).
---
 src/mesa/state_tracker/st_gen_mipmap.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/state_tracker/st_gen_mipmap.c 
b/src/mesa/state_tracker/st_gen_mipmap.c
index 26e1c21..3125b2a 100644
--- a/src/mesa/state_tracker/st_gen_mipmap.c
+++ b/src/mesa/state_tracker/st_gen_mipmap.c
@@ -61,6 +61,8 @@ compute_num_levels(struct gl_context *ctx,
 
numLevels = texObj->BaseLevel + baseImage->MaxNumLevels;
numLevels = MIN2(numLevels, (GLuint) texObj->MaxLevel + 1);
+   if (texObj->Immutable)
+  numLevels = MIN2(numLevels, texObj->NumLevels);
assert(numLevels >= 1);
 
return numLevels;
-- 
2.1.4



Re: [Mesa-dev] [PATCH 21/40] pipe-loader: wire up the 'static' drm pipe-loader

2015-10-22 Thread Nicolai Hähnle

On 18.10.2015 00:57, Emil Velikov wrote:

Add a list of driver descriptors and select one from the list, during
probe time.

As we'll need to have all the driver pipe_foo_screen_create() functions
provided externally (i.e. from another static lib) we need a separate
(non-inline) drm_helper, which contains the function declarations.

XXX: More than happy to rename things - header/functions/etc.

Signed-off-by: Emil Velikov 
---
  src/gallium/auxiliary/pipe-loader/Makefile.am  |   6 +-
  .../auxiliary/pipe-loader/pipe_loader_drm.c| 119 -
  .../auxiliary/target-helpers/drm_helper_public.h   |  34 ++
  3 files changed, 154 insertions(+), 5 deletions(-)
  create mode 100644 src/gallium/auxiliary/target-helpers/drm_helper_public.h

diff --git a/src/gallium/auxiliary/pipe-loader/Makefile.am 
b/src/gallium/auxiliary/pipe-loader/Makefile.am
index 6a4a667..7db4190 100644
--- a/src/gallium/auxiliary/pipe-loader/Makefile.am
+++ b/src/gallium/auxiliary/pipe-loader/Makefile.am
@@ -34,12 +34,12 @@ AM_CFLAGS += \
  libpipe_loader_static_la_SOURCES += \
$(DRM_SOURCES)

-libpipe_loader_dynamic_la_SOURCES += \
-   $(DRM_SOURCES)
-
  libpipe_loader_static_la_LIBADD = \
$(top_builddir)/src/loader/libloader.la

+libpipe_loader_dynamic_la_SOURCES += \
+   $(DRM_SOURCES)
+
  libpipe_loader_dynamic_la_LIBADD = \
$(top_builddir)/src/loader/libloader.la

diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c 
b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
index 33274de..97e9dcb 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
@@ -36,6 +36,7 @@
  #include 

  #include "loader.h"
+#include "target-helpers/drm_helper_public.h"
  #include "state_tracker/drm_driver.h"
  #include "pipe_loader_priv.h"

@@ -51,7 +52,9 @@
  struct pipe_loader_drm_device {
 struct pipe_loader_device base;
 const struct drm_driver_descriptor *dd;
+#ifndef GALLIUM_STATIC_TARGETS
 struct util_dl_library *lib;
+#endif
 int fd;
  };

@@ -59,6 +62,103 @@ struct pipe_loader_drm_device {

  static const struct pipe_loader_ops pipe_loader_drm_ops;

+#ifdef GALLIUM_STATIC_TARGETS
+static const struct drm_conf_ret throttle_ret = {
+   DRM_CONF_INT,
+   {2},
+};
+
+static const struct drm_conf_ret share_fd_ret = {
+   DRM_CONF_BOOL,
+   {true},
+};
+
+static inline const struct drm_conf_ret *
+configuration_query(enum drm_conf conf)
+{
+   switch (conf) {
+   case DRM_CONF_THROTTLE:
+  return &throttle_ret;
+   case DRM_CONF_SHARE_FD:
+  return &share_fd_ret;
+   default:
+  break;
+   }
+   return NULL;
+}
+
+static const struct drm_driver_descriptor driver_descriptors[] = {
+{
+.name = "i915",
+.driver_name = "i915",
+.create_screen = pipe_i915_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "i965",
+.driver_name = "i915",
+.create_screen = pipe_ilo_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "nouveau",
+.driver_name = "nouveau",
+.create_screen = pipe_nouveau_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "r300",
+.driver_name = "radeon",
+.create_screen = pipe_r300_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "r600",
+.driver_name = "radeon",
+.create_screen = pipe_r600_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "radeonsi",
+.driver_name = "radeon",
+.create_screen = pipe_radeonsi_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "vmwgfx",
+.driver_name = "vmwgfx",
+.create_screen = pipe_vmwgfx_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "kgsl",
+.driver_name = "freedreno",
+.create_screen = pipe_freedreno_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "msm",
+.driver_name = "freedreno",
+.create_screen = pipe_freedreno_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "vc4",
+.driver_name = "vc4",
+.create_screen = pipe_vc4_create_screen,
+.configuration = configuration_query,
+},


I believe these should be guarded by the respective #if 
defined(GALLIUM_xxx).


I see that in patch 25 (target-helpers: add a non-inline drm_helper.h) 
you change the pipe_XXX_create_screen functions so that they return NULL 
if the corresponding driver has not been configured.


However, using #if guards here instead is bound to provide a clearer 
distinction between the "create_screen failed" and "driver missing" 
failure modes.
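
Roughly what I have in mind (a sketch only; the exact macro spelling and the
stub's signature here are my assumptions, not code from this series):

   /* The descriptor entry only exists when the driver is compiled in: */
   #if defined(GALLIUM_I915)
   {
       .name = "i915",
       .driver_name = "i915",
       .create_screen = pipe_i915_create_screen,
       .configuration = configuration_query,
   },
   #endif

   /* ...versus a stub that makes "driver missing" look exactly like a
    * failed screen creation at probe time: */
   struct pipe_screen *
   pipe_i915_create_screen(int fd)
   {
      return NULL;
   }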


Cheers,
Nicolai


+#ifdef USE_VC4_SIMULATOR
+{
+   

Re: [Mesa-dev] [PATCH] r600: Fix special negative immediate constants when using ABS modifier.

2015-10-26 Thread Nicolai Hähnle

Hi Ivan,

On 25.10.2015 02:00, Ivan Kalvachev wrote:

Some constants (like 1.0 and 0.5) can be inlined as immediate inputs
without using their literal value. The r600_bytecode_special_constants()
function emulates the negative of these constants by using the NEG modifier.

However, some shaders define a -1.0 constant and want to use it as 1.0.
They do so by using the ABS modifier. But r600_bytecode_special_constants()
sets NEG in addition to ABS. Since the NEG modifier takes priority over the
ABS one, we get -|1.0| as the result instead of |1.0|.

The patch simply prevents the additional toggling of NEG when ABS is set.
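
To spell out the modifier ordering (an illustrative sketch, not r600 code):

   #include <math.h>

   /* Source modifiers as applied by the hardware: ABS first, NEG second. */
   static float
   apply_src_modifiers(float x, unsigned abs_mod, unsigned neg_mod)
   {
      if (abs_mod)
         x = fabsf(x);
      if (neg_mod)
         x = -x;
      return x;
   }

   /* A shader asking for |-1.0|: the literal -1.0 is replaced by the inline
    * constant 1.0 plus NEG, and with ABS already set the result is
    * apply_src_modifiers(1.0f, 1, 1) == -1.0f instead of the expected 1.0f. */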


Nice catch. Is there a simple test case (e.g. in piglit) that exposes 
the incorrect behavior?



Signed-off-by: Ivan Kalvachev 
---
  src/gallium/drivers/r600/r600_asm.c| 9 +
  src/gallium/drivers/r600/r600_shader.c | 2 +-
  2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c
b/src/gallium/drivers/r600/r600_asm.c
index bc69806..8fc622c 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -635,8 +635,9 @@ static int replace_gpr_with_pv_ps(struct r600_bytecode *bc,
 return 0;
  }

-void r600_bytecode_special_constants(uint32_t value, unsigned *sel,
unsigned *neg)
+void r600_bytecode_special_constants(uint32_t value, unsigned *sel,
unsigned *neg, unsigned abs)
  {
+


Please remove the extra whitespace line.

Cheers,
Nicolai


 switch(value) {
 case 0:
 *sel = V_SQ_ALU_SRC_0;
@@ -655,11 +656,11 @@ void r600_bytecode_special_constants(uint32_t
value, unsigned *sel, unsigned *ne
 break;
 case 0xBF80: /* -1.0f */
 *sel = V_SQ_ALU_SRC_1;
-   *neg ^= 1;
+   *neg ^= !abs;
 break;
 case 0xBF00: /* -0.5f */
 *sel = V_SQ_ALU_SRC_0_5;
-   *neg ^= 1;
+   *neg ^= !abs;
 break;
 default:
 *sel = V_SQ_ALU_SRC_LITERAL;
@@ -1208,7 +1209,7 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
 }
 if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
 r600_bytecode_special_constants(nalu->src[i].value,
-   &nalu->src[i].sel, &nalu->src[i].neg);
+   &nalu->src[i].sel, &nalu->src[i].neg,
nalu->src[i].abs);
 }
 if (nalu->dst.sel >= bc->ngpr) {
 bc->ngpr = nalu->dst.sel + 1;
diff --git a/src/gallium/drivers/r600/r600_shader.c
b/src/gallium/drivers/r600/r600_shader.c
index 8efe902..50c0329 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1008,7 +1008,7 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
 (tgsi_src->Register.SwizzleX ==
tgsi_src->Register.SwizzleW)) {

 index = tgsi_src->Register.Index * 4 +
tgsi_src->Register.SwizzleX;
-
r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel,
&r600_src->neg);
+
r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel,
&r600_src->neg, r600_src->abs);
 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
 return;
 }





Re: [Mesa-dev] [PATCH 3/3] st/mesa: implement ARB_copy_image

2015-10-27 Thread Nicolai Hähnle

On 25.10.2015 18:25, Marek Olšák wrote:

+/**
+ * Handle complex format conversions using 2 blits with a temporary texture
+ * in between, e.g. blitting from B10G10R10A2 to G16R16.
+ *
+ * This example is implemented this way:
+ * 1) First, blit from B10G10R10A2 to R10G10B10A2, which is canonical, so it
+ *can be reinterpreted as a different canonical format of the same bpp,
+ *such as R16G16. This blit only swaps R and B 10-bit components.
+ * 2) Finnaly, blit the result, which is R10G10B10A2, as R16G16 to G16R16.
+ *This blit only swaps R and G 16-bit components.
+ */


Typo: Finally

Nicolai


[Mesa-dev] [PATCH v2] st/mesa: fix mipmap generation for immutable textures with incomplete pyramids

2015-10-28 Thread Nicolai Hähnle
Without the clamping by NumLevels, the state tracker would reallocate the
texture storage (incorrect) and even fail to copy the base level image
after reallocation, leading to the graphical glitch of
https://bugs.freedesktop.org/show_bug.cgi?id=91993 .

A piglit test has been submitted for review as well (subtest of
arb_texture_storage-texture-storage).

v2: also bypass all calls to st_finalize_texture (suggested by Marek Olšák)

Cc: mesa-sta...@lists.freedesktop.org
Reviewed-by: Marek Olšák 
---
 src/mesa/state_tracker/st_gen_mipmap.c | 68 ++
 1 file changed, 36 insertions(+), 32 deletions(-)

diff --git a/src/mesa/state_tracker/st_gen_mipmap.c 
b/src/mesa/state_tracker/st_gen_mipmap.c
index 26e1c21..b370040 100644
--- a/src/mesa/state_tracker/st_gen_mipmap.c
+++ b/src/mesa/state_tracker/st_gen_mipmap.c
@@ -61,6 +61,8 @@ compute_num_levels(struct gl_context *ctx,
 
numLevels = texObj->BaseLevel + baseImage->MaxNumLevels;
numLevels = MIN2(numLevels, (GLuint) texObj->MaxLevel + 1);
+   if (texObj->Immutable)
+  numLevels = MIN2(numLevels, texObj->NumLevels);
assert(numLevels >= 1);
 
return numLevels;
@@ -99,38 +101,40 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target,
 */
stObj->lastLevel = lastLevel;
 
-   if (pt->last_level < lastLevel) {
-  /* The current gallium texture doesn't have space for all the
-   * mipmap levels we need to generate.  So allocate a new texture.
-   */
-  struct pipe_resource *oldTex = stObj->pt;
-
-  /* create new texture with space for more levels */
-  stObj->pt = st_texture_create(st,
-oldTex->target,
-oldTex->format,
-lastLevel,
-oldTex->width0,
-oldTex->height0,
-oldTex->depth0,
-oldTex->array_size,
-0,
-oldTex->bind);
-
-  /* This will copy the old texture's base image into the new texture
-   * which we just allocated.
-   */
-  st_finalize_texture(ctx, st->pipe, texObj);
-
-  /* release the old tex (will likely be freed too) */
-  pipe_resource_reference(&oldTex, NULL);
-  st_texture_release_all_sampler_views(st, stObj);
-   }
-   else {
-  /* Make sure that the base texture image data is present in the
-   * texture buffer.
-   */
-  st_finalize_texture(ctx, st->pipe, texObj);
+   if (!texObj->Immutable) {
+  if (pt->last_level < lastLevel) {
+ /* The current gallium texture doesn't have space for all the
+ * mipmap levels we need to generate.  So allocate a new texture.
+ */
+ struct pipe_resource *oldTex = stObj->pt;
+
+ /* create new texture with space for more levels */
+ stObj->pt = st_texture_create(st,
+   oldTex->target,
+   oldTex->format,
+   lastLevel,
+   oldTex->width0,
+   oldTex->height0,
+   oldTex->depth0,
+   oldTex->array_size,
+   0,
+   oldTex->bind);
+
+ /* This will copy the old texture's base image into the new texture
+ * which we just allocated.
+ */
+ st_finalize_texture(ctx, st->pipe, texObj);
+
+ /* release the old tex (will likely be freed too) */
+ pipe_resource_reference(&oldTex, NULL);
+ st_texture_release_all_sampler_views(st, stObj);
+  }
+  else {
+ /* Make sure that the base texture image data is present in the
+ * texture buffer.
+ */
+ st_finalize_texture(ctx, st->pipe, texObj);
+  }
}
 
pt = stObj->pt;
-- 
2.5.0



Re: [Mesa-dev] [PATCH v2] st/mesa: fix mipmap generation for immutable textures with incomplete pyramids

2015-10-29 Thread Nicolai Hähnle

On 29.10.2015 14:13, Marek Olšák wrote:

On Wed, Oct 28, 2015 at 1:00 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote:

Without the clamping by NumLevels, the state tracker would reallocate the
texture storage (incorrect) and even fail to copy the base level image
after reallocation, leading to the graphical glitch of
https://bugs.freedesktop.org/show_bug.cgi?id=91993 .

A piglit test has been submitted for review as well (subtest of
arb_texture_storage-texture-storage).

v2: also bypass all calls to st_finalize_texture (suggested by Marek Olšák)

Cc: mesa-sta...@lists.freedesktop.org
Reviewed-by: Marek Olšák <marek.ol...@amd.com>


This looks good.

(a minor nit: an updated patch should not contain any reviewed-by
tags, because the updated version hadn't been seen by anybody at the
time of sending it to the list; it's okay to keep the tag now that
I've reviewed it)


Sorry about that, I'll be more careful about that in the future.

Nicolai



Marek





[Mesa-dev] Patchwork/mesa-stable question (was: Re: [PATCH v3] r600g: Fix special negative immediate constants when using ABS modifier.)

2015-10-29 Thread Nicolai Hähnle

On 29.10.2015 10:24, Ivan Kalvachev wrote:
[snip]

On 10/29/15, Nicolai Hähnle <nhaeh...@gmail.com> wrote:

On 29.10.2015 01:52, Ivan Kalvachev wrote:

On 10/26/15, Nicolai Hähnle <nhaeh...@gmail.com> wrote:

On 25.10.2015 02:00, Ivan Kalvachev wrote:

Some constants (like 1.0 and 0.5) could be inlined as immediate inputs
without using their literal value. The
r600_bytecode_special_constants()
function emulates the negative of these constants by using NEG
modifier.

However some shaders define -1.0 constant and want to use it as 1.0.
They do so by using ABS modifier. But r600_bytecode_special_constants()
set NEG in addition to ABS. Since NEG modifier have priority over ABS
one,
we get -|1.0| as result, instead of |1.0|.

The patch simply prevents the additional switching of NEG when ABS is
set.


Nice catch. Is there a simple test case (e.g. in piglit) that exposes
the incorrect behavior?


Not that I know of.

I've located the bug investigating visual problem in Nine.
https://github.com/iXit/Mesa-3D/issues/126
https://github.com/iXit/Mesa-3D/issues/127

I also heard that it fixes artifacts in "Need for Speed: Undercover"
and "Skyrim", once again, when using Nine.


I see. I guess it's not too surprising that Nine creates shaders that
look a bit different from the Mesa statetracker's.

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

This should probably also go to stable.

Do you need somebody to push this for you or can you do it yourself?

Cheers,
Nicolai


Yes, please.
I'm not developer and I cannot push it myself.


I pushed the patch.

I am not familiar with patchwork yet and have a related question: on my 
push, I got the following error message related to patchwork:


remote: E: failed to find patch for rev 
f75f21a24ae2dd83507f3d4d8007f0fcfe6db802


Apparently, patchwork didn't pick up Ivan's v3 patch, perhaps because it 
wasn't inline. Is this something to worry about? Specifically, I believe 
the patch is a candidate for the stable branch, and I added the 
appropriate Cc: in the commit message. Does the message above prevent it 
from being picked up?


Sorry for the noise :/

Thanks!
Nicolai


Re: [Mesa-dev] [PATCH v3] r600g: Fix special negative immediate constants when using ABS modifier.

2015-10-29 Thread Nicolai Hähnle

On 29.10.2015 01:52, Ivan Kalvachev wrote:

-- Forwarded message --
From: Ivan Kalvachev <ikalvac...@gmail.com>
Date: Wed, 28 Oct 2015 23:46:44 +0200
Subject: [PATCH v3] r600g: Fix special negative immediate constants
when using ABS modifier.
To: Nicolai Hähnle <nhaeh...@gmail.com>

On 10/26/15, Nicolai Hähnle <nhaeh...@gmail.com> wrote:

Hi Ivan,

On 25.10.2015 02:00, Ivan Kalvachev wrote:

Some constants (like 1.0 and 0.5) could be inlined as immediate inputs
without using their literal value. The r600_bytecode_special_constants()
function emulates the negative of these constants by using NEG modifier.

However some shaders define -1.0 constant and want to use it as 1.0.
They do so by using ABS modifier. But r600_bytecode_special_constants()
set NEG in addition to ABS. Since NEG modifier have priority over ABS
one,
we get -|1.0| as result, instead of |1.0|.

The patch simply prevents the additional switching of NEG when ABS is
set.


Nice catch. Is there a simple test case (e.g. in piglit) that exposes
the incorrect behavior?


Not that I know of.

I've located the bug investigating visual problem in Nine.
https://github.com/iXit/Mesa-3D/issues/126
https://github.com/iXit/Mesa-3D/issues/127

I also heard that it fixes artifacts in "Need for Speed: Undercover"
and "Skyrim", once again, when using Nine.


I see. I guess it's not too surprising that Nine creates shaders that 
look a bit different from the Mesa statetracker's.


Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

This should probably also go to stable.

Do you need somebody to push this for you or can you do it yourself?

Cheers,
Nicolai


Signed-off-by: Ivan Kalvachev <ikalvac...@gmail.com>
---
   src/gallium/drivers/r600/r600_asm.c| 9 +
   src/gallium/drivers/r600/r600_shader.c | 2 +-
   2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c
b/src/gallium/drivers/r600/r600_asm.c
index bc69806..8fc622c 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -635,8 +635,9 @@ static int replace_gpr_with_pv_ps(struct r600_bytecode
*bc,
  return 0;
   }

-void r600_bytecode_special_constants(uint32_t value, unsigned *sel,
unsigned *neg)
+void r600_bytecode_special_constants(uint32_t value, unsigned *sel,
unsigned *neg, unsigned abs)
   {
+


Please remove the extra whitespace line.

Cheers,
Nicolai



I'm attaching v3 of the patch. Same as v2, but without the extra empty line.

Best Regards





Re: [Mesa-dev] [PATCH] winsys/amdgpu: remove the dcc_enable surface flag

2015-10-26 Thread Nicolai Hähnle

On 26.10.2015 11:41, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

dcc_size is sufficient and doesn't need a further comment in my opinion.
---
  src/gallium/drivers/radeon/r600_texture.c  |  3 +--
  src/gallium/drivers/radeon/radeon_winsys.h |  1 -
  src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 13 ++---
  3 files changed, 7 insertions(+), 10 deletions(-)


Agreed, this is an even better solution.

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>



diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 789c66f..edfdfe3 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -641,9 +641,8 @@ r600_texture_create_object(struct pipe_screen *screen,
return NULL;
}
}
-   if (rtex->surface.dcc_enabled) {
+   if (rtex->surface.dcc_size)
vi_texture_alloc_dcc_separate(rscreen, rtex);
-   }
}

/* Now create the backing buffer. */
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h 
b/src/gallium/drivers/radeon/radeon_winsys.h
index 0178643..8bf1e15 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -371,7 +371,6 @@ struct radeon_surf {

  uint64_tdcc_size;
  uint64_tdcc_alignment;
-booldcc_enabled;
  };

  struct radeon_bo_list_item {
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
index b442174..3006bd1 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
@@ -251,7 +251,7 @@ static int compute_level(struct amdgpu_winsys *ws,

 surf->bo_size = surf_level->offset + AddrSurfInfoOut->surfSize;

-   if (surf->dcc_enabled) {
+   if (AddrSurfInfoIn->flags.dccCompatible) {
AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize;
AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
@@ -267,10 +267,11 @@ static int compute_level(struct amdgpu_winsys *ws,
   surf->dcc_size = surf_level->dcc_offset + AddrDccOut->dccRamSize;
   surf->dcc_alignment = MAX2(surf->dcc_alignment, 
AddrDccOut->dccRamBaseAlign);
} else {
- surf->dcc_enabled = false;
+ surf->dcc_size = 0;
   surf_level->dcc_offset = 0;
}
 } else {
+  surf->dcc_size = 0;
surf_level->dcc_offset = 0;
 }

@@ -354,10 +355,6 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
 AddrDccIn.numSamples = AddrSurfInfoIn.numSamples = surf->nsamples;
 AddrSurfInfoIn.tileIndex = -1;

-   surf->dcc_enabled =  !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
-!(surf->flags & RADEON_SURF_SCANOUT) &&
-!compressed && AddrDccIn.numSamples <= 1;
-
 /* Set the micro tile type. */
 if (surf->flags & RADEON_SURF_SCANOUT)
AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE;
@@ -373,7 +370,9 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
 AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0;
 AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0;
 AddrSurfInfoIn.flags.degrade4Space = 1;
-   AddrSurfInfoIn.flags.dccCompatible = surf->dcc_enabled;
+   AddrSurfInfoIn.flags.dccCompatible = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) 
&&
+!(surf->flags & RADEON_SURF_SCANOUT) &&
+!compressed && AddrDccIn.numSamples <= 
1;

 /* This disables incorrect calculations (hacks) in addrlib. */
 AddrSurfInfoIn.flags.noStencil = 1;





Re: [Mesa-dev] [PATCH] mesa: clamp MaxLevel for immutable textures at initialization

2015-10-22 Thread Nicolai Hähnle

On 22.10.2015 15:57, Fredrik Höglund wrote:

On Thursday 22 October 2015, Nicolai Hähnle wrote:

The same clamping already happens for glTexParameteri. This change
also fixes a bug in mipmap generation, see
https://bugs.freedesktop.org/show_bug.cgi?id=91993


I don't think this patch is correct.  The ARB_texture_view specification
doesn't say that MaxLevel should be initialized to the value of
TEXTURE_IMMUTABLE_LEVELS, only that it's interpreted relative to
the view and not relative to the original data store.

Liam Middlebrook also pointed out recently that the clamping done
in glTexParameteri is in fact a bug:

http://lists.freedesktop.org/archives/piglit/2015-June/016342.html

The language in the specification that says that MaxLevel is clamped
when the texture is immutable applies to texture minification,
magnification, and texture completeness; not to gl*Tex*Parameter*.


Ugh. I was torn between those two interpretations. I suppose nobody was 
confident enough to change gl*Tex*Parameter* either ;)


Thinking more on this, there is also a problematic interaction between 
glTextureView and glGenerate*Mipmap when the view does not extend to the 
highest level in the underlying texture. Clearly, this part of the spec 
could use some cleanups.
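
To make that interaction concrete (assuming a GL 4.3+ context; the sizes and
level ranges below are illustrative):

   GLuint tex, view;
   glGenTextures(1, &tex);
   glBindTexture(GL_TEXTURE_2D, tex);
   glTexStorage2D(GL_TEXTURE_2D, 8, GL_RGBA8, 128, 128);  /* levels 0..7 */

   /* The view covers only levels 0..3 of the underlying storage. */
   glGenTextures(1, &view);
   glTextureView(view, GL_TEXTURE_2D, tex, GL_RGBA8, 0, 4, 0, 1);
   glBindTexture(GL_TEXTURE_2D, view);

   /* Which levels may this write: only 0..3 as seen by the view, or does
    * the default MAX_LEVEL of 1000 let it reach levels the view was never
    * meant to expose? */
   glGenerateMipmap(GL_TEXTURE_2D);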


Any chance of an "official" clarification? I did not find corresponding 
Issues in the corresponding extensions. What are non-Mesa drivers doing?


Cheers,
Nicolai


Re: [Mesa-dev] [PATCH 21/40] pipe-loader: wire up the 'static' drm pipe-loader

2015-10-22 Thread Nicolai Hähnle

On 22.10.2015 17:32, Emil Velikov wrote:

On 22 October 2015 at 15:07, Nicolai Hähnle <nhaeh...@gmail.com> wrote:

On 18.10.2015 00:57, Emil Velikov wrote:


Add a list of driver descriptors and select one from the list, during
probe time.

As we'll need to have all the driver pipe_foo_screen_create() functions
provided externally (i.e. from another static lib) we need a separate
(non-inline) drm_helper, which contains the function declarations.

XXX: More than happy to rename things - header/functions/etc.

Signed-off-by: Emil Velikov <emil.l.veli...@gmail.com>
---
   src/gallium/auxiliary/pipe-loader/Makefile.am  |   6 +-
   .../auxiliary/pipe-loader/pipe_loader_drm.c| 119
-
   .../auxiliary/target-helpers/drm_helper_public.h   |  34 ++
   3 files changed, 154 insertions(+), 5 deletions(-)
   create mode 100644
src/gallium/auxiliary/target-helpers/drm_helper_public.h

diff --git a/src/gallium/auxiliary/pipe-loader/Makefile.am
b/src/gallium/auxiliary/pipe-loader/Makefile.am
index 6a4a667..7db4190 100644
--- a/src/gallium/auxiliary/pipe-loader/Makefile.am
+++ b/src/gallium/auxiliary/pipe-loader/Makefile.am
@@ -34,12 +34,12 @@ AM_CFLAGS += \
   libpipe_loader_static_la_SOURCES += \
 $(DRM_SOURCES)

-libpipe_loader_dynamic_la_SOURCES += \
-   $(DRM_SOURCES)
-
   libpipe_loader_static_la_LIBADD = \
 $(top_builddir)/src/loader/libloader.la

+libpipe_loader_dynamic_la_SOURCES += \
+   $(DRM_SOURCES)
+
   libpipe_loader_dynamic_la_LIBADD = \
 $(top_builddir)/src/loader/libloader.la

diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
index 33274de..97e9dcb 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
@@ -36,6 +36,7 @@
   #include 

   #include "loader.h"
+#include "target-helpers/drm_helper_public.h"
   #include "state_tracker/drm_driver.h"
   #include "pipe_loader_priv.h"

@@ -51,7 +52,9 @@
   struct pipe_loader_drm_device {
  struct pipe_loader_device base;
  const struct drm_driver_descriptor *dd;
+#ifndef GALLIUM_STATIC_TARGETS
  struct util_dl_library *lib;
+#endif
  int fd;
   };

@@ -59,6 +62,103 @@ struct pipe_loader_drm_device {

   static const struct pipe_loader_ops pipe_loader_drm_ops;

+#ifdef GALLIUM_STATIC_TARGETS
+static const struct drm_conf_ret throttle_ret = {
+   DRM_CONF_INT,
+   {2},
+};
+
+static const struct drm_conf_ret share_fd_ret = {
+   DRM_CONF_BOOL,
+   {true},
+};
+
+static inline const struct drm_conf_ret *
+configuration_query(enum drm_conf conf)
+{
+   switch (conf) {
+   case DRM_CONF_THROTTLE:
+  return &throttle_ret;
+   case DRM_CONF_SHARE_FD:
+  return &share_fd_ret;
+   default:
+  break;
+   }
+   return NULL;
+}
+
+static const struct drm_driver_descriptor driver_descriptors[] = {
+{
+.name = "i915",
+.driver_name = "i915",
+.create_screen = pipe_i915_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "i965",
+.driver_name = "i915",
+.create_screen = pipe_ilo_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "nouveau",
+.driver_name = "nouveau",
+.create_screen = pipe_nouveau_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "r300",
+.driver_name = "radeon",
+.create_screen = pipe_r300_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "r600",
+.driver_name = "radeon",
+.create_screen = pipe_r600_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "radeonsi",
+.driver_name = "radeon",
+.create_screen = pipe_radeonsi_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "vmwgfx",
+.driver_name = "vmwgfx",
+.create_screen = pipe_vmwgfx_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "kgsl",
+.driver_name = "freedreno",
+.create_screen = pipe_freedreno_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "msm",
+.driver_name = "freedreno",
+.create_screen = pipe_freedreno_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "vc4",
+.driver_name = "vc4",
+.create_screen = pipe_vc4_create_screen,
+.configuration = configuration_query,
+},



I believe these should be guarded by the respective #if
defined(GALLIUM_xxx).

I see that in patch 25 (targ

[Mesa-dev] [PATCH 9/9] st/mesa: add support for batch driver queries to perfmon

2015-11-13 Thread Nicolai Hähnle
---
 src/mesa/state_tracker/st_cb_perfmon.c | 75 ++
 src/mesa/state_tracker/st_cb_perfmon.h |  6 +++
 2 files changed, 74 insertions(+), 7 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 6c71a13..078d2c4 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -42,7 +42,10 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
struct st_context *st = st_context(ctx);
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st->pipe;
+   unsigned *batch = NULL;
unsigned num_active_counters = 0;
+   unsigned max_batch_counters = 0;
+   unsigned num_batch_counters = 0;
int gid, cid;
 
st_flush_bitmap_cache(st);
@@ -50,6 +53,7 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
/* Determine the number of active counters. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = &st->perfmon[gid];
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -61,6 +65,8 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
   }
 
   num_active_counters += m->ActiveGroups[gid];
+  if (stg->has_batch)
+ max_batch_counters += m->ActiveGroups[gid];
}
 
stm->active_counters = CALLOC(num_active_counters,
@@ -68,6 +74,9 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
if (!stm->active_counters)
   return false;
 
+   if (max_batch_counters)
+  batch = CALLOC(max_batch_counters, sizeof(*batch));
+
/* Create a query for each active counter. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
@@ -79,13 +88,35 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
  struct st_perf_counter_object *cntr =
            &stm->active_counters[stm->num_active_counters];
 
- cntr->query= pipe->create_query(pipe, stc->query_type, 0);
  cntr->id   = cid;
  cntr->group_id = gid;
+ if (stc->flags & PIPE_DRIVER_QUERY_FLAG_BATCH) {
+cntr->batch_index = num_batch_counters;
+batch[num_batch_counters++] = stc->query_type;
+ } else {
+cntr->query = pipe->create_query(pipe, stc->query_type, 0);
+if (!cntr->query)
+   goto fail;
+ }
  ++stm->num_active_counters;
   }
}
+
+   /* Create the batch query. */
+   if (num_batch_counters) {
+  stm->batch_query = pipe->create_batch_query(pipe, num_batch_counters,
+  batch);
+  stm->batch_result = CALLOC(num_batch_counters, 
sizeof(stm->batch_result->batch[0]));
+  if (!stm->batch_query || !stm->batch_result)
+ goto fail;
+   }
+
+   FREE(batch);
return true;
+
+fail:
+   FREE(batch);
+   return false;
 }
 
 static void
@@ -102,6 +133,13 @@ reset_perf_monitor(struct st_perf_monitor_object *stm,
FREE(stm->active_counters);
stm->active_counters = NULL;
stm->num_active_counters = 0;
+
+   if (stm->batch_query) {
+  pipe->destroy_query(pipe, stm->batch_query);
+  stm->batch_query = NULL;
+   }
+   FREE(stm->batch_result);
+   stm->batch_result = NULL;
 }
 
 static struct gl_perf_monitor_object *
@@ -140,9 +178,13 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
/* Start the query for each active counter. */
for (i = 0; i < stm->num_active_counters; ++i) {
   struct pipe_query *query = stm->active_counters[i].query;
-  if (!pipe->begin_query(pipe, query))
+  if (query && !pipe->begin_query(pipe, query))
   goto fail;
}
+
+   if (stm->batch_query && !pipe->begin_query(pipe, stm->batch_query))
+  goto fail;
+
return true;
 
 fail:
@@ -161,8 +203,12 @@ st_EndPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
/* Stop the query for each active counter. */
for (i = 0; i < stm->num_active_counters; ++i) {
   struct pipe_query *query = stm->active_counters[i].query;
-  pipe->end_query(pipe, query);
+  if (query)
+ pipe->end_query(pipe, query);
}
+
+   if (stm->batch_query)
+  pipe->end_query(pipe, stm->batch_query);
 }
 
 static void
@@ -196,11 +242,16 @@ st_IsPerfMonitorResultAvailable(struct gl_context *ctx,
for (i = 0; i < stm->num_active_counters; ++i) {
   struct pipe_query *query = stm->active_counters[i].query;
   union pipe_query_result result;
-  if (!pipe->get_query_result(pipe, query, FALSE, &result)) {
+  if (query && !pipe->get_query_result(pipe, query, FALSE, &result)) {
  /* 

[Mesa-dev] [PATCH 2/9] gallium/hud: remove unused field in query_info

2015-11-13 Thread Nicolai Hähnle
---
 src/gallium/auxiliary/hud/hud_driver_query.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c 
b/src/gallium/auxiliary/hud/hud_driver_query.c
index f14305e..3198ab3 100644
--- a/src/gallium/auxiliary/hud/hud_driver_query.c
+++ b/src/gallium/auxiliary/hud/hud_driver_query.c
@@ -48,7 +48,6 @@ struct query_info {
/* Ring of queries. If a query is busy, we use another slot. */
struct pipe_query *query[NUM_QUERIES];
unsigned head, tail;
-   unsigned num_queries;
 
uint64_t last_time;
uint64_t results_cumulative;
-- 
2.5.0



[Mesa-dev] [PATCH 1/9] gallium: remove pipe_driver_query_group_info field type

2015-11-13 Thread Nicolai Hähnle
This was only used to implement an unnecessarily restrictive interpretation
of the spec of AMD_performance_monitor. The spec says

  A performance monitor consists of a number of hardware and software
  counters that can be sampled by the GPU and reported back to the
  application.

I guess one could take this as a requirement that counters _must_ be sampled
by the GPU, but then why are they called _software_ counters? Besides,
there's not much reason _not_ to expose all counters that are available,
and this simplifies the code.
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query.c |  3 ---
 src/gallium/include/pipe/p_defines.h  |  7 ---
 src/mesa/state_tracker/st_cb_perfmon.c| 30 ---
 3 files changed, 40 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index f539210..a1d6162 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -200,7 +200,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
if (id == NVC0_HW_SM_QUERY_GROUP) {
   if (screen->compute) {
  info->name = "MP counters";
- info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
 
  /* Because we can't expose the number of hardware counters needed for
   * each different query, we don't want to allow more than one active
@@ -224,7 +223,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
   if (screen->compute) {
  if (screen->base.class_3d < NVE4_3D_CLASS) {
 info->name = "Performance metrics";
-info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
 info->max_active_queries = 1;
 info->num_queries = NVC0_HW_METRIC_QUERY_COUNT;
 return 1;
@@ -234,7 +232,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) {
   info->name = "Driver statistics";
-  info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_CPU;
   info->max_active_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
   info->num_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
   return 1;
diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 7240154..7f241c8 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -829,12 +829,6 @@ enum pipe_driver_query_type
PIPE_DRIVER_QUERY_TYPE_HZ   = 6,
 };
 
-enum pipe_driver_query_group_type
-{
-   PIPE_DRIVER_QUERY_GROUP_TYPE_CPU = 0,
-   PIPE_DRIVER_QUERY_GROUP_TYPE_GPU = 1,
-};
-
 /* Whether an average value per frame or a cumulative value should be
  * displayed.
  */
@@ -864,7 +858,6 @@ struct pipe_driver_query_info
 struct pipe_driver_query_group_info
 {
const char *name;
-   enum pipe_driver_query_group_type type;
unsigned max_active_queries;
unsigned num_queries;
 };
diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 1bb5be3..4ec6d86 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -65,27 +65,6 @@ find_query_type(struct pipe_screen *screen, const char *name)
return type;
 }
 
-/**
- * Return TRUE if the underlying driver expose GPU counters.
- */
-static bool
-has_gpu_counters(struct pipe_screen *screen)
-{
-   int num_groups, gid;
-
-   num_groups = screen->get_driver_query_group_info(screen, 0, NULL);
-   for (gid = 0; gid < num_groups; gid++) {
-  struct pipe_driver_query_group_info group_info;
-
-  if (!screen->get_driver_query_group_info(screen, gid, &group_info))
- continue;
-
-  if (group_info.type == PIPE_DRIVER_QUERY_GROUP_TYPE_GPU)
- return true;
-   }
-   return false;
-}
-
 static bool
 init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
 {
@@ -313,12 +292,6 @@ st_init_perfmon(struct st_context *st)
if (!screen->get_driver_query_info || !screen->get_driver_query_group_info)
   return false;
 
-   if (!has_gpu_counters(screen)) {
-  /* According to the spec, GL_AMD_performance_monitor must only
-   * expose GPU counters. */
-  return false;
-   }
-
/* Get the number of available queries. */
num_counters = screen->get_driver_query_info(screen, 0, NULL);
if (!num_counters)
@@ -339,9 +312,6 @@ st_init_perfmon(struct st_context *st)
   if (!screen->get_driver_query_group_info(screen, gid, &group_info))
  continue;
 
-  if (group_info.type != PIPE_DRIVER_QUERY_GROUP_TYPE_GPU)
- continue;
-
   g->Name = group_info.name;
   g->MaxActiveCounters = group_info.max_active_queries;
   g->NumCounters = 0;
-- 
2.5.0



[Mesa-dev] [PATCH 10/10] radeon: count cs dwords separately for query begin and end

2015-11-13 Thread Nicolai Hähnle
This will be important for perfcounter queries.
---
 src/gallium/drivers/radeon/r600_query.c | 33 +++--
 src/gallium/drivers/radeon/r600_query.h |  3 ++-
 2 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 4f89634..f8a30a2 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -342,16 +342,18 @@ static struct pipe_query *r600_query_hw_create(struct 
r600_common_context *rctx,
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
query->result_size = 16 * rctx->max_db;
-   query->num_cs_dw = 6;
+   query->num_cs_dw_begin = 6;
+   query->num_cs_dw_end = 6;
break;
case PIPE_QUERY_TIME_ELAPSED:
query->result_size = 16;
-   query->num_cs_dw = 8;
+   query->num_cs_dw_begin = 8;
+   query->num_cs_dw_end = 8;
query->flags = R600_QUERY_HW_FLAG_TIMER;
break;
case PIPE_QUERY_TIMESTAMP:
query->result_size = 8;
-   query->num_cs_dw = 8;
+   query->num_cs_dw_end = 8;
query->flags = R600_QUERY_HW_FLAG_TIMER |
   R600_QUERY_HW_FLAG_NO_START;
break;
@@ -361,13 +363,15 @@ static struct pipe_query *r600_query_hw_create(struct 
r600_common_context *rctx,
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
query->result_size = 32;
-   query->num_cs_dw = 6;
+   query->num_cs_dw_begin = 6;
+   query->num_cs_dw_end = 6;
query->stream = index;
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
/* 11 values on EG, 8 on R600. */
query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 
16;
-   query->num_cs_dw = 6;
+   query->num_cs_dw_begin = 6;
+   query->num_cs_dw_end = 6;
break;
default:
assert(0);
@@ -465,7 +469,9 @@ static void r600_query_hw_emit_start(struct 
r600_common_context *ctx,
 
r600_update_occlusion_query_state(ctx, query->b.type, 1);
r600_update_prims_generated_query_state(ctx, query->b.type, 1);
-   ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw * 2, TRUE);
+
+   ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_begin + query->num_cs_dw_end,
+  TRUE);
 
/* Get a new query buffer if needed. */
if (query->buffer.results_end + query->result_size > 
query->buffer.buf->b.b.width0) {
@@ -482,10 +488,9 @@ static void r600_query_hw_emit_start(struct 
r600_common_context *ctx,
query->ops->emit_start(ctx, query, query->buffer.buf, va);
 
if (query->flags & R600_QUERY_HW_FLAG_TIMER)
-   ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw;
+   ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw_end;
else
-   ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
-
+   ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw_end;
 }
 
 static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
@@ -546,7 +551,7 @@ static void r600_query_hw_emit_stop(struct 
r600_common_context *ctx,
 
/* The queries which need begin already called this in begin_query. */
if (query->flags & R600_QUERY_HW_FLAG_NO_START) {
-   ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw, FALSE);
+   ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_end, FALSE);
}
 
/* emit end query */
@@ -558,9 +563,9 @@ static void r600_query_hw_emit_stop(struct 
r600_common_context *ctx,
 
if (!(query->flags & R600_QUERY_HW_FLAG_NO_START)) {
if (query->flags & R600_QUERY_HW_FLAG_TIMER)
-   ctx->num_cs_dw_timer_queries_suspend -= 
query->num_cs_dw;
+   ctx->num_cs_dw_timer_queries_suspend -= 
query->num_cs_dw_end;
else
-   ctx->num_cs_dw_nontimer_queries_suspend -= 
query->num_cs_dw;
+   ctx->num_cs_dw_nontimer_queries_suspend -= 
query->num_cs_dw_end;
}
 
r600_update_occlusion_query_state(ctx, query->b.type, -1);
@@ -980,14 +985,14 @@ static unsigned 
r600_queries_num_cs_dw_for_resuming(struct r600_common_context *
 
LIST_FOR_EACH_ENTRY(query, query_list, list) {
/* begin + end */
-   num_dw += query->num_cs_dw * 2;
+   num_dw += query->num_cs_dw_begin + query->num_cs_dw_end;
 
/* Workaround for the fact that
 * num_cs_dw_nontimer_queries_suspend is incremented for every
 * resumed query, which raises the bar in 

[Mesa-dev] [PATCH 08/10] radeon: implement r600_query_hw_get_result via function pointers

2015-11-13 Thread Nicolai Hähnle
We will need the clear_result override for the batch query implementation.
---
 src/gallium/drivers/radeon/r600_query.c | 189 +++-
 src/gallium/drivers/radeon/r600_query.h |   4 +
 2 files changed, 94 insertions(+), 99 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 4b201fd..59e2a58 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -307,11 +307,18 @@ static void r600_query_hw_do_emit_stop(struct 
r600_common_context *ctx,
   struct r600_query_hw *query,
   struct r600_resource *buffer,
   uint64_t va);
+static void r600_query_hw_add_result(struct r600_common_context *ctx,
+struct r600_query_hw *, void *buffer,
+union pipe_query_result *result);
+static void r600_query_hw_clear_result(struct r600_query_hw *,
+  union pipe_query_result *);
 
 static struct r600_query_hw_ops query_hw_default_hw_ops = {
.prepare_buffer = r600_query_hw_prepare_buffer,
.emit_start = r600_query_hw_do_emit_start,
.emit_stop = r600_query_hw_do_emit_stop,
+   .clear_result = r600_query_hw_clear_result,
+   .add_result = r600_query_hw_add_result,
 };
 
 static struct pipe_query *r600_query_hw_create(struct r600_common_context 
*rctx,
@@ -695,7 +702,7 @@ static void r600_query_hw_end(struct r600_common_context 
*rctx,
	LIST_DELINIT(&query->list);
 }
 
-static unsigned r600_query_read_result(char *map, unsigned start_index, 
unsigned end_index,
+static unsigned r600_query_read_result(void *map, unsigned start_index, 
unsigned end_index,
   bool test_status_bit)
 {
uint32_t *current_result = (uint32_t*)map;
@@ -713,47 +720,36 @@ static unsigned r600_query_read_result(char *map, 
unsigned start_index, unsigned
return 0;
 }
 
-static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
-   struct r600_query_hw *query,
-   struct r600_query_buffer *qbuf,
-   boolean wait,
-   union pipe_query_result *result)
+static void r600_query_hw_add_result(struct r600_common_context *ctx,
+struct r600_query_hw *query,
+void *buffer,
+union pipe_query_result *result)
 {
-   unsigned results_base = 0;
-   char *map;
-
-   map = r600_buffer_map_sync_with_rings(ctx, qbuf->buf,
-   PIPE_TRANSFER_READ |
-   (wait ? 0 : 
PIPE_TRANSFER_DONTBLOCK));
-   if (!map)
-   return FALSE;
-
-   /* count all results across all data blocks */
switch (query->b.type) {
-   case PIPE_QUERY_OCCLUSION_COUNTER:
-   while (results_base != qbuf->results_end) {
+   case PIPE_QUERY_OCCLUSION_COUNTER: {
+   unsigned results_base = 0;
+   while (results_base != query->result_size) {
result->u64 +=
-   r600_query_read_result(map + results_base, 0, 
2, true);
+   r600_query_read_result(buffer + results_base, 
0, 2, true);
results_base += 16;
}
break;
-   case PIPE_QUERY_OCCLUSION_PREDICATE:
-   while (results_base != qbuf->results_end) {
+   }
+   case PIPE_QUERY_OCCLUSION_PREDICATE: {
+   unsigned results_base = 0;
+   while (results_base != query->result_size) {
result->b = result->b ||
-   r600_query_read_result(map + results_base, 0, 
2, true) != 0;
+   r600_query_read_result(buffer + results_base, 
0, 2, true) != 0;
results_base += 16;
}
break;
+   }
case PIPE_QUERY_TIME_ELAPSED:
-   while (results_base != qbuf->results_end) {
-   result->u64 +=
-   r600_query_read_result(map + results_base, 0, 
2, false);
-   results_base += query->result_size;
-   }
+   result->u64 += r600_query_read_result(buffer, 0, 2, false);
break;
case PIPE_QUERY_TIMESTAMP:
{
-   uint32_t *current_result = (uint32_t*)map;
+   uint32_t *current_result = (uint32_t*)buffer;
result->u64 = (uint64_t)current_result[0] |
  (uint64_t)current_result[1] << 32;
break;
@@ 

[Mesa-dev] [PATCH 00/10] radeon: cleanup and refactor the query implementation

2015-11-13 Thread Nicolai Hähnle
Hi,

in preparation for performance counters, this series makes the implementation
of queries pluggable, and separates query buffer handling from CS emit and
result collection for hardware queries.

Aside from two PIPE_QUERY_GPU_FINISHED-related fixes (using context flush,
picked up from Marek, and fixing a fence leak), this should not affect the
feature set in any way.

Please review!

Thanks,
Nicolai
---
 Makefile.sources   |1 
 r600_pipe_common.c |   46 --
 r600_pipe_common.h |   16 
 r600_query.c   | 1014 ++---
 r600_query.h   |  139 +++
 5 files changed, 734 insertions(+), 482 deletions(-)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/10] radeon: convert software queries to the new style

2015-11-13 Thread Nicolai Hähnle
Software queries are all queries that do not require suspend/resume
and explicit handling of result buffers.

Note that this fixes a fence leak with PIPE_QUERY_GPU_FINISHED, and it
contains Marek's fix to GPU_FINISHED's end_query() handling.
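
To spell out the model the diff implements: a software query just snapshots a
per-context counter at begin and at end and reports the difference. A
stand-alone sketch of that shape (illustrative names, not the Mesa types):

#include <stdint.h>

/* Model of a "software" query: no GPU buffer, no suspend/resume. */
struct sw_query {
   uint64_t begin_result;
   uint64_t end_result;
};

static void sw_query_begin(struct sw_query *q, uint64_t counter_now)
{
   q->begin_result = counter_now;          /* e.g. rctx->num_draw_calls */
}

static void sw_query_end(struct sw_query *q, uint64_t counter_now)
{
   q->end_result = counter_now;
}

static uint64_t sw_query_result(const struct sw_query *q)
{
   return q->end_result - q->begin_result; /* what get_query_result reports */
}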
---
 src/gallium/drivers/radeon/r600_query.c | 366 +---
 1 file changed, 194 insertions(+), 172 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index fdab8e3..c7350f1 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -51,15 +51,195 @@ struct r600_query {
unsignednum_cs_dw;
/* linked list of queries */
struct list_headlist;
-   /* for custom non-GPU queries */
+   /* For transform feedback: which stream the query is for */
+   unsigned stream;
+};
+
+/* Queries without buffer handling or suspend/resume. */
+struct r600_query_sw {
+   struct r600_query b;
+
uint64_t begin_result;
uint64_t end_result;
/* Fence for GPU_FINISHED. */
struct pipe_fence_handle *fence;
-   /* For transform feedback: which stream the query is for */
-   unsigned stream;
 };
 
+static void r600_query_sw_destroy(struct r600_common_context *rctx,
+ struct r600_query *rquery)
+{
+   struct pipe_screen *screen = rctx->b.screen;
+   struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+   screen->fence_reference(screen, &query->fence, NULL);
+   FREE(query);
+}
+
+static enum radeon_value_id winsys_id_from_type(unsigned type)
+{
+   switch (type) {
+   case R600_QUERY_REQUESTED_VRAM: return RADEON_REQUESTED_VRAM_MEMORY;
+   case R600_QUERY_REQUESTED_GTT: return RADEON_REQUESTED_GTT_MEMORY;
+   case R600_QUERY_BUFFER_WAIT_TIME: return RADEON_BUFFER_WAIT_TIME_NS;
+   case R600_QUERY_NUM_CS_FLUSHES: return RADEON_NUM_CS_FLUSHES;
+   case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED;
+   case R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE;
+   case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE;
+   case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE;
+   case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK;
+   case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK;
+   default: unreachable("query type does not correspond to winsys id");
+   }
+}
+
+static boolean r600_query_sw_begin(struct r600_common_context *rctx,
+  struct r600_query *rquery)
+{
+   struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+   switch(query->b.type) {
+   case PIPE_QUERY_TIMESTAMP_DISJOINT:
+   case PIPE_QUERY_GPU_FINISHED:
+   break;
+   case R600_QUERY_DRAW_CALLS:
+   query->begin_result = rctx->num_draw_calls;
+   break;
+   case R600_QUERY_REQUESTED_VRAM:
+   case R600_QUERY_REQUESTED_GTT:
+   case R600_QUERY_VRAM_USAGE:
+   case R600_QUERY_GTT_USAGE:
+   case R600_QUERY_GPU_TEMPERATURE:
+   case R600_QUERY_CURRENT_GPU_SCLK:
+   case R600_QUERY_CURRENT_GPU_MCLK:
+   query->begin_result = 0;
+   break;
+   case R600_QUERY_BUFFER_WAIT_TIME:
+   case R600_QUERY_NUM_CS_FLUSHES:
+   case R600_QUERY_NUM_BYTES_MOVED: {
+   enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
+   query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
+   break;
+   }
+   case R600_QUERY_GPU_LOAD:
+   query->begin_result = r600_gpu_load_begin(rctx->screen);
+   break;
+   case R600_QUERY_NUM_COMPILATIONS:
+   query->begin_result = p_atomic_read(&rctx->screen->num_compilations);
+   break;
+   case R600_QUERY_NUM_SHADERS_CREATED:
+   query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
+   break;
+   default:
+   unreachable("r600_query_sw_begin: bad query type");
+   }
+
+   return TRUE;
+}
+
+static void r600_query_sw_end(struct r600_common_context *rctx,
+ struct r600_query *rquery)
+{
+   struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+   switch(query->b.type) {
+   case PIPE_QUERY_TIMESTAMP_DISJOINT:
+   break;
+   case PIPE_QUERY_GPU_FINISHED:
+   rctx->b.flush(&rctx->b, &query->fence, 0);
+   break;
+   case R600_QUERY_DRAW_CALLS:
+   query->end_result = rctx->num_draw_calls;
+   break;
+   case R600_QUERY_REQUESTED_VRAM:
+   case R600_QUERY_REQUESTED_GTT:
+   case R600_QUERY_VRAM_USAGE:
+   case R600_QUERY_GTT_USAGE:
+   case R600_QUERY_GPU_TEMPERATURE:
+   case R600_QUERY_CURRENT_GPU_SCLK:
+   case R600_QUERY_CURRENT_GPU_MCLK:
+   case 

[Mesa-dev] [PATCH 04/10] radeon: add query handler function pointers

2015-11-13 Thread Nicolai Hähnle
The goal here is to be able to move the implementation details of hardware-
specific queries (in particular, performance counters) out of the common code.
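
As a stand-alone model of the indirection this introduces (stand-in types
only; the real struct r600_query_ops in the diff below additionally carries
a render_condition hook):

#include <stdbool.h>
#include <stdint.h>

struct ctx;     /* stands in for struct r600_common_context */
struct query;   /* stands in for struct r600_query */

/* Per-query-type function table, chosen once when the query is created. */
struct query_ops {
   void (*destroy)(struct ctx *, struct query *);
   bool (*begin)(struct ctx *, struct query *);
   void (*end)(struct ctx *, struct query *);
   bool (*get_result)(struct ctx *, struct query *, bool wait, uint64_t *out);
};

struct query {
   const struct query_ops *ops;
   unsigned type;
};

/* The pipe_context hooks stop switching on the query type and simply
 * forward to the table the query was created with. */
static bool context_begin_query(struct ctx *c, struct query *q)
{
   return q->ops->begin(c, q);
}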
---
 src/gallium/drivers/radeon/r600_query.c | 73 +
 src/gallium/drivers/radeon/r600_query.h | 16 
 2 files changed, 80 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index b79d2d0..fdab8e3 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -26,7 +26,6 @@
 #include "r600_cs.h"
 #include "util/u_memory.h"
 
-
 struct r600_query_buffer {
/* The buffer where query results are stored. */
struct r600_resource*buf;
@@ -39,6 +38,8 @@ struct r600_query_buffer {
 };
 
 struct r600_query {
+   struct r600_query_ops *ops;
+
/* The query buffer and how many results are in it. */
struct r600_query_bufferbuffer;
/* The type of query */
@@ -59,6 +60,23 @@ struct r600_query {
unsigned stream;
 };
 
+static void r600_do_destroy_query(struct r600_common_context *, struct 
r600_query *);
+static boolean r600_do_begin_query(struct r600_common_context *, struct 
r600_query *);
+static void r600_do_end_query(struct r600_common_context *, struct r600_query 
*);
+static boolean r600_do_get_query_result(struct r600_common_context *,
+   struct r600_query *, boolean wait,
+   union pipe_query_result *result);
+static void r600_do_render_condition(struct r600_common_context *,
+struct r600_query *, boolean condition,
+uint mode);
+
+static struct r600_query_ops legacy_query_ops = {
+   .destroy = r600_do_destroy_query,
+   .begin = r600_do_begin_query,
+   .end = r600_do_end_query,
+   .get_result = r600_do_get_query_result,
+   .render_condition = r600_do_render_condition,
+};
 
 static bool r600_is_timer_query(unsigned type)
 {
@@ -366,6 +384,7 @@ static struct pipe_query *r600_create_query(struct 
pipe_context *ctx, unsigned q
return NULL;
 
query->type = query_type;
+   query->ops = &legacy_query_ops;
 
switch (query_type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
@@ -373,7 +392,6 @@ static struct pipe_query *r600_create_query(struct 
pipe_context *ctx, unsigned q
query->result_size = 16 * rctx->max_db;
query->num_cs_dw = 6;
break;
-   break;
case PIPE_QUERY_TIME_ELAPSED:
query->result_size = 16;
query->num_cs_dw = 8;
@@ -433,7 +451,15 @@ static struct pipe_query *r600_create_query(struct 
pipe_context *ctx, unsigned q
 
 static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query 
*query)
 {
-   struct r600_query *rquery = (struct r600_query*)query;
+   struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+   struct r600_query *rquery = (struct r600_query *)query;
+
+   rquery->ops->destroy(rctx, rquery);
+}
+
+static void r600_do_destroy_query(struct r600_common_context *rctx,
+ struct r600_query *rquery)
+{
struct r600_query_buffer *prev = rquery->buffer.previous;
 
/* Release all query buffers. */
@@ -445,7 +471,7 @@ static void r600_destroy_query(struct pipe_context *ctx, 
struct pipe_query *quer
}
 
	pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
-   FREE(query);
+   FREE(rquery);
 }
 
 static boolean r600_begin_query(struct pipe_context *ctx,
@@ -453,6 +479,13 @@ static boolean r600_begin_query(struct pipe_context *ctx,
 {
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query *rquery = (struct r600_query *)query;
+
+   return rquery->ops->begin(rctx, rquery);
+}
+
+static boolean r600_do_begin_query(struct r600_common_context *rctx,
+  struct r600_query *rquery)
+{
struct r600_query_buffer *prev = rquery->buffer.previous;
 
if (!r600_query_needs_begin(rquery->type)) {
@@ -528,6 +561,12 @@ static void r600_end_query(struct pipe_context *ctx, 
struct pipe_query *query)
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query *rquery = (struct r600_query *)query;
 
+   rquery->ops->end(rctx, rquery);
+}
+
+static void r600_do_end_query(struct r600_common_context *rctx,
+ struct r600_query *rquery)
+{
/* Non-GPU queries. */
switch (rquery->type) {
case PIPE_QUERY_TIMESTAMP_DISJOINT:
@@ -792,11 +831,19 @@ static boolean r600_get_query_buffer_result(struct 
r600_common_context *ctx,
 }
 
 static boolean r600_get_query_result(struct pipe_context *ctx,
-   

[Mesa-dev] [PATCH 01/10] radeon: move get_driver_query_info to r600_query.c

2015-11-13 Thread Nicolai Hähnle
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 46 +
 src/gallium/drivers/radeon/r600_pipe_common.h |  1 +
 src/gallium/drivers/radeon/r600_query.c   | 49 +++
 3 files changed, 51 insertions(+), 45 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 79e624e..41acfbc 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -737,50 +737,6 @@ static uint64_t r600_get_timestamp(struct pipe_screen 
*screen)
rscreen->info.r600_clock_crystal_freq;
 }
 
-static int r600_get_driver_query_info(struct pipe_screen *screen,
- unsigned index,
- struct pipe_driver_query_info *info)
-{
-   struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
-   struct pipe_driver_query_info list[] = {
-   {"num-compilations", R600_QUERY_NUM_COMPILATIONS, {0}, 
PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-   {"num-shaders-created", R600_QUERY_NUM_SHADERS_CREATED, {0}, 
PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-   {"draw-calls", R600_QUERY_DRAW_CALLS, {0}},
-   {"requested-VRAM", R600_QUERY_REQUESTED_VRAM, 
{rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-   {"requested-GTT", R600_QUERY_REQUESTED_GTT, 
{rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-   {"buffer-wait-time", R600_QUERY_BUFFER_WAIT_TIME, {0}, 
PIPE_DRIVER_QUERY_TYPE_MICROSECONDS,
-PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-   {"num-cs-flushes", R600_QUERY_NUM_CS_FLUSHES, {0}},
-   {"num-bytes-moved", R600_QUERY_NUM_BYTES_MOVED, {0}, 
PIPE_DRIVER_QUERY_TYPE_BYTES,
-PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-   {"VRAM-usage", R600_QUERY_VRAM_USAGE, 
{rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-   {"GTT-usage", R600_QUERY_GTT_USAGE, {rscreen->info.gart_size}, 
PIPE_DRIVER_QUERY_TYPE_BYTES},
-   {"GPU-load", R600_QUERY_GPU_LOAD, {100}},
-   {"temperature", R600_QUERY_GPU_TEMPERATURE, {125}},
-   {"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, {0}, 
PIPE_DRIVER_QUERY_TYPE_HZ},
-   {"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, {0}, 
PIPE_DRIVER_QUERY_TYPE_HZ},
-   };
-   unsigned num_queries;
-
-   if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
-   num_queries = Elements(list);
-   else if (rscreen->info.drm_major == 3)
-   num_queries = Elements(list) - 3;
-   else
-   num_queries = Elements(list) - 4;
-
-   if (!info)
-   return num_queries;
-
-   if (index >= num_queries)
-   return 0;
-
-   *info = list[index];
-   return 1;
-}
-
 static void r600_fence_reference(struct pipe_screen *screen,
 struct pipe_fence_handle **dst,
 struct pipe_fence_handle *src)
@@ -968,7 +924,6 @@ bool r600_common_screen_init(struct r600_common_screen 
*rscreen,
rscreen->b.get_device_vendor = r600_get_device_vendor;
rscreen->b.get_compute_param = r600_get_compute_param;
rscreen->b.get_paramf = r600_get_paramf;
-   rscreen->b.get_driver_query_info = r600_get_driver_query_info;
rscreen->b.get_timestamp = r600_get_timestamp;
rscreen->b.fence_finish = r600_fence_finish;
rscreen->b.fence_reference = r600_fence_reference;
@@ -984,6 +939,7 @@ bool r600_common_screen_init(struct r600_common_screen 
*rscreen,
}
 
r600_init_screen_texture_functions(rscreen);
+   r600_init_screen_query_functions(rscreen);
 
rscreen->ws = ws;
rscreen->family = rscreen->info.family;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index b7f1a23..d2c54f3 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -534,6 +534,7 @@ uint64_t r600_gpu_load_begin(struct r600_common_screen 
*rscreen);
 unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin);
 
 /* r600_query.c */
+void r600_init_screen_query_functions(struct r600_common_screen *rscreen);
 void r600_query_init(struct r600_common_context *rctx);
 void r600_suspend_nontimer_queries(struct r600_common_context *ctx);
 void r600_resume_nontimer_queries(struct r600_common_context *ctx);
diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 9a54025..8aa8774 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -1017,6 +1017,50 @@ err:
return;
 }
 

[Mesa-dev] [PATCH 06/10] radeon: convert hardware queries to the new style

2015-11-13 Thread Nicolai Hähnle
Move r600_query and r600_query_hw into the header because we will want to
reuse the buffer handling and suspend/resume logic outside of the common
radeon code.
---
 src/gallium/drivers/radeon/r600_query.c | 281 +++-
 src/gallium/drivers/radeon/r600_query.h |  39 +
 2 files changed, 172 insertions(+), 148 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index c7350f1..eb2a563 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -26,35 +26,6 @@
 #include "r600_cs.h"
 #include "util/u_memory.h"
 
-struct r600_query_buffer {
-   /* The buffer where query results are stored. */
-   struct r600_resource*buf;
-   /* Offset of the next free result after current query data */
-   unsignedresults_end;
-   /* If a query buffer is full, a new buffer is created and the old one
-* is put in here. When we calculate the result, we sum up the samples
-* from all buffers. */
-   struct r600_query_buffer*previous;
-};
-
-struct r600_query {
-   struct r600_query_ops *ops;
-
-   /* The query buffer and how many results are in it. */
-   struct r600_query_bufferbuffer;
-   /* The type of query */
-   unsignedtype;
-   /* Size of the result in memory for both begin_query and end_query,
-* this can be one or two numbers, or it could even be a size of a 
structure. */
-   unsignedresult_size;
-   /* The number of dwords for begin_query or end_query. */
-   unsignednum_cs_dw;
-   /* linked list of queries */
-   struct list_headlist;
-   /* For transform feedback: which stream the query is for */
-   unsigned stream;
-};
-
 /* Queries without buffer handling or suspend/resume. */
 struct r600_query_sw {
struct r600_query b;
@@ -240,23 +211,23 @@ static struct pipe_query *r600_query_sw_create(struct 
pipe_context *ctx,
return (struct pipe_query *)query;
 }
 
-static void r600_do_destroy_query(struct r600_common_context *, struct 
r600_query *);
-static boolean r600_do_begin_query(struct r600_common_context *, struct 
r600_query *);
-static void r600_do_end_query(struct r600_common_context *, struct r600_query 
*);
-static boolean r600_do_get_query_result(struct r600_common_context *,
-   struct r600_query *, boolean wait,
-   union pipe_query_result *result);
-static void r600_do_render_condition(struct r600_common_context *,
-struct r600_query *, boolean condition,
-uint mode);
+void r600_query_hw_destroy(struct r600_common_context *rctx,
+  struct r600_query *rquery)
+{
+   struct r600_query_hw *query = (struct r600_query_hw *)rquery;
+   struct r600_query_buffer *prev = query->buffer.previous;
 
-static struct r600_query_ops legacy_query_ops = {
-   .destroy = r600_do_destroy_query,
-   .begin = r600_do_begin_query,
-   .end = r600_do_end_query,
-   .get_result = r600_do_get_query_result,
-   .render_condition = r600_do_render_condition,
-};
+   /* Release all query buffers. */
+   while (prev) {
+   struct r600_query_buffer *qbuf = prev;
+   prev = prev->previous;
+   pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
+   FREE(qbuf);
+   }
+
+   pipe_resource_reference((struct pipe_resource**)&query->buffer.buf, NULL);
+   FREE(rquery);
+}
 
 static bool r600_is_timer_query(unsigned type)
 {
@@ -317,6 +288,77 @@ static struct r600_resource *r600_new_query_buffer(struct 
r600_common_context *c
return buf;
 }
 
+static boolean r600_query_hw_begin(struct r600_common_context *, struct 
r600_query *);
+static void r600_query_hw_end(struct r600_common_context *, struct r600_query 
*);
+static boolean r600_query_hw_get_result(struct r600_common_context *,
+   struct r600_query *, boolean wait,
+   union pipe_query_result *result);
+static void r600_do_render_condition(struct r600_common_context *,
+struct r600_query *, boolean condition,
+uint mode);
+
+static struct r600_query_ops query_hw_ops = {
+   .destroy = r600_query_hw_destroy,
+   .begin = r600_query_hw_begin,
+   .end = r600_query_hw_end,
+   .get_result = r600_query_hw_get_result,
+   .render_condition = r600_do_render_condition,
+};
+
+static struct pipe_query *r600_query_hw_create(struct r600_common_context 
*rctx,
+  unsigned query_type,
+   

[Mesa-dev] [PATCH 07/10] radeon: split hw query buffer handling from cs emit

2015-11-13 Thread Nicolai Hähnle
The idea here is that driver queries implemented outside of common code
will use the same query buffer handling with different logic for starting
and stopping the corresponding counters.
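
In other words, the reusable part is the buffer and suspend/resume machinery,
and the per-backend part shrinks to a small hook table. A stand-alone sketch
of that table's shape (stand-in types; the real r600_query_hw_ops below is
extended with clear_result/add_result in a later patch):

#include <stdint.h>

struct ctx;        /* stand-ins only, not the Mesa types */
struct hw_query;
struct resource;

/* What a hardware query type can override while reusing the common
 * query-buffer allocation and suspend/resume bookkeeping. */
struct hw_query_ops {
   void (*prepare_buffer)(struct ctx *, struct hw_query *, struct resource *);
   void (*emit_start)(struct ctx *, struct hw_query *,
                      struct resource *buffer, uint64_t va);
   void (*emit_stop)(struct ctx *, struct hw_query *,
                     struct resource *buffer, uint64_t va);
};

The existing occlusion/streamout/timestamp queries install the default table
(query_hw_default_hw_ops below); a performance-counter backend would install
one whose emit_start/emit_stop program and stop the counters instead, without
duplicating any of the buffer bookkeeping.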
---
 src/gallium/drivers/radeon/r600_query.c | 198 +++-
 src/gallium/drivers/radeon/r600_query.h |  20 
 2 files changed, 135 insertions(+), 83 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index eb2a563..4b201fd 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -229,21 +229,10 @@ void r600_query_hw_destroy(struct r600_common_context 
*rctx,
FREE(rquery);
 }
 
-static bool r600_is_timer_query(unsigned type)
+static struct r600_resource *r600_new_query_buffer(struct r600_common_context 
*ctx,
+  struct r600_query_hw *query)
 {
-   return type == PIPE_QUERY_TIME_ELAPSED ||
-  type == PIPE_QUERY_TIMESTAMP;
-}
-
-static bool r600_query_needs_begin(unsigned type)
-{
-   return type != PIPE_QUERY_TIMESTAMP;
-}
-
-static struct r600_resource *r600_new_query_buffer(struct r600_common_context 
*ctx, unsigned type)
-{
-   unsigned j, i, num_results, buf_size = 4096;
-   uint32_t *results;
+   unsigned buf_size = 4096;
 
/* Queries are normally read by the CPU after
 * being written by the gpu, hence staging is probably a good
@@ -253,14 +242,34 @@ static struct r600_resource *r600_new_query_buffer(struct 
r600_common_context *c
pipe_buffer_create(ctx->b.screen, PIPE_BIND_CUSTOM,
   PIPE_USAGE_STAGING, buf_size);
 
-   switch (type) {
-   case PIPE_QUERY_OCCLUSION_COUNTER:
-   case PIPE_QUERY_OCCLUSION_PREDICATE:
-   results = r600_buffer_map_sync_with_rings(ctx, buf, 
PIPE_TRANSFER_WRITE);
-   memset(results, 0, buf_size);
+   if (query->ops->prepare_buffer)
+   query->ops->prepare_buffer(ctx, query, buf);
+
+   return buf;
+}
+
+static void r600_query_hw_prepare_buffer(struct r600_common_context *ctx,
+struct r600_query_hw *query,
+struct r600_resource *buffer)
+ {
+   uint32_t *results;
+
+   if (query->b.type == PIPE_QUERY_TIME_ELAPSED ||
+   query->b.type == PIPE_QUERY_TIMESTAMP)
+   return;
+
+   results = r600_buffer_map_sync_with_rings(ctx, buffer,
+ PIPE_TRANSFER_WRITE);
+
+   memset(results, 0, buffer->b.b.width0);
+
+   if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER ||
+   query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE) {
+   unsigned num_results;
+   unsigned i, j;
 
/* Set top bits for unused backends. */
-   num_results = buf_size / (16 * ctx->max_db);
+   num_results = buffer->b.b.width0 / (16 * ctx->max_db);
for (j = 0; j < num_results; j++) {
for (i = 0; i < ctx->max_db; i++) {
			if (!(ctx->backend_mask & (1<<i))) {
				results[(i * 4)+1] = 0x80000000;
				results[(i * 4)+3] = 0x80000000;
			}
		}
		results += 4 * ctx->max_db;
}
-   break;
-   case PIPE_QUERY_TIME_ELAPSED:
-   case PIPE_QUERY_TIMESTAMP:
-   break;
-   case PIPE_QUERY_PRIMITIVES_EMITTED:
-   case PIPE_QUERY_PRIMITIVES_GENERATED:
-   case PIPE_QUERY_SO_STATISTICS:
-   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-   case PIPE_QUERY_PIPELINE_STATISTICS:
-   results = r600_buffer_map_sync_with_rings(ctx, buf, 
PIPE_TRANSFER_WRITE);
-   memset(results, 0, buf_size);
-   break;
-   default:
-   assert(0);
}
-   return buf;
 }
 
 static boolean r600_query_hw_begin(struct r600_common_context *, struct 
r600_query *);
@@ -305,6 +299,21 @@ static struct r600_query_ops query_hw_ops = {
.render_condition = r600_do_render_condition,
 };
 
+static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
+   struct r600_query_hw *query,
+   struct r600_resource *buffer,
+   uint64_t va);
+static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
+  struct r600_query_hw *query,
+  struct r600_resource *buffer,
+  uint64_t va);
+
+static struct r600_query_hw_ops query_hw_default_hw_ops = {
+   .prepare_buffer = r600_query_hw_prepare_buffer,
+   .emit_start = r600_query_hw_do_emit_start,
+   .emit_stop = r600_query_hw_do_emit_stop,
+};
+
 static struct pipe_query 

[Mesa-dev] [PATCH 09/10] radeon: expose r600_query_hw functions for reuse

2015-11-13 Thread Nicolai Hähnle
---
 src/gallium/drivers/radeon/r600_query.c | 30 +-
 src/gallium/drivers/radeon/r600_query.h | 10 ++
 2 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 59e2a58..4f89634 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -282,11 +282,6 @@ static void r600_query_hw_prepare_buffer(struct 
r600_common_context *ctx,
}
 }
 
-static boolean r600_query_hw_begin(struct r600_common_context *, struct 
r600_query *);
-static void r600_query_hw_end(struct r600_common_context *, struct r600_query 
*);
-static boolean r600_query_hw_get_result(struct r600_common_context *,
-   struct r600_query *, boolean wait,
-   union pipe_query_result *result);
 static void r600_do_render_condition(struct r600_common_context *,
 struct r600_query *, boolean condition,
 uint mode);
@@ -321,6 +316,16 @@ static struct r600_query_hw_ops query_hw_default_hw_ops = {
.add_result = r600_query_hw_add_result,
 };
 
+boolean r600_query_hw_init(struct r600_common_context *rctx,
+  struct r600_query_hw *query)
+{
+   query->buffer.buf = r600_new_query_buffer(rctx, query);
+   if (!query->buffer.buf)
+   return FALSE;
+
+   return TRUE;
+}
+
 static struct pipe_query *r600_query_hw_create(struct r600_common_context 
*rctx,
   unsigned query_type,
   unsigned index)
@@ -370,8 +375,7 @@ static struct pipe_query *r600_query_hw_create(struct 
r600_common_context *rctx,
return NULL;
}
 
-   query->buffer.buf = r600_new_query_buffer(rctx, query);
-   if (!query->buffer.buf) {
+   if (!r600_query_hw_init(rctx, query)) {
FREE(query);
return NULL;
}
@@ -645,8 +649,8 @@ static boolean r600_begin_query(struct pipe_context *ctx,
return rquery->ops->begin(rctx, rquery);
 }
 
-static boolean r600_query_hw_begin(struct r600_common_context *rctx,
-  struct r600_query *rquery)
+boolean r600_query_hw_begin(struct r600_common_context *rctx,
+   struct r600_query *rquery)
 {
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
struct r600_query_buffer *prev = query->buffer.previous;
@@ -691,7 +695,7 @@ static void r600_end_query(struct pipe_context *ctx, struct 
pipe_query *query)
rquery->ops->end(rctx, rquery);
 }
 
-static void r600_query_hw_end(struct r600_common_context *rctx,
+void r600_query_hw_end(struct r600_common_context *rctx,
  struct r600_query *rquery)
 {
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
@@ -858,9 +862,9 @@ static void r600_query_hw_clear_result(struct r600_query_hw 
*query,
util_query_clear_result(result, query->b.type);
 }
 
-static boolean r600_query_hw_get_result(struct r600_common_context *rctx,
-   struct r600_query *rquery,
-   boolean wait, union pipe_query_result 
*result)
+boolean r600_query_hw_get_result(struct r600_common_context *rctx,
+struct r600_query *rquery,
+boolean wait, union pipe_query_result *result)
 {
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
struct r600_query_buffer *qbuf;
diff --git a/src/gallium/drivers/radeon/r600_query.h 
b/src/gallium/drivers/radeon/r600_query.h
index 17a9da3..4e357f5 100644
--- a/src/gallium/drivers/radeon/r600_query.h
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -122,7 +122,17 @@ struct r600_query_hw {
unsigned stream;
 };
 
+boolean r600_query_hw_init(struct r600_common_context *rctx,
+  struct r600_query_hw *query);
 void r600_query_hw_destroy(struct r600_common_context *rctx,
   struct r600_query *rquery);
+boolean r600_query_hw_begin(struct r600_common_context *rctx,
+   struct r600_query *rquery);
+void r600_query_hw_end(struct r600_common_context *rctx,
+  struct r600_query *rquery);
+boolean r600_query_hw_get_result(struct r600_common_context *rctx,
+struct r600_query *rquery,
+boolean wait,
+union pipe_query_result *result);
 
 #endif /* R600_QUERY_H */
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/10] radeon: cleanup driver query list

2015-11-13 Thread Nicolai Hähnle
---
 src/gallium/drivers/radeon/r600_query.c | 84 +
 1 file changed, 55 insertions(+), 29 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 8aa8774..60381b2 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -1017,39 +1017,50 @@ err:
return;
 }
 
+#define X(name_, query_type_, type_, result_type_) \
+   { \
+   .name = name_, \
+   .query_type = R600_QUERY_##query_type_, \
+   .type = PIPE_DRIVER_QUERY_TYPE_##type_, \
+   .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \
+   .group_id = ~(unsigned)0 \
+   }
+
+static struct pipe_driver_query_info r600_driver_query_list[] = {
+   X("num-compilations",   NUM_COMPILATIONS,   UINT64, 
CUMULATIVE),
+   X("num-shaders-created",NUM_SHADERS_CREATED,UINT64, 
CUMULATIVE),
+   X("draw-calls", DRAW_CALLS, UINT64, 
CUMULATIVE),
+   X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE),
+   X("requested-GTT",  REQUESTED_GTT,  BYTES, AVERAGE),
+   X("buffer-wait-time",   BUFFER_WAIT_TIME,   MICROSECONDS, 
CUMULATIVE),
+   X("num-cs-flushes", NUM_CS_FLUSHES, UINT64, 
CUMULATIVE),
+   X("num-bytes-moved",NUM_BYTES_MOVED,BYTES, 
CUMULATIVE),
+   X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE),
+   X("GTT-usage",  GTT_USAGE,  BYTES, AVERAGE),
+   X("GPU-load",   GPU_LOAD,   UINT64, 
AVERAGE),
+   X("temperature",GPU_TEMPERATURE,UINT64, 
AVERAGE),
+   X("shader-clock",   CURRENT_GPU_SCLK,   HZ, AVERAGE),
+   X("memory-clock",   CURRENT_GPU_MCLK,   HZ, AVERAGE),
+};
+
+#undef X
+
+static unsigned r600_get_num_queries(struct r600_common_screen *rscreen)
+{
+   if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
+   return Elements(r600_driver_query_list);
+   else if (rscreen->info.drm_major == 3)
+   return Elements(r600_driver_query_list) - 3;
+   else
+   return Elements(r600_driver_query_list) - 4;
+}
+
 static int r600_get_driver_query_info(struct pipe_screen *screen,
  unsigned index,
  struct pipe_driver_query_info *info)
 {
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
-   struct pipe_driver_query_info list[] = {
-   {"num-compilations", R600_QUERY_NUM_COMPILATIONS, {0}, 
PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-   {"num-shaders-created", R600_QUERY_NUM_SHADERS_CREATED, {0}, 
PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-   {"draw-calls", R600_QUERY_DRAW_CALLS, {0}},
-   {"requested-VRAM", R600_QUERY_REQUESTED_VRAM, 
{rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-   {"requested-GTT", R600_QUERY_REQUESTED_GTT, 
{rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-   {"buffer-wait-time", R600_QUERY_BUFFER_WAIT_TIME, {0}, 
PIPE_DRIVER_QUERY_TYPE_MICROSECONDS,
-PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-   {"num-cs-flushes", R600_QUERY_NUM_CS_FLUSHES, {0}},
-   {"num-bytes-moved", R600_QUERY_NUM_BYTES_MOVED, {0}, 
PIPE_DRIVER_QUERY_TYPE_BYTES,
-PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-   {"VRAM-usage", R600_QUERY_VRAM_USAGE, 
{rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-   {"GTT-usage", R600_QUERY_GTT_USAGE, {rscreen->info.gart_size}, 
PIPE_DRIVER_QUERY_TYPE_BYTES},
-   {"GPU-load", R600_QUERY_GPU_LOAD, {100}},
-   {"temperature", R600_QUERY_GPU_TEMPERATURE, {125}},
-   {"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, {0}, 
PIPE_DRIVER_QUERY_TYPE_HZ},
-   {"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, {0}, 
PIPE_DRIVER_QUERY_TYPE_HZ},
-   };
-   unsigned num_queries;
-
-   if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
-   num_queries = Elements(list);
-   else if (rscreen->info.drm_major == 3)
-   num_queries = Elements(list) - 3;
-   else
-   num_queries = Elements(list) - 4;
+   unsigned num_queries = r600_get_num_queries(rscreen);
 
if (!info)
return num_queries;
@@ -1057,7 +1068,22 @@ static int r600_get_driver_query_info(struct pipe_screen 
*screen,
if (index >= num_queries)
return 0;
 
-   *info = list[index];
+   *info = r600_driver_query_list[index];
+
+   switch 

[Mesa-dev] [PATCH 03/10] radeon: move R600_QUERY_* constants into a new query header file

2015-11-13 Thread Nicolai Hähnle
More query-related structures will have to be moved into their own
header file to support hardware-specific performance counters.
---
 src/gallium/drivers/radeon/Makefile.sources   |  1 +
 src/gallium/drivers/radeon/r600_pipe_common.h | 15 
 src/gallium/drivers/radeon/r600_query.c   |  1 +
 src/gallium/drivers/radeon/r600_query.h   | 49 +++
 4 files changed, 51 insertions(+), 15 deletions(-)
 create mode 100644 src/gallium/drivers/radeon/r600_query.h

diff --git a/src/gallium/drivers/radeon/Makefile.sources 
b/src/gallium/drivers/radeon/Makefile.sources
index f63790c..d840ff8 100644
--- a/src/gallium/drivers/radeon/Makefile.sources
+++ b/src/gallium/drivers/radeon/Makefile.sources
@@ -7,6 +7,7 @@ C_SOURCES := \
r600_pipe_common.c \
r600_pipe_common.h \
r600_query.c \
+   r600_query.h \
r600_streamout.c \
r600_texture.c \
radeon_uvd.c \
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index d2c54f3..419f785 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -47,21 +47,6 @@
 #define R600_RESOURCE_FLAG_FLUSHED_DEPTH   (PIPE_RESOURCE_FLAG_DRV_PRIV << 
1)
 #define R600_RESOURCE_FLAG_FORCE_TILING
(PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
 
-#define R600_QUERY_DRAW_CALLS  (PIPE_QUERY_DRIVER_SPECIFIC + 0)
-#define R600_QUERY_REQUESTED_VRAM  (PIPE_QUERY_DRIVER_SPECIFIC + 1)
-#define R600_QUERY_REQUESTED_GTT   (PIPE_QUERY_DRIVER_SPECIFIC + 2)
-#define R600_QUERY_BUFFER_WAIT_TIME(PIPE_QUERY_DRIVER_SPECIFIC + 3)
-#define R600_QUERY_NUM_CS_FLUSHES  (PIPE_QUERY_DRIVER_SPECIFIC + 4)
-#define R600_QUERY_NUM_BYTES_MOVED (PIPE_QUERY_DRIVER_SPECIFIC + 5)
-#define R600_QUERY_VRAM_USAGE  (PIPE_QUERY_DRIVER_SPECIFIC + 6)
-#define R600_QUERY_GTT_USAGE   (PIPE_QUERY_DRIVER_SPECIFIC + 7)
-#define R600_QUERY_GPU_TEMPERATURE (PIPE_QUERY_DRIVER_SPECIFIC + 8)
-#define R600_QUERY_CURRENT_GPU_SCLK(PIPE_QUERY_DRIVER_SPECIFIC + 9)
-#define R600_QUERY_CURRENT_GPU_MCLK(PIPE_QUERY_DRIVER_SPECIFIC + 10)
-#define R600_QUERY_GPU_LOAD(PIPE_QUERY_DRIVER_SPECIFIC + 11)
-#define R600_QUERY_NUM_COMPILATIONS(PIPE_QUERY_DRIVER_SPECIFIC + 12)
-#define R600_QUERY_NUM_SHADERS_CREATED (PIPE_QUERY_DRIVER_SPECIFIC + 13)
-
 #define R600_CONTEXT_STREAMOUT_FLUSH   (1u << 0)
 #define R600_CONTEXT_PRIVATE_FLAG  (1u << 1)
 
diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 60381b2..b79d2d0 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -22,6 +22,7 @@
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#include "r600_query.h"
 #include "r600_cs.h"
 #include "util/u_memory.h"
 
diff --git a/src/gallium/drivers/radeon/r600_query.h 
b/src/gallium/drivers/radeon/r600_query.h
new file mode 100644
index 000..fc8b47b
--- /dev/null
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *  Nicolai Hähnle <nicolai.haeh...@amd.com>
+ *
+ */
+
+#ifndef R600_QUERY_H
+#define R600_QUERY_H
+
+#include "pipe/p_defines.h"
+
+#define R600_QUERY_DRAW_CALLS  (PIPE_QUERY_DRIVER_SPECIFIC + 0)
+#define R600_QUERY_REQUESTED_VRAM  (PIPE_QUERY_DRIVER_SPECIFIC + 1)
+#define R600_QUERY_REQUESTED_GTT   (PIPE_QUERY_DRIVER_SPECIFIC + 2)
+#define R600_QUERY_BUFFER_WAIT_TIME(PIPE_QUERY_DRIVER_SPECIFIC + 3)
+#define R600_QUERY_NUM_CS_FLUSHES  (PIPE_QUERY_DRIVER_SPECIFIC + 4)
+#define R600_QUERY_NUM_BYTES_MOVED (PIPE_QUERY_DRIVER_SPECIFIC + 5)
+#define R60

[Mesa-dev] [PATCH 8/9] gallium/hud: add support for batch queries

2015-11-13 Thread Nicolai Hähnle
---
 src/gallium/auxiliary/hud/hud_context.c  |  24 ++-
 src/gallium/auxiliary/hud/hud_driver_query.c | 248 +++
 src/gallium/auxiliary/hud/hud_private.h  |  13 +-
 3 files changed, 240 insertions(+), 45 deletions(-)

diff --git a/src/gallium/auxiliary/hud/hud_context.c 
b/src/gallium/auxiliary/hud/hud_context.c
index ffe30b8..bcef701 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -57,6 +57,7 @@ struct hud_context {
struct cso_context *cso;
struct u_upload_mgr *uploader;
 
+   struct hud_batch_query_context *batch_query;
struct list_head pane_list;
 
/* states */
@@ -510,6 +511,8 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
hud_alloc_vertices(hud, &hud->text, 4 * 512, 4 * sizeof(float));
 
/* prepare all graphs */
+   hud_batch_query_update(hud->batch_query);
+
LIST_FOR_EACH_ENTRY(pane, &hud->pane_list, head) {
   LIST_FOR_EACH_ENTRY(gr, &pane->graph_list, head) {
  gr->query_new_value(gr);
@@ -903,17 +906,21 @@ hud_parse_env_var(struct hud_context *hud, const char 
*env)
   }
   else if (strcmp(name, "samples-passed") == 0 &&
has_occlusion_query(hud->pipe->screen)) {
- hud_pipe_query_install(pane, hud->pipe, "samples-passed",
+ hud_pipe_query_install(&hud->batch_query, pane, hud->pipe,
+"samples-passed",
 PIPE_QUERY_OCCLUSION_COUNTER, 0, 0,
 PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+0);
   }
   else if (strcmp(name, "primitives-generated") == 0 &&
has_streamout(hud->pipe->screen)) {
- hud_pipe_query_install(pane, hud->pipe, "primitives-generated",
+ hud_pipe_query_install(&hud->batch_query, pane, hud->pipe,
+"primitives-generated",
 PIPE_QUERY_PRIMITIVES_GENERATED, 0, 0,
 PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+0);
   }
   else {
  boolean processed = FALSE;
@@ -938,17 +945,19 @@ hud_parse_env_var(struct hud_context *hud, const char 
*env)
if (strcmp(name, pipeline_statistics_names[i]) == 0)
   break;
 if (i < Elements(pipeline_statistics_names)) {
-   hud_pipe_query_install(pane, hud->pipe, name,
+   hud_pipe_query_install(&hud->batch_query, pane, hud->pipe, name,
   PIPE_QUERY_PIPELINE_STATISTICS, i,
   0, PIPE_DRIVER_QUERY_TYPE_UINT64,
-  PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+  PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+  0);
processed = TRUE;
 }
  }
 
  /* driver queries */
  if (!processed) {
-if (!hud_driver_query_install(pane, hud->pipe, name)){
+if (!hud_driver_query_install(&hud->batch_query, pane, hud->pipe,
+  name)) {
fprintf(stderr, "gallium_hud: unknown driver query '%s'\n", 
name);
 }
  }
@@ -1287,6 +1296,7 @@ hud_destroy(struct hud_context *hud)
   FREE(pane);
}
 
+   hud_batch_query_cleanup(&hud->batch_query);
pipe->delete_fs_state(pipe, hud->fs_color);
pipe->delete_fs_state(pipe, hud->fs_text);
pipe->delete_vs_state(pipe, hud->vs);
diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c 
b/src/gallium/auxiliary/hud/hud_driver_query.c
index 3198ab3..abc9f54 100644
--- a/src/gallium/auxiliary/hud/hud_driver_query.c
+++ b/src/gallium/auxiliary/hud/hud_driver_query.c
@@ -34,13 +34,149 @@
 #include "hud/hud_private.h"
 #include "pipe/p_screen.h"
 #include "os/os_time.h"
+#include "util/u_math.h"
 #include "util/u_memory.h"
 #include 
 
+// Must be a power of two
 #define NUM_QUERIES 8
 
+struct hud_batch_query_context {
+   struct pipe_context *pipe;
+   unsigned num_query_types;
+   unsigned allocated_query_types;
+   unsigned *query_types;
+
+   boolean failed;
+   struct pipe_query *query[NUM_QUERIES];
+   union pipe_query_result *result[NUM_QUERIES];
+   unsigned head, pending, results;
+};
+
+void
+hud_batch_query_update(struct hud_batch_query_context *bq)
+{
+   struct pipe_context *pipe;
+
+   if (!bq || bq->failed)
+  return;
+
+   pipe = bq->pipe;
+
+   if (bq->query[bq->head])
+  pipe->end_query(pipe, bq->query[bq->head]);
+
+   bq->results = 0;
+
+   while (bq->pending) {
+  unsigned idx = (bq->head - bq->pending + 1) % NUM_QUERIES;
+  

[Mesa-dev] [PATCH 6/9] st/mesa: maintain active perfmon counters in an array

2015-11-13 Thread Nicolai Hähnle
It is easy enough to pre-determine the required size, and arrays are
generally better behaved especially when they get large.
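
The pattern, reduced to a stand-alone sketch (count first, then allocate a
single array and fill it; illustrative types, not the st/mesa ones):

#include <stdlib.h>

struct item { int id; };

static struct item *collect(const int *wanted, int n, int *out_count)
{
   int count = 0, i;
   struct item *arr;

   for (i = 0; i < n; i++)        /* pass 1: count */
      if (wanted[i])
         count++;

   arr = calloc(count, sizeof(*arr));
   if (!arr)
      return NULL;

   *out_count = 0;
   for (i = 0; i < n; i++)        /* pass 2: fill */
      if (wanted[i])
         arr[(*out_count)++].id = i;
   return arr;
}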
---
 src/mesa/state_tracker/st_cb_perfmon.c | 78 --
 src/mesa/state_tracker/st_cb_perfmon.h | 18 
 2 files changed, 55 insertions(+), 41 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index ec12eb2..6c71a13 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -42,15 +42,14 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
struct st_context *st = st_context(ctx);
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st->pipe;
+   unsigned num_active_counters = 0;
int gid, cid;
 
st_flush_bitmap_cache(st);
 
-   /* Create a query for each active counter. */
+   /* Determine the number of active counters. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
-  const struct st_perf_monitor_group *stg = &st->perfmon[gid];
-  BITSET_WORD tmp;
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -61,19 +60,29 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
  return false;
   }
 
+  num_active_counters += m->ActiveGroups[gid];
+   }
+
+   stm->active_counters = CALLOC(num_active_counters,
+ sizeof(*stm->active_counters));
+   if (!stm->active_counters)
+  return false;
+
+   /* Create a query for each active counter. */
+   for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
+  const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = &st->perfmon[gid];
+  BITSET_WORD tmp;
+
   BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) {
 const struct st_perf_monitor_counter *stc = &stg->counters[cid];
- struct st_perf_counter_object *cntr;
-
- cntr = CALLOC_STRUCT(st_perf_counter_object);
- if (!cntr)
-return false;
+ struct st_perf_counter_object *cntr =
+            &stm->active_counters[stm->num_active_counters];
 
  cntr->query= pipe->create_query(pipe, stc->query_type, 0);
  cntr->id   = cid;
  cntr->group_id = gid;
-
- list_addtail(&cntr->list, &stm->active_counters);
+ ++stm->num_active_counters;
   }
}
return true;
@@ -83,24 +92,24 @@ static void
 reset_perf_monitor(struct st_perf_monitor_object *stm,
struct pipe_context *pipe)
 {
-   struct st_perf_counter_object *cntr, *tmp;
+   unsigned i;
 
-   LIST_FOR_EACH_ENTRY_SAFE(cntr, tmp, &stm->active_counters, list) {
-  if (cntr->query)
- pipe->destroy_query(pipe, cntr->query);
-  list_del(>list);
-  free(cntr);
+   for (i = 0; i < stm->num_active_counters; ++i) {
+  struct pipe_query *query = stm->active_counters[i].query;
+  if (query)
+ pipe->destroy_query(pipe, query);
}
+   FREE(stm->active_counters);
+   stm->active_counters = NULL;
+   stm->num_active_counters = 0;
 }
 
 static struct gl_perf_monitor_object *
 st_NewPerfMonitor(struct gl_context *ctx)
 {
struct st_perf_monitor_object *stq = 
ST_CALLOC_STRUCT(st_perf_monitor_object);
-   if (stq) {
-  list_inithead(&stq->active_counters);
+   if (stq)
    return &stq->base;
-   }
return NULL;
 }
 
@@ -119,9 +128,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 {
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st_context(ctx)->pipe;
-   struct st_perf_counter_object *cntr;
+   unsigned i;
 
-   if (LIST_IS_EMPTY(&stm->active_counters)) {
+   if (!stm->num_active_counters) {
   /* Create a query for each active counter before starting
* a new monitoring session. */
   if (!init_perf_monitor(ctx, m))
@@ -129,8 +138,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
}
 
/* Start the query for each active counter. */
-   LIST_FOR_EACH_ENTRY(cntr, &stm->active_counters, list) {
-  if (!pipe->begin_query(pipe, cntr->query))
+   for (i = 0; i < stm->num_active_counters; ++i) {
+  struct pipe_query *query = stm->active_counters[i].query;
+  if (!pipe->begin_query(pipe, query))
   goto fail;
}
return true;
@@ -146,11 +156,13 @@ st_EndPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 {
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st_context(ctx)->pipe;
-   struct st_perf_counter_object *cntr;
+   unsigned i;
 
/* Stop the query for each active counter. */
-   LIST_FOR_EACH_ENTRY(cntr, &stm->active_counters, list)
-  

[Mesa-dev] [PATCH 3/9] st/mesa: map semantic driver query types to underlying type

2015-11-13 Thread Nicolai Hähnle
---
 src/gallium/include/pipe/p_defines.h   | 2 ++
 src/mesa/state_tracker/st_cb_perfmon.c | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 7f241c8..7ed9f6d 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -791,6 +791,8 @@ union pipe_query_result
/* PIPE_QUERY_PRIMITIVES_GENERATED */
/* PIPE_QUERY_PRIMITIVES_EMITTED */
/* PIPE_DRIVER_QUERY_TYPE_UINT64 */
+   /* PIPE_DRIVER_QUERY_TYPE_BYTES */
+   /* PIPE_DRIVER_QUERY_TYPE_MICROSECONDS */
/* PIPE_DRIVER_QUERY_TYPE_HZ */
uint64_t u64;
 
diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 4ec6d86..dedb8f5 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -334,6 +334,9 @@ st_init_perfmon(struct st_context *st)
  c->Name = info.name;
  switch (info.type) {
 case PIPE_DRIVER_QUERY_TYPE_UINT64:
+case PIPE_DRIVER_QUERY_TYPE_BYTES:
+case PIPE_DRIVER_QUERY_TYPE_MICROSECONDS:
+case PIPE_DRIVER_QUERY_TYPE_HZ:
c->Minimum.u64 = 0;
c->Maximum.u64 = info.max_value.u64 ? info.max_value.u64 : -1;
c->Type = GL_UNSIGNED_INT64_AMD;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/9] st/mesa: use BITSET_FOREACH_SET to loop through active perfmon counters

2015-11-13 Thread Nicolai Hähnle
---
 src/mesa/state_tracker/st_cb_perfmon.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 80ff170..ec12eb2 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -50,6 +50,7 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
   const struct st_perf_monitor_group *stg = &st->perfmon[gid];
+  BITSET_WORD tmp;
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -60,14 +61,10 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
  return false;
   }
 
-  for (cid = 0; cid < g->NumCounters; cid++) {
- const struct gl_perf_monitor_counter *c = &g->Counters[cid];
+  BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) {
 const struct st_perf_monitor_counter *stc = &stg->counters[cid];
  struct st_perf_counter_object *cntr;
 
- if (!BITSET_TEST(m->ActiveCounters[gid], cid))
-continue;
-
  cntr = CALLOC_STRUCT(st_perf_counter_object);
  if (!cntr)
 return false;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/9] gallium: batch query objects and related cleanups

2015-11-13 Thread Nicolai Hähnle
Hi,

the main point of this patch series is to introduce batch query objects.

For AMD_performance_monitor, hardware may not be able to start and stop
performance counters independently of each other. The current query interface
does not fit such hardware well.

With this series, drivers can mark driver-specific queries with the
PIPE_DRIVER_QUERY_FLAG_BATCH flag, which indicates that those queries require
the use of batch query objects. Batch query objects are created with an
immutable list of queries, which requires a new entry point in pipe_context,
but apart from that they use the same begin_query/end_query/etc. entry points.
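
From the state-tracker side, the intended usage looks roughly like this
(sketch only, error handling trimmed; query_types[] is assumed to hold
driver-specific types that were advertised with PIPE_DRIVER_QUERY_FLAG_BATCH):

#include <stdlib.h>
#include "pipe/p_context.h"
#include "pipe/p_defines.h"

static void
run_batched_queries(struct pipe_context *pipe,
                    unsigned *query_types, unsigned num_queries,
                    uint64_t *values)
{
   struct pipe_query *q;
   union pipe_query_result *result;
   unsigned i;

   /* All batched counters are created, started and stopped as one object. */
   q = pipe->create_batch_query(pipe, num_queries, query_types);
   if (!q)
      return;

   pipe->begin_query(pipe, q);
   /* ... render ... */
   pipe->end_query(pipe, q);

   /* Results come back as one slot per query, in creation order. */
   result = calloc(1, sizeof(*result) +
                      num_queries * sizeof(union pipe_numeric_type_union));
   if (result && pipe->get_query_result(pipe, q, TRUE, result)) {
      for (i = 0; i < num_queries; i++)
         values[i] = result->batch[i].u64;
   }
   free(result);
   pipe->destroy_query(pipe, q);
}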

The radeon-specific part that actually makes use of this feature is not quite
ready yet, but I already wanted to get this part out there for feedback.
Please review!

Thanks,
Nicolai
---
 gallium/auxiliary/hud/hud_context.c   |   24 ++
 gallium/auxiliary/hud/hud_driver_query.c  |  249 +-
 gallium/auxiliary/hud/hud_private.h   |   13 +
 gallium/drivers/nouveau/nvc0/nvc0_query.c |4 
 gallium/include/pipe/p_context.h  |3 
 gallium/include/pipe/p_defines.h  |   36 ++--
 mesa/state_tracker/st_cb_perfmon.c|  247 -
 mesa/state_tracker/st_cb_perfmon.h|   32 +++
 mesa/state_tracker/st_context.h   |3 
 9 files changed, 437 insertions(+), 174 deletions(-)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/9] st/mesa: store mapping from perfmon counter to query type

2015-11-13 Thread Nicolai Hähnle
Previously, when a performance monitor was initialized, an inner loop through
all driver queries with string comparisons for each enabled performance
monitor counter was used. This hurts when a driver exposes lots of queries.
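
In miniature, the change turns that per-counter string search into a table
lookup that is built once at init time (stand-alone sketch, illustrative
types rather than the st/mesa ones):

struct st_counter_map { unsigned query_type; };

struct st_group_map {
   struct st_counter_map *counters;   /* indexed by counter id */
};

static unsigned lookup_query_type(const struct st_group_map *groups,
                                  int gid, int cid)
{
   return groups[gid].counters[cid].query_type;   /* O(1) per counter */
}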
---
 src/mesa/state_tracker/st_cb_perfmon.c | 74 +++---
 src/mesa/state_tracker/st_cb_perfmon.h | 14 +++
 src/mesa/state_tracker/st_context.h|  3 ++
 3 files changed, 49 insertions(+), 42 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index dedb8f5..80ff170 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -36,48 +36,20 @@
 #include "pipe/p_screen.h"
 #include "util/u_memory.h"
 
-/**
- * Return a PIPE_QUERY_x type >= PIPE_QUERY_DRIVER_SPECIFIC, or -1 if
- * the driver-specific query doesn't exist.
- */
-static int
-find_query_type(struct pipe_screen *screen, const char *name)
-{
-   int num_queries;
-   int type = -1;
-   int i;
-
-   num_queries = screen->get_driver_query_info(screen, 0, NULL);
-   if (!num_queries)
-  return type;
-
-   for (i = 0; i < num_queries; i++) {
-  struct pipe_driver_query_info info;
-
-  if (!screen->get_driver_query_info(screen, i, &info))
- continue;
-
-  if (!strncmp(info.name, name, strlen(name))) {
- type = info.query_type;
- break;
-  }
-   }
-   return type;
-}
-
 static bool
 init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
 {
+   struct st_context *st = st_context(ctx);
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
-   struct pipe_screen *screen = st_context(ctx)->pipe->screen;
-   struct pipe_context *pipe = st_context(ctx)->pipe;
+   struct pipe_context *pipe = st->pipe;
int gid, cid;
 
-   st_flush_bitmap_cache(st_context(ctx));
+   st_flush_bitmap_cache(st);
 
/* Create a query for each active counter. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = &st->perfmon[gid];
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -90,20 +62,17 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 
   for (cid = 0; cid < g->NumCounters; cid++) {
  const struct gl_perf_monitor_counter *c = &g->Counters[cid];
+ const struct st_perf_monitor_counter *stc = &stg->counters[cid];
  struct st_perf_counter_object *cntr;
- int query_type;
 
  if (!BITSET_TEST(m->ActiveCounters[gid], cid))
 continue;
 
- query_type = find_query_type(screen, c->Name);
- assert(query_type != -1);
-
  cntr = CALLOC_STRUCT(st_perf_counter_object);
  if (!cntr)
 return false;
 
- cntr->query= pipe->create_query(pipe, query_type, 0);
+ cntr->query= pipe->create_query(pipe, stc->query_type, 0);
  cntr->id   = cid;
  cntr->group_id = gid;
 
@@ -286,6 +255,7 @@ st_init_perfmon(struct st_context *st)
struct gl_perf_monitor_state *perfmon = &st->ctx->PerfMonitor;
struct pipe_screen *screen = st->pipe->screen;
struct gl_perf_monitor_group *groups = NULL;
+   struct st_perf_monitor_group *stgroups = NULL;
int num_counters, num_groups;
int gid, cid;
 
@@ -304,26 +274,36 @@ st_init_perfmon(struct st_context *st)
if (!groups)
   return false;
 
+   stgroups = CALLOC(num_groups, sizeof(*stgroups));
+   if (!stgroups)
+  goto fail_only_groups;
+
for (gid = 0; gid < num_groups; gid++) {
   struct gl_perf_monitor_group *g = &groups[perfmon->NumGroups];
   struct pipe_driver_query_group_info group_info;
   struct gl_perf_monitor_counter *counters = NULL;
+  struct st_perf_monitor_counter *stcounters = NULL;
 
   if (!screen->get_driver_query_group_info(screen, gid, &group_info))
  continue;
 
   g->Name = group_info.name;
   g->MaxActiveCounters = group_info.max_active_queries;
-  g->NumCounters = 0;
-  g->Counters = NULL;
 
   if (group_info.num_queries)
  counters = CALLOC(group_info.num_queries, sizeof(*counters));
   if (!counters)
  goto fail;
+  g->Counters = counters;
+
+  stcounters = CALLOC(group_info.num_queries, sizeof(*stcounters));
+  if (!stcounters)
+ goto fail;
+  stgroups[perfmon->NumGroups].counters = stcounters;
 
   for (cid = 0; cid < num_counters; cid++) {
  struct gl_perf_monitor_counter *c = &counters[g->NumCounters];
+ struct st_perf_monitor_counter *stc = &stcounters[g->NumCounters];
  struct pipe_driver_query_info info;
 
 if (!screen->get_driver_query_info(screen, cid, &info))
@@ -359,18 +339,25 @@ st_init_perfmon(struct st_context *st)
 default:
unreachable("Invalid driver query type!");
  }
+
+ 

[Mesa-dev] [PATCH 7/9] gallium: add the concept of batch queries

2015-11-13 Thread Nicolai Hähnle
Some drivers (in particular radeon[si], but also freedreno judging from
a quick grep) may want to expose performance counters that cannot be
individually enabled or disabled.

Allow such drivers to mark driver-specific queries as requiring a new
type of batch query object that is used to start and stop a list of queries
simultaneously.
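
On the driver side, opting in is then just a matter of setting the new flag
when filling in the query info; a minimal, hypothetical helper:

#include <stdbool.h>
#include "pipe/p_defines.h"

/* Counters that the hardware can only start/stop together advertise the
 * batch flag so that state trackers route them through create_batch_query. */
static void
mark_query_batch_only(struct pipe_driver_query_info *info, bool batch_only)
{
   info->flags = batch_only ? PIPE_DRIVER_QUERY_FLAG_BATCH : 0;
}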
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query.c |  1 +
 src/gallium/include/pipe/p_context.h  |  3 +++
 src/gallium/include/pipe/p_defines.h  | 27 +--
 3 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index a1d6162..0608337 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -162,6 +162,7 @@ nvc0_screen_get_driver_query_info(struct pipe_screen 
*pscreen,
info->max_value.u64 = 0;
info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
info->group_id = -1;
+   info->flags = 0;
 
 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
if (id < num_sw_queries)
diff --git a/src/gallium/include/pipe/p_context.h 
b/src/gallium/include/pipe/p_context.h
index 27f358f..f122c74 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -115,6 +115,9 @@ struct pipe_context {
struct pipe_query *(*create_query)( struct pipe_context *pipe,
unsigned query_type,
unsigned index );
+   struct pipe_query *(*create_batch_query)( struct pipe_context *pipe,
+ unsigned num_queries,
+ unsigned *query_types );
 
void (*destroy_query)(struct pipe_context *pipe,
  struct pipe_query *q);
diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 7ed9f6d..b3c8b9f 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -776,6 +776,16 @@ struct pipe_query_data_pipeline_statistics
 };
 
 /**
+ * For batch queries.
+ */
+union pipe_numeric_type_union
+{
+   uint64_t u64;
+   uint32_t u32;
+   float f;
+};
+
+/**
  * Query result (returned by pipe_context::get_query_result).
  */
 union pipe_query_result
@@ -811,6 +821,9 @@ union pipe_query_result
 
/* PIPE_QUERY_PIPELINE_STATISTICS */
struct pipe_query_data_pipeline_statistics pipeline_statistics;
+
+   /* batch queries */
+   union pipe_numeric_type_union batch[0];
 };
 
 union pipe_color_union
@@ -840,12 +853,13 @@ enum pipe_driver_query_result_type
PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE = 1,
 };
 
-union pipe_numeric_type_union
-{
-   uint64_t u64;
-   uint32_t u32;
-   float f;
-};
+/**
+ * Some hardware requires some hardware-specific queries to be submitted
+ * as batched queries. The corresponding query objects are created using
+ * create_batch_query, and at most one such query may be active at
+ * any time.
+ */
+#define PIPE_DRIVER_QUERY_FLAG_BATCH (1 << 0)
 
 struct pipe_driver_query_info
 {
@@ -855,6 +869,7 @@ struct pipe_driver_query_info
enum pipe_driver_query_type type;
enum pipe_driver_query_result_type result_type;
unsigned group_id;
+   unsigned flags;
 };
 
 struct pipe_driver_query_group_info
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 4/9] st/mesa: store mapping from perfmon counter to query type

2015-11-13 Thread Nicolai Hähnle
Previously, when a performance monitor was initialized, an inner loop through
all driver queries with string comparisons for each enabled performance
monitor counter was used. This hurts when a driver exposes lots of queries.

Reviewed-by: Samuel Pitoiset 
---
 src/mesa/state_tracker/st_cb_perfmon.c | 74 +++---
 src/mesa/state_tracker/st_cb_perfmon.h | 14 +++
 src/mesa/state_tracker/st_context.h|  3 ++
 3 files changed, 49 insertions(+), 42 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index dedb8f5..80ff170 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -36,48 +36,20 @@
 #include "pipe/p_screen.h"
 #include "util/u_memory.h"
 
-/**
- * Return a PIPE_QUERY_x type >= PIPE_QUERY_DRIVER_SPECIFIC, or -1 if
- * the driver-specific query doesn't exist.
- */
-static int
-find_query_type(struct pipe_screen *screen, const char *name)
-{
-   int num_queries;
-   int type = -1;
-   int i;
-
-   num_queries = screen->get_driver_query_info(screen, 0, NULL);
-   if (!num_queries)
-  return type;
-
-   for (i = 0; i < num_queries; i++) {
-  struct pipe_driver_query_info info;
-
-  if (!screen->get_driver_query_info(screen, i, &info))
- continue;
-
-  if (!strncmp(info.name, name, strlen(name))) {
- type = info.query_type;
- break;
-  }
-   }
-   return type;
-}
-
 static bool
 init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
 {
+   struct st_context *st = st_context(ctx);
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
-   struct pipe_screen *screen = st_context(ctx)->pipe->screen;
-   struct pipe_context *pipe = st_context(ctx)->pipe;
+   struct pipe_context *pipe = st->pipe;
int gid, cid;
 
-   st_flush_bitmap_cache(st_context(ctx));
+   st_flush_bitmap_cache(st);
 
/* Create a query for each active counter. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = &st->perfmon[gid];
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -90,20 +62,17 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 
   for (cid = 0; cid < g->NumCounters; cid++) {
  const struct gl_perf_monitor_counter *c = &g->Counters[cid];
+ const struct st_perf_monitor_counter *stc = &stg->counters[cid];
  struct st_perf_counter_object *cntr;
- int query_type;
 
  if (!BITSET_TEST(m->ActiveCounters[gid], cid))
 continue;
 
- query_type = find_query_type(screen, c->Name);
- assert(query_type != -1);
-
  cntr = CALLOC_STRUCT(st_perf_counter_object);
  if (!cntr)
 return false;
 
- cntr->query= pipe->create_query(pipe, query_type, 0);
+ cntr->query= pipe->create_query(pipe, stc->query_type, 0);
  cntr->id   = cid;
  cntr->group_id = gid;
 
@@ -286,6 +255,7 @@ st_init_perfmon(struct st_context *st)
   struct gl_perf_monitor_state *perfmon = &st->ctx->PerfMonitor;
struct pipe_screen *screen = st->pipe->screen;
struct gl_perf_monitor_group *groups = NULL;
+   struct st_perf_monitor_group *stgroups = NULL;
int num_counters, num_groups;
int gid, cid;
 
@@ -304,26 +274,36 @@ st_init_perfmon(struct st_context *st)
if (!groups)
   return false;
 
+   stgroups = CALLOC(num_groups, sizeof(*stgroups));
+   if (!stgroups)
+  goto fail_only_groups;
+
for (gid = 0; gid < num_groups; gid++) {
   struct gl_perf_monitor_group *g = &groups[perfmon->NumGroups];
   struct pipe_driver_query_group_info group_info;
   struct gl_perf_monitor_counter *counters = NULL;
+  struct st_perf_monitor_counter *stcounters = NULL;
 
   if (!screen->get_driver_query_group_info(screen, gid, &group_info))
  continue;
 
   g->Name = group_info.name;
   g->MaxActiveCounters = group_info.max_active_queries;
-  g->NumCounters = 0;
-  g->Counters = NULL;
 
   if (group_info.num_queries)
  counters = CALLOC(group_info.num_queries, sizeof(*counters));
   if (!counters)
  goto fail;
+  g->Counters = counters;
+
+  stcounters = CALLOC(group_info.num_queries, sizeof(*stcounters));
+  if (!stcounters)
+ goto fail;
+  stgroups[perfmon->NumGroups].counters = stcounters;
 
   for (cid = 0; cid < num_counters; cid++) {
   struct gl_perf_monitor_counter *c = &counters[g->NumCounters];
+ struct st_perf_monitor_counter *stc = &stcounters[g->NumCounters];
  struct pipe_driver_query_info info;
 
   if (!screen->get_driver_query_info(screen, cid, &info))
@@ -359,18 +339,25 @@ st_init_perfmon(struct st_context *st)
 default:

[Mesa-dev] [PATCH v2 7/9] gallium: add the concept of batch queries

2015-11-13 Thread Nicolai Hähnle
Some drivers (in particular radeon[si], but also freedreno judging from
a quick grep) may want to expose performance counters that cannot be
individually enabled or disabled.

Allow such drivers to mark driver-specific queries as requiring a new
type of batch query object that is used to start and stop a list of queries
simultaneously.

v2: documentation for create_batch_query
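
(Illustration, not part of the patch: a state tracker that finds driver
queries flagged with PIPE_DRIVER_QUERY_FLAG_BATCH would collect their query
types and drive them through a single query object, reading the individual
results back through the batch[] member added to union pipe_query_result.
A rough sketch, where pipe, n and query_types[] are assumed to come from
walking get_driver_query_info():)

   struct pipe_query *bq;
   union pipe_query_result *result;
   unsigned i;

   bq = pipe->create_batch_query(pipe, n, query_types);
   if (!bq)
      return false;

   /* one pipe_numeric_type_union slot per query type */
   result = CALLOC(n, sizeof(result->batch[0]));
   if (!result) {
      pipe->destroy_query(pipe, bq);
      return false;
   }

   if (pipe->begin_query(pipe, bq)) {
      /* ... issue the draw calls to be measured ... */
      pipe->end_query(pipe, bq);

      if (pipe->get_query_result(pipe, bq, TRUE, result)) {
         for (i = 0; i < n; i++) {
            uint64_t value = result->batch[i].u64;
            /* ... report value for query_types[i] ... */
         }
      }
   }

   pipe->destroy_query(pipe, bq);
   FREE(result);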
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query.c |  1 +
 src/gallium/include/pipe/p_context.h  | 19 +++
 src/gallium/include/pipe/p_defines.h  | 27 +--
 3 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index a1d6162..0608337 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -162,6 +162,7 @@ nvc0_screen_get_driver_query_info(struct pipe_screen 
*pscreen,
info->max_value.u64 = 0;
info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
info->group_id = -1;
+   info->flags = 0;
 
 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
if (id < num_sw_queries)
diff --git a/src/gallium/include/pipe/p_context.h 
b/src/gallium/include/pipe/p_context.h
index 27f358f..be7447d 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -116,6 +116,25 @@ struct pipe_context {
unsigned query_type,
unsigned index );
 
+   /**
+* Create a query object that queries all given query types simultaneously.
+*
+* This can only be used for those query types for which
+* get_driver_query_info indicates that it must be used. Only one batch
+* query object may be active at a time.
+*
+* There may be additional constraints on which query types can be used
+* together, in particular those that are implied by
+* get_driver_query_group_info.
+*
+* \param num_queries the number of query types
+* \param query_types array of \p num_queries query types
+* \return a query object, or NULL on error.
+*/
+   struct pipe_query *(*create_batch_query)( struct pipe_context *pipe,
+ unsigned num_queries,
+ unsigned *query_types );
+
void (*destroy_query)(struct pipe_context *pipe,
  struct pipe_query *q);
 
diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 7ed9f6d..b3c8b9f 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -776,6 +776,16 @@ struct pipe_query_data_pipeline_statistics
 };
 
 /**
+ * For batch queries.
+ */
+union pipe_numeric_type_union
+{
+   uint64_t u64;
+   uint32_t u32;
+   float f;
+};
+
+/**
  * Query result (returned by pipe_context::get_query_result).
  */
 union pipe_query_result
@@ -811,6 +821,9 @@ union pipe_query_result
 
/* PIPE_QUERY_PIPELINE_STATISTICS */
struct pipe_query_data_pipeline_statistics pipeline_statistics;
+
+   /* batch queries */
+   union pipe_numeric_type_union batch[0];
 };
 
 union pipe_color_union
@@ -840,12 +853,13 @@ enum pipe_driver_query_result_type
PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE = 1,
 };
 
-union pipe_numeric_type_union
-{
-   uint64_t u64;
-   uint32_t u32;
-   float f;
-};
+/**
+ * Some hardware requires some hardware-specific queries to be submitted
+ * as batched queries. The corresponding query objects are created using
+ * create_batch_query, and at most one such query may be active at
+ * any time.
+ */
+#define PIPE_DRIVER_QUERY_FLAG_BATCH (1 << 0)
 
 struct pipe_driver_query_info
 {
@@ -855,6 +869,7 @@ struct pipe_driver_query_info
enum pipe_driver_query_type type;
enum pipe_driver_query_result_type result_type;
unsigned group_id;
+   unsigned flags;
 };
 
 struct pipe_driver_query_group_info
-- 
2.5.0



[Mesa-dev] [PATCH v2 0/9] gallium: batch query objects and related cleanups

2015-11-13 Thread Nicolai Hähnle
Hi,

I have updated patches 6 - 9. Samuel, thank you for your input and I hope
you find your points to be resolved satisfactorily ;)

Cheers,
Nicolai
---
nha@deadlights:~/amd/mesa$ git diff master | diffstat
 gallium/auxiliary/hud/hud_context.c   |   24 +-
 gallium/auxiliary/hud/hud_driver_query.c  |  266 +-
 gallium/auxiliary/hud/hud_private.h   |   13 +
 gallium/drivers/nouveau/nvc0/nvc0_query.c |4 
 gallium/include/pipe/p_context.h  |   19 ++
 gallium/include/pipe/p_defines.h  |   36 ++--
 mesa/state_tracker/st_cb_perfmon.c|  253 
 mesa/state_tracker/st_cb_perfmon.h|   32 ++-
 mesa/state_tracker/st_context.h   |3 
 9 files changed, 475 insertions(+), 175 deletions(-)



[Mesa-dev] [PATCH v2 5/9] st/mesa: use BITSET_FOREACH_SET to loop through active perfmon counters

2015-11-13 Thread Nicolai Hähnle
Reviewed-by: Samuel Pitoiset 
---
 src/mesa/state_tracker/st_cb_perfmon.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)
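
(For readers unfamiliar with the macro, the pattern being replaced versus the
one introduced here looks roughly like this; sketch only, assuming the
BITSET_* helpers from src/util/bitset.h:)

   /* Before: visit every counter index and test its bit individually. */
   for (cid = 0; cid < g->NumCounters; cid++) {
      if (!BITSET_TEST(m->ActiveCounters[gid], cid))
         continue;
      /* ... handle active counter cid ... */
   }

   /* After: iterate only over the bits that are actually set. */
   BITSET_WORD tmp;
   BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) {
      /* ... handle active counter cid ... */
   }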

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 80ff170..ec12eb2 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -50,6 +50,7 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
   const struct st_perf_monitor_group *stg = &st->perfmon[gid];
+  BITSET_WORD tmp;
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -60,14 +61,10 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
  return false;
   }
 
-  for (cid = 0; cid < g->NumCounters; cid++) {
- const struct gl_perf_monitor_counter *c = &g->Counters[cid];
+  BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) {
   const struct st_perf_monitor_counter *stc = &stg->counters[cid];
  struct st_perf_counter_object *cntr;
 
- if (!BITSET_TEST(m->ActiveCounters[gid], cid))
-continue;
-
  cntr = CALLOC_STRUCT(st_perf_counter_object);
  if (!cntr)
 return false;
-- 
2.5.0



Re: [Mesa-dev] [PATCH 0/9] gallium: batch query objects and related cleanups

2015-11-13 Thread Nicolai Hähnle

Hi Samuel,

thanks for taking a look!

On 13.11.2015 18:35, Samuel Pitoiset wrote:

Did you run amd_performance_monitor piglit tests to make sure all of
your changes didn't break anything?


Yes, everything passes here.



Did you test on nvc0 driver which is the only driver that currently
exposes GL_AMD_performance_monitor? In case you didn't, I'll test it
myself in the next few days. You might not have the hardware. :-)


Sorry, I don't have the hardware.

Thanks,
Nicolai



Thanks.

On 11/13/2015 04:57 PM, Nicolai Hähnle wrote:

Hi,

the main point of this patch series is to introduce batch query objects.

For AMD_performance_monitor, hardware may not be able to start and stop
performance counters independently of each other. The current query
interface
does not fit such hardware well.

With this series, drivers can mark driver-specific queries with the
PIPE_DRIVER_QUERY_FLAG_BATCH flag, which indicates that those queries
require
the use of batch query objects. Batch query objects are created with an
immutable list of queries, which requires a new entry point in
pipe_context,
but apart from that they use the same begin_query/end_query/etc. entry
points.

The radeon-specific part that actually makes use of this feature is
not quite
ready yet, but I already wanted to get this part out there for feedback.
Please review!

Thanks,
Nicolai
---
  gallium/auxiliary/hud/hud_context.c   |   24 ++
  gallium/auxiliary/hud/hud_driver_query.c  |  249
+-
  gallium/auxiliary/hud/hud_private.h   |   13 +
  gallium/drivers/nouveau/nvc0/nvc0_query.c |4
  gallium/include/pipe/p_context.h  |3
  gallium/include/pipe/p_defines.h  |   36 ++--
  mesa/state_tracker/st_cb_perfmon.c|  247
-
  mesa/state_tracker/st_cb_perfmon.h|   32 +++
  mesa/state_tracker/st_context.h   |3
  9 files changed, 437 insertions(+), 174 deletions(-)



Re: [Mesa-dev] [PATCH 1/9] gallium: remove pipe_driver_query_group_info field type

2015-11-13 Thread Nicolai Hähnle

On 13.11.2015 19:27, Ilia Mirkin wrote:

On Fri, Nov 13, 2015 at 1:23 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote:

So really, this is a question for everybody who cares about nouveau, because
nouveau is the only driver that (if a #define is enabled) advertises a CPU
driver_query_group.

Do you want that group to be accessible via AMD_performance_monitor? Then be
happy with this patch. Do you not want that group to be so accessible? Then
just remove it, because it serves no purpose either way.


There's also the HUD, and Samuel's WIP NVIDIA PerfKit-style library impl.


The HUD doesn't care about groups. If Samuel really cares about this for 
his library (which I haven't seen - where is it?), I can drop this patch.


Cheers,
Nicolai


[Mesa-dev] [PATCH v2 3/9] st/mesa: map semantic driver query types to underlying type

2015-11-13 Thread Nicolai Hähnle
Reviewed-by: Samuel Pitoiset 
---
 src/gallium/include/pipe/p_defines.h   | 2 ++
 src/mesa/state_tracker/st_cb_perfmon.c | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 7f241c8..7ed9f6d 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -791,6 +791,8 @@ union pipe_query_result
/* PIPE_QUERY_PRIMITIVES_GENERATED */
/* PIPE_QUERY_PRIMITIVES_EMITTED */
/* PIPE_DRIVER_QUERY_TYPE_UINT64 */
+   /* PIPE_DRIVER_QUERY_TYPE_BYTES */
+   /* PIPE_DRIVER_QUERY_TYPE_MICROSECONDS */
/* PIPE_DRIVER_QUERY_TYPE_HZ */
uint64_t u64;
 
diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 4ec6d86..dedb8f5 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -334,6 +334,9 @@ st_init_perfmon(struct st_context *st)
  c->Name = info.name;
  switch (info.type) {
 case PIPE_DRIVER_QUERY_TYPE_UINT64:
+case PIPE_DRIVER_QUERY_TYPE_BYTES:
+case PIPE_DRIVER_QUERY_TYPE_MICROSECONDS:
+case PIPE_DRIVER_QUERY_TYPE_HZ:
c->Minimum.u64 = 0;
c->Maximum.u64 = info.max_value.u64 ? info.max_value.u64 : -1;
c->Type = GL_UNSIGNED_INT64_AMD;
-- 
2.5.0



[Mesa-dev] [PATCH v2 1/9] gallium: remove pipe_driver_query_group_info field type

2015-11-13 Thread Nicolai Hähnle
This was only used to implement an unnecessarily restrictive interpretation
of the spec of AMD_performance_monitor. The spec says

  A performance monitor consists of a number of hardware and software
  counters that can be sampled by the GPU and reported back to the
  application.

I guess one could take this as a requirement that counters _must_ be sampled
by the GPU, but then why are they called _software_ counters? Besides,
there's not much reason _not_ to expose all counters that are available,
and this simplifies the code.
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query.c |  3 ---
 src/gallium/include/pipe/p_defines.h  |  7 ---
 src/mesa/state_tracker/st_cb_perfmon.c| 30 ---
 3 files changed, 40 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index f539210..a1d6162 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -200,7 +200,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
if (id == NVC0_HW_SM_QUERY_GROUP) {
   if (screen->compute) {
  info->name = "MP counters";
- info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
 
  /* Because we can't expose the number of hardware counters needed for
   * each different query, we don't want to allow more than one active
@@ -224,7 +223,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
   if (screen->compute) {
  if (screen->base.class_3d < NVE4_3D_CLASS) {
 info->name = "Performance metrics";
-info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
 info->max_active_queries = 1;
 info->num_queries = NVC0_HW_METRIC_QUERY_COUNT;
 return 1;
@@ -234,7 +232,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) {
   info->name = "Driver statistics";
-  info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_CPU;
   info->max_active_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
   info->num_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
   return 1;
diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 7240154..7f241c8 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -829,12 +829,6 @@ enum pipe_driver_query_type
PIPE_DRIVER_QUERY_TYPE_HZ   = 6,
 };
 
-enum pipe_driver_query_group_type
-{
-   PIPE_DRIVER_QUERY_GROUP_TYPE_CPU = 0,
-   PIPE_DRIVER_QUERY_GROUP_TYPE_GPU = 1,
-};
-
 /* Whether an average value per frame or a cumulative value should be
  * displayed.
  */
@@ -864,7 +858,6 @@ struct pipe_driver_query_info
 struct pipe_driver_query_group_info
 {
const char *name;
-   enum pipe_driver_query_group_type type;
unsigned max_active_queries;
unsigned num_queries;
 };
diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 1bb5be3..4ec6d86 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -65,27 +65,6 @@ find_query_type(struct pipe_screen *screen, const char *name)
return type;
 }
 
-/**
- * Return TRUE if the underlying driver expose GPU counters.
- */
-static bool
-has_gpu_counters(struct pipe_screen *screen)
-{
-   int num_groups, gid;
-
-   num_groups = screen->get_driver_query_group_info(screen, 0, NULL);
-   for (gid = 0; gid < num_groups; gid++) {
-  struct pipe_driver_query_group_info group_info;
-
-  if (!screen->get_driver_query_group_info(screen, gid, &group_info))
- continue;
-
-  if (group_info.type == PIPE_DRIVER_QUERY_GROUP_TYPE_GPU)
- return true;
-   }
-   return false;
-}
-
 static bool
 init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
 {
@@ -313,12 +292,6 @@ st_init_perfmon(struct st_context *st)
if (!screen->get_driver_query_info || !screen->get_driver_query_group_info)
   return false;
 
-   if (!has_gpu_counters(screen)) {
-  /* According to the spec, GL_AMD_performance_monitor must only
-   * expose GPU counters. */
-  return false;
-   }
-
/* Get the number of available queries. */
num_counters = screen->get_driver_query_info(screen, 0, NULL);
if (!num_counters)
@@ -339,9 +312,6 @@ st_init_perfmon(struct st_context *st)
   if (!screen->get_driver_query_group_info(screen, gid, &group_info))
  continue;
 
-  if (group_info.type != PIPE_DRIVER_QUERY_GROUP_TYPE_GPU)
- continue;
-
   g->Name = group_info.name;
   g->MaxActiveCounters = group_info.max_active_queries;
   g->NumCounters = 0;
-- 
2.5.0



[Mesa-dev] [PATCH v2 9/9] st/mesa: add support for batch driver queries to perfmon

2015-11-13 Thread Nicolai Hähnle
v2: forgot a null-pointer check (spotted by Samuel Pitoiset)
---
 src/mesa/state_tracker/st_cb_perfmon.c | 78 +++---
 src/mesa/state_tracker/st_cb_perfmon.h |  6 +++
 2 files changed, 77 insertions(+), 7 deletions(-)
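
(Orientation note, not part of the patch text: the st_cb_perfmon.h hunk is
not quoted in this excerpt. Judging from the .c changes below, the
bookkeeping presumably looks roughly like this sketch; counters flagged with
PIPE_DRIVER_QUERY_FLAG_BATCH share one batch query per monitor and remember
which slot of the batched result is theirs, so the readback path would index
batch_result->batch[] with the counter's batch_index.)

   /* Sketch of the assumed bookkeeping, not the literal header change. */
   struct st_perf_counter_object {
      struct pipe_query *query;   /* NULL for batched counters */
      int id, group_id;
      unsigned batch_index;       /* slot in batch_result->batch[] */
   };

   struct st_perf_monitor_object {
      /* ... */
      struct pipe_query *batch_query;          /* shared, may be NULL */
      union pipe_query_result *batch_result;   /* one slot per batched counter */
   };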

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 8628e23..39c3902 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -42,7 +42,10 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
struct st_context *st = st_context(ctx);
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st->pipe;
+   unsigned *batch = NULL;
unsigned num_active_counters = 0;
+   unsigned max_batch_counters = 0;
+   unsigned num_batch_counters = 0;
int gid, cid;
 
st_flush_bitmap_cache(st);
@@ -50,6 +53,7 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
/* Determine the number of active counters. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = &st->perfmon[gid];
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -61,6 +65,8 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
   }
 
   num_active_counters += m->ActiveGroups[gid];
+  if (stg->has_batch)
+ max_batch_counters += m->ActiveGroups[gid];
}
 
if (!num_active_counters)
@@ -71,6 +77,12 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
if (!stm->active_counters)
   return false;
 
+   if (max_batch_counters) {
+  batch = CALLOC(max_batch_counters, sizeof(*batch));
+  if (!batch)
+ return false;
+   }
+
/* Create a query for each active counter. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
@@ -82,13 +94,35 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
  struct st_perf_counter_object *cntr =
            &stm->active_counters[stm->num_active_counters];
 
- cntr->query= pipe->create_query(pipe, stc->query_type, 0);
  cntr->id   = cid;
  cntr->group_id = gid;
+ if (stc->flags & PIPE_DRIVER_QUERY_FLAG_BATCH) {
+cntr->batch_index = num_batch_counters;
+batch[num_batch_counters++] = stc->query_type;
+ } else {
+cntr->query = pipe->create_query(pipe, stc->query_type, 0);
+if (!cntr->query)
+   goto fail;
+ }
  ++stm->num_active_counters;
   }
}
+
+   /* Create the batch query. */
+   if (num_batch_counters) {
+  stm->batch_query = pipe->create_batch_query(pipe, num_batch_counters,
+  batch);
+  stm->batch_result = CALLOC(num_batch_counters, 
sizeof(stm->batch_result->batch[0]));
+  if (!stm->batch_query || !stm->batch_result)
+ goto fail;
+   }
+
+   FREE(batch);
return true;
+
+fail:
+   FREE(batch);
+   return false;
 }
 
 static void
@@ -105,6 +139,13 @@ reset_perf_monitor(struct st_perf_monitor_object *stm,
FREE(stm->active_counters);
stm->active_counters = NULL;
stm->num_active_counters = 0;
+
+   if (stm->batch_query) {
+  pipe->destroy_query(pipe, stm->batch_query);
+  stm->batch_query = NULL;
+   }
+   FREE(stm->batch_result);
+   stm->batch_result = NULL;
 }
 
 static struct gl_perf_monitor_object *
@@ -143,9 +184,13 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
/* Start the query for each active counter. */
for (i = 0; i < stm->num_active_counters; ++i) {
   struct pipe_query *query = stm->active_counters[i].query;
-  if (!pipe->begin_query(pipe, query))
+  if (query && !pipe->begin_query(pipe, query))
   goto fail;
}
+
+   if (stm->batch_query && !pipe->begin_query(pipe, stm->batch_query))
+  goto fail;
+
return true;
 
 fail:
@@ -164,8 +209,12 @@ st_EndPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
/* Stop the query for each active counter. */
for (i = 0; i < stm->num_active_counters; ++i) {
   struct pipe_query *query = stm->active_counters[i].query;
-  pipe->end_query(pipe, query);
+  if (query)
+ pipe->end_query(pipe, query);
}
+
+   if (stm->batch_query)
+  pipe->end_query(pipe, stm->batch_query);
 }
 
 static void
@@ -199,11 +248,16 @@ st_IsPerfMonitorResultAvailable(struct gl_context *ctx,
for (i = 0; i < stm->num_active_counters; ++i) {
   struct pipe_query *query = stm->active_counters[i].query;
   union pipe_query_result result;
-  if (!pipe->get_query_result(pipe, query, FALSE, 

[Mesa-dev] [PATCH v2 8/9] gallium/hud: add support for batch queries

2015-11-13 Thread Nicolai Hähnle
v2: be more defensive about allocations
---
 src/gallium/auxiliary/hud/hud_context.c  |  24 ++-
 src/gallium/auxiliary/hud/hud_driver_query.c | 265 +++
 src/gallium/auxiliary/hud/hud_private.h  |  13 +-
 3 files changed, 256 insertions(+), 46 deletions(-)

diff --git a/src/gallium/auxiliary/hud/hud_context.c 
b/src/gallium/auxiliary/hud/hud_context.c
index ffe30b8..bcef701 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -57,6 +57,7 @@ struct hud_context {
struct cso_context *cso;
struct u_upload_mgr *uploader;
 
+   struct hud_batch_query_context *batch_query;
struct list_head pane_list;
 
/* states */
@@ -510,6 +511,8 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
   hud_alloc_vertices(hud, &hud->text, 4 * 512, 4 * sizeof(float));
 
/* prepare all graphs */
+   hud_batch_query_update(hud->batch_query);
+
   LIST_FOR_EACH_ENTRY(pane, &hud->pane_list, head) {
      LIST_FOR_EACH_ENTRY(gr, &pane->graph_list, head) {
  gr->query_new_value(gr);
@@ -903,17 +906,21 @@ hud_parse_env_var(struct hud_context *hud, const char 
*env)
   }
   else if (strcmp(name, "samples-passed") == 0 &&
has_occlusion_query(hud->pipe->screen)) {
- hud_pipe_query_install(pane, hud->pipe, "samples-passed",
+ hud_pipe_query_install(>batch_query, pane, hud->pipe,
+"samples-passed",
 PIPE_QUERY_OCCLUSION_COUNTER, 0, 0,
 PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+0);
   }
   else if (strcmp(name, "primitives-generated") == 0 &&
has_streamout(hud->pipe->screen)) {
- hud_pipe_query_install(pane, hud->pipe, "primitives-generated",
+ hud_pipe_query_install(&hud->batch_query, pane, hud->pipe,
+"primitives-generated",
 PIPE_QUERY_PRIMITIVES_GENERATED, 0, 0,
 PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+0);
   }
   else {
  boolean processed = FALSE;
@@ -938,17 +945,19 @@ hud_parse_env_var(struct hud_context *hud, const char 
*env)
if (strcmp(name, pipeline_statistics_names[i]) == 0)
   break;
 if (i < Elements(pipeline_statistics_names)) {
-   hud_pipe_query_install(pane, hud->pipe, name,
+   hud_pipe_query_install(&hud->batch_query, pane, hud->pipe, name,
   PIPE_QUERY_PIPELINE_STATISTICS, i,
   0, PIPE_DRIVER_QUERY_TYPE_UINT64,
-  PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+  PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+  0);
processed = TRUE;
 }
  }
 
  /* driver queries */
  if (!processed) {
-if (!hud_driver_query_install(pane, hud->pipe, name)){
+if (!hud_driver_query_install(&hud->batch_query, pane, hud->pipe,
+  name)) {
fprintf(stderr, "gallium_hud: unknown driver query '%s'\n", 
name);
 }
  }
@@ -1287,6 +1296,7 @@ hud_destroy(struct hud_context *hud)
   FREE(pane);
}
 
+   hud_batch_query_cleanup(&hud->batch_query);
pipe->delete_fs_state(pipe, hud->fs_color);
pipe->delete_fs_state(pipe, hud->fs_text);
pipe->delete_vs_state(pipe, hud->vs);
diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c 
b/src/gallium/auxiliary/hud/hud_driver_query.c
index 3198ab3..29f70fc 100644
--- a/src/gallium/auxiliary/hud/hud_driver_query.c
+++ b/src/gallium/auxiliary/hud/hud_driver_query.c
@@ -34,13 +34,159 @@
 #include "hud/hud_private.h"
 #include "pipe/p_screen.h"
 #include "os/os_time.h"
+#include "util/u_math.h"
 #include "util/u_memory.h"
 #include 
 
+// Must be a power of two
 #define NUM_QUERIES 8
 
+struct hud_batch_query_context {
+   struct pipe_context *pipe;
+   unsigned num_query_types;
+   unsigned allocated_query_types;
+   unsigned *query_types;
+
+   boolean failed;
+   struct pipe_query *query[NUM_QUERIES];
+   union pipe_query_result *result[NUM_QUERIES];
+   unsigned head, pending, results;
+};
+
+void
+hud_batch_query_update(struct hud_batch_query_context *bq)
+{
+   struct pipe_context *pipe;
+
+   if (!bq || bq->failed)
+  return;
+
+   pipe = bq->pipe;
+
+   if (bq->query[bq->head])
+  pipe->end_query(pipe, bq->query[bq->head]);
+
+   bq->results = 0;
+
+   while (bq->pending) {
+  unsigned idx = (bq->head - 

[Mesa-dev] [PATCH v2 2/9] gallium/hud: remove unused field in query_info

2015-11-13 Thread Nicolai Hähnle
Reviewed-by: Samuel Pitoiset 
---
 src/gallium/auxiliary/hud/hud_driver_query.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c 
b/src/gallium/auxiliary/hud/hud_driver_query.c
index f14305e..3198ab3 100644
--- a/src/gallium/auxiliary/hud/hud_driver_query.c
+++ b/src/gallium/auxiliary/hud/hud_driver_query.c
@@ -48,7 +48,6 @@ struct query_info {
/* Ring of queries. If a query is busy, we use another slot. */
struct pipe_query *query[NUM_QUERIES];
unsigned head, tail;
-   unsigned num_queries;
 
uint64_t last_time;
uint64_t results_cumulative;
-- 
2.5.0



[Mesa-dev] [PATCH v2 6/9] st/mesa: maintain active perfmon counters in an array

2015-11-13 Thread Nicolai Hähnle
It is easy enough to pre-determine the required size, and arrays are
generally better behaved especially when they get large.

v2: make sure init_perf_monitor returns true when no counters are active
(spotted by Samuel Pitoiset)
---
 src/mesa/state_tracker/st_cb_perfmon.c | 81 --
 src/mesa/state_tracker/st_cb_perfmon.h | 18 
 2 files changed, 58 insertions(+), 41 deletions(-)
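
(In short, the change moves from growing a linked list one CALLOC_STRUCT at a
time to a two-pass count/allocate/fill over a flat array; a brief sketch of
the pattern, mirroring the hunks below:)

   /* Pass 1: count the active counters. */
   unsigned num_active_counters = 0;
   for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++)
      num_active_counters += m->ActiveGroups[gid];

   /* Pass 2: allocate once, then fill active_counters[0..n-1]. */
   stm->active_counters = CALLOC(num_active_counters,
                                 sizeof(*stm->active_counters));
   if (!stm->active_counters)
      return false;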

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index ec12eb2..8628e23 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -42,15 +42,14 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
struct st_context *st = st_context(ctx);
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st->pipe;
+   unsigned num_active_counters = 0;
int gid, cid;
 
st_flush_bitmap_cache(st);
 
-   /* Create a query for each active counter. */
+   /* Determine the number of active counters. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
-  const struct st_perf_monitor_group *stg = &st->perfmon[gid];
-  BITSET_WORD tmp;
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -61,19 +60,32 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
  return false;
   }
 
+  num_active_counters += m->ActiveGroups[gid];
+   }
+
+   if (!num_active_counters)
+  return true;
+
+   stm->active_counters = CALLOC(num_active_counters,
+ sizeof(*stm->active_counters));
+   if (!stm->active_counters)
+  return false;
+
+   /* Create a query for each active counter. */
+   for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
+  const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = &st->perfmon[gid];
+  BITSET_WORD tmp;
+
   BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) {
   const struct st_perf_monitor_counter *stc = &stg->counters[cid];
- struct st_perf_counter_object *cntr;
-
- cntr = CALLOC_STRUCT(st_perf_counter_object);
- if (!cntr)
-return false;
+ struct st_perf_counter_object *cntr =
+            &stm->active_counters[stm->num_active_counters];
 
  cntr->query= pipe->create_query(pipe, stc->query_type, 0);
  cntr->id   = cid;
  cntr->group_id = gid;
-
- list_addtail(&cntr->list, &stm->active_counters);
+ ++stm->num_active_counters;
   }
}
return true;
@@ -83,24 +95,24 @@ static void
 reset_perf_monitor(struct st_perf_monitor_object *stm,
struct pipe_context *pipe)
 {
-   struct st_perf_counter_object *cntr, *tmp;
+   unsigned i;
 
-   LIST_FOR_EACH_ENTRY_SAFE(cntr, tmp, &stm->active_counters, list) {
-  if (cntr->query)
- pipe->destroy_query(pipe, cntr->query);
-  list_del(>list);
-  free(cntr);
+   for (i = 0; i < stm->num_active_counters; ++i) {
+  struct pipe_query *query = stm->active_counters[i].query;
+  if (query)
+ pipe->destroy_query(pipe, query);
}
+   FREE(stm->active_counters);
+   stm->active_counters = NULL;
+   stm->num_active_counters = 0;
 }
 
 static struct gl_perf_monitor_object *
 st_NewPerfMonitor(struct gl_context *ctx)
 {
struct st_perf_monitor_object *stq = 
ST_CALLOC_STRUCT(st_perf_monitor_object);
-   if (stq) {
-  list_inithead(&stq->active_counters);
+   if (stq)
   return &stq->base;
-   }
return NULL;
 }
 
@@ -119,9 +131,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 {
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st_context(ctx)->pipe;
-   struct st_perf_counter_object *cntr;
+   unsigned i;
 
-   if (LIST_IS_EMPTY(&stm->active_counters)) {
+   if (!stm->num_active_counters) {
   /* Create a query for each active counter before starting
* a new monitoring session. */
   if (!init_perf_monitor(ctx, m))
@@ -129,8 +141,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
}
 
/* Start the query for each active counter. */
-   LIST_FOR_EACH_ENTRY(cntr, &stm->active_counters, list) {
-  if (!pipe->begin_query(pipe, cntr->query))
+   for (i = 0; i < stm->num_active_counters; ++i) {
+  struct pipe_query *query = stm->active_counters[i].query;
+  if (!pipe->begin_query(pipe, query))
   goto fail;
}
return true;
@@ -146,11 +159,13 @@ st_EndPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 {
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st_context(ctx)->pipe;
-   struct st_perf_counter_object 

Re: [Mesa-dev] [PATCH] r600g: Support TGSI_SEMANTIC_HELPER_INVOCATION

2015-11-13 Thread Nicolai Hähnle

On 13.11.2015 00:14, Glenn Kennard wrote:

Signed-off-by: Glenn Kennard 
---
Maybe there is a better way to check if a thread is a helper invocation?


Is ctx->face_gpr guaranteed to be initialized when 
load_helper_invocation is called?


As an aside, I'm not sure I understand correctly what this is supposed to 
do. The values you're querying are related to multi-sampling, but my 
understanding has always been that helper invocations can also happen 
without multi-sampling: you always want to process 2x2 quads of pixels 
at a time to be able to compute derivatives for texture sampling. When 
the boundary of a primitive intersects such a quad, you get helper 
invocations outside the primitive.


Cheers,
Nicolai


  src/gallium/drivers/r600/r600_shader.c | 83 +-
  1 file changed, 72 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 560197c..a227d78 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -530,7 +530,8 @@ static int r600_spi_sid(struct r600_shader_io * io)
name == TGSI_SEMANTIC_PSIZE ||
name == TGSI_SEMANTIC_EDGEFLAG ||
name == TGSI_SEMANTIC_FACE ||
-   name == TGSI_SEMANTIC_SAMPLEMASK)
+   name == TGSI_SEMANTIC_SAMPLEMASK ||
+   name == TGSI_SEMANTIC_HELPER_INVOCATION)
index = 0;
else {
if (name == TGSI_SEMANTIC_GENERIC) {
@@ -734,7 +735,8 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
case TGSI_FILE_SYSTEM_VALUE:
if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK ||
d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID ||
-   d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) {
+   d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS ||
+   d->Semantic.Name == TGSI_SEMANTIC_HELPER_INVOCATION) {
break; /* Already handled from 
allocate_system_value_inputs */
} else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
if (!ctx->native_integers) {
@@ -776,13 +778,14 @@ static int allocate_system_value_inputs(struct 
r600_shader_ctx *ctx, int gpr_off
struct {
boolean enabled;
int *reg;
-   unsigned name, alternate_name;
+   unsigned associated_semantics[3];
} inputs[2] = {
-   { false, &ctx->face_gpr, TGSI_SEMANTIC_SAMPLEMASK, ~0u }, /* 
lives in Front Face GPR.z */
-
-   { false, &ctx->fixed_pt_position_gpr, TGSI_SEMANTIC_SAMPLEID, 
TGSI_SEMANTIC_SAMPLEPOS } /* SAMPLEID is in Fixed Point Position GPR.w */
+   { false, &ctx->face_gpr, { TGSI_SEMANTIC_SAMPLEMASK /* lives in 
Front Face GPR.z */,
+   TGSI_SEMANTIC_HELPER_INVOCATION, ~0u } },
+   { false, &ctx->fixed_pt_position_gpr, { TGSI_SEMANTIC_SAMPLEID  
/* in Fixed Point Position GPR.w */,
+   TGSI_SEMANTIC_SAMPLEPOS, 
TGSI_SEMANTIC_HELPER_INVOCATION } }
};
-   int i, k, num_regs = 0;
+   int i, k, l, num_regs = 0;

   if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) {
return 0;
@@ -818,9 +821,11 @@ static int allocate_system_value_inputs(struct 
r600_shader_ctx *ctx, int gpr_off
   struct tgsi_full_declaration *d = &parse.FullToken.FullDeclaration;

if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
for (k = 0; k < Elements(inputs); k++) {
-   if (d->Semantic.Name == inputs[k].name 
||
-   d->Semantic.Name == 
inputs[k].alternate_name) {
-   inputs[k].enabled = true;
+   for (l = 0; l < 3; l++) {
+   if (d->Semantic.Name == 
inputs[k].associated_semantics[l]) {
+   inputs[k].enabled = 
true;
+   break;
+   }
}
}
}
@@ -832,7 +837,7 @@ static int allocate_system_value_inputs(struct 
r600_shader_ctx *ctx, int gpr_off
for (i = 0; i < Elements(inputs); i++) {
boolean enabled = inputs[i].enabled;
int *reg = inputs[i].reg;
-   unsigned name = inputs[i].name;
+   unsigned name = inputs[i].associated_semantics[0];

if (enabled) {
int gpr = gpr_offset + num_regs++;
@@ -985,6 +990,56 @@ static int load_sample_position(struct r600_shader_ctx 
*ctx, struct r600_shader_
return t1;
  }

+static int load_helper_invocation(struct r600_shader_ctx *ctx,
+   

Re: [Mesa-dev] [PATCH 1/9] gallium: remove pipe_driver_query_group_info field type

2015-11-13 Thread Nicolai Hähnle

On 13.11.2015 18:35, Samuel Pitoiset wrote:

On 11/13/2015 04:57 PM, Nicolai Hähnle wrote:

This was only used to implement an unnecessarily restrictive
interpretation
of the spec of AMD_performance_monitor. The spec says

   A performance monitor consists of a number of hardware and software
   counters that can be sampled by the GPU and reported back to the
   application.

I guess one could take this as a requirement that counters _must_ be
sampled
by the GPU, but then why are they called _software_ counters? Besides,
there's not much reason _not_ to expose all counters that are available,
and this simplifies the code.


The spec says:

"
While BeginPerfMonitorAMD does mark the beginning of performance counter
collection, the counters do not begin collecting immediately.  Rather,
the counters begin collection when BeginPerfMonitorAMD is processed by
the hardware.  That is, the API is asynchronous, and performance counter
collection does not begin until the graphics hardware processes the
BeginPerfMonitorAMD command.
"


Right. I interpreted this as the authors' attempt to say that the 
counting happens in what other parts of OpenGL traditionally call "the 
server", i.e. the Begin/EndPerfMonitorAMD commands can be used to 
bracket draw calls in the way you'd usually expect, in the same way that 
e.g. changing the DepthFunc only affects rendering once the graphics 
hardware "processes the DepthFunc command".




This is why I introduced the notion of group of GPU counters in Gallium,
because "processed by the hardware", "asynchronous" and "command" seem
like the spec is talking about GPU only.

In what world are software counters sampled by the GPU? :-)
This spec is definitely not clear about that...

Anyway, I disagree with this patch because:
1) we need to agree on what amd_performance_monitor must expose
or not. Maybe it's time to ask the guys who wrote it?


Well, Catalyst exposes only hardware counters in 
AMD_performance_monitor. But that's beside the point.


The real point is that the driver_query_group stuff is *only* used for 
AMD_performance_monitor. So it makes no sense that a driver would ever 
expose a driver_query_group that was not intended to be exposed via that 
extension.


I understand that the group_type was added with good intentions. I might 
have done the same. But in over a year (judging by the commit dates), no 
other use case for driver_query_groups has come up.


So really, this is a question for everybody who cares about nouveau, 
because nouveau is the only driver that (if a #define is enabled) 
advertises a CPU driver_query_group.


Do you want that group to be accessible via AMD_performance_monitor? 
Then be happy with this patch. Do you not want that group to be so 
accessible? Then just remove it, because it serves no purpose either way.




2) this doesn't really simplify code.


The patch only removes LOCs, so I find that a weird argument ;)

Cheers,
Nicolai




---
  src/gallium/drivers/nouveau/nvc0/nvc0_query.c |  3 ---
  src/gallium/include/pipe/p_defines.h  |  7 ---
  src/mesa/state_tracker/st_cb_perfmon.c| 30
---
  3 files changed, 40 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index f539210..a1d6162 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -200,7 +200,6 @@ nvc0_screen_get_driver_query_group_info(struct
pipe_screen *pscreen,
 if (id == NVC0_HW_SM_QUERY_GROUP) {
if (screen->compute) {
   info->name = "MP counters";
- info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;

   /* Because we can't expose the number of hardware counters
needed for
* each different query, we don't want to allow more than
one active
@@ -224,7 +223,6 @@ nvc0_screen_get_driver_query_group_info(struct
pipe_screen *pscreen,
if (screen->compute) {
   if (screen->base.class_3d < NVE4_3D_CLASS) {
  info->name = "Performance metrics";
-info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
  info->max_active_queries = 1;
  info->num_queries = NVC0_HW_METRIC_QUERY_COUNT;
  return 1;
@@ -234,7 +232,6 @@ nvc0_screen_get_driver_query_group_info(struct
pipe_screen *pscreen,
  #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
 else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) {
info->name = "Driver statistics";
-  info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_CPU;
info->max_active_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
info->num_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
return 1;
diff --git a/src/gallium/include/pipe/p_defines.h
b/src/gallium/include/pipe/p_defines.h
index 7240154..7f241c8 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ 

Re: [Mesa-dev] [PATCH 3/9] st/mesa: map semantic driver query types to underlying type

2015-11-13 Thread Nicolai Hähnle

On 13.11.2015 18:34, Samuel Pitoiset wrote:



On 11/13/2015 04:57 PM, Nicolai Hähnle wrote:

---
  src/gallium/include/pipe/p_defines.h   | 2 ++
  src/mesa/state_tracker/st_cb_perfmon.c | 3 +++
  2 files changed, 5 insertions(+)

diff --git a/src/gallium/include/pipe/p_defines.h
b/src/gallium/include/pipe/p_defines.h
index 7f241c8..7ed9f6d 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -791,6 +791,8 @@ union pipe_query_result
 /* PIPE_QUERY_PRIMITIVES_GENERATED */
 /* PIPE_QUERY_PRIMITIVES_EMITTED */
 /* PIPE_DRIVER_QUERY_TYPE_UINT64 */
+   /* PIPE_DRIVER_QUERY_TYPE_BYTES */
+   /* PIPE_DRIVER_QUERY_TYPE_MICROSECONDS */


When you are at it, please also add /*
PIPE_DRIVER_QUERY_TYPE_MICROSECONDS */ to pipe_query_result.


Sorry, I don't understand. Isn't that what I'm doing here?

Cheers,
Nicolai


With this minor change, this patch is:

Reviewed-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>



 /* PIPE_DRIVER_QUERY_TYPE_HZ */
 uint64_t u64;

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c
b/src/mesa/state_tracker/st_cb_perfmon.c
index 4ec6d86..dedb8f5 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -334,6 +334,9 @@ st_init_perfmon(struct st_context *st)
   c->Name = info.name;
   switch (info.type) {
  case PIPE_DRIVER_QUERY_TYPE_UINT64:
+case PIPE_DRIVER_QUERY_TYPE_BYTES:
+case PIPE_DRIVER_QUERY_TYPE_MICROSECONDS:
+case PIPE_DRIVER_QUERY_TYPE_HZ:
 c->Minimum.u64 = 0;
 c->Maximum.u64 = info.max_value.u64 ?
info.max_value.u64 : -1;
 c->Type = GL_UNSIGNED_INT64_AMD;







[Mesa-dev] [PATCH] st/mesa: maintain active perfmon counters in an array (v2)

2015-11-13 Thread Nicolai Hähnle
It is easy enough to pre-determine the required size, and arrays are
generally better behaved especially when they get large.

v2: make sure init_perf_monitor returns true when no counters are active
(spotted by Samuel Pitoiset)
---
Thanks Samuel, good catch! I did test with piglit and the tests passed, so
probably CALLOC returned non-null with a zero size, but it's better not to
rely on that.

Cheers,
Nicolai
---
 src/mesa/state_tracker/st_cb_perfmon.c | 81 --
 src/mesa/state_tracker/st_cb_perfmon.h | 18 
 2 files changed, 58 insertions(+), 41 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index ec12eb2..8628e23 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -42,15 +42,14 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
struct st_context *st = st_context(ctx);
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st->pipe;
+   unsigned num_active_counters = 0;
int gid, cid;
 
st_flush_bitmap_cache(st);
 
-   /* Create a query for each active counter. */
+   /* Determine the number of active counters. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
-  const struct st_perf_monitor_group *stg = &st->perfmon[gid];
-  BITSET_WORD tmp;
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -61,19 +60,32 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
  return false;
   }
 
+  num_active_counters += m->ActiveGroups[gid];
+   }
+
+   if (!num_active_counters)
+  return true;
+
+   stm->active_counters = CALLOC(num_active_counters,
+ sizeof(*stm->active_counters));
+   if (!stm->active_counters)
+  return false;
+
+   /* Create a query for each active counter. */
+   for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
+  const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = &st->perfmon[gid];
+  BITSET_WORD tmp;
+
   BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) {
  const struct st_perf_monitor_counter *stc = &stg->counters[cid];
- struct st_perf_counter_object *cntr;
-
- cntr = CALLOC_STRUCT(st_perf_counter_object);
- if (!cntr)
-return false;
+ struct st_perf_counter_object *cntr =
+            &stm->active_counters[stm->num_active_counters];
 
  cntr->query= pipe->create_query(pipe, stc->query_type, 0);
  cntr->id   = cid;
  cntr->group_id = gid;
-
- list_addtail(&cntr->list, &stm->active_counters);
+ ++stm->num_active_counters;
   }
}
return true;
@@ -83,24 +95,24 @@ static void
 reset_perf_monitor(struct st_perf_monitor_object *stm,
struct pipe_context *pipe)
 {
-   struct st_perf_counter_object *cntr, *tmp;
+   unsigned i;
 
-   LIST_FOR_EACH_ENTRY_SAFE(cntr, tmp, &stm->active_counters, list) {
-  if (cntr->query)
- pipe->destroy_query(pipe, cntr->query);
-  list_del(>list);
-  free(cntr);
+   for (i = 0; i < stm->num_active_counters; ++i) {
+  struct pipe_query *query = stm->active_counters[i].query;
+  if (query)
+ pipe->destroy_query(pipe, query);
}
+   FREE(stm->active_counters);
+   stm->active_counters = NULL;
+   stm->num_active_counters = 0;
 }
 
 static struct gl_perf_monitor_object *
 st_NewPerfMonitor(struct gl_context *ctx)
 {
struct st_perf_monitor_object *stq = 
ST_CALLOC_STRUCT(st_perf_monitor_object);
-   if (stq) {
-  list_inithead(&stq->active_counters);
+   if (stq)
   return &stq->base;
-   }
return NULL;
 }
 
@@ -119,9 +131,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 {
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st_context(ctx)->pipe;
-   struct st_perf_counter_object *cntr;
+   unsigned i;
 
-   if (LIST_IS_EMPTY(&stm->active_counters)) {
+   if (!stm->num_active_counters) {
   /* Create a query for each active counter before starting
* a new monitoring session. */
   if (!init_perf_monitor(ctx, m))
@@ -129,8 +141,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
}
 
/* Start the query for each active counter. */
-   LIST_FOR_EACH_ENTRY(cntr, &stm->active_counters, list) {
-  if (!pipe->begin_query(pipe, cntr->query))
+   for (i = 0; i < stm->num_active_counters; ++i) {
+  struct pipe_query *query = stm->active_counters[i].query;
+  if (!pipe->begin_query(pipe, query))
   goto fail;
}
return true;
@@ -146,11 +159,13 @@ st_EndPerfMonitor(struct gl_context *ctx, struct 

Re: [Mesa-dev] [PATCH] r600g: Support TGSI_SEMANTIC_HELPER_INVOCATION

2015-11-16 Thread Nicolai Hähnle

Hi Glenn,

On 14.11.2015 00:11, Glenn Kennard wrote:

On Fri, 13 Nov 2015 18:57:28 +0100, Nicolai Hähnle <nhaeh...@gmail.com>
wrote:


On 13.11.2015 00:14, Glenn Kennard wrote:

Signed-off-by: Glenn Kennard <glenn.kenn...@gmail.com>
---
Maybe there is a better way to check if a thread is a helper invocation?


Is ctx->face_gpr guaranteed to be initialized when
load_helper_invocation is called?



allocate_system_value_inputs() sets that if needed, and is called before
parsing any opcodes.


Sorry, you're right, I missed the second change to the inputs array there.



Aside, I'm not sure I understand correctly what this is supposed to
do. The values you're querying are related to multi-sampling, but my
understanding has always been that helper invocations can also happen
without multi-sampling: you always want to process 2x2 quads of pixels
at a time to be able to compute derivatives for texture sampling. When
the boundary of primitive intersects such a quad, you get helper
invocations outside the primitive.



Non-MSAA buffers act just like 1-sample buffers with regard to the
coverage mask supplied by the hardware, so helper invocations, which have
no coverage, get 0 for the mask value, and normal fragments get 1.
Works with the piglit test case posted, at least...


Here's why I'm still skeptical: According to the GLSL spec, the fragment 
shader is only run once per pixel by default, even when MSAA is enabled. 
_However_, if a shader statically accesses the SampleID, _then_ it must 
be run once per sample. The way I understand it, your change forces 
the fragment shader to access SampleID, even when people ostensibly use 
HelperInvocation in the hope of optimizing something.


In the usual MSAA operation of only running the fragment shader once per 
pixel, HelperInvocation should simply be (SampleMask == 0), right? It 
seems like the right thing to do is to _not_ allocate 
TGSI_SEMANTIC_SAMPLEID when TGSI_SEMANTIC_HELPER_INVOCATION is used, and 
then use different code paths in load_helper_invocation based on which 
of the source registers are actually there.


Cheers,
Nicolai




Cheers,
Nicolai


  src/gallium/drivers/r600/r600_shader.c | 83
+-
  1 file changed, 72 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c
b/src/gallium/drivers/r600/r600_shader.c
index 560197c..a227d78 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -530,7 +530,8 @@ static int r600_spi_sid(struct r600_shader_io * io)
  name == TGSI_SEMANTIC_PSIZE ||
  name == TGSI_SEMANTIC_EDGEFLAG ||
  name == TGSI_SEMANTIC_FACE ||
-name == TGSI_SEMANTIC_SAMPLEMASK)
+name == TGSI_SEMANTIC_SAMPLEMASK ||
+name == TGSI_SEMANTIC_HELPER_INVOCATION)
  index = 0;
  else {
  if (name == TGSI_SEMANTIC_GENERIC) {
@@ -734,7 +735,8 @@ static int tgsi_declaration(struct
r600_shader_ctx *ctx)
  case TGSI_FILE_SYSTEM_VALUE:
  if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK ||
  d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID ||
-d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) {
+d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS ||
+d->Semantic.Name == TGSI_SEMANTIC_HELPER_INVOCATION) {
  break; /* Already handled from
allocate_system_value_inputs */
  } else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
  if (!ctx->native_integers) {
@@ -776,13 +778,14 @@ static int allocate_system_value_inputs(struct
r600_shader_ctx *ctx, int gpr_off
  struct {
  boolean enabled;
  int *reg;
-unsigned name, alternate_name;
+unsigned associated_semantics[3];
  } inputs[2] = {
-{ false, &ctx->face_gpr, TGSI_SEMANTIC_SAMPLEMASK, ~0u }, /*
lives in Front Face GPR.z */
-
-{ false, &ctx->fixed_pt_position_gpr,
TGSI_SEMANTIC_SAMPLEID, TGSI_SEMANTIC_SAMPLEPOS } /* SAMPLEID is in
Fixed Point Position GPR.w */
+{ false, &ctx->face_gpr, { TGSI_SEMANTIC_SAMPLEMASK /* lives
in Front Face GPR.z */,
+TGSI_SEMANTIC_HELPER_INVOCATION, ~0u } },
+{ false, &ctx->fixed_pt_position_gpr, {
TGSI_SEMANTIC_SAMPLEID  /* in Fixed Point Position GPR.w */,
+TGSI_SEMANTIC_SAMPLEPOS, TGSI_SEMANTIC_HELPER_INVOCATION
} }
  };
-int i, k, num_regs = 0;
+int i, k, l, num_regs = 0;

   if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) {
  return 0;
@@ -818,9 +821,11 @@ static int allocate_system_value_inputs(struct
r600_shader_ctx *ctx, int gpr_off
   struct tgsi_full_declaration *d = &parse.FullToken.FullDeclaration;

  if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
  for (k = 0; k < Elements(inputs); k++) {
-if (d->Semantic.Name == inputs[k].name ||
-d->Semantic.Name == inputs[k].alternate_name) {
-  

Re: [Mesa-dev] [PATCH 5/5] radeonsi: calculate optimal GS ring sizes to fix GS hangs on Tonga

2015-11-09 Thread Nicolai Hähnle
{
-   unsigned esgs_ring_size = 128 * 1024;
-   unsigned gsvs_ring_size = 60 * 1024 * 1024;
+   struct si_shader_selector *es =
+   sctx->tes_shader.cso ? sctx->tes_shader.cso : 
sctx->vs_shader.cso;
+   struct si_shader_selector *gs = sctx->gs_shader.cso;
+   struct si_pm4_state *pm4;

-   assert(!sctx->esgs_ring && !sctx->gsvs_ring);
+   /* Chip constants. */
+   unsigned num_se = sctx->screen->b.info.max_se;
+   unsigned wave_size = 64;
+   unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */
+   unsigned gs_vertex_reuse = 16 * num_se; /* GS_VERTEX_REUSE register 
(per SE) */
+   unsigned alignment = 256 * num_se;
+   /* The maximum size is 63.999 MB per SE. */
+   unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
+
+   /* Calculate the minimum size. */
+   unsigned min_esgs_ring_size = align(es->esgs_itemsize * gs_vertex_reuse 
*
+   wave_size, alignment);
+
+   /* These are recommended sizes, not minimum sizes. */
+   unsigned esgs_ring_size = max_gs_waves * 2 * wave_size *
+ es->esgs_itemsize * 
gs->gs_input_verts_per_prim;
+   unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size *
+ gs->max_gsvs_emit_size * (gs->max_gs_stream + 
1);
+
+   min_esgs_ring_size = align(min_esgs_ring_size, alignment);
+   esgs_ring_size = align(esgs_ring_size, alignment);
+   gsvs_ring_size = align(gsvs_ring_size, alignment);
+
+   esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size);
+   gsvs_ring_size = MIN2(gsvs_ring_size, max_size);
+
+   /* Some rings don't have to be allocated if shaders don't use them.
+* (e.g. no varyings between ES and GS or GS and PS)
+*/
+   bool update_esgs = esgs_ring_size &&
+  (!sctx->esgs_ring ||
+   sctx->esgs_ring->width0 < esgs_ring_size);
+   bool update_gsvs = gsvs_ring_size &&
+      (!sctx->gsvs_ring ||
+   sctx->gsvs_ring->width0 < gsvs_ring_size);


I take it the comment above should be "or GS and _VS_".

With this, the series is

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>
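
To make the formulas concrete with an illustrative example (numbers assumed,
not taken from the patch): on a chip with num_se = 4, max_gs_waves = 128,
gs_vertex_reuse = 64 and alignment = 1024; with esgs_itemsize = 16 bytes the
minimum ESGS ring is align(16 * 64 * 64, 1024) = 64 KiB, and the recommended
size for triangle input (gs_input_verts_per_prim = 3) is
128 * 2 * 64 * 16 * 3 = 768 KiB, which then gets aligned and clamped against
the 63.999 MB per-SE maximum.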



-   sctx->esgs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
-  PIPE_USAGE_DEFAULT, esgs_ring_size);
-   if (!sctx->esgs_ring)
-   return;
+   if (!update_esgs && !update_gsvs)
+   return true;

-   sctx->gsvs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
-PIPE_USAGE_DEFAULT, 
gsvs_ring_size);
-   if (!sctx->gsvs_ring) {
+   if (update_esgs) {
   pipe_resource_reference(&sctx->esgs_ring, NULL);
-   return;
+   sctx->esgs_ring = pipe_buffer_create(sctx->b.b.screen, 
PIPE_BIND_CUSTOM,
+PIPE_USAGE_DEFAULT,
+esgs_ring_size);
+   if (!sctx->esgs_ring)
+   return false;
}

-   si_init_config_add_vgt_flush(sctx);
+   if (update_gsvs) {
+   pipe_resource_reference(&sctx->gsvs_ring, NULL);
+   sctx->gsvs_ring = pipe_buffer_create(sctx->b.b.screen, 
PIPE_BIND_CUSTOM,
+PIPE_USAGE_DEFAULT,
+gsvs_ring_size);
+   if (!sctx->gsvs_ring)
+   return false;
+   }
+
+   /* Create the "init_config_gs_rings" state. */
+   pm4 = CALLOC_STRUCT(si_pm4_state);
+   if (!pm4)
+   return false;

-   /* Append these registers to the init config state. */
if (sctx->b.chip_class >= CIK) {
-   if (sctx->b.chip_class >= VI) {
-   /* The maximum sizes are 63.999 MB on VI, because
-* the register fields only have 18 bits. */
-   assert(esgs_ring_size / 256 < (1 << 18));
-   assert(gsvs_ring_size / 256 < (1 << 18));
-   }
-   si_pm4_set_reg(sctx->init_config, R_030900_VGT_ESGS_RING_SIZE,
-  esgs_ring_size / 256);
-   si_pm4_set_reg(sctx->init_config, R_030904_VGT_GSVS_RING_SIZE,
-  gsvs_ring_size / 256);
+   if (sctx->esgs_ring)
+   si_pm4_set_reg(pm4, R_030900_VGT_ESGS_RING_SIZE,
+  sctx->esgs_ring->width0 / 256);
+   if (sctx->gsvs_ring)
+  

Re: [Mesa-dev] [PATCH 6/7] gallium/radeon: remove predicate_drawing flag

2015-11-09 Thread Nicolai Hähnle

On 08.11.2015 22:48, Marek Olšák wrote:

From: Marek Olšák 

---
  src/gallium/drivers/r600/r600_hw_context.c| 2 +-
  src/gallium/drivers/r600/r600_state_common.c  | 2 +-
  src/gallium/drivers/radeon/r600_pipe_common.h | 1 -
  src/gallium/drivers/radeon/r600_query.c   | 1 -
  src/gallium/drivers/radeonsi/si_state_draw.c  | 2 +-
  5 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_hw_context.c 
b/src/gallium/drivers/r600/r600_hw_context.c
index 44e7cf2..2383175 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -81,7 +81,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned 
num_dw,
}

/* Count in render_condition(NULL) at the end of CS. */
-   if (ctx->b.predicate_drawing) {
+   if (ctx->b.current_render_cond) {
num_dw += 3;
}


Aside: What is this actually trying to count? The way I understand 
conditional drawing now, there is no reset state that we have to emit at 
the end of the CS (other than for the queries themselves, but AFAIU that 
is already counted by num_cs_dw_nontimer_queries_suspend above). Perhaps 
this block could actually be dropped entirely?


Cheers,
Nicolai



diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index 28aedff..5cf5208 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1478,7 +1478,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const 
struct pipe_draw_info
struct pipe_draw_info info = *dinfo;
struct pipe_index_buffer ib = {};
struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
-   bool render_cond_bit = rctx->b.predicate_drawing && 
!rctx->b.render_cond_force_off;
+   bool render_cond_bit = rctx->b.current_render_cond && 
!rctx->b.render_cond_force_off;
uint64_t mask;

if (!info.indirect && !info.count && (info.indexed || 
!info.count_from_stream_output)) {
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 09465ae..ba9000f 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -421,7 +421,6 @@ struct r600_common_context {
struct pipe_query   *current_render_cond;
unsigned            current_render_cond_mode;
boolean current_render_cond_cond;
-   bool                predicate_drawing;
bool                render_cond_force_off; /* for u_blitter 
*/

/* MSAA sample locations.
diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 145b629..9f92587 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -834,7 +834,6 @@ static void r600_render_condition(struct pipe_context *ctx,
rctx->current_render_cond = query;
rctx->current_render_cond_cond = condition;
rctx->current_render_cond_mode = mode;
-   rctx->predicate_drawing = query != NULL;

/* Compute the size of SET_PREDICATION packets. */
atom->num_dw = 0;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
b/src/gallium/drivers/radeonsi/si_state_draw.c
index ebc01e8..79e8876 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -457,7 +457,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
  {
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
unsigned sh_base_reg = 
sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX];
-   bool render_cond_bit = sctx->b.predicate_drawing && 
!sctx->b.render_cond_force_off;
+   bool render_cond_bit = sctx->b.current_render_cond && 
!sctx->b.render_cond_force_off;

if (info->count_from_stream_output) {
struct r600_so_target *t =



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/6] radeonsi: add glClearBufferSubData acceleration

2015-11-09 Thread Nicolai Hähnle

On 08.11.2015 22:44, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

Unaligned 8-bit and 16-bit clears are done in software.


I found this confusing at first. I think a better phrasing is something 
along the lines of:


8-bit and 16-bit clears which are not aligned to dwords are done in 
software.
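
To illustrate when each path applies, a short GL-side usage sketch; it assumes a GL 4.3 context with the entry points resolved through a loader such as libepoxy, and the buffer id, sizes and values are made up:

/* Assumes a current GL 4.3 context; entry points resolved via libepoxy. */
#include <epoxy/gl.h>

static void clear_examples(GLuint buf)
{
   const GLubyte  byte_val  = 0xAB;
   const GLushort short_val = 0x1234;

   glBindBuffer(GL_ARRAY_BUFFER, buf);

   /* Offset and size are multiples of 4, so the driver can replicate the
    * byte into 0xABABABAB and take the dword-based fast path. */
   glClearBufferSubData(GL_ARRAY_BUFFER, GL_R8UI, 0, 4096,
                        GL_RED_INTEGER, GL_UNSIGNED_BYTE, &byte_val);

   /* Offset 2 is not dword-aligned, so this 16-bit clear is the case the
    * commit message describes as falling back to software. */
   glClearBufferSubData(GL_ARRAY_BUFFER, GL_R16UI, 2, 30,
                        GL_RED_INTEGER, GL_UNSIGNED_SHORT, &short_val);
}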


With this, the whole series is

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>


---
  src/gallium/drivers/radeonsi/si_blit.c | 60 ++
  1 file changed, 60 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index d320ac4..31f22c4 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -737,9 +737,69 @@ static void si_flush_resource(struct pipe_context *ctx,
}
  }

+static void si_pipe_clear_buffer(struct pipe_context *ctx,
+struct pipe_resource *dst,
+unsigned offset, unsigned size,
+const void *clear_value_ptr,
+int clear_value_size)
+{
+   struct si_context *sctx = (struct si_context*)ctx;
+   uint32_t dword_value;
+   unsigned i;
+
+   assert(offset % clear_value_size == 0);
+   assert(size % clear_value_size == 0);
+
+   if (clear_value_size > 4) {
+   const uint32_t *u32 = clear_value_ptr;
+   bool clear_dword_duplicated = true;
+
+   /* See if we can lower large fills to dword fills. */
+   for (i = 1; i < clear_value_size / 4; i++)
+   if (u32[0] != u32[i]) {
+   clear_dword_duplicated = false;
+   break;
+   }
+
+   if (!clear_dword_duplicated) {
+   /* Use transform feedback for 64-bit, 96-bit, and
+* 128-bit fills.
+*/
+   union pipe_color_union clear_value;
+
+   memcpy(&clear_value, clear_value_ptr, clear_value_size);
+   si_blitter_begin(ctx, SI_DISABLE_RENDER_COND);
+   util_blitter_clear_buffer(sctx->blitter, dst, offset,
+ size, clear_value_size / 4,
+ &clear_value);
+   si_blitter_end(ctx);
+   return;
+   }
+   }
+
+   /* Expand the clear value to a dword. */
+   switch (clear_value_size) {
+   case 1:
+   dword_value = *(uint8_t*)clear_value_ptr;
+   dword_value |= (dword_value << 8) |
+  (dword_value << 16) |
+  (dword_value << 24);
+   break;
+   case 2:
+   dword_value = *(uint16_t*)clear_value_ptr;
+   dword_value |= dword_value << 16;
+   break;
+   default:
+   dword_value = *(uint32_t*)clear_value_ptr;
+   }
+
+   sctx->b.clear_buffer(ctx, dst, offset, size, dword_value, false);
+}
+
  void si_init_blit_functions(struct si_context *sctx)
  {
sctx->b.b.clear = si_clear;
+   sctx->b.b.clear_buffer = si_pipe_clear_buffer;
sctx->b.b.clear_render_target = si_clear_render_target;
sctx->b.b.clear_depth_stencil = si_clear_depth_stencil;
sctx->b.b.resource_copy_region = si_resource_copy_region;



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/7] gallium/radeon: inline the r600_rings structure

2015-11-09 Thread Nicolai Hähnle

The series is

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

On 08.11.2015 22:45, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

---
  src/gallium/drivers/r600/evergreen_compute.c| 14 ++---
  src/gallium/drivers/r600/evergreen_hw_context.c | 10 ++--
  src/gallium/drivers/r600/evergreen_state.c  | 66 
  src/gallium/drivers/r600/r600_blit.c|  2 +-
  src/gallium/drivers/r600/r600_hw_context.c  | 34 ++---
  src/gallium/drivers/r600/r600_pipe.c| 10 ++--
  src/gallium/drivers/r600/r600_state.c   | 68 -
  src/gallium/drivers/r600/r600_state_common.c| 36 ++---
  src/gallium/drivers/radeon/r600_buffer_common.c | 32 ++--
  src/gallium/drivers/radeon/r600_pipe_common.c   | 34 ++---
  src/gallium/drivers/radeon/r600_pipe_common.h   |  8 +--
  src/gallium/drivers/radeon/r600_query.c | 16 +++---
  src/gallium/drivers/radeon/r600_streamout.c | 18 +++
  src/gallium/drivers/radeonsi/cik_sdma.c | 14 ++---
  src/gallium/drivers/radeonsi/si_compute.c   | 12 ++---
  src/gallium/drivers/radeonsi/si_cp_dma.c| 10 ++--
  src/gallium/drivers/radeonsi/si_descriptors.c   | 38 +++---
  src/gallium/drivers/radeonsi/si_dma.c   | 14 ++---
  src/gallium/drivers/radeonsi/si_hw_context.c| 16 +++---
  src/gallium/drivers/radeonsi/si_pipe.c  |  8 +--
  src/gallium/drivers/radeonsi/si_pm4.c   |  6 +--
  src/gallium/drivers/radeonsi/si_state.c | 34 ++---
  src/gallium/drivers/radeonsi/si_state_draw.c| 24 -
  src/gallium/drivers/radeonsi/si_state_shaders.c |  4 +-
  24 files changed, 262 insertions(+), 266 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
b/src/gallium/drivers/r600/evergreen_compute.c
index 6f2b7ba..5743e3f 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -346,7 +346,7 @@ static void evergreen_emit_direct_dispatch(
const uint *block_layout, const uint *grid_layout)
  {
int i;
-   struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+   struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_pipe_compute *shader = rctx->cs_shader_state.shader;
unsigned num_waves;
unsigned num_pipes = rctx->screen->b.info.r600_max_pipes;
@@ -417,12 +417,12 @@ static void evergreen_emit_direct_dispatch(
  static void compute_emit_cs(struct r600_context *ctx, const uint 
*block_layout,
const uint *grid_layout)
  {
-   struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
+   struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
unsigned i;

/* make sure that the gfx ring is only one active */
-   if (ctx->b.rings.dma.cs && ctx->b.rings.dma.cs->cdw) {
-   ctx->b.rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+   if (ctx->b.dma.cs && ctx->b.dma.cs->cdw) {
+   ctx->b.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
}

/* Initialize all the compute-related registers.
@@ -439,7 +439,7 @@ static void compute_emit_cs(struct r600_context *ctx, const 
uint *block_layout,
/* XXX support more than 8 colorbuffers (the offsets are not a multiple 
of 0x3C for CB8-11) */
for (i = 0; i < 8 && i < ctx->framebuffer.state.nr_cbufs; i++) {
struct r600_surface *cb = (struct 
r600_surface*)ctx->framebuffer.state.cbufs[i];
-   unsigned reloc = radeon_add_to_buffer_list(&ctx->b, 
&ctx->b.rings.gfx,
+   unsigned reloc = radeon_add_to_buffer_list(&ctx->b, &ctx->b.gfx,
   (struct 
r600_resource*)cb->base.texture,
   RADEON_USAGE_READWRITE,
   
RADEON_PRIO_SHADER_RW_BUFFER);
@@ -538,7 +538,7 @@ void evergreen_emit_cs_shader(
struct r600_cs_shader_state *state =
(struct r600_cs_shader_state*)atom;
struct r600_pipe_compute *shader = state->shader;
-   struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+   struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
uint64_t va;
struct r600_resource *code_bo;
unsigned ngpr, nstack;
@@ -564,7 +564,7 @@ void evergreen_emit_cs_shader(
radeon_emit(cs, 0); /* R_0288D8_SQ_PGM_RESOURCES_LS_2 */

radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0));
-   radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
+   radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
  code_bo, RADEON_USAGE_READ,
  RADEON_PRIO_USER_SHADER));
  }
diff --git a/src/gallium/drivers/r600/everg

Re: [Mesa-dev] [PATCH 4/7] gallium/radeon: simplify restoring render condition after flush

2015-11-09 Thread Nicolai Hähnle

On 09.11.2015 10:43, Nicolai Hähnle wrote:

On 08.11.2015 22:48, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

---
  src/gallium/drivers/radeon/r600_pipe_common.c | 22
+-
  src/gallium/drivers/radeon/r600_pipe_common.h |  4 
  2 files changed, 5 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 8739914..224da11 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c

[snip]

@@ -173,12 +162,11 @@ void r600_postflush_resume_features(struct
r600_common_context *ctx)
  r600_resume_timer_queries(ctx);
  }

-/* Re-enable render condition. */
-if (ctx->saved_render_cond) {
-ctx->b.render_condition(&ctx->b, ctx->saved_render_cond,
-  ctx->saved_render_cond_cond,
-  ctx->saved_render_cond_mode);
-}
+/* Just re-emit PKT3_SET_PREDICATION. */
+if (ctx->current_render_cond)
+ctx->b.render_condition(&ctx->b, ctx->current_render_cond,
+ctx->current_render_cond_cond,
+ctx->current_render_cond_mode);


Drop the "Just" from the comment, because while it makes sense in the
context of the diff, it does not make sense when only looking at the new
code.


Never mind, this becomes moot anyway with patch 5.



Nicolai


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/7] gallium/radeon: simplify restoring render condition after flush

2015-11-09 Thread Nicolai Hähnle

On 08.11.2015 22:48, Marek Olšák wrote:

From: Marek Olšák 

---
  src/gallium/drivers/radeon/r600_pipe_common.c | 22 +-
  src/gallium/drivers/radeon/r600_pipe_common.h |  4 
  2 files changed, 5 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 8739914..224da11 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c

[snip]

@@ -173,12 +162,11 @@ void r600_postflush_resume_features(struct 
r600_common_context *ctx)
r600_resume_timer_queries(ctx);
}

-   /* Re-enable render condition. */
-   if (ctx->saved_render_cond) {
-   ctx->b.render_condition(&ctx->b, ctx->saved_render_cond,
- ctx->saved_render_cond_cond,
- ctx->saved_render_cond_mode);
-   }
+   /* Just re-emit PKT3_SET_PREDICATION. */
+   if (ctx->current_render_cond)
+   ctx->b.render_condition(&ctx->b, ctx->current_render_cond,
+   ctx->current_render_cond_cond,
+   ctx->current_render_cond_mode);


Drop the "Just" from the comment, because while it makes sense in the 
context of the diff, it does not make sense when only looking at the new 
code.


Nicolai
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/7] gallium/radeon: shorten render_cond variable names

2015-11-09 Thread Nicolai Hähnle

The series is

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

On 08.11.2015 22:48, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

and ..._cond -> ..._invert
---
  src/gallium/drivers/r600/r600_hw_context.c|  2 +-
  src/gallium/drivers/r600/r600_state_common.c  |  2 +-
  src/gallium/drivers/radeon/r600_pipe_common.h |  6 +++---
  src/gallium/drivers/radeon/r600_query.c   | 14 +++---
  src/gallium/drivers/radeon/r600_texture.c |  2 +-
  src/gallium/drivers/radeonsi/si_state_draw.c  |  2 +-
  6 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_hw_context.c 
b/src/gallium/drivers/r600/r600_hw_context.c
index 2383175..917808a 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -81,7 +81,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned 
num_dw,
}

/* Count in render_condition(NULL) at the end of CS. */
-   if (ctx->b.current_render_cond) {
+   if (ctx->b.render_cond) {
num_dw += 3;
}

diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index 5cf5208..d629194 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1478,7 +1478,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const 
struct pipe_draw_info
struct pipe_draw_info info = *dinfo;
struct pipe_index_buffer ib = {};
struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
-   bool render_cond_bit = rctx->b.current_render_cond && 
!rctx->b.render_cond_force_off;
+   bool render_cond_bit = rctx->b.render_cond && 
!rctx->b.render_cond_force_off;
uint64_t mask;

if (!info.indirect && !info.count && (info.indexed || 
!info.count_from_stream_output)) {
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index ba9000f..ebe633b 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -418,9 +418,9 @@ struct r600_common_context {

/* Render condition. */
struct r600_atom    render_cond_atom;
-   struct pipe_query   *current_render_cond;
-   unsigned            current_render_cond_mode;
-   boolean current_render_cond_cond;
+   struct pipe_query   *render_cond;
+   unsigned            render_cond_mode;
+   boolean render_cond_invert;
bool                render_cond_force_off; /* for u_blitter 
*/

/* MSAA sample locations.
diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 9f92587..8c2b601 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -307,7 +307,7 @@ static void r600_emit_query_predication(struct 
r600_common_context *ctx,
struct r600_atom *atom)
  {
struct radeon_winsys_cs *cs = ctx->gfx.cs;
-   struct r600_query *query = (struct r600_query*)ctx->current_render_cond;
+   struct r600_query *query = (struct r600_query*)ctx->render_cond;
struct r600_query_buffer *qbuf;
uint32_t op;
bool flag_wait;
@@ -315,8 +315,8 @@ static void r600_emit_query_predication(struct 
r600_common_context *ctx,
if (!query)
return;

-   flag_wait = ctx->current_render_cond_mode == PIPE_RENDER_COND_WAIT ||
-   ctx->current_render_cond_mode == 
PIPE_RENDER_COND_BY_REGION_WAIT;
+   flag_wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT ||
+   ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT;

switch (query->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
@@ -335,7 +335,7 @@ static void r600_emit_query_predication(struct 
r600_common_context *ctx,
}

/* if true then invert, see GL_ARB_conditional_render_inverted */
-   if (ctx->current_render_cond_cond)
+   if (ctx->render_cond_invert)
op |= PREDICATION_DRAW_NOT_VISIBLE; /* Draw if not 
visable/overflow */
else
op |= PREDICATION_DRAW_VISIBLE; /* Draw if visable/overflow */
@@ -831,9 +831,9 @@ static void r600_render_condition(struct pipe_context *ctx,
struct r600_query_buffer *qbuf;
struct r600_atom *atom = &rctx->render_cond_atom;

-   rctx->current_render_cond = query;
-   rctx->current_render_cond_cond = condition;
-   rctx->current_render_cond_mode = mode;
+   rctx->render_cond = query;
+   rctx->render_cond_invert = condition;
+   rctx->render_cond_mode = mode;

/* Compute the size of SET_PREDICATION pac

Re: [Mesa-dev] [PATCH] radeonsi: add basic glClearBufferSubData acceleration

2015-11-05 Thread Nicolai Hähnle

On 04.11.2015 00:47, Marek Olšák wrote:

From: Marek Olšák 

---
  src/gallium/drivers/radeonsi/si_blit.c | 55 ++
  1 file changed, 55 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index fce014a..e934146 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -731,9 +731,64 @@ static void si_flush_resource(struct pipe_context *ctx,
}
  }

+static void si_pipe_clear_buffer(struct pipe_context *ctx,
+struct pipe_resource *dst,
+unsigned offset, unsigned size,
+const void *clear_value,
+int clear_value_size)
+{
+   struct si_context *sctx = (struct si_context*)ctx;
+   const uint32_t *u32 = clear_value;
+   unsigned i;
+   bool clear_value_fits_dword = true;
+   uint8_t *map;
+
+   if (clear_value_size > 4)
+   for (i = 1; i < clear_value_size / 4; i++)
+   if (u32[0] != u32[i]) {
+   clear_value_fits_dword = false;
+   break;
+   }
+
+   /* Use CP DMA for the simple case. */
+   if (offset % 4 == 0 && size % 4 == 0 && clear_value_fits_dword) {
+   uint32_t value = u32[0];
+
+   switch (clear_value_size) {
+   case 1:
+   value &= 0xff;
+   value |= (value << 8) | (value << 16) | (value << 24);
+   break;
+   case 2:
+   value &= 0xffff;
+   value |= value << 16;
+   break;
+   }


To reduce the chance of complaints by valgrind et al:

switch (clear_value_size) {
case 1:
value = *(uint8_t *)u32;
value |= (value << 8) | (value << 16) | (value << 24);
break;
case 2:
value = *(uint16_t *)u32;
value |= value << 16;
break;
default:
value = *u32;
break;
}

Cheers,
Nicolai


+
+   sctx->b.clear_buffer(ctx, dst, offset, size, value, false);
+   return;
+   }
+
+   /* TODO: use a compute shader for other cases. */
+
+   /* Software fallback. */
+   map = r600_buffer_map_sync_with_rings(&sctx->b, r600_resource(dst),
+ PIPE_TRANSFER_WRITE);
+   if (!map)
+   return;
+
+   map += offset;
+   size /= clear_value_size;
+   for (i = 0; i < size; i++) {
+   memcpy(map, clear_value, clear_value_size);
+   map += clear_value_size;
+   }
+}
+
  void si_init_blit_functions(struct si_context *sctx)
  {
sctx->b.b.clear = si_clear;
+   sctx->b.b.clear_buffer = si_pipe_clear_buffer;
sctx->b.b.clear_render_target = si_clear_render_target;
sctx->b.b.clear_depth_stencil = si_clear_depth_stencil;
sctx->b.b.resource_copy_region = si_resource_copy_region;



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] u_vbuf: fix vb slot assignment for translated buffers

2015-10-12 Thread Nicolai Hähnle

On 09.10.2015 23:57, Marek Olšák wrote:

Do you still have commit access and should somebody else push this?


I have to figure out how to get somebody to pay attention to 
https://bugs.freedesktop.org/show_bug.cgi?id=92281


Feel free to push this in the meantime; otherwise I'll eventually do it 
once I have access again.


Nicolai



Marek

On Sun, Oct 4, 2015 at 2:19 PM, Marek Olšák <mar...@gmail.com> wrote:

Oh, I forgot this:

Reviewed-by: Marek Olšák <marek.ol...@amd.com>

Marek

On Sun, Oct 4, 2015 at 2:03 PM, Marek Olšák <mar...@gmail.com> wrote:

Nice catch. Please add this to the commit message:

Cc: mesa-sta...@lists.freedesktop.org

It will be automatically picked for 11.0 after you push it.

Marek

On Sun, Oct 4, 2015 at 12:09 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote:

Vertex attributes of different categories (constant/per-instance/
per-vertex) go into different buffers for translation, and this is now
properly reflected in the vertex buffers passed to the driver.

Fixes e.g. piglit's point-vertex-id divisor test.
---
  src/gallium/auxiliary/util/u_vbuf.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/src/gallium/auxiliary/util/u_vbuf.c 
b/src/gallium/auxiliary/util/u_vbuf.c
index 3d2193c..b31ada1 100644
--- a/src/gallium/auxiliary/util/u_vbuf.c
+++ b/src/gallium/auxiliary/util/u_vbuf.c
@@ -544,6 +544,7 @@ u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,

   index = ffs(unused_vb_mask) - 1;
   fallback_vbs[type] = index;
+ unused_vb_mask &= ~(1 << index);
   /*printf("found slot=%i for type=%i\n", index, type);*/
}
 }
--
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeon: ensure that timing/profiling queries are suspended on flush

2015-11-18 Thread Nicolai Hähnle
The queries_suspended_for_flush flag is redundant because suspended queries
are not removed from their respective linked list.
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 13 ++---
 src/gallium/drivers/radeon/r600_pipe_common.h |  2 --
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 60be412..f03dcd9 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -27,6 +27,7 @@
 #include "r600_pipe_common.h"
 #include "r600_cs.h"
 #include "tgsi/tgsi_parse.h"
+#include "util/list.h"
 #include "util/u_draw_quad.h"
 #include "util/u_memory.h"
 #include "util/u_format_s3tc.h"
@@ -135,12 +136,10 @@ static void r600_memory_barrier(struct pipe_context *ctx, 
unsigned flags)
 void r600_preflush_suspend_features(struct r600_common_context *ctx)
 {
/* suspend queries */
-   ctx->queries_suspended_for_flush = false;
-   if (ctx->num_cs_dw_nontimer_queries_suspend) {
+   if (!LIST_IS_EMPTY(&ctx->active_nontimer_queries))
r600_suspend_nontimer_queries(ctx);
+   if (!LIST_IS_EMPTY(&ctx->active_timer_queries))
r600_suspend_timer_queries(ctx);
-   ctx->queries_suspended_for_flush = true;
-   }
 
ctx->streamout.suspended = false;
if (ctx->streamout.begin_emitted) {
@@ -157,10 +156,10 @@ void r600_postflush_resume_features(struct 
r600_common_context *ctx)
}
 
/* resume queries */
-   if (ctx->queries_suspended_for_flush) {
-   r600_resume_nontimer_queries(ctx);
+   if (!LIST_IS_EMPTY(&ctx->active_timer_queries))
r600_resume_timer_queries(ctx);
-   }
+   if (!LIST_IS_EMPTY(&ctx->active_nontimer_queries))
+   r600_resume_nontimer_queries(ctx);
 }
 
 static void r600_flush_from_st(struct pipe_context *ctx,
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index f9fecdf..253d657 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -397,8 +397,6 @@ struct r600_common_context {
struct list_head    active_timer_queries;
unsigned            num_cs_dw_nontimer_queries_suspend;
unsigned            num_cs_dw_timer_queries_suspend;
-   /* If queries have been suspended. */
-   bool                queries_suspended_for_flush;
/* Additional hardware info. */
unsigned            backend_mask;
unsigned            max_db; /* for OQ */
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] util: move brw_env_var_as_boolean() to util

2015-11-19 Thread Nicolai Hähnle

Hi Rob,

On 18.11.2015 23:20, Rob Clark wrote:

From: Rob Clark 

Kind of a handy function.  And I'll want it available outside of i965
for common nir-pass helpers.

Signed-off-by: Rob Clark 
---
  src/mesa/drivers/dri/i965/brw_context.c  |  5 +++--
  src/mesa/drivers/dri/i965/brw_nir.c  |  4 +++-
  src/mesa/drivers/dri/i965/brw_shader.cpp |  3 ++-
  src/mesa/drivers/dri/i965/intel_debug.c  | 25 -
  src/mesa/drivers/dri/i965/intel_debug.h  |  2 --
  src/util/debug.c | 25 +
  src/util/debug.h |  2 ++
  7 files changed, 35 insertions(+), 31 deletions(-)

[.. snip ...]

diff --git a/src/util/debug.c b/src/util/debug.c
index 3729ce8..98b1853 100644
--- a/src/util/debug.c
+++ b/src/util/debug.c
@@ -51,3 +51,28 @@ parse_debug_string(const char *debug,

 return flag;
  }
+
+/**
+ * Reads an environment variable and interprets its value as a boolean.
+ *
+ * Recognizes 0/false/no and 1/true/yes.  Other values result in the default 
value.
+ */
+bool
+env_var_as_boolean(const char *var_name, bool default_value)
+{
+   const char *str = getenv(var_name);
+   if (str == NULL)
+  return default_value;
+
+   if (strcmp(str, "1") == 0 ||
+   strcasecmp(str, "true") == 0 ||
+   strcasecmp(str, "yes") == 0) {
+  return true;
+   } else if (strcmp(str, "0") == 0 ||
+  strcasecmp(str, "false") == 0 ||
+  strcasecmp(str, "no") == 0) {
+  return false;
+   } else {
+  return default_value;
+   }
+}


This all looks good to me. I do have two suggestions to slightly improve 
usability:


1) Add "on" and "off" as recognized values.

2) Add something to the effect of `fprintf(stderr, "%s: value not 
recognized, using default.\n", var_name);` to the default value branch.
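
To make both suggestions concrete, here is a standalone sketch of how the helper could look with them applied (an illustration of the idea only, not a finished replacement for the patch):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

static bool
env_var_as_boolean(const char *var_name, bool default_value)
{
   const char *str = getenv(var_name);
   if (str == NULL)
      return default_value;

   if (strcmp(str, "1") == 0 ||
       strcasecmp(str, "true") == 0 ||
       strcasecmp(str, "yes") == 0 ||
       strcasecmp(str, "on") == 0)
      return true;

   if (strcmp(str, "0") == 0 ||
       strcasecmp(str, "false") == 0 ||
       strcasecmp(str, "no") == 0 ||
       strcasecmp(str, "off") == 0)
      return false;

   /* Warn so a typo in the environment doesn't silently fall through. */
   fprintf(stderr, "%s: value \"%s\" not recognized, using default.\n",
           var_name, str);
   return default_value;
}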


Either way, feel free to add my R-b.

Cheers,
Nicolai


diff --git a/src/util/debug.h b/src/util/debug.h
index 801736a..3555417 100644
--- a/src/util/debug.h
+++ b/src/util/debug.h
@@ -38,6 +38,8 @@ struct debug_control {
  uint64_t
  parse_debug_string(const char *debug,
 const struct debug_control *control);
+bool
+env_var_as_boolean(const char *var_name, bool default_value);

  #ifdef __cplusplus
  } /* extern C */



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] u_vbuf: fix vb slot assignment for translated buffers

2015-10-04 Thread Nicolai Hähnle
Vertex attributes of different categories (constant/per-instance/
per-vertex) go into different buffers for translation, and this is now
properly reflected in the vertex buffers passed to the driver.

Fixes e.g. piglit's point-vertex-id divisor test.
---
 src/gallium/auxiliary/util/u_vbuf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/auxiliary/util/u_vbuf.c 
b/src/gallium/auxiliary/util/u_vbuf.c
index 3d2193c..b31ada1 100644
--- a/src/gallium/auxiliary/util/u_vbuf.c
+++ b/src/gallium/auxiliary/util/u_vbuf.c
@@ -544,6 +544,7 @@ u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,
 
  index = ffs(unused_vb_mask) - 1;
  fallback_vbs[type] = index;
+ unused_vb_mask &= ~(1 << index);
  /*printf("found slot=%i for type=%i\n", index, type);*/
   }
}
-- 
2.1.4
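
For anyone wondering why this single line matters, here is a tiny standalone illustration of the slot-assignment idiom; the initially occupied slots are made up:

#include <stdio.h>
#include <strings.h>   /* ffs() */

int main(void)
{
   /* Pretend slots 0 and 1 are taken; slots 2..31 are free. */
   unsigned unused_vb_mask = 0xfffffffc;
   int fallback_vbs[3];

   for (int type = 0; type < 3; type++) {
      int index = ffs(unused_vb_mask) - 1;
      fallback_vbs[type] = index;
      /* Without this line, every type would be handed slot 2. */
      unused_vb_mask &= ~(1u << index);
   }

   printf("slots: %d %d %d\n",
          fallback_vbs[0], fallback_vbs[1], fallback_vbs[2]);
   /* Prints: slots: 2 3 4 */
   return 0;
}

With the mask update in place, each attribute category lands in its own vertex-buffer slot, which is exactly what the one-line fix restores.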

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/10] gallium/pb_cache: add a copy of cache bufmgr independent of pb_manager

2015-12-08 Thread Nicolai Hähnle

On 06.12.2015 19:00, Marek Olšák wrote:

From: Marek Olšák 

This simplified (basically duplicated) version of pb_cache_manager will
allow removing some ugly hacks from radeon and amdgpu winsyses and
flatten and simplify their design.

The difference is that winsyses must manually add buffers to the cache
in "destroy" functions and the cache doesn't know about the buffers before
that. The integration is therefore trivial and the impact on the winsys
design is negligible.
---
  src/gallium/auxiliary/Makefile.sources  |   1 +
  src/gallium/auxiliary/pipebuffer/pb_cache.c | 286 
  src/gallium/auxiliary/pipebuffer/pb_cache.h |  74 +++
  3 files changed, 361 insertions(+)
  create mode 100644 src/gallium/auxiliary/pipebuffer/pb_cache.c
  create mode 100644 src/gallium/auxiliary/pipebuffer/pb_cache.h

diff --git a/src/gallium/auxiliary/Makefile.sources 
b/src/gallium/auxiliary/Makefile.sources
index 6160192..817308d 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -93,6 +93,7 @@ C_SOURCES := \
pipebuffer/pb_bufmgr_ondemand.c \
pipebuffer/pb_bufmgr_pool.c \
pipebuffer/pb_bufmgr_slab.c \
+   pipebuffer/pb_cache.c \


I believe pb_cache.h needs to be added as well.


pipebuffer/pb_validate.c \
pipebuffer/pb_validate.h \
postprocess/filters.h \
diff --git a/src/gallium/auxiliary/pipebuffer/pb_cache.c 
b/src/gallium/auxiliary/pipebuffer/pb_cache.c
new file mode 100644
index 000..45f600d
--- /dev/null
+++ b/src/gallium/auxiliary/pipebuffer/pb_cache.c

...

+/**
+ * \return 1   if compatible and can be reclaimed
+ * 0   if incompatible
+ *-1   if compatible and can't be reclaimed
+ */
+static int
+pb_cache_is_buffer_compat(struct pb_cache_entry *entry,
+  pb_size size, unsigned alignment, unsigned usage)
+{
+   struct pb_buffer *buf = entry->buffer;
+
+   if (usage & entry->mgr->bypass_usage)
+  return 0;


It should be possible to move this test to the top of 
pb_cache_reclaim_buffer, right?



+   if (buf->size < size)
+  return 0;
+
+   /* be lenient with size */
+   if (buf->size > (unsigned) (entry->mgr->size_factor * size))
+  return 0;
+
+   if (!pb_check_alignment(alignment, buf->alignment))
+  return 0;
+
+   if (!pb_check_usage(usage, buf->usage))
+  return 0;
+
+   return entry->mgr->can_reclaim(buf) ? 1 : -1;
+}
+
+/**
+ * Find a compatible buffer in the cache, return it, and remove it
+ * from the cache.
+ */
+struct pb_buffer *
+pb_cache_reclaim_buffer(struct pb_cache *mgr, pb_size size,
+unsigned alignment, unsigned usage)
+{
+   struct pb_cache_entry *entry;
+   struct pb_cache_entry *cur_entry;
+   struct list_head *cur, *next;
+   int64_t now;
+   int ret = 0;
+
+   pipe_mutex_lock(mgr->mutex);
+
+   entry = NULL;
+   cur = mgr->cache.next;
+   next = cur->next;
+
+   /* search in the expired buffers, freeing them in the process */
+   now = os_time_get();
+   while (cur != &mgr->cache) {
+  cur_entry = LIST_ENTRY(struct pb_cache_entry, cur, head);
+
+  if (!entry && (ret = pb_cache_is_buffer_compat(cur_entry, size,
+ alignment, usage) > 0))
+ entry = cur_entry;
+  else if (os_time_timeout(cur_entry->start, cur_entry->end, now))
+ destroy_buffer_locked(cur_entry);
+  else
+ /* This buffer (and all hereafter) are still hot in cache */
+ break;
+
+  /* the buffer is busy (and probably all remaining ones too) */
+  if (ret == -1)
+ break;
+
+  cur = next;
+  next = cur->next;
+   }
+
+   /* keep searching in the hot buffers */
+   if (!entry && ret != -1) {
+  while (cur != &mgr->cache) {
+ cur_entry = LIST_ENTRY(struct pb_cache_entry, cur, head);
+ ret = pb_cache_is_buffer_compat(cur_entry, size, alignment, usage);
+
+ if (ret > 0) {
+entry = cur_entry;
+break;
+ }
+ if (ret == -1)
+break;
+ /* no need to check the timeout here */
+ cur = next;
+ next = cur->next;
+  }
+   }
+
+   /* found a compatible buffer, return it */
+   if (entry) {
+  struct pb_buffer *buf = entry->buffer;
+
+  mgr->cache_size -= buf->size;
+  LIST_DEL(&entry->head);
+  --mgr->num_buffers;
+  pipe_mutex_unlock(mgr->mutex);
+  /* Increase refcount */
+  pipe_reference_init(&buf->reference, 1);
+  return buf;
+   }
+
+   pipe_mutex_unlock(mgr->mutex);
+   return NULL;
+}
+
+/**
+ * Empty the cache. Useful when there is not enough memory.
+ */
+void
+pb_cache_release_all_buffers(struct pb_cache *mgr)
+{
+   struct list_head *curr, *next;
+   struct pb_cache_entry *buf;
+
+   pipe_mutex_lock(mgr->mutex);
+   curr = mgr->cache.next;
+   next = curr->next;
+   while (curr != &mgr->cache) {
+  buf = LIST_ENTRY(struct pb_cache_entry, curr, head);
+  
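
Coming back to the integration model from the commit message (the winsys hands buffers to the cache only from its destroy hook, and the cache asks the winsys through a callback whether a buffer may be reclaimed), here is a deliberately simplified toy model of that flow. These are not the real pb_cache types or entry points, just an illustration of the division of responsibilities:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy stand-ins for the design, NOT the real pb_cache types or API. */
struct toy_buf {
   unsigned size;
   bool busy;              /* stand-in for "still referenced by the GPU" */
   struct toy_buf *next;
};

struct toy_cache {
   struct toy_buf *head;
   bool (*can_reclaim)(struct toy_buf *);
};

/* Called by the winsys from its destroy hook instead of freeing the buffer. */
static void toy_cache_add(struct toy_cache *c, struct toy_buf *b)
{
   b->next = c->head;
   c->head = b;
}

/* Called on allocation: reuse a cached buffer if the winsys allows it. */
static struct toy_buf *toy_cache_reclaim(struct toy_cache *c, unsigned size)
{
   struct toy_buf **prev = &c->head;
   for (struct toy_buf *b = c->head; b; prev = &b->next, b = b->next) {
      if (b->size >= size && c->can_reclaim(b)) {
         *prev = b->next;   /* unlink and hand the buffer back */
         return b;
      }
   }
   return NULL;             /* caller allocates a fresh buffer */
}

static bool not_busy(struct toy_buf *b) { return !b->busy; }

int main(void)
{
   struct toy_cache cache = { NULL, not_busy };
   struct toy_buf *a = calloc(1, sizeof(*a));
   a->size = 4096;
   toy_cache_add(&cache, a);                /* winsys "destroys" the buffer */
   printf("reclaimed: %s\n",
          toy_cache_reclaim(&cache, 4096) == a ? "yes" : "no");
   free(a);
   return 0;
}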

Re: [Mesa-dev] [PATCH 09/10] winsys/radeon: use pb_cache instead of pb_cache_manager

2015-12-08 Thread Nicolai Hähnle

On 06.12.2015 19:01, Marek Olšák wrote:

From: Marek Olšák 

This is a prerequisite for the removal of radeon_winsys_cs_handle.
---
  src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 212 +++---
  src/gallium/winsys/radeon/drm/radeon_drm_bo.h |  14 +-
  src/gallium/winsys/radeon/drm/radeon_drm_winsys.c |  22 +--
  src/gallium/winsys/radeon/drm/radeon_drm_winsys.h |   4 +-
  4 files changed, 74 insertions(+), 178 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 4c38379..9532c77 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c

...

  static const struct pb_vtbl radeon_bo_vtbl = {
-radeon_bo_destroy,
-NULL, /* never called */
-NULL, /* never called */
-radeon_bo_validate,
-radeon_bo_fence,
-radeon_bo_get_base_buffer,
+radeon_bo_destroy_or_cache
  };


I take it the other functions aren't called anymore? Perhaps this patch 
and #3 could use an explanation to that effect.


Nicolai



  #ifndef RADEON_GEM_GTT_WC
@@ -540,40 +490,39 @@ static const struct pb_vtbl radeon_bo_vtbl = {
  #define RADEON_GEM_NO_CPU_ACCESS  (1 << 4)
  #endif

-static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
-pb_size size,
-const struct pb_desc *desc)
+static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
+  unsigned size, unsigned alignment,
+  unsigned usage,
+  unsigned initial_domains,
+  unsigned flags)
  {
-struct radeon_bomgr *mgr = radeon_bomgr(_mgr);
-struct radeon_drm_winsys *rws = mgr->rws;
  struct radeon_bo *bo;
  struct drm_radeon_gem_create args;
-struct radeon_bo_desc *rdesc = (struct radeon_bo_desc*)desc;
  int r;

  memset(&args, 0, sizeof(args));

-assert(rdesc->initial_domains);
-assert((rdesc->initial_domains &
+assert(initial_domains);
+assert((initial_domains &
  ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);

  args.size = size;
-args.alignment = desc->alignment;
-args.initial_domain = rdesc->initial_domains;
+args.alignment = alignment;
+args.initial_domain = initial_domains;
  args.flags = 0;

-if (rdesc->flags & RADEON_FLAG_GTT_WC)
+if (flags & RADEON_FLAG_GTT_WC)
  args.flags |= RADEON_GEM_GTT_WC;
-if (rdesc->flags & RADEON_FLAG_CPU_ACCESS)
+if (flags & RADEON_FLAG_CPU_ACCESS)
  args.flags |= RADEON_GEM_CPU_ACCESS;
-if (rdesc->flags & RADEON_FLAG_NO_CPU_ACCESS)
+if (flags & RADEON_FLAG_NO_CPU_ACCESS)
  args.flags |= RADEON_GEM_NO_CPU_ACCESS;

  if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
  &args, sizeof(args))) {
  fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
  fprintf(stderr, "radeon:size  : %d bytes\n", size);
-fprintf(stderr, "radeon:alignment : %d bytes\n", desc->alignment);
+fprintf(stderr, "radeon:alignment : %d bytes\n", alignment);
  fprintf(stderr, "radeon:domains   : %d\n", args.initial_domain);
  fprintf(stderr, "radeon:flags : %d\n", args.flags);
  return NULL;
@@ -584,20 +533,21 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct 
pb_manager *_mgr,
  return NULL;

  pipe_reference_init(&bo->base.reference, 1);
-bo->base.alignment = desc->alignment;
-bo->base.usage = desc->usage;
+bo->base.alignment = alignment;
+bo->base.usage = usage;
  bo->base.size = size;
  bo->base.vtbl = &radeon_bo_vtbl;
  bo->rws = rws;
  bo->handle = args.handle;
  bo->va = 0;
-bo->initial_domain = rdesc->initial_domains;
+bo->initial_domain = initial_domains;
  pipe_mutex_init(bo->map_mutex);
+pb_cache_init_entry(&rws->bo_cache, &bo->cache_entry, &bo->base);

  if (rws->info.r600_virtual_address) {
  struct drm_radeon_gem_va va;

-bo->va = radeon_bomgr_find_va(rws, size, desc->alignment);
+bo->va = radeon_bomgr_find_va(rws, size, alignment);

  va.handle = bo->handle;
  va.vm_id = 0;
@@ -610,7 +560,7 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct 
pb_manager *_mgr,
  if (r && va.operation == RADEON_VA_RESULT_ERROR) {
  fprintf(stderr, "radeon: Failed to allocate virtual address for 
buffer:\n");
  fprintf(stderr, "radeon:size  : %d bytes\n", size);
-fprintf(stderr, "radeon:alignment : %d bytes\n", 
desc->alignment);
+fprintf(stderr, "radeon:alignment : %d bytes\n", alignment);
  fprintf(stderr, "radeon:domains   : %d\n", 
args.initial_domain);
  

Re: [Mesa-dev] [PATCH 00/10] Rework of pb_cache_manager for removal of radeon_winsys_cs_handle

2015-12-08 Thread Nicolai Hähnle

On 06.12.2015 19:00, Marek Olšák wrote:

This series addresses the weirdness in radeon drivers that every buffer has 2 
handles:
- pb_buffer (== pb_cache_buffer)
- radeon_winsys_cs_handle (winsys-specific pb_buffer)

The inefficiency of converting pb_cache_buffer into the winsys-specific buffer 
made me introduce radeon_winsys_cs_handle a few years ago, which has been used 
for almost everything (map/unmap/command submission/etc.) and pb_buffer has 
only held the reference keeping the buffer alive.

Now it's time to do this right.

This series moves the pb_cache_manager logic into an independent module 
"pb_cache". Next, all dependencies on pb_manager are removed from both winsyses 
and the new module is used instead. The result is that pb_buffer is equal to 
radeon_winsys_cs_handle, and the latter can be removed.


Very nice! I only have some comments on #1 and an identical remark about 
#3 & #9 (those two patches are also a bit awkward because they combine 
several seemingly unrelated changes, though I don't mind *that* much).


Modulo the comments mentioned above, the series is

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

Cheers,
Nicolai

Please review.


Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi: last_gfx_fence is a winsys fence

2015-12-07 Thread Nicolai Hähnle
From: Nicolai Hähnle <nicolai.haeh...@amd.com>

Cc: "11.1" <mesa-sta...@lists.freedesktop.org>
---
 src/gallium/drivers/radeonsi/si_debug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_debug.c 
b/src/gallium/drivers/radeonsi/si_debug.c
index 0a4e0f9..cce665e 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -668,7 +668,7 @@ void si_check_vm_faults(struct si_context *sctx)
/* Use conservative timeout 800ms, after which we won't wait any
 * longer and assume the GPU is hung.
 */
-   screen->fence_finish(screen, sctx->last_gfx_fence, 800*1000*1000);
+   sctx->b.ws->fence_wait(sctx->b.ws, sctx->last_gfx_fence, 800*1000*1000);
 
if (!si_vm_fault_occured(sctx, ))
return;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] gallium/ddebug: add GALLIUM_DDEBUG_SKIP option

2015-12-09 Thread Nicolai Hähnle
From: Nicolai Hähnle <nicolai.haeh...@amd.com>

When we know that hangs occur only very late in a reproducible run (e.g.
apitrace), we can save a lot of debugging time by skipping the flush and hang
detection for earlier draw calls.
---
 src/gallium/drivers/ddebug/dd_draw.c   | 39 +-
 src/gallium/drivers/ddebug/dd_pipe.h   |  3 +++
 src/gallium/drivers/ddebug/dd_screen.c |  9 
 3 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/ddebug/dd_draw.c 
b/src/gallium/drivers/ddebug/dd_draw.c
index b443c5b..0778099 100644
--- a/src/gallium/drivers/ddebug/dd_draw.c
+++ b/src/gallium/drivers/ddebug/dd_draw.c
@@ -588,8 +588,11 @@ dd_context_flush(struct pipe_context *_pipe,
 static void
 dd_before_draw(struct dd_context *dctx)
 {
-   if (dd_screen(dctx->base.screen)->mode == DD_DETECT_HANGS &&
-   !dd_screen(dctx->base.screen)->no_flush)
+   struct dd_screen *dscreen = dd_screen(dctx->base.screen);
+
+   if (dscreen->mode == DD_DETECT_HANGS &&
+   !dscreen->no_flush &&
+   dctx->num_draw_calls >= dscreen->skip_count)
   dd_flush_and_handle_hang(dctx, NULL, 0,
"GPU hang most likely caused by internal "
"driver commands");
@@ -598,22 +601,28 @@ dd_before_draw(struct dd_context *dctx)
 static void
 dd_after_draw(struct dd_context *dctx, struct dd_call *call)
 {
-   switch (dd_screen(dctx->base.screen)->mode) {
-   case DD_DETECT_HANGS:
-  if (!dd_screen(dctx->base.screen)->no_flush &&
-  dd_flush_and_check_hang(dctx, NULL, 0)) {
- dd_dump_call(dctx, call, PIPE_DEBUG_DEVICE_IS_HUNG);
+   struct dd_screen *dscreen = dd_screen(dctx->base.screen);
 
- /* Terminate the process to prevent future hangs. */
- dd_kill_process();
+   if (dctx->num_draw_calls >= dscreen->skip_count) {
+  switch (dscreen->mode) {
+  case DD_DETECT_HANGS:
+ if (!dscreen->no_flush &&
+dd_flush_and_check_hang(dctx, NULL, 0)) {
+dd_dump_call(dctx, call, PIPE_DEBUG_DEVICE_IS_HUNG);
+
+/* Terminate the process to prevent future hangs. */
+dd_kill_process();
+ }
+ break;
+  case DD_DUMP_ALL_CALLS:
+ dd_dump_call(dctx, call, 0);
+ break;
+  default:
+ assert(0);
   }
-  break;
-   case DD_DUMP_ALL_CALLS:
-  dd_dump_call(dctx, call, 0);
-  break;
-   default:
-  assert(0);
}
+
+   ++dctx->num_draw_calls;
 }
 
 static void
diff --git a/src/gallium/drivers/ddebug/dd_pipe.h 
b/src/gallium/drivers/ddebug/dd_pipe.h
index 34f5920..a045518 100644
--- a/src/gallium/drivers/ddebug/dd_pipe.h
+++ b/src/gallium/drivers/ddebug/dd_pipe.h
@@ -45,6 +45,7 @@ struct dd_screen
unsigned timeout_ms;
enum dd_mode mode;
bool no_flush;
+   unsigned skip_count;
 };
 
 struct dd_query
@@ -110,6 +111,8 @@ struct dd_context
struct pipe_scissor_state scissors[PIPE_MAX_VIEWPORTS];
struct pipe_viewport_state viewports[PIPE_MAX_VIEWPORTS];
float tess_default_levels[6];
+
+   unsigned num_draw_calls;
 };
 
 
diff --git a/src/gallium/drivers/ddebug/dd_screen.c 
b/src/gallium/drivers/ddebug/dd_screen.c
index a776580..2716845 100644
--- a/src/gallium/drivers/ddebug/dd_screen.c
+++ b/src/gallium/drivers/ddebug/dd_screen.c
@@ -290,6 +290,9 @@ ddebug_screen_create(struct pipe_screen *screen)
   puts("$HOME/"DD_DIR"/ when a hang is detected.");
   puts("If 'noflush' is specified, only detect hangs in pipe->flush.");
   puts("");
+  puts("  GALLIUM_DDEBUG_SKIP=[count]");
+  puts("Skip flush and hang detection for the given initial number of 
draw calls.");
+  puts("");
   exit(0);
}
 
@@ -349,5 +352,11 @@ ddebug_screen_create(struct pipe_screen *screen)
   assert(0);
}
 
+   dscreen->skip_count = debug_get_num_option("GALLIUM_DDEBUG_SKIP", 0);
+   if (dscreen->skip_count > 0) {
+  fprintf(stderr, "Gallium debugger skipping the first %u draw calls.\n",
+  dscreen->skip_count);
+   }
+
   return &dscreen->base;
 }
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] gallium/ddebug: regularly log the total number of draw calls

2015-12-09 Thread Nicolai Hähnle
From: Nicolai Hähnle <nicolai.haeh...@amd.com>

This helps in the use of GALLIUM_DDEBUG_SKIP: first run a target application
with skip set to a very large number and note how many draw calls happen
before the bug. Then re-run, skipping the corresponding number of calls.
Despite the additional run, this can still be much faster than not skipping
anything.
---
 src/gallium/drivers/ddebug/dd_draw.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/drivers/ddebug/dd_draw.c 
b/src/gallium/drivers/ddebug/dd_draw.c
index 0778099..0d7ee9a 100644
--- a/src/gallium/drivers/ddebug/dd_draw.c
+++ b/src/gallium/drivers/ddebug/dd_draw.c
@@ -623,6 +623,9 @@ dd_after_draw(struct dd_context *dctx, struct dd_call *call)
}
 
++dctx->num_draw_calls;
+   if (dscreen->skip_count && dctx->num_draw_calls % 10000 == 0)
+  fprintf(stderr, "Gallium debugger reached %u draw calls.\n",
+  dctx->num_draw_calls);
 }
 
 static void
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] gallium r300 driver for PowerPC

2015-12-14 Thread Nicolai Hähnle

On 14.12.2015 04:10, Eero Tamminen wrote:

On 12/14/2015 10:44 AM, Herminio Hernandez, Jr. wrote:

I am new to this list. I have been trying to see if I can fix or at
least pinpoint an issue with the Radeon r300 driver failing on PowerPC
systems. This has been a problem for a while and I would like to help
to get this fixed. I have done some debugging with valgrind and I
think I may see where the issue is, but I would like to have someone double
check what I am doing. When I set my Default Depth to 16 I do get
3D acceleration, but when I set it to the default of 24 it breaks.
Valgrind reports memory leaks when I run glxgears with a Default Depth
of 24 but shows no definite memory leaks with a Depth of 16. I then
got the source code, created a dev environment, and ran glxgears
through valgrind with my default depth of 24 and saw similar memory
leaks. Here is a sample of what I am seeing.

==25273== 108 (12 direct, 96 indirect) bytes in 1 blocks are
definitely lost in loss record 54 of 78
==25273==at 0xFFB2868: malloc (vg_replace_malloc.c:299)
==25273==by 0xED0457B: ???
==25273==by 0xEEC6F3B: ???
==25273==by 0xE95A78B: ???
==25273==by 0xED7DF7F: ???
==25273==by 0xED7D5DB: ???
==25273==by 0xEC5B377: ???
==25273==by 0xEC567EB: ???
==25273==by 0xFDEDFD3: dri2CreateScreen (dri2_glx.c:1235)
==25273==by 0xFDB866F: AllocAndFetchScreenConfigs (glxext.c:799)
==25273==by 0xFDB866F: __glXInitialize (glxext.c:910)
==25273==by 0xFDB36F3: GetGLXPrivScreenConfig.part.2 (glxcmds.c:172)
==25273==by 0xFDB396B: GetGLXPrivScreenConfig (glxcmds.c:168)
==25273==by 0xFDB396B: glXChooseVisual (glxcmds.c:1249)

It looks like the files in the src/glx directory are where the issue is. I
am attaching the summary portion of the output I got from valgrind. Am
I heading in the right direction?


Install debug symbols for the libraries that Valgrind is complaining
about, to see what actually leaks.  Because they all come in through
GetGLXPrivScreenConfig(), I think this is something (inconsequential) in
your X libraries, not Mesa.


This is below dri2CreateScreen in the call stack, so it's actually quite 
plausible that it's in the driver. Make sure you have those debug symbols.


Cheers,
Nicolai
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] mesa/main: use BITSET_FOREACH_SET in perf_monitor_result_size

2015-12-14 Thread Nicolai Hähnle
From: Nicolai Hähnle <nicolai.haeh...@amd.com>

This should make the code both faster and slightly clearer.
---
 src/mesa/main/performance_monitor.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/mesa/main/performance_monitor.c 
b/src/mesa/main/performance_monitor.c
index 98dfbea..43529b2 100644
--- a/src/mesa/main/performance_monitor.c
+++ b/src/mesa/main/performance_monitor.c
@@ -591,11 +591,10 @@ perf_monitor_result_size(const struct gl_context *ctx,
 
for (group = 0; group < ctx->PerfMonitor.NumGroups; group++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[group];
-  for (counter = 0; counter < g->NumCounters; counter++) {
- const struct gl_perf_monitor_counter *c = &g->Counters[counter];
+  BITSET_WORD tmp;
 
- if (!BITSET_TEST(m->ActiveCounters[group], counter))
-continue;
+  BITSET_FOREACH_SET(counter, tmp, m->ActiveCounters[group], 
g->NumCounters) {
+ const struct gl_perf_monitor_counter *c = &g->Counters[counter];
 
  size += sizeof(uint32_t); /* Group ID */
  size += sizeof(uint32_t); /* Counter ID */
-- 
2.5.0
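
For readers who have not met the macro before, here is a single-word sketch of the access pattern that BITSET_FOREACH_SET enables. This is not the actual Mesa macro (which also handles multi-word bitsets); it only illustrates visiting set bits instead of testing every index:

#include <stdio.h>
#include <stdint.h>
#include <strings.h>   /* ffs() */

int main(void)
{
   /* Pretend counters 1, 5 and 20 are active in a single-word bitset. */
   uint32_t active = (1u << 1) | (1u << 5) | (1u << 20);
   uint32_t tmp = active;

   while (tmp) {
      int counter = ffs(tmp) - 1;
      tmp &= tmp - 1;          /* clear the lowest set bit */
      printf("counter %d is active\n", counter);
   }
   return 0;
}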

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] radeonsi: fix perfcounter selection for SI_PC_MULTI_BLOCK layouts

2015-12-14 Thread Nicolai Hähnle
From: Nicolai Hähnle <nicolai.haeh...@amd.com>

The incorrectly computed register count caused lockups.
---
 src/gallium/drivers/radeonsi/si_perfcounter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c 
b/src/gallium/drivers/radeonsi/si_perfcounter.c
index a0ddff6..7ee1dae 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -436,7 +436,7 @@ static void si_pc_emit_select(struct r600_common_context 
*ctx,
 
dw = count + regs->num_prelude;
if (count >= regs->num_multi)
-   count += regs->num_multi;
+   dw += regs->num_multi;
radeon_set_uconfig_reg_seq(cs, regs->select0, dw);
for (idx = 0; idx < regs->num_prelude; ++idx)
radeon_emit(cs, 0);
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] gallium/radeon: remove unnecessary test in r600_pc_query_add_result

2015-12-14 Thread Nicolai Hähnle
From: Nicolai Hähnle <nicolai.haeh...@amd.com>

This test is a left-over of the initial development. It is unneeded and
misleading, so let's get rid of it.
---
 src/gallium/drivers/radeon/r600_perfcounter.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_perfcounter.c 
b/src/gallium/drivers/radeon/r600_perfcounter.c
index a835aee..fad7bde 100644
--- a/src/gallium/drivers/radeon/r600_perfcounter.c
+++ b/src/gallium/drivers/radeon/r600_perfcounter.c
@@ -202,9 +202,6 @@ static void r600_pc_query_add_result(struct 
r600_common_context *ctx,
for (i = 0; i < query->num_counters; ++i) {
struct r600_pc_counter *counter = &query->counters[i];
 
-   if (counter->base == ~0)
-   continue;
-
for (j = 0; j < counter->dwords; ++j) {
uint32_t value = results[counter->base + j * 
counter->stride];
result->batch[i].u32 += value;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] radeonsi: add RADEON_REPLACE_SHADERS debug option

2015-12-17 Thread Nicolai Hähnle
From: Nicolai Hähnle <nicolai.haeh...@amd.com>

This option allows replacing a single shader by a pre-compiled ELF object
as generated by LLVM's llc, for example. This can be useful for debugging a
deterministically occurring error in shaders (and has in fact helped find
the causes of https://bugs.freedesktop.org/show_bug.cgi?id=93264).
---
 src/gallium/drivers/radeon/r600_pipe_common.h |  1 +
 src/gallium/drivers/radeonsi/si_debug.c   | 94 +++
 src/gallium/drivers/radeonsi/si_pipe.c|  3 +
 src/gallium/drivers/radeonsi/si_pipe.h|  1 +
 src/gallium/drivers/radeonsi/si_shader.c  | 18 +++--
 5 files changed, 112 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index c3933b1d..556c7cc 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -87,6 +87,7 @@
 #define DBG_NO_DCC (1llu << 43)
 #define DBG_NO_DCC_CLEAR   (1llu << 44)
 #define DBG_NO_RB_PLUS (1llu << 45)
+#define DBG_REPLACE_SHADERS (1llu << 46)
 
 #define R600_MAP_BUFFER_ALIGNMENT 64
 
diff --git a/src/gallium/drivers/radeonsi/si_debug.c 
b/src/gallium/drivers/radeonsi/si_debug.c
index c45f8c0..f50d98c 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -28,7 +28,9 @@
 #include "si_shader.h"
 #include "sid.h"
 #include "sid_tables.h"
+#include "radeon/radeon_elf_util.h"
 #include "ddebug/dd_util.h"
+#include "util/u_memory.h"
 
 
 static void si_dump_shader(struct si_shader_ctx_state *state, const char *name,
@@ -42,6 +44,98 @@ static void si_dump_shader(struct si_shader_ctx_state 
*state, const char *name,
fprintf(f, "%s\n\n", state->current->binary.disasm_string);
 }
 
+/**
+ * Shader compiles can be overridden with arbitrary ELF objects by setting
+ * the environment variable 
RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2]
+ */
+bool si_replace_shader(unsigned num, struct radeon_shader_binary *binary)
+{
+   const char *p = debug_get_option("RADEON_REPLACE_SHADERS", NULL);
+   const char *semicolon;
+   char *copy = NULL;
+   FILE *f;
+   long filesize, nread;
+   char *buf = NULL;
+   bool replaced = false;
+
+   if (!p)
+   return false;
+
+   while (*p) {
+   unsigned long i;
+   char *endp;
+   i = strtoul(p, &endp, 0);
+
+   p = endp;
+   if (*p != ':') {
+   fprintf(stderr, "RADEON_REPLACE_SHADERS formatted 
badly.\n");
+   exit(1);
+   }
+   ++p;
+
+   if (i == num)
+   break;
+
+   p = strchr(p, ';');
+   if (!p)
+   return false;
+   ++p;
+   }
+   if (!*p)
+   return false;
+
+   semicolon = strchr(p, ';');
+   if (semicolon) {
+   p = copy = strndup(p, semicolon - p);
+   if (!copy) {
+   fprintf(stderr, "out of memory\n");
+   return false;
+   }
+   }
+
+   fprintf(stderr, "radeonsi: replace shader %u by %s\n", num, p);
+
+   f = fopen(p, "r");
+   if (!f) {
+   perror("radeonsi: failed to open file");
+   goto out_free;
+   }
+
+   if (fseek(f, 0, SEEK_END) != 0)
+   goto file_error;
+
+   filesize = ftell(f);
+   if (filesize < 0)
+   goto file_error;
+
+   if (fseek(f, 0, SEEK_SET) != 0)
+   goto file_error;
+
+   buf = MALLOC(filesize);
+   if (!buf) {
+   fprintf(stderr, "out of memory\n");
+   goto out_close;
+   }
+
+   nread = fread(buf, 1, filesize, f);
+   if (nread != filesize)
+   goto file_error;
+
+   radeon_elf_read(buf, filesize, binary);
+   replaced = true;
+
+out_close:
+   fclose(f);
+out_free:
+   FREE(buf);
+   free(copy);
+   return replaced;
+
+file_error:
+   perror("radeonsi: reading shader");
+   goto out_close;
+}
+
 /* Parsed IBs are difficult to read without colors. Use "less -R file" to
  * read them, or use "aha -b -f file" to convert them to html.
  */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index ac13407..6a1911f 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -639,6 +639,9 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws)
if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE))
sscreen->b.debug_flags |= DBG_FS | D
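
To make the option format concrete, here is a small standalone sketch that parses the same num:filename[;num:filename] syntax as the patch; the shader numbers and ELF paths are invented for illustration:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Standalone sketch of the RADEON_REPLACE_SHADERS format; the setting and
 * paths below are made up. */
static void lookup(const char *setting, unsigned long num)
{
   const char *p = setting;

   while (*p) {
      char *endp;
      unsigned long i = strtoul(p, &endp, 0);

      if (*endp != ':') {
         fprintf(stderr, "badly formatted setting\n");
         return;
      }
      p = endp + 1;

      const char *semicolon = strchr(p, ';');
      size_t len = semicolon ? (size_t)(semicolon - p) : strlen(p);

      if (i == num) {
         printf("shader %lu would be replaced by %.*s\n", num, (int)len, p);
         return;
      }
      if (!semicolon)
         break;
      p = semicolon + 1;
   }
   printf("shader %lu is compiled normally\n", num);
}

int main(void)
{
   const char *setting = "2:/tmp/ps.elf;7:/tmp/vs.elf";
   lookup(setting, 2);   /* shader 2 would be replaced by /tmp/ps.elf */
   lookup(setting, 3);   /* shader 3 is compiled normally */
   return 0;
}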

[Mesa-dev] [PATCH 1/2] radeonsi: count compilations in si_compile_llvm

2015-12-17 Thread Nicolai Hähnle
From: Nicolai Hähnle <nicolai.haeh...@amd.com>

This changes the count slightly (because of si_generate_gs_copy_shader), but
this is only relevant for the driver-specific num-compilations query. It sets
the stage for the next commit.
---
 src/gallium/drivers/radeonsi/si_shader.c| 2 ++
 src/gallium/drivers/radeonsi/si_state_shaders.c | 1 -
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 4a67276..511ed88 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3885,6 +3885,8 @@ int si_compile_llvm(struct si_screen *sscreen, struct 
si_shader *shader,
shader->selector ? shader->selector->tokens : 
NULL);
bool dump_ir = dump_asm && !(sscreen->b.debug_flags & DBG_NO_IR);
 
+   p_atomic_inc(&sscreen->b.num_compilations);
+
r = radeon_llvm_compile(mod, &shader->binary,
r600_get_llvm_processor_name(sscreen->b.family), dump_ir, 
dump_asm, tm);
if (r)
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index f0147ce..8700590 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -634,7 +634,6 @@ static int si_shader_select(struct pipe_context *ctx,
sel->last_variant = shader;
}
state->current = shader;
-   p_atomic_inc(&sel->screen->b.num_compilations);
pipe_mutex_unlock(sel->mutex);
return 0;
 }
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] gallium/radeon: only dispose locally created target machine in radeon_llvm_compile

2015-12-17 Thread Nicolai Hähnle
From: Nicolai Hähnle <nicolai.haeh...@amd.com>

Unify the cleanup paths of the function rather than duplicating code.
---
 src/gallium/drivers/radeon/radeon_llvm_emit.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c 
b/src/gallium/drivers/radeon/radeon_llvm_emit.c
index 6b2ebde..61ed940 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
@@ -188,8 +188,8 @@ unsigned radeon_llvm_compile(LLVMModuleRef M, struct 
radeon_shader_binary *binar
if (mem_err) {
fprintf(stderr, "%s: %s", __FUNCTION__, err);
FREE(err);
-   LLVMDisposeTargetMachine(tm);
-   return 1;
+   rval = 1;
+   goto out;
}
 
if (0 != rval) {
@@ -205,6 +205,7 @@ unsigned radeon_llvm_compile(LLVMModuleRef M, struct 
radeon_shader_binary *binar
/* Clean up */
LLVMDisposeMemoryBuffer(out_buffer);
 
+out:
if (dispose_tm) {
LLVMDisposeTargetMachine(tm);
}
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [OT] some contribution statistics

2015-12-15 Thread Nicolai Hähnle

On 15.12.2015 16:22, Kenneth Graunke wrote:

On Tuesday, December 15, 2015 02:23:07 PM Giuseppe Bilotta wrote:

The only problem with these numbers is actually the lack of a .mailmap
to normalize contributor name/emails, which obviously skews the
results a little bit towards the lower end. I don't suppose someone
has a .mailmap for Mesa contributors, or is interested in creating
one?


I actually have one of those!

http://cgit.freedesktop.org/~kwg/mesa/commit/?h=gitdm


Do you take patches?

Nicolai



--Ken



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] st/mesa: make KHR_debug output independent of context creation flags

2016-01-04 Thread Nicolai Hähnle
From: Nicolai Hähnle <nicolai.haeh...@amd.com>

Instead, keep track of GL_DEBUG_OUTPUT and (un)install the pipe_debug_callback
accordingly. Hardware drivers can still use the absence of the callback to
skip more expensive operations in the normal case, and users can no longer be
surprised by the need to set the debug flag at context creation time.
---
 src/mesa/state_tracker/st_context.c | 18 ++
 src/mesa/state_tracker/st_debug.c   | 70 +
 src/mesa/state_tracker/st_debug.h   |  4 +++
 src/mesa/state_tracker/st_manager.c | 64 +
 4 files changed, 93 insertions(+), 63 deletions(-)

diff --git a/src/mesa/state_tracker/st_context.c 
b/src/mesa/state_tracker/st_context.c
index 724c3c5..31cc99d 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -80,6 +80,23 @@ DEBUG_GET_ONCE_BOOL_OPTION(mesa_mvp_dp4, "MESA_MVP_DP4", 
FALSE)
 
 
 /**
+ * Called via ctx->Driver.Enable()
+ */
+static void st_Enable(struct gl_context * ctx, GLenum cap, GLboolean state)
+{
+   struct st_context *st = st_context(ctx);
+
+   switch (cap) {
+   case GL_DEBUG_OUTPUT:
+  st_enable_debug_output(st, state);
+  break;
+   default:
+  break;
+   }
+}
+
+
+/**
  * Called via ctx->Driver.UpdateState()
  */
 void st_invalidate_state(struct gl_context * ctx, GLuint new_state)
@@ -457,5 +474,6 @@ void st_init_driver_functions(struct pipe_screen *screen,
 
st_init_vdpau_functions(functions);
 
+   functions->Enable = st_Enable;
functions->UpdateState = st_invalidate_state;
 }
diff --git a/src/mesa/state_tracker/st_debug.c 
b/src/mesa/state_tracker/st_debug.c
index 6d859c6..ac77558 100644
--- a/src/mesa/state_tracker/st_debug.c
+++ b/src/mesa/state_tracker/st_debug.c
@@ -104,3 +104,73 @@ st_print_current(void)
 }
 
 
+/**
+ * Installed as pipe_debug_callback when GL_DEBUG_OUTPUT is enabled.
+ */
+static void
+st_debug_message(void *data,
+ unsigned *id,
+ enum pipe_debug_type ptype,
+ const char *fmt,
+ va_list args)
+{
+   struct st_context *st = data;
+   enum mesa_debug_source source;
+   enum mesa_debug_type type;
+   enum mesa_debug_severity severity;
+
+   switch (ptype) {
+   case PIPE_DEBUG_TYPE_OUT_OF_MEMORY:
+  source = MESA_DEBUG_SOURCE_API;
+  type = MESA_DEBUG_TYPE_ERROR;
+  severity = MESA_DEBUG_SEVERITY_MEDIUM;
+  break;
+   case PIPE_DEBUG_TYPE_ERROR:
+  source = MESA_DEBUG_SOURCE_API;
+  type = MESA_DEBUG_TYPE_ERROR;
+  severity = MESA_DEBUG_SEVERITY_MEDIUM;
+  break;
+   case PIPE_DEBUG_TYPE_SHADER_INFO:
+  source = MESA_DEBUG_SOURCE_SHADER_COMPILER;
+  type = MESA_DEBUG_TYPE_OTHER;
+  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+  break;
+   case PIPE_DEBUG_TYPE_PERF_INFO:
+  source = MESA_DEBUG_SOURCE_API;
+  type = MESA_DEBUG_TYPE_PERFORMANCE;
+  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+  break;
+   case PIPE_DEBUG_TYPE_INFO:
+  source = MESA_DEBUG_SOURCE_API;
+  type = MESA_DEBUG_TYPE_OTHER;
+  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+  break;
+   case PIPE_DEBUG_TYPE_FALLBACK:
+  source = MESA_DEBUG_SOURCE_API;
+  type = MESA_DEBUG_TYPE_PERFORMANCE;
+  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+  break;
+   case PIPE_DEBUG_TYPE_CONFORMANCE:
+  source = MESA_DEBUG_SOURCE_API;
+  type = MESA_DEBUG_TYPE_OTHER;
+  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+  break;
+   }
+   _mesa_gl_vdebug(st->ctx, id, source, type, severity, fmt, args);
+}
+
+void
+st_enable_debug_output(struct st_context *st, boolean enable)
+{
+   struct pipe_context *pipe = st->pipe;
+
+   if (!pipe->set_debug_callback)
+  return;
+
+   if (enable) {
+  struct pipe_debug_callback cb = { st_debug_message, st };
+  pipe->set_debug_callback(pipe, &cb);
+   } else {
+  pipe->set_debug_callback(pipe, NULL);
+   }
+}
diff --git a/src/mesa/state_tracker/st_debug.h 
b/src/mesa/state_tracker/st_debug.h
index 288eccf..ed3ead8 100644
--- a/src/mesa/state_tracker/st_debug.h
+++ b/src/mesa/state_tracker/st_debug.h
@@ -32,6 +32,8 @@
 #include "pipe/p_compiler.h"
 #include "util/u_debug.h"
 
+struct st_context;
+
 extern void
 st_print_current(void);
 
@@ -59,6 +61,8 @@ extern int ST_DEBUG;
 
 void st_debug_init( void );
 
+void st_enable_debug_output(struct st_context *st, boolean enable);
+
 static inline void
 ST_DBG( unsigned flag, const char *fmt, ... )
 {
diff --git a/src/mesa/state_tracker/st_manager.c 
b/src/mesa/state_tracker/st_manager.c
index d0d261f..525aff7 100644
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -623,58 +623,6 @@ st_context_destroy(struct st_context_iface *stctxi)
st_destroy_context(st);
 }
 
-static void
-st_debug_message(void *data,
- unsigned *id,
-   

[Mesa-dev] [PATCH v2] st/mesa: make KHR_debug output independent of context creation flags (v2)

2016-01-04 Thread Nicolai Hähnle
From: Nicolai Hähnle <nicolai.haeh...@amd.com>

Instead, keep track of GL_DEBUG_OUTPUT and (un)install the pipe_debug_callback
accordingly. Hardware drivers can still use the absence of the callback to
skip more expensive operations in the normal case, and users can no longer be
surprised by the need to set the debug flag at context creation time.

v2:
- re-add the proper initialization of debug contexts (Ilia Mirkin)
- silence a potential warning (Ilia Mirkin)
---
 src/mesa/state_tracker/st_context.c | 18 ++
 src/mesa/state_tracker/st_debug.c   | 72 +
 src/mesa/state_tracker/st_debug.h   |  4 +++
 src/mesa/state_tracker/st_manager.c | 61 +++
 4 files changed, 98 insertions(+), 57 deletions(-)

diff --git a/src/mesa/state_tracker/st_context.c 
b/src/mesa/state_tracker/st_context.c
index 724c3c5..31cc99d 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -80,6 +80,23 @@ DEBUG_GET_ONCE_BOOL_OPTION(mesa_mvp_dp4, "MESA_MVP_DP4", 
FALSE)
 
 
 /**
+ * Called via ctx->Driver.Enable()
+ */
+static void st_Enable(struct gl_context * ctx, GLenum cap, GLboolean state)
+{
+   struct st_context *st = st_context(ctx);
+
+   switch (cap) {
+   case GL_DEBUG_OUTPUT:
+  st_enable_debug_output(st, state);
+  break;
+   default:
+  break;
+   }
+}
+
+
+/**
  * Called via ctx->Driver.UpdateState()
  */
 void st_invalidate_state(struct gl_context * ctx, GLuint new_state)
@@ -457,5 +474,6 @@ void st_init_driver_functions(struct pipe_screen *screen,
 
st_init_vdpau_functions(functions);
 
+   functions->Enable = st_Enable;
functions->UpdateState = st_invalidate_state;
 }
diff --git a/src/mesa/state_tracker/st_debug.c 
b/src/mesa/state_tracker/st_debug.c
index 6d859c6..134366d 100644
--- a/src/mesa/state_tracker/st_debug.c
+++ b/src/mesa/state_tracker/st_debug.c
@@ -104,3 +104,75 @@ st_print_current(void)
 }
 
 
+/**
+ * Installed as pipe_debug_callback when GL_DEBUG_OUTPUT is enabled.
+ */
+static void
+st_debug_message(void *data,
+ unsigned *id,
+ enum pipe_debug_type ptype,
+ const char *fmt,
+ va_list args)
+{
+   struct st_context *st = data;
+   enum mesa_debug_source source;
+   enum mesa_debug_type type;
+   enum mesa_debug_severity severity;
+
+   switch (ptype) {
+   case PIPE_DEBUG_TYPE_OUT_OF_MEMORY:
+  source = MESA_DEBUG_SOURCE_API;
+  type = MESA_DEBUG_TYPE_ERROR;
+  severity = MESA_DEBUG_SEVERITY_MEDIUM;
+  break;
+   case PIPE_DEBUG_TYPE_ERROR:
+  source = MESA_DEBUG_SOURCE_API;
+  type = MESA_DEBUG_TYPE_ERROR;
+  severity = MESA_DEBUG_SEVERITY_MEDIUM;
+  break;
+   case PIPE_DEBUG_TYPE_SHADER_INFO:
+  source = MESA_DEBUG_SOURCE_SHADER_COMPILER;
+  type = MESA_DEBUG_TYPE_OTHER;
+  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+  break;
+   case PIPE_DEBUG_TYPE_PERF_INFO:
+  source = MESA_DEBUG_SOURCE_API;
+  type = MESA_DEBUG_TYPE_PERFORMANCE;
+  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+  break;
+   case PIPE_DEBUG_TYPE_INFO:
+  source = MESA_DEBUG_SOURCE_API;
+  type = MESA_DEBUG_TYPE_OTHER;
+  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+  break;
+   case PIPE_DEBUG_TYPE_FALLBACK:
+  source = MESA_DEBUG_SOURCE_API;
+  type = MESA_DEBUG_TYPE_PERFORMANCE;
+  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+  break;
+   case PIPE_DEBUG_TYPE_CONFORMANCE:
+  source = MESA_DEBUG_SOURCE_API;
+  type = MESA_DEBUG_TYPE_OTHER;
+  severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+  break;
+   default:
+  unreachable("invalid debug type");
+   }
+   _mesa_gl_vdebug(st->ctx, id, source, type, severity, fmt, args);
+}
+
+void
+st_enable_debug_output(struct st_context *st, boolean enable)
+{
+   struct pipe_context *pipe = st->pipe;
+
+   if (!pipe->set_debug_callback)
+  return;
+
+   if (enable) {
+  struct pipe_debug_callback cb = { st_debug_message, st };
+  pipe->set_debug_callback(pipe, &cb);
+   } else {
+  pipe->set_debug_callback(pipe, NULL);
+   }
+}
diff --git a/src/mesa/state_tracker/st_debug.h 
b/src/mesa/state_tracker/st_debug.h
index 288eccf..ed3ead8 100644
--- a/src/mesa/state_tracker/st_debug.h
+++ b/src/mesa/state_tracker/st_debug.h
@@ -32,6 +32,8 @@
 #include "pipe/p_compiler.h"
 #include "util/u_debug.h"
 
+struct st_context;
+
 extern void
 st_print_current(void);
 
@@ -59,6 +61,8 @@ extern int ST_DEBUG;
 
 void st_debug_init( void );
 
+void st_enable_debug_output(struct st_context *st, boolean enable);
+
 static inline void
 ST_DBG( unsigned flag, const char *fmt, ... )
 {
diff --git a/src/mesa/state_tracker/st_manager.c 
b/src/mesa/state_tracker/st_manager.c
index d0d261f..385e26b 100644
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -39,6 +39,7 @@
 #include &
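
For context, a minimal sketch of the application side that this change affects: with the patch, installing a KHR_debug callback on a context that was not created as a debug context still reaches the driver. The sketch assumes a GL 4.3 / KHR_debug-capable context and the libepoxy loader; both are assumptions, not part of the patch.

#include <epoxy/gl.h>
#include <stdio.h>

/* Receives messages (e.g. driver shader stats) forwarded by st/mesa. */
static void debug_cb(GLenum source, GLenum type, GLuint id, GLenum severity,
                     GLsizei length, const GLchar *message, const void *user)
{
   (void)source; (void)type; (void)severity; (void)length; (void)user;
   fprintf(stderr, "GL debug [%u]: %s\n", id, message);
}

void enable_gl_debug_output(void)
{
   /* With the patch above, this also installs the pipe_debug_callback in the
    * driver when the context was created without the debug flag. */
   glEnable(GL_DEBUG_OUTPUT);
   glDebugMessageCallback((GLDEBUGPROC)debug_cb, NULL);
}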

[Mesa-dev] [PATCH 5/6] gallium/radeon: send LLVM diagnostics as debug messages

2015-12-30 Thread Nicolai Hähnle
From: Nicolai Hähnle <nicolai.haeh...@amd.com>

Diagnostics sent during code generation and error messages reported
by LLVMTargetMachineEmitToMemoryBuffer are disjoint reporting mechanisms. We
take care of both and also send an explicit message indicating failure at the
end, so that log parsers can more easily tell the boundary between shader
compiles.

Removed an fprintf that could never be triggered.
---
 src/gallium/drivers/radeon/radeon_llvm_emit.c | 61 ---
 1 file changed, 45 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c 
b/src/gallium/drivers/radeon/radeon_llvm_emit.c
index d0168f1..62e06ca 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
@@ -125,16 +125,44 @@ LLVMTargetRef radeon_llvm_get_r600_target(const char 
*triple)
return target;
 }
 
+struct radeon_llvm_diagnostics {
+   struct r600_common_context *rctx;
+   unsigned retval;
+};
+
 static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context)
 {
-   if (LLVMGetDiagInfoSeverity(di) == LLVMDSError) {
-   unsigned int *diagnosticflag = (unsigned int *)context;
-   char *diaginfo_message = LLVMGetDiagInfoDescription(di);
+   struct radeon_llvm_diagnostics *diag = (struct radeon_llvm_diagnostics 
*)context;
+   LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
+   char *description = LLVMGetDiagInfoDescription(di);
+   const char *severity_str = NULL;
+
+   switch (severity) {
+   case LLVMDSError:
+   severity_str = "error";
+   break;
+   case LLVMDSWarning:
+   severity_str = "warning";
+   break;
+   case LLVMDSRemark:
+   severity_str = "remark";
+   break;
+   case LLVMDSNote:
+   severity_str = "note";
+   break;
+   default:
+   severity_str = "unknown";
+   }
+
+   pipe_debug_message(&diag->rctx->debug, SHADER_INFO,
+  "LLVM diagnostic (%s): %s", severity_str, 
description);
 
-   *diagnosticflag = 1;
-   fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", 
diaginfo_message);
-   LLVMDisposeMessage(diaginfo_message);
+   if (severity == LLVMDSError) {
+   diag->retval = 1;
+   fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", 
description);
}
+
+   LLVMDisposeMessage(description);
 }
 
 /**
@@ -147,19 +175,21 @@ unsigned radeon_llvm_compile(struct r600_common_context 
*rctx,
 const char *gpu_family, bool dump_ir, bool 
dump_asm,
 LLVMTargetMachineRef tm)
 {
-
+   struct radeon_llvm_diagnostics diag;
char cpu[CPU_STRING_LEN];
char fs[FS_STRING_LEN];
char *err;
bool dispose_tm = false;
LLVMContextRef llvm_ctx;
-   unsigned rval = 0;
LLVMMemoryBufferRef out_buffer;
unsigned buffer_size;
const char *buffer_data;
char triple[TRIPLE_STRING_LEN];
LLVMBool mem_err;
 
+   diag.rctx = rctx;
+   diag.retval = 0;
+
if (!tm) {
strncpy(triple, "r600--", TRIPLE_STRING_LEN);
LLVMTargetRef target = radeon_llvm_get_r600_target(triple);
@@ -180,8 +210,7 @@ unsigned radeon_llvm_compile(struct r600_common_context 
*rctx,
/* Setup Diagnostic Handler*/
llvm_ctx = LLVMGetModuleContext(M);
 
-   LLVMContextSetDiagnosticHandler(llvm_ctx, radeonDiagnosticHandler, &rval);
-   rval = 0;
+   LLVMContextSetDiagnosticHandler(llvm_ctx, radeonDiagnosticHandler, &diag);
 
/* Compile IR*/
	mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
@@ -190,15 +219,13 @@ unsigned radeon_llvm_compile(struct r600_common_context 
*rctx,
/* Process Errors/Warnings */
if (mem_err) {
fprintf(stderr, "%s: %s", __FUNCTION__, err);
+   pipe_debug_message(&rctx->debug, SHADER_INFO,
+  "LLVM emit error: %s", err);
FREE(err);
-   rval = 1;
+   diag.retval = 1;
goto out;
}
 
-   if (0 != rval) {
-   fprintf(stderr, "%s: Processing Diag Flag\n", __FUNCTION__);
-   }
-
/* Extract Shader Code*/
buffer_size = LLVMGetBufferSize(out_buffer);
buffer_data = LLVMGetBufferStart(out_buffer);
@@ -212,5 +239,7 @@ out:
if (dispose_tm) {
LLVMDisposeTargetMachine(tm);
}
-   return rval;
+   if (diag.retval != 0)
+   pipe_debug_message(&rctx->debug, SHADER_INFO, "LLVM compile failed");
+   return diag.retval;
 }
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev
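
A stripped-down sketch of the LLVM-C diagnostic-handler pattern the patch relies on, as a standalone program; it only installs the handler and counts errors, nothing in it actually triggers a diagnostic:

#include <llvm-c/Core.h>
#include <stdio.h>

static void diag_handler(LLVMDiagnosticInfoRef di, void *context)
{
   unsigned *num_errors = context;
   char *description = LLVMGetDiagInfoDescription(di);

   if (LLVMGetDiagInfoSeverity(di) == LLVMDSError)
      (*num_errors)++;

   fprintf(stderr, "LLVM diagnostic: %s\n", description);
   LLVMDisposeMessage(description);
}

int main(void)
{
   unsigned num_errors = 0;
   LLVMContextRef ctx = LLVMContextCreate();

   LLVMContextSetDiagnosticHandler(ctx, diag_handler, &num_errors);
   /* ... create a module in ctx and run codegen here; diagnostics emitted
    * during that work are routed through diag_handler ... */
   LLVMContextDispose(ctx);
   return num_errors != 0;
}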


[Mesa-dev] [PATCH 6/6] gallium/radeon: cleanup dump parameters to radeon_llvm_compile

2015-12-30 Thread Nicolai Hähnle
From: Nicolai Hähnle <nicolai.haeh...@amd.com>

Now that the function gets a context pointer, it can determine IR and ASM
dumping by itself.

The dump parameter is still required because we cannot easily tell the shader
type at this point (one might argue that the separate enable flags for the
different types offer little value, but that would be a separate change).
---
 src/gallium/drivers/r600/r600_llvm.c  | 2 +-
 src/gallium/drivers/radeon/radeon_llvm_emit.c | 6 +++---
 src/gallium/drivers/radeon/radeon_llvm_emit.h | 2 +-
 src/gallium/drivers/radeonsi/si_shader.c  | 7 +++
 4 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_llvm.c 
b/src/gallium/drivers/r600/r600_llvm.c
index a68f265..b72401d 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -923,7 +923,7 @@ unsigned r600_llvm_compile(
const char * gpu_family = r600_get_llvm_processor_name(family);
 
	memset(&binary, 0, sizeof(struct radeon_shader_binary));
-   r = radeon_llvm_compile(rctx, mod, &binary, gpu_family, dump, dump, NULL);
+   r = radeon_llvm_compile(rctx, mod, &binary, gpu_family, dump, NULL);
 
	r = r600_create_shader(bc, &binary, use_kill);
 
diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c 
b/src/gallium/drivers/radeon/radeon_llvm_emit.c
index 62e06ca..03dd5dd 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
@@ -172,7 +172,7 @@ static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef 
di, void *context)
  */
 unsigned radeon_llvm_compile(struct r600_common_context *rctx,
 LLVMModuleRef M, struct radeon_shader_binary 
*binary,
-const char *gpu_family, bool dump_ir, bool 
dump_asm,
+const char *gpu_family, bool dump,
 LLVMTargetMachineRef tm)
 {
struct radeon_llvm_diagnostics diag;
@@ -198,14 +198,14 @@ unsigned radeon_llvm_compile(struct r600_common_context 
*rctx,
}
strncpy(cpu, gpu_family, CPU_STRING_LEN);
memset(fs, 0, sizeof(fs));
-   if (dump_asm)
+   if (dump && !(rctx->screen->debug_flags & DBG_NO_ASM))
strncpy(fs, "+DumpCode", FS_STRING_LEN);
tm = LLVMCreateTargetMachine(target, triple, cpu, fs,
  LLVMCodeGenLevelDefault, LLVMRelocDefault,
  LLVMCodeModelDefault);
dispose_tm = true;
}
-   if (dump_ir)
+   if (dump && !(rctx->screen->debug_flags & DBG_NO_IR))
LLVMDumpModule(M);
/* Setup Diagnostic Handler*/
llvm_ctx = LLVMGetModuleContext(M);
diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.h 
b/src/gallium/drivers/radeon/radeon_llvm_emit.h
index be72c6b..e15d2f9 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.h
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.h
@@ -40,7 +40,7 @@ LLVMTargetRef radeon_llvm_get_r600_target(const char *triple);
 
 unsigned radeon_llvm_compile(struct r600_common_context *rctx,
 LLVMModuleRef M, struct radeon_shader_binary 
*binary,
-const char *gpu_family, bool dump_ir, bool 
dump_asm,
+const char *gpu_family, bool dump,
 LLVMTargetMachineRef tm);
 
 #endif /* RADEON_LLVM_EMIT_H */
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 3f5690e..6102845 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3924,17 +3924,16 @@ int si_compile_llvm(struct si_context *sctx, struct 
si_shader *shader,
 {
struct si_screen *sscreen = sctx->screen;
int r = 0;
-   bool dump_asm = r600_can_dump_shader(&sscreen->b,
+   bool dump = r600_can_dump_shader(&sscreen->b,
shader->selector ? shader->selector->tokens : 
NULL);
-   bool dump_ir = dump_asm && !(sscreen->b.debug_flags & DBG_NO_IR);
	unsigned count = p_atomic_inc_return(&sscreen->b.num_compilations);
 
-   if (dump_ir || dump_asm)
+   if (dump)
fprintf(stderr, "radeonsi: Compiling shader %d\n", count);
 
	if (!si_replace_shader(count, &shader->binary)) {
		r = radeon_llvm_compile(&sctx->b, mod, &shader->binary,
-   r600_get_llvm_processor_name(sscreen->b.family), 
dump_ir, dump_asm, tm);
+   r600_get_llvm_processor_name(sscreen->b.family), dump, 
tm);
if (r)
return r;
}
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/6] gallium/radeon: pass r600_common_context into radeon_llvm_compile

2015-12-30 Thread Nicolai Hähnle
From: Nicolai Hähnle <nicolai.haeh...@amd.com>

This will allow us to send shader debug info via the context's debug callback.
---
 src/gallium/drivers/r600/evergreen_compute.c  | 2 +-
 src/gallium/drivers/r600/r600_llvm.c  | 3 ++-
 src/gallium/drivers/r600/r600_llvm.h  | 2 ++
 src/gallium/drivers/r600/r600_shader.c| 2 +-
 src/gallium/drivers/radeon/radeon_llvm_emit.c | 5 -
 src/gallium/drivers/radeon/radeon_llvm_emit.h | 4 +++-
 src/gallium/drivers/radeonsi/si_shader.c  | 2 +-
 7 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
b/src/gallium/drivers/r600/evergreen_compute.c
index d83eb17..1db107a 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -600,7 +600,7 @@ static void evergreen_launch_grid(
ctx->screen->has_compressed_msaa_texturing);
 bc->type = TGSI_PROCESSOR_COMPUTE;
 bc->isa = ctx->isa;
-r600_llvm_compile(mod, ctx->b.family, bc, &use_kill, dump);
+r600_llvm_compile(&ctx->b, mod, ctx->b.family, bc, &use_kill, dump);
 
 if (dump && !sb_disasm) {
 r600_bytecode_disasm(bc);
diff --git a/src/gallium/drivers/r600/r600_llvm.c 
b/src/gallium/drivers/r600/r600_llvm.c
index 1cc3031..a68f265 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -911,6 +911,7 @@ unsigned r600_create_shader(struct r600_bytecode *bc,
 }
 
 unsigned r600_llvm_compile(
+   struct r600_common_context *rctx,
LLVMModuleRef mod,
enum radeon_family family,
struct r600_bytecode *bc,
@@ -922,7 +923,7 @@ unsigned r600_llvm_compile(
const char * gpu_family = r600_get_llvm_processor_name(family);
 
	memset(&binary, 0, sizeof(struct radeon_shader_binary));
-   r = radeon_llvm_compile(mod, &binary, gpu_family, dump, dump, NULL);
+   r = radeon_llvm_compile(rctx, mod, &binary, gpu_family, dump, dump, NULL);
 
	r = r600_create_shader(bc, &binary, use_kill);
 
diff --git a/src/gallium/drivers/r600/r600_llvm.h 
b/src/gallium/drivers/r600/r600_llvm.h
index 9b5304d..5b091b9 100644
--- a/src/gallium/drivers/r600/r600_llvm.h
+++ b/src/gallium/drivers/r600/r600_llvm.h
@@ -7,6 +7,7 @@
 #include "radeon/radeon_llvm.h"
 #include 
 
+struct r600_common_context;
 struct r600_bytecode;
 struct r600_shader_ctx;
 struct radeon_llvm_context;
@@ -18,6 +19,7 @@ LLVMModuleRef r600_tgsi_llvm(
const struct tgsi_token * tokens);
 
 unsigned r600_llvm_compile(
+   struct r600_common_context *rctx,
LLVMModuleRef mod,
enum radeon_family family,
struct r600_bytecode *bc,
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index d411b0b..60d98a9 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -3259,7 +3259,7 @@ static int r600_shader_from_tgsi(struct r600_context 
*rctx,
ctx.shader->has_txq_cube_array_z_comp = 
radeon_llvm_ctx.has_txq_cube_array_z_comp;
ctx.shader->uses_tex_buffers = radeon_llvm_ctx.uses_tex_buffers;
 
-   if (r600_llvm_compile(mod, rscreen->b.family, ctx.bc, &use_kill, dump)) {
+   if (r600_llvm_compile(&rctx->b, mod, rscreen->b.family, ctx.bc, &use_kill, dump)) {
			radeon_llvm_dispose(&radeon_llvm_ctx);
use_llvm = 0;
fprintf(stderr, "R600 LLVM backend failed to compile "
diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c 
b/src/gallium/drivers/radeon/radeon_llvm_emit.c
index 61ed940..d0168f1 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
@@ -23,8 +23,10 @@
  * Authors: Tom Stellard <thomas.stell...@amd.com>
  *
  */
+
 #include "radeon_llvm_emit.h"
 #include "radeon_elf_util.h"
+#include "r600_pipe_common.h"
 #include "c11/threads.h"
 #include "gallivm/lp_bld_misc.h"
 #include "util/u_memory.h"
@@ -140,7 +142,8 @@ static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef 
di, void *context)
  *
  * @returns 0 for success, 1 for failure
  */
-unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary 
*binary,
+unsigned radeon_llvm_compile(struct r600_common_context *rctx,
+LLVMModuleRef M, struct radeon_shader_binary 
*binary,
 const char *gpu_family, bool dump_ir, bool 
dump_asm,
 LLVMTargetMachineRef tm)
 {
diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.h 
b/src/gallium/drivers/radeon/radeon_llvm_emit.h
index e20aed9..be72c6b 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.h
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.h
@@ -31,13 +31,15 @@
 #inc

[Mesa-dev] [PATCH 3/6] radeonsi: send shader info as debug messages in addition to stderr output

2015-12-30 Thread Nicolai Hähnle
From: Nicolai Hähnle <nicolai.haeh...@amd.com>

The output via stderr is very helpful for ad-hoc debugging tasks, so that 
remains
unchanged, but having the information available via debug messages as well
will allow the use of parallel shader-db runs.

Shader stats are always provided (if the context is a debug context, that is),
but you still have to enable the appropriate R600_DEBUG flags to get
disassembly (since it is rather spammy and is only generated by LLVM when we
explicitly ask for it).
---
 src/gallium/drivers/radeonsi/si_shader.c | 69 +---
 1 file changed, 55 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 606d571..bc9887b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3840,11 +3840,57 @@ int si_shader_binary_upload(struct si_screen *sscreen, 
struct si_shader *shader)
return 0;
 }
 
+static void si_shader_dump_disassembly(struct si_context *sctx,
+  const struct radeon_shader_binary 
*binary)
+{
+   char *line, *p;
+   unsigned i, count;
+
+   if (binary->disasm_string) {
+   fprintf(stderr, "\nShader Disassembly:\n\n");
+   fprintf(stderr, "%s\n", binary->disasm_string);
+
+   if (sctx->b.debug.debug_message) {
+   /* Very long debug messages are cut off, so send the
+* disassembly one line at a time. This causes more
+* overhead, but on the plus side it simplifies
+* parsing of resulting logs.
+*/
+   pipe_debug_message(&sctx->b.debug, SHADER_INFO,
+   "Shader Disassembly Begin");
+
+   line = binary->disasm_string;
+   while (*line) {
+   p = strchrnul(line, '\n');
+   count = p - line;
+
+   if (count) {
+   pipe_debug_message(&sctx->b.debug, 
SHADER_INFO,
+  "%.*s", count, line);
+   }
+
+   if (!*p)
+   break;
+   line = p + 1;
+   }
+
+   pipe_debug_message(&sctx->b.debug, SHADER_INFO,
+   "Shader Disassembly End");
+   }
+   } else {
+   fprintf(stderr, "SI CODE:\n");
+   for (i = 0; i < binary->code_size; i += 4) {
+   fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i,
+   binary->code[i + 3], binary->code[i + 2],
+   binary->code[i + 1], binary->code[i]);
+   }
+   }
+}
+
 int si_shader_binary_read(struct si_context *sctx, struct si_shader *shader)
 {
struct si_screen *sscreen = sctx->screen;
	const struct radeon_shader_binary *binary = &shader->binary;
-   unsigned i;
int r;
	bool dump  = r600_can_dump_shader(&sscreen->b,
shader->selector ? shader->selector->tokens : NULL);
@@ -3855,19 +3901,8 @@ int si_shader_binary_read(struct si_context *sctx, 
struct si_shader *shader)
return r;
 
if (dump) {
-   if (!(sscreen->b.debug_flags & DBG_NO_ASM)) {
-   if (binary->disasm_string) {
-   fprintf(stderr, "\nShader Disassembly:\n\n");
-   fprintf(stderr, "%s\n", binary->disasm_string);
-   } else {
-   fprintf(stderr, "SI CODE:\n");
-   for (i = 0; i < binary->code_size; i+=4 ) {
-   fprintf(stderr, "@0x%x: 
%02x%02x%02x%02x\n", i, binary->code[i + 3],
-   binary->code[i + 2], binary->code[i + 
1],
-   binary->code[i]);
-   }
-   }
-   }
+   if (!(sscreen->b.debug_flags & DBG_NO_ASM))
+   si_shader_dump_disassembly(sctx, binary);
 
fprintf(stderr, "*** SHADER STATS ***\n"
"SGPRS: %d\nVGPRS: %d\nCode Size: %d bytes\nLDS: %d 
blocks\n"
@@ -3875,6 +3910,12 @@ int si_shader_binary_read(struct si_context *sctx, 
struct si_shader *shader)
shader->num_sgprs, shader->num_vgprs, binary->code_size,
shader->lds_s
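
The per-line splitting in si_shader_dump_disassembly above reduces to the following standalone loop; strchrnul is a GNU extension, hence _GNU_SOURCE, and the sample input string is made up:

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>

static void emit_per_line(const char *text)
{
   const char *line = text;

   while (*line) {
      const char *p = strchrnul(line, '\n');
      int count = p - line;

      if (count)
         printf("debug message: %.*s\n", count, line);

      if (!*p)
         break;
      line = p + 1;
   }
}

int main(void)
{
   emit_per_line("s_mov_b32 s1, s5\ns_nop 2\nexp mrt0 v0, v0, v0, v0\n");
   return 0;
}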

[Mesa-dev] [PATCH 1/6] gallium/radeon: implement set_debug_callback

2015-12-30 Thread Nicolai Hähnle
From: Nicolai Hähnle <nicolai.haeh...@amd.com>

---
 src/gallium/drivers/radeon/r600_pipe_common.c | 12 
 src/gallium/drivers/radeon/r600_pipe_common.h |  2 ++
 2 files changed, 14 insertions(+)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 9a5e987..41c7aa5 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -227,6 +227,17 @@ static enum pipe_reset_status r600_get_reset_status(struct 
pipe_context *ctx)
return PIPE_UNKNOWN_CONTEXT_RESET;
 }
 
+static void r600_set_debug_callback(struct pipe_context *ctx,
+   const struct pipe_debug_callback *cb)
+{
+   struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+
+   if (cb)
+   rctx->debug = *cb;
+   else
+   memset(&rctx->debug, 0, sizeof(rctx->debug));
+}
+
 bool r600_common_context_init(struct r600_common_context *rctx,
  struct r600_common_screen *rscreen)
 {
@@ -252,6 +263,7 @@ bool r600_common_context_init(struct r600_common_context 
*rctx,
rctx->b.transfer_inline_write = u_default_transfer_inline_write;
 rctx->b.memory_barrier = r600_memory_barrier;
rctx->b.flush = r600_flush_from_st;
+   rctx->b.set_debug_callback = r600_set_debug_callback;
 
if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 43) {
rctx->b.get_device_reset_status = r600_get_reset_status;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index c3933b1d..a69e627 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -440,6 +440,8 @@ struct r600_common_context {
 * the GPU addresses are updated. */
struct list_headtexture_buffers;
 
+   struct pipe_debug_callback  debug;
+
/* Copy one resource to another using async DMA. */
void (*dma_copy)(struct pipe_context *ctx,
 struct pipe_resource *dst,
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [shader-db PATCH 2/5] si-report.py: speed up and adjust to debug message reporting, allowing multi-threaded runs

2015-12-30 Thread Nicolai Hähnle
Benefit from recent changes to Mesa master which allow correct reporting
in multi-threaded runs, and generally more robust reporting when comparing
different runs, e.g. runs that differ in the set of tests that were run.

Parsing also becomes much faster because we (a) do not recompile regexes in
the inner iterations, (b) have fewer lines to parse in total, and (c) are
more selective about when we try to match which regular expression.

While we're at it, parse out S_NOP instructions from the disassembly and
report wait states.
---
 README   |   4 +-
 si-report.py | 208 ++-
 2 files changed, 136 insertions(+), 76 deletions(-)

diff --git a/README b/README
index 06294c9..9561c19 100644
--- a/README
+++ b/README
@@ -37,12 +37,10 @@ to run.
 
 === Running shaders ===
 
-./run shaders -1 2> new-run
+./run shaders > new-run 2> /dev/null
 
 Note that a debug mesa build required (ie. --enable-debug)
 
--1 option for disabling multi-threading is required to avoid garbled shader 
dumps.
-
 === Analysis ===
 ./si-report.py old-run new-run
 
diff --git a/si-report.py b/si-report.py
index ec88112..e4aea40 100755
--- a/si-report.py
+++ b/si-report.py
@@ -23,6 +23,8 @@
 # DEALINGS IN THE SOFTWARE.
 #
 
+from collections import defaultdict
+import itertools
 import re
 import sys
 
@@ -65,6 +67,10 @@ def get_scratch_str(value, suffixes = True):
 suffix = 'bytes per wave'
 return get_value_str(value, 'Scratch', suffix)
 
+def get_waitstates_str(value, suffixes = True):
+suffix = ''
+return get_value_str(value, 'Wait states', suffix)
+
 def calculate_percent_change(b, a):
 if b == 0:
 return 0
@@ -89,15 +95,17 @@ class si_stats:
 self.code_size = 0
 self.lds = 0
 self.scratch = 0
+self.waitstates = 0
 
 
 def to_string(self, suffixes = True):
-return "{}{}{}{}{}".format(
+return "{}{}{}{}{}{}".format(
 get_sgpr_str(self.sgprs, suffixes),
 get_vgpr_str(self.vgprs, suffixes),
 get_code_size_str(self.code_size, suffixes),
 get_lds_str(self.lds, suffixes),
-get_scratch_str(self.scratch, suffixes))
+get_scratch_str(self.scratch, suffixes),
+get_waitstates_str(self.waitstates, suffixes))
 
 
 def __str__(self):
@@ -109,6 +117,7 @@ class si_stats:
 self.code_size += other.code_size
 self.lds += other.lds
 self.scratch += other.scratch
+self.waitstates += other.waitstates
 
 def update(self, comp, cmp_fn):
 for name in self.__dict__.keys():
@@ -153,56 +162,76 @@ class si_stats:
 return False
 return True
 
-def get_results(filename):
-file = open(filename, "r")
-lines = file.read().split('\n')
-
-results = []
-current_stats = si_stats()
-
-for line in lines:
-re_start = re.compile("^\*\*\* SHADER STATS \*\*\*$")
-re_sgprs = re.compile("^SGPRS: ([0-9]+)$")
-re_vgprs = re.compile("^VGPRS: ([0-9]+)$")
-re_code_size = re.compile("^Code Size: ([0-9]+) bytes$")
-re_lds = re.compile("^LDS: ([0-9]+) blocks$")
-re_scratch = re.compile("^Scratch: ([0-9]+) bytes per wave$")
-re_end = re.compile("^\*+$")
-
-# First line of stats
-match = re.search(re_start, line)
-if match:
-continue
 
-match = re.search(re_sgprs, line)
-if match:
-current_stats.sgprs = int(match.groups()[0])
-continue
+class si_parser(object):
+re_stats = re.compile(
+r"^Shader Stats: SGPRS: ([0-9]+) VGPRS: ([0-9]+) Code Size: ([0-9]+) "+
+r"LDS: ([0-9]+) Scratch: ([0-9]+)$")
+re_nop = re.compile("^\ts_nop ([0-9]+)")
 
-match = re.search(re_vgprs, line)
-if match:
-current_stats.vgprs = int(match.groups()[0])
-continue
-
-match = re.search(re_code_size, line)
-if match:
-current_stats.code_size = int(match.groups()[0])
-continue
-
-match = re.search(re_lds, line)
-if match:
-current_stats.lds = int(match.groups()[0])
-continue
+def __init__(self):
+self._stats = None
+self._in_disasm = False
+
+def finish(self):
+return self._stats
+
+def parse(self, msg):
+if not self._in_disasm:
+if msg == "Shader Disassembly Begin":
+old_stats = self._stats
+self._stats = si_stats()
+self._in_disasm = True
+return old_stats
+
+match = si_parser.re_stats.match(msg)
+if match is not None:
+self._stats.sgprs = int(match.group(1))
+self._stats.vgprs = int(match.group(2))
+self._stats.code_size = int(match.group(3))
+self._stats.lds = int(match.group(4))
+

Re: [Mesa-dev] [PATCH] gallium/r600: Replace ALIGN_DIVUP with DIV_ROUND_UP

2015-12-30 Thread Nicolai Hähnle

On 30.12.2015 13:44, Krzysztof A. Sobiecki wrote:

Nicolai Hähnle <nhaeh...@gmail.com> writes:


On 30.12.2015 08:42, Krzysztof A. Sobiecki wrote:

Nicolai Hähnle <nhaeh...@gmail.com> writes:


On 29.12.2015 14:27, Krzysztof A. Sobiecki wrote:

From: Krzysztof Sobiecki <sob...@gmail.com>

ALIGN_DIVUP is a driver specific(r600g) macro that duplicates DIV_ROUND_UP 
functionality.
Replacing it with DIV_ROUND_UP eliminates this problems.


Those macros are actually slightly different, and the assembly
generated by the ALIGN_DIVUP looks clearly better to me.

I remember seeing a very long thread about this not so long ago - what
was the resolution there?

Cheers,
Nicolai


I would like to remove ALIGN_DIVUP first and then debate with
implementation DIV_ROUND_UP should use.

btw. I prefer 1 + ((x - 1) / y)


That produces an incorrect result when x is an unsigned type and equal
to 0 -- and that is something that existing code definitely relies on.

Cheers,
Nicolai


Then what about (x / y) + (x % y != 0)


Generates similar assembly to the DIV_ROUND_UP version.

Anyway, now that I look at it again I'd say just go ahead and add my 
R-b. Yes, the assembly looks slightly worse, but only slightly, and 
avoiding surprises with overflows down the line seems like a good idea.


Cheers,
Nicolai
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev
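
For reference, a small standalone check of the round-up-division formulations discussed in this thread (illustrative only, not proposed for the tree):

#include <assert.h>
#include <stdint.h>

static uint32_t div_round_up(uint32_t x, uint32_t y)     { return (x + y - 1) / y; }
static uint32_t div_round_up_alt(uint32_t x, uint32_t y) { return x / y + (x % y != 0); }
/* The "1 + ((x - 1) / y)" form wraps around for x == 0 with unsigned types. */
static uint32_t div_round_up_bad(uint32_t x, uint32_t y) { return 1 + (x - 1) / y; }

int main(void)
{
   assert(div_round_up(0, 8) == 0);
   assert(div_round_up_alt(0, 8) == 0);
   assert(div_round_up_bad(0, 8) == 536870912);  /* 1 + 0xffffffff / 8, not 0 */

   /* Note that (x + y - 1) can itself overflow when x is close to UINT32_MAX,
    * which is the trade-off between the first two versions. */
   assert(div_round_up_alt(UINT32_MAX, 8) == 536870912);
   return 0;
}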


[Mesa-dev] [shader-db PATCH 3/5] si-report.py: report LLVM compile errors

2015-12-30 Thread Nicolai Hähnle
No need to report details of those errors, but complain when errors are
encountered so they aren't ignored.
---
 si-report.py | 38 +-
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/si-report.py b/si-report.py
index e4aea40..9df2012 100755
--- a/si-report.py
+++ b/si-report.py
@@ -90,6 +90,7 @@ def cmp_min_per(current, comp):
 
 class si_stats:
 def __init__(self):
+self.error = False
 self.sgprs = 0
 self.vgprs = 0
 self.code_size = 0
@@ -194,6 +195,15 @@ class si_parser(object):
 old_stats = self._stats
 self._stats = None
 return old_stats
+
+if msg == "LLVM compile failed":
+old_stats = self._stats
+self._stats = None
+
+if old_stats is None:
+old_stats = si_stats()
+old_stats.error = True
+return old_stats
 else:
 if msg == "Shader Disassembly End":
 self._in_disasm = False
@@ -307,12 +317,15 @@ def compare_results(before_all_results, 
after_all_results):
 num_affected = 0
 num_tests = 0
 num_shaders = 0
+num_after_errors = 0
+num_before_errors = 0
 
 all_names = set(itertools.chain(before_all_results.keys(), 
after_all_results.keys()))
 
 only_after_names = []
 only_before_names = []
 count_mismatch_names = []
+errors_names = []
 
 for name in all_names:
 before_test_results = before_all_results.get(name)
@@ -329,8 +342,17 @@ def compare_results(before_all_results, after_all_results):
 count_mismatch_names.append(name)
 
 num_tests += 1
+have_error = False
 
 for before, after in zip(before_test_results, after_test_results):
+if before.error:
+num_before_errors += 1
+if after.error:
+num_after_errors += 1
+if after.error or before.error:
+have_error = True
+continue
+
 total_before.add(before)
 total_after.add(after)
 num_shaders += 1
@@ -347,6 +369,9 @@ def compare_results(before_all_results, after_all_results):
 max_increase_unit.update(comp, cmp_max_unit)
 max_decrease_unit.update(comp, cmp_min_unit)
 
+if have_error:
+errors_names.append(name)
+
 print '{} shaders in {} tests'.format(num_shaders, num_tests)
 print "Totals:"
 print_before_after_stats(total_before, total_after)
@@ -371,16 +396,19 @@ def compare_results(before_all_results, 
after_all_results):
 
 def report_ignored(names, what):
 if names:
-print "*** Tests {} are ignored:".format(what)
+print "*** {} are ignored:".format(what)
 s = ', '.join(names[:5])
 if len(names) > 5:
 s += ', and {} more'.format(len(names) - 5)
 print s
 
-report_ignored(only_after_names, "only in 'after' results")
-report_ignored(only_before_names, "only in 'before' results")
-report_ignored(count_mismatch_names, "with different number of shaders")
-
+report_ignored(only_after_names, "Tests only in 'after' results")
+report_ignored(only_before_names, "Tests only in 'before' results")
+report_ignored(count_mismatch_names, "Tests with different number of 
shaders")
+report_ignored(errors_names, "Shaders with compilation errors")
+if num_after_errors > 0 or num_before_errors > 0:
+print "*** Compile errors encountered! (before: {}, after: {})".format(
+num_before_errors, num_after_errors)
 
 def main():
 before = sys.argv[1]
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [shader-db PATCH 4/5] si-report.py: reduce code duplication in the definition of metrics

2015-12-30 Thread Nicolai Hähnle
---
 si-report.py | 90 +---
 1 file changed, 32 insertions(+), 58 deletions(-)

diff --git a/si-report.py b/si-report.py
index 9df2012..bb6ea6d 100755
--- a/si-report.py
+++ b/si-report.py
@@ -43,34 +43,6 @@ def get_value_str(value, prefix, suffix):
 space = ''
 return "{}: {}{}{}\n".format(prefix, get_str(value), space, suffix)
 
-def get_sgpr_str(value, suffixes = True):
-return get_value_str(value, 'SGPRS', '')
-
-def get_vgpr_str(value, suffixes = True):
-return get_value_str(value, 'VGPRS', '')
-
-def get_code_size_str(value, suffixes = True):
-suffix = ''
-if suffixes:
-suffix = 'bytes'
-return get_value_str(value, 'Code Size', suffix)
-
-def get_lds_str(value, suffixes = True):
-suffix = ''
-if suffixes:
-suffix = 'blocks'
-return get_value_str(value, 'LDS', suffix)
-
-def get_scratch_str(value, suffixes = True):
-suffix = ''
-if suffixes:
-suffix = 'bytes per wave'
-return get_value_str(value, 'Scratch', suffix)
-
-def get_waitstates_str(value, suffixes = True):
-suffix = ''
-return get_value_str(value, 'Wait states', suffix)
-
 def calculate_percent_change(b, a):
 if b == 0:
 return 0
@@ -89,39 +61,41 @@ def cmp_min_per(current, comp):
 return calculate_percent_change(comp[1], comp[2]) < 
calculate_percent_change(current[1], current[2])
 
 class si_stats:
+metrics = [
+('sgprs', 'SGPRS', ''),
+('vgprs', 'VGPRS', ''),
+('code_size', 'Code Size', 'bytes'),
+('lds', 'LDS', 'blocks'),
+('scratch', 'Scratch', 'bytes per wave'),
+('waitstates', 'Wait states', ''),
+]
+
 def __init__(self):
 self.error = False
-self.sgprs = 0
-self.vgprs = 0
-self.code_size = 0
-self.lds = 0
-self.scratch = 0
-self.waitstates = 0
 
+for name in self.get_metrics():
+self.__dict__[name] = 0
 
 def to_string(self, suffixes = True):
-return "{}{}{}{}{}{}".format(
-get_sgpr_str(self.sgprs, suffixes),
-get_vgpr_str(self.vgprs, suffixes),
-get_code_size_str(self.code_size, suffixes),
-get_lds_str(self.lds, suffixes),
-get_scratch_str(self.scratch, suffixes),
-get_waitstates_str(self.waitstates, suffixes))
+strings = []
+for name, printname, suffix in si_stats.metrics:
+if not suffixes:
+suffix = ''
+strings.append(get_value_str(self.__dict__[name], printname, 
suffix))
+return ''.join(strings)
 
+def get_metrics(self):
+return [m[0] for m in si_stats.metrics]
 
 def __str__(self):
 return self.to_string()
 
 def add(self, other):
-self.sgprs += other.sgprs
-self.vgprs += other.vgprs
-self.code_size += other.code_size
-self.lds += other.lds
-self.scratch += other.scratch
-self.waitstates += other.waitstates
+for name in self.get_metrics():
+self.__dict__[name] += other.__dict__[name]
 
 def update(self, comp, cmp_fn):
-for name in self.__dict__.keys():
+for name in self.get_metrics():
 current = self.__dict__[name]
 if type(current) != tuple:
 current = (0, 0, 0)
@@ -129,7 +103,7 @@ class si_stats:
 self.__dict__[name] = comp.__dict__[name]
 
 def update_max(self, comp):
-for name in self.__dict__.keys():
+for name in self.get_metrics():
 current = self.__dict__[name]
 if type(current) == tuple:
 current = self.__dict__[name][0]
@@ -137,7 +111,7 @@ class si_stats:
 self.__dict__[name] = comp.__dict__[name]
 
 def update_min(self, comp):
-for name in self.__dict__.keys():
+for name in self.get_metrics():
 current = self.__dict__[name]
 if type(current) == tuple:
 current = self.__dict__[name][0]
@@ -145,17 +119,17 @@ class si_stats:
 self.__dict__[name] = comp.__dict__[name]
 
 def update_increase(self, comp):
-for name in self.__dict__.keys():
+for name in self.get_metrics():
 if comp.__dict__[name][0] > 0:
 self.__dict__[name] += 1
 
 def update_decrease(self, comp):
-for name in self.__dict__.keys():
+for name in self.get_metrics():
 if comp.__dict__[name][0] < 0:
 self.__dict__[name] += 1
 
 def is_empty(self):
-for name in self.__dict__.keys():
+for name in self.get_metrics():
 x = self.__dict__[name]
 if type(x) == tuple and x[0] is not 0:
 return False
@@ -248,7 +222,7 @@ def get_results(filename):
 
 def compare_stats(before, after):
 result = si_stats()
-for name in 

[Mesa-dev] [shader-db PATCH 5/5] si-report.py: report the tests causing max increase/decrease

2015-12-30 Thread Nicolai Hähnle
---
 si-report.py | 51 +--
 1 file changed, 33 insertions(+), 18 deletions(-)

diff --git a/si-report.py b/si-report.py
index bb6ea6d..3156639 100755
--- a/si-report.py
+++ b/si-report.py
@@ -37,12 +37,6 @@ def get_str(value, suffix = ' %'):
 else:
 return value
 
-def get_value_str(value, prefix, suffix):
-space = ' '
-if len(suffix) == 0:
-space = ''
-return "{}: {}{}{}\n".format(prefix, get_str(value), space, suffix)
-
 def calculate_percent_change(b, a):
 if b == 0:
 return 0
@@ -76,12 +70,32 @@ class si_stats:
 for name in self.get_metrics():
 self.__dict__[name] = 0
 
+self._minmax_testname = {}
+
+def copy(self):
+copy = si_stats()
+copy.error = self.error
+
+for name in self.get_metrics():
+copy.__dict__[name] = self.__dict__[name]
+
+copy._minmax_testname = self._minmax_testname.copy()
+
+return copy
+
 def to_string(self, suffixes = True):
 strings = []
 for name, printname, suffix in si_stats.metrics:
-if not suffixes:
-suffix = ''
-strings.append(get_value_str(self.__dict__[name], printname, 
suffix))
+string = "{}: {}".format(printname, get_str(self.__dict__[name]))
+
+if suffixes and len(suffix) > 0:
+string += ' ' + suffix
+
+minmax_testname = self._minmax_testname.get(name)
+if minmax_testname is not None:
+string += ' (in {})'.format(minmax_testname)
+
+strings.append(string + '\n')
 return ''.join(strings)
 
 def get_metrics(self):
@@ -94,13 +108,14 @@ class si_stats:
 for name in self.get_metrics():
 self.__dict__[name] += other.__dict__[name]
 
-def update(self, comp, cmp_fn):
+def update(self, comp, cmp_fn, testname):
 for name in self.get_metrics():
 current = self.__dict__[name]
 if type(current) != tuple:
 current = (0, 0, 0)
 if cmp_fn(current, comp.__dict__[name]):
 self.__dict__[name] = comp.__dict__[name]
+self._minmax_testname[name] = testname
 
 def update_max(self, comp):
 for name in self.get_metrics():
@@ -251,14 +266,14 @@ def print_before_after_stats(before, after, divisor = 1):
 print result
 
 def print_cmp_stats(comp):
-result = si_stats()
+result = comp.copy()
 for name in result.get_metrics():
-if type(comp.__dict__[name]) != tuple:
+if type(result.__dict__[name]) != tuple:
 a = 0
 b = 0
 else:
-b = comp.__dict__[name][1]
-a = comp.__dict__[name][2]
+b = result.__dict__[name][1]
+a = result.__dict__[name][2]
 if b == 0:
 percent = format_float(0.0)
 else:
@@ -338,10 +353,10 @@ def compare_results(before_all_results, 
after_all_results):
 total_affected_after.add(after)
 increases.update_increase(comp)
 decreases.update_decrease(comp)
-max_increase_per.update(comp, cmp_max_per)
-max_decrease_per.update(comp, cmp_min_per)
-max_increase_unit.update(comp, cmp_max_unit)
-max_decrease_unit.update(comp, cmp_min_unit)
+max_increase_per.update(comp, cmp_max_per, name)
+max_decrease_per.update(comp, cmp_min_per, name)
+max_increase_unit.update(comp, cmp_max_unit, name)
+max_decrease_unit.update(comp, cmp_min_unit, name)
 
 if have_error:
 errors_names.append(name)
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [shader-db PATCH 1/5] run: create debug contexts

2015-12-30 Thread Nicolai Hähnle
For Gallium-based drivers, this is required for receiving shader information
via debug messages.
---
 run.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/run.c b/run.c
index 82d8c91..685f830 100644
--- a/run.c
+++ b/run.c
@@ -435,6 +435,7 @@ main(int argc, char **argv)
 EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT_KHR,
 EGL_CONTEXT_MAJOR_VERSION_KHR, 3,
 EGL_CONTEXT_MINOR_VERSION_KHR, 2,
+EGL_CONTEXT_FLAGS_KHR, EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR,
 EGL_NONE
 };
 EGLContext core_ctx = eglCreateContext(egl_dpy, cfg, EGL_NO_CONTEXT,
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/9] gallium/radeon: always add +DumpCode to the LLVM target machine for LLVM <= 3.5

2016-01-02 Thread Nicolai Hähnle
What's the reason for always having +DumpCode? Generating the assembly 
is some overhead that's usually unnecessary. Even if it's a small part 
of the profiles I've seen, it still seems like a natural thing to just 
skip. From what I can tell it should be dependent on any of the shader 
dumping flags + DBG_CHECK_VM being set. In any case, I suppose that 
would be for a separate commit.


Cheers,
Nicolai

On 01.01.2016 09:13, Marek Olšák wrote:

From: Marek Olšák 

It's the same behavior that we use for later LLVM.
---
  src/gallium/drivers/r600/r600_llvm.c  | 2 +-
  src/gallium/drivers/radeon/radeon_llvm_emit.c | 5 ++---
  src/gallium/drivers/radeon/radeon_llvm_emit.h | 2 +-
  src/gallium/drivers/radeonsi/si_shader.c  | 2 +-
  4 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_llvm.c 
b/src/gallium/drivers/r600/r600_llvm.c
index 1cc3031..7d93658 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -922,7 +922,7 @@ unsigned r600_llvm_compile(
const char * gpu_family = r600_get_llvm_processor_name(family);

	memset(&binary, 0, sizeof(struct radeon_shader_binary));
-   r = radeon_llvm_compile(mod, &binary, gpu_family, dump, dump, NULL);
+   r = radeon_llvm_compile(mod, &binary, gpu_family, dump, NULL);

	r = r600_create_shader(bc, &binary, use_kill);

diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c 
b/src/gallium/drivers/radeon/radeon_llvm_emit.c
index 61ed940..f8c7f54 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
@@ -141,7 +141,7 @@ static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef 
di, void *context)
   * @returns 0 for success, 1 for failure
   */
  unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary 
*binary,
-const char *gpu_family, bool dump_ir, bool 
dump_asm,
+const char *gpu_family, bool dump_ir,
 LLVMTargetMachineRef tm)
  {

@@ -165,8 +165,7 @@ unsigned radeon_llvm_compile(LLVMModuleRef M, struct 
radeon_shader_binary *binar
}
strncpy(cpu, gpu_family, CPU_STRING_LEN);
memset(fs, 0, sizeof(fs));
-   if (dump_asm)
-   strncpy(fs, "+DumpCode", FS_STRING_LEN);
+   strncpy(fs, "+DumpCode", FS_STRING_LEN);
tm = LLVMCreateTargetMachine(target, triple, cpu, fs,
  LLVMCodeGenLevelDefault, LLVMRelocDefault,
  LLVMCodeModelDefault);
diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.h 
b/src/gallium/drivers/radeon/radeon_llvm_emit.h
index e20aed9..5f956dd 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.h
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.h
@@ -38,7 +38,7 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type);
  LLVMTargetRef radeon_llvm_get_r600_target(const char *triple);

  unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary 
*binary,
-const char *gpu_family, bool dump_ir, bool 
dump_asm,
+const char *gpu_family, bool dump_ir,
 LLVMTargetMachineRef tm);

  #endif /* RADEON_LLVM_EMIT_H */
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index a9297a5..4044961 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3884,7 +3884,7 @@ int si_compile_llvm(struct si_screen *sscreen, struct 
si_shader *shader,
bool dump_ir = dump_asm && !(sscreen->b.debug_flags & DBG_NO_IR);

	r = radeon_llvm_compile(mod, &shader->binary,
-   r600_get_llvm_processor_name(sscreen->b.family), dump_ir, 
dump_asm, tm);
+   r600_get_llvm_processor_name(sscreen->b.family), dump_ir, tm);
if (r)
return r;



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/9] RadeonSI: Some shaders cleanups

2016-01-02 Thread Nicolai Hähnle

This looks much better now :)

For the series: Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

On 01.01.2016 09:13, Marek Olšák wrote:

Hi,

These are shader cleanups mostly around si_compile_llvm.

You may wonder why the "move si_shader_binary_upload out of xxx" patches are there. They 
are part of my one-variant-per-shader rework, which needs a lot of restructuring.

Besides this, I have 2 more series of cleanup patches, which I will send when 
this lands.

Please review.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] tgsi/scan: set which color components are read by a fragment shader

2016-01-06 Thread Nicolai Hähnle

On 05.01.2016 20:46, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

This will be used by radeonsi.
---
  src/gallium/auxiliary/tgsi/tgsi_scan.c | 30 ++
  src/gallium/auxiliary/tgsi/tgsi_scan.h |  1 +
  2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c 
b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index e3a6fb0..6ea32ee 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -187,14 +187,28 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
}

if (procType == TGSI_PROCESSOR_FRAGMENT &&
- !src->Register.Indirect &&
- info->input_semantic_name[src->Register.Index] ==
- TGSI_SEMANTIC_POSITION &&
-  (src->Register.SwizzleX == TGSI_SWIZZLE_Z ||
-   src->Register.SwizzleY == TGSI_SWIZZLE_Z ||
-   src->Register.SwizzleZ == TGSI_SWIZZLE_Z ||
-   src->Register.SwizzleW == TGSI_SWIZZLE_Z)) {
- info->reads_z = TRUE;
+  !src->Register.Indirect) {
+ unsigned name =
+info->input_semantic_name[src->Register.Index];
+ unsigned index =
+info->input_semantic_index[src->Register.Index];


Move index down into the TGSI_SEMANTIC_COLOR branch? Either way,

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>


+
+ if (name == TGSI_SEMANTIC_POSITION &&
+ (src->Register.SwizzleX == TGSI_SWIZZLE_Z ||
+  src->Register.SwizzleY == TGSI_SWIZZLE_Z ||
+  src->Register.SwizzleZ == TGSI_SWIZZLE_Z ||
+  src->Register.SwizzleW == TGSI_SWIZZLE_Z))
+info->reads_z = TRUE;
+
+ if (name == TGSI_SEMANTIC_COLOR) {
+unsigned mask =
+  (1 << src->Register.SwizzleX) |
+  (1 << src->Register.SwizzleY) |
+  (1 << src->Register.SwizzleZ) |
+  (1 << src->Register.SwizzleW);
+
+info->colors_read |= mask << (index * 4);
+ }
}
 }

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h 
b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index a3e4378..b0b423a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -77,6 +77,7 @@ struct tgsi_shader_info

 uint opcode_count[TGSI_OPCODE_LAST];  /**< opcode histogram */

+   ubyte colors_read; /**< which color components are read by the FS */
 ubyte colors_written;
 boolean reads_position; /**< does fragment shader read position? */
 boolean reads_z; /**< does fragment shader read depth? */


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev
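
A worked example of the colors_read layout from the hunk above: four bits per COLOR semantic index, one bit per channel named by the source swizzle. The swizzle constants below are assumed to mirror TGSI_SWIZZLE_X..W = 0..3; they are not taken from the patch itself.

#include <stdio.h>

enum { SWZ_X, SWZ_Y, SWZ_Z, SWZ_W };   /* assumed to match TGSI_SWIZZLE_* */

static unsigned swizzle_mask(unsigned x, unsigned y, unsigned z, unsigned w)
{
   return (1u << x) | (1u << y) | (1u << z) | (1u << w);
}

int main(void)
{
   unsigned colors_read = 0;

   /* A shader reading COLOR0.xy (swizzle xyyy) ... */
   colors_read |= swizzle_mask(SWZ_X, SWZ_Y, SWZ_Y, SWZ_Y) << (0 * 4);
   /* ... and COLOR1.w (swizzle wwww). */
   colors_read |= swizzle_mask(SWZ_W, SWZ_W, SWZ_W, SWZ_W) << (1 * 4);

   printf("colors_read = 0x%x\n", colors_read);   /* prints 0x83 */
   return 0;
}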


Re: [Mesa-dev] [PATCH 2/3] tgsi/scan: fix tgsi_shader_info::reads_z

2016-01-06 Thread Nicolai Hähnle

Patches 1 & 2 are

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

On 05.01.2016 20:46, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

This has no users in Mesa.
---
  src/gallium/auxiliary/tgsi/tgsi_scan.c | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c 
b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index e3feed9..e3a6fb0 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -187,8 +187,9 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
}

if (procType == TGSI_PROCESSOR_FRAGMENT &&
-  info->reads_position &&
-  src->Register.Index == 0 &&
+ !src->Register.Indirect &&
+ info->input_semantic_name[src->Register.Index] ==
+ TGSI_SEMANTIC_POSITION &&
(src->Register.SwizzleX == TGSI_SWIZZLE_Z ||
 src->Register.SwizzleY == TGSI_SWIZZLE_Z ||
 src->Register.SwizzleZ == TGSI_SWIZZLE_Z ||


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/23] radeonsi: simplify setting the DONE bit for PS exports

2016-01-06 Thread Nicolai Hähnle

Patches 1-5 are

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

On 06.01.2016 07:41, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

First find out what the last export is and simply set the DONE bit there.
---
  src/gallium/drivers/radeonsi/si_shader.c| 126 ++--
  src/gallium/drivers/radeonsi/si_state_shaders.c |   2 +-
  2 files changed, 55 insertions(+), 73 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 85113c0..8441fb4 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2109,10 +2109,36 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
struct tgsi_shader_info *info = &shader->selector->info;
LLVMBuilderRef builder = base->gallivm->builder;
LLVMValueRef args[9];
-   LLVMValueRef last_args[9] = { 0 };
int depth_index = -1, stencil_index = -1, samplemask_index = -1;
+   int last_color_export = -1;
int i;

+   /* If there are no outputs, add a dummy export. */
+   if (!info->num_outputs) {
+   args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels */
+   args[1] = uint->one; /* whether the EXEC mask is valid */
+   args[2] = uint->one; /* DONE bit */
+   args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT);
+   args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
+   args[5] = uint->zero; /* R */
+   args[6] = uint->zero; /* G */
+   args[7] = uint->zero; /* B */
+   args[8] = uint->zero; /* A */
+
+   lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
+  LLVMVoidTypeInContext(base->gallivm->context),
+  args, 9, 0);
+   return;
+   }
+
+   /* Determine the last export. If MRTZ is present, it's always last.
+* Otherwise, find the last color export.
+*/
+   if (!info->writes_z && !info->writes_stencil && !info->writes_samplemask)
+   for (i = 0; i < info->num_outputs; i++)
+   if (info->output_semantic_name[i] == TGSI_SEMANTIC_COLOR)
+   last_color_export = i;
+
for (i = 0; i < info->num_outputs; i++) {
unsigned semantic_name = info->output_semantic_name[i];
unsigned semantic_index = info->output_semantic_index[i];
@@ -2157,56 +2183,48 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)

break;
default:
-   target = 0;
fprintf(stderr,
"Warning: SI unhandled fs output type:%d\n",
semantic_name);
+   continue;
}

-   si_llvm_init_export_args_load(bld_base,
- si_shader_ctx->radeon_bld.soa.outputs[i],
- target, args);
-
-   if (semantic_name == TGSI_SEMANTIC_COLOR) {
-   /* If there is an export instruction waiting to be emitted, do so now. */
-   if (last_args[0]) {
-   lp_build_intrinsic(base->gallivm->builder,
-  "llvm.SI.export",
+   /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
+   if (semantic_index == 0 &&
+   si_shader_ctx->shader->key.ps.last_cbuf > 0) {
+   for (int c = 1; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) {
+   si_llvm_init_export_args_load(bld_base,
+ si_shader_ctx->radeon_bld.soa.outputs[i],
+ V_008DFC_SQ_EXP_MRT + c, args);
+   lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
   LLVMVoidTypeInContext(base->gallivm->context),
-  last_args, 9, 0);
+  args, 9, 0);
}
+   }

-   /* This instruction will be emitted at the end of the shader. */
-   memcpy(last_args, args, sizeof(args));
-
-   /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
- 
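
The scheme the commit message describes can be modeled in isolation (a sketch of the idea only, not the si_shader.c code): work out up front which export will be emitted last, then set the DONE bit only on that export.

#include <stdbool.h>

enum sem { SEM_COLOR, SEM_OTHER };

/* Returns the index of the color output that should carry the DONE bit, or
 * -1 if none should: when MRTZ (depth/stencil/samplemask) is written it is
 * emitted after every color export and carries DONE itself, and when there
 * are no outputs at all the caller emits a dummy export with DONE set. */
static int
find_last_color_export(const enum sem *outputs, int num_outputs,
                       bool writes_mrtz)
{
   int last = -1;

   if (writes_mrtz)
      return -1;

   for (int i = 0; i < num_outputs; i++)
      if (outputs[i] == SEM_COLOR)
         last = i;

   return last;
}

/* When emitting color export i, the DONE argument (args[2] in the patch)
 * then simply becomes (i == last_color_export) ? 1 : 0. */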

[Mesa-dev] [PATCH 1/5] mesa/bufferobj: make _mesa_delete_buffer_object externally accessible

2016-01-05 Thread Nicolai Hähnle
From: Nicolai Hähnle <nicolai.haeh...@amd.com>

gl_buffer_object has grown more complicated and requires cleanup. Using this
function from drivers will be more future-proof.
---
 src/mesa/main/bufferobj.c | 2 +-
 src/mesa/main/bufferobj.h | 4 
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 8a9f9b6..4a098ac 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -447,7 +447,7 @@ _mesa_new_buffer_object(struct gl_context *ctx, GLuint name)
  *
  * Default callback for the \c dd_function_table::DeleteBuffer() hook.
  */
-static void
+void
 _mesa_delete_buffer_object(struct gl_context *ctx,
struct gl_buffer_object *bufObj)
 {
diff --git a/src/mesa/main/bufferobj.h b/src/mesa/main/bufferobj.h
index 3eac96d..a5bfe88 100644
--- a/src/mesa/main/bufferobj.h
+++ b/src/mesa/main/bufferobj.h
@@ -109,6 +109,10 @@ _mesa_initialize_buffer_object(struct gl_context *ctx,
GLuint name);
 
 extern void
+_mesa_delete_buffer_object(struct gl_context *ctx,
+   struct gl_buffer_object *bufObj);
+
+extern void
 _mesa_reference_buffer_object_(struct gl_context *ctx,
struct gl_buffer_object **ptr,
struct gl_buffer_object *bufObj);
-- 
2.5.0



[Mesa-dev] [PATCH 5/5] i965: use _mesa_delete_buffer_object

2016-01-05 Thread Nicolai Hähnle
From: Nicolai Hähnle <nicolai.haeh...@amd.com>

This is more future-proof, plugs the memory leak of Label and properly
destroys the buffer mutex.
---
 src/mesa/drivers/dri/i965/intel_buffer_objects.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
index b26c939..ce6b358 100644
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
@@ -167,7 +167,7 @@ brw_delete_buffer(struct gl_context * ctx, struct gl_buffer_object *obj)
_mesa_buffer_unmap_all_mappings(ctx, obj);
 
drm_intel_bo_unreference(intel_obj->buffer);
-   free(intel_obj);
+   _mesa_delete_buffer_object(ctx, obj);
 }
 
 
-- 
2.5.0



[Mesa-dev] [PATCH 4/5] i915: use _mesa_delete_buffer_object

2016-01-05 Thread Nicolai Hähnle
From: Nicolai Hähnle <nicolai.haeh...@amd.com>

This is more future-proof, plugs the memory leak of Label and properly
destroys the buffer mutex.
---
 src/mesa/drivers/dri/i915/intel_buffer_objects.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i915/intel_buffer_objects.c b/src/mesa/drivers/dri/i915/intel_buffer_objects.c
index ef06743..e676096 100644
--- a/src/mesa/drivers/dri/i915/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/i915/intel_buffer_objects.c
@@ -99,7 +99,7 @@ intel_bufferobj_free(struct gl_context * ctx, struct gl_buffer_object *obj)
_mesa_align_free(intel_obj->sys_buffer);
 
drm_intel_bo_unreference(intel_obj->buffer);
-   free(intel_obj);
+   _mesa_delete_buffer_object(ctx, obj);
 }
 
 
-- 
2.5.0

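Together with patch 1/5 above, the i915 and i965 hunks follow a single pattern; a generic sketch of it (hypothetical my_* names, assuming the usual Mesa headers) could look like this:

#include <stdlib.h>
#include "main/bufferobj.h"   /* _mesa_delete_buffer_object() */

/* Hypothetical driver buffer subclass, mirroring how i915/i965 wrap
 * gl_buffer_object. */
struct my_buffer_object {
   struct gl_buffer_object Base;   /* must be the first member */
   void *backing;                  /* driver-private storage */
};

static void
my_delete_buffer(struct gl_context *ctx, struct gl_buffer_object *obj)
{
   struct my_buffer_object *my_obj = (struct my_buffer_object *) obj;

   /* Release driver-private resources first... */
   free(my_obj->backing);

   /* ...then let core Mesa do the common cleanup: free the Label, destroy
    * the buffer mutex and free the object itself, instead of a bare
    * free(my_obj). */
   _mesa_delete_buffer_object(ctx, obj);
}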

