from:"Nicolai Hähnle"

Re: [Mesa-dev] [PATCH] radeonsi: fix a crash when binding a sampler buffer

2016-02-11 Thread Nicolai Hähnle


On 11.02.2016 10:00, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

Buffers don't contain r600_texture.

Broken by 7aedbbacae6d3ec3d06735fff2eb66:
"radeonsi: put image, fmask, and sampler descriptors into one array"

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94091


Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>


---
  src/gallium/drivers/radeonsi/si_descriptors.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 34cc06f..345f2bb 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -203,7 +203,8 @@ static void si_set_sampler_view(struct si_context *sctx,
pipe_sampler_view_reference(>views[slot], view);
memcpy(views->desc.list + slot * 16, rview->state, 8*4);

-   if (rtex && rtex->fmask.size) {
+   if (view->texture && view->texture->target != PIPE_BUFFER &&
+   rtex->fmask.size) {
memcpy(views->desc.list + slot*16 + 8,
   rview->fmask_state, 8*4);
} else {


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/3] gallium/radeon: drop support for LLVM 3.5

2016-02-11 Thread Nicolai Hähnle

creen *screen,
struct si_screen* sscreen = (struct si_screen *)screen;
struct radeon_winsys *ws = sscreen->b.ws;
LLVMTargetRef r600_target;
-#if HAVE_LLVM >= 0x0306
const char *triple = "amdgcn--";
-#endif
int shader, i;

if (!sctx)
@@ -210,7 +206,6 @@ static struct pipe_context *si_create_context(struct 
pipe_screen *screen,
 */
sctx->scratch_waves = 32 * sscreen->b.info.num_good_compute_units;

-#if HAVE_LLVM >= 0x0306
/* Initialize LLVM TargetMachine */
r600_target = radeon_llvm_get_r600_target(triple);
sctx->tm = LLVMCreateTargetMachine(r600_target, triple,
@@ -223,7 +218,6 @@ static struct pipe_context *si_create_context(struct 
pipe_screen *screen,
   LLVMCodeGenLevelDefault,
   LLVMRelocDefault,
   LLVMCodeModelDefault);
-#endif

return >b.b;
  fail:
@@ -310,6 +304,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
case PIPE_CAP_QUERY_MEMORY_INFO:
+   case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
return 1;

case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
@@ -335,9 +330,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
return 4;

-   case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
-   return HAVE_LLVM >= 0x0306;
-
case PIPE_CAP_GLSL_FEATURE_LEVEL:
return HAVE_LLVM >= 0x0307 ? 410 : 330;

@@ -449,18 +441,13 @@ static int si_get_shader_param(struct pipe_screen* 
pscreen, unsigned shader, enu
case PIPE_SHADER_TESS_CTRL:
case PIPE_SHADER_TESS_EVAL:
/* LLVM 3.6.2 is required for tessellation because of bug fixes 
there */
-   if (HAVE_LLVM < 0x0306 ||
-   (HAVE_LLVM == 0x0306 && MESA_LLVM_VERSION_PATCH < 2))
+   if (HAVE_LLVM == 0x0306 && MESA_LLVM_VERSION_PATCH < 2)
return 0;
break;
case PIPE_SHADER_COMPUTE:
switch (param) {
case PIPE_SHADER_CAP_PREFERRED_IR:
-#if HAVE_LLVM < 0x0306
-   return PIPE_SHADER_IR_LLVM;
-#else
return PIPE_SHADER_IR_NATIVE;
-#endif
case PIPE_SHADER_CAP_DOUBLES:
return HAVE_LLVM >= 0x0307;

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 8d7f458..fa4e26a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4369,12 +4369,10 @@ static void si_init_shader_ctx(struct si_shader_context 
*ctx,
bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;

-   if (HAVE_LLVM >= 0x0306) {
-   bld_base->op_actions[TGSI_OPCODE_MAX].emit = 
build_tgsi_intrinsic_nomem;
-   bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = 
"llvm.maxnum.f32";
-   bld_base->op_actions[TGSI_OPCODE_MIN].emit = 
build_tgsi_intrinsic_nomem;
-   bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = 
"llvm.minnum.f32";
-   }
+   bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem;
+   bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.maxnum.f32";
+   bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem;
+   bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
  }

  int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index dab27df..7823759 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -176,8 +176,7 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)

 /* LLVM 3.6 is required for VI. */
 if (ws->info.chip_class >= VI &&
-   (HAVE_LLVM < 0x0306 ||
-(HAVE_LLVM == 0x0306 && MESA_LLVM_VERSION_PATCH < 1))) {
+   HAVE_LLVM == 0x0306 && MESA_LLVM_VERSION_PATCH < 1) {
fprintf(stderr, "amdgpu: LLVM 3.6.1 is required, got LLVM %i.%i.%i\n",
HAVE_LLVM >> 8, HAVE_LLVM & 255, MESA_LLVM_VERSION_PATCH);
goto fail;



While you're at it, maybe change the comment to LLVM 3.6.1.

Including the gs copy shader addition, the series is

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radeonsi: fix build with LLVM 3.6

2016-02-11 Thread Nicolai Hähnle


Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

On 11.02.2016 11:50, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

Broken by this cleanup: 3dc1cb0cc7605a2f3128311f5a6052f740fc7b0d
---
  src/gallium/drivers/radeonsi/si_shader.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index baa1090..19c427a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4336,7 +4336,7 @@ static void si_init_shader_ctx(struct si_shader_context 
*ctx,
ctx->i1 = LLVMInt1TypeInContext(ctx->radeon_bld.gallivm.context);
ctx->i8 = LLVMInt8TypeInContext(ctx->radeon_bld.gallivm.context);
ctx->i32 = LLVMInt32TypeInContext(ctx->radeon_bld.gallivm.context);
-   ctx->i128 = LLVMInt128TypeInContext(ctx->radeon_bld.gallivm.context);
+   ctx->i128 = LLVMIntTypeInContext(ctx->radeon_bld.gallivm.context, 128);
ctx->f32 = LLVMFloatTypeInContext(ctx->radeon_bld.gallivm.context);
ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
ctx->v4i32 = LLVMVectorType(ctx->i32, 4);


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] radeonsi: Allow dumping LLVM IR before optimization passes

2016-02-05 Thread Nicolai Hähnle


On 04.02.2016 13:52, Tom Stellard wrote:

On Thu, Feb 04, 2016 at 09:15:26AM +0100, Nicolai Hähnle wrote:

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

Set R600_DEBUG=preoptir to dump the LLVM IR before optimization passes,
to allow diagnosing problems caused by optimization passes.

Note that in order to compile the resulting IR with llc, you will first
have to run at least the mem2reg pass, e.g.

opt -mem2reg -S < shader.ll | llc -march=amdgcn -mcpu=bonaire

Signed-off-by: Michel Dänzer <michel.daen...@amd.com> (original patch)
Signed-off-by: Nicolai Hähnle <nicolai.haeh...@amd.com> (w/ debug flag)
---
Having the option is a good idea, but I prefer to have a separate debug
flag for it so that when you try to analyze bugs in codegen (which in
my experience happens more often) you don't have to worry about
replicating the exact same sequence of optimizations manually via the
command line to reproduce the problem there.

  src/gallium/drivers/radeon/r600_pipe_common.c |  1 +
  src/gallium/drivers/radeon/r600_pipe_common.h |  1 +
  src/gallium/drivers/radeonsi/si_shader.c  | 16 ++--
  3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index c827dbd..a1432ed 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -393,6 +393,7 @@ static const struct debug_named_value 
common_debug_options[] = {
{ "noir", DBG_NO_IR, "Don't print the LLVM IR"},
{ "notgsi", DBG_NO_TGSI, "Don't print the TGSI"},
{ "noasm", DBG_NO_ASM, "Don't print disassembled shaders"},
+   { "preoptir", DBG_PREOPT_IR, "Print the LLVM IR before initial 
optimizations" },

/* features */
{ "nodma", DBG_NO_ASYNC_DMA, "Disable asynchronous DMA" },
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index c7e4c44..4e36631 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -71,6 +71,7 @@
  #define DBG_NO_IR (1 << 12)
  #define DBG_NO_TGSI   (1 << 13)
  #define DBG_NO_ASM(1 << 14)
+#define DBG_PREOPT_IR  (1 << 15)
  /* Bits 21-31 are reserved for the r600g driver. */
  /* features */
  #define DBG_NO_ASYNC_DMA  (1llu << 32)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 8b524cf..d9ed6b2 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4092,7 +4092,7 @@ int si_compile_llvm(struct si_screen *sscreen,
if (r600_can_dump_shader(>b, processor)) {
fprintf(stderr, "radeonsi: Compiling shader %d\n", count);

-   if (!(sscreen->b.debug_flags & DBG_NO_IR))
+   if (!(sscreen->b.debug_flags & (DBG_NO_IR | DBG_PREOPT_IR)))
LLVMDumpModule(mod);
}

@@ -4178,6 +4178,12 @@ static int si_generate_gs_copy_shader(struct si_screen 
*sscreen,
si_llvm_export_vs(bld_base, outputs, gsinfo->num_outputs);

LLVMBuildRetVoid(bld_base->base.gallivm->builder);
+
+   /* Dump LLVM IR before any optimization passes */
+   if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
+   r600_can_dump_shader(>b, TGSI_PROCESSOR_GEOMETRY))
+   LLVMDumpModule(bld_base->base.gallivm->module);
+
radeon_llvm_finalize_module(_shader_ctx->radeon_bld);

if (dump)
@@ -4385,9 +4391,15 @@ int si_shader_create(struct si_screen *sscreen, 
LLVMTargetMachineRef tm,
}

LLVMBuildRetVoid(bld_base->base.gallivm->builder);
+   mod = bld_base->base.gallivm->module;
+
+   /* Dump LLVM IR before any optimization passes */
+   if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
+   r600_can_dump_shader(>b, si_shader_ctx.type))
+   LLVMDumpModule(mod);
+


Is there any reason not to add the dump in radeon_llvm_finalize_module()
after PromoteMem2Reg has run?  This would make the output readable by llc
and then you would only need to add the dump call in one place.


In addition to Michel's observation, that's not really possible anyway 
because all the passes are run at once from LLVMRunFunctionPassManager, 
the functions before just set things up.


I did consider doing the dump from radeon_llvm_finalize_module, but the 
function doesn't have (and probably shouldn't have) the information 
needed to make the decision whether to dump or not, so IMO it's cleaner 
this way.


Cheers,
Nicolai



-Tom


radeon_llvm_finalize_module(_shader_ctx.radeon_bld);

-   mod = bld_base->base.gallivm->module;

Re: [Mesa-dev] [PATCH 1/1] r600, compute: Do not overwrite pipe_resource.screen

2016-02-05 Thread Nicolai Hähnle


On 05.02.2016 17:54, Jan Vesely wrote:

found by inspection.

Signed-off-by: Jan Vesely 


Pushed, thanks!


---
  src/gallium/drivers/r600/evergreen_compute.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
b/src/gallium/drivers/r600/evergreen_compute.c
index bc27333..3b4f139 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -967,8 +967,8 @@ struct pipe_resource *r600_compute_global_buffer_create(
templ->array_size);

result->base.b.vtbl = _global_buffer_vtbl;
-   result->base.b.b.screen = screen;
result->base.b.b = *templ;
+   result->base.b.b.screen = screen;
pipe_reference_init(>base.b.b.reference, 1);

size_in_dw = (templ->width0+3) / 4;


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radeonsi: don't emit unnecessary NULL exports for unbound targets (v2)

2016-02-05 Thread Nicolai Hähnle


Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

On 05.02.2016 17:51, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

v2: remove semantic index == 0 checks
 add the else statement to remove shadowing of args
---
  src/gallium/drivers/radeonsi/si_shader.c | 88 ++--
  1 file changed, 62 insertions(+), 26 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index bd45d4a..63a2908 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2274,7 +2274,6 @@ static void si_export_mrt_color(struct 
lp_build_tgsi_context *bld_base,
  {
struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
struct lp_build_context *base = _base->base;
-   LLVMValueRef args[9];
int i;

/* Clamp color */
@@ -2296,27 +2295,46 @@ static void si_export_mrt_color(struct 
lp_build_tgsi_context *bld_base,
color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3]);

/* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
-   if (index == 0 &&
-   si_shader_ctx->shader->key.ps.last_cbuf > 0) {
-   for (int c = 1; c <= si_shader_ctx->shader->key.ps.last_cbuf; 
c++) {
+   if (si_shader_ctx->shader->key.ps.last_cbuf > 0) {
+   LLVMValueRef args[8][9];
+   int c, last = -1;
+
+   /* Get the export arguments, also find out what the last one 
is. */
+   for (c = 0; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) {
si_llvm_init_export_args(bld_base, color,
-V_008DFC_SQ_EXP_MRT + c, args);
+V_008DFC_SQ_EXP_MRT + c, 
args[c]);
+   if (args[c][0] != bld_base->uint_bld.zero)
+   last = c;
+   }
+
+   /* Emit all exports. */
+   for (c = 0; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) {
+   if (is_last && last == c) {
+   args[c][1] = bld_base->uint_bld.one; /* whether 
the EXEC mask is valid */
+   args[c][2] = bld_base->uint_bld.one; /* DONE 
bit */
+   } else if (args[c][0] == bld_base->uint_bld.zero)
+   continue; /* unnecessary NULL export */
+
lp_build_intrinsic(base->gallivm->builder, 
"llvm.SI.export",
   
LLVMVoidTypeInContext(base->gallivm->context),
-  args, 9, 0);
+  args[c], 9, 0);
}
+   } else {
+   LLVMValueRef args[9];
+
+   /* Export */
+   si_llvm_init_export_args(bld_base, color, V_008DFC_SQ_EXP_MRT + 
index,
+args);
+   if (is_last) {
+   args[1] = bld_base->uint_bld.one; /* whether the EXEC 
mask is valid */
+   args[2] = bld_base->uint_bld.one; /* DONE bit */
+   } else if (args[0] == bld_base->uint_bld.zero)
+   return; /* unnecessary NULL export */
+
+   lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
+  
LLVMVoidTypeInContext(base->gallivm->context),
+  args, 9, 0);
}
-
-   /* Export */
-   si_llvm_init_export_args(bld_base, color, V_008DFC_SQ_EXP_MRT + index,
-args);
-   if (is_last) {
-   args[1] = bld_base->uint_bld.one; /* whether the EXEC mask is 
valid */
-   args[2] = bld_base->uint_bld.one; /* DONE bit */
-   }
-   lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
-  LLVMVoidTypeInContext(base->gallivm->context),
-  args, 9, 0);
  }

  static void si_export_null(struct lp_build_tgsi_context *bld_base)
@@ -2351,19 +2369,37 @@ static void si_llvm_emit_fs_epilogue(struct 
lp_build_tgsi_context * bld_base)
int last_color_export = -1;
int i;

-   /* If there are no outputs, add a dummy export. */
-   if (!info->num_outputs) {
-   si_export_null(bld_base);
-   return;
-   }
-
/* Determine the last export. If MRTZ is present, it's always last.
 * Otherwise, find the last color export.
 */
-   if (!info->writes_z && !info->writes_stencil && 
!info->writes_samplemask)
-   for (i = 0; i < info->num_outputs; i++)
-   if (info->

Re: [Mesa-dev] [PATCH 4/4] radeonsi: fix Hyper-Z on Stoney

2016-01-29 Thread Nicolai Hähnle


On 29.01.2016 15:02, Marek Olšák wrote:

From: Marek Olšák 

Cc: 10.0 10.1 


"11.0 11.1"



---
  src/gallium/drivers/radeon/r600_texture.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 109b860..38e79f4 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -533,6 +533,10 @@ static unsigned r600_texture_get_htile_size(struct 
r600_common_screen *rscreen,
rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 38)
return 0;

+   /* Overalign HTILE on Stoney to fix 
piglit/depthstencil-render-miplevels 585. */
+   if (rscreen->family == CHIP_STONEY)
+   num_pipes = 4;
+


I seem to recall problems with this family of tests on Tonga as well at 
some point. Could that be related?


Nicolai


switch (num_pipes) {
case 1:
cl_width = 32;


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/4] radeonsi: rework RB+ for Stoney

2016-01-29 Thread Nicolai Hähnle


Patches 1-3: Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

On 29.01.2016 15:02, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

This fixes it.

States which also need to be taken into account:
- SPI color formats - each down-conversion format supports only a limited set
   of SPI formats
- whether MSAA resolving and logic op are enabled

These need special handling:
- blending
- disabled channels
---
  src/gallium/drivers/radeon/r600_pipe_common.h   |   2 -
  src/gallium/drivers/radeon/r600_texture.c   |   6 +-
  src/gallium/drivers/radeonsi/si_state.c | 326 
  src/gallium/drivers/radeonsi/si_state_shaders.c |   3 +
  4 files changed, 228 insertions(+), 109 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 88e9cbc..c7e4c44 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -261,8 +261,6 @@ struct r600_surface {
unsigned spi_shader_col_format_alpha;   /* SI+, alpha-to-coverage */
unsigned spi_shader_col_format_blend;   /* SI+, blending without alpha. 
*/
unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with 
alpha. */
-   unsigned sx_ps_downconvert; /* Stoney only */
-   unsigned sx_blend_opt_epsilon;  /* Stoney only */
struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. 
R600 only */
struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. 
R600 only */

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 7c4717d..109b860 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1388,7 +1388,6 @@ void evergreen_do_fast_color_clear(struct 
r600_common_context *rctx,
return;

for (i = 0; i < fb->nr_cbufs; i++) {
-   struct r600_surface *surf;
struct r600_texture *tex;
unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;

@@ -1399,7 +1398,6 @@ void evergreen_do_fast_color_clear(struct 
r600_common_context *rctx,
if (!(*buffers & clear_bit))
continue;

-   surf = (struct r600_surface *)fb->cbufs[i];
tex = (struct r600_texture *)fb->cbufs[i]->texture;

/* 128-bit formats are unusupported */
@@ -1446,8 +1444,8 @@ void evergreen_do_fast_color_clear(struct 
r600_common_context *rctx,
if (clear_words_needed)
tex->dirty_level_mask |= 1 << 
fb->cbufs[i]->u.tex.level;
} else {
-   /* RB+ doesn't work with CMASK fast clear. */
-   if (surf->sx_ps_downconvert)
+   /* Stoney/RB+ doesn't work with CMASK fast clear. */
+   if (rctx->family == CHIP_STONEY)
continue;

/* ensure CMASK is enabled */
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 606b72f..7289aaa 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -272,6 +272,143 @@ static void si_emit_cb_render_state(struct si_context 
*sctx, struct r600_atom *a
cb_target_mask = 0;

radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, cb_target_mask);
+
+   /* STONEY-specific register settings. */
+   if (sctx->b.family == CHIP_STONEY) {
+   unsigned spi_shader_col_format =
+   sctx->ps_shader.cso ?
+   sctx->ps_shader.current->key.ps.spi_shader_col_format : 
0;
+   unsigned sx_ps_downconvert = 0;
+   unsigned sx_blend_opt_epsilon = 0;
+   unsigned sx_blend_opt_control = 0;
+
+   for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
+   struct r600_surface *surf =
+   (struct 
r600_surface*)sctx->framebuffer.state.cbufs[i];
+   unsigned format, swap, spi_format, colormask;
+   bool has_alpha, has_rgb;
+
+   if (!surf)
+   continue;
+
+   format = G_028C70_FORMAT(surf->cb_color_info);
+   swap = G_028C70_COMP_SWAP(surf->cb_color_info);
+   spi_format = (spi_shader_col_format >> (i * 4)) & 0xf;
+   colormask = (cb_target_mask >> (i * 4)) & 0xf;
+
+   /* Set if RGB and A are present. */
+   has_alpha = 
!G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib);
+
+   if (format == V_028C70_COLOR_8 ||
+

Re: [Mesa-dev] [PATCH 1/7] gallium: Add PIPE_CAP_FRAMEBUFFER_LAYERS

2016-02-03 Thread Nicolai Hähnle


On 31.01.2016 15:11, Ilia Mirkin wrote:

This seems odd. When would this not just be MaxArrayTextureLayers? IMO
a plain enable cap, like PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENTS, makes
more sense here.


From the GL_ARB_framebuffer_no_attachments spec overview:

Additionally, this extension provides queryable implementation-dependent 
maximums for framebuffer width, height, layer count, and sample count, 
which may differ from similar limits on textures and renderbuffers. 
These maximums will be used to error-check the default framebuffer 
parameters and also permit implementations to expose the ability to 
rasterize to an attachment-less framebuffer larger than the maximum 
supported texture size.


So having a Gallium cap that corresponds to GL_MAX_FRAMEBUFFER_LAYERS is 
not entirely silly. One could even argue that there should be caps for 
the other MAX_FRAMEBUFFER_* values as well...


Cheers,
Nicolai


On Sun, Jan 31, 2016 at 1:25 AM, Edward O'Callaghan
 wrote:

diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index d066784..99105bd 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -490,6 +490,7 @@ void st_init_extensions(struct pipe_screen *screen,
{ o(ARB_fragment_program_shadow),  PIPE_CAP_TEXTURE_SHADOW_MAP  
 },
{ o(ARB_framebuffer_object),   PIPE_CAP_MIXED_FRAMEBUFFER_SIZES 
 },
{ o(ARB_indirect_parameters),  
PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS   },
+  { o(ARB_framebuffer_no_attachments),   PIPE_CAP_MAX_FRAMEBUFFER_LAYERS   
},
{ o(ARB_instanced_arrays), 
PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR  },
{ o(ARB_occlusion_query),  PIPE_CAP_OCCLUSION_QUERY 
 },
{ o(ARB_occlusion_query2), PIPE_CAP_OCCLUSION_QUERY 
 },
@@ -962,6 +963,18 @@ void st_init_extensions(struct pipe_screen *screen,
   extensions->AMD_vertex_shader_viewport_index = GL_TRUE;
 }

+   /* ARB_framebuffer_no_attachments */
+   consts->MaxFramebufferWidth   = consts->MaxViewportWidth;
+   consts->MaxFramebufferHeight  = consts->MaxViewportHeight;
+   consts->MaxFramebufferSamples = consts->MaxSamples;
+   consts->MaxFramebufferLayers
+  = screen->get_param(screen, PIPE_CAP_MAX_FRAMEBUFFER_LAYERS);
+   if ((consts->MaxSamples >= 4 && consts->MaxFramebufferLayers >= 2048)
+  || (consts->MaxFramebufferSamples >= consts->MaxSamples &&
+  consts->MaxFramebufferLayers  >= consts->MaxArrayTextureLayers))
+  extensions->ARB_framebuffer_no_attachments = GL_TRUE;


You don't want both of these... either you want to just enable the ext
when the cap is on, or you want to enable the ext under certain more
complex conditions.

   -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/4] radeonsi: fix Hyper-Z on Stoney

2016-02-03 Thread Nicolai Hähnle


On 01.02.2016 15:16, Marek Olšák wrote:

On Fri, Jan 29, 2016 at 7:46 PM, Marek Olšák <mar...@gmail.com> wrote:

On Fri, Jan 29, 2016 at 4:15 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote:

On 29.01.2016 15:02, Marek Olšák wrote:


From: Marek Olšák <marek.ol...@amd.com>

Cc: 10.0 10.1 <mesa-sta...@lists.freedesktop.org>



"11.0 11.1"


Rb?


Yes.


I think it might be worth checking if the depthstencil-render-miplevel
tests pass on Kabini. (does anybody have Kabini?)


Well. I confirmed today that the Kabini board I have here is dead even 
with a different power supply, so at least I don't anymore.


Nicolai



Marek


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 0/5] Memory info extensions

2016-02-03 Thread Nicolai Hähnle


On 02.02.2016 14:44, Marek Olšák wrote:

Hi,

The motivation behind this is to allow games that use proprietary extensions to 
query the amount of VRAM to be able to query it with Mesa too. Such games are 
unlikely to use GLX_MESA_query_renderer in the foreseeable future.

Unreal Engine 3 does use one of these. Despite that, it doesn't help. UE3 is 
unable to use advanced graphics and increase its GPU memory pool size even with 
these extensions.

Anyway, I'd like to merge this.

I don't plan to add a piglit, but I have a patch which adds support for both 
extensions to glxinfo.


Out of curiosity, which one of these does UE3 use?

The ATI extension part looks fine to me. I hope we don't run into 
problems where we end up reporting more aggressive availability than 
other drivers and then tempt applications into a behavior that triggers 
buffer swapping, but it seems like an acceptable risk.


The NV extension reports eviction counts, and I wonder if we might do 
more harm than good with an implementation that always reports 0 and 
misleads applications into thinking all is fine when they're really 
swapping buffers like hell.


For now, patches 1, 3, 5, and 4 without the NVX part are

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>



Please review.

  src/gallium/drivers/freedreno/freedreno_screen.c |  1 +
  src/gallium/drivers/i915/i915_screen.c   |  1 +
  src/gallium/drivers/ilo/ilo_screen.c |  1 +
  src/gallium/drivers/llvmpipe/lp_screen.c |  1 +
  src/gallium/drivers/nouveau/nv30/nv30_screen.c   |  1 +
  src/gallium/drivers/nouveau/nv50/nv50_screen.c   |  1 +
  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   |  1 +
  src/gallium/drivers/r300/r300_screen.c   |  1 +
  src/gallium/drivers/r600/r600_pipe.c |  1 +
  src/gallium/drivers/radeon/r600_pipe_common.c| 35 
+++
  src/gallium/drivers/radeonsi/si_pipe.c   |  1 +
  src/gallium/drivers/softpipe/sp_screen.c |  1 +
  src/gallium/drivers/svga/svga_screen.c   |  1 +
  src/gallium/drivers/vc4/vc4_screen.c |  1 +
  src/gallium/include/pipe/p_defines.h |  1 +
  src/gallium/include/pipe/p_screen.h  |  6 ++
  src/gallium/include/pipe/p_state.h   | 11 +++
  src/mapi/glapi/gen/gl_API.xml| 14 ++
  src/mesa/main/dd.h   | 10 ++
  src/mesa/main/extensions_table.h |  2 ++
  src/mesa/main/get.c  | 54 
++
  src/mesa/main/get_hash_params.py | 12 
  src/mesa/main/mtypes.h   |  2 ++
  src/mesa/state_tracker/st_context.c  | 23 +++
  src/mesa/state_tracker/st_extensions.c   |  2 ++
  25 files changed, 185 insertions(+)

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 0/4] radeonsi: experimental support for GPUPerfStudio

2016-02-03 Thread Nicolai Hähnle

Hi,

this bunch of patches meets GPUPerfStudio half-way in supporting the timing
features on CI+ hardware. The latest version of GPUPerfStudio is required.

With these patches, GPUPerfStudio should recognize our driver as supported
and offer its frame profiling features without crashing. It should also
report reasonable numbers in the profile. However, I haven't fully
validated the reported numbers, so while I'd like to get this merged now,
it should still be considered as somewhat experimental. Please review.

Thanks,
Nicolai
--
 .../drivers/radeon/r600_perfcounter.c|  38 +++---
 src/gallium/drivers/radeon/r600_query.c  |  80 ++-
 src/gallium/drivers/radeon/r600_query.h  |  32 ++---
 .../drivers/radeonsi/si_perfcounter.c| 121 +
 4 files changed, 201 insertions(+), 70 deletions(-)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/4] radeonsi: re-order the SQ_xx performance counter blocks

2016-02-03 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

This is yet another change motivated by appeasing AMD GPUPerfStudio's
hardcoding of performance counter group numbers.
---
 src/gallium/drivers/radeon/r600_perfcounter.c | 38 ---
 src/gallium/drivers/radeon/r600_query.h   | 22 +++-
 src/gallium/drivers/radeonsi/si_perfcounter.c | 21 +++
 3 files changed, 42 insertions(+), 39 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_perfcounter.c 
b/src/gallium/drivers/radeon/r600_perfcounter.c
index fad7bde..f3529a1 100644
--- a/src/gallium/drivers/radeon/r600_perfcounter.c
+++ b/src/gallium/drivers/radeon/r600_perfcounter.c
@@ -33,10 +33,6 @@
 /* Max counters per HW block */
 #define R600_QUERY_MAX_COUNTERS 16
 
-static const char * const r600_pc_shader_suffix[] = {
-   "", "_PS", "_VS", "_GS", "_ES", "_HS", "_LS", "_CS"
-};
-
 static struct r600_perfcounter_block *
 lookup_counter(struct r600_perfcounters *pc, unsigned index,
   unsigned *base_gid, unsigned *sub_index)
@@ -92,6 +88,8 @@ struct r600_pc_counter {
unsigned stride;
 };
 
+#define R600_PC_SHADERS_WINDOWING (1 << 31)
+
 struct r600_query_pc {
struct r600_query_hw b;
 
@@ -246,32 +244,29 @@ static struct r600_pc_group *get_group_state(struct 
r600_common_screen *screen,
if (block->flags & R600_PC_BLOCK_SHADER) {
unsigned sub_gids = block->num_instances;
unsigned shader_id;
-   unsigned shader_mask;
-   unsigned query_shader_mask;
+   unsigned shaders;
+   unsigned query_shaders;
 
if (block->flags & R600_PC_BLOCK_SE_GROUPS)
sub_gids = sub_gids * screen->info.max_se;
shader_id = sub_gid / sub_gids;
sub_gid = sub_gid % sub_gids;
 
-   if (shader_id == 0)
-   shader_mask = R600_PC_SHADER_ALL;
-   else
-   shader_mask = 1 << (shader_id - 1);
+   shaders = screen->perfcounters->shader_type_bits[shader_id];
 
-   query_shader_mask = query->shaders & R600_PC_SHADER_ALL;
-   if (query_shader_mask && query_shader_mask != shader_mask) {
+   query_shaders = query->shaders & ~R600_PC_SHADERS_WINDOWING;
+   if (query_shaders && query_shaders != shaders) {
fprintf(stderr, "r600_perfcounter: incompatible shader 
groups\n");
FREE(group);
return NULL;
}
-   query->shaders |= shader_mask;
+   query->shaders = shaders;
}
 
-   if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED) {
+   if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED && !query->shaders) {
// A non-zero value in query->shaders ensures that the shader
// masking is reset unless the user explicitly requests one.
-   query->shaders |= R600_PC_SHADER_WINDOWING;
+   query->shaders = R600_PC_SHADERS_WINDOWING;
}
 
if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
@@ -379,8 +374,8 @@ struct pipe_query *r600_create_batch_query(struct 
pipe_context *ctx,
}
 
if (query->shaders) {
-   if ((query->shaders & R600_PC_SHADER_ALL) == 0)
-   query->shaders |= R600_PC_SHADER_ALL;
+   if (query->shaders == R600_PC_SHADERS_WINDOWING)
+   query->shaders = 0x;
query->b.num_cs_dw_begin += pc->num_shaders_cs_dwords;
}
 
@@ -438,7 +433,7 @@ static boolean r600_init_block_names(struct 
r600_common_screen *screen,
if (block->flags & R600_PC_BLOCK_SE_GROUPS)
groups_se = screen->info.max_se;
if (block->flags & R600_PC_BLOCK_SHADER)
-   groups_shader = ARRAY_SIZE(r600_pc_shader_suffix);
+   groups_shader = screen->perfcounters->num_shader_types;
 
namelen = strlen(block->basename);
block->group_name_stride = namelen + 1;
@@ -462,14 +457,15 @@ static boolean r600_init_block_names(struct 
r600_common_screen *screen,
 
groupname = block->group_names;
for (i = 0; i < groups_shader; ++i) {
-   unsigned shaderlen = strlen(r600_pc_shader_suffix[i]);
+   const char *shader_suffix = 
screen->perfcounters->shader_type_suffixes[i];
+   unsigned shaderlen = strlen(shader_suffix);
for (j = 0; j < groups_se; ++j) {
for (k = 0; k < groups_instance; ++k) {
strcpy(groupname, block->

[Mesa-dev] [PATCH 4/4] radeonsi: add placeholder MC and SRBM performance counter groups

2016-02-03 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

Yet another change motivated by AMD GPUPerfStudio compatibility. These groups
are not directly accessible from userspace, and AMD GPUPerfStudio does not
actually query them - it just requires them to be there. Hence, adding
a placeholder for now.
---
 src/gallium/drivers/radeonsi/si_perfcounter.c | 70 +--
 1 file changed, 54 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c 
b/src/gallium/drivers/radeonsi/si_perfcounter.c
index f944a52..24855e4 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -56,6 +56,8 @@ enum si_pc_reg_layout {
 
/* Registers are laid out in decreasing rather than increasing order. */
SI_PC_REG_REVERSE = 4,
+
+   SI_PC_FAKE = 8,
 };
 
 struct si_pc_block_base {
@@ -325,6 +327,20 @@ static struct si_pc_block_base cik_WD = {
.counter0_lo = R_034200_WD_PERFCOUNTER0_LO,
 };
 
+static struct si_pc_block_base cik_MC = {
+   .name = "MC",
+   .num_counters = 4,
+
+   .layout = SI_PC_FAKE,
+};
+
+static struct si_pc_block_base cik_SRBM = {
+   .name = "SRBM",
+   .num_counters = 2,
+
+   .layout = SI_PC_FAKE,
+};
+
 /* Both the number of instances and selectors varies between chips of the same
  * class. We only differentiate by class here and simply expose the maximum
  * number over all chips in a class.
@@ -352,6 +368,8 @@ static struct si_pc_block groups_CIK[] = {
{ &cik_GDS, 121 },
{ &cik_VGT, 140 },
{ &cik_IA, 22 },
+   { &cik_MC, 22 },
+   { &cik_SRBM, 19 },
{ &cik_WD, 22 },
{ &cik_CPG, 46 },
{ &cik_CPC, 22 },
@@ -377,6 +395,8 @@ static struct si_pc_block groups_VI[] = {
{ &cik_GDS, 121 },
{ &cik_VGT, 147 },
{ &cik_IA, 24 },
+   { &cik_MC, 22 },
+   { &cik_SRBM, 27 },
{ &cik_WD, 37 },
{ &cik_CPG, 48 },
{ &cik_CPC, 24 },
@@ -391,7 +411,9 @@ static void si_pc_get_size(struct r600_perfcounter_block 
*group,
struct si_pc_block_base *regs = sigroup->b;
unsigned layout_multi = regs->layout & SI_PC_MULTI_MASK;
 
-   if (layout_multi == SI_PC_MULTI_BLOCK) {
+   if (regs->layout & SI_PC_FAKE) {
+   *num_select_dw = 0;
+   } else if (layout_multi == SI_PC_MULTI_BLOCK) {
if (count < regs->num_multi)
*num_select_dw = 2 * (count + 2) + regs->num_prelude;
else
@@ -454,6 +476,9 @@ static void si_pc_emit_select(struct r600_common_context 
*ctx,
 
assert(count <= regs->num_counters);
 
+   if (regs->layout & SI_PC_FAKE)
+   return;
+
if (layout_multi == SI_PC_MULTI_BLOCK) {
assert(!(regs->layout & SI_PC_REG_REVERSE));
 
@@ -613,22 +638,35 @@ static void si_pc_emit_read(struct r600_common_context 
*ctx,
unsigned reg = regs->counter0_lo;
unsigned reg_delta = 8;
 
-   if (regs->layout & SI_PC_REG_REVERSE)
-   reg_delta = -reg_delta;
-
-   for (idx = 0; idx < count; ++idx) {
-   if (regs->counters)
-   reg = regs->counters[idx];
+   if (!(regs->layout & SI_PC_FAKE)) {
+   if (regs->layout & SI_PC_REG_REVERSE)
+   reg_delta = -reg_delta;
 
-   radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-   radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) |
-   COPY_DATA_DST_SEL(COPY_DATA_MEM));
-   radeon_emit(cs, reg >> 2);
-   radeon_emit(cs, 0); /* unused */
-   radeon_emit(cs, va);
-   radeon_emit(cs, va >> 32);
-   va += 4;
-   reg += reg_delta;
+   for (idx = 0; idx < count; ++idx) {
+   if (regs->counters)
+   reg = regs->counters[idx];
+
+   radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+   radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) |
+   COPY_DATA_DST_SEL(COPY_DATA_MEM));
+   radeon_emit(cs, reg >> 2);
+   radeon_emit(cs, 0); /* unused */
+   radeon_emit(cs, va);
+   radeon_emit(cs, va >> 32);
+   va += 4;
+   reg += reg_delta;
+   }
+   } else {
+   for (idx = 0; idx < count; ++idx) {
+   radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+   radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
+   COPY_DATA_DST_SEL(COPY_DATA_MEM));
+   radeon_emit(cs, 0); /* immediate */
+   radeon_emit(cs, 0); /* unused */
+   radeon

[Mesa-dev] [PATCH 2/4] radeonsi: re-order the perfcounter hardware blocks

2016-02-03 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

As documented in the comment, AMD GPUPerfStudio unfortunately hardcodes the
order of performance counter groups. Let's do the pragmatic thing and present
the same order as Catalyst/Crimson.
---
 src/gallium/drivers/radeonsi/si_perfcounter.c | 30 ---
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c 
b/src/gallium/drivers/radeonsi/si_perfcounter.c
index 7ee1dae..b658866 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -311,53 +311,59 @@ static struct si_pc_block_base cik_WD = {
 /* Both the number of instances and selectors varies between chips of the same
  * class. We only differentiate by class here and simply expose the maximum
  * number over all chips in a class.
+ *
+ * Unfortunately, GPUPerfStudio uses the order of performance counter groups
+ * blindly once it believes it has identified the hardware, so the order of
+ * blocks here matters.
  */
 static struct si_pc_block groups_CIK[] = {
{ &cik_CB, 226, 4 },
-   { &cik_CPC, 22 },
{ &cik_CPF, 17 },
-   { &cik_CPG, 46 },
{ &cik_DB, 257, 4 },
-   { &cik_GDS, 121 },
{ &cik_GRBM, 34 },
{ &cik_GRBMSE, 15 },
-   { &cik_IA, 22 },
-   { &cik_PA_SC, 395 },
{ &cik_PA_SU, 153 },
+   { &cik_PA_SC, 395 },
{ &cik_SPI, 186 },
{ &cik_SQ, 252 },
{ &cik_SX, 32 },
{ &cik_TA, 111, 11 },
{ &cik_TCA, 39, 2 },
{ &cik_TCC, 160, 16 },
-   { &cik_TCP, 154, 11 },
{ &cik_TD, 55, 11 },
+   { &cik_TCP, 154, 11 },
+   { &cik_GDS, 121 },
{ &cik_VGT, 140 },
+   { &cik_IA, 22 },
{ &cik_WD, 22 },
+   { &cik_CPG, 46 },
+   { &cik_CPC, 22 },
+
 };
 
 static struct si_pc_block groups_VI[] = {
{ &cik_CB, 396, 4 },
-   { &cik_CPC, 24 },
{ &cik_CPF, 19 },
-   { &cik_CPG, 48 },
{ &cik_DB, 257, 4 },
-   { &cik_GDS, 121 },
{ &cik_GRBM, 34 },
{ &cik_GRBMSE, 15 },
-   { &cik_IA, 24 },
-   { &cik_PA_SC, 397 },
{ &cik_PA_SU, 153 },
+   { &cik_PA_SC, 397 },
{ &cik_SPI, 197 },
{ &cik_SQ, 273 },
{ &cik_SX, 34 },
{ &cik_TA, 119, 16 },
{ &cik_TCA, 35, 2 },
{ &cik_TCC, 192, 16 },
-   { &cik_TCP, 180, 16 },
{ &cik_TD, 55, 16 },
+   { &cik_TCP, 180, 16 },
+   { &cik_GDS, 121 },
{ &cik_VGT, 147 },
+   { &cik_IA, 24 },
{ &cik_WD, 37 },
+   { &cik_CPG, 48 },
+   { &cik_CPC, 24 },
+
 };
 
 static void si_pc_get_size(struct r600_perfcounter_block *group,
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/4] gallium/radeon: add GPIN driver query group

2016-02-03 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

This group was used by older versions of AMD GPUPerfStudio (via
AMD_performance_monitor) to identify the GPU family, and GPUPerfStudio
still complains when it isn't available.
---
 src/gallium/drivers/radeon/r600_query.c | 80 +++--
 src/gallium/drivers/radeon/r600_query.h | 10 +
 2 files changed, 87 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 0aa19cd..f2094c6 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -100,6 +100,12 @@ static boolean r600_query_sw_begin(struct 
r600_common_context *rctx,
case R600_QUERY_NUM_SHADERS_CREATED:
query->begin_result = 
p_atomic_read(&rctx->screen->num_shaders_created);
break;
+   case R600_QUERY_GPIN_ASIC_ID:
+   case R600_QUERY_GPIN_NUM_SIMD:
+   case R600_QUERY_GPIN_NUM_RB:
+   case R600_QUERY_GPIN_NUM_SPI:
+   case R600_QUERY_GPIN_NUM_SE:
+   break;
default:
unreachable("r600_query_sw_begin: bad query type");
}
@@ -146,6 +152,12 @@ static void r600_query_sw_end(struct r600_common_context 
*rctx,
case R600_QUERY_NUM_SHADERS_CREATED:
query->end_result = 
p_atomic_read(&rctx->screen->num_shaders_created);
break;
+   case R600_QUERY_GPIN_ASIC_ID:
+   case R600_QUERY_GPIN_NUM_SIMD:
+   case R600_QUERY_GPIN_NUM_RB:
+   case R600_QUERY_GPIN_NUM_SPI:
+   case R600_QUERY_GPIN_NUM_SE:
+   break;
default:
unreachable("r600_query_sw_end: bad query type");
}
@@ -171,6 +183,22 @@ static boolean r600_query_sw_get_result(struct 
r600_common_context *rctx,
 wait ? PIPE_TIMEOUT_INFINITE : 
0);
return result->b;
}
+
+   case R600_QUERY_GPIN_ASIC_ID:
+   result->u32 = 0;
+   return TRUE;
+   case R600_QUERY_GPIN_NUM_SIMD:
+   result->u32 = rctx->screen->info.num_good_compute_units;
+   return TRUE;
+   case R600_QUERY_GPIN_NUM_RB:
+   result->u32 = rctx->screen->info.r600_num_backends;
+   return TRUE;
+   case R600_QUERY_GPIN_NUM_SPI:
+   result->u32 = 1; /* all supported chips have one SPI per SE */
+   return TRUE;
+   case R600_QUERY_GPIN_NUM_SE:
+   result->u32 = rctx->screen->info.max_se;
+   return TRUE;
}
 
result->u64 = query->end_result - query->begin_result;
@@ -1096,15 +1124,21 @@ err:
return;
 }
 
-#define X(name_, query_type_, type_, result_type_) \
+#define XFULL(name_, query_type_, type_, result_type_, group_id_) \
{ \
.name = name_, \
.query_type = R600_QUERY_##query_type_, \
.type = PIPE_DRIVER_QUERY_TYPE_##type_, \
.result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \
-   .group_id = ~(unsigned)0 \
+   .group_id = group_id_ \
}
 
+#define X(name_, query_type_, type_, result_type_) \
+   XFULL(name_, query_type_, type_, result_type_, ~(unsigned)0)
+
+#define XG(group_, name_, query_type_, type_, result_type_) \
+   XFULL(name_, query_type_, type_, result_type_, 
R600_QUERY_GROUP_##group_)
+
 static struct pipe_driver_query_info r600_driver_query_list[] = {
X("num-compilations",   NUM_COMPILATIONS,   UINT64, 
CUMULATIVE),
X("num-shaders-created",NUM_SHADERS_CREATED,UINT64, 
CUMULATIVE),
@@ -1116,6 +1150,20 @@ static struct pipe_driver_query_info 
r600_driver_query_list[] = {
X("num-bytes-moved",NUM_BYTES_MOVED,BYTES, 
CUMULATIVE),
X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE),
X("GTT-usage",  GTT_USAGE,  BYTES, AVERAGE),
+
+   /* GPIN queries are for the benefit of old versions of GPUPerfStudio,
+* which use it as a fallback path to detect the GPU type.
+*
+* Note: The names of these queries are significant for GPUPerfStudio
+* (and possibly their order as well). */
+   XG(GPIN, "GPIN_000",GPIN_ASIC_ID,   UINT, AVERAGE),
+   XG(GPIN, "GPIN_001",GPIN_NUM_SIMD,  UINT, AVERAGE),
+   XG(GPIN, "GPIN_002",GPIN_NUM_RB,UINT, AVERAGE),
+   XG(GPIN, "GPIN_003",GPIN_NUM_SPI,   UINT, AVERAGE),
+   XG(GPIN, "GPIN_004",GPIN_NUM_SE,UINT, AVERAGE),
+
+   /* The following queries must be at the end of the list because

[Mesa-dev] [PATCH 1/2] gallium/radeon: emit LLVM `ret void` before radeon_llvm_finalize_module

2016-02-04 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

This allows dumping a consumable LLVM module before the initial optimization
passes are run.
---
The missing `ret void` is why the IR would not be accepted.

I suspect Marek has something similar for the concatenating shaders, but this
is small enough that it should be easy to merge.

 src/gallium/drivers/r600/r600_llvm.c| 1 +
 src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 4 +---
 src/gallium/drivers/radeonsi/si_shader.c| 2 ++
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_llvm.c 
b/src/gallium/drivers/r600/r600_llvm.c
index 232db13..0fe7c74 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -848,6 +848,7 @@ LLVMModuleRef r600_tgsi_llvm(
 
lp_build_tgsi_llvm(bld_base, tokens);
 
+   LLVMBuildRetVoid(bld_base->base.gallivm->builder);
radeon_llvm_finalize_module(ctx);
 
return ctx->gallivm.module;
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 76be376..a44e3dc 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -1638,11 +1638,9 @@ void radeon_llvm_create_func(struct radeon_llvm_context 
* ctx,
 void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx)
 {
struct gallivm_state * gallivm = ctx->soa.bld_base.base.gallivm;
-   /* End the main function with Return*/
-   LLVMBuildRetVoid(gallivm->builder);
 
/* Create the pass manager */
-   ctx->gallivm.passmgr = LLVMCreateFunctionPassManagerForModule(
+   gallivm->passmgr = LLVMCreateFunctionPassManagerForModule(
gallivm->module);
 
/* This pass should eliminate all the load and store instructions */
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 2192b21..8b524cf 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4177,6 +4177,7 @@ static int si_generate_gs_copy_shader(struct si_screen 
*sscreen,
 
si_llvm_export_vs(bld_base, outputs, gsinfo->num_outputs);
 
+   LLVMBuildRetVoid(bld_base->base.gallivm->builder);
radeon_llvm_finalize_module(&si_shader_ctx->radeon_bld);
 
if (dump)
@@ -4383,6 +4384,7 @@ int si_shader_create(struct si_screen *sscreen, 
LLVMTargetMachineRef tm,
goto out;
}
 
+   LLVMBuildRetVoid(bld_base->base.gallivm->builder);
radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);
 
mod = bld_base->base.gallivm->module;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] radeonsi: Allow dumping LLVM IR before optimization passes

2016-02-04 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

Set R600_DEBUG=preoptir to dump the LLVM IR before optimization passes,
to allow diagnosing problems caused by optimization passes.

Note that in order to compile the resulting IR with llc, you will first
have to run at least the mem2reg pass, e.g.

opt -mem2reg -S < shader.ll | llc -march=amdgcn -mcpu=bonaire

Signed-off-by: Michel Dänzer <michel.daen...@amd.com> (original patch)
Signed-off-by: Nicolai Hähnle <nicolai.haeh...@amd.com> (w/ debug flag)
---
Having the option is a good idea, but I prefer to have a separate debug
flag for it so that when you try to analyze bugs in codegen (which in
my experience happens more often) you don't have to worry about
replicating the exact same sequence of optimizations manually via the
command line to reproduce the problem there.

 src/gallium/drivers/radeon/r600_pipe_common.c |  1 +
 src/gallium/drivers/radeon/r600_pipe_common.h |  1 +
 src/gallium/drivers/radeonsi/si_shader.c  | 16 ++--
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index c827dbd..a1432ed 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -393,6 +393,7 @@ static const struct debug_named_value 
common_debug_options[] = {
{ "noir", DBG_NO_IR, "Don't print the LLVM IR"},
{ "notgsi", DBG_NO_TGSI, "Don't print the TGSI"},
{ "noasm", DBG_NO_ASM, "Don't print disassembled shaders"},
+   { "preoptir", DBG_PREOPT_IR, "Print the LLVM IR before initial 
optimizations" },
 
/* features */
{ "nodma", DBG_NO_ASYNC_DMA, "Disable asynchronous DMA" },
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index c7e4c44..4e36631 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -71,6 +71,7 @@
 #define DBG_NO_IR  (1 << 12)
 #define DBG_NO_TGSI(1 << 13)
 #define DBG_NO_ASM (1 << 14)
+#define DBG_PREOPT_IR  (1 << 15)
 /* Bits 21-31 are reserved for the r600g driver. */
 /* features */
 #define DBG_NO_ASYNC_DMA   (1llu << 32)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 8b524cf..d9ed6b2 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4092,7 +4092,7 @@ int si_compile_llvm(struct si_screen *sscreen,
if (r600_can_dump_shader(&sscreen->b, processor)) {
fprintf(stderr, "radeonsi: Compiling shader %d\n", count);
 
-   if (!(sscreen->b.debug_flags & DBG_NO_IR))
+   if (!(sscreen->b.debug_flags & (DBG_NO_IR | DBG_PREOPT_IR)))
LLVMDumpModule(mod);
}
 
@@ -4178,6 +4178,12 @@ static int si_generate_gs_copy_shader(struct si_screen 
*sscreen,
si_llvm_export_vs(bld_base, outputs, gsinfo->num_outputs);
 
LLVMBuildRetVoid(bld_base->base.gallivm->builder);
+
+   /* Dump LLVM IR before any optimization passes */
+   if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
+   r600_can_dump_shader(&sscreen->b, TGSI_PROCESSOR_GEOMETRY))
+   LLVMDumpModule(bld_base->base.gallivm->module);
+
radeon_llvm_finalize_module(&si_shader_ctx->radeon_bld);
 
if (dump)
@@ -4385,9 +4391,15 @@ int si_shader_create(struct si_screen *sscreen, 
LLVMTargetMachineRef tm,
}
 
LLVMBuildRetVoid(bld_base->base.gallivm->builder);
+   mod = bld_base->base.gallivm->module;
+
+   /* Dump LLVM IR before any optimization passes */
+   if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
+   r600_can_dump_shader(&sscreen->b, si_shader_ctx.type))
+   LLVMDumpModule(mod);
+
radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);
 
-   mod = bld_base->base.gallivm->module;
r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm,
mod, debug, si_shader_ctx.type);
if (r) {
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/4] st/mesa: bail out of try_pbo_upload_common when constant upload fails

2016-02-03 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

Also fixes a resource leak when an upload_mgr is used for constants.
---
 src/mesa/state_tracker/st_cb_texture.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/state_tracker/st_cb_texture.c 
b/src/mesa/state_tracker/st_cb_texture.c
index 84dfc70..f2b607c 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -1434,6 +1434,9 @@ try_pbo_upload_common(struct gl_context *ctx,
  u_upload_data(st->constbuf_uploader, 0, sizeof(constants),
st->ctx->Const.UniformBufferOffsetAlignment,
&constants, &cb.buffer_offset, &cb.buffer);
+ if (!cb.buffer)
+goto fail_constant_upload;
+
  u_upload_unmap(st->constbuf_uploader);
   } else {
  cb.buffer = NULL;
@@ -1444,6 +1447,8 @@ try_pbo_upload_common(struct gl_context *ctx,
 
   cso_save_constant_buffer_slot0(st->cso_context, PIPE_SHADER_FRAGMENT);
   cso_set_constant_buffer(st->cso_context, PIPE_SHADER_FRAGMENT, 0, &cb);
+
+  pipe_resource_reference(&cb.buffer, NULL);
}
 
/* Framebuffer_state */
@@ -1524,6 +1529,7 @@ try_pbo_upload_common(struct gl_context *ctx,
cso_restore_fragment_shader(st->cso_context);
cso_restore_stream_outputs(st->cso_context);
cso_restore_constant_buffer_slot0(st->cso_context, PIPE_SHADER_FRAGMENT);
+fail_constant_upload:
cso_restore_vertex_elements(st->cso_context);
cso_restore_aux_vertex_buffer_slot(st->cso_context);
 fail_vertex_upload:
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/4] st/mesa: do uploads earlier in try_pbo_upload_common

2016-02-03 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

While rather unlikely, uploads _can_ fail. Doing them earlier means
we'll have to restore less state when they do fail, and it's slightly
easier to check the restore code.
---
 src/mesa/state_tracker/st_cb_texture.c | 89 +-
 1 file changed, 44 insertions(+), 45 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_texture.c 
b/src/mesa/state_tracker/st_cb_texture.c
index e9ac9a6..0f2a4e9 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -1356,50 +1356,12 @@ try_pbo_upload_common(struct gl_context *ctx,
   sampler_view = pipe->create_sampler_view(pipe, buffer, &templ);
   if (sampler_view == NULL)
  return false;
-   }
-
-   /* Begin setting state. This is the point of no return. */
-   cso_save_fragment_sampler_views(st->cso_context);
-   cso_set_sampler_views(st->cso_context, PIPE_SHADER_FRAGMENT, 1,
- &sampler_view);
-
-   /* Framebuffer_state */
-   {
-  struct pipe_framebuffer_state fb;
-  memset(&fb, 0, sizeof(fb));
-  fb.width = surface->width;
-  fb.height = surface->height;
-  fb.nr_cbufs = 1;
-  pipe_surface_reference(&fb.cbufs[0], surface);
 
-  cso_save_framebuffer(st->cso_context);
-  cso_set_framebuffer(st->cso_context, &fb);
-
-  pipe_surface_reference(&fb.cbufs[0], NULL);
+  cso_save_fragment_sampler_views(st->cso_context);
+  cso_set_sampler_views(st->cso_context, PIPE_SHADER_FRAGMENT, 1,
+&sampler_view);
}
 
-   /* Viewport state */
-   {
-  struct pipe_viewport_state vp;
-  vp.scale[0] = 0.5f * surface->width;
-  vp.scale[1] = 0.5f * surface->height;
-  vp.scale[2] = 1.0f;
-  vp.translate[0] = 0.5f * surface->width;
-  vp.translate[1] = 0.5f * surface->height;
-  vp.translate[2] = 0.0f;
-
-  cso_save_viewport(st->cso_context);
-  cso_set_viewport(st->cso_context, &vp);
-   }
-
-   /* Blend state */
-   cso_save_blend(st->cso_context);
-   cso_set_blend(st->cso_context, &st->pbo_upload.blend);
-
-   /* Rasterizer state */
-   cso_save_rasterizer(st->cso_context);
-   cso_set_rasterizer(st->cso_context, &st->pbo_upload.raster);
-
/* Upload vertices */
{
   struct pipe_vertex_buffer vbo;
@@ -1477,6 +1439,43 @@ try_pbo_upload_common(struct gl_context *ctx,
   cso_set_constant_buffer(st->cso_context, PIPE_SHADER_FRAGMENT, 0, &cb);
}
 
+   /* Framebuffer_state */
+   {
+  struct pipe_framebuffer_state fb;
+  memset(&fb, 0, sizeof(fb));
+  fb.width = surface->width;
+  fb.height = surface->height;
+  fb.nr_cbufs = 1;
+  pipe_surface_reference(&fb.cbufs[0], surface);
+
+  cso_save_framebuffer(st->cso_context);
+  cso_set_framebuffer(st->cso_context, &fb);
+
+  pipe_surface_reference(&fb.cbufs[0], NULL);
+   }
+
+   /* Viewport state */
+   {
+  struct pipe_viewport_state vp;
+  vp.scale[0] = 0.5f * surface->width;
+  vp.scale[1] = 0.5f * surface->height;
+  vp.scale[2] = 1.0f;
+  vp.translate[0] = 0.5f * surface->width;
+  vp.translate[1] = 0.5f * surface->height;
+  vp.translate[2] = 0.0f;
+
+  cso_save_viewport(st->cso_context);
+  cso_set_viewport(st->cso_context, &vp);
+   }
+
+   /* Blend state */
+   cso_save_blend(st->cso_context);
+   cso_set_blend(st->cso_context, &st->pbo_upload.blend);
+
+   /* Rasterizer state */
+   cso_save_rasterizer(st->cso_context);
+   cso_set_rasterizer(st->cso_context, &st->pbo_upload.raster);
+
/* Set up the shaders */
cso_save_vertex_shader(st->cso_context);
cso_set_vertex_shader_handle(st->cso_context, st->pbo_upload.vs);
@@ -1505,20 +1504,20 @@ try_pbo_upload_common(struct gl_context *ctx,
 0, 4, 0, depth);
}
 
-   cso_restore_fragment_sampler_views(st->cso_context);
cso_restore_framebuffer(st->cso_context);
cso_restore_viewport(st->cso_context);
cso_restore_blend(st->cso_context);
cso_restore_rasterizer(st->cso_context);
-   cso_restore_vertex_elements(st->cso_context);
-   cso_restore_aux_vertex_buffer_slot(st->cso_context);
-   cso_restore_constant_buffer_slot0(st->cso_context, PIPE_SHADER_FRAGMENT);
cso_restore_vertex_shader(st->cso_context);
cso_restore_geometry_shader(st->cso_context);
cso_restore_tessctrl_shader(st->cso_context);
cso_restore_tesseval_shader(st->cso_context);
cso_restore_fragment_shader(st->cso_context);
cso_restore_stream_outputs(st->cso_context);
+   cso_restore_constant_buffer_slot0(st->cso_context, PIPE_SHADER_FRAGMENT);
+   cso_restore_vertex_elements(st->cso_context);
+   cso_restore_aux_vertex_buffer_slot(st->cso_context);
+   cso_restore_fragment_sampler_views(st->cso_context);
 
pipe_sampler_view_reference(&sampler_view, NULL);
 
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/4] st/mesa: reduce the scope of sampler_view in try_pbo_upload_common

2016-02-03 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

We can get rid of our reference immediately, since the driver will hold
onto it for us.
---
 src/mesa/state_tracker/st_cb_texture.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_texture.c 
b/src/mesa/state_tracker/st_cb_texture.c
index 0f2a4e9..7429711 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -1296,7 +1296,6 @@ try_pbo_upload_common(struct gl_context *ctx,
 {
struct st_context *st = st_context(ctx);
struct pipe_context *pipe = st->pipe;
-   struct pipe_sampler_view *sampler_view = NULL;
unsigned depth = surface->u.tex.last_layer - surface->u.tex.first_layer + 1;
unsigned skip_pixels = 0;
 
@@ -1337,6 +1336,7 @@ try_pbo_upload_common(struct gl_context *ctx,
   unsigned last_element = buf_offset + skip_pixels + upload_width - 1
  + (upload_height - 1 + (depth - 1) * image_height) * stride;
   struct pipe_sampler_view templ;
+  struct pipe_sampler_view *sampler_view;
 
   /* This should be ensured by Mesa before calling our callbacks */
   assert((last_element + 1) * bytes_per_pixel <= buffer->width0);
@@ -1360,6 +1360,8 @@ try_pbo_upload_common(struct gl_context *ctx,
   cso_save_fragment_sampler_views(st->cso_context);
   cso_set_sampler_views(st->cso_context, PIPE_SHADER_FRAGMENT, 1,
 &sampler_view);
+
+  pipe_sampler_view_reference(&sampler_view, NULL);
}
 
/* Upload vertices */
@@ -1519,8 +1521,6 @@ try_pbo_upload_common(struct gl_context *ctx,
cso_restore_aux_vertex_buffer_slot(st->cso_context);
cso_restore_fragment_sampler_views(st->cso_context);
 
-   pipe_sampler_view_reference(&sampler_view, NULL);
-
return true;
 }
 
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/4] st/mesa: bail out of try_pbo_upload_common when vertex upload fails

2016-02-03 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

At the same time, fix a memory leak noticed by Ilia Mirkin.
---
 src/mesa/state_tracker/st_cb_texture.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_cb_texture.c 
b/src/mesa/state_tracker/st_cb_texture.c
index 7429711..84dfc70 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -1298,6 +1298,7 @@ try_pbo_upload_common(struct gl_context *ctx,
struct pipe_context *pipe = st->pipe;
unsigned depth = surface->u.tex.last_layer - surface->u.tex.first_layer + 1;
unsigned skip_pixels = 0;
+   bool success = false;
 
/* Check alignment. */
{
@@ -1382,6 +1383,8 @@ try_pbo_upload_common(struct gl_context *ctx,
 
   u_upload_alloc(st->uploader, 0, 8 * sizeof(float), 4,
  &vbo.buffer_offset, &vbo.buffer, (void **) &verts);
+  if (!verts)
+ goto fail_vertex_upload;
 
   verts[0] = x0;
   verts[1] = y0;
@@ -1405,6 +1408,8 @@ try_pbo_upload_common(struct gl_context *ctx,
   cso_save_aux_vertex_buffer_slot(st->cso_context);
   cso_set_vertex_buffers(st->cso_context, velem.vertex_buffer_index,
  1, &vbo);
+
+  pipe_resource_reference(&vbo.buffer, NULL);
}
 
/* Upload constants */
@@ -1506,6 +1511,8 @@ try_pbo_upload_common(struct gl_context *ctx,
 0, 4, 0, depth);
}
 
+   success = true;
+
cso_restore_framebuffer(st->cso_context);
cso_restore_viewport(st->cso_context);
cso_restore_blend(st->cso_context);
@@ -1519,9 +1526,10 @@ try_pbo_upload_common(struct gl_context *ctx,
cso_restore_constant_buffer_slot0(st->cso_context, PIPE_SHADER_FRAGMENT);
cso_restore_vertex_elements(st->cso_context);
cso_restore_aux_vertex_buffer_slot(st->cso_context);
+fail_vertex_upload:
cso_restore_fragment_sampler_views(st->cso_context);
 
-   return true;
+   return success;
 }
 
 static bool
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/7] radeonsi: use SPI_SHADER_COL_FORMAT fields instead of export_16bpc

2016-01-19 Thread Nicolai Hähnle


On 19.01.2016 11:11, Marek Olšák wrote:

From: Marek Olšák 

This does change the behavior slightly:
   If a shader writes COLOR[i] and that color buffer isn't bound,
   the shader will export MRT_NULL instead and discard the IR tree that
   calculates the output. The only exception is alpha-to-coverage, which
   requires an alpha export.
---
  src/gallium/drivers/radeon/r600_pipe_common.h   |  1 +
  src/gallium/drivers/radeonsi/si_pipe.h  |  2 +-
  src/gallium/drivers/radeonsi/si_shader.c| 35 --
  src/gallium/drivers/radeonsi/si_shader.h|  2 +-
  src/gallium/drivers/radeonsi/si_state.c | 39 +++-
  src/gallium/drivers/radeonsi/si_state.h |  1 +
  src/gallium/drivers/radeonsi/si_state_shaders.c | 47 -
  7 files changed, 90 insertions(+), 37 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 27f6e98..f3271e2 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -252,6 +252,7 @@ struct r600_surface {
unsigned cb_color_fmask_slice;  /* EG and later */
unsigned cb_color_cmask;/* CB_COLORn_TILE (r600 only) */
unsigned cb_color_mask; /* R600 only */
+   unsigned spi_shader_col_format; /* SI+ */
unsigned sx_ps_downconvert; /* Stoney only */
unsigned sx_blend_opt_epsilon;  /* Stoney only */
struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. 
R600 only */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index f83cb02..e2009de 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -125,7 +125,7 @@ struct si_framebuffer {
unsignedlog_samples;
unsignedcb0_is_integer;
unsignedcompressed_cb_mask;
-   unsignedexport_16bpc;
+   unsignedspi_shader_col_format;
unsigneddirty_cbufs;
booldirty_zsbuf;
  };
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 2de7def..266ef6d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1265,7 +1265,7 @@ static void si_llvm_init_export_args(struct 
lp_build_tgsi_context *bld_base,
struct lp_build_context *uint =

&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
struct lp_build_context *base = &bld_base->base;
-   unsigned compressed = 0;
+   unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
unsigned chan;

/* XXX: This controls which components of the output
@@ -1286,17 +1286,29 @@ static void si_llvm_init_export_args(struct 
lp_build_tgsi_context *bld_base,
args[3] = lp_build_const_int32(base->gallivm, target);

if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+   unsigned col_formats =
+   si_shader_ctx->shader->key.ps.spi_shader_col_format;
int cbuf = target - V_008DFC_SQ_EXP_MRT;

-   if (cbuf >= 0 && cbuf < 8)
-   compressed = (si_shader_ctx->shader->key.ps.export_16bpc >> 
cbuf) & 0x1;
+   assert(cbuf >= 0 && cbuf < 8);
+   spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf;
}

-   /* Set COMPR flag */
-   args[4] = compressed ? uint->one : uint->zero;
+   args[4] = uint->zero; /* COMPR flag */
+   args[5] = base->undef;
+   args[6] = base->undef;
+   args[7] = base->undef;
+   args[8] = base->undef;
+
+   switch (spi_shader_col_format) {
+   case V_028714_SPI_SHADER_ZERO:
+   args[0] = uint->zero; /* writemask */
+   args[3] = lp_build_const_int32(base->gallivm, 
V_008DFC_SQ_EXP_NULL);
+   break;
+
+   case V_028714_SPI_SHADER_FP16_ABGR:
+   args[4] = uint->one; /* COMPR flag */

-   if (compressed) {
-   /* Pixel shader needs to pack output values before export */
for (chan = 0; chan < 2; chan++) {
LLVMValueRef pack_args[2] = {
values[2 * chan],
@@ -1314,10 +1326,13 @@ static void si_llvm_init_export_args(struct 
lp_build_tgsi_context *bld_base,
 packed,
 
LLVMFloatTypeInContext(base->gallivm->context),
 "");
-   args[chan + 7] = base->undef;
}
-   } else
+   break;
+
+   case V_028714_SPI_SHADER_32_ABGR:
memcpy(&args[5], values,

Re: [Mesa-dev] [PATCH 7/7] radeonsi: disable SPI color outputs the shader doesn't write

2016-01-19 Thread Nicolai Hähnle


Apart from two comments on patch #2, the series is

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

On 19.01.2016 11:11, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

---
  src/gallium/drivers/radeonsi/si_shader.h|  4 
  src/gallium/drivers/radeonsi/si_state_shaders.c | 12 
  2 files changed, 16 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index f49290a..50375e2 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -213,6 +213,10 @@ struct si_shader_selector {

/* PS parameters. */
unsigneddb_shader_control;
+   /* Set 0xf or 0x0 (4 bits) per each written output.
+* ANDed with spi_shader_col_format.
+*/
+   unsignedcolors_written_4bit;

/* masks of "get_unique_index" bits */
uint64_toutputs_written;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 80126f2..9daa924 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -646,6 +646,12 @@ static inline void si_shader_selector_key(struct 
pipe_context *ctx,
if (sctx->b.chip_class <= CIK && sctx->b.family != CHIP_HAWAII)
key->ps.color_is_int8 = sctx->framebuffer.color_is_int8;

+   /* Disable unwritten outputs (if WRITE_ALL_CBUFS isn't 
enabled). */
+   if (!key->ps.last_cbuf) {
+   key->ps.spi_shader_col_format &= 
sel->colors_written_4bit;
+   key->ps.color_is_int8 &= sel->info.colors_written;
+   }
+
if (rs) {
bool is_poly = (sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES 
&&
sctx->current_rast_prim <= 
PIPE_PRIM_POLYGON) ||
@@ -830,6 +836,12 @@ static void *si_create_shader_selector(struct pipe_context 
*ctx,
}
sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
break;
+
+   case PIPE_SHADER_FRAGMENT:
+   for (i = 0; i < 8; i++)
+   if (sel->info.colors_written & (1 << i))
+   sel->colors_written_4bit |= 0xf << (4 * i);
+   break;
}

/* DB_SHADER_CONTROL */


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/4] radeonsi: allow tessellation on CU1 and ES on CU0

2016-01-20 Thread Nicolai Hähnle


On 20.01.2016 19:20, Marek Olšák wrote:

On Wed, Jan 20, 2016 at 6:30 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote:

On 19.01.2016 20:20, Marek Olšák wrote:


From: Marek Olšák <marek.ol...@amd.com>

We don't use on-chip GS, so it's not required to reserve CU1 for ES.



Why is a deadlock not possible with an off-chip GS ring?


It's about LDS.

>

On-chip GS uses LDS instead of the rings, which can cause a PS
deadlock just like LS.

We could allow all shaders to run on all CUs if the kernel programmed
SPI_RESOURCE_RESERVE*_CU0 and *_CU1 slightly differently. Those
registers allow reserving a half of LDS for certain stages. For
example, reserving it for PS would mean that PS could use all of CU0
LDS, but LS could only use a half. This is best for chips with only a
few CUs.


Thanks, I got it now. Patch 3 also gets my R-b

Nicolai





Marek


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radeonsi: fix discard-only fragment shaders (v2)

2016-01-20 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

When a fragment shader is used that has no outputs but does conditional
discard (KILL_IF), all fragments are killed without this patch.

By comparing various register settings, my conclusion is that the exec mask
is either not properly forwarded to the DB by NULL exports or ends up being
unused, at least when there is _only_ a NULL export (the ISA documentation
claims that NULL exports can be used to override a previously exported exec
mask).

Of the various approaches I have tried to work around the problem, this one
seems to be the least invasive one.

v2: take discard by alpha test into account as well

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93761
---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 8ff70b4..807c9e2 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -393,6 +393,7 @@ static void si_shader_ps(struct si_shader *shader)
unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
uint64_t va;
bool has_centroid;
+   bool writes_execmask;
 
pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
 
@@ -452,10 +453,13 @@ static void si_shader_ps(struct si_shader *shader)
si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control);
 
+   writes_execmask = info->uses_kill ||
+ shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS;
si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT,
   info->writes_samplemask ? V_028710_SPI_SHADER_32_ABGR :
   info->writes_stencil ? V_028710_SPI_SHADER_32_GR :
   info->writes_z ? V_028710_SPI_SHADER_32_R :
+  (writes_execmask && !info->num_outputs) ? 
V_028710_SPI_SHADER_32_R :
   V_028710_SPI_SHADER_ZERO);
 
si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT, 
spi_shader_col_format);
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radeonsi: fix discard-only fragment shaders (11.1 version)

2016-01-19 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

When a fragment shader is used that has no outputs but does conditional
discard (KILL_IF), all fragments are killed without this patch.

By comparing various register settings, my conclusion is that the exec mask
is either not properly forwarded to the DB by NULL exports or ends up being
unused, at least when there is _only_ a NULL export (the ISA documentation
claims that NULL exports can be used to override a previously exported exec
mask).

Of the various approaches I have tried to work around the problem, this one
seems to be the least invasive one.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93761
Cc: "11.0 11.1" <mesa-sta...@lists.freedesktop.org>
---
 src/gallium/drivers/radeonsi/si_shader.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 1baa2eb..1c1aaa0 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2291,6 +2291,9 @@ static void si_llvm_emit_fs_epilogue(struct 
lp_build_tgsi_context * bld_base)
last_args[6]= uint->zero;
last_args[7]= uint->zero;
last_args[8]= uint->zero;
+
+   if (info->uses_kill)
+   si_shader_ctx->shader->spi_shader_z_format = 
V_028710_SPI_SHADER_32_R;
}
 
/* Specify whether the EXEC mask represents the valid mask */
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radeonsi: fix discard-only fragment shaders

2016-01-19 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

When a fragment shader is used that has no outputs but does conditional
discard (KILL_IF), all fragments are killed without this patch.

By comparing various register settings, my conclusion is that the exec mask
is either not properly forwarded to the DB by NULL exports or ends up being
unused, at least when there is _only_ a NULL export (the ISA documentation
claims that NULL exports can be used to override a previously exported exec
mask).

Of the various approaches I have tried to work around the problem, this one
seems to be the least invasive one.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93761
---
I've condensed the bug reporter's test case to a Piglit test that I will
send out soon.

This should probably go to stable as well, but won't apply as is because of
the many recent changes to fragment shader export handling. I will follow
this mail up with a version that applies to 11.1 and that I've confirmed
fixes the problem there as well.

 src/gallium/drivers/radeonsi/si_state_shaders.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 8ff70b4..0f0cbb2 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -456,6 +456,7 @@ static void si_shader_ps(struct si_shader *shader)
   info->writes_samplemask ? V_028710_SPI_SHADER_32_ABGR :
   info->writes_stencil ? V_028710_SPI_SHADER_32_GR :
   info->writes_z ? V_028710_SPI_SHADER_32_R :
+  (info->uses_kill && !info->num_outputs) ? 
V_028710_SPI_SHADER_32_R :
   V_028710_SPI_SHADER_ZERO);
 
si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT, 
spi_shader_col_format);
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/3] mesa: Deal with size differences between GLuint and GLhandleARB in GetAttachedObjectsARB

2016-01-21 Thread Nicolai Hähnle


Wow... did you actually run into that crash?

On 20.01.2016 20:14, Jeremy Huddleston Sequoia wrote:

Signed-off-by: Jeremy Huddleston Sequoia <jerem...@apple.com>
---
  src/mesa/main/shaderapi.c | 12 +++-
  1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index a988f41..75fc17c 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -1374,10 +1374,20 @@ _mesa_DetachShader(GLuint program, GLuint shader)

  void GLAPIENTRY
  _mesa_GetAttachedObjectsARB(GLhandleARB container, GLsizei maxCount,
-GLsizei * count, GLhandleARB * obj)
+GLsizei * count, GLhandleARB * objARB)
  {
+   int i;
+   GLuint *obj = calloc(maxCount, sizeof(GLuint));
+   assert(obj);
+


Is there a precedent for using assert in this way? It feels wrong to me, 
better set GL_OUT_OF_MEMORY.



 GET_CURRENT_CONTEXT(ctx);
 get_attached_shaders(ctx, container, maxCount, count, obj);
+
+   for (i=0 ; i < *count; i++) {
+  objARB[i] = (GLhandleARB)obj[i];


Since this can only ever be a widening of the type, you don't really 
need the cast here.


Nicolai


+   }
+
+   free(obj);
  }




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/3] mesa: Fix format warnings

2016-01-21 Thread Nicolai Hähnle


Patches 1 & 2 are

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

On 20.01.2016 20:14, Jeremy Huddleston Sequoia wrote:

main/shaderapi.c:1318:51: warning: format specifies type 'unsigned int' but the 
argument has type 'GLhandleARB' (aka 'unsigned long') [-Wformat]
   _mesa_debug(ctx, "glDeleteObjectARB(%u)\n", obj);
   ~~  ^~~
   %lu

Signed-off-by: Jeremy Huddleston Sequoia <jerem...@apple.com>
---
  src/mesa/main/shaderapi.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 9512e3b..a988f41 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -1315,7 +1315,7 @@ _mesa_DeleteObjectARB(GLhandleARB obj)
  {
 if (MESA_VERBOSE & VERBOSE_API) {
GET_CURRENT_CONTEXT(ctx);
-  _mesa_debug(ctx, "glDeleteObjectARB(%u)\n", obj);
+  _mesa_debug(ctx, "glDeleteObjectARB(%lu)\n", (unsigned long)obj);
 }

 if (obj) {


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] r600g: don't leak driver const buffers

2016-01-21 Thread Nicolai Hähnle


On 20.01.2016 18:52, Grazvydas Ignotas wrote:

The buffers are referenced from r600_update_driver_const_buffers()
  -> r600_set_constant_buffer() -> u_upload_data(), but nothing
ever releases the reference. Similar case with driver_consts.
Found using valgrind.

Signed-off-by: Grazvydas Ignotas <nota...@gmail.com>
---
No commit access, if the patch is ok someone please commit it.


LGTM.

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

This should probably also be

Cc: <mesa-sta...@lists.freedesktop.org>



  src/gallium/drivers/r600/r600_pipe.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 08fdd36..8abd602 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -68,6 +68,7 @@ static const struct debug_named_value r600_debug_options[] = {
  static void r600_destroy_context(struct pipe_context *context)
  {
struct r600_context *rctx = (struct r600_context *)context;
+   unsigned sh;

r600_isa_destroy(rctx->isa);

@@ -76,6 +77,11 @@ static void r600_destroy_context(struct pipe_context 
*context)
pipe_resource_reference((struct pipe_resource**)&rctx->dummy_cmask, 
NULL);
pipe_resource_reference((struct pipe_resource**)&rctx->dummy_fmask, 
NULL);

+   for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) {
+   rctx->b.b.set_constant_buffer(&rctx->b.b, sh, 
R600_BUFFER_INFO_CONST_BUFFER, NULL);
+   free(rctx->driver_consts[sh].constants);
+   }
+
if (rctx->fixed_func_tcs_shader)
rctx->b.b.delete_tcs_state(&rctx->b.b, 
rctx->fixed_func_tcs_shader);



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/3] mesa: Deal with size differences between GLuint and GLhandleARB in GetAttachedObjectsARB

2016-01-21 Thread Nicolai Hähnle


On 21.01.2016 11:34, Jeremy Huddleston Sequoia wrote:



On Jan 21, 2016, at 07:51, Nicolai Hähnle <nhaeh...@gmail.com> wrote:

Wow... did you actually run into that crash?


No.  I was just paying attention to compiler warnings ;)


I'm glad someone does ;)



On 20.01.2016 20:14, Jeremy Huddleston Sequoia wrote:

Signed-off-by: Jeremy Huddleston Sequoia <jerem...@apple.com>
---
  src/mesa/main/shaderapi.c | 12 +++-
  1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index a988f41..75fc17c 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -1374,10 +1374,20 @@ _mesa_DetachShader(GLuint program, GLuint shader)

  void GLAPIENTRY
  _mesa_GetAttachedObjectsARB(GLhandleARB container, GLsizei maxCount,
-GLsizei * count, GLhandleARB * obj)
+GLsizei * count, GLhandleARB * objARB)
  {
+   int i;
+   GLuint *obj = calloc(maxCount, sizeof(GLuint));
+   assert(obj);
+


Is there a precedent for using assert in this way? It feels wrong to me, better 
set GL_OUT_OF_MEMORY.


Yes, I was just following behavior from elsewhere in the same source file.  In 
read_shader(), we have:
buffer = malloc(shader_size);
assert(buffer);
I'm happy to change that to the following if you think it more appropriate:

GLuint *obj;
GET_CURRENT_CONTEXT(ctx);

obj = calloc(maxCount, sizeof(GLuint));
if (!obj) {
 _mesa_error(ctx, GL_OUT_OF_MEMORY, "glShaderSourceARB");
 return;
}
...


Yes, I'd appreciate that (with the correct function name of course ;)).






 GET_CURRENT_CONTEXT(ctx);
 get_attached_shaders(ctx, container, maxCount, count, obj);
+
+   for (i=0 ; i < *count; i++) {
+  objARB[i] = (GLhandleARB)obj[i];


Since this can only ever be a widening of the type, you don't really need the 
cast here.


True.


With those changes, you can add my R-b to this patch as well.

Cheers,
Nicolai





Nicolai


+   }
+
+   free(obj);
  }






___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 9/9] gallium/radeon: support PIPE_CAP_SURFACE_REINTERPRET_BLOCKS

2016-01-21 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

This is already used internally in si_resource_copy_region for compressed
textures, so the only real change here is the adjusted surface size
computation.

Reviewed-by: Marek Olšák <marek.ol...@amd.com>
---
 src/gallium/drivers/r600/r600_pipe.c  |  2 +-
 src/gallium/drivers/radeon/r600_texture.c | 26 +++---
 src/gallium/drivers/radeonsi/si_pipe.c|  2 +-
 3 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 17140bb..f519916 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -279,6 +279,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_INVALIDATE_BUFFER:
+   case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
return 1;
 
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
@@ -360,7 +361,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_GENERATE_MIPMAP:
-   case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
return 0;
 
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 7c4717d..aec7e32 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1212,10 +1212,30 @@ static struct pipe_surface *r600_create_surface(struct 
pipe_context *pipe,
const struct pipe_surface 
*templ)
 {
unsigned level = templ->u.tex.level;
+   unsigned width = u_minify(tex->width0, level);
+   unsigned height = u_minify(tex->height0, level);
+
+   if (templ->format != tex->format) {
+   const struct util_format_description *tex_desc
+   = util_format_description(tex->format);
+   const struct util_format_description *templ_desc
+   = util_format_description(templ->format);
+
+   assert(tex_desc->block.bits == templ_desc->block.bits);
+
+   /* Adjust size of surface if and only if the block width or
+* height is changed. */
+   if (tex_desc->block.width != templ_desc->block.width ||
+   tex_desc->block.height != templ_desc->block.height) {
+   unsigned nblks_x = 
util_format_get_nblocksx(tex->format, width);
+   unsigned nblks_y = 
util_format_get_nblocksy(tex->format, height);
+
+   width = nblks_x * templ_desc->block.width;
+   height = nblks_y * templ_desc->block.height;
+   }
+   }
 
-   return r600_create_surface_custom(pipe, tex, templ,
- u_minify(tex->width0, level),
- u_minify(tex->height0, level));
+   return r600_create_surface_custom(pipe, tex, templ, width, height);
 }
 
 static void r600_surface_destroy(struct pipe_context *pipe,
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 04e9d60..c50c707 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -304,6 +304,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
case PIPE_CAP_INVALIDATE_BUFFER:
+   case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
return 1;
 
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
@@ -352,7 +353,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_GENERATE_MIPMAP:
-   case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
return 0;
 
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 0/3] Misc GLhandleARB GLuint cleanup

2016-01-21 Thread Nicolai Hähnle


On 20.01.2016 20:21, Jeremy Huddleston Sequoia wrote:

This series includes 3 changes intended to help reconcile the differences in 
declaration between Apple's and Mesa's GLhandleARB types.  The first, I expect to 
be mostly uncontroversial as they are mainly cleaning up and correcting some 
prototypes.  The second is just a casting which should be fine.  The last adds 
a bit of overhead to mesa_GetAttachedObjectsARB because we need to walk the 
returned array of GLuint and copy into the array of GLhandleARB.  I expect this 
last one will be a bit unwelcome on platforms where these two types are the 
same.

Would it be best to have a configure check for this and use it here (and maybe 
elsewhere in the future), or would you prefer to just use a platform 
preprocessing check for this since I suspect darwin is (and will only ever be) 
the only impacted platform.


It's not like one would expect programs to call GetAttachedObjectsARB in 
a fast path. I think the approach of patch 3 is fine (and actually 
better, because it has less potential for future breakage), except for 
the comments I sent you.


Cheers,
Nicolai



--Jeremy



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radeonsi: Add option for SI scheduler

2016-01-21 Thread Nicolai Hähnle


On 21.01.2016 12:35, Axel Davy wrote:

Add a debug option to select the LLVM SI Machine Scheduler.
R600_DEBUG=sisched

Signed-off-by: Axel Davy <axel.d...@ens.fr>
---
The corresponding llvm patch is on llvm master,
and should land soon for 3.8 branch


I'd like to wait with pushing this until it has landed. Once it has,

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>


  src/gallium/drivers/radeon/r600_pipe_common.c | 1 +
  src/gallium/drivers/radeon/r600_pipe_common.h | 1 +
  src/gallium/drivers/radeonsi/si_pipe.c| 6 +-
  3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index e926f56..a9ce7b1 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -389,6 +389,7 @@ static const struct debug_named_value 
common_debug_options[] = {
{ "nodcc", DBG_NO_DCC, "Disable DCC." },
{ "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." },
{ "norbplus", DBG_NO_RB_PLUS, "Disable RB+ on Stoney." },
+   { "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction 
Scheduler." },

DEBUG_NAMED_VALUE_END /* must be last */
  };
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 27f6e98..3020421 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -87,6 +87,7 @@
  #define DBG_NO_DCC(1llu << 43)
  #define DBG_NO_DCC_CLEAR  (1llu << 44)
  #define DBG_NO_RB_PLUS(1llu << 45)
+#define DBG_SI_SCHED   (1llu << 46)

  #define R600_MAP_BUFFER_ALIGNMENT 64

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index f6ff4a8..51bcba7 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -215,7 +215,11 @@ static struct pipe_context *si_create_context(struct 
pipe_screen *screen,
r600_target = radeon_llvm_get_r600_target(triple);
sctx->tm = LLVMCreateTargetMachine(r600_target, triple,
   
r600_get_llvm_processor_name(sscreen->b.family),
-  "+DumpCode,+vgpr-spilling",
+#if HAVE_LLVM >= 0x0308
+  sscreen->b.debug_flags & 
DBG_SI_SCHED ?
+   
"+DumpCode,+vgpr-spilling,+si-scheduler" :
+#endif
+   "+DumpCode,+vgpr-spilling",
   LLVMCodeGenLevelDefault,
   LLVMRelocDefault,
   LLVMCodeModelDefault);


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] r600: Typos and whitespace fixes

2016-01-21 Thread Nicolai Hähnle




On 21.01.2016 11:17, Jan Vesely wrote:

Signed-off-by: Jan Vesely <jan.ves...@rutgers.edu>
---

Just some random stuff to make vim scream less

  src/gallium/drivers/r600/evergreen_compute.c | 6 +++---
  src/gallium/drivers/r600/evergreen_state.c   | 4 ++--
  src/gallium/drivers/r600/r600_asm.h  | 4 ++--
  3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
b/src/gallium/drivers/r600/evergreen_compute.c
index f50ddae..53799d3 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -719,7 +719,7 @@ static void evergreen_set_global_binding(
   * command stream by the start_cs_cmd atom.  However, since the 
SET_CONTEXT_REG
   * packet requires that the shader type bit be set, we must initialize all
   * context registers needed for compute in this function.  The registers
- * intialized by the start_cs_cmd atom can be found in evereen_state.c in the
+ * initialized by the start_cs_cmd atom can be found in evergreen_state.c in 
the
   * functions evergreen_init_atom_start_cs or cayman_init_atom_start_cs 
depending
   * on the GPU family.
   */
@@ -729,7 +729,7 @@ void evergreen_init_atom_start_compute_cs(struct 
r600_context *ctx)
int num_threads;
int num_stack_entries;

-   /* since all required registers are initialised in the
+   /* since all required registers are initialized in the


Someone once wrote that the predominant variant of English in the Linux 
kernel is Incorrect English, followed by American, British, and Indian.


Similarly, I don't think we're following a consistent style in Mesa. 
Might be best for your sanity to tell your editor to be a bit more 
lenient ;)


In any case,

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>


 * start_compute_cs_cmd atom, we can EMIT_EARLY here.
 */
r600_init_command_buffer(cb, 256);
@@ -814,7 +814,7 @@ void evergreen_init_atom_start_compute_cs(struct 
r600_context *ctx)
 * R_008E28_SQ_STATIC_THREAD_MGMT3
 */

-   /* XXX: We may need to adjust the thread and stack resouce
+   /* XXX: We may need to adjust the thread and stack resource
 * values for 3D/compute interop */

r600_store_config_reg_seq(cb, 
R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 5);
diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 9dfb849..f4e10cf 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -3670,9 +3670,9 @@ void evergreen_init_state_functions(struct r600_context 
*rctx)
unsigned id = 1;
unsigned i;
/* !!!
-*  To avoid GPU lockup registers must be emited in a specific order
+*  To avoid GPU lockup registers must be emitted in a specific order
 * (no kidding ...). The order below is important and have been
-* partialy infered from analyzing fglrx command stream.
+* partially inferred from analyzing fglrx command stream.
 *
 * Don't reorder atom without carefully checking the effect (GPU lockup
 * or piglit regression).
diff --git a/src/gallium/drivers/r600/r600_asm.h 
b/src/gallium/drivers/r600/r600_asm.h
index 0b78290..1629399 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -245,8 +245,8 @@ struct r600_bytecode {
unsignedar_chan;
unsignedar_handling;
unsignedr6xx_nop_after_rel_dst;
-   boolindex_loaded[2];
-   unsignedindex_reg[2]; /* indexing register CF_INDEX_[01] */
+   boolindex_loaded[2];
+   unsignedindex_reg[2]; /* indexing register CF_INDEX_[01] */
unsigneddebug_id;
struct r600_isa* isa;
  };


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/7] radeonsi: replace use of is_gs_copy_shader in si_shader_vs

2016-01-22 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

We now have an explicit parameter that contains the same information, and
this will allow us to get rid of is_gs_copy_shader in the si_shader struct.
---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index a7259a7..36174eb 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -339,7 +339,7 @@ static void si_shader_vs(struct si_shader *shader, struct 
si_shader *gs)
va = shader->bo->gpu_address;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, 
RADEON_PRIO_USER_SHADER);
 
-   if (shader->is_gs_copy_shader) {
+   if (gs) {
vgpr_comp_cnt = 0; /* only VertexID is needed for GS-COPY. */
num_user_sgprs = SI_GSCOPY_NUM_USER_SGPR;
} else if (shader->selector->type == PIPE_SHADER_VERTEX) {
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 7/7] radeonsi: emit rw_buffers for tes_shader only if tes_shader present

2016-01-22 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

---
 src/gallium/drivers/radeonsi/si_descriptors.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index d157a9f..aad836d 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -978,9 +978,11 @@ void si_emit_shader_userdata(struct si_context *sctx, 
struct r600_atom *atom)
si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, 
vs_base, true);
si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc, 
vs_base, true);
 
-   /* The TESSEVAL shader needs this for the ESGS ring buffer. */
-   si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc,
-  R_00B330_SPI_SHADER_USER_DATA_ES_0, 
true);
+   if (sctx->tes_shader.cso) {
+   /* The TESSEVAL shader needs this for the ESGS ring 
buffer. */
+   si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc,
+  
R_00B330_SPI_SHADER_USER_DATA_ES_0, true);
+   }
} else if (sctx->tes_shader.cso) {
/* The TESSEVAL shader needs this for streamout. */
si_emit_shader_pointer(sctx, 
&sctx->rw_buffers[PIPE_SHADER_VERTEX].desc,
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 0/7] radeonsi: geometry shader bug fix and cleanup

2016-01-22 Thread Nicolai Hähnle

Hi,

this series was prompted by a rendering bug reported for Dolphin. The bug is
fixed in the first two patches, and the remainder is assorted cleanups that
I noticed while working on the fix. Please review.

Thanks,
Nicolai
--
 .../drivers/radeonsi/si_descriptors.c|  8 +-
 src/gallium/drivers/radeonsi/si_shader.c | 14 ++--
 src/gallium/drivers/radeonsi/si_shader.h |  1 -
 .../drivers/radeonsi/si_state_shaders.c  | 78 +++---
 4 files changed, 62 insertions(+), 39 deletions(-)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/7] radeonsi: ensure that VGT_GS_MODE is sent when necessary

2016-01-22 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

Specifically, when the API switches from using a GS to not using a GS and then
back to using the same GS again, we do not have to re-send all the GS state,
but we do have to send VGT_GS_MODE. So make VGT_GS_MODE consistently be a part
of the VS state.

This fixes a rendering bug in Dolphin, but surely other applications are
affected as well.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93648
Cc: "11.0 11.1" <mesa-sta...@lists.freedesktop.org>
---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 29 ++---
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 096f152..a7259a7 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -298,7 +298,14 @@ static void si_shader_gs(struct si_shader *shader)
   
S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
 }
 
-static void si_shader_vs(struct si_shader *shader)
+/**
+ * Compute the state for \p shader, which will run as a vertex shader on the
+ * hardware.
+ *
+ * If \p gs is non-NULL, it points to the geometry shader for which this shader
+ * is the copy shader.
+ */
+static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
 {
struct si_pm4_state *pm4;
unsigned num_sgprs, num_user_sgprs;
@@ -313,15 +320,21 @@ static void si_shader_vs(struct si_shader *shader)
if (!pm4)
return;
 
-   /* If this is the GS copy shader, the GS state writes this register.
-* Otherwise, the VS state writes it.
+   /* We always write VGT_GS_MODE in the VS state, because every switch
+* between different shader pipelines involving a different GS or no
+* GS at all involves a switch of the VS (different GS use different
+* copy shaders). On the other hand, when the API switches from a GS to
+* no GS and then back to the same GS used originally, the GS state is
+* not sent again.
 */
-   if (!shader->is_gs_copy_shader) {
+   if (!gs) {
si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE,
   S_028A40_MODE(enable_prim_id ? 
V_028A40_GS_SCENARIO_A : 0));
si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 
enable_prim_id);
-   } else
+   } else {
+   si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE, si_vgt_gs_mode(gs));
si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0);
+   }
 
va = shader->bo->gpu_address;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, 
RADEON_PRIO_USER_SHADER);
@@ -547,7 +560,7 @@ static void si_shader_init_pm4_state(struct si_shader 
*shader)
else if (shader->key.vs.as_es)
si_shader_es(shader);
else
-   si_shader_vs(shader);
+   si_shader_vs(shader, NULL);
break;
case PIPE_SHADER_TESS_CTRL:
si_shader_hs(shader);
@@ -556,11 +569,11 @@ static void si_shader_init_pm4_state(struct si_shader 
*shader)
if (shader->key.tes.as_es)
si_shader_es(shader);
else
-   si_shader_vs(shader);
+   si_shader_vs(shader, NULL);
break;
case PIPE_SHADER_GEOMETRY:
si_shader_gs(shader);
-   si_shader_vs(shader->gs_copy_shader);
+   si_shader_vs(shader->gs_copy_shader, shader);
break;
case PIPE_SHADER_FRAGMENT:
si_shader_ps(shader);
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/7] radeonsi: extract the VGT_GS_MODE calculation into its own function

2016-01-22 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

Cc: "11.0 11.1" <mesa-sta...@lists.freedesktop.org>
---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 47 +++--
 1 file changed, 28 insertions(+), 19 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 79f2335..096f152 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -212,13 +212,37 @@ static void si_shader_es(struct si_shader *shader)
si_set_tesseval_regs(shader, pm4);
 }
 
+/**
+ * Calculate the appropriate setting of VGT_GS_MODE when \p shader is a
+ * geometry shader.
+ */
+static uint32_t si_vgt_gs_mode(struct si_shader *shader)
+{
+   unsigned gs_max_vert_out = shader->selector->gs_max_out_vertices;
+   unsigned cut_mode;
+
+   if (gs_max_vert_out <= 128) {
+   cut_mode = V_028A40_GS_CUT_128;
+   } else if (gs_max_vert_out <= 256) {
+   cut_mode = V_028A40_GS_CUT_256;
+   } else if (gs_max_vert_out <= 512) {
+   cut_mode = V_028A40_GS_CUT_512;
+   } else {
+   assert(gs_max_vert_out <= 1024);
+   cut_mode = V_028A40_GS_CUT_1024;
+   }
+
+   return S_028A40_MODE(V_028A40_GS_SCENARIO_G) |
+  S_028A40_CUT_MODE(cut_mode)|
+  S_028A40_ES_WRITE_OPTIMIZE(1) |
+  S_028A40_GS_WRITE_OPTIMIZE(1);
+}
+
 static void si_shader_gs(struct si_shader *shader)
 {
unsigned gs_vert_itemsize = shader->selector->gsvs_vertex_size;
-   unsigned gs_max_vert_out = shader->selector->gs_max_out_vertices;
unsigned gsvs_itemsize = shader->selector->max_gsvs_emit_size >> 2;
unsigned gs_num_invocations = shader->selector->gs_num_invocations;
-   unsigned cut_mode;
struct si_pm4_state *pm4;
unsigned num_sgprs, num_user_sgprs;
uint64_t va;
@@ -232,22 +256,7 @@ static void si_shader_gs(struct si_shader *shader)
if (!pm4)
return;
 
-   if (gs_max_vert_out <= 128) {
-   cut_mode = V_028A40_GS_CUT_128;
-   } else if (gs_max_vert_out <= 256) {
-   cut_mode = V_028A40_GS_CUT_256;
-   } else if (gs_max_vert_out <= 512) {
-   cut_mode = V_028A40_GS_CUT_512;
-   } else {
-   assert(gs_max_vert_out <= 1024);
-   cut_mode = V_028A40_GS_CUT_1024;
-   }
-
-   si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE,
-  S_028A40_MODE(V_028A40_GS_SCENARIO_G) |
-  S_028A40_CUT_MODE(cut_mode)|
-  S_028A40_ES_WRITE_OPTIMIZE(1) |
-  S_028A40_GS_WRITE_OPTIMIZE(1));
+   si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE, si_vgt_gs_mode(shader));
 
si_pm4_set_reg(pm4, R_028A60_VGT_GSVS_RING_OFFSET_1, gsvs_itemsize);
si_pm4_set_reg(pm4, R_028A64_VGT_GSVS_RING_OFFSET_2, gsvs_itemsize * 
((max_stream >= 2) ? 2 : 1));
@@ -255,7 +264,7 @@ static void si_shader_gs(struct si_shader *shader)
 
si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize * 
(max_stream + 1));
 
-   si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, gs_max_vert_out);
+   si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, 
shader->selector->gs_max_out_vertices);
 
si_pm4_set_reg(pm4, R_028B5C_VGT_GS_VERT_ITEMSIZE, gs_vert_itemsize >> 
2);
si_pm4_set_reg(pm4, R_028B60_VGT_GS_VERT_ITEMSIZE_1, (max_stream >= 1) 
? gs_vert_itemsize >> 2 : 0);
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 6/7] radeonsi: do not set the shader->key for gs copy shaders

2016-01-22 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

The key for a geometry shader would be interpreted as the key for a vertex
shader further down the line, which really doesn't make sense.

This does not affect the contents of shader->key because geometry shaders
don't have any key entries anyway.
---
 src/gallium/drivers/radeonsi/si_shader.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index a4bc1de..5e0c2a8 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4360,7 +4360,6 @@ int si_shader_create(struct si_screen *sscreen, 
LLVMTargetMachineRef tm,
if (si_shader_ctx.type == TGSI_PROCESSOR_GEOMETRY) {
shader->gs_copy_shader = CALLOC_STRUCT(si_shader);
shader->gs_copy_shader->selector = shader->selector;
-   shader->gs_copy_shader->key = shader->key;
si_shader_ctx.shader = shader->gs_copy_shader;
if ((r = si_generate_gs_copy_shader(sscreen, &si_shader_ctx,
shader, dump, debug))) {
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 5/7] radeonsi: si_llvm_emit_vs_epilogue is never used with gs copy shaders

2016-01-22 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

Hence remove the misleading branch on is_gs_copy_shader.
---
 src/gallium/drivers/radeonsi/si_shader.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 6cc435c..a4bc1de 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2152,6 +2152,8 @@ static void si_llvm_emit_vs_epilogue(struct 
lp_build_tgsi_context * bld_base)
struct si_shader_output_values *outputs = NULL;
int i,j;
 
+   assert(!si_shader_ctx->is_gs_copy_shader);
+
outputs = MALLOC((info->num_outputs + 1) * sizeof(outputs[0]));
 
/* Vertex color clamping.
@@ -2160,8 +2162,7 @@ static void si_llvm_emit_vs_epilogue(struct 
lp_build_tgsi_context * bld_base)
 * an IF statement is added that clamps all colors if the constant
 * is true.
 */
-   if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX &&
-   !si_shader_ctx->is_gs_copy_shader) {
+   if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
struct lp_build_if_state if_ctx;
LLVMValueRef cond = NULL;
LLVMValueRef addr, val;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/7] radeonsi: move is_gs_copy_shader to si_shader_context

2016-01-22 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

It is only used during shader creation now, so no need to keep it around
afterwards.
---
 src/gallium/drivers/radeonsi/si_shader.c | 10 +-
 src/gallium/drivers/radeonsi/si_shader.h |  1 -
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 1bd617f..6cc435c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -68,6 +68,7 @@ struct si_shader_context
struct si_shader *shader;
struct si_screen *screen;
unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */
+   bool is_gs_copy_shader;
int param_streamout_config;
int param_streamout_write_index;
int param_streamout_offset[4];
@@ -2160,7 +2161,7 @@ static void si_llvm_emit_vs_epilogue(struct 
lp_build_tgsi_context * bld_base)
 * is true.
 */
if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX &&
-   !si_shader_ctx->shader->is_gs_copy_shader) {
+   !si_shader_ctx->is_gs_copy_shader) {
struct lp_build_if_state if_ctx;
LLVMValueRef cond = NULL;
LLVMValueRef addr, val;
@@ -3554,7 +3555,7 @@ static void create_function(struct si_shader_context 
*si_shader_ctx)
params[SI_PARAM_LS_OUT_LAYOUT] = i32;
num_params = SI_PARAM_LS_OUT_LAYOUT+1;
} else {
-   if (shader->is_gs_copy_shader) {
+   if (si_shader_ctx->is_gs_copy_shader) {
last_array_pointer = SI_PARAM_CONST_BUFFERS;
num_params = SI_PARAM_CONST_BUFFERS+1;
} else {
@@ -3827,7 +3828,7 @@ static void preload_ring_buffers(struct si_shader_context 
*si_shader_ctx)
build_indexed_load_const(si_shader_ctx, buf_ptr, 
offset);
}
 
-   if (si_shader_ctx->shader->is_gs_copy_shader) {
+   if (si_shader_ctx->is_gs_copy_shader) {
LLVMValueRef offset = lp_build_const_int32(gallivm, 
SI_RING_GSVS);
 
si_shader_ctx->gsvs_ring[0] =
@@ -4075,7 +4076,6 @@ static int si_generate_gs_copy_shader(struct si_screen 
*sscreen,
struct lp_build_tgsi_context *bld_base = 
&si_shader_ctx->radeon_bld.soa.bld_base;
struct lp_build_context *base = &bld_base->base;
struct lp_build_context *uint = &bld_base->uint_bld;
-   struct si_shader *shader = si_shader_ctx->shader;
struct si_shader_output_values *outputs;
struct tgsi_shader_info *gsinfo = &gs->selector->info;
LLVMValueRef args[9];
@@ -4084,7 +4084,7 @@ static int si_generate_gs_copy_shader(struct si_screen 
*sscreen,
outputs = MALLOC(gsinfo->num_outputs * sizeof(outputs[0]));
 
si_shader_ctx->type = TGSI_PROCESSOR_VERTEX;
-   shader->is_gs_copy_shader = true;
+   si_shader_ctx->is_gs_copy_shader = true;
 
radeon_llvm_context_init(&si_shader_ctx->radeon_bld);
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 50375e2..c151207 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -297,7 +297,6 @@ struct si_shader {
booluses_instanceid;
unsignednr_pos_exports;
unsignednr_param_exports;
-   boolis_gs_copy_shader;
booldx10_clamp_mode; /* convert NaNs to 0 */
 };
 
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] util/ralloc: Remove double zero'ing of rzalloc buffers

2016-01-22 Thread Nicolai Hähnle


On 22.01.2016 16:02, Kenneth Graunke wrote:

On Friday, January 22, 2016 12:09:18 PM PST Nicolai Hähnle wrote:

On 22.01.2016 02:53, Jordan Justen wrote:

Juha-Pekka found this back in May 2015:
<1430915727-28677-1-git-send-email-juhapekka.heikk...@gmail.com>

  From the discussion, obviously it would be preferable to make
ralloc_size no longer return zeroed memory, but Juha-Pekka found that
it would break Mesa.

For now, let's point out the flaw, and stop doing the double zeroing
of rzalloc buffers.

Signed-off-by: Jordan Justen <jordan.l.jus...@intel.com>
Cc: Juha-Pekka Heikkila <juhapekka.heikk...@gmail.com>
Cc: Kenneth Graunke <kenn...@whitecape.org>
---

   For a release build, I saw the code size shrink by 64 bytes.

   src/util/ralloc.c | 15 +--
   1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/util/ralloc.c b/src/util/ralloc.c
index 6d4032b..24c1eee 100644
--- a/src/util/ralloc.c
+++ b/src/util/ralloc.c
@@ -49,6 +49,14 @@ _CRTIMP int _vscprintf(const char *format, va_list

argptr);

   #endif
   #endif

+/* ralloc_size has always used calloc to allocate memory. This has

allowed

+ * code using ralloc_size to depend on always receiving a cleared buffer.
+ *
+ * FIXME: Clean up the code base to allow this to be set to false, and

then

+ * remove it altogether.
+ */
+static const bool always_allocate_zeroed_memory = true;
+
   #define CANARY 0x5A1106

   struct ralloc_header
@@ -110,7 +118,10 @@ ralloc_context(const void *ctx)
   void *
   ralloc_size(const void *ctx, size_t size)
   {
-   void *block = calloc(1, size + sizeof(ralloc_header));
+   void *block =
+  always_allocate_zeroed_memory ?
+  calloc(1, size + sizeof(ralloc_header)) :
+  malloc(size + sizeof(ralloc_header));


There's an integer overflow here which would be good to fix.


Please explain?  ralloc_header is 40-44 bytes - the only way this will
overflow is if you asked for an absurd amount of memory (already near
the max value of size_t).  And, if you did, I'm not sure what we're
supposed to do about it...


A common method of triggering buffer overflows leading to security 
exploits is that the attacker sets an absurdly large buffer size 
somewhere - so large that additional calculations that increase the 
buffer size wrap around and result in a very small successful 
allocation. The code will then write memory based on the original, 
absurdly large buffer size. This means writing beyond the end of the 
allocated buffer.


The people who look for security exploits for a living are really good 
at finding ways in which such a situation can then be used to hijack the 
control flow of the program somehow (for example, targeted overwriting 
of the internal metadata of the heap... sounds crazy, but it's used) to 
do more or less whatever they want.


I think a decent way to protect against it is something like:

   /* Overflow of unsigned integer types is well defined */
   if (size + sizeof(ralloc_header) < size)
  return NULL;

or perhaps

   if (size > SIZE_MAX - sizeof(ralloc_header))
  return NULL;

Newer versions of gcc have nicer builtins for arithmetic with overflow 
check, but I don't know if we want to depend on those being available.


The hope is that the calling code properly handles allocation failure. 
Even if it doesn't, the result is much more likely to just be a segfault 
before anything dangerous can happen.


One may say that all of this depends on an attacker gaining access to 
Mesa, but WebGL is a thing, so...


Cheers,
Nicolai




Since it was there already in the older version, the patch is

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

as is.


  ralloc_header *info;
  ralloc_header *parent;

@@ -132,7 +143,7 @@ void *
   rzalloc_size(const void *ctx, size_t size)
   {
  void *ptr = ralloc_size(ctx, size);
-   if (likely(ptr != NULL))
+   if (!always_allocate_zeroed_memory && likely(ptr != NULL))
 memset(ptr, 0, size);
  return ptr;
   }





Dropping the memset seems reasonable.  I would prefer it if we simply
moved the contents of ralloc_size into rzalloc_size, and made
ralloc_size call rzalloc_size with a comment.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] gallium r300 driver for PowerPC

2016-01-25 Thread Nicolai Hähnle


On 24.01.2016 20:56, Herminio Hernandez, Jr. wrote:

So I believe I have all the debugging symbols installed. From what I am seeing 
in gdb and valgrind I am still thinking the issue is in the glx branch. For gdb 
I ran it twice and stopped it during its attempt to load the r300 driver and in 
its attempt to load the swrast driver. Both failed at the same place in the 
trace; see below. Something is breaking when dlopen tries to load the driver. Just 
want to verify that I am looking at the right thing. Thanks!

Herminio

Starting program: /usr/bin/glxgears
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/powerpc-linux-gnu/libthread_db.so.1".
libGL: OpenDriver: trying /usr/lib/powerpc-linux-gnu/dri/tls/r300_dri.so
libGL: OpenDriver: trying /usr/lib/powerpc-linux-gnu/dri/r300_dri.so
^C^CQuit


So you do have the debugging symbols now, but this looks like you just 
interrupted the program manually. What's the point of that?


You originally reported some Valgrind issues. Do those still exist? What 
is really the problem?


Cheers,
Nicolai


(gdb) bt
#0  __GI__dl_debug_state () at dl-debug.c:74
#1  0xb7fd4730 in dl_open_worker (a=a@entry=0xbfffe7d8) at dl-open.c:306
#2  0xb7fcf31c in _dl_catch_error (objname=objname@entry=0xbfffe800, 
errstring=errstring@entry=0xbfffe7fc,
 mallocedp=mallocedp@entry=0xbfffe804, operate=0xb7fd4560 , 
args=args@entry=0xbfffe7d8) at dl-error.c:187
#3  0xb7fd3ec0 in _dl_open (file=0xbfffeb64 
"/usr/lib/powerpc-linux-gnu/dri/r300_dri.so", mode=-2147483390,
 caller_dlopen=0xfe728c8 <driOpenDriver+472>, nsid=-2, argc=1, 
argv=0xb424, env=0xb42c) at dl-open.c:653
#4  0x0f3bae44 in dlopen_doit (a=a@entry=0xbfffeb38) at dlopen.c:66
#5  0xb7fcf31c in _dl_catch_error (objname=0x10031c84, errstring=0x10031c88, 
mallocedp=0x10031c80,
 operate=0xf3badc0 , args=0xbfffeb38) at dl-error.c:187
#6  0x0f3bb7f4 in _dlerror_run (operate=0xf3badc0 , 
args=args@entry=0xbfffeb38) at dlerror.c:163
#7  0x0f3baf20 in __dlopen (file=file@entry=0xbfffeb64 
"/usr/lib/powerpc-linux-gnu/dri/r300_dri.so", mode=mode@entry=258)
 at dlopen.c:87
#8  0x0fe728c8 in driOpenDriver (driverName=0x10031c68 "r300") at 
../../../../src/glx/dri_common.c:141
#9  0x0fe76980 in dri2CreateScreen (screen=0, priv=0x1002fc20) at 
../../../../src/glx/dri2_glx.c:1211
#10 0x0fe41bb0 in AllocAndFetchScreenConfigs (priv=0x1002fc20, dpy=0x10025a10) 
at ../../../../src/glx/glxext.c:799
#11 __glXInitialize (dpy=dpy@entry=0x10025a10) at 
../../../../src/glx/glxext.c:910
#12 0x0fe3caa4 in GetGLXPrivScreenConfig (dpy=dpy@entry=0x10025a10, 
scrn=scrn@entry=0, ppriv=ppriv@entry=0xbfffed50,
 ppsc=ppsc@entry=0xbfffed54) at ../../../../src/glx/glxcmds.c:172
#13 0x0fe3cd04 in GetGLXPrivScreenConfig (ppsc=0xbfffed54, ppriv=0xbfffed50, 
scrn=, dpy=0x10025a10)
 at ../../../../src/glx/glxcmds.c:168
#14 glXChooseVisual (dpy=0x10025a10, screen=0, attribList=0xbfffef3c) at 
../../../../src/glx/glxcmds.c:1249
#15 0x10002a34 in ?? ()
#16 0x100010a4 in ?? ()
#17 0x0fa15a14 in generic_start_main (main=0x1ee0, argc=1, argv=0xb424, 
auxvec=0xb4d0, init=,
 rtld_fini=rtld_fini@entry=0xb7fcfad0 <_dl_fini>, stack_end=, 
fini=)
 at ../csu/libc-start.c:291
#18 0x0fa15c14 in __libc_start_main (argc=, argv=, 
ev=, auxvec=,
 rtld_fini=0xb7fcfad0 <_dl_fini>, stinfo=, 
stack_on_entry=)
 at ../sysdeps/unix/sysv/linux/powerpc/libc-start.c:94
#19 0x0000 in ?? ()
(gdb) q







On Dec 14, 2015, at 11:13 AM, Nicolai Hähnle <nhaeh...@gmail.com> wrote:

On 14.12.2015 04:10, Eero Tamminen wrote:

On 12/14/2015 10:44 AM, Herminio Hernandez, Jr. wrote:

I am new to this list. I have been trying to see if I can fix or at
least pinpoint an issue with the Radeon r300 driver failing on PowerPC
systems. This has been a problem for a while and I would like to help
to get this fixed. I have done some debugging with valgrind and I
think I may see where the issue is but I would like to have someone double
check what I am doing. So when I set my Default Depth to 16 I do get
3D acceleration but when I set to the default of 24 it breaks.
Valgrind reports memory leaks when I run glxgears with a Default Depth
of 24 but shows no definite memory leaks with a Depth of 16. I then
got the source code and created a dev environment and ran glxgears
through valgrind with my default depth of 24 and saw similar memory
leaks. Here is a sample of what I am seeing.

==25273== 108 (12 direct, 96 indirect) bytes in 1 blocks are
definitely lost in loss record 54 of 78
==25273==at 0xFFB2868: malloc (vg_replace_malloc.c:299)
==25273==by 0xED0457B: ???
==25273==by 0xEEC6F3B: ???
==25273==by 0xE95A78B: ???
==25273==by 0xED7DF7F: ???
==25273==by 0xED7D5DB: ???
==25273==by 0xEC5B377: ???
==25273==by 0xEC567EB: ???
==25273==by 0xFDEDFD3: dri2CreateScreen (dri2_glx.c:1235)
==25273==by 0xFDB866F: AllocAndFetchScreenConfigs (glxext

Re: [Mesa-dev] [PATCH 17/17] st/mesa: expose ARB_compute_shader when compute is supported

2016-01-25 Thread Nicolai Hähnle


On 25.01.2016 10:04, Ilia Mirkin wrote:

Yeah so this isn't going to fly. r600/radeonsi both expose
PIPE_CAP_COMPUTE, for clover, but won't support these compute shaders
quite yet. Not sure what the solution is here... maybe just add
another cap? PIPE_CAP_GLSL_COMPUTE or something? Or require a backend
to also set PIPE_CAP_GLSL_VERSION to 430 for ARB_compute_shader to
become exposed? Don't know.


GLSL 430 also requires arrays_of_arrays. I don't actually know how much 
we need to do to support that, but maybe it'd be better not to depend on 
that.


A new cap is an option. A slightly weird but I think reasonable option 
would be to require a PIPE_CAP_GLSL_VERSION of 420, since 
ARB_compute_shader officially requires OpenGL 4.2 anyway, and then let 
us deal with the fallout when we up that version number.


Cheers,
Nicolai



   -ilia

On Sun, Jan 24, 2016 at 4:09 PM, Samuel Pitoiset
 wrote:

Signed-off-by: Samuel Pitoiset 
---
  src/mesa/state_tracker/st_extensions.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index 2374dcc..ec30d99 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -489,6 +489,7 @@ void st_init_extensions(struct pipe_screen *screen,
{ o(ARB_clear_texture),PIPE_CAP_CLEAR_TEXTURE   
 },
{ o(ARB_clip_control), PIPE_CAP_CLIP_HALFZ  
 },
{ o(ARB_color_buffer_float),   PIPE_CAP_VERTEX_COLOR_UNCLAMPED  
 },
+  { o(ARB_compute_shader),   PIPE_CAP_COMPUTE  
},
{ o(ARB_conditional_render_inverted),  
PIPE_CAP_CONDITIONAL_RENDER_INVERTED  },
{ o(ARB_copy_image),   
PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS },
{ o(ARB_depth_clamp),  PIPE_CAP_DEPTH_CLIP_DISABLE  
 },
--
2.6.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] gallium r300 driver for PowerPC

2016-01-25 Thread Nicolai Hähnle


On 25.01.2016 10:58, Herminio Hernandez Jr.  wrote:

Someone suggested that I should kill the program at runtime to see what the 
issue was. I did the same thing with valgrind and saw some similar outputs. 
See below.

It is just a sample I can send more output. I wanted to compare the result I 
got from gdb with what I was seeing with Valgrind. Apologies if I was not clear.

Just to be clear, this is my first real attempt to debug and troubleshoot a 
program. So I am completely open to criticism if I am doing something wrong.

==30671== 668 bytes in 1 blocks are still reachable in loss record 60 of 70
==30671==    at 0xFFB50A4: calloc (vg_replace_malloc.c:711)
==30671==    by 0x400C537: _dl_new_object (in /lib/powerpc-linux-gnu/ld-2.21.so)
==30671==    by 0x4007847: _dl_map_object_from_fd (in /lib/powerpc-linux-gnu/ld-2.21.so)
==30671==    by 0x4009DCB: _dl_map_object (in /lib/powerpc-linux-gnu/ld-2.21.so)
==30671==    by 0x4015673: dl_open_worker (in /lib/powerpc-linux-gnu/ld-2.21.so)
==30671==    by 0x401031B: _dl_catch_error (in /lib/powerpc-linux-gnu/ld-2.21.so)
==30671==    by 0x4014EBF: _dl_open (in /lib/powerpc-linux-gnu/ld-2.21.so)
==30671==    by 0xF306E43: dlopen_doit (in /lib/powerpc-linux-gnu/libdl-2.21.so)
==30671==    by 0x401031B: _dl_catch_error (in /lib/powerpc-linux-gnu/ld-2.21.so)
==30671==    by 0xF3077F3: _dlerror_run (in /lib/powerpc-linux-gnu/libdl-2.21.so)
==30671==    by 0xF306F1F: dlopen@@GLIBC_2.1 (in /lib/powerpc-linux-gnu/libdl-2.21.so)
==30671==    by 0xFDE984B: driOpenDriver (dri_common.c:141)
==30671==


This means that some memory that was allocated during the program run 
was not freed before program end. However, the block is still reachable, 
which usually indicates something that is not a genuine problem.


It seems everything is working fine, so I still don't understand what 
you're worried about.


Nicolai


Sent from my iPhone


On Jan 25, 2016, at 9:41 AM, Nicolai Hähnle <nhaeh...@gmail.com> wrote:


On 24.01.2016 20:56, Herminio Hernandez, Jr. wrote:
So I believe I have all the debugging symbols installed. From what I am seeing 
in gdb and valgrind I am still thinking the issue is in the glx branch. For gdb 
I ran it twice and stopped it during its attempt to load the r300 driver and in 
its attempt to load the swrast driver. Both failed at the same place in the 
trace; see below. Something is breaking when dlopen tries to load the driver. Just 
want to verify that I am looking at the right thing. Thanks!

Herminio

Starting program: /usr/bin/glxgears
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/powerpc-linux-gnu/libthread_db.so.1".
libGL: OpenDriver: trying /usr/lib/powerpc-linux-gnu/dri/tls/r300_dri.so
libGL: OpenDriver: trying /usr/lib/powerpc-linux-gnu/dri/r300_dri.so
^C^CQuit


So you do have the debugging symbols now, but this looks like you just 
interrupted the program manually. What's the point of that?

You originally reported some Valgrind issues. Do those still exist? What is 
really the problem?

Cheers,
Nicolai


(gdb) bt
#0  __GI__dl_debug_state () at dl-debug.c:74
#1  0xb7fd4730 in dl_open_worker (a=a@entry=0xbfffe7d8) at dl-open.c:306
#2  0xb7fcf31c in _dl_catch_error (objname=objname@entry=0xbfffe800, 
errstring=errstring@entry=0xbfffe7fc,
 mallocedp=mallocedp@entry=0xbfffe804, operate=0xb7fd4560 , 
args=args@entry=0xbfffe7d8) at dl-error.c:187
#3  0xb7fd3ec0 in _dl_open (file=0xbfffeb64 
"/usr/lib/powerpc-linux-gnu/dri/r300_dri.so", mode=-2147483390,
 caller_dlopen=0xfe728c8 <driOpenDriver+472>, nsid=-2, argc=1, 
argv=0xb424, env=0xb42c) at dl-open.c:653
#4  0x0f3bae44 in dlopen_doit (a=a@entry=0xbfffeb38) at dlopen.c:66
#5  0xb7fcf31c in _dl_catch_error (objname=0x10031c84, errstring=0x10031c88, 
mallocedp=0x10031c80,
 operate=0xf3badc0 , args=0xbfffeb38) at dl-error.c:187
#6  0x0f3bb7f4 in _dlerror_run (operate=0xf3badc0 , 
args=args@entry=0xbfffeb38) at dlerror.c:163
#7  0x0f3baf20 in __dlopen (file=file@entry=0xbfffeb64 
"/usr/lib/powerpc-linux-gnu/dri/r300_dri.so", mode=mode@entry=258)
 at dlopen.c:87
#8  0x0fe728c8 in driOpenDriver (driverName=0x10031c68 "r300") at 
../../../../src/glx/dri_common.c:141
#9  0x0fe76980 in dri2CreateScreen (screen=0, priv=0x1002fc20) at 
../../../../src/glx/dri2_glx.c:1211
#10 0x0fe41bb0 in AllocAndFetchScreenConfigs (priv=0x1002fc20, dpy=0x10025a10) 
at ../../../../src/glx/glxext.c:799
#11 __glXInitialize (dpy=dpy@entry=0x10025a10) at 
../../../../src/glx/glxext.c:910
#12 0x0fe3caa4 in GetGLXPrivScreenConfig (dpy=dpy@entry=0x10025a10, 
scrn=scrn@entry=0, ppriv=ppriv@entry=0xbfffed50,
 ppsc=ppsc@entry=0xbfffed54) at ../../../../src/glx/glxcmds.c:172
#13 0x0fe3cd04 in GetGLXPrivScreenConfig (ppsc=0xbfffed54, ppriv=0xbfffed50, 
scrn=, dpy=0x10025a10)
 at ../../../../src/g

Re: [Mesa-dev] [PATCH 3/3] si-report.py: process Max Waves

2016-01-25 Thread Nicolai Hähnle


For the series:
Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

On 23.01.2016 11:07, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

---
  si-report.py | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/si-report.py b/si-report.py
index 12e16c3..c7fe1b5 100755
--- a/si-report.py
+++ b/si-report.py
@@ -61,6 +61,7 @@ class si_stats:
  ('code_size', 'Code Size', 'bytes'),
  ('lds', 'LDS', 'blocks'),
  ('scratch', 'Scratch', 'bytes per wave'),
+('maxwaves', 'Max Waves', ''),
  ('waitstates', 'Wait states', ''),
  ]

@@ -156,7 +157,7 @@ class si_stats:
  class si_parser(object):
  re_stats = re.compile(
  r"^Shader Stats: SGPRS: ([0-9]+) VGPRS: ([0-9]+) Code Size: ([0-9]+) 
"+
-r"LDS: ([0-9]+) Scratch: ([0-9]+)$")
+r"LDS: ([0-9]+) Scratch: ([0-9]+) Max Waves: ([0-9]+)")
  re_nop = re.compile("^\ts_nop ([0-9]+)")

  def __init__(self):
@@ -183,6 +184,7 @@ class si_parser(object):
  self._stats.code_size = int(match.group(3))
  self._stats.lds = int(match.group(4))
  self._stats.scratch = int(match.group(5))
+self._stats.maxwaves = int(match.group(6))
  old_stats = self._stats
  self._stats = None
  return old_stats


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] gallium/ddebug: add 'verbose' option

2016-01-25 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

This currently just writes out the name of dump files, which can be useful
to easily correlate those files with other log outputs (driver debug output,
apitrace calls, etc.)
---
 src/gallium/drivers/ddebug/dd_draw.c| 3 ++-
 src/gallium/drivers/ddebug/dd_pipe.h| 1 +
 src/gallium/drivers/ddebug/dd_screen.c  | 6 --
 src/gallium/drivers/ddebug/dd_util.h| 5 -
 src/gallium/drivers/radeonsi/si_debug.c | 2 +-
 5 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/ddebug/dd_draw.c 
b/src/gallium/drivers/ddebug/dd_draw.c
index de484ab..45e4e10 100644
--- a/src/gallium/drivers/ddebug/dd_draw.c
+++ b/src/gallium/drivers/ddebug/dd_draw.c
@@ -88,8 +88,9 @@ struct dd_call
 static FILE *
 dd_get_file_stream(struct dd_context *dctx)
 {
+   struct dd_screen *dscreen = dd_screen(dctx->base.screen);
struct pipe_screen *screen = dctx->pipe->screen;
-   FILE *f = dd_get_debug_file();
+   FILE *f = dd_get_debug_file(dscreen->verbose);
if (!f)
   return NULL;
 
diff --git a/src/gallium/drivers/ddebug/dd_pipe.h 
b/src/gallium/drivers/ddebug/dd_pipe.h
index a045518..80098dc 100644
--- a/src/gallium/drivers/ddebug/dd_pipe.h
+++ b/src/gallium/drivers/ddebug/dd_pipe.h
@@ -45,6 +45,7 @@ struct dd_screen
unsigned timeout_ms;
enum dd_mode mode;
bool no_flush;
+   bool verbose;
unsigned skip_count;
 };
 
diff --git a/src/gallium/drivers/ddebug/dd_screen.c 
b/src/gallium/drivers/ddebug/dd_screen.c
index ab87d25..3706b2d 100644
--- a/src/gallium/drivers/ddebug/dd_screen.c
+++ b/src/gallium/drivers/ddebug/dd_screen.c
@@ -280,17 +280,18 @@ ddebug_screen_create(struct pipe_screen *screen)
   puts("");
   puts("Usage:");
   puts("");
-  puts("  GALLIUM_DDEBUG=\"always [noflush]\"");
+  puts("  GALLIUM_DDEBUG=\"always [noflush] [verbose]\"");
   puts("Flush and dump context and driver information after every draw 
call into");
   puts("$HOME/"DD_DIR"/.");
   puts("");
-  puts("  GALLIUM_DDEBUG=\"[timeout in ms] [noflush]\"");
+  puts("  GALLIUM_DDEBUG=\"[timeout in ms] [noflush] [verbose]\"");
   puts("Flush and detect a device hang after every draw call based on 
the given");
   puts("fence timeout and dump context and driver information into");
   puts("$HOME/"DD_DIR"/ when a hang is detected.");
   puts("");
   puts("  If 'noflush' is specified, do not flush on every draw call. In 
hang");
   puts("  detection mode, this only detect hangs in pipe->flush.");
+  puts("  If 'verbose' is specified, additional information is written to 
stderr.");
   puts("");
   puts("  GALLIUM_DDEBUG_SKIP=[count]");
   puts("Skip flush and hang detection for the given initial number of 
draw calls.");
@@ -341,6 +342,7 @@ ddebug_screen_create(struct pipe_screen *screen)
dscreen->timeout_ms = timeout;
dscreen->mode = dump_always ? DD_DUMP_ALL_CALLS : DD_DETECT_HANGS;
dscreen->no_flush = no_flush;
+   dscreen->verbose = strstr(option, "verbose") != NULL;
 
switch (dscreen->mode) {
case DD_DUMP_ALL_CALLS:
diff --git a/src/gallium/drivers/ddebug/dd_util.h 
b/src/gallium/drivers/ddebug/dd_util.h
index c217c8e..093bdff 100644
--- a/src/gallium/drivers/ddebug/dd_util.h
+++ b/src/gallium/drivers/ddebug/dd_util.h
@@ -40,7 +40,7 @@
 #define DD_DIR "ddebug_dumps"
 
 static inline FILE *
-dd_get_debug_file()
+dd_get_debug_file(bool verbose)
 {
static unsigned index;
char proc_name[128], dir[256], name[512];
@@ -65,6 +65,9 @@ dd_get_debug_file()
   return NULL;
}
 
+   if (verbose)
+  fprintf(stderr, "dd: dumping to file %s\n", name);
+
return f;
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_debug.c 
b/src/gallium/drivers/radeonsi/si_debug.c
index a07b1c5..e16ebbd 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -771,7 +771,7 @@ void si_check_vm_faults(struct si_context *sctx)
if (!si_vm_fault_occured(sctx, &addr))
return;
 
-   f = dd_get_debug_file();
+   f = dd_get_debug_file(false);
if (!f)
return;
 
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] gallium/ddebug: make 'noflush' also affect 'always' mode

2016-01-25 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

This changes the default behavior of 'always' mode to be consistent with
hang detection mode.

I have used this to more easily compare dumped command streams using diff.
---
 src/gallium/drivers/ddebug/dd_draw.c   |  3 +++
 src/gallium/drivers/ddebug/dd_screen.c | 12 +++-
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/ddebug/dd_draw.c 
b/src/gallium/drivers/ddebug/dd_draw.c
index 0d7ee9a..de484ab 100644
--- a/src/gallium/drivers/ddebug/dd_draw.c
+++ b/src/gallium/drivers/ddebug/dd_draw.c
@@ -602,6 +602,7 @@ static void
 dd_after_draw(struct dd_context *dctx, struct dd_call *call)
 {
struct dd_screen *dscreen = dd_screen(dctx->base.screen);
+   struct pipe_context *pipe = dctx->pipe;
 
if (dctx->num_draw_calls >= dscreen->skip_count) {
   switch (dscreen->mode) {
@@ -615,6 +616,8 @@ dd_after_draw(struct dd_context *dctx, struct dd_call *call)
  }
  break;
   case DD_DUMP_ALL_CALLS:
+ if (!dscreen->no_flush)
+pipe->flush(pipe, NULL, 0);
  dd_dump_call(dctx, call, 0);
  break;
   default:
diff --git a/src/gallium/drivers/ddebug/dd_screen.c 
b/src/gallium/drivers/ddebug/dd_screen.c
index 2716845..ab87d25 100644
--- a/src/gallium/drivers/ddebug/dd_screen.c
+++ b/src/gallium/drivers/ddebug/dd_screen.c
@@ -270,7 +270,7 @@ ddebug_screen_create(struct pipe_screen *screen)
 {
struct dd_screen *dscreen;
const char *option = debug_get_option("GALLIUM_DDEBUG", NULL);
-   bool dump_always = option && !strcmp(option, "always");
+   bool dump_always = option && !strncmp(option, "always", 6);
bool no_flush = option && strstr(option, "noflush");
bool help = option && !strcmp(option, "help");
unsigned timeout = 0;
@@ -280,15 +280,17 @@ ddebug_screen_create(struct pipe_screen *screen)
   puts("");
   puts("Usage:");
   puts("");
-  puts("  GALLIUM_DDEBUG=always");
-  puts("Dump context and driver information after every draw call 
into");
+  puts("  GALLIUM_DDEBUG=\"always [noflush]\"");
+  puts("Flush and dump context and driver information after every draw 
call into");
   puts("$HOME/"DD_DIR"/.");
   puts("");
-  puts("  GALLIUM_DDEBUG=[timeout in ms] noflush");
+  puts("  GALLIUM_DDEBUG=\"[timeout in ms] [noflush]\"");
   puts("Flush and detect a device hang after every draw call based on 
the given");
   puts("fence timeout and dump context and driver information into");
   puts("$HOME/"DD_DIR"/ when a hang is detected.");
-  puts("If 'noflush' is specified, only detect hangs in pipe->flush.");
+  puts("");
+  puts("  If 'noflush' is specified, do not flush on every draw call. In 
hang");
+  puts("  detection mode, this only detect hangs in pipe->flush.");
   puts("");
   puts("  GALLIUM_DDEBUG_SKIP=[count]");
   puts("Skip flush and hang detection for the given initial number of 
draw calls.");
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radeonsi: fix shader precompilation for shader-db

2016-01-25 Thread Nicolai Hähnle


On 23.01.2016 11:04, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

The addition of spi_shader_col_format killed all color outputs
in precompiled shaders.


Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>


---
  src/gallium/drivers/radeonsi/si_state_shaders.c | 43 +++--
  1 file changed, 34 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 4eb962a..bca31dc 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -683,23 +683,21 @@ static inline void si_shader_selector_key(struct 
pipe_context *ctx,
  }

  /* Select the hw shader variant depending on the current state. */
-static int si_shader_select(struct pipe_context *ctx,
-   struct si_shader_ctx_state *state)
+static int si_shader_select_with_key(struct pipe_context *ctx,
+struct si_shader_ctx_state *state,
+union si_shader_key *key)
  {
struct si_context *sctx = (struct si_context *)ctx;
struct si_shader_selector *sel = state->cso;
struct si_shader *current = state->current;
-   union si_shader_key key;
struct si_shader *iter, *shader = NULL;
int r;

-   si_shader_selector_key(ctx, sel, &key);
-
/* Check if we don't need to change anything.
 * This path is also used for most shaders that don't need multiple
 * variants, it will cost just a computation of the key and this
 * test. */
-   if (likely(current && memcmp(&current->key, &key, sizeof(key)) == 0))
+   if (likely(current && memcmp(&current->key, key, sizeof(*key)) == 0))
return 0;

pipe_mutex_lock(sel->mutex);
@@ -708,7 +706,7 @@ static int si_shader_select(struct pipe_context *ctx,
for (iter = sel->first_variant; iter; iter = iter->next_variant) {
/* Don't check the "current" shader. We checked it above. */
if (current != iter &&
-   memcmp(&iter->key, &key, sizeof(key)) == 0) {
+   memcmp(&iter->key, key, sizeof(*key)) == 0) {
state->current = iter;
pipe_mutex_unlock(sel->mutex);
return 0;
@@ -722,7 +720,7 @@ static int si_shader_select(struct pipe_context *ctx,
return -ENOMEM;
}
shader->selector = sel;
-   shader->key = key;
+   shader->key = *key;

r = si_shader_create(sctx->screen, sctx->tm, shader, &sctx->b.debug);
if (unlikely(r)) {
@@ -746,6 +744,15 @@ static int si_shader_select(struct pipe_context *ctx,
return 0;
  }

+static int si_shader_select(struct pipe_context *ctx,
+   struct si_shader_ctx_state *state)
+{
+   union si_shader_key key;
+
+   si_shader_selector_key(ctx, state->cso, &key);
+   return si_shader_select_with_key(ctx, state, &key);
+}
+
  static void *si_create_shader_selector(struct pipe_context *ctx,
   const struct pipe_shader_state *state)
  {
@@ -866,8 +873,26 @@ static void *si_create_shader_selector(struct pipe_context 
*ctx,
/* Pre-compilation. */
if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
struct si_shader_ctx_state state = {sel};
+   union si_shader_key key;
+
+   memset(&key, 0, sizeof(key));
+
+   /* Set reasonable defaults, so that the shader key doesn't
+* cause any code to be eliminated.
+*/
+   switch (sel->type) {
+   case PIPE_SHADER_TESS_CTRL:
+   key.tcs.prim_mode = PIPE_PRIM_TRIANGLES;
+   break;
+   case PIPE_SHADER_FRAGMENT:
+   for (i = 0; i < 8; i++)
+   if (sel->info.colors_written & (1 << i))
+   key.ps.spi_shader_col_format |=
+   V_028710_SPI_SHADER_FP16_ABGR 
<< (i * 4);
+   break;
+   }

-   if (si_shader_select(ctx, &state)) {
+   if (si_shader_select_with_key(ctx, &state, &key)) {
fprintf(stderr, "radeonsi: can't create a shader\n");
tgsi_free_tokens(sel->tokens);
FREE(sel);


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radeonsi: use llvm.amdgcn.s.barrier instead of llvm.AMDGPU.barrier.local

2016-01-25 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

The new name for the intrinsic was introduced in LLVM r258558.
---
 src/gallium/drivers/radeonsi/si_shader.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index c365b14..0723693 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3465,7 +3465,12 @@ static void si_llvm_emit_barrier(const struct 
lp_build_tgsi_action *action,
 {
struct gallivm_state *gallivm = bld_base->base.gallivm;
 
-   lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.barrier.local",
+   lp_build_intrinsic(gallivm->builder,
+#if HAVE_LLVM >= 0x0309
+   "llvm.amdgcn.s.barrier",
+#else
+   "llvm.AMDGPU.barrier.local",
+#endif
LLVMVoidTypeInContext(gallivm->context), NULL, 0,
LLVMNoUnwindAttribute);
 }
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 04/17] gallium: disable compute shaders for meta ops

2016-01-25 Thread Nicolai Hähnle


On 24.01.2016 16:09, Samuel Pitoiset wrote:

Loosely based on tessellation shaders.


Do we actually need this? The graphics pipeline and the compute pipeline 
are separate; draw commands should be unaffected by the currently set 
compute shader. Or am I missing something?


Cheers,
Nicolai



Signed-off-by: Samuel Pitoiset 
---
  src/gallium/auxiliary/hud/hud_context.c| 3 +++
  src/gallium/auxiliary/postprocess/pp_run.c | 3 +++
  src/gallium/auxiliary/util/u_blit.c| 3 +++
  src/mesa/state_tracker/st_cb_bitmap.c  | 3 +++
  src/mesa/state_tracker/st_cb_clear.c   | 3 +++
  src/mesa/state_tracker/st_cb_drawpixels.c  | 3 +++
  src/mesa/state_tracker/st_cb_drawtex.c | 3 +++
  7 files changed, 21 insertions(+)

diff --git a/src/gallium/auxiliary/hud/hud_context.c 
b/src/gallium/auxiliary/hud/hud_context.c
index 75afebe..6387638 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -472,6 +472,7 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
 cso_save_viewport(cso);
 cso_save_stream_outputs(cso);
 cso_save_geometry_shader(cso);
+   cso_save_compute_shader(cso);
 cso_save_tessctrl_shader(cso);
 cso_save_tesseval_shader(cso);
 cso_save_vertex_shader(cso);
@@ -510,6 +511,7 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
 cso_set_tessctrl_shader_handle(cso, NULL);
 cso_set_tesseval_shader_handle(cso, NULL);
 cso_set_geometry_shader_handle(cso, NULL);
+   cso_set_compute_shader_handle(cso, NULL);
 cso_set_vertex_shader_handle(cso, hud->vs);
 cso_set_vertex_elements(cso, 2, hud->velems);
 cso_set_render_condition(cso, NULL, FALSE, 0);
@@ -606,6 +608,7 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
 cso_restore_tessctrl_shader(cso);
 cso_restore_tesseval_shader(cso);
 cso_restore_geometry_shader(cso);
+   cso_restore_compute_shader(cso);
 cso_restore_vertex_shader(cso);
 cso_restore_vertex_elements(cso);
 cso_restore_aux_vertex_buffer_slot(cso);
diff --git a/src/gallium/auxiliary/postprocess/pp_run.c 
b/src/gallium/auxiliary/postprocess/pp_run.c
index c6c7b88..58abef1 100644
--- a/src/gallium/auxiliary/postprocess/pp_run.c
+++ b/src/gallium/auxiliary/postprocess/pp_run.c
@@ -122,6 +122,7 @@ pp_run(struct pp_queue_t *ppq, struct pipe_resource *in,
 cso_save_tessctrl_shader(cso);
 cso_save_tesseval_shader(cso);
 cso_save_geometry_shader(cso);
+   cso_save_compute_shader(cso);
 cso_save_rasterizer(cso);
 cso_save_sample_mask(cso);
 cso_save_min_samples(cso);
@@ -144,6 +145,7 @@ pp_run(struct pp_queue_t *ppq, struct pipe_resource *in,
 cso_set_tessctrl_shader_handle(cso, NULL);
 cso_set_tesseval_shader_handle(cso, NULL);
 cso_set_geometry_shader_handle(cso, NULL);
+   cso_set_compute_shader_handle(cso, NULL);
 cso_set_render_condition(cso, NULL, FALSE, 0);

 // Kept only for this frame.
@@ -193,6 +195,7 @@ pp_run(struct pp_queue_t *ppq, struct pipe_resource *in,
 cso_restore_tessctrl_shader(cso);
 cso_restore_tesseval_shader(cso);
 cso_restore_geometry_shader(cso);
+   cso_restore_compute_shader(cso);
 cso_restore_rasterizer(cso);
 cso_restore_sample_mask(cso);
 cso_restore_min_samples(cso);
diff --git a/src/gallium/auxiliary/util/u_blit.c 
b/src/gallium/auxiliary/util/u_blit.c
index 9737c94..ffb7b88 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -556,6 +556,7 @@ util_blit_pixels_tex(struct blit_state *ctx,
 cso_save_tessctrl_shader(ctx->cso);
 cso_save_tesseval_shader(ctx->cso);
 cso_save_geometry_shader(ctx->cso);
+   cso_save_compute_shader(ctx->cso);
 cso_save_vertex_elements(ctx->cso);
 cso_save_aux_vertex_buffer_slot(ctx->cso);

@@ -597,6 +598,7 @@ util_blit_pixels_tex(struct blit_state *ctx,
 cso_set_tessctrl_shader_handle(ctx->cso, NULL);
 cso_set_tesseval_shader_handle(ctx->cso, NULL);
 cso_set_geometry_shader_handle(ctx->cso, NULL);
+   cso_set_compute_shader_handle(ctx->cso, NULL);

 /* drawing dest */
 memset(&fb, 0, sizeof(fb));
@@ -639,6 +641,7 @@ util_blit_pixels_tex(struct blit_state *ctx,
 cso_restore_tessctrl_shader(ctx->cso);
 cso_restore_tesseval_shader(ctx->cso);
 cso_restore_geometry_shader(ctx->cso);
+   cso_restore_compute_shader(ctx->cso);
 cso_restore_vertex_elements(ctx->cso);
 cso_restore_aux_vertex_buffer_slot(ctx->cso);
 cso_restore_stream_outputs(ctx->cso);
diff --git a/src/mesa/state_tracker/st_cb_bitmap.c 
b/src/mesa/state_tracker/st_cb_bitmap.c
index d8c3dbd..3242644 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -312,6 +312,7 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, 
GLfloat z,
 cso_save_tessctrl_shader(cso);
 cso_save_tesseval_shader(cso);
 cso_save_geometry_shader(cso);
+   cso_save_compute_shader(cso);

Re: [Mesa-dev] [PATCH 2/2] radeonsi: implement SAMPLEPOS system value without a constant buffer load

2016-01-20 Thread Nicolai Hähnle


Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

On 19.01.2016 20:39, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

We always get per-sample input position.
---
  src/gallium/drivers/radeonsi/si_shader.c | 15 +--
  1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 5c536f8..d788c41 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1119,9 +1119,20 @@ static void declare_system_value(
value = get_sample_id(radeon_bld);
break;

-   case TGSI_SEMANTIC_SAMPLEPOS:
-   value = load_sample_position(radeon_bld, 
get_sample_id(radeon_bld));
+   case TGSI_SEMANTIC_SAMPLEPOS: {
+   LLVMValueRef pos[4] = {
+   LLVMGetParam(radeon_bld->main_fn, SI_PARAM_POS_X_FLOAT),
+   LLVMGetParam(radeon_bld->main_fn, SI_PARAM_POS_Y_FLOAT),
+   lp_build_const_float(gallivm, 0),
+   lp_build_const_float(gallivm, 0)
+   };
+   pos[0] = lp_build_emit_llvm_unary(&radeon_bld->soa.bld_base,
+ TGSI_OPCODE_FRC, pos[0]);
+   pos[1] = lp_build_emit_llvm_unary(&radeon_bld->soa.bld_base,
+ TGSI_OPCODE_FRC, pos[1]);
+   value = lp_build_gather_values(gallivm, pos, 4);
break;
+   }

case TGSI_SEMANTIC_SAMPLEMASK:
/* Smoothing isn't MSAA in GL, but it's MSAA in hardware.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/4] radeonsi: enable late VS allocation (v2)

2016-01-20 Thread Nicolai Hähnle


Patches 1, 2 & 4 are

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

On 19.01.2016 20:20, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

---
  src/gallium/drivers/radeonsi/si_state.c | 21 +++--
  1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index f005461..ca32f5c 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3742,8 +3742,25 @@ static void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, 
S_00B31C_CU_EN(0xffff));
si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 
S_00B21C_CU_EN(0xffff));
-   si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 
S_00B118_CU_EN(0xffff));
-   si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, 
S_00B11C_LIMIT(0));
+
+   if (sscreen->b.info.num_good_compute_units /
+   (sscreen->b.info.max_se * sscreen->b.info.max_sh_per_se) <= 
4) {
+   /* Too few available compute units per SH. Disallowing
+* VS to run on CU0 could hurt us more than late VS
+* allocation would help.
+*
+* LATE_ALLOC_VS = 2 is the highest safe number.
+*/
+   si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 
S_00B118_CU_EN(0xffff));
+   si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, 
S_00B11C_LIMIT(2));
+   } else {
+   /* Set LATE_ALLOC_VS == 31. It should be less than
+* the number of scratch waves. VS can't run on CU0.
+*/
+   si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 
S_00B118_CU_EN(0xfffe));
+   si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, 
S_00B11C_LIMIT(31));
+   }
+
si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, 
S_00B01C_CU_EN(0xffff));
}



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/4] radeonsi: allow tessellation on CU1 and ES on CU0

2016-01-20 Thread Nicolai Hähnle


On 19.01.2016 20:20, Marek Olšák wrote:

From: Marek Olšák 

We don't use on-chip GS, so it's not required to reserve CU1 for ES.


Why is a deadlock not possible with an off-chip GS ring?

Nicolai


---
  src/gallium/drivers/radeonsi/si_state.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 4b674ed..f005461 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3738,9 +3738,9 @@ static void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);

if (sctx->b.chip_class >= CIK) {
-   si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, 
S_00B51C_CU_EN(0xfffc));
+   si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, 
S_00B51C_CU_EN(0xfffe));
si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
-   si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, 
S_00B31C_CU_EN(0xfffe));
+   si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, 
S_00B31C_CU_EN(0xffff));
si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 
S_00B21C_CU_EN(0xffff));
si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 
S_00B118_CU_EN(0xffff));
si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, 
S_00B11C_LIMIT(0));


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] radeonsi: add max waves / CU to shader stats

2016-01-20 Thread Nicolai Hähnle


On 19.01.2016 20:39, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

---
  src/gallium/drivers/radeonsi/si_shader.c | 33 +---
  1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 0c5fd32..5c536f8 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3994,12 +3994,39 @@ static void si_shader_dump_stats(struct si_screen 
*sscreen,
 struct pipe_debug_callback *debug,
 unsigned processor)
  {
+   /* Compute the maximum number of waves.
+* The pixel shader additionally allocates 1 - 48 blocks of LDS
+* depending on non-compile times parameters.
+*/
+   unsigned ps_lds_size = processor == TGSI_PROCESSOR_FRAGMENT ? 1 : 0;
+   unsigned lds_size = ps_lds_size + conf->lds_size;
+   unsigned max_waves = 10;
+
+   if (conf->num_sgprs) {
+   if (sscreen->b.chip_class >= VI)
+   max_waves = MIN2(max_waves, 800 / conf->num_sgprs);


When/if the kernel starts using traps, we need to remember to 
conditionally add 16 here. Right now that's irrelevant.


Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>


+   else
+   max_waves = MIN2(max_waves, 512 / conf->num_sgprs);
+   }
+
+   if (conf->num_vgprs)
+   max_waves = MIN2(max_waves, 256 / conf->num_vgprs);
+
+   if (lds_size)
+   max_waves = MIN2(max_waves, 128 / lds_size);
+
if (r600_can_dump_shader(&sscreen->b, processor)) {
fprintf(stderr, "*** SHADER STATS ***\n"
-   "SGPRS: %d\nVGPRS: %d\nCode Size: %d bytes\nLDS: %d 
blocks\n"
-   "Scratch: %d bytes per wave\n\n",
+   "SGPRS: %d\n"
+   "VGPRS: %d\n"
+   "Code Size: %d bytes\n"
+   "LDS: %d blocks\n"
+   "Scratch: %d bytes per wave\n"
+   "Max waves / CU: %d\n"
+   "\n",
conf->num_sgprs, conf->num_vgprs, code_size,
-   conf->lds_size, conf->scratch_bytes_per_wave);
+   conf->lds_size, conf->scratch_bytes_per_wave,
+   max_waves);
}

pipe_debug_message(debug, SHADER_INFO,


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radeonsi: add DCC buffer for sampler views on new CS

2016-01-24 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

This fixes a VM fault and possible lockup in high memory pressure situations.

Cc: "11.0 11.1" <mesa-sta...@lists.freedesktop.org>
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 33 +++
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index aad836d..6c79673 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -138,6 +138,22 @@ static void si_release_sampler_views(struct 
si_sampler_views *views)
si_release_descriptors(&views->desc);
 }
 
+static void si_sampler_view_add_buffers(struct si_context *sctx,
+   struct si_sampler_view *rview)
+{
+   if (rview->resource) {
+   radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
+   rview->resource, RADEON_USAGE_READ,
+   r600_get_sampler_view_priority(rview->resource));
+   }
+
+   if (rview->dcc_buffer && rview->dcc_buffer != rview->resource) {
+   radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
+   rview->dcc_buffer, RADEON_USAGE_READ,
+   RADEON_PRIO_DCC);
+   }
+}
+
 static void si_sampler_views_begin_new_cs(struct si_context *sctx,
  struct si_sampler_views *views)
 {
@@ -149,12 +165,7 @@ static void si_sampler_views_begin_new_cs(struct 
si_context *sctx,
struct si_sampler_view *rview =
(struct si_sampler_view*)views->views[i];
 
-   if (!rview->resource)
-   continue;
-
-   radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
- rview->resource, RADEON_USAGE_READ,
- 
r600_get_sampler_view_priority(rview->resource));
+   si_sampler_view_add_buffers(sctx, rview);
}
 
if (!views->desc.buffer)
@@ -176,15 +187,7 @@ static void si_set_sampler_view(struct si_context *sctx, 
unsigned shader,
struct si_sampler_view *rview =
(struct si_sampler_view*)view;
 
-   if (rview->resource)
-   radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
-   rview->resource, RADEON_USAGE_READ,
-   
r600_get_sampler_view_priority(rview->resource));
-
-   if (rview->dcc_buffer && rview->dcc_buffer != rview->resource)
-   radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
-   rview->dcc_buffer, RADEON_USAGE_READ,
-   RADEON_PRIO_DCC);
+   si_sampler_view_add_buffers(sctx, rview);
 
pipe_sampler_view_reference(&views->views[slot], view);
memcpy(views->desc.list + slot*8, view_desc, 8*4);
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radeonsi: use llvm.amdgcn.s.barrier instead of llvm.AMDGPU.barrier.local

2016-01-26 Thread Nicolai Hähnle


On 26.01.2016 06:55, Marek Olšák wrote:

On Mon, Jan 25, 2016 at 10:49 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote:

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

The new name for the intrinsic was introduced in LLVM r258558.
---
  src/gallium/drivers/radeonsi/si_shader.c | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index c365b14..0723693 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3465,7 +3465,12 @@ static void si_llvm_emit_barrier(const struct 
lp_build_tgsi_action *action,
  {
 struct gallivm_state *gallivm = bld_base->base.gallivm;

-   lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.barrier.local",
+   lp_build_intrinsic(gallivm->builder,
+#if HAVE_LLVM >= 0x0309
+   "llvm.amdgcn.s.barrier",
+#else
+   "llvm.AMDGPU.barrier.local",
+#endif


My preference is not to use the preprocessor when we don't have to.
HAVE_LLVM is a constant, so:
HAVE_LLVM >= 0x0309 ? "llvm.amdgcn.s.barrier" : "llvm.AMDGPU.barrier.local"


Makes sense, done.

Cheers,
Nicolai


With that done:
Reviewed-by: Marek Olšák <marek.ol...@amd.com>

Marek


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 17/17] st/mesa: expose ARB_compute_shader when compute is supported

2016-01-26 Thread Nicolai Hähnle


On 25.01.2016 23:45, Ilia Mirkin wrote:

On Mon, Jan 25, 2016 at 11:00 PM, Michel Dänzer <mic...@daenzer.net> wrote:

On 26.01.2016 01:17, Nicolai Hähnle wrote:

On 25.01.2016 10:04, Ilia Mirkin wrote:

Yeah so this isn't going to fly. r600/radeonsi both expose
PIPE_CAP_COMPUTE, for clover, but won't support these compute shaders
quite yet. Not sure what the solution is here... maybe just add
another cap? PIPE_CAP_GLSL_COMPUTE or something? Or require a backend
to also set PIPE_CAP_GLSL_VERSION to 430 for ARB_compute_shader to
become exposed? Don't know.


GLSL 430 also requires arrays_of_arrays. I don't actually know how much
we need to do to support that, but maybe it'd be better not to depend on
that.

A new cap is an option. A slightly weird but I think reasonable option
would be to require a PIPE_CAP_GLSL_VERSION of 420, since
ARB_compute_shader officially requires OpenGL 4.2 anyway, and then let
us deal with the fallout when we up that version number.


I guess that works, but FWIW, a more general solution might be to split
up PIPE_SHADER_CAP_PREFERRED_IR into individual caps for each IR, and
enable ARB_compute_shader only if TGSI is supported for PIPE_SHADER_COMPUTE.


How about a PIPE_SHADER_CAP_SUPPORTED_IRS which returns a bitfield of
all the supported pipe_shader_ir's?


That sounds good to me.

Nicolai



   -ilia


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] gallium: fix the documentation of PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE

2016-01-26 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

This parameter is equivalent to the corresponding OpenGL implementation
limit which is in texels, not bytes.
---
Based on a discussion in IRC; I plan to push it with the rest of the
series on PBO uploads.

 src/gallium/docs/source/screen.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/docs/source/screen.rst 
b/src/gallium/docs/source/screen.rst
index 1fcd882..c95136f 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -168,7 +168,7 @@ The integer capabilities:
   view it is intended to be used with, or herein undefined results may occur
   for permutational swizzles.
 * ``PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE``: The maximum accessible size with
-  a buffer sampler view, in bytes.
+  a buffer sampler view, in texels.
 * ``PIPE_CAP_MAX_VIEWPORTS``: The maximum number of viewports (and scissors
   since they are linked) a driver can support. Returning 0 is equivalent
   to returning 1 because every driver has to support at least a single
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] gallium: make image views non-persistent objects

2016-01-19 Thread Nicolai Hähnle


On 18.01.2016 22:08, Ilia Mirkin wrote:

Make them akin to shader buffers, with no refcounting/etc. Just used to
pass data about the bound image in ->set_shader_images.

Signed-off-by: Ilia Mirkin 
---

I don't really see a reason why these were refcounted objects. It seems like
it would be convenient to make these line up with shader buffers, so that's
what I've done here.

Please let me know if I'm missing something.


I haven't thought about this much, but at least Radeon does quite a bit 
of work in create_sampler_view.


Since everything boils down to the same hardware resource descriptors in 
the end, I'd expect the same to happen for a create_image_view. I 
believe we'll want a create_image_view which ends up calling code that 
is shared with create_sampler_view.


So make that a vote against this change from me.

Come to think of it, from a Radeon perspective I'm not sure why there is 
a separate pipe_image_view structure in the first place (other than 
perhaps reducing confusion about which combination of fields make sense).


Cheers,
Nicolai



  src/gallium/auxiliary/util/u_inlines.h| 11 ---
  src/gallium/drivers/ddebug/dd_context.c   | 28 +--
  src/gallium/drivers/ddebug/dd_pipe.h  |  2 +-
  src/gallium/drivers/ilo/ilo_state.c   |  2 +-
  src/gallium/drivers/nouveau/nvc0/nvc0_state.c |  2 +-
  src/gallium/include/pipe/p_context.h  | 14 ++
  src/gallium/include/pipe/p_state.h|  4 +---
  7 files changed, 7 insertions(+), 56 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_inlines.h 
b/src/gallium/auxiliary/util/u_inlines.h
index 57a3b0b..d081203 100644
--- a/src/gallium/auxiliary/util/u_inlines.h
+++ b/src/gallium/auxiliary/util/u_inlines.h
@@ -174,17 +174,6 @@ pipe_sampler_view_release(struct pipe_context *ctx,
  }

  static inline void
-pipe_image_view_reference(struct pipe_image_view **ptr, struct pipe_image_view 
*view)
-{
-   struct pipe_image_view *old_view = *ptr;
-
-   if (pipe_reference_described(&(*ptr)->reference, &view->reference,
-
(debug_reference_descriptor)debug_describe_image_view))
-  old_view->context->image_view_destroy(old_view->context, old_view);
-   *ptr = view;
-}
-
-static inline void
  pipe_so_target_reference(struct pipe_stream_output_target **ptr,
   struct pipe_stream_output_target *target)
  {
diff --git a/src/gallium/drivers/ddebug/dd_context.c 
b/src/gallium/drivers/ddebug/dd_context.c
index 3ae7764..9dfaa0a 100644
--- a/src/gallium/drivers/ddebug/dd_context.c
+++ b/src/gallium/drivers/ddebug/dd_context.c
@@ -415,30 +415,6 @@ dd_context_sampler_view_destroy(struct pipe_context *_pipe,
 pipe->sampler_view_destroy(pipe, view);
  }

-static struct pipe_image_view *
-dd_context_create_image_view(struct pipe_context *_pipe,
- struct pipe_resource *resource,
- const struct pipe_image_view *templ)
-{
-   struct pipe_context *pipe = dd_context(_pipe)->pipe;
-   struct pipe_image_view *view =
-  pipe->create_image_view(pipe, resource, templ);
-
-   if (!view)
-  return NULL;
-   view->context = _pipe;
-   return view;
-}
-
-static void
-dd_context_image_view_destroy(struct pipe_context *_pipe,
-  struct pipe_image_view *view)
-{
-   struct pipe_context *pipe = dd_context(_pipe)->pipe;
-
-   pipe->image_view_destroy(pipe, view);
-}
-
  static struct pipe_stream_output_target *
  dd_context_create_stream_output_target(struct pipe_context *_pipe,
 struct pipe_resource *res,
@@ -486,7 +462,7 @@ dd_context_set_sampler_views(struct pipe_context *_pipe, 
unsigned shader,
  static void
  dd_context_set_shader_images(struct pipe_context *_pipe, unsigned shader,
   unsigned start, unsigned num,
- struct pipe_image_view **views)
+ struct pipe_image_view *views)
  {
 struct dd_context *dctx = dd_context(_pipe);
 struct pipe_context *pipe = dctx->pipe;
@@ -744,8 +720,6 @@ dd_context_create(struct dd_screen *dscreen, struct 
pipe_context *pipe)
 CTX_INIT(sampler_view_destroy);
 CTX_INIT(create_surface);
 CTX_INIT(surface_destroy);
-   CTX_INIT(create_image_view);
-   CTX_INIT(image_view_destroy);
 CTX_INIT(transfer_map);
 CTX_INIT(transfer_flush_region);
 CTX_INIT(transfer_unmap);
diff --git a/src/gallium/drivers/ddebug/dd_pipe.h 
b/src/gallium/drivers/ddebug/dd_pipe.h
index a045518..6505cea 100644
--- a/src/gallium/drivers/ddebug/dd_pipe.h
+++ b/src/gallium/drivers/ddebug/dd_pipe.h
@@ -93,7 +93,7 @@ struct dd_context
 struct pipe_constant_buffer 
constant_buffers[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS];
 struct pipe_sampler_view 
*sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
 struct dd_state

Re: [Mesa-dev] [PATCH 1/9] gallium: Add PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY

2016-01-19 Thread Nicolai Hähnle


On 19.01.2016 10:45, Fredrik Höglund wrote:

On Tuesday 19 January 2016, Nicolai Hähnle wrote:


On 18.01.2016 18:50, Ilia Mirkin wrote:

On Mon, Jan 18, 2016 at 5:22 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote:

diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c 
b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 90f..1ae557d 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -75,6 +75,8 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
 return 1;
  case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
 return 2048;
+   case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+  return 0;
  /* supported capabilities */
  case PIPE_CAP_TWO_SIDED_STENCIL:
  case PIPE_CAP_ANISOTROPIC_FILTER:
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 712835c..99d7ae6 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -121,6 +121,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
 return 256;
  case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
 return 1; /* 256 for binding as RT, but that's not possible in GL */
+   case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+  return 0;
  case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
 return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
  case PIPE_CAP_MAX_VIEWPORTS:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 7211df9..612f1c0 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -111,6 +111,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
 return 256;
  case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
 return 1; /* 256 for binding as RT, but that's not possible in GL */
+   case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+  return 0;
  case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
 return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
  case PIPE_CAP_MAX_VIEWPORTS:


I would greatly appreciate it if you could stick these into the
"unsupported" list of caps -- each of nv30/nv50/nvc0 should have one,
of varying length. (Same applies to the other cap you're adding.)


Do you really want that? I actually put them aside on purpose, because
the somewhat unusual sense of the cap means that the return value of 1
is something unsupported. Of course, one might argue that I should have
changed the boolean sense and renamed it to something like
PIPE_CAP_BUFFER_SAMPLER_VIEW_SWIZZLES.


That is actually the name I was going to use at first (minus the s).
But then it occurred to me that it would be misleading, since this
is not about pipe_sampler_view::swizzle_r/g/b/a.


It's about both the swizzles and the formats, isn't it? If it were only 
the formats, the deficiency could be fixed by using the swizzles.


Naming is hard. I'm happy with keeping it the way it is.

Cheers,
Nicolai
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/4] radeonsi: allow using all CUs for tessellation and on-chip GS (v2)

2016-01-22 Thread Nicolai Hähnle


This series is

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

On 22.01.2016 09:18, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

v2: After more discussion with hw teams, the kernel already contains the
 optimal settings allowing us to use all CUs.
---
  src/gallium/drivers/radeonsi/si_state.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index a3ddee8..67b2835 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3701,9 +3701,9 @@ static void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);

if (sctx->b.chip_class >= CIK) {
-   si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, 
S_00B51C_CU_EN(0xfffc));
+   si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, 
S_00B51C_CU_EN(0xffff));
si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
-   si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, 
S_00B31C_CU_EN(0xfffe));
+   si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, 
S_00B31C_CU_EN(0xffff));
si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 
S_00B21C_CU_EN(0xffff));
si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 
S_00B118_CU_EN(0xffff));
si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, 
S_00B11C_LIMIT(0));


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] util/ralloc: Remove double zero'ing of rzalloc buffers

2016-01-22 Thread Nicolai Hähnle


On 22.01.2016 02:53, Jordan Justen wrote:

Juha-Pekka found this back in May 2015:
<1430915727-28677-1-git-send-email-juhapekka.heikk...@gmail.com>

 From the discussion, obviously it would be preferable to make
ralloc_size no longer return zeroed memory, but Juha-Pekka found that
it would break Mesa.

For now, let's point out the flaw, and stop doing the double zeroing
of rzalloc buffers.

Signed-off-by: Jordan Justen <jordan.l.jus...@intel.com>
Cc: Juha-Pekka Heikkila <juhapekka.heikk...@gmail.com>
Cc: Kenneth Graunke <kenn...@whitecape.org>
---

  For a release build, I saw the code size shrink by 64 bytes.

  src/util/ralloc.c | 15 +--
  1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/util/ralloc.c b/src/util/ralloc.c
index 6d4032b..24c1eee 100644
--- a/src/util/ralloc.c
+++ b/src/util/ralloc.c
@@ -49,6 +49,14 @@ _CRTIMP int _vscprintf(const char *format, va_list argptr);
  #endif
  #endif

+/* ralloc_size has always used calloc to allocate memory. This has allowed
+ * code using ralloc_size to depend on always receiving a cleared buffer.
+ *
+ * FIXME: Clean up the code base to allow this to be set to false, and then
+ * remove it altogether.
+ */
+static const bool always_allocate_zeroed_memory = true;
+
  #define CANARY 0x5A1106

  struct ralloc_header
@@ -110,7 +118,10 @@ ralloc_context(const void *ctx)
  void *
  ralloc_size(const void *ctx, size_t size)
  {
-   void *block = calloc(1, size + sizeof(ralloc_header));
+   void *block =
+  always_allocate_zeroed_memory ?
+  calloc(1, size + sizeof(ralloc_header)) :
+  malloc(size + sizeof(ralloc_header));


There's an integer overflow here which would be good to fix. Since it 
was there already in the older version, the patch is


Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

as is.


 ralloc_header *info;
 ralloc_header *parent;

@@ -132,7 +143,7 @@ void *
  rzalloc_size(const void *ctx, size_t size)
  {
 void *ptr = ralloc_size(ctx, size);
-   if (likely(ptr != NULL))
+   if (!always_allocate_zeroed_memory && likely(ptr != NULL))
memset(ptr, 0, size);
 return ptr;
  }


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 00/10] vbo: introduce a minmax_index cache (v3)

2016-01-22 Thread Nicolai Hähnle


On 11.01.2016 21:32, Nicolai Hähnle wrote:

here is a new version of the series. I'm calling it v3 even though I never
sent v2 because it contains v3 of the main patch. Changes relative to v1:

- disable cache on USAGE_TEXTURE_BUFFER and bool/GLboolean nit
- disable cache only temporarily when a persistent writable mapping exists
- add a MESA_NO_MINMAX_CACHE environment variable that disables caches
   completely
- track hits and misses and disable the cache on a per-BO basis if the
   hit rate is low

The last point is on a per-BO basis because applications don't always have
the same usage pattern for all BOs.

There is also a new util/hash_table patch which I added for an idea that I
ultimately rejected, but it turns out to be useful elsewhere, so I'm
sending it along while I'm at it.

I benchmarked this series with a couple of tests from PTS, using low
resolutions in an attempt to emphasize CPU usage. Their performance is
unaffected, with averages with/without this series within the measurement
tolerance of each other. That there's no regression is good news, and that
there's no improvement is to be expected - if one of these tests had been
affected by the problem this series addresses, someone else would probably
have done it much earlier ;)

Please review!


Ping... any opinions on this series? Other than Matt's coding style 
comment, I haven't received anything.


Nicolai


Thanks,
Nicolai
--
  docs/envvars.html |   1 +
  src/mesa/Makefile.sources |   1 +
  src/mesa/main/bufferobj.c |  59 +++-
  src/mesa/main/hash.c  |   4 +-
  src/mesa/main/mtypes.h|   9 +
  src/mesa/main/transformfeedback.h |   3 +
  src/mesa/vbo/vbo.h|   3 +
  src/mesa/vbo/vbo_exec_array.c | 148 --
  src/mesa/vbo/vbo_minmax_index.c   | 378 
  src/util/hash_table.c |  25 ++
  src/util/hash_table.h |   7 +
  src/util/tests/hash_table/Makefile.am |   1 +
  src/util/tests/hash_table/clear.c |  91 ++
  13 files changed, 570 insertions(+), 160 deletions(-)


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] draw: use util_pstipple_* function for stipple pattern textures and samplers

2016-01-22 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

This reduces code duplication.

Suggested-by: Jose Fonseca <jfons...@vmware.com>
---
 src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 121 +++-
 src/gallium/auxiliary/util/u_pstipple.c |   4 +-
 src/gallium/auxiliary/util/u_pstipple.h |   5 +
 3 files changed, 18 insertions(+), 112 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c 
b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index e468cc3..0298334 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -153,113 +153,6 @@ generate_pstip_fs(struct pstip_stage *pstip)
 
 
 /**
- * Load texture image with current stipple pattern.
- */
-static void
-pstip_update_texture(struct pstip_stage *pstip)
-{
-   static const uint bit31 = 1 << 31;
-   struct pipe_context *pipe = pstip->pipe;
-   struct pipe_transfer *transfer;
-   const uint *stipple = pstip->state.stipple->stipple;
-   uint i, j;
-   ubyte *data;
-
-   data = pipe_transfer_map(pipe, pstip->texture, 0, 0,
-PIPE_TRANSFER_WRITE, 0, 0, 32, 32, &transfer);
-
-   /*
-* Load alpha texture.
-* Note: 0 means keep the fragment, 255 means kill it.
-* We'll negate the texel value and use KILL_IF which kills if value
-* is negative.
-*/
-   for (i = 0; i < 32; i++) {
-  for (j = 0; j < 32; j++) {
- if (stipple[i] & (bit31 >> j)) {
-/* fragment "on" */
-data[i * transfer->stride + j] = 0;
- }
- else {
-/* fragment "off" */
-data[i * transfer->stride + j] = 255;
- }
-  }
-   }
-
-   /* unmap */
-   pipe_transfer_unmap(pipe, transfer);
-}
-
-
-/**
- * Create the texture map we'll use for stippling.
- */
-static boolean
-pstip_create_texture(struct pstip_stage *pstip)
-{
-   struct pipe_context *pipe = pstip->pipe;
-   struct pipe_screen *screen = pipe->screen;
-   struct pipe_resource texTemp;
-   struct pipe_sampler_view viewTempl;
-
-   memset(&texTemp, 0, sizeof(texTemp));
-   texTemp.target = PIPE_TEXTURE_2D;
-   texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */
-   texTemp.last_level = 0;
-   texTemp.width0 = 32;
-   texTemp.height0 = 32;
-   texTemp.depth0 = 1;
-   texTemp.array_size = 1;
-   texTemp.bind = PIPE_BIND_SAMPLER_VIEW;
-
-   pstip->texture = screen->resource_create(screen, &texTemp);
-   if (pstip->texture == NULL)
-  return FALSE;
-
-   u_sampler_view_default_template(&viewTempl,
-   pstip->texture,
-   pstip->texture->format);
-   pstip->sampler_view = pipe->create_sampler_view(pipe,
-   pstip->texture,
-   &viewTempl);
-   if (!pstip->sampler_view) {
-  return FALSE;
-   }
-
-   return TRUE;
-}
-
-
-/**
- * Create the sampler CSO that'll be used for stippling.
- */
-static boolean
-pstip_create_sampler(struct pstip_stage *pstip)
-{
-   struct pipe_sampler_state sampler;
-   struct pipe_context *pipe = pstip->pipe;
-
-   memset(&sampler, 0, sizeof(sampler));
-   sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
-   sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
-   sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
-   sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
-   sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
-   sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
-   sampler.normalized_coords = 1;
-   sampler.min_lod = 0.0f;
-   sampler.max_lod = 0.0f;
-
-   pstip->sampler_cso = pipe->create_sampler_state(pipe, &sampler);
-   if (pstip->sampler_cso == NULL)
-  return FALSE;
-   
-   return TRUE;
-}
-
-
-/**
  * When we're about to draw our first stipple polygon in a batch, this function
  * is called to tell the driver to bind our modified fragment shader.
  */
@@ -537,7 +430,8 @@ pstip_set_polygon_stipple(struct pipe_context *pipe,
/* pass-through */
pstip->driver_set_polygon_stipple(pstip->pipe, stipple);
 
-   pstip_update_texture(pstip);
+   util_pstipple_update_stipple_texture(pstip->pipe, pstip->texture,
+pstip->state.stipple->stipple);
 }
 
 
@@ -573,10 +467,17 @@ draw_install_pstipple_stage(struct draw_context *draw,
pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple;
 
/* create special texture, sampler state */
-   if (!pstip_create_texture(pstip))
+   pstip->texture = util_pstipple_create_stipple_texture(pipe, NULL);
+   if (!pstip->texture)
+  goto fail;
+
+   pstip->sampler_view = util_pstipple_create_sampler_view(pipe,
+   pstip->texture);
+   if (!pstip->sampler_view)
   goto fail;
 
-   if (!pstip_create_sampler(pstip))
+   pstip->sampler_cso = util_pstipple_create

[Mesa-dev] [PATCH 1/2] draw: use util_pstipple_create_fragment_shader

2016-01-22 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

This reduces code duplication. It also adds support for drivers where the
fragment position is a system value.

Suggested-by: Jose Fonseca <jfons...@vmware.com>
---
A basic polygon stippling test shows no regression on llvmpipe, but that's
the extent of my testing.

 src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 209 ++--
 1 file changed, 12 insertions(+), 197 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c 
b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index cf52ca4..e468cc3 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -43,10 +43,10 @@
 #include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
+#include "util/u_pstipple.h"
 #include "util/u_sampler.h"
 
 #include "tgsi/tgsi_transform.h"
-#include "tgsi/tgsi_dump.h"
 
 #include "draw_context.h"
 #include "draw_pipe.h"
@@ -114,178 +114,6 @@ struct pstip_stage
 };
 
 
-
-/**
- * Subclass of tgsi_transform_context, used for transforming the
- * user's fragment shader to add the extra texture sample and fragment kill
- * instructions.
- */
-struct pstip_transform_context {
-   struct tgsi_transform_context base;
-   uint tempsUsed;  /**< bitmask */
-   int wincoordInput;
-   int maxInput;
-   uint samplersUsed;  /**< bitfield of samplers used */
-   bool hasSview;
-   int freeSampler;  /** an available sampler for the pstipple */
-   int texTemp;  /**< temp registers */
-   int numImmed;
-};
-
-
-/**
- * TGSI declaration transform callback.
- * Look for a free sampler, a free input attrib, and two free temp regs.
- */
-static void
-pstip_transform_decl(struct tgsi_transform_context *ctx,
- struct tgsi_full_declaration *decl)
-{
-   struct pstip_transform_context *pctx = (struct pstip_transform_context *) 
ctx;
-
-   if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
-  uint i;
-  for (i = decl->Range.First;
-   i <= decl->Range.Last; i++) {
- pctx->samplersUsed |= 1 << i;
-  }
-   }
-   else if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
-  pctx->hasSview = true;
-   }
-   else if (decl->Declaration.File == TGSI_FILE_INPUT) {
-  pctx->maxInput = MAX2(pctx->maxInput, (int) decl->Range.Last);
-  if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION)
- pctx->wincoordInput = (int) decl->Range.First;
-   }
-   else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
-  uint i;
-  for (i = decl->Range.First;
-   i <= decl->Range.Last; i++) {
- pctx->tempsUsed |= (1 << i);
-  }
-   }
-
-   ctx->emit_declaration(ctx, decl);
-}
-
-
-/**
- * TGSI immediate declaration transform callback.
- * We're just counting the number of immediates here.
- */
-static void
-pstip_transform_immed(struct tgsi_transform_context *ctx,
-  struct tgsi_full_immediate *immed)
-{
-   struct pstip_transform_context *pctx = (struct pstip_transform_context *) 
ctx;
-   ctx->emit_immediate(ctx, immed); /* emit to output shader */
-   pctx->numImmed++;
-}
-
-
-/**
- * Find the lowest zero bit in the given word, or -1 if bitfield is all ones.
- */
-static int
-free_bit(uint bitfield)
-{
-   return ffs(~bitfield) - 1;
-}
-
-
-/**
- * TGSI transform prolog callback.
- */
-static void
-pstip_transform_prolog(struct tgsi_transform_context *ctx)
-{
-   struct pstip_transform_context *pctx = (struct pstip_transform_context *) 
ctx;
-   uint i;
-   int wincoordInput;
-
-   /* find free sampler */
-   pctx->freeSampler = free_bit(pctx->samplersUsed);
-   if (pctx->freeSampler >= PIPE_MAX_SAMPLERS)
-  pctx->freeSampler = PIPE_MAX_SAMPLERS - 1;
-
-   if (pctx->wincoordInput < 0)
-  wincoordInput = pctx->maxInput + 1;
-   else
-  wincoordInput = pctx->wincoordInput;
-
-   /* find one free temp reg */
-   for (i = 0; i < 32; i++) {
-  if ((pctx->tempsUsed & (1 << i)) == 0) {
-  /* found a free temp */
-  if (pctx->texTemp < 0)
- pctx->texTemp  = i;
-  else
- break;
-  }
-   }
-   assert(pctx->texTemp >= 0);
-
-   if (pctx->wincoordInput < 0) {
-  /* declare new position input reg */
-  tgsi_transform_input_decl(ctx, wincoordInput,
-TGSI_SEMANTIC_POSITION, 1,
-TGSI_INTERPOLATE_LINEAR);
-   }
-
-   /* declare new sampler */
-   tgsi_transform_sampler_decl(ctx, pctx->freeSampler);
-
-   /* if the src shader has SVIEW decl's for each SAMP decl, we
-* need to continue the trend and ensure there is a matching
-* SVIEW for the new SAMP we just created
-*/
-   if (pctx->hasSview) {
-  tgsi_transform_sampler_view_

[Mesa-dev] [PATCH v2 4/9] st/mesa: Accelerate PBO uploads

2016-01-21 Thread Nicolai Hähnle

From: Fredrik Höglund <fred...@kde.org>

Create a PIPE_BUFFER sampler view on the pixel-unpack buffer, and draw
the image on the texture with a fragment shader that maps fragment
coordinates to buffer coordinates.

Modifications by Nicolai Hähnle:
- various cleanups and fixes (e.g. error handling, corner cases)
- split try_pbo_upload into two functions, which will allow code to be
  shared with compressed texture uploads
- modify the source format selection to only test for support against
  the PIPE_BUFFER target

v2:
- update handling of TGSI_SEMANTIC_POSITION for recent changes in master
- MaxTextureBufferSize is number of texels, not bytes (Ilia Mirkin)
- only enable when integers are supported (Marek Olšák)
- try harder to hit the TextureBufferOffsetAlignment
- remove unnecessary MOV from the fragment shader
---
 src/mesa/state_tracker/st_cb_texture.c | 890 -
 src/mesa/state_tracker/st_cb_texture.h |   5 +
 src/mesa/state_tracker/st_context.c|   2 +
 src/mesa/state_tracker/st_context.h|  10 +
 4 files changed, 898 insertions(+), 9 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_texture.c 
b/src/mesa/state_tracker/st_cb_texture.c
index 0e3a030..9d90d03 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -60,6 +60,7 @@
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "util/u_inlines.h"
+#include "util/u_upload_mgr.h"
 #include "pipe/p_shader_tokens.h"
 #include "util/u_tile.h"
 #include "util/u_format.h"
@@ -67,6 +68,9 @@
 #include "util/u_sampler.h"
 #include "util/u_math.h"
 #include "util/u_box.h"
+#include "util/u_simple_shaders.h"
+#include "cso_cache/cso_context.h"
+#include "tgsi/tgsi_ureg.h"
 
 #define DBG if (0) printf
 
@@ -686,6 +690,865 @@ st_get_blit_mask(GLenum srcFormat, GLenum dstFormat)
}
 }
 
+void
+st_init_pbo_upload(struct st_context *st)
+{
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_screen *screen = pipe->screen;
+
+   st->pbo_upload.enabled =
+  screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OBJECTS) &&
+  screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT) >= 1 
&&
+  screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, 
PIPE_SHADER_CAP_INTEGERS);
+   if (!st->pbo_upload.enabled)
+  return;
+
+   st->pbo_upload.rgba_only =
+  screen->get_param(screen, PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY);
+
+   /* Create the vertex shader */
+   {
+  unsigned semantic_names[] = { TGSI_SEMANTIC_POSITION };
+  unsigned semantic_indexes[] = { 0 };
+
+  st->pbo_upload.vs = util_make_vertex_passthrough_shader(pipe, 1,
+  semantic_names,
+  semantic_indexes,
+  FALSE);
+   }
+
+   /* Blend state */
+   memset(&st->pbo_upload.blend, 0, sizeof(struct pipe_blend_state));
+   st->pbo_upload.blend.rt[0].colormask = PIPE_MASK_RGBA;
+
+   /* Rasterizer state */
+   memset(&st->pbo_upload.raster, 0, sizeof(struct pipe_rasterizer_state));
+   st->pbo_upload.raster.half_pixel_center = 1;
+}
+
+void
+st_destroy_pbo_upload(struct st_context *st)
+{
+   if (st->pbo_upload.fs) {
+  cso_delete_fragment_shader(st->cso_context, st->pbo_upload.fs);
+  st->pbo_upload.fs = NULL;
+   }
+
+   if (st->pbo_upload.vs) {
+  cso_delete_vertex_shader(st->cso_context, st->pbo_upload.vs);
+  st->pbo_upload.vs = NULL;
+   }
+}
+
+/**
+ * Converts format to a format with the same components, types
+ * and sizes, but with the components in RGBA order.
+ */
+static enum pipe_format
+unswizzle_format(enum pipe_format format)
+{
+   switch (format)
+   {
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+   case PIPE_FORMAT_A8B8G8R8_UNORM:
+  return PIPE_FORMAT_R8G8B8A8_UNORM;
+
+   case PIPE_FORMAT_B10G10R10A2_UNORM:
+  return PIPE_FORMAT_R10G10B10A2_UNORM;
+
+   case PIPE_FORMAT_B10G10R10A2_SNORM:
+  return PIPE_FORMAT_R10G10B10A2_SNORM;
+
+   case PIPE_FORMAT_B10G10R10A2_UINT:
+  return PIPE_FORMAT_R10G10B10A2_UINT;
+
+   default:
+  return format;
+   }
+}
+
+/**
+ * Converts PIPE_FORMAT_A* to PIPE_FORMAT_R*.
+ */
+static enum pipe_format
+alpha_to_red(enum pipe_format format)
+{
+   switch (format)
+   {
+   case PIPE_FORMAT_A8_UNORM:
+  return PIPE_FORMAT_R8_UNORM;
+   case PIPE_FORMAT_A8_SNORM:
+  return PIPE_FORMAT_R8_SNORM;
+   case PIPE_FORMAT_A8_UINT:
+  return PIPE_FORMAT_R8_UINT;
+   case PIPE_FORMAT_A8_SINT:
+  return PIPE_FORMAT_R8_SINT;
+
+   case PIPE_FORMAT_A16_UNORM:
+  return PIPE_FORMAT_R16_UNORM;
+   case PIPE_FORMAT_A16_SNORM:
+  return PIPE_FORMAT_R16

[Mesa-dev] [PATCH v2 5/9] st/mesa: implement PBO upload for multiple layers

2016-01-21 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

Use instancing to generate two triangles for each destination layer and use
a geometry shader to route the layer index.

v2:
- directly write layer in VS if supported by the driver (Marek Olšák)
---
 src/mesa/state_tracker/st_cb_texture.c | 159 +
 src/mesa/state_tracker/st_context.h|   3 +
 2 files changed, 143 insertions(+), 19 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_texture.c 
b/src/mesa/state_tracker/st_cb_texture.c
index 9d90d03..2321ff8 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -706,15 +706,13 @@ st_init_pbo_upload(struct st_context *st)
st->pbo_upload.rgba_only =
   screen->get_param(screen, PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY);
 
-   /* Create the vertex shader */
-   {
-  unsigned semantic_names[] = { TGSI_SEMANTIC_POSITION };
-  unsigned semantic_indexes[] = { 0 };
-
-  st->pbo_upload.vs = util_make_vertex_passthrough_shader(pipe, 1,
-  semantic_names,
-  semantic_indexes,
-  FALSE);
+   if (screen->get_param(screen, PIPE_CAP_TGSI_INSTANCEID)) {
+  if (screen->get_param(screen, PIPE_CAP_TGSI_VS_LAYER_VIEWPORT)) {
+ st->pbo_upload.upload_layers = true;
+  } else if (screen->get_param(screen, 
PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES) >= 3) {
+ st->pbo_upload.upload_layers = true;
+ st->pbo_upload.use_gs = true;
+  }
}
 
/* Blend state */
@@ -734,6 +732,11 @@ st_destroy_pbo_upload(struct st_context *st)
   st->pbo_upload.fs = NULL;
}
 
+   if (st->pbo_upload.gs) {
+  cso_delete_geometry_shader(st->cso_context, st->pbo_upload.gs);
+  st->pbo_upload.gs = NULL;
+   }
+
if (st->pbo_upload.vs) {
   cso_delete_vertex_shader(st->cso_context, st->pbo_upload.vs);
   st->pbo_upload.vs = NULL;
@@ -1120,7 +1123,92 @@ reinterpret_formats(enum pipe_format *src_format, enum 
pipe_format *dst_format)
 }
 
 static void *
-create_pbo_upload_shader(struct st_context *st)
+create_pbo_upload_vs(struct st_context *st)
+{
+   struct ureg_program *ureg;
+   struct ureg_src in_pos;
+   struct ureg_src in_instanceid;
+   struct ureg_dst out_pos;
+   struct ureg_dst out_layer;
+
+   ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
+
+   in_pos = ureg_DECL_vs_input(ureg, TGSI_SEMANTIC_POSITION);
+
+   out_pos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
+
+   if (st->pbo_upload.upload_layers) {
+  in_instanceid = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_INSTANCEID, 
0);
+
+  if (!st->pbo_upload.use_gs)
+ out_layer = ureg_DECL_output(ureg, TGSI_SEMANTIC_LAYER, 0);
+   }
+
+   /* out_pos = in_pos */
+   ureg_MOV(ureg, out_pos, in_pos);
+
+   if (st->pbo_upload.upload_layers) {
+  if (st->pbo_upload.use_gs) {
+ /* out_pos.z = i2f(gl_InstanceID) */
+ ureg_I2F(ureg, ureg_writemask(out_pos, TGSI_WRITEMASK_Z),
+ureg_scalar(in_instanceid, TGSI_SWIZZLE_X));
+  } else {
+ /* out_layer = gl_InstanceID */
+ ureg_MOV(ureg, out_layer, in_instanceid);
+  }
+   }
+
+   ureg_END(ureg);
+
+   return ureg_create_shader_and_destroy(ureg, st->pipe);
+}
+
+static void *
+create_pbo_upload_gs(struct st_context *st)
+{
+   static const int zero = 0;
+   struct ureg_program *ureg;
+   struct ureg_dst out_pos;
+   struct ureg_dst out_layer;
+   struct ureg_src in_pos;
+   struct ureg_src imm;
+   unsigned i;
+
+   ureg = ureg_create(TGSI_PROCESSOR_GEOMETRY);
+   if (!ureg)
+  return NULL;
+
+   ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, PIPE_PRIM_TRIANGLES);
+   ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, PIPE_PRIM_TRIANGLE_STRIP);
+   ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, 3);
+
+   out_pos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
+   out_layer = ureg_DECL_output(ureg, TGSI_SEMANTIC_LAYER, 0);
+
+   in_pos = ureg_DECL_input(ureg, TGSI_SEMANTIC_POSITION, 0, 0, 1);
+
+   imm = ureg_DECL_immediate_int(ureg, &zero, 1);
+
+   for (i = 0; i < 3; ++i) {
+  struct ureg_src in_pos_vertex = ureg_src_dimension(in_pos, i);
+
+  /* out_pos = in_pos[i] */
+  ureg_MOV(ureg, out_pos, in_pos_vertex);
+
+  /* out_layer.x = f2i(in_pos[i].z) */
+  ureg_F2I(ureg, ureg_writemask(out_layer, TGSI_WRITEMASK_X),
+ ureg_scalar(in_pos_vertex, TGSI_SWIZZLE_Z));
+
+  ureg_EMIT(ureg, ureg_scalar(imm, TGSI_SWIZZLE_X));
+   }
+
+   ureg_END(ureg);
+
+   return ureg_create_shader_and_destroy(ureg, st->pipe);
+}
+
+static void *
+create_pbo_upload_fs(struct st_context *st)
 {
struct pipe_context *pipe = st->pipe;
struct pipe_screen *screen = pipe->screen;
@@ -1128,10 +1216,14 @@ create_pbo

[Mesa-dev] [PATCH v2 7/9] st/mesa: redirect CompressedTexSubImage to our own implementation

2016-01-21 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

This is where PBO upload will go.
---
 src/mesa/state_tracker/st_cb_texture.c | 24 ++--
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_texture.c 
b/src/mesa/state_tracker/st_cb_texture.c
index b33779c..4d996e3 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -1921,6 +1921,18 @@ st_TexImage(struct gl_context * ctx, GLuint dims,
 
 
 static void
+st_CompressedTexSubImage(struct gl_context *ctx, GLuint dims,
+ struct gl_texture_image *texImage,
+ GLint x, GLint y, GLint z,
+ GLsizei w, GLsizei h, GLsizei d,
+ GLenum format, GLsizei imageSize, const GLvoid *data)
+{
+   _mesa_store_compressed_texsubimage(ctx, dims, texImage,
+  x, y, z, w, h, d,
+  format, imageSize, data);
+}
+
+static void
 st_CompressedTexImage(struct gl_context *ctx, GLuint dims,
   struct gl_texture_image *texImage,
   GLsizei imageSize, const GLvoid *data)
@@ -1948,11 +1960,11 @@ st_CompressedTexImage(struct gl_context *ctx, GLuint 
dims,
   return;
}
 
-   _mesa_store_compressed_texsubimage(ctx, dims, texImage,
-  0, 0, 0,
-  texImage->Width, texImage->Height, 
texImage->Depth,
-  texImage->TexFormat,
-  imageSize, data);
+   st_CompressedTexSubImage(ctx, dims, texImage,
+0, 0, 0,
+texImage->Width, texImage->Height, texImage->Depth,
+texImage->TexFormat,
+imageSize, data);
 }
 
 
@@ -2977,7 +2989,7 @@ st_init_texture_functions(struct dd_function_table 
*functions)
functions->QuerySamplesForFormat = st_QuerySamplesForFormat;
functions->TexImage = st_TexImage;
functions->TexSubImage = st_TexSubImage;
-   functions->CompressedTexSubImage = _mesa_store_compressed_texsubimage;
+   functions->CompressedTexSubImage = st_CompressedTexSubImage;
functions->CopyTexSubImage = st_CopyTexSubImage;
functions->GenerateMipmap = st_generate_mipmap;
 
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 3/9] st/mesa: use the correct address generation functions in st_TexSubImage blit

2016-01-21 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

We need to tell the address generation functions about the dimensionality of
the texture to correctly implement the part of Section 3.8.1 (Texture Image
Specification) of the OpenGL 2.1 specification which says:

"For the purposes of decoding the texture image, TexImage2D is
equivalent to calling TexImage3D with corresponding arguments
and depth of 1, except that
  ...
  * UNPACK SKIP IMAGES is ignored."

Fixes a low impact bug that was found by chance while browsing the spec and
extending piglit tests.

Cc: "11.0 11.1" <mesa-sta...@lists.freedesktop.org>
---
 src/mesa/state_tracker/st_cb_texture.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_texture.c 
b/src/mesa/state_tracker/st_cb_texture.c
index 0ceb370..0e3a030 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -849,18 +849,18 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
 /* 1D array textures.
  * We need to convert gallium coords to GL coords.
  */
-GLvoid *src = _mesa_image_address3d(unpack, pixels,
+GLvoid *src = _mesa_image_address2d(unpack, pixels,
 width, depth, format,
-type, 0, slice, 0);
+type, slice, 0);
 memcpy(map, src, bytesPerRow);
  }
  else {
 ubyte *slice_map = map;
 
 for (row = 0; row < (unsigned) height; row++) {
-   GLvoid *src = _mesa_image_address3d(unpack, pixels,
-   width, height, format,
-   type, slice, row, 0);
+   GLvoid *src = _mesa_image_address(dims, unpack, pixels,
+ width, height, format,
+ type, slice, row, 0);
memcpy(slice_map, src, bytesPerRow);
slice_map += transfer->stride;
 }
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 8/9] st/mesa: implement PBO upload for glCompressedTex(Sub)Image

2016-01-21 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

v2:
- use st->pbo_upload.enabled flag
---
 src/mesa/state_tracker/st_cb_texture.c | 115 +
 1 file changed, 115 insertions(+)

diff --git a/src/mesa/state_tracker/st_cb_texture.c 
b/src/mesa/state_tracker/st_cb_texture.c
index 4d996e3..e9ac9a6 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -1927,6 +1927,121 @@ st_CompressedTexSubImage(struct gl_context *ctx, GLuint 
dims,
  GLsizei w, GLsizei h, GLsizei d,
  GLenum format, GLsizei imageSize, const GLvoid *data)
 {
+   struct st_context *st = st_context(ctx);
+   struct st_texture_image *stImage = st_texture_image(texImage);
+   struct st_texture_object *stObj = st_texture_object(texImage->TexObject);
+   struct pipe_resource *texture = stImage->pt;
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_screen *screen = pipe->screen;
+   struct pipe_resource *dst = stImage->pt;
+   struct pipe_surface *surface = NULL;
+   struct compressed_pixelstore store;
+   enum pipe_format copy_format;
+   unsigned bytes_per_block;
+   unsigned bw, bh;
+   intptr_t buf_offset;
+   bool success = false;
+
+   /* Check basic pre-conditions for PBO upload */
+   if (!st->prefer_blit_based_texture_transfer) {
+  goto fallback;
+   }
+
+   if (!_mesa_is_bufferobj(ctx->Unpack.BufferObj))
+  goto fallback;
+
+   if ((_mesa_is_format_etc2(texImage->TexFormat) && !st->has_etc2) ||
+   (texImage->TexFormat == MESA_FORMAT_ETC1_RGB8 && !st->has_etc1)) {
+  /* ETC isn't supported and is represented by uncompressed formats. */
+  goto fallback;
+   }
+
+   if (!dst) {
+  goto fallback;
+   }
+
+   if (!st->pbo_upload.enabled ||
+   !screen->get_param(screen, PIPE_CAP_SURFACE_REINTERPRET_BLOCKS)) {
+  goto fallback;
+   }
+
+   /* Choose the pipe format for the upload. */
+   bytes_per_block = util_format_get_blocksize(dst->format);
+   bw = util_format_get_blockwidth(dst->format);
+   bh = util_format_get_blockheight(dst->format);
+
+   switch (bytes_per_block) {
+   case 8:
+  copy_format = PIPE_FORMAT_R16G16B16A16_UINT;
+  break;
+   case 16:
+  copy_format = PIPE_FORMAT_R32G32B32A32_UINT;
+  break;
+   default:
+  goto fallback;
+   }
+
+   if (!screen->is_format_supported(screen, copy_format, PIPE_BUFFER, 0,
+PIPE_BIND_SAMPLER_VIEW)) {
+  goto fallback;
+   }
+
+   if (!screen->is_format_supported(screen, copy_format, dst->target,
+dst->nr_samples, PIPE_BIND_RENDER_TARGET)) 
{
+  goto fallback;
+   }
+
+   /* Interpret the pixelstore settings. */
+   _mesa_compute_compressed_pixelstore(dims, texImage->TexFormat, w, h, d,
+   &ctx->Unpack, &store);
+   assert(store.CopyBytesPerRow % bytes_per_block == 0);
+   assert(store.SkipBytes % bytes_per_block == 0);
+
+   /* Compute the offset into the buffer */
+   buf_offset = (intptr_t)data + store.SkipBytes;
+
+   if (buf_offset % bytes_per_block) {
+  goto fallback;
+   }
+
+   buf_offset = buf_offset / bytes_per_block;
+
+   /* Set up the surface. */
+   {
+  unsigned level = stObj->pt != stImage->pt ? 0 : 
texImage->TexObject->MinLevel + texImage->Level;
+  unsigned max_layer = util_max_layer(texture, level);
+
+  z += texImage->Face + texImage->TexObject->MinLayer;
+
+  struct pipe_surface templ;
+  memset(&templ, 0, sizeof(templ));
+  templ.format = copy_format;
+  templ.u.tex.level = level;
+  templ.u.tex.first_layer = MIN2(z, max_layer);
+  templ.u.tex.last_layer = MIN2(z + d - 1, max_layer);
+
+  surface = pipe->create_surface(pipe, texture, &templ);
+  if (!surface)
+ goto fallback;
+   }
+
+   success = try_pbo_upload_common(ctx, surface,
+   x / bw, y / bh,
+   store.CopyBytesPerRow / bytes_per_block,
+   store.CopyRowsPerSlice,
+   
st_buffer_object(ctx->Unpack.BufferObj)->buffer,
+   copy_format,
+   buf_offset,
+   bytes_per_block,
+   store.TotalBytesPerRow / bytes_per_block,
+   store.TotalRowsPerSlice);
+
+   pipe_surface_reference(&surface, NULL);
+
+   if (success)
+  return;
+
+fallback:
_mesa_store_compressed_texsubimage(ctx, dims, texImage,
   x, y, z, w, h, d,
   format, imageSize, data);
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 1/9] gallium: Add PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY

2016-01-21 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

This cap indicates that the driver only supports R, RG, RGB and RGBA
formats for PIPE_BUFFER sampler views.

v2: move into "unsupported features" section for nouveau (Ilia Mirkin)
---
 src/gallium/docs/source/screen.rst   | 4 
 src/gallium/drivers/freedreno/freedreno_screen.c | 2 ++
 src/gallium/drivers/i915/i915_screen.c   | 1 +
 src/gallium/drivers/ilo/ilo_screen.c | 2 ++
 src/gallium/drivers/llvmpipe/lp_screen.c | 1 +
 src/gallium/drivers/nouveau/nv30/nv30_screen.c   | 1 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.c   | 1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   | 1 +
 src/gallium/drivers/r300/r300_screen.c   | 1 +
 src/gallium/drivers/r600/r600_pipe.c | 3 +++
 src/gallium/drivers/radeonsi/si_pipe.c   | 3 +++
 src/gallium/drivers/softpipe/sp_screen.c | 2 ++
 src/gallium/drivers/svga/svga_screen.c   | 1 +
 src/gallium/drivers/vc4/vc4_screen.c | 1 +
 src/gallium/drivers/virgl/virgl_screen.c | 2 ++
 src/gallium/include/pipe/p_defines.h | 1 +
 16 files changed, 27 insertions(+)

diff --git a/src/gallium/docs/source/screen.rst 
b/src/gallium/docs/source/screen.rst
index d7ea123..06cb56d 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -138,6 +138,10 @@ The integer capabilities:
 * ``PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT``: Describes the required
   alignment for pipe_sampler_view::u.buf.first_element, in bytes.
   If a driver does not support first/last_element, it should return 0.
+* ``PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY``: Whether the driver only
+  supports R, RG, RGB and RGBA formats for PIPE_BUFFER sampler views.
+  When this is the case it should be assumed that the swizzle parameters
+  in the sampler view have no effect.
 * ``PIPE_CAP_TGSI_TEXCOORD``: This CAP describes a hw limitation.
   If true, the hardware cannot replace arbitrary shader inputs with sprite
   coordinates and hence the inputs that are desired to be replaceable must
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c 
b/src/gallium/drivers/freedreno/freedreno_screen.c
index 6562924..69a55d4 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -182,6 +182,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_CLIP_HALFZ:
return is_a3xx(screen) || is_a4xx(screen);
 
+   case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+   return 0;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
if (is_a3xx(screen)) return 16;
if (is_a4xx(screen)) return 32;
diff --git a/src/gallium/drivers/i915/i915_screen.c 
b/src/gallium/drivers/i915/i915_screen.c
index ede5558..fc4708c 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -261,6 +261,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap 
cap)
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
+   case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
   return 0;
 
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
diff --git a/src/gallium/drivers/ilo/ilo_screen.c 
b/src/gallium/drivers/ilo/ilo_screen.c
index fa32757..7711c19 100644
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -428,6 +428,8 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap 
param)
case PIPE_CAP_CUBE_MAP_ARRAY:
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
   return true;
+   case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+  return 0;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
   return 1;
case PIPE_CAP_TGSI_TEXCOORD:
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c 
b/src/gallium/drivers/llvmpipe/lp_screen.c
index fb52f5d..e9078e5 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -310,6 +310,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum 
pipe_cap param)
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
+   case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
   return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c 
b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 90f..0569b57 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -183,6 +183,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
+   case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
   return 0;
 
case PIPE_CAP_VENDOR_ID:

[Mesa-dev] [PATCH v2 0/9] st/mesa: accelerate texture uploads from PBOs

2016-01-21 Thread Nicolai Hähnle

Hi everybody,

here's an updated version of the series.

I decided to keep BUFFER_SAMPLER_VIEW_RGBA_ONLY as is, following Fredrik's
point that it affects not only the sampler swizzle but also the texture
format itself.

The major functionality changes are that we now try to fulfill larger
alignments by adjusting the buf_offset appropriately (this is not needed
for radeonsi, but I did some basic tests to make sure this works) and
we don't use a geometry shader if the driver can handle layer writes
in the VS.

Please review.

Thanks,
Nicolai
--
 src/gallium/docs/source/screen.rst   |   11 +
 .../drivers/freedreno/freedreno_screen.c |3 +
 src/gallium/drivers/i915/i915_screen.c   |2 +
 src/gallium/drivers/ilo/ilo_screen.c |3 +
 src/gallium/drivers/llvmpipe/lp_screen.c |2 +
 .../drivers/nouveau/nv30/nv30_screen.c   |2 +
 .../drivers/nouveau/nv50/nv50_screen.c   |2 +
 .../drivers/nouveau/nvc0/nvc0_screen.c   |2 +
 src/gallium/drivers/r300/r300_screen.c   |2 +
 src/gallium/drivers/r600/r600_pipe.c |4 +
 src/gallium/drivers/radeon/r600_texture.c|   26 +-
 src/gallium/drivers/radeonsi/si_pipe.c   |4 +
 src/gallium/drivers/softpipe/sp_screen.c |3 +
 src/gallium/drivers/svga/svga_screen.c   |2 +
 src/gallium/drivers/vc4/vc4_screen.c |2 +
 src/gallium/drivers/virgl/virgl_screen.c |3 +
 src/gallium/include/pipe/p_defines.h |2 +
 src/mesa/state_tracker/st_cb_texture.c   | 1178 +++-
 src/mesa/state_tracker/st_cb_texture.h   |5 +
 src/mesa/state_tracker/st_context.c  |2 +
 src/mesa/state_tracker/st_context.h  |   13 +
 21 files changed, 1254 insertions(+), 19 deletions(-)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 2/9] gallium: Add PIPE_CAP_SURFACE_REINTERPRET_BLOCKS

2016-01-21 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

This cap indicates whether pipe->create_surface can reinterpret a texture
as a surface with a format of different block width/height (but equal
block size).

v2: fix whitespace
---
 src/gallium/docs/source/screen.rst   | 7 +++
 src/gallium/drivers/freedreno/freedreno_screen.c | 1 +
 src/gallium/drivers/i915/i915_screen.c   | 1 +
 src/gallium/drivers/ilo/ilo_screen.c | 1 +
 src/gallium/drivers/llvmpipe/lp_screen.c | 1 +
 src/gallium/drivers/nouveau/nv30/nv30_screen.c   | 1 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.c   | 1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   | 1 +
 src/gallium/drivers/r300/r300_screen.c   | 1 +
 src/gallium/drivers/r600/r600_pipe.c | 1 +
 src/gallium/drivers/radeonsi/si_pipe.c   | 1 +
 src/gallium/drivers/softpipe/sp_screen.c | 1 +
 src/gallium/drivers/svga/svga_screen.c   | 1 +
 src/gallium/drivers/vc4/vc4_screen.c | 1 +
 src/gallium/drivers/virgl/virgl_screen.c | 1 +
 src/gallium/include/pipe/p_defines.h | 1 +
 16 files changed, 22 insertions(+)

diff --git a/src/gallium/docs/source/screen.rst 
b/src/gallium/docs/source/screen.rst
index 06cb56d..ad1d16f 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -309,6 +309,13 @@ The integer capabilities:
   for buffers is supported.
 * ``PIPE_CAP_GENERATE_MIPMAP``: Indicates whether pipe_context::generate_mipmap
   is supported.
+* ``PIPE_CAP_SURFACE_REINTERPRET_BLOCKS``: Indicates whether
+  pipe_context::create_surface supports reinterpreting a texture as a surface
+  of a format with different block width/height (but same block size in bits).
+  For example, a compressed texture image can be interpreted as a
+  non-compressed surface whose texels are the same number of bits as the
+  compressed blocks, and vice versa. The width and height of the surface is
+  adjusted appropriately.
 
 
 .. _pipe_capf:
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c 
b/src/gallium/drivers/freedreno/freedreno_screen.c
index 69a55d4..59ebbec 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -249,6 +249,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
+   case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
return 0;
 
case PIPE_CAP_MAX_VIEWPORTS:
diff --git a/src/gallium/drivers/i915/i915_screen.c 
b/src/gallium/drivers/i915/i915_screen.c
index fc4708c..eb3e3d1 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -262,6 +262,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap 
cap)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+   case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
   return 0;
 
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
diff --git a/src/gallium/drivers/ilo/ilo_screen.c 
b/src/gallium/drivers/ilo/ilo_screen.c
index 7711c19..1efd789 100644
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -487,6 +487,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap 
param)
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
+   case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
   return 0;
 
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c 
b/src/gallium/drivers/llvmpipe/lp_screen.c
index e9078e5..591c45f 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -311,6 +311,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum 
pipe_cap param)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+   case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
   return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c 
b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 0569b57..cf4cca3 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -184,6 +184,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+   case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
   return 0;
 
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 771de55..ccc134b 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nou

[Mesa-dev] [PATCH v2 6/9] st/mesa: inline the implementation of _mesa_store_compressed_teximage

2016-01-21 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

We will write our own version of texsubimage for PBO uploads, and we will
want to call that here as well.
---
 src/mesa/state_tracker/st_cb_texture.c | 28 +++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_cb_texture.c 
b/src/mesa/state_tracker/st_cb_texture.c
index 2321ff8..b33779c 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -1926,7 +1926,33 @@ st_CompressedTexImage(struct gl_context *ctx, GLuint 
dims,
   GLsizei imageSize, const GLvoid *data)
 {
prep_teximage(ctx, texImage, GL_NONE, GL_NONE);
-   _mesa_store_compressed_teximage(ctx, dims, texImage, imageSize, data);
+
+   /* only 2D and 3D compressed images are supported at this time */
+   if (dims == 1) {
+  _mesa_problem(ctx, "Unexpected glCompressedTexImage1D call");
+  return;
+   }
+
+   /* This is pretty simple, because unlike the general texstore path we don't
+* have to worry about the usual image unpacking or image transfer
+* operations.
+*/
+   assert(texImage);
+   assert(texImage->Width > 0);
+   assert(texImage->Height > 0);
+   assert(texImage->Depth > 0);
+
+   /* allocate storage for texture data */
+   if (!st_AllocTextureImageBuffer(ctx, texImage)) {
+  _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage%uD", dims);
+  return;
+   }
+
+   _mesa_store_compressed_texsubimage(ctx, dims, texImage,
+  0, 0, 0,
+  texImage->Width, texImage->Height, 
texImage->Depth,
+  texImage->TexFormat,
+  imageSize, data);
 }
 
 
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 00/10] vbo: introduce a minmax_index cache (v3)

2016-01-22 Thread Nicolai Hähnle


On 22.01.2016 15:17, Marek Olšák wrote:

On Fri, Jan 22, 2016 at 6:17 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote:

On 11.01.2016 21:32, Nicolai Hähnle wrote:


here is a new version of the series. I'm calling it v3 even though I never
sent v2 because it contains v3 of the main patch. Changes relative to v1:

- disable cache on USAGE_TEXTURE_BUFFER and bool/GLboolean nit
- disable cache only temporarily when a persistent writable mapping exists
- add a MESA_NO_MINMAX_CACHE environment variable that disables caches
completely
- track hits and misses and disable the cache on a per-BO basis if the
hit rate is low

The last point is on a per-BO basis because applications don't always have
the same usage pattern for all BOs.

There is also a new util/hash_table patch which I added for an idea that I
ultimately rejected, but it turns out to be useful elsewhere, so I'm
sending it along while I'm at it.

I benchmarked this series with a couple of tests from PTS, using low
resolutions in an attempt to emphasize CPU usage. Their performance is
unaffected, with averages with/without this series within the measurement
tolerance of each other. That there's no regression is good news, and that
there's no improvement is to be expected - if one of these tests had been
affected by the problem this series addresses, someone else would probably
have done it much earlier ;)

Please review!



Ping... any opinions on this series? Other than Matt's coding style comment,
I haven't received anything.


I'm looking at your FDO repo and some patches have my Rbs. These that don't:

util/hash_table: add _mesa_hash_table_num_entries
mesa: add USAGE_DISABLE_MINMAX_CACHE flag to buffer UsageHistory
vbo: disable the minmax cache when the hit rate is low
mesa: add MESA_NO_MINMAX_CACHE environment variable

are:
Reviewed-by: Marek Olšák <marek.ol...@amd.com>

Anything else?


Thanks. I'll wait to see if Ian has anything else to say after his 
comments on the first version of the series, so I'll give it until some 
time next week.


Cheers,
Nicolai



Marek


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] winsys/amdgpu: optionally use buffer lists with all allocated buffers

2016-01-22 Thread Nicolai Hähnle


On 22.01.2016 15:19, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

Set RADEON_ALL_BOS=1 to use it.


Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>


---
  src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 21 +
  src/gallium/winsys/amdgpu/drm/amdgpu_bo.h |  2 ++
  src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 33 ---
  src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c |  3 +++
  src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h |  5 
  5 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 82c803b..30a1aa8 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -128,6 +128,11 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)
 struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
 int i;

+   pipe_mutex_lock(bo->ws->global_bo_list_lock);
+   LIST_DEL(&bo->global_list_item);
+   bo->ws->num_buffers--;
+   pipe_mutex_unlock(bo->ws->global_bo_list_lock);
+
 amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
 amdgpu_va_range_free(bo->va_handle);
 amdgpu_bo_free(bo->bo);
@@ -249,6 +254,16 @@ static const struct pb_vtbl amdgpu_winsys_bo_vtbl = {
 /* other functions are never called */
  };

+static void amdgpu_add_buffer_to_global_list(struct amdgpu_winsys_bo *bo)
+{
+   struct amdgpu_winsys *ws = bo->ws;
+
+   pipe_mutex_lock(ws->global_bo_list_lock);
+   LIST_ADDTAIL(&bo->global_list_item, &ws->global_bo_list);
+   ws->num_buffers++;
+   pipe_mutex_unlock(ws->global_bo_list_lock);
+}
+
  static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
   unsigned size,
   unsigned alignment,
@@ -319,6 +334,8 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct 
amdgpu_winsys *ws,
 else if (initial_domain & RADEON_DOMAIN_GTT)
ws->allocated_gtt += align(size, ws->gart_page_size);

+   amdgpu_add_buffer_to_global_list(bo);
+
 return bo;

  error_va_map:
@@ -588,6 +605,8 @@ static struct pb_buffer *amdgpu_bo_from_handle(struct 
radeon_winsys *rws,
 else if (bo->initial_domain & RADEON_DOMAIN_GTT)
ws->allocated_gtt += align(bo->base.size, ws->gart_page_size);

+   amdgpu_add_buffer_to_global_list(bo);
+
 return &bo->base;

  error_va_map:
@@ -673,6 +692,8 @@ static struct pb_buffer *amdgpu_bo_from_ptr(struct 
radeon_winsys *rws,

  ws->allocated_gtt += align(bo->base.size, ws->gart_page_size);

+amdgpu_add_buffer_to_global_list(bo);
+
  return (struct pb_buffer*)bo;

  error_va_map:
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h 
b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
index 12cb920..54f5dbd 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
@@ -60,6 +60,8 @@ struct amdgpu_winsys_bo {

 /* Fences for buffer synchronization. */
 struct pipe_fence_handle *fence[RING_LAST];
+
+   struct list_head global_list_item;
  };

  bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf);
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 10f112d..83da740 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -605,6 +605,7 @@ static void amdgpu_cs_sync_flush(struct radeon_winsys_cs 
*rcs)
  }

  DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)
+DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", FALSE)

  static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
  unsigned flags,
@@ -644,9 +645,35 @@ static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
 if (cs->base.cdw && cs->base.cdw <= cs->base.max_dw && 
!debug_get_option_noop()) {
int r;

-  r = amdgpu_bo_list_create(ws->dev, cs->num_buffers,
-cs->handles, cs->flags,
-&cs->request.resources);
+  /* Use a buffer list containing all allocated buffers if requested. */
+  if (debug_get_option_all_bos()) {
+ struct amdgpu_winsys_bo *bo;
+ amdgpu_bo_handle *handles;
+ unsigned num = 0;
+
+ pipe_mutex_lock(ws->global_bo_list_lock);
+
+ handles = malloc(sizeof(handles[0]) * ws->num_buffers);
+ if (!handles) {
+pipe_mutex_unlock(ws->global_bo_list_lock);
+goto cleanup;
+ }
+
+ LIST_FOR_EACH_ENTRY(bo, &ws->global_bo_list, global_list_item) {
+assert(num < ws->num_buffers);
+handles[num++] = bo->bo;
+ }
+
+ r = amdgpu_bo_list_create(ws->dev, ws->num_buffers,
+

Re: [Mesa-dev] [PATCH 1/1] configure: Bail out on llvm-config component error

2016-02-15 Thread Nicolai Hähnle


On 12.02.2016 19:41, Jan Vesely wrote:

Signed-off-by: Jan Vesely <jan.ves...@rutgers.edu>
---
  configure.ac | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/configure.ac b/configure.ac
index 2750d4d..57330cb 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2295,6 +2295,9 @@ dnl in LLVM_LIBS.

  if test "x$MESA_LLVM" != x0; then

+if ! $LLVM_CONFIG --libs ${LLVM_COMPONENTS} >/dev/null; then
+   AC_MSG_ERROR([Calling ${LLVM_CONFIG} failed])
+fi
  LLVM_LIBS="`$LLVM_CONFIG --libs ${LLVM_COMPONENTS}`"

  dnl llvm-config may not give the right answer when llvm is a built as a



+1 for making the build process more user-friendly.

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] st/mesa: new st_DrawAtlasBitmaps() function for drawing bitmap text

2016-02-15 Thread Nicolai Hähnle


On 12.02.2016 20:07, Brian Paul wrote:

This basically saves the current pipeline state, sets up state for
rendering, constructs a set of textured quads, renders, then restores
the previous pipeline state.

It shouldn't be hard to implement a similar function for non-gallium
drives.  With some code refactoring, the vertex definition code could
probably be shared.


Except for the potential (trivial) interaction with my comments on the 
first patch, this is


Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>


---
  src/mesa/state_tracker/st_cb_bitmap.c | 143 +-
  src/mesa/state_tracker/st_context.h   |   1 +
  2 files changed, 141 insertions(+), 3 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_bitmap.c 
b/src/mesa/state_tracker/st_cb_bitmap.c
index d84bfef..461159b 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -33,6 +33,7 @@
  #include "main/imports.h"
  #include "main/image.h"
  #include "main/bufferobj.h"
+#include "main/dlist.h"
  #include "main/macros.h"
  #include "main/pbo.h"
  #include "program/program.h"
@@ -51,6 +52,7 @@
  #include "pipe/p_shader_tokens.h"
  #include "util/u_inlines.h"
  #include "util/u_simple_shaders.h"
+#include "util/u_upload_mgr.h"
  #include "program/prog_instruction.h"
  #include "cso_cache/cso_context.h"

@@ -182,7 +184,8 @@ make_bitmap_texture(struct gl_context *ctx, GLsizei width, 
GLsizei height,
  static void
  setup_render_state(struct gl_context *ctx,
 struct pipe_sampler_view *sv,
-   const GLfloat *color)
+   const GLfloat *color,
+   bool atlas)
  {
 struct st_context *st = st_context(ctx);
 struct cso_context *cso = st->cso_context;
@@ -249,7 +252,10 @@ setup_render_state(struct gl_context *ctx,
for (i = 0; i < st->state.num_samplers[PIPE_SHADER_FRAGMENT]; i++) {
   samplers[i] = &st->state.samplers[PIPE_SHADER_FRAGMENT][i];
}
-  samplers[fpv->bitmap_sampler] = &st->bitmap.sampler;
+  if (atlas)
+ samplers[fpv->bitmap_sampler] = &st->bitmap.atlas_sampler;
+  else
+ samplers[fpv->bitmap_sampler] = &st->bitmap.sampler;
cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, num,
 (const struct pipe_sampler_state **) samplers);
 }
@@ -324,7 +330,7 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, 
GLfloat z,
assert(height <= (GLsizei) maxSize);
 }

-   setup_render_state(ctx, sv, color);
+   setup_render_state(ctx, sv, color, false);

 /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */
 z = z * 2.0f - 1.0f;
@@ -571,6 +577,9 @@ init_bitmap_state(struct st_context *st)
 st->bitmap.sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
 st->bitmap.sampler.normalized_coords = st->internal_target == 
PIPE_TEXTURE_2D;

+   st->bitmap.atlas_sampler = st->bitmap.sampler;
+   st->bitmap.atlas_sampler.normalized_coords = 0;
+
 /* init baseline rasterizer state once */
 memset(&st->bitmap.rasterizer, 0, sizeof(st->bitmap.rasterizer));
 st->bitmap.rasterizer.half_pixel_center = 1;
@@ -665,11 +674,139 @@ st_Bitmap(struct gl_context *ctx, GLint x, GLint y,
  }


+/**
+ * Called via ctx->Driver.DrawAtlasBitmap()
+ */
+static void
+st_DrawAtlasBitmaps(struct gl_context *ctx,
+const struct gl_bitmap_atlas *atlas,
+GLuint count, const GLubyte *ids)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+   struct st_texture_object *stObj = st_texture_object(atlas->texObj);
+   struct pipe_sampler_view *sv;
+   /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */
+   const float z = ctx->Current.RasterPos[2] * 2.0f - 1.0f;
+   const float *color = ctx->Current.RasterColor;
+   const float clip_x_scale = 2.0f / st->state.framebuffer.width;
+   const float clip_y_scale = 2.0f / st->state.framebuffer.height;
+   const unsigned num_verts = count * 4;
+   const unsigned num_vert_bytes = num_verts * sizeof(struct st_util_vertex);
+   struct st_util_vertex *verts;
+   struct pipe_vertex_buffer vb = {0};
+   unsigned i;
+
+   if (!st->bitmap.cache) {
+  init_bitmap_state(st);
+   }
+
+   st_flush_bitmap_cache(st);
+
+   st_validate_state(st);
+
+   sv = st_create_texture_sampler_view(pipe, stObj->pt);
+
+   setup_render_state(ctx, sv, color, true);
+
+   vb.stride = sizeof(struct st_util_vertex);
+
+   u_upload_alloc(st->uploader, 0, num_vert_bytes, 4,
+  &vb.buffer_offset, &vb.buffer, (void **) &verts);
+
+   /* build quads vertex data */
+   for (i = 0; i < count; i++) {
+  const GLfloat epsilon = 0.0001F;
+  const struct gl_bitmap_glyph *g = &atlas->glyphs[ids[i]];
+

Re: [Mesa-dev] [PATCH 1/2] mesa: implement a display list / glBitmap texture atlas

2016-02-15 Thread Nicolai Hähnle




On 12.02.2016 20:07, Brian Paul wrote:

This improves the performance of applications which use glXUseXFont()
or wglUseFontBitmaps() and glCallLists() to draw bitmap text.

Basically, we collect all the glBitmap images from the display lists
and put them into a texture atlas.  To render the bitmaps for a
glCallLists() command, we render a set of textured quads where each
quad is textured with one bitmap image.  Actually, the rendering part
has to be done by the Mesa driver or Mesa/gallium state tracker.

Note that GLUT demos that use glutBitmapCharacter() don't benefit
from this.
---
  src/mesa/main/dd.h |   8 ++
  src/mesa/main/dlist.c  | 383 +
  src/mesa/main/dlist.h  |  38 +
  src/mesa/main/mtypes.h |   1 +
  src/mesa/main/shared.c |  15 ++
  5 files changed, 445 insertions(+)

diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 19ef304..5d1370c 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -35,6 +35,7 @@

  #include "glheader.h"

+struct gl_bitmap_atlas;
  struct gl_buffer_object;
  struct gl_context;
  struct gl_display_list;
@@ -154,6 +155,13 @@ struct dd_function_table {
   GLint x, GLint y, GLsizei width, GLsizei height,
   const struct gl_pixelstore_attrib *unpack,
   const GLubyte *bitmap );
+
+   /**
+* Called by display list code for optimized glCallLists/glBitmap rendering
+*/
+   void (*DrawAtlasBitmaps)(struct gl_context *ctx,
+const struct gl_bitmap_atlas *atlas,
+GLuint count, const GLubyte *ids);
 /*@}*/


diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index 0e25efb..1927068 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -72,6 +72,9 @@
  #include "vbo/vbo.h"


+#define USE_BITMAP_ATLAS 1
+
+

  /**
   * Other parts of Mesa (such as the VBO module) can plug into the display
@@ -606,6 +609,259 @@ void mesa_print_display_list(GLuint list);


  /**
+ * Does the given display list only contain a single glBitmap call?
+ */
+static bool
+is_bitmap_list(const struct gl_display_list *dlist)
+{
+   const Node *n = dlist->Head;
+   if (n[0].opcode == OPCODE_BITMAP) {
+  n += InstSize[OPCODE_BITMAP];
+  if (n[0].opcode == OPCODE_END_OF_LIST)
+ return true;
+   }
+   return false;
+}
+
+
+/**
+ * Is the given display list an empty list?
+ */
+static bool
+is_empty_list(const struct gl_display_list *dlist)
+{
+   const Node *n = dlist->Head;
+   return n[0].opcode == OPCODE_END_OF_LIST;
+}
+
+
+/**
+ * Delete/free a gl_bitmap_atlas.  Called during context tear-down.
+ */
+void
+_mesa_delete_bitmap_atlas(struct gl_context *ctx, struct gl_bitmap_atlas 
*atlas)
+{
+   if (atlas->texObj) {
+  ctx->Driver.DeleteTexture(ctx, atlas->texObj);
+   }
+   free(atlas->glyphs);
+}
+
+
+/**
+ * Lookup a gl_bitmap_atlas by listBase ID.
+ */
+static struct gl_bitmap_atlas *
+lookup_bitmap_atlas(struct gl_context *ctx, GLuint listBase)
+{
+   struct gl_bitmap_atlas *atlas;
+
+   assert(listBase > 0);
+   atlas = _mesa_HashLookup(ctx->Shared->BitmapAtlas, listBase);
+   return atlas;
+}
+
+
+/**
+ * Create new bitmap atlas and insert into hash table.
+ */
+static struct gl_bitmap_atlas *
+alloc_bitmap_atlas(struct gl_context *ctx, GLuint listBase)
+{
+   struct gl_bitmap_atlas *atlas;
+
+   assert(listBase > 0);
+   assert(_mesa_HashLookup(ctx->Shared->BitmapAtlas, listBase) == NULL);
+
+   atlas = calloc(1, sizeof(*atlas));
+   if (atlas) {
+  _mesa_HashInsert(ctx->Shared->BitmapAtlas, listBase, atlas);
+   }
+
+   return atlas;
+}
+
+
+/**
+ * Try to build a bitmap atlas.  This involves examining a sequence of
+ * display lists which contain glBitmap commands and putting the bitmap
+ * images into a texture map (the atlas).
+ * If we succeed, gl_bitmap_atlas::complete will be set to true.
+ * If we fail, gl_bitmap_atlas::incomplete will be set to true.
+ */
+static void
+build_bitmap_atlas(struct gl_context *ctx, struct gl_bitmap_atlas *atlas,
+   GLuint listBase)
+{
+   unsigned i, row_height = 0, xpos = 0, ypos = 0;
+   GLubyte *map;
+   GLint map_stride;
+
+   assert(atlas);
+   assert(!atlas->complete);
+   assert(atlas->numBitmaps > 0);
+
+   /* We use a rectangle texture (non-normalized coords) for the atlas */
+   assert(ctx->Extensions.NV_texture_rectangle);
+
+   atlas->texWidth = 1024;
+   atlas->texHeight = 0;  /* determined below */


I don't see explicit checks for either NV_texture_rectangle or max 
texture size >= 1024 anywhere.


I see two alternative ways of handling this: either add an explicit 
check in render_bitmap_atlas, or expect drivers to only install 
DrawAtlasBitmaps when those preconditions are satisfied (in which case 
this should probably be documented in dd.h, and the st/mesa patch adjusted).



+
+   atlas->glyphs = malloc(atlas->numBitmaps * sizeof(atlas->glyphs[0]));
+   if (!atlas->glyphs) {
+  /* give up */
+

Re: [Mesa-dev] [PATCH] glsl: reject explicit location on atomic counter uniforms

2016-02-15 Thread Nicolai Hähnle




On 12.02.2016 17:53, Timothy Arceri wrote:

On Thu, 2016-02-11 at 20:10 -0500, Ilia Mirkin wrote:

This fixes

dEQP-GLES31.functional.uniform_location.negative.atomic_fragment
dEQP-GLES31.functional.uniform_location.negative.atomic_vertex

Both of which have lines like

layout(location = 3, binding = 0, offset = 0) uniform atomic_uint
uni0;

The ARB_explicit_uniform_location spec makes a very tangential
mention
regarding atomic counters, but location isn't something that makes
sense
with them.

Signed-off-by: Ilia Mirkin 
---

Had no clue where to stick this check... this seemed like as good a
place as any.

  src/compiler/glsl/ast_to_hir.cpp | 5 +
  1 file changed, 5 insertions(+)

diff --git a/src/compiler/glsl/ast_to_hir.cpp
b/src/compiler/glsl/ast_to_hir.cpp
index dbeb5c0..9fce06b 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -4179,6 +4179,11 @@ ast_declarator_list::hir(exec_list
*instructions,
  state->atomic_counter_offsets[qual_binding] =
qual_offset;
   }
}


Maybe we should just make this:
else {
   _mesa_glsl_error(, state, "invalid atomic counter layout
qualifier");
}

??


FWIW, I like Ilia's original message better because it gives the user 
more information about why exactly their layout qualifier is invalid. 
Helpful error messages are a good thing.


That said, I won't complain too loudly if making the code simpler or 
easier to follow ends up making the error messages slightly less helpful.


Cheers,
Nicolai




+
+  if (type->qualifier.flags.q.explicit_location) {
+ _mesa_glsl_error(, state,
+  "atomic counters cannot have an explicit
location");
+  }
 }

 if (this->declarations.is_empty()) {

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] mesa: implement a display list / glBitmap texture atlas

2016-02-15 Thread Nicolai Hähnle


On 15.02.2016 11:31, Brian Paul wrote:

On 02/15/2016 08:45 AM, Nicolai Hähnle wrote:



On 12.02.2016 20:07, Brian Paul wrote:

This improves the performance of applications which use glXUseXFont()
or wglUseFontBitmaps() and glCallLists() to draw bitmap text.

Basically, we collect all the glBitmap images from the display lists
and put them into a texture atlas.  To render the bitmaps for a
glCallLists() command, we render a set of textured quads where each
quad is textured with one bitmap image.  Actually, the rendering part
has to be done by the Mesa driver or Mesa/gallium state tracker.

Note that GLUT demos that use glutBitmapCharacter() don't benefit
from this.
---
  src/mesa/main/dd.h |   8 ++
  src/mesa/main/dlist.c  | 383
+
  src/mesa/main/dlist.h  |  38 +
  src/mesa/main/mtypes.h |   1 +
  src/mesa/main/shared.c |  15 ++
  5 files changed, 445 insertions(+)

diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 19ef304..5d1370c 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -35,6 +35,7 @@

  #include "glheader.h"

+struct gl_bitmap_atlas;
  struct gl_buffer_object;
  struct gl_context;
  struct gl_display_list;
@@ -154,6 +155,13 @@ struct dd_function_table {
 GLint x, GLint y, GLsizei width, GLsizei height,
 const struct gl_pixelstore_attrib *unpack,
 const GLubyte *bitmap );
+
+   /**
+* Called by display list code for optimized glCallLists/glBitmap
rendering
+*/
+   void (*DrawAtlasBitmaps)(struct gl_context *ctx,
+const struct gl_bitmap_atlas *atlas,
+GLuint count, const GLubyte *ids);
 /*@}*/


diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index 0e25efb..1927068 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -72,6 +72,9 @@
  #include "vbo/vbo.h"


+#define USE_BITMAP_ATLAS 1
+
+

  /**
   * Other parts of Mesa (such as the VBO module) can plug into the
display
@@ -606,6 +609,259 @@ void mesa_print_display_list(GLuint list);


  /**
+ * Does the given display list only contain a single glBitmap call?
+ */
+static bool
+is_bitmap_list(const struct gl_display_list *dlist)
+{
+   const Node *n = dlist->Head;
+   if (n[0].opcode == OPCODE_BITMAP) {
+  n += InstSize[OPCODE_BITMAP];
+  if (n[0].opcode == OPCODE_END_OF_LIST)
+ return true;
+   }
+   return false;
+}
+
+
+/**
+ * Is the given display list an empty list?
+ */
+static bool
+is_empty_list(const struct gl_display_list *dlist)
+{
+   const Node *n = dlist->Head;
+   return n[0].opcode == OPCODE_END_OF_LIST;
+}
+
+
+/**
+ * Delete/free a gl_bitmap_atlas.  Called during context tear-down.
+ */
+void
+_mesa_delete_bitmap_atlas(struct gl_context *ctx, struct
gl_bitmap_atlas *atlas)
+{
+   if (atlas->texObj) {
+  ctx->Driver.DeleteTexture(ctx, atlas->texObj);
+   }
+   free(atlas->glyphs);
+}
+
+
+/**
+ * Lookup a gl_bitmap_atlas by listBase ID.
+ */
+static struct gl_bitmap_atlas *
+lookup_bitmap_atlas(struct gl_context *ctx, GLuint listBase)
+{
+   struct gl_bitmap_atlas *atlas;
+
+   assert(listBase > 0);
+   atlas = _mesa_HashLookup(ctx->Shared->BitmapAtlas, listBase);
+   return atlas;
+}
+
+
+/**
+ * Create new bitmap atlas and insert into hash table.
+ */
+static struct gl_bitmap_atlas *
+alloc_bitmap_atlas(struct gl_context *ctx, GLuint listBase)
+{
+   struct gl_bitmap_atlas *atlas;
+
+   assert(listBase > 0);
+   assert(_mesa_HashLookup(ctx->Shared->BitmapAtlas, listBase) ==
NULL);
+
+   atlas = calloc(1, sizeof(*atlas));
+   if (atlas) {
+  _mesa_HashInsert(ctx->Shared->BitmapAtlas, listBase, atlas);
+   }
+
+   return atlas;
+}
+
+
+/**
+ * Try to build a bitmap atlas.  This involves examining a sequence of
+ * display lists which contain glBitmap commands and putting the bitmap
+ * images into a texture map (the atlas).
+ * If we succeed, gl_bitmap_atlas::complete will be set to true.
+ * If we fail, gl_bitmap_atlas::incomplete will be set to true.
+ */
+static void
+build_bitmap_atlas(struct gl_context *ctx, struct gl_bitmap_atlas
*atlas,
+   GLuint listBase)
+{
+   unsigned i, row_height = 0, xpos = 0, ypos = 0;
+   GLubyte *map;
+   GLint map_stride;
+
+   assert(atlas);
+   assert(!atlas->complete);
+   assert(atlas->numBitmaps > 0);
+
+   /* We use a rectangle texture (non-normalized coords) for the
atlas */
+   assert(ctx->Extensions.NV_texture_rectangle);
+
+   atlas->texWidth = 1024;
+   atlas->texHeight = 0;  /* determined below */


I don't see explicit checks for either NV_texture_rectangle or max
texture size >= 1024 anywhere.

I see two alternative ways of handling this: either add an explicit
check in render_bitmap_atlas, or expect drivers to only install
DrawAtlasBitmaps when those preconditions are satisfied (in which case
this should probably be documented in dd.h, and the st/mesa pat

[Mesa-dev] [PATCH] st/mesa: count shader images in MaxCombinedShaderOutputResources

2016-02-15 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

---
This is on top of Ilia's Gallium images series. Ilia, I think it makes sense
for you to include this in your initial push if you agree.

 src/mesa/state_tracker/st_extensions.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index 5763ba7..e58ff83 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -373,6 +373,7 @@ void st_init_limits(struct pipe_screen *screen,
  c->Program[MESA_SHADER_TESS_EVAL].MaxImageUniforms +
  c->Program[MESA_SHADER_GEOMETRY].MaxImageUniforms +
  c->Program[MESA_SHADER_FRAGMENT].MaxImageUniforms;
+   c->MaxCombinedShaderOutputResources += c->MaxCombinedImageUniforms;
c->MaxImageUnits = MAX_IMAGE_UNITS;
c->MaxImageSamples = 0; /* XXX */
if (c->MaxCombinedImageUniforms) {
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 09/25] radeonsi: add code for combining and uploading shaders from 3 shader parts

2016-02-16 Thread Nicolai Hähnle


On 15.02.2016 18:59, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

---
  src/gallium/drivers/radeonsi/si_shader.c | 35 
  src/gallium/drivers/radeonsi/si_shader.h |  9 
  2 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index dbb9217..a6a0984 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4036,26 +4036,45 @@ void si_shader_apply_scratch_relocs(struct si_context 
*sctx,

  int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader 
*shader)
  {
-   const struct radeon_shader_binary *binary = >binary;
-   unsigned code_size = binary->code_size + binary->rodata_size;
+   const struct radeon_shader_binary *prolog =
+   shader->prolog ? >prolog->binary : NULL;
+   const struct radeon_shader_binary *epilog =
+   shader->epilog ? >epilog->binary : NULL;
+   const struct radeon_shader_binary *mainb = >binary;
+   unsigned bo_size =
+   (prolog ? prolog->code_size : 0) +
+   mainb->code_size +
+   (epilog ? epilog->code_size : mainb->rodata_size);
unsigned char *ptr;

+   assert(!prolog || !prolog->rodata_size);
+   assert((!prolog && !epilog) || !mainb->rodata_size);
+   assert(!epilog || !epilog->rodata_size);


Strictly speaking it should be possible for main to have rodata if there 
is a prolog but no epilog, right? In any case, patches 1-9 are


Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>


+
r600_resource_reference(>bo, NULL);
shader->bo = si_resource_create_custom(>b.b,
   PIPE_USAGE_IMMUTABLE,
-  code_size);
+  bo_size);
if (!shader->bo)
return -ENOMEM;

+   /* Upload. */
ptr = sscreen->b.ws->buffer_map(shader->bo->buf, NULL,
PIPE_TRANSFER_READ_WRITE);
-   util_memcpy_cpu_to_le32(ptr, binary->code, binary->code_size);
-   if (binary->rodata_size > 0) {
-   ptr += binary->code_size;
-   util_memcpy_cpu_to_le32(ptr, binary->rodata,
-   binary->rodata_size);
+
+   if (prolog) {
+   util_memcpy_cpu_to_le32(ptr, prolog->code, prolog->code_size);
+   ptr += prolog->code_size;
}

+   util_memcpy_cpu_to_le32(ptr, mainb->code, mainb->code_size);
+   ptr += mainb->code_size;
+
+   if (epilog)
+   util_memcpy_cpu_to_le32(ptr, epilog->code, epilog->code_size);
+   else if (mainb->rodata_size > 0)
+   util_memcpy_cpu_to_le32(ptr, mainb->rodata, mainb->rodata_size);
+
sscreen->b.ws->buffer_unmap(shader->bo->buf);
return 0;
  }
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 9331156..4c3c14a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -304,6 +304,9 @@ struct si_shader {
struct si_shader_selector   *selector;
struct si_shader*next_variant;

+   struct si_shader_part   *prolog;
+   struct si_shader_part   *epilog;
+
struct si_shader*gs_copy_shader;
struct si_pm4_state *pm4;
struct r600_resource*bo;
@@ -322,6 +325,12 @@ struct si_shader {
unsignednr_param_exports;
  };

+struct si_shader_part {
+   struct si_shader_part *next;
+   struct radeon_shader_binary binary;
+   struct si_shader_config config;
+};
+
  static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx)
  {
if (sctx->gs_shader.cso)


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 11/25] radeonsi: first bits for non-monolithic shaders

2016-02-16 Thread Nicolai Hähnle


On 15.02.2016 18:59, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

---
  src/gallium/drivers/radeonsi/si_pipe.c   |  1 +
  src/gallium/drivers/radeonsi/si_pipe.h   |  3 ++
  src/gallium/drivers/radeonsi/si_shader.c | 53 
  src/gallium/drivers/radeonsi/si_shader.h |  2 +-
  4 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index fa60732..448fe88 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -600,6 +600,7 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws)

sscreen->b.has_cp_dma = true;
sscreen->b.has_streamout = true;
+   sscreen->use_monolithic_shaders = true;

if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE))
sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | 
DBG_CS;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index b5790d6..2a2455c 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -84,6 +84,9 @@ struct si_compute;
  struct si_screen {
struct r600_common_screen   b;
unsignedgs_table_depth;
+
+   /* Whether shaders are monolithic (1-part) or separate (3-part). */
+   booluse_monolithic_shaders;
  };

  struct si_blend_color {
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index b058019..b74ed1e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -70,6 +70,12 @@ struct si_shader_context

unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */
bool is_gs_copy_shader;
+
+   /* Whether to generate the optimized shader variant compiled as a whole
+* (without a prolog and epilog)
+*/
+   bool is_monolithic;
+
int param_streamout_config;
int param_streamout_write_index;
int param_streamout_offset[4];
@@ -3657,8 +3663,10 @@ static void create_function(struct si_shader_context 
*ctx)
struct lp_build_tgsi_context *bld_base = >radeon_bld.soa.bld_base;
struct gallivm_state *gallivm = bld_base->base.gallivm;
struct si_shader *shader = ctx->shader;
-   LLVMTypeRef params[SI_NUM_PARAMS], v2i32, v3i32;
+   LLVMTypeRef params[SI_NUM_PARAMS + SI_NUM_VERTEX_BUFFERS], v2i32, v3i32;
+   LLVMTypeRef returns[16+32*4];


This is a bit of a magic number; I guess it is something like max parameters 
plus attributes. Can you replace it with the appropriate defines?


Apart from this, patches 10-11 are

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>


unsigned i, last_array_pointer, last_sgpr, num_params;
+   unsigned num_returns = 0;

v2i32 = LLVMVectorType(ctx->i32, 2);
v3i32 = LLVMVectorType(ctx->i32, 3);
@@ -3785,7 +3793,7 @@ static void create_function(struct si_shader_context *ctx)

assert(num_params <= Elements(params));

-   si_create_function(ctx, NULL, 0, params,
+   si_create_function(ctx, returns, num_returns, params,
   num_params, last_array_pointer, last_sgpr);

shader->num_input_sgprs = 0;
@@ -4492,9 +4500,11 @@ static void si_init_shader_ctx(struct si_shader_context 
*ctx,
bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
  }

-int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
-struct si_shader *shader,
-struct pipe_debug_callback *debug)
+static int si_compile_tgsi_shader(struct si_screen *sscreen,
+ LLVMTargetMachineRef tm,
+ struct si_shader *shader,
+ bool is_monolithic,
+ struct pipe_debug_callback *debug)
  {
struct si_shader_selector *sel = shader->selector;
struct tgsi_token *tokens = sel->tokens;
@@ -4524,6 +4534,7 @@ int si_shader_create(struct si_screen *sscreen, 
LLVMTargetMachineRef tm,

si_init_shader_ctx(, sscreen, shader, tm,
   poly_stipple ? _shader_info : >info);
+   ctx.is_monolithic = is_monolithic;

shader->uses_instanceid = sel->info.uses_instanceid;

@@ -4604,14 +4615,6 @@ int si_shader_create(struct si_screen *sscreen, 
LLVMTargetMachineRef tm,
goto out;
}

-   si_shader_dump(sscreen, shader, debug, ctx.type);
-
-   r = si_shader_binary_upload(sscreen, shader);
-   if (r) {
-   fprintf(stderr, "LLVM failed to upload shader\n");
-   goto out;
-   }
-
radeon_llvm_dispose(_bld);

/* Calculate the number of frag

Re: [Mesa-dev] [PATCH 12/25] radeonsi: add VS prolog

2016-02-16 Thread Nicolai Hähnle


On 15.02.2016 18:59, Marek Olšák wrote:

From: Marek Olšák 

This is disabled with use_monolithic_shaders = true.
---
  src/gallium/drivers/radeonsi/si_pipe.c   |  19 +++
  src/gallium/drivers/radeonsi/si_pipe.h   |   3 +
  src/gallium/drivers/radeonsi/si_shader.c | 236 ++-
  src/gallium/drivers/radeonsi/si_shader.h |   9 ++
  4 files changed, 266 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 448fe88..7ce9570 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -22,6 +22,7 @@
   */

  #include "si_pipe.h"
+#include "si_shader.h"
  #include "si_public.h"
  #include "sid.h"

@@ -536,6 +537,11 @@ static int si_get_shader_param(struct pipe_screen* 
pscreen, unsigned shader, enu
  static void si_destroy_screen(struct pipe_screen* pscreen)
  {
struct si_screen *sscreen = (struct si_screen *)pscreen;
+   struct si_shader_part *parts[] = {
+   sscreen->vs_prologs,
+   /* this will be filled with other shader parts */
+   };
+   unsigned i;

if (!sscreen)
return;
@@ -543,6 +549,18 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
if (!sscreen->b.ws->unref(sscreen->b.ws))
return;

+   /* Free shader parts. */
+   for (i = 0; i < ARRAY_SIZE(parts); i++) {
+   while (parts[i]) {
+   struct si_shader_part *part = parts[i];
+
+   parts[i] = part->next;
+   radeon_shader_binary_clean(>binary);
+   FREE(part);
+   }
+   }
+   pipe_mutex_destroy(sscreen->shader_parts_mutex);
+
r600_destroy_common_screen(>b);
  }

@@ -600,6 +618,7 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws)

sscreen->b.has_cp_dma = true;
sscreen->b.has_streamout = true;
+   pipe_mutex_init(sscreen->shader_parts_mutex);
sscreen->use_monolithic_shaders = true;

if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE))
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 2a2455c..f4bafc2 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -87,6 +87,9 @@ struct si_screen {

/* Whether shaders are monolithic (1-part) or separate (3-part). */
booluse_monolithic_shaders;
+
+   pipe_mutex  shader_parts_mutex;
+   struct si_shader_part   *vs_prologs;
  };

  struct si_blend_color {
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index b74ed1e..fbb8394 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -83,6 +83,7 @@ struct si_shader_context
int param_rel_auto_id;
int param_vs_prim_id;
int param_instance_id;
+   int param_vertex_index0;
int param_tes_u;
int param_tes_v;
int param_tes_rel_patch_id;
@@ -432,7 +433,11 @@ static void declare_input_vs(
/* Build the attribute offset */
attribute_offset = lp_build_const_int32(gallivm, 0);

-   if (divisor) {
+   if (!ctx->is_monolithic) {
+   buffer_index = LLVMGetParam(radeon_bld->main_fn,
+   ctx->param_vertex_index0 +
+   input_index);
+   } else if (divisor) {
/* Build index from instance ID, start instance and divisor */
ctx->shader->uses_instanceid = true;
buffer_index = get_instance_index_for_fetch(>radeon_bld,
@@ -3711,6 +3716,15 @@ static void create_function(struct si_shader_context 
*ctx)
params[ctx->param_rel_auto_id = num_params++] = ctx->i32;
params[ctx->param_vs_prim_id = num_params++] = ctx->i32;
params[ctx->param_instance_id = num_params++] = ctx->i32;
+
+   if (!ctx->is_monolithic &&
+   !ctx->is_gs_copy_shader) {
+   /* Vertex load indices. */
+   ctx->param_vertex_index0 = num_params;
+
+   for (i = 0; i < shader->selector->info.num_inputs; i++)
+   params[num_params++] = ctx->i32;
+   }
break;

case TGSI_PROCESSOR_TESS_CTRL:
@@ -4678,6 +4692,203 @@ out:
return r;
  }

+/**
+ * Create, compile and return a shader part (prolog or epilog).
+ *
+ * \param sscreen  screen
+ * \param list list of shader parts of the same category
+ * \param key  shader part key
+ * \param tm   LLVM target machine
+ * \param debugdebug callback
+ * \param compile  the callback responsible for compilation
+ * \return

Re: [Mesa-dev] [PATCH 09/25] radeonsi: add code for combining and uploading shaders from 3 shader parts

2016-02-16 Thread Nicolai Hähnle


On 16.02.2016 11:10, Marek Olšák wrote:

On Tue, Feb 16, 2016 at 4:53 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote:

On 15.02.2016 18:59, Marek Olšák wrote:


From: Marek Olšák <marek.ol...@amd.com>

---
   src/gallium/drivers/radeonsi/si_shader.c | 35

   src/gallium/drivers/radeonsi/si_shader.h |  9 
   2 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c
b/src/gallium/drivers/radeonsi/si_shader.c
index dbb9217..a6a0984 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4036,26 +4036,45 @@ void si_shader_apply_scratch_relocs(struct
si_context *sctx,

   int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader
*shader)
   {
-   const struct radeon_shader_binary *binary = >binary;
-   unsigned code_size = binary->code_size + binary->rodata_size;
+   const struct radeon_shader_binary *prolog =
+   shader->prolog ? >prolog->binary : NULL;
+   const struct radeon_shader_binary *epilog =
+   shader->epilog ? >epilog->binary : NULL;
+   const struct radeon_shader_binary *mainb = >binary;
+   unsigned bo_size =
+   (prolog ? prolog->code_size : 0) +
+   mainb->code_size +
+   (epilog ? epilog->code_size : mainb->rodata_size);
 unsigned char *ptr;

+   assert(!prolog || !prolog->rodata_size);
+   assert((!prolog && !epilog) || !mainb->rodata_size);
+   assert(!epilog || !epilog->rodata_size);



Strictly speaking it should be possible for main to have rodata if there is
a prolog but no epilog, right? In any case, patches 1-9 are


Yes. The thing is, the epilog is always present and can't be removed.
If it's empty, it must contain s_endpgm at least.

On the other hand, empty prologs aren't even compiled and
shader->prolog is NULL in that case.

We could support rodata for main if the compiler reserved some free
space for the epilog between the code and rodata.


Ah, thanks for the explanation, I forgot about the s_endpgm.



Marek


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 13/25] radeonsi: add VS epilog

2016-02-16 Thread Nicolai Hähnle


On 15.02.2016 18:59, Marek Olšák wrote:

From: Marek Olšák 

It only exports the primitive ID.
Also used by TES when it's compiled as VS.

The VS input location of the primitive ID input is v2.


So the reason for having two unused outputs/return values of the main VS 
is so that the primitive ID can get passed through without any moves? Sounds 
good, but it may be worth documenting, e.g. where VS_EPILOG_PRIMID_LOC is 
defined.


Nicolai


---
  src/gallium/drivers/radeonsi/si_pipe.c   |   2 +-
  src/gallium/drivers/radeonsi/si_pipe.h   |   1 +
  src/gallium/drivers/radeonsi/si_shader.c | 172 +--
  src/gallium/drivers/radeonsi/si_shader.h |   4 +
  4 files changed, 168 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 7ce9570..2b5ce3a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -539,7 +539,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
struct si_screen *sscreen = (struct si_screen *)pscreen;
struct si_shader_part *parts[] = {
sscreen->vs_prologs,
-   /* this will be filled with other shader parts */
+   sscreen->vs_epilogs,
};
unsigned i;

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index f4bafc2..8d98779 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -90,6 +90,7 @@ struct si_screen {

pipe_mutex  shader_parts_mutex;
struct si_shader_part   *vs_prologs;
+   struct si_shader_part   *vs_epilogs;
  };

  struct si_blend_color {
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index fbb8394..0085c43 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -129,6 +129,7 @@ static void si_init_shader_ctx(struct si_shader_context 
*ctx,
   LLVMTargetMachineRef tm,
   struct tgsi_shader_info *info);

+#define VS_EPILOG_PRIMID_LOC 2

  #define PERSPECTIVE_BASE 0
  #define LINEAR_BASE 9
@@ -2230,16 +2231,26 @@ static void si_llvm_emit_vs_epilogue(struct 
lp_build_tgsi_context *bld_base)
  "");
}

-   /* Export PrimitiveID when PS needs it. */
-   if (si_vs_exports_prim_id(ctx->shader)) {
-   outputs[i].name = TGSI_SEMANTIC_PRIMID;
-   outputs[i].sid = 0;
-   outputs[i].values[0] = bitcast(bld_base, TGSI_TYPE_FLOAT,
-  get_primitive_id(bld_base, 0));
-   outputs[i].values[1] = bld_base->base.undef;
-   outputs[i].values[2] = bld_base->base.undef;
-   outputs[i].values[3] = bld_base->base.undef;
-   i++;
+   if (ctx->is_monolithic) {
+   /* Export PrimitiveID when PS needs it. */
+   if (si_vs_exports_prim_id(ctx->shader)) {
+   outputs[i].name = TGSI_SEMANTIC_PRIMID;
+   outputs[i].sid = 0;
+   outputs[i].values[0] = bitcast(bld_base, 
TGSI_TYPE_FLOAT,
+  
get_primitive_id(bld_base, 0));
+   outputs[i].values[1] = bld_base->base.undef;
+   outputs[i].values[2] = bld_base->base.undef;
+   outputs[i].values[3] = bld_base->base.undef;
+   i++;
+   }
+   } else {
+   /* Return the primitive ID from the LLVM function. */
+   ctx->return_value =
+   LLVMBuildInsertValue(gallivm->builder,
+ctx->return_value,
+bitcast(bld_base, TGSI_TYPE_FLOAT,
+get_primitive_id(bld_base, 
0)),
+VS_EPILOG_PRIMID_LOC, "");
}

si_llvm_export_vs(bld_base, outputs, i);
@@ -3724,6 +3735,11 @@ static void create_function(struct si_shader_context 
*ctx)

for (i = 0; i < shader->selector->info.num_inputs; i++)
params[num_params++] = ctx->i32;
+
+   /* PrimitiveID output. */
+   if (!shader->key.vs.as_es && !shader->key.vs.as_ls)
+   for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++)
+   returns[num_returns++] = ctx->f32;
}
break;

@@ -3758,6 +3774,11 @@ static void create_function(struct si_shader_context 
*ctx)
params[ctx->param_tes_v = num_params++] = ctx->f32;
params[ctx->param_tes_rel_patch_id =

Re: [Mesa-dev] [PATCH 14/25] radeonsi: add TCS epilog

2016-02-16 Thread Nicolai Hähnle


On 15.02.2016 18:59, Marek Olšák wrote:

From: Marek Olšák 

---
  src/gallium/drivers/radeonsi/si_pipe.c   |   1 +
  src/gallium/drivers/radeonsi/si_pipe.h   |   1 +
  src/gallium/drivers/radeonsi/si_shader.c | 163 ---
  src/gallium/drivers/radeonsi/si_shader.h |   3 +
  4 files changed, 155 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 2b5ce3a..645d418 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -540,6 +540,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
struct si_shader_part *parts[] = {
sscreen->vs_prologs,
sscreen->vs_epilogs,
+   sscreen->tcs_epilogs,
};
unsigned i;

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 8d98779..d9175b9 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -91,6 +91,7 @@ struct si_screen {
pipe_mutex  shader_parts_mutex;
struct si_shader_part   *vs_prologs;
struct si_shader_part   *vs_epilogs;
+   struct si_shader_part   *tcs_epilogs;
  };

  struct si_blend_color {
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 0085c43..bc6f8cd 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -109,9 +109,11 @@ struct si_shader_context
LLVMTypeRef i1;
LLVMTypeRef i8;
LLVMTypeRef i32;
+   LLVMTypeRef i64;
LLVMTypeRef i128;
LLVMTypeRef f32;
LLVMTypeRef v16i8;
+   LLVMTypeRef v2i32;
LLVMTypeRef v4i32;
LLVMTypeRef v4f32;
LLVMTypeRef v8i32;
@@ -2078,14 +2080,51 @@ static void si_write_tess_factors(struct 
lp_build_tgsi_context *bld_base,
  static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
  {
struct si_shader_context *ctx = si_shader_context(bld_base);
-   LLVMValueRef invocation_id;
+   LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;

+   rel_patch_id = get_rel_patch_id(ctx);
invocation_id = unpack_param(ctx, SI_PARAM_REL_IDS, 8, 5);
+   tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);

-   si_write_tess_factors(bld_base,
- get_rel_patch_id(ctx),
- invocation_id,
- get_tcs_out_current_patch_data_offset(ctx));
+   if (!ctx->is_monolithic) {
+   /* Return epilog parameters from this function. */
+   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+   LLVMValueRef ret = ctx->return_value;
+   LLVMValueRef rw_buffers, rw0, rw1, tf_soffset;
+   unsigned vgpr;
+
+   /* RW_BUFFERS pointer */
+   rw_buffers = LLVMGetParam(ctx->radeon_bld.main_fn,
+ SI_PARAM_RW_BUFFERS);
+   rw_buffers = LLVMBuildPtrToInt(builder, rw_buffers, ctx->i64, 
"");
+   rw_buffers = LLVMBuildBitCast(builder, rw_buffers, ctx->v2i32, 
"");
+   rw0 = LLVMBuildExtractElement(builder, rw_buffers,
+ bld_base->uint_bld.zero, "");
+   rw1 = LLVMBuildExtractElement(builder, rw_buffers,
+ bld_base->uint_bld.one, "");
+   ret = LLVMBuildInsertValue(builder, ret, rw0, 0, "");
+   ret = LLVMBuildInsertValue(builder, ret, rw1, 1, "");


Ugh, that's a bit ugly even if it ends up being a no-op in the final 
binary. Doesn't LLVM at least support vector return values or maybe even 
i64?


Nicolai


+   /* Tess factor buffer soffset is after user SGPRs. */
+   tf_soffset = LLVMGetParam(ctx->radeon_bld.main_fn,
+ SI_PARAM_TESS_FACTOR_OFFSET);
+   ret = LLVMBuildInsertValue(builder, ret, tf_soffset,
+  SI_TCS_NUM_USER_SGPR, "");
+
+   /* VGPRs */
+   rel_patch_id = bitcast(bld_base, TGSI_TYPE_FLOAT, rel_patch_id);
+   invocation_id = bitcast(bld_base, TGSI_TYPE_FLOAT, 
invocation_id);
+   tf_lds_offset = bitcast(bld_base, TGSI_TYPE_FLOAT, 
tf_lds_offset);
+
+   vgpr = SI_TCS_NUM_USER_SGPR + 1;
+   ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, 
"");
+   ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, 
"");
+   ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, 
"");
+   ctx->return_value = ret;
+   return;
+   }
+
+   si_write_tess_factors(bld_base, rel_patch_id, invocation_id,

Re: [Mesa-dev] [PATCH 16/25] radeonsi: add PS prolog

2016-02-16 Thread Nicolai Hähnle

So, patches 12-16 also look good to me except for the comments I've sent 
on 12-14.


I'm a bit worried though that there is a lot of "almost code 
duplication" around the handling of input and output positions etc., and 
maintaining the two different code paths for monolithic and 
non-monolithic is brittle.


Here's an approach that I think could work to clean this up: keep only 
the non-monolithic code for LLVM IR function generation. Then implement 
monolithic mode with a helper that takes a sequence of LLVM IR functions 
and generates a master function that pipes each function's output into 
the input of the next. Then set the functions as always inline and rely 
on LLVM's inliner to stitch everything together.


This ends up with slightly higher overhead for the monolithic code path 
(although the unconditional inlining should be fast), but it would help 
clean the code up tremendously.


Cheers,
Nicolai

On 15.02.2016 18:59, Marek Olšák wrote:

From: Marek Olšák 

---
  src/gallium/drivers/radeonsi/si_pipe.c  |   1 +
  src/gallium/drivers/radeonsi/si_pipe.h  |   1 +
  src/gallium/drivers/radeonsi/si_shader.c| 324 +++-
  src/gallium/drivers/radeonsi/si_shader.h|  14 +-
  src/gallium/drivers/radeonsi/si_state_shaders.c |   7 +
  5 files changed, 345 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 02c430d..44f6047 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -541,6 +541,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
sscreen->vs_prologs,
sscreen->vs_epilogs,
sscreen->tcs_epilogs,
+   sscreen->ps_prologs,
sscreen->ps_epilogs
};
unsigned i;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 5d204ec..1ac7bc4 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -92,6 +92,7 @@ struct si_screen {
struct si_shader_part   *vs_prologs;
struct si_shader_part   *vs_epilogs;
struct si_shader_part   *tcs_epilogs;
+   struct si_shader_part   *ps_prologs;
struct si_shader_part   *ps_epilogs;
  };

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 915ac1d..c6d4cb5 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -875,7 +875,8 @@ static int lookup_interp_param_index(unsigned interpolate, 
unsigned location)
  static unsigned select_interp_param(struct si_shader_context *ctx,
unsigned param)
  {
-   if (!ctx->shader->key.ps.prolog.force_persample_interp)
+   if (!ctx->shader->key.ps.prolog.force_persample_interp ||
+   !ctx->is_monolithic)
return param;

/* If the shader doesn't use center/centroid, just return the parameter.
@@ -1019,6 +1020,7 @@ static void declare_input_fs(
unsigned input_index,
const struct tgsi_full_declaration *decl)
  {
+   struct lp_build_context *base = &radeon_bld->soa.bld_base.base;
struct si_shader_context *ctx =
si_shader_context(&radeon_bld->soa.bld_base);
struct si_shader *shader = ctx->shader;
@@ -1026,6 +1028,26 @@ static void declare_input_fs(
LLVMValueRef interp_param = NULL;
int interp_param_idx;

+   /* Get colors from input VGPRs (set by the prolog). */
+   if (!ctx->is_monolithic &&
+   decl->Semantic.Name == TGSI_SEMANTIC_COLOR) {
+   unsigned i = decl->Semantic.Index;
+   unsigned colors_read = shader->selector->info.colors_read;
+   unsigned mask = colors_read >> (i * 4);
+   unsigned offset = SI_PARAM_POS_FIXED_PT + 1 +
+ (i ? util_bitcount(colors_read & 0xf) : 0);
+
+   radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
+   mask & 0x1 ? LLVMGetParam(main_fn, offset++) : 
base->undef;
+   radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
+   mask & 0x2 ? LLVMGetParam(main_fn, offset++) : 
base->undef;
+   radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 2)] =
+   mask & 0x4 ? LLVMGetParam(main_fn, offset++) : 
base->undef;
+   radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 3)] =
+   mask & 0x8 ? LLVMGetParam(main_fn, offset++) : 
base->undef;
+   return;
+   }
+
interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate,
 decl->Interp.Location);
if (interp_param_idx == -1)
@@ -3966,6 +3988,16 @@ static

Re: [Mesa-dev] [PATCH 21/25] radeonsi: use smaller types for some si_shader members

2016-02-16 Thread Nicolai Hähnle


On 15.02.2016 18:59, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

in order to decrease the shader size for a shader cache.
---
  src/gallium/drivers/radeonsi/si_shader.c | 3 +++
  src/gallium/drivers/radeonsi/si_shader.h | 6 +++---
  2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 2789788..3758009 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1889,6 +1889,7 @@ handle_semantic:
case TGSI_SEMANTIC_COLOR:
case TGSI_SEMANTIC_BCOLOR:
target = V_008DFC_SQ_EXP_PARAM + param_count;
+   assert(i < ARRAY_SIZE(shader->vs_output_param_offset));
shader->vs_output_param_offset[i] = param_count;
param_count++;
break;
@@ -1903,6 +1904,7 @@ handle_semantic:
case TGSI_SEMANTIC_TEXCOORD:
case TGSI_SEMANTIC_GENERIC:
target = V_008DFC_SQ_EXP_PARAM + param_count;
+   assert(i < ARRAY_SIZE(shader->vs_output_param_offset));
shader->vs_output_param_offset[i] = param_count;
param_count++;
break;
@@ -5268,6 +5270,7 @@ static bool si_get_vs_epilog(struct si_screen *sscreen,
unsigned offset = shader->nr_param_exports++;

epilog_key.vs_epilog.prim_id_param_offset = offset;
+   assert(index < ARRAY_SIZE(shader->vs_output_param_offset));
shader->vs_output_param_offset[index] = offset;
}

diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index ee81621..a77e54a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -359,10 +359,10 @@ struct si_shader {
ubyte   num_input_vgprs;
char    face_vgpr_index;

-   unsigned    vs_output_param_offset[PIPE_MAX_SHADER_OUTPUTS];
+   ubyte   vs_output_param_offset[40];


Magic number - please replace with an appropriate #define or at least 
explain. Apart from that, patches 17-21:


Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>


bool    uses_instanceid;
-   unsigned    nr_pos_exports;
-   unsigned    nr_param_exports;
+   ubyte   nr_pos_exports;
+   ubyte   nr_param_exports;
  };

  struct si_shader_part {


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 25/25] radeonsi: implement binary shaders & shader cache in memory

2016-02-16 Thread Nicolai Hähnle


On 15.02.2016 18:59, Marek Olšák wrote:

From: Marek Olšák 

---
  src/gallium/drivers/radeonsi/si_pipe.c  |   5 +-
  src/gallium/drivers/radeonsi/si_pipe.h  |  16 ++
  src/gallium/drivers/radeonsi/si_shader.h|   4 +-
  src/gallium/drivers/radeonsi/si_state.h |   2 +
  src/gallium/drivers/radeonsi/si_state_shaders.c | 234 +++-
  5 files changed, 254 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 75d4775..a576237 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -563,7 +563,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
}
}
pipe_mutex_destroy(sscreen->shader_parts_mutex);
-
+   si_destroy_shader_cache(sscreen);
r600_destroy_common_screen(&sscreen->b);
  }

@@ -611,7 +611,8 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws)
sscreen->b.b.resource_create = r600_resource_create_common;

if (!r600_common_screen_init(&sscreen->b, ws) ||
-   !si_init_gs_info(sscreen)) {
+   !si_init_gs_info(sscreen) ||
+   !si_init_shader_cache(sscreen)) {
FREE(sscreen);
return NULL;
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 1ac7bc4..ef860a5 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -80,6 +80,7 @@
  #define SI_MAX_BORDER_COLORS  4096

  struct si_compute;
+struct hash_table;

  struct si_screen {
struct r600_common_screen   b;
@@ -94,6 +95,21 @@ struct si_screen {
struct si_shader_part   *tcs_epilogs;
struct si_shader_part   *ps_prologs;
struct si_shader_part   *ps_epilogs;
+
+   /* Shader cache in memory.
+*
+* Design & limitations:
+* - The shader cache is per screen (= per process), never saved to
+*   disk, and skips redundant shader compilations from TGSI to 
bytecode.
+* - It can only be used with one-variant-per-shader support, in which
+*   case only the main (typically middle) part of shaders is cached.
+* - Only VS, TCS, TES, PS are cached, out of which only the hw VS
+*   variants of VS and TES are cached, so LS and ES aren't.
+* - GS and CS aren't cached, but it's certainly possible to cache
+*   those as well.
+*/
+   pipe_mutex  shader_cache_mutex;
+   struct hash_table   *shader_cache;
  };

  struct si_blend_color {
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 48e048d..7e46871 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -362,8 +362,10 @@ struct si_shader {
struct r600_resource*bo;
struct r600_resource*scratch_bo;
union si_shader_key key;
-   struct radeon_shader_binary binary;
bool    is_binary_shared;
+
+   /* The following data is all that's needed for binary shaders. */
+   struct radeon_shader_binary binary;
struct si_shader_config config;
struct si_shader_info   info;
  };
diff --git a/src/gallium/drivers/radeonsi/si_state.h 
b/src/gallium/drivers/radeonsi/si_state.h
index f64c4d4..40792cb 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -280,6 +280,8 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
  /* si_state_shader.c */
  bool si_update_shaders(struct si_context *sctx);
  void si_init_shader_functions(struct si_context *sctx);
+bool si_init_shader_cache(struct si_screen *sscreen);
+void si_destroy_shader_cache(struct si_screen *sscreen);

  /* si_state_draw.c */
  void si_emit_cache_flush(struct si_context *sctx, struct r600_atom *atom);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index c62cbb7..bc3e5be 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -32,10 +32,217 @@

  #include "tgsi/tgsi_parse.h"
  #include "tgsi/tgsi_ureg.h"
+#include "util/hash_table.h"
+#include "util/u_hash.h"
  #include "util/u_memory.h"
  #include "util/u_prim.h"
  #include "util/u_simple_shaders.h"

+/* SHADER_CACHE */
+
+/**
+ * Return the TGSI binary in a buffer. The first 4 bytes contain its size as
+ * integer.
+ */
+static void *si_get_tgsi_binary(struct si_shader_selector *sel)
+{
+   unsigned tgsi_size = tgsi_num_tokens(sel->tokens) *
+sizeof(struct tgsi_token);
+   unsigned size = 4 + tgsi_size + sizeof(sel->so);
+   char *result = (char*)MALLOC(size);
+
+   if (!result)

Re: [Mesa-dev] [PATCH 24/25] gallium/radeon: remove unused radeon_shader_binary_free_* functions

2016-02-16 Thread Nicolai Hähnle


Patches 22-24 are also

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

Very nice series overall!

On 15.02.2016 18:59, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>

---
  src/gallium/drivers/radeon/radeon_elf_util.c | 19 ---
  src/gallium/drivers/radeon/radeon_elf_util.h | 14 --
  2 files changed, 33 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_elf_util.c 
b/src/gallium/drivers/radeon/radeon_elf_util.c
index 70a2c4d..8aaa85d 100644
--- a/src/gallium/drivers/radeon/radeon_elf_util.c
+++ b/src/gallium/drivers/radeon/radeon_elf_util.c
@@ -195,22 +195,3 @@ const unsigned char *radeon_shader_binary_config_start(
}
return binary->config;
  }
-
-void radeon_shader_binary_free_relocs(struct radeon_shader_reloc *relocs,
-   unsigned reloc_count)
-{
-   FREE(relocs);
-}
-
-void radeon_shader_binary_free_members(struct radeon_shader_binary *binary,
-   unsigned free_relocs)
-{
-   FREE(binary->code);
-   FREE(binary->config);
-   FREE(binary->rodata);
-
-   if (free_relocs) {
-   radeon_shader_binary_free_relocs(binary->relocs,
-   binary->reloc_count);
-   }
-}
diff --git a/src/gallium/drivers/radeon/radeon_elf_util.h 
b/src/gallium/drivers/radeon/radeon_elf_util.h
index ea4ab2f..c2af9e0 100644
--- a/src/gallium/drivers/radeon/radeon_elf_util.h
+++ b/src/gallium/drivers/radeon/radeon_elf_util.h
@@ -47,18 +47,4 @@ const unsigned char *radeon_shader_binary_config_start(
const struct radeon_shader_binary *binary,
uint64_t symbol_offset);

-/**
- * Free all memory allocated for members of \p binary.  This function does
- * not free \p binary.
- *
- * @param free_relocs If false, reolc information will not be freed.
- */
-void radeon_shader_binary_free_members(struct radeon_shader_binary *binary,
-   unsigned free_relocs);
-
-/**
- * Free \p relocs and all member data.
- */
-void radeon_shader_binary_free_relocs(struct radeon_shader_reloc *relocs,
-   unsigned reloc_count);
  #endif /* RADEON_ELF_UTIL_H */


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 11/25] radeonsi: first bits for non-monolithic shaders

2016-02-16 Thread Nicolai Hähnle


On 16.02.2016 11:39, Marek Olšák wrote:

On Tue, Feb 16, 2016 at 5:01 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote:

On 15.02.2016 18:59, Marek Olšák wrote:


From: Marek Olšák <marek.ol...@amd.com>

---
   src/gallium/drivers/radeonsi/si_pipe.c   |  1 +
   src/gallium/drivers/radeonsi/si_pipe.h   |  3 ++
   src/gallium/drivers/radeonsi/si_shader.c | 53

   src/gallium/drivers/radeonsi/si_shader.h |  2 +-
   4 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c
b/src/gallium/drivers/radeonsi/si_pipe.c
index fa60732..448fe88 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -600,6 +600,7 @@ struct pipe_screen *radeonsi_screen_create(struct
radeon_winsys *ws)

 sscreen->b.has_cp_dma = true;
 sscreen->b.has_streamout = true;
+   sscreen->use_monolithic_shaders = true;

 if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE))
 sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS |
DBG_PS | DBG_CS;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
b/src/gallium/drivers/radeonsi/si_pipe.h
index b5790d6..2a2455c 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -84,6 +84,9 @@ struct si_compute;
   struct si_screen {
 struct r600_common_screen   b;
 unsigned    gs_table_depth;
+
+   /* Whether shaders are monolithic (1-part) or separate (3-part).
*/
+   bool    use_monolithic_shaders;
   };

   struct si_blend_color {
diff --git a/src/gallium/drivers/radeonsi/si_shader.c
b/src/gallium/drivers/radeonsi/si_shader.c
index b058019..b74ed1e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -70,6 +70,12 @@ struct si_shader_context

 unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader.
*/
 bool is_gs_copy_shader;
+
+   /* Whether to generate the optimized shader variant compiled as a
whole
+* (without a prolog and epilog)
+*/
+   bool is_monolithic;
+
 int param_streamout_config;
 int param_streamout_write_index;
 int param_streamout_offset[4];
@@ -3657,8 +3663,10 @@ static void create_function(struct
si_shader_context *ctx)
 struct lp_build_tgsi_context *bld_base =
&ctx->radeon_bld.soa.bld_base;
 struct gallivm_state *gallivm = bld_base->base.gallivm;
 struct si_shader *shader = ctx->shader;
-   LLVMTypeRef params[SI_NUM_PARAMS], v2i32, v3i32;
+   LLVMTypeRef params[SI_NUM_PARAMS + SI_NUM_VERTEX_BUFFERS], v2i32,
v3i32;
+   LLVMTypeRef returns[16+32*4];



This is a bit of a magic number, I guess something like max parameters plus
attributes. Can you replace it by the appropriate defines?


There is not a single definition that would express this clearly.

The prolog has to return up to 16 input SGPRs and 4-20 input VGPRs.
Additionally, the prolog returns other data in VGPRs. That's up to
4+16 VGPRs (16 vertex load addresses) for the VS and 20+8 VGPRs (2
vec4 colors) for the PS. The PS epilog returns one SGPR (but in s10 or
so, so we need to allocate 11) and 9*4 VGPRs at most. This all can
change in the future, who knows.

16+32*4 is much more than we'll ever need, but it shouldn't overflow
at least. Assertions also check if we don't overflow.


Hmm, I see. I guess I can live with it, as well as with the casts in 
patch 14.


Nicolai


Marek


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa: implement a display list / glBitmap texture atlas

2016-02-17 Thread Nicolai Hähnle


On 15.02.2016 11:52, Brian Paul wrote:

This improves the performance of applications which use glXUseXFont()
or wglUseFontBitmaps() and glCallLists() to draw bitmap text.

Basically, we collect all the glBitmap images from the display lists
and put them into a texture atlas.  To render the bitmaps for a
glCallLists() command, we render a set of textured quads where each
quad is textured with one bitmap image.  Actually, the rendering part
has to be done by the Mesa driver or Mesa/gallium state tracker.

Note that GLUT demos that use glutBitmapCharacter() don't benefit
from this.

v2, per Nicolai Hähnle:
- check the max tex rect size is at least 1024.
- add comment in dd.h that texture_rectangle is required.
- in _mesa_DeleteLists(), try to delete the atlas before the list(s)


Thanks!

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>


---
  src/mesa/main/dd.h |   9 ++
  src/mesa/main/dlist.c  | 385 +
  src/mesa/main/dlist.h  |  38 +
  src/mesa/main/mtypes.h |   1 +
  src/mesa/main/shared.c |  15 ++
  5 files changed, 448 insertions(+)

diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 19ef304..3f5aa5d 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -35,6 +35,7 @@

  #include "glheader.h"

+struct gl_bitmap_atlas;
  struct gl_buffer_object;
  struct gl_context;
  struct gl_display_list;
@@ -154,6 +155,14 @@ struct dd_function_table {
   GLint x, GLint y, GLsizei width, GLsizei height,
   const struct gl_pixelstore_attrib *unpack,
   const GLubyte *bitmap );
+
+   /**
+* Called by display list code for optimized glCallLists/glBitmap rendering
+* The driver must support texture rectangles of width 1024 or more.
+*/
+   void (*DrawAtlasBitmaps)(struct gl_context *ctx,
+const struct gl_bitmap_atlas *atlas,
+GLuint count, const GLubyte *ids);
 /*@}*/


diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index 0e25efb..afd2d83 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -72,6 +72,9 @@
  #include "vbo/vbo.h"


+#define USE_BITMAP_ATLAS 1
+
+

  /**
   * Other parts of Mesa (such as the VBO module) can plug into the display
@@ -606,6 +609,261 @@ void mesa_print_display_list(GLuint list);


  /**
+ * Does the given display list only contain a single glBitmap call?
+ */
+static bool
+is_bitmap_list(const struct gl_display_list *dlist)
+{
+   const Node *n = dlist->Head;
+   if (n[0].opcode == OPCODE_BITMAP) {
+  n += InstSize[OPCODE_BITMAP];
+  if (n[0].opcode == OPCODE_END_OF_LIST)
+ return true;
+   }
+   return false;
+}
+
+
+/**
+ * Is the given display list an empty list?
+ */
+static bool
+is_empty_list(const struct gl_display_list *dlist)
+{
+   const Node *n = dlist->Head;
+   return n[0].opcode == OPCODE_END_OF_LIST;
+}
+
+
+/**
+ * Delete/free a gl_bitmap_atlas.  Called during context tear-down.
+ */
+void
+_mesa_delete_bitmap_atlas(struct gl_context *ctx, struct gl_bitmap_atlas 
*atlas)
+{
+   if (atlas->texObj) {
+  ctx->Driver.DeleteTexture(ctx, atlas->texObj);
+   }
+   free(atlas->glyphs);
+}
+
+
+/**
+ * Lookup a gl_bitmap_atlas by listBase ID.
+ */
+static struct gl_bitmap_atlas *
+lookup_bitmap_atlas(struct gl_context *ctx, GLuint listBase)
+{
+   struct gl_bitmap_atlas *atlas;
+
+   assert(listBase > 0);
+   atlas = _mesa_HashLookup(ctx->Shared->BitmapAtlas, listBase);
+   return atlas;
+}
+
+
+/**
+ * Create new bitmap atlas and insert into hash table.
+ */
+static struct gl_bitmap_atlas *
+alloc_bitmap_atlas(struct gl_context *ctx, GLuint listBase)
+{
+   struct gl_bitmap_atlas *atlas;
+
+   assert(listBase > 0);
+   assert(_mesa_HashLookup(ctx->Shared->BitmapAtlas, listBase) == NULL);
+
+   atlas = calloc(1, sizeof(*atlas));
+   if (atlas) {
+  _mesa_HashInsert(ctx->Shared->BitmapAtlas, listBase, atlas);
+   }
+
+   return atlas;
+}
+
+
+/**
+ * Try to build a bitmap atlas.  This involves examining a sequence of
+ * display lists which contain glBitmap commands and putting the bitmap
+ * images into a texture map (the atlas).
+ * If we succeed, gl_bitmap_atlas::complete will be set to true.
+ * If we fail, gl_bitmap_atlas::incomplete will be set to true.
+ */
+static void
+build_bitmap_atlas(struct gl_context *ctx, struct gl_bitmap_atlas *atlas,
+   GLuint listBase)
+{
+   unsigned i, row_height = 0, xpos = 0, ypos = 0;
+   GLubyte *map;
+   GLint map_stride;
+
+   assert(atlas);
+   assert(!atlas->complete);
+   assert(atlas->numBitmaps > 0);
+
+   /* We use a rectangle texture (non-normalized coords) for the atlas */
+   assert(ctx->Extensions.NV_texture_rectangle);
+   assert(ctx->Const.MaxTextureRectSize >= 1024);
+
+   atlas->texWidth = 1024;
+   atlas->texHeight = 0;  /* determined below */
+
+   atlas->glyphs = ma

[Mesa-dev] [PATCH] st/mesa: disable depth/stencil/alpha tests in PBO upload

2016-02-18 Thread Nicolai Hähnle

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

Noticed by Brian Paul.
---
 src/mesa/state_tracker/st_cb_texture.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/mesa/state_tracker/st_cb_texture.c 
b/src/mesa/state_tracker/st_cb_texture.c
index d09c360..8ee95d2 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -1340,6 +1340,7 @@ try_pbo_upload_common(struct gl_context *ctx,
 CSO_BIT_FRAMEBUFFER |
 CSO_BIT_VIEWPORT |
 CSO_BIT_BLEND |
+CSO_BIT_DEPTH_STENCIL_ALPHA |
 CSO_BIT_RASTERIZER |
 CSO_BIT_STREAM_OUTPUTS |
 CSO_BITS_ALL_SHADERS));
@@ -1479,6 +1480,13 @@ try_pbo_upload_common(struct gl_context *ctx,
/* Blend state */
cso_set_blend(cso, &st->pbo_upload.blend);
 
+   /* Depth/stencil/alpha state */
+   {
+  struct pipe_depth_stencil_alpha_state dsa;
+  memset(&dsa, 0, sizeof(dsa));
+  cso_set_depth_stencil_alpha(cso, &dsa);
+   }
+
/* Rasterizer state */
cso_set_rasterizer(cso, &st->pbo_upload.raster);
 
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] st/mesa: fix pbo uploads

2016-02-18 Thread Nicolai Hähnle


On 18.02.2016 01:26, Ilia Mirkin wrote:

On Thu, Feb 18, 2016 at 1:07 AM, Ilia Mirkin <imir...@alum.mit.edu> wrote:

  - LOD must be provided in .w for TXF (even for buffer textures)
  - User buffer must be valid at draw time


Good catch.


  - Must have a sampler associated with the sampler view


Fine by me. Honestly, I'd prefer to relax the requirements for 
TXF/buffer textures, but if that's difficult for Nouveau I can accept it.


Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>



This makes PBO uploads work again on nouveau.

Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
---


Just realized that I also need to add CSO_BIT_FRAGMENT_SAMPLERS to the
save state. And also fixed a minor warning in my tree. Too minor to
resend, but final version available here:
https://github.com/imirkin/mesa/commit/c246a0e.patch


  src/mesa/state_tracker/st_cb_texture.c | 26 --
  1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_texture.c 
b/src/mesa/state_tracker/st_cb_texture.c
index d09c360..eb09f3d 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -1272,10 +1272,11 @@ create_pbo_upload_fs(struct st_context *st)
ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X));
 }

+   /* temp0.w = 0 */
+   ureg_MOV(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_W), ureg_imm1u(ureg, 
0));
+
 /* out = txf(sampler, temp0.x) */
-   ureg_TXF(ureg, out, TGSI_TEXTURE_BUFFER,
-  ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X),
-  sampler);
+   ureg_TXF(ureg, out, TGSI_TEXTURE_BUFFER, ureg_src(temp0), sampler);

 ureg_release_temporary(ureg, temp0);

@@ -1353,6 +1354,7 @@ try_pbo_upload_common(struct gl_context *ctx,
   + (upload_height - 1 + (depth - 1) * image_height) * stride;
struct pipe_sampler_view templ;
struct pipe_sampler_view *sampler_view;
+  struct pipe_sampler_state sampler = {0}, *samplers[1] = {&sampler};

/* This should be ensured by Mesa before calling our callbacks */
assert((last_element + 1) * bytes_per_pixel <= buffer->width0);
@@ -1361,6 +1363,7 @@ try_pbo_upload_common(struct gl_context *ctx,
   goto fail;

memset(&templ, 0, sizeof(templ));
+  templ.target = PIPE_BUFFER;
templ.format = src_format;
templ.u.buf.first_element = first_element;
templ.u.buf.last_element = last_element;
@@ -1376,6 +1379,8 @@ try_pbo_upload_common(struct gl_context *ctx,
cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, 1, &sampler_view);

pipe_sampler_view_reference(&sampler_view, NULL);
+
+  cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, 1, samplers);
 }

 /* Upload vertices */
@@ -1423,16 +1428,17 @@ try_pbo_upload_common(struct gl_context *ctx,
 }

 /* Upload constants */
+   /* Note: the user buffer must be valid until draw time */
+   struct {
+  int32_t xoffset;
+  int32_t yoffset;
+  int32_t stride;
+  int32_t image_size;
+   } constants;
+
 {
struct pipe_constant_buffer cb;

-  struct {
- int32_t xoffset;
- int32_t yoffset;
- int32_t stride;
- int32_t image_size;
-  } constants;
-
constants.xoffset = -xoffset + skip_pixels;
constants.yoffset = -yoffset;
constants.stride = stride;
--
2.4.10


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

< 1 2 3 4 5 6 7 8 9 10 >

401 - 500 of 4139 matches

Mail list logo