Re: [Mesa-dev] [PATCH] glsl: fix ldexp lowering if bitfield insert lowering is also requested

2016-12-05 Thread Michel Dänzer
On 04/12/16 01:09 AM, srol...@vmware.com wrote:
> From: Roland Scheidegger 
> 
> Trivial, this just resurrects the code which was there once upon a time
> (the code can't lower instructions generated in the lowering pass there,
> and even if it could it would probably be suboptimal).
> This fixes piglit mesa_shader_integer_functions fs-ldexp.shader_test and
> vs-ldexp.shader_test with llvmpipe.

... and in exchange broke the same tests with radeonsi. :}

shader_runner: ../../../src/compiler/glsl/ir_validate.cpp:662: virtual 
ir_visitor_status {anonymous}::ir_validate::visit_leave(ir_expression*): 
Assertion `ir->operands[3]->type == ir->type' failed.

Thread 1 "shader_runner" received signal SIGABRT, Aborted.
__GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:58
58  ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
(gdb) bt
#0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:58
#1  0x74b9940a in __GI_abort () at abort.c:89
#2  0x74b90e47 in __assert_fail_base (fmt=, 
assertion=assertion@entry=0x70fe7208 "ir->operands[3]->type == ir->type", 
file=file@entry=0x70fe6618 "../../../src/compiler/glsl/ir_validate.cpp", 
line=line@entry=662, 
function=function@entry=0x70fe7e00 <(anonymous 
namespace)::ir_validate::visit_leave(ir_expression*)::__PRETTY_FUNCTION__> 
"virtual ir_visitor_status 
{anonymous}::ir_validate::visit_leave(ir_expression*)") at assert.c:92
#3  0x74b90ef2 in __GI___assert_fail 
(assertion=assertion@entry=0x70fe7208 "ir->operands[3]->type == ir->type", 
file=file@entry=0x70fe6618 "../../../src/compiler/glsl/ir_validate.cpp", 
line=line@entry=662, 
function=function@entry=0x70fe7e00 <(anonymous 
namespace)::ir_validate::visit_leave(ir_expression*)::__PRETTY_FUNCTION__> 
"virtual ir_visitor_status 
{anonymous}::ir_validate::visit_leave(ir_expression*)") at assert.c:101
#4  0x70a42e21 in (anonymous namespace)::ir_validate::visit_leave 
(this=, ir=) at 
../../../src/compiler/glsl/ir_validate.cpp:662
#5  0x70a3e2bb in ir_expression::accept (this=0x55958340, 
v=0x7fffd120) at ../../../src/compiler/glsl/ir_hv_accept.cpp:141
#6  0x70a3e2bb in ir_expression::accept (this=0x559583d0, 
v=0x7fffd120) at ../../../src/compiler/glsl/ir_hv_accept.cpp:141
#7  0x70a3e2bb in ir_expression::accept (this=0x559585c0, 
v=0x7fffd120) at ../../../src/compiler/glsl/ir_hv_accept.cpp:141
#8  0x70a3e5ef in ir_assignment::accept (this=0x5594b9f0, 
v=0x7fffd120) at ../../../src/compiler/glsl/ir_hv_accept.cpp:309
#9  0x70a3e11d in visit_list_elements (statement_list=true, 
l=0x55951410, v=0x7fffd120) at 
../../../src/compiler/glsl/ir_hv_accept.cpp:55
#10 ir_function_signature::accept (this=0x559513c0, v=0x7fffd120) at 
../../../src/compiler/glsl/ir_hv_accept.cpp:115
#11 0x70a3e209 in visit_list_elements (statement_list=false, 
l=0x55951348, v=0x7fffd120) at 
../../../src/compiler/glsl/ir_hv_accept.cpp:55
#12 ir_function::accept (this=0x55951320, v=0x7fffd120) at 
../../../src/compiler/glsl/ir_hv_accept.cpp:127
#13 0x70a3dede in visit_list_elements (v=v@entry=0x7fffd120, 
l=l@entry=0x55953180, statement_list=statement_list@entry=true) at 
../../../src/compiler/glsl/ir_hv_accept.cpp:55
#14 0x70a3dd1f in ir_hierarchical_visitor::run 
(this=this@entry=0x7fffd120, 
instructions=instructions@entry=0x55953180) at 
../../../src/compiler/glsl/ir_hierarchical_visitor.cpp:364
#15 0x70a45c7a in validate_ir_tree 
(instructions=instructions@entry=0x55953180) at 
../../../src/compiler/glsl/ir_validate.cpp:932
#16 0x70960da9 in st_link_shader (ctx=0x559014c0, 
prog=0x55953920) at ../../../src/mesa/state_tracker/st_glsl_to_tgsi.cpp:6879
#17 0x7098cb15 in _mesa_glsl_link_shader (ctx=ctx@entry=0x559014c0, 
prog=prog@entry=0x55953920) at ../../../src/mesa/program/ir_to_mesa.cpp:3066
#18 0x7082c60d in _mesa_link_program (ctx=0x559014c0, 
shProg=0x55953920) at ../../../src/mesa/main/shaderapi.c:1089
#19 0x77ac8795 in stub_glLinkProgram (program=3) at 
/home/daenzer/src/piglit-git/piglit/tests/util/piglit-dispatch-gen.c:33005
#20 0xce9c in link_and_use_shaders () at 
/home/daenzer/src/piglit-git/piglit/tests/shaders/shader_runner.c:1040
#21 0x555656fb in init_test (file=0x7fffe9ec 
"/home/daenzer/src/piglit-git/piglit/tests/spec/mesa_shader_integer_functions/execution/built-in-functions/fs-ldexp.shader_test")
 at /home/daenzer/src/piglit-git/piglit/tests/shaders/shader_runner.c:3686
#22 0x55566341 in piglit_init (argc=2, argv=0x7fffe6b8) at 
/home/daenzer/src/piglit-git/piglit/tests/shaders/shader_runner.c:4012
#23 0x77b39361 in run_test (gl_fw=0x55780c20, argc=2, 
argv=0x7fffe6b8) at 

[Mesa-dev] [PATCH V2] i965: delay adding built-in uniforms to Parameters list

2016-12-05 Thread Timothy Arceri
This is a step towards using NIR optimisations over GLSL IR
optimisations. Delaying adding built-in uniforms until after
we convert to NIR gives it a chance to optimise them away.

V2: move the new code back to brw_link_shader()
---
 src/mesa/drivers/dri/i965/brw_link.cpp | 42 +++---
 1 file changed, 19 insertions(+), 23 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp 
b/src/mesa/drivers/dri/i965/brw_link.cpp
index 3f6041b..0eb26a5 100644
--- a/src/mesa/drivers/dri/i965/brw_link.cpp
+++ b/src/mesa/drivers/dri/i965/brw_link.cpp
@@ -228,29 +228,6 @@ brw_link_shader(struct gl_context *ctx, struct 
gl_shader_program *shProg)
 
   _mesa_copy_linked_program_data(shProg, shader);
 
-  /* Make a pass over the IR to add state references for any built-in
-   * uniforms that are used.  This has to be done now (during linking).
-   * Code generation doesn't happen until the first time this shader is
-   * used for rendering.  Waiting until then to generate the parameters is
-   * too late.  At that point, the values for the built-in uniforms won't
-   * get sent to the shader.
-   */
-  foreach_in_list(ir_instruction, node, shader->ir) {
- ir_variable *var = node->as_variable();
-
- if ((var == NULL) || (var->data.mode != ir_var_uniform)
- || (strncmp(var->name, "gl_", 3) != 0))
-continue;
-
- const ir_state_slot *const slots = var->get_state_slots();
- assert(slots != NULL);
-
- for (unsigned int i = 0; i < var->get_num_state_slots(); i++) {
-_mesa_add_state_reference(prog->Parameters,
-  (gl_state_index *) slots[i].tokens);
- }
-  }
-
   prog->SamplersUsed = shader->active_samplers;
   prog->ShadowSamplers = shader->shadow_samplers;
   _mesa_update_shader_textures_used(shProg, prog);
@@ -269,6 +246,25 @@ brw_link_shader(struct gl_context *ctx, struct 
gl_shader_program *shProg)
 
   prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
  compiler->scalar_stage[stage]);
+
+  /* Make a pass over the IR to add state references for any built-in
+   * uniforms that are used.  This has to be done now (during linking).
+   * Code generation doesn't happen until the first time this shader is
+   * used for rendering.  Waiting until then to generate the parameters is
+   * too late.  At that point, the values for the built-in uniforms won't
+   * get sent to the shader.
+   */
+  nir_foreach_variable(var, >nir->uniforms) {
+ if (strncmp(var->name, "gl_", 3) == 0) {
+const nir_state_slot *const slots = var->state_slots;
+assert(var->state_slots != NULL);
+
+for (unsigned int i = 0; i < var->num_state_slots; i++) {
+   _mesa_add_state_reference(prog->Parameters,
+ (gl_state_index *)slots[i].tokens);
+}
+ }
+  }
}
 
if ((ctx->_Shader->Flags & GLSL_DUMP) && shProg->Name != 0) {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965: Don't force SSO layout for VS->TCS.

2016-12-05 Thread Timothy Arceri
On Mon, 2016-12-05 at 21:13 -0800, Kenneth Graunke wrote:
> This was a hack which worked around the VS and TCS disagreeing on
> their
> shared interface due to the lack of varying packing.  In particular,
> it
> was needed by Piglit's tcs-input-read-array-interface test.
> 
> However, that was just one case where things could go awry, so the
> previous commit forcibly made interfaces match.  This hack is no
> longer
> necessary.
> 
> It also seems to be broken, though I'm not sure why.  It fixes Piglit
> regressions in spec/arb_shader_image_load_store/semantics from commit
> ec1f159ac81ed964415d102eed4a0a29be8e7937.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98893
> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp | 3 ++-
>  src/mesa/drivers/dri/i965/brw_vs.c | 4 +---
>  2 files changed, 3 insertions(+), 4 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
> b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
> index da2559c..c7278e4 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
> @@ -461,7 +461,8 @@ brw_compile_tcs(const struct brw_compiler
> *compiler,
> nir->info->patch_outputs_written = key->patch_outputs_written;
>  
> struct brw_vue_map input_vue_map;
> -   brw_compute_vue_map(devinfo, _vue_map, nir->info-
> >inputs_read, true);
> +   brw_compute_vue_map(devinfo, _vue_map, nir->info-
> >inputs_read,
> +   nir->info->separate_shader);
> brw_compute_tess_vue_map(_prog_data->vue_map,
>  nir->info->outputs_written,
>  nir->info->patch_outputs_written);
> diff --git a/src/mesa/drivers/dri/i965/brw_vs.c
> b/src/mesa/drivers/dri/i965/brw_vs.c
> index dea1cf4..9a1f934 100644
> --- a/src/mesa/drivers/dri/i965/brw_vs.c
> +++ b/src/mesa/drivers/dri/i965/brw_vs.c
> @@ -158,9 +158,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
>  
> brw_compute_vue_map(devinfo,
> _data.base.vue_map, outputs_written,
> -   prog ? prog->SeparateShader ||
> -  prog-
> >_LinkedShaders[MESA_SHADER_TESS_EVAL]
> -: false);
> +   vp->program.nir->info->separate_shader);

Nice. I think with my recent refactors these were the only major things
stopping us from moving the brw_compute_vue_map() calls
into brw_compile_vs() etc removing some code duplication from the
various code paths. I think there is still some refactoring that would
be required for handling edgeflag.

Anyway. This series is: 

Reviewed-by: Timothy Arceri 

>  
> if (0) {
>    _mesa_fprint_program_opt(stderr, >program,
> PROG_PRINT_DEBUG, true);
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallivm: use getHostCPUFeatures on x86/llvm-4.0+.

2016-12-05 Thread Michel Dänzer
On 06/12/16 02:39 AM, Tim Rowley wrote:
> Use llvm provided API based on cpuid rather than our own
> manually maintained list of mattr enabling/disabling.

This change broke the llvmpipe unit test lp_test_format for me:

Testing PIPE_FORMAT_R32_FLOAT (float) ...
FAILED
  Packed: 00 00 00 00
  Unpacked (0,0): 1 0 0 1 obtained
  0 0 0 1 expected
FAILED
  Packed: 00 00 80 bf
  Unpacked (0,0): 1 0 0 1 obtained
  -1 0 0 1 expected


This is on:

processor   : 0
vendor_id   : AuthenticAMD
cpu family  : 21
model   : 48
model name  : AMD A10-7850K Radeon R7, 12 Compute Cores 4C+8G
stepping: 1
microcode   : 0x6003106
cpu MHz : 4100.000
cache size  : 2048 KB
physical id : 0
siblings: 4
core id : 0
cpu cores   : 2
apicid  : 16
initial apicid  : 0
fpu : yes
fpu_exception   : yes
cpuid level : 13
wp  : yes
flags   : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov 
pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb 
rdtscp lm constant_tsc rep_good nopl nonstop_tsc extd_apicid aperfmperf 
eagerfpu pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 popcnt aes xsave 
avx f16c lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 
3dnowprefetch osvw ibs xop skinit wdt lwp fma4 tce nodeid_msr tbm topoext 
perfctr_core perfctr_nb bpext ptsc cpb hw_pstate vmmcall fsgsbase bmi1 xsaveopt 
arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists 
pausefilter pfthreshold overflow_recov
bugs: fxsave_leak sysret_ss_attrs null_seg
bogomips: 8200.42
TLB size: 1536 4K pages
clflush size: 64
cache_alignment : 64
address sizes   : 48 bits physical, 48 bits virtual
power management: ts ttp tm 100mhzsteps hwpstate cpb eff_freq_ro [13]



-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] genxml/gen9: Change the default of MI_SEMAPHORE_WAIT::RegisterPoleMode

2016-12-05 Thread Jason Ekstrand
We would really like it to be false as that's what you get on hardware that
doesn't have RegisterPoleMode (Sky Lake for example).  While we're at it,
we change it to a boolean.  This fixes dEQP-VK.synchronization.smoke.events
on Broxton.

Cc: "13.0" 
---
 src/intel/genxml/gen9.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml
index 211dd53..ecd7a05 100644
--- a/src/intel/genxml/gen9.xml
+++ b/src/intel/genxml/gen9.xml
@@ -3200,7 +3200,7 @@
   
   
 
-
+
 
   
   
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: delay adding built-in uniforms to Parameters list

2016-12-05 Thread Timothy Arceri
This is a step towards using NIR optimisations over GLSL IR
optimisations. Delaying adding built-in uniforms until after
we convert to NIR gives it a chance to optimise them away.
---
 src/mesa/drivers/dri/i965/brw_link.cpp  | 23 ---
 src/mesa/drivers/dri/i965/brw_program.c | 19 +++
 2 files changed, 19 insertions(+), 23 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp 
b/src/mesa/drivers/dri/i965/brw_link.cpp
index 3f6041b..1d0a85b 100644
--- a/src/mesa/drivers/dri/i965/brw_link.cpp
+++ b/src/mesa/drivers/dri/i965/brw_link.cpp
@@ -228,29 +228,6 @@ brw_link_shader(struct gl_context *ctx, struct 
gl_shader_program *shProg)
 
   _mesa_copy_linked_program_data(shProg, shader);
 
-  /* Make a pass over the IR to add state references for any built-in
-   * uniforms that are used.  This has to be done now (during linking).
-   * Code generation doesn't happen until the first time this shader is
-   * used for rendering.  Waiting until then to generate the parameters is
-   * too late.  At that point, the values for the built-in uniforms won't
-   * get sent to the shader.
-   */
-  foreach_in_list(ir_instruction, node, shader->ir) {
- ir_variable *var = node->as_variable();
-
- if ((var == NULL) || (var->data.mode != ir_var_uniform)
- || (strncmp(var->name, "gl_", 3) != 0))
-continue;
-
- const ir_state_slot *const slots = var->get_state_slots();
- assert(slots != NULL);
-
- for (unsigned int i = 0; i < var->get_num_state_slots(); i++) {
-_mesa_add_state_reference(prog->Parameters,
-  (gl_state_index *) slots[i].tokens);
- }
-  }
-
   prog->SamplersUsed = shader->active_samplers;
   prog->ShadowSamplers = shader->shadow_samplers;
   _mesa_update_shader_textures_used(shProg, prog);
diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index a502b8e..cc7ec94 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -90,6 +90,25 @@ brw_create_nir(struct brw_context *brw,
 
nir = brw_preprocess_nir(brw->screen->compiler, nir);
 
+   /* Make a pass over the IR to add state references for any built-in
+* uniforms that are used.  This has to be done now (during linking).
+* Code generation doesn't happen until the first time this shader is
+* used for rendering.  Waiting until then to generate the parameters is
+* too late.  At that point, the values for the built-in uniforms won't
+* get sent to the shader.
+*/
+   nir_foreach_variable(var, >uniforms) {
+  if (strncmp(var->name, "gl_", 3) == 0) {
+ const nir_state_slot *const slots = var->state_slots;
+ assert(var->state_slots != NULL);
+
+ for (unsigned int i = 0; i < var->num_state_slots; i++) {
+_mesa_add_state_reference(prog->Parameters,
+  (gl_state_index *)slots[i].tokens);
+ }
+  }
+   }
+
if (stage == MESA_SHADER_FRAGMENT) {
   static const struct nir_lower_wpos_ytransform_options wpos_options = {
  .state_tokens = {STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0},
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965: Unify shader interfaces explicitly.

2016-12-05 Thread Kenneth Graunke
A while ago, I made i965 start compiling shaders independently.  The VUE
map layouts were based entirely on each shader's input/output bitfields.
Assuming the interfaces match, this works out well - both sides will
compute the same layout, and outputs are correctly routed to inputs.

At the time, I had assumed that the linker would guarantee that the
interfaces match.  While it usually succeeds, it unfortunately seems
to fail in some cases.

For example, Piglit's tcs-input-read-array-interface test has a VS
output array with two elements, but the TCS only reads one.  The linker
isn't able to eliminate the unused element from the VS, which makes the
interfaces not match.

Another case is where a shader other than the last writes clip/cull
distances.  These should be demoted to ordinary varyings, but they
currently aren't - so we think they still have some special meaning,
and prevent them from being eliminated.

Fixing the linker to guarantee this in all cases is complicated.  It
needs to be able to optimize out dead code.  It's tied into varying
packing and other messiness.  While we can certainly improve it---and
should---I'd rather not rely on it being correct in all cases.

This patch ORs adjacent stages' input/output bitfields together,
ensuring that their interface (and hence VUE map layout) will be
compatible.  This should safeguard us against linker insufficiencies.

Fixes line rendering in Dolphin, and the Piglit test based on it:
spec/glsl-1.50/execution/geometry/clip-distance-vs-gs-out.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97232
Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_link.cpp | 29 +
 1 file changed, 29 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp 
b/src/mesa/drivers/dri/i965/brw_link.cpp
index 3f6041b..871a2fc 100644
--- a/src/mesa/drivers/dri/i965/brw_link.cpp
+++ b/src/mesa/drivers/dri/i965/brw_link.cpp
@@ -209,12 +209,32 @@ brw_new_shader(gl_shader_stage stage)
return >base;
 }
 
+static void
+unify_interfaces(struct shader_info **infos)
+{
+   struct shader_info *prev_info = NULL;
+
+   for (unsigned i = MESA_SHADER_VERTEX; i < MESA_SHADER_FRAGMENT; i++) {
+  if (!infos[i])
+ continue;
+
+  if (prev_info) {
+ prev_info->outputs_written |= infos[i]->inputs_read;
+ prev_info->patch_outputs_written |= infos[i]->patch_inputs_read;
+ infos[i]->inputs_read |= prev_info->outputs_written;
+ infos[i]->patch_inputs_read |= prev_info->patch_outputs_written;
+  }
+  prev_info = infos[i];
+   }
+}
+
 extern "C" GLboolean
 brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
 {
struct brw_context *brw = brw_context(ctx);
const struct brw_compiler *compiler = brw->screen->compiler;
unsigned int stage;
+   struct shader_info *infos[MESA_SHADER_STAGES] = { 0, };
 
for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
   struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
@@ -269,8 +289,17 @@ brw_link_shader(struct gl_context *ctx, struct 
gl_shader_program *shProg)
 
   prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
  compiler->scalar_stage[stage]);
+  infos[stage] = prog->nir->info;
}
 
+   /* The linker tries to dead code eliminate unused varying components,
+* and make sure interfaces match.  But it isn't able to do so in all
+* cases.  So, explicitly make the interfaces match by OR'ing together
+* the inputs_read/outputs_written bitfields of adjacent stages.
+*/
+   if (!shProg->SeparateShader)
+  unify_interfaces(infos);
+
if ((ctx->_Shader->Flags & GLSL_DUMP) && shProg->Name != 0) {
   for (unsigned i = 0; i < shProg->NumShaders; i++) {
  const struct gl_shader *sh = shProg->Shaders[i];
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] i965: Don't force SSO layout for VS->TCS.

2016-12-05 Thread Kenneth Graunke
This was a hack which worked around the VS and TCS disagreeing on their
shared interface due to the lack of varying packing.  In particular, it
was needed by Piglit's tcs-input-read-array-interface test.

However, that was just one case where things could go awry, so the
previous commit forcibly made interfaces match.  This hack is no longer
necessary.

It also seems to be broken, though I'm not sure why.  It fixes Piglit
regressions in spec/arb_shader_image_load_store/semantics from commit
ec1f159ac81ed964415d102eed4a0a29be8e7937.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98893
Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp | 3 ++-
 src/mesa/drivers/dri/i965/brw_vs.c | 4 +---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
index da2559c..c7278e4 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
@@ -461,7 +461,8 @@ brw_compile_tcs(const struct brw_compiler *compiler,
nir->info->patch_outputs_written = key->patch_outputs_written;
 
struct brw_vue_map input_vue_map;
-   brw_compute_vue_map(devinfo, _vue_map, nir->info->inputs_read, true);
+   brw_compute_vue_map(devinfo, _vue_map, nir->info->inputs_read,
+   nir->info->separate_shader);
brw_compute_tess_vue_map(_prog_data->vue_map,
 nir->info->outputs_written,
 nir->info->patch_outputs_written);
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c 
b/src/mesa/drivers/dri/i965/brw_vs.c
index dea1cf4..9a1f934 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -158,9 +158,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
 
brw_compute_vue_map(devinfo,
_data.base.vue_map, outputs_written,
-   prog ? prog->SeparateShader ||
-  prog->_LinkedShaders[MESA_SHADER_TESS_EVAL]
-: false);
+   vp->program.nir->info->separate_shader);
 
if (0) {
   _mesa_fprint_program_opt(stderr, >program, PROG_PRINT_DEBUG, true);
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH libdrm v2 5/5] xf86drm: implement an OpenBSD specific drmGetDevice2

2016-12-05 Thread Jonathan Gray
On Mon, Dec 05, 2016 at 05:56:40PM +, Emil Velikov wrote:
> On 1 December 2016 at 04:18, Jonathan Gray  wrote:
> > DRI devices on OpenBSD are not in their own directory.  They reside in
> > /dev with a large number of statically generated /dev nodes.
> >
> > Avoid stat'ing all of /dev on OpenBSD by implementing this custom path.
> >
> > v2:
> >- use drmGetMinorType to get node type
> >- adapt to drmProcessPciDevice changes
> >- verify drmParseSubsystemType type is PCI
> >- add a comment describing why this was added
> >
> Thanks for the update Jonathan.
> 
> I've pulled v2 in master,
> Emil

Thanks, going over what went in I see drmGetMinorNameForFD and
the OpenBSD drmGetDevice2 paths need to be adjusted to have the correct
minor for the control/render nodes.

ie

base = drmGetMinorBase(type);
if (min < base)
return error;

min -= base;

I'll send another patch for this.

And the common code could be split into a shared function?

drmGetDeviceNameFromFd2 would do the same thing as
drmGetDeviceNameFromFd on OpenBSD as far as I can tell so that could be
another shared function instead of the current "missing implementation"
warning.  Or should drmGetDeviceNameFromFd purposefully not handle
render/control nodes?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] nir/lower_tex: generalize get_texture_size()

2016-12-05 Thread Kenneth Graunke
On Thursday, December 1, 2016 8:53:16 AM PST Iago Toral Quiroga wrote:
> This was written specifically for RECT samplers. Make it more generic so
> we can call this from the gradient lowerings too.
> ---
>  src/compiler/nir/nir_lower_tex.c | 15 ++-
>  1 file changed, 10 insertions(+), 5 deletions(-)
> 
> diff --git a/src/compiler/nir/nir_lower_tex.c 
> b/src/compiler/nir/nir_lower_tex.c
> index 0efd443..ccca59b 100644
> --- a/src/compiler/nir/nir_lower_tex.c
> +++ b/src/compiler/nir/nir_lower_tex.c
> @@ -154,22 +154,27 @@ get_texture_size(nir_builder *b, nir_tex_instr *tex)
>  {
> b->cursor = nir_before_instr(>instr);
>  
> -   /* RECT textures should not be array: */
> -   assert(!tex->is_array);
> -
> nir_tex_instr *txs;
>  
> txs = nir_tex_instr_create(b->shader, 1);
> txs->op = nir_texop_txs;
> -   txs->sampler_dim = GLSL_SAMPLER_DIM_RECT;
> +   txs->sampler_dim = tex->sampler_dim;
> +   txs->is_array = tex->is_array;
> +   txs->is_shadow = tex->is_shadow;
> +   txs->is_new_style_shadow = tex->is_new_style_shadow;
> txs->texture_index = tex->texture_index;
> +   txs->texture = (nir_deref_var *)
> +  nir_copy_deref(txs, >texture->deref);
> +   txs->sampler_index = tex->sampler_index;
> +   txs->sampler = (nir_deref_var *)
> +  nir_copy_deref(txs, >sampler->deref);
> txs->dest_type = nir_type_int;
>  
> /* only single src, the lod: */
> txs->src[0].src = nir_src_for_ssa(nir_imm_int(b, 0));
> txs->src[0].src_type = nir_tex_src_lod;
>  
> -   nir_ssa_dest_init(>instr, >dest, 2, 32, NULL);
> +   nir_ssa_dest_init(>instr, >dest, tex->coord_components, 32, 
> NULL);

Hrm.  Won't this make it start returning 3 for array textures?  It looks
like you want 3 for 3D, 1 for 1D, and 2 for everything else...

I guess you swizzle away the unnecessary components, but another option
would be to just pass in the number of components you wanted, rather
than emitting moves to scrap them.


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/6] nir/lower_tex: add lowering for texture gradient on shadow samplers

2016-12-05 Thread Kenneth Graunke
On Thursday, December 1, 2016 8:53:19 AM PST Iago Toral Quiroga wrote:
> This is ported from the Intel lowering pass that we use with GLSL IR.
> This takes care of lowering texture gradients on shadow samplers other
> than cube maps. Intel hardware requires this for gen < 8.
> ---
>  src/compiler/nir/nir.h   |  7 +++
>  src/compiler/nir/nir_lower_tex.c | 40 
> 
>  2 files changed, 47 insertions(+)
> 
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index ed388c6..d494d5f 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -2437,6 +2437,13 @@ typedef struct nir_lower_tex_options {
>  * If true, lower nir_texop_txd on cube maps with nir_texop_txl.
>  */
> bool lower_txd_cube_map;
> +
> +   /**
> +* If true, lower nir_texop_txd on shadow samplers (except cube maps)
> +* with nir_texop_txl. Notice that cube map shadow samplers are lowered
> +* with lower_txd_cube_map.
> +*/
> +   bool lower_txd_shadow;
>  } nir_lower_tex_options;
>  
>  bool nir_lower_tex(nir_shader *shader,
> diff --git a/src/compiler/nir/nir_lower_tex.c 
> b/src/compiler/nir/nir_lower_tex.c
> index c7c3db2..d5ea509 100644
> --- a/src/compiler/nir/nir_lower_tex.c
> +++ b/src/compiler/nir/nir_lower_tex.c
> @@ -556,6 +556,40 @@ lower_gradient_cube_map(nir_builder *b, nir_tex_instr 
> *tex)
>  }
>  
>  static void
> +lower_gradient_shadow(nir_builder *b, nir_tex_instr *tex)
> +{
> +   assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
> +   assert(tex->is_shadow);
> +   assert(tex->op == nir_texop_txd);
> +   assert(tex->dest.is_ssa);
> +
> +   /* Use textureSize() to get the width and height of LOD 0 */
> +   nir_ssa_def *size = get_texture_size(b, tex);
> +
> +   /* Scale the gradients by width and height.  Effectively, the incoming
> +* gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
> +* GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
> +*/
> +   nir_ssa_def *dPdx = nir_fmul(b, tex->src[2].src.ssa, size);
> +   nir_ssa_def *dPdy = nir_fmul(b, tex->src[3].src.ssa, size);

NIR texture sources aren't guaranteed to be in any particular order
(it's kind of annoying)...you should instead do:

nir_ssa_def *dPdx;
nir_ssa_def *dPdy;

for (unsigned i = 0; i < tex->num_srcs; i++) {
   switch (tex->src[i].src_type) {
   case nir_tex_src_ddx:
  dPdx = nir_fmul(b, tex->src[i].src.ssa, size);
  break;
   case nir_tex_src_ddy:
  dPdy = nir_fmul(b, tex->src[i].src.ssa, size);
  break;
   default:
  break;
   }
}

Same comment on patch two.  Maybe make a get_ddx_ddy() helper?

> +
> +   nir_ssa_def *rho;
> +   if (dPdx->num_components == 1) {
> +  rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy));
> +   } else {
> +  rho = nir_fmax(b,
> + nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)),
> + nir_fsqrt(b, nir_fdot(b, dPdy, dPdy)));
> +   }
> +
> +   /* lod = log2(rho).  We're ignoring GL state biases for now. */
> +   nir_ssa_def *lod = nir_flog2(b, rho);
> +
> +   /* Replace the gradient instruction with an equivalent lod instruction */
> +   replace_gradient_with_lod(b, lod, tex);
> +}
> +
> +static void
>  saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
>  {
> b->cursor = nir_before_instr(>instr);
> @@ -786,6 +820,12 @@ nir_lower_tex_block(nir_block *block, nir_builder *b,
>   lower_gradient_cube_map(b, tex);
>   progress = true;
>}
> +
> +  if (tex->op == nir_texop_txd && options->lower_txd_shadow &&
> +  tex->is_shadow && tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE) {
> + lower_gradient_shadow(b, tex);
> + progress = true;
> +  }

This seems a bit odd to me - if a driver sets options->lower_txd_shadow
but not options->lower_txd_cube_map, then textureGrad(samplerCubeShadow, ...)
would not be lowered.  I would sort of expect that case to be lowered if
either option is set.

I suppose it's not a problem in practice, as we set both, but it might
be nice to fix this anyway.

With the ddx/ddy source finding fixed in both patches, the series is:
Reviewed-by: Kenneth Graunke 

> }
>  
> return progress;
> 



signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Potentially EOL ilo gallium driver

2016-12-05 Thread Edward O'Callaghan
This patch is to potentially remove ourselves from the maintenance
burden of the ilo driver that appears to now be essentially
unmaintained?

I am not sure of our policy here or if there are too many
users so this patch is really only to gauge a response of
how folks feel?

Kind Regards,

Edward O'Callaghan (1):
 [PATCH] ilo: EOL unmaintained older gallium intel driver
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/50] GL_ARB_gpu_shader_int64... this time for sure!

2016-12-05 Thread Ian Romanick
On 12/05/2016 10:31 AM, Matt Turner wrote:
> On Wed, Nov 30, 2016 at 1:11 PM, Matt Turner  wrote:
>> On 11/28, Ian Romanick wrote:
>>>
>>> From: Ian Romanick 
>>>
>>> I believe that I have addressed all of the review feedback from the
>>> previous iteration.  Many of the patches have been reviewed, and they
>>> should be ready to go.
>>>
>>> Patches marked with "vN" in the subject have changed in a non-trivial
>>> way since last being sent to the list.
>>>
>>> Several patches that have not changed need review:
>>>
>>>Patches 23 through 33 add lowering passes for 64-bit operations.
>>
>> I sent a few comments, and I cannot claim to have verified the division
>> routine, but the rest are
>>
>> Reviewed-by: Matt Turner 
> 
> Having thought more about this, I think doing this in GLSL IR is not
> the way we should be going.
> 
> Presumably we're going to want to support int64 in SPIR-V, and that
> necessitates lowering these operations in NIR.
> 
> I don't think it's a good idea to do this outside of NIR.

With this series and a follow-on series that will enable IVB, any GPU
that has real integers will be able to support int64.  The same will go
for fp64.  This applies not only to i965 but also basically every
Gallium driver that is still maintained.  Getting int64 and fp64 "for
free" on a number of Mesa-supported GPUs seems like a good thing.
Requiring all of Gallium enable NIR is... much more expensive than free.

At some point, probably soon, we will want all of this for SPIR-V, and
that does mean we'll need NIR support.  There will still presumably be
users of the GLSL lowering even then.  My expectation is that the NIR
lowering would be implemented in a similar manner to the GLSL lowering,
but it will be a bunch more typing.

> Thoughts?

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 6/7] i965: use nir_lower_indirect_derefs() for GLSL

2016-12-05 Thread Timothy Arceri
On Mon, 2016-12-05 at 11:59 -0800, Jason Ekstrand wrote:
> From: Timothy Arceri 
> 

Just in case this lands before NIR loop unrolling I've just sent a
slightly updated patch [1] in the latest series.

[1] https://patchwork.freedesktop.org/patch/125758/

> This moves the nir_lower_indirect_derefs() call into
> brw_preprocess_nir() so that it is called by both OpenGL and Vulkan
> and removes the call to the old GLSL IR pass
> lower_variable_index_to_cond_assign()
> 
> We want to do this pass in nir to be able to move loop unrolling
> to nir.
> 
> There is an increase of 1-3 instructions in a small number of shaders,
> and 2 Kerbal Space program shaders that increase by 32 instructions.
> 
> Shader-db results BDW:
> 
> total instructions in shared programs: 8705873 -> 8706194 (0.00%)
> instructions in affected programs: 32515 -> 32836 (0.99%)
> helped: 3
> HURT: 79
> 
> total cycles in shared programs: 74618120 -> 74583476 (-0.05%)
> cycles in affected programs: 528104 -> 493460 (-6.56%)
> helped: 47
> HURT: 37
> 
> LOST:   2
> GAINED: 0
> ---
>  src/intel/vulkan/anv_pipeline.c| 10 --
>  src/mesa/drivers/dri/i965/brw_link.cpp | 13 -
>  src/mesa/drivers/dri/i965/brw_nir.c| 10 ++
>  3 files changed, 10 insertions(+), 23 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_pipeline.c
> b/src/intel/vulkan/anv_pipeline.c
> index 9b65e35..6b0a3c9 100644
> --- a/src/intel/vulkan/anv_pipeline.c
> +++ b/src/intel/vulkan/anv_pipeline.c
> @@ -177,16 +177,6 @@ anv_shader_compile_to_nir(struct anv_device
> *device,
>  
> nir_shader_gather_info(nir, entry_point->impl);
>  
> -   nir_variable_mode indirect_mask = 0;
> -   if (compiler->glsl_compiler_options[stage].EmitNoIndirectInput)
> -  indirect_mask |= nir_var_shader_in;
> -   if (compiler->glsl_compiler_options[stage].EmitNoIndirectOutput)
> -  indirect_mask |= nir_var_shader_out;
> -   if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp)
> -  indirect_mask |= nir_var_local;
> -
> -   nir_lower_indirect_derefs(nir, indirect_mask);
> -
> return nir;
>  }
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp
> b/src/mesa/drivers/dri/i965/brw_link.cpp
> index 3f6041b..19e691e 100644
> --- a/src/mesa/drivers/dri/i965/brw_link.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_link.cpp
> @@ -137,19 +137,6 @@ process_glsl_ir(struct brw_context *brw,
>  
> do_copy_propagation(shader->ir);
>  
> -   bool lowered_variable_indexing =
> -  lower_variable_index_to_cond_assign(shader->Stage, shader->ir,
> -  options-
> >EmitNoIndirectInput,
> -  options-
> >EmitNoIndirectOutput,
> -  options-
> >EmitNoIndirectTemp,
> -  options-
> >EmitNoIndirectUniform);
> -
> -   if (unlikely(brw->perf_debug && lowered_variable_indexing)) {
> -  perf_debug("Unsupported form of variable indexing in %s;
> falling "
> - "back to very inefficient code generation\n",
> - _mesa_shader_stage_to_abbrev(shader->Stage));
> -   }
> -
> bool progress;
> do {
>    progress = false;
> diff --git a/src/mesa/drivers/dri/i965/brw_nir.c
> b/src/mesa/drivers/dri/i965/brw_nir.c
> index 763e3ec..8768cee 100644
> --- a/src/mesa/drivers/dri/i965/brw_nir.c
> +++ b/src/mesa/drivers/dri/i965/brw_nir.c
> @@ -485,6 +485,16 @@ brw_preprocess_nir(const struct brw_compiler
> *compiler, nir_shader *nir)
> /* Lower a bunch of stuff */
> OPT_V(nir_lower_var_copies);
>  
> +   nir_variable_mode indirect_mask = 0;
> +   if (compiler->glsl_compiler_options[nir-
> >stage].EmitNoIndirectInput)
> +  indirect_mask |= nir_var_shader_in;
> +   if (compiler->glsl_compiler_options[nir-
> >stage].EmitNoIndirectOutput)
> +  indirect_mask |= nir_var_shader_out;
> +   if (compiler->glsl_compiler_options[nir-
> >stage].EmitNoIndirectTemp)
> +  indirect_mask |= nir_var_local;
> +
> +   nir_lower_indirect_derefs(nir, indirect_mask);
> +
> /* Get rid of split copies */
> nir = nir_optimize(nir, is_scalar);
>  
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/10] nir: create helper for fixing phi srcs when cloning

2016-12-05 Thread Timothy Arceri
This will be useful for fixing phi srcs when cloning a loop body
during loop unrolling.
---
 src/compiler/nir/nir_clone.c | 36 +---
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c
index 4f7bdd9..e6483b1 100644
--- a/src/compiler/nir/nir_clone.c
+++ b/src/compiler/nir/nir_clone.c
@@ -593,6 +593,26 @@ clone_cf_list(clone_state *state, struct exec_list *dst,
}
 }
 
+/* After we've cloned almost everything, we have to walk the list of phi
+ * sources and fix them up.  Thanks to loops, the block and SSA value for a
+ * phi source may not be defined when we first encounter it.  Instead, we
+ * add it to the phi_srcs list and we fix it up here.
+ */
+static void
+fixup_phi_srcs(clone_state *state)
+{
+   list_for_each_entry_safe(nir_phi_src, src, >phi_srcs, src.use_link) {
+  src->pred = remap_local(state, src->pred);
+  assert(src->src.is_ssa);
+  src->src.ssa = remap_local(state, src->src.ssa);
+
+  /* Remove from this list and place in the uses of the SSA def */
+  list_del(>src.use_link);
+  list_addtail(>src.use_link, >src.ssa->uses);
+   }
+   assert(list_empty(>phi_srcs));
+}
+
 static nir_function_impl *
 clone_function_impl(clone_state *state, const nir_function_impl *fi)
 {
@@ -614,21 +634,7 @@ clone_function_impl(clone_state *state, const 
nir_function_impl *fi)
 
clone_cf_list(state, >body, >body);
 
-   /* After we've cloned almost everything, we have to walk the list of phi
-* sources and fix them up.  Thanks to loops, the block and SSA value for a
-* phi source may not be defined when we first encounter it.  Instead, we
-* add it to the phi_srcs list and we fix it up here.
-*/
-   list_for_each_entry_safe(nir_phi_src, src, >phi_srcs, src.use_link) {
-  src->pred = remap_local(state, src->pred);
-  assert(src->src.is_ssa);
-  src->src.ssa = remap_local(state, src->src.ssa);
-
-  /* Remove from this list and place in the uses of the SSA def */
-  list_del(>src.use_link);
-  list_addtail(>src.use_link, >src.ssa->uses);
-   }
-   assert(list_empty(>phi_srcs));
+   fixup_phi_srcs(state);
 
/* All metadata is invalidated in the cloning process */
nfi->valid_metadata = 0;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/10] i965: use nir loop unrolling pass

2016-12-05 Thread Timothy Arceri
V2:
- enable on all gens
---
 src/mesa/drivers/dri/i965/brw_compiler.c |  5 -
 src/mesa/drivers/dri/i965/brw_nir.c  | 22 +-
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c 
b/src/mesa/drivers/dri/i965/brw_compiler.c
index 6a73719..fa8a772 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.c
+++ b/src/mesa/drivers/dri/i965/brw_compiler.c
@@ -55,6 +55,7 @@ static const struct nir_shader_compiler_options 
scalar_nir_options = {
.lower_unpack_snorm_4x8 = true,
.lower_unpack_unorm_2x16 = true,
.lower_unpack_unorm_4x8 = true,
+   .max_unroll_iterations = 32,
 };
 
 static const struct nir_shader_compiler_options vector_nir_options = {
@@ -75,6 +76,7 @@ static const struct nir_shader_compiler_options 
vector_nir_options = {
.lower_unpack_unorm_2x16 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
+   .max_unroll_iterations = 32,
 };
 
 static const struct nir_shader_compiler_options vector_nir_options_gen6 = {
@@ -92,6 +94,7 @@ static const struct nir_shader_compiler_options 
vector_nir_options_gen6 = {
.lower_unpack_unorm_2x16 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
+   .max_unroll_iterations = 32,
 };
 
 struct brw_compiler *
@@ -119,7 +122,7 @@ brw_compiler_create(void *mem_ctx, const struct 
gen_device_info *devinfo)
 
/* We want the GLSL compiler to emit code that uses condition codes */
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
-  compiler->glsl_compiler_options[i].MaxUnrollIterations = 32;
+  compiler->glsl_compiler_options[i].MaxUnrollIterations = 0;
   compiler->glsl_compiler_options[i].MaxIfDepth =
  devinfo->gen < 6 ? 16 : UINT_MAX;
 
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c 
b/src/mesa/drivers/dri/i965/brw_nir.c
index 92f1ba7..c6691a9 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -396,8 +396,17 @@ brw_nir_lower_cs_shared(nir_shader *nir)
 #define OPT_V(pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)
 
 static nir_shader *
-nir_optimize(nir_shader *nir, bool is_scalar)
+nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
+ bool is_scalar)
 {
+   nir_variable_mode indirect_mask = 0;
+   if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectInput)
+  indirect_mask |= nir_var_shader_in;
+   if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectOutput)
+  indirect_mask |= nir_var_shader_out;
+   if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectTemp)
+  indirect_mask |= nir_var_local;
+
bool progress;
do {
   progress = false;
@@ -420,6 +429,9 @@ nir_optimize(nir_shader *nir, bool is_scalar)
   OPT(nir_opt_algebraic);
   OPT(nir_opt_constant_folding);
   OPT(nir_opt_dead_cf);
+  if (nir->options->max_unroll_iterations != 0) {
+ OPT(nir_opt_loop_unroll, indirect_mask);
+  }
   OPT(nir_opt_remove_phis);
   OPT(nir_opt_undef);
   OPT_V(nir_lower_doubles, nir_lower_drcp |
@@ -476,7 +488,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, 
nir_shader *nir)
 
OPT(nir_split_var_copies);
 
-   nir = nir_optimize(nir, is_scalar);
+   nir = nir_optimize(nir, compiler, is_scalar);
 
if (is_scalar) {
   OPT_V(nir_lower_load_const_to_scalar);
@@ -496,7 +508,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, 
nir_shader *nir)
nir_lower_indirect_derefs(nir, indirect_mask);
 
/* Get rid of split copies */
-   nir = nir_optimize(nir, is_scalar);
+   nir = nir_optimize(nir, compiler, is_scalar);
 
OPT_V(nir_lower_clip_cull_distance_arrays);
 
@@ -523,7 +535,7 @@ brw_postprocess_nir(nir_shader *nir, const struct 
brw_compiler *compiler,
bool progress; /* Written by OPT and OPT_V */
(void)progress;
 
-   nir = nir_optimize(nir, is_scalar);
+   nir = nir_optimize(nir, compiler, is_scalar);
 
if (devinfo->gen >= 6) {
   /* Try and fuse multiply-adds */
@@ -612,7 +624,7 @@ brw_nir_apply_sampler_key(nir_shader *nir,
 
if (nir_lower_tex(nir, _options)) {
   nir_validate_shader(nir);
-  nir = nir_optimize(nir, is_scalar);
+  nir = nir_optimize(nir, compiler, is_scalar);
}
 
return nir;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/10] nir: add helper for cloning nir_cf_list

2016-12-05 Thread Timothy Arceri
V2:
- updated to create a generic list clone helper nir_cf_list_clone()
- continue to assert on clone when fallback flag not set as suggested
  by Jason.
---
 src/compiler/nir/nir_clone.c| 58 +++--
 src/compiler/nir/nir_control_flow.h |  3 ++
 2 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c
index e6483b1..b9b7829 100644
--- a/src/compiler/nir/nir_clone.c
+++ b/src/compiler/nir/nir_clone.c
@@ -22,7 +22,7 @@
  */
 
 #include "nir.h"
-#include "nir_control_flow_private.h"
+#include "nir_control_flow.h"
 
 /* Secret Decoder Ring:
  *   clone_foo():
@@ -35,6 +35,11 @@ typedef struct {
/* True if we are cloning an entire shader. */
bool global_clone;
 
+   /* This allows us to clone a loop body without having to add srcs from
+* outside the loop to the remap table. This is useful for loop unrolling.
+*/
+   bool allow_remap_fallback;
+
/* maps orig ptr -> cloned ptr: */
struct hash_table *remap_table;
 
@@ -46,11 +51,19 @@ typedef struct {
 } clone_state;
 
 static void
-init_clone_state(clone_state *state, bool global)
+init_clone_state(clone_state *state, struct hash_table *remap_table,
+ bool global, bool allow_remap_fallback)
 {
state->global_clone = global;
-   state->remap_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
-_mesa_key_pointer_equal);
+   state->allow_remap_fallback = allow_remap_fallback;
+
+   if (remap_table) {
+  state->remap_table = remap_table;
+   } else {
+  state->remap_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+   _mesa_key_pointer_equal);
+   }
+
list_inithead(>phi_srcs);
 }
 
@@ -72,9 +85,10 @@ _lookup_ptr(clone_state *state, const void *ptr, bool global)
   return (void *)ptr;
 
entry = _mesa_hash_table_search(state->remap_table, ptr);
-   assert(entry && "Failed to find pointer!");
-   if (!entry)
-  return NULL;
+   if (!entry) {
+  assert(state->allow_remap_fallback);
+  return (void *)ptr;
+   }
 
return entry->data;
 }
@@ -613,6 +627,32 @@ fixup_phi_srcs(clone_state *state)
assert(list_empty(>phi_srcs));
 }
 
+void
+nir_cf_list_clone(nir_cf_list *dst, nir_cf_list *src, nir_cf_node *parent,
+  struct hash_table *remap_table)
+{
+   exec_list_make_empty(>list);
+   dst->impl = src->impl;
+
+   if (exec_list_is_empty(>list))
+  return;
+
+   clone_state state;
+   init_clone_state(, remap_table, false, true);
+
+   /* We use the same shader */
+   state.ns = src->impl->function->shader;
+
+   /* Dest list needs to at least have one block */
+   nir_block *nblk = nir_block_create(state.ns);
+   nblk->cf_node.parent = parent;
+   exec_list_push_tail(>list, >cf_node.node);
+
+   clone_cf_list(, >list, >list);
+
+   fixup_phi_srcs();
+}
+
 static nir_function_impl *
 clone_function_impl(clone_state *state, const nir_function_impl *fi)
 {
@@ -646,7 +686,7 @@ nir_function_impl *
 nir_function_impl_clone(const nir_function_impl *fi)
 {
clone_state state;
-   init_clone_state(, false);
+   init_clone_state(, NULL, false, false);
 
/* We use the same shader */
state.ns = fi->function->shader;
@@ -686,7 +726,7 @@ nir_shader *
 nir_shader_clone(void *mem_ctx, const nir_shader *s)
 {
clone_state state;
-   init_clone_state(, true);
+   init_clone_state(, NULL, true, false);
 
nir_shader *ns = nir_shader_create(mem_ctx, s->stage, s->options, NULL);
state.ns = ns;
diff --git a/src/compiler/nir/nir_control_flow.h 
b/src/compiler/nir/nir_control_flow.h
index b71382f..b496aec 100644
--- a/src/compiler/nir/nir_control_flow.h
+++ b/src/compiler/nir/nir_control_flow.h
@@ -141,6 +141,9 @@ void nir_cf_reinsert(nir_cf_list *cf_list, nir_cursor 
cursor);
 
 void nir_cf_delete(nir_cf_list *cf_list);
 
+void nir_cf_list_clone(nir_cf_list *dst, nir_cf_list *src, nir_cf_node *parent,
+   struct hash_table *remap_table);
+
 static inline void
 nir_cf_list_extract(nir_cf_list *extracted, struct exec_list *cf_list)
 {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/10] nir: don't count removal of lcssa_phi as progress

2016-12-05 Thread Timothy Arceri
---
 src/compiler/nir/nir_opt_remove_phis.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_opt_remove_phis.c 
b/src/compiler/nir/nir_opt_remove_phis.c
index acaa6e1..d4344b0 100644
--- a/src/compiler/nir/nir_opt_remove_phis.c
+++ b/src/compiler/nir/nir_opt_remove_phis.c
@@ -73,6 +73,7 @@ remove_phis_block(nir_block *block, nir_builder *b)
  break;
 
   nir_phi_instr *phi = nir_instr_as_phi(instr);
+  bool is_lcssa_phi = phi->is_lcssa_phi;
 
   nir_ssa_def *def = NULL;
   nir_alu_instr *mov = NULL;
@@ -133,7 +134,8 @@ remove_phis_block(nir_block *block, nir_builder *b)
   nir_ssa_def_rewrite_uses(>dest.ssa, nir_src_for_ssa(def));
   nir_instr_remove(instr);
 
-  progress = true;
+  if (!is_lcssa_phi)
+ progress = true;
}
 
return progress;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/10] nir: Add a LCSSA-pass

2016-12-05 Thread Timothy Arceri
From: Thomas Helland 

V2: Do a "depth first search" to convert to LCSSA

V3: Small comment fixup

V4: Rebase, adapt to removal of function overloads

V5: Rebase, adapt to relocation of nir to compiler/nir
Still need to adapt to potential if-uses
Work around nir_validate issue

V6 (Timothy):
 - tidy lcssa and stop leaking memory
 - dont rewrite the src for the lcssa phi node
 - validate lcssa phi srcs to avoid postvalidate assert
 - don't add new phi if one already exists
 - more lcssa phi validation fixes
 - Rather than marking ssa defs inside a loop just mark blocks inside
   a loop. This is simpler and fixes lcssa for intrinsics which do
   not have a destination.
 - don't create LCSSA phis for loops we won't unroll
 - require loop metadata for lcssa pass
 - handle case where the ssa def's use outside the loop is already a phi

V7: (Timothy)
- pass indirect mask to metadata call

v8: (Timothy)
- make convert to lcssa a helper function rather than a nir pass
- replace inside loop bitset with on the fly block index logic.
- remove lcssa phi validation special cases
- inline code from useless helpers, suggested by Jason.
- always do lcssa on loops, suggested by Jason.
- stop making lcssa phis special. Add as many source as the block
  has predecessors, suggested by Jason.

V9: (Timothy)
- fix regression with the is_lcssa_phi field not being initialised
  to false now that ralloc() doesn't zero out memory.
---
 src/compiler/Makefile.sources   |   1 +
 src/compiler/nir/nir.c  |   1 +
 src/compiler/nir/nir.h  |   4 +
 src/compiler/nir/nir_to_lcssa.c | 185 
 4 files changed, 191 insertions(+)
 create mode 100644 src/compiler/nir/nir_to_lcssa.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 3090677..d3e158a 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -252,6 +252,7 @@ NIR_FILES = \
nir/nir_search_helpers.h \
nir/nir_split_var_copies.c \
nir/nir_sweep.c \
+   nir/nir_to_lcssa.c \
nir/nir_to_ssa.c \
nir/nir_validate.c \
nir/nir_vla.h \
diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
index 6e308d5..918cd04 100644
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -561,6 +561,7 @@ nir_phi_instr_create(nir_shader *shader)
 {
nir_phi_instr *instr = ralloc(shader, nir_phi_instr);
instr_init(>instr, nir_instr_type_phi);
+   instr->is_lcssa_phi = false;
 
dest_init(>dest);
exec_list_make_empty(>srcs);
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index b8b44a7..d948e97 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1400,6 +1400,8 @@ typedef struct {
struct exec_list srcs; /** < list of nir_phi_src */
 
nir_dest dest;
+
+   bool is_lcssa_phi;
 } nir_phi_instr;
 
 typedef struct {
@@ -2547,6 +2549,8 @@ void nir_convert_to_ssa(nir_shader *shader);
 bool nir_repair_ssa_impl(nir_function_impl *impl);
 bool nir_repair_ssa(nir_shader *shader);
 
+void nir_convert_loop_to_lcssa(nir_loop *loop);
+
 /* If phi_webs_only is true, only convert SSA values involved in phi nodes to
  * registers.  If false, convert all values (even those not involved in a phi
  * node) to registers.
diff --git a/src/compiler/nir/nir_to_lcssa.c b/src/compiler/nir/nir_to_lcssa.c
new file mode 100644
index 000..93b1c29
--- /dev/null
+++ b/src/compiler/nir/nir_to_lcssa.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright © 2015 Thomas Helland
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/*
+ * This pass converts the ssa-graph into "Loop Closed SSA form". This is
+ * done by placing phi nodes at the exits of the loop for all values
+ * that are used outside the loop. The result is it transforms:
+ *
+ * loop {->  loop {
+ *ssa2 = ->  ssa2 = ...
+ *if 

[Mesa-dev] [PATCH 09/10] nir: pass compiler rather than devinfo to functions that call nir_optimize

2016-12-05 Thread Timothy Arceri
Later we will pass compiler to nir_optimize to be used by the loop unroll
pass.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp  | 10 --
 src/mesa/drivers/dri/i965/brw_nir.c   |  7 ---
 src/mesa/drivers/dri/i965/brw_nir.h   |  4 ++--
 src/mesa/drivers/dri/i965/brw_shader.cpp  |  4 ++--
 src/mesa/drivers/dri/i965/brw_vec4.cpp|  5 ++---
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp |  5 ++---
 src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp|  4 ++--
 7 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index c218f56..0dcfcb0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -6425,14 +6425,13 @@ brw_compile_fs(const struct brw_compiler *compiler, 
void *log_data,
char **error_str)
 {
nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
-   shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, >tex,
-  true);
+   shader = brw_nir_apply_sampler_key(shader, compiler, >tex, true);
brw_nir_lower_fs_inputs(shader, vue_map, prog, compiler->devinfo, key);
brw_nir_lower_fs_outputs(shader);
if (!key->multisample_fbo)
   NIR_PASS_V(shader, demote_sample_qualifiers);
NIR_PASS_V(shader, move_interpolation_to_top);
-   shader = brw_postprocess_nir(shader, compiler->devinfo, true);
+   shader = brw_postprocess_nir(shader, compiler, true);
 
/* key->alpha_test_func means simulating alpha testing via discards,
 * so the shader definitely kills pixels.
@@ -6655,8 +6654,7 @@ brw_compile_cs(const struct brw_compiler *compiler, void 
*log_data,
char **error_str)
 {
nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
-   shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, >tex,
-  true);
+   shader = brw_nir_apply_sampler_key(shader, compiler, >tex, true);
brw_nir_lower_cs_shared(shader);
prog_data->base.total_shared += shader->num_shared;
 
@@ -6669,7 +6667,7 @@ brw_compile_cs(const struct brw_compiler *compiler, void 
*log_data,
(unsigned)4 * (prog_data->thread_local_id_index + 1));
 
brw_nir_lower_intrinsics(shader, _data->base);
-   shader = brw_postprocess_nir(shader, compiler->devinfo, true);
+   shader = brw_postprocess_nir(shader, compiler, true);
 
prog_data->local_size[0] = shader->info->cs.local_size[0];
prog_data->local_size[1] = shader->info->cs.local_size[1];
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c 
b/src/mesa/drivers/dri/i965/brw_nir.c
index 8768cee..92f1ba7 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -513,10 +513,10 @@ brw_preprocess_nir(const struct brw_compiler *compiler, 
nir_shader *nir)
  * will not work.
  */
 nir_shader *
-brw_postprocess_nir(nir_shader *nir,
-const struct gen_device_info *devinfo,
+brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
 bool is_scalar)
 {
+   const struct gen_device_info *devinfo = compiler->devinfo;
bool debug_enabled =
   (INTEL_DEBUG & intel_debug_flag_for_shader_stage(nir->stage));
 
@@ -578,10 +578,11 @@ brw_postprocess_nir(nir_shader *nir,
 
 nir_shader *
 brw_nir_apply_sampler_key(nir_shader *nir,
-  const struct gen_device_info *devinfo,
+  const struct brw_compiler *compiler,
   const struct brw_sampler_prog_key_data *key_tex,
   bool is_scalar)
 {
+   const struct gen_device_info *devinfo = compiler->devinfo;
nir_lower_tex_options tex_options = { 0 };
 
/* Iron Lake and prior require lowering of all rectangle textures */
diff --git a/src/mesa/drivers/dri/i965/brw_nir.h 
b/src/mesa/drivers/dri/i965/brw_nir.h
index 3c774d0..8cfb6c1 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.h
+++ b/src/mesa/drivers/dri/i965/brw_nir.h
@@ -114,7 +114,7 @@ void brw_nir_lower_fs_outputs(nir_shader *nir);
 void brw_nir_lower_cs_shared(nir_shader *nir);
 
 nir_shader *brw_postprocess_nir(nir_shader *nir,
-const struct gen_device_info *devinfo,
+const struct brw_compiler *compiler,
 bool is_scalar);
 
 bool brw_nir_apply_attribute_workarounds(nir_shader *nir,
@@ -126,7 +126,7 @@ bool brw_nir_apply_trig_workarounds(nir_shader *nir);
 void brw_nir_apply_tcs_quads_workaround(nir_shader *nir);
 
 nir_shader *brw_nir_apply_sampler_key(nir_shader *nir,
-  const struct gen_device_info *devinfo,
+  const struct brw_compiler *compiler,
   const struct brw_sampler_prog_key_data 
*key,
   bool is_scalar);
 
diff --git 

[Mesa-dev] [PATCH 08/10] nir: add a loop unrolling pass

2016-12-05 Thread Timothy Arceri
V2:
- tidy ups suggested by Connor.
- tidy up cloning logic and handle copy propagation
 based on suggestion by Connor.
- use nir_ssa_def_rewrite_uses to fix up lcssa phis
  suggested by Connor.
- add support for complex loop unrolling (two terminators)
- handle case where the ssa def's use outside the loop is already a phi
- support unrolling loops with multiple terminators when trip count
  is known for each terminator

V3:
- set correct num_components when creating phi in complex unroll
- rewrite update remap table based on Jason's suggestions.
- remove unrequired extract_loop_body() helper as suggested by Jason.
- simplify the lcssa phi fix up code for simple loops as per Jason's suggestions.
- use mem context to keep track of hash table memory as suggested by Jason.
- move is_{complex,simple}_loop helpers to the unroll code
- require nir_metadata_block_index
- partially rewrote complex unroll to be simpler and easier to follow.

V4:
- use rzalloc() when creating nir_phi_src but not setting pred right away
 fixes regression caused by ralloc() no longer zeroing memory.
---
 src/compiler/Makefile.sources  |   1 +
 src/compiler/nir/nir.h |   2 +
 src/compiler/nir/nir_opt_loop_unroll.c | 729 +
 3 files changed, 732 insertions(+)
 create mode 100644 src/compiler/nir/nir_opt_loop_unroll.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index d3e158a..799fb38 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -238,6 +238,7 @@ NIR_FILES = \
nir/nir_opt_dead_cf.c \
nir/nir_opt_gcm.c \
nir/nir_opt_global_to_local.c \
+   nir/nir_opt_loop_unroll.c \
nir/nir_opt_peephole_select.c \
nir/nir_opt_remove_phis.c \
nir/nir_opt_undef.c \
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index d948e97..b8813e4 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2573,6 +2573,8 @@ bool nir_opt_dead_cf(nir_shader *shader);
 
 bool nir_opt_gcm(nir_shader *shader, bool value_number);
 
+bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask);
+
 bool nir_opt_peephole_select(nir_shader *shader, unsigned limit);
 
 bool nir_opt_remove_phis(nir_shader *shader);
diff --git a/src/compiler/nir/nir_opt_loop_unroll.c 
b/src/compiler/nir/nir_opt_loop_unroll.c
new file mode 100644
index 000..8715757
--- /dev/null
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -0,0 +1,729 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "nir_control_flow.h"
+
+static bool
+is_loop_small_enough_to_unroll(nir_shader *shader, nir_loop_info *li)
+{
+   unsigned max_iter = shader->options->max_unroll_iterations;
+
+   if (li->trip_count > max_iter)
+  return false;
+
+   if (li->force_unroll)
+  return true;
+
+   bool loop_not_too_large =
+  li->num_instructions * li->trip_count <= max_iter * 25;
+
+   return loop_not_too_large;
+}
+
+static bool
+is_complex_loop(nir_shader *shader, nir_loop_info *li)
+{
+   unsigned num_lt = list_length(>loop_terminator_list);
+   return is_loop_small_enough_to_unroll(shader, li) && num_lt == 2;
+}
+
+static bool
+is_simple_loop(nir_shader *shader, nir_loop_info *li)
+{
+   return li->is_trip_count_known &&
+  is_loop_small_enough_to_unroll(shader, li);
+}
+
+static void
+move_cf_list_into_if(nir_cf_list *lst, nir_cf_node *if_node,
+ nir_block *last_blk, bool continue_from_then_branch)
+{
+   nir_if *if_stmt = nir_cf_node_as_if(if_node);
+   if (continue_from_then_branch) {
+  /* Move the rest of the loop inside the then */
+  nir_cf_reinsert(lst, nir_after_block(nir_if_last_then_block(if_stmt)));
+   } else {
+  /* Move the rest of the loop inside the else */
+  nir_cf_reinsert(lst, 

[Mesa-dev] V6 Loop unrolling in NIR

2016-12-05 Thread Timothy Arceri
Again thanks to Jason and Connor for their feedback, and to 
Thomas for getting this work started.

V6: Is mainly just a rebase but fixes a few regressions caused by
ralloc() being fixed recently to not zero out memory. There is also a fix for
the analysis pass for detecting jump instructions when a block contains nested
ifs.

Fixes piglit test for HSW and IVB:
spec/arb_tessellation_shader/execution/variable-indexing/tes-both-input-array-vec4-index-rd.shader_test

total instructions in shared programs: 8528769 -> 8528534 (-0.00%)
instructions in affected programs: 46967 -> 46732 (-0.50%)
helped: 62
HURT: 92

total cycles in shared programs: 67260892 -> 67248082 (-0.02%)
cycles in affected programs: 3178716 -> 3165906 (-0.40%)
helped: 1154
HURT: 1118

total loops in shared programs: 1868 -> 1214 (-35.01%)
loops in affected programs: 687 -> 33 (-95.20%)
helped: 655
HURT: 17

LOST:   5
GAINED: 0

Most of the HURT instruction count is caused by patch 2, where the binary
search implemented in the NIR variable indexing lowering pass can result in
more instructions than the GLSL IR version. 

The HURT loops are caused by a couple of different things that can be
addressed in a follow up series. For example handling if statements
that contain a break but also other instructions, and different ways of
incrementing the induction variable, e.g. multiplication.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/10] i965: use nir_lower_indirect_derefs() for GLSL

2016-12-05 Thread Timothy Arceri
This moves the nir_lower_indirect_derefs() call into
brw_preprocess_nir() so that it is called by both OpenGL and Vulkan
and removes the call to the old GLSL IR pass
lower_variable_index_to_cond_assign()

We want to do this pass in nir to be able to move loop unrolling
to nir.

There is an increase of 1-3 instructions in a small number of shaders,
and 2 Kerbal Space program shaders that increase by 32 instructions.
The changes seem to be caused by the difference in the GLSL IR vs
NIR variable index lowering passes. The GLSL IR pass creates a
simple if ladder, while the NIR pass implements a binary search.

Shader-db results BDW:

total instructions in shared programs: 8705873 -> 8706194 (0.00%)
instructions in affected programs: 32515 -> 32836 (0.99%)
helped: 3
HURT: 79

total cycles in shared programs: 74618120 -> 74583476 (-0.05%)
cycles in affected programs: 528104 -> 493460 (-6.56%)
helped: 47
HURT: 37

LOST:   2
GAINED: 0

V2: remove the do_copy_propagation() call from the i965 GLSL IR
linking code. This call was added in f7741c52111 but since we are
moving the variable index lowering to NIR we no longer need it and
can just rely on the nir copy propagation pass.
---
 src/intel/vulkan/anv_pipeline.c| 10 --
 src/mesa/drivers/dri/i965/brw_link.cpp | 15 ---
 src/mesa/drivers/dri/i965/brw_nir.c| 10 ++
 3 files changed, 10 insertions(+), 25 deletions(-)

diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 9b65e35..6b0a3c9 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -177,16 +177,6 @@ anv_shader_compile_to_nir(struct anv_device *device,
 
nir_shader_gather_info(nir, entry_point->impl);
 
-   nir_variable_mode indirect_mask = 0;
-   if (compiler->glsl_compiler_options[stage].EmitNoIndirectInput)
-  indirect_mask |= nir_var_shader_in;
-   if (compiler->glsl_compiler_options[stage].EmitNoIndirectOutput)
-  indirect_mask |= nir_var_shader_out;
-   if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp)
-  indirect_mask |= nir_var_local;
-
-   nir_lower_indirect_derefs(nir, indirect_mask);
-
return nir;
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp 
b/src/mesa/drivers/dri/i965/brw_link.cpp
index 3f6041b..7902133 100644
--- a/src/mesa/drivers/dri/i965/brw_link.cpp
+++ b/src/mesa/drivers/dri/i965/brw_link.cpp
@@ -135,21 +135,6 @@ process_glsl_ir(struct brw_context *brw,
lower_noise(shader->ir);
lower_quadop_vector(shader->ir, false);
 
-   do_copy_propagation(shader->ir);
-
-   bool lowered_variable_indexing =
-  lower_variable_index_to_cond_assign(shader->Stage, shader->ir,
-  options->EmitNoIndirectInput,
-  options->EmitNoIndirectOutput,
-  options->EmitNoIndirectTemp,
-  options->EmitNoIndirectUniform);
-
-   if (unlikely(brw->perf_debug && lowered_variable_indexing)) {
-  perf_debug("Unsupported form of variable indexing in %s; falling "
- "back to very inefficient code generation\n",
- _mesa_shader_stage_to_abbrev(shader->Stage));
-   }
-
bool progress;
do {
   progress = false;
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c 
b/src/mesa/drivers/dri/i965/brw_nir.c
index 763e3ec..8768cee 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -485,6 +485,16 @@ brw_preprocess_nir(const struct brw_compiler *compiler, 
nir_shader *nir)
/* Lower a bunch of stuff */
OPT_V(nir_lower_var_copies);
 
+   nir_variable_mode indirect_mask = 0;
+   if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectInput)
+  indirect_mask |= nir_var_shader_in;
+   if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectOutput)
+  indirect_mask |= nir_var_shader_out;
+   if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectTemp)
+  indirect_mask |= nir_var_local;
+
+   nir_lower_indirect_derefs(nir, indirect_mask);
+
/* Get rid of split copies */
nir = nir_optimize(nir, is_scalar);
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/10] nir: Add a loop analysis pass

2016-12-05 Thread Timothy Arceri
From: Thomas Helland 

This pass detects induction variables and calculates the
trip count of loops to be used for loop unrolling.

I've removed support for float induction values for now, for the
simple reason that they don't appear in my shader-db collection,
and so I don't see it as common enough that we want to pollute the
pass with this in the initial version.

V2: Rebase, adapt to removal of function overloads

V3: (Timothy Arceri)
 - don't try to find trip count if loop terminator conditional is a phi
 - fix trip count for do-while loops
 - replace conditional type != alu assert with return
 - disable unrolling of loops with continues
 - multiple fixes to memory allocation, stop leaking and don't destroy
   structs we want to use for unrolling.
 - fix iteration count bugs when induction var not on RHS of condition
 - add FIXME for && conditions
 - calculate trip count for unsigned induction/limit vars

V4: (Timothy Arceri)
- count instructions in a loop
- set the limiting_terminator even if we can't find the trip count for
 all terminators. This is needed for complex unrolling where we handle
 2 terminators and the trip count is unknown for one of them.
- restructure structs so we don't keep information not required after
 analysis and remove dead fields.
- force unrolling in some cases as per the rules in the GLSL IR pass

V5: (Timothy Arceri)
- fix metadata mask value 0x10 vs 0x16

V6: (Timothy Arceri)
- merge loop_variable and nir_loop_variable structs and lists suggested by Jason
- remove induction var hash table and store pointer to induction information in
  the loop_variable suggested by Jason.
- use lowercase list_addtail() suggested by Jason.
- tidy up init_loop_block() as per Jason's suggestions.
- replace switch with nir_op_infos[alu->op].num_inputs == 2 in
  is_var_basic_induction_var() as suggested by Jason.
- use nir_block_last_instr() in and rename foreach_cf_node_ex_loop() as
  suggested by Jason.
- fix else check for is_trivial_loop_terminator() as per Connor's suggestions.
- simplify offset for induction variables incremented before the exit
  condition is checked.
- replace nir_op_isub check with assert() as it should have been lowered away.

V7: (Timothy Arceri)
- use rzalloc() on nir_loop struct creation. Worked previously because ralloc()
  was broken and always zeroed the struct.
- fix cf_node_find_loop_jumps() to find jumps when loops contain
  nested if statements. Code is tidier as a result.
---
 src/compiler/Makefile.sources   |   2 +
 src/compiler/nir/nir.c  |   2 +-
 src/compiler/nir/nir.h  |  36 +-
 src/compiler/nir/nir_loop_analyze.c | 941 
 src/compiler/nir/nir_metadata.c |   8 +-
 5 files changed, 986 insertions(+), 3 deletions(-)
 create mode 100644 src/compiler/nir/nir_loop_analyze.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 6a7dcd8..3090677 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -193,6 +193,8 @@ NIR_FILES = \
nir/nir_intrinsics.c \
nir/nir_intrinsics.h \
nir/nir_liveness.c \
+   nir/nir_loop_analyze.c \
+   nir/nir_loop_analyze.h \
nir/nir_lower_alu_to_scalar.c \
nir/nir_lower_atomics.c \
nir/nir_lower_bitmap.c \
diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
index cfb032c..6e308d5 100644
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -393,7 +393,7 @@ nir_if_create(nir_shader *shader)
 nir_loop *
 nir_loop_create(nir_shader *shader)
 {
-   nir_loop *loop = ralloc(shader, nir_loop);
+   nir_loop *loop = rzalloc(shader, nir_loop);
 
cf_init(>cf_node, nir_cf_node_loop);
 
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 3e6d168..b8b44a7 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1546,9 +1546,36 @@ typedef struct nir_if {
 } nir_if;
 
 typedef struct {
+   nir_if *nif;
+
+   nir_instr *conditional_instr;
+
+   struct list_head loop_terminator_link;
+} nir_loop_terminator;
+
+typedef struct {
+   /* Number of instructions in the loop */
+   unsigned num_instructions;
+
+   /* How many times the loop is run (if known) */
+   unsigned trip_count;
+   bool is_trip_count_known;
+
+   /* Unroll the loop regardless of its size */
+   bool force_unroll;
+
+   nir_loop_terminator *limiting_terminator;
+
+   /* A list of loop_terminators terminating this loop. */
+   struct list_head loop_terminator_list;
+} nir_loop_info;
+
+typedef struct {
nir_cf_node cf_node;
 
struct exec_list body; /** < list of nir_cf_node */
+
+   nir_loop_info *info;
 } nir_loop;
 
 /**
@@ -1561,6 +1588,7 @@ typedef enum {
nir_metadata_dominance = 0x2,
nir_metadata_live_ssa_defs = 0x4,
nir_metadata_not_properly_reset = 0x8,
+   nir_metadata_loop_analysis = 0x10,
 } nir_metadata;
 
 typedef struct {
@@ -1789,6 +1817,8 @@ typedef struct nir_shader_compiler_options {
 * 

[Mesa-dev] [PATCH 01/10] i965: allow sampler indirects on all gens

2016-12-05 Thread Timothy Arceri
Without this we will regress the max-samplers piglit test on Gen6
and lower when loop unrolling is done in NIR. There is a check
in the GLSL IR linker that errors when it finds indirects and
EmitNoIndirectSampler is set.

As far as I can tell there is no reason for not enabling this for
all gens regardless of whether they fully support ARB_gpu_shader5
or not.
---
 src/mesa/drivers/dri/i965/brw_compiler.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c 
b/src/mesa/drivers/dri/i965/brw_compiler.c
index 1aa72bc..6a73719 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.c
+++ b/src/mesa/drivers/dri/i965/brw_compiler.c
@@ -133,10 +133,6 @@ brw_compiler_create(void *mem_ctx, const struct 
gen_device_info *devinfo)
   compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar;
   compiler->glsl_compiler_options[i].OptimizeForAOS = !is_scalar;
 
-  /* !ARB_gpu_shader5 */
-  if (devinfo->gen < 7)
- compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true;
-
   if (is_scalar) {
  compiler->glsl_compiler_options[i].NirOptions = _nir_options;
   } else {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] anv: Port the guardband setting code from i965

2016-12-05 Thread Jason Ekstrand
---
 src/intel/vulkan/gen8_cmd_buffer.c | 24 
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/src/intel/vulkan/gen8_cmd_buffer.c 
b/src/intel/vulkan/gen8_cmd_buffer.c
index 3e4aa9b..6b56d06 100644
--- a/src/intel/vulkan/gen8_cmd_buffer.c
+++ b/src/intel/vulkan/gen8_cmd_buffer.c
@@ -44,6 +44,22 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer 
*cmd_buffer)
for (uint32_t i = 0; i < count; i++) {
   const VkViewport *vp = [i];
 
+  /* According to the "Vertex X,Y Clamping and Quantization" section of
+   * the Strips and Fans documentation, objects must not have a
+   * screen-space extents of over 8192 pixels, or they may be
+   * mis-rasterized.  The maximum screen space coordinates of a small
+   * object may larger, but we have no way to enforce the object size
+   * other than through clipping.
+   *
+   * If you're surprised that we set clip to -gbx to +gbx and it seems
+   * like we'll end up with 16384 wide, note that for a 8192-wide render
+   * target, we'll end up with a normal (-1, 1) clip volume that just
+   * covers the drawable.
+   */
+  const float maximum_guardband_extent = 8192;
+  const float gbx = maximum_guardband_extent / vp->width;
+  const float gby = maximum_guardband_extent / vp->height;
+
   /* The gen7 state struct has just the matrix and guardband fields, the
* gen8 struct adds the min/max viewport fields. */
   struct GENX(SF_CLIP_VIEWPORT) sf_clip_viewport = {
@@ -53,10 +69,10 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer 
*cmd_buffer)
  .ViewportMatrixElementm30 = vp->x + vp->width / 2,
  .ViewportMatrixElementm31 = vp->y + vp->height / 2,
  .ViewportMatrixElementm32 = 0.0,
- .XMinClipGuardband = -1.0f,
- .XMaxClipGuardband = 1.0f,
- .YMinClipGuardband = -1.0f,
- .YMaxClipGuardband = 1.0f,
+ .XMinClipGuardband = -gbx,
+ .XMaxClipGuardband = gbx,
+ .YMinClipGuardband = -gbx,
+ .YMaxClipGuardband = gbx,
  .XMinViewPort = vp->x,
  .XMaxViewPort = vp->x + vp->width - 1,
  .YMinViewPort = vp->y,
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Allocate at least some URB space even when max_vertices = 0.

2016-12-05 Thread Ian Romanick
On 12/05/2016 01:39 PM, Kenneth Graunke wrote:
> Allocating zero URB space is a really bad idea.  The hardware has to
> give threads a handle to their URB space, and threads have to use that
> to terminate the thread.  Having it be an empty region just breaks a
> lot of assumptions.  Hence, why we asserted that it isn't possible.
> 
> Unfortunately, it /is/ possible prior to Gen8, if max_vertices = 0.
> In theory a geometry shader could do SSBO/image access and maybe
> still accomplish something.  In reality, this is tripped up by
> conformance tests.
> 
> Gen8+ already avoids this problem by placing the vertex count DWord
> in the URB entry header.  This fixes things on earlier generations.
> 
> Signed-off-by: Kenneth Graunke 

Assuming this is the same patch you sent me a couple months ago,

Tested-by: Ian Romanick 

Possibly a candidate for stable?

> ---
>  src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 8 +++-
>  1 file changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
> index 900d9d3..3894a63 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
> @@ -781,7 +781,13 @@ brw_compile_gs(const struct brw_compiler *compiler, void 
> *log_data,
> if (compiler->devinfo->gen >= 8)
>output_size_bytes += 32;
>  
> -   assert(output_size_bytes >= 1);
> +   /* Shaders can technically set max_vertices = 0, at which point we
> +* may have a URB size of 0 bytes.  Nothing good can come from that,
> +* so enforce a minimum size.
> +*/
> +   if (output_size_bytes == 0)
> +  output_size_bytes = 1;
> +
> unsigned max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
> if (compiler->devinfo->gen == 6)
>max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/50] GL_ARB_gpu_shader_int64... this time for sure!

2016-12-05 Thread Connor Abbott
On Mon, Dec 5, 2016 at 5:48 PM, Matt Turner  wrote:
> On Mon, Dec 5, 2016 at 2:20 PM, Connor Abbott  wrote:
>> On Mon, Dec 5, 2016 at 5:09 PM, Connor Abbott  wrote:
>>> On Mon, Dec 5, 2016 at 3:22 PM, Matt Turner  wrote:
 On 12/05, Matt Turner wrote:
>
> On 11/28, Ian Romanick wrote:
>>
>> From: Ian Romanick 
>>Patches 42 through 50 enable the extension on BDW+.
>
>
> 42-48 are
>
> Reviewed-by: Matt Turner 
>
> I don't understand the 64-bit CMP issue, so I'm booting a SKL to see how
> fp64 works.


 Ah, I think  I see. Because 16x doubles take up 4 registers, we have to
 emit two CMP instructions, one with 1Q and one with 2Q:

 cmp.ge.f0(8)null<1>DF   g2.2<0,1,0>DF   (abs)g11<4,4,1>DF { align1
 1Q };
 cmp.ge.f0(8)null<1>DF   g2.2<0,1,0>DF   (abs)g7<4,4,1>DF { align1 
 2Q
 };

 (from fs-op-add-double-double.shader_test)

 Makes sense to me. 49 is

 Reviewed-by: Matt Turner 
>>>
>>> Actually, it's something a little different. The splitting you're
>>> talking about is handled just fine by curro's SIMD lowering pass. The
>>> issue here is that if you don't specify a null destination register
>>> (in which case this is a moot point), CMP will always output the same
>>> destination bitsize as the source bitsize. That is, if you compare two
>>> registers with 8 doubles each (two SIMD8 registers each), the result
>>> will take up two SIMD8 registers instead of one as you'd expect. I
>>> couldn't track this down in the PRM, but I definitely remember having
>>> to implement it and getting wrong results without it. The end result
>>> is that you have to use a strided move to get the low 32 bits of each
>>> 64-bit destination channel, which is what subscript() does. This
>>> happens irrespective of whether you're compiling for SIMD8 or SIMD16.
>>> Of course, in this case some backend optimizations have managed to
>>> remove the destination register, so that's why you don't see it here,
>>> but if you do something trickier, like store the result to a buffer,
>>> the strided mov will be there.
>>>
>>> Anyways, that's what I remember of it... it's been a while.
>>
>> Although, the example you gave has a bug, since the second CMP
>> overwrites the result of the previous one... it looks like
>> lower_simd_width isn't offsetting the flag register correctly when
>> splitting the CMP.
>
> I assumed that quarter control would select which flag subregister to
> write... I sure hope that's how the hardware works.

Ah, yeah, that's right.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeon/vce Handle H.264 level 5.2

2016-12-05 Thread Andy Furniss

Leo Liu wrote:

Hi Andy,

Please rebase all your reviewed patches, and add RB, and send them to me
I will commit them for you.


Ok, thanks, I've sent two to you.



Cheers,

Leo



On 12/05/2016 03:14 PM, Andy Furniss wrote:

Hi, I did a v2, can it be applied?

https://patchwork.freedesktop.org/patch/118006/

Christian König wrote:

Am 25.10.2016 um 00:39 schrieb Andy Furniss:

For gstreamer
Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=91281

Signed-off-by: Andy Furniss 
---
  src/gallium/drivers/radeon/radeon_vce.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/radeon_vce.c
b/src/gallium/drivers/radeon/radeon_vce.c
index ef93e46..6d6d414 100644
--- a/src/gallium/drivers/radeon/radeon_vce.c
+++ b/src/gallium/drivers/radeon/radeon_vce.c
@@ -178,13 +178,13 @@ static unsigned get_cpb_num(struct rvce_encoder
*enc)
  case 41:
  dpb = 32768;
  break;
-default:
  case 42:
  dpb = 34816;
  break;
  case 50:
  dpb = 110400;
  break;
+default:
  case 51:


Setting the default to the maximum is probably a good idea, but I would
also add an explicit "case 52:" here as well just for documentation
purposes.

With that fixed the patch is Reviewed-by: Christian König
.

Regards,
Christian.


  dpb = 184320;
  break;






___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev





___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] strict-aliasing violation fix

2016-12-05 Thread Edward O'Callaghan
Just fyi I have not tested this, however it should fix the
problem highlighted.

Please kindly Review,

Edward O'Callaghan (1):
 [PATCH] virgl: Fix a strict-aliasing violation in the encoder
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] virgl: Fix a strict-aliasing violation in the encoder

2016-12-05 Thread Edward O'Callaghan
As per the C spec, it is illegal to alias pointers to different
types. This results in undefined behaviour after optimization
passes, resulting in very subtle bugs that happen only on a
full moon..

Use a memcpy() as a well defined coercion between the double
to uint64_t interpretations of the memory.

Signed-off-by: Edward O'Callaghan 
---
 src/gallium/drivers/virgl/virgl_encode.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/virgl/virgl_encode.c 
b/src/gallium/drivers/virgl/virgl_encode.c
index be72f70..611e676 100644
--- a/src/gallium/drivers/virgl/virgl_encode.c
+++ b/src/gallium/drivers/virgl/virgl_encode.c
@@ -21,6 +21,8 @@
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 #include 
+#include 
+#include 
 
 #include "util/u_format.h"
 #include "util/u_memory.h"
@@ -315,12 +317,16 @@ int virgl_encode_clear(struct virgl_context *ctx,
   double depth, unsigned stencil)
 {
int i;
+   uint64_t qword;
+
+   assert(sizeof(qword) == sizeof(depth));
+   memcpy(, , sizeof(qword));
 
virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_CLEAR, 0, 
VIRGL_OBJ_CLEAR_SIZE));
virgl_encoder_write_dword(ctx->cbuf, buffers);
for (i = 0; i < 4; i++)
   virgl_encoder_write_dword(ctx->cbuf, color->ui[i]);
-   virgl_encoder_write_qword(ctx->cbuf, *(uint64_t *));
+   virgl_encoder_write_qword(ctx->cbuf, qword);
virgl_encoder_write_dword(ctx->cbuf, stencil);
return 0;
 }
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/8] nir: Add a pass for lowering away constant initializers

2016-12-05 Thread Jason Ekstrand
On Thu, Jul 21, 2016 at 2:52 AM, Iago Toral  wrote:

> On Wed, 2016-07-20 at 15:28 -0700, Jason Ekstrand wrote:
> > Signed-off-by: Jason Ekstrand 
> > ---
> >  src/compiler/Makefile.sources  |   1 +
> >  src/compiler/nir/nir.h |   2 +
> >  src/compiler/nir/nir_lower_constant_initializers.c | 102
> > +
> >  3 files changed, 105 insertions(+)
> >  create mode 100644
> > src/compiler/nir/nir_lower_constant_initializers.c
> >
> > diff --git a/src/compiler/Makefile.sources
> > b/src/compiler/Makefile.sources
> > index 0ff9b23..127b62e 100644
> > --- a/src/compiler/Makefile.sources
> > +++ b/src/compiler/Makefile.sources
> > @@ -190,6 +190,7 @@ NIR_FILES = \
> >   nir/nir_lower_bitmap.c \
> >   nir/nir_lower_clamp_color_outputs.c \
> >   nir/nir_lower_clip.c \
> > + nir/nir_lower_constant_initializers.c \
> >   nir/nir_lower_double_ops.c \
> >   nir/nir_lower_double_packing.c \
> >   nir/nir_lower_drawpixels.c \
> > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> > index 54598a2..99c2fc0 100644
> > --- a/src/compiler/nir/nir.h
> > +++ b/src/compiler/nir/nir.h
> > @@ -2330,6 +2330,8 @@ void nir_lower_io_types(nir_shader *shader);
> >  void nir_lower_vars_to_ssa(nir_shader *shader);
> >
> >  bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode
> > modes);
> > +bool nir_lower_constant_initializers(nir_shader *shader,
> > + nir_variable_mode modes);
> >
> >  void nir_move_vec_src_uses_to_dest(nir_shader *shader);
> >  bool nir_lower_vec_to_movs(nir_shader *shader);
> > diff --git a/src/compiler/nir/nir_lower_constant_initializers.c
> > b/src/compiler/nir/nir_lower_constant_initializers.c
> > new file mode 100644
> > index 000..d0935e9
> > --- /dev/null
> > +++ b/src/compiler/nir/nir_lower_constant_initializers.c
> > @@ -0,0 +1,102 @@
> > +/*
> > + * Copyright © 2016 Intel Corporation
> > + *
> > + * Permission is hereby granted, free of charge, to any person
> > obtaining a
> > + * copy of this software and associated documentation files (the
> > "Software"),
> > + * to deal in the Software without restriction, including without
> > limitation
> > + * the rights to use, copy, modify, merge, publish, distribute,
> > sublicense,
> > + * and/or sell copies of the Software, and to permit persons to whom
> > the
> > + * Software is furnished to do so, subject to the following
> > conditions:
> > + *
> > + * The above copyright notice and this permission notice (including
> > the next
> > + * paragraph) shall be included in all copies or substantial
> > portions of the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> > EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> > MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
> > EVENT SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
> > OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> > ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> > OTHER DEALINGS
> > + * IN THE SOFTWARE.
> > + */
> > +
> > +#include "nir.h"
> > +#include "nir_builder.h"
> > +
> > +static bool
> > +deref_apply_constant_initializer(nir_deref_var *deref, void *state)
> > +{
> > +   struct nir_builder *b = state;
> > +
> > +   nir_load_const_instr *initializer =
> > +  nir_deref_get_const_initializer_load(b->shader, deref);
> > +   nir_builder_instr_insert(b, >instr);
> > +
> > +   nir_store_deref_var(b, deref, >def, 0xf);
> > +
> > +   return true;
> > +}
> > +
> > +static bool
> > +lower_const_initializer(struct nir_builder *b, struct exec_list
> > *var_list)
> > +{
> > +   bool progress = false;
> > +
> > +   b->cursor = nir_before_cf_list(>impl->body);
> > +
> > +   nir_foreach_variable(var, var_list) {
> > +  if (!var->constant_initializer)
> > + continue;
> > +
> > +  progress = true;
> > +
> > +  nir_deref_var deref;
> > +  deref.deref.deref_type = nir_deref_type_var,
> > +  deref.deref.child = NULL;
> > +  deref.deref.type = var->type,
> > +  deref.var = var;
> > +
> > +  nir_deref_foreach_leaf(,
> > deref_apply_constant_initializer, b);
> > +
> > +  var->constant_initializer = NULL;
> > +   }
> > +
> > +   return progress;
> > +}
> > +
> > +bool
> > +nir_lower_constant_initializers(nir_shader *shader,
> > nir_variable_mode modes)
> > +{
> > +   bool progress = false;
> > +
> > +   nir_builder builder;
> > +   if (modes & ~nir_var_local)
> > +  nir_builder_init(, nir_shader_get_entrypoint(shader)-
> > >impl);
> > +
> > +   if (modes & nir_var_shader_out)
> > +  progress |= lower_const_initializer(, 
> > >outputs);
> > +
> > +   if (modes & nir_var_global)
> > +  progress |= lower_const_initializer(, 
> > >globals);
> > +
> > +   if 

Re: [Mesa-dev] [PATCH v2 000/103] i965 Haswell ARB_gpu_shader_fp64 / OpenGL 4.0

2016-12-05 Thread Matt Turner

On 10/11, Iago Toral Quiroga wrote:

It's been some time since


... anyone has reviewed your patches. Sorry. :(

I'm going to review from your rebased i965-fp64-gen7-scalar-vec4-rc2
branch. There have probably been some reorderings or other changes due
to rebasing since the patches were sent, so I'm going to paste the list
of patches below and then attempt to list any review comments after the
patch name.


A couple of patches have an extra newline in the commit message between
*-by: tags. Would be nice to make a pass through and fix that.


i965/nir: double/dvec2 uniforms only need to be padded to a single vec4 slot
i965/vec4/nir: simplify glsl_type_for_nir_alu_type()
i965/vec4/nir: allocate two registers for dvec3/dvec4
i965/vec4/nir: Add bit-size information to types
i965/vec4/nir: support doubles in ALU operations
i965/vec4/nir: set the right type for 64-bit registers
i965/vec4/nir: fix emitting 64-bit immediates
i965/vec4: add support for printing DF immediates
i965/vec4: add double/float conversion pseudo-opcodes

I wonder if we should allow MOV F/DF and DF/F operations in the
IR and then have a lowering pass that "legalizes" them. I'm
happy to leave that experiment for after this series lands.

i965/vec4: translate d2f/f2d
i965: add brw_vecn_grf()
i965/vec4: set correct register regions for 32-bit and 64-bit
i965/disasm: align16 DF source regions have a width of 2

It's actually kind of weird to print width and horizontal stride
for align16 sources, since they don't exist in the instruction
word. We should probably print only the vertical stride. I don't
care if that's fixed as part of this series.

i965/vec4: We only support 32-bit integer ALU operations for now
i965/vec4: add dst_null_df()
i965/vec4: add VEC4_OPCODE_PICK_{LOW,HIGH}_32BIT opcodes
i965/vec4: add VEC4_OPCODE_SET_{LOW,HIGH}_32BIT opcodes

If I understand correctly, these opcodes map to instructions
like

mov(XXX) dst<1>UD  src<8,4,2>:UD

Is the exec_size 4? I ask, because if it's 8 (and the source
region spans two registers and the dest region spans one) that's
not a legal instruction. If it's 4, then it's legal.

i965/vec4: Fix DCE for VEC4_OPCODE_SET_{LOW,HIGH}_32BIT
i965/vec4: don't copy propagate vector opcodes that operate in align1 mode
i965/vec4: implement double unpacking

	This emits 
	

MOVdvec4_tmp, op[0]
PICK_LO/HI uvec4_tmp, dvec4_tmp
MOVdst, uvec4_tmp

I'm confused about the purpose of the MOVs. It seems like op[0]
should already be a dvec and dst should already be a uvec.

i965/vec4: implement double packing

More or less the same thing here. Looks like we don't need all
of the MOVs.

i965/vec4/nir: implement double comparisons

Trivial: A newline before the if() would be nice.

I have a memory of Curro telling me that the hardware maps each
32-bit chunk in the dst to a single bit in the flag register.
Maybe that's only on IVB, and maybe I'm misremembering. I'm
concerned that while the PICK_LOW+MOV will properly handle the
result that is written to the destination, the result written to
the flag register might be incorrect.

My commit d9b09f8a30 fixed some problems that seem similar in
my mind.

i965/vec4: fix indentation in get_nir_src()
i965/vec4: fix get_nir_dest() to use DF type for 64-bit destinations
i965/vec4: make opt_vector_float ignore doubles
i965/vec4: fix register allocation for 64-bit undef sources
i965/vec4: Rename DF to/from F generator opcodes

I'm not sure replacing "float" with "single" implies that the
opcodes can handle other 32-bit (integer) types, since "single"
is actually the name of the "float" type in some other
programming languages.

Maybe call them VEC4_OPCODE_TO_DOUBLE and
VEC4_OPCODE_FROM_DOUBLE?

i965/vec4: add helpers for conversions to/from doubles

Same thing here.

Also, same confusion about the purpose of the MOVs.

i965/vec4: implement hardware workaround for align16 double to float conversion

This always seemed like a really strange hardware bug, and one
that no one should ever hit.

I'd prefer that, instead of loading an immediate double and then
performing a conversion to float, we just convert the
double to float in the compiler and emit an instruction to load
that.

i965/vec4: implement d2i, d2u, i2d and u2d
i965/vec4: implement d2b

Trivial: s/Curo/Curro/ in commit message.

Trivial: The comment says "predicated MOV", but it's actually a
MOV with conditional_mod.

i965/vec4: implement fsign() for doubles

Trivial: v2 comment and comment in code say "predicated MOV"
like previous patch

i965/vec4: fix optimize predicate for doubles


Re: [Mesa-dev] [PATCH 7/7] glx: unify GLX_SGIX_pbuffer aliased declarations

2016-12-05 Thread Jeremy Huddleston Sequoia

> On Dec 5, 2016, at 11:52 AM, Emil Velikov  wrote:
> 
> From: Emil Velikov 
> 
> No point in having an identical code in two places.
> 
> Not to mention that the Apple one incorrectly uses GLXDrawable as pbuf
> type. This change is both API and ABI safe since the header uses the
> correct GLXPbufferSGIX and both types are a typedef of the same
> primitive XID.
> 
> Cc: Jeremy Huddleston Sequoia 
> Signed-off-by: Emil Velikov 

Reviewed-by: Jeremy Sequoia 
(not tested yet, though)

> ---
> Jeremy, humble poke to send any/all Macports patches to the list ;-)

What patches are you referring to?  AFAIK, all the patches we have in MacPorts 
are hacks that have been rejected by mesa or are things I don't think should be 
in mesa due to lack of polish/hack status.  See:
https://github.com/macports/macports-ports/tree/master/x11/mesa/files

> ---
> src/glx/apple/glx_empty.c | 16 
> src/glx/glx_pbuffer.c |  2 --
> 2 files changed, 18 deletions(-)
> 
> diff --git a/src/glx/apple/glx_empty.c b/src/glx/apple/glx_empty.c
> index 1e9593c..33fffb5 100644
> --- a/src/glx/apple/glx_empty.c
> +++ b/src/glx/apple/glx_empty.c
> @@ -263,19 +263,3 @@ glXGetFBConfigFromVisualSGIX(Display * dpy, XVisualInfo 
> * visinfo)
>return NULL;
> }
> #endif
> -
> -
> -_X_EXPORT
> -GLX_ALIAS_VOID(glXDestroyGLXPbufferSGIX,
> -   (Display * dpy, GLXDrawable pbuf),
> -   (dpy, pbuf), glXDestroyPbuffer)
> -
> - _X_EXPORT GLX_ALIAS_VOID(glXSelectEventSGIX,
> -   (Display * dpy, GLXDrawable drawable,
> -unsigned long mask), (dpy, drawable, mask),
> -   glXSelectEvent)
> -
> - _X_EXPORT GLX_ALIAS_VOID(glXGetSelectedEventSGIX,
> -   (Display * dpy, GLXDrawable drawable,
> -unsigned long *mask), (dpy, drawable, mask),
> -   glXGetSelectedEvent)
> diff --git a/src/glx/glx_pbuffer.c b/src/glx/glx_pbuffer.c
> index 24c073c..0c15d2a 100644
> --- a/src/glx/glx_pbuffer.c
> +++ b/src/glx/glx_pbuffer.c
> @@ -992,7 +992,6 @@ glXDestroyWindow(Display * dpy, GLXWindow win)
> #endif
> }
> 
> -#ifndef GLX_USE_APPLEGL
> _GLX_PUBLIC
> GLX_ALIAS_VOID(glXDestroyGLXPbufferSGIX,
>(Display * dpy, GLXPbufferSGIX pbuf),
> @@ -1008,4 +1007,3 @@ GLX_ALIAS_VOID(glXGetSelectedEventSGIX,
>(Display * dpy, GLXDrawable drawable,
> unsigned long *mask), (dpy, drawable, mask),
>glXGetSelectedEvent)
> -#endif
> -- 
> 2.10.2
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/50] GL_ARB_gpu_shader_int64... this time for sure!

2016-12-05 Thread Matt Turner
On Mon, Dec 5, 2016 at 2:20 PM, Connor Abbott  wrote:
> On Mon, Dec 5, 2016 at 5:09 PM, Connor Abbott  wrote:
>> On Mon, Dec 5, 2016 at 3:22 PM, Matt Turner  wrote:
>>> On 12/05, Matt Turner wrote:

 On 11/28, Ian Romanick wrote:
>
> From: Ian Romanick 
>Patches 42 through 50 enable the extension on BDW+.


 42-48 are

 Reviewed-by: Matt Turner 

 I don't understand the 64-bit CMP issue, so I'm booting a SKL to see how
 fp64 works.
>>>
>>>
>>> Ah, I think  I see. Because 16x doubles take up 4 registers, we have to
>>> emit two CMP instructions, one with 1Q and one with 2Q:
>>>
>>> cmp.ge.f0(8)null<1>DF   g2.2<0,1,0>DF   (abs)g11<4,4,1>DF { align1
>>> 1Q };
>>> cmp.ge.f0(8)null<1>DF   g2.2<0,1,0>DF   (abs)g7<4,4,1>DF { align1 2Q
>>> };
>>>
>>> (from fs-op-add-double-double.shader_test)
>>>
>>> Makes sense to me. 49 is
>>>
>>> Reviewed-by: Matt Turner 
>>
>> Actually, it's something a little different. The splitting you're
>> talking about is handled just fine by curro's SIMD lowering pass. The
>> issue here is that if you don't specify a null destination register
>> (in which case this is a moot point), CMP will always output the same
>> destination bitsize as the source bitsize. That is, if you compare two
>> registers with 8 doubles each (two SIMD8 registers each), the result
>> will take up two SIMD8 registers instead of one as you'd expect. I
>> couldn't track this down in the PRM, but I definitely remember having
>> to implement it and getting wrong results without it. The end result
>> is that you have to use a strided move to get the low 32 bits of each
>> 64-bit destination channel, which is what subscript() does. This
>> happens irrespective of whether you're compiling for SIMD8 or SIMD16.
>> Of course, in this case some backend optimizations have managed to
>> remove the destination register, so that's why you don't see it here,
>> but if you do something trickier, like store the result to a buffer,
>> the strided mov will be there.
>>
>> Anyways, that's what I remember of it... it's been a while.
>
> Although, the example you gave has a bug, since the second CMP
> overwrites the result of the previous one... it looks like
> lower_simd_width isn't offsetting the flag register correctly when
> splitting the CMP.

I assumed that quarter control would select which flag subregister to
write... I sure hope that's how the hardware works.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Allocate at least some URB space even when max_vertices = 0.

2016-12-05 Thread Anuj Phogat
On Mon, Dec 5, 2016 at 1:39 PM, Kenneth Graunke  wrote:
> Allocating zero URB space is a really bad idea.  The hardware has to
> give threads a handle to their URB space, and threads have to use that
> to terminate the thread.  Having it be an empty region just breaks a
> lot of assumptions.  Hence, why we asserted that it isn't possible.
>
> Unfortunately, it /is/ possible prior to Gen8, if max_vertices = 0.
> In theory a geometry shader could do SSBO/image access and maybe
> still accomplish something.  In reality, this is tripped up by
> conformance tests.
>
> Gen8+ already avoids this problem by placing the vertex count DWord
> in the URB entry header.  This fixes things on earlier generations.
>
> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 8 +++-
>  1 file changed, 7 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
> index 900d9d3..3894a63 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
> @@ -781,7 +781,13 @@ brw_compile_gs(const struct brw_compiler *compiler, void 
> *log_data,
> if (compiler->devinfo->gen >= 8)
>output_size_bytes += 32;
>
> -   assert(output_size_bytes >= 1);
> +   /* Shaders can technically set max_vertices = 0, at which point we
> +* may have a URB size of 0 bytes.  Nothing good can come from that,
> +* so enforce a minimum size.
> +*/
> +   if (output_size_bytes == 0)
> +  output_size_bytes = 1;
> +
> unsigned max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
> if (compiler->devinfo->gen == 6)
>max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
> --
> 2.10.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Sounds reasonable to me.
Reviewed-by: Anuj Phogat 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/7] scons: set HAVE_FUNC_ATTRIBUTE_ALIAS

2016-12-05 Thread Jose Fonseca

On 05/12/16 19:52, Emil Velikov wrote:

From: Emil Velikov 

Analogous to the previous commit where we did so for autotools

Cc: Jose Fonseca 
Signed-off-by: Emil Velikov 
---
Jose, I'm not 100% sure if that holds true for the compiler(s) that you
gents use. Can you please double-check ?

Thanks
Emil
---
 include/c99_compat.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/c99_compat.h b/include/c99_compat.h
index 24e96e0..cb690c6 100644
--- a/include/c99_compat.h
+++ b/include/c99_compat.h
@@ -163,6 +163,7 @@ test_c99_compat_h(const void * restrict a,
 #define HAVE_FUNC_ATTRIBUTE_UNUSED 1
 #define HAVE_FUNC_ATTRIBUTE_FORMAT 1
 #define HAVE_FUNC_ATTRIBUTE_PACKED 1
+#define HAVE_FUNC_ATTRIBUTE_ALIAS 1

 #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
/* https://gcc.gnu.org/onlinedocs/gcc-4.3.6/gcc/Other-Builtins.html */




Per 
https://gcc.gnu.org/onlinedocs/gcc-4.2.4/gcc/Function-Attributes.html 
it's supported on GCC-4.2 so it should be there.


Reviewed-by: Jose Fonseca 

Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/50] GL_ARB_gpu_shader_int64... this time for sure!

2016-12-05 Thread Connor Abbott
On Mon, Dec 5, 2016 at 5:09 PM, Connor Abbott  wrote:
> On Mon, Dec 5, 2016 at 3:22 PM, Matt Turner  wrote:
>> On 12/05, Matt Turner wrote:
>>>
>>> On 11/28, Ian Romanick wrote:

 From: Ian Romanick 
Patches 42 through 50 enable the extension on BDW+.
>>>
>>>
>>> 42-48 are
>>>
>>> Reviewed-by: Matt Turner 
>>>
>>> I don't understand the 64-bit CMP issue, so I'm booting a SKL to see how
>>> fp64 works.
>>
>>
>> Ah, I think  I see. Because 16x doubles take up 4 registers, we have to
>> emit two CMP instructions, one with 1Q and one with 2Q:
>>
>> cmp.ge.f0(8)null<1>DF   g2.2<0,1,0>DF   (abs)g11<4,4,1>DF { align1
>> 1Q };
>> cmp.ge.f0(8)null<1>DF   g2.2<0,1,0>DF   (abs)g7<4,4,1>DF { align1 2Q
>> };
>>
>> (from fs-op-add-double-double.shader_test)
>>
>> Makes sense to me. 49 is
>>
>> Reviewed-by: Matt Turner 
>
> Actually, it's something a little different. The splitting you're
> talking about is handled just fine by curro's SIMD lowering pass. The
> issue here is that if you don't specify a null destination register
> (in which case this is a moot point), CMP will always output the same
> destination bitsize as the source bitsize. That is, if you compare two
> registers with 8 doubles each (two SIMD8 registers each), the result
> will take up two SIMD8 registers instead of one as you'd expect. I
> couldn't track this down in the PRM, but I definitely remember having
> to implement it and getting wrong results without it. The end result
> is that you have to use a strided move to get the low 32 bits of each
> 64-bit destination channel, which is what subscript() does. This
> happens irrespective of whether you're compiling for SIMD8 or SIMD16.
> Of course, in this case some backend optimizations have managed to
> remove the destination register, so that's why you don't see it here,
> but if you do something trickier, like store the result to a buffer,
> the strided mov will be there.
>
> Anyways, that's what I remember of it... it's been a while.

Although, the example you gave has a bug, since the second CMP
overwrites the result of the previous one... it looks like
lower_simd_width isn't offsetting the flag register correctly when
splitting the CMP.

>
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/50] GL_ARB_gpu_shader_int64... this time for sure!

2016-12-05 Thread Connor Abbott
On Mon, Dec 5, 2016 at 3:22 PM, Matt Turner  wrote:
> On 12/05, Matt Turner wrote:
>>
>> On 11/28, Ian Romanick wrote:
>>>
>>> From: Ian Romanick 
>>>Patches 42 through 50 enable the extension on BDW+.
>>
>>
>> 42-48 are
>>
>> Reviewed-by: Matt Turner 
>>
>> I don't understand the 64-bit CMP issue, so I'm booting a SKL to see how
>> fp64 works.
>
>
> Ah, I think  I see. Because 16x doubles take up 4 registers, we have to
> emit two CMP instructions, one with 1Q and one with 2Q:
>
> cmp.ge.f0(8)null<1>DF   g2.2<0,1,0>DF   (abs)g11<4,4,1>DF { align1
> 1Q };
> cmp.ge.f0(8)null<1>DF   g2.2<0,1,0>DF   (abs)g7<4,4,1>DF { align1 2Q
> };
>
> (from fs-op-add-double-double.shader_test)
>
> Makes sense to me. 49 is
>
> Reviewed-by: Matt Turner 

Actually, it's something a little different. The splitting you're
talking about is handled just fine by curro's SIMD lowering pass. The
issue here is that if you don't specify a null destination register
(in which case this is a moot point), CMP will always output the same
destination bitsize as the source bitsize. That is, if you compare two
registers with 8 doubles each (two SIMD8 registers each), the result
will take up two SIMD8 registers instead of one as you'd expect. I
couldn't track this down in the PRM, but I definitely remember having
to implement it and getting wrong results without it. The end result
is that you have to use a strided move to get the low 32 bits of each
64-bit destination channel, which is what subscript() does. This
happens irrespective of whether you're compiling for SIMD8 or SIMD16.
Of course, in this case some backend optimizations have managed to
remove the destination register, so that's why you don't see it here,
but if you do something trickier, like store the result to a buffer,
the strided mov will be there.

Anyways, that's what I remember of it... it's been a while.

>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv/TODO: Document sampling from HiZ

2016-12-05 Thread Jason Ekstrand
Ack

On Mon, Dec 5, 2016 at 1:47 PM, Nanley Chery  wrote:

> ---
>
> Hi everyone,
>
> I'm about to start working on this task.
>
>  src/intel/vulkan/TODO | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/intel/vulkan/TODO b/src/intel/vulkan/TODO
> index abfe4b6..915f423 100644
> --- a/src/intel/vulkan/TODO
> +++ b/src/intel/vulkan/TODO
> @@ -16,6 +16,7 @@ Code sharing with GL:
>   - Generalize blorp to use ISL and be sharable between the two drivers
>
>  Performance:
> + - Sampling from HiZ (Nanley)
>   - Multi-{sampled/gen8,LOD,subpass} HiZ
>   - Fast color clears (after HiZ?)
>   - Compressed multisample support
> --
> 2.10.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] anv/TODO: Document sampling from HiZ

2016-12-05 Thread Nanley Chery
---

Hi everyone,

I'm about to start working on this task.

 src/intel/vulkan/TODO | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/intel/vulkan/TODO b/src/intel/vulkan/TODO
index abfe4b6..915f423 100644
--- a/src/intel/vulkan/TODO
+++ b/src/intel/vulkan/TODO
@@ -16,6 +16,7 @@ Code sharing with GL:
  - Generalize blorp to use ISL and be sharable between the two drivers
 
 Performance:
+ - Sampling from HiZ (Nanley)
  - Multi-{sampled/gen8,LOD,subpass} HiZ
  - Fast color clears (after HiZ?)
  - Compressed multisample support
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: Allocate at least some URB space even when max_vertices = 0.

2016-12-05 Thread Kenneth Graunke
Allocating zero URB space is a really bad idea.  The hardware has to
give threads a handle to their URB space, and threads have to use that
to terminate the thread.  Having it be an empty region just breaks a
lot of assumptions.  Hence, why we asserted that it isn't possible.

Unfortunately, it /is/ possible prior to Gen8, if max_vertices = 0.
In theory a geometry shader could do SSBO/image access and maybe
still accomplish something.  In reality, this is tripped up by
conformance tests.

Gen8+ already avoids this problem by placing the vertex count DWord
in the URB entry header.  This fixes things on earlier generations.

Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 900d9d3..3894a63 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -781,7 +781,13 @@ brw_compile_gs(const struct brw_compiler *compiler, void 
*log_data,
if (compiler->devinfo->gen >= 8)
   output_size_bytes += 32;
 
-   assert(output_size_bytes >= 1);
+   /* Shaders can technically set max_vertices = 0, at which point we
+* may have a URB size of 0 bytes.  Nothing good can come from that,
+* so enforce a minimum size.
+*/
+   if (output_size_bytes == 0)
+  output_size_bytes = 1;
+
unsigned max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
if (compiler->devinfo->gen == 6)
   max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] configure: enable glx-tls by default

2016-12-05 Thread Eric Anholt
Emil Velikov  writes:

> From: Emil Velikov 
>
> In the (not too) distant future we'd want to remove this option and
> effectively drop the other codepath(s) we have in our dispatch.
>
> Linux distributions have been using --enable-glx-tls for a number of
> years. Some/most BSD platforms still don't support this, yet this should
> serve as an encouragement to move things forwards.

These two are:

Reviewed-by: Eric Anholt 


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/50] GL_ARB_gpu_shader_int64... this time for sure!

2016-12-05 Thread Eric Anholt
Matt Turner  writes:

> On Wed, Nov 30, 2016 at 1:11 PM, Matt Turner  wrote:
>> On 11/28, Ian Romanick wrote:
>>>
>>> From: Ian Romanick 
>>>
>>> I believe that I have addressed all of the review feedback from the
>>> previous iteration.  Many of the patches have been reviewed, and they
>>> should be ready to go.
>>>
>>> Patches marked with "vN" in the subject have changed in a non-trivial
>>> way since last being sent to the list.
>>>
>>> Several patches that have not changed need review:
>>>
>>>Patches 23 through 33 add lowering passes for 64-bit operations.
>>
>>
>> I sent a few comments, and I cannot claim to have verified the division
>> routine, but the rest are
>>
>> Reviewed-by: Matt Turner 
>
> Having thought more about this, I think doing this in GLSL IR is not
> the way we should be going.
>
> Presumably we're going to want to support int64 in SPIR-V, and that
> necessitates lowering these operations in NIR.
>
> I don't think it's a good idea to do this outside of NIR.

That was my thought as well.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeon/vce Handle H.264 level 5.2

2016-12-05 Thread Leo Liu

Hi Andy,

Please rebase all your reviewed patches, and add RB, and send them to me 
I will commit them for you.


Cheers,

Leo



On 12/05/2016 03:14 PM, Andy Furniss wrote:

Hi, I did a v2, can it be applied?

https://patchwork.freedesktop.org/patch/118006/

Christian König wrote:

Am 25.10.2016 um 00:39 schrieb Andy Furniss:

For gstreamer
Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=91281

Signed-off-by: Andy Furniss 
---
  src/gallium/drivers/radeon/radeon_vce.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/radeon_vce.c
b/src/gallium/drivers/radeon/radeon_vce.c
index ef93e46..6d6d414 100644
--- a/src/gallium/drivers/radeon/radeon_vce.c
+++ b/src/gallium/drivers/radeon/radeon_vce.c
@@ -178,13 +178,13 @@ static unsigned get_cpb_num(struct rvce_encoder
*enc)
  case 41:
  dpb = 32768;
  break;
-default:
  case 42:
  dpb = 34816;
  break;
  case 50:
  dpb = 110400;
  break;
+default:
  case 51:


Setting the default to the maximum is probably a good idea, but I would
also add an explicit "case 52:" here as well just for documentation
purposes.

With that fixed the patch is Reviewed-by: Christian König
.

Regards,
Christian.


  dpb = 184320;
  break;






___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Emit proper NOPs.

2016-12-05 Thread Ian Romanick
On 12/05/2016 12:24 PM, Matt Turner wrote:
> The PRMs for HSW and newer say that other than the opcode and DebugCtrl
> bits of the instruction word, the rest must be zero.
> 
> By zeroing the instruction word manually, we avoid using any of the
> state inherited through brw_codegen.
> 
> Bug: https://bugs.freedesktop.org/show_bug.cgi?id=96959
 ^zilla

Reviewed-by: Ian Romanick 

> ---
>  src/mesa/drivers/dri/i965/brw_eu_emit.c | 6 ++
>  1 file changed, 2 insertions(+), 4 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
> b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> index cfb3fa0..ca04221 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
> +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> @@ -1282,10 +1282,8 @@ brw_F16TO32(struct brw_codegen *p, struct brw_reg dst, 
> struct brw_reg src)
>  void brw_NOP(struct brw_codegen *p)
>  {
> brw_inst *insn = next_insn(p, BRW_OPCODE_NOP);
> -   brw_inst_set_exec_size(p->devinfo, insn, BRW_EXECUTE_1);
> -   brw_set_dest(p, insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
> -   brw_set_src0(p, insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
> -   brw_set_src1(p, insn, brw_imm_ud(0x0));
> +   memset(insn, 0, sizeof(*insn));
> +   brw_inst_set_opcode(p->devinfo, insn, BRW_OPCODE_NOP);
>  }
>  
>  
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 0/7] nir, i965/fs: Lower indirect local variables to scratch

2016-12-05 Thread Jason Ekstrand
On Dec 5, 2016 12:14 PM, "Connor Abbott"  wrote:

I'm a little worried about this since it seems like the
load/store_scratch intrinsics are basically doing the same thing as
registers were originally intended to do. Either we should use the
existing register lowering, and make it conditional on the size like
you've done here, or we should just gut larger-than-vec4 registers
entirely and go with this instead. TBH, I'm kinda leaning towards the
latter, since I know Rob has expressed some interest in using
something like this instead of registers, and it seems like nobody
really wants the ability to indirectly address stuff inside, say, an
add instruction anyways.


The vec4 backend does use indirects on registers today.  Another option in
this series would be to put the heuristic in lower_indirect_derefs instead
and let the larger indirected things turn into registers and do it that
way.  But I do like having separate instructions rather than those weird
indirect sources.

On Mon, Dec 5, 2016 at 2:59 PM, Jason Ekstrand  wrote:
> This little series implements lowering of indirectly accessed local
> variables larger than some threshold (8 floats?) to scratch space.  This
> improves the performance of the CSDof synmark test by about 45% because it
> uses a large temporary array which we lower to if-ladders and then to
piles
> of scratch.
>
> The approach I've taken here is to add a new set of NIR intrinsics for
> reading and writing scratch.  It's treated like any other form of IO with
a
> new nir_lower_vars_to_scratch pass that lowers everything over a given
size
> threshold to scratch space.  Why do this in NIR?  The primary reason is
> that this lets us lower to scratch *before* we do
nir_lower_indirect_derefs
> so we can still use registers for small indirects where an if-ladder is
> more efficient than scratch space.  Also, after giving it a try, I really
> liked how those intrinsics turned out.
>
> This series is marked RFC because it's still a bit sketchy at the moment.
> There are a few things that would need to be finished before it's ready
for
> landing:
>
>  1) I should probably run it through piglit.
>  2) The back-end portion doesn't yet handle doubles
>  3) We should use send-from-GRF for non-spill direct scratch reads/writes.
> Right now, it's still using MRFs which isn't great.
>
> If people like where this series is going, I can probably find some time
to
> polish it to the point of mergeable.
>
> Jason Ekstrand (6):
>   nir: Add load/store_scratch intrinsics
>   nir: Add a pass for selectively lowering variables to scratch space
>   i965/fs: Add a CHANNEL_IDS opcode
>   i965/fs: Add DWord scattered read/write opcodes
>   i965/fs: Implement the new nir_scratch_load/store opcodes
>   i965: Lower large local arrays to scratch
>
> Timothy Arceri (1):
>   i965: use nir_lower_indirect_derefs() for GLSL
>
>  src/compiler/Makefile.sources |   1 +
>  src/compiler/nir/nir.h|   8 +-
>  src/compiler/nir/nir_clone.c  |   1 +
>  src/compiler/nir/nir_intrinsics.h |   6 +-
>  src/compiler/nir/nir_lower_scratch.c  | 258
++
>  src/intel/vulkan/anv_pipeline.c   |  10 -
>  src/mesa/drivers/dri/i965/brw_defines.h   |  10 +
>  src/mesa/drivers/dri/i965/brw_fs.cpp  | 113 ++
>  src/mesa/drivers/dri/i965/brw_fs.h|   6 +
>  src/mesa/drivers/dri/i965/brw_fs_cse.cpp  |   1 +
>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp| 170 ++
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp  |  42 +++-
>  src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp |   4 +-
>  src/mesa/drivers/dri/i965/brw_link.cpp|  13 --
>  src/mesa/drivers/dri/i965/brw_nir.c   |  13 ++
>  src/mesa/drivers/dri/i965/brw_shader.cpp  |  12 +
>  16 files changed, 631 insertions(+), 37 deletions(-)
>  create mode 100644 src/compiler/nir/nir_lower_scratch.c
>
> --
> 2.5.0.400.gff86faf
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: Emit proper NOPs.

2016-12-05 Thread Matt Turner
The PRMs for HSW and newer say that other than the opcode and DebugCtrl
bits of the instruction word, the rest must be zero.

By zeroing the instruction word manually, we avoid using any of the
state inherited through brw_codegen.

Bug: https://bugs.freedesktop.org/show_bug.cgi?id=96959
---
 src/mesa/drivers/dri/i965/brw_eu_emit.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index cfb3fa0..ca04221 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -1282,10 +1282,8 @@ brw_F16TO32(struct brw_codegen *p, struct brw_reg dst, 
struct brw_reg src)
 void brw_NOP(struct brw_codegen *p)
 {
brw_inst *insn = next_insn(p, BRW_OPCODE_NOP);
-   brw_inst_set_exec_size(p->devinfo, insn, BRW_EXECUTE_1);
-   brw_set_dest(p, insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
-   brw_set_src0(p, insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
-   brw_set_src1(p, insn, brw_imm_ud(0x0));
+   memset(insn, 0, sizeof(*insn));
+   brw_inst_set_opcode(p->devinfo, insn, BRW_OPCODE_NOP);
 }
 
 
-- 
2.7.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/50] GL_ARB_gpu_shader_int64... this time for sure!

2016-12-05 Thread Matt Turner

On 12/05, Matt Turner wrote:

On 11/28, Ian Romanick wrote:

From: Ian Romanick 
   Patches 42 through 50 enable the extension on BDW+.


42-48 are

Reviewed-by: Matt Turner 

I don't understand the 64-bit CMP issue, so I'm booting a SKL to see how
fp64 works.


Ah, I think  I see. Because 16x doubles take up 4 registers, we have to
emit two CMP instructions, one with 1Q and one with 2Q:

cmp.ge.f0(8)null<1>DF   g2.2<0,1,0>DF   (abs)g11<4,4,1>DF { align1 1Q };
cmp.ge.f0(8)null<1>DF   g2.2<0,1,0>DF   (abs)g7<4,4,1>DF { align1 2Q };

(from fs-op-add-double-double.shader_test)

Makes sense to me. 49 is

Reviewed-by: Matt Turner 


signature.asc
Description: Digital signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeon/vce Handle H.264 level 5.2

2016-12-05 Thread Andy Furniss

Hi, I did a v2, can it be applied?

https://patchwork.freedesktop.org/patch/118006/

Christian König wrote:

Am 25.10.2016 um 00:39 schrieb Andy Furniss:

For gstreamer
Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=91281

Signed-off-by: Andy Furniss 
---
  src/gallium/drivers/radeon/radeon_vce.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/radeon_vce.c
b/src/gallium/drivers/radeon/radeon_vce.c
index ef93e46..6d6d414 100644
--- a/src/gallium/drivers/radeon/radeon_vce.c
+++ b/src/gallium/drivers/radeon/radeon_vce.c
@@ -178,13 +178,13 @@ static unsigned get_cpb_num(struct rvce_encoder
*enc)
  case 41:
  dpb = 32768;
  break;
-default:
  case 42:
  dpb = 34816;
  break;
  case 50:
  dpb = 110400;
  break;
+default:
  case 51:


Setting the default to the maximum is probably a good idea, but I would
also add an explicit "case 52:" here as well just for documentation
purposes.

With that fixed the patch is Reviewed-by: Christian König
.

Regards,
Christian.


  dpb = 184320;
  break;






___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 0/7] nir, i965/fs: Lower indirect local variables to scratch

2016-12-05 Thread Connor Abbott
I'm a little worried about this since it seems like the
load/store_scratch intrinsics are basically doing the same thing as
registers were originally intended to do. Either we should use the
existing register lowering, and make it conditional on the size like
you've done here, or we should just gut larger-than-vec4 registers
entirely and go with this instead. TBH, I'm kinda leaning towards the
latter, since I know Rob has expressed some interest in using
something like this instead of registers, and it seems like nobody
really wants the ability to indirectly address stuff inside, say, an
add instruction anyways.

On Mon, Dec 5, 2016 at 2:59 PM, Jason Ekstrand  wrote:
> This little series implements lowering of indirectly accessed local
> variables larger than some threshold (8 floats?) to scratch space.  This
> improves the performance of the CSDof synmark test by about 45% because it
> uses a large temporary array which we lower to if-ladders and then to piles
> of scratch.
>
> The approach I've taken here is to add a new set of NIR intrinsics for
> reading and writing scratch.  It's treated like any other form of IO with a
> new nir_lower_vars_to_scratch pass that lowers everything over a given size
> threshold to scratch space.  Why do this in NIR?  The primary reason is
> that this lets us lower to scratch *before* we do nir_lower_indirect_derefs
> so we can still use registers for small indirects where an if-ladder is
> more efficient than scratch space.  Also, after giving it a try, I really
> liked how those intrinsics turned out.
>
> This series is marked RFC because it's still a bit sketchy at the moment.
> There are a few things that would need to be finished before it's ready for
> landing:
>
>  1) I should probably run it through piglit.
>  2) The back-end portion doesn't yet handle doubles
>  3) We should use send-from-GRF for non-spill direct scratch reads/writes.
> Right now, it's still using MRFs which isn't great.
>
> If people like where this series is going, I can probably find some time to
> polish it to the point of mergeable.
>
> Jason Ekstrand (6):
>   nir: Add load/store_scratch intrinsics
>   nir: Add a pass for selectively lowering variables to scratch space
>   i965/fs: Add a CHANNEL_IDS opcode
>   i965/fs: Add DWord scattered read/write opcodes
>   i965/fs: Implement the new nir_scratch_load/store opcodes
>   i965: Lower large local arrays to scratch
>
> Timothy Arceri (1):
>   i965: use nir_lower_indirect_derefs() for GLSL
>
>  src/compiler/Makefile.sources |   1 +
>  src/compiler/nir/nir.h|   8 +-
>  src/compiler/nir/nir_clone.c  |   1 +
>  src/compiler/nir/nir_intrinsics.h |   6 +-
>  src/compiler/nir/nir_lower_scratch.c  | 258 
> ++
>  src/intel/vulkan/anv_pipeline.c   |  10 -
>  src/mesa/drivers/dri/i965/brw_defines.h   |  10 +
>  src/mesa/drivers/dri/i965/brw_fs.cpp  | 113 ++
>  src/mesa/drivers/dri/i965/brw_fs.h|   6 +
>  src/mesa/drivers/dri/i965/brw_fs_cse.cpp  |   1 +
>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp| 170 ++
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp  |  42 +++-
>  src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp |   4 +-
>  src/mesa/drivers/dri/i965/brw_link.cpp|  13 --
>  src/mesa/drivers/dri/i965/brw_nir.c   |  13 ++
>  src/mesa/drivers/dri/i965/brw_shader.cpp  |  12 +
>  16 files changed, 631 insertions(+), 37 deletions(-)
>  create mode 100644 src/compiler/nir/nir_lower_scratch.c
>
> --
> 2.5.0.400.gff86faf
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/omx/enc Raise default encode level

2016-12-05 Thread Andy Furniss

Can you apply this please.

Christian König wrote:

Am 25.10.2016 um 00:46 schrieb Andy Furniss:

Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=91281

Signed-off-by: Andy Furniss 


Reviewed-by: Christian König .


---
  src/gallium/state_trackers/omx/vid_enc.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/omx/vid_enc.c
b/src/gallium/state_trackers/omx/vid_enc.c
index 0d7ab28..07f6799 100644
--- a/src/gallium/state_trackers/omx/vid_enc.c
+++ b/src/gallium/state_trackers/omx/vid_enc.c
@@ -246,7 +246,7 @@ static OMX_ERRORTYPE
vid_enc_Constructor(OMX_COMPONENTTYPE *comp, OMX_STRING nam
 priv->quant.nQpB = OMX_VID_ENC_QUANT_B_FRAMES_DEFAULT;
 priv->profile_level.eProfile = OMX_VIDEO_AVCProfileBaseline;
-   priv->profile_level.eLevel = OMX_VIDEO_AVCLevel42;
+   priv->profile_level.eLevel = OMX_VIDEO_AVCLevel51;
 priv->force_pic_type.IntraRefreshVOP = OMX_FALSE;
 priv->frame_num = 0;






___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 3/7] i965/fs: Add a CHANNEL_IDS opcode

2016-12-05 Thread Jason Ekstrand
---
 src/mesa/drivers/dri/i965/brw_defines.h|  6 ++
 src/mesa/drivers/dri/i965/brw_fs.h |  3 +++
 src/mesa/drivers/dri/i965/brw_fs_cse.cpp   |  1 +
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 28 ++
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp   | 14 +++--
 src/mesa/drivers/dri/i965/brw_shader.cpp   |  2 ++
 6 files changed, 43 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index f22a52f..16a72c4 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1095,6 +1095,12 @@ enum opcode {
 */
SHADER_OPCODE_BROADCAST,
 
+   /* Takes no sources and returns the index of each channel in its respective
+* SIMD channel in the destination register.  The instruction must have
+* WE_all set and the destination must have type BRW_REGISTER_TYPE_UW.
+*/
+   SHADER_OPCODE_CHANNEL_IDS,
+
VEC4_OPCODE_MOV_BYTES,
VEC4_OPCODE_PACK_BYTES,
VEC4_OPCODE_UNPACK_UNIFORM,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 002cee8..4ce0f56 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -437,6 +437,9 @@ private:
   struct brw_reg msg_data,
   unsigned msg_type);
 
+   void generate_channel_ids(fs_inst *inst,
+ struct brw_reg dst);
+
void generate_set_sample_id(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src0,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 48220ef..02ccda1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -79,6 +79,7 @@ is_expression(const fs_visitor *v, const fs_inst *const inst)
case FS_OPCODE_LINTERP:
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
case SHADER_OPCODE_BROADCAST:
+   case SHADER_OPCODE_CHANNEL_IDS:
case SHADER_OPCODE_MOV_INDIRECT:
case SHADER_OPCODE_TEX_LOGICAL:
case SHADER_OPCODE_TXD_LOGICAL:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index c5b50e1..3ef2d5b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1435,6 +1435,30 @@ fs_generator::generate_set_simd4x2_offset(fs_inst *inst,
brw_pop_insn_state(p);
 }
 
+void
+fs_generator::generate_channel_ids(fs_inst *inst, struct brw_reg dst)
+{
+   assert(dst.type == BRW_REGISTER_TYPE_UW ||
+  dst.type == BRW_REGISTER_TYPE_W);
+   assert(inst->exec_size >= 8);
+
+   brw_push_insn_state(p);
+   brw_set_default_exec_size(p, BRW_EXECUTE_8);
+   brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_set_default_mask_control(p, true);
+   brw_MOV(p, dst, brw_imm_v(0x76543210));
+
+   if (inst->exec_size > 8) {
+  assert(inst->exec_size == 16);
+  brw_ADD(p, vec8(suboffset(dst, 8)), vec8(dst), brw_imm_w(8));
+   }
+
+   brw_pop_insn_state(p);
+
+   if (inst->group > 0)
+  brw_ADD(p, dst, dst, brw_imm_w(inst->group));
+}
+
 /* Sets vstride=1, width=4, hstride=0 of register src1 during
  * the ADD instruction.
  */
@@ -2056,6 +2080,10 @@ fs_generator::generate_code(const cfg_t *cfg, int 
dispatch_width)
  brw_broadcast(p, dst, src[0], src[1]);
  break;
 
+  case SHADER_OPCODE_CHANNEL_IDS:
+ generate_channel_ids(inst, dst);
+ break;
+
   case FS_OPCODE_SET_SAMPLE_ID:
  generate_set_sample_id(inst, dst, src[0], src[1]);
  break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 855266f..9478bb8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -4310,17 +4310,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
nir_intrinsic_instr *instr
}
 
case nir_intrinsic_load_channel_num: {
-  fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UW);
-  dest = retype(dest, BRW_REGISTER_TYPE_UD);
-  const fs_builder allbld8 = bld.group(8, 0).exec_all();
-  allbld8.MOV(tmp, brw_imm_v(0x76543210));
-  if (dispatch_width > 8)
- allbld8.ADD(byte_offset(tmp, 16), tmp, brw_imm_uw(8u));
-  if (dispatch_width > 16) {
- const fs_builder allbld16 = bld.group(16, 0).exec_all();
- allbld16.ADD(byte_offset(tmp, 32), tmp, brw_imm_uw(16u));
-  }
-  bld.MOV(dest, tmp);
+  fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_W);
+  bld.exec_all().emit(SHADER_OPCODE_CHANNEL_IDS, tmp);
+  bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), tmp);
   break;
}
 
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 25f745d..726bc7d 100644
--- 

[Mesa-dev] [RFC 7/7] i965: Lower large local arrays to scratch

2016-12-05 Thread Jason Ekstrand
---
 src/mesa/drivers/dri/i965/brw_nir.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_nir.c 
b/src/mesa/drivers/dri/i965/brw_nir.c
index 8768cee..f75db4d 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -485,6 +485,9 @@ brw_preprocess_nir(const struct brw_compiler *compiler, 
nir_shader *nir)
/* Lower a bunch of stuff */
OPT_V(nir_lower_var_copies);
 
+   OPT(nir_lower_vars_to_scratch, nir_var_local, 16, true,
+  type_size_scalar_bytes);
+
nir_variable_mode indirect_mask = 0;
if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectInput)
   indirect_mask |= nir_var_shader_in;
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 2/7] nir: Add a pass for selectively lowering variables to scratch space

2016-12-05 Thread Jason Ekstrand
---
 src/compiler/Makefile.sources|   1 +
 src/compiler/nir/nir.h   |   8 +-
 src/compiler/nir/nir_clone.c |   1 +
 src/compiler/nir/nir_lower_scratch.c | 258 +++
 4 files changed, 267 insertions(+), 1 deletion(-)
 create mode 100644 src/compiler/nir/nir_lower_scratch.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 6a7dcd8..2a1594d 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -216,6 +216,7 @@ NIR_FILES = \
nir/nir_lower_phis_to_scalar.c \
nir/nir_lower_returns.c \
nir/nir_lower_samplers.c \
+   nir/nir_lower_scratch.c \
nir/nir_lower_system_values.c \
nir/nir_lower_tex.c \
nir/nir_lower_to_source_mods.c \
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 9e8ed2c..61be6aa 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1822,7 +1822,7 @@ typedef struct nir_shader {
 * the highest index a load_input_*, load_uniform_*, etc. intrinsic can
 * access plus one
 */
-   unsigned num_inputs, num_uniforms, num_outputs, num_shared;
+   unsigned num_inputs, num_uniforms, num_outputs, num_shared, num_scratch;
 
/** The shader stage, such as MESA_SHADER_VERTEX. */
gl_shader_stage stage;
@@ -2315,6 +2315,12 @@ void nir_lower_io_to_temporaries(nir_shader *shader,
  nir_function_impl *entrypoint,
  bool outputs, bool inputs);
 
+bool nir_lower_vars_to_scratch(nir_shader *shader,
+   nir_variable_mode modes,
+   int size_threshold,
+   bool use_scalar_ops,
+   int (*type_size)(const struct glsl_type *));
+
 void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint);
 
 void nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c
index be89426..01314ad 100644
--- a/src/compiler/nir/nir_clone.c
+++ b/src/compiler/nir/nir_clone.c
@@ -719,6 +719,7 @@ nir_shader_clone(void *mem_ctx, const nir_shader *s)
ns->num_uniforms = s->num_uniforms;
ns->num_outputs = s->num_outputs;
ns->num_shared = s->num_shared;
+   ns->num_scratch = s->num_scratch;
 
free_clone_state();
 
diff --git a/src/compiler/nir/nir_lower_scratch.c 
b/src/compiler/nir/nir_lower_scratch.c
new file mode 100644
index 000..a1d5590
--- /dev/null
+++ b/src/compiler/nir/nir_lower_scratch.c
@@ -0,0 +1,258 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *Jason Ekstrand (ja...@jlekstrand.net)
+ *
+ */
+
+/*
+ * This lowering pass converts references to variables with loads/stores to
+ * scratch space based on a few configurable parameters.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+static bool
+deref_has_indirect(nir_deref_var *deref)
+{
+   for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) {
+  if (tail->deref_type != nir_deref_type_array)
+ continue;
+
+  nir_deref_array *arr = nir_deref_as_array(tail);
+  if (arr->deref_array_type == nir_deref_array_type_indirect)
+ return true;
+   }
+
+   return false;
+}
+
+static void
+lower_load_store(nir_builder *b,
+ nir_intrinsic_instr *intrin,
+ int (*type_size)(const struct glsl_type *),
+ bool scalar)
+{
+   b->cursor = nir_before_instr(&intrin->instr);
+
+   /* Just emit code and let constant-folding go to town */
+   nir_ssa_def *offset = nir_imm_int(b, 0);
+
+   nir_deref *tail = &intrin->variables[0]->deref;
+   while (tail->child != NULL) {
+  const struct glsl_type *parent_type = tail->type;
+  tail = tail->child;
+
+  if (tail->deref_type == 

[Mesa-dev] [RFC 6/7] i965: use nir_lower_indirect_derefs() for GLSL

2016-12-05 Thread Jason Ekstrand
From: Timothy Arceri 

This moves the nir_lower_indirect_derefs() call into
brw_preprocess_nir() so that it is called by both OpenGL and Vulkan,
and removes the call to the old GLSL IR pass
lower_variable_index_to_cond_assign().

We want to do this pass in nir to be able to move loop unrolling
to nir.

There is an increase of 1-3 instructions in a small number of shaders,
and 2 Kerbal Space Program shaders that increase by 32 instructions.

Shader-db results BDW:

total instructions in shared programs: 8705873 -> 8706194 (0.00%)
instructions in affected programs: 32515 -> 32836 (0.99%)
helped: 3
HURT: 79

total cycles in shared programs: 74618120 -> 74583476 (-0.05%)
cycles in affected programs: 528104 -> 493460 (-6.56%)
helped: 47
HURT: 37

LOST:   2
GAINED: 0
---
 src/intel/vulkan/anv_pipeline.c| 10 --
 src/mesa/drivers/dri/i965/brw_link.cpp | 13 -
 src/mesa/drivers/dri/i965/brw_nir.c| 10 ++
 3 files changed, 10 insertions(+), 23 deletions(-)

diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 9b65e35..6b0a3c9 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -177,16 +177,6 @@ anv_shader_compile_to_nir(struct anv_device *device,
 
nir_shader_gather_info(nir, entry_point->impl);
 
-   nir_variable_mode indirect_mask = 0;
-   if (compiler->glsl_compiler_options[stage].EmitNoIndirectInput)
-  indirect_mask |= nir_var_shader_in;
-   if (compiler->glsl_compiler_options[stage].EmitNoIndirectOutput)
-  indirect_mask |= nir_var_shader_out;
-   if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp)
-  indirect_mask |= nir_var_local;
-
-   nir_lower_indirect_derefs(nir, indirect_mask);
-
return nir;
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp 
b/src/mesa/drivers/dri/i965/brw_link.cpp
index 3f6041b..19e691e 100644
--- a/src/mesa/drivers/dri/i965/brw_link.cpp
+++ b/src/mesa/drivers/dri/i965/brw_link.cpp
@@ -137,19 +137,6 @@ process_glsl_ir(struct brw_context *brw,
 
do_copy_propagation(shader->ir);
 
-   bool lowered_variable_indexing =
-  lower_variable_index_to_cond_assign(shader->Stage, shader->ir,
-  options->EmitNoIndirectInput,
-  options->EmitNoIndirectOutput,
-  options->EmitNoIndirectTemp,
-  options->EmitNoIndirectUniform);
-
-   if (unlikely(brw->perf_debug && lowered_variable_indexing)) {
-  perf_debug("Unsupported form of variable indexing in %s; falling "
- "back to very inefficient code generation\n",
- _mesa_shader_stage_to_abbrev(shader->Stage));
-   }
-
bool progress;
do {
   progress = false;
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c 
b/src/mesa/drivers/dri/i965/brw_nir.c
index 763e3ec..8768cee 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -485,6 +485,16 @@ brw_preprocess_nir(const struct brw_compiler *compiler, 
nir_shader *nir)
/* Lower a bunch of stuff */
OPT_V(nir_lower_var_copies);
 
+   nir_variable_mode indirect_mask = 0;
+   if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectInput)
+  indirect_mask |= nir_var_shader_in;
+   if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectOutput)
+  indirect_mask |= nir_var_shader_out;
+   if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectTemp)
+  indirect_mask |= nir_var_local;
+
+   nir_lower_indirect_derefs(nir, indirect_mask);
+
/* Get rid of split copies */
nir = nir_optimize(nir, is_scalar);
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 4/7] i965/fs: Add DWord scattered read/write opcodes

2016-12-05 Thread Jason Ekstrand
---
 src/mesa/drivers/dri/i965/brw_defines.h|   2 +
 src/mesa/drivers/dri/i965/brw_fs.cpp   |   2 +
 src/mesa/drivers/dri/i965/brw_fs.h |   3 +
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 142 +
 src/mesa/drivers/dri/i965/brw_shader.cpp   |   5 +
 5 files changed, 154 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 16a72c4..1fd0b94 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1063,6 +1063,8 @@ enum opcode {
SHADER_OPCODE_GEN4_SCRATCH_READ,
SHADER_OPCODE_GEN4_SCRATCH_WRITE,
SHADER_OPCODE_GEN7_SCRATCH_READ,
+   SHADER_OPCODE_DWORD_SCATTERED_READ,
+   SHADER_OPCODE_DWORD_SCATTERED_WRITE,
 
/**
 * Gen8+ SIMD8 URB Read messages.
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index c218f56..8d47638 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -792,6 +792,8 @@ fs_inst::size_read(int arg) const
switch (opcode) {
case FS_OPCODE_FB_WRITE:
case FS_OPCODE_FB_READ:
+   case SHADER_OPCODE_DWORD_SCATTERED_READ:
+   case SHADER_OPCODE_DWORD_SCATTERED_WRITE:
case SHADER_OPCODE_URB_WRITE_SIMD8:
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 4ce0f56..d34428b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -415,6 +415,9 @@ private:
void generate_scratch_write(fs_inst *inst, struct brw_reg src);
void generate_scratch_read(fs_inst *inst, struct brw_reg dst);
void generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst);
+   void generate_dword_scattered_read(fs_inst *inst, struct brw_reg dst,
+  struct brw_reg payload);
+   void generate_dword_scattered_write(fs_inst *inst, struct brw_reg payload);
void generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst,
 struct brw_reg index,
 struct brw_reg offset);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 3ef2d5b..1f753c1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1122,6 +1122,140 @@ fs_generator::generate_scratch_read_gen7(fs_inst *inst, 
struct brw_reg dst)
 }
 
 void
+fs_generator::generate_dword_scattered_read(fs_inst *inst, struct brw_reg dst,
+struct brw_reg payload)
+{
+   assert(inst->mlen != 0);
+
+   uint32_t msg_control;
+   int rlen;
+
+   struct brw_reg mrf;
+   if (devinfo->gen >= 7) {
+  mrf = retype(payload, BRW_REGISTER_TYPE_UD);
+   } else {
+  mrf = retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD);
+   }
+
+   if (inst->exec_size == 8) {
+  msg_control = BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS;
+  rlen = 1;
+   } else {
+  msg_control = BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS;
+  rlen = 2;
+   }
+
+   unsigned msg_type;
+   if (devinfo->gen >= 7) {
+  msg_type = GEN7_DATAPORT_DC_DWORD_SCATTERED_READ;
+   } else if (devinfo->gen > 4 || devinfo->is_g4x) {
+  msg_type = G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
+   } else {
+  msg_type = BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
+   }
+
+   brw_inst *insn = brw_next_insn(p, BRW_OPCODE_SEND);
+
+   assert(brw_inst_pred_control(devinfo, insn) == 0);
+   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
+
+   brw_set_dest(p, insn, retype(dst, BRW_REGISTER_TYPE_UD)); /* UW? */
+   if (devinfo->gen >= 6) {
+  brw_set_src0(p, insn, mrf);
+   } else {
+  brw_set_src0(p, insn, brw_null_reg());
+  brw_inst_set_base_mrf(devinfo, insn, inst->base_mrf);
+   }
+
+   const unsigned target_cache = devinfo->gen >= 7 ?
+  BRW_DATAPORT_READ_TARGET_DATA_CACHE :
+  BRW_DATAPORT_READ_TARGET_RENDER_CACHE;
+
+   brw_set_dp_read_message(p,
+   insn,
+   brw_scratch_surface_idx(p),
+   msg_control,
+   msg_type, /* msg_type */
+   target_cache,
+   inst->mlen, /* msg_length */
+   true, /* header_present */
+   rlen);
+}
+
+void
+fs_generator::generate_dword_scattered_write(fs_inst *inst,
+ struct brw_reg payload)
+{
+   assert(inst->mlen != 0);
+
+   struct brw_reg mrf;
+   if (devinfo->gen >= 7) {
+  mrf = retype(payload, BRW_REGISTER_TYPE_UD);
+   } else {
+  mrf = retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD);
+   }
+
+   uint32_t msg_control;
+   

[Mesa-dev] [RFC 5/7] i965/fs: Implement the new nir_scratch_load/store opcodes

2016-12-05 Thread Jason Ekstrand
This uses either the normal scratch read/write messages that we use for
spilling or the DWORD scattered read/write messages for when we have
indirects.
---
 src/mesa/drivers/dri/i965/brw_defines.h   |   2 +
 src/mesa/drivers/dri/i965/brw_fs.cpp  | 111 ++
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp  |  30 ++
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp |   4 +-
 src/mesa/drivers/dri/i965/brw_shader.cpp  |   5 +
 5 files changed, 150 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 1fd0b94..0a96285 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1065,6 +1065,8 @@ enum opcode {
SHADER_OPCODE_GEN7_SCRATCH_READ,
SHADER_OPCODE_DWORD_SCATTERED_READ,
SHADER_OPCODE_DWORD_SCATTERED_WRITE,
+   SHADER_OPCODE_SCRATCH_READ_LOGICAL,
+   SHADER_OPCODE_SCRATCH_WRITE_LOGICAL,
 
/**
 * Gen8+ SIMD8 URB Read messages.
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 8d47638..594f1f4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3571,6 +3571,112 @@ fs_visitor::lower_minmax()
 }
 
 static void
+lower_scratch_logical_send(const fs_builder &bld, fs_inst *inst)
+{
+   const gen_device_info *devinfo = bld.shader->devinfo;
+   const fs_reg &offset = inst->src[0];
+
+   /* The offsets provided by NIR are in bytes but they do not take channels
+* into account.  We need to multiply by dispatch width.
+*/
+   const unsigned dispatch_width =
+      static_cast<const fs_visitor *>(bld.shader)->dispatch_width;
+
+   if (offset.file == IMM) {
+  /* The immediate versions are easy.  Let's lower those first. */
+  if (inst->opcode == SHADER_OPCODE_SCRATCH_READ_LOGICAL) {
+ /* The Gen7 descriptor-based offset is 12 bits of HWORD units.
+  * Because the Gen7-style scratch block read is hardwired to BTI 255,
+  * on Gen9+ it would cause the DC to do an IA-coherent read, what
+  * largely outweighs the slight advantage from not having to provide
+  * the address as part of the message header, so we're better off
+  * using plain old oword block reads.
+  */
+ const unsigned byte_offset = offset.d * dispatch_width;
+ if (devinfo->gen >= 7 && devinfo->gen < 9 &&
+ byte_offset < (1 << 12) * REG_SIZE) {
+inst->opcode = SHADER_OPCODE_GEN7_SCRATCH_READ;
+ } else {
+inst->opcode = SHADER_OPCODE_GEN4_SCRATCH_READ;
+inst->base_mrf = 13;
+inst->mlen = 1; /* header contains offset */
+ }
+ inst->offset = byte_offset;
+ inst->sources = 0;
+ return;
+  } else {
+ assert(inst->opcode == SHADER_OPCODE_SCRATCH_WRITE_LOGICAL);
+ const unsigned byte_offset = offset.d * dispatch_width;
+ inst->opcode = SHADER_OPCODE_GEN4_SCRATCH_WRITE;
+ inst->mlen = 1 + (dispatch_width / 8); /* header, value */
+ inst->base_mrf = 13;
+ inst->offset = byte_offset;
+ /* Move the "value" source to the right spot */
+ inst->src[0] = inst->src[1];
+ inst->sources = 1;
+ return;
+  }
+   }
+
+   if (inst->opcode == SHADER_OPCODE_SCRATCH_READ_LOGICAL) {
+  inst->opcode = SHADER_OPCODE_DWORD_SCATTERED_READ;
+   } else {
+  assert(inst->opcode == SHADER_OPCODE_SCRATCH_WRITE_LOGICAL);
+  inst->opcode = SHADER_OPCODE_DWORD_SCATTERED_WRITE;
+   }
+
+   fs_reg sources[3];
+
+   const unsigned base_offset = 0;
+   const fs_builder hbld = bld.exec_all().group(8, 0);
+   sources[0] = hbld.vgrf(BRW_REGISTER_TYPE_UD);
+   hbld.MOV(sources[0], retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+   hbld.group(1, 0).MOV(component(sources[0], 2), brw_imm_ud(base_offset));
+
+   fs_reg channel_ids = bld.vgrf(BRW_REGISTER_TYPE_W);
+   bld.emit(SHADER_OPCODE_CHANNEL_IDS, channel_ids);
+
+   sources[1] = bld.vgrf(BRW_REGISTER_TYPE_D);
+   if (devinfo->gen < 6) {
+  /* On Gen < 6, the offsets are in bytes */
+  fs_reg bytes = bld.vgrf(BRW_REGISTER_TYPE_D);
+  bld.MUL(bytes, offset, brw_imm_d(dispatch_width));
+  fs_reg stagger = bld.vgrf(BRW_REGISTER_TYPE_D);
+  bld.MUL(stagger, channel_ids, brw_imm_d(4));
+  bld.ADD(sources[1], bytes, stagger);
+   } else {
+  /* On Gen >= 6, the offsets are in dwords */
+  fs_reg dwords = bld.vgrf(BRW_REGISTER_TYPE_D);
+  bld.MUL(dwords, offset, brw_imm_d(dispatch_width / 4));
+  bld.ADD(sources[1], dwords, channel_ids);
+   }
+
+   unsigned num_sources;
+   if (inst->opcode == SHADER_OPCODE_DWORD_SCATTERED_WRITE) {
+  sources[2] = inst->src[1]; /* value */
+  num_sources = 3;
+   } else {
+  num_sources = 2;
+   }
+
+   fs_reg payload;
+   if (devinfo->gen >= 7) {
+  payload = fs_reg(VGRF, -1, BRW_REGISTER_TYPE_F);
+  fs_inst *load = 

[Mesa-dev] [RFC 1/7] nir: Add load/store_scratch intrinsics

2016-12-05 Thread Jason Ekstrand
---
 src/compiler/nir/nir_intrinsics.h | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_intrinsics.h 
b/src/compiler/nir/nir_intrinsics.h
index f45bfe2..d269805 100644
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -414,6 +414,8 @@ LOAD(shared, 1, 1, BASE, xx, xx, 
NIR_INTRINSIC_CAN_ELIMINATE)
 /* src[] = { offset }. const_index[] = { base, range } */
 LOAD(push_constant, 1, 2, BASE, RANGE, xx,
  NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { offset }. const_index[] = { base, range } */
+LOAD(scratch, 1, 2, BASE, RANGE, xx, NIR_INTRINSIC_CAN_ELIMINATE)
 
 /*
  * Stores work the same way as loads, except now the first source is the value
@@ -435,8 +437,10 @@ STORE(per_vertex_output, 3, 3, BASE, WRMASK, COMPONENT, 0)
 STORE(ssbo, 3, 1, WRMASK, xx, xx, 0)
 /* src[] = { value, offset }. const_index[] = { base, write_mask } */
 STORE(shared, 2, 2, BASE, WRMASK, xx, 0)
+/* src[] = { value, offset }. const_index[] = { base, range, write_mask } */
+STORE(scratch, 2, 3, BASE, RANGE, WRMASK, 0)
 
-LAST_INTRINSIC(store_shared)
+LAST_INTRINSIC(store_scratch)
 
 #undef DEFINE_SYSTEM_VALUE
 #undef INTRINSIC
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 0/7] nir, i965/fs: Lower indirect local variables to scratch

2016-12-05 Thread Jason Ekstrand
This little series implements lowering of indirectly accessed local
variables larger than some threshold (8 floats?) to scratch space.  This
improves the performance of the CSDof synmark test by about 45% because it
uses a large temporary array which we lower to if-ladders and then to piles
of scratch.

The approach I've taken here is to add a new set of NIR intrinsics for
reading and writing scratch.  It's treated like any other form of IO with a
new nir_lower_vars_to_scratch pass that lowers everything over a given size
threshold to scratch space.  Why do this in NIR?  The primary reason is
that this lets us lower to scratch *before* we do nir_lower_indirect_derefs
so we can still use registers for small indirects where an if-ladder is
more efficient than scratch space.  Also, after giving it a try, I really
liked how those intrinsics turned out.

This series is marked RFC because it's still a bit sketchy at the moment.
There are a few things that would need to be finished before it's ready for
landing:

 1) I should probably run it through piglit.
 2) The back-end portion doesn't yet handle doubles
 3) We should use send-from-GRF for non-spill direct scratch reads/writes.
Right now, it's still using MRFs which isn't great.

If people like where this series is going, I can probably find some time to
polish it to the point of mergeable.

Jason Ekstrand (6):
  nir: Add load/store_scratch intrinsics
  nir: Add a pass for selectively lowering variables to scratch space
  i965/fs: Add a CHANNEL_IDS opcode
  i965/fs: Add DWord scattered read/write opcodes
  i965/fs: Implement the new nir_scratch_load/store opcodes
  i965: Lower large local arrays to scratch

Timothy Arceri (1):
  i965: use nir_lower_indirect_derefs() for GLSL

 src/compiler/Makefile.sources |   1 +
 src/compiler/nir/nir.h|   8 +-
 src/compiler/nir/nir_clone.c  |   1 +
 src/compiler/nir/nir_intrinsics.h |   6 +-
 src/compiler/nir/nir_lower_scratch.c  | 258 ++
 src/intel/vulkan/anv_pipeline.c   |  10 -
 src/mesa/drivers/dri/i965/brw_defines.h   |  10 +
 src/mesa/drivers/dri/i965/brw_fs.cpp  | 113 ++
 src/mesa/drivers/dri/i965/brw_fs.h|   6 +
 src/mesa/drivers/dri/i965/brw_fs_cse.cpp  |   1 +
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp| 170 ++
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp  |  42 +++-
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp |   4 +-
 src/mesa/drivers/dri/i965/brw_link.cpp|  13 --
 src/mesa/drivers/dri/i965/brw_nir.c   |  13 ++
 src/mesa/drivers/dri/i965/brw_shader.cpp  |  12 +
 16 files changed, 631 insertions(+), 37 deletions(-)
 create mode 100644 src/compiler/nir/nir_lower_scratch.c

-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/7] glx: use GLX_ALIAS for glXGetProcAddress

2016-12-05 Thread Ian Romanick
On 12/05/2016 11:52 AM, Emil Velikov wrote:
> From: Emil Velikov 
> 
> Use the macro, rather than open-coding it.
> 
> Signed-off-by: Emil Velikov 
> ---
>  src/glx/glxcmds.c | 17 -
>  1 file changed, 4 insertions(+), 13 deletions(-)
> 
> diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c
> index 7266b58..6c7bbfd 100644
> --- a/src/glx/glxcmds.c
> +++ b/src/glx/glxcmds.c
> @@ -2663,19 +2663,10 @@ _GLX_PUBLIC void (*glXGetProcAddressARB(const GLubyte 
> * procName)) (void)
>   *
>   * \sa glXGetProcAddressARB
>   */
> -_GLX_PUBLIC void (*glXGetProcAddress(const GLubyte * procName)) (void)
> -# ifdef HAVE_FUNC_ATTRIBUTE_ALIAS
> -# if defined(USE_MGL_NAMESPACE)
> -   __attribute__ ((alias("mglXGetProcAddressARB")));
> -# else
> -   __attribute__ ((alias("glXGetProcAddressARB")));
> -# endif
> -#else
> -{
> -   return glXGetProcAddressARB(procName);
> -}
> -#endif /* HAVE_FUNC_ATTRIBUTE_ALIA */
> -
> +_GLX_PUBLIC
> +GLX_ALIAS(__GLXextFuncPtr, glXGetProcAddress,
> +  (const GLubyte * procName),
> +  (procName), glXGetProcAddressARB)

Does this still work correctly with USE_MGL_NAMESPACE?

>  
>  #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL)
>  /**
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/7] android: set HAVE_FUNC_ATTRIBUTE_ALIAS

2016-12-05 Thread Emil Velikov
From: Emil Velikov 

Analogous to previous two commits.

Strictly speaking it's not applicable to Android since we don't
build GLX and related code.

Regardless keep things consistent with the other build systems.

Cc: Rob Herring 
Signed-off-by: Emil Velikov 
---
 Android.common.mk | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Android.common.mk b/Android.common.mk
index 9f64c22..ed5118a 100644
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -51,6 +51,7 @@ LOCAL_CFLAGS += \
-DHAVE_FUNC_ATTRIBUTE_UNUSED \
-DHAVE_FUNC_ATTRIBUTE_FORMAT \
-DHAVE_FUNC_ATTRIBUTE_PACKED \
+   -DHAVE_FUNC_ATTRIBUTE_ALIAS \
-DHAVE___BUILTIN_CTZ \
-DHAVE___BUILTIN_POPCOUNT \
-DHAVE___BUILTIN_POPCOUNTLL \
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/7] glx: use GLX_ALIAS for glXGetProcAddress

2016-12-05 Thread Emil Velikov
From: Emil Velikov 

Use the macro, rather than open-coding it.

Signed-off-by: Emil Velikov 
---
 src/glx/glxcmds.c | 17 -
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c
index 7266b58..6c7bbfd 100644
--- a/src/glx/glxcmds.c
+++ b/src/glx/glxcmds.c
@@ -2663,19 +2663,10 @@ _GLX_PUBLIC void (*glXGetProcAddressARB(const GLubyte * 
procName)) (void)
  *
  * \sa glXGetProcAddressARB
  */
-_GLX_PUBLIC void (*glXGetProcAddress(const GLubyte * procName)) (void)
-# ifdef HAVE_FUNC_ATTRIBUTE_ALIAS
-# if defined(USE_MGL_NAMESPACE)
-   __attribute__ ((alias("mglXGetProcAddressARB")));
-# else
-   __attribute__ ((alias("glXGetProcAddressARB")));
-# endif
-#else
-{
-   return glXGetProcAddressARB(procName);
-}
-#endif /* HAVE_FUNC_ATTRIBUTE_ALIA */
-
+_GLX_PUBLIC
+GLX_ALIAS(__GLXextFuncPtr, glXGetProcAddress,
+  (const GLubyte * procName),
+  (procName), glXGetProcAddressARB)
 
 #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL)
 /**
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/7] glx: remove always false ifdef GLX_NO_STATIC_EXTENSION_FUNCTIONS

2016-12-05 Thread Emil Velikov
From: Emil Velikov 

A quick search through git history (of both mesa and xserver) shows no
instances where this was ever set.

Signed-off-by: Emil Velikov 
---
 src/glx/glxextensions.h | 5 -
 1 file changed, 5 deletions(-)

diff --git a/src/glx/glxextensions.h b/src/glx/glxextensions.h
index 743ed97..6a67006 100644
--- a/src/glx/glxextensions.h
+++ b/src/glx/glxextensions.h
@@ -281,10 +281,6 @@ typedef void (*PFNGLXDISABLEEXTENSIONPROC) (const char 
*name);
 
 /* GLX_ALIAS should be used for functions with a non-void return type.
GLX_ALIAS_VOID is for functions with a void return type. */
-#ifdef GLX_NO_STATIC_EXTENSION_FUNCTIONS
-# define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func)
-# define GLX_ALIAS_VOID(real_func, proto_args, args, aliased_func)
-#else
 # if defined(__GNUC__) && !defined(GLX_ALIAS_UNSUPPORTED)
 /* GLX_ALIAS and GLX_ALIAS_VOID both expand to the macro GLX_ALIAS2. Using the
  * extra expansion means that the name mangling macros in glx_mangle.h will
@@ -305,7 +301,6 @@ typedef void (*PFNGLXDISABLEEXTENSIONPROC) (const char 
*name);
void  real_func  proto_args  \
{ aliased_func args ; }
 # endif /* __GNUC__ */
-#endif /* GLX_NO_STATIC_EXTENSION_FUNCTIONS */
 
 #ifdef __cplusplus
 }
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 7/7] glx: unify GLX_SGIX_pbuffer aliased declarations

2016-12-05 Thread Emil Velikov
From: Emil Velikov 

No point in having identical code in two places.

Not to mention that the Apple one incorrectly uses GLXDrawable as pbuf
type. This change is both API and ABI safe since the header uses the
correct GLXPbufferSGIX and both types are a typedef of the same
primitive XID.

Cc: Jeremy Huddleston Sequoia 
Signed-off-by: Emil Velikov 
---
Jeremy, humble poke to send any/all Macports patches to the list ;-)
---
 src/glx/apple/glx_empty.c | 16 
 src/glx/glx_pbuffer.c |  2 --
 2 files changed, 18 deletions(-)

diff --git a/src/glx/apple/glx_empty.c b/src/glx/apple/glx_empty.c
index 1e9593c..33fffb5 100644
--- a/src/glx/apple/glx_empty.c
+++ b/src/glx/apple/glx_empty.c
@@ -263,19 +263,3 @@ glXGetFBConfigFromVisualSGIX(Display * dpy, XVisualInfo * 
visinfo)
return NULL;
 }
 #endif
-
-
-_X_EXPORT
-GLX_ALIAS_VOID(glXDestroyGLXPbufferSGIX,
-   (Display * dpy, GLXDrawable pbuf),
-   (dpy, pbuf), glXDestroyPbuffer)
-
- _X_EXPORT GLX_ALIAS_VOID(glXSelectEventSGIX,
-   (Display * dpy, GLXDrawable drawable,
-unsigned long mask), (dpy, drawable, mask),
-   glXSelectEvent)
-
- _X_EXPORT GLX_ALIAS_VOID(glXGetSelectedEventSGIX,
-   (Display * dpy, GLXDrawable drawable,
-unsigned long *mask), (dpy, drawable, mask),
-   glXGetSelectedEvent)
diff --git a/src/glx/glx_pbuffer.c b/src/glx/glx_pbuffer.c
index 24c073c..0c15d2a 100644
--- a/src/glx/glx_pbuffer.c
+++ b/src/glx/glx_pbuffer.c
@@ -992,7 +992,6 @@ glXDestroyWindow(Display * dpy, GLXWindow win)
 #endif
 }
 
-#ifndef GLX_USE_APPLEGL
 _GLX_PUBLIC
 GLX_ALIAS_VOID(glXDestroyGLXPbufferSGIX,
(Display * dpy, GLXPbufferSGIX pbuf),
@@ -1008,4 +1007,3 @@ GLX_ALIAS_VOID(glXGetSelectedEventSGIX,
(Display * dpy, GLXDrawable drawable,
 unsigned long *mask), (dpy, drawable, mask),
glXGetSelectedEvent)
-#endif
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/7] scons: set HAVE_FUNC_ATTRIBUTE_ALIAS

2016-12-05 Thread Emil Velikov
From: Emil Velikov 

Analogous to the previous commit, where we did so for autotools.

Cc: Jose Fonseca 
Signed-off-by: Emil Velikov 
---
Jose, I'm not 100% sure if that holds true for the compiler(s) that you
gents use. Can you please double-check?

Thanks
Emil
---
 include/c99_compat.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/c99_compat.h b/include/c99_compat.h
index 24e96e0..cb690c6 100644
--- a/include/c99_compat.h
+++ b/include/c99_compat.h
@@ -163,6 +163,7 @@ test_c99_compat_h(const void * restrict a,
 #define HAVE_FUNC_ATTRIBUTE_UNUSED 1
 #define HAVE_FUNC_ATTRIBUTE_FORMAT 1
 #define HAVE_FUNC_ATTRIBUTE_PACKED 1
+#define HAVE_FUNC_ATTRIBUTE_ALIAS 1
 
 #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
/* https://gcc.gnu.org/onlinedocs/gcc-4.3.6/gcc/Other-Builtins.html */
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/7] mesa: make use of HAVE_FUNC_ATTRIBUTE_ALIAS macro

2016-12-05 Thread Emil Velikov
From: Emil Velikov 

We must make sure that xserver has an equivalent one-line
change to its configure.ac as the glx/glapi headers get copied over.

Then again, xserver does _not_ seem to set HAVE_ALIAS to begin with so
one might want to look into that first.

Cc: Adam Jackson 
Signed-off-by: Emil Velikov 
---
Not too sure if/how to split this. So if we have any suggestions I would
appreciate it.
---
 configure.ac |  9 +
 scons/gallium.py |  4 
 src/glx/glxcmds.c|  4 ++--
 src/glx/glxextensions.h  |  4 ++--
 src/mapi/glapi/gen/glX_proto_size.py | 10 ++
 src/mapi/glapi/gen/gl_x86_asm.py |  2 +-
 6 files changed, 8 insertions(+), 25 deletions(-)

diff --git a/configure.ac b/configure.ac
index f251764..905538f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1766,7 +1766,6 @@ if test "x$enable_dri" = xyes; then
 # Platform specific settings and drivers to build
 case "$host_os" in
 linux*)
-DEFINES="$DEFINES -DHAVE_ALIAS"
 if test "x$enable_dri3" = xyes; then
 DEFINES="$DEFINES -DHAVE_DRI3"
 fi
@@ -1780,19 +1779,13 @@ if test "x$enable_dri" = xyes; then
 ;;
 esac
 ;;
-*freebsd* | dragonfly* | *netbsd* | openbsd*)
-DEFINES="$DEFINES -DHAVE_ALIAS"
-;;
-gnu*)
-DEFINES="$DEFINES -DHAVE_ALIAS"
-;;
 cygwin*)
 if test "x$with_dri_drivers" = "xyes"; then
 with_dri_drivers="swrast"
 fi
 ;;
 darwin*)
-DEFINES="$DEFINES -DGLX_ALIAS_UNSUPPORTED -DBUILDING_MESA"
+DEFINES="$DEFINES -DBUILDING_MESA"
 if test "x$with_dri_drivers" = "xyes"; then
 with_dri_drivers="swrast"
 fi
diff --git a/scons/gallium.py b/scons/gallium.py
index dc7fdce..767766f 100755
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -323,10 +323,6 @@ def generate(env):
 'GLX_DIRECT_RENDERING',
 'GLX_INDIRECT_RENDERING',
 ]
-if env['platform'] in ('linux', 'freebsd'):
-cppdefines += ['HAVE_ALIAS']
-else:
-cppdefines += ['GLX_ALIAS_UNSUPPORTED']
 
 if env['platform'] in ('linux', 'darwin'):
 cppdefines += ['HAVE_XLOCALE_H']
diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c
index 8980de3..7266b58 100644
--- a/src/glx/glxcmds.c
+++ b/src/glx/glxcmds.c
@@ -2664,7 +2664,7 @@ _GLX_PUBLIC void (*glXGetProcAddressARB(const GLubyte * 
procName)) (void)
  * \sa glXGetProcAddressARB
  */
 _GLX_PUBLIC void (*glXGetProcAddress(const GLubyte * procName)) (void)
-#if defined(__GNUC__) && !defined(GLX_ALIAS_UNSUPPORTED)
+# ifdef HAVE_FUNC_ATTRIBUTE_ALIAS
 # if defined(USE_MGL_NAMESPACE)
__attribute__ ((alias("mglXGetProcAddressARB")));
 # else
@@ -2674,7 +2674,7 @@ _GLX_PUBLIC void (*glXGetProcAddress(const GLubyte * 
procName)) (void)
 {
return glXGetProcAddressARB(procName);
 }
-#endif /* __GNUC__ */
+#endif /* HAVE_FUNC_ATTRIBUTE_ALIA */
 
 
 #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL)
diff --git a/src/glx/glxextensions.h b/src/glx/glxextensions.h
index 6a67006..21ad02a 100644
--- a/src/glx/glxextensions.h
+++ b/src/glx/glxextensions.h
@@ -281,7 +281,7 @@ typedef void (*PFNGLXDISABLEEXTENSIONPROC) (const char 
*name);
 
 /* GLX_ALIAS should be used for functions with a non-void return type.
GLX_ALIAS_VOID is for functions with a void return type. */
-# if defined(__GNUC__) && !defined(GLX_ALIAS_UNSUPPORTED)
+# ifdef HAVE_FUNC_ATTRIBUTE_ALIAS
 /* GLX_ALIAS and GLX_ALIAS_VOID both expand to the macro GLX_ALIAS2. Using the
  * extra expansion means that the name mangling macros in glx_mangle.h will
  * apply before stringification, so the alias attribute will have a string like
@@ -300,7 +300,7 @@ typedef void (*PFNGLXDISABLEEXTENSIONPROC) (const char 
*name);
 #  define GLX_ALIAS_VOID(real_func, proto_args, args, aliased_func) \
void  real_func  proto_args  \
{ aliased_func args ; }
-# endif /* __GNUC__ */
+# endif /* HAVE_FUNC_ATTRIBUTE_ALIAS */
 
 #ifdef __cplusplus
 }
diff --git a/src/mapi/glapi/gen/glX_proto_size.py 
b/src/mapi/glapi/gen/glX_proto_size.py
index 3a1c554..d9f95a3 100644
--- a/src/mapi/glapi/gen/glX_proto_size.py
+++ b/src/mapi/glapi/gen/glX_proto_size.py
@@ -330,10 +330,7 @@ class PrintGlxSizeStubs_c(PrintGlxSizeStubs_common):
 self.printFastcall()
 print ''
 print ''
-print '#if defined(__CYGWIN__) || defined(__MINGW32__) || 
defined(GLX_USE_APPLEGL)'
-print '#  undef HAVE_ALIAS'
-print '#endif'
-print '#ifdef HAVE_ALIAS'
+print '#ifdef HAVE_FUNC_ATTRIBUTE_ALIAS'
 print '#  define ALIAS2(from,to) \\'
 print '_X_INTERNAL PURE FASTCALL GLint __gl ## from ## _size( 
GLenum e ) \\'
 print '__attribute__ ((alias( 

[Mesa-dev] [PATCH 2/7] configure: use standard check for attribute alias

2016-12-05 Thread Emil Velikov
From: Emil Velikov 

Currently we have two macros - HAVE_ALIAS and GLX_ALIAS_UNSUPPORTED.
To make it even better, the former is explicitly cleared in some
cases while not in others.

Clear all that up by using a single macro properly set during configure.

Signed-off-by: Emil Velikov 
---
 configure.ac | 1 +
 1 file changed, 1 insertion(+)

diff --git a/configure.ac b/configure.ac
index adca49d..f251764 100644
--- a/configure.ac
+++ b/configure.ac
@@ -238,6 +238,7 @@ AX_GCC_FUNC_ATTRIBUTE([unused])
 AX_GCC_FUNC_ATTRIBUTE([visibility])
 AX_GCC_FUNC_ATTRIBUTE([warn_unused_result])
 AX_GCC_FUNC_ATTRIBUTE([weak])
+AX_GCC_FUNC_ATTRIBUTE([alias])
 
 AM_CONDITIONAL([GEN_ASM_OFFSETS], test "x$GEN_ASM_OFFSETS" = xyes)
 
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 3/6] i915: Add support for xfb overflow on query buffer objects.

2016-12-05 Thread Ian Romanick
Minor nit: i965 in the subject. :)

On 12/05/2016 10:16 AM, Rafael Antognolli wrote:
> Enable getting the results of a transform feedback overflow query with a
> buffer object.
> 
> Signed-off-by: Rafael Antognolli 
> ---
>  src/mesa/drivers/dri/i965/hsw_queryobj.c | 108 
> +++
>  1 file changed, 108 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/hsw_queryobj.c 
> b/src/mesa/drivers/dri/i965/hsw_queryobj.c
> index 0da2c3d..0c9dbdc 100644
> --- a/src/mesa/drivers/dri/i965/hsw_queryobj.c
> +++ b/src/mesa/drivers/dri/i965/hsw_queryobj.c
> @@ -187,6 +187,103 @@ gpr0_to_bool(struct brw_context *brw)
>  }
>  
>  static void
> +load_gen_written_data_to_regs(struct brw_context *brw,
> +  struct brw_query_object *query,
> +  int idx)
> +{
> +   int offset = idx * sizeof(uint64_t) * 4;
> +
> +   brw_load_register_mem64(brw,
> +   HSW_CS_GPR(1),
> +   query->bo,
> +   I915_GEM_DOMAIN_INSTRUCTION,
> +   I915_GEM_DOMAIN_INSTRUCTION,
> +   offset);
> +
> +   offset += sizeof(uint64_t);
> +   brw_load_register_mem64(brw,
> +   HSW_CS_GPR(2),
> +   query->bo,
> +   I915_GEM_DOMAIN_INSTRUCTION,
> +   I915_GEM_DOMAIN_INSTRUCTION,
> +   offset);
> +
> +   offset += sizeof(uint64_t);
> +   brw_load_register_mem64(brw,
> +   HSW_CS_GPR(3),
> +   query->bo,
> +   I915_GEM_DOMAIN_INSTRUCTION,
> +   I915_GEM_DOMAIN_INSTRUCTION,
> +   offset);
> +
> +   offset += sizeof(uint64_t);
> +   brw_load_register_mem64(brw,
> +   HSW_CS_GPR(4),
> +   query->bo,
> +   I915_GEM_DOMAIN_INSTRUCTION,
> +   I915_GEM_DOMAIN_INSTRUCTION,
> +   offset);
> +}
> +
> +/*
> + * R3 = R4 - R3;
> + * R1 = R2 - R1;
> + * R1 = R3 - R1;
> + * R0 = R0 | R1;
> + */
> +static void
> +calc_overflow_for_stream(struct brw_context *brw)
> +{
> +   static const uint32_t maths[] = {
> +  MI_MATH_ALU2(LOAD, SRCA, R4),
> +  MI_MATH_ALU2(LOAD, SRCB, R3),
> +  MI_MATH_ALU0(SUB),
> +  MI_MATH_ALU2(STORE, R3, ACCU),
> +  MI_MATH_ALU2(LOAD, SRCA, R2),
> +  MI_MATH_ALU2(LOAD, SRCB, R1),
> +  MI_MATH_ALU0(SUB),
> +  MI_MATH_ALU2(STORE, R1, ACCU),
> +  MI_MATH_ALU2(LOAD, SRCA, R3),
> +  MI_MATH_ALU2(LOAD, SRCB, R1),
> +  MI_MATH_ALU0(SUB),
> +  MI_MATH_ALU2(STORE, R1, ACCU),
> +  MI_MATH_ALU2(LOAD, SRCA, R1),
> +  MI_MATH_ALU2(LOAD, SRCB, R0),
> +  MI_MATH_ALU0(OR),
> +  MI_MATH_ALU2(STORE, R0, ACCU),
> +   };
> +
> +   BEGIN_BATCH(1 + ARRAY_SIZE(maths));
> +   OUT_BATCH(HSW_MI_MATH | (1 + ARRAY_SIZE(maths) - 2));
> +
> +   for (int m = 0; m < ARRAY_SIZE(maths); m++)
> +  OUT_BATCH(maths[m]);
> +
> +   ADVANCE_BATCH();
> +}
> +
> +static void
> +calc_overflow_to_gpr0(struct brw_context *brw, struct brw_query_object 
> *query,
> +   int count)
> +{
> +   brw_load_register_imm64(brw, HSW_CS_GPR(0), 0ull);
> +
> +   for (int i = 0; i < count; i++) {
> +  load_gen_written_data_to_regs(brw, query, i);
> +  calc_overflow_for_stream(brw);
> +   }
> +}
> +
> +static void
> +overflow_result_to_grp0(struct brw_context *brw,
> +struct brw_query_object *query,
> +int count)
> +{
> +   calc_overflow_to_gpr0(brw, query, count);
> +   gpr0_to_bool(brw);
> +}
> +
> +static void
>  hsw_result_to_gpr0(struct gl_context *ctx, struct brw_query_object *query,
> struct gl_buffer_object *buf, intptr_t offset,
> GLenum pname, GLenum ptype)
> @@ -223,6 +320,11 @@ hsw_result_to_gpr0(struct gl_context *ctx, struct 
> brw_query_object *query,
>I915_GEM_DOMAIN_INSTRUCTION,
>I915_GEM_DOMAIN_INSTRUCTION,
>0 * sizeof(uint64_t));
> +   } else if (query->Base.Target == GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB
> +  || query->Base.Target == GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB) {
> +  /* Don't do anything in advance here, since the math for this is a 
> little
> +   * more complex.
> +   */
> } else {
>brw_load_register_mem64(brw,
>HSW_CS_GPR(1),
> @@ -274,6 +376,12 @@ hsw_result_to_gpr0(struct gl_context *ctx, struct 
> brw_query_object *query,
> case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
>gpr0_to_bool(brw);
>break;
> +   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
> +  overflow_result_to_grp0(brw, query, 1);
> +  break;
> +   case 

Re: [Mesa-dev] [PATCH 00/50] GL_ARB_gpu_shader_int64... this time for sure!

2016-12-05 Thread Matt Turner

On 11/28, Ian Romanick wrote:

From: Ian Romanick 
   Patches 42 through 50 enable the extension on BDW+.


42-48 are

Reviewed-by: Matt Turner 

I don't understand the 64-bit CMP issue, so I'm booting a SKL to see how
fp64 works.


signature.asc
Description: Digital signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 1/6] mesa: Add types for ARB_transform_feedback_overflow_query.

2016-12-05 Thread Rafael Antognolli
On Mon, Dec 05, 2016 at 11:01:54AM -0800, Ian Romanick wrote:
> Aside from my comments on patch 5, you're missing some changes to
> _mesa_BeginConditionalRender.  See
> https://cgit.freedesktop.org/~idr/mesa/commit/?h=ARB_transform_feedback_overflow_query&id=c17cf16caa18e44f9d53e05a7279066f8cc2f33c.

I did it on patch #4, but yes, it should have been merged with this
patch. Will do it on next iteration.

> On 12/05/2016 10:16 AM, Rafael Antognolli wrote:
> > Add some basic types and storage for the queries of this extension.
> > 
> > Signed-off-by: Rafael Antognolli 
> > ---
> >  src/mesa/main/mtypes.h  |  5 +
> >  src/mesa/main/queryobj.c| 21 +
> >  src/mesa/state_tracker/st_cb_queryobj.c |  6 ++
> >  3 files changed, 32 insertions(+)
> > 
> > diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
> > index 459725b..dd6393d 100644
> > --- a/src/mesa/main/mtypes.h
> > +++ b/src/mesa/main/mtypes.h
> > @@ -3001,6 +3001,10 @@ struct gl_query_state
> > struct gl_query_object *PrimitivesGenerated[MAX_VERTEX_STREAMS];
> > struct gl_query_object *PrimitivesWritten[MAX_VERTEX_STREAMS];
> >  
> > +   /** GL_ARB_transform_feedback_overflow_query */
> > +   struct gl_query_object *TransformFeedbackOverflow[MAX_VERTEX_STREAMS];
> > +   struct gl_query_object *TransformFeedbackOverflowAny;
> > +
> > /** GL_ARB_timer_query */
> > struct gl_query_object *TimeElapsed;
> >  
> > @@ -3870,6 +3874,7 @@ struct gl_extensions
> > GLboolean ARB_transform_feedback2;
> > GLboolean ARB_transform_feedback3;
> > GLboolean ARB_transform_feedback_instanced;
> > +   GLboolean ARB_transform_feedback_overflow_query;
> > GLboolean ARB_uniform_buffer_object;
> > GLboolean ARB_vertex_attrib_64bit;
> > GLboolean ARB_vertex_program;
> > diff --git a/src/mesa/main/queryobj.c b/src/mesa/main/queryobj.c
> > index 1fa0279..e4edb51 100644
> > --- a/src/mesa/main/queryobj.c
> > +++ b/src/mesa/main/queryobj.c
> > @@ -197,6 +197,16 @@ get_query_binding_point(struct gl_context *ctx, GLenum 
> > target, GLuint index)
> >   return &ctx->Query.PrimitivesWritten[index];
> >else
> >   return NULL;
> > +   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
> > +  if (ctx->Extensions.ARB_transform_feedback_overflow_query)
> > + return &ctx->Query.TransformFeedbackOverflow[index];
> > +  else
> > + return NULL;
> > +   case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
> > +  if (ctx->Extensions.ARB_transform_feedback_overflow_query)
> > + return &ctx->Query.TransformFeedbackOverflowAny;
> > +  else
> > + return NULL;
> >  
> > case GL_VERTICES_SUBMITTED_ARB:
> > case GL_PRIMITIVES_SUBMITTED_ARB:
> > @@ -293,6 +303,8 @@ _mesa_CreateQueries(GLenum target, GLsizei n, GLuint 
> > *ids)
> > case GL_TIMESTAMP:
> > case GL_PRIMITIVES_GENERATED:
> > case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
> > +   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
> > +   case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
> >break;
> > default:
> >_mesa_error(ctx, GL_INVALID_ENUM, "glCreateQueries(invalid target = 
> > %s)",
> > @@ -368,6 +380,7 @@ query_error_check_index(struct gl_context *ctx, GLenum 
> > target, GLuint index)
> > switch (target) {
> > case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
> > case GL_PRIMITIVES_GENERATED:
> > +   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
> >if (index >= ctx->Const.MaxVertexStreams) {
> >   _mesa_error(ctx, GL_INVALID_VALUE,
> >   "glBeginQueryIndexed(index>=MaxVertexStreams)");
> > @@ -677,6 +690,14 @@ _mesa_GetQueryIndexediv(GLenum target, GLuint index, 
> > GLenum pname,
> >   case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
> >  *params = ctx->Const.QueryCounterBits.PrimitivesWritten;
> >  break;
> > + case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
> > + case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
> > +/* The minimum value of this is 1 if it's nonzero, and the 
> > value
> > + * is only ever GL_TRUE or GL_FALSE, so no sense in reporting 
> > more
> > + * bits.
> > + */
> > +*params = 1;
> > +break;
> >   case GL_VERTICES_SUBMITTED_ARB:
> >  *params = ctx->Const.QueryCounterBits.VerticesSubmitted;
> >  break;
> > diff --git a/src/mesa/state_tracker/st_cb_queryobj.c 
> > b/src/mesa/state_tracker/st_cb_queryobj.c
> > index 2489676..b1ac2aa 100644
> > --- a/src/mesa/state_tracker/st_cb_queryobj.c
> > +++ b/src/mesa/state_tracker/st_cb_queryobj.c
> > @@ -114,6 +114,12 @@ st_BeginQuery(struct gl_context *ctx, struct 
> > gl_query_object *q)
> > case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
> >type = PIPE_QUERY_PRIMITIVES_EMITTED;
> >break;
> > +   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
> > +  type = 

Re: [Mesa-dev] [RFC 5/6] i965: Enable ARB_transform_feedback_overflow_query.

2016-12-05 Thread Rafael Antognolli
On Mon, Dec 05, 2016 at 10:57:16AM -0800, Ian Romanick wrote:
> On 12/05/2016 10:16 AM, Rafael Antognolli wrote:
> > Signed-off-by: Rafael Antognolli 
> > ---
> >  src/mesa/drivers/dri/i965/intel_extensions.c | 1 +
> >  src/mesa/main/extensions_table.h | 1 +
> >  src/mesa/main/get.c  | 1 +
> >  3 files changed, 3 insertions(+)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
> > b/src/mesa/drivers/dri/i965/intel_extensions.c
> > index 66079b5..ea56220 100644
> > --- a/src/mesa/drivers/dri/i965/intel_extensions.c
> > +++ b/src/mesa/drivers/dri/i965/intel_extensions.c
> > @@ -320,6 +320,7 @@ intelInitExtensions(struct gl_context *ctx)
> >ctx->Extensions.EXT_framebuffer_multisample = true;
> >ctx->Extensions.EXT_framebuffer_multisample_blit_scaled = true;
> >ctx->Extensions.EXT_transform_feedback = true;
> > +  ctx->Extensions.ARB_transform_feedback_overflow_query = true;
> >ctx->Extensions.OES_depth_texture_cube_map = true;
> >ctx->Extensions.OES_sample_variables = true;
> 
> This would usually be in a patch that just modifies code in
> src/mesa/drivers/dri/i965 or a patch by itself.  Also, update
> docs/features.txt and docs/relnotes/.

Ack, will do it.

> >  
> > diff --git a/src/mesa/main/extensions_table.h 
> > b/src/mesa/main/extensions_table.h
> > index d3ec551..8774d14 100644
> > --- a/src/mesa/main/extensions_table.h
> > +++ b/src/mesa/main/extensions_table.h
> > @@ -160,6 +160,7 @@ EXT(ARB_timer_query , 
> > ARB_timer_query
> >  EXT(ARB_transform_feedback2 , ARB_transform_feedback2  
> >   , GLL, GLC,  x ,  x , 2010)
> >  EXT(ARB_transform_feedback3 , ARB_transform_feedback3  
> >   , GLL, GLC,  x ,  x , 2010)
> >  EXT(ARB_transform_feedback_instanced, 
> > ARB_transform_feedback_instanced   , GLL, GLC,  x ,  x , 2011)
> > +EXT(ARB_transform_feedback_overflow_query   , 
> > ARB_transform_feedback_overflow_query  , GLL, GLC,  x ,  x , 2016)
> >  EXT(ARB_transpose_matrix, dummy_true   
> >   , GLL,  x ,  x ,  x , 1999)
> >  EXT(ARB_uniform_buffer_object   , ARB_uniform_buffer_object
> >   , GLL, GLC,  x ,  x , 2009)
> >  EXT(ARB_vertex_array_bgra   , EXT_vertex_array_bgra
> >   , GLL, GLC,  x ,  x , 2008)
> 
> This change should be added in the same patch that adds
> ARB_transform_feedback_overflow_query in mtypes.h.  See
> https://cgit.freedesktop.org/~idr/mesa/commit/?h=ARB_transform_feedback_overflow_query&id=7eaf174d7a28c52b717a38896c1e782302fcd87f.

OK.

> > diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
> > index 5f5e76a..f103018 100644
> > --- a/src/mesa/main/get.c
> > +++ b/src/mesa/main/get.c
> > @@ -455,6 +455,7 @@ EXTRA_EXT(ARB_sync);
> >  EXTRA_EXT(ARB_vertex_shader);
> >  EXTRA_EXT(EXT_transform_feedback);
> >  EXTRA_EXT(ARB_transform_feedback3);
> > +EXTRA_EXT(ARB_transform_feedback_overflow_query);
> >  EXTRA_EXT(EXT_pixel_buffer_object);
> >  EXTRA_EXT(ARB_vertex_program);
> >  EXTRA_EXT2(NV_point_sprite, ARB_point_sprite);
> > 
> 
> Does anything use this?

Hmm... no, I admit I was just copying code from other extensions. It
doesn't look like it's needed for this one. Sorry for this one.

Thanks,
Rafael
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] configure: cleanup GLX_USE_TLS handling

2016-12-05 Thread Emil Velikov
From: Emil Velikov 

Mesa requires ax_pthread_ok = yes, thus we can fold/rewrite the
conditional to follow the more common "if test" pattern.

No functional change intended.

Signed-off-by: Emil Velikov 
---
 configure.ac | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/configure.ac b/configure.ac
index 52bd9a6..c5361be 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1710,8 +1710,9 @@ AC_ARG_ENABLE([glx-tls],
 [GLX_USE_TLS=yes])
 AC_SUBST(GLX_TLS, ${GLX_USE_TLS})
 
-AS_IF([test "x$GLX_USE_TLS" = xyes -a "x$ax_pthread_ok" = xyes],
-  [DEFINES="${DEFINES} -DGLX_USE_TLS"])
+if test "x$GLX_USE_TLS" = xyes; then
+DEFINES="$DEFINES -DGLX_USE_TLS"
+fi
 
 dnl Read-only text section on x86 hardened platforms
 AC_ARG_ENABLE([glx-read-only-text],
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] configure: enable glx-tls by default

2016-12-05 Thread Emil Velikov
From: Emil Velikov 

In the (not too) distant future we'd want to remove this option and
effectively drop the other codepath(s) we have in our dispatch.

Linux distributions have been using --enable-glx-tls for a number of
years. Some/most BSD platforms still don't support this, yet this should
serve as an encouragement to move things forwards.

Cc: Jean-Sébastien Pédron 
Cc: Jonathan Gray 
Cc: mesa-maintain...@lists.freedesktop.org
Signed-off-by: Emil Velikov 
---
Gents, please forward to the respective lists and fellow maintainers. Be
that on your or brother/sister distro.

Joining up on mesa-maintainers will be appreciated.
---
 configure.ac | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure.ac b/configure.ac
index adca49d..52bd9a6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1707,7 +1707,7 @@ AC_ARG_ENABLE([glx-tls],
 [AS_HELP_STRING([--enable-glx-tls],
 [enable TLS support in GLX @<:@default=disabled@:>@])],
 [GLX_USE_TLS="$enableval"],
-[GLX_USE_TLS=no])
+[GLX_USE_TLS=yes])
 AC_SUBST(GLX_TLS, ${GLX_USE_TLS})
 
 AS_IF([test "x$GLX_USE_TLS" = xyes -a "x$ax_pthread_ok" = xyes],
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 1/6] mesa: Add types for ARB_transform_feedback_overflow_query.

2016-12-05 Thread Ian Romanick
Aside from my comments on patch 5, you're missing some changes to
_mesa_BeginConditionalRender.  See
https://cgit.freedesktop.org/~idr/mesa/commit/?h=ARB_transform_feedback_overflow_query&id=c17cf16caa18e44f9d53e05a7279066f8cc2f33c.

On 12/05/2016 10:16 AM, Rafael Antognolli wrote:
> Add some basic types and storage for the queries of this extension.
> 
> Signed-off-by: Rafael Antognolli 
> ---
>  src/mesa/main/mtypes.h  |  5 +
>  src/mesa/main/queryobj.c| 21 +
>  src/mesa/state_tracker/st_cb_queryobj.c |  6 ++
>  3 files changed, 32 insertions(+)
> 
> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
> index 459725b..dd6393d 100644
> --- a/src/mesa/main/mtypes.h
> +++ b/src/mesa/main/mtypes.h
> @@ -3001,6 +3001,10 @@ struct gl_query_state
> struct gl_query_object *PrimitivesGenerated[MAX_VERTEX_STREAMS];
> struct gl_query_object *PrimitivesWritten[MAX_VERTEX_STREAMS];
>  
> +   /** GL_ARB_transform_feedback_overflow_query */
> +   struct gl_query_object *TransformFeedbackOverflow[MAX_VERTEX_STREAMS];
> +   struct gl_query_object *TransformFeedbackOverflowAny;
> +
> /** GL_ARB_timer_query */
> struct gl_query_object *TimeElapsed;
>  
> @@ -3870,6 +3874,7 @@ struct gl_extensions
> GLboolean ARB_transform_feedback2;
> GLboolean ARB_transform_feedback3;
> GLboolean ARB_transform_feedback_instanced;
> +   GLboolean ARB_transform_feedback_overflow_query;
> GLboolean ARB_uniform_buffer_object;
> GLboolean ARB_vertex_attrib_64bit;
> GLboolean ARB_vertex_program;
> diff --git a/src/mesa/main/queryobj.c b/src/mesa/main/queryobj.c
> index 1fa0279..e4edb51 100644
> --- a/src/mesa/main/queryobj.c
> +++ b/src/mesa/main/queryobj.c
> @@ -197,6 +197,16 @@ get_query_binding_point(struct gl_context *ctx, GLenum 
> target, GLuint index)
>   return &ctx->Query.PrimitivesWritten[index];
>else
>   return NULL;
> +   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
> +  if (ctx->Extensions.ARB_transform_feedback_overflow_query)
> + return &ctx->Query.TransformFeedbackOverflow[index];
> +  else
> + return NULL;
> +   case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
> +  if (ctx->Extensions.ARB_transform_feedback_overflow_query)
> + return &ctx->Query.TransformFeedbackOverflowAny;
> +  else
> + return NULL;
>  
> case GL_VERTICES_SUBMITTED_ARB:
> case GL_PRIMITIVES_SUBMITTED_ARB:
> @@ -293,6 +303,8 @@ _mesa_CreateQueries(GLenum target, GLsizei n, GLuint *ids)
> case GL_TIMESTAMP:
> case GL_PRIMITIVES_GENERATED:
> case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
> +   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
> +   case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
>break;
> default:
>_mesa_error(ctx, GL_INVALID_ENUM, "glCreateQueries(invalid target = 
> %s)",
> @@ -368,6 +380,7 @@ query_error_check_index(struct gl_context *ctx, GLenum 
> target, GLuint index)
> switch (target) {
> case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
> case GL_PRIMITIVES_GENERATED:
> +   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
>if (index >= ctx->Const.MaxVertexStreams) {
>   _mesa_error(ctx, GL_INVALID_VALUE,
>   "glBeginQueryIndexed(index>=MaxVertexStreams)");
> @@ -677,6 +690,14 @@ _mesa_GetQueryIndexediv(GLenum target, GLuint index, 
> GLenum pname,
>   case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
>  *params = ctx->Const.QueryCounterBits.PrimitivesWritten;
>  break;
> + case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
> + case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
> +/* The minimum value of this is 1 if it's nonzero, and the value
> + * is only ever GL_TRUE or GL_FALSE, so no sense in reporting 
> more
> + * bits.
> + */
> +*params = 1;
> +break;
>   case GL_VERTICES_SUBMITTED_ARB:
>  *params = ctx->Const.QueryCounterBits.VerticesSubmitted;
>  break;
> diff --git a/src/mesa/state_tracker/st_cb_queryobj.c 
> b/src/mesa/state_tracker/st_cb_queryobj.c
> index 2489676..b1ac2aa 100644
> --- a/src/mesa/state_tracker/st_cb_queryobj.c
> +++ b/src/mesa/state_tracker/st_cb_queryobj.c
> @@ -114,6 +114,12 @@ st_BeginQuery(struct gl_context *ctx, struct 
> gl_query_object *q)
> case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
>type = PIPE_QUERY_PRIMITIVES_EMITTED;
>break;
> +   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
> +  type = PIPE_QUERY_SO_OVERFLOW_PREDICATE;
> +  break;
> +   case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
> +  type = PIPE_QUERY_SO_OVERFLOW_PREDICATE;
> +  break;
> case GL_TIME_ELAPSED:
>if (st->has_time_elapsed)
>   type = PIPE_QUERY_TIME_ELAPSED;
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org

Re: [Mesa-dev] [RFC 5/6] i965: Enable ARB_transform_feedback_overflow_query.

2016-12-05 Thread Ian Romanick
On 12/05/2016 10:16 AM, Rafael Antognolli wrote:
> Signed-off-by: Rafael Antognolli 
> ---
>  src/mesa/drivers/dri/i965/intel_extensions.c | 1 +
>  src/mesa/main/extensions_table.h | 1 +
>  src/mesa/main/get.c  | 1 +
>  3 files changed, 3 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
> b/src/mesa/drivers/dri/i965/intel_extensions.c
> index 66079b5..ea56220 100644
> --- a/src/mesa/drivers/dri/i965/intel_extensions.c
> +++ b/src/mesa/drivers/dri/i965/intel_extensions.c
> @@ -320,6 +320,7 @@ intelInitExtensions(struct gl_context *ctx)
>ctx->Extensions.EXT_framebuffer_multisample = true;
>ctx->Extensions.EXT_framebuffer_multisample_blit_scaled = true;
>ctx->Extensions.EXT_transform_feedback = true;
> +  ctx->Extensions.ARB_transform_feedback_overflow_query = true;
>ctx->Extensions.OES_depth_texture_cube_map = true;
>ctx->Extensions.OES_sample_variables = true;

This would usually be in a patch that just modifies code in
src/mesa/drivers/dri/i965 or a patch by itself.  Also, update
docs/features.txt and docs/relnotes/.

>  
> diff --git a/src/mesa/main/extensions_table.h 
> b/src/mesa/main/extensions_table.h
> index d3ec551..8774d14 100644
> --- a/src/mesa/main/extensions_table.h
> +++ b/src/mesa/main/extensions_table.h
> @@ -160,6 +160,7 @@ EXT(ARB_timer_query , 
> ARB_timer_query
>  EXT(ARB_transform_feedback2 , ARB_transform_feedback2
> , GLL, GLC,  x ,  x , 2010)
>  EXT(ARB_transform_feedback3 , ARB_transform_feedback3
> , GLL, GLC,  x ,  x , 2010)
>  EXT(ARB_transform_feedback_instanced, 
> ARB_transform_feedback_instanced   , GLL, GLC,  x ,  x , 2011)
> +EXT(ARB_transform_feedback_overflow_query   , 
> ARB_transform_feedback_overflow_query  , GLL, GLC,  x ,  x , 2016)
>  EXT(ARB_transpose_matrix, dummy_true 
> , GLL,  x ,  x ,  x , 1999)
>  EXT(ARB_uniform_buffer_object   , ARB_uniform_buffer_object  
> , GLL, GLC,  x ,  x , 2009)
>  EXT(ARB_vertex_array_bgra   , EXT_vertex_array_bgra  
> , GLL, GLC,  x ,  x , 2008)

This change should be added in the same patch that adds
ARB_transform_feedback_overflow_query in mtypes.h.  See
https://cgit.freedesktop.org/~idr/mesa/commit/?h=ARB_transform_feedback_overflow_query&id=7eaf174d7a28c52b717a38896c1e782302fcd87f.

> diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
> index 5f5e76a..f103018 100644
> --- a/src/mesa/main/get.c
> +++ b/src/mesa/main/get.c
> @@ -455,6 +455,7 @@ EXTRA_EXT(ARB_sync);
>  EXTRA_EXT(ARB_vertex_shader);
>  EXTRA_EXT(EXT_transform_feedback);
>  EXTRA_EXT(ARB_transform_feedback3);
> +EXTRA_EXT(ARB_transform_feedback_overflow_query);
>  EXTRA_EXT(EXT_pixel_buffer_object);
>  EXTRA_EXT(ARB_vertex_program);
>  EXTRA_EXT2(NV_point_sprite, ARB_point_sprite);
> 

Does anything use this?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i915: Stop claiming GL 2.1 support.

2016-12-05 Thread Ian Romanick
On 12/02/2016 05:04 PM, Matt Turner wrote:
> On Fri, Dec 2, 2016 at 12:22 PM, Emil Velikov  
> wrote:
>> On 2 December 2016 at 19:49, Matt Turner  wrote:
>>> A user reporting an unrelated bug (98964) said that he has to set
>>> MESA_GL_VERSION_OVERRIDE=1.4 when running Chromium otherwise it's too
>>> slow. I presume that it's attempting to use GL 2.0/2.1 features that
>>> aren't hardware-supported on i915.
>> Ubuntu has been carrying a slightly different patch for a while [1].
>> JFYI - I cannot comment which one is the better option.
>>
>> -Emil
>>
>> [1] 
>> https://anonscm.debian.org/git/pkg-xorg/lib/mesa.git/tree/debian/patches/i915-dont-default-to-2.1.patch?h=ubuntu
> 
> Yeah, reverting the patch directly is probably better. Thanks for the heads 
> up.

I don't think Matt's original patch is enough because the driver will
still advertise 2.1 due to all the extensions being enabled.

At the same time, I think we should leave ES 2.0 enabled.  Many of the
limitations in ES 2.0 are specifically so that parts like i915 can play.

> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallivm: use getHostCPUFeatures on x86/llvm-4.0+.

2016-12-05 Thread Roland Scheidegger
Am 05.12.2016 um 18:39 schrieb Tim Rowley:
> Use llvm provided API based on cpuid rather than our own
> manually maintained list of mattr enabling/disabling.
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 15 +++
>  1 file changed, 15 insertions(+)
> 
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp 
> b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
> index a68428d..21d9e15 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
> @@ -542,6 +542,20 @@ 
> lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
> llvm::SmallVector MAttrs;
>  
>  #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
> +#if HAVE_LLVM >= 0x0400
> +   /* llvm-3.7+ implements sys::getHostCPUFeatures for x86,
> +* which allows us to enable/disable code generation based
> +* on the results of cpuid.
> +*/
> +   llvm::StringMap<bool> features;
> +   llvm::sys::getHostCPUFeatures(features);
> +
> +   for (StringMapIterator<bool> f = features.begin();
> +f != features.end();
> +++f) {
> +  MAttrs.push_back(((*f).second ? "+" : "-") + (*f).first().str());
> +   }
> +#else
> /*
>  * We need to unset attributes because sometimes LLVM mistakenly assumes
>  * certain features are present given the processor name.
> @@ -596,6 +610,7 @@ 
> lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
> MAttrs.push_back("-avx512vl");
>  #endif
>  #endif
> +#endif
>  
>  #if defined(PIPE_ARCH_PPC)
> MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec");
> 

Reviewed-by: Roland Scheidegger 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 1/6] gallium: add pipe_screen::resource_changed

2016-12-05 Thread Roland Scheidegger
Am 05.12.2016 um 16:49 schrieb Philipp Zabel:
> Add a hook to tell drivers that an imported resource may have changed
> and they need to update their internal derived resources.
> 
> Signed-off-by: Philipp Zabel 
> ---
> Changes since v1:
>  - Clarified intended use of pipe_screen::resource_changed
> ---
>  src/gallium/include/pipe/p_screen.h | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/src/gallium/include/pipe/p_screen.h 
> b/src/gallium/include/pipe/p_screen.h
> index 255647e..0d3e4b6 100644
> --- a/src/gallium/include/pipe/p_screen.h
> +++ b/src/gallium/include/pipe/p_screen.h
> @@ -224,6 +224,14 @@ struct pipe_screen {
> struct winsys_handle *handle,
> unsigned usage);
>  
> +   /**
> +* Mark the resource as changed so derived internal resources will be
> +* recreated on next use.
> +*
> +* This is necessary when reimporting external images that can't be 
> directly
> +* used as texture sampler source, to avoid sampling from old copies.
> +*/
> +   void (*resource_changed)(struct pipe_screen *, struct pipe_resource *pt);
>  
> void (*resource_destroy)(struct pipe_screen *,
>   struct pipe_resource *pt);
> 

Could you also add that to src/gallium/docs/source/screen.rst?


Roland

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/50] GL_ARB_gpu_shader_int64... this time for sure!

2016-12-05 Thread Matt Turner
On Wed, Nov 30, 2016 at 1:11 PM, Matt Turner  wrote:
> On 11/28, Ian Romanick wrote:
>>
>> From: Ian Romanick 
>>
>> I believe that I have addressed all of the review feedback from the
>> previous iteration.  Many of the patches have been reviewed, and they
>> should be ready to go.
>>
>> Patches marked with "vN" in the subject have changed in a non-trivial
>> way since last being sent to the list.
>>
>> Several patches that have not changed need review:
>>
>>Patches 23 through 33 add lowering passes for 64-bit operations.
>
>
> I sent a few comments, and I cannot claim to have verified the division
> routine, but the rest are
>
> Reviewed-by: Matt Turner 

Having thought more about this, I think doing this in GLSL IR is not
the way we should be going.

Presumably we're going to want to support int64 in SPIR-V, and that
necessitates lowering these operations in NIR.

I don't think it's a good idea to do this outside of NIR.

Thoughts?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 1/6] mesa: Add types for ARB_transform_feedback_overflow_query.

2016-12-05 Thread Rafael Antognolli
Add some basic types and storage for the queries of this extension.

Signed-off-by: Rafael Antognolli 
---
 src/mesa/main/mtypes.h  |  5 +
 src/mesa/main/queryobj.c| 21 +
 src/mesa/state_tracker/st_cb_queryobj.c |  6 ++
 3 files changed, 32 insertions(+)

diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 459725b..dd6393d 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -3001,6 +3001,10 @@ struct gl_query_state
struct gl_query_object *PrimitivesGenerated[MAX_VERTEX_STREAMS];
struct gl_query_object *PrimitivesWritten[MAX_VERTEX_STREAMS];
 
+   /** GL_ARB_transform_feedback_overflow_query */
+   struct gl_query_object *TransformFeedbackOverflow[MAX_VERTEX_STREAMS];
+   struct gl_query_object *TransformFeedbackOverflowAny;
+
/** GL_ARB_timer_query */
struct gl_query_object *TimeElapsed;
 
@@ -3870,6 +3874,7 @@ struct gl_extensions
GLboolean ARB_transform_feedback2;
GLboolean ARB_transform_feedback3;
GLboolean ARB_transform_feedback_instanced;
+   GLboolean ARB_transform_feedback_overflow_query;
GLboolean ARB_uniform_buffer_object;
GLboolean ARB_vertex_attrib_64bit;
GLboolean ARB_vertex_program;
diff --git a/src/mesa/main/queryobj.c b/src/mesa/main/queryobj.c
index 1fa0279..e4edb51 100644
--- a/src/mesa/main/queryobj.c
+++ b/src/mesa/main/queryobj.c
@@ -197,6 +197,16 @@ get_query_binding_point(struct gl_context *ctx, GLenum 
target, GLuint index)
  return &ctx->Query.PrimitivesWritten[index];
   else
  return NULL;
+   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
+  if (ctx->Extensions.ARB_transform_feedback_overflow_query)
+ return &ctx->Query.TransformFeedbackOverflow[index];
+  else
+ return NULL;
+   case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
+  if (ctx->Extensions.ARB_transform_feedback_overflow_query)
+ return &ctx->Query.TransformFeedbackOverflowAny;
+  else
+ return NULL;
 
case GL_VERTICES_SUBMITTED_ARB:
case GL_PRIMITIVES_SUBMITTED_ARB:
@@ -293,6 +303,8 @@ _mesa_CreateQueries(GLenum target, GLsizei n, GLuint *ids)
case GL_TIMESTAMP:
case GL_PRIMITIVES_GENERATED:
case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
+   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
+   case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
   break;
default:
   _mesa_error(ctx, GL_INVALID_ENUM, "glCreateQueries(invalid target = %s)",
@@ -368,6 +380,7 @@ query_error_check_index(struct gl_context *ctx, GLenum 
target, GLuint index)
switch (target) {
case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
case GL_PRIMITIVES_GENERATED:
+   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
   if (index >= ctx->Const.MaxVertexStreams) {
  _mesa_error(ctx, GL_INVALID_VALUE,
  "glBeginQueryIndexed(index>=MaxVertexStreams)");
@@ -677,6 +690,14 @@ _mesa_GetQueryIndexediv(GLenum target, GLuint index, 
GLenum pname,
  case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
 *params = ctx->Const.QueryCounterBits.PrimitivesWritten;
 break;
+ case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
+ case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
+/* The minimum value of this is 1 if it's nonzero, and the value
+ * is only ever GL_TRUE or GL_FALSE, so no sense in reporting more
+ * bits.
+ */
+*params = 1;
+break;
  case GL_VERTICES_SUBMITTED_ARB:
 *params = ctx->Const.QueryCounterBits.VerticesSubmitted;
 break;
diff --git a/src/mesa/state_tracker/st_cb_queryobj.c 
b/src/mesa/state_tracker/st_cb_queryobj.c
index 2489676..b1ac2aa 100644
--- a/src/mesa/state_tracker/st_cb_queryobj.c
+++ b/src/mesa/state_tracker/st_cb_queryobj.c
@@ -114,6 +114,12 @@ st_BeginQuery(struct gl_context *ctx, struct 
gl_query_object *q)
case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
   type = PIPE_QUERY_PRIMITIVES_EMITTED;
   break;
+   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
+  type = PIPE_QUERY_SO_OVERFLOW_PREDICATE;
+  break;
+   case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
+  type = PIPE_QUERY_SO_OVERFLOW_PREDICATE;
+  break;
case GL_TIME_ELAPSED:
   if (st->has_time_elapsed)
  type = PIPE_QUERY_TIME_ELAPSED;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 3/6] i915: Add support for xfb overflow on query buffer objects.

2016-12-05 Thread Rafael Antognolli
Enable getting the results of a transform feedback overflow query with a
buffer object.

Signed-off-by: Rafael Antognolli 
---
 src/mesa/drivers/dri/i965/hsw_queryobj.c | 108 +++
 1 file changed, 108 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/hsw_queryobj.c 
b/src/mesa/drivers/dri/i965/hsw_queryobj.c
index 0da2c3d..0c9dbdc 100644
--- a/src/mesa/drivers/dri/i965/hsw_queryobj.c
+++ b/src/mesa/drivers/dri/i965/hsw_queryobj.c
@@ -187,6 +187,103 @@ gpr0_to_bool(struct brw_context *brw)
 }
 
 static void
+load_gen_written_data_to_regs(struct brw_context *brw,
+  struct brw_query_object *query,
+  int idx)
+{
+   int offset = idx * sizeof(uint64_t) * 4;
+
+   brw_load_register_mem64(brw,
+   HSW_CS_GPR(1),
+   query->bo,
+   I915_GEM_DOMAIN_INSTRUCTION,
+   I915_GEM_DOMAIN_INSTRUCTION,
+   offset);
+
+   offset += sizeof(uint64_t);
+   brw_load_register_mem64(brw,
+   HSW_CS_GPR(2),
+   query->bo,
+   I915_GEM_DOMAIN_INSTRUCTION,
+   I915_GEM_DOMAIN_INSTRUCTION,
+   offset);
+
+   offset += sizeof(uint64_t);
+   brw_load_register_mem64(brw,
+   HSW_CS_GPR(3),
+   query->bo,
+   I915_GEM_DOMAIN_INSTRUCTION,
+   I915_GEM_DOMAIN_INSTRUCTION,
+   offset);
+
+   offset += sizeof(uint64_t);
+   brw_load_register_mem64(brw,
+   HSW_CS_GPR(4),
+   query->bo,
+   I915_GEM_DOMAIN_INSTRUCTION,
+   I915_GEM_DOMAIN_INSTRUCTION,
+   offset);
+}
+
+/*
+ * R3 = R4 - R3;
+ * R1 = R2 - R1;
+ * R1 = R3 - R1;
+ * R0 = R0 | R1;
+ */
+static void
+calc_overflow_for_stream(struct brw_context *brw)
+{
+   static const uint32_t maths[] = {
+  MI_MATH_ALU2(LOAD, SRCA, R4),
+  MI_MATH_ALU2(LOAD, SRCB, R3),
+  MI_MATH_ALU0(SUB),
+  MI_MATH_ALU2(STORE, R3, ACCU),
+  MI_MATH_ALU2(LOAD, SRCA, R2),
+  MI_MATH_ALU2(LOAD, SRCB, R1),
+  MI_MATH_ALU0(SUB),
+  MI_MATH_ALU2(STORE, R1, ACCU),
+  MI_MATH_ALU2(LOAD, SRCA, R3),
+  MI_MATH_ALU2(LOAD, SRCB, R1),
+  MI_MATH_ALU0(SUB),
+  MI_MATH_ALU2(STORE, R1, ACCU),
+  MI_MATH_ALU2(LOAD, SRCA, R1),
+  MI_MATH_ALU2(LOAD, SRCB, R0),
+  MI_MATH_ALU0(OR),
+  MI_MATH_ALU2(STORE, R0, ACCU),
+   };
+
+   BEGIN_BATCH(1 + ARRAY_SIZE(maths));
+   OUT_BATCH(HSW_MI_MATH | (1 + ARRAY_SIZE(maths) - 2));
+
+   for (int m = 0; m < ARRAY_SIZE(maths); m++)
+  OUT_BATCH(maths[m]);
+
+   ADVANCE_BATCH();
+}
+
+static void
+calc_overflow_to_gpr0(struct brw_context *brw, struct brw_query_object *query,
+   int count)
+{
+   brw_load_register_imm64(brw, HSW_CS_GPR(0), 0ull);
+
+   for (int i = 0; i < count; i++) {
+  load_gen_written_data_to_regs(brw, query, i);
+  calc_overflow_for_stream(brw);
+   }
+}
+
+static void
+overflow_result_to_grp0(struct brw_context *brw,
+struct brw_query_object *query,
+int count)
+{
+   calc_overflow_to_gpr0(brw, query, count);
+   gpr0_to_bool(brw);
+}
+
+static void
 hsw_result_to_gpr0(struct gl_context *ctx, struct brw_query_object *query,
struct gl_buffer_object *buf, intptr_t offset,
GLenum pname, GLenum ptype)
@@ -223,6 +320,11 @@ hsw_result_to_gpr0(struct gl_context *ctx, struct 
brw_query_object *query,
   I915_GEM_DOMAIN_INSTRUCTION,
   I915_GEM_DOMAIN_INSTRUCTION,
   0 * sizeof(uint64_t));
+   } else if (query->Base.Target == GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB
+  || query->Base.Target == GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB) {
+  /* Don't do anything in advance here, since the math for this is a little
+   * more complex.
+   */
} else {
   brw_load_register_mem64(brw,
   HSW_CS_GPR(1),
@@ -274,6 +376,12 @@ hsw_result_to_gpr0(struct gl_context *ctx, struct 
brw_query_object *query,
case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
   gpr0_to_bool(brw);
   break;
+   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
+  overflow_result_to_grp0(brw, query, 1);
+  break;
+   case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
+  overflow_result_to_grp0(brw, query, MAX_VERTEX_STREAMS);
+  break;
}
 }
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 5/6] i965: Enable ARB_transform_feedback_overflow_query.

2016-12-05 Thread Rafael Antognolli
Signed-off-by: Rafael Antognolli 
---
 src/mesa/drivers/dri/i965/intel_extensions.c | 1 +
 src/mesa/main/extensions_table.h | 1 +
 src/mesa/main/get.c  | 1 +
 3 files changed, 3 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 66079b5..ea56220 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -320,6 +320,7 @@ intelInitExtensions(struct gl_context *ctx)
   ctx->Extensions.EXT_framebuffer_multisample = true;
   ctx->Extensions.EXT_framebuffer_multisample_blit_scaled = true;
   ctx->Extensions.EXT_transform_feedback = true;
+  ctx->Extensions.ARB_transform_feedback_overflow_query = true;
   ctx->Extensions.OES_depth_texture_cube_map = true;
   ctx->Extensions.OES_sample_variables = true;
 
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index d3ec551..8774d14 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -160,6 +160,7 @@ EXT(ARB_timer_query , 
ARB_timer_query
 EXT(ARB_transform_feedback2 , ARB_transform_feedback2  
  , GLL, GLC,  x ,  x , 2010)
 EXT(ARB_transform_feedback3 , ARB_transform_feedback3  
  , GLL, GLC,  x ,  x , 2010)
 EXT(ARB_transform_feedback_instanced, ARB_transform_feedback_instanced 
  , GLL, GLC,  x ,  x , 2011)
+EXT(ARB_transform_feedback_overflow_query   , 
ARB_transform_feedback_overflow_query  , GLL, GLC,  x ,  x , 2016)
 EXT(ARB_transpose_matrix, dummy_true   
  , GLL,  x ,  x ,  x , 1999)
 EXT(ARB_uniform_buffer_object   , ARB_uniform_buffer_object
  , GLL, GLC,  x ,  x , 2009)
 EXT(ARB_vertex_array_bgra   , EXT_vertex_array_bgra
  , GLL, GLC,  x ,  x , 2008)
diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 5f5e76a..f103018 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -455,6 +455,7 @@ EXTRA_EXT(ARB_sync);
 EXTRA_EXT(ARB_vertex_shader);
 EXTRA_EXT(EXT_transform_feedback);
 EXTRA_EXT(ARB_transform_feedback3);
+EXTRA_EXT(ARB_transform_feedback_overflow_query);
 EXTRA_EXT(EXT_pixel_buffer_object);
 EXTRA_EXT(ARB_vertex_program);
 EXTRA_EXT2(NV_point_sprite, ARB_point_sprite);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 2/6] i965: add plumbing for ARB_transform_feedback_overflow_query.

2016-12-05 Thread Rafael Antognolli
When querying for transform feedback overflow on one or all of the
streams, store information about number of generated and written
primitives. Then check whether generated == written.

Signed-off-by: Rafael Antognolli 
---
 src/mesa/drivers/dri/i965/brw_queryobj.c   |  2 ++
 src/mesa/drivers/dri/i965/gen6_queryobj.c  | 58 ++
 src/mesa/drivers/dri/i965/gen7_sol_state.c |  9 +++--
 3 files changed, 66 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c 
b/src/mesa/drivers/dri/i965/brw_queryobj.c
index dda17de..40b86a0 100644
--- a/src/mesa/drivers/dri/i965/brw_queryobj.c
+++ b/src/mesa/drivers/dri/i965/brw_queryobj.c
@@ -530,6 +530,8 @@ brw_is_query_pipelined(struct brw_query_object *query)
 
case GL_PRIMITIVES_GENERATED:
case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
+   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
+   case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
case GL_VERTICES_SUBMITTED_ARB:
case GL_PRIMITIVES_SUBMITTED_ARB:
case GL_VERTEX_SHADER_INVOCATIONS_ARB:
diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c 
b/src/mesa/drivers/dri/i965/gen6_queryobj.c
index bbd3c44..f46a6e6 100644
--- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
+++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
@@ -98,6 +98,39 @@ write_xfb_primitives_written(struct brw_context *brw,
}
 }
 
+static void
+write_xfb_overflow_streams(struct gl_context *ctx,
+   drm_intel_bo *bo, int stream, int count,
+   int idx)
+{
+   struct brw_context *brw = brw_context(ctx);
+
+   for (int i = 0; i < count; i++) {
+  write_xfb_primitives_written(brw, bo, stream + i, 4 * i + 0 + idx);
+  write_primitives_generated(brw, bo, stream + i, 4 * i + 2 + idx);
+   }
+
+   if (stream == 0)
+  ctx->NewDriverState |= BRW_NEW_RASTERIZER_DISCARD;
+}
+
+static bool
+check_xfb_overflow_streams(uint64_t *results, int count)
+{
+   bool overflow = false;
+
+   for (int i = 0; i < count; i++) {
+  uint64_t *result_i = &results[4 * i];
+
+  if ((result_i[3] - result_i[2]) != (result_i[1] - result_i[0])) {
+ overflow = true;
+ break;
+  }
+   }
+
+   return overflow;
+}
+
 static inline int
 pipeline_target_to_index(int target)
 {
@@ -225,6 +258,14 @@ gen6_queryobj_get_results(struct gl_context *ctx,
   query->Base.Result = results[1] - results[0];
   break;
 
+   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
+  query->Base.Result = check_xfb_overflow_streams(results, 1);
+  break;
+
+   case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
+  query->Base.Result = check_xfb_overflow_streams(results, 
MAX_VERTEX_STREAMS);
+  break;
+
case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:
   query->Base.Result = (results[1] - results[0]);
   /* Implement the "WaDividePSInvocationCountBy4:HSW,BDW" workaround:
@@ -314,6 +355,14 @@ gen6_begin_query(struct gl_context *ctx, struct 
gl_query_object *q)
   write_xfb_primitives_written(brw, query->bo, query->Base.Stream, 0);
   break;
 
+   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
+  write_xfb_overflow_streams(ctx, query->bo, query->Base.Stream, 1, 0);
+  break;
+
+   case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
+  write_xfb_overflow_streams(ctx, query->bo, 0, MAX_VERTEX_STREAMS, 0);
+  break;
+
case GL_VERTICES_SUBMITTED_ARB:
case GL_PRIMITIVES_SUBMITTED_ARB:
case GL_VERTEX_SHADER_INVOCATIONS_ARB:
@@ -368,6 +417,15 @@ gen6_end_query(struct gl_context *ctx, struct 
gl_query_object *q)
   write_xfb_primitives_written(brw, query->bo, query->Base.Stream, 1);
   break;
 
+   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
+  write_xfb_overflow_streams(ctx, query->bo, query->Base.Stream, 1, 1);
+  break;
+
+   case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
+  write_xfb_overflow_streams(ctx, query->bo, 0, MAX_VERTEX_STREAMS, 1);
+  break;
+
+  /* calculate overflow here */
case GL_VERTICES_SUBMITTED_ARB:
case GL_PRIMITIVES_SUBMITTED_ARB:
case GL_VERTEX_SHADER_INVOCATIONS_ARB:
diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c 
b/src/mesa/drivers/dri/i965/gen7_sol_state.c
index 1775274..f63210b 100644
--- a/src/mesa/drivers/dri/i965/gen7_sol_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c
@@ -243,11 +243,14 @@ upload_3dstate_streamout(struct brw_context *brw, bool 
active,
 
   /* BRW_NEW_RASTERIZER_DISCARD */
   if (ctx->RasterDiscard) {
- if (!query_active(ctx->Query.PrimitivesGenerated[0])) {
+ if (!query_active(ctx->Query.PrimitivesGenerated[0]) &&
+ !query_active(ctx->Query.TransformFeedbackOverflow[0]) &&
+ !query_active(ctx->Query.TransformFeedbackOverflowAny)) {
 dw1 |= SO_RENDERING_DISABLE;
  } else {
-perf_debug("Rasterizer discard with a GL_PRIMITIVES_GENERATED "
-   "query active relies on the clipper.");
+

[Mesa-dev] [RFC 6/6] i965: Enable predicate support on gen >= 8.

2016-12-05 Thread Rafael Antognolli
Predication needs cmd parser only on gen7. For newer platforms, it
should be available without it.

Signed-off-by: Rafael Antognolli 
---
 src/mesa/drivers/dri/i965/intel_extensions.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index ea56220..c76e2e8 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -407,6 +407,7 @@ intelInitExtensions(struct gl_context *ctx)
   ctx->Extensions.OES_geometry_shader = true;
   ctx->Extensions.OES_texture_cube_map_array = true;
   ctx->Extensions.OES_viewport_array = true;
+  brw->predicate.supported = true;
}
 
if (brw->gen >= 9) {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 4/6] i965: Add support for xfb overflow query on conditional render.

2016-12-05 Thread Rafael Antognolli
Enable the use of a transform feedback overflow query with
glBeginConditionalRender. The render commands will only execute if the
query is true (i.e. if there was an overflow).

Use ARB_conditional_render_inverted to change this behavior.

Signed-off-by: Rafael Antognolli 
---
 src/mesa/drivers/dri/i965/brw_conditional_render.c | 111 +++--
 src/mesa/main/condrender.c |   4 +-
 2 files changed, 104 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_conditional_render.c 
b/src/mesa/drivers/dri/i965/brw_conditional_render.c
index 122a4ec..db2b722 100644
--- a/src/mesa/drivers/dri/i965/brw_conditional_render.c
+++ b/src/mesa/drivers/dri/i965/brw_conditional_render.c
@@ -48,20 +48,83 @@ set_predicate_enable(struct brw_context *brw,
 }
 
 static void
-set_predicate_for_result(struct brw_context *brw,
- struct brw_query_object *query,
- bool inverted)
+set_predicate_for_overflow_query(struct brw_context *brw,
+ struct brw_query_object *query,
+ int stream_start, int count)
 {
-   int load_op;
+   /* R3 = R4 - R3; generated vertices
+* R1 = R2 - R1; written vertices
+* R1 = R3 - R1; there was an overflow on this stream
+* R0 = R0 | R1; accumulate whether there was any overflow
+*/
+   static const uint32_t maths[] = {
+  MI_MATH_ALU2(LOAD, SRCA, R4),
+  MI_MATH_ALU2(LOAD, SRCB, R3),
+  MI_MATH_ALU0(SUB),
+  MI_MATH_ALU2(STORE, R3, ACCU),
+  MI_MATH_ALU2(LOAD, SRCA, R2),
+  MI_MATH_ALU2(LOAD, SRCB, R1),
+  MI_MATH_ALU0(SUB),
+  MI_MATH_ALU2(STORE, R1, ACCU),
+  MI_MATH_ALU2(LOAD, SRCA, R3),
+  MI_MATH_ALU2(LOAD, SRCB, R1),
+  MI_MATH_ALU0(SUB),
+  MI_MATH_ALU2(STORE, R1, ACCU),
+  MI_MATH_ALU2(LOAD, SRCA, R1),
+  MI_MATH_ALU2(LOAD, SRCB, R0),
+  MI_MATH_ALU0(OR),
+  MI_MATH_ALU2(STORE, R0, ACCU),
+   };
 
-   assert(query->bo != NULL);
+   brw_load_register_imm64(brw, HSW_CS_GPR(0), 0ull);
 
-   /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM
-* command when loading the values into the predicate source registers for
-* conditional rendering.
-*/
-   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);
+   for (int i = stream_start; i < stream_start + count; i++) {
+  int offset = 32 * i;
+  brw_load_register_mem64(brw,
+  HSW_CS_GPR(1),
+  query->bo,
+  I915_GEM_DOMAIN_INSTRUCTION,
+  0, /* write domain */
+  offset);
+  offset += 8;
+  brw_load_register_mem64(brw,
+  HSW_CS_GPR(2),
+  query->bo,
+  I915_GEM_DOMAIN_INSTRUCTION,
+  0, /* write domain */
+  offset);
+  offset += 8;
+  brw_load_register_mem64(brw,
+  HSW_CS_GPR(3),
+  query->bo,
+  I915_GEM_DOMAIN_INSTRUCTION,
+  0, /* write domain */
+  offset);
+  offset += 8;
+  brw_load_register_mem64(brw,
+  HSW_CS_GPR(4),
+  query->bo,
+  I915_GEM_DOMAIN_INSTRUCTION,
+  0, /* write domain */
+  offset);
 
+  BEGIN_BATCH(1 + ARRAY_SIZE(maths));
+  OUT_BATCH(HSW_MI_MATH | (1 + ARRAY_SIZE(maths) - 2));
+
+  for (int m = 0; m < ARRAY_SIZE(maths); m++)
+ OUT_BATCH(maths[m]);
+
+  ADVANCE_BATCH();
+   }
+
+   brw_load_register_reg64(brw, HSW_CS_GPR(0), MI_PREDICATE_SRC0);
+   brw_load_register_imm64(brw, MI_PREDICATE_SRC1, 0ull);
+}
+
+static void
+set_predicate_for_occlusion_query(struct brw_context *brw,
+  struct brw_query_object *query)
+{
brw_load_register_mem64(brw,
MI_PREDICATE_SRC0,
query->bo,
@@ -74,6 +137,34 @@ set_predicate_for_result(struct brw_context *brw,
I915_GEM_DOMAIN_INSTRUCTION,
0, /* write domain */
8 /* offset */);
+}
+
+static void
+set_predicate_for_result(struct brw_context *brw,
+ struct brw_query_object *query,
+ bool inverted)
+{
+
+   int load_op;
+
+   assert(query->bo != NULL);
+
+   /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM
+* command when loading the values into the predicate source registers for
+* conditional rendering.
+*/
+   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);
+
+   switch (query->Base.Target) {
+   case 

[Mesa-dev] [RFC 0/6] Add support for ARB_transform_feedback_overflow_query.

2016-12-05 Thread Rafael Antognolli
This patch series is an initial version that implements support for
ARB_transform_feedback_overflow_query on i965. It does that by storing
the initial values of generated and written primitives on glBeginQuery,
and final values on glEndQuery, and then comparing those values.

I still need to write piglit tests for it, so it's not very tested, but
the basic functionality seems to work fine. Any feedback is welcome.

Thanks,
Rafael

Rafael Antognolli (6):
  mesa: Add types for ARB_transform_feedback_overflow_query.
  i965: add plumbing for ARB_transform_feedback_overflow_query.
  i915: Add support for xfb overflow on query buffer objects.
  i965: Add support for xfb overflow query on conditional render.
  i965: Enable ARB_transform_feedback_overflow_query.
  i965: Enable predicate support on gen >= 8.

 src/mesa/drivers/dri/i965/brw_conditional_render.c | 111 +++--
 src/mesa/drivers/dri/i965/brw_queryobj.c   |   2 +
 src/mesa/drivers/dri/i965/gen6_queryobj.c  |  58 +++
 src/mesa/drivers/dri/i965/gen7_sol_state.c |   9 +-
 src/mesa/drivers/dri/i965/hsw_queryobj.c   | 108 
 src/mesa/drivers/dri/i965/intel_extensions.c   |   2 +
 src/mesa/main/condrender.c |   4 +-
 src/mesa/main/extensions_table.h   |   1 +
 src/mesa/main/get.c|   1 +
 src/mesa/main/mtypes.h |   5 +
 src/mesa/main/queryobj.c   |  21 
 src/mesa/state_tracker/st_cb_queryobj.c|   6 ++
 12 files changed, 314 insertions(+), 14 deletions(-)

-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] swr: include llvm version and vector width in renderer string

2016-12-05 Thread Cherniak, Bruce
Reviewed-by: Bruce Cherniak 

> On Dec 5, 2016, at 11:39 AM, Rowley, Timothy O  
> wrote:
> 
> Uses llvmpipe's string formatting.
> ---
> src/gallium/drivers/swr/swr_screen.cpp | 12 +++-
> 1 file changed, 11 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/swr/swr_screen.cpp 
> b/src/gallium/drivers/swr/swr_screen.cpp
> index 75a9d02..539acf1 100644
> --- a/src/gallium/drivers/swr/swr_screen.cpp
> +++ b/src/gallium/drivers/swr/swr_screen.cpp
> @@ -35,6 +35,7 @@
> #include "util/u_inlines.h"
> #include "util/u_cpu_detect.h"
> #include "util/u_format_s3tc.h"
> +#include "util/u_string.h"
> 
> #include "state_tracker/sw_winsys.h"
> 
> @@ -67,7 +68,11 @@ extern "C" {
> static const char *
> swr_get_name(struct pipe_screen *screen)
> {
> -   return "SWR";
> +   static char buf[100];
> +   util_snprintf(buf, sizeof(buf), "SWR (LLVM %u.%u, %u bits)",
> + HAVE_LLVM >> 8, HAVE_LLVM & 0xff,
> + lp_native_vector_width );
> +   return buf;
> }
> 
> static const char *
> @@ -940,6 +945,11 @@ swr_create_screen_internal(struct sw_winsys *winsys)
>   g_GlobalKnobs.MAX_PRIMS_PER_DRAW.Value(49152);
>}
> 
> +   if (!lp_build_init()) {
> +  FREE(screen);
> +  return NULL;
> +   }
> +
>screen->winsys = winsys;
>screen->base.get_name = swr_get_name;
>screen->base.get_vendor = swr_get_vendor;
> -- 
> 2.9.3
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] docs: document how to (self-) reject stable patches

2016-12-05 Thread Nanley Chery
On Mon, Dec 05, 2016 at 04:11:21PM +, Emil Velikov wrote:
> From: Emil Velikov 
> 
> Document what has been the unofficial way to self-reject stable patches.
> 
> Namely: drop the mesa-stable tag and push the commit.
> 
> Cc: Nanley Chery 
> Signed-off-by: Emil Velikov 
> ---
> Nanley, I fear that adding this note to the criteria section will get
> lost.
> 
> Please let me know if you prefer it there and/or if we can make the
> wording clearer and less likely to miss.

Hi Emil,

I agree with your approach. Documenting this note in a manner that
stands out is a good idea. Thank you very much for making this part of
the stable-patch submission process explicit.

This patch is,
Reviewed-by: Nanley Chery 

> ---
>  docs/submittingpatches.html | 7 +++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/docs/submittingpatches.html b/docs/submittingpatches.html
> index d1ca68d..141cec2 100644
> --- a/docs/submittingpatches.html
> +++ b/docs/submittingpatches.html
> @@ -249,6 +249,13 @@ copied to the mesa-stable@ mailing list when you use 
> "git send-email" to send
>  patches to the mesa-dev@ mailing list. If you prefer using --suppress-cc that
>  won't have any effect negative effect on the patch nomination.
>  
> +
> +Note: by removing the tag [as the commit is pushed] the patch is
> +explicitly rejected from inclusion in the stable branch(es).
> +
> +Thus, drop the line only if you want to cancel the 
> nomination.
> +
> +
>  Criteria for accepting patches to the stable branch
>  
>  Mesa has a designated release manager for each stable branch, and the release
> -- 
> 2.10.2
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] swr: mark PIPE_CAP_NATIVE_FENCE_FD unsupported

2016-12-05 Thread Cherniak, Bruce
Reviewed-by: Bruce Cherniak  

> On Dec 5, 2016, at 11:40 AM, Tim Rowley  wrote:
> 
> ---
> src/gallium/drivers/swr/swr_screen.cpp | 1 +
> 1 file changed, 1 insertion(+)
> 
> diff --git a/src/gallium/drivers/swr/swr_screen.cpp 
> b/src/gallium/drivers/swr/swr_screen.cpp
> index 539acf1..b5c2cd3 100644
> --- a/src/gallium/drivers/swr/swr_screen.cpp
> +++ b/src/gallium/drivers/swr/swr_screen.cpp
> @@ -317,6 +317,7 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap 
> param)
>case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
>case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
>case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
> +   case PIPE_CAP_NATIVE_FENCE_FD:
>   return 0;
> 
>case PIPE_CAP_VENDOR_ID:
> -- 
> 2.9.3
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: mark PIPE_CAP_NATIVE_FENCE_FD unsupported

2016-12-05 Thread Tim Rowley
---
 src/gallium/drivers/swr/swr_screen.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/swr/swr_screen.cpp 
b/src/gallium/drivers/swr/swr_screen.cpp
index 539acf1..b5c2cd3 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -317,6 +317,7 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap 
param)
case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+   case PIPE_CAP_NATIVE_FENCE_FD:
   return 0;
 
case PIPE_CAP_VENDOR_ID:
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] gallivm: use getHostCPUFeatures on x86/llvm-4.0+.

2016-12-05 Thread Tim Rowley
Use llvm provided API based on cpuid rather than our own
manually maintained list of mattr enabling/disabling.
---
 src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp 
b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index a68428d..21d9e15 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -542,6 +542,20 @@ 
lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
llvm::SmallVector MAttrs;
 
 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+#if HAVE_LLVM >= 0x0400
+   /* llvm-3.7+ implements sys::getHostCPUFeatures for x86,
+* which allows us to enable/disable code generation based
+* on the results of cpuid.
+*/
+   llvm::StringMap features;
+   llvm::sys::getHostCPUFeatures(features);
+
+   for (StringMapIterator f = features.begin();
+f != features.end();
+++f) {
+  MAttrs.push_back(((*f).second ? "+" : "-") + (*f).first().str());
+   }
+#else
/*
 * We need to unset attributes because sometimes LLVM mistakenly assumes
 * certain features are present given the processor name.
@@ -596,6 +610,7 @@ 
lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
MAttrs.push_back("-avx512vl");
 #endif
 #endif
+#endif
 
 #if defined(PIPE_ARCH_PPC)
MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec");
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: include llvm version and vector width in renderer string

2016-12-05 Thread Tim Rowley
Uses llvmpipe's string formatting.
---
 src/gallium/drivers/swr/swr_screen.cpp | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/swr/swr_screen.cpp 
b/src/gallium/drivers/swr/swr_screen.cpp
index 75a9d02..539acf1 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -35,6 +35,7 @@
 #include "util/u_inlines.h"
 #include "util/u_cpu_detect.h"
 #include "util/u_format_s3tc.h"
+#include "util/u_string.h"
 
 #include "state_tracker/sw_winsys.h"
 
@@ -67,7 +68,11 @@ extern "C" {
 static const char *
 swr_get_name(struct pipe_screen *screen)
 {
-   return "SWR";
+   static char buf[100];
+   util_snprintf(buf, sizeof(buf), "SWR (LLVM %u.%u, %u bits)",
+ HAVE_LLVM >> 8, HAVE_LLVM & 0xff,
+ lp_native_vector_width );
+   return buf;
 }
 
 static const char *
@@ -940,6 +945,11 @@ swr_create_screen_internal(struct sw_winsys *winsys)
   g_GlobalKnobs.MAX_PRIMS_PER_DRAW.Value(49152);
}
 
+   if (!lp_build_init()) {
+  FREE(screen);
+  return NULL;
+   }
+
screen->winsys = winsys;
screen->base.get_name = swr_get_name;
screen->base.get_vendor = swr_get_vendor;
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] main: allow NEAREST_MIPMAP_NEAREST for stencil texturing

2016-12-05 Thread Kenneth Graunke
On Monday, December 5, 2016 1:40:06 PM PST srol...@vmware.com wrote:
> From: Roland Scheidegger 
> 
> As per GL 4.5 rules, which fixed a spec mistake in GL_ARB_stencil_texturing.
> The extension spec wasn't updated, but just allow it with older GL versions
> as well, hoping there aren't any crazy tests which want to see an error
> there... (Compile tested only.)
> 
> Reported by Józef Kucia 
> ---
>  src/mesa/main/texobj.h | 23 ---
>  1 file changed, 8 insertions(+), 15 deletions(-)

Yeah, allowing NEAREST_MIPMAP_NEAREST makes a ton of sense.
Thanks for reporting this, Józef, and thanks for fixing it, Roland!

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [ANNOUNCE] mesa 12.0.5

2016-12-05 Thread Emil Velikov
Mesa 12.0.5 is now available.

Note: this is the final anticipated release in the 12.0 series. Users are
encouraged to migrate to the 13.0 series in order to obtain future fixes.

In this release we have:

The SWR driver builds correctly with LLVM 3.7 up to 3.9. The i965 and radeon
(r600, radeonsi) drivers have a number of GPU hang and rendering fixes.

The GLVND GLX implementation returns the correct GLX entry points, while the
Intel Vulkan driver advertises the api_version.


Adam Jackson (2):
  glx/glvnd: Don't modify the dummy slot in the dispatch table
  glx/glvnd: Fix dispatch function names and indices

Anuj Phogat (1):
  i965: Fix GPU hang related to multiple render targets and alpha testing

Emil Velikov (5):
  docs: add release notes for 12.0.4
  docs: add sha256 checksums for 12.0.4
  cherry-ignore: add reverted LLVM_LIBDIR patch
  Update version to 12.0.5
  docs: add release notes for 12.0.5

Haixia Shi (1):
  mesa: change state query return value for RGB565

Jason Ekstrand (3):
  i965/fs/generator: Don't use the address immediate for MOV_INDIRECT
  anv/cmd_buffer: Take a command buffer instead of a batch in two helpers
  anv/cmd_buffer: Enable a CS stall workaround for Sky Lake gt4

Kenneth Graunke (1):
  intel: Fix pixel shader scratch space allocation on Gen9+ platforms.

Marek Olšák (13):
  gallium/radeon: fix behavior of GLSL findLSB(0)
  gallium/radeon: make sure HTILE address is aligned properly
  radeonsi: fix an assertion failure in si_decompress_sampler_color_textures
  gallium/radeon: unify viewport emission code
  gallium/radeon: set VPORT_ZMIN/MAX registers correctly
  radeonsi: fix gl_PatchVerticesIn for tessellation evaluation shader
  radeonsi: fix a crash in imageSize for cubemap arrays
  radeonsi: emit TA_CS_BC_BASE_ADDR on SI only if the kernel allows it
  gallium/radeon: add support for sharing textures with DCC between processes
  radeonsi: always set all blend registers
  radeonsi: set CB_BLEND1_CONTROL.ENABLE for dual source blending
  radeonsi: disable RB+ blend optimizations for dual source blending
  radeonsi: silence runtime warnings with LLVM 3.9

Matt Turner (1):
  anv: Replace "abi_versions" with correct "api_version".

Nanley Chery (1):
  mesa/fbobject: Update CubeMapFace when reusing textures

Steinar H. Gunderson (1):
  Fix races during _mesa_HashWalk().

Tim Rowley (3):
  swr: [rasterizer jitter] cleanup supporting different llvm versions
  swr: [rasterizer jitter] fix llvm-3.7 compile
  swr: [rasterizer] add support for llvm-3.9

git tag: mesa-12.0.5

https://mesa.freedesktop.org/archive/12.0.5/mesa-12.0.5.tar.gz
MD5:  2f9f7b1618f8edb768aade894f997139  mesa-12.0.5.tar.gz
SHA1: 4438ae977027f9dc23866c85660263adccb9f386  mesa-12.0.5.tar.gz
SHA256: 44d08a27d98bfeacd864381189e434d98afbf451689d01f80380dc1d66450e5b
 mesa-12.0.5.tar.gz
PGP:  https://mesa.freedesktop.org/archive/12.0.5/mesa-12.0.5.tar.gz.sig

https://mesa.freedesktop.org/archive/12.0.5/mesa-12.0.5.tar.xz
MD5:  7012526b35e589fd3acf7e07939e5915  mesa-12.0.5.tar.xz
SHA1: 383e876d8e3a14d5eb7e3c2b236254844b297ba6  mesa-12.0.5.tar.xz
SHA256: 2b0a972d8282860a11291c09c3ef01ac45171405951eb21a83c45ed2b4321924
 mesa-12.0.5.tar.xz
PGP:  https://mesa.freedesktop.org/archive/12.0.5/mesa-12.0.5.tar.xz.sig
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv: implement pipeline statistics queries

2016-12-05 Thread Ilia Mirkin
On Mon, Dec 5, 2016 at 11:11 AM, Robert Bragg  wrote:
>
>
> On Sun, Nov 27, 2016 at 7:23 PM, Ilia Mirkin  wrote:
>>
>> The strategy is to just keep n anv_query_pool_slot entries per query
>> instead of one. The available bit is only valid in the last one.
>>
>> Signed-off-by: Ilia Mirkin 
>> ---
>>
>> I think this is in a pretty good state now. I've tested both the direct
>> and
>> buffer paths with a hacked up cube application, and I'm seeing
>> non-ridiculous
>> values for the various counters, although I haven't 100% verified them for
>> accuracy.
>>
>> This also implements the hsw/bdw workaround for dividing frag invocations
>> by 4,
>> copied from hsw_queryobj. I tested this on SKL and it seem to divide the
>> values
>> as expected.
>>
>> The cube patch I've been testing with is at
>> http://paste.debian.net/899374/
>> You can flip between copying to a buffer and explicit retrieval by
>> commenting
>> out the relevant function calls.
>>
>>  src/intel/vulkan/anv_device.c  |   2 +-
>>  src/intel/vulkan/anv_private.h |   4 +
>>  src/intel/vulkan/anv_query.c   |  99 ++
>>  src/intel/vulkan/genX_cmd_buffer.c | 260
>> -
>>  4 files changed, 308 insertions(+), 57 deletions(-)
>>
>>
>> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
>> index 99eb73c..7ad1970 100644
>> --- a/src/intel/vulkan/anv_device.c
>> +++ b/src/intel/vulkan/anv_device.c
>> @@ -427,7 +427,7 @@ void anv_GetPhysicalDeviceFeatures(
>>.textureCompressionASTC_LDR   = pdevice->info.gen >= 9,
>> /* FINISHME CHV */
>>.textureCompressionBC = true,
>>.occlusionQueryPrecise= true,
>> -  .pipelineStatisticsQuery  = false,
>> +  .pipelineStatisticsQuery  = true,
>>.fragmentStoresAndAtomics = true,
>>.shaderTessellationAndGeometryPointSize   = true,
>>.shaderImageGatherExtended= false,
>> diff --git a/src/intel/vulkan/anv_private.h
>> b/src/intel/vulkan/anv_private.h
>> index 2fc543d..7271609 100644
>> --- a/src/intel/vulkan/anv_private.h
>> +++ b/src/intel/vulkan/anv_private.h
>> @@ -1763,6 +1763,8 @@ struct anv_render_pass {
>> struct anv_subpass   subpasses[0];
>>  };
>>
>> +#define ANV_PIPELINE_STATISTICS_COUNT 11
>> +
>>  struct anv_query_pool_slot {
>> uint64_t begin;
>> uint64_t end;
>> @@ -1772,6 +1774,8 @@ struct anv_query_pool_slot {
>>  struct anv_query_pool {
>> VkQueryType  type;
>> uint32_t slots;
>> +   uint32_t pipeline_statistics;
>> +   uint32_t slot_stride;
>> struct anv_bobo;
>>  };
>>
>> diff --git a/src/intel/vulkan/anv_query.c b/src/intel/vulkan/anv_query.c
>> index 293257b..dc00859 100644
>> --- a/src/intel/vulkan/anv_query.c
>> +++ b/src/intel/vulkan/anv_query.c
>> @@ -38,8 +38,10 @@ VkResult anv_CreateQueryPool(
>> ANV_FROM_HANDLE(anv_device, device, _device);
>> struct anv_query_pool *pool;
>> VkResult result;
>> -   uint32_t slot_size;
>> -   uint64_t size;
>> +   uint32_t slot_size = sizeof(struct anv_query_pool_slot);
>> +   uint32_t slot_stride = 1;
>> +   uint64_t size = pCreateInfo->queryCount * slot_size;
>> +   uint32_t pipeline_statistics = 0;
>>
>> assert(pCreateInfo->sType ==
>> VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO);
>>
>> @@ -48,12 +50,16 @@ VkResult anv_CreateQueryPool(
>> case VK_QUERY_TYPE_TIMESTAMP:
>>break;
>> case VK_QUERY_TYPE_PIPELINE_STATISTICS:
>> -  return VK_ERROR_INCOMPATIBLE_DRIVER;
>> +  pipeline_statistics = pCreateInfo->pipelineStatistics &
>> + ((1 << ANV_PIPELINE_STATISTICS_COUNT) - 1);
>> +  slot_stride = _mesa_bitcount(pipeline_statistics);
>> +  size *= slot_stride;
>> +  break;
>> default:
>>assert(!"Invalid query type");
>> +  return VK_ERROR_INCOMPATIBLE_DRIVER;
>> }
>>
>> -   slot_size = sizeof(struct anv_query_pool_slot);
>> pool = vk_alloc2(>alloc, pAllocator, sizeof(*pool), 8,
>>   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
>> if (pool == NULL)
>> @@ -61,8 +67,9 @@ VkResult anv_CreateQueryPool(
>>
>> pool->type = pCreateInfo->queryType;
>> pool->slots = pCreateInfo->queryCount;
>> +   pool->pipeline_statistics = pipeline_statistics;
>> +   pool->slot_stride = slot_stride;
>>
>> -   size = pCreateInfo->queryCount * slot_size;
>> result = anv_bo_init_new(>bo, device, size);
>> if (result != VK_SUCCESS)
>>goto fail;
>> @@ -95,6 +102,27 @@ void anv_DestroyQueryPool(
>> vk_free2(>alloc, pAllocator, pool);
>>  }
>>
>> +static void *
>> +store_query_result(void *pData, VkQueryResultFlags flags,
>> +   uint64_t 

Re: [Mesa-dev] [PATCH] anv: implement pipeline statistics queries

2016-12-05 Thread Robert Bragg
On Sun, Nov 27, 2016 at 7:23 PM, Ilia Mirkin  wrote:

> The strategy is to just keep n anv_query_pool_slot entries per query
> instead of one. The available bit is only valid in the last one.
>
> Signed-off-by: Ilia Mirkin 
> ---
>
> I think this is in a pretty good state now. I've tested both the direct and
> buffer paths with a hacked up cube application, and I'm seeing
> non-ridiculous
> values for the various counters, although I haven't 100% verified them for
> accuracy.
>
> This also implements the hsw/bdw workaround for dividing frag invocations
> by 4,
> copied from hsw_queryobj. I tested this on SKL and it seem to divide the
> values
> as expected.
>
> The cube patch I've been testing with is at http://paste.debian.net/
> 899374/
> You can flip between copying to a buffer and explicit retrieval by
> commenting
> out the relevant function calls.
>
>  src/intel/vulkan/anv_device.c  |   2 +-
>  src/intel/vulkan/anv_private.h |   4 +
>  src/intel/vulkan/anv_query.c   |  99 ++
>  src/intel/vulkan/genX_cmd_buffer.c | 260 ++
> ++-
>  4 files changed, 308 insertions(+), 57 deletions(-)
>

> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index 99eb73c..7ad1970 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -427,7 +427,7 @@ void anv_GetPhysicalDeviceFeatures(
>.textureCompressionASTC_LDR   = pdevice->info.gen >= 9,
> /* FINISHME CHV */
>.textureCompressionBC = true,
>.occlusionQueryPrecise= true,
> -  .pipelineStatisticsQuery  = false,
> +  .pipelineStatisticsQuery  = true,
>.fragmentStoresAndAtomics = true,
>.shaderTessellationAndGeometryPointSize   = true,
>.shaderImageGatherExtended= false,
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_
> private.h
> index 2fc543d..7271609 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -1763,6 +1763,8 @@ struct anv_render_pass {
> struct anv_subpass   subpasses[0];
>  };
>
> +#define ANV_PIPELINE_STATISTICS_COUNT 11
> +
>  struct anv_query_pool_slot {
> uint64_t begin;
> uint64_t end;
> @@ -1772,6 +1774,8 @@ struct anv_query_pool_slot {
>  struct anv_query_pool {
> VkQueryType  type;
> uint32_t slots;
> +   uint32_t pipeline_statistics;
> +   uint32_t slot_stride;
> struct anv_bobo;
>  };
>
> diff --git a/src/intel/vulkan/anv_query.c b/src/intel/vulkan/anv_query.c
> index 293257b..dc00859 100644
> --- a/src/intel/vulkan/anv_query.c
> +++ b/src/intel/vulkan/anv_query.c
> @@ -38,8 +38,10 @@ VkResult anv_CreateQueryPool(
> ANV_FROM_HANDLE(anv_device, device, _device);
> struct anv_query_pool *pool;
> VkResult result;
> -   uint32_t slot_size;
> -   uint64_t size;
> +   uint32_t slot_size = sizeof(struct anv_query_pool_slot);
> +   uint32_t slot_stride = 1;
> +   uint64_t size = pCreateInfo->queryCount * slot_size;
> +   uint32_t pipeline_statistics = 0;
>
> assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_
> CREATE_INFO);
>
> @@ -48,12 +50,16 @@ VkResult anv_CreateQueryPool(
> case VK_QUERY_TYPE_TIMESTAMP:
>break;
> case VK_QUERY_TYPE_PIPELINE_STATISTICS:
> -  return VK_ERROR_INCOMPATIBLE_DRIVER;
> +  pipeline_statistics = pCreateInfo->pipelineStatistics &
> + ((1 << ANV_PIPELINE_STATISTICS_COUNT) - 1);
> +  slot_stride = _mesa_bitcount(pipeline_statistics);
> +  size *= slot_stride;
> +  break;
> default:
>assert(!"Invalid query type");
> +  return VK_ERROR_INCOMPATIBLE_DRIVER;
> }
>
> -   slot_size = sizeof(struct anv_query_pool_slot);
> pool = vk_alloc2(>alloc, pAllocator, sizeof(*pool), 8,
>   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
> if (pool == NULL)
> @@ -61,8 +67,9 @@ VkResult anv_CreateQueryPool(
>
> pool->type = pCreateInfo->queryType;
> pool->slots = pCreateInfo->queryCount;
> +   pool->pipeline_statistics = pipeline_statistics;
> +   pool->slot_stride = slot_stride;
>
> -   size = pCreateInfo->queryCount * slot_size;
> result = anv_bo_init_new(>bo, device, size);
> if (result != VK_SUCCESS)
>goto fail;
> @@ -95,6 +102,27 @@ void anv_DestroyQueryPool(
> vk_free2(>alloc, pAllocator, pool);
>  }
>
> +static void *
> +store_query_result(void *pData, VkQueryResultFlags flags,
> +   uint64_t result, uint64_t available)
> +{
> +   if (flags & VK_QUERY_RESULT_64_BIT) {
> +  uint64_t *dst = pData;
> +  *dst++ = result;
> +  if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
> + 

[Mesa-dev] [PATCH] docs: document how to (self-) reject stable patches

2016-12-05 Thread Emil Velikov
From: Emil Velikov 

Document what has been the unofficial way to self-reject stable patches.

Namely: drop the mesa-stable tag and push the commit.

Cc: Nanley Chery 
Signed-off-by: Emil Velikov 
---
Nanley, I fear that this note would get lost if it were added to the
criteria section.

Please let me know if you prefer it there and/or if we can make the
wording clearer and less likely to be missed.
---
 docs/submittingpatches.html | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/docs/submittingpatches.html b/docs/submittingpatches.html
index d1ca68d..141cec2 100644
--- a/docs/submittingpatches.html
+++ b/docs/submittingpatches.html
@@ -249,6 +249,13 @@ copied to the mesa-stable@ mailing list when you use "git 
send-email" to send
 patches to the mesa-dev@ mailing list. If you prefer using --suppress-cc that
 won't have any effect negative effect on the patch nomination.
 
+
+Note: by removing the tag [as the commit is pushed] the patch is
+explicitly rejected from inclusion in the stable branch(es).
+
+Thus, drop the line only if you want to cancel the nomination.
+
+
 Criteria for accepting patches to the stable branch
 
 Mesa has a designated release manager for each stable branch, and the release
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH mesa] add EGL_TEXTURE_EXTERNAL_WL to WL_bind_wayland_display spec

2016-12-05 Thread Daniel Stone
Hi,

On 10 June 2014 at 15:46, Rob Clark  wrote:
> On Mon, Jun 9, 2014 at 5:53 AM, Pekka Paalanen  wrote:
>> On Thu, 16 Aug 2012 17:28:19 -0500 Rob Clark  wrote:
>>> From: Rob Clark 
>>
>> it looks like this patch never made it into Mesa. Also the
>> implementation apparently didn't make it into Mesa, as git pick-axe
>> does not find any mention of EGL_TEXTURE_EXTERNAL_WL.
>>
>> Still, the Weston patch was merged on Aug 31st, 2012.
>>
>> Oops. :-)
>
> heh, well I guess if weston is already using it, perhaps we should
> think about merging the mesa patch ;-)

Yeah, the fact there are three different email addresses for you in
this thread probably says that it's been long enough. ;) Pushed now.

Cheers,
Daniel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 4/6] etnaviv: implement resource_changed to invalidate internal resources derived from imported buffers

2016-12-05 Thread Philipp Zabel
Implement the resource_changed pipe callback to invalidate internal
resources derived from imported buffers. This is needed to update the
texture for re-imported renderables.

Signed-off-by: Philipp Zabel 
---
 src/gallium/drivers/etnaviv/etnaviv_resource.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/src/gallium/drivers/etnaviv/etnaviv_resource.c 
b/src/gallium/drivers/etnaviv/etnaviv_resource.c
index a8858c5..20ec8f8 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_resource.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_resource.c
@@ -275,6 +275,18 @@ etna_resource_create(struct pipe_screen *pscreen,
 }
 
 static void
+etna_resource_changed(struct pipe_screen *pscreen, struct pipe_resource *prsc)
+{
+   struct etna_resource *res = etna_resource(prsc);
+
+   /* Make sure texture is older than the imported renderable buffer,
+* so etna_update_sampler_source will copy the pixel data again.
+*/
+   if (res->texture)
+  etna_resource(res->texture)->seqno = res->seqno - 1;
+}
+
+static void
 etna_resource_destroy(struct pipe_screen *pscreen, struct pipe_resource *prsc)
 {
struct etna_resource *rsc = etna_resource(prsc);
@@ -436,5 +448,6 @@ etna_resource_screen_init(struct pipe_screen *pscreen)
pscreen->resource_create = etna_resource_create;
pscreen->resource_from_handle = etna_resource_from_handle;
pscreen->resource_get_handle = etna_resource_get_handle;
+   pscreen->resource_changed = etna_resource_changed;
pscreen->resource_destroy = etna_resource_destroy;
 }
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >