Re: [Mesa-dev] [PATCH] mesa/math: Allocate memory for GLmatrix elements and its inverse contiguously

2018-04-16 Thread Thomas Helland
Hi, and thanks for the patch =)

Have you done any performance testing on this to verify it
gives us a speedup of any kind? I'm asking because it seems like
this might be something that a decent compiler should be able to do.
Performance related patches, at least in core mesa, usually have
some justification with benchmark numbers in the commit message.
Some style comments below

2018-04-17 1:03 GMT+02:00 Vlad Golovkin :
> When GLmatrix elements and its inverse are stored contiguously in memory it 
> is possible to
> allocate, free and copy these fields with 1 function call instead of 2.
> ---
>  src/mesa/math/m_matrix.c | 15 +--
>  1 file changed, 9 insertions(+), 6 deletions(-)
>
> diff --git a/src/mesa/math/m_matrix.c b/src/mesa/math/m_matrix.c
> index 57a49533de..4ab78a1fb3 100644
> --- a/src/mesa/math/m_matrix.c
> +++ b/src/mesa/math/m_matrix.c
> @@ -1438,8 +1438,7 @@ _math_matrix_is_dirty( const GLmatrix *m )
>  void
>  _math_matrix_copy( GLmatrix *to, const GLmatrix *from )
>  {
> -   memcpy(to->m, from->m, 16 * sizeof(GLfloat));
> -   memcpy(to->inv, from->inv, 16 * sizeof(GLfloat));
> +   memcpy(to->m, from->m, 16 * 2 * sizeof(GLfloat));
> to->flags = from->flags;
> to->type = from->type;
>  }
> @@ -1470,12 +1469,17 @@ _math_matrix_loadf( GLmatrix *mat, const GLfloat *m )
>  void
>  _math_matrix_ctr( GLmatrix *m )
>  {
> -   m->m = _mesa_align_malloc( 16 * sizeof(GLfloat), 16 );
> +   m->m = _mesa_align_malloc( 16 * 2 * sizeof(GLfloat), 16 );
> if (m->m)
> +   {

Our style guide says to keep the opening curly bracket on the same line as the if.

> +  m->inv = m->m + 16;
>memcpy( m->m, Identity, sizeof(Identity) );
> -   m->inv = _mesa_align_malloc( 16 * sizeof(GLfloat), 16 );
> -   if (m->inv)
>memcpy( m->inv, Identity, sizeof(Identity) );
> +   }
> +   else
> +   {

} else {

Although I see that this file defaults to:

}
else {

for some reason. Feel free to follow existing style, or adjust to my comments.
Also, if we want to do this change it deserves a comment in the source.
> +  m->inv = NULL;
> +   }
> m->type = MATRIX_IDENTITY;
> m->flags = 0;
>  }
> @@ -1493,7 +1497,6 @@ _math_matrix_dtr( GLmatrix *m )
> _mesa_align_free( m->m );
> m->m = NULL;
>
> -   _mesa_align_free( m->inv );
> m->inv = NULL;
>  }
>
> --
> 2.14.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH] ac/nir: Make the GFX9 buffer size fix apply to image loads/atomics too.

2018-04-16 Thread Bas Nieuwenhuizen
On Mon, Apr 16, 2018 at 1:17 PM, Juan A. Suarez Romero
 wrote:
> On Mon, 2018-04-16 at 00:09 +0200, Bas Nieuwenhuizen wrote:
>> No clue how I missed those ...
>>
>> Fixes: 4503ff760c "ac/nir: Add workaround for GFX9 buffer views."
>> CC: 
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105320
>
>
> Hi, Bas!
>
> This commit is a candidate for next 17.3 and 18.0 stable releases.
>
>
> Regarding the next 17.3.9 release, which should be released today, as this is
> the last one of 17.3, I understand you want this patch as part of the release,
> right?

It's fine not to. The urgency isn't there compared to the original fix,
and of course being the last release also carries the additional risk
of not being able to fix it if the patch turns out to break something
unexpected. I did not expect it to make it in.

>
> If that's the case, I cherry-picked the commit and solved some trivial
> conflicts. You can check to verify I correctly solved them at:
>
> https://github.com/Igalia/release-mesa/commit/37ad9fc7c6cecdb8a99d071ca6fdc2d663
> 7501a8
>
>
> Related with that commit, maybe you want to ensure that this one also is
> correct:
>
> https://github.com/Igalia/release-mesa/commit/51b4bdc7761b30a56299ee80f51521151d
> 4eec47

Looks correct to me, but I don't see any diffs with the backport I provided?
>
>
>
> Regarding the next 18.0.1, is it fine if we post-pone this patch for next
> 18.0.2, or do you think this must be added in 18.0.1?

18.0.2 is fine for the same reason as above.

>
>
> J.A.
>
>
>
>> ---
>>  src/amd/common/ac_nir_to_llvm.c | 39 +++--
>>  1 file changed, 22 insertions(+), 17 deletions(-)
>>
>> diff --git a/src/amd/common/ac_nir_to_llvm.c 
>> b/src/amd/common/ac_nir_to_llvm.c
>> index 3a3aa72988..45405d30fe 100644
>> --- a/src/amd/common/ac_nir_to_llvm.c
>> +++ b/src/amd/common/ac_nir_to_llvm.c
>> @@ -2191,6 +2191,25 @@ static LLVMValueRef get_image_coords(struct 
>> ac_nir_context *ctx,
>>   return res;
>>  }
>>
>> +static LLVMValueRef get_image_buffer_descriptor(struct ac_nir_context *ctx,
>> +const nir_intrinsic_instr 
>> *instr, bool write)
>> +{
>> + LLVMValueRef rsrc = get_sampler_desc(ctx, instr->variables[0], 
>> AC_DESC_BUFFER, NULL, true, write);
>> + if (ctx->abi->gfx9_stride_size_workaround) {
>> + LLVMValueRef elem_count = 
>> LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 
>> 0), "");
>> + LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, 
>> rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
>> + stride = LLVMBuildLShr(ctx->ac.builder, stride, 
>> LLVMConstInt(ctx->ac.i32, 16, 0), "");
>> +
>> + LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->ac.builder,
>> +   
>> LLVMBuildICmp(ctx->ac.builder, LLVMIntUGT, elem_count, stride, ""),
>> +   elem_count, 
>> stride, "");
>> +
>> + rsrc = LLVMBuildInsertElement(ctx->ac.builder, rsrc, 
>> new_elem_count,
>> +   LLVMConstInt(ctx->ac.i32, 2, 0), 
>> "");
>> + }
>> + return rsrc;
>> +}
>> +
>>  static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
>>const nir_intrinsic_instr *instr)
>>  {
>> @@ -2211,7 +2230,7 @@ static LLVMValueRef visit_image_load(struct 
>> ac_nir_context *ctx,
>>   unsigned num_channels = util_last_bit(mask);
>>   LLVMValueRef rsrc, vindex;
>>
>> - rsrc = get_sampler_desc(ctx, instr->variables[0], 
>> AC_DESC_BUFFER, NULL, true, false);
>> + rsrc = get_image_buffer_descriptor(ctx, instr, false);
>>   vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, 
>> instr->src[0]),
>>ctx->ac.i32_0, "");
>>
>> @@ -2262,20 +2281,7 @@ static void visit_image_store(struct ac_nir_context 
>> *ctx,
>>   glc = ctx->ac.i1true;
>>
>>   if (dim == GLSL_SAMPLER_DIM_BUF) {
>> - LLVMValueRef rsrc = get_sampler_desc(ctx, instr->variables[0], 
>> AC_DESC_BUFFER, NULL, true, true);
>> -
>> - if (ctx->abi->gfx9_stride_size_workaround) {
>> - LLVMValueRef elem_count = 
>> LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 
>> 0), "");
>> - LLVMValueRef stride = 
>> LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 
>> 0), "");
>> - stride = LLVMBuildLShr(ctx->ac.builder, stride, 
>> LLVMConstInt(ctx->ac.i32, 16, 0), "");
>> -
>> - LLVMValueRef new_elem_count = 
>> LLVMBuildSelect(ctx->ac.builder,
>> -   
>> LLVMBuildICmp(ctx->ac.builder, 

Re: [Mesa-dev] [PATCH 1/2] mesa: GL_EXT_texture_norm16 extension plumbing

2018-04-16 Thread Tapani Pälli



On 17.04.2018 02:42, Ilia Mirkin wrote:

On Mon, Apr 16, 2018 at 7:36 PM, Tapani Pälli  wrote:

Hi;

On 16.04.2018 18:19, Ilia Mirkin wrote:


You never check EXT_texture_norm16 anywhere... is that right? Should
probably respect that in the _mesa_is_es3_* helpers. Also what about



Oops that is very correct, will add the checks.


shader support? I see no mention in the spec that these should be
usable as images, except when NV_image_formats is enabled. You're just
exposing them no matter what in ES 3.10.



Layout qualifiers added by the spec are exposed only when both ES 3.10 and
NV_image_formats are present. The boolean in the map below (last column) is
the check for NV_image_formats; without it you'll get a compilation error.


That's not quite right:

if ((state->is_version(map[i].required_glsl,
   map[i].required_essl) ||
 (state->NV_image_formats_enable &&
  map[i].nv_image_formats)) &&

So you'd need to leave the essl version alone; otherwise the
NV_image_formats check has no effect. Also, do you plan on


Right, I see; only mark those formats as true.


differentiating the case "have NV_image_formats but not
EXT_texture_norm16"?


I had a version that added another boolean, but since both of these
extensions require GL ES 3.1, it seemed unlikely to me that a platform
would support 3.1 but not EXT_texture_norm16 (?)



Sounds like you need to add some negative compiler tests.



Will write some.

// Tapani
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/10] nir: return early when lowering a return at the end of a function

2018-04-16 Thread Jason Ekstrand
On Mon, Apr 9, 2018 at 9:34 PM, Timothy Arceri 
wrote:

> Otherwise we create unused conditional return flags and things
> get unnecessarily ugly fast when lowering nested functions.
> ---
>  src/compiler/nir/nir_lower_returns.c | 8 
>  1 file changed, 8 insertions(+)
>
> diff --git a/src/compiler/nir/nir_lower_returns.c
> b/src/compiler/nir/nir_lower_returns.c
> index 423192adb8a..e1ba5f2ad64 100644
> --- a/src/compiler/nir/nir_lower_returns.c
> +++ b/src/compiler/nir/nir_lower_returns.c
> @@ -27,6 +27,7 @@
>
>  struct lower_returns_state {
> nir_builder builder;
> +   nir_function_impl *impl;
> struct exec_list *cf_list;
> nir_loop *loop;
> nir_variable *return_flag;
> @@ -180,6 +181,12 @@ lower_returns_in_block(nir_block *block, struct
> lower_returns_state *state)
>
> nir_instr_remove(&jump->instr);
>
> +   /* If this is a return in the last block of the function there is
> nothing
> +* more to do once its removed.
> +*/
> +   if (block == nir_impl_last_block(state->impl))
>

You can pull the impl out of the builder.


> +  return true;
> +
> nir_builder *b = &state->builder;
>
> /* Set the return flag */
> @@ -252,6 +259,7 @@ nir_lower_returns_impl(nir_function_impl *impl)
>  {
> struct lower_returns_state state;
>
> +   state.impl = impl;
> state.cf_list = &impl->body;
> state.loop = NULL;
> state.return_flag = NULL;
> --
> 2.17.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/10] nir: return early when lowering a return at the end of a function

2018-04-16 Thread Jason Ekstrand
Reviewed-by: Jason Ekstrand 

On Mon, Apr 9, 2018 at 9:34 PM, Timothy Arceri 
wrote:

> Otherwise we create unused conditional return flags and things
> get unnecessarily ugly fast when lowering nested functions.
> ---
>  src/compiler/nir/nir_lower_returns.c | 8 
>  1 file changed, 8 insertions(+)
>
> diff --git a/src/compiler/nir/nir_lower_returns.c
> b/src/compiler/nir/nir_lower_returns.c
> index 423192adb8a..e1ba5f2ad64 100644
> --- a/src/compiler/nir/nir_lower_returns.c
> +++ b/src/compiler/nir/nir_lower_returns.c
> @@ -27,6 +27,7 @@
>
>  struct lower_returns_state {
> nir_builder builder;
> +   nir_function_impl *impl;
>

I think we could avoid this by looking at block->node.parent.  Meh.


> struct exec_list *cf_list;
> nir_loop *loop;
> nir_variable *return_flag;
> @@ -180,6 +181,12 @@ lower_returns_in_block(nir_block *block, struct
> lower_returns_state *state)
>
> nir_instr_remove(&jump->instr);
>
> +   /* If this is a return in the last block of the function there is
> nothing
> +* more to do once its removed.
> +*/
> +   if (block == nir_impl_last_block(state->impl))
> +  return true;
> +
> nir_builder *b = &state->builder;
>
> /* Set the return flag */
> @@ -252,6 +259,7 @@ nir_lower_returns_impl(nir_function_impl *impl)
>  {
> struct lower_returns_state state;
>
> +   state.impl = impl;
> state.cf_list = &impl->body;
> state.loop = NULL;
> state.return_flag = NULL;
> --
> 2.17.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] fix gcc 8 parenthesis warning

2018-04-16 Thread Timothy Arceri

Pushed. Thanks!

On 23/03/18 21:01, Marc Dietrich wrote:

fixes warnings like this:
[184/1137] Compiling C++ object 'src/compiler/glsl/glsl@sta/lower_jumps.cpp.o'.
In file included from ../src/mesa/main/mtypes.h:48,
  from ../src/compiler/glsl_types.h:149,
  from ../src/compiler/glsl/lower_jumps.cpp:59:
../src/compiler/glsl/lower_jumps.cpp: In member function 
'{anonymous}::block_record 
{anonymous}::ir_lower_jumps_visitor::visit_block(exec_list*)':
../src/compiler/glsl/list.h:650:17: warning: unnecessary parentheses in 
declaration of 'node' [-Wparentheses]
 for (__type *(__inst) = (__type *)(__list)->head_sentinel.next; \
  ^
../src/compiler/glsl/lower_jumps.cpp:510:7: note: in expansion of macro 
'foreach_in_list'
foreach_in_list(ir_instruction, node, list) {
^~~

Signed-off-by: Marc Dietrich 
---
  src/compiler/glsl/list.h | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/list.h b/src/compiler/glsl/list.h
index f77fe12991..2bfa273554 100644
--- a/src/compiler/glsl/list.h
+++ b/src/compiler/glsl/list.h
@@ -647,12 +647,12 @@ inline void exec_node::insert_before(exec_list *before)
  #endif
  
  #define foreach_in_list(__type, __inst, __list)  \

-   for (__type *(__inst) = (__type *)(__list)->head_sentinel.next; \
+   for (__type *__inst = (__type *)(__list)->head_sentinel.next; \
  !(__inst)->is_tail_sentinel();   \
  (__inst) = (__type *)(__inst)->next)
  
  #define foreach_in_list_reverse(__type, __inst, __list)   \

-   for (__type *(__inst) = (__type *)(__list)->tail_sentinel.prev; \
+   for (__type *__inst = (__type *)(__list)->tail_sentinel.prev; \
  !(__inst)->is_head_sentinel();\
  (__inst) = (__type *)(__inst)->prev)
  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/9] radeonsi: add struct si_compiler containing LLVMTargetMachineRef

2018-04-16 Thread Timothy Arceri

Series:

Reviewed-by: Timothy Arceri 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 7/9] radeonsi: avoid a crash in gallivm_dispose_target_library_info

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_pipe.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 09b9f588a6f..490a090da87 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -157,22 +157,25 @@ static void si_init_compiler(struct si_screen *sscreen,
compiler->data_layout = LLVMCopyStringRepOfTargetData(data_layout);
LLVMDisposeTargetData(data_layout);
 }
 
 static void si_destroy_compiler(struct si_compiler *compiler)
 {
if (compiler->data_layout)
LLVMDisposeMessage((char*)compiler->data_layout);
if (compiler->passmgr)
LLVMDisposePassManager(compiler->passmgr);
+#if HAVE_LLVM < 0x0500 || HAVE_LLVM >= 0x0700
+   /* This crashes on LLVM 5.0 and 6.0 and Ubuntu 18.04, so leak it there. 
*/
if (compiler->target_library_info)

gallivm_dispose_target_library_info(compiler->target_library_info);
+#endif
if (compiler->tm)
LLVMDisposeTargetMachine(compiler->tm);
 }
 
 /*
  * pipe_context
  */
 static void si_destroy_context(struct pipe_context *context)
 {
struct si_context *sctx = (struct si_context *)context;
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 8/9] Revert "radeonsi: fix potential use-after-free of debug callbacks"

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

This reverts commit b650fc09c3a35ca624aad5fe4b5c34867708f116.

There is a 100%-reproducible deadlock if I increase the number of compiler
queues.

Also:
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105379

Cc: 18.0 
---
 src/gallium/drivers/radeonsi/si_pipe.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 490a090da87..f1f1e3ad890 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -353,24 +353,20 @@ static void si_emit_string_marker(struct pipe_context 
*ctx,
dd_parse_apitrace_marker(string, len, >apitrace_call_number);
 
if (sctx->log)
u_log_printf(sctx->log, "\nString marker: %*s\n", len, string);
 }
 
 static void si_set_debug_callback(struct pipe_context *ctx,
  const struct pipe_debug_callback *cb)
 {
struct si_context *sctx = (struct si_context *)ctx;
-   struct si_screen *screen = sctx->screen;
-
-   util_queue_finish(&screen->shader_compiler_queue);
-   util_queue_finish(&screen->shader_compiler_queue_low_priority);
 
if (cb)
sctx->debug = *cb;
else
memset(&sctx->debug, 0, sizeof(sctx->debug));
 }
 
 static void si_set_log_context(struct pipe_context *ctx,
   struct u_log_context *log)
 {
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/9] radeonsi: add triple into si_compiler

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/ac_llvm_util.c   | 6 +-
 src/amd/common/ac_llvm_util.h   | 4 +++-
 src/amd/vulkan/radv_shader.c| 2 +-
 src/gallium/drivers/radeonsi/si_pipe.c  | 3 ++-
 src/gallium/drivers/radeonsi/si_shader.h| 1 +
 src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c | 2 +-
 6 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index f3db1c5a4a4..c580d822ad4 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -115,21 +115,23 @@ const char *ac_get_llvm_processor_name(enum radeon_family 
family)
return "polaris11";
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_RAVEN:
return "gfx900";
default:
return "";
}
 }
 
-LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, enum 
ac_target_machine_options tm_options)
+LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
+ enum ac_target_machine_options 
tm_options,
+ const char **out_triple)
 {
assert(family >= CHIP_TAHITI);
char features[256];
const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? 
"amdgcn-mesa-mesa3d" : "amdgcn--";
LLVMTargetRef target = ac_get_llvm_target(triple);
 
snprintf(features, sizeof(features),
 
"+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s%s",
 tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "",
 tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "",
@@ -138,20 +140,22 @@ LLVMTargetMachineRef ac_create_target_machine(enum 
radeon_family family, enum ac

LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
 target,
 triple,
 ac_get_llvm_processor_name(family),
 features,
 LLVMCodeGenLevelDefault,
 LLVMRelocDefault,
 LLVMCodeModelDefault);
 
+   if (out_triple)
+   *out_triple = triple;
return tm;
 }
 
 static const char *attr_to_str(enum ac_func_attr attr)
 {
switch (attr) {
case AC_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline";
case AC_FUNC_ATTR_INREG: return "inreg";
case AC_FUNC_ATTR_NOALIAS: return "noalias";
case AC_FUNC_ATTR_NOUNWIND: return "nounwind";
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index 9c6b89bf6c1..0aa803c5bc1 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -61,21 +61,23 @@ enum ac_target_machine_options {
AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 4),
 };
 
 enum ac_float_mode {
AC_FLOAT_MODE_DEFAULT,
AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH,
AC_FLOAT_MODE_UNSAFE_FP_MATH,
 };
 
 const char *ac_get_llvm_processor_name(enum radeon_family family);
-LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, enum 
ac_target_machine_options tm_options);
+LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
+ enum ac_target_machine_options 
tm_options,
+ const char **out_triple);
 
 LLVMTargetRef ac_get_llvm_target(const char *triple);
 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
 bool ac_is_sgpr_param(LLVMValueRef param);
 void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
   int attr_idx, enum ac_func_attr attr);
 void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
unsigned attrib_mask);
 void ac_dump_module(LLVMModuleRef module);
 
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 9d49bc02a81..2e652f7b11e 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -482,21 +482,21 @@ shader_variant_create(struct radv_device *device,
options->dump_shader = radv_can_dump_shader(device, module);
options->dump_preoptir = options->dump_shader &&
 device->instance->debug_flags & 
RADV_DEBUG_PREOPTIR;
options->record_llvm_ir = device->keep_shader_info;
options->tess_offchip_block_dw_size = 
device->tess_offchip_block_dw_size;
 
if (options->supports_spill)
tm_options |= AC_TM_SUPPORTS_SPILL;
if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED)
tm_options |= AC_TM_SISCHED;
-   tm = ac_create_target_machine(chip_family, tm_options);
+   tm = 

[Mesa-dev] [PATCH 9/9] radeonsi: increase the number of compiler threads depending on the CPU

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

The compiler queue was limited to 3 threads, so shader-db running
on a 16-thread CPU would have a bottleneck on the 3-thread queue.
---
 src/gallium/drivers/radeonsi/si_pipe.c | 39 +-
 src/gallium/drivers/radeonsi/si_pipe.h |  6 ++--
 2 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index f1f1e3ad890..d044b191b71 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -848,21 +848,21 @@ static void si_disk_cache_create(struct si_screen 
*sscreen)
  shader_debug_flags);
free(timestamp_str);
}
}
 }
 
 struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
   const struct pipe_screen_config 
*config)
 {
struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
-   unsigned num_threads, num_compiler_threads, 
num_compiler_threads_lowprio, i;
+   unsigned hw_threads, num_comp_hi_threads, num_comp_lo_threads, i;
 
if (!sscreen) {
return NULL;
}
 
sscreen->ws = ws;
ws->query_info(ws, >info);
 
sscreen->debug_flags = debug_get_flags_option("R600_DEBUG",
debug_options, 0);
@@ -905,40 +905,53 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws,
(void) mtx_init(>gpu_load_mutex, mtx_plain);
 
if (!si_init_gs_info(sscreen) ||
!si_init_shader_cache(sscreen)) {
FREE(sscreen);
return NULL;
}
 
si_disk_cache_create(sscreen);
 
-   /* Only enable as many threads as we have target machines, but at most
-* the number of CPUs - 1 if there is more than one.
-*/
-   num_threads = sysconf(_SC_NPROCESSORS_ONLN);
-   num_threads = MAX2(1, num_threads - 1);
-   num_compiler_threads = MIN2(num_threads, ARRAY_SIZE(sscreen->compiler));
-   num_compiler_threads_lowprio =
-   MIN2(num_threads, ARRAY_SIZE(sscreen->compiler_lowp));
+   /* Determine the number of shader compiler threads. */
+   hw_threads = sysconf(_SC_NPROCESSORS_ONLN);
+
+   if (hw_threads >= 12) {
+   num_comp_hi_threads = hw_threads * 3 / 4;
+   num_comp_lo_threads = hw_threads / 3;
+   } else if (hw_threads >= 6) {
+   num_comp_hi_threads = hw_threads - 2;
+   num_comp_lo_threads = hw_threads / 2;
+   } else if (hw_threads >= 2) {
+   num_comp_hi_threads = hw_threads - 1;
+   num_comp_lo_threads = hw_threads / 2;
+   } else {
+   num_comp_hi_threads = 1;
+   num_comp_lo_threads = 1;
+   }
+
+   num_comp_hi_threads = MIN2(num_comp_hi_threads,
+  ARRAY_SIZE(sscreen->compiler));
+   num_comp_lo_threads = MIN2(num_comp_lo_threads,
+  ARRAY_SIZE(sscreen->compiler_lowp));
 
if (!util_queue_init(>shader_compiler_queue, "si_shader",
-32, num_compiler_threads,
+64, num_comp_hi_threads,
 UTIL_QUEUE_INIT_RESIZE_IF_FULL)) {
si_destroy_shader_cache(sscreen);
FREE(sscreen);
return NULL;
}
 
if (!util_queue_init(>shader_compiler_queue_low_priority,
 "si_shader_low",
-32, num_compiler_threads_lowprio,
+64, num_comp_lo_threads,
 UTIL_QUEUE_INIT_RESIZE_IF_FULL |
 UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY)) {
   si_destroy_shader_cache(sscreen);
   FREE(sscreen);
   return NULL;
}
 
si_handle_env_var_force_family(sscreen);
 
if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false))
@@ -1075,23 +1088,23 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws,
sscreen->barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 |
SI_CONTEXT_INV_VMEM_L1;
if (sscreen->info.chip_class <= VI) {
sscreen->barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_GLOBAL_L2;
sscreen->barrier_flags.L2_to_cp |= 
SI_CONTEXT_WRITEBACK_GLOBAL_L2;
}
 
if (debug_get_bool_option("RADEON_DUMP_SHADERS", false))
sscreen->debug_flags |= DBG_ALL_SHADERS;
 
-   for (i = 0; i < num_compiler_threads; i++)
+   for (i = 0; i < num_comp_hi_threads; i++)
si_init_compiler(sscreen, >compiler[i]);
-   for (i = 0; i < num_compiler_threads_lowprio; i++)
+   for (i = 0; i < num_comp_lo_threads; i++)

[Mesa-dev] [PATCH 5/9] radeonsi: move passmgr into si_compiler

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_pipe.c| 30 
 src/gallium/drivers/radeonsi/si_pipe.h|  7 
 src/gallium/drivers/radeonsi/si_shader.h  |  1 +
 .../drivers/radeonsi/si_shader_tgsi_setup.c   | 34 +--
 4 files changed, 32 insertions(+), 40 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 482d667a7d4..d125f5a1d95 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -34,20 +34,26 @@
 #include "util/hash_table.h"
 #include "util/u_log.h"
 #include "util/u_memory.h"
 #include "util/u_suballoc.h"
 #include "util/u_tests.h"
 #include "util/u_upload_mgr.h"
 #include "util/xmlconfig.h"
 #include "vl/vl_decoder.h"
 #include "driver_ddebug/dd_util.h"
 
+#include <llvm-c/Transforms/IPO.h>
+#include <llvm-c/Transforms/Scalar.h>
+#if HAVE_LLVM >= 0x0700
+#include <llvm-c/Transforms/Utils.h>
+#endif
+
 static const struct debug_named_value debug_options[] = {
/* Shader logging options: */
{ "vs", DBG(VS), "Print vertex shaders" },
{ "ps", DBG(PS), "Print pixel shaders" },
{ "gs", DBG(GS), "Print geometry shaders" },
{ "tcs", DBG(TCS), "Print tessellation control shaders" },
{ "tes", DBG(TES), "Print tessellation evaluation shaders" },
{ "cs", DBG(CS), "Print compute shaders" },
{ "noir", DBG(NO_IR), "Don't print the LLVM IR"},
{ "notgsi", DBG(NO_TGSI), "Don't print the TGSI"},
@@ -114,24 +120,48 @@ static void si_init_compiler(struct si_screen *sscreen,
 
compiler->tm = ac_create_target_machine(sscreen->info.family,
tm_options, &compiler->triple);
if (!compiler->tm)
return;
 
compiler->target_library_info =
gallivm_create_target_library_info(compiler->triple);
if (!compiler->target_library_info)
return;
+
+   compiler->passmgr = LLVMCreatePassManager();
+   if (!compiler->passmgr)
+   return;
+
+   LLVMAddTargetLibraryInfo(compiler->target_library_info,
+compiler->passmgr);
+
+   /* Add LLVM passes into the pass manager. */
+   if (sscreen->debug_flags & DBG(CHECK_IR))
+   LLVMAddVerifierPass(compiler->passmgr);
+
+   LLVMAddAlwaysInlinerPass(compiler->passmgr);
+   /* This pass should eliminate all the load and store instructions. */
+   LLVMAddPromoteMemoryToRegisterPass(compiler->passmgr);
+   LLVMAddScalarReplAggregatesPass(compiler->passmgr);
+   LLVMAddLICMPass(compiler->passmgr);
+   LLVMAddAggressiveDCEPass(compiler->passmgr);
+   LLVMAddCFGSimplificationPass(compiler->passmgr);
+   /* This is recommended by the instruction combining pass. */
+   LLVMAddEarlyCSEMemSSAPass(compiler->passmgr);
+   LLVMAddInstructionCombiningPass(compiler->passmgr);
 }
 
 static void si_destroy_compiler(struct si_compiler *compiler)
 {
+   if (compiler->passmgr)
+   LLVMDisposePassManager(compiler->passmgr);
if (compiler->target_library_info)

gallivm_dispose_target_library_info(compiler->target_library_info);
if (compiler->tm)
LLVMDisposeTargetMachine(compiler->tm);
 }
 
 /*
  * pipe_context
  */
 static void si_destroy_context(struct pipe_context *context)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 54c9b725fcb..a67786c84d9 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1384,27 +1384,20 @@ static inline struct si_shader* si_get_vs_state(struct 
si_context *sctx)
struct si_shader_ctx_state *vs = si_get_vs(sctx);
return vs->current ? vs->current : NULL;
 }
 
 static inline bool si_can_dump_shader(struct si_screen *sscreen,
  unsigned processor)
 {
return sscreen->debug_flags & (1 << processor);
 }
 
-static inline bool si_extra_shader_checks(struct si_screen *sscreen,
- unsigned processor)
-{
-   return (sscreen->debug_flags & DBG(CHECK_IR)) ||
-  si_can_dump_shader(sscreen, processor);
-}
-
 static inline bool si_get_strmout_en(struct si_context *sctx)
 {
return sctx->streamout.streamout_enabled ||
   sctx->streamout.prims_gen_query_enabled;
 }
 
 static inline unsigned
 si_optimal_tcc_alignment(struct si_context *sctx, unsigned upload_size)
 {
unsigned alignment, tcc_cache_line_size;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 8761bc7e7c9..a0122d23910 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -309,20 +309,21 @@ enum {
SI_FIX_FETCH_RGB_16_INT,
 };
 
 struct si_shader;
 
 /* Per-thread persistent LLVM objects. */
 struct si_compiler {
LLVMTargetMachineRef

[Mesa-dev] [PATCH 6/9] radeonsi: move data_layout into si_compiler

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_pipe.c  | 9 +
 src/gallium/drivers/radeonsi/si_shader.h| 1 +
 src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c | 7 +--
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index d125f5a1d95..09b9f588a6f 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -142,24 +142,33 @@ static void si_init_compiler(struct si_screen *sscreen,
LLVMAddAlwaysInlinerPass(compiler->passmgr);
/* This pass should eliminate all the load and store instructions. */
LLVMAddPromoteMemoryToRegisterPass(compiler->passmgr);
LLVMAddScalarReplAggregatesPass(compiler->passmgr);
LLVMAddLICMPass(compiler->passmgr);
LLVMAddAggressiveDCEPass(compiler->passmgr);
LLVMAddCFGSimplificationPass(compiler->passmgr);
/* This is recommended by the instruction combining pass. */
LLVMAddEarlyCSEMemSSAPass(compiler->passmgr);
LLVMAddInstructionCombiningPass(compiler->passmgr);
+
+   /* Get the data layout. */
+   LLVMTargetDataRef data_layout = 
LLVMCreateTargetDataLayout(compiler->tm);
+   if (!data_layout)
+   return;
+   compiler->data_layout = LLVMCopyStringRepOfTargetData(data_layout);
+   LLVMDisposeTargetData(data_layout);
 }
 
 static void si_destroy_compiler(struct si_compiler *compiler)
 {
+   if (compiler->data_layout)
+   LLVMDisposeMessage((char*)compiler->data_layout);
if (compiler->passmgr)
LLVMDisposePassManager(compiler->passmgr);
if (compiler->target_library_info)

gallivm_dispose_target_library_info(compiler->target_library_info);
if (compiler->tm)
LLVMDisposeTargetMachine(compiler->tm);
 }
 
 /*
  * pipe_context
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index a0122d23910..94366f41204 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -308,20 +308,21 @@ enum {
SI_FIX_FETCH_RGB_16,
SI_FIX_FETCH_RGB_16_INT,
 };
 
 struct si_shader;
 
 /* Per-thread persistent LLVM objects. */
 struct si_compiler {
LLVMTargetMachineReftm;
const char  *triple;
+   const char  *data_layout;
LLVMTargetLibraryInfoReftarget_library_info;
LLVMPassManagerRef  passmgr;
 };
 
 /* State of the context creating the shader object. */
 struct si_compiler_ctx_state {
/* Should only be used by si_init_shader_selector_async and
 * si_build_shader_variant if thread_index == -1 (non-threaded). */
struct si_compiler  *compiler;
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index 29b1e50dc47..a4c2a1f27ab 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -998,26 +998,21 @@ void si_llvm_context_init(struct si_shader_context *ctx,
 * helper functions in the gallivm module.
 */
memset(ctx, 0, sizeof(*ctx));
ctx->screen = sscreen;
ctx->compiler = compiler;
 
ctx->gallivm.context = LLVMContextCreate();
ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
ctx->gallivm.context);
LLVMSetTarget(ctx->gallivm.module, compiler->triple);
-
-   LLVMTargetDataRef data_layout = 
LLVMCreateTargetDataLayout(compiler->tm);
-   char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
-   LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
-   LLVMDisposeTargetData(data_layout);
-   LLVMDisposeMessage(data_layout_str);
+   LLVMSetDataLayout(ctx->gallivm.module, compiler->data_layout);
 
bool unsafe_fpmath = (sscreen->debug_flags & DBG(UNSAFE_MATH)) != 0;
enum ac_float_mode float_mode =
unsafe_fpmath ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
 
ctx->gallivm.builder = ac_create_builder(ctx->gallivm.context,
 float_mode);
 
ac_llvm_context_init(&ctx->ac, ctx->gallivm.context,
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/9] radeonsi: add struct si_compiler containing LLVMTargetMachineRef

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

It will contain more variables.
---
 src/gallium/drivers/radeonsi/si_compute.c |  8 +--
 src/gallium/drivers/radeonsi/si_pipe.c| 52 ---
 src/gallium/drivers/radeonsi/si_pipe.h|  6 +-
 src/gallium/drivers/radeonsi/si_shader.c  | 66 +--
 src/gallium/drivers/radeonsi/si_shader.h  | 13 ++--
 .../drivers/radeonsi/si_shader_internal.h |  6 +-
 .../drivers/radeonsi/si_shader_tgsi_setup.c   | 13 ++--
 .../drivers/radeonsi/si_state_shaders.c   | 28 
 8 files changed, 101 insertions(+), 91 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 69c3dce0124..e95e79c7b46 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -79,27 +79,27 @@ static void code_object_to_config(const amd_kernel_code_t 
*code_object,
out_config->scratch_bytes_per_wave =
align(code_object->workitem_private_segment_byte_size * 64, 
1024);
 }
 
 /* Asynchronous compute shader compilation. */
 static void si_create_compute_state_async(void *job, int thread_index)
 {
struct si_compute *program = (struct si_compute *)job;
struct si_shader *shader = &program->shader;
struct si_shader_selector sel;
-   LLVMTargetMachineRef tm;
+   struct si_compiler *compiler;
struct pipe_debug_callback *debug = &program->compiler_ctx_state.debug;
 
assert(!debug->debug_message || debug->async);
assert(thread_index >= 0);
-   assert(thread_index < ARRAY_SIZE(program->screen->tm));
-   tm = program->screen->tm[thread_index];
+   assert(thread_index < ARRAY_SIZE(program->screen->compiler));
+   compiler = &program->screen->compiler[thread_index];
 
memset(&sel, 0, sizeof(sel));
 
sel.screen = program->screen;
 
if (program->ir_type == PIPE_SHADER_IR_TGSI) {
tgsi_scan_shader(program->ir.tgsi, &sel.info);
sel.tokens = program->ir.tgsi;
} else {
assert(program->ir_type == PIPE_SHADER_IR_NIR);
@@ -116,21 +116,21 @@ static void si_create_compute_state_async(void *job, int 
thread_index)
 >active_const_and_shader_buffers,
 >active_samplers_and_images);
 
program->shader.selector = &sel;
program->shader.is_monolithic = true;
program->uses_grid_size = sel.info.uses_grid_size;
program->uses_block_size = sel.info.uses_block_size;
program->uses_bindless_samplers = sel.info.uses_bindless_samplers;
program->uses_bindless_images = sel.info.uses_bindless_images;
 
-   if (si_shader_create(program->screen, tm, &program->shader, debug)) {
+   if (si_shader_create(program->screen, compiler, &program->shader, 
debug)) {
program->shader.compilation_failed = true;
} else {
bool scratch_enabled = shader->config.scratch_bytes_per_wave > 
0;
unsigned user_sgprs = SI_NUM_RESOURCE_SGPRS +
  (sel.info.uses_grid_size ? 3 : 0) +
  (sel.info.uses_block_size ? 3 : 0);
 
shader->config.rsrc1 =
S_00B848_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B848_SGPRS((shader->config.num_sgprs - 1) / 8) |
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 3de843af11c..fcf3556bcc1 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -95,20 +95,38 @@ static const struct debug_named_value debug_options[] = {
 
/* Tests: */
{ "testdma", DBG(TEST_DMA), "Invoke SDMA tests and exit." },
{ "testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and 
exit." },
{ "testvmfaultsdma", DBG(TEST_VMFAULT_SDMA), "Invoke a SDMA VM fault 
test and exit." },
{ "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM 
fault test and exit." },
 
DEBUG_NAMED_VALUE_END /* must be last */
 };
 
+static void si_init_compiler(struct si_screen *sscreen,
+struct si_compiler *compiler)
+{
+   enum ac_target_machine_options tm_options =
+   (sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) |
+   (sscreen->info.chip_class >= GFX9 ? AC_TM_FORCE_ENABLE_XNACK : 
0) |
+   (sscreen->info.chip_class < GFX9 ? AC_TM_FORCE_DISABLE_XNACK : 
0) |
+   (!sscreen->llvm_has_working_vgpr_indexing ? 
AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0);
+
+   compiler->tm = ac_create_target_machine(sscreen->info.family, 
tm_options);
+}
+
+static void si_destroy_compiler(struct si_compiler *compiler)
+{
+   if (compiler->tm)
+   LLVMDisposeTargetMachine(compiler->tm);
+}
+
 /*
  * pipe_context
  */
 static void si_destroy_context(struct pipe_context *context)

[Mesa-dev] [PATCH 4/9] radeonsi: move target_library_info into si_compiler

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_pipe.c  | 10 ++
 src/gallium/drivers/radeonsi/si_shader.h|  1 +
 src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c |  7 ++-
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index ad813888597..482d667a7d4 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -22,20 +22,21 @@
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #include "si_pipe.h"
 #include "si_public.h"
 #include "si_shader_internal.h"
 #include "sid.h"
 
 #include "radeon/radeon_uvd.h"
+#include "gallivm/lp_bld_misc.h"
 #include "util/disk_cache.h"
 #include "util/hash_table.h"
 #include "util/u_log.h"
 #include "util/u_memory.h"
 #include "util/u_suballoc.h"
 #include "util/u_tests.h"
 #include "util/u_upload_mgr.h"
 #include "util/xmlconfig.h"
 #include "vl/vl_decoder.h"
 #include "driver_ddebug/dd_util.h"
@@ -106,24 +107,33 @@ static void si_init_compiler(struct si_screen *sscreen,
 struct si_compiler *compiler)
 {
enum ac_target_machine_options tm_options =
(sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) |
(sscreen->info.chip_class >= GFX9 ? AC_TM_FORCE_ENABLE_XNACK : 
0) |
(sscreen->info.chip_class < GFX9 ? AC_TM_FORCE_DISABLE_XNACK : 
0) |
(!sscreen->llvm_has_working_vgpr_indexing ? 
AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0);
 
compiler->tm = ac_create_target_machine(sscreen->info.family,
tm_options, &compiler->triple);
+   if (!compiler->tm)
+   return;
+
+   compiler->target_library_info =
+   gallivm_create_target_library_info(compiler->triple);
+   if (!compiler->target_library_info)
+   return;
 }
 
 static void si_destroy_compiler(struct si_compiler *compiler)
 {
+   if (compiler->target_library_info)
+   
gallivm_dispose_target_library_info(compiler->target_library_info);
if (compiler->tm)
LLVMDisposeTargetMachine(compiler->tm);
 }
 
 /*
  * pipe_context
  */
 static void si_destroy_context(struct pipe_context *context)
 {
struct si_context *sctx = (struct si_context *)context;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index e6205a204c1..8761bc7e7c9 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -308,20 +308,21 @@ enum {
SI_FIX_FETCH_RGB_16,
SI_FIX_FETCH_RGB_16_INT,
 };
 
 struct si_shader;
 
 /* Per-thread persistent LLVM objects. */
 struct si_compiler {
LLVMTargetMachineReftm;
const char  *triple;
+   LLVMTargetLibraryInfoReftarget_library_info;
 };
 
 /* State of the context creating the shader object. */
 struct si_compiler_ctx_state {
/* Should only be used by si_init_shader_selector_async and
 * si_build_shader_variant if thread_index == -1 (non-threaded). */
struct si_compiler  *compiler;
 
/* Used if thread_index == -1 or if debug.async is true. */
struct pipe_debug_callback  debug;
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index f354417b89e..86366f4063c 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -1206,33 +1206,31 @@ void si_llvm_create_func(struct si_shader_context *ctx,
default:
unreachable("Unhandle shader type");
}
 
LLVMSetFunctionCallConv(ctx->main_fn, call_conv);
 }
 
 void si_llvm_optimize_module(struct si_shader_context *ctx)
 {
struct gallivm_state *gallivm = &ctx->gallivm;
-   LLVMTargetLibraryInfoRef target_library_info;
 
/* Dump LLVM IR before any optimization passes */
if (ctx->screen->debug_flags & DBG(PREOPT_IR) &&
si_can_dump_shader(ctx->screen, ctx->type))
LLVMDumpModule(ctx->gallivm.module);
 
/* Create the pass manager */
gallivm->passmgr = LLVMCreatePassManager();
 
-   target_library_info =
-   gallivm_create_target_library_info(ctx->compiler->triple);
-   LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
+   LLVMAddTargetLibraryInfo(ctx->compiler->target_library_info,
+gallivm->passmgr);
 
if (si_extra_shader_checks(ctx->screen, ctx->type))
LLVMAddVerifierPass(gallivm->passmgr);
 
LLVMAddAlwaysInlinerPass(gallivm->passmgr);
 
/* This pass should eliminate all the load and store instructions */

[Mesa-dev] [PATCH 0/9] RadeonSI LLVM crash workaround for Ubuntu 18.04

2018-04-16 Thread Marek Olšák
Hi,

This cleanup is motivated by a Mesa/LLVM crash on Ubuntu 18.04.
It happens inside gallivm_dispose_target_library_info and only
LLVM 5.0 and 6.0 seem to be affected.

The workaround is to move target_library_info into the context
(1 instance) and the screen (1 instance per compiler thread) to reduce
the number of times it's created and deleted, and then simply leak
it because we can't destroy it without a crash.

The series might also result in some non-measurable compile time
savings, and the last patch decreases shader-db run times
significantly on many-core CPUs. (there is no expected effect on apps)

Please review.

Thanks,
Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/9] radeonsi: use si_compiler::triple in si_llvm_optimize_module

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index cad81487d7e..f354417b89e 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -1206,32 +1206,32 @@ void si_llvm_create_func(struct si_shader_context *ctx,
default:
unreachable("Unhandle shader type");
}
 
LLVMSetFunctionCallConv(ctx->main_fn, call_conv);
 }
 
 void si_llvm_optimize_module(struct si_shader_context *ctx)
 {
struct gallivm_state *gallivm = &ctx->gallivm;
-   const char *triple = LLVMGetTarget(gallivm->module);
LLVMTargetLibraryInfoRef target_library_info;
 
/* Dump LLVM IR before any optimization passes */
if (ctx->screen->debug_flags & DBG(PREOPT_IR) &&
si_can_dump_shader(ctx->screen, ctx->type))
LLVMDumpModule(ctx->gallivm.module);
 
/* Create the pass manager */
gallivm->passmgr = LLVMCreatePassManager();
 
-   target_library_info = gallivm_create_target_library_info(triple);
+   target_library_info =
+   gallivm_create_target_library_info(ctx->compiler->triple);
LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
 
if (si_extra_shader_checks(ctx->screen, ctx->type))
LLVMAddVerifierPass(gallivm->passmgr);
 
LLVMAddAlwaysInlinerPass(gallivm->passmgr);
 
/* This pass should eliminate all the load and store instructions */
LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
 
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] clover: Fix host access validation for sub-buffer creation

2018-04-16 Thread Francisco Jerez
Aaron Watry  writes:

> On Mon, Apr 16, 2018, 5:24 PM Francisco Jerez  wrote:
>
>> Aaron Watry  writes:
>>
>> >   From CL 1.2 Section 5.2.1:
>> > CL_INVALID_VALUE if buffer was created with CL_MEM_HOST_WRITE_ONLY
>> and
>> > flags specify CL_MEM_HOST_READ_ONLY , or if buffer was created with
>> > CL_MEM_HOST_READ_ONLY and flags specify CL_MEM_HOST_WRITE_ONLY , or
>> if
>> > buffer was created with CL_MEM_HOST_NO_ACCESS and flags specify
>> > CL_MEM_HOST_READ_ONLY or CL_MEM_HOST_WRITE_ONLY .
>> >
>> > Fixes CL 1.2 CTS test/api get_buffer_info
>> >
>>
>> What combination of flags is the test-case providing for both the
>> parent and sub buffer?
>>
>
> The original motivation for this was a CTS test that was creating a sub
> buffer with flags of:
> CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE
>
> With a parent buffer created as:
> CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE
>
> Which according to my reading of the spec should be allowed.
>

Right, I see.

>>
>> > Signed-off-by: Aaron Watry 
>> > Cc: Francisco Jerez 
>> > ---
>> >  src/gallium/state_trackers/clover/api/memory.cpp | 8 ++--
>> >  1 file changed, 6 insertions(+), 2 deletions(-)
>> >
>> > diff --git a/src/gallium/state_trackers/clover/api/memory.cpp
>> b/src/gallium/state_trackers/clover/api/memory.cpp
>> > index 9b3cd8b1f5..451e8a8c56 100644
>> > --- a/src/gallium/state_trackers/clover/api/memory.cpp
>> > +++ b/src/gallium/state_trackers/clover/api/memory.cpp
>> > @@ -57,10 +57,14 @@ namespace {
>> >parent.flags() &
>> host_access_flags) |
>> >   (parent.flags() & host_ptr_flags));
>> >
>> > - if (~flags & parent.flags() &
>> > - ((dev_access_flags & ~CL_MEM_READ_WRITE) |
>> host_access_flags))
>> > + if (~flags & parent.flags() & (dev_access_flags &
>> ~CL_MEM_READ_WRITE))
>> >  throw error(CL_INVALID_VALUE);
>> >

I think you want to keep the hunk above and then do something along the
lines of:

+ if (!(flags & CL_MEM_HOST_NO_ACCESS) &&
+ (~flags & parent.flags() & host_access_flags))
+throw error(CL_INVALID_VALUE);

>> > + //Check if new host access flags cause a mismatch between
>> host-read/write-only.
>> > + const cl_mem_flags new_flags = flags & ~(parent.flags()) &
>> ~CL_MEM_HOST_NO_ACCESS;
>> > + if (new_flags & host_access_flags & parent.flags())
>> > +throw error (CL_INVALID_VALUE);
>> > +
>>
>> This doesn't look correct to me, the condition will always evaluate to
>> zero, you're calculating the conjunction of ~parent.flags() and
>> parent.flags() which is zero, so the error will never be emitted.
>>
>
> I'll see what I can do. I agree with a fresh reading that it looks fishy at
> best.
>
> --Aaron
>
>>
>> >   return flags;
>> >
>> >} else {
>> > --
>> > 2.14.1
>>


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/9] radeonsi: don't use an indirect table for state atoms

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_blit.c|  12 +--
 src/gallium/drivers/radeonsi/si_clear.c   |  14 +--
 src/gallium/drivers/radeonsi/si_compute.c |   8 +-
 src/gallium/drivers/radeonsi/si_cp_dma.c  |   2 +-
 src/gallium/drivers/radeonsi/si_descriptors.c |  12 +--
 src/gallium/drivers/radeonsi/si_gfx_cs.c  |  34 +++---
 src/gallium/drivers/radeonsi/si_pipe.h|  17 ---
 src/gallium/drivers/radeonsi/si_query.c   |   4 +-
 src/gallium/drivers/radeonsi/si_state.c   | 100 +-
 src/gallium/drivers/radeonsi/si_state.h   |  43 
 src/gallium/drivers/radeonsi/si_state_draw.c  |  14 +--
 .../drivers/radeonsi/si_state_shaders.c   |  30 +++---
 .../drivers/radeonsi/si_state_streamout.c |  12 +--
 .../drivers/radeonsi/si_state_viewport.c  |  18 ++--
 14 files changed, 148 insertions(+), 172 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index d6eab58b3a8..bbdb5e8f271 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -78,21 +78,21 @@ void si_blitter_begin(struct si_context *sctx, enum 
si_blitter_op op)
 }
 
 void si_blitter_end(struct si_context *sctx)
 {
sctx->render_cond_force_off = false;
 
/* Restore shader pointers because the VS blit shader changed all
 * non-global VS user SGPRs. */
sctx->shader_pointers_dirty |= SI_DESCS_SHADER_MASK(VERTEX);
sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL;
-   si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
+   si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
 }
 
 static unsigned u_max_sample(struct pipe_resource *r)
 {
return r->nr_samples ? r->nr_samples - 1 : 0;
 }
 
 static unsigned
 si_blit_dbcb_copy(struct si_context *sctx,
  struct r600_texture *src,
@@ -102,21 +102,21 @@ si_blit_dbcb_copy(struct si_context *sctx,
  unsigned first_sample, unsigned last_sample)
 {
struct pipe_surface surf_tmpl = {{0}};
unsigned layer, sample, checked_last_layer, max_layer;
unsigned fully_copied_levels = 0;
 
if (planes & PIPE_MASK_Z)
sctx->dbcb_depth_copy_enabled = true;
if (planes & PIPE_MASK_S)
sctx->dbcb_stencil_copy_enabled = true;
-   si_mark_atom_dirty(sctx, &sctx->db_render_state);
+   si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
 
assert(sctx->dbcb_depth_copy_enabled || 
sctx->dbcb_stencil_copy_enabled);
 
sctx->decompression_enabled = true;
 
while (level_mask) {
unsigned level = u_bit_scan(&level_mask);
 
/* The smaller the mipmap level, the less layers there are
 * as far as 3D textures are concerned. */
@@ -133,42 +133,42 @@ si_blit_dbcb_copy(struct si_context *sctx,
surf_tmpl.u.tex.last_layer = layer;
 
zsurf = sctx->b.create_surface(>b, 
>resource.b.b, _tmpl);
 
surf_tmpl.format = dst->resource.b.b.format;
cbsurf = sctx->b.create_surface(>b, 
>resource.b.b, _tmpl);
 
for (sample = first_sample; sample <= last_sample; 
sample++) {
if (sample != sctx->dbcb_copy_sample) {
sctx->dbcb_copy_sample = sample;
-   si_mark_atom_dirty(sctx, 
>db_render_state);
+   si_mark_atom_dirty(sctx, 
>atoms.s.db_render_state);
}
 
si_blitter_begin(sctx, SI_DECOMPRESS);

util_blitter_custom_depth_stencil(sctx->blitter, zsurf, cbsurf, 1 << sample,
  
sctx->custom_dsa_flush, 1.0f);
si_blitter_end(sctx);
}
 
pipe_surface_reference(, NULL);
pipe_surface_reference(, NULL);
}
 
if (first_layer == 0 && last_layer >= max_layer &&
first_sample == 0 && last_sample >= 
u_max_sample(>resource.b.b))
fully_copied_levels |= 1u << level;
}
 
sctx->decompression_enabled = false;
sctx->dbcb_depth_copy_enabled = false;
sctx->dbcb_stencil_copy_enabled = false;
-   si_mark_atom_dirty(sctx, &sctx->db_render_state);
+   si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
 
return fully_copied_levels;
 }
 
 void si_blit_decompress_depth(struct pipe_context *ctx,
  struct r600_texture *texture,
  struct r600_texture *staging,
  unsigned first_level, unsigned last_level,
  unsigned 

[Mesa-dev] [PATCH 4/9] radeonsi: remove si_atom::id

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_pipe.h   | 21 ++--
 src/gallium/drivers/radeonsi/si_state.c  | 14 -
 src/gallium/drivers/radeonsi/si_state.h  |  6 +++---
 src/gallium/drivers/radeonsi/si_state_draw.c |  2 +-
 4 files changed, 15 insertions(+), 28 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 37ff05082cf..41f88b9688e 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1332,44 +1332,45 @@ si_context_add_resource_size(struct si_context *sctx, 
struct pipe_resource *r)
sctx->gtt += res->gart_usage;
}
 }
 
 static inline void
 si_invalidate_draw_sh_constants(struct si_context *sctx)
 {
sctx->last_base_vertex = SI_BASE_VERTEX_UNKNOWN;
 }
 
+static inline unsigned
+si_get_atom_bit(struct si_context *sctx, struct si_atom *atom)
+{
+   return 1 << (atom - sctx->atoms.array);
+}
+
 static inline void
-si_set_atom_dirty(struct si_context *sctx,
- struct si_atom *atom, bool dirty)
+si_set_atom_dirty(struct si_context *sctx, struct si_atom *atom, bool dirty)
 {
-   unsigned bit = 1 << atom->id;
+   unsigned bit = si_get_atom_bit(sctx, atom);
 
if (dirty)
sctx->dirty_atoms |= bit;
else
sctx->dirty_atoms &= ~bit;
 }
 
 static inline bool
-si_is_atom_dirty(struct si_context *sctx,
-struct si_atom *atom)
+si_is_atom_dirty(struct si_context *sctx, struct si_atom *atom)
 {
-   unsigned bit = 1 << atom->id;
-
-   return sctx->dirty_atoms & bit;
+   return (sctx->dirty_atoms & si_get_atom_bit(sctx, atom)) != 0;
 }
 
 static inline void
-si_mark_atom_dirty(struct si_context *sctx,
-  struct si_atom *atom)
+si_mark_atom_dirty(struct si_context *sctx, struct si_atom *atom)
 {
si_set_atom_dirty(sctx, atom, true);
 }
 
 static inline struct si_shader_ctx_state *si_get_vs(struct si_context *sctx)
 {
if (sctx->gs_shader.cso)
return &sctx->gs_shader;
if (sctx->tes_shader.cso)
return &sctx->tes_shader;
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 8841077cca1..568b11d1eea 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -26,33 +26,25 @@
 #include "gfx9d.h"
 #include "si_query.h"
 
 #include "util/u_dual_blend.h"
 #include "util/u_format.h"
 #include "util/u_format_s3tc.h"
 #include "util/u_memory.h"
 #include "util/u_resource.h"
 #include "util/u_upload_mgr.h"
 
-/* Initialize an external atom (owned by ../radeon). */
-static void
-si_init_external_atom(struct si_context *sctx, struct si_atom *atom)
-{
-   atom->id = atom - sctx->atoms.array;
-}
-
 /* Initialize an atom owned by radeonsi.  */
 void si_init_atom(struct si_context *sctx, struct si_atom *atom,
  void (*emit_func)(struct si_context *ctx, struct si_atom 
*state))
 {
atom->emit = emit_func;
-   atom->id = atom - sctx->atoms.array;
 }
 
 static unsigned si_map_swizzle(unsigned swizzle)
 {
switch (swizzle) {
case PIPE_SWIZZLE_Y:
return V_008F0C_SQ_SEL_Y;
case PIPE_SWIZZLE_Z:
return V_008F0C_SQ_SEL_Z;
case PIPE_SWIZZLE_W:
@@ -4519,26 +4511,20 @@ static void *si_create_blend_custom(struct si_context 
*sctx, unsigned mode)
memset(, 0, sizeof(blend));
blend.independent_blend_enable = true;
blend.rt[0].colormask = 0xf;
return si_create_blend_state_mode(>b, , mode);
 }
 
 static void si_init_config(struct si_context *sctx);
 
 void si_init_state_functions(struct si_context *sctx)
 {
-   si_init_external_atom(sctx, >atoms.s.render_cond);
-   si_init_external_atom(sctx, >atoms.s.streamout_begin);
-   si_init_external_atom(sctx, >atoms.s.streamout_enable);
-   si_init_external_atom(sctx, >atoms.s.scissors);
-   si_init_external_atom(sctx, >atoms.s.viewports);
-
si_init_atom(sctx, >atoms.s.framebuffer, 
si_emit_framebuffer_state);
si_init_atom(sctx, >atoms.s.msaa_sample_locs, 
si_emit_msaa_sample_locs);
si_init_atom(sctx, >atoms.s.db_render_state, 
si_emit_db_render_state);
si_init_atom(sctx, >atoms.s.dpbb_state, si_emit_dpbb_state);
si_init_atom(sctx, >atoms.s.msaa_config, si_emit_msaa_config);
si_init_atom(sctx, >atoms.s.sample_mask, si_emit_sample_mask);
si_init_atom(sctx, >atoms.s.cb_render_state, 
si_emit_cb_render_state);
si_init_atom(sctx, >atoms.s.blend_color, si_emit_blend_color);
si_init_atom(sctx, >atoms.s.clip_regs, si_emit_clip_regs);
si_init_atom(sctx, >atoms.s.clip_state, si_emit_clip_state);
diff --git a/src/gallium/drivers/radeonsi/si_state.h 
b/src/gallium/drivers/radeonsi/si_state.h
index 6c9899d9468..4ee69b95bd3 100644
--- 

[Mesa-dev] [PATCH 9/9] radeonsi: rename r600_texture::resource to buffer

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

r600_resource could be renamed to si_buffer.
---
 src/gallium/drivers/radeon/radeon_vcn_dec.c   |   2 +-
 src/gallium/drivers/radeonsi/cik_sdma.c   |  22 +--
 src/gallium/drivers/radeonsi/si_blit.c|  50 +++---
 src/gallium/drivers/radeonsi/si_clear.c   |  36 ++--
 src/gallium/drivers/radeonsi/si_descriptors.c |  24 +--
 src/gallium/drivers/radeonsi/si_dma.c |  16 +-
 src/gallium/drivers/radeonsi/si_pipe.h|   4 +-
 src/gallium/drivers/radeonsi/si_state.c   |  70 
 src/gallium/drivers/radeonsi/si_texture.c | 168 +-
 src/gallium/drivers/radeonsi/si_uvd.c |  10 +-
 10 files changed, 201 insertions(+), 201 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_vcn_dec.c 
b/src/gallium/drivers/radeon/radeon_vcn_dec.c
index 46ad2853f1c..cee32d482f2 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_dec.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_dec.c
@@ -979,21 +979,21 @@ static struct pb_buffer *rvcn_dec_message_decode(struct 
radeon_decoder *dec,
break;
}
default:
assert(0);
return NULL;
}
 
if (dec->ctx.res)
decode->hw_ctxt_size = dec->ctx.res->buf->size;
 
-   return luma->resource.buf;
+   return luma->buffer.buf;
 }
 
 static void rvcn_dec_message_destroy(struct radeon_decoder *dec)
 {
rvcn_dec_message_header_t *header = dec->msg;
 
memset(dec->msg, 0, sizeof(rvcn_dec_message_header_t));
header->header_size = sizeof(rvcn_dec_message_header_t);
header->total_size = sizeof(rvcn_dec_message_header_t) -
sizeof(rvcn_dec_message_index_t);
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c 
b/src/gallium/drivers/radeonsi/cik_sdma.c
index 690e7ff5499..7a4b479b7eb 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -140,63 +140,63 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
  unsigned dst_level,
  unsigned dstx, unsigned dsty, unsigned dstz,
  struct pipe_resource *src,
  unsigned src_level,
  const struct pipe_box *src_box)
 {
struct radeon_info *info = >screen->info;
struct r600_texture *rsrc = (struct r600_texture*)src;
struct r600_texture *rdst = (struct r600_texture*)dst;
unsigned bpp = rdst->surface.bpe;
-   uint64_t dst_address = rdst->resource.gpu_address +
+   uint64_t dst_address = rdst->buffer.gpu_address +
   rdst->surface.u.legacy.level[dst_level].offset;
-   uint64_t src_address = rsrc->resource.gpu_address +
+   uint64_t src_address = rsrc->buffer.gpu_address +
   rsrc->surface.u.legacy.level[src_level].offset;
unsigned dst_mode = rdst->surface.u.legacy.level[dst_level].mode;
unsigned src_mode = rsrc->surface.u.legacy.level[src_level].mode;
unsigned dst_tile_index = 
rdst->surface.u.legacy.tiling_index[dst_level];
unsigned src_tile_index = 
rsrc->surface.u.legacy.tiling_index[src_level];
unsigned dst_tile_mode = info->si_tile_mode_array[dst_tile_index];
unsigned src_tile_mode = info->si_tile_mode_array[src_tile_index];
unsigned dst_micro_mode = G_009910_MICRO_TILE_MODE_NEW(dst_tile_mode);
unsigned src_micro_mode = G_009910_MICRO_TILE_MODE_NEW(src_tile_mode);
unsigned dst_tile_swizzle = dst_mode == RADEON_SURF_MODE_2D ?
rdst->surface.tile_swizzle : 0;
unsigned src_tile_swizzle = src_mode == RADEON_SURF_MODE_2D ?
rsrc->surface.tile_swizzle : 0;
unsigned dst_pitch = rdst->surface.u.legacy.level[dst_level].nblk_x;
unsigned src_pitch = rsrc->surface.u.legacy.level[src_level].nblk_x;
uint64_t dst_slice_pitch = 
((uint64_t)rdst->surface.u.legacy.level[dst_level].slice_size_dw * 4) / bpp;
uint64_t src_slice_pitch = 
((uint64_t)rsrc->surface.u.legacy.level[src_level].slice_size_dw * 4) / bpp;
-   unsigned dst_width = minify_as_blocks(rdst->resource.b.b.width0,
+   unsigned dst_width = minify_as_blocks(rdst->buffer.b.b.width0,
  dst_level, rdst->surface.blk_w);
-   unsigned src_width = minify_as_blocks(rsrc->resource.b.b.width0,
+   unsigned src_width = minify_as_blocks(rsrc->buffer.b.b.width0,
  src_level, rsrc->surface.blk_w);
-   unsigned dst_height = minify_as_blocks(rdst->resource.b.b.height0,
+   unsigned dst_height = minify_as_blocks(rdst->buffer.b.b.height0,
   dst_level, rdst->surface.blk_h);
-   unsigned src_height = 

[Mesa-dev] [PATCH 8/9] radeonsi: use r600_resource() typecast helper

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/radeon_vcn_dec.c   |  4 ++--
 src/gallium/drivers/radeon/radeon_video.c |  5 ++--
 src/gallium/drivers/radeonsi/si_buffer.c  | 20 ++--
 src/gallium/drivers/radeonsi/si_clear.c   |  2 +-
 src/gallium/drivers/radeonsi/si_compute.c | 14 +--
 src/gallium/drivers/radeonsi/si_cp_dma.c  |  6 ++---
 src/gallium/drivers/radeonsi/si_descriptors.c | 24 +--
 src/gallium/drivers/radeonsi/si_dma.c |  4 ++--
 src/gallium/drivers/radeonsi/si_gfx_cs.c  |  5 ++--
 src/gallium/drivers/radeonsi/si_pipe.c| 18 +++---
 src/gallium/drivers/radeonsi/si_pipe.h| 19 +++
 src/gallium/drivers/radeonsi/si_pm4.c |  2 +-
 src/gallium/drivers/radeonsi/si_query.c   |  6 ++---
 src/gallium/drivers/radeonsi/si_shader.c  |  3 +--
 src/gallium/drivers/radeonsi/si_state.c   |  2 +-
 src/gallium/drivers/radeonsi/si_state_draw.c  |  6 ++---
 .../drivers/radeonsi/si_state_shaders.c   |  8 +++
 .../drivers/radeonsi/si_state_streamout.c |  4 ++--
 src/gallium/drivers/radeonsi/si_texture.c |  6 ++---
 19 files changed, 80 insertions(+), 78 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_vcn_dec.c 
b/src/gallium/drivers/radeon/radeon_vcn_dec.c
index 046b371384a..46ad2853f1c 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_dec.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_dec.c
@@ -846,22 +846,22 @@ static struct pb_buffer *rvcn_dec_message_decode(struct 
radeon_decoder *dec,
index->filled = 0;
 
decode->stream_type = dec->stream_type;
decode->decode_flags = 0x1;
decode->width_in_samples = dec->base.width;
decode->height_in_samples = dec->base.height;
 
decode->bsd_size = align(dec->bs_size, 128);
decode->dpb_size = dec->dpb.res->buf->size;
decode->dt_size =
-   ((struct r600_resource *)((struct vl_video_buffer 
*)target)->resources[0])->buf->size +
-   ((struct r600_resource *)((struct vl_video_buffer 
*)target)->resources[1])->buf->size;
+   r600_resource(((struct vl_video_buffer 
*)target)->resources[0])->buf->size +
+   r600_resource(((struct vl_video_buffer 
*)target)->resources[1])->buf->size;
 
decode->sct_size = 0;
decode->sc_coeff_size = 0;
 
decode->sw_ctxt_size = RDECODE_SESSION_CONTEXT_SIZE;
decode->db_pitch = align(dec->base.width, 32);
decode->db_surf_tile_config = 0;
 
decode->dt_pitch = luma->surface.u.gfx9.surf_pitch * 
luma->surface.blk_w;
decode->dt_uv_pitch = decode->dt_pitch / 2;
diff --git a/src/gallium/drivers/radeon/radeon_video.c 
b/src/gallium/drivers/radeon/radeon_video.c
index a2947df9590..f59b44736aa 100644
--- a/src/gallium/drivers/radeon/radeon_video.c
+++ b/src/gallium/drivers/radeon/radeon_video.c
@@ -56,23 +56,22 @@ unsigned si_vid_alloc_stream_handle()
 bool si_vid_create_buffer(struct pipe_screen *screen, struct rvid_buffer 
*buffer,
  unsigned size, unsigned usage)
 {
memset(buffer, 0, sizeof(*buffer));
buffer->usage = usage;
 
/* Hardware buffer placement restrictions require the kernel to be
 * able to move buffers around individually, so request a
 * non-sub-allocated buffer.
 */
-   buffer->res = (struct r600_resource *)
-   pipe_buffer_create(screen, PIPE_BIND_SHARED,
-  usage, size);
+   buffer->res = r600_resource(pipe_buffer_create(screen, PIPE_BIND_SHARED,
+  usage, size));
 
return buffer->res != NULL;
 }
 
 /* destroy a buffer */
 void si_vid_destroy_buffer(struct rvid_buffer *buffer)
 {
r600_resource_reference(&buffer->res, NULL);
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_buffer.c 
b/src/gallium/drivers/radeonsi/si_buffer.c
index a0855db571f..504e0c723dc 100644
--- a/src/gallium/drivers/radeonsi/si_buffer.c
+++ b/src/gallium/drivers/radeonsi/si_buffer.c
@@ -471,23 +471,23 @@ static void *si_buffer_transfer_map(struct pipe_context 
*ctx,
}
/* Use a staging buffer in cached GTT for reads. */
else if (((usage & PIPE_TRANSFER_READ) &&
  !(usage & PIPE_TRANSFER_PERSISTENT) &&
  (rbuffer->domains & RADEON_DOMAIN_VRAM ||
   rbuffer->flags & RADEON_FLAG_GTT_WC)) ||
 (rbuffer->flags & RADEON_FLAG_SPARSE)) {
struct r600_resource *staging;
 
assert(!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC));
-   staging = (struct r600_resource*) pipe_buffer_create(
+   staging = r600_resource(pipe_buffer_create(
ctx->screen, 0, PIPE_USAGE_STAGING,
-   box->width + (box->x % 
SI_MAP_BUFFER_ALIGNMENT));
+   box->width + (box->x 

[Mesa-dev] [PATCH 7/9] radeonsi: remove unused atom parameter from si_atom::emit

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_compute.c |  3 +--
 src/gallium/drivers/radeonsi/si_descriptors.c |  3 +--
 src/gallium/drivers/radeonsi/si_query.c   |  3 +--
 src/gallium/drivers/radeonsi/si_state.c   | 21 +--
 src/gallium/drivers/radeonsi/si_state.h   |  7 +++
 .../drivers/radeonsi/si_state_binning.c   |  2 +-
 src/gallium/drivers/radeonsi/si_state_draw.c  |  8 +++
 .../drivers/radeonsi/si_state_shaders.c   |  5 ++---
 .../drivers/radeonsi/si_state_streamout.c |  5 ++---
 .../drivers/radeonsi/si_state_viewport.c  |  5 ++---
 10 files changed, 26 insertions(+), 36 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index e7867e7fe4f..f77367aef7f 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -857,22 +857,21 @@ static void si_launch_grid(
si_emit_cache_flush(sctx);
 
if (!si_switch_compute_shader(sctx, program, &program->shader,
code_object, info->pc))
return;
 
si_upload_compute_shader_descriptors(sctx);
si_emit_compute_shader_pointers(sctx);
 
if (si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
-   sctx->atoms.s.render_cond.emit(sctx,
-  &sctx->atoms.s.render_cond);
+   sctx->atoms.s.render_cond.emit(sctx);
si_set_atom_dirty(sctx, &sctx->atoms.s.render_cond, false);
}
 
if ((program->input_size ||
 program->ir_type == PIPE_SHADER_IR_NATIVE) &&
unlikely(!si_upload_compute_input(sctx, code_object, info))) {
return;
}
 
/* Global buffers */
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 8bb2a5d95d8..a030cbe8229 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -2146,22 +2146,21 @@ static void si_emit_global_shader_pointers(struct 
si_context *sctx,
si_emit_shader_pointer(sctx, descs,
   R_00B330_SPI_SHADER_USER_DATA_ES_0);
si_emit_shader_pointer(sctx, descs,
   R_00B230_SPI_SHADER_USER_DATA_GS_0);
si_emit_shader_pointer(sctx, descs,
   R_00B430_SPI_SHADER_USER_DATA_HS_0);
si_emit_shader_pointer(sctx, descs,
   R_00B530_SPI_SHADER_USER_DATA_LS_0);
 }
 
-void si_emit_graphics_shader_pointers(struct si_context *sctx,
-  struct si_atom *atom)
+void si_emit_graphics_shader_pointers(struct si_context *sctx)
 {
uint32_t *sh_base = sctx->shader_pointers.sh_base;
 
if (sctx->shader_pointers_dirty & (1 << SI_DESCS_RW_BUFFERS)) {
si_emit_global_shader_pointers(sctx,
   
&sctx->descriptors[SI_DESCS_RW_BUFFERS]);
}
 
si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(VERTEX),
sh_base[PIPE_SHADER_VERTEX]);
diff --git a/src/gallium/drivers/radeonsi/si_query.c 
b/src/gallium/drivers/radeonsi/si_query.c
index d1b519e5508..d621f22f46b 100644
--- a/src/gallium/drivers/radeonsi/si_query.c
+++ b/src/gallium/drivers/radeonsi/si_query.c
@@ -929,22 +929,21 @@ static void emit_set_predicate(struct si_context *ctx,
radeon_emit(cs, va >> 32);
} else {
radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
radeon_emit(cs, va);
radeon_emit(cs, op | ((va >> 32) & 0xFF));
}
radeon_add_to_buffer_list(ctx, ctx->gfx_cs, buf, RADEON_USAGE_READ,
  RADEON_PRIO_QUERY);
 }
 
-static void si_emit_query_predication(struct si_context *ctx,
- struct si_atom *atom)
+static void si_emit_query_predication(struct si_context *ctx)
 {
struct si_query_hw *query = (struct si_query_hw *)ctx->render_cond;
struct si_query_buffer *qbuf;
uint32_t op;
bool flag_wait, invert;
 
if (!query)
return;
 
invert = ctx->render_cond_invert;
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 928b465fe25..da254f2ba73 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -57,21 +57,21 @@ static unsigned si_pack_float_12p4(float x)
return x <= 0? 0 :
   x >= 4096 ? 0xffff : x * 16;
 }
 
 /*
  * Inferred framebuffer and blender state.
  *
  * CB_TARGET_MASK is emitted here to avoid a hang with dual source blending
  * if there is not enough PS outputs.
  */
-static void si_emit_cb_render_state(struct si_context *sctx, struct si_atom 
*atom)

[Mesa-dev] [PATCH 7/7] radeonsi/gfx9: workaround for INTERP with indirect indexing

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

and clean up the conditions.

We might just simplify this because the only stage that can return
0 or 1 is VS.
---
 src/gallium/drivers/radeonsi/si_get.c | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_get.c 
b/src/gallium/drivers/radeonsi/si_get.c
index fb1c6eca135..47dca742b75 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -469,26 +469,33 @@ static int si_get_shader_param(struct pipe_screen* 
pscreen,
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
return 1;
 
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
/* TODO: Indirect indexing of GS inputs is unimplemented. */
-   return shader != PIPE_SHADER_GEOMETRY &&
-  (sscreen->llvm_has_working_vgpr_indexing ||
-   /* TCS and TES load inputs directly from LDS or
-* offchip memory, so indirect indexing is trivial. */
-   shader == PIPE_SHADER_TESS_CTRL ||
-   shader == PIPE_SHADER_TESS_EVAL);
+   if (shader == PIPE_SHADER_GEOMETRY)
+   return 0;
+
+   if (shader == PIPE_SHADER_VERTEX &&
+   !sscreen->llvm_has_working_vgpr_indexing)
+   return 0;
+
+   /* TCS and TES load inputs directly from LDS or offchip
+* memory, so indirect indexing is always supported.
+* PS has to support indirect indexing, because we can't
+* lower that to TEMPs for INTERP instructions.
+*/
+   return 1;
 
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
return sscreen->llvm_has_working_vgpr_indexing ||
   /* TCS stores outputs directly to memory. */
   shader == PIPE_SHADER_TESS_CTRL;
 
/* Unsupported boolean features. */
case PIPE_SHADER_CAP_SUBROUTINES:
case PIPE_SHADER_CAP_SUPPORTED_IRS:
case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/7] radeonsi: rewrite DCC format compatibility checking code

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

It might be better to use a slow compressed clear when clearing to 1.
---
 src/gallium/drivers/radeonsi/si_texture.c | 98 ++-
 1 file changed, 42 insertions(+), 56 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_texture.c 
b/src/gallium/drivers/radeonsi/si_texture.c
index 675e78766ad..e220d438de4 100644
--- a/src/gallium/drivers/radeonsi/si_texture.c
+++ b/src/gallium/drivers/radeonsi/si_texture.c
@@ -1888,91 +1888,77 @@ static void si_texture_transfer_unmap(struct 
pipe_context *ctx,
 
 static const struct u_resource_vtbl si_texture_vtbl =
 {
NULL,   /* get_handle */
si_texture_destroy, /* resource_destroy */
si_texture_transfer_map,/* transfer_map */
u_default_transfer_flush_region, /* transfer_flush_region */
si_texture_transfer_unmap,  /* transfer_unmap */
 };
 
-/* DCC channel type categories within which formats can be reinterpreted
- * while keeping the same DCC encoding. The swizzle must also match. */
-enum dcc_channel_type {
-   dcc_channel_float,
-   /* uint and sint can be merged if we never use TC-compatible DCC clear
-* encoding with the clear value of 1. */
-   dcc_channel_uint,
-   dcc_channel_sint,
-   dcc_channel_uint_10_10_10_2,
-   dcc_channel_incompatible,
-};
-
-/* Return the type of DCC encoding. */
-static enum dcc_channel_type
-vi_get_dcc_channel_type(const struct util_format_description *desc)
-{
-   int i;
-
-   /* Find the first non-void channel. */
-   for (i = 0; i < desc->nr_channels; i++)
-   if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID)
-   break;
-   if (i == desc->nr_channels)
-   return dcc_channel_incompatible;
-
-   switch (desc->channel[i].size) {
-   case 32:
-   case 16:
-   case 8:
-   if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
-   return dcc_channel_float;
-   if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)
-   return dcc_channel_uint;
-   return dcc_channel_sint;
-   case 10:
-   return dcc_channel_uint_10_10_10_2;
-   default:
-   return dcc_channel_incompatible;
-   }
-}
-
-/* Return if it's allowed to reinterpret one format as another with DCC 
enabled. */
+/* Return if it's allowed to reinterpret one format as another with DCC 
enabled.
+ */
 bool vi_dcc_formats_compatible(enum pipe_format format1,
   enum pipe_format format2)
 {
const struct util_format_description *desc1, *desc2;
-   enum dcc_channel_type type1, type2;
-   int i;
 
+   /* No format change - exit early. */
+   if (format1 == format2)
+   return true;
+
+   format1 = si_simplify_cb_format(format1);
+   format2 = si_simplify_cb_format(format2);
+
+   /* Check again after format adjustments. */
if (format1 == format2)
return true;
 
desc1 = util_format_description(format1);
desc2 = util_format_description(format2);
 
-   if (desc1->nr_channels != desc2->nr_channels)
+   if (desc1->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
+   desc2->layout != UTIL_FORMAT_LAYOUT_PLAIN)
return false;
 
-   /* Swizzles must be the same. */
-   for (i = 0; i < desc1->nr_channels; i++)
-   if (desc1->swizzle[i] <= PIPE_SWIZZLE_W &&
-   desc2->swizzle[i] <= PIPE_SWIZZLE_W &&
-   desc1->swizzle[i] != desc2->swizzle[i])
-   return false;
+   /* Float and non-float are totally incompatible. */
+   if ((desc1->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) !=
+   (desc2->channel[0].type == UTIL_FORMAT_TYPE_FLOAT))
+   return false;
 
-   type1 = vi_get_dcc_channel_type(desc1);
-   type2 = vi_get_dcc_channel_type(desc2);
+   /* Channel sizes must match across DCC formats.
+* Comparing just the first 2 channels should be enough.
+*/
+   if (desc1->channel[0].size != desc2->channel[0].size ||
+   (desc1->nr_channels >= 2 &&
+desc1->channel[1].size != desc2->channel[1].size))
+   return false;
 
-   return type1 != dcc_channel_incompatible &&
-  type2 != dcc_channel_incompatible &&
-  type1 == type2;
+   /* Everything below is not needed if the driver never uses the DCC
+* clear code with the value of 1.
+*/
+
+   /* If the clear values are all 1 or all 0, this constraint can be
+* ignored. */
+   if (vi_alpha_is_on_msb(format1) != vi_alpha_is_on_msb(format2))
+   return false;
+
+   /* Channel types must match if the clear value of 1 is used.
+* The type categories are only float, signed, unsigned.
+* NORM and INT are always 

[Mesa-dev] [PATCH 2/9] radeonsi: rename r600_atom -> si_atom

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_descriptors.c |  2 +-
 src/gallium/drivers/radeonsi/si_pipe.h| 40 ++---
 src/gallium/drivers/radeonsi/si_query.c   |  4 +-
 src/gallium/drivers/radeonsi/si_state.c   | 30 +-
 src/gallium/drivers/radeonsi/si_state.h   | 60 +--
 .../drivers/radeonsi/si_state_binning.c   |  2 +-
 src/gallium/drivers/radeonsi/si_state_draw.c  |  2 +-
 .../drivers/radeonsi/si_state_shaders.c   |  4 +-
 .../drivers/radeonsi/si_state_streamout.c |  4 +-
 .../drivers/radeonsi/si_state_viewport.c  |  4 +-
 10 files changed, 76 insertions(+), 76 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 4beeb2db6c2..8bd7c77c8c6 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -2147,21 +2147,21 @@ static void si_emit_global_shader_pointers(struct 
si_context *sctx,
   R_00B330_SPI_SHADER_USER_DATA_ES_0);
si_emit_shader_pointer(sctx, descs,
   R_00B230_SPI_SHADER_USER_DATA_GS_0);
si_emit_shader_pointer(sctx, descs,
   R_00B430_SPI_SHADER_USER_DATA_HS_0);
si_emit_shader_pointer(sctx, descs,
   R_00B530_SPI_SHADER_USER_DATA_LS_0);
 }
 
 void si_emit_graphics_shader_pointers(struct si_context *sctx,
-  struct r600_atom *atom)
+  struct si_atom *atom)
 {
uint32_t *sh_base = sctx->shader_pointers.sh_base;
 
if (sctx->shader_pointers_dirty & (1 << SI_DESCS_RW_BUFFERS)) {
si_emit_global_shader_pointers(sctx,
   
&sctx->descriptors[SI_DESCS_RW_BUFFERS]);
}
 
si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(VERTEX),
sh_base[PIPE_SHADER_VERTEX]);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 2ed764bd097..a76d52f7ea0 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -532,21 +532,21 @@ struct si_screen {
 * the number of cores. */
LLVMTargetMachineReftm[3]; /* used by the queue only */
 
struct util_queue   shader_compiler_queue_low_priority;
/* Use at most 2 low priority threads on quadcore and better.
 * We want to minimize the impact on multithreaded Mesa. */
LLVMTargetMachineReftm_low_priority[2]; /* at most 2 
threads */
 };
 
 struct si_blend_color {
-   struct r600_atomatom;
+   struct si_atom  atom;
struct pipe_blend_color state;
boolany_nonzeros;
 };
 
 struct si_sampler_view {
struct pipe_sampler_viewbase;
 /* [0..7] = image descriptor
  * [4..7] = buffer descriptor */
uint32_tstate[8];
uint32_tfmask_state[8];
@@ -587,21 +587,21 @@ struct si_samplers {
uint32_tneeds_color_decompress_mask;
 };
 
 struct si_images {
struct pipe_image_view  views[SI_NUM_IMAGES];
uint32_tneeds_color_decompress_mask;
unsignedenabled_mask;
 };
 
 struct si_framebuffer {
-   struct r600_atomatom;
+   struct si_atom  atom;
struct pipe_framebuffer_state   state;
unsignedcolorbuf_enabled_4bit;
unsignedspi_shader_col_format;
unsignedspi_shader_col_format_alpha;
unsignedspi_shader_col_format_blend;
unsignedspi_shader_col_format_blend_alpha;
ubyte   nr_samples:5; /* at most 16xAA */
ubyte   log_samples:3; /* at most 4 = 16xAA */
ubyte   compressed_cb_mask;
ubyte   uncompressed_cb_mask;
@@ -615,81 +615,81 @@ struct si_framebuffer {
 };
 
 struct si_signed_scissor {
int minx;
int miny;
int maxx;
int maxy;
 };
 
 struct si_scissors {
-   struct r600_atomatom;
+   struct si_atom  atom;
unsigneddirty_mask;
struct pipe_scissor_state   states[SI_MAX_VIEWPORTS];
 };
 
 struct si_viewports {
-   struct r600_atomatom;
+   struct si_atom  atom;
unsigneddirty_mask;
unsigneddepth_range_dirty_mask;
struct pipe_viewport_state

[Mesa-dev] [PATCH 5/9] radeonsi: remove function si_init_atom

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_descriptors.c |  3 +-
 src/gallium/drivers/radeonsi/si_state.c   | 29 +++
 src/gallium/drivers/radeonsi/si_state.h   |  4 ---
 .../drivers/radeonsi/si_state_shaders.c   |  5 ++--
 4 files changed, 14 insertions(+), 27 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 4efae9704bc..8bb2a5d95d8 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -2755,22 +2755,21 @@ void si_init_all_descriptors(struct si_context *sctx)
sctx->b.set_shader_buffers = si_set_shader_buffers;
sctx->b.set_sampler_views = si_set_sampler_views;
sctx->b.create_texture_handle = si_create_texture_handle;
sctx->b.delete_texture_handle = si_delete_texture_handle;
sctx->b.make_texture_handle_resident = si_make_texture_handle_resident;
sctx->b.create_image_handle = si_create_image_handle;
sctx->b.delete_image_handle = si_delete_image_handle;
sctx->b.make_image_handle_resident = si_make_image_handle_resident;
 
/* Shader user data. */
-   si_init_atom(sctx, &sctx->atoms.s.shader_pointers,
-si_emit_graphics_shader_pointers);
+   sctx->atoms.s.shader_pointers.emit = si_emit_graphics_shader_pointers;
 
/* Set default and immutable mappings. */
si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, 
R_00B130_SPI_SHADER_USER_DATA_VS_0);
 
if (sctx->chip_class >= GFX9) {
si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
  R_00B430_SPI_SHADER_USER_DATA_LS_0);
si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
  R_00B330_SPI_SHADER_USER_DATA_ES_0);
} else {
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 568b11d1eea..78ca1f436c9 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -26,27 +26,20 @@
 #include "gfx9d.h"
 #include "si_query.h"
 
 #include "util/u_dual_blend.h"
 #include "util/u_format.h"
 #include "util/u_format_s3tc.h"
 #include "util/u_memory.h"
 #include "util/u_resource.h"
 #include "util/u_upload_mgr.h"
 
-/* Initialize an atom owned by radeonsi.  */
-void si_init_atom(struct si_context *sctx, struct si_atom *atom,
- void (*emit_func)(struct si_context *ctx, struct si_atom 
*state))
-{
-   atom->emit = emit_func;
-}
-
 static unsigned si_map_swizzle(unsigned swizzle)
 {
switch (swizzle) {
case PIPE_SWIZZLE_Y:
return V_008F0C_SQ_SEL_Y;
case PIPE_SWIZZLE_Z:
return V_008F0C_SQ_SEL_Z;
case PIPE_SWIZZLE_W:
return V_008F0C_SQ_SEL_W;
case PIPE_SWIZZLE_0:
@@ -4511,31 +4504,31 @@ static void *si_create_blend_custom(struct si_context 
*sctx, unsigned mode)
memset(&blend, 0, sizeof(blend));
blend.independent_blend_enable = true;
blend.rt[0].colormask = 0xf;
return si_create_blend_state_mode(&sctx->b, &blend, mode);
 }
 
 static void si_init_config(struct si_context *sctx);
 
 void si_init_state_functions(struct si_context *sctx)
 {
-   si_init_atom(sctx, &sctx->atoms.s.framebuffer, 
si_emit_framebuffer_state);
-   si_init_atom(sctx, &sctx->atoms.s.msaa_sample_locs, 
si_emit_msaa_sample_locs);
-   si_init_atom(sctx, &sctx->atoms.s.db_render_state, 
si_emit_db_render_state);
-   si_init_atom(sctx, &sctx->atoms.s.dpbb_state, si_emit_dpbb_state);
-   si_init_atom(sctx, &sctx->atoms.s.msaa_config, si_emit_msaa_config);
-   si_init_atom(sctx, &sctx->atoms.s.sample_mask, si_emit_sample_mask);
-   si_init_atom(sctx, &sctx->atoms.s.cb_render_state, 
si_emit_cb_render_state);
-   si_init_atom(sctx, &sctx->atoms.s.blend_color, si_emit_blend_color);
-   si_init_atom(sctx, &sctx->atoms.s.clip_regs, si_emit_clip_regs);
-   si_init_atom(sctx, &sctx->atoms.s.clip_state, si_emit_clip_state);
-   si_init_atom(sctx, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref);
+   sctx->atoms.s.framebuffer.emit = si_emit_framebuffer_state;
+   sctx->atoms.s.msaa_sample_locs.emit = si_emit_msaa_sample_locs;
+   sctx->atoms.s.db_render_state.emit = si_emit_db_render_state;
+   sctx->atoms.s.dpbb_state.emit = si_emit_dpbb_state;
+   sctx->atoms.s.msaa_config.emit = si_emit_msaa_config;
+   sctx->atoms.s.sample_mask.emit = si_emit_sample_mask;
+   sctx->atoms.s.cb_render_state.emit = si_emit_cb_render_state;
+   sctx->atoms.s.blend_color.emit = si_emit_blend_color;
+   sctx->atoms.s.clip_regs.emit = si_emit_clip_regs;
+   sctx->atoms.s.clip_state.emit = si_emit_clip_state;
+   sctx->atoms.s.stencil_ref.emit = si_emit_stencil_ref;
 
sctx->b.create_blend_state = si_create_blend_state;
sctx->b.bind_blend_state = si_bind_blend_state;

[Mesa-dev] [PATCH 1/9] radeonsi: remove r600_pipe_common.h

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_pipe_common.h | 330 --
 .../drivers/radeon/radeon_vce_40_2_2.c|   2 +-
 src/gallium/drivers/radeon/radeon_vce_50.c|   2 +-
 .../drivers/radeon/radeon_vcn_enc_1_2.c   |   2 +-
 src/gallium/drivers/radeonsi/Makefile.sources |   1 -
 src/gallium/drivers/radeonsi/meson.build  |   1 -
 src/gallium/drivers/radeonsi/si_buffer.c  |   1 +
 src/gallium/drivers/radeonsi/si_pipe.c|   1 +
 src/gallium/drivers/radeonsi/si_pipe.h| 278 +++
 src/gallium/drivers/radeonsi/si_query.c   |   1 +
 src/gallium/drivers/radeonsi/si_shader.h  |   7 +-
 src/gallium/drivers/radeonsi/si_state.h   |  21 +-
 .../drivers/radeonsi/si_state_streamout.c |   1 +
 src/gallium/drivers/radeonsi/si_texture.c |   1 +
 14 files changed, 302 insertions(+), 347 deletions(-)
 delete mode 100644 src/gallium/drivers/radeon/r600_pipe_common.h

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
deleted file mode 100644
index 66e9a0b7819..000
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ /dev/null
@@ -1,330 +0,0 @@
-/*
- * Copyright 2013 Advanced Micro Devices, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
THE
- * SOFTWARE.
- */
-
-/**
- * This file is going to be removed.
- */
-
-#ifndef R600_PIPE_COMMON_H
-#define R600_PIPE_COMMON_H
-
-#include <stdio.h>
-
-#include "amd/common/ac_binary.h"
-
-#include "radeon/radeon_winsys.h"
-
-#include "util/disk_cache.h"
-#include "util/u_blitter.h"
-#include "util/list.h"
-#include "util/u_range.h"
-#include "util/slab.h"
-#include "util/u_suballoc.h"
-#include "util/u_transfer.h"
-#include "util/u_threaded_context.h"
-
-struct u_log_context;
-struct si_screen;
-struct si_context;
-struct si_perfcounters;
-struct tgsi_shader_info;
-struct si_qbo_state;
-
-/* Only 32-bit buffer allocations are supported, gallium doesn't support more
- * at the moment.
- */
-struct r600_resource {
-   struct threaded_resourceb;
-
-   /* Winsys objects. */
-   struct pb_buffer*buf;
-   uint64_tgpu_address;
-   /* Memory usage if the buffer placement is optimal. */
-   uint64_tvram_usage;
-   uint64_tgart_usage;
-
-   /* Resource properties. */
-   uint64_tbo_size;
-   unsignedbo_alignment;
-   enum radeon_bo_domain   domains;
-   enum radeon_bo_flag flags;
-   unsignedbind_history;
-   int max_forced_staging_uploads;
-
-   /* The buffer range which is initialized (with a write transfer,
-* streamout, DMA, or as a random access target). The rest of
-* the buffer is considered invalid and can be mapped unsynchronized.
-*
-* This allows unsynchronized mapping of a buffer range which hasn't
-* been used yet. It's for applications which forget to use
-* the unsynchronized map flag and expect the driver to figure it out.
- */
-   struct util_range   valid_buffer_range;
-
-   /* For buffers only. This indicates that a write operation has been
-* performed by TC L2, but the cache hasn't been flushed.
-* Any hw block which doesn't use or bypasses TC L2 should check this
-* flag and flush the cache before using the buffer.
-*
-* For example, TC L2 must be flushed if a buffer which has been
-* modified by a shader store instruction is about to be used as
-* an index buffer. The reason is that VGT DMA index fetching doesn't
-* use TC L2.
-*/
-   boolTC_L2_dirty;
-
-   /* Whether the resource 

[Mesa-dev] [PATCH 6/9] radeonsi: inline 2 trivial state structures

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_blit.c   |  2 +-
 src/gallium/drivers/radeonsi/si_gfx_cs.c |  4 ++--
 src/gallium/drivers/radeonsi/si_pipe.c   |  2 +-
 src/gallium/drivers/radeonsi/si_pipe.h   | 12 ++--
 src/gallium/drivers/radeonsi/si_state.c  | 10 +-
 5 files changed, 11 insertions(+), 19 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index bbdb5e8f271..bd20a900e69 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -50,21 +50,21 @@ void si_blitter_begin(struct si_context *sctx, enum 
si_blitter_op op)
util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader.cso);
util_blitter_save_so_targets(sctx->blitter, sctx->streamout.num_targets,
 (struct 
pipe_stream_output_target**)sctx->streamout.targets);
util_blitter_save_rasterizer(sctx->blitter, 
sctx->queued.named.rasterizer);
 
if (op & SI_SAVE_FRAGMENT_STATE) {
util_blitter_save_blend(sctx->blitter, 
sctx->queued.named.blend);
util_blitter_save_depth_stencil_alpha(sctx->blitter, 
sctx->queued.named.dsa);
util_blitter_save_stencil_ref(sctx->blitter, 
&sctx->stencil_ref.state);
util_blitter_save_fragment_shader(sctx->blitter, 
sctx->ps_shader.cso);
-   util_blitter_save_sample_mask(sctx->blitter, 
sctx->sample_mask.sample_mask);
+   util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask);
util_blitter_save_scissor(sctx->blitter, 
&sctx->scissors.states[0]);
}
 
if (op & SI_SAVE_FRAMEBUFFER)
util_blitter_save_framebuffer(sctx->blitter, 
&sctx->framebuffer.state);
 
if (op & SI_SAVE_TEXTURES) {
util_blitter_save_fragment_sampler_states(
sctx->blitter, 2,

(void**)sctx->samplers[PIPE_SHADER_FRAGMENT].sampler_states);
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c 
b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index e864b627a96..1358010c63c 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -256,25 +256,25 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
ctx->framebuffer.dirty_zsbuf = true;
}
/* This should always be marked as dirty to set the framebuffer scissor
 * at least. */
si_mark_atom_dirty(ctx, &ctx->atoms.s.framebuffer);
 
si_mark_atom_dirty(ctx, &ctx->atoms.s.clip_regs);
/* CLEAR_STATE sets zeros. */
if (!has_clear_state || ctx->clip_state.any_nonzeros)
si_mark_atom_dirty(ctx, &ctx->atoms.s.clip_state);
-   ctx->msaa_sample_locs.nr_samples = 0;
+   ctx->sample_locs_num_samples = 0;
si_mark_atom_dirty(ctx, &ctx->atoms.s.msaa_sample_locs);
si_mark_atom_dirty(ctx, &ctx->atoms.s.msaa_config);
/* CLEAR_STATE sets 0xffff. */
-   if (!has_clear_state || ctx->sample_mask.sample_mask != 0xffff)
+   if (!has_clear_state || ctx->sample_mask != 0xffff)
si_mark_atom_dirty(ctx, &ctx->atoms.s.sample_mask);
si_mark_atom_dirty(ctx, &ctx->atoms.s.cb_render_state);
/* CLEAR_STATE sets zeros. */
if (!has_clear_state || ctx->blend_color.any_nonzeros)
si_mark_atom_dirty(ctx, &ctx->atoms.s.blend_color);
si_mark_atom_dirty(ctx, &ctx->atoms.s.db_render_state);
if (ctx->chip_class >= GFX9)
si_mark_atom_dirty(ctx, &ctx->atoms.s.dpbb_state);
si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref);
si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index c5466bda9f8..abe7dd51eb4 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -465,21 +465,21 @@ static struct pipe_context *si_create_context(struct 
pipe_screen *screen,
 
if (sscreen->debug_flags & DBG(FORCE_DMA))
sctx->b.resource_copy_region = sctx->dma_copy;
 
sctx->blitter = util_blitter_create(&sctx->b);
if (sctx->blitter == NULL)
goto fail;
sctx->blitter->draw_rectangle = si_draw_rectangle;
sctx->blitter->skip_viewport_restore = true;
 
-   sctx->sample_mask.sample_mask = 0xffff;
+   sctx->sample_mask = 0xffff;
 
/* these must be last */
si_begin_new_gfx_cs(sctx);
 
if (sctx->chip_class >= GFX9) {
sctx->wait_mem_scratch = (struct r600_resource*)
pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 4);
if (!sctx->wait_mem_scratch)
goto fail;
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 41f88b9688e..eef8e602fad 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ 

[Mesa-dev] [PATCH 1/7] ac/surface: handle DCC subresource fast clear restriction on VI

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/ac_surface.c | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 7558dd91e34..c209b209da2 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -344,24 +344,36 @@ static int gfx6_compute_level(ADDR_HANDLE addrlib,
AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
 
ret = AddrComputeDccInfo(addrlib,
 AddrDccIn,
 AddrDccOut);
 
if (ret == ADDR_OK) {
surf_level->dcc_offset = surf->dcc_size;
-   surf_level->dcc_fast_clear_size = 
AddrDccOut->dccFastClearSize;
surf->num_dcc_levels = level + 1;
surf->dcc_size = surf_level->dcc_offset + 
AddrDccOut->dccRamSize;
surf->dcc_alignment = MAX2(surf->dcc_alignment, 
AddrDccOut->dccRamBaseAlign);
+
+   /* If the DCC size of a subresource (1 mip level or 1 
slice)
+* is not aligned, the DCC memory layout is not 
contiguous for
+* that subresource, which means we can't use fast 
clear.
+*
+* We only do fast clears for whole mipmap levels. If 
we did
+* per-slice fast clears, the same restriction would 
apply.
+* (i.e. only compute the slice size and see if it's 
aligned)
+*/
+   if (level == config->info.levels - 1 || 
AddrDccOut->dccRamSizeAligned)
+   surf_level->dcc_fast_clear_size = 
AddrDccOut->dccFastClearSize;
+   else
+   surf_level->dcc_fast_clear_size = 0;
}
}
 
/* TC-compatible HTILE. */
if (!is_stencil &&
AddrSurfInfoIn->flags.depth &&
surf_level->mode == RADEON_SURF_MODE_2D &&
level == 0) {
AddrHtileIn->flags.tcCompatible = 
AddrSurfInfoIn->flags.tcCompatible;
AddrHtileIn->pitch = AddrSurfInfoOut->pitch;
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/7] radeonsi: implement DCC fast clear swizzle constraints more accurately

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

Reduce swizzle constraints to the ALPHA_IS_ON_MSB constraint and the clear
value of 1.

This significantly changes the DCC fast clear code, and fixes fast clear
for RGB formats without alpha.
---
 src/gallium/drivers/radeonsi/si_clear.c | 94 -
 src/gallium/drivers/radeonsi/si_pipe.h  |  2 +
 src/gallium/drivers/radeonsi/si_state.c |  4 +-
 3 files changed, 65 insertions(+), 35 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_clear.c 
b/src/gallium/drivers/radeonsi/si_clear.c
index 7a8fdf59797..03cb08502ef 100644
--- a/src/gallium/drivers/radeonsi/si_clear.c
+++ b/src/gallium/drivers/radeonsi/si_clear.c
@@ -86,69 +86,84 @@ static void si_set_clear_color(struct r600_texture *rtex,
util_format_write_4ui(surface_format, color->ui, 0, , 0, 0, 
0, 1, 1);
} else if (util_format_is_pure_sint(surface_format)) {
util_format_write_4i(surface_format, color->i, 0, , 0, 0, 0, 
1, 1);
} else {
util_pack_color(color->f, surface_format, );
}
 
memcpy(rtex->color_clear_value, , 2 * sizeof(uint32_t));
 }
 
-static bool vi_get_fast_clear_parameters(enum pipe_format surface_format,
+/** Linearize and convert luminance/intensity to red. */
+enum pipe_format si_simplify_cb_format(enum pipe_format format)
+{
+   format = util_format_linear(format);
+   format = util_format_luminance_to_red(format);
+   return util_format_intensity_to_red(format);
+}
+
+bool vi_alpha_is_on_msb(enum pipe_format format)
+{
+   format = si_simplify_cb_format(format);
+
+   /* Formats with 3 channels can't have alpha. */
+   if (util_format_description(format)->nr_channels == 3)
+   return 1; /* same as xxxA; is any value OK here? */
+
+   return si_translate_colorswap(format, false) <= 1;
+}
+
+static bool vi_get_fast_clear_parameters(enum pipe_format base_format,
+enum pipe_format surface_format,
 const union pipe_color_union *color,
 uint32_t* clear_value,
 bool *eliminate_needed)
 {
/* If we want to clear without needing a fast clear eliminate step, we
 * can set color and alpha independently to 0 or 1 (or 0/max for integer
 * formats).
 */
bool values[4] = {}; /* whether to clear to 0 or 1 */
-   int i;
bool color_value = false; /* clear color to 0 or 1 */
bool alpha_value = false; /* clear alpha to 0 or 1 */
int alpha_channel; /* index of the alpha component */
+   bool has_color = false;
+   bool has_alpha = false;
 
-   /* Convert luminance to red. (the latter can't handle L8_SRGB,
-* so convert to linear) */
-   surface_format = util_format_linear(surface_format);
-   surface_format = util_format_luminance_to_red(surface_format);
-
-   const struct util_format_description *desc = 
util_format_description(surface_format);
+   const struct util_format_description *desc =
+   util_format_description(si_simplify_cb_format(surface_format));
 
/* 128-bit fast clear with different R,G,B values is unsupported. */
if (desc->block.bits == 128 &&
(color->ui[0] != color->ui[1] ||
 color->ui[0] != color->ui[2]))
return false;
 
*eliminate_needed = true;
*clear_value = 0x20202020U; /* use CB clear color registers */
 
-   if (surface_format == PIPE_FORMAT_R11G11B10_FLOAT ||
-   surface_format == PIPE_FORMAT_B5G6R5_UNORM ||
-   surface_format == PIPE_FORMAT_B5G6R5_SRGB ||
-   util_format_is_alpha(surface_format)) {
-   alpha_channel = -1;
-   } else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
-   if (si_translate_colorswap(surface_format, false) <= 1)
-   alpha_channel = desc->nr_channels - 1;
-   else
-   alpha_channel = 0;
-   } else
+   if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
return true; /* need ELIMINATE_FAST_CLEAR */
 
-   for (i = 0; i < 4; ++i) {
-   int index = desc->swizzle[i] - PIPE_SWIZZLE_X;
+   bool base_alpha_is_on_msb = vi_alpha_is_on_msb(base_format);
+   bool surf_alpha_is_on_msb = vi_alpha_is_on_msb(surface_format);
+
+   /* Formats with 3 channels can't have alpha. */
+   if (desc->nr_channels == 3)
+   alpha_channel = -1;
+   else if (surf_alpha_is_on_msb)
+   alpha_channel = desc->nr_channels - 1;
+   else
+   alpha_channel = 0;
 
-   if (desc->swizzle[i] < PIPE_SWIZZLE_X ||
-   desc->swizzle[i] > PIPE_SWIZZLE_W)
+   for (int i = 0; i < 4; ++i) {
+   if (desc->swizzle[i] >= PIPE_SWIZZLE_0)
continue;
 
if 

[Mesa-dev] [PATCH 3/7] radeonsi: fully enable 2x DCC MSAA for array and non-array textures

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

The clear code is exactly the same as for 1 sample buffers -
just clear the whole thing.
---
 src/gallium/drivers/radeonsi/si_clear.c   | 13 +++--
 src/gallium/drivers/radeonsi/si_pipe.c|  5 +
 src/gallium/drivers/radeonsi/si_pipe.h|  1 -
 src/gallium/drivers/radeonsi/si_texture.c | 15 ---
 4 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_clear.c 
b/src/gallium/drivers/radeonsi/si_clear.c
index b08a9558b4d..f2df9483c12 100644
--- a/src/gallium/drivers/radeonsi/si_clear.c
+++ b/src/gallium/drivers/radeonsi/si_clear.c
@@ -204,33 +204,34 @@ void vi_dcc_clear_level(struct si_context *sctx,
dcc_buffer = >dcc_separate_buffer->b.b;
dcc_offset = 0;
} else {
dcc_buffer = >resource.b.b;
dcc_offset = rtex->dcc_offset;
}
 
if (sctx->chip_class >= GFX9) {
/* Mipmap level clears aren't implemented. */
assert(rtex->resource.b.b.last_level == 0);
-   /* MSAA needs a different clear size. */
-   assert(rtex->resource.b.b.nr_samples <= 1);
+   /* 4x and 8x MSAA needs a sophisticated compute shader for
+* the clear. See AMDVLK. */
+   assert(rtex->resource.b.b.nr_samples <= 2);
clear_size = rtex->surface.dcc_size;
} else {
unsigned num_layers = util_num_layers(>resource.b.b, 
level);
 
/* If this is 0, fast clear isn't possible. (can occur with 
MSAA) */
assert(rtex->surface.u.legacy.level[level].dcc_fast_clear_size);
-   /* Layered MSAA DCC fast clears need to clear 
dcc_fast_clear_size
-* bytes for each layer. This is not currently implemented, and
-* therefore MSAA DCC isn't even enabled with multiple layers.
+   /* Layered 4x and 8x MSAA DCC fast clears need to clear
+* dcc_fast_clear_size bytes for each layer. A compute shader
+* would be more efficient than separate per-layer clear 
operations.
 */
-   assert(rtex->resource.b.b.nr_samples <= 1 || num_layers == 1);
+   assert(rtex->resource.b.b.nr_samples <= 2 || num_layers == 1);
 
dcc_offset += rtex->surface.u.legacy.level[level].dcc_offset;
clear_size = 
rtex->surface.u.legacy.level[level].dcc_fast_clear_size *
 num_layers;
}
 
si_clear_buffer(sctx, dcc_buffer, dcc_offset, clear_size,
clear_value, SI_COHERENCY_CB_META);
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 466546b76b7..a10f1c9f794 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -83,21 +83,20 @@ static const struct debug_named_value debug_options[] = {
{ "dpbb", DBG(DPBB), "Enable DPBB." },
{ "dfsm", DBG(DFSM), "Enable DFSM." },
{ "nohyperz", DBG(NO_HYPERZ), "Disable Hyper-Z" },
{ "norbplus", DBG(NO_RB_PLUS), "Disable RB+." },
{ "no2d", DBG(NO_2D_TILING), "Disable 2D tiling" },
{ "notiling", DBG(NO_TILING), "Disable tiling" },
{ "nodcc", DBG(NO_DCC), "Disable DCC." },
{ "nodccclear", DBG(NO_DCC_CLEAR), "Disable DCC fast clear." },
{ "nodccfb", DBG(NO_DCC_FB), "Disable separate DCC on the main 
framebuffer" },
{ "nodccmsaa", DBG(NO_DCC_MSAA), "Disable DCC for MSAA" },
-   { "dccmsaa", DBG(DCC_MSAA), "Enable DCC for MSAA" },
{ "nofmask", DBG(NO_FMASK), "Disable MSAA compression" },
 
/* Tests: */
{ "testdma", DBG(TEST_DMA), "Invoke SDMA tests and exit." },
{ "testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and 
exit." },
{ "testvmfaultsdma", DBG(TEST_VMFAULT_SDMA), "Invoke a SDMA VM fault 
test and exit." },
{ "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM 
fault test and exit." },
 
DEBUG_NAMED_VALUE_END /* must be last */
 };
@@ -1004,23 +1003,21 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws,
sscreen->has_rbplus = true;
 
sscreen->rbplus_allowed =
!(sscreen->debug_flags & DBG(NO_RB_PLUS)) &&
(sscreen->info.family == CHIP_STONEY ||
 sscreen->info.family == CHIP_VEGA12 ||
 sscreen->info.family == CHIP_RAVEN);
}
 
sscreen->dcc_msaa_allowed =
-   !(sscreen->debug_flags & DBG(NO_DCC_MSAA)) &&
-   (sscreen->debug_flags & DBG(DCC_MSAA) ||
-sscreen->info.chip_class == VI);
+   !(sscreen->debug_flags & DBG(NO_DCC_MSAA));
 
sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= VI;
 
(void) 

[Mesa-dev] [PATCH 2/7] radeonsi: enable fast color clear for level 0 of mipmapped textures on <= VI

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

GFX9 is more complicated and needs a compute shader that we should just
copy from amdvlk.
---
 src/gallium/drivers/radeonsi/si_clear.c | 17 -
 src/gallium/drivers/radeonsi/si_state.c | 16 
 2 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_clear.c 
b/src/gallium/drivers/radeonsi/si_clear.c
index 4e05d9bf5b2..b08a9558b4d 100644
--- a/src/gallium/drivers/radeonsi/si_clear.c
+++ b/src/gallium/drivers/radeonsi/si_clear.c
@@ -362,33 +362,40 @@ static void si_do_fast_color_clear(struct si_context 
*sctx,
unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;
 
if (!fb->cbufs[i])
continue;
 
/* if this colorbuffer is not being cleared */
if (!(*buffers & clear_bit))
continue;
 
unsigned level = fb->cbufs[i]->u.tex.level;
+   if (level > 0)
+   continue;
+
tex = (struct r600_texture *)fb->cbufs[i]->texture;
 
+   /* TODO: GFX9: Implement DCC fast clear for level 0 of
+* mipmapped textures. Mipmapped DCC has to clear a rectangular
+* area of DCC for level 0 (because the whole miptree is
+* organized in a 2D plane).
+*/
+   if (sctx->chip_class >= GFX9 &&
+   tex->resource.b.b.last_level > 0)
+   continue;
+
/* the clear is allowed if all layers are bound */
if (fb->cbufs[i]->u.tex.first_layer != 0 ||
fb->cbufs[i]->u.tex.last_layer != 
util_max_layer(>resource.b.b, 0)) {
continue;
}
 
-   /* cannot clear mipmapped textures */
-   if (fb->cbufs[i]->texture->last_level != 0) {
-   continue;
-   }
-
/* only supported on tiled surfaces */
if (tex->surface.is_linear) {
continue;
}
 
/* shared textures can't use fast clear without an explicit 
flush,
 * because there is no way to communicate the clear color among
 * all clients
 */
if (tex->resource.b.is_shared &&
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 3faf36f2470..26f61afcab0 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2956,21 +2956,21 @@ static void si_emit_framebuffer_state(struct si_context 
*sctx, struct r600_atom
 {
struct radeon_winsys_cs *cs = sctx->gfx_cs;
struct pipe_framebuffer_state *state = >framebuffer.state;
unsigned i, nr_cbufs = state->nr_cbufs;
struct r600_texture *tex = NULL;
struct r600_surface *cb = NULL;
unsigned cb_color_info = 0;
 
/* Colorbuffers. */
for (i = 0; i < nr_cbufs; i++) {
-   uint64_t cb_color_base, cb_color_fmask, cb_dcc_base;
+   uint64_t cb_color_base, cb_color_fmask, cb_color_cmask, 
cb_dcc_base;
unsigned cb_color_attrib;
 
if (!(sctx->framebuffer.dirty_cbufs & (1 << i)))
continue;
 
cb = (struct r600_surface*)state->cbufs[i];
if (!cb) {
radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i 
* 0x3C,
   
S_028C70_FORMAT(V_028C70_COLOR_INVALID));
continue;
@@ -2991,24 +2991,28 @@ static void si_emit_framebuffer_state(struct si_context 
*sctx, struct r600_atom
 
if (tex->dcc_separate_buffer)
radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
  tex->dcc_separate_buffer,
  RADEON_USAGE_READWRITE,
  RADEON_PRIO_DCC);
 
/* Compute mutable surface parameters. */
cb_color_base = tex->resource.gpu_address >> 8;
cb_color_fmask = 0;
+   cb_color_cmask = tex->cmask.base_address_reg;
cb_dcc_base = 0;
cb_color_info = cb->cb_color_info | tex->cb_color_info;
cb_color_attrib = cb->cb_color_attrib;
 
+   if (cb->base.u.tex.level > 0)
+   cb_color_info &= C_028C70_FAST_CLEAR;
+
if (tex->fmask.size) {
cb_color_fmask = (tex->resource.gpu_address + 
tex->fmask.offset) >> 8;
cb_color_fmask |= tex->fmask.tile_swizzle;
}
 
/* Set up DCC. */
if (vi_dcc_enabled(tex, cb->base.u.tex.level)) {
bool is_msaa_resolve_dst = state->cbufs[0] 

[Mesa-dev] [PATCH 4/7] radeonsi: rename variables and document stuff around DCC fast clear

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_clear.c | 83 +
 1 file changed, 42 insertions(+), 41 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_clear.c 
b/src/gallium/drivers/radeonsi/si_clear.c
index f2df9483c12..7a8fdf59797 100644
--- a/src/gallium/drivers/radeonsi/si_clear.c
+++ b/src/gallium/drivers/radeonsi/si_clear.c
@@ -88,113 +88,114 @@ static void si_set_clear_color(struct r600_texture *rtex,
util_format_write_4i(surface_format, color->i, 0, , 0, 0, 0, 
1, 1);
} else {
util_pack_color(color->f, surface_format, );
}
 
memcpy(rtex->color_clear_value, , 2 * sizeof(uint32_t));
 }
 
 static bool vi_get_fast_clear_parameters(enum pipe_format surface_format,
 const union pipe_color_union *color,
-uint32_t* reset_value,
-bool* clear_words_needed)
+uint32_t* clear_value,
+bool *eliminate_needed)
 {
-   bool values[4] = {};
+   /* If we want to clear without needing a fast clear eliminate step, we
+* can set color and alpha independently to 0 or 1 (or 0/max for integer
+* formats).
+*/
+   bool values[4] = {}; /* whether to clear to 0 or 1 */
int i;
-   bool main_value = false;
-   bool extra_value = false;
-   int extra_channel;
+   bool color_value = false; /* clear color to 0 or 1 */
+   bool alpha_value = false; /* clear alpha to 0 or 1 */
+   int alpha_channel; /* index of the alpha component */
 
-   /* This is needed to get the correct DCC clear value for luminance 
formats.
-* 1) Get the linear format (because the next step can't handle 
L8_SRGB).
-* 2) Convert luminance to red. (the real hw format for luminance)
-*/
+   /* Convert luminance to red. (the latter can't handle L8_SRGB,
+* so convert to linear) */
surface_format = util_format_linear(surface_format);
surface_format = util_format_luminance_to_red(surface_format);
 
const struct util_format_description *desc = 
util_format_description(surface_format);
 
+   /* 128-bit fast clear with different R,G,B values is unsupported. */
if (desc->block.bits == 128 &&
(color->ui[0] != color->ui[1] ||
 color->ui[0] != color->ui[2]))
return false;
 
-   *clear_words_needed = true;
-   *reset_value = 0x20202020U;
-
-   /* If we want to clear without needing a fast clear eliminate step, we
-* can set each channel to 0 or 1 (or 0/max for integer formats). We
-* have two sets of flags, one for the last or first channel(extra) and
-* one for the other channels(main).
-*/
+   *eliminate_needed = true;
+   *clear_value = 0x20202020U; /* use CB clear color registers */
 
if (surface_format == PIPE_FORMAT_R11G11B10_FLOAT ||
surface_format == PIPE_FORMAT_B5G6R5_UNORM ||
surface_format == PIPE_FORMAT_B5G6R5_SRGB ||
util_format_is_alpha(surface_format)) {
-   extra_channel = -1;
+   alpha_channel = -1;
} else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
if (si_translate_colorswap(surface_format, false) <= 1)
-   extra_channel = desc->nr_channels - 1;
+   alpha_channel = desc->nr_channels - 1;
else
-   extra_channel = 0;
+   alpha_channel = 0;
} else
-   return true;
+   return true; /* need ELIMINATE_FAST_CLEAR */
 
for (i = 0; i < 4; ++i) {
int index = desc->swizzle[i] - PIPE_SWIZZLE_X;
 
if (desc->swizzle[i] < PIPE_SWIZZLE_X ||
desc->swizzle[i] > PIPE_SWIZZLE_W)
continue;
 
if (desc->channel[i].pure_integer &&
desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
/* Use the maximum value for clamping the clear color. 
*/
int max = u_bit_consecutive(0, desc->channel[i].size - 
1);
 
values[i] = color->i[i] != 0;
if (color->i[i] != 0 && MIN2(color->i[i], max) != max)
-   return true;
+   return true; /* need ELIMINATE_FAST_CLEAR */
} else if (desc->channel[i].pure_integer &&
   desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
/* Use the maximum value for clamping the clear color. 
*/
unsigned max = u_bit_consecutive(0, 
desc->channel[i].size);
 
values[i] = color->ui[i] != 0U;
if 

Re: [Mesa-dev] [PATCH] clover: Fix host access validation for sub-buffer creation

2018-04-16 Thread Aaron Watry
On Mon, Apr 16, 2018, 5:24 PM Francisco Jerez  wrote:

> Aaron Watry  writes:
>
> >   From CL 1.2 Section 5.2.1:
> > CL_INVALID_VALUE if buffer was created with CL_MEM_HOST_WRITE_ONLY
> and
> > flags specify CL_MEM_HOST_READ_ONLY , or if buffer was created with
> > CL_MEM_HOST_READ_ONLY and flags specify CL_MEM_HOST_WRITE_ONLY , or
> if
> > buffer was created with CL_MEM_HOST_NO_ACCESS and flags specify
> > CL_MEM_HOST_READ_ONLY or CL_MEM_HOST_WRITE_ONLY .
> >
> > Fixes CL 1.2 CTS test/api get_buffer_info
> >
>
> What combination of flags is the test-case providing for both the
> parent and sub buffer?
>

The original motivation for this was a CTS test that was creating a sub
buffer with flags of:
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE

With a parent buffer created as:
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE

Which according to my reading of the spec should be allowed.

>
> > Signed-off-by: Aaron Watry 
> > Cc: Francisco Jerez 
> > ---
> >  src/gallium/state_trackers/clover/api/memory.cpp | 8 ++--
> >  1 file changed, 6 insertions(+), 2 deletions(-)
> >
> > diff --git a/src/gallium/state_trackers/clover/api/memory.cpp
> b/src/gallium/state_trackers/clover/api/memory.cpp
> > index 9b3cd8b1f5..451e8a8c56 100644
> > --- a/src/gallium/state_trackers/clover/api/memory.cpp
> > +++ b/src/gallium/state_trackers/clover/api/memory.cpp
> > @@ -57,10 +57,14 @@ namespace {
> >parent.flags() &
> host_access_flags) |
> >   (parent.flags() & host_ptr_flags));
> >
> > - if (~flags & parent.flags() &
> > - ((dev_access_flags & ~CL_MEM_READ_WRITE) |
> host_access_flags))
> > + if (~flags & parent.flags() & (dev_access_flags &
> ~CL_MEM_READ_WRITE))
> >  throw error(CL_INVALID_VALUE);
> >
> > + //Check if new host access flags cause a mismatch between
> host-read/write-only.
> > + const cl_mem_flags new_flags = flags & ~(parent.flags()) &
> ~CL_MEM_HOST_NO_ACCESS;
> > + if (new_flags & host_access_flags & parent.flags())
> > +throw error (CL_INVALID_VALUE);
> > +
>
> This doesn't look correct to me, the condition will always evaluate to
> zero, you're calculating the conjunction of ~parent.flags() and
> parent.flags() which is zero, so the error will never be emitted.
>

I'll see what I can do. I agree with a fresh reading that it looks fishy at
best.

--Aaron

>
> >   return flags;
> >
> >} else {
> > --
> > 2.14.1
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/7] meson: fix builds against LLVM built without rtti

2018-04-16 Thread Dylan Baker
Building without rtti is fraught with peril, but it's something that
autotools supports so we need to support it too.

Since we've moved to version 0.44 as a whole, we can use the meson
functionality for accessing arbitrary llvm-config options to check for
rtti and add -fno-rtti to all C++ code accordingly.

Signed-off-by: Dylan Baker 
---
 meson.build | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/meson.build b/meson.build
index b79d7b9c7f2..beb65b314ac 100644
--- a/meson.build
+++ b/meson.build
@@ -1143,6 +1143,13 @@ if with_llvm
 '-DHAVE_LLVM=0x0@0@0@1@'.format(_llvm_version[0], _llvm_version[1]),
 '-DMESA_LLVM_VERSION_PATCH=@0@'.format(_llvm_patch),
   ]
+
+  # LLVM can be built without rtti, turning off rtti changes the ABI of C++
+  # programs, so we need to build all C++ code in mesa without rtti as well to
+  # ensure that linking works.
+  if dep_llvm.get_configtool_variable('has-rtti') == 'NO'
+cpp_args('-fno-rtti')
+  endif
 elif with_amd_vk or with_gallium_radeonsi or with_gallium_swr
   error('The following drivers require LLVM: Radv, RadeonSI, SWR. One of these 
is enabled, but LLVM is disabled.')
 endif
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/7] meson: remove workaround for custom target creating .h and .c files

2018-04-16 Thread Dylan Baker
In more modern versions of meson a custom_target returns an index-able
object. This allows us to create accurate dependency models for targets
that rely only on the header and not on the code from anv_entrypoints.

Signed-off-by: Dylan Baker 
---
 src/intel/vulkan/meson.build | 17 -
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build
index 2dce0a1515b..0895bdac1cc 100644
--- a/src/intel/vulkan/meson.build
+++ b/src/intel/vulkan/meson.build
@@ -81,15 +81,6 @@ dev_icd = custom_target(
   install : false,
 )
 
-# TODO: workaround for anv_entrypoints combining the .h and .c files in it's
-# output. See issue #2346
-block_entrypoints = custom_target(
-  'block_entrypoints',
-  command : [prog_touch, '@OUTPUT@'],
-  output : 'null',
-  depends : anv_entrypoints,
-)
-
 libanv_gen_libs = []
 anv_gen_files = files(
   'genX_blorp_exec.c',
@@ -105,7 +96,7 @@ foreach g : [['70', ['gen7_cmd_buffer.c']], ['75', 
['gen7_cmd_buffer.c']],
   _gen = g[0]
   libanv_gen_libs += static_library(
 'libanv_gen@0@'.format(_gen),
-[anv_gen_files, g[1], block_entrypoints],
+[anv_gen_files, g[1], anv_entrypoints[0]],
 include_directories : [
   inc_common, inc_compiler, inc_drm_uapi, inc_intel, inc_vulkan_util,
   inc_vulkan_wsi,
@@ -186,7 +177,7 @@ libanv_common = static_library(
 
 libvulkan_intel = shared_library(
   'vulkan_intel',
-  [files('anv_gem.c'), block_entrypoints, anv_extensions_h],
+  [files('anv_gem.c'), anv_entrypoints[0], anv_extensions_h],
   include_directories : [
 inc_common, inc_intel, inc_compiler, inc_drm_uapi, inc_vulkan_util,
 inc_vulkan_wsi,
@@ -207,7 +198,7 @@ libvulkan_intel = shared_library(
 if with_tests
   libvulkan_intel_test = static_library(
 'vulkan_intel_test',
-[files('anv_gem_stubs.c'), block_entrypoints, anv_extensions_h],
+[files('anv_gem_stubs.c'), anv_entrypoints[0], anv_extensions_h],
 include_directories : [
   inc_common, inc_intel, inc_compiler, inc_drm_uapi, inc_vulkan_util,
   inc_vulkan_wsi,
@@ -229,7 +220,7 @@ if with_tests
   'anv_@0@'.format(t),
   executable(
 t,
-['tests/@0@.c'.format(t), dummy_cpp, block_entrypoints],
+['tests/@0@.c'.format(t), dummy_cpp, anv_entrypoints[0]],
 link_with : libvulkan_intel_test,
 dependencies : [dep_libdrm, dep_thread, dep_m, dep_valgrind],
 include_directories : [
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/7] meson: raise required version to 0.44.1

2018-04-16 Thread Dylan Baker
We have already required 0.44 for building clover and swr, so it was
already partially required. This just makes it required across the board
instead of just for clover and swr.

There is a bug in 0.44 which makes it impossible to build mesa in some
configurations, so require 0.44.1 which fixes this.

Signed-off-by: Dylan Baker 
---
 docs/meson.html | 5 +
 meson.build | 2 +-
 src/gallium/meson.build | 6 --
 3 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/docs/meson.html b/docs/meson.html
index b8877585112..d05b5fe347b 100644
--- a/docs/meson.html
+++ b/docs/meson.html
@@ -24,10 +24,7 @@ for production
 The meson build is tested on on Linux, macOS, Cygwin and Haiku, it should
 work on FreeBSD, DragonflyBSD, NetBSD, and OpenBSD.
 
-Mesa requires Meson >= 0.42.0 to build in general.
-
-Additionaly, to build the Clover OpenCL state tracker or the OpenSWR driver
-meson 0.44.0 or greater is required.
+Mesa requires Meson >= 0.44.1 to build.
 
 Some older versions of meson do not check that they are too old and will error
 out in odd ways.
diff --git a/meson.build b/meson.build
index a4dfa62255c..fc3d611445e 100644
--- a/meson.build
+++ b/meson.build
@@ -25,7 +25,7 @@ project(
 [find_program('python', 'python2', 'python3'), 'bin/meson_get_version.py']
   ).stdout(),
   license : 'MIT',
-  meson_version : '>= 0.42',
+  meson_version : '>= 0.44.1',
   default_options : ['buildtype=debugoptimized', 'c_std=c99', 'cpp_std=c++11']
 )
 
diff --git a/src/gallium/meson.build b/src/gallium/meson.build
index c4dd8e1c26d..91ff729dfde 100644
--- a/src/gallium/meson.build
+++ b/src/gallium/meson.build
@@ -40,9 +40,6 @@ if with_platform_haiku
   subdir('winsys/sw/hgl')
 endif
 if with_gallium_swr
-  if meson.version().version_compare('< 0.44.0')
-error('SWR requires meson 0.44.0 or greater.')
-  endif
   subdir('drivers/swr')
 else
   driver_swr = declare_dependency()
@@ -144,9 +141,6 @@ if with_gallium_opencl
   # consumer
   subdir('targets/pipe-loader')
 
-  if meson.version().version_compare('< 0.44.0')
-error('OpenCL requires meson 0.44.0 or greater.')
-  endif
   subdir('state_trackers/clover')
   subdir('targets/opencl')
 endif
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/7] meson: allow empty sources when using link_whole

2018-04-16 Thread Dylan Baker
meson used to get grumpy if the sources list was empty, even when using
--whole-archive (link_whole). In more recent versions that's not true,
so remove the workaround.

Signed-off-by: Dylan Baker 
---
 src/glx/meson.build | 9 +
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/glx/meson.build b/src/glx/meson.build
index 90ab552ac4d..dd8ba60ad80 100644
--- a/src/glx/meson.build
+++ b/src/glx/meson.build
@@ -157,17 +157,10 @@ libglx = static_library(
   build_by_default : false,
 )
 
-# workaround for bug #2180
-dummy_c = custom_target(
-  'dummy_c',
-  output : 'dummy.c',
-  command : [prog_touch, '@OUTPUT@'],
-)
-
 if with_glx == 'dri'
   libgl = shared_library(
 gl_lib_name,
-dummy_c,  # workaround for bug #2180
+[],
 include_directories : [inc_common, inc_glapi, inc_loader, inc_gl_internal],
 link_with : [libglapi_static, libglapi],
 link_whole : libglx,
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/7] meson: use array type options

2018-04-16 Thread Dylan Baker
This option type is nice since it involves less converting strings into
lists, and because it validates the values that are provided.

Signed-off-by: Dylan Baker 
---
 meson.build   | 113 +++---
 meson_options.txt |  48 
 2 files changed, 87 insertions(+), 74 deletions(-)

diff --git a/meson.build b/meson.build
index beb65b314ac..b0e2bc1f625 100644
--- a/meson.build
+++ b/meson.build
@@ -51,8 +51,8 @@ with_valgrind = get_option('valgrind')
 with_libunwind = get_option('libunwind')
 with_asm = get_option('asm')
 with_osmesa = get_option('osmesa')
-with_swr_arches = get_option('swr-arches').split(',')
-with_tools = get_option('tools').split(',')
+with_swr_arches = get_option('swr-arches')
+with_tools = get_option('tools')
 if with_tools.contains('all')
   with_tools = ['freedreno', 'glsl', 'intel', 'nir', 'nouveau']
 endif
@@ -101,31 +101,30 @@ with_dri_r200 = false
 with_dri_nouveau = false
 with_dri_swrast = false
 _drivers = get_option('dri-drivers')
-if _drivers == 'auto'
+if _drivers.contains('auto')
   if system_has_kms_drm
 # TODO: PPC, Sparc
 if ['x86', 'x86_64'].contains(host_machine.cpu_family())
-  _drivers = 'i915,i965,r100,r200,nouveau'
+  _drivers = ['i915', 'i965', 'r100', 'r200', 'nouveau']
 elif ['arm', 'aarch64'].contains(host_machine.cpu_family())
-  _drivers = ''
+  _drivers = ['']
 else
   error('Unknown architecture. Please pass -Ddri-drivers to set driver 
options. Patches gladly accepted to fix this.')
 endif
   elif ['darwin', 'windows', 'cygwin', 'haiku'].contains(host_machine.system())
 # only swrast would make sense here, but gallium swrast is a much better 
default
-_drivers = ''
+_drivers = ['']
   else
 error('Unknown OS. Please pass -Ddri-drivers to set driver options. 
Patches gladly accepted to fix this.')
   endif
 endif
-if _drivers != ''
-  _split = _drivers.split(',')
-  with_dri_i915 = _split.contains('i915')
-  with_dri_i965 = _split.contains('i965')
-  with_dri_r100 = _split.contains('r100')
-  with_dri_r200 = _split.contains('r200')
-  with_dri_nouveau = _split.contains('nouveau')
-  with_dri_swrast = _split.contains('swrast')
+if _drivers != ['']
+  with_dri_i915 = _drivers.contains('i915')
+  with_dri_i965 = _drivers.contains('i965')
+  with_dri_r100 = _drivers.contains('r100')
+  with_dri_r200 = _drivers.contains('r200')
+  with_dri_nouveau = _drivers.contains('nouveau')
+  with_dri_swrast = _drivers.contains('swrast')
   with_dri = true
 endif
 
@@ -147,40 +146,44 @@ with_gallium_svga = false
 with_gallium_virgl = false
 with_gallium_swr = false
 _drivers = get_option('gallium-drivers')
-if _drivers == 'auto'
+if _drivers.contains('auto')
   if system_has_kms_drm
 # TODO: PPC, Sparc
 if ['x86', 'x86_64'].contains(host_machine.cpu_family())
-  _drivers = 'r300,r600,radeonsi,nouveau,virgl,svga,swrast'
+  _drivers = [
+'r300', 'r600', 'radeonsi', 'nouveau', 'virgl', 'svga', 'swrast'
+  ]
 elif ['arm', 'aarch64'].contains(host_machine.cpu_family())
-  _drivers = 
'pl111,vc4,vc5,freedreno,etnaviv,imx,nouveau,tegra,virgl,swrast'
+  _drivers = [
+'pl111', 'vc4', 'vc5', 'freedreno', 'etnaviv', 'imx', 'nouveau',
+'tegra', 'virgl', 'swrast',
+  ]
 else
   error('Unknown architecture. Please pass -Dgallium-drivers to set driver 
options. Patches gladly accepted to fix this.')
 endif
   elif ['darwin', 'windows', 'cygwin', 'haiku'].contains(host_machine.system())
-_drivers = 'swrast'
+_drivers = ['swrast']
   else
 error('Unknown OS. Please pass -Dgallium-drivers to set driver options. 
Patches gladly accepted to fix this.')
   endif
 endif
-if _drivers != ''
-  _split = _drivers.split(',')
-  with_gallium_pl111 = _split.contains('pl111')
-  with_gallium_radeonsi = _split.contains('radeonsi')
-  with_gallium_r300 = _split.contains('r300')
-  with_gallium_r600 = _split.contains('r600')
-  with_gallium_nouveau = _split.contains('nouveau')
-  with_gallium_freedreno = _split.contains('freedreno')
-  with_gallium_softpipe = _split.contains('swrast')
-  with_gallium_vc4 = _split.contains('vc4')
-  with_gallium_vc5 = _split.contains('vc5')
-  with_gallium_etnaviv = _split.contains('etnaviv')
-  with_gallium_imx = _split.contains('imx')
-  with_gallium_tegra = _split.contains('tegra')
-  with_gallium_i915 = _split.contains('i915')
-  with_gallium_svga = _split.contains('svga')
-  with_gallium_virgl = _split.contains('virgl')
-  with_gallium_swr = _split.contains('swr')
+if _drivers != ['']
+  with_gallium_pl111 = _drivers.contains('pl111')
+  with_gallium_radeonsi = _drivers.contains('radeonsi')
+  with_gallium_r300 = _drivers.contains('r300')
+  with_gallium_r600 = _drivers.contains('r600')
+  with_gallium_nouveau = _drivers.contains('nouveau')
+  with_gallium_freedreno = _drivers.contains('freedreno')
+  with_gallium_softpipe = _drivers.contains('swrast')

[Mesa-dev] [PATCH 7/7] meson: use new warning function

2018-04-16 Thread Dylan Baker
Instead of emulating it with message.

Signed-off-by: Dylan Baker 
---
 meson.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/meson.build b/meson.build
index b0e2bc1f625..d3480ee5c8c 100644
--- a/meson.build
+++ b/meson.build
@@ -58,7 +58,7 @@ if with_tools.contains('all')
 endif
 if get_option('texture-float')
   pre_args += '-DTEXTURE_FLOAT_ENABLED'
-  message('WARNING: Floating-point texture enabled. Please consult 
docs/patents.txt and your lawyer before building mesa.')
+  warning('Floating-point texture enabled. Please consult docs/patents.txt and 
your lawyer before building mesa.')
 endif
 
 dri_drivers_path = get_option('dri-drivers-path')
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/7] meson: remove dummy_cpp

2018-04-16 Thread Dylan Baker
meson has gotten pretty smart about tracking C and C++ dependencies
(internal and external), and using the right linker. This wasn't always
the case and we created empty c++ files to force the use of the c++
linker. We don't need that any more.

Signed-off-by: Dylan Baker 
---
 meson.build  | 8 
 src/compiler/meson.build | 2 +-
 src/intel/vulkan/meson.build | 2 +-
 src/mesa/drivers/dri/meson.build | 2 +-
 4 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/meson.build b/meson.build
index fc3d611445e..b79d7b9c7f2 100644
--- a/meson.build
+++ b/meson.build
@@ -1311,14 +1311,6 @@ endif
 
 # TODO: gallium driver dirs
 
-# FIXME: this is a workaround for #2326
-prog_touch = find_program('touch')
-dummy_cpp = custom_target(
-  'dummy_cpp',
-  output : 'dummy.cpp',
-  command : [prog_touch, '@OUTPUT@'],
-)
-
 foreach a : pre_args
   add_project_arguments(a, language : ['c', 'cpp'])
 endforeach
diff --git a/src/compiler/meson.build b/src/compiler/meson.build
index da2464d7b8c..60b633832d4 100644
--- a/src/compiler/meson.build
+++ b/src/compiler/meson.build
@@ -62,7 +62,7 @@ subdir('nir')
 # dependency with nir/meson.build.
 spirv2nir = executable(
   'spirv2nir',
-  [files('spirv/spirv2nir.c'), dummy_cpp],
+  files('spirv/spirv2nir.c'),
   dependencies : [dep_m, dep_thread, idep_nir],
   include_directories : [inc_common, inc_nir, include_directories('spirv')],
   link_with : libmesa_util,
diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build
index 0895bdac1cc..06acc78391f 100644
--- a/src/intel/vulkan/meson.build
+++ b/src/intel/vulkan/meson.build
@@ -220,7 +220,7 @@ if with_tests
   'anv_@0@'.format(t),
   executable(
 t,
-['tests/@0@.c'.format(t), dummy_cpp, anv_entrypoints[0]],
+['tests/@0@.c'.format(t), anv_entrypoints[0]],
 link_with : libvulkan_intel_test,
 dependencies : [dep_libdrm, dep_thread, dep_m, dep_valgrind],
 include_directories : [
diff --git a/src/mesa/drivers/dri/meson.build b/src/mesa/drivers/dri/meson.build
index 8e23fd43ad6..943727b662a 100644
--- a/src/mesa/drivers/dri/meson.build
+++ b/src/mesa/drivers/dri/meson.build
@@ -44,7 +44,7 @@ endif
 if dri_drivers != []
   libmesa_dri_drivers = shared_library(
 'mesa_dri_drivers',
-dummy_cpp,  # see meson #2180
+[],
 link_whole : dri_drivers,
 link_with : [
   libmegadriver_stub, libdricommon, libxmlconfig, libglapi, libmesa_util,
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] mesa: GL_EXT_texture_norm16 extension plumbing

2018-04-16 Thread Ilia Mirkin
On Mon, Apr 16, 2018 at 7:36 PM, Tapani Pälli  wrote:
> Hi;
>
> On 16.04.2018 18:19, Ilia Mirkin wrote:
>>
>> You never check EXT_texture_norm16 anywhere... is that right? Should
>> probably respect that in the _mesa_is_es3_* helpers. Also what about
>
>
> Oops that is very correct, will add the checks.
>
>> shader support? I see no mention in the spec that these should be
>> usable as images, except when NV_image_formats is enabled. You're just
>> exposing them no matter what in ES 3.10.
>
>
> Layout qualifiers added by the spec are exposed only when 3.10 ES and
> NV_image_formats are both there, the boolean in map below (last column) is
> the check for NV_image_formats, without it you'll get compilation error.

That's not quite right:

   if ((state->is_version(map[i].required_glsl,
  map[i].required_essl) ||
(state->NV_image_formats_enable &&
 map[i].nv_image_formats)) &&

So you'd need to leave the essl version alone, otherwise the
NV_image_formats check has no effect. Also, do you plan on
differentiating the case "have NV_image_formats but not
EXT_texture_norm16"?

Sounds like you need to add some negative compiler tests.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] mesa: GL_EXT_texture_norm16 extension plumbing

2018-04-16 Thread Tapani Pälli

Hi;

On 16.04.2018 18:19, Ilia Mirkin wrote:

You never check EXT_texture_norm16 anywhere... is that right? Should
probably respect that in the _mesa_is_es3_* helpers. Also what about


Oops that is very correct, will add the checks.


shader support? I see no mention in the spec that these should be
usable as images, except when NV_image_formats is enabled. You're just
exposing them no matter what in ES 3.10.


Layout qualifiers added by the spec are exposed only when 3.10 ES and 
NV_image_formats are both there, the boolean in map below (last column) 
is the check for NV_image_formats, without it you'll get compilation error.




On Mon, Apr 16, 2018 at 11:04 AM, Tapani Pälli  wrote:

Patch enables use of short and unsigned short data for texture uploads,
rendering and reading of framebuffers within the restrictions specified
in GL_EXT_texture_norm16 spec.

Patch also enables those 16bit format layout qualifiers listed in
GL_NV_image_formats that depend on EXT_texture_norm16.

Signed-off-by: Tapani Pälli 
---
  src/compiler/glsl/glsl_parser.yy | 12 +-
  src/mesa/main/extensions_table.h |  1 +
  src/mesa/main/glformats.c| 51 
  src/mesa/main/mtypes.h   |  1 +
  src/mesa/main/readpix.c  | 10 
  src/mesa/main/shaderimage.c  |  7 +++---
  6 files changed, 72 insertions(+), 10 deletions(-)

diff --git a/src/compiler/glsl/glsl_parser.yy b/src/compiler/glsl/glsl_parser.yy
index e5ea41d4df..d6c6f39f6c 100644
--- a/src/compiler/glsl/glsl_parser.yy
+++ b/src/compiler/glsl/glsl_parser.yy
@@ -1340,18 +1340,18 @@ layout_qualifier_id:
 { "r32i", GL_R32I, GLSL_TYPE_INT, 130, 310, false },
 { "r16i", GL_R16I, GLSL_TYPE_INT, 130, 0, true },
 { "r8i", GL_R8I, GLSL_TYPE_INT, 130, 0, true },
-   { "rgba16", GL_RGBA16, GLSL_TYPE_FLOAT, 130, 0, false },
+   { "rgba16", GL_RGBA16, GLSL_TYPE_FLOAT, 130, 310, true },
 { "rgb10_a2", GL_RGB10_A2, GLSL_TYPE_FLOAT, 130, 0, true },
 { "rgba8", GL_RGBA8, GLSL_TYPE_FLOAT, 130, 310, false },
-   { "rg16", GL_RG16, GLSL_TYPE_FLOAT, 130, 0, false },
+   { "rg16", GL_RG16, GLSL_TYPE_FLOAT, 130, 310, true },
 { "rg8", GL_RG8, GLSL_TYPE_FLOAT, 130, 0, true },
-   { "r16", GL_R16, GLSL_TYPE_FLOAT, 130, 0, false },
+   { "r16", GL_R16, GLSL_TYPE_FLOAT, 130, 310, true },
 { "r8", GL_R8, GLSL_TYPE_FLOAT, 130, 0, true },
-   { "rgba16_snorm", GL_RGBA16_SNORM, GLSL_TYPE_FLOAT, 130, 0, 
false },
+   { "rgba16_snorm", GL_RGBA16_SNORM, GLSL_TYPE_FLOAT, 130, 310, 
true },
 { "rgba8_snorm", GL_RGBA8_SNORM, GLSL_TYPE_FLOAT, 130, 310, 
false },
-   { "rg16_snorm", GL_RG16_SNORM, GLSL_TYPE_FLOAT, 130, 0, false },
+   { "rg16_snorm", GL_RG16_SNORM, GLSL_TYPE_FLOAT, 130, 310, true 
},
 { "rg8_snorm", GL_RG8_SNORM, GLSL_TYPE_FLOAT, 130, 0, true },
-   { "r16_snorm", GL_R16_SNORM, GLSL_TYPE_FLOAT, 130, 0, false },
+   { "r16_snorm", GL_R16_SNORM, GLSL_TYPE_FLOAT, 130, 310, true },
 { "r8_snorm", GL_R8_SNORM, GLSL_TYPE_FLOAT, 130, 0, true }
  };

diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index 492f7c3d20..ce5dd80886 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -283,6 +283,7 @@ EXT(EXT_texture_format_BGRA , dummy_true
  EXT(EXT_texture_integer , EXT_texture_integer 
   , GLL, GLC,  x ,  x , 2006)
  EXT(EXT_texture_lod_bias, dummy_true  
   , GLL,  x , ES1,  x , 1999)
  EXT(EXT_texture_mirror_clamp, EXT_texture_mirror_clamp
   , GLL, GLC,  x ,  x , 2004)
+EXT(EXT_texture_norm16  , EXT_texture_norm16   
  ,  x ,  x ,  x ,  31, 2014)
  EXT(EXT_texture_object  , dummy_true  
   , GLL,  x ,  x ,  x , 1995)
  EXT(EXT_texture_rectangle   , NV_texture_rectangle
   , GLL,  x ,  x ,  x , 2004)
  EXT(EXT_texture_rg  , ARB_texture_rg  
   ,  x ,  x ,  x , ES2, 2011)
diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
index 1e797c24c2..b87e4e9e60 100644
--- a/src/mesa/main/glformats.c
+++ b/src/mesa/main/glformats.c
@@ -2857,6 +2857,16 @@ _mesa_es3_error_check_format_and_type(const struct 
gl_context *ctx,
  return GL_INVALID_OPERATION;
   break;

+  case GL_UNSIGNED_SHORT:
+ if (internalFormat != GL_RGBA16)
+return GL_INVALID_OPERATION;
+ break;
+
+  case GL_SHORT:
+ if (internalFormat != GL_RGBA16_SNORM)
+return GL_INVALID_OPERATION;
+  

[Mesa-dev] [PATCH] mesa/math: Allocate memory for GLmatrix elements and its inverse contiguously

2018-04-16 Thread Vlad Golovkin
When a GLmatrix's elements and its inverse are stored contiguously in memory,
it is possible to allocate, free and copy these fields with 1 function call
instead of 2.
---
 src/mesa/math/m_matrix.c | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/mesa/math/m_matrix.c b/src/mesa/math/m_matrix.c
index 57a49533de..4ab78a1fb3 100644
--- a/src/mesa/math/m_matrix.c
+++ b/src/mesa/math/m_matrix.c
@@ -1438,8 +1438,7 @@ _math_matrix_is_dirty( const GLmatrix *m )
 void
 _math_matrix_copy( GLmatrix *to, const GLmatrix *from )
 {
-   memcpy(to->m, from->m, 16 * sizeof(GLfloat));
-   memcpy(to->inv, from->inv, 16 * sizeof(GLfloat));
+   memcpy(to->m, from->m, 16 * 2 * sizeof(GLfloat));
to->flags = from->flags;
to->type = from->type;
 }
@@ -1470,12 +1469,17 @@ _math_matrix_loadf( GLmatrix *mat, const GLfloat *m )
 void
 _math_matrix_ctr( GLmatrix *m )
 {
-   m->m = _mesa_align_malloc( 16 * sizeof(GLfloat), 16 );
+   m->m = _mesa_align_malloc( 16 * 2 * sizeof(GLfloat), 16 );
if (m->m)
+   {
+  m->inv = m->m + 16;
   memcpy( m->m, Identity, sizeof(Identity) );
-   m->inv = _mesa_align_malloc( 16 * sizeof(GLfloat), 16 );
-   if (m->inv)
   memcpy( m->inv, Identity, sizeof(Identity) );
+   }
+   else
+   {
+  m->inv = NULL;
+   }
m->type = MATRIX_IDENTITY;
m->flags = 0;
 }
@@ -1493,7 +1497,6 @@ _math_matrix_dtr( GLmatrix *m )
_mesa_align_free( m->m );
m->m = NULL;
 
-   _mesa_align_free( m->inv );
m->inv = NULL;
 }
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] dri: Add __DRI_IMAGE_FORMAT_SABGR8

2018-04-16 Thread Kenneth Graunke
On Monday, March 19, 2018 4:41:44 AM PDT Tapani Pälli wrote:
> Add format definition and required plumbing to create images.
> Note that there is no match to drm_fourcc definition, just like
> with existing _DRI_IMAGE_FOURCC_SARGB.
> 
> Signed-off-by: Tapani Pälli 
> ---
>  include/GL/internal/dri_interface.h| 2 ++
>  src/loader/loader_dri3_helper.c| 2 ++
>  src/mesa/drivers/dri/common/dri_util.c | 4 
>  src/mesa/drivers/dri/common/utils.c| 1 +
>  4 files changed, 9 insertions(+)
> 
> diff --git a/include/GL/internal/dri_interface.h 
> b/include/GL/internal/dri_interface.h
> index 4f4795c7ae..319a1fe4f9 100644
> --- a/include/GL/internal/dri_interface.h
> +++ b/include/GL/internal/dri_interface.h
> @@ -1253,6 +1253,7 @@ struct __DRIdri2ExtensionRec {
>  #define __DRI_IMAGE_FORMAT_YUYV 0x100f
>  #define __DRI_IMAGE_FORMAT_XBGR2101010  0x1010
>  #define __DRI_IMAGE_FORMAT_ABGR2101010  0x1011
> +#define __DRI_IMAGE_FORMAT_SABGR8   0x1012
>  
>  #define __DRI_IMAGE_USE_SHARE0x0001
>  #define __DRI_IMAGE_USE_SCANOUT  0x0002
> @@ -1289,6 +1290,7 @@ struct __DRIdri2ExtensionRec {
>  #define __DRI_IMAGE_FOURCC_ABGR  0x34324241
>  #define __DRI_IMAGE_FOURCC_XBGR  0x34324258
>  #define __DRI_IMAGE_FOURCC_SARGB 0x83324258
> +#define __DRI_IMAGE_FOURCC_SABGR 0x84324258

I don't actually know what these numbers mean, but assuming this is a
correct define, the patches look pretty reasonable to me.

Both patches are:
Reviewed-by: Kenneth Graunke 

("It's just sRGB, what could go wrong?" - quote on a tombstone)


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] clover: Fix host access validation for sub-buffer creation

2018-04-16 Thread Francisco Jerez
Aaron Watry  writes:

>   From CL 1.2 Section 5.2.1:
> CL_INVALID_VALUE if buffer was created with CL_MEM_HOST_WRITE_ONLY and
> flags specify CL_MEM_HOST_READ_ONLY , or if buffer was created with
> CL_MEM_HOST_READ_ONLY and flags specify CL_MEM_HOST_WRITE_ONLY , or if
> buffer was created with CL_MEM_HOST_NO_ACCESS and flags specify
> CL_MEM_HOST_READ_ONLY or CL_MEM_HOST_WRITE_ONLY .
>
> Fixes CL 1.2 CTS test/api get_buffer_info
>

What combination of flags is the test-case providing for both the
parent and sub buffer?

> Signed-off-by: Aaron Watry 
> Cc: Francisco Jerez 
> ---
>  src/gallium/state_trackers/clover/api/memory.cpp | 8 ++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/state_trackers/clover/api/memory.cpp 
> b/src/gallium/state_trackers/clover/api/memory.cpp
> index 9b3cd8b1f5..451e8a8c56 100644
> --- a/src/gallium/state_trackers/clover/api/memory.cpp
> +++ b/src/gallium/state_trackers/clover/api/memory.cpp
> @@ -57,10 +57,14 @@ namespace {
>parent.flags() & host_access_flags) |
>   (parent.flags() & host_ptr_flags));
>  
> - if (~flags & parent.flags() &
> - ((dev_access_flags & ~CL_MEM_READ_WRITE) | host_access_flags))
> + if (~flags & parent.flags() & (dev_access_flags & 
> ~CL_MEM_READ_WRITE))
>  throw error(CL_INVALID_VALUE);
>  
> + //Check if new host access flags cause a mismatch between 
> host-read/write-only.
> + const cl_mem_flags new_flags = flags & ~(parent.flags()) & 
> ~CL_MEM_HOST_NO_ACCESS;
> + if (new_flags & host_access_flags & parent.flags())
> +throw error (CL_INVALID_VALUE);
> +

This doesn't look correct to me: the condition will always evaluate to
zero. You're calculating the conjunction of ~parent.flags() and
parent.flags(), which is zero, so the error will never be emitted.

>   return flags;
>  
>} else {
> -- 
> 2.14.1


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] bin/install_megadrivers: fix DESTDIR and -D*-path

2018-04-16 Thread Dylan Baker
Quoting Dylan Baker (2018-04-13 08:46:46)
> Quoting Dylan Baker (2018-04-09 14:02:51)
> > This fixes -Ddri-drivers-path, -Dvdpau-libs-path, etc. with DESTDIR when
> > those paths are absolute. Currently due to the way python's os.path.join
> > handles absolute paths these will ignore DESTDIR, which is bad. This
> > fixes them to be relative to DESTDIR if that is set.
> > 
> > Fixes: 3218056e0eb375eeda470058d06add1532acd6d4
> >("meson: Build i965 and dri stack")
> > Signed-off-by: Dylan Baker 
> > ---
> >  bin/install_megadrivers.py | 8 ++--
> >  1 file changed, 6 insertions(+), 2 deletions(-)
> > 
> > diff --git a/bin/install_megadrivers.py b/bin/install_megadrivers.py
> > index 7931a544bd2..c04a2a3eb34 100755
> > --- a/bin/install_megadrivers.py
> > +++ b/bin/install_megadrivers.py
> > @@ -1,6 +1,6 @@
> >  #!/usr/bin/env python
> >  # encoding=utf-8
> > -# Copyright © 2017 Intel Corporation
> > +# Copyright © 2017-2018 Intel Corporation
> >  
> >  # Permission is hereby granted, free of charge, to any person obtaining a 
> > copy
> >  # of this software and associated documentation files (the "Software"), to 
> > deal
> > @@ -35,7 +35,11 @@ def main():
> >  parser.add_argument('drivers', nargs='+')
> >  args = parser.parse_args()
> >  
> > -to = os.path.join(os.environ.get('MESON_INSTALL_DESTDIR_PREFIX'), 
> > args.libdir)
> > +if os.path.isabs(args.libdir):
> > +to = os.path.join(os.environ.get('DESTDIR', '/'), args.libdir[1:])
> > +else:
> > +to = os.path.join(os.environ['MESON_INSTALL_DESTDIR_PREFIX'], 
> > args.libdir)
> > +
> >  master = os.path.join(to, os.path.basename(args.megadriver))
> >  
> >  if not os.path.exists(to):
> > -- 
> > 2.17.0
> > 
> 
> ping. The first patch is really needed for 18.0.1
> 
> Dylan

anybody?


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] anv, radv: Drop XML workarounds for VK_ANDROID_native_buffer

2018-04-16 Thread Dylan Baker
Quoting Jason Ekstrand (2018-04-16 07:38:53)
> ---
>  src/amd/vulkan/radv_extensions.py  | 7 +--
>  src/intel/vulkan/anv_extensions_gen.py | 7 +--
>  2 files changed, 2 insertions(+), 12 deletions(-)
> 
> diff --git a/src/amd/vulkan/radv_extensions.py 
> b/src/amd/vulkan/radv_extensions.py
> index db37d61..099cae7 100644
> --- a/src/amd/vulkan/radv_extensions.py
> +++ b/src/amd/vulkan/radv_extensions.py
> @@ -161,12 +161,7 @@ def _init_exts_from_xml(xml):
>  continue
>  
>  ext = ext_name_map[ext_name]
> -if ext_name == 'VK_ANDROID_native_buffer':
> -# VK_ANDROID_native_buffer is missing the type specifier.  Just
> -# hard-code it to be a device extension for now.
> -ext.type = 'device'
> -else:
> -ext.type = ext_elem.attrib['type']
> +ext.type = ext_elem.attrib['type']
>  
>  _TEMPLATE_H = Template(COPYRIGHT + """
>  #ifndef RADV_EXTENSIONS_H
> diff --git a/src/intel/vulkan/anv_extensions_gen.py 
> b/src/intel/vulkan/anv_extensions_gen.py
> index 57a5285..5ea8220 100644
> --- a/src/intel/vulkan/anv_extensions_gen.py
> +++ b/src/intel/vulkan/anv_extensions_gen.py
> @@ -46,12 +46,7 @@ def _init_exts_from_xml(xml):
>  continue
>  
>  ext = ext_name_map[ext_name]
> -if ext_name == 'VK_ANDROID_native_buffer':
> -# VK_ANDROID_native_buffer is missing the type specifier.  Just
> -# hard-code it to be a device extension for now.
> -ext.type = 'device'
> -else:
> -ext.type = ext_elem.attrib['type']
> +ext.type = ext_elem.attrib['type']
>  
>  _TEMPLATE_H = Template(COPYRIGHT + """
>  
> -- 
> 2.5.0.400.gff86faf
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

I gave you an informal rb this morning in person,

Reviewed-by: Dylan Baker 


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] meson: Version libMesaOpenCL like autotools does

2018-04-16 Thread Dylan Baker
Quoting Jan Alexander Steffens (heftig) (2018-04-14 10:23:22)
> This is for parity with autotools. It names the library
> libMesaOpenCL.so.1.0.0 and points mesa.icd to the .1 symlink.
> 
> opencl_version now matches configure.ac's OPENCL_VERSION.
> 
> Signed-off-by: Jan Alexander Steffens (heftig) 
> ---
>  src/gallium/targets/opencl/meson.build | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/src/gallium/targets/opencl/meson.build 
> b/src/gallium/targets/opencl/meson.build
> index bebe0547d4..317ad8dab4 100644
> --- a/src/gallium/targets/opencl/meson.build
> +++ b/src/gallium/targets/opencl/meson.build
> @@ -20,7 +20,7 @@
>  
>  opencl_link_args = []
>  opencl_link_deps = []
> -opencl_version = '1.0'
> +opencl_version = '1'
>  
>  if with_ld_version_script
>opencl_link_args += [
> @@ -55,7 +55,7 @@ libopencl = shared_library(
>  cpp.find_library('clangLex', dirs : llvm_libdir),
>  cpp.find_library('clangBasic', dirs : llvm_libdir),
>],
> -  version : opencl_version,
> +  version : '@0@.0.0'.format(opencl_version),
>install : true,
>  )
>  
> -- 
> 2.16.2
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reviewed-by: Dylan Baker 


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] meson: Add library versions to swr drivers

2018-04-16 Thread Dylan Baker
Quoting Jan Alexander Steffens (heftig) (2018-04-14 10:23:21)
> This is for parity with autotools.
> 
> Signed-off-by: Jan Alexander Steffens (heftig) 
> ---
>  src/gallium/drivers/swr/meson.build | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/src/gallium/drivers/swr/meson.build 
> b/src/gallium/drivers/swr/meson.build
> index 4bcd4f4e38..e722602403 100644
> --- a/src/gallium/drivers/swr/meson.build
> +++ b/src/gallium/drivers/swr/meson.build
> @@ -201,6 +201,7 @@ if with_swr_arches.contains('avx')
>  link_args : [ld_args_gc_sections],
>  include_directories : [swr_incs],
>  dependencies : [dep_thread, dep_llvm],
> +version : '0.0.0',
>  install : true,
>)
>  endif
> @@ -233,6 +234,7 @@ if with_swr_arches.contains('avx2')
>  link_args : [ld_args_gc_sections],
>  include_directories : [swr_incs],
>  dependencies : [dep_thread, dep_llvm],
> +version : '0.0.0',
>  install : true,
>)
>  endif
> @@ -260,30 +262,32 @@ if with_swr_arches.contains('knl')
>  link_args : [ld_args_gc_sections],
>  include_directories : [swr_incs],
>  dependencies : [dep_thread, dep_llvm],
> +version : '0.0.0',
>  install : true,
>)
>  endif
>  
>  if with_swr_arches.contains('skx')
>swr_skx_args = cpp.first_supported_argument(
>  '-target-cpu=x86-skylake', '-march=skylake-avx512', '-xCORE-AVX512',
>  prefix : '''
>#if !defined(__AVX512F__) || !defined(__AVX512BW__)
># error
>#endif ''',
>)
>if swr_skx_args == []
>  error('Cannot find SKX support for swr.')
>endif
>  
>swr_arch_defines += '-DHAVE_SWR_SKX'
>swr_arch_libs += shared_library(
>  'swrSKX',
>  [files_swr_common, files_swr_arch],
>  cpp_args : [swr_cpp_args, swr_skx_args, '-DKNOB_ARCH=KNOB_ARCH_AVX512'],
>  link_args : [ld_args_gc_sections],
>  include_directories : [swr_incs],
>  dependencies : [dep_thread, dep_llvm],
> +version : '0.0.0',
>  install : true,
>)
>  endif
> -- 

I don't really see why it matters whether shared libraries that are for internal
use only have versions, but I don't object either:

Acked-by: Dylan Baker 


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] meson: Define NDEBUG for non-debug-builds

2018-04-16 Thread Dylan Baker
Quoting Jan Alexander Steffens (heftig) (2018-04-14 10:23:20)
> This is for parity with autotools. We were suddenly getting assertion
> failures after switching to meson, see [1].
> 
> [1]: https://bugs.archlinux.org/task/58218
> 
> Signed-off-by: Jan Alexander Steffens (heftig) 
> ---
>  meson.build | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/meson.build b/meson.build
> index a4dfa62255..931e0389e6 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -705,6 +705,8 @@ endif
>  # Define DEBUG for debug builds only (debugoptimized is not included on this 
> one)
>  if get_option('buildtype') == 'debug'
>pre_args += '-DDEBUG'
> +else
> +  pre_args += '-DNDEBUG'
>  endif
>  
>  if get_option('shader-cache')
> -- 
> 2.16.2

NAK.

meson has -Db_ndebug for controlling NDEBUG, which is done separately from
optimization levels; this is in our documentation as well as the meson
documentation. For builds without asserts, set b_ndebug=true.

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] radeonsi: don't emit partial flushes for internal CS flushes only

2018-04-16 Thread Nicolai Hähnle

On 16.04.2018 20:44, Marek Olšák wrote:
On Sun, Apr 15, 2018 at 2:47 PM, Nicolai Hähnle > wrote:


How much testing have you done with the radeon drm? It may be safer
to just skip that part of the changes.


Piglit on CIK.


Probably good enough. May be worth checking for regressions with 
deqp_egl though, that tends to expose problems with async flushes.


Cheers,
Nicolai



Marek



--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl_to_tgsi: try harder to lower unsupported ir_binop_vector_extract

2018-04-16 Thread Marek Olšák
From: Marek Olšák 

Cc: 18.0 
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 01a58d7d2c6..b321112cf88 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -7117,20 +7117,25 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
  /* Repeat it until it stops making changes. */
  bool progress;
  do {
 progress = do_common_optimization(ir, true, true, options,
   ctx->Const.NativeIntegers);
 progress |= lower_if_to_cond_assign((gl_shader_stage)i, ir,
 options->MaxIfDepth, 
if_threshold);
  } while (progress);
   }
 
+  /* Do this again to lower ir_binop_vector_extract introduced
+   * by optimization passes.
+   */
+  do_vec_index_to_cond_assign(ir);
+
   validate_ir_tree(ir);
}
 
build_program_resource_list(ctx, prog);
 
if (use_nir)
   return st_link_nir(ctx, prog);
 
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
   struct gl_linked_shader *shader = prog->_LinkedShaders[i];
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nv50: Extract needed value bits without shifting them before calling bitcount

2018-04-16 Thread Vlad Golovkin
This can save one instruction since bitcount doesn't care about specific
bits' positions.
---
 src/gallium/drivers/nouveau/nv50/nv50_screen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 6fd2982e3c..98eb0309ad 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -982,7 +982,7 @@ nv50_screen_create(struct nouveau_device *dev)
nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, );
 
screen->TPs = util_bitcount(value & 0x);
-   screen->MPsInTP = util_bitcount((value >> 24) & 0xf);
+   screen->MPsInTP = util_bitcount(value & 0x0f000000);
 
screen->mp_count = screen->TPs * screen->MPsInTP;
 
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 0/3] nvc0: Various improvements to nvc0_hw_get_query_result_resource

2018-04-16 Thread Rhys Perry
This patch set applies improvements related to the query buffer object
functionality of the nvc0 driver.

Changes in v2:
- Increase space requirement in patch 3 to ensure there is room for fence
  emission.

Rhys Perry (3):
  nvc0: set current bufctx to nvc0->bufctx at the end of nvc0_draw_vbo()
  nvc0: rewrite query buffer write macro to output 64-bit predicates
  nvc0: use a macro to write query result availability to a buffer

 src/gallium/drivers/nouveau/nvc0/mme/com9097.mme   | 136 --
 src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h |  98 +
 src/gallium/drivers/nouveau/nvc0/nvc0_macros.h |   2 +
 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c   | 152 +++--
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c |   1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c|   2 +-
 6 files changed, 250 insertions(+), 141 deletions(-)

-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 3/3] nvc0: use a macro to write query result availability to a buffer

2018-04-16 Thread Rhys Perry
Both the availability and result paths shared a bit of code so they were
merged.

Signed-off-by: Rhys Perry 
---
 src/gallium/drivers/nouveau/nvc0/mme/com9097.mme   |  45 
 src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h |  34 ++
 src/gallium/drivers/nouveau/nvc0/nvc0_macros.h |   2 +
 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c   | 128 ++---
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c |   1 +
 5 files changed, 141 insertions(+), 69 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme 
b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
index 371aa0ce36..c7c544aea0 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
@@ -563,3 +563,48 @@ qbw_write:
 qbw_done:
exit send (extrinsrt 0x0 $r7 0 16 16)
maddrsend 0x44 /* SERIALIZE */
+
+/* NVC0_3D_MACRO_QUERY_BUFFER_WRITE_AVAIL:
+ *
+ * Like NVC0_3D_MACRO_QUERY_BUFFER_WRITE, this uses the query engine to write
+ * out values.
+ *
+ * arg = write64 ? 1 : 0
+ * parm[0] = desired sequence
+ * parm[1] = actual sequence
+ * parm[2] = LSB of destination address
+ * parm[3] = MSB of destination address
+ */
+.section #mme9097_query_buffer_write_avail
+   parm $r2
+   parm $r3
+   parm $r4
+   parm $r5
+   mov $r6 (sub $r3 $r2)
+   mov $r6 (sbb 0x0 0x0)
+   branz annul $r6 #qbwa_not_avail
+qbwa_avail:
+   mov $r6 0x1
+   bra annul #qbwa_write
+qbwa_not_avail:
+   mov $r6 0x0
+qbwa_write:
+   maddr 0x16c0 /* QUERY_ADDRESS_HIGH */
+   send $r5
+   send $r4
+   send $r6
+   braz $r1 #qbwa_done
+   mov $r7 0x1000
+   send (extrinsrt 0x0 $r7 0 16 16)
+qbwa_high:
+   /* XXX: things seem to mess up if $r6 is replaced with 0x4 in the add */
+   mov $r6 0x4
+   mov $r4 (add $r4 $r6)
+   mov $r5 (adc $r5 0x0)
+   maddr 0x16c0 /* QUERY_ADDRESS_HIGH */
+   send $r5
+   send $r4
+   send 0x0
+qbwa_done:
+   exit send (extrinsrt 0x0 $r7 0 16 16)
+   maddrsend 0x44 /* SERIALIZE */
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h 
b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
index 083692f73a..aa416e6df8 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
@@ -379,3 +379,37 @@ uint32_t mme9097_query_buffer_write[] = {
0x8401c0c2,
0x00110071,
 };
+
+uint32_t mme9097_query_buffer_write_avail[] = {
+   0x0201,
+/* 0x0007: qbwa_avail */
+   0x0301,
+/* 0x0009: qbwa_not_avail */
+/* 0x000a: qbwa_write */
+   0x0401,
+   0x0501,
+/* 0x0011: qbwa_high */
+   0x00049e10,
+   0x00060610,
+/* 0x0018: qbwa_done */
+   0xf037,
+   0x4611,
+   0x8027,
+   0x0611,
+   0x05b00021,
+   0x2841,
+   0x2041,
+   0x3041,
+   0x00028807,
+   0x04000711,
+   0x8401c042,
+   0x00010611,
+   0x0001a410,
+   0x00022d10,
+   0x05b00021,
+   0x2841,
+   0x2041,
+   0x0041,
+   0x8401c0c2,
+   0x00110071,
+};
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h 
b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
index eeacc714f3..2964728a93 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
@@ -37,4 +37,6 @@
 
 #define NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT   0x3860
 
+#define NVC0_3D_MACRO_QUERY_BUFFER_WRITE_AVAIL 0x3870
+
 #endif /* __NVC0_MACROS_H__ */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
index 296cdacb62..0ba0066415 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
@@ -374,29 +374,11 @@ nvc0_hw_get_query_result_resource(struct nvc0_context 
*nvc0,
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_hw_query *hq = nvc0_hw_query(q);
struct nv04_resource *buf = nv04_resource(resource);
-   unsigned qoffset = 0, stride;
bool predicate = false;
-   uint32_t arg;
+   uint32_t arg = result_type >= PIPE_QUERY_TYPE_I64 ? 1 : 0;
 
assert(!hq->funcs || !hq->funcs->get_query_result);
 
-   if (index == -1) {
-  /* TODO: Use a macro to write the availability of the query */
-  if (hq->state != NVC0_HW_QUERY_STATE_READY)
- nvc0_hw_query_update(nvc0->screen->base.client, q);
-  uint32_t ready[2] = {hq->state == NVC0_HW_QUERY_STATE_READY};
-  nvc0->base.push_cb(>base, buf, offset,
- result_type >= PIPE_QUERY_TYPE_I64 ? 2 : 1,
- ready);
-
-  util_range_add(>valid_buffer_range, offset,
- offset + (result_type >= PIPE_QUERY_TYPE_I64 ? 8 : 4));
-
-  nvc0_resource_validate(buf, NOUVEAU_BO_WR);
-
-  return;
-   }
-
switch (q->type) {
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
@@ -406,7 

Re: [Mesa-dev] [PATCH v2] virgl: disable virgl when no 3D for virtio gpu.

2018-04-16 Thread Lepton Wu
On Thu, Apr 5, 2018 at 12:38 PM, Lepton Wu  wrote:
> If users are running mesa under old version of qemu or have turned off
> GL at runtime, virtio gpu driver actually doesn't work. Adds a detection
> here so mesa can fall back to software rendering.
>
> v2:
>  - move detection from loader to virgl (Ilia, Emil)
Ping. Thanks.
>
> Signed-off-by: Lepton Wu 
> ---
>  src/gallium/winsys/virgl/drm/virgl_drm_winsys.c | 11 +++
>  1 file changed, 11 insertions(+)
>
> diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c 
> b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
> index cf3c3bac4b..4198ed7feb 100644
> --- a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
> +++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
> @@ -800,8 +800,15 @@ virgl_drm_winsys_create(int drmFD)
>  {
> struct virgl_drm_winsys *qdws;
> int ret;
> +   int gl = 0;
> struct drm_virtgpu_getparam getparam = {0};
>
> +   getparam.param = VIRTGPU_PARAM_3D_FEATURES;
> +   getparam.value = (uint64_t)(uintptr_t)&gl;
> +   ret = drmIoctl(drmFD, DRM_IOCTL_VIRTGPU_GETPARAM, &getparam);
> +   if (ret < 0 || !gl)
> +  return NULL;
> +
> qdws = CALLOC_STRUCT(virgl_drm_winsys);
> if (!qdws)
>return NULL;
> @@ -914,6 +921,10 @@ virgl_drm_screen_create(int fd)
>int dup_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
>
>vws = virgl_drm_winsys_create(dup_fd);
> +  if (!vws) {
> + close(dup_fd);
> + goto unlock;
> +  }
>
>pscreen = virgl_create_screen(vws);
>if (pscreen) {
> --
> 2.17.0.484.g0c8726318c-goog
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: fix ir_binop_gequal glsl_to_nir conversion

2018-04-16 Thread Jason Ekstrand

Pushed.  Thanks and welcome to Mesa!

On April 14, 2018 12:26:18 Jason Ekstrand  wrote:


Reviewed-by: Jason Ekstrand 

What driver is hitting this path?  The !supports_ints path isn't used to my
knowledge so if some driver has started using it, they're liable to find
more bugs than just this one. :-)

On April 14, 2018 12:16:48 Erico Nunes  wrote:

> ir_binop_gequal needs to be converted to nir_op_sge when native integers
> are not supported in the driver.
> Otherwise it becomes no different than ir_binop_less after the
> conversion.
>
> Signed-off-by: Erico Nunes 
> ---
> src/compiler/glsl/glsl_to_nir.cpp | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/compiler/glsl/glsl_to_nir.cpp
> b/src/compiler/glsl/glsl_to_nir.cpp
> index 17d58acc4c..8e5e9c3491 100644
> --- a/src/compiler/glsl/glsl_to_nir.cpp
> +++ b/src/compiler/glsl/glsl_to_nir.cpp
> @@ -1832,7 +1832,7 @@ nir_visitor::visit(ir_expression *ir)
>  else
> result = nir_uge(&b, srcs[0], srcs[1]);
>   } else {
> - result = nir_slt(&b, srcs[0], srcs[1]);
> + result = nir_sge(&b, srcs[0], srcs[1]);
>   }
>   break;
>case ir_binop_equal:
> --
> 2.14.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106090] Compiling compute shader crashes RADV

2018-04-16 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106090

Bug ID: 106090
   Summary: Compiling compute shader crashes RADV
   Product: Mesa
   Version: 18.0
  Hardware: Other
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: Drivers/Vulkan/radeon
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: mais...@archlinux.us
QA Contact: mesa-dev@lists.freedesktop.org

Created attachment 138871
  --> https://bugs.freedesktop.org/attachment.cgi?id=138871&action=edit
Dump

To reproduce on RX470 (or something similar):

git clone git://github.com/Themaister/Fossilize
cd Fossilize
git submodule update --init
mkdir build
cd build
cmake ..
make -j8
./cli/fossilize-replay pipelines.json --enable-validation --filter-compute 2

...

WARNING: radv is not a conformant vulkan implementation, testing use only.
Fossilize INFO: Chose GPU:
Fossilize INFO:   name: AMD RADV POLARIS10 (LLVM 6.0.0)
Fossilize INFO:   apiVersion: 1.0.57
Fossilize INFO:   vendorID: 0x1002
Fossilize INFO:   deviceID: 0x67df
Fossilize INFO: Creating shader module #0
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating shader module #1
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating shader module #2
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating shader module #3
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating shader module #4
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating shader module #5
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating shader module #6
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating shader module #7
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating shader module #8
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating shader module #9
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating shader module #10
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating shader module #11
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating shader module #12
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating shader module #13
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating shader module #14
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating shader module #15
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating shader module #16
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating shader module #17
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating shader module #18
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating shader module #19
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating shader module #20
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating shader module #21
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating descriptor set layout #0
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating descriptor set layout #1
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating descriptor set layout #2
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating descriptor set layout #3
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating descriptor set layout #4
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating descriptor set layout #5
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating descriptor set layout #6
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating descriptor set layout #7
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating descriptor set layout #8
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating descriptor set layout #9
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating descriptor set layout #10
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating descriptor set layout #11
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating descriptor set layout #12
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating descriptor set layout #13
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating descriptor set layout #14
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating descriptor set layout #15
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating pipeline layout #0
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating pipeline layout #1
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating pipeline layout #2
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating pipeline layout #3
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating pipeline layout #4
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating pipeline layout #5
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating pipeline layout #6
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating pipeline layout #7
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating pipeline layout #8
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating pipeline layout #9
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating pipeline layout #10
Fossilize INFO:  ... Succeeded!
Fossilize INFO: Creating pipeline layout #11
Fossilize INFO:  ... Succeeded!

Re: [Mesa-dev] [PATCH 3/3] radeonsi: don't emit partial flushes for internal CS flushes only

2018-04-16 Thread Marek Olšák
On Sun, Apr 15, 2018 at 2:47 PM, Nicolai Hähnle  wrote:

> How much testing have you done with the radeon drm? It may be safer to
> just skip that part of the changes.
>

Piglit on CIK.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] swr: Remove unnecessary memset call

2018-04-16 Thread Kyriazis, George

Reviewed-By: George Kyriazis 
>


On Apr 12, 2018, at 8:49 PM, Vlad Golovkin 
> wrote:

Zeroing memory after calloc is not necessary. Removing the call also avoids
a possible crash when allocation fails, because memset was called before
checking screen for NULL.
---
src/gallium/drivers/swr/swr_screen.cpp | 1 -
1 file changed, 1 deletion(-)

diff --git a/src/gallium/drivers/swr/swr_screen.cpp 
b/src/gallium/drivers/swr/swr_screen.cpp
index 880a177c39..4e43ac55fb 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -1130,7 +1130,6 @@ struct pipe_screen *
swr_create_screen_internal(struct sw_winsys *winsys)
{
   struct swr_screen *screen = CALLOC_STRUCT(swr_screen);
-   memset(screen, 0, sizeof(struct swr_screen));

   if (!screen)
  return NULL;
--
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] intel/compiler: grf127 can not be dest when src and dest overlap in send

2018-04-16 Thread Chema Casanova
On 15/04/18 08:55, Matt Turner wrote:
> On Wed, Apr 11, 2018 at 7:30 PM, Jose Maria Casanova Crespo
>  wrote:
>> Implement at brw_eu_validate the restriction from Intel Broadwell PRM, vol 
>> 07,
>> section "Instruction Set Reference", subsection "EUISA Instructions", Send
>> Message (page 990):
>>
>> "r127 must not be used for return address when there is a src and dest 
>> overlap
>> in send instruction."
>>
>> Cc: Jason Ekstrand 
>> Cc: Matt Turner 
>> ---
>>  src/intel/compiler/brw_eu_validate.c | 9 +
>>  1 file changed, 9 insertions(+)
>>
>> diff --git a/src/intel/compiler/brw_eu_validate.c 
>> b/src/intel/compiler/brw_eu_validate.c
>> index d3189d1ef5e..0d711501303 100644
>> --- a/src/intel/compiler/brw_eu_validate.c
>> +++ b/src/intel/compiler/brw_eu_validate.c
>> @@ -261,6 +261,15 @@ send_restrictions(const struct gen_device_info *devinfo,
>>brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
>>"send with EOT must use g112-g127");
>>}
> 
> Put a newline here

Fixed locally.

>> +  if (devinfo->gen >= 8) {
>> + ERROR_IF(!dst_is_null(devinfo, inst) &&
>> +  (brw_inst_dst_da_reg_nr(devinfo, inst) +
>> +   brw_inst_rlen(devinfo, inst) > 127 ) &&
> 
> Remove the extra space after 127

Fixed locally.

>> +  (brw_inst_src0_da_reg_nr(devinfo, inst) +
>> +   brw_inst_mlen(devinfo, inst) >
>> +   brw_inst_dst_da_reg_nr(devinfo, inst)),
>> +  "r127 can not be dest when src and dest overlap in send");
> 
> I'd change the message to more closely match the docs:
> 
> "r127 must not be used for return address when there is a src and dest 
> overlap"
> 
> Thank you for extending the validator!
> 
> Reviewed-by: Matt Turner 

Thank you for the review.

Chema
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106074] radv: si_scissor_from_viewport returns incorrect result when using half-pixel viewport offset

2018-04-16 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106074

gloriouseggr...@gmail.com changed:

   What|Removed |Added

 CC||gloriouseggr...@gmail.com

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] mesa: GL_EXT_texture_norm16 extension plumbing

2018-04-16 Thread Ilia Mirkin
You never check EXT_texture_norm16 anywhere... is that right? Should
probably respect that in the _mesa_is_es3_* helpers. Also what about
shader support? I see no mention in the spec that these should be
usable as images, except when NV_image_formats is enabled. You're just
exposing them no matter what in ES 3.10.

On Mon, Apr 16, 2018 at 11:04 AM, Tapani Pälli  wrote:
> Patch enables use of short and unsigned short data for texture uploads,
> rendering and reading of framebuffers within the restrictions specified
> in GL_EXT_texture_norm16 spec.
>
> Patch also enables those 16bit format layout qualifiers listed in
> GL_NV_image_formats that depend on EXT_texture_norm16.
>
> Signed-off-by: Tapani Pälli 
> ---
>  src/compiler/glsl/glsl_parser.yy | 12 +-
>  src/mesa/main/extensions_table.h |  1 +
>  src/mesa/main/glformats.c| 51 
> 
>  src/mesa/main/mtypes.h   |  1 +
>  src/mesa/main/readpix.c  | 10 
>  src/mesa/main/shaderimage.c  |  7 +++---
>  6 files changed, 72 insertions(+), 10 deletions(-)
>
> diff --git a/src/compiler/glsl/glsl_parser.yy 
> b/src/compiler/glsl/glsl_parser.yy
> index e5ea41d4df..d6c6f39f6c 100644
> --- a/src/compiler/glsl/glsl_parser.yy
> +++ b/src/compiler/glsl/glsl_parser.yy
> @@ -1340,18 +1340,18 @@ layout_qualifier_id:
> { "r32i", GL_R32I, GLSL_TYPE_INT, 130, 310, false },
> { "r16i", GL_R16I, GLSL_TYPE_INT, 130, 0, true },
> { "r8i", GL_R8I, GLSL_TYPE_INT, 130, 0, true },
> -   { "rgba16", GL_RGBA16, GLSL_TYPE_FLOAT, 130, 0, false },
> +   { "rgba16", GL_RGBA16, GLSL_TYPE_FLOAT, 130, 310, true },
> { "rgb10_a2", GL_RGB10_A2, GLSL_TYPE_FLOAT, 130, 0, true },
> { "rgba8", GL_RGBA8, GLSL_TYPE_FLOAT, 130, 310, false },
> -   { "rg16", GL_RG16, GLSL_TYPE_FLOAT, 130, 0, false },
> +   { "rg16", GL_RG16, GLSL_TYPE_FLOAT, 130, 310, true },
> { "rg8", GL_RG8, GLSL_TYPE_FLOAT, 130, 0, true },
> -   { "r16", GL_R16, GLSL_TYPE_FLOAT, 130, 0, false },
> +   { "r16", GL_R16, GLSL_TYPE_FLOAT, 130, 310, true },
> { "r8", GL_R8, GLSL_TYPE_FLOAT, 130, 0, true },
> -   { "rgba16_snorm", GL_RGBA16_SNORM, GLSL_TYPE_FLOAT, 130, 0, 
> false },
> +   { "rgba16_snorm", GL_RGBA16_SNORM, GLSL_TYPE_FLOAT, 130, 310, 
> true },
> { "rgba8_snorm", GL_RGBA8_SNORM, GLSL_TYPE_FLOAT, 130, 310, 
> false },
> -   { "rg16_snorm", GL_RG16_SNORM, GLSL_TYPE_FLOAT, 130, 0, false 
> },
> +   { "rg16_snorm", GL_RG16_SNORM, GLSL_TYPE_FLOAT, 130, 310, 
> true },
> { "rg8_snorm", GL_RG8_SNORM, GLSL_TYPE_FLOAT, 130, 0, true },
> -   { "r16_snorm", GL_R16_SNORM, GLSL_TYPE_FLOAT, 130, 0, false },
> +   { "r16_snorm", GL_R16_SNORM, GLSL_TYPE_FLOAT, 130, 310, true 
> },
> { "r8_snorm", GL_R8_SNORM, GLSL_TYPE_FLOAT, 130, 0, true }
>  };
>
> diff --git a/src/mesa/main/extensions_table.h 
> b/src/mesa/main/extensions_table.h
> index 492f7c3d20..ce5dd80886 100644
> --- a/src/mesa/main/extensions_table.h
> +++ b/src/mesa/main/extensions_table.h
> @@ -283,6 +283,7 @@ EXT(EXT_texture_format_BGRA , dummy_true
>  EXT(EXT_texture_integer , EXT_texture_integer
> , GLL, GLC,  x ,  x , 2006)
>  EXT(EXT_texture_lod_bias, dummy_true 
> , GLL,  x , ES1,  x , 1999)
>  EXT(EXT_texture_mirror_clamp, EXT_texture_mirror_clamp   
> , GLL, GLC,  x ,  x , 2004)
> +EXT(EXT_texture_norm16  , EXT_texture_norm16 
> ,  x ,  x ,  x ,  31, 2014)
>  EXT(EXT_texture_object  , dummy_true 
> , GLL,  x ,  x ,  x , 1995)
>  EXT(EXT_texture_rectangle   , NV_texture_rectangle   
> , GLL,  x ,  x ,  x , 2004)
>  EXT(EXT_texture_rg  , ARB_texture_rg 
> ,  x ,  x ,  x , ES2, 2011)
> diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
> index 1e797c24c2..b87e4e9e60 100644
> --- a/src/mesa/main/glformats.c
> +++ b/src/mesa/main/glformats.c
> @@ -2857,6 +2857,16 @@ _mesa_es3_error_check_format_and_type(const struct 
> gl_context *ctx,
>  return GL_INVALID_OPERATION;
>   break;
>
> +  case GL_UNSIGNED_SHORT:
> + if (internalFormat != GL_RGBA16)
> +return GL_INVALID_OPERATION;
> + break;
> +
> +  case GL_SHORT:
> + if (internalFormat != GL_RGBA16_SNORM)
> +return GL_INVALID_OPERATION;
> + break;
> +
>case GL_UNSIGNED_SHORT_4_4_4_4:
>   switch (internalFormat) {
>   case GL_RGBA:
> @@ -2984,6 +2994,16 @@ 

Re: [Mesa-dev] [PATCH 3/3] meson: Version libMesaOpenCL like autotools does

2018-04-16 Thread Aaron Watry
Yes, please.

Fixes clover platform and device detection when installed in
/usr/local/ using ocl-icd for me when building with meson. I think
that was the last thing blocking me from switching over from autotools
on the rest of my machines.

Not familiar enough with the versioning/linking in meson, but you can
definitely have a:
Tested-By: Aaron Watry 

--Aaron

On Sat, Apr 14, 2018 at 12:23 PM, Jan Alexander Steffens (heftig)
 wrote:
> This is for parity with autotools. It names the library
> libMesaOpenCL.so.1.0.0 and points mesa.icd to the .1 symlink.
>
> opencl_version now matches configure.ac's OPENCL_VERSION.
>
> Signed-off-by: Jan Alexander Steffens (heftig) 
> ---
>  src/gallium/targets/opencl/meson.build | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/targets/opencl/meson.build 
> b/src/gallium/targets/opencl/meson.build
> index bebe0547d4..317ad8dab4 100644
> --- a/src/gallium/targets/opencl/meson.build
> +++ b/src/gallium/targets/opencl/meson.build
> @@ -20,7 +20,7 @@
>
>  opencl_link_args = []
>  opencl_link_deps = []
> -opencl_version = '1.0'
> +opencl_version = '1'
>
>  if with_ld_version_script
>opencl_link_args += [
> @@ -55,7 +55,7 @@ libopencl = shared_library(
>  cpp.find_library('clangLex', dirs : llvm_libdir),
>  cpp.find_library('clangBasic', dirs : llvm_libdir),
>],
> -  version : opencl_version,
> +  version : '@0@.0.0'.format(opencl_version),
>install : true,
>  )
>
> --
> 2.16.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] i965: enable EXT_texture_norm16 for i965

2018-04-16 Thread Tapani Pälli
Signed-off-by: Tapani Pälli 
---
 src/mesa/drivers/dri/i965/intel_extensions.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 73a6c73f53..88a66e9e26 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -108,6 +108,7 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.EXT_texture_env_dot3 = true;
ctx->Extensions.EXT_texture_filter_anisotropic = true;
ctx->Extensions.EXT_texture_integer = true;
+   ctx->Extensions.EXT_texture_norm16 = true;
ctx->Extensions.EXT_texture_shared_exponent = true;
ctx->Extensions.EXT_texture_snorm = true;
ctx->Extensions.EXT_texture_sRGB = true;
-- 
2.13.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] mesa: GL_EXT_texture_norm16 extension plumbing

2018-04-16 Thread Tapani Pälli
Patch enables use of short and unsigned short data for texture uploads,
rendering and reading of framebuffers within the restrictions specified
in GL_EXT_texture_norm16 spec.

Patch also enables those 16bit format layout qualifiers listed in
GL_NV_image_formats that depend on EXT_texture_norm16.

Signed-off-by: Tapani Pälli 
---
 src/compiler/glsl/glsl_parser.yy | 12 +-
 src/mesa/main/extensions_table.h |  1 +
 src/mesa/main/glformats.c| 51 
 src/mesa/main/mtypes.h   |  1 +
 src/mesa/main/readpix.c  | 10 
 src/mesa/main/shaderimage.c  |  7 +++---
 6 files changed, 72 insertions(+), 10 deletions(-)

diff --git a/src/compiler/glsl/glsl_parser.yy b/src/compiler/glsl/glsl_parser.yy
index e5ea41d4df..d6c6f39f6c 100644
--- a/src/compiler/glsl/glsl_parser.yy
+++ b/src/compiler/glsl/glsl_parser.yy
@@ -1340,18 +1340,18 @@ layout_qualifier_id:
{ "r32i", GL_R32I, GLSL_TYPE_INT, 130, 310, false },
{ "r16i", GL_R16I, GLSL_TYPE_INT, 130, 0, true },
{ "r8i", GL_R8I, GLSL_TYPE_INT, 130, 0, true },
-   { "rgba16", GL_RGBA16, GLSL_TYPE_FLOAT, 130, 0, false },
+   { "rgba16", GL_RGBA16, GLSL_TYPE_FLOAT, 130, 310, true },
{ "rgb10_a2", GL_RGB10_A2, GLSL_TYPE_FLOAT, 130, 0, true },
{ "rgba8", GL_RGBA8, GLSL_TYPE_FLOAT, 130, 310, false },
-   { "rg16", GL_RG16, GLSL_TYPE_FLOAT, 130, 0, false },
+   { "rg16", GL_RG16, GLSL_TYPE_FLOAT, 130, 310, true },
{ "rg8", GL_RG8, GLSL_TYPE_FLOAT, 130, 0, true },
-   { "r16", GL_R16, GLSL_TYPE_FLOAT, 130, 0, false },
+   { "r16", GL_R16, GLSL_TYPE_FLOAT, 130, 310, true },
{ "r8", GL_R8, GLSL_TYPE_FLOAT, 130, 0, true },
-   { "rgba16_snorm", GL_RGBA16_SNORM, GLSL_TYPE_FLOAT, 130, 0, 
false },
+   { "rgba16_snorm", GL_RGBA16_SNORM, GLSL_TYPE_FLOAT, 130, 310, 
true },
{ "rgba8_snorm", GL_RGBA8_SNORM, GLSL_TYPE_FLOAT, 130, 310, 
false },
-   { "rg16_snorm", GL_RG16_SNORM, GLSL_TYPE_FLOAT, 130, 0, false },
+   { "rg16_snorm", GL_RG16_SNORM, GLSL_TYPE_FLOAT, 130, 310, true 
},
{ "rg8_snorm", GL_RG8_SNORM, GLSL_TYPE_FLOAT, 130, 0, true },
-   { "r16_snorm", GL_R16_SNORM, GLSL_TYPE_FLOAT, 130, 0, false },
+   { "r16_snorm", GL_R16_SNORM, GLSL_TYPE_FLOAT, 130, 310, true },
{ "r8_snorm", GL_R8_SNORM, GLSL_TYPE_FLOAT, 130, 0, true }
 };
 
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index 492f7c3d20..ce5dd80886 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -283,6 +283,7 @@ EXT(EXT_texture_format_BGRA , dummy_true
 EXT(EXT_texture_integer , EXT_texture_integer  
  , GLL, GLC,  x ,  x , 2006)
 EXT(EXT_texture_lod_bias, dummy_true   
  , GLL,  x , ES1,  x , 1999)
 EXT(EXT_texture_mirror_clamp, EXT_texture_mirror_clamp 
  , GLL, GLC,  x ,  x , 2004)
+EXT(EXT_texture_norm16  , EXT_texture_norm16   
  ,  x ,  x ,  x ,  31, 2014)
 EXT(EXT_texture_object  , dummy_true   
  , GLL,  x ,  x ,  x , 1995)
 EXT(EXT_texture_rectangle   , NV_texture_rectangle 
  , GLL,  x ,  x ,  x , 2004)
 EXT(EXT_texture_rg  , ARB_texture_rg   
  ,  x ,  x ,  x , ES2, 2011)
diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
index 1e797c24c2..b87e4e9e60 100644
--- a/src/mesa/main/glformats.c
+++ b/src/mesa/main/glformats.c
@@ -2857,6 +2857,16 @@ _mesa_es3_error_check_format_and_type(const struct 
gl_context *ctx,
 return GL_INVALID_OPERATION;
  break;
 
+  case GL_UNSIGNED_SHORT:
+ if (internalFormat != GL_RGBA16)
+return GL_INVALID_OPERATION;
+ break;
+
+  case GL_SHORT:
+ if (internalFormat != GL_RGBA16_SNORM)
+return GL_INVALID_OPERATION;
+ break;
+
   case GL_UNSIGNED_SHORT_4_4_4_4:
  switch (internalFormat) {
  case GL_RGBA:
@@ -2984,6 +2994,16 @@ _mesa_es3_error_check_format_and_type(const struct 
gl_context *ctx,
 return GL_INVALID_OPERATION;
  break;
 
+  case GL_UNSIGNED_SHORT:
+ if (internalFormat != GL_RGB16)
+return GL_INVALID_OPERATION;
+ break;
+
+  case GL_SHORT:
+ if (internalFormat != GL_RGB16_SNORM)
+return GL_INVALID_OPERATION;
+ break;
+
   case GL_UNSIGNED_SHORT_5_6_5:
  switch (internalFormat) {
  case GL_RGB:
@@ -3115,6 +3135,16 @@ _mesa_es3_error_check_format_and_type(const struct 
gl_context *ctx,
 return 

Re: [Mesa-dev] [PATCH] nir: fix ir_binop_gequal glsl_to_nir conversion

2018-04-16 Thread Jason Ekstrand
On Mon, Apr 16, 2018 at 6:45 AM, Erico Nunes  wrote:

> On Sun, Apr 15, 2018 at 2:30 AM, Jason Ekstrand 
> wrote:
> > On April 14, 2018 12:43:35 Connor Abbott  wrote:
> > I think that it's probably impractical to use this path, and we should
> > probably delete it. There are just too many optimizations, e.g. in
> > nir_opt_algebraic and lowering passes that assume you have ints. I
> > think a better plan would be to silently convert ints to floats in the
> > lima driver, and maybe inhibit any optimizations that use bit
> > twiddling tricks if real int support isn't indicated.
> >
> > I'm not sure.  For quite a while prog_to_nir used these comparison
> > operations so we know they more or less work.  For all I know, maybe it
> > still does (I didn't actually check).  The only thing we need to worry
> about
> > in terms of correctness is any optimizations in nir_opt_algebraic which
> > consume only floats but produce integers.  Also, all drivers need to
> handle
> > imov simply because it's easy.
> >
> > That being said, we've done a lot of work to optimize the integer
> supporting
> > paths so you may actually get better code if you can figure out a good
> way
> > to lower the integers away.
>
> I'm not really using ints in my sample program, just floats. But still
> I'm getting nir_op_slt and nir_op_sge for the float comparison
> operations.
> Should I be getting nir_op_flt and nir_op_fge instead even with
> .native_integers disabled in glsl_to_nir?
>

Nope.  That's kind of what the native_integers option is for.  I'm just
saying that it isn't incredibly well tested so beware.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106074] radv: si_scissor_from_viewport returns incorrect result when using half-pixel viewport offset

2018-04-16 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106074

--- Comment #1 from Philip Rebohle  ---
Created attachment 138867
  --> https://bugs.freedesktop.org/attachment.cgi?id=138867&action=edit
Proposed patch

The attached patch fixes the issue in FF XIV. I'm not sure if that is entirely
correct, but the use of the integer version of 'abs' in the following code
looks incorrect given that 'scale[i]' can be non-integer even if the viewport
size is integer.

rect.offset.x = translate[0] - abs(scale[0]);
rect.offset.y = translate[1] - abs(scale[1]);
rect.extent.width = ceilf(translate[0] + abs(scale[0])) - rect.offset.x;
rect.extent.height = ceilf(translate[1] + abs(scale[1])) - rect.offset.y;

Also it seems that my assumption that the size should be (1,1) was incorrect,
it should indeed be (2,2). This is in line with what RadeonSI returns.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] radeonsi: implement mechanism for IBs without partial flushes at the end (v6)

2018-04-16 Thread Marek Olšák
On Mon, Apr 16, 2018, 4:52 AM Christian König <
ckoenig.leichtzumer...@gmail.com> wrote:

> Am 15.04.2018 um 20:46 schrieb Nicolai Hähnle:
> > On 07.04.2018 04:31, Marek Olšák wrote:
> >> From: Marek Olšák 
> >>
> >> (This patch doesn't enable the behavior. It will be enabled in a later
> >> commit.)
> >>
> >> Draw calls from multiple IBs can be executed in parallel.
> >>
> >> v2: do emit partial flushes on SI
> >> v3: invalidate all shader caches at the beginning of IBs
> >> v4: don't call si_emit_cache_flush in si_flush_gfx_cs if not needed,
> >>  only do this for flushes invoked internally
> >> v5: empty IBs should wait for idle if the flush requires it
> >> v6: split the commit
> >>
> >> If we artificially limit the number of draw calls per IB to 5, we'll get
> >> a lot more IBs, leading to a lot more partial flushes. Let's see how
> >> the removal of partial flushes changes GPU utilization in that scenario:
> >>
> >> With partial flushes (time busy):
> >>  CP: 99%
> >>  SPI: 86%
> >>  CB: 73%
> >>
> >> Without partial flushes (time busy):
> >>  CP: 99%
> >>  SPI: 93%
> >>  CB: 81%
> >> ---
> >>   src/gallium/drivers/radeon/radeon_winsys.h |  7 
> >>   src/gallium/drivers/radeonsi/si_gfx_cs.c   | 52
> >> ++
> >>   src/gallium/drivers/radeonsi/si_pipe.h |  1 +
> >>   3 files changed, 46 insertions(+), 14 deletions(-)
> >> [snip]
> >> +/* Always invalidate caches at the beginning of IBs, because
> >> external
> >> + * users (e.g. BO evictions and SDMA/UVD/VCE IBs) can modify our
> >> + * buffers.
> >> + *
> >> + * Note that the cache flush done by the kernel at the end of
> >> GFX IBs
> >> + * isn't useful here, because that flush can finish after the
> >> following
> >> + * IB starts drawing.
> >> + *
> >> + * TODO: Do we also need to invalidate CB & DB caches?
> >
> > I don't think so.
> >
> > Kernel buffer move: CB & DB caches use logical addressing, so should
> > be unaffected.
>
> Are you sure about that? Basically we don't do any extra invalidation
> when BOs are moved by the kernel.
>
> But on the other hand the worst that could happen when we skip
> invalidation is that we don't read the same data into the caches which
> is already in the caches. E.g. the content of the BO doesn't change,
> just its location.
>

When sdma is moving a buffer, that buffer is not being used by the gfx
queue. Caches are guaranteed to be invalidated after the last use of the
buffer, i.e. before sdma starts. I don't see a way for caches to be warm
when sdma completes.

Marek


> In other words it depends how the CB caches work.
>
> Christian.
>
> >
> > UVD: APIs should forbid writing to the currently bound framebuffer.
> >
> > CPU: Shouldn't be writing directly to the framebuffer, and even if it
> > does (linear framebuffer?), I believe OpenGL requires re-binding the
> > framebuffer.
> >
> > Cheers,
> > Nicolai
> >
> >
> >> + */
> >> +ctx->flags |= SI_CONTEXT_INV_ICACHE |
> >> +  SI_CONTEXT_INV_SMEM_L1 |
> >> +  SI_CONTEXT_INV_VMEM_L1 |
> >> +  SI_CONTEXT_INV_GLOBAL_L2 |
> >> +  SI_CONTEXT_START_PIPELINE_STATS;
> >> /* set all valid group as dirty so they get reemited on
> >>* next draw command
> >>*/
> >>   si_pm4_reset_emitted(ctx);
> >> /* The CS initialization should be emitted before everything
> >> else. */
> >>   si_pm4_emit(ctx, ctx->init_config);
> >>   if (ctx->init_config_gs_rings)
> >>   si_pm4_emit(ctx, ctx->init_config_gs_rings);
> >> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
> >> b/src/gallium/drivers/radeonsi/si_pipe.h
> >> index 0c90a6c6e46..f0f323ff3a7 100644
> >> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> >> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> >> @@ -540,20 +540,21 @@ struct si_context {
> >>   void*vs_blit_texcoord;
> >>   struct si_screen*screen;
> >>   struct pipe_debug_callbackdebug;
> >>   LLVMTargetMachineReftm; /* only non-threaded
> >> compilation */
> >>   struct si_shader_ctx_statefixed_func_tcs_shader;
> >>   struct r600_resource*wait_mem_scratch;
> >>   unsignedwait_mem_number;
> >>   uint16_tprefetch_L2_mask;
> >> boolgfx_flush_in_progress:1;
> >> +boolgfx_last_ib_is_busy:1;
> >>   boolcompute_is_busy:1;
> >> unsignednum_gfx_cs_flushes;
> >>   unsignedinitial_gfx_cs_size;
> >>   unsignedgpu_reset_counter;
> >>   unsignedlast_dirty_tex_counter;
> >>   unsignedlast_compressed_colortex_counter;
> >>   unsignedlast_num_draw_calls;
> >>   unsignedflags; /* flush flags */
> >>   /* Current unaccounted memory usage. */
> >>
> >
> >
>
> 

Re: [Mesa-dev] [PATCH v2] anv: fix number of planes for depth & stencil

2018-04-16 Thread Lionel Landwerlin

Thanks a lot Juan, it looks all good to me :)

On 16/04/18 04:09, Juan A. Suarez Romero wrote:

On Thu, 2018-04-12 at 14:54 -0700, Lionel Landwerlin wrote:

We're not counting correctly with depth & stencil images.

Additionally we need to move an assert that is meant just for color
attachments.

v2: Move an assert() (Reported by Craig)
 Change aspect mask checks (Francesco)

Signed-off-by: Lionel Landwerlin 
Fixes: a62a97933578a ("anv: enable multiple planes per image/imageView")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105994


Hi, Lionel!

As this patch fixes a commit that is part of the 17.3 stable series, and since
the next stable release, 17.3.9, is the last one for the series, I've cherry-picked
this patch to include in the release.

I solved some trivial conflicts, in case you want to verify them.

https://github.com/Igalia/release-mesa/commit/48d627d14a09fc8d95e4df48761166c634a7b701


J.A.


---
  src/intel/vulkan/anv_private.h | 4 
  src/intel/vulkan/genX_cmd_buffer.c | 2 +-
  2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 53115ae470f..52d4ba58dc9 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -2356,6 +2356,10 @@ anv_image_aspect_get_planes(VkImageAspectFlags 
aspect_mask)
 if (aspect_mask & VK_IMAGE_ASPECT_PLANE_2_BIT)
planes++;
  
+   if ((aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0 &&

+   (aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0)
+  planes++;
+
 return planes;
  }
  
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c

index 3c703f6be44..cbe623802e9 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -1248,13 +1248,13 @@ genX(cmd_buffer_setup_attachments)(struct 
anv_cmd_buffer *cmd_buffer,
  
   struct anv_image_view *iview = framebuffer->attachments[i];

   anv_assert(iview->vk_format == att->format);
- anv_assert(iview->n_planes == 1);
  
   const uint32_t num_layers = iview->planes[0].isl.array_len;

   state->attachments[i].pending_clear_views = (1 << num_layers) - 1;
  
   union isl_color_value clear_color = { .u32 = { 0, } };

   if (att_aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
+anv_assert(iview->n_planes == 1);
  assert(att_aspects == VK_IMAGE_ASPECT_COLOR_BIT);
  color_attachment_compute_aux_usage(cmd_buffer->device,
 state, i, begin->renderArea,

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] anv, radv: Drop XML workarounds for VK_ANDROID_native_buffer

2018-04-16 Thread Samuel Pitoiset

Reviewed-by: Samuel Pitoiset 

On 04/16/2018 04:38 PM, Jason Ekstrand wrote:

---
  src/amd/vulkan/radv_extensions.py  | 7 +--
  src/intel/vulkan/anv_extensions_gen.py | 7 +--
  2 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/src/amd/vulkan/radv_extensions.py 
b/src/amd/vulkan/radv_extensions.py
index db37d61..099cae7 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -161,12 +161,7 @@ def _init_exts_from_xml(xml):
  continue
  
  ext = ext_name_map[ext_name]

-if ext_name == 'VK_ANDROID_native_buffer':
-# VK_ANDROID_native_buffer is missing the type specifier.  Just
-# hard-code it to be a device extension for now.
-ext.type = 'device'
-else:
-ext.type = ext_elem.attrib['type']
+ext.type = ext_elem.attrib['type']
  
  _TEMPLATE_H = Template(COPYRIGHT + """

  #ifndef RADV_EXTENSIONS_H
diff --git a/src/intel/vulkan/anv_extensions_gen.py 
b/src/intel/vulkan/anv_extensions_gen.py
index 57a5285..5ea8220 100644
--- a/src/intel/vulkan/anv_extensions_gen.py
+++ b/src/intel/vulkan/anv_extensions_gen.py
@@ -46,12 +46,7 @@ def _init_exts_from_xml(xml):
  continue
  
  ext = ext_name_map[ext_name]

-if ext_name == 'VK_ANDROID_native_buffer':
-# VK_ANDROID_native_buffer is missing the type specifier.  Just
-# hard-code it to be a device extension for now.
-ext.type = 'device'
-else:
-ext.type = ext_elem.attrib['type']
+ext.type = ext_elem.attrib['type']
  
  _TEMPLATE_H = Template(COPYRIGHT + """
  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] vulkan: Update the XML and headers to 1.1.73

2018-04-16 Thread Samuel Pitoiset

Acked-by: Samuel Pitoiset 

On 04/16/2018 04:38 PM, Jason Ekstrand wrote:

---
  include/vulkan/vulkan_core.h |  2 +-
  src/vulkan/registry/vk.xml   | 10 +-
  2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/vulkan/vulkan_core.h b/include/vulkan/vulkan_core.h
index ed0d596..2cafcdd 100644
--- a/include/vulkan/vulkan_core.h
+++ b/include/vulkan/vulkan_core.h
@@ -43,7 +43,7 @@ extern "C" {
  #define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3ff)
  #define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xfff)
  // Version of this file
-#define VK_HEADER_VERSION 72
+#define VK_HEADER_VERSION 73
  
  
  #define VK_NULL_HANDLE 0

diff --git a/src/vulkan/registry/vk.xml b/src/vulkan/registry/vk.xml
index a97ae92..3b19b3a 100644
--- a/src/vulkan/registry/vk.xml
+++ b/src/vulkan/registry/vk.xml
@@ -137,7 +137,7 @@ server.
  // Vulkan 1.1 version number
  #define VK_API_VERSION_1_1 VK_MAKE_VERSION(1, 1, 0)// 
Patch version should always be set to 0
  // Version of this file
-#define VK_HEADER_VERSION 72
+#define VK_HEADER_VERSION 73
  
  

  #define VK_DEFINE_HANDLE(object) typedef struct object##_T* 
object;
@@ -1410,7 +1410,7 @@ server.
  uint32_t   
maxGeometryOutputVerticesmax number of vertices that can be emitted in geometry 
stage
  uint32_t   
maxGeometryTotalOutputComponentsmax total number of components (all vertices) written 
in geometry stage
  fragment stage limits
-uint32_t   
maxFragmentInputComponentsmax number of input compontents read in fragment 
stage
+uint32_t   
maxFragmentInputComponentsmax number of input components read in fragment 
stage
  uint32_t   
maxFragmentOutputAttachmentsmax number of output attachments written in fragment 
stage
  uint32_t   
maxFragmentDualSrcAttachmentsmax number of output attachments written when using dual 
source blending
  uint32_t   
maxFragmentCombinedOutputResourcesmax total number of storage buffers, storage images 
and output buffers
@@ -2935,7 +2935,7 @@ server.
  VkConservativeRasterizationModeEXT   
conservativeRasterizationMode  
  float
extraPrimitiveOverestimationSize   
  
-
+
  VkStructureType
 sType
  void*
pNext
  VkBool32   
shaderInputAttachmentArrayDynamicIndexing
@@ -2959,7 +2959,7 @@ server.
  VkBool32   
descriptorBindingVariableDescriptorCount
  VkBool32   
runtimeDescriptorArray
  
-
+
  VkStructureType
 sType
  void*
pNext
  uint32_t   
maxUpdateAfterBindDescriptorsInAllPools
@@ -6719,7 +6719,7 @@ server.
  
  
  
-
+
  
  
  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] anv, radv: Drop XML workarounds for VK_ANDROID_native_buffer

2018-04-16 Thread Jason Ekstrand
---
 src/amd/vulkan/radv_extensions.py  | 7 +--
 src/intel/vulkan/anv_extensions_gen.py | 7 +--
 2 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/src/amd/vulkan/radv_extensions.py 
b/src/amd/vulkan/radv_extensions.py
index db37d61..099cae7 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -161,12 +161,7 @@ def _init_exts_from_xml(xml):
 continue
 
 ext = ext_name_map[ext_name]
-if ext_name == 'VK_ANDROID_native_buffer':
-# VK_ANDROID_native_buffer is missing the type specifier.  Just
-# hard-code it to be a device extension for now.
-ext.type = 'device'
-else:
-ext.type = ext_elem.attrib['type']
+ext.type = ext_elem.attrib['type']
 
 _TEMPLATE_H = Template(COPYRIGHT + """
 #ifndef RADV_EXTENSIONS_H
diff --git a/src/intel/vulkan/anv_extensions_gen.py 
b/src/intel/vulkan/anv_extensions_gen.py
index 57a5285..5ea8220 100644
--- a/src/intel/vulkan/anv_extensions_gen.py
+++ b/src/intel/vulkan/anv_extensions_gen.py
@@ -46,12 +46,7 @@ def _init_exts_from_xml(xml):
 continue
 
 ext = ext_name_map[ext_name]
-if ext_name == 'VK_ANDROID_native_buffer':
-# VK_ANDROID_native_buffer is missing the type specifier.  Just
-# hard-code it to be a device extension for now.
-ext.type = 'device'
-else:
-ext.type = ext_elem.attrib['type']
+ext.type = ext_elem.attrib['type']
 
 _TEMPLATE_H = Template(COPYRIGHT + """
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] vulkan: Update the XML and headers to 1.1.73

2018-04-16 Thread Jason Ekstrand
---
 include/vulkan/vulkan_core.h |  2 +-
 src/vulkan/registry/vk.xml   | 10 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/vulkan/vulkan_core.h b/include/vulkan/vulkan_core.h
index ed0d596..2cafcdd 100644
--- a/include/vulkan/vulkan_core.h
+++ b/include/vulkan/vulkan_core.h
@@ -43,7 +43,7 @@ extern "C" {
 #define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3ff)
 #define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xfff)
 // Version of this file
-#define VK_HEADER_VERSION 72
+#define VK_HEADER_VERSION 73
 
 
 #define VK_NULL_HANDLE 0
diff --git a/src/vulkan/registry/vk.xml b/src/vulkan/registry/vk.xml
index a97ae92..3b19b3a 100644
--- a/src/vulkan/registry/vk.xml
+++ b/src/vulkan/registry/vk.xml
@@ -137,7 +137,7 @@ server.
 // Vulkan 1.1 version number
 #define VK_API_VERSION_1_1 VK_MAKE_VERSION(1, 1, 
0)// Patch version should always be set to 0
 // Version of this file
-#define VK_HEADER_VERSION 72
+#define VK_HEADER_VERSION 73
 
 
 #define VK_DEFINE_HANDLE(object) typedef struct object##_T* 
object;
@@ -1410,7 +1410,7 @@ server.
 uint32_t   
maxGeometryOutputVerticesmax number of vertices that can 
be emitted in geometry stage
 uint32_t   
maxGeometryTotalOutputComponentsmax total number of 
components (all vertices) written in geometry stage
 fragment stage limits
-uint32_t   
maxFragmentInputComponentsmax number of input compontents 
read in fragment stage
+uint32_t   
maxFragmentInputComponentsmax number of input components 
read in fragment stage
 uint32_t   
maxFragmentOutputAttachmentsmax number of output 
attachments written in fragment stage
 uint32_t   
maxFragmentDualSrcAttachmentsmax number of output 
attachments written when using dual source blending
 uint32_t   
maxFragmentCombinedOutputResourcesmax total number of 
storage buffers, storage images and output buffers
@@ -2935,7 +2935,7 @@ server.
 VkConservativeRasterizationModeEXT
   
conservativeRasterizationMode  
 float 
   
extraPrimitiveOverestimationSize   
 
-
+
 VkStructureType
 sType
 void*   
 pNext
 VkBool32   
shaderInputAttachmentArrayDynamicIndexing
@@ -2959,7 +2959,7 @@ server.
 VkBool32   
descriptorBindingVariableDescriptorCount
 VkBool32   
runtimeDescriptorArray
 
-
+
 VkStructureType
 sType
 void*   
 pNext
 uint32_t   
maxUpdateAfterBindDescriptorsInAllPools
@@ -6719,7 +6719,7 @@ server.
 
 
 
-
+
 
 
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: fix ir_binop_gequal glsl_to_nir conversion

2018-04-16 Thread Erico Nunes
On Sun, Apr 15, 2018 at 2:30 AM, Jason Ekstrand  wrote:
> On April 14, 2018 12:43:35 Connor Abbott  wrote:
> I think that it's probably impractical to use this path, and we should
> probably delete it. There are just too many optimizations, e.g. in
> nir_opt_algebraic and lowering passes that assume you have ints. I
> think a better plan would be to silently convert ints to floats in the
> lima driver, and maybe inhibit any optimizations that use bit
> twiddling tricks if real int support isn't indicated.
>
> I'm not sure.  For quite a while prog_to_nir used these comparison
> operations so we know they more or less work.  For all I know, maybe it
> still does (I didn't actually check).  The only thing we need to worry about
> in terms of correctness is any optimizations in nir_opt_algebraic which
> consume only floats but produce integers.  Also, all drivers need to handle
> imov simply because it's easy.
>
> That being said, we've done a lot of work to optimize the integer supporting
> paths so you may actually get better code if you can figure out a good way
> to lower the integers away.

I'm not really using ints in my sample program, just floats. But still
I'm getting nir_op_slt and nir_op_sge for the float comparison
operations.
Should I be getting nir_op_flt and nir_op_fge instead even with
.native_integers disabled in glsl_to_nir?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 30/35] nvir/nir: implement nir_intrinsic_load_ubo

2018-04-16 Thread Karol Herbst
v4: use loadFrom helper

Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 618b9ccfcc4..d65754ec4f6 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1938,6 +1938,20 @@ Converter::visit(nir_intrinsic_instr *insn)
   mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
   break;
}
+   case nir_intrinsic_load_ubo: {
+  const DataType dType = getDType(insn);
+  LValues  = convert(>dest);
+  Value *indirectIndex;
+  Value *indirectOffset;
+  uint32_t index = getIndirect(>src[0], 0, indirectIndex) + 1;
+  uint32_t offset = getIndirect(>src[1], 0, indirectOffset);
+
+  for (auto i = 0u; i < insn->num_components; ++i) {
+ loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
+  indirectOffset, indirectIndex);
+  }
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 33/35] nvir/nir: add memory barriers

2018-04-16 Thread Karol Herbst
v5: add more barrier intrinsics

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp  | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 15d77256a06..e5d85940ca7 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -506,6 +506,14 @@ Converter::getSubOp(nir_intrinsic_op op)
CASE_OP_INTR_ATOM(and, AND);
CASE_OP_INTR_ATOM(comp_swap, CAS);
CASE_OP_INTR_ATOM(exchange, EXCH);
+   case nir_intrinsic_group_memory_barrier:
+   case nir_intrinsic_memory_barrier:
+   case nir_intrinsic_memory_barrier_atomic_counter:
+   case nir_intrinsic_memory_barrier_buffer:
+   case nir_intrinsic_memory_barrier_image:
+  return NV50_IR_SUBOP_MEMBAR(M, GL);
+   case nir_intrinsic_memory_barrier_shared:
+  return NV50_IR_SUBOP_MEMBAR(M, CTA);
CASE_OP_INTR_ATOM(or, OR);
case nir_intrinsic_image_var_atomic_max:
CASE_OP_INTR_ATOM_S(imax, MAX);
@@ -2312,6 +2320,17 @@ Converter::visit(nir_intrinsic_instr *insn)
   bar->subOp = NV50_IR_SUBOP_BAR_SYNC;
   break;
}
+   case nir_intrinsic_group_memory_barrier:
+   case nir_intrinsic_memory_barrier:
+   case nir_intrinsic_memory_barrier_atomic_counter:
+   case nir_intrinsic_memory_barrier_buffer:
+   case nir_intrinsic_memory_barrier_image:
+   case nir_intrinsic_memory_barrier_shared: {
+  Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
+  bar->fixed = 1;
+  bar->subOp = getSubOp(op);
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 17/35] nvir/nir: add skeleton for nir_intrinsic_instr

2018-04-16 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp| 17 +
 1 file changed, 17 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index f4f844021a2..8a474eb1a8c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -105,6 +105,7 @@ private:
bool visit(nir_function *);
bool visit(nir_if *);
bool visit(nir_instr *);
+   bool visit(nir_intrinsic_instr *);
bool visit(nir_jump_instr *);
bool visit(nir_load_const_instr*);
bool visit(nir_loop *);
@@ -1277,6 +1278,8 @@ bool
 Converter::visit(nir_instr *insn)
 {
switch (insn->type) {
+   case nir_instr_type_intrinsic:
+  return visit(nir_instr_as_intrinsic(insn));
case nir_instr_type_jump:
   return visit(nir_instr_as_jump(insn));
case nir_instr_type_load_const:
@@ -1288,6 +1291,20 @@ Converter::visit(nir_instr *insn)
return true;
 }
 
+bool
+Converter::visit(nir_intrinsic_instr *insn)
+{
+   nir_intrinsic_op op = insn->intrinsic;
+
+   switch (op) {
+   default:
+  ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
+  return false;
+   }
+
+   return true;
+}
+
 bool
 Converter::visit(nir_jump_instr *insn)
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 28/35] nvir/nir: implement variable indexing

2018-04-16 Thread Karol Herbst
we store those arrays in local memory and reserve some space for each of the
arrays. The arrays are stored in a packed format, because we know quite easily
the context of each index. We don't do that in TGSI so far.

This causes various issues to come up in the MemoryOpt pass, because ld/st with
indirects aren't guaranteed to be aligned to 0x10 anymore.

v3: use fixed size vec4 arrays until we fix MemoryOpt
v4: fix for 64 bit types
v5: use loadFrom helper

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 58 ++
 1 file changed, 58 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 7dce61a1069..604a30db79f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -80,6 +80,7 @@ private:
typedef decltype(nir_ssa_def().index) NirSSADefIdx;
typedef decltype(nir_ssa_def().bit_size) NirSSADefBitSize;
typedef std::unordered_map NirDefMap;
+   typedef std::unordered_map NirArrayLMemOffsets;
typedef std::unordered_map 
NirBlockMap;
 
TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
@@ -157,6 +158,7 @@ private:
 
NirDefMap ssaDefs;
NirDefMap regDefs;
+   NirArrayLMemOffsets regToLmemOffset;
NirBlockMap blocks;
unsigned int curLoopDepth;
 
@@ -1269,6 +1271,7 @@ Converter::storeTo(nir_intrinsic_instr *insn, DataFile 
file, operation op,
 bool
 Converter::parseNIR()
 {
+   info->bin.tlsSpace = 0;
info->io.clipDistances = nir->info.clip_distance_array_size;
info->io.cullDistances = nir->info.cull_distance_array_size;
 
@@ -1358,6 +1361,16 @@ Converter::visit(nir_function *function)
   break;
}
 
+   nir_foreach_register(reg, >impl->registers) {
+  if (reg->num_array_elems) {
+ /* TODO: packed variables would be nice, but MemoryOpt fails */
+ /* replace 4 with reg->num_components */
+ uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8);
+ regToLmemOffset[reg->index] = info->bin.tlsSpace;
+ info->bin.tlsSpace += size;
+  }
+   }
+
nir_index_ssa_defs(function->impl);
foreach_list_typed(nir_cf_node, node, node, >impl->body) {
   if (!visit(node))
@@ -2082,6 +2095,51 @@ Converter::visit(nir_alu_instr *insn)
 *   2. they basically just merge multiple values into one data type
 */
CASE_OPFI(mov):
+  if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) 
{
+ nir_reg_dest& reg = insn->dest.dest.reg;
+ auto goffset = regToLmemOffset[reg.reg->index];
+ auto comps = reg.reg->num_components;
+ auto size = reg.reg->bit_size / 8;
+ auto csize = 4 * size; /* TODO after fixing MemoryOpts: comps * size; 
*/
+ auto aoffset = csize * reg.base_offset;
+ Value *indirect = nullptr;
+
+ if (reg.indirect)
+indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS),
+  getSrc(reg.indirect, 0), mkImm(csize));
+
+ for (auto i = 0u; i < comps; ++i) {
+if (!((1u << i) & insn->dest.write_mask))
+   continue;
+
+Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + 
aoffset + i * size);
+mkStore(OP_STORE, dType, sym, indirect, getSrc(>src[0], i));
+ }
+ break;
+  } else if (!insn->src[0].src.is_ssa && 
insn->src[0].src.reg.reg->num_array_elems) {
+ LValues  = convert(>dest);
+ nir_reg_src& reg = insn->src[0].src.reg;
+ auto goffset = regToLmemOffset[reg.reg->index];
+ /* auto comps = reg.reg->num_components; */
+ auto size = reg.reg->bit_size / 8;
+ auto csize = 4 * size; /* TODO after fixing MemoryOpts: comps * size; 
*/
+ auto aoffset = csize * reg.base_offset;
+ Value *indirect = nullptr;
+
+ if (reg.indirect)
+indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), 
getSrc(reg.indirect, 0), mkImm(csize));
+
+ for (auto i = 0u; i < newDefs.size(); ++i)
+loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + 
aoffset, i, indirect);
+
+ break;
+  } else {
+ LValues  = convert(>dest);
+ for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
+mkMov(newDefs[c], getSrc(>src[0], c), dType);
+ }
+  }
+  break;
case nir_op_vec2:
case nir_op_vec3:
case nir_op_vec4: {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 29/35] nvir/nir: implement geometry shader nir_intrinsics

2018-04-16 Thread Karol Herbst
v4: use smarter getIndirect helper
use new getSlotAddress helper
use loadFrom helper

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 27 ++
 1 file changed, 27 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 604a30db79f..618b9ccfcc4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -413,6 +413,10 @@ operation
 Converter::getOperation(nir_intrinsic_op op)
 {
switch (op) {
+   case nir_intrinsic_emit_vertex:
+  return OP_EMIT;
+   case nir_intrinsic_end_primitive:
+  return OP_RESTART;
default:
   ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
   assert(false);
@@ -1911,6 +1915,29 @@ Converter::visit(nir_intrinsic_instr *insn)
  ->subOp = NV50_IR_SUBOP_SHFL_IDX;
   break;
}
+   case nir_intrinsic_load_per_vertex_input: {
+  const DataType dType = getDType(insn);
+  LValues  = convert(>dest);
+  Value *indirectVertex;
+  Value *indirectOffset;
+  auto baseVertex = getIndirect(>src[0], 0, indirectVertex);
+  auto idx = getIndirect(insn, 1, 0, indirectOffset);
+
+  Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
+  mkImm(baseVertex), indirectVertex);
+  for (auto i = 0u; i < insn->num_components; ++i) {
+ uint32_t address = getSlotAddress(insn, idx, i);
+ loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
+  indirectOffset, vtxBase, info->in[idx].patch);
+  }
+  break;
+   }
+   case nir_intrinsic_emit_vertex:
+   case nir_intrinsic_end_primitive: {
+  auto idx = nir_intrinsic_stream_id(insn);
+  mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 24/35] nvir/nir: implement nir_ssa_undef_instr

2018-04-16 Thread Karol Herbst
v2: use mkOp

Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 13 +
 1 file changed, 13 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 4606ffb792e..badb2398abb 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -143,6 +143,7 @@ private:
bool visit(nir_jump_instr *);
bool visit(nir_load_const_instr*);
bool visit(nir_loop *);
+   bool visit(nir_ssa_undef_instr *);
 
nir_shader *nir;
 
@@ -1463,6 +1464,8 @@ Converter::visit(nir_instr *insn)
   return visit(nir_instr_as_jump(insn));
case nir_instr_type_load_const:
   return visit(nir_instr_as_load_const(insn));
+   case nir_instr_type_ssa_undef:
+  return visit(nir_instr_as_ssa_undef(insn));
default:
   ERROR("unknown nir_instr type %u\n", insn->type);
   return false;
@@ -2160,6 +2163,16 @@ Converter::visit(nir_alu_instr *insn)
 }
 #undef DEFAULT_CHECKS
 
+bool
+Converter::visit(nir_ssa_undef_instr *insn)
+{
+   LValues  = convert(>def);
+   for (auto i = 0u; i < insn->def.num_components; ++i) {
+  mkOp(OP_NOP, TYPE_NONE, newDefs[i]);
+   }
+   return true;
+}
+
 bool
 Converter::run()
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 27/35] nvir/nir: implement vote and ballot

2018-04-16 Thread Karol Herbst
v2: add vote_eq support
use the new subop intrinsic helper
add ballot
v3: add read_(first_)invocation

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 42 ++
 1 file changed, 42 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 8088309272b..7dce61a1069 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -444,6 +444,12 @@ int
 Converter::getSubOp(nir_intrinsic_op op)
 {
switch (op) {
+   case nir_intrinsic_vote_all:
+  return NV50_IR_SUBOP_VOTE_ALL;
+   case nir_intrinsic_vote_any:
+  return NV50_IR_SUBOP_VOTE_ANY;
+   case nir_intrinsic_vote_ieq:
+  return NV50_IR_SUBOP_VOTE_UNI;
default:
   return 0;
}
@@ -1856,6 +1862,42 @@ Converter::visit(nir_intrinsic_instr *insn)
   loadImm(newDefs[0], 32u);
   break;
}
+   case nir_intrinsic_vote_all:
+   case nir_intrinsic_vote_any:
+   case nir_intrinsic_vote_ieq: {
+  LValues  = convert(>dest);
+  Value *pred = new_LValue(func, FILE_PREDICATE);
+  mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(>src[0], 0), 
zero);
+  mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);
+  mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);
+  break;
+   }
+   case nir_intrinsic_ballot: {
+  LValues  = convert(>dest);
+  Value *pred = new_LValue(func, FILE_PREDICATE);
+  mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(>src[0], 0), 
zero);
+  Instruction *ballot = mkOp1(OP_VOTE, TYPE_U32, getSSA(), pred);
+  ballot->subOp = NV50_IR_SUBOP_VOTE_ANY;
+  mkOp2(OP_MERGE, TYPE_U64, newDefs[0], ballot->getDef(0), 
loadImm(getSSA(), 0));
+  break;
+   }
+   case nir_intrinsic_read_first_invocation:
+   case nir_intrinsic_read_invocation: {
+  LValues  = convert(>dest);
+  const DataType dType = getDType(insn);
+  Value *tmp = getScratch();
+
+  if (op == nir_intrinsic_read_first_invocation) {
+ mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = 
NV50_IR_SUBOP_VOTE_ANY;
+ mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = 
NV50_IR_SUBOP_EXTBF_REV;
+ mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
+  } else
+ tmp = getSrc(>src[1], 0);
+
+  mkOp3(OP_SHFL, dType, newDefs[0], getSrc(>src[0], 0), tmp, 
mkImm(0x1f))
+ ->subOp = NV50_IR_SUBOP_SHFL_IDX;
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 20/35] nvir/nir: implement nir_intrinsic_store_(per_vertex_)output

2018-04-16 Thread Karol Herbst
v3: add workaround for RA issues
indirects have to be multiplied by 0x10
fix indirect access
v4: use smarter getIndirect helper
use storeTo helper
v5: don't use const_offset directly

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 46 ++
 1 file changed, 46 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 8a83a885889..b34fe7739d8 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1270,6 +1270,11 @@ Converter::visit(nir_function *function)
 
setPosition(entry, true);
 
+   if (info->io.genUserClip > 0) {
+  for (int c = 0; c < 4; ++c)
+ clipVtx[c] = getScratch();
+   }
+
switch (prog->getType()) {
case Program::TYPE_TESSELLATION_CONTROL:
   outBase = mkOp2v(
@@ -1296,6 +1301,9 @@ Converter::visit(nir_function *function)
bb->cfg.attach(>cfg, Graph::Edge::TREE);
setPosition(exit, true);
 
+   if (info->io.genUserClip > 0)
+  handleUserClipPlanes();
+
/* TODO: for non main function this needs to be a OP_RETURN */
mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
return true;
@@ -1477,6 +1485,44 @@ Converter::visit(nir_intrinsic_instr *insn)
   }
   break;
}
+   case nir_intrinsic_store_output:
+   case nir_intrinsic_store_per_vertex_output: {
+  Value *indirect;
+  DataType dType = getSType(insn->src[0], false, false);
+  auto idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 
0, indirect);
+
+  for (auto i = 0u; i < insn->num_components; ++i) {
+ if (!((1u << i) & nir_intrinsic_write_mask(insn)))
+continue;
+
+ uint8_t offset = 0;
+ Value *src = getSrc(>src[0], i);
+ switch (prog->getType()) {
+ case Program::TYPE_FRAGMENT: {
+if (info->out[idx].sn == TGSI_SEMANTIC_POSITION) {
+   /* TGSI uses a different interface than NIR, TGSI stores that
+* value in the z component, NIR in X
+*/
+   offset += 2;
+   src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);
+}
+break;
+ }
+ case Program::TYPE_VERTEX: {
+if (info->io.genUserClip > 0) {
+   mkMov(clipVtx[i], src);
+   src = clipVtx[i];
+}
+break;
+ }
+ default:
+break;
+ }
+
+ storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + 
offset, indirect);
+  }
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 26/35] nvir/nir: add getOperation for intrinsics

2018-04-16 Thread Karol Herbst
v7: don't assert in default case

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 553f74f7a75..8088309272b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -124,10 +124,12 @@ private:
std::vector getSTypes(nir_alu_instr*);
DataType getSType(nir_src&, bool isFloat, bool isSigned);
 
+   operation getOperation(nir_intrinsic_op);
operation getOperation(nir_op);
operation getOperation(nir_texop);
operation preOperationNeeded(nir_op);
 
+   int getSubOp(nir_intrinsic_op);
int getSubOp(nir_op);
 
CondCode getCondCode(nir_op);
@@ -405,6 +407,17 @@ Converter::getOperation(nir_texop op)
}
 }
 
+operation
+Converter::getOperation(nir_intrinsic_op op)
+{
+   switch (op) {
+   default:
+  ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
+  assert(false);
+  return OP_NOP;
+   }
+}
+
 operation
 Converter::preOperationNeeded(nir_op op)
 {
@@ -427,6 +440,15 @@ Converter::getSubOp(nir_op op)
}
 }
 
+int
+Converter::getSubOp(nir_intrinsic_op op)
+{
+   switch (op) {
+   default:
+  return 0;
+   }
+}
+
 CondCode
 Converter::getCondCode(nir_op op)
 {
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 18/35] nvir/nir: implement nir_alu_instr handling

2018-04-16 Thread Karol Herbst
v2: use bitfield_insert instead of bfi
rework switch helper macros
remove some lowering code (LoweringHelper is now used for this)
v3: add pack_half_2x16_split
add unpack_half_2x16_split_x/y
v5: replace first argument with nullptr in loadImm calls
prefer getSSA over getScratch

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 489 -
 1 file changed, 488 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 8a474eb1a8c..8368bbcc015 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -34,6 +34,31 @@
 #include 
 #include 
 
+#define CASE_OPFI(ni) \
+   case nir_op_f ## ni : \
+   case nir_op_i ## ni
+#define CASE_OPFIU(ni) \
+   case nir_op_f ## ni : \
+   case nir_op_i ## ni : \
+   case nir_op_u ## ni
+#define CASE_OPIU(ni) \
+   case nir_op_i ## ni : \
+   case nir_op_u ## ni
+
+#define CASE_OPFI_RET(ni, val) \
+   case nir_op_f ## ni : \
+   case nir_op_i ## ni : \
+  return val
+#define CASE_OPFIU_RET(ni, val) \
+   case nir_op_f ## ni : \
+   case nir_op_i ## ni : \
+   case nir_op_u ## ni : \
+  return val
+#define CASE_OPIU_RET(ni, val) \
+   case nir_op_i ## ni : \
+   case nir_op_u ## ni : \
+  return val
+
 static int
 type_size(const struct glsl_type *type)
 {
@@ -97,9 +122,17 @@ private:
std::vector<DataType> getSTypes(nir_alu_instr*);
DataType getSType(nir_src&, bool isFloat, bool isSigned);
 
+   operation getOperation(nir_op);
+   operation preOperationNeeded(nir_op);
+
+   int getSubOp(nir_op);
+
+   CondCode getCondCode(nir_op);
+
bool assignSlots();
bool parseNIR();
 
+   bool visit(nir_alu_instr *);
bool visit(nir_block *);
bool visit(nir_cf_node *);
bool visit(nir_function *);
@@ -118,6 +151,7 @@ private:
unsigned int curLoopDepth;
 
BasicBlock *exit;
+   Value *zero;
 
union {
   struct {
@@ -129,7 +163,10 @@ private:
 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
: ConverterCommon(prog, info),
  nir(nir),
- curLoopDepth(0) {}
+ curLoopDepth(0)
+{
+   zero = mkImm((uint32_t)0);
+}
 
 BasicBlock *
 Converter::convert(nir_block *block)
@@ -246,6 +283,137 @@ Converter::getSType(nir_src , bool isFloat, bool 
isSigned)
return typeOfSize(bitSize / 8, isFloat, isSigned);
 }
 
+operation
+Converter::getOperation(nir_op op)
+{
+   switch (op) {
+   // basic ops with float and int variants
+   CASE_OPFI_RET(abs, OP_ABS);
+   CASE_OPFI_RET(add, OP_ADD);
+   CASE_OPFI_RET(and, OP_AND);
+   CASE_OPFIU_RET(div, OP_DIV);
+   CASE_OPIU_RET(find_msb, OP_BFIND);
+   CASE_OPFIU_RET(max, OP_MAX);
+   CASE_OPFIU_RET(min, OP_MIN);
+   CASE_OPFIU_RET(mod, OP_MOD);
+   CASE_OPFI_RET(rem, OP_MOD);
+   CASE_OPFI_RET(mul, OP_MUL);
+   CASE_OPIU_RET(mul_high, OP_MUL);
+   CASE_OPFI_RET(neg, OP_NEG);
+   CASE_OPFI_RET(not, OP_NOT);
+   CASE_OPFI_RET(or, OP_OR);
+   CASE_OPFI_RET(eq, OP_SET);
+   CASE_OPFIU_RET(ge, OP_SET);
+   CASE_OPFIU_RET(lt, OP_SET);
+   CASE_OPFI_RET(ne, OP_SET);
+   CASE_OPIU_RET(shr, OP_SHR);
+   CASE_OPFI_RET(sub, OP_SUB);
+   CASE_OPFI_RET(xor, OP_XOR);
+   case nir_op_fceil:
+  return OP_CEIL;
+   case nir_op_fcos:
+  return OP_COS;
+   case nir_op_f2f32:
+   case nir_op_f2f64:
+   case nir_op_f2i32:
+   case nir_op_f2i64:
+   case nir_op_f2u32:
+   case nir_op_f2u64:
+   case nir_op_i2f32:
+   case nir_op_i2f64:
+   case nir_op_i2i32:
+   case nir_op_i2i64:
+   case nir_op_u2f32:
+   case nir_op_u2f64:
+   case nir_op_u2u32:
+   case nir_op_u2u64:
+  return OP_CVT;
+   case nir_op_fddx:
+   case nir_op_fddx_coarse:
+   case nir_op_fddx_fine:
+  return OP_DFDX;
+   case nir_op_fddy:
+   case nir_op_fddy_coarse:
+   case nir_op_fddy_fine:
+  return OP_DFDY;
+   case nir_op_fexp2:
+  return OP_EX2;
+   case nir_op_ffloor:
+  return OP_FLOOR;
+   case nir_op_ffma:
+  return OP_FMA;
+   case nir_op_flog2:
+  return OP_LG2;
+   case nir_op_pack_64_2x32_split:
+  return OP_MERGE;
+   case nir_op_frcp:
+  return OP_RCP;
+   case nir_op_frsq:
+  return OP_RSQ;
+   case nir_op_fsat:
+  return OP_SAT;
+   case nir_op_ishl:
+  return OP_SHL;
+   case nir_op_fsin:
+  return OP_SIN;
+   case nir_op_fsqrt:
+  return OP_SQRT;
+   case nir_op_ftrunc:
+  return OP_TRUNC;
+   default:
+  ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);
+  assert(false);
+  return OP_NOP;
+   }
+}
+
+operation
+Converter::preOperationNeeded(nir_op op)
+{
+   switch (op) {
+   case nir_op_fcos:
+   case nir_op_fsin:
+  return OP_PRESIN;
+   default:
+  return OP_NOP;
+   }
+}
+
+int
+Converter::getSubOp(nir_op op)
+{
+   switch (op) {
+   CASE_OPIU_RET(mul_high, NV50_IR_SUBOP_MUL_HIGH);
+   default:
+  return 0;
+   }
+}
+
+CondCode

[Mesa-dev] [PATCH v7 35/35] nvir/nir: implement intrinsic shader_clock

2018-04-16 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index dd73acb2366..d9ea665b9e1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -2354,6 +2354,14 @@ Converter::visit(nir_intrinsic_instr *insn)
   bar->subOp = getSubOp(op);
   break;
}
+   case nir_intrinsic_shader_clock: {
+  const DataType dType = getDType(insn);
+      LValues &newDefs = convert(&insn->dest);
+
+  loadImm(newDefs[0], 0u);
+  mkOp1v(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0));
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 32/35] nvir/nir: implement images

2018-04-16 Thread Karol Herbst
v3: fix compiler warnings
v4: use loadFrom helper
v5: fix signed min/max
v6: set tex mask
add support for indirect image access
set cache mode
v7: make compatible with 884d27bcf688d36c3bbe01bceca525595add3b33
rework the whole deref thing to prepare for bindless

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 402 -
 1 file changed, 382 insertions(+), 20 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 24c70d0c5ae..15d77256a06 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -91,6 +91,8 @@ private:
LValues& convert(nir_register *);
LValues& convert(nir_ssa_def *);
 
+   ImgFormat convertGLImgFormat(GLuint);
+
Value* getSrc(nir_alu_src *, uint8_t component = 0);
Value* getSrc(nir_register *, uint8_t);
Value* getSrc(nir_src *, uint8_t, bool indirect = false);
@@ -120,6 +122,7 @@ private:
 
DataType getDType(nir_alu_instr*);
DataType getDType(nir_intrinsic_instr*);
+   DataType getDType(nir_intrinsic_instr*, bool isSigned);
DataType getDType(nir_op, NirSSADefBitSize);
 
std::vector<DataType> getSTypes(nir_alu_instr*);
@@ -153,6 +156,11 @@ private:
 
/* tex stuff */
Value* applyProjection(Value *src, Value *proj);
+   unsigned int getNIRArgCount(TexInstruction::Target&);
+
+   /* image stuff */
+   uint16_t derefImageVar(nir_deref_var *, Value *, const glsl_type 
*&);
+   CacheMode getCacheModeFromVar(nir_variable *);
 
nir_shader *nir;
 
@@ -245,11 +253,30 @@ Converter::getDType(nir_alu_instr *insn)
 
 DataType
 Converter::getDType(nir_intrinsic_instr *insn)
+{
+   bool isSigned;
+   switch (insn->intrinsic) {
+   case nir_intrinsic_shared_atomic_imax:
+   case nir_intrinsic_shared_atomic_imin:
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_ssbo_atomic_imin:
+  isSigned = true;
+  break;
+   default:
+  isSigned = false;
+  break;
+   }
+
+   return getDType(insn, isSigned);
+}
+
+DataType
+Converter::getDType(nir_intrinsic_instr *insn, bool isSigned)
 {
if (insn->dest.is_ssa)
-  return typeOfSize(insn->dest.ssa.bit_size / 8, false, false);
+  return typeOfSize(insn->dest.ssa.bit_size / 8, false, isSigned);
else
-  return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false);
+  return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, isSigned);
 }
 
 DataType
@@ -417,6 +444,22 @@ Converter::getOperation(nir_intrinsic_op op)
   return OP_EMIT;
case nir_intrinsic_end_primitive:
   return OP_RESTART;
+   case nir_intrinsic_image_var_atomic_add:
+   case nir_intrinsic_image_var_atomic_and:
+   case nir_intrinsic_image_var_atomic_comp_swap:
+   case nir_intrinsic_image_var_atomic_exchange:
+   case nir_intrinsic_image_var_atomic_max:
+   case nir_intrinsic_image_var_atomic_min:
+   case nir_intrinsic_image_var_atomic_or:
+   case nir_intrinsic_image_var_atomic_xor:
+  return OP_SUREDP;
+   case nir_intrinsic_image_var_load:
+  return OP_SULDP;
+   case nir_intrinsic_image_var_samples:
+   case nir_intrinsic_image_var_size:
+  return OP_SUQ;
+   case nir_intrinsic_image_var_store:
+  return OP_SUSTP;
default:
   ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);
   assert(false);
@@ -446,28 +489,31 @@ Converter::getSubOp(nir_op op)
}
 }
 
+#define CASE_OP_INTR_ATOM(nir, nvir) \
+   case nir_intrinsic_image_var_atomic_ ## nir : \
+   case nir_intrinsic_shared_atomic_ ## nir : \
+   case nir_intrinsic_ssbo_atomic_ ## nir : \
+  return NV50_IR_SUBOP_ATOM_ ## nvir
+#define CASE_OP_INTR_ATOM_S(nir, nvir) \
+   case nir_intrinsic_shared_atomic_ ## nir : \
+   case nir_intrinsic_ssbo_atomic_ ## nir : \
+  return NV50_IR_SUBOP_ATOM_ ## nvir
 int
 Converter::getSubOp(nir_intrinsic_op op)
 {
switch (op) {
-   case nir_intrinsic_ssbo_atomic_add:
-  return NV50_IR_SUBOP_ATOM_ADD;
-   case nir_intrinsic_ssbo_atomic_and:
-  return NV50_IR_SUBOP_ATOM_AND;
-   case nir_intrinsic_ssbo_atomic_comp_swap:
-  return NV50_IR_SUBOP_ATOM_CAS;
-   case nir_intrinsic_ssbo_atomic_exchange:
-  return NV50_IR_SUBOP_ATOM_EXCH;
-   case nir_intrinsic_ssbo_atomic_or:
-  return NV50_IR_SUBOP_ATOM_OR;
-   case nir_intrinsic_ssbo_atomic_imax:
-   case nir_intrinsic_ssbo_atomic_umax:
-  return NV50_IR_SUBOP_ATOM_MAX;
-   case nir_intrinsic_ssbo_atomic_imin:
-   case nir_intrinsic_ssbo_atomic_umin:
-  return NV50_IR_SUBOP_ATOM_MIN;
-   case nir_intrinsic_ssbo_atomic_xor:
-  return NV50_IR_SUBOP_ATOM_XOR;
+   CASE_OP_INTR_ATOM(add, ADD);
+   CASE_OP_INTR_ATOM(and, AND);
+   CASE_OP_INTR_ATOM(comp_swap, CAS);
+   CASE_OP_INTR_ATOM(exchange, EXCH);
+   CASE_OP_INTR_ATOM(or, OR);
+   case nir_intrinsic_image_var_atomic_max:
+   CASE_OP_INTR_ATOM_S(imax, MAX);
+   

[Mesa-dev] [PATCH v7 34/35] nvir/nir: implement load_per_vertex_output

2018-04-16 Thread Karol Herbst
v4: use smarter getIndirect helper
use new getSlotAddress helper
v5: use loadFrom helper

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 23 ++
 1 file changed, 23 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index e5d85940ca7..dd73acb2366 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -2069,6 +2069,29 @@ Converter::visit(nir_intrinsic_instr *insn)
   }
   break;
}
+   case nir_intrinsic_load_per_vertex_output: {
+  const DataType dType = getDType(insn);
+      LValues &newDefs = convert(&insn->dest);
+      Value *indirectVertex;
+      Value *indirectOffset;
+      auto baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);
+  auto idx = getIndirect(insn, 1, 0, indirectOffset);
+  Value *vtxBase = nullptr;
+
+  if (indirectVertex)
+ vtxBase = indirectVertex;
+  else
+ vtxBase = loadImm(nullptr, baseVertex);
+
+  vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, 
vtxBase);
+
+  for (auto i = 0u; i < insn->num_components; ++i) {
+ uint32_t address = getSlotAddress(insn, idx, i);
+ loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
+  indirectOffset, vtxBase, info->in[idx].patch);
+  }
+  break;
+   }
case nir_intrinsic_emit_vertex:
case nir_intrinsic_end_primitive: {
   auto idx = nir_intrinsic_stream_id(insn);
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 23/35] nvir/nir: implement loading system values

2018-04-16 Thread Karol Herbst
v2: support more sys values
fixed a bug where for multi component reads all values ended up in x
v3: add load_patch_vertices_in
v4: add subgroup stuff
v5: add helper invocation
v6: fix loading 64 bit system values

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 122 +
 1 file changed, 122 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 40bc00d7e14..4606ffb792e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -85,6 +85,7 @@ private:
LValues& convert(nir_alu_dest *);
BasicBlock* convert(nir_block *);
LValues& convert(nir_dest *);
+   SVSemantic convert(nir_intrinsic_op);
LValues& convert(nir_register *);
LValues& convert(nir_ssa_def *);
 
@@ -1469,6 +1470,70 @@ Converter::visit(nir_instr *insn)
return true;
 }
 
+SVSemantic
+Converter::convert(nir_intrinsic_op intr)
+{
+   switch (intr) {
+   case nir_intrinsic_load_base_vertex:
+  return SV_BASEVERTEX;
+   case nir_intrinsic_load_base_instance:
+  return SV_BASEINSTANCE;
+   case nir_intrinsic_load_draw_id:
+  return SV_DRAWID;
+   case nir_intrinsic_load_front_face:
+  return SV_FACE;
+   case nir_intrinsic_load_helper_invocation:
+  return SV_THREAD_KILL;
+   case nir_intrinsic_load_instance_id:
+  return SV_INSTANCE_ID;
+   case nir_intrinsic_load_invocation_id:
+  return SV_INVOCATION_ID;
+   case nir_intrinsic_load_local_group_size:
+  return SV_NTID;
+   case nir_intrinsic_load_local_invocation_id:
+  return SV_TID;
+   case nir_intrinsic_load_num_work_groups:
+  return SV_NCTAID;
+   case nir_intrinsic_load_patch_vertices_in:
+  return SV_VERTEX_COUNT;
+   case nir_intrinsic_load_primitive_id:
+  return SV_PRIMITIVE_ID;
+   case nir_intrinsic_load_sample_id:
+  return SV_SAMPLE_INDEX;
+   case nir_intrinsic_load_sample_mask_in:
+  return SV_SAMPLE_MASK;
+   case nir_intrinsic_load_sample_pos:
+  return SV_SAMPLE_POS;
+   case nir_intrinsic_load_subgroup_eq_mask:
+  return SV_LANEMASK_EQ;
+   case nir_intrinsic_load_subgroup_ge_mask:
+  return SV_LANEMASK_GE;
+   case nir_intrinsic_load_subgroup_gt_mask:
+  return SV_LANEMASK_GT;
+   case nir_intrinsic_load_subgroup_le_mask:
+  return SV_LANEMASK_LE;
+   case nir_intrinsic_load_subgroup_lt_mask:
+  return SV_LANEMASK_LT;
+   case nir_intrinsic_load_subgroup_invocation:
+  return SV_LANEID;
+   case nir_intrinsic_load_tess_coord:
+  return SV_TESS_COORD;
+   case nir_intrinsic_load_tess_level_inner:
+  return SV_TESS_INNER;
+   case nir_intrinsic_load_tess_level_outer:
+  return SV_TESS_OUTER;
+   case nir_intrinsic_load_vertex_id:
+  return SV_VERTEX_ID;
+   case nir_intrinsic_load_work_group_id:
+  return SV_CTAID;
+   default:
+  ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",
+nir_intrinsic_infos[intr].name);
+  assert(false);
+  return SV_LAST;
+   }
+}
+
 bool
 Converter::visit(nir_intrinsic_instr *insn)
 {
@@ -1671,6 +1736,63 @@ Converter::visit(nir_intrinsic_instr *insn)
   mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
   break;
}
+   case nir_intrinsic_load_base_vertex:
+   case nir_intrinsic_load_base_instance:
+   case nir_intrinsic_load_draw_id:
+   case nir_intrinsic_load_front_face:
+   case nir_intrinsic_load_helper_invocation:
+   case nir_intrinsic_load_instance_id:
+   case nir_intrinsic_load_invocation_id:
+   case nir_intrinsic_load_local_group_size:
+   case nir_intrinsic_load_local_invocation_id:
+   case nir_intrinsic_load_num_work_groups:
+   case nir_intrinsic_load_patch_vertices_in:
+   case nir_intrinsic_load_primitive_id:
+   case nir_intrinsic_load_sample_id:
+   case nir_intrinsic_load_sample_mask_in:
+   case nir_intrinsic_load_sample_pos:
+   case nir_intrinsic_load_subgroup_eq_mask:
+   case nir_intrinsic_load_subgroup_ge_mask:
+   case nir_intrinsic_load_subgroup_gt_mask:
+   case nir_intrinsic_load_subgroup_le_mask:
+   case nir_intrinsic_load_subgroup_lt_mask:
+   case nir_intrinsic_load_subgroup_invocation:
+   case nir_intrinsic_load_tess_coord:
+   case nir_intrinsic_load_tess_level_inner:
+   case nir_intrinsic_load_tess_level_outer:
+   case nir_intrinsic_load_vertex_id:
+   case nir_intrinsic_load_work_group_id: {
+  const DataType dType = getDType(insn);
+  SVSemantic sv = convert(op);
+      LValues &newDefs = convert(&insn->dest);
+
+  for (auto i = 0u; i < insn->num_components; ++i) {
+ Value *def;
+ if (typeSizeof(dType) == 8)
+def = getSSA();
+ else
+def = newDefs[i];
+
+ if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {
+loadImm(def, 0u);
+ } else {
+Symbol *sym = mkSysVal(sv, i);
+Instruction *rdsv = 

[Mesa-dev] [PATCH v7 31/35] nvir/nir: implement ssbo intrinsics

2018-04-16 Thread Karol Herbst
v4: use loadFrom helper
v5: support indirect buffer access

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 90 ++
 1 file changed, 90 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index d65754ec4f6..24c70d0c5ae 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -450,6 +450,24 @@ int
 Converter::getSubOp(nir_intrinsic_op op)
 {
switch (op) {
+   case nir_intrinsic_ssbo_atomic_add:
+  return NV50_IR_SUBOP_ATOM_ADD;
+   case nir_intrinsic_ssbo_atomic_and:
+  return NV50_IR_SUBOP_ATOM_AND;
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+  return NV50_IR_SUBOP_ATOM_CAS;
+   case nir_intrinsic_ssbo_atomic_exchange:
+  return NV50_IR_SUBOP_ATOM_EXCH;
+   case nir_intrinsic_ssbo_atomic_or:
+  return NV50_IR_SUBOP_ATOM_OR;
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_ssbo_atomic_umax:
+  return NV50_IR_SUBOP_ATOM_MAX;
+   case nir_intrinsic_ssbo_atomic_imin:
+   case nir_intrinsic_ssbo_atomic_umin:
+  return NV50_IR_SUBOP_ATOM_MIN;
+   case nir_intrinsic_ssbo_atomic_xor:
+  return NV50_IR_SUBOP_ATOM_XOR;
case nir_intrinsic_vote_all:
   return NV50_IR_SUBOP_VOTE_ALL;
case nir_intrinsic_vote_any:
@@ -1952,6 +1970,78 @@ Converter::visit(nir_intrinsic_instr *insn)
   }
   break;
}
+   case nir_intrinsic_get_buffer_size: {
+      LValues &newDefs = convert(&insn->dest);
+      const DataType dType = getDType(insn);
+      Value *indirectBuffer;
+      uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
+
+  Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);
+  mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, 
indirectBuffer);
+  break;
+   }
+   case nir_intrinsic_store_ssbo: {
+  DataType sType = getSType(insn->src[0], false, false);
+  Value *indirectBuffer;
+  Value *indirectOffset;
+      uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
+      uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
+
+  for (auto i = 0u; i < insn->num_components; ++i) {
+ if (!((1u << i) & nir_intrinsic_write_mask(insn)))
+continue;
+ Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
+offset + i * typeSizeof(sType));
+         mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], 
i))
+->setIndirect(0, 1, indirectBuffer);
+  }
+  info->io.globalAccess |= 0x2;
+  break;
+   }
+   case nir_intrinsic_load_ssbo: {
+  const DataType dType = getDType(insn);
+      LValues &newDefs = convert(&insn->dest);
+      Value *indirectBuffer;
+      Value *indirectOffset;
+      uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
+      uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
+
+  for (auto i = 0u; i < insn->num_components; ++i)
+ loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
+  indirectOffset, indirectBuffer);
+
+  info->io.globalAccess |= 0x1;
+  break;
+   }
+   case nir_intrinsic_ssbo_atomic_add:
+   case nir_intrinsic_ssbo_atomic_and:
+   case nir_intrinsic_ssbo_atomic_comp_swap:
+   case nir_intrinsic_ssbo_atomic_exchange:
+   case nir_intrinsic_ssbo_atomic_or:
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_ssbo_atomic_imin:
+   case nir_intrinsic_ssbo_atomic_umax:
+   case nir_intrinsic_ssbo_atomic_umin:
+   case nir_intrinsic_ssbo_atomic_xor: {
+  const DataType dType = getDType(insn);
+      LValues &newDefs = convert(&insn->dest);
+      Value *indirectBuffer;
+      Value *indirectOffset;
+      uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
+      uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
+
+      Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);
+      Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,
+                                getSrc(&insn->src[2], 0));
+      if (op == nir_intrinsic_ssbo_atomic_comp_swap)
+         atom->setSrc(2, getSrc(&insn->src[3], 0));
+  atom->setIndirect(0, 0, indirectOffset);
+  atom->setIndirect(0, 1, indirectBuffer);
+  atom->subOp = getSubOp(op);
+
+  info->io.globalAccess |= 0x2;
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 19/35] nvir/nir: implement nir_intrinsic_load_uniform

2018-04-16 Thread Karol Herbst
v2: use new getIndirect helper
fixes symbols for 64 bit types
v4: use smarter getIndirect helper
simplify address calculation
use loadFrom helper

Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 8368bbcc015..8a83a885889 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1467,6 +1467,16 @@ Converter::visit(nir_intrinsic_instr *insn)
nir_intrinsic_op op = insn->intrinsic;
 
switch (op) {
+   case nir_intrinsic_load_uniform: {
+      LValues &newDefs = convert(&insn->dest);
+  const DataType dType = getDType(insn);
+  Value *indirect;
+  auto coffset = getIndirect(insn, 0, 0, indirect);
+  for (auto i = 0; i < insn->num_components; ++i) {
+ loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, 
indirect);
+  }
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 22/35] nvir/nir: implement intrinsic_discard(_if)

2018-04-16 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 740dee5c95a..40bc00d7e14 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1657,6 +1657,20 @@ Converter::visit(nir_intrinsic_instr *insn)
   loadImm(newDefs[1], mode);
   break;
}
+   case nir_intrinsic_discard:
+  mkOp(OP_DISCARD, TYPE_NONE, NULL);
+  break;
+   case nir_intrinsic_discard_if: {
+  Value *pred = new_LValue(func, FILE_PREDICATE);
+  if (insn->num_components > 1) {
+ ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");
+ assert(false);
+ return false;
+  }
+      mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), 
zero);
+  mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);
+  break;
+   }
default:
   ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 25/35] nvir/nir: implement nir_instr_type_tex

2018-04-16 Thread Karol Herbst
a lot of those fields are not valid for a lot of tex ops. Not quite sure if
it's worth the effort to check for those or just keep it like that. It seems
to kind of work.

v2: reworked offset handling
add tex support with indirect R/S arguments
handle GLSL_SAMPLER_DIM_EXTERNAL
drop reference in convert(glsl_sampler_dim&, bool, bool)
fix tg4 component selection
v5: fill up coords args with scratch values if coords provided is less than 
TexTarget.getArgCount()
v7: prepare for bindless_texture support

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 232 +
 1 file changed, 232 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index badb2398abb..553f74f7a75 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -82,6 +82,7 @@ private:
typedef std::unordered_map NirDefMap;
typedef std::unordered_map 
NirBlockMap;
 
+   TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);
LValues& convert(nir_alu_dest *);
BasicBlock* convert(nir_block *);
LValues& convert(nir_dest *);
@@ -124,6 +125,7 @@ private:
DataType getSType(nir_src&, bool isFloat, bool isSigned);
 
operation getOperation(nir_op);
+   operation getOperation(nir_texop);
operation preOperationNeeded(nir_op);
 
int getSubOp(nir_op);
@@ -144,6 +146,10 @@ private:
bool visit(nir_load_const_instr*);
bool visit(nir_loop *);
bool visit(nir_ssa_undef_instr *);
+   bool visit(nir_tex_instr *);
+
+   /* tex stuff */
+   Value* applyProjection(Value *src, Value *proj);
 
nir_shader *nir;
 
@@ -369,6 +375,36 @@ Converter::getOperation(nir_op op)
}
 }
 
+operation
+Converter::getOperation(nir_texop op)
+{
+   switch (op) {
+   case nir_texop_tex:
+  return OP_TEX;
+   case nir_texop_lod:
+  return OP_TXLQ;
+   case nir_texop_txb:
+  return OP_TXB;
+   case nir_texop_txd:
+  return OP_TXD;
+   case nir_texop_txf:
+   case nir_texop_txf_ms:
+  return OP_TXF;
+   case nir_texop_tg4:
+  return OP_TXG;
+   case nir_texop_txl:
+  return OP_TXL;
+   case nir_texop_query_levels:
+   case nir_texop_texture_samples:
+   case nir_texop_txs:
+  return OP_TXQ;
+   default:
+  ERROR("couldn't get operation for nir_texop %u\n", op);
+  assert(false);
+  return OP_NOP;
+   }
+}
+
 operation
 Converter::preOperationNeeded(nir_op op)
 {
@@ -1466,6 +1502,8 @@ Converter::visit(nir_instr *insn)
   return visit(nir_instr_as_load_const(insn));
case nir_instr_type_ssa_undef:
   return visit(nir_instr_as_ssa_undef(insn));
+   case nir_instr_type_tex:
+  return visit(nir_instr_as_tex(insn));
default:
   ERROR("unknown nir_instr type %u\n", insn->type);
   return false;
@@ -2173,6 +2211,200 @@ Converter::visit(nir_ssa_undef_instr *insn)
return true;
 }
 
+#define CASE_SAMPLER(ty) \
+   case GLSL_SAMPLER_DIM_ ## ty : \
+  if (isArray && !isShadow) \
+ return TEX_TARGET_ ## ty ## _ARRAY; \
+  else if (!isArray && isShadow) \
+ return TEX_TARGET_## ty ## _SHADOW; \
+  else if (isArray && isShadow) \
+ return TEX_TARGET_## ty ## _ARRAY_SHADOW; \
+  else \
+ return TEX_TARGET_ ## ty
+
+TexTarget
+Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)
+{
+   switch (dim) {
+   CASE_SAMPLER(1D);
+   CASE_SAMPLER(2D);
+   CASE_SAMPLER(CUBE);
+   case GLSL_SAMPLER_DIM_3D:
+  return TEX_TARGET_3D;
+   case GLSL_SAMPLER_DIM_MS:
+  if (isArray)
+ return TEX_TARGET_2D_MS_ARRAY;
+  return TEX_TARGET_2D_MS;
+   case GLSL_SAMPLER_DIM_RECT:
+  if (isShadow)
+ return TEX_TARGET_RECT_SHADOW;
+  return TEX_TARGET_RECT;
+   case GLSL_SAMPLER_DIM_BUF:
+  return TEX_TARGET_BUFFER;
+   case GLSL_SAMPLER_DIM_EXTERNAL:
+  return TEX_TARGET_2D;
+   default:
+  ERROR("unknown glsl_sampler_dim %u\n", dim);
+  assert(false);
+  return TEX_TARGET_COUNT;
+   }
+}
+#undef CASE_SAMPLER
+
+Value*
+Converter::applyProjection(Value *src, Value *proj)
+{
+   if (!proj)
+  return src;
+   return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);
+}
+
+bool
+Converter::visit(nir_tex_instr *insn)
+{
+   switch (insn->op) {
+   case nir_texop_lod:
+   case nir_texop_query_levels:
+   case nir_texop_tex:
+   case nir_texop_texture_samples:
+   case nir_texop_tg4:
+   case nir_texop_txb:
+   case nir_texop_txd:
+   case nir_texop_txf:
+   case nir_texop_txf_ms:
+   case nir_texop_txl:
+   case nir_texop_txs: {
+      LValues &newDefs = convert(&insn->dest);
+  std::vector srcs;
+  std::vector defs;
+  std::vector offsets;
+  uint8_t mask = 0;
+  bool lz = false;
+  Value *proj = nullptr;
+  

[Mesa-dev] [PATCH v7 09/35] nvir/nir: run some passes to make the conversion easier

2018-04-16 Thread Karol Herbst
v2: add constant_folding
v6: print non final NIR only for verbose debugging

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 39 ++
 1 file changed, 39 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index b22c62fd434..0b7a5981f73 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -31,6 +31,12 @@
 #include "codegen/nv50_ir_lowering_helper.h"
 #include "codegen/nv50_ir_util.h"
 
+static int
+type_size(const struct glsl_type *type)
+{
+   return glsl_count_attribute_slots(type, false);
+}
+
 namespace {
 
 using namespace nv50_ir;
@@ -52,6 +58,39 @@ Converter::Converter(Program *prog, nir_shader *nir, 
nv50_ir_prog_info *info)
 bool
 Converter::run()
 {
+   bool progress;
+
+   if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
+  nir_print_shader(nir, stderr);
+
+   NIR_PASS_V(nir, nir_lower_io, nir_var_all, type_size, 
(nir_lower_io_options)0);
+   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
+   NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
+   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
+   NIR_PASS_V(nir, nir_lower_alu_to_scalar);
+   NIR_PASS_V(nir, nir_lower_phis_to_scalar);
+
+   do {
+  progress = false;
+  /* some ops depend on having constants as sources, but those can also
+   * point to expressions made from constants like 0 + 1
+   */
+  NIR_PASS(progress, nir, nir_opt_constant_folding);
+  NIR_PASS(progress, nir, nir_copy_prop);
+  NIR_PASS(progress, nir, nir_opt_dce);
+  NIR_PASS(progress, nir, nir_opt_dead_cf);
+   } while (progress);
+
+   NIR_PASS_V(nir, nir_lower_locals_to_regs);
+   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_local);
+   NIR_PASS_V(nir, nir_convert_from_ssa, true);
+
+   /* Garbage collect dead instructions */
+   nir_sweep(nir);
+
+   if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
+  nir_print_shader(nir, stderr);
+
return false;
 }
 
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 21/35] nvir/nir: implement load_(interpolated_)input/output

2018-04-16 Thread Karol Herbst
v3: add load_output
v4: use smarter getIndirect helper
use new getSlotAddress helper
v5: don't use const_offset directly
fix for indirects
v6: add support for interpolateAt
v7: fix compiler warnings
add load_barycentric_sample
handle load_output for fragment shaders

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 134 +
 1 file changed, 134 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index b34fe7739d8..740dee5c95a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1523,6 +1523,140 @@ Converter::visit(nir_intrinsic_instr *insn)
   }
   break;
}
+   case nir_intrinsic_load_input:
+   case nir_intrinsic_load_interpolated_input:
+   case nir_intrinsic_load_output: {
+  LValues &newDefs = convert(&insn->dest);
+
+  /* FBFetch */
+  if (prog->getType() == Program::TYPE_FRAGMENT &&
+  op == nir_intrinsic_load_output) {
+ std::vector<Value*> defs, srcs;
+ uint8_t mask = 0;
+
+ srcs.push_back(getSSA());
+ srcs.push_back(getSSA());
+ Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 
0));
+ Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 
1));
+ mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;
+ mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;
+
+ srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 
0)));
+ srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), 
mkSysVal(SV_SAMPLE_INDEX, 0)));
+
+ for (auto i = 0u; i < insn->num_components; ++i) {
+defs.push_back(newDefs[i]);
+mask |= 1 << i;
+ }
+
+ TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, 
defs, srcs);
+ texi->tex.levelZero = 1;
+ texi->tex.mask = mask;
+ texi->tex.useOffsets = 0;
+ texi->tex.r = 0xffff;
+ texi->tex.s = 0xffff;
+
+ info->prop.fp.readsFramebuffer = true;
+ break;
+  }
+
+  const DataType dType = getDType(insn);
+  Value *indirect;
+  bool input = op != nir_intrinsic_load_output;
+  operation nvirOp;
+  uint32_t mode = 0;
+
+  auto idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input 
? 1 : 0, 0, indirect);
+  nv50_ir_varying& vary = input ? info->in[idx] : info->out[idx];
+
+  /* see load_barycentric_* handling */
+  if (prog->getType() == Program::TYPE_FRAGMENT) {
+ mode = translateInterpMode(&vary, nvirOp);
+ if (op == nir_intrinsic_load_interpolated_input) {
+ImmediateValue immMode;
+if (getSrc(&insn->src[0], 
1)->getUniqueInsn()->src(0).getImmediate(immMode))
+   mode |= immMode.reg.data.u32;
+ }
+  }
+
+  for (auto i = 0u; i < insn->num_components; ++i) {
+ uint32_t address = getSlotAddress(insn, idx, i);
+ Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : 
FILE_SHADER_OUTPUT, 0, dType, address);
+ if (prog->getType() == Program::TYPE_FRAGMENT) {
+int s = 1;
+if (typeSizeof(dType) == 8) {
+   Value *lo = getSSA();
+   Value *hi = getSSA();
+   Instruction *interp;
+
+   interp = mkOp1(nvirOp, TYPE_U32, lo, sym);
+   if (nvirOp == OP_PINTERP)
+  interp->setSrc(s, fp.position);
+   if (mode & NV50_IR_INTERP_OFFSET)
+  interp->setSrc(s, getSrc(&insn->src[0], 0));
+   interp->setInterpolate(mode);
+   interp->setIndirect(0, 0, indirect);
+
+   Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : 
FILE_SHADER_OUTPUT, 0, dType, address + 4);
+   interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);
+   if (nvirOp == OP_PINTERP)
+  interp->setSrc(s++, fp.position);
+   if (mode & NV50_IR_INTERP_OFFSET)
+  interp->setSrc(s++, getSrc(&insn->src[0], 0));
+   interp->setInterpolate(mode);
+   interp->setIndirect(0, 0, indirect);
+
+   mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);
+} else {
+   Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);
+   if (nvirOp == OP_PINTERP)
+  interp->setSrc(s++, fp.position);
+   if (mode & NV50_IR_INTERP_OFFSET)
+  interp->setSrc(s++, getSrc(&insn->src[0], 0));
+   interp->setInterpolate(mode);
+   interp->setIndirect(0, 0, indirect);
+}
+ } else {
+mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;
+ }
+  }
+  break;
+   }
+   case nir_intrinsic_load_barycentric_at_offset:
+   case 

[Mesa-dev] [PATCH v7 14/35] nvir/nir: parse NIR shader info

2018-04-16 Thread Karol Herbst
v2: parse a few more fields
v3: add special handling for GL_ISOLINES

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 60 ++
 1 file changed, 60 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 00ca1ae1512..4bb99c6635c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -96,6 +96,7 @@ private:
DataType getSType(nir_src&, bool isFloat, bool isSigned);
 
bool assignSlots();
+   bool parseNIR();
 
nir_shader *nir;
 
@@ -996,6 +997,60 @@ Converter::storeTo(nir_intrinsic_instr *insn, DataFile 
file, operation op,
}
 }
 
+bool
+Converter::parseNIR()
+{
+   info->io.clipDistances = nir->info.clip_distance_array_size;
+   info->io.cullDistances = nir->info.cull_distance_array_size;
+
+   switch(prog->getType()) {
+   case Program::TYPE_COMPUTE:
+  info->prop.cp.numThreads[0] = nir->info.cs.local_size[0];
+  info->prop.cp.numThreads[1] = nir->info.cs.local_size[1];
+  info->prop.cp.numThreads[2] = nir->info.cs.local_size[2];
+  info->bin.smemSize = nir->info.cs.shared_size;
+  break;
+   case Program::TYPE_FRAGMENT:
+  info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;
+  info->prop.fp.persampleInvocation =
+ (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) ||
+ (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
+  info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
+  info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
+  info->prop.fp.usesSampleMaskIn =
+ !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
+  break;
+   case Program::TYPE_GEOMETRY:
+  info->prop.gp.inputPrim = nir->info.gs.input_primitive;
+  info->prop.gp.instanceCount = nir->info.gs.invocations;
+  info->prop.gp.maxVertices = nir->info.gs.vertices_out;
+  info->prop.gp.outputPrim = nir->info.gs.output_primitive;
+  break;
+   case Program::TYPE_TESSELLATION_CONTROL:
+   case Program::TYPE_TESSELLATION_EVAL:
+  if (nir->info.tess.primitive_mode == GL_ISOLINES)
+ info->prop.tp.domain = GL_LINES;
+  else
+ info->prop.tp.domain = nir->info.tess.primitive_mode;
+  info->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;
+  info->prop.tp.outputPrim =
+ nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;
+  info->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;
+  info->prop.tp.winding = !nir->info.tess.ccw;
+  break;
+   case Program::TYPE_VERTEX:
+  info->prop.vp.usesDrawParameters =
+ (nir->info.system_values_read & 
BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX)) ||
+ (nir->info.system_values_read & 
BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE)) ||
+ (nir->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID));
+  break;
+   default:
+  break;
+   }
+
+   return true;
+}
+
 bool
 Converter::run()
 {
@@ -1029,6 +1084,11 @@ Converter::run()
/* Garbage collect dead instructions */
nir_sweep(nir);
 
+   if (!parseNIR()) {
+  ERROR("Couldn't prase NIR!\n");
+  return false;
+   }
+
if (!assignSlots()) {
   ERROR("Couldn't assign slots!\n");
   return false;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v7 12/35] nvir/nir: run assignSlots

2018-04-16 Thread Karol Herbst
v2: add support for geometry shaders
set idx
add some missing mappings
fix for 64bit inputs/outputs
fix up some FP color output index messup
parse centroid flag
v3: fix arrays in outputs as well
fix input/output size calculation for tessellation shaders
v4: add getSlotAddress helper
fix for 64 bit typed inputs
v5: change getSlotAddress interface for easier use
fix sample inputs
fix slot counting for mat
v7: fix driver_location of images

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 626 +
 1 file changed, 626 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 89c55a08ef8..1d1c4526d2b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -69,6 +69,13 @@ private:
uint32_t getIndirect(nir_src *, uint8_t, Value*&);
uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value*&);
 
+   uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);
+
+   void setInterpolate(nv50_ir_varying *,
+   decltype(nir_variable().data.interpolation),
+   bool centroid,
+   unsigned semantics);
+
bool isFloatType(nir_alu_type);
bool isSignedType(nir_alu_type);
bool isResultFloat(nir_op);
@@ -81,6 +88,8 @@ private:
std::vector getSTypes(nir_alu_instr*);
DataType getSType(nir_src&, bool isFloat, bool isSigned);
 
+   bool assignSlots();
+
nir_shader *nir;
 
NirDefMap ssaDefs;
@@ -303,6 +312,618 @@ Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t 
s, uint8_t c, Value *&
return idx;
 }
 
+static void
+vert_attrib_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
+{
+   if (slot >= VERT_ATTRIB_GENERIC0) {
+  *name = TGSI_SEMANTIC_GENERIC;
+  *index = slot - VERT_ATTRIB_GENERIC0;
+  return;
+   }
+
+   if (slot == VERT_ATTRIB_POINT_SIZE) {
+  ERROR("unknown vert attrib slot %u\n", slot);
+  assert(false);
+  return;
+   }
+
+   if (slot >= VERT_ATTRIB_TEX0) {
+  *name = TGSI_SEMANTIC_TEXCOORD;
+  *index = slot - VERT_ATTRIB_TEX0;
+  return;
+   }
+
+   switch (slot) {
+   case VERT_ATTRIB_COLOR0:
+  *name = TGSI_SEMANTIC_COLOR;
+  *index = 0;
+  break;
+   case VERT_ATTRIB_COLOR1:
+  *name = TGSI_SEMANTIC_COLOR;
+  *index = 1;
+  break;
+   case VERT_ATTRIB_EDGEFLAG:
+  *name = TGSI_SEMANTIC_EDGEFLAG;
+  *index = 0;
+  break;
+   case VERT_ATTRIB_FOG:
+  *name = TGSI_SEMANTIC_FOG;
+  *index = 0;
+  break;
+   case VERT_ATTRIB_NORMAL:
+  *name = TGSI_SEMANTIC_NORMAL;
+  *index = 0;
+  break;
+   case VERT_ATTRIB_POS:
+  *name = TGSI_SEMANTIC_POSITION;
+  *index = 0;
+  break;
+   default:
+  ERROR("unknown vert attrib slot %u\n", slot);
+  assert(false);
+  break;
+   }
+}
+
+static void
+varying_slot_to_tgsi_semantic(unsigned slot, unsigned *name, unsigned *index)
+{
+   if (slot >= VARYING_SLOT_PATCH0) {
+  *name = TGSI_SEMANTIC_PATCH;
+  *index = slot - VARYING_SLOT_PATCH0;
+  return;
+   }
+
+   if (slot >= VARYING_SLOT_VAR0) {
+  *name = TGSI_SEMANTIC_GENERIC;
+  *index = slot - VARYING_SLOT_VAR0;
+  return;
+   }
+
+   if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
+  *name = TGSI_SEMANTIC_TEXCOORD;
+  *index = slot - VARYING_SLOT_TEX0;
+  return;
+   }
+
+   switch (slot) {
+   case VARYING_SLOT_BFC0:
+  *name = TGSI_SEMANTIC_BCOLOR;
+  *index = 0;
+  break;
+   case VARYING_SLOT_BFC1:
+  *name = TGSI_SEMANTIC_BCOLOR;
+  *index = 1;
+  break;
+   case VARYING_SLOT_CLIP_DIST0:
+  *name = TGSI_SEMANTIC_CLIPDIST;
+  *index = 0;
+  break;
+   case VARYING_SLOT_CLIP_DIST1:
+  *name = TGSI_SEMANTIC_CLIPDIST;
+  *index = 1;
+  break;
+   case VARYING_SLOT_CLIP_VERTEX:
+  *name = TGSI_SEMANTIC_CLIPVERTEX;
+  *index = 0;
+  break;
+   case VARYING_SLOT_COL0:
+  *name = TGSI_SEMANTIC_COLOR;
+  *index = 0;
+  break;
+   case VARYING_SLOT_COL1:
+  *name = TGSI_SEMANTIC_COLOR;
+  *index = 1;
+  break;
+   case VARYING_SLOT_EDGE:
+  *name = TGSI_SEMANTIC_EDGEFLAG;
+  *index = 0;
+  break;
+   case VARYING_SLOT_FACE:
+  *name = TGSI_SEMANTIC_FACE;
+  *index = 0;
+  break;
+   case VARYING_SLOT_FOGC:
+  *name = TGSI_SEMANTIC_FOG;
+  *index = 0;
+  break;
+   case VARYING_SLOT_LAYER:
+  *name = TGSI_SEMANTIC_LAYER;
+  *index = 0;
+  break;
+   case VARYING_SLOT_PNTC:
+  *name = TGSI_SEMANTIC_PCOORD;
+  *index = 0;
+  break;
+   case VARYING_SLOT_POS:
+  *name = TGSI_SEMANTIC_POSITION;
+  *index = 0;
+  break;
+   case VARYING_SLOT_PRIMITIVE_ID:
+  *name = TGSI_SEMANTIC_PRIMID;

[Mesa-dev] [PATCH v7 15/35] nvir/nir: implement CFG handling

2018-04-16 Thread Karol Herbst
v6: fix loops with blocks at the end nothing points to
skip blocks with no instructions and no predecessors

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   | 270 -
 1 file changed, 268 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 4bb99c6635c..c2512b01d5a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -55,8 +55,10 @@ private:
typedef decltype(nir_ssa_def().index) NirSSADefIdx;
typedef decltype(nir_ssa_def().bit_size) NirSSADefBitSize;
typedef std::unordered_map NirDefMap;
+   typedef std::unordered_map 
NirBlockMap;
 
LValues& convert(nir_alu_dest *);
+   BasicBlock* convert(nir_block *);
LValues& convert(nir_dest *);
LValues& convert(nir_register *);
LValues& convert(nir_ssa_def *);
@@ -98,15 +100,46 @@ private:
bool assignSlots();
bool parseNIR();
 
+   bool visit(nir_block *);
+   bool visit(nir_cf_node *);
+   bool visit(nir_function *);
+   bool visit(nir_if *);
+   bool visit(nir_instr *);
+   bool visit(nir_jump_instr *);
+   bool visit(nir_loop *);
+
nir_shader *nir;
 
NirDefMap ssaDefs;
NirDefMap regDefs;
+   NirBlockMap blocks;
+   unsigned int curLoopDepth;
+
+   BasicBlock *exit;
+
+   union {
+  struct {
+ Value *position;
+  } fp;
+   };
 };
 
 Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
: ConverterCommon(prog, info),
- nir(nir) {}
+ nir(nir),
+ curLoopDepth(0) {}
+
+BasicBlock *
+Converter::convert(nir_block *block)
+{
+   NirBlockMap::iterator it = blocks.find(block->index);
+   if (it != blocks.end())
+  return (*it).second;
+
+   BasicBlock *bb = new BasicBlock(func);
+   blocks[block->index] = bb;
+   return bb;
+}
 
 bool
 Converter::isFloatType(nir_alu_type type)
@@ -1051,6 +1084,234 @@ Converter::parseNIR()
return true;
 }
 
+bool
+Converter::visit(nir_function *function)
+{
+   /* we only support emitting the main function for now */
+   assert(!strcmp(function->name, "main"));
+   assert(function->impl);
+
+   /* usually the blocks will set everything up, but main is special */
+   BasicBlock *entry = new BasicBlock(prog->main);
+   exit = new BasicBlock(prog->main);
+   blocks[nir_start_block(function->impl)->index] = entry;
+   prog->main->setEntry(entry);
+   prog->main->setExit(exit);
+
+   setPosition(entry, true);
+
+   switch (prog->getType()) {
+   case Program::TYPE_TESSELLATION_CONTROL:
+  outBase = mkOp2v(
+ OP_SUB, TYPE_U32, getSSA(),
+ mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
+ mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
+  break;
+   case Program::TYPE_FRAGMENT: {
+  Symbol *sv = mkSysVal(SV_POSITION, 3);
+  fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
+  fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
+  break;
+   }
+   default:
+  break;
+   }
+
+   nir_index_ssa_defs(function->impl);
+   foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
+  if (!visit(node))
+ return false;
+   }
+
+   bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);
+   setPosition(exit, true);
+
+   /* TODO: for non main function this needs to be a OP_RETURN */
+   mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
+   return true;
+}
+
+bool
+Converter::visit(nir_cf_node *node)
+{
+   switch (node->type) {
+   case nir_cf_node_block:
+  if (!visit(nir_cf_node_as_block(node)))
+ return false;
+  break;
+   case nir_cf_node_if:
+  if (!visit(nir_cf_node_as_if(node)))
+ return false;
+  break;
+   case nir_cf_node_loop:
+  if (!visit(nir_cf_node_as_loop(node)))
+ return false;
+  break;
+   default:
+  ERROR("unknown nir_cf_node type %u\n", node->type);
+  return false;
+   }
+   return true;
+}
+
+bool
+Converter::visit(nir_block *block)
+{
+   if (!block->predecessors->entries && block->instr_list.is_empty())
+  return true;
+
+   BasicBlock *bb = convert(block);
+
+   setPosition(bb, true);
+   nir_foreach_instr(insn, block) {
+  if (!visit(insn))
+ return false;
+   }
+   return true;
+}
+
+bool
+Converter::visit(nir_if *nif)
+{
+   DataType sType = getSType(nif->condition, false, false);
+   Value *src = getSrc(&nif->condition, 0);
+
+   nir_block *lastThen = nir_if_last_then_block(nif);
+   nir_block *lastElse = nir_if_last_else_block(nif);
+
+   assert(!lastThen->successors[1]);
+   assert(!lastElse->successors[1]);
+
+   BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
+   BasicBlock *elseBB = convert(nir_if_first_else_block(nif));
+
+   bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
+   bb->cfg.attach(>cfg, 

  1   2   >