Re: [Mesa-dev] [PATCH 3/3] mesa: rework how we free gl_shader_program_data

2017-11-07 Thread Tapani Pälli



On 11/08/2017 08:59 AM, Timothy Arceri wrote:



On 08/11/17 16:58, Tapani Pälli wrote:

One question below ...

On 11/08/2017 03:41 AM, Timothy Arceri wrote:

When I introduced gl_shader_program_data one of the intentions was to
fix a bug where a failed linking attempt freed data required by a
currently active program. However I seem to have failed to finish
hooking up the final steps required to have the data hang around.

Here we create a fresh instance of gl_shader_program_data every
time we link. gl_program has a reference to gl_shader_program_data
so it will be freed once the program is no longer active.

Cc: Neil Roberts 
Cc: "17.2 17.3" 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102177
---
  src/mesa/main/shaderobj.c   | 71 
+

  src/mesa/main/shaderobj.h   |  3 ++
  src/mesa/program/ir_to_mesa.cpp |  2 ++
  3 files changed, 35 insertions(+), 41 deletions(-)

diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c
index e2103bcde49..5501a0157db 100644
--- a/src/mesa/main/shaderobj.c
+++ b/src/mesa/main/shaderobj.c
@@ -192,7 +192,30 @@ _mesa_lookup_shader_err(struct gl_context *ctx, 
GLuint name, const char *caller)

/**/
  /*** Shader Program object functions ***/
/**/
+static void
+free_shader_program_data(struct gl_shader_program_data *data)
+{
+   if (data->UniformStorage) {
+  for (unsigned i = 0; i < data->NumUniformStorage; ++i)
+ _mesa_uniform_detach_all_driver_storage(&data->UniformStorage[i]);
+  ralloc_free(data->UniformStorage);
+   }
+
+   assert(data->InfoLog != NULL);
+   ralloc_free(data->InfoLog);
+
+   ralloc_free(data->UniformBlocks);
+
+   ralloc_free(data->ShaderStorageBlocks);
+   if (data->AtomicBuffers) {
+  ralloc_free(data->AtomicBuffers);
+   }
+
+   if (data->ProgramResourceList) {
+  ralloc_free(data->ProgramResourceList);
+   }
+}
  void
  _mesa_reference_shader_program_data(struct gl_context *ctx,
@@ -209,6 +232,7 @@ _mesa_reference_shader_program_data(struct 
gl_context *ctx,

    if (p_atomic_dec_zero(&oldData->RefCount)) {
   assert(ctx);
+ free_shader_program_data(oldData);
   ralloc_free(oldData);
    }
@@ -259,14 +283,16 @@ _mesa_reference_shader_program_(struct 
gl_context *ctx,

 }
  }
-static struct gl_shader_program_data *
-create_shader_program_data()
+struct gl_shader_program_data *
+_mesa_create_shader_program_data()
  {
 struct gl_shader_program_data *data;
 data = rzalloc(NULL, struct gl_shader_program_data);
 if (data)
    data->RefCount = 1;
+   data->InfoLog = ralloc_strdup(data, "");
+
 return data;
  }
@@ -286,8 +312,6 @@ init_shader_program(struct gl_shader_program *prog)
 prog->TransformFeedback.BufferMode = GL_INTERLEAVED_ATTRIBS;
 exec_list_make_empty(&prog->EmptyUniformLocations);
-
-   prog->data->InfoLog = ralloc_strdup(prog->data, "");
  }
  /**
@@ -300,7 +324,7 @@ _mesa_new_shader_program(GLuint name)
 shProg = rzalloc(NULL, struct gl_shader_program);
 if (shProg) {
    shProg->Name = name;
-  shProg->data = create_shader_program_data();
+  shProg->data = _mesa_create_shader_program_data();
    if (!shProg->data) {
   ralloc_free(shProg);
   return NULL;
@@ -310,7 +334,6 @@ _mesa_new_shader_program(GLuint name)
 return shProg;
  }
-
  /**
   * Clear (free) the shader program state that gets produced by 
linking.

   */
@@ -325,17 +348,6 @@ _mesa_clear_shader_program_data(struct 
gl_context *ctx,

    }
 }
-   shProg->data->linked_stages = 0;
-
-   if (shProg->data->UniformStorage) {
-  for (unsigned i = 0; i < shProg->data->NumUniformStorage; ++i)
- _mesa_uniform_detach_all_driver_storage(&shProg->data->
-    UniformStorage[i]);
-  ralloc_free(shProg->data->UniformStorage);
-  shProg->data->NumUniformStorage = 0;
-  shProg->data->UniformStorage = NULL;
-   }
-
 if (shProg->UniformRemapTable) {
    ralloc_free(shProg->UniformRemapTable);
    shProg->NumUniformRemapTable = 0;
@@ -347,29 +359,7 @@ _mesa_clear_shader_program_data(struct 
gl_context *ctx,

    shProg->UniformHash = NULL;
 }
-   assert(shProg->data->InfoLog != NULL);
-   ralloc_free(shProg->data->InfoLog);
-   shProg->data->InfoLog = ralloc_strdup(shProg->data, "");
-
-   ralloc_free(shProg->data->UniformBlocks);
-   shProg->data->UniformBlocks = NULL;
-   shProg->data->NumUniformBlocks = 0;
-
-   ralloc_free(shProg->data->ShaderStorageBlocks);
-   shProg->data->ShaderStorageBlocks = NULL;
-   shProg->data->NumShaderStorageBlocks = 0;
-
-   if (shProg->data->AtomicBuffers) {
-  ralloc_free(shProg->data->AtomicBuffers);
-  shProg->data->AtomicBuffers = NULL;
-  shProg->data->NumAtomicBuffers = 0;
-   }
-
-   if (shProg->data->ProgramResourceL

Re: [Mesa-dev] [RFC v5 08/19] egl/x11: Re-allocate buffers if format is suboptimal

2017-11-07 Thread Constantine Kharlamov
[snip]
> @@ -372,10 +372,22 @@ dri3_handle_present_event(struct loader_dri3_drawable 
> *draw,
>   switch (ce->mode) {
>   case XCB_PRESENT_COMPLETE_MODE_FLIP:
>  draw->flipping = true;
> +for (int b = 0; b < sizeof(draw->buffers) / 
> sizeof(draw->buffers[0]); b++) {
> +   if (draw->buffers[b])
> +  draw->buffers[b]->realloc_suboptimal = true;
> +}
>  break;
>   case XCB_PRESENT_COMPLETE_MODE_COPY:
>  draw->flipping = false;
>  break;
> +#if XCB_PRESENT_MAJOR_VERSION > 1 || (XCB_PRESENT_MAJOR_VERSION == 1 && 
> XCB_PRESENT_MINOR_VERSION >= 1)
> + case XCB_PRESENT_COMPLETE_MODE_SUBOPTIMAL_COPY:
> +draw->flipping = false;
> +for (int b = 0; b < sizeof(draw->buffers) / 
> sizeof(draw->buffers[0]); b++) {
> +   if (draw->buffers[b])
> +  draw->buffers[b]->suboptimal = true;
> +}
> +#endif

Probably worth using "ARRAY_SIZE(draw->buffers)"; ARRAY_SIZE is in util/macros.h
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] mesa: rework how we free gl_shader_program_data

2017-11-07 Thread Timothy Arceri



On 08/11/17 16:58, Tapani Pälli wrote:

One question below ...

On 11/08/2017 03:41 AM, Timothy Arceri wrote:

When I introduced gl_shader_program_data one of the intentions was to
fix a bug where a failed linking attempt freed data required by a
currently active program. However I seem to have failed to finish
hooking up the final steps required to have the data hang around.

Here we create a fresh instance of gl_shader_program_data every
time we link. gl_program has a reference to gl_shader_program_data
so it will be freed once the program is no longer active.

Cc: Neil Roberts 
Cc: "17.2 17.3" 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102177
---
  src/mesa/main/shaderobj.c   | 71 
+

  src/mesa/main/shaderobj.h   |  3 ++
  src/mesa/program/ir_to_mesa.cpp |  2 ++
  3 files changed, 35 insertions(+), 41 deletions(-)

diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c
index e2103bcde49..5501a0157db 100644
--- a/src/mesa/main/shaderobj.c
+++ b/src/mesa/main/shaderobj.c
@@ -192,7 +192,30 @@ _mesa_lookup_shader_err(struct gl_context *ctx, 
GLuint name, const char *caller)
  
/**/
  /*** Shader Program object functions
***/
  
/**/

+static void
+free_shader_program_data(struct gl_shader_program_data *data)
+{
+   if (data->UniformStorage) {
+  for (unsigned i = 0; i < data->NumUniformStorage; ++i)
+ 
_mesa_uniform_detach_all_driver_storage(&data->UniformStorage[i]);

+  ralloc_free(data->UniformStorage);
+   }
+
+   assert(data->InfoLog != NULL);
+   ralloc_free(data->InfoLog);
+
+   ralloc_free(data->UniformBlocks);
+
+   ralloc_free(data->ShaderStorageBlocks);
+   if (data->AtomicBuffers) {
+  ralloc_free(data->AtomicBuffers);
+   }
+
+   if (data->ProgramResourceList) {
+  ralloc_free(data->ProgramResourceList);
+   }
+}
  void
  _mesa_reference_shader_program_data(struct gl_context *ctx,
@@ -209,6 +232,7 @@ _mesa_reference_shader_program_data(struct 
gl_context *ctx,

    if (p_atomic_dec_zero(&oldData->RefCount)) {
   assert(ctx);
+ free_shader_program_data(oldData);
   ralloc_free(oldData);
    }
@@ -259,14 +283,16 @@ _mesa_reference_shader_program_(struct 
gl_context *ctx,

 }
  }
-static struct gl_shader_program_data *
-create_shader_program_data()
+struct gl_shader_program_data *
+_mesa_create_shader_program_data()
  {
 struct gl_shader_program_data *data;
 data = rzalloc(NULL, struct gl_shader_program_data);
 if (data)
    data->RefCount = 1;
+   data->InfoLog = ralloc_strdup(data, "");
+
 return data;
  }
@@ -286,8 +312,6 @@ init_shader_program(struct gl_shader_program *prog)
 prog->TransformFeedback.BufferMode = GL_INTERLEAVED_ATTRIBS;
 exec_list_make_empty(&prog->EmptyUniformLocations);
-
-   prog->data->InfoLog = ralloc_strdup(prog->data, "");
  }
  /**
@@ -300,7 +324,7 @@ _mesa_new_shader_program(GLuint name)
 shProg = rzalloc(NULL, struct gl_shader_program);
 if (shProg) {
    shProg->Name = name;
-  shProg->data = create_shader_program_data();
+  shProg->data = _mesa_create_shader_program_data();
    if (!shProg->data) {
   ralloc_free(shProg);
   return NULL;
@@ -310,7 +334,6 @@ _mesa_new_shader_program(GLuint name)
 return shProg;
  }
-
  /**
   * Clear (free) the shader program state that gets produced by linking.
   */
@@ -325,17 +348,6 @@ _mesa_clear_shader_program_data(struct gl_context 
*ctx,

    }
 }
-   shProg->data->linked_stages = 0;
-
-   if (shProg->data->UniformStorage) {
-  for (unsigned i = 0; i < shProg->data->NumUniformStorage; ++i)
- _mesa_uniform_detach_all_driver_storage(&shProg->data->
-    UniformStorage[i]);
-  ralloc_free(shProg->data->UniformStorage);
-  shProg->data->NumUniformStorage = 0;
-  shProg->data->UniformStorage = NULL;
-   }
-
 if (shProg->UniformRemapTable) {
    ralloc_free(shProg->UniformRemapTable);
    shProg->NumUniformRemapTable = 0;
@@ -347,29 +359,7 @@ _mesa_clear_shader_program_data(struct gl_context 
*ctx,

    shProg->UniformHash = NULL;
 }
-   assert(shProg->data->InfoLog != NULL);
-   ralloc_free(shProg->data->InfoLog);
-   shProg->data->InfoLog = ralloc_strdup(shProg->data, "");
-
-   ralloc_free(shProg->data->UniformBlocks);
-   shProg->data->UniformBlocks = NULL;
-   shProg->data->NumUniformBlocks = 0;
-
-   ralloc_free(shProg->data->ShaderStorageBlocks);
-   shProg->data->ShaderStorageBlocks = NULL;
-   shProg->data->NumShaderStorageBlocks = 0;
-
-   if (shProg->data->AtomicBuffers) {
-  ralloc_free(shProg->data->AtomicBuffers);
-  shProg->data->AtomicBuffers = NULL;
-  shProg->data->NumAtomicBuffers = 0;
-   }
-
-   if (shProg->data->ProgramResourceList

Re: [Mesa-dev] [PATCH 3/3] mesa: rework how we free gl_shader_program_data

2017-11-07 Thread Timothy Arceri

On 08/11/17 16:53, Kenneth Graunke wrote:

On Tuesday, November 7, 2017 5:41:59 PM PST Timothy Arceri wrote:

When I introduced gl_shader_program_data one of the intentions was to
fix a bug where a failed linking attempt freed data required by a
currently active program. However I seem to have failed to finish
hooking up the final steps required to have the data hang around.

Here we create a fresh instance of gl_shader_program_data every
time we link. gl_program has a reference to gl_shader_program_data
so it will be freed once the program is no longer active.

Cc: Neil Roberts 
Cc: "17.2 17.3" 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102177
---
  src/mesa/main/shaderobj.c   | 71 +
  src/mesa/main/shaderobj.h   |  3 ++
  src/mesa/program/ir_to_mesa.cpp |  2 ++
  3 files changed, 35 insertions(+), 41 deletions(-)

diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c
index e2103bcde49..5501a0157db 100644
--- a/src/mesa/main/shaderobj.c
+++ b/src/mesa/main/shaderobj.c
@@ -192,7 +192,30 @@ _mesa_lookup_shader_err(struct gl_context *ctx, GLuint 
name, const char *caller)
  /**/
  /*** Shader Program object functions***/
  /**/
+static void
+free_shader_program_data(struct gl_shader_program_data *data)
+{


I'm not sure why you need to ralloc_free things explicitly here.

Most of these fields appear to be ralloc'd using "data" as the context.
So, I'm not sure why you need to ralloc_free them all explicitly here.
When you ralloc_free data (in the caller), it will free them too.

Some of them weren't, but isn't that what you fixed in patch 2?


Yeah, I did this first, then noticed it was crashing, so I fixed the 
rallocs in patch 2. After sending, I noticed I could probably drop the 
rest of these ralloc_free() calls and just leave the 
_mesa_uniform_detach_all_driver_storage() calls. I'll fix this up locally.





Otherwise, this looks good to me...hopefully Neil can take a look too...


+   if (data->UniformStorage) {
+  for (unsigned i = 0; i < data->NumUniformStorage; ++i)
+ _mesa_uniform_detach_all_driver_storage(&data->UniformStorage[i]);
+  ralloc_free(data->UniformStorage);
+   }
+
+   assert(data->InfoLog != NULL);
+   ralloc_free(data->InfoLog);
+
+   ralloc_free(data->UniformBlocks);
+
+   ralloc_free(data->ShaderStorageBlocks);
+   if (data->AtomicBuffers) {
+  ralloc_free(data->AtomicBuffers);
+   }
+
+   if (data->ProgramResourceList) {
+  ralloc_free(data->ProgramResourceList);
+   }
+}
  
  void

  _mesa_reference_shader_program_data(struct gl_context *ctx,
@@ -209,6 +232,7 @@ _mesa_reference_shader_program_data(struct gl_context *ctx,
  
if (p_atomic_dec_zero(&oldData->RefCount)) {

   assert(ctx);
+ free_shader_program_data(oldData);
   ralloc_free(oldData);
}
  
@@ -259,14 +283,16 @@ _mesa_reference_shader_program_(struct gl_context *ctx,

 }
  }
  
-static struct gl_shader_program_data *

-create_shader_program_data()
+struct gl_shader_program_data *
+_mesa_create_shader_program_data()
  {
 struct gl_shader_program_data *data;
 data = rzalloc(NULL, struct gl_shader_program_data);
 if (data)
data->RefCount = 1;
  
+   data->InfoLog = ralloc_strdup(data, "");

+
 return data;
  }
  
@@ -286,8 +312,6 @@ init_shader_program(struct gl_shader_program *prog)

 prog->TransformFeedback.BufferMode = GL_INTERLEAVED_ATTRIBS;
  
 exec_list_make_empty(&prog->EmptyUniformLocations);

-
-   prog->data->InfoLog = ralloc_strdup(prog->data, "");
  }
  
  /**

@@ -300,7 +324,7 @@ _mesa_new_shader_program(GLuint name)
 shProg = rzalloc(NULL, struct gl_shader_program);
 if (shProg) {
shProg->Name = name;
-  shProg->data = create_shader_program_data();
+  shProg->data = _mesa_create_shader_program_data();
if (!shProg->data) {
   ralloc_free(shProg);
   return NULL;
@@ -310,7 +334,6 @@ _mesa_new_shader_program(GLuint name)
 return shProg;
  }
  
-


Bonus whitespace change


  /**
   * Clear (free) the shader program state that gets produced by linking.
   */
@@ -325,17 +348,6 @@ _mesa_clear_shader_program_data(struct gl_context *ctx,
}
 }
  
-   shProg->data->linked_stages = 0;

-
-   if (shProg->data->UniformStorage) {
-  for (unsigned i = 0; i < shProg->data->NumUniformStorage; ++i)
- _mesa_uniform_detach_all_driver_storage(&shProg->data->
-UniformStorage[i]);
-  ralloc_free(shProg->data->UniformStorage);
-  shProg->data->NumUniformStorage = 0;
-  shProg->data->UniformStorage = NULL;
-   }
-
 if (shProg->UniformRemapTable) {
ralloc_free(shProg->UniformRemapTable);
shProg->NumUniformRemapTable = 0;
@@ -347,29 +359,7 @@ _mesa_clear_sha

Re: [Mesa-dev] [PATCH v2] glsl: Make #pragma STDGL invariant(all) only modify outputs.

2017-11-07 Thread Iago Toral
Yeah, this is nicer :)

Reviewed-by: Iago Toral Quiroga 

On Tue, 2017-11-07 at 10:20 -0800, Kenneth Graunke wrote:
> According to the GLSL ES 3.20, GLSL 4.50, and GLSL 1.20 specs:
> 
>    "To force all output variables to be invariant, use the pragma
> 
>    #pragma STDGL invariant(all)
> 
> before all declarations in a shader."
> 
> Notably, this is only supposed to affect output
> variables.  Furthermore,
> 
>    "Only variables output from a shader can be candidates for
> invariance."
> 
> It looks like this has been wrong since we first supported the pragma
> in
> 2011 (commit 86b4398cd158024f6be9fa830554a11c2a7ebe0c).
> 
> Fixes dEQP-
> GLES2.functional.shaders.preprocessor.pragmas.pragma_fragment.
> 
> v2: Now that all cases are identical (other than compute shaders,
> which
> have no output variables anyway), we can drop the switch
> statement
> entirely.  We also don't need the current_function == NULL check;
> this was a holdover from when we had a single var_mode_out for
> both
> function parameters and shader varyings, in the bad old days.
> 
> Reviewed-by: Iago Toral Quiroga  [v1]
> Reviewed-by: Ilia Mirkin  [v1]
> ---
>  src/compiler/glsl/ast_to_hir.cpp | 26 ++
>  1 file changed, 2 insertions(+), 24 deletions(-)
> 
> Good call, Ilia :)  I'd originally had a /* Invariance isn't
> meaningful
> for fragment shader outputs */ comment, but then I looked elsewhere
> in
> the file and realized that it actually was allowed.  So, I changed
> the
> case, but didn't think to combine it.
> 
> It turns out we can combine all of them...here's a better version.
> 
> diff --git a/src/compiler/glsl/ast_to_hir.cpp
> b/src/compiler/glsl/ast_to_hir.cpp
> index 441404f86d3..1794a1af5cb 100644
> --- a/src/compiler/glsl/ast_to_hir.cpp
> +++ b/src/compiler/glsl/ast_to_hir.cpp
> @@ -4077,30 +4077,8 @@ apply_type_qualifier_to_variable(const struct
> ast_type_qualifier *qual,
>    }
> }
>  
> -   if (state->all_invariant && (state->current_function == NULL)) {
> -  switch (state->stage) {
> -  case MESA_SHADER_VERTEX:
> - if (var->data.mode == ir_var_shader_out)
> -var->data.invariant = true;
> - break;
> -  case MESA_SHADER_TESS_CTRL:
> -  case MESA_SHADER_TESS_EVAL:
> -  case MESA_SHADER_GEOMETRY:
> - if ((var->data.mode == ir_var_shader_in)
> - || (var->data.mode == ir_var_shader_out))
> -var->data.invariant = true;
> - break;
> -  case MESA_SHADER_FRAGMENT:
> - if (var->data.mode == ir_var_shader_in)
> -var->data.invariant = true;
> - break;
> -  case MESA_SHADER_COMPUTE:
> - /* Invariance isn't meaningful in compute shaders. */
> - break;
> -  default:
> - break;
> -  }
> -   }
> +   if (state->all_invariant && var->data.mode == ir_var_shader_out)
> +  var->data.invariant = true;
>  
> var->data.interpolation =
>    interpret_interpolation_qualifier(qual, var->type,
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3] i965: expose SRGB visuals and turn on EGL_KHR_gl_colorspace

2017-11-07 Thread Tapani Pälli



On 11/08/2017 07:25 AM, Kenneth Graunke wrote:

On Thursday, November 2, 2017 1:48:34 AM PST Tapani Pälli wrote:

Patch exposes sRGB visuals and adds DRI integer query support for
__DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB. Further changes make sure that
we mark if the app explicitly wanted sRGB and for these framebuffers
we don't turn sRGB off in intel_gles3_srgb_workaround. This way we
keep compatibility for existing applications relying on default sRGB
and only add more visual support.

With this change, following dEQP tests start to pass:

dEQP-EGL.functional.wide_color.window__colorspace_srgb
dEQP-EGL.functional.wide_color.pbuffer__colorspace_srgb

v2: some code cleanup (Emil Velikov)
 update num_formats correctly (reported by dev...@gmail.com)

v3: cleanup, remove redundant is_srgb
 rename explicit_srgb as 'need_srgb' to follow style better

Signed-off-by: Tapani Pälli 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102264
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102354
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102503
---
  src/mesa/drivers/dri/i965/brw_context.c  | 16 ++--
  src/mesa/drivers/dri/i965/intel_fbo.h|  5 +
  src/mesa/drivers/dri/i965/intel_screen.c | 13 -
  3 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 037e349fdb..0b8134ae9f 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -1142,8 +1142,8 @@ intelUnbindContext(__DRIcontext * driContextPriv)
   *
   * Unfortunately, renderbuffer setup happens before a context is created.  So
   * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
- * context (without an sRGB visual, though we don't have sRGB visuals exposed
- * yet), we go turn that back off before anyone finds out.
+ * context (without an sRGB visual), we go turn that back off before anyone
+ * finds out.
   */
  static void
  intel_gles3_srgb_workaround(struct brw_context *brw,
@@ -1154,15 +1154,19 @@ intel_gles3_srgb_workaround(struct brw_context *brw,
 if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
return;
  
-   /* Some day when we support the sRGB capable bit on visuals available for

-* GLES, we'll need to respect that and not disable things here.
-*/
-   fb->Visual.sRGBCapable = false;
 for (int i = 0; i < BUFFER_COUNT; i++) {
struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer;
+
+  /* Check if sRGB was specifically asked for. */
+  struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, i);
+  if (irb && irb->need_srgb)
+ return;
+
if (rb)
   rb->Format = _mesa_get_srgb_format_linear(rb->Format);
 }
+   /* Disable sRGB from framebuffers that are not compatible. */
+   fb->Visual.sRGBCapable = false;
  }
  
  GLboolean

diff --git a/src/mesa/drivers/dri/i965/intel_fbo.h 
b/src/mesa/drivers/dri/i965/intel_fbo.h
index 1e2494286b..608a1c4e7d 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.h
+++ b/src/mesa/drivers/dri/i965/intel_fbo.h
@@ -116,6 +116,11 @@ struct intel_renderbuffer
  * for the duration of a mapping.
  */
 bool singlesample_mt_is_tmp;
+
+   /**
+* Set to true when application specifically asked for a sRGB visual.
+*/
+   bool need_srgb;
  };
  
  
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c

index 10064c3236..90303df899 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1384,6 +1384,9 @@ brw_query_renderer_integer(__DRIscreen *dri_screen,
  0, BRW_CONTEXT_MEDIUM_PRIORITY) == 0)
   value[0] |= __DRI2_RENDERER_HAS_CONTEXT_PRIORITY_MEDIUM;
return 0;
+   case __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB:
+  value[0] = 
screen->mesa_format_supports_render[MESA_FORMAT_B8G8R8A8_SRGB];


I think it would be better to do:

   value[0] = true;

as we may add other sRGB formats in the future.  In particular, I'm
thinking about Mario's 10-bit visual series.  We'd probably want to
expose this property regardless of whether we expose B8G8R8A8_SRGB,
R8G8B8A8_SRGB, B10G10R10A2_SRGB, or whatever...and checking one of
them would be confusing.

Plus, it's always true, so there's not much use in checking it...


Yes, this makes sense.


+  return 0;
 default:
return driQueryRendererIntegerCommon(dri_screen, param, value);
 }
@@ -1544,13 +1547,18 @@ intelCreateBuffer(__DRIscreen *dri_screen,
fb->Visual.sRGBCapable = true;
 }
  
+   /* mesaVis->sRGBCapable was set, user is asking for sRGB */

+   bool srgb_cap_set = mesaVis->redBits >= 8 && mesaVis->sRGBCapable;
+
 /* setup the hardware-based renderbuffers */
 rb = intel_create_winsys_renderbuffer(screen, rgbFormat, num_samples);
 _mesa_attach_and_own_rb(fb, BUFFER_F

Re: [Mesa-dev] [PATCH 2/2] r600: use the clamped versions of rcp/rsq for eg/cayman.

2017-11-07 Thread Ilia Mirkin
Actually cayman gets half of it - it gets the abs, but not clamped. I
wonder what happens if you go the other way -- use the IEEE version of
the op for RSQ() (presumably you're not testing this on cayman).

On Wed, Nov 8, 2017 at 1:18 AM, Ilia Mirkin  wrote:
> tgsi_rsq appears to ignore the passed-in op and always puts in
> ALU_OP1_RECIPSQRT_CLAMPED anyways. It also sticks an absolute value on
> the RSQ() argument. This only happens for eg, not cayman. (Probably
> why only the rcp_clamped change appeared to be necessary.)
>
> This is odd though, because there's no clamping like that in other
> drivers. The trace you made looks fine on both nvc0 and nv50.
>
> On Tue, Nov 7, 2017 at 11:01 PM,   wrote:
>> From: Roland Scheidegger 
>>
>> r600 already used the clamped versions, but for some reason this was
>> different to eg/cayman.
>> (Note that it has been different since essentially forever, 7 years, since
>> df62338c491f2cace1a48f99de78e83b5edd82fd in particular, which changed
>> this for r600 but not eg (cayman wasn't supported back then, but probably
>> copied this from the eg part later). The commit does not mention any reason
>> why this difference should exist.)
>> This seems a bit unfortunate, since it would be nice to use ieee arithmetic,
>> I have no idea what this could potentially break and no idea if it really
>> makes sense going back to legacy-style rcp/rsq...
>> This however prevents misrenderings in This War of Mine since using ieee
>> muls (ce7a045feeef8cad155f1c9aa07f166e146e3d00), albeit strictly speaking
>> only rcp_clamped is necessary for this. It seems likely the root cause is
>> some x * rcp(y) calculation where both x and y evaluate to 0. Albeit it
>> apparently works with other drivers, not sure what's up with that...
>>
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103544
>> ---
>>  src/gallium/drivers/r600/r600_shader.c | 16 
>>  1 file changed, 12 insertions(+), 4 deletions(-)
>>
>> diff --git a/src/gallium/drivers/r600/r600_shader.c 
>> b/src/gallium/drivers/r600/r600_shader.c
>> index 6a755bb3fd..62fc4da901 100644
>> --- a/src/gallium/drivers/r600/r600_shader.c
>> +++ b/src/gallium/drivers/r600/r600_shader.c
>> @@ -9033,8 +9033,12 @@ static const struct r600_shader_tgsi_instruction 
>> eg_shader_tgsi_instruction[] =
>> [TGSI_OPCODE_ARL]   = { ALU_OP0_NOP, tgsi_eg_arl},
>> [TGSI_OPCODE_MOV]   = { ALU_OP1_MOV, tgsi_op2},
>> [TGSI_OPCODE_LIT]   = { ALU_OP0_NOP, tgsi_lit},
>> -   [TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_IEEE, 
>> tgsi_trans_srcx_replicate},
>> -   [TGSI_OPCODE_RSQ]   = { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq},
>> +   /* XXX:
>> +* For state trackers other than OpenGL, we'll want to use
>> +* _RECIP_IEEE/_RECIPSQRT_IEEE instead.
>> +*/
>> +   [TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_CLAMPED, 
>> tgsi_trans_srcx_replicate},
>> +   [TGSI_OPCODE_RSQ]   = { ALU_OP1_RECIPSQRT_CLAMPED, tgsi_rsq},
>> [TGSI_OPCODE_EXP]   = { ALU_OP0_NOP, tgsi_exp},
>> [TGSI_OPCODE_LOG]   = { ALU_OP0_NOP, tgsi_log},
>> [TGSI_OPCODE_MUL]   = { ALU_OP2_MUL_IEEE, tgsi_op2},
>> @@ -9256,8 +9260,12 @@ static const struct r600_shader_tgsi_instruction 
>> cm_shader_tgsi_instruction[] =
>> [TGSI_OPCODE_ARL]   = { ALU_OP0_NOP, tgsi_eg_arl},
>> [TGSI_OPCODE_MOV]   = { ALU_OP1_MOV, tgsi_op2},
>> [TGSI_OPCODE_LIT]   = { ALU_OP0_NOP, tgsi_lit},
>> -   [TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_IEEE, 
>> cayman_emit_float_instr},
>> -   [TGSI_OPCODE_RSQ]   = { ALU_OP1_RECIPSQRT_IEEE, 
>> cayman_emit_float_instr},
>> +   /* XXX:
>> +* For state trackers other than OpenGL, we'll want to use
>> +* _RECIP_IEEE/_RECIPSQRT_IEEE instead.
>> +*/
>> +   [TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_CLAMPED, 
>> cayman_emit_float_instr},
>> +   [TGSI_OPCODE_RSQ]   = { ALU_OP1_RECIPSQRT_CLAMPED, 
>> cayman_emit_float_instr},
>> [TGSI_OPCODE_EXP]   = { ALU_OP0_NOP, tgsi_exp},
>> [TGSI_OPCODE_LOG]   = { ALU_OP0_NOP, tgsi_log},
>> [TGSI_OPCODE_MUL]   = { ALU_OP2_MUL_IEEE, tgsi_op2},
>> --
>> 2.12.3
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] r600: use the clamped versions of rcp/rsq for eg/cayman.

2017-11-07 Thread Ilia Mirkin
tgsi_rsq appears to ignore the passed-in op and always puts in
ALU_OP1_RECIPSQRT_CLAMPED anyways. It also sticks an absolute value on
the RSQ() argument. This only happens for eg, not cayman. (Probably
why only the rcp_clamped change appeared to be necessary.)

This is odd though, because there's no clamping like that in other
drivers. The trace you made looks fine on both nvc0 and nv50.

On Tue, Nov 7, 2017 at 11:01 PM,   wrote:
> From: Roland Scheidegger 
>
> r600 already used the clamped versions, but for some reason this was
> different to eg/cayman.
> (Note that it has been different since essentially forever, 7 years, since
> df62338c491f2cace1a48f99de78e83b5edd82fd in particular, which changed
> this for r600 but not eg (cayman wasn't supported back then, but probably
> copied this from the eg part later). The commit does not mention any reason
> why this difference should exist.)
> This seems a bit unfortunate, since it would be nice to use ieee arithmetic,
> I have no idea what this could potentially break and no idea if it really
> makes sense going back to legacy-style rcp/rsq...
> This however prevents misrenderings in This War of Mine since using ieee
> muls (ce7a045feeef8cad155f1c9aa07f166e146e3d00), albeit strictly speaking
> only rcp_clamped is necessary for this. It seems likely the root cause is
> some x * rcp(y) calculation where both x and y evaluate to 0. Albeit it
> apparently works with other drivers, not sure what's up with that...
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103544
> ---
>  src/gallium/drivers/r600/r600_shader.c | 16 
>  1 file changed, 12 insertions(+), 4 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/r600_shader.c 
> b/src/gallium/drivers/r600/r600_shader.c
> index 6a755bb3fd..62fc4da901 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -9033,8 +9033,12 @@ static const struct r600_shader_tgsi_instruction 
> eg_shader_tgsi_instruction[] =
> [TGSI_OPCODE_ARL]   = { ALU_OP0_NOP, tgsi_eg_arl},
> [TGSI_OPCODE_MOV]   = { ALU_OP1_MOV, tgsi_op2},
> [TGSI_OPCODE_LIT]   = { ALU_OP0_NOP, tgsi_lit},
> -   [TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_IEEE, 
> tgsi_trans_srcx_replicate},
> -   [TGSI_OPCODE_RSQ]   = { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq},
> +   /* XXX:
> +* For state trackers other than OpenGL, we'll want to use
> +* _RECIP_IEEE/_RECIPSQRT_IEEE instead.
> +*/
> +   [TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_CLAMPED, 
> tgsi_trans_srcx_replicate},
> +   [TGSI_OPCODE_RSQ]   = { ALU_OP1_RECIPSQRT_CLAMPED, tgsi_rsq},
> [TGSI_OPCODE_EXP]   = { ALU_OP0_NOP, tgsi_exp},
> [TGSI_OPCODE_LOG]   = { ALU_OP0_NOP, tgsi_log},
> [TGSI_OPCODE_MUL]   = { ALU_OP2_MUL_IEEE, tgsi_op2},
> @@ -9256,8 +9260,12 @@ static const struct r600_shader_tgsi_instruction 
> cm_shader_tgsi_instruction[] =
> [TGSI_OPCODE_ARL]   = { ALU_OP0_NOP, tgsi_eg_arl},
> [TGSI_OPCODE_MOV]   = { ALU_OP1_MOV, tgsi_op2},
> [TGSI_OPCODE_LIT]   = { ALU_OP0_NOP, tgsi_lit},
> -   [TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_IEEE, 
> cayman_emit_float_instr},
> -   [TGSI_OPCODE_RSQ]   = { ALU_OP1_RECIPSQRT_IEEE, 
> cayman_emit_float_instr},
> +   /* XXX:
> +* For state trackers other than OpenGL, we'll want to use
> +* _RECIP_IEEE/_RECIPSQRT_IEEE instead.
> +*/
> +   [TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_CLAMPED, 
> cayman_emit_float_instr},
> +   [TGSI_OPCODE_RSQ]   = { ALU_OP1_RECIPSQRT_CLAMPED, 
> cayman_emit_float_instr},
> [TGSI_OPCODE_EXP]   = { ALU_OP0_NOP, tgsi_exp},
> [TGSI_OPCODE_LOG]   = { ALU_OP0_NOP, tgsi_log},
> [TGSI_OPCODE_MUL]   = { ALU_OP2_MUL_IEEE, tgsi_op2},
> --
> 2.12.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] mesa: rework how we free gl_shader_program_data

2017-11-07 Thread Tapani Pälli

One question below ...

On 11/08/2017 03:41 AM, Timothy Arceri wrote:

When I introduced gl_shader_program_data one of the intentions was to
fix a bug where a failed linking attempt freed data required by a
currently active program. However I seem to have failed to finish
hooking up the final steps required to have the data hang around.

Here we create a fresh instance of gl_shader_program_data every
time we link. gl_program has a reference to gl_shader_program_data
so it will be freed once the program is no longer active.

Cc: Neil Roberts 
Cc: "17.2 17.3" 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102177
---
  src/mesa/main/shaderobj.c   | 71 +
  src/mesa/main/shaderobj.h   |  3 ++
  src/mesa/program/ir_to_mesa.cpp |  2 ++
  3 files changed, 35 insertions(+), 41 deletions(-)

diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c
index e2103bcde49..5501a0157db 100644
--- a/src/mesa/main/shaderobj.c
+++ b/src/mesa/main/shaderobj.c
@@ -192,7 +192,30 @@ _mesa_lookup_shader_err(struct gl_context *ctx, GLuint 
name, const char *caller)
  /**/
  /*** Shader Program object functions***/
  /**/
+static void
+free_shader_program_data(struct gl_shader_program_data *data)
+{
+   if (data->UniformStorage) {
+  for (unsigned i = 0; i < data->NumUniformStorage; ++i)
+ _mesa_uniform_detach_all_driver_storage(&data->UniformStorage[i]);
+  ralloc_free(data->UniformStorage);
+   }
+
+   assert(data->InfoLog != NULL);
+   ralloc_free(data->InfoLog);
+
+   ralloc_free(data->UniformBlocks);
+
+   ralloc_free(data->ShaderStorageBlocks);
  
+   if (data->AtomicBuffers) {

+  ralloc_free(data->AtomicBuffers);
+   }
+
+   if (data->ProgramResourceList) {
+  ralloc_free(data->ProgramResourceList);
+   }
+}
  
  void

  _mesa_reference_shader_program_data(struct gl_context *ctx,
@@ -209,6 +232,7 @@ _mesa_reference_shader_program_data(struct gl_context *ctx,
  
if (p_atomic_dec_zero(&oldData->RefCount)) {

   assert(ctx);
+ free_shader_program_data(oldData);
   ralloc_free(oldData);
}
  
@@ -259,14 +283,16 @@ _mesa_reference_shader_program_(struct gl_context *ctx,

 }
  }
  
-static struct gl_shader_program_data *

-create_shader_program_data()
+struct gl_shader_program_data *
+_mesa_create_shader_program_data()
  {
 struct gl_shader_program_data *data;
 data = rzalloc(NULL, struct gl_shader_program_data);
 if (data)
data->RefCount = 1;
  
+   data->InfoLog = ralloc_strdup(data, "");

+
 return data;
  }
  
@@ -286,8 +312,6 @@ init_shader_program(struct gl_shader_program *prog)

 prog->TransformFeedback.BufferMode = GL_INTERLEAVED_ATTRIBS;
  
 exec_list_make_empty(&prog->EmptyUniformLocations);

-
-   prog->data->InfoLog = ralloc_strdup(prog->data, "");
  }
  
  /**

@@ -300,7 +324,7 @@ _mesa_new_shader_program(GLuint name)
 shProg = rzalloc(NULL, struct gl_shader_program);
 if (shProg) {
shProg->Name = name;
-  shProg->data = create_shader_program_data();
+  shProg->data = _mesa_create_shader_program_data();
if (!shProg->data) {
   ralloc_free(shProg);
   return NULL;
@@ -310,7 +334,6 @@ _mesa_new_shader_program(GLuint name)
 return shProg;
  }
  
-

  /**
   * Clear (free) the shader program state that gets produced by linking.
   */
@@ -325,17 +348,6 @@ _mesa_clear_shader_program_data(struct gl_context *ctx,
}
 }
  
-   shProg->data->linked_stages = 0;

-
-   if (shProg->data->UniformStorage) {
-  for (unsigned i = 0; i < shProg->data->NumUniformStorage; ++i)
- _mesa_uniform_detach_all_driver_storage(&shProg->data->
-UniformStorage[i]);
-  ralloc_free(shProg->data->UniformStorage);
-  shProg->data->NumUniformStorage = 0;
-  shProg->data->UniformStorage = NULL;
-   }
-
 if (shProg->UniformRemapTable) {
ralloc_free(shProg->UniformRemapTable);
shProg->NumUniformRemapTable = 0;
@@ -347,29 +359,7 @@ _mesa_clear_shader_program_data(struct gl_context *ctx,
shProg->UniformHash = NULL;
 }
  
-   assert(shProg->data->InfoLog != NULL);

-   ralloc_free(shProg->data->InfoLog);
-   shProg->data->InfoLog = ralloc_strdup(shProg->data, "");
-
-   ralloc_free(shProg->data->UniformBlocks);
-   shProg->data->UniformBlocks = NULL;
-   shProg->data->NumUniformBlocks = 0;
-
-   ralloc_free(shProg->data->ShaderStorageBlocks);
-   shProg->data->ShaderStorageBlocks = NULL;
-   shProg->data->NumShaderStorageBlocks = 0;
-
-   if (shProg->data->AtomicBuffers) {
-  ralloc_free(shProg->data->AtomicBuffers);
-  shProg->data->AtomicBuffers = NULL;
-  shProg->data->NumAtomicBuffers = 0;
-   }
-
-   if (shProg->data->ProgramResourceList) {
- 

Re: [Mesa-dev] [PATCH 2/3] glsl: use the correct parent when allocating program data members

2017-11-07 Thread Kenneth Graunke
On Tuesday, November 7, 2017 5:41:58 PM PST Timothy Arceri wrote:
> Cc: "17.2 17.3" 
> ---
>  src/compiler/glsl/link_atomics.cpp  | 4 ++--
>  src/compiler/glsl/link_uniforms.cpp | 2 +-
>  src/compiler/glsl/linker.cpp| 6 +++---
>  src/compiler/glsl/shader_cache.cpp  | 4 ++--
>  4 files changed, 8 insertions(+), 8 deletions(-)

Patches 1-2 are:
Reviewed-by: Kenneth Graunke 

Jordan might want to take a look at patch 1 as well...


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] mesa: rework how we free gl_shader_program_data

2017-11-07 Thread Kenneth Graunke
On Tuesday, November 7, 2017 5:41:59 PM PST Timothy Arceri wrote:
> When I introduced gl_shader_program_data one of the intentions was to
> fix a bug where a failed linking attempt freed data required by a
> currently active program. However I seem to have failed to finish
> hooking up the final steps required to have the data hang around.
> 
> Here we create a fresh instance of gl_shader_program_data every
> time we link. gl_program has a reference to gl_shader_program_data
> so it will be freed once the program is no longer active.
> 
> Cc: Neil Roberts 
> Cc: "17.2 17.3" 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102177
> ---
>  src/mesa/main/shaderobj.c   | 71 
> +
>  src/mesa/main/shaderobj.h   |  3 ++
>  src/mesa/program/ir_to_mesa.cpp |  2 ++
>  3 files changed, 35 insertions(+), 41 deletions(-)
> 
> diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c
> index e2103bcde49..5501a0157db 100644
> --- a/src/mesa/main/shaderobj.c
> +++ b/src/mesa/main/shaderobj.c
> @@ -192,7 +192,30 @@ _mesa_lookup_shader_err(struct gl_context *ctx, GLuint 
> name, const char *caller)
>  /**/
>  /*** Shader Program object functions***/
>  /**/
> +static void
> +free_shader_program_data(struct gl_shader_program_data *data)
> +{

I'm not sure why you need to ralloc_free things explicitly here.

Most of these fields appear to be ralloc'd using "data" as the context.
So, I'm not sure why you need to ralloc_free them all explicitly here.
When you ralloc_free data (in the caller), it will free them too.

Some of them weren't, but isn't that what you fixed in patch 2?

Otherwise, this looks good to me...hopefully Neil can take a look too...

> +   if (data->UniformStorage) {
> +  for (unsigned i = 0; i < data->NumUniformStorage; ++i)
> + _mesa_uniform_detach_all_driver_storage(&data->UniformStorage[i]);
> +  ralloc_free(data->UniformStorage);
> +   }
> +
> +   assert(data->InfoLog != NULL);
> +   ralloc_free(data->InfoLog);
> +
> +   ralloc_free(data->UniformBlocks);
> +
> +   ralloc_free(data->ShaderStorageBlocks);
> +   if (data->AtomicBuffers) {
> +  ralloc_free(data->AtomicBuffers);
> +   }
> +
> +   if (data->ProgramResourceList) {
> +  ralloc_free(data->ProgramResourceList);
> +   }
> +}
>  
>  void
>  _mesa_reference_shader_program_data(struct gl_context *ctx,
> @@ -209,6 +232,7 @@ _mesa_reference_shader_program_data(struct gl_context 
> *ctx,
>  
>if (p_atomic_dec_zero(&oldData->RefCount)) {
>   assert(ctx);
> + free_shader_program_data(oldData);
>   ralloc_free(oldData);
>}
>  
> @@ -259,14 +283,16 @@ _mesa_reference_shader_program_(struct gl_context *ctx,
> }
>  }
>  
> -static struct gl_shader_program_data *
> -create_shader_program_data()
> +struct gl_shader_program_data *
> +_mesa_create_shader_program_data()
>  {
> struct gl_shader_program_data *data;
> data = rzalloc(NULL, struct gl_shader_program_data);
> if (data)
>data->RefCount = 1;
>  
> +   data->InfoLog = ralloc_strdup(data, "");
> +
> return data;
>  }
>  
> @@ -286,8 +312,6 @@ init_shader_program(struct gl_shader_program *prog)
> prog->TransformFeedback.BufferMode = GL_INTERLEAVED_ATTRIBS;
>  
> exec_list_make_empty(&prog->EmptyUniformLocations);
> -
> -   prog->data->InfoLog = ralloc_strdup(prog->data, "");
>  }
>  
>  /**
> @@ -300,7 +324,7 @@ _mesa_new_shader_program(GLuint name)
> shProg = rzalloc(NULL, struct gl_shader_program);
> if (shProg) {
>shProg->Name = name;
> -  shProg->data = create_shader_program_data();
> +  shProg->data = _mesa_create_shader_program_data();
>if (!shProg->data) {
>   ralloc_free(shProg);
>   return NULL;
> @@ -310,7 +334,6 @@ _mesa_new_shader_program(GLuint name)
> return shProg;
>  }
>  
> -

Bonus whitespace change

>  /**
>   * Clear (free) the shader program state that gets produced by linking.
>   */
> @@ -325,17 +348,6 @@ _mesa_clear_shader_program_data(struct gl_context *ctx,
>}
> }
>  
> -   shProg->data->linked_stages = 0;
> -
> -   if (shProg->data->UniformStorage) {
> -  for (unsigned i = 0; i < shProg->data->NumUniformStorage; ++i)
> - _mesa_uniform_detach_all_driver_storage(&shProg->data->
> -UniformStorage[i]);
> -  ralloc_free(shProg->data->UniformStorage);
> -  shProg->data->NumUniformStorage = 0;
> -  shProg->data->UniformStorage = NULL;
> -   }
> -
> if (shProg->UniformRemapTable) {
>ralloc_free(shProg->UniformRemapTable);
>shProg->NumUniformRemapTable = 0;
> @@ -347,29 +359,7 @@ _mesa_clear_shader_program_data(struct gl_context *ctx,
>shProg->UniformHash = NULL;
> }
>  
> -   assert(shProg->data

Re: [Mesa-dev] [PATCH v3] i965: expose SRGB visuals and turn on EGL_KHR_gl_colorspace

2017-11-07 Thread Kenneth Graunke
On Thursday, November 2, 2017 1:48:34 AM PST Tapani Pälli wrote:
> Patch exposes sRGB visuals and adds DRI integer query support for
> __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB. Further changes make sure that
> we mark if the app explicitly wanted sRGB and for these framebuffers
> we don't turn sRGB off in intel_gles3_srgb_workaround. This way we
> keep compatibility for existing applications relying on default sRGB
> and only add more visual support.
> 
> With this change, following dEQP tests start to pass:
> 
>dEQP-EGL.functional.wide_color.window__colorspace_srgb
>dEQP-EGL.functional.wide_color.pbuffer__colorspace_srgb
> 
> v2: some code cleanup (Emil Velikov)
> update num_formats correctly (reported by dev...@gmail.com)
> 
> v3: cleanup, remove redundant is_srgb
> rename explicit_srgb as 'need_srgb' to follow style better
> 
> Signed-off-by: Tapani Pälli 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102264
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102354
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102503
> ---
>  src/mesa/drivers/dri/i965/brw_context.c  | 16 ++--
>  src/mesa/drivers/dri/i965/intel_fbo.h|  5 +
>  src/mesa/drivers/dri/i965/intel_screen.c | 13 -
>  3 files changed, 27 insertions(+), 7 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
> b/src/mesa/drivers/dri/i965/brw_context.c
> index 037e349fdb..0b8134ae9f 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.c
> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> @@ -1142,8 +1142,8 @@ intelUnbindContext(__DRIcontext * driContextPriv)
>   *
>   * Unfortunately, renderbuffer setup happens before a context is created.  So
>   * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
> - * context (without an sRGB visual, though we don't have sRGB visuals exposed
> - * yet), we go turn that back off before anyone finds out.
> + * context (without an sRGB visual), we go turn that back off before anyone
> + * finds out.
>   */
>  static void
>  intel_gles3_srgb_workaround(struct brw_context *brw,
> @@ -1154,15 +1154,19 @@ intel_gles3_srgb_workaround(struct brw_context *brw,
> if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
>return;
>  
> -   /* Some day when we support the sRGB capable bit on visuals available for
> -* GLES, we'll need to respect that and not disable things here.
> -*/
> -   fb->Visual.sRGBCapable = false;
> for (int i = 0; i < BUFFER_COUNT; i++) {
>struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer;
> +
> +  /* Check if sRGB was specifically asked for. */
> +  struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, i);
> +  if (irb && irb->need_srgb)
> + return;
> +
>if (rb)
>   rb->Format = _mesa_get_srgb_format_linear(rb->Format);
> }
> +   /* Disable sRGB from framebuffers that are not compatible. */
> +   fb->Visual.sRGBCapable = false;
>  }
>  
>  GLboolean
> diff --git a/src/mesa/drivers/dri/i965/intel_fbo.h 
> b/src/mesa/drivers/dri/i965/intel_fbo.h
> index 1e2494286b..608a1c4e7d 100644
> --- a/src/mesa/drivers/dri/i965/intel_fbo.h
> +++ b/src/mesa/drivers/dri/i965/intel_fbo.h
> @@ -116,6 +116,11 @@ struct intel_renderbuffer
>  * for the duration of a mapping.
>  */
> bool singlesample_mt_is_tmp;
> +
> +   /**
> +* Set to true when application specifically asked for a sRGB visual.
> +*/
> +   bool need_srgb;
>  };
>  
>  
> diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
> b/src/mesa/drivers/dri/i965/intel_screen.c
> index 10064c3236..90303df899 100644
> --- a/src/mesa/drivers/dri/i965/intel_screen.c
> +++ b/src/mesa/drivers/dri/i965/intel_screen.c
> @@ -1384,6 +1384,9 @@ brw_query_renderer_integer(__DRIscreen *dri_screen,
> 0, BRW_CONTEXT_MEDIUM_PRIORITY) == 0)
>   value[0] |= __DRI2_RENDERER_HAS_CONTEXT_PRIORITY_MEDIUM;
>return 0;
> +   case __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB:
> +  value[0] = 
> screen->mesa_format_supports_render[MESA_FORMAT_B8G8R8A8_SRGB];

I think it would be better to do:

  value[0] = true;

as we may add other sRGB formats in the future.  In particular, I'm
thinking about Mario's 10-bit visual series.  We'd probably want to
expose this property regardless of whether we expose B8G8R8A8_SRGB,
R8G8B8A8_SRGB, B10G10R10A2_SRGB, or whatever...and checking one of
them would be confusing.

Plus, it's always true, so there's not much use in checking it...

> +  return 0;
> default:
>return driQueryRendererIntegerCommon(dri_screen, param, value);
> }
> @@ -1544,13 +1547,18 @@ intelCreateBuffer(__DRIscreen *dri_screen,
>fb->Visual.sRGBCapable = true;
> }
>  
> +   /* mesaVis->sRGBCapable was set, user is asking for sRGB */
> +   bool srgb_cap_set = mesaVis->redBits >= 8 && mesaVis->sRGBCapable;
> +
> /* setup the hardware-based renderbuffers */
> rb = intel_create_

[Mesa-dev] [PATCH 2/2] r600: use the clamped versions of rcp/rsq for eg/cayman.

2017-11-07 Thread sroland
From: Roland Scheidegger 

r600 already used the clamped versions, but for some reason this was
different to eg/cayman.
(Note that it has been different since essentially forever, 7 years, since
df62338c491f2cace1a48f99de78e83b5edd82fd in particular, which changed
this for r600 but not eg (cayman wasn't supported back then, but probably
copied this from the eg part later). The commit does not mention any reason
why this difference should exist.)
This seems a bit unfortunate, since it would be nice to use ieee arithmetic,
I have no idea what this could potentially break and no idea if it really
makes sense going back to legacy-style rcp/rsq...
This does, however, prevent the misrenderings seen in This War of Mine since
the switch to ieee muls (ce7a045feeef8cad155f1c9aa07f166e146e3d00), although
strictly speaking only rcp_clamped is necessary for this. It seems likely the
root cause is some x * rcp(y) calculation where both x and y evaluate to 0.
It apparently works with other drivers, though; not sure what's up with that...

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103544
---
 src/gallium/drivers/r600/r600_shader.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 6a755bb3fd..62fc4da901 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -9033,8 +9033,12 @@ static const struct r600_shader_tgsi_instruction 
eg_shader_tgsi_instruction[] =
[TGSI_OPCODE_ARL]   = { ALU_OP0_NOP, tgsi_eg_arl},
[TGSI_OPCODE_MOV]   = { ALU_OP1_MOV, tgsi_op2},
[TGSI_OPCODE_LIT]   = { ALU_OP0_NOP, tgsi_lit},
-   [TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_IEEE, 
tgsi_trans_srcx_replicate},
-   [TGSI_OPCODE_RSQ]   = { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq},
+   /* XXX:
+* For state trackers other than OpenGL, we'll want to use
+* _RECIP_IEEE/_RECIPSQRT_IEEE instead.
+*/
+   [TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_CLAMPED, 
tgsi_trans_srcx_replicate},
+   [TGSI_OPCODE_RSQ]   = { ALU_OP1_RECIPSQRT_CLAMPED, tgsi_rsq},
[TGSI_OPCODE_EXP]   = { ALU_OP0_NOP, tgsi_exp},
[TGSI_OPCODE_LOG]   = { ALU_OP0_NOP, tgsi_log},
[TGSI_OPCODE_MUL]   = { ALU_OP2_MUL_IEEE, tgsi_op2},
@@ -9256,8 +9260,12 @@ static const struct r600_shader_tgsi_instruction 
cm_shader_tgsi_instruction[] =
[TGSI_OPCODE_ARL]   = { ALU_OP0_NOP, tgsi_eg_arl},
[TGSI_OPCODE_MOV]   = { ALU_OP1_MOV, tgsi_op2},
[TGSI_OPCODE_LIT]   = { ALU_OP0_NOP, tgsi_lit},
-   [TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_IEEE, 
cayman_emit_float_instr},
-   [TGSI_OPCODE_RSQ]   = { ALU_OP1_RECIPSQRT_IEEE, 
cayman_emit_float_instr},
+   /* XXX:
+* For state trackers other than OpenGL, we'll want to use
+* _RECIP_IEEE/_RECIPSQRT_IEEE instead.
+*/
+   [TGSI_OPCODE_RCP]   = { ALU_OP1_RECIP_CLAMPED, 
cayman_emit_float_instr},
+   [TGSI_OPCODE_RSQ]   = { ALU_OP1_RECIPSQRT_CLAMPED, 
cayman_emit_float_instr},
[TGSI_OPCODE_EXP]   = { ALU_OP0_NOP, tgsi_exp},
[TGSI_OPCODE_LOG]   = { ALU_OP0_NOP, tgsi_log},
[TGSI_OPCODE_MUL]   = { ALU_OP2_MUL_IEEE, tgsi_op2},
-- 
2.12.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] r600: use min_dx10/max_dx10 instead of min/max

2017-11-07 Thread sroland
From: Roland Scheidegger 

I believe this is the safe thing to do, especially since the driver now
actually generates NaNs for muls too.
Since the Radeon ISA docs are inaccurate/wrong there, I'm not entirely
sure what the non-dx10 versions do, but (as required by dx10) the dx10
versions should pick a non-NaN source over a NaN source.
Other drivers presumably do the same (radeonsi, llvmpipe).
This was shown to make some difference for bug 103544, although it is not
required to fix it.
---
 src/gallium/drivers/r600/r600_shader.c  | 12 ++--
 src/gallium/drivers/r600/sb/sb_expr.cpp |  2 ++
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 188fbc9d47..6a755bb3fd 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -8844,8 +8844,8 @@ static const struct r600_shader_tgsi_instruction 
r600_shader_tgsi_instruction[]
[TGSI_OPCODE_DP3]   = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DP4]   = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DST]   = { ALU_OP0_NOP, tgsi_opdst},
-   [TGSI_OPCODE_MIN]   = { ALU_OP2_MIN, tgsi_op2},
-   [TGSI_OPCODE_MAX]   = { ALU_OP2_MAX, tgsi_op2},
+   [TGSI_OPCODE_MIN]   = { ALU_OP2_MIN_DX10, tgsi_op2},
+   [TGSI_OPCODE_MAX]   = { ALU_OP2_MAX_DX10, tgsi_op2},
[TGSI_OPCODE_SLT]   = { ALU_OP2_SETGT, tgsi_op2_swap},
[TGSI_OPCODE_SGE]   = { ALU_OP2_SETGE, tgsi_op2},
[TGSI_OPCODE_MAD]   = { ALU_OP3_MULADD_IEEE, tgsi_op3},
@@ -9042,8 +9042,8 @@ static const struct r600_shader_tgsi_instruction 
eg_shader_tgsi_instruction[] =
[TGSI_OPCODE_DP3]   = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DP4]   = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DST]   = { ALU_OP0_NOP, tgsi_opdst},
-   [TGSI_OPCODE_MIN]   = { ALU_OP2_MIN, tgsi_op2},
-   [TGSI_OPCODE_MAX]   = { ALU_OP2_MAX, tgsi_op2},
+   [TGSI_OPCODE_MIN]   = { ALU_OP2_MIN_DX10, tgsi_op2},
+   [TGSI_OPCODE_MAX]   = { ALU_OP2_MAX_DX10, tgsi_op2},
[TGSI_OPCODE_SLT]   = { ALU_OP2_SETGT, tgsi_op2_swap},
[TGSI_OPCODE_SGE]   = { ALU_OP2_SETGE, tgsi_op2},
[TGSI_OPCODE_MAD]   = { ALU_OP3_MULADD_IEEE, tgsi_op3},
@@ -9265,8 +9265,8 @@ static const struct r600_shader_tgsi_instruction 
cm_shader_tgsi_instruction[] =
[TGSI_OPCODE_DP3]   = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DP4]   = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DST]   = { ALU_OP0_NOP, tgsi_opdst},
-   [TGSI_OPCODE_MIN]   = { ALU_OP2_MIN, tgsi_op2},
-   [TGSI_OPCODE_MAX]   = { ALU_OP2_MAX, tgsi_op2},
+   [TGSI_OPCODE_MIN]   = { ALU_OP2_MIN_DX10, tgsi_op2},
+   [TGSI_OPCODE_MAX]   = { ALU_OP2_MAX_DX10, tgsi_op2},
[TGSI_OPCODE_SLT]   = { ALU_OP2_SETGT, tgsi_op2_swap},
[TGSI_OPCODE_SGE]   = { ALU_OP2_SETGE, tgsi_op2},
[TGSI_OPCODE_MAD]   = { ALU_OP3_MULADD_IEEE, tgsi_op3},
diff --git a/src/gallium/drivers/r600/sb/sb_expr.cpp 
b/src/gallium/drivers/r600/sb/sb_expr.cpp
index 3dd3a4815b..7a5d62c8e8 100644
--- a/src/gallium/drivers/r600/sb/sb_expr.cpp
+++ b/src/gallium/drivers/r600/sb/sb_expr.cpp
@@ -753,7 +753,9 @@ bool expr_handler::fold_alu_op2(alu_node& n) {
n.bc.src[0].abs == n.bc.src[1].abs) {
switch (n.bc.op) {
case ALU_OP2_MIN: // (MIN x, x) => (MOV x)
+   case ALU_OP2_MIN_DX10:
case ALU_OP2_MAX:
+   case ALU_OP2_MAX_DX10:
convert_to_mov(n, v0, n.bc.src[0].neg, 
n.bc.src[0].abs);
return fold_alu_op1(n);
case ALU_OP2_ADD:  // (ADD x, x) => (MUL x, 2)
-- 
2.12.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] etnaviv: Add sampler TS support

2017-11-07 Thread Wladimir
> +/* Return true if a resource has a TS, and it is valid for at least one 
> level */
> +static bool
> +etna_resource_has_valid_ts(struct pipe_resource *prsc)
> +{
> +   struct etna_resource *rsc = etna_resource(prsc);
> +
> +   if (!rsc->ts_bo)
> +  return false;
> +
> +   for (int level = 0; level <= rsc->base.last_level; level++)
> +  if (rsc->levels[level].ts_valid)
> + return true;
> +   return false;
> +}

>  static void
> -etna_update_sampler_source(struct pipe_sampler_view *view)
> +etna_update_sampler_source(struct etna_context *ctx, struct 
> pipe_sampler_view *view, int num)
>  {
> struct etna_resource *base = etna_resource(view->texture);
> struct etna_resource *to = base, *from = base;
> +   bool enable_sampler_ts = false;
>
> if (base->external && etna_resource_newer(etna_resource(base->external), 
> base))
>from = etna_resource(base->external);
> @@ -128,12 +199,19 @@ etna_update_sampler_source(struct pipe_sampler_view 
> *view)
>etna_copy_resource(view->context, &to->base, &from->base, 0,
>   view->texture->last_level);
>to->seqno = from->seqno;
> -   } else if ((to == from) && etna_resource_needs_flush(to)) {
> -  /* Resolve TS if needed, remove when adding sampler TS */
> -  etna_copy_resource(view->context, &to->base, &from->base, 0,
> - view->texture->last_level);
> -  to->flush_seqno = from->seqno;
> +   } else if ((to == from) &&
> + etna_resource_needs_flush(to) &&
> + etna_resource_has_valid_ts(&to->base)) {

I just realized - would it maybe make sense to roll the call to
etna_resource_has_valid_ts into etna_resource_needs_flush?

etna_resource_needs_flush is only called from two places - here, and
in resource_flush, where it also determines whether to do a
resolve-to-self, but before presenting the image. There it also only
makes sense to do if the resource has at least a valid TS.

Wladimir
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] glsl: use the correct parent when allocating program data members

2017-11-07 Thread Timothy Arceri
Cc: "17.2 17.3" 
---
 src/compiler/glsl/link_atomics.cpp  | 4 ++--
 src/compiler/glsl/link_uniforms.cpp | 2 +-
 src/compiler/glsl/linker.cpp| 6 +++---
 src/compiler/glsl/shader_cache.cpp  | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/compiler/glsl/link_atomics.cpp 
b/src/compiler/glsl/link_atomics.cpp
index d26e62a35b3..d4a6eed639f 100644
--- a/src/compiler/glsl/link_atomics.cpp
+++ b/src/compiler/glsl/link_atomics.cpp
@@ -207,7 +207,7 @@ link_assign_atomic_counter_resources(struct gl_context *ctx,
active_atomic_buffer *abs =
   find_active_atomic_counters(ctx, prog, &num_buffers);
 
-   prog->data->AtomicBuffers = rzalloc_array(prog, gl_active_atomic_buffer,
+   prog->data->AtomicBuffers = rzalloc_array(prog->data, 
gl_active_atomic_buffer,
  num_buffers);
prog->data->NumAtomicBuffers = num_buffers;
 
@@ -270,7 +270,7 @@ link_assign_atomic_counter_resources(struct gl_context *ctx,
  struct gl_program *gl_prog = prog->_LinkedShaders[j]->Program;
  gl_prog->info.num_abos = num_atomic_buffers[j];
  gl_prog->sh.AtomicBuffers =
-rzalloc_array(prog, gl_active_atomic_buffer *,
+rzalloc_array(gl_prog, gl_active_atomic_buffer *,
   num_atomic_buffers[j]);
 
  unsigned intra_stage_idx = 0;
diff --git a/src/compiler/glsl/link_uniforms.cpp 
b/src/compiler/glsl/link_uniforms.cpp
index 3da015eb7fd..30c48b94fce 100644
--- a/src/compiler/glsl/link_uniforms.cpp
+++ b/src/compiler/glsl/link_uniforms.cpp
@@ -1333,7 +1333,7 @@ link_assign_uniform_storage(struct gl_context *ctx,
 
union gl_constant_value *data;
if (prog->data->UniformStorage == NULL) {
-  prog->data->UniformStorage = rzalloc_array(prog,
+  prog->data->UniformStorage = rzalloc_array(prog->data,
  struct gl_uniform_storage,
  
prog->data->NumUniformStorage);
   data = rzalloc_array(prog->data->UniformStorage,
diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index c29924ca251..7c9a290b5de 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -1201,8 +1201,8 @@ interstage_cross_validate_uniform_blocks(struct 
gl_shader_program *prog,
   }
 
   for (unsigned int j = 0; j < sh_num_blocks; j++) {
- int index = link_cross_validate_uniform_block(prog, &blks, num_blks,
-   sh_blks[j]);
+ int index = link_cross_validate_uniform_block(prog->data, &blks,
+   num_blks, sh_blks[j]);
 
  if (index == -1) {
 linker_error(prog, "buffer block `%s' has mismatching "
@@ -3610,7 +3610,7 @@ add_program_resource(struct gl_shader_program *prog,
   return true;
 
prog->data->ProgramResourceList =
-  reralloc(prog,
+  reralloc(prog->data,
prog->data->ProgramResourceList,
gl_program_resource,
prog->data->NumProgramResourceList + 1);
diff --git a/src/compiler/glsl/shader_cache.cpp 
b/src/compiler/glsl/shader_cache.cpp
index 89da19914fe..da1c72d42da 100644
--- a/src/compiler/glsl/shader_cache.cpp
+++ b/src/compiler/glsl/shader_cache.cpp
@@ -499,7 +499,7 @@ read_uniforms(struct blob_reader *metadata, struct 
gl_shader_program *prog)
prog->data->NumUniformStorage = blob_read_uint32(metadata);
prog->data->NumUniformDataSlots = blob_read_uint32(metadata);
 
-   uniforms = rzalloc_array(prog, struct gl_uniform_storage,
+   uniforms = rzalloc_array(prog->data, struct gl_uniform_storage,
 prog->data->NumUniformStorage);
prog->data->UniformStorage = uniforms;
 
@@ -955,7 +955,7 @@ read_program_resource_list(struct blob_reader *metadata,
prog->data->NumProgramResourceList = blob_read_uint32(metadata);
 
prog->data->ProgramResourceList =
-  ralloc_array(prog, gl_program_resource,
+  ralloc_array(prog->data, gl_program_resource,
prog->data->NumProgramResourceList);
 
for (unsigned i = 0; i < prog->data->NumProgramResourceList; i++) {
-- 
2.13.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] mesa: rework how we free gl_shader_program_data

2017-11-07 Thread Timothy Arceri
When I introduced gl_shader_program_data one of the intentions was to
fix a bug where a failed linking attempt freed data required by a
currently active program. However I seem to have failed to finish
hooking up the final steps required to have the data hang around.

Here we create a fresh instance of gl_shader_program_data every
time we link. gl_program has a reference to gl_shader_program_data
so it will be freed once the program is no longer active.

Cc: Neil Roberts 
Cc: "17.2 17.3" 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102177
---
 src/mesa/main/shaderobj.c   | 71 +
 src/mesa/main/shaderobj.h   |  3 ++
 src/mesa/program/ir_to_mesa.cpp |  2 ++
 3 files changed, 35 insertions(+), 41 deletions(-)

diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c
index e2103bcde49..5501a0157db 100644
--- a/src/mesa/main/shaderobj.c
+++ b/src/mesa/main/shaderobj.c
@@ -192,7 +192,30 @@ _mesa_lookup_shader_err(struct gl_context *ctx, GLuint 
name, const char *caller)
 /**/
 /*** Shader Program object functions***/
 /**/
+static void
+free_shader_program_data(struct gl_shader_program_data *data)
+{
+   if (data->UniformStorage) {
+  for (unsigned i = 0; i < data->NumUniformStorage; ++i)
+ _mesa_uniform_detach_all_driver_storage(&data->UniformStorage[i]);
+  ralloc_free(data->UniformStorage);
+   }
+
+   assert(data->InfoLog != NULL);
+   ralloc_free(data->InfoLog);
+
+   ralloc_free(data->UniformBlocks);
+
+   ralloc_free(data->ShaderStorageBlocks);
 
+   if (data->AtomicBuffers) {
+  ralloc_free(data->AtomicBuffers);
+   }
+
+   if (data->ProgramResourceList) {
+  ralloc_free(data->ProgramResourceList);
+   }
+}
 
 void
 _mesa_reference_shader_program_data(struct gl_context *ctx,
@@ -209,6 +232,7 @@ _mesa_reference_shader_program_data(struct gl_context *ctx,
 
   if (p_atomic_dec_zero(&oldData->RefCount)) {
  assert(ctx);
+ free_shader_program_data(oldData);
  ralloc_free(oldData);
   }
 
@@ -259,14 +283,16 @@ _mesa_reference_shader_program_(struct gl_context *ctx,
}
 }
 
-static struct gl_shader_program_data *
-create_shader_program_data()
+struct gl_shader_program_data *
+_mesa_create_shader_program_data()
 {
struct gl_shader_program_data *data;
data = rzalloc(NULL, struct gl_shader_program_data);
if (data)
   data->RefCount = 1;
 
+   data->InfoLog = ralloc_strdup(data, "");
+
return data;
 }
 
@@ -286,8 +312,6 @@ init_shader_program(struct gl_shader_program *prog)
prog->TransformFeedback.BufferMode = GL_INTERLEAVED_ATTRIBS;
 
exec_list_make_empty(&prog->EmptyUniformLocations);
-
-   prog->data->InfoLog = ralloc_strdup(prog->data, "");
 }
 
 /**
@@ -300,7 +324,7 @@ _mesa_new_shader_program(GLuint name)
shProg = rzalloc(NULL, struct gl_shader_program);
if (shProg) {
   shProg->Name = name;
-  shProg->data = create_shader_program_data();
+  shProg->data = _mesa_create_shader_program_data();
   if (!shProg->data) {
  ralloc_free(shProg);
  return NULL;
@@ -310,7 +334,6 @@ _mesa_new_shader_program(GLuint name)
return shProg;
 }
 
-
 /**
  * Clear (free) the shader program state that gets produced by linking.
  */
@@ -325,17 +348,6 @@ _mesa_clear_shader_program_data(struct gl_context *ctx,
   }
}
 
-   shProg->data->linked_stages = 0;
-
-   if (shProg->data->UniformStorage) {
-  for (unsigned i = 0; i < shProg->data->NumUniformStorage; ++i)
- _mesa_uniform_detach_all_driver_storage(&shProg->data->
-UniformStorage[i]);
-  ralloc_free(shProg->data->UniformStorage);
-  shProg->data->NumUniformStorage = 0;
-  shProg->data->UniformStorage = NULL;
-   }
-
if (shProg->UniformRemapTable) {
   ralloc_free(shProg->UniformRemapTable);
   shProg->NumUniformRemapTable = 0;
@@ -347,29 +359,7 @@ _mesa_clear_shader_program_data(struct gl_context *ctx,
   shProg->UniformHash = NULL;
}
 
-   assert(shProg->data->InfoLog != NULL);
-   ralloc_free(shProg->data->InfoLog);
-   shProg->data->InfoLog = ralloc_strdup(shProg->data, "");
-
-   ralloc_free(shProg->data->UniformBlocks);
-   shProg->data->UniformBlocks = NULL;
-   shProg->data->NumUniformBlocks = 0;
-
-   ralloc_free(shProg->data->ShaderStorageBlocks);
-   shProg->data->ShaderStorageBlocks = NULL;
-   shProg->data->NumShaderStorageBlocks = 0;
-
-   if (shProg->data->AtomicBuffers) {
-  ralloc_free(shProg->data->AtomicBuffers);
-  shProg->data->AtomicBuffers = NULL;
-  shProg->data->NumAtomicBuffers = 0;
-   }
-
-   if (shProg->data->ProgramResourceList) {
-  ralloc_free(shProg->data->ProgramResourceList);
-  shProg->data->ProgramResourceList = NULL;
-  shProg->data->NumProgramResourceLis

[Mesa-dev] [PATCH 1/3] glsl: drop cache_fallback

2017-11-07 Thread Timothy Arceri
This turned out to be a dead end; it is much easier and less error-prone
to just cache the IR used by the driver's backend, e.g. TGSI or
NIR.

Cc: "17.2 17.3" 
---
 src/compiler/glsl/link_uniforms.cpp | 15 ++-
 src/compiler/glsl/linker.cpp| 85 +
 src/compiler/glsl/shader_cache.cpp  |  2 +-
 src/mesa/main/mtypes.h  |  7 ++-
 src/mesa/main/shaderobj.c   | 23 +-
 5 files changed, 55 insertions(+), 77 deletions(-)

diff --git a/src/compiler/glsl/link_uniforms.cpp 
b/src/compiler/glsl/link_uniforms.cpp
index 7d141549f55..3da015eb7fd 100644
--- a/src/compiler/glsl/link_uniforms.cpp
+++ b/src/compiler/glsl/link_uniforms.cpp
@@ -1400,13 +1400,6 @@ link_assign_uniform_storage(struct gl_context *ctx,
  sizeof(shader->Program->sh.SamplerTargets));
}
 
-   /* If this is a fallback compile for a cache miss we already have the
-* correct uniform mappings and we don't want to reinitialise uniforms so
-* just return now.
-*/
-   if (prog->data->cache_fallback)
-  return;
-
 #ifndef NDEBUG
for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) {
   assert(prog->data->UniformStorage[i].storage != NULL ||
@@ -1431,11 +1424,9 @@ void
 link_assign_uniform_locations(struct gl_shader_program *prog,
   struct gl_context *ctx)
 {
-   if (!prog->data->cache_fallback) {
-  ralloc_free(prog->data->UniformStorage);
-  prog->data->UniformStorage = NULL;
-  prog->data->NumUniformStorage = 0;
-   }
+   ralloc_free(prog->data->UniformStorage);
+   prog->data->UniformStorage = NULL;
+   prog->data->NumUniformStorage = 0;
 
if (prog->UniformHash != NULL) {
   prog->UniformHash->clear();
diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index eed885052aa..c29924ca251 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -2264,8 +2264,7 @@ link_intrastage_shaders(void *mem_ctx,
   return NULL;
}
 
-   if (!prog->data->cache_fallback)
-  _mesa_reference_shader_program_data(ctx, &gl_prog->sh.data, prog->data);
+   _mesa_reference_shader_program_data(ctx, &gl_prog->sh.data, prog->data);
 
/* Don't use _mesa_reference_program() just take ownership */
linked->Program = gl_prog;
@@ -2322,34 +2321,32 @@ link_intrastage_shaders(void *mem_ctx,
v.run(linked->ir);
v.fixup_unnamed_interface_types();
 
-   if (!prog->data->cache_fallback) {
-  /* Link up uniform blocks defined within this stage. */
-  link_uniform_blocks(mem_ctx, ctx, prog, linked, &ubo_blocks,
-  &num_ubo_blocks, &ssbo_blocks, &num_ssbo_blocks);
+   /* Link up uniform blocks defined within this stage. */
+   link_uniform_blocks(mem_ctx, ctx, prog, linked, &ubo_blocks,
+   &num_ubo_blocks, &ssbo_blocks, &num_ssbo_blocks);
 
-  if (!prog->data->LinkStatus) {
- _mesa_delete_linked_shader(ctx, linked);
- return NULL;
-  }
+   if (!prog->data->LinkStatus) {
+  _mesa_delete_linked_shader(ctx, linked);
+  return NULL;
+   }
 
-  /* Copy ubo blocks to linked shader list */
-  linked->Program->sh.UniformBlocks =
- ralloc_array(linked, gl_uniform_block *, num_ubo_blocks);
-  ralloc_steal(linked, ubo_blocks);
-  for (unsigned i = 0; i < num_ubo_blocks; i++) {
- linked->Program->sh.UniformBlocks[i] = &ubo_blocks[i];
-  }
-  linked->Program->info.num_ubos = num_ubo_blocks;
-
-  /* Copy ssbo blocks to linked shader list */
-  linked->Program->sh.ShaderStorageBlocks =
- ralloc_array(linked, gl_uniform_block *, num_ssbo_blocks);
-  ralloc_steal(linked, ssbo_blocks);
-  for (unsigned i = 0; i < num_ssbo_blocks; i++) {
- linked->Program->sh.ShaderStorageBlocks[i] = &ssbo_blocks[i];
-  }
-  linked->Program->info.num_ssbos = num_ssbo_blocks;
+   /* Copy ubo blocks to linked shader list */
+   linked->Program->sh.UniformBlocks =
+  ralloc_array(linked, gl_uniform_block *, num_ubo_blocks);
+   ralloc_steal(linked, ubo_blocks);
+   for (unsigned i = 0; i < num_ubo_blocks; i++) {
+  linked->Program->sh.UniformBlocks[i] = &ubo_blocks[i];
+   }
+   linked->Program->info.num_ubos = num_ubo_blocks;
+
+   /* Copy ssbo blocks to linked shader list */
+   linked->Program->sh.ShaderStorageBlocks =
+  ralloc_array(linked, gl_uniform_block *, num_ssbo_blocks);
+   ralloc_steal(linked, ssbo_blocks);
+   for (unsigned i = 0; i < num_ssbo_blocks; i++) {
+  linked->Program->sh.ShaderStorageBlocks[i] = &ssbo_blocks[i];
}
+   linked->Program->info.num_ssbos = num_ssbo_blocks;
 
/* At this point linked should contain all of the linked IR, so
 * validate it to make sure nothing went wrong.
@@ -4584,14 +4581,12 @@ link_and_validate_uniforms(struct gl_context *ctx,
update_array_sizes(prog);
link_assign_uniform_locations(prog, ctx);
 
-   if (!prog->data->cache_fallback) {
-  link_assign_ato

Re: [Mesa-dev] [RFC PATCH v1 24/30] RFC: anv: Support VkPhysicalDeviceImageDrmFormatModifierInfoEXT

2017-11-07 Thread Jason Ekstrand
One other comment (that I don't know what patch to make it on):  I think we
want to disallow the MUTABLE_FORMAT create bit with at least the CCS
modifier.  Also, the CCS modifier needs to only be usable with
CCS-supported formats.  You may have already thought of those two things
but I wanted to get it out of my brain.

--Jason

On Tue, Nov 7, 2017 at 12:38 PM, Jason Ekstrand 
wrote:

>
>
> On Tue, Nov 7, 2017 at 6:48 AM, Chad Versace 
> wrote:
>
>> Incremental implementation of VK_EXT_image_drm_format_modifier.
>> ---
>>  src/intel/vulkan/anv_formats.c | 45 ++
>> +++-
>>  1 file changed, 40 insertions(+), 5 deletions(-)
>>
>> diff --git a/src/intel/vulkan/anv_formats.c
>> b/src/intel/vulkan/anv_formats.c
>> index dc46fdb5425..d6eeb9d1c45 100644
>> --- a/src/intel/vulkan/anv_formats.c
>> +++ b/src/intel/vulkan/anv_formats.c
>> @@ -813,6 +813,7 @@ static VkResult
>>  anv_get_image_format_properties(
>> struct anv_physical_device *physical_device,
>> const VkPhysicalDeviceImageFormatInfo2KHR *info,
>> +   const VkPhysicalDeviceImageDrmFormatModifierInfoEXT *drm_info,
>> VkImageFormatProperties *pImageFormatProperties,
>> VkSamplerYcbcrConversionImageFormatPropertiesKHR
>> *pYcbcrImageFormatProperties)
>>  {
>> @@ -826,14 +827,34 @@ anv_get_image_format_properties(
>> if (format == NULL)
>>goto unsupported;
>>
>> +   uint64_t drm_format_mod = DRM_FORMAT_MOD_INVALID;
>> +   if (drm_info) {
>> +  assert(info->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT);
>> +  drm_format_mod = drm_info->drmFormatModifier;
>> +   }
>> +
>> VkFormatFeatureFlags format_feature_flags =
>>get_image_format_features(devinfo, info->format, format,
>> info->tiling,
>> -DRM_FORMAT_MOD_INVALID);
>> +drm_format_mod);
>> +
>> +   /* The core Vulkan spec places strict constraints on the image
>> capabilities
>> +* advertised here. For example, the core spec requires that
>> +* maxMipLevels == log2(maxWidth) + 1
>> +* when tiling is VK_IMAGE_TILING_OPTIMAL; and requires that
>> +* maxExtent >= VkPhysicalDeviceLimits::maxImageDimension${N}D.
>> +* However, the VK_EXT_image_drm_format_modifier specification
>> grants the
>> +* implementation the freedom to further restrict the image
>> capabilities
>> +* when tiling is VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT.
>>
>
> How about adding one extra paragraph here saying that we choose to only
> support "simple" 2D images.
>
>
>> +*/
>>
>> switch (info->type) {
>> default:
>>unreachable("bad VkImageType");
>> case VK_IMAGE_TYPE_1D:
>> +  /* We reject 1D images with modifiers due to FUD */
>>
>
> We could support 1D but meh.  Just use a texture buffer instead.
>
>
>> +  if (drm_info)
>> + goto unsupported;
>> +
>>maxExtent.width = 16384;
>>maxExtent.height = 1;
>>maxExtent.depth = 1;
>> @@ -848,10 +869,20 @@ anv_get_image_format_properties(
>>maxExtent.width = 16384;
>>maxExtent.height = 16384;
>>maxExtent.depth = 1;
>> -  maxMipLevels = 15; /* log2(maxWidth) + 1 */
>> -  maxArraySize = 2048;
>> +
>> +  if (drm_info) {
>> + maxMipLevels = 1;
>> + maxArraySize = 1;
>> +  } else {
>> + maxMipLevels = 15; /* log2(maxWidth) + 1 */
>> + maxArraySize = 2048;
>> +  }
>>break;
>> case VK_IMAGE_TYPE_3D:
>> +  /* We reject 3D images with modifiers due to FUD */
>>
>
> I have neither uncertainty nor doubt, but I do have a very healthy helping
> of fear. :-)  Let's just go with a global comment above and drop the ones
> that make us look like cowards. :-P
>
>
>> +  if (drm_info)
>> + goto unsupported;
>> +
>>maxExtent.width = 2048;
>>maxExtent.height = 2048;
>>maxExtent.depth = 2048;
>> @@ -976,7 +1007,7 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties(
>>.flags = createFlags,
>> };
>>
>> -   return anv_get_image_format_properties(physical_device, &info,
>> +   return anv_get_image_format_properties(physical_device, &info, NULL,
>>pImageFormatProperties, NULL);
>>  }
>>
>> @@ -1009,6 +1040,7 @@ VkResult anv_GetPhysicalDeviceImageForm
>> atProperties2KHR(
>>  {
>> ANV_FROM_HANDLE(anv_physical_device, physical_device,
>> physicalDevice);
>> const VkPhysicalDeviceExternalImageFormatInfoKHR *external_info =
>> NULL;
>> +   const VkPhysicalDeviceImageDrmFormatModifierInfoEXT *drm_info = NULL;
>> VkExternalImageFormatPropertiesKHR *external_props = NULL;
>> VkSamplerYcbcrConversionImageFormatPropertiesKHR *ycbcr_props = NULL;
>> VkResult result;
>> @@ -1019,6 +1051,9 @@ VkResult anv_GetPhysicalDeviceImageForm
>> atProperties2KHR(
>>case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO
>> _KHR:
>>   external_info = (const void *) s;
>>   

Re: [Mesa-dev] [PATCH] mesa/shaderapi: Do a dry run of linking an in-use program

2017-11-07 Thread Timothy Arceri



On 08/11/17 02:35, Neil Roberts wrote:

Timothy Arceri  writes:


You’re right, I think that would be a better way to handle it. I guess
if this was done then you don’t really need the second link. There are
several pointers for the uniform state that you would need to keep and I
think there is more state than just the uniforms as well. Perhaps pretty
much everything that is freed in _mesa_clear_shader_program_data should
be kept?


You only need to keep things that are accessed by the backend post
linking (such as the uniforms), things that are queried via the api can
be trashed as per the spec. I'm pretty sure we don't need most of
those.


Right, I guess we could keep a small selection of the pointers that are
cleared by _mesa_clear_shader_program_data. However it might be messy to
maintain as it’s likely that someone could add new members to
gl_shader_program_data and forget to update this function. Already just
preserving the necessary uniform state is a bit fiddly because the
allocation we want to maintain is owned by UniformStorage but it is
accessed via UniformDataSlots in the i965 driver. I’m not exactly sure
how this works in Gallium.

Just to double check, I made a little Piglit test to check using atomic
counters and sure enough it gets a similar Valgrind error and sporadic
failures due to accessing shProg->data->AtomicBuffers, so we would at
least need to conserve that too.


Ok I've taken a closer look at all this, I was sure I did a bunch of 
work to avoid this issue and it seems I didn't finish off the last 
couple of steps (I think part of the issue was not having a piglit test 
for it at the time, thanks for fixing that). I'll send a patchset shortly.




https://github.com/bpeel/piglit/commit/d95701afbb9367ed1e82af27c98f18

Regards,
- Neil


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] non-shared glapi

2017-11-07 Thread Dylan Baker
Jason let me know today that non-shared glapi is broken in meson. The tests for
non-shared glapi are broken in autotools and meson. Is anyone still using
non-shared glapi, or can we delete it?


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] meson: Add script to use VERSION file for getting version

2017-11-07 Thread Dylan Baker
Quoting Dylan Baker (2017-11-01 11:58:16)
> Meson has up until this point set its version in the root meson.build
> script. While there are other build systems, this creates "one more
> thing" to duplicate between meson and every other build system. This
> script is a simple "read, strip, print" sort of deal to allow meson to
> read the VERSION file.
> 
> I chose to implement this in python since python is portable, and to
> keep the meson.build script clean. This is also complicated by the fact
> that the project() call *must* be the first non-comment, non-blank line in
> the toplevel meson.build script.
> 
> Signed-off-by: Dylan Baker 
> ---
>  meson.build  |  2 +-
>  scripts/meson_get_version.py | 35 +++
>  2 files changed, 36 insertions(+), 1 deletion(-)
>  create mode 100644 scripts/meson_get_version.py
> 
> diff --git a/meson.build b/meson.build
> index 6ad8c8bbf4b..3f77380f7df 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -21,7 +21,7 @@
>  project(
>'mesa',
>['c', 'cpp'],
> -  version : '17.3.0-devel',
> +  version : 
> run_command(find_program('scripts/meson_get_version.py')).stdout(),
>license : 'MIT',
>meson_version : '>= 0.42',
>default_options : ['c_std=c99', 'cpp_std=c++11']
> diff --git a/scripts/meson_get_version.py b/scripts/meson_get_version.py
> new file mode 100644
> index 000..a221e26f250
> --- /dev/null
> +++ b/scripts/meson_get_version.py
> @@ -0,0 +1,35 @@
> +#!/usr/bin/env python
> +# encoding=utf-8
> +# Copyright © 2017 Intel Corporation
> +
> +# Permission is hereby granted, free of charge, to any person obtaining a 
> copy
> +# of this software and associated documentation files (the "Software"), to 
> deal
> +# in the Software without restriction, including without limitation the 
> rights
> +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> +# copies of the Software, and to permit persons to whom the Software is
> +# furnished to do so, subject to the following conditions:
> +
> +# The above copyright notice and this permission notice shall be included in
> +# all copies or substantial portions of the Software.
> +
> +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
> FROM,
> +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
> THE
> +# SOFTWARE.
> +
> +from __future__ import print_function
> +import os
> +
> +
> +def main():
> +filename = os.path.join(os.environ['MESON_SOURCE_ROOT'], 'VERSION')
> +with open(filename) as f:
> +version = f.read().strip()
> +print(version, end='')
> +
> +
> +if __name__ == '__main__':
> +main()
> -- 
> 2.14.3
> 

ping


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] meson: Add threads dependencies to glsl_compiler executable

2017-11-07 Thread Dylan Baker
Quoting Dylan Baker (2017-10-27 11:22:09)
> Fixes compiling the optional standalone glsl compiler.
> 
> Reported-by: DrNick (on irc)
> Signed-off-by: Dylan Baker 
> ---
> 
> This is not compiled by default, but can be built by:
> meson build
> ninja -C build src/compiler/glsl/glsl_compiler
> 
>  src/compiler/glsl/meson.build | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/compiler/glsl/meson.build b/src/compiler/glsl/meson.build
> index 76fcafb9910..aa0e7153f42 100644
> --- a/src/compiler/glsl/meson.build
> +++ b/src/compiler/glsl/meson.build
> @@ -223,7 +223,7 @@ glsl_compiler = executable(
>'main.cpp',
>c_args : [c_vis_args, c_msvc_compat_args, no_override_init_args],
>cpp_args : [cpp_vis_args, cpp_msvc_compat_args],
> -  dependencies : [dep_clock],
> +  dependencies : [dep_clock, dep_thread],
>include_directories : [inc_common],
>link_with : [libglsl_standalone],
>build_by_default : false,
> -- 
> 2.14.2
> 

ping


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] st/mesa: use enum types instead of int/unsigned (v3)

2017-11-07 Thread Brian Paul
Use the proper enum types for various variables.  Makes life in gdb
a little nicer.  Note that the size of enum bitfields must be one
larger so the high bit is always zero (for MSVC).

v2: also increase size of image_format bitfield, per Eric Engestrom.
v3: use the new ASSERT_BITFIELD_SIZE() macro

Reviewed-by: Charmaine Lee 
Reviewed-by: Roland Scheidegger 
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp   | 18 +++---
 src/mesa/state_tracker/st_glsl_to_tgsi_private.h |  6 +++---
 src/mesa/state_tracker/st_mesa_to_tgsi.c |  6 +++---
 src/mesa/state_tracker/st_mesa_to_tgsi.h |  7 ---
 4 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 54e1961..e93c48c 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -58,6 +58,8 @@
 #include "st_glsl_to_tgsi_temprename.h"
 
 #include "util/hash_table.h"
+#include "util/bitfield_assert.h"
+
 #include 
 
 #define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) |\
@@ -179,10 +181,10 @@ public:
int num_address_regs;
uint32_t samplers_used;
glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
-   int sampler_targets[PIPE_MAX_SAMPLERS];   /**< One of TGSI_TEXTURE_* */
+   enum tgsi_texture_type sampler_targets[PIPE_MAX_SAMPLERS];
int images_used;
int image_targets[PIPE_MAX_SHADER_IMAGES];
-   unsigned image_formats[PIPE_MAX_SHADER_IMAGES];
+   enum pipe_format image_formats[PIPE_MAX_SHADER_IMAGES];
bool indirect_addr_consts;
int wpos_transform_const;
 
@@ -6169,6 +6171,15 @@ st_translate_program(
assert(numInputs <= ARRAY_SIZE(t->inputs));
assert(numOutputs <= ARRAY_SIZE(t->outputs));
 
+   ASSERT_BITFIELD_SIZE(st_src_reg, type, GLSL_TYPE_ERROR);
+   ASSERT_BITFIELD_SIZE(st_dst_reg, type, GLSL_TYPE_ERROR);
+   ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, image_format, 
PIPE_FORMAT_COUNT);
+   ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, tex_target,
+(gl_texture_index) (NUM_TEXTURE_TARGETS - 1));
+   ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, image_format,
+(enum pipe_format) (PIPE_FORMAT_COUNT - 1));
+   ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, op, TGSI_OPCODE_LAST - 1);
+
t = CALLOC_STRUCT(st_translate);
if (!t) {
   ret = PIPE_ERROR_OUT_OF_MEMORY;
@@ -6489,7 +6500,8 @@ st_translate_program(
/* texture samplers */
for (i = 0; i < frag_const->MaxTextureImageUnits; i++) {
   if (program->samplers_used & (1u << i)) {
- unsigned type = st_translate_texture_type(program->sampler_types[i]);
+ enum tgsi_return_type type =
+st_translate_texture_type(program->sampler_types[i]);
 
  t->samplers[i] = ureg_DECL_sampler(ureg, i);
 
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi_private.h 
b/src/mesa/state_tracker/st_glsl_to_tgsi_private.h
index d57525d..3e51936 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi_private.h
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi_private.h
@@ -127,13 +127,13 @@ public:
unsigned is_64bit_expanded:1;
unsigned sampler_base:5;
unsigned sampler_array_size:6; /**< 1-based size of sampler array, 1 if not 
array */
-   unsigned tex_target:4; /**< One of TEXTURE_*_INDEX */
+   gl_texture_index tex_target:5;
glsl_base_type tex_type:5;
unsigned tex_shadow:1;
-   unsigned image_format:9;
+   enum pipe_format image_format:10;
unsigned tex_offset_num_offset:3;
unsigned dead_mask:4; /**< Used in dead code elimination */
-   unsigned buffer_access:3; /**< buffer access type */
+   unsigned buffer_access:3; /**< bitmask of TGSI_MEMORY_x bits */
 
const struct tgsi_opcode_info *info;
 };
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c 
b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 275ca76..75825c3 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -164,8 +164,8 @@ src_register(struct st_translate *t,
 /**
  * Map mesa texture target to TGSI texture target.
  */
-unsigned
-st_translate_texture_target(GLuint textarget, GLboolean shadow)
+enum tgsi_texture_type
+st_translate_texture_target(gl_texture_index textarget, GLboolean shadow)
 {
if (shadow) {
   switch (textarget) {
@@ -223,7 +223,7 @@ st_translate_texture_target(GLuint textarget, GLboolean 
shadow)
 /**
  * Map GLSL base type to TGSI return type.
  */
-unsigned
+enum tgsi_return_type
 st_translate_texture_type(enum glsl_base_type type)
 {
switch (type) {
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.h 
b/src/mesa/state_tracker/st_mesa_to_tgsi.h
index 106cf85..06e8b70 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.h
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.h
@@ -30,6 +30,7 @@
 #define ST_MESA_TO_TGSI_H
 
 #include "main/glheader.h"
+#include "main/mtypes.h"
 
 #include "pipe/p_compiler.h"
 #include "pipe/p_defines.h"
@@ -62,10 +63,10 @@ st_tra

[Mesa-dev] [PATCH 3/4] glsl: s/unsigned/glsl_base_type/ in glsl type code (v2)

2017-11-07 Thread Brian Paul
Declare glsl_type::sampled_type as glsl_base_type as we do for the
base_type field.  And make base_type a bitfield to save a few bytes.

Update glsl_type constructor to take glsl_base_type instead of unsigned
and pass GLSL_TYPE_VOID instead of zero.

No Piglit regressions with llvmpipe.

v2:
- Declare both base_type and sampled_type as 8-bit fields
- Use the new ASSERT_BITFIELD_SIZE() macro.
---
 src/compiler/glsl_types.cpp | 30 +++---
 src/compiler/glsl_types.h   | 28 +---
 2 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp
index 704b63c..107a81f 100644
--- a/src/compiler/glsl_types.cpp
+++ b/src/compiler/glsl_types.cpp
@@ -50,9 +50,9 @@ glsl_type::glsl_type(GLenum gl_type,
  glsl_base_type base_type, unsigned vector_elements,
  unsigned matrix_columns, const char *name) :
gl_type(gl_type),
-   base_type(base_type),
+   base_type(base_type), sampled_type(GLSL_TYPE_VOID),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
-   sampled_type(0), interface_packing(0), interface_row_major(0),
+   interface_packing(0), interface_row_major(0),
vector_elements(vector_elements), matrix_columns(matrix_columns),
length(0)
 {
@@ -79,11 +79,11 @@ glsl_type::glsl_type(GLenum gl_type,
 
 glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type,
  enum glsl_sampler_dim dim, bool shadow, bool array,
- unsigned type, const char *name) :
+ glsl_base_type type, const char *name) :
gl_type(gl_type),
-   base_type(base_type),
+   base_type(base_type), sampled_type(type),
sampler_dimensionality(dim), sampler_shadow(shadow),
-   sampler_array(array), sampled_type(type), interface_packing(0),
+   sampler_array(array), interface_packing(0),
interface_row_major(0), length(0)
 {
mtx_lock(&glsl_type::mem_mutex);
@@ -102,9 +102,9 @@ glsl_type::glsl_type(GLenum gl_type, glsl_base_type 
base_type,
 glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
  const char *name) :
gl_type(0),
-   base_type(GLSL_TYPE_STRUCT),
+   base_type(GLSL_TYPE_STRUCT), sampled_type(GLSL_TYPE_VOID),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
-   sampled_type(0), interface_packing(0), interface_row_major(0),
+   interface_packing(0), interface_row_major(0),
vector_elements(0), matrix_columns(0),
length(num_fields)
 {
@@ -131,9 +131,9 @@ glsl_type::glsl_type(const glsl_struct_field *fields, 
unsigned num_fields,
  enum glsl_interface_packing packing,
  bool row_major, const char *name) :
gl_type(0),
-   base_type(GLSL_TYPE_INTERFACE),
+   base_type(GLSL_TYPE_INTERFACE), sampled_type(GLSL_TYPE_VOID),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
-   sampled_type(0), interface_packing((unsigned) packing),
+   interface_packing((unsigned) packing),
interface_row_major((unsigned) row_major),
vector_elements(0), matrix_columns(0),
length(num_fields)
@@ -159,9 +159,9 @@ glsl_type::glsl_type(const glsl_struct_field *fields, 
unsigned num_fields,
 glsl_type::glsl_type(const glsl_type *return_type,
  const glsl_function_param *params, unsigned num_params) :
gl_type(0),
-   base_type(GLSL_TYPE_FUNCTION),
+   base_type(GLSL_TYPE_FUNCTION), sampled_type(GLSL_TYPE_VOID),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
-   sampled_type(0), interface_packing(0), interface_row_major(0),
+   interface_packing(0), interface_row_major(0),
vector_elements(0), matrix_columns(0),
length(num_params)
 {
@@ -191,9 +191,9 @@ glsl_type::glsl_type(const glsl_type *return_type,
 
 glsl_type::glsl_type(const char *subroutine_name) :
gl_type(0),
-   base_type(GLSL_TYPE_SUBROUTINE),
+   base_type(GLSL_TYPE_SUBROUTINE), sampled_type(GLSL_TYPE_VOID),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
-   sampled_type(0), interface_packing(0), interface_row_major(0),
+   interface_packing(0), interface_row_major(0),
vector_elements(1), matrix_columns(1),
length(0)
 {
@@ -442,9 +442,9 @@ _mesa_glsl_release_types(void)
 
 
 glsl_type::glsl_type(const glsl_type *array, unsigned length) :
-   base_type(GLSL_TYPE_ARRAY),
+   base_type(GLSL_TYPE_ARRAY), sampled_type(GLSL_TYPE_VOID),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
-   sampled_type(0), interface_packing(0), interface_row_major(0),
+   interface_packing(0), interface_row_major(0),
vector_elements(0), matrix_columns(0),
length(length), name(NULL)
 {
diff --git a/src/compiler/glsl_types.h b/src/compiler/glsl_types.h
index 0b4a66c..3a3d3d8 100644
--- a/src/compiler/glsl_types.h
+++ b/src/compiler/glsl_types.h
@@ -28,6 +28,7 @@
 #include 
 #include 
 
+#include "util/bitfield_assert.h"
 #include "shader_enums.h"
 #i

[Mesa-dev] [PATCH 1/4] util: add new bitfield_assert.h header

2017-11-07 Thread Brian Paul
Contains a macro for checking that bitfields are large enough to
hold the largest expected value.
---
 src/util/bitfield_assert.h | 49 ++
 1 file changed, 49 insertions(+)
 create mode 100644 src/util/bitfield_assert.h

diff --git a/src/util/bitfield_assert.h b/src/util/bitfield_assert.h
new file mode 100644
index 000..dab2b21
--- /dev/null
+++ b/src/util/bitfield_assert.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2017 VMware, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef BITFIELD_ASSERT_H
+#define BITFIELD_ASSERT_H
+
+
+#include 
+
+
+/**
+ * Check that STRUCT::FIELD can hold MAXVAL.  We use a lot of bitfields
+ * in Mesa/gallium.  We have to be sure they're of sufficient size to
+ * hold the largest expected value.
+ * Note that with MSVC, enums are signed and enum bitfields need one extra
+ * high bit (always zero) to ensure the max value is handled correctly.
+ * This macro will detect that with MSVC, but not GCC.
+ */
+#define ASSERT_BITFIELD_SIZE(STRUCT, FIELD, MAXVAL) \
+   do { \
+  STRUCT s; \
+  s.FIELD = (MAXVAL); \
+  assert((int) s.FIELD == (MAXVAL) && "Insufficient bitfield size!"); \
+   } while (0)
+
+
+
+#endif /* BITFIELD_ASSERT_H */
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] util/tgsi: use ASSERT_BITFIELD_SIZE() to check opcode field size

2017-11-07 Thread Brian Paul
I've noticed at least two places where we store the TGSI opcode in
an unsigned:8 bitfield.  We're at 249 opcodes now.  If we hit 256 we'll
need to grow those bitfields.  Use the new ASSERT_BITFIELD_SIZE() macro
to detect that.
---
 src/gallium/auxiliary/tgsi/tgsi_info.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c 
b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 4e39950..23dc95a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -27,6 +27,7 @@
 
 #include "util/u_debug.h"
 #include "util/u_memory.h"
+#include "util/bitfield_assert.h"
 #include "tgsi_info.h"
 
 #define NONE TGSI_OUTPUT_NONE
@@ -55,6 +56,8 @@ tgsi_get_opcode_info( uint opcode )
 {
static boolean firsttime = 1;
 
+   ASSERT_BITFIELD_SIZE(struct tgsi_opcode_info, opcode, TGSI_OPCODE_LAST - 1);
+
if (firsttime) {
   unsigned i;
   firsttime = 0;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/18] radeon/vcn: add common encode part

2017-11-07 Thread Dylan Baker
Quoting boyuan.zh...@amd.com (2017-11-07 13:59:02)
> From: Boyuan Zhang 
> 
> Signed-off-by: Boyuan Zhang 
> ---
>  src/gallium/drivers/radeon/Makefile.sources |   3 +
>  src/gallium/drivers/radeon/radeon_vcn_enc.c | 166 +
>  src/gallium/drivers/radeon/radeon_vcn_enc.h |  82 
>  src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c | 237 
> 
>  4 files changed, 488 insertions(+)
>  create mode 100644 src/gallium/drivers/radeon/radeon_vcn_enc.c
>  create mode 100644 src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
> 
> diff --git a/src/gallium/drivers/radeon/Makefile.sources 
> b/src/gallium/drivers/radeon/Makefile.sources
> index 22de129..0871666 100644
> --- a/src/gallium/drivers/radeon/Makefile.sources
> +++ b/src/gallium/drivers/radeon/Makefile.sources
> @@ -13,6 +13,9 @@ C_SOURCES := \
> radeon_uvd.h \
> radeon_vcn_dec.c \
> radeon_vcn_dec.h \
> +   radeon_vcn_enc.c \
> +   radeon_vcn_enc_1_2.c \
> +   radeon_vcn_enc.h \

Please add the .c files to src/gallium/drivers/radeon/meson.build as well.

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/18] radeon/vcn: add encode begin frame

2017-11-07 Thread Alex Deucher
On Tue, Nov 7, 2017 at 4:59 PM,   wrote:
> From: Boyuan Zhang 

Same here :)
Add implementation for begin_frame interface for VCN encode.

Alex

>
> Signed-off-by: Boyuan Zhang 
> ---
>  src/gallium/drivers/radeon/radeon_vcn_enc.c | 23 ++-
>  1 file changed, 22 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.c 
> b/src/gallium/drivers/radeon/radeon_vcn_enc.c
> index 437c2fc..51cef7f 100644
> --- a/src/gallium/drivers/radeon/radeon_vcn_enc.c
> +++ b/src/gallium/drivers/radeon/radeon_vcn_enc.c
> @@ -126,7 +126,28 @@ static void radeon_enc_begin_frame(struct 
> pipe_video_codec *encoder,
>  struct 
> pipe_video_buffer *source,
>  struct 
> pipe_picture_desc *picture)
>  {
> -   /* TODO*/
> +   struct radeon_encoder *enc = (struct radeon_encoder*)encoder;
> +   struct vl_video_buffer *vid_buf = (struct vl_video_buffer *)source;
> +   struct pipe_h264_enc_picture_desc *pic = (struct 
> pipe_h264_enc_picture_desc *)picture;
> +
> +   radeon_vcn_enc_get_param(enc, pic);
> +
> +   enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma);
> +   enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma);
> +
> +   enc->need_feedback = false;
> +
> +   if (!enc->stream_handle) {
> +   struct rvid_buffer fb;
> +   enc->stream_handle = si_vid_alloc_stream_handle();
> +   enc->si = CALLOC_STRUCT(rvid_buffer);
> +   si_vid_create_buffer(enc->screen, enc->si, 128 * 1024, 
> PIPE_USAGE_STAGING);
> +   si_vid_create_buffer(enc->screen, &fb, 4096, 
> PIPE_USAGE_STAGING);
> +   enc->fb = &fb;
> +   enc->begin(enc, pic);
> +   flush(enc);
> +   si_vid_destroy_buffer(&fb);
> +   }
>  }
>
>  static void radeon_enc_encode_bitstream(struct pipe_video_codec *encoder,
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 09/18] radeon/vcn: add encode header algorithms

2017-11-07 Thread Alex Deucher
On Tue, Nov 7, 2017 at 4:59 PM,   wrote:
> From: Boyuan Zhang 

Better patch description please.  What are these for?  Please provide
a brief overview.

>
> Signed-off-by: Boyuan Zhang 
> ---
>  src/gallium/drivers/radeon/radeon_vcn_enc.h |   6 ++
>  src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c | 121 
> 
>  2 files changed, 127 insertions(+)
>
> diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.h 
> b/src/gallium/drivers/radeon/radeon_vcn_enc.h
> index cbdf9c0..651502c 100644
> --- a/src/gallium/drivers/radeon/radeon_vcn_enc.h
> +++ b/src/gallium/drivers/radeon/radeon_vcn_enc.h
> @@ -396,9 +396,15 @@ struct radeon_encoder {
> struct radeon_enc_h264_enc_pic  enc_pic;
>
> unsignedalignment;
> +   unsignedshifter;
> +   unsignedbits_in_shifter;
> +   unsignednum_zeros;
> +   unsignedbyte_index;
> +   unsignedbits_output;
> uint32_ttotal_task_size;
> uint32_t*   p_task_size;
>
> +   boolemulation_prevention;
> boolneed_feedback;
>  };
>
> diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c 
> b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
> index f75f3d6..5170c67 100644
> --- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
> +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
> @@ -49,6 +49,7 @@ RADEON_ENC_CS(cmd)
> enc->total_task_size += *begin;}
>
>  static const unsigned profiles[7] = { 66, 77, 88, 100, 110, 122, 244 };
> +static const unsigned index_to_shifts[4] = {24, 16, 8, 0};
>
>  static void radeon_enc_add_buffer(struct radeon_encoder *enc, struct 
> pb_buffer *buf,
>   enum 
> radeon_bo_usage usage, enum radeon_bo_domain domain,
> @@ -63,6 +64,126 @@ static void radeon_enc_add_buffer(struct radeon_encoder 
> *enc, struct pb_buffer *
> RADEON_ENC_CS(addr);
>  }
>
> +static void radeon_enc_set_emulation_prevention(struct radeon_encoder *enc, 
> bool set)
> +{
> +   if (set != enc->emulation_prevention) {
> +   enc->emulation_prevention = set;
> +   enc->num_zeros = 0;
> +   }
> +}
> +
> +static void radeon_enc_output_one_byte(struct radeon_encoder *enc, unsigned 
> char byte)
> +{
> +   if (enc->byte_index == 0)
> +   enc->cs->current.buf[enc->cs->current.cdw] = 0;
> +   enc->cs->current.buf[enc->cs->current.cdw] |= ((unsigned int)(byte) 
> << index_to_shifts[enc->byte_index]);
> +   enc->byte_index++;
> +
> +   if (enc->byte_index >= 4) {
> +   enc->byte_index = 0;
> +   enc->cs->current.cdw++;
> +   }
> +}
> +
> +static void radeon_enc_emulation_prevention(struct radeon_encoder *enc, 
> unsigned char byte)
> +{
> +   if(enc->emulation_prevention) {
> +   if((enc->num_zeros >= 2) && ((byte == 0x00) || (byte == 0x01) 
> || (byte == 0x03))) {
> +radeon_enc_output_one_byte(enc, 0x03);
> +enc->bits_output += 8;
> +enc->num_zeros = 0;
> +}
> +enc->num_zeros = (byte == 0 ? (enc->num_zeros + 1) : 0);
> +}
> +}
> +
> +static void radeon_enc_code_fixed_bits(struct radeon_encoder *enc, unsigned 
> int value, unsigned int num_bits)
> +{
> +   unsigned int bits_to_pack = 0;
> +
> +   while(num_bits > 0) {
> +   unsigned int value_to_pack = value & (0xffffffff >> (32 - 
> num_bits));
> +   bits_to_pack = num_bits > (32 - enc->bits_in_shifter) ? (32 - 
> enc->bits_in_shifter) : num_bits;
> +
> +   if (bits_to_pack < num_bits)
> +   value_to_pack = value_to_pack >> (num_bits - 
> bits_to_pack);
> +
> +   enc->shifter |= value_to_pack << (32 - enc->bits_in_shifter - 
> bits_to_pack);
> +   num_bits -= bits_to_pack;
> +   enc->bits_in_shifter += bits_to_pack;
> +
> +   while(enc->bits_in_shifter >= 8) {
> +   unsigned char output_byte = (unsigned 
> char)(enc->shifter >> 24);
> +   enc->shifter <<= 8;
> +   radeon_enc_emulation_prevention(enc, output_byte);
> +   radeon_enc_output_one_byte(enc, output_byte);
> +   enc->bits_in_shifter -= 8;
> +   enc->bits_output += 8;
> +   }
> +   }
> +}
> +
> +static void radeon_enc_reset(struct radeon_encoder *enc)
> +{
> +   enc->emulation_prevention = false;
> +   enc->shifter = 0;
> +   enc->bits_in_shifter = 0;
> +   enc->bits_output = 0;
> +   enc->num_zeros = 0;
> +   enc->byte_index = 0;
> +}
> +
> +static void radeon_enc_byte_align(struct radeon_encoder *enc)
> +{
> +   unsigned int num_padding_zeros = (32 - enc-

Re: [Mesa-dev] [PATCH 08/18] radeon/vcn: add ib implementations

2017-11-07 Thread Alex Deucher
On Tue, Nov 7, 2017 at 4:59 PM,   wrote:
> From: Boyuan Zhang 
>

Better patch description please.  Something like:
Implement the command buffer submission interfaces for VCN encode.


> Signed-off-by: Boyuan Zhang 
> ---
>  src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c | 292 
> ++--
>  1 file changed, 268 insertions(+), 24 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c 
> b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
> index ffd1155..f75f3d6 100644
> --- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
> +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
> @@ -65,121 +65,365 @@ static void radeon_enc_add_buffer(struct radeon_encoder 
> *enc, struct pb_buffer *
>
>  static void radeon_enc_session_info(struct radeon_encoder *enc)
>  {
> -   /* TODO*/
> +   unsigned int interface_version = ((RENCODE_FW_INTERFACE_MAJOR_VERSION 
> << RENCODE_IF_MAJOR_VERSION_SHIFT) |
> + 
> (RENCODE_FW_INTERFACE_MINOR_VERSION << RENCODE_IF_MINOR_VERSION_SHIFT));
> +   RADEON_ENC_BEGIN(RENCODE_IB_PARAM_SESSION_INFO);
> +   RADEON_ENC_CS(interface_version);
> +   RADEON_ENC_READWRITE(enc->si->res->buf, enc->si->res->domains, 0x0);
> +   RADEON_ENC_END();
>  }
>
>  static void radeon_enc_task_info(struct radeon_encoder *enc, bool 
> need_feedback)
>  {
> -   /* TODO*/
> +   enc->enc_pic.task_info.task_id++;
> +
> +   if (need_feedback)
> +   enc->enc_pic.task_info.allowed_max_num_feedbacks = 1;
> +   else
> +   enc->enc_pic.task_info.allowed_max_num_feedbacks = 0;
> +
> +   RADEON_ENC_BEGIN(RENCODE_IB_PARAM_TASK_INFO);
> +   enc->p_task_size = &enc->cs->current.buf[enc->cs->current.cdw++];
> +   RADEON_ENC_CS(enc->enc_pic.task_info.task_id);
> +   RADEON_ENC_CS(enc->enc_pic.task_info.allowed_max_num_feedbacks);
> +   RADEON_ENC_END();
>  }
>
>  static void radeon_enc_session_init(struct radeon_encoder *enc)
>  {
> -   /* TODO*/
> +   enc->enc_pic.session_init.encode_standard = 
> RENCODE_ENCODE_STANDARD_H264;
> +   enc->enc_pic.session_init.aligned_picture_width = 
> align(enc->base.width, 16);
> +   enc->enc_pic.session_init.aligned_picture_height = 
> align(enc->base.height, 16);
> +   enc->enc_pic.session_init.padding_width = 
> enc->enc_pic.session_init.aligned_picture_width - enc->base.width;
> +   enc->enc_pic.session_init.padding_height = 
> enc->enc_pic.session_init.aligned_picture_height - enc->base.height;
> +   enc->enc_pic.session_init.pre_encode_mode = 
> RENCODE_PREENCODE_MODE_NONE;
> +   enc->enc_pic.session_init.pre_encode_chroma_enabled = false;
> +
> +   RADEON_ENC_BEGIN(RENCODE_IB_PARAM_SESSION_INIT);
> +   RADEON_ENC_CS(enc->enc_pic.session_init.encode_standard);
> +   RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_width);
> +   RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_height);
> +   RADEON_ENC_CS(enc->enc_pic.session_init.padding_width);
> +   RADEON_ENC_CS(enc->enc_pic.session_init.padding_height);
> +   RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_mode);
> +   RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_chroma_enabled);
> +   RADEON_ENC_END();
>  }
>
>  static void radeon_enc_layer_control(struct radeon_encoder *enc)
>  {
> -   /* TODO*/
> +   enc->enc_pic.layer_ctrl.max_num_temporal_layers = 1;
> +   enc->enc_pic.layer_ctrl.num_temporal_layers = 1;
> +
> +   RADEON_ENC_BEGIN(RENCODE_IB_PARAM_LAYER_CONTROL);
> +   RADEON_ENC_CS(enc->enc_pic.layer_ctrl.max_num_temporal_layers);
> +   RADEON_ENC_CS(enc->enc_pic.layer_ctrl.num_temporal_layers);
> +   RADEON_ENC_END();
>  }
>
>  static void radeon_enc_layer_select(struct radeon_encoder *enc)
>  {
> -   /* TODO*/
> +   enc->enc_pic.layer_sel.temporal_layer_index = 0;
> +
> +   RADEON_ENC_BEGIN(RENCODE_IB_PARAM_LAYER_SELECT);
> +   RADEON_ENC_CS(enc->enc_pic.layer_sel.temporal_layer_index);
> +   RADEON_ENC_END();
>  }
>
>  static void radeon_enc_slice_control(struct radeon_encoder *enc)
>  {
> -   /* TODO*/
> +   enc->enc_pic.slice_ctrl.slice_control_mode = 
> RENCODE_H264_SLICE_CONTROL_MODE_FIXED_MBS;
> +   enc->enc_pic.slice_ctrl.num_mbs_per_slice = align(enc->base.width, 
> 16) / 16 * align(enc->base.height, 16) / 16;
> +
> +   RADEON_ENC_BEGIN(RENCODE_H264_IB_PARAM_SLICE_CONTROL);
> +   RADEON_ENC_CS(enc->enc_pic.slice_ctrl.slice_control_mode);
> +   RADEON_ENC_CS(enc->enc_pic.slice_ctrl.num_mbs_per_slice);
> +   RADEON_ENC_END();
>  }
>
>  static void radeon_enc_spec_misc(struct radeon_encoder *enc)
>  {
> -   /* TODO*/
> +   enc->enc_pic.spec_misc.constrained_intra_pred_flag = 0;
> +   enc->enc_pic.spec_misc.cabac_enable = 0;
> +   enc->enc_pic.spec_misc.cabac_init_idc = 0;
> +   enc->enc_pic.spec_misc.half_pel_enabled = 1;
> +   enc->enc_pic

Re: [Mesa-dev] [PATCH 07/18] radeon/vcn: add common encode part

2017-11-07 Thread Alex Deucher
On Tue, Nov 7, 2017 at 4:59 PM,   wrote:
> From: Boyuan Zhang 

Better patch description please.  Something like:
Fill out a skeleton interface for video encode on vcn hardware.

>
> Signed-off-by: Boyuan Zhang 
> ---
>  src/gallium/drivers/radeon/Makefile.sources |   3 +
>  src/gallium/drivers/radeon/radeon_vcn_enc.c | 166 +
>  src/gallium/drivers/radeon/radeon_vcn_enc.h |  82 
>  src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c | 237 
> 
>  4 files changed, 488 insertions(+)
>  create mode 100644 src/gallium/drivers/radeon/radeon_vcn_enc.c
>  create mode 100644 src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>
> diff --git a/src/gallium/drivers/radeon/Makefile.sources 
> b/src/gallium/drivers/radeon/Makefile.sources
> index 22de129..0871666 100644
> --- a/src/gallium/drivers/radeon/Makefile.sources
> +++ b/src/gallium/drivers/radeon/Makefile.sources
> @@ -13,6 +13,9 @@ C_SOURCES := \
> radeon_uvd.h \
> radeon_vcn_dec.c \
> radeon_vcn_dec.h \
> +   radeon_vcn_enc.c \
> +   radeon_vcn_enc_1_2.c \
> +   radeon_vcn_enc.h \
> radeon_vce_40_2_2.c \
> radeon_vce_50.c \
> radeon_vce_52.c \
> diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.c 
> b/src/gallium/drivers/radeon/radeon_vcn_enc.c
> new file mode 100644
> index 000..437c2fc
> --- /dev/null
> +++ b/src/gallium/drivers/radeon/radeon_vcn_enc.c
> @@ -0,0 +1,166 @@
> +/**
> + *
> + * Copyright 2017 Advanced Micro Devices, Inc.
> + * All Rights Reserved.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the
> + * "Software"), to deal in the Software without restriction, including
> + * without limitation the rights to use, copy, modify, merge, publish,
> + * distribute, sub license, and/or sell copies of the Software, and to
> + * permit persons to whom the Software is furnished to do so, subject to
> + * the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the
> + * next paragraph) shall be included in all copies or substantial portions
> + * of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
> + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
> + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
> + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
> + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
> + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> + *
> + **/
> +
> +#include 
> +
> +#include "pipe/p_video_codec.h"
> +
> +#include "util/u_video.h"
> +#include "util/u_memory.h"
> +
> +#include "vl/vl_video_buffer.h"
> +
> +#include "r600_pipe_common.h"
> +#include "radeon_video.h"
> +#include "radeon_vcn_enc.h"
> +
> +static void radeon_vcn_enc_get_param(struct radeon_encoder *enc, struct 
> pipe_h264_enc_picture_desc *pic)
> +{
> +   enc->enc_pic.picture_type = pic->picture_type;
> +   enc->enc_pic.frame_num = pic->frame_num;
> +   enc->enc_pic.pic_order_cnt = pic->pic_order_cnt;
> +   enc->enc_pic.pic_order_cnt_type = pic->pic_order_cnt_type;
> +   enc->enc_pic.ref_idx_l0 = pic->ref_idx_l0;
> +   enc->enc_pic.ref_idx_l1 = pic->ref_idx_l1;
> +   enc->enc_pic.not_referenced = pic->not_referenced;
> +   enc->enc_pic.is_idr = pic->is_idr;
> +   enc->enc_pic.crop_left = 0;
> +   enc->enc_pic.crop_right = (align(enc->base.width, 16) - 
> enc->base.width) / 2;
> +   enc->enc_pic.crop_top = 0;
> +   enc->enc_pic.crop_bottom = (align(enc->base.height, 16) - 
> enc->base.height) / 2;
> +}
> +
> +static void flush(struct radeon_encoder *enc)
> +{
> +   enc->ws->cs_flush(enc->cs, RADEON_FLUSH_ASYNC, NULL);
> +}
> +
> +static void radeon_enc_flush(struct pipe_video_codec *encoder)
> +{
> +   struct radeon_encoder *enc = (struct radeon_encoder*)encoder;
> +   flush(enc);
> +}
> +
> +static void radeon_enc_cs_flush(void *ctx, unsigned flags,
> +   struct 
> pipe_fence_handle **fence)
> +{
> +   // just ignored
> +}
> +
> +static unsigned get_cpb_num(struct radeon_encoder *enc)
> +{
> +   unsigned w = align(enc->base.width, 16) / 16;
> +   unsigned h = align(enc->base.height, 16) / 16;
> +   unsigned dpb;
> +
> +   switch (enc->base.level) {
> +   case 10:
> +   dpb = 396;
> +   break;
> +   case 11:
> +   dpb = 900;
> +   break;
> +   case 12:
> +   case 13:
> +   case 20:
> +   dpb = 2376;
> +   break;
> +   c

Re: [Mesa-dev] [PATCH 06/18] st/va: implement poc type

2017-11-07 Thread Alex Deucher
On Tue, Nov 7, 2017 at 4:59 PM,   wrote:
> From: Boyuan Zhang 
>

Same comment as patch 5.  Please provide some details as to what this is.

Alex

> Signed-off-by: Boyuan Zhang 
> ---
>  src/gallium/state_trackers/va/picture.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/gallium/state_trackers/va/picture.c 
> b/src/gallium/state_trackers/va/picture.c
> index 7427b98..55ca16e 100644
> --- a/src/gallium/state_trackers/va/picture.c
> +++ b/src/gallium/state_trackers/va/picture.c
> @@ -396,6 +396,7 @@ handleVAEncSequenceParameterBufferType(vlVaDriver *drv, 
> vlVaContext *context, vl
> context->desc.h264enc.gop_size = h264->intra_idr_period * 
> context->gop_coeff;
> context->desc.h264enc.rate_ctrl.frame_rate_num = h264->time_scale / 2;
> context->desc.h264enc.rate_ctrl.frame_rate_den = h264->num_units_in_tick;
> +   context->desc.h264enc.pic_order_cnt_type = 
> h264->seq_fields.bits.pic_order_cnt_type;
> return VA_STATUS_SUCCESS;
>  }
>
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/18] vl: add poc type

2017-11-07 Thread Alex Deucher
On Tue, Nov 7, 2017 at 4:59 PM,   wrote:
> From: Boyuan Zhang 
>

Please add a better patch description.  Why do you need this new
element and what does it mean?

Alex


> Signed-off-by: Boyuan Zhang 
> ---
>  src/gallium/include/pipe/p_video_state.h | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/gallium/include/pipe/p_video_state.h 
> b/src/gallium/include/pipe/p_video_state.h
> index abd5d36..1d57165 100644
> --- a/src/gallium/include/pipe/p_video_state.h
> +++ b/src/gallium/include/pipe/p_video_state.h
> @@ -400,6 +400,7 @@ struct pipe_h264_enc_picture_desc
> unsigned idr_pic_id;
> unsigned gop_cnt;
> unsigned pic_order_cnt;
> +   unsigned pic_order_cnt_type;
> unsigned ref_idx_l0;
> unsigned ref_idx_l1;
> unsigned gop_size;
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 04/18] winsys/amdgpu: add vcn enc cs support

2017-11-07 Thread Alex Deucher
On Tue, Nov 7, 2017 at 4:58 PM,   wrote:
> From: Boyuan Zhang 
>
> Signed-off-by: Boyuan Zhang 
> ---
>  src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 11 ++-
>  1 file changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 
> b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> index 0b47af9..75e1891 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> @@ -25,6 +25,10 @@
>   * next paragraph) shall be included in all copies or substantial portions
>   * of the Software.
>   */
> +/*
> + * Authors:
> + *  Marek Olšák 
> + */

Drop this hunk.

>
>  #include "amdgpu_cs.h"
>  #include "os/os_time.h"
> @@ -326,7 +330,8 @@ static bool amdgpu_cs_has_user_fence(struct 
> amdgpu_cs_context *cs)
>  {
> return cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_UVD &&
>cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCE &&
> -  cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_DEC;
> +  cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_DEC &&
> +  cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_ENC;
>  }
>
>  static bool amdgpu_cs_has_chaining(struct amdgpu_cs *cs)
> @@ -779,6 +784,10 @@ static bool amdgpu_init_cs_context(struct 
> amdgpu_cs_context *cs,
>cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCN_DEC;
>break;
>
> +  case RING_VCN_ENC:
> +  cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCN_ENC;
> +  break;
> +
> default:
> case RING_GFX:
>cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_GFX;
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/18] radeon/vcn: add vcn encode interface

2017-11-07 Thread Alex Deucher
On Tue, Nov 7, 2017 at 4:58 PM,   wrote:
> From: Boyuan Zhang 
>
> Signed-off-by: Boyuan Zhang 
> ---
>  src/gallium/drivers/radeon/radeon_vcn_enc.h | 325 
> 
>  1 file changed, 325 insertions(+)
>  create mode 100644 src/gallium/drivers/radeon/radeon_vcn_enc.h
>
> diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.h 
> b/src/gallium/drivers/radeon/radeon_vcn_enc.h
> new file mode 100644
> index 000..a58ff6b
> --- /dev/null
> +++ b/src/gallium/drivers/radeon/radeon_vcn_enc.h
> @@ -0,0 +1,325 @@
> +/**
> + *
> + * Copyright 2017 Advanced Micro Devices, Inc.
> + * All Rights Reserved.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the
> +   + * "Software"), to deal in the Software without restriction, 
> including

Extra '+' here.

> + * without limitation the rights to use, copy, modify, merge, publish,
> + * distribute, sub license, and/or sell copies of the Software, and to
> + * permit persons to whom the Software is furnished to do so, subject to
> + * the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the
> +   + * next paragraph) shall be included in all copies or 
> substantial portions

and here.

> + * of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
> + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
> + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
> + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
> + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
> + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> + *
> + **/
> +
> +#ifndef _RADEON_VCN_ENC_H
> +#define _RADEON_VCN_ENC_H
> +
> +#define RENCODE_FW_INTERFACE_MAJOR_VERSION 1
> +#define RENCODE_FW_INTERFACE_MINOR_VERSION 2
> +
> +#define RENCODE_IB_PARAM_SESSION_INFO  0x0001
> +#define RENCODE_IB_PARAM_TASK_INFO 0x0002
> +#define RENCODE_IB_PARAM_SESSION_INIT  0x0003
> +#define RENCODE_IB_PARAM_LAYER_CONTROL 0x0004
> +#define RENCODE_IB_PARAM_LAYER_SELECT  0x0005
> +#define RENCODE_IB_PARAM_RATE_CONTROL_SESSION_INIT 0x0006
> +#define RENCODE_IB_PARAM_RATE_CONTROL_LAYER_INIT   0x0007
> +#define RENCODE_IB_PARAM_RATE_CONTROL_PER_PICTURE  0x0008
> +#define RENCODE_IB_PARAM_QUALITY_PARAMS0x0009
> +#define RENCODE_IB_PARAM_SLICE_HEADER  0x000a
> +#define RENCODE_IB_PARAM_ENCODE_PARAMS 0x000b
> +#define RENCODE_IB_PARAM_INTRA_REFRESH 0x000c
> +#define RENCODE_IB_PARAM_ENCODE_CONTEXT_BUFFER 0x000d
> +#define RENCODE_IB_PARAM_VIDEO_BITSTREAM_BUFFER0x000e
> +#define RENCODE_IB_PARAM_FEEDBACK_BUFFER   0x0010
> +#define RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU0x0020
> +
> +#define RENCODE_H264_IB_PARAM_SLICE_CONTROL0x0021
> +#define RENCODE_H264_IB_PARAM_SPEC_MISC0x0022
> +#define RENCODE_H264_IB_PARAM_ENCODE_PARAMS0x0023
> +#define RENCODE_H264_IB_PARAM_DEBLOCKING_FILTER0x0024
> +
> +#define RENCODE_IB_OP_INITIALIZE   0x0101
> +#define RENCODE_IB_OP_CLOSE_SESSION0x0102
> +#define RENCODE_IB_OP_ENCODE   0x0103
> +#define RENCODE_IB_OP_INIT_RC  0x0104
> +#define RENCODE_IB_OP_INIT_RC_VBV_BUFFER_LEVEL 0x0105
> +#define RENCODE_IB_OP_SET_SPEED_ENCODING_MODE  0x0106
> +#define RENCODE_IB_OP_SET_BALANCE_ENCODING_MODE0x0107
> +#define RENCODE_IB_OP_SET_QUALITY_ENCODING_MODE0x0108
> +
> +#define RENCODE_IF_MAJOR_VERSION_MASK  0x
> +#define RENCODE_IF_MAJOR_VERSION_SHIFT 16
> +#define RENCODE_IF_MINOR_VERSION_MASK  0x
> +#define RENCODE_IF_MINOR_VERSION_SHIFT 0
> +
> +#define RENCODE_ENCODE_STANDARD_H264   1
> +
> +#define RENCODE_PREENCODE_MODE_NONE0x
> +#define RENCODE_PREENCODE_MODE_1X  0x0001
> +#define RENCODE_PREENCODE_MODE_2X  0x0002
> +#define RENCODE_P

Re: [Mesa-dev] [PATCH] anv/meson: Generate dev_icd.json

2017-11-07 Thread Dylan Baker
Quoting Eric Engestrom (2017-11-07 07:25:53)
> On Wednesday, 2017-11-01 13:49:03 -0700, Chad Versace wrote:
> > I tested this in a setup where the builddir was outside of the srcdir.
> > ---
> >  src/intel/vulkan/meson.build | 12 
> >  1 file changed, 12 insertions(+)
> > 
> > diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build
> > index ff24e304ef5..e8b7f407507 100644
> > --- a/src/intel/vulkan/meson.build
> > +++ b/src/intel/vulkan/meson.build
> > @@ -48,6 +48,18 @@ intel_icd = custom_target(
> >install : true,
> >  )
> >  
> > +dev_icd = custom_target(
> > +  'dev_icd',
> > +  input : 'anv_icd.py',
> > +  output : 'dev_icd.@0@.json'.format(target_machine.cpu()),
> 
> Strictly speaking, shouldn't that be `host_machine` [1] ?
> I don't see how one would do a canadian build of mesa though, so
> host == target should always be true.

That's my fault. There are (or were) a number of cases where I used target
instead of host, that can also be a follow up.

In any case:
Acked-by: Dylan Baker 


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/18] radeon/vcn: add common encode part

2017-11-07 Thread boyuan.zhang
From: Boyuan Zhang 

Signed-off-by: Boyuan Zhang 
---
 src/gallium/drivers/radeon/Makefile.sources |   3 +
 src/gallium/drivers/radeon/radeon_vcn_enc.c | 166 +
 src/gallium/drivers/radeon/radeon_vcn_enc.h |  82 
 src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c | 237 
 4 files changed, 488 insertions(+)
 create mode 100644 src/gallium/drivers/radeon/radeon_vcn_enc.c
 create mode 100644 src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c

diff --git a/src/gallium/drivers/radeon/Makefile.sources 
b/src/gallium/drivers/radeon/Makefile.sources
index 22de129..0871666 100644
--- a/src/gallium/drivers/radeon/Makefile.sources
+++ b/src/gallium/drivers/radeon/Makefile.sources
@@ -13,6 +13,9 @@ C_SOURCES := \
radeon_uvd.h \
radeon_vcn_dec.c \
radeon_vcn_dec.h \
+   radeon_vcn_enc.c \
+   radeon_vcn_enc_1_2.c \
+   radeon_vcn_enc.h \
radeon_vce_40_2_2.c \
radeon_vce_50.c \
radeon_vce_52.c \
diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.c 
b/src/gallium/drivers/radeon/radeon_vcn_enc.c
new file mode 100644
index 000..437c2fc
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc.c
@@ -0,0 +1,166 @@
+/**
+ *
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **/
+
+#include 
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_video.h"
+#include "util/u_memory.h"
+
+#include "vl/vl_video_buffer.h"
+
+#include "r600_pipe_common.h"
+#include "radeon_video.h"
+#include "radeon_vcn_enc.h"
+
+static void radeon_vcn_enc_get_param(struct radeon_encoder *enc, struct 
pipe_h264_enc_picture_desc *pic)
+{
+   enc->enc_pic.picture_type = pic->picture_type;
+   enc->enc_pic.frame_num = pic->frame_num;
+   enc->enc_pic.pic_order_cnt = pic->pic_order_cnt;
+   enc->enc_pic.pic_order_cnt_type = pic->pic_order_cnt_type;
+   enc->enc_pic.ref_idx_l0 = pic->ref_idx_l0;
+   enc->enc_pic.ref_idx_l1 = pic->ref_idx_l1;
+   enc->enc_pic.not_referenced = pic->not_referenced;
+   enc->enc_pic.is_idr = pic->is_idr;
+   enc->enc_pic.crop_left = 0;
+   enc->enc_pic.crop_right = (align(enc->base.width, 16) - 
enc->base.width) / 2;
+   enc->enc_pic.crop_top = 0;
+   enc->enc_pic.crop_bottom = (align(enc->base.height, 16) - 
enc->base.height) / 2;
+}
+
+static void flush(struct radeon_encoder *enc)
+{
+   enc->ws->cs_flush(enc->cs, RADEON_FLUSH_ASYNC, NULL);
+}
+
+static void radeon_enc_flush(struct pipe_video_codec *encoder)
+{
+   struct radeon_encoder *enc = (struct radeon_encoder*)encoder;
+   flush(enc);
+}
+
+static void radeon_enc_cs_flush(void *ctx, unsigned flags,
+   struct 
pipe_fence_handle **fence)
+{
+   // just ignored
+}
+
+static unsigned get_cpb_num(struct radeon_encoder *enc)
+{
+   unsigned w = align(enc->base.width, 16) / 16;
+   unsigned h = align(enc->base.height, 16) / 16;
+   unsigned dpb;
+
+   switch (enc->base.level) {
+   case 10:
+   dpb = 396;
+   break;
+   case 11:
+   dpb = 900;
+   break;
+   case 12:
+   case 13:
+   case 20:
+   dpb = 2376;
+   break;
+   case 21:
+   dpb = 4752;
+   break;
+   case 22:
+   case 30:
+   dpb = 8100;
+   break;
+   case 31:
+   dpb = 18000;
+   break;
+   case 32:
+   dpb = 20480;
+   break;
+   case 40:
+   case 41:
+   dpb = 32768;
+   break;
+   case 42:
+   dpb = 34816;
+   

[Mesa-dev] [PATCH 14/18] radeon/vcn: add encode destroy

2017-11-07 Thread boyuan.zhang
From: Boyuan Zhang 

Signed-off-by: Boyuan Zhang 
---
 src/gallium/drivers/radeon/radeon_vcn_enc.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.c 
b/src/gallium/drivers/radeon/radeon_vcn_enc.c
index a21deb1..0cb8e87 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc.c
@@ -180,7 +180,21 @@ static void radeon_enc_end_frame(struct pipe_video_codec 
*encoder,
 
 static void radeon_enc_destroy(struct pipe_video_codec *encoder)
 {
-   /* TODO*/
+   struct radeon_encoder *enc = (struct radeon_encoder*)encoder;
+
+   if (enc->stream_handle) {
+   struct rvid_buffer fb;
+   enc->need_feedback = false;
+   si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
+   enc->fb = &fb;
+   enc->destroy(enc);
+   flush(enc);
+   si_vid_destroy_buffer(&fb);
+   }
+
+   si_vid_destroy_buffer(&enc->cpb);
+   enc->ws->cs_destroy(enc->cs);
+   FREE(enc);
 }
 
 static void radeon_enc_get_feedback(struct pipe_video_codec *encoder,
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/18] radeon/vcn: add encode header implementations

2017-11-07 Thread boyuan.zhang
From: Boyuan Zhang 

Signed-off-by: Boyuan Zhang 
---
 src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c | 234 
 1 file changed, 234 insertions(+)

diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c 
b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
index 5170c67..c6dc420 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
@@ -362,6 +362,233 @@ static void radeon_enc_quality_params(struct 
radeon_encoder *enc)
RADEON_ENC_END();
 }
 
+static void radeon_enc_nalu_sps(struct radeon_encoder *enc)
+{
+   RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);
+   RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS);
+   uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+   radeon_enc_reset(enc);
+   radeon_enc_set_emulation_prevention(enc, false);
+   radeon_enc_code_fixed_bits(enc, 0x0001, 32);
+   radeon_enc_code_fixed_bits(enc, 0x67, 8);
+   radeon_enc_byte_align(enc);
+   radeon_enc_set_emulation_prevention(enc, true);
+   radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.profile_idc, 8);
+   radeon_enc_code_fixed_bits(enc, 0x04, 8);
+   radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.level_idc, 8);
+   radeon_enc_code_ue(enc, 0x0);
+
+   if(enc->enc_pic.spec_misc.profile_idc == 100 || 
enc->enc_pic.spec_misc.profile_idc == 110 || enc->enc_pic.spec_misc.profile_idc 
== 122 ||
+   enc->enc_pic.spec_misc.profile_idc == 244 || 
enc->enc_pic.spec_misc.profile_idc == 44 || enc->enc_pic.spec_misc.profile_idc 
== 83 ||
+   enc->enc_pic.spec_misc.profile_idc == 86 || 
enc->enc_pic.spec_misc.profile_idc == 118 || enc->enc_pic.spec_misc.profile_idc 
== 128 ||
+   enc->enc_pic.spec_misc.profile_idc == 138) {
+   radeon_enc_code_ue(enc, 0x1);
+   radeon_enc_code_ue(enc, 0x0);
+   radeon_enc_code_ue(enc, 0x0);
+   radeon_enc_code_fixed_bits(enc, 0x0, 2);
+   }
+
+   radeon_enc_code_ue(enc, 1);
+   radeon_enc_code_ue(enc, enc->enc_pic.pic_order_cnt_type);
+
+   if (enc->enc_pic.pic_order_cnt_type == 0)
+   radeon_enc_code_ue(enc, 1);
+
+   radeon_enc_code_ue(enc, (enc->base.max_references + 1));
+   radeon_enc_code_fixed_bits(enc, 
enc->enc_pic.layer_ctrl.max_num_temporal_layers > 1 ? 0x1 : 0x0, 1);
+   radeon_enc_code_ue(enc, 
(enc->enc_pic.session_init.aligned_picture_width / 16 - 1));
+   radeon_enc_code_ue(enc, 
(enc->enc_pic.session_init.aligned_picture_height / 16 - 1));
+   bool progressive_only = true;
+   radeon_enc_code_fixed_bits(enc, progressive_only ? 0x1 : 0x0, 1);
+
+   if (!progressive_only)
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+
+   radeon_enc_code_fixed_bits(enc, 0x1, 1);
+
+   if ((enc->enc_pic.crop_left != 0) || (enc->enc_pic.crop_right != 0) ||
+   (enc->enc_pic.crop_top != 0) || 
(enc->enc_pic.crop_bottom != 0)) {
+   radeon_enc_code_fixed_bits(enc, 0x1, 1);
+   radeon_enc_code_ue(enc, enc->enc_pic.crop_left);
+   radeon_enc_code_ue(enc, enc->enc_pic.crop_right);
+   radeon_enc_code_ue(enc, enc->enc_pic.crop_top);
+   radeon_enc_code_ue(enc, enc->enc_pic.crop_bottom);
+   } else
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+
+   radeon_enc_code_fixed_bits(enc, 0x1, 1);
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_enc_code_fixed_bits(enc, 0x1, 1);
+   radeon_enc_code_fixed_bits(enc, 0x1, 1);
+   radeon_enc_code_ue(enc, 0x0);
+   radeon_enc_code_ue(enc, 0x0);
+   radeon_enc_code_ue(enc, 16);
+   radeon_enc_code_ue(enc, 16);
+   radeon_enc_code_ue(enc, 0x0);
+   radeon_enc_code_ue(enc, (enc->base.max_references + 1));
+   radeon_enc_code_fixed_bits(enc, 0x1, 1);
+   radeon_enc_byte_align(enc);
+   radeon_enc_flush_headers(enc);
+   *size_in_bytes = (enc->bits_output + 7) / 8;
+   RADEON_ENC_END();
+}
+
+static void radeon_enc_nalu_pps(struct radeon_encoder *enc)
+{
+   RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);
+   RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS);
+   uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
+   radeon_enc_reset(enc);
+   radeon_enc_set_emulation_prevention(enc, false);
+   radeon_enc_code_fixed_bits(enc, 0x0001, 32);
+   radeon_enc_code_fixed_bits(enc, 0x68, 8);
+   radeon_enc_byte_align(enc);
+   radeon_enc_set_emulatio

[Mesa-dev] [PATCH 02/18] radeon/winsys: add vcn enc ring type

2017-11-07 Thread boyuan.zhang
From: Boyuan Zhang 

Signed-off-by: Boyuan Zhang 
---
 src/gallium/drivers/radeon/radeon_winsys.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/radeon/radeon_winsys.h 
b/src/gallium/drivers/radeon/radeon_winsys.h
index 2d3f646..a14e6e3 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -78,6 +78,7 @@ enum ring_type {
 RING_VCE,
 RING_UVD_ENC,
 RING_VCN_DEC,
+RING_VCN_ENC,
 RING_LAST,
 };
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/18] radeon/common: add vcn enc ip info query

2017-11-07 Thread boyuan.zhang
From: Boyuan Zhang 

Signed-off-by: Boyuan Zhang 
---
 src/amd/common/ac_gpu_info.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 2e56012..b0c0a08 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -98,7 +98,7 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
 {
struct amdgpu_buffer_size_alignments alignment_info = {};
struct amdgpu_heap_info vram, vram_vis, gtt;
-   struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}, vce = 
{}, vcn_dec = {};
+   struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}, vce = 
{}, vcn_dec = {}, vcn_enc = {};
uint32_t vce_version = 0, vce_feature = 0, uvd_version = 0, uvd_feature 
= 0;
int r, i, j;
drmDevicePtr devinfo;
@@ -174,6 +174,14 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
}
}
 
+   if (info->drm_major == 3 && info->drm_minor >= 17) {
+   r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCN_ENC, 0, 
&vcn_enc);
+   if (r) {
+   fprintf(stderr, "amdgpu: 
amdgpu_query_hw_ip_info(vcn_dec) failed.\n");
+   return false;
+   }
+   }
+
r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_ME, 0, 0,
&info->me_fw_version,
&info->me_fw_feature);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/18] radeon/vcn: add vcn encode interface

2017-11-07 Thread boyuan.zhang
From: Boyuan Zhang 

Signed-off-by: Boyuan Zhang 
---
 src/gallium/drivers/radeon/radeon_vcn_enc.h | 325 
 1 file changed, 325 insertions(+)
 create mode 100644 src/gallium/drivers/radeon/radeon_vcn_enc.h

diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.h 
b/src/gallium/drivers/radeon/radeon_vcn_enc.h
new file mode 100644
index 000..a58ff6b
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc.h
@@ -0,0 +1,325 @@
+/**
+ *
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **/
+
+#ifndef _RADEON_VCN_ENC_H
+#define _RADEON_VCN_ENC_H
+
+#define RENCODE_FW_INTERFACE_MAJOR_VERSION 1
+#define RENCODE_FW_INTERFACE_MINOR_VERSION 2
+
+#define RENCODE_IB_PARAM_SESSION_INFO  0x0001
+#define RENCODE_IB_PARAM_TASK_INFO 0x0002
+#define RENCODE_IB_PARAM_SESSION_INIT  0x0003
+#define RENCODE_IB_PARAM_LAYER_CONTROL 0x0004
+#define RENCODE_IB_PARAM_LAYER_SELECT  0x0005
+#define RENCODE_IB_PARAM_RATE_CONTROL_SESSION_INIT 0x0006
+#define RENCODE_IB_PARAM_RATE_CONTROL_LAYER_INIT   0x0007
+#define RENCODE_IB_PARAM_RATE_CONTROL_PER_PICTURE  0x0008
+#define RENCODE_IB_PARAM_QUALITY_PARAMS0x0009
+#define RENCODE_IB_PARAM_SLICE_HEADER  0x000a
+#define RENCODE_IB_PARAM_ENCODE_PARAMS 0x000b
+#define RENCODE_IB_PARAM_INTRA_REFRESH 0x000c
+#define RENCODE_IB_PARAM_ENCODE_CONTEXT_BUFFER 0x000d
+#define RENCODE_IB_PARAM_VIDEO_BITSTREAM_BUFFER0x000e
+#define RENCODE_IB_PARAM_FEEDBACK_BUFFER   0x0010
+#define RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU0x0020
+
+#define RENCODE_H264_IB_PARAM_SLICE_CONTROL0x0021
+#define RENCODE_H264_IB_PARAM_SPEC_MISC0x0022
+#define RENCODE_H264_IB_PARAM_ENCODE_PARAMS0x0023
+#define RENCODE_H264_IB_PARAM_DEBLOCKING_FILTER0x0024
+
+#define RENCODE_IB_OP_INITIALIZE   0x0101
+#define RENCODE_IB_OP_CLOSE_SESSION0x0102
+#define RENCODE_IB_OP_ENCODE   0x0103
+#define RENCODE_IB_OP_INIT_RC  0x0104
+#define RENCODE_IB_OP_INIT_RC_VBV_BUFFER_LEVEL 0x0105
+#define RENCODE_IB_OP_SET_SPEED_ENCODING_MODE  0x0106
+#define RENCODE_IB_OP_SET_BALANCE_ENCODING_MODE0x0107
+#define RENCODE_IB_OP_SET_QUALITY_ENCODING_MODE0x0108
+
+#define RENCODE_IF_MAJOR_VERSION_MASK  0x
+#define RENCODE_IF_MAJOR_VERSION_SHIFT 16
+#define RENCODE_IF_MINOR_VERSION_MASK  0x
+#define RENCODE_IF_MINOR_VERSION_SHIFT 0
+
+#define RENCODE_ENCODE_STANDARD_H264   1
+
+#define RENCODE_PREENCODE_MODE_NONE0x
+#define RENCODE_PREENCODE_MODE_1X  0x0001
+#define RENCODE_PREENCODE_MODE_2X  0x0002
+#define RENCODE_PREENCODE_MODE_4X  0x0004
+
+#define RENCODE_H264_SLICE_CONTROL_MODE_FIXED_MBS  0x
+#define RENCODE_H264_SLICE_CONTROL_MODE_FIXED_BITS 0x0001
+
+#define RENCODE_RATE_CONTROL_METHOD_NONE  

[Mesa-dev] [PATCH 16/18] radeon/vcn: add create encoder

2017-11-07 Thread boyuan.zhang
From: Boyuan Zhang 

Signed-off-by: Boyuan Zhang 
---
 src/gallium/drivers/radeon/radeon_vcn_enc.c | 82 -
 1 file changed, 81 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.c 
b/src/gallium/drivers/radeon/radeon_vcn_enc.c
index ab68039..0819b6f 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc.c
@@ -221,7 +221,87 @@ struct pipe_video_codec *radeon_create_encoder(struct 
pipe_context *context,
struct radeon_winsys* ws,
radeon_enc_get_buffer get_buffer)
 {
-   /* TODO*/
+   struct r600_common_screen *rscreen = (struct r600_common_screen 
*)context->screen;
+   struct r600_common_context *rctx = (struct r600_common_context*)context;
+   struct radeon_encoder *enc;
+   struct pipe_video_buffer *tmp_buf, templat = {};
+   struct radeon_surf *tmp_surf;
+   unsigned cpb_size;
+
+   enc = CALLOC_STRUCT(radeon_encoder);
+
+   if (!enc)
+   return NULL;
+
+   enc->alignment = 256;
+   enc->base = *templ;
+   enc->base.context = context;
+   enc->base.destroy = radeon_enc_destroy;
+   enc->base.begin_frame = radeon_enc_begin_frame;
+   enc->base.encode_bitstream = radeon_enc_encode_bitstream;
+   enc->base.end_frame = radeon_enc_end_frame;
+   enc->base.flush = radeon_enc_flush;
+   enc->base.get_feedback = radeon_enc_get_feedback;
+   enc->get_buffer = get_buffer;
+   enc->bits_in_shifter = 0;
+   enc->screen = context->screen;
+   enc->ws = ws;
+   enc->cs = ws->cs_create(rctx->ctx, RING_VCN_ENC, radeon_enc_cs_flush, 
enc);
+
+   if (!enc->cs) {
+   RVID_ERR("Can't get command submission context.\n");
+   goto error;
+   }
+
+   struct rvid_buffer si;
+   si_vid_create_buffer(enc->screen, &si, 128 * 1024, PIPE_USAGE_STAGING);
+   enc->si = &si;
+
+   templat.buffer_format = PIPE_FORMAT_NV12;
+   templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
+   templat.width = enc->base.width;
+   templat.height = enc->base.height;
+   templat.interlaced = false;
+
+   if (!(tmp_buf = context->create_video_buffer(context, &templat))) {
+   RVID_ERR("Can't create video buffer.\n");
+   goto error;
+   }
+
+   enc->cpb_num = get_cpb_num(enc);
+
+   if (!enc->cpb_num)
+   goto error;
+
+   get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, 
&tmp_surf);
+
+   cpb_size = (rscreen->chip_class < GFX9) ?
+  align(tmp_surf->u.legacy.level[0].nblk_x * 
tmp_surf->bpe, 128) *
+  align(tmp_surf->u.legacy.level[0].nblk_y, 32) :
+  align(tmp_surf->u.gfx9.surf_pitch * tmp_surf->bpe, 
256) *
+  align(tmp_surf->u.gfx9.surf_height, 32);
+
+   cpb_size = cpb_size * 3 / 2;
+   cpb_size = cpb_size * enc->cpb_num;
+   tmp_buf->destroy(tmp_buf);
+
+   if (!si_vid_create_buffer(enc->screen, &enc->cpb, cpb_size, 
PIPE_USAGE_DEFAULT)) {
+   RVID_ERR("Can't create CPB buffer.\n");
+   goto error;
+   }
+
+   radeon_enc_1_2_init(enc);
+
+   return &enc->base;
+
+error:
+   if (enc->cs)
+   enc->ws->cs_destroy(enc->cs);
+
+   si_vid_destroy_buffer(&enc->cpb);
+
+   FREE(enc);
+   return NULL;
 }
 
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/18] radeon/vcn: add encode header algorithms

2017-11-07 Thread boyuan.zhang
From: Boyuan Zhang 

Signed-off-by: Boyuan Zhang 
---
 src/gallium/drivers/radeon/radeon_vcn_enc.h |   6 ++
 src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c | 121 
 2 files changed, 127 insertions(+)

diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.h 
b/src/gallium/drivers/radeon/radeon_vcn_enc.h
index cbdf9c0..651502c 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc.h
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc.h
@@ -396,9 +396,15 @@ struct radeon_encoder {
struct radeon_enc_h264_enc_pic  enc_pic;
 
unsignedalignment;
+   unsignedshifter;
+   unsignedbits_in_shifter;
+   unsignednum_zeros;
+   unsignedbyte_index;
+   unsignedbits_output;
uint32_ttotal_task_size;
uint32_t*   p_task_size;
 
+   boolemulation_prevention;
boolneed_feedback;
 };
 
diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c 
b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
index f75f3d6..5170c67 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
@@ -49,6 +49,7 @@ RADEON_ENC_CS(cmd)
enc->total_task_size += *begin;}
 
 static const unsigned profiles[7] = { 66, 77, 88, 100, 110, 122, 244 };
+static const unsigned index_to_shifts[4] = {24, 16, 8, 0};
 
 static void radeon_enc_add_buffer(struct radeon_encoder *enc, struct pb_buffer 
*buf,
  enum 
radeon_bo_usage usage, enum radeon_bo_domain domain,
@@ -63,6 +64,126 @@ static void radeon_enc_add_buffer(struct radeon_encoder 
*enc, struct pb_buffer *
RADEON_ENC_CS(addr);
 }
 
+static void radeon_enc_set_emulation_prevention(struct radeon_encoder *enc, 
bool set)
+{
+   if (set != enc->emulation_prevention) {
+   enc->emulation_prevention = set;
+   enc->num_zeros = 0;
+   }
+}
+
+static void radeon_enc_output_one_byte(struct radeon_encoder *enc, unsigned 
char byte)
+{
+   if (enc->byte_index == 0)
+   enc->cs->current.buf[enc->cs->current.cdw] = 0;
+   enc->cs->current.buf[enc->cs->current.cdw] |= ((unsigned int)(byte) << 
index_to_shifts[enc->byte_index]);
+   enc->byte_index++;
+
+   if (enc->byte_index >= 4) {
+   enc->byte_index = 0;
+   enc->cs->current.cdw++;
+   }
+}
+
+static void radeon_enc_emulation_prevention(struct radeon_encoder *enc, 
unsigned char byte)
+{
+   if(enc->emulation_prevention) {
+   if((enc->num_zeros >= 2) && ((byte == 0x00) || (byte == 0x01) 
|| (byte == 0x03))) {
+radeon_enc_output_one_byte(enc, 0x03);
+enc->bits_output += 8;
+enc->num_zeros = 0;
+}
+enc->num_zeros = (byte == 0 ? (enc->num_zeros + 1) : 0);
+}
+}
+
+static void radeon_enc_code_fixed_bits(struct radeon_encoder *enc, unsigned 
int value, unsigned int num_bits)
+{
+   unsigned int bits_to_pack = 0;
+
+   while(num_bits > 0) {
+   unsigned int value_to_pack = value & (0x >> (32 - 
num_bits));
+   bits_to_pack = num_bits > (32 - enc->bits_in_shifter) ? (32 - 
enc->bits_in_shifter) : num_bits;
+
+   if (bits_to_pack < num_bits)
+   value_to_pack = value_to_pack >> (num_bits - 
bits_to_pack);
+
+   enc->shifter |= value_to_pack << (32 - enc->bits_in_shifter - 
bits_to_pack);
+   num_bits -= bits_to_pack;
+   enc->bits_in_shifter += bits_to_pack;
+
+   while(enc->bits_in_shifter >= 8) {
+   unsigned char output_byte = (unsigned 
char)(enc->shifter >> 24);
+   enc->shifter <<= 8;
+   radeon_enc_emulation_prevention(enc, output_byte);
+   radeon_enc_output_one_byte(enc, output_byte);
+   enc->bits_in_shifter -= 8;
+   enc->bits_output += 8;
+   }
+   }
+}
+
+static void radeon_enc_reset(struct radeon_encoder *enc)
+{
+   enc->emulation_prevention = false;
+   enc->shifter = 0;
+   enc->bits_in_shifter = 0;
+   enc->bits_output = 0;
+   enc->num_zeros = 0;
+   enc->byte_index = 0;
+}
+
+static void radeon_enc_byte_align(struct radeon_encoder *enc)
+{
+   unsigned int num_padding_zeros = (32 - enc->bits_in_shifter) % 8;
+
+   if (num_padding_zeros > 0)
+   radeon_enc_code_fixed_bits(enc, 0, num_padding_zeros);
+}
+
+static void radeon_enc_flush_headers(struct radeon_encoder *enc)
+{
+   if (enc->bits_in_shifter != 0) {
+   unsigned char output_byte = (unsigned char)(enc->shifter >> 24);
+   radeon_enc_emulation_prevention(enc, outp

[Mesa-dev] [PATCH 17/18] radeonsi: enable vcn encode

2017-11-07 Thread boyuan.zhang
From: Boyuan Zhang 

Signed-off-by: Boyuan Zhang 
---
 src/gallium/drivers/radeonsi/si_uvd.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_uvd.c 
b/src/gallium/drivers/radeonsi/si_uvd.c
index 2fc5e30..09fdb23 100644
--- a/src/gallium/drivers/radeonsi/si_uvd.c
+++ b/src/gallium/drivers/radeonsi/si_uvd.c
@@ -30,6 +30,7 @@
 #include "radeon/radeon_uvd.h"
 #include "radeon/radeon_vce.h"
 #include "radeon/radeon_vcn_dec.h"
+#include "radeon/radeon_vcn_enc.h"
 
 /**
  * creates an video buffer with an UVD compatible memory layout
@@ -146,7 +147,8 @@ struct pipe_video_codec *si_uvd_create_decoder(struct 
pipe_context *context,
bool vcn = (ctx->b.family == CHIP_RAVEN) ? true : false;
 
if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE)
-   return si_vce_create_encoder(context, templ, ctx->b.ws, 
si_vce_get_buffer);
+   return (vcn) ? radeon_create_encoder(context, templ, ctx->b.ws, 
si_vce_get_buffer) :
+   si_vce_create_encoder(context, templ, ctx->b.ws, 
si_vce_get_buffer);
 
return (vcn) ?  radeon_create_decoder(context, templ) :
si_common_uvd_create_decoder(context, templ, si_uvd_set_dtb);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/18] st/va: implement poc type

2017-11-07 Thread boyuan.zhang
From: Boyuan Zhang 

Signed-off-by: Boyuan Zhang 
---
 src/gallium/state_trackers/va/picture.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/state_trackers/va/picture.c 
b/src/gallium/state_trackers/va/picture.c
index 7427b98..55ca16e 100644
--- a/src/gallium/state_trackers/va/picture.c
+++ b/src/gallium/state_trackers/va/picture.c
@@ -396,6 +396,7 @@ handleVAEncSequenceParameterBufferType(vlVaDriver *drv, 
vlVaContext *context, vl
context->desc.h264enc.gop_size = h264->intra_idr_period * 
context->gop_coeff;
context->desc.h264enc.rate_ctrl.frame_rate_num = h264->time_scale / 2;
context->desc.h264enc.rate_ctrl.frame_rate_den = h264->num_units_in_tick;
+   context->desc.h264enc.pic_order_cnt_type = 
h264->seq_fields.bits.pic_order_cnt_type;
return VA_STATUS_SUCCESS;
 }
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/18] vl: add poc type

2017-11-07 Thread boyuan.zhang
From: Boyuan Zhang 

Signed-off-by: Boyuan Zhang 
---
 src/gallium/include/pipe/p_video_state.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/include/pipe/p_video_state.h 
b/src/gallium/include/pipe/p_video_state.h
index abd5d36..1d57165 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -400,6 +400,7 @@ struct pipe_h264_enc_picture_desc
unsigned idr_pic_id;
unsigned gop_cnt;
unsigned pic_order_cnt;
+   unsigned pic_order_cnt_type;
unsigned ref_idx_l0;
unsigned ref_idx_l1;
unsigned gop_size;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/18] radeon/vcn: add encode end frame

2017-11-07 Thread boyuan.zhang
From: Boyuan Zhang 

Signed-off-by: Boyuan Zhang 
---
 src/gallium/drivers/radeon/radeon_vcn_enc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.c 
b/src/gallium/drivers/radeon/radeon_vcn_enc.c
index 053cf0e..a21deb1 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc.c
@@ -174,7 +174,8 @@ static void radeon_enc_end_frame(struct pipe_video_codec 
*encoder,
   struct pipe_video_buffer 
*source,
   struct pipe_picture_desc 
*picture)
 {
-   /* TODO*/
+   struct radeon_encoder *enc = (struct radeon_encoder*)encoder;
+   flush(enc);
 }
 
 static void radeon_enc_destroy(struct pipe_video_codec *encoder)
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/18] radeon/vcn: add encode begin frame

2017-11-07 Thread boyuan.zhang
From: Boyuan Zhang 

Signed-off-by: Boyuan Zhang 
---
 src/gallium/drivers/radeon/radeon_vcn_enc.c | 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.c 
b/src/gallium/drivers/radeon/radeon_vcn_enc.c
index 437c2fc..51cef7f 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc.c
@@ -126,7 +126,28 @@ static void radeon_enc_begin_frame(struct pipe_video_codec 
*encoder,
 struct 
pipe_video_buffer *source,
 struct 
pipe_picture_desc *picture)
 {
-   /* TODO*/
+   struct radeon_encoder *enc = (struct radeon_encoder*)encoder;
+   struct vl_video_buffer *vid_buf = (struct vl_video_buffer *)source;
+   struct pipe_h264_enc_picture_desc *pic = (struct 
pipe_h264_enc_picture_desc *)picture;
+
+   radeon_vcn_enc_get_param(enc, pic);
+
+   enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma);
+   enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma);
+
+   enc->need_feedback = false;
+
+   if (!enc->stream_handle) {
+   struct rvid_buffer fb;
+   enc->stream_handle = si_vid_alloc_stream_handle();
+   enc->si = CALLOC_STRUCT(rvid_buffer);
+   si_vid_create_buffer(enc->screen, enc->si, 128 * 1024, 
PIPE_USAGE_STAGING);
+   si_vid_create_buffer(enc->screen, &fb, 4096, 
PIPE_USAGE_STAGING);
+   enc->fb = &fb;
+   enc->begin(enc, pic);
+   flush(enc);
+   si_vid_destroy_buffer(&fb);
+   }
 }
 
 static void radeon_enc_encode_bitstream(struct pipe_video_codec *encoder,
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/18] radeon/vcn: add ib implementations

2017-11-07 Thread boyuan.zhang
From: Boyuan Zhang 

Signed-off-by: Boyuan Zhang 
---
 src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c | 292 ++--
 1 file changed, 268 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c 
b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
index ffd1155..f75f3d6 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
@@ -65,121 +65,365 @@ static void radeon_enc_add_buffer(struct radeon_encoder 
*enc, struct pb_buffer *
 
 static void radeon_enc_session_info(struct radeon_encoder *enc)
 {
-   /* TODO*/
+   unsigned int interface_version = ((RENCODE_FW_INTERFACE_MAJOR_VERSION 
<< RENCODE_IF_MAJOR_VERSION_SHIFT) |
+ 
(RENCODE_FW_INTERFACE_MINOR_VERSION << RENCODE_IF_MINOR_VERSION_SHIFT));
+   RADEON_ENC_BEGIN(RENCODE_IB_PARAM_SESSION_INFO);
+   RADEON_ENC_CS(interface_version);
+   RADEON_ENC_READWRITE(enc->si->res->buf, enc->si->res->domains, 0x0);
+   RADEON_ENC_END();
 }
 
 static void radeon_enc_task_info(struct radeon_encoder *enc, bool 
need_feedback)
 {
-   /* TODO*/
+   enc->enc_pic.task_info.task_id++;
+
+   if (need_feedback)
+   enc->enc_pic.task_info.allowed_max_num_feedbacks = 1;
+   else
+   enc->enc_pic.task_info.allowed_max_num_feedbacks = 0;
+
+   RADEON_ENC_BEGIN(RENCODE_IB_PARAM_TASK_INFO);
+   enc->p_task_size = &enc->cs->current.buf[enc->cs->current.cdw++];
+   RADEON_ENC_CS(enc->enc_pic.task_info.task_id);
+   RADEON_ENC_CS(enc->enc_pic.task_info.allowed_max_num_feedbacks);
+   RADEON_ENC_END();
 }
 
 static void radeon_enc_session_init(struct radeon_encoder *enc)
 {
-   /* TODO*/
+   enc->enc_pic.session_init.encode_standard = 
RENCODE_ENCODE_STANDARD_H264;
+   enc->enc_pic.session_init.aligned_picture_width = 
align(enc->base.width, 16);
+   enc->enc_pic.session_init.aligned_picture_height = 
align(enc->base.height, 16);
+   enc->enc_pic.session_init.padding_width = 
enc->enc_pic.session_init.aligned_picture_width - enc->base.width;
+   enc->enc_pic.session_init.padding_height = 
enc->enc_pic.session_init.aligned_picture_height - enc->base.height;
+   enc->enc_pic.session_init.pre_encode_mode = RENCODE_PREENCODE_MODE_NONE;
+   enc->enc_pic.session_init.pre_encode_chroma_enabled = false;
+
+   RADEON_ENC_BEGIN(RENCODE_IB_PARAM_SESSION_INIT);
+   RADEON_ENC_CS(enc->enc_pic.session_init.encode_standard);
+   RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_width);
+   RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_height);
+   RADEON_ENC_CS(enc->enc_pic.session_init.padding_width);
+   RADEON_ENC_CS(enc->enc_pic.session_init.padding_height);
+   RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_mode);
+   RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_chroma_enabled);
+   RADEON_ENC_END();
 }
 
 static void radeon_enc_layer_control(struct radeon_encoder *enc)
 {
-   /* TODO*/
+   enc->enc_pic.layer_ctrl.max_num_temporal_layers = 1;
+   enc->enc_pic.layer_ctrl.num_temporal_layers = 1;
+
+   RADEON_ENC_BEGIN(RENCODE_IB_PARAM_LAYER_CONTROL);
+   RADEON_ENC_CS(enc->enc_pic.layer_ctrl.max_num_temporal_layers);
+   RADEON_ENC_CS(enc->enc_pic.layer_ctrl.num_temporal_layers);
+   RADEON_ENC_END();
 }
 
 static void radeon_enc_layer_select(struct radeon_encoder *enc)
 {
-   /* TODO*/
+   enc->enc_pic.layer_sel.temporal_layer_index = 0;
+
+   RADEON_ENC_BEGIN(RENCODE_IB_PARAM_LAYER_SELECT);
+   RADEON_ENC_CS(enc->enc_pic.layer_sel.temporal_layer_index);
+   RADEON_ENC_END();
 }
 
 static void radeon_enc_slice_control(struct radeon_encoder *enc)
 {
-   /* TODO*/
+   enc->enc_pic.slice_ctrl.slice_control_mode = 
RENCODE_H264_SLICE_CONTROL_MODE_FIXED_MBS;
+   enc->enc_pic.slice_ctrl.num_mbs_per_slice = align(enc->base.width, 16) 
/ 16 * align(enc->base.height, 16) / 16;
+
+   RADEON_ENC_BEGIN(RENCODE_H264_IB_PARAM_SLICE_CONTROL);
+   RADEON_ENC_CS(enc->enc_pic.slice_ctrl.slice_control_mode);
+   RADEON_ENC_CS(enc->enc_pic.slice_ctrl.num_mbs_per_slice);
+   RADEON_ENC_END();
 }
 
 static void radeon_enc_spec_misc(struct radeon_encoder *enc)
 {
-   /* TODO*/
+   enc->enc_pic.spec_misc.constrained_intra_pred_flag = 0;
+   enc->enc_pic.spec_misc.cabac_enable = 0;
+   enc->enc_pic.spec_misc.cabac_init_idc = 0;
+   enc->enc_pic.spec_misc.half_pel_enabled = 1;
+   enc->enc_pic.spec_misc.quarter_pel_enabled = 1;
+   enc->enc_pic.spec_misc.profile_idc = profiles[enc->base.profile - 
PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE];
+   enc->enc_pic.spec_misc.level_idc = enc->base.level;
+
+   RADEON_ENC_BEGIN(RENCODE_H264_IB_PARAM_SPEC_MISC);
+   RADEON_ENC_CS(enc->enc_pic.spec_misc.constrained_intra_pred_flag);
+   RADEON_ENC_CS(enc->enc_pic.spe

[Mesa-dev] [PATCH 18/18] radeon/video: enable encode support for raven

2017-11-07 Thread boyuan.zhang
From: Boyuan Zhang 

Signed-off-by: Boyuan Zhang 
---
 src/gallium/drivers/radeon/radeon_video.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/radeon_video.c 
b/src/gallium/drivers/radeon/radeon_video.c
index ea76ca1..4edd0a4 100644
--- a/src/gallium/drivers/radeon/radeon_video.c
+++ b/src/gallium/drivers/radeon/radeon_video.c
@@ -230,7 +230,8 @@ int si_vid_get_video_param(struct pipe_screen *screen,
switch (param) {
case PIPE_VIDEO_CAP_SUPPORTED:
return codec == PIPE_VIDEO_FORMAT_MPEG4_AVC &&
-   si_vce_is_fw_version_supported(rscreen);
+   (si_vce_is_fw_version_supported(rscreen) ||
+   rscreen->family == CHIP_RAVEN);
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
case PIPE_VIDEO_CAP_MAX_WIDTH:
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/18] radeon/vcn: add encode bitstream

2017-11-07 Thread boyuan.zhang
From: Boyuan Zhang 

Signed-off-by: Boyuan Zhang 
---
 src/gallium/drivers/radeon/radeon_vcn_enc.c | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.c 
b/src/gallium/drivers/radeon/radeon_vcn_enc.c
index 51cef7f..053cf0e 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc.c
@@ -155,7 +155,19 @@ static void radeon_enc_encode_bitstream(struct 
pipe_video_codec *encoder,
  struct 
pipe_resource *destination,
  void **fb)
 {
-   /* TODO*/
+   struct radeon_encoder *enc = (struct radeon_encoder*)encoder;
+   enc->get_buffer(destination, &enc->bs_handle, NULL);
+   enc->bs_size = destination->width0;
+
+   *fb = enc->fb = CALLOC_STRUCT(rvid_buffer);
+
+   if (!si_vid_create_buffer(enc->screen, enc->fb, 4096, 
PIPE_USAGE_STAGING)) {
+   RVID_ERR("Can't create feedback buffer.\n");
+   return;
+   }
+
+   enc->need_feedback = true;
+   enc->encode(enc);
 }
 
 static void radeon_enc_end_frame(struct pipe_video_codec *encoder,
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 15/18] radeon/vcn: add encode get feedback

2017-11-07 Thread boyuan.zhang
From: Boyuan Zhang 

Signed-off-by: Boyuan Zhang 
---
 src/gallium/drivers/radeon/radeon_vcn_enc.c | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.c 
b/src/gallium/drivers/radeon/radeon_vcn_enc.c
index 0cb8e87..ab68039 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc.c
@@ -200,7 +200,20 @@ static void radeon_enc_destroy(struct pipe_video_codec 
*encoder)
 static void radeon_enc_get_feedback(struct pipe_video_codec *encoder,
  void *feedback, 
unsigned *size)
 {
-   /* TODO*/
+   struct radeon_encoder *enc = (struct radeon_encoder*)encoder;
+   struct rvid_buffer *fb = feedback;
+
+   if (size) {
+   uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, 
PIPE_TRANSFER_READ_WRITE);
+   if (ptr[1])
+   *size = ptr[6];
+   else
+   *size = 0;
+   enc->ws->buffer_unmap(fb->res->buf);
+   }
+
+   si_vid_destroy_buffer(fb);
+   FREE(fb);
 }
 
 struct pipe_video_codec *radeon_create_encoder(struct pipe_context *context,
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/18] winsys/amdgpu: add vcn enc cs support

2017-11-07 Thread boyuan.zhang
From: Boyuan Zhang 

Signed-off-by: Boyuan Zhang 
---
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 0b47af9..75e1891 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -25,6 +25,10 @@
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
  */
+/*
+ * Authors:
+ *  Marek Olšák 
+ */
 
 #include "amdgpu_cs.h"
 #include "os/os_time.h"
@@ -326,7 +330,8 @@ static bool amdgpu_cs_has_user_fence(struct 
amdgpu_cs_context *cs)
 {
return cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_UVD &&
   cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCE &&
-  cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_DEC;
+  cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_DEC &&
+  cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_ENC;
 }
 
 static bool amdgpu_cs_has_chaining(struct amdgpu_cs *cs)
@@ -779,6 +784,10 @@ static bool amdgpu_init_cs_context(struct 
amdgpu_cs_context *cs,
   cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCN_DEC;
   break;
 
+  case RING_VCN_ENC:
+  cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCN_ENC;
+  break;
+
default:
case RING_GFX:
   cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_GFX;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] amd/addrlib: update to latest version

2017-11-07 Thread Marek Olšák
On Tue, Nov 7, 2017 at 9:01 PM, Nicolai Hähnle  wrote:
> On 07.11.2017 18:35, Michel Dänzer wrote:
>>
>> On 07/11/17 06:28 PM, Marek Olšák wrote:
>>>
>>> Hi,
>>>
>>> This patch is too large for the mailing list:
>>>
>>>
>>> https://cgit.freedesktop.org/~mareko/mesa/commit/?h=addrlib&id=0e0f044268d3c1af2e78f161aaa2d92c30167cc1
>>
>>
>>  From the commit log:
>>
>>> I just overwrote all Mesa files with internal addrlib and discarded
>>> hunks that we should probably keep, but I might have missed something.
>>
>>
>> FWIW, if a separate branch was used for importing addrlib changes, Git
>> could keep track of our changes to it in the Mesa tree.
>
>
> I concur in principle. In practice, I explored doing that, but the commit
> discipline on the internal addrlib repository is pretty crappy, so we'd end
> up having to massage commits anyway. Maybe we can find a sweet spot
> somewhere by updating slightly more regularly, perhaps once a month.

That's too much time-consuming work with no benefit. I used to do
that, but it sucked. I prefer 1 commit with everything - easy conflict
resolution, not having to rebase 60 commits that don't make sense.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 8/8] r600: add support for hw atomic counters. (v3)

2017-11-07 Thread Nicolai Hähnle

On 07.11.2017 19:38, Dave Airlie wrote:

On 8 November 2017 at 03:26, Nicolai Hähnle  wrote:

On 07.11.2017 07:31, Dave Airlie wrote:


From: Dave Airlie 

This adds support for the evergreen/cayman atomic counters.

These are implemented using GDS append/consume counters. The values
for each counter are loaded before drawing and saved after each draw
using special CP packets.



I admit I'm a bit confused by this at the hardware level.

My understanding of GDS is that it's mostly another copy of LDS (but
global), and all GDS instructions are atomic by default. There is extra
append-consume hardware, but its main point is to support use cases where
operations have to be ordered by wave, or where a wave return is supposed to
be blocked (for producer/consumer kernels and ring buffer management).

So this should really work without the append/consume counters as well, just
with regular GDS memory. Is there a particular reason why you haven't done
that? I suppose it might require more stuff to manage GDS allocations in the
kernel, and if it works with this approach...


Because this is what the closed source driver did. I've pretty much had to
program this from traces I took from it.

I don't want to diverge too much from what it did as debugging gets harder.

The append/consume counters should be faster than GDS, and it looks to be why
this extension was created in the first place.


The append/consume counters are physically part of the GDS block :)

But it's possible that they're better optimized for the case where all 
threads in a "wave" attempt to access the same counter. In GDS memory 
you'd quite likely get slowed down by non-stop bank conflicts, while the 
append counters might have special hardware to deal with the issue.


Cheers,
Nicolai




Acked-by: Nicolai Hähnle 


Thanks.
Dave.




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH v1 00/30] anv: dma-buf and DRM format modifiers

2017-11-07 Thread Jason Ekstrand
All of the pre-work patches have been reviewed by myself and Lionel.  I've
also read through the rest of the series and things look pretty good to
me.  I did make some scattered comments but they shouldn't be a big deal.

My primary concern with the series is the lack of CCS support.  Getting
that working correctly is clearly the biggest question mark in all of
modifiers so I'm hesitant to pass judgment on this as a patch series (I
think the spec is ok) with that piece still missing.

--Jason

On Tue, Nov 7, 2017 at 6:47 AM, Chad Versace 
wrote:

> Overview:
>
> This series implements 3 extensions:
>
> VK_EXT_external_memory_dma_buf
> VK_EXT_queue_family_foreign
> VK_EXT_image_drm_format_modifier
>
> The patch series lives on my tag 'chadv/review/anv-dma-buf-v01'.
> http://git.kiwitree.net/cgit/~chadv/mesa/log/?h=chadv/review/anv-dma-buf-v01
>
> The work-in-progress lives on my branch 'wip/anv-dma-buf'.
> http://git.kiwitree.net/cgit/~chadv/mesa/log/?h=wip/anv-dma-buf
>
> Specifications:
>
> For each extension, you can find a git branch of the Vulkan
> specification, as well as an online build of the spec, at
> http://kiwitree.net/~chadv/vulkan/.
>
> All 3 extension specifications are still drafts at various stages in the
> spec lifecycle.
>
> - VK_EXT_external_memory_dma_buf
>
>   I'll ask Khronos tomorrow morning (Wed 8 Nov) to merge this
>   extension. That would schedule it for publication no later than
>   Fri 17 Nov, unless something goes wrong.
>
> - VK_EXT_queue_family_foreign
>
>   This extension is a slow, little thorn in my side. But it's needed
>   for spec correctness in the interaction among
>   VK_KHR_external_memory + VK_EXT_external_memory_dma_buf
>   + VK_EXT_image_drm_format_modifier. In anvil today, in this patch
>   series, its implementation is a no-op.
>
>   In Khronos, the discussion on this extension is wrapping up.
>   I expect to ask Khronos to merge it no later than Wed 15 Nov, but
>   hopefully sooner.
>
> - VK_EXT_image_drm_format_modifier
>
>   This is the big extension in the series. Its API is complete, in
>   my opinion, and the specification language appears complete to
>   the untrained eye. But there remain a few loose ends in the spec
>   language that I need to finish before submitting it to Khronos.
>   I've documented all the loose ends in the TODO section of the
>   extension's appendix [1].
>
>   [1]: http://git.kiwitree.net/cgit/~chadv/vulkan-spec/tree/doc/specs/vulkan/appendices/VK_EXT_image_drm_format_modifier.txt?h=1.0-VK_EXT_image_drm_format_modifier
>
>   The API is complete, though, modulo review on mesa-dev. So please
>   proceed to review the extension language and the implementation.
>
> Testing:
>
> - vkcube
>
>   I've tested portions of VK_EXT_image_drm_format_modifier with
>   a hacked version of krh's vkcube [2]. The following checklist
>   shows what I've tested so far.
>
> [x] vkGetPhysicalDeviceFormatProperties2KHR
> [x] vkGetPhysicalDeviceImageFormatProperties2KHR
> [x] vkCreateImage
> [x] VkImageDrmFormatModifierListCreateInfoEXT
> [ ] VkImageExplicitDrmFormatModifierEXT
> [ ] Resolves of compression surfaces
> [x] vkGetImageDrmFormatModifierEXT
> [x] vkGetImageSubresourceLayout
>
>   [2]: vkcube: http://github.com/chadversary/vkcube/commits/wip/vk-drm-format-mods
>
> - vk-gl-cts
>
>   I'm doing a full run of dEQP-VK.*. I'm still waiting.
>
> - crucible
>
>   We really need to write crucible tests to hammer some tricky
>   corner cases.  I haven't written them yet. Volunteers?
>
> Chad Versace (30):
>   anv: Remove unused variable 'gen'
>   anv: Suffix anv-private 'VK' tokens with 'ANV'
>   anv: Refactor get_buffer_format_properties()
>   anv: Better types for 'aspect' function params
>   anv: Fix get_image_format_properties() - depthstencil (v2)
>   anv: Fix get_image_format_properties() - ASTC
>   anv: Refactor get_image_format_properties() - plane_format
>   anv: Refactor get_image_format_properties() - base_isl_format
>   anv: Refactor get_image_format_properties() - Reduce params
>   anv: Fix get_image_format_properties() - 3-channel formats
>   anv: Fix get_image_format_properties() - YCbCr
>   anv: Rename get_image_format_properties()
>   anv: Simplify anv_get_image_format_properties()
>   anv: Simplify anv_physical_device_get_format_properties()
>   anv: Remove anv_physical_device_get_format_properties()
>   anv: Refactor anv_get_format_plane() - explicit unsupported
>   anv/image: Refactor choice of isl_tiling_flags_t
>   anv: Refactor anv_GetImageSubresourceLayout()
>   HACK: vulkan: Update headers and registry to chadv/1.0-dma-buf@a79a0ab
>   HACK: vulkan: Install V

Re: [Mesa-dev] [RFC PATCH v1 27/30] RFC: anv: Support VkImageExplicitDrmFormatModifierCreateInfoEXT

2017-11-07 Thread Jason Ekstrand
On Tue, Nov 7, 2017 at 6:48 AM, Chad Versace 
wrote:

> Incremental implementation of VK_EXT_image_drm_format_modifier.
> ---
>  src/intel/vulkan/anv_image.c | 84 ++
> +-
>  1 file changed, 75 insertions(+), 9 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> index ec6cdbc6168..bf636ce4b65 100644
> --- a/src/intel/vulkan/anv_image.c
> +++ b/src/intel/vulkan/anv_image.c
> @@ -288,6 +288,7 @@ static VkResult
>  make_surface(const struct anv_device *dev,
>   struct anv_image *image,
>   const struct anv_image_create_info *anv_info,
> + const VkImageExplicitDrmFormatModifierCreateInfoEXT
> *explicit_drm_info,
>   isl_tiling_flags_t tiling_flags,
>   VkImageAspectFlagBits aspect)
>  {
> @@ -308,9 +309,16 @@ make_surface(const struct anv_device *dev,
>anv_get_format_plane(&dev->info, image->vk_format, aspect,
> image->tiling);
> struct anv_surface *anv_surf = &image->planes[plane].surface;
>
> +   const VkSubresourceLayout *drm_plane_layout = explicit_drm_info ?
> +  &explicit_drm_info->pPlaneLayouts[plane] : NULL;
> +
> const isl_surf_usage_flags_t usage =
>choose_isl_surf_usage(vk_info, anv_info->isl_extra_usage_flags,
> aspect);
>
> +   uint32_t row_pitch = anv_info->stride;
> +   if (explicit_drm_info)
> +  row_pitch = drm_plane_layout->rowPitch;
> +
> /* If an image is created as BLOCK_TEXEL_VIEW_COMPATIBLE, then we need
> to
>  * fall back to linear on Broadwell and earlier because we aren't
>  * guaranteed that we can handle offsets correctly.  On Sky Lake, the
> @@ -336,18 +344,71 @@ make_surface(const struct anv_device *dev,
>.array_len = vk_info->arrayLayers,
>.samples = vk_info->samples,
>.min_alignment = 0,
> -  .row_pitch = anv_info->stride,
> +  .row_pitch = row_pitch,
>.usage = usage,
>.tiling_flags = tiling_flags);
>
> -   /* isl_surf_init() will fail only if provided invalid input. Invalid
> input
> -* is illegal in Vulkan.
> -*/
> -   assert(ok);
> +   if (!ok) {
> +  /* isl_surf_init() fails only when provided invalid input. Invalid
> input
> +   * is illegal in Vulkan unless
> +   * VkImageExplicitDrmFormatModifierCreateInfoEXT is given.
> +   */
> +  assert(explicit_drm_info);
> +  return vk_errorf(dev->instance, dev,
> +   VK_ERROR_INVALID_DRM_FORMAT_
> MODIFIER_PLANE_LAYOUT_EXT,
> +   "isl_surf_init() failed for plane %u", plane);
> +   }
>
> -   image->planes[plane].aux_usage = ISL_AUX_USAGE_NONE;
> +   if (explicit_drm_info) {
> +  /* The VK_EXT_image_drm_format_modifier spec permits support of any
> +   * image, but we restrict support to simple images.
> +   */
> +  assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
> +  assert(image->type == VK_IMAGE_TYPE_2D);
> +  assert(image->array_size == 1);
> +  assert(image->samples == 1);
> +
> +  /* FINISHME: YCbCr images with DRM format modifiers */
> +  assert(!anv_get_format(image->vk_format)->can_ycbcr);
> +
> +  if (drm_plane_layout->size < anv_surf->isl.size) {
> + return vk_errorf(dev->instance, dev,
> +  VK_ERROR_INVALID_DRM_FORMAT_
> MODIFIER_PLANE_LAYOUT_EXT,
> +  "VkSubresourceLayout::size too small for plane
> %u", plane);
> +  }
>
> -   add_surface(image, anv_surf, plane);
> +  if (drm_plane_layout->offset & (anv_surf->isl.alignment - 1)) {
> + return vk_errorf(dev->instance, dev,
> +  VK_ERROR_INVALID_DRM_FORMAT_
> MODIFIER_PLANE_LAYOUT_EXT,
> +  "VkSubresourceLayout::offset misaligned for
> plane "
> +  "%u", plane);
> +  }
> +
> +  if (drm_plane_layout->arrayPitch != 0) {
> + return vk_errorf(dev->instance, dev,
> +  VK_ERROR_INVALID_DRM_FORMAT_
> MODIFIER_PLANE_LAYOUT_EXT,
> +  "VkSubresourceLayout::arrayPitch must be 0");
> +  }
> +
> +  if (drm_plane_layout->depthPitch != 0) {
> + return vk_errorf(dev->instance, dev,
> +  VK_ERROR_INVALID_DRM_FORMAT_
> MODIFIER_PLANE_LAYOUT_EXT,
> +  "VkSubresourceLayout::depthPitch must be 0");
> +  }
> +
> +  anv_surf->offset = drm_plane_layout->offset;
> +
> +  image->planes[plane].offset = drm_plane_layout->offset;
> +  image->planes[plane].alignment = anv_surf->isl.alignment;
> +  image->planes[plane].size = drm_plane_layout->size;
> +
> +  image->size = image->planes[plane].offset +
> image->planes[plane].size;
> +  image->alignment = image->planes[plane].alignment;
>

This all looks correct for now, but I'm having trouble seeing how CCS will
fit into all of this.


> +   } else {
> +  add_surface(image, anv_surf, plane);
> +   }
> +
> +   image->planes

Re: [Mesa-dev] [PATCH] radv: Don't expose heaps with 0 memory.

2017-11-07 Thread Bas Nieuwenhuizen
Hi Andres,

It is a fix, but yeah the changes are pretty extensive. I don't have a
strong opinion on this one.

- Bas

On Fri, Nov 3, 2017 at 4:10 PM, Andres Gomez  wrote:
> Bas, this commit landed without mentioning any specific stable queue.
>
> For 17.2, this is quite an extensive change and has several trivial
> conflicts. I think it is OK to merge if you really meant it for 17.2
> too, but just double checking.
>
> Thanks!
>
> On Wed, 2017-11-01 at 20:58 +0100, Bas Nieuwenhuizen wrote:
>> It confuses CTS. This pregenerates the heap info into the
>> physical device, so we can use it for translating contiguous
>> indices into our "standard" ones.
>>
>> This also makes the WSI a bit smarter in case the first preferred
>> heap does not exist.
>>
>> CC: 
>> ---
>>  src/amd/vulkan/radv_device.c  | 135 
>> ++
>>  src/amd/vulkan/radv_private.h |   3 +
>>  src/amd/vulkan/radv_wsi.c |  16 -
>>  3 files changed, 101 insertions(+), 53 deletions(-)
>>
>> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
>> index 0c2f6fa6312..e95f3ee32af 100644
>> --- a/src/amd/vulkan/radv_device.c
>> +++ b/src/amd/vulkan/radv_device.c
>> @@ -104,6 +104,75 @@ get_chip_name(enum radeon_family family)
>>   }
>>  }
>>
>> +static void
>> +radv_physical_device_init_mem_types(struct radv_physical_device *device)
>> +{
>> + STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
>> + uint64_t visible_vram_size = MIN2(device->rad_info.vram_size,
>> +   device->rad_info.vram_vis_size);
>> +
>> + int vram_index = -1, visible_vram_index = -1, gart_index = -1;
>> + device->memory_properties.memoryHeapCount = 0;
>> + if (device->rad_info.vram_size - visible_vram_size > 0) {
>> + vram_index = device->memory_properties.memoryHeapCount++;
>> + device->memory_properties.memoryHeaps[vram_index] = 
>> (VkMemoryHeap) {
>> + .size = device->rad_info.vram_size - visible_vram_size,
>> + .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
>> + };
>> + }
>> + if (visible_vram_size) {
>> + visible_vram_index = 
>> device->memory_properties.memoryHeapCount++;
>> + device->memory_properties.memoryHeaps[visible_vram_index] = 
>> (VkMemoryHeap) {
>> + .size = visible_vram_size,
>> + .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
>> + };
>> + }
>> + if (device->rad_info.gart_size > 0) {
>> + gart_index = device->memory_properties.memoryHeapCount++;
>> + device->memory_properties.memoryHeaps[gart_index] = 
>> (VkMemoryHeap) {
>> + .size = device->rad_info.gart_size,
>> + .flags = 0,
>> + };
>> + }
>> +
>> + STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
>> + unsigned type_count = 0;
>> + if (vram_index >= 0) {
>> + device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
>> + device->memory_properties.memoryTypes[type_count++] = 
>> (VkMemoryType) {
>> + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
>> + .heapIndex = vram_index,
>> + };
>> + }
>> + if (gart_index >= 0) {
>> + device->mem_type_indices[type_count] = 
>> RADV_MEM_TYPE_GTT_WRITE_COMBINE;
>> + device->memory_properties.memoryTypes[type_count++] = 
>> (VkMemoryType) {
>> + .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
>> + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
>> + .heapIndex = gart_index,
>> + };
>> + }
>> + if (visible_vram_index >= 0) {
>> + device->mem_type_indices[type_count] = 
>> RADV_MEM_TYPE_VRAM_CPU_ACCESS;
>> + device->memory_properties.memoryTypes[type_count++] = 
>> (VkMemoryType) {
>> + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
>> + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
>> + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
>> + .heapIndex = visible_vram_index,
>> + };
>> + }
>> + if (gart_index >= 0) {
>> + device->mem_type_indices[type_count] = 
>> RADV_MEM_TYPE_GTT_CACHED;
>> + device->memory_properties.memoryTypes[type_count++] = 
>> (VkMemoryType) {
>> + .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
>> + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
>> + VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
>> + .heapIndex = gart_index,
>> + };
>> + }
>> + device->memory_properties.memoryTypeCount = type_count;
>> +}
>> +
>>  static VkResult
>>  radv_physical_device_init(struct radv_physical_device *device,
>> struct radv_instance *instance,
>> 

Re: [Mesa-dev] [RFC PATCH v1 25/30] RFC: anv: Support VkImageDrmFormatModifierListCreateInfoEXT

2017-11-07 Thread Jason Ekstrand
On Tue, Nov 7, 2017 at 6:48 AM, Chad Versace 
wrote:

> Incremental implementation of VK_EXT_image_drm_format_modifier.
> ---
>  src/intel/vulkan/anv_image.c   | 72 ++
> 
>  src/intel/vulkan/anv_private.h |  6 
>  2 files changed, 72 insertions(+), 6 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> index ba932ba47c3..8d434293124 100644
> --- a/src/intel/vulkan/anv_image.c
> +++ b/src/intel/vulkan/anv_image.c
> @@ -27,6 +27,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>
>  #include "anv_private.h"
>  #include "util/debug.h"
> @@ -35,11 +36,12 @@
>  #include "vk_format_info.h"
>
>  static isl_surf_usage_flags_t
> -choose_isl_surf_usage(VkImageCreateFlags vk_create_flags,
> -  VkImageUsageFlags vk_usage,
> +choose_isl_surf_usage(const VkImageCreateInfo *vk_info,
>isl_surf_usage_flags_t isl_extra_usage,
>VkImageAspectFlagBits aspect)
>  {
> +   VkImageCreateFlags vk_create_flags = vk_info->flags;
> +   VkImageUsageFlags vk_usage = vk_info->usage;
> isl_surf_usage_flags_t isl_usage = isl_extra_usage;
>
> if (vk_usage & VK_IMAGE_USAGE_SAMPLED_BIT)
> @@ -87,11 +89,15 @@ choose_isl_surf_usage(VkImageCreateFlags
> vk_create_flags,
>isl_usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT;
> }
>
> +   if (vk_info->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
> +  isl_usage |= ISL_SURF_USAGE_DISABLE_AUX_BIT;
> +
> return isl_usage;
>  }
>
>  static isl_tiling_flags_t
> -choose_isl_tiling_flags(const struct anv_image_create_info *anv_info)
> +choose_isl_tiling_flags(const struct anv_image_create_info *anv_info,
> +const struct isl_drm_modifier_info *isl_mod_info)
>  {
> const VkImageCreateInfo *base_info = anv_info->vk_info;
> isl_tiling_flags_t flags = 0;
> @@ -100,11 +106,16 @@ choose_isl_tiling_flags(const struct
> anv_image_create_info *anv_info)
> default:
>unreachable("bad VkImageTiling");
> case VK_IMAGE_TILING_OPTIMAL:
> +  assert(isl_mod_info == NULL);
>flags = ISL_TILING_ANY_MASK;
>break;
> case VK_IMAGE_TILING_LINEAR:
> +  assert(isl_mod_info == NULL);
>flags = ISL_TILING_LINEAR_BIT;
>break;
> +   case VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT:
> +  flags = 1 << isl_mod_info->tiling;
> +  break;
> }
>
> if (anv_info->isl_tiling_flags)
> @@ -298,8 +309,7 @@ make_surface(const struct anv_device *dev,
> struct anv_surface *anv_surf = &image->planes[plane].surface;
>
> const isl_surf_usage_flags_t usage =
> -  choose_isl_surf_usage(vk_info->flags, image->usage,
> -anv_info->isl_extra_usage_flags, aspect);
> +  choose_isl_surf_usage(vk_info, anv_info->isl_extra_usage_flags,
> aspect);
>
> /* If an image is created as BLOCK_TEXEL_VIEW_COMPATIBLE, then we need
> to
>  * fall back to linear on Broadwell and earlier because we aren't
> @@ -492,6 +502,38 @@ make_surface(const struct anv_device *dev,
> return VK_SUCCESS;
>  }
>
> +static uint32_t
> +score_drm_format_mod(uint64_t mod)
> +{
> +   switch (mod) {
> +   default: unreachable("bad DRM format modifier");
> +   case I915_FORMAT_MOD_Y_TILED: return 3;
> +   case I915_FORMAT_MOD_X_TILED: return 2;
> +   case DRM_FORMAT_MOD_LINEAR: return 1;
> +   }
> +}
> +
> +static const struct isl_drm_modifier_info *
> +choose_drm_format_mod(const VkImageDrmFormatModifierListCreateInfoEXT
> *mod_list)
> +{
> +   uint64_t best_mod = UINT64_MAX;
> +   uint32_t best_score = 0;
> +
> +   for (uint32_t i = 0; i < mod_list->drmFormatModifierCount; ++i) {
> +  uint64_t mod = mod_list->pDrmFormatModifiers[i];
> +  uint32_t score = score_drm_format_mod(mod);
> +
> +  if (score > best_score) {
> + best_mod = mod;
> + best_score = score;
> +  }
> +   }
> +
> +   assert(best_score != 0);
> +
> +   return isl_drm_modifier_get_info(best_mod);
> +}
>

We did things slightly differently in intel_screen.c but I like this better.


> +
>  VkResult
>  anv_image_create(VkDevice _device,
>   const struct anv_image_create_info *create_info,
> @@ -500,11 +542,26 @@ anv_image_create(VkDevice _device,
>  {
> ANV_FROM_HANDLE(anv_device, device, _device);
> const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
> +   const VkImageDrmFormatModifierListCreateInfoEXT *vk_mod_list = NULL;
> +   const struct isl_drm_modifier_info *isl_mod_info = NULL;
> struct anv_image *image = NULL;
> VkResult r;
>
> assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
>
> +   /* Extract input structs */
> +   vk_foreach_struct_const(s, pCreateInfo->pNext) {
> +  switch (s->sType) {
> +  case VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_
> INFO_EXT:
> + vk_mod_list = (const VkImageDrmFormatModifierListCreateInfoEXT
> *) s;
> + isl_mod_info = choose_drm_for

Re: [Mesa-dev] [RFC PATCH v1 24/30] RFC: anv: Support VkPhysicalDeviceImageDrmFormatModifierInfoEXT

2017-11-07 Thread Jason Ekstrand
On Tue, Nov 7, 2017 at 6:48 AM, Chad Versace 
wrote:

> Incremental implementation of VK_EXT_image_drm_format_modifier.
> ---
>  src/intel/vulkan/anv_formats.c | 45 ++
> +++-
>  1 file changed, 40 insertions(+), 5 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_
> formats.c
> index dc46fdb5425..d6eeb9d1c45 100644
> --- a/src/intel/vulkan/anv_formats.c
> +++ b/src/intel/vulkan/anv_formats.c
> @@ -813,6 +813,7 @@ static VkResult
>  anv_get_image_format_properties(
> struct anv_physical_device *physical_device,
> const VkPhysicalDeviceImageFormatInfo2KHR *info,
> +   const VkPhysicalDeviceImageDrmFormatModifierInfoEXT *drm_info,
> VkImageFormatProperties *pImageFormatProperties,
> VkSamplerYcbcrConversionImageFormatPropertiesKHR
> *pYcbcrImageFormatProperties)
>  {
> @@ -826,14 +827,34 @@ anv_get_image_format_properties(
> if (format == NULL)
>goto unsupported;
>
> +   uint64_t drm_format_mod = DRM_FORMAT_MOD_INVALID;
> +   if (drm_info) {
> +  assert(info->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT);
> +  drm_format_mod = drm_info->drmFormatModifier;
> +   }
> +
> VkFormatFeatureFlags format_feature_flags =
>get_image_format_features(devinfo, info->format, format,
> info->tiling,
> -DRM_FORMAT_MOD_INVALID);
> +drm_format_mod);
> +
> +   /* The core Vulkan spec places strict constraints on the image
> capabilities
> +* advertised here. For example, the core spec requires that
> +* maxMipLevels == log2(maxWidth) + 1
> +* when tiling is VK_IMAGE_TILING_OPTIMAL; and requires that
> +* maxExtent >= VkPhysicalDeviceLimits::maxImageDimension${N}D.
> +* However, the VK_EXT_image_drm_format_modifier specification grants
> the
> +* implementation the freedom to further restrict the image
> capabilities
> +* when tiling is VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT.
>

How about adding one extra paragraph here saying that we choose to only
support "simple" 2D images.


> +*/
>
> switch (info->type) {
> default:
>unreachable("bad VkImageType");
> case VK_IMAGE_TYPE_1D:
> +  /* We reject 1D images with modifiers due to FUD */
>

We could support 1D but meh.  Just use a texture buffer instead.


> +  if (drm_info)
> + goto unsupported;
> +
>maxExtent.width = 16384;
>maxExtent.height = 1;
>maxExtent.depth = 1;
> @@ -848,10 +869,20 @@ anv_get_image_format_properties(
>maxExtent.width = 16384;
>maxExtent.height = 16384;
>maxExtent.depth = 1;
> -  maxMipLevels = 15; /* log2(maxWidth) + 1 */
> -  maxArraySize = 2048;
> +
> +  if (drm_info) {
> + maxMipLevels = 1;
> + maxArraySize = 1;
> +  } else {
> + maxMipLevels = 15; /* log2(maxWidth) + 1 */
> + maxArraySize = 2048;
> +  }
>break;
> case VK_IMAGE_TYPE_3D:
> +  /* We reject 3D images with modifiers due to FUD */
>

I have neither uncertainty nor doubt, but I do have a very healthy helping
of fear. :-)  Let's just go with a global comment above and drop the ones
that make us look like cowards. :-P


> +  if (drm_info)
> + goto unsupported;
> +
>maxExtent.width = 2048;
>maxExtent.height = 2048;
>maxExtent.depth = 2048;
> @@ -976,7 +1007,7 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties(
>.flags = createFlags,
> };
>
> -   return anv_get_image_format_properties(physical_device, &info,
> +   return anv_get_image_format_properties(physical_device, &info, NULL,
>pImageFormatProperties, NULL);
>  }
>
> @@ -1009,6 +1040,7 @@ VkResult anv_GetPhysicalDeviceImageFormatPr
> operties2KHR(
>  {
> ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
> const VkPhysicalDeviceExternalImageFormatInfoKHR *external_info =
> NULL;
> +   const VkPhysicalDeviceImageDrmFormatModifierInfoEXT *drm_info = NULL;
> VkExternalImageFormatPropertiesKHR *external_props = NULL;
> VkSamplerYcbcrConversionImageFormatPropertiesKHR *ycbcr_props = NULL;
> VkResult result;
> @@ -1019,6 +1051,9 @@ VkResult anv_GetPhysicalDeviceImageFormatPr
> operties2KHR(
>case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_
> INFO_KHR:
>   external_info = (const void *) s;
>   break;
> +  case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_
> MODIFIER_INFO_EXT:
> + drm_info = (const void *) s;
> + break;
>default:
>   anv_debug_ignored_stype(s->sType);
>   break;
> @@ -1041,7 +1076,7 @@ VkResult anv_GetPhysicalDeviceImageFormatPr
> operties2KHR(
> }
>
> result = anv_get_image_format_properties(physical_device, base_info,
> -   &base_props->imageFormatProperties, ycbcr_props);
> +   drm_info, &base_props->imageFo

Re: [Mesa-dev] [RFC PATCH v1 23/30] RFC: anv: Support VkDrmFormatModifierPropertiesListEXT

2017-11-07 Thread Jason Ekstrand
On Tue, Nov 7, 2017 at 6:48 AM, Chad Versace 
wrote:

> Incremental implementation of VK_EXT_image_drm_format_modifier.
> ---
>  src/intel/vulkan/anv_formats.c | 144 ++
> +++
>  1 file changed, 132 insertions(+), 12 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_
> formats.c
> index 0dd990bb9a8..dc46fdb5425 100644
> --- a/src/intel/vulkan/anv_formats.c
> +++ b/src/intel/vulkan/anv_formats.c
> @@ -21,6 +21,8 @@
>   * IN THE SOFTWARE.
>   */
>
> +#include 
> +
>  #include "anv_private.h"
>  #include "vk_enum_to_str.h"
>  #include "vk_format_info.h"
> @@ -425,6 +427,9 @@ anv_get_format_plane(const struct gen_device_info
> *devinfo, VkFormat vk_format,
>return unsupported;
>
> if (aspect & (VK_IMAGE_ASPECT_DEPTH_BIT |
> VK_IMAGE_ASPECT_STENCIL_BIT)) {
> +  if (tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
> + return unsupported;
> +
>assert(vk_format_aspects(vk_format) &
>   (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT));
>return plane_format;
> @@ -435,6 +440,18 @@ anv_get_format_plane(const struct gen_device_info
> *devinfo, VkFormat vk_format,
> const struct isl_format_layout *isl_layout =
>isl_format_get_layout(plane_format.isl_format);
>
> +   /* For VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT, the image's
> driver-internal
> +* format must be the user-facing VkFormat. Modifying the VkFormat in
> any
> +* way, including swizzling, is illegal.
> +*/
>

What are you doing with this comment?  There's no code to go with it.


> +
> +   /* For now, for no reason other than FUD, we decline to support texture
> +* compression with modifiers.
>
+*/
> +   if (tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT &&
> +   isl_layout->txc != ISL_TXC_NONE)
> +  return unsupported;
> +
> if (tiling == VK_IMAGE_TILING_OPTIMAL &&
> !util_is_power_of_two(isl_layout->bpb)) {
>/* Tiled formats *must* be power-of-two because we need up upload
> @@ -456,7 +473,8 @@ anv_get_format_plane(const struct gen_device_info
> *devinfo, VkFormat vk_format,
> /* The B4G4R4A4 format isn't available prior to Broadwell so we have
> to fall
>  * back to a format with a more complex swizzle.
>  */
> -   if (vk_format == VK_FORMAT_B4G4R4A4_UNORM_PACK16 && devinfo->gen < 8)
> {
> +   if (tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT &&
> +   vk_format == VK_FORMAT_B4G4R4A4_UNORM_PACK16 && devinfo->gen < 8)
> {
>

This is not the only way you can get swizzling.  This one is just a special
case because it's gen-dependent.  Instead, we should just have a thing at
the end which checks whether or not it's a swizzled format and bails.


>plane_format.isl_format = ISL_FORMAT_B4G4R4A4_UNORM;
>plane_format.swizzle = ISL_SWIZZLE(GREEN, RED, ALPHA, BLUE);
> }
> @@ -466,21 +484,29 @@ anv_get_format_plane(const struct gen_device_info
> *devinfo, VkFormat vk_format,
>
>  // Format capabilities
>
> +/**
> + * Parameter drm_format_mod must be DRM_FORMAT_MOD_INVALID unless
> vk_tiling is
> + * VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT.
> + */
>  static VkFormatFeatureFlags
>  get_image_format_features(const struct gen_device_info *devinfo,
>VkFormat vk_format,
>const struct anv_format *anv_format,
> -  VkImageTiling vk_tiling)
> +  VkImageTiling vk_tiling,
> +  uint64_t drm_format_mod)
>  {
> VkFormatFeatureFlags flags = 0;
>
> +   if (vk_tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
> +  assert(drm_format_mod == DRM_FORMAT_MOD_INVALID);
> +
> if (anv_format == NULL)
>return 0;
>
> const VkImageAspectFlags aspects = vk_format_aspects(vk_format);
>
> if (aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
> VK_IMAGE_ASPECT_STENCIL_BIT)) {
> -  if (vk_tiling == VK_IMAGE_TILING_LINEAR)
> +  if (vk_tiling != VK_IMAGE_TILING_OPTIMAL)
>   return 0;
>
>flags |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
> @@ -503,6 +529,17 @@ get_image_format_features(const struct
> gen_device_info *devinfo,
> if (plane_format.isl_format == ISL_FORMAT_UNSUPPORTED)
>return 0;
>
> +   const struct isl_format_layout *isl_layout =
> +  isl_format_get_layout(plane_format.isl_format);
> +
> +   if (vk_tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
> +  assert(isl_layout->txc == ISL_TXC_NONE);
> +
> +   /* For VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT, the base format and
> +* non-base format must be the same, because the image's
> driver-internal
> +* format must be the user-facing VkFormat. Modifying the VkFormat in
> any
> +* way, including swizzling, is illegal.
> +*/
>

Again, this comment seems to be dangling.


> struct anv_format_plane base_plane_format = plane_format;
> if (vk_tiling == VK_IMAGE_TILING_OPTIMAL) {
>base_plane_fo

Re: [Mesa-dev] [RFC PATCH v1 21/30] RFC: anv: Implement VK_EXT_external_memory_dma_buf

2017-11-07 Thread Jason Ekstrand
On Tue, Nov 7, 2017 at 6:48 AM, Chad Versace 
wrote:

> The draft spec lives at
> http://kiwitree.net/~chadv/vulkan/#1.0-VK_EXT_external_memory_dma_buf.
>
> I plan to ask Khronos to merge the spec this week.
> ---
>  src/intel/vulkan/anv_device.c  | 13 ++---
>  src/intel/vulkan/anv_extensions.py |  1 +
>  src/intel/vulkan/anv_formats.c | 23 ---
>  3 files changed, 27 insertions(+), 10 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index 67028e8da9f..a28eaf242ca 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -1538,11 +1538,11 @@ VkResult anv_AllocateMemory(
>  * ignored.
>  */
> if (fd_info && fd_info->handleType) {
> -  /* At the moment, we only support the OPAQUE_FD memory type which is
> -   * just a GEM buffer.
> -   */
> +  /* At the moment, we support only the below handle types. */
>assert(fd_info->handleType ==
> - VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
> +   VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
> + fd_info->handleType ==
> +   VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
>
>result = anv_bo_cache_import(device, &device->bo_cache,
> fd_info->fd, &mem->bo);
> @@ -1616,9 +1616,8 @@ VkResult anv_GetMemoryFdKHR(
>
> assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
>
> -   /* We support only one handle type. */
> -   assert(pGetFdInfo->handleType ==
> -  VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
> +   assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
> +  pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
>
> return anv_bo_cache_export(dev, &dev->bo_cache, mem->bo, pFd);
>  }
> diff --git a/src/intel/vulkan/anv_extensions.py
> b/src/intel/vulkan/anv_extensions.py
> index b1e984b8cd0..093c89fef01 100644
> --- a/src/intel/vulkan/anv_extensions.py
> +++ b/src/intel/vulkan/anv_extensions.py
> @@ -86,6 +86,7 @@ EXTENSIONS = [
>  Extension('VK_KHR_xlib_surface',  6,
> 'VK_USE_PLATFORM_XLIB_KHR'),
>  Extension('VK_KHX_multiview', 1, True),
>  Extension('VK_EXT_debug_report',  8, True),
> +Extension('VK_EXT_external_memory_dma_buf',   1, True),
>  ]
>
>  class VkVersion:
> diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats
> .c
> index 810f26cc750..0dd990bb9a8 100644
> --- a/src/intel/vulkan/anv_formats.c
> +++ b/src/intel/vulkan/anv_formats.c
> @@ -860,7 +860,7 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties(
>pImageFormatProperties, NULL);
>  }
>
> -static const VkExternalMemoryPropertiesKHR prime_fd_props = {
> +static const VkExternalMemoryPropertiesKHR opaque_fd_props = {
> /* If we can handle external, then we can both import and export it. */
> .externalMemoryFeatures = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT_KHR |
>   VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT_KHR,
> @@ -871,6 +871,17 @@ static const VkExternalMemoryPropertiesKHR
> prime_fd_props = {
>VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
>  };
>
> +static const VkExternalMemoryPropertiesKHR dma_buf_props = {
> +   /* If we can handle external, then we can both import and export it. */
> +   .externalMemoryFeatures = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT_KHR |
> + VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT_KHR,
> +   /* For the moment, let's not support mixing and matching */
> +   .exportFromImportedHandleTypes =
> +  VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
> +   .compatibleHandleTypes =
> +  VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
> +};
>

My plan for this has always been to just add the DMA_BUF_BIT flags to
prime_fd_props and use that for both types.


> +
>  VkResult anv_GetPhysicalDeviceImageFormatProperties2KHR(
>  VkPhysicalDevicephysicalDevice,
>  const VkPhysicalDeviceImageFormatInfo2KHR*  base_info,
> @@ -924,7 +935,10 @@ VkResult anv_GetPhysicalDeviceImageForm
> atProperties2KHR(
>switch (external_info->handleType) {
>case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
>   if (external_props)
> -external_props->externalMemoryProperties = prime_fd_props;
> +external_props->externalMemoryProperties = opaque_fd_props;
> +  case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
> + if (external_props)
> +external_props->externalMemoryProperties = dma_buf_props;
>   break;
>default:
>   /* From the Vulkan 1.0.42 spec:
> @@ -1005,7 +1019,10 @@ void anv_GetPhysicalDeviceExternalB
> ufferPropertiesKHR(
>
> switch (pExternalBufferInfo->handle

Re: [Mesa-dev] [PATCH] gallivm: fix compilation against LLVM r317488

2017-11-07 Thread Tobias Droste
Am Dienstag, 7. November 2017, 11:16:14 CET schrieb Samuel Pitoiset:
> 
> On 11/07/2017 11:15 AM, Michel Dänzer wrote:
> > On 07/11/17 11:03 AM, Michel Dänzer wrote:
> >> On 07/11/17 09:25 AM, Samuel Pitoiset wrote:
> >>> The unsafe algebra codepath has been redefined a lot, and
> >>> setUnsafeAlgebra() has been replaced with setFast().
> >>>
> >>> Signed-off-by: Samuel Pitoiset 
> >>> ---
> >>>   src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 4 
> >>>   1 file changed, 4 insertions(+)
> >>>
> >>> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp 
> >>> b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
> >>> index d988910a7e..1319407290 100644
> >>> --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
> >>> +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
> >>> @@ -830,7 +830,11 @@ lp_create_builder(LLVMContextRef ctx, enum 
> >>> lp_float_mode float_mode)
> >>> llvm::unwrap(builder)->setFastMathFlags(flags);
> >>> break;
> >>>  case LP_FLOAT_MODE_UNSAFE_FP_MATH:
> >>> +#if HAVE_LLVM >= 0x0600
> >>> +  flags.setFast();
> >>> +#else
> >>> flags.setUnsafeAlgebra();
> >>> +#endif
> >>> llvm::unwrap(builder)->setFastMathFlags(flags);
> >>> break;
> >>>  }
> >>>
> >>
> >> Reviewed-and-Tested-by: Michel Dänzer 
> > 
> > But note that Tobias Droste already submitted the same patch last night:
> > 
> > https://patchwork.freedesktop.org/patch/186737/
> 
> Ah right.

If you want to use my patch, then someone needs to push my patch for me, as I 
don't have the rights to do so. 

But I don't insist on it being chosen as the one that goes in. I'm fine if you 
just push yours.

> 
> > 
> > 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: use enum types instead of int/unsigned (v2)

2017-11-07 Thread Roland Scheidegger
Am 07.11.2017 um 20:52 schrieb Brian Paul:
> On 11/07/2017 11:09 AM, Roland Scheidegger wrote:
>> Am 07.11.2017 um 18:57 schrieb Brian Paul:
>>> On 11/07/2017 09:07 AM, Roland Scheidegger wrote:
 Am 07.11.2017 um 16:12 schrieb Brian Paul:
> Use the proper enum types for various variables.  Makes life in gdb
> a little nicer.  Note that the size of enum bitfields must be one
> larger so the high bit is always zero (for MSVC).
>
> v2: also increase size of image_format bitfield, per Eric Engestrom.
>
> Reviewed-by: Charmaine Lee 
> ---
>    src/mesa/state_tracker/st_glsl_to_tgsi.cpp   | 7 ---
>    src/mesa/state_tracker/st_glsl_to_tgsi_private.h | 6 +++---
>    src/mesa/state_tracker/st_mesa_to_tgsi.c | 6 +++---
>    src/mesa/state_tracker/st_mesa_to_tgsi.h | 7 ---
>    4 files changed, 14 insertions(+), 12 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 54e1961..2048b59 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -179,10 +179,10 @@ public:
>   int num_address_regs;
>   uint32_t samplers_used;
>   glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
> -   int sampler_targets[PIPE_MAX_SAMPLERS];   /**< One of
> TGSI_TEXTURE_* */
> +   enum tgsi_texture_type sampler_targets[PIPE_MAX_SAMPLERS];
>   int images_used;
>   int image_targets[PIPE_MAX_SHADER_IMAGES];
> -   unsigned image_formats[PIPE_MAX_SHADER_IMAGES];
> +   enum pipe_format image_formats[PIPE_MAX_SHADER_IMAGES];
>   bool indirect_addr_consts;
>   int wpos_transform_const;
>
> @@ -6489,7 +6489,8 @@ st_translate_program(
>   /* texture samplers */
>   for (i = 0; i < frag_const->MaxTextureImageUnits; i++) {
>  if (program->samplers_used & (1u << i)) {
> - unsigned type =
> st_translate_texture_type(program->sampler_types[i]);
> + enum tgsi_return_type type =
> +    st_translate_texture_type(program->sampler_types[i]);
>
>     t->samplers[i] = ureg_DECL_sampler(ureg, i);
>
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi_private.h
> b/src/mesa/state_tracker/st_glsl_to_tgsi_private.h
> index d57525d..3e51936 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi_private.h
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi_private.h
> @@ -127,13 +127,13 @@ public:
>   unsigned is_64bit_expanded:1;
>   unsigned sampler_base:5;
>   unsigned sampler_array_size:6; /**< 1-based size of sampler
> array, 1 if not array */
> -   unsigned tex_target:4; /**< One of TEXTURE_*_INDEX */
> +   gl_texture_index tex_target:5;
>   glsl_base_type tex_type:5;
>   unsigned tex_shadow:1;
> -   unsigned image_format:9;
> +   enum pipe_format image_format:10;

 Due to this being an enum which can easily grow, this is of course quite
 dangerous (doubly so now due to needing one bit more than you'd
 assume).
 I think it would be nice if assignment somewhere were guarded by an
 ASSERT(x.image_format == image_format) afterwards at least.
 (Unfortunately I don't see a way to easily make a nice static
 assertion,
 you'd have to use something like
 STATIC_ASSERT(PIPE_FORMAT_COUNT < 1 << (10-1)) which isn't tied
 directly
 to the bitfield definition.)
>>>
>>> OK, I've come up with a simple runtime assertion to check for sufficient
>>> bitfield size:
>>>
>>> /* Check that STRUCT::BITFIELD can hold MAXVAL */
>>> #define ASSERT_BITFIELD_SIZE(STRUCT, FIELD, MAXVAL) \
>>>     { \
>>>    STRUCT s; \
>>>    s.FIELD = MAXVAL; \
>>>    assert((int) s.FIELD == MAXVAL && "Insufficient bitfield
>>> size!");  \
>>>     }
>>>
>>> This works identically for signed and unsigned enum fields with gcc and
>>> MSVC.
>> Yes, it's just a pity you can't do it with compile time asserts.
>> (Albeit I'd think the compiler will actually optimize it out in any case
>> if you've got any optimizations enabled, as it can still determine the
>> assertion will never fail at compile time.)
> 
> GCC actually gives a compile-time warning when it can determine that the
> value won't fit in the field.  That's almost as good as a static
> assertion.  And the assertion seems to work at -O3 too.
Yes, that's what I meant - if it can figure out at compile time the
value won't fit and warn about it, it will also throw it out if it can
determine at compile time when it will fit, hence it nearly amounts to
the same as a static assert.

Roland


> 
> -Brian
> 
> 
>>
>>>
>>> For enum bitfields, MSVC asserts if the extra padding bit is missing.
>>> But I haven't managed to make that happen with gcc.
>>>
>>> I guess that's OK though.  If I institute this assertion macro w

Re: [Mesa-dev] [PATCH] amd/addrlib: update to latest version

2017-11-07 Thread Nicolai Hähnle

On 07.11.2017 18:35, Michel Dänzer wrote:

On 07/11/17 06:28 PM, Marek Olšák wrote:

Hi,

This patch is too large for the mailing list:

https://cgit.freedesktop.org/~mareko/mesa/commit/?h=addrlib&id=0e0f044268d3c1af2e78f161aaa2d92c30167cc1


 From the commit log:


I just overwrote all Mesa files with internal addrlib and discarded
hunks that we should probably keep, but I might have missed something.


FWIW, if a separate branch was used for importing addrlib changes, Git
could keep track of our changes to it in the Mesa tree.


I concur in principle. In practice, I explored doing that, but the 
commit discipline on the internal addrlib repository is pretty crappy, 
so we'd end up having to massage commits anyway. Maybe we can find a 
sweet spot somewhere by updating slightly more regularly, perhaps once a 
month.


With Dylan's comment addressed,

Acked-by: Nicolai Hähnle 

Cheers,
Nicolai







--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: s/unsigned/glsl_base_type/ in glsl type code

2017-11-07 Thread Ian Romanick
On 11/06/2017 02:54 PM, Brian Paul wrote:
> On 11/06/2017 02:27 PM, Ian Romanick wrote:
>> On 11/06/2017 01:00 PM, Brian Paul wrote:
>>> Declare glsl_type::sampled_type as glsl_base_type as we do for the
>>> base_type field.  And make base_type a bitfield to save a few bytes.
>>
>> Hmm... I have mixed feelings about this.  I made a conscious decision to
>> have base_type be "full size" because it's used a lot.  I suspect there
>> will be some increase in code size across this change.  There's probably
>> also some performance difference, but it may not be enough to be
>> measurable.  I do like actually using type names. :)
>>
>> As new base types were added, sampled_type remained 2 bits because GLSL
>> only allows float, int and uint.  This is the reason GLSL_TYPE_UINT64
>> and GLSL_TYPE_INT64 are not grouped with GLSL_TYPE_UINT and
>> GLSL_TYPE_INT.
>>
>> I wonder if it might be more compact (in terms of generated code) to
>> make both fields 8 bits and group them together.
> 
> Probably.  I can do that in a v2.  Otherwise, I'm fine with leaving
> base_type unchanged.  Your call.

I tried my original suggestion, and I also tried using
__attribute__((__packed__)) on the glsl_base_type enum.  Is there an
equivalent to this for Visual Studio?

Here are the results I got for an optimized release build:

    text   data     bss      dec    hex filename
10339299 345184  549312 11233795 ab6a03 lib64/i965_dri.so before
10346970 345184  549312 11241466 ab87fa lib64/i965_dri.so this patch
10340979 345184  549312 11235475 ab7093 lib64/i965_dri.so :8 for both
10339963 345184  549312 11234459 ab6c9b lib64/i965_dri.so packed enum

This patch adds about 7.5k, and using :8 for sampled_type and
glsl_base_type adds only 1.5k.  The packed enum is the clear winner.  It
only adds ~600 bytes.

The results for 32-bit are similar.

    text   data     bss      dec    hex filename
11599089 270876  422916 12292881 bb9311 lib/i965_dri.so before
11608245 270876  422916 12302037 bbb6d5 lib/i965_dri.so this patch
11600549 270876  422916 12294341 bb98c5 lib/i965_dri.so :8 for both
11600780 270876  422916 12294572 bb99ac lib/i965_dri.so packed enum

Here, :8 and packed enum are about the same.  Which, honestly, is what I
would have expected.  8 bits is 8 bits... it really shouldn't matter how
you declare them. *shrug*

> -Brian
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: use enum types instead of int/unsigned (v2)

2017-11-07 Thread Brian Paul

On 11/07/2017 11:09 AM, Roland Scheidegger wrote:

Am 07.11.2017 um 18:57 schrieb Brian Paul:

On 11/07/2017 09:07 AM, Roland Scheidegger wrote:

Am 07.11.2017 um 16:12 schrieb Brian Paul:

Use the proper enum types for various variables.  Makes life in gdb
a little nicer.  Note that the size of enum bitfields must be one
larger so the high bit is always zero (for MSVC).

v2: also increase size of image_format bitfield, per Eric Engestrom.

Reviewed-by: Charmaine Lee 
---
   src/mesa/state_tracker/st_glsl_to_tgsi.cpp   | 7 ---
   src/mesa/state_tracker/st_glsl_to_tgsi_private.h | 6 +++---
   src/mesa/state_tracker/st_mesa_to_tgsi.c | 6 +++---
   src/mesa/state_tracker/st_mesa_to_tgsi.h | 7 ---
   4 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 54e1961..2048b59 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -179,10 +179,10 @@ public:
  int num_address_regs;
  uint32_t samplers_used;
  glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
-   int sampler_targets[PIPE_MAX_SAMPLERS];   /**< One of
TGSI_TEXTURE_* */
+   enum tgsi_texture_type sampler_targets[PIPE_MAX_SAMPLERS];
  int images_used;
  int image_targets[PIPE_MAX_SHADER_IMAGES];
-   unsigned image_formats[PIPE_MAX_SHADER_IMAGES];
+   enum pipe_format image_formats[PIPE_MAX_SHADER_IMAGES];
  bool indirect_addr_consts;
  int wpos_transform_const;

@@ -6489,7 +6489,8 @@ st_translate_program(
  /* texture samplers */
  for (i = 0; i < frag_const->MaxTextureImageUnits; i++) {
 if (program->samplers_used & (1u << i)) {
- unsigned type =
st_translate_texture_type(program->sampler_types[i]);
+ enum tgsi_return_type type =
+st_translate_texture_type(program->sampler_types[i]);

t->samplers[i] = ureg_DECL_sampler(ureg, i);

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi_private.h
b/src/mesa/state_tracker/st_glsl_to_tgsi_private.h
index d57525d..3e51936 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi_private.h
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi_private.h
@@ -127,13 +127,13 @@ public:
  unsigned is_64bit_expanded:1;
  unsigned sampler_base:5;
  unsigned sampler_array_size:6; /**< 1-based size of sampler
array, 1 if not array */
-   unsigned tex_target:4; /**< One of TEXTURE_*_INDEX */
+   gl_texture_index tex_target:5;
  glsl_base_type tex_type:5;
  unsigned tex_shadow:1;
-   unsigned image_format:9;
+   enum pipe_format image_format:10;


Due to this being an enum which can easily grow, this is of course quite
dangerous (doubly so now due to needing one bit more than you'd assume).
I think it would be nice if assignment somewhere were guarded by an
ASSERT(x.image_format == image_format) afterwards at least.
(Unfortunately I don't see a way to easily make a nice static assertion,
you'd have to use something like
STATIC_ASSERT(PIPE_FORMAT_COUNT < 1 << (10-1)) which isn't tied directly
to the bitfield definition.)


OK, I've come up with a simple runtime assertion to check for sufficient
bitfield size:

/* Check that STRUCT::BITFIELD can hold MAXVAL */
#define ASSERT_BITFIELD_SIZE(STRUCT, FIELD, MAXVAL) \
{ \
   STRUCT s; \
   s.FIELD = MAXVAL; \
   assert((int) s.FIELD == MAXVAL && "Insufficient bitfield size!");  \
}

This works identically for signed and unsigned enum fields with gcc and
MSVC.

Yes, it's just a pity you can't do it with compile time asserts.
(Albeit I'd think the compiler will actually optimize it out in any case
if you've got any optimizations enabled, as it can still determine the
assertion will never fail at compile time.)


GCC actually gives a compile-time warning when it can determine that the 
value won't fit in the field.  That's almost as good as a static 
assertion.  And the assertion seems to work at -O3 too.


-Brian






For enum bitfields, MSVC asserts if the extra padding bit is missing.
But I haven't managed to make that happen with gcc.

I guess that's OK though.  If I institute this assertion macro we won't
detect the MSVC padding issue with gcc but I'll probably hit it soon
enough on Windows if it happens.

Yes, I think that should suffice and be a good solution.

Roland





FWIW, I also came up with a macro that can compute the number of bits in
a bitfield:

#define SIZEOF_BITFIELD(STRUCT, FIELD, SIZE_OUT) \
{ \
SIZE_OUT = 32; \
unsigned i; \
for (i = 0; i < 32; i++) { \
   struct STRUCT test; \
   test.FIELD = 1 << i; \
   if (abs(test.FIELD) != 1 << i) { \
  SIZE_OUT = i; \
  break; \
   } \
} \
}

It handles int, unsigned and enum bitfields correctly with gcc and MSVC.
  I don't think we need it right now.  But there it is if we need it
someday.

I'm going to rework my patches to use the assertion macro.

-Brian




In any case,
Reviewed-by:

Re: [Mesa-dev] [PATCH 3/3] etnaviv: Add sampler TS support

2017-11-07 Thread Christian Gmeiner
2017-11-07 17:43 GMT+01:00 Wladimir J. van der Laan :
> Sampler TS is a hardware optimization that can be used when rendering
> to textures. After rendering to a resource with TS enabled, the
> texture unit can use this to bypass lookups to empty tiles. This also
> means a resolve-in-place can be avoided to flush the TS.
>
> This commit is also an optimization when not using sampler TS, as
> resolve-in-place will now be skipped if a resource has no (valid) TS.
>
> Signed-off-by: Wladimir J. van der Laan 

Reviewed-by: Christian Gmeiner 

> ---
>  src/gallium/drivers/etnaviv/etnaviv_emit.c| 26 
>  src/gallium/drivers/etnaviv/etnaviv_texture.c | 92 
> +--
>  src/gallium/drivers/etnaviv/etnaviv_texture.h |  5 ++
>  3 files changed, 116 insertions(+), 7 deletions(-)
>
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_emit.c 
> b/src/gallium/drivers/etnaviv/etnaviv_emit.c
> index d313af6..bd2a570 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_emit.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_emit.c
> @@ -627,6 +627,32 @@ etna_emit_state(struct etna_context *ctx)
>/*01668*/ EMIT_STATE_RELOC(TS_DEPTH_SURFACE_BASE, 
> &ctx->framebuffer.TS_DEPTH_SURFACE_BASE);
>/*0166C*/ EMIT_STATE(TS_DEPTH_CLEAR_VALUE, 
> ctx->framebuffer.TS_DEPTH_CLEAR_VALUE);
> }
> +   if (unlikely(dirty & ETNA_DIRTY_SAMPLER_VIEWS)) {
> +  for (int x = 0; x < VIVS_TS_SAMPLER__LEN; ++x) {
> + if ((1 << x) & active_samplers) {
> +struct etna_sampler_view *sv = 
> etna_sampler_view(ctx->sampler_view[x]);
> +/*01720*/ EMIT_STATE(TS_SAMPLER_CONFIG(x), 
> sv->TS_SAMPLER_CONFIG);
> + }
> +  }
> +  for (int x = 0; x < VIVS_TS_SAMPLER__LEN; ++x) {
> + if ((1 << x) & active_samplers) {
> +struct etna_sampler_view *sv = 
> etna_sampler_view(ctx->sampler_view[x]);
> +/*01740*/ EMIT_STATE_RELOC(TS_SAMPLER_STATUS_BASE(x), 
> &sv->TS_SAMPLER_STATUS_BASE);
> + }
> +  }
> +  for (int x = 0; x < VIVS_TS_SAMPLER__LEN; ++x) {
> + if ((1 << x) & active_samplers) {
> +struct etna_sampler_view *sv = 
> etna_sampler_view(ctx->sampler_view[x]);
> +/*01760*/ EMIT_STATE(TS_SAMPLER_CLEAR_VALUE(x), 
> sv->TS_SAMPLER_CLEAR_VALUE);
> + }
> +  }
> +  for (int x = 0; x < VIVS_TS_SAMPLER__LEN; ++x) {
> + if ((1 << x) & active_samplers) {
> +struct etna_sampler_view *sv = 
> etna_sampler_view(ctx->sampler_view[x]);
> +/*01780*/ EMIT_STATE(TS_SAMPLER_CLEAR_VALUE2(x), 
> sv->TS_SAMPLER_CLEAR_VALUE2);
> + }
> +  }
> +   }
> if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS | ETNA_DIRTY_SAMPLERS))) {
>for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
>   uint32_t val = 0; /* 0 == sampler inactive */
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_texture.c 
> b/src/gallium/drivers/etnaviv/etnaviv_texture.c
> index 34529c6..3d5e88b 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_texture.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_texture.c
> @@ -112,11 +112,82 @@ etna_delete_sampler_state(struct pipe_context *pctx, 
> void *ss)
> FREE(ss);
>  }
>
> +/* Return true if a resource has a TS, and it is valid for at least one 
> level */
> +static bool
> +etna_resource_has_valid_ts(struct pipe_resource *prsc)
> +{
> +   struct etna_resource *rsc = etna_resource(prsc);
> +
> +   if (!rsc->ts_bo)
> +  return false;
> +
> +   for (int level = 0; level <= rsc->base.last_level; level++)
> +  if (rsc->levels[level].ts_valid)
> + return true;
> +   return false;
> +}
> +
> +/* Return true if the GPU can use sampler TS with this sampler view.
> + * Sampler TS is an optimization used when rendering to textures, where
> + * a resolve-in-place can be avoided when rendering has left a (valid) TS.
> + */
> +static bool
> +etna_can_use_sampler_ts(struct etna_context *ctx, struct pipe_sampler_view 
> *view, int num)
> +{
> +/* Can use sampler TS when:
> + * - the hardware supports sampler TS.
> + * - the sampler view will be bound to sampler <VIVS_TS_SAMPLER__LEN.
> + *   HALTI5 adds a mapping from sampler to sampler TS unit, but this is 
> AFAIK
> + *   absent on earlier models.
> + * - it is a texture, not a buffer.
> + * - the sampler view has a supported format for sampler TS.
> + * - the sampler will have one LOD, and it happens to be level 0.
> + *   (it is not sure if the hw supports it for other levels, but 
> available
> + *   state strongly suggests only one at a time).
> + * - the resource TS is valid for level 0.
> + */
> +   struct etna_resource *rsc = etna_resource(view->texture);
> +   struct etna_screen *screen = etna_screen(rsc->base.screen);
> +   return VIV_FEATURE(screen, chipMinorFeatures2, TEXTURE_TILED_READ) &&
> +  num < VIVS_TS_SAMPLER__LEN &&
> +  rsc->base.target != PIPE_BUFFER &&
> +  translate_ts_sampler_format(rsc->base.format) != ETNA_NO_MATCH &&

Re: [Mesa-dev] [PATCH 8/8] r600: add support for hw atomic counters. (v3)

2017-11-07 Thread Dave Airlie
On 8 November 2017 at 03:26, Nicolai Hähnle  wrote:
> On 07.11.2017 07:31, Dave Airlie wrote:
>>
>> From: Dave Airlie 
>>
>> This adds support for the evergreen/cayman atomic counters.
>>
>> These are implemented using GDS append/consume counters. The values
>> for each counter are loaded before drawing and saved after each draw
>> using special CP packets.
>
>
> I admit I'm a bit confused by this at the hardware level.
>
> My understanding of GDS is that it's mostly another copy of LDS (but
> global), and all GDS instructions are atomic by default. There is extra
> append-consume hardware, but it's main point is to support use cases where
> operations have to be ordered by wave, or where a wave return is supposed to
> be blocked (for producer/consumer kernels and ring buffer management).
>
> So this should really work without the append/consume counters as well, just
> with regular GDS memory. Is there a particular reason why you haven't done
> that? I suppose it might require more stuff to manage GDS allocations in the
> kernel, and if it works with this approach...

Because this is what the closed source driver did. I've pretty much
had to program
this from traces I took from it.

I don't want to diverge too much from what it did as debugging gets harder.

The append/consume counters should be faster than GDS, and it looks to be why
this extension was created in the first place.

> Acked-by: Nicolai Hähnle 

Thanks.
Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] etnaviv: Add TS_SAMPLER formats to etnaviv_format

2017-11-07 Thread Christian Gmeiner
2017-11-07 17:43 GMT+01:00 Wladimir J. van der Laan :
> Sampler TS introduces yet another format enumeration for renderable
> formats. Introduce it into etnaviv_format as unobtrusively as possible.
>
> Signed-off-by: Wladimir J. van der Laan 

Reviewed-by: Christian Gmeiner 

> ---
>  src/gallium/drivers/etnaviv/etnaviv_format.c | 19 +++
>  src/gallium/drivers/etnaviv/etnaviv_format.h |  3 +++
>  2 files changed, 22 insertions(+)
>
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_format.c 
> b/src/gallium/drivers/etnaviv/etnaviv_format.c
> index 3dd212f..f3cba46 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_format.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_format.c
> @@ -40,6 +40,7 @@ struct etna_format {
> unsigned vtx;
> unsigned tex;
> unsigned rs;
> +   unsigned ts;
> boolean present;
> const unsigned char tex_swiz[4];
>  };
> @@ -53,6 +54,13 @@ struct etna_format {
>  #define RS_FORMAT_X8B8G8R8(RS_FORMAT_X8R8G8B8 | RS_FORMAT_RB_SWAP)
>  #define RS_FORMAT_A8B8G8R8(RS_FORMAT_A8R8G8B8 | RS_FORMAT_RB_SWAP)
>
> +#define TS_SAMPLER_FORMAT_NONE  ETNA_NO_MATCH
> +#define TS_SAMPLER_FORMAT_X4R4G4B4  TS_SAMPLER_FORMAT_A4R4G4B4
> +#define TS_SAMPLER_FORMAT_X1R5G5B5  TS_SAMPLER_FORMAT_A1R5G5B5
> +#define TS_SAMPLER_FORMAT_YUY2  TS_SAMPLER_FORMAT_NONE /* Not supported 
> AFAIK */
> +#define TS_SAMPLER_FORMAT_X8B8G8R8  TS_SAMPLER_FORMAT_X8R8G8B8
> +#define TS_SAMPLER_FORMAT_A8B8G8R8  TS_SAMPLER_FORMAT_A8R8G8B8
> +
>  #define SWIZ(x,y,z,w) {\
> PIPE_SWIZZLE_##x,   \
> PIPE_SWIZZLE_##y,   \
> @@ -66,6 +74,7 @@ struct etna_format {
>.vtx = FE_DATA_TYPE_##vtxfmt, \
>.tex = TEXTURE_FORMAT_##texfmt, \
>.rs = RS_FORMAT_##rsfmt,\
> +  .ts = TS_SAMPLER_FORMAT_##rsfmt,\
>.present = 1,   \
>.tex_swiz = texswiz,\
> }
> @@ -76,6 +85,7 @@ struct etna_format {
>.vtx = ETNA_NO_MATCH,\
>.tex = TEXTURE_FORMAT_##fmt, \
>.rs = RS_FORMAT_##rsfmt, \
> +  .ts = TS_SAMPLER_FORMAT_##rsfmt, \
>.present = 1,\
>.tex_swiz = swiz,\
> }
> @@ -356,3 +366,12 @@ translate_vertex_format_type(enum pipe_format fmt)
>
> return formats[fmt].vtx;
>  }
> +
> +uint32_t
> +translate_ts_sampler_format(enum pipe_format fmt)
> +{
> +   if (!formats[fmt].present)
> +  return ETNA_NO_MATCH;
> +
> +   return formats[fmt].ts;
> +}
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_format.h 
> b/src/gallium/drivers/etnaviv/etnaviv_format.h
> index 1672d67..20c8e1b 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_format.h
> +++ b/src/gallium/drivers/etnaviv/etnaviv_format.h
> @@ -53,4 +53,7 @@ translate_rs_format_rb_swap(enum pipe_format fmt);
>  uint32_t
>  translate_vertex_format_type(enum pipe_format fmt);
>
> +uint32_t
> +translate_ts_sampler_format(enum pipe_format fmt);
> +
>  #endif /* ETNAVIV_FORMAT_H_ */
> --
> 2.7.4
>



-- 
greets
--
Christian Gmeiner, MSc

https://christian-gmeiner.info
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] etnaviv: rnndb update

2017-11-07 Thread Christian Gmeiner
2017-11-07 17:43 GMT+01:00 Wladimir J. van der Laan :
> Update rnndb to etna_viv 07c756a.
>
> Signed-off-by: Wladimir J. van der Laan 

Reviewed-by: Christian Gmeiner 

> ---
>  src/gallium/drivers/etnaviv/hw/common.xml.h |  2 +-
>  src/gallium/drivers/etnaviv/hw/common_3d.xml.h  |  2 +-
>  src/gallium/drivers/etnaviv/hw/state.xml.h  |  4 ++--
>  src/gallium/drivers/etnaviv/hw/state_3d.xml.h   | 14 --
>  src/gallium/drivers/etnaviv/hw/state_blt.xml.h  |  4 ++--
>  src/gallium/drivers/etnaviv/hw/texdesc_3d.xml.h |  3 ++-
>  6 files changed, 20 insertions(+), 9 deletions(-)
>
> diff --git a/src/gallium/drivers/etnaviv/hw/common.xml.h 
> b/src/gallium/drivers/etnaviv/hw/common.xml.h
> index b98fa84..60bde8b 100644
> --- a/src/gallium/drivers/etnaviv/hw/common.xml.h
> +++ b/src/gallium/drivers/etnaviv/hw/common.xml.h
> @@ -11,7 +11,7 @@ The rules-ng-ng source files this header was generated from 
> are:
>  - texdesc_3d.xml (   3183 bytes, from 2017-10-31 19:05:01)
>  - copyright.xml  (   1597 bytes, from 2016-10-29 07:29:22)
>  - common.xml (  26187 bytes, from 2017-10-31 19:05:01)
> -- common_3d.xml  (  14547 bytes, from 2017-11-01 16:08:07)
> +- common_3d.xml  (  14615 bytes, from 2017-11-04 14:03:35)
>
>  Copyright (C) 2012-2017 by the following authors:
>  - Wladimir J. van der Laan 
> diff --git a/src/gallium/drivers/etnaviv/hw/common_3d.xml.h 
> b/src/gallium/drivers/etnaviv/hw/common_3d.xml.h
> index 8f19d5b..d110a36 100644
> --- a/src/gallium/drivers/etnaviv/hw/common_3d.xml.h
> +++ b/src/gallium/drivers/etnaviv/hw/common_3d.xml.h
> @@ -11,7 +11,7 @@ The rules-ng-ng source files this header was generated from 
> are:
>  - texdesc_3d.xml (   3183 bytes, from 2017-10-31 19:05:01)
>  - copyright.xml  (   1597 bytes, from 2016-10-29 07:29:22)
>  - common.xml (  26187 bytes, from 2017-10-31 19:05:01)
> -- common_3d.xml  (  14547 bytes, from 2017-11-01 16:08:07)
> +- common_3d.xml  (  14615 bytes, from 2017-11-04 14:03:35)
>
>  Copyright (C) 2012-2017 by the following authors:
>  - Wladimir J. van der Laan 
> diff --git a/src/gallium/drivers/etnaviv/hw/state.xml.h 
> b/src/gallium/drivers/etnaviv/hw/state.xml.h
> index 0a93a4f..485c0eb 100644
> --- a/src/gallium/drivers/etnaviv/hw/state.xml.h
> +++ b/src/gallium/drivers/etnaviv/hw/state.xml.h
> @@ -10,11 +10,11 @@ git clone git://0x04.net/rules-ng-ng
>  The rules-ng-ng source files this header was generated from are:
>  - state.xml (  26087 bytes, from 2017-10-30 13:44:54)
>  - common.xml(  26187 bytes, from 2017-10-31 19:05:01)
> -- common_3d.xml (  14547 bytes, from 2017-11-01 16:08:07)
> +- common_3d.xml (  14615 bytes, from 2017-11-04 14:03:35)
>  - state_hi.xml  (  27733 bytes, from 2017-10-02 19:00:30)
>  - copyright.xml (   1597 bytes, from 2016-10-29 07:29:22)
>  - state_2d.xml  (  51552 bytes, from 2016-10-29 07:29:22)
> -- state_3d.xml  (  79520 bytes, from 2017-10-31 19:05:01)
> +- state_3d.xml  (  79992 bytes, from 2017-11-07 10:44:35)
>  - state_blt.xml (  13405 bytes, from 2017-10-16 17:42:46)
>  - state_vg.xml  (   5975 bytes, from 2016-10-29 07:29:22)
>
> diff --git a/src/gallium/drivers/etnaviv/hw/state_3d.xml.h 
> b/src/gallium/drivers/etnaviv/hw/state_3d.xml.h
> index c5722aa..13122789 100644
> --- a/src/gallium/drivers/etnaviv/hw/state_3d.xml.h
> +++ b/src/gallium/drivers/etnaviv/hw/state_3d.xml.h
> @@ -10,11 +10,11 @@ git clone git://0x04.net/rules-ng-ng
>  The rules-ng-ng source files this header was generated from are:
>  - state.xml (  26087 bytes, from 2017-10-30 13:44:54)
>  - common.xml(  26187 bytes, from 2017-10-31 19:05:01)
> -- common_3d.xml (  14547 bytes, from 2017-11-01 16:08:07)
> +- common_3d.xml (  14615 bytes, from 2017-11-04 14:03:35)
>  - state_hi.xml  (  27733 bytes, from 2017-10-02 19:00:30)
>  - copyright.xml (   1597 bytes, from 2016-10-29 07:29:22)
>  - state_2d.xml  (  51552 bytes, from 2016-10-29 07:29:22)
> -- state_3d.xml  (  79520 bytes, from 2017-10-31 19:05:01)
> +- state_3d.xml  (  79992 bytes, from 2017-11-07 10:44:35)
>  - state_blt.xml (  13405 bytes, from 2017-10-16 17:42:46)
>  - state_vg.xml  (   5975 bytes, from 2016-10-29 07:29:22)
>
> @@ -122,6 +122,14 @@ DEALINGS IN THE SOFTWARE.
>  #define LOGIC_OP_OR_REVERSE0x000d
>  #define LOGIC_OP_OR0x000e
>  #define LOGIC_OP_SET   0x000f
> +#define TS_SAMPLER_FORMAT_A4R4G4B4 0x0000
> +#define TS_SAMPLER_FORMAT_A1R5G5B5 0x0001
> +#define TS_SAMPLER_FORMAT_R5G6B5   0x0002
> +#define TS_SAMPLER_FORMAT_A8R8G8B8 0x0003
> +#define TS_SAMPLER_FORMAT_X8R8G8B8 0x0004
> +#define TS_SAMPLER_FORMAT_D24X8
> 0x0005
> +#define TS_SAMPLER_FORMAT_D16  0x0008
> +#def

[Mesa-dev] [PATCH v2] glsl: Make #pragma STDGL invariant(all) only modify outputs.

2017-11-07 Thread Kenneth Graunke
According to the GLSL ES 3.20, GLSL 4.50, and GLSL 1.20 specs:

   "To force all output variables to be invariant, use the pragma

   #pragma STDGL invariant(all)

before all declarations in a shader."

Notably, this is only supposed to affect output variables.  Furthermore,

   "Only variables output from a shader can be candidates for invariance."

It looks like this has been wrong since we first supported the pragma in
2011 (commit 86b4398cd158024f6be9fa830554a11c2a7ebe0c).

Fixes dEQP-GLES2.functional.shaders.preprocessor.pragmas.pragma_fragment.

v2: Now that all cases are identical (other than compute shaders, which
have no output variables anyway), we can drop the switch statement
entirely.  We also don't need the current_function == NULL check;
this was a holdover from when we had a single var_mode_out for both
function parameters and shader varyings, in the bad old days.

Reviewed-by: Iago Toral Quiroga  [v1]
Reviewed-by: Ilia Mirkin  [v1]
---
 src/compiler/glsl/ast_to_hir.cpp | 26 ++
 1 file changed, 2 insertions(+), 24 deletions(-)

Good call, Ilia :)  I'd originally had a /* Invariance isn't meaningful
for fragment shader outputs */ comment, but then I looked elsewhere in
the file and realized that it actually was allowed.  So, I changed the
case, but didn't think to combine it.

It turns out we can combine all of them...here's a better version.

diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index 441404f86d3..1794a1af5cb 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -4077,30 +4077,8 @@ apply_type_qualifier_to_variable(const struct 
ast_type_qualifier *qual,
   }
}
 
-   if (state->all_invariant && (state->current_function == NULL)) {
-  switch (state->stage) {
-  case MESA_SHADER_VERTEX:
- if (var->data.mode == ir_var_shader_out)
-var->data.invariant = true;
- break;
-  case MESA_SHADER_TESS_CTRL:
-  case MESA_SHADER_TESS_EVAL:
-  case MESA_SHADER_GEOMETRY:
- if ((var->data.mode == ir_var_shader_in)
- || (var->data.mode == ir_var_shader_out))
-var->data.invariant = true;
- break;
-  case MESA_SHADER_FRAGMENT:
- if (var->data.mode == ir_var_shader_in)
-var->data.invariant = true;
- break;
-  case MESA_SHADER_COMPUTE:
- /* Invariance isn't meaningful in compute shaders. */
- break;
-  default:
- break;
-  }
-   }
+   if (state->all_invariant && var->data.mode == ir_var_shader_out)
+  var->data.invariant = true;
 
var->data.interpolation =
   interpret_interpolation_qualifier(qual, var->type,
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] amd/addrlib: update to latest version

2017-11-07 Thread Marek Olšák
On Tue, Nov 7, 2017 at 6:35 PM, Michel Dänzer  wrote:
> On 07/11/17 06:28 PM, Marek Olšák wrote:
>> Hi,
>>
>> This patch is too large for the mailing list:
>>
>> https://cgit.freedesktop.org/~mareko/mesa/commit/?h=addrlib&id=0e0f044268d3c1af2e78f161aaa2d92c30167cc1
>
> From the commit log:
>
>> I just overwrote all Mesa files with internal addrlib and discarded
>> hunks that we should probably keep, but I might have missed something.
>
> FWIW, if a separate branch was used for importing addrlib changes, Git
> could keep track of our changes to it in the Mesa tree.

Previous addrlib updates were like that. I chose a range of addrlib
commits, then "git mv address src/amd/addrlib" before the first
commit, apply all commits, and cherry-pick them to Mesa. But this time
it was too ugly.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: use enum types instead of int/unsigned (v2)

2017-11-07 Thread Roland Scheidegger
Am 07.11.2017 um 18:57 schrieb Brian Paul:
> On 11/07/2017 09:07 AM, Roland Scheidegger wrote:
>> Am 07.11.2017 um 16:12 schrieb Brian Paul:
>>> Use the proper enum types for various variables.  Makes life in gdb
>>> a little nicer.  Note that the size of enum bitfields must be one
>>> larger so the high bit is always zero (for MSVC).
>>>
>>> v2: also increase size of image_format bitfield, per Eric Engestrom.
>>>
>>> Reviewed-by: Charmaine Lee 
>>> ---
>>>   src/mesa/state_tracker/st_glsl_to_tgsi.cpp   | 7 ---
>>>   src/mesa/state_tracker/st_glsl_to_tgsi_private.h | 6 +++---
>>>   src/mesa/state_tracker/st_mesa_to_tgsi.c | 6 +++---
>>>   src/mesa/state_tracker/st_mesa_to_tgsi.h | 7 ---
>>>   4 files changed, 14 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>>> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>>> index 54e1961..2048b59 100644
>>> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>>> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>>> @@ -179,10 +179,10 @@ public:
>>>  int num_address_regs;
>>>  uint32_t samplers_used;
>>>  glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
>>> -   int sampler_targets[PIPE_MAX_SAMPLERS];   /**< One of
>>> TGSI_TEXTURE_* */
>>> +   enum tgsi_texture_type sampler_targets[PIPE_MAX_SAMPLERS];
>>>  int images_used;
>>>  int image_targets[PIPE_MAX_SHADER_IMAGES];
>>> -   unsigned image_formats[PIPE_MAX_SHADER_IMAGES];
>>> +   enum pipe_format image_formats[PIPE_MAX_SHADER_IMAGES];
>>>  bool indirect_addr_consts;
>>>  int wpos_transform_const;
>>>
>>> @@ -6489,7 +6489,8 @@ st_translate_program(
>>>  /* texture samplers */
>>>  for (i = 0; i < frag_const->MaxTextureImageUnits; i++) {
>>>     if (program->samplers_used & (1u << i)) {
>>> - unsigned type =
>>> st_translate_texture_type(program->sampler_types[i]);
>>> + enum tgsi_return_type type =
>>> +    st_translate_texture_type(program->sampler_types[i]);
>>>
>>>    t->samplers[i] = ureg_DECL_sampler(ureg, i);
>>>
>>> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi_private.h
>>> b/src/mesa/state_tracker/st_glsl_to_tgsi_private.h
>>> index d57525d..3e51936 100644
>>> --- a/src/mesa/state_tracker/st_glsl_to_tgsi_private.h
>>> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi_private.h
>>> @@ -127,13 +127,13 @@ public:
>>>  unsigned is_64bit_expanded:1;
>>>  unsigned sampler_base:5;
>>>  unsigned sampler_array_size:6; /**< 1-based size of sampler
>>> array, 1 if not array */
>>> -   unsigned tex_target:4; /**< One of TEXTURE_*_INDEX */
>>> +   gl_texture_index tex_target:5;
>>>  glsl_base_type tex_type:5;
>>>  unsigned tex_shadow:1;
>>> -   unsigned image_format:9;
>>> +   enum pipe_format image_format:10;
>>
>> Due to this being an enum which can easily grow, this is of course quite
>> dangerous (doubly so now due to needing one bit more than you'd assume).
>> I think it would be nice if assignment somewhere were guarded by an
>> ASSERT(x.image_format == image_format) afterwards at least.
>> (Unfortunately I don't see a way to easily make a nice static assertion,
>> you'd have to use something like
>> STATIC_ASSERT(PIPE_FORMAT_COUNT < 1 << (10-1)) which isn't tied directly
>> to the bitfield definition.)
> 
> OK, I've come up with a simple runtime assertion to check for sufficient
> bitfield size:
> 
> /* Check that STRUCT::FIELD can hold MAXVAL */
> #define ASSERT_BITFIELD_SIZE(STRUCT, FIELD, MAXVAL) \
>    { \
>   STRUCT s; \
>   s.FIELD = MAXVAL; \
>   assert((int) s.FIELD == MAXVAL && "Insufficient bitfield size!");  \
>    }
> 
> This works identically for signed and unsigned enum fields with gcc and
> MSVC.
Yes, it's just a pity you can't do it with compile time asserts.
(Albeit I'd think the compiler will actually optimize it out in any case
if you've got any optimizations enabled, as it can still determine the
assertion will never fail at compile time.)

> 
> For enum bitfields, MSVC asserts if the extra padding bit is missing.
> But I haven't managed to make that happen with gcc.
> 
> I guess that's OK though.  If I institute this assertion macro we won't
> detect the MSVC padding issue with gcc but I'll probably hit it soon
> enough on Windows if it happens.
Yes, I think that should suffice and be a good solution.

Roland


> 
> 
> FWIW, I also came up with a macro that can compute the number of bits in
> a bitfield:
> 
> #define SIZEOF_BITFIELD(STRUCT, FIELD, SIZE_OUT) \
> { \
>    SIZE_OUT = 32; \
>    unsigned i; \
>    for (i = 0; i < 32; i++) { \
>   struct STRUCT test; \
>   test.FIELD = 1 << i; \
>   if (abs(test.FIELD) != 1 << i) { \
>  SIZE_OUT = i; \
>  break; \
>   } \
>    } \
> }
> 
> It handles int, unsigned and enum bitfields correctly with gcc and MSVC.
>  I don't think we need it right now.  But there it is if we need it
> someday.
> 
> I'm going to rework my patches to use the asserti

Re: [Mesa-dev] [RFC PATCH v1 00/30] anv: dma-buf and DRM format modifiers

2017-11-07 Thread Jason Ekstrand
On Tue, Nov 7, 2017 at 9:04 AM, Chad Versace 
wrote:

> On Tue 07 Nov 2017, Jason Ekstrand wrote:
> > On Tue, Nov 7, 2017 at 8:11 AM, Jason Ekstrand <[1]ja...@jlekstrand.net>
> wrote:
> >
> > On Tue, Nov 7, 2017 at 6:47 AM, Chad Versace <[2]
> chadvers...@chromium.org>
> > wrote:
> >
> > - crucible
> >
> >   We really need to write crucible tests to hammer some
> tricky
> >   corner cases.  I haven't written them yet. Volunteers?
> >
> >
> > If you expect anything other than anv to implement this, I think
> those
> > tests should probably go in vk-gl-cts.  I'm sorry.
> >
> >
> > Also, before we can land the actual implementation of the three VK_EXT
> > extensions, I would like there to be some reasonable level of automated
> > testing.  If this is something that some compositor stack is going to
> depend
> > on, then I'd really like to not break it randomly.  I'm sorry if that's
> a bit
> > hard-line, but having dEQP has made us all lazy about actually writing
> tests
> > for things.
>
> That's a reasonable requirement. After all, this will be a critical
> extension for systems where the compositor uses it.
>
> But, to write those tests, I'll need to `set tabstop=4` in vim! I don't
> know if I can swallow that pain :)
>

With therapy, I think you can get over it.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] amd/addrlib: update to latest version

2017-11-07 Thread Dylan Baker
It looks like the file deletions aren't applied to meson.

Quoting Marek Olšák (2017-11-07 09:28:39)
> Hi,
> 
> This patch is too large for the mailing list:
> 
> https://cgit.freedesktop.org/~mareko/mesa/commit/?h=addrlib&id=0e0f044268d3c1af2e78f161aaa2d92c30167cc1
> 
> The plan is to push this on Thursday if there is no feedback.
> 
> Marek
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: use enum types instead of int/unsigned (v2)

2017-11-07 Thread Brian Paul

On 11/07/2017 09:07 AM, Roland Scheidegger wrote:

Am 07.11.2017 um 16:12 schrieb Brian Paul:

Use the proper enum types for various variables.  Makes life in gdb
a little nicer.  Note that the size of enum bitfields must be one
larger so the high bit is always zero (for MSVC).

v2: also increase size of image_format bitfield, per Eric Engestrom.

Reviewed-by: Charmaine Lee 
---
  src/mesa/state_tracker/st_glsl_to_tgsi.cpp   | 7 ---
  src/mesa/state_tracker/st_glsl_to_tgsi_private.h | 6 +++---
  src/mesa/state_tracker/st_mesa_to_tgsi.c | 6 +++---
  src/mesa/state_tracker/st_mesa_to_tgsi.h | 7 ---
  4 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 54e1961..2048b59 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -179,10 +179,10 @@ public:
 int num_address_regs;
 uint32_t samplers_used;
 glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
-   int sampler_targets[PIPE_MAX_SAMPLERS];   /**< One of TGSI_TEXTURE_* */
+   enum tgsi_texture_type sampler_targets[PIPE_MAX_SAMPLERS];
 int images_used;
 int image_targets[PIPE_MAX_SHADER_IMAGES];
-   unsigned image_formats[PIPE_MAX_SHADER_IMAGES];
+   enum pipe_format image_formats[PIPE_MAX_SHADER_IMAGES];
 bool indirect_addr_consts;
 int wpos_transform_const;

@@ -6489,7 +6489,8 @@ st_translate_program(
 /* texture samplers */
 for (i = 0; i < frag_const->MaxTextureImageUnits; i++) {
if (program->samplers_used & (1u << i)) {
- unsigned type = st_translate_texture_type(program->sampler_types[i]);
+ enum tgsi_return_type type =
+st_translate_texture_type(program->sampler_types[i]);

   t->samplers[i] = ureg_DECL_sampler(ureg, i);

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi_private.h 
b/src/mesa/state_tracker/st_glsl_to_tgsi_private.h
index d57525d..3e51936 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi_private.h
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi_private.h
@@ -127,13 +127,13 @@ public:
 unsigned is_64bit_expanded:1;
 unsigned sampler_base:5;
 unsigned sampler_array_size:6; /**< 1-based size of sampler array, 1 if 
not array */
-   unsigned tex_target:4; /**< One of TEXTURE_*_INDEX */
+   gl_texture_index tex_target:5;
 glsl_base_type tex_type:5;
 unsigned tex_shadow:1;
-   unsigned image_format:9;
+   enum pipe_format image_format:10;


Due to this being an enum which can easily grow, this is of course quite
dangerous (doubly so now due to needing one bit more than you'd assume).
I think it would be nice if assignment somewhere were guarded by an
ASSERT(x.image_format == image_format) afterwards at least.
(Unfortunately I don't see a way to easily make a nice static assertion,
you'd have to use something like
STATIC_ASSERT(PIPE_FORMAT_COUNT < 1 << (10-1)) which isn't tied directly
to the bitfield definition.)


OK, I've come up with a simple runtime assertion to check for sufficient 
bitfield size:


/* Check that STRUCT::FIELD can hold MAXVAL */
#define ASSERT_BITFIELD_SIZE(STRUCT, FIELD, MAXVAL) \
   { \
  STRUCT s; \
  s.FIELD = MAXVAL; \
  assert((int) s.FIELD == MAXVAL && "Insufficient bitfield size!");  \
   }

This works identically for signed and unsigned enum fields with gcc and 
MSVC.


For enum bitfields, MSVC asserts if the extra padding bit is missing. 
But I haven't managed to make that happen with gcc.


I guess that's OK though.  If I institute this assertion macro we won't 
detect the MSVC padding issue with gcc but I'll probably hit it soon 
enough on Windows if it happens.



FWIW, I also came up with a macro that can compute the number of bits in 
a bitfield:


#define SIZEOF_BITFIELD(STRUCT, FIELD, SIZE_OUT) \
{ \
   SIZE_OUT = 32; \
   unsigned i; \
   for (i = 0; i < 32; i++) { \
  struct STRUCT test; \
  test.FIELD = 1 << i; \
  if (abs(test.FIELD) != 1 << i) { \
 SIZE_OUT = i; \
 break; \
  } \
   } \
}

It handles int, unsigned and enum bitfields correctly with gcc and MSVC. 
 I don't think we need it right now.  But there it is if we need it 
someday.


I'm going to rework my patches to use the assertion macro.

-Brian




In any case,
Reviewed-by: Roland Scheidegger 



 unsigned tex_offset_num_offset:3;
 unsigned dead_mask:4; /**< Used in dead code elimination */
-   unsigned buffer_access:3; /**< buffer access type */
+   unsigned buffer_access:3; /**< bitmask of TGSI_MEMORY_x bits */

 const struct tgsi_opcode_info *info;
  };
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c 
b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index fa9fa44..8a61776 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -166,8 +166,8 @@ src_register( struct st_translate *t,
  /**
   * Map mesa texture target to TGSI texture target.
   */
-uns

Re: [Mesa-dev] [PATCH v3] i965 : optimized bucket index calculation

2017-11-07 Thread Ian Romanick
On 11/06/2017 08:30 PM, aravindan.muthuku...@intel.com wrote:
> From: Aravindan Muthukumar 
> 
> Now the complexity has been reduced to O(1)
> 
> Algorithm calculates the index using matrix method.
> Matrix arrangement is as below:
> Assuming PAGE_SIZE is 4096.
> 
>   1*4096   2*40963*40964*4096
>   5*4096   6*40967*40968*4096
>   10*4096  12*4096   14*4096   16*4096
>   20*4096  24*4096   28*4096   32*4096
>...  ...   ...   ...
>...  ...   ...   ...
>...  ...   ...   max_cache_size
> 
> From this matrix it's clearly seen that every row
> follows the below way:
>   ...   ...   ...n
> n+(1/4)n  n+(1/2)n  n+(3/4)n2n
> 
> Row is calculated as log2(size/PAGE_SIZE)
> Column is calculated as converting the difference
> between the elements to fit into power size of two
> and indexing it.
> 
> Final Index is (row*4)+(col-1)
> 
> Tested with Intel Mesa CI.
> 
> Improves performance of 3DMark on BXT by 0.705966% +/- 0.229767% (n=20)
> 
> v3: review comments implemented (Ian).
> v2: review comments implemented (Jason).
>  
> Signed-off-by: Aravindan Muthukumar 
> Signed-off-by: Kedar Karanje 
> Reviewed-by: Yogesh Marathe 
> ---
>  src/mesa/drivers/dri/i965/brw_bufmgr.c | 38 
> +++---
>  1 file changed, 30 insertions(+), 8 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
> b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> index 17036b5..9a423da 100644
> --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> @@ -86,6 +86,8 @@
>  
>  #define memclear(s) memset(&s, 0, sizeof(s))
>  
> +#define PAGE_SIZE 4096
> +
>  #define FILE_DEBUG_FLAG DEBUG_BUFMGR
>  
>  static inline int
> @@ -180,19 +182,35 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t 
> pitch, uint32_t tiling)
> return ALIGN(pitch, tile_width);
>  }
>  
> +/*
> + * This function finds the correct bucket fit for the input size.
> + * The function works with O(1) complexity when the requested size
> + * was queried instead of iterating the size through all the buckets.
> + */
>  static struct bo_cache_bucket *
>  bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
>  {
> -   int i;
> +   /* Calculating the pages and rounding up to the page size. */
> +   const unsigned int pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
>  
> -   for (i = 0; i < bufmgr->num_buckets; i++) {
> -  struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];
> -  if (bucket->size >= size) {
> - return bucket;
> -  }
> -   }
> +   /* Finding the row number based on the calculated pages. */
> +   const unsigned int rows = 30 - __builtin_clz((pages - 1) | 3);
>  
> -   return NULL;

Why did you make random (and incorrect) style changes and delete
(useful) comments from the code I sent?

> +   const unsigned int row_max_pages = 4 << rows;
> +   const unsigned int prev_row_max_pages = (row_max_pages / 2) & ~2;
> +
> +   /* Finding the column number using column interval. */
> +   int col_size_log2 = rows - 1;
> +   col_size_log2 += (col_size_log2 < 0);
> +
> +   const unsigned int col = ( (pages - prev_row_max_pages +
> +( (1 << col_size_log2) - 1) ) >> col_size_log2 );
> +
> +   /* Calculating the index based on the row and column. */
> +   const unsigned int index = (rows * 4) + (col - 1);
> +
> +   return (index < bufmgr->num_buckets) ?
> +  &bufmgr->cache_bucket[index] : NULL;
>  }
>  
>  int
> @@ -1254,6 +1272,10 @@ add_bucket(struct brw_bufmgr *bufmgr, int size)
> list_inithead(&bufmgr->cache_bucket[i].head);
> bufmgr->cache_bucket[i].size = size;
> bufmgr->num_buckets++;
> +
> +   assert(bucket_for_size(bufmgr, size) == &bufmgr->cache_bucket[i]);
> +   assert(bucket_for_size(bufmgr, size - 2048) == &bufmgr->cache_bucket[i]);
> +   assert(bucket_for_size(bufmgr, size + 1) != &bufmgr->cache_bucket[i]);
>  }
>  
>  static void
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/8] gallium/tgsi: start adding hw atomics (v3)

2017-11-07 Thread Nicolai Hähnle

On 07.11.2017 18:26, Nicolai Hähnle wrote:

On 07.11.2017 17:57, Marek Olšák wrote:

With HW atomic counters, MaxAtomicBufferSize is a pretty small number
(counters * 4). TGSI has maximum index = 32K.


Ah, you're right.


I forgot: the other comments (about the assertion in patch 2, and about 
non-contiguous buffers in patch 5 -- which for some reason didn't get 
sent before) still stand.





Patches 1-7:

Reviewed-by: Nicolai Hähnle 




Marek

On Tue, Nov 7, 2017 at 5:43 PM, Nicolai Hähnle  
wrote:

On 07.11.2017 17:25, Nicolai Hähnle wrote:


On 07.11.2017 07:31, Dave Airlie wrote:


diff --git a/src/gallium/docs/source/tgsi.rst
b/src/gallium/docs/source/tgsi.rst
index 1a51fe9..0c331f2 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -2638,9 +2638,11 @@ logical operations.  In this context atomicity
means that another
   concurrent memory access operation that affects the same memory
   location is guaranteed to be performed strictly before or after the
   entire execution of the atomic operation. The resource may be a 
BUFFER,
-IMAGE, or MEMORY.  In the case of an image, the offset works the 
same as

for
-``LOAD`` and ``STORE``, specified above. These atomic operations may
-only be used with 32-bit integer image formats.
+IMAGE, ATOMIC, or MEMORY.  In the case of an image, the offset works
+the same as for ``LOAD`` and ``STORE``, specified above. For atomic
+counters, the offset is an immediate index to the base hw atomic
+counter for this operation.
+These atomic operations may only be used with 32-bit integer image
formats.
   .. opcode:: ATOMUADD - Atomic integer addition
@@ -3440,7 +3442,6 @@ TGSI_SEMANTIC_SUBGROUP_LT_MASK
   A bit mask of ``bit index < TGSI_SEMANTIC_SUBGROUP_INVOCATION``, 
i.e.
   ``(1 << subgroup_invocation) - 1`` in arbitrary precision 
arithmetic.

-



Stray whitespace change.



   Declaration Interpolate
   ^^^
@@ -3517,6 +3518,31 @@ accessing a misaligned address is undefined.
   Usage of the STORE opcode is only allowed if the WR (writable) flag
   is set.
+Hardware Atomic Register File
+^
+
+Hardware atomics are declared as a 2D array with an optional array 
id.

+
+The first member of the dimension is the buffer resource the atomic
+is located in.
+The second member is a range into the buffer resource, either for
+one or multiple counters. If this is an array, the declaration 
will have

+an unique array id.
+
+Each counter is 4 bytes in size, and index and ranges are in counters
not bytes.
+DCL ATOMIC[0][0]
+DCL ATOMIC[0][1]
+
+This declares two atomics, one at the start of the buffer and one 
in the

+second 4 bytes.
+
+DCL ATOMIC[0][0]
+DCL ATOMIC[1][0]
+DCL ATOMIC[1][1..3], ARRAY(1)
+
+This declares 5 atomics, one in buffer 0 at 0,
+one in buffer 1 at 0, and an array of 3 atomics in
+the buffer 1, starting at 1.



My understanding is that these ranges could be highly non-contiguous,
right? I.e., you could have

DCL ATOMIC[0][15]
DCL ATOMIC[0][8423..8430], ARRAY(1)
DCL ATOMIC[0][25112]

... corresponding to the offsets in the GLSL shader. The doc should 
really
point this out explicitly. Also, this might cause trouble because 
the TGSI

range tokens don't have enough bits to represent high offsets.



Thinking about it some more, here's one way to deal with it. Have
st_glsl_to_tgsi pack the indices (by keeping track of the number of
counters per atomic counter buffer), and then add an "atomic counter 
offset"

dword to the TGSI. The decls above could become:

   DCL ATOMIC[0][0], OFFSET(25112)
   DCL ATOMIC[0][1..8], OFFSET(8423), ARRAY(1)
   DCL ATOMIC[0][9], OFFSET(15)

(the point is the order of TGSI indices doesn't matter outside of 
counter

arrays)

The driver would then compute the number of counters per HW atomic 
counter
buffer to assign on-chip HW atomic memory during compile time, and 
then copy

memory from the given offsets to the on-chip HW atomic memory as part of
state validation before draws and dispatches.

Cheers,
Nicolai

--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev






--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/8] st/mesa: start adding support for hw atomics atom.

2017-11-07 Thread Nicolai Hähnle

On 07.11.2017 07:31, Dave Airlie wrote:

From: Dave Airlie 

This adds a new atom that calls the new driver API to
bind buffers containing hw atomics.

Signed-off-by: Dave Airlie 
---
  src/mesa/state_tracker/st_atom_atomicbuf.c   | 37 
  src/mesa/state_tracker/st_atom_list.h|  2 ++
  src/mesa/state_tracker/st_cb_bufferobjects.c |  2 +-
  src/mesa/state_tracker/st_context.c  |  9 ++-
  src/mesa/state_tracker/st_context.h  |  1 +
  5 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_atomicbuf.c 
b/src/mesa/state_tracker/st_atom_atomicbuf.c
index ee5944f..9c518a0 100644
--- a/src/mesa/state_tracker/st_atom_atomicbuf.c
+++ b/src/mesa/state_tracker/st_atom_atomicbuf.c
@@ -128,3 +128,40 @@ st_bind_cs_atomics(struct st_context *st)
  
 st_bind_atomics(st, prog, PIPE_SHADER_COMPUTE);

  }
+
+void
+st_bind_hw_atomic_buffers(struct st_context *st)
+{
+   struct pipe_shader_buffer buffers[PIPE_MAX_HW_ATOMIC_BUFFERS];
+   int i;
+   int num_buffers = 0;
+
+   if (!st->has_hw_atomics)
+  return;
+
+   for (i = 0; i < MAX_COMBINED_ATOMIC_BUFFERS; i++) {
+  struct gl_buffer_binding *binding = &st->ctx->AtomicBufferBindings[i];
+  struct st_buffer_object *st_obj = 
st_buffer_object(binding->BufferObject);
+  struct pipe_shader_buffer *sb = &buffers[num_buffers];
+
+  if (st_obj && st_obj->buffer) {
+sb->buffer = st_obj->buffer;
+sb->buffer_offset = binding->Offset;
+sb->buffer_size = st_obj->buffer->width0 - binding->Offset;
+
+/* AutomaticSize is FALSE if the buffer was set with BindBufferRange.
+ * Take the minimum just to be sure.
+ */
+if (!binding->AutomaticSize)
+  sb->buffer_size = MIN2(sb->buffer_size, (unsigned) binding->Size);
+num_buffers++;
+  } else {
+sb->buffer = NULL;
+sb->buffer_offset = 0;
+sb->buffer_size = 0;
+  }
+   }
+
+   st->pipe->set_hw_atomic_buffers(st->pipe, 0, num_buffers,
+   buffers);


This looks like it's likely incorrect if there are "gaps", i.e. atomic 
buffer bindings 0 and 2 are set and used by shaders, but 1 is not set.




+}
diff --git a/src/mesa/state_tracker/st_atom_list.h 
b/src/mesa/state_tracker/st_atom_list.h
index b76854e..8f50a72 100644
--- a/src/mesa/state_tracker/st_atom_list.h
+++ b/src/mesa/state_tracker/st_atom_list.h
@@ -66,6 +66,8 @@ ST_STATE(ST_NEW_GS_SSBOS, st_bind_gs_ssbos)
  ST_STATE(ST_NEW_PIXEL_TRANSFER, st_update_pixel_transfer)
  ST_STATE(ST_NEW_TESS_STATE, st_update_tess)
  
+ST_STATE(ST_NEW_HW_ATOMICS, st_bind_hw_atomic_buffers)

+
  /* this must be done after the vertex program update */
  ST_STATE(ST_NEW_VERTEX_ARRAYS, st_update_array)
  
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c

index 86ebfc6..03fae95 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -348,7 +348,7 @@ bufferobj_data(struct gl_context *ctx,
 if (st_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER)
ctx->NewDriverState |= ST_NEW_SAMPLER_VIEWS | ST_NEW_IMAGE_UNITS;
 if (st_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER)
-  ctx->NewDriverState |= ST_NEW_ATOMIC_BUFFER;
+  ctx->NewDriverState |= st->has_hw_atomics ? ST_NEW_HW_ATOMICS : 
ST_NEW_ATOMIC_BUFFER;
  
 return GL_TRUE;

  }
diff --git a/src/mesa/state_tracker/st_context.c 
b/src/mesa/state_tracker/st_context.c
index 5d8dd8b..e82090b 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -405,6 +405,10 @@ st_create_context_priv( struct gl_context *ctx, struct 
pipe_context *pipe,
 st->has_multi_draw_indirect =
screen->get_param(screen, PIPE_CAP_MULTI_DRAW_INDIRECT);
  
+   st->has_hw_atomics =

+  screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
+   PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS) ? true 
: false;
+
 /* GL limits and extensions */
 st_init_limits(pipe->screen, &ctx->Const, &ctx->Extensions);
 st_init_extensions(pipe->screen, &ctx->Const,
@@ -497,7 +501,10 @@ static void st_init_driver_flags(struct st_context *st)
  
 /* Shader resources */

 f->NewTextureBuffer = ST_NEW_SAMPLER_VIEWS;
-   f->NewAtomicBuffer = ST_NEW_ATOMIC_BUFFER;
+   if (st->has_hw_atomics)
+  f->NewAtomicBuffer = ST_NEW_HW_ATOMICS;
+   else
+  f->NewAtomicBuffer = ST_NEW_ATOMIC_BUFFER;
 f->NewShaderStorageBuffer = ST_NEW_STORAGE_BUFFER;
 f->NewImageUnits = ST_NEW_IMAGE_UNITS;
  
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h

index ced915e..9f33eed 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -129,6 +129,7 @@ struct st_context
 boolean invalidate_on_gl_viewport;
 boolean draw_needs_minmax_index;
 boolean vertex_array_out_of_memor

Re: [Mesa-dev] [PATCH] amd/addrlib: update to latest version

2017-11-07 Thread Michel Dänzer
On 07/11/17 06:28 PM, Marek Olšák wrote:
> Hi,
> 
> This patch is too large for the mailing list:
> 
> https://cgit.freedesktop.org/~mareko/mesa/commit/?h=addrlib&id=0e0f044268d3c1af2e78f161aaa2d92c30167cc1

From the commit log:

> I just overwrote all Mesa files with internal addrlib and discarded
> hunks that we should probably keep, but I might have missed something.

FWIW, if a separate branch was used for importing addrlib changes, Git
could keep track of our changes to it in the Mesa tree.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] amd/addrlib: update to latest version

2017-11-07 Thread Marek Olšák
Hi,

This patch is too large for the mailing list:

https://cgit.freedesktop.org/~mareko/mesa/commit/?h=addrlib&id=0e0f044268d3c1af2e78f161aaa2d92c30167cc1

The plan is to push this on Thursday if there is no feedback.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] docs: add documentation for building with meson

2017-11-07 Thread Dylan Baker
v2: - Add information about CC, CXX, CFLAGS, and CXXFLAGS (Nicolai)
- Add message at top that meson for mesa is still a work in progress
- Add trailing "/" to directories (Eric E.)
- Fix a number of spelling/grammar/style suggestions from Eric E.
- Make a number of changes as suggested by Emil.
v3: - Fix order of commands in example (Eric E.)
- Add documentation for overriding LLVM version (Eric E.)
v4: - Rebase on master
- update default buildtype
- add note about b_ndebug
- Clarify meson configure a bit

Signed-off-by: Dylan Baker 
Reviewed-by: Eric Engestrom  (v3)
---
 docs/contents.html |   1 +
 docs/meson.html| 151 +
 2 files changed, 152 insertions(+)
 create mode 100644 docs/meson.html

diff --git a/docs/contents.html b/docs/contents.html
index d5455421091..9a86019e2f6 100644
--- a/docs/contents.html
+++ b/docs/contents.html
@@ -43,6 +43,7 @@
 Compiling / Installing
   
 Autoconf
+Meson
   
 
 Precompiled Libraries
diff --git a/docs/meson.html b/docs/meson.html
new file mode 100644
index 000..ee505b1d5ee
--- /dev/null
+++ b/docs/meson.html
@@ -0,0 +1,151 @@
+http://www.w3.org/TR/html4/loose.dtd";>
+
+
+  
+  Compilation and Installation using Meson
+  
+
+
+
+
+  The Mesa 3D Graphics Library
+
+
+
+
+
+Compilation and Installation using Meson
+
+1. Basic Usage
+
+The Meson build system for Mesa is still under active development,
+and should not be used in production environments.
+
+The meson build is currently only tested on linux, and is known to not work
+on macOS, Windows, and haiku. This will be fixed.
+
+
+The meson program is used to configure the source directory and generates
+either a ninja build file, or Visual Studio® build files. The latter must
+be enabled via the --backend switch, as ninja is always the default. Meson only
+supports out-of-tree builds, and must be passed a directory to put built and
+generated sources into. We'll call that directory "build" for examples.
+
+
+
+meson build/
+
+
+
+To see a description of your options you can run "meson configure" along with a
+build directory to view the selected options for. This will show your meson
+global arguments and project arguments, along with their defaults and your
+local settings.
+
+
+
+meson configure build/
+
+
+
+With additional arguments "meson configure" is used to change options on
+already configured build directory. All options passed to this command are in
+the form -D"command"="value".
+
+
+
+meson configure build/ -Dprefix=/tmp/install -Dglx=true
+
+
+
+Once you've run meson successfully you can use your configured backend to build
+the project. With ninja, the -C option can be used to point at a directory
+to build.
+
+
+
+ninja -C build/
+
+
+
+Without arguments, it will produce libGL.so and/or several other libraries
+depending on the options you have chosen. Later, if you want to rebuild for a
+different configuration, you should run ninja clean before
+rebuilding, or create a new out of tree build directory (meson supports an
+unlimited number of them) for each configuration you want to build.
+
+
+CC, CFLAGS, CXX, CXXFLAGS
+These environment variables
+control the C and C++ compilers used during the build. The default compilers
+depend on your operating system. Meson supports most of the popular compilers,
+a complete list is available
+http://mesonbuild.com/Reference-tables.html#compiler-ids";>here.
+
+These arguments are consumed and stored by meson when it is initialized or
+re-initialized. Therefore passing them to meson configure will not do anything,
+and passing them to ninja will only do something if ninja decides to
+re-initialize meson, for example, if a meson.build file has been changed.
+Changing these variables will not cause all targets to be rebuilt, so running
+ninja clean is recommended when changing CFLAGS or CXXFLAGS. meson will never
+change compiler in a configured build directory.
+
+
+
+CC=clang CXX=clang++ meson build-clang
+ninja -C build-clang
+ninja -C build-clang clean
+touch meson.build
+CFLAGS=-Wno-typedef-redefinition ninja -C build-clang
+
+
+
+LLVM
+Meson includes upstream logic to wrap llvm-config using its standard
+dependency interface. It will search $PATH (or %PATH% on windows) for
+llvm-config, so using an LLVM from a non-standard path is as easy as
+PATH=/path/with/llvm-config:$PATH meson build.
+
+
+
+PKG_CONFIG_PATH
+The
+pkg-config utility is a hard requirement for configuring and
+building Mesa on Linux and *BSD. It is used to search for external libraries
+on the system. This environment variable is used to control the search
+path for pkg-config. For instance, setting
+PKG_CONFIG_PATH=/usr/X11R6/lib/pkgconfig will search for
+package metadata in /usr/X11R6 before the standard
+directories.
+
+
+
+
+One of the oddities of meson is that some options are different when passed to
+the meson than to meson configure. Thes

Re: [Mesa-dev] [PATCH 8/8] r600: add support for hw atomic counters. (v3)

2017-11-07 Thread Nicolai Hähnle

On 07.11.2017 07:31, Dave Airlie wrote:

From: Dave Airlie 

This adds support for the evergreen/cayman atomic counters.

These are implemented using GDS append/consume counters. The values
for each counter are loaded before drawing and saved after each draw
using special CP packets.


I admit I'm a bit confused by this at the hardware level.

My understanding of GDS is that it's mostly another copy of LDS (but 
global), and all GDS instructions are atomic by default. There is extra 
append-consume hardware, but its main point is to support use cases 
where operations have to be ordered by wave, or where a wave return is 
supposed to be blocked (for producer/consumer kernels and ring buffer 
management).


So this should really work without the append/consume counters as well, 
just with regular GDS memory. Is there a particular reason why you 
haven't done that? I suppose it might require more stuff to manage GDS 
allocations in the kernel, and if it works with this approach...


Acked-by: Nicolai Hähnle 




v2: move hw atomic assignment into driver.
v3: fix messing up caps (Gert Wollny), only store ranges in driver,
drop buffers.

Signed-off-by: Dave Airlie 
---
  src/gallium/drivers/r600/evergreen_state.c   | 159 ++
  src/gallium/drivers/r600/r600_pipe.c |  15 ++
  src/gallium/drivers/r600/r600_pipe.h |  22 +++
  src/gallium/drivers/r600/r600_shader.c   | 239 ---
  src/gallium/drivers/r600/r600_shader.h   |  19 +++
  src/gallium/drivers/r600/r600_state_common.c |  46 ++
  src/gallium/drivers/r600/r600d_common.h  |   2 +
  7 files changed, 480 insertions(+), 22 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 96eb35a..634cd96 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -3716,6 +3716,38 @@ static void evergreen_set_tess_state(struct pipe_context 
*ctx,
rctx->tess_state_dirty = true;
  }
  
+static void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx,

+   unsigned start_slot,
+   unsigned count,
+   const struct pipe_shader_buffer 
*buffers)
+{
+   struct r600_context *rctx = (struct r600_context *)ctx;
+   struct r600_atomic_buffer_state *astate;
+   int i, idx;
+
+   astate = &rctx->atomic_buffer_state;
+
+   /* we'd probably like to expand this to 8 later so put the logic in */
+   for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) {
+   const struct pipe_shader_buffer *buf;
+   struct pipe_shader_buffer *abuf;
+
+   abuf = &astate->buffer[i];
+
+   if (!buffers || !buffers[idx].buffer) {
+   pipe_resource_reference(&abuf->buffer, NULL);
+   astate->enabled_mask &= ~(1 << i);
+   continue;
+   }
+   buf = &buffers[idx];
+
+   pipe_resource_reference(&abuf->buffer, buf->buffer);
+   abuf->buffer_offset = buf->buffer_offset;
+   abuf->buffer_size = buf->buffer_size;
+   astate->enabled_mask |= (1 << i);
+   }
+}
+
  void evergreen_init_state_functions(struct r600_context *rctx)
  {
unsigned id = 1;
@@ -3801,6 +3833,7 @@ void evergreen_init_state_functions(struct r600_context 
*rctx)
rctx->b.b.set_polygon_stipple = evergreen_set_polygon_stipple;
rctx->b.b.set_min_samples = evergreen_set_min_samples;
rctx->b.b.set_tess_state = evergreen_set_tess_state;
+   rctx->b.b.set_hw_atomic_buffers = evergreen_set_hw_atomic_buffers;
if (rctx->b.chip_class == EVERGREEN)
  rctx->b.b.get_sample_position = evergreen_get_sample_position;
  else
@@ -4107,3 +4140,129 @@ void eg_trace_emit(struct r600_context *rctx)
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, AC_ENCODE_TRACE_POINT(rctx->trace_id));
  }
+
+bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
+   struct r600_shader_atomic 
*combined_atomics,
+   uint8_t *atomic_used_mask_p)
+{
+   struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
+   struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
+   unsigned pkt_flags = 0;
+   uint8_t atomic_used_mask = 0;
+   int i, j, k;
+
+   for (i = 0; i < EG_NUM_HW_STAGES; i++) {
+   uint8_t num_atomic_stage;
+   struct r600_pipe_shader *pshader;
+
+   pshader = rctx->hw_shader_stages[i].shader;
+   if (!pshader)
+   continue;
+
+   num_atomic_stage = pshader->shader.nhwatomic_ranges;
+   if (!num_atomic_stage)
+   continue;
+
+   for

Re: [Mesa-dev] [PATCH 2/8] gallium/tgsi: start adding hw atomics (v3)

2017-11-07 Thread Nicolai Hähnle

On 07.11.2017 17:57, Marek Olšák wrote:

With HW atomic counters, MaxAtomicBufferSize is a pretty small number
(counters * 4). TGSI has maximum index = 32K.


Ah, you're right.

Patches 1-7:

Reviewed-by: Nicolai Hähnle 




Marek

On Tue, Nov 7, 2017 at 5:43 PM, Nicolai Hähnle  wrote:

On 07.11.2017 17:25, Nicolai Hähnle wrote:


On 07.11.2017 07:31, Dave Airlie wrote:


diff --git a/src/gallium/docs/source/tgsi.rst
b/src/gallium/docs/source/tgsi.rst
index 1a51fe9..0c331f2 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -2638,9 +2638,11 @@ logical operations.  In this context atomicity
means that another
   concurrent memory access operation that affects the same memory
   location is guaranteed to be performed strictly before or after the
   entire execution of the atomic operation. The resource may be a BUFFER,
-IMAGE, or MEMORY.  In the case of an image, the offset works the same as
for
-``LOAD`` and ``STORE``, specified above. These atomic operations may
-only be used with 32-bit integer image formats.
+IMAGE, ATOMIC, or MEMORY.  In the case of an image, the offset works
+the same as for ``LOAD`` and ``STORE``, specified above. For atomic
+counters, the offset is an immediate index to the base hw atomic
+counter for this operation.
+These atomic operations may only be used with 32-bit integer image
formats.
   .. opcode:: ATOMUADD - Atomic integer addition
@@ -3440,7 +3442,6 @@ TGSI_SEMANTIC_SUBGROUP_LT_MASK
   A bit mask of ``bit index < TGSI_SEMANTIC_SUBGROUP_INVOCATION``, i.e.
   ``(1 << subgroup_invocation) - 1`` in arbitrary precision arithmetic.
-



Stray whitespace change.



   Declaration Interpolate
   ^^^
@@ -3517,6 +3518,31 @@ accessing a misaligned address is undefined.
   Usage of the STORE opcode is only allowed if the WR (writable) flag
   is set.
+Hardware Atomic Register File
+^
+
+Hardware atomics are declared as a 2D array with an optional array id.
+
+The first member of the dimension is the buffer resource the atomic
+is located in.
+The second member is a range into the buffer resource, either for
+one or multiple counters. If this is an array, the declaration will have
+a unique array id.
+
+Each counter is 4 bytes in size, and index and ranges are in counters
not bytes.
+DCL ATOMIC[0][0]
+DCL ATOMIC[0][1]
+
+This declares two atomics, one at the start of the buffer and one in the
+second 4 bytes.
+
+DCL ATOMIC[0][0]
+DCL ATOMIC[1][0]
+DCL ATOMIC[1][1..3], ARRAY(1)
+
+This declares 5 atomics, one in buffer 0 at 0,
+one in buffer 1 at 0, and an array of 3 atomics in
+the buffer 1, starting at 1.



My understanding is that these ranges could be highly non-contiguous,
right? I.e., you could have

DCL ATOMIC[0][15]
DCL ATOMIC[0][8423..8430], ARRAY(1)
DCL ATOMIC[0][25112]

... corresponding to the offsets in the GLSL shader. The doc should really
point this out explicitly. Also, this might cause trouble because the TGSI
range tokens don't have enough bits to represent high offsets.



Thinking about it some more, here's one way to deal with it. Have
st_glsl_to_tgsi pack the indices (by keeping track of the number of
counters per atomic counter buffer), and then add an "atomic counter offset"
dword to the TGSI. The decls above could become:

   DCL ATOMIC[0][0], OFFSET(25112)
   DCL ATOMIC[0][1..8], OFFSET(8423), ARRAY(1)
   DCL ATOMIC[0][9], OFFSET(15)

(the point is the order of TGSI indices doesn't matter outside of counter
arrays)

The driver would then compute the number of counters per HW atomic counter
buffer to assign on-chip HW atomic memory during compile time, and then copy
memory from the given offsets to the on-chip HW atomic memory as part of
state validation before draws and dispatches.

Cheers,
Nicolai

--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Announcement: Meson build type change

2017-11-07 Thread Dylan Baker
Quoting Eric Engestrom (2017-11-07 05:57:55)
> On Tuesday, 2017-11-07 06:54:59 -0700, Brian Paul wrote:
> > On 11/07/2017 05:25 AM, Eric Engestrom wrote:
> > > Hi all,
> > > 
> > > As a result of the discussion that started with this message:
> > > https://lists.freedesktop.org/archives/mesa-dev/2017-November/175104.html
> > > 
> > > The default build type for Meson builds has been changed to
> > > `debugoptimized` in commit d5597f09c6a18a3ed2dd [1].
> > > 
> > > For you devs, this means that some debug information will now be missing
> > > if you use the default build type, due to the -O2 optimisations.
> > > You can set the build type by giving `meson` or `meson configure` the
> > > additional `-D buildtype=debug` option.
> > > 
> > > (Note that existing build dirs are not affected, as Meson only uses the
> > > default values during the first initialisation, and stores them for
> > > subsequent invocations.)
> > > 
> > > Also note that asserts are not automatically disabled on release builds,
> > > but are controlled by an independent option, `b_ndebug`, defaulting to
> > > `false`. Setting this option to `true` means "compile asserts out".
> > > Custom assertion code should be compiled based on the standard NDEBUG
> > > (which assert() uses), not Mesa's internal DEBUG.
> > > 
> > > If you have any question regarding Meson, Dylan and I will probably be
> > > able to help you, so don't hesitate to ask :)
> > 
> > I still don't see any mention of meson in the docs/ directory.  I'd really
> > appreciate someone writing a meson.html file that explains how to use meson
> > and how to set build options like you just described.
> 
> Dylan wrote one, but I guess he hasn't pushed it yet. Dylan?
> 
> > 
> > -Brian
> > 

I'll resend it in current form.

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH v1 00/30] anv: dma-buf and DRM format modifiers

2017-11-07 Thread Chad Versace
On Tue 07 Nov 2017, Jason Ekstrand wrote:
> On Tue, Nov 7, 2017 at 8:11 AM, Jason Ekstrand <[1]ja...@jlekstrand.net> 
> wrote:
> 
> On Tue, Nov 7, 2017 at 6:47 AM, Chad Versace <[2]chadvers...@chromium.org>
> wrote:
> 
>     - crucible
> 
>       We really need to write crucible tests to hammer some tricky
>       corner cases.  I haven't written them yet. Volunteers?
> 
> 
> If you expect anything other than anv to implement this, I think those
> tests should probably go in vk-gl-cts.  I'm sorry.
> 
> 
> Also, before we can land the actual implementation of the three VK_EXT
> extensions, I would like there to be some reasonable level of automated
> testing.  If this is something that some compositor stack is going to depend
> on, then I'd really like to not break it randomly.  I'm sorry if that's a bit
> hard-line, but having dEQP has made us all lazy about actually writing tests
> for things.

That's a reasonable requirement. After all, this will be a critical
extension for systems where the compositor uses it.

But, to write those tests, I'll need to `set tabstop=4` in vim! I don't
know if I can swallow that pain :)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: Transform fb buffers are only active if a variable uses them

2017-11-07 Thread Nicolai Hähnle

On 06.11.2017 12:23, Neil Roberts wrote:

The GL spec will soon be revised to clarify that a buffer binding for
a transform feedback buffer is only required if a variable is actually
defined to use the buffer binding point. Previously a declaration for
the default transform buffer would make it require a binding even if
nothing was declared to use the default buffer.

Affects:
KHR-GL44/45.enhanced_layouts.xfb_stride_of_empty_list
KHR-GL44/45.enhanced_layouts.xfb_stride_of_empty_list_and_api


Reviewed-by: Nicolai Hähnle 



---
  src/compiler/glsl/link_varyings.cpp | 24 +++-
  1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/src/compiler/glsl/link_varyings.cpp 
b/src/compiler/glsl/link_varyings.cpp
index 66a20a2..e663930 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -1364,7 +1364,6 @@ store_tfeedback_info(struct gl_context *ctx, struct 
gl_shader_program *prog,
if (has_xfb_qualifiers) {
   for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
  if (prog->TransformFeedback.BufferStride[j]) {
-   buffers |= 1 << j;
 explicit_stride[j] = true;
 xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride =
prog->TransformFeedback.BufferStride[j] / 4;
@@ -1389,10 +1388,24 @@ store_tfeedback_info(struct gl_context *ctx, struct 
gl_shader_program *prog,
  num_buffers++;
  buffer_stream_id = -1;
  continue;
- } else if (tfeedback_decls[i].is_varying()) {
+ }
+
+ if (has_xfb_qualifiers) {
+buffer = tfeedback_decls[i].get_buffer();
+ } else {
+buffer = num_buffers;
+ }
+
+ if (tfeedback_decls[i].is_varying()) {
  if (buffer_stream_id == -1)  {
 /* First varying writing to this buffer: remember its stream */
 buffer_stream_id = (int) tfeedback_decls[i].get_stream_id();
+
+   /* Only mark a buffer as active when there is a varying
+* attached to it. This behaviour is based on a revised version
+* of section 13.2.2 of the GL 4.6 spec.
+*/
+   buffers |= 1 << buffer;
  } else if (buffer_stream_id !=
 (int) tfeedback_decls[i].get_stream_id()) {
 /* Varying writes to the same buffer from a different stream */
@@ -1408,13 +1421,6 @@ store_tfeedback_info(struct gl_context *ctx, struct 
gl_shader_program *prog,
  }
   }
  
- if (has_xfb_qualifiers) {

-buffer = tfeedback_decls[i].get_buffer();
- } else {
-buffer = num_buffers;
- }
- buffers |= 1 << buffer;
-
   if (!tfeedback_decls[i].store(ctx, prog,
 xfb_prog->sh.LinkedTransformFeedback,
 buffer, num_buffers, num_outputs,




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi: remove unused field in the PCI ID table

2017-11-07 Thread Nicolai Hähnle

Reviewed-by: Nicolai Hähnle 

On 07.11.2017 15:28, Marek Olšák wrote:

From: Marek Olšák 

---
  include/pci_ids/radeonsi_pci_ids.h| 458 +++---
  src/amd/common/ac_gpu_info.c  |   2 +-
  src/gallium/winsys/radeon/drm/radeon_drm_winsys.c |   2 +-
  src/loader/pci_id_driver_map.h|   2 +-
  4 files changed, 232 insertions(+), 232 deletions(-)

diff --git a/include/pci_ids/radeonsi_pci_ids.h 
b/include/pci_ids/radeonsi_pci_ids.h
index 9453c1c..6a3594e 100644
--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -1,229 +1,229 @@
-CHIPSET(0x6780, TAHITI_6780, TAHITI)
-CHIPSET(0x6784, TAHITI_6784, TAHITI)
-CHIPSET(0x6788, TAHITI_6788, TAHITI)
-CHIPSET(0x678A, TAHITI_678A, TAHITI)
-CHIPSET(0x6790, TAHITI_6790, TAHITI)
-CHIPSET(0x6791, TAHITI_6791, TAHITI)
-CHIPSET(0x6792, TAHITI_6792, TAHITI)
-CHIPSET(0x6798, TAHITI_6798, TAHITI)
-CHIPSET(0x6799, TAHITI_6799, TAHITI)
-CHIPSET(0x679A, TAHITI_679A, TAHITI)
-CHIPSET(0x679B, TAHITI_679B, TAHITI)
-CHIPSET(0x679E, TAHITI_679E, TAHITI)
-CHIPSET(0x679F, TAHITI_679F, TAHITI)
-
-CHIPSET(0x6800, PITCAIRN_6800, PITCAIRN)
-CHIPSET(0x6801, PITCAIRN_6801, PITCAIRN)
-CHIPSET(0x6802, PITCAIRN_6802, PITCAIRN)
-CHIPSET(0x6806, PITCAIRN_6806, PITCAIRN)
-CHIPSET(0x6808, PITCAIRN_6808, PITCAIRN)
-CHIPSET(0x6809, PITCAIRN_6809, PITCAIRN)
-CHIPSET(0x6810, PITCAIRN_6810, PITCAIRN)
-CHIPSET(0x6811, PITCAIRN_6811, PITCAIRN)
-CHIPSET(0x6816, PITCAIRN_6816, PITCAIRN)
-CHIPSET(0x6817, PITCAIRN_6817, PITCAIRN)
-CHIPSET(0x6818, PITCAIRN_6818, PITCAIRN)
-CHIPSET(0x6819, PITCAIRN_6819, PITCAIRN)
-CHIPSET(0x684C, PITCAIRN_684C, PITCAIRN)
-
-CHIPSET(0x6820, VERDE_6820, VERDE)
-CHIPSET(0x6821, VERDE_6821, VERDE)
-CHIPSET(0x6822, VERDE_6822, VERDE)
-CHIPSET(0x6823, VERDE_6823, VERDE)
-CHIPSET(0x6824, VERDE_6824, VERDE)
-CHIPSET(0x6825, VERDE_6825, VERDE)
-CHIPSET(0x6826, VERDE_6826, VERDE)
-CHIPSET(0x6827, VERDE_6827, VERDE)
-CHIPSET(0x6828, VERDE_6828, VERDE)
-CHIPSET(0x6829, VERDE_6829, VERDE)
-CHIPSET(0x682A, VERDE_682A, VERDE)
-CHIPSET(0x682B, VERDE_682B, VERDE)
-CHIPSET(0x682C, VERDE_682C, VERDE)
-CHIPSET(0x682D, VERDE_682D, VERDE)
-CHIPSET(0x682F, VERDE_682F, VERDE)
-CHIPSET(0x6830, VERDE_6830, VERDE)
-CHIPSET(0x6831, VERDE_6831, VERDE)
-CHIPSET(0x6835, VERDE_6835, VERDE)
-CHIPSET(0x6837, VERDE_6837, VERDE)
-CHIPSET(0x6838, VERDE_6838, VERDE)
-CHIPSET(0x6839, VERDE_6839, VERDE)
-CHIPSET(0x683B, VERDE_683B, VERDE)
-CHIPSET(0x683D, VERDE_683D, VERDE)
-CHIPSET(0x683F, VERDE_683F, VERDE)
-
-CHIPSET(0x6600, OLAND_6600, OLAND)
-CHIPSET(0x6601, OLAND_6601, OLAND)
-CHIPSET(0x6602, OLAND_6602, OLAND)
-CHIPSET(0x6603, OLAND_6603, OLAND)
-CHIPSET(0x6604, OLAND_6604, OLAND)
-CHIPSET(0x6605, OLAND_6605, OLAND)
-CHIPSET(0x6606, OLAND_6606, OLAND)
-CHIPSET(0x6607, OLAND_6607, OLAND)
-CHIPSET(0x6608, OLAND_6608, OLAND)
-CHIPSET(0x6610, OLAND_6610, OLAND)
-CHIPSET(0x6611, OLAND_6611, OLAND)
-CHIPSET(0x6613, OLAND_6613, OLAND)
-CHIPSET(0x6617, OLAND_6617, OLAND)
-CHIPSET(0x6620, OLAND_6620, OLAND)
-CHIPSET(0x6621, OLAND_6621, OLAND)
-CHIPSET(0x6623, OLAND_6623, OLAND)
-CHIPSET(0x6631, OLAND_6631, OLAND)
-
-CHIPSET(0x6660, HAINAN_6660, HAINAN)
-CHIPSET(0x6663, HAINAN_6663, HAINAN)
-CHIPSET(0x6664, HAINAN_6664, HAINAN)
-CHIPSET(0x6665, HAINAN_6665, HAINAN)
-CHIPSET(0x6667, HAINAN_6667, HAINAN)
-CHIPSET(0x666F, HAINAN_666F, HAINAN)
-
-CHIPSET(0x6640, BONAIRE_6640, BONAIRE)
-CHIPSET(0x6641, BONAIRE_6641, BONAIRE)
-CHIPSET(0x6646, BONAIRE_6646, BONAIRE)
-CHIPSET(0x6647, BONAIRE_6647, BONAIRE)
-CHIPSET(0x6649, BONAIRE_6649, BONAIRE)
-CHIPSET(0x6650, BONAIRE_6650, BONAIRE)
-CHIPSET(0x6651, BONAIRE_6651, BONAIRE)
-CHIPSET(0x6658, BONAIRE_6658, BONAIRE)
-CHIPSET(0x665C, BONAIRE_665C, BONAIRE)
-CHIPSET(0x665D, BONAIRE_665D, BONAIRE)
-CHIPSET(0x665F, BONAIRE_665F, BONAIRE)
-
-CHIPSET(0x9830, KABINI_9830, KABINI)
-CHIPSET(0x9831, KABINI_9831, KABINI)
-CHIPSET(0x9832, KABINI_9832, KABINI)
-CHIPSET(0x9833, KABINI_9833, KABINI)
-CHIPSET(0x9834, KABINI_9834, KABINI)
-CHIPSET(0x9835, KABINI_9835, KABINI)
-CHIPSET(0x9836, KABINI_9836, KABINI)
-CHIPSET(0x9837, KABINI_9837, KABINI)
-CHIPSET(0x9838, KABINI_9838, KABINI)
-CHIPSET(0x9839, KABINI_9839, KABINI)
-CHIPSET(0x983A, KABINI_983A, KABINI)
-CHIPSET(0x983B, KABINI_983B, KABINI)
-CHIPSET(0x983C, KABINI_983C, KABINI)
-CHIPSET(0x983D, KABINI_983D, KABINI)
-CHIPSET(0x983E, KABINI_983E, KABINI)
-CHIPSET(0x983F, KABINI_983F, KABINI)
-
-CHIPSET(0x9850, MULLINS_9850, MULLINS)
-CHIPSET(0x9851, MULLINS_9851, MULLINS)
-CHIPSET(0x9852, MULLINS_9852, MULLINS)
-CHIPSET(0x9853, MULLINS_9853, MULLINS)
-CHIPSET(0x9854, MULLINS_9854, MULLINS)
-CHIPSET(0x9855, MULLINS_9855, MULLINS)
-CHIPSET(0x9856, MULLINS_9856, MULLINS)
-CHIPSET(0x9857, MULLINS_9857, MULLINS)
-CHIPSET(0x9858, MULLINS_9858, MULLINS)
-CHIPSET(0x9859, MULLINS_9859, MULLINS)
-CHIPSET(0x985A, MULLINS_985A, MULLINS)
-CHIPSET(0x985B, MULLINS_985B, MULLINS)
-CHIPSET(0x985C, MULLINS_985C, MULLINS)
-CHIPSET(0x985D, MULLINS_985D, MULLINS)

Re: [Mesa-dev] [PATCH 2/8] gallium/tgsi: start adding hw atomics (v3)

2017-11-07 Thread Marek Olšák
With HW atomic counters, MaxAtomicBufferSize is a pretty small number
(counters * 4). TGSI has maximum index = 32K.

Marek

On Tue, Nov 7, 2017 at 5:43 PM, Nicolai Hähnle  wrote:
> On 07.11.2017 17:25, Nicolai Hähnle wrote:
>>
>> On 07.11.2017 07:31, Dave Airlie wrote:
>>>
>>> diff --git a/src/gallium/docs/source/tgsi.rst
>>> b/src/gallium/docs/source/tgsi.rst
>>> index 1a51fe9..0c331f2 100644
>>> --- a/src/gallium/docs/source/tgsi.rst
>>> +++ b/src/gallium/docs/source/tgsi.rst
>>> @@ -2638,9 +2638,11 @@ logical operations.  In this context atomicity
>>> means that another
>>>   concurrent memory access operation that affects the same memory
>>>   location is guaranteed to be performed strictly before or after the
>>>   entire execution of the atomic operation. The resource may be a BUFFER,
>>> -IMAGE, or MEMORY.  In the case of an image, the offset works the same as
>>> for
>>> -``LOAD`` and ``STORE``, specified above. These atomic operations may
>>> -only be used with 32-bit integer image formats.
>>> +IMAGE, ATOMIC, or MEMORY.  In the case of an image, the offset works
>>> +the same as for ``LOAD`` and ``STORE``, specified above. For atomic
>>> +counters, the offset is an immediate index to the base hw atomic
>>> +counter for this operation.
>>> +These atomic operations may only be used with 32-bit integer image
>>> formats.
>>>   .. opcode:: ATOMUADD - Atomic integer addition
>>> @@ -3440,7 +3442,6 @@ TGSI_SEMANTIC_SUBGROUP_LT_MASK
>>>   A bit mask of ``bit index < TGSI_SEMANTIC_SUBGROUP_INVOCATION``, i.e.
>>>   ``(1 << subgroup_invocation) - 1`` in arbitrary precision arithmetic.
>>> -
>>
>>
>> Stray whitespace change.
>>
>>
>>>   Declaration Interpolate
>>>   ^^^
>>> @@ -3517,6 +3518,31 @@ accessing a misaligned address is undefined.
>>>   Usage of the STORE opcode is only allowed if the WR (writable) flag
>>>   is set.
>>> +Hardware Atomic Register File
>>> +^
>>> +
>>> +Hardware atomics are declared as a 2D array with an optional array id.
>>> +
>>> +The first member of the dimension is the buffer resource the atomic
>>> +is located in.
>>> +The second member is a range into the buffer resource, either for
>>> +one or multiple counters. If this is an array, the declaration will have
>>> +a unique array id.
>>> +
>>> +Each counter is 4 bytes in size, and index and ranges are in counters
>>> not bytes.
>>> +DCL ATOMIC[0][0]
>>> +DCL ATOMIC[0][1]
>>> +
>>> +This declares two atomics, one at the start of the buffer and one in the
>>> +second 4 bytes.
>>> +
>>> +DCL ATOMIC[0][0]
>>> +DCL ATOMIC[1][0]
>>> +DCL ATOMIC[1][1..3], ARRAY(1)
>>> +
>>> +This declares 5 atomics, one in buffer 0 at 0,
>>> +one in buffer 1 at 0, and an array of 3 atomics in
>>> +the buffer 1, starting at 1.
>>
>>
>> My understanding is that these ranges could be highly non-contiguous,
>> right? I.e., you could have
>>
>> DCL ATOMIC[0][15]
>> DCL ATOMIC[0][8423..8430], ARRAY(1)
>> DCL ATOMIC[0][25112]
>>
>> ... corresponding to the offsets in the GLSL shader. The doc should really
>> point this out explicitly. Also, this might cause trouble because the TGSI
>> range tokens don't have enough bits to represent high offsets.
>
>
> Thinking about it some more, here's one way to deal with it. Have
> st_glsl_to_tgsi pack the indices (by keeping track of the number of
> counters per atomic counter buffer), and then add an "atomic counter offset"
> dword to the TGSI. The decls above could become:
>
>   DCL ATOMIC[0][0], OFFSET(25112)
>   DCL ATOMIC[0][1..8], OFFSET(8423), ARRAY(1)
>   DCL ATOMIC[0][9], OFFSET(15)
>
> (the point is the order of TGSI indices doesn't matter outside of counter
> arrays)
>
> The driver would then compute the number of counters per HW atomic counter
> buffer to assign on-chip HW atomic memory during compile time, and then copy
> memory from the given offsets to the on-chip HW atomic memory as part of
> state validation before draws and dispatches.
>
> Cheers,
> Nicolai
>
> --
> Lerne, wie die Welt wirklich ist,
> Aber vergiss niemals, wie sie sein sollte.
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] radeonsi: add si_screen::has_ls_vgpr_init_bug

2017-11-07 Thread Nicolai Hähnle

For the series:

Reviewed-by: Nicolai Hähnle 

On 07.11.2017 04:12, Marek Olšák wrote:

From: Marek Olšák 

---
  src/gallium/drivers/radeonsi/si_pipe.c   | 2 ++
  src/gallium/drivers/radeonsi/si_pipe.h   | 1 +
  src/gallium/drivers/radeonsi/si_shader.c | 3 +--
  src/gallium/drivers/radeonsi/si_state_draw.c | 2 +-
  4 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 1ca5ca3..391997d 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -1067,20 +1067,22 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws,
sscreen->assume_no_z_fights =
driQueryOptionb(config->options, "radeonsi_assume_no_z_fights");
sscreen->commutative_blend_add =
driQueryOptionb(config->options, 
"radeonsi_commutative_blend_add");
sscreen->clear_db_cache_before_clear =
driQueryOptionb(config->options, 
"radeonsi_clear_db_cache_before_clear");
sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >= CHIP_POLARIS10 
&&
sscreen->b.family <= 
CHIP_POLARIS12) ||
   sscreen->b.family == CHIP_VEGA10 ||
   sscreen->b.family == CHIP_RAVEN;
+   sscreen->has_ls_vgpr_init_bug = sscreen->b.family == CHIP_VEGA10 ||
+   sscreen->b.family == CHIP_RAVEN;
  
  	if (sscreen->b.debug_flags & DBG(DPBB)) {

sscreen->dpbb_allowed = true;
} else {
/* Only enable primitive binning on Raven by default. */
sscreen->dpbb_allowed = sscreen->b.family == CHIP_RAVEN &&
!(sscreen->b.debug_flags & 
DBG(NO_DPBB));
}
  
  	if (sscreen->b.debug_flags & DBG(DFSM)) {

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index ab82064..6be51bb 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -90,20 +90,21 @@ struct si_screen {
unsignedgs_table_depth;
unsignedtess_offchip_block_dw_size;
boolhas_clear_state;
boolhas_distributed_tess;
boolhas_draw_indirect_multi;
boolhas_out_of_order_rast;
boolassume_no_z_fights;
boolcommutative_blend_add;
boolclear_db_cache_before_clear;
boolhas_msaa_sample_loc_bug;
+   boolhas_ls_vgpr_init_bug;
booldpbb_allowed;
booldfsm_allowed;
boolllvm_has_working_vgpr_indexing;
  
  	/* Whether shaders are monolithic (1-part) or separate (3-part). */

booluse_monolithic_shaders;
boolrecord_llvm_ir;
  
  	mtx_t			shader_parts_mutex;

struct si_shader_part   *vs_prologs;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 6bc08dd..c95f8d7 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6875,22 +6875,21 @@ static void si_build_vs_prolog_function(struct 
si_shader_context *ctx,
  
  	/* Create the function. */

si_create_function(ctx, "vs_prolog", returns, num_returns, &fninfo, 0);
func = ctx->main_fn;
  
  	if (key->vs_prolog.num_merged_next_stage_vgprs) {

if (!key->vs_prolog.is_monolithic)
si_init_exec_from_input(ctx, 3, 0);
  
  		if (key->vs_prolog.as_ls &&

-   (ctx->screen->b.family == CHIP_VEGA10 ||
-ctx->screen->b.family == CHIP_RAVEN)) {
+   ctx->screen->has_ls_vgpr_init_bug) {
/* If there are no HS threads, SPI loads the LS VGPRs
 * starting at VGPR 0. Shift them back to where they
 * belong.
 */
LLVMValueRef has_hs_threads =
LLVMBuildICmp(ctx->ac.builder, LLVMIntNE,
unpack_param(ctx, 3, 8, 8),
ctx->i32_0, "");
  
  			for (i = 4; i > 0; --i) {

diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
b/src/gallium/drivers/radeonsi/si_state_draw.c
index b17828e..2d9fcfe 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1275,21 +1275,21 @@ void si_draw_vbo(struct pipe_context *ctx,

Re: [Mesa-dev] [RFC PATCH v1 09/30] anv: Refactor get_image_format_properties() - Reduce params

2017-11-07 Thread Jason Ekstrand
Actually, ignore all my comments.  Patches 1-18 are

Reviewed-by: Jason Ekstrand 

On Tue, Nov 7, 2017 at 8:47 AM, Jason Ekstrand  wrote:

> On Tue, Nov 7, 2017 at 6:47 AM, Chad Versace 
> wrote:
>
>> Replace parameters 'enum isl_format' and 'struct anv_format_plane' with
>> new parameter 'const struct anv_format *'.
>>
>
> This patch makes me nervous for a few reasons.  I made a bunch of comments
> below.  However, I'd like you to ignore all of them except for the one
> about anv_format since I think they all get fixed in later patches.
>
>
>> The goal is to incrementally fix get_image_format_properties() to return
>> a correct result.  Currently, it returns incorrect VkFormatFeatureFlags
>> which the caller must clean up.
>> ---
>>  src/intel/vulkan/anv_formats.c | 32 +---
>>  1 file changed, 21 insertions(+), 11 deletions(-)
>>
>> diff --git a/src/intel/vulkan/anv_formats.c
>> b/src/intel/vulkan/anv_formats.c
>> index 3cc0673cbaf..151c1c9e066 100644
>> --- a/src/intel/vulkan/anv_formats.c
>> +++ b/src/intel/vulkan/anv_formats.c
>> @@ -469,13 +469,12 @@ anv_get_format_plane(const struct gen_device_info
>> *devinfo, VkFormat vk_format,
>>  static VkFormatFeatureFlags
>>  get_image_format_properties(const struct gen_device_info *devinfo,
>>  VkFormat vk_format,
>> -enum isl_format base_isl_format,
>> -struct anv_format_plane plane_format,
>> +const struct anv_format *anv_format,
>>
>
> At this point, we may as well just take the vk_format and vk_tiling and be
> done with it.  Are we really gaining anything by also taking the anv_format?
>
>
>>  VkImageTiling vk_tiling)
>>  {
>> VkFormatFeatureFlags flags = 0;
>>
>> -   if (plane_format.isl_format == ISL_FORMAT_UNSUPPORTED)
>> +   if (anv_format == NULL)
>>
>
> Does the caller actually ensure that these are the same?  I think it does
> but it's subtle.
>
>
>>return 0;
>>
>> const VkImageAspectFlags aspects = vk_format_aspects(vk_format);
>> @@ -497,6 +496,22 @@ get_image_format_properties(const struct
>> gen_device_info *devinfo,
>>return flags;
>> }
>>
>> +   const struct anv_format_plane plane_format =
>> +  anv_get_format_plane(devinfo, vk_format, VK_IMAGE_ASPECT_COLOR_BIT,
>> +   vk_tiling);
>>
>
> If we want to move this a bit higher, I think we can just always use plane
> 0.  We handle planar formats specially anyway.  I'm not actually convinced
> that doing so really helps us but it's a thought.  In any case, I think we
> want it to be after YUV.
>
>
>> +
>> +   if (plane_format.isl_format == ISL_FORMAT_UNSUPPORTED)
>> +  return 0;
>>
>
> If we always use plane 0 (which you do because you're passing
> ASPECT_COLOR_BIT), then this is redundant with the check at the top.
>
>
>> +
>> +   struct anv_format_plane base_plane_format = plane_format;
>> +   if (vk_tiling == VK_IMAGE_TILING_OPTIMAL) {
>> +  base_plane_format = anv_get_format_plane(devinfo, vk_format,
>> +   VK_IMAGE_ASPECT_COLOR_BIT,
>> +   VK_IMAGE_TILING_LINEAR);
>> +   }
>> +
>> +   enum isl_format base_isl_format = base_plane_format.isl_format;
>> +
>> /* ASTC textures must be in Y-tiled memory */
>> if (vk_tiling == VK_IMAGE_TILING_LINEAR &&
>> isl_format_get_layout(plane_format.isl_format)->txc ==
>> ISL_TXC_ASTC)
>> @@ -593,20 +608,15 @@ anv_physical_device_get_format_properties(struct
>> anv_physical_device *physical_d
>> if (format == NULL) {
>>/* Nothing to do here */
>> } else {
>> -  struct anv_format_plane linear_fmt, tiled_fmt;
>> +  struct anv_format_plane linear_fmt;
>>linear_fmt = anv_get_format_plane(&physical_device->info,
>> vk_format,
>>  VK_IMAGE_ASPECT_COLOR_BIT,
>>  VK_IMAGE_TILING_LINEAR);
>> -  tiled_fmt = anv_get_format_plane(&physical_device->info,
>> vk_format,
>> -   VK_IMAGE_ASPECT_COLOR_BIT,
>> -   VK_IMAGE_TILING_OPTIMAL);
>>
>>linear = get_image_format_properties(&physical_device->info,
>> vk_format,
>> -   linear_fmt.isl_format,
>> linear_fmt,
>> -   VK_IMAGE_TILING_LINEAR);
>> +   format,
>> VK_IMAGE_TILING_LINEAR);
>>tiled = get_image_format_properties(&physical_device->info,
>> vk_format,
>> -  linear_fmt.isl_format,
>> tiled_fmt,
>> -  VK_IMAGE_TILING_OPTIMAL);
>> +  format,
>> VK_IMAGE_TILING_OPTIMAL);
>>
>
> This is the part that really makes me nervous.  Before, it was clear that
> get_i

Re: [Mesa-dev] [PATCH 2/2] r600g: use SIMPLE_FLOAT for blending to avoid NaNs in RTs

2017-11-07 Thread Nicolai Hähnle

On 06.11.2017 15:40, Ilia Mirkin wrote:

On Mon, Nov 6, 2017 at 8:48 AM, Ilia Mirkin  wrote:

On Mon, Nov 6, 2017 at 6:21 AM, Nicolai Hähnle  wrote:

On 06.11.2017 05:22, Ilia Mirkin wrote:


Radeonsi also sets this flag.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103544
Signed-off-by: Ilia Mirkin 
---

This needs testing with the fbo-float-nan piglit that was recently added.
Just
guessing that this is the right flag to set here.



Assuming that the test passes:


Well, the test can't pass or fail. The behavior is undefined. But Dave
ran it last night to see what would happen. It appears that with the
current code, if you have a float RB with an infinity or NaN in it, and
then you set the dst blend factor to GL_ZERO, then you'll still end up
with a NaN in the result.

With this change, you'll end up with what most people would expect
with a GL_ZERO dst blend factor. Unless your src blend factor is e.g.
GL_DST_ALPHA, the dst op is multiplied. So if e.g. the RB has
(0,0,0,infinity), and shader output is (0,0,0,infinity), then src *
GL_DST_ALPHA + dst * GL_ZERO = (nan, nan, nan, nan). However src *
GL_SRC_ALPHA + dst + GL_ZERO = (0, 0, 0, infinity).


Ugh. I managed to confuse myself. With the patch and src = dst =
(0,0,0,infinity):

src * GL_DST_ALPHA + dst * GL_ZERO = (nan, nan, nan, nan).
src * GL_SRC_ALPHA + dst * GL_ZERO = (nan, nan, nan, infinity)



Perhaps there's another flag which controls 0 * inf = 0 vs NaN thing
for blending, but it wasn't immediately apparent from the docs. NVIDIA
blob drivers configure the hw for 0 * inf = 0.



Reviewed-by: Nicolai Hähnle 


Let me know if this stands, given the above info.


Fine by me.

Cheers,
Nicolai










   src/gallium/drivers/r600/evergreen_state.c | 1 +
   src/gallium/drivers/r600/r600_state.c  | 1 +
   2 files changed, 2 insertions(+)

diff --git a/src/gallium/drivers/r600/evergreen_state.c
b/src/gallium/drivers/r600/evergreen_state.c
index 96eb35a9818..131778dea9f 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1211,6 +1211,7 @@ static void
evergreen_set_color_surface_common(struct r600_context *rctx,
 S_028C70_COMP_SWAP(swap) |
 S_028C70_BLEND_CLAMP(blend_clamp) |
 S_028C70_BLEND_BYPASS(blend_bypass) |
+   S_028C70_SIMPLE_FLOAT(1) |
 S_028C70_NUMBER_TYPE(ntype) |
 S_028C70_ENDIAN(endian);
   diff --git a/src/gallium/drivers/r600/r600_state.c
b/src/gallium/drivers/r600/r600_state.c
index c21e8dabb1f..0c331537460 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -898,6 +898,7 @@ static void r600_init_color_surface(struct
r600_context *rctx,
 S_0280A0_COMP_SWAP(swap) |
 S_0280A0_BLEND_BYPASS(blend_bypass) |
 S_0280A0_BLEND_CLAMP(blend_clamp) |
+   S_0280A0_SIMPLE_FLOAT(1) |
 S_0280A0_NUMBER_TYPE(ntype) |
 S_0280A0_ENDIAN(endian);





--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.



--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] loader/dri3: Improve dri3 thread-safety

2017-11-07 Thread Nicolai Hähnle

On 06.11.2017 12:53, Thomas Hellstrom wrote:

On 11/06/2017 12:14 PM, Nicolai Hähnle wrote:

On 03.11.2017 12:02, Thomas Hellstrom wrote:
It turned out that with recent changes that call into dri3 from 
glFinish(),
it appears that different threads end up waiting for X events 
simultaneously,
causing deadlocks since they steal events from each other and update 
the dri3

counters behind each other's backs.

This patch intends to improve on that. It allows at most one thread at a
time to wait on events for a single drawable. If another thread 
intends to
do the same, it's put to sleep until the first thread finishes 
waiting, and
then it rechecks counters and optionally retries the waiting. Threads 
that

poll for X events never pull X events off the event queue if there are
other threads waiting for events on that drawable. Counters in the
dri3 drawable structure are protected by a mutex. Finally, the mutex we
introduce is never held while waiting for the X server to avoid
unnecessary stalls.

This does not make dri3 drawables completely thread-safe but at least 
it's a

first step.

Bugzilla: 
https://urldefense.proofpoint.com/v2/url?u=https-3A__bugs.freedesktop.org_show-5Fbug.cgi-3Fid-3D102358&d=DwICaQ&c=uilaK90D4TOVoH58JNXRgQ&r=wnSlgOCqfpNS4d02vP68_E9q2BNMCwfD2OZ_6dCFVQQ&m=yqE5Xb9bQg5hA8gP7s0b3dSSZoPWaAEKACqD8qfhdZo&s=wf-xBzPlZ805RCV1hnDgoyW0fYe2ZX3Qwie66SU936g&e= 

Fixes: d5ba75f8881 "st/dri2 Plumb the flush_swapbuffer functionality 
through to dri3"

Signed-off-by: Thomas Hellstrom 
---
  src/loader/loader_dri3_helper.c | 77 
+++--

  src/loader/loader_dri3_helper.h | 10 ++
  2 files changed, 69 insertions(+), 18 deletions(-)

diff --git a/src/loader/loader_dri3_helper.c 
b/src/loader/loader_dri3_helper.c

index 19ab581..7e6b8b2 100644
--- a/src/loader/loader_dri3_helper.c
+++ b/src/loader/loader_dri3_helper.c
@@ -32,7 +32,6 @@
    #include 
  -#include 
  #include "loader_dri3_helper.h"
    /* From xmlpool/options.h, user exposed so should be stable */
@@ -186,8 +185,11 @@ dri3_fence_await(xcb_connection_t *c, struct 
loader_dri3_drawable *draw,

  {
 xcb_flush(c);
 xshmfence_await(buffer->shm_fence);
-   if (draw)
+   if (draw) {
+  mtx_lock(&draw->mtx);
    dri3_flush_present_events(draw);
+  mtx_unlock(&draw->mtx);
+   }
  }
    static void
@@ -245,6 +247,9 @@ loader_dri3_drawable_fini(struct 
loader_dri3_drawable *draw)

    xcb_discard_reply(draw->conn, cookie.sequence);
    xcb_unregister_for_special_event(draw->conn, 
draw->special_event);

 }
+
+   cnd_destroy(&draw->event_cnd);
+   mtx_destroy(&draw->mtx);
  }
    int
@@ -276,6 +281,8 @@ loader_dri3_drawable_init(xcb_connection_t *conn,
   draw->cur_blit_source = -1;
 draw->back_format = __DRI_IMAGE_FORMAT_NONE;
+   mtx_init(&draw->mtx, mtx_plain);
+   cnd_init(&draw->event_cnd);
   if (draw->ext->config)
draw->ext->config->configQueryi(draw->dri_screen,
@@ -407,13 +414,27 @@ dri3_handle_present_event(struct 
loader_dri3_drawable *draw,

  }
    static bool
-dri3_wait_for_event(struct loader_dri3_drawable *draw)
+dri3_wait_for_event_locked(struct loader_dri3_drawable *draw)
  {
 xcb_generic_event_t *ev;
 xcb_present_generic_event_t *ge;
   xcb_flush(draw->conn);


(Why) Doesn't the flush need to be protected as well? Can it be 
removed entirely given that it's already called from 
loader_dri3_wait_for_msc? Though dri3_find_back is different - why?




Thanks for reviewing.

AFAIK (correct me if I'm wrong), xcb should be thread-safe enough to 
allow multiple threads to call xcb_flush() simultaneously so we 
shouldn't need to explicitly "serialize" it.


There might indeed be unnecessary xcb_flushes(). In the dri3_find_back 
case when we're initially just checking for arrived events, we know that 
any preceding swapbuffers (that govern the reuse of back buffers) will 
have flushed xcb. However when we explicitly wait for special events, 
any forgotten flush would be catastrophic, causing a deadlock.


We should probably audit the code for unnecessary xcb flushes, but as a 
follow-up patch.


Fair enough.

Acked-by: Nicolai Hähnle 




/Thomas



Apart from that, the patch does indeed look like a good first step, 
though keep in mind that I'm not too familiar with this stuff...


Cheers,
Nicolai





--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] glsl: add varying resources for arrays of complex types

2017-11-07 Thread Nicolai Hähnle

Looks plausible.

Reviewed-by: Nicolai Hähnle 

On 02.11.2017 18:49, Juan A. Suarez Romero wrote:

This patch is mostly a patch done by Ilia Mirkin.

It fixes KHR-GL45.enhanced_layouts.varying_structure_locations.

v2: fix locations for TCS/TES/GS inputs and outputs (Ilia)

CC: Ilia Mirkin 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103098
Signed-off-by: Juan A. Suarez Romero 
---
  src/compiler/glsl/linker.cpp | 63 +---
  1 file changed, 59 insertions(+), 4 deletions(-)

diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index 004529157ee..73611797abd 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -3802,6 +3802,7 @@ add_shader_variable(const struct gl_context *ctx,
  GLenum programInterface, ir_variable *var,
  const char *name, const glsl_type *type,
  bool use_implicit_location, int location,
+bool inouts_share_location,
  const glsl_type *outermost_struct_type = NULL)
  {
 const glsl_type *interface_type = var->get_interface_type();
@@ -3864,7 +3865,7 @@ add_shader_variable(const struct gl_context *ctx,
stage_mask, programInterface,
var, field_name, field->type,
use_implicit_location, field_location,
-  outermost_struct_type))
+  false, outermost_struct_type))
  return false;
  
   field_location += field->type->count_attribute_slots(false);

@@ -3872,6 +3873,43 @@ add_shader_variable(const struct gl_context *ctx,
return true;
 }
  
+   case GLSL_TYPE_ARRAY: {

+  /* The ARB_program_interface_query spec says:
+   *
+   * "For an active variable declared as an array of basic types, a
+   *  single entry will be generated, with its name string formed by
+   *  concatenating the name of the array and the string "[0]"."
+   *
+   * "For an active variable declared as an array of an aggregate data
+   *  type (structures or arrays), a separate entry will be generated
+   *  for each active array element, unless noted immediately below.
+   *  The name of each entry is formed by concatenating the name of
+   *  the array, the "[" character, an integer identifying the element
+   *  number, and the "]" character.  These enumeration rules are
+   *  applied recursively, treating each enumerated array element as a
+   *  separate active variable."
+   */
+  const struct glsl_type *array_type = type->fields.array;
+  if (array_type->base_type == GLSL_TYPE_STRUCT ||
+  array_type->base_type == GLSL_TYPE_ARRAY) {
+ unsigned elem_location = location;
+ unsigned stride = inouts_share_location ? 0 :
+   array_type->count_attribute_slots(false);
+ for (unsigned i = 0; i < type->length; i++) {
+char *elem = ralloc_asprintf(shProg, "%s[%d]", name, i);
+if (!add_shader_variable(ctx, shProg, resource_set,
+ stage_mask, programInterface,
+ var, elem, array_type,
+ use_implicit_location, elem_location,
+ false, outermost_struct_type))
+   return false;
+elem_location += stride;
+ }
+ return true;
+  }
+  /* fallthrough */
+   }
+
 default: {
/* The ARB_program_interface_query spec says:
 *
@@ -3892,6 +3930,20 @@ add_shader_variable(const struct gl_context *ctx,
 }
  }
  
+static bool

+inout_has_same_location(const ir_variable *var, unsigned stage)
+{
+   if (!var->data.patch &&
+   ((var->data.mode == ir_var_shader_out &&
+ stage == MESA_SHADER_TESS_CTRL) ||
+(var->data.mode == ir_var_shader_in &&
+ (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL ||
+  stage == MESA_SHADER_GEOMETRY))))
+  return true;
+   else
+  return false;
+}
+
  static bool
  add_interface_variables(const struct gl_context *ctx,
  struct gl_shader_program *shProg,
@@ -3948,7 +4000,8 @@ add_interface_variables(const struct gl_context *ctx,
if (!add_shader_variable(ctx, shProg, resource_set,
 1 << stage, programInterface,
 var, var->name, var->type, 
vs_input_or_fs_output,
-   var->data.location - loc_bias))
+   var->data.location - loc_bias,
+   inout_has_same_location(var, stage)))
   return false;
 }
 return true;
@@ -3986,7 +4039,8 @@ add_packed_varyings(const struct gl_context *ctx,

Re: [Mesa-dev] [RFC PATCH v1 09/30] anv: Refactor get_image_format_properties() - Reduce params

2017-11-07 Thread Jason Ekstrand
On Tue, Nov 7, 2017 at 6:47 AM, Chad Versace 
wrote:

> Replace parameters 'enum isl_format' and 'struct anv_format_plane' with
> new parameter 'const struct anv_format *'.
>

This patch makes me nervous for a few reasons.  I made a bunch of comments
below.  However, I'd like you to ignore all of them except for the one
about anv_format since I think they all get fixed in later patches.


> The goal is to incrementally fix get_image_format_properties() to return
> a correct result.  Currently, it returns incorrect VkFormatFeatureFlags
> which the caller must clean up.
> ---
>  src/intel/vulkan/anv_formats.c | 32 +---
>  1 file changed, 21 insertions(+), 11 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_
> formats.c
> index 3cc0673cbaf..151c1c9e066 100644
> --- a/src/intel/vulkan/anv_formats.c
> +++ b/src/intel/vulkan/anv_formats.c
> @@ -469,13 +469,12 @@ anv_get_format_plane(const struct gen_device_info
> *devinfo, VkFormat vk_format,
>  static VkFormatFeatureFlags
>  get_image_format_properties(const struct gen_device_info *devinfo,
>  VkFormat vk_format,
> -enum isl_format base_isl_format,
> -struct anv_format_plane plane_format,
> +const struct anv_format *anv_format,
>

At this point, we may as well just take the vk_format and vk_tiling and be
done with it.  Are we really gaining anything by also taking the anv_format?


>  VkImageTiling vk_tiling)
>  {
> VkFormatFeatureFlags flags = 0;
>
> -   if (plane_format.isl_format == ISL_FORMAT_UNSUPPORTED)
> +   if (anv_format == NULL)
>

Does the caller actually ensure that these are the same?  I think it does
but it's subtle.


>return 0;
>
> const VkImageAspectFlags aspects = vk_format_aspects(vk_format);
> @@ -497,6 +496,22 @@ get_image_format_properties(const struct
> gen_device_info *devinfo,
>return flags;
> }
>
> +   const struct anv_format_plane plane_format =
> +  anv_get_format_plane(devinfo, vk_format, VK_IMAGE_ASPECT_COLOR_BIT,
> +   vk_tiling);
>

If we want to move this a bit higher, I think we can just always use plane
0.  We handle planar formats specially anyway.  I'm not actually convinced
that doing so really helps us but it's a thought.  In any case, I think we
want it to be after YUV.


> +
> +   if (plane_format.isl_format == ISL_FORMAT_UNSUPPORTED)
> +  return 0;
>

If we always use plane 0 (which you do because you're passing
ASPECT_COLOR_BIT), then this is redundant with the check at the top.


> +
> +   struct anv_format_plane base_plane_format = plane_format;
> +   if (vk_tiling == VK_IMAGE_TILING_OPTIMAL) {
> +  base_plane_format = anv_get_format_plane(devinfo, vk_format,
> +   VK_IMAGE_ASPECT_COLOR_BIT,
> +   VK_IMAGE_TILING_LINEAR);
> +   }
> +
> +   enum isl_format base_isl_format = base_plane_format.isl_format;
> +
> /* ASTC textures must be in Y-tiled memory */
> if (vk_tiling == VK_IMAGE_TILING_LINEAR &&
> isl_format_get_layout(plane_format.isl_format)->txc ==
> ISL_TXC_ASTC)
> @@ -593,20 +608,15 @@ anv_physical_device_get_format_properties(struct
> anv_physical_device *physical_d
> if (format == NULL) {
>/* Nothing to do here */
> } else {
> -  struct anv_format_plane linear_fmt, tiled_fmt;
> +  struct anv_format_plane linear_fmt;
>linear_fmt = anv_get_format_plane(&physical_device->info,
> vk_format,
>  VK_IMAGE_ASPECT_COLOR_BIT,
>  VK_IMAGE_TILING_LINEAR);
> -  tiled_fmt = anv_get_format_plane(&physical_device->info, vk_format,
> -   VK_IMAGE_ASPECT_COLOR_BIT,
> -   VK_IMAGE_TILING_OPTIMAL);
>
>linear = get_image_format_properties(&physical_device->info,
> vk_format,
> -   linear_fmt.isl_format,
> linear_fmt,
> -   VK_IMAGE_TILING_LINEAR);
> +   format,
> VK_IMAGE_TILING_LINEAR);
>tiled = get_image_format_properties(&physical_device->info,
> vk_format,
> -  linear_fmt.isl_format,
> tiled_fmt,
> -  VK_IMAGE_TILING_OPTIMAL);
> +  format,
> VK_IMAGE_TILING_OPTIMAL);
>

This is the part that really makes me nervous.  Before, it was clear that
get_image_format_properties was doing different things for tiled vs. linear.


>
>/* XXX: We handle 3-channel formats by switching them out for RGBX
> or
> * RGBA formats behind-the-scenes.  This works fine for textures
> --
> 2.13.0
>
> __

[Mesa-dev] [PATCH 1/3] etnaviv: rnndb update

2017-11-07 Thread Wladimir J. van der Laan
Update rnndb to etna_viv 07c756a.

Signed-off-by: Wladimir J. van der Laan 
---
 src/gallium/drivers/etnaviv/hw/common.xml.h |  2 +-
 src/gallium/drivers/etnaviv/hw/common_3d.xml.h  |  2 +-
 src/gallium/drivers/etnaviv/hw/state.xml.h  |  4 ++--
 src/gallium/drivers/etnaviv/hw/state_3d.xml.h   | 14 --
 src/gallium/drivers/etnaviv/hw/state_blt.xml.h  |  4 ++--
 src/gallium/drivers/etnaviv/hw/texdesc_3d.xml.h |  3 ++-
 6 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/etnaviv/hw/common.xml.h 
b/src/gallium/drivers/etnaviv/hw/common.xml.h
index b98fa84..60bde8b 100644
--- a/src/gallium/drivers/etnaviv/hw/common.xml.h
+++ b/src/gallium/drivers/etnaviv/hw/common.xml.h
@@ -11,7 +11,7 @@ The rules-ng-ng source files this header was generated from 
are:
 - texdesc_3d.xml (   3183 bytes, from 2017-10-31 19:05:01)
 - copyright.xml  (   1597 bytes, from 2016-10-29 07:29:22)
 - common.xml (  26187 bytes, from 2017-10-31 19:05:01)
-- common_3d.xml  (  14547 bytes, from 2017-11-01 16:08:07)
+- common_3d.xml  (  14615 bytes, from 2017-11-04 14:03:35)
 
 Copyright (C) 2012-2017 by the following authors:
 - Wladimir J. van der Laan 
diff --git a/src/gallium/drivers/etnaviv/hw/common_3d.xml.h 
b/src/gallium/drivers/etnaviv/hw/common_3d.xml.h
index 8f19d5b..d110a36 100644
--- a/src/gallium/drivers/etnaviv/hw/common_3d.xml.h
+++ b/src/gallium/drivers/etnaviv/hw/common_3d.xml.h
@@ -11,7 +11,7 @@ The rules-ng-ng source files this header was generated from 
are:
 - texdesc_3d.xml (   3183 bytes, from 2017-10-31 19:05:01)
 - copyright.xml  (   1597 bytes, from 2016-10-29 07:29:22)
 - common.xml (  26187 bytes, from 2017-10-31 19:05:01)
-- common_3d.xml  (  14547 bytes, from 2017-11-01 16:08:07)
+- common_3d.xml  (  14615 bytes, from 2017-11-04 14:03:35)
 
 Copyright (C) 2012-2017 by the following authors:
 - Wladimir J. van der Laan 
diff --git a/src/gallium/drivers/etnaviv/hw/state.xml.h 
b/src/gallium/drivers/etnaviv/hw/state.xml.h
index 0a93a4f..485c0eb 100644
--- a/src/gallium/drivers/etnaviv/hw/state.xml.h
+++ b/src/gallium/drivers/etnaviv/hw/state.xml.h
@@ -10,11 +10,11 @@ git clone git://0x04.net/rules-ng-ng
 The rules-ng-ng source files this header was generated from are:
 - state.xml (  26087 bytes, from 2017-10-30 13:44:54)
 - common.xml(  26187 bytes, from 2017-10-31 19:05:01)
-- common_3d.xml (  14547 bytes, from 2017-11-01 16:08:07)
+- common_3d.xml (  14615 bytes, from 2017-11-04 14:03:35)
 - state_hi.xml  (  27733 bytes, from 2017-10-02 19:00:30)
 - copyright.xml (   1597 bytes, from 2016-10-29 07:29:22)
 - state_2d.xml  (  51552 bytes, from 2016-10-29 07:29:22)
-- state_3d.xml  (  79520 bytes, from 2017-10-31 19:05:01)
+- state_3d.xml  (  79992 bytes, from 2017-11-07 10:44:35)
 - state_blt.xml (  13405 bytes, from 2017-10-16 17:42:46)
 - state_vg.xml  (   5975 bytes, from 2016-10-29 07:29:22)
 
diff --git a/src/gallium/drivers/etnaviv/hw/state_3d.xml.h 
b/src/gallium/drivers/etnaviv/hw/state_3d.xml.h
index c5722aa..13122789 100644
--- a/src/gallium/drivers/etnaviv/hw/state_3d.xml.h
+++ b/src/gallium/drivers/etnaviv/hw/state_3d.xml.h
@@ -10,11 +10,11 @@ git clone git://0x04.net/rules-ng-ng
 The rules-ng-ng source files this header was generated from are:
 - state.xml (  26087 bytes, from 2017-10-30 13:44:54)
 - common.xml(  26187 bytes, from 2017-10-31 19:05:01)
-- common_3d.xml (  14547 bytes, from 2017-11-01 16:08:07)
+- common_3d.xml (  14615 bytes, from 2017-11-04 14:03:35)
 - state_hi.xml  (  27733 bytes, from 2017-10-02 19:00:30)
 - copyright.xml (   1597 bytes, from 2016-10-29 07:29:22)
 - state_2d.xml  (  51552 bytes, from 2016-10-29 07:29:22)
-- state_3d.xml  (  79520 bytes, from 2017-10-31 19:05:01)
+- state_3d.xml  (  79992 bytes, from 2017-11-07 10:44:35)
 - state_blt.xml (  13405 bytes, from 2017-10-16 17:42:46)
 - state_vg.xml  (   5975 bytes, from 2016-10-29 07:29:22)
 
@@ -122,6 +122,14 @@ DEALINGS IN THE SOFTWARE.
 #define LOGIC_OP_OR_REVERSE0x000d
 #define LOGIC_OP_OR0x000e
 #define LOGIC_OP_SET   0x000f
+#define TS_SAMPLER_FORMAT_A4R4G4B4 0x
+#define TS_SAMPLER_FORMAT_A1R5G5B5 0x0001
+#define TS_SAMPLER_FORMAT_R5G6B5   0x0002
+#define TS_SAMPLER_FORMAT_A8R8G8B8 0x0003
+#define TS_SAMPLER_FORMAT_X8R8G8B8 0x0004
+#define TS_SAMPLER_FORMAT_D24X8
0x0005
+#define TS_SAMPLER_FORMAT_D16  0x0008
+#define TS_SAMPLER_FORMAT_RAW  0x000f
 #define VARYING_NUM_COMPONENTS_VAR0__MASK  0x0007
 #define VARYING_NUM_COMPONENTS_VAR0__SHIFT 0
 #define VARYING_NUM_COMPONENTS_VAR0(x)  

[Mesa-dev] [PATCH 2/3] etnaviv: Add TS_SAMPLER formats to etnaviv_format

2017-11-07 Thread Wladimir J. van der Laan
Sampler TS introduces yet another format enumeration for renderable
formats. Introduce it into etnaviv_format as unobtrusively as possible.

Signed-off-by: Wladimir J. van der Laan 
---
 src/gallium/drivers/etnaviv/etnaviv_format.c | 19 +++
 src/gallium/drivers/etnaviv/etnaviv_format.h |  3 +++
 2 files changed, 22 insertions(+)

diff --git a/src/gallium/drivers/etnaviv/etnaviv_format.c 
b/src/gallium/drivers/etnaviv/etnaviv_format.c
index 3dd212f..f3cba46 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_format.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_format.c
@@ -40,6 +40,7 @@ struct etna_format {
unsigned vtx;
unsigned tex;
unsigned rs;
+   unsigned ts;
boolean present;
const unsigned char tex_swiz[4];
 };
@@ -53,6 +54,13 @@ struct etna_format {
 #define RS_FORMAT_X8B8G8R8(RS_FORMAT_X8R8G8B8 | RS_FORMAT_RB_SWAP)
 #define RS_FORMAT_A8B8G8R8(RS_FORMAT_A8R8G8B8 | RS_FORMAT_RB_SWAP)
 
+#define TS_SAMPLER_FORMAT_NONE  ETNA_NO_MATCH
+#define TS_SAMPLER_FORMAT_X4R4G4B4  TS_SAMPLER_FORMAT_A4R4G4B4
+#define TS_SAMPLER_FORMAT_X1R5G5B5  TS_SAMPLER_FORMAT_A1R5G5B5
+#define TS_SAMPLER_FORMAT_YUY2  TS_SAMPLER_FORMAT_NONE /* Not supported 
AFAIK */
+#define TS_SAMPLER_FORMAT_X8B8G8R8  TS_SAMPLER_FORMAT_X8R8G8B8
+#define TS_SAMPLER_FORMAT_A8B8G8R8  TS_SAMPLER_FORMAT_A8R8G8B8
+
 #define SWIZ(x,y,z,w) {\
PIPE_SWIZZLE_##x,   \
PIPE_SWIZZLE_##y,   \
@@ -66,6 +74,7 @@ struct etna_format {
   .vtx = FE_DATA_TYPE_##vtxfmt, \
   .tex = TEXTURE_FORMAT_##texfmt, \
   .rs = RS_FORMAT_##rsfmt,\
+  .ts = TS_SAMPLER_FORMAT_##rsfmt,\
   .present = 1,   \
   .tex_swiz = texswiz,\
}
@@ -76,6 +85,7 @@ struct etna_format {
   .vtx = ETNA_NO_MATCH,\
   .tex = TEXTURE_FORMAT_##fmt, \
   .rs = RS_FORMAT_##rsfmt, \
+  .ts = TS_SAMPLER_FORMAT_##rsfmt, \
   .present = 1,\
   .tex_swiz = swiz,\
}
@@ -356,3 +366,12 @@ translate_vertex_format_type(enum pipe_format fmt)
 
return formats[fmt].vtx;
 }
+
+uint32_t
+translate_ts_sampler_format(enum pipe_format fmt)
+{
+   if (!formats[fmt].present)
+  return ETNA_NO_MATCH;
+
+   return formats[fmt].ts;
+}
diff --git a/src/gallium/drivers/etnaviv/etnaviv_format.h 
b/src/gallium/drivers/etnaviv/etnaviv_format.h
index 1672d67..20c8e1b 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_format.h
+++ b/src/gallium/drivers/etnaviv/etnaviv_format.h
@@ -53,4 +53,7 @@ translate_rs_format_rb_swap(enum pipe_format fmt);
 uint32_t
 translate_vertex_format_type(enum pipe_format fmt);
 
+uint32_t
+translate_ts_sampler_format(enum pipe_format fmt);
+
 #endif /* ETNAVIV_FORMAT_H_ */
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   3   >