Re: [Mesa-dev] [PATCH v2] nir: Get rid of nir_constant_data

2016-12-01 Thread Iago Toral
Reviewed-by: Iago Toral Quiroga 

On Thu, 2016-12-01 at 16:07 -0800, Jason Ekstrand wrote:
> This has bothered me for about as long as NIR has been around.  Why
> do we
> have two different unions for constants?  No good reason other than
> one of
> them is a direct port from GLSL IR.
> ---
>  src/compiler/glsl/glsl_to_nir.cpp  | 35 +---
>  src/compiler/nir/nir.c | 32 +++---
>  src/compiler/nir/nir.h | 30 ++---
>  src/compiler/nir/nir_clone.c   |  2 +-
>  src/compiler/nir/nir_print.c   | 29 ++---
>  src/compiler/spirv/spirv_to_nir.c  | 67 +---
> --
>  src/compiler/spirv/vtn_variables.c |  8 ++---
>  7 files changed, 98 insertions(+), 105 deletions(-)
> 
> diff --git a/src/compiler/glsl/glsl_to_nir.cpp
> b/src/compiler/glsl/glsl_to_nir.cpp
> index 628f8de..0b74b7e 100644
> --- a/src/compiler/glsl/glsl_to_nir.cpp
> +++ b/src/compiler/glsl/glsl_to_nir.cpp
> @@ -198,34 +198,47 @@ constant_copy(ir_constant *ir, void *mem_ctx)
>  
> nir_constant *ret = ralloc(mem_ctx, nir_constant);
>  
> -   unsigned total_elems = ir->type->components();
> +   const unsigned rows = ir->type->vector_elements;
> +   const unsigned cols = ir->type->matrix_columns;
> unsigned i;
>  
> ret->num_elements = 0;
> switch (ir->type->base_type) {
> case GLSL_TYPE_UINT:
> -  for (i = 0; i < total_elems; i++)
> - ret->value.u[i] = ir->value.u[i];
> +  for (unsigned c = 0; c < cols; c++) {
> + for (unsigned r = 0; r < rows; r++)
> +ret->values[c].u32[r] = ir->value.u[c * rows + r];
> +  }
>    break;
>  
> case GLSL_TYPE_INT:
> -  for (i = 0; i < total_elems; i++)
> - ret->value.i[i] = ir->value.i[i];
> +  for (unsigned c = 0; c < cols; c++) {
> + for (unsigned r = 0; r < rows; r++)
> +ret->values[c].i32[r] = ir->value.i[c * rows + r];
> +  }
>    break;
>  
> case GLSL_TYPE_FLOAT:
> -  for (i = 0; i < total_elems; i++)
> - ret->value.f[i] = ir->value.f[i];
> +  for (unsigned c = 0; c < cols; c++) {
> + for (unsigned r = 0; r < rows; r++)
> +ret->values[c].f32[r] = ir->value.f[c * rows + r];
> +  }
>    break;
>  
> case GLSL_TYPE_DOUBLE:
> -  for (i = 0; i < total_elems; i++)
> - ret->value.d[i] = ir->value.d[i];
> +  for (unsigned c = 0; c < cols; c++) {
> + for (unsigned r = 0; r < rows; r++)
> +ret->values[c].f64[r] = ir->value.d[c * rows + r];
> +  }
>    break;
>  
> case GLSL_TYPE_BOOL:
> -  for (i = 0; i < total_elems; i++)
> - ret->value.b[i] = ir->value.b[i];
> +  for (unsigned c = 0; c < cols; c++) {
> + for (unsigned r = 0; r < rows; r++) {
> +ret->values[c].u32[r] = ir->value.b[c * rows + r] ?
> +NIR_TRUE : NIR_FALSE;
> + }
> +  }
>    break;
>  
> case GLSL_TYPE_STRUCT:
> diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
> index cfb032c..2d882f7 100644
> --- a/src/compiler/nir/nir.c
> +++ b/src/compiler/nir/nir.c
> @@ -806,7 +806,7 @@ nir_deref_get_const_initializer_load(nir_shader
> *shader, nir_deref_var *deref)
> assert(constant);
>  
> const nir_deref *tail = &deref->deref;
> -   unsigned matrix_offset = 0;
> +   unsigned matrix_col = 0;
> while (tail->child) {
>    switch (tail->child->deref_type) {
>    case nir_deref_type_array: {
> @@ -814,7 +814,7 @@ nir_deref_get_const_initializer_load(nir_shader
> *shader, nir_deref_var *deref)
>   assert(arr->deref_array_type ==
> nir_deref_array_type_direct);
>   if (glsl_type_is_matrix(tail->type)) {
>  assert(arr->deref.child == NULL);
> -matrix_offset = arr->base_offset;
> +matrix_col = arr->base_offset;
>   } else {
>  constant = constant->elements[arr->base_offset];
>   }
> @@ -838,24 +838,16 @@ nir_deref_get_const_initializer_load(nir_shader
> *shader, nir_deref_var *deref)
>    nir_load_const_instr_create(shader,
> glsl_get_vector_elements(tail->type),
>    bit_size);
>  
> -   matrix_offset *= load->def.num_components;
> -   for (unsigned i = 0; i < load->def.num_components; i++) {
> -  switch (glsl_get_base_type(tail->type)) {
> -  case GLSL_TYPE_FLOAT:
> -  case GLSL_TYPE_INT:
> -  case GLSL_TYPE_UINT:
> - load->value.u32[i] = constant->value.u[matrix_offset + i];
> - break;
> -  case GLSL_TYPE_DOUBLE:
> - load->value.f64[i] = constant->value.d[matrix_offset + i];
> - break;
> -  case GLSL_TYPE_BOOL:
> - load->value.u32[i] = constant->value.b[matrix_offset + i] ?
> - NIR_TRUE : NIR_FALSE;
> - break;
> -  default:
> - unreachable("Invalid immediate type");
> -  }
> +   switch 

Re: [Mesa-dev] [PATCH 19/22] anv/nir: add support for dvec3/4 consuming two locations

2016-12-01 Thread Jason Ekstrand
I'm not sure how I feel about this one.  It seems like it would almost be
easier to just pick one convention or the other for NIR and adjust one of
the drivers accordingly.  I don't know that I have a huge preference which
convention we choose.  I guess the Vulkan convention matches our hardware a
bit better.  In either case, converting from one to the other should be a
simple matter of building a remap table or a creative use of popcount.

On Fri, Nov 25, 2016 at 12:52 AM, Juan A. Suarez Romero  wrote:

> One difference between OpenGL and Vulkan regarding 64-bit vertex
> attribute types is that dvec3 and dvec4 consumes just one location in
> OpenGL, while in Vulkan it consumes two locations.
>
> Thus, in OpenGL for each dvec3/dvec4 vertex attrib we mark just one bit
> in our internal inputs_read bitmap (and also the corresponding bit in
> double_inputs_read bitmap) while in Vulkan we mark two consecutive bits
> in both bitmaps.
>
> This is handled with a nir option called "dvec3_consumes_two_locations",
> which is set to True for Vulkan code. And all the computation regarding
> emitting vertices as well as the mapping between attributes and physical
> registers use this option to correctly do the work.
> ---
>  src/amd/vulkan/radv_pipeline.c   |  1 +
>  src/compiler/nir/nir.h   |  5 +++
>  src/compiler/nir/nir_gather_info.c   |  6 +--
>  src/gallium/drivers/freedreno/ir3/ir3_nir.c  |  1 +
>  src/intel/vulkan/anv_device.c|  2 +-
>  src/intel/vulkan/genX_pipeline.c | 62
> +---
>  src/mesa/drivers/dri/i965/brw_compiler.c | 23 ++-
>  src/mesa/drivers/dri/i965/brw_compiler.h |  2 +-
>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 14 +--
>  src/mesa/drivers/dri/i965/brw_nir.c  | 18 +---
>  src/mesa/drivers/dri/i965/brw_vec4.cpp   | 13 --
>  src/mesa/drivers/dri/i965/intel_screen.c |  3 +-
>  12 files changed, 105 insertions(+), 45 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_
> pipeline.c
> index ee5d812..90d4650 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -59,6 +59,7 @@ static const struct nir_shader_compiler_options
> nir_options = {
> .lower_unpack_unorm_4x8 = true,
> .lower_extract_byte = true,
> .lower_extract_word = true,
> +   .dvec3_consumes_two_locations = true,
>  };
>
>  VkResult radv_CreateShaderModule(
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 1679d89..0fc8f39 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -1794,6 +1794,11 @@ typedef struct nir_shader_compiler_options {
>  * information must be inferred from the list of input nir_variables.
>  */
> bool use_interpolated_input_intrinsics;
> +
> +   /**
> +* In Vulkan, a dvec3/dvec4 consumes two locations instead just one.
> +*/
> +   bool dvec3_consumes_two_locations;
>  } nir_shader_compiler_options;
>
>  typedef struct nir_shader {
> diff --git a/src/compiler/nir/nir_gather_info.c
> b/src/compiler/nir/nir_gather_info.c
> index 07c9949..8c80671 100644
> --- a/src/compiler/nir/nir_gather_info.c
> +++ b/src/compiler/nir/nir_gather_info.c
> @@ -96,7 +96,7 @@ mark_whole_variable(nir_shader *shader, nir_variable
> *var)
>
> const unsigned slots =
>var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
> -: glsl_count_attribute_slots(type,
> is_vertex_input);
> +: glsl_count_attribute_slots(type,
> is_vertex_input && !shader->options->dvec3_consumes_two_locations);
>

This makes no sense.  Why are we passing is_vertex_input &&
!dvec3_consumes_two_locations to an argument labeled is_vertex_input?


>
> set_io_mask(shader, var, 0, slots);
>  }
> @@ -168,7 +168,7 @@ try_mask_partial_io(nir_shader *shader, nir_deref_var
> *deref)
> var->data.mode == nir_var_shader_in)
>is_vertex_input = true;
>
> -   unsigned offset = get_io_offset(deref, is_vertex_input);
> +   unsigned offset = get_io_offset(deref, is_vertex_input &&
> !shader->options->dvec3_consumes_two_locations);
>

Same here


> if (offset == -1)
>return false;
>
> @@ -184,7 +184,7 @@ try_mask_partial_io(nir_shader *shader, nir_deref_var
> *deref)
> }
>
> /* double element width for double types that takes two slots */
> -   if (!is_vertex_input &&
> +   if ((!is_vertex_input || shader->options->dvec3_consumes_two_locations)
> &&
> glsl_type_is_dual_slot(glsl_without_array(type))) {
>

This makes a bit more sense but I'm still not quite seeing it.


>elem_width *= 2;
> }
> diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c
> b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
> index 2d86a52..5c5c9ad 100644
> --- a/src/gallium/drivers/freedreno/ir3/ir3_nir.c
> +++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
> @@ -50,6 +50,7 @@ static 

[Mesa-dev] [AppVeyor] mesa master #2863 completed

2016-12-01 Thread AppVeyor


Build mesa 2863 completed



Commit c45d84ad83 by Timothy Arceri on 12/2/2016 5:44 AM:

Revert "st/mesa: get Version from gl_program rather than gl_shader_program"\n\nThis reverts commit 6bf63b011992dbbc899a28bde5692070dbcf965a.\n\nA patch that adds a reference to gl_shader_program_data to gl_program\nneeds to land before this one.


Configure your notification preferences

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [AppVeyor] mesa master #2862 failed

2016-12-01 Thread Timothy Arceri
On Fri, 2016-12-02 at 04:51 +0100, Dieter Nützel wrote:
> Hello Timothy,
> 
> it fails, here:
> 
> state_tracker/st_atom_texture.c: In function 'update_textures':
> state_tracker/st_atom_texture.c:130:49: error: 'const struct 
> ' has no member named 'data'
>   prog->sh.data->Version);

Apologies. I thought I'd already landed a prerequisite patch. I've
reverted this for now.

>   ^
> 
> Greetings,
> Dieter
> 
> Am 02.12.2016 04:15, schrieb AppVeyor:
> > 
> > BUILD MESA 2862 FAILED [1]
> > 
> >  Commit 6bf63b0119 by Timothy Arceri on 11/9/2016 4:01 AM:
> > st/mesa: get Version from gl_program rather than
> > gl_shader_program\n\nReviewed-by: Nicolai Hähnle
> > 
> > 
> > Configure your notification preferences [2]
> > 
> > Links:
> > --
> > [1] https://ci.appveyor.com/project/jrfonseca-fdo/mesa/build/2862
> > [2] https://ci.appveyor.com/notifications
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [AppVeyor] mesa master #2862 failed

2016-12-01 Thread Dieter Nützel

Hello Timothy,

it fails, here:

state_tracker/st_atom_texture.c: In function 'update_textures':
state_tracker/st_atom_texture.c:130:49: error: 'const struct 
' has no member named 'data'

 prog->sh.data->Version);
 ^

Greetings,
Dieter

Am 02.12.2016 04:15, schrieb AppVeyor:

BUILD MESA 2862 FAILED [1]

 Commit 6bf63b0119 by Timothy Arceri on 11/9/2016 4:01 AM:
st/mesa: get Version from gl_program rather than
gl_shader_program\n\nReviewed-by: Nicolai Hähnle


Configure your notification preferences [2]

Links:
--
[1] https://ci.appveyor.com/project/jrfonseca-fdo/mesa/build/2862
[2] https://ci.appveyor.com/notifications
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [AppVeyor] mesa master #2862 failed

2016-12-01 Thread AppVeyor



Build mesa 2862 failed


Commit 6bf63b0119 by Timothy Arceri on 11/9/2016 4:01 AM:

st/mesa: get Version from gl_program rather than gl_shader_program\n\nReviewed-by: Nicolai Hähnle 


Configure your notification preferences

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 16/22] isl: fix VA64 support for double and dvecN vertex attributes

2016-12-01 Thread Jason Ekstrand
On Fri, Nov 25, 2016 at 12:52 AM, Juan A. Suarez Romero  wrote:

> From: Samuel Iglesias Gonsálvez 
>
> We use *64*_PASSTHRU formats to upload vertex attributes of 64 bits
> to avoid conversions. From the BDW PRM, Volume 2d, page 586
> (VERTEX_ELEMENT_STATE):
>
>  "When SourceElementFormat is set to one of the *64*_PASSTHRU
>  formats, 64-bit components are stored in the URB without any
>  conversion. In this case, vertex elements must be written as 128
>  or 256 bits, with VFCOMP_STORE_0 being used to pad the output
>  as required. E.g., if R64_PASSTHRU is used to copy a 64-bit Red
>  component into the URB, Component 1 must be specified as
>  VFCOMP_STORE_0 (with Components 2,3 set to VFCOMP_NOSTORE)
>  in order to output a 128-bit vertex element, or Components 1-3 must
>  be specified as VFCOMP_STORE_0 in order to output a 256-bit vertex
>  element. Likewise, use of R64G64B64_PASSTHRU requires Component 3
>  to be specified as VFCOMP_STORE_0 in order to output a 256-bit vertex
>  element."
>
> Signed-off-by: Samuel Iglesias Gonsálvez 
> ---
>  src/intel/isl/isl_format.c  | 4 ++--
>  src/intel/isl/isl_format_layout.csv | 3 ---
>  src/intel/vulkan/anv_formats.c  | 8 
>  3 files changed, 6 insertions(+), 9 deletions(-)
>
> diff --git a/src/intel/isl/isl_format.c b/src/intel/isl/isl_format.c
> index 98806f4..92b630a 100644
> --- a/src/intel/isl/isl_format.c
> +++ b/src/intel/isl/isl_format.c
> @@ -97,7 +97,7 @@ static const struct surface_format_info format_info[] = {
> SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,x,   R32G32B32A32_SSCALED)
> SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,x,   R32G32B32A32_USCALED)
> SF( x,  x,  x,  x,  x,  x, 75,  x,  x,x,   R32G32B32A32_SFIXED)
> -   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,x,   R64G64_PASSTHRU)
> +   SF( x,  x,  x,  x,  x,  x, 80,  x,  x,x,   R64G64_PASSTHRU)
> SF( Y, 50,  x,  x,  x,  x,  Y,  Y,  x,x,   R32G32B32_FLOAT)
> SF( Y,  x,  x,  x,  x,  x,  Y,  Y,  x,x,   R32G32B32_SINT)
> SF( Y,  x,  x,  x,  x,  x,  Y,  Y,  x,x,   R32G32B32_UINT)
> @@ -131,7 +131,7 @@ static const struct surface_format_info format_info[]
> = {
> SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,x,   R32G32_SSCALED)
> SF( x,  x,  x,  x,  x,  x,  Y,  x,  x,x,   R32G32_USCALED)
> SF( x,  x,  x,  x,  x,  x, 75,  x,  x,x,   R32G32_SFIXED)
> -   SF( x,  x,  x,  x,  x,  x,  x,  x,  x,x,   R64_PASSTHRU)
> +   SF( x,  x,  x,  x,  x,  x, 80,  x,  x,x,   R64_PASSTHRU)
> SF( Y,  Y,  x,  Y,  Y,  Y,  Y,  x, 60,   90,   B8G8R8A8_UNORM)
> SF( Y,  Y,  x,  x,  Y,  Y,  x,  x,  x,x,   B8G8R8A8_UNORM_SRGB)
>  /* smpl filt shad CK  RT  AB  VB  SO  color ccs_e */
> diff --git a/src/intel/isl/isl_format_layout.csv
> b/src/intel/isl/isl_format_layout.csv
> index f0f31c7..b1e298b 100644
> --- a/src/intel/isl/isl_format_layout.csv
> +++ b/src/intel/isl/isl_format_layout.csv
> @@ -96,7 +96,6 @@ X32_TYPELESS_G8X24_UINT ,  64,  1,  1,  1,  x32,
> ui8,  x24, , ,
>  L32A32_FLOAT,  64,  1,  1,  1, , , , sf32,
> sf32, ,, linear,
>  R32G32_UNORM,  64,  1,  1,  1, un32, un32, , ,
>  , ,, linear,
>  R32G32_SNORM,  64,  1,  1,  1, sn32, sn32, , ,
>  , ,, linear,
> -R64_FLOAT   ,  64,  1,  1,  1, sf64, , , ,
>  , ,, linear,
>  R16G16B16X16_UNORM  ,  64,  1,  1,  1, un16, un16, un16,  x16,
>  , ,, linear,
>  R16G16B16X16_FLOAT  ,  64,  1,  1,  1, sf16, sf16, sf16,  x16,
>  , ,, linear,
>  A32X32_FLOAT,  64,  1,  1,  1, , , , sf32,
> x32, ,,  alpha,
> @@ -243,8 +242,6 @@ R8G8B8_UNORM,  24,  1,  1,  1,  un8,
> un8,  un8, , ,
>  R8G8B8_SNORM,  24,  1,  1,  1,  sn8,  sn8,  sn8, ,
>  , ,, linear,
>  R8G8B8_SSCALED  ,  24,  1,  1,  1,  ss8,  ss8,  ss8, ,
>  , ,, linear,
>  R8G8B8_USCALED  ,  24,  1,  1,  1,  us8,  us8,  us8, ,
>  , ,, linear,
> -R64G64B64A64_FLOAT  , 256,  1,  1,  1, sf64, sf64, sf64, sf64,
>  , ,, linear,
> -R64G64B64_FLOAT , 196,  1,  1,  1, sf64, sf64, sf64, ,
>  , ,, linear,
>

Why are we deleting these three formats?  Do they not exist?  Are their
entries wrong?  The isl_format_layout table contains information for
several formats that are unused in either driver.  I'd rather leave them
here unless they are causing a problem.


>  BC4_SNORM   ,  64,  4,  4,  1,  sn8, , , ,
>  , ,, linear, rgtc1
>  BC5_SNORM   , 128,  4,  4,  1,  sn8,  sn8, , ,
>  , ,, linear, rgtc2
>  R16G16B16_FLOAT ,  48,  1,  1,  1, sf16, sf16, sf16, ,
>  , ,, linear,
> diff --git 

Re: [Mesa-dev] [PATCH 04/22] spirv: add DF support to vtn_const_ssa_value()

2016-12-01 Thread Jason Ekstrand
If you don't mind rebasing on it, my "get rid of nir_constant_data" patch
should let you drop most of this and patch 5.

On Fri, Nov 25, 2016 at 12:52 AM, Juan A. Suarez Romero  wrote:

> From: Samuel Iglesias Gonsálvez 
>
> Signed-off-by: Samuel Iglesias Gonsálvez 
> ---
>  src/compiler/spirv/spirv_to_nir.c | 24 +---
>  1 file changed, 17 insertions(+), 7 deletions(-)
>
> diff --git a/src/compiler/spirv/spirv_to_nir.c
> b/src/compiler/spirv/spirv_to_nir.c
> index dadf7fc..8569bc8 100644
> --- a/src/compiler/spirv/spirv_to_nir.c
> +++ b/src/compiler/spirv/spirv_to_nir.c
> @@ -98,14 +98,19 @@ vtn_const_ssa_value(struct vtn_builder *b,
> nir_constant *constant,
> case GLSL_TYPE_UINT:
> case GLSL_TYPE_BOOL:
> case GLSL_TYPE_FLOAT:
> -   case GLSL_TYPE_DOUBLE:
> +   case GLSL_TYPE_DOUBLE: {
> +  int bit_size = glsl_get_bit_size(type);
>if (glsl_type_is_vector_or_scalar(type)) {
>   unsigned num_components = glsl_get_vector_elements(val->type);
>   nir_load_const_instr *load =
> -nir_load_const_instr_create(b->shader, num_components, 32);
> +nir_load_const_instr_create(b->shader, num_components,
> bit_size);
>
> - for (unsigned i = 0; i < num_components; i++)
> -load->value.u32[i] = constant->value.u[i];
> + for (unsigned i = 0; i < num_components; i++) {
> +if (bit_size == 64)
> +   load->value.f64[i] = constant->value.d[i];
> +else
> +   load->value.u32[i] = constant->value.u[i];
> + }
>
>   nir_instr_insert_before_cf_list(>impl->body, >instr);
>   val->def = >def;
> @@ -119,10 +124,14 @@ vtn_const_ssa_value(struct vtn_builder *b,
> nir_constant *constant,
>  struct vtn_ssa_value *col_val = rzalloc(b, struct
> vtn_ssa_value);
>  col_val->type = glsl_get_column_type(val->type);
>  nir_load_const_instr *load =
> -   nir_load_const_instr_create(b->shader, rows, 32);
> +   nir_load_const_instr_create(b->shader, rows, bit_size);
>
> -for (unsigned j = 0; j < rows; j++)
> -   load->value.u32[j] = constant->value.u[rows * i + j];
> +for (unsigned j = 0; j < rows; j++) {
> +   if (bit_size == 64)
> +  load->value.f64[j] = constant->value.d[rows * i + j];
> +   else
> +  load->value.u32[j] = constant->value.u[rows * i + j];
> +}
>
>  nir_instr_insert_before_cf_list(>impl->body,
> >instr);
>  col_val->def = >def;
> @@ -131,6 +140,7 @@ vtn_const_ssa_value(struct vtn_builder *b,
> nir_constant *constant,
>   }
>}
>break;
> +   }
>
> case GLSL_TYPE_ARRAY: {
>unsigned elems = glsl_get_length(val->type);
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/22] spirv/nir: implement DF conversions

2016-12-01 Thread Jason Ekstrand
On Fri, Nov 25, 2016 at 12:52 AM, Juan A. Suarez Romero  wrote:

> From: Samuel Iglesias Gonsálvez 
>
> SPIR-V does not have special opcodes for DF conversions. We need to
> identify
> them by checking the bit size of the operand and the result.
>
> Signed-off-by: Samuel Iglesias Gonsálvez 
> ---
>  src/compiler/spirv/spirv_to_nir.c | 29 ++---
>  src/compiler/spirv/vtn_alu.c  | 37 +++---
> ---
>  src/compiler/spirv/vtn_private.h  |  3 ++-
>  3 files changed, 51 insertions(+), 18 deletions(-)
>
> diff --git a/src/compiler/spirv/spirv_to_nir.c
> b/src/compiler/spirv/spirv_to_nir.c
> index a13f72a..81c73da 100644
> --- a/src/compiler/spirv/spirv_to_nir.c
> +++ b/src/compiler/spirv/spirv_to_nir.c
> @@ -1211,12 +1211,21 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp
> opcode,
>
>default: {
>   bool swap;
> - nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, );
> -
> - unsigned num_components = glsl_get_vector_elements(val->
> const_type);
>   unsigned bit_size =
>  glsl_get_bit_size(val->const_type);
>
> + bool is_double_dst = bit_size == 64;
> + bool is_double_src = is_double_dst;
> + /* We assume there is no double conversion here */
> + assert(bit_size != 64 ||
> +(opcode != SpvOpConvertFToU && opcode != SpvOpConvertFToS
> &&
> + opcode != SpvOpConvertSToF && opcode != SpvOpConvertUToF
> &&
> + opcode != SpvOpFConvert));
> + nir_op op =
> +vtn_nir_alu_op_for_spirv_opcode(opcode, ,
> +is_double_dst, is_double_src);
> +
> + unsigned num_components = glsl_get_vector_elements(val->
> const_type);
>   nir_const_value src[4];
>   assert(count <= 7);
>   for (unsigned i = 0; i < count - 4; i++) {
> @@ -1224,16 +1233,22 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp
> opcode,
> vtn_value(b, w[4 + i], vtn_value_type_constant)->constant;
>
>  unsigned j = swap ? 1 - i : i;
> -assert(bit_size == 32);
>  for (unsigned k = 0; k < num_components; k++)
> -   src[j].u32[k] = c->value.u[k];
> +   if (!is_double_src)
> +  src[j].u32[k] = c->value.u[k];
> +   else
> +  src[j].f64[k] = c->value.d[k];
>   }
>
>   nir_const_value res = nir_eval_const_opcode(op, num_components,
>   bit_size, src);
>
> - for (unsigned k = 0; k < num_components; k++)
> -val->constant->value.u[k] = res.u32[k];
> + for (unsigned k = 0; k < num_components; k++) {
> +if (!is_double_dst)
> +   val->constant->value.u[k] = res.u32[k];
> +else
> +   val->constant->value.d[k] = res.f64[k];
> + }
>
>   break;
>} /* default */
> diff --git a/src/compiler/spirv/vtn_alu.c b/src/compiler/spirv/vtn_alu.c
> index 95ff2b1..e444d3f 100644
> --- a/src/compiler/spirv/vtn_alu.c
> +++ b/src/compiler/spirv/vtn_alu.c
> @@ -211,7 +211,8 @@ vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp
> opcode,
>  }
>
>  nir_op
> -vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap)
> +vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap,
> +bool is_double_dst, bool is_double_src)
>

I think it would be better if we did this as dst_bit_size and
src_bit_size.  That would make this simpler for basically every caller.
Also, it makes it more 8/16-bit ready.


>  {
> /* Indicates that the first two arguments should be swapped.  This is
>  * used for implementing greater-than and less-than-or-equal.
> @@ -284,16 +285,21 @@ vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool
> *swap)
> case SpvOpFUnordGreaterThanEqual:   return nir_op_fge;
>
> /* Conversions: */
> -   case SpvOpConvertFToU:   return nir_op_f2u;
> -   case SpvOpConvertFToS:   return nir_op_f2i;
> -   case SpvOpConvertSToF:   return nir_op_i2f;
> -   case SpvOpConvertUToF:   return nir_op_u2f;
> +   case SpvOpConvertFToU:   return is_double_src ? nir_op_d2u :
> nir_op_f2u;
> +   case SpvOpConvertFToS:   return is_double_src ? nir_op_d2i :
> nir_op_f2i;
> +   case SpvOpConvertSToF:   return is_double_dst ? nir_op_i2d :
> nir_op_i2f;
> +   case SpvOpConvertUToF:   return is_double_dst ? nir_op_u2d :
> nir_op_u2f;
>

The time is soon coming (not sure if you want to do this now or not) to add
a nir helper:

nir_op nir_type2type_op(nir_alu_type src, nir_alu_type dst);

I'm OK open-coding it for now, but as soon as we add int64 or any 8 or
16-bit types, we'll want it.


> case SpvOpBitcast:   return nir_op_imov;
> case SpvOpUConvert:
> case 

Re: [Mesa-dev] [PATCH 50/58] st/mesa: get Version from gl_program rather than gl_shader_program

2016-12-01 Thread Timothy Arceri
On Thu, 2016-12-01 at 12:38 +0100, Nicolai Hähnle wrote:
> Hmm, I wonder what the rules are when different shaders have
> different 
> versions and are linked together?

Yeah I was wondering the same when I changed this. It seems that in
linker.cpp we get the max version from the shaders and store it here.


> Then again, the use of the 
> glsl_version in st_sampler_view.c pretty much admits that it's
> already a 
> hack, so I think this is fine.
> 
> Patches 49 & 50:
> 
> Reviewed-by: Nicolai Hähnle 

Thanks for the reviews :)

> 
> On 20.11.2016 14:29, Timothy Arceri wrote:
> > 
> > ---
> >  src/mesa/state_tracker/st_atom_texture.c | 5 +
> >  1 file changed, 1 insertion(+), 4 deletions(-)
> > 
> > diff --git a/src/mesa/state_tracker/st_atom_texture.c
> > b/src/mesa/state_tracker/st_atom_texture.c
> > index 5fe042b..58e753d 100644
> > --- a/src/mesa/state_tracker/st_atom_texture.c
> > +++ b/src/mesa/state_tracker/st_atom_texture.c
> > @@ -111,9 +111,6 @@ update_textures(struct st_context *st,
> > GLbitfield free_slots = ~prog->SamplersUsed;
> > GLbitfield external_samplers_used = prog->ExternalSamplersUsed;
> > GLuint unit;
> > -   struct gl_shader_program *shader =
> > -  st->ctx->_Shader->CurrentProgram[mesa_shader];
> > -   unsigned glsl_version = shader ? shader->data->Version : 0;
> > enum pipe_shader_type shader_stage =
> > st_shader_stage_to_ptarget(mesa_shader);
> > 
> > if (samplers_used == 0x0 && old_max == 0)
> > @@ -130,7 +127,7 @@ update_textures(struct st_context *st,
> >   GLboolean retval;
> > 
> >   retval = update_single_texture(st, _view,
> > texUnit,
> > -glsl_version);
> > +prog->sh.data->Version);
> >   if (retval == GL_FALSE)
> >  continue;
> > 
> > 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: Fix active_queries count

2016-12-01 Thread Bruce Cherniak
The active_queries count was incorrect for query types that don't require
a begin_query.  Removed the unnecessary assert.
---
 src/gallium/drivers/swr/swr_query.cpp | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_query.cpp 
b/src/gallium/drivers/swr/swr_query.cpp
index a95e0d8..6eb0781 100644
--- a/src/gallium/drivers/swr/swr_query.cpp
+++ b/src/gallium/drivers/swr/swr_query.cpp
@@ -165,8 +165,9 @@ swr_begin_query(struct pipe_context *pipe, struct 
pipe_query *q)
/* Initialize Results */
memset(&pq->result, 0, sizeof(pq->result));
switch (pq->type) {
+   case PIPE_QUERY_GPU_FINISHED:
case PIPE_QUERY_TIMESTAMP:
-  /* nothing to do */
+  /* nothing to do, but don't want the default */
   break;
case PIPE_QUERY_TIME_ELAPSED:
   pq->result.timestamp_start = swr_get_timestamp(pipe->screen);
@@ -181,10 +182,10 @@ swr_begin_query(struct pipe_context *pipe, struct 
pipe_query *q)
  SwrEnableStatsFE(ctx->swrContext, TRUE);
  SwrEnableStatsBE(ctx->swrContext, TRUE);
   }
+  ctx->active_queries++;
   break;
}
 
-   ctx->active_queries++;
 
return true;
 }
@@ -195,11 +196,10 @@ swr_end_query(struct pipe_context *pipe, struct 
pipe_query *q)
struct swr_context *ctx = swr_context(pipe);
struct swr_query *pq = swr_query(q);
 
-   assert(ctx->active_queries
-  && "swr_end_query, there are no active queries!");
-   ctx->active_queries--;
-
switch (pq->type) {
+   case PIPE_QUERY_GPU_FINISHED:
+  /* nothing to do, but don't want the default */
+  break;
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_TIME_ELAPSED:
   pq->result.timestamp_end = swr_get_timestamp(pipe->screen);
@@ -214,6 +214,7 @@ swr_end_query(struct pipe_context *pipe, struct pipe_query 
*q)
   swr_fence_submit(ctx, pq->fence);
 
   /* Only change stat collection if there are no active queries */
+  ctx->active_queries--;
   if (ctx->active_queries == 0) {
  SwrEnableStatsFE(ctx->swrContext, FALSE);
  SwrEnableStatsBE(ctx->swrContext, FALSE);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 3/7] intel/blorp_blit: Adjust blorp surface parameters for split blits

2016-12-01 Thread Jason Ekstrand
On Wed, Nov 30, 2016 at 8:12 PM, Jordan Justen 
wrote:

> If try_blorp_blit() previously returned that a blit was too large,
> shrink_surface_params() will be used to update the surface parameters
> for the smaller blit so the blit operation can proceed.
>
> v2:
>  * Use double instead of float. (Jason)
>
> Signed-off-by: Jordan Justen 
> ---
>  src/intel/blorp/blorp_blit.c | 108 ++
> +++--
>  1 file changed, 105 insertions(+), 3 deletions(-)
>
> diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
> index b12a4ec..5ca3190 100644
> --- a/src/intel/blorp/blorp_blit.c
> +++ b/src/intel/blorp/blorp_blit.c
> @@ -1486,6 +1486,12 @@ surf_retile_w_to_y(const struct isl_device *isl_dev,
> info->tile_y_sa /= 2;
>  }
>
> +static bool
> +can_shrink_surfaces(const struct blorp_params *params)
> +{
> +   return false;
> +}
> +
>  struct blt_axis {
> double src0, src1, dst0, dst1;
> bool mirror;
> @@ -1732,12 +1738,98 @@ adjust_split_coords(const struct blt_axis *orig,
> split_coords->src1 = orig->src1 + (scale >= 0.0 ? delta1 : delta0);
>  }
>
> +static const struct isl_extent2d
> +get_px_size_sa(const struct isl_surf *surf)
> +{
> +   static const struct isl_extent2d one_to_one = { .w = 1, .h = 1 };
> +
> +   if (surf->msaa_layout != ISL_MSAA_LAYOUT_INTERLEAVED)
> +  return one_to_one;
> +   else
> +  return isl_get_interleaved_msaa_px_size_sa(surf->samples);
> +}
> +
> +static void
> +shrink_surface_params(const struct isl_device *dev,
> +  struct brw_blorp_surface_info *info,
> +  double *x0, double *x1, double *y0, double *y1)
> +{
> +   uint32_t byte_offset, x_offset_sa, y_offset_sa, size;
> +   struct isl_extent2d px_size_sa;
> +   int adjust;
> +
> +   surf_convert_to_single_slice(dev, info);
> +
> +   px_size_sa = get_px_size_sa(&info->surf);
> +
> +   x_offset_sa = (uint32_t)*x0 * px_size_sa.w + info->tile_x_sa;
> +   y_offset_sa = (uint32_t)*y0 * px_size_sa.h + info->tile_y_sa;
> +   isl_tiling_get_intratile_offset_sa(dev, info->surf.tiling,
> +  info->surf.format,
> info->surf.row_pitch,
> +  x_offset_sa, y_offset_sa,
> +  &byte_offset,
> +  &info->tile_x_sa, &info->tile_y_sa);
>

If we're going to do things this early, we should just make our own
temporary variables for tile_x/y instead of trying to re-use the ones from
info.


> +
> +   info->addr.offset += byte_offset;
> +
> +   adjust = (int)info->tile_x_sa / px_size_sa.w - (int)*x0;
> +   *x0 += adjust;
> +   *x1 += adjust;
> +   info->tile_x_sa = 0;
>

That way we don't have to reset them to 0.


> +
> +   adjust = (int)info->tile_y_sa / px_size_sa.h - (int)*y0;
> +   *y0 += adjust;
> +   *y1 += adjust;
> +   info->tile_y_sa = 0;
>
+
> +   size = MIN2((uint32_t)ceilf(*x1), info->surf.logical_level0_px.width);
> +   double phys_scale =
> +  info->surf.phys_level0_sa.width / info->surf.logical_level0_px.w
> idth;
> +   info->surf.logical_level0_px.width = size;
> +   info->surf.phys_level0_sa.width =
> +  (uint32_t)ceilf(phys_scale * size);
>

Why not just set logical_level0_px and then multiply by px_size_sa to get
the physical size?  That seems more reliable than using a double to help with
integer calculations.


> +
> +   size = MIN2((uint32_t)ceilf(*y1), info->surf.logical_level0_px.h
> eight);
> +   phys_scale =
> +  info->surf.phys_level0_sa.height / info->surf.logical_level0_px.h
> eight;
> +   info->surf.logical_level0_px.height = (uint32_t)ceilf(size);
> +   info->surf.phys_level0_sa.height =
> +  (uint32_t)ceilf(phys_scale * size);
> +}
> +
> +static void
> +shrink_surfaces(const struct isl_device *dev,
> +struct blorp_params *params,
> +struct brw_blorp_blit_prog_key *wm_prog_key,
> +struct blt_coords *coords)
> +{
> +   /* Shrink source surface */
> +   shrink_surface_params(dev,
> + &params->src,
> + &coords->x.src0,
> + &coords->x.src1,
> + &coords->y.src0,
> + &coords->y.src1);
> +   wm_prog_key->need_src_offset = false;
> +
> +   /* Shrink destination surface */
> +   shrink_surface_params(dev,
> + &params->dst,
> + &coords->x.dst0,
> + &coords->x.dst1,
> + &coords->y.dst0,
> + &coords->y.dst1);
>

I commented in our meeting that we probably want to support splitting only
one of the surfaces.  Let's merge it first and plan to fix it as a
follow-up.


> +   wm_prog_key->need_dst_offset = wm_prog_key->dst_rgb;
> +}
> +
>  static void
>  do_blorp_blit(struct blorp_batch *batch,
> -  struct blorp_params *params,
> +  const struct blorp_params *orig_params,
>struct brw_blorp_blit_prog_key 

[Mesa-dev] [PATCH v2] nir: Get rid of nir_constant_data

2016-12-01 Thread Jason Ekstrand
This has bothered me for about as long as NIR has been around.  Why do we
have two different unions for constants?  No good reason other than one of
them is a direct port from GLSL IR.
---
 src/compiler/glsl/glsl_to_nir.cpp  | 35 +---
 src/compiler/nir/nir.c | 32 +++---
 src/compiler/nir/nir.h | 30 ++---
 src/compiler/nir/nir_clone.c   |  2 +-
 src/compiler/nir/nir_print.c   | 29 ++---
 src/compiler/spirv/spirv_to_nir.c  | 67 +-
 src/compiler/spirv/vtn_variables.c |  8 ++---
 7 files changed, 98 insertions(+), 105 deletions(-)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index 628f8de..0b74b7e 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -198,34 +198,47 @@ constant_copy(ir_constant *ir, void *mem_ctx)
 
nir_constant *ret = ralloc(mem_ctx, nir_constant);
 
-   unsigned total_elems = ir->type->components();
+   const unsigned rows = ir->type->vector_elements;
+   const unsigned cols = ir->type->matrix_columns;
unsigned i;
 
ret->num_elements = 0;
switch (ir->type->base_type) {
case GLSL_TYPE_UINT:
-  for (i = 0; i < total_elems; i++)
- ret->value.u[i] = ir->value.u[i];
+  for (unsigned c = 0; c < cols; c++) {
+ for (unsigned r = 0; r < rows; r++)
+ret->values[c].u32[r] = ir->value.u[c * rows + r];
+  }
   break;
 
case GLSL_TYPE_INT:
-  for (i = 0; i < total_elems; i++)
- ret->value.i[i] = ir->value.i[i];
+  for (unsigned c = 0; c < cols; c++) {
+ for (unsigned r = 0; r < rows; r++)
+ret->values[c].i32[r] = ir->value.i[c * rows + r];
+  }
   break;
 
case GLSL_TYPE_FLOAT:
-  for (i = 0; i < total_elems; i++)
- ret->value.f[i] = ir->value.f[i];
+  for (unsigned c = 0; c < cols; c++) {
+ for (unsigned r = 0; r < rows; r++)
+ret->values[c].f32[r] = ir->value.f[c * rows + r];
+  }
   break;
 
case GLSL_TYPE_DOUBLE:
-  for (i = 0; i < total_elems; i++)
- ret->value.d[i] = ir->value.d[i];
+  for (unsigned c = 0; c < cols; c++) {
+ for (unsigned r = 0; r < rows; r++)
+ret->values[c].f64[r] = ir->value.d[c * rows + r];
+  }
   break;
 
case GLSL_TYPE_BOOL:
-  for (i = 0; i < total_elems; i++)
- ret->value.b[i] = ir->value.b[i];
+  for (unsigned c = 0; c < cols; c++) {
+ for (unsigned r = 0; r < rows; r++) {
+ret->values[c].u32[r] = ir->value.b[c * rows + r] ?
+NIR_TRUE : NIR_FALSE;
+ }
+  }
   break;
 
case GLSL_TYPE_STRUCT:
diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
index cfb032c..2d882f7 100644
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -806,7 +806,7 @@ nir_deref_get_const_initializer_load(nir_shader *shader, 
nir_deref_var *deref)
assert(constant);
 
const nir_deref *tail = &deref->deref;
-   unsigned matrix_offset = 0;
+   unsigned matrix_col = 0;
while (tail->child) {
   switch (tail->child->deref_type) {
   case nir_deref_type_array: {
@@ -814,7 +814,7 @@ nir_deref_get_const_initializer_load(nir_shader *shader, 
nir_deref_var *deref)
  assert(arr->deref_array_type == nir_deref_array_type_direct);
  if (glsl_type_is_matrix(tail->type)) {
 assert(arr->deref.child == NULL);
-matrix_offset = arr->base_offset;
+matrix_col = arr->base_offset;
  } else {
 constant = constant->elements[arr->base_offset];
  }
@@ -838,24 +838,16 @@ nir_deref_get_const_initializer_load(nir_shader *shader, 
nir_deref_var *deref)
   nir_load_const_instr_create(shader, glsl_get_vector_elements(tail->type),
   bit_size);
 
-   matrix_offset *= load->def.num_components;
-   for (unsigned i = 0; i < load->def.num_components; i++) {
-  switch (glsl_get_base_type(tail->type)) {
-  case GLSL_TYPE_FLOAT:
-  case GLSL_TYPE_INT:
-  case GLSL_TYPE_UINT:
- load->value.u32[i] = constant->value.u[matrix_offset + i];
- break;
-  case GLSL_TYPE_DOUBLE:
- load->value.f64[i] = constant->value.d[matrix_offset + i];
- break;
-  case GLSL_TYPE_BOOL:
- load->value.u32[i] = constant->value.b[matrix_offset + i] ?
- NIR_TRUE : NIR_FALSE;
- break;
-  default:
- unreachable("Invalid immediate type");
-  }
+   switch (glsl_get_base_type(tail->type)) {
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_DOUBLE:
+   case GLSL_TYPE_BOOL:
+  load->value = constant->values[matrix_col];
+  break;
+   default:
+  unreachable("Invalid immediate type");
}
 
return load;
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 

Re: [Mesa-dev] [PATCH] nir: Get rid of nir_constant_data

2016-12-01 Thread Jason Ekstrand
On Thu, Dec 1, 2016 at 12:46 AM, Iago Toral  wrote:

> On Tue, 2016-11-29 at 22:51 -0800, Jason Ekstrand wrote:
> > This has bothered me for about as long as NIR has been around.  Why
> > do we
> > have two different unions for constants?  No good reason other than
> > one of
> > them is a direct port from GLSL IR.
> > ---
> >  src/compiler/glsl/glsl_to_nir.cpp  | 35 
> >  src/compiler/nir/nir.c | 36 +++--
> >  src/compiler/nir/nir.h | 30 +++--
> >  src/compiler/nir/nir_clone.c   |  2 +-
> >  src/compiler/nir/nir_print.c   | 29 
> >  src/compiler/spirv/spirv_to_nir.c  | 55 --
> > 
> >  src/compiler/spirv/vtn_variables.c |  8 +++---
> >  7 files changed, 96 insertions(+), 99 deletions(-)
> (...)
> > @@ -838,24 +838,20 @@ nir_deref_get_const_initializer_load(nir_shader
> > *shader, nir_deref_var *deref)
> >nir_load_const_instr_create(shader,
> > glsl_get_vector_elements(tail->type),
> >bit_size);
> >
> > -   matrix_offset *= load->def.num_components;
> > -   for (unsigned i = 0; i < load->def.num_components; i++) {
> > -  switch (glsl_get_base_type(tail->type)) {
> > -  case GLSL_TYPE_FLOAT:
> > -  case GLSL_TYPE_INT:
> > -  case GLSL_TYPE_UINT:
> > - load->value.u32[i] = constant->value.u[matrix_offset + i];
> > - break;
> > -  case GLSL_TYPE_DOUBLE:
> > - load->value.f64[i] = constant->value.d[matrix_offset + i];
> > - break;
> > -  case GLSL_TYPE_BOOL:
> > - load->value.u32[i] = constant->value.b[matrix_offset + i] ?
> > - NIR_TRUE : NIR_FALSE;
> > - break;
> > -  default:
> > - unreachable("Invalid immediate type");
> > -  }
> > +   switch (glsl_get_base_type(tail->type)) {
> > +   case GLSL_TYPE_FLOAT:
> > +   case GLSL_TYPE_INT:
> > +   case GLSL_TYPE_UINT:
> > +  load->value = constant->values[matrix_col];
> > +  break;
> > +   case GLSL_TYPE_DOUBLE:
> > +  load->value = constant->values[matrix_col];
> > +  break;
> > +   case GLSL_TYPE_BOOL:
> > +  load->value = constant->values[matrix_col];
> > +  break;
>
> You can merge the double and bool cases in with the rest, it is the
> same code now.
>

Good Call.  Fixed.


> > +   default:
> > +  unreachable("Invalid immediate type");
> > }
> >
> (...)
> >
> > diff --git a/src/compiler/spirv/spirv_to_nir.c
> > b/src/compiler/spirv/spirv_to_nir.c
> > index 34968a4..f41df32 100644
> > --- a/src/compiler/spirv/spirv_to_nir.c
> > +++ b/src/compiler/spirv/spirv_to_nir.c
> (...)
> >
> >   uint32_t u[8];
> >   for (unsigned i = 0; i < len0; i++)
> > -u[i] = v0->constant->value.u[i];
> > +u[i] = v0->constant->values[0].u32[i];
> >   for (unsigned i = 0; i < len1; i++)
> > -u[len0 + i] = v1->constant->value.u[i];
> > +u[len0 + i] = v1->constant->values[0].u32[i];
> >
> >   for (unsigned i = 0; i < count - 6; i++) {
> >  uint32_t comp = w[i + 6];
> >  if (comp == (uint32_t)-1) {
> > -   val->constant->value.u[i] = 0xdeadbeef;
> > +   val->constant->values[0].u32[i] = 0xdeadbeef;
> >  } else {
> > -   val->constant->value.u[i] = u[comp];
> > +   val->constant->values[0].u32[i] = u[comp];
> >  }
> >   }
> >   break;
> > @@ -1137,7 +1133,7 @@ vtn_handle_constant(struct vtn_builder *b,
> > SpvOp opcode,
> >  } else {
> > unsigned num_components =
> > glsl_get_vector_elements(type);
> > for (unsigned i = 0; i < num_components; i++)
> > -  val->constant->value.u[i] = (*c)->value.u[elem +
> > i];
> > +  val->constant->values[0].u32[i] = (*c)-
> > >values[0].u32[elem + i];
> >  }
> >   } else {
> >  struct vtn_value *insert =
> > @@ -1148,7 +1144,7 @@ vtn_handle_constant(struct vtn_builder *b,
> > SpvOp opcode,
> >  } else {
> > unsigned num_components =
> > glsl_get_vector_elements(type);
> > for (unsigned i = 0; i < num_components; i++)
> > -  (*c)->value.u[elem + i] = insert->constant-
> > >value.u[i];
> > +  (*c)->values[0].u32[elem + i] = insert->constant-
> > >values[0].u32[i];
>
> Is this correct for matrix types? We do:
>
> elem += w[i] * glsl_get_vector_elements(type);
>
> in a loop above, so I guess elem can be > 4 and we would end up
> indexing out of bounds  here. Don't we need to use elem to index into
> 'values' instead and maybe tweak the code above to have elem track the
> number of columns instead of individual components we need to offset?


You're right.  I'm sending a v2.


>
> Iago
>
> >  }
> >   }
> >   

Re: [Mesa-dev] [PATCH 14/18] i965: Replace reg_type_size[] with a function.

2016-12-01 Thread Matt Turner
On Sun, Nov 27, 2016 at 1:26 AM, Kenneth Graunke  wrote:
> On Tuesday, November 22, 2016 11:59:48 AM PST Matt Turner wrote:
>> A function is necessary to handle immediate types.
>> ---
>>  src/mesa/drivers/dri/i965/brw_disasm.c  | 35 
>>  src/mesa/drivers/dri/i965/brw_eu_emit.c | 58 
>> +++--
>>  src/mesa/drivers/dri/i965/brw_reg.h |  8 +
>>  3 files changed, 77 insertions(+), 24 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c 
>> b/src/mesa/drivers/dri/i965/brw_disasm.c
>> index 5e51be7..3786e4b 100644
>> --- a/src/mesa/drivers/dri/i965/brw_disasm.c
>> +++ b/src/mesa/drivers/dri/i965/brw_disasm.c
>> @@ -257,20 +257,6 @@ static const char *const three_source_reg_encoding[] = {
>> [BRW_3SRC_TYPE_DF] = "DF",
>>  };
>>
>> -const int reg_type_size[] = {
>> -   [BRW_HW_REG_TYPE_UD]  = 4,
>> -   [BRW_HW_REG_TYPE_D]   = 4,
>> -   [BRW_HW_REG_TYPE_UW]  = 2,
>> -   [BRW_HW_REG_TYPE_W]   = 2,
>> -   [BRW_HW_REG_NON_IMM_TYPE_UB]  = 1,
>> -   [BRW_HW_REG_NON_IMM_TYPE_B]   = 1,
>> -   [GEN7_HW_REG_NON_IMM_TYPE_DF] = 8,
>> -   [BRW_HW_REG_TYPE_F]   = 4,
>> -   [GEN8_HW_REG_TYPE_UQ] = 8,
>> -   [GEN8_HW_REG_TYPE_Q]  = 8,
>> -   [GEN8_HW_REG_NON_IMM_TYPE_HF] = 2,
>> -};
>> -
>>  static const char *const reg_file[4] = {
>> [0] = "A",
>> [1] = "g",
>> @@ -734,6 +720,7 @@ reg(FILE *file, unsigned _reg_file, unsigned _reg_nr)
>>  static int
>>  dest(FILE *file, const struct gen_device_info *devinfo, brw_inst *inst)
>>  {
>> +   unsigned elem_size = brw_element_size(devinfo, inst, dst);
>> int err = 0;
>>
>> if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
>> @@ -744,7 +731,7 @@ dest(FILE *file, const struct gen_device_info *devinfo, 
>> brw_inst *inst)
>>  return 0;
>>   if (brw_inst_dst_da1_subreg_nr(devinfo, inst))
>>  format(file, ".%"PRIu64, brw_inst_dst_da1_subreg_nr(devinfo, 
>> inst) /
>> -   reg_type_size[brw_inst_dst_reg_type(devinfo, inst)]);
>> +   elem_size);
>>   string(file, "<");
>>   err |= control(file, "horiz stride", horiz_stride,
>>  brw_inst_dst_hstride(devinfo, inst), NULL);
>> @@ -755,7 +742,7 @@ dest(FILE *file, const struct gen_device_info *devinfo, 
>> brw_inst *inst)
>>   string(file, "g[a0");
>>   if (brw_inst_dst_ia_subreg_nr(devinfo, inst))
>>  format(file, ".%"PRIu64, brw_inst_dst_ia_subreg_nr(devinfo, 
>> inst) /
>> -   reg_type_size[brw_inst_dst_reg_type(devinfo, inst)]);
>> +   elem_size);
>>   if (brw_inst_dst_ia1_addr_imm(devinfo, inst))
>>  format(file, " %d", brw_inst_dst_ia1_addr_imm(devinfo, inst));
>>   string(file, "]<");
>> @@ -773,7 +760,7 @@ dest(FILE *file, const struct gen_device_info *devinfo, 
>> brw_inst *inst)
>>  return 0;
>>   if (brw_inst_dst_da16_subreg_nr(devinfo, inst))
>>  format(file, ".%"PRIu64, brw_inst_dst_da16_subreg_nr(devinfo, 
>> inst) /
>> -   reg_type_size[brw_inst_dst_reg_type(devinfo, inst)]);
>> +   elem_size);
>>   string(file, "<1>");
>>   err |= control(file, "writemask", writemask,
>>  brw_inst_da16_writemask(devinfo, inst), NULL);
>> @@ -850,8 +837,10 @@ src_da1(FILE *file,
>> err |= reg(file, _reg_file, reg_num);
>> if (err == -1)
>>return 0;
>> -   if (sub_reg_num)
>> -  format(file, ".%d", sub_reg_num / reg_type_size[type]);   /* use 
>> formal style like spec */
>> +   if (sub_reg_num) {
>> +  unsigned elem_size = brw_hw_reg_type_to_size(devinfo, type, 
>> _reg_file);
>> +  format(file, ".%d", sub_reg_num / elem_size);   /* use formal style 
>> like spec */
>> +   }
>> src_align1_region(file, _vert_stride, _width, _horiz_stride);
>> err |= control(file, "src reg encoding", reg_encoding, type, NULL);
>> return err;
>> @@ -936,10 +925,14 @@ src_da16(FILE *file,
>> err |= reg(file, _reg_file, _reg_nr);
>> if (err == -1)
>>return 0;
>> -   if (_subreg_nr)
>> +   if (_subreg_nr) {
>> +  unsigned elem_size =
>> + brw_hw_reg_type_to_size(devinfo, _reg_type, _reg_file);
>> +
>>/* bit4 for subreg number byte addressing. Make this same meaning as
>>   in da1 case, so output looks consistent. */
>> -  format(file, ".%d", 16 / reg_type_size[_reg_type]);
>> +  format(file, ".%d", 16 / elem_size);
>> +   }
>> string(file, "<");
>> err |= control(file, "vert stride", vert_stride, _vert_stride, NULL);
>> string(file, ",4,1>");
>> diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
>> b/src/mesa/drivers/dri/i965/brw_eu_emit.c
>> index f3aa2bc..de98102 100644
>> --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
>> +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
>> @@ -141,6 +141,59 @@ 

Re: [Mesa-dev] [PATCH 14/27] i965: Allow aux buffers to have an offset

2016-12-01 Thread Ben Widawsky

On 16-12-01 14:41:20, Kenneth Graunke wrote:

On Thursday, December 1, 2016 2:09:55 PM PST Ben Widawsky wrote:

From: Ben Widawsky 

Previously our aux buffers (MCS and HiZ) never had an offset because
they were in their own buffer object. When using the CCS lossless
compression feature, it's desirable to store the data at an offset from
the main framebuffer, i.e. share a buffer object. This patch just makes
having an aux offset possible.

Thanks to Ken for helping me find the simplest fix...

Cc: Kenneth Graunke 
Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 3 +--
 src/mesa/drivers/dri/i965/intel_image.h  | 3 +++
 src/mesa/drivers/dri/i965/intel_screen.c | 5 +
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 6c44381..69be3eb 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -143,7 +143,6 @@ brw_emit_surface_state(struct brw_context *brw,
   aux_surf = &aux_surf_s;

   if (mt->mcs_buf) {
- assert(mt->mcs_buf->offset == 0);
  aux_bo = mt->mcs_buf->bo;
  aux_offset = mt->mcs_buf->bo->offset64 + mt->mcs_buf->offset;
   } else {
@@ -185,7 +184,7 @@ brw_emit_surface_state(struct brw_context *brw,
   uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
   drm_intel_bo_emit_reloc(brw->batch.bo,
   *surf_offset + brw->isl_dev.ss.aux_addr_offset,
-  aux_bo, *aux_addr & 0xfff,
+  aux_bo, *aux_addr - aux_bo->offset64,
   read_domains, write_domains);
}
 }


This seems like two separate patches - one above (fixing SURFACE_STATE
to actually handle non-zero aux offsets), and one below (adding offset
fields for DRI images).  Could we split it?

The one above would get a:
Reviewed-by: Kenneth Graunke 


Sure I'll split it, thanks. It was logically the same in my opinion because
before we added an aux_offset, aux_addr should always have been just the low 12
bits...



I'd need to read the one below in more context.


diff --git a/src/mesa/drivers/dri/i965/intel_image.h 
b/src/mesa/drivers/dri/i965/intel_image.h
index bbda952..7b3c624 100644
--- a/src/mesa/drivers/dri/i965/intel_image.h
+++ b/src/mesa/drivers/dri/i965/intel_image.h
@@ -92,6 +92,9 @@ struct __DRIimageRec {
/** The image was created with EGL_EXT_image_dma_buf_import. */
bool dma_buf_imported;

+   /** The image has some ancillary data associated with it at offset. */
+   uint32_t aux_offset;
+
/**
 * Provided by EGL_EXT_image_dma_buf_import.
 * \{
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index e0402cf..0f19a6e 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -572,6 +572,11 @@ create_image_with_modifier(struct intel_screen *screen,
image->tile_y = I915_TILING_Y;
image->modifier = modifier;

+   if (image->planar_format)
+  assert(image->planar_format->nplanes == 1);
+
+   image->aux_offset = 0; /* y_tiled_height * pitch; */
+
return true;
 }








--
Ben Widawsky, Intel Open Source Technology Center
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 14/27] i965: Allow aux buffers to have an offset

2016-12-01 Thread Kenneth Graunke
On Thursday, December 1, 2016 2:09:55 PM PST Ben Widawsky wrote:
> From: Ben Widawsky 
> 
> Previously our aux buffers (MCS and HiZ) never had an offset because
> they were in their own buffer object. When using the CCS lossless
> compression feature, it's desirable to store the data at an offset from
> the main framebuffer, i.e. share a buffer object. This patch just makes
> having an aux offset possible.
> 
> Thanks to Ken for helping me find the simplest fix...
> 
> Cc: Kenneth Graunke 
> Signed-off-by: Ben Widawsky 
> ---
>  src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 3 +--
>  src/mesa/drivers/dri/i965/intel_image.h  | 3 +++
>  src/mesa/drivers/dri/i965/intel_screen.c | 5 +
>  3 files changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
> b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index 6c44381..69be3eb 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -143,7 +143,6 @@ brw_emit_surface_state(struct brw_context *brw,
>aux_surf = &aux_surf_s;
>  
>if (mt->mcs_buf) {
> - assert(mt->mcs_buf->offset == 0);
>   aux_bo = mt->mcs_buf->bo;
>   aux_offset = mt->mcs_buf->bo->offset64 + mt->mcs_buf->offset;
>} else {
> @@ -185,7 +184,7 @@ brw_emit_surface_state(struct brw_context *brw,
>uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
>drm_intel_bo_emit_reloc(brw->batch.bo,
>*surf_offset + brw->isl_dev.ss.aux_addr_offset,
> -  aux_bo, *aux_addr & 0xfff,
> +  aux_bo, *aux_addr - aux_bo->offset64,
>read_domains, write_domains);
> }
>  }

This seems like two separate patches - one above (fixing SURFACE_STATE
to actually handle non-zero aux offsets), and one below (adding offset
fields for DRI images).  Could we split it?

The one above would get a:
Reviewed-by: Kenneth Graunke 

I'd need to read the one below in more context.

> diff --git a/src/mesa/drivers/dri/i965/intel_image.h 
> b/src/mesa/drivers/dri/i965/intel_image.h
> index bbda952..7b3c624 100644
> --- a/src/mesa/drivers/dri/i965/intel_image.h
> +++ b/src/mesa/drivers/dri/i965/intel_image.h
> @@ -92,6 +92,9 @@ struct __DRIimageRec {
> /** The image was created with EGL_EXT_image_dma_buf_import. */
> bool dma_buf_imported;
>  
> +   /** The image has some ancillary data associated with it at offset. */
> +   uint32_t aux_offset;
> +
> /**
>  * Provided by EGL_EXT_image_dma_buf_import.
>  * \{
> diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
> b/src/mesa/drivers/dri/i965/intel_screen.c
> index e0402cf..0f19a6e 100644
> --- a/src/mesa/drivers/dri/i965/intel_screen.c
> +++ b/src/mesa/drivers/dri/i965/intel_screen.c
> @@ -572,6 +572,11 @@ create_image_with_modifier(struct intel_screen *screen,
> image->tile_y = I915_TILING_Y;
> image->modifier = modifier;
>  
> +   if (image->planar_format)
> +  assert(image->planar_format->nplanes == 1);
> +
> +   image->aux_offset = 0; /* y_tiled_height * pitch; */
> +
> return true;
>  }
>  
> 



signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Mesa 13.1.0 release plan

2016-12-01 Thread Timothy Arceri
On Wed, 2016-11-30 at 20:23 +, Emil Velikov wrote:
> Hi all,
> 
> With holidays not far off, it might be a nice idea to consider the
> branchpoint/release schedule for the next release.
> 
> I will be having limited internet access during 20 Dec - 7 Jan, thus
> I'm leaning towards the following:
>  Jan 13 2017 - Feature freeze/Release candidate 1
>  Jan 20 2017 - Release candidate 2
>  Jan 27 2017 - Release candidate 3
>  Feb 03 2017 - Release candidate 4/final release
> 
> How does this align with people's schedules ?
> 
> Please let me know if you have any work we want to land before the
> next branchpoint.

I'd really like to land the on-disk shader cache (although disabled by
default for this release).

Bump again for reviews on these series:

https://patchwork.freedesktop.org/series/15613/

https://patchwork.freedesktop.org/series/15775/

https://patchwork.freedesktop.org/series/16072/

There are a lot of patches but they really are pretty straightforward,
especially in the first two series. Most of this can land separately so
once again even partial reviews are appreciated.

We have a lot of interest from users and from Valve in this series so
it would be a shame to let it slip yet another release.

Thanks,
Tim


> 
> Thanks
> Emil
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/27] i965: Handle Y-tile modifier

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

This doesn't actually enable Y-tiling, it simply parses it and moves on.

Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/intel_image.h  |  1 +
 src/mesa/drivers/dri/i965/intel_screen.c | 36 +---
 2 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_image.h 
b/src/mesa/drivers/dri/i965/intel_image.h
index fd63919..bbda952 100644
--- a/src/mesa/drivers/dri/i965/intel_image.h
+++ b/src/mesa/drivers/dri/i965/intel_image.h
@@ -80,6 +80,7 @@ struct __DRIimageRec {
uint32_t strides[3];
uint32_t offsets[3];
struct intel_image_format *planar_format;
+   uint64_t modifier; /** fb modifier (fourcc) */
 
/* particular miptree level */
GLuint width;
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index b5bb4a0..e54332f 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -23,6 +23,7 @@
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -547,11 +548,22 @@ __intel_create_image(__DRIscreen *dri_screen,
 {
__DRIimage *image;
struct intel_screen *screen = dri_screen->driverPrivate;
-   uint32_t tiling;
+   uint32_t tiling = I915_TILING_X;
int cpp;
unsigned long pitch;
 
-   tiling = I915_TILING_X;
+   image = intel_allocate_image(screen, format, loaderPrivate);
+   if (image == NULL)
+  return NULL;
+
+   for (int i = 0; i < count; i++) {
+  switch (modifiers[i]) {
+  case I915_FORMAT_MOD_Y_TILED:
+ image->modifier = I915_FORMAT_MOD_Y_TILED;
+ break;
+  }
+   }
+
if (use & __DRI_IMAGE_USE_CURSOR) {
   if (width != 64 || height != 64)
 return NULL;
@@ -561,10 +573,6 @@ __intel_create_image(__DRIscreen *dri_screen,
if (use & __DRI_IMAGE_USE_LINEAR)
   tiling = I915_TILING_NONE;
 
-   image = intel_allocate_image(screen, format, loaderPrivate);
-   if (image == NULL)
-  return NULL;
-
cpp = _mesa_get_format_bytes(image->format);
image->bo = drm_intel_bo_alloc_tiled(screen->bufmgr, "image",
 width, height, cpp, &tiling,
@@ -597,8 +605,20 @@ intel_create_image_with_modifiers(__DRIscreen *dri_screen,
   const unsigned count,
   void *loaderPrivate)
 {
-   return __intel_create_image(dri_screen, width, height, format, use, NULL, 0,
-   loaderPrivate);
+   uint64_t local_mods[count];
+   int local_count = 0;
+
+   /* This compacts the actual modifiers to the ones we know how to handle */
+   for (int i = 0; i < count; i++) {
+  switch (modifiers[i]) {
+  case I915_FORMAT_MOD_Y_TILED:
+ local_mods[local_count++] = I915_FORMAT_MOD_Y_TILED;
+ break;
+  }
+   }
+
+   return __intel_create_image(dri_screen, width, height, format, use,
+   local_mods, local_count, loaderPrivate);
 }
 
 static GLboolean
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 27/27] i965: Handle compression modifier

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

FINISHME: Use the kernel's final choice for the fb modifier

bwidawsk@norris2:~/intel-gfx/kmscube (modifiers $) 
~/scripts/measure_bandwidth.sh ./kmscube none
Read bandwidth: 603.91 MiB/s
Write bandwidth: 615.28 MiB/s
bwidawsk@norris2:~/intel-gfx/kmscube (modifiers $) 
~/scripts/measure_bandwidth.sh ./kmscube ytile
Read bandwidth: 571.13 MiB/s
Write bandwidth: 555.51 MiB/s
bwidawsk@norris2:~/intel-gfx/kmscube (modifiers $) 
~/scripts/measure_bandwidth.sh ./kmscube ccs
Read bandwidth: 259.34 MiB/s
Write bandwidth: 337.83 MiB/s

Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/intel_screen.c | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index 7ee21ee..36639f4 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -637,9 +637,15 @@ __intel_create_image(__DRIscreen *dri_screen,
if (use & __DRI_IMAGE_USE_LINEAR)
   tiling = I915_TILING_NONE;
 
+   /* This determines the prioritized modifier to pick for later */
for (int i = 0; i < count; i++) {
   switch (modifiers[i]) {
   case I915_FORMAT_MOD_Y_TILED:
+ /* Y-tiling is the lowest priority modifier */
+ if (modifier)
+continue;
+ /* fallthrough */
+  case /* I915_FORMAT_MOD_CCS */ fourcc_mod_code(INTEL, 4):
  /* Kernel provides no way to query support for this. Assume GEN check
   * is enough :/
   */
@@ -649,12 +655,11 @@ __intel_create_image(__DRIscreen *dri_screen,
  }
 
  if (tiling == I915_TILING_NONE) {
-_mesa_warning(NULL, "Invalid use/modifier combination (%x %llx)\n",
-  use, I915_FORMAT_MOD_Y_TILED);
+_mesa_warning(NULL, "Invalid use/modifier combination (%x %" 
PRIx64")\n",
+  use, modifiers[i]);
 continue;
  }
-
- modifier = I915_FORMAT_MOD_Y_TILED;
+ modifier = modifiers[i];
  break;
   }
}
@@ -707,7 +712,8 @@ intel_create_image_with_modifiers(__DRIscreen *dri_screen,
for (int i = 0; i < count; i++) {
   switch (modifiers[i]) {
   case I915_FORMAT_MOD_Y_TILED:
- local_mods[local_count++] = I915_FORMAT_MOD_Y_TILED;
+  case /* I915_FORMAT_MOD_CCS */ fourcc_mod_code(INTEL, 4):
+ local_mods[local_count++] = modifiers[i];
  break;
   }
}
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 20/27] i965: Pretend that CCS modified images are two planes

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/intel_screen.c | 20 +++-
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index 91eb7ec..f40761a 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -747,7 +747,7 @@ intel_query_image(__DRIimage *image, int attrib, int *value)
case __DRI_IMAGE_ATTRIB_FOURCC:
   return intel_lookup_fourcc(image->dri_format, value);
case __DRI_IMAGE_ATTRIB_NUM_PLANES:
-  *value = 1;
+  *value = image->aux_offset ? 2: 1;
   return true;
case __DRI_IMAGE_ATTRIB_OFFSET:
   *value = image->offset;
@@ -956,9 +956,17 @@ intel_from_planar(__DRIimage *parent, int plane, void 
*loaderPrivate)
 int width, height, offset, stride, dri_format, index;
 struct intel_image_format *f;
 __DRIimage *image;
-
-if (parent == NULL || parent->planar_format == NULL)
-return NULL;
+bool is_aux = parent->aux_offset && plane == 1;
+
+if (parent == NULL || parent->planar_format == NULL) {
+   if (is_aux) {
+  offset = parent->aux_offset;
+  stride = ALIGN(parent->pitch / 32, 128);
+  dri_format = parent->dri_format;
+  goto done;
+   }
+   return NULL;
+}
 
 f = parent->planar_format;
 
@@ -972,11 +980,13 @@ intel_from_planar(__DRIimage *parent, int plane, void 
*loaderPrivate)
 offset = parent->offsets[index];
 stride = parent->strides[index];
 
+done:
 image = intel_allocate_image(parent->screen, dri_format, loaderPrivate);
 if (image == NULL)
return NULL;
 
-if (offset + height * stride > parent->bo->size) {
+if (!is_aux &&
+offset + height * stride > parent->bo->size) {
_mesa_warning(NULL, "intel_create_sub_image: subimage out of bounds");
free(image);
return NULL;
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/27] gbm: Export a per plane getter for stride

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

Signed-off-by: Ben Widawsky 
---
 src/gbm/backends/dri/gbm_dri.c | 26 +-
 src/gbm/gbm-symbols-check  |  1 +
 src/gbm/main/gbm.c | 15 ++-
 src/gbm/main/gbm.h |  3 +++
 4 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
index f3ca228..67d67d8 100644
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -625,7 +625,31 @@ gbm_dri_bo_get_planes(struct gbm_bo *_bo)
 static uint32_t
 gbm_dri_bo_get_stride(struct gbm_bo *_bo, int plane)
 {
-   return _bo->stride;
+   struct gbm_dri_device *dri = gbm_dri_device(_bo->gbm);
+   struct gbm_dri_bo *bo = gbm_dri_bo(_bo);
+   __DRIimage *image;
+   int stride = 0;
+
+   if (!dri->image || dri->image->base.version < 11 || 
!dri->image->fromPlanar) {
+  errno = ENOSYS;
+  return 0;
+   }
+
+   if (bo->image == NULL)
+  return _bo->stride;
+
+   if (plane >= get_number_planes(dri, bo->image))
+  return 0;
+
+   image = dri->image->fromPlanar(bo->image, plane, NULL);
+   if (!image) {
+  /* Use the parent stride */
+  image = bo->image;
+   }
+
+   dri->image->queryImage(image, __DRI_IMAGE_ATTRIB_STRIDE, &stride);
+
+   return (uint32_t)stride;
 }
 
 static void
diff --git a/src/gbm/gbm-symbols-check b/src/gbm/gbm-symbols-check
index 8c4da1b..944f3e1 100755
--- a/src/gbm/gbm-symbols-check
+++ b/src/gbm/gbm-symbols-check
@@ -14,6 +14,7 @@ gbm_bo_unmap
 gbm_bo_get_width
 gbm_bo_get_height
 gbm_bo_get_stride
+gbm_bo_get_stride_for_plane
 gbm_bo_get_format
 gbm_bo_get_device
 gbm_bo_get_handle
diff --git a/src/gbm/main/gbm.c b/src/gbm/main/gbm.c
index 14c31ad..1c19540 100644
--- a/src/gbm/main/gbm.c
+++ b/src/gbm/main/gbm.c
@@ -165,7 +165,20 @@ gbm_bo_get_height(struct gbm_bo *bo)
 GBM_EXPORT uint32_t
 gbm_bo_get_stride(struct gbm_bo *bo)
 {
-   return bo->gbm->bo_get_stride(bo, 0);
+   return gbm_bo_get_stride_for_plane(bo, 0);
+}
+
+/** Get the stride for the given plane
+ *
+ * \param bo The buffer object
+ * \param plane for which you want the stride
+ *
+ * \sa gbm_bo_get_stride()
+ */
+GBM_EXPORT uint32_t
+gbm_bo_get_stride_for_plane(struct gbm_bo *bo, int plane)
+{
+   return bo->gbm->bo_get_stride(bo, plane);
 }
 
 /** Get the format of the buffer object
diff --git a/src/gbm/main/gbm.h b/src/gbm/main/gbm.h
index b4873ab..3e104d1 100644
--- a/src/gbm/main/gbm.h
+++ b/src/gbm/main/gbm.h
@@ -304,6 +304,9 @@ uint32_t
 gbm_bo_get_stride(struct gbm_bo *bo);
 
 uint32_t
+gbm_bo_get_stride_for_plane(struct gbm_bo *bo, int plane);
+
+uint32_t
 gbm_bo_get_format(struct gbm_bo *bo);
 
 struct gbm_device *
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 18/27] i965/miptree: Add a return for updating of winsys

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

There is nothing particularly useful to do currently if the update
fails, but there is no point carrying on either. As a result, this has a
behavior change.

Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/brw_context.c   | 14 --
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c |  6 +++---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  2 +-
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index b928f94..593fa67 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -1645,9 +1645,10 @@ intel_process_dri2_buffer(struct brw_context *brw,
   return;
}
 
-   intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
-drawable->w, drawable->h,
-buffer->pitch);
+   if (intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
+drawable->w, drawable->h,
+buffer->pitch))
+  return;
 
if (_mesa_is_front_buffer_drawing(fb) &&
(buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
@@ -1703,9 +1704,10 @@ intel_update_image_buffer(struct brw_context *intel,
if (last_mt && last_mt->bo == buffer->bo)
   return;
 
-   intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
-buffer->width, buffer->height,
-buffer->pitch);
+   if (intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
+buffer->width, buffer->height,
+buffer->pitch))
+  return;
 
if (_mesa_is_front_buffer_drawing(fb) &&
buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index d002546..74db507 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -908,7 +908,7 @@ intel_miptree_create_for_image(struct brw_context *intel,
  * that will contain the actual rendering (which is lazily resolved to
  * irb->singlesample_mt).
  */
-void
+int
 intel_update_winsys_renderbuffer_miptree(struct brw_context *intel,
  struct intel_renderbuffer *irb,
  drm_intel_bo *bo,
@@ -974,12 +974,12 @@ intel_update_winsys_renderbuffer_miptree(struct 
brw_context *intel,
  irb->mt = multisample_mt;
   }
}
-   return;
+   return 0;
 
 fail:
intel_miptree_release(>singlesample_mt);
intel_miptree_release(>mt);
-   return;
+   return -1;
 }
 
 struct intel_mipmap_tree*
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index 7b9a7be..85fe118 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -726,7 +726,7 @@ intel_miptree_create_for_image(struct brw_context *intel,
uint32_t pitch,
uint32_t layout_flags);
 
-void
+int
 intel_update_winsys_renderbuffer_miptree(struct brw_context *intel,
  struct intel_renderbuffer *irb,
  drm_intel_bo *bo,
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/27] i965: Bring back always Y-tiled on SKL+

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

commit 6a0d036483caf87d43ebe2edd1905873446c9589
Author: Ben Widawsky 
Date:   Thu Apr 21 20:14:58 2016 -0700

i965: Always use Y-tiled buffers on SKL+

Aside from the benchmark gains that were initially posted, I was able to
collect memory bandwidth data running kmscube (19x12 display).

bwidawsk@norris2:~/intel-gfx/kmscube (modifiers $) 
~/scripts/measure_bandwidth.sh ./kmscube none
Read bandwidth: 603.37 MiB/s
Write bandwidth: 619.20 MiB/s
bwidawsk@norris2:~/intel-gfx/kmscube (modifiers $) 
~/scripts/measure_bandwidth.sh ./kmscube ytile
Read bandwidth: 572.56 MiB/s
Write bandwidth: 556.95 MiB/s

v2: Use new modifier mechanism, and expose new DRIimage version

Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/intel_screen.c | 24 ++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index afe3189..d81524f 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -559,6 +559,14 @@ __intel_create_image(__DRIscreen *dri_screen,
for (int i = 0; i < count; i++) {
   switch (modifiers[i]) {
   case I915_FORMAT_MOD_Y_TILED:
+ /* Kernel provides no way to query support for this. Assume GEN check
+  * is enough :/
+  */
+ if (screen->devinfo.gen < 9) {
+_mesa_warning(NULL, "Invalid Y-tiling parameter\n");
+continue;
+ }
+
  image->modifier = I915_FORMAT_MOD_Y_TILED;
  break;
   }
@@ -573,6 +581,11 @@ __intel_create_image(__DRIscreen *dri_screen,
if (use & __DRI_IMAGE_USE_LINEAR)
   tiling = I915_TILING_NONE;
 
+   if (image->modifier == I915_FORMAT_MOD_Y_TILED) {
+  assert(tiling != I915_TILING_NONE);
+  tiling = I915_TILING_Y;
+   }
+
cpp = _mesa_get_format_bytes(image->format);
image->bo = drm_intel_bo_alloc_tiled(screen->bufmgr, "image",
 width, height, cpp, ,
@@ -584,6 +597,7 @@ __intel_create_image(__DRIscreen *dri_screen,
image->width = width;
image->height = height;
image->pitch = pitch;
+   image->tile_y = tiling == I915_TILING_Y;
 
return image;
 }
@@ -901,7 +915,7 @@ intel_from_planar(__DRIimage *parent, int plane, void 
*loaderPrivate)
 }
 
 static const __DRIimageExtension intelImageExtension = {
-.base = { __DRI_IMAGE, 13 },
+.base = { __DRI_IMAGE, 14 },
 
 .createImageFromName= intel_create_image_from_name,
 .createImageFromRenderbuffer= intel_create_image_from_renderbuffer,
@@ -1776,7 +1790,13 @@ intelAllocateBuffer(__DRIscreen *dri_screen,
   return NULL;
 
/* The front and back buffers are color buffers, which are X tiled. */
-   uint32_t tiling = I915_TILING_X;
+   uint32_t tiling;
+   if (screen->devinfo.gen >= 9) {
+  tiling = I915_TILING_Y;
+   } else {
+  tiling = I915_TILING_X;
+   }
+
unsigned long pitch;
int cpp = format / 8;
intelBuffer->bo = drm_intel_bo_alloc_tiled(screen->bufmgr,
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 17/27] i965: Create correctly sized mcs for an image

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/intel_screen.c | 37 
 1 file changed, 33 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index 0f19a6e..91eb7ec 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -545,8 +545,11 @@ create_image_with_modifier(struct intel_screen *screen,
 {
uint32_t tiling;
unsigned long pitch;
+   unsigned ccs_height = 0;
 
switch (modifier) {
+   case /* I915_FORMAT_MOD_CCS */ fourcc_mod_code(INTEL, 4):
+  ccs_height = ALIGN(DIV_ROUND_UP(height, 16), 32);
case I915_FORMAT_MOD_Y_TILED:
   tiling = I915_TILING_Y;
}
@@ -554,10 +557,35 @@ create_image_with_modifier(struct intel_screen *screen,
/* For now, all modifiers require some tiling */
assert(tiling);
 
+   /*
+* CCS width is always going to be less than or equal to the image's width.
+* All we need to do is make sure we add extra rows (height) for the CCS.
+*
+* A pair of CCS bits correspond to 8x4 pixels, and must be cacheline
+* granularity. Each CCS tile is laid out in 8b strips, which corresponds to
+* 1024x512 pixel region. In memory, it looks like the following:
+*
+* ┌─┐
+* │ │
+* │ │
+* │ │
+* │  Image  │
+* │ │
+* │ │
+* │x│
+* ├─┬───┘
+* │ │   |
+* │ccs  │  unused   |
+* └─┘---┘
+* <--pitch-->
+*/
+   unsigned y_tiled_height = ALIGN(height, 32);
+
cpp = _mesa_get_format_bytes(image->format);
-   image->bo = drm_intel_bo_alloc_tiled(screen->bufmgr, "image+mod",
-width, height, cpp, ,
-, 0);
+   image->bo = drm_intel_bo_alloc_tiled(screen->bufmgr,
+ccs_height ? "image+ccs" : "image",
+width, y_tiled_height + ccs_height,
+cpp, , , 0);
if (image->bo == NULL)
   return false;
 
@@ -575,7 +603,8 @@ create_image_with_modifier(struct intel_screen *screen,
if (image->planar_format)
   assert(image->planar_format->nplanes == 1);
 
-   image->aux_offset = 0; /* y_tiled_height * pitch; */
+   if (ccs_height)
+  image->aux_offset = y_tiled_height * pitch;
 
return true;
 }
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/27] i965: Separate image allocation with modifiers

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

Since the code doesn't support modifiers yet, this patch should do
nothing other than prepare for more patches.

Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/intel_screen.c | 73 ++--
 1 file changed, 60 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index d81524f..e0402cf 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -538,6 +538,43 @@ intel_destroy_image(__DRIimage *image)
free(image);
 }
 
+static int
+create_image_with_modifier(struct intel_screen *screen,
+ __DRIimage *image, uint64_t modifier,
+ int width, int height, int cpp)
+{
+   uint32_t tiling;
+   unsigned long pitch;
+
+   switch (modifier) {
+   case I915_FORMAT_MOD_Y_TILED:
+  tiling = I915_TILING_Y;
+   }
+
+   /* For now, all modifiers require some tiling */
+   assert(tiling);
+
+   cpp = _mesa_get_format_bytes(image->format);
+   image->bo = drm_intel_bo_alloc_tiled(screen->bufmgr, "image+mod",
+width, height, cpp, ,
+, 0);
+   if (image->bo == NULL)
+  return false;
+
+   if (tiling != I915_TILING_Y) {
+  drm_intel_bo_unreference(image->bo);
+  return false;
+   }
+
+   image->width = width;
+   image->height = height;
+   image->pitch = pitch;
+   image->tile_y = I915_TILING_Y;
+   image->modifier = modifier;
+
+   return true;
+}
+
 static __DRIimage *
 __intel_create_image(__DRIscreen *dri_screen,
   int width, int height, int format,
@@ -551,11 +588,21 @@ __intel_create_image(__DRIscreen *dri_screen,
uint32_t tiling = I915_TILING_X;
int cpp;
unsigned long pitch;
+   uint64_t modifier = 0;
 
image = intel_allocate_image(screen, format, loaderPrivate);
if (image == NULL)
   return NULL;
 
+   if (use & __DRI_IMAGE_USE_CURSOR) {
+  if (width != 64 || height != 64)
+return NULL;
+  tiling = I915_TILING_NONE;
+   }
+
+   if (use & __DRI_IMAGE_USE_LINEAR)
+  tiling = I915_TILING_NONE;
+
for (int i = 0; i < count; i++) {
   switch (modifiers[i]) {
   case I915_FORMAT_MOD_Y_TILED:
@@ -567,26 +614,26 @@ __intel_create_image(__DRIscreen *dri_screen,
 continue;
  }
 
- image->modifier = I915_FORMAT_MOD_Y_TILED;
+ if (tiling == I915_TILING_NONE) {
+_mesa_warning(NULL, "Invalid use/modifier combination (%x %llx)\n",
+  use, I915_FORMAT_MOD_Y_TILED);
+continue;
+ }
+
+ modifier = I915_FORMAT_MOD_Y_TILED;
  break;
   }
}
 
-   if (use & __DRI_IMAGE_USE_CURSOR) {
-  if (width != 64 || height != 64)
-return NULL;
-  tiling = I915_TILING_NONE;
-   }
-
-   if (use & __DRI_IMAGE_USE_LINEAR)
-  tiling = I915_TILING_NONE;
+   cpp = _mesa_get_format_bytes(image->format);
 
-   if (image->modifier == I915_FORMAT_MOD_Y_TILED) {
-  assert(tiling != I915_TILING_NONE);
-  tiling = I915_TILING_Y;
+   if (modifier) {
+  if (create_image_with_modifier(screen, image, modifier, width,
+ height, cpp)) {
+ return image;
+  }
}
 
-   cpp = _mesa_get_format_bytes(image->format);
image->bo = drm_intel_bo_alloc_tiled(screen->bufmgr, "image",
 width, height, cpp, ,
 , 0);
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 21/27] i965: Make CCS stride match kernel's expectations

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

Cc: Ville Syrjälä 
Cc: Jason Ekstrand 
Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/intel_screen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index f40761a..7ee21ee 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -961,7 +961,7 @@ intel_from_planar(__DRIimage *parent, int plane, void 
*loaderPrivate)
 if (parent == NULL || parent->planar_format == NULL) {
if (is_aux) {
   offset = parent->aux_offset;
-  stride = ALIGN(parent->pitch / 32, 128);
+  stride = ALIGN(parent->pitch / 64, 128);
   dri_format = parent->dri_format;
   goto done;
}
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 24/27] i965: Add new resolve hints full and partial

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

Upper layers of the code will have the need to specify full or partial
resolves (more on this in the next patch). This code simply adds the new
enums and plumbs it in as minimally as necessary.

Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/brw_blorp.c | 3 ++-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index f31c732..7079530 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -962,7 +962,8 @@ brw_blorp_resolve_color(struct brw_context *brw, struct 
intel_mipmap_tree *mt,
 
enum blorp_fast_clear_op resolve_op;
if (brw->gen >= 9) {
-  if (surf.aux_usage == ISL_AUX_USAGE_CCS_E)
+  if (surf.aux_usage == ISL_AUX_USAGE_CCS_E &&
+  hint != INTEL_RESOLVE_HINT_CLEAR_COLOR)
  resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
   else
  resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index 25470a1..079b4a7 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -947,7 +947,9 @@ intel_miptree_used_for_rendering(const struct brw_context 
*brw,
  */
 enum intel_resolve_hint {
INTEL_RESOLVE_HINT_NO_HINT = 0,
-   INTEL_RESOLVE_HINT_IGNORE_CCS_E
+   INTEL_RESOLVE_HINT_IGNORE_CCS_E,
+   INTEL_RESOLVE_HINT_CLEAR_COLOR,
+   INTEL_RESOLVE_HINT_FULL,
 };
 
 bool
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 26/27] i965: Remove scanout restriction from lossless compression

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

Cc: Topi Pohjolainen 
Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 12 +++-
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index b79de08..b297f79 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -157,7 +157,7 @@ intel_miptree_supports_non_msrt_fast_clear(struct 
brw_context *brw,
if (mt->disable_aux_buffers)
   return false;
 
-   if (mt->is_scanout)
+   if (mt->is_scanout && mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS)
   return false;
 
/* This function applies only to non-multisampled render targets. */
@@ -528,10 +528,6 @@ intel_miptree_create_layout(struct brw_context *brw,
   const UNUSED bool is_lossless_compressed_aux =
  brw->gen >= 9 && num_samples == 1 &&
  mt->format == MESA_FORMAT_R_UINT32;
-
-  /* For now, nothing else has this requirement */
-  assert(is_lossless_compressed_aux ||
- (layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0);
}
 
brw_miptree_layout(brw, mt, layout_flags);
@@ -752,11 +748,9 @@ intel_miptree_create(struct brw_context *brw,
* resolves.
*/
   const bool lossless_compression_disabled = INTEL_DEBUG & DEBUG_NO_RBC;
-  assert(!mt->is_scanout);
   const bool is_lossless_compressed =
  unlikely(!lossless_compression_disabled) &&
- brw->gen >= 9 && !mt->is_scanout &&
- intel_miptree_supports_lossless_compressed(brw, mt);
+ brw->gen >= 9 && intel_miptree_supports_lossless_compressed(brw, mt);
 
   if (is_lossless_compressed) {
  intel_miptree_alloc_non_msrt_mcs(brw, mt, is_lossless_compressed);
@@ -1043,7 +1037,7 @@ intel_miptree_release(struct intel_mipmap_tree **mt)
 drm_intel_bo_unreference((*mt)->hiz_buf->aux_base.bo);
  free((*mt)->hiz_buf);
   }
-  if ((*mt)->mcs_buf && !(*mt)->is_scanout) {
+  if ((*mt)->mcs_buf) {
  drm_intel_bo_unreference((*mt)->mcs_buf->bo);
  free((*mt)->mcs_buf);
   }
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 23/27] i965: Plumb resolve hints from miptrees to blorp

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/brw_blorp.c | 3 ++-
 src/mesa/drivers/dri/i965/brw_blorp.h | 3 ++-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 4 ++--
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index 84180de..f31c732 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -945,7 +945,8 @@ brw_blorp_clear_color(struct brw_context *brw, struct 
gl_framebuffer *fb,
 
 void
 brw_blorp_resolve_color(struct brw_context *brw, struct intel_mipmap_tree *mt,
-unsigned level, unsigned layer)
+unsigned level, unsigned layer,
+enum intel_resolve_hint hint)
 {
DBG("%s to mt %p level %u layer %u\n", __FUNCTION__, mt, level, layer);
 
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h 
b/src/mesa/drivers/dri/i965/brw_blorp.h
index 277b00e..8a8e0fc 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h
@@ -65,7 +65,8 @@ brw_blorp_clear_color(struct brw_context *brw, struct 
gl_framebuffer *fb,
 void
 brw_blorp_resolve_color(struct brw_context *brw,
 struct intel_mipmap_tree *mt,
-unsigned level, unsigned layer);
+unsigned level, unsigned layer,
+enum intel_resolve_hint hint);
 
 void
 intel_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index d0c23e8..dd71a06 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -2353,7 +2353,7 @@ intel_miptree_resolve_color(struct brw_context *brw,
   if (item) {
  assert(item->fast_clear_state != INTEL_FAST_CLEAR_STATE_RESOLVED);
 
- brw_blorp_resolve_color(brw, mt, level, start_layer);
+ brw_blorp_resolve_color(brw, mt, level, start_layer, hint);
  intel_resolve_map_remove(item);
  resolved = true;
   }
@@ -2374,7 +2374,7 @@ intel_miptree_all_slices_resolve_color(struct brw_context 
*brw,
>color_resolve_map) {
   assert(map->fast_clear_state != INTEL_FAST_CLEAR_STATE_RESOLVED);
 
-  brw_blorp_resolve_color(brw, mt, map->level, map->layer);
+  brw_blorp_resolve_color(brw, mt, map->level, map->layer, hint);
   intel_resolve_map_remove(map);
}
 }
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 22/27] i965: Change resolve flags to enum

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

In the foreseeable future it doesn't seem to make sense to have multiple
resolve flags. What does make sense is to have the caller give an
indication to the lower layers what it things should be done for
resolve. The enum change distinguishes this binary selection.

Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/brw_blorp.c |  6 +++---
 src/mesa/drivers/dri/i965/brw_context.c   | 13 +++--
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 12 ++--
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 13 -
 4 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index 4c1d858..84180de 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -209,12 +209,12 @@ blorp_surf_for_miptree(struct brw_context *brw,
 surf->aux_usage = ISL_AUX_USAGE_NONE;
  }
   } else if (!(safe_aux_usage & (1 << surf->aux_usage))) {
- uint32_t flags = 0;
+ enum intel_resolve_hint hint = 0;
  if (safe_aux_usage & (1 << ISL_AUX_USAGE_CCS_E))
-flags |= INTEL_MIPTREE_IGNORE_CCS_E;
+hint = INTEL_RESOLVE_HINT_IGNORE_CCS_E;
 
  intel_miptree_resolve_color(brw, mt,
- *level, start_layer, num_layers, flags);
+ *level, start_layer, num_layers, hint);
 
  assert(!intel_miptree_has_color_unresolved(mt, *level, 1,
 start_layer, num_layers));
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 00d48c7..cfd7a54 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -261,9 +261,10 @@ intel_update_state(struct gl_context * ctx, GLuint 
new_state)
   /* Sampling engine understands lossless compression and resolving
* those surfaces should be skipped for performance reasons.
*/
-  const int flags = intel_texture_view_requires_resolve(brw, tex_obj) ?
-   0 : INTEL_MIPTREE_IGNORE_CCS_E;
-  intel_miptree_all_slices_resolve_color(brw, tex_obj->mt, flags);
+  const enum intel_resolve_hint hint =
+ intel_texture_view_requires_resolve(brw, tex_obj) ?  0 :
+ INTEL_RESOLVE_HINT_IGNORE_CCS_E;
+  intel_miptree_all_slices_resolve_color(brw, tex_obj->mt, hint);
   brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
 
   if (tex_obj->base.StencilSampling ||
@@ -316,9 +317,9 @@ intel_update_state(struct gl_context * ctx, GLuint 
new_state)
 intel_renderbuffer(fb->_ColorDrawBuffers[i]);
 
  if (irb &&
- intel_miptree_resolve_color(
-brw, irb->mt, irb->mt_level, irb->mt_layer, irb->layer_count,
-INTEL_MIPTREE_IGNORE_CCS_E))
+ intel_miptree_resolve_color(brw, irb->mt, irb->mt_level,
+ irb->mt_layer, irb->layer_count,
+ INTEL_RESOLVE_HINT_IGNORE_CCS_E))
 brw_render_cache_set_check_flush(brw, irb->mt->bo);
   }
}
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index cfecfd1..d0c23e8 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -2306,7 +2306,7 @@ intel_miptree_used_for_rendering(const struct brw_context 
*brw,
 static bool
 intel_miptree_needs_color_resolve(const struct brw_context *brw,
   const struct intel_mipmap_tree *mt,
-  int flags)
+  enum intel_resolve_hint hint)
 {
if (mt->no_ccs)
   return false;
@@ -2318,7 +2318,7 @@ intel_miptree_needs_color_resolve(const struct 
brw_context *brw,
 * surfaces called "lossless compressed". These don't need to be always
 * resolved.
 */
-   if ((flags & INTEL_MIPTREE_IGNORE_CCS_E) && is_lossless_compressed)
+   if ((hint == INTEL_RESOLVE_HINT_IGNORE_CCS_E) && is_lossless_compressed)
   return false;
 
/* Fast color clear resolves only make sense for non-MSAA buffers. */
@@ -2332,11 +2332,11 @@ bool
 intel_miptree_resolve_color(struct brw_context *brw,
 struct intel_mipmap_tree *mt, unsigned level,
 unsigned start_layer, unsigned num_layers,
-int flags)
+enum intel_resolve_hint hint)
 {
intel_miptree_check_color_resolve(brw, mt, level, start_layer);
 
-   if (!intel_miptree_needs_color_resolve(brw, mt, flags))
+   if (!intel_miptree_needs_color_resolve(brw, mt, hint))
   return false;
 
/* Arrayed fast clear is only supported for gen8+. */
@@ -2365,9 +2365,9 @@ 

[Mesa-dev] [PATCH 02/27] gbm: Fix width height getters return type (trivial)

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

Signed-off-by: Ben Widawsky 
---
 src/gbm/main/gbm.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gbm/main/gbm.h b/src/gbm/main/gbm.h
index 59daaa1..efb329e 100644
--- a/src/gbm/main/gbm.h
+++ b/src/gbm/main/gbm.h
@@ -294,10 +294,10 @@ gbm_bo_map(struct gbm_bo *bo,
 void
 gbm_bo_unmap(struct gbm_bo *bo, void *map_data);
 
-uint32_t
+unsigned int
 gbm_bo_get_width(struct gbm_bo *bo);
 
-uint32_t
+unsigned int
 gbm_bo_get_height(struct gbm_bo *bo);
 
 uint32_t
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 19/27] i965/miptree: Allocate mt earlier in update winsys

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

Allows us to continue utilizing common miptree creation using __DRIimage
without creating a new DRIimage (for the intel_process_dri2_buffer()
case).

This is a bit ugly, but I think it's the best one can do.

Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/brw_context.c   | 31 +++
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 17 ++-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  2 +-
 3 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 593fa67..00d48c7 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -1645,10 +1645,24 @@ intel_process_dri2_buffer(struct brw_context *brw,
   return;
}
 
-   if (intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
+   struct intel_mipmap_tree *mt = intel_miptree_create_for_bo(brw,
+  bo,
+  
intel_rb_format(rb),
+  0,
+  drawable->w,
+  drawable->h,
+  1,
+  buffer->pitch,
+  
MIPTREE_LAYOUT_FOR_SCANOUT);
+   if (!mt)
+  return;
+
+   if (intel_update_winsys_renderbuffer_miptree(brw, rb, mt,
 drawable->w, drawable->h,
-buffer->pitch))
+buffer->pitch)) {
+  intel_miptree_release();
   return;
+   }
 
if (_mesa_is_front_buffer_drawing(fb) &&
(buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
@@ -1704,10 +1718,19 @@ intel_update_image_buffer(struct brw_context *intel,
if (last_mt && last_mt->bo == buffer->bo)
   return;
 
-   if (intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
+   struct intel_mipmap_tree *mt = intel_miptree_create_for_image(intel,
+ buffer, 
intel_rb_format(rb), 0,
+ 
buffer->width, buffer->height,
+ 
buffer->pitch, MIPTREE_LAYOUT_FOR_SCANOUT);
+   if (!mt)
+  return;
+
+   if (intel_update_winsys_renderbuffer_miptree(intel, rb, mt,
 buffer->width, buffer->height,
-buffer->pitch))
+buffer->pitch)) {
+  intel_miptree_release();
   return;
+   }
 
if (_mesa_is_front_buffer_drawing(fb) &&
buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 74db507..cfecfd1 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -911,11 +911,10 @@ intel_miptree_create_for_image(struct brw_context *intel,
 int
 intel_update_winsys_renderbuffer_miptree(struct brw_context *intel,
  struct intel_renderbuffer *irb,
- drm_intel_bo *bo,
+ struct intel_mipmap_tree 
*singlesample_mt,
  uint32_t width, uint32_t height,
  uint32_t pitch)
 {
-   struct intel_mipmap_tree *singlesample_mt = NULL;
struct intel_mipmap_tree *multisample_mt = NULL;
struct gl_renderbuffer *rb = >Base.Base;
mesa_format format = rb->Format;
@@ -926,18 +925,7 @@ intel_update_winsys_renderbuffer_miptree(struct 
brw_context *intel,
 */
assert(_mesa_get_format_base_format(format) == GL_RGB ||
   _mesa_get_format_base_format(format) == GL_RGBA);
-
-   singlesample_mt = intel_miptree_create_for_bo(intel,
- bo,
- format,
- 0,
- width,
- height,
- 1,
- pitch,
- MIPTREE_LAYOUT_FOR_SCANOUT);
-   if (!singlesample_mt)
-  goto fail;
+   assert(singlesample_mt);
 
/* If this miptree is capable of supporting fast color clears, set
 * mcs_state appropriately to ensure that 

[Mesa-dev] [PATCH 04/27] gbm: Create a gbm_device getter for stride

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

This will be used so we can query information per plane.

Signed-off-by: Ben Widawsky 
---
 src/gbm/backends/dri/gbm_dri.c | 7 +++
 src/gbm/main/gbm.c | 2 +-
 src/gbm/main/gbmint.h  | 1 +
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
index c61d56b..f3ca228 100644
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -622,6 +622,12 @@ gbm_dri_bo_get_planes(struct gbm_bo *_bo)
return get_number_planes(dri, bo->image);
 }
 
+static uint32_t
+gbm_dri_bo_get_stride(struct gbm_bo *_bo, int plane)
+{
+   return _bo->stride;
+}
+
 static void
 gbm_dri_bo_destroy(struct gbm_bo *_bo)
 {
@@ -1080,6 +1086,7 @@ dri_device_create(int fd)
dri->base.base.bo_write = gbm_dri_bo_write;
dri->base.base.bo_get_fd = gbm_dri_bo_get_fd;
dri->base.base.bo_get_planes = gbm_dri_bo_get_planes;
+   dri->base.base.bo_get_stride = gbm_dri_bo_get_stride;
dri->base.base.bo_destroy = gbm_dri_bo_destroy;
dri->base.base.destroy = dri_destroy;
dri->base.base.surface_create = gbm_dri_surface_create;
diff --git a/src/gbm/main/gbm.c b/src/gbm/main/gbm.c
index b5e0316..14c31ad 100644
--- a/src/gbm/main/gbm.c
+++ b/src/gbm/main/gbm.c
@@ -165,7 +165,7 @@ gbm_bo_get_height(struct gbm_bo *bo)
 GBM_EXPORT uint32_t
 gbm_bo_get_stride(struct gbm_bo *bo)
 {
-   return bo->stride;
+   return bo->gbm->bo_get_stride(bo, 0);
 }
 
 /** Get the format of the buffer object
diff --git a/src/gbm/main/gbmint.h b/src/gbm/main/gbmint.h
index c6a6701..35d3bcb 100644
--- a/src/gbm/main/gbmint.h
+++ b/src/gbm/main/gbmint.h
@@ -77,6 +77,7 @@ struct gbm_device {
int (*bo_write)(struct gbm_bo *bo, const void *buf, size_t data);
int (*bo_get_fd)(struct gbm_bo *bo);
int (*bo_get_planes)(struct gbm_bo *bo);
+   uint32_t (*bo_get_stride)(struct gbm_bo *bo, int plane);
void (*bo_destroy)(struct gbm_bo *bo);
 
struct gbm_surface *(*surface_create)(struct gbm_device *gbm,
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 15/27] i965/miptree: Add a helper function for image creation

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

This provides a common function for creating miptrees when there is an
existing DRIimage to use. That provides an easy way to add CCS
allocation.

Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/intel_fbo.c | 17 -
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 25 -
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 10 ++
 src/mesa/drivers/dri/i965/intel_tex_image.c   | 17 -
 4 files changed, 50 insertions(+), 19 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c 
b/src/mesa/drivers/dri/i965/intel_fbo.c
index de0cd6a..f6e1759 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -362,15 +362,14 @@ intel_image_target_renderbuffer_storage(struct gl_context 
*ctx,
 * buffer's content to the main buffer nor for invalidating the aux buffer's
 * content.
 */
-   irb->mt = intel_miptree_create_for_bo(brw,
- image->bo,
- image->format,
- image->offset,
- image->width,
- image->height,
- 1,
- image->pitch,
- MIPTREE_LAYOUT_DISABLE_AUX);
+   irb->mt = intel_miptree_create_for_image(brw,
+image,
+image->format,
+image->offset,
+image->width,
+image->height,
+image->pitch,
+0);
if (!irb->mt)
   return;
 
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index a9b350e..cfa2dc0 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -24,7 +24,6 @@
  */
 
 #include 
-#include 
 
 #include "intel_batchbuffer.h"
 #include "intel_mipmap_tree.h"
@@ -32,6 +31,7 @@
 #include "intel_tex.h"
 #include "intel_blit.h"
 #include "intel_fbo.h"
+#include "intel_image.h"
 
 #include "brw_blorp.h"
 #include "brw_context.h"
@@ -810,6 +810,29 @@ intel_miptree_create_for_bo(struct brw_context *brw,
return mt;
 }
 
+struct intel_mipmap_tree *
+intel_miptree_create_for_image(struct brw_context *intel,
+   __DRIimage *image,
+   mesa_format format,
+   uint32_t offset,
+   uint32_t width,
+   uint32_t height,
+   uint32_t pitch,
+   uint32_t layout_flags)
+{
+   layout_flags = (layout_flags & MIPTREE_LAYOUT_FOR_SCANOUT) ?
+  MIPTREE_LAYOUT_FOR_SCANOUT : MIPTREE_LAYOUT_DISABLE_AUX;
+   return intel_miptree_create_for_bo(intel,
+  image->bo,
+  format,
+  offset,
+  width,
+  height,
+  1,
+  pitch,
+  layout_flags);
+}
+
 /**
  * For a singlesample renderbuffer, this simply wraps the given BO with a
  * miptree.
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index c67b4de..7b9a7be 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -716,6 +716,16 @@ intel_miptree_create_for_bo(struct brw_context *brw,
 int pitch,
 uint32_t layout_flags);
 
+struct intel_mipmap_tree *
+intel_miptree_create_for_image(struct brw_context *intel,
+   __DRIimage *image,
+   mesa_format format,
+   uint32_t offset,
+   uint32_t width,
+   uint32_t height,
+   uint32_t pitch,
+   uint32_t layout_flags);
+
 void
 intel_update_winsys_renderbuffer_miptree(struct brw_context *intel,
  struct intel_renderbuffer *irb,
diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c 
b/src/mesa/drivers/dri/i965/intel_tex_image.c
index 141996f..2d79183 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -221,11 +221,11 @@ create_mt_for_planar_dri_image(struct brw_context *brw,
* 

[Mesa-dev] [PATCH 00/27] Renderbuffer Decompression (and GBM modifiers)

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

This patch series ultimately adds support within the i965 driver for
Renderbuffer Decompression with GBM. In short, this feature reduces memory
bandwidth by allowing the GPU to work with losslessly compressed data and having
that compression scheme understood by the display engine for decompression. The
display engine will decompress on the fly and scanout the image.

Quoting from the final patch, the bandwidth savings on a SKL GT4 with a 19x10
display running kmscube:

Without compression:
Read bandwidth: 603.91 MiB/s
Write bandwidth: 615.28 MiB/s

With compression:
Read bandwidth: 259.34 MiB/s
Write bandwidth: 337.83 MiB/s


The hardware achieves these savings by maintaining an auxiliary buffer
containing "opaque" compression information. It's opaque in the sense that
knowledge of the low-level compression scheme is not needed, but knowledge of
the overall layout of the compressed data is required. The auxiliary buffer is
created by the driver on behalf of the client when requested. That buffer needs
to be passed along wherever the main image's buffer goes.

The overall strategy is that the buffer/surface is created with a list of
modifiers. The list of modifiers the hardware is capable of using will come from
a new kernel API that is aware of the hardware and general constraints. A client
will request the list of modifiers and pass it directly back in during buffer
creation (potentially the client can prune the list, but as of now there is no
reason to.) This new API is being developed by Kristian. I did not get far
enough to play with that.

For EGL, a similar mechanism would exist whereby, when importing a buffer into
EGL, one would provide a modifier and probably a pointer to the auxiliary data
upon import (import therefore might require multiple dma-buf fds), but for i965
and Intel this wouldn't be necessary.

Here is a brief description of the series:
1-6 Adds support in GBM for per plane functions where necessary. This is
required because the kernel expects the auxiliary buffer to be passed along as a
plane. It has its own offset, and stride, and the client shouldn't need to
calculate those.

7-9 Adds support in GBM to understand modifiers. When creating a buffer or
surface, the client is expected to pass in a list of modifiers that the driver
will optimally choose from. As a result of this, the GBM APIs need to support
modifiers.

10-12 Support Y-tiled modifier. Y-tiling was already a modifier exposed by the
kernel. With the previous patches in place, it's easy to support this too.

13-26 Plumbing to support sending CCS buffers to display. Leveraging much of the
existing code for MCS buffers, these patches create an MCS for the scanout
buffer. The trickery here is that a single BO contains both the main surface and
the auxiliary data. Previously, auxiliary data always lived in its own BO.

27 Support CCS-modifier. Finally, the code can parse the CCS fb modifier(s) and
realize the bandwidth savings that come with it.

This was tested using kmscube
(https://github.com/bwidawsk/kmscube/tree/modifiers). The kmscube implementation
is missing support for GET_PLANE2 - which is currently being worked on by
Kristian.

Upstream plan:
1. All of the patches up through 26 should be mergeable today after review.
2. After 1-12 land, client support of Y-tiling should be achievable. Modesetting
driver can probably be updated as can things like Weston. Clients assuming a new
enough kernel should be able to blindly set the y tiled modifier.
3. Once kernel and libdrm support for CCS modifiers lands, patch 27 can land;
however, CCS isn't yet usable, as it is only available as a prototype.
4. Kristian's GET_PLANE2 interface needs to be solidified and land.
5. Clients will utilize #3 and #4 to use CCS.
6. Protocol work, EGL, Wayland, DRIX - etc

When Kristian's interface is ready, kmscube can be modified to make use of it.

Rob: are you interested in a PR for kmscube?

Definition of terms:
Renderbuffer Decompression - In the ARM world, this is AFBC. Having the graphics
driver utilize lossless surface compression for the scanout buffer and sending
those surfaces, compressed, to the kernel (via KMS) for the display engine to
directly consume.

Renderbuffer Compression - Utilizing compressed surfaces for many buffer types
(scanout, textures, whatever), and decompressing (ie. resolving) those surfaces
before passing them along.

Ben Widawsky (27):
  gbm: Move getters to match order in header file (trivial)
  gbm: Fix width height getters return type (trivial)
  gbm: Export a plane getter function
  gbm: Create a gbm_device getter for stride
  gbm: Export a per plane getter for stride
  gbm: Export a per plane getter for offset
  i965/dri: Store the screen associated with the image
  dri: Add an image creation with modifiers
  gbm: Introduce modifiers into surface/bo creation
  i965: Handle Y-tile modifier
  gbm: Get modifiers from DRI
  i965: Bring back always Y-tiled on SKL+
  i965: 

[Mesa-dev] [PATCH 08/27] dri: Add an image creation with modifiers

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

Modifiers will be obtained or guessed by the client and passed in during
image creation/import.

This requires bumping the DRIimage version.

Signed-off-by: Ben Widawsky 
---
 include/GL/internal/dri_interface.h  | 28 +++-
 src/gallium/state_trackers/dri/dri2.c|  1 +
 src/mesa/drivers/dri/i965/intel_screen.c | 26 +-
 3 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/include/GL/internal/dri_interface.h 
b/include/GL/internal/dri_interface.h
index d0b1bc6..657e158 100644
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -1094,7 +1094,7 @@ struct __DRIdri2ExtensionRec {
  * extensions.
  */
 #define __DRI_IMAGE "DRI_IMAGE"
-#define __DRI_IMAGE_VERSION 13
+#define __DRI_IMAGE_VERSION 14
 
 /**
  * These formats correspond to the similarly named MESA_FORMAT_*
@@ -1209,6 +1209,8 @@ struct __DRIdri2ExtensionRec {
 #define __DRI_IMAGE_ATTRIB_NUM_PLANES   0x2009 /* available in versions 11 */
 
 #define __DRI_IMAGE_ATTRIB_OFFSET 0x200A /* available in versions 13 */
+#define __DRI_IMAGE_ATTRIB_MODIFIER_LOWER 0x200B /* available in versions 14 */
+#define __DRI_IMAGE_ATTRIB_MODIFIER_UPPER 0x200C /* available in versions 14 */
 
 enum __DRIYUVColorSpace {
__DRI_YUV_COLOR_SPACE_UNDEFINED = 0,
@@ -1420,6 +1422,30 @@ struct __DRIimageExtensionRec {
 */
void (*unmapImage)(__DRIcontext *context, __DRIimage *image, void *data);
 
+
+   /**
+* Creates an image with the implementation's favorite modifiers.
+*
+* This acts like createImage except there is a list of modifiers passed in
+* which the implementation may selectively use to create the DRIimage. The
+* result should be the implementation selects one modifier (perhaps it 
would
+* hold on to a few and later pick).
+*
+* The created image should be destroyed with destroyImage().
+*
+* Returns the new DRIimage. The chosen modifier can be obtained later on
+* through some API visible functionality if required.
+*
+* \sa __DRIimageRec::createImage
+*
+* \since 14
+*/
+   __DRIimage *(*createImageWithModifiers)(__DRIscreen *screen,
+   int width, int height, int format,
+   unsigned int use,
+   const uint64_t *modifiers,
+   const unsigned int modifier_count,
+   void *loaderPrivate);
 };
 
 
diff --git a/src/gallium/state_trackers/dri/dri2.c 
b/src/gallium/state_trackers/dri/dri2.c
index 9ec069b..c9fbe84 100644
--- a/src/gallium/state_trackers/dri/dri2.c
+++ b/src/gallium/state_trackers/dri/dri2.c
@@ -1409,6 +1409,7 @@ static __DRIimageExtension dri2ImageExtension = {
 .getCapabilities  = dri2_get_capabilities,
 .mapImage = dri2_map_image,
 .unmapImage   = dri2_unmap_image,
+.createImageWithModifiers = NULL,
 };
 
 
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index 5808bde..b5bb4a0 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -538,9 +538,11 @@ intel_destroy_image(__DRIimage *image)
 }
 
 static __DRIimage *
-intel_create_image(__DRIscreen *dri_screen,
+__intel_create_image(__DRIscreen *dri_screen,
   int width, int height, int format,
   unsigned int use,
+   const uint64_t *modifiers,
+   unsigned count,
   void *loaderPrivate)
 {
__DRIimage *image;
@@ -578,6 +580,27 @@ intel_create_image(__DRIscreen *dri_screen,
return image;
 }
 
+static __DRIimage *
+intel_create_image(__DRIscreen *dri_screen,
+  int width, int height, int format,
+  unsigned int use,
+  void *loaderPrivate)
+{
+   return __intel_create_image(dri_screen, width, height, format, use, NULL, 
0, loaderPrivate);
+}
+
+static __DRIimage *
+intel_create_image_with_modifiers(__DRIscreen *dri_screen,
+  int width, int height, int format,
+  unsigned int use,
+  const uint64_t *modifiers,
+  const unsigned count,
+  void *loaderPrivate)
+{
+   return __intel_create_image(dri_screen, width, height, format, use, NULL, 0,
+   loaderPrivate);
+}
+
 static GLboolean
 intel_query_image(__DRIimage *image, int attrib, int *value)
 {
@@ -870,6 +893,7 @@ static const __DRIimageExtension intelImageExtension = {
 .getCapabilities= NULL,
 .mapImage   = NULL,
 .unmapImage = NULL,
+.createImageWithModifiers   

[Mesa-dev] [PATCH 09/27] gbm: Introduce modifiers into surface/bo creation

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

The idea behind modifiers like this is that the user of GBM will have
some mechanism to query what properties the hardware supports for its BO
or surface. This information is directly passed in (and stored) so that
the DRI implementation can create an image with the appropriate
attributes.

A getter() will be added later so that the user of GBM will be able to
query what modifier should be used.

I've opted to store all modifiers passed in during creation and to make
the determination happen at actual creation time for no reason other
than it seems more flexible.

Cc: Kristian Høgsberg 
Signed-off-by: Ben Widawsky 
---
 src/egl/drivers/dri2/platform_drm.c |  7 ---
 src/gbm/backends/dri/gbm_dri.c  | 33 +++--
 src/gbm/gbm-symbols-check   |  2 ++
 src/gbm/main/gbm.c  | 28 ++--
 src/gbm/main/gbm.h  | 12 
 src/gbm/main/gbmint.h   | 16 ++--
 6 files changed, 85 insertions(+), 13 deletions(-)

diff --git a/src/egl/drivers/dri2/platform_drm.c 
b/src/egl/drivers/dri2/platform_drm.c
index 2099314..5c92d2a 100644
--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@@ -229,9 +229,10 @@ get_back_bo(struct dri2_egl_surface *dri2_surf)
if (dri2_surf->back == NULL)
   return -1;
if (dri2_surf->back->bo == NULL)
-  dri2_surf->back->bo = gbm_bo_create(_dpy->gbm_dri->base.base,
- surf->base.width, surf->base.height,
- surf->base.format, surf->base.flags);
+  dri2_surf->back->bo = 
gbm_bo_create_with_modifiers(_dpy->gbm_dri->base.base,
+ surf->base.width, 
surf->base.height,
+ surf->base.format, 
surf->base.flags,
+ surf->base.modifiers, 
surf->base.count);
if (dri2_surf->back->bo == NULL)
   return -1;
 
diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
index 57d7bbe..b1bbbd1 100644
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -931,7 +931,8 @@ free_bo:
 static struct gbm_bo *
 gbm_dri_bo_create(struct gbm_device *gbm,
   uint32_t width, uint32_t height,
-  uint32_t format, uint32_t usage)
+  uint32_t format, uint32_t usage,
+  const uint64_t *modifiers, const unsigned int count)
 {
struct gbm_dri_device *dri = gbm_dri_device(gbm);
struct gbm_dri_bo *bo;
@@ -996,13 +997,22 @@ gbm_dri_bo_create(struct gbm_device *gbm,
dri_use |= __DRI_IMAGE_USE_SHARE;
 
bo->image =
-  dri->image->createImage(dri->screen,
-  width, height,
-  dri_format, dri_use,
-  bo);
+  dri->image->createImageWithModifiers(dri->screen,
+   width, height,
+   dri_format, dri_use,
+   modifiers, count,
+   bo);
if (bo->image == NULL)
   goto failed;
 
+   bo->base.base.modifiers = calloc(count, sizeof(*modifiers));
+   if (!bo->base.base.modifiers) {
+  dri->image->destroyImage(bo->image);
+  goto failed;
+   }
+   bo->base.base.count = count;
+   memcpy(bo->base.base.modifiers, modifiers, count * sizeof(*modifiers));
+
dri->image->queryImage(bo->image, __DRI_IMAGE_ATTRIB_HANDLE,
   >base.base.handle.s32);
dri->image->queryImage(bo->image, __DRI_IMAGE_ATTRIB_STRIDE,
@@ -1073,7 +1083,8 @@ gbm_dri_bo_unmap(struct gbm_bo *_bo, void *map_data)
 static struct gbm_surface *
 gbm_dri_surface_create(struct gbm_device *gbm,
uint32_t width, uint32_t height,
-  uint32_t format, uint32_t flags)
+  uint32_t format, uint32_t flags,
+   const uint64_t *modifiers, const unsigned count)
 {
struct gbm_dri_surface *surf;
 
@@ -1087,6 +1098,15 @@ gbm_dri_surface_create(struct gbm_device *gbm,
surf->base.format = format;
surf->base.flags = flags;
 
+   surf->base.modifiers = calloc(count, sizeof(*modifiers));
+   if (count && !surf->base.modifiers) {
+  free(surf);
+  return NULL;
+   }
+
+   surf->base.count = count;
+   memcpy(surf->base.modifiers, modifiers, count * sizeof(*modifiers));
+
return >base;
 }
 
@@ -1095,6 +1115,7 @@ gbm_dri_surface_destroy(struct gbm_surface *_surf)
 {
struct gbm_dri_surface *surf = gbm_dri_surface(_surf);
 
+   free(surf->base.modifiers);
free(surf);
 }
 
diff --git a/src/gbm/gbm-symbols-check b/src/gbm/gbm-symbols-check
index f6857dd..ba6186c 100755
--- a/src/gbm/gbm-symbols-check
+++ 

[Mesa-dev] [PATCH 11/27] gbm: Get modifiers from DRI

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

v2: Use stored modifiers from create instead of queryImage

Discussion with Kristian yielded that there is no need for per plane
modifiers.

Signed-off-by: Ben Widawsky 
---
 src/gbm/backends/dri/gbm_dri.c   | 32 
 src/gbm/gbm-symbols-check|  1 +
 src/gbm/main/gbm.c   | 22 ++
 src/gbm/main/gbm.h   |  3 +++
 src/gbm/main/gbmint.h|  5 +
 src/mesa/drivers/dri/i965/intel_screen.c |  6 ++
 6 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
index b1bbbd1..355f9e1 100644
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -38,6 +38,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include  /* dri_interface needs GL types */
 #include 
@@ -678,6 +679,28 @@ gbm_dri_bo_get_offset(struct gbm_bo *_bo, int plane)
return (uint32_t)offset;
 }
 
+static uint64_t
+gbm_dri_bo_get_modifier(struct gbm_bo *_bo)
+{
+   struct gbm_dri_device *dri = gbm_dri_device(_bo->gbm);
+   struct gbm_dri_bo *bo = gbm_dri_bo(_bo);
+
+   if (!dri->image || dri->image->base.version < 14) {
+  errno = ENOSYS;
+  return 0;
+   }
+
+   uint64_t ret = 0;
+   int mod;
+   dri->image->queryImage(bo->image, __DRI_IMAGE_ATTRIB_MODIFIER_UPPER, &mod);
+   ret = (uint64_t)mod << 32;
+
+   dri->image->queryImage(bo->image, __DRI_IMAGE_ATTRIB_MODIFIER_LOWER, &mod);
+   ret |= mod;
+
+   return ret;
+}
+
 static void
 gbm_dri_bo_destroy(struct gbm_bo *_bo)
 {
@@ -1005,14 +1028,6 @@ gbm_dri_bo_create(struct gbm_device *gbm,
if (bo->image == NULL)
   goto failed;
 
-   bo->base.base.modifiers = calloc(count, sizeof(*modifiers));
-   if (!bo->base.base.modifiers) {
-  dri->image->destroyImage(bo->image);
-  goto failed;
-   }
-   bo->base.base.count = count;
-   memcpy(bo->base.base.modifiers, modifiers, count * sizeof(*modifiers));
-
dri->image->queryImage(bo->image, __DRI_IMAGE_ATTRIB_HANDLE,
   >base.base.handle.s32);
dri->image->queryImage(bo->image, __DRI_IMAGE_ATTRIB_STRIDE,
@@ -1159,6 +1174,7 @@ dri_device_create(int fd)
dri->base.base.bo_get_planes = gbm_dri_bo_get_planes;
dri->base.base.bo_get_stride = gbm_dri_bo_get_stride;
dri->base.base.bo_get_offset = gbm_dri_bo_get_offset;
+   dri->base.base.bo_get_modifier = gbm_dri_bo_get_modifier;
dri->base.base.bo_destroy = gbm_dri_bo_destroy;
dri->base.base.destroy = dri_destroy;
dri->base.base.surface_create = gbm_dri_surface_create;
diff --git a/src/gbm/gbm-symbols-check b/src/gbm/gbm-symbols-check
index ba6186c..4c977d4 100755
--- a/src/gbm/gbm-symbols-check
+++ b/src/gbm/gbm-symbols-check
@@ -22,6 +22,7 @@ gbm_bo_get_device
 gbm_bo_get_handle
 gbm_bo_get_fd
 gbm_bo_get_plane_count
+gbm_bo_get_modifier
 gbm_bo_write
 gbm_bo_set_user_data
 gbm_bo_get_user_data
diff --git a/src/gbm/main/gbm.c b/src/gbm/main/gbm.c
index 81408ad..e792c0f 100644
--- a/src/gbm/main/gbm.c
+++ b/src/gbm/main/gbm.c
@@ -262,6 +262,28 @@ gbm_bo_get_plane_count(struct gbm_bo *bo)
return bo->gbm->bo_get_planes(bo);
 }
 
+/**
+ * Get the chosen modifier for the buffer object
+ *
+ * This function returns the modifier that was chosen for the object. These
+ * properties may be generic, or platform/implementation
+ * dependent.
+ *
+ * \param bo The buffer object
+ * \return Returns the single modifier that was chosen for the buffer
+ * object
+ * \sa gbm_bo_create_with_modifiers() where possible modifiers are set
+ * \sa gbm_surface_create_with_modifiers() where possible modifiers are set
+ * \sa define DRM_FORMAT_MOD_* in drm_fourcc.h for possible modifiers
+ */
+
+GBM_EXPORT uint64_t
+gbm_bo_get_modifier(struct gbm_bo *bo)
+{
+
+   return bo->gbm->bo_get_modifier(bo);
+}
+
 /** Write data into the buffer object
  *
  * If the buffer object was created with the GBM_BO_USE_WRITE flag,
diff --git a/src/gbm/main/gbm.h b/src/gbm/main/gbm.h
index c573093..7c1c6ae 100644
--- a/src/gbm/main/gbm.h
+++ b/src/gbm/main/gbm.h
@@ -327,6 +327,9 @@ gbm_bo_get_handle(struct gbm_bo *bo);
 int
 gbm_bo_get_fd(struct gbm_bo *bo);
 
+uint64_t
+gbm_bo_get_modifier(struct gbm_bo *bo);
+
 int
 gbm_bo_get_plane_count(struct gbm_bo *bo);
 
diff --git a/src/gbm/main/gbmint.h b/src/gbm/main/gbmint.h
index 75b57e7..682984d 100644
--- a/src/gbm/main/gbmint.h
+++ b/src/gbm/main/gbmint.h
@@ -81,6 +81,7 @@ struct gbm_device {
int (*bo_get_planes)(struct gbm_bo *bo);
uint32_t (*bo_get_stride)(struct gbm_bo *bo, int plane);
uint32_t (*bo_get_offset)(struct gbm_bo *bo, int plane);
+   uint64_t (*bo_get_modifier)(struct gbm_bo *bo);
void (*bo_destroy)(struct gbm_bo *bo);
 
struct gbm_surface *(*surface_create)(struct gbm_device *gbm,
@@ -106,10 +107,6 @@ struct gbm_bo {
uint32_t height;
uint32_t stride;
uint32_t format;
-   struct {

[Mesa-dev] [PATCH 14/27] i965: Allow aux buffers to have an offset

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

Previously our aux buffers (MCS, and HiZ) never had an offset because
they were in their own buffer object. When using the CCS lossless
compression feature, it's desirable to store the data at an offset from
the main framebuffer, ie. share a buffer object. This patch just makes
having an aux offset possible.

Thanks to Ken for helping me find the most simple fix...

Cc: Kenneth Graunke 
Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 3 +--
 src/mesa/drivers/dri/i965/intel_image.h  | 3 +++
 src/mesa/drivers/dri/i965/intel_screen.c | 5 +
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 6c44381..69be3eb 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -143,7 +143,6 @@ brw_emit_surface_state(struct brw_context *brw,
   aux_surf = _surf_s;
 
   if (mt->mcs_buf) {
- assert(mt->mcs_buf->offset == 0);
  aux_bo = mt->mcs_buf->bo;
  aux_offset = mt->mcs_buf->bo->offset64 + mt->mcs_buf->offset;
   } else {
@@ -185,7 +184,7 @@ brw_emit_surface_state(struct brw_context *brw,
   uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
   drm_intel_bo_emit_reloc(brw->batch.bo,
   *surf_offset + brw->isl_dev.ss.aux_addr_offset,
-  aux_bo, *aux_addr & 0xfff,
+  aux_bo, *aux_addr - aux_bo->offset64,
   read_domains, write_domains);
}
 }
diff --git a/src/mesa/drivers/dri/i965/intel_image.h 
b/src/mesa/drivers/dri/i965/intel_image.h
index bbda952..7b3c624 100644
--- a/src/mesa/drivers/dri/i965/intel_image.h
+++ b/src/mesa/drivers/dri/i965/intel_image.h
@@ -92,6 +92,9 @@ struct __DRIimageRec {
/** The image was created with EGL_EXT_image_dma_buf_import. */
bool dma_buf_imported;
 
+   /** The image has some ancillary data associated with it at offset. */
+   uint32_t aux_offset;
+
/**
 * Provided by EGL_EXT_image_dma_buf_import.
 * \{
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index e0402cf..0f19a6e 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -572,6 +572,11 @@ create_image_with_modifier(struct intel_screen *screen,
image->tile_y = I915_TILING_Y;
image->modifier = modifier;
 
+   if (image->planar_format)
+  assert(image->planar_format->nplanes == 1);
+
+   image->aux_offset = 0; /* y_tiled_height * pitch; */
+
return true;
 }
 
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/27] gbm: Move getters to match order in header file (trivial)

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

Other things are out of order, but I need to add a getter so I'm just
fixing those.

This helps people adding to GBM know where the right place to put things
is.

Signed-off-by: Ben Widawsky 
---
 src/gbm/main/gbm.c | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/gbm/main/gbm.c b/src/gbm/main/gbm.c
index 9ef1990..00113fa 100644
--- a/src/gbm/main/gbm.c
+++ b/src/gbm/main/gbm.c
@@ -181,6 +181,17 @@ gbm_bo_get_format(struct gbm_bo *bo)
return bo->format;
 }
 
+/** Get the gbm device used to create the buffer object
+ *
+ * \param bo The buffer object
+ * \return Returns the gbm device with which the buffer object was created
+ */
+GBM_EXPORT struct gbm_device *
+gbm_bo_get_device(struct gbm_bo *bo)
+{
+   return bo->gbm;
+}
+
 /** Get the handle of the buffer object
  *
  * This is stored in the platform generic union gbm_bo_handle type. However
@@ -232,17 +243,6 @@ gbm_bo_write(struct gbm_bo *bo, const void *buf, size_t 
count)
return bo->gbm->bo_write(bo, buf, count);
 }
 
-/** Get the gbm device used to create the buffer object
- *
- * \param bo The buffer object
- * \return Returns the gbm device with which the buffer object was created
- */
-GBM_EXPORT struct gbm_device *
-gbm_bo_get_device(struct gbm_bo *bo)
-{
-   return bo->gbm;
-}
-
 /** Set the user data associated with a buffer object
  *
  * \param bo The buffer object
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/27] i965/miptree: Allocate mcs_buf for an image's CCS_E

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

This code will disable actually creating these buffers for the scanout,
but it puts the allocation in place.

Primarily this patch is split out for review, it can be squashed in
later if preferred.

Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 89 +++
 1 file changed, 77 insertions(+), 12 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index cfa2dc0..d002546 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -58,6 +58,11 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
 struct intel_mipmap_tree *mt,
 GLuint num_samples);
 
+static void
+intel_miptree_init_mcs(struct brw_context *brw,
+   struct intel_mipmap_tree *mt,
+   int init_value);
+
 /**
  * Determine which MSAA layout should be used by the MSAA surface being
  * created, based on the chip generation and the surface type.
@@ -152,6 +157,9 @@ intel_miptree_supports_non_msrt_fast_clear(struct 
brw_context *brw,
if (mt->disable_aux_buffers)
   return false;
 
+   if (mt->is_scanout)
+  return false;
+
/* This function applies only to non-multisampled render targets. */
if (mt->num_samples > 1)
   return false;
@@ -744,6 +752,7 @@ intel_miptree_create(struct brw_context *brw,
* resolves.
*/
   const bool lossless_compression_disabled = INTEL_DEBUG & DEBUG_NO_RBC;
+  assert(!mt->is_scanout);
   const bool is_lossless_compressed =
  unlikely(!lossless_compression_disabled) &&
  brw->gen >= 9 && !mt->is_scanout &&
@@ -810,6 +819,36 @@ intel_miptree_create_for_bo(struct brw_context *brw,
return mt;
 }
 
+static bool
+create_ccs_buf_for_image(struct brw_context *intel,
+ __DRIimage *image,
+ struct intel_mipmap_tree *mt)
+{
+
+   struct isl_surf temp_main_surf;
+   struct isl_surf temp_ccs_surf;
+   uint32_t offset = mt->offset + image->aux_offset;
+
+   intel_miptree_get_isl_surf(intel, mt, _main_surf);
+   if (!isl_surf_get_ccs_surf(>isl_dev, _main_surf, 
_ccs_surf))
+  return false;
+
+   mt->mcs_buf = calloc(1, sizeof(*mt->mcs_buf));
+   mt->mcs_buf->bo = image->bo;
+   drm_intel_bo_reference(image->bo);
+
+   mt->mcs_buf->offset = offset;
+   mt->mcs_buf->size = temp_ccs_surf.size;
+   mt->mcs_buf->pitch = temp_ccs_surf.row_pitch;
+   mt->mcs_buf->qpitch = isl_surf_get_array_pitch_sa_rows(_ccs_surf);
+
+   intel_miptree_init_mcs(intel, mt, 0);
+   mt->no_ccs = false;
+   mt->msaa_layout = INTEL_MSAA_LAYOUT_CMS;
+
+   return true;
+}
+
 struct intel_mipmap_tree *
 intel_miptree_create_for_image(struct brw_context *intel,
__DRIimage *image,
@@ -820,17 +859,43 @@ intel_miptree_create_for_image(struct brw_context *intel,
uint32_t pitch,
uint32_t layout_flags)
 {
-   layout_flags = (layout_flags & MIPTREE_LAYOUT_FOR_SCANOUT) ?
-  MIPTREE_LAYOUT_FOR_SCANOUT : MIPTREE_LAYOUT_DISABLE_AUX;
-   return intel_miptree_create_for_bo(intel,
-  image->bo,
-  format,
-  offset,
-  width,
-  height,
-  1,
-  pitch,
-  layout_flags);
+   struct intel_mipmap_tree *mt;
+
+   /* Other flags will be ignored, so make sure the caller didn't pass any. */
+   assert((layout_flags & ~MIPTREE_LAYOUT_FOR_SCANOUT) == 0);
+
+   if (!image->aux_offset)
+  layout_flags |= MIPTREE_LAYOUT_DISABLE_AUX;
+   else
+  layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
+
+   mt = intel_miptree_create_for_bo(intel,
+image->bo,
+format,
+offset,
+width,
+height,
+1,
+pitch,
+layout_flags);
+
+   if (!intel_tiling_supports_non_msrt_mcs(intel, mt->tiling)) {
+  assert(image->aux_offset == 0);
+  return mt;
+   }
+
+   if (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX)
+  return mt;
+
+   layout_flags &= ~MIPTREE_LAYOUT_FOR_SCANOUT;
+
+   assert(image->aux_offset);
+   assert(mt->num_samples >= 0);
+   assert(mt->last_level < 2);
+
+   create_ccs_buf_for_image(intel, image, mt);
+
+   return mt;
 }
 
 /**
@@ -991,7 +1056,7 @@ intel_miptree_release(struct intel_mipmap_tree **mt)
 drm_intel_bo_unreference((*mt)->hiz_buf->aux_base.bo);
  

[Mesa-dev] [PATCH 03/27] gbm: Export a plane getter function

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

This will be used by clients that need to know the number of planes
allocated for them on behalf of the GL or other API. The best current
example of this is when an extra "plane" is allocated to store
compression data for the primary plane.

Cc: Daniel Stone 
Signed-off-by: Ben Widawsky 
---
 src/gbm/backends/dri/gbm_dri.c | 25 +
 src/gbm/gbm-symbols-check  |  1 +
 src/gbm/main/gbm.c | 10 ++
 src/gbm/main/gbm.h |  3 +++
 src/gbm/main/gbmint.h  |  1 +
 5 files changed, 40 insertions(+)

diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
index 45cb42a..c61d56b 100644
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -598,6 +598,30 @@ gbm_dri_bo_get_fd(struct gbm_bo *_bo)
return fd;
 }
 
+static int
+get_number_planes(struct gbm_dri_device *dri, __DRIimage *image)
+{
+   int num_planes = 0;
+   dri->image->queryImage(image, __DRI_IMAGE_ATTRIB_NUM_PLANES, &num_planes);
+
+   if (num_planes <= 0)
+  num_planes = 1;
+
+   return num_planes;
+}
+
+static int
+gbm_dri_bo_get_planes(struct gbm_bo *_bo)
+{
+   struct gbm_dri_device *dri = gbm_dri_device(_bo->gbm);
+   struct gbm_dri_bo *bo = gbm_dri_bo(_bo);
+
+   if (bo->image == NULL)
+  return -1;
+
+   return get_number_planes(dri, bo->image);
+}
+
 static void
 gbm_dri_bo_destroy(struct gbm_bo *_bo)
 {
@@ -1055,6 +1079,7 @@ dri_device_create(int fd)
dri->base.base.is_format_supported = gbm_dri_is_format_supported;
dri->base.base.bo_write = gbm_dri_bo_write;
dri->base.base.bo_get_fd = gbm_dri_bo_get_fd;
+   dri->base.base.bo_get_planes = gbm_dri_bo_get_planes;
dri->base.base.bo_destroy = gbm_dri_bo_destroy;
dri->base.base.destroy = dri_destroy;
dri->base.base.surface_create = gbm_dri_surface_create;
diff --git a/src/gbm/gbm-symbols-check b/src/gbm/gbm-symbols-check
index 5a333ff..8c4da1b 100755
--- a/src/gbm/gbm-symbols-check
+++ b/src/gbm/gbm-symbols-check
@@ -18,6 +18,7 @@ gbm_bo_get_format
 gbm_bo_get_device
 gbm_bo_get_handle
 gbm_bo_get_fd
+gbm_bo_get_plane_count
 gbm_bo_write
 gbm_bo_set_user_data
 gbm_bo_get_user_data
diff --git a/src/gbm/main/gbm.c b/src/gbm/main/gbm.c
index 00113fa..b5e0316 100644
--- a/src/gbm/main/gbm.c
+++ b/src/gbm/main/gbm.c
@@ -223,6 +223,16 @@ gbm_bo_get_fd(struct gbm_bo *bo)
return bo->gbm->bo_get_fd(bo);
 }
 
+/** Get the number of planes for the given bo.
+ *
+ * \param bo The buffer object
+ * \return The number of planes
+ */
+GBM_EXPORT int
+gbm_bo_get_plane_count(struct gbm_bo *bo)
+{
+   return bo->gbm->bo_get_planes(bo);
+}
 
 /** Write data into the buffer object
  *
diff --git a/src/gbm/main/gbm.h b/src/gbm/main/gbm.h
index efb329e..b4873ab 100644
--- a/src/gbm/main/gbm.h
+++ b/src/gbm/main/gbm.h
@@ -316,6 +316,9 @@ int
 gbm_bo_get_fd(struct gbm_bo *bo);
 
 int
+gbm_bo_get_plane_count(struct gbm_bo *bo);
+
+int
 gbm_bo_write(struct gbm_bo *bo, const void *buf, size_t count);
 
 void
diff --git a/src/gbm/main/gbmint.h b/src/gbm/main/gbmint.h
index cfef5ee..c6a6701 100644
--- a/src/gbm/main/gbmint.h
+++ b/src/gbm/main/gbmint.h
@@ -76,6 +76,7 @@ struct gbm_device {
void (*bo_unmap)(struct gbm_bo *bo, void *map_data);
int (*bo_write)(struct gbm_bo *bo, const void *buf, size_t data);
int (*bo_get_fd)(struct gbm_bo *bo);
+   int (*bo_get_planes)(struct gbm_bo *bo);
void (*bo_destroy)(struct gbm_bo *bo);
 
struct gbm_surface *(*surface_create)(struct gbm_device *gbm,
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/27] gbm: Export a per plane getter for offset

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

Unlike stride, there was no previous offset getter, so it can be right
on the first try.

Signed-off-by: Ben Widawsky 
---
 src/gbm/backends/dri/gbm_dri.c | 27 +++
 src/gbm/gbm-symbols-check  |  1 +
 src/gbm/main/gbm.c | 15 +++
 src/gbm/main/gbm.h |  3 +++
 src/gbm/main/gbmint.h  |  1 +
 5 files changed, 47 insertions(+)

diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
index 67d67d8..57d7bbe 100644
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -652,6 +652,32 @@ gbm_dri_bo_get_stride(struct gbm_bo *_bo, int plane)
return (uint32_t)stride;
 }
 
+static uint32_t
+gbm_dri_bo_get_offset(struct gbm_bo *_bo, int plane)
+{
+   struct gbm_dri_device *dri = gbm_dri_device(_bo->gbm);
+   struct gbm_dri_bo *bo = gbm_dri_bo(_bo);
+   int offset = 0;
+
+   if (!dri->image || dri->image->base.version < 13 || 
!dri->image->fromPlanar) {
+  errno = ENOSYS;
+  return 0;
+   }
+
+   if (plane >= get_number_planes(dri, bo->image))
+  return 0;
+
+   __DRIimage *image = dri->image->fromPlanar(bo->image, plane, NULL);
+   if (!image) {
+  /* Use the parent offset */
+  image = bo->image;
+   }
+
+   dri->image->queryImage(image, __DRI_IMAGE_ATTRIB_OFFSET, &offset);
+
+   return (uint32_t)offset;
+}
+
 static void
 gbm_dri_bo_destroy(struct gbm_bo *_bo)
 {
@@ -1111,6 +1137,7 @@ dri_device_create(int fd)
dri->base.base.bo_get_fd = gbm_dri_bo_get_fd;
dri->base.base.bo_get_planes = gbm_dri_bo_get_planes;
dri->base.base.bo_get_stride = gbm_dri_bo_get_stride;
+   dri->base.base.bo_get_offset = gbm_dri_bo_get_offset;
dri->base.base.bo_destroy = gbm_dri_bo_destroy;
dri->base.base.destroy = dri_destroy;
dri->base.base.surface_create = gbm_dri_surface_create;
diff --git a/src/gbm/gbm-symbols-check b/src/gbm/gbm-symbols-check
index 944f3e1..f6857dd 100755
--- a/src/gbm/gbm-symbols-check
+++ b/src/gbm/gbm-symbols-check
@@ -16,6 +16,7 @@ gbm_bo_get_height
 gbm_bo_get_stride
 gbm_bo_get_stride_for_plane
 gbm_bo_get_format
+gbm_bo_get_offset
 gbm_bo_get_device
 gbm_bo_get_handle
 gbm_bo_get_fd
diff --git a/src/gbm/main/gbm.c b/src/gbm/main/gbm.c
index 1c19540..1c5e303 100644
--- a/src/gbm/main/gbm.c
+++ b/src/gbm/main/gbm.c
@@ -194,6 +194,21 @@ gbm_bo_get_format(struct gbm_bo *bo)
return bo->format;
 }
 
+/** Get the offset for the data of the specified plane
+ *
+ * Extra planes, and even the first plane, may have an offset from the start of
+ * the buffer object. This function will provide the offset for the given plane
+ * to be used in various KMS APIs.
+ *
+ * \param bo The buffer object
+ * \return The offset
+ */
+GBM_EXPORT uint32_t
+gbm_bo_get_offset(struct gbm_bo *bo, int plane)
+{
+   return bo->gbm->bo_get_offset(bo, plane);
+}
+
 /** Get the gbm device used to create the buffer object
  *
  * \param bo The buffer object
diff --git a/src/gbm/main/gbm.h b/src/gbm/main/gbm.h
index 3e104d1..5a5d8d4 100644
--- a/src/gbm/main/gbm.h
+++ b/src/gbm/main/gbm.h
@@ -309,6 +309,9 @@ gbm_bo_get_stride_for_plane(struct gbm_bo *bo, int plane);
 uint32_t
 gbm_bo_get_format(struct gbm_bo *bo);
 
+uint32_t
+gbm_bo_get_offset(struct gbm_bo *bo, int plane);
+
 struct gbm_device *
 gbm_bo_get_device(struct gbm_bo *bo);
 
diff --git a/src/gbm/main/gbmint.h b/src/gbm/main/gbmint.h
index 35d3bcb..5dac336 100644
--- a/src/gbm/main/gbmint.h
+++ b/src/gbm/main/gbmint.h
@@ -78,6 +78,7 @@ struct gbm_device {
int (*bo_get_fd)(struct gbm_bo *bo);
int (*bo_get_planes)(struct gbm_bo *bo);
uint32_t (*bo_get_stride)(struct gbm_bo *bo, int plane);
+   uint32_t (*bo_get_offset)(struct gbm_bo *bo, int plane);
void (*bo_destroy)(struct gbm_bo *bo);
 
struct gbm_surface *(*surface_create)(struct gbm_device *gbm,
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/27] i965/dri: Store the screen associated with the image

2016-12-01 Thread Ben Widawsky
From: Ben Widawsky 

I intend to need to get to the devinfo structure, and storing the screen
is an easy way to do that.

It seems to be the consensus that you cannot share an image between
multiple screens.

Scape-goat: Rob Clark 
Signed-off-by: Ben Widawsky 
---
 src/mesa/drivers/dri/i965/intel_image.h  |  1 +
 src/mesa/drivers/dri/i965/intel_screen.c | 16 ++--
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_image.h 
b/src/mesa/drivers/dri/i965/intel_image.h
index 9b3816e..fd63919 100644
--- a/src/mesa/drivers/dri/i965/intel_image.h
+++ b/src/mesa/drivers/dri/i965/intel_image.h
@@ -65,6 +65,7 @@ struct intel_image_format {
 };
 
 struct __DRIimageRec {
+   struct intel_screen *screen;
drm_intel_bo *bo;
uint32_t pitch; /**< in bytes */
GLenum internal_format;
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index e1c3c19..5808bde 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -349,7 +349,8 @@ static boolean intel_lookup_fourcc(int dri_format, int 
*fourcc)
 }
 
 static __DRIimage *
-intel_allocate_image(int dri_format, void *loaderPrivate)
+intel_allocate_image(struct intel_screen *screen, int dri_format,
+ void *loaderPrivate)
 {
 __DRIimage *image;
 
@@ -357,6 +358,7 @@ intel_allocate_image(int dri_format, void *loaderPrivate)
 if (image == NULL)
return NULL;
 
+image->screen = screen;
 image->dri_format = dri_format;
 image->offset = 0;
 
@@ -407,7 +409,7 @@ intel_create_image_from_name(__DRIscreen *dri_screen,
 __DRIimage *image;
 int cpp;
 
-image = intel_allocate_image(format, loaderPrivate);
+image = intel_allocate_image(screen, format, loaderPrivate);
 if (image == NULL)
return NULL;
 
@@ -557,7 +559,7 @@ intel_create_image(__DRIscreen *dri_screen,
if (use & __DRI_IMAGE_USE_LINEAR)
   tiling = I915_TILING_NONE;
 
-   image = intel_allocate_image(format, loaderPrivate);
+   image = intel_allocate_image(screen, format, loaderPrivate);
if (image == NULL)
   return NULL;
 
@@ -719,9 +721,11 @@ intel_create_image_from_fds(__DRIscreen *dri_screen,
   return NULL;
 
if (f->nplanes == 1)
-  image = intel_allocate_image(f->planes[0].dri_format, loaderPrivate);
+  image = intel_allocate_image(screen, f->planes[0].dri_format,
+   loaderPrivate);
else
-  image = intel_allocate_image(__DRI_IMAGE_FORMAT_NONE, loaderPrivate);
+  image = intel_allocate_image(screen, __DRI_IMAGE_FORMAT_NONE,
+   loaderPrivate);
 
if (image == NULL)
   return NULL;
@@ -824,7 +828,7 @@ intel_from_planar(__DRIimage *parent, int plane, void 
*loaderPrivate)
 offset = parent->offsets[index];
 stride = parent->strides[index];
 
-image = intel_allocate_image(dri_format, loaderPrivate);
+image = intel_allocate_image(parent->screen, dri_format, loaderPrivate);
 if (image == NULL)
return NULL;
 
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nir/gcm: Rework the schedule late loop

2016-12-01 Thread Jason Ekstrand
This fixes a bug in code motion that occurred when the best block is the
same as the schedule early block.  In this case, because we're checking
(lca != def->parent_instr->block) at the top of the loop, we never get to
the check for loop depth so we wouldn't move it out of the loop.  This
commit reworks the loop to be a simple for loop up the dominator chain and
we place the (lca != def->parent_instr->block) check at the end of the
loop.
---
 src/compiler/nir/nir_opt_gcm.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/compiler/nir/nir_opt_gcm.c b/src/compiler/nir/nir_opt_gcm.c
index 77eb8e6..9d7f59c 100644
--- a/src/compiler/nir/nir_opt_gcm.c
+++ b/src/compiler/nir/nir_opt_gcm.c
@@ -326,12 +326,13 @@ gcm_schedule_late_def(nir_ssa_def *def, void *void_state)
 * as far outside loops as we can get.
 */
nir_block *best = lca;
-   while (lca != def->parent_instr->block) {
-  assert(lca);
-  if (state->blocks[lca->index].loop_depth <
+   for (nir_block *block = lca; block != NULL; block = block->imm_dom) {
+  if (state->blocks[block->index].loop_depth <
   state->blocks[best->index].loop_depth)
- best = lca;
-  lca = lca->imm_dom;
+ best = block;
+
+  if (block == def->parent_instr->block)
+ break;
}
def->parent_instr->block = best;
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Mesa 13.1.0 release plan

2016-12-01 Thread Marek Olšák
On Wed, Nov 30, 2016 at 9:23 PM, Emil Velikov  wrote:
> Hi all,
>
> With holidays not far off, it might be a nice idea to consider the
> branchpoint/release schedule for the next release.
>
> I will be having limited internet access during 20 Dec - 7 Jan, thus
> the I'm leaning towards following:
>  Jan 13 2017 - Feature freeze/Release candidate 1
>  Jan 20 2017 - Release candidate 2
>  Jan 27 2017 - Release candidate 3
>  Feb 03 2017 - Release candidate 4/final release

Sounds like a good plan for 17.0. :)

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv/cmd_buffer: Actually use the stencil dimension

2016-12-01 Thread Nanley Chery
On Wed, Nov 30, 2016 at 05:55:32PM -0800, Jason Ekstrand wrote:
> On Wed, Nov 30, 2016 at 10:20 AM, Nanley Chery 
> wrote:
> 
> > On Tue, Nov 29, 2016 at 05:41:58PM -0800, Jason Ekstrand wrote:
> > > In an attempt to fix 3DSTATE_DEPTH_BUFFER for stencil-only cases, I
> > > accidentally kept setting the SurfaceType to 2D in the stencil-only case
> > > thanks to a copy+paste error.
> > >
> > > Cc: Nanley Chery 
> > > ---
> > >  src/intel/vulkan/genX_cmd_buffer.c | 2 +-
> > >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > Thanks for the fix! This patch is,
> > Reviewed-by: Nanley Chery 
> >
> 
> Thanks!  Pushed.  Sorry for the mess.
> 

No worries.

> 
> > >
> > > diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> > b/src/intel/vulkan/genX_cmd_buffer.c
> > > index 73f4523..f761d9a 100644
> > > --- a/src/intel/vulkan/genX_cmd_buffer.c
> > > +++ b/src/intel/vulkan/genX_cmd_buffer.c
> > > @@ -2153,7 +2153,7 @@ cmd_buffer_emit_depth_stencil(struct
> > anv_cmd_buffer *cmd_buffer)
> > > */
> > >    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER),
> > db) {
> > >   if (has_stencil) {
> > > -db.SurfaceType   = SURFTYPE_2D;
> > > +db.SurfaceType   =
> > > depth_stencil_surface_type(image->stencil_surface.isl.
> > dim);
> > >   } else {
> > >  db.SurfaceType   = SURFTYPE_2D;
> > > --
> > > 2.5.0.400.gff86faf
> > >
> > > ___
> > > mesa-dev mailing list
> > > mesa-dev@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] mesa: add support for GL_INTEL_conservative_rasterization

2016-12-01 Thread Ilia Mirkin
Ah, I see. The difference is that it exists, unlike the ARB one :) I was
confusing with the NV variant.

On Dec 1, 2016 2:10 PM, "Ilia Mirkin"  wrote:

Is this different from the arb variant?

On Dec 1, 2016 10:56 AM, "Lionel Landwerlin" 
wrote:

> Signed-off-by: Lionel Landwerlin 
> ---
>  src/compiler/glsl/ast.h  |  5 
>  src/compiler/glsl/ast_to_hir.cpp |  5 
>  src/compiler/glsl/ast_type.cpp   | 16 +++-
>  src/compiler/glsl/glsl_parser.yy | 34 ++
>  src/compiler/glsl/glsl_parser_extras.cpp |  4 +++
>  src/compiler/glsl/glsl_parser_extras.h   |  4 +++
>  src/compiler/glsl/linker.cpp |  3 +++
>  src/compiler/shader_info.h   |  6 +
>  src/mesa/main/api_validate.c | 42
> 
>  src/mesa/main/enable.c   | 12 +
>  src/mesa/main/extensions_table.h |  1 +
>  src/mesa/main/mtypes.h   |  3 +++
>  src/mesa/main/shaderapi.c|  1 +
>  13 files changed, 130 insertions(+), 6 deletions(-)
>
> diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h
> index df3a744..0e32c36 100644
> --- a/src/compiler/glsl/ast.h
> +++ b/src/compiler/glsl/ast.h
> @@ -610,6 +610,11 @@ struct ast_type_qualifier {
>* Flag set if GL_ARB_post_depth_coverage layout qualifier is
> used.
>*/
>   unsigned post_depth_coverage:1;
> + /**
> +  * Flag set if GL_INTEL_conservartive_rasterization layout
> qualifier
> +  * is used.
> +  */
> + unsigned inner_coverage:1;
>}
>/** \brief Set of flags, accessed by name. */
>q;
> diff --git a/src/compiler/glsl/ast_to_hir.cpp
> b/src/compiler/glsl/ast_to_hir.cpp
> index 2434ce5..1e14d27 100644
> --- a/src/compiler/glsl/ast_to_hir.cpp
> +++ b/src/compiler/glsl/ast_to_hir.cpp
> @@ -3633,6 +3633,11 @@ apply_layout_qualifier_to_variable(const struct
> ast_type_qualifier *qual,
> "valid in fragment shader input layout
> declaration.");
> }
>
> +   if (qual->flags.q.inner_coverage) {
> +  _mesa_glsl_error(loc, state, "inner_coverage layout qualifier only "
> +   "valid in fragment shader input layout
> declaration.");
> +   }
> +
> if (qual->flags.q.post_depth_coverage) {
>_mesa_glsl_error(loc, state, "post_depth_coverage layout qualifier
> only "
> "valid in fragment shader input layout
> declaration.");
> diff --git a/src/compiler/glsl/ast_type.cpp b/src/compiler/glsl/ast_type.c
> pp
> index aa1ae7e..d68e6e2 100644
> --- a/src/compiler/glsl/ast_type.cpp
> +++ b/src/compiler/glsl/ast_type.cpp
> @@ -579,6 +579,7 @@ ast_type_qualifier::validate_in_qualifier(YYLTYPE
> *loc,
>break;
> case MESA_SHADER_FRAGMENT:
>valid_in_mask.flags.q.early_fragment_tests = 1;
> +  valid_in_mask.flags.q.inner_coverage = 1;
>valid_in_mask.flags.q.post_depth_coverage = 1;
>break;
> case MESA_SHADER_COMPUTE:
> @@ -634,11 +635,23 @@ ast_type_qualifier::merge_into_in_qualifier(YYLTYPE
> *loc,
>state->in_qualifier->flags.q.early_fragment_tests = false;
> }
>
> +   if (state->in_qualifier->flags.q.inner_coverage) {
> +  state->fs_inner_coverage = true;
> +  state->in_qualifier->flags.q.inner_coverage = false;
> +   }
> +
> if (state->in_qualifier->flags.q.post_depth_coverage) {
>state->fs_post_depth_coverage = true;
>state->in_qualifier->flags.q.post_depth_coverage = false;
> }
>
> +   if (state->fs_inner_coverage && state->fs_post_depth_coverage) {
> +  _mesa_glsl_error(loc, state,
> +   "inner_coverage & post_depth_coverage layout
> qualifiers "
> +   "are mutally exclusives");
> +  r = false;
> +   }
> +
> /* We allow the creation of multiple cs_input_layout nodes. Coherence
> among
>  * all existing nodes is checked later, when the AST node is
> transformed
>  * into HIR.
> @@ -707,7 +720,7 @@ ast_type_qualifier::validate_flags(YYLTYPE *loc,
>  "%s '%s':"
>  "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s"
>  "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s"
> -"%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
> +"%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
>  message, name,
>  bad.flags.q.invariant ? " invariant" : "",
>  bad.flags.q.precise ? " precise" : "",
> @@ -768,6 +781,7 @@ ast_type_qualifier::validate_flags(YYLTYPE *loc,
>  bad.flags.q.vertices ? " vertices" : "",
>  bad.flags.q.subroutine ? " subroutine" : "",
>  bad.flags.q.subroutine_def ? " subroutine_def" : "",
> +bad.flags.q.inner_coverage ? " 

Re: [Mesa-dev] [PATCH 1/2] mesa: add support for GL_INTEL_conservative_rasterization

2016-12-01 Thread Ilia Mirkin
Is this different from the arb variant?

On Dec 1, 2016 10:56 AM, "Lionel Landwerlin" 
wrote:

> Signed-off-by: Lionel Landwerlin 
> ---
>  src/compiler/glsl/ast.h  |  5 
>  src/compiler/glsl/ast_to_hir.cpp |  5 
>  src/compiler/glsl/ast_type.cpp   | 16 +++-
>  src/compiler/glsl/glsl_parser.yy | 34 ++
>  src/compiler/glsl/glsl_parser_extras.cpp |  4 +++
>  src/compiler/glsl/glsl_parser_extras.h   |  4 +++
>  src/compiler/glsl/linker.cpp |  3 +++
>  src/compiler/shader_info.h   |  6 +
>  src/mesa/main/api_validate.c | 42
> 
>  src/mesa/main/enable.c   | 12 +
>  src/mesa/main/extensions_table.h |  1 +
>  src/mesa/main/mtypes.h   |  3 +++
>  src/mesa/main/shaderapi.c|  1 +
>  13 files changed, 130 insertions(+), 6 deletions(-)
>
> diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h
> index df3a744..0e32c36 100644
> --- a/src/compiler/glsl/ast.h
> +++ b/src/compiler/glsl/ast.h
> @@ -610,6 +610,11 @@ struct ast_type_qualifier {
>* Flag set if GL_ARB_post_depth_coverage layout qualifier is
> used.
>*/
>   unsigned post_depth_coverage:1;
> + /**
> +  * Flag set if GL_INTEL_conservartive_rasterization layout
> qualifier
> +  * is used.
> +  */
> + unsigned inner_coverage:1;
>}
>/** \brief Set of flags, accessed by name. */
>q;
> diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_
> hir.cpp
> index 2434ce5..1e14d27 100644
> --- a/src/compiler/glsl/ast_to_hir.cpp
> +++ b/src/compiler/glsl/ast_to_hir.cpp
> @@ -3633,6 +3633,11 @@ apply_layout_qualifier_to_variable(const struct
> ast_type_qualifier *qual,
> "valid in fragment shader input layout
> declaration.");
> }
>
> +   if (qual->flags.q.inner_coverage) {
> +  _mesa_glsl_error(loc, state, "inner_coverage layout qualifier only "
> +   "valid in fragment shader input layout
> declaration.");
> +   }
> +
> if (qual->flags.q.post_depth_coverage) {
>_mesa_glsl_error(loc, state, "post_depth_coverage layout qualifier
> only "
> "valid in fragment shader input layout
> declaration.");
> diff --git a/src/compiler/glsl/ast_type.cpp b/src/compiler/glsl/ast_type.
> cpp
> index aa1ae7e..d68e6e2 100644
> --- a/src/compiler/glsl/ast_type.cpp
> +++ b/src/compiler/glsl/ast_type.cpp
> @@ -579,6 +579,7 @@ ast_type_qualifier::validate_in_qualifier(YYLTYPE
> *loc,
>break;
> case MESA_SHADER_FRAGMENT:
>valid_in_mask.flags.q.early_fragment_tests = 1;
> +  valid_in_mask.flags.q.inner_coverage = 1;
>valid_in_mask.flags.q.post_depth_coverage = 1;
>break;
> case MESA_SHADER_COMPUTE:
> @@ -634,11 +635,23 @@ ast_type_qualifier::merge_into_in_qualifier(YYLTYPE
> *loc,
>state->in_qualifier->flags.q.early_fragment_tests = false;
> }
>
> +   if (state->in_qualifier->flags.q.inner_coverage) {
> +  state->fs_inner_coverage = true;
> +  state->in_qualifier->flags.q.inner_coverage = false;
> +   }
> +
> if (state->in_qualifier->flags.q.post_depth_coverage) {
>state->fs_post_depth_coverage = true;
>state->in_qualifier->flags.q.post_depth_coverage = false;
> }
>
> +   if (state->fs_inner_coverage && state->fs_post_depth_coverage) {
> +  _mesa_glsl_error(loc, state,
> +   "inner_coverage & post_depth_coverage layout
> qualifiers "
> +   "are mutally exclusives");
> +  r = false;
> +   }
> +
> /* We allow the creation of multiple cs_input_layout nodes. Coherence
> among
>  * all existing nodes is checked later, when the AST node is
> transformed
>  * into HIR.
> @@ -707,7 +720,7 @@ ast_type_qualifier::validate_flags(YYLTYPE *loc,
>  "%s '%s':"
>  "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s"
>  "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s"
> -"%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
> +"%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
>  message, name,
>  bad.flags.q.invariant ? " invariant" : "",
>  bad.flags.q.precise ? " precise" : "",
> @@ -768,6 +781,7 @@ ast_type_qualifier::validate_flags(YYLTYPE *loc,
>  bad.flags.q.vertices ? " vertices" : "",
>  bad.flags.q.subroutine ? " subroutine" : "",
>  bad.flags.q.subroutine_def ? " subroutine_def" : "",
> +bad.flags.q.inner_coverage ? " inner_coverage" : "",
>  bad.flags.q.post_depth_coverage ? "
> post_depth_coverage" : "");
> return false;
>  }
> diff --git 

[Mesa-dev] [PATCH 3/3] i965: Delete the meta-base CopyImageSubData implementation

2016-12-01 Thread Jason Ekstrand
When I originally implemented the ARB_copy_image extension, the fast-path
was written in meta using texture views.  This path only worked if both
images were uncompressed color images.  All of the other cases fell back to
the blitter or, in the worst case, mapping and memcpy on the CPU.  Now that
we have the blorp path, it handles all copies ever and the old meta,
blitter, and CPU paths are only used on gen5 and below.  The primary reason
why we needed the meta path (apart from having a slow blitter on later
hardware) was to handle multisampling which gen5 and earlier don't support
anyway.  Since the blitter is reasonably fast on gen5, we can just delete
the meta path and get rid of all that terrible code.

If we decide that we're ok with just disabling ARB_copy_image on gen5 and
earlier (I personally am), then we could get rid of another 300 lines or so
of semi-hairy code.
---
 src/mesa/Makefile.sources|   1 -
 src/mesa/drivers/common/meta.h   |  10 -
 src/mesa/drivers/common/meta_copy_image.c| 307 ---
 src/mesa/drivers/dri/i965/intel_copy_image.c |  10 -
 4 files changed, 328 deletions(-)
 delete mode 100644 src/mesa/drivers/common/meta_copy_image.c

diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 410a61a..ee737b0 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -621,7 +621,6 @@ COMMON_DRIVER_FILES =   \
drivers/common/driverfuncs.c\
drivers/common/driverfuncs.h\
drivers/common/meta_blit.c  \
-   drivers/common/meta_copy_image.c\
drivers/common/meta_generate_mipmap.c   \
drivers/common/meta_tex_subimage.c  \
drivers/common/meta.c \
diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h
index a7018f5..0a913e9 100644
--- a/src/mesa/drivers/common/meta.h
+++ b/src/mesa/drivers/common/meta.h
@@ -492,16 +492,6 @@ _mesa_meta_and_swrast_BlitFramebuffer(struct gl_context 
*ctx,
   GLint dstX1, GLint dstY1,
   GLbitfield mask, GLenum filter);
 
-bool
-_mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx,
- struct gl_texture_image 
*src_tex_image,
- struct gl_renderbuffer 
*src_renderbuffer,
- int src_x, int src_y, int src_z,
- struct gl_texture_image 
*dst_tex_image,
- struct gl_renderbuffer 
*dst_renderbuffer,
- int dst_x, int dst_y, int dst_z,
- int src_width, int src_height);
-
 extern void
 _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers);
 
diff --git a/src/mesa/drivers/common/meta_copy_image.c 
b/src/mesa/drivers/common/meta_copy_image.c
deleted file mode 100644
index e1c90a3..0000000
--- a/src/mesa/drivers/common/meta_copy_image.c
+++ /dev/null
@@ -1,307 +0,0 @@
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 2014 Intel Corporation.  All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "glheader.h"
-#include "context.h"
-#include "enums.h"
-#include "imports.h"
-#include "macros.h"
-#include "teximage.h"
-#include "texobj.h"
-#include "fbobject.h"
-#include "framebuffer.h"
-#include "buffers.h"
-#include "state.h"
-#include "mtypes.h"
-#include "meta.h"
-
-/**
- * Create a texture image that wraps a renderbuffer.
- */
-static struct gl_texture_image *
-wrap_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb)
-{
-   GLenum texTarget;
-   struct gl_texture_object *texObj;
-   struct gl_texture_image *texImage;
-
-   if (rb->NumSamples > 1)
-  texTarget = GL_TEXTURE_2D_MULTISAMPLE;
-   else
-  texTarget = GL_TEXTURE_2D;
-
-   /* 

[Mesa-dev] [PATCH 1/3] i965/blit: Break the guts of intel_miptree_blit into a helper

2016-12-01 Thread Jason Ekstrand
Cc: "13.0" 
---
 src/mesa/drivers/dri/i965/intel_blit.c | 151 ++---
 1 file changed, 84 insertions(+), 67 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_blit.c 
b/src/mesa/drivers/dri/i965/intel_blit.c
index 4944b8c..15e45d4 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -215,6 +215,86 @@ get_blit_intratile_offset_el(const struct brw_context *brw,
}
 }
 
+static bool
+emit_miptree_blit(struct brw_context *brw,
+  struct intel_mipmap_tree *src_mt,
+  uint32_t src_x, uint32_t src_y,
+  struct intel_mipmap_tree *dst_mt,
+  uint32_t dst_x, uint32_t dst_y,
+  uint32_t width, uint32_t height,
+  bool reverse, GLenum logicop)
+{
+   /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics
+* Data Size Limitations):
+*
+*The BLT engine is capable of transferring very large quantities of
+*graphics data. Any graphics data read from and written to the
+*destination is permitted to represent a number of pixels that
+*occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
+*at the destination. The maximum number of pixels that may be
+*represented per scan line’s worth of graphics data depends on the
+*color depth.
+*
+* Furthermore, intelEmitCopyBlit (which is called below) uses a signed
+* 16-bit integer to represent buffer pitch, so it can only handle buffer
+* pitches < 32k. However, the pitch is measured in bytes for linear buffers
+* and dwords for tiled buffers.
+*
+* As a result of these two limitations, we can only use the blitter to do
+* this copy when the miptree's pitch is less than 32k linear or 128k tiled.
+*/
+   if (blt_pitch(src_mt) >= 32768 || blt_pitch(dst_mt) >= 32768) {
+  perf_debug("Falling back due to >= 32k/128k pitch\n");
+  return false;
+   }
+
+   /* We need to split the blit into chunks that each fit within the blitter's
+* restrictions.  We can't use a chunk size of 32768 because we need to
+* ensure that src_tile_x + chunk_size fits.  We choose 16384 because it's
+* a nice round power of two, big enough that performance won't suffer, and
+* small enough to guarantee everything fits.
+*/
+   const uint32_t max_chunk_size = 16384;
+
+   for (uint32_t chunk_x = 0; chunk_x < width; chunk_x += max_chunk_size) {
+  for (uint32_t chunk_y = 0; chunk_y < height; chunk_y += max_chunk_size) {
+ const uint32_t chunk_w = MIN2(max_chunk_size, width - chunk_x);
+ const uint32_t chunk_h = MIN2(max_chunk_size, height - chunk_y);
+
+ uint32_t src_offset, src_tile_x, src_tile_y;
+ get_blit_intratile_offset_el(brw, src_mt,
+  src_x + chunk_x, src_y + chunk_y,
+  &src_offset, &src_tile_x, &src_tile_y);
+
+ uint32_t dst_offset, dst_tile_x, dst_tile_y;
+ get_blit_intratile_offset_el(brw, dst_mt,
+  dst_x + chunk_x, dst_y + chunk_y,
+  &dst_offset, &dst_tile_x, &dst_tile_y);
+
+ if (!intelEmitCopyBlit(brw,
+src_mt->cpp,
+reverse ? -src_mt->pitch : src_mt->pitch,
+src_mt->bo, src_mt->offset + src_offset,
+src_mt->tiling,
+src_mt->tr_mode,
+dst_mt->pitch,
+dst_mt->bo, dst_mt->offset + dst_offset,
+dst_mt->tiling,
+dst_mt->tr_mode,
+src_tile_x, src_tile_y,
+dst_tile_x, dst_tile_y,
+chunk_w, chunk_h,
+logicop)) {
+/* If this is ever going to fail, it will fail on the first chunk */
+assert(chunk_x == 0 && chunk_y == 0);
+return false;
+ }
+  }
+   }
+
+   return true;
+}
+
 /**
  * Implements a rectangular block transfer (blit) of pixels between two
  * miptrees.
@@ -265,30 +345,6 @@ intel_miptree_blit(struct brw_context *brw,
   return false;
}
 
-   /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics
-* Data Size Limitations):
-*
-*The BLT engine is capable of transferring very large quantities of
-*graphics data. Any graphics data read from and written to the
-*destination is permitted to represent a number of pixels that
-*occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
-*at the destination. The maximum number of pixels that may be
-*represented per scan line’s worth of graphics data depends 

[Mesa-dev] [PATCH 2/3] i965/copy_image: Re-implement the blitter path with emit_miptree_blit

2016-12-01 Thread Jason Ekstrand
By using emit_miptree_blit which does chunking, this fixes the blitter path
for the case where the image is too tall to blit normally.  We also pull it
into intel_blit as intel_miptree_copy.  This matches the naming of the
blorp blit and copy functions brw_blorp_blit and brw_blorp_copy.

Cc: "13.0" 
---
 src/mesa/drivers/dri/i965/intel_blit.c   |  68 ++
 src/mesa/drivers/dri/i965/intel_blit.h   |   9 +++
 src/mesa/drivers/dri/i965/intel_copy_image.c | 100 +--
 3 files changed, 80 insertions(+), 97 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_blit.c 
b/src/mesa/drivers/dri/i965/intel_blit.c
index 15e45d4..03a35ee 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -386,6 +386,74 @@ intel_miptree_blit(struct brw_context *brw,
return true;
 }
 
+bool
+intel_miptree_copy(struct brw_context *brw,
+   struct intel_mipmap_tree *src_mt,
+   int src_level, int src_slice,
+   uint32_t src_x, uint32_t src_y,
+   struct intel_mipmap_tree *dst_mt,
+   int dst_level, int dst_slice,
+   uint32_t dst_x, uint32_t dst_y,
+   uint32_t src_width, uint32_t src_height)
+{
+   /* The blitter doesn't understand multisampling at all. */
+   if (src_mt->num_samples > 0 || dst_mt->num_samples > 0)
+  return false;
+
+   if (src_mt->format == MESA_FORMAT_S_UINT8)
+  return false;
+
+   /* The blitter has no idea about HiZ or fast color clears, so we need to
+* resolve the miptrees before we do anything.
+*/
+   intel_miptree_slice_resolve_depth(brw, src_mt, src_level, src_slice);
+   intel_miptree_slice_resolve_depth(brw, dst_mt, dst_level, dst_slice);
+   intel_miptree_resolve_color(brw, src_mt, src_level, src_slice, 1, 0);
+   intel_miptree_resolve_color(brw, dst_mt, dst_level, dst_slice, 1, 0);
+
+   uint32_t src_image_x, src_image_y;
+   intel_miptree_get_image_offset(src_mt, src_level, src_slice,
+  &src_image_x, &src_image_y);
+
+   if (_mesa_is_format_compressed(src_mt->format)) {
+  GLuint bw, bh;
+  _mesa_get_format_block_size(src_mt->format, &bw, &bh);
+
+  assert(src_x % bw == 0);
+  assert(src_y % bh == 0);
+  assert(src_width % bw == 0);
+  assert(src_height % bh == 0);
+
+  src_x /= (int)bw;
+  src_y /= (int)bh;
+  src_width /= (int)bw;
+  src_height /= (int)bh;
+   }
+   src_x += src_image_x;
+   src_y += src_image_y;
+
+   uint32_t dst_image_x, dst_image_y;
+   intel_miptree_get_image_offset(dst_mt, dst_level, dst_slice,
+  &dst_image_x, &dst_image_y);
+
+   if (_mesa_is_format_compressed(dst_mt->format)) {
+  GLuint bw, bh;
+  _mesa_get_format_block_size(dst_mt->format, &bw, &bh);
+
+  assert(dst_x % bw == 0);
+  assert(dst_y % bh == 0);
+
+  dst_x /= (int)bw;
+  dst_y /= (int)bh;
+   }
+   dst_x += dst_image_x;
+   dst_y += dst_image_y;
+
+   return emit_miptree_blit(brw, src_mt, src_x, src_y,
+dst_mt, dst_x, dst_y,
+src_width, src_height, false, GL_COPY);
+}
+
 static bool
 alignment_valid(struct brw_context *brw, unsigned offset, uint32_t tiling)
 {
diff --git a/src/mesa/drivers/dri/i965/intel_blit.h 
b/src/mesa/drivers/dri/i965/intel_blit.h
index f4ed919..6925795 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.h
+++ b/src/mesa/drivers/dri/i965/intel_blit.h
@@ -58,6 +58,15 @@ bool intel_miptree_blit(struct brw_context *brw,
 uint32_t width, uint32_t height,
 GLenum logicop);
 
+bool intel_miptree_copy(struct brw_context *brw,
+struct intel_mipmap_tree *src_mt,
+int src_level, int src_slice,
+uint32_t src_x, uint32_t src_y,
+struct intel_mipmap_tree *dst_mt,
+int dst_level, int dst_slice,
+uint32_t dst_x, uint32_t dst_y,
+uint32_t src_width, uint32_t src_height);
+
 bool
 intelEmitImmediateColorExpandBlit(struct brw_context *brw,
  GLuint cpp,
diff --git a/src/mesa/drivers/dri/i965/intel_copy_image.c 
b/src/mesa/drivers/dri/i965/intel_copy_image.c
index 3b5bf31..56eaed6 100644
--- a/src/mesa/drivers/dri/i965/intel_copy_image.c
+++ b/src/mesa/drivers/dri/i965/intel_copy_image.c
@@ -34,98 +34,6 @@
 #include "main/teximage.h"
 #include "drivers/common/meta.h"
 
-static bool
-copy_image_with_blitter(struct brw_context *brw,
-struct intel_mipmap_tree *src_mt, int src_level,
-int src_x, int src_y, int src_z,
-struct intel_mipmap_tree *dst_mt, int dst_level,
-int dst_x, int dst_y, int dst_z,
-int src_width, int src_height)
-{
-   GLuint 

Re: [Mesa-dev] [PATCH 00/10] mesa: support for EGL_ANDROID_native_fence_sync (vN+1)

2016-12-01 Thread Chad Versace
Hi Rob, just checking on the status of the patch set. Do you plan to
send a revised series or commit them as-is? I ask because my i965
patches depend on your patches 1-4, and I'm trying to decide when to
resend them.

On Fri 18 Nov 2016, Rob Clark wrote:
> This patchset implements support for EGL_ANDROID_native_fence_sync[1]
> for egl and gallium.  This extension provides support for native fence
> fd's (file descriptors) for the GPU.  In a similar way to dma-buf fd's,
> which provide a reference-counted userspace handle to buffers which
> can be shared across drivers and across processes, native fence fd's
> provide a reference-counted userspace handle to fences which can be
> shared across drivers and across processes.
> 
> This extension is already in use on android, and should be useful in
> other environments.
> 
> Patch 04/10 has some fixes from Chad squashed in, as well as a couple
> fixes for issues that Rafael found while writing piglit tests.
> 
> The kernel patches for freedreno (drm/msm) are upstream (v4.9) and the
> libdrm patches are in v2.4.72.  Kernel patches for drm/virtio are in
> flight (so corresponding gallium patch is not ready to push).  Kernel
> plus libdrm plus mesa patches for i965 are in flight.
> 
> This has been tested on piglit[2] (i965, freedreno), kmscube[3] (virgl,
> freedreno), and drm-hwc2[4] (virgl, freedreno).
> 
> [1] 
> https://www.khronos.org/registry/egl/extensions/ANDROID/EGL_ANDROID_native_fence_sync.txt
> [2] https://patchwork.freedesktop.org/series/14498/
> [3] https://github.com/robclark/kmscube/commits/atomic-fence
> [4] 
> https://git.collabora.com/cgit/user/robertfoss/drm_hwcomposer.git/log/?h=hwc2_fence_v2
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] tgsi: store writes_primid when scanning tgsi

2016-12-01 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Thu, Dec 1, 2016 at 6:14 PM, Tim Rowley  wrote:
> ---
>  src/gallium/auxiliary/tgsi/tgsi_scan.c | 3 +++
>  src/gallium/auxiliary/tgsi/tgsi_scan.h | 1 +
>  2 files changed, 4 insertions(+)
>
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c 
> b/src/gallium/auxiliary/tgsi/tgsi_scan.c
> index 84d6456..77fe6b3 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
> @@ -548,6 +548,9 @@ scan_declaration(struct tgsi_shader_info *info,
>   info->num_outputs = MAX2(info->num_outputs, reg + 1);
>
>   switch (semName) {
> + case TGSI_SEMANTIC_PRIMID:
> +info->writes_primid = true;
> +break;
>   case TGSI_SEMANTIC_VIEWPORT_INDEX:
>  info->writes_viewport_index = true;
>  break;
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h 
> b/src/gallium/auxiliary/tgsi/tgsi_scan.h
> index fe503e8..e4f4018 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
> +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
> @@ -113,6 +113,7 @@ struct tgsi_shader_info
> boolean writes_position;
> boolean writes_psize;
> boolean writes_clipvertex;
> +   boolean writes_primid;
> boolean writes_viewport_index;
> boolean writes_layer;
> boolean writes_memory; /**< contains stores or atomics to buffers or 
> images */
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] tgsi: store writes_primid when scanning tgsi

2016-12-01 Thread Tim Rowley
---
 src/gallium/auxiliary/tgsi/tgsi_scan.c | 3 +++
 src/gallium/auxiliary/tgsi/tgsi_scan.h | 1 +
 2 files changed, 4 insertions(+)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c 
b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 84d6456..77fe6b3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -548,6 +548,9 @@ scan_declaration(struct tgsi_shader_info *info,
  info->num_outputs = MAX2(info->num_outputs, reg + 1);
 
  switch (semName) {
+ case TGSI_SEMANTIC_PRIMID:
+info->writes_primid = true;
+break;
  case TGSI_SEMANTIC_VIEWPORT_INDEX:
 info->writes_viewport_index = true;
 break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h 
b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index fe503e8..e4f4018 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -113,6 +113,7 @@ struct tgsi_shader_info
boolean writes_position;
boolean writes_psize;
boolean writes_clipvertex;
+   boolean writes_primid;
boolean writes_viewport_index;
boolean writes_layer;
boolean writes_memory; /**< contains stores or atomics to buffers or images 
*/
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 25/50] glsl: Add "built-in" functions to do 64x64 => 64 multiplication

2016-12-01 Thread Ian Romanick
On 11/30/2016 12:19 PM, Matt Turner wrote:
> On 11/28, Ian Romanick wrote:
>> From: Ian Romanick 

[snip]

>> +   if (parser->extension_list) {
>> +  /* If MESA_shader_integer_functions is supported, then the
>> building
>> +   * blocks required for the 64x64 => 64 multiply exist.  Add
>> defines for
>> +   * those functions so that they can be tested.
>> +   */
>> +  if (parser->extension_list->MESA_shader_integer_functions) {
>> + add_builtin_define(parser, "__have_builtin_builtin_umul64", 1);
> 
> Just FYI, Clang has a similar mechanism called __has_builtin(x). It
> might be nice to match that, but not a big deal either way.
> 
> http://clang.llvm.org/docs/LanguageExtensions.html#feature-checking-macros

I'll take a look at that.  I'm definitely in favor of using a solution
that may be more familiar to people.  I used the simple define because
it was the least typing. :)

>> +  }
>> +   }
>> +
>>if (explicitly_set) {
>>   ralloc_asprintf_rewrite_tail(&parser->output,
>> &parser->output_length,
>>"#version %" PRIiMAX "%s%s", version,

[snip]

>> diff --git a/src/compiler/glsl/int64.glsl b/src/compiler/glsl/int64.glsl
>> new file mode 100644
>> index 000..f5fb010
>> --- /dev/null
>> +++ b/src/compiler/glsl/int64.glsl
>> @@ -0,0 +1,19 @@
>> +/* Compile with:
>> + *
>> + * glsl_compiler --version 140 --dump-builder int64.glsl >
>> builtin_int64.h
>> + *
>> + * Using version 1.40+ prevents built-in variables from being included.
>> + */
> 
> I'm not in love with checking in generated code, but I detest the
> build-time bootstrapping... so I guess I'm okay with this.

I'd much rather build-time bootstrap, but I don't think there's any way
to make it work across the platforms that Mesa supports and
cross-builds.  At least I couldn't think of a way that wasn't excruciating.

> If it were just this and sign64, I'd definitely advocate for just coding
> them directly.

This is also something of a test-run for the soft fp64 work that is
coming.  We'll use a similar mechanism for that.  I want to make sure we
end up with something that... we like as much as we can before it gets
too embedded.

>> +#version 140
>> +#extension GL_MESA_shader_integer_functions: require
>> +
>> +uvec2
>> +umul64(uvec2 a, uvec2 b)
>> +{
>> +   uvec2 result;
>> +
>> +   umulExtended(a.x, b.x, result.y, result.x);
>> +   result.y += a.x * b.y + a.y * b.x;
>> +
>> +   return result;
>> +}
>> -- 
>> 2.7.4
>>

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 27/50] glsl: Add "built-in" function for 64-bit integer sign()

2016-12-01 Thread Ian Romanick
On 11/30/2016 12:46 PM, Matt Turner wrote:
> On 11/28, Ian Romanick wrote:
>> From: Ian Romanick 
>>
>> These functions are directly available in shaders.  A #define is added
>> to detect the presence.  This allows these functions to be tested using
>> piglit regardless of whether the driver uses them for lowering.  The
>> GLSL spec says that functions and macros beginning with __ are reserved
>> for use by the implementation... hey, that's us!
>>
>> Signed-off-by: Ian Romanick 
>> ---
>> src/compiler/glsl/builtin_functions.cpp |   4 +
>> src/compiler/glsl/builtin_functions.h   |   3 +
>> src/compiler/glsl/builtin_int64.h   |  26 
>> src/compiler/glsl/glcpp/glcpp-parse.y   |   1 +
>> src/compiler/glsl/int64.glsl|  11 ++
>> src/compiler/glsl/udivmod64.h   | 206
>> 
>> 6 files changed, 251 insertions(+)
>> create mode 100644 src/compiler/glsl/udivmod64.h
> 
> This is supposed to be in a later patch.

Blarg.  This isn't supposed to be in any patch. :( This was just junk
left in my tree.  The division and modulus routines are (correctly)
added to builtin_int64.h in patch 29.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/2] Add INTEL_conservative_rasterization support

2016-12-01 Thread Lionel Landwerlin
Forgot to mention that this is based on top of Plamena's work (in case
you want to test):


https://patchwork.freedesktop.org/series/15512/

On 01/12/16 15:56, Lionel Landwerlin wrote:

Hi,

Here are a couple of patches to add support for the
INTEL_conservative_rasterization extension.

This is available on Gen9+ platforms.

You can find associated piglit tests here :

   https://patchwork.freedesktop.org/series/16230/

Cheers,

Lionel Landwerlin (2):
   mesa: add support for GL_INTEL_conservative_rasterization
   i965: enable INTEL_conservative_rasterization on Gen9+

  docs/relnotes/13.1.0.html|  1 +
  src/compiler/glsl/ast.h  |  5 
  src/compiler/glsl/ast_to_hir.cpp |  5 
  src/compiler/glsl/ast_type.cpp   | 16 ++-
  src/compiler/glsl/glsl_parser.yy | 34 ++
  src/compiler/glsl/glsl_parser_extras.cpp |  4 +++
  src/compiler/glsl/glsl_parser_extras.h   |  4 +++
  src/compiler/glsl/linker.cpp |  3 ++
  src/compiler/shader_info.h   |  6 
  src/mesa/drivers/dri/i965/brw_compiler.h |  1 +
  src/mesa/drivers/dri/i965/brw_defines.h  |  1 +
  src/mesa/drivers/dri/i965/brw_fs.cpp |  1 +
  src/mesa/drivers/dri/i965/gen8_ps_state.c| 13 +
  src/mesa/drivers/dri/i965/gen8_sf_state.c|  6 
  src/mesa/drivers/dri/i965/intel_extensions.c |  1 +
  src/mesa/main/api_validate.c | 42 
  src/mesa/main/enable.c   | 12 
  src/mesa/main/extensions_table.h |  1 +
  src/mesa/main/mtypes.h   |  3 ++
  src/mesa/main/shaderapi.c|  1 +
  20 files changed, 149 insertions(+), 11 deletions(-)

--
2.10.2



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] i965: enable INTEL_conservative_rasterization on Gen9+

2016-12-01 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
---
 docs/relnotes/13.1.0.html|  1 +
 src/mesa/drivers/dri/i965/brw_compiler.h |  1 +
 src/mesa/drivers/dri/i965/brw_defines.h  |  1 +
 src/mesa/drivers/dri/i965/brw_fs.cpp |  1 +
 src/mesa/drivers/dri/i965/gen8_ps_state.c| 13 -
 src/mesa/drivers/dri/i965/gen8_sf_state.c|  6 ++
 src/mesa/drivers/dri/i965/intel_extensions.c |  1 +
 7 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/docs/relnotes/13.1.0.html b/docs/relnotes/13.1.0.html
index a160cda..51efce5 100644
--- a/docs/relnotes/13.1.0.html
+++ b/docs/relnotes/13.1.0.html
@@ -45,6 +45,7 @@ Note: some of the new features are only available with 
certain drivers.
 
 
 GL_NV_image_formats on any driver supporting 
GL_ARB_shader_image_load_store (i965, nvc0, radeonsi, softpipe)
+INTEL_conservative_rasterization (i965)
 GL_ARB_post_depth_coverage on i965/gen9+
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h 
b/src/mesa/drivers/dri/i965/brw_compiler.h
index 410641f..a97f874 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -398,6 +398,7 @@ struct brw_wm_prog_data {
 
bool early_fragment_tests;
bool post_depth_coverage;
+   bool inner_coverage;
bool dispatch_8;
bool dispatch_16;
bool dual_src_blend;
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index f22a52f..cae8e9a 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -2373,6 +2373,7 @@ enum brw_message_target {
 #define _3DSTATE_RASTER 0x7850 /* GEN8+ */
 /* DW1 */
 # define GEN9_RASTER_VIEWPORT_Z_FAR_CLIP_TEST_ENABLE(1 << 26)
+# define GEN9_RASTER_CONSERVATIVE_RASTERIZATION_ENABLE  (1 << 24)
 # define GEN8_RASTER_FRONT_WINDING_CCW  (1 << 21)
 # define GEN8_RASTER_CULL_BOTH  (0 << 16)
 # define GEN8_RASTER_CULL_NONE  (1 << 16)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index ce0c07e..b5d1381 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -6455,6 +6455,7 @@ brw_compile_fs(const struct brw_compiler *compiler, void 
*log_data,
 
prog_data->early_fragment_tests = shader->info->fs.early_fragment_tests;
prog_data->post_depth_coverage = shader->info->fs.post_depth_coverage;
+   prog_data->inner_coverage = shader->info->fs.inner_coverage;
 
prog_data->barycentric_interp_modes =
   brw_compute_barycentric_interp_modes(compiler->devinfo, shader);
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c 
b/src/mesa/drivers/dri/i965/gen8_ps_state.c
index 33ef023..e43192d 100644
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -32,6 +32,7 @@ void
 gen8_upload_ps_extra(struct brw_context *brw,
  const struct brw_wm_prog_data *prog_data)
 {
+   struct gl_context *ctx = &brw->ctx;
uint32_t dw1 = 0;
 
dw1 |= GEN8_PSX_PIXEL_SHADER_VALID;
@@ -52,14 +53,15 @@ gen8_upload_ps_extra(struct brw_context *brw,
if (prog_data->persample_dispatch)
   dw1 |= GEN8_PSX_SHADER_IS_PER_SAMPLE;
 
+   /* _NEW_POLYGON */
if (prog_data->uses_sample_mask) {
   if (brw->gen >= 9) {
- if (prog_data->post_depth_coverage) {
+ if (prog_data->post_depth_coverage)
 dw1 |= BRW_PCICMS_DEPTH << 
GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT;
- }
- else {
+ else if (prog_data->inner_coverage && 
ctx->IntelConservativeRasterization)
 dw1 |= BRW_PSICMS_INNER << 
GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT;
- }
+ else
+dw1 |= BRW_PSICMS_NORMAL << 
GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT;
   }
   else {
  dw1 |= GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK;
@@ -289,7 +291,8 @@ upload_ps_state(struct brw_context *brw)
 
 const struct brw_tracked_state gen8_ps_state = {
.dirty = {
-  .mesa  = _NEW_MULTISAMPLE,
+  .mesa  = _NEW_MULTISAMPLE |
+   _NEW_POLYGON,
   .brw   = BRW_NEW_BATCH |
BRW_NEW_BLORP |
BRW_NEW_FS_PROG_DATA,
diff --git a/src/mesa/drivers/dri/i965/gen8_sf_state.c 
b/src/mesa/drivers/dri/i965/gen8_sf_state.c
index 5d77b39..afe7b52 100644
--- a/src/mesa/drivers/dri/i965/gen8_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_sf_state.c
@@ -319,6 +319,12 @@ upload_raster(struct brw_context *brw)
   }
}
 
+   /* _NEW_POLYGON */
+   if (ctx->IntelConservativeRasterization) {
+  if (brw->gen >= 9)
+ dw1 |= GEN9_RASTER_CONSERVATIVE_RASTERIZATION_ENABLE;
+   }
+
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_RASTER << 16 | (5 - 2));
OUT_BATCH(dw1);
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 19f4684..c1f42aa 

[Mesa-dev] [PATCH 0/2] Add INTEL_conservative_rasterization support

2016-12-01 Thread Lionel Landwerlin
Hi,

Here are a couple of patches to add support for the
INTEL_conservative_rasterization extension.

This is available on Gen9+ platforms.

You can find associated piglit tests here :

  https://patchwork.freedesktop.org/series/16230/

Cheers,

Lionel Landwerlin (2):
  mesa: add support for GL_INTEL_conservative_rasterization
  i965: enable INTEL_conservative_rasterization on Gen9+

 docs/relnotes/13.1.0.html|  1 +
 src/compiler/glsl/ast.h  |  5 
 src/compiler/glsl/ast_to_hir.cpp |  5 
 src/compiler/glsl/ast_type.cpp   | 16 ++-
 src/compiler/glsl/glsl_parser.yy | 34 ++
 src/compiler/glsl/glsl_parser_extras.cpp |  4 +++
 src/compiler/glsl/glsl_parser_extras.h   |  4 +++
 src/compiler/glsl/linker.cpp |  3 ++
 src/compiler/shader_info.h   |  6 
 src/mesa/drivers/dri/i965/brw_compiler.h |  1 +
 src/mesa/drivers/dri/i965/brw_defines.h  |  1 +
 src/mesa/drivers/dri/i965/brw_fs.cpp |  1 +
 src/mesa/drivers/dri/i965/gen8_ps_state.c| 13 +
 src/mesa/drivers/dri/i965/gen8_sf_state.c|  6 
 src/mesa/drivers/dri/i965/intel_extensions.c |  1 +
 src/mesa/main/api_validate.c | 42 
 src/mesa/main/enable.c   | 12 
 src/mesa/main/extensions_table.h |  1 +
 src/mesa/main/mtypes.h   |  3 ++
 src/mesa/main/shaderapi.c|  1 +
 20 files changed, 149 insertions(+), 11 deletions(-)

--
2.10.2
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] mesa: add support for GL_INTEL_conservative_rasterization

2016-12-01 Thread Lionel Landwerlin
Signed-off-by: Lionel Landwerlin 
---
 src/compiler/glsl/ast.h  |  5 
 src/compiler/glsl/ast_to_hir.cpp |  5 
 src/compiler/glsl/ast_type.cpp   | 16 +++-
 src/compiler/glsl/glsl_parser.yy | 34 ++
 src/compiler/glsl/glsl_parser_extras.cpp |  4 +++
 src/compiler/glsl/glsl_parser_extras.h   |  4 +++
 src/compiler/glsl/linker.cpp |  3 +++
 src/compiler/shader_info.h   |  6 +
 src/mesa/main/api_validate.c | 42 
 src/mesa/main/enable.c   | 12 +
 src/mesa/main/extensions_table.h |  1 +
 src/mesa/main/mtypes.h   |  3 +++
 src/mesa/main/shaderapi.c|  1 +
 13 files changed, 130 insertions(+), 6 deletions(-)

diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h
index df3a744..0e32c36 100644
--- a/src/compiler/glsl/ast.h
+++ b/src/compiler/glsl/ast.h
@@ -610,6 +610,11 @@ struct ast_type_qualifier {
   * Flag set if GL_ARB_post_depth_coverage layout qualifier is used.
   */
  unsigned post_depth_coverage:1;
+ /**
+  * Flag set if GL_INTEL_conservartive_rasterization layout qualifier
+  * is used.
+  */
+ unsigned inner_coverage:1;
   }
   /** \brief Set of flags, accessed by name. */
   q;
diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index 2434ce5..1e14d27 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -3633,6 +3633,11 @@ apply_layout_qualifier_to_variable(const struct 
ast_type_qualifier *qual,
"valid in fragment shader input layout declaration.");
}
 
+   if (qual->flags.q.inner_coverage) {
+  _mesa_glsl_error(loc, state, "inner_coverage layout qualifier only "
+   "valid in fragment shader input layout declaration.");
+   }
+
if (qual->flags.q.post_depth_coverage) {
   _mesa_glsl_error(loc, state, "post_depth_coverage layout qualifier only "
"valid in fragment shader input layout declaration.");
diff --git a/src/compiler/glsl/ast_type.cpp b/src/compiler/glsl/ast_type.cpp
index aa1ae7e..d68e6e2 100644
--- a/src/compiler/glsl/ast_type.cpp
+++ b/src/compiler/glsl/ast_type.cpp
@@ -579,6 +579,7 @@ ast_type_qualifier::validate_in_qualifier(YYLTYPE *loc,
   break;
case MESA_SHADER_FRAGMENT:
   valid_in_mask.flags.q.early_fragment_tests = 1;
+  valid_in_mask.flags.q.inner_coverage = 1;
   valid_in_mask.flags.q.post_depth_coverage = 1;
   break;
case MESA_SHADER_COMPUTE:
@@ -634,11 +635,23 @@ ast_type_qualifier::merge_into_in_qualifier(YYLTYPE *loc,
   state->in_qualifier->flags.q.early_fragment_tests = false;
}
 
+   if (state->in_qualifier->flags.q.inner_coverage) {
+  state->fs_inner_coverage = true;
+  state->in_qualifier->flags.q.inner_coverage = false;
+   }
+
if (state->in_qualifier->flags.q.post_depth_coverage) {
   state->fs_post_depth_coverage = true;
   state->in_qualifier->flags.q.post_depth_coverage = false;
}
 
+   if (state->fs_inner_coverage && state->fs_post_depth_coverage) {
+  _mesa_glsl_error(loc, state,
+   "inner_coverage & post_depth_coverage layout qualifiers 
"
+   "are mutally exclusives");
+  r = false;
+   }
+
/* We allow the creation of multiple cs_input_layout nodes. Coherence among
 * all existing nodes is checked later, when the AST node is transformed
 * into HIR.
@@ -707,7 +720,7 @@ ast_type_qualifier::validate_flags(YYLTYPE *loc,
 "%s '%s':"
 "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s"
 "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s"
-"%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+"%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
 message, name,
 bad.flags.q.invariant ? " invariant" : "",
 bad.flags.q.precise ? " precise" : "",
@@ -768,6 +781,7 @@ ast_type_qualifier::validate_flags(YYLTYPE *loc,
 bad.flags.q.vertices ? " vertices" : "",
 bad.flags.q.subroutine ? " subroutine" : "",
 bad.flags.q.subroutine_def ? " subroutine_def" : "",
+bad.flags.q.inner_coverage ? " inner_coverage" : "",
 bad.flags.q.post_depth_coverage ? " post_depth_coverage" : 
"");
return false;
 }
diff --git a/src/compiler/glsl/glsl_parser.yy b/src/compiler/glsl/glsl_parser.yy
index 09b7e79..e3893d5 100644
--- a/src/compiler/glsl/glsl_parser.yy
+++ b/src/compiler/glsl/glsl_parser.yy
@@ -1372,9 +1372,10 @@ layout_qualifier_id:
}
 }
  }
+  }
 
- if (!$$.flags.i &&
- match_layout_qualifier($1, "early_fragment_tests", state) == 

Re: [Mesa-dev] [PATCH 0/3] gallium driver for Vivante GPUs

2016-12-01 Thread Emil Velikov
Hi Christian,

Hats off for the tremendous work - both to you and fellow etnaviv hackers !

On 30 November 2016 at 13:44, Christian Gmeiner
 wrote:
> As the original patchstack is now about 300 patches, I have choosen to
> squash the patches together into three different parts.
>

> - etnaviv gallium driver
>   glxgears renders so its time to get this driver mainline.
>
I've poked you on this one on IRC, but I'll mention it here for posterity:

Having the etnaviv_dri.so sounds a bit odd. You mentioned it was used
by the armada ddx, yet I'm not familiar enough with either one to know why
using imx_dri.so isn't feasible.
Note: I'm _not_ saying "this is rubbish you cannot have etnaviv_dri.so".

All I'm asking for is to document $reason why one cannot use the
latter - be that in the commit message and/or code.

With those couple of small suggestions, I'd say "when can we ship it" ;-)

Thanks
Emil
P.S. Seems like 2/3 could use virtually every comment from 3/3. I.e.
the targets/dri changes did not make it in the squashed branch.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] imx: gallium driver for imx-drm scanout driver

2016-12-01 Thread Emil Velikov
On 30 November 2016 at 13:44, Christian Gmeiner
 wrote:
> The imx (stub) driver is needed to get hardware acceleration from
> etnaviv on a platform using imx-drm kms driver. This adds support
> for wayland and native kms egl apps.
>
> Signed-off-by: Christian Gmeiner 
> ---
>  configure.ac   | 12 +++
>  src/gallium/Makefile.am|  4 +++
>  .../auxiliary/pipe-loader/pipe_loader_drm.c|  5 +++
>  src/gallium/auxiliary/target-helpers/drm_helper.h  | 23 
>  .../auxiliary/target-helpers/drm_helper_public.h   |  3 ++
>  src/gallium/drivers/imx/Automake.inc   |  9 +
>  src/gallium/drivers/imx/Makefile.am|  9 +
>  src/gallium/winsys/imx/drm/Makefile.am | 33 +
>  src/gallium/winsys/imx/drm/Makefile.sources|  3 ++
>  src/gallium/winsys/imx/drm/imx_drm_public.h| 31 
>  src/gallium/winsys/imx/drm/imx_drm_winsys.c| 41 
> ++
>  11 files changed, 173 insertions(+)
I think you want to add the following to src/gallium/targets/dri/Makefile.am

include $(top_srcdir)/src/gallium/drivers/imx/Automake.inc

Otherwise there will be no imx_dri.so module which you can use.
^^ is a must have afaics, everything else (mentioned below) can be
tackled at a later stage.

A set of targets/pipe-loader/* changes would be nice... unless I beat
you to it and fold the final round of duplication that we have in the
pipe-loader/targets topic ;-)


> +include Makefile.sources
> +include $(top_srcdir)/src/gallium/Automake.inc
> +
> +AM_CFLAGS = \
> +   -I$(top_srcdir)/src/gallium/drivers \
Add the following and then ...
   -I$(top_srcdir)/src/gallium/winsys \

> +   $(GALLIUM_WINSYS_CFLAGS) \
> +   $(IMX_CFLAGS)
> +
> +noinst_LTLIBRARIES = libimxdrm.la
> +
> +libimxdrm_la_SOURCES = $(C_SOURCES)
> \ No newline at end of file
Please add the missing newlines throughout.


> diff --git a/src/gallium/winsys/imx/drm/imx_drm_public.h 
> b/src/gallium/winsys/imx/drm/imx_drm_public.h
> new file mode 100644
> index 000..2d93da2
> --- /dev/null
> +++ b/src/gallium/winsys/imx/drm/imx_drm_public.h
> @@ -0,0 +1,31 @@
> +/*
> + * Copyright © 2014 NVIDIA Corporation
> + *
Disclaimer: IANAL

Here and other copyright notices could be updated to reflect you.
Things have changed so noticeably that any resemblance to the original
is coincidental.

> +
> +#include "../../imx/drm/imx_drm_public.h"
> +#include "../winsys/etnaviv/drm/etnaviv_drm_public.h"
... this will become

#include "imx_drm_public.h"
#include "etnaviv/drm/etnaviv_drm_public.h"

I think 2/3 could use similar cleanups.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] gallium: add renderonly library

2016-12-01 Thread Emil Velikov
On 1 December 2016 at 12:00, Nicolai Hähnle  wrote:
> Congratulations on a huge amount of work! Obviously I can't say much about
> the driver itself. Some things that I noticed for the renderonly library.
>
> On 30.11.2016 14:44, Christian Gmeiner wrote:
>>
>> This a very lightweight library to add basic support for
>> renderonly GPUs. It does all the magic regarding in/exporting
>> buffers etc. This library will likely break android support and
>> hopefully will get replaced with a better solution based on gbm2.
>
>
> Some more comments would be _really_ helpful. What is the purpose of a
> "scanout" object? What does for_prime vs. for_resource mean? What does
> intermediate_rendering mean and what is it good for?
>
> The lifecycle of the renderonly object itself looks wrong to me:
> renderonly_screen_create calls the actual driver's screen_create, but the
> actual driver's screen_create frees the renderonly struct? Please make it
> consistent: Either have a proper wrapper or (better, since this is so
> lightweight) have the driver's screen_create call the
> renderonly_screen_create.
>
I think Nicolai has it spot on here - both on the comment and wrapping side.
Please include some documentation (even the GBM2 snippet from the
cover letter) here. Be that in the summary and/or code.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] gallium: add renderonly library

2016-12-01 Thread Nicolai Hähnle
Congratulations on a huge amount of work! Obviously I can't say much 
about the driver itself. Some things that I noticed for the renderonly 
library.


On 30.11.2016 14:44, Christian Gmeiner wrote:

This a very lightweight library to add basic support for
renderonly GPUs. It does all the magic regarding in/exporting
buffers etc. This library will likely break android support and
hopefully will get replaced with a better solution based on gbm2.


Some more comments would be _really_ helpful. What is the purpose of a 
"scanout" object? What does for_prime vs. for_resource mean? What does 
intermediate_rendering mean and what is it good for?


The lifecycle of the renderonly object itself looks wrong to me: 
renderonly_screen_create calls the actual driver's screen_create, but 
the actual driver's screen_create frees the renderonly struct? Please 
make it consistent: Either have a proper wrapper or (better, since this 
is so lightweight) have the driver's screen_create call the 
renderonly_screen_create.


Cheers,
Nicolai


Signed-off-by: Christian Gmeiner 
---
 src/gallium/Automake.inc  |   5 +
 src/gallium/auxiliary/Makefile.am |  10 ++
 src/gallium/auxiliary/Makefile.sources|   4 +
 src/gallium/auxiliary/renderonly/renderonly.c | 199 ++
 src/gallium/auxiliary/renderonly/renderonly.h |  81 +++
 5 files changed, 299 insertions(+)
 create mode 100644 src/gallium/auxiliary/renderonly/renderonly.c
 create mode 100644 src/gallium/auxiliary/renderonly/renderonly.h

diff --git a/src/gallium/Automake.inc b/src/gallium/Automake.inc
index 6fe2e22..6aadcb9 100644
--- a/src/gallium/Automake.inc
+++ b/src/gallium/Automake.inc
@@ -50,6 +50,11 @@ GALLIUM_COMMON_LIB_DEPS = \
$(PTHREAD_LIBS) \
$(DLOPEN_LIBS)

+if HAVE_LIBDRM
+GALLIUM_COMMON_LIB_DEPS += \
+   $(LIBDRM_LIBS)
+endif
+
 GALLIUM_WINSYS_CFLAGS = \
-I$(top_srcdir)/src \
-I$(top_srcdir)/include \
diff --git a/src/gallium/auxiliary/Makefile.am 
b/src/gallium/auxiliary/Makefile.am
index 4a4a4fb..6b63cf1 100644
--- a/src/gallium/auxiliary/Makefile.am
+++ b/src/gallium/auxiliary/Makefile.am
@@ -20,6 +20,16 @@ libgallium_la_SOURCES = \
$(NIR_SOURCES) \
$(GENERATED_SOURCES)

+if HAVE_LIBDRM
+
+AM_CFLAGS += \
+   $(LIBDRM_CFLAGS)
+
+libgallium_la_SOURCES += \
+   $(RENDERONLY_SOURCES)
+
+endif
+
 if HAVE_MESA_LLVM

 AM_CFLAGS += \
diff --git a/src/gallium/auxiliary/Makefile.sources 
b/src/gallium/auxiliary/Makefile.sources
index 5d4fe30..8d3e4a9 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -435,3 +435,7 @@ GALLIVM_SOURCES := \
draw/draw_llvm_sample.c \
draw/draw_pt_fetch_shade_pipeline_llvm.c \
draw/draw_vs_llvm.c
+
+RENDERONLY_SOURCES := \
+   renderonly/renderonly.c \
+   renderonly/renderonly.h
diff --git a/src/gallium/auxiliary/renderonly/renderonly.c 
b/src/gallium/auxiliary/renderonly/renderonly.c
new file mode 100644
index 000..c4ea784
--- /dev/null
+++ b/src/gallium/auxiliary/renderonly/renderonly.c
@@ -0,0 +1,199 @@
+/*
+ * Copyright (C) 2016 Christian Gmeiner 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *Christian Gmeiner 
+ */
+
+#include "renderonly/renderonly.h"
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "state_tracker/drm_driver.h"
+#include "pipe/p_screen.h"
+#include "util/u_memory.h"
+
+struct pipe_screen *
+renderonly_screen_create(int fd, const struct renderonly_ops *ops, void *priv)
+{
+   struct renderonly *ro;
+
+   ro = CALLOC_STRUCT(renderonly);
+   if (!ro)
+  return NULL;
+
+   ro->kms_fd = fd;
+   ro->ops = ops;
+   ro->priv = priv;
+
+   ro->screen = ops->create(ro);
+   if (!ro->screen)
+  goto cleanup;
+
+   return 

Re: [Mesa-dev] [PATCH 50/58] st/mesa: get Version from gl_program rather than gl_shader_program

2016-12-01 Thread Nicolai Hähnle
Hmm, I wonder what the rules are when different shaders have different 
versions and are linked together? Then again, the use of the 
glsl_version in st_sampler_view.c pretty much admits that it's already a 
hack, so I think this is fine.


Patches 49 & 50:

Reviewed-by: Nicolai Hähnle 

On 20.11.2016 14:29, Timothy Arceri wrote:

---
 src/mesa/state_tracker/st_atom_texture.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_texture.c 
b/src/mesa/state_tracker/st_atom_texture.c
index 5fe042b..58e753d 100644
--- a/src/mesa/state_tracker/st_atom_texture.c
+++ b/src/mesa/state_tracker/st_atom_texture.c
@@ -111,9 +111,6 @@ update_textures(struct st_context *st,
GLbitfield free_slots = ~prog->SamplersUsed;
GLbitfield external_samplers_used = prog->ExternalSamplersUsed;
GLuint unit;
-   struct gl_shader_program *shader =
-  st->ctx->_Shader->CurrentProgram[mesa_shader];
-   unsigned glsl_version = shader ? shader->data->Version : 0;
enum pipe_shader_type shader_stage = 
st_shader_stage_to_ptarget(mesa_shader);

if (samplers_used == 0x0 && old_max == 0)
@@ -130,7 +127,7 @@ update_textures(struct st_context *st,
  GLboolean retval;

  retval = update_single_texture(st, &sampler_view, texUnit,
-glsl_version);
+prog->sh.data->Version);
  if (retval == GL_FALSE)
 continue;



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 48/58] st/mesa: pass gl_program to st_bind_ubos()

2016-12-01 Thread Nicolai Hähnle

Patches 44-48:

Reviewed-by: Nicolai Hähnle 

On 20.11.2016 14:29, Timothy Arceri wrote:

We no longer need anything from gl_linked_shader.
---
 src/mesa/state_tracker/st_atom_constbuf.c | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_constbuf.c 
b/src/mesa/state_tracker/st_atom_constbuf.c
index 45bb885..ef1a6f2 100644
--- a/src/mesa/state_tracker/st_atom_constbuf.c
+++ b/src/mesa/state_tracker/st_atom_constbuf.c
@@ -238,21 +238,21 @@ const struct st_tracked_state st_update_cs_constants = {
update_cs_constants /* update */
 };

-static void st_bind_ubos(struct st_context *st,
-   struct gl_linked_shader *shader,
-   unsigned shader_type)
+static void st_bind_ubos(struct st_context *st, struct gl_program *prog,
+ unsigned shader_type)
 {
unsigned i;
struct pipe_constant_buffer cb = { 0 };

-   if (!shader)
+   if (!prog)
   return;

-   for (i = 0; i < shader->Program->info.num_ubos; i++) {
+   for (i = 0; i < prog->info.num_ubos; i++) {
   struct gl_uniform_buffer_binding *binding;
   struct st_buffer_object *st_obj;

-  binding = 
&st->ctx->UniformBufferBindings[shader->Program->sh.UniformBlocks[i]->Binding];
+  binding =
+ &st->ctx->UniformBufferBindings[prog->sh.UniformBlocks[i]->Binding];
   st_obj = st_buffer_object(binding->BufferObject);

   cb.buffer = st_obj->buffer;
@@ -284,7 +284,7 @@ static void bind_vs_ubos(struct st_context *st)
if (!prog)
   return;

-   st_bind_ubos(st, prog->_LinkedShaders[MESA_SHADER_VERTEX], 
PIPE_SHADER_VERTEX);
+   st_bind_ubos(st, prog->_LinkedShaders[MESA_SHADER_VERTEX]->Program, 
PIPE_SHADER_VERTEX);
 }

 const struct st_tracked_state st_bind_vs_ubos = {
@@ -299,7 +299,7 @@ static void bind_fs_ubos(struct st_context *st)
if (!prog)
   return;

-   st_bind_ubos(st, prog->_LinkedShaders[MESA_SHADER_FRAGMENT], 
PIPE_SHADER_FRAGMENT);
+   st_bind_ubos(st, prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program, 
PIPE_SHADER_FRAGMENT);
 }

 const struct st_tracked_state st_bind_fs_ubos = {
@@ -314,7 +314,7 @@ static void bind_gs_ubos(struct st_context *st)
if (!prog)
   return;

-   st_bind_ubos(st, prog->_LinkedShaders[MESA_SHADER_GEOMETRY], 
PIPE_SHADER_GEOMETRY);
+   st_bind_ubos(st, prog->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program, 
PIPE_SHADER_GEOMETRY);
 }

 const struct st_tracked_state st_bind_gs_ubos = {
@@ -329,7 +329,7 @@ static void bind_tcs_ubos(struct st_context *st)
if (!prog)
   return;

-   st_bind_ubos(st, prog->_LinkedShaders[MESA_SHADER_TESS_CTRL], 
PIPE_SHADER_TESS_CTRL);
+   st_bind_ubos(st, prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]->Program, 
PIPE_SHADER_TESS_CTRL);
 }

 const struct st_tracked_state st_bind_tcs_ubos = {
@@ -344,7 +344,7 @@ static void bind_tes_ubos(struct st_context *st)
if (!prog)
   return;

-   st_bind_ubos(st, prog->_LinkedShaders[MESA_SHADER_TESS_EVAL], 
PIPE_SHADER_TESS_EVAL);
+   st_bind_ubos(st, prog->_LinkedShaders[MESA_SHADER_TESS_EVAL]->Program, 
PIPE_SHADER_TESS_EVAL);
 }

 const struct st_tracked_state st_bind_tes_ubos = {
@@ -359,7 +359,7 @@ static void bind_cs_ubos(struct st_context *st)
if (!prog)
   return;

-   st_bind_ubos(st, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
+   st_bind_ubos(st, prog->_LinkedShaders[MESA_SHADER_COMPUTE]->Program,
 PIPE_SHADER_COMPUTE);
 }



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: only verify that enabled arrays have backing buffers

2016-12-01 Thread Tapani Pälli



On 12/01/2016 12:59 PM, Ilia Mirkin wrote:

On Thu, Dec 1, 2016 at 5:50 AM, Tapani Pälli  wrote:



On 12/01/2016 12:19 PM, Tapani Pälli wrote:



On 12/01/2016 12:04 AM, Ilia Mirkin wrote:


We were previously also verifying that no backing buffers were available
when an array wasn't enabled. This has no basis in the spec, and it
causes GLupeN64 to fail as a result.



I'm a bit puzzled about the API usage here, is the app attempting to
render something without a VAO and we are having DefaultVAO in use?



ok realized that can't be the case as there's a separate check for that.


No, they just have some VBO's bound with attribs that got disabled in
the VAO. i.e. a glDisableVertexAttribArray() call which removes the
bit from _Enabled, but not the VertexAttribBufferMask.


OK, now I get it, and the new test still checks whether anything in 
_Enabled is missing from the buffer mask, so this LGTM:


Reviewed-by: Tapani Pälli 







Fixes: c2e146f487 ("mesa: error out in indirect draw when vertex
bindings mismatch")
Cc: mesa-sta...@lists.freedesktop.org
Signed-off-by: Ilia Mirkin 
Reviewed-by: Timothy Arceri 
---
 src/mesa/main/api_validate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
index d3b4cab..071c16d 100644
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -925,7 +925,7 @@ valid_draw_indirect(struct gl_context *ctx,
 * buffer bound.
 */
if (_mesa_is_gles31(ctx) &&
-   ctx->Array.VAO->_Enabled !=
ctx->Array.VAO->VertexAttribBufferMask) {
+   ctx->Array.VAO->_Enabled &
~ctx->Array.VAO->VertexAttribBufferMask) {
   _mesa_error(ctx, GL_INVALID_OPERATION, "%s(No VBO bound)", name);
   return GL_FALSE;
}


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: only verify that enabled arrays have backing buffers

2016-12-01 Thread Ilia Mirkin
On Thu, Dec 1, 2016 at 5:50 AM, Tapani Pälli  wrote:
>
>
> On 12/01/2016 12:19 PM, Tapani Pälli wrote:
>>
>>
>> On 12/01/2016 12:04 AM, Ilia Mirkin wrote:
>>>
>>> We were previously also verifying that no backing buffers were available
>>> when an array wasn't enabled. This has no basis in the spec, and it
>>> causes GLupeN64 to fail as a result.
>>
>>
>> I'm a bit puzzled about the API usage here, is the app attempting to
>> render something without a VAO and we are having DefaultVAO in use?
>
>
> ok realized that can't be the case as there's a separate check for that.

No, they just have some VBO's bound with attribs that got disabled in
the VAO. i.e. a glDisableVertexAttribArray() call which removes the
bit from _Enabled, but not the VertexAttribBufferMask.

>
>>
>>> Fixes: c2e146f487 ("mesa: error out in indirect draw when vertex
>>> bindings mismatch")
>>> Cc: mesa-sta...@lists.freedesktop.org
>>> Signed-off-by: Ilia Mirkin 
>>> Reviewed-by: Timothy Arceri 
>>> ---
>>>  src/mesa/main/api_validate.c | 2 +-
>>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>>
>>> diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
>>> index d3b4cab..071c16d 100644
>>> --- a/src/mesa/main/api_validate.c
>>> +++ b/src/mesa/main/api_validate.c
>>> @@ -925,7 +925,7 @@ valid_draw_indirect(struct gl_context *ctx,
>>>  * buffer bound.
>>>  */
>>> if (_mesa_is_gles31(ctx) &&
>>> -   ctx->Array.VAO->_Enabled !=
>>> ctx->Array.VAO->VertexAttribBufferMask) {
>>> +   ctx->Array.VAO->_Enabled &
>>> ~ctx->Array.VAO->VertexAttribBufferMask) {
>>>_mesa_error(ctx, GL_INVALID_OPERATION, "%s(No VBO bound)", name);
>>>return GL_FALSE;
>>> }
>>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: only verify that enabled arrays have backing buffers

2016-12-01 Thread Tapani Pälli



On 12/01/2016 12:19 PM, Tapani Pälli wrote:


On 12/01/2016 12:04 AM, Ilia Mirkin wrote:

We were previously also verifying that no backing buffers were available
when an array wasn't enabled. This has no basis in the spec, and it
causes GLupeN64 to fail as a result.


I'm a bit puzzled about the API usage here, is the app attempting to
render something without a VAO and we are having DefaultVAO in use?


ok realized that can't be the case as there's a separate check for that.




Fixes: c2e146f487 ("mesa: error out in indirect draw when vertex
bindings mismatch")
Cc: mesa-sta...@lists.freedesktop.org
Signed-off-by: Ilia Mirkin 
Reviewed-by: Timothy Arceri 
---
 src/mesa/main/api_validate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
index d3b4cab..071c16d 100644
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -925,7 +925,7 @@ valid_draw_indirect(struct gl_context *ctx,
 * buffer bound.
 */
if (_mesa_is_gles31(ctx) &&
-   ctx->Array.VAO->_Enabled !=
ctx->Array.VAO->VertexAttribBufferMask) {
+   ctx->Array.VAO->_Enabled &
~ctx->Array.VAO->VertexAttribBufferMask) {
   _mesa_error(ctx, GL_INVALID_OPERATION, "%s(No VBO bound)", name);
   return GL_FALSE;
}


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: only verify that enabled arrays have backing buffers

2016-12-01 Thread Tapani Pälli


On 12/01/2016 12:04 AM, Ilia Mirkin wrote:

We were previously also verifying that no backing buffers were available
when an array wasn't enabled. This has no basis in the spec, and it
causes GLupeN64 to fail as a result.


I'm a bit puzzled about the API usage here, is the app attempting to 
render something without a VAO and we are having DefaultVAO in use?




Fixes: c2e146f487 ("mesa: error out in indirect draw when vertex bindings 
mismatch")
Cc: mesa-sta...@lists.freedesktop.org
Signed-off-by: Ilia Mirkin 
Reviewed-by: Timothy Arceri 
---
 src/mesa/main/api_validate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
index d3b4cab..071c16d 100644
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -925,7 +925,7 @@ valid_draw_indirect(struct gl_context *ctx,
 * buffer bound.
 */
if (_mesa_is_gles31(ctx) &&
-   ctx->Array.VAO->_Enabled != ctx->Array.VAO->VertexAttribBufferMask) {
+   ctx->Array.VAO->_Enabled & ~ctx->Array.VAO->VertexAttribBufferMask) {
   _mesa_error(ctx, GL_INVALID_OPERATION, "%s(No VBO bound)", name);
   return GL_FALSE;
}


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Mesa 13.1.0 release plan

2016-12-01 Thread Christian Gmeiner
2016-11-30 21:23 GMT+01:00 Emil Velikov :
> Hi all,
>
> With holidays not far off, it might be a nice idea to consider the
> branchpoint/release schedule for the next release.
>
> I will be having limited internet access during 20 Dec - 7 Jan, thus
> I'm leaning towards the following:
>  Jan 13 2017 - Feature freeze/Release candidate 1
>  Jan 20 2017 - Release candidate 2
>  Jan 27 2017 - Release candidate 3
>  Feb 03 2017 - Release candidate 4/final release
>
> How does this align with people's schedules ?
>
> Please let me know if you have any work we want to land before the
> next branchpoint.
>

I am interested in landing etnaviv.

thanks
--
Christian Gmeiner, MSc

https://soundcloud.com/christian-gmeiner
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965: Add i965 plumbing for ARB_post_depth_coverage for i965 (gen9+).

2016-12-01 Thread Lionel Landwerlin

On 01/12/16 04:02, Chris Forbes wrote:

A couple of notes on existing weirdness here:
- Naming of GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT is bizarre (not 
your fault)
- Is BRW_PSICMS_INNER really the right thing for the normal mode? Why 
not BRW_PSICMS_NORMAL? Perhaps whoever added this stuff can shed some 
light here?


Hi,

I've noticed this as well and I have another change to use 
BRW_PSICMS_NORMAL.
My understanding is that NORMAL and INNER_CONSERVATIVE produce 
the same result unless you set the 
RASTER_CONSERVATIVE_RASTERIZATION_ENABLE bit in the 3DSTATE_RASTER 
instruction.


Cheers,

-
Lionel



Actual change here looks good, so:

Reviewed-by: Chris Forbes >



On Thu, Dec 1, 2016 at 9:00 AM, Plamena Manolova 
> wrote:


This extension allows the fragment shader to control whether values in
gl_SampleMaskIn[] reflect the coverage after application of the early
depth and stencil tests.

Signed-off-by: Plamena Manolova >
---
 docs/relnotes/13.1.0.html|  1 +
 src/mesa/drivers/dri/i965/brw_compiler.h|  1 +
 src/mesa/drivers/dri/i965/brw_fs.cpp |  1 +
 src/mesa/drivers/dri/i965/gen8_ps_state.c| 13 ++---
 src/mesa/drivers/dri/i965/intel_extensions.c |  1 +
 5 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/docs/relnotes/13.1.0.html b/docs/relnotes/13.1.0.html
index 4f76cc2..a160cda 100644
--- a/docs/relnotes/13.1.0.html
+++ b/docs/relnotes/13.1.0.html
@@ -45,6 +45,7 @@ Note: some of the new features are only
available with certain drivers.

 
 GL_NV_image_formats on any driver supporting
GL_ARB_shader_image_load_store (i965, nvc0, radeonsi, softpipe)
+GL_ARB_post_depth_coverage on i965/gen9+
 

 Bug fixes
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h
b/src/mesa/drivers/dri/i965/brw_compiler.h
index 65a7478..410641f 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -397,6 +397,7 @@ struct brw_wm_prog_data {
bool computed_stencil;

bool early_fragment_tests;
+   bool post_depth_coverage;
bool dispatch_8;
bool dispatch_16;
bool dual_src_blend;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index c218f56..ce0c07e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -6454,6 +6454,7 @@ brw_compile_fs(const struct brw_compiler
*compiler, void *log_data,
shader->info->outputs_read);

prog_data->early_fragment_tests =
shader->info->fs.early_fragment_tests;
+   prog_data->post_depth_coverage =
shader->info->fs.post_depth_coverage;

prog_data->barycentric_interp_modes =
   brw_compute_barycentric_interp_modes(compiler->devinfo,
shader);
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c
b/src/mesa/drivers/dri/i965/gen8_ps_state.c
index a4eb962..33ef023 100644
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -53,10 +53,17 @@ gen8_upload_ps_extra(struct brw_context *brw,
   dw1 |= GEN8_PSX_SHADER_IS_PER_SAMPLE;

if (prog_data->uses_sample_mask) {
-  if (brw->gen >= 9)
- dw1 |= BRW_PSICMS_INNER <<
GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT;
-  else
+  if (brw->gen >= 9) {
+ if (prog_data->post_depth_coverage) {
+dw1 |= BRW_PCICMS_DEPTH <<
GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT;
+ }
+ else {
+dw1 |= BRW_PSICMS_INNER <<
GEN9_PSX_SHADER_NORMAL_COVERAGE_MASK_SHIFT;
+ }
+  }
+  else {
  dw1 |= GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK;
+  }
}

if (prog_data->uses_omask)
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 66079b5..19f4684 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -415,6 +415,7 @@ intelInitExtensions(struct gl_context *ctx)
   ctx->Extensions.KHR_texture_compression_astc_ldr = true;
   ctx->Extensions.KHR_texture_compression_astc_sliced_3d = true;
   ctx->Extensions.MESA_shader_framebuffer_fetch = true;
+  ctx->Extensions.ARB_post_depth_coverage = true;
}

if (ctx->API == API_OPENGL_CORE)
--
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org 

Re: [Mesa-dev] Mesa 13.1.0 release plan

2016-12-01 Thread Nicolai Hähnle

On 30.11.2016 21:23, Emil Velikov wrote:

Hi all,

With holidays not far off, it might be a nice idea to consider the
branchpoint/release schedule for the next release.


+1 on the 17.0 question.



I will be having limited internet access during 20 Dec - 7 Jan, thus
I'm leaning towards the following:
 Jan 13 2017 - Feature freeze/Release candidate 1
 Jan 20 2017 - Release candidate 2
 Jan 27 2017 - Release candidate 3
 Feb 03 2017 - Release candidate 4/final release

How does this align with people's schedules ?

Please let me know if you have any work we want to land before the
next branchpoint.


I was hoping to get GLCTS failures for radeonsi down to 0. We're 
currently at 18 (including patches not on master and some pending LLVM 
changes). For some of the failures this may need spec clarification 
feedback, which tends to be not the fastest process in the world (e.g. 
I'm thinking of the program_interface_query stuff). Apart from those, 
which are a big unknown, the schedule is probably doable.


Cheers,
Nicolai


Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: reset linked_stages bitmask when re-linking

2016-12-01 Thread Nicolai Hähnle

On 30.11.2016 22:58, Timothy Arceri wrote:

34953f8907fdd added this bitmask but it wasn't being reset when
a program was relinked. If a stage was removed from the new
program then it could cause a crash as we expect the linked shader
for that stage to not be null.

Fixes crashes in:
ESEXT-CTS.tessellation_shader.single.xfb_captures_data_from_correct_stage
ES31-CTS.core.tessellation_shader.single.xfb_captures_data_from_correct_stage

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98917


Reviewed-by: Nicolai Hähnle 


---
 src/mesa/main/shaderobj.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c
index 998656a..6fcdf44 100644
--- a/src/mesa/main/shaderobj.c
+++ b/src/mesa/main/shaderobj.c
@@ -348,6 +348,8 @@ _mesa_clear_shader_program_data(struct gl_context *ctx,
   }
}

+   shProg->data->linked_stages = 0;
+
if (shProg->data->UniformStorage) {
   for (unsigned i = 0; i < shProg->data->NumUniformStorage; ++i)
  _mesa_uniform_detach_all_driver_storage(>data->


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: Get rid of nir_constant_data

2016-12-01 Thread Iago Toral
On Tue, 2016-11-29 at 22:51 -0800, Jason Ekstrand wrote:
> This has bothered me for about as long as NIR has been around.  Why
> do we
> have two different unions for constants?  No good reason other than
> one of
> them is a direct port from GLSL IR.
> ---
>  src/compiler/glsl/glsl_to_nir.cpp  | 35 
>  src/compiler/nir/nir.c | 36 +++--
>  src/compiler/nir/nir.h | 30 +++--
>  src/compiler/nir/nir_clone.c   |  2 +-
>  src/compiler/nir/nir_print.c   | 29 
>  src/compiler/spirv/spirv_to_nir.c  | 55 --
> 
>  src/compiler/spirv/vtn_variables.c |  8 +++---
>  7 files changed, 96 insertions(+), 99 deletions(-)
(...)
> @@ -838,24 +838,20 @@ nir_deref_get_const_initializer_load(nir_shader
> *shader, nir_deref_var *deref)
>    nir_load_const_instr_create(shader,
> glsl_get_vector_elements(tail->type),
>    bit_size);
>  
> -   matrix_offset *= load->def.num_components;
> -   for (unsigned i = 0; i < load->def.num_components; i++) {
> -  switch (glsl_get_base_type(tail->type)) {
> -  case GLSL_TYPE_FLOAT:
> -  case GLSL_TYPE_INT:
> -  case GLSL_TYPE_UINT:
> - load->value.u32[i] = constant->value.u[matrix_offset + i];
> - break;
> -  case GLSL_TYPE_DOUBLE:
> - load->value.f64[i] = constant->value.d[matrix_offset + i];
> - break;
> -  case GLSL_TYPE_BOOL:
> - load->value.u32[i] = constant->value.b[matrix_offset + i] ?
> - NIR_TRUE : NIR_FALSE;
> - break;
> -  default:
> - unreachable("Invalid immediate type");
> -  }
> +   switch (glsl_get_base_type(tail->type)) {
> +   case GLSL_TYPE_FLOAT:
> +   case GLSL_TYPE_INT:
> +   case GLSL_TYPE_UINT:
> +  load->value = constant->values[matrix_col];
> +  break;
> +   case GLSL_TYPE_DOUBLE:
> +  load->value = constant->values[matrix_col];
> +  break;
> +   case GLSL_TYPE_BOOL:
> +  load->value = constant->values[matrix_col];
> +  break;

You can merge the double and bool cases in with the rest, it is the
same code now.

> +   default:
> +  unreachable("Invalid immediate type");
> }
>  
(...)
>  
> diff --git a/src/compiler/spirv/spirv_to_nir.c
> b/src/compiler/spirv/spirv_to_nir.c
> index 34968a4..f41df32 100644
> --- a/src/compiler/spirv/spirv_to_nir.c
> +++ b/src/compiler/spirv/spirv_to_nir.c
(...)
>  
>   uint32_t u[8];
>   for (unsigned i = 0; i < len0; i++)
> -u[i] = v0->constant->value.u[i];
> +u[i] = v0->constant->values[0].u32[i];
>   for (unsigned i = 0; i < len1; i++)
> -u[len0 + i] = v1->constant->value.u[i];
> +u[len0 + i] = v1->constant->values[0].u32[i];
>  
>   for (unsigned i = 0; i < count - 6; i++) {
>  uint32_t comp = w[i + 6];
>  if (comp == (uint32_t)-1) {
> -   val->constant->value.u[i] = 0xdeadbeef;
> +   val->constant->values[0].u32[i] = 0xdeadbeef;
>  } else {
> -   val->constant->value.u[i] = u[comp];
> +   val->constant->values[0].u32[i] = u[comp];
>  }
>   }
>   break;
> @@ -1137,7 +1133,7 @@ vtn_handle_constant(struct vtn_builder *b,
> SpvOp opcode,
>  } else {
> unsigned num_components =
> glsl_get_vector_elements(type);
> for (unsigned i = 0; i < num_components; i++)
> -  val->constant->value.u[i] = (*c)->value.u[elem +
> i];
> +  val->constant->values[0].u32[i] = (*c)-
> >values[0].u32[elem + i];
>  }
>   } else {
>  struct vtn_value *insert =
> @@ -1148,7 +1144,7 @@ vtn_handle_constant(struct vtn_builder *b,
> SpvOp opcode,
>  } else {
> unsigned num_components =
> glsl_get_vector_elements(type);
> for (unsigned i = 0; i < num_components; i++)
> -  (*c)->value.u[elem + i] = insert->constant-
> >value.u[i];
> +  (*c)->values[0].u32[elem + i] = insert->constant-
> >values[0].u32[i];

Is this correct for matrix types? We do:

elem += w[i] * glsl_get_vector_elements(type);

in a loop above, so I guess elem can be > 4 and we would end up
indexing out of bounds  here. Don't we need to use elem to index into
'values' instead and maybe tweak the code above to have elem track the
number of columns instead of individual components we need to offset?

Iago

>  }
>   }
>   break;
> @@ -1170,16 +1166,11 @@ vtn_handle_constant(struct vtn_builder *b,
> SpvOp opcode,
>  
>  unsigned j = swap ? 1 - i : i;
>  assert(bit_size == 32);
> -for (unsigned k = 0; k < num_components; k++)
> -   src[j].u32[k] = c->value.u[k];
> +src[j] = c->values[0];
>   }
>  
> -