Re: [Mesa-dev] [PATCH 19/22] anv/nir: add support for dvec3/4 consuming two locations

2016-12-05 Thread Juan A. Suarez Romero
On Thu, 2016-12-01 at 22:22 -0800, Jason Ekstrand wrote:
> +Ken
> 
> On Thu, Dec 1, 2016 at 10:17 PM, Jason Ekstrand  > wrote:
> > I'm not sure how I feel about this one.  It seems like it would
> > almost be easier to just pick one convention or the other for NIR
> > and adjust one of the drivers accordingly.  I don't know that I
> > have a huge preference which convention we choose.  I guess the
> > Vulkan convention matches our hardware a bit better.  In either
> > case, converting from one to the other should be a simple matter of
> > building a remap table or a creative use of popcount.
> > 

As you said in another email, I also think it would be more easy to
just mark 2 bits per dvec3/dvec4 in inputs_read and change the GL
driver accordingly, than the other way around, as it is done in TGSI.

> > On Fri, Nov 25, 2016 at 12:52 AM, Juan A. Suarez Romero  > galia.com> wrote:
> > > One difference between OpenGL and Vulkan regarding 64-bit vertex
> > > 
> > > attribute types is that dvec3 and dvec4 consumes just one
> > > location in
> > > 
> > > OpenGL, while in Vulkan it consumes two locations.
> > > 
> > > 
> > > 
> > > Thus, in OpenGL for each dvec3/dvec4 vertex attrib we mark just
> > > one bit
> > > 
> > > in our internal inputs_read bitmap (and also the corresponding
> > > bit in
> > > 
> > > double_inputs_read bitmap) while in Vulkan we mark two
> > > consecutive bits
> > > 
> > > in both bitmaps.
> > > 
> > > 
> > > 
> > > This is handled with a nir option called
> > > "dvec3_consumes_two_locations",
> > > 
> > > which is set to True for Vulkan code. And all the computation
> > > regarding
> > > 
> > > emitting vertices as well as the mapping between attributes and
> > > physical
> > > 
> > > registers use this option to correctly do the work.
> > > 
> > > ---
> > > 
> > >  src/amd/vulkan/radv_pipeline.c               |  1 +
> > > 
> > >  src/compiler/nir/nir.h                       |  5 +++
> > > 
> > >  src/compiler/nir/nir_gather_info.c           |  6 +--
> > > 
> > >  src/gallium/drivers/freedreno/ir3/ir3_nir.c  |  1 +
> > > 
> > >  src/intel/vulkan/anv_device.c                |  2 +-
> > > 
> > >  src/intel/vulkan/genX_pipeline.c             | 62
> > > +---
> > > 
> > >  src/mesa/drivers/dri/i965/brw_compiler.c     | 23 ++-
> > > 
> > >  src/mesa/drivers/dri/i965/brw_compiler.h     |  2 +-
> > > 
> > >  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 14 +--
> > > 
> > >  src/mesa/drivers/dri/i965/brw_nir.c          | 18 +---
> > > 
> > >  src/mesa/drivers/dri/i965/brw_vec4.cpp       | 13 --
> > > 
> > >  src/mesa/drivers/dri/i965/intel_screen.c     |  3 +-
> > > 
> > >  12 files changed, 105 insertions(+), 45 deletions(-)
> > > 
> > > 
> > > 
> > > diff --git a/src/amd/vulkan/radv_pipeline.c
> > > b/src/amd/vulkan/radv_pipeline.c
> > > 
> > > index ee5d812..90d4650 100644
> > > 
> > > --- a/src/amd/vulkan/radv_pipeline.c
> > > 
> > > +++ b/src/amd/vulkan/radv_pipeline.c
> > > 
> > > @@ -59,6 +59,7 @@ static const struct nir_shader_compiler_options
> > > nir_options = {
> > > 
> > >         .lower_unpack_unorm_4x8 = true,
> > > 
> > >         .lower_extract_byte = true,
> > > 
> > >         .lower_extract_word = true,
> > > 
> > > +       .dvec3_consumes_two_locations = true,
> > > 
> > >  };
> > > 
> > > 
> > > 
> > >  VkResult radv_CreateShaderModule(
> > > 
> > > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> > > 
> > > index 1679d89..0fc8f39 100644
> > > 
> > > --- a/src/compiler/nir/nir.h
> > > 
> > > +++ b/src/compiler/nir/nir.h
> > > 
> > > @@ -1794,6 +1794,11 @@ typedef struct nir_shader_compiler_options
> > > {
> > > 
> > >      * information must be inferred from the list of input
> > > nir_variables.
> > > 
> > >      */
> > > 
> > >     bool use_interpolated_input_intrinsics;
> > > 
> > > +
> > > 
> > > +   /**
> > > 
> > > +    * In Vulkan, a dvec3/dvec4 consumes two locations instead
> > > just one.
> > > 
> > > +    */
> > > 
> > > +   bool dvec3_consumes_two_locations;
> > > 
> > >  } nir_shader_compiler_options;
> > > 
> > > 
> > > 
> > >  typedef struct nir_shader {
> > > 
> > > diff --git a/src/compiler/nir/nir_gather_info.c
> > > b/src/compiler/nir/nir_gather_info.c
> > > 
> > > index 07c9949..8c80671 100644
> > > 
> > > --- a/src/compiler/nir/nir_gather_info.c
> > > 
> > > +++ b/src/compiler/nir/nir_gather_info.c
> > > 
> > > @@ -96,7 +96,7 @@ mark_whole_variable(nir_shader *shader,
> > > nir_variable *var)
> > > 
> > > 
> > > 
> > >     const unsigned slots =
> > > 
> > >        var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
> > > 
> > > -                        : glsl_count_attribute_slots(type,
> > > is_vertex_input);
> > > 
> > > +                        : glsl_count_attribute_slots(type,
> > > is_vertex_input && !shader->options-
> > > >dvec3_consumes_two_locations);
> > 
> > This makes no sense, why are we passing is_vertex_input &&
> > 

Re: [Mesa-dev] [PATCH 19/22] anv/nir: add support for dvec3/4 consuming two locations

2016-12-02 Thread Jason Ekstrand
On Thu, Dec 1, 2016 at 10:22 PM, Jason Ekstrand 
wrote:

> +Ken
>
> On Thu, Dec 1, 2016 at 10:17 PM, Jason Ekstrand 
> wrote:
>
>> I'm not sure how I feel about this one.  It seems like it would almost be
>> easier to just pick one convention or the other for NIR and adjust one of
>> the drivers accordingly.  I don't know that I have a huge preference which
>> convention we choose.  I guess the Vulkan convention matches our hardware a
>> bit better.  In either case, converting from one to the other should be a
>> simple matter of building a remap table or a creative use of popcount.
>>
>
As another data point, TGSI uses 2 slots for dvec3/4 inputs.  I think the
simplest and most consistent thing to do is make NIR use 2 slots and just
change the GL driver to work that way.  That way we can keep GL's oddity as
contained as possible.


> On Fri, Nov 25, 2016 at 12:52 AM, Juan A. Suarez Romero <
>> jasua...@igalia.com> wrote:
>>
>>> One difference between OpenGL and Vulkan regarding 64-bit vertex
>>> attribute types is that dvec3 and dvec4 consumes just one location in
>>> OpenGL, while in Vulkan it consumes two locations.
>>>
>>> Thus, in OpenGL for each dvec3/dvec4 vertex attrib we mark just one bit
>>> in our internal inputs_read bitmap (and also the corresponding bit in
>>> double_inputs_read bitmap) while in Vulkan we mark two consecutive bits
>>> in both bitmaps.
>>>
>>> This is handled with a nir option called "dvec3_consumes_two_locations",
>>> which is set to True for Vulkan code. And all the computation regarding
>>> emitting vertices as well as the mapping between attributes and physical
>>> registers use this option to correctly do the work.
>>> ---
>>>  src/amd/vulkan/radv_pipeline.c   |  1 +
>>>  src/compiler/nir/nir.h   |  5 +++
>>>  src/compiler/nir/nir_gather_info.c   |  6 +--
>>>  src/gallium/drivers/freedreno/ir3/ir3_nir.c  |  1 +
>>>  src/intel/vulkan/anv_device.c|  2 +-
>>>  src/intel/vulkan/genX_pipeline.c | 62
>>> +---
>>>  src/mesa/drivers/dri/i965/brw_compiler.c | 23 ++-
>>>  src/mesa/drivers/dri/i965/brw_compiler.h |  2 +-
>>>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 14 +--
>>>  src/mesa/drivers/dri/i965/brw_nir.c  | 18 +---
>>>  src/mesa/drivers/dri/i965/brw_vec4.cpp   | 13 --
>>>  src/mesa/drivers/dri/i965/intel_screen.c |  3 +-
>>>  12 files changed, 105 insertions(+), 45 deletions(-)
>>>
>>> diff --git a/src/amd/vulkan/radv_pipeline.c
>>> b/src/amd/vulkan/radv_pipeline.c
>>> index ee5d812..90d4650 100644
>>> --- a/src/amd/vulkan/radv_pipeline.c
>>> +++ b/src/amd/vulkan/radv_pipeline.c
>>> @@ -59,6 +59,7 @@ static const struct nir_shader_compiler_options
>>> nir_options = {
>>> .lower_unpack_unorm_4x8 = true,
>>> .lower_extract_byte = true,
>>> .lower_extract_word = true,
>>> +   .dvec3_consumes_two_locations = true,
>>>  };
>>>
>>>  VkResult radv_CreateShaderModule(
>>> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>>> index 1679d89..0fc8f39 100644
>>> --- a/src/compiler/nir/nir.h
>>> +++ b/src/compiler/nir/nir.h
>>> @@ -1794,6 +1794,11 @@ typedef struct nir_shader_compiler_options {
>>>  * information must be inferred from the list of input nir_variables.
>>>  */
>>> bool use_interpolated_input_intrinsics;
>>> +
>>> +   /**
>>> +* In Vulkan, a dvec3/dvec4 consumes two locations instead just one.
>>> +*/
>>> +   bool dvec3_consumes_two_locations;
>>>  } nir_shader_compiler_options;
>>>
>>>  typedef struct nir_shader {
>>> diff --git a/src/compiler/nir/nir_gather_info.c
>>> b/src/compiler/nir/nir_gather_info.c
>>> index 07c9949..8c80671 100644
>>> --- a/src/compiler/nir/nir_gather_info.c
>>> +++ b/src/compiler/nir/nir_gather_info.c
>>> @@ -96,7 +96,7 @@ mark_whole_variable(nir_shader *shader, nir_variable
>>> *var)
>>>
>>> const unsigned slots =
>>>var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
>>> -: glsl_count_attribute_slots(type,
>>> is_vertex_input);
>>> +: glsl_count_attribute_slots(type,
>>> is_vertex_input && !shader->options->dvec3_consumes_two_locations);
>>>
>>
>> This makes no sense, why are we passing is_vertex_input &&
>> !dvec3_consumes_two_locations to an argument labled is_vertex_input?
>>
>>
>>>
>>> set_io_mask(shader, var, 0, slots);
>>>  }
>>> @@ -168,7 +168,7 @@ try_mask_partial_io(nir_shader *shader,
>>> nir_deref_var *deref)
>>> var->data.mode == nir_var_shader_in)
>>>is_vertex_input = true;
>>>
>>> -   unsigned offset = get_io_offset(deref, is_vertex_input);
>>> +   unsigned offset = get_io_offset(deref, is_vertex_input &&
>>> !shader->options->dvec3_consumes_two_locations);
>>>
>>
>> Same here
>>
>>
>>> if (offset == -1)
>>>return false;
>>>
>>> @@ -184,7 +184,7 @@ try_mask_partial_io(nir_shader *shader,
>>> 

Re: [Mesa-dev] [PATCH 19/22] anv/nir: add support for dvec3/4 consuming two locations

2016-12-01 Thread Jason Ekstrand
I'm not sure how I feel about this one.  It seems like it would almost be
easier to just pick one convention or the other for NIR and adjust one of
the drivers accordingly.  I don't know that I have a huge preference which
convention we choose.  I guess the Vulkan convention matches our hardware a
bit better.  In either case, converting from one to the other should be a
simple matter of building a remap table or a creative use of popcount.

On Fri, Nov 25, 2016 at 12:52 AM, Juan A. Suarez Romero  wrote:

> One difference between OpenGL and Vulkan regarding 64-bit vertex
> attribute types is that dvec3 and dvec4 consumes just one location in
> OpenGL, while in Vulkan it consumes two locations.
>
> Thus, in OpenGL for each dvec3/dvec4 vertex attrib we mark just one bit
> in our internal inputs_read bitmap (and also the corresponding bit in
> double_inputs_read bitmap) while in Vulkan we mark two consecutive bits
> in both bitmaps.
>
> This is handled with a nir option called "dvec3_consumes_two_locations",
> which is set to True for Vulkan code. And all the computation regarding
> emitting vertices as well as the mapping between attributes and physical
> registers use this option to correctly do the work.
> ---
>  src/amd/vulkan/radv_pipeline.c   |  1 +
>  src/compiler/nir/nir.h   |  5 +++
>  src/compiler/nir/nir_gather_info.c   |  6 +--
>  src/gallium/drivers/freedreno/ir3/ir3_nir.c  |  1 +
>  src/intel/vulkan/anv_device.c|  2 +-
>  src/intel/vulkan/genX_pipeline.c | 62
> +---
>  src/mesa/drivers/dri/i965/brw_compiler.c | 23 ++-
>  src/mesa/drivers/dri/i965/brw_compiler.h |  2 +-
>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 14 +--
>  src/mesa/drivers/dri/i965/brw_nir.c  | 18 +---
>  src/mesa/drivers/dri/i965/brw_vec4.cpp   | 13 --
>  src/mesa/drivers/dri/i965/intel_screen.c |  3 +-
>  12 files changed, 105 insertions(+), 45 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_
> pipeline.c
> index ee5d812..90d4650 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -59,6 +59,7 @@ static const struct nir_shader_compiler_options
> nir_options = {
> .lower_unpack_unorm_4x8 = true,
> .lower_extract_byte = true,
> .lower_extract_word = true,
> +   .dvec3_consumes_two_locations = true,
>  };
>
>  VkResult radv_CreateShaderModule(
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 1679d89..0fc8f39 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -1794,6 +1794,11 @@ typedef struct nir_shader_compiler_options {
>  * information must be inferred from the list of input nir_variables.
>  */
> bool use_interpolated_input_intrinsics;
> +
> +   /**
> +* In Vulkan, a dvec3/dvec4 consumes two locations instead just one.
> +*/
> +   bool dvec3_consumes_two_locations;
>  } nir_shader_compiler_options;
>
>  typedef struct nir_shader {
> diff --git a/src/compiler/nir/nir_gather_info.c
> b/src/compiler/nir/nir_gather_info.c
> index 07c9949..8c80671 100644
> --- a/src/compiler/nir/nir_gather_info.c
> +++ b/src/compiler/nir/nir_gather_info.c
> @@ -96,7 +96,7 @@ mark_whole_variable(nir_shader *shader, nir_variable
> *var)
>
> const unsigned slots =
>var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
> -: glsl_count_attribute_slots(type,
> is_vertex_input);
> +: glsl_count_attribute_slots(type,
> is_vertex_input && !shader->options->dvec3_consumes_two_locations);
>

This makes no sense, why are we passing is_vertex_input &&
!dvec3_consumes_two_locations to an argument labled is_vertex_input?


>
> set_io_mask(shader, var, 0, slots);
>  }
> @@ -168,7 +168,7 @@ try_mask_partial_io(nir_shader *shader, nir_deref_var
> *deref)
> var->data.mode == nir_var_shader_in)
>is_vertex_input = true;
>
> -   unsigned offset = get_io_offset(deref, is_vertex_input);
> +   unsigned offset = get_io_offset(deref, is_vertex_input &&
> !shader->options->dvec3_consumes_two_locations);
>

Same here


> if (offset == -1)
>return false;
>
> @@ -184,7 +184,7 @@ try_mask_partial_io(nir_shader *shader, nir_deref_var
> *deref)
> }
>
> /* double element width for double types that takes two slots */
> -   if (!is_vertex_input &&
> +   if ((!is_vertex_input || shader->options->dvec3_consumes_two_locations)
> &&
> glsl_type_is_dual_slot(glsl_without_array(type))) {
>

This makes a bit more sense but I'm still not quite seeing it.


>elem_width *= 2;
> }
> diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c
> b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
> index 2d86a52..5c5c9ad 100644
> --- a/src/gallium/drivers/freedreno/ir3/ir3_nir.c
> +++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
> @@ -50,6 +50,7 @@ static 

[Mesa-dev] [PATCH 19/22] anv/nir: add support for dvec3/4 consuming two locations

2016-11-25 Thread Juan A. Suarez Romero
One difference between OpenGL and Vulkan regarding 64-bit vertex
attribute types is that dvec3 and dvec4 consumes just one location in
OpenGL, while in Vulkan it consumes two locations.

Thus, in OpenGL for each dvec3/dvec4 vertex attrib we mark just one bit
in our internal inputs_read bitmap (and also the corresponding bit in
double_inputs_read bitmap) while in Vulkan we mark two consecutive bits
in both bitmaps.

This is handled with a nir option called "dvec3_consumes_two_locations",
which is set to True for Vulkan code. And all the computation regarding
emitting vertices as well as the mapping between attributes and physical
registers use this option to correctly do the work.
---
 src/amd/vulkan/radv_pipeline.c   |  1 +
 src/compiler/nir/nir.h   |  5 +++
 src/compiler/nir/nir_gather_info.c   |  6 +--
 src/gallium/drivers/freedreno/ir3/ir3_nir.c  |  1 +
 src/intel/vulkan/anv_device.c|  2 +-
 src/intel/vulkan/genX_pipeline.c | 62 +---
 src/mesa/drivers/dri/i965/brw_compiler.c | 23 ++-
 src/mesa/drivers/dri/i965/brw_compiler.h |  2 +-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 14 +--
 src/mesa/drivers/dri/i965/brw_nir.c  | 18 +---
 src/mesa/drivers/dri/i965/brw_vec4.cpp   | 13 --
 src/mesa/drivers/dri/i965/intel_screen.c |  3 +-
 12 files changed, 105 insertions(+), 45 deletions(-)

diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index ee5d812..90d4650 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -59,6 +59,7 @@ static const struct nir_shader_compiler_options nir_options = 
{
.lower_unpack_unorm_4x8 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
+   .dvec3_consumes_two_locations = true,
 };
 
 VkResult radv_CreateShaderModule(
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 1679d89..0fc8f39 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1794,6 +1794,11 @@ typedef struct nir_shader_compiler_options {
 * information must be inferred from the list of input nir_variables.
 */
bool use_interpolated_input_intrinsics;
+
+   /**
+* In Vulkan, a dvec3/dvec4 consumes two locations instead just one.
+*/
+   bool dvec3_consumes_two_locations;
 } nir_shader_compiler_options;
 
 typedef struct nir_shader {
diff --git a/src/compiler/nir/nir_gather_info.c 
b/src/compiler/nir/nir_gather_info.c
index 07c9949..8c80671 100644
--- a/src/compiler/nir/nir_gather_info.c
+++ b/src/compiler/nir/nir_gather_info.c
@@ -96,7 +96,7 @@ mark_whole_variable(nir_shader *shader, nir_variable *var)
 
const unsigned slots =
   var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
-: glsl_count_attribute_slots(type, is_vertex_input);
+: glsl_count_attribute_slots(type, is_vertex_input && 
!shader->options->dvec3_consumes_two_locations);
 
set_io_mask(shader, var, 0, slots);
 }
@@ -168,7 +168,7 @@ try_mask_partial_io(nir_shader *shader, nir_deref_var 
*deref)
var->data.mode == nir_var_shader_in)
   is_vertex_input = true;
 
-   unsigned offset = get_io_offset(deref, is_vertex_input);
+   unsigned offset = get_io_offset(deref, is_vertex_input && 
!shader->options->dvec3_consumes_two_locations);
if (offset == -1)
   return false;
 
@@ -184,7 +184,7 @@ try_mask_partial_io(nir_shader *shader, nir_deref_var 
*deref)
}
 
/* double element width for double types that takes two slots */
-   if (!is_vertex_input &&
+   if ((!is_vertex_input || shader->options->dvec3_consumes_two_locations) &&
glsl_type_is_dual_slot(glsl_without_array(type))) {
   elem_width *= 2;
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c 
b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
index 2d86a52..5c5c9ad 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
@@ -50,6 +50,7 @@ static const nir_shader_compiler_options options = {
.vertex_id_zero_based = true,
.lower_extract_byte = true,
.lower_extract_word = true,
+   .dvec3_consumes_two_locations = false,
 };
 
 struct nir_shader *
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 2c8ac49..725848f 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -167,7 +167,7 @@ anv_physical_device_init(struct anv_physical_device *device,
 
brw_process_intel_debug_variable();
 
-   device->compiler = brw_compiler_create(NULL, >info);
+   device->compiler = brw_compiler_create(NULL, >info, true);
if (device->compiler == NULL) {
   result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   goto fail;
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index cb164ad..97c40b8 100644
---