Re: [Mesa-dev] [PATCH 2/4] gallium: add initial support for conservative rasterization

2018-03-23 Thread Marek Olšák
On Fri, Mar 23, 2018 at 8:54 PM, Ilia Mirkin  wrote:

> On Fri, Mar 23, 2018 at 8:51 PM, Marek Olšák  wrote:
> > diff --git a/src/gallium/include/pipe/p_state.h
> > b/src/gallium/include/pipe/p_state.h
> >>
> >> index 4dce399f84..913a79faee 100644
> >> --- a/src/gallium/include/pipe/p_state.h
> >> +++ b/src/gallium/include/pipe/p_state.h
> >> @@ -113,6 +113,7 @@ struct pipe_rasterizer_state
> >> unsigned line_smooth:1;
> >> unsigned line_stipple_enable:1;
> >> unsigned line_last_pixel:1;
> >> +   unsigned conservative_raster_mode:2; /**< PIPE_CONSERVATIVE_RASTER_x
> >> */
> >>
> >> /**
> >>  * Use the first vertex of a primitive as the provoking vertex for
> >> @@ -186,6 +187,7 @@ struct pipe_rasterizer_state
> >> float offset_units;
> >> float offset_scale;
> >> float offset_clamp;
> >> +   float conservative_raster_dilate;
> >>  };
> >>
> >>
> >> @@ -199,6 +201,10 @@ struct pipe_viewport_state
> >>  {
> >> float scale[3];
> >> float translate[3];
> >> +   /*
> >> +* Conservative rasterization subpixel precision bias in bits
> >> +*/
> >> +   uint16_t subpixel_precision[2];
> >
> >
> > Actually, there is one thing that I disagree with. pipe_viewport_state is
> > per-viewport and there are up to 16 viewports, but the extensions don't
> > allow per-viewport settings of subpixel_precision. The obvious thing to
> do
> > is to add a new pipe_context function that only sets subpixel_precision,
> but
> > how about this:
> >
> > Would it be possible to move all the new states into a new CSO:
> > pipe_conservative_raster_state?
>
> FWIW the hardware does support this per viewport. But you're right -
> that level of detail is not exposed in GL. I think gallium tends to be
> closer to the hw, but I'd be fine with moving this out if you're
> concerned about context/etc size.
>

Yeah, it could be uint8_t subpixel_precision[16] in the new CSO in the
future, but I don't see a reason for it to be 16x now when no code uses it.
uint8_t seems enough for 2 4-bit values since nvc0 only allows values
between 0 and 8.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH shader-db 4/4] run: handling binding of attribute variable name

2018-03-23 Thread Dongwon Kim
I realized this model won't work with parallel execution.
I will fix it and post another version shortly.

On Wed, Mar 14, 2018 at 03:15:20PM -0700, Kenneth Graunke wrote:
> On Friday, March 9, 2018 2:28:36 PM PDT Dongwon Kim wrote:
> > Optional binding of variables can be processed before linking shader
> > objects for creating shader program. It is activated by adding lines
> > with a keyword "BindAttribLoc" followed by name and index as,
> > 
> > "BindAttribLoc name_str1 "
> > 
> > For example,
> > 
> > [require]
> > ..
> > BindAttribLoc vertex 1
> > BindAttribLoc coord 2
> > BindAttribLoc col 3
> > 
> > This makes the shader-db run
> > 
> > glBindAttribLocation(p, 1, "vertex");
> > glBindAttribLocation(p, 2, "coord");
> > glBindAttribLocation(p, 3, "col");
> > 
> > before glLinkProgram() to include these binding info in binary shader
> > program.
> > 
> > Signed-off-by: Dongwon Kim 
> 
> Matt, do you have an opinion on this?  This seems like the sort of
> commands that would normally go in the [test] block, rather than the
> [require] block.  But it looks like shader_runner doesn't have any
> syntax for glBindAttribLocation today.
> 
> It's definitely a useful thing to have if we're going to use run.c
> to produce shader binaries for ARB_get_program_binary...
> 
> > ---
> >  run.c | 79 
> > +++
> >  1 file changed, 79 insertions(+)
> > 
> > diff --git a/run.c b/run.c
> > index bbab5d9..fe2a97a 100644
> > --- a/run.c
> > +++ b/run.c
> > @@ -76,6 +76,12 @@ struct shader {
> >  int type;
> >  };
> >  
> > +struct binding_var {
> > +char *name;
> > +GLint index;
> > +struct binding_var *next;
> > +};
> > +
> >  static bool
> >  extension_in_string(const char *haystack, const char *needle)
> >  {
> > @@ -105,6 +111,10 @@ extension_in_string(const char *haystack, const char 
> > *needle)
> >  return false;
> >  }
> >  
> > +#define SKIP_SPACES(str) while (*(str) == ' ') str++
> > +
> > +struct binding_var binding_head = {"NULL", -1, NULL};
> > +
> >  static struct shader *
> >  get_shaders(const struct context_info *core, const struct context_info 
> > *compat,
> >  const struct context_info *es,
> > @@ -120,6 +130,7 @@ get_shaders(const struct context_info *core, const 
> > struct context_info *compat,
> >  static const char *fp_req = "\nGL_ARB_fragment_program";
> >  static const char *vp_req = "\nGL_ARB_vertex_program";
> >  static const char *sso_req = "\nSSO ENABLED";
> > +static const char *binding = "\nBindAttribLoc";
> >  static const char *gs = "geometry shader]\n";
> >  static const char *fs = "fragment ";
> >  static const char *vs = "vertex ";
> > @@ -186,11 +197,13 @@ get_shaders(const struct context_info *core, const 
> > struct context_info *compat,
> >  const struct context_info *info = *type == TYPE_CORE ? core : compat;
> >  
> >  const char *extension_text = text;
> > +
> >  while ((extension_text = memmem(extension_text, end_text - 
> > extension_text,
> >  "\nGL_", strlen("\nGL_"))) != NULL) {
> >  extension_text += 1;
> >  const char *newline = memchr(extension_text, '\n',
> >   end_text - extension_text);
> > +
> >  if (memmem(info->extension_string, info->extension_string_len,
> > extension_text, newline - extension_text) == NULL) {
> >  fprintf(stderr, "SKIP: %s requires unavailable extension 
> > %.*s\n",
> > @@ -202,6 +215,62 @@ get_shaders(const struct context_info *core, const 
> > struct context_info *compat,
> >  }
> >  }
> >  
> > +/* process binding */
> > +struct binding_var *binding_prev = _head;
> > +const char *pre_binding_text = text;
> > +
> > +while ((pre_binding_text = memmem(pre_binding_text, end_text - 
> > pre_binding_text,
> > +  binding, strlen(binding))) != NULL) {
> > +pre_binding_text += strlen(binding);
> > +
> > +const char *newline = memchr(pre_binding_text, '\n', end_text - 
> > pre_binding_text);
> > +
> > +SKIP_SPACES(pre_binding_text);
> > +
> > +char *endword = memchr(pre_binding_text, ' ', newline - 
> > pre_binding_text);
> > +
> > +/* if there's no more space in the same line */
> > +if (!endword) {
> > +fprintf(stderr, "SKIP: can't find attr index for this 
> > binding\n");
> > +continue;
> > +}
> > +
> > +char *binding_name = (char *)calloc(1, endword - pre_binding_text 
> > + 1);
> > +
> > +strncpy(binding_name, pre_binding_text, endword - 
> > pre_binding_text);
> > +
> > +pre_binding_text = endword;
> > +
> > +SKIP_SPACES(pre_binding_text);
> > +if (*pre_binding_text == '\n') {
> > +fprintf(stderr, "SKIP: can't find attr variable name for this 
> > binding\n");
> > +   

[Mesa-dev] [PATCH shaderdb] run: shader program file created via GetProgramBinary (v5)

2018-03-23 Thread Dongwon Kim
With option '-b', shader-db now generates a shader program binary file
using GetProgramBinary(). This shader program binary can be loaded via
ProgramBinary() to be executed by an application later.

v2: 1. define MAX_LOG_LEN and use it as the size of gl log
2. define MAX_PROG_SIZE and use it as the max size of extracted
   shader_program
3. out_file is now pointer allocated by strdup for the file name

v3: 1. automatically using original shader test file's name +  ".bin"
   as a filename for program binary - better way to cover the case
   with batch compilation of many shader test files in the same
   directory
2. remove --out= since it is now unnecessary (due to v3-1.)
   to provide custom file name. Instead, option, "--bin", which is
   basically a flag that enables getting program binary as a file.
3. Now it tries to get the length of binary by reading program's
   GL_PROGRAM_BINARY_LENGTH_OES parameter

v4: 1. '--bin' -> '-b'
2. stop generating binary program when failing to retrieve the binary
   size
3. error checking after malloc for binary program
4. changed some of variable names
5. several consecutive fprintfs are consolidated
6. removed MAX_LOG_LEN and MAX_PROG_SIZE

v5: bug fix: +1 to the length of the output file to cover '\0'

Signed-off-by: Dongwon Kim 
---
 run.c | 81 ---
 1 file changed, 78 insertions(+), 3 deletions(-)

diff --git a/run.c b/run.c
index 69e64c7..4712e27 100644
--- a/run.c
+++ b/run.c
@@ -356,7 +356,8 @@ const struct platform platforms[] = {
 void print_usage(const char *prog_name)
 {
 fprintf(stderr,
-"Usage: %s [-d ] [-j ] [-o ] [-p 
] \n",
+"Usage: %s [-d ] [-j ] [-o ] "
+"[-p ] [-b] \n",
 prog_name);
 }
 
@@ -435,10 +436,11 @@ main(int argc, char **argv)
 char device_path[64];
 int device_id = 0;
 int opt;
+bool generate_prog_bin = 0;
 
 max_threads = omp_get_max_threads();
 
-while ((opt = getopt(argc, argv, "d:j:o:p:")) != -1) {
+while ((opt = getopt(argc, argv, "d:j:o:p:b")) != -1) {
 switch(opt) {
 case 'd': {
 char *endptr;
@@ -478,6 +480,9 @@ main(int argc, char **argv)
 case 'j':
 max_threads = atoi(optarg);
 break;
+case 'b':
+generate_prog_bin = 1;
+break;
 default:
 fprintf(stderr, "Unknown option: %x\n", opt);
 print_usage(argv[0]);
@@ -813,18 +818,24 @@ main(int argc, char **argv)
 const_text = text;
 GLuint prog = glCreateShaderProgramv(shader[i].type, 1,
  _text);
+
+if (generate_prog_bin)
+fprintf(stderr,
+"Currently, program binary generation "
+"doesn't support SSO.\n");
+
 glDeleteProgram(prog);
 free(text);
 }
 } else if (type == TYPE_CORE || type == TYPE_COMPAT || type == 
TYPE_ES) {
 GLuint prog = glCreateProgram();
+GLint param;
 
 for (unsigned i = 0; i < num_shaders; i++) {
 GLuint s = glCreateShader(shader[i].type);
 glShaderSource(s, 1, [i].text, [i].length);
 glCompileShader(s);
 
-GLint param;
 glGetShaderiv(s, GL_COMPILE_STATUS, );
 if (unlikely(!param)) {
 GLchar log[4096];
@@ -839,6 +850,70 @@ main(int argc, char **argv)
 }
 
 glLinkProgram(prog);
+
+glGetProgramiv(prog, GL_LINK_STATUS, );
+if (unlikely(!param)) {
+GLchar log[4096];
+GLsizei length;
+glGetProgramInfoLog(prog, sizeof(log), , log);
+
+fprintf(stderr, "ERROR: failed to link progam:\n%s\n",
+   log);
+} else if (generate_prog_bin) {
+/* generating shader program binary */
+char *prog_buf;
+GLenum format;
+GLsizei length = 0;
+FILE *fp;
+
+glGetProgramiv(prog, GL_PROGRAM_BINARY_LENGTH, );
+
+if (glGetError() != GL_NO_ERROR) {
+fprintf(stderr,
+"ERROR: failed to generate a program binary "
+"(invalid program size).\n");
+continue;
+}
+
+prog_buf = (char *)malloc(length);
+
+if (!prog_buf) {
+fprintf(stderr,
+"ERROR: failed to 

Re: [Mesa-dev] [PATCH 2/4] gallium: add initial support for conservative rasterization

2018-03-23 Thread Ilia Mirkin
On Fri, Mar 23, 2018 at 8:51 PM, Marek Olšák  wrote:
> diff --git a/src/gallium/include/pipe/p_state.h
> b/src/gallium/include/pipe/p_state.h
>>
>> index 4dce399f84..913a79faee 100644
>> --- a/src/gallium/include/pipe/p_state.h
>> +++ b/src/gallium/include/pipe/p_state.h
>> @@ -113,6 +113,7 @@ struct pipe_rasterizer_state
>> unsigned line_smooth:1;
>> unsigned line_stipple_enable:1;
>> unsigned line_last_pixel:1;
>> +   unsigned conservative_raster_mode:2; /**< PIPE_CONSERVATIVE_RASTER_x
>> */
>>
>> /**
>>  * Use the first vertex of a primitive as the provoking vertex for
>> @@ -186,6 +187,7 @@ struct pipe_rasterizer_state
>> float offset_units;
>> float offset_scale;
>> float offset_clamp;
>> +   float conservative_raster_dilate;
>>  };
>>
>>
>> @@ -199,6 +201,10 @@ struct pipe_viewport_state
>>  {
>> float scale[3];
>> float translate[3];
>> +   /*
>> +* Conservative rasterization subpixel precision bias in bits
>> +*/
>> +   uint16_t subpixel_precision[2];
>
>
> Actually, there is one thing that I disagree with. pipe_viewport_state is
> per-viewport and there are up to 16 viewports, but the extensions don't
> allow per-viewport settings of subpixel_precision. The obvious thing to do
> is to add a new pipe_context function that only sets subpixel_precision, but
> how about this:
>
> Would it be possible to move all the new states into a new CSO:
> pipe_conservative_raster_state?

FWIW the hardware does support this per viewport. But you're right -
that level of detail is not exposed in GL. I think gallium tends to be
closer to the hw, but I'd be fine with moving this out if you're
concerned about context/etc size.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] gallium: add initial support for conservative rasterization

2018-03-23 Thread Marek Olšák
diff --git a/src/gallium/include/pipe/p_state.h
b/src/gallium/include/pipe/p_state.h

> index 4dce399f84..913a79faee 100644
> --- a/src/gallium/include/pipe/p_state.h
> +++ b/src/gallium/include/pipe/p_state.h
> @@ -113,6 +113,7 @@ struct pipe_rasterizer_state
> unsigned line_smooth:1;
> unsigned line_stipple_enable:1;
> unsigned line_last_pixel:1;
> +   unsigned conservative_raster_mode:2; /**< PIPE_CONSERVATIVE_RASTER_x */
>
> /**
>  * Use the first vertex of a primitive as the provoking vertex for
> @@ -186,6 +187,7 @@ struct pipe_rasterizer_state
> float offset_units;
> float offset_scale;
> float offset_clamp;
> +   float conservative_raster_dilate;
>  };
>
>
> @@ -199,6 +201,10 @@ struct pipe_viewport_state
>  {
> float scale[3];
> float translate[3];
> +   /*
> +* Conservative rasterization subpixel precision bias in bits
> +*/
> +   uint16_t subpixel_precision[2];
>

Actually, there is one thing that I disagree with. pipe_viewport_state is
per-viewport and there are up to 16 viewports, but the extensions don't
allow per-viewport settings of subpixel_precision. The obvious thing to do
is to add a new pipe_context function that only sets subpixel_precision,
but how about this:

Would it be possible to move all the new states into a new CSO:
pipe_conservative_raster_state?

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] gallium: add initial support for conservative rasterization

2018-03-23 Thread Marek Olšák
On Wed, Mar 21, 2018 at 8:04 PM, Roland Scheidegger 
wrote:

> Am 22.03.2018 um 00:43 schrieb Ilia Mirkin:
> > On Wed, Mar 21, 2018 at 7:37 PM, Roland Scheidegger 
> wrote:
> >> Personally I'm not a big proponent on propagating single-vendor
> >> extensions (which are useless for anything but one specific driver) more
> >> or less directly through to gallium.
> >> There's an intel extension doing similar things already too.
> >> Ideally we'd end up with some bits in gallium which can do whatever the
> >> standardized version of it is going to require in some sensible way - at
> >> least I'd hope that such an extension will surface...
> >
> > Agreed. When/if such an extension materializes, we can adjust the
> > gallium API in a logical way to cover all the cases. Until then, this
> > is the functionality that exists on the GPUs in question.
> >
>
> I'm wondering, which bits of these could be done on AMD gpus too? Vega
> chips support conservative rasterization too.
> My guess is that what will end up in a standardized extension is
> probably similar to what's supported by d3d...
>
> I'm just not sure it's really worth the trouble of bothering the
> gallium interface with basically experimental additions. From what I can
> tell you could instead implement intel's extension and expose that on
> nvidia gpus instead (albeit I'm not sure nvidia can do all of that
> either) - from a quick look the interfaces would be quite different if
> you started with that instead.
>
> But whatever, I'm not too concerned, but maybe the AMD guys are...
>

I'm totally OK with this interface even though I don't have information
right now about how to implement the feature on AMD.

If people want to support their driver and want to add interfaces that no
other driver cares about, that's also fine. I guess the motivation is to
compete with the closed source driver, which is very reasonable. If/when we
implement the feature, we might make necessary adjustments to the interface
to support AMD while making sure that the adjustments don't affect other
drivers.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv: Add gen11 to anv_genX_call

2018-03-23 Thread Lionel Landwerlin

Reviewed-by: Lionel Landwerlin 

On 22/03/18 20:58, Jordan Justen wrote:

Signed-off-by: Jordan Justen 
---
  src/intel/vulkan/anv_cmd_buffer.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/src/intel/vulkan/anv_cmd_buffer.c 
b/src/intel/vulkan/anv_cmd_buffer.c
index 8f4bf3f0bb9..33687920a38 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -332,6 +332,9 @@ VkResult anv_ResetCommandBuffer(
 case 10:\
gen10_##func(__VA_ARGS__);   \
break;   \
+   case 11:\
+  gen11_##func(__VA_ARGS__);   \
+  break;   \
 default:\
assert(!"Unknown hardware generation");  \
 }



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv: Set genX_table for gen11

2018-03-23 Thread Lionel Landwerlin

Reviewed-by: Lionel Landwerlin 

On 23/03/18 22:03, Jordan Justen wrote:

Signed-off-by: Jordan Justen 
---
  src/intel/vulkan/anv_device.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 4cacba93430..d400a1328b4 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -1367,6 +1367,9 @@ anv_device_init_dispatch(struct anv_device *device)
  {
 const struct anv_dispatch_table *genX_table;
 switch (device->info.gen) {
+   case 11:
+  genX_table = _dispatch_table;
+  break;
 case 10:
genX_table = _dispatch_table;
break;



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/6] nir: Don't condition 'a-b < 0' -> 'a < b' on is_not_used_by_conditional

2018-03-23 Thread Matt Turner
On Fri, Mar 23, 2018 at 3:44 PM, Ian Romanick  wrote:
> On 03/23/2018 11:39 AM, Matt Turner wrote:
>> On Wed, Mar 21, 2018 at 5:58 PM, Ian Romanick  wrote:
>>> From: Ian Romanick 
>>>
>>> Now that i965 recognizes that a-b generates the same conditions as 'a <
>>> b', there is no reason to condition this transformation on 'is not used
>>> by conditional.'
>>>
>>> Since this was the only user of the is_not_used_by_conditional function,
>>> delete it.
>>>
>>> All Gen6+ platforms had similar results. (Skylake shown)
>>> total instructions in shared programs: 14400775 -> 14400595 (<.01%)
>>> instructions in affected programs: 36712 -> 36532 (-0.49%)
>>> helped: 182
>>> HURT: 26
>>
>> Looked at why these are hurt? :)
>
> I should have known you were going to ask that. :)  Since the changes
> were small, I tried to avoid looking at them so that I wouldn't get
> distracted again.  But since you asked...
>
> Shaders from 4 apps were hurt.  All but one of the hurt shaders was an
> ARB_fragment_program.  I looked at a shader from each of the four apps,
> and in all of those cases a ffma / compare with zero became a multiply /
> compare with something else.
>
> We already have a general case of this transformation in late
> optimizations.  So, I tried making this transformation more general (by
> removing the fneg) and added a transformation to strip the fnegs from -a
> < -b.  The results on SKL are below.  I'm not excited about the "max: 27
>  x̄: 2.26".
>
> I think I'd like to land this patch as-is.  I'm also going to play with
> moving all the comparison munging out of late optimizations.  I have
> some patches in the queue that add some more transformations, so it will
> be interesting to see the effect of moving them earlier.

Okay. Works for me.

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 09/19] nir/vtn: initial OpenCL.std extension

2018-03-23 Thread Jason Ekstrand
On Fri, Mar 23, 2018 at 12:33 PM, Karol Herbst  wrote:

> From: Rob Clark 
>
> Not complete, mostly just adding things as I encounter them in CTS.  But
> not getting far enough yet to hit most of the OpenCL.std instructions.
>
> v2: update hadd definition (Karol Herbst )
>
> Signed-off-by: Rob Clark 
> Signed-off-by: Karol Herbst 
> ---
>  src/compiler/nir/meson.build  |   1 +
>  src/compiler/nir/nir_opcodes.py   |   3 +-
>  src/compiler/spirv/spirv_to_nir.c |   2 +
>  src/compiler/spirv/vtn_opencl.c   | 266 ++
> 
>  src/compiler/spirv/vtn_private.h  |   3 +
>  5 files changed, 274 insertions(+), 1 deletion(-)
>  create mode 100644 src/compiler/spirv/vtn_opencl.c
>
> diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
> index a70c236b958..213a139a1b8 100644
> --- a/src/compiler/nir/meson.build
> +++ b/src/compiler/nir/meson.build
> @@ -192,6 +192,7 @@ files_libnir = files(
>'../spirv/vtn_amd.c',
>'../spirv/vtn_cfg.c',
>'../spirv/vtn_glsl450.c',
> +  '../spirv/vtn_opencl.c',
>'../spirv/vtn_private.h',
>'../spirv/vtn_subgroup.c',
>'../spirv/vtn_variables.c',
> diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_
> opcodes.py
> index 65d13200624..86fd6b6d68e 100644
> --- a/src/compiler/nir/nir_opcodes.py
> +++ b/src/compiler/nir/nir_opcodes.py
> @@ -768,4 +768,5 @@ dst.z = src2.x;
>  dst.w = src3.x;
>  """)
>
> -
> +binop("ihadd", tint, commutative, "(src0 >> 1) + (src1 >> 1) + (src0 &
> src1 & 1)")
> +binop("uhadd", tuint, commutative, "(src0 >> 1) + (src1 >> 1) + (src0 &
> src1 & 1)")
>

This bit should be its own patch.  It looks correct to me.  Might as well
add the other two when you make a patch for it though.

For whatever it's worth, Intel hardware calls this opcode AVG (for
average).  I'm not sure what "hadd" is supposed to stand for.  Maybe
half-add?  Anyway, I don't care about naming.

--Jason


> diff --git a/src/compiler/spirv/spirv_to_nir.c
> b/src/compiler/spirv/spirv_to_nir.c
> index 3acb3fc0b42..6a16d77a771 100644
> --- a/src/compiler/spirv/spirv_to_nir.c
> +++ b/src/compiler/spirv/spirv_to_nir.c
> @@ -379,6 +379,8 @@ vtn_handle_extension(struct vtn_builder *b, SpvOp
> opcode,
>} else if ((strcmp((const char *)[2], "SPV_AMD_gcn_shader") == 0)
>  && (b->options && b->options->caps.gcn_shader)) {
>   val->ext_handler = vtn_handle_amd_gcn_shader_instruction;
> +  } else if (strcmp(ext, "OpenCL.std") == 0) {
> + val->ext_handler = vtn_handle_opencl_instruction;
>} else {
>   vtn_fail("Unsupported extension: %s", ext);
>}
> diff --git a/src/compiler/spirv/vtn_opencl.c b/src/compiler/spirv/vtn_
> opencl.c
> new file mode 100644
> index 000..3c5ecd22452
> --- /dev/null
> +++ b/src/compiler/spirv/vtn_opencl.c
> @@ -0,0 +1,266 @@
> +/*
> + * Copyright © 2018 Red Hat
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the
> "Software"),
> + * to deal in the Software without restriction, including without
> limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the
> next
> + * paragraph) shall be included in all copies or substantial portions of
> the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
> SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *Rob Clark (robdcl...@gmail.com)
> + */
> +
> +#include "vtn_private.h"
> +#include "OpenCL.std.h"
> +
> +typedef nir_ssa_def *(*nir_handler)(struct vtn_builder *b, enum OpenCLstd
> opcode,
> +unsigned num_srcs, nir_ssa_def
> **srcs);
> +
> +static void
> +handle_instr(struct vtn_builder *b, enum OpenCLstd opcode, const uint32_t
> *w,
> + unsigned count, nir_handler handler)
> +{
> +   const struct glsl_type *dest_type =
> +  vtn_value(b, w[1], vtn_value_type_type)->type->type;
> +
> +   unsigned num_srcs = count - 5;
> +   nir_ssa_def *srcs[3] = { NULL, };
> +   vtn_assert(num_srcs <= ARRAY_SIZE(srcs));
> +   for (unsigned i = 0; i < num_srcs; i++) {
> +  srcs[i] = vtn_ssa_value(b, w[i + 5])->def;
> +   }
> +
> +   

Re: [Mesa-dev] [PATCH 6/6] nir: Don't condition 'a-b < 0' -> 'a < b' on is_not_used_by_conditional

2018-03-23 Thread Ian Romanick
On 03/23/2018 11:39 AM, Matt Turner wrote:
> On Wed, Mar 21, 2018 at 5:58 PM, Ian Romanick  wrote:
>> From: Ian Romanick 
>>
>> Now that i965 recognizes that a-b generates the same conditions as 'a <
>> b', there is no reason to condition this transformation on 'is not used
>> by conditional.'
>>
>> Since this was the only user of the is_not_used_by_conditional function,
>> delete it.
>>
>> All Gen6+ platforms had similar results. (Skylake shown)
>> total instructions in shared programs: 14400775 -> 14400595 (<.01%)
>> instructions in affected programs: 36712 -> 36532 (-0.49%)
>> helped: 182
>> HURT: 26
> 
> Looked at why these are hurt? :)

I should have known you were going to ask that. :)  Since the changes
were small, I tried to avoid looking at them so that I wouldn't get
distracted again.  But since you asked...

Shaders from 4 apps were hurt.  All but one of the hurt shaders was an
ARB_fragment_program.  I looked at a shader from each of the four apps,
and in all of those cases a ffma / compare with zero became a multiply /
compare with something else.

We already have a general case of this transformation in late
optimizations.  So, I tried making this transformation more general (by
removing the fneg) and added a transformation to strip the fnegs from -a
< -b.  The results on SKL are below.  I'm not excited about the "max: 27
 x̄: 2.26".

I think I'd like to land this patch as-is.  I'm also going to play with
moving all the comparison munging out of late optimizations.  I have
some patches in the queue that add some more transformations, so it will
be interesting to see the effect of moving them earlier.

total instructions in shared programs: 14400775 -> 14399973 (<.01%)
instructions in affected programs: 242118 -> 241316 (-0.33%)
helped: 1298
HURT: 325
helped stats (abs) min: 1 max: 7 x̄: 1.18 x̃: 1
helped stats (rel) min: 0.11% max: 9.09% x̄: 1.73% x̃: 1.16%
HURT stats (abs)   min: 1 max: 27 x̄: 2.26 x̃: 1
HURT stats (rel)   min: 0.14% max: 5.26% x̄: 0.79% x̃: 0.43%
95% mean confidence interval for instructions value: -0.61 -0.38
95% mean confidence interval for instructions %-change: -1.32% -1.14%
Instructions are helped.

total cycles in shared programs: 532929592 -> 532919072 (<.01%)
cycles in affected programs: 3525194 -> 3514674 (-0.30%)
helped: 1896
HURT: 608
helped stats (abs) min: 1 max: 714 x̄: 14.12 x̃: 4
helped stats (rel) min: 0.03% max: 26.92% x̄: 1.10% x̃: 0.68%
HURT stats (abs)   min: 1 max: 281 x̄: 26.72 x̃: 8
HURT stats (rel)   min: 0.01% max: 40.94% x̄: 2.19% x̃: 0.95%
95% mean confidence interval for cycles value: -5.88 -2.53
95% mean confidence interval for cycles %-change: -0.41% -0.19%
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv: Add gen11 to anv_genX_call

2018-03-23 Thread Jordan Justen
On 2018-03-22 14:19:47, Lionel Landwerlin wrote:
> There is another macro anv_genX_call() in anv_cmd_buffer.c & anv_blorp.c

This patch was changing anv_genX_call in anv_cmd_buffer.c.

anv_blorp.c appeared to have gen11 handled.

> Also a switch in anv_device.c in anv_device_init_dispatch.

You're right. I replied to your email with another patch for
anv_device.c.

-Jordan

> If you just knock those off in the same patch, that would be great.
> 
> Thanks!
> 
> -
> Lionel
> 
> On 22/03/18 20:58, Jordan Justen wrote:
> > Signed-off-by: Jordan Justen 
> > ---
> >   src/intel/vulkan/anv_cmd_buffer.c | 3 +++
> >   1 file changed, 3 insertions(+)
> >
> > diff --git a/src/intel/vulkan/anv_cmd_buffer.c 
> > b/src/intel/vulkan/anv_cmd_buffer.c
> > index 8f4bf3f0bb9..33687920a38 100644
> > --- a/src/intel/vulkan/anv_cmd_buffer.c
> > +++ b/src/intel/vulkan/anv_cmd_buffer.c
> > @@ -332,6 +332,9 @@ VkResult anv_ResetCommandBuffer(
> >  case 10:\
> > gen10_##func(__VA_ARGS__);   \
> > break;   \
> > +   case 11:\
> > +  gen11_##func(__VA_ARGS__);   \
> > +  break;   \
> >  default:\
> > assert(!"Unknown hardware generation");  \
> >  }
> 
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] anv: Set genX_table for gen11

2018-03-23 Thread Jordan Justen
Signed-off-by: Jordan Justen 
---
 src/intel/vulkan/anv_device.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 4cacba93430..d400a1328b4 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -1367,6 +1367,9 @@ anv_device_init_dispatch(struct anv_device *device)
 {
const struct anv_dispatch_table *genX_table;
switch (device->info.gen) {
+   case 11:
+  genX_table = _dispatch_table;
+  break;
case 10:
   genX_table = _dispatch_table;
   break;
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/vec4: Fix null destination register in 3-source instructions

2018-03-23 Thread Matt Turner
Thanks!

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 51/61] spirv: Update vtn_pointer_to/from_ssa to handle deref pointers

2018-03-23 Thread Jason Ekstrand
Now that pointers can be derefs and derefs just produce SSA values, we
can convert any pointer to/from SSA.
---
 src/compiler/spirv/vtn_variables.c | 73 ++
 1 file changed, 42 insertions(+), 31 deletions(-)

diff --git a/src/compiler/spirv/vtn_variables.c 
b/src/compiler/spirv/vtn_variables.c
index 6efd43c..4e3696f 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -1499,30 +1499,34 @@ vtn_storage_class_to_mode(struct vtn_builder *b,
 nir_ssa_def *
 vtn_pointer_to_ssa(struct vtn_builder *b, struct vtn_pointer *ptr)
 {
-   /* This pointer needs to have a pointer type with actual storage */
-   vtn_assert(ptr->ptr_type);
-   vtn_assert(ptr->ptr_type->type);
-
-   if (!ptr->offset) {
-  /* If we don't have an offset then we must be a pointer to the variable
-   * itself.
-   */
-  vtn_assert(!ptr->offset && !ptr->block_index);
+   if (vtn_pointer_uses_ssa_offset(b, ptr)) {
+  /* This pointer needs to have a pointer type with actual storage */
+  vtn_assert(ptr->ptr_type);
+  vtn_assert(ptr->ptr_type->type);
+
+  if (!ptr->offset) {
+ /* If we don't have an offset then we must be a pointer to the 
variable
+  * itself.
+  */
+ vtn_assert(!ptr->offset && !ptr->block_index);
 
-  struct vtn_access_chain chain = {
- .length = 0,
-  };
-  ptr = vtn_ssa_offset_pointer_dereference(b, ptr, );
-   }
+ struct vtn_access_chain chain = {
+.length = 0,
+ };
+ ptr = vtn_ssa_offset_pointer_dereference(b, ptr, );
+  }
 
-   vtn_assert(ptr->offset);
-   if (ptr->block_index) {
-  vtn_assert(ptr->mode == vtn_variable_mode_ubo ||
- ptr->mode == vtn_variable_mode_ssbo);
-  return nir_vec2(>nb, ptr->block_index, ptr->offset);
+  vtn_assert(ptr->offset);
+  if (ptr->block_index) {
+ vtn_assert(ptr->mode == vtn_variable_mode_ubo ||
+ptr->mode == vtn_variable_mode_ssbo);
+ return nir_vec2(>nb, ptr->block_index, ptr->offset);
+  } else {
+ vtn_assert(ptr->mode == vtn_variable_mode_workgroup);
+ return ptr->offset;
+  }
} else {
-  vtn_assert(ptr->mode == vtn_variable_mode_workgroup);
-  return ptr->offset;
+  return _pointer_to_deref(b, ptr)->dest.ssa;
}
 }
 
@@ -1532,28 +1536,35 @@ vtn_pointer_from_ssa(struct vtn_builder *b, nir_ssa_def 
*ssa,
 {
vtn_assert(ssa->num_components <= 2 && ssa->bit_size == 32);
vtn_assert(ptr_type->base_type == vtn_base_type_pointer);
-   vtn_assert(ptr_type->deref->base_type != vtn_base_type_pointer);
-   /* This pointer type needs to have actual storage */
-   vtn_assert(ptr_type->type);
+
+   struct vtn_type *interface_type = ptr_type->deref;
+   while (interface_type->base_type == vtn_base_type_array)
+  interface_type = interface_type->array_element;
 
struct vtn_pointer *ptr = rzalloc(b, struct vtn_pointer);
+   nir_variable_mode nir_mode;
ptr->mode = vtn_storage_class_to_mode(b, ptr_type->storage_class,
- ptr_type, NULL);
+ interface_type, _mode);
ptr->type = ptr_type->deref;
ptr->ptr_type = ptr_type;
 
-   if (ssa->num_components > 1) {
+   if (ptr->mode == vtn_variable_mode_ubo ||
+   ptr->mode == vtn_variable_mode_ssbo) {
+  /* This pointer type needs to have actual storage */
+  vtn_assert(ptr_type->type);
   vtn_assert(ssa->num_components == 2);
-  vtn_assert(ptr->mode == vtn_variable_mode_ubo ||
- ptr->mode == vtn_variable_mode_ssbo);
   ptr->block_index = nir_channel(>nb, ssa, 0);
   ptr->offset = nir_channel(>nb, ssa, 1);
-   } else {
+   } else if (ptr->mode == vtn_variable_mode_workgroup ||
+  ptr->mode == vtn_variable_mode_push_constant) {
+  /* This pointer type needs to have actual storage */
+  vtn_assert(ptr_type->type);
   vtn_assert(ssa->num_components == 1);
-  vtn_assert(ptr->mode == vtn_variable_mode_workgroup ||
- ptr->mode == vtn_variable_mode_push_constant);
   ptr->block_index = NULL;
   ptr->offset = ssa;
+   } else {
+  ptr->deref = nir_build_deref_cast(>nb, ssa, nir_mode,
+ptr_type->deref->type);
}
 
return ptr;
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/61] nir: Move to using instructions for derefs

2018-03-23 Thread Jason Ekstrand
On Fri, Mar 23, 2018 at 2:42 PM, Jason Ekstrand 
wrote:

> This is something that Connor and I have been talking about for some time
> now.  The basic idea is to replace the current singly linked nir_deref list
> with deref instructions.  This is similar to what LLVM does and it offers
> quite a bit more freedom when we start getting more realistic pointers from
> compute applications.
>
> This series implements an almost complete conversion for both i965 and anv.
> The two remaining gaps are nir_lower_locals_to_regs and nir_lower_samplers.
> The former will have to wait for ir3 to be converted and the latter will
> have to wait for radeonsi.  I've got patches for nir_lower_samplers but not
> nir_lower_samplers_as_deref which is required by at least radeonsi.  Once
> those are in place, we should be able to drop the lowering pass from the
> Intel back-end completely.
>
> The next step (which I will start on next week) will be removing legacy
> derefs from core NIR.  This will also involve significant reworks in some
> passes such as vars_to_ssa which still uses legacy derefs internally even
> for things which use deref instructions.
>
> Clearly, we can't remove anything until all of the other drivers are
> converted.  However, this series should be a good basis for anyone wanting
> to work on converting another driver since almost all of the core NIR
> passes now work with both types of derefs so you can convert in whatever
> way makes sense.
>
> This series can be found as a branch on gitlab:
>
> https://gitlab.freedesktop.org/jekstrand/mesa/commits/
> review/nir-deref-instrs-v1
>

Oops, that's supposed to be -v2

https://gitlab.freedesktop.org/jekstrand/mesa/commits/review/nir-deref-instr-v2


> Cc: Rob Clark 
> Cc: Timothy Arceri 
> Cc: Eric Anholt 
> Cc: Connor Abbott 
> Cc: Bas Nieuwenhuizen 
> Cc: Karol Herbst 
>
> Jason Ekstrand (61):
>   nir: Add src/dest num_components helpers
>   nir: Return a cursor from nir_instr_remove
>   nir/vars_to_ssa: Remove copies from the correct set
>   nir/lower_indirect_derefs: Support interp_var_at intrinsics
>   intel/vec4: Set channel_sizes for MOV_INDIRECT sources
>   nir/validator: Validate that all used variables exist
>   nir: Add a deref instruction type
>   nir/builder: Add deref building helpers
>   nir: Add _deref versions of all of the _var intrinsics
>   nir: Add deref sources to texture instructions
>   nir: Add helpers for working with deref instructions
>   anv,i965,radv,st,ir3: Call nir_lower_deref_instrs
>   glsl/nir: Only claim to handle intrinsic functions
>   glsl/nir: Use deref instructions instead of deref chains
>   prog/nir: Simplify some load/store operations
>   prog/nir: Use deref instructions for params
>   nir/lower_atomics: Rework the main walker loop a bit
>   nir: Support deref instructions in remove_dead_variables
>   nir: Add a pass for fixing deref modes
>   nir: Support deref instructions in lower_global_vars_to_local
>   nir: Support deref instructions in lower_io_to_temporaries
>   nir: Add a deref path helper struct
>   nir: Support deref instructions in lower_var_copies
>   nir: Support deref instructions in split_var_copies
>   nir: Support deref instructions in lower_vars_to_ssa
>   nir: Support deref instructions in lower_indirect_derefs
>   nir/deref: Add a deref cleanup function
>   nir: Support deref instructions in lower_system_values
>   nir: Support deref instructions in lower_clip_cull
>   nir: Support deref instructions in propagate_invariant
>   nir: Support deref instructions in gather_info
>   nir: Support deref instructions in lower_io
>   nir: Support deref instructions in lower_atomics
>   nir: Support deref instructions in lower_wpos_ytransform
>   nir: Support deref instructions in lower_pos_center
>   nir: Support deref instructions in remove_unused_varyings
>   intel,ir3: Disable nir_opt_copy_prop_vars
>   intel/nir: Fixup deref modes after lowering patch vertices
>   i965: Move nir_lower_deref_instrs to right before locals_to_regs
>   st/nir: Move lower_deref_instrs later
>   spirv: Use deref instructions for most variables
>   nir: Add a concept of per-member structs and a lowering pass
>   nir/lower_system_values: Support SYSTEM_VALUE_LOCAL_GROUP_SIZE
>   spirv: Use the LOCAL_GROUP_SIZE system value
>   nir/spirv: Pass nir_variable_data into apply_var_decoration
>   anv/pipeline: Lower more constant initializers earlier
>   spirv: Use NIR per-member splitting
>   spirv: Make push constants an offset-based pointer
>   spirv: Clean up vtn_pointer_to_offset
>   spirv: Allow pointers to have a deref at the base
>   spirv: Update vtn_pointer_to/from_ssa to handle deref pointers
>   spirv: Record the type of functions
>   spirv/cfg: Make the builder fully capable for both walks
>   nir,spirv: Rework function calls
>   anv/pipeline: Do less deref instruction 

[Mesa-dev] [PATCH 29/61] nir: Support deref instructions in lower_clip_cull

2018-03-23 Thread Jason Ekstrand
---
 .../nir/nir_lower_clip_cull_distance_arrays.c  | 69 --
 1 file changed, 65 insertions(+), 4 deletions(-)

diff --git a/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c 
b/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c
index 95eda82..69b31d5 100644
--- a/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c
+++ b/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c
@@ -74,9 +74,9 @@ update_type(nir_variable *var, gl_shader_stage stage, 
unsigned length)
  * Rewrite any clip/cull distances to refer to the new combined array.
  */
 static void
-rewrite_references(nir_instr *instr,
-   nir_variable *combined,
-   unsigned cull_offset)
+rewrite_var_references(nir_instr *instr,
+   nir_variable *combined,
+   unsigned cull_offset)
 {
if (instr->type != nir_instr_type_intrinsic)
   return;
@@ -121,6 +121,63 @@ rewrite_references(nir_instr *instr,
/* There's no need to update writemasks; it's a scalar array. */
 }
 
+static void
+rewrite_clip_cull_deref(nir_builder *b,
+nir_deref_instr *deref,
+const struct glsl_type *type,
+unsigned tail_offset)
+{
+   deref->type = type;
+
+   if (glsl_type_is_array(type)) {
+  const struct glsl_type *child_type = glsl_get_array_element(type);
+  nir_foreach_use(src, >dest.ssa) {
+ rewrite_clip_cull_deref(b, nir_instr_as_deref(src->parent_instr),
+ child_type, tail_offset);
+  }
+   } else {
+  assert(glsl_type_is_scalar(type));
+
+  /* This is the end of the line.  Add the tail offset if needed */
+  if (tail_offset > 0) {
+ b->cursor = nir_before_instr(>instr);
+ assert(deref->deref_type == nir_deref_type_array);
+ nir_ssa_def *index = nir_iadd(b, deref->arr.index.ssa,
+  nir_imm_int(b, tail_offset));
+ nir_instr_rewrite_src(>instr, >arr.index,
+   nir_src_for_ssa(index));
+  }
+   }
+}
+
+static void
+rewrite_references(nir_builder *b,
+   nir_instr *instr,
+   nir_variable *combined,
+   unsigned cull_offset)
+{
+   if (instr->type != nir_instr_type_deref)
+  return;
+
+   nir_deref_instr *deref = nir_instr_as_deref(instr);
+   if (deref->deref_type != nir_deref_type_var)
+  return;
+
+   if (deref->var->data.mode != combined->data.mode)
+  return;
+
+   const unsigned location = deref->var->data.location;
+   if (location != VARYING_SLOT_CLIP_DIST0 &&
+   location != VARYING_SLOT_CULL_DIST0)
+  return;
+
+   deref->var = combined;
+   if (location == VARYING_SLOT_CULL_DIST0)
+  rewrite_clip_cull_deref(b, deref, combined->type, cull_offset);
+   else
+  rewrite_clip_cull_deref(b, deref, combined->type, 0);
+}
+
 static bool
 combine_clip_cull(nir_shader *nir,
   struct exec_list *vars,
@@ -163,9 +220,13 @@ combine_clip_cull(nir_shader *nir,
  /* Rewrite CullDistance to reference the combined array */
  nir_foreach_function(function, nir) {
 if (function->impl) {
+   nir_builder b;
+   nir_builder_init(, function->impl);
+
nir_foreach_block(block, function->impl) {
   nir_foreach_instr(instr, block) {
- rewrite_references(instr, clip, clip_array_size);
+ rewrite_var_references(instr, clip, clip_array_size);
+ rewrite_references(, instr, clip, clip_array_size);
   }
}
 }
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 56/61] anv/pipeline: Convert lower_input_attachments to deref instructions

2018-03-23 Thread Jason Ekstrand
---
 src/intel/vulkan/anv_nir_lower_input_attachments.c | 31 +++---
 src/intel/vulkan/anv_pipeline.c|  6 ++---
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/src/intel/vulkan/anv_nir_lower_input_attachments.c 
b/src/intel/vulkan/anv_nir_lower_input_attachments.c
index 6dc4f90..81e5ad5 100644
--- a/src/intel/vulkan/anv_nir_lower_input_attachments.c
+++ b/src/intel/vulkan/anv_nir_lower_input_attachments.c
@@ -43,10 +43,10 @@ load_frag_coord(nir_builder *b)
 static void
 try_lower_input_load(nir_function_impl *impl, nir_intrinsic_instr *load)
 {
+   nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
+   assert(glsl_type_is_image(deref->type));
 
-   const struct glsl_type *image_type =
-  glsl_without_array(load->variables[0]->var->type);
-   enum glsl_sampler_dim image_dim = glsl_get_sampler_dim(image_type);
+   enum glsl_sampler_dim image_dim = glsl_get_sampler_dim(deref->type);
if (image_dim != GLSL_SAMPLER_DIM_SUBPASS &&
image_dim != GLSL_SAMPLER_DIM_SUBPASS_MS)
   return;
@@ -58,7 +58,7 @@ try_lower_input_load(nir_function_impl *impl, 
nir_intrinsic_instr *load)
b.cursor = nir_before_instr(>instr);
 
nir_ssa_def *frag_coord = nir_f2i32(, load_frag_coord());
-   nir_ssa_def *offset = nir_ssa_for_src(, load->src[0], 2);
+   nir_ssa_def *offset = nir_ssa_for_src(, load->src[1], 2);
nir_ssa_def *pos = nir_iadd(, frag_coord, offset);
 
nir_ssa_def *layer =
@@ -66,11 +66,11 @@ try_lower_input_load(nir_function_impl *impl, 
nir_intrinsic_instr *load)
nir_ssa_def *coord =
   nir_vec3(, nir_channel(, pos, 0), nir_channel(, pos, 1), layer);
 
-   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2 + multisampled);
+   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3 + multisampled);
 
tex->op = nir_texop_txf;
 
-   switch (glsl_get_sampler_result_type(image_type)) {
+   switch (glsl_get_sampler_result_type(deref->type)) {
case GLSL_TYPE_FLOAT:
   tex->dest_type = nir_type_float;
   break;
@@ -86,22 +86,23 @@ try_lower_input_load(nir_function_impl *impl, 
nir_intrinsic_instr *load)
tex->is_array = true;
tex->is_shadow = false;
 
-   tex->texture = nir_deref_var_clone(load->variables[0], tex);
-   tex->sampler = NULL;
tex->texture_index = 0;
tex->sampler_index = 0;
 
-   tex->src[0].src_type = nir_tex_src_coord;
-   tex->src[0].src = nir_src_for_ssa(coord);
+   tex->src[0].src_type = nir_tex_src_texture_deref;
+   tex->src[0].src = nir_src_for_ssa(>dest.ssa);
+
+   tex->src[1].src_type = nir_tex_src_coord;
+   tex->src[1].src = nir_src_for_ssa(coord);
tex->coord_components = 3;
 
-   tex->src[1].src_type = nir_tex_src_lod;
-   tex->src[1].src = nir_src_for_ssa(nir_imm_int(, 0));
+   tex->src[2].src_type = nir_tex_src_lod;
+   tex->src[2].src = nir_src_for_ssa(nir_imm_int(, 0));
 
if (image_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) {
   tex->op = nir_texop_txf_ms;
-  tex->src[2].src_type = nir_tex_src_ms_index;
-  tex->src[2].src = load->src[1];
+  tex->src[3].src_type = nir_tex_src_ms_index;
+  tex->src[3].src = load->src[2];
}
 
nir_ssa_dest_init(>instr, >dest, 4, 32, NULL);
@@ -127,7 +128,7 @@ anv_nir_lower_input_attachments(nir_shader *shader)
 
 nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
 
-if (load->intrinsic != nir_intrinsic_image_var_load)
+if (load->intrinsic != nir_intrinsic_image_deref_load)
continue;
 
 try_lower_input_load(function->impl, load);
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index b4a9d83..4aef86a 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -215,12 +215,12 @@ anv_shader_compile_to_nir(struct anv_pipeline *pipeline,
 
nir = brw_preprocess_nir(compiler, nir);
 
-   NIR_PASS_V(nir, nir_lower_deref_instrs,
-  nir_lower_texture_derefs | nir_lower_image_derefs);
-
if (stage == MESA_SHADER_FRAGMENT)
   NIR_PASS_V(nir, anv_nir_lower_input_attachments);
 
+   NIR_PASS_V(nir, nir_lower_deref_instrs,
+  nir_lower_texture_derefs | nir_lower_image_derefs);
+
return nir;
 }
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 48/61] spirv: Make push constants an offset-based pointer

2018-03-23 Thread Jason Ekstrand
Push constants have been a weird edge-case for a while in that they have
explicit offsets but we've been internally building access chains for
them.  This mostly works but it means that passing pointers to push
constants through as function arguments is broken.  The easy thing to do
for now is to just treat them like UBOs or SSBOs only without a block
index.  This does lose a bit of information since we no longer have an
accurate access range and any indirect access will look like it could
read the whole block.  Unfortunately, there's not much we can do about
that.  Once NIR derefs get a bit more powerful, we can plumb these
through as derefs and be able to reason about them again.
---
 src/compiler/spirv/spirv_to_nir.c  |  7 +++
 src/compiler/spirv/vtn_variables.c | 38 ++
 2 files changed, 17 insertions(+), 28 deletions(-)

diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index 29b4512..06368df 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -1168,6 +1168,13 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
  val->type->type = glsl_vector_type(GLSL_TYPE_UINT, 2);
   }
 
+  if (storage_class == SpvStorageClassPushConstant) {
+ /* These can actually be stored to nir_variables and used as SSA
+  * values so they need a real glsl_type.
+  */
+ val->type->type = glsl_uint_type();
+  }
+
   if (storage_class == SpvStorageClassWorkgroup &&
   b->options->lower_workgroup_access_to_offsets) {
  uint32_t size, align;
diff --git a/src/compiler/spirv/vtn_variables.c 
b/src/compiler/spirv/vtn_variables.c
index 49cb837..944c1ab 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -64,6 +64,7 @@ vtn_pointer_uses_ssa_offset(struct vtn_builder *b,
 {
return ptr->mode == vtn_variable_mode_ubo ||
   ptr->mode == vtn_variable_mode_ssbo ||
+  ptr->mode == vtn_variable_mode_push_constant ||
   (ptr->mode == vtn_variable_mode_workgroup &&
b->options->lower_workgroup_access_to_offsets);
 }
@@ -269,6 +270,12 @@ vtn_ssa_offset_pointer_dereference(struct vtn_builder *b,
  }
 
  offset = nir_imm_int(>nb, base->var->shared_location);
+  } else if (base->mode == vtn_variable_mode_push_constant) {
+ /* Push constants neither need nor have a block index */
+ vtn_assert(!block_index);
+
+ /* Start off with at the start of the push constant block. */
+ offset = nir_imm_int(>nb, 0);
   } else {
  /* The code above should have ensured a block_index when needed. */
  vtn_assert(block_index);
@@ -662,31 +669,6 @@ vtn_type_block_size(struct vtn_builder *b, struct vtn_type 
*type)
 }
 
 static void
-vtn_access_chain_get_offset_size(struct vtn_builder *b,
- struct vtn_access_chain *chain,
- struct vtn_type *type,
- unsigned *access_offset,
- unsigned *access_size)
-{
-   *access_offset = 0;
-
-   for (unsigned i = 0; i < chain->length; i++) {
-  if (chain->link[i].mode != vtn_access_mode_literal)
- break;
-
-  if (glsl_type_is_struct(type->type)) {
- *access_offset += type->offsets[chain->link[i].id];
- type = type->members[chain->link[i].id];
-  } else {
- *access_offset += type->stride * chain->link[i].id;
- type = type->array_element;
-  }
-   }
-
-   *access_size = vtn_type_block_size(b, type);
-}
-
-static void
 _vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load,
  nir_ssa_def *index, nir_ssa_def *offset,
  unsigned access_offset, unsigned access_size,
@@ -882,8 +864,7 @@ vtn_block_load(struct vtn_builder *b, struct vtn_pointer 
*src)
   break;
case vtn_variable_mode_push_constant:
   op = nir_intrinsic_load_push_constant;
-  vtn_access_chain_get_offset_size(b, src->chain, src->var->type,
-   _offset, _size);
+  access_size = b->shader->num_uniforms;
   break;
case vtn_variable_mode_workgroup:
   op = nir_intrinsic_load_shared;
@@ -1657,7 +1638,8 @@ vtn_pointer_from_ssa(struct vtn_builder *b, nir_ssa_def 
*ssa,
   ptr->offset = nir_channel(>nb, ssa, 1);
} else {
   vtn_assert(ssa->num_components == 1);
-  vtn_assert(ptr->mode == vtn_variable_mode_workgroup);
+  vtn_assert(ptr->mode == vtn_variable_mode_workgroup ||
+ ptr->mode == vtn_variable_mode_push_constant);
   ptr->block_index = NULL;
   ptr->offset = ssa;
}
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 49/61] spirv: Clean up vtn_pointer_to_offset

2018-03-23 Thread Jason Ekstrand
Now that push constants are using on-the-fly offsets, we no longer need
to handle access chains in vtn_pointer_to_offset.
---
 src/compiler/spirv/spirv_to_nir.c  |  2 +-
 src/compiler/spirv/vtn_private.h   |  2 +-
 src/compiler/spirv/vtn_variables.c | 89 ++
 3 files changed, 16 insertions(+), 77 deletions(-)

diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index 06368df..b1d68da 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -2616,7 +2616,7 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, 
SpvOp opcode,
   }
} else {
   nir_ssa_def *offset, *index;
-  offset = vtn_pointer_to_offset(b, ptr, , NULL);
+  offset = vtn_pointer_to_offset(b, ptr, );
 
   nir_intrinsic_op op;
   if (ptr->mode == vtn_variable_mode_ssbo) {
diff --git a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h
index 6591b22..1eaa78b 100644
--- a/src/compiler/spirv/vtn_private.h
+++ b/src/compiler/spirv/vtn_private.h
@@ -673,7 +673,7 @@ nir_deref_instr *vtn_pointer_to_deref(struct vtn_builder *b,
   struct vtn_pointer *ptr);
 nir_ssa_def *
 vtn_pointer_to_offset(struct vtn_builder *b, struct vtn_pointer *ptr,
-  nir_ssa_def **index_out, unsigned *end_idx_out);
+  nir_ssa_def **index_out);
 
 struct vtn_ssa_value *
 vtn_local_load(struct vtn_builder *b, nir_deref_instr *src);
diff --git a/src/compiler/spirv/vtn_variables.c 
b/src/compiler/spirv/vtn_variables.c
index 944c1ab..11ba043 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -548,70 +548,17 @@ vtn_local_store(struct vtn_builder *b, struct 
vtn_ssa_value *src,
 
 nir_ssa_def *
 vtn_pointer_to_offset(struct vtn_builder *b, struct vtn_pointer *ptr,
-  nir_ssa_def **index_out, unsigned *end_idx_out)
+  nir_ssa_def **index_out)
 {
-   if (vtn_pointer_uses_ssa_offset(b, ptr)) {
-  if (!ptr->offset) {
- struct vtn_access_chain chain = {
-.length = 0,
- };
- ptr = vtn_ssa_offset_pointer_dereference(b, ptr, );
-  }
-  *index_out = ptr->block_index;
-  return ptr->offset;
-   }
-
-   vtn_assert(ptr->mode == vtn_variable_mode_push_constant);
-   *index_out = NULL;
-
-   unsigned idx = 0;
-   struct vtn_type *type = ptr->var->type;
-   nir_ssa_def *offset = nir_imm_int(>nb, 0);
-
-   if (ptr->chain) {
-  for (; idx < ptr->chain->length; idx++) {
- enum glsl_base_type base_type = glsl_get_base_type(type->type);
- switch (base_type) {
- case GLSL_TYPE_UINT:
- case GLSL_TYPE_INT:
- case GLSL_TYPE_UINT16:
- case GLSL_TYPE_INT16:
- case GLSL_TYPE_UINT8:
- case GLSL_TYPE_INT8:
- case GLSL_TYPE_UINT64:
- case GLSL_TYPE_INT64:
- case GLSL_TYPE_FLOAT:
- case GLSL_TYPE_FLOAT16:
- case GLSL_TYPE_DOUBLE:
- case GLSL_TYPE_BOOL:
- case GLSL_TYPE_ARRAY:
-offset = nir_iadd(>nb, offset,
-  vtn_access_link_as_ssa(b, ptr->chain->link[idx],
- type->stride));
-
-type = type->array_element;
-break;
-
- case GLSL_TYPE_STRUCT: {
-vtn_assert(ptr->chain->link[idx].mode == vtn_access_mode_literal);
-unsigned member = ptr->chain->link[idx].id;
-offset = nir_iadd(>nb, offset,
-  nir_imm_int(>nb, type->offsets[member]));
-type = type->members[member];
-break;
- }
-
- default:
-vtn_fail("Invalid type for deref");
- }
-  }
+   assert(vtn_pointer_uses_ssa_offset(b, ptr));
+   if (!ptr->offset) {
+  struct vtn_access_chain chain = {
+ .length = 0,
+  };
+  ptr = vtn_ssa_offset_pointer_dereference(b, ptr, );
}
-
-   vtn_assert(type == ptr->type);
-   if (end_idx_out)
-  *end_idx_out = idx;
-
-   return offset;
+   *index_out = ptr->block_index;
+   return ptr->offset;
 }
 
 /* Tries to compute the size of an interface block based on the strides and
@@ -718,13 +665,9 @@ static void
 _vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load,
   nir_ssa_def *index, nir_ssa_def *offset,
   unsigned access_offset, unsigned access_size,
-  struct vtn_access_chain *chain, unsigned chain_idx,
   struct vtn_type *type, struct vtn_ssa_value **inout)
 {
-   if (chain && chain_idx >= chain->length)
-  chain = NULL;
-
-   if (load && chain == NULL && *inout == NULL)
+   if (load && *inout == NULL)
   *inout = vtn_create_ssa_value(b, type->type);
 
enum glsl_base_type base_type = glsl_get_base_type(type->type);
@@ -826,7 +769,6 @@ _vtn_block_load_store(struct 

[Mesa-dev] [PATCH 57/61] anv/pipeline: Convert YCbCr lowering to deref instructions

2018-03-23 Thread Jason Ekstrand
---
 src/intel/vulkan/anv_nir_lower_ycbcr_textures.c | 34 ++---
 src/intel/vulkan/anv_pipeline.c |  6 ++---
 2 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c 
b/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c
index ebf1fd9..5a971d9 100644
--- a/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c
+++ b/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c
@@ -30,6 +30,7 @@ struct ycbcr_state {
nir_builder *builder;
nir_ssa_def *image_size;
nir_tex_instr *origin_tex;
+   nir_deref_instr *tex_deref;
struct anv_ycbcr_conversion *conversion;
 };
 
@@ -152,22 +153,24 @@ convert_ycbcr(struct ycbcr_state *state,
 
 /* TODO: we should probably replace this with a push constant/uniform. */
 static nir_ssa_def *
-get_texture_size(struct ycbcr_state *state, nir_deref_var *texture)
+get_texture_size(struct ycbcr_state *state, nir_deref_instr *texture)
 {
if (state->image_size)
   return state->image_size;
 
nir_builder *b = state->builder;
-   const struct glsl_type *type = nir_deref_tail(>deref)->type;
-   nir_tex_instr *tex = nir_tex_instr_create(b->shader, 0);
+   const struct glsl_type *type = texture->type;
+   nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1);
 
tex->op = nir_texop_txs;
tex->sampler_dim = glsl_get_sampler_dim(type);
tex->is_array = glsl_sampler_type_is_array(type);
tex->is_shadow = glsl_sampler_type_is_shadow(type);
-   tex->texture = nir_deref_var_clone(texture, tex);
tex->dest_type = nir_type_int;
 
+   tex->src[0].src_type = nir_tex_src_texture_deref;
+   tex->src[0].src = nir_src_for_ssa(>dest.ssa);
+
nir_ssa_dest_init(>instr, >dest,
  nir_tex_instr_dest_size(tex), 32, NULL);
nir_builder_instr_insert(b, >instr);
@@ -199,8 +202,7 @@ implicit_downsampled_coords(struct ycbcr_state *state,
 {
nir_builder *b = state->builder;
struct anv_ycbcr_conversion *conversion = state->conversion;
-   nir_ssa_def *image_size = get_texture_size(state,
-  state->origin_tex->texture);
+   nir_ssa_def *image_size = get_texture_size(state, state->tex_deref);
nir_ssa_def *comp[4] = { NULL, };
int c;
 
@@ -266,10 +268,7 @@ create_plane_tex_instr_implicit(struct ycbcr_state *state,
 
tex->texture_index = old_tex->texture_index;
tex->texture_array_size = old_tex->texture_array_size;
-   tex->texture = nir_deref_var_clone(old_tex->texture, tex);
-
tex->sampler_index = old_tex->sampler_index;
-   tex->sampler = nir_deref_var_clone(old_tex->sampler, tex);
 
nir_ssa_dest_init(>instr, >dest,
  old_tex->dest.ssa.num_components,
@@ -320,7 +319,11 @@ try_lower_tex_ycbcr(struct anv_pipeline_layout *layout,
 nir_builder *builder,
 nir_tex_instr *tex)
 {
-   nir_variable *var = tex->texture->var;
+   int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
+   assert(deref_src_idx >= 0);
+   nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
+
+   nir_variable *var = nir_deref_instr_get_variable(deref);
const struct anv_descriptor_set_layout *set_layout =
   layout->set[var->data.descriptor_set].layout;
const struct anv_descriptor_set_binding_layout *binding =
@@ -338,14 +341,14 @@ try_lower_tex_ycbcr(struct anv_pipeline_layout *layout,
   return false;
 
unsigned texture_index = tex->texture_index;
-   if (tex->texture->deref.child) {
-  assert(tex->texture->deref.child->deref_type == nir_deref_type_array);
-  nir_deref_array *deref_array = 
nir_deref_as_array(tex->texture->deref.child);
-  if (deref_array->deref_array_type != nir_deref_array_type_direct)
+   if (deref->deref_type != nir_deref_type_var) {
+  assert(deref->deref_type == nir_deref_type_array);
+  nir_const_value *const_index = nir_src_as_const_value(deref->arr.index);
+  if (!const_index)
  return false;
   size_t hw_binding_size =
  anv_descriptor_set_binding_layout_get_hw_size(binding);
-  texture_index += MIN2(deref_array->base_offset, hw_binding_size - 1);
+  texture_index += MIN2(const_index->u32[0], hw_binding_size - 1);
}
const struct anv_sampler *sampler =
   binding->immutable_samplers[texture_index];
@@ -356,6 +359,7 @@ try_lower_tex_ycbcr(struct anv_pipeline_layout *layout,
struct ycbcr_state state = {
   .builder = builder,
   .origin_tex = tex,
+  .tex_deref = deref,
   .conversion = sampler->conversion,
};
 
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 4aef86a..8e077c4 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -218,9 +218,6 @@ anv_shader_compile_to_nir(struct anv_pipeline *pipeline,
if (stage == MESA_SHADER_FRAGMENT)
   NIR_PASS_V(nir, anv_nir_lower_input_attachments);
 
-   NIR_PASS_V(nir, 

[Mesa-dev] [PATCH 60/61] intel/fs: Use image_deref intrinsics instead of image_var

2018-03-23 Thread Jason Ekstrand
Since we had to rewrite the deref walking loop anyway, I took the
opportunity to make it a bit clearer and more efficient.  In particular,
in the AoA case, we will now emit one minmax instead of one per array
level.
---
 src/intel/compiler/brw_fs.h   |   2 +-
 src/intel/compiler/brw_fs_nir.cpp | 157 --
 src/intel/compiler/brw_nir.c  |   2 +-
 3 files changed, 86 insertions(+), 75 deletions(-)

diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index e384db8..9a47076 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -235,7 +235,7 @@ public:
fs_reg get_nir_src(const nir_src );
fs_reg get_nir_src_imm(const nir_src );
fs_reg get_nir_dest(const nir_dest );
-   fs_reg get_nir_image_deref(const nir_deref_var *deref);
+   fs_reg get_nir_image_deref(nir_deref_instr *deref);
fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
void emit_percomp(const brw::fs_builder , const fs_inst ,
  unsigned wr_mask);
diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index f5d5399..40c9092 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -413,6 +413,10 @@ fs_visitor::nir_emit_instr(nir_instr *instr)
   nir_emit_alu(abld, nir_instr_as_alu(instr));
   break;
 
+   case nir_instr_type_deref:
+  /* Derefs can exist for images but they do nothing */
+  break;
+
case nir_instr_type_intrinsic:
   switch (stage) {
   case MESA_SHADER_VERTEX:
@@ -1595,51 +1599,56 @@ fs_visitor::get_nir_dest(const nir_dest )
 }
 
 fs_reg
-fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
+fs_visitor::get_nir_image_deref(nir_deref_instr *deref)
 {
-   fs_reg image(UNIFORM, deref->var->data.driver_location / 4,
-BRW_REGISTER_TYPE_UD);
-   fs_reg indirect;
-   unsigned indirect_max = 0;
-
-   for (const nir_deref *tail = >deref; tail->child;
-tail = tail->child) {
-  const nir_deref_array *deref_array = nir_deref_as_array(tail->child);
-  assert(tail->child->deref_type == nir_deref_type_array);
-  const unsigned size = glsl_get_length(tail->type);
-  const unsigned element_size = type_size_scalar(deref_array->deref.type);
-  const unsigned base = MIN2(deref_array->base_offset, size - 1);
-  image = offset(image, bld, base * element_size);
-
-  if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
- fs_reg tmp = vgrf(glsl_type::uint_type);
-
- /* Accessing an invalid surface index with the dataport can result
-  * in a hang.  According to the spec "if the index used to
-  * select an individual element is negative or greater than or
-  * equal to the size of the array, the results of the operation
-  * are undefined but may not lead to termination" -- which is one
-  * of the possible outcomes of the hang.  Clamp the index to
-  * prevent access outside of the array bounds.
-  */
- bld.emit_minmax(tmp, retype(get_nir_src(deref_array->indirect),
- BRW_REGISTER_TYPE_UD),
- brw_imm_ud(size - base - 1), BRW_CONDITIONAL_L);
-
- indirect_max += element_size * (tail->type->length - 1);
-
- bld.MUL(tmp, tmp, brw_imm_ud(element_size * 4));
- if (indirect.file == BAD_FILE) {
-indirect = tmp;
- } else {
-bld.ADD(indirect, indirect, tmp);
- }
+   fs_reg arr_offset = brw_imm_ud(0);
+   unsigned array_size = BRW_IMAGE_PARAM_SIZE * 4;
+   nir_deref_instr *head = deref;
+   while (head->deref_type != nir_deref_type_var) {
+  assert(head->deref_type == nir_deref_type_array);
+
+  /* This level's element size is the previous level's array size */
+  const unsigned elem_size = array_size;
+
+  fs_reg index = retype(get_nir_src_imm(head->arr.index),
+BRW_REGISTER_TYPE_UD);
+  if (arr_offset.file == BRW_IMMEDIATE_VALUE &&
+  index.file == BRW_IMMEDIATE_VALUE) {
+ arr_offset.ud += index.ud * elem_size;
+  } else if (index.file == BRW_IMMEDIATE_VALUE) {
+ bld.ADD(arr_offset, arr_offset, brw_imm_ud(index.ud * elem_size));
+  } else {
+ fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD);
+ bld.MUL(tmp, index, brw_imm_ud(elem_size));
+ bld.ADD(tmp, tmp, arr_offset);
+ arr_offset = tmp;
   }
+
+  head = nir_deref_instr_parent(head);
+  assert(glsl_type_is_array(head->type));
+  array_size = elem_size * glsl_get_length(head->type);
}
 
-   if (indirect.file == BAD_FILE) {
-  return image;
+   assert(head->deref_type == nir_deref_type_var);
+   const unsigned max_arr_offset = array_size - (BRW_IMAGE_PARAM_SIZE * 4);
+   fs_reg image(UNIFORM, head->var->data.driver_location / 4,
+BRW_REGISTER_TYPE_UD);
+
+   if (arr_offset.file == 

[Mesa-dev] [PATCH 34/61] nir: Support deref instructions in lower_wpos_ytransform

2018-03-23 Thread Jason Ekstrand
---
 src/compiler/nir/nir_lower_wpos_ytransform.c | 51 +++-
 1 file changed, 42 insertions(+), 9 deletions(-)

diff --git a/src/compiler/nir/nir_lower_wpos_ytransform.c 
b/src/compiler/nir/nir_lower_wpos_ytransform.c
index 62166e7..6212702 100644
--- a/src/compiler/nir/nir_lower_wpos_ytransform.c
+++ b/src/compiler/nir/nir_lower_wpos_ytransform.c
@@ -77,11 +77,10 @@ nir_cmp(nir_builder *b, nir_ssa_def *src0, nir_ssa_def 
*src1, nir_ssa_def *src2)
 /* see emit_wpos_adjustment() in st_mesa_to_tgsi.c */
 static void
 emit_wpos_adjustment(lower_wpos_ytransform_state *state,
- nir_intrinsic_instr *intr,
+ nir_intrinsic_instr *intr, nir_variable *fragcoord,
  bool invert, float adjX, float adjY[2])
 {
nir_builder *b = >b;
-   nir_variable *fragcoord = intr->variables[0]->var;
nir_ssa_def *wpostrans, *wpos_temp, *wpos_temp_y, *wpos_input;
 
assert(intr->dest.is_ssa);
@@ -144,10 +143,10 @@ emit_wpos_adjustment(lower_wpos_ytransform_state *state,
 }
 
 static void
-lower_fragcoord(lower_wpos_ytransform_state *state, nir_intrinsic_instr *intr)
+lower_fragcoord(lower_wpos_ytransform_state *state,
+nir_intrinsic_instr *intr, nir_variable *fragcoord)
 {
const nir_lower_wpos_ytransform_options *options = state->options;
-   nir_variable *fragcoord = intr->variables[0]->var;
float adjX = 0.0f;
float adjY[2] = { 0.0f, 0.0f };
bool invert = false;
@@ -229,7 +228,7 @@ lower_fragcoord(lower_wpos_ytransform_state *state, 
nir_intrinsic_instr *intr)
   }
}
 
-   emit_wpos_adjustment(state, intr, invert, adjX, adjY);
+   emit_wpos_adjustment(state, intr, fragcoord, invert, adjX, adjY);
 }
 
 /* turns 'fddy(p)' into 'fddy(fmul(p, transform.x))' */
@@ -253,7 +252,25 @@ lower_fddy(lower_wpos_ytransform_state *state, 
nir_alu_instr *fddy)
   fddy->src[0].swizzle[i] = MIN2(i, pt->num_components - 1);
 }
 
-/* Multiply interp_var_at_offset's offset by transform.x to flip it. */
+/* Multiply interp_deref_at_offset's offset by transform.x to flip it. */
+static void
+lower_interp_deref_at_offset(lower_wpos_ytransform_state *state,
+   nir_intrinsic_instr *interp)
+{
+   nir_builder *b = >b;
+   nir_ssa_def *offset;
+   nir_ssa_def *flip_y;
+
+   b->cursor = nir_before_instr(>instr);
+
+   offset = nir_ssa_for_src(b, interp->src[1], 2);
+   flip_y = nir_fmul(b, nir_channel(b, offset, 1),
+nir_channel(b, get_transform(state), 0));
+   nir_instr_rewrite_src(>instr, >src[1],
+ nir_src_for_ssa(nir_vec2(b, nir_channel(b, offset, 0),
+ flip_y)));
+}
+
 static void
 lower_interp_var_at_offset(lower_wpos_ytransform_state *state,
nir_intrinsic_instr *interp)
@@ -298,7 +315,21 @@ lower_wpos_ytransform_block(lower_wpos_ytransform_state 
*state, nir_block *block
nir_foreach_instr_safe(instr, block) {
   if (instr->type == nir_instr_type_intrinsic) {
  nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
- if (intr->intrinsic == nir_intrinsic_load_var) {
+ if (intr->intrinsic == nir_intrinsic_load_deref) {
+nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+nir_variable *var = nir_deref_instr_get_variable(deref);
+
+if ((var->data.mode == nir_var_shader_in &&
+ var->data.location == VARYING_SLOT_POS) ||
+(var->data.mode == nir_var_system_value &&
+ var->data.location == SYSTEM_VALUE_FRAG_COORD)) {
+   /* gl_FragCoord should not have array/struct derefs: */
+   lower_fragcoord(state, intr, var);
+} else if (var->data.mode == nir_var_system_value &&
+   var->data.location == SYSTEM_VALUE_SAMPLE_POS) {
+   lower_load_sample_pos(state, intr);
+}
+ } else if (intr->intrinsic == nir_intrinsic_load_var) {
 nir_deref_var *dvar = intr->variables[0];
 nir_variable *var = dvar->var;
 
@@ -308,16 +339,18 @@ lower_wpos_ytransform_block(lower_wpos_ytransform_state 
*state, nir_block *block
  var->data.location == SYSTEM_VALUE_FRAG_COORD)) {
/* gl_FragCoord should not have array/struct derefs: */
assert(dvar->deref.child == NULL);
-   lower_fragcoord(state, intr);
+   lower_fragcoord(state, intr, var);
 } else if (var->data.mode == nir_var_system_value &&
var->data.location == SYSTEM_VALUE_SAMPLE_POS) {
assert(dvar->deref.child == NULL);
lower_load_sample_pos(state, intr);
 }
  } else if (intr->intrinsic == nir_intrinsic_load_frag_coord) {
-lower_fragcoord(state, intr);
+lower_fragcoord(state, intr, NULL);
  } else if (intr->intrinsic == 

[Mesa-dev] [PATCH 21/61] nir: Support deref instructions in lower_io_to_temporaries

2018-03-23 Thread Jason Ekstrand
---
 src/compiler/nir/nir_lower_io_to_temporaries.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/compiler/nir/nir_lower_io_to_temporaries.c 
b/src/compiler/nir/nir_lower_io_to_temporaries.c
index 301ba65..7ba66ba 100644
--- a/src/compiler/nir/nir_lower_io_to_temporaries.c
+++ b/src/compiler/nir/nir_lower_io_to_temporaries.c
@@ -198,4 +198,6 @@ nir_lower_io_to_temporaries(nir_shader *shader, 
nir_function_impl *entrypoint,
 
exec_list_append(>globals, _inputs);
exec_list_append(>globals, _outputs);
+
+   nir_fixup_deref_modes(shader);
 }
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 54/61] nir,spirv: Rework function calls

2018-03-23 Thread Jason Ekstrand
This commit completely reworks function calls in NIR.  Instead of having
a set of variables for the parameters and return value, nir_call_instr
now simply has a number of sources which get mapped to load_param
intrinsics inside the functions.  It's up to the client API to build an
ABI on top of that.  In SPIR-V, out parameters are handled by passing
the result of a deref through as an SSA value and storing to it.

The virtue of this approach can be seen by how much it allows us to
delete from core NIR.  In particular, nir_inline_functions gets halved
and goes from a fairly difficult pass to understand in detail to almost
trivial.  It also simplifies spirv_to_nir somewhat because NIR functions
never were a good fit for SPIR-V.

Unfortunately, there is no good way to do this without a mega-commit.
Core NIR and SPIR-V have to be changed at the same time.  This also
requires changes to anv and radv because nir_inline_functions couldn't
handle deref instructions before this change and can't work without them
after this change.
---
 src/amd/vulkan/radv_shader.c |   5 +-
 src/compiler/glsl/glsl_to_nir.cpp|   1 -
 src/compiler/nir/nir.c   |  55 +++
 src/compiler/nir/nir.h   |  35 ++---
 src/compiler/nir/nir_builder.h   |  16 ++
 src/compiler/nir/nir_clone.c |  15 +-
 src/compiler/nir/nir_inline_functions.c  | 193 ---
 src/compiler/nir/nir_intrinsics.h|   4 +
 src/compiler/nir/nir_print.c |  63 +---
 src/compiler/nir/nir_remove_dead_variables.c |  20 +--
 src/compiler/nir/nir_serialize.c |  44 ++
 src/compiler/nir/nir_sweep.c |   4 -
 src/compiler/nir/nir_validate.c  |  37 ++---
 src/compiler/spirv/spirv_to_nir.c|  64 
 src/compiler/spirv/vtn_cfg.c | 221 +--
 src/compiler/spirv/vtn_private.h |   1 -
 src/compiler/spirv/vtn_variables.c   |   3 -
 src/intel/vulkan/anv_pipeline.c  |   5 +-
 18 files changed, 256 insertions(+), 530 deletions(-)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index e5825ed..6376979 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -228,8 +228,6 @@ radv_shader_compile_to_nir(struct radv_device *device,
 
free(spec_entries);
 
-   NIR_PASS_V(nir, nir_lower_deref_instrs, ~0);
-
/* We have to lower away local constant initializers right 
before we
 * inline functions.  That way they get properly initialized at 
the top
 * of the function and not at the top of its caller.
@@ -237,6 +235,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_local);
NIR_PASS_V(nir, nir_lower_returns);
NIR_PASS_V(nir, nir_inline_functions);
+   NIR_PASS_V(nir, nir_copy_prop);
 
/* Pick off the single entrypoint that we want */
foreach_list_typed_safe(nir_function, func, node, 
>functions) {
@@ -246,6 +245,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
assert(exec_list_length(>functions) == 1);
entry_point->name = ralloc_strdup(entry_point, "main");
 
+   NIR_PASS_V(nir, nir_lower_deref_instrs, ~0);
+
/* Make sure we lower constant initializers on output variables 
so that
 * nir_remove_dead_variables below sees the corresponding stores
 */
diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index db0c911..07a993b 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -514,7 +514,6 @@ nir_visitor::visit(ir_function_signature *ir)
 
   assert(strcmp(func->name, "main") == 0);
   assert(ir->parameters.is_empty());
-  assert(func->return_type == glsl_type::void_type);
 
   this->is_global = false;
 
diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
index 2ed96a1..7f52cfd 100644
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -125,10 +125,6 @@ nir_shader_add_variable(nir_shader *shader, nir_variable 
*var)
   assert(!"nir_shader_add_variable cannot be used for local variables");
   break;
 
-   case nir_var_param:
-  assert(!"nir_shader_add_variable cannot be used for function 
parameters");
-  break;
-
case nir_var_global:
   exec_list_push_tail(>globals, >node);
   break;
@@ -205,7 +201,6 @@ nir_function_create(nir_shader *shader, const char *name)
func->shader = shader;
func->num_params = 0;
func->params = NULL;
-   func->return_type = glsl_void_type();
func->impl = NULL;
 
return func;
@@ -286,9 +281,6 @@ nir_function_impl_create_bare(nir_shader *shader)

[Mesa-dev] [PATCH 40/61] st/nir: Move lower_deref_instrs later

2018-03-23 Thread Jason Ekstrand
---
 src/mesa/state_tracker/st_glsl_to_nir.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp 
b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index 7d111d6..f62135a 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -328,7 +328,6 @@ st_glsl_to_nir(struct st_context *st, struct gl_program 
*prog,
   return prog->nir;
 
nir_shader *nir = glsl_to_nir(shader_program, stage, options);
-   nir_lower_deref_instrs(nir, (nir_lower_deref_flags)~0);
 
nir_variable_mode mask =
   (nir_variable_mode) (nir_var_shader_in | nir_var_shader_out);
@@ -527,6 +526,8 @@ st_nir_get_mesa_program(struct gl_context *ctx,
 
nir_shader *nir = st_glsl_to_nir(st, prog, shader_program, shader->Stage);
 
+   nir_lower_deref_instrs(nir, (nir_lower_deref_flags)~0);
+
set_st_program(prog, shader_program, nir);
prog->nir = nir;
 }
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 41/61] spirv: Use deref instructions for most variables

2018-03-23 Thread Jason Ekstrand
The only things still using old-school derefs are function calls.
---
 src/compiler/spirv/spirv_to_nir.c  | 119 +++--
 src/compiler/spirv/vtn_cfg.c   |   8 +-
 src/compiler/spirv/vtn_glsl450.c   |  19 ++--
 src/compiler/spirv/vtn_private.h   |  13 ++-
 src/compiler/spirv/vtn_variables.c | 211 +
 5 files changed, 182 insertions(+), 188 deletions(-)

diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index e91d1e3..29b4512 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -29,6 +29,7 @@
 #include "nir/nir_vla.h"
 #include "nir/nir_control_flow.h"
 #include "nir/nir_constant_expressions.h"
+#include "nir/nir_deref.h"
 #include "spirv_info.h"
 
 #include 
@@ -1778,7 +1779,7 @@ vtn_handle_function_call(struct vtn_builder *b, SpvOp 
opcode,
   struct vtn_value *arg = vtn_untyped_value(b, arg_id);
   if (arg->value_type == vtn_value_type_pointer &&
   arg->pointer->ptr_type->type == NULL) {
- nir_deref_var *d = vtn_pointer_to_deref(b, arg->pointer);
+ nir_deref_var *d = vtn_pointer_to_deref_var(b, arg->pointer);
  call->params[i] = nir_deref_var_clone(d, call);
   } else {
  struct vtn_ssa_value *arg_ssa = vtn_ssa_value(b, arg_id);
@@ -1788,7 +1789,8 @@ vtn_handle_function_call(struct vtn_builder *b, SpvOp 
opcode,
 nir_local_variable_create(b->nb.impl, arg_ssa->type, "arg_tmp");
  call->params[i] = nir_deref_var_create(call, tmp);
 
- vtn_local_store(b, arg_ssa, call->params[i]);
+ vtn_local_store(b, arg_ssa,
+ nir_build_deref_for_chain(>nb, call->params[i]));
   }
}
 
@@ -1805,7 +1807,9 @@ vtn_handle_function_call(struct vtn_builder *b, SpvOp 
opcode,
if (glsl_type_is_void(callee->return_type)) {
   vtn_push_value(b, w[2], vtn_value_type_undef);
} else {
-  vtn_push_ssa(b, w[2], res_type, vtn_local_load(b, call->return_deref));
+  nir_deref_instr *return_deref =
+ nir_build_deref_for_chain(>nb, call->return_deref);
+  vtn_push_ssa(b, w[2], res_type, vtn_local_load(b, return_deref));
}
 }
 
@@ -1959,9 +1963,41 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
   vtn_fail("Unhandled opcode");
}
 
-   nir_tex_src srcs[8]; /* 8 should be enough */
+   nir_tex_src srcs[10]; /* 10 should be enough */
nir_tex_src *p = srcs;
 
+   nir_deref_instr *sampler = vtn_pointer_to_deref(b, sampled.sampler);
+   nir_deref_instr *texture =
+  sampled.image ? vtn_pointer_to_deref(b, sampled.image) : sampler;
+
+   p->src = nir_src_for_ssa(>dest.ssa);
+   p->src_type = nir_tex_src_texture_deref;
+   p++;
+
+   switch (texop) {
+   case nir_texop_tex:
+   case nir_texop_txb:
+   case nir_texop_txl:
+   case nir_texop_txd:
+   case nir_texop_tg4:
+  /* These operations require a sampler */
+  p->src = nir_src_for_ssa(>dest.ssa);
+  p->src_type = nir_tex_src_sampler_deref;
+  p++;
+  break;
+   case nir_texop_txf:
+   case nir_texop_txf_ms:
+   case nir_texop_txs:
+   case nir_texop_lod:
+   case nir_texop_query_levels:
+   case nir_texop_texture_samples:
+   case nir_texop_samples_identical:
+  /* These don't */
+  break;
+   case nir_texop_txf_ms_mcs:
+  vtn_fail("unexpected nir_texop_txf_ms_mcs");
+   }
+
unsigned idx = 4;
 
struct nir_ssa_def *coord;
@@ -2119,40 +2155,6 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
   vtn_fail("Invalid base type for sampler result");
}
 
-   nir_deref_var *sampler = vtn_pointer_to_deref(b, sampled.sampler);
-   nir_deref_var *texture;
-   if (sampled.image) {
-  nir_deref_var *image = vtn_pointer_to_deref(b, sampled.image);
-  texture = image;
-   } else {
-  texture = sampler;
-   }
-
-   instr->texture = nir_deref_var_clone(texture, instr);
-
-   switch (instr->op) {
-   case nir_texop_tex:
-   case nir_texop_txb:
-   case nir_texop_txl:
-   case nir_texop_txd:
-   case nir_texop_tg4:
-  /* These operations require a sampler */
-  instr->sampler = nir_deref_var_clone(sampler, instr);
-  break;
-   case nir_texop_txf:
-   case nir_texop_txf_ms:
-   case nir_texop_txs:
-   case nir_texop_lod:
-   case nir_texop_query_levels:
-   case nir_texop_texture_samples:
-   case nir_texop_samples_identical:
-  /* These don't */
-  instr->sampler = NULL;
-  break;
-   case nir_texop_txf_ms_mcs:
-  vtn_fail("unexpected nir_texop_txf_ms_mcs");
-   }
-
nir_ssa_dest_init(>instr, >dest,
  nir_tex_instr_dest_size(instr), 32, NULL);
 
@@ -2177,8 +2179,6 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
  instrs[i]->is_new_style_shadow = instr->is_new_style_shadow;
  instrs[i]->component = instr->component;
  instrs[i]->dest_type = instr->dest_type;
- instrs[i]->texture = nir_deref_var_clone(texture, instrs[i]);
- instrs[i]->sampler = NULL;
 

[Mesa-dev] [PATCH 24/61] nir: Support deref instructions in split_var_copies

2018-03-23 Thread Jason Ekstrand
---
 src/compiler/nir/nir_split_var_copies.c | 42 +
 1 file changed, 42 insertions(+)

diff --git a/src/compiler/nir/nir_split_var_copies.c 
b/src/compiler/nir/nir_split_var_copies.c
index bc3ceed..bcd1f10 100644
--- a/src/compiler/nir/nir_split_var_copies.c
+++ b/src/compiler/nir/nir_split_var_copies.c
@@ -26,6 +26,7 @@
  */
 
 #include "nir.h"
+#include "nir_builder.h"
 
 /*
  * Implements "copy splitting" which is similar to structure splitting only
@@ -259,6 +260,25 @@ split_var_copies_block(nir_block *block, struct 
split_var_copies_state *state)
return true;
 }
 
+static void
+split_deref_copy_instr(nir_builder *b,
+   nir_deref_instr *dst, nir_deref_instr *src)
+{
+   assert(dst->type == src->type);
+   if (glsl_type_is_vector_or_scalar(src->type)) {
+  nir_copy_deref(b, dst, src);
+   } else if (glsl_type_is_struct(src->type)) {
+  for (unsigned i = 0; i < glsl_get_length(src->type); i++) {
+ split_deref_copy_instr(b, nir_build_deref_struct(b, dst, i),
+   nir_build_deref_struct(b, src, i));
+  }
+   } else {
+  assert(glsl_type_is_matrix(src->type) || glsl_type_is_array(src->type));
+  split_deref_copy_instr(b, nir_build_deref_array_wildcard(b, dst),
+nir_build_deref_array_wildcard(b, src));
+   }
+}
+
 static bool
 split_var_copies_impl(nir_function_impl *impl)
 {
@@ -268,8 +288,30 @@ split_var_copies_impl(nir_function_impl *impl)
state.dead_ctx = ralloc_context(NULL);
state.progress = false;
 
+   nir_builder b;
+   nir_builder_init(, impl);
+
nir_foreach_block(block, impl) {
   split_var_copies_block(block, );
+
+  nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+continue;
+
+ nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr);
+ if (copy->intrinsic != nir_intrinsic_copy_deref)
+continue;
+
+ b.cursor = nir_instr_remove(>instr);
+
+ nir_deref_instr *dst =
+nir_instr_as_deref(copy->src[0].ssa->parent_instr);
+ nir_deref_instr *src =
+nir_instr_as_deref(copy->src[1].ssa->parent_instr);
+ split_deref_copy_instr(, dst, src);
+
+ state.progress = true;
+  }
}
 
ralloc_free(state.dead_ctx);
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 58/61] anv/apply_pipeline_layout: Simplify extract_tex_src_plane

2018-03-23 Thread Jason Ekstrand
---
 src/intel/vulkan/anv_nir_apply_pipeline_layout.c | 46 +++-
 1 file changed, 12 insertions(+), 34 deletions(-)

diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c 
b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
index d5a08f7..87d9a91 100644
--- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
+++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
@@ -159,7 +159,7 @@ lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin,
 static void
 lower_tex_deref(nir_tex_instr *tex, nir_deref_var *deref,
 unsigned *const_index, unsigned array_size,
-nir_tex_src_type src_type, bool allow_indirect,
+nir_tex_src_type src_type,
 struct apply_pipeline_layout_state *state)
 {
nir_builder *b = >builder;
@@ -176,7 +176,7 @@ lower_tex_deref(nir_tex_instr *tex, nir_deref_var *deref,
   * aggregated into arrays in shader code, irrespective of the
   * shaderSampledImageArrayDynamicIndexing feature.
   */
- assert(allow_indirect);
+ assert(nir_tex_instr_src_index(tex, nir_tex_src_plane) == -1);
 
  nir_ssa_def *index =
 nir_iadd(b, nir_imm_int(b, deref_array->base_offset),
@@ -206,37 +206,16 @@ cleanup_tex_deref(nir_tex_instr *tex, nir_deref_var 
*deref)
nir_instr_rewrite_src(>instr, _array->indirect, NIR_SRC_INIT);
 }
 
-static bool
-has_tex_src_plane(nir_tex_instr *tex)
-{
-   for (unsigned i = 0; i < tex->num_srcs; i++) {
-  if (tex->src[i].src_type == nir_tex_src_plane)
- return true;
-   }
-
-   return false;
-}
-
 static uint32_t
-extract_tex_src_plane(nir_tex_instr *tex)
+tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
 {
-   unsigned plane = 0;
-
-   int plane_src_idx = -1;
-   for (unsigned i = 0; i < tex->num_srcs; i++) {
-  if (tex->src[i].src_type == nir_tex_src_plane) {
- nir_const_value *const_plane =
-nir_src_as_const_value(tex->src[i].src);
-
- /* Our color conversion lowering pass should only ever insert
-  * constants. */
- assert(const_plane);
- plane = const_plane->u32[0];
- plane_src_idx = i;
-  }
-   }
+   int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane);
+   if (plane_src_idx < 0)
+  return 0;
+
+   unsigned plane =
+  nir_src_as_const_value(tex->src[plane_src_idx].src)->u32[0];
 
-   assert(plane_src_idx >= 0);
nir_tex_instr_remove_src(tex, plane_src_idx);
 
return plane;
@@ -254,12 +233,11 @@ lower_tex(nir_tex_instr *tex, struct 
apply_pipeline_layout_state *state)
unsigned binding = tex->texture->var->data.binding;
unsigned array_size =
   state->layout->set[set].layout->binding[binding].array_size;
-   bool has_plane = has_tex_src_plane(tex);
-   unsigned plane = has_plane ? extract_tex_src_plane(tex) : 0;
+   unsigned plane = tex_instr_get_and_remove_plane_src(tex);
 
tex->texture_index = state->set[set].surface_offsets[binding];
lower_tex_deref(tex, tex->texture, >texture_index, array_size,
-   nir_tex_src_texture_offset, !has_plane, state);
+   nir_tex_src_texture_offset, state);
tex->texture_index += plane;
 
if (tex->sampler) {
@@ -269,7 +247,7 @@ lower_tex(nir_tex_instr *tex, struct 
apply_pipeline_layout_state *state)
  state->layout->set[set].layout->binding[binding].array_size;
   tex->sampler_index = state->set[set].sampler_offsets[binding];
   lower_tex_deref(tex, tex->sampler, >sampler_index, array_size,
-  nir_tex_src_sampler_offset, !has_plane, state);
+  nir_tex_src_sampler_offset, state);
   tex->sampler_index += plane;
}
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 61/61] intel/nir: Only lower load/store derefs

2018-03-23 Thread Jason Ekstrand
Everything else should already be handled.
---
 src/intel/compiler/brw_nir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 8bd9b96..b8c18ea 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -740,7 +740,7 @@ brw_postprocess_nir(nir_shader *nir, const struct 
brw_compiler *compiler,
OPT(nir_opt_dce);
OPT(nir_opt_move_comparisons);
 
-   OPT(nir_lower_deref_instrs, ~nir_lower_image_derefs);
+   OPT(nir_lower_deref_instrs, nir_lower_load_store_derefs);
 
OPT(nir_lower_locals_to_regs);
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 46/61] anv/pipeline: Lower more constant initializers earlier

2018-03-23 Thread Jason Ekstrand
Once we've gotten rid of everything but the main entrypoint, there's no
reason why we shouldn't go ahead and lower them all.  This is what radv
does and it will make future work easier.
---
 src/intel/vulkan/anv_pipeline.c | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 4af304e..3323e84 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -188,10 +188,12 @@ anv_shader_compile_to_nir(struct anv_pipeline *pipeline,
assert(exec_list_length(>functions) == 1);
entry_point->name = ralloc_strdup(entry_point, "main");
 
-   /* Make sure we lower constant initializers on output variables so that
-* nir_remove_dead_variables below sees the corresponding stores
+   /* Now that we've deleted all but the main function, we can go ahead and
+* lower the rest of the constant initializers.  We do this here so that
+* nir_remove_dead_variables and split_per_member_structs below see the
+* corresponding stores.
 */
-   NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_shader_out);
+   NIR_PASS_V(nir, nir_lower_constant_initializers, ~0);
 
NIR_PASS_V(nir, nir_remove_dead_variables,
   nir_var_shader_in | nir_var_shader_out | nir_var_system_value);
@@ -199,10 +201,6 @@ anv_shader_compile_to_nir(struct anv_pipeline *pipeline,
if (stage == MESA_SHADER_FRAGMENT)
   NIR_PASS_V(nir, nir_lower_wpos_center, pipeline->sample_shading_enable);
 
-   /* Now that we've deleted all but the main function, we can go ahead and
-* lower the rest of the constant initializers.
-*/
-   NIR_PASS_V(nir, nir_lower_constant_initializers, ~0);
NIR_PASS_V(nir, nir_propagate_invariant);
NIR_PASS_V(nir, nir_lower_io_to_temporaries,
   entry_point->impl, true, false);
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 36/61] nir: Support deref instructions in remove_unused_varyings

2018-03-23 Thread Jason Ekstrand
---
 src/compiler/nir/nir_linking_helpers.c | 50 ++
 1 file changed, 27 insertions(+), 23 deletions(-)

diff --git a/src/compiler/nir/nir_linking_helpers.c 
b/src/compiler/nir/nir_linking_helpers.c
index 2b0a266..1a0cb91 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -62,29 +62,33 @@ static void
 tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t 
*patches_read)
 {
nir_foreach_function(function, shader) {
-  if (function->impl) {
- nir_foreach_block(block, function->impl) {
-nir_foreach_instr(instr, block) {
-   if (instr->type != nir_instr_type_intrinsic)
-  continue;
-
-   nir_intrinsic_instr *intrin_instr =
-  nir_instr_as_intrinsic(instr);
-   if (intrin_instr->intrinsic == nir_intrinsic_load_var &&
-   intrin_instr->variables[0]->var->data.mode ==
-   nir_var_shader_out) {
-
-  nir_variable *var = intrin_instr->variables[0]->var;
-  if (var->data.patch) {
- patches_read[var->data.location_frac] |=
-get_variable_io_mask(intrin_instr->variables[0]->var,
- shader->info.stage);
-  } else {
- read[var->data.location_frac] |=
-get_variable_io_mask(intrin_instr->variables[0]->var,
- shader->info.stage);
-  }
-   }
+  if (!function->impl)
+ continue;
+
+  nir_foreach_block(block, function->impl) {
+ nir_foreach_instr(instr, block) {
+if (instr->type != nir_instr_type_intrinsic)
+   continue;
+
+nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+nir_variable *var;
+if (intrin->intrinsic == nir_intrinsic_load_var) {
+   var = intrin->variables[0]->var;
+} else if (intrin->intrinsic == nir_intrinsic_load_deref) {
+   var = 
nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[0]));
+} else {
+   continue;
+}
+
+if (var->data.mode != nir_var_shader_out)
+   continue;
+
+if (var->data.patch) {
+   patches_read[var->data.location_frac] |=
+  get_variable_io_mask(var, shader->info.stage);
+} else {
+   read[var->data.location_frac] |=
+  get_variable_io_mask(var, shader->info.stage);
 }
  }
   }
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 55/61] anv/pipeline: Do less deref instruction lowering

2018-03-23 Thread Jason Ekstrand
This commit removes most of the deref instruction lowering.  Instead of
lowering early, we only lower textures and images and we only do so
right before any of the anv image lowering passes.
---
 src/intel/vulkan/anv_pipeline.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 020f4fd..b4a9d83 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -187,8 +187,6 @@ anv_shader_compile_to_nir(struct anv_pipeline *pipeline,
assert(exec_list_length(>functions) == 1);
entry_point->name = ralloc_strdup(entry_point, "main");
 
-   NIR_PASS_V(nir, nir_lower_deref_instrs, ~0);
-
/* Now that we've deleted all but the main function, we can go ahead and
 * lower the rest of the constant initializers.  We do this here so that
 * nir_remove_dead_variables and split_per_member_structs below see the
@@ -217,6 +215,9 @@ anv_shader_compile_to_nir(struct anv_pipeline *pipeline,
 
nir = brw_preprocess_nir(compiler, nir);
 
+   NIR_PASS_V(nir, nir_lower_deref_instrs,
+  nir_lower_texture_derefs | nir_lower_image_derefs);
+
if (stage == MESA_SHADER_FRAGMENT)
   NIR_PASS_V(nir, anv_nir_lower_input_attachments);
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 39/61] i965: Move nir_lower_deref_instrs to right before locals_to_regs

2018-03-23 Thread Jason Ekstrand
---
 src/intel/compiler/brw_nir.c| 2 ++
 src/mesa/drivers/dri/i965/brw_program.c | 3 +--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index cf994ac..4fc6cae 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -740,6 +740,8 @@ brw_postprocess_nir(nir_shader *nir, const struct 
brw_compiler *compiler,
OPT(nir_opt_dce);
OPT(nir_opt_move_comparisons);
 
+   OPT(nir_lower_deref_instrs, ~0);
+
OPT(nir_lower_locals_to_regs);
 
if (unlikely(debug_enabled)) {
diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index a871432..1ad4f74 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -77,7 +77,7 @@ brw_create_nir(struct brw_context *brw,
/* First, lower the GLSL IR or Mesa IR to NIR */
if (shader_prog) {
   nir = glsl_to_nir(shader_prog, stage, options);
-  nir_lower_deref_instrs(nir, ~0);
+  nir_lower_deref_instrs(nir, nir_lower_texture_derefs);
   nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out);
   nir_lower_returns(nir);
   nir_validate_shader(nir);
@@ -85,7 +85,6 @@ brw_create_nir(struct brw_context *brw,
  nir_shader_get_entrypoint(nir), true, false);
} else {
   nir = prog_to_nir(prog, options);
-  nir_lower_deref_instrs(nir, ~0);
   NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
}
nir_validate_shader(nir);
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 53/61] spirv/cfg: Make the builder fully capable for both walks

2018-03-23 Thread Jason Ekstrand
We were only initializing vtn_builder::func for the pre-walk where we
build the CFG.  We were only initializing the nir_builder for the later
walk through the instructions even though we were setting b->cursor
for the pre-walk.  Let's set both in both places so that everything is
consistent.  This is useful because we handle OpFunctionParameter in the
pre-walk and we're going to need to be able to emit instructions.
---
 src/compiler/spirv/vtn_cfg.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/compiler/spirv/vtn_cfg.c b/src/compiler/spirv/vtn_cfg.c
index 692e3b6..6e80ff5 100644
--- a/src/compiler/spirv/vtn_cfg.c
+++ b/src/compiler/spirv/vtn_cfg.c
@@ -95,6 +95,7 @@ vtn_cfg_handle_prepass_instruction(struct vtn_builder *b, 
SpvOp opcode,
   func->return_type = func_type->return_type->type;
 
   b->func->impl = nir_function_impl_create(func);
+  nir_builder_init(>nb, func->impl);
   b->nb.cursor = nir_before_cf_list(>func->impl->body);
 
   b->func_param_idx = 0;
@@ -878,6 +879,7 @@ vtn_function_emit(struct vtn_builder *b, struct 
vtn_function *func,
   vtn_instruction_handler instruction_handler)
 {
nir_builder_init(>nb, func->impl);
+   b->func = func;
b->nb.cursor = nir_after_cf_list(>impl->body);
b->has_loop_continue = false;
b->phi_table = _mesa_hash_table_create(b, _mesa_hash_pointer,
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 30/61] nir: Support deref instructions in propagate_invariant

2018-03-23 Thread Jason Ekstrand
---
 src/compiler/nir/nir_propagate_invariant.c | 23 ---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/src/compiler/nir/nir_propagate_invariant.c 
b/src/compiler/nir/nir_propagate_invariant.c
index 7b5bd6c..b48b91c 100644
--- a/src/compiler/nir/nir_propagate_invariant.c
+++ b/src/compiler/nir/nir_propagate_invariant.c
@@ -74,6 +74,15 @@ var_is_invariant(nir_variable *var, struct set * invariants)
return var->data.invariant || _mesa_set_search(invariants, var);
 }
 
+static nir_variable *
+intrinsic_get_var(nir_intrinsic_instr *intrin, unsigned i)
+{
+   if (nir_intrinsic_infos[intrin->intrinsic].num_variables == 0)
+  return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i]));
+   else
+  return intrin->variables[0]->var;
+}
+
 static void
 propagate_invariant_instr(nir_instr *instr, struct set *invariants)
 {
@@ -99,14 +108,16 @@ propagate_invariant_instr(nir_instr *instr, struct set 
*invariants)
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   switch (intrin->intrinsic) {
   case nir_intrinsic_copy_var:
+  case nir_intrinsic_copy_deref:
  /* If the destination is invariant then so is the source */
- if (var_is_invariant(intrin->variables[0]->var, invariants))
-add_var(intrin->variables[1]->var, invariants);
+ if (var_is_invariant(intrinsic_get_var(intrin, 0), invariants))
+add_var(intrinsic_get_var(intrin, 1), invariants);
  break;
 
   case nir_intrinsic_load_var:
+  case nir_intrinsic_load_deref:
  if (dest_is_invariant(>dest, invariants))
-add_var(intrin->variables[0]->var, invariants);
+add_var(intrinsic_get_var(intrin, 0), invariants);
  break;
 
   case nir_intrinsic_store_var:
@@ -114,12 +125,18 @@ propagate_invariant_instr(nir_instr *instr, struct set 
*invariants)
 add_src(>src[0], invariants);
  break;
 
+  case nir_intrinsic_store_deref:
+ if (var_is_invariant(intrinsic_get_var(intrin, 0), invariants))
+add_src(>src[1], invariants);
+ break;
+
   default:
  /* Nothing to do */
  break;
   }
}
 
+   case nir_instr_type_deref:
case nir_instr_type_jump:
case nir_instr_type_ssa_undef:
case nir_instr_type_load_const:
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 43/61] nir/lower_system_values: Support SYSTEM_VALUE_LOCAL_GROUP_SIZE

2018-03-23 Thread Jason Ekstrand
---
 src/compiler/nir/nir_lower_system_values.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/src/compiler/nir/nir_lower_system_values.c 
b/src/compiler/nir/nir_lower_system_values.c
index 104df51..5191fb3 100644
--- a/src/compiler/nir/nir_lower_system_values.c
+++ b/src/compiler/nir/nir_lower_system_values.c
@@ -106,6 +106,16 @@ convert_block(nir_block *block, nir_builder *b)
  break;
   }
 
+  case SYSTEM_VALUE_LOCAL_GROUP_SIZE: {
+ nir_const_value local_size;
+ memset(&local_size, 0, sizeof(local_size));
+ local_size.u32[0] = b->shader->info.cs.local_size[0];
+ local_size.u32[1] = b->shader->info.cs.local_size[1];
+ local_size.u32[2] = b->shader->info.cs.local_size[2];
+ sysval = nir_build_imm(b, 3, 32, local_size);
+ break;
+  }
+
   case SYSTEM_VALUE_VERTEX_ID:
  if (b->shader->options->vertex_id_zero_based) {
 sysval = nir_iadd(b,
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 44/61] spirv: Use the LOCAL_GROUP_SIZE system value

2018-03-23 Thread Jason Ekstrand
---
 src/compiler/spirv/vtn_variables.c | 17 ++---
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/src/compiler/spirv/vtn_variables.c 
b/src/compiler/spirv/vtn_variables.c
index aeb09dd..26b2adf 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -1261,8 +1261,8 @@ vtn_get_builtin_location(struct vtn_builder *b,
   set_mode_system_value(b, mode);
   break;
case SpvBuiltInWorkgroupSize:
-  /* This should already be handled */
-  vtn_fail("unsupported builtin");
+  *location = SYSTEM_VALUE_LOCAL_GROUP_SIZE;
+  set_mode_system_value(b, mode);
   break;
case SpvBuiltInWorkgroupId:
   *location = SYSTEM_VALUE_WORK_GROUP_ID;
@@ -1392,19 +1392,6 @@ apply_var_decoration(struct vtn_builder *b, nir_variable 
*nir_var,
case SpvDecorationBuiltIn: {
   SpvBuiltIn builtin = dec->literals[0];
 
-  if (builtin == SpvBuiltInWorkgroupSize) {
- /* This shouldn't be a builtin.  It's actually a constant. */
- nir_var->data.mode = nir_var_global;
- nir_var->data.read_only = true;
-
- nir_constant *c = rzalloc(nir_var, nir_constant);
- c->values[0].u32[0] = b->shader->info.cs.local_size[0];
- c->values[0].u32[1] = b->shader->info.cs.local_size[1];
- c->values[0].u32[2] = b->shader->info.cs.local_size[2];
- nir_var->constant_initializer = c;
- break;
-  }
-
   nir_variable_mode mode = nir_var->data.mode;
   vtn_get_builtin_location(b, builtin, _var->data.location, );
   nir_var->data.mode = mode;
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 31/61] nir: Support deref instructions in gather_info

2018-03-23 Thread Jason Ekstrand
---
 src/compiler/nir/nir_gather_info.c | 26 --
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/src/compiler/nir/nir_gather_info.c 
b/src/compiler/nir/nir_gather_info.c
index 743f968..50d67b6 100644
--- a/src/compiler/nir/nir_gather_info.c
+++ b/src/compiler/nir/nir_gather_info.c
@@ -219,7 +219,8 @@ try_mask_partial_io(nir_shader *shader, nir_deref_var 
*deref, bool is_output_rea
 }
 
 static void
-gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader)
+gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader,
+  void *dead_ctx)
 {
switch (instr->intrinsic) {
case nir_intrinsic_discard:
@@ -228,12 +229,22 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, 
nir_shader *shader)
   shader->info.fs.uses_discard = true;
   break;
 
+   case nir_intrinsic_interp_deref_at_centroid:
+   case nir_intrinsic_interp_deref_at_sample:
+   case nir_intrinsic_interp_deref_at_offset:
case nir_intrinsic_interp_var_at_centroid:
case nir_intrinsic_interp_var_at_sample:
case nir_intrinsic_interp_var_at_offset:
+   case nir_intrinsic_load_deref:
case nir_intrinsic_load_var:
+   case nir_intrinsic_store_deref:
case nir_intrinsic_store_var: {
-  nir_variable *var = instr->variables[0]->var;
+  nir_deref_var *deref;
+  if (nir_intrinsic_infos[instr->intrinsic].num_variables > 0)
+ deref = instr->variables[0];
+  else
+ deref = nir_deref_instr_to_deref(nir_src_as_deref(instr->src[0]), 
dead_ctx);
+  nir_variable *var = deref->var;
 
   if (var->data.mode == nir_var_shader_in ||
   var->data.mode == nir_var_shader_out) {
@@ -242,7 +253,7 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, 
nir_shader *shader)
  instr->intrinsic == nir_intrinsic_load_var)
 is_output_read = true;
 
- if (!try_mask_partial_io(shader, instr->variables[0], is_output_read))
+ if (!try_mask_partial_io(shader, deref, is_output_read))
 mark_whole_variable(shader, var, is_output_read);
 
  /* We need to track which input_reads bits correspond to a
@@ -328,7 +339,7 @@ gather_alu_info(nir_alu_instr *instr, nir_shader *shader)
 }
 
 static void
-gather_info_block(nir_block *block, nir_shader *shader)
+gather_info_block(nir_block *block, nir_shader *shader, void *dead_ctx)
 {
nir_foreach_instr(instr, block) {
   switch (instr->type) {
@@ -336,7 +347,7 @@ gather_info_block(nir_block *block, nir_shader *shader)
  gather_alu_info(nir_instr_as_alu(instr), shader);
  break;
   case nir_instr_type_intrinsic:
- gather_intrinsic_info(nir_instr_as_intrinsic(instr), shader);
+ gather_intrinsic_info(nir_instr_as_intrinsic(instr), shader, 
dead_ctx);
  break;
   case nir_instr_type_tex:
  gather_tex_info(nir_instr_as_tex(instr), shader);
@@ -384,7 +395,10 @@ nir_shader_gather_info(nir_shader *shader, 
nir_function_impl *entrypoint)
if (shader->info.stage == MESA_SHADER_FRAGMENT) {
   shader->info.fs.uses_sample_qualifier = false;
}
+
+   void *dead_ctx = ralloc_context(NULL);
nir_foreach_block(block, entrypoint) {
-  gather_info_block(block, shader);
+  gather_info_block(block, shader, dead_ctx);
}
+   ralloc_free(dead_ctx);
 }
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 47/61] spirv: Use NIR per-member splitting

2018-03-23 Thread Jason Ekstrand
Before, we were doing structure splitting in spirv_to_nir.
Unfortunately, this doesn't really work when you think about passing
struct pointers into functions.  Doing it later in NIR is a much better
plan.
---
 src/amd/vulkan/radv_shader.c   |   7 ++
 src/compiler/spirv/vtn_private.h   |   1 -
 src/compiler/spirv/vtn_variables.c | 146 ++---
 src/intel/vulkan/anv_pipeline.c|   6 ++
 4 files changed, 51 insertions(+), 109 deletions(-)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index d8cfd2e..e5825ed 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -258,6 +258,13 @@ radv_shader_compile_to_nir(struct radv_device *device,
 * lower the rest of the constant initializers.
 */
NIR_PASS_V(nir, nir_lower_constant_initializers, ~0);
+
+   /* Split member structs.  We do this before 
lower_io_to_temporaries so that
+* it doesn't lower system values to temporaries by accident.
+*/
+   NIR_PASS_V(nir, nir_split_var_copies);
+   NIR_PASS_V(nir, nir_split_per_member_structs);
+
NIR_PASS_V(nir, nir_lower_system_values);
NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
}
diff --git a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h
index 4098144..6591b22 100644
--- a/src/compiler/spirv/vtn_private.h
+++ b/src/compiler/spirv/vtn_private.h
@@ -455,7 +455,6 @@ struct vtn_variable {
bool patch;
 
nir_variable *var;
-   nir_variable **members;
 
int shared_location;
 
diff --git a/src/compiler/spirv/vtn_variables.c 
b/src/compiler/spirv/vtn_variables.c
index 03c898e..49cb837 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -350,37 +350,6 @@ vtn_pointer_dereference(struct vtn_builder *b,
}
 }
 
-/* Crawls a chain of array derefs and rewrites the types so that the
- * lengths stay the same but the terminal type is the one given by
- * tail_type.  This is useful for split structures.
- */
-static const struct glsl_type *
-rewrite_deref_var(struct vtn_builder *b, nir_deref_instr *deref,
-  struct nir_variable *var)
-{
-   /* Always set the mode */
-   deref->mode = var->data.mode;
-
-   if (deref->deref_type == nir_deref_type_var) {
-  assert(deref->var == NULL);
-  deref->var = var;
-  deref->type = var->type;
-   } else {
-  assert(deref->deref_type == nir_deref_type_array);
-  assert(deref->parent.is_ssa);
-  nir_deref_instr *parent =
- nir_instr_as_deref(deref->parent.ssa->parent_instr);
-  deref->type = rewrite_deref_var(b, parent, var);
-  assert(deref->type);
-   }
-
-   /* Return of the child type of this deref*/
-   if (glsl_type_is_array(deref->type))
-  return glsl_get_array_element(deref->type);
-   else
-  return NULL;
-}
-
 struct vtn_pointer *
 vtn_pointer_for_variable(struct vtn_builder *b,
  struct vtn_variable *var, struct vtn_type *ptr_type)
@@ -409,25 +378,19 @@ vtn_pointer_to_deref(struct vtn_builder *b, struct 
vtn_pointer *ptr)
nir_ssa_dest_init(_var->instr, _var->dest, 1, 32, NULL);
nir_builder_instr_insert(>nb, _var->instr);
 
-   if (ptr->var->var) {
-  deref_var->mode = ptr->var->var->data.mode;
-  deref_var->type = ptr->var->var->type;
-  deref_var->var = ptr->var->var;
-  /* Raw variable access */
-  if (!ptr->chain)
- return deref_var;
-   } else {
-  vtn_assert(ptr->var->members);
-  /* We'll fill out the rest of the deref_var later */
-  deref_var->type = ptr->var->type->type;
-   }
+   assert(ptr->var->var);
+   deref_var->mode = ptr->var->var->data.mode;
+   deref_var->type = ptr->var->var->type;
+   deref_var->var = ptr->var->var;
+   /* Raw variable access */
+   if (!ptr->chain)
+  return deref_var;
 
struct vtn_access_chain *chain = ptr->chain;
vtn_assert(chain);
 
struct vtn_type *deref_type = ptr->var->type;
nir_deref_instr *tail = deref_var;
-   nir_variable **members = ptr->var->members;
 
for (unsigned i = 0; i < chain->length; i++) {
   enum glsl_base_type base_type = glsl_get_base_type(deref_type->type);
@@ -462,13 +425,7 @@ vtn_pointer_to_deref(struct vtn_builder *b, struct 
vtn_pointer *ptr)
  vtn_assert(chain->link[i].mode == vtn_access_mode_literal);
  unsigned idx = chain->link[i].id;
  deref_type = deref_type->members[idx];
- if (members) {
-rewrite_deref_var(b, tail, members[idx]);
-assert(tail->type == deref_type->type);
-members = NULL;
- } else {
-tail = nir_build_deref_struct(>nb, tail, idx);
- }
+ tail = nir_build_deref_struct(>nb, tail, idx);
  break;
   }
   default:
@@ -476,7 +433,6 @@ vtn_pointer_to_deref(struct vtn_builder *b, struct 
vtn_pointer *ptr)
   

[Mesa-dev] [PATCH 33/61] nir: Support deref instructions in lower_atomics

2018-03-23 Thread Jason Ekstrand
---
 src/compiler/nir/nir_lower_atomics.c | 119 +--
 1 file changed, 113 insertions(+), 6 deletions(-)

diff --git a/src/compiler/nir/nir_lower_atomics.c 
b/src/compiler/nir/nir_lower_atomics.c
index ee66aa3..770ebe2 100644
--- a/src/compiler/nir/nir_lower_atomics.c
+++ b/src/compiler/nir/nir_lower_atomics.c
@@ -27,6 +27,7 @@
 
 #include "compiler/glsl/ir_uniform.h"
 #include "nir.h"
+#include "nir_builder.h"
 #include "main/config.h"
 #include 
 
@@ -36,9 +37,9 @@
  */
 
 static bool
-lower_instr(nir_intrinsic_instr *instr,
-const struct gl_shader_program *shader_program,
-nir_shader *shader, bool use_binding_as_idx)
+lower_var_instr(nir_intrinsic_instr *instr,
+const struct gl_shader_program *shader_program,
+nir_shader *shader, bool use_binding_as_idx)
 {
nir_intrinsic_op op;
switch (instr->intrinsic) {
@@ -175,6 +176,104 @@ lower_instr(nir_intrinsic_instr *instr,
return true;
 }
 
+static bool
+lower_deref_instr(nir_builder *b, nir_intrinsic_instr *instr,
+  const struct gl_shader_program *shader_program,
+  nir_shader *shader, bool use_binding_as_idx)
+{
+   nir_intrinsic_op op;
+   switch (instr->intrinsic) {
+   case nir_intrinsic_atomic_counter_read_deref:
+  op = nir_intrinsic_atomic_counter_read;
+  break;
+
+   case nir_intrinsic_atomic_counter_inc_deref:
+  op = nir_intrinsic_atomic_counter_inc;
+  break;
+
+   case nir_intrinsic_atomic_counter_dec_deref:
+  op = nir_intrinsic_atomic_counter_dec;
+  break;
+
+   case nir_intrinsic_atomic_counter_add_deref:
+  op = nir_intrinsic_atomic_counter_add;
+  break;
+
+   case nir_intrinsic_atomic_counter_min_deref:
+  op = nir_intrinsic_atomic_counter_min;
+  break;
+
+   case nir_intrinsic_atomic_counter_max_deref:
+  op = nir_intrinsic_atomic_counter_max;
+  break;
+
+   case nir_intrinsic_atomic_counter_and_deref:
+  op = nir_intrinsic_atomic_counter_and;
+  break;
+
+   case nir_intrinsic_atomic_counter_or_deref:
+  op = nir_intrinsic_atomic_counter_or;
+  break;
+
+   case nir_intrinsic_atomic_counter_xor_deref:
+  op = nir_intrinsic_atomic_counter_xor;
+  break;
+
+   case nir_intrinsic_atomic_counter_exchange_deref:
+  op = nir_intrinsic_atomic_counter_exchange;
+  break;
+
+   case nir_intrinsic_atomic_counter_comp_swap_deref:
+  op = nir_intrinsic_atomic_counter_comp_swap;
+  break;
+
+   default:
+  return false;
+   }
+
+   nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+
+   if (var->data.mode != nir_var_uniform &&
+   var->data.mode != nir_var_shader_storage &&
+   var->data.mode != nir_var_shared)
+  return false; /* atomics passed as function arguments can't be lowered */
+
+   const unsigned uniform_loc = var->data.location;
+   const unsigned idx = use_binding_as_idx ?
+  instr->variables[0]->var->data.binding :
+  
shader_program->data->UniformStorage[uniform_loc].opaque[shader->info.stage].index;
+
+   b->cursor = nir_before_instr(>instr);
+
+   nir_ssa_def *offset = nir_imm_int(b, var->data.offset);
+   for (nir_deref_instr *d = deref; d->deref_type != nir_deref_type_var;
+d = nir_deref_instr_parent(d)) {
+  assert(d->deref_type == nir_deref_type_array);
+  assert(d->arr.index.is_ssa);
+
+  unsigned array_stride = ATOMIC_COUNTER_SIZE;
+  if (glsl_type_is_array(d->type))
+ array_stride *= glsl_get_aoa_size(d->type);
+
+  offset = nir_iadd(b, offset, nir_imul(b, d->arr.index.ssa,
+nir_imm_int(b, array_stride)));
+   }
+
+   /* Since the first source is a deref and the first source in the lowered
+* instruction is the offset, we can just swap it out and change the
+* opcode.
+*/
+   instr->intrinsic = op;
+   nir_instr_rewrite_src(>instr, >src[0],
+ nir_src_for_ssa(offset));
+   nir_intrinsic_set_base(instr, idx);
+
+   nir_deref_instr_cleanup(deref);
+
+   return true;
+}
+
 bool
 nir_lower_atomics(nir_shader *shader,
   const struct gl_shader_program *shader_program,
@@ -188,14 +287,22 @@ nir_lower_atomics(nir_shader *shader,
 
   bool impl_progress = false;
 
+  nir_builder build;
+  nir_builder_init(, function->impl);
+
   nir_foreach_block(block, function->impl) {
  nir_foreach_instr_safe(instr, block) {
 if (instr->type != nir_instr_type_intrinsic)
continue;
 
-impl_progress |= lower_instr(nir_instr_as_intrinsic(instr),
- shader_program, shader,
- use_binding_as_idx);
+impl_progress |= lower_var_instr(nir_instr_as_intrinsic(instr),
+ shader_program, shader,
+

[Mesa-dev] [PATCH 52/61] spirv: Record the type of functions

2018-03-23 Thread Jason Ekstrand
---
 src/compiler/spirv/vtn_cfg.c | 4 ++--
 src/compiler/spirv/vtn_private.h | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/compiler/spirv/vtn_cfg.c b/src/compiler/spirv/vtn_cfg.c
index 12d68d6..692e3b6 100644
--- a/src/compiler/spirv/vtn_cfg.c
+++ b/src/compiler/spirv/vtn_cfg.c
@@ -56,8 +56,8 @@ vtn_cfg_handle_prepass_instruction(struct vtn_builder *b, 
SpvOp opcode,
   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function);
   val->func = b->func;
 
-  const struct vtn_type *func_type =
- vtn_value(b, w[4], vtn_value_type_type)->type;
+  b->func->type = vtn_value(b, w[4], vtn_value_type_type)->type;
+  const struct vtn_type *func_type = b->func->type;
 
   vtn_assert(func_type->return_type->type == result_type);
 
diff --git a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h
index e4f4e64..7f4cda8 100644
--- a/src/compiler/spirv/vtn_private.h
+++ b/src/compiler/spirv/vtn_private.h
@@ -215,6 +215,8 @@ struct vtn_block {
 struct vtn_function {
struct exec_node node;
 
+   struct vtn_type *type;
+
bool referenced;
bool emitted;
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 59/61] anv/pipeline: Convert apply_pipeline_layout to deref instructions

2018-03-23 Thread Jason Ekstrand
---
 src/intel/vulkan/anv_nir_apply_pipeline_layout.c | 149 ---
 src/intel/vulkan/anv_pipeline.c  |   3 -
 2 files changed, 78 insertions(+), 74 deletions(-)

diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c 
b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
index 87d9a91..774efe8 100644
--- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
+++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
@@ -54,6 +54,24 @@ add_var_binding(struct apply_pipeline_layout_state *state, 
nir_variable *var)
 }
 
 static void
+add_deref_src_binding(struct apply_pipeline_layout_state *state, nir_src src)
+{
+   nir_deref_instr *deref = nir_src_as_deref(src);
+   add_var_binding(state, nir_deref_instr_get_variable(deref));
+}
+
+static void
+add_tex_src_binding(struct apply_pipeline_layout_state *state,
+nir_tex_instr *tex, nir_tex_src_type deref_src_type)
+{
+   int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
+   if (deref_src_idx < 0)
+  return;
+
+   add_deref_src_binding(state, tex->src[deref_src_idx].src);
+}
+
+static void
 get_used_bindings_block(nir_block *block,
 struct apply_pipeline_layout_state *state)
 {
@@ -67,19 +85,19 @@ get_used_bindings_block(nir_block *block,
 nir_intrinsic_binding(intrin));
 break;
 
- case nir_intrinsic_image_var_load:
- case nir_intrinsic_image_var_store:
- case nir_intrinsic_image_var_atomic_add:
- case nir_intrinsic_image_var_atomic_min:
- case nir_intrinsic_image_var_atomic_max:
- case nir_intrinsic_image_var_atomic_and:
- case nir_intrinsic_image_var_atomic_or:
- case nir_intrinsic_image_var_atomic_xor:
- case nir_intrinsic_image_var_atomic_exchange:
- case nir_intrinsic_image_var_atomic_comp_swap:
- case nir_intrinsic_image_var_size:
- case nir_intrinsic_image_var_samples:
-add_var_binding(state, intrin->variables[0]->var);
+ case nir_intrinsic_image_deref_load:
+ case nir_intrinsic_image_deref_store:
+ case nir_intrinsic_image_deref_atomic_add:
+ case nir_intrinsic_image_deref_atomic_min:
+ case nir_intrinsic_image_deref_atomic_max:
+ case nir_intrinsic_image_deref_atomic_and:
+ case nir_intrinsic_image_deref_atomic_or:
+ case nir_intrinsic_image_deref_atomic_xor:
+ case nir_intrinsic_image_deref_atomic_exchange:
+ case nir_intrinsic_image_deref_atomic_comp_swap:
+ case nir_intrinsic_image_deref_size:
+ case nir_intrinsic_image_deref_samples:
+add_deref_src_binding(state, intrin->src[0]);
 break;
 
  default:
@@ -89,10 +107,8 @@ get_used_bindings_block(nir_block *block,
   }
   case nir_instr_type_tex: {
  nir_tex_instr *tex = nir_instr_as_tex(instr);
- assert(tex->texture);
- add_var_binding(state, tex->texture->var);
- if (tex->sampler)
-add_var_binding(state, tex->sampler->var);
+ add_tex_src_binding(state, tex, nir_tex_src_texture_deref);
+ add_tex_src_binding(state, tex, nir_tex_src_sampler_deref);
  break;
   }
   default:
@@ -157,18 +173,42 @@ lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin,
 }
 
 static void
-lower_tex_deref(nir_tex_instr *tex, nir_deref_var *deref,
-unsigned *const_index, unsigned array_size,
-nir_tex_src_type src_type,
+lower_tex_deref(nir_tex_instr *tex, nir_tex_src_type deref_src_type,
+unsigned *base_index,
 struct apply_pipeline_layout_state *state)
 {
-   nir_builder *b = >builder;
+   int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
+   if (deref_src_idx < 0)
+  return;
+
+   nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+
+   unsigned set = var->data.descriptor_set;
+   unsigned binding = var->data.binding;
+   unsigned array_size =
+  state->layout->set[set].layout->binding[binding].array_size;
+
+   nir_tex_src_type offset_src_type;
+   if (deref_src_type == nir_tex_src_texture_deref) {
+  offset_src_type = nir_tex_src_texture_offset;
+  *base_index = state->set[set].surface_offsets[binding];
+   } else {
+  assert(deref_src_type == nir_tex_src_sampler_deref);
+  offset_src_type = nir_tex_src_sampler_offset;
+  *base_index = state->set[set].sampler_offsets[binding];
+   }
 
-   if (deref->deref.child) {
-  assert(deref->deref.child->deref_type == nir_deref_type_array);
-  nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child);
+   nir_ssa_def *index = NULL;
+   if (deref->deref_type != nir_deref_type_var) {
+  assert(deref->deref_type == nir_deref_type_array);
+
+  nir_const_value *const_index = nir_src_as_const_value(deref->arr.index);
+  if 

[Mesa-dev] [PATCH 45/61] nir/spirv: Pass nir_variable_data into apply_var_decoration

2018-03-23 Thread Jason Ekstrand
---
 src/compiler/spirv/vtn_variables.c | 52 +++---
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/src/compiler/spirv/vtn_variables.c 
b/src/compiler/spirv/vtn_variables.c
index 26b2adf..03c898e 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -1342,70 +1342,70 @@ vtn_get_builtin_location(struct vtn_builder *b,
 }
 
 static void
-apply_var_decoration(struct vtn_builder *b, nir_variable *nir_var,
+apply_var_decoration(struct vtn_builder *b,
+ struct nir_variable_data *var_data,
  const struct vtn_decoration *dec)
 {
switch (dec->decoration) {
case SpvDecorationRelaxedPrecision:
   break; /* FIXME: Do nothing with this for now. */
case SpvDecorationNoPerspective:
-  nir_var->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
+  var_data->interpolation = INTERP_MODE_NOPERSPECTIVE;
   break;
case SpvDecorationFlat:
-  nir_var->data.interpolation = INTERP_MODE_FLAT;
+  var_data->interpolation = INTERP_MODE_FLAT;
   break;
case SpvDecorationCentroid:
-  nir_var->data.centroid = true;
+  var_data->centroid = true;
   break;
case SpvDecorationSample:
-  nir_var->data.sample = true;
+  var_data->sample = true;
   break;
case SpvDecorationInvariant:
-  nir_var->data.invariant = true;
+  var_data->invariant = true;
   break;
case SpvDecorationConstant:
-  vtn_assert(nir_var->constant_initializer != NULL);
-  nir_var->data.read_only = true;
+  var_data->read_only = true;
   break;
case SpvDecorationNonReadable:
-  nir_var->data.image.write_only = true;
+  var_data->image.write_only = true;
   break;
case SpvDecorationNonWritable:
-  nir_var->data.read_only = true;
-  nir_var->data.image.read_only = true;
+  var_data->read_only = true;
+  var_data->image.read_only = true;
   break;
case SpvDecorationRestrict:
-  nir_var->data.image.restrict_flag = true;
+  var_data->image.restrict_flag = true;
   break;
case SpvDecorationVolatile:
-  nir_var->data.image._volatile = true;
+  var_data->image._volatile = true;
   break;
case SpvDecorationCoherent:
-  nir_var->data.image.coherent = true;
+  var_data->image.coherent = true;
   break;
case SpvDecorationComponent:
-  nir_var->data.location_frac = dec->literals[0];
+  var_data->location_frac = dec->literals[0];
   break;
case SpvDecorationIndex:
-  nir_var->data.index = dec->literals[0];
+  var_data->index = dec->literals[0];
   break;
case SpvDecorationBuiltIn: {
   SpvBuiltIn builtin = dec->literals[0];
 
-  nir_variable_mode mode = nir_var->data.mode;
-  vtn_get_builtin_location(b, builtin, _var->data.location, );
-  nir_var->data.mode = mode;
+  nir_variable_mode mode = var_data->mode;
+  vtn_get_builtin_location(b, builtin, _data->location, );
+  var_data->mode = mode;
 
   switch (builtin) {
   case SpvBuiltInTessLevelOuter:
   case SpvBuiltInTessLevelInner:
- nir_var->data.compact = true;
+ var_data->compact = true;
  break;
   case SpvBuiltInSamplePosition:
- nir_var->data.origin_upper_left = b->origin_upper_left;
+ var_data->origin_upper_left = b->origin_upper_left;
  /* fallthrough */
   case SpvBuiltInFragCoord:
- nir_var->data.pixel_center_integer = b->pixel_center_integer;
+ var_data->pixel_center_integer = b->pixel_center_integer;
  break;
   default:
  break;
@@ -1424,7 +1424,7 @@ apply_var_decoration(struct vtn_builder *b, nir_variable 
*nir_var,
   break; /* Do nothing with these here */
 
case SpvDecorationPatch:
-  nir_var->data.patch = true;
+  var_data->patch = true;
   break;
 
case SpvDecorationLocation:
@@ -1550,17 +1550,17 @@ var_decoration_cb(struct vtn_builder *b, struct 
vtn_value *val, int member,
} else {
   if (vtn_var->var) {
  assert(member == -1);
- apply_var_decoration(b, vtn_var->var, dec);
+ apply_var_decoration(b, _var->var->data, dec);
   } else if (vtn_var->members) {
  if (member >= 0) {
 /* Member decorations must come from a type */
 assert(val->value_type == vtn_value_type_type);
-apply_var_decoration(b, vtn_var->members[member], dec);
+apply_var_decoration(b, _var->members[member]->data, dec);
  } else {
 unsigned length =
glsl_get_length(glsl_without_array(vtn_var->type->type));
 for (unsigned i = 0; i < length; i++)
-   apply_var_decoration(b, vtn_var->members[i], dec);
+   apply_var_decoration(b, _var->members[i]->data, dec);
  }
   } else {
  /* A few variables, those with external storage, have no actual
-- 
2.5.0.400.gff86faf


[Mesa-dev] [PATCH 38/61] intel/nir: Fixup deref modes after lowering patch vertices

2018-03-23 Thread Jason Ekstrand
---
 src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp 
b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
index 69da83a..0fd1492 100644
--- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
+++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
@@ -268,4 +268,6 @@ brw_nir_lower_patch_vertices_in_to_uniform(nir_shader *nir)
   exec_node_remove(>node);
   exec_list_push_tail(>uniforms, >node);
}
+
+   nir_fixup_deref_modes(nir);
 }
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 50/61] spirv: Allow pointers to have a deref at the base

2018-03-23 Thread Jason Ekstrand
Previously, pointers fell into two categories: index/offset for UBOs,
SSBOs, etc. and var + access chain for logical pointers.  This commit
adds another logical pointer mode that's deref + access chain.

It's tempting to think that we can just replace variable-based pointers
with deref-based or at least replace the access chain with a deref
chain.  Unfortunately, there are a few sticky bits that prevent this:

 1) We can't return deref-based pointers from OpVariable because those
opcodes may come outside of a function so there's no place to emit
the deref instructions.

 2) We can't always use variable-based pointers because we may not
always know the variable.  (We do now, but the upcoming function
rework will take that option away.)

 3) We also can't replace the access chain struct with a deref.  Due to
the re-ordering we do in order to handle loop continues, the derefs
we would emit as part of OpAccessChain may not dominate their uses.
We normally fix this up with nir_repair_ssa but that generates phi
nodes which we don't want in the middle of our deref chains.

All in all, we have no real better option than to support partial access
chains while also re-emitting the deref instructions on the spot.
---
 src/compiler/spirv/vtn_private.h   |  9 +-
 src/compiler/spirv/vtn_variables.c | 57 ++
 2 files changed, 23 insertions(+), 43 deletions(-)

diff --git a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h
index 1eaa78b..e4f4e64 100644
--- a/src/compiler/spirv/vtn_private.h
+++ b/src/compiler/spirv/vtn_private.h
@@ -428,10 +428,17 @@ struct vtn_pointer {
/** The referenced variable, if known
 *
 * This field may be NULL if the pointer uses a (block_index, offset) pair
-* instead of an access chain.
+* instead of an access chain or if the access chain starts at a deref.
 */
struct vtn_variable *var;
 
+   /** The deref at the base of the chain
+*
+* This field may be NULL if the pointer uses a (block_index, offset) pair
+* instead of an access chain or if the access chain starts at a variable.
+*/
+   nir_deref_instr *deref;
+
/** An access chain describing how to get from var to the referenced data
 *
 * This field may be NULL if the pointer references the entire variable or
diff --git a/src/compiler/spirv/vtn_variables.c 
b/src/compiler/spirv/vtn_variables.c
index 11ba043..6efd43c 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -112,6 +112,7 @@ vtn_access_chain_pointer_dereference(struct vtn_builder *b,
ptr->mode = base->mode;
ptr->type = type;
ptr->var = base->var;
+   ptr->deref = base->deref;
ptr->chain = chain;
 
return ptr;
@@ -377,46 +378,30 @@ nir_deref_instr *
 vtn_pointer_to_deref(struct vtn_builder *b, struct vtn_pointer *ptr)
 {
/* Do on-the-fly copy propagation for samplers. */
-   if (ptr->var->copy_prop_sampler)
+   if (ptr->var && ptr->var->copy_prop_sampler)
   return vtn_pointer_to_deref(b, ptr->var->copy_prop_sampler);
 
-   nir_deref_instr *deref_var =
-  nir_deref_instr_create(b->nb.shader, nir_deref_type_var);
-   nir_ssa_dest_init(_var->instr, _var->dest, 1, 32, NULL);
-   nir_builder_instr_insert(>nb, _var->instr);
+   nir_deref_instr *tail;
+   if (ptr->deref) {
+  tail = ptr->deref;
+   } else {
+  assert(ptr->var && ptr->var->var);
+  tail = nir_build_deref_var(>nb, ptr->var->var);
+   }
 
-   assert(ptr->var->var);
-   deref_var->mode = ptr->var->var->data.mode;
-   deref_var->type = ptr->var->var->type;
-   deref_var->var = ptr->var->var;
/* Raw variable access */
if (!ptr->chain)
-  return deref_var;
+  return tail;
 
struct vtn_access_chain *chain = ptr->chain;
vtn_assert(chain);
 
-   struct vtn_type *deref_type = ptr->var->type;
-   nir_deref_instr *tail = deref_var;
-
for (unsigned i = 0; i < chain->length; i++) {
-  enum glsl_base_type base_type = glsl_get_base_type(deref_type->type);
-  switch (base_type) {
-  case GLSL_TYPE_UINT:
-  case GLSL_TYPE_INT:
-  case GLSL_TYPE_UINT16:
-  case GLSL_TYPE_INT16:
-  case GLSL_TYPE_UINT8:
-  case GLSL_TYPE_INT8:
-  case GLSL_TYPE_UINT64:
-  case GLSL_TYPE_INT64:
-  case GLSL_TYPE_FLOAT:
-  case GLSL_TYPE_FLOAT16:
-  case GLSL_TYPE_DOUBLE:
-  case GLSL_TYPE_BOOL:
-  case GLSL_TYPE_ARRAY: {
- deref_type = deref_type->array_element;
-
+  if (glsl_type_is_struct(tail->type)) {
+ vtn_assert(chain->link[i].mode == vtn_access_mode_literal);
+ unsigned idx = chain->link[i].id;
+ tail = nir_build_deref_struct(>nb, tail, idx);
+  } else {
  nir_ssa_def *index;
  if (chain->link[i].mode == vtn_access_mode_literal) {
 index = nir_imm_int(>nb, chain->link[i].id);
@@ -425,18 +410,6 @@ vtn_pointer_to_deref(struct vtn_builder *b, struct 
vtn_pointer *ptr)
  

[Mesa-dev] [PATCH 37/61] intel, ir3: Disable nir_opt_copy_prop_vars

2018-03-23 Thread Jason Ekstrand
This pass doesn't handle deref instructions yet.  Making it handle both
legacy derefs and deref instructions would be painful.  Since it's not
important for correctness, just disable it for now.
---
 src/gallium/drivers/freedreno/ir3/ir3_nir.c | 2 +-
 src/intel/compiler/brw_nir.c| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c 
b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
index cd1f9c5..da434bf 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
@@ -91,7 +91,7 @@ ir3_optimize_loop(nir_shader *s)
progress = false;
 
OPT_V(s, nir_lower_vars_to_ssa);
-   progress |= OPT(s, nir_opt_copy_prop_vars);
+   /* progress |= OPT(s, nir_opt_copy_prop_vars); */
progress |= OPT(s, nir_lower_alu_to_scalar);
progress |= OPT(s, nir_lower_phis_to_scalar);
 
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 69ab162..cf994ac 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -538,7 +538,7 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler 
*compiler,
do {
   progress = false;
   OPT(nir_lower_vars_to_ssa);
-  OPT(nir_opt_copy_prop_vars);
+  /* OPT(nir_opt_copy_prop_vars); */
 
   if (is_scalar) {
  OPT(nir_lower_alu_to_scalar);
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 28/61] nir: Support deref instructions in lower_system_values

2018-03-23 Thread Jason Ekstrand
---
 src/compiler/nir/nir_lower_system_values.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/compiler/nir/nir_lower_system_values.c 
b/src/compiler/nir/nir_lower_system_values.c
index fb560ee..104df51 100644
--- a/src/compiler/nir/nir_lower_system_values.c
+++ b/src/compiler/nir/nir_lower_system_values.c
@@ -39,10 +39,15 @@ convert_block(nir_block *block, nir_builder *b)
 
   nir_intrinsic_instr *load_var = nir_instr_as_intrinsic(instr);
 
-  if (load_var->intrinsic != nir_intrinsic_load_var)
- continue;
+  nir_variable *var;
+  if (load_var->intrinsic == nir_intrinsic_load_var) {
+ var = load_var->variables[0]->var;
+  } else if (load_var->intrinsic == nir_intrinsic_load_deref) {
+ var = 
nir_deref_instr_get_variable(nir_src_as_deref(load_var->src[0]));
+  } else {
+ continue; /* Not a load instruction */
+  }
 
-  nir_variable *var = load_var->variables[0]->var;
   if (var->data.mode != nir_var_system_value)
  continue;
 
@@ -150,6 +155,8 @@ convert_block(nir_block *block, nir_builder *b)
 
   nir_ssa_def_rewrite_uses(&load_var->dest.ssa, nir_src_for_ssa(sysval));
   nir_instr_remove(&load_var->instr);
+  if (load_var->intrinsic == nir_intrinsic_load_deref)
+ nir_deref_instr_cleanup(nir_src_as_deref(load_var->src[0]));
 
   progress = true;
}
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 32/61] nir: Support deref instructions in lower_io

2018-03-23 Thread Jason Ekstrand
---
 src/compiler/nir/nir_lower_io.c | 70 +
 1 file changed, 50 insertions(+), 20 deletions(-)

diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index df91feb..549583d 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -35,6 +35,7 @@
 #include "nir_builder.h"
 
 struct lower_io_state {
+   void *dead_ctx;
nir_builder builder;
int (*type_size)(const struct glsl_type *type);
nir_variable_mode modes;
@@ -156,11 +157,10 @@ get_io_offset(nir_builder *b, nir_deref_var *deref,
 
 static nir_intrinsic_instr *
 lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
-   nir_ssa_def *vertex_index, nir_ssa_def *offset,
+   nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
unsigned component)
 {
const nir_shader *nir = state->builder.shader;
-   nir_variable *var = intrin->variables[0]->var;
nir_variable_mode mode = var->data.mode;
nir_ssa_def *barycentric = NULL;
 
@@ -229,10 +229,9 @@ lower_load(nir_intrinsic_instr *intrin, struct 
lower_io_state *state,
 
 static nir_intrinsic_instr *
 lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
-nir_ssa_def *vertex_index, nir_ssa_def *offset,
+nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
 unsigned component)
 {
-   nir_variable *var = intrin->variables[0]->var;
nir_variable_mode mode = var->data.mode;
 
nir_intrinsic_op op;
@@ -248,7 +247,10 @@ lower_store(nir_intrinsic_instr *intrin, struct 
lower_io_state *state,
   nir_intrinsic_instr_create(state->builder.shader, op);
store->num_components = intrin->num_components;
 
-   nir_src_copy(&store->src[0], &intrin->src[0], store);
+   if (intrin->intrinsic == nir_intrinsic_store_var)
+  nir_src_copy(&store->src[0], &intrin->src[0], store);
+   else
+  nir_src_copy(&store->src[0], &intrin->src[1], store);
 
nir_intrinsic_set_base(store, var->data.driver_location);
 
@@ -267,10 +269,8 @@ lower_store(nir_intrinsic_instr *intrin, struct 
lower_io_state *state,
 
 static nir_intrinsic_instr *
 lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state,
- nir_ssa_def *offset)
+ nir_variable *var, nir_ssa_def *offset)
 {
-   nir_variable *var = intrin->variables[0]->var;
-
assert(var->data.mode == nir_var_shared);
 
nir_intrinsic_op op;
@@ -306,27 +306,28 @@ lower_atomic(nir_intrinsic_instr *intrin, struct 
lower_io_state *state,
 
 static nir_intrinsic_instr *
 lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
- nir_ssa_def *offset, unsigned component)
+ nir_variable *var, nir_ssa_def *offset, unsigned 
component)
 {
-   nir_variable *var = intrin->variables[0]->var;
-
assert(var->data.mode == nir_var_shader_in);
 
/* Ignore interpolateAt() for flat variables - flat is flat. */
if (var->data.interpolation == INTERP_MODE_FLAT)
-  return lower_load(intrin, state, NULL, offset, component);
+  return lower_load(intrin, state, NULL, var, offset, component);
 
nir_intrinsic_op bary_op;
switch (intrin->intrinsic) {
case nir_intrinsic_interp_var_at_centroid:
+   case nir_intrinsic_interp_deref_at_centroid:
   bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
 nir_intrinsic_load_barycentric_sample :
 nir_intrinsic_load_barycentric_centroid;
   break;
case nir_intrinsic_interp_var_at_sample:
+   case nir_intrinsic_interp_deref_at_sample:
   bary_op = nir_intrinsic_load_barycentric_at_sample;
   break;
case nir_intrinsic_interp_var_at_offset:
+   case nir_intrinsic_interp_deref_at_offset:
   bary_op = nir_intrinsic_load_barycentric_at_offset;
   break;
default:
@@ -339,9 +340,14 @@ lower_interpolate_at(nir_intrinsic_instr *intrin, struct 
lower_io_state *state,
nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);
 
-   if (intrin->intrinsic != nir_intrinsic_interp_var_at_centroid)
+   if (intrin->intrinsic == nir_intrinsic_interp_var_at_sample ||
+   intrin->intrinsic == nir_intrinsic_interp_var_at_offset)
   nir_src_copy(&bary_setup->src[0], &intrin->src[0], bary_setup);
 
+   if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
+   intrin->intrinsic == nir_intrinsic_interp_deref_at_offset)
+  nir_src_copy(&bary_setup->src[0], &intrin->src[1], bary_setup);
+
nir_builder_instr_insert(&state->builder, &bary_setup->instr);
 
nir_intrinsic_instr *load =
@@ -374,7 +380,9 @@ nir_lower_io_block(nir_block *block,
 
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_var:
+  case nir_intrinsic_load_deref:
   case nir_intrinsic_store_var:
+  case nir_intrinsic_store_deref:
   case nir_intrinsic_var_atomic_add:
   case nir_intrinsic_var_atomic_imin:
   case 

[Mesa-dev] [PATCH 27/61] nir/deref: Add a deref cleanup function

2018-03-23 Thread Jason Ekstrand
Sometimes it's useful for a pass to be able to clean up its own derefs
instead of waiting for DCE.  This little helper makes it very easy.
---
 src/compiler/nir/nir.h   |  2 ++
 src/compiler/nir/nir_deref.c | 13 +
 2 files changed, 15 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index e0c46e7..018592a 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1019,6 +1019,8 @@ nir_deref_instr_get_variable(nir_deref_instr *instr)
return instr->var;
 }
 
+void nir_deref_instr_cleanup(nir_deref_instr *instr);
+
 nir_deref_var *
 nir_deref_instr_to_deref(nir_deref_instr *instr, void *mem_ctx);
 
diff --git a/src/compiler/nir/nir_deref.c b/src/compiler/nir/nir_deref.c
index 3546013..5add79c 100644
--- a/src/compiler/nir/nir_deref.c
+++ b/src/compiler/nir/nir_deref.c
@@ -73,6 +73,19 @@ nir_deref_path_finish(struct nir_deref_path *path)
   ralloc_free(path->path);
 }
 
+void
+nir_deref_instr_cleanup(nir_deref_instr *instr)
+{
+   for (nir_deref_instr *d = instr; d; d = nir_deref_instr_parent(d)) {
+  /* If anyone is using this deref, leave it alone */
+  assert(d->dest.is_ssa);
+  if (!list_empty(&d->dest.ssa.uses))
+ return;
+
+  nir_instr_remove(&d->instr);
+   }
+}
+
 nir_deref_var *
 nir_deref_instr_to_deref(nir_deref_instr *instr, void *mem_ctx)
 {
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 42/61] nir: Add a concept of per-member structs and a lowering pass

2018-03-23 Thread Jason Ekstrand
This adds a concept of "members" to a variable with an interface type.
It allows you to specify the full variable data for each member of the
interface instead of once for the variable.  We also add a lowering pass
to lower those variables to a sequence of variables and rewrite all the
derefs accordingly.
---
 src/compiler/Makefile.sources   |   1 +
 src/compiler/nir/meson.build|   1 +
 src/compiler/nir/nir.h  |  12 +
 src/compiler/nir/nir_clone.c|   8 +
 src/compiler/nir/nir_serialize.c|  12 +
 src/compiler/nir/nir_split_per_member_structs.c | 289 
 src/compiler/nir/nir_validate.c |   7 +
 7 files changed, 330 insertions(+)
 create mode 100644 src/compiler/nir/nir_split_per_member_structs.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 4a21eb7..e03c24b 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -284,6 +284,7 @@ NIR_FILES = \
nir/nir_search_helpers.h \
nir/nir_serialize.c \
nir/nir_serialize.h \
+   nir/nir_split_per_member_structs.c \
nir/nir_split_var_copies.c \
nir/nir_sweep.c \
nir/nir_to_lcssa.c \
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index b84b39c..5094e41 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -178,6 +178,7 @@ files_libnir = files(
   'nir_search_helpers.h',
   'nir_serialize.c',
   'nir_serialize.h',
+  'nir_split_per_member_structs.c',
   'nir_split_var_copies.c',
   'nir_sweep.c',
   'nir_to_lcssa.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 018592a..04073fb 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -362,6 +362,17 @@ typedef struct nir_variable {
 * \sa ir_variable::location
 */
const struct glsl_type *interface_type;
+
+   /**
+* Description of per-member data for per-member struct variables
+*
+* This is used for variables which are actually an amalgamation of
+* multiple entities such as a struct of built-in values or a struct of
+* inputs each with their own layout specifier.  This is only allowed on
+* variables with a struct or array of array of struct type.
+*/
+   unsigned num_members;
+   struct nir_variable_data *members;
 } nir_variable;
 
 #define nir_foreach_variable(var, var_list) \
@@ -2589,6 +2600,7 @@ void nir_dump_cfg(nir_shader *shader, FILE *fp);
 int nir_gs_count_vertices(const nir_shader *shader);
 
 bool nir_split_var_copies(nir_shader *shader);
+bool nir_split_per_member_structs(nir_shader *shader);
 
 bool nir_lower_returns_impl(nir_function_impl *impl);
 bool nir_lower_returns(nir_shader *shader);
diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c
index 20eaaff..b4c6965 100644
--- a/src/compiler/nir/nir_clone.c
+++ b/src/compiler/nir/nir_clone.c
@@ -161,6 +161,14 @@ nir_variable_clone(const nir_variable *var, nir_shader 
*shader)
}
nvar->interface_type = var->interface_type;
 
+   nvar->num_members = var->num_members;
+   if (var->num_members) {
+  nvar->members = ralloc_array(nvar, struct nir_variable_data,
+   var->num_members);
+  memcpy(nvar->members, var->members,
+ var->num_members * sizeof(*var->members));
+   }
+
return nvar;
 }
 
diff --git a/src/compiler/nir/nir_serialize.c b/src/compiler/nir/nir_serialize.c
index 834a65b..e2147b6 100644
--- a/src/compiler/nir/nir_serialize.c
+++ b/src/compiler/nir/nir_serialize.c
@@ -149,6 +149,11 @@ write_variable(write_ctx *ctx, const nir_variable *var)
blob_write_uint32(ctx->blob, !!(var->interface_type));
if (var->interface_type)
   encode_type_to_blob(ctx->blob, var->interface_type);
+   blob_write_uint32(ctx->blob, var->num_members);
+   if (var->num_members > 0) {
+  blob_write_bytes(ctx->blob, (uint8_t *) var->members,
+   var->num_members * sizeof(*var->members));
+   }
 }
 
 static nir_variable *
@@ -180,6 +185,13 @@ read_variable(read_ctx *ctx)
   var->interface_type = decode_type_from_blob(ctx->blob);
else
   var->interface_type = NULL;
+   var->num_members = blob_read_uint32(ctx->blob);
+   if (var->num_members > 0) {
+  var->members = ralloc_array(var, struct nir_variable_data,
+  var->num_members);
+  blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
+  var->num_members * sizeof(*var->members));
+   }
 
return var;
 }
diff --git a/src/compiler/nir/nir_split_per_member_structs.c 
b/src/compiler/nir/nir_split_per_member_structs.c
new file mode 100644
index 0000000..98aaaca
--- /dev/null
+++ b/src/compiler/nir/nir_split_per_member_structs.c
@@ -0,0 +1,289 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of 

[Mesa-dev] [PATCH 17/61] nir/lower_atomics: Rework the main walker loop a bit

2018-03-23 Thread Jason Ekstrand
This replaces some "if (...) { }" with "if (...) continue;" to reduce
nesting depth and makes nir_metadata_preserve conditional on progress
for the given impl.
---
 src/compiler/nir/nir_lower_atomics.c | 24 
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/src/compiler/nir/nir_lower_atomics.c 
b/src/compiler/nir/nir_lower_atomics.c
index 6b046bc..ee66aa3 100644
--- a/src/compiler/nir/nir_lower_atomics.c
+++ b/src/compiler/nir/nir_lower_atomics.c
@@ -183,18 +183,26 @@ nir_lower_atomics(nir_shader *shader,
bool progress = false;
 
nir_foreach_function(function, shader) {
-  if (function->impl) {
- nir_foreach_block(block, function->impl) {
-nir_foreach_instr_safe(instr, block) {
-   if (instr->type == nir_instr_type_intrinsic)
-  progress |= lower_instr(nir_instr_as_intrinsic(instr),
-  shader_program, shader,
-  use_binding_as_idx);
-}
+  if (!function->impl)
+ continue;
+
+  bool impl_progress = false;
+
+  nir_foreach_block(block, function->impl) {
+ nir_foreach_instr_safe(instr, block) {
+if (instr->type != nir_instr_type_intrinsic)
+   continue;
+
+impl_progress |= lower_instr(nir_instr_as_intrinsic(instr),
+ shader_program, shader,
+ use_binding_as_idx);
  }
+  }
 
+  if (impl_progress) {
  nir_metadata_preserve(function->impl, nir_metadata_block_index |
nir_metadata_dominance);
+ progress = true;
   }
}
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 35/61] nir: Support deref instructions in lower_wpos_center

2018-03-23 Thread Jason Ekstrand
---
 src/compiler/nir/nir_lower_wpos_center.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_lower_wpos_center.c 
b/src/compiler/nir/nir_lower_wpos_center.c
index dca810d..5b68cac 100644
--- a/src/compiler/nir/nir_lower_wpos_center.c
+++ b/src/compiler/nir/nir_lower_wpos_center.c
@@ -81,7 +81,18 @@ lower_wpos_center_block(nir_builder *b, nir_block *block,
nir_foreach_instr(instr, block) {
   if (instr->type == nir_instr_type_intrinsic) {
  nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
- if (intr->intrinsic == nir_intrinsic_load_var) {
+ if (intr->intrinsic == nir_intrinsic_load_deref) {
+nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+nir_variable *var = nir_deref_instr_get_variable(deref);
+
+if (var->data.mode == nir_var_shader_in &&
+var->data.location == VARYING_SLOT_POS) {
+   /* gl_FragCoord should not have array/struct derefs: */
+   assert(deref->deref_type == nir_deref_type_var);
+   update_fragcoord(b, intr, for_sample_shading);
+   progress = true;
+}
+ } else if (intr->intrinsic == nir_intrinsic_load_var) {
 nir_deref_var *dvar = intr->variables[0];
 nir_variable *var = dvar->var;
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 23/61] nir: Support deref instructions in lower_var_copies

2018-03-23 Thread Jason Ekstrand
---
 src/compiler/nir/nir.h  |  3 ++
 src/compiler/nir/nir_builder.h  | 48 ++
 src/compiler/nir/nir_lower_var_copies.c | 90 +++--
 3 files changed, 138 insertions(+), 3 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 4b86339..e0c46e7 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -81,6 +81,7 @@ name(const in_type *parent) \
 struct nir_function;
 struct nir_shader;
 struct nir_instr;
+struct nir_builder;
 
 
 /**
@@ -2607,6 +2608,8 @@ bool nir_lower_deref_instrs(nir_shader *shader,
 enum nir_lower_deref_flags flags);
 
 void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, nir_shader *shader);
+void nir_lower_deref_copy_instr(struct nir_builder *b,
+nir_intrinsic_instr *copy);
 bool nir_lower_var_copies(nir_shader *shader);
 
 void nir_fixup_deref_modes(nir_shader *shader);
diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index 634a55d..77ac37c 100644
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -660,6 +660,54 @@ nir_build_deref_for_chain(nir_builder *b, nir_deref_var 
*deref_var)
return tail;
 }
 
+/** Returns a deref that follows another but starting from the given parent
+ *
+ * The new deref will be the same type and take the same array or struct index
+ * as the leader deref but it may have a different parent.  This is very
+ * useful for walking deref paths.
+ */
+static inline nir_deref_instr *
+nir_build_deref_follower(nir_builder *b, nir_deref_instr *parent,
+ nir_deref_instr *leader)
+{
+   /* If the derefs would have the same parent, don't make a new one */
+   assert(leader->parent.is_ssa);
+   if (leader->parent.ssa == &parent->dest.ssa)
+  return leader;
+
+   UNUSED nir_deref_instr *leader_parent = nir_src_as_deref(leader->parent);
+
+   switch (leader->deref_type) {
+   case nir_deref_type_var:
+  unreachable("A var dereference cannot have a parent");
+  break;
+
+   case nir_deref_type_array:
+   case nir_deref_type_array_wildcard:
+  assert(glsl_type_is_matrix(parent->type) ||
+ glsl_type_is_array(parent->type));
+  assert(glsl_get_length(parent->type) ==
+ glsl_get_length(leader_parent->type));
+
+  if (leader->deref_type == nir_deref_type_array) {
+ assert(leader->arr.index.is_ssa);
+ return nir_build_deref_array(b, parent, leader->arr.index.ssa);
+  } else {
+ return nir_build_deref_array_wildcard(b, parent);
+  }
+
+   case nir_deref_type_struct:
+  assert(glsl_type_is_struct(parent->type));
+  assert(glsl_get_length(parent->type) ==
+ glsl_get_length(leader_parent->type));
+
+  return nir_build_deref_struct(b, parent, leader->strct.index);
+
+   default:
+  unreachable("Invalid deref instruction type");
+   }
+}
+
 static inline nir_ssa_def *
 nir_load_deref(nir_builder *build, nir_deref_instr *deref)
 {
diff --git a/src/compiler/nir/nir_lower_var_copies.c 
b/src/compiler/nir/nir_lower_var_copies.c
index 6288bdc..efe5c67 100644
--- a/src/compiler/nir/nir_lower_var_copies.c
+++ b/src/compiler/nir/nir_lower_var_copies.c
@@ -26,6 +26,8 @@
  */
 
 #include "nir.h"
+#include "nir_builder.h"
+#include "nir_deref.h"
 #include "compiler/nir_types.h"
 
 /*
@@ -154,23 +156,105 @@ nir_lower_var_copy_instr(nir_intrinsic_instr *copy, 
nir_shader *shader)
 &copy->variables[1]->deref, shader);
 }
 
+static nir_deref_instr *
+build_deref_to_next_wildcard(nir_builder *b,
+ nir_deref_instr *parent,
+ nir_deref_instr ***deref_arr)
+{
+   for (; **deref_arr; (*deref_arr)++) {
+  if ((**deref_arr)->deref_type == nir_deref_type_array_wildcard)
+ return parent;
+
+  parent = nir_build_deref_follower(b, parent, **deref_arr);
+   }
+
+   assert(**deref_arr == NULL);
+   *deref_arr = NULL;
+   return parent;
+}
+
+static void
+emit_deref_copy_load_store(nir_builder *b,
+   nir_deref_instr *dst_deref,
+   nir_deref_instr **dst_deref_arr,
+   nir_deref_instr *src_deref,
+   nir_deref_instr **src_deref_arr)
+{
+   if (dst_deref_arr || src_deref_arr) {
+  assert(dst_deref_arr && src_deref_arr);
+  dst_deref = build_deref_to_next_wildcard(b, dst_deref, &dst_deref_arr);
+  src_deref = build_deref_to_next_wildcard(b, src_deref, &src_deref_arr);
+   }
+
+   if (dst_deref_arr || src_deref_arr) {
+  assert(dst_deref_arr && src_deref_arr);
+  assert((*dst_deref_arr)->deref_type == nir_deref_type_array_wildcard);
+  assert((*src_deref_arr)->deref_type == nir_deref_type_array_wildcard);
+
+  unsigned length = glsl_get_length(src_deref->type);
+  /* The wildcards should represent the same number of elements */
+  

[Mesa-dev] [PATCH 25/61] nir: Support deref instructions in lower_vars_to_ssa

2018-03-23 Thread Jason Ekstrand
---
 src/compiler/nir/nir_lower_vars_to_ssa.c | 75 
 1 file changed, 58 insertions(+), 17 deletions(-)

diff --git a/src/compiler/nir/nir_lower_vars_to_ssa.c 
b/src/compiler/nir/nir_lower_vars_to_ssa.c
index 0cc6514..403ce26 100644
--- a/src/compiler/nir/nir_lower_vars_to_ssa.c
+++ b/src/compiler/nir/nir_lower_vars_to_ssa.c
@@ -357,11 +357,37 @@ deref_may_be_aliased(nir_deref_var *deref,
 &deref->deref, state);
 }
 
+static struct deref_node *
+get_deref_node_for_instr(nir_intrinsic_instr *instr, unsigned idx,
+ struct lower_variables_state *state)
+{
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_var:
+   case nir_intrinsic_store_var:
+   case nir_intrinsic_copy_var:
+  return get_deref_node(instr->variables[idx], state);
+
+   case nir_intrinsic_load_deref:
+   case nir_intrinsic_store_deref:
+   case nir_intrinsic_copy_deref: {
+  assert(instr->src[idx].is_ssa);
+  nir_deref_instr *deref_instr =
+ nir_instr_as_deref(instr->src[idx].ssa->parent_instr);
+  nir_deref_var *deref_var =
+ nir_deref_instr_to_deref(deref_instr, state->dead_ctx);
+  return get_deref_node(deref_var, state);
+   }
+
+   default:
+  unreachable("Unhanded instruction type");
+   }
+}
+
 static void
 register_load_instr(nir_intrinsic_instr *load_instr,
 struct lower_variables_state *state)
 {
-   struct deref_node *node = get_deref_node(load_instr->variables[0], state);
+   struct deref_node *node = get_deref_node_for_instr(load_instr, 0, state);
if (node == NULL)
   return;
 
@@ -376,7 +402,7 @@ static void
 register_store_instr(nir_intrinsic_instr *store_instr,
  struct lower_variables_state *state)
 {
-   struct deref_node *node = get_deref_node(store_instr->variables[0], state);
+   struct deref_node *node = get_deref_node_for_instr(store_instr, 0, state);
if (node == NULL)
   return;
 
@@ -393,8 +419,7 @@ register_copy_instr(nir_intrinsic_instr *copy_instr,
 {
for (unsigned idx = 0; idx < 2; idx++) {
   struct deref_node *node =
- get_deref_node(copy_instr->variables[idx], state);
-
+ get_deref_node_for_instr(copy_instr, idx, state);
   if (node == NULL)
  continue;
 
@@ -419,14 +444,17 @@ register_variable_uses_block(nir_block *block,
 
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_var:
+  case nir_intrinsic_load_deref:
  register_load_instr(intrin, state);
  break;
 
   case nir_intrinsic_store_var:
+  case nir_intrinsic_store_deref:
  register_store_instr(intrin, state);
  break;
 
   case nir_intrinsic_copy_var:
+  case nir_intrinsic_copy_deref:
  register_copy_instr(intrin, state);
  break;
 
@@ -448,15 +476,20 @@ lower_copies_to_load_store(struct deref_node *node,
if (!node->copies)
   return true;
 
+   nir_builder b;
+   nir_builder_init(&b, state->impl);
+
struct set_entry *copy_entry;
set_foreach(node->copies, copy_entry) {
   nir_intrinsic_instr *copy = (void *)copy_entry->key;
 
-  nir_lower_var_copy_instr(copy, state->shader);
+  if (copy->intrinsic == nir_intrinsic_copy_var)
+ nir_lower_var_copy_instr(copy, state->shader);
+  else
+ nir_lower_deref_copy_instr(&b, copy);
 
   for (unsigned i = 0; i < 2; ++i) {
- struct deref_node *arg_node =
-get_deref_node(copy->variables[i], state);
+ struct deref_node *arg_node = get_deref_node_for_instr(copy, i, 
state);
 
  /* Only bother removing copy entries for other nodes */
  if (arg_node == NULL || arg_node == node)
@@ -496,10 +529,10 @@ rename_variables(struct lower_variables_state *state)
  nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
 
  switch (intrin->intrinsic) {
- case nir_intrinsic_load_var: {
+ case nir_intrinsic_load_var:
+ case nir_intrinsic_load_deref: {
 struct deref_node *node =
-   get_deref_node(intrin->variables[0], state);
-
+   get_deref_node_for_instr(intrin, 0, state);
 if (node == NULL) {
/* If we hit this path then we are referencing an invalid
 * value.  Most likely, we unrolled something and are
@@ -544,9 +577,19 @@ rename_variables(struct lower_variables_state *state)
 break;
  }
 
- case nir_intrinsic_store_var: {
+ case nir_intrinsic_store_var:
+ case nir_intrinsic_store_deref: {
 struct deref_node *node =
-   get_deref_node(intrin->variables[0], state);
+   get_deref_node_for_instr(intrin, 0, state);
+
+nir_ssa_def *value;
+if (intrin->intrinsic == nir_intrinsic_store_var) {
+   assert(intrin->src[0].is_ssa);
+   value = intrin->src[0].ssa;
+} else {
+   

[Mesa-dev] [PATCH 19/61] nir: Add a pass for fixing deref modes

2018-03-23 Thread Jason Ekstrand
This will be needed by anything which changes variable modes without
rewriting derefs.
---
 src/compiler/nir/nir.h   |  2 ++
 src/compiler/nir/nir_deref.c | 30 ++
 2 files changed, 32 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 9c1716b..4b86339 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2609,6 +2609,8 @@ bool nir_lower_deref_instrs(nir_shader *shader,
 void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, nir_shader *shader);
 bool nir_lower_var_copies(nir_shader *shader);
 
+void nir_fixup_deref_modes(nir_shader *shader);
+
 bool nir_lower_global_vars_to_local(nir_shader *shader);
 
 bool nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes);
diff --git a/src/compiler/nir/nir_deref.c b/src/compiler/nir/nir_deref.c
index 87a8192..af5d75f 100644
--- a/src/compiler/nir/nir_deref.c
+++ b/src/compiler/nir/nir_deref.c
@@ -299,3 +299,33 @@ nir_lower_deref_instrs(nir_shader *shader,
 
return progress;
 }
+
+void
+nir_fixup_deref_modes(nir_shader *shader)
+{
+   nir_foreach_function(function, shader) {
+  if (!function->impl)
+ continue;
+
+  nir_foreach_block(block, function->impl) {
+ nir_foreach_instr(instr, block) {
+if (instr->type != nir_instr_type_deref)
+   continue;
+
+nir_deref_instr *deref = nir_instr_as_deref(instr);
+
+nir_variable_mode parent_mode;
+if (deref->deref_type == nir_deref_type_var) {
+   parent_mode = deref->var->data.mode;
+} else {
+   assert(deref->parent.is_ssa);
+   nir_deref_instr *parent =
+  nir_instr_as_deref(deref->parent.ssa->parent_instr);
+   parent_mode = parent->mode;
+}
+
+deref->mode = parent_mode;
+ }
+  }
+   }
+}
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 26/61] nir: Support deref instructions in lower_indirect_derefs

2018-03-23 Thread Jason Ekstrand
---
 src/compiler/nir/nir_lower_indirect_derefs.c | 156 +++
 1 file changed, 156 insertions(+)

diff --git a/src/compiler/nir/nir_lower_indirect_derefs.c 
b/src/compiler/nir/nir_lower_indirect_derefs.c
index 02f202d..ebeb79b 100644
--- a/src/compiler/nir/nir_lower_indirect_derefs.c
+++ b/src/compiler/nir/nir_lower_indirect_derefs.c
@@ -23,6 +23,7 @@
 
 #include "nir.h"
 #include "nir_builder.h"
+#include "nir_deref.h"
 
 static void
 emit_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr,
@@ -184,6 +185,160 @@ lower_indirect_block(nir_block *block, nir_builder *b,
return progress;
 }
 
+static void
+emit_load_store_deref(nir_builder *b, nir_intrinsic_instr *orig_instr,
+  nir_deref_instr *parent,
+  nir_deref_instr **deref_arr,
+  nir_ssa_def **dest, nir_ssa_def *src);
+
+static void
+emit_indirect_load_store_deref(nir_builder *b, nir_intrinsic_instr *orig_instr,
+   nir_deref_instr *parent,
+   nir_deref_instr **deref_arr,
+   int start, int end,
+   nir_ssa_def **dest, nir_ssa_def *src)
+{
+   assert(start < end);
+   if (start == end - 1) {
+  nir_ssa_def *index = nir_imm_int(b, start);
+  emit_load_store_deref(b, orig_instr,
+nir_build_deref_array(b, parent, index),
+deref_arr + 1, dest, src);
+   } else {
+  int mid = start + (end - start) / 2;
+
+  nir_ssa_def *then_dest, *else_dest;
+
+  nir_deref_instr *deref = *deref_arr;
+  assert(deref->deref_type == nir_deref_type_array);
+
+  nir_push_if(b, nir_ilt(b, deref->arr.index.ssa, nir_imm_int(b, mid)));
+  emit_indirect_load_store_deref(b, orig_instr, parent, deref_arr,
+ start, mid, &then_dest, src);
+  nir_push_else(b, NULL);
+  emit_indirect_load_store_deref(b, orig_instr, parent, deref_arr,
+ mid, end, &else_dest, src);
+  nir_pop_if(b, NULL);
+
+  if (src == NULL)
+ *dest = nir_if_phi(b, then_dest, else_dest);
+   }
+}
+
+static void
+emit_load_store_deref(nir_builder *b, nir_intrinsic_instr *orig_instr,
+  nir_deref_instr *parent,
+  nir_deref_instr **deref_arr,
+  nir_ssa_def **dest, nir_ssa_def *src)
+{
+   for (; *deref_arr; deref_arr++) {
+  nir_deref_instr *deref = *deref_arr;
+  if (deref->deref_type == nir_deref_type_array &&
+  nir_src_as_const_value(deref->arr.index) == NULL) {
+ int length = glsl_get_length(parent->type);
+
+ emit_indirect_load_store_deref(b, orig_instr, parent, deref_arr,
+0, length, dest, src);
+ return;
+  }
+
+  parent = nir_build_deref_follower(b, parent, deref);
+   }
+
+   /* We reached the end of the deref chain.  Emit the instruction */
+   assert(*deref_arr == NULL);
+
+   if (src == NULL) {
+  /* This is a load instruction */
+  nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(b->shader, orig_instr->intrinsic);
+  load->num_components = orig_instr->num_components;
+
+  load->src[0] = nir_src_for_ssa(&parent->dest.ssa);
+
+  /* Copy over any other sources.  This is needed for interp_deref_at */
+  for (unsigned i = 1;
+   i < nir_intrinsic_infos[orig_instr->intrinsic].num_srcs; i++)
+ nir_src_copy(&load->src[i], &orig_instr->src[i], load);
+
+  nir_ssa_dest_init(&load->instr, &load->dest,
+orig_instr->dest.ssa.num_components,
+orig_instr->dest.ssa.bit_size, NULL);
+  nir_builder_instr_insert(b, &load->instr);
+  *dest = &load->dest.ssa;
+   } else {
+  assert(orig_instr->intrinsic == nir_intrinsic_store_deref);
+  nir_store_deref(b, parent, src, nir_intrinsic_write_mask(orig_instr));
+   }
+}
+
+static bool
+lower_indirect_derefs_block(nir_block *block, nir_builder *b,
+nir_variable_mode modes)
+{
+   bool progress = false;
+
+   nir_foreach_instr_safe(instr, block) {
+  if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+  nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+  if (intrin->intrinsic != nir_intrinsic_load_deref &&
+  intrin->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
+  intrin->intrinsic != nir_intrinsic_interp_deref_at_sample &&
+  intrin->intrinsic != nir_intrinsic_interp_deref_at_offset &&
+  intrin->intrinsic != nir_intrinsic_store_deref)
+ continue;
+
+  nir_deref_instr *deref =
+ nir_instr_as_deref(intrin->src[0].ssa->parent_instr);
+
+  /* Walk the deref chain back to the base and look for indirects */
+  bool has_indirect = false;
+  nir_deref_instr *base = deref;
+  while (base->deref_type != nir_deref_type_var) {
+ if 

[Mesa-dev] [PATCH 15/61] prog/nir: Simplify some load/store operations

2018-03-23 Thread Jason Ekstrand
---
 src/compiler/nir/nir_builder.h |  6 ++
 src/mesa/program/prog_to_nir.c | 29 ++---
 2 files changed, 12 insertions(+), 23 deletions(-)

diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index f475d13..634a55d 100644
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -525,6 +525,12 @@ nir_ssa_for_alu_src(nir_builder *build, nir_alu_instr 
*instr, unsigned srcn)
return nir_imov_alu(build, *src, num_components);
 }
 
+static inline nir_ssa_def *
+nir_load_reg(nir_builder *build, nir_register *reg)
+{
+   return nir_ssa_for_src(build, nir_src_for_reg(reg), reg->num_components);
+}
+
 static inline nir_deref_instr *
 nir_build_deref_var(nir_builder *build, nir_variable *var)
 {
diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c
index b49616c..8ad1dc0 100644
--- a/src/mesa/program/prog_to_nir.c
+++ b/src/mesa/program/prog_to_nir.c
@@ -136,15 +136,8 @@ ptn_get_src(struct ptn_compile *c, const struct 
prog_src_register *prog_src)
 
   assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);
 
-  nir_intrinsic_instr *load =
- nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
-  load->num_components = 4;
-  load->variables[0] = nir_deref_var_create(load, 
c->input_vars[prog_src->Index]);
-
-  nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
-  nir_builder_instr_insert(b, &load->instr);
-
-  src.src = nir_src_for_ssa(&load->dest.ssa);
+  nir_variable *var = c->input_vars[prog_src->Index];
+  src.src = nir_src_for_ssa(nir_load_var(b, var));
   break;
}
case PROGRAM_STATE_VAR:
@@ -861,27 +854,17 @@ ptn_add_output_stores(struct ptn_compile *c)
nir_builder *b = &c->build;
 
nir_foreach_variable(var, &b->shader->outputs) {
-  nir_intrinsic_instr *store =
- nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
-  store->num_components = glsl_get_vector_elements(var->type);
-  nir_intrinsic_set_write_mask(store, (1 << store->num_components) - 1);
-  store->variables[0] =
- nir_deref_var_create(store, c->output_vars[var->data.location]);
-
+  nir_ssa_def *src = nir_load_reg(b, c->output_regs[var->data.location]);
   if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
   var->data.location == FRAG_RESULT_DEPTH) {
  /* result.depth has this strange convention of being the .z component 
of
   * a vec4 with undefined .xyw components.  We resolve it to a scalar, 
to
   * match GLSL's gl_FragDepth and the expectations of most backends.
   */
- nir_alu_src alu_src = { NIR_SRC_INIT };
- alu_src.src = nir_src_for_reg(c->output_regs[FRAG_RESULT_DEPTH]);
- alu_src.swizzle[0] = SWIZZLE_Z;
- store->src[0] = nir_src_for_ssa(nir_fmov_alu(b, alu_src, 1));
-  } else {
- store->src[0].reg.reg = c->output_regs[var->data.location];
+ src = nir_channel(b, src, 2);
   }
-  nir_builder_instr_insert(b, &store->instr);
+  unsigned num_components = glsl_get_vector_elements(var->type);
+  nir_store_var(b, var, src, (1 << num_components) - 1);
}
 }
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 20/61] nir: Support deref instructions in lower_global_vars_to_local

2018-03-23 Thread Jason Ekstrand
---
 src/compiler/nir/nir_lower_global_vars_to_local.c | 62 +++
 1 file changed, 42 insertions(+), 20 deletions(-)

diff --git a/src/compiler/nir/nir_lower_global_vars_to_local.c 
b/src/compiler/nir/nir_lower_global_vars_to_local.c
index c8fdfde..14aa366 100644
--- a/src/compiler/nir/nir_lower_global_vars_to_local.c
+++ b/src/compiler/nir/nir_lower_global_vars_to_local.c
@@ -32,31 +32,50 @@
 
 #include "nir.h"
 
+static void
+register_var_use(nir_variable *var, nir_function_impl *impl,
+ struct hash_table *var_func_table)
+{
+   if (var->data.mode != nir_var_global)
+  return;
+
+   struct hash_entry *entry =
+  _mesa_hash_table_search(var_func_table, var);
+
+   if (entry) {
+  if (entry->data != impl)
+ entry->data = NULL;
+   } else {
+  _mesa_hash_table_insert(var_func_table, var, impl);
+   }
+}
+
 static bool
 mark_global_var_uses_block(nir_block *block, nir_function_impl *impl,
struct hash_table *var_func_table)
 {
nir_foreach_instr(instr, block) {
-  if (instr->type != nir_instr_type_intrinsic)
- continue;
-
-  nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-  unsigned num_vars = nir_intrinsic_infos[intrin->intrinsic].num_variables;
-
-  for (unsigned i = 0; i < num_vars; i++) {
- nir_variable *var = intrin->variables[i]->var;
- if (var->data.mode != nir_var_global)
-continue;
-
- struct hash_entry *entry =
-_mesa_hash_table_search(var_func_table, var);
-
- if (entry) {
-if (entry->data != impl)
-   entry->data = NULL;
- } else {
-_mesa_hash_table_insert(var_func_table, var, impl);
- }
+  switch (instr->type) {
+  case nir_instr_type_deref: {
+ nir_deref_instr *deref = nir_instr_as_deref(instr);
+ if (deref->deref_type == nir_deref_type_var)
+register_var_use(deref->var, impl, var_func_table);
+ break;
+  }
+
+  case nir_instr_type_intrinsic: {
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ unsigned num_vars =
+nir_intrinsic_infos[intrin->intrinsic].num_variables;
+
+ for (unsigned i = 0; i < num_vars; i++)
+register_var_use(intrin->variables[i]->var, impl, var_func_table);
+ break;
+  }
+
+  default:
+ /* Nothing to do */
+ break;
   }
}
 
@@ -103,5 +122,8 @@ nir_lower_global_vars_to_local(nir_shader *shader)
 
_mesa_hash_table_destroy(var_func_table, NULL);
 
+   if (progress)
+  nir_fixup_deref_modes(shader);
+
return progress;
 }
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/61] nir: Add deref sources to texture instructions

2018-03-23 Thread Jason Ekstrand
---
 src/compiler/nir/nir.h   | 2 ++
 src/compiler/nir/nir_print.c | 6 ++
 2 files changed, 8 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 591d53e..ce9e458 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1249,6 +1249,8 @@ typedef enum {
nir_tex_src_ms_mcs, /* MSAA compression value */
nir_tex_src_ddx,
nir_tex_src_ddy,
+   nir_tex_src_texture_deref, /* < deref pointing to the texture */
+   nir_tex_src_sampler_deref, /* < deref pointing to the sampler */
nir_tex_src_texture_offset, /* < dynamically uniform indirect offset */
nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */
nir_tex_src_plane,  /* < selects plane for planar textures */
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index 55e4b38..d1b5754 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -819,6 +819,12 @@ print_tex_instr(nir_tex_instr *instr, print_state *state)
   case nir_tex_src_ddy:
  fprintf(fp, "(ddy)");
  break;
+  case nir_tex_src_texture_deref:
+ fprintf(fp, "(texture_deref)");
+ break;
+  case nir_tex_src_sampler_deref:
+ fprintf(fp, "(sampler_deref)");
+ break;
   case nir_tex_src_texture_offset:
  fprintf(fp, "(texture_offset)");
  break;
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/61] prog/nir: Use deref instructions for params

2018-03-23 Thread Jason Ekstrand
---
 src/mesa/program/prog_to_nir.c | 36 ++--
 1 file changed, 6 insertions(+), 30 deletions(-)

diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c
index 8ad1dc0..436d1c0 100644
--- a/src/mesa/program/prog_to_nir.c
+++ b/src/mesa/program/prog_to_nir.c
@@ -168,38 +168,14 @@ ptn_get_src(struct ptn_compile *c, const struct 
prog_src_register *prog_src)
  nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
  load->num_components = 4;
 
- load->variables[0] = nir_deref_var_create(load, c->parameters);
- nir_deref_array *deref_arr =
-nir_deref_array_create(load->variables[0]);
- deref_arr->deref.type = glsl_vec4_type();
- load->variables[0]->deref.child = &deref_arr->deref;
-
- if (prog_src->RelAddr) {
-deref_arr->deref_array_type = nir_deref_array_type_indirect;
-
-nir_alu_src addr_src = { NIR_SRC_INIT };
-addr_src.src = nir_src_for_reg(c->addr_reg);
-nir_ssa_def *reladdr = nir_imov_alu(b, addr_src, 1);
-
-if (prog_src->Index < 0) {
-   /* This is a negative offset which should be added to the 
address
-* register's value.
-*/
-   reladdr = nir_iadd(b, reladdr, nir_imm_int(b, prog_src->Index));
-
-   deref_arr->base_offset = 0;
-} else {
-   deref_arr->base_offset = prog_src->Index;
-}
-deref_arr->indirect = nir_src_for_ssa(reladdr);
- } else {
-deref_arr->deref_array_type = nir_deref_array_type_direct;
-deref_arr->base_offset = prog_src->Index;
- }
+ nir_deref_instr *deref = nir_build_deref_var(b, c->parameters);
 
- nir_builder_instr_insert(b, &load->instr);
+ nir_ssa_def *index = nir_imm_int(b, prog_src->Index);
+ if (prog_src->RelAddr)
+index = nir_iadd(b, index, nir_load_reg(b, c->addr_reg));
+ deref = nir_build_deref_array(b, deref, nir_channel(b, index, 0));
 
- src.src = nir_src_for_ssa(&load->dest.ssa);
+ src.src = nir_src_for_ssa(nir_load_deref(b, deref));
  break;
   }
   default:
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/61] anv, i965, radv, st, ir3: Call nir_lower_deref_instrs

2018-03-23 Thread Jason Ekstrand
This inserts a call to nir_lower_deref_instrs at every call site of
glsl_to_nir, spirv_to_nir, and prog_to_nir.
---
 src/amd/vulkan/radv_shader.c| 2 ++
 src/gallium/drivers/freedreno/ir3/ir3_cmdline.c | 3 +++
 src/intel/vulkan/anv_pipeline.c | 2 ++
 src/mesa/drivers/dri/i965/brw_program.c | 2 ++
 src/mesa/state_tracker/st_glsl_to_nir.cpp   | 1 +
 5 files changed, 10 insertions(+)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index c693580..d8cfd2e 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -228,6 +228,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
 
free(spec_entries);
 
+   NIR_PASS_V(nir, nir_lower_deref_instrs, ~0);
+
/* We have to lower away local constant initializers right 
before we
 * inline functions.  That way they get properly initialized at 
the top
 * of the function and not at the top of its caller.
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c 
b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
index 41bd1de..07e97c3 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
@@ -112,6 +112,7 @@ load_glsl(unsigned num_files, char* const* files, 
gl_shader_stage stage)
errx(1, "couldn't parse `%s'", files[0]);
 
nir_shader *nir = glsl_to_nir(prog, stage, 
ir3_get_compiler_options(compiler));
+   nir_lower_deref_instrs(nir, ~0);
 
/* required NIR passes: */
/* TODO cmdline args for some of the conditional lowering passes? */
@@ -231,6 +232,8 @@ load_spirv(const char *filename, const char *entry, 
gl_shader_stage stage)
_options,
ir3_get_compiler_options(compiler));
 
+   NIR_PASS_V(entry_point->shader, nir_lower_deref_instrs, ~0);
+
nir_print_shader(entry_point->shader, stdout);
 
return entry_point->shader;
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 4ca1e0b..4af304e 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -170,6 +170,8 @@ anv_shader_compile_to_nir(struct anv_pipeline *pipeline,
   nir_print_shader(nir, stderr);
}
 
+   NIR_PASS_V(nir, nir_lower_deref_instrs, ~0);
+
/* We have to lower away local constant initializers right before we
 * inline functions.  That way they get properly initialized at the top
 * of the function and not at the top of its caller.
diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index 4ba46a3..a871432 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -77,6 +77,7 @@ brw_create_nir(struct brw_context *brw,
/* First, lower the GLSL IR or Mesa IR to NIR */
if (shader_prog) {
   nir = glsl_to_nir(shader_prog, stage, options);
+  nir_lower_deref_instrs(nir, ~0);
   nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out);
   nir_lower_returns(nir);
   nir_validate_shader(nir);
@@ -84,6 +85,7 @@ brw_create_nir(struct brw_context *brw,
  nir_shader_get_entrypoint(nir), true, false);
} else {
   nir = prog_to_nir(prog, options);
+  nir_lower_deref_instrs(nir, ~0);
   NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
}
nir_validate_shader(nir);
diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp 
b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index 9bb99f3..7d111d6 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -328,6 +328,7 @@ st_glsl_to_nir(struct st_context *st, struct gl_program 
*prog,
   return prog->nir;
 
nir_shader *nir = glsl_to_nir(shader_program, stage, options);
+   nir_lower_deref_instrs(nir, (nir_lower_deref_flags)~0);
 
nir_variable_mode mask =
   (nir_variable_mode) (nir_var_shader_in | nir_var_shader_out);
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/61] glsl/nir: Only claim to handle intrinsic functions

2018-03-23 Thread Jason Ekstrand
Non-intrinsic function handling has never actually been tested and
probably doesn't work.  Just get rid of it for now.  We can always add
it back in later if it's useful.
---
 src/compiler/glsl/glsl_to_nir.cpp | 25 ++---
 1 file changed, 2 insertions(+), 23 deletions(-)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index 9e938da..1c842b7 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -614,14 +614,7 @@ nir_visitor::visit(ir_loop_jump *ir)
 void
 nir_visitor::visit(ir_return *ir)
 {
-   if (ir->value != NULL) {
-  nir_intrinsic_instr *copy =
- nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var);
-
-  copy->variables[0] = nir_deref_var_create(copy, this->impl->return_var);
-  copy->variables[1] = evaluate_deref(&copy->instr, ir->value);
-   }
-
+   assert(ir->value == NULL);
nir_jump_instr *instr = nir_jump_instr_create(this->shader, 
nir_jump_return);
nir_builder_instr_insert(&b, &instr->instr);
 }
@@ -1231,21 +1224,7 @@ nir_visitor::visit(ir_call *ir)
   return;
}
 
-   struct hash_entry *entry =
-  _mesa_hash_table_search(this->overload_table, ir->callee);
-   assert(entry);
-   nir_function *callee = (nir_function *) entry->data;
-
-   nir_call_instr *instr = nir_call_instr_create(this->shader, callee);
-
-   unsigned i = 0;
-   foreach_in_list(ir_dereference, param, &ir->actual_parameters) {
-  instr->params[i] = evaluate_deref(&instr->instr, param);
-  i++;
-   }
-
-   instr->return_deref = evaluate_deref(&instr->instr, ir->return_deref);
-   nir_builder_instr_insert(&b, &instr->instr);
+   unreachable("glsl_to_nir only handles function calls to intrinsics");
 }
 
 void
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/61] nir: Add helpers for working with deref instructions

2018-03-23 Thread Jason Ekstrand
This commit adds a pass for lowering deref instructions to deref chains
as well as some smaller helpers to ease the transition.
---
 src/compiler/Makefile.sources  |   1 +
 src/compiler/nir/meson.build   |   1 +
 src/compiler/nir/nir.h |  33 +
 src/compiler/nir/nir_builder.h |  23 
 src/compiler/nir/nir_deref.c   | 301 +
 5 files changed, 359 insertions(+)
 create mode 100644 src/compiler/nir/nir_deref.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 2dc4836..4a4dfdf 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -195,6 +195,7 @@ NIR_FILES = \
nir/nir_control_flow.c \
nir/nir_control_flow.h \
nir/nir_control_flow_private.h \
+   nir/nir_deref.c \
nir/nir_dominance.c \
nir/nir_from_ssa.c \
nir/nir_gather_info.c \
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index 887035d..54709df 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -89,6 +89,7 @@ files_libnir = files(
   'nir_control_flow.c',
   'nir_control_flow.h',
   'nir_control_flow_private.h',
+  'nir_deref.c',
   'nir_dominance.c',
   'nir_from_ssa.c',
   'nir_gather_info.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index ce9e458..9c1716b 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1000,6 +1000,27 @@ nir_src_as_deref(nir_src src)
return nir_instr_as_deref(src.ssa->parent_instr);
 }
 
+static inline nir_deref_instr *
+nir_deref_instr_parent(nir_deref_instr *instr)
+{
+   if (instr->deref_type == nir_deref_type_var)
+  return NULL;
+   else
+  return nir_src_as_deref(instr->parent);
+}
+
+static inline nir_variable *
+nir_deref_instr_get_variable(nir_deref_instr *instr)
+{
+   while (instr->deref_type != nir_deref_type_var)
+  instr = nir_deref_instr_parent(instr);
+
+   return instr->var;
+}
+
+nir_deref_var *
+nir_deref_instr_to_deref(nir_deref_instr *instr, void *mem_ctx);
+
 typedef struct {
nir_instr instr;
 
@@ -2573,6 +2594,18 @@ bool nir_inline_functions(nir_shader *shader);
 
 bool nir_propagate_invariant(nir_shader *shader);
 
+enum nir_lower_deref_flags {
+   nir_lower_load_store_derefs =   (1 << 0),
+   nir_lower_texture_derefs =  (1 << 1),
+   nir_lower_interp_derefs =   (1 << 2),
+   nir_lower_atomic_counter_derefs =   (1 << 3),
+   nir_lower_atomic_derefs =   (1 << 4),
+   nir_lower_image_derefs =(1 << 5),
+};
+
+bool nir_lower_deref_instrs(nir_shader *shader,
+enum nir_lower_deref_flags flags);
+
 void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, nir_shader *shader);
 bool nir_lower_var_copies(nir_shader *shader);
 
diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index 0513e31..f475d13 100644
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -631,6 +631,29 @@ nir_build_deref_cast(nir_builder *build, nir_ssa_def 
*parent,
return deref;
 }
 
+static inline nir_deref_instr *
+nir_build_deref_for_chain(nir_builder *b, nir_deref_var *deref_var)
+{
+   nir_deref_instr *tail = nir_build_deref_var(b, deref_var->var);
+   for (nir_deref *d = deref_var->deref.child; d; d = d->child) {
+  if (d->deref_type == nir_deref_type_array) {
+ nir_deref_array *a = nir_deref_as_array(d);
+ assert(a->deref_array_type != nir_deref_array_type_wildcard);
+
+ nir_ssa_def *index = nir_imm_int(b, a->base_offset);
+ if (a->deref_array_type == nir_deref_array_type_indirect)
+index = nir_iadd(b, index, nir_ssa_for_src(b, a->indirect, 1));
+
+ tail = nir_build_deref_array(b, tail, index);
+  } else {
+ nir_deref_struct *s = nir_deref_as_struct(d);
+ tail = nir_build_deref_struct(b, tail, s->index);
+  }
+   }
+
+   return tail;
+}
+
 static inline nir_ssa_def *
 nir_load_deref(nir_builder *build, nir_deref_instr *deref)
 {
diff --git a/src/compiler/nir/nir_deref.c b/src/compiler/nir/nir_deref.c
new file mode 100644
index 000..87a8192
--- /dev/null
+++ b/src/compiler/nir/nir_deref.c
@@ -0,0 +1,301 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, 

[Mesa-dev] [PATCH 14/61] glsl/nir: Use deref instructions instead of deref chains

2018-03-23 Thread Jason Ekstrand
---
 src/compiler/glsl/glsl_to_nir.cpp | 239 +++---
 1 file changed, 94 insertions(+), 145 deletions(-)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index 1c842b7..db0c911 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -90,12 +90,10 @@ private:
nir_builder b;
nir_ssa_def *result; /* result of the expression tree last visited */
 
-   nir_deref_var *evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir);
+   nir_deref_instr *evaluate_deref(ir_instruction *ir);
 
-   /* the head of the dereference chain we're creating */
-   nir_deref_var *deref_head;
-   /* the tail of the dereference chain we're creating */
-   nir_deref *deref_tail;
+   /* most recent deref instruction created */
+   nir_deref_instr *deref;
 
nir_variable *var; /* variable created by ir_variable visitor */
 
@@ -198,8 +196,6 @@ nir_visitor::nir_visitor(nir_shader *shader)
this->result = NULL;
this->impl = NULL;
this->var = NULL;
-   this->deref_head = NULL;
-   this->deref_tail = NULL;
memset(&this->b, 0, sizeof(this->b));
 }
 
@@ -209,12 +205,11 @@ nir_visitor::~nir_visitor()
_mesa_hash_table_destroy(this->overload_table, NULL);
 }
 
-nir_deref_var *
-nir_visitor::evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir)
+nir_deref_instr *
+nir_visitor::evaluate_deref(ir_instruction *ir)
 {
ir->accept(this);
-   ralloc_steal(mem_ctx, this->deref_head);
-   return this->deref_head;
+   return this->deref;
 }
 
 static nir_constant *
@@ -627,76 +622,76 @@ nir_visitor::visit(ir_call *ir)
 
   switch (ir->callee->intrinsic_id) {
   case ir_intrinsic_atomic_counter_read:
- op = nir_intrinsic_atomic_counter_read_var;
+ op = nir_intrinsic_atomic_counter_read_deref;
  break;
   case ir_intrinsic_atomic_counter_increment:
- op = nir_intrinsic_atomic_counter_inc_var;
+ op = nir_intrinsic_atomic_counter_inc_deref;
  break;
   case ir_intrinsic_atomic_counter_predecrement:
- op = nir_intrinsic_atomic_counter_dec_var;
+ op = nir_intrinsic_atomic_counter_dec_deref;
  break;
   case ir_intrinsic_atomic_counter_add:
- op = nir_intrinsic_atomic_counter_add_var;
+ op = nir_intrinsic_atomic_counter_add_deref;
  break;
   case ir_intrinsic_atomic_counter_and:
- op = nir_intrinsic_atomic_counter_and_var;
+ op = nir_intrinsic_atomic_counter_and_deref;
  break;
   case ir_intrinsic_atomic_counter_or:
- op = nir_intrinsic_atomic_counter_or_var;
+ op = nir_intrinsic_atomic_counter_or_deref;
  break;
   case ir_intrinsic_atomic_counter_xor:
- op = nir_intrinsic_atomic_counter_xor_var;
+ op = nir_intrinsic_atomic_counter_xor_deref;
  break;
   case ir_intrinsic_atomic_counter_min:
- op = nir_intrinsic_atomic_counter_min_var;
+ op = nir_intrinsic_atomic_counter_min_deref;
  break;
   case ir_intrinsic_atomic_counter_max:
- op = nir_intrinsic_atomic_counter_max_var;
+ op = nir_intrinsic_atomic_counter_max_deref;
  break;
   case ir_intrinsic_atomic_counter_exchange:
- op = nir_intrinsic_atomic_counter_exchange_var;
+ op = nir_intrinsic_atomic_counter_exchange_deref;
  break;
   case ir_intrinsic_atomic_counter_comp_swap:
- op = nir_intrinsic_atomic_counter_comp_swap_var;
+ op = nir_intrinsic_atomic_counter_comp_swap_deref;
  break;
   case ir_intrinsic_image_load:
- op = nir_intrinsic_image_var_load;
+ op = nir_intrinsic_image_deref_load;
  break;
   case ir_intrinsic_image_store:
- op = nir_intrinsic_image_var_store;
+ op = nir_intrinsic_image_deref_store;
  break;
   case ir_intrinsic_image_atomic_add:
- op = nir_intrinsic_image_var_atomic_add;
+ op = nir_intrinsic_image_deref_atomic_add;
  break;
   case ir_intrinsic_image_atomic_min:
- op = nir_intrinsic_image_var_atomic_min;
+ op = nir_intrinsic_image_deref_atomic_min;
  break;
   case ir_intrinsic_image_atomic_max:
- op = nir_intrinsic_image_var_atomic_max;
+ op = nir_intrinsic_image_deref_atomic_max;
  break;
   case ir_intrinsic_image_atomic_and:
- op = nir_intrinsic_image_var_atomic_and;
+ op = nir_intrinsic_image_deref_atomic_and;
  break;
   case ir_intrinsic_image_atomic_or:
- op = nir_intrinsic_image_var_atomic_or;
+ op = nir_intrinsic_image_deref_atomic_or;
  break;
   case ir_intrinsic_image_atomic_xor:
- op = nir_intrinsic_image_var_atomic_xor;
+ op = nir_intrinsic_image_deref_atomic_xor;
  break;
   case ir_intrinsic_image_atomic_exchange:
- op = nir_intrinsic_image_var_atomic_exchange;
+ op = 

[Mesa-dev] [PATCH 22/61] nir: Add a deref path helper struct

2018-03-23 Thread Jason Ekstrand
This commit introduces a new nir_deref.h header for helpers that are
less common and really only needed by a few heavy-duty passes.  In this
header is a new struct for representing a full deref path which can be
walked in either direction.
---
 src/compiler/Makefile.sources |  1 +
 src/compiler/nir/meson.build  |  1 +
 src/compiler/nir/nir_deref.c  | 49 ++
 src/compiler/nir/nir_deref.h  | 55 +++
 4 files changed, 106 insertions(+)
 create mode 100644 src/compiler/nir/nir_deref.h

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 4a4dfdf..4a21eb7 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -196,6 +196,7 @@ NIR_FILES = \
nir/nir_control_flow.h \
nir/nir_control_flow_private.h \
nir/nir_deref.c \
+   nir/nir_deref.h \
nir/nir_dominance.c \
nir/nir_from_ssa.c \
nir/nir_gather_info.c \
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index 54709df..b84b39c 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -90,6 +90,7 @@ files_libnir = files(
   'nir_control_flow.h',
   'nir_control_flow_private.h',
   'nir_deref.c',
+  'nir_deref.h',
   'nir_dominance.c',
   'nir_from_ssa.c',
   'nir_gather_info.c',
diff --git a/src/compiler/nir/nir_deref.c b/src/compiler/nir/nir_deref.c
index af5d75f..3546013 100644
--- a/src/compiler/nir/nir_deref.c
+++ b/src/compiler/nir/nir_deref.c
@@ -23,6 +23,55 @@
 
 #include "nir.h"
 #include "nir_builder.h"
+#include "nir_deref.h"
+
+void
+nir_deref_path_init(struct nir_deref_path *path,
+nir_deref_instr *deref, void *mem_ctx)
+{
+   /* The length of the short path is at most ARRAY_SIZE - 1 because we need
+* room for the NULL terminator.
+*/
+   static const int max_short_path_len = ARRAY_SIZE(path->_short_path) - 1;
+
+   int count = 0;
+
+   nir_deref_instr **tail = &path->_short_path[max_short_path_len];
+   nir_deref_instr **head = tail;
+
+   *tail = NULL;
+   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
+  count++;
+  if (count <= max_short_path_len)
+ *(--head) = d;
+   }
+
+   if (count <= max_short_path_len) {
+  /* If we're under max_short_path_len, just use the short path. */
+  path->path = head;
+  goto done;
+   }
+
+   path->path = ralloc_array(mem_ctx, nir_deref_instr *, count + 1);
+   head = tail = path->path + count;
+   *tail = NULL;
+   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d))
+  *(--head) = d;
+
+done:
+   assert(head == path->path);
+   assert(tail == head + count);
+   assert((*head)->deref_type == nir_deref_type_var);
+   assert(*tail == NULL);
+}
+
+void
+nir_deref_path_finish(struct nir_deref_path *path)
+{
+   if (path->path < &path->_short_path[0] ||
+   path->path > &path->_short_path[ARRAY_SIZE(path->_short_path) - 1])
+  ralloc_free(path->path);
+}
 
 nir_deref_var *
 nir_deref_instr_to_deref(nir_deref_instr *instr, void *mem_ctx)
diff --git a/src/compiler/nir/nir_deref.h b/src/compiler/nir/nir_deref.h
new file mode 100644
index 000..7597b77
--- /dev/null
+++ b/src/compiler/nir/nir_deref.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef NIR_DEREF_H
+#define NIR_DEREF_H
+
+#include "nir.h"
+#include "nir_builder.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct nir_deref_path {
+   /** Short path so we can keep it on the stack most of the time. */
+   nir_deref_instr *_short_path[7];
+
+   /** A null-terminated array view of a deref chain
+*
+* The first element of this array will be the variable dereference
+* followed by every deref_instr on the path to the final one.  The last
+* element in the array is a NULL pointer which acts as a terminator.

[Mesa-dev] [PATCH 09/61] nir: Add _deref versions of all of the _var intrinsics

2018-03-23 Thread Jason Ekstrand
---
 src/compiler/nir/nir.h|  2 +-
 src/compiler/nir/nir_builder.h| 37 +
 src/compiler/nir/nir_intrinsics.h | 84 +++
 3 files changed, 122 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index b02c241..591d53e 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1164,7 +1164,7 @@ typedef enum {
 
 } nir_intrinsic_index_flag;
 
-#define NIR_INTRINSIC_MAX_INPUTS 4
+#define NIR_INTRINSIC_MAX_INPUTS 5
 
 typedef struct {
const char *name;
diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index 66f705b..0513e31 100644
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -632,6 +632,43 @@ nir_build_deref_cast(nir_builder *build, nir_ssa_def 
*parent,
 }
 
 static inline nir_ssa_def *
+nir_load_deref(nir_builder *build, nir_deref_instr *deref)
+{
+   nir_intrinsic_instr *load =
+  nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_deref);
+   load->num_components = glsl_get_vector_elements(deref->type);
+   load->src[0] = nir_src_for_ssa(&deref->dest.ssa);
+   nir_ssa_dest_init(&load->instr, &load->dest, load->num_components,
+ glsl_get_bit_size(deref->type), NULL);
+   nir_builder_instr_insert(build, &load->instr);
+   return &load->dest.ssa;
+}
+
+static inline void
+nir_store_deref(nir_builder *build, nir_deref_instr *deref,
+nir_ssa_def *value, unsigned writemask)
+{
+   nir_intrinsic_instr *store =
+  nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_deref);
+   store->num_components = glsl_get_vector_elements(deref->type);
+   store->src[0] = nir_src_for_ssa(&deref->dest.ssa);
+   store->src[1] = nir_src_for_ssa(value);
+   nir_intrinsic_set_write_mask(store,
+writemask & ((1 << store->num_components) - 
1));
+   nir_builder_instr_insert(build, &store->instr);
+}
+
+static inline void
+nir_copy_deref(nir_builder *build, nir_deref_instr *dest, nir_deref_instr *src)
+{
+   nir_intrinsic_instr *copy =
+  nir_intrinsic_instr_create(build->shader, nir_intrinsic_copy_deref);
+   copy->src[0] = nir_src_for_ssa(&dest->dest.ssa);
+   copy->src[1] = nir_src_for_ssa(&src->dest.ssa);
+   nir_builder_instr_insert(build, &copy->instr);
+}
+
+static inline nir_ssa_def *
 nir_load_var(nir_builder *build, nir_variable *var)
 {
const unsigned num_components = glsl_get_vector_elements(var->type);
diff --git a/src/compiler/nir/nir_intrinsics.h 
b/src/compiler/nir/nir_intrinsics.h
index 8f3d3bc..c14a9ef 100644
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -49,6 +49,14 @@ INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, WRMASK, xx, 
xx, 0)
 INTRINSIC(copy_var, 0, ARR(0), false, 0, 2, 0, xx, xx, xx, 0)
 
 /*
+ * Pointer versions of the _var intrinsics which take a deref as the first (or
+ * second, in the case of copy) source.
+ */
+INTRINSIC(load_deref, 1, ARR(1), true, 0, 0, 0, xx, xx, xx, 
NIR_INTRINSIC_CAN_ELIMINATE)
+INTRINSIC(store_deref, 2, ARR(1, 0), false, 0, 0, 1, WRMASK, xx, xx, 0)
+INTRINSIC(copy_deref, 2, ARR(1, 1), false, 0, 0, 0, xx, xx, xx, 0)
+
+/*
  * Interpolation of input.  The interp_var_at* intrinsics are similar to the
  * load_var intrinsic acting on a shader input except that they interpolate
  * the input differently.  The at_sample and at_offset intrinsics take an
@@ -64,6 +72,21 @@ INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0, 
xx, xx, xx,
   NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 
 /*
+ * Interpolation of input.  The interp_deref_at* intrinsics are similar to the
+ * load_deref intrinsic acting on a shader input except that they interpolate
+ * the input differently.  The at_sample and at_offset intrinsics take an
+ * additional source that is an integer sample id or a vec2 position offset
+ * respectively.
+ */
+
+INTRINSIC(interp_deref_at_centroid, 1, ARR(1, 0), true, 0, 0, 0, xx, xx, xx,
+  NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+INTRINSIC(interp_deref_at_sample, 2, ARR(1, 1), true, 0, 0, 0, xx, xx, xx,
+  NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+INTRINSIC(interp_deref_at_offset, 2, ARR(1, 2), true, 0, 0, 0, xx, xx, xx,
+  NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
+/*
  * Ask the driver for the size of a given buffer. It takes the buffer index
  * as source.
  */
@@ -217,12 +240,15 @@ INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, 
xx, xx, xx, 0)
 
 #define ATOMIC(name, flags) \
INTRINSIC(name##_var, 0, ARR(0), true, 1, 1, 0, xx, xx, xx, flags) \
+   INTRINSIC(name##_deref, 1, ARR(1), true, 1, 0, 0, xx, xx, xx, flags) \
INTRINSIC(name, 1, ARR(1), true, 1, 0, 1, BASE, xx, xx, flags)
 #define ATOMIC2(name) \
INTRINSIC(name##_var, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0) \
+   INTRINSIC(name##_deref, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) \
INTRINSIC(name, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
 

[Mesa-dev] [PATCH 18/61] nir: Support deref instructions in remove_dead_variables

2018-03-23 Thread Jason Ekstrand
---
 src/compiler/nir/nir_remove_dead_variables.c | 99 
 1 file changed, 99 insertions(+)

diff --git a/src/compiler/nir/nir_remove_dead_variables.c 
b/src/compiler/nir/nir_remove_dead_variables.c
index eff66f9..6b1927f 100644
--- a/src/compiler/nir/nir_remove_dead_variables.c
+++ b/src/compiler/nir/nir_remove_dead_variables.c
@@ -27,6 +27,55 @@
 
 #include "nir.h"
 
+static bool
+deref_used_for_not_store(nir_deref_instr *deref)
+{
+   nir_foreach_use(src, &deref->dest.ssa) {
+  switch (src->parent_instr->type) {
+  case nir_instr_type_deref:
+ if (deref_used_for_not_store(nir_instr_as_deref(src->parent_instr)))
+return true;
+ break;
+
+  case nir_instr_type_intrinsic: {
+ nir_intrinsic_instr *intrin =
+nir_instr_as_intrinsic(src->parent_instr);
+ /* The first source of copy and store intrinsics is the deref to
+  * write.  Don't record those.
+  */
+ if ((intrin->intrinsic != nir_intrinsic_store_deref &&
+  intrin->intrinsic != nir_intrinsic_copy_var) ||
+ src != &intrin->src[0])
+return true;
+ break;
+  }
+
+  default:
+ /* If it's used by any other instruction type (most likely a texture
+  * instruction), consider it used.
+  */
+ return true;
+  }
+   }
+
+   return false;
+}
+
+static void
+add_var_use_deref(nir_deref_instr *deref, struct set *live)
+{
+   if (deref->deref_type != nir_deref_type_var)
+  return;
+
+   /* If it's not a local that never escapes the shader, then any access at
+* all means we need to keep it alive.
+*/
+   assert(deref->mode == deref->var->data.mode);
+   if (!(deref->mode & (nir_var_local | nir_var_global | nir_var_shared)) ||
+   deref_used_for_not_store(deref))
+  _mesa_set_add(live, deref->var);
+}
+
 static void
 add_var_use_intrinsic(nir_intrinsic_instr *instr, struct set *live,
   nir_variable_mode modes)
@@ -100,6 +149,10 @@ add_var_use_shader(nir_shader *shader, struct set *live, 
nir_variable_mode modes
  nir_foreach_block(block, function->impl) {
 nir_foreach_instr(instr, block) {
switch(instr->type) {
+   case nir_instr_type_deref:
+  add_var_use_deref(nir_instr_as_deref(instr), live);
+  break;
+
case nir_instr_type_intrinsic:
   add_var_use_intrinsic(nir_instr_as_intrinsic(instr), live,
 modes);
@@ -123,6 +176,33 @@ add_var_use_shader(nir_shader *shader, struct set *live, 
nir_variable_mode modes
 }
 
 static void
+remove_dead_deref(nir_deref_instr *deref)
+{
+   nir_foreach_use(src, &deref->dest.ssa) {
+  switch (src->parent_instr->type) {
+  case nir_instr_type_deref:
+ remove_dead_deref(nir_instr_as_deref(src->parent_instr));
+ break;
+
+  case nir_instr_type_intrinsic: {
+ nir_intrinsic_instr *intrin =
+nir_instr_as_intrinsic(src->parent_instr);
+
+ assert(intrin->intrinsic == nir_intrinsic_copy_deref ||
+intrin->intrinsic == nir_intrinsic_store_deref);
+ nir_instr_remove(&intrin->instr);
+ break;
+  }
+
+  default:
+ unreachable("This must have been marked as live!");
+  }
+   }
+
+   nir_instr_remove(&deref->instr);
+}
+
+static void
 remove_dead_var_writes(nir_shader *shader, struct set *live)
 {
nir_foreach_function(function, shader) {
@@ -144,6 +224,25 @@ remove_dead_var_writes(nir_shader *shader, struct set 
*live)
nir_instr_remove(instr);
  }
   }
+
+  /* We walk the list of instructions backwards because we're going to
+   * delete a deref and all of its uses and we don't want to end up
+   * deleting stuff ahead of us.
+   */
+  nir_foreach_block_reverse(block, function->impl) {
+ nir_foreach_instr_reverse_safe(instr, block) {
+if (instr->type != nir_instr_type_deref)
+   continue;
+
+nir_deref_instr *deref = nir_instr_as_deref(instr);
+if (deref->deref_type != nir_deref_type_var)
+   continue;
+
+/* If it's been marked as dead, delete it */
+if (deref->var->data.mode == 0)
+   remove_dead_deref(deref);
+ }
+  }
}
 }
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/61] nir/builder: Add deref building helpers

2018-03-23 Thread Jason Ekstrand
---
 src/compiler/nir/nir_builder.h | 106 +
 1 file changed, 106 insertions(+)

diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index 36e0ae3..66f705b 100644
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -525,6 +525,112 @@ nir_ssa_for_alu_src(nir_builder *build, nir_alu_instr 
*instr, unsigned srcn)
return nir_imov_alu(build, *src, num_components);
 }
 
+static inline nir_deref_instr *
+nir_build_deref_var(nir_builder *build, nir_variable *var)
+{
+   nir_deref_instr *deref =
+  nir_deref_instr_create(build->shader, nir_deref_type_var);
+
+   deref->mode = var->data.mode;
+   deref->type = var->type;
+   deref->var = var;
+
+   nir_ssa_dest_init(&deref->instr, &deref->dest, 1, 32, NULL);
+
+   nir_builder_instr_insert(build, &deref->instr);
+
+   return deref;
+}
+
+static inline nir_deref_instr *
+nir_build_deref_array(nir_builder *build, nir_deref_instr *parent,
+  nir_ssa_def *index)
+{
+   assert(glsl_type_is_array(parent->type) ||
+  glsl_type_is_matrix(parent->type) ||
+  glsl_type_is_vector(parent->type));
+
+   nir_deref_instr *deref =
+  nir_deref_instr_create(build->shader, nir_deref_type_array);
+
+   deref->mode = parent->mode;
+   deref->type = glsl_get_array_element(parent->type);
+   deref->parent = nir_src_for_ssa(&parent->dest.ssa);
+   deref->arr.index = nir_src_for_ssa(index);
+
+   nir_ssa_dest_init(&deref->instr, &deref->dest,
+ parent->dest.ssa.num_components,
+ parent->dest.ssa.bit_size, NULL);
+
+   nir_builder_instr_insert(build, &deref->instr);
+
+   return deref;
+}
+
+static inline nir_deref_instr *
+nir_build_deref_array_wildcard(nir_builder *build, nir_deref_instr *parent)
+{
+   assert(glsl_type_is_array(parent->type) ||
+  glsl_type_is_matrix(parent->type));
+
+   nir_deref_instr *deref =
+  nir_deref_instr_create(build->shader, nir_deref_type_array_wildcard);
+
+   deref->mode = parent->mode;
+   deref->type = glsl_get_array_element(parent->type);
+   deref->parent = nir_src_for_ssa(&parent->dest.ssa);
+
+   nir_ssa_dest_init(&deref->instr, &deref->dest,
+ parent->dest.ssa.num_components,
+ parent->dest.ssa.bit_size, NULL);
+
+   nir_builder_instr_insert(build, &deref->instr);
+
+   return deref;
+}
+
+static inline nir_deref_instr *
+nir_build_deref_struct(nir_builder *build, nir_deref_instr *parent,
+   unsigned index)
+{
+   assert(glsl_type_is_struct(parent->type));
+
+   nir_deref_instr *deref =
+  nir_deref_instr_create(build->shader, nir_deref_type_struct);
+
+   deref->mode = parent->mode;
+   deref->type = glsl_get_struct_field(parent->type, index);
+   deref->parent = nir_src_for_ssa(&parent->dest.ssa);
+   deref->strct.index = index;
+
+   nir_ssa_dest_init(&deref->instr, &deref->dest,
+ parent->dest.ssa.num_components,
+ parent->dest.ssa.bit_size, NULL);
+
+   nir_builder_instr_insert(build, &deref->instr);
+
+   return deref;
+}
+
+static inline nir_deref_instr *
+nir_build_deref_cast(nir_builder *build, nir_ssa_def *parent,
+ nir_variable_mode mode, const struct glsl_type *type)
+{
+   nir_deref_instr *deref =
+  nir_deref_instr_create(build->shader, nir_deref_type_cast);
+
+   deref->mode = mode;
+   deref->type = type;
+   deref->parent = nir_src_for_ssa(parent);
+
+   nir_ssa_dest_init(&deref->instr, &deref->dest,
+ parent->num_components, parent->bit_size, NULL);
+
+   nir_builder_instr_insert(build, &deref->instr);
+
+   return deref;
+}
+
 static inline nir_ssa_def *
 nir_load_var(nir_builder *build, nir_variable *var)
 {
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/61] nir/validator: Validate that all used variables exist

2018-03-23 Thread Jason Ekstrand
We were validating this for locals but nothing else.
---
 src/compiler/nir/nir_validate.c | 16 +---
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c
index a49948f..e9d6bd5 100644
--- a/src/compiler/nir/nir_validate.c
+++ b/src/compiler/nir/nir_validate.c
@@ -96,7 +96,9 @@ typedef struct {
/* bitset of registers we have currently found; used to check uniqueness */
BITSET_WORD *regs_found;
 
-   /* map of local variable -> function implementation where it is defined */
+   /* map of variable -> function implementation where it is defined or NULL
+* if it is a global variable
+*/
struct hash_table *var_defs;
 
/* map of instruction/var/etc to failed assert string */
@@ -448,12 +450,10 @@ validate_deref_chain(nir_deref *deref, nir_variable_mode 
mode,
 static void
 validate_var_use(nir_variable *var, validate_state *state)
 {
-   if (var->data.mode == nir_var_local) {
-  struct hash_entry *entry = _mesa_hash_table_search(state->var_defs, var);
-
-  validate_assert(state, entry);
+   struct hash_entry *entry = _mesa_hash_table_search(state->var_defs, var);
+   validate_assert(state, entry);
+   if (var->data.mode == nir_var_local)
   validate_assert(state, (nir_function_impl *) entry->data == state->impl);
-   }
 }
 
 static void
@@ -1000,7 +1000,9 @@ validate_var_decl(nir_variable *var, bool is_global, 
validate_state *state)
 * support)
 */
 
-   if (!is_global) {
+   if (is_global) {
+  _mesa_hash_table_insert(state->var_defs, var, NULL);
+   } else {
   _mesa_hash_table_insert(state->var_defs, var, state->impl);
}
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/61] nir: Add src/dest num_components helpers

2018-03-23 Thread Jason Ekstrand
We already have these for bit_size
---
 src/compiler/nir/nir.h | 12 
 1 file changed, 12 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 0d207d0..dcd7045 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -626,11 +626,23 @@ nir_src_bit_size(nir_src src)
 }
 
 static inline unsigned
+nir_src_num_components(nir_src src)
+{
+   return src.is_ssa ? src.ssa->num_components : src.reg.reg->num_components;
+}
+
+static inline unsigned
 nir_dest_bit_size(nir_dest dest)
 {
return dest.is_ssa ? dest.ssa.bit_size : dest.reg.reg->bit_size;
 }
 
+static inline unsigned
+nir_dest_num_components(nir_dest dest)
+{
+   return dest.is_ssa ? dest.ssa.num_components : dest.reg.reg->num_components;
+}
+
 void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if);
 void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr);
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 00/61] nir: Move to using instructions for derefs

2018-03-23 Thread Jason Ekstrand
This is something that Connor and I have been talking about for some time
now.  The basic idea is to replace the current singly linked nir_deref list
with deref instructions.  This is similar to what LLVM does and it offers
quite a bit more freedom when we start getting more realistic pointers from
compute applications.

This series implements an almost complete conversion for both i965 and anv.
The two remaining gaps are nir_lower_locals_to_regs and nir_lower_samplers.
The former will have to wait for ir3 to be converted and the latter will
have to wait for radeonsi.  I've got patches for nir_lower_samplers but not
nir_lower_samplers_as_deref which is required by at least radeonsi.  Once
those are in place, we should be able to drop the lowering pass from the
Intel back-end completely.

The next step (which I will start on next week) will be removing legacy
derefs from core NIR.  This will also involve significant reworks in some
passes such as vars_to_ssa which still uses legacy derefs internally even
for things which use deref instructions.

Clearly, we can't remove anything until all of the other drivers are
converted.  However, this series should be a good basis for anyone wanting
to work on converting another driver since almost all of the core NIR
passes now work with both types of derefs so you can convert in whatever
way makes sense.

This series can be found as a branch on gitlab:

https://gitlab.freedesktop.org/jekstrand/mesa/commits/review/nir-deref-instrs-v1

Cc: Rob Clark 
Cc: Timothy Arceri 
Cc: Eric Anholt 
Cc: Connor Abbott 
Cc: Bas Nieuwenhuizen 
Cc: Karol Herbst 

Jason Ekstrand (61):
  nir: Add src/dest num_components helpers
  nir: Return a cursor from nir_instr_remove
  nir/vars_to_ssa: Remove copies from the correct set
  nir/lower_indirect_derefs: Support interp_var_at intrinsics
  intel/vec4: Set channel_sizes for MOV_INDIRECT sources
  nir/validator: Validate that all used variables exist
  nir: Add a deref instruction type
  nir/builder: Add deref building helpers
  nir: Add _deref versions of all of the _var intrinsics
  nir: Add deref sources to texture instructions
  nir: Add helpers for working with deref instructions
  anv,i965,radv,st,ir3: Call nir_lower_deref_instrs
  glsl/nir: Only claim to handle intrinsic functions
  glsl/nir: Use deref instructions instead of deref chains
  prog/nir: Simplify some load/store operations
  prog/nir: Use deref instructions for params
  nir/lower_atomics: Rework the main walker loop a bit
  nir: Support deref instructions in remove_dead_variables
  nir: Add a pass for fixing deref modes
  nir: Support deref instructions in lower_global_vars_to_local
  nir: Support deref instructions in lower_io_to_temporaries
  nir: Add a deref path helper struct
  nir: Support deref instructions in lower_var_copies
  nir: Support deref instructions in split_var_copies
  nir: Support deref instructions in lower_vars_to_ssa
  nir: Support deref instructions in lower_indirect_derefs
  nir/deref: Add a deref cleanup function
  nir: Support deref instructions in lower_system_values
  nir: Support deref instructions in lower_clip_cull
  nir: Support deref instructions in propagate_invariant
  nir: Support deref instructions in gather_info
  nir: Support deref instructions in lower_io
  nir: Support deref instructions in lower_atomics
  nir: Support deref instructions in lower_wpos_ytransform
  nir: Support deref instructions in lower_pos_center
  nir: Support deref instructions in remove_unused_varyings
  intel,ir3: Disable nir_opt_copy_prop_vars
  intel/nir: Fixup deref modes after lowering patch vertices
  i965: Move nir_lower_deref_instrs to right before locals_to_regs
  st/nir: Move lower_deref_instrs later
  spirv: Use deref instructions for most variables
  nir: Add a concept of per-member structs and a lowering pass
  nir/lower_system_values: Support SYSTEM_VALUE_LOCAL_GROUP_SIZE
  spirv: Use the LOCAL_GROUP_SIZE system value
  nir/spirv: Pass nir_variable_data into apply_var_decoration
  anv/pipeline: Lower more constant initializers earlier
  spirv: Use NIR per-member splitting
  spirv: Make push constants an offset-based pointer
  spirv: Clean up vtn_pointer_to_offset
  spirv: Allow pointers to have a deref at the base
  spirv: Update vtn_pointer_to/from_ssa to handle deref pointers
  spirv: Record the type of functions
  spirv/cfg: Make the builder fully capable for both walks
  nir,spirv: Rework function calls
  anv/pipeline: Do less deref instruction lowering
  anv/pipeline: Convert lower_input_attachments to deref instructions
  anv/pipeline: Convert YCbCr lowering to deref instructions
  anv/apply_pipeline_layout: Simplify extract_tex_src_plane
  anv/pipeline: Convert apply_pipeline_layout to deref instructions
  intel/fs: Use image_deref intrinsics instead of image_var
  intel/nir: Only lower load/store derefs

 

[Mesa-dev] [PATCH 02/61] nir: Return a cursor from nir_instr_remove

2018-03-23 Thread Jason Ekstrand
Because nir_instr_remove is an inline wrapper around nir_instr_remove_v,
the compiler should be able to tell that the return value is unused and
not emit the extra code in most cases.
---
 src/compiler/nir/nir.c|  2 +-
 src/compiler/nir/nir.h| 16 +++-
 src/compiler/nir/nir_opt_copy_prop_vars.c | 19 ++-
 3 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
index a97b119..b16d6fa 100644
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -1158,7 +1158,7 @@ remove_defs_uses(nir_instr *instr)
nir_foreach_src(instr, remove_use_cb, instr);
 }
 
-void nir_instr_remove(nir_instr *instr)
+void nir_instr_remove_v(nir_instr *instr)
 {
remove_defs_uses(instr);
exec_node_remove(&instr->node);
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index dcd7045..8176855 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2277,7 +2277,21 @@ nir_instr_insert_after_cf_list(struct exec_list *list, 
nir_instr *after)
nir_instr_insert(nir_after_cf_list(list), after);
 }
 
-void nir_instr_remove(nir_instr *instr);
+void nir_instr_remove_v(nir_instr *instr);
+
+static inline nir_cursor
+nir_instr_remove(nir_instr *instr)
+{
+   nir_cursor cursor;
+   nir_instr *prev = nir_instr_prev(instr);
+   if (prev) {
+  cursor = nir_after_instr(prev);
+   } else {
+  cursor = nir_before_block(instr->block);
+   }
+   nir_instr_remove_v(instr);
+   return cursor;
+}
 
 /** @} */
 
diff --git a/src/compiler/nir/nir_opt_copy_prop_vars.c 
b/src/compiler/nir/nir_opt_copy_prop_vars.c
index 89ddc8d..cc8f00f 100644
--- a/src/compiler/nir/nir_opt_copy_prop_vars.c
+++ b/src/compiler/nir/nir_opt_copy_prop_vars.c
@@ -349,21 +349,6 @@ store_to_entry(struct copy_prop_var_state *state, struct 
copy_entry *entry,
}
 }
 
-/* Remove an instruction and return a cursor pointing to where it was */
-static nir_cursor
-instr_remove_cursor(nir_instr *instr)
-{
-   nir_cursor cursor;
-   nir_instr *prev = nir_instr_prev(instr);
-   if (prev) {
-  cursor = nir_after_instr(prev);
-   } else {
-  cursor = nir_before_block(instr->block);
-   }
-   nir_instr_remove(instr);
-   return cursor;
-}
-
 /* Do a "load" from an SSA-based entry return it in "value" as a value with a
  * single SSA def.  Because an entry could reference up to 4 different SSA
  * defs, a vecN operation may be inserted to combine them into a single SSA
@@ -396,7 +381,7 @@ load_from_ssa_entry_value(struct copy_prop_var_state *state,
 
if (all_same) {
   /* Our work here is done */
-  b->cursor = instr_remove_cursor(&intrin->instr);
+  b->cursor = nir_instr_remove(&intrin->instr);
   intrin->instr.block = NULL;
   return true;
}
@@ -594,7 +579,7 @@ load_from_deref_entry_value(struct copy_prop_var_state 
*state,
   value_tail->child = nir_deref_clone(src_tail->child, value_tail);
}
 
-   b->cursor = instr_remove_cursor(&intrin->instr);
+   b->cursor = nir_instr_remove(&intrin->instr);
 
return true;
 }
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/61] intel/vec4: Set channel_sizes for MOV_INDIRECT sources

2018-03-23 Thread Jason Ekstrand
Otherwise, any indirect push constant access results in an assertion
failure when we start digging through the channel_sizes array.  This
fixes dEQP-VK.pipeline.push_constant.graphics_pipeline.dynamic_index_vert
on Haswell.  It should be a harmless no-op for GL since indirect push
constants aren't used there.

Fixes: e69e5c7006d "i965/vec4: load dvec3/4 uniforms first in the..."
---
 src/intel/compiler/brw_vec4.cpp | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index e483814..d1dd709 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -688,8 +688,11 @@ vec4_visitor::pack_uniform_registers()
   * the next part of our packing algorithm.
   */
  int reg = inst->src[0].nr;
- for (unsigned i = 0; i < vec4s_read; i++)
+ int channel_size = type_sz(inst->src[0].type) / 4;
+ for (unsigned i = 0; i < vec4s_read; i++) {
 chans_used[reg + i] = 4;
+channel_sizes[reg + i] = MAX2(channel_sizes[reg + i], 
channel_size);
+ }
   }
}
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/61] nir/vars_to_ssa: Remove copies from the correct set

2018-03-23 Thread Jason Ekstrand
Cc: mesa-sta...@lists.freedesktop.org
---
 src/compiler/nir/nir_lower_vars_to_ssa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_lower_vars_to_ssa.c 
b/src/compiler/nir/nir_lower_vars_to_ssa.c
index e8cfe30..0cc6514 100644
--- a/src/compiler/nir/nir_lower_vars_to_ssa.c
+++ b/src/compiler/nir/nir_lower_vars_to_ssa.c
@@ -464,7 +464,7 @@ lower_copies_to_load_store(struct deref_node *node,
 
  struct set_entry *arg_entry = _mesa_set_search(arg_node->copies, 
copy);
  assert(arg_entry);
- _mesa_set_remove(node->copies, arg_entry);
+ _mesa_set_remove(arg_node->copies, arg_entry);
   }
 
   nir_instr_remove(>instr);
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/61] nir/lower_indirect_derefs: Support interp_var_at intrinsics

2018-03-23 Thread Jason Ekstrand
This fixes the fs-interpolateAtCentroid-block-array piglit test on i965.

Cc: mesa-sta...@lists.freedesktop.org
---
 src/compiler/nir/nir_lower_indirect_derefs.c | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir_lower_indirect_derefs.c 
b/src/compiler/nir/nir_lower_indirect_derefs.c
index c949224..02f202d 100644
--- a/src/compiler/nir/nir_lower_indirect_derefs.c
+++ b/src/compiler/nir/nir_lower_indirect_derefs.c
@@ -95,9 +95,15 @@ emit_load_store(nir_builder *b, nir_intrinsic_instr 
*orig_instr,
if (src == NULL) {
   /* This is a load instruction */
   nir_intrinsic_instr *load =
- nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
+ nir_intrinsic_instr_create(b->shader, orig_instr->intrinsic);
   load->num_components = orig_instr->num_components;
   load->variables[0] = nir_deref_var_clone(deref, load);
+
+  /* Copy over any sources.  This is needed for interp_var_at */
+  for (unsigned i = 0;
+   i < nir_intrinsic_infos[orig_instr->intrinsic].num_srcs; i++)
+ nir_src_copy(&load->src[i], &orig_instr->src[i], load);
+
   unsigned bit_size = orig_instr->dest.ssa.bit_size;
   nir_ssa_dest_init(>instr, >dest,
 load->num_components, bit_size, NULL);
@@ -142,6 +148,9 @@ lower_indirect_block(nir_block *block, nir_builder *b,
 
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   if (intrin->intrinsic != nir_intrinsic_load_var &&
+  intrin->intrinsic != nir_intrinsic_interp_var_at_centroid &&
+  intrin->intrinsic != nir_intrinsic_interp_var_at_sample &&
+  intrin->intrinsic != nir_intrinsic_interp_var_at_offset &&
   intrin->intrinsic != nir_intrinsic_store_var)
  continue;
 
@@ -158,7 +167,7 @@ lower_indirect_block(nir_block *block, nir_builder *b,
 
   b->cursor = nir_before_instr(>instr);
 
-  if (intrin->intrinsic == nir_intrinsic_load_var) {
+  if (intrin->intrinsic != nir_intrinsic_store_var) {
  nir_ssa_def *result;
  emit_load_store(b, intrin, intrin->variables[0],
  >variables[0]->deref, , NULL);
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/61] nir: Add a deref instruction type

2018-03-23 Thread Jason Ekstrand
This commit adds a new instruction type to NIR for handling derefs.
Nothing uses it yet but this adds the data structure as well as all of
the code to validate, print, clone, and [de]serialize them.
---
 src/compiler/nir/nir.c| 50 +++
 src/compiler/nir/nir.h| 58 -
 src/compiler/nir/nir_clone.c  | 42 
 src/compiler/nir/nir_instr_set.c  | 78 +
 src/compiler/nir/nir_opt_copy_propagate.c | 62 +++
 src/compiler/nir/nir_opt_dce.c|  7 +++
 src/compiler/nir/nir_print.c  | 56 +
 src/compiler/nir/nir_serialize.c  | 81 ++
 src/compiler/nir/nir_validate.c   | 83 +++
 9 files changed, 506 insertions(+), 11 deletions(-)

diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
index b16d6fa..2ed96a1 100644
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -469,6 +469,26 @@ nir_alu_instr_create(nir_shader *shader, nir_op op)
return instr;
 }
 
+nir_deref_instr *
+nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
+{
+   nir_deref_instr *instr =
+  rzalloc_size(shader, sizeof(nir_deref_instr));
+
+   instr_init(&instr->instr, nir_instr_type_deref);
+
+   instr->deref_type = deref_type;
+   if (deref_type != nir_deref_type_var)
+  src_init(&instr->parent);
+
+   if (deref_type == nir_deref_type_array)
+  src_init(&instr->arr.index);
+
+   dest_init(&instr->dest);
+
+   return instr;
+}
+
 nir_jump_instr *
 nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
 {
@@ -1198,6 +1218,12 @@ visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb 
cb, void *state)
 }
 
 static bool
+visit_deref_dest(nir_deref_instr *instr, nir_foreach_dest_cb cb, void *state)
+{
+   return cb(&instr->dest, state);
+}
+
+static bool
 visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb,
  void *state)
 {
@@ -1238,6 +1264,8 @@ nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb 
cb, void *state)
switch (instr->type) {
case nir_instr_type_alu:
   return visit_alu_dest(nir_instr_as_alu(instr), cb, state);
+   case nir_instr_type_deref:
+  return visit_deref_dest(nir_instr_as_deref(instr), cb, state);
case nir_instr_type_intrinsic:
   return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state);
case nir_instr_type_tex:
@@ -1283,6 +1311,7 @@ nir_foreach_ssa_def(nir_instr *instr, 
nir_foreach_ssa_def_cb cb, void *state)
 {
switch (instr->type) {
case nir_instr_type_alu:
+   case nir_instr_type_deref:
case nir_instr_type_tex:
case nir_instr_type_intrinsic:
case nir_instr_type_phi:
@@ -1349,6 +1378,23 @@ visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb 
cb, void *state)
 }
 
 static bool
+visit_deref_instr_src(nir_deref_instr *instr,
+  nir_foreach_src_cb cb, void *state)
+{
+   if (instr->deref_type != nir_deref_type_var) {
+  if (!visit_src(&instr->parent, cb, state))
+ return false;
+   }
+
+   if (instr->deref_type == nir_deref_type_array) {
+  if (!visit_src(&instr->arr.index, cb, state))
+ return false;
+   }
+
+   return true;
+}
+
+static bool
 visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state)
 {
for (unsigned i = 0; i < instr->num_srcs; i++) {
@@ -1436,6 +1482,10 @@ nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, 
void *state)
   if (!visit_alu_src(nir_instr_as_alu(instr), cb, state))
  return false;
   break;
+   case nir_instr_type_deref:
+  if (!visit_deref_instr_src(nir_instr_as_deref(instr), cb, state))
+ return false;
+  break;
case nir_instr_type_intrinsic:
   if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state))
  return false;
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 8176855..b02c241 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -421,6 +421,7 @@ typedef struct nir_register {
 
 typedef enum {
nir_instr_type_alu,
+   nir_instr_type_deref,
nir_instr_type_call,
nir_instr_type_tex,
nir_instr_type_intrinsic,
@@ -888,7 +889,9 @@ bool nir_alu_srcs_equal(const nir_alu_instr *alu1, const 
nir_alu_instr *alu2,
 typedef enum {
nir_deref_type_var,
nir_deref_type_array,
-   nir_deref_type_struct
+   nir_deref_type_array_wildcard,
+   nir_deref_type_struct,
+   nir_deref_type_cast,
 } nir_deref_type;
 
 typedef struct nir_deref {
@@ -950,6 +953,56 @@ nir_deref_tail(nir_deref *deref)
 typedef struct {
nir_instr instr;
 
+   /** The type of this deref instruction */
+   nir_deref_type deref_type;
+
+   /** The mode of the underlying variable */
+   nir_variable_mode mode;
+
+   /** The dereferenced type of the resulting pointer value */
+   const struct glsl_type *type;
+
+   union {
+  /** Variable being dereferenced if deref_type is a deref_var */
+ 

Re: [Mesa-dev] [PATCH v3 06/19] RFC: nir/vtn: "raw" pointer support

2018-03-23 Thread Jason Ekstrand
On Fri, Mar 23, 2018 at 2:15 PM, Karol Herbst  wrote:

> On Fri, Mar 23, 2018 at 10:07 PM, Jason Ekstrand 
> wrote:
> > +list
> >
> > On Fri, Mar 23, 2018 at 1:45 PM, Karol Herbst 
> wrote:
> >>
> >> On Fri, Mar 23, 2018 at 9:30 PM, Jason Ekstrand 
> >> wrote:
> >> > As I've been rewriting core NIR deref handling, I've been thinking
> about
> >> > this problem quite a bit.  One objective I have is to actually make
> UBO
> >> > and
> >> > SSBO access go through derefs instead of just being an offset and
> index
> >> > so
> >> > that the compiler can better reason about them.  In particular, I want
> >> > to be
> >> > able to start doing load/store elimination on SSBOs, SLM, and whatever
> >> > CL
> >> > has which would be great for everyone's compute performance (GL,
> Vulkan,
> >> > CL,
> >> > etc.).
> >> >
> >> > I would be lying if I said I had a full plan but I do have part of a
> >> > plan.
> >> > In my patch which adds the deref instructions, I add a new "cast"
> deref
> >> > type
> >> > which takes an arbitrary value as it's source and kicks out a deref
> with
> >> > a
> >> > type.  Whenever we discover that the source of the cast is actually
> >> > another
> >> > deref which is compatible (same type etc.), copy propagation gets rid
> of
> >> > the
> >> > cast for you.  The idea is that, instead of doing a load_raw(raw_ptr),
> >> > you
> >> > would do a load((type *)raw_ptr).
> >> >
> >> > Right now, most of the core NIR optimizations will throw a fit if they
> >> > ever
> >> > see a cast.  This is intentional because it requires us to manually go
> >> > through and handle casts.  This would mean that, at the moment, you
> >> > would
> >> > have to lower to load_raw intrinsics almost immediately after coming
> out
> >> > of
> >> > SPIR-V.
> >> >
> >>
> >> Well it gets more fun with OpenCL 2.0 where you can have generic
> >> pointer where you only know the type at creation time. You can also
> >> declare generic pointers as function inputs in a way, that you never
> >> actually know from where you have to load if you only have that one
> >> function. So the actual load operation depends on when you create the
> >> initial pointer variable (you can cast from X to generic, but not the
> >> other way around).
> >>
> >> Which in the end means you can end up with load(generic_ptr) and only
> >> following the chain up to it's creation (with function inlining in
> >> mind) you know the actual memory target.
> >
> >
> > Yup.  And there will always be crazy cases where you can't actually
> follow
> > it and you have to emit a pile of code to load different ways depending
> on
> > some bits somewhere that tell you how to load it.  I'm well aware of the
> > insanity. :-)  This is part of the reason why I'm glad I'm not trying to
> > write an OpenCL 2.0 driver.
> >
> > This insanity is exactly why I'm suggesting the pointer casting.  Sure,
> you
> > may not know the data type until the actual load.  In that case, you end
> up
> > with the cast being right before the load.  If you don't know the storage
> > class, maybe you have to switch and do multiple casts based on some bits.
> > Alternatively, if you don't know the storage class, we can just let the
> > deref mode be 0 for "I don't know". or maybe multiple bits for "these are
> > the things it might be".  In any case, I think we can handle it.
> >
>
> there shouldn't be a situation where we don't know, except when you
> don't inline all functions. I think Rob had the idea of fat pointers
> where a pointer is a vec2 and the 2nd component contains the actual
> pointer type and you end up with a switch over the type to get the
> correct storage class. And if the compiler inlines all functions, it
> should be able to optimize that switch away.
>

Right.  Today, we live in a world where all functions are inlined.  Sadly,
I fear that world may come to an end one of these days. :(


> > It's insane but we need some sort of structure to be able to reason about
> > the insanity.  Immediately lowering everything to load_raw is a good way
> to
> > get a driver off the ground.  What it's not so good for is making an
> > optimizing compiler that can reason about these crazy pointers and
> actually
> > optimize them.  Lest I sound too negative, I'm 100% fine with taking a
> short
> > path to getting something working now so long as it doesn't cloud up our
> > ability to do better in the future.
> >
> >>
> >> And I think the issue here is not that it is some kind of raw pointer
> >> in the patch, but more like an unbound/physical pointer, which doesn't
> >> relate to any variable. It is just a value like any other int/long as
> >> well.
> >>
> >> > On Fri, Mar 23, 2018 at 12:33 PM, Karol Herbst 
> >> > wrote:
> >> >>
> >> >> From: Rob Clark 
> >> >>
> >> >> An attempt to add physical pointer support to vtn.  I'm not totally
> >> >> happy about the 

Re: [Mesa-dev] [PATCH v3 06/19] RFC: nir/vtn: "raw" pointer support

2018-03-23 Thread Karol Herbst
On Fri, Mar 23, 2018 at 10:07 PM, Jason Ekstrand  wrote:
> +list
>
> On Fri, Mar 23, 2018 at 1:45 PM, Karol Herbst  wrote:
>>
>> On Fri, Mar 23, 2018 at 9:30 PM, Jason Ekstrand 
>> wrote:
>> > As I've been rewriting core NIR deref handling, I've been thinking about
>> > this problem quite a bit.  One objective I have is to actually make UBO
>> > and
>> > SSBO access go through derefs instead of just being an offset and index
>> > so
>> > that the compiler can better reason about them.  In particular, I want
>> > to be
>> > able to start doing load/store elimination on SSBOs, SLM, and whatever
>> > CL
>> > has which would be great for everyone's compute performance (GL, Vulkan,
>> > CL,
>> > etc.).
>> >
>> > I would be lying if I said I had a full plan but I do have part of a
>> > plan.
>> > In my patch which adds the deref instructions, I add a new "cast" deref
>> > type
>> > which takes an arbitrary value as it's source and kicks out a deref with
>> > a
>> > type.  Whenever we discover that the source of the cast is actually
>> > another
>> > deref which is compatible (same type etc.), copy propagation gets rid of
>> > the
>> > cast for you.  The idea is that, instead of doing a load_raw(raw_ptr),
>> > you
>> > would do a load((type *)raw_ptr).
>> >
>> > Right now, most of the core NIR optimizations will throw a fit if they
>> > ever
>> > see a cast.  This is intentional because it requires us to manually go
>> > through and handle casts.  This would mean that, at the moment, you
>> > would
>> > have to lower to load_raw intrinsics almost immediately after coming out
>> > of
>> > SPIR-V.
>> >
>>
>> Well it gets more fun with OpenCL 2.0 where you can have generic
>> pointer where you only know the type at creation time. You can also
>> declare generic pointers as function inputs in a way, that you never
>> actually know from where you have to load if you only have that one
>> function. So the actual load operation depends on when you create the
>> initial pointer variable (you can cast from X to generic, but not the
>> other way around).
>>
>> Which in the end means you can end up with load(generic_ptr) and only
>> following the chain up to it's creation (with function inlining in
>> mind) you know the actual memory target.
>
>
> Yup.  And there will always be crazy cases where you can't actually follow
> it and you have to emit a pile of code to load different ways depending on
> some bits somewhere that tell you how to load it.  I'm well aware of the
> insanity. :-)  This is part of the reason why I'm glad I'm not trying to
> write an OpenCL 2.0 driver.
>
> This insanity is exactly why I'm suggesting the pointer casting.  Sure, you
> may not know the data type until the actual load.  In that case, you end up
> with the cast being right before the load.  If you don't know the storage
> class, maybe you have to switch and do multiple casts based on some bits.
> Alternatively, if you don't know the storage class, we can just let the
> deref mode be 0 for "I don't know". or maybe multiple bits for "these are
> the things it might be".  In any case, I think we can handle it.
>

there shouldn't be a situation where we don't know, except when you
don't inline all functions. I think Rob had the idea of fat pointers
where a pointer is a vec2 and the 2nd component contains the actual
pointer type and you end up with a switch over the type to get the
correct storage class. And if the compiler inlines all functions, it
should be able to optimize that switch away.

> It's insane but we need some sort of structure to be able to reason about
> the insanity.  Immediately lowering everything to load_raw is a good way to
> get a driver off the ground.  What it's not so good for is making an
> optimizing compiler that can reason about these crazy pointers and actually
> optimize them.  Lest I sound too negative, I'm 100% fine with taking a short
> path to getting something working now so long as it doesn't cloud up our
> ability to do better in the future.
>
>>
>> And I think the issue here is not that it is some kind of raw pointer
>> in the patch, but more like an unbound/physical pointer, which doesn't
>> relate to any variable. It is just a value like any other int/long as
>> well.
>>
>> > On Fri, Mar 23, 2018 at 12:33 PM, Karol Herbst 
>> > wrote:
>> >>
>> >> From: Rob Clark 
>> >>
>> >> An attempt to add physical pointer support to vtn.  I'm not totally
>> >> happy about the handling of logical pointers vs physical pointers.
>> >> So this is really more of an RFS (request for suggestions)
>> >>
>> >> v2: treat vec3 types as vec4 when dereferencing
>> >>
>> >> Signed-off-by: Karol Herbst 
>> >> ---
>> >>  src/compiler/spirv/spirv_to_nir.c  |  87 ---
>> >>  src/compiler/spirv/vtn_private.h   |  20 ++-
>> >>  src/compiler/spirv/vtn_variables.c | 300
>> >> 

Re: [Mesa-dev] [PATCH v3 06/19] RFC: nir/vtn: "raw" pointer support

2018-03-23 Thread Jason Ekstrand
+list

On Fri, Mar 23, 2018 at 1:45 PM, Karol Herbst  wrote:

> On Fri, Mar 23, 2018 at 9:30 PM, Jason Ekstrand 
> wrote:
> > As I've been rewriting core NIR deref handling, I've been thinking about
> > this problem quite a bit.  One objective I have is to actually make UBO
> and
> > SSBO access go through derefs instead of just being an offset and index
> so
> > that the compiler can better reason about them.  In particular, I want
> to be
> > able to start doing load/store elimination on SSBOs, SLM, and whatever CL
> > has which would be great for everyone's compute performance (GL, Vulkan,
> CL,
> > etc.).
> >
> > I would be lying if I said I had a full plan but I do have part of a
> plan.
> > In my patch which adds the deref instructions, I add a new "cast" deref
> type
> > which takes an arbitrary value as it's source and kicks out a deref with
> a
> > type.  Whenever we discover that the source of the cast is actually
> another
> > deref which is compatible (same type etc.), copy propagation gets rid of
> the
> > cast for you.  The idea is that, instead of doing a load_raw(raw_ptr),
> you
> > would do a load((type *)raw_ptr).
> >
> > Right now, most of the core NIR optimizations will throw a fit if they
> ever
> > see a cast.  This is intentional because it requires us to manually go
> > through and handle casts.  This would mean that, at the moment, you would
> > have to lower to load_raw intrinsics almost immediately after coming out
> of
> > SPIR-V.
> >
>
> Well it gets more fun with OpenCL 2.0 where you can have generic
> pointer where you only know the type at creation time. You can also
> declare generic pointers as function inputs in a way, that you never
> actually know from where you have to load if you only have that one
> function. So the actual load operation depends on when you create the
> initial pointer variable (you can cast from X to generic, but not the
> other way around).
>
> Which in the end means you can end up with load(generic_ptr) and only
> following the chain up to it's creation (with function inlining in
> mind) you know the actual memory target.
>

Yup.  And there will always be crazy cases where you can't actually follow
it and you have to emit a pile of code to load different ways depending on
some bits somewhere that tell you how to load it.  I'm well aware of the
insanity. :-)  This is part of the reason why I'm glad I'm not trying to
write an OpenCL 2.0 driver.

This insanity is exactly why I'm suggesting the pointer casting.  Sure, you
may not know the data type until the actual load.  In that case, you end up
with the cast being right before the load.  If you don't know the storage
class, maybe you have to switch and do multiple casts based on some bits.
Alternatively, if you don't know the storage class, we can just let the
deref mode be 0 for "I don't know". or maybe multiple bits for "these are
the things it might be".  In any case, I think we can handle it.

It's insane but we need some sort of structure to be able to reason about
the insanity.  Immediately lowering everything to load_raw is a good way to
get a driver off the ground.  What it's not so good for is making an
optimizing compiler that can reason about these crazy pointers and actually
optimize them.  Lest I sound too negative, I'm 100% fine with taking a
short path to getting something working now so long as it doesn't cloud up
our ability to do better in the future.


> And I think the issue here is not that it is some kind of raw pointer
> in the patch, but more like an unbound/physical pointer, which doesn't
> relate to any variable. It is just a value like any other int/long as
> well.
>
> > On Fri, Mar 23, 2018 at 12:33 PM, Karol Herbst 
> wrote:
> >>
> >> From: Rob Clark 
> >>
> >> An attempt to add physical pointer support to vtn.  I'm not totally
> >> happy about the handling of logical pointers vs physical pointers.
> >> So this is really more of an RFS (request for suggestions)
> >>
> >> v2: treat vec3 types as vec4 when dereferencing
> >>
> >> Signed-off-by: Karol Herbst 
> >> ---
> >>  src/compiler/spirv/spirv_to_nir.c  |  87 ---
> >>  src/compiler/spirv/vtn_private.h   |  20 ++-
> >>  src/compiler/spirv/vtn_variables.c | 300
> >> -
> >>  3 files changed, 347 insertions(+), 60 deletions(-)
> >>
> >> diff --git a/src/compiler/spirv/spirv_to_nir.c
> >> b/src/compiler/spirv/spirv_to_nir.c
> >> index 334bcab9a82..d58a68f80ef 100644
> >> --- a/src/compiler/spirv/spirv_to_nir.c
> >> +++ b/src/compiler/spirv/spirv_to_nir.c
> >> @@ -572,6 +572,7 @@ vtn_types_compatible(struct vtn_builder *b,
> >>   vtn_types_compatible(b, t1->array_element,
> >> t2->array_element);
> >>
> >> case vtn_base_type_pointer:
> >> +   case vtn_base_type_raw_pointer:
> >>return vtn_types_compatible(b, t1->deref, t2->deref);
> >>
> >> case 

Re: [Mesa-dev] [PATCH 1/6] i965: Add negative_equals methods

2018-03-23 Thread Ian Romanick
On 03/23/2018 12:17 PM, Chema Casanova wrote:
> 
> 
> On 23/03/18 19:27, Matt Turner wrote:
>> On Wed, Mar 21, 2018 at 5:58 PM, Ian Romanick  wrote:
>>> From: Ian Romanick 
>>>
>>> This method is similar to the existing ::equals methods.  Instead of
>>> testing that two src_regs are equal to each other, it tests that one is
>>> the negation of the other.
>>>
>>> v2: Simplify various checks based on suggestions from Matt.  Use
>>> src_reg::type instead of fixed_hw_reg.type in a check.  Also suggested
>>> by Matt.
>>>
>>> v3: Rebase on 3 years.  Fix some problems with negative_equals with VF
>>> constants.  Add fs_reg::negative_equals.
>>>
>>> Signed-off-by: Ian Romanick 
>>> ---
>>>  src/intel/compiler/brw_fs.cpp |  7 ++
>>>  src/intel/compiler/brw_ir_fs.h|  1 +
>>>  src/intel/compiler/brw_ir_vec4.h  |  1 +
>>>  src/intel/compiler/brw_reg.h  | 46 
>>> +++
>>>  src/intel/compiler/brw_shader.cpp |  6 +
>>>  src/intel/compiler/brw_shader.h   |  1 +
>>>  src/intel/compiler/brw_vec4.cpp   |  7 ++
>>>  7 files changed, 69 insertions(+)
>>>
>>> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
>>> index 6eea532..3d454c3 100644
>>> --- a/src/intel/compiler/brw_fs.cpp
>>> +++ b/src/intel/compiler/brw_fs.cpp
>>> @@ -454,6 +454,13 @@ fs_reg::equals(const fs_reg ) const
>>>  }
>>>
>>>  bool
>>> +fs_reg::negative_equals(const fs_reg ) const
>>> +{
>>> +   return (this->backend_reg::negative_equals(r) &&
>>> +   stride == r.stride);
>>> +}
>>> +
>>> +bool
>>>  fs_reg::is_contiguous() const
>>>  {
>>> return stride == 1;
>>> diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h
>>> index 54797ff..f06a33c 100644
>>> --- a/src/intel/compiler/brw_ir_fs.h
>>> +++ b/src/intel/compiler/brw_ir_fs.h
>>> @@ -41,6 +41,7 @@ public:
>>> fs_reg(enum brw_reg_file file, int nr, enum brw_reg_type type);
>>>
>>> bool equals(const fs_reg ) const;
>>> +   bool negative_equals(const fs_reg ) const;
>>> bool is_contiguous() const;
>>>
>>> /**
>>> diff --git a/src/intel/compiler/brw_ir_vec4.h 
>>> b/src/intel/compiler/brw_ir_vec4.h
>>> index cbaff2f..95c5119 100644
>>> --- a/src/intel/compiler/brw_ir_vec4.h
>>> +++ b/src/intel/compiler/brw_ir_vec4.h
>>> @@ -43,6 +43,7 @@ public:
>>> src_reg(struct ::brw_reg reg);
>>>
>>> bool equals(const src_reg ) const;
>>> +   bool negative_equals(const src_reg ) const;
>>>
>>> src_reg(class vec4_visitor *v, const struct glsl_type *type);
>>> src_reg(class vec4_visitor *v, const struct glsl_type *type, int size);
>>> diff --git a/src/intel/compiler/brw_reg.h b/src/intel/compiler/brw_reg.h
>>> index 7ad144b..732bddf 100644
>>> --- a/src/intel/compiler/brw_reg.h
>>> +++ b/src/intel/compiler/brw_reg.h
>>> @@ -255,6 +255,52 @@ brw_regs_equal(const struct brw_reg *a, const struct 
>>> brw_reg *b)
>>> return a->bits == b->bits && (df ? a->u64 == b->u64 : a->ud == b->ud);
>>>  }
>>>
>>> +static inline bool
>>> +brw_regs_negative_equal(const struct brw_reg *a, const struct brw_reg *b)
>>> +{
>>> +   if (a->file == IMM) {
>>> +  if (a->bits != b->bits)
>>> + return false;
>>> +
>>> +  switch (a->type) {
>>> +  case BRW_REGISTER_TYPE_UQ:
>>> +  case BRW_REGISTER_TYPE_Q:
>>> + return a->d64 == -b->d64;
>>> +  case BRW_REGISTER_TYPE_DF:
>>> + return a->df == -b->df;
>>> +  case BRW_REGISTER_TYPE_UD:
>>> +  case BRW_REGISTER_TYPE_D:
>>> + return a->d == -b->d;
>>> +  case BRW_REGISTER_TYPE_F:
>>> + return a->f == -b->f;
>>> +  case BRW_REGISTER_TYPE_VF:
>>> + /* It is tempting to treat 0 as a negation of 0 (and -0 as a 
>>> negation
>>> +  * of -0).  There are occasions where 0 or -0 is used and the 
>>> exact
>>> +  * bit pattern is desired.  At the very least, changing this to 
>>> allow
>>> +  * 0 as a negation of 0 causes some fp64 tests to fail on IVB.
>>> +  */
>>> + return a->ud == (b->ud ^ 0x80808080);
>>> +  case BRW_REGISTER_TYPE_UW:
>>> +  case BRW_REGISTER_TYPE_W:
>>> +  case BRW_REGISTER_TYPE_UV:
>>> +  case BRW_REGISTER_TYPE_V:
>>> +  case BRW_REGISTER_TYPE_HF:
>>> +  case BRW_REGISTER_TYPE_UB:
>>> +  case BRW_REGISTER_TYPE_B:
>>
>> There are no B/UB immediates, so you can move these to default. In
>> fact, I'd get rid of the default so we'll get a warning if there are
>> unhandled types. Probably the only one not already in the list is NF,
>> which should also be unreachable.
> 
>> Returning false for unimplemented types seems fine. Immediates of
>> those types are sufficiently rare that I don't expect this to catch
>> anything, and in the rare occurrence that it does I wouldn't want the
>> compiler to assert fail or do something undefined. Really I only
>> expect HF to ever get hit, and only after we actually start using it.
> 

Re: [Mesa-dev] [PATCH v3 15/19] nir: use load_local_group_size

2018-03-23 Thread Jason Ekstrand
On Fri, Mar 23, 2018 at 1:35 PM, Karol Herbst  wrote:

> On Fri, Mar 23, 2018 at 9:18 PM, Jason Ekstrand 
> wrote:
> > On Fri, Mar 23, 2018 at 12:33 PM, Karol Herbst 
> wrote:
> >>
> >> From: Rob Clark 
> >>
> >> If local_size is not known at compile time, which is the case with
> >> clover, use the load_local_group_size intrinsic instead.
> >>
> >> Signed-off-by: Karol Herbst 
> >> ---
> >>  src/compiler/nir/nir_lower_system_values.c | 25
> +
> >>  1 file changed, 17 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/src/compiler/nir/nir_lower_system_values.c
> >> b/src/compiler/nir/nir_lower_system_values.c
> >> index d507c28f421..ff4e09c8e61 100644
> >> --- a/src/compiler/nir/nir_lower_system_values.c
> >> +++ b/src/compiler/nir/nir_lower_system_values.c
> >> @@ -57,19 +57,28 @@ convert_block(nir_block *block, nir_builder *b)
> >>*"The value of gl_GlobalInvocationID is equal to
> >>*gl_WorkGroupID * gl_WorkGroupSize +
> gl_LocalInvocationID"
> >>*/
> >> + nir_ssa_def *local_size_def;
> >>
> >> - nir_const_value local_size;
> >> - memset(_size, 0, sizeof(local_size));
> >> - local_size.u64[0] = b->shader->info.cs.local_size[0];
> >> - local_size.u64[1] = b->shader->info.cs.local_size[1];
> >> - local_size.u64[2] = b->shader->info.cs.local_size[2];
> >> + /* if local_size[] is already known, use that, otherwise use
> >> +  * load_local_group_size intrinsic:
> >> +  */
> >> + if (b->shader->info.cs.local_size[0]) {
> >> +nir_const_value local_size;
> >> +memset(_size, 0, sizeof(local_size));
> >> +local_size.u64[0] = b->shader->info.cs.local_size[0];
> >> +local_size.u64[1] = b->shader->info.cs.local_size[1];
> >> +local_size.u64[2] = b->shader->info.cs.local_size[2];
> >> +
> >> +local_size_def = nir_build_imm(b, 3, bit_size, local_size);
> >>
> >> + } else {
> >> +local_size_def = nir_load_local_group_size(b, bit_size);
> >> + }
> >
> >
> > I commented on an earlier patch about how the approach to building the
> > 32/64-bit immediates is wrong.
> >
>
> oh right, I totally forgot about that.
>
> > Setting that aside, this patch looks fine to me in principle.  There's a
> > part of me that doesn't like using cs.local_size[0] being the trigger
> but I
> > think it's probably ok.  Maybe we should assert that cs_local_size is
> either
> > all zero (second case) or all not zero (first case) just to be safe.
> >
>
> I think the main problem here is, that even with OpenCL kernels you
> can specify it, but then overwrite it at runtime again. So yes I
> agree, that we need something better here.
>

Oh, that's tricky then.  We could make nir_lower_system_values take a flag
or OpenCL callers could just whack it all to 0 after spirv_to_nir.c.  Or
you could do recompiles or something.

I think this looks good for now and we can let OpenCL users of NIR whack it
to 0.  It's a fairly obvious behavior of "if you don't have it, load it"
and we can let the CL driver sort out how they want to handle recompiles.


> >>
> >>
> >>   nir_ssa_def *group_id = nir_load_work_group_id(b, bit_size);
> >>   nir_ssa_def *local_id = nir_load_local_invocation_id(b,
> >> bit_size);
> >>
> >> - sysval = nir_iadd(b, nir_imul(b, group_id,
> >> -   nir_build_imm(b, 3, bit_size,
> >> local_size)),
> >> -  local_id);
> >> + sysval = nir_iadd(b, nir_imul(b, group_id, local_size_def),
> >> +   local_id);
> >>   break;
> >>}
> >>
> >> --
> >> 2.14.3
> >>
> >> ___
> >> mesa-dev mailing list
> >> mesa-dev@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >
> >
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965/vec4: Fix null destination register in 3-source instructions

2018-03-23 Thread Ian Romanick
From: Ian Romanick 

A recent commit (see below) triggered some cases where conditional
modifier propagation and dead code elimination would cause a MAD
instruction like the following to be generated:

mad.l.f0  null, ...

Matt pointed out that fs_visitor::fixup_3src_null_dest() fixes cases
like this in the scalar backend.  This commit basically ports that code
to the vec4 backend.

NOTE: I have sent a couple tests to the piglit list that reproduce this
bug *without* the commit mentioned below.  This commit fixes those
tests.

Signed-off-by: Ian Romanick 
Cc: Tapani Pälli 
Cc: Matt Turner 
Cc: mesa-sta...@lists.freedesktop.org
Fixes: ee63933a7 ("nir: Distribute binary operations with constants into bcsel")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105704
---
 src/intel/compiler/brw_vec4.cpp | 26 ++
 src/intel/compiler/brw_vec4.h   |  1 +
 2 files changed, 27 insertions(+)

diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index e483814..fb8ffee 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -1945,6 +1945,30 @@ is_align1_df(vec4_instruction *inst)
}
 }
 
+/**
+ * Three source instruction must have a GRF/MRF destination register.
+ * ARF NULL is not allowed.  Fix that up by allocating a temporary GRF.
+ */
+void
+vec4_visitor::fixup_3src_null_dest()
+{
+   bool progress = false;
+
+   foreach_block_and_inst_safe (block, vec4_instruction, inst, cfg) {
+  if (inst->is_3src(devinfo) && inst->dst.is_null()) {
+ const unsigned size_written = type_sz(inst->dst.type);
+ const unsigned num_regs = DIV_ROUND_UP(size_written, REG_SIZE);
+
+ inst->dst = retype(dst_reg(VGRF, alloc.allocate(num_regs)),
+inst->dst.type);
+ progress = true;
+  }
+   }
+
+   if (progress)
+  invalidate_live_intervals();
+}
+
 void
 vec4_visitor::convert_to_hw_regs()
 {
@@ -2696,6 +2720,8 @@ vec4_visitor::run()
   OPT(scalarize_df);
}
 
+   fixup_3src_null_dest();
+
bool allocated_without_spills = reg_allocate();
 
if (!allocated_without_spills) {
diff --git a/src/intel/compiler/brw_vec4.h b/src/intel/compiler/brw_vec4.h
index 39ce51c..71880db 100644
--- a/src/intel/compiler/brw_vec4.h
+++ b/src/intel/compiler/brw_vec4.h
@@ -158,6 +158,7 @@ public:
void opt_set_dependency_control();
void opt_schedule_instructions();
void convert_to_hw_regs();
+   void fixup_3src_null_dest();
 
bool is_supported_64bit_region(vec4_instruction *inst, unsigned arg);
bool lower_simd_width();
-- 
2.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 15/19] nir: use load_local_group_size

2018-03-23 Thread Karol Herbst
On Fri, Mar 23, 2018 at 9:18 PM, Jason Ekstrand  wrote:
> On Fri, Mar 23, 2018 at 12:33 PM, Karol Herbst  wrote:
>>
>> From: Rob Clark 
>>
>> If local_size is not known at compile time, which is the case with
>> clover, use the load_local_group_size intrinsic instead.
>>
>> Signed-off-by: Karol Herbst 
>> ---
>>  src/compiler/nir/nir_lower_system_values.c | 25 +
>>  1 file changed, 17 insertions(+), 8 deletions(-)
>>
>> diff --git a/src/compiler/nir/nir_lower_system_values.c
>> b/src/compiler/nir/nir_lower_system_values.c
>> index d507c28f421..ff4e09c8e61 100644
>> --- a/src/compiler/nir/nir_lower_system_values.c
>> +++ b/src/compiler/nir/nir_lower_system_values.c
>> @@ -57,19 +57,28 @@ convert_block(nir_block *block, nir_builder *b)
>>*"The value of gl_GlobalInvocationID is equal to
>>*gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID"
>>*/
>> + nir_ssa_def *local_size_def;
>>
>> - nir_const_value local_size;
>> - memset(_size, 0, sizeof(local_size));
>> - local_size.u64[0] = b->shader->info.cs.local_size[0];
>> - local_size.u64[1] = b->shader->info.cs.local_size[1];
>> - local_size.u64[2] = b->shader->info.cs.local_size[2];
>> + /* if local_size[] is already known, use that, otherwise use
>> +  * load_local_group_size intrinsic:
>> +  */
>> + if (b->shader->info.cs.local_size[0]) {
>> +nir_const_value local_size;
>> +memset(_size, 0, sizeof(local_size));
>> +local_size.u64[0] = b->shader->info.cs.local_size[0];
>> +local_size.u64[1] = b->shader->info.cs.local_size[1];
>> +local_size.u64[2] = b->shader->info.cs.local_size[2];
>> +
>> +local_size_def = nir_build_imm(b, 3, bit_size, local_size);
>>
>> + } else {
>> +local_size_def = nir_load_local_group_size(b, bit_size);
>> + }
>
>
> I commented on an earlier patch about how the approach to building the
> 32/64-bit immediates is wrong.
>

oh right, I totally forgot about that.

> Setting that aside, this patch looks fine to me in principle.  There's a
> part of me that doesn't like using cs.local_size[0] being the trigger but I
> think it's probably ok.  Maybe we should assert that cs_local_size is either
> all zero (second case) or all not zero (first case) just to be safe.
>

I think the main problem here is, that even with OpenCL kernels you
can specify it, but then overwrite it at runtime again. So yes I
agree, that we need something better here.

>>
>>
>>   nir_ssa_def *group_id = nir_load_work_group_id(b, bit_size);
>>   nir_ssa_def *local_id = nir_load_local_invocation_id(b,
>> bit_size);
>>
>> - sysval = nir_iadd(b, nir_imul(b, group_id,
>> -   nir_build_imm(b, 3, bit_size,
>> local_size)),
>> -  local_id);
>> + sysval = nir_iadd(b, nir_imul(b, group_id, local_size_def),
>> +   local_id);
>>   break;
>>}
>>
>> --
>> 2.14.3
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 12/19] nir: specify bit_size when loading system values

2018-03-23 Thread Karol Herbst
On Fri, Mar 23, 2018 at 9:15 PM, Jason Ekstrand  wrote:
> On Fri, Mar 23, 2018 at 12:33 PM, Karol Herbst  wrote:
>>
>> With OpenCL the size of some system value depends on the Physical model
>> chosen, so we need a way to load any system value as 32 or 64 bit.
>>
>> Signed-off-by: Karol Herbst 
>> ---
>>  src/compiler/nir/nir_builder.h   | 10 +---
>>  src/compiler/nir/nir_lower_alpha_test.c  |  2 +-
>>  src/compiler/nir/nir_lower_clip.c|  3 ++-
>>  src/compiler/nir/nir_lower_subgroups.c   |  8 +++---
>>  src/compiler/nir/nir_lower_system_values.c   | 31
>> 
>>  src/compiler/nir/nir_lower_two_sided_color.c |  2 +-
>>  src/compiler/nir/nir_lower_wpos_center.c |  2 +-
>>  src/compiler/spirv/vtn_subgroup.c|  2 +-
>>  src/gallium/auxiliary/nir/tgsi_to_nir.c  |  3 ++-
>>  src/intel/blorp/blorp_blit.c |  2 +-
>>  src/intel/blorp/blorp_clear.c|  2 +-
>>  src/intel/compiler/brw_nir_lower_cs_intrinsics.c |  6 ++---
>>  src/mesa/drivers/dri/i965/brw_tcs.c  |  2 +-
>>  13 files changed, 40 insertions(+), 35 deletions(-)
>>
>> diff --git a/src/compiler/nir/nir_builder.h
>> b/src/compiler/nir/nir_builder.h
>> index 36e0ae3ac63..4e93cd08169 100644
>> --- a/src/compiler/nir/nir_builder.h
>> +++ b/src/compiler/nir/nir_builder.h
>> @@ -612,13 +612,14 @@ nir_copy_var(nir_builder *build, nir_variable *dest,
>> nir_variable *src)
>>
>>  /* Generic builder for system values. */
>>  static inline nir_ssa_def *
>> -nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index)
>> +nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index,
>> +  unsigned bit_size)
>>  {
>> nir_intrinsic_instr *load = nir_intrinsic_instr_create(build->shader,
>> op);
>> load->num_components = nir_intrinsic_infos[op].dest_components;
>> load->const_index[0] = index;
>> nir_ssa_dest_init(>instr, >dest,
>> - nir_intrinsic_infos[op].dest_components, 32, NULL);
>> + nir_intrinsic_infos[op].dest_components, bit_size,
>> NULL);
>> nir_builder_instr_insert(build, >instr);
>> return >dest.ssa;
>>  }
>> @@ -630,9 +631,10 @@ nir_load_system_value(nir_builder *build,
>> nir_intrinsic_op op, int index)
>>
>>  #define DEFINE_SYSTEM_VALUE(name)
>> \
>> static inline nir_ssa_def *
>> \
>> -   nir_load_##name(nir_builder *build)
>> \
>> +   nir_load_##name(nir_builder *build, unsigned bit_size)
>> \
>
>
> I was really hoping that this change wouldn't touch every single intrinsic
> helper.  Maybe with Rob's python-based intrinsics table we can do something
> better.
>

I was kind of thinking of declaring builtins as either 32, 64 or 32/64
bit and just generate a function with a bit_size argument for the
latter maybe, but I think we really want to do that in python and not
with C preprocessor macros :)

>>
>> {
>> \
>> -  return nir_load_system_value(build, nir_intrinsic_load_##name, 0);
>> \
>> +  return nir_load_system_value(build, nir_intrinsic_load_##name, 0,
>> \
>> +   bit_size);
>> \
>> }
>>
>>  #include "nir_intrinsics.h"
>> diff --git a/src/compiler/nir/nir_lower_alpha_test.c
>> b/src/compiler/nir/nir_lower_alpha_test.c
>> index 6bf9ff142df..29f91ab9428 100644
>> --- a/src/compiler/nir/nir_lower_alpha_test.c
>> +++ b/src/compiler/nir/nir_lower_alpha_test.c
>> @@ -92,7 +92,7 @@ nir_lower_alpha_test(nir_shader *shader, enum
>> compare_func func,
>>
>> nir_ssa_def *condition =
>>nir_compare_func(, func,
>> -   alpha, nir_load_alpha_ref_float());
>> +   alpha, nir_load_alpha_ref_float(,
>> 32));
>>
>> nir_intrinsic_instr *discard =
>>nir_intrinsic_instr_create(b.shader,
>> diff --git a/src/compiler/nir/nir_lower_clip.c
>> b/src/compiler/nir/nir_lower_clip.c
>> index ea12f51a7bb..b9a91f7d40b 100644
>> --- a/src/compiler/nir/nir_lower_clip.c
>> +++ b/src/compiler/nir/nir_lower_clip.c
>> @@ -174,7 +174,8 @@ lower_clip_vs(nir_function_impl *impl, unsigned
>> ucp_enables,
>> for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) {
>>if (ucp_enables & (1 << plane)) {
>>   nir_ssa_def *ucp =
>> -nir_load_system_value(, nir_intrinsic_load_user_clip_plane,
>> plane);
>> +nir_load_system_value(, nir_intrinsic_load_user_clip_plane,
>> +  plane, 32);
>>
>>   /* calculate clipdist[plane] - dot(ucp, cv): */
>>   clipdist[plane] = nir_fdot4(, ucp, cv);
>> diff --git a/src/compiler/nir/nir_lower_subgroups.c
>> b/src/compiler/nir/nir_lower_subgroups.c
>> index 0d3c83b7951..7e910c013a9 100644
>> --- a/src/compiler/nir/nir_lower_subgroups.c
>> +++ 

Re: [Mesa-dev] [PATCH v3 06/19] RFC: nir/vtn: "raw" pointer support

2018-03-23 Thread Jason Ekstrand
As I've been rewriting core NIR deref handling, I've been thinking about
this problem quite a bit.  One objective I have is to actually make UBO and
SSBO access go through derefs instead of just being an offset and index so
that the compiler can better reason about them.  In particular, I want to
be able to start doing load/store elimination on SSBOs, SLM, and whatever
CL has which would be great for everyone's compute performance (GL, Vulkan,
CL, etc.).

I would be lying if I said I had a full plan but I do have part of a plan.
In my patch which adds the deref instructions, I add a new "cast" deref
type which takes an arbitrary value as its source and kicks out a deref
with a type.  Whenever we discover that the source of the cast is actually
another deref which is compatible (same type etc.), copy propagation gets
rid of the cast for you.  The idea is that, instead of doing a
load_raw(raw_ptr), you would do a load((type *)raw_ptr).

Right now, most of the core NIR optimizations will throw a fit if they ever
see a cast.  This is intentional because it requires us to manually go
through and handle casts.  This would mean that, at the moment, you would
have to lower to load_raw intrinsics almost immediately after coming out of
SPIR-V.

On Fri, Mar 23, 2018 at 12:33 PM, Karol Herbst  wrote:

> From: Rob Clark 
>
> An attempt to add physical pointer support to vtn.  I'm not totally
> happy about the handling of logical pointers vs physical pointers.
> So this is really more of an RFS (request for suggestions)
>
> v2: treat vec3 types as vec4 when dereferencing
>
> Signed-off-by: Karol Herbst 
> ---
>  src/compiler/spirv/spirv_to_nir.c  |  87 ---
>  src/compiler/spirv/vtn_private.h   |  20 ++-
>  src/compiler/spirv/vtn_variables.c | 300 ++
> ++-
>  3 files changed, 347 insertions(+), 60 deletions(-)
>
> diff --git a/src/compiler/spirv/spirv_to_nir.c
> b/src/compiler/spirv/spirv_to_nir.c
> index 334bcab9a82..d58a68f80ef 100644
> --- a/src/compiler/spirv/spirv_to_nir.c
> +++ b/src/compiler/spirv/spirv_to_nir.c
> @@ -572,6 +572,7 @@ vtn_types_compatible(struct vtn_builder *b,
>   vtn_types_compatible(b, t1->array_element,
> t2->array_element);
>
> case vtn_base_type_pointer:
> +   case vtn_base_type_raw_pointer:
>return vtn_types_compatible(b, t1->deref, t2->deref);
>
> case vtn_base_type_struct:
> @@ -609,6 +610,7 @@ vtn_type_copy(struct vtn_builder *b, struct vtn_type
> *src)
> case vtn_base_type_matrix:
> case vtn_base_type_array:
> case vtn_base_type_pointer:
> +   case vtn_base_type_raw_pointer:
> case vtn_base_type_image:
> case vtn_base_type_sampler:
> case vtn_base_type_sampled_image:
> @@ -939,6 +941,14 @@ vtn_type_layout_std430(struct vtn_builder *b, struct
> vtn_type *type,
>return type;
> }
>
> +   case vtn_base_type_raw_pointer: {
> +  uint32_t comp_size = b->ptr_size / 8;
> +  vtn_assert(comp_size);
> +  *size_out = comp_size;
> +  *align_out = comp_size;
> +  return type;
> +   }
> +
> case vtn_base_type_vector: {
>uint32_t comp_size = glsl_get_bit_size(type->type) / 8;
>assert(type->length > 0 && type->length <= 4);
> @@ -1003,6 +1013,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
>val->type->base_type = vtn_base_type_scalar;
>val->type->type = glsl_bool_type();
>val->type->length = 1;
> +  val->type->stride = 4;
>break;
> case SpvOpTypeInt: {
>int bit_size = w[2];
> @@ -1025,6 +1036,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
>   vtn_fail("Invalid int bit size");
>}
>val->type->length = 1;
> +  val->type->stride = bit_size / 8;
>break;
> }
>
> @@ -1045,6 +1057,7 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
>   vtn_fail("Invalid float bit size");
>}
>val->type->length = 1;
> +  val->type->stride = bit_size / 8;
>break;
> }
>
> @@ -1061,6 +1074,10 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
>val->type->type = glsl_vector_type(glsl_get_base_type(base->type),
> elems);
>val->type->length = elems;
>val->type->stride = glsl_get_bit_size(base->type) / 8;
> +  /* special case: vec3 is aligned to vec4 */
> +  if (elems == 3)
> + elems = 4;
> +  val->type->stride *= elems;
>val->type->array_element = base;
>break;
> }
> @@ -1138,7 +1155,11 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
>
>const char *name = val->name ? val->name : "struct";
>
> -  val->type->type = glsl_struct_type(fields, num_fields, name, false);
> +  val->type->type = glsl_struct_type(fields, num_fields, name,
> + val->type->packed);
> +  // TODO stride for a struct only matters for kernel shaders, where
> +  // cl_size is the 

Re: [Mesa-dev] [PATCH v3 15/19] nir: use load_local_group_size

2018-03-23 Thread Jason Ekstrand
On Fri, Mar 23, 2018 at 12:33 PM, Karol Herbst  wrote:

> From: Rob Clark 
>
> If local_size is not known at compile time, which is the case with
> clover, use the load_local_group_size intrinsic instead.
>
> Signed-off-by: Karol Herbst 
> ---
>  src/compiler/nir/nir_lower_system_values.c | 25 +
>  1 file changed, 17 insertions(+), 8 deletions(-)
>
> diff --git a/src/compiler/nir/nir_lower_system_values.c
> b/src/compiler/nir/nir_lower_system_values.c
> index d507c28f421..ff4e09c8e61 100644
> --- a/src/compiler/nir/nir_lower_system_values.c
> +++ b/src/compiler/nir/nir_lower_system_values.c
> @@ -57,19 +57,28 @@ convert_block(nir_block *block, nir_builder *b)
>*"The value of gl_GlobalInvocationID is equal to
>*gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID"
>*/
> + nir_ssa_def *local_size_def;
>
> - nir_const_value local_size;
> - memset(&local_size, 0, sizeof(local_size));
> - local_size.u64[0] = b->shader->info.cs.local_size[0];
> - local_size.u64[1] = b->shader->info.cs.local_size[1];
> - local_size.u64[2] = b->shader->info.cs.local_size[2];
> + /* if local_size[] is already known, use that, otherwise use
> +  * load_local_group_size intrinsic:
> +  */
> + if (b->shader->info.cs.local_size[0]) {
> +nir_const_value local_size;
> +memset(&local_size, 0, sizeof(local_size));
> +local_size.u64[0] = b->shader->info.cs.local_size[0];
> +local_size.u64[1] = b->shader->info.cs.local_size[1];
> +local_size.u64[2] = b->shader->info.cs.local_size[2];
> +
> +local_size_def = nir_build_imm(b, 3, bit_size, local_size);

+ } else {
> +local_size_def = nir_load_local_group_size(b, bit_size);
> + }
>

I commented on an earlier patch about how the approach to building the
32/64-bit immediates is wrong.

Setting that aside, this patch looks fine to me in principle.  There's a
part of me that doesn't like using cs.local_size[0] being the trigger but I
think it's probably ok.  Maybe we should assert that cs_local_size is
either all zero (second case) or all not zero (first case) just to be safe.


>
>   nir_ssa_def *group_id = nir_load_work_group_id(b, bit_size);
>   nir_ssa_def *local_id = nir_load_local_invocation_id(b,
> bit_size);
>
> - sysval = nir_iadd(b, nir_imul(b, group_id,
> -   nir_build_imm(b, 3, bit_size,
> local_size)),
> -  local_id);
> + sysval = nir_iadd(b, nir_imul(b, group_id, local_size_def),
> +   local_id);
>   break;
>}
>
> --
> 2.14.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 12/19] nir: specify bit_size when loading system values

2018-03-23 Thread Jason Ekstrand
On Fri, Mar 23, 2018 at 12:33 PM, Karol Herbst  wrote:

> With OpenCL the size of some system value depends on the Physical model
> chosen, so we need a way to load any system value as 32 or 64 bit.
>
> Signed-off-by: Karol Herbst 
> ---
>  src/compiler/nir/nir_builder.h   | 10 +---
>  src/compiler/nir/nir_lower_alpha_test.c  |  2 +-
>  src/compiler/nir/nir_lower_clip.c|  3 ++-
>  src/compiler/nir/nir_lower_subgroups.c   |  8 +++---
>  src/compiler/nir/nir_lower_system_values.c   | 31
> 
>  src/compiler/nir/nir_lower_two_sided_color.c |  2 +-
>  src/compiler/nir/nir_lower_wpos_center.c |  2 +-
>  src/compiler/spirv/vtn_subgroup.c|  2 +-
>  src/gallium/auxiliary/nir/tgsi_to_nir.c  |  3 ++-
>  src/intel/blorp/blorp_blit.c |  2 +-
>  src/intel/blorp/blorp_clear.c|  2 +-
>  src/intel/compiler/brw_nir_lower_cs_intrinsics.c |  6 ++---
>  src/mesa/drivers/dri/i965/brw_tcs.c  |  2 +-
>  13 files changed, 40 insertions(+), 35 deletions(-)
>
> diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_
> builder.h
> index 36e0ae3ac63..4e93cd08169 100644
> --- a/src/compiler/nir/nir_builder.h
> +++ b/src/compiler/nir/nir_builder.h
> @@ -612,13 +612,14 @@ nir_copy_var(nir_builder *build, nir_variable *dest,
> nir_variable *src)
>
>  /* Generic builder for system values. */
>  static inline nir_ssa_def *
> -nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index)
> +nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index,
> +  unsigned bit_size)
>  {
> nir_intrinsic_instr *load = nir_intrinsic_instr_create(build->shader,
> op);
> load->num_components = nir_intrinsic_infos[op].dest_components;
> load->const_index[0] = index;
> nir_ssa_dest_init(&load->instr, &load->dest,
> - nir_intrinsic_infos[op].dest_components, 32, NULL);
> + nir_intrinsic_infos[op].dest_components, bit_size,
> NULL);
> nir_builder_instr_insert(build, &load->instr);
> return &load->dest.ssa;
>  }
> @@ -630,9 +631,10 @@ nir_load_system_value(nir_builder *build,
> nir_intrinsic_op op, int index)
>
>  #define DEFINE_SYSTEM_VALUE(name)\
> static inline nir_ssa_def *   \
> -   nir_load_##name(nir_builder *build)   \
> +   nir_load_##name(nir_builder *build, unsigned bit_size)\
>

I was really hoping that this change wouldn't touch every single intrinsic
helper.  Maybe with Rob's python-based intrinsics table we can do something
better.


> { \
> -  return nir_load_system_value(build, nir_intrinsic_load_##name, 0); \
> +  return nir_load_system_value(build, nir_intrinsic_load_##name, 0,  \
> +   bit_size);\
> }
>
>  #include "nir_intrinsics.h"
> diff --git a/src/compiler/nir/nir_lower_alpha_test.c
> b/src/compiler/nir/nir_lower_alpha_test.c
> index 6bf9ff142df..29f91ab9428 100644
> --- a/src/compiler/nir/nir_lower_alpha_test.c
> +++ b/src/compiler/nir/nir_lower_alpha_test.c
> @@ -92,7 +92,7 @@ nir_lower_alpha_test(nir_shader *shader, enum
> compare_func func,
>
> nir_ssa_def *condition =
>nir_compare_func(&b, func,
> -   alpha, nir_load_alpha_ref_float(&b));
> +   alpha, nir_load_alpha_ref_float(&b,
> 32));
>
> nir_intrinsic_instr *discard =
>nir_intrinsic_instr_create(b.shader,
> diff --git a/src/compiler/nir/nir_lower_clip.c
> b/src/compiler/nir/nir_lower_clip.c
> index ea12f51a7bb..b9a91f7d40b 100644
> --- a/src/compiler/nir/nir_lower_clip.c
> +++ b/src/compiler/nir/nir_lower_clip.c
> @@ -174,7 +174,8 @@ lower_clip_vs(nir_function_impl *impl, unsigned
> ucp_enables,
> for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) {
>if (ucp_enables & (1 << plane)) {
>   nir_ssa_def *ucp =
> -nir_load_system_value(, nir_intrinsic_load_user_clip_plane,
> plane);
> +nir_load_system_value(, nir_intrinsic_load_user_clip_plane,
> +  plane, 32);
>
>   /* calculate clipdist[plane] - dot(ucp, cv): */
>   clipdist[plane] = nir_fdot4(, ucp, cv);
> diff --git a/src/compiler/nir/nir_lower_subgroups.c
> b/src/compiler/nir/nir_lower_subgroups.c
> index 0d3c83b7951..7e910c013a9 100644
> --- a/src/compiler/nir/nir_lower_subgroups.c
> +++ b/src/compiler/nir/nir_lower_subgroups.c
> @@ -190,7 +190,7 @@ static nir_ssa_def *
>  lower_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,
>bool lower_to_scalar)
>  {
> -   nir_ssa_def *index = nir_load_subgroup_invocation(b);

[Mesa-dev] [PATCH 3/5] radeonsi: move FMASK shader logic to shared code

2018-03-23 Thread Marek Olšák
From: Marek Olšák 

We'll need it for FBFETCH in both TGSI and NIR paths.
---
 src/amd/common/ac_llvm_build.c| 56 +
 src/amd/common/ac_llvm_build.h|  3 +
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 74 +--
 3 files changed, 61 insertions(+), 72 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 1ae2b9dd170..c8e594fee15 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2404,10 +2404,66 @@ LLVMValueRef ac_unpack_param(struct ac_llvm_context 
*ctx, LLVMValueRef param,
value = LLVMBuildLShr(ctx->builder, value,
  LLVMConstInt(ctx->i32, rshift, false), 
"");
 
if (rshift + bitwidth < 32) {
unsigned mask = (1 << bitwidth) - 1;
value = LLVMBuildAnd(ctx->builder, value,
 LLVMConstInt(ctx->i32, mask, false), "");
}
return value;
 }
+
+/* Adjust the sample index according to FMASK.
+ *
+ * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
+ * which is the identity mapping. Each nibble says which physical sample
+ * should be fetched to get that sample.
+ *
+ * For example, 0x1100 means there are only 2 samples stored and
+ * the second sample covers 3/4 of the pixel. When reading samples 0
+ * and 1, return physical sample 0 (determined by the first two 0s
+ * in FMASK), otherwise return physical sample 1.
+ *
+ * The sample index should be adjusted as follows:
+ *   addr[sample_index] = (fmask >> (addr[sample_index] * 4)) & 0xF;
+ */
+void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
+ LLVMValueRef *addr, bool is_array_tex)
+{
+   struct ac_image_args fmask_load = {};
+   fmask_load.opcode = ac_image_load;
+   fmask_load.resource = fmask;
+   fmask_load.dmask = 0xf;
+   fmask_load.da = is_array_tex;
+
+   LLVMValueRef fmask_addr[4];
+   memcpy(fmask_addr, addr, sizeof(fmask_addr[0]) * 3);
+   fmask_addr[3] = LLVMGetUndef(ac->i32);
+
+   fmask_load.addr = ac_build_gather_values(ac, fmask_addr,
+is_array_tex ? 4 : 2);
+
+   LLVMValueRef fmask_value = ac_build_image_opcode(ac, &fmask_load);
+   fmask_value = LLVMBuildExtractElement(ac->builder, fmask_value,
+ ac->i32_0, "");
+
+   /* Apply the formula. */
+   unsigned sample_chan = is_array_tex ? 3 : 2;
+   LLVMValueRef final_sample;
+   final_sample = LLVMBuildMul(ac->builder, addr[sample_chan],
+   LLVMConstInt(ac->i32, 4, 0), "");
+   final_sample = LLVMBuildLShr(ac->builder, fmask_value, final_sample, 
"");
+   final_sample = LLVMBuildAnd(ac->builder, final_sample,
+   LLVMConstInt(ac->i32, 0xF, 0), "");
+
+   /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
+* resource descriptor is 0 (invalid),
+*/
+   LLVMValueRef tmp;
+   tmp = LLVMBuildBitCast(ac->builder, fmask, ac->v8i32, "");
+   tmp = LLVMBuildExtractElement(ac->builder, tmp, ac->i32_1, "");
+   tmp = LLVMBuildICmp(ac->builder, LLVMIntNE, tmp, ac->i32_0, "");
+
+   /* Replace the MSAA sample index. */
+   addr[sample_chan] = LLVMBuildSelect(ac->builder, tmp, final_sample,
+   addr[sample_chan], "");
+}
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 6adcc11448c..75181c7b3c1 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -397,15 +397,18 @@ LLVMValueRef ac_build_alloca_undef(struct ac_llvm_context 
*ac, LLVMTypeRef type,
 
 LLVMValueRef ac_cast_ptr(struct ac_llvm_context *ctx, LLVMValueRef ptr,
 LLVMTypeRef type);
 
 LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value,
unsigned count);
 
 LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param,
 unsigned rshift, unsigned bitwidth);
 
+void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
+ LLVMValueRef *addr, bool is_array_tex);
+
 #ifdef __cplusplus
 }
 #endif
 
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 9e2a0ebbb55..f5fa18fd38a 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -1499,94 +1499,24 @@ static void tex_fetch_args(
address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 
TGSI_CHAN_X);
 
if (count > 16) {
assert(!"Cannot handle more than 16 texture address 
parameters");
count 

[Mesa-dev] [PATCH 5/5] radeonsi: implement GL_KHR_blend_equation_advanced

2018-03-23 Thread Marek Olšák
From: Marek Olšák 

MSAA is supported using sample shading. Layered rendering and all texture
targets are also supported.
---
 docs/features.txt |  2 +-
 docs/relnotes/18.1.0.html |  1 +
 src/gallium/drivers/radeonsi/si_blit.c|  8 +++
 src/gallium/drivers/radeonsi/si_descriptors.c | 87 +--
 src/gallium/drivers/radeonsi/si_get.c |  2 +-
 src/gallium/drivers/radeonsi/si_pipe.h|  9 +++
 src/gallium/drivers/radeonsi/si_shader.c  |  4 +-
 src/gallium/drivers/radeonsi/si_shader.h  |  3 +
 src/gallium/drivers/radeonsi/si_shader_internal.h |  1 +
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 59 +++
 src/gallium/drivers/radeonsi/si_state.c   | 19 +++--
 src/gallium/drivers/radeonsi/si_state.h   |  8 +++
 src/gallium/drivers/radeonsi/si_state_binning.c   |  2 +-
 src/gallium/drivers/radeonsi/si_state_shaders.c   | 18 +
 14 files changed, 205 insertions(+), 18 deletions(-)

diff --git a/docs/features.txt b/docs/features.txt
index 5eae34bf0df..d579d245eb4 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -262,21 +262,21 @@ GLES3.1, GLSL ES 3.1 -- all DONE: i965/hsw+, nvc0, r600, 
radeonsi
 
   Additional functionality not covered above:
   glMemoryBarrierByRegion   DONE
   glGetTexLevelParameter[fi]v - needs updates   DONE
   glGetBooleani_v - restrict to GLES enums
   gl_HelperInvocation support   DONE (i965, r600)
 
 GLES3.2, GLSL ES 3.2 -- all DONE: i965/gen9+
 
   GL_EXT_color_buffer_float DONE (all drivers)
-  GL_KHR_blend_equation_advancedDONE (i965, nvc0)
+  GL_KHR_blend_equation_advancedDONE (i965, nvc0, 
radeonsi)
   GL_KHR_debug  DONE (all drivers)
   GL_KHR_robustness DONE (i965, nvc0, 
radeonsi)
   GL_KHR_texture_compression_astc_ldr   DONE (freedreno, 
i965/gen9+)
   GL_OES_copy_image DONE (all drivers)
   GL_OES_draw_buffers_indexed   DONE (all drivers that 
support GL_ARB_draw_buffers_blend)
   GL_OES_draw_elements_base_vertex  DONE (all drivers)
   GL_OES_geometry_shaderDONE (i965/hsw+, nvc0, 
radeonsi)
   GL_OES_gpu_shader5DONE (all drivers that 
support GL_ARB_gpu_shader5)
   GL_OES_primitive_bounding_box DONE (i965/gen7+, 
nvc0, radeonsi)
   GL_OES_sample_shading DONE (i965, nvc0, 
r600, radeonsi)
diff --git a/docs/relnotes/18.1.0.html b/docs/relnotes/18.1.0.html
index 3e119078731..a89861d2bda 100644
--- a/docs/relnotes/18.1.0.html
+++ b/docs/relnotes/18.1.0.html
@@ -43,20 +43,21 @@ TBD.
 Note: some of the new features are only available with certain drivers.
 
 
 
 OpenGL 3.1 with ARB_compatibility on nv50, nvc0, r600, radeonsi, softpipe, 
llvmpipe, svga
 GL_ARB_bindless_texture on nvc0/maxwell+
 GL_EXT_semaphore on radeonsi
 GL_EXT_semaphore_fd on radeonsi
 GL_EXT_shader_framebuffer_fetch on i965 on desktop GL (GLES was already 
supported)
 GL_EXT_shader_framebuffer_fetch_non_coherent on i965
+GL_KHR_blend_equation_advanced on radeonsi
 Disk shader cache support for i965 enabled by default
 
 
 Bug fixes
 
 
 TBD
 
 
 Changes
diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index f1c4f6d1e72..d9d489825f8 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -799,20 +799,28 @@ void si_decompress_textures(struct si_context *sctx, 
unsigned shader_mask)
si_decompress_resident_textures(sctx);
if (sctx->uses_bindless_images)
si_decompress_resident_images(sctx);
} else if (shader_mask & (1 << PIPE_SHADER_COMPUTE)) {
if (sctx->cs_shader_state.program->uses_bindless_samplers)
si_decompress_resident_textures(sctx);
if (sctx->cs_shader_state.program->uses_bindless_images)
si_decompress_resident_images(sctx);
}
 
+   if (sctx->ps_uses_fbfetch) {
+   struct pipe_surface *cb0 = sctx->framebuffer.state.cbufs[0];
+   si_decompress_color_texture(sctx,
+   (struct r600_texture*)cb0->texture,
+   cb0->u.tex.first_layer,
+   cb0->u.tex.last_layer);
+   }
+
si_check_render_feedback(sctx);
 }
 
 /* Helper for decompressing a portion of a color or depth resource before
  * blitting if any decompression is needed.
  * The driver doesn't decompress resources automatically while u_blitter is
  

[Mesa-dev] [PATCH 2/5] radeonsi: add R600_DEBUG=nofmask to disable MSAA compression

2018-03-23 Thread Marek Olšák
From: Marek Olšák 

For testing.
---
 src/gallium/drivers/radeon/r600_pipe_common.h |  1 +
 src/gallium/drivers/radeon/r600_texture.c | 13 +++--
 src/gallium/drivers/radeonsi/si_pipe.c|  1 +
 src/gallium/drivers/radeonsi/si_pipe.h|  1 +
 src/gallium/drivers/radeonsi/si_state.c   | 15 +++
 5 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 79419036665..4df039d33a4 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -103,20 +103,21 @@ enum {
DBG_DFSM,
DBG_NO_HYPERZ,
DBG_NO_RB_PLUS,
DBG_NO_2D_TILING,
DBG_NO_TILING,
DBG_NO_DCC,
DBG_NO_DCC_CLEAR,
DBG_NO_DCC_FB,
DBG_NO_DCC_MSAA,
DBG_DCC_MSAA,
+   DBG_NO_FMASK,
 
/* Tests: */
DBG_TEST_DMA,
DBG_TEST_VMFAULT_CP,
DBG_TEST_VMFAULT_SDMA,
DBG_TEST_VMFAULT_SHADER,
 };
 
 #define DBG_ALL_SHADERS(((1 << (DBG_CS + 1)) - 1))
 #define DBG(name)  (1ull << DBG_##name)
diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 3a0a79187b8..b5c04c3f663 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1259,26 +1259,27 @@ r600_texture_create_object(struct pipe_screen *screen,
}
 
if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER |
 R600_RESOURCE_FLAG_FLUSHED_DEPTH))) {
rtex->db_compatible = true;
 
if (!(sscreen->debug_flags & DBG(NO_HYPERZ)))
r600_texture_allocate_htile(sscreen, rtex);
}
} else {
-   if (base->nr_samples > 1) {
-   if (!buf) {
-   r600_texture_allocate_fmask(sscreen, rtex);
-   r600_texture_allocate_cmask(sscreen, rtex);
-   rtex->cmask_buffer = >resource;
-   }
+   if (base->nr_samples > 1 &&
+   !buf &&
+   !(sscreen->debug_flags & DBG(NO_FMASK))) {
+   r600_texture_allocate_fmask(sscreen, rtex);
+   r600_texture_allocate_cmask(sscreen, rtex);
+   rtex->cmask_buffer = >resource;
+
if (!rtex->fmask.size || !rtex->cmask.size) {
FREE(rtex);
return NULL;
}
}
 
/* Shared textures must always set up DCC here.
 * If it's not present, it will be disabled by
 * apply_opaque_metadata later.
 */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 3d787d58cd1..ecd11d635a4 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -81,20 +81,21 @@ static const struct debug_named_value debug_options[] = {
{ "dfsm", DBG(DFSM), "Enable DFSM." },
{ "nohyperz", DBG(NO_HYPERZ), "Disable Hyper-Z" },
{ "norbplus", DBG(NO_RB_PLUS), "Disable RB+." },
{ "no2d", DBG(NO_2D_TILING), "Disable 2D tiling" },
{ "notiling", DBG(NO_TILING), "Disable tiling" },
{ "nodcc", DBG(NO_DCC), "Disable DCC." },
{ "nodccclear", DBG(NO_DCC_CLEAR), "Disable DCC fast clear." },
{ "nodccfb", DBG(NO_DCC_FB), "Disable separate DCC on the main 
framebuffer" },
{ "nodccmsaa", DBG(NO_DCC_MSAA), "Disable DCC for MSAA" },
{ "dccmsaa", DBG(DCC_MSAA), "Enable DCC for MSAA" },
+   { "nofmask", DBG(NO_FMASK), "Disable MSAA compression" },
 
/* Tests: */
{ "testdma", DBG(TEST_DMA), "Invoke SDMA tests and exit." },
{ "testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and 
exit." },
{ "testvmfaultsdma", DBG(TEST_VMFAULT_SDMA), "Invoke a SDMA VM fault 
test and exit." },
{ "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM 
fault test and exit." },
 
DEBUG_NAMED_VALUE_END /* must be last */
 };
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 2053dcb9fcd..dbb04ed7e45 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -278,20 +278,21 @@ struct si_framebuffer {
struct r600_atomatom;
struct pipe_framebuffer_state   state;
unsignedcolorbuf_enabled_4bit;
unsignedspi_shader_col_format;
unsignedspi_shader_col_format_alpha;
unsigned

[Mesa-dev] [PATCH 4/5] radeonsi: rename unpack_param -> si_unpack_param

2018-03-23 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_shader.c  | 54 +++
 src/gallium/drivers/radeonsi/si_shader_internal.h |  4 ++
 2 files changed, 31 insertions(+), 27 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 8ae742c93f6..08b071e810b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -249,23 +249,23 @@ static LLVMValueRef unpack_llvm_param(struct 
si_shader_context *ctx,
 
if (rshift + bitwidth < 32) {
unsigned mask = (1 << bitwidth) - 1;
value = LLVMBuildAnd(ctx->ac.builder, value,
 LLVMConstInt(ctx->i32, mask, 0), "");
}
 
return value;
 }
 
-static LLVMValueRef unpack_param(struct si_shader_context *ctx,
-unsigned param, unsigned rshift,
-unsigned bitwidth)
+LLVMValueRef si_unpack_param(struct si_shader_context *ctx,
+unsigned param, unsigned rshift,
+unsigned bitwidth)
 {
LLVMValueRef value = LLVMGetParam(ctx->main_fn, param);
 
return unpack_llvm_param(ctx, value, rshift, bitwidth);
 }
 
 static LLVMValueRef get_rel_patch_id(struct si_shader_context *ctx)
 {
switch (ctx->type) {
case PIPE_SHADER_TESS_CTRL:
@@ -298,21 +298,21 @@ static LLVMValueRef get_rel_patch_id(struct 
si_shader_context *ctx)
  * - TCS outputs for patch 2= get_tcs_out_current_patch_offset (if 
RelPatchID==2)
  * - Per-patch TCS outputs for patch 2  = 
get_tcs_out_current_patch_data_offset (if RelPatchID==2)
  * - ...
  *
  * All three shaders VS(LS), TCS, TES share the same LDS space.
  */
 
 static LLVMValueRef
 get_tcs_in_patch_stride(struct si_shader_context *ctx)
 {
-   return unpack_param(ctx, ctx->param_vs_state_bits, 8, 13);
+   return si_unpack_param(ctx, ctx->param_vs_state_bits, 8, 13);
 }
 
 static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context 
*ctx)
 {
assert(ctx->type == PIPE_SHADER_TESS_CTRL);
 
if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)
return 
util_last_bit64(ctx->shader->key.mono.u.ff_tcs_inputs_to_copy) * 4;
 
return util_last_bit64(ctx->shader->selector->outputs_written) * 4;
@@ -321,46 +321,46 @@ static unsigned 
get_tcs_out_vertex_dw_stride_constant(struct si_shader_context *
 static LLVMValueRef get_tcs_out_vertex_dw_stride(struct si_shader_context *ctx)
 {
unsigned stride = get_tcs_out_vertex_dw_stride_constant(ctx);
 
return LLVMConstInt(ctx->i32, stride, 0);
 }
 
 static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx)
 {
if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)
-   return unpack_param(ctx, ctx->param_tcs_out_lds_layout, 0, 13);
+   return si_unpack_param(ctx, ctx->param_tcs_out_lds_layout, 0, 
13);
 
const struct tgsi_shader_info *info = >shader->selector->info;
unsigned tcs_out_vertices = 
info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
unsigned vertex_dw_stride = get_tcs_out_vertex_dw_stride_constant(ctx);
unsigned num_patch_outputs = 
util_last_bit64(ctx->shader->selector->patch_outputs_written);
unsigned patch_dw_stride = tcs_out_vertices * vertex_dw_stride +
   num_patch_outputs * 4;
return LLVMConstInt(ctx->i32, patch_dw_stride, 0);
 }
 
 static LLVMValueRef
 get_tcs_out_patch0_offset(struct si_shader_context *ctx)
 {
return lp_build_mul_imm(>bld_base.uint_bld,
-   unpack_param(ctx,
+   si_unpack_param(ctx,
 ctx->param_tcs_out_lds_offsets,
 0, 16),
4);
 }
 
 static LLVMValueRef
 get_tcs_out_patch0_patch_data_offset(struct si_shader_context *ctx)
 {
return lp_build_mul_imm(>bld_base.uint_bld,
-   unpack_param(ctx,
+   si_unpack_param(ctx,
 ctx->param_tcs_out_lds_offsets,
 16, 16),
4);
 }
 
 static LLVMValueRef
 get_tcs_in_current_patch_offset(struct si_shader_context *ctx)
 {
LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx);
LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
@@ -398,39 +398,39 @@ get_tcs_out_current_patch_data_offset(struct 
si_shader_context *ctx)
 static LLVMValueRef get_num_tcs_out_vertices(struct si_shader_context *ctx)
 {
unsigned tcs_out_vertices =
ctx->shader->selector ?

ctx->shader->selector->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] : 0;
 
/* If !tcs_out_vertices, 

[Mesa-dev] [PATCH 0/5] KHR_blend_equation_advanced for RadeonSI

2018-03-23 Thread Marek Olšák
Hi,

This is the second and final version, and it adds MSAA support and FBFETCH 
tests into Gallium.

Please review.

Thanks,
Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/5] gallium/u_tests: test FBFETCH and shader-based blending with MSAA

2018-03-23 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/auxiliary/util/u_tests.c | 168 ++-
 1 file changed, 128 insertions(+), 40 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_tests.c 
b/src/gallium/auxiliary/util/u_tests.c
index 86eee6e68b1..293a4580a9f 100644
--- a/src/gallium/auxiliary/util/u_tests.c
+++ b/src/gallium/auxiliary/util/u_tests.c
@@ -37,29 +37,31 @@
 #include "util/u_tile.h"
 #include "tgsi/tgsi_strings.h"
 #include "tgsi/tgsi_text.h"
 #include "cso_cache/cso_context.h"
 #include 
 
 #define TOLERANCE 0.01
 
 static struct pipe_resource *
 util_create_texture2d(struct pipe_screen *screen, unsigned width,
-  unsigned height, enum pipe_format format)
+  unsigned height, enum pipe_format format,
+  unsigned num_samples)
 {
struct pipe_resource templ = {{0}};
 
templ.target = PIPE_TEXTURE_2D;
templ.width0 = width;
templ.height0 = height;
templ.depth0 = 1;
templ.array_size = 1;
+   templ.nr_samples = num_samples;
templ.format = format;
templ.usage = PIPE_USAGE_DEFAULT;
templ.bind = PIPE_BIND_SAMPLER_VIEW |
 (util_format_is_depth_or_stencil(format) ?
 PIPE_BIND_DEPTH_STENCIL : PIPE_BIND_RENDER_TARGET);
 
return screen->resource_create(screen, &templ);
 }
 
 static void
@@ -181,20 +183,34 @@ util_draw_fullscreen_quad(struct cso_context *cso)
static float vertices[] = {
  -1, -1, 0, 1,   0, 0, 0, 0,
  -1,  1, 0, 1,   0, 1, 0, 0,
   1,  1, 0, 1,   1, 1, 0, 0,
   1, -1, 0, 1,   1, 0, 0, 0
};
util_set_interleaved_vertex_elements(cso, 2);
util_draw_user_vertex_buffer(cso, vertices, PIPE_PRIM_QUADS, 4, 2);
 }
 
+static void
+util_draw_fullscreen_quad_fill(struct cso_context *cso,
+   float r, float g, float b, float a)
+{
+   float vertices[] = {
+ -1, -1, 0, 1,   r, g, b, a,
+ -1,  1, 0, 1,   r, g, b, a,
+  1,  1, 0, 1,   r, g, b, a,
+  1, -1, 0, 1,   r, g, b, a,
+   };
+   util_set_interleaved_vertex_elements(cso, 2);
+   util_draw_user_vertex_buffer(cso, vertices, PIPE_PRIM_QUADS, 4, 2);
+}
+
 /**
  * Probe and test if the rectangle contains the expected color.
  *
  * If "num_expected_colors" > 1, at least one expected color must match
  * the probed color. "expected" should be an array of 4*num_expected_colors
  * floats.
  */
 static bool
 util_probe_rect_rgba_multi(struct pipe_context *ctx, struct pipe_resource *tex,
unsigned offx, unsigned offy, unsigned w,
@@ -299,21 +315,21 @@ tgsi_vs_window_space_position(struct pipe_context *ctx)
static const float red[] = {1, 0, 0, 1};
 
if (!ctx->screen->get_param(ctx->screen,
PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION)) {
   util_report_result(SKIP);
   return;
}
 
cso = cso_create_context(ctx, 0);
cb = util_create_texture2d(ctx->screen, 256, 256,
-  PIPE_FORMAT_R8G8B8A8_UNORM);
+  PIPE_FORMAT_R8G8B8A8_UNORM, 0);
util_set_common_states_and_clear(cso, ctx, cb);
 
/* Fragment shader. */
fs = util_make_fragment_passthrough_shader(ctx, TGSI_SEMANTIC_GENERIC,
TGSI_INTERPOLATE_LINEAR, TRUE);
cso_set_fragment_shader_handle(cso, fs);
 
/* Vertex shader. */
vs = util_set_passthrough_vertex_shader(cso, ctx, true);
 
@@ -359,21 +375,21 @@ null_sampler_view(struct pipe_context *ctx, unsigned 
tgsi_tex_target)
 
if (tgsi_tex_target == TGSI_TEXTURE_BUFFER &&
!ctx->screen->get_param(ctx->screen, PIPE_CAP_TEXTURE_BUFFER_OBJECTS)) {
   util_report_result_helper(SKIP, "%s: %s", __func__,
 tgsi_texture_names[tgsi_tex_target]);
   return;
}
 
cso = cso_create_context(ctx, 0);
cb = util_create_texture2d(ctx->screen, 256, 256,
-  PIPE_FORMAT_R8G8B8A8_UNORM);
+  PIPE_FORMAT_R8G8B8A8_UNORM, 0);
util_set_common_states_and_clear(cso, ctx, cb);
 
ctx->set_sampler_views(ctx, PIPE_SHADER_FRAGMENT, 0, 1, NULL);
 
/* Fragment shader. */
fs = util_make_fragment_tex_shader(ctx, tgsi_tex_target,
   TGSI_INTERPOLATE_LINEAR,
   TGSI_RETURN_TYPE_FLOAT,
   TGSI_RETURN_TYPE_FLOAT, false, false);
cso_set_fragment_shader_handle(cso, fs);
@@ -402,21 +418,21 @@ util_test_constant_buffer(struct pipe_context *ctx,
   struct pipe_resource *constbuf)
 {
struct cso_context *cso;
struct pipe_resource *cb;
void *fs, *vs;
bool pass = true;
static const float zero[] = {0, 0, 0, 0};
 
cso = cso_create_context(ctx, 0);
cb = util_create_texture2d(ctx->screen, 256, 256,
-  PIPE_FORMAT_R8G8B8A8_UNORM);
+  

[Mesa-dev] [PATCH v3 16/19] nir: add load_kernel_param

2018-03-23 Thread Karol Herbst
OpenCL kernels have parameters (see pipe_grid_info::input), and so we
need a way to access them.

The offset source is the offset of the parameter to load in the kernel input
buffer.

v2: improve commit message
remove BASE
split lower_io changes into separate commit

Signed-off-by: Karol Herbst 
---
 src/compiler/nir/nir_intrinsics.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/compiler/nir/nir_intrinsics.h 
b/src/compiler/nir/nir_intrinsics.h
index 6597eaea87b..fb8d53b3c0d 100644
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -495,6 +495,8 @@ LOAD(ubo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | 
NIR_INTRINSIC_CAN_REOR
 LOAD(input, 1, 2, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE | 
NIR_INTRINSIC_CAN_REORDER)
 /* src[] = { vertex, offset }. const_index[] = { base, component } */
 LOAD(per_vertex_input, 2, 2, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE 
| NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { address }. No const_index */
+LOAD(kernel_param, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | 
NIR_INTRINSIC_CAN_REORDER)
 /* src[] = { barycoord, offset }. const_index[] = { base, component } */
 INTRINSIC(load_interpolated_input, 2, ARR(2, 1), true, 0, 0,
   2, BASE, COMPONENT, xx,
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 15/19] nir: use load_local_group_size

2018-03-23 Thread Karol Herbst
From: Rob Clark 

If local_size is not known at compile time, which is the case with
clover, use the load_local_group_size intrinsic instead.

Signed-off-by: Karol Herbst 
---
 src/compiler/nir/nir_lower_system_values.c | 25 +
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/src/compiler/nir/nir_lower_system_values.c 
b/src/compiler/nir/nir_lower_system_values.c
index d507c28f421..ff4e09c8e61 100644
--- a/src/compiler/nir/nir_lower_system_values.c
+++ b/src/compiler/nir/nir_lower_system_values.c
@@ -57,19 +57,28 @@ convert_block(nir_block *block, nir_builder *b)
   *"The value of gl_GlobalInvocationID is equal to
   *gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID"
   */
+ nir_ssa_def *local_size_def;
 
- nir_const_value local_size;
- memset(&local_size, 0, sizeof(local_size));
- local_size.u64[0] = b->shader->info.cs.local_size[0];
- local_size.u64[1] = b->shader->info.cs.local_size[1];
- local_size.u64[2] = b->shader->info.cs.local_size[2];
+ /* if local_size[] is already known, use that, otherwise use
+  * load_local_group_size intrinsic:
+  */
+ if (b->shader->info.cs.local_size[0]) {
+nir_const_value local_size;
+memset(&local_size, 0, sizeof(local_size));
+local_size.u64[0] = b->shader->info.cs.local_size[0];
+local_size.u64[1] = b->shader->info.cs.local_size[1];
+local_size.u64[2] = b->shader->info.cs.local_size[2];
+
+local_size_def = nir_build_imm(b, 3, bit_size, local_size);
+ } else {
+local_size_def = nir_load_local_group_size(b, bit_size);
+ }
 
  nir_ssa_def *group_id = nir_load_work_group_id(b, bit_size);
  nir_ssa_def *local_id = nir_load_local_invocation_id(b, bit_size);
 
- sysval = nir_iadd(b, nir_imul(b, group_id,
-   nir_build_imm(b, 3, bit_size, 
local_size)),
-  local_id);
+ sysval = nir_iadd(b, nir_imul(b, group_id, local_size_def),
+   local_id);
  break;
   }
 
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 17/19] RFC nir/lower_io: lower kernel entry param load_vars to load_kernel_param

2018-03-23 Thread Karol Herbst
For OpenCL kernels we have an input buffer where most of the parameters are
stored. For this we have to keep track of alignment and padding rules to
correctly identify the offset of each parameter inside that buffer.

For this we can just rely on the new cl_size and cl_alignment glsl_type
functions.

Signed-off-by: Karol Herbst 
---
 src/compiler/nir/nir_lower_io.c | 39 ---
 1 file changed, 36 insertions(+), 3 deletions(-)

diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index df91febd68d..ed8e361651c 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -39,6 +39,7 @@ struct lower_io_state {
int (*type_size)(const struct glsl_type *type);
nir_variable_mode modes;
nir_lower_io_options options;
+   unsigned *offsets;
 };
 
 void
@@ -159,7 +160,8 @@ lower_load(nir_intrinsic_instr *intrin, struct 
lower_io_state *state,
nir_ssa_def *vertex_index, nir_ssa_def *offset,
unsigned component)
 {
-   const nir_shader *nir = state->builder.shader;
+   nir_builder *b = &state->builder;
+   nir_shader *nir = b->shader;
nir_variable *var = intrin->variables[0]->var;
nir_variable_mode mode = var->data.mode;
nir_ssa_def *barycentric = NULL;
@@ -199,6 +201,11 @@ lower_load(nir_intrinsic_instr *intrin, struct 
lower_io_state *state,
case nir_var_shared:
   op = nir_intrinsic_load_shared;
   break;
+   case nir_var_param:
+  if (nir_cf_node_get_function(&intrin->instr.block->cf_node) == 
nir_shader_get_entrypoint(nir)) {
+ op = nir_intrinsic_load_kernel_param;
+ break;
+  }
default:
   unreachable("Unknown variable mode");
}
@@ -207,7 +214,9 @@ lower_load(nir_intrinsic_instr *intrin, struct 
lower_io_state *state,
   nir_intrinsic_instr_create(state->builder.shader, op);
load->num_components = intrin->num_components;
 
-   nir_intrinsic_set_base(load, var->data.driver_location);
+   if (op != nir_intrinsic_load_kernel_param)
+  nir_intrinsic_set_base(load, var->data.driver_location);
+
if (mode == nir_var_shader_in || mode == nir_var_shader_out)
   nir_intrinsic_set_component(load, component);
 
@@ -220,6 +229,8 @@ lower_load(nir_intrinsic_instr *intrin, struct 
lower_io_state *state,
} else if (barycentric) {
   load->src[0] = nir_src_for_ssa(barycentric);
   load->src[1] = nir_src_for_ssa(offset);
+   } else if (op == nir_intrinsic_load_kernel_param) {
+  load->src[0] = nir_src_for_ssa(nir_imm_int(b, 
state->offsets[var->data.location]));
} else {
   load->src[0] = nir_src_for_ssa(offset);
}
@@ -407,7 +418,8 @@ nir_lower_io_block(nir_block *block,
   if (mode != nir_var_shader_in &&
   mode != nir_var_shader_out &&
   mode != nir_var_shared &&
-  mode != nir_var_uniform)
+  mode != nir_var_uniform &&
+  mode != nir_var_param)
  continue;
 
   b->cursor = nir_before_instr(instr);
@@ -481,6 +493,22 @@ nir_lower_io_block(nir_block *block,
return progress;
 }
 
+static void
+nir_lower_io_calc_param_offsets(struct lower_io_state *state,
+nir_function_impl *impl)
+{  /* Fill state->offsets[i] with the aligned offset of entry-point param i. */
+   state->offsets = ralloc_array(state->builder.shader, unsigned,
+ impl->num_params); /* the visible caller only calls this when num_params != 0 */
+   state->offsets[0] = 0;
+   for (int i = 0; i < impl->num_params; ++i) {
+  nir_variable *var = impl->params[i];
+  state->offsets[i] = align(state->offsets[i], 
glsl_get_cl_alignment(var->type));
+  if (i + 1 < impl->num_params) /* seed the next slot; it is aligned on its own iteration */
+ state->offsets[i + 1] = state->offsets[i] + 
glsl_get_cl_size(var->type);
+   }
+   /* Do not free here: lower_load() reads state->offsets[] later (shader is the ralloc context). */
+}
+
 static bool
 nir_lower_io_impl(nir_function_impl *impl,
   nir_variable_mode modes,
@@ -495,6 +523,11 @@ nir_lower_io_impl(nir_function_impl *impl,
state.type_size = type_size;
state.options = options;
 
+   if (modes & nir_var_param &&
+   impl == nir_shader_get_entrypoint(state.builder.shader) &&
+   impl->num_params)
+  nir_lower_io_calc_param_offsets(&state, impl);
+
nir_foreach_block(block, impl) {
   progress |= nir_lower_io_block(block, &state);
}
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 19/19] RFC: nir/vtn: member in struct deref

2018-03-23 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 src/compiler/spirv/vtn_private.h   |  5 +++--
 src/compiler/spirv/vtn_variables.c | 14 +++---
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h
index 510c12faa87..45b581bf80e 100644
--- a/src/compiler/spirv/vtn_private.h
+++ b/src/compiler/spirv/vtn_private.h
@@ -700,12 +700,13 @@ void vtn_local_store(struct vtn_builder *b, struct 
vtn_ssa_value *src,
  nir_deref_var *dest);
 
 struct vtn_ssa_value *vtn_pointer_load(struct vtn_builder *b,
-   struct vtn_pointer *ptr);
+   struct vtn_pointer *ptr,
+   struct vtn_type *);
 void vtn_pointer_store(struct vtn_builder *b, struct vtn_ssa_value *src,
struct vtn_pointer *ptr);
 
 struct vtn_ssa_value *
-vtn_variable_load(struct vtn_builder *b, struct vtn_pointer *src);
+vtn_variable_load(struct vtn_builder *b, struct vtn_pointer *src, struct 
vtn_type *);
 
 void vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src,
 struct vtn_pointer *dest);
diff --git a/src/compiler/spirv/vtn_variables.c 
b/src/compiler/spirv/vtn_variables.c
index 6cf1a63f8c9..76b38b85e80 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -739,9 +739,9 @@ vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value 
*src,
 }
 
 struct vtn_ssa_value *
-vtn_pointer_load(struct vtn_builder *b, struct vtn_pointer *ptr)
+vtn_pointer_load(struct vtn_builder *b, struct vtn_pointer *ptr, struct 
vtn_type *res_type)
 {
-   const struct glsl_type *type = ptr->type->type;
+   const struct glsl_type *type = res_type->type;
struct vtn_ssa_value *val = vtn_create_ssa_value(b, type);
nir_intrinsic_op op = nir_intrinsic_load_global;
 
@@ -1207,7 +1207,7 @@ _vtn_variable_load_store(struct vtn_builder *b, bool load,
   * with it.  Just directly generate load/store_global intrinsics:
   */
  if (load) {
-*inout = vtn_pointer_load(b, ptr);
+*inout = vtn_pointer_load(b, ptr, ptr->type);
  } else {
 vtn_pointer_store(b, *inout, ptr);
  }
@@ -1244,12 +1244,12 @@ _vtn_variable_load_store(struct vtn_builder *b, bool 
load,
 }
 
 struct vtn_ssa_value *
-vtn_variable_load(struct vtn_builder *b, struct vtn_pointer *src)
+vtn_variable_load(struct vtn_builder *b, struct vtn_pointer *src, struct 
vtn_type *type)
 {
if (vtn_pointer_is_external_block(b, src)) {
   return vtn_block_load(b, src);
} else if (!src->var) {
-  return vtn_pointer_load(b, src);
+  return vtn_pointer_load(b, src, type);
} else {
   struct vtn_ssa_value *val = NULL;
   _vtn_variable_load_store(b, true, src, &val);
@@ -1298,7 +1298,7 @@ _vtn_variable_copy(struct vtn_builder *b, struct 
vtn_pointer *dest,
* ensure that matrices get loaded in the optimal way even if they
* are storred row-major in a UBO.
*/
-  vtn_variable_store(b, vtn_variable_load(b, src), dest);
+  vtn_variable_store(b, vtn_variable_load(b, src, src->type), dest);
   return;
 
case GLSL_TYPE_ARRAY:
@@ -2322,7 +2322,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
  return;
   }
 
-  vtn_push_ssa(b, w[2], res_type, vtn_variable_load(b, src));
+  vtn_push_ssa(b, w[2], res_type, vtn_variable_load(b, src, res_type));
   break;
}
 
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 14/19] nir/vtn/opencl: support fma

2018-03-23 Thread Karol Herbst
Signed-off-by: Karol Herbst 
---
 src/compiler/spirv/vtn_opencl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/compiler/spirv/vtn_opencl.c b/src/compiler/spirv/vtn_opencl.c
index 3c5ecd22452..723a7edf9c2 100644
--- a/src/compiler/spirv/vtn_opencl.c
+++ b/src/compiler/spirv/vtn_opencl.c
@@ -58,6 +58,7 @@ static nir_op
 nir_alu_op_for_opencl_opcode(struct vtn_builder *b, enum OpenCLstd opcode)
 {
switch (opcode) {
+   case Fma: return nir_op_ffma;
case SHadd: return nir_op_ihadd;
case UHadd: return nir_op_uhadd;
default:
@@ -236,6 +237,7 @@ vtn_handle_opencl_instruction(struct vtn_builder *b, 
uint32_t ext_opcode,
switch (ext_opcode) {
case SHadd:
case UHadd:
+   case Fma:
   handle_instr(b, ext_opcode, w, count, handle_alu);
   return true;
case Vloadn:
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 09/19] nir/vtn: initial OpenCL.std extension

2018-03-23 Thread Karol Herbst
From: Rob Clark 

Not complete, mostly just adding things as I encounter them in CTS.  But
not getting far enough yet to hit most of the OpenCL.std instructions.

v2: update hadd definition (Karol Herbst )

Signed-off-by: Rob Clark 
Signed-off-by: Karol Herbst 
---
 src/compiler/nir/meson.build  |   1 +
 src/compiler/nir/nir_opcodes.py   |   3 +-
 src/compiler/spirv/spirv_to_nir.c |   2 +
 src/compiler/spirv/vtn_opencl.c   | 266 ++
 src/compiler/spirv/vtn_private.h  |   3 +
 5 files changed, 274 insertions(+), 1 deletion(-)
 create mode 100644 src/compiler/spirv/vtn_opencl.c

diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index a70c236b958..213a139a1b8 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -192,6 +192,7 @@ files_libnir = files(
   '../spirv/vtn_amd.c',
   '../spirv/vtn_cfg.c',
   '../spirv/vtn_glsl450.c',
+  '../spirv/vtn_opencl.c',
   '../spirv/vtn_private.h',
   '../spirv/vtn_subgroup.c',
   '../spirv/vtn_variables.c',
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 65d13200624..86fd6b6d68e 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -768,4 +768,5 @@ dst.z = src2.x;
 dst.w = src3.x;
 """)
 
-
+binop("ihadd", tint, commutative, "(src0 >> 1) + (src1 >> 1) + (src0 & src1 & 
1)")
+binop("uhadd", tuint, commutative, "(src0 >> 1) + (src1 >> 1) + (src0 & src1 & 
1)")
diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index 3acb3fc0b42..6a16d77a771 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -379,6 +379,8 @@ vtn_handle_extension(struct vtn_builder *b, SpvOp opcode,
   } else if ((strcmp((const char *)&w[2], "SPV_AMD_gcn_shader") == 0)
 && (b->options && b->options->caps.gcn_shader)) {
  val->ext_handler = vtn_handle_amd_gcn_shader_instruction;
+  } else if (strcmp(ext, "OpenCL.std") == 0) {
+ val->ext_handler = vtn_handle_opencl_instruction;
   } else {
  vtn_fail("Unsupported extension: %s", ext);
   }
diff --git a/src/compiler/spirv/vtn_opencl.c b/src/compiler/spirv/vtn_opencl.c
new file mode 100644
index 000..3c5ecd22452
--- /dev/null
+++ b/src/compiler/spirv/vtn_opencl.c
@@ -0,0 +1,266 @@
+/*
+ * Copyright © 2018 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *Rob Clark (robdcl...@gmail.com)
+ */
+
+#include "vtn_private.h"
+#include "OpenCL.std.h"
+
+typedef nir_ssa_def *(*nir_handler)(struct vtn_builder *b, enum OpenCLstd 
opcode,
+unsigned num_srcs, nir_ssa_def **srcs); /* per-opcode callback run by handle_instr(); a NULL return means "no result value" */
+
+static void
+handle_instr(struct vtn_builder *b, enum OpenCLstd opcode, const uint32_t *w,
+ unsigned count, nir_handler handler)
+{  /* Gather the instruction's SSA operands, run `handler`, and record its result. */
+   const struct glsl_type *dest_type =
+  vtn_value(b, w[1], vtn_value_type_type)->type->type; /* w[1] names the result type */
+
+   unsigned num_srcs = count - 5; /* operands are read from w[5] .. w[count - 1] */
+   nir_ssa_def *srcs[3] = { NULL, };
+   vtn_assert(num_srcs <= ARRAY_SIZE(srcs)); /* at most three operands fit in srcs[] */
+   for (unsigned i = 0; i < num_srcs; i++) {
+  srcs[i] = vtn_ssa_value(b, w[i + 5])->def;
+   }
+
+   nir_ssa_def *result = handler(b, opcode, num_srcs, srcs);
+   if (result) {
+  struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); /* w[2] is the result id */
+  val->ssa = vtn_create_ssa_value(b, dest_type);
+  val->ssa->def = result;
+   } else {
+  vtn_assert(dest_type == glsl_void_type()); /* a NULL result is only accepted for void-typed instructions */
+   }
+}
+
+static nir_op
+nir_alu_op_for_opencl_opcode(struct vtn_builder *b, enum OpenCLstd opcode)
+{  /* Map an OpenCL.std extended opcode to the NIR ALU opcode returned for it. */
+   switch (opcode) {
+   case SHadd: return nir_op_ihadd;
+   case UHadd: return nir_op_uhadd;
+   default:
+  vtn_fail("No NIR equivalent"); /* NOTE(review): no return follows; presumably vtn_fail() does not return - confirm */
+   }
+}
+
+static nir_ssa_def *
+handle_alu(struct vtn_builder *b, enum OpenCLstd opcode, 

  1   2   >